You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	Separate out put_signed_pixels_clamped() into its own function and
implement an optimized MMX version of the function. Originally committed as revision 3082 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
		| @@ -332,6 +332,27 @@ static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, | ||||
|     } | ||||
| } | ||||
|  | ||||
| static void put_signed_pixels_clamped_c(const DCTELEM *block,  | ||||
|                                         uint8_t *restrict pixels, | ||||
|                                         int line_size) | ||||
| { | ||||
|     int i, j; | ||||
|  | ||||
|     for (i = 0; i < 8; i++) { | ||||
|         for (j = 0; j < 8; j++) { | ||||
|             if (*block < -128) | ||||
|                 *pixels = 0; | ||||
|             else if (*block > 127) | ||||
|                 *pixels = 255; | ||||
|             else | ||||
|                 *pixels = (uint8_t)(*block + 128); | ||||
|             block++; | ||||
|             pixels++; | ||||
|         } | ||||
|         pixels += (line_size - 8); | ||||
|     } | ||||
| } | ||||
|  | ||||
| static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, | ||||
|                           int line_size) | ||||
| { | ||||
| @@ -3131,6 +3152,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) | ||||
|     c->get_pixels = get_pixels_c; | ||||
|     c->diff_pixels = diff_pixels_c; | ||||
|     c->put_pixels_clamped = put_pixels_clamped_c; | ||||
|     c->put_signed_pixels_clamped = put_signed_pixels_clamped_c; | ||||
|     c->add_pixels_clamped = add_pixels_clamped_c; | ||||
|     c->gmc1 = gmc1_c; | ||||
|     c->gmc = gmc_c; | ||||
|   | ||||
| @@ -137,6 +137,7 @@ typedef struct DSPContext { | ||||
|     void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size); | ||||
|     void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride); | ||||
|     void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); | ||||
|     void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); | ||||
|     void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); | ||||
|     /** | ||||
|      * translational global motion compensation. | ||||
| @@ -374,6 +375,7 @@ extern int mm_flags; | ||||
|  | ||||
| void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); | ||||
| void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); | ||||
| void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); | ||||
|  | ||||
| static inline void emms(void) | ||||
| { | ||||
|   | ||||
| @@ -22,6 +22,7 @@ | ||||
|  | ||||
| #include "../dsputil.h" | ||||
| #include "../simple_idct.h" | ||||
| #include "mmx.h" | ||||
|  | ||||
| //#undef NDEBUG | ||||
| //#include <assert.h> | ||||
| @@ -293,6 +294,23 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size | ||||
| 	    :"memory"); | ||||
| } | ||||
|  | ||||
| void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) | ||||
| { | ||||
|     int i; | ||||
|     unsigned char __align8 vector128[8] = | ||||
|       { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; | ||||
|  | ||||
|     movq_m2r(*vector128, mm1); | ||||
|     for (i = 0; i < 8; i++) { | ||||
|         movq_m2r(*(block), mm0); | ||||
|         packsswb_m2r(*(block + 4), mm0); | ||||
|         block += 8; | ||||
|         paddb_r2r(mm1, mm0); | ||||
|         movq_r2m(mm0, *pixels); | ||||
|         pixels += line_size; | ||||
|     } | ||||
| } | ||||
|  | ||||
| void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) | ||||
| { | ||||
|     const DCTELEM *p; | ||||
| @@ -2160,6 +2178,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) | ||||
|         c->diff_pixels = diff_pixels_mmx; | ||||
| #endif //CONFIG_ENCODERS | ||||
|         c->put_pixels_clamped = put_pixels_clamped_mmx; | ||||
|         c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx; | ||||
|         c->add_pixels_clamped = add_pixels_clamped_mmx; | ||||
|         c->clear_blocks = clear_blocks_mmx; | ||||
| #ifdef CONFIG_ENCODERS | ||||
|   | ||||
| @@ -2061,10 +2061,6 @@ static void render_fragments(Vp3DecodeContext *s, | ||||
|     int motion_halfpel_index; | ||||
|     uint8_t *motion_source; | ||||
|  | ||||
|     int16_t *op; | ||||
|     uint8_t *dest; | ||||
|     int j, k; | ||||
|  | ||||
|     debug_vp3("  vp3: rendering final fragments for %s\n", | ||||
|         (plane == 0) ? "Y plane" : (plane == 1) ? "U plane" : "V plane"); | ||||
|  | ||||
| @@ -2186,22 +2182,9 @@ av_log(s->avctx, AV_LOG_ERROR, " help! got beefy vector! (%X, %X)\n", motion_x, | ||||
|                     s->all_fragments[i].coeff_count, | ||||
|                     output_samples); | ||||
|                 if (s->all_fragments[i].coding_method == MODE_INTRA) { | ||||
|                     /* this really needs to be optimized sooner or later */ | ||||
|                     op = output_samples; | ||||
|                     dest = output_plane + s->all_fragments[i].first_pixel; | ||||
|                     for (j = 0; j < 8; j++) { | ||||
|                         for (k = 0; k < 8; k++) { | ||||
|                             if (*op < -128) | ||||
|                                 *dest = 0; | ||||
|                             else if (*op > 127) | ||||
|                                 *dest = 255; | ||||
|                             else | ||||
|                                 *dest = (uint8_t)(*op + 128); | ||||
|                             op++; | ||||
|                             dest++; | ||||
|                         } | ||||
|                         dest += (stride - 8); | ||||
|                     } | ||||
|                     s->dsp.put_signed_pixels_clamped(output_samples, | ||||
|                         output_plane + s->all_fragments[i].first_pixel, | ||||
|                         stride); | ||||
|                 } else { | ||||
|                     s->dsp.add_pixels_clamped(output_samples, | ||||
|                         output_plane + s->all_fragments[i].first_pixel, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user