You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	separate out put_signed_pixels_clamped() into its own function and
implement an optimized MMX version of the function.

Originally committed as revision 3082 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
		| @@ -332,6 +332,27 @@ static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  | /* Write an 8x8 block of signed values to pixels: values below -128 become 0, values above 127 become 255, everything else is stored as value + 128. Output rows start line_size bytes apart. */ | ||||||
|  | static void put_signed_pixels_clamped_c(const DCTELEM *block,  | ||||||
|  |                                         uint8_t *restrict pixels, | ||||||
|  |                                         int line_size) | ||||||
|  | { | ||||||
|  |     int i, j; | ||||||
|  | /* i counts the 8 rows, j the 8 values per row; block is read sequentially (64 values), and after each row pixels skips the remaining line_size - 8 bytes. */ | ||||||
|  |     for (i = 0; i < 8; i++) { | ||||||
|  |         for (j = 0; j < 8; j++) { | ||||||
|  |             if (*block < -128) | ||||||
|  |                 *pixels = 0; | ||||||
|  |             else if (*block > 127) | ||||||
|  |                 *pixels = 255; | ||||||
|  |             else | ||||||
|  |                 *pixels = (uint8_t)(*block + 128); | ||||||
|  |             block++; | ||||||
|  |             pixels++; | ||||||
|  |         } | ||||||
|  |         pixels += (line_size - 8); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, | static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, | ||||||
|                           int line_size) |                           int line_size) | ||||||
| { | { | ||||||
| @@ -3131,6 +3152,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||||||
|     c->get_pixels = get_pixels_c; |     c->get_pixels = get_pixels_c; | ||||||
|     c->diff_pixels = diff_pixels_c; |     c->diff_pixels = diff_pixels_c; | ||||||
|     c->put_pixels_clamped = put_pixels_clamped_c; |     c->put_pixels_clamped = put_pixels_clamped_c; | ||||||
|  |     c->put_signed_pixels_clamped = put_signed_pixels_clamped_c; | ||||||
|     c->add_pixels_clamped = add_pixels_clamped_c; |     c->add_pixels_clamped = add_pixels_clamped_c; | ||||||
|     c->gmc1 = gmc1_c; |     c->gmc1 = gmc1_c; | ||||||
|     c->gmc = gmc_c; |     c->gmc = gmc_c; | ||||||
|   | |||||||
| @@ -137,6 +137,7 @@ typedef struct DSPContext { | |||||||
|     void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size); |     void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size); | ||||||
|     void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride); |     void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride); | ||||||
|     void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); |     void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); | ||||||
|  |     void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); | ||||||
|     void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); |     void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); | ||||||
|     /** |     /** | ||||||
|      * translational global motion compensation. |      * translational global motion compensation. | ||||||
| @@ -374,6 +375,7 @@ extern int mm_flags; | |||||||
|  |  | ||||||
| void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); | void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); | ||||||
| void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); | void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); | ||||||
|  | void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); | ||||||
|  |  | ||||||
| static inline void emms(void) | static inline void emms(void) | ||||||
| { | { | ||||||
|   | |||||||
| @@ -22,6 +22,7 @@ | |||||||
|  |  | ||||||
| #include "../dsputil.h" | #include "../dsputil.h" | ||||||
| #include "../simple_idct.h" | #include "../simple_idct.h" | ||||||
|  | #include "mmx.h" | ||||||
|  |  | ||||||
| //#undef NDEBUG | //#undef NDEBUG | ||||||
| //#include <assert.h> | //#include <assert.h> | ||||||
| @@ -293,6 +294,23 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size | |||||||
| 	    :"memory"); | 	    :"memory"); | ||||||
| } | } | ||||||
|  | /* MMX counterpart of put_signed_pixels_clamped: for each of 8 rows, combines block[0..3] and block[4..7] into one 8-byte register, adds 0x80 to every byte, stores the 8 bytes at pixels, then advances block by 8 values and pixels by line_size. */ | ||||||
|  | void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) | ||||||
|  | { | ||||||
|  |     int i; | ||||||
|  |     unsigned char __align8 vector128[8] = | ||||||
|  |       { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; | ||||||
|  | /* mm1 is loaded once with eight 0x80 bytes and reused in every iteration. NOTE(review): the movq, packsswb and paddb macros presumably come from mmx.h and map to the same-named MMX instructions (packsswb packing 16-bit words to bytes with signed saturation, paddb being a wrapping byte add, which together would match the C clamp-then-bias) - confirm; __align8 presumably requests 8-byte alignment - confirm. */ | ||||||
|  |     movq_m2r(*vector128, mm1); | ||||||
|  |     for (i = 0; i < 8; i++) { | ||||||
|  |         movq_m2r(*(block), mm0); | ||||||
|  |         packsswb_m2r(*(block + 4), mm0); | ||||||
|  |         block += 8; | ||||||
|  |         paddb_r2r(mm1, mm0); | ||||||
|  |         movq_r2m(mm0, *pixels); | ||||||
|  |         pixels += line_size; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) | void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) | ||||||
| { | { | ||||||
|     const DCTELEM *p; |     const DCTELEM *p; | ||||||
| @@ -2160,6 +2178,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) | |||||||
|         c->diff_pixels = diff_pixels_mmx; |         c->diff_pixels = diff_pixels_mmx; | ||||||
| #endif //CONFIG_ENCODERS | #endif //CONFIG_ENCODERS | ||||||
|         c->put_pixels_clamped = put_pixels_clamped_mmx; |         c->put_pixels_clamped = put_pixels_clamped_mmx; | ||||||
|  |         c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx; | ||||||
|         c->add_pixels_clamped = add_pixels_clamped_mmx; |         c->add_pixels_clamped = add_pixels_clamped_mmx; | ||||||
|         c->clear_blocks = clear_blocks_mmx; |         c->clear_blocks = clear_blocks_mmx; | ||||||
| #ifdef CONFIG_ENCODERS | #ifdef CONFIG_ENCODERS | ||||||
|   | |||||||
| @@ -2061,10 +2061,6 @@ static void render_fragments(Vp3DecodeContext *s, | |||||||
|     int motion_halfpel_index; |     int motion_halfpel_index; | ||||||
|     uint8_t *motion_source; |     uint8_t *motion_source; | ||||||
|  |  | ||||||
|     int16_t *op; |  | ||||||
|     uint8_t *dest; |  | ||||||
|     int j, k; |  | ||||||
|  |  | ||||||
|     debug_vp3("  vp3: rendering final fragments for %s\n", |     debug_vp3("  vp3: rendering final fragments for %s\n", | ||||||
|         (plane == 0) ? "Y plane" : (plane == 1) ? "U plane" : "V plane"); |         (plane == 0) ? "Y plane" : (plane == 1) ? "U plane" : "V plane"); | ||||||
|  |  | ||||||
| @@ -2186,22 +2182,9 @@ av_log(s->avctx, AV_LOG_ERROR, " help! got beefy vector! (%X, %X)\n", motion_x, | |||||||
|                     s->all_fragments[i].coeff_count, |                     s->all_fragments[i].coeff_count, | ||||||
|                     output_samples); |                     output_samples); | ||||||
|                 if (s->all_fragments[i].coding_method == MODE_INTRA) { |                 if (s->all_fragments[i].coding_method == MODE_INTRA) { | ||||||
|                     /* this really needs to be optimized sooner or later */ |                     s->dsp.put_signed_pixels_clamped(output_samples, | ||||||
|                     op = output_samples; |                         output_plane + s->all_fragments[i].first_pixel, | ||||||
|                     dest = output_plane + s->all_fragments[i].first_pixel; |                         stride); | ||||||
|                     for (j = 0; j < 8; j++) { |  | ||||||
|                         for (k = 0; k < 8; k++) { |  | ||||||
|                             if (*op < -128) |  | ||||||
|                                 *dest = 0; |  | ||||||
|                             else if (*op > 127) |  | ||||||
|                                 *dest = 255; |  | ||||||
|                             else |  | ||||||
|                                 *dest = (uint8_t)(*op + 128); |  | ||||||
|                             op++; |  | ||||||
|                             dest++; |  | ||||||
|                         } |  | ||||||
|                         dest += (stride - 8); |  | ||||||
|                     } |  | ||||||
|                 } else { |                 } else { | ||||||
|                     s->dsp.add_pixels_clamped(output_samples, |                     s->dsp.add_pixels_clamped(output_samples, | ||||||
|                         output_plane + s->all_fragments[i].first_pixel, |                         output_plane + s->all_fragments[i].first_pixel, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user