mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
Separate out put_signed_pixels_clamped() into its own function and
implement an optimized MMX version of the function. Originally committed as revision 3082 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
c0c37848d8
commit
f9ed9d8584
@ -332,6 +332,27 @@ static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Store an 8x8 block of signed coefficients as unsigned 8-bit pixels.
 *
 * Every coefficient is biased by +128 and saturated to the 0..255 range:
 * values below -128 are written as 0, values above 127 as 255.
 *
 * @param block     64 coefficients, consumed row by row (8 per row)
 * @param pixels    destination of the top-left pixel of the block
 * @param line_size offset, in bytes, from one destination row to the next
 */
static void put_signed_pixels_clamped_c(const DCTELEM *block,
                                        uint8_t *restrict pixels,
                                        int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++) {
            if (block[col] < -128)
                pixels[col] = 0;
            else if (block[col] > 127)
                pixels[col] = 255;
            else
                pixels[col] = (uint8_t)(block[col] + 128);
        }
        /* step to the next row of coefficients and of the destination */
        block  += 8;
        pixels += line_size;
    }
}
|
||||||
|
|
||||||
static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
|
static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
|
||||||
int line_size)
|
int line_size)
|
||||||
{
|
{
|
||||||
@ -3131,6 +3152,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
|||||||
c->get_pixels = get_pixels_c;
|
c->get_pixels = get_pixels_c;
|
||||||
c->diff_pixels = diff_pixels_c;
|
c->diff_pixels = diff_pixels_c;
|
||||||
c->put_pixels_clamped = put_pixels_clamped_c;
|
c->put_pixels_clamped = put_pixels_clamped_c;
|
||||||
|
c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
|
||||||
c->add_pixels_clamped = add_pixels_clamped_c;
|
c->add_pixels_clamped = add_pixels_clamped_c;
|
||||||
c->gmc1 = gmc1_c;
|
c->gmc1 = gmc1_c;
|
||||||
c->gmc = gmc_c;
|
c->gmc = gmc_c;
|
||||||
|
@ -137,6 +137,7 @@ typedef struct DSPContext {
|
|||||||
void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
|
void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
|
||||||
void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
|
void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
|
||||||
void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
|
void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
|
||||||
|
void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
|
||||||
void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
|
void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
|
||||||
/**
|
/**
|
||||||
* translational global motion compensation.
|
* translational global motion compensation.
|
||||||
@ -374,6 +375,7 @@ extern int mm_flags;
|
|||||||
|
|
||||||
void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
|
void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
|
||||||
void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
|
void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
|
||||||
|
void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
|
||||||
|
|
||||||
static inline void emms(void)
|
static inline void emms(void)
|
||||||
{
|
{
|
||||||
|
@ -22,6 +22,7 @@
|
|||||||
|
|
||||||
#include "../dsputil.h"
|
#include "../dsputil.h"
|
||||||
#include "../simple_idct.h"
|
#include "../simple_idct.h"
|
||||||
|
#include "mmx.h"
|
||||||
|
|
||||||
//#undef NDEBUG
|
//#undef NDEBUG
|
||||||
//#include <assert.h>
|
//#include <assert.h>
|
||||||
@ -293,6 +294,23 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size
|
|||||||
:"memory");
|
:"memory");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
unsigned char __align8 vector128[8] =
|
||||||
|
{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
|
||||||
|
|
||||||
|
movq_m2r(*vector128, mm1);
|
||||||
|
for (i = 0; i < 8; i++) {
|
||||||
|
movq_m2r(*(block), mm0);
|
||||||
|
packsswb_m2r(*(block + 4), mm0);
|
||||||
|
block += 8;
|
||||||
|
paddb_r2r(mm1, mm0);
|
||||||
|
movq_r2m(mm0, *pixels);
|
||||||
|
pixels += line_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
|
void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
|
||||||
{
|
{
|
||||||
const DCTELEM *p;
|
const DCTELEM *p;
|
||||||
@ -2160,6 +2178,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
|||||||
c->diff_pixels = diff_pixels_mmx;
|
c->diff_pixels = diff_pixels_mmx;
|
||||||
#endif //CONFIG_ENCODERS
|
#endif //CONFIG_ENCODERS
|
||||||
c->put_pixels_clamped = put_pixels_clamped_mmx;
|
c->put_pixels_clamped = put_pixels_clamped_mmx;
|
||||||
|
c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx;
|
||||||
c->add_pixels_clamped = add_pixels_clamped_mmx;
|
c->add_pixels_clamped = add_pixels_clamped_mmx;
|
||||||
c->clear_blocks = clear_blocks_mmx;
|
c->clear_blocks = clear_blocks_mmx;
|
||||||
#ifdef CONFIG_ENCODERS
|
#ifdef CONFIG_ENCODERS
|
||||||
|
@ -2061,10 +2061,6 @@ static void render_fragments(Vp3DecodeContext *s,
|
|||||||
int motion_halfpel_index;
|
int motion_halfpel_index;
|
||||||
uint8_t *motion_source;
|
uint8_t *motion_source;
|
||||||
|
|
||||||
int16_t *op;
|
|
||||||
uint8_t *dest;
|
|
||||||
int j, k;
|
|
||||||
|
|
||||||
debug_vp3(" vp3: rendering final fragments for %s\n",
|
debug_vp3(" vp3: rendering final fragments for %s\n",
|
||||||
(plane == 0) ? "Y plane" : (plane == 1) ? "U plane" : "V plane");
|
(plane == 0) ? "Y plane" : (plane == 1) ? "U plane" : "V plane");
|
||||||
|
|
||||||
@ -2186,22 +2182,9 @@ av_log(s->avctx, AV_LOG_ERROR, " help! got beefy vector! (%X, %X)\n", motion_x,
|
|||||||
s->all_fragments[i].coeff_count,
|
s->all_fragments[i].coeff_count,
|
||||||
output_samples);
|
output_samples);
|
||||||
if (s->all_fragments[i].coding_method == MODE_INTRA) {
|
if (s->all_fragments[i].coding_method == MODE_INTRA) {
|
||||||
/* this really needs to be optimized sooner or later */
|
s->dsp.put_signed_pixels_clamped(output_samples,
|
||||||
op = output_samples;
|
output_plane + s->all_fragments[i].first_pixel,
|
||||||
dest = output_plane + s->all_fragments[i].first_pixel;
|
stride);
|
||||||
for (j = 0; j < 8; j++) {
|
|
||||||
for (k = 0; k < 8; k++) {
|
|
||||||
if (*op < -128)
|
|
||||||
*dest = 0;
|
|
||||||
else if (*op > 127)
|
|
||||||
*dest = 255;
|
|
||||||
else
|
|
||||||
*dest = (uint8_t)(*op + 128);
|
|
||||||
op++;
|
|
||||||
dest++;
|
|
||||||
}
|
|
||||||
dest += (stride - 8);
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
s->dsp.add_pixels_clamped(output_samples,
|
s->dsp.add_pixels_clamped(output_samples,
|
||||||
output_plane + s->all_fragments[i].first_pixel,
|
output_plane + s->all_fragments[i].first_pixel,
|
||||||
|
Loading…
Reference in New Issue
Block a user