Mirror of https://github.com/FFmpeg/FFmpeg.git (synced 2024-12-23 12:43:46 +02:00)
avfilter/vf_ssim: improve precision
Use doubles for accumulating floats.
This commit is contained in:
parent c35382aaf4
commit fcc0424c93
@ -28,7 +28,7 @@ typedef struct SSIMDSPContext {
|
|||||||
void (*ssim_4x4_line)(const uint8_t *buf, ptrdiff_t buf_stride,
|
void (*ssim_4x4_line)(const uint8_t *buf, ptrdiff_t buf_stride,
|
||||||
const uint8_t *ref, ptrdiff_t ref_stride,
|
const uint8_t *ref, ptrdiff_t ref_stride,
|
||||||
int (*sums)[4], int w);
|
int (*sums)[4], int w);
|
||||||
float (*ssim_end_line)(const int (*sum0)[4], const int (*sum1)[4], int w);
|
double (*ssim_end_line)(const int (*sum0)[4], const int (*sum1)[4], int w);
|
||||||
} SSIMDSPContext;
|
} SSIMDSPContext;
|
||||||
|
|
||||||
void ff_ssim_init_x86(SSIMDSPContext *dsp);
|
void ff_ssim_init_x86(SSIMDSPContext *dsp);
|
||||||
|
@ -55,13 +55,13 @@ typedef struct SSIMContext {
|
|||||||
uint64_t nb_frames;
|
uint64_t nb_frames;
|
||||||
double ssim[4], ssim_total;
|
double ssim[4], ssim_total;
|
||||||
char comps[4];
|
char comps[4];
|
||||||
float coefs[4];
|
double coefs[4];
|
||||||
uint8_t rgba_map[4];
|
uint8_t rgba_map[4];
|
||||||
int planewidth[4];
|
int planewidth[4];
|
||||||
int planeheight[4];
|
int planeheight[4];
|
||||||
int *temp;
|
int *temp;
|
||||||
int is_rgb;
|
int is_rgb;
|
||||||
float (*ssim_plane)(SSIMDSPContext *dsp,
|
double (*ssim_plane)(SSIMDSPContext *dsp,
|
||||||
uint8_t *main, int main_stride,
|
uint8_t *main, int main_stride,
|
||||||
uint8_t *ref, int ref_stride,
|
uint8_t *ref, int ref_stride,
|
||||||
int width, int height, void *temp,
|
int width, int height, void *temp,
|
||||||
@ -206,9 +206,9 @@ static float ssim_endn_16bit(const int64_t (*sum0)[4], const int64_t (*sum1)[4],
|
|||||||
return ssim;
|
return ssim;
|
||||||
}
|
}
|
||||||
|
|
||||||
static float ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int width)
|
static double ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int width)
|
||||||
{
|
{
|
||||||
float ssim = 0.0;
|
double ssim = 0.0;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < width; i++)
|
for (i = 0; i < width; i++)
|
||||||
@ -221,14 +221,14 @@ static float ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int widt
|
|||||||
|
|
||||||
#define SUM_LEN(w) (((w) >> 2) + 3)
|
#define SUM_LEN(w) (((w) >> 2) + 3)
|
||||||
|
|
||||||
static float ssim_plane_16bit(SSIMDSPContext *dsp,
|
static double ssim_plane_16bit(SSIMDSPContext *dsp,
|
||||||
uint8_t *main, int main_stride,
|
uint8_t *main, int main_stride,
|
||||||
uint8_t *ref, int ref_stride,
|
uint8_t *ref, int ref_stride,
|
||||||
int width, int height, void *temp,
|
int width, int height, void *temp,
|
||||||
int max)
|
int max)
|
||||||
{
|
{
|
||||||
int z = 0, y;
|
int z = 0, y;
|
||||||
float ssim = 0.0;
|
double ssim = 0.0;
|
||||||
int64_t (*sum0)[4] = temp;
|
int64_t (*sum0)[4] = temp;
|
||||||
int64_t (*sum1)[4] = sum0 + SUM_LEN(width);
|
int64_t (*sum1)[4] = sum0 + SUM_LEN(width);
|
||||||
|
|
||||||
@ -249,14 +249,14 @@ static float ssim_plane_16bit(SSIMDSPContext *dsp,
|
|||||||
return ssim / ((height - 1) * (width - 1));
|
return ssim / ((height - 1) * (width - 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
static float ssim_plane(SSIMDSPContext *dsp,
|
static double ssim_plane(SSIMDSPContext *dsp,
|
||||||
uint8_t *main, int main_stride,
|
uint8_t *main, int main_stride,
|
||||||
uint8_t *ref, int ref_stride,
|
uint8_t *ref, int ref_stride,
|
||||||
int width, int height, void *temp,
|
int width, int height, void *temp,
|
||||||
int max)
|
int max)
|
||||||
{
|
{
|
||||||
int z = 0, y;
|
int z = 0, y;
|
||||||
float ssim = 0.0;
|
double ssim = 0.0;
|
||||||
int (*sum0)[4] = temp;
|
int (*sum0)[4] = temp;
|
||||||
int (*sum1)[4] = sum0 + SUM_LEN(width);
|
int (*sum1)[4] = sum0 + SUM_LEN(width);
|
||||||
|
|
||||||
@ -279,7 +279,7 @@ static float ssim_plane(SSIMDSPContext *dsp,
|
|||||||
|
|
||||||
static double ssim_db(double ssim, double weight)
|
static double ssim_db(double ssim, double weight)
|
||||||
{
|
{
|
||||||
return 10 * log10(weight / (weight - ssim));
|
return (fabs(weight - ssim) > 1e-9) ? 10.0 * log10(weight / (weight - ssim)) : INFINITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int do_ssim(FFFrameSync *fs)
|
static int do_ssim(FFFrameSync *fs)
|
||||||
@ -288,7 +288,7 @@ static int do_ssim(FFFrameSync *fs)
|
|||||||
SSIMContext *s = ctx->priv;
|
SSIMContext *s = ctx->priv;
|
||||||
AVFrame *master, *ref;
|
AVFrame *master, *ref;
|
||||||
AVDictionary **metadata;
|
AVDictionary **metadata;
|
||||||
float c[4], ssimv = 0.0;
|
double c[4] = { 0 }, ssimv = 0.0;
|
||||||
int ret, i;
|
int ret, i;
|
||||||
|
|
||||||
ret = ff_framesync_dualinput_get(fs, &master, &ref);
|
ret = ff_framesync_dualinput_get(fs, &master, &ref);
|
||||||
|
@ -169,8 +169,9 @@ SSIM_4X4_LINE 8
|
|||||||
%endif
|
%endif
|
||||||
|
|
||||||
INIT_XMM sse4
|
INIT_XMM sse4
|
||||||
cglobal ssim_end_line, 3, 3, 6, sum0, sum1, w
|
cglobal ssim_end_line, 3, 3, 7, sum0, sum1, w
|
||||||
pxor m0, m0
|
pxor m0, m0
|
||||||
|
pxor m6, m6
|
||||||
.loop:
|
.loop:
|
||||||
mova m1, [sum0q+mmsize*0]
|
mova m1, [sum0q+mmsize*0]
|
||||||
mova m2, [sum0q+mmsize*1]
|
mova m2, [sum0q+mmsize*1]
|
||||||
@ -214,34 +215,46 @@ cglobal ssim_end_line, 3, 3, 6, sum0, sum1, w
|
|||||||
mulps m4, m5
|
mulps m4, m5
|
||||||
mulps m3, m1
|
mulps m3, m1
|
||||||
divps m4, m3 ; ssim_endl
|
divps m4, m3 ; ssim_endl
|
||||||
addps m0, m4 ; ssim
|
mova m5, m4
|
||||||
|
cvtps2pd m3, m5
|
||||||
|
movhlps m5, m5
|
||||||
|
cvtps2pd m5, m5
|
||||||
|
addpd m0, m3 ; ssim
|
||||||
|
addpd m6, m5 ; ssim
|
||||||
add sum0q, mmsize*4
|
add sum0q, mmsize*4
|
||||||
add sum1q, mmsize*4
|
add sum1q, mmsize*4
|
||||||
sub wd, 4
|
sub wd, 4
|
||||||
jg .loop
|
jg .loop
|
||||||
|
|
||||||
; subps the ones we added too much
|
; subpd the ones we added too much
|
||||||
test wd, wd
|
test wd, wd
|
||||||
jz .end
|
jz .end
|
||||||
add wd, 4
|
add wd, 4
|
||||||
|
test wd, 3
|
||||||
|
jz .skip3
|
||||||
test wd, 2
|
test wd, 2
|
||||||
jz .skip2
|
jz .skip2
|
||||||
psrldq m4, 8
|
|
||||||
.skip2:
|
|
||||||
test wd, 1
|
test wd, 1
|
||||||
jz .skip1
|
jz .skip1
|
||||||
psrldq m4, 4
|
.skip3:
|
||||||
|
psrldq m5, 8
|
||||||
|
subpd m6, m5
|
||||||
|
jmp .end
|
||||||
|
.skip2:
|
||||||
|
psrldq m5, 8
|
||||||
|
subpd m6, m5
|
||||||
|
subpd m0, m3
|
||||||
|
jmp .end
|
||||||
.skip1:
|
.skip1:
|
||||||
subps m0, m4
|
psrldq m3, 16
|
||||||
|
subpd m6, m5
|
||||||
|
|
||||||
.end:
|
.end:
|
||||||
|
addpd m0, m6
|
||||||
movhlps m4, m0
|
movhlps m4, m0
|
||||||
addps m0, m4
|
addpd m0, m4
|
||||||
movss m4, m0
|
|
||||||
shufps m0, m0, 1
|
|
||||||
addss m0, m4
|
|
||||||
%if ARCH_X86_32
|
%if ARCH_X86_32
|
||||||
movss r0m, m0
|
movsd r0m, m0
|
||||||
fld r0mp
|
fld r0mp
|
||||||
%endif
|
%endif
|
||||||
RET
|
RET
|
||||||
|
@ -28,7 +28,7 @@ void ff_ssim_4x4_line_ssse3(const uint8_t *buf, ptrdiff_t buf_stride,
|
|||||||
void ff_ssim_4x4_line_xop (const uint8_t *buf, ptrdiff_t buf_stride,
|
void ff_ssim_4x4_line_xop (const uint8_t *buf, ptrdiff_t buf_stride,
|
||||||
const uint8_t *ref, ptrdiff_t ref_stride,
|
const uint8_t *ref, ptrdiff_t ref_stride,
|
||||||
int (*sums)[4], int w);
|
int (*sums)[4], int w);
|
||||||
float ff_ssim_end_line_sse4(const int (*sum0)[4], const int (*sum1)[4], int w);
|
double ff_ssim_end_line_sse4(const int (*sum0)[4], const int (*sum1)[4], int w);
|
||||||
|
|
||||||
void ff_ssim_init_x86(SSIMDSPContext *dsp)
|
void ff_ssim_init_x86(SSIMDSPContext *dsp)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user