You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-04 22:03:09 +02:00
lavu/aes: add x86 AESNI optimizations
crypto_bench comparison for AES-128-ECB:

lavu_aesni  AES-128-ECB  size: 1048576  runs: 1024  time:  0.596 +- 0.081
lavu_c      AES-128-ECB  size: 1048576  runs: 1024  time: 17.007 +- 2.131
crypto      AES-128-ECB  size: 1048576  runs: 1024  time:  0.612 +- 1.857
gcrypt      AES-128-ECB  size: 1048576  runs: 1024  time:  1.123 +- 0.224
tomcrypt    AES-128-ECB  size: 1048576  runs: 1024  time:  9.038 +- 0.790

Improved-By: Henrik Gramner <henrik@gramner.com>
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
committed by
James Almer
parent
2daaafafc6
commit
2ea3c51795
@ -208,6 +208,8 @@ int av_aes_init(AVAES *a, const uint8_t *key, int key_bits, int decrypt)
|
|||||||
uint8_t alog8[512];
|
uint8_t alog8[512];
|
||||||
|
|
||||||
a->crypt = decrypt ? aes_decrypt : aes_encrypt;
|
a->crypt = decrypt ? aes_decrypt : aes_encrypt;
|
||||||
|
if (ARCH_X86)
|
||||||
|
ff_init_aes_x86(a, decrypt);
|
||||||
|
|
||||||
if (!enc_multbl[FF_ARRAY_ELEMS(enc_multbl) - 1][FF_ARRAY_ELEMS(enc_multbl[0]) - 1]) {
|
if (!enc_multbl[FF_ARRAY_ELEMS(enc_multbl) - 1][FF_ARRAY_ELEMS(enc_multbl[0]) - 1]) {
|
||||||
j = 1;
|
j = 1;
|
||||||
|
@ -40,4 +40,6 @@ typedef struct AVAES {
|
|||||||
void (*crypt)(struct AVAES *a, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv, int rounds);
|
void (*crypt)(struct AVAES *a, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv, int rounds);
|
||||||
} AVAES;
|
} AVAES;
|
||||||
|
|
||||||
|
void ff_init_aes_x86(AVAES *a, int decrypt);
|
||||||
|
|
||||||
#endif /* AVUTIL_AES_INTERNAL_H */
|
#endif /* AVUTIL_AES_INTERNAL_H */
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
OBJS += x86/cpu.o \
|
OBJS += x86/aes_init.o \
|
||||||
|
x86/cpu.o \
|
||||||
x86/fixed_dsp_init.o \
|
x86/fixed_dsp_init.o \
|
||||||
x86/float_dsp_init.o \
|
x86/float_dsp_init.o \
|
||||||
x86/imgutils_init.o \
|
x86/imgutils_init.o \
|
||||||
@ -10,7 +11,8 @@ OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils_init.o \
|
|||||||
|
|
||||||
EMMS_OBJS_$(HAVE_MMX_INLINE)_$(HAVE_MMX_EXTERNAL)_$(HAVE_MM_EMPTY) = x86/emms.o
|
EMMS_OBJS_$(HAVE_MMX_INLINE)_$(HAVE_MMX_EXTERNAL)_$(HAVE_MM_EMPTY) = x86/emms.o
|
||||||
|
|
||||||
X86ASM-OBJS += x86/cpuid.o \
|
X86ASM-OBJS += x86/aes.o \
|
||||||
|
x86/cpuid.o \
|
||||||
$(EMMS_OBJS__yes_) \
|
$(EMMS_OBJS__yes_) \
|
||||||
x86/fixed_dsp.o \
|
x86/fixed_dsp.o \
|
||||||
x86/float_dsp.o \
|
x86/float_dsp.o \
|
||||||
|
95
libavutil/x86/aes.asm
Normal file
95
libavutil/x86/aes.asm
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
;*****************************************************************************
|
||||||
|
;* Copyright (c) 2015 Rodger Combs <rodger.combs@gmail.com>
|
||||||
|
;*
|
||||||
|
;* This file is part of FFmpeg.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
;* modify it under the terms of the GNU Lesser General Public
|
||||||
|
;* License as published by the Free Software Foundation; either
|
||||||
|
;* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;* Lesser General Public License for more details.
|
||||||
|
;*
|
||||||
|
;* You should have received a copy of the GNU Lesser General Public
|
||||||
|
;* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
;******************************************************************************
|
||||||
|
|
||||||
|
%include "x86util.asm"
|
||||||
|
|
||||||
|
SECTION .text
|
||||||
|
|
||||||
|
;-----------------------------------------------------------------------------
; void ff_aes_encrypt(AVAES *a, uint8_t *dst, const uint8_t *src,
;                     int count, uint8_t *iv, int rounds)
; void ff_aes_decrypt(AVAES *a, uint8_t *dst, const uint8_t *src,
;                     int count, uint8_t *iv, int rounds)
;
; Processes `count` 16-byte blocks from src to dst. The 16-byte values at
; a + 0 .. a + 16*rounds are used as round keys: the one at a + 16*rounds is
; XORed into the input first, the one at a + 0 is used by the final round.
; `rounds` is compared against 12 (10 / 12 / more), selecting how many of
; the unrolled round instructions are executed.
;
; If iv is non-NULL the blocks are chained (CBC): on encryption the previous
; output block is XORed into the input, on decryption the previous input
; block is XORed into the output; the last chaining value is written back to
; iv. If iv is NULL every block is processed independently.
;
; Register use:
;   r0 = a + 0x60 (biased; presumably so all key displacements fit in a byte)
;   r1 = dst + 16*count, r2 = src + 16*count
;   r3 = negative byte offset, counts up to zero
;   r4 = iv, r5 = 2*rounds
;   m0 = current block, m1 = chaining value
;-----------------------------------------------------------------------------
%macro AES_CRYPT 1
cglobal aes_%1rypt, 6,6,2
    ; count is a 32-bit int: only its low half is defined on x86-64, so the
    ; emptiness check must not look at the upper half of the register.
    test     r3d, r3d
    je .ret
    shl      r3d, 4                 ; blocks -> bytes (also clears bits 32+)
    add      r5d, r5d               ; 2*rounds, so that 8*r5 == 16*rounds
    add       r0, 0x60
    add       r2, r3                ; point src/dst at their ends ...
    add       r1, r3
    neg       r3                    ; ... and index them with a negative offset
    pxor      m1, m1                ; no iv: chaining value is all zeroes
    test      r4, r4
    je .block
    movu      m1, [r4] ; iv
.block:
    movu      m0, [r2+r3] ; state
%ifidn %1, enc
    pxor      m0, m1                ; chain previous output (zero without iv)
%endif
    pxor      m0, [r0+8*r5-0x60]    ; initial key addition, key at a+16*rounds
    cmp      r5d, 24                ; 2*rounds against 2*12
    je .rounds12
    jl .rounds10
    aes%1     m0, [r0+0x70]
    aes%1     m0, [r0+0x60]
.rounds12:
    aes%1     m0, [r0+0x50]
    aes%1     m0, [r0+0x40]
.rounds10:
    aes%1     m0, [r0+0x30]
    aes%1     m0, [r0+0x20]
    aes%1     m0, [r0+0x10]
    aes%1     m0, [r0+0x00]
    aes%1     m0, [r0-0x10]
    aes%1     m0, [r0-0x20]
    aes%1     m0, [r0-0x30]
    aes%1     m0, [r0-0x40]
    aes%1     m0, [r0-0x50]
    aes%1last m0, [r0-0x60]
    test      r4, r4
    je .noiv
%ifidn %1, enc
    mova      m1, m0                ; output block becomes next chaining value
%else
    pxor      m0, m1                ; undo chaining with the previous input
    movu      m1, [r2+r3]           ; reload this input before dst is written
%endif
.noiv:
    movu      [r1+r3], m0
    add       r3, 16
    jl .block                       ; offset still negative: more blocks left
    test      r4, r4
    je .ret
    ; write the final chaining value back to the caller's iv
%ifidn %1, dec
    movu      [r4], m1
%else
    movu      [r4], m0
%endif
.ret:
    REP_RET
%endmacro

%if HAVE_AESNI_EXTERNAL
INIT_XMM aesni
AES_CRYPT enc
AES_CRYPT dec
%endif
|
36
libavutil/x86/aes_init.c
Normal file
36
libavutil/x86/aes_init.c
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015 Rodger Combs <rodger.combs@gmail.com>
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include "libavutil/aes_internal.h"
|
||||||
|
#include "libavutil/x86/cpu.h"
|
||||||
|
|
||||||
|
/* AES-NI block routines; see libavutil/x86/aes.asm. */
void ff_aes_encrypt_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
                          int count, uint8_t *iv, int rounds);
void ff_aes_decrypt_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
                          int count, uint8_t *iv, int rounds);

/**
 * Install the AES-NI implementation into an AES context when available.
 *
 * Leaves a->crypt untouched unless EXTERNAL_AESNI() accepts the flags
 * reported by av_get_cpu_flags().
 *
 * @param a       context whose crypt callback may be replaced
 * @param decrypt non-zero selects the decrypt routine, zero the encrypt one
 */
void ff_init_aes_x86(AVAES *a, int decrypt)
{
    const int flags = av_get_cpu_flags();

    if (!EXTERNAL_AESNI(flags))
        return;

    if (decrypt)
        a->crypt = ff_aes_decrypt_aesni;
    else
        a->crypt = ff_aes_encrypt_aesni;
}
|
Reference in New Issue
Block a user