1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-08-04 22:03:09 +02:00

avutil/x86/aes: remove a few branches

The rounds value is constant and can only be one of three hardcoded values
(10, 12 or 14), so instead of checking it on every loop iteration, split the
function into three separate implementations, one for each value.

Before:
aes_decrypt_128_aesni:                                  93.8 (47.58x)
aes_decrypt_192_aesni:                                 106.9 (49.30x)
aes_decrypt_256_aesni:                                 109.8 (56.50x)
aes_encrypt_128_aesni:                                  93.2 (47.70x)
aes_encrypt_192_aesni:                                 111.1 (48.36x)
aes_encrypt_256_aesni:                                 113.6 (56.27x)

After:
aes_decrypt_128_aesni:                                  71.5 (63.31x)
aes_decrypt_192_aesni:                                  96.8 (55.64x)
aes_decrypt_256_aesni:                                 106.1 (58.51x)
aes_encrypt_128_aesni:                                  81.3 (55.92x)
aes_encrypt_192_aesni:                                  91.2 (59.78x)
aes_encrypt_256_aesni:                                 109.0 (58.26x)

Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
James Almer
2025-04-07 12:51:54 -03:00
parent 4d4b301e4a
commit a039726c2a
3 changed files with 37 additions and 22 deletions

View File

@ -234,6 +234,7 @@ int av_aes_init(AVAES *a, const uint8_t *key, int key_bits, int decrypt)
int KC = key_bits >> 5; int KC = key_bits >> 5;
int rounds = KC + 6; int rounds = KC + 6;
a->rounds = rounds;
a->crypt = decrypt ? aes_decrypt : aes_encrypt; a->crypt = decrypt ? aes_decrypt : aes_encrypt;
if (ARCH_X86) if (ARCH_X86)
ff_init_aes_x86(a, decrypt); ff_init_aes_x86(a, decrypt);
@ -243,8 +244,6 @@ int av_aes_init(AVAES *a, const uint8_t *key, int key_bits, int decrypt)
if (key_bits != 128 && key_bits != 192 && key_bits != 256) if (key_bits != 128 && key_bits != 192 && key_bits != 256)
return AVERROR(EINVAL); return AVERROR(EINVAL);
a->rounds = rounds;
memcpy(tk, key, KC * 4); memcpy(tk, key, KC * 4);
memcpy(a->round_key[0].u8, key, KC * 4); memcpy(a->round_key[0].u8, key, KC * 4);

View File

@ -26,13 +26,12 @@ SECTION .text
; void ff_aes_decrypt(AVAES *a, uint8_t *dst, const uint8_t *src, ; void ff_aes_decrypt(AVAES *a, uint8_t *dst, const uint8_t *src,
; int count, uint8_t *iv, int rounds) ; int count, uint8_t *iv, int rounds)
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro AES_CRYPT 1 %macro AES_CRYPT 2
cglobal aes_%1rypt, 6,6,2 cglobal aes_%1rypt_%2, 5, 5, 2
test r3d, r3d test r3d, r3d
je .ret je .ret
shl r3d, 4 shl r3d, 4
add r5d, r5d add r0, 0x70
add r0, 0x60
add r2, r3 add r2, r3
add r1, r3 add r1, r3
neg r3 neg r3
@ -45,17 +44,15 @@ cglobal aes_%1rypt, 6,6,2
%ifidn %1, enc %ifidn %1, enc
pxor m0, m1 pxor m0, m1
%endif %endif
pxor m0, [r0+8*r5-0x60] pxor m0, [r0+8*2*%2-0x70]
cmp r5d, 24 %if %2 > 10
je .rounds12 %if %2 > 12
jl .rounds10
aes%1 m0, [r0+0x70]
aes%1 m0, [r0+0x60] aes%1 m0, [r0+0x60]
.rounds12:
aes%1 m0, [r0+0x50] aes%1 m0, [r0+0x50]
%endif
aes%1 m0, [r0+0x40] aes%1 m0, [r0+0x40]
.rounds10:
aes%1 m0, [r0+0x30] aes%1 m0, [r0+0x30]
%endif
aes%1 m0, [r0+0x20] aes%1 m0, [r0+0x20]
aes%1 m0, [r0+0x10] aes%1 m0, [r0+0x10]
aes%1 m0, [r0+0x00] aes%1 m0, [r0+0x00]
@ -64,7 +61,8 @@ cglobal aes_%1rypt, 6,6,2
aes%1 m0, [r0-0x30] aes%1 m0, [r0-0x30]
aes%1 m0, [r0-0x40] aes%1 m0, [r0-0x40]
aes%1 m0, [r0-0x50] aes%1 m0, [r0-0x50]
aes%1last m0, [r0-0x60] aes%1 m0, [r0-0x60]
aes%1last m0, [r0-0x70]
test r4, r4 test r4, r4
je .noiv je .noiv
%ifidn %1, enc %ifidn %1, enc
@ -90,6 +88,10 @@ cglobal aes_%1rypt, 6,6,2
%if HAVE_AESNI_EXTERNAL %if HAVE_AESNI_EXTERNAL
INIT_XMM aesni INIT_XMM aesni
AES_CRYPT enc AES_CRYPT enc, 10
AES_CRYPT dec AES_CRYPT enc, 12
AES_CRYPT enc, 14
AES_CRYPT dec, 10
AES_CRYPT dec, 12
AES_CRYPT dec, 14
%endif %endif

View File

@ -22,15 +22,29 @@
#include "libavutil/aes_internal.h" #include "libavutil/aes_internal.h"
#include "libavutil/x86/cpu.h" #include "libavutil/x86/cpu.h"
void ff_aes_decrypt_aesni(AVAES *a, uint8_t *dst, const uint8_t *src, void ff_aes_decrypt_10_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
int count, uint8_t *iv, int rounds); int count, uint8_t *iv, int rounds);
void ff_aes_encrypt_aesni(AVAES *a, uint8_t *dst, const uint8_t *src, void ff_aes_decrypt_12_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
int count, uint8_t *iv, int rounds);
void ff_aes_decrypt_14_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
int count, uint8_t *iv, int rounds);
void ff_aes_encrypt_10_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
int count, uint8_t *iv, int rounds);
void ff_aes_encrypt_12_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
int count, uint8_t *iv, int rounds);
void ff_aes_encrypt_14_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
int count, uint8_t *iv, int rounds); int count, uint8_t *iv, int rounds);
void ff_init_aes_x86(AVAES *a, int decrypt) void ff_init_aes_x86(AVAES *a, int decrypt)
{ {
int cpu_flags = av_get_cpu_flags(); int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_AESNI(cpu_flags)) if (EXTERNAL_AESNI(cpu_flags)) {
a->crypt = decrypt ? ff_aes_decrypt_aesni : ff_aes_encrypt_aesni; if (a->rounds == 10)
a->crypt = decrypt ? ff_aes_decrypt_10_aesni : ff_aes_encrypt_10_aesni;
else if (a->rounds == 12)
a->crypt = decrypt ? ff_aes_decrypt_12_aesni : ff_aes_encrypt_12_aesni;
else if (a->rounds == 14)
a->crypt = decrypt ? ff_aes_decrypt_14_aesni : ff_aes_encrypt_14_aesni;
}
} }