You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-04 22:03:09 +02:00
avutil/x86/aes: remove a few branches
The rounds value is constant and can be one of three hardcoded values, so instead of checking it on every loop, just split the function into three different implementations for each value. Before: aes_decrypt_128_aesni: 93.8 (47.58x) aes_decrypt_192_aesni: 106.9 (49.30x) aes_decrypt_256_aesni: 109.8 (56.50x) aes_encrypt_128_aesni: 93.2 (47.70x) aes_encrypt_192_aesni: 111.1 (48.36x) aes_encrypt_256_aesni: 113.6 (56.27x) After: aes_decrypt_128_aesni: 71.5 (63.31x) aes_decrypt_192_aesni: 96.8 (55.64x) aes_decrypt_256_aesni: 106.1 (58.51x) aes_encrypt_128_aesni: 81.3 (55.92x) aes_encrypt_192_aesni: 91.2 (59.78x) aes_encrypt_256_aesni: 109.0 (58.26x) Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
@ -234,6 +234,7 @@ int av_aes_init(AVAES *a, const uint8_t *key, int key_bits, int decrypt)
|
|||||||
int KC = key_bits >> 5;
|
int KC = key_bits >> 5;
|
||||||
int rounds = KC + 6;
|
int rounds = KC + 6;
|
||||||
|
|
||||||
|
a->rounds = rounds;
|
||||||
a->crypt = decrypt ? aes_decrypt : aes_encrypt;
|
a->crypt = decrypt ? aes_decrypt : aes_encrypt;
|
||||||
if (ARCH_X86)
|
if (ARCH_X86)
|
||||||
ff_init_aes_x86(a, decrypt);
|
ff_init_aes_x86(a, decrypt);
|
||||||
@ -243,8 +244,6 @@ int av_aes_init(AVAES *a, const uint8_t *key, int key_bits, int decrypt)
|
|||||||
if (key_bits != 128 && key_bits != 192 && key_bits != 256)
|
if (key_bits != 128 && key_bits != 192 && key_bits != 256)
|
||||||
return AVERROR(EINVAL);
|
return AVERROR(EINVAL);
|
||||||
|
|
||||||
a->rounds = rounds;
|
|
||||||
|
|
||||||
memcpy(tk, key, KC * 4);
|
memcpy(tk, key, KC * 4);
|
||||||
memcpy(a->round_key[0].u8, key, KC * 4);
|
memcpy(a->round_key[0].u8, key, KC * 4);
|
||||||
|
|
||||||
|
@ -26,13 +26,12 @@ SECTION .text
|
|||||||
; void ff_aes_decrypt(AVAES *a, uint8_t *dst, const uint8_t *src,
|
; void ff_aes_decrypt(AVAES *a, uint8_t *dst, const uint8_t *src,
|
||||||
; int count, uint8_t *iv, int rounds)
|
; int count, uint8_t *iv, int rounds)
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
%macro AES_CRYPT 1
|
%macro AES_CRYPT 2
|
||||||
cglobal aes_%1rypt, 6,6,2
|
cglobal aes_%1rypt_%2, 5, 5, 2
|
||||||
test r3d, r3d
|
test r3d, r3d
|
||||||
je .ret
|
je .ret
|
||||||
shl r3d, 4
|
shl r3d, 4
|
||||||
add r5d, r5d
|
add r0, 0x70
|
||||||
add r0, 0x60
|
|
||||||
add r2, r3
|
add r2, r3
|
||||||
add r1, r3
|
add r1, r3
|
||||||
neg r3
|
neg r3
|
||||||
@ -45,17 +44,15 @@ cglobal aes_%1rypt, 6,6,2
|
|||||||
%ifidn %1, enc
|
%ifidn %1, enc
|
||||||
pxor m0, m1
|
pxor m0, m1
|
||||||
%endif
|
%endif
|
||||||
pxor m0, [r0+8*r5-0x60]
|
pxor m0, [r0+8*2*%2-0x70]
|
||||||
cmp r5d, 24
|
%if %2 > 10
|
||||||
je .rounds12
|
%if %2 > 12
|
||||||
jl .rounds10
|
|
||||||
aes%1 m0, [r0+0x70]
|
|
||||||
aes%1 m0, [r0+0x60]
|
aes%1 m0, [r0+0x60]
|
||||||
.rounds12:
|
|
||||||
aes%1 m0, [r0+0x50]
|
aes%1 m0, [r0+0x50]
|
||||||
|
%endif
|
||||||
aes%1 m0, [r0+0x40]
|
aes%1 m0, [r0+0x40]
|
||||||
.rounds10:
|
|
||||||
aes%1 m0, [r0+0x30]
|
aes%1 m0, [r0+0x30]
|
||||||
|
%endif
|
||||||
aes%1 m0, [r0+0x20]
|
aes%1 m0, [r0+0x20]
|
||||||
aes%1 m0, [r0+0x10]
|
aes%1 m0, [r0+0x10]
|
||||||
aes%1 m0, [r0+0x00]
|
aes%1 m0, [r0+0x00]
|
||||||
@ -64,7 +61,8 @@ cglobal aes_%1rypt, 6,6,2
|
|||||||
aes%1 m0, [r0-0x30]
|
aes%1 m0, [r0-0x30]
|
||||||
aes%1 m0, [r0-0x40]
|
aes%1 m0, [r0-0x40]
|
||||||
aes%1 m0, [r0-0x50]
|
aes%1 m0, [r0-0x50]
|
||||||
aes%1last m0, [r0-0x60]
|
aes%1 m0, [r0-0x60]
|
||||||
|
aes%1last m0, [r0-0x70]
|
||||||
test r4, r4
|
test r4, r4
|
||||||
je .noiv
|
je .noiv
|
||||||
%ifidn %1, enc
|
%ifidn %1, enc
|
||||||
@ -90,6 +88,10 @@ cglobal aes_%1rypt, 6,6,2
|
|||||||
|
|
||||||
%if HAVE_AESNI_EXTERNAL
|
%if HAVE_AESNI_EXTERNAL
|
||||||
INIT_XMM aesni
|
INIT_XMM aesni
|
||||||
AES_CRYPT enc
|
AES_CRYPT enc, 10
|
||||||
AES_CRYPT dec
|
AES_CRYPT enc, 12
|
||||||
|
AES_CRYPT enc, 14
|
||||||
|
AES_CRYPT dec, 10
|
||||||
|
AES_CRYPT dec, 12
|
||||||
|
AES_CRYPT dec, 14
|
||||||
%endif
|
%endif
|
||||||
|
@ -22,15 +22,29 @@
|
|||||||
#include "libavutil/aes_internal.h"
|
#include "libavutil/aes_internal.h"
|
||||||
#include "libavutil/x86/cpu.h"
|
#include "libavutil/x86/cpu.h"
|
||||||
|
|
||||||
void ff_aes_decrypt_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
|
void ff_aes_decrypt_10_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
|
||||||
int count, uint8_t *iv, int rounds);
|
int count, uint8_t *iv, int rounds);
|
||||||
void ff_aes_encrypt_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
|
void ff_aes_decrypt_12_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
|
||||||
int count, uint8_t *iv, int rounds);
|
int count, uint8_t *iv, int rounds);
|
||||||
|
void ff_aes_decrypt_14_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
|
||||||
|
int count, uint8_t *iv, int rounds);
|
||||||
|
void ff_aes_encrypt_10_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
|
||||||
|
int count, uint8_t *iv, int rounds);
|
||||||
|
void ff_aes_encrypt_12_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
|
||||||
|
int count, uint8_t *iv, int rounds);
|
||||||
|
void ff_aes_encrypt_14_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
|
||||||
|
int count, uint8_t *iv, int rounds);
|
||||||
|
|
||||||
void ff_init_aes_x86(AVAES *a, int decrypt)
|
void ff_init_aes_x86(AVAES *a, int decrypt)
|
||||||
{
|
{
|
||||||
int cpu_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (EXTERNAL_AESNI(cpu_flags))
|
if (EXTERNAL_AESNI(cpu_flags)) {
|
||||||
a->crypt = decrypt ? ff_aes_decrypt_aesni : ff_aes_encrypt_aesni;
|
if (a->rounds == 10)
|
||||||
|
a->crypt = decrypt ? ff_aes_decrypt_10_aesni : ff_aes_encrypt_10_aesni;
|
||||||
|
else if (a->rounds == 12)
|
||||||
|
a->crypt = decrypt ? ff_aes_decrypt_12_aesni : ff_aes_encrypt_12_aesni;
|
||||||
|
else if (a->rounds == 14)
|
||||||
|
a->crypt = decrypt ? ff_aes_decrypt_14_aesni : ff_aes_encrypt_14_aesni;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user