mirror of
https://github.com/facebook/zstd.git
synced 2025-09-16 09:36:32 +02:00
AArch64: Use better block copy8
The vector copy is only necessary for 16-byte blocks on AArch64.

Decompression uplifts on a Neoverse V2 system, using Zstd-1.5.8 compiled with "-O3 -march=armv8.2-a+sve2":

                 Clang-19   Clang-20   GCC-14    GCC-15
1#silesia.tar:   +0.316%    +0.865%    +0.025%   +0.096%
2#silesia.tar:   +0.689%    +1.374%    +0.027%   +0.065%
3#silesia.tar:   +0.811%    +1.654%    +0.034%   +0.033%
4#silesia.tar:   +0.912%    +1.755%    +0.027%   +0.042%
5#silesia.tar:   +0.995%    +1.826%    +0.062%   +0.094%
6#silesia.tar:   +0.976%    +1.777%    +0.065%   +0.104%
7#silesia.tar:   +0.910%    +1.738%    +0.077%   +0.110%
This commit is contained in:
@@ -168,7 +168,7 @@ static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
|
||||
* Shared functions to include for inlining
|
||||
*********************************************/
|
||||
static void ZSTD_copy8(void* dst, const void* src) {
|
||||
#if defined(ZSTD_ARCH_ARM_NEON)
|
||||
#if defined(ZSTD_ARCH_ARM_NEON) && !defined(__aarch64__)
|
||||
vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
|
||||
#else
|
||||
ZSTD_memcpy(dst, src, 8);
|
||||
|
Reference in New Issue
Block a user