diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 689bb7f181..d530cf92c0 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -627,3 +627,34 @@ if test x"$Ac_cachevar" = x"yes"; then
 fi
 undefine([Ac_cachevar])dnl
 ])# PGAC_SSE42_CRC32_INTRINSICS
+
+
+# PGAC_ARM64CE_CRC32C_INTRINSICS
+# -----------------------
+# Check if the compiler supports the ARMv8 CRC Extension CRC32C instructions
+# using the __crc32cb, __crc32ch, __crc32cw, and __crc32cd intrinsic functions.
+#
+# An optional compiler flag can be passed as argument (e.g. -march=armv8-a+crc). If the
+# intrinsics are supported, sets pgac_arm64ce_crc32c_intrinsics, and CFLAGS_ARM64CE_CRC32C.
+AC_DEFUN([PGAC_ARM64CE_CRC32C_INTRINSICS],
+[define([Ac_cachevar], [AS_TR_SH([pgac_cv_arm64ce_crc32c_intrinsics_$1])])dnl
+AC_CACHE_CHECK([for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=$1], [Ac_cachevar],
+[pgac_save_CFLAGS=$CFLAGS
+CFLAGS="$pgac_save_CFLAGS $1"
+AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <arm_acle.h>],
+  [unsigned int crc = 0;
+   crc = __crc32cb(crc, 0);
+   crc = __crc32ch(crc, 0);
+   crc = __crc32cw(crc, 0);
+   crc = __crc32cd(crc, 0);
+   /* return computed value, to prevent the above being optimized away */
+   return crc == 0;])],
+  [Ac_cachevar=yes],
+  [Ac_cachevar=no])
+CFLAGS="$pgac_save_CFLAGS"])
+if test x"$Ac_cachevar" = x"yes"; then
+  CFLAGS_ARM64CE_CRC32C="$1"
+  pgac_arm64ce_crc32c_intrinsics=yes
+fi
+undefine([Ac_cachevar])dnl
+])# PGAC_ARM64CE_CRC32C_INTRINSICS
diff --git a/configure b/configure
index 1242e310b4..9b1389df92 100755
--- a/configure
+++ b/configure
@@ -646,6 +646,7 @@ MSGMERGE
 MSGFMT_FLAGS
 MSGFMT
 PG_CRC32C_OBJS
+CFLAGS_ARM64CE_CRC32C
 CFLAGS_SSE42
 have_win32_dbghelp
 HAVE_IPV6
@@ -15509,28 +15510,175 @@ if ac_fn_c_try_compile "$LINENO"; then :
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
+# Check for ARM64 CRC Extensions intrinsics to do CRC calculations.
+#
+# First check if __crc32c* intrinsics can be used with the default compiler
+# flags. If not, check if adding -march=armv8-a+crc flag helps.
+# CFLAGS_ARM64CE_CRC32C is set if that's required.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=" >&5
+$as_echo_n "checking for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=... " >&6; }
+if ${pgac_cv_arm64ce_crc32c_intrinsics_+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  pgac_save_CFLAGS=$CFLAGS
+CFLAGS="$pgac_save_CFLAGS "
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <arm_acle.h>
+int
+main ()
+{
+unsigned int crc = 0;
+   crc = __crc32cb(crc, 0);
+   crc = __crc32ch(crc, 0);
+   crc = __crc32cw(crc, 0);
+   crc = __crc32cd(crc, 0);
+   /* return computed value, to prevent the above being optimized away */
+   return crc == 0;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv_arm64ce_crc32c_intrinsics_=yes
+else
+  pgac_cv_arm64ce_crc32c_intrinsics_=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+CFLAGS="$pgac_save_CFLAGS"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_arm64ce_crc32c_intrinsics_" >&5
+$as_echo "$pgac_cv_arm64ce_crc32c_intrinsics_" >&6; }
+if test x"$pgac_cv_arm64ce_crc32c_intrinsics_" = x"yes"; then
+  CFLAGS_ARM64CE_CRC32C=""
+  pgac_arm64ce_crc32c_intrinsics=yes
+fi
+
+if test x"$pgac_arm64ce_crc32c_intrinsics" != x"yes"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=-march=armv8-a+crc" >&5
+$as_echo_n "checking for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=-march=armv8-a+crc... " >&6; }
+if ${pgac_cv_arm64ce_crc32c_intrinsics__march_armv8_apcrc+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  pgac_save_CFLAGS=$CFLAGS
+CFLAGS="$pgac_save_CFLAGS -march=armv8-a+crc"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <arm_acle.h>
+int
+main ()
+{
+unsigned int crc = 0;
+   crc = __crc32cb(crc, 0);
+   crc = __crc32ch(crc, 0);
+   crc = __crc32cw(crc, 0);
+   crc = __crc32cd(crc, 0);
+   /* return computed value, to prevent the above being optimized away */
+   return crc == 0;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv_arm64ce_crc32c_intrinsics__march_armv8_apcrc=yes
+else
+  pgac_cv_arm64ce_crc32c_intrinsics__march_armv8_apcrc=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+CFLAGS="$pgac_save_CFLAGS"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_arm64ce_crc32c_intrinsics__march_armv8_apcrc" >&5
+$as_echo "$pgac_cv_arm64ce_crc32c_intrinsics__march_armv8_apcrc" >&6; }
+if test x"$pgac_cv_arm64ce_crc32c_intrinsics__march_armv8_apcrc" = x"yes"; then
+  CFLAGS_ARM64CE_CRC32C="-march=armv8-a+crc"
+  pgac_arm64ce_crc32c_intrinsics=yes
+fi
+
+fi
+
+
+# In order to detect at runtime, if the ARM64 CRC Extension is available,
+# we will do "getauxval(AT_HWCAP) & HWCAP_CRC32". Check if we have
+# everything we need for that.
+for ac_func in getauxval
+do :
+  ac_fn_c_check_func "$LINENO" "getauxval" "ac_cv_func_getauxval"
+if test "x$ac_cv_func_getauxval" = xyes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_GETAUXVAL 1
+_ACEOF
+
+fi
+done
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#include <sys/auxv.h>
+#include <asm/hwcap.h>
+
+int
+main ()
+{
+
+#ifndef AT_HWCAP
+#error AT_HWCAP not defined
+#endif
+#ifndef HWCAP_CRC32
+#error HWCAP_CRC32 not defined
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  HAVE_HWCAP_CRC32=1
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
 # Select CRC-32C implementation.
 #
-# If we are targeting a processor that has SSE 4.2 instructions, we can use the
-# special CRC instructions for calculating CRC-32C. If we're not targeting such
-# a processor, but we can nevertheless produce code that uses the SSE
-# intrinsics, perhaps with some extra CFLAGS, compile both implementations and
-# select which one to use at runtime, depending on whether SSE 4.2 is supported
-# by the processor we're running on.
+# If we are targeting a processor that has Intel SSE 4.2 instructions, we can
+# use the special CRC instructions for calculating CRC-32C. If we're not
+# targeting such a processor, but we can nevertheless produce code that uses
+# the SSE intrinsics, perhaps with some extra CFLAGS, compile both
+# implementations and select which one to use at runtime, depending on whether
+# SSE 4.2 is supported by the processor we're running on.
+#
+# Similarly, if we are targeting an ARM processor that has CRC instructions
+# that are part of the CRC Extension, use them. And if we're not targeting
+# such a processor, but can nevertheless produce code that uses the CRC
+# instructions, compile both, and select at run time.
 #
 # You can override this logic by setting the appropriate USE_*_CRC32 flag to 1
 # in the template or configure command line.
-if test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_SLICING_BY_8_CRC32C" = x""; then
+if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_ARM64CE_CRC32C" = x"" && test x"$USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK" = x""; then
+  # Use Intel SSE 4.2 if available.
   if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then
     USE_SSE42_CRC32C=1
   else
-    # the CPUID instruction is needed for the runtime check.
+    # Intel SSE 4.2, with runtime check? The CPUID instruction is needed for
+    # the runtime check.
     if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then
       USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1
     else
-      # fall back to slicing-by-8 algorithm which doesn't require any special
-      # CPU support.
-      USE_SLICING_BY_8_CRC32C=1
+      # Use ARM64 CRC Extension if available.
+      if test x"$pgac_arm64ce_crc32c_intrinsics" = x"yes" && test x"$CFLAGS_ARM64CE_CRC32C" = x""; then
+        USE_ARM64CE_CRC32C=1
+      else
+        # ARM64 CRC Extension, with runtime check? The getauxval() function and
+        # HWCAP_CRC32 are needed for the runtime check.
+        if test x"$pgac_arm64ce_crc32c_intrinsics" = x"yes" && test x"$ac_cv_func_getauxval" = x"yes" && test x"$HAVE_HWCAP_CRC32" = x"1"; then
+          USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK=1
+        else
+          # fall back to slicing-by-8 algorithm which doesn't require any special
+          # CPU support.
+          USE_SLICING_BY_8_CRC32C=1
+        fi
+      fi
     fi
   fi
 fi
@@ -15550,16 +15698,34 @@ else
 
 $as_echo "#define USE_SSE42_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h
 
-    PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_choose.o"
+    PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_sse42_choose.o"
     { $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2 with runtime check" >&5
 $as_echo "SSE 4.2 with runtime check" >&6; }
   else
+    if test x"$USE_ARM64CE_CRC32C" = x"1"; then
+
+$as_echo "#define USE_ARM64CE_CRC32C 1" >>confdefs.h
+
+      PG_CRC32C_OBJS="pg_crc32c_arm64ce.o"
+      { $as_echo "$as_me:${as_lineno-$LINENO}: result: ARM64 CE" >&5
+$as_echo "ARM64 CE" >&6; }
+    else
+      if test x"$USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then
+
+$as_echo "#define USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h
+
+        PG_CRC32C_OBJS="pg_crc32c_arm64ce.o pg_crc32c_sb8.o pg_crc32c_arm64ce_choose.o"
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: ARM64 CE with runtime check" >&5
+$as_echo "ARM64 CE with runtime check" >&6; }
+      else
 
 $as_echo "#define USE_SLICING_BY_8_CRC32C 1" >>confdefs.h
 
-    PG_CRC32C_OBJS="pg_crc32c_sb8.o"
-    { $as_echo "$as_me:${as_lineno-$LINENO}: result: slicing-by-8" >&5
+        PG_CRC32C_OBJS="pg_crc32c_sb8.o"
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: slicing-by-8" >&5
 $as_echo "slicing-by-8" >&6; }
+      fi
+    fi
   fi
 fi
 
diff --git a/configure.in b/configure.in
index aee3ab0867..4d2e61b231 100644
--- a/configure.in
+++ b/configure.in
@@ -1901,28 +1901,73 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [
 #endif
 ])], [SSE4_2_TARGETED=1])
 
+# Check for ARM64 CRC Extensions intrinsics to do CRC calculations.
+#
+# First check if __crc32c* intrinsics can be used with the default compiler
+# flags. If not, check if adding -march=armv8-a+crc flag helps.
+# CFLAGS_ARM64CE_CRC32C is set if that's required.
+PGAC_ARM64CE_CRC32C_INTRINSICS([])
+if test x"$pgac_arm64ce_crc32c_intrinsics" != x"yes"; then
+  PGAC_ARM64CE_CRC32C_INTRINSICS([-march=armv8-a+crc])
+fi
+AC_SUBST(CFLAGS_ARM64CE_CRC32C)
+
+# In order to detect at runtime, if the ARM64 CRC Extension is available,
+# we will do "getauxval(AT_HWCAP) & HWCAP_CRC32". Check if we have
+# everything we need for that.
+AC_CHECK_FUNCS([getauxval])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([
+#include <sys/auxv.h>
+#include <asm/hwcap.h>
+], [
+#ifndef AT_HWCAP
+#error AT_HWCAP not defined
+#endif
+#ifndef HWCAP_CRC32
+#error HWCAP_CRC32 not defined
+#endif
+])], [HAVE_HWCAP_CRC32=1])
+
 # Select CRC-32C implementation.
 #
-# If we are targeting a processor that has SSE 4.2 instructions, we can use the
-# special CRC instructions for calculating CRC-32C. If we're not targeting such
-# a processor, but we can nevertheless produce code that uses the SSE
-# intrinsics, perhaps with some extra CFLAGS, compile both implementations and
-# select which one to use at runtime, depending on whether SSE 4.2 is supported
-# by the processor we're running on.
+# If we are targeting a processor that has Intel SSE 4.2 instructions, we can
+# use the special CRC instructions for calculating CRC-32C. If we're not
+# targeting such a processor, but we can nevertheless produce code that uses
+# the SSE intrinsics, perhaps with some extra CFLAGS, compile both
+# implementations and select which one to use at runtime, depending on whether
+# SSE 4.2 is supported by the processor we're running on.
+#
+# Similarly, if we are targeting an ARM processor that has CRC instructions
+# that are part of the CRC Extension, use them. And if we're not targeting
+# such a processor, but can nevertheless produce code that uses the CRC
+# instructions, compile both, and select at run time.
 #
 # You can override this logic by setting the appropriate USE_*_CRC32 flag to 1
 # in the template or configure command line.
-if test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_SLICING_BY_8_CRC32C" = x""; then
+if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_ARM64CE_CRC32C" = x"" && test x"$USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK" = x""; then
+  # Use Intel SSE 4.2 if available.
   if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then
     USE_SSE42_CRC32C=1
   else
-    # the CPUID instruction is needed for the runtime check.
+    # Intel SSE 4.2, with runtime check? The CPUID instruction is needed for
+    # the runtime check.
     if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then
       USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1
     else
-      # fall back to slicing-by-8 algorithm which doesn't require any special
-      # CPU support.
-      USE_SLICING_BY_8_CRC32C=1
+      # Use ARM64 CRC Extension if available.
+      if test x"$pgac_arm64ce_crc32c_intrinsics" = x"yes" && test x"$CFLAGS_ARM64CE_CRC32C" = x""; then
+        USE_ARM64CE_CRC32C=1
+      else
+        # ARM64 CRC Extension, with runtime check? The getauxval() function and
+        # HWCAP_CRC32 are needed for the runtime check.
+        if test x"$pgac_arm64ce_crc32c_intrinsics" = x"yes" && test x"$ac_cv_func_getauxval" = x"yes" && test x"$HAVE_HWCAP_CRC32" = x"1"; then
+          USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK=1
+        else
+          # fall back to slicing-by-8 algorithm which doesn't require any special
+          # CPU support.
+          USE_SLICING_BY_8_CRC32C=1
+        fi
+      fi
     fi
   fi
 fi
@@ -1936,12 +1981,24 @@ if test x"$USE_SSE42_CRC32C" = x"1"; then
 else
   if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then
     AC_DEFINE(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use Intel SSSE 4.2 CRC instructions with a runtime check.])
-    PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_choose.o"
+    PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_sse42_choose.o"
     AC_MSG_RESULT(SSE 4.2 with runtime check)
   else
-    AC_DEFINE(USE_SLICING_BY_8_CRC32C, 1, [Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check.])
-    PG_CRC32C_OBJS="pg_crc32c_sb8.o"
-    AC_MSG_RESULT(slicing-by-8)
+    if test x"$USE_ARM64CE_CRC32C" = x"1"; then
+      AC_DEFINE(USE_ARM64CE_CRC32C, 1, [Define to 1 to use ARM64 CE CRC instructions.])
+      PG_CRC32C_OBJS="pg_crc32c_arm64ce.o"
+      AC_MSG_RESULT(ARM64 CE)
+    else
+      if test x"$USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then
+        AC_DEFINE(USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use ARM64 CE CRC instructions with a runtime check.])
+        PG_CRC32C_OBJS="pg_crc32c_arm64ce.o pg_crc32c_sb8.o pg_crc32c_arm64ce_choose.o"
+        AC_MSG_RESULT(ARM64 CE with runtime check)
+      else
+        AC_DEFINE(USE_SLICING_BY_8_CRC32C, 1, [Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check.])
+        PG_CRC32C_OBJS="pg_crc32c_sb8.o"
+        AC_MSG_RESULT(slicing-by-8)
+      fi
+    fi
   fi
 fi
 AC_SUBST(PG_CRC32C_OBJS)
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index dcb8dc5d90..1044642bcc 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -250,6 +250,7 @@ SUN_STUDIO_CC = @SUN_STUDIO_CC@
 CFLAGS = @CFLAGS@
 CFLAGS_VECTOR = @CFLAGS_VECTOR@
 CFLAGS_SSE42 = @CFLAGS_SSE42@
+CFLAGS_ARM64CE_CRC32C = @CFLAGS_ARM64CE_CRC32C@
 
 # Kind-of compilers
 
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index f98f773ff0..a683771535 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -814,6 +814,12 @@
 /* Define to 1 if your <sys/time.h> declares `struct tm'. */
 #undef TM_IN_SYS_TIME
 
+/* Define to 1 to use ARM64 CE CRC instructions. */
+#undef USE_ARM64CE_CRC32C
+
+/* Define to 1 to use ARM64 CE CRC instructions with a runtime check. */
+#undef USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK
+
 /* Define to 1 to build with assertion checks. (--enable-cassert) */
 #undef USE_ASSERT_CHECKING
 
diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h
index ae2701e958..f6f69fba68 100644
--- a/src/include/port/pg_crc32c.h
+++ b/src/include/port/pg_crc32c.h
@@ -42,26 +42,42 @@ typedef uint32 pg_crc32c;
 #define EQ_CRC32C(c1, c2) ((c1) == (c2))
 
 #if defined(USE_SSE42_CRC32C)
-/* Use SSE4.2 instructions. */
+/* Use Intel SSE4.2 instructions. */
 #define COMP_CRC32C(crc, data, len) \
 	((crc) = pg_comp_crc32c_sse42((crc), (data), (len)))
 #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
 
 extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
 
-#elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK)
+#elif defined(USE_ARM64CE_CRC32C)
+/* Use ARM64 CRC Extensions instructions. */
+
+#define COMP_CRC32C(crc, data, len) \
+	((crc) = pg_comp_crc32c_arm64((crc), (data), (len)))
+#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
+
+extern pg_crc32c pg_comp_crc32c_arm64(pg_crc32c crc, const void *data, size_t len);
+
+#elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) || defined(USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK)
+
 /*
- * Use SSE4.2 instructions, but perform a runtime check first to check that
- * they are available.
+ * Use Intel SSE 4.2 or ARM64 instructions, but perform a runtime check first
+ * to check that they are available.
  */
 #define COMP_CRC32C(crc, data, len) \
 	((crc) = pg_comp_crc32c((crc), (data), (len)))
 #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
 
-extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
 extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len);
 extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len);
 
+#ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK
+extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
+#endif
+#ifdef USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK
+extern pg_crc32c pg_comp_crc32c_arm64(pg_crc32c crc, const void *data, size_t len);
+#endif
+
 #else
 /*
  * Use slicing-by-8 algorithm.
diff --git a/src/port/Makefile b/src/port/Makefile
index 81f01b25bb..519b8b1a11 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -65,6 +65,10 @@ thread.o: CFLAGS+=$(PTHREAD_CFLAGS)
 pg_crc32c_sse42.o: CFLAGS+=$(CFLAGS_SSE42)
 pg_crc32c_sse42_srv.o: CFLAGS+=$(CFLAGS_SSE42)
 
+# pg_crc32c_arm64ce.o and its _srv.o version need CFLAGS_ARM64CE_CRC32C
+pg_crc32c_arm64ce.o: CFLAGS+=$(CFLAGS_ARM64CE_CRC32C)
+pg_crc32c_arm64ce_srv.o: CFLAGS+=$(CFLAGS_ARM64CE_CRC32C)
+
 #
 # Server versions of object files
 #
diff --git a/src/port/pg_crc32c_arm64ce.c b/src/port/pg_crc32c_arm64ce.c
new file mode 100644
index 0000000000..bfbeef6dfb
--- /dev/null
+++ b/src/port/pg_crc32c_arm64ce.c
@@ -0,0 +1,74 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_crc32c_arm64ce.c
+ *	  Compute CRC-32C checksum using ARM64 CRC Extension instructions
+ *
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_crc32c_arm64ce.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "port/pg_crc32c.h"
+
+#include <arm_acle.h>
+
+/*
+ * Fold len bytes starting at data into the running CRC-32C value crc using
+ * the __crc32c* intrinsics, and return the updated value.
+ */
+pg_crc32c
+pg_comp_crc32c_arm64(pg_crc32c crc, const void *data, size_t len)
+{
+	const unsigned char *p = data;
+	const unsigned char *pend = p + len;
+
+	/*
+	 * Dereferencing a misaligned uint16/uint32/uint64 pointer is undefined
+	 * behavior in C, so consume leading bytes until p is aligned to eight
+	 * bytes (or the input runs out).
+	 */
+	if (((uintptr_t) p % sizeof(uint16)) != 0 && pend - p >= 1)
+	{
+		crc = __crc32cb(crc, *p);
+		p += 1;
+	}
+	if (((uintptr_t) p % sizeof(uint32)) != 0 && pend - p >= 2)
+	{
+		crc = __crc32ch(crc, *(const uint16 *) p);
+		p += 2;
+	}
+	if (((uintptr_t) p % sizeof(uint64)) != 0 && pend - p >= 4)
+	{
+		crc = __crc32cw(crc, *(const uint32 *) p);
+		p += 4;
+	}
+
+	/* Process eight bytes at a time, as far as we can. */
+	while (pend - p >= 8)
+	{
+		crc = __crc32cd(crc, *(const uint64 *) p);
+		p += 8;
+	}
+
+	/* Process the remaining 0-7 bytes, widest chunk first. */
+	if (pend - p >= 4)
+	{
+		crc = __crc32cw(crc, *(const uint32 *) p);
+		p += 4;
+	}
+	if (pend - p >= 2)
+	{
+		crc = __crc32ch(crc, *(const uint16 *) p);
+		p += 2;
+	}
+	if (p < pend)
+		crc = __crc32cb(crc, *p);
+
+	return crc;
+}
diff --git a/src/port/pg_crc32c_arm64ce_choose.c b/src/port/pg_crc32c_arm64ce_choose.c
new file mode 100644
index 0000000000..0d45ca726a
--- /dev/null
+++ b/src/port/pg_crc32c_arm64ce_choose.c
@@ -0,0 +1,54 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_crc32c_arm64ce_choose.c
+ *	  Choose which CRC-32C implementation to use, at runtime.
+ *
+ * Use the special CRC instructions introduced in ARMv8 CRC Extension, if
+ * available on the platform we're running on, but fall back to the
+ * slicing-by-8 implementation otherwise.
+ *
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_crc32c_arm64ce_choose.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "c.h"
+
+#include <sys/auxv.h>
+#include <asm/hwcap.h>
+
+#include "port/pg_crc32c.h"
+
+/*
+ * Returns true if the HWCAP_CRC32 bit is set in the value that getauxval()
+ * reports for AT_HWCAP.
+ */
+static bool
+pg_crc32c_arm64ce_available(void)
+{
+	unsigned long auxv = getauxval(AT_HWCAP);
+
+	return (auxv & HWCAP_CRC32) != 0;
+}
+
+/*
+ * This gets called on the first call. It replaces the function pointer
+ * so that subsequent calls are routed directly to the chosen implementation.
+ */
+static pg_crc32c
+pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len)
+{
+	if (pg_crc32c_arm64ce_available())
+		pg_comp_crc32c = pg_comp_crc32c_arm64;
+	else
+		pg_comp_crc32c = pg_comp_crc32c_sb8;
+
+	return pg_comp_crc32c(crc, data, len);
+}
+
+pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose;
diff --git a/src/port/pg_crc32c_choose.c b/src/port/pg_crc32c_sse42_choose.c
similarity index 87%
rename from src/port/pg_crc32c_choose.c
rename to src/port/pg_crc32c_sse42_choose.c
index 40bee67b0a..cde38d8dbf 100644
--- a/src/port/pg_crc32c_choose.c
+++ b/src/port/pg_crc32c_sse42_choose.c
@@ -1,10 +1,10 @@
 /*-------------------------------------------------------------------------
  *
- * pg_crc32c_choose.c
+ * pg_crc32c_sse42_choose.c
  *	  Choose which CRC-32C implementation to use, at runtime.
  *
- * Try to the special CRC instructions introduced in Intel SSE 4.2,
- * if available on the platform we're running on, but fall back to the
+ * Use the special CRC instructions introduced in Intel SSE 4.2, if
+ * available on the platform we're running on, but fall back to the
  * slicing-by-8 implementation otherwise.
  *
  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
@@ -12,7 +12,7 @@
  *
  *
  * IDENTIFICATION
- *	  src/port/pg_crc32c_choose.c
+ *	  src/port/pg_crc32c_sse42_choose.c
  *
  *-------------------------------------------------------------------------
  */