Skip to content

Commit

Permalink
Version 5.5. Added IBM XL compiler intrinsics support, added "always_…
Browse files Browse the repository at this point in the history
…inline" attribute to internal functions.
  • Loading branch information
avaneev committed Aug 9, 2023
1 parent 7a27918 commit 339a7f5
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 67 deletions.
20 changes: 10 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ Compiler options: `/Ox /arch:sse2`; overhead: `1.8` cycles/h.

|Hash function |0-15b, cycles/h|8-28b, cycles/h|bulk, GB/s |
|---- |---- |---- |---- |
|**komihash 5.4**|10.3 |12.1 |26.5 |
|**komihash 5.5**|10.2 |12.1 |26.7 |
|komihash 4.5 |11.0 |12.7 |26.2 |
|komihash 4.3 |11.2 |13.0 |26.0 |
|komihash 3.6 |11.1 |16.9 |27.5 |
Expand All @@ -135,7 +135,7 @@ Compiler options: `/Ox -mavx2`; overhead: `1.8` cycles/h.

|Hash function |0-15b, cycles/h|8-28b, cycles/h|bulk, GB/s |
|---- |---- |---- |---- |
|**komihash 5.4**|10.3 |12.1 |26.5 |
|**komihash 5.5**|10.2 |12.1 |26.7 |
|komihash 4.5 |11.1 |12.7 |26.3 |
|komihash 4.3 |11.2 |13.0 |25.9 |
|komihash 3.6 |11.0 |16.3 |27.5 |
Expand All @@ -151,7 +151,7 @@ Compiler options: `/O3 /QxSSE2`; overhead: `2.0` cycles/h.

|Hash function |0-15b, cycles/h|8-28b, cycles/h|bulk, GB/s |
|---- |---- |---- |---- |
|**komihash 5.4**|12.6 |14.5 |22.3 |
|**komihash 5.5**|12.4 |14.5 |22.5 |
|komihash 4.5 |18.1 |21.9 |16.4 |
|komihash 4.3 |17.9 |21.6 |16.3 |
|komihash 3.6 |20.1 |24.0 |16.3 |
Expand All @@ -171,7 +171,7 @@ Compiler options: `-O3 -mavx2`; overhead: `5.3` cycles/h.

|Hash function |0-15b, cycles/h|8-28b, cycles/h|bulk, GB/s |
|---- |---- |---- |---- |
|**komihash 5.4**|12.7 |13.8 |22.8 |
|**komihash 5.5**|12.7 |13.8 |22.8 |
|komihash 4.5 |12.8 |14.4 |22.4 |
|komihash 4.3 |15.3 |16.3 |22.8 |
|komihash 3.6 |16.0 |19.0 |22.3 |
Expand All @@ -187,7 +187,7 @@ Compiler options: `-O3 -msse2`; overhead: `5.8` cycles/h.

|Hash function |0-15b, cycles/h|8-28b, cycles/h|bulk, GB/s |
|---- |---- |---- |---- |
|**komihash 5.4**|13.3 |14.4 |25.0 |
|**komihash 5.5**|13.3 |14.4 |25.0 |
|komihash 4.5 |13.2 |15.1 |24.7 |
|komihash 4.3 |15.4 |16.2 |24.4 |
|komihash 3.6 |16.4 |20.3 |24.7 |
Expand All @@ -201,7 +201,7 @@ Compiler options: `-O3 -mavx2`; overhead: `5.8` cycles/h.

|Hash function |0-15b, cycles/h|8-28b, cycles/h|bulk, GB/s |
|---- |---- |---- |---- |
|**komihash 5.4**|13.3 |14.3 |25.3 |
|**komihash 5.5**|13.3 |14.3 |25.3 |
|komihash 4.5 |13.8 |15.2 |24.7 |
|komihash 4.3 |15.3 |16.4 |24.4 |
|komihash 3.6 |15.8 |20.1 |24.7 |
Expand All @@ -217,7 +217,7 @@ Compiler options: `/Ox -mavx2`; overhead: `5.5` cycles/h.

|Hash function |0-15b, cycles/h|8-28b, cycles/h|bulk, GB/s |
|---- |---- |---- |---- |
|**komihash 5.4**|11.4 |12.7 |22.7 |
|**komihash 5.5**|11.4 |12.7 |22.7 |
|komihash 4.5 |12.6 |14.5 |22.2 |
|komihash 4.3 |14.1 |16.0 |22.0 |
|komihash 3.6 |14.0 |22.0 |22.9 |
Expand All @@ -233,7 +233,7 @@ Compiler options: `/O3 /QxSSE2`; overhead: `5.9` cycles/h.

|Hash function |0-15b, cycles/h|8-28b, cycles/h|bulk, GB/s |
|---- |---- |---- |---- |
|**komihash 5.4**|15.8 |19.3 |17.1 |
|**komihash 5.5**|15.8 |19.3 |17.1 |
|komihash 4.5 |18.1 |21.1 |17.2 |
|komihash 4.3 |18.7 |21.5 |18.5 |
|komihash 3.6 |19.5 |23.1 |18.1 |
Expand All @@ -249,7 +249,7 @@ Compiler options: `-O3`; overhead: `unestimatable`.

|Hash function |0-15b, cycles/h|8-28b, cycles/h|bulk, GB/s |
|---- |---- |---- |---- |
|**komihash 5.4**|8.2 |8.4 |23.6 |
|**komihash 5.5**|8.2 |8.4 |23.6 |
|komihash 4.5 |8.3 |8.7 |23.6 |
|komihash 4.3 |8.6 |9.0 |23.6 |
|komihash 3.6 |8.5 |10.7 |23.6 |
Expand All @@ -269,7 +269,7 @@ overhead. Measurement error is approximately 3%.

|Hash function |0-15b, cycles/h|8-28b, cycles/h|
|---- |---- |---- |
|**komihash 5.4**|**8.2** |**9.7** |
|**komihash 5.5**|**8.2** |**9.7** |
|komihash 4.5 |9.5 |11.4 |
|komihash 4.3 |10.4 |12.1 |
|komihash 3.6 |10.9 |15.4 |
Expand Down
141 changes: 84 additions & 57 deletions komihash.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* komihash.h version 5.4
* komihash.h version 5.5
*
* The inclusion file for the "komihash" hash function, "komirand" 64-bit
* PRNG, and streamed "komihash" implementation.
Expand All @@ -8,6 +8,7 @@
* (located in Russia), native to the author.
*
* Description is available at https://github.com/avaneev/komihash
* E-mail: [email protected]
*
* License
*
Expand Down Expand Up @@ -38,38 +39,6 @@
#include <stdint.h>
#include <string.h>

// Macros that apply byte-swapping.

#if defined( __GNUC__ ) || defined( __clang__ )

#define KOMIHASH_BYTESW32( v ) __builtin_bswap32( v )
#define KOMIHASH_BYTESW64( v ) __builtin_bswap64( v )

#elif defined( _MSC_VER )

#define KOMIHASH_BYTESW32( v ) _byteswap_ulong( v )
#define KOMIHASH_BYTESW64( v ) _byteswap_uint64( v )

#else // defined( _MSC_VER )

#define KOMIHASH_BYTESW32( v ) ( \
( v & 0xFF000000 ) >> 24 | \
( v & 0x00FF0000 ) >> 8 | \
( v & 0x0000FF00 ) << 8 | \
( v & 0x000000FF ) << 24 )

#define KOMIHASH_BYTESW64( v ) ( \
( v & 0xFF00000000000000 ) >> 56 | \
( v & 0x00FF000000000000 ) >> 40 | \
( v & 0x0000FF0000000000 ) >> 24 | \
( v & 0x000000FF00000000 ) >> 8 | \
( v & 0x00000000FF000000 ) << 8 | \
( v & 0x0000000000FF0000 ) << 24 | \
( v & 0x000000000000FF00 ) << 40 | \
( v & 0x00000000000000FF ) << 56 )

#endif // defined( _MSC_VER )

// Endianness-definition macro, can be defined externally (e.g. =1, if
// endianness-correction is unnecessary in any case, to reduce its associated
// overhead).
Expand All @@ -80,7 +49,7 @@

#define KOMIHASH_LITTLE_ENDIAN 1

#elif defined( __BIG_ENDIAN__ ) || \
#elif defined( __BIG_ENDIAN__ ) || defined( _BIG_ENDIAN ) || \
( defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ )

#define KOMIHASH_LITTLE_ENDIAN 0
Expand All @@ -94,6 +63,15 @@
#endif // defined( __BIG_ENDIAN__ )
#endif // !defined( KOMIHASH_LITTLE_ENDIAN )

// Defined (as 1) when the compiler is Clang, GCC, or IBM XL C/C++. Other
// macros in this file test it before using GCC-style "__builtin_*" functions
// and attributes.

#if defined( __clang__ ) || defined( __GNUC__ ) || \
	defined( __IBMCPP__ ) || defined( __IBMC__ )

	#define KOMIHASH_GCC_BUILTINS 1

#endif // GCC-style built-ins are available.

// Macros that apply byte-swapping, used for endianness-correction.

#if KOMIHASH_LITTLE_ENDIAN
Expand All @@ -103,17 +81,44 @@

#else // KOMIHASH_LITTLE_ENDIAN

#define KOMIHASH_EC32( v ) KOMIHASH_BYTESW32( v )
#define KOMIHASH_EC64( v ) KOMIHASH_BYTESW64( v )
#if defined( KOMIHASH_GCC_BUILTINS )

// GCC-style compilers: use the byte-swap built-in functions.

#define KOMIHASH_EC32( v ) __builtin_bswap32( v )
#define KOMIHASH_EC64( v ) __builtin_bswap64( v )

#elif defined( _MSC_VER )

// MSVC-compatible compilers: use the "_byteswap_*" routines.

#define KOMIHASH_EC32( v ) _byteswap_ulong( v )
#define KOMIHASH_EC64( v ) _byteswap_uint64( v )

#else // defined( _MSC_VER )

// Portable fallback built from masks and shifts.
//
// The argument is parenthesized and converted to the target width before
// masking, so that compound expressions (e.g. "a | b") are swapped as a
// whole, and so that operands narrower than the target width are not shifted
// beyond the width of their own type. The argument is expanded several
// times: it should not have side effects.

#define KOMIHASH_EC32( v ) ( (uint32_t) ( \
	( (uint32_t) ( v ) & 0xFF000000U ) >> 24 | \
	( (uint32_t) ( v ) & 0x00FF0000U ) >> 8 | \
	( (uint32_t) ( v ) & 0x0000FF00U ) << 8 | \
	( (uint32_t) ( v ) & 0x000000FFU ) << 24 ))

#define KOMIHASH_EC64( v ) ( (uint64_t) ( \
	( (uint64_t) ( v ) & 0xFF00000000000000ULL ) >> 56 | \
	( (uint64_t) ( v ) & 0x00FF000000000000ULL ) >> 40 | \
	( (uint64_t) ( v ) & 0x0000FF0000000000ULL ) >> 24 | \
	( (uint64_t) ( v ) & 0x000000FF00000000ULL ) >> 8 | \
	( (uint64_t) ( v ) & 0x00000000FF000000ULL ) << 8 | \
	( (uint64_t) ( v ) & 0x0000000000FF0000ULL ) << 24 | \
	( (uint64_t) ( v ) & 0x000000000000FF00ULL ) << 40 | \
	( (uint64_t) ( v ) & 0x00000000000000FFULL ) << 56 ))

#endif // defined( _MSC_VER )

#endif // KOMIHASH_LITTLE_ENDIAN

// Likelihood macros that are used for manually-guided micro-optimization.

#if defined( __GNUC__ ) || defined( __clang__ )
#if defined( KOMIHASH_GCC_BUILTINS )

#define KOMIHASH_LIKELY( x ) __builtin_expect( x, 1 )
#define KOMIHASH_UNLIKELY( x ) __builtin_expect( x, 0 )
#define KOMIHASH_LIKELY( x ) __builtin_expect( x, 1 )
#define KOMIHASH_UNLIKELY( x ) __builtin_expect( x, 0 )

#else // likelihood macros

Expand All @@ -125,7 +130,7 @@
// Memory address prefetch macro (temporal locality=1, in case a collision
// resolution would be necessary).

#if defined( __GNUC__ ) || defined( __clang__ )
#if defined( KOMIHASH_GCC_BUILTINS )

#define KOMIHASH_PREFETCH( addr ) __builtin_prefetch( addr, 0, 1 )

Expand All @@ -135,6 +140,18 @@

#endif // prefetch macro

// Macro to force code inlining. GCC-style compilers receive the
// "always_inline" attribute, MSVC-compatible compilers receive the
// "__forceinline" keyword; any other compiler receives the plain "inline"
// hint only.

#if defined( KOMIHASH_GCC_BUILTINS )

	#define KOMIHASH_INLINE inline __attribute__((always_inline))

#elif defined( _MSC_VER )

	#define KOMIHASH_INLINE __forceinline

#else // defined( _MSC_VER )

	#define KOMIHASH_INLINE inline

#endif // defined( _MSC_VER )

/**
* An auxiliary function that returns an unsigned 32-bit value created out of
* a sequence of bytes in memory. This function is used to convert endianness
Expand All @@ -145,7 +162,7 @@
* @return Endianness-corrected 32-bit value from memory.
*/

static inline uint32_t kh_lu32ec( const uint8_t* const p )
static KOMIHASH_INLINE uint32_t kh_lu32ec( const uint8_t* const p )
{
uint32_t v;
memcpy( &v, p, 4 );
Expand All @@ -163,7 +180,7 @@ static inline uint32_t kh_lu32ec( const uint8_t* const p )
* @return Endianness-corrected 64-bit value from memory.
*/

static inline uint64_t kh_lu64ec( const uint8_t* const p )
static KOMIHASH_INLINE uint64_t kh_lu64ec( const uint8_t* const p )
{
uint64_t v;
memcpy( &v, p, 8 );
Expand All @@ -182,7 +199,7 @@ static inline uint64_t kh_lu64ec( const uint8_t* const p )
* @return Final byte-padded value from the message.
*/

static inline uint64_t kh_lpu64ec_l3( const uint8_t* const Msg,
static KOMIHASH_INLINE uint64_t kh_lpu64ec_l3( const uint8_t* const Msg,
const size_t MsgLen )
{
const int ml8 = (int) ( MsgLen * 8 );
Expand Down Expand Up @@ -213,7 +230,7 @@ static inline uint64_t kh_lpu64ec_l3( const uint8_t* const Msg,
* @return Final byte-padded value from the message.
*/

static inline uint64_t kh_lpu64ec_nz( const uint8_t* const Msg,
static KOMIHASH_INLINE uint64_t kh_lpu64ec_nz( const uint8_t* const Msg,
const size_t MsgLen )
{
const int ml8 = (int) ( MsgLen * 8 );
Expand Down Expand Up @@ -252,7 +269,7 @@ static inline uint64_t kh_lpu64ec_nz( const uint8_t* const Msg,
* @return Final byte-padded value from the message.
*/

static inline uint64_t kh_lpu64ec_l4( const uint8_t* const Msg,
static KOMIHASH_INLINE uint64_t kh_lpu64ec_l4( const uint8_t* const Msg,
const size_t MsgLen )
{
const int ml8 = (int) ( MsgLen * 8 );
Expand Down Expand Up @@ -280,57 +297,67 @@ static inline uint64_t kh_lpu64ec_l4( const uint8_t* const Msg,
* @param[out] rh The higher half of the 128-bit result.
*/

static inline void kh_m128( const uint64_t m1, const uint64_t m2,
static KOMIHASH_INLINE void kh_m128( const uint64_t m1, const uint64_t m2,
uint64_t* const rl, uint64_t* const rh )
{
const __uint128_t r = (__uint128_t) m1 * m2;
const unsigned __int128 r = (unsigned __int128) m1 * m2;

*rl = (uint64_t) r;
*rh = (uint64_t) ( r >> 64 );
*rl = (uint64_t) r;
}

#elif ( defined( __IBMC__ ) || defined( __IBMCPP__ )) && defined( __LP64__ )

static KOMIHASH_INLINE void kh_m128( const uint64_t m1, const uint64_t m2,
	uint64_t* const rl, uint64_t* const rh )
{
	// IBM XL variant: the higher half of the product is obtained via the
	// __mulhdu() built-in, the lower half via an ordinary 64-bit
	// multiplication.

	const uint64_t hi = __mulhdu( m1, m2 );
	const uint64_t lo = m1 * m2;

	// Store order is kept: the higher half first, then the lower half.

	*rh = hi;
	*rl = lo;
}

#elif defined( _MSC_VER ) && ( defined( _M_ARM64 ) || \
( defined( _M_X64 ) && defined( __INTEL_COMPILER )))

#include <intrin.h>

static inline void kh_m128( const uint64_t m1, const uint64_t m2,
static KOMIHASH_INLINE void kh_m128( const uint64_t m1, const uint64_t m2,
uint64_t* const rl, uint64_t* const rh )
{
*rl = m1 * m2;
*rh = __umulh( m1, m2 );
*rl = m1 * m2;
}

#elif defined( _MSC_VER ) && defined( _M_X64 )
#elif defined( _MSC_VER ) && ( defined( _M_X64 ) || defined( _M_IA64 ))

#include <intrin.h>
#pragma intrinsic(_umul128)

static inline void kh_m128( const uint64_t m1, const uint64_t m2,
static KOMIHASH_INLINE void kh_m128( const uint64_t m1, const uint64_t m2,
	uint64_t* const rl, uint64_t* const rh )
{
	// _umul128() receives "rh" as its output argument; its return value is
	// then stored to "rl".

	const uint64_t lo = _umul128( m1, m2, rh );

	*rl = lo;
}

#else // defined( _MSC_VER ) && defined( _M_X64 )

// _umul128() code for 32-bit systems, adapted from mullu(),
// from https://go.dev/src/runtime/softfloat64.go
// Licensed under BSD-style license.
// _umul128() code for 32-bit systems, adapted from Hacker's Delight,
// Henry S. Warren, Jr.

#if defined( _MSC_VER ) && !defined( __INTEL_COMPILER )

#include <intrin.h>
#pragma intrinsic(__emulu)

static inline uint64_t kh__emulu( const uint32_t x, const uint32_t y )
static KOMIHASH_INLINE uint64_t kh__emulu( const uint32_t x,
	const uint32_t y )
{
	// Thin wrapper over the __emulu() compiler intrinsic.

	const uint64_t product = __emulu( x, y );

	return( product );
}

#else // defined( _MSC_VER ) && !defined( __INTEL_COMPILER )

static inline uint64_t kh__emulu( const uint32_t x, const uint32_t y )
static KOMIHASH_INLINE uint64_t kh__emulu( const uint32_t x,
	const uint32_t y )
{
	// One operand is widened to 64 bits first, so that the multiplication is
	// performed in 64 bits and the complete 32x32-bit product is returned.

	const uint64_t wide_x = x;

	return( wide_x * y );
}
Expand Down

0 comments on commit 339a7f5

Please sign in to comment.