wyhash: update to final_version_3 (part 1) (#9451)

2023-08-10 21:13:21 +03:00 · 2021-03-24 16:57:38 +01:00 · 2021-03-24 16:57:38 +01:00 · 8a97fb890f
commit 8a97fb890f
parent f8f74d8587
1 changed files with 167 additions and 93 deletions
--- a/vlib/v/gen/c/cheaders.v
+++ b/vlib/v/gen/c/cheaders.v
@ -345,10 +345,23 @@ void _vcleanup();
 #define _ARR_LEN(a) ( (sizeof(a)) / (sizeof(a[0])) )
 // ============== wyhash ==============
-//Author: Wang Yi
+#ifndef wyhash_final_version_3
-#ifndef wyhash_version_gamma
+#define wyhash_final_version_3
-	#define wyhash_version_gamma
+
-	#define WYHASH_CONDOM 0
+#ifndef WYHASH_CONDOM
 //protections that produce different results:
 //1: normal valid behavior
 //2: extra protection against entropy loss (probability=2^-63), aka. "blind multiplication"
 #define WYHASH_CONDOM 1 
 #endif
 #ifndef WYHASH_32BIT_MUM
 //0: normal version, slow on 32 bit systems
 //1: faster on 32 bit systems but produces different results, incompatible with wy2u0k function
 #define WYHASH_32BIT_MUM 0  
 #endif
 //includes
 #include <stdint.h>
 #include <string.h>
 #if defined(_MSC_VER) && defined(_M_X64)
@ -356,101 +369,162 @@ void _vcleanup();
 	#pragma intrinsic(_umul128)
 #endif
-	//const uint64_t _wyp0=0xa0761d6478bd642full, _wyp1=0xe7037ed1a0b428dbull;
+//likely and unlikely macros
-	#define _wyp0 ((uint64_t)0xa0761d6478bd642full)
+#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
 	#define _wyp1 ((uint64_t)0xe7037ed1a0b428dbull)
 	#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) || defined(__TINYC__)
 	#define _likely_(x)  __builtin_expect(x,1)
-		#define _unlikely_(x) __builtin_expect((x), 0)
+	#define _unlikely_(x)  __builtin_expect(x,0)
 #else
 	#define _likely_(x) (x)
 	#define _unlikely_(x) (x)
 #endif
-	#if defined(TARGET_ORDER_IS_LITTLE)
+//128bit multiply function
-		#define WYHASH_LITTLE_ENDIAN 1
+static inline uint64_t _wyrot(uint64_t x) { return (x>>32)|(x<<32); }
-	#elif defined(TARGET_ORDER_IS_BIG)
+static inline void _wymum(uint64_t *A, uint64_t *B){
-		#define WYHASH_LITTLE_ENDIAN 0
+#if(WYHASH_32BIT_MUM)
-	#endif
+	uint64_t hh=(*A>>32)*(*B>>32), hl=(*A>>32)*(uint32_t)*B, lh=(uint32_t)*A*(*B>>32), ll=(uint64_t)(uint32_t)*A*(uint32_t)*B;
-
+	#if(WYHASH_CONDOM>1)
-	#if (WYHASH_LITTLE_ENDIAN)
+	*A^=_wyrot(hl)^hh; *B^=_wyrot(lh)^ll;
 		static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v;}
 		static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return v;}
 	#else
-		#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
+	*A=_wyrot(hl)^hh; *B=_wyrot(lh)^ll;
 			static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v);}
 			static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return __builtin_bswap32(v);}
 		#elif defined(_MSC_VER)
 			static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);}
 			static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return _byteswap_ulong(v);}
 		#elif defined(__TINYC__)
 			static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return bswap_64(v);}
 			static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return bswap_32(v);}
 	#endif
-	#endif
+#elif defined(__SIZEOF_INT128__)
-
+	__uint128_t r=*A; r*=*B; 
-	static inline uint64_t _wyr3(const uint8_t *p, unsigned k) { return (((uint64_t)p[0]) << 16) | (((uint64_t)p[k >> 1]) << 8) | p[k - 1];}
+	#if(WYHASH_CONDOM>1)
-	static inline uint64_t _wyrotr(uint64_t v, unsigned k) { return (v >> k) | (v << (64 - k));}
+	*A^=(uint64_t)r; *B^=(uint64_t)(r>>64);
 	static inline void _wymix128(uint64_t A, uint64_t B, uint64_t *C, uint64_t *D){
 		A^=*C;	B^=*D;
 	#ifdef UNOFFICIAL_WYHASH_32BIT
 		uint64_t hh=(A>>32)*(B>>32), hl=(A>>32)*(unsigned)B, lh=(unsigned)A*(B>>32), ll=(uint64_t)(unsigned)A*(unsigned)B;
 		*C=_wyrotr(hl,32)^hh; *D=_wyrotr(lh,32)^ll;
 	#else
-		#ifdef __SIZEOF_INT128__
+	*A=(uint64_t)r; *B=(uint64_t)(r>>64);
-			__uint128_t r=A; r*=B; *C=(uint64_t)r; *D=(uint64_t)(r>>64);
+	#endif
 #elif defined(_MSC_VER) && defined(_M_X64)
-			A=_umul128(A,B,&B); *C=A; *D=B;
+	#if(WYHASH_CONDOM>1)
 	uint64_t  a,  b;
 	a=_umul128(*A,*B,&b);
 	*A^=a;  *B^=b;
 	#else
-			uint64_t ha=A>>32, hb=B>>32, la=(uint32_t)A, lb=(uint32_t)B, hi, lo;
+	*A=_umul128(*A,*B,B);
 	#endif
 #else
 	uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo;
 	uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t<rl;
 	lo=t+(rm1<<32); c+=lo<t; hi=rh+(rm0>>32)+(rm1>>32)+c;
-			*C=lo;	*D=hi;
+	#if(WYHASH_CONDOM>1)
-		#endif
+	*A^=lo;  *B^=hi;
 	#endif
 	}
 	static inline uint64_t wyhash(const void *key, uint64_t len, uint64_t seed){
 		const uint8_t *p=(const uint8_t *)key;
 		uint64_t i=len, see1=seed;
 		start:
 		if (_likely_(i<=16)) {
 	#ifndef	WYHASH_CONDOM
 			uint64_t shift = (i<8)*((8-i)<<3);
 			//WARNING: intended reading outside buffer, trading for speed.
 			_wymix128((_wyr8(p)<<shift)^_wyp0, (_wyr8(p+i-8)>>shift)^_wyp1, &seed, &see1);
 	#else
-			if (_likely_(i<=8)) {
+	*A=lo;  *B=hi;
-				if (_likely_(i>=4)) _wymix128(_wyr4(p)^_wyp0,_wyr4(p+i-4)^_wyp1, &seed, &see1);
+	#endif
 				else if (_likely_(i)) _wymix128(_wyr3(p,i)^_wyp0,_wyp1, &seed, &see1);
 				else _wymix128(_wyp0,_wyp1, &seed, &see1);
 			}
 			else _wymix128(_wyr8(p)^_wyp0,_wyr8(p+i-8)^_wyp1, &seed, &see1);
 #endif
 			_wymix128(len,_wyp0, &seed, &see1);
 			return	seed^see1;
 }
-		_wymix128(_wyr8(p)^_wyp0,_wyr8(p+8)^_wyp1, &seed, &see1);
+
-		i-=16;	p+=16;	goto start;
+//multiply and xor mix function, aka MUM
 //Passes local copies of A and B through _wymum (which rewrites both in place) and returns the XOR of the two results.
 static inline uint64_t _wymix(uint64_t A, uint64_t B){
 	_wymum(&A,&B);
 	const uint64_t folded=A^B;
 	return folded;
 }
 //endian macros
 #ifndef WYHASH_LITTLE_ENDIAN
 	#if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
 		#define WYHASH_LITTLE_ENDIAN 1
 	#elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
 		#define WYHASH_LITTLE_ENDIAN 0
 	#else
 		#warning could not determine endianness! Falling back to little endian.
 		#define WYHASH_LITTLE_ENDIAN 1
 	#endif
 #endif
 //read functions
 #if (WYHASH_LITTLE_ENDIAN)
 //Copies the 8 bytes at p into a uint64_t and returns it unchanged (host byte order).
 static inline uint64_t _wyr8(const uint8_t *p) {
 	uint64_t word;
 	memcpy(&word, p, sizeof word);
 	return word;
 }
 //Copies the 4 bytes at p into a uint32_t (host byte order) and returns it widened to 64 bits.
 static inline uint64_t _wyr4(const uint8_t *p) {
 	uint32_t half;
 	memcpy(&half, p, sizeof half);
 	return half;
 }
 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
 //Copies the 8 bytes at p into a uint64_t, then returns that value with its byte order reversed by __builtin_bswap64.
 static inline uint64_t _wyr8(const uint8_t *p) {
 	uint64_t raw;
 	memcpy(&raw, p, sizeof raw);
 	const uint64_t swapped=__builtin_bswap64(raw);
 	return swapped;
 }
 //Copies the 4 bytes at p into a uint32_t, reverses its byte order with __builtin_bswap32 and returns the result widened to 64 bits.
 static inline uint64_t _wyr4(const uint8_t *p) {
 	uint32_t raw;
 	memcpy(&raw, p, sizeof raw);
 	const uint32_t swapped=__builtin_bswap32(raw);
 	return swapped;
 }
 #elif defined(_MSC_VER)
 //Copies the 8 bytes at p into a uint64_t and returns the result of passing it through _byteswap_uint64.
 static inline uint64_t _wyr8(const uint8_t *p) {
 	uint64_t raw;
 	memcpy(&raw, p, sizeof raw);
 	return _byteswap_uint64(raw);
 }
 //Copies the 4 bytes at p into a uint32_t and returns the result of passing it through _byteswap_ulong.
 static inline uint64_t _wyr4(const uint8_t *p) {
 	uint32_t raw;
 	memcpy(&raw, p, sizeof raw);
 	return _byteswap_ulong(raw);
 }
 #else
 //Copies the 8 bytes at p into a uint64_t and returns that value with its eight bytes in reverse order, using only shifts and masks.
 static inline uint64_t _wyr8(const uint8_t *p) {
 	uint64_t raw, swapped=0;
 	memcpy(&raw, p, sizeof raw);
 	//each pass moves the current lowest byte of raw onto the low end of swapped, so the first byte taken ends up highest
 	for(unsigned n=0;n<8;n++){
 		swapped=(swapped<<8)|(raw&0xff);
 		raw>>=8;
 	}
 	return swapped;
 }
-	static inline uint64_t wyhash64(uint64_t A, uint64_t B){
+static inline uint64_t _wyr4(const uint8_t *p) {
-		_wymix128(_wyp0,_wyp1,&A,&B);
+	uint32_t v; memcpy(&v, p, 4);
-		_wymix128(0,0,&A,&B);
+	return (((v >> 24) & 0xff)| ((v >>  8) & 0xff00)| ((v <<  8) & 0xff0000)| ((v << 24) & 0xff000000));
 		return	A^B;
 }
-	static inline uint64_t wyrand(uint64_t *seed){
+#endif
-		*seed+=_wyp0;
+static inline uint64_t _wyr3(const uint8_t *p, size_t k) { return (((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1];}
-		uint64_t	a=0, b=0;
+
-		_wymix128(*seed,*seed^_wyp1,&a,&b);
+//wyhash main function
-		return	a^b;
+static inline uint64_t wyhash(const void *key, size_t len, uint64_t seed, const uint64_t *secret){
 	const uint8_t *p=(const uint8_t *)key; seed^=*secret;	uint64_t	a,	b;
 	if(_likely_(len<=16)){
 		if(_likely_(len>=4)){ a=(_wyr4(p)<<32)|_wyr4(p+((len>>3)<<2)); b=(_wyr4(p+len-4)<<32)|_wyr4(p+len-4-((len>>3)<<2)); }
 		else if(_likely_(len>0)){ a=_wyr3(p,len); b=0;}
 		else a=b=0;
 	}
-	static inline double wy2u01(uint64_t r) {
+	else{
-		const double _wynorm=1.0/(1ull<<52);
+		size_t i=len; 
-		return (r>>12)*_wynorm;
+		if(_unlikely_(i>48)){
 			uint64_t see1=seed, see2=seed;
 			do{
 				seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed);
 				see1=_wymix(_wyr8(p+16)^secret[2],_wyr8(p+24)^see1);
 				see2=_wymix(_wyr8(p+32)^secret[3],_wyr8(p+40)^see2);
 				p+=48; i-=48;
 			}while(_likely_(i>48));
 			seed^=see1^see2;
 		}
 		while(_unlikely_(i>16)){  seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed);  i-=16; p+=16;  }
 		a=_wyr8(p+i-16);  b=_wyr8(p+i-8);
 	}
 	return _wymix(secret[1]^len,_wymix(a^secret[1],b^seed));
 }
 //the default secret parameters: a table of four 64-bit constants; wyhash64 and wyrand read entries 0 and 1 directly
 static const uint64_t _wyp[4] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull};
 //a useful 64bit-64bit mix function to produce deterministic pseudo random numbers that can pass BigCrush and PractRand
 static inline uint64_t wyhash64(uint64_t A, uint64_t B){
 	//first round: XOR the inputs with _wyp[0]/_wyp[1] and let _wymum rewrite both in place
 	A^=_wyp[0];
 	B^=_wyp[1];
 	_wymum(&A,&B);
 	//second round: XOR with the same two constants again and reduce the pair to one value with _wymix
 	const uint64_t left=A^_wyp[0];
 	const uint64_t right=B^_wyp[1];
 	return _wymix(left,right);
 }
 //The wyrand PRNG that passes BigCrush and PractRand
 static inline uint64_t wyrand(uint64_t *seed){
 	//advance the caller's state in place by _wyp[0] (unsigned addition, wraps modulo 2^64)
 	*seed+=_wyp[0];
 	const uint64_t state=*seed;
 	//the output is the _wymix of the new state and the new state XOR _wyp[1]
 	return _wymix(state,state^_wyp[1]);
 }
 //convert any 64 bit pseudo random numbers to uniform distribution [0,1). It can be combined with wyrand, wyhash64 or wyhash.
 static inline double wy2u01(uint64_t r){
 	//2^-52; the divisor is 1ull<<52 written out as a double literal
 	const double scale=1.0/4503599627370496.0;
 	//drop the low 12 bits so that only the upper 52 bits of r contribute
 	const uint64_t top52=r>>12;
 	return top52*scale;
 }
 //convert any 64 bit pseudo random numbers to APPROXIMATE Gaussian distribution. It can be combined with wyrand, wyhash64 or wyhash.
 static inline double wy2gau(uint64_t r){
 	const uint64_t mask21=0x1fffff;
 	//split r into three 21-bit fields (bits 0-20, 21-41 and 42-62; bit 63 is unused) and add them as integers
 	const uint64_t low=r&mask21;
 	const uint64_t mid=(r>>21)&mask21;
 	const uint64_t high=(r>>42)&mask21;
 	//scale the integer sum by 2^-20 (the divisor is 1ull<<20 as a double literal), then subtract 3.0
 	const double scale=1.0/1048576.0;
 	return (low+mid+high)*scale-3.0;
 }
 #if(!WYHASH_32BIT_MUM)
 //fast range integer random number generation on [0,k) credit to Daniel Lemire. May not work when WYHASH_32BIT_MUM=1. It can be combined with wyrand, wyhash64 or wyhash.
 static inline uint64_t wy2u0k(uint64_t r, uint64_t k){
 	//_wymum rewrites both local copies in place; only the value it leaves in k is returned
 	_wymum(&r,&k);
 	const uint64_t scaled=k;
 	return scaled;
 }
 #endif
 //make your own secret
 static inline void make_secret(uint64_t seed, uint64_t *secret){
 	uint8_t c[] = {15, 23, 27, 29, 30, 39, 43, 45, 46, 51, 53, 54, 57, 58, 60, 71, 75, 77, 78, 83, 85, 86, 89, 90, 92, 99, 101, 102, 105, 106, 108, 113, 114, 116, 120, 135, 139, 141, 142, 147, 149, 150, 153, 154, 156, 163, 165, 166, 169, 170, 172, 177, 178, 180, 184, 195, 197, 198, 201, 202, 204, 209, 210, 212, 216, 225, 226, 228, 232, 240 };
 	for(size_t i=0;i<4;i++){
 		uint8_t ok;
 		do{
 			ok=1; secret[i]=0;
 			for(size_t j=0;j<64;j+=8) secret[i]|=((uint64_t)c[wyrand(&seed)%sizeof(c)])<<j;
 			if(secret[i]%2==0){ ok=0; continue; }
 			for(size_t j=0;j<i;j++) {
 #if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
 				if(__builtin_popcountll(secret[j]^secret[i])!=32){ ok=0; break; }
 #elif defined(_MSC_VER) && defined(_M_X64)
 				if(_mm_popcnt_u64(secret[j]^secret[i])!=32){ ok=0; break; }
 #else
 				//manual popcount
 				uint64_t x = secret[j]^secret[i];
 				x -= (x >> 1) & 0x5555555555555555;
 				x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
 				x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f;
 				x = (x * 0x0101010101010101) >> 56;
 				if(x!=32){ ok=0; break; }
 #endif
 			}
 				if(!ok)continue;
 				for(uint64_t j=3;j<0x100000000ull;j+=2) if(secret[i]%j==0){ ok=0; break; }
 		}while(!ok);
 	}
 	static inline double wy2gau(uint64_t r) {
 		const double _wynorm=1.0/(1ull<<20);
 		return ((r&0x1fffff)+((r>>21)&0x1fffff)+((r>>42)&0x1fffff))*_wynorm-3.0;
 }
 #endif