4242#define ECB_H
4343
4444/* 16 bits major, 16 bits minor */
45- #define ECB_VERSION 0x00010004
45+ #define ECB_VERSION 0x00010005
4646
4747#ifdef _WIN32
4848 typedef signed char int8_t ;
6969 #endif
7070#else
7171 #include < inttypes.h>
72- #if UINTMAX_MAX > 0xffffffffU
72+ #if (defined INTPTR_MAX ? INTPTR_MAX : ULONG_MAX) > 0xffffffffU
7373 #define ECB_PTRSIZE 8
7474 #else
7575 #define ECB_PTRSIZE 4
157157 #include < builtins.h>
158158#endif
159159
160+ #if 1400 <= _MSC_VER
161+ #include < intrin.h> /* fence functions _ReadBarrier, also bit search functions _BitScanReverse */
162+ #endif
163+
160164#ifndef ECB_MEMORY_FENCE
161165 #if ECB_GCC_VERSION(2,5) || defined __INTEL_COMPILER || (__llvm__ && __GNUC__) || __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110
162166 #if __i386 || __i386__
169173 #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ (" " )
170174 #elif __powerpc__ || __ppc__ || __powerpc64__ || __ppc64__
171175 #define ECB_MEMORY_FENCE __asm__ __volatile__ (" sync" : : : " memory" )
176+ #elif defined __ARM_ARCH_2__ \
177+ || defined __ARM_ARCH_3__ || defined __ARM_ARCH_3M__ \
178+ || defined __ARM_ARCH_4__ || defined __ARM_ARCH_4T__ \
179+ || defined __ARM_ARCH_5__ || defined __ARM_ARCH_5E__ \
180+ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__ \
181+ || defined __ARM_ARCH_5TEJ__
182+ /* should not need any, unless running old code on newer cpu - arm doesn't support that */
172183 #elif defined __ARM_ARCH_6__ || defined __ARM_ARCH_6J__ \
173- || defined __ARM_ARCH_6K__ || defined __ARM_ARCH_6ZK__
184+ || defined __ARM_ARCH_6K__ || defined __ARM_ARCH_6ZK__ \
185+ || defined __ARM_ARCH_6T2__
174186 #define ECB_MEMORY_FENCE __asm__ __volatile__ (" mcr p15,0,%0,c7,c10,5" : : " r" (0 ) : "memory")
175187 #elif defined __ARM_ARCH_7__ || defined __ARM_ARCH_7A__ \
176- || defined __ARM_ARCH_7M__ || defined __ARM_ARCH_7R__
188+ || defined __ARM_ARCH_7R__ || defined __ARM_ARCH_7M__
177189 #define ECB_MEMORY_FENCE __asm__ __volatile__ (" dmb" : : : " memory" )
178190 #elif __aarch64__
179191 #define ECB_MEMORY_FENCE __asm__ __volatile__ (" dmb ish" : : : " memory" )
@@ -425,6 +437,11 @@ typedef int ecb_bool;
425437 ecb_function_ ecb_const int
426438 ecb_ctz32 (uint32_t x)
427439 {
440+ #if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM)
441+ unsigned long r;
442+ _BitScanForward (&r, x);
443+ return (int )r;
444+ #else
428445 int r = 0 ;
429446
430447 x &= ~x + 1 ; /* this isolates the lowest bit */
@@ -444,14 +461,21 @@ typedef int ecb_bool;
444461#endif
445462
446463 return r;
464+ #endif
447465 }
448466
/* ecb_ctz64: index of the lowest set bit of a 64 bit value (count of trailing zero bits). */
/* NOTE(review): the result for x == 0 is not defined by anything visible here - confirm with callers. */
449467 ecb_function_ ecb_const int ecb_ctz64 (uint64_t x);
450468 ecb_function_ ecb_const int
451469 ecb_ctz64 (uint64_t x)
452470 {
453- int shift = x & 0xffffffffU ? 0 : 32 ;
/* MSVC 2005+ (_MSC_VER >= 1400) on the listed 64 bit capable targets: use the bit scan intrinsic, which stores the bit index in r */
471+ #if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM)
472+ unsigned long r;
473+ _BitScanForward64 (&r, x);
474+ return (int )r;
475+ #else
/* portable fallback: if the low 32 bits are all zero, the lowest set bit is in the high half, */
/* so shift it down by 32 and add 32 to the 32 bit result */
476+ int shift = x & 0xffffffff ? 0 : 32 ;
454477 return ecb_ctz32 (x >> shift) + shift;
478+ #endif
455479 }
456480
457481 ecb_function_ ecb_const int ecb_popcount32 (uint32_t x);
@@ -469,6 +493,11 @@ typedef int ecb_bool;
469493 ecb_function_ ecb_const int ecb_ld32 (uint32_t x);
470494 ecb_function_ ecb_const int ecb_ld32 (uint32_t x)
471495 {
496+ #if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM)
497+ unsigned long r;
498+ _BitScanReverse (&r, x);
499+ return (int )r;
500+ #else
472501 int r = 0 ;
473502
474503 if (x >> 16 ) { x >>= 16 ; r += 16 ; }
@@ -478,16 +507,23 @@ typedef int ecb_bool;
478507 if (x >> 1 ) { r += 1 ; }
479508
480509 return r;
510+ #endif
481511 }
482512
483513 ecb_function_ ecb_const int ecb_ld64 (uint64_t x);
484514 ecb_function_ ecb_const int ecb_ld64 (uint64_t x)
485515 {
516+ #if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM)
517+ unsigned long r;
518+ _BitScanReverse64 (&r, x);
519+ return (int )r;
520+ #else
486521 int r = 0 ;
487522
488523 if (x >> 32 ) { x >>= 32 ; r += 32 ; }
489524
490525 return r + ecb_ld32 (x);
526+ #endif
491527 }
492528#endif
493529
@@ -600,8 +636,8 @@ ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count) { retu
600636/* try to tell the compiler that some condition is definitely true */
601637#define ecb_assume (cond ) if (!(cond)) ecb_unreachable (); else 0
602638
603- ecb_inline ecb_const unsigned char ecb_byteorder_helper (void );
604- ecb_inline ecb_const unsigned char
639+ ecb_inline ecb_const uint32_t ecb_byteorder_helper (void );
640+ ecb_inline ecb_const uint32_t
605641ecb_byteorder_helper (void )
606642{
607643 /* the union code still generates code under pressure in gcc, */
@@ -610,26 +646,28 @@ ecb_byteorder_helper (void)
610646 /* the reason why we have this horrible preprocessor mess */
611647 /* is to avoid it in all cases, at least on common architectures */
612648 /* or when using a recent enough gcc version (>= 4.6) */
613- #if ((__i386 || __i386__) && !__VOS__) || _M_IX86 || ECB_GCC_AMD64 || ECB_MSVC_AMD64
614- return 0x44 ;
615- #elif __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
616- return 0x44 ;
617- #elif __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
618- return 0x11 ;
649+ #if (defined __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
650+ || ((__i386 || __i386__ || _M_IX86 || ECB_GCC_AMD64 || ECB_MSVC_AMD64) && !__VOS__)
651+ #define ECB_LITTLE_ENDIAN 1
652+ return 0x44332211 ;
653+ #elif (defined __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) \
654+ || ((__AARCH64EB__ || __MIPSEB__ || __ARMEB__) && !__VOS__)
655+ #define ECB_BIG_ENDIAN 1
656+ return 0x11223344 ;
619657#else
620658 union
621659 {
622- uint32_t i ;
623- uint8_t c ;
624- } u = { 0x11223344 };
625- return u.c ;
660+ uint8_t c[ 4 ] ;
661+ uint32_t u ;
662+ } u = { 0x11 , 0x22 , 0x33 , 0x44 };
663+ return u.u ;
626664#endif
627665}
628666
/* ecb_big_endian: true when ecb_byteorder_helper () reports the big endian pattern. */
/* The '+' line compares against the full 32 bit value 0x11223344; the '-' line is the older single byte (0x11) form. */
629667ecb_inline ecb_const ecb_bool ecb_big_endian (void );
630- ecb_inline ecb_const ecb_bool ecb_big_endian (void ) { return ecb_byteorder_helper () == 0x11 ; }
668+ ecb_inline ecb_const ecb_bool ecb_big_endian (void ) { return ecb_byteorder_helper () == 0x11223344 ; }
/* ecb_little_endian: true when ecb_byteorder_helper () reports the little endian pattern. */
/* The '+' line compares against the full 32 bit value 0x44332211; the '-' line is the older single byte (0x44) form. */
631669ecb_inline ecb_const ecb_bool ecb_little_endian (void );
632- ecb_inline ecb_const ecb_bool ecb_little_endian (void ) { return ecb_byteorder_helper () == 0x44 ; }
670+ ecb_inline ecb_const ecb_bool ecb_little_endian (void ) { return ecb_byteorder_helper () == 0x44332211 ; }
633671
634672#if ECB_GCC_VERSION(3,0) || ECB_C99
635673 #define ecb_mod (m,n ) ((m) % (n) + ((m) % (n) < 0 ? (n) : 0 ))
@@ -664,6 +702,102 @@ ecb_inline ecb_const ecb_bool ecb_little_endian (void) { return ecb_byteorder_he
664702 #define ecb_array_length (name ) (sizeof (name) / sizeof (name [0 ]))
665703#endif
666704
705+ ecb_function_ ecb_const uint32_t ecb_binary16_to_binary32 (uint32_t x);
706+ ecb_function_ ecb_const uint32_t
707+ ecb_binary16_to_binary32 (uint32_t x)
708+ {
709+ unsigned int s = (x & 0x8000 ) << (31 - 15 );
710+ int e = (x >> 10 ) & 0x001f ;
711+ unsigned int m = x & 0x03ff ;
712+
713+ if (ecb_expect_false (e == 31 ))
714+ /* infinity or NaN */
715+ e = 255 - (127 - 15 );
716+ else if (ecb_expect_false (!e))
717+ {
718+ if (ecb_expect_true (!m))
719+ /* zero, handled by code below by forcing e to 0 */
720+ e = 0 - (127 - 15 );
721+ else
722+ {
723+ /* subnormal, renormalise */
724+ unsigned int s = 10 - ecb_ld32 (m);
725+
726+ m = (m << s) & 0x3ff ; /* mask implicit bit */
727+ e -= s - 1 ;
728+ }
729+ }
730+
731+ /* e and m now are normalised, or zero, (or inf or nan) */
732+ e += 127 - 15 ;
733+
734+ return s | (e << 23 ) | (m << (23 - 10 ));
735+ }
736+
/*
 * Narrow an ieee single/binary32 bit pattern to the nearest ieee
 * half/binary16 bit pattern, rounding to nearest with ties to even.
 *
 *   x       - the binary32 value as raw bits
 *   returns - the binary16 value as raw bits
 *
 * Values too large for binary16 become infinity, values too small become
 * (signed) zero, values below the normal range become subnormals, and NaNs
 * keep their topmost payload bits but never collapse into infinity.
 */
ecb_function_ ecb_const uint16_t ecb_binary32_to_binary16 (uint32_t x);
ecb_function_ ecb_const uint16_t
ecb_binary32_to_binary16 (uint32_t x)
{
  unsigned int s =  (x >> 16) & 0x00008000; /* sign bit, the easy part */
  /* the desired exponent - must be signed, as it is negative or zero for */
  /* everything below the binary16 normal range and is compared against a */
  /* negative limit in the subnormal path */
  int          e = (int)((x >> 23) & 0x000000ff) - (127 - 15);
  unsigned int m =   x        & 0x007fffff; /* fraction, without the implicit bit */

  x &= 0x7fffffff;

  /* if it's within range of binary16 normals, use fast path */
  if (ecb_expect_true (0x38800000 <= x && x <= 0x477fefff))
    {
      /* mantissa round-to-even */
      m += 0x00000fff + ((m >> (23 - 10)) & 1);

      /* handle overflow: the fraction carried into the implicit bit, so the */
      /* result is 1.0 * 2^(e+1) - drop the carry and bump the exponent */
      if (ecb_expect_false (m >= 0x00800000))
        {
          m &= 0x007fffff;
          e += 1;
        }

      return s | ((unsigned int)e << 10) | (m >> (23 - 10));
    }

  /* handle large numbers and infinity */
  if (ecb_expect_true (0x477fefff < x && x <= 0x7f800000))
    return s | 0x7c00;

  /* handle zero, subnormals and small numbers */
  if (ecb_expect_true (x < 0x38800000))
    {
      /* zero */
      if (ecb_expect_true (!x))
        return s;

      /* handle subnormals */

      /* too small, will be zero (anything below 2^-25 rounds to zero) */
      if (e < (14 - 24))
        return s;

      m |= 0x00800000; /* make implicit bit explicit */

      /* very tricky - we need to round to the nearest e (+10) bit value */
      {
        unsigned int bits = (unsigned int)(14 - e); /* 14..24, as e is -10..0 here */
        unsigned int half = (1U << (bits - 1)) - 1;
        unsigned int even = (m >> bits) & 1;

        /* if this overflows, we will end up with a normalised number */
        m = (m + half + even) >> bits;
      }

      return s | m;
    }

  /* handle NaNs, preserve leftmost nan bits, but make sure we don't turn them into infinities */
  m >>= 13;

  return s | 0x7c00 | m | !m;
}
800+
667801/* ******************************************************************************/
668802/* floating point stuff, can be disabled by defining ECB_NO_LIBM */
669803
@@ -715,23 +849,6 @@ ecb_inline ecb_const ecb_bool ecb_little_endian (void) { return ecb_byteorder_he
715849 #define ecb_frexpf (x,e ) (float ) frexp ((double ) (x), (e))
716850 #endif
717851
718- /* converts an ieee half/binary16 to a float */
719- ecb_function_ ecb_const float ecb_binary16_to_float (uint16_t x);
720- ecb_function_ ecb_const float
721- ecb_binary16_to_float (uint16_t x)
722- {
723- int e = (x >> 10 ) & 0x1f ;
724- int m = x & 0x3ff ;
725- float r;
726-
727- if (!e ) r = ecb_ldexpf (m , -24 );
728- else if (e != 31 ) r = ecb_ldexpf (m + 0x400 , e - 25 );
729- else if (m ) r = ECB_NAN;
730- else r = ECB_INFINITY;
731-
732- return x & 0x8000 ? -r : r;
733- }
734-
735852 /* convert a float to ieee single/binary32 */
736853 ecb_function_ ecb_const uint32_t ecb_float_to_binary32 (float x);
737854 ecb_function_ ecb_const uint32_t
@@ -872,6 +989,22 @@ ecb_inline ecb_const ecb_bool ecb_little_endian (void) { return ecb_byteorder_he
872989 return r;
873990 }
874991
992+ /* convert a float to ieee half/binary16 */
993+ ecb_function_ ecb_const uint16_t ecb_float_to_binary16 (float x);
994+ ecb_function_ ecb_const uint16_t
995+ ecb_float_to_binary16 (float x)
996+ {
997+ return ecb_binary32_to_binary16 (ecb_float_to_binary32 (x));
998+ }
999+
1000+ /* convert an ieee half/binary16 to float */
1001+ ecb_function_ ecb_const float ecb_binary16_to_float (uint16_t x);
1002+ ecb_function_ ecb_const float
1003+ ecb_binary16_to_float (uint16_t x)
1004+ {
1005+ return ecb_binary32_to_float (ecb_binary16_to_binary32 (x));
1006+ }
1007+
8751008#endif
8761009
8771010#endif
0 commit comments