// Specialized i686 versions of the cache line search functions. #include "defs.h" static inline int32_t mask(int32_t a, int32_t b) { return -(a == b); } #if defined(__GNUC__) static inline int32_t first_bit_set(int32_t a) { return __builtin_ffs(a) - 1; } #else static uint8_t de_bruijn_table[] = { 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 }; static inline int32_t first_bit_set(int32_t a) { int32_t zero_case = mask(0, a); uint32_t x = (uint32_t) (a & -a); x *= 0x077CB531; x >>= 27; return zero_case | de_bruijn_table[x]; } #endif static inline uint32_t line_mask(small_hash_t* array, int start, small_hash_t x1) { small_hash_t* p = array + start; uint32_t m1 = 0; uint32_t m2 = 0; uint32_t m3 = 0; int offset = start & 0x1f; #define M (mask(*p, x1)) switch (offset) { case 0: m1 |= M & 0x1; ++p; case 1: m2 |= M & 0x2; ++p; case 2: m3 |= M & 0x4; ++p; case 3: m1 |= M & 0x8; ++p; case 4: m2 |= M & 0x10; ++p; case 5: m3 |= M & 0x20; ++p; case 6: m1 |= M & 0x40; ++p; case 7: m2 |= M & 0x80; ++p; case 8: m3 |= M & 0x100; ++p; case 9: m1 |= M & 0x200; ++p; case 10: m2 |= M & 0x400; ++p; case 11: m3 |= M & 0x800; ++p; case 12: m1 |= M & 0x1000; ++p; case 13: m2 |= M & 0x2000; ++p; case 14: m3 |= M & 0x4000; ++p; case 15: m1 |= M & 0x8000; ++p; case 16: m2 |= M & 0x10000; ++p; case 17: m3 |= M & 0x20000; ++p; case 18: m1 |= M & 0x40000; ++p; case 19: m2 |= M & 0x80000; ++p; case 20: m3 |= M & 0x100000; ++p; case 21: m1 |= M & 0x200000; ++p; case 22: m2 |= M & 0x400000; ++p; case 23: m3 |= M & 0x800000; ++p; case 24: m1 |= M & 0x1000000; ++p; case 25: m2 |= M & 0x2000000; ++p; case 26: m3 |= M & 0x4000000; ++p; case 27: m1 |= M & 0x8000000; ++p; case 28: m2 |= M & 0x10000000; ++p; case 29: m3 |= M & 0x20000000; ++p; case 30: m1 |= M & 0x40000000; ++p; case 31: m2 |= M & 0x80000000; ++p; } #undef M return (m1 | m2 | m3) >> offset; } static inline uint32_t line_mask_2(small_hash_t* array, int start, small_hash_t x1, small_hash_t x2) { small_hash_t* p = array + start; uint32_t m1 = 0; uint32_t m2 = 0; uint32_t m3 = 0; int offset = start & 0x1f; #define M (mask(*p, x1) | mask(*p, x2)) switch (offset) { case 0: m1 |= M & 0x1; ++p; case 1: m2 |= M & 0x2; ++p; case 2: m3 |= M & 0x4; ++p; case 3: m1 |= M & 0x8; ++p; case 4: m2 |= M & 0x10; ++p; case 5: m3 |= M & 0x20; ++p; case 6: m1 |= M & 0x40; ++p; case 7: m2 |= M & 0x80; ++p; case 8: m3 |= M & 0x100; ++p; case 9: m1 |= M & 0x200; ++p; case 10: m2 |= M & 0x400; ++p; case 11: m3 |= M & 0x800; ++p; case 12: m1 |= M & 0x1000; ++p; case 13: m2 |= M & 0x2000; ++p; case 14: m3 |= M & 0x4000; ++p; case 15: m1 |= M & 0x8000; ++p; case 16: m2 |= M & 0x10000; ++p; case 17: m3 |= M & 0x20000; ++p; case 18: m1 |= M & 0x40000; ++p; case 19: m2 |= M & 0x80000; ++p; case 20: m3 |= M & 0x100000; ++p; case 21: m1 |= M & 0x200000; ++p; case 22: m2 |= M & 0x400000; ++p; case 23: m3 |= M & 0x800000; ++p; case 24: m1 |= M & 0x1000000; ++p; case 25: m2 |= M & 0x2000000; ++p; case 26: m3 |= M & 0x4000000; ++p; case 27: m1 |= M & 0x8000000; ++p; case 28: m2 |= M & 0x10000000; ++p; case 29: m3 |= M & 0x20000000; ++p; case 30: m1 |= M & 0x40000000; ++p; case 31: m2 |= M & 0x80000000; ++p; } #undef M return (m1 | m2 | m3) >> offset; } static inline uint32_t line_mask_3(small_hash_t* array, int start, small_hash_t x1, small_hash_t x2, small_hash_t x3) { small_hash_t* p = array + start; uint32_t m1 = 0; uint32_t m2 = 0; uint32_t m3 = 0; int offset = start & 0x1f; #define M (mask(*p, x1) | mask(*p, x2) | mask(*p, x3)) switch (offset) { case 0: m1 |= M & 0x1; ++p; case 1: m2 |= M & 0x2; ++p; case 2: m3 |= M & 0x4; ++p; case 3: m1 |= M & 0x8; ++p; case 4: m2 |= M & 0x10; ++p; case 5: m3 |= M & 0x20; ++p; case 6: m1 |= M & 0x40; ++p; case 7: m2 |= M & 0x80; ++p; case 8: m3 |= M & 0x100; ++p; case 9: m1 |= M & 0x200; ++p; case 10: m2 |= M & 0x400; ++p; case 11: m3 |= M & 0x800; ++p; case 12: m1 |= M & 0x1000; ++p; case 13: m2 |= M & 0x2000; ++p; case 14: m3 |= M & 0x4000; ++p; case 15: m1 |= M & 0x8000; ++p; case 16: m2 |= M & 0x10000; ++p; case 17: m3 |= M & 0x20000; ++p; case 18: m1 |= M & 0x40000; ++p; case 19: m2 |= M & 0x80000; ++p; case 20: m3 |= M & 0x100000; ++p; case 21: m1 |= M & 0x200000; ++p; case 22: m2 |= M & 0x400000; ++p; case 23: m3 |= M & 0x800000; ++p; case 24: m1 |= M & 0x1000000; ++p; case 25: m2 |= M & 0x2000000; ++p; case 26: m3 |= M & 0x4000000; ++p; case 27: m1 |= M & 0x8000000; ++p; case 28: m2 |= M & 0x10000000; ++p; case 29: m3 |= M & 0x20000000; ++p; case 30: m1 |= M & 0x40000000; ++p; case 31: m2 |= M & 0x80000000; ++p; } #undef M return (m1 | m2 | m3) >> offset; } static inline int32_t line_result(uint32_t m, int start) { int32_t p = first_bit_set((int32_t) m); int32_t mm = mask(p, -1); return mm | (start + p); } int line_search(small_hash_t* array, int start, small_hash_t x1) { uint32_t m = line_mask(array, start, x1); return line_result(m, start); } int line_search_2(small_hash_t* array, int start, small_hash_t x1, small_hash_t x2) { uint32_t m = line_mask_2(array, start, x1, x2); return line_result(m, start); } int line_search_3(small_hash_t* array, int start, small_hash_t x1, small_hash_t x2, small_hash_t x3) { uint32_t m = line_mask_3(array, start, x1, x2, x3); return line_result(m, start); } void check_impl_specific() { }