From 556373fe7bb8517f1be033341e5c0c3f9cfafc1e Mon Sep 17 00:00:00 2001 From: Brady Date: Mon, 13 Oct 2025 18:22:08 -0500 Subject: [PATCH 1/9] Initial support for signature bitmasks --- include/libhat/scanner.hpp | 10 ++---- include/libhat/signature.hpp | 67 ++++++++++++++++++++++++++++-------- src/Scanner.cpp | 4 +-- src/arch/x86/AVX2.cpp | 17 ++++----- src/arch/x86/AVX512.cpp | 22 +++++------- src/arch/x86/SSE.cpp | 17 ++++----- 6 files changed, 79 insertions(+), 58 deletions(-) diff --git a/include/libhat/scanner.hpp b/include/libhat/scanner.hpp index 41d913f..0127892 100644 --- a/include/libhat/scanner.hpp +++ b/include/libhat/scanner.hpp @@ -269,9 +269,7 @@ namespace hat::detail { break; } // Compare everything after the first byte - auto match = std::equal(signature.begin() + 1, signature.end(), i + 1, [](auto opt, auto byte) { - return !opt.has_value() || *opt == byte; - }); + auto match = std::equal(signature.begin() + 1, signature.end(), i + 1); if (match) LIBHAT_UNLIKELY { return i; } @@ -293,9 +291,7 @@ namespace hat::detail { for (auto i = scanBegin; i != scanEnd; i += 16) { if (*i == firstByte) { - auto match = std::equal(signature.begin() + 1, signature.end(), i + 1, [](auto opt, auto byte) { - return !opt.has_value() || *opt == byte; - }); + auto match = std::equal(signature.begin() + 1, signature.end(), i + 1); if (match) LIBHAT_UNLIKELY { return i; } @@ -318,7 +314,7 @@ namespace hat::detail { // Truncate the leading wildcards from the signature size_t offset = 0; for (const auto& elem : signature) { - if (elem.has_value()) { + if (elem.any()) { break; } offset++; diff --git a/include/libhat/signature.hpp b/include/libhat/signature.hpp index 2cdd9dc..264fad5 100644 --- a/include/libhat/signature.hpp +++ b/include/libhat/signature.hpp @@ -22,7 +22,7 @@ LIBHAT_EXPORT namespace hat { struct signature_element { constexpr signature_element() noexcept {} constexpr signature_element(std::nullopt_t) noexcept {} - constexpr signature_element(const std::byte 
valueIn) noexcept : val(valueIn), present(true) {} + constexpr signature_element(const std::byte valueIn) noexcept : value_{valueIn}, mask_{0xFF} {} constexpr signature_element& operator=(std::nullopt_t) noexcept { return *this = signature_element{}; @@ -36,24 +36,49 @@ LIBHAT_EXPORT namespace hat { *this = std::nullopt; } - [[nodiscard]] constexpr bool has_value() const noexcept { - return this->present; - } - [[nodiscard]] constexpr std::byte value() const noexcept { - return this->val; + return this->value_; } - [[nodiscard]] constexpr operator bool() const noexcept { - return this->has_value(); + [[nodiscard]] constexpr std::byte mask() const noexcept { + return this->mask_; } [[nodiscard]] constexpr std::byte operator*() const noexcept { return this->value(); } + + [[nodiscard]] constexpr bool all() const noexcept { + return this->mask_ == std::byte{0xFF}; + } + + [[nodiscard]] constexpr bool any() const noexcept { + return this->mask_ != std::byte{0x00}; + } + + [[nodiscard]] constexpr bool none() const noexcept { + return this->mask_ == std::byte{0x00}; + } + + [[nodiscard]] constexpr bool has(const uint8_t digit) const noexcept { + const uint8_t m = std::to_integer(this->mask_); + return (m & (1u << digit)) != 0; + } + + [[nodiscard]] constexpr bool at(const uint8_t digit) const noexcept { + const uint8_t v = std::to_integer(this->value_); + return (v & (1u << digit)) != 0; + } + + [[nodiscard]] constexpr std::strong_ordering operator<=>(const signature_element& other) const noexcept = default; + + [[nodiscard]] constexpr bool operator==(const std::byte byte) const noexcept { + return (byte & this->mask_) == this->value_; + } + private: - std::byte val{}; - bool present = false; + std::byte value_{}; + std::byte mask_{}; }; using signature = std::vector; @@ -179,14 +204,28 @@ LIBHAT_EXPORT namespace hat { std::string ret; ret.reserve(signature.size() * 3); for (auto& element : signature) { - if (element.has_value()) { + const bool a = (element.mask() & 
std::byte{0xF0}) == std::byte{0xF0}; + const bool b = (element.mask() & std::byte{0x0F}) == std::byte{0x0F}; + if (a || b) { ret += { - hex[static_cast(element.value() >> 4) & 0xFu], - hex[static_cast(element.value() >> 0) & 0xFu], + a ? hex[static_cast(element.value() >> 4) & 0xFu] : '?', + b ? hex[static_cast(element.value() >> 0) & 0xFu] : '?', ' ' }; - } else { + } else if (element.none()) { ret += "? "; + } else { + ret += { + element.has(7) ? (element.at(7) ? '1' : '0') : '?', + element.has(6) ? (element.at(6) ? '1' : '0') : '?', + element.has(5) ? (element.at(5) ? '1' : '0') : '?', + element.has(4) ? (element.at(4) ? '1' : '0') : '?', + element.has(3) ? (element.at(3) ? '1' : '0') : '?', + element.has(2) ? (element.at(2) ? '1' : '0') : '?', + element.has(1) ? (element.at(1) ? '1' : '0') : '?', + element.has(0) ? (element.at(0) ? '1' : '0') : '?', + ' ' + }; } } ret.pop_back(); diff --git a/src/Scanner.cpp b/src/Scanner.cpp index 731abf8..39f6124 100644 --- a/src/Scanner.cpp +++ b/src/Scanner.cpp @@ -27,7 +27,7 @@ namespace hat::detail { auto& a = *it; auto& b = *std::next(it); - if (a.has_value() && b.has_value()) { + if (a.all() && b.all()) { const auto score = getScore(a.value(), b.value()); if (!bestPair || score > bestPair->second) { bestPair.emplace(i, score); @@ -48,7 +48,7 @@ namespace hat::detail { auto& a = *it; auto& b = *std::next(it); - if (a.has_value() && b.has_value()) { + if (a.all() && b.all()) { this->pairIndex = i; break; } diff --git a/src/arch/x86/AVX2.cpp b/src/arch/x86/AVX2.cpp index 1a49e7a..e1068a7 100644 --- a/src/arch/x86/AVX2.cpp +++ b/src/arch/x86/AVX2.cpp @@ -12,11 +12,8 @@ namespace hat::detail { std::byte byteBuffer[32]{}; // The remaining signature bytes std::byte maskBuffer[32]{}; // A bitmask for the signature bytes we care about for (size_t i = 0; i < signature.size(); i++) { - auto e = signature[i]; - if (e.has_value()) { - byteBuffer[i] = *e; - maskBuffer[i] = std::byte{0xFFu}; - } + byteBuffer[i] = signature[i].value(); 
+ maskBuffer[i] = signature[i].mask(); } bytes = _mm256_loadu_si256(reinterpret_cast<__m256i*>(&byteBuffer)); mask = _mm256_loadu_si256(reinterpret_cast<__m256i*>(&maskBuffer)); @@ -70,15 +67,13 @@ namespace hat::detail { const auto i = reinterpret_cast(&it) + offset - cmpIndex; if constexpr (veccmp) { const auto data = _mm256_loadu_si256(reinterpret_cast(i)); - const auto cmpToSig = _mm256_cmpeq_epi8(signatureBytes, data); - const auto matched = _mm256_testc_si256(cmpToSig, signatureMask); - if (matched) LIBHAT_UNLIKELY { + const auto neqBits = _mm256_xor_si256(data, signatureBytes); + const auto match = _mm256_testz_si256(neqBits, signatureMask); + if (match) LIBHAT_UNLIKELY { return i; } } else { - auto match = std::equal(signature.begin(), signature.end(), i, [](auto opt, auto byte) { - return !opt.has_value() || *opt == byte; - }); + const auto match = std::equal(signature.begin(), signature.end(), i); if (match) LIBHAT_UNLIKELY { return i; } diff --git a/src/arch/x86/AVX512.cpp b/src/arch/x86/AVX512.cpp index 2f7a7b3..82dee65 100644 --- a/src/arch/x86/AVX512.cpp +++ b/src/arch/x86/AVX512.cpp @@ -8,18 +8,15 @@ namespace hat::detail { - inline void load_signature_512(const signature_view signature, __m512i& bytes, uint64_t& mask) { + inline void load_signature_512(const signature_view signature, __m512i& bytes, __m512i& mask) { std::byte byteBuffer[64]{}; // The remaining signature bytes - uint64_t maskBuffer{}; // A bitmask for the signature bytes we care about + std::byte maskBuffer[64]{}; // A bitmask for the signature bytes we care about for (size_t i = 0; i < signature.size(); i++) { - auto e = signature[i]; - if (e.has_value()) { - byteBuffer[i] = *e; - maskBuffer |= (1ull << i); - } + byteBuffer[i] = signature[i].value(); + maskBuffer[i] = signature[i].mask(); } bytes = _mm512_loadu_si512(&byteBuffer); - mask = maskBuffer; + mask = _mm512_loadu_si512(&maskBuffer); } template @@ -37,7 +34,7 @@ namespace hat::detail { } __m512i signatureBytes; - uint64_t 
signatureMask; + __m512i signatureMask; if constexpr (veccmp) { load_signature_512(signature, signatureBytes, signatureMask); } @@ -67,14 +64,13 @@ namespace hat::detail { const auto i = reinterpret_cast(&it) + offset - cmpIndex; if constexpr (veccmp) { const auto data = _mm512_loadu_si512(i); - const auto invalid = _mm512_mask_cmpneq_epi8_mask(signatureMask, signatureBytes, data); + const auto neqBits = _mm512_xor_si512(data, signatureBytes); + const auto invalid = _mm512_test_epi64_mask(neqBits, signatureMask); if (!invalid) LIBHAT_UNLIKELY { return i; } } else { - auto match = std::equal(signature.begin(), signature.end(), i, [](auto opt, auto byte) { - return !opt.has_value() || *opt == byte; - }); + const auto match = std::equal(signature.begin(), signature.end(), i); if (match) LIBHAT_UNLIKELY { return i; } diff --git a/src/arch/x86/SSE.cpp b/src/arch/x86/SSE.cpp index d13ac75..1a32762 100644 --- a/src/arch/x86/SSE.cpp +++ b/src/arch/x86/SSE.cpp @@ -28,11 +28,8 @@ namespace hat::detail { std::byte byteBuffer[16]{}; // The remaining signature bytes std::byte maskBuffer[16]{}; // A bitmask for the signature bytes we care about for (size_t i = 0; i < signature.size(); i++) { - auto e = signature[i]; - if (e.has_value()) { - byteBuffer[i] = *e; - maskBuffer[i] = std::byte{0xFFu}; - } + byteBuffer[i] = signature[i].value(); + maskBuffer[i] = signature[i].mask(); } bytes = _mm_loadu_si128(reinterpret_cast<__m128i*>(&byteBuffer)); mask = _mm_loadu_si128(reinterpret_cast<__m128i*>(&maskBuffer)); @@ -84,15 +81,13 @@ namespace hat::detail { const auto i = reinterpret_cast(&it) + offset - cmpIndex; if constexpr (veccmp) { const auto data = _mm_loadu_si128(reinterpret_cast(i)); - const auto cmpToSig = _mm_cmpeq_epi8(signatureBytes, data); - const auto matched = _mm_testc_si128(cmpToSig, signatureMask); - if (matched) LIBHAT_UNLIKELY { + const auto neqBits = _mm_xor_si128(data, signatureBytes); + const auto match = _mm_testz_si128(neqBits, signatureMask); + if (match) 
LIBHAT_UNLIKELY { return i; } } else { - auto match = std::equal(signature.begin(), signature.end(), i, [](auto opt, auto byte) { - return !opt.has_value() || *opt == byte; - }); + const auto match = std::equal(signature.begin(), signature.end(), i); if (match) LIBHAT_UNLIKELY { return i; } From 251d343df10597bfe8fa8e16967eaf4994fc8ea3 Mon Sep 17 00:00:00 2001 From: Brady Date: Wed, 15 Oct 2025 15:19:36 -0500 Subject: [PATCH 2/9] Support new syntax in signature parsing --- include/libhat/signature.hpp | 81 +++++++++++++++++++++++++++--------- include/libhat/strconv.hpp | 23 +++++----- 2 files changed, 73 insertions(+), 31 deletions(-) diff --git a/include/libhat/signature.hpp b/include/libhat/signature.hpp index 264fad5..1a6e2bb 100644 --- a/include/libhat/signature.hpp +++ b/include/libhat/signature.hpp @@ -20,9 +20,10 @@ LIBHAT_EXPORT namespace hat { /// Effectively std::optional, but with the added flexibility of being able to use std::bit_cast on /// instances of the class in constant expressions. 
struct signature_element { - constexpr signature_element() noexcept {} + constexpr signature_element() noexcept = default; constexpr signature_element(std::nullopt_t) noexcept {} - constexpr signature_element(const std::byte valueIn) noexcept : value_{valueIn}, mask_{0xFF} {} + constexpr signature_element(const std::byte value) noexcept : value_{value}, mask_{0xFF} {} + constexpr signature_element(const std::byte value, const std::byte mask) noexcept : value_{value}, mask_{mask} {} constexpr signature_element& operator=(std::nullopt_t) noexcept { return *this = signature_element{}; @@ -61,12 +62,12 @@ LIBHAT_EXPORT namespace hat { } [[nodiscard]] constexpr bool has(const uint8_t digit) const noexcept { - const uint8_t m = std::to_integer(this->mask_); + const auto m = std::to_integer(this->mask_); return (m & (1u << digit)) != 0; } [[nodiscard]] constexpr bool at(const uint8_t digit) const noexcept { - const uint8_t v = std::to_integer(this->value_); + const auto v = std::to_integer(this->value_); return (v & (1u << digit)) != 0; } @@ -132,26 +133,68 @@ LIBHAT_EXPORT namespace hat { empty_signature, }; - [[nodiscard]] LIBHAT_CONSTEXPR_RESULT result parse_signature_to(std::output_iterator auto out, std::string_view str) { + namespace detail { + + LIBHAT_CONSTEXPR_RESULT std::optional parse_signature_element(const std::string_view str, const uint8_t base) { + uint8_t value{}; + uint8_t mask{}; + for (auto& ch : str) { + value *= base; + mask *= base; + if (ch != '?') { + auto digit = hat::parse_int(&ch, &ch + 1, base); + if (!digit.has_value()) [[unlikely]] { + return std::nullopt; + } + value += digit.value(); + mask += base - 1; + } + } + + return signature_element{std::byte{value}, std::byte{mask}}; + } + } + + [[nodiscard]] LIBHAT_CONSTEXPR_RESULT result parse_signature_to(std::output_iterator auto out, const std::string_view str) { size_t written = 0; bool containsByte = false; - for (const auto& word : str | std::views::split(' ')) { - if (word.empty()) { - 
continue; - } - if (word[0] == '?') { - *out++ = signature_element{std::nullopt}; - written++; - } else { - const auto sv = std::string_view{word.begin(), word.end()}; - const auto parsed = parse_int(sv, 16); - if (parsed.has_value()) { - *out++ = signature_element{static_cast(parsed.value())}; + + for (auto&& sub : str | std::views::split(' ')) { + const std::string_view word{sub.begin(), sub.end()}; + switch (word.size()) { + case 0: { + continue; + } + case 1: { + if (word.front() != '?') { + return result_error{signature_parse_error::parse_error}; + } + *out++ = signature_element{std::nullopt}; written++; - } else { + break; + } + case 2: + case 8: { + const auto base = word.size() == 2 ? 16 : 2; + auto element = detail::parse_signature_element(word, base); + if (element) { + *out++ = *element; + written++; + + if (!containsByte && element->any()) { + if (!element->all()) { + return result_error{signature_parse_error::missing_byte}; + } + containsByte = true; + } + } else { + return result_error{signature_parse_error::parse_error}; + } + break; + } + default: { return result_error{signature_parse_error::parse_error}; } - containsByte = true; } } if (written == 0) { diff --git a/include/libhat/strconv.hpp b/include/libhat/strconv.hpp index bc4db80..f23d096 100644 --- a/include/libhat/strconv.hpp +++ b/include/libhat/strconv.hpp @@ -27,20 +27,20 @@ LIBHAT_EXPORT namespace hat { const int digits = base < 10 ? base : 10; const int letters = base > 10 ? 
base - 10 : 0; - for (auto iter = begin; iter != end; iter++) { - const char ch = *iter; - - if constexpr (std::is_signed_v) { - if (iter == begin) { - if (ch == '+') { - continue; - } else if (ch == '-') { - sign = -1; - continue; - } + auto iter = begin; + if constexpr (std::is_signed_v) { + if (iter != end) { + if (*iter == '+') { + iter++; + } else if (*iter == '-') { + sign = -1; + iter++; } } + } + for (; iter != end; iter++) { + const char ch = *iter; value *= base; if (ch >= '0' && ch < '0' + digits) { value += static_cast(ch - '0'); @@ -49,7 +49,6 @@ LIBHAT_EXPORT namespace hat { } else if (ch >= 'a' && ch < 'a' + letters) { value += static_cast(ch - 'a' + 10); } else { - // Throws an exception at runtime AND prevents constexpr evaluation return result_error{parse_int_error::illegal_char}; } } From ca01efac7a451c4340fbf80d496334769b3ac8d6 Mon Sep 17 00:00:00 2001 From: Brady Date: Wed, 15 Oct 2025 15:20:59 -0500 Subject: [PATCH 3/9] Fix warning --- include/libhat/signature.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/libhat/signature.hpp b/include/libhat/signature.hpp index 1a6e2bb..5f69757 100644 --- a/include/libhat/signature.hpp +++ b/include/libhat/signature.hpp @@ -175,7 +175,7 @@ LIBHAT_EXPORT namespace hat { } case 2: case 8: { - const auto base = word.size() == 2 ? 16 : 2; + const uint8_t base = word.size() == 2 ? 
16 : 2; auto element = detail::parse_signature_element(word, base); if (element) { *out++ = *element; From aaa9e165f036fd73b6e5407f5340f2caffac0553 Mon Sep 17 00:00:00 2001 From: Brady Date: Fri, 17 Oct 2025 16:34:15 -0500 Subject: [PATCH 4/9] Fix ANOTHER warning --- include/libhat/signature.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/libhat/signature.hpp b/include/libhat/signature.hpp index 5f69757..2592074 100644 --- a/include/libhat/signature.hpp +++ b/include/libhat/signature.hpp @@ -147,7 +147,7 @@ LIBHAT_EXPORT namespace hat { return std::nullopt; } value += digit.value(); - mask += base - 1; + mask += static_cast(base - 1); } } From 24e33ab2bf7690d129820cada278c842507c6920 Mon Sep 17 00:00:00 2001 From: Brady Date: Fri, 17 Oct 2025 16:43:03 -0500 Subject: [PATCH 5/9] Ensure zeros for non-masked bits --- include/libhat/signature.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/libhat/signature.hpp b/include/libhat/signature.hpp index 2592074..338fde2 100644 --- a/include/libhat/signature.hpp +++ b/include/libhat/signature.hpp @@ -23,7 +23,7 @@ LIBHAT_EXPORT namespace hat { constexpr signature_element() noexcept = default; constexpr signature_element(std::nullopt_t) noexcept {} constexpr signature_element(const std::byte value) noexcept : value_{value}, mask_{0xFF} {} - constexpr signature_element(const std::byte value, const std::byte mask) noexcept : value_{value}, mask_{mask} {} + constexpr signature_element(const std::byte value, const std::byte mask) noexcept : value_{value & mask}, mask_{mask} {} constexpr signature_element& operator=(std::nullopt_t) noexcept { return *this = signature_element{}; From 3d2b422b9a47ad504755709302a5a73f9dbcf2ac Mon Sep 17 00:00:00 2001 From: Brady Date: Mon, 20 Oct 2025 21:54:02 -0500 Subject: [PATCH 6/9] Add benchmark compare against STL --- README.md | 50 +++++++++++++++++++++++--------------- test/benchmark/Compare.cpp | 49 
++++++++++++++++++++++++++++++++----- 2 files changed, 74 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index e644315..96cf5bf 100644 --- a/README.md +++ b/README.md @@ -18,27 +18,39 @@ may break at any time without the MAJOR version number being incremented. The table below compares the single threaded throughput in bytes/s (real time) between libhat and [two other](test/benchmark/vendor) commonly used implementations for pattern scanning. The input buffers were randomly generated using a fixed seed, and the pattern -scanned does not contain any match in the buffer. The benchmark was run on a system with -an i7-9700K (which supports libhat's [AVX2](src/arch/x86/AVX2.cpp) scanner implementation). +scanned does not contain any match in the buffer. The benchmark was compiled on Windows +with `clang-cl` 21.1.1, using the MSVC 14.44.35207 toolchain and the default release mode +flags (`/GR /EHsc /MD /O2 /Ob2`). The benchmark was run on a system with an i7-14700K +(supporting [AVX2](src/arch/x86/AVX2.cpp)) and 64GB (4x16GB) DDR5 6000 MT/s (30-38-38-96). The full source code is available [here](test/benchmark/Compare.cpp). 
``` ---------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations bytes_per_second ---------------------------------------------------------------------------------------- -BM_Throughput_Libhat/4MiB 131578 ns 48967 ns 21379 29.6876Gi/s -BM_Throughput_Libhat/16MiB 813977 ns 413524 ns 3514 19.1959Gi/s -BM_Throughput_Libhat/128MiB 6910936 ns 3993486 ns 403 18.0873Gi/s -BM_Throughput_Libhat/256MiB 13959379 ns 8121906 ns 202 17.9091Gi/s - -BM_Throughput_UC1/4MiB 4739731 ns 2776015 ns 591 843.93Mi/s -BM_Throughput_UC1/16MiB 19011485 ns 10841837 ns 147 841.597Mi/s -BM_Throughput_UC1/128MiB 152277511 ns 82465278 ns 18 840.571Mi/s -BM_Throughput_UC1/256MiB 304964544 ns 180555556 ns 9 839.442Mi/s - -BM_Throughput_UC2/4MiB 9633499 ns 4617698 ns 291 415.218Mi/s -BM_Throughput_UC2/16MiB 38507193 ns 22474315 ns 73 415.507Mi/s -BM_Throughput_UC2/128MiB 307989100 ns 164930556 ns 9 415.599Mi/s -BM_Throughput_UC2/256MiB 616449240 ns 331250000 ns 5 415.282Mi/s +--------------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations bytes_per_second +--------------------------------------------------------------------------------------------------- +BM_Throughput_libhat/4MiB 67686 ns 67816 ns 82254 57.7110Gi/s +BM_Throughput_libhat/16MiB 319801 ns 319558 ns 18287 48.8585Gi/s +BM_Throughput_libhat/128MiB 5325733 ns 5282315 ns 1056 23.4709Gi/s +BM_Throughput_libhat/256MiB 10921878 ns 10814951 ns 510 22.8898Gi/s + +BM_Throughput_std_search/4MiB 1364050 ns 1361672 ns 4108 2.86372Gi/s +BM_Throughput_std_search/16MiB 5470025 ns 5458783 ns 1019 2.85648Gi/s +BM_Throughput_std_search/128MiB 43622456 ns 43483527 ns 129 2.86550Gi/s +BM_Throughput_std_search/256MiB 88093320 ns 87158203 ns 64 2.83790Gi/s + +BM_Throughput_std_find_std_equal/4MiB 178567 ns 178586 ns 31410 21.8755Gi/s +BM_Throughput_std_find_std_equal/16MiB 806394 ns 805228 ns 7005 19.3764Gi/s 
+BM_Throughput_std_find_std_equal/128MiB 8944718 ns 8953652 ns 623 13.9747Gi/s +BM_Throughput_std_find_std_equal/256MiB 18092713 ns 18102751 ns 309 13.8177Gi/s + +BM_Throughput_UC1/4MiB 1727027 ns 1721236 ns 3268 2.26183Gi/s +BM_Throughput_UC1/16MiB 6878188 ns 6849054 ns 819 2.27167Gi/s +BM_Throughput_UC1/128MiB 55181849 ns 55300245 ns 102 2.26524Gi/s +BM_Throughput_UC1/256MiB 110209374 ns 110000000 ns 50 2.26841Gi/s + +BM_Throughput_UC2/4MiB 4011942 ns 4001524 ns 1394 997.023Mi/s +BM_Throughput_UC2/16MiB 16136510 ns 16166908 ns 346 991.540Mi/s +BM_Throughput_UC2/128MiB 130954740 ns 130087209 ns 43 977.437Mi/s +BM_Throughput_UC2/256MiB 261157833 ns 261160714 ns 21 980.250Mi/s ``` ## Platforms diff --git a/test/benchmark/Compare.cpp b/test/benchmark/Compare.cpp index 96a0cb8..5feec15 100644 --- a/test/benchmark/Compare.cpp +++ b/test/benchmark/Compare.cpp @@ -20,7 +20,7 @@ static auto gen_random_buffer(const size_t size) { return buffer; } -static void BM_Throughput_Libhat(benchmark::State& state) { +static void BM_Throughput_libhat(benchmark::State& state) { const size_t size = state.range(0); const auto buf = gen_random_buffer(size); const auto begin = std::to_address(buf.begin()); @@ -33,6 +33,34 @@ static void BM_Throughput_Libhat(benchmark::State& state) { state.SetBytesProcessed(static_cast(state.iterations() * size)); } +static void BM_Throughput_std_search(benchmark::State& state) { + const size_t size = state.range(0); + const auto buf = gen_random_buffer(size); + const auto begin = std::to_address(buf.begin()); + const auto end = std::to_address(buf.end()); + + const auto sig = hat::parse_signature(test_pattern).value(); + for (auto _ : state) { + benchmark::DoNotOptimize(std::search(begin, end, sig.begin(), sig.end())); + } + state.SetBytesProcessed(static_cast(state.iterations() * size)); +} + +static void BM_Throughput_std_find_std_equal(benchmark::State& state) { + const size_t size = state.range(0); + const auto buf = gen_random_buffer(size); + const 
auto begin = std::to_address(buf.begin()); + const auto end = std::to_address(buf.end()); + + // libhat's "Single" implementation uses std::find + std::equal + const auto sig = hat::parse_signature(test_pattern).value(); + const auto context = hat::detail::scan_context::create(sig, hat::scan_alignment::X1, hat::scan_hint::none); + for (auto _ : state) { + benchmark::DoNotOptimize(context.scan(begin, end)); + } + state.SetBytesProcessed(static_cast(state.iterations() * size)); +} + static void BM_Throughput_UC1(benchmark::State& state) { const size_t size = state.range(0); const auto buf = gen_random_buffer(size); @@ -58,11 +86,20 @@ static void BM_Throughput_UC2(benchmark::State& state) { state.SetBytesProcessed(static_cast(state.iterations() * size)); } -static int64_t rangeStart = 1 << 22; // 4 MiB -static int64_t rangeLimit = 1 << 28; // 256 MiB +static constexpr int64_t rangeStart = 1 << 22; // 4 MiB +static constexpr int64_t rangeLimit = 1 << 28; // 256 MiB + +#define LIBHAT_BENCHMARK(...) 
BENCHMARK(__VA_ARGS__) \ + ->Threads(1) \ + ->MinWarmUpTime(2) \ + ->MinTime(4) \ + ->Range(rangeStart, rangeLimit) \ + ->UseRealTime(); -BENCHMARK(BM_Throughput_Libhat)->Threads(1)->MinWarmUpTime(1)->MinTime(2)->Range(rangeStart, rangeLimit)->UseRealTime(); -BENCHMARK(BM_Throughput_UC1)->Threads(1)->MinWarmUpTime(1)->MinTime(2)->Range(rangeStart, rangeLimit)->UseRealTime(); -BENCHMARK(BM_Throughput_UC2)->Threads(1)->MinWarmUpTime(1)->MinTime(2)->Range(rangeStart, rangeLimit)->UseRealTime(); +LIBHAT_BENCHMARK(BM_Throughput_libhat); +LIBHAT_BENCHMARK(BM_Throughput_std_search); +LIBHAT_BENCHMARK(BM_Throughput_std_find_std_equal); +LIBHAT_BENCHMARK(BM_Throughput_UC1); +LIBHAT_BENCHMARK(BM_Throughput_UC2); BENCHMARK_MAIN(); From 3cf6a495c7405cf2ad380108af701e8557d9e684 Mon Sep 17 00:00:00 2001 From: Brady Date: Tue, 21 Oct 2025 15:11:58 -0500 Subject: [PATCH 7/9] Update README with new syntax --- README.md | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 96cf5bf..57174a7 100644 --- a/README.md +++ b/README.md @@ -72,16 +72,53 @@ Below is a summary of the support of libhat OS APIs on various platforms: | `hp::module::for_each_segment` | ✅ | ✅ | | ## Quick start -### Pattern scanning +### Defining patterns +libhat's signature syntax consists of space-delimited tokens and is backwards compatible with IDA syntax: + +- 8 character sequences are interpreted as binary +- 2 character sequences are interpreted as hex +- 1 character must be a wildcard (`?`) + +Any digit can be substituted for a wildcard, for example: +- `????1111` is a binary sequence, and matches any byte with all ones in the lower nibble +- `A?` is a hex sequence, and matches any byte of the form `1010????` +- Both `????????` and `??` are equivalent to `?`, and will match any byte + +A complete pattern might look like `AB ? 12 ?3`. 
This matches any 4-byte +subrange `s` for which all the following conditions are met: +- `s[0] == 0xAB` +- `s[2] == 0x12` +- `(s[3] & 0x0F) == 0x03` + +Due to how various scanning algorithms are implemented, there are some restrictions when defining a pattern: + +1) A pattern must contain at least one fully masked byte (e.g. `AB` or `10011001`) +2) The first byte with a non-zero mask must have a full mask + - `?1 02` is disallowed + - `01 02` is allowed + - `?? 01` is allowed + +In code, there are a few to initialize a signature from its string representation: + ```cpp #include // Parse a pattern's string representation to an array of bytes at compile time constexpr hat::fixed_signature pattern = hat::compile_signature<"48 8D 05 ? ? ? ? E8">(); -// ...or parse it at runtime +// Parse using the UDLs at compile time +using namespace hat::literals; +constexpr hat::fixed_signature pattern = "48 8D 05 ? ? ? ? E8"_sig; // stack owned +constexpr hat::signature_view pattern = "48 8D 05 ? ? ? ? E8"_sigv; // static lifetime + +// Parse it at runtime using parsed_t = hat::result; parsed_t runtime_pattern = hat::parse_signature("48 8D 05 ? ? ? ? E8"); +``` + +### Scanning patterns +```cpp +#include // Scan for this pattern using your CPU's vectorization features auto begin = /* a contiguous iterator over std::byte */; @@ -109,6 +146,21 @@ const std::byte* address = result.get(); const std::byte* relative_address = result.rel(3); ``` +libhat has a few optimizations for searching for patterns in `x86_64` machine code: +```cpp +#include + +// If a byte pattern matches at the start of a function, the result will be aligned on 16-bytes. +// This can be indicated via the defaulted `alignment` parameter (all overloads have this parameter): +std::span range = /* ... */; +hat::signature_view pattern = /* ... */; +hat::scan_result result = hat::find_pattern(range, pattern, hat::scan_alignment::X16); + +// Additionally, x86_64 contains a non-uniform distribution of byte pairs. 
By passing the `x86_64` +// scan hint, the search can be based on the least common byte pair that is found in the pattern. +hat::scan_result result = hat::find_pattern(range, pattern, hat::scan_alignment::X1, hat::scan_hint::x86_64); +``` + ### Accessing offsets ```cpp #include From cec7d949fcd81ddbc59f5ca248fb3ed463c2c362 Mon Sep 17 00:00:00 2001 From: Brady Date: Tue, 21 Oct 2025 15:14:19 -0500 Subject: [PATCH 8/9] Fix wording --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 57174a7..7075c2e 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,7 @@ Due to how various scanning algorithms are implemented, there are some restricti - `01 02` is allowed - `?? 01` is allowed -In code, there are a few to initialize a signature from its string representation: +In code, there are a few ways to initialize a signature from its string representation: ```cpp #include From bcc78b8e97f766307bbf8df58304efbb1b2825e7 Mon Sep 17 00:00:00 2001 From: Brady Hahn Date: Tue, 21 Oct 2025 18:09:00 -0500 Subject: [PATCH 9/9] Update README.md Co-authored-by: CrackedMatter <81803926+CrackedMatter@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7075c2e..a09827e 100644 --- a/README.md +++ b/README.md @@ -109,7 +109,7 @@ constexpr hat::fixed_signature pattern = hat::compile_signature<"48 8D 05 ? ? ? // Parse using the UDLs at compile time using namespace hat::literals; constexpr hat::fixed_signature pattern = "48 8D 05 ? ? ? ? E8"_sig; // stack owned -constexpr hat::signature_view pattern = "48 8D 05 ? ? ? ? E8"_sigv; // static lifetime +constexpr hat::signature_view pattern = "48 8D 05 ? ? ? ? E8"_sigv; // static lifetime (requires C++23) // Parse it at runtime using parsed_t = hat::result;