diff --git a/benchmark/0022.from_chars/CMakeLists.txt b/benchmark/0022.from_chars/CMakeLists.txt index ca3b4791..9b3cb037 100644 --- a/benchmark/0022.from_chars/CMakeLists.txt +++ b/benchmark/0022.from_chars/CMakeLists.txt @@ -10,7 +10,6 @@ FetchContent_MakeAvailable(fast_float) add_executable(benchmark.0022.from_chars ${CMAKE_CURRENT_LIST_DIR}/atoi_vs_from_chars.cc) target_include_directories(benchmark.0022.from_chars PRIVATE ${CMAKE_SOURCE_DIR}/include) - target_compile_features(benchmark.0022.from_chars PRIVATE cxx_std_20) # Prefer official target if provided by fast_float; otherwise include headers directly @@ -22,3 +21,16 @@ else() target_include_directories(benchmark.0022.from_chars PRIVATE ${fast_float_SOURCE_DIR}/include) endif() endif() + +add_executable(benchmark.0022.from_chars_hex ${CMAKE_CURRENT_LIST_DIR}/atoi_vs_from_chars_hex.cc) +target_include_directories(benchmark.0022.from_chars_hex PRIVATE ${CMAKE_SOURCE_DIR}/include) +target_compile_features(benchmark.0022.from_chars_hex PRIVATE cxx_std_20) + +if (TARGET fast_float::fast_float) + target_link_libraries(benchmark.0022.from_chars_hex PRIVATE fast_float::fast_float) +else() + FetchContent_GetProperties(fast_float) + if (fast_float_SOURCE_DIR) + target_include_directories(benchmark.0022.from_chars_hex PRIVATE ${fast_float_SOURCE_DIR}/include) + endif() +endif() diff --git a/benchmark/0022.from_chars/atoi_vs_from_chars.cc b/benchmark/0022.from_chars/atoi_vs_from_chars.cc index 7c8b313f..419248db 100644 --- a/benchmark/0022.from_chars/atoi_vs_from_chars.cc +++ b/benchmark/0022.from_chars/atoi_vs_from_chars.cc @@ -6,9 +6,24 @@ #include #include #include +#include using namespace fast_io::io; +// NOTE: +// This benchmark compares the core integer parsing routines under identical preconditions. +// For each line, the pointer `p` is positioned at the first decimal digit; there is no +// leading whitespace, sign character, or base prefix in the [p, end) slice. +// The fast_io branch calls +// scan_int_contiguous_none_simd_space_part_define_impl<10, char>(p, end, v); +// the std::from_chars and fast_float::from_chars integer overloads are invoked on the same +// [p, end) range. +// By specification, std::from_chars and fast_float::from_chars for integer types do not +// skip leading whitespace, and scan_int_contiguous_none_simd_space_part_define_impl makes +// the same assumption that any preceding whitespace has already been consumed. Thus the +// starting conditions and termination rules are fully aligned, providing a fair comparison +// of "decimal digit substring → uint64_t" parsing performance. + static std::string make_numbers_buffer(std::size_t n) { std::string s; @@ -26,6 +41,71 @@ static std::string make_numbers_buffer(std::size_t n) return s; } +static std::string make_fixed_digits_numbers_buffer(std::size_t digits, std::size_t n) +{ + constexpr std::uint64_t pow10[20]{ + 1ull, + 10ull, + 100ull, + 1000ull, + 10000ull, + 100000ull, + 1000000ull, + 10000000ull, + 100000000ull, + 1000000000ull, + 10000000000ull, + 100000000000ull, + 1000000000000ull, + 10000000000000ull, + 100000000000000ull, + 1000000000000000ull, + 10000000000000000ull, + 100000000000000000ull, + 1000000000000000000ull, + 10000000000000000000ull}; + + if (digits == 0 || digits > 20) + { + return {}; + } + + std::string s; + s.reserve(n * (digits + 1)); + + std::uint64_t lo{}; + std::uint64_t count{}; + + if (digits == 1) + { + lo = 0; + count = 10; + } + else if (digits < 20) + { + lo = pow10[digits - 1]; + count = pow10[digits] - lo; + } + else + { + lo = pow10[19]; + count = (std::numeric_limits::max)() - lo + 1; + } + + for (std::size_t i{}; i != n; ++i) + { + auto old = s.size(); + s.resize(old + 32); + auto *first = s.data() + old; + auto *last = s.data() + s.size(); + std::uint64_t value = lo + static_cast(i % count); + auto res = std::to_chars(first, last - 1, value); + *res.ptr = '\n'; + s.resize(static_cast(res.ptr - s.data() + 1)); + } + return s; +} + int main() { constexpr std::size_t N = 10'000'000; @@ -39,7 +119,7 @@ int main() { lines += (*p == '\n'); } - fast_io::println("lines=", lines); + fast_io::perrln("lines=", lines); } // atoi @@ -85,7 +165,60 @@ int main() } - // fast_io char_digit_to_literal + // fast_io core sto (dec) - scalar/SWAR path: + // scan_int_contiguous_none_simd_space_part_define_impl (no SSE4.1 fast path) + { + fast_io::timer t(u8"fastio_scan_int_none_simd_dec"); + std::uint64_t sum{}; + char const *p = begin; + while (p < end) + { + std::uint64_t v{}; + auto res = ::fast_io::details::scan_int_contiguous_none_simd_space_part_define_impl<10, char>( + p, end, v); + if (res.code != fast_io::parse_code::ok) + { + break; + } + sum += v; + p = res.iter; + if (p < end && *p == '\n') + { + ++p; + } + } + std::uint64_t volatile sink = sum; + (void)sink; + } + +#if defined(__SSE4_1__) && (defined(__x86_64__) || defined(_M_AMD64)) + // fast_io core sto (dec) - SSE4.1-accelerated path: + // scan_int_contiguous_none_space_part_define_impl (may use sse_parse for base-10) + { + fast_io::timer t(u8"fastio_scan_int_sse4_dec"); + std::uint64_t sum{}; + char const *p = begin; + while (p < end) + { + std::uint64_t v{}; + auto res = ::fast_io::details::scan_int_contiguous_none_space_part_define_impl<10>(p, end, v); + if (res.code != fast_io::parse_code::ok) + { + break; + } + sum += v; + p = res.iter; + if (p < end && *p == '\n') + { + ++p; + } + } + std::uint64_t volatile sink = sum; + (void)sink; + } +#endif + + // fast_io char_digit_to_literal (hex) { fast_io::timer t(u8"fastio_char_digit_to_literal"); std::uint64_t sum{}; @@ -112,6 +245,7 @@ int main() (void)sink; } + // fast_float { fast_io::timer t(u8"fast_float_from_chars"); @@ -132,4 +266,165 @@ int main() (void)sink; } + // Per-digit decimal benchmarks: 1-digit up to theoretical max decimal digits of uint64_t (20) + { + constexpr std::size_t max_digits = 20; + for (std::size_t digits = 1; digits <= max_digits; ++digits) + { + auto buf_fixed = make_fixed_digits_numbers_buffer(digits, N); + char const *fixed_begin = buf_fixed.data(); + char const *fixed_end = buf_fixed.data() + buf_fixed.size(); + + fast_io::perrln("\n\nfixed_digits=", digits, " lines=", N); + + { + std::size_t lines{}; + for (char const *p = fixed_begin; p < fixed_end; ++p) + { + lines += (*p == '\n'); + } + fast_io::perrln("lines=", lines); + } + + // atoi on fixed-width decimal substrings + { + fast_io::timer t(u8"atoi_fixed"); + std::uint64_t sum{}; + char const *p = fixed_begin; + while (p < fixed_end) + { + int v = std::atoi(p); + sum += static_cast(v); + while (p < fixed_end && *p >= '0' && *p <= '9') + { + ++p; + } + if (p < fixed_end && *p == '\n') + { + ++p; + } + } + std::uint64_t volatile sink = sum; + (void)sink; + } + + // std::from_chars on fixed-width decimal substrings + { + fast_io::timer t(u8"std_from_chars_fixed"); + std::uint64_t sum{}; + char const *p = fixed_begin; + while (p < fixed_end) + { + std::uint64_t v{}; + auto res = std::from_chars(p, fixed_end, v); + sum += v; + p = res.ptr; + if (p < fixed_end && *p == '\n') + { + ++p; + } + } + std::uint64_t volatile sink = sum; + (void)sink; + } + + // fast_io char_digit_to_literal on fixed-width decimal substrings + { + fast_io::timer t(u8"fastio_char_digit_to_literal_fixed"); + std::uint64_t sum{}; + char const *p = fixed_begin; + while (p < fixed_end) + { + using UCh = std::make_unsigned_t; + std::uint64_t v{}; + char const *q = p; + while (q < fixed_end && *q != '\n') + { + UCh ch = static_cast(*q); + if (fast_io::details::char_digit_to_literal<10, char>(ch)) + { + break; + } + v = v * 10 + static_cast(ch); + ++q; + } + sum += v; + p = (q < fixed_end ? q + 1 : q); + } + std::uint64_t volatile sink = sum; + (void)sink; + } + + // fast_io core sto (dec) - scalar/SWAR path on fixed-width decimal substrings + { + fast_io::timer t(u8"fastio_scan_int_none_simd_dec_fixed"); + std::uint64_t sum{}; + char const *p = fixed_begin; + while (p < fixed_end) + { + std::uint64_t v{}; + auto res = ::fast_io::details::scan_int_contiguous_none_simd_space_part_define_impl<10, char>( + p, fixed_end, v); + if (res.code != fast_io::parse_code::ok) + { + break; + } + sum += v; + p = res.iter; + if (p < fixed_end && *p == '\n') + { + ++p; + } + } + std::uint64_t volatile sink = sum; + (void)sink; + } + +#if defined(__SSE4_1__) && (defined(__x86_64__) || defined(_M_AMD64)) + // fast_io core sto (dec) - SSE4.1-accelerated path on fixed-width decimal substrings + { + fast_io::timer t(u8"fastio_scan_int_sse4_dec_fixed"); + std::uint64_t sum{}; + char const *p = fixed_begin; + while (p < fixed_end) + { + std::uint64_t v{}; + auto res = ::fast_io::details::scan_int_contiguous_none_space_part_define_impl<10>(p, fixed_end, v); + if (res.code != fast_io::parse_code::ok) + { + break; + } + sum += v; + p = res.iter; + if (p < fixed_end && *p == '\n') + { + ++p; + } + } + std::uint64_t volatile sink = sum; + (void)sink; + } +#endif + + // fast_float integer from_chars on fixed-width decimal substrings + { + fast_io::timer t(u8"fast_float_from_chars_fixed"); + std::uint64_t sum{}; + char const *p = fixed_begin; + while (p < fixed_end) + { + std::uint64_t v{}; + auto res = fast_float::from_chars(p, fixed_end, v); + sum += v; + p = res.ptr; + if (p < fixed_end && *p == '\n') + { + ++p; + } + } + std::uint64_t volatile sink = sum; + (void)sink; + } + } + } } diff --git a/benchmark/0022.from_chars/atoi_vs_from_chars_hex.cc b/benchmark/0022.from_chars/atoi_vs_from_chars_hex.cc new file mode 100644 index 00000000..4ad5b03c --- /dev/null +++ b/benchmark/0022.from_chars/atoi_vs_from_chars_hex.cc @@ -0,0 +1,392 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace fast_io::io; + +// NOTE: +// This benchmark compares core integer parsing in base 16 under aligned conditions. +// For each line, the pointer `p` is positioned at the first hexadecimal digit; there is no +// leading whitespace or base prefix in the [p, end) slice, and the 0–9/A–F/a–f pattern of +// each line is identical for all libraries. Newline characters '\n' are skipped manually. +// The fast_io branch calls +// scan_int_contiguous_none_simd_space_part_define_impl<16, char>(p, end, v); +// the std::from_chars and fast_float::from_chars integer overloads are invoked on the same +// [p, end) range. +// As with the decimal benchmark, std::from_chars and fast_float::from_chars for integers +// do not skip leading whitespace, and scan_int_contiguous_none_simd_space_part_define_impl +// assumes that any preceding whitespace has already been consumed. The preconditions are +// therefore identical, making this a fair comparison of "hex digit substring → uint64_t" +// parsing performance. + +static std::string make_hex_numbers_buffer(std::size_t n) +{ + std::string s; + s.reserve(n * 8); + for (std::size_t i{}; i != n; ++i) + { + auto old = s.size(); + s.resize(old + 32); + auto *first = s.data() + old; + auto *last = s.data() + s.size(); + auto res = std::to_chars(first, last - 1, i, 16); + // mix lowercase/uppercase hex digits in the buffer + if ((i & 1u) != 0u) + { + for (auto p = first; p != res.ptr; ++p) + { + if (*p >= 'a' && *p <= 'f') + { + *p = static_cast(*p - 'a' + 'A'); + } + } + } + *res.ptr = '\n'; + s.resize(static_cast(res.ptr - s.data() + 1)); + } + return s; +} + +static std::string make_fixed_hex_numbers_buffer(std::size_t digits, std::size_t n) +{ + if (digits == 0 || digits > 16) + { + return {}; + } + + constexpr std::uint64_t pow16[16]{ + 1ull, + 16ull, + 256ull, + 4096ull, + 65536ull, + 1048576ull, + 16777216ull, + 268435456ull, + 4294967296ull, + 68719476736ull, + 1099511627776ull, + 17592186044416ull, + 281474976710656ull, + 4503599627370496ull, + 72057594037927936ull, + 1152921504606846976ull}; + + std::string s; + s.reserve(n * (digits + 1)); + + std::uint64_t lo{}; + std::uint64_t hi{}; + + if (digits == 1) + { + lo = 0; + hi = 0xFu; + } + else if (digits < 16) + { + lo = pow16[digits - 1]; + hi = pow16[digits] - 1; + } + else + { + lo = pow16[15]; + hi = (std::numeric_limits::max)(); + } + + std::uint64_t count = hi - lo + 1; + + for (std::size_t i{}; i != n; ++i) + { + auto old = s.size(); + s.resize(old + 32); + auto *first = s.data() + old; + auto *last = s.data() + s.size(); + std::uint64_t value = lo + static_cast(i % count); + auto res = std::to_chars(first, last - 1, value, 16); + // mix lowercase/uppercase hex digits in the buffer + if ((value & 1u) != 0u) + { + for (auto p = first; p != res.ptr; ++p) + { + if (*p >= 'a' && *p <= 'f') + { + *p = static_cast(*p - 'a' + 'A'); + } + } + } + *res.ptr = '\n'; + s.resize(static_cast(res.ptr - s.data() + 1)); + } + return s; +} + +int main() +{ + constexpr std::size_t N = 10'000'000; + auto buf = make_hex_numbers_buffer(N); + char const *begin = buf.data(); + char const *end = buf.data() + buf.size(); + + { + std::size_t lines{}; + for (char const *p = begin; p < end; ++p) + { + lines += (*p == '\n'); + } + fast_io::perrln("lines=", lines); + } + + // strtoul (hex) + { + fast_io::timer t(u8"strtoul_hex"); + std::uint64_t sum{}; + char const *p = begin; + while (p < end) + { + char *endptr{}; + auto v = std::strtoul(p, &endptr, 16); + sum += static_cast(v); + p = endptr; + if (p < end && *p == '\n') + { + ++p; + } + } + std::uint64_t volatile sink = sum; + (void)sink; + } + + // std::from_chars (hex) + { + fast_io::timer t(u8"std_from_chars_hex"); + std::uint64_t sum{}; + char const *p = begin; + while (p < end) + { + std::uint64_t v{}; + auto res = std::from_chars(p, end, v, 16); + sum += v; + p = res.ptr; + if (p < end && *p == '\n') + { + ++p; + } + } + std::uint64_t volatile sink = sum; + (void)sink; + } + + // fast_io core sto (hex) - scan_int_contiguous_none_simd_space_part_define_impl + { + fast_io::timer t(u8"fastio_scan_int_none_simd_hex (SIMT)"); + std::uint64_t sum{}; + char const *p = begin; + while (p < end) + { + std::uint64_t v{}; + auto res = ::fast_io::details::scan_int_contiguous_none_simd_space_part_define_impl<16, char>( + p, end, v); + if (res.code != fast_io::parse_code::ok) + { + break; + } + sum += v; + p = res.iter; + if (p < end && *p == '\n') + { + ++p; + } + } + std::uint64_t volatile sink = sum; + (void)sink; + } + + // fast_io char_digit_to_literal (hex) + { + fast_io::timer t(u8"fastio_char_digit_to_literal_hex"); + std::uint64_t sum{}; + char const *p = begin; + while (p < end) + { + using UCh = std::make_unsigned_t; + std::uint64_t v{}; + char const *q = p; + while (q < end && *q != '\n') + { + UCh ch = static_cast(*q); + if (fast_io::details::char_digit_to_literal<16, char>(ch)) + { + break; + } + v = (v << 4) + static_cast(ch); + ++q; + } + sum += v; + p = (q < end ? q + 1 : q); + } + std::uint64_t volatile sink = sum; + (void)sink; + } + + // fast_float (hex) + { + fast_io::timer t(u8"fast_float_from_chars_hex"); + std::uint64_t sum{}; + char const *p = begin; + while (p < end) + { + std::uint64_t v{}; + auto res = fast_float::from_chars(p, end, v, 16); + sum += v; + p = res.ptr; + if (p < end && *p == '\n') + { + ++p; + } + } + std::uint64_t volatile sink = sum; + (void)sink; + } + + // Per-digit hex benchmarks: 1-digit up to theoretical max hexadecimal digits of uint64_t (16) + { + constexpr std::size_t max_hex_digits = 16; + for (std::size_t digits = 1; digits <= max_hex_digits; ++digits) + { + auto buf_fixed = make_fixed_hex_numbers_buffer(digits, N); + char const *fixed_begin = buf_fixed.data(); + char const *fixed_end = buf_fixed.data() + buf_fixed.size(); + + fast_io::perrln("\n\nfixed_hex_digits=", digits, " lines=", N); + + { + std::size_t lines{}; + for (char const *p = fixed_begin; p < fixed_end; ++p) + { + lines += (*p == '\n'); + } + fast_io::perrln("lines=", lines); + } + + // strtoul (hex) on fixed-width hex substrings + { + fast_io::timer t(u8"strtoul_hex_fixed"); + std::uint64_t sum{}; + char const *p = fixed_begin; + while (p < fixed_end) + { + char *endptr{}; + auto v = std::strtoul(p, &endptr, 16); + sum += static_cast(v); + p = endptr; + if (p < fixed_end && *p == '\n') + { + ++p; + } + } + std::uint64_t volatile sink = sum; + (void)sink; + } + + // std::from_chars (hex) on fixed-width hex substrings + { + fast_io::timer t(u8"std_from_chars_hex_fixed"); + std::uint64_t sum{}; + char const *p = fixed_begin; + while (p < fixed_end) + { + std::uint64_t v{}; + auto res = std::from_chars(p, fixed_end, v, 16); + sum += v; + p = res.ptr; + if (p < fixed_end && *p == '\n') + { + ++p; + } + } + std::uint64_t volatile sink = sum; + (void)sink; + } + + // fast_io char_digit_to_literal (hex) on fixed-width hex substrings + { + fast_io::timer t(u8"fastio_char_digit_to_literal_hex_fixed"); + std::uint64_t sum{}; + char const *p = fixed_begin; + while (p < fixed_end) + { + using UCh = std::make_unsigned_t; + std::uint64_t v{}; + char const *q = p; + while (q < fixed_end && *q != '\n') + { + UCh ch = static_cast(*q); + if (fast_io::details::char_digit_to_literal<16, char>(ch)) + { + break; + } + v = (v << 4) + static_cast(ch); + ++q; + } + sum += v; + p = (q < fixed_end ? q + 1 : q); + } + std::uint64_t volatile sink = sum; + (void)sink; + } + + // fast_io core sto (hex) on fixed-width hex substrings + { + fast_io::timer t(u8"fastio_scan_int_none_simd_hex_fixed"); + std::uint64_t sum{}; + char const *p = fixed_begin; + while (p < fixed_end) + { + std::uint64_t v{}; + auto res = ::fast_io::details::scan_int_contiguous_none_simd_space_part_define_impl<16, char>( + p, fixed_end, v); + if (res.code != fast_io::parse_code::ok) + { + break; + } + sum += v; + p = res.iter; + if (p < fixed_end && *p == '\n') + { + ++p; + } + } + std::uint64_t volatile sink = sum; + (void)sink; + } + + // fast_float (hex) on fixed-width hex substrings + { + fast_io::timer t(u8"fast_float_from_chars_hex_fixed"); + std::uint64_t sum{}; + char const *p = fixed_begin; + while (p < fixed_end) + { + std::uint64_t v{}; + auto res = fast_float::from_chars(p, fixed_end, v, 16); + sum += v; + p = res.ptr; + if (p < fixed_end && *p == '\n') + { + ++p; + } + } + std::uint64_t volatile sink = sum; + (void)sink; + } + } + } +} diff --git a/third-party/googletest b/third-party/googletest new file mode 160000 index 00000000..fa8438ae --- /dev/null +++ b/third-party/googletest @@ -0,0 +1 @@ +Subproject commit fa8438ae6b70c57010177de47a9f13d7041a6328