diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 643e96e..6176ab8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -86,6 +86,15 @@ jobs: run: | rustup toolchain install ${{ matrix.rust-version }} rustup default ${{ matrix.rust-version }} + - uses: actions/setup-go@v6 + with: + go-version: stable + cache-dependency-path: | + tools/snapshots/go/go.sum + - name: Generate test snapshots + shell: bash + working-directory: tools/snapshots/go + run: go test -v ./... - name: Build run: cargo build --workspace --all-features --bins --tests --examples --benches --lib - name: Run unit tests diff --git a/tests/hll_serialization_test.rs b/tests/hll_serialization_test.rs index 23e9973..1438868 100644 --- a/tests/hll_serialization_test.rs +++ b/tests/hll_serialization_test.rs @@ -18,8 +18,10 @@ //! HLL Sketch Serialization Compatibility Tests //! //! These tests verify binary compatibility with Apache DataSketches implementations: +//! //! - Java (datasketches-java) //! - C++ (datasketches-cpp) +//! - Go (datasketches-go) //! //! Test data is generated by the reference implementations and stored in: //! 
`tests/serialization_test_data/`
@@ -41,7 +43,17 @@ fn get_test_data_path(sub_dir: &str, name: &str) -> PathBuf {
 fn test_sketch_file(path: PathBuf, expected_cardinality: usize, expected_lg_k: u8) {
     let expected = expected_cardinality as f64;
 
-    let bytes = fs::read(&path).unwrap();
+    let bytes = match fs::read(&path) {
+        Ok(bytes) => bytes,
+        // Go snapshots are git-ignored and produced on demand, so name the generator here.
+        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
+            panic!(
+                "snapshot {} not found; Go snapshots are generated by running `go test ./...` in tools/snapshots/go",
+                path.display()
+            );
+        }
+        Err(err) => panic!("failed to read test data file {}: {}", path.display(), err),
+    };
     let sketch1 = HllSketch::deserialize(&bytes).unwrap();
     let estimate1 = sketch1.estimate();
 
@@ -177,6 +189,42 @@ fn test_cpp_hll8_compatibility() {
     }
 }
 
+/// Runs every Go-generated HLL4 snapshot (lg_k 12) through `test_sketch_file`.
+#[test]
+fn test_go_hll4_compatibility() {
+    let test_cases = [0, 1, 10, 100, 1000, 10000, 100000, 1000000];
+
+    for n in test_cases {
+        let filename = format!("hll4_n{}_go.sk", n);
+        let path = get_test_data_path("go_generated_files", &filename);
+        test_sketch_file(path, n, 12);
+    }
+}
+
+/// Runs every Go-generated HLL6 snapshot (lg_k 12) through `test_sketch_file`.
+#[test]
+fn test_go_hll6_compatibility() {
+    let test_cases = [0, 1, 10, 100, 1000, 10000, 100000, 1000000];
+
+    for n in test_cases {
+        let filename = format!("hll6_n{}_go.sk", n);
+        let path = get_test_data_path("go_generated_files", &filename);
+        test_sketch_file(path, n, 12);
+    }
+}
+
+/// Runs every Go-generated HLL8 snapshot (lg_k 12) through `test_sketch_file`.
+#[test]
+fn test_go_hll8_compatibility() {
+    let test_cases = [0, 1, 10, 100, 1000, 10000, 100000, 1000000];
+
+    for n in test_cases {
+        let filename = format!("hll8_n{}_go.sk", n);
+        let path = get_test_data_path("go_generated_files", &filename);
+        test_sketch_file(path, n, 12);
+    }
+}
+
 #[test]
 fn test_estimate_accuracy() {
     // This test verifies and prints actual estimates to show accuracy
diff --git a/tests/serialization_test_data/go_generated_files/.gitignore b/tests/serialization_test_data/go_generated_files/.gitignore
new file mode 100644
index 0000000..511d7aa
--- /dev/null
+++ b/tests/serialization_test_data/go_generated_files/.gitignore
@@ -0,0 +1 @@
+*.sk
diff --git
a/tools/snapshots/go/go.mod b/tools/snapshots/go/go.mod new file mode 100644 index 0000000..1212375 --- /dev/null +++ b/tools/snapshots/go/go.mod @@ -0,0 +1,15 @@ +module datasketches.apache.org/tools + +go 1.25.5 + +require ( + github.com/apache/datasketches-go v0.0.0-20251212084617-f7bc4b1db865 + github.com/stretchr/testify v1.11.1 +) + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/twmb/murmur3 v1.1.8 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/tools/snapshots/go/go.sum b/tools/snapshots/go/go.sum new file mode 100644 index 0000000..de25bdf --- /dev/null +++ b/tools/snapshots/go/go.sum @@ -0,0 +1,14 @@ +github.com/apache/datasketches-go v0.0.0-20251212084617-f7bc4b1db865 h1:C8vuEhEBC2udZqWnx40Li2JkDw0NRyqJDycjedrauLw= +github.com/apache/datasketches-go v0.0.0-20251212084617-f7bc4b1db865/go.mod h1:tWCpdsIfpbCQH04ZV3mUb6RyIfovjjNwHn/dGT8wrTw= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/twmb/murmur3 v1.1.8 h1:8Yt9taO/WN3l08xErzjeschgZU2QSrwm1kclYq+0aRg= +github.com/twmb/murmur3 v1.1.8/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod 
h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/tools/snapshots/go/tools_test.go b/tools/snapshots/go/tools_test.go
new file mode 100644
index 0000000..99fdf04
--- /dev/null
+++ b/tools/snapshots/go/tools_test.go
@@ -0,0 +1,61 @@
+package tools
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/apache/datasketches-go/hll"
+	"github.com/stretchr/testify/require"
+)
+
+// defaultLgK is the lgK used for every generated sketch; the Rust compatibility tests expect 12.
+const defaultLgK = 12
+
+// writeSnapshot calls serialize and writes the returned bytes to
+// <dir>/<kind>_n<n>_go.sk, failing the test on any error.
+func writeSnapshot(t *testing.T, dir, kind string, n int, serialize func() ([]byte, error)) {
+	t.Helper()
+
+	data, err := serialize()
+	require.NoError(t, err)
+
+	name := fmt.Sprintf("%s_n%d_go.sk", kind, n)
+	require.NoError(t, os.WriteFile(filepath.Join(dir, name), data, 0644))
+}
+
+// TestGenerateGoFile writes the ToCompactSlice bytes of HLL4/HLL6/HLL8 sketches,
+// for each cardinality in nArr, into tests/serialization_test_data/go_generated_files,
+// resolved three directories above the current working directory.
+func TestGenerateGoFile(t *testing.T) {
+	path, err := os.Getwd()
+	require.NoError(t, err)
+
+	workspace := filepath.Join(path, "..", "..", "..")
+	target := filepath.Join(workspace, "tests", "serialization_test_data", "go_generated_files")
+
+	// The output directory does not depend on n, so create it once up front.
+	require.NoError(t, os.MkdirAll(target, os.ModePerm))
+
+	nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000}
+	for _, n := range nArr {
+		hll4, err := hll.NewHllSketch(defaultLgK, hll.TgtHllTypeHll4)
+		require.NoError(t, err)
+		hll6, err := hll.NewHllSketch(defaultLgK, hll.TgtHllTypeHll6)
+		require.NoError(t, err)
+		hll8, err := hll.NewHllSketch(defaultLgK, hll.TgtHllTypeHll8)
+		require.NoError(t, err)
+
+		// Feed the same values 0..n-1 to all three sketches.
+		for i := 0; i < n; i++ {
+			require.NoError(t, hll4.UpdateUInt64(uint64(i)))
+			require.NoError(t, hll6.UpdateUInt64(uint64(i)))
+			require.NoError(t, hll8.UpdateUInt64(uint64(i)))
+		}
+
+		writeSnapshot(t, target, "hll4", n, hll4.ToCompactSlice)
+		writeSnapshot(t, target, "hll6", n, hll6.ToCompactSlice)
+		writeSnapshot(t, target, "hll8", n, hll8.ToCompactSlice)
+	}
+}