diff --git a/cobs/construction/classic_index.cpp b/cobs/construction/classic_index.cpp index 8a09904..5f980f2 100644 --- a/cobs/construction/classic_index.cpp +++ b/cobs/construction/classic_index.cpp @@ -322,6 +322,7 @@ void classic_combine_streams( t.active("write"); ofs.write(out_block.data(), new_row_bytes * this_batch); + std::fill(out_block.begin(), out_block.end(), '\0'); } t.stop(); } diff --git a/tests/classic_index_construction.cpp b/tests/classic_index_construction.cpp index c092f33..d675734 100644 --- a/tests/classic_index_construction.cpp +++ b/tests/classic_index_construction.cpp @@ -6,6 +6,9 @@ * All rights reserved. Published under the MIT License in the LICENSE file. ******************************************************************************/ +#include +#include + #include "test_util.hpp" #include #include @@ -21,6 +24,26 @@ static fs::path index_dir = base_dir / "index"; static fs::path index_file = base_dir / "index.cobs_classic"; static fs::path tmp_path = base_dir / "tmp"; +// Compare two files. Return true if the contents of both files are the same. 
+bool compare_files(const std::string& filename1, const std::string& filename2)
+{
+    std::ifstream file1(filename1, std::ifstream::ate | std::ifstream::binary); // open file at the end
+    std::ifstream file2(filename2, std::ifstream::ate | std::ifstream::binary); // open file at the end
+    if (!file1.is_open() || !file2.is_open()) {
+        return false; // an unreadable file never compares equal (two missing files used to match)
+    }
+    if (file1.tellg() != file2.tellg()) {
+        return false; // different file size
+    }
+
+    file1.seekg(0); // rewind
+    file2.seekg(0); // rewind
+
+    // A default-constructed istreambuf_iterator is the end-of-stream sentinel.
+    return std::equal(std::istreambuf_iterator<char>(file1), std::istreambuf_iterator<char>(),
+                      std::istreambuf_iterator<char>(file2));
+}
+
 class classic_index_construction : public ::testing::Test
 {
 protected:
@@ -151,4 +174,50 @@ TEST_F(classic_index_construction, combine) {
     }
 }
 
+TEST_F(classic_index_construction, combined_index_same_as_classic_constructed) {
+    // This test starts with 2 copies of the same randomly generated document.
+    // We build a classic index for each of these two documents.
+    // We then combine these two classic indices into one combined index.
+    // The combined index should be the same as the classic index generated
+    // through `cobs::classic_construct` on these two documents.
+    using cobs::pad_index;
+    fs::create_directories(index_dir);
+    fs::create_directories(index_dir/pad_index(0));
+    fs::create_directories(index_dir/pad_index(1));
+    fs::create_directories(index_dir/pad_index(2));
+
+    // prepare 2 copies of a randomly generated document
+    std::string random_doc_src_string = cobs::random_sequence(1000, 1);
+    auto random_docs = generate_documents_one(random_doc_src_string, 1);
+    generate_test_case(random_docs, "random_", input_dir/pad_index(0));
+    generate_test_case(random_docs, "random_", input_dir/pad_index(1));
+
+    cobs::ClassicIndexParameters index_params;
+    index_params.false_positive_rate = 0.001; // in order to use large signature size
+    index_params.mem_bytes = 80;
+    index_params.num_threads = 1;
+    index_params.continue_ = true;
+
+    // generate a classic index for each document; both files are written into index_dir/0
+    cobs::classic_construct(cobs::DocumentList(input_dir/pad_index(0)),
+                            index_dir/pad_index(0)/(pad_index(0) + ".cobs_classic"),
+                            tmp_path, index_params);
+    cobs::classic_construct(cobs::DocumentList(input_dir/pad_index(1)),
+                            index_dir/pad_index(0)/(pad_index(1) + ".cobs_classic"),
+                            tmp_path, index_params);
+
+    // generate a combined index from both classic constructed indices
+    fs::path combined_index;
+    cobs::classic_combine(index_dir/pad_index(0), index_dir/pad_index(1), combined_index,
+                          80, 1, false);
+
+    // generate a classic index for both docs through classic_construct
+    std::string classic_constructed_index = index_dir/pad_index(2)/(pad_index(0) +
+                                                                    ".cobs_classic");
+    cobs::classic_construct(cobs::DocumentList(input_dir), classic_constructed_index,
+                            tmp_path, index_params);
+
+    ASSERT_TRUE(compare_files(combined_index.string(), classic_constructed_index));
+}
+
 /******************************************************************************/