Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
109 commits
Select commit Hold shift + click to select a range
9a39023
added support for other algorithms
GuyAv46 Feb 13, 2022
d565635
re-write milvus.py file for their new API
GuyAv46 Feb 13, 2022
d6c06af
added --run-group
GuyAv46 Feb 14, 2022
a80dd70
more updates for milvus algorithm
GuyAv46 Feb 14, 2022
eebac85
added support for multi-client build
GuyAv46 Feb 14, 2022
f52e623
milvus.py improvement
GuyAv46 Feb 16, 2022
4b07c48
default values update
GuyAv46 Feb 24, 2022
25ff7ae
rename TOP_K to KNN
GuyAv46 Mar 1, 2022
00f51b5
moved from L2 to either L2 or IP
Mar 3, 2022
05caf7e
added drop collection
Mar 3, 2022
253e5a8
Update ann_benchmarks/algorithms/milvus.py
Mar 3, 2022
94fc098
Merge pull request #3 from RedisAI/fix_milvus_metric
Mar 3, 2022
6ae9bff
Changes towards redisbench_admin integration. Workdir fix
filipecosta90 Mar 7, 2022
579b25a
added yandex 1B subset dataset generator
Mar 7, 2022
db852a5
moved to main folder
Mar 7, 2022
d0d5917
empty line
Mar 7, 2022
e714dd8
Merge pull request #4 from RedisAI/dataset_generator
Mar 7, 2022
947c6a9
hybrid datasets generator
Mar 7, 2022
d7ed689
write the id buckets to the hdf5 file
Mar 7, 2022
0c0d073
empty line
Mar 7, 2022
1a8e116
Ensure workdir is used when creating build_stats results dir
filipecosta90 Mar 7, 2022
55b2e84
Merge pull request #6 from RedisAI/makedirs.fix
filipecosta90 Mar 8, 2022
f1d80d0
fix for passing number of runs
GuyAv46 Mar 8, 2022
5b98837
hybrid dataset generator. Redisearch hybrid load and run
Mar 9, 2022
766941b
fixed dataset name. redisearch fixes
Mar 9, 2022
0ba58d1
testers clients now read build stats and
GuyAv46 Mar 9, 2022
4f6e597
fixing types in multirun.py
GuyAv46 Mar 9, 2022
a6956c0
Merge pull request #5 from RedisAI/create_hybrid_datasets
Mar 10, 2022
bfa141e
Merge pull request #7 from RedisAI/build_stats
GuyAv46 Mar 10, 2022
e75dc79
added try..except
GuyAv46 Mar 10, 2022
78d73cc
Merge pull request #8 from RedisAI/build_stats
GuyAv46 Mar 10, 2022
bb906f1
Fixed redisearch query() on non hybrid runs
filipecosta90 Mar 11, 2022
73b674f
Merge pull request #9 from RedisAI/run.fix.text
filipecosta90 Mar 11, 2022
fc9ae71
aggregate clients
GuyAv46 Mar 14, 2022
aced151
Merge pull request #10 from RedisAI/aggregate_testers
GuyAv46 Mar 14, 2022
1e9c684
improved assertion log
GuyAv46 Mar 17, 2022
afe607d
Merge pull request #11 from RedisAI/aggregate_testers
GuyAv46 Mar 17, 2022
7a5bc76
fix hybrid creation. added big ann
Mar 21, 2022
ddeb8f8
updated big ann bucket
Mar 21, 2022
b9a1897
fixed big ann hybrid datasets name
Mar 21, 2022
2dd0684
Merge pull request #12 from RedisAI/fix_hybrid_creation_big_ann
Mar 21, 2022
d2d91b7
fixed initial capacity on FT.Create
Mar 22, 2022
316d308
Merge pull request #13 from RedisAI/fix_initial_cap
Mar 23, 2022
e98a337
fixed race condition
GuyAv46 Mar 23, 2022
a3bce91
wip
Mar 24, 2022
43a3e21
serialized run-groups
GuyAv46 Mar 24, 2022
93f1344
skips aggregate files when running with 1 client
GuyAv46 Mar 24, 2022
19f82a5
added dialect 2 for redisearch
GuyAv46 Mar 24, 2022
1b9e3fb
added comments
GuyAv46 Mar 24, 2022
394109a
Merge pull request #15 from RedisAI/sirealize_run_groups
GuyAv46 Mar 24, 2022
a6f8345
In multirun change to the proper workdir asap
filipecosta90 Mar 25, 2022
41d3848
Merge pull request #16 from RedisAI/fix.get_run_groups
filipecosta90 Mar 25, 2022
e1a4d38
report memory in kb
Mar 25, 2022
95b2c18
Merge pull request #17 from RedisAI/memory_in_kb
Mar 25, 2022
ab08e39
fix float conversion of vector_index_sz_mb before multiplying
filipecosta90 Mar 25, 2022
0ce7a5a
Fixes per PR review
filipecosta90 Mar 25, 2022
fc17cb2
Merge pull request #18 from RedisAI/fix.memory_in_kb
filipecosta90 Mar 25, 2022
fe2e4b2
Merge remote-tracking branch 'origin/multiclient_tool' into fix_multi…
filipecosta90 Mar 25, 2022
5b6ade4
Merge pull request #14 from RedisAI/fix_multiclient_flow
filipecosta90 Mar 25, 2022
1b7f862
Revert "wip"
filipecosta90 Mar 25, 2022
55d4575
Revert 'wip'
filipecosta90 Mar 25, 2022
79aeedc
changed watcher to watch results dir
Mar 25, 2022
423ad07
Update multirun.py
Mar 25, 2022
4be13fb
Merge pull request #19 from RedisAI/fix_watcher
Mar 25, 2022
495bbba
dbpedia
May 2, 2022
0dd4689
fixed PR comment
May 2, 2022
0c56189
Merge pull request #20 from RedisAI/dbpedia_dataset
May 2, 2022
f9970c2
amazon reviews
May 18, 2022
7aeed36
fixed amazon review dataset creation
May 19, 2022
70b3a73
Merge pull request #21 from RedisAI/amazon_reviews
May 19, 2022
4566945
added shards aux arg
May 24, 2022
2316942
Merge pull request #22 from RedisAI/shards_arg
May 25, 2022
ab5ceb0
Fixed shards arg usage on multirun/redisearch
filipecosta90 May 25, 2022
7ee8bd5
Merge pull request #23 from RedisAI/fix.shards
filipecosta90 May 25, 2022
c21597d
redisearch ef runtime in algo name
May 30, 2022
8acc9f1
print qps to stdout
May 30, 2022
47c7ddd
new line
May 30, 2022
ca755d0
Merge pull request #24 from RedisAI/add_qps_and_redisearch_efruntime
filipecosta90 May 30, 2022
eaa4626
fix dbpedia download
Jun 1, 2022
5b7ada9
Merge pull request #25 from RedisAI/fix_dbpedia_download
Jun 1, 2022
2d6a223
Enable recall/latency charts on results
filipecosta90 Jun 4, 2022
3cd6fbb
removed create command optimizations
Jun 6, 2022
1bb2b77
Merge pull request #27 from RedisAI/fix_tf_create_command
filipecosta90 Jun 6, 2022
cf55960
Fixes per PR review
filipecosta90 Jun 6, 2022
46c5430
Merge pull request #26 from RedisAI/multiclient_latencies
filipecosta90 Jun 9, 2022
cbe99b0
Milvus update (#28)
GuyAv46 Jun 14, 2022
592f3ed
Added pinecone client (#29)
GuyAv46 Jun 23, 2022
12183f1
splitting the test load between test clients (#30)
GuyAv46 Jun 23, 2022
58c10f6
Update requirements.txt
Jul 5, 2022
accc744
Update requirements.txt
Jul 5, 2022
9bbf401
Merge pull request #31 from RedisAI/multirun-patch-1
Jul 5, 2022
fb3a46c
fixing double fetching(#33)
GuyAv46 Jul 19, 2022
020ad08
fixing bulk insertion (#36)
GuyAv46 Jul 21, 2022
3b5012d
Elastic client update (#34)
GuyAv46 Aug 16, 2022
80e8bf1
added vecsim lib algo
GuyAv46 Mar 8, 2023
d19900f
add dummy docker tag
GuyAv46 Mar 8, 2023
e092992
removing password from filename
GuyAv46 Mar 9, 2023
2f3000f
skipping using multiprocessing when `parallelism == 1`
GuyAv46 Mar 9, 2023
f9b5422
added throughput metric (collect start and end time)
GuyAv46 Mar 28, 2023
63d08d0
Updated throughput tracking
filipecosta90 Jun 2, 2023
f68fc3c
Merge pull request #38 from RedisAI/guyav-throughput_graph
filipecosta90 Jun 2, 2023
9d31f9f
Fixed algorithm str when using FLAT on redisearch
filipecosta90 Jun 11, 2023
0bcd51c
Merge pull request #39 from RedisAI/fix.ef
filipecosta90 Jun 12, 2023
fb02421
Ensure all primaries receive the FT.CREATE due to 'missing index erro…
filipecosta90 Jun 16, 2023
f92c29b
Merge pull request #40 from RedisAI/fix.ft.create
filipecosta90 Jun 17, 2023
166d1f4
Fix unary typo on redisearch __str__
filipecosta90 Jun 17, 2023
26c4daa
Merge pull request #41 from RedisAI/fix.unary.str
filipecosta90 Jun 18, 2023
cc36963
disable query timeout on redisearch
filipecosta90 Jun 18, 2023
765f2bf
Merge pull request #42 from RedisAI/timeout.0
filipecosta90 Jun 18, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ data/*

results/*
!results/*.png
website

venv

Expand Down
247 changes: 170 additions & 77 deletions algos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,71 +10,100 @@ float:
arg-groups:
- {"M": 4, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
# M-8:
# arg-groups:
# - {"M": 8, "efConstruction": 500}
# query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
# M-12:
# arg-groups:
# - {"M": 12, "efConstruction": 500}
# query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
# M-16:
# arg-groups:
# - {"M": 16, "efConstruction": 500}
# query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
# M-24:
# arg-groups:
# - {"M": 24, "efConstruction": 500}
# query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
# M-36:
# arg-groups:
# - {"M": 36, "efConstruction": 500}
# query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
# M-48:
# arg-groups:
# - {"M": 48, "efConstruction": 500}
# query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
# M-64:
# arg-groups:
# - {"M": 64, "efConstruction": 500}
# query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
# M-96:
# arg-groups:
# - {"M": 96, "efConstruction": 500}
# query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-8:
arg-groups:
- {"M": 8, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-12:
arg-groups:
- {"M": 12, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-16:
arg-groups:
- {"M": 16, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-24:
arg-groups:
- {"M": 24, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-36:
arg-groups:
- {"M": 36, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-48:
arg-groups:
- {"M": 48, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-64:
arg-groups:
- {"M": 64, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-96:
arg-groups:
- {"M": 96, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
redisearch-flat:
docker-tag: ann-benchmarks-redisearch
module: ann_benchmarks.algorithms.redisearch
constructor: RediSearch
base-args: ["FLAT", "@metric", "@connection"]
run-groups:
BS-2^20:
arg-groups:
- {"BLOCK_SIZE": 1048576}
# M-8:
# arg-groups:
# - {"BLOCK_SIZE": 1048576}
# M-12:
# arg-groups:
# - {"BLOCK_SIZE": 1048576}
# M-16:
# arg-groups:
# - {"BLOCK_SIZE": 1048576}
# M-24:
# arg-groups:
# - {"BLOCK_SIZE": 1048576}
# M-36:
# arg-groups:
# - {"BLOCK_SIZE": 1048576}
# M-48:
# arg-groups:
# - {"BLOCK_SIZE": 1048576}
# M-64:
# arg-groups:
# - {"BLOCK_SIZE": 1048576}
# M-96:
# arg-groups:
# - {"BLOCK_SIZE": 1048576}
BS-2^10:
arg-groups:
- {"BLOCK_SIZE": 1024}

vecsim-hnsw:
docker-tag: ann-benchmarks-vecsim
module: ann_benchmarks.algorithms.vecsim-hnsw
constructor: VecSimHnsw
base-args: ["@metric"]
run-groups:
M-4:
arg-groups:
- {"M": 4, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-8:
arg-groups:
- {"M": 8, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-12:
arg-groups:
- {"M": 12, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-16:
arg-groups:
- {"M": 16, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-24:
arg-groups:
- {"M": 24, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-36:
arg-groups:
- {"M": 36, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-48:
arg-groups:
- {"M": 48, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-64:
arg-groups:
- {"M": 64, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-96:
arg-groups:
- {"M": 96, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
pinecone:
docker-tag: ann-benchmarks-pinecone
module: ann_benchmarks.algorithms.pinecone
constructor: Pinecone
base-args: ["@metric", "@dimension", "@connection"]
run-groups:
approximated:
args: [['approximated']]
exact:
args: [['exact']]
sptag:
docker-tag: ann-benchmarks-sptag
module: ann_benchmarks.algorithms.sptag
Expand Down Expand Up @@ -133,10 +162,10 @@ float:
base:
args: [[400, 1024, 4096, 8192, 16384],
[1, 10, 40, 100, 200]]
hnswlib:
vecsim-hnsw-blocks:
docker-tag: ann-benchmarks-hnswlib
module: ann_benchmarks.algorithms.hnswlib
constructor: HnswLib
module: ann_benchmarks.algorithms.vecsim-hnsw
constructor: VecSimHnsw
base-args: ["@metric"]
run-groups:
M-4:
Expand Down Expand Up @@ -240,14 +269,52 @@ float:
# This run group produces 3 algorithm instances -- Annoy("angular",
# 100), Annoy("angular", 200), and Annoy("angular", 400) -- each of
# which will be used to run 12 different queries.
milvus:
milvus-hnsw:
docker-tag: ann-benchmarks-milvus
module: ann_benchmarks.algorithms.milvus
constructor: Milvus
base-args: ["@metric"]
base-args: ["@metric", "@dimension", "@connection", "HNSW"]
run-groups:
M-4:
arg-groups:
- {"M": 4, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-8:
arg-groups:
- {"M": 8, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-12:
arg-groups:
- {"M": 12, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-16:
arg-groups:
- {"M": 16, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-24:
arg-groups:
- {"M": 24, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-36:
arg-groups:
- {"M": 36, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-48:
arg-groups:
- {"M": 48, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-64:
arg-groups:
- {"M": 64, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
milvus-ivf:
docker-tag: ann-benchmarks-milvus
module: ann_benchmarks.algorithms.milvus
constructor: Milvus
base-args: ["@metric", "@dimension", "@connection"]
run-groups:
milvus:
args: [['IVF_FLAT', 'IVF_SQ8'], [100, 300, 1000, 3000, 10000, 30000]] # nlist
args: [['IVF_FLAT', 'IVF_SQ8'], [{"nlist": 100}, {"nlist": 300}, {"nlist": 1000}, {"nlist": 3000}, {"nlist": 10000}, {"nlist": 30000}]] # nlist
query-args: [[1, 3, 10, 30, 100, 300]] # nprobe (should be <= nlist)
nearpy:
disabled: true
Expand Down Expand Up @@ -301,7 +368,7 @@ float:
M-96:
arg-groups:
- {"M": 96, "efConstruction": 500}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]

bruteforce:
disabled: true
Expand Down Expand Up @@ -648,14 +715,6 @@ float:
- {"n_neighbors": 60, "diversify_prob": 0.0,
"pruning_degree_multiplier":[2.0, 3.0], "leaf_size": 48}
query-args: [[0.0, 0.04, 0.08, 0.12, 0.16, 0.20, 0.24, 0.28, 0.32, 0.36]]
elasticsearch:
docker-tag: ann-benchmarks-elasticsearch
module: ann_benchmarks.algorithms.elasticsearch
constructor: ElasticsearchScriptScoreQuery
base-args: [ "@metric", "@dimension" ]
run-groups:
empty:
args: []
elastiknn-l2lsh:
docker-tag: ann-benchmarks-elastiknn
module: ann_benchmarks.algorithms.elastiknn
Expand Down Expand Up @@ -950,10 +1009,44 @@ float:
docker-tag: ann-benchmarks-elasticsearch
module: ann_benchmarks.algorithms.elasticsearch
constructor: ElasticsearchScriptScoreQuery
base-args: [ "@metric", "@dimension" ]
base-args: [ "@metric", "@dimension", "@connection" ]
run-groups:
empty:
args: []
M-4:
arg-groups:
- {"m": 4, "ef_construction": 500, "type": "hnsw"}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-8:
arg-groups:
- {"m": 8, "ef_construction": 500, "type": "hnsw"}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-12:
arg-groups:
- {"m": 12, "ef_construction": 500, "type": "hnsw"}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-16:
arg-groups:
- {"m": 16, "ef_construction": 500, "type": "hnsw"}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-24:
arg-groups:
- {"m": 24, "ef_construction": 500, "type": "hnsw"}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-36:
arg-groups:
- {"m": 36, "ef_construction": 500, "type": "hnsw"}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-48:
arg-groups:
- {"m": 48, "ef_construction": 500, "type": "hnsw"}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-64:
arg-groups:
- {"m": 64, "ef_construction": 500, "type": "hnsw"}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
M-96:
arg-groups:
- {"m": 96, "ef_construction": 500, "type": "hnsw"}
query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]]
opensearchknn:
docker-tag: ann-benchmarks-opensearchknn
module: ann_benchmarks.algorithms.opensearchknn
Expand Down
19 changes: 16 additions & 3 deletions ann_benchmarks/algorithms/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

Definition = collections.namedtuple(
'Definition',
['algorithm', 'constructor', 'module', 'docker_tag',
['algorithm', 'run_group', 'constructor', 'module', 'docker_tag',
'arguments', 'query_argument_groups', 'disabled'])


Expand Down Expand Up @@ -96,8 +96,20 @@ def get_unique_algorithms(definition_file):
return list(sorted(algos))


def get_run_groups(definition_file, algo=None):
    """Return the sorted, de-duplicated run-group names of a definition file.

    The definitions loaded by ``_get_definitions`` are walked as a three-level
    mapping (point type -> metric -> algorithm name -> algorithm entry) and
    the keys of every matching entry's ``'run-groups'`` mapping are collected.

    Args:
        definition_file: Passed unchanged to ``_get_definitions``.
        algo: Algorithm name to restrict the result to. When ``None``
            (the default), run groups of every algorithm are included.

    Returns:
        A sorted list of unique run-group names; empty when nothing matches.

    Raises:
        KeyError: If a matching algorithm entry has no ``'run-groups'`` key.
    """
    definitions = _get_definitions(definition_file)
    run_groups = set()
    for metrics in definitions.values():
        for algorithms in metrics.values():
            for algorithm, entry in algorithms.items():
                # Identity check: ``None`` means "no filter requested".
                if algo is None or algo == algorithm:
                    # Updating a set from a mapping adds the mapping's keys,
                    # i.e. the run-group names.
                    run_groups.update(entry['run-groups'])
    return sorted(run_groups)


def get_definitions(definition_file, dimension, point_type="float",
distance_metric="euclidean", count=10, conn_params=dict()):
distance_metric="euclidean", count=10, conn_params={'host': None, 'port': None, 'auth': None, 'user': None, 'cluster': False, 'shards': 1}):
definitions = _get_definitions(definition_file)

algorithm_definitions = {}
Expand All @@ -116,7 +128,7 @@ def get_definitions(definition_file, dimension, point_type="float",
if "base-args" in algo:
base_args = algo["base-args"]

for run_group in algo["run-groups"].values():
for run_group_name, run_group in algo["run-groups"].items():
if "arg-groups" in run_group:
groups = []
for arg_group in run_group["arg-groups"]:
Expand Down Expand Up @@ -163,6 +175,7 @@ def get_definitions(definition_file, dimension, point_type="float",
aargs = [_substitute_variables(arg, vs) for arg in aargs]
definitions.append(Definition(
algorithm=name,
run_group = run_group_name,
docker_tag=algo['docker-tag'],
module=algo['module'],
constructor=algo['constructor'],
Expand Down
Loading