Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 2d5f0a3

Browse files
committed
Added arbitrary 'where' option (issue #86)
1 parent adb2669 commit 2d5f0a3

File tree

4 files changed

+34
-6
lines changed

4 files changed

+34
-6
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ Options:
205205
- `--bisection-factor` - Segments per iteration. When set to 2, it performs binary search.
206206
- `--bisection-threshold` - Minimal bisection threshold. i.e. maximum size of pages to diff locally.
207207
- `-j` or `--threads` - Number of worker threads to use per database. Default=1.
208+
- `-w`, `--where` - An additional 'where' expression to restrict the search space.
208209
- `--conf`, `--run` - Specify the run and configuration from a TOML file. (see below)
209210

210211
### How to use with a configuration file

data_diff/__main__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ def _remove_passwords_in_dict(d: dict):
7171
"A higher number will increase performance, but take more capacity from your database. "
7272
"'serial' guarantees a single-threaded execution of the algorithm (useful for debugging).",
7373
)
74+
@click.option("-w", "--where", default=None, help="An additional 'where' expression to restrict the search space.")
7475
@click.option(
7576
"--conf",
7677
default=None,
@@ -107,6 +108,7 @@ def _main(
107108
threads,
108109
keep_column_case,
109110
json_output,
111+
where,
110112
threads1=None,
111113
threads2=None,
112114
__conf__=None,
@@ -166,6 +168,7 @@ def _main(
166168
min_update=max_age and parse_time_before_now(max_age),
167169
max_update=min_age and parse_time_before_now(min_age),
168170
case_sensitive=keep_column_case,
171+
where=where,
169172
)
170173
except ParseError as e:
171174
logging.error("Error while parsing age expression: %s" % e)

data_diff/diff_tables.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
"""Provides classes for performing a table diff
22
"""
33

4-
from abc import ABC, abstractmethod
54
import time
65
import os
76
from numbers import Number
@@ -13,7 +12,7 @@
1312

1413
from runtype import dataclass
1514

16-
from .sql import Select, Checksum, Compare, DbPath, DbKey, DbTime, Count, TableName, Time, Min, Max, Value
15+
from .sql import Select, Checksum, Compare, DbPath, DbKey, DbTime, Count, TableName, Time, Value
1716
from .utils import safezip, split_space
1817
from .databases.base import Database
1918
from .databases.database_types import (
@@ -50,6 +49,9 @@ class TableSegment:
5049
max_key (:data:`DbKey`, optional): Highest key_column value, used to restrict the segment
5150
min_update (:data:`DbTime`, optional): Lowest update_column value, used to restrict the segment
5251
max_update (:data:`DbTime`, optional): Highest update_column value, used to restrict the segment
52+
where (str, optional): An additional 'where' expression to restrict the search space.
53+
54+
case_sensitive (bool): If false, the case of column names will adjust according to the schema. Default is true.
5355
5456
"""
5557

@@ -68,6 +70,7 @@ class TableSegment:
6870
min_update: DbTime = None
6971
max_update: DbTime = None
7072

73+
where: str = None
7174
case_sensitive: bool = True
7275
_schema: Schema = None
7376

@@ -114,7 +117,7 @@ def _normalize_column(self, name: str, template: str = None) -> str:
114117
return self.database.normalize_value_by_type(col, col_type)
115118

116119
def with_schema(self) -> "TableSegment":
117-
"Queries the table schema from the database, and returns a new instance of TableSegmentWithSchema."
120+
"Queries the table schema from the database, and returns a new instance of TableSegment, with a schema."
118121
if self._schema:
119122
return self
120123

@@ -149,7 +152,12 @@ def _make_update_range(self):
149152
def _make_select(self, *, table=None, columns=None, where=None, group_by=None, order_by=None):
150153
if columns is None:
151154
columns = [self._normalize_column(self.key_column)]
152-
where = list(self._make_key_range()) + list(self._make_update_range()) + ([] if where is None else [where])
155+
where = [
156+
*self._make_key_range(),
157+
*self._make_update_range(),
158+
*([] if where is None else [where]),
159+
*([] if self.where is None else [self.where]),
160+
]
153161
order_by = None if order_by is None else [order_by]
154162
return Select(
155163
table=table or TableName(self.table_path),

tests/test_api.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ def setUp(self) -> None:
2525
)
2626
self.now = now = arrow.get(self.preql.now())
2727
self.preql.add(now, "now")
28+
self.preql.add(now, self.now.shift(seconds=-10))
29+
self.preql.add(now, self.now.shift(seconds=-7))
30+
self.preql.add(now, self.now.shift(seconds=-6))
2831

2932
self.preql(
3033
r"""
@@ -33,7 +36,7 @@ def setUp(self) -> None:
3336
"""
3437
)
3538

36-
self.preql.add(self.now.shift(seconds=-3), "2 seconds ago")
39+
self.preql.add(self.now.shift(seconds=-3), "3 seconds ago")
3740
self.preql.commit()
3841

3942
def tearDown(self) -> None:
@@ -47,7 +50,20 @@ def tearDown(self) -> None:
4750
def test_api(self):
4851
t1 = connect_to_table(TEST_MYSQL_CONN_STRING, "test_api")
4952
t2 = connect_to_table(TEST_MYSQL_CONN_STRING, ("test_api_2",))
50-
assert len(list(diff_tables(t1, t2))) == 1
53+
diff = list(diff_tables(t1, t2))
54+
assert len(diff) == 1
55+
56+
t1.database.close()
57+
t2.database.close()
58+
59+
# test where
60+
diff_id = diff[0][1][0]
61+
where = f"id != {diff_id}"
62+
63+
t1 = connect_to_table(TEST_MYSQL_CONN_STRING, "test_api", where=where)
64+
t2 = connect_to_table(TEST_MYSQL_CONN_STRING, "test_api_2", where=where)
65+
diff = list(diff_tables(t1, t2))
66+
assert len(diff) == 0
5167

5268
t1.database.close()
5369
t2.database.close()

0 commit comments

Comments
 (0)