Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 6e1c447

Browse files
committed
add more fields to the json output
1 parent 60a8eb7 commit 6e1c447

File tree

3 files changed

+78
-43
lines changed

3 files changed

+78
-43
lines changed

data_diff/dbt.py

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
from .cloud import DatafoldAPI, TCloudApiDataDiff, TCloudApiOrgMeta, get_or_create_data_source
1616
from .dbt_parser import DbtParser, PROJECT_FILE, TDatadiffConfig
1717
from .diff_tables import DiffResultWrapper
18-
from .format import jsonify
18+
from .format import jsonify, jsonify_exception
1919
from .tracking import (
2020
bool_ask_for_email,
2121
create_email_signup_event_json,
@@ -280,7 +280,16 @@ def _local_diff(diff_vars: TDiffVars, json_output: bool = False) -> None:
280280
)
281281
if json_output:
282282
# drain the iterator to get accumulated stats in diff.info_tree
283-
list(diff)
283+
try:
284+
list(diff)
285+
except Exception as e:
286+
print(json.dumps(jsonify_exception(
287+
list(table1.table_path),
288+
list(table2.table_path),
289+
diff_vars.dbt_model,
290+
e
291+
)), flush=True)
292+
return
284293

285294
print(json.dumps(
286295
jsonify(
@@ -290,7 +299,7 @@ def _local_diff(diff_vars: TDiffVars, json_output: bool = False) -> None:
290299
"added": columns_added,
291300
"removed": columns_removed,
292301
"changed": columns_type_changed,
293-
})))
302+
})), flush=True)
294303
return
295304

296305
if list(diff):

data_diff/format.py

Lines changed: 52 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,16 @@
55
from data_diff.diff_tables import DiffResultWrapper
66

77

8+
def jsonify_exception(table1: List[str], table2: List[str], dbt_model: str, exception: Exception) -> 'FailedDiff':
9+
return FailedDiff(
10+
status="failed",
11+
model=dbt_model,
12+
dataset1=table1,
13+
dataset2=table2,
14+
error=str(exception),
15+
).json()
16+
17+
818
def jsonify(diff: DiffResultWrapper,
919
dbt_model: str,
1020
with_summary: bool = False,
@@ -44,7 +54,7 @@ def jsonify(diff: DiffResultWrapper,
4454

4555
columns = None
4656
if with_columns:
47-
columns = _jsonify_columns_diff(with_columns)
57+
columns = _jsonify_columns_diff(with_columns, list(key_columns))
4858

4959
is_different = bool(
5060
t1_exclusive_rows
@@ -57,14 +67,15 @@ def jsonify(diff: DiffResultWrapper,
5767
)
5868
)
5969
return JsonDiff(
60-
status="different" if is_different else "identical",
70+
status="success",
71+
result="different" if is_different else "identical",
6172
model=dbt_model,
62-
table1=list(table1.table_path),
63-
table2=list(table2.table_path),
73+
dataset1=list(table1.table_path),
74+
dataset2=list(table2.table_path),
6475
rows=RowsDiff(
6576
exclusive=ExclusiveDiff(
66-
table1=t1_exclusive_rows_jsonified,
67-
table2=t2_exclusive_rows_jsonified
77+
dataset1=t1_exclusive_rows_jsonified,
78+
dataset2=t2_exclusive_rows_jsonified
6879
),
6980
diff=diff_rows_jsonified,
7081
),
@@ -88,22 +99,22 @@ class JsonDiffRowValue:
8899
"""
89100
Pair of diffed values for 2 rows with equal PKs
90101
"""
91-
table1: Any
92-
table2: Any
102+
dataset1: Any
103+
dataset2: Any
93104
isDiff: bool
94105
isPK: bool
95106

96107

97108
@dataclass
98109
class Total:
99-
table1: int
100-
table2: int
110+
dataset1: int
111+
dataset2: int
101112

102113

103114
@dataclass
104115
class ExclusiveRows:
105-
table1: int
106-
table2: int
116+
dataset1: int
117+
dataset2: int
107118

108119

109120
@dataclass
@@ -127,20 +138,21 @@ class JsonDiffSummary:
127138

128139
@dataclass
129140
class ExclusiveColumns:
130-
table1: List[str]
131-
table2: List[str]
141+
dataset1: List[str]
142+
dataset2: List[str]
132143

133144

134145
@dataclass
135146
class JsonColumnsSummary:
147+
primaryKey: List[str]
136148
exclusive: ExclusiveColumns
137149
typeChanged: List[str]
138150

139151

140152
@dataclass
141153
class ExclusiveDiff:
142-
table1: List[Dict[str, JsonExclusiveRowValue]]
143-
table2: List[Dict[str, JsonExclusiveRowValue]]
154+
dataset1: List[Dict[str, JsonExclusiveRowValue]]
155+
dataset2: List[Dict[str, JsonExclusiveRowValue]]
144156

145157

146158
@dataclass
@@ -149,12 +161,23 @@ class RowsDiff:
149161
diff: List[Dict[str, JsonDiffRowValue]]
150162

151163

164+
@dataclass
165+
class FailedDiff:
166+
status: str # Literal ["failed"]
167+
model: str
168+
dataset1: List[str]
169+
dataset2: List[str]
170+
error: str
171+
172+
version: str = '1.0.0'
173+
152174
@dataclass
153175
class JsonDiff:
154-
status: str # Literal ["identical", "different"]
176+
status: str # Literal ["success"]
177+
result: str # Literal ["different", "identical"]
155178
model: str
156-
table1: List[str]
157-
table2: List[str]
179+
dataset1: List[str]
180+
dataset2: List[str]
158181
rows: RowsDiff
159182
summary: Optional[JsonDiffSummary]
160183
columns: Optional[JsonColumnsSummary]
@@ -197,12 +220,12 @@ def _jsonify_diff(row: Dict[str, Any], key_columns: List[str]) -> Dict[str, Json
197220

198221
elif field.endswith('_a'):
199222
column_name = field.replace('_a', '')
200-
columns[column_name]['table1'] = value
223+
columns[column_name]['dataset1'] = value
201224
columns[column_name]['isPK'] = column_name in key_columns
202225

203226
elif field.endswith('_b'):
204227
column_name = field.replace('_b', '')
205-
columns[column_name]['table2'] = value
228+
columns[column_name]['dataset2'] = value
206229
columns[column_name]['isPK'] = column_name in key_columns
207230

208231
return {
@@ -236,12 +259,12 @@ def _jsonify_diff_summary(stats_dict: dict) -> JsonDiffSummary:
236259
return JsonDiffSummary(
237260
rows=Rows(
238261
total=Total(
239-
table1=stats_dict["rows_A"],
240-
table2=stats_dict["rows_B"]
262+
dataset1=stats_dict["rows_A"],
263+
dataset2=stats_dict["rows_B"]
241264
),
242265
exclusive=ExclusiveRows(
243-
table1=stats_dict["exclusive_A"],
244-
table2=stats_dict["exclusive_B"],
266+
dataset1=stats_dict["exclusive_A"],
267+
dataset2=stats_dict["exclusive_B"],
245268
),
246269
updated=stats_dict["updated"],
247270
unchanged=stats_dict["unchanged"]
@@ -252,11 +275,12 @@ def _jsonify_diff_summary(stats_dict: dict) -> JsonDiffSummary:
252275
)
253276

254277

255-
def _jsonify_columns_diff(columns_diff: Dict[str, List[str]]) -> JsonColumnsSummary:
278+
def _jsonify_columns_diff(columns_diff: Dict[str, List[str]], key_columns: List[str]) -> JsonColumnsSummary:
256279
return JsonColumnsSummary(
280+
primaryKey=key_columns,
257281
exclusive= ExclusiveColumns(
258-
table2= list(columns_diff.get('added', [])),
259-
table1= list(columns_diff.get('removed', [])),
282+
dataset2= list(columns_diff.get('added', [])),
283+
dataset1= list(columns_diff.get('removed', [])),
260284
),
261285
typeChanged=list(columns_diff.get('changed', [])),
262286
)

tests/test_format.py

Lines changed: 14 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -31,28 +31,29 @@ def test_jsonify_diff(self):
3131
json_diff = jsonify(diff, dbt_model='my_model')
3232
self.assertEqual(json_diff, {
3333
'version': '1.0.0',
34-
'status': 'different',
34+
'status': 'success',
35+
'result': 'different',
3536
'model': 'my_model',
36-
'table1': ['db', 'schema', 'table1'],
37-
'table2': ['db', 'schema', 'table2'],
37+
'dataset1': ['db', 'schema', 'table1'],
38+
'dataset2': ['db', 'schema', 'table2'],
3839
'rows': {
3940
'exclusive': {
40-
'table1': [
41+
'dataset1': [
4142
{
4243
'id': {'isPK': True, 'value': '2'},
4344
'value': {'isPK': False, 'value': '4'}
4445
}
4546
],
46-
'table2': [
47+
'dataset2': [
4748
{
4849
'id': {'isPK': True, 'value': '3'},
4950
'value': {'isPK': False, 'value': '202'}
5051
}
5152
]},
5253
'diff': [
5354
{
54-
'id': {'isPK': True, 'table1': '1', 'table2': '1', 'isDiff': False},
55-
'value': {'isPK': False, 'table1': '3', 'table2': '201', 'isDiff': True},
55+
'id': {'isPK': True, 'dataset1': '1', 'dataset2': '1', 'isDiff': False},
56+
'value': {'isPK': False, 'dataset1': '3', 'dataset2': '201', 'isDiff': True},
5657
},
5758
],
5859
},
@@ -82,14 +83,15 @@ def test_jsonify_diff_no_difeference(self):
8283
json_diff = jsonify(diff, dbt_model='model')
8384
self.assertEqual(json_diff, {
8485
'version': '1.0.0',
85-
'status': 'identical',
86+
'status': 'success',
87+
'result': 'identical',
8688
'model': 'model',
87-
'table1': ['db', 'schema', 'table1'],
88-
'table2': ['db', 'schema', 'table2'],
89+
'dataset1': ['db', 'schema', 'table1'],
90+
'dataset2': ['db', 'schema', 'table2'],
8991
'rows': {
9092
'exclusive': {
91-
'table1': [],
92-
'table2': []},
93+
'dataset1': [],
94+
'dataset2': []},
9395
'diff': [],
9496
},
9597
'summary': None,

0 commit comments

Comments
 (0)