22
33import clickhouse_driver .dbapi .connection
44
5- from .base import ThreadedDatabase , import_helper , ConnectError
6- from .base import MD5_HEXDIGITS , CHECKSUM_HEXDIGITS
7- from .database_types import (
8- ColType , Decimal , Float , Integer , FractionalType , Native_UUID , TemporalType , Text , Timestamp
5+ from .base import (
6+ MD5_HEXDIGITS ,
7+ CHECKSUM_HEXDIGITS ,
8+ TIMESTAMP_PRECISION_POS ,
9+ ThreadedDatabase ,
10+ import_helper ,
11+ ConnectError ,
912)
13+ from .database_types import ColType , Decimal , Float , Integer , FractionalType , Native_UUID , TemporalType , Text , Timestamp
1014
1115
@import_helper("clickhouse")
def import_clickhouse():
    """Import the ``clickhouse_driver`` package and return the module object.

    The import statement lives inside the function body, so it is executed
    when the function is called rather than when this module is loaded.
    NOTE(review): the exact behaviour added by ``import_helper`` is defined
    in ``.base`` and is not visible here.
    """
    import clickhouse_driver

    return clickhouse_driver
1621
1722
@@ -24,42 +29,35 @@ def cursor(self, cursor_factory=None):
2429
2530class Clickhouse (ThreadedDatabase ):
# Maps a ClickHouse type name to the column-type class used for it.
# Parametrized names (Decimal(...), FixedString(...), DateTime64(...)) are
# reduced to their bare name by _parse_type_repr before the lookup.
TYPE_CLASSES = {
    # Signed integers
    "Int8": Integer,
    "Int16": Integer,
    "Int32": Integer,
    "Int64": Integer,
    "Int128": Integer,
    "Int256": Integer,
    # Unsigned integers
    "UInt8": Integer,
    "UInt16": Integer,
    "UInt32": Integer,
    "UInt64": Integer,
    "UInt128": Integer,
    "UInt256": Integer,
    # Floating point and fixed point
    "Float32": Float,
    "Float64": Float,
    "Decimal": Decimal,
    # Identifiers and text
    "UUID": Native_UUID,
    "String": Text,
    "FixedString": Text,
    # Temporal
    "DateTime": Timestamp,
    "DateTime64": Timestamp,
}
# Post-change value of this flag (the removed line set it to True).
ROUNDS_ON_PREC_LOSS = False
5654
def __init__(self, *, thread_count: int, **kw):
    """Store the connection arguments and derive the default schema.

    Args:
        thread_count: Forwarded to the base-class initializer.
        **kw: Connection keyword arguments, kept verbatim in ``self._args``.
            Must contain a ``"database"`` entry.

    Raises:
        KeyError: If ``database`` is not among the keyword arguments.
    """
    super().__init__(thread_count=thread_count)

    self._args = kw
    # In Clickhouse database and schema are the same
    self.default_schema = kw["database"]
6361
6462 def create_connection (self ):
6563 clickhouse = import_clickhouse ()
@@ -70,16 +68,16 @@ def create_connection(self):
7068 raise ConnectError (* e .args ) from e
7169
def _parse_type_repr(self, type_repr: str) -> Optional[Type[ColType]]:
    """Resolve a ClickHouse type string to its entry in ``TYPE_CLASSES``.

    A ``Nullable(...)`` wrapper is unwrapped first, then parametrized types
    such as ``Decimal(10, 2)``, ``FixedString(16)`` or ``DateTime64(3)`` are
    reduced to their bare name before the dictionary lookup.

    Args:
        type_repr: The type name as reported by the database.

    Returns:
        The mapped column-type class, or ``None`` when the type is unknown.
    """
    nullable_prefix = "Nullable("
    if type_repr.startswith(nullable_prefix):
        # Drop exactly one leading wrapper and its single closing paren, so
        # the parentheses of an inner parametrized type stay balanced.
        inner = type_repr[len(nullable_prefix):]
        type_repr = inner[:-1] if inner.endswith(")") else inner

    # Parametrized types carry arguments; only the bare name is a dict key.
    for parametrized in ("Decimal", "FixedString", "DateTime64"):
        if type_repr.startswith(parametrized):
            type_repr = parametrized
            break

    return self.TYPE_CLASSES.get(type_repr)
8583
@@ -88,19 +86,21 @@ def quote(self, s: str) -> str:
8886
def md5_to_int(self, s: str) -> str:
    """Build a SQL expression that turns the MD5 of ``s`` into a UInt128.

    The expression hex-encodes ``MD5(s)``, takes the substring starting at
    the 1-based position ``1 + MD5_HEXDIGITS - CHECKSUM_HEXDIGITS``,
    lower-cases it, decodes it back to bytes, reverses the bytes and
    reinterprets them as ``UInt128``.
    NOTE(review): presumably this keeps the last ``CHECKSUM_HEXDIGITS`` hex
    digits and the ``reverse`` compensates for byte order - confirm.

    Args:
        s: SQL expression whose value is hashed.

    Returns:
        The SQL expression as a string.
    """
    substr_idx = 1 + MD5_HEXDIGITS - CHECKSUM_HEXDIGITS
    return f"reinterpretAsUInt128(reverse(unhex(lowerUTF8(substr(hex(MD5({s})), {substr_idx})))))"
9290
def to_string(self, s: str) -> str:
    """Wrap the SQL expression ``s`` in a ``toString(...)`` call.

    Args:
        s: SQL expression to convert.

    Returns:
        The text ``toString(<s>)``.
    """
    return f"toString({s})"
9593
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
    """Build a SQL expression rendering a timestamp as a normalized string.

    Args:
        value: SQL expression yielding the timestamp.
        coltype: Column type; its ``precision`` and ``rounds`` attributes
            select the number of fractional digits and the strategy.

    Returns:
        A SQL string expression. When ``coltype.rounds`` is true, the value
        is rounded to ``precision`` fractional digits of a second and cast
        back to ``DateTime64(6)`` before being stringified. Otherwise the
        result is the ``%Y-%m-%d %H:%M:%S`` rendering, a dot, and the
        microsecond remainder of the value cast to ``DateTime64(precision)``.
    """
    prec = coltype.precision
    if coltype.rounds:
        timestamp = f"toDateTime64(round(toUnixTimestamp64Micro(toDateTime64({value}, 6)) / 1000000, {prec}), 6)"
        return self.to_string(timestamp)

    # Microseconds within the second, after casting to the column precision.
    fractional = f"toUnixTimestamp64Micro(toDateTime64({value}, {prec})) % 1000000"
    # Left-pad so that e.g. 1234 microseconds renders as "001234".
    fractional = f"lpad({self.to_string(fractional)}, 6, '0')"
    rendered = f"formatDateTime({value}, '%Y-%m-%d %H:%M:%S') || '.' || {self.to_string(fractional)}"
    # Right-pad with zeros to a fixed total width.
    return f"rpad({rendered}, {TIMESTAMP_PRECISION_POS + 6}, '0')"
104104
105105 def _convert_db_precision_to_digits (self , p : int ) -> int :
106106 # Done the same as for PostgreSQL but need to rewrite in another way
@@ -125,7 +125,7 @@ def normalize_number(self, value: str, coltype: FractionalType) -> str:
125125 # with length = digits in an integer part + 1 (symbol of ".") + precision
126126
127127 if coltype .precision == 0 :
128- return self .to_string (f' round({ value } )' )
128+ return self .to_string (f" round({ value } )" )
129129
130130 precision = coltype .precision
131131 # TODO: too complex, is there better performance way?
0 commit comments