diff --git a/di/mockDataGen/init.q b/di/mockDataGen/init.q new file mode 100644 index 0000000..4cdb055 --- /dev/null +++ b/di/mockDataGen/init.q @@ -0,0 +1,3 @@ +\l ::mockDataGen.q + +export:([initschema;mockDataOne;mockData;mockHdb;mockDataR;clearTables]) diff --git a/di/mockDataGen/mockDataGen.md b/di/mockDataGen/mockDataGen.md new file mode 100644 index 0000000..01621ff --- /dev/null +++ b/di/mockDataGen/mockDataGen.md @@ -0,0 +1,203 @@ +# Mock Data Generator + +This module generates realistic mock datasets. It can also generate additional datasets and works in batches. The module consists of four main functions that generate realistic mock datasets based on the following inputs from the user: + +- sym/instrument: the symbol to generate data for +- date: the trading date +- start time and end time: the time range within which data is generated +- rowcount: the number of rows of data to generate +- start price: the starting price of the instrument/sym +- level: if 1, generates data for the trades and quotes tables; if 2, generates data for the depth table along with the trades and quotes tables + +## Example +Below is an example of loading the module into a session and viewing the functions present in the module. + +```q +// Loading the module into a session +mockData: use `di.mockDataGen + +// View dictionary of functions +mockData +``` + +## Overview + +- **`mockDataOne`** – Generates mock data for a single instrument on a given date. +- **`mockData`** – Generates mock data for multiple instruments on a given date. +- **`mockDataR`** – Generates mock data for multiple instruments over a given list of dates. +- **`mockHdb`** – Writes the data down to a specified HDB directory and sets the parted attribute on the `sym` column of each date partition. + + +## Functions + +### ⚙️`mockDataOne` + +Generates mock data for a single instrument on a given date, using the parameters below. + +**Parameters** +- `sym`: Instrument/symbol for which the data is generated. 
+- `date`: Trading date for which the data is generated. +- `startTime`: Market open time or the starting time from which data generation begins. +- `endTime`: Market close time or the ending time up to which data is generated. +- `rowCnt`: Number of rows to generate for the trades table; the quotes table gets five times as many rows and, at level 2, the depth table twenty-five times as many. +- `startPx`: Starting price of the instrument, as a float. +- `level`: Controls the depth of data generation: + - `1`: Generates trades and quotes tables. + - `2`: Generates trades, quotes, and depth tables. + +**Examples** +```q +// Function signature: +mockDataOne[sym; date; startTime; endTime; rowCnt; startPx; level] + +// Loading the module into a session +md: use `di.mockDataGen + +// Level 1: Generate trades and quotes only +// for the AAPL instrument on a given trading day: +md.mockDataOne[`AAPL; 2025.01.10; 09:30:00.00; 17:30:00.00; 3000; 22.35; 1] + +// Level 2: Generate trades, quotes, and depth +// for the AAPL instrument on a given trading day: +md.mockDataOne[`AAPL; 2025.01.10; 09:30; 16:00; 300; 22.35; 2] + +// to view the data +.m.di.0mockDataGen.trades +.m.di.0mockDataGen.quotes +.m.di.0mockDataGen.depth +``` + +### ⚙️`mockData` + +Generates mock data for multiple instruments on a given date, using the parameters below. + +**Parameters** +- `syms`: Instruments/symbols for which the data is generated. +- `date`: Trading date for which the data is generated. +- `startTime`: Market open time or the starting time from which data generation begins. +- `endTime`: Market close time or the ending time up to which data is generated. +- `rowCnts`: Number of rows to generate for each symbol. This should be passed as a dictionary, for example: `` `AAPL`MSFT`META!300 500 200 `` +- `startPxs`: Starting prices of the given instruments, as floats. 
Should be passed as a dictionary, for example: `` `AAPL`MSFT`META!22.33 38.34 29.43 `` +- `level`: Controls the depth of data generation: + - `1`: Generates trades and quotes tables. + - `2`: Generates trades, quotes, and depth tables. + +**Examples** +```q +// Function signature: +mockData[syms; date; startTime; endTime; rowCnts; startPxs; level] + +// Loading the module into a session +md: use `di.mockDataGen + +// Level 1: Generate trades and quotes for multiple instruments +// on a single trading day: +md.mockData[`AAPL`MSFT`META; 2025.01.10; 09:30:00; 16:00:00; + `AAPL`MSFT`META!300 500 200; + `AAPL`MSFT`META!22.33 38.34 29.43; + 1] + +// Level 2: Generate trades, quotes, and depth for multiple instruments +// on a single trading day: +md.mockData[`AAPL`MSFT`META; 2025.01.10; 09:30:00; 16:00:00; + `AAPL`MSFT`META!300 500 200; + `AAPL`MSFT`META!22.33 38.34 29.43; + 2] +``` + +### ⚙️`mockDataR` + +Generates mock data for multiple instruments over a given list of dates, using the parameters below. + +**Parameters** +- `syms`: Instruments/symbols for which the data is generated. +- `datelist`: List of dates for which the data is generated. +- `startTime`: Market open time or the starting time from which data generation begins. +- `endTime`: Market close time or the ending time up to which data is generated. +- `rowCnts`: Number of rows to generate for each symbol. This should be passed as a dictionary, for example: `` `AAPL`MSFT`META!300 500 200 `` +- `startPxs`: Starting prices of the given instruments, as floats. Should be passed as a dictionary, for example: `` `AAPL`MSFT`META!22.33 38.34 29.43 `` +- `level`: Controls the depth of data generation: + - `1`: Generates trades and quotes tables. + - `2`: Generates trades, quotes, and depth tables. 
+ +**Note** +- For multi-day data generation, price continuity is maintained by using the previous day’s last traded price as the opening price for the following day. + +**Examples** +```q +// Function signature: +mockDataR[syms; datelist; startTime; endTime; rowCnts; startPxs; level] + +// Loading the module into a session +md: use `di.mockDataGen + +// Level 1: Generate trades and quotes for multiple instruments +// across several trading days: +md.mockDataR[`AAPL`MSFT`META; 2025.01.10 2025.01.11 2025.01.12; 09:30:00; 16:00:00; + `AAPL`MSFT`META!300 500 200; + `AAPL`MSFT`META!22.33 38.34 29.43; + 1] + +// Level 2: Generate trades, quotes, and depth for multiple instruments +// across several trading days: +md.mockDataR[`AAPL`MSFT`META; 2025.01.10 2025.01.11 2025.01.12; 09:30:00; 16:00:00; + `AAPL`MSFT`META!300 500 200; + `AAPL`MSFT`META!22.33 38.34 29.43; + 2] +``` + + +### ⚙️`mockHdb` + +Generates mock data for each date in the list and writes it down to the specified HDB directory. + +**Parameters** +- `dir`: Target HDB directory where the generated data will be written. +- `syms`: List of instrument symbols for which data is generated and saved to HDB. +- `datelist`: List of trading dates for which data will be generated and persisted. +- `startTime`: Market open time or the starting time from which data generation begins. +- `endTime`: Market close time or the ending time up to which data is generated. +- `rowCnts`: Number of rows to generate per instrument. + This must be provided as a dictionary, for example: + `` `AAPL`MSFT`META!300 500 200 `` +- `startPxs`: Starting price for each instrument, specified as floating-point values. + This must be provided as a dictionary, for example: + `` `AAPL`MSFT`META!22.33 38.34 29.43 `` +- `level`: Controls the depth of data generation: + - `1`: Generates and saves trades and quotes tables. + - `2`: Generates and saves trades, quotes, and depth tables. 
+ +**Note** +- Price continuity is maintained by using the previous day’s last traded price as the opening price for the following day. + +**Examples** +```q +// Function signature: +mockHdb[dir; syms; datelist; startTime; endTime; rowCnts; startPxs; level] + +// Loading the module into a session +md: use `di.mockDataGen + +// Level 1: Save down the generated trades and quotes for multiple instruments to a specified HDB directory +// for each date in the list: +md.mockHdb[`:hdb;`AAPL`MSFT`META; 2025.01.10 2025.01.11 2025.01.12; 09:30:00; 16:00:00; + `AAPL`MSFT`META!300 500 200; + `AAPL`MSFT`META!22.33 38.34 29.43; + 1] + +// Level 2: Save down the generated trades, quotes, and depth for multiple instruments to a specified HDB directory +// for each date in the list: +md.mockHdb[`:hdb;`AAPL`MSFT`META; 2025.01.10 2025.01.11 2025.01.12; 09:30:00; 16:00:00; + `AAPL`MSFT`META!300 500 200; + `AAPL`MSFT`META!22.33 38.34 29.43; + 2] + +// to view the data in HDB +\l hdb +select from trades +``` \ No newline at end of file diff --git a/di/mockDataGen/mockDataGen.q b/di/mockDataGen/mockDataGen.q new file mode 100644 index 0000000..9beca82 --- /dev/null +++ b/di/mockDataGen/mockDataGen.q @@ -0,0 +1,77 @@ +initschema:{[] + .z.m.trades:([] time:`timestamp$(); sym:`g#`$(); src:`g#`$(); price:`float$(); size:`int$()); + .z.m.quotes:([] time:`timestamp$(); sym:`g#`$(); src:`g#`$(); bid:`float$(); ask:`float$(); bsize:`int$(); asize:`int$()); + .z.m.depth:([] time:`timestamp$(); sym:`g#`$(); bid1:`float$(); bsize1:`int$(); bid2:`float$(); bsize2:`int$(); bid3:`float$(); bsize3:`int$(); bid4:`float$(); bsize4:`int$(); bid5:`float$(); bsize5:`int$(); ask1:`float$(); asize1:`int$(); ask2:`float$(); asize2:`int$(); ask3:`float$(); asize3:`int$(); ask4:`float$(); asize4:`int$(); ask5:`float$(); asize5:`int$()); + }; + +// Utility Functions +rnd:{0.01*floor 100*x}; + +clearTables:{[] + initschema[]; + }; + +// function to generate mock data for a single symbol/instrument 
+// mockDataOne[sym;date;startTime;endTime;rowCnt;startPx;level]
+// Appends one day of mock data for a single instrument to the module tables
+// .z.m.trades and .z.m.quotes, and to .z.m.depth when level=2.
+//   sym       - instrument symbol
+//   date      - trading date
+//   startTime - start of the generation window (time of day)
+//   endTime   - end of the generation window; must be later than startTime
+//   rowCnt    - number of trade rows; quotes get 5x and depth 25x as many rows
+//   startPx   - starting mid price
+//   level     - 1: trades and quotes; 2: trades, quotes and depth
+// Returns nothing; signals an error when the time window is empty.
+mockDataOne:{[sym;date;startTime;endTime;rowCnt;startPx;level]
+  if[not endTime>startTime; '"mockDataOne: endTime must be after startTime"];
+  tradeCnt:rowCnt;
+  quoteCnt:5*tradeCnt;
+  depthCnt:25*tradeCnt;
+  span:endTime-startTime;
+  // random event times inside the window, sorted; `# drops the sorted attribute
+  ttimes:date+`#asc startTime+tradeCnt?span;
+  qtimes:date+`#asc startTime+quoteCnt?span;
+  dtimes:date+`#asc startTime+depthCnt?span;
+  // mid price is a multiplicative random walk from startPx, floored to cents by rnd
+  mids:rnd startPx*exp sums 0.0005*-1+quoteCnt?2f;
+  bid:rnd mids-quoteCnt?0.03;
+  ask:rnd mids+quoteCnt?0.03;
+  bsize:`int$600*1+quoteCnt?20;
+  asize:`int$600*1+quoteCnt?20;
+  // each trade executes against the quote prevailing at its own timestamp;
+  // a trade stamped before the first quote falls back to the first quote
+  quoteIdx:0|qtimes bin ttimes;
+  side:tradeCnt?`buy`sell;
+  isBuy:side=`buy;
+  // buys lift the ask, sells hit the bid; quote prices are already floored to cents
+  price:?[isBuy;ask quoteIdx;bid quoteIdx];
+  tsize:`int$(tradeCnt?1f)*?[isBuy;asize quoteIdx;bsize quoteIdx];
+  .z.m.trades,:flip `time`sym`src`price`size!(ttimes;tradeCnt#sym;tradeCnt?`N`O`L;price;tsize);
+  .z.m.quotes,:flip `time`sym`src`bid`ask`bsize`asize!(qtimes;quoteCnt#sym;quoteCnt?`N`O`L;bid;ask;bsize;asize);
+  if[level=2;
+    // depth rows follow the prevailing quote as well, instead of cycling
+    // through the quote series several times during the day
+    dIdx:0|qtimes bin dtimes;
+    dBid:bid dIdx;
+    dAsk:ask dIdx;
+    lvlSize:{[n;k]`int$600*1+n?k};
+    // sizes grow away from the touch, with the same ranges on both sides of the book
+    b1:lvlSize[depthCnt;20];
+    b2:b1+lvlSize[depthCnt;5];
+    b3:b1+lvlSize[depthCnt;10];
+    b4:b1+lvlSize[depthCnt;15];
+    b5:b1+lvlSize[depthCnt;20];
+    a1:lvlSize[depthCnt;20];
+    a2:a1+lvlSize[depthCnt;5];
+    a3:a1+lvlSize[depthCnt;10];
+    a4:a1+lvlSize[depthCnt;15];
+    a5:a1+lvlSize[depthCnt;20];
+    .z.m.depth,:flip `time`sym`bid1`bsize1`bid2`bsize2`bid3`bsize3`bid4`bsize4`bid5`bsize5`ask1`asize1`ask2`asize2`ask3`asize3`ask4`asize4`ask5`asize5!(dtimes;depthCnt#sym;dBid;b1;dBid-0.01;b2;dBid-0.02;b3;dBid-0.03;b4;dBid-0.04;b5;dAsk;a1;dAsk+0.01;a2;dAsk+0.02;a3;dAsk+0.03;a4;dAsk+0.04;a5);
+    ];
+  };
+
+// Private helper: returns v unchanged when it is already a dictionary,
+// otherwise spreads the atom (or aligns the list) v across syms.
+asDict:{[syms;v] $[99h=type v; v; syms!(count syms)#v]};
+
+// Private helper: last traded price per sym taken from .z.m.trades,
+// falling back to pxs for any sym that has no trades.
+lastPxs:{[syms;pxs] syms!(pxs syms)^(exec last price by sym from .z.m.trades) syms};
+
+// mockData[syms;date;startTime;endTime;rowCnts;startPxs;level]
+// Generates mock data for one or more syms on a single date by calling mockDataOne per sym.
+//   syms     - symbol atom or list
+//   rowCnts  - dictionary sym -> row count, or an atom/list applied across syms
+//   startPxs - dictionary sym -> starting price, or an atom/list applied across syms
+// Other parameters are passed through to mockDataOne unchanged.
+// Signals an error when a sym has no row count or no starting price.
+mockData:{[syms;date;startTime;endTime;rowCnts;startPxs;level]
+  syms:(),syms;
+  rc:asDict[syms;rowCnts];
+  spx:asDict[syms;startPxs];
+  if[count bad:syms where null rc syms; '"mockData: no row count for ",", " sv string bad];
+  if[count bad:syms where null spx syms; '"mockData: no start price for ",", " sv string bad];
+  {[date;startTime;endTime;level;rc;spx;s]
+    mockDataOne[s;date;startTime;endTime;rc s;spx s;level]}[date;startTime;endTime;level;rc;spx] each syms;
+  };
+
+// mockDataR[syms;datelist;startTime;endTime;rowCnts;startPxs;level]
+// Generates mock data for the syms on every date in datelist, in order.
+// The first date starts from startPxs; each later date starts from the
+// last traded price of the date generated before it.
+// level is applied to every date. An empty datelist generates nothing.
+mockDataR:{[syms;datelist;startTime;endTime;rowCnts;startPxs;level]
+  syms:(),syms;
+  step:{[syms;startTime;endTime;rowCnts;level;pxs;d]
+    mockData[syms;d;startTime;endTime;rowCnts;pxs;level];
+    lastPxs[syms;pxs]}[syms;startTime;endTime;rowCnts;level];
+  // fold over the dates, threading the carry-over prices through each step
+  step/[asDict[syms;startPxs];(),datelist];
+  };
+
+// mockHdb[dir;syms;dates;startTime;endTime;rowCnts;startPxs;level]
+// Generates mock data date by date and saves each date as a partition under dir.
+// The module tables are emptied before the first date and after every date, so a
+// partition only holds rows generated for that date. trades, quotes and depth
+// are written for every date; depth is empty when level=1.
+// Prices carry over between dates in the same way as in mockDataR.
+// Side effect: the globals trades, quotes and depth are overwritten and left empty.
+mockHdb:{[dir;syms;dates;startTime;endTime;rowCnts;startPxs;level]
+  syms:(),syms;
+  clearTables[];
+  step:{[dir;syms;startTime;endTime;rowCnts;level;pxs;d]
+    mockData[syms;d;startTime;endTime;rowCnts;pxs;level];
+    nxt:lastPxs[syms;pxs];
+    // .Q.dpft takes a table name, so the module tables are copied to globals first
+    `trades set .z.m.trades;
+    `quotes set .z.m.quotes;
+    `depth set .z.m.depth;
+    // save only the three mock tables, leaving any other tables in the session untouched
+    .Q.dpft[dir;d;`sym] each `trades`quotes`depth;
+    {@[`.;x;0#]} each `trades`quotes`depth;
+    clearTables[];
+    nxt}[dir;syms;startTime;endTime;rowCnts;level];
+  step/[asDict[syms;startPxs];(),dates];
+  };
+
diff --git a/di/mockDataGen/test.csv b/di/mockDataGen/test.csv
new file mode 100644
index 0000000..6a23e84
--- /dev/null
+++ b/di/mockDataGen/test.csv
@@ -0,0 +1,10 @@
+action,ms,bytes,lang,code,repeat,minver,comment
+run,0,0,q,md:use`di.mockDataGen,1,,Load in mockDataGen module
+run,0,0,q,md.mockDataOne[`AAPL;2024.01.10;09:30;16:00;300;22.35;2],1,,Generate mock data for single sym on a given date
+true,0,0,q,300=count .m.di.0mockDataGen.trades,1,,check for the counts of the trade table
+run,0,0,q,md.mockData[`AAPL`MSFT`META;2025.01.10;09:30:00;16:00:00;`AAPL`MSFT`META!300 500 200;`AAPL`MSFT`META!22.33 38.34 29.43;2],1,,Generate mock data for multiple sym on a given date
+run,0,0,q,md.mockDataR[`AAPL`MSFT`META;2025.01.10 2025.01.11 2025.01.12;09:30:00;16:00:00;`AAPL`MSFT`META!700 500 900;`AAPL`MSFT`META!27.33 38.34 29.43;2],1,,Generate mock data for multiple sym for a given date range
+run,0,0,q,md.mockHdb[`:hdb;`AAPL`MSFT;2014.12.15 2014.12.16 2014.12.17; 09:30:00;17:30:00;`AAPL`MSFT!200 300;`AAPL`MSFT!23.45 45.89;2],1,,Writing down the data to a specified HDB directory for a given date list
+
+run,0,0,q,md.clearTables[],1,,clear the tables
+run,0,0,q,md.initschema[],1,,define the schemas