From f2360f410d9ed25d06b92112cfd4ceba02cbaae7 Mon Sep 17 00:00:00 2001 From: Alex DeMeo Date: Thu, 28 Mar 2024 16:57:34 -0700 Subject: [PATCH 1/4] add contract --- contracts/pnw_bookings_30_days.yaml | 45 +++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 contracts/pnw_bookings_30_days.yaml diff --git a/contracts/pnw_bookings_30_days.yaml b/contracts/pnw_bookings_30_days.yaml new file mode 100644 index 0000000..1551305 --- /dev/null +++ b/contracts/pnw_bookings_30_days.yaml @@ -0,0 +1,45 @@ +id: 16a6daa7-6c17-43b3-bf54-90e11ebcc3f0 +dataAssetResourceName: pyspark://git@github.com:alexdemeo/tutorial:pyspark:job.py:pnw_bookings_30_days +spec-version: 0.3.0 +doc: bookings for the last 30 days +name: pnw_bookings_30_days +namespace: Tutorial +owner: chad@gable.ai +schema: + - name: booking_date + type: string + logical: org.iso.8601.DateTime + doc: "booking date" + - name: commission_rate + type: float + bits: 64 + doc: "commission rate" + - name: payment_type + type: string + doc: "payment type" + - name: reward_id + type: int + bits: 64 + doc: "reward id" + - name: city_id + type: string + doc: "city id" + - name: city_name + type: string + doc: "city name" + - name: city_code + type: string + doc: "city code" + - name: country_id + type: string + doc: "country id" + - name: region + type: string + doc: "region" + - name: created_at + type: string + logical: org.iso.8601.DateTime + doc: "created at" + - name: enable_tip + type: bool + doc: "enable tip" From 23e5bb62dae3f8c27523c07f6196624c17015425 Mon Sep 17 00:00:00 2001 From: Alex DeMeo Date: Thu, 28 Mar 2024 18:02:38 -0700 Subject: [PATCH 2/4] add pyspark stuff to workflows --- .github/workflows/check-contracts-and-assets.yml | 15 ++++++++++++++- .../workflows/publish-contracts-and-assets.yml | 2 +- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/.github/workflows/check-contracts-and-assets.yml b/.github/workflows/check-contracts-and-assets.yml index 5641907..1930e57 
100644 --- a/.github/workflows/check-contracts-and-assets.yml +++ b/.github/workflows/check-contracts-and-assets.yml @@ -117,4 +117,17 @@ jobs: --proxy-db tutorial \ --proxy-schema public \ --proxy-user postgres \ - --proxy-password postgres \ No newline at end of file + --proxy-password postgres + - name: Check pyspark + uses: gabledata/cicd/github-actions/check-data-assets@latest + with: + # Provide API key secret, and endpoint variable created in previous steps + gable-api-key: ${{secrets.GABLE_API_KEY}} + gable-api-endpoint: ${{secrets.GABLE_API_ENDPOINT}} + # Command-line options describing the PySpark job whose data assets should be checked. + # Each option goes on its own line, joined with a trailing backslash, as + # a multiline string + data-asset-options: | + --project-root ../../pyspark \ + --spark-job-entrypoint "job.py --final_output_table pnw_bookings_30_days" \ + --csv-schema-file ../../pyspark/schemas.csv diff --git a/.github/workflows/publish-contracts-and-assets.yml b/.github/workflows/publish-contracts-and-assets.yml index 3b8af1a..b14fbbc 100644 --- a/.github/workflows/publish-contracts-and-assets.yml +++ b/.github/workflows/publish-contracts-and-assets.yml @@ -129,4 +129,4 @@ jobs: --proxy-host 0.0.0.0 \ --proxy-port 5432 \ --proxy-user postgres \ - --proxy-password postgres \ No newline at end of file + --proxy-password postgres From a2bf4124ca3dfc08a5ec465ce899c9dc288d6127 Mon Sep 17 00:00:00 2001 From: Alex DeMeo Date: Thu, 28 Mar 2024 19:30:45 -0700 Subject: [PATCH 3/4] fix cicd (#4) * add col * test * allow-gable-pre-release --- .github/workflows/check-contracts-and-assets.yml | 9 ++++++--- pyspark/job.py | 15 +++------------ 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/.github/workflows/check-contracts-and-assets.yml b/.github/workflows/check-contracts-and-assets.yml index 1930e57..231e3c9 100644 --- a/.github/workflows/check-contracts-and-assets.yml +++ b/.github/workflows/check-contracts-and-assets.yml @@ 
-124,10 +124,13 @@ jobs: # Provide API key secret, and endpoint variable created in previous steps gable-api-key: ${{secrets.GABLE_API_KEY}} gable-api-endpoint: ${{secrets.GABLE_API_ENDPOINT}} + allow-gable-pre-release: true + gable-version: 0.10.1a9 # Command-line options describing the PySpark job whose data assets should be checked. # Each option goes on its own line, joined with a trailing backslash, as # a multiline string data-asset-options: | - --project-root ../../pyspark \ - --spark-job-entrypoint "job.py --final_output_table pnw_bookings_30_days" \ - --csv-schema-file ../../pyspark/schemas.csv + --source-type pyspark \ + --project-root ./pyspark \ + --spark-job-entrypoint job.py \ + --csv-schema-file ./pyspark/schemas.csv diff --git a/pyspark/job.py b/pyspark/job.py index 0b93123..36b9724 100644 --- a/pyspark/job.py +++ b/pyspark/job.py @@ -1,20 +1,11 @@ -import argparse +from script import run_job + from pyspark.sql import SparkSession from pyspark.sql.functions import * from pyspark.sql.types import * -from script import run_job - -def parse_arguments(argv=None): - ap = argparse.ArgumentParser() - ap.add_argument("--final_output_table") - return ap.parse_args(argv) - - if __name__ == "__main__": - # Parse args - args_main = parse_arguments() - final_output_table = args_main.final_output_table + final_output_table = "pnw_bookings_30_days" print(f"final_output_table: {final_output_table}") spark = SparkSession.builder.getOrCreate() From 47c996d475b94c6f3cd193e595d1c5fc83a71243 Mon Sep 17 00:00:00 2001 From: Alex DeMeo Date: Mon, 18 Nov 2024 10:44:39 -0800 Subject: [PATCH 4/4] remove productId --- typescript/server/cart.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/typescript/server/cart.ts b/typescript/server/cart.ts index ad489a0..4f43f77 100644 --- a/typescript/server/cart.ts +++ b/typescript/server/cart.ts @@ -8,7 +8,7 @@ app.post('/cart', (req, res) => { analytics.track({ userId: req.body.userId, event: 'Add to cart', - 
properties: { productId: '123456', quantity: '5' } + properties: { /*productId: '123456', */quantity: '5' } }) res.sendStatus(201) });