diff --git a/binder/CMR_queries.ipynb b/binder/CMR_queries.ipynb index dbdb6b2..2ce55ba 100644 --- a/binder/CMR_queries.ipynb +++ b/binder/CMR_queries.ipynb @@ -5,110 +5,66 @@ "id": "d0e31245-c554-4985-af28-705abe764ece", "metadata": {}, "source": [ - " **Using CMR to Find OPeNDAP URLs (Earthdata specific)**\n", + "# Discovering OPeNDAP URLs from NASA's Earthdata\n", "\n", - " The Common Metadata Repository (CMR) is a high-performance, high-quality, continuously evolving metadata system that catalogs Earth Science data and associated service metadata records. These metadata records are registered, modified, discovered, and accessed through programmatic interfaces leveraging standard protocols and APIs.\n", + "This tutorial demonstrates how to find OPeNDAP URLs from the [Common Metadata Repository](https://cmr.earthdata.nasa.gov/search) (CMR). The CMR is NASA's Earthdata API to query datasets available through many download and subset services, including OPeNDAP. The [CMR API](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html) is complex and broad in scope, and with `pydap.client.get_cmr_urls` users can query and retrieve OPeNDAP urls.\n", "\n", - " For more information about the CMR API go to: https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html\n" + "**Requirements to run this notebook**\n", + "1. Have an Earth Data Login account\n", + "2. Knowledge of the Collection Concept ID (CCID), or Digital Object Identifier (DOI) of the collection of interest. \n", + "\n", + "**Objectives**\n", + " \n", + "Use [PyDAP](https://pydap.github.io/pydap/) to **discover all opendap urls in two simple case studies**\n", + "\n", + "1. Discover all possible OPeNDAP urls associated with a specific Collection Concept ID (and DOI).\n", + "2. Discover all possible OPeNDAP urls from a collection, that **match a time range and spatial bounding box of interest**. 
These parameters, and others, are widely used by the CMR (and Earthdata search) to filter the number of possible returns from querying the CMR, therefore narrowing the search.\n", + "\n", + "\n", + "`Author`: Miguel Jimenez-Urias, '25" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "adc54031-dc9f-4858-83be-a84c6ee4eef0", "metadata": {}, "outputs": [], "source": [ "from pydap.net import create_session\n", - "from pydap.client import get_cmr_urls" + "from pydap.client import get_cmr_urls\n", + "import pydap\n", + "import datetime as dt" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "f764fa2b-acc4-43ba-8762-b606dc4a96e4", "metadata": {}, "outputs": [], "source": [ - "ecostress_ccid = \"C2076114664-LPCLOUD\"" + "ecostress_ccid = \"C2076114664-LPCLOUD\"\n", + "time_range = [dt.datetime(2025, 3, 1), dt.datetime(2025, 3, 31)]\n", + "return_limit = 500 # How many urls to return. Default is 50" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "c4306a74-3848-42ec-9e57-e396f9f47b80", "metadata": {}, "outputs": [], "source": [ - "urls = get_cmr_urls(ccid=ecostress_ccid, bounding_box=list((-130.8, 41, -124, 45)))" + "urls = get_cmr_urls(ccid=ecostress_ccid, bounding_box=list((-130.8, 41, -124, 45)), time_range=time_range, limit=return_limit)" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "68aa026d-51d5-4bab-a642-2a5c04258712", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00152_003_20180716T130457_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00258_001_20180723T101233_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00289_001_20180725T100502_0712_04',\n", - " 
'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00346_001_20180729T014458_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00346_002_20180729T014550_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00392_002_20180801T004543_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00392_003_20180801T004635_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00408_005_20180802T013113_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00423_001_20180803T003844_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00423_002_20180803T003936_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00438_002_20180803T234626_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00438_003_20180803T234718_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00441_002_20180804T043829_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00453_003_20180804T225449_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00457_005_20180805T052214_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00457_005_20180805T052214_0712_05',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00457_006_20180805T052306_0712_04',\n", - " 
'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00457_007_20180805T052358_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00469_004_20180805T233925_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00469_004_20180805T233925_0712_05',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00469_005_20180805T234017_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00472_001_20180806T043039_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00472_002_20180806T043131_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00484_004_20180806T224658_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00484_004_20180806T224658_0712_05',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00484_005_20180806T224750_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00484_006_20180806T224842_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00487_002_20180807T033902_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00499_003_20180807T215521_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00499_004_20180807T215613_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00503_007_20180808T042338_0712_04',\n", - " 
'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00503_008_20180808T042430_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00518_007_20180809T033122_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00518_008_20180809T033214_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00530_004_20180809T214742_0712_05',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00530_004_20180809T214742_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00530_005_20180809T214834_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00530_006_20180809T214926_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00533_005_20180810T023944_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00545_002_20180810T205606_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00545_003_20180810T205658_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00549_011_20180811T032420_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00549_011_20180811T032420_0712_03',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00576_004_20180812T204906_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00576_005_20180812T204958_0712_04',\n", - " 
'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00579_008_20180813T013925_0712_03',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00579_008_20180813T013925_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00579_009_20180813T014017_0712_03',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00591_001_20180813T195545_0712_04',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C2076114664-LPCLOUD/granules/ECOv002_L2_LSTE_00591_001_20180813T195545_0712_05']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "urls" ] @@ -138,7 +94,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.13" + "version": "3.12.11" } }, "nbformat": 4, diff --git a/binder/ECCO.ipynb b/binder/ECCO.ipynb index a5b1dab..255869d 100644 --- a/binder/ECCO.ipynb +++ b/binder/ECCO.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "dd69f13e-1576-46cc-88bf-8484bf53788f", "metadata": {}, "outputs": [], @@ -10,7 +10,6 @@ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "from pydap.net import create_session\n", - "import json\n", "import cartopy.crs as ccrs\n", "import xarray as xr\n", "import datetime as dt\n", @@ -21,18 +20,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "315ae4f8-dde2-4095-93c8-2d23ec754f70", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "pydap version: 3.5.7.dev2+gafd8e4c\n" - ] - } - ], + "outputs": [], "source": [ "print(\"pydap version: \", pydap.__version__)" ] @@ -60,51 +51,6 @@ "\n" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": 
"941feb00-5526-4aba-a80f-2f1706b74fca", - "metadata": {}, - "outputs": [], - "source": [ - "session = requests.Session()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "f77714f7-6446-4323-bf8e-1cae2c87d95e", - "metadata": {}, - "outputs": [], - "source": [ - "# CMR API base url\n", - "cmrurl='https://cmr.earthdata.nasa.gov/search/'\n", - "doi = '10.5067/ECL5M-OTS44'" - ] - }, - { - "cell_type": "markdown", - "id": "82158ccc-ead9-4e3c-8fcf-450f20ddfdeb", - "metadata": {}, - "source": [ - " **Specify time range**\n", - "\n", - " This dataset covers `01-01-1992` to `01-18-2018`. \n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "7886174c-77b2-48bf-a823-d9f761e98274", - "metadata": {}, - "outputs": [], - "source": [ - "start_date = dt.datetime(1992, 1, 1)\n", - "end_date = dt.datetime(2017, 12, 31)\n", - "\n", - "time_range=[start_date,end_date] # One month of data\n" - ] - }, { "cell_type": "markdown", "id": "75ede2ce-5c11-40e9-8f83-a8f7bceb0147", @@ -114,60 +60,33 @@ "\n", "The cell below will search/find all OPeNDAP URLs associated with the Collection concept ID.\n", "\n", - "The results wll be stored in the variable `granules_urls`.\n", + "The results will be stored in the variable `granules_urls`.\n", + "\n", + "We are interested in two collections: That of Oceanic Temperature and Salinity, and that of the Grid both on the Native LLC90 grid.\n", + "\n", " " ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "75b72f62-98bc-44c1-b87d-dba29c67e37d", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 16.6 ms, sys: 6.82 ms, total: 23.4 ms\n", - "Wall time: 1.39 s\n" - ] - } - ], - "source": [ - "%%time\n", - "granules_urls = get_cmr_urls(doi=doi, time_range=time_range, limit=100)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "592016ce-36cf-4298-b017-17fee64b5c19", - "metadata": {}, - "outputs": [ - { - 
"name": "stdout", - "output_type": "stream", - "text": [ - "WE found: 100 total Cloud OPeNDAP URLS associated with this collection!\n" - ] - } - ], - "source": [ - "print(\"WE found: \", len(granules_urls), \" total Cloud OPeNDAP URLS associated with this collection!\")" - ] - }, - { - "cell_type": "markdown", - "id": "6ba694bc-4613-4689-a1aa-0d03b1dad47f", - "metadata": {}, + "outputs": [], "source": [ - " **Pydap Approach**\n", + "ecco_ts_ccid = \"C1991543728-POCLOUD\" # \n", + "grid_ccid = \"C2013557893-POCLOUD\"\n", + "\n", + "# get 11 years of data (2007-2017)\n", + "time_range = [dt.datetime(2007, 1, 1), dt.datetime(2017, 12, 31)]\n", "\n", - " We can use **PyDAP** to inspect the metadata associated with each of the urls.\n", + "cmr_urls = get_cmr_urls(ccid=ecco_ts_ccid, time_range=time_range, limit=1000) # you can increase the limit of results\n", + "print(\"################################################ \\n We found a total of \", len(cmr_urls), \"OPeNDAP URLS!!!\\n################################################\")\n", "\n", - " Below we illustrate the use of **PyDAP** with Token authentication to access OPeNDAP metadata.\n", "\n", - " This will be useful when accessing OPeNDAP URLs via xarray.\n" + "# Get the grid data URL\n", + "grid_url = get_cmr_urls(ccid=grid_ccid)[0] # only one element\n", + "\n" ] }, { @@ -182,7 +101,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "322889ad-dc54-429c-bf48-35476d53bf8a", "metadata": {}, "outputs": [], @@ -190,7 +109,7 @@ "auth = earthaccess.login(strategy=\"interactive\", persist=True) # you will be promted to add your EDL credentials\n", "\n", "# pass Token Authorization to a new Session.\n", - "cache_kwargs={'cache_name':'ECCOv4'}\n", + "cache_kwargs={'cache_name':'data/ECCOv4'}\n", "my_session = create_session(use_cache=True, session=auth.get_session(), cache_kwargs=cache_kwargs)\n", "my_session.cache.clear()" ] @@ -200,1349 +119,249 @@ "id": "fa0042cb-053d-424b-971c-ed8b0d325725", 
"metadata": {}, "source": [ - " **Lazy access to remote data via pydap's client API**\n", + " **Construct DAP4 URLs and use Constraint Expressions!**\n", + "\n", + " Consider that we only want\n", + "- `THETA`\n", + "- `SALT`\n", "\n", - " **PyDAP** exploits the OPeNDAP's separation between metadata and data, to create lazy dataset objects that point to the data. These lazy objects contain all the attributes detailed in OPeNDAP's metadata files (DMR)" + " and their `dimensions`. " ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "5ced1396-be08-4a0b-b487-c430781bd247", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 50.1 ms, sys: 10.4 ms, total: 60.5 ms\n", - "Wall time: 3.63 s\n" - ] - } - ], + "outputs": [], "source": [ - "%%time\n", - "pyds = open_url(granules_urls[0], session=my_session, protocol='dap4')" + "CE = \"dap4.ce=/time;/k;/tile;/j;/i;/THETA;/SALT\"\n", + "\n", + "# from grid, get the Depth, Z, and Coordinates XC and YC at scalar points\n", + "Grid_url = grid_url.replace(\"https\", \"dap4\") + \"?dap4.ce=/tile;/j;/i;/Z;/Depth;/XC;/YC\"\n", + "\n", + "dap4_urls = [url.replace(\"https\",\"dap4\") + \"?\" + CE for url in cmr_urls]\n", + "\n", + "dap4_urls[:2]" ] }, { - "cell_type": "code", - "execution_count": 21, - "id": "1bb158d1-7184-4d07-b152-bfb9f6300ffc", + "cell_type": "markdown", + "id": "b8b87a23-0730-4f08-b757-038200fa54ae", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ".OCEAN_TEMPERATURE_SALINITY_mon_mean_1992-01_ECCO_V4r4_native_llc0090.nc\n", - "├──XG\n", - "├──Zp1\n", - "├──Zl\n", - "├──YC\n", - "├──XC\n", - "├──SALT\n", - "├──YG\n", - "├──XC_bnds\n", - "├──Zu\n", - "├──THETA\n", - "├──Z_bnds\n", - "├──YC_bnds\n", - "├──time_bnds\n", - "├──Z\n", - "├──i\n", - "├──i_g\n", - "├──j\n", - "├──j_g\n", - "├──k\n", - "├──k_l\n", - "├──k_p1\n", - "├──k_u\n", - "├──nb\n", - "├──nv\n", - "├──tile\n", - 
"└──time\n" - ] - } - ], "source": [ - "pyds.tree()" + " **Consolidate all URL Metadata Associated with the Data URL of cloud OPeNDAP URLs**\n", + "\n", + " You can construct a persistent reference to all Cloud OPeNDAP urls for later use!!!! \n" ] }, { - "cell_type": "markdown", - "id": "e438a621-f061-4904-a719-af9be89c1dc7", + "cell_type": "code", + "execution_count": null, + "id": "920caf59-adce-4e9c-bcbf-87ce4a194c13", "metadata": {}, + "outputs": [], "source": [ - " **Not all Variables are of interest. Lets use Constraint Expressions!**\n", - "\n", - " Consider that we only want\n", - "- `THETA`\n", - "- `SALT`\n", - "\n", - " and their `dimensions`. " + "# clear just in case\n", + "my_session.cache.clear()" ] }, { "cell_type": "code", - "execution_count": 22, - "id": "935e5a1c-a039-402a-95ea-9891ce021371", + "execution_count": null, + "id": "9841212a-5bf6-4e17-984e-4248a88604e3", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "dimension of THETA: ['/time', '/k', '/tile', '/j', '/i']\n", - "dimension of SALT: ['/time', '/k', '/tile', '/j', '/i']\n" - ] - } - ], + "outputs": [], "source": [ - "print(\"dimension of THETA:\" , pyds['THETA'].dims)\n", - "print(\"dimension of SALT:\" , pyds['SALT'].dims)" + "%%time\n", + "consolidate_metadata(dap4_urls, my_session, concat_dim='time')" ] }, { "cell_type": "markdown", - "id": "30604933-d567-4562-976f-c0f29da4b178", + "id": "67694cc9-a99d-477d-b2ca-c49c8ef9fc66", "metadata": {}, "source": [ - " **Construct Constraint Expression**\n", - "\n", - " That will instruct the Hyrax Data Server to only give use our desired variables.\n", + "## What happened?\n", "\n", - " This variable will be named `CE`. We will add it to each (granule) cloud OPeNDAP URL. THis will allow us to construct a `Data Cube`\n" + " All necessary metadata was fetched from opendap servers, and it can be used and reused by xarray. 
\n" ] }, { - "cell_type": "code", - "execution_count": 23, - "id": "2f7c2bf1-7c58-4c04-9238-bc166151c3f8", + "cell_type": "markdown", + "id": "c8faf11a-f0a0-4a04-b4e3-2df3b5bbdf08", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "constraint expression: ?dap4.ce=/THETA;/SALT;/time;/k;/tile;/j;/i\n" - ] - } - ], "source": [ - "dims = pyds['SALT'].dims\n", - "Vars = ['/THETA', '/SALT'] + dims\n", - "\n", - "# Below construct Contraint Expression\n", - "CE = \"?dap4.ce=\"+(\";\").join(Vars)\n", - "print(\"constraint expression: \", CE)" + "## Create a datacube with xarray and pydap as an engine!\n" ] }, { "cell_type": "code", - "execution_count": 24, - "id": "0b2b4dc7-08b0-4cfd-8d50-de940384aee4", + "execution_count": null, + "id": "34f9bc4c-b691-48e4-9be5-4019927aeac2", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Each Cloud OPeNDAP URL will look like: \n", - " https://opendap.earthdata.nasa.gov/providers/POCLOUD/collections/ECCO%20Ocean%20Temperature%20and%20Salinity%20-%20Monthly%20Mean%20llc90%20Grid%20(Version%204%20Release%204)/granules/OCEAN_TEMPERATURE_SALINITY_mon_mean_1992-01_ECCO_V4r4_native_llc0090?dap4.ce=/THETA;/SALT;/time;/k;/tile;/j;/i\n" - ] - } - ], + "outputs": [], "source": [ - "print(\" Each Cloud OPeNDAP URL will look like: \\n\", granules_urls[0]+CE)" + "%%time\n", + "ds = xr.open_mfdataset(\n", + " dap4_urls, \n", + " engine='pydap', \n", + " session=my_session, \n", + " parallel=True, \n", + " combine='nested', \n", + " concat_dim='time', \n", + " chunks={'tile':1, 'k':1})\n", + "ds" ] }, { "cell_type": "markdown", - "id": "af091d8f-1a3f-452a-95f7-6f59dca948c6", + "id": "3f4c4416-2513-4509-a53e-42731c90637a", "metadata": {}, "source": [ - " **Construct DAP4 URLS:**\n", - " \n", + "## Aggregate data\n", "\n", - " A DAP4 url begins with `dap4` as a scheme. 
\n", + "The field variables, and the Grid variables.\n", "\n", - " **NOTE**: This is only for xarray and **PyDAP**.\n" + "NOTE: When accessing/streaming Grid data from a separate collection, use a different session object.\n" ] }, { "cell_type": "code", - "execution_count": 25, - "id": "e87755fb-790e-472f-aeac-1f8d7840875a", + "execution_count": null, + "id": "eda49c00-22f2-462c-9276-cf496a7c63cb", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['dap4://opendap.earthdata.nasa.gov/providers/POCLOUD/collections/ECCO%20Ocean%20Temperature%20and%20Salinity%20-%20Monthly%20Mean%20llc90%20Grid%20(Version%204%20Release%204)/granules/OCEAN_TEMPERATURE_SALINITY_mon_mean_1992-01_ECCO_V4r4_native_llc0090?dap4.ce=/THETA;/SALT;/time;/k;/tile;/j;/i',\n", - " 'dap4://opendap.earthdata.nasa.gov/providers/POCLOUD/collections/ECCO%20Ocean%20Temperature%20and%20Salinity%20-%20Monthly%20Mean%20llc90%20Grid%20(Version%204%20Release%204)/granules/OCEAN_TEMPERATURE_SALINITY_mon_mean_1992-02_ECCO_V4r4_native_llc0090?dap4.ce=/THETA;/SALT;/time;/k;/tile;/j;/i',\n", - " 'dap4://opendap.earthdata.nasa.gov/providers/POCLOUD/collections/ECCO%20Ocean%20Temperature%20and%20Salinity%20-%20Monthly%20Mean%20llc90%20Grid%20(Version%204%20Release%204)/granules/OCEAN_TEMPERATURE_SALINITY_mon_mean_1992-03_ECCO_V4r4_native_llc0090?dap4.ce=/THETA;/SALT;/time;/k;/tile;/j;/i',\n", - " 'dap4://opendap.earthdata.nasa.gov/providers/POCLOUD/collections/ECCO%20Ocean%20Temperature%20and%20Salinity%20-%20Monthly%20Mean%20llc90%20Grid%20(Version%204%20Release%204)/granules/OCEAN_TEMPERATURE_SALINITY_mon_mean_1992-04_ECCO_V4r4_native_llc0090?dap4.ce=/THETA;/SALT;/time;/k;/tile;/j;/i']" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "new_urls = [url.replace(\"https\", \"dap4\") + CE for url in granules_urls][:100] # consider only the first 100 urls\n", - "new_urls[:4]" + "session = 
create_session(session=auth.get_session())" ] }, { - "cell_type": "markdown", - "id": "b8b87a23-0730-4f08-b757-038200fa54ae", + "cell_type": "code", + "execution_count": null, + "id": "c4114758-39a6-46b9-809f-b831f4e70b75", "metadata": {}, + "outputs": [], "source": [ - " **Consolidate all URL Metadata Associated with the Data URL of cloud OPeNDAP URLs**\n", - "\n", - " You can construct a persistent reference to all Cloud OPeNDAP urls for later use!!!! \n" + "Grid_url" ] }, { "cell_type": "code", - "execution_count": 26, - "id": "920caf59-adce-4e9c-bcbf-87ce4a194c13", + "execution_count": null, + "id": "eb3337c8-6617-4ea8-bdda-6877bb78f3fa", "metadata": {}, "outputs": [], "source": [ - "# clear just in case\n", - "my_session.cache.clear()" + "### create an individual dataset with only the variables of interest\n", + "grid_ds = xr.open_dataset(Grid_url, engine='pydap', session=session, chunks={\"k\":1, 'tile':1})\n", + "\n", + "#### Combine the two datasets into a single dataset reference\n", + "nds = xr.merge([ds, grid_ds])\n", + "nds" ] }, { "cell_type": "code", - "execution_count": 27, - "id": "9841212a-5bf6-4e17-984e-4248a88604e3", + "execution_count": null, + "id": "8daa6999-0822-4b1a-893d-6b1689ab03fa", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "datacube has dimensions ['i[0:1:89]', 'j[0:1:89]', 'k[0:1:49]', 'tile[0:1:12]'] , and concat dim: `time`\n", - "CPU times: user 1.25 s, sys: 521 ms, total: 1.77 s\n", - "Wall time: 33.5 s\n" - ] - } - ], - "source": [ - "%%time\n", - "consolidate_metadata(new_urls, my_session, concat_dim='time')" - ] + "outputs": [], + "source": [] }, { "cell_type": "markdown", - "id": "67694cc9-a99d-477d-b2ca-c49c8ef9fc66", + "id": "8b072c13-7f99-45dc-818f-91fc7b437e08", "metadata": {}, "source": [ - "## What happened?\n", - "\n", - " All necessary metadata was fetch from opendap servers, and it can be used and reused by xarray. 
\n" + "### Stream a year of data with OPeNDAP, subset tiles, and store data locally with Xarray and PyDAP as engine" ] }, { - "cell_type": "markdown", - "id": "c8faf11a-f0a0-4a04-b4e3-2df3b5bbdf08", + "cell_type": "code", + "execution_count": null, + "id": "bdf43e0f-8438-4648-ab51-60454f9e3638", "metadata": {}, + "outputs": [], "source": [ - "## Create a datacube with xarray and pydap as an engine!\n" + "%%time\n", + "nds.isel(time=slice(12), k=0, tile=[2,6,10]).to_netcdf(\"data/ECCOv4_NA.nc4\")" ] }, { - "cell_type": "code", - "execution_count": 31, - "id": "34f9bc4c-b691-48e4-9be5-4019927aeac2", + "cell_type": "markdown", + "id": "9d70f262-3264-4b7a-881f-4a151022d3a7", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 1.38 s, sys: 193 ms, total: 1.58 s\n", - "Wall time: 1.45 s\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset> Size: 4GB\n",
-       "Dimensions:  (time: 100, k: 50, tile: 13, j: 90, i: 90)\n",
-       "Coordinates:\n",
-       "  * i        (i) int32 360B 0 1 2 3 4 5 6 7 8 9 ... 81 82 83 84 85 86 87 88 89\n",
-       "  * j        (j) int32 360B 0 1 2 3 4 5 6 7 8 9 ... 81 82 83 84 85 86 87 88 89\n",
-       "  * k        (k) int32 200B 0 1 2 3 4 5 6 7 8 9 ... 41 42 43 44 45 46 47 48 49\n",
-       "  * tile     (tile) int32 52B 0 1 2 3 4 5 6 7 8 9 10 11 12\n",
-       "  * time     (time) datetime64[ns] 800B 1992-01-16T18:00:00 ... 2000-04-16\n",
-       "Data variables:\n",
-       "    SALT     (time, k, tile, j, i) float32 2GB dask.array<chunksize=(1, 1, 1, 90, 90), meta=np.ndarray>\n",
-       "    THETA    (time, k, tile, j, i) float32 2GB dask.array<chunksize=(1, 1, 1, 90, 90), meta=np.ndarray>\n",
-       "Attributes: (12/62)\n",
-       "    acknowledgement:                 This research was carried out by the Jet...\n",
-       "    author:                          Ian Fenty and Ou Wang\n",
-       "    cdm_data_type:                   Grid\n",
-       "    comment:                         Fields provided on the curvilinear lat-l...\n",
-       "    Conventions:                     CF-1.8, ACDD-1.3\n",
-       "    coordinates_comment:             Note: the global 'coordinates' attribute...\n",
-       "    ...                              ...\n",
-       "    time_coverage_duration:          P1M\n",
-       "    time_coverage_end:               1992-02-01T00:00:00\n",
-       "    time_coverage_resolution:        P1M\n",
-       "    time_coverage_start:             1992-01-01T12:00:00\n",
-       "    title:                           ECCO Ocean Temperature and Salinity - Mo...\n",
-       "    uuid:                            f07693e6-4181-11eb-beb3-0cc47a3f44ff
" - ], - "text/plain": [ - " Size: 4GB\n", - "Dimensions: (time: 100, k: 50, tile: 13, j: 90, i: 90)\n", - "Coordinates:\n", - " * i (i) int32 360B 0 1 2 3 4 5 6 7 8 9 ... 81 82 83 84 85 86 87 88 89\n", - " * j (j) int32 360B 0 1 2 3 4 5 6 7 8 9 ... 81 82 83 84 85 86 87 88 89\n", - " * k (k) int32 200B 0 1 2 3 4 5 6 7 8 9 ... 41 42 43 44 45 46 47 48 49\n", - " * tile (tile) int32 52B 0 1 2 3 4 5 6 7 8 9 10 11 12\n", - " * time (time) datetime64[ns] 800B 1992-01-16T18:00:00 ... 2000-04-16\n", - "Data variables:\n", - " SALT (time, k, tile, j, i) float32 2GB dask.array\n", - " THETA (time, k, tile, j, i) float32 2GB dask.array\n", - "Attributes: (12/62)\n", - " acknowledgement: This research was carried out by the Jet...\n", - " author: Ian Fenty and Ou Wang\n", - " cdm_data_type: Grid\n", - " comment: Fields provided on the curvilinear lat-l...\n", - " Conventions: CF-1.8, ACDD-1.3\n", - " coordinates_comment: Note: the global 'coordinates' attribute...\n", - " ... ...\n", - " time_coverage_duration: P1M\n", - " time_coverage_end: 1992-02-01T00:00:00\n", - " time_coverage_resolution: P1M\n", - " time_coverage_start: 1992-01-01T12:00:00\n", - " title: ECCO Ocean Temperature and Salinity - Mo...\n", - " uuid: f07693e6-4181-11eb-beb3-0cc47a3f44ff" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "%%time\n", - "ds = xr.open_mfdataset(new_urls, engine='pydap', session=my_session, parallel=True, combine='nested', concat_dim='time', chunks={'tile':1, 'k':1})\n", - "ds" + "### Finally, visualize the downloaded data" ] }, { - "cell_type": "markdown", - "id": "3f4c4416-2513-4509-a53e-42731c90637a", + "cell_type": "code", + "execution_count": null, + "id": "c6e8331b-97d3-446a-871c-eadf8471930e", "metadata": {}, + "outputs": [], "source": [ - "## Download some data\n", - "\n", - "So far, only metadata has been downloaded. 
Below we plot some data in the NorthAtlantic ocean\n", - "\n", - "\n", - "\n" + "mds = xr.open_dataset(\"data/ECCOv4_NA.nc4\")\n", + "mds" ] }, { "cell_type": "code", - "execution_count": 32, - "id": "eb3337c8-6617-4ea8-bdda-6877bb78f3fa", + "execution_count": null, + "id": "7b2c6fcd-b64c-4572-a46b-f6d167063a15", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 197 ms, sys: 23.2 ms, total: 221 ms\n", - "Wall time: 5.51 s\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "%%time\n", - "ds['THETA'].isel(time=0, k=0, tile=2).plot(cmap='RdBu_r', vmin=-4, vmax=30);" + "Variable = [mds['THETA'][0, i, :, :] for i in range(3)]\n", + "clevels = np.linspace(-5, 30, 100)\n", + "cMap='RdBu_r'\n", + "\n", + "ocean_mask = mds[\"Depth\"]>0\n" ] }, { "cell_type": "code", - "execution_count": 33, - "id": "8daa6999-0822-4b1a-893d-6b1689ab03fa", + "execution_count": null, + "id": "fe975589-1241-4b1c-a1f8-2589f409cb14", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'long_name': 'Potential temperature ',\n", - " 'units': 'degree_C',\n", - " 'coverage_content_type': 'modelResult',\n", - " 'standard_name': 'sea_water_potential_temperature',\n", - " 'comment': 'Sea water potential temperature is the temperature a parcel of sea water would have if moved adiabatically to sea level pressure. Note: the equation of state is a modified UNESCO formula by Jackett and McDougall (1995), which uses the model variable potential temperature as input assuming a horizontally and temporally constant pressure of $p_0=-g \\rho_{0} z$.',\n", - " 'valid_min': -2.2909388542175293,\n", - " 'valid_max': 36.032955169677734,\n", - " 'origname': 'THETA',\n", - " 'fullnamepath': '/THETA',\n", - " 'Maps': ()}" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "ds['THETA'].isel(time=0, k=0, tile=2).attrs" + "fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(8, 8), gridspec_kw={'hspace':0.001, 'wspace':0.001})\n", + "AXES_NR = [\n", + " axes[1, 1],\n", + "]\n", + "AXES_CAP = [axes[0, 1]]\n", + "AXES_R = [\n", + " axes[1, 0],\n", + "]\n", + "for i in range(len(AXES_NR)):\n", + " ocean_mask.isel(tile=0).plot(ax=AXES_NR[i], cmap=\"Greys_r\", add_colorbar=False)\n", + " Variable[0].where(ocean_mask.isel(tile=0)).plot(ax=AXES_NR[i], levels=clevels, cmap=cMap, add_colorbar=False)\n", + "\n", 
+ "for i in range(len(AXES_CAP)):\n", + " ocean_mask.isel(tile=1).transpose().plot(ax= AXES_CAP[i], cmap=\"Greys_r\", add_colorbar=False, xincrease=False)\n", + " Variable[1].transpose().where(ocean_mask.isel(tile=1)).plot(ax=AXES_CAP[i], levels=clevels, cmap=cMap, add_colorbar=False, xincrease=False)\n", + "\n", + "\n", + "for i in range(len(AXES_R)):\n", + " # AXES_R[i].contourf(Variable[2].transpose()[::-1, :], clevels, cmap=cMap)\n", + " ocean_mask.isel(tile=2).transpose().plot(ax= AXES_R[i], cmap=\"Greys_r\", add_colorbar=False, yincrease=False)\n", + " Variable[2].transpose().where(ocean_mask.isel(tile=2)).plot(ax=AXES_R[i], levels=clevels, cmap=cMap, add_colorbar=False, yincrease=False)\n", + "for ax in np.ravel(axes):\n", + " ax.axis('off')\n", + " plt.setp(ax.get_xticklabels(), visible=False)\n", + " plt.setp(ax.get_yticklabels(), visible=False)\n", + " plt.setp(ax.title, visible=False)\n", + "\n", + "plt.show()" ] }, { "cell_type": "code", "execution_count": null, - "id": "29fbbe3d-f4d0-4c73-ab97-15672011fc2a", + "id": "0012f5b3-1c1f-4f9f-b579-468a6bbc377d", "metadata": {}, "outputs": [], "source": [] @@ -1564,7 +383,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.13" + "version": "3.12.11" } }, "nbformat": 4, diff --git a/binder/GetStarted.ipynb b/binder/GetStarted.ipynb deleted file mode 100644 index c2a0b29..0000000 --- a/binder/GetStarted.ipynb +++ /dev/null @@ -1,222 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "9c7cc75e-6d95-4ffe-8227-25810e878b9a", - "metadata": {}, - "source": [ - " **Getting Started: Setting Earthdata Access**\n", - " \n", - " \n", - "\"drawing\" \n", - " \n", - "\n", - "\n", - " **Requirements**\n", - "1. Go to the [Login Page](https://urs.earthdata.nasa.gov/home) and set up a Username and Password.\n", - "2. Generate a Bearer Token.\n", - "3. 
Copy the Bearer Token onto clipboard.\n", - "\n", - "\"drawing\" \n", - "\n", - "\n", - "\n", - "\n", - " **Objectives**\n", - "- To demonstrate remote access via token to Earthdata.\n", - "- To store locally the EDL `token` to be used in other notebooks.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1c0ccde6-3f2e-4bef-9d31-92a48064c64a", - "metadata": {}, - "outputs": [], - "source": [ - "from pydap.client import open_url\n", - "import json\n", - "from getpass import getpass\n", - "from pydap.net import create_session" - ] - }, - { - "cell_type": "markdown", - "id": "51b2528c-a02f-44cf-b826-408da0307dd7", - "metadata": {}, - "source": [ - " **EDL Token**: \n", - "\n", - " The cell below asks to paste your token, taken from your EDL account. No personal information will be displayed" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2a407105-4897-44bf-827a-db7d5bd94e99", - "metadata": {}, - "outputs": [], - "source": [ - "# This gets the EDL token from the users keyboard.\n", - "edl_token = getpass(\"EDL Token Value: \")" - ] - }, - { - "cell_type": "markdown", - "id": "8c85a8c2-0c00-4ba1-a729-486762146a2a", - "metadata": {}, - "source": [ - " **Approach 1**: **Store Token locally to facilitate import**\n", - "\n", - " The code below will store the `Token Credentials` needed to access EarthData via pydap locally in a file called `token.json`.\n", - "\n", - " Data in `token.json` can now be imported in other notebooks for use when accessing Earthdata via Hyrax in the Cloud / cloud OPeNDAP.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f551a7c5-f51b-4cd6-a9a5-3f775ddae827", - "metadata": {}, - "outputs": [], - "source": [ - "credentials = {\"token\": edl_token}\n", - "\n", - "with open('token.json', 'w') as fp:\n", - " json.dump(credentials, fp)" - ] - }, - { - "cell_type": "markdown", - "id": "65dbf552-0996-4fb0-9389-b0945db6352b", - "metadata": {}, - "source": [ - " **Approach 
2**: **Dynamically add your token to session**\n", - "\n", - " If you rather not persist your token during the binder session, you can add it to the requests session via **PyDAP**'s built in session creator. You\n", - "will have to do this every time you create a new session. It follows the syntax:\n", - "```python\n", - "from pydap.net import create_session\n", - "\n", - "session_kwargs = {\"token\": \"\"}\n", - "session = create_session(session_kwargs=session_kwargs)\n", - "\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "e65d5bb7-6a0b-4306-b6f6-681765f168a7", - "metadata": {}, - "source": [ - " **Approach 3**: **Username/password**\n", - "\n", - " You can also authenticate using your EDL username and password. We recommend creating a `.netrc` document storing your authentication credentials. We do not demonstrate this form of authentication in these tutorials, but you can learn more about these on the official **PyDAP** documentation:\n", - "\n", - "- [How to Authenticate with PyDAP](https://pydap.github.io/pydap/en/notebooks/Authentication.html)\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "88226257-eede-4b80-9954-7821bd88fdf3", - "metadata": {}, - "source": [ - " **Test Access to Verify Authenticated**\n", - "\n", - " We now demonstrate how to import the token data and use it to access data via pure **PyDAP** (one Cloud OPeNDAP URL)\n", - "\n", - " For now, lets look into Sea Surface Temperature data from GHRSST for 2022-08-12. 
The URL is provided below.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "25e602ab-541a-45a9-88f4-8bd9a5e8fa18", - "metadata": {}, - "outputs": [], - "source": [ - "# load token json data\n", - "with open('token.json', 'r') as fp:\n", - " token = json.load(fp)\n", - "\n", - "# pass Token Authorization to a new Session.\n", - "my_session = create_session(session_kwargs=token)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "02376ef7-1e87-4aaf-b708-0d249bcbb558", - "metadata": {}, - "outputs": [], - "source": [ - "dataset_url = \"https://opendap.earthdata.nasa.gov/collections/C2036877806-POCLOUD/granules/20220812010000-OSISAF-L3C_GHRSST-SSTsubskin-GOES16-ssteqc_goes16_20220812_010000-v02.0-fv01.0\"\n", - "print (\"dataset_url: \", dataset_url)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fa983632-de6f-4355-a1ea-9c08477d8714", - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "dataset = open_url(dataset_url, session=my_session, protocol=\"dap4\")" - ] - }, - { - "cell_type": "markdown", - "id": "36079c1f-8ecc-4f4d-a0a2-6c2ea0e1589d", - "metadata": {}, - "source": [ - " **Inspect data without downloading**\n", - "\n", - " The `tree` method from **PyDAP** allows user to inspect all variables available from the dataset, without actually openning the dataset or downloading it into your machine.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "28551462-d8e5-4eed-877a-0905ef7f8470", - "metadata": {}, - "outputs": [], - "source": [ - "dataset.tree()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "334c32ae-ad14-4d35-96e6-db736d69f0ca", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - 
"file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/binder/Iceberg_drift.ipynb b/binder/Iceberg_drift.ipynb index 11c8a5a..0d4c5ab 100644 --- a/binder/Iceberg_drift.ipynb +++ b/binder/Iceberg_drift.ipynb @@ -27,7 +27,6 @@ "from pydap.client import get_cmr_urls, consolidate_metadata, open_url\n", "import xarray as xr\n", "import datetime as dt\n", - "import json\n", "import glob\n", "import os\n", "import numpy as np\n", @@ -60,19 +59,11 @@ "metadata": {}, "outputs": [], "source": [ - "auth = earthaccess.login(strategy=\"interactive\")\n", - "fs = earthaccess.get_fsspec_https_session()\n", - "session_kwargs = {'token': fs.storage_options['client_kwargs']['headers']['Authorization'][7:]}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "95feefc9-b239-4c75-8212-8810be0263c4", - "metadata": {}, - "outputs": [], - "source": [ - "my_session = create_session(use_cache=True, session_kwargs=session_kwargs)\n", + "auth = earthaccess.login(strategy=\"interactive\", persist=True) # you will be promted to add your EDL credentials\n", + "\n", + "# pass Token Authorization to a new Session.\n", + "cache_kwargs={'cache_name':'data/Iceberg'}\n", + "my_session = create_session(use_cache=True, session=auth.get_session(), cache_kwargs=cache_kwargs)\n", "my_session.cache.clear()" ] }, @@ -87,110 +78,124 @@ { "cell_type": "code", "execution_count": null, - "id": "6c31d252-3e4a-4b53-95f3-7ec940bcee5e", + "id": "36507ddc-4811-4498-a474-faafaa0a13e6", "metadata": {}, "outputs": [], "source": [ - "oscar_ccid = \"C2098858642-POCLOUD\" # https://podaac.jpl.nasa.gov/dataset/OSCAR_L4_OC_FINAL_V2.0" + "oscar_ccid = \"C2098858642-POCLOUD\" # https://podaac.jpl.nasa.gov/dataset/OSCAR_L4_OC_FINAL_V2.0\n", + "time_range = ['2019-08-16T00:00:00Z', '2020-09-16T00:00:00Z'] # 1 year of data\n", + "\n", + 
"ocean_urls = get_cmr_urls(ccid=oscar_ccid, time_range=time_range, session=my_session, limit=500)\n", + "print(\"found: \",len(ocean_urls), \"OSCAR urls\")\n", + "\n", + "# Turn urls into DAP4 urls\n", + "# This dataset has dfimensions lon and coordinates lat.\n", + "\n", + "CEs = \"?dap4.ce=/u;/v\"\n", + "\n", + "opendap_OSCAR_urls = [url.replace(\"https\", \"dap4\")+CEs for url in ocean_urls] # \n", + "\n", + "opendap_OSCAR_urls[:2]" ] }, { "cell_type": "markdown", - "id": "770dbf34-f0e4-4aad-95d1-42d3cd1a44e4", + "id": "70dc38d3-301d-4a4c-b704-9309b395d5a4", "metadata": {}, "source": [ - " **Filter data via Temporal Searches**\n" + " **Consolidate metadata**\n", + "\n", + " All URLs belonging to the same Collection share many identical variables and metadata. The following function\n", + "reduces redundant metadata\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "36507ddc-4811-4498-a474-faafaa0a13e6", + "id": "78af6e01-ea53-4338-bfc3-eda8568a8a81", "metadata": {}, "outputs": [], "source": [ - "time_range = ['2019-08-16T00:00:00Z', '2020-09-16T00:00:00Z'] # 1 year of data\n", - "time_range" + "my_session.cache.clear()" ] }, { "cell_type": "code", "execution_count": null, - "id": "33bc012d-7713-43b6-8ce2-6a2e2becc245", + "id": "789554d1-0536-4b85-89b7-98e3e928213c", "metadata": {}, "outputs": [], "source": [ - "ocean_urls = get_cmr_urls(ccid=oscar_ccid, time_range=time_range, session=my_session, limit=500)\n", - "print(\"found: \",len(ocean_urls), \"OSCAR urls\")\n", - "ocean_urls[-1]" - ] - }, - { - "cell_type": "markdown", - "id": "e99107c6-6d04-48ef-a51a-8bc1818700b1", - "metadata": {}, - "source": [ - " **OSCAR data**\n" + "%%time\n", + "consolidate_metadata(opendap_OSCAR_urls, concat_dim='time', session=my_session)" ] }, { "cell_type": "code", "execution_count": null, - "id": "541c1e09-9865-4160-8e55-f949c02e2628", + "id": "609ea526-516e-493d-baeb-85575ab50b48", "metadata": {}, "outputs": [], "source": [ - "# Turn urls into DAP4 urls\n", - 
"opendap_OSCAR_urls = [url.replace(\"https\", \"dap4\") for url in ocean_urls] # \n", + "## Create\n", + "new_session = create_session(session=auth.get_session())\n", "\n", - "opendap_OSCAR_urls[:2]" + "ds_coords = xr.open_dataset(\n", + " opendap_OSCAR_urls[0]+\";/lat;/lon\", \n", + " engine='pydap',\n", + " session=new_session,\n", + " chunks={'latitude': 300},\n", + ").drop_vars([\"u\", \"v\"])\n", + "ds_coords" ] }, { "cell_type": "markdown", - "id": "70dc38d3-301d-4a4c-b704-9309b395d5a4", + "id": "1fe91987-06fc-4a60-92de-05f9e52509af", "metadata": {}, "source": [ - " **Consolidate metadata**\n", + " **Create Virtually Aggregated Dataset with Xarray**\n", "\n", - " All URLs belonging to the same Collection share many identical variables and metadata. The following function\n", - "reduces redundant metadata\n" + " Now, you can create a virtually aggregated view of the dataset that is ready to analyze with Xarray and Pydap as an engine.\n", + "\n", + "`ds_oscar` will contain all relevant ocean data.\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "789554d1-0536-4b85-89b7-98e3e928213c", + "id": "e418db05-17ab-4888-b09c-6418edcd4fe6", "metadata": {}, "outputs": [], "source": [ "%%time\n", - "consolidate_metadata(opendap_OSCAR_urls, concat_dim='time', set_maps=True, session=my_session)" + "ds_ = xr.open_mfdataset(\n", + " opendap_OSCAR_urls, \n", + " engine='pydap', \n", + " session=my_session, \n", + " combine='nested', \n", + " concat_dim=\"time\", \n", + " chunks={'latitude': 300},\n", + ")\n" ] }, { - "cell_type": "markdown", - "id": "1fe91987-06fc-4a60-92de-05f9e52509af", + "cell_type": "code", + "execution_count": null, + "id": "3c4bf19a-5328-42d4-9def-4b7c00b68e95", "metadata": {}, + "outputs": [], "source": [ - " **Create Virtually Aggregated Dataset with Xarray**\n", - "\n", - " Now, you can create a virtually aggregated view of the dataset that is ready to analyze with Xarray and Pydap as an engine.\n", - "\n", - "`ds_oscar` will contain 
all relevant ocean data.\n" + "ds_oscar = xr.merge([ds_, ds_coords])" ] }, { "cell_type": "code", "execution_count": null, - "id": "e418db05-17ab-4888-b09c-6418edcd4fe6", + "id": "c2f24afd-dd46-46b7-ad56-91761268b374", "metadata": {}, "outputs": [], - "source": [ - "%%time\n", - "ds_oscar = xr.open_mfdataset(opendap_OSCAR_urls, engine='pydap', session=my_session, combine='nested', concat_dim=\"time\", chunks={'latitude': 300})\n", - "ds_oscar" - ] + "source": [] }, { "cell_type": "markdown", @@ -215,7 +220,7 @@ "outputs": [], "source": [ "ds_oscar['lon'], ds_oscar['lat'] = ds_oscar['lon'].load(), ds_oscar['lat'].load()\n", - "ds_oscar = ds_oscar.rename_vars({'lon':'longitude', 'lat':'latitude'}).set_index(longitude='longitude').set_index(latitude='latitude').drop_vars(['ug', 'vg'])" + "ds_oscar = ds_oscar.rename_vars({'lon':'longitude', 'lat':'latitude'}).set_index(longitude='longitude').set_index(latitude='latitude')" ] }, { @@ -269,7 +274,7 @@ "outputs": [], "source": [ "%%time\n", - "ds_oscar.isel(latitude=slice(40, 300)).to_netcdf(\"./data/Oscar_data.nc\")" + "ds_oscar.isel(latitude=slice(40, 300)).to_netcdf(\"./data/Oscar_data.nc4\")" ] }, { @@ -321,7 +326,7 @@ "metadata": {}, "outputs": [], "source": [ - "oscar_path = './data/Oscar_data.nc'" + "oscar_path = './data/Oscar_data.nc4'" ] }, { @@ -511,7 +516,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.13" + "version": "3.12.11" } }, "nbformat": 4, diff --git a/binder/MERRA-2_Access.ipynb b/binder/MERRA-2_Access.ipynb index 391d779..560c0a7 100644 --- a/binder/MERRA-2_Access.ipynb +++ b/binder/MERRA-2_Access.ipynb @@ -9,24 +9,26 @@ "\n", "\n", " **About the \"Modern-Era Retrospective analysis for Research and Applications\" Version 2 [MERRA-2](https://gmao.gsfc.nasa.gov/reanalysis/MERRA-2/docs/) data**\n", - "1. 
Assimilates observation types not available to its predecessor, MERRA, and includes updates to the Goddard Earth Observing System (GEOS) model and analysis scheme so as to provide a viable ongoing climate analysis beyond MERRA’s terminus.\n", - "2. The Modern-Era Retrospective Analysis for Research and Applications, version 2 (MERRA-2), is the latest atmospheric reanalysis of the modern satellite era produced by NASA’s Global Modeling and Assimilation Office (GMAO).\n", - "3. Other improvements in the quality of MERRA-2 compared with MERRA include the reduction of some spurious trends and jumps related to changes in the observing system and reduced biases and imbalances in aspects of the water cycle.\n", + "1. Assimilates observation types not available to its predecessor, MERRA, and includes updates to the Goddard Earth Observing System (GEOS) model and analysis scheme so as to provide a viable ongoing climate analysis beyond MERRA’s terminus.\n", + "2. The Modern-Era Retrospective Analysis for Research and Applications, version 2 (MERRA-2), is the latest atmospheric reanalysis of the modern satellite era produced by NASA’s Global Modeling and Assimilation Office (GMAO).\n", + "3. Other improvements in the quality of MERRA-2 compared with MERRA include the reduction of some spurious trends and jumps related to changes in the observing system and reduced biases and imbalances in aspects of the water cycle.\n", "\n", "**Source**: https://doi.org/10.1175/JCLI-D-16-0758.1\n", "\n", "\n", "\n", "**Requirements**\n", - "1. Have a Bearer Token for EarthData in the Cloud (See `GetStarted` Notebook)\n", - "2. Upload the Bearer Token from local file `token.json`\n", + "1. Have a Bearer Token for EarthData in the Cloud \n", + "2. Knowledge of the Collection Concept ID\n", "\n", - "1. 
Or alternatively, use earthaccess to recover the token interactively.\n" + "\n", + "\n", + "`Author`: Miguel Jimenez-Urias, '25" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "0cb3a702-a107-4915-8073-70f888457d60", "metadata": {}, "outputs": [], @@ -45,17 +47,13 @@ "id": "1483a0af-00db-4e55-bdd8-6421f092819c", "metadata": {}, "source": [ - " **Import Token Authorization and create Session**\n", - " \n", - "\n", - "\n", - " Here we use the Bearer Token to create an authenticated session. The Bearer token can be defined and stored locally, as described in the `GetStarted` Notebook. In the following scenario, we will make of earthaccess\n", + " **Authenticate**\n", "\n" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "1d34b47b-ee24-4156-8b50-6921c930f3b2", "metadata": {}, "outputs": [], @@ -63,7 +61,7 @@ "auth = earthaccess.login(strategy=\"interactive\", persist=True) # you will be promted to add your EDL credentials\n", "\n", "# pass Token Authorization to a new Session.\n", - "cache_kwargs={'cache_name':'MERRA2'}\n", + "cache_kwargs={'cache_name':'data/MERRA2'}\n", "my_session = create_session(use_cache=True, session=auth.get_session(), cache_kwargs=cache_kwargs)\n", "my_session.cache.clear()" ] @@ -78,7 +76,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "2df161ab-be74-495c-abe4-68162b0bb309", "metadata": {}, "outputs": [], @@ -104,44 +102,17 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "c34dccba-837c-4cd9-9edd-5d7c78883d3b", "metadata": {}, "outputs": [], "source": [ - "time_range=[dt.datetime(2023, 1, 1), dt.datetime(2023, 1, 31)] # One month of data" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "85e8921a-8a93-4866-8eb8-9a55c88622fc", - "metadata": {}, - "outputs": [], - "source": [ - "url_limits = 100 # controls the max number of urls returns. 
Default is 50" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "8dd2e5a7-34f3-4465-8222-03220c611c93", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "31" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ + "time_range=[dt.datetime(2023, 1, 1), dt.datetime(2023, 1, 31)] # One month of data\n", + "\n", + "url_limits = 100 # controls the max number of urls returns. Default is 50\n", + "\n", "urls = get_cmr_urls(doi=merra2_doi,time_range=time_range, limit=url_limits) # you can incread the limit of results\n", - "len(urls)" + "print(\"We found: \", len(urls), \" total Cloud OPeNDAP URLS associated with this collection!\")" ] }, { @@ -156,22 +127,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "0d810290-fe77-47f8-a3e7-e921fcdc6a4d", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['https://opendap.earthdata.nasa.gov/collections/C1276812863-GES_DISC/granules/M2T1NXSLV.5.12.4%3AMERRA2_400.tavg1_2d_slv_Nx.20230101.nc4',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C1276812863-GES_DISC/granules/M2T1NXSLV.5.12.4%3AMERRA2_400.tavg1_2d_slv_Nx.20230102.nc4']" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "urls[:2]" ] @@ -188,46 +147,23 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "593be3c9-79d1-4e64-9bda-a086594ad6d4", "metadata": {}, "outputs": [], "source": [ - "new_urls = [url.replace(\"https\", \"dap4\") for url in urls] # " - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "49553a6e-4a56-4157-add7-99c5207a4d45", - "metadata": {}, - "outputs": [], - "source": [ - "pyds = open_url(new_urls[0], session=my_session)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "14e9a463-2516-4608-b75a-4bed21647955", - "metadata": {}, - "outputs": [ - { - "name": 
"stdout", - "output_type": "stream", - "text": [ - "All variables within dataset: \n", - " ['lon', 'time', 'lat', 'TROPPB', 'T2M', 'TQL', 'T500', 'TOX', 'U2M', 'U850', 'PS', 'V850', 'OMEGA500', 'H250', 'Q250', 'T2MDEW', 'PBLTOP', 'V250', 'CLDPRS', 'V50M', 'Q500', 'DISPH', 'H1000', 'TO3', 'TS', 'T10M', 'TROPPT', 'TQI', 'SLP', 'TROPT', 'U250', 'Q850', 'ZLCL', 'TQV', 'V2M', 'T250', 'TROPQ', 'V10M', 'H850', 'T850', 'U50M', 'U10M', 'QV2M', 'CLDTMP', 'TROPPV', 'H500', 'V500', 'T2MWET', 'U500', 'QV10M']\n" - ] - } - ], - "source": [ + "# Make URL a DAP4 URL\n", + "\n", + "new_urls = [url.replace(\"https\", \"dap4\") for url in urls] \n", + "\n", + "pyds = open_url(new_urls[0], session=my_session)\n", + "\n", "print(\"All variables within dataset: \\n\", list(pyds.variables()))" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "d73f6cc3-702f-4a73-b9f9-001979b324b7", "metadata": {}, "outputs": [], @@ -235,28 +171,8 @@ "Keep_vars = [\"/T2M\", \"/U2M\", \"/V2M\", \"/SLP\"] # this are the variables we want\n", "dims = list(set([dim for var in Keep_vars for dim in pyds[var].dims])) # retain their dimensions\n", "Keep_vars += dims\n", - "CE=\"?dap4.ce=\" + (';').join(Keep_vars) # need to add this to each url" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "c124d538-ff65-4ae5-9888-075795205cb2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['dap4://opendap.earthdata.nasa.gov/collections/C1276812863-GES_DISC/granules/M2T1NXSLV.5.12.4%3AMERRA2_400.tavg1_2d_slv_Nx.20230101.nc4?dap4.ce=/T2M;/U2M;/V2M;/SLP;/lon;/time;/lat',\n", - " 'dap4://opendap.earthdata.nasa.gov/collections/C1276812863-GES_DISC/granules/M2T1NXSLV.5.12.4%3AMERRA2_400.tavg1_2d_slv_Nx.20230102.nc4?dap4.ce=/T2M;/U2M;/V2M;/SLP;/lon;/time;/lat']" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ + "CE=\"?dap4.ce=\" + (';').join(Keep_vars) # need to add this to each url\n", + 
"\n", "opendap_urls = [url + CE for url in new_urls]\n", "opendap_urls[:2]" ] @@ -274,7 +190,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "72b63320-37b3-417b-8694-ea975b014982", "metadata": {}, "outputs": [], @@ -284,68 +200,13 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "7d0296c5-0d1a-4af1-8e14-32d1e26acd0a", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "datacube has dimensions ['lat[0:1:360]', 'lon[0:1:575]'] , and concat dim: `time`\n", - "CPU times: user 577 ms, sys: 239 ms, total: 816 ms\n", - "Wall time: 22.8 s\n" - ] - } - ], + "outputs": [], "source": [ "%%time\n", - "consolidate_metadata(opendap_urls, concat_dim='time', session=my_session, set_maps=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "9e79d70e-6859-4a0c-a016-03da3dd80667", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "63" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(my_session.cache.urls())" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "8b273280-1a45-4af1-ab91-5f25ba4496ce", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['https://opendap.earthdata.nasa.gov/collections/C1276812863-GES_DISC/granules/M2T1NXSLV.5.12.4%3AMERRA2_400.tavg1_2d_slv_Nx.20230101.nc4.dap?dap4.ce=lat%5B0%3A1%3A360%5D%3Blon%5B0%3A1%3A575%5D&dap4.checksum=true',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C1276812863-GES_DISC/granules/M2T1NXSLV.5.12.4%3AMERRA2_400.tavg1_2d_slv_Nx.20230101.nc4.dap?dap4.ce=time%5B0%3A1%3A23%5D&dap4.checksum=true',\n", - " 'https://opendap.earthdata.nasa.gov/collections/C1276812863-GES_DISC/granules/M2T1NXSLV.5.12.4%3AMERRA2_400.tavg1_2d_slv_Nx.20230101.nc4.dmr?dap4.ce=%2FT2M%3B%2FU2M%3B%2FV2M%3B%2FSLP%3B%2Flon%3B%2Ftime%3B%2Flat',\n", - " 
'https://opendap.earthdata.nasa.gov/collections/C1276812863-GES_DISC/granules/M2T1NXSLV.5.12.4%3AMERRA2_400.tavg1_2d_slv_Nx.20230102.nc4.dap?dap4.ce=time%5B0%3A1%3A23%5D&dap4.checksum=true']" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "my_session.cache.urls()[:4]" + "consolidate_metadata(opendap_urls, concat_dim='time', session=my_session)" ] }, { @@ -360,1005 +221,20 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "6bddeee4-564d-4cea-ac12-74a0f7a5ea60", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 412 ms, sys: 68.1 ms, total: 480 ms\n", - "Wall time: 563 ms\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset> Size: 5GB\n",
-       "Dimensions:  (time: 744, lat: 361, lon: 576)\n",
-       "Coordinates:\n",
-       "  * lon      (lon) float64 5kB -180.0 -179.4 -178.8 -178.1 ... 178.1 178.8 179.4\n",
-       "  * time     (time) datetime64[ns] 6kB 2023-01-01T00:30:00 ... 2023-01-31T23:...\n",
-       "  * lat      (lat) float64 3kB -90.0 -89.5 -89.0 -88.5 ... 88.5 89.0 89.5 90.0\n",
-       "Data variables:\n",
-       "    T2M      (time, lat, lon) float64 1GB dask.array<chunksize=(1, 361, 576), meta=np.ndarray>\n",
-       "    U2M      (time, lat, lon) float64 1GB dask.array<chunksize=(1, 361, 576), meta=np.ndarray>\n",
-       "    SLP      (time, lat, lon) float64 1GB dask.array<chunksize=(1, 361, 576), meta=np.ndarray>\n",
-       "    V2M      (time, lat, lon) float64 1GB dask.array<chunksize=(1, 361, 576), meta=np.ndarray>\n",
-       "Attributes: (12/31)\n",
-       "    History:                           Original file generated: Wed Jan 11 21...\n",
-       "    Comment:                           GMAO filename: d5124_m2_jan10.tavg1_2d...\n",
-       "    Filename:                          MERRA2_400.tavg1_2d_slv_Nx.20230101.nc4\n",
-       "    Conventions:                       CF-1\n",
-       "    Institution:                       NASA Global Modeling and Assimilation ...\n",
-       "    References:                        http://gmao.gsfc.nasa.gov\n",
-       "    ...                                ...\n",
-       "    identifier_product_doi:            10.5067/VJAFPLI1CSIV\n",
-       "    RangeBeginningDate:                2023-01-01\n",
-       "    RangeBeginningTime:                00:00:00.000000\n",
-       "    RangeEndingDate:                   2023-01-01\n",
-       "    RangeEndingTime:                   23:59:59.000000\n",
-       "    created:                           2025-01-07T19:36:33Z
" - ], - "text/plain": [ - " Size: 5GB\n", - "Dimensions: (time: 744, lat: 361, lon: 576)\n", - "Coordinates:\n", - " * lon (lon) float64 5kB -180.0 -179.4 -178.8 -178.1 ... 178.1 178.8 179.4\n", - " * time (time) datetime64[ns] 6kB 2023-01-01T00:30:00 ... 2023-01-31T23:...\n", - " * lat (lat) float64 3kB -90.0 -89.5 -89.0 -88.5 ... 88.5 89.0 89.5 90.0\n", - "Data variables:\n", - " T2M (time, lat, lon) float64 1GB dask.array\n", - " U2M (time, lat, lon) float64 1GB dask.array\n", - " SLP (time, lat, lon) float64 1GB dask.array\n", - " V2M (time, lat, lon) float64 1GB dask.array\n", - "Attributes: (12/31)\n", - " History: Original file generated: Wed Jan 11 21...\n", - " Comment: GMAO filename: d5124_m2_jan10.tavg1_2d...\n", - " Filename: MERRA2_400.tavg1_2d_slv_Nx.20230101.nc4\n", - " Conventions: CF-1\n", - " Institution: NASA Global Modeling and Assimilation ...\n", - " References: http://gmao.gsfc.nasa.gov\n", - " ... ...\n", - " identifier_product_doi: 10.5067/VJAFPLI1CSIV\n", - " RangeBeginningDate: 2023-01-01\n", - " RangeBeginningTime: 00:00:00.000000\n", - " RangeEndingDate: 2023-01-01\n", - " RangeEndingTime: 23:59:59.000000\n", - " created: 2025-01-07T19:36:33Z" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%%time\n", - "ds = xr.open_mfdataset(opendap_urls, engine='pydap', session=my_session, combine='nested', concat_dim=\"time\", chunks={\"time\":1})\n", + "ds = xr.open_mfdataset(\n", + " opendap_urls, \n", + " engine='pydap', \n", + " session=my_session, \n", + " combine='nested', \n", + " concat_dim=\"time\", \n", + " chunks={\"time\":1}\n", + ")\n", "ds" ] }, @@ -1374,29 +250,10 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "a6f39ca9-89a6-422d-ae3d-ad32a3f89933", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 182 ms, sys: 30.7 ms, total: 213 ms\n", - "Wall time: 5.54 
s\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "%%time\n", "fig, ax = plt.subplots(figsize=(12, 6))\n", @@ -1464,7 +321,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.13" + "version": "3.12.11" } }, "nbformat": 4, diff --git a/binder/OSCAR.ipynb b/binder/OSCAR.ipynb index 9fe2df9..bd681fa 100644 --- a/binder/OSCAR.ipynb +++ b/binder/OSCAR.ipynb @@ -38,7 +38,8 @@ "import datetime as dt\n", "import numpy as np\n", "import json\n", - "import matplotlib.pyplot as plt" + "import matplotlib.pyplot as plt\n", + "import earthaccess" ] }, { @@ -46,11 +47,11 @@ "id": "b766c90b-d699-4963-8c10-686b9b8cb6be", "metadata": {}, "source": [ - " **Import Token Authorization and create Session**\n", + " **Authenticate and create Session**\n", " \n", "\n", "\n", - " Here we use the Bearer Token to create an authenticated session. The Bearer token should be stored on a local json file, after completed the `GetStarted` Notebook.\n", + " Here we use the Bearer Token from our EDL account.\n", "\n" ] }, @@ -61,12 +62,11 @@ "metadata": {}, "outputs": [], "source": [ - "# load token json data\n", - "with open('token.json', 'r') as fp:\n", - " token = json.load(fp)\n", + "auth = earthaccess.login(strategy=\"interactive\", persist=True) # you will be promted to add your EDL credentials\n", "\n", "# pass Token Authorization to a new Session.\n", - "my_session = create_session(use_cache=True, session_kwargs=token)\n", + "cache_kwargs={'cache_name':'data/OSCAR'}\n", + "my_session = create_session(use_cache=True, session=auth.get_session(), cache_kwargs=cache_kwargs)\n", "my_session.cache.clear()" ] }, @@ -81,67 +81,19 @@ { "cell_type": "code", "execution_count": null, - "id": "5f18fa09-19c5-4f5d-9465-1f2150956a25", + "id": "4446cb20-bb1b-403f-ac7c-d63c27f67fcb", "metadata": {}, "outputs": [], "source": [ - "oscar_ccid = \"C2098858642-POCLOUD\"" - ] - }, - { - "cell_type": "markdown", - 
"id": "77c0b18a-f7c0-409b-bc70-6a73c4592ac8", - "metadata": {}, - "source": [ - " **Filter data via Temporal Searches**\n", + "oscar_ccid = \"C2098858642-POCLOUD\"\n", "\n", - " Users can specify date ranges NASA's CMR can \n", + "time_range=[dt.datetime(2020, 1, 1), dt.datetime(2020, 1, 31)] # One month of data\n", "\n", - " There are two ways to specify formats.\n", + "url_limits = 100 # controls the max number of urls returns. Default is 50\n", "\n", - " 1. Using `python`'s datetime package. It follows the `year-month-day` formatting\n", - " 2. A string with the following format: YYYY-MM-DDTHH:MM:SSZ\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4446cb20-bb1b-403f-ac7c-d63c27f67fcb", - "metadata": {}, - "outputs": [], - "source": [ - "time_range=[dt.datetime(2020, 1, 1), dt.datetime(2020, 1, 31)] # One month of data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "65d9242d-b7c3-4068-b38b-c1d9232cbfcd", - "metadata": {}, - "outputs": [], - "source": [ - "url_limits = 100 # controls the max number of urls returns. 
Default is 50" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "77302c47-af7d-4376-aba5-463f5547dfbb", - "metadata": {}, - "outputs": [], - "source": [ "urls = get_cmr_urls(ccid=oscar_ccid,time_range=time_range, limit=url_limits) # you can incread the limit of results\n", - "len(urls)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b9b0af05-20ef-4028-a435-e13836606d57", - "metadata": {}, - "outputs": [], - "source": [ - "dap4_urls = [url.replace(\"https\", \"dap4\") for url in urls]" + "dap4_urls = [url.replace(\"https\", \"dap4\") for url in urls]\n", + "print(\"We found: \", len(dap4_urls), \" total Cloud OPeNDAP URLS associated with this collection!\")" ] }, { @@ -166,7 +118,7 @@ "outputs": [], "source": [ "%%time\n", - "consolidate_metadata(dap4_urls, concat_dim='time', safe_mode=False, set_maps=True, session=my_session)" + "consolidate_metadata(dap4_urls, concat_dim='time', set_maps=True, session=my_session)" ] }, { @@ -187,7 +139,13 @@ "outputs": [], "source": [ "%%time\n", - "ds = xr.open_mfdataset(dap4_urls, engine='pydap', session=my_session, combine='nested', concat_dim=\"time\")\n", + "ds = xr.open_mfdataset(\n", + " dap4_urls, \n", + " engine='pydap', \n", + " session=my_session, \n", + " combine='nested', \n", + " concat_dim=\"time\",\n", + ")\n", "ds" ] }, @@ -433,7 +391,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.12" + "version": "3.12.11" } }, "nbformat": 4, diff --git a/binder/earthaccess.ipynb b/binder/earthaccess.ipynb deleted file mode 100644 index 0d98fc0..0000000 --- a/binder/earthaccess.ipynb +++ /dev/null @@ -1,355 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "6e4a86e9-cd1d-445b-a7c9-89b2466414d9", - "metadata": {}, - "source": [ - " **Using Earthaccess for access data via Hyrax's DMR++** \n", - "\n", - "\n", - "\n", - "**Requirements**\n", - "1. 
An active EDL account.\n", - "\n", - " `earthaccess` has their own way to authenticate that makes use of your EDL logging information.\n", - "\n", - " **OPeNDAP, DMR++ and VirtualiZarr**\n", - "\n", - "\n", - " This notebook makes use of [earthacess](https://earthaccess.readthedocs.io/en/latest/), [VirtualiZarr](https://virtualizarr.readthedocs.io/en/latest/) and [xarray](https://docs.xarray.dev/en/stable/) to access NASA's cloud files currently on `S3`. [earthacess](https://earthaccess.readthedocs.io/en/latest/) has `built-in` support for accessing OPeNDAP in the Cloud's `DMR++` metadata directly, as opposed to OPeNDAP's Hyrax data server. DMR++ is then to Zarr metadata via [VirtualiZarr](https://virtualizarr.readthedocs.io/en/latest/), providing a huge performance boost for running both locally, or in a Cloud compute environment.\n", - "\n", - " **open_virtual_dataset**: \n", - "\n", - "[earthacess](https://earthaccess.readthedocs.io/en/latest/) allows data users to convert Hyrax's in the Cloud DMR++ metadata into cloud optimized reference files for the data stored in the cloud. THis is done via:\n", - "\n", - "- `earthaccess.open_virtual_dataset`\n", - "- `earthaccess.open_virtual_mfdataset`\n", - "\n", - "\n", - " **access=\"indirect\" vs access=\"direct\"**: \n", - "\n", - "\n", - " This tutorial loads data over `https` (`access=\"indirect\"`). However, there is a **significant speed improvement** when using these functions in-cloud and enabling `access=\"direct\"`. This is the case when running this notebook over managed cloud JupyterHubs like [NASA VEDA](https://www.earthdata.nasa.gov/dashboard/) or [2i2c Openscapes](https://workshop.openscapes.2i2c.cloud/hub/login?next=%2Fhub%2F). 
This is because the data is streamed directly from cloud storage to cloud compute.\n", - "\n", - " **Objectives**\n", - " \n", - " \n", - "- Demonstrate how to use [earthacess](https://earthaccess.readthedocs.io/en/latest/) to query datasets that are aviable via `OPeNDAP` in the Cloud.\n", - "- Demonstrate the use of [earthacess](https://earthaccess.readthedocs.io/en/latest/) to create a virtually aggregated xarray data cube, making use of the Zarr metadata created from DMR++.\n", - "- Demonstrate an advanced workflow for storing virtual reference as a Kerchunk object, for later use.\n", - "\n", - "\n", - " **WARNING**: \n", - "\n", - " This feature is current experimental and may change in the future. This feature relies on `NASA` / `OPeNDAP` **DMR++** metadata files which may not always be present for your dataset and you may get a `FileNotFoundError`.\n", - "\n", - "\n", - "\n", - "\n", - " **Additional References**: \n", - "\n", - "\n", - "* This tutorial largely follows: [Cloud optimized access to NASA data with earthaccess and virtualizarr](https://earthaccess.readthedocs.io/en/latest/tutorials/dmrpp-virtualizarr/), available on [earthacess](https://earthaccess.readthedocs.io/en/latest/)'s documentation.\n", - "\n", - "* [Nag, Ayush, Gallagher, James. (August, 2024). VirtualiZarr and DMR++. Zenodo. https://doi.org/10.5281/zenodo.13176038](https://doi.org/10.5281/zenodo.13176038).\n", - "\n", - "* [Gallagher, James, Yang, Kent, Lee, Hyokyung. (November, 2024). High-Performance Access to Archival Data Stored in HDF4 and HDF5 on Cloud Object Stores Without Reformatting the Files. Zenodo. https://doi.org/10.5281/zenodo.14232491](https://doi.org/10.5281/zenodo.14232491)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "aac8388c-02aa-4026-93e8-05b3c5cb7c7c", - "metadata": {}, - "outputs": [], - "source": [ - "import earthaccess\n", - "import xarray as xr" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1e556c85-3316-4cb2-9b37-ba9114d38e8f", - "metadata": {}, - "outputs": [], - "source": [ - "print(\"`earthaccess` version: \", earthaccess.__version__)" - ] - }, - { - "cell_type": "markdown", - "id": "e157efb8-e38f-4bbf-a8a2-0754e2cd3821", - "metadata": {}, - "source": [ - "### Lets authenticate!\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e2ee4402-3ca4-44f1-9e33-ac91a7674ca4", - "metadata": {}, - "outputs": [], - "source": [ - "auth = earthaccess.login(strategy=\"interactive\", persist=True)" - ] - }, - { - "cell_type": "markdown", - "id": "39878aed-fb53-49d4-9ea0-ef3bea712f5d", - "metadata": {}, - "source": [ - "### NASA JPL Multiscale Ultrahigh Resolution (MUR) Sea Surface Temperature (SST) dataset - 0.01 degree resolution\n", - "\n", - "We now search for NASA JPL MUR SST data. For that we need\n", - "- temporal range\n", - "- Short Name of collection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8d235f07-39b9-4736-8500-1c62ada34681", - "metadata": {}, - "outputs": [], - "source": [ - "results = earthaccess.search_data(\n", - " temporal=(\"2010-01-01\", \"2010-01-31\"), short_name=\"MUR-JPL-L4-GLOB-v4.1\"\n", - ")\n", - "len(results)" - ] - }, - { - "cell_type": "markdown", - "id": "274c89e7-ea70-4210-b497-e64758511f4b", - "metadata": {}, - "source": [ - "### access DMR++ and create a virtual xarray object\n", - "we set:\n", - "- `access=\"indirect\"`: Running this notebook on binder or local machine.\n", - "- `access=\"direct\"`. 
Use this when runnnig this notebook on an EC2 instance to make the best use of DMR++, xarray, and DASK.\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "46871b1b-575d-49a0-abac-8ecb296ad6de", - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "mur = earthaccess.open_virtual_mfdataset(\n", - " results,\n", - " access=\"indirect\",\n", - " load=True, # This means Dimensions are loaded into memory\n", - " concat_dim=\"time\",\n", - " coords=\"all\",\n", - " compat=\"override\",\n", - " combine_attrs=\"drop_conflicts\",\n", - ")\n", - "mur" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7007bc94-a167-490d-bd02-2e40aac9a4c8", - "metadata": {}, - "outputs": [], - "source": [ - "print(\"This created a virtual reference pointing to \", mur.nbytes/1e9, \"GBs of data on the cloud!\")" - ] - }, - { - "cell_type": "markdown", - "id": "35120686-2211-45c6-a355-47e5d17df483", - "metadata": {}, - "source": [ - "## We now plot some data\n", - "\n", - " This will actually trigger download / computation of the selected dataset\n", - "\n", - " **NOTE**:\n", - "\n", - "* The dimensions are loaded into memory. We can manipulate them \n", - "* Dimensions are coordinates (not always the case). 
So we can subset by spatial lat/lon values!!\n", - "* We can also subset by time (time is a dimension)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "88f3f96b-7c08-4f8c-977c-f074c05cdc88", - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "spatial_subset = mur.isel(time=0).sel(lat=slice(20, 45), lon=slice(-95, -50))\n", - "spatial_subset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1e549f1b-17d6-48c9-88a5-0636796c1fc2", - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "spatial_subset[\"analysed_sst\"].plot.pcolormesh(x=\"lon\", y=\"lat\", cmap=\"RdBu_r\", figsize=(8, 4));" - ] - }, - { - "cell_type": "markdown", - "id": "40e1d34b-0c08-4d89-867b-a3089f3d6a16", - "metadata": {}, - "source": [ - "# A faster workflow:\n", - "\n", - "- Set `Load=False`\n", - "\n", - " This creates a virtual reference with only Chunk Manifets.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8a511969-21c2-4748-a9f5-0d9fdd9706f7", - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "mur_vd = earthaccess.open_virtual_mfdataset(\n", - " results,\n", - " access=\"indirect\",\n", - " load=False,\n", - " concat_dim=\"time\",\n", - " coords=\"all\",\n", - " compat=\"override\",\n", - " combine_attrs=\"drop_conflicts\",\n", - ")\n", - "mur_vd" - ] - }, - { - "cell_type": "markdown", - "id": "cde97235-d6a3-4df0-a480-c3402d8e6e7f", - "metadata": {}, - "source": [ - "## Example of what's inside this virtual dataset\n", - "\n", - "\n", - "- `earthaccess` parses OPeNDAP Hyrax's in the Cloud `DMR++`, extracting Chunk References\n", - "- Creates, using `VirtualiZarr`'s API, a virtual Zarray.\n", - "- Can then store the Kerchunk Reference" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d0b32a5f-3ea6-4ada-b54a-278273fe58d4", - "metadata": {}, - "outputs": [], - "source": [ - "print(mur_vd.analysed_sst.data.zarray)\n", - "print(\"\\n\")\n", - 
"print(mur_vd.analysed_sst.data.manifest.dict()[\"0.0.1\"])" - ] - }, - { - "cell_type": "markdown", - "id": "dab24577-259b-47d0-8d02-ffa50911ca3d", - "metadata": {}, - "source": [ - "## Store as Kerchunk Json" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bb391681-951a-4831-81bf-e38148af5ae7", - "metadata": {}, - "outputs": [], - "source": [ - "mur_vd.virtualize.to_kerchunk(filepath=\"mur_kerchunk.json\", format=\"json\")" - ] - }, - { - "cell_type": "markdown", - "id": "e7cb7a1d-6417-40cd-a217-9dc891157934", - "metadata": {}, - "source": [ - "## Load using xarray and Kerchunk as engine" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a4cf7050-325e-4cbf-8697-198c0cdcdee6", - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "fs = earthaccess.get_fsspec_https_session()\n", - "ds = xr.open_dataset(\n", - " \"reference://\",\n", - " engine=\"zarr\",\n", - " chunks={},\n", - " backend_kwargs={\n", - " \"consolidated\": False,\n", - " \"storage_options\": {\n", - " \"fo\": \"mur_kerchunk.json\",\n", - " \"remote_protocol\": fs.protocol,\n", - " \"remote_options\": fs.storage_options,\n", - " },\n", - " },\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8f9e0237-a41f-4895-8017-6649b0c4bab7", - "metadata": {}, - "outputs": [], - "source": [ - "ds" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "78da4d72-34d8-40da-b74b-111600954ff7", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/binder/environment.yml 
b/binder/environment.yml index 48ff512..2872220 100644 --- a/binder/environment.yml +++ b/binder/environment.yml @@ -4,12 +4,12 @@ channels: dependencies: - pip - numpy -- python = 3.11 +- python = 3.12 - netCDF4 - matplotlib - jupyterlab - cartopy -# - xarray +- xarray - earthaccess - tqdm - dask @@ -18,7 +18,6 @@ dependencies: - xoak - pip: - git+https://github.com/pydap/pydap.git - - git+https://github.com/Mikejmnez/xarray.git@pydap4_scale - jupyter-contrib-nbextensions - ipywidgets - widgetsnbextension diff --git a/binder/on-premOPeNDAP.ipynb b/binder/on-premOPeNDAP.ipynb index 86fa6d9..6691665 100644 --- a/binder/on-premOPeNDAP.ipynb +++ b/binder/on-premOPeNDAP.ipynb @@ -11,29 +11,31 @@ "\n", "\n", "**Requirements**\n", - "1. Have a Bearer Token for EarthData in the Cloud (See `GetStarted` Notebook).\n", - "2. Upload the Bearer Token from local file `token.json`\n", + "1. Have an Earthdata Login (EDL) account.\n", + "2. Log in interactively with `earthaccess` when prompted.\n", "\n", "\n", - " For completion, this notebook acessess data from PACE via OPeNDAP on-premisses server. The workflow is identical to accessing data on Hyrax in the Cloud.\n", + " For completeness, this notebook accesses data from PACE via an on-premises OPeNDAP server. 
The workflow is identical to accessing data on Hyrax in the Cloud.\n", "\n", "\n", " **Objectives**\n", " \n", " \n", - "- Demostrate how to use NASA's `Common Metadata Repository` ([CMR](https://cmr.earthdata.nasa.gov/search)) to find `OPeNDAP URLS` associated with a collection.\n", - "- Demonstrate the use of `Constraint Expressions` to reduce metadata during Virtual Dataset creation\n", - "- Use **PyDAP**'s `consolidate_metadata` to accelerate data cube creation via `xarray.open_mfdataset`.\n", - "- Demonstrate an advanced workflow for remote data access and plotting of **Level 3** PACE data concerning surface `chlorophyll a`.\n", + "- Demonstrate how to use NASA's `Common Metadata Repository` ([CMR](https://cmr.earthdata.nasa.gov/search)) to find `OPeNDAP URLS` associated with a collection.\n", + "- Demonstrate the use of `Constraint Expressions` to reduce metadata during Virtual Dataset creation\n", + "- Use **PyDAP**'s `consolidate_metadata` to accelerate data cube creation via `xarray.open_mfdataset`.\n", + "- Demonstrate an advanced workflow for remote data access and plotting of **Level 3** PACE data concerning surface `chlorophyll a`.\n", "\n", "\n", "**Browsing Data**:\n", "\n", - " We are interested in PACE OCI data with **doi**: `10.5067/PACE/OCI/L3M/CHL/3.0`.\n", + " We are interested in PACE OCI data with **doi**: `10.5067/PACE/OCI/L3M/CHL/3.0`.\n", "\n", - " The **doi** can be found using Earthdata search.\n", + " The **doi** can be found using Earthdata search.\n", "\n", - " For more information about PACE, head to https://pace.oceansciences.org/ " + " For more information about PACE, head to https://pace.oceansciences.org/ \n", + "\n", + "`Author`: Miguel Jimenez-Urias, '25" ] }, { @@ -43,18 +45,16 @@ "metadata": {}, "outputs": [], "source": [ - "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import requests\n", - "from pydap.client import open_url\n", + "from pydap.client import open_url, consolidate_metadata, get_cmr_urls\n", "from 
pydap.net import create_session\n", - "import json\n", "import cartopy.crs as ccrs\n", "import xarray as xr\n", "import datetime as dt\n", - "from pydap.client import consolidate_metadata\n", "import pydap\n", - "import requests_cache" + "import earthaccess\n", + "import matplotlib.pyplot as plt" ] }, { @@ -64,9 +64,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(\"pydap version: \", pydap.__version__)\n", - "print(\"requests cache version: \", requests_cache.__version__)\n", - "print(\"requests version: \", requests.__version__)" + "print(\"pydap version: \", pydap.__version__)" ] }, { @@ -76,14 +74,14 @@ "source": [ "**Finding Cloud OPeNDAP URLs with NASA's CMR**:\n", "\n", - " Below we illustrate how to find OPeNDAP URLs via the **CMR**\n", + " Below we illustrate how to find OPeNDAP URLs via the **CMR**\n", "\n", " **To find (on-prem) OPeNDAP URLs you will need:**\n", "\n", "* One of `Collection Concept ID` or `dataset DOI`\n", "* Time Range\n", "\n", - " On-prem OPeNDAP URLs look distinct to cloud OPeNDAP URLs. However, the workflow for finding OPeNDAP URLs and accessing OPeNDAP-served data remains identical. \n", + " On-prem OPeNDAP URLs look distinct to cloud OPeNDAP URLs. However, the workflow for finding OPeNDAP URLs and accessing OPeNDAP-served data remains identical. 
\n", "\n", "\n" ] @@ -95,7 +93,9 @@ "metadata": {}, "outputs": [], "source": [ - "session = requests.Session()" + "auth = earthaccess.login(strategy=\"interactive\", persist=True)\n", + "session = create_session(use_cache=True, session=earthaccess.get_requests_https_session(), cache_kwargs={'cache_name': 'data/PACE'})\n", + "session.cache.clear()" ] }, { @@ -105,265 +105,313 @@ "metadata": {}, "outputs": [], "source": [ - "# CMR API base url\n", - "cmrurl='https://cmr.earthdata.nasa.gov/search/'\n", - "doi = \"10.5067/PACE/OCI/L3M/CHL/3.0\"\n", - "doisearch = cmrurl + 'collections.json?doi=' + doi\n", - "print(doisearch)\n", - "\n", - "concept_id = session.get(doisearch).json()['feed']['entry'][0]['id']\n", - "print(concept_id)" + "# Version 3.1 of Chlorophyll data\n", + "PACE_ccid = \"C3620140255-OB_CLOUD\"\n", + "\n", + "## Let's look for seven months of data (January through July 2025)\n", + "time_range = [dt.datetime(2025, 1, 1), dt.datetime(2025, 7, 31)]\n", + "\n", + "granules_urls = get_cmr_urls(ccid=PACE_ccid, time_range=time_range)\n", + "\n", + "print(\"We found: \", len(granules_urls), \" total Non-Cloud OPeNDAP URLS associated with this collection!\")" ] }, { "cell_type": "markdown", - "id": "7f4ba5c9-7665-4d75-81f5-de4862b87ee6", + "id": "0ed35bf4-d33e-464f-8366-dad5f1b88f45", "metadata": {}, "source": [ - " **Specify time range**\n", + " **Further Filter via OPeNDAP Parameters:**\n", "\n", - " This dataset covers `March 2024` to present day. 
\n" + "* We want to use the `DAP4` protocol.\n", + "* 4km daily data\n", + "* We are only interested in the `chlor_a` variable, and its dimensions (coordinates)" ] }, { "cell_type": "code", "execution_count": null, - "id": "c3de7046-f8f0-4dbd-93f5-f75609b99d7a", + "id": "ed8b2b05-64f9-4356-beab-41074fb8a0b2", "metadata": {}, "outputs": [], "source": [ - "start_date = dt.datetime(2024, 4, 1) \n", - "end_date = dt.datetime(2024, 12, 31)\n", - "\n", - "print(start_date, end_date,sep='\\n')\n", + "## Build a constraint expression in DAP4\n", + "CEs = \"?dap4.ce=/lat;/lon;/chlor_a\"\n", "\n", - "dt_format = '%Y-%m-%dT%H:%M:%SZ' # format requirement for datetime search\n", - "temporal_str = start_date.strftime(dt_format) + ',' + end_date.strftime(dt_format)\n", - "print(temporal_str)" + "# Filter the URLs for 4km and DAY in the URL string, and specify DAP4 in the url by replacing https --> dap4\n", + "new_urls = [url.replace(\"https\", \"dap4\") + CEs for url in granules_urls if '4km' in url and \"DAY\" in url]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eaafaa7a-9417-43c9-9231-d5e4dc5c1fa3", + "metadata": {}, + "outputs": [], + "source": [ + "new_urls[:4]" ] }, { "cell_type": "markdown", - "id": "b2680c99-1374-41ce-8d79-d2576fce0449", + "id": "99f870de-f894-4aa6-a9a3-1061c50d59d6", "metadata": {}, "source": [ - " **Get all available OPeNDAP URLs via CMR**\n", + "## Consolidate all URL Metadata Associated with the Data URL of cloud OPeNDAP URLs\n", "\n", - "The cell below will search/find all OPeNDAP URLs associated with the Collection concept ID.\n", + "**PyDAP** can construct a (cached) reference to all Cloud OPeNDAP URLs, and it can persist through sessions. Meaning, these Cloud OPeNDAP URLs can be stored on your machine\n", + "for later use!!!! 
\n", "\n", - "The results wll be stored in the variable `granules_urls`.\n", - " " + "\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "6ce8d678-3f0f-4616-a473-0b682ddf9906", + "id": "20e088a5-c314-410e-854f-84e0872f983d", "metadata": {}, "outputs": [], "source": [ - "def get_opendap_urls(concept_id, time_range, _session=None):\n", - " \"\"\"\n", - " Queries NASA's `Common Metadata Repository` to identify all OPeNDAP URLS\n", - " given collection concept ID and temporal time range.\n", - " \"\"\"\n", - " cmr_url = 'https://cmr.earthdata.nasa.gov/search/granules'\n", - " if not _session:\n", - " _session = requests.Session() \n", - " cmr_response = _session.get(cmr_url, params={'concept_id': concept_id,'temporal': time_range,'page_size': 500}, headers={'Accept': 'application/json'})\n", - " granules = cmr_response.json()['feed']['entry']\n", - " granules_urls = []\n", - " \n", - " # Filter and only retain the OPeNDAP URLs\n", - " for granule in granules:\n", - " item = next((item['href'] for item in granule['links'] if \"opendap\" in item[\"href\"]), None)\n", - " if item != None:\n", - " granules_urls.append(item)\n", - " return granules_urls" + "%%time\n", + "consolidate_metadata(new_urls, session=session)" + ] + }, + { + "cell_type": "markdown", + "id": "a406ae4b-64ce-48a1-916c-03ba18fbc8b6", + "metadata": {}, + "source": [ + "## Create a datacube with xarray and pydap as an engine!\n", + "\n", + "\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "737eae5a-4263-4f7a-b4f4-6931c01d4812", + "id": "74f72bc7-7612-4552-a5ff-61bead9b9088", "metadata": {}, "outputs": [], "source": [ "%%time\n", - "granules_urls = get_opendap_urls(concept_id, temporal_str)" + "ds = xr.open_mfdataset(\n", + " new_urls, \n", + " engine='pydap', \n", + " session=session, \n", + " parallel=True, \n", + " combine='nested', \n", + " concat_dim='time')" ] }, { "cell_type": "code", "execution_count": null, - "id": "ed8b2b05-64f9-4356-beab-41074fb8a0b2", + "id": 
"6d318916-8819-46fb-9cd8-443bb3ba2e14", "metadata": {}, "outputs": [], "source": [ - "print(\"We found: \", len(granules_urls), \" total Non-Cloud OPeNDAP URLS associated with this collection! However not all these belong to the same datacube. WE need to further filter these\")" + "ds" ] }, { "cell_type": "code", "execution_count": null, - "id": "eaafaa7a-9417-43c9-9231-d5e4dc5c1fa3", + "id": "4dc79946-3c5e-421b-84f7-029188eeaa7b", "metadata": {}, "outputs": [], "source": [ - "granules_urls[:10]" + "chlor_a = ds['chlor_a'].isel(time=-1)\n", + "chlor_a" ] }, { "cell_type": "code", "execution_count": null, - "id": "889e38ce-d5f7-41ce-ac43-2b232a78e166", + "id": "acb433a6-23c9-4527-bf54-0df1122c0f90", "metadata": {}, "outputs": [], - "source": [ - "new_urls = [url.replace(\"https\", \"dap4\") for url in granules_urls if '4km' in url and \"DAY\" in url]\n", - "print(\"Of the 500 OPeNDAP URLs in the Collection, only \", len(new_urls), \" are associated with the correct data cube. \")" - ] + "source": [] }, { "cell_type": "code", "execution_count": null, - "id": "dcea9ae6-29c6-4530-9780-c27b0071975d", + "id": "55743fc2-8b50-40cd-b0f7-f6c44db55f64", "metadata": {}, "outputs": [], "source": [ - "new_urls[:10]" + "%%time\n", + "plt.figure(figsize=(25, 8))\n", + "ax = plt.axes(projection=ccrs.PlateCarree())\n", + "ax.set_global()\n", + "ax.coastlines()\n", + "plt.contourf(ds.lon, ds.lat, np.log(chlor_a), 400, cmap='nipy_spectral')\n", + "plt.colorbar().set_label(chlor_a.attrs['long_name'] + ' ['+chlor_a.attrs['units']+']')\n", + "plt.show()" ] }, { "cell_type": "markdown", - "id": "26f37f4e-93e1-45eb-971e-cebf873d1dfd", + "id": "ee3ec205-f09d-4d39-893e-9a193603f58c", "metadata": {}, "source": [ - "### Recover locally stored token for authentication" + "\n", + "### Identify spatial subset\n", + "\n", + "In this case, we are interested in a spatial subset. The data is Level 3 data (gridded) so latitude and longitude are uniform. 
Moreover, these are 1D, and have already been downloaded into memory!" ] }, { "cell_type": "code", "execution_count": null, - "id": "e003ecd3-f059-4fb2-a71c-e48d603fb073", + "id": "f256e1ab-e6fc-4bc6-960c-f3c56863bde6", "metadata": {}, "outputs": [], "source": [ - "# load token json data\n", - "with open('token.json', 'r') as fp:\n", - " token = json.load(fp)\n", - "\n", - "# pass Token Authorization to a new Session.\n", - "my_session = create_session(use_cache=True, session_kwargs=token)\n", - "# clear just in case\n", - "my_session.cache.clear()" + "lat, lon = ds['lat'].values, ds['lon'].values \n", + "\n", + "# Min/max of lon values\n", + "minLon, maxLon = -96, 10\n", + "\n", + "# Min/Max of lat values\n", + "minLat, maxLat = 6, 70\n", + "\n", + "# Find indexes where we want to retain data.\n", + "iLon = np.where((lon>minLon)&(lon < maxLon))[0]\n", + "iLat= np.where((lat>minLat)&(lat < maxLat))[0]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3de93aa1-456e-42f4-8fec-1e61aa4a3706", + "metadata": {}, + "outputs": [], + "source": [ + "iLon[0], iLon[-1], iLat[0], iLat[-1]" ] }, { "cell_type": "markdown", - "id": "99f870de-f894-4aa6-a9a3-1061c50d59d6", + "id": "4a872144-a0e6-490a-beaa-bbb87b58f792", "metadata": {}, "source": [ - "## Consolidate all URL Metadata Associated with the Data URL of cloud OPeNDAP URLs\n", "\n", - "**PyDAP** allows to construct a (cached) reference to all Cloud OPeNDAP urls, and can persist through sessions. Meaning, these Cloud OPenDAP URLS can be stored in your machine\n", - "for later use!!!! 
\n", + "### Re-open dataset and chunk to match slices\n", "\n", - "\n" + "This is the approach, when opening multiple remote files, to pass the slice to the server so subsetting is done proximate to the data" ] }, { "cell_type": "code", "execution_count": null, - "id": "20e088a5-c314-410e-854f-84e0872f983d", + "id": "8218d137-380c-40c4-9ae2-23d7a7ab4dfe", "metadata": {}, "outputs": [], "source": [ "%%time\n", - "consolidate_metadata(new_urls, my_session)" + "ds = xr.open_mfdataset(\n", + " new_urls, \n", + " engine='pydap', \n", + " session=session, \n", + " parallel=True, \n", + " concat_dim='time', # <------ a time dimension will be created \n", + " combine='nested',\n", + " chunks = {'lon': len(iLon), 'lat':len(iLat)} # <----------- This instructs the OPeNDAP server to subset in space\n", + ")\n", + "ds" ] }, { "cell_type": "code", "execution_count": null, - "id": "dcc846e9-3b23-4924-b3ad-dd8476b8b9e5", + "id": "38e1b99b-e5b5-47fe-bc42-4d0482d6bf52", "metadata": {}, "outputs": [], "source": [ - "my_session.cache.urls()[:10]" + "ds[\"chlor_a\"] ## inspect the chunk of the data" ] }, { - "cell_type": "markdown", - "id": "a406ae4b-64ce-48a1-916c-03ba18fbc8b6", + "cell_type": "code", + "execution_count": null, + "id": "f6b384e6-18cb-4285-acd7-477968b59d9e", "metadata": {}, + "outputs": [], "source": [ - "## Create a datacube with xarray and pydap as an engine!\n", - "\n", - "\n" + "nds = ds.isel(lon=slice(iLon[0], iLon[-1]+1), lat=slice(iLat[0], iLat[-1]+1))" ] }, { "cell_type": "code", "execution_count": null, - "id": "74f72bc7-7612-4552-a5ff-61bead9b9088", + "id": "64184778-5bd9-490a-9927-6cb4907c6341", "metadata": {}, "outputs": [], "source": [ - "%%time\n", - "ds = xr.open_mfdataset(new_urls, engine='pydap', session=my_session, parallel=True, combine='nested', concat_dim='time')" + "nds['chlor_a']" ] }, { "cell_type": "code", "execution_count": null, - "id": "6d318916-8819-46fb-9cd8-443bb3ba2e14", + "id": "64430ebc-74e0-44ea-8379-bbc16e406ede", "metadata": {}, 
"outputs": [], "source": [ - "ds" + "%%time\n", + "nds.to_netcdf(\"data/pace_subset.nc4\", mode='w')" + ] + }, + { + "cell_type": "markdown", + "id": "19c4cec1-6979-44ac-8c67-b89f2d61ebe5", + "metadata": {}, + "source": [ + "## Finally inspect data" ] }, { "cell_type": "code", "execution_count": null, - "id": "4dc79946-3c5e-421b-84f7-029188eeaa7b", + "id": "6210c848-483b-41c3-8a50-0c1a7a9779fe", "metadata": {}, "outputs": [], "source": [ - "chlor_a = ds['chlor_a'].isel(time=0)\n", - "chlor_a" + "mds = xr.open_dataset(\"data/pace_subset.nc4\", chunks={\"time\":1}) # use default engine for NetCDF4 \n", + "mds" ] }, { - "cell_type": "markdown", - "id": "088e4df4-ae80-43e1-a8b4-6f26b62019d0", + "cell_type": "code", + "execution_count": null, + "id": "49b6c507-da19-4ca8-b2f7-69b5d1b5d51e", "metadata": {}, + "outputs": [], "source": [ - "## Lets visualize some data\n" + "chlor_a_sub = mds['chlor_a']\n", + "chlor_a_sub" ] }, { "cell_type": "code", "execution_count": null, - "id": "55743fc2-8b50-40cd-b0f7-f6c44db55f64", + "id": "50afd389-88a7-4264-8d39-05e6824f4f26", "metadata": {}, "outputs": [], "source": [ "%%time\n", "plt.figure(figsize=(25, 8))\n", "ax = plt.axes(projection=ccrs.PlateCarree())\n", - "ax.set_global()\n", "ax.coastlines()\n", - "plt.contourf(ds.lon, ds.lat, np.log(chlor_a), 400, cmap='nipy_spectral')\n", - "plt.colorbar().set_label(chlor_a.attrs['long_name'] + ' ['+chlor_a.attrs['units']+']')\n", + "plt.contourf(mds.lon, mds.lat, np.log(chlor_a_sub.isel(time=-1)), 400, cmap='nipy_spectral')\n", + "plt.colorbar().set_label(chlor_a_sub.attrs['long_name'] + ' ['+chlor_a_sub.attrs['units']+']')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, - "id": "6375e132-770d-4318-800f-46c0fd933527", + "id": "447aef93-85b8-40be-bade-6e6eed7122c2", "metadata": {}, "outputs": [], "source": [] @@ -385,7 +433,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.12" + "version": "3.12.11" } }, 
"nbformat": 4,