diff --git a/CHANGELOG.md b/CHANGELOG.md index 02009f87..ce2cf6da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,7 +23,9 @@ from the examples given in that link. ## ref-sample-data 0.1.1 (2025-01-08) -No significant changes. +### Bug Fixes + +- Correct the location of the datasets within the repository ([#1](https://github.com/CMIP-REF/ref-sample-data/pulls/1)) ## ref-sample-data 0.1.0 (2025-01-08) diff --git a/Makefile b/Makefile index a127a1ed..6680deef 100644 --- a/Makefile +++ b/Makefile @@ -51,6 +51,7 @@ virtual-environment: ## update virtual environment, create a new one if it does .PHONY: fetch-test-data fetch-test-data: ## Fetch test data + rm -rf data uv run python ./scripts/fetch_test_data.py registry.txt: data ## Generate a registry of all the packages diff --git a/changelog/1.bug.md b/changelog/1.bug.md deleted file mode 100644 index 61330175..00000000 --- a/changelog/1.bug.md +++ /dev/null @@ -1 +0,0 @@ -Correct the location of the datasets within the repository diff --git a/changelog/2.breaking.md b/changelog/2.breaking.md new file mode 100644 index 00000000..79aac73c --- /dev/null +++ b/changelog/2.breaking.md @@ -0,0 +1,2 @@ +Use the dataset versions from ESGF instead of the values in the netCDF files. +Different files in the same dataset may contain different versions inside their netCDF files. 
diff --git a/data/CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rlut/gn/v20191115/rlut_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc b/data/CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rlut/gn/20191115/rlut_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc similarity index 100% rename from data/CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rlut/gn/v20191115/rlut_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc rename to data/CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rlut/gn/20191115/rlut_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc diff --git a/data/CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsdt/gn/v20191115/rsdt_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc b/data/CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsdt/gn/20191115/rsdt_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc similarity index 100% rename from data/CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsdt/gn/v20191115/rsdt_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc rename to data/CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsdt/gn/20191115/rsdt_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc diff --git a/data/CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsut/gn/v20191115/rsut_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc b/data/CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsut/gn/20191115/rsut_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc similarity index 100% rename from data/CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsut/gn/v20191115/rsut_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc rename to data/CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsut/gn/20191115/rsut_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc diff --git 
a/data/CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/tas/gn/v20191115/tas_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc b/data/CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/tas/gn/20191115/tas_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc similarity index 100% rename from data/CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/tas/gn/v20191115/tas_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc rename to data/CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/tas/gn/20191115/tas_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc diff --git a/data/CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/fx/areacella/gn/20191115/areacella_fx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc b/data/CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/fx/areacella/gn/20191115/areacella_fx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc new file mode 100644 index 00000000..a4823cdf Binary files /dev/null and b/data/CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/fx/areacella/gn/20191115/areacella_fx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc differ diff --git a/data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rlut/gn/v20210318/rlut_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc b/data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rlut/gn/20210318/rlut_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc similarity index 100% rename from data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rlut/gn/v20210318/rlut_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc rename to data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rlut/gn/20210318/rlut_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc diff --git a/data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsdt/gn/v20191115/rsdt_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_201501-210012.nc 
b/data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsdt/gn/20210318/rsdt_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_201501-210012.nc similarity index 100% rename from data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsdt/gn/v20191115/rsdt_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_201501-210012.nc rename to data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsdt/gn/20210318/rsdt_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_201501-210012.nc diff --git a/data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsdt/gn/v20210318/rsdt_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc b/data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsdt/gn/20210318/rsdt_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc similarity index 100% rename from data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsdt/gn/v20210318/rsdt_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc rename to data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsdt/gn/20210318/rsdt_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc diff --git a/data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsut/gn/v20191115/rsut_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_201501-210012.nc b/data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsut/gn/20210318/rsut_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_201501-210012.nc similarity index 100% rename from data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsut/gn/v20191115/rsut_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_201501-210012.nc rename to data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsut/gn/20210318/rsut_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_201501-210012.nc diff --git a/data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsut/gn/v20210318/rsut_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc 
b/data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsut/gn/20210318/rsut_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc similarity index 100% rename from data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsut/gn/v20210318/rsut_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc rename to data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsut/gn/20210318/rsut_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc diff --git a/data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/tas/gn/v20191115/tas_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_201501-210012.nc b/data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/tas/gn/20210318/tas_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_201501-210012.nc similarity index 100% rename from data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/tas/gn/v20191115/tas_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_201501-210012.nc rename to data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/tas/gn/20210318/tas_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_201501-210012.nc diff --git a/data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/tas/gn/v20210318/tas_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc b/data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/tas/gn/20210318/tas_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc similarity index 100% rename from data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/tas/gn/v20210318/tas_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc rename to data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/tas/gn/20210318/tas_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc diff --git a/data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/fx/areacella/gn/20210318/areacella_fx_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn.nc b/data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/fx/areacella/gn/20210318/areacella_fx_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn.nc new file mode 
100644 index 00000000..9f4d0033 Binary files /dev/null and b/data/CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/fx/areacella/gn/20210318/areacella_fx_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn.nc differ diff --git a/registry.txt b/registry.txt index 2104a176..85074d78 100644 --- a/registry.txt +++ b/registry.txt @@ -1,11 +1,13 @@ -CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rlut/gn/v20191115/rlut_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc 95341df80de95ddb0b45da11aed67db771414fff94508687fb30fce63b82c104 -CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsdt/gn/v20191115/rsdt_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc 25e9e817a05ffab4a2b073078f6be0e52096fa9da8eb55f009d079842c708614 -CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsut/gn/v20191115/rsut_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc 139c4c59d98c737ce2d7ca777e52e35e38d49fcb8b08dd98175ed0f1354f8e75 -CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/tas/gn/v20191115/tas_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc be4a191c75b3643aad34238970c0587128a3852694f2c61425b4bbda42e5ff08 -CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rlut/gn/v20210318/rlut_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc 1083d92079e9c40d3797ecc4235df1c86c99af7ca3b9458b21f1d34054351041 -CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsdt/gn/v20191115/rsdt_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_201501-210012.nc 9f9ae50efc55f4e18dc174d7c3af10f4e67a391c84d81cdb6ba574fa8b61b276 -CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsdt/gn/v20210318/rsdt_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc 2299e10eb6ccf190fe07f7b60aa40b8700f7f964ca68c989f3572abe39eb22c7 -CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsut/gn/v20191115/rsut_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_201501-210012.nc 5141bb64d6f457550d8bf429a4233af1bd706ed8b2131fc2ef329bcb6db7a236 
-CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsut/gn/v20210318/rsut_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc 49fbd6c0d7b8c0d10a270e8d88191764c02ba651f80b464605dfa5b0221d622b -CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/tas/gn/v20191115/tas_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_201501-210012.nc bff52adef26d48d4b747368816aff3712c606cafa92f6b78f4974f23efcba510 -CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/tas/gn/v20210318/tas_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc b61faa48540472be5b208a4ecf40873860c1d4cfb7f50a4dff4ac17ee2ba4f73 +CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rlut/gn/20191115/rlut_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc 95341df80de95ddb0b45da11aed67db771414fff94508687fb30fce63b82c104 +CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsdt/gn/20191115/rsdt_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc 25e9e817a05ffab4a2b073078f6be0e52096fa9da8eb55f009d079842c708614 +CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsut/gn/20191115/rsut_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc 139c4c59d98c737ce2d7ca777e52e35e38d49fcb8b08dd98175ed0f1354f8e75 +CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/tas/gn/20191115/tas_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc be4a191c75b3643aad34238970c0587128a3852694f2c61425b4bbda42e5ff08 +CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/fx/areacella/gn/20191115/areacella_fx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc d9d07cacc65c196b9ec47d60cabcf86fd397b1e22063a32c3798a98ee3dfb16e +CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rlut/gn/20210318/rlut_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc 1083d92079e9c40d3797ecc4235df1c86c99af7ca3b9458b21f1d34054351041 +CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsdt/gn/20210318/rsdt_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_201501-210012.nc 
9f9ae50efc55f4e18dc174d7c3af10f4e67a391c84d81cdb6ba574fa8b61b276 +CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsdt/gn/20210318/rsdt_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc 2299e10eb6ccf190fe07f7b60aa40b8700f7f964ca68c989f3572abe39eb22c7 +CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsut/gn/20210318/rsut_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_201501-210012.nc 5141bb64d6f457550d8bf429a4233af1bd706ed8b2131fc2ef329bcb6db7a236 +CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsut/gn/20210318/rsut_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc 49fbd6c0d7b8c0d10a270e8d88191764c02ba651f80b464605dfa5b0221d622b +CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/tas/gn/20210318/tas_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_201501-210012.nc bff52adef26d48d4b747368816aff3712c606cafa92f6b78f4974f23efcba510 +CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/tas/gn/20210318/tas_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_210101-230012.nc b61faa48540472be5b208a4ecf40873860c1d4cfb7f50a4dff4ac17ee2ba4f73 +CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/fx/areacella/gn/20210318/areacella_fx_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn.nc e8e3b873d9ba115974329c0f7785c9e30dcca66007fa973c22cd734efc46dcfd diff --git a/scripts/fetch_test_data.py b/scripts/fetch_test_data.py index c3fb8a1a..828f145f 100644 --- a/scripts/fetch_test_data.py +++ b/scripts/fetch_test_data.py @@ -9,13 +9,14 @@ from pathlib import Path from typing import Any +import pandas as pd import xarray as xr from intake_esgf import ESGFCatalog OUTPUT_PATH = Path("data") -def fetch_datasets(search_facets: dict[str, Any], remove_ensembles: bool) -> list[Path]: +def fetch_datasets(search_facets: dict[str, Any], remove_ensembles: bool) -> pd.DataFrame: """ Fetch the datasets from ESGF. 
@@ -37,13 +38,14 @@ def fetch_datasets(search_facets: dict[str, Any], remove_ensembles: bool) -> lis if remove_ensembles: cat.remove_ensembles() - path_dict = cat.to_path_dict(prefer_streaming=False) + path_dict = cat.to_path_dict(prefer_streaming=False, minimal_keys=False) - # Flatten list of lists into a single list - return [p for dataset_paths in path_dict.values() for p in dataset_paths] + merged_df = cat.df.merge(pd.Series(path_dict, name="files"), left_on="key", right_index=True) + return merged_df -def downscale_dataset(dataset: xr.Dataset) -> xr.Dataset: + +def decimate_dataset(dataset: xr.Dataset) -> xr.Dataset: """ Downscale the dataset to a smaller size. @@ -65,7 +67,7 @@ def downscale_dataset(dataset: xr.Dataset) -> xr.Dataset: return spatial_downscale -def create_out_filename(ds: xr.Dataset) -> pathlib.Path: +def create_out_filename(metadata: pd.Series, ds: xr.Dataset) -> pathlib.Path: """ Create the output filename for the dataset. @@ -80,11 +82,11 @@ def create_out_filename(ds: xr.Dataset) -> pathlib.Path: """ cmip6_path_items = [ "mip_era", - "activity_id", + "activity_drs", "institution_id", "source_id", "experiment_id", - "variant_label", + "member_id", "table_id", "variable_id", "grid_label", @@ -96,43 +98,54 @@ def create_out_filename(ds: xr.Dataset) -> pathlib.Path: "table_id", "source_id", "experiment_id", - "variant_label", + "member_id", "grid_label", ] - output_path = Path(os.path.join(*[str(ds.attrs[item]) for item in cmip6_path_items])) + output_path = Path(os.path.join(*[metadata[item] for item in cmip6_path_items])) + filename_prefix = "_".join([metadata[item] for item in cmip6_filename_paths]) if "time" in ds.dims: time_range = f"{ds.time.min().dt.strftime('%Y%m').item()}-{ds.time.max().dt.strftime('%Y%m').item()}" - filename = "_".join([str(ds.attrs[item]) for item in cmip6_filename_paths]) + f"_{time_range}.nc" + filename = f"{filename_prefix}_{time_range}.nc" else: - filename = "_".join([str(ds.attrs[item]) for item in 
cmip6_filename_paths]) + ".nc" + filename = f"{filename_prefix}.nc" return output_path / filename if __name__ == "__main__": - datasets: list[Path] = [] - facets_to_fetch = [ dict( source_id="ACCESS-ESM1-5", frequency="mon", variable_id=["tas", "rsut", "rlut", "rsdt"], - experiment_id=["ssp119", "ssp126", "historical"], + experiment_id=["ssp126", "historical"], + remove_ensembles=True, + ), + dict( + source_id="ACCESS-ESM1-5", + frequency="fx", + variable_id=["areacella"], + experiment_id=["ssp126", "historical"], remove_ensembles=True, ), ] + dataset_metadata_collection: list[pd.DataFrame] = [] for facets in facets_to_fetch: remove_ensembles = facets.pop("remove_ensembles", False) - datasets.extend(fetch_datasets(facets, remove_ensembles=remove_ensembles)) - print(datasets) - for dataset_path in datasets: - ds_orig = xr.open_dataset(dataset_path) + dataset_metadata_collection.append(fetch_datasets(facets, remove_ensembles=remove_ensembles)) + + datasets = pd.concat(dataset_metadata_collection) + + for _, dataset in datasets.iterrows(): + print(dataset.key) + for ds_filename in dataset["files"]: + ds_orig = xr.open_dataset(ds_filename) - ds_downscaled = downscale_dataset(ds_orig) + ds_downscaled = decimate_dataset(ds_orig) - output_filename = OUTPUT_PATH / create_out_filename(ds_orig) - output_filename.parent.mkdir(parents=True, exist_ok=True) - ds_downscaled.to_netcdf(output_filename) + output_filename = OUTPUT_PATH / create_out_filename(dataset, ds_orig) + output_filename.parent.mkdir(parents=True, exist_ok=True) + ds_downscaled.to_netcdf(output_filename)