From 2b4fbebe3587f11e61405f2605fbeb86f50a1548 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 24 Sep 2025 16:02:26 +0100 Subject: [PATCH 01/10] dev --- Changelog.rst | 11 +++++++++++ README.md | 3 ++- docs/source/installation.rst | 6 ++++++ docs/source/introduction.rst | 7 ++++--- docs/source/tutorial.rst | 9 ++++++--- setup.py | 7 +++++-- 6 files changed, 34 insertions(+), 9 deletions(-) diff --git a/Changelog.rst b/Changelog.rst index d6ab07043d..502f931519 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -1,3 +1,14 @@ +Version NEXTVERSION +-------------- + +* Write Zarr v3 datasets with `cf.write`, and allow the reading of + grouped Zarr v2 and v3 datasets with `cf.read` + (https://github.com/NCAS-CMS/cf-python/issues/895) +* Read Zarr v2 and v3 datasets that contain a group hierarchy with + `cf.read` (https://github.com/NCAS-CMS/cf-python/issues/894) + +---- + Version 3.18.1 -------------- diff --git a/README.md b/README.md index ce752fd0af..45c70a4f17 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,8 @@ of its array manipulation and can: * read field constructs from netCDF, CDL, Zarr, PP and UM datasets with a choice of netCDF backends,and in local, http, and s3 locations, * create new field constructs in memory, -* write and append field and domain constructs to netCDF datasets on disk, +* write and append field and domain constructs to netCDF and Zarr v3 + datasets on disk, * read, create, and manipulate UGRID mesh topologies, * read, write, and create coordinates defined by geometry cells, * read netCDF and CDL datasets containing hierarchical groups, diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 40cec52247..29c7820947 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -265,6 +265,12 @@ Some further dependencies that enable further functionality are optional. This to facilitate cf-python being installed in restricted environments for which these features are not required. +.. 
rubric:: Zarr + +* `zarr `_, version 3.1.2 or newer. + + For reading and writing Zarr datasets. + .. rubric:: Regridding * `esmpy `_, previously diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst index ca7789acb7..ee27ac47fb 100644 --- a/docs/source/introduction.rst +++ b/docs/source/introduction.rst @@ -67,8 +67,8 @@ may nonetheless be modified in memory. The `cf` package can: * read :term:`field constructs ` and :term:`domain - constructs ` from netCDF, CDL, PP and UM datasets - with a choice of netCDF backends, + constructs ` from netCDF, CDL, Zarr, PP and UM + datasets with a choice of netCDF backends, * read files from OPeNDAP servers and S3 object stores, @@ -76,7 +76,8 @@ The `cf` package can: * create new field constructs in memory, -* write and append field constructs to netCDF datasets on disk, +* write and append field and domain constructs to netCDF and Zarr + datasets on disk, * read, write, and manipulate UGRID mesh topologies, diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index 4b6806b619..9d501c91de 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -5275,8 +5275,11 @@ Method Classes **Writing to a netCDF dataset** ------------------------------- +**Writing to disk** +------------------- + The `cf.write` function writes a field construct, or a sequence of -field constructs, to a new netCDF file on disk: +field constructs, to a netCDF or Zarr dataset on disk: .. code-block:: python :caption: *Write a field construct to a netCDF dataset on disk.* @@ -5345,8 +5348,8 @@ By default the output file will be for CF-|version|. 
The `cf.write` function has optional parameters to -* set the output netCDF format (all netCDF3 and netCDF4 formats are - possible); +* set the output netCDF format (all netCDF3 and netCDF4 formats, as + well as Zarr v3 are possible); * append to the netCDF file rather than over-writing it by default; diff --git a/setup.py b/setup.py index f82bd9182d..6241d9ad3f 100755 --- a/setup.py +++ b/setup.py @@ -177,13 +177,13 @@ def compile(): The ``cf`` package can: -* read field constructs from netCDF, CDL, Zarr, PP and UM datasets, +* read field and domain constructs from netCDF, CDL, Zarr, PP and UM datasets, * be fully flexible with respect to dataset storage chunking, * create new field constructs in memory, -* write and append field constructs to netCDF datasets on disk, +* write and append field constructs and domain to netCDF and Zarr v3 datasets on disk, * read, write, and create coordinates defined by geometry cells, @@ -262,6 +262,9 @@ def compile(): "docformatter", "flake8", ], + "zarr": [ + "zarr>=3.1.2", + ], } setup( From dbbecf8b5f0346d6d777ed2b4dcce716fdb4bb25 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 20 Oct 2025 17:52:14 +0100 Subject: [PATCH 02/10] dev --- Changelog.rst | 1 + cf/test/test_zarr.py | 333 +++++++++++++++++++++++++++++++++++ docs/source/installation.rst | 2 +- 3 files changed, 335 insertions(+), 1 deletion(-) create mode 100644 cf/test/test_zarr.py diff --git a/Changelog.rst b/Changelog.rst index 6bee2feca2..5546cf55db 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -6,6 +6,7 @@ Version NEXTVERSION (https://github.com/NCAS-CMS/cf-python/issues/895) * Read Zarr v2 and v3 datasets that contain a group hierarchy with `cf.read` (https://github.com/NCAS-CMS/cf-python/issues/894) +* New optional dependency: ``zarr>=3.1.2`` Version 3.18.2 -------------- diff --git a/cf/test/test_zarr.py b/cf/test/test_zarr.py new file mode 100644 index 0000000000..2310a83953 --- /dev/null +++ b/cf/test/test_zarr.py @@ -0,0 +1,333 @@ +import atexit 
+import datetime +import faulthandler +import os +import shutil +import tempfile +import unittest + +faulthandler.enable() # to debug seg faults and timeouts + +import zarr + +import cf + +warnings = False + +# Set up temporary directories +tmpdirs = [ + tempfile.mkdtemp("_test_zarr.zarr", dir=os.getcwd()) for i in range(2) +] +[tmpdir1, tmpdir2] = tmpdirs + +# Set up temporary files +tmpfiles = [ + tempfile.mkstemp("_test_zarr.nc", dir=os.getcwd())[1] for i in range(2) +] +[tmpfile1, tmpfile2] = tmpfiles + + +def _remove_tmpdirs(): + """Remove temporary files created during tests.""" + for f in tmpfiles: + try: + os.remove(f) + except OSError: + pass + + for d in tmpdirs: + try: + shutil.rmtree(d) + os.rmdir(d) + except OSError: + pass + + +atexit.register(_remove_tmpdirs) + + +class read_writeTest(unittest.TestCase): + """Test the reading and writing of field constructs from/to disk.""" + + f0 = cf.example_field(0) + + def setUp(self): + """Preparations called immediately before each test method.""" + # Disable log messages to silence expected warnings + cf.LOG_LEVEL("DISABLE") + # Note: to enable all messages for given methods, lines or + # calls (those without a 'verbose' option to do the same) + # e.g. to debug them, wrap them (for methods, start-to-end + # internally) as follows: cf.LOG_LEVEL('DEBUG') + # + # < ... test code ... 
> + # cf.log_level('DISABLE') + + def test_zarr_read_write_1(self): + """Test Zarr read/write on example fields.""" + for i, f in enumerate(cf.example_fields()): + if i in (8, 9, 10): + # Can't write UGRID yet + continue + + cf.write(f, tmpdir1, fmt="ZARR3") + z = cf.read(tmpdir1) + self.assertEqual(len(z), 1) + z = z[0] + self.assertTrue(z.equals(f)) + + # Check that the Zarr and netCDF4 encodings are equivalent + cf.write(f, tmpfile1, fmt="NETCDF4") + n = cf.read(tmpfile1)[0] + self.assertTrue(z.equals(n)) + + def test_zarr_read_write_2(self): + """Test Zarr read/write on various netCDF files.""" + for filename in ( + "DSG_timeSeries_contiguous.nc", + "DSG_timeSeries_indexed.nc", + "DSG_timeSeriesProfile_indexed_contiguous.nc", + "gathered.nc", + "geometry_1.nc", + "geometry_2.nc", + "geometry_3.nc", + "geometry_4.nc", + "string_char.nc", + ): + n = cf.read(filename) + cf.write(n, tmpdir1, fmt="ZARR3") + z = cf.read(tmpdir1) + self.assertEqual(len(z), len(n)) + for a, b in zip(z, n): + self.assertTrue(a.equals(b)) + + def test_zarr_read_write_chunks_shards(self): + """Test Zarr read/write with chunks and shards.""" + f = self.f0.copy() + f.data.nc_set_dataset_chunksizes([2, 3]) + + cf.write(f, tmpdir1, fmt="ZARR3") + z = cf.read(tmpdir1)[0] + self.assertTrue(z.equals(f)) + + z = zarr.open(tmpdir1) + self.assertEqual(z["q"].chunks, (2, 3)) + self.assertIsNone(z["q"].shards) + + # Make shards comprising 4 chunks + cf.write(f, tmpdir1, fmt="ZARR3", dataset_shards=4) + z = cf.read(tmpdir1, store_dataset_shards=False)[0] + self.assertTrue(z.equals(f)) + self.assertIsNone(z.data.nc_dataset_shards()) + + z = zarr.open(tmpdir1) + self.assertEqual(z["q"].chunks, (2, 3)) + self.assertEqual(z["q"].shards, (4, 6)) + + for shards in (4, [2, 2]): + f.data.nc_set_dataset_shards(shards) + cf.write(f, tmpdir1, fmt="ZARR3") + z = cf.read(tmpdir1)[0] + self.assertTrue(z.equals(f)) + self.assertEqual(z.data.nc_dataset_shards(), (2, 2)) + + z = zarr.open(tmpdir1) + 
self.assertEqual(z["q"].chunks, (2, 3)) + self.assertEqual(z["q"].shards, (4, 6)) + + def test_zarr_read_write_CFA(self): + """Test CF aggreagtion in Zarr.""" + f = self.f0 + + cf.write(f, tmpdir1, fmt="ZARR3") + cf.write(f, tmpfile1, fmt="NETCDF4") + + z = cf.read(tmpdir1, cfa_write="field")[0] + n = cf.read(tmpfile1, cfa_write="field")[0] + + self.assertTrue(z.equals(f)) + self.assertTrue(z.equals(n)) + + cf.write(z, tmpdir2, fmt="ZARR3", cfa="field") + cf.write(n, tmpfile2, fmt="NETCDF4", cfa="field") + + z = cf.read(tmpdir2)[0] + n = cf.read(tmpfile2)[0] + + self.assertTrue(z.equals(f)) + self.assertTrue(z.equals(n)) + + def test_zarr_groups_1(self): + """Test for the general handling of Zarr hierarchical groups.""" + f = cf.example_field(1) + + # Add a second grid mapping + datum = cf.Datum(parameters={"earth_radius": 7000000}) + conversion = cf.CoordinateConversion( + parameters={"grid_mapping_name": "latitude_longitude"} + ) + + grid = cf.CoordinateReference( + coordinate_conversion=conversion, + datum=datum, + coordinates=["auxiliarycoordinate0", "auxiliarycoordinate1"], + ) + + f.set_construct(grid) + + grid0 = f.construct("grid_mapping_name:rotated_latitude_longitude") + grid0.del_coordinate("auxiliarycoordinate0") + grid0.del_coordinate("auxiliarycoordinate1") + + grouped_dir = tmpdir1 + grouped_file = tmpfile1 + + # Set some groups + f.nc_set_variable_groups(["forecast", "model"]) + f.construct("grid_latitude").bounds.nc_set_variable_groups( + ["forecast"] + ) + for name in ( + "longitude", # Auxiliary coordinate + "latitude", # Auxiliary coordinate + "long_name=Grid latitude name", # Auxiliary coordinate + "measure:area", # Cell measure + "surface_altitude", # Domain ancillary + "air_temperature standard_error", # Field ancillary + "grid_mapping_name:rotated_latitude_longitude", + "time", # Dimension coordinate + "grid_latitude", # Dimension coordinate + ): + f.construct(name).nc_set_variable_groups(["forecast"]) + + # Check the groups + cf.write(f, 
grouped_file, fmt="NETCDF4") + cf.write(f, grouped_dir, fmt="ZARR3") + + n = cf.read(grouped_file)[0] + z = cf.read(grouped_dir)[0] + self.assertTrue(z.equals(n)) + self.assertTrue(z.equals(f)) + + # Directly check the groups in the Zarr dataset + x = zarr.open(grouped_dir) + self.assertEqual(list(x.group_keys()), ["forecast"]) + self.assertEqual(list(x["forecast"].group_keys()), ["model"]) + + cf.write(z, tmpdir2, fmt="ZARR3") + z1 = cf.read(tmpdir2)[0] + self.assertTrue(z1.equals(f)) + + def test_zarr_groups_dimension(self): + """Test Zarr groups dimensions.""" + f = self.f0.copy() + + grouped_dir = tmpdir1 + grouped_file = tmpfile1 + + # Set some groups + f.nc_set_variable_groups(["forecast", "model"]) + for construct in f.constructs.filter_by_data().values(): + construct.nc_set_variable_groups(["forecast"]) + + for construct in f.coordinates().values(): + try: + construct.bounds.nc_set_variable_groups(["forecast"]) + except ValueError: + pass + + domain_axis = f.domain_axis("latitude") + domain_axis.nc_set_dimension_groups(["forecast"]) + + # Check the groups + cf.write(f, grouped_file, fmt="NETCDF4") + cf.write(f, grouped_dir, fmt="ZARR3") + + n = cf.read(grouped_file)[0] + z = cf.read(grouped_dir)[0] + self.assertTrue(z.equals(n)) + self.assertTrue(z.equals(f)) + + # Check that grouped netCDF datasets can only be read with + # 'closest_ancestor' + cf.read(grouped_file, group_dimension_search="closest_ancestor") + for gsn in ("furthest_ancestor", "local", "BAD VALUE"): + with self.assertRaises(ValueError): + cf.read(grouped_file, group_dimension_search=gsn) + + def test_zarr_groups_DSG(self): + """Test Zarr groups containing DSGs.""" + f = cf.example_field(4) + + grouped_dir = tmpdir1 + grouped_file = tmpfile1 + + f.compress("indexed_contiguous", inplace=True) + f.data.get_count().nc_set_variable("count") + f.data.get_index().nc_set_variable("index") + + # Set some groups. (Write the read the field first to create + # the compressions variables on disk.) 
+ cf.write(f, tmpfile2) + f = cf.read(tmpfile2)[0] + + # Set some groups + f.nc_set_variable_groups(["forecast", "model"]) + f.data.get_count().nc_set_variable_groups(["forecast"]) + f.data.get_index().nc_set_variable_groups(["forecast"]) + f.construct("altitude").nc_set_variable_groups(["forecast"]) + f.data.get_count().nc_set_sample_dimension_groups(["forecast"]) + + cf.write(f, grouped_file, fmt="NETCDF4") + cf.write(f, grouped_dir, fmt="ZARR3") + + n = cf.read(grouped_file) + z = cf.read(grouped_dir) + + n = n[0] + z = z[0] + self.assertTrue(z.equals(n)) + self.assertTrue(z.equals(f)) + + def test_zarr_groups_geometry(self): + """Test Zarr groups containing cell geometries.""" + f = cf.example_field(6) + + grouped_dir = tmpdir1 + grouped_file = tmpfile1 + + cf.write(f, tmpfile2) + f = cf.read(tmpfile2)[0] + + # Set some groups + f.nc_set_variable_groups(["forecast", "model"]) + f.nc_set_geometry_variable_groups(["forecast"]) + f.coordinate("longitude").bounds.nc_set_variable_groups(["forecast"]) + f.nc_set_component_variable_groups("node_count", ["forecast"]) + f.nc_set_component_variable_groups("part_node_count", ["forecast"]) + f.nc_set_component_variable("interior_ring", "interior_ring") + f.nc_set_component_variable_groups("interior_ring", ["forecast"]) + + # Check the groups + cf.write(f, grouped_file, fmt="NETCDF4") + cf.write(f, grouped_dir, fmt="ZARR3") + + n = cf.read(grouped_file)[0] + z = cf.read(grouped_dir)[0] + self.assertTrue(z.equals(n)) + self.assertTrue(z.equals(f)) + + def test_zarr_read_v2(self): + """Test reading Zarr v2.""" + f2 = cf.read("example_field_0.zarr2") + f3 = cf.read("example_field_0.zarr3") + self.assertEqual(len(f2), len(f3)) + self.assertEqual(len(f2), 1) + self.assertTrue(f2[0].equals(f3[0])) + + +if __name__ == "__main__": + print("Run date:", datetime.datetime.now()) + cf.environment() + print("") + unittest.main(verbosity=2) diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 
2d3bd43d4d..6309432127 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -270,7 +270,7 @@ environments for which these features are not required. * `zarr `_, version 3.1.2 or newer. For reading and writing Zarr datasets. - + .. rubric:: Regridding * `esmpy `_, previously From 276f5645b3ddd106544d1c5a2ee8ec51344af2eb Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 27 Oct 2025 14:01:14 +0000 Subject: [PATCH 03/10] dev --- cf/read_write/um/umread.py | 16 ++++++++-------- docs/source/conf.py | 3 +-- docs/source/recipes/plot_08_recipe.py | 3 +-- docs/source/recipes/plot_12_recipe.py | 2 +- docs/source/recipes/plot_13_recipe.py | 4 +--- docs/source/recipes/plot_17_recipe.py | 2 +- docs/source/recipes/plot_18_recipe.py | 4 ++-- docs/source/recipes/plot_19_recipe.py | 8 ++++++-- docs/source/recipes/plot_22_recipe.py | 5 +++-- docs/source/recipes/plot_23_recipe.py | 8 ++++---- 10 files changed, 28 insertions(+), 27 deletions(-) diff --git a/cf/read_write/um/umread.py b/cf/read_write/um/umread.py index 3289ea88b4..1451578440 100644 --- a/cf/read_write/um/umread.py +++ b/cf/read_write/um/umread.py @@ -3566,7 +3566,7 @@ def read( # Return now if there are valid file types return [] - f = self.file_open(filename, parse=True) + f = self.dataset_open(filename, parse=True) info = is_log_level_info(logger) @@ -3589,7 +3589,7 @@ def read( for var in f.vars ] - self.file_close() + self.dataset_close() return [field for x in um for field in x.fields if field] @@ -3623,7 +3623,7 @@ def _open_um_file( The open PP or FF file object. """ - self.file_close() + self.dataset_close() try: f = File( filename, @@ -3669,15 +3669,15 @@ def is_um_file(self, filename): try: # Note: No need to completely parse the file to ascertain # if it's PP or FF. 
- self.file_open(filename, parse=False) + self.dataset_open(filename, parse=False) except Exception: - self.file_close() + self.dataset_close() return False else: - self.file_close() + self.dataset_close() return True - def file_close(self): + def dataset_close(self): """Close the file that has been read. :Returns: @@ -3691,7 +3691,7 @@ def file_close(self): self._um_file = None - def file_open(self, filename, parse=True): + def dataset_open(self, filename, parse=True): """Open the file for reading. :Paramters: diff --git a/docs/source/conf.py b/docs/source/conf.py index 239aa988fa..4f45911a65 100755 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -389,7 +389,7 @@ def _get_date(): "run_stale_examples": False, # Below setting can be buggy: see: # https://github.com/sphinx-gallery/sphinx-gallery/issues/967 - #"reference_url": {"cf": None}, + # "reference_url": {"cf": None}, "backreferences_dir": "gen_modules/backreferences", "doc_module": ("cf",), "inspect_global_variables": True, @@ -476,7 +476,6 @@ def _get_date(): import cf - def linkcode_resolve(domain, info): # ================================================================= # Must delete all .doctrees directories in build for changes to be diff --git a/docs/source/recipes/plot_08_recipe.py b/docs/source/recipes/plot_08_recipe.py index 63427f62a7..6045f51448 100644 --- a/docs/source/recipes/plot_08_recipe.py +++ b/docs/source/recipes/plot_08_recipe.py @@ -9,11 +9,10 @@ # 1. Import cf-python, cf-plot, numpy and scipy.stats: import cfplot as cfp -import cf - import numpy as np import scipy.stats as stats +import cf # %% # 2. Three functions are defined: diff --git a/docs/source/recipes/plot_12_recipe.py b/docs/source/recipes/plot_12_recipe.py index b09db0b29f..5304194b19 100644 --- a/docs/source/recipes/plot_12_recipe.py +++ b/docs/source/recipes/plot_12_recipe.py @@ -13,8 +13,8 @@ # %% # 1. 
Import cf-python, cf-plot and matplotlib.pyplot: -import matplotlib.pyplot as plt import cfplot as cfp +import matplotlib.pyplot as plt import cf diff --git a/docs/source/recipes/plot_13_recipe.py b/docs/source/recipes/plot_13_recipe.py index bf0398713e..9b658597d8 100644 --- a/docs/source/recipes/plot_13_recipe.py +++ b/docs/source/recipes/plot_13_recipe.py @@ -18,13 +18,11 @@ # in next steps. import cartopy.crs as ccrs -import matplotlib.patches as mpatches - import cfplot as cfp +import matplotlib.patches as mpatches import cf - # %% # 2. Read and select the SST by index and look at its contents: sst = cf.read("~/recipes/ERA5_monthly_averaged_SST.nc")[0] diff --git a/docs/source/recipes/plot_17_recipe.py b/docs/source/recipes/plot_17_recipe.py index c94769e2ba..a66c90b518 100644 --- a/docs/source/recipes/plot_17_recipe.py +++ b/docs/source/recipes/plot_17_recipe.py @@ -11,8 +11,8 @@ # %% # 1. Import cf-python and cf-plot: -import matplotlib.pyplot as plt import cfplot as cfp +import matplotlib.pyplot as plt import cf diff --git a/docs/source/recipes/plot_18_recipe.py b/docs/source/recipes/plot_18_recipe.py index f0eae36e35..3beb9d0db9 100644 --- a/docs/source/recipes/plot_18_recipe.py +++ b/docs/source/recipes/plot_18_recipe.py @@ -10,15 +10,15 @@ """ +import cfplot as cfp + # %% # 1. Import cf-python, cf-plot and other required packages: import matplotlib.pyplot as plt import scipy.stats.mstats as mstats -import cfplot as cfp import cf - # %% # 2. Read the data in and unpack the Fields from FieldLists using indexing. # In our example We are investigating the influence of the land height on diff --git a/docs/source/recipes/plot_19_recipe.py b/docs/source/recipes/plot_19_recipe.py index dcc0926fbd..ceb9db1c5c 100644 --- a/docs/source/recipes/plot_19_recipe.py +++ b/docs/source/recipes/plot_19_recipe.py @@ -9,10 +9,11 @@ maxima. """ +import cfplot as cfp + # %% # 1. 
Import cf-python, cf-plot and other required packages: import matplotlib.pyplot as plt -import cfplot as cfp import cf @@ -55,7 +56,10 @@ # of the maxima, we loop through the season query mapping and do a # "T: mean" collapse setting the season as the grouping: cfp.gopen( - rows=2, columns=1, bottom=0.1, top=0.85, + rows=2, + columns=1, + bottom=0.1, + top=0.85, ) cfp.gpos(1) cfp.gset(xmin="1980-01-01", xmax="2022-12-01", ymin=304, ymax=312) diff --git a/docs/source/recipes/plot_22_recipe.py b/docs/source/recipes/plot_22_recipe.py index 377313c899..fe329cda9d 100644 --- a/docs/source/recipes/plot_22_recipe.py +++ b/docs/source/recipes/plot_22_recipe.py @@ -11,10 +11,11 @@ # %% # 1. Import cf-python, Dask.array, NumPy, and Matplotlib: -import cf import dask.array as da -import numpy as np import matplotlib.pyplot as plt +import numpy as np + +import cf # %% # 2. Read the field constructs and load the wind speed component fields: diff --git a/docs/source/recipes/plot_23_recipe.py b/docs/source/recipes/plot_23_recipe.py index 2499b0d875..29537803af 100644 --- a/docs/source/recipes/plot_23_recipe.py +++ b/docs/source/recipes/plot_23_recipe.py @@ -18,12 +18,12 @@ # sphinx_gallery_thumbnail_number = 2 # sphinx_gallery_end_ignore -import matplotlib.pyplot as plt import cfplot as cfp -import cf - -import numpy as np import dask.array as da +import matplotlib.pyplot as plt +import numpy as np + +import cf # %% # 2. 
Read example data field constructs, and set region for our plots: From 7a27417c6798c8b4743d26905a0e340e2b52e48a Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 13 Nov 2025 18:50:40 +0000 Subject: [PATCH 04/10] dev --- cf/functions.py | 4 ++-- docs/source/installation.rst | 2 +- setup.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cf/functions.py b/cf/functions.py index f63b037305..daf30b8560 100644 --- a/cf/functions.py +++ b/cf/functions.py @@ -3184,7 +3184,7 @@ def environment(display=True, paths=True): netCDF4: 1.7.2 /home/miniconda3/lib/python3.12/site-packages/netCDF4/__init__.py h5netcdf: 1.3.0 /home/miniconda3/lib/python3.12/site-packages/h5netcdf/__init__.py h5py: 3.12.1 /home/miniconda3/lib/python3.12/site-packages/h5py/__init__.py - zarr: 3.0.8 /home/miniconda3/lib/python3.12/site-packages/zarr/__init__.py + zarr: 3.1.3 /home/miniconda3/lib/python3.12/site-packages/zarr/__init__.py s3fs: 2024.12.0 /home/miniconda3/lib/python3.12/site-packages/s3fs/__init__.py scipy: 1.15.1 /home/miniconda3/lib/python3.12/site-packages/scipy/__init__.py dask: 2025.5.1 /home/miniconda3/lib/python3.12/site-packages/dask/__init__.py @@ -3210,7 +3210,7 @@ def environment(display=True, paths=True): netCDF4: 1.7.2 h5netcdf: 1.3.0 h5py: 3.12.1 - zarr: 3.0.8 + zarr: 3.1.3 s3fs: 2024.12.0 scipy: 1.15.1 dask: 2025.5.1 diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 6309432127..4ec36e0d67 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -267,7 +267,7 @@ environments for which these features are not required. .. rubric:: Zarr -* `zarr `_, version 3.1.2 or newer. +* `zarr `_, version 3.1.3 or newer. For reading and writing Zarr datasets. 
diff --git a/setup.py b/setup.py index 35ab6008da..a9bdd9f268 100755 --- a/setup.py +++ b/setup.py @@ -264,7 +264,7 @@ def compile(): "flake8", ], "zarr": [ - "zarr>=3.1.2", + "zarr>=3.1.3", ], } From 4692dbb369b14bf1a5d9865bc5418efa755c23f9 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 8 Jan 2026 16:32:02 +0000 Subject: [PATCH 05/10] reverted recipe directory --- docs/source/recipes/plot_08_recipe.py | 3 ++- docs/source/recipes/plot_12_recipe.py | 2 +- docs/source/recipes/plot_13_recipe.py | 4 +++- docs/source/recipes/plot_17_recipe.py | 2 +- docs/source/recipes/plot_18_recipe.py | 4 ++-- docs/source/recipes/plot_19_recipe.py | 8 ++------ docs/source/recipes/recipe_list.txt | 8 +------- 7 files changed, 12 insertions(+), 19 deletions(-) diff --git a/docs/source/recipes/plot_08_recipe.py b/docs/source/recipes/plot_08_recipe.py index 6045f51448..63427f62a7 100644 --- a/docs/source/recipes/plot_08_recipe.py +++ b/docs/source/recipes/plot_08_recipe.py @@ -9,10 +9,11 @@ # 1. Import cf-python, cf-plot, numpy and scipy.stats: import cfplot as cfp +import cf + import numpy as np import scipy.stats as stats -import cf # %% # 2. Three functions are defined: diff --git a/docs/source/recipes/plot_12_recipe.py b/docs/source/recipes/plot_12_recipe.py index 5304194b19..b09db0b29f 100644 --- a/docs/source/recipes/plot_12_recipe.py +++ b/docs/source/recipes/plot_12_recipe.py @@ -13,8 +13,8 @@ # %% # 1. Import cf-python, cf-plot and matplotlib.pyplot: -import cfplot as cfp import matplotlib.pyplot as plt +import cfplot as cfp import cf diff --git a/docs/source/recipes/plot_13_recipe.py b/docs/source/recipes/plot_13_recipe.py index 9b658597d8..bf0398713e 100644 --- a/docs/source/recipes/plot_13_recipe.py +++ b/docs/source/recipes/plot_13_recipe.py @@ -18,11 +18,13 @@ # in next steps. import cartopy.crs as ccrs -import cfplot as cfp import matplotlib.patches as mpatches +import cfplot as cfp + import cf + # %% # 2. 
Read and select the SST by index and look at its contents: sst = cf.read("~/recipes/ERA5_monthly_averaged_SST.nc")[0] diff --git a/docs/source/recipes/plot_17_recipe.py b/docs/source/recipes/plot_17_recipe.py index a66c90b518..c94769e2ba 100644 --- a/docs/source/recipes/plot_17_recipe.py +++ b/docs/source/recipes/plot_17_recipe.py @@ -11,8 +11,8 @@ # %% # 1. Import cf-python and cf-plot: -import cfplot as cfp import matplotlib.pyplot as plt +import cfplot as cfp import cf diff --git a/docs/source/recipes/plot_18_recipe.py b/docs/source/recipes/plot_18_recipe.py index 3beb9d0db9..f0eae36e35 100644 --- a/docs/source/recipes/plot_18_recipe.py +++ b/docs/source/recipes/plot_18_recipe.py @@ -10,15 +10,15 @@ """ -import cfplot as cfp - # %% # 1. Import cf-python, cf-plot and other required packages: import matplotlib.pyplot as plt import scipy.stats.mstats as mstats +import cfplot as cfp import cf + # %% # 2. Read the data in and unpack the Fields from FieldLists using indexing. # In our example We are investigating the influence of the land height on diff --git a/docs/source/recipes/plot_19_recipe.py b/docs/source/recipes/plot_19_recipe.py index ceb9db1c5c..dcc0926fbd 100644 --- a/docs/source/recipes/plot_19_recipe.py +++ b/docs/source/recipes/plot_19_recipe.py @@ -9,11 +9,10 @@ maxima. """ -import cfplot as cfp - # %% # 1. 
Import cf-python, cf-plot and other required packages: import matplotlib.pyplot as plt +import cfplot as cfp import cf @@ -56,10 +55,7 @@ # of the maxima, we loop through the season query mapping and do a # "T: mean" collapse setting the season as the grouping: cfp.gopen( - rows=2, - columns=1, - bottom=0.1, - top=0.85, + rows=2, columns=1, bottom=0.1, top=0.85, ) cfp.gpos(1) cfp.gset(xmin="1980-01-01", xmax="2022-12-01", ymin=304, ymax=312) diff --git a/docs/source/recipes/recipe_list.txt b/docs/source/recipes/recipe_list.txt index 0a8930811a..3dad79a79c 100644 --- a/docs/source/recipes/recipe_list.txt +++ b/docs/source/recipes/recipe_list.txt @@ -37,10 +37,4 @@ plot_18_recipe.html#sphx-glr-recipes-plot-18-recipe-py plot_19_recipe.html#sphx-glr-recipes-plot-19-recipe-py
plot_20_recipe.html#sphx-glr-recipes-plot-20-recipe-py -
-plot_21_recipe.html#sphx-glr-recipes-plot-21-recipe-py -
-plot_22_recipe.html#sphx-glr-recipes-plot-22-recipe-py -
-plot_23_recipe.html#sphx-glr-recipes-plot-23-recipe-py -
+
\ No newline at end of file From 238fd84ca61fb8de345d51bac85c0a0f57dc1496 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 14 Jan 2026 13:27:07 +0000 Subject: [PATCH 06/10] Typos Co-authored-by: Sadie L. Bartholomew --- cf/read_write/read.py | 2 +- docs/source/tutorial.rst | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 803a4d8e39..52cc132e7e 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -334,7 +334,7 @@ class read(cfdm.read): {{read group_dimension_search: `str`, optional}} - .. versionadded:: (cfdm) NEXTVERSION + .. versionadded:: NEXTVERSION umversion: deprecated at version 3.0.0 Use the *um* parameter instead. diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index 20cd5c11b6..a696524e8f 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -5348,7 +5348,7 @@ By default the output file will be for CF-|version|. The `cf.write` function has optional parameters to -* set the output netCDF format (all netCDF3 and netCDF4 formats, as +* set the output format (all netCDF3 and netCDF4 formats, as well as Zarr v3 are possible); * append to the netCDF file rather than over-writing it by default; diff --git a/setup.py b/setup.py index a9bdd9f268..2ed416976d 100755 --- a/setup.py +++ b/setup.py @@ -184,7 +184,7 @@ def compile(): * create new field constructs in memory, -* write and append field constructs and domain to netCDF and Zarr v3 datasets on disk, +* write and append field and domain constructs to netCDF and Zarr v3 datasets on disk, * read, write, and create coordinates defined by geometry cells, From 734ebfc5c34056084a49aeb492697fa8b71f3616 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 14 Jan 2026 13:29:11 +0000 Subject: [PATCH 07/10] Remove duplicate info Co-authored-by: Sadie L. 
Bartholomew --- Changelog.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Changelog.rst b/Changelog.rst index ca1a0b96bd..7b134323e2 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -3,8 +3,7 @@ Version NEXTVERSION **2026-01-??** -* Write Zarr v3 datasets with `cf.write`, and allow the reading of - grouped Zarr v2 and v3 datasets with `cf.read` +* Write Zarr v3 datasets with `cf.write` (https://github.com/NCAS-CMS/cf-python/issues/895) * Read Zarr v2 and v3 datasets that contain a group hierarchy with `cf.read` (https://github.com/NCAS-CMS/cf-python/issues/894) From 391d8ee2f2849253103711290447c0e1ce037c12 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 14 Jan 2026 13:30:23 +0000 Subject: [PATCH 08/10] revert bad linting --- docs/source/recipes/plot_19_recipe.py | 5 ++++- docs/source/recipes/plot_22_recipe.py | 5 ++--- docs/source/recipes/plot_23_recipe.py | 8 ++++---- docs/source/recipes/recipe_list.txt | 8 +++++++- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/docs/source/recipes/plot_19_recipe.py b/docs/source/recipes/plot_19_recipe.py index dcc0926fbd..02d493dc21 100644 --- a/docs/source/recipes/plot_19_recipe.py +++ b/docs/source/recipes/plot_19_recipe.py @@ -55,7 +55,10 @@ # of the maxima, we loop through the season query mapping and do a # "T: mean" collapse setting the season as the grouping: cfp.gopen( - rows=2, columns=1, bottom=0.1, top=0.85, + rows=2, + columns=1, + bottom=0.1, + top=0.85, ) cfp.gpos(1) cfp.gset(xmin="1980-01-01", xmax="2022-12-01", ymin=304, ymax=312) diff --git a/docs/source/recipes/plot_22_recipe.py b/docs/source/recipes/plot_22_recipe.py index fe329cda9d..377313c899 100644 --- a/docs/source/recipes/plot_22_recipe.py +++ b/docs/source/recipes/plot_22_recipe.py @@ -11,11 +11,10 @@ # %% # 1. Import cf-python, Dask.array, NumPy, and Matplotlib: +import cf import dask.array as da -import matplotlib.pyplot as plt import numpy as np - -import cf +import matplotlib.pyplot as plt # %% # 2. 
Read the field constructs and load the wind speed component fields: diff --git a/docs/source/recipes/plot_23_recipe.py b/docs/source/recipes/plot_23_recipe.py index 29537803af..2499b0d875 100644 --- a/docs/source/recipes/plot_23_recipe.py +++ b/docs/source/recipes/plot_23_recipe.py @@ -18,13 +18,13 @@ # sphinx_gallery_thumbnail_number = 2 # sphinx_gallery_end_ignore -import cfplot as cfp -import dask.array as da import matplotlib.pyplot as plt -import numpy as np - +import cfplot as cfp import cf +import numpy as np +import dask.array as da + # %% # 2. Read example data field constructs, and set region for our plots: diff --git a/docs/source/recipes/recipe_list.txt b/docs/source/recipes/recipe_list.txt index 3dad79a79c..0a8930811a 100644 --- a/docs/source/recipes/recipe_list.txt +++ b/docs/source/recipes/recipe_list.txt @@ -37,4 +37,10 @@ plot_18_recipe.html#sphx-glr-recipes-plot-18-recipe-py plot_19_recipe.html#sphx-glr-recipes-plot-19-recipe-py
plot_20_recipe.html#sphx-glr-recipes-plot-20-recipe-py -
\ No newline at end of file +
+plot_21_recipe.html#sphx-glr-recipes-plot-21-recipe-py +
+plot_22_recipe.html#sphx-glr-recipes-plot-22-recipe-py +
+plot_23_recipe.html#sphx-glr-recipes-plot-23-recipe-py +
From 8d6ff49c760e90d86503e20018dd388ed837e090 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 14 Jan 2026 13:54:32 +0000 Subject: [PATCH 09/10] _get_cached_elements -> get_cached_elements --- cf/aggregate.py | 2 +- cf/dimensioncoordinate.py | 4 +- cf/field.py | 2 +- cf/test/test_Data.py | 81 +++++++++++++-------------------------- 4 files changed, 31 insertions(+), 58 deletions(-) diff --git a/cf/aggregate.py b/cf/aggregate.py index ff53f31aa3..149be58d67 100644 --- a/cf/aggregate.py +++ b/cf/aggregate.py @@ -4112,7 +4112,7 @@ def _get_hfl( # Record the bounds of the first and last (sorted) cells first, last = hfl_cache.flb.get(hash_value, (None, None)) if first is None: - cached_elements = d._get_cached_elements() + cached_elements = d.get_cached_elements() x = [] for i in (0, 1, -2, -1): value = cached_elements.get(i) diff --git a/cf/dimensioncoordinate.py b/cf/dimensioncoordinate.py index 644709bbda..c1a061e55f 100644 --- a/cf/dimensioncoordinate.py +++ b/cf/dimensioncoordinate.py @@ -165,7 +165,7 @@ def _infer_direction(self): if data is not None: # Infer the direction from the data if data.size > 1: - c = data._get_cached_elements() + c = data.get_cached_elements() if c: try: return bool(c.get(0) <= c.get(1)) @@ -179,7 +179,7 @@ def _infer_direction(self): data = self.get_bounds_data(None, _fill_value=False) if data is not None: # Infer the direction from the bounds - c = data._get_cached_elements() + c = data.get_cached_elements() if c: try: return bool(c.get(0) <= c.get(1)) diff --git a/cf/field.py b/cf/field.py index 53b889259d..b571158ea9 100644 --- a/cf/field.py +++ b/cf/field.py @@ -6994,7 +6994,7 @@ def collapse( else: b = dim.data - cached_elements = b._get_cached_elements() + cached_elements = b.get_cached_elements() try: # Try to set the new bounds from cached values bounds_data = Data( diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index 2300a62e47..15d67c894b 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -1165,7 
+1165,7 @@ def test_Data_concatenate(self): str(d) str(e) f = cf.Data.concatenate([d, e], axis=1) - cached = f._get_cached_elements() + cached = f.get_cached_elements() self.assertEqual(cached[0], d.first_element()) self.assertEqual(cached[-1], e.last_element()) @@ -1205,7 +1205,7 @@ def test_Data_concatenate(self): repr(e) f = cf.Data.concatenate([d, e], axis=0) self.assertEqual( - f._get_cached_elements(), + f.get_cached_elements(), {0: d.first_element(), -1: e.last_element()}, ) @@ -4378,7 +4378,7 @@ def test_Data_Units(self): d = cf.Data([1000, 2000, 3000], "m") repr(d) d.Units = cf.Units("km") - self.assertEqual(d._get_cached_elements(), {0: 1.0, 1: 2.0, -1: 3.0}) + self.assertEqual(d.get_cached_elements(), {0: 1.0, 1: 2.0, -1: 3.0}) def test_Data_get_data(self): """Test the `get_data` Data method.""" @@ -4446,57 +4446,30 @@ def test_Data__init__datetime(self): self.assertTrue((q == d).array.all()) self.assertTrue((d == q).array.all()) - def test_Data__str__(self): - """Test `Data.__str__`""" - elements0 = (0, -1, 1) - for array in ([1], [1, 2], [1, 2, 3]): - elements = elements0[: len(array)] - - d = cf.Data(array) - cache = d._get_cached_elements() - for element in elements: - self.assertNotIn(element, cache) - - self.assertEqual(str(d), str(array)) - cache = d._get_cached_elements() - for element in elements: - self.assertIn(element, cache) - - d[0] = 1 - cache = d._get_cached_elements() - for element in elements: - self.assertNotIn(element, cache) - - self.assertEqual(str(d), str(array)) - cache = d._get_cached_elements() - for element in elements: - self.assertIn(element, cache) - - d += 0 - cache = d._get_cached_elements() - for element in elements: - self.assertNotIn(element, cache) - - self.assertEqual(str(d), str(array)) - cache = d._get_cached_elements() - for element in elements: - self.assertIn(element, cache) - - # Test when size > 3, i.e. second element is not there. 
-        d = cf.Data([1, 2, 3, 4])
-        cache = d._get_cached_elements()
-        for element in elements0:
-            self.assertNotIn(element, cache)
-
-        self.assertEqual(str(d), "[1, ..., 4]")
-        cache = d._get_cached_elements()
-        self.assertNotIn(1, cache)
-        for element in elements0[:2]:
-            self.assertIn(element, cache)
-
-        d[0] = 1
-        for element in elements0:
-            self.assertNotIn(element, d._get_cached_elements())
+    def test_Data__repr__str(self):
+        """Test all means of Data inspection."""
+        for d in [
+            cf.Data(9, units="km"),
+            cf.Data([9], units="km"),
+            cf.Data([[9]], units="km"),
+            cf.Data([8, 9], units="km"),
+            cf.Data([[8, 9]], units="km"),
+            cf.Data([7, 8, 9], units="km"),
+            cf.Data([[7, 8, 9]], units="km"),
+            cf.Data([6, 7, 8, 9], units="km"),
+            cf.Data([[6, 7, 8, 9]], units="km"),
+            cf.Data([[6, 7], [8, 9]], units="km"),
+            cf.Data([[6, 7, 8, 9], [6, 7, 8, 9]], units="km"),
+        ]:
+            _ = repr(d)
+            _ = str(d)
+
+        # Test when the data contains date-times with the first
+        # element masked
+        dt = np.ma.array([10, 20], mask=[True, False])
+        d = cf.Data(dt, units="days since 2000-01-01")
+        self.assertTrue(str(d) == "[--, 2000-01-21 00:00:00]")
+        self.assertTrue(repr(d) == "<CF Data(2): [--, 2000-01-21 00:00:00] days since 2000-01-01>")

     def test_Data_cull_graph(self):
         """Test Data.cull_graph."""

From 30ad381dab381a67f96155900f771b650cf59b13 Mon Sep 17 00:00:00 2001
From: David Hassell
Date: Wed, 14 Jan 2026 13:54:41 +0000
Subject: [PATCH 10/10] _get_cached_elements -> get_cached_elements

---
 cf/data/data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cf/data/data.py b/cf/data/data.py
index e906087c93..7b75d16008 100644
--- a/cf/data/data.py
+++ b/cf/data/data.py
@@ -2761,7 +2761,7 @@ def Units(self, value):
         self._set_dask(dx, clear=self._ALL ^ self._CACHE ^ self._CFA)

         # Adjust cached values for the new units
-        cache = self._get_cached_elements()
+        cache = self.get_cached_elements()
         if cache:
             self._set_cached_elements(
                 {index: cf_func(value) for index, value in cache.items()}