From c1d8312a2353a6db3b415a54d758ea8c973c3251 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Mon, 15 Dec 2025 15:46:46 +0100 Subject: [PATCH 01/10] less dask tests; fix prerelease test --- .github/workflows/test.yaml | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index ec753d95..89582031 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -20,17 +20,16 @@ jobs: matrix: include: - {os: windows-latest, python: "3.11", dask-version: "2025.2.0", name: "Dask 2025.2.0"} - - {os: windows-latest, python: "3.11", dask-version: "latest", name: "Dask latest"} - - {os: ubuntu-latest, python: "3.11", dask-version: "2025.2.0", name: "Dask 2025.2.0"} + - {os: windows-latest, python: "3.13", dask-version: "latest", name: "Dask latest"} - {os: ubuntu-latest, python: "3.11", dask-version: "latest", name: "Dask latest"} - {os: ubuntu-latest, python: "3.13", dask-version: "latest", name: "Dask latest"} - - {os: macos-latest, python: "3.11", dask-version: "2025.2.0", name: "Dask 2025.2.0"} - {os: macos-latest, python: "3.11", dask-version: "latest", name: "Dask latest"} - - {os: macos-latest, python: "3.12", pip-flags: "--pre", name: "Python 3.12 (pre-release)"} + - {os: macos-latest, python: "3.13", prerelease: "allow", name: "Python 3.13 (pre-release)"} env: OS: ${{ matrix.os }} PYTHON: ${{ matrix.python }} DASK_VERSION: ${{ matrix.dask-version }} + PRERELEASE: ${{ matrix.prerelease }} steps: - uses: actions/checkout@v2 @@ -41,7 +40,11 @@ jobs: python-version: ${{ matrix.python }} - name: Install dependencies run: | - uv sync --extra test + if [[ -n "${PRERELEASE}" ]]; then + uv sync --extra test --prerelease ${PRERELEASE} + else + uv sync --extra test + fi if [[ -n "${DASK_VERSION}" ]]; then if [[ "${DASK_VERSION}" == "latest" ]]; then uv pip install --upgrade dask From 6be84a41ffe93ac3d7cb9ff10ebf411b111c2231 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: 
Tue, 16 Dec 2025 12:35:52 +0100 Subject: [PATCH 02/10] include latest geopandas+anndata in ci; fix more tests --- .github/workflows/test.yaml | 2 +- pyproject.toml | 4 ++++ src/spatialdata/_core/operations/rasterize_bins.py | 2 +- src/spatialdata/_io/io_shapes.py | 7 +++++++ 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 89582031..545ee65e 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -41,7 +41,7 @@ jobs: - name: Install dependencies run: | if [[ -n "${PRERELEASE}" ]]; then - uv sync --extra test --prerelease ${PRERELEASE} + uv sync --extra test --extra simulate-prerelease --prerelease ${PRERELEASE} else uv sync --extra test fi diff --git a/pyproject.toml b/pyproject.toml index d61d6a2c..fd75adfa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,10 @@ dev = [ "bump2version", "sentry-prevent-cli", ] +simulate-prerelease = [ + "geopandas @ git+https://github.com/geopandas/geopandas.git@main", + "anndata @ git+https://github.com/scverse/anndata.git@main", +] test = [ "pytest", "pytest-cov", diff --git a/src/spatialdata/_core/operations/rasterize_bins.py b/src/spatialdata/_core/operations/rasterize_bins.py index 17470812..f61d1758 100644 --- a/src/spatialdata/_core/operations/rasterize_bins.py +++ b/src/spatialdata/_core/operations/rasterize_bins.py @@ -246,7 +246,7 @@ def _get_relabeled_column_name(column_name: str) -> str: def _relabel_labels(table: AnnData, instance_key: str) -> pd.Series: labels_values_count = len(table.obs[instance_key].unique()) - is_not_numeric = not np.issubdtype(table.obs[instance_key].dtype, np.number) + is_not_numeric = not pd.api.types.is_numeric_dtype(table.obs[instance_key].dtype) zero_in_instance_key = 0 in table.obs[instance_key].values has_gaps = not is_not_numeric and labels_values_count != table.obs[instance_key].max() + int(zero_in_instance_key) diff --git a/src/spatialdata/_io/io_shapes.py 
b/src/spatialdata/_io/io_shapes.py index f204e387..fa6bf81e 100644 --- a/src/spatialdata/_io/io_shapes.py +++ b/src/spatialdata/_io/io_shapes.py @@ -149,9 +149,16 @@ def _write_shapes_v02_v03(shapes: GeoDataFrame, group: zarr.Group, element_forma element_format The format of the shapes element used to store it. """ + from spatialdata.models._utils import TRANSFORM_KEY + store_root = group.store_path.store.root path = store_root / group.path / "shapes.parquet" + + # Temporarily remove transformations from attrs to avoid serialization issues + transforms = shapes.attrs[TRANSFORM_KEY] + del shapes.attrs[TRANSFORM_KEY] shapes.to_parquet(path) + shapes.attrs[TRANSFORM_KEY] = transforms attrs = element_format.attrs_to_dict(shapes.attrs) attrs["version"] = element_format.spatialdata_format_version From d221bf2fd40aa6288270cf0bfc1ef153d14dbbc7 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Tue, 16 Dec 2025 12:41:49 +0100 Subject: [PATCH 03/10] ci job with dep group instead of extra --- .github/workflows/test.yaml | 2 +- pyproject.toml | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 545ee65e..cb1976bd 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -41,7 +41,7 @@ jobs: - name: Install dependencies run: | if [[ -n "${PRERELEASE}" ]]; then - uv sync --extra test --extra simulate-prerelease --prerelease ${PRERELEASE} + uv sync --extra test --group simulate-prerelease --prerelease ${PRERELEASE} else uv sync --extra test fi diff --git a/pyproject.toml b/pyproject.toml index fd75adfa..468be154 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,10 +56,6 @@ dev = [ "bump2version", "sentry-prevent-cli", ] -simulate-prerelease = [ - "geopandas @ git+https://github.com/geopandas/geopandas.git@main", - "anndata @ git+https://github.com/scverse/anndata.git@main", -] test = [ "pytest", "pytest-cov", @@ -91,6 +87,12 @@ extra = [ "spatialdata-io", ] 
+[dependency-groups] +simulate-prerelease = [ + "geopandas @ git+https://github.com/geopandas/geopandas.git@main", + "anndata @ git+https://github.com/scverse/anndata.git@main", +] + [tool.coverage.run] source = ["spatialdata"] omit = [ From ea00d7702a103d93bd6f79bc9f9ec896d599d6f6 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Tue, 16 Dec 2025 12:48:01 +0100 Subject: [PATCH 04/10] fix pyproject.toml --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 468be154..46b230b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,8 +89,8 @@ extra = [ [dependency-groups] simulate-prerelease = [ - "geopandas @ git+https://github.com/geopandas/geopandas.git@main", - "anndata @ git+https://github.com/scverse/anndata.git@main", + "geopandas @ git+https://github.com/geopandas/geopandas.git@main ; python_version >= '3.12'", + "anndata @ git+https://github.com/scverse/anndata.git@main ; python_version >= '3.12'", ] [tool.coverage.run] From 4790eba6f7363129834edb81f3898bca1a6e13f2 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Tue, 16 Dec 2025 12:49:17 +0100 Subject: [PATCH 05/10] remove default dep group --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 46b230b2..3e75b3e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,6 +93,9 @@ simulate-prerelease = [ "anndata @ git+https://github.com/scverse/anndata.git@main ; python_version >= '3.12'", ] +[tool.uv] +default-groups = [] + [tool.coverage.run] source = ["spatialdata"] omit = [ From 08fbaa9b9c5bb1e5784c927ad0626c4ab5158394 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Tue, 16 Dec 2025 12:52:42 +0100 Subject: [PATCH 06/10] min python 3.12 --- .github/workflows/test.yaml | 6 +++--- pyproject.toml | 9 +++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index cb1976bd..61931001 100644 --- 
a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -19,11 +19,11 @@ jobs: fail-fast: false matrix: include: - - {os: windows-latest, python: "3.11", dask-version: "2025.2.0", name: "Dask 2025.2.0"} + - {os: windows-latest, python: "3.12", dask-version: "2025.2.0", name: "Dask 2025.2.0"} - {os: windows-latest, python: "3.13", dask-version: "latest", name: "Dask latest"} - - {os: ubuntu-latest, python: "3.11", dask-version: "latest", name: "Dask latest"} + - {os: ubuntu-latest, python: "3.12", dask-version: "latest", name: "Dask latest"} - {os: ubuntu-latest, python: "3.13", dask-version: "latest", name: "Dask latest"} - - {os: macos-latest, python: "3.11", dask-version: "latest", name: "Dask latest"} + - {os: macos-latest, python: "3.12", dask-version: "latest", name: "Dask latest"} - {os: macos-latest, python: "3.13", prerelease: "allow", name: "Python 3.13 (pre-release)"} env: OS: ${{ matrix.os }} diff --git a/pyproject.toml b/pyproject.toml index 3e75b3e5..7a1dd8f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ maintainers = [ urls.Documentation = "https://spatialdata.scverse.org/en/latest" urls.Source = "https://github.com/scverse/spatialdata.git" urls.Home-page = "https://github.com/scverse/spatialdata.git" -requires-python = ">=3.11" +requires-python = ">=3.12" dynamic= [ "version" # allow version to be set by git tags ] @@ -89,13 +89,10 @@ extra = [ [dependency-groups] simulate-prerelease = [ - "geopandas @ git+https://github.com/geopandas/geopandas.git@main ; python_version >= '3.12'", - "anndata @ git+https://github.com/scverse/anndata.git@main ; python_version >= '3.12'", + "geopandas @ git+https://github.com/geopandas/geopandas.git@main", + "anndata @ git+https://github.com/scverse/anndata.git@main", ] -[tool.uv] -default-groups = [] - [tool.coverage.run] source = ["spatialdata"] omit = [ From 96caf6488ed122132e7d5d5d9ca59b1d42afd4dd Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Tue, 16 Dec 2025 12:55:05 +0100 
Subject: [PATCH 07/10] fix readthedocs python version --- .readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index acecf90e..1b41715d 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -3,7 +3,7 @@ version: 2 build: os: ubuntu-20.04 tools: - python: "3.11" + python: "3.12" sphinx: configuration: docs/conf.py fail_on_warning: true From 9127b9d020a05404c484572996c11d5c74650fb6 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Tue, 16 Dec 2025 14:54:23 +0100 Subject: [PATCH 08/10] only 1 test failing --- pyproject.toml | 2 ++ tests/io/test_partial_read.py | 46 ++++++++++++----------------------- 2 files changed, 18 insertions(+), 30 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7a1dd8f1..20bde6dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,6 +89,8 @@ extra = [ [dependency-groups] simulate-prerelease = [ + # also use --prerelease allow, this is the full call: + # uv sync --extra test --group simulate-prerelease --prerelease allow --upgrade "geopandas @ git+https://github.com/geopandas/geopandas.git@main", "anndata @ git+https://github.com/scverse/anndata.git@main", ] diff --git a/tests/io/test_partial_read.py b/tests/io/test_partial_read.py index e200c1fa..7c5d4784 100644 --- a/tests/io/test_partial_read.py +++ b/tests/io/test_partial_read.py @@ -11,9 +11,9 @@ from pathlib import Path from typing import TYPE_CHECKING +import anndata import py import pytest -import zarr from pyarrow import ArrowInvalid from zarr.errors import ArrayNotFoundError, ZarrUserWarning @@ -397,21 +397,24 @@ def sdata_with_invalid_zarr_json_element_violating_spec(session_tmp_path: Path) ) -@pytest.fixture(scope="module") -def sdata_with_table_region_not_found_zarrv3(session_tmp_path: Path) -> PartialReadTestCase: +def _create_sdata_with_table_region_not_found(session_tmp_path: Path, zarr_version: int) -> PartialReadTestCase: + """Helper for table region not found test cases (zarr v2 and 
v3).""" # table/table/.zarr referring to a region that is not found # This has been emitting just a warning, but does not fail reading the table element. sdata = blobs() - sdata_path = session_tmp_path / "sdata_with_invalid_table_region_not_found_zarrv3.zarr" - sdata.write(sdata_path) + sdata_path = session_tmp_path / f"sdata_with_table_region_not_found_zarrv{zarr_version}.zarr" + if zarr_version == 2: + sdata.write(sdata_path, sdata_formats=SpatialDataContainerFormatV01()) + else: + sdata.write(sdata_path) corrupted = "blobs_labels" # The element data is missing sdata.delete_element_from_disk(corrupted) # But the labels element is referenced as a region in a table - regions = zarr.open_group(sdata_path / "tables" / "table" / "obs" / "region", mode="r") - arrs = dict(regions.arrays()) - assert corrupted in arrs["categories"][arrs["codes"]] + adata = anndata.read_zarr(sdata_path / "tables" / "table") + assert corrupted in adata.obs["region"].values + not_corrupted = [name for _, name, _ in sdata.gen_elements() if name != corrupted] return PartialReadTestCase( @@ -425,30 +428,13 @@ def sdata_with_table_region_not_found_zarrv3(session_tmp_path: Path) -> PartialR @pytest.fixture(scope="module") -def sdata_with_table_region_not_found_zarrv2(session_tmp_path: Path) -> PartialReadTestCase: - # table/table/.zarr referring to a region that is not found - # This has been emitting just a warning, but does not fail reading the table element. 
- sdata = blobs() - sdata_path = session_tmp_path / "sdata_with_invalid_zattrs_table_region_not_found.zarr" - sdata.write(sdata_path, sdata_formats=SpatialDataContainerFormatV01()) +def sdata_with_table_region_not_found_zarrv3(session_tmp_path: Path) -> PartialReadTestCase: + return _create_sdata_with_table_region_not_found(session_tmp_path, zarr_version=3) - corrupted = "blobs_labels" - # The element data is missing - sdata.delete_element_from_disk(corrupted) - # But the labels element is referenced as a region in a table - regions = zarr.open_group(sdata_path / "tables" / "table" / "obs" / "region", mode="r") - arrs = dict(regions.arrays()) - assert corrupted in arrs["categories"][arrs["codes"]] - not_corrupted = [name for _, name, _ in sdata.gen_elements() if name != corrupted] - return PartialReadTestCase( - path=sdata_path, - expected_elements=not_corrupted, - expected_exceptions=(), - warnings_patterns=[ - rf"The table is annotating '{re.escape(corrupted)}', which is not present in the SpatialData object" - ], - ) +@pytest.fixture(scope="module") +def sdata_with_table_region_not_found_zarrv2(session_tmp_path: Path) -> PartialReadTestCase: + return _create_sdata_with_table_region_not_found(session_tmp_path, zarr_version=2) @pytest.mark.parametrize( From 885493848276bfda0022dc19fa34ee132488719c Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Tue, 16 Dec 2025 15:11:40 +0100 Subject: [PATCH 09/10] fix all tests --- pyproject.toml | 1 + src/spatialdata/_io/io_shapes.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 20bde6dc..2f5cb4b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,6 +91,7 @@ extra = [ simulate-prerelease = [ # also use --prerelease allow, this is the full call: # uv sync --extra test --group simulate-prerelease --prerelease allow --upgrade + # uv run --no-sync python "geopandas @ git+https://github.com/geopandas/geopandas.git@main", "anndata @ 
git+https://github.com/scverse/anndata.git@main", ] diff --git a/src/spatialdata/_io/io_shapes.py b/src/spatialdata/_io/io_shapes.py index fa6bf81e..8e6d4a60 100644 --- a/src/spatialdata/_io/io_shapes.py +++ b/src/spatialdata/_io/io_shapes.py @@ -121,7 +121,9 @@ def _write_shapes_v01(shapes: GeoDataFrame, group: zarr.Group, element_format: F """ import numcodecs - geometry, coords, offsets = to_ragged_array(shapes.geometry) + # np.array() creates a writable copy, needed for pandas 3.0 CoW compatibility + # https://github.com/geopandas/geopandas/issues/3697 + geometry, coords, offsets = to_ragged_array(np.array(shapes.geometry)) group.create_array(name="coords", data=coords) for i, o in enumerate(offsets): group.create_array(name=f"offset{i}", data=o) From 1768e7ef7fc0c3bdc55feb72fd202b10df773588 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Wed, 17 Dec 2025 17:39:27 +0100 Subject: [PATCH 10/10] fix models dtype --- src/spatialdata/models/models.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/src/spatialdata/models/models.py b/src/spatialdata/models/models.py index bed33ff1..5428c5c2 100644 --- a/src/spatialdata/models/models.py +++ b/src/spatialdata/models/models.py @@ -668,7 +668,11 @@ def validate(cls, data: DaskDataFrame) -> None: if ATTRS_KEY in data.attrs and "feature_key" in data.attrs[ATTRS_KEY]: feature_key = data.attrs[ATTRS_KEY][cls.FEATURE_KEY] if feature_key not in data.columns: - warnings.warn(f"Column `{feature_key}` not found." + SUGGESTION, UserWarning, stacklevel=2) + warnings.warn( + f"Column `{feature_key}` not found." + SUGGESTION, + UserWarning, + stacklevel=2, + ) @singledispatchmethod @classmethod @@ -1028,16 +1032,21 @@ def _validate_table_annotation_metadata(self, data: AnnData) -> None: raise ValueError(f"`{attr[self.REGION_KEY_KEY]}` not found in `adata.obs`. 
Please create the column.") if attr[self.INSTANCE_KEY] not in data.obs: raise ValueError(f"`{attr[self.INSTANCE_KEY]}` not found in `adata.obs`. Please create the column.") - if (dtype := data.obs[attr[self.INSTANCE_KEY]].dtype) not in [ - int, - np.int16, - np.uint16, - np.int32, - np.uint32, - np.int64, - np.uint64, - "O", - ] or (dtype == "O" and (val_dtype := type(data.obs[attr[self.INSTANCE_KEY]].iloc[0])) is not str): + if ( + (dtype := data.obs[attr[self.INSTANCE_KEY]].dtype) + not in [ + int, + np.int16, + np.uint16, + np.int32, + np.uint32, + np.int64, + np.uint64, + "O", + ] + and not pd.api.types.is_string_dtype(data.obs[attr[self.INSTANCE_KEY]]) + or (dtype == "O" and (val_dtype := type(data.obs[attr[self.INSTANCE_KEY]].iloc[0])) is not str) + ): dtype = dtype if dtype != "O" else val_dtype raise TypeError( f"Only int, np.int16, np.int32, np.int64, uint equivalents or string allowed as dtype for "