From 52fa993e70f869cdf80a38eb061f785c6c657067 Mon Sep 17 00:00:00 2001
From: Austin Noto-Moniz <anoto-moniz@citrine.io>
Date: Wed, 7 Jan 2026 14:27:14 -0500
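Subject: [PATCH] Drop support for CSVDataSource

CSVDataSource has been deprecated since 3.28.0, with removal announced
for 4.0.0. Remove the class, its entries in __all__ and
DataSource._subclass_list, the imports that are no longer used, and the
tests that exercised it.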
---
 src/citrine/informatics/data_sources.py | 61 +------------------------
 tests/informatics/test_data_source.py   | 26 +----------
 2 files changed, 3 insertions(+), 84 deletions(-)

diff --git a/src/citrine/informatics/data_sources.py b/src/citrine/informatics/data_sources.py
index 77bed62d6..5b209100f 100644
--- a/src/citrine/informatics/data_sources.py
+++ b/src/citrine/informatics/data_sources.py
@@ -1,19 +1,15 @@
 """Tools for working with Descriptors."""
 from abc import abstractmethod
-from typing import Type, List, Mapping, Optional, Union
+from typing import Type, List, Union
 from uuid import UUID
-from warnings import warn
 
 from citrine._serialization import properties
 from citrine._serialization.polymorphic_serializable import PolymorphicSerializable
 from citrine._serialization.serializable import Serializable
-from citrine.informatics.descriptors import Descriptor
-from citrine.resources.file_link import FileLink
 from citrine.resources.gemtables import GemTable
 
 __all__ = [
     'DataSource',
-    'CSVDataSource',
     'GemTableDataSource',
     'ExperimentDataSourceRef',
     'SnapshotDataSource',
@@ -36,7 +32,7 @@ def __eq__(self, other):
 
     @classmethod
     def _subclass_list(self) -> List[Type[Serializable]]:
-        return [CSVDataSource, GemTableDataSource, ExperimentDataSourceRef, SnapshotDataSource]
+        return [GemTableDataSource, ExperimentDataSourceRef, SnapshotDataSource]
 
     @classmethod
     def get_type(cls, data) -> Type[Serializable]:
@@ -72,59 +68,6 @@ def to_data_source_id(self) -> str:
         """Generate the data_source_id for this DataSource."""
 
 
-class CSVDataSource(Serializable['CSVDataSource'], DataSource):
-    """A data source based on a CSV file stored on the data platform.
-
-    Parameters
-    ----------
-    file_link: FileLink
-        link to the CSV file to read the data from
-    column_definitions: Mapping[str, Descriptor]
-        Map the column headers to the descriptors that will be used to interpret the cell contents
-    identifiers: Optional[List[str]]
-        List of one or more column headers whose values uniquely identify a row. These may overlap
-        with ``column_definitions`` if a column should be used as data and as an identifier,
-        but this is not necessary. Identifiers must be unique within a dataset. No two rows can
-        contain the same value.
-
-    """
-
-    typ = properties.String('type', default='csv_data_source', deserializable=False)
-    file_link = properties.Object(FileLink, "file_link")
-    column_definitions = properties.Mapping(
-        properties.String, properties.Object(Descriptor), "column_definitions")
-    identifiers = properties.Optional(properties.List(properties.String), "identifiers")
-
-    _data_source_type = "csv"
-
-    def __init__(self,
-                 *,
-                 file_link: FileLink,
-                 column_definitions: Mapping[str, Descriptor],
-                 identifiers: Optional[List[str]] = None):
-        warn("CSVDataSource is deprecated as of 3.28.0 and will be removed in 4.0.0. Please use "
-             "another type of data source, such as GemTableDataSource.",
-             category=DeprecationWarning)
-        self.file_link = file_link
-        self.column_definitions = column_definitions
-        self.identifiers = identifiers
-
-    @classmethod
-    def _data_source_id_builder(cls, *args) -> DataSource:
-        # TODO Figure out how to populate the column definitions
-        warn("A CSVDataSource was derived from a data_source_id "
-             "but is missing its column_definitions and identities",
-             UserWarning)
-        return CSVDataSource(
-            file_link=FileLink(url=args[0], filename=args[1]),
-            column_definitions={}
-        )
-
-    def to_data_source_id(self) -> str:
-        """Generate the data_source_id for this DataSource."""
-        return f"{self._data_source_type}::{self.file_link.url}::{self.file_link.filename}"
-
-
 class GemTableDataSource(Serializable['GemTableDataSource'], DataSource):
     """A data source based on a GEM Table hosted on the data platform.
 
diff --git a/tests/informatics/test_data_source.py b/tests/informatics/test_data_source.py
index b1b4e2a06..7ca003371 100644
--- a/tests/informatics/test_data_source.py
+++ b/tests/informatics/test_data_source.py
@@ -4,7 +4,7 @@
 import pytest
 
 from citrine.informatics.data_sources import (
-    DataSource, CSVDataSource, ExperimentDataSourceRef, GemTableDataSource, SnapshotDataSource
+    DataSource, ExperimentDataSourceRef, GemTableDataSource, SnapshotDataSource
 )
 from citrine.informatics.descriptors import RealDescriptor
 from citrine.resources.file_link import FileLink
@@ -54,27 +54,3 @@ def test_from_gem_table():
 def test_invalid_data_source_id():
     with pytest.raises(ValueError):
         DataSource.from_data_source_id(f"Undefined::{uuid.uuid4()}")
-
-
-def test_deser_from_parent_deprecated():
-    with pytest.deprecated_call():
-        data_source = CSVDataSource(file_link=FileLink("foo.spam", "http://example.com"),
-                                    column_definitions={"spam": RealDescriptor("eggs", lower_bound=0, upper_bound=1.0, units="")},
-                                    identifiers=["identifier"])
-
-    # Serialize and deserialize the descriptors, making sure they are round-trip serializable
-    data = data_source.dump()
-    data_source_deserialized = DataSource.build(data)
-    assert data_source == data_source_deserialized
-
-def test_data_source_id_deprecated():
-    with pytest.deprecated_call():
-        data_source = CSVDataSource(file_link=FileLink("foo.spam", "http://example.com"),
-                                    column_definitions={"spam": RealDescriptor("eggs", lower_bound=0, upper_bound=1.0, units="")},
-                                    identifiers=["identifier"])
-    
-    # TODO: There's no obvious way to recover the column_definitions & identifiers from the ID
-    with pytest.deprecated_call():
-        with pytest.warns(UserWarning):
-            transformed = DataSource.from_data_source_id(data_source.to_data_source_id())
-    assert transformed.file_link == data_source.file_link