Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions fairscape_models/activity.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pydantic import BaseModel, Field, ConfigDict
from typing import Optional, List
from pydantic import BaseModel, Field, ConfigDict, model_validator
from typing import Optional, List, Union

from fairscape_models.fairscape_base import IdentifierValue

Expand All @@ -13,4 +13,8 @@ class Activity(BaseModel):
generated: Optional[List[IdentifierValue]] = Field(default=[])
isPartOf: Optional[List[IdentifierValue]] = Field(default=[])

model_config = ConfigDict(extra="allow")
# PROV-O fields (auto-populated)
used: Optional[List[Union[str, IdentifierValue]]] = Field(default=[], alias="prov:used")
wasAssociatedWith: Optional[List[Union[str, IdentifierValue]]] = Field(default=[], alias="prov:wasAssociatedWith")

model_config = ConfigDict(extra="allow", populate_by_name=True)
21 changes: 18 additions & 3 deletions fairscape_models/annotation.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
from pydantic import Field, ConfigDict
from typing import Optional, List
from pydantic import Field, ConfigDict, model_validator
from typing import Optional, List, Union

from fairscape_models.fairscape_base import IdentifierValue, ANNOTATION_TYPE
from fairscape_models.activity import Activity

class Annotation(Activity):
metadataType: Optional[str] = Field(default="https://w3id.org/EVI#Annotation", alias="@type")
additionalType: Optional[str] = Field(default=ANNOTATION_TYPE)
createdBy: str
createdBy: Union[str, IdentifierValue]
dateCreated: str
usedDataset: Optional[List[IdentifierValue]] = Field(default=[])

@model_validator(mode='after')
def populate_prov_fields(self):
    """Mirror the EVI provenance fields onto their PROV-O counterparts.

    ``usedDataset`` is assigned (same list object) to ``used``, falling back
    to an empty list when it is empty or ``None``. A truthy ``createdBy`` is
    wrapped in a one-element list and stored in ``wasAssociatedWith`` with its
    original type (``str`` or ``IdentifierValue``) preserved.

    Returns:
        The model instance itself, as an ``after`` model validator must.
    """
    # prov:used <- usedDataset
    self.used = self.usedDataset or []

    # prov:wasAssociatedWith <- createdBy; left untouched when createdBy is falsy
    creator = self.createdBy
    if creator:
        self.wasAssociatedWith = [creator]

    return self
22 changes: 20 additions & 2 deletions fairscape_models/computation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pydantic import Field, ConfigDict
from pydantic import Field, ConfigDict, model_validator
from typing import Optional, List, Union

from fairscape_models.fairscape_base import IdentifierValue, COMPUTATION_TYPE
Expand All @@ -7,10 +7,28 @@
class Computation(Activity):
metadataType: Optional[str] = Field(default="https://w3id.org/EVI#Computation", alias="@type")
additionalType: Optional[str] = Field(default=COMPUTATION_TYPE)
runBy: str
runBy: Union[str, IdentifierValue]
dateCreated: str
additionalDocumentation: Optional[str] = Field(default=None)
command: Optional[Union[List[str], str]] = Field(default=None)
usedSoftware: Optional[List[IdentifierValue]] = Field(default=[])
usedMLModel: Optional[List[IdentifierValue]] = Field(default=[])
usedDataset: Optional[List[IdentifierValue]] = Field(default=[])

@model_validator(mode='after')
def populate_prov_fields(self):
    """Derive the PROV-O fields of a Computation from its EVI fields.

    ``used`` becomes a new list holding every entry of ``usedSoftware``,
    ``usedMLModel`` and ``usedDataset``, in that order; empty or ``None``
    groups contribute nothing. A truthy ``runBy`` is stored as the single
    entry of ``wasAssociatedWith``.

    Returns:
        The model instance itself, as an ``after`` model validator must.
    """
    # Order matters: software first, then ML models, then datasets.
    input_groups = (self.usedSoftware, self.usedMLModel, self.usedDataset)
    self.used = [
        entry
        for group in input_groups
        if group
        for entry in group
    ]

    # prov:wasAssociatedWith <- runBy; left untouched when runBy is falsy
    agent = self.runBy
    if agent:
        self.wasAssociatedWith = [agent]

    return self
30 changes: 28 additions & 2 deletions fairscape_models/dataset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pydantic import Field, ConfigDict, AliasChoices
from pydantic import Field, ConfigDict, AliasChoices, model_validator
from typing import Optional, List, Union

from fairscape_models.fairscape_base import IdentifierValue, DATASET_TYPE
Expand All @@ -16,4 +16,30 @@ class Dataset(DigitalObject):
default=None
)
generatedBy: Optional[Union[IdentifierValue, List[IdentifierValue]]] = Field(default=[])
derivedFrom: Optional[List[IdentifierValue]] = Field(default=[])
derivedFrom: Optional[List[IdentifierValue]] = Field(default=[])

@model_validator(mode='after')
def populate_prov_fields(self):
    """Auto-populate the PROV-O fields of a Dataset from its EVI fields.

    Mapping:
        * ``generatedBy`` -> ``wasGeneratedBy`` (a single value is wrapped in a list)
        * ``derivedFrom`` -> ``wasDerivedFrom``
        * ``author``      -> ``wasAttributedTo``

    ``author`` may be a ``str``, an ``IdentifierValue``, or a list mixing
    both. Strings are wrapped as ``IdentifierValue(**{"@id": value})``;
    values that are already ``IdentifierValue`` instances are kept as-is
    instead of being re-wrapped or silently skipped.

    Returns:
        The model instance itself, as an ``after`` model validator must.
    """
    # generatedBy -> prov:wasGeneratedBy (always a list)
    generated_by = self.generatedBy
    if not generated_by:
        self.wasGeneratedBy = []
    elif isinstance(generated_by, list):
        self.wasGeneratedBy = generated_by
    else:
        self.wasGeneratedBy = [generated_by]

    # derivedFrom -> prov:wasDerivedFrom
    self.wasDerivedFrom = self.derivedFrom or []

    # author -> prov:wasAttributedTo
    author = self.author
    if not author:
        self.wasAttributedTo = []
    else:
        # Normalise the scalar forms (str or IdentifierValue) to a list so
        # that every accepted shape of `author` goes through one code path.
        authors = author if isinstance(author, list) else [author]
        self.wasAttributedTo = [
            entry if isinstance(entry, IdentifierValue)
            else IdentifierValue(**{"@id": entry})
            for entry in authors
        ]

    return self
11 changes: 8 additions & 3 deletions fairscape_models/digital_object.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pydantic import BaseModel, Field, ConfigDict
from pydantic import BaseModel, Field, ConfigDict, model_validator
from typing import Optional, List, Union

from fairscape_models.fairscape_base import IdentifierValue
Expand All @@ -8,7 +8,7 @@ class DigitalObject(BaseModel):
guid: str = Field(alias="@id")
name: str
metadataType: Optional[str] = Field(default=None, alias="@type")
author: Union[str, List[str]]
author: Union[str, IdentifierValue, List[Union[str, IdentifierValue]]]
description: str = Field(min_length=10)
version: str = Field(default="0.1.0")
associatedPublication: Optional[Union[str, List[str]]] = Field(default=None)
Expand All @@ -17,4 +17,9 @@ class DigitalObject(BaseModel):
isPartOf: Optional[List[IdentifierValue]] = Field(default=[])
usedByComputation: Optional[List[IdentifierValue]] = Field(default=[])

model_config = ConfigDict(extra="allow")
# PROV-O fields (auto-populated)
wasGeneratedBy: Optional[List[Union[str, IdentifierValue]]] = Field(default=[], alias="prov:wasGeneratedBy")
wasDerivedFrom: Optional[List[Union[str, IdentifierValue]]] = Field(default=[], alias="prov:wasDerivedFrom")
wasAttributedTo: Optional[List[Union[str, IdentifierValue]]] = Field(default=[], alias="prov:wasAttributedTo")

model_config = ConfigDict(extra="allow", populate_by_name=True)
28 changes: 24 additions & 4 deletions fairscape_models/experiment.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,35 @@
from pydantic import Field, ConfigDict
from typing import Optional, List
from pydantic import Field, ConfigDict, model_validator
from typing import Optional, List, Union
from fairscape_models.fairscape_base import IdentifierValue
from fairscape_models.activity import Activity

class Experiment(Activity):
metadataType: Optional[str] = Field(default="https://w3id.org/EVI#Experiment", alias="@type")
experimentType: str
runBy: str
runBy: Union[str, IdentifierValue]
datePerformed: str
protocol: Optional[str] = Field(default=None)
usedInstrument: Optional[List[IdentifierValue]] = Field(default=[])
usedSample: Optional[List[IdentifierValue]] = Field(default=[])
usedTreatment: Optional[List[IdentifierValue]] = Field(default=[])
usedStain: Optional[List[IdentifierValue]] = Field(default=[])
usedStain: Optional[List[IdentifierValue]] = Field(default=[])

@model_validator(mode='after')
def populate_prov_fields(self):
    """Derive the PROV-O fields of an Experiment from its EVI fields.

    ``used`` becomes a new list holding every entry of ``usedInstrument``,
    ``usedSample``, ``usedTreatment`` and ``usedStain``, in that order; empty
    or ``None`` groups contribute nothing. A truthy ``runBy`` is stored as
    the single entry of ``wasAssociatedWith``.

    Returns:
        The model instance itself, as an ``after`` model validator must.
    """
    # Order matters: instruments, samples, treatments, then stains.
    input_groups = (
        self.usedInstrument,
        self.usedSample,
        self.usedTreatment,
        self.usedStain,
    )
    self.used = [
        entry
        for group in input_groups
        if group
        for entry in group
    ]

    # prov:wasAssociatedWith <- runBy; left untouched when runBy is falsy
    agent = self.runBy
    if agent:
        self.wasAssociatedWith = [agent]

    return self
1 change: 1 addition & 0 deletions fairscape_models/fairscape_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"@vocab": "https://schema.org/",
"evi": "https://w3id.org/EVI#",
"rai": "http://mlcommons.org/croissant/RAI/",
"prov": "http://www.w3.org/ns/prov#",

# TODO fully specify default context
"usedSoftware": {
Expand Down
31 changes: 30 additions & 1 deletion fairscape_models/mlmodel.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pydantic import Field, ConfigDict
from pydantic import Field, ConfigDict, model_validator
from typing import Optional, List, Union

from fairscape_models.fairscape_base import IdentifierValue, MLMODEL_TYPE
Expand All @@ -12,3 +12,32 @@ class MLModel(DigitalObject):
modelTask: Optional[str] = Field(default=None)
modelArchitecture: Optional[str] = Field(default=None)
trainedOn: Optional[List[IdentifierValue]] = Field(default=[])
generatedBy: Optional[Union[IdentifierValue, List[IdentifierValue]]] = Field(default=[])
derivedFrom: Optional[List[IdentifierValue]] = Field(default=[])

@model_validator(mode='after')
def populate_prov_fields(self):
    """Auto-populate the PROV-O fields of an MLModel from its EVI fields.

    Mapping:
        * ``generatedBy`` -> ``wasGeneratedBy`` (a single value is wrapped in a list)
        * ``derivedFrom`` -> ``wasDerivedFrom``
        * ``author``      -> ``wasAttributedTo``

    ``author`` may be a ``str``, an ``IdentifierValue``, or a list mixing
    both. Strings are wrapped as ``IdentifierValue(**{"@id": value})``;
    values that are already ``IdentifierValue`` instances are kept as-is
    instead of being re-wrapped or silently skipped.

    Returns:
        The model instance itself, as an ``after`` model validator must.
    """
    # generatedBy -> prov:wasGeneratedBy (always a list)
    generated_by = self.generatedBy
    if not generated_by:
        self.wasGeneratedBy = []
    elif isinstance(generated_by, list):
        self.wasGeneratedBy = generated_by
    else:
        self.wasGeneratedBy = [generated_by]

    # derivedFrom -> prov:wasDerivedFrom
    self.wasDerivedFrom = self.derivedFrom or []

    # author -> prov:wasAttributedTo
    author = self.author
    if not author:
        self.wasAttributedTo = []
    else:
        # Normalise the scalar forms (str or IdentifierValue) to a list so
        # that every accepted shape of `author` goes through one code path.
        authors = author if isinstance(author, list) else [author]
        self.wasAttributedTo = [
            entry if isinstance(entry, IdentifierValue)
            else IdentifierValue(**{"@id": entry})
            for entry in authors
        ]

    return self
23 changes: 23 additions & 0 deletions fairscape_models/rocrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,14 +245,25 @@ def cleanIdentifierUnion(identifier_union):

cleanIdentifierUnion(elem.generatedBy)

# Clean PROV fields
cleanIdentifierList(elem.wasGeneratedBy)
cleanIdentifierList(elem.wasDerivedFrom)
cleanIdentifierList(elem.wasAttributedTo)

if isinstance(elem, Software):
cleanIdentifierList(elem.usedByComputation)

# Clean PROV fields
cleanIdentifierList(elem.wasAttributedTo)

if isinstance(elem, MLModel):
cleanIdentifierList(elem.usedByComputation)

cleanIdentifierList(elem.trainedOn)

# Clean PROV fields
cleanIdentifierList(elem.wasAttributedTo)

if isinstance(elem, Computation):

cleanIdentifierList(elem.usedDataset)
Expand All @@ -263,12 +274,20 @@ def cleanIdentifierUnion(identifier_union):

cleanIdentifierList(elem.usedMLModel)

# Clean PROV fields
cleanIdentifierList(elem.used)
cleanIdentifierList(elem.wasAssociatedWith)

if isinstance(elem, Annotation):

cleanIdentifierList(elem.usedDataset)

cleanIdentifierList(elem.generated)

# Clean PROV fields
cleanIdentifierList(elem.used)
cleanIdentifierList(elem.wasAssociatedWith)

if isinstance(elem, Experiment):

cleanIdentifierList(elem.usedInstrument)
Expand All @@ -281,6 +300,10 @@ def cleanIdentifierUnion(identifier_union):

cleanIdentifierList(elem.generated)

# Clean PROV fields
cleanIdentifierList(elem.used)
cleanIdentifierList(elem.wasAssociatedWith)

def getCrateMetadata(self)-> ROCrateMetadataElem:
""" Filter the Metadata Graph for the Metadata Element Describing the Toplevel ROCrate

Expand Down
19 changes: 17 additions & 2 deletions fairscape_models/software.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pydantic import Field, ConfigDict
from pydantic import Field, ConfigDict, model_validator
from typing import Optional, List

from fairscape_models.fairscape_base import IdentifierValue, SOFTWARE_TYPE
Expand All @@ -7,5 +7,20 @@
class Software(DigitalObject):
metadataType: Optional[str] = Field(default="https://w3id.org/EVI#Software", alias="@type")
additionalType: Optional[str] = Field(default=SOFTWARE_TYPE)
dateModified: Optional[str]
dateModified: Optional[str] = None
fileFormat: str = Field(title="fileFormat", alias="format")

@model_validator(mode='after')
def populate_prov_fields(self):
    """Derive ``wasAttributedTo`` (prov:wasAttributedTo) from ``author``.

    A list-valued ``author`` is assigned as-is (same list object), a single
    truthy value is wrapped in a one-element list, and a falsy ``author``
    yields an empty list. Entries are passed through without conversion.

    Returns:
        The model instance itself, as an ``after`` model validator must.
    """
    author = self.author
    if not author:
        attributed = []
    elif isinstance(author, list):
        attributed = author
    else:
        attributed = [author]
    self.wasAttributedTo = attributed

    return self
22 changes: 22 additions & 0 deletions tests/test_annotation.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,37 @@
import pytest
from pydantic import ValidationError
from fairscape_models.annotation import Annotation
from fairscape_models.fairscape_base import IdentifierValue

def test_annotation_instantiation(annotation_minimal_data):
    """An Annotation validates from minimal data and derives its PROV-O fields."""
    payload = annotation_minimal_data
    model = Annotation.model_validate(payload)

    assert model.guid == payload["@id"]
    assert model.description == payload["description"]

    # The minimal fixture is expected to carry no usedDataset, so prov:used is empty ...
    assert model.used == []
    # ... while createdBy surfaces as the only prov:wasAssociatedWith entry.
    assert len(model.wasAssociatedWith) == 1
    assert model.wasAssociatedWith[0] == payload["createdBy"]

def test_annotation_short_description(annotation_minimal_data):
    """A too-short description must be rejected with a ValidationError."""
    annotation_minimal_data.update(description="too short")

    with pytest.raises(ValidationError):
        Annotation.model_validate(annotation_minimal_data)

def test_annotation_with_datasets(annotation_minimal_data):
    """prov:used is populated from the datasets listed under usedDataset."""
    dataset_ids = ["ark:59852/dataset-1", "ark:59852/dataset-2"]
    annotation_minimal_data["usedDataset"] = [{"@id": guid} for guid in dataset_ids]

    model = Annotation.model_validate(annotation_minimal_data)

    # Both datasets must appear in prov:used, each as an IdentifierValue.
    assert len(model.used) == 2
    for entry in model.used:
        assert isinstance(entry, IdentifierValue)

    found_ids = [entry.guid for entry in model.used]
    for guid in dataset_ids:
        assert guid in found_ids
Loading