From 95d61969254b782a6da59434082531dcb8bd42a6 Mon Sep 17 00:00:00 2001 From: jniestroy Date: Wed, 22 Jan 2025 10:32:22 -0500 Subject: [PATCH 1/4] new spec --- src/fairscape_cli/models/base.py | 5 - src/fairscape_cli/models/rocrate.py | 394 ++++++++++++++------------- src/fairscape_cli/rocrate/rocrate.py | 4 +- 3 files changed, 206 insertions(+), 197 deletions(-) diff --git a/src/fairscape_cli/models/base.py b/src/fairscape_cli/models/base.py index fed329e..187f3c8 100644 --- a/src/fairscape_cli/models/base.py +++ b/src/fairscape_cli/models/base.py @@ -40,11 +40,6 @@ class FairscapeBaseModel(BaseModel): title="guid", alias="@id" ) - context: Dict[str,str] = Field( - default=default_context, - title="context", - alias="@context" - ) metadataType: str = Field( title="metadataType", alias="@type" diff --git a/src/fairscape_cli/models/rocrate.py b/src/fairscape_cli/models/rocrate.py index 275c8b1..179db28 100644 --- a/src/fairscape_cli/models/rocrate.py +++ b/src/fairscape_cli/models/rocrate.py @@ -1,10 +1,8 @@ import pathlib import shutil import json -from typing import Optional, Union, List, Literal, Dict - -from prettytable import PrettyTable -from pydantic import BaseModel, computed_field, Field +from typing import Optional, Union, List, Literal, Dict, Any +from pydantic import BaseModel, Field, ConfigDict, model_validator from fairscape_cli.config import NAAN, DEFAULT_CONTEXT from fairscape_cli.models.software import Software @@ -12,15 +10,70 @@ from fairscape_cli.models.computation import Computation from fairscape_cli.models.guid_utils import GenerateDatetimeSquid +class ROCrateMetadataDescriptor(BaseModel): + model_config = ConfigDict(populate_by_name=True) + + id: str = Field(default="ro-crate-metadata.json", alias="@id") + type: Literal["CreativeWork"] = Field(alias="@type") + conformsTo: Dict = Field(default={ + "@id": "https://w3id.org/ro/crate/1.2-DRAFT" + }) + about: Dict[str, str] + class ROCrateMetadata(BaseModel): - guid: Optional[str] = Field(alias="@id", default=None) - metadataType: Optional[str] = Field(alias="@type", default= "https://w3id.org/EVI#ROCrate") - context: Dict[str, str] = Field(default=DEFAULT_CONTEXT) - name: str = Field(max_length=200) - description: str = Field(min_length=10) - keywords: List[str] = Field(default=[]) - isPartOf: Optional[List[Dict]] - metadataGraph: Optional[List[Union[Dataset,Software, Computation]]] = Field(alias="@graph", default=[]) + model_config = ConfigDict( + populate_by_name=True, + extra='forbid' + ) + + context: Dict[str, str] = Field( + default={ + "EVI": "https://w3id.org/EVI#", + "@vocab": "https://schema.org/" + }, + alias="@context" + ) + graph: List[Dict] = Field(alias="@graph") + + @model_validator(mode='after') + def validate_metadata(self) -> 'ROCrateMetadata': + self.validate_metadata_descriptor() + self.validate_graph_elements() + return self + + def validate_metadata_descriptor(self): + # Check for metadata descriptor + descriptors = [item for item in self.graph + if item.get("@id") == "ro-crate-metadata.json"] + if not descriptors: + raise ValueError("Missing required metadata descriptor in @graph") + + descriptor = descriptors[0] + # Validate descriptor + ROCrateMetadataDescriptor(**descriptor) + + # Validate about reference exists in graph + about_id = descriptor.get("about", {}).get("@id") + if not about_id: + raise ValueError("Metadata descriptor missing root node in about.@id") + + # Check root exists + root_items = [item for item in self.graph if item.get("@id") == about_id] + if not root_items: + raise ValueError(f"Root id {about_id} referenced in about.@id not found in @graph") + + def validate_graph_elements(self): + """Validate each element in @graph is flat and has an id""" + for item in self.graph: + if "@id" not in item or "@type" not in item: + raise ValueError("All @graph elements must have @id and @type properties") + + # Validate nested objects only contain @id + for key, value in item.items(): + if isinstance(value, dict): + allowed_keys = {"@id"} + if set(value.keys()) - allowed_keys: + raise ValueError(f"Nested object under '{key}' can only contain '@id' property") def GenerateROCrate( path: pathlib.Path, @@ -32,75 +85,80 @@ def GenerateROCrate( projectName: str = None, ): - # overwrite custom GUIDs + # Generate GUID if not provided sq = GenerateDatetimeSquid() guid = f"ark:{NAAN}/rocrate-{name.lower().replace(' ', '-')}-{sq}" - roCrateInstanceMetadata = { + # Create root dataset entity + root_dataset = { "@id": guid, - "@type": "https://w3id.org/EVI#ROCrate", + "@type": ["Dataset", "https://w3id.org/EVI#ROCrate"], "name": name, - "isPartOf": [], "keywords": keywords, "description": description, - "metadataGraph": [] - } + "hasPart": [] + } + if 'isPartOf' not in root_dataset: + root_dataset['isPartOf'] = [] + if organizationName: - organizationGuid = f"ark:{NAAN}/organization-{organizationName.lower().replace(' ', '-')}-{GenerateDatetimeSquid()}" - roCrateInstanceMetadata['isPartOf'].append( - { - "@id": organizationGuid, - "@type": "Organization", - "name": organizationName - } - ) + organization_guid = f"ark:{NAAN}/organization-{organizationName.lower().replace(' ', '-')}-{GenerateDatetimeSquid()}" + root_dataset['isPartOf'] = [{ + "@id": organization_guid + }] if projectName: - projectGuid = f"ark:{NAAN}/project-{projectName.lower().replace(' ', '-')}-{GenerateDatetimeSquid()}" - roCrateInstanceMetadata['isPartOf'].append( - { - "@id": projectGuid, - "@type": "Project", - "name": projectName - } - ) - - - rocrateInstance = ROCrateMetadata.model_validate(roCrateInstanceMetadata) + project_guid = f"ark:{NAAN}/project-{projectName.lower().replace(' ', '-')}-{GenerateDatetimeSquid()}" + + root_dataset['isPartOf'].append({ + "@id": project_guid + }) + + metadata_descriptor = { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2-DRAFT"}, + "about": {"@id": guid} + } + + # Create full RO-Crate structure + rocrate_metadata = { + "@context": DEFAULT_CONTEXT, + "@graph": [ + metadata_descriptor, + root_dataset + ] + } + + # Validate the structure + ROCrateMetadata(**rocrate_metadata) + # Write to file if 'ro-crate-metadata.json' in str(path): roCrateMetadataPath = path - - # if the parent folder doesn't exist, create the parent folder if not path.parent.exists(): path.parent.mkdir(parents=True, exist_ok=True) else: roCrateMetadataPath = path / 'ro-crate-metadata.json' - - # if the parent folder doesn't exist, create the parent folder if not path.exists(): path.mkdir(parents=True, exist_ok=True) - with roCrateMetadataPath.open(mode="w") as metadataFile: - serializedMetadata = rocrateInstance.model_dump(by_alias=True) - json.dump(serializedMetadata, metadataFile, indent=2) - - return rocrateInstance - + json.dump(rocrate_metadata, metadataFile, indent=2) + return rocrate_metadata["@graph"][1] class ROCrate(BaseModel): + model_config = ConfigDict(populate_by_name=True) + guid: Optional[str] = Field(alias="@id", default=None) - metadataType: str = Field(alias="@type", default="https://w3id.org/EVI#ROCrate") name: str = Field(max_length=200) - description: str = Field(min_length=10) - keywords: List[str] = Field(...) - projectName: Optional[str] = Field(default=None) - organizationName: Optional[str] = Field(default=None) + description: str = Field(min_length=5) + keywords: List[str] + projectName: Optional[str] = None + organizationName: Optional[str] = None path: pathlib.Path - metadataGraph: Optional[List[Union[Dataset,Software, Computation]]] = Field(alias="@graph", default=[]) def generate_guid(self) -> str: if self.guid is None: @@ -108,158 +166,111 @@ def generate_guid(self) -> str: self.guid = f"ark:{NAAN}/rocrate-{self.name.replace(' ', '-').lower()}-{sq}" return self.guid - def createCrateFolder(self): self.path.mkdir(parents=True, exist_ok=True) - def initCrate(self): - """Create an rocrate at the current working directory, initilize the ro-crate-metadata.json - - """ - - # create basic rocrate metadata - if self.path.is_dir(): - ro_crate_metadata_path = self.path / 'ro-crate-metadata.json' - - # create guid if none exists + """Create an ROCrate and initialize ro-crate-metadata.json""" + ro_crate_metadata_path = self.path / 'ro-crate-metadata.json' self.generate_guid() - rocrate_metadata = { + # Create root dataset + root_dataset = { "@id": self.guid, - "@context": DEFAULT_CONTEXT, - "@type": "EVI:Dataset", + "@type": ["Dataset", "https://w3id.org/EVI#ROCrate"], "name": self.name, "description": self.description, "keywords": self.keywords, - "isPartOf": [], - "@graph": [] + "hasPart": [] } + # Add organization and project if specified if self.organizationName: organization_guid = f"ark:{NAAN}/organization-{self.organizationName.lower().replace(' ', '-')}-{GenerateDatetimeSquid()}" - rocrate_metadata['isPartOf'].append( - { - "@id": organization_guid, - "@type": "Organization", - "name": self.organizationName - } - ) + root_dataset['isPartOf'] = [{ + "@id": organization_guid, + }] if self.projectName: project_guid = f"ark:{NAAN}/project-{self.projectName.lower().replace(' ', '-')}-{GenerateDatetimeSquid()}" - rocrate_metadata['isPartOf'].append( - { - "@id": project_guid, - "@type": "Project", - "name": self.projectName - } - ) - - # write out to file - with ro_crate_metadata_path.open(mode="w") as metadata_file: - json.dump(rocrate_metadata, metadata_file, indent=2) - - #TODO add to cache - - #TODO list all contents that need to be registered as warnings - - - def copyObject(self, source_filepath: str, destination_filepath: str): - - if source_filepath == "": - raise Exception(message="source path is None") - - if destination_filepath == "": - raise Exception(message="destination path is None") - - # check if the source file exists - source_path = pathlib.Path(source_filepath) - destination_path = pathlib.Path(destination_filepath) - - if source_path.exists() != True: - raise Exception( - message =f"sourcePath: {source_path} Doesn't Exist" - ) - - # TODO check that destination path is in the rocrate + if 'isPartOf' not in root_dataset: + root_dataset['isPartOf'] = [] + root_dataset['isPartOf'].append({ + "@id": project_guid, + "@type": "Project", + "name": self.projectName + }) + + # Create metadata descriptor + metadata_descriptor = { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2-DRAFT"}, + "about": {"@id": self.guid} + } - # copy the file into the destinationPath - shutil.copy(source_path, destination_path) + # Create full RO-Crate structure + rocrate_metadata = { + "@context": DEFAULT_CONTEXT, + "@graph": [ + metadata_descriptor, + root_dataset + ] + } + # Validate the structure + ROCrateMetadata(**rocrate_metadata) + # Write to file + with ro_crate_metadata_path.open(mode="w") as metadata_file: + json.dump(rocrate_metadata, metadata_file, indent=2) def registerObject(self, model: Union[Dataset, Software, Computation]): - ''' Add a specified peice of metadata to the graph of an ROCrate - Marshals a given model into JSON-LD, opens the ro-crate-metadata.json, - appends the new metadata to the @graph, and overwrites the ro-crate-metadata.json - ''' - - metadata_path = pathlib.Path(self.path) + """Add metadata to the graph of an ROCrate""" + metadata_path = self.path / 'ro-crate-metadata.json' with metadata_path.open("r+") as rocrate_metadata_file: rocrate_metadata = json.load(rocrate_metadata_file) - # TODO assure no duplicative content + # Add to the @graph + model_data = model.model_dump(by_alias=True, exclude_none=True) + rocrate_metadata['@graph'].append(model_data) + + # Add reference to root dataset's hasPart + root_dataset = rocrate_metadata['@graph'][1] # Second element after descriptor + if 'hasPart' not in root_dataset: + root_dataset['hasPart'] = [] + root_dataset['hasPart'].append({"@id": model_data["@id"]}) + + # Validate updated structure + ROCrateMetadata(**rocrate_metadata) - # add to the @graph - rocrate_metadata['@graph'].append(model.model_dump(by_alias=True)) + # Write back to file rocrate_metadata_file.seek(0) + rocrate_metadata_file.truncate() json.dump(rocrate_metadata, rocrate_metadata_file, indent=2) + def registerDataset(self, dataset: Dataset): + self.registerObject(dataset) - def registerDataset(self, Dataset): - # TODO check for entailment - self.registerObject(model=Dataset) - - - def registerSoftware(self, Software): - # TODO check for entailment - self.registerObject(model=Software) - - - def registerComputation(self, Computation): - # TODO check for entailment - self.registerObject(model=Computation) - - - - def listContents(self): - rocrate_table = PrettyTable() - - rocrate_table.field_names= ['ro_crate', '@id', 'type', 'name'] - for metadata_element in self.graph: - rocrate_table.add_row( - [ - "*", - metadata_element.guid, - metadata_element.type, - metadata_element.name - ] - ) - - return rocrate_table - + def registerSoftware(self, software: Software): + self.registerObject(software) + def registerComputation(self, computation: Computation): + self.registerObject(computation) -def ReadROCrateMetadata( - cratePath: pathlib.Path -)-> ROCrateMetadata: - """ Given a path read the rocrate metadata into a pydantic model - """ - - # if cratePath has metadata.json inside - if "ro-crate-metadata.json" in str(cratePath) : - metadataCratePath = cratePath +def ReadROCrateMetadata(cratePath: pathlib.Path) -> Dict[str, Any]: + """Read and validate ROCrate metadata""" + if "ro-crate-metadata.json" in str(cratePath): + metadata_path = cratePath else: - metadataCratePath = cratePath / "ro-crate-metadata.json" - - with metadataCratePath.open("r") as metadataFile: - crateMetadata = json.load(metadataFile) - readCrate = ROCrateMetadata.model_validate(crateMetadata) - - return readCrate + metadata_path = cratePath / "ro-crate-metadata.json" + with metadata_path.open("r") as metadata_file: + crate_metadata = json.load(metadata_file) + # Validate the structure + ROCrateMetadata(**crate_metadata) + return crate_metadata def AppendCrate( cratePath: pathlib.Path, @@ -268,21 +279,28 @@ def AppendCrate( if cratePath.is_dir(): cratePath = cratePath / 'ro-crate-metadata.json' - if len(elements) == 0: + if not elements: return None with cratePath.open("r+") as rocrate_metadata_file: rocrate_metadata = json.load(rocrate_metadata_file) + + # Add elements to @graph and references to root dataset + root_dataset = rocrate_metadata['@graph'][1] # Second element after descriptor + if 'hasPart' not in root_dataset: + root_dataset['hasPart'] = [] - # add to the @graph - for register_elem in elements: - rocrate_metadata['@graph'].append( - register_elem.model_dump( - by_alias=True, - exclude_none=True - )) + for element in elements: + element_data = element.model_dump(by_alias=True, exclude_none=True) + rocrate_metadata['@graph'].append(element_data) + root_dataset['hasPart'].append({"@id": element_data["@id"]}) + # Validate updated structure + ROCrateMetadata(**rocrate_metadata) + + # Write back to file rocrate_metadata_file.seek(0) + rocrate_metadata_file.truncate() json.dump(rocrate_metadata, rocrate_metadata_file, indent=2) @@ -311,12 +329,7 @@ def UpdateCrate( cratePath: pathlib.Path, element: Union[Dataset, Software, Computation] ): - """Update an existing element in the RO-Crate metadata by matching @id - - Args: - cratePath: Path to the RO-Crate directory or metadata file - element: Updated element to replace existing one with matching @id - """ + """Update an existing element in the RO-Crate metadata""" if cratePath.is_dir(): cratePath = cratePath / 'ro-crate-metadata.json' @@ -324,15 +337,16 @@ def UpdateCrate( rocrate_metadata = json.load(rocrate_metadata_file) # Find and replace the element with matching @id + element_data = element.model_dump(by_alias=True, exclude_none=True) for i, existing in enumerate(rocrate_metadata['@graph']): - if existing.get('@id') == element.guid: - rocrate_metadata['@graph'][i] = element.model_dump( - by_alias=True, - exclude_none=True - ) + if existing.get('@id') == element_data['@id']: + rocrate_metadata['@graph'][i] = element_data break - # Write back the updated metadata + # Validate updated structure + ROCrateMetadata(**rocrate_metadata) + + # Write back to file rocrate_metadata_file.seek(0) rocrate_metadata_file.truncate() json.dump(rocrate_metadata, rocrate_metadata_file, indent=2) \ No newline at end of file diff --git a/src/fairscape_cli/rocrate/rocrate.py b/src/fairscape_cli/rocrate/rocrate.py index 7fb2c22..745d5c0 100644 --- a/src/fairscape_cli/rocrate/rocrate.py +++ b/src/fairscape_cli/rocrate/rocrate.py @@ -79,7 +79,7 @@ def init( path = pathlib.Path.cwd(), ) - click.echo(passed_crate.guid) + click.echo(passed_crate.get("@id")) @rocrate.command('create') @@ -112,7 +112,7 @@ def create( path = rocrate_path ) - click.echo(passed_crate.guid) + click.echo(passed_crate.get("@id")) From 76037fade193889cadf986d4d5a2f7218e6f2857 Mon Sep 17 00:00:00 2001 From: jniestroy Date: Wed, 22 Jan 2025 11:04:47 -0500 Subject: [PATCH 2/4] flatten project --- src/fairscape_cli/models/rocrate.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/fairscape_cli/models/rocrate.py b/src/fairscape_cli/models/rocrate.py index 179db28..f748d40 100644 --- a/src/fairscape_cli/models/rocrate.py +++ b/src/fairscape_cli/models/rocrate.py @@ -196,9 +196,7 @@ def initCrate(self): if 'isPartOf' not in root_dataset: root_dataset['isPartOf'] = [] root_dataset['isPartOf'].append({ - "@id": project_guid, - "@type": "Project", - "name": self.projectName + "@id": project_guid }) # Create metadata descriptor From 8252f43f581116bbb7e9761c85ab478cc8aa8d27 Mon Sep 17 00:00:00 2001 From: jniestroy Date: Wed, 22 Jan 2025 11:08:21 -0500 Subject: [PATCH 3/4] subcrate --- src/fairscape_cli/models/__init__.py | 2 + src/fairscape_cli/rocrate/rocrate.py | 73 +++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/src/fairscape_cli/models/__init__.py b/src/fairscape_cli/models/__init__.py index 41a2ef8..e65d747 100644 --- a/src/fairscape_cli/models/__init__.py +++ b/src/fairscape_cli/models/__init__.py @@ -8,6 +8,7 @@ from fairscape_cli.models.computation import Computation, GenerateComputation from fairscape_cli.models.rocrate import ( ROCrate, + ROCrateMetadata, GenerateROCrate, ReadROCrateMetadata, AppendCrate, @@ -26,6 +27,7 @@ 'Computation', 'GenerateComputation', 'ROCrate', + 'ROCrateMetadata', 'GenerateROCrate', 'ReadROCrateMetadata', 'AppendCrate', diff --git a/src/fairscape_cli/rocrate/rocrate.py b/src/fairscape_cli/rocrate/rocrate.py index 745d5c0..a3175af 100644 --- a/src/fairscape_cli/rocrate/rocrate.py +++ b/src/fairscape_cli/rocrate/rocrate.py @@ -21,6 +21,7 @@ Software, Computation, ROCrate, + ROCrateMetadata, BagIt, # Generator functions @@ -373,7 +374,77 @@ def computation( click.echo(e) ctx.exit(code=1) - +@register.command('subrocrate') +@click.argument('rocrate-path', type=click.Path(exists=True, path_type=pathlib.Path)) +@click.argument('subrocrate-path', type=click.Path(path_type=pathlib.Path)) +@click.option('--guid', required=False, type=str, default="", show_default=False) +@click.option('--name', required=True, type=str) +@click.option('--organization-name', required=True, type=str) +@click.option('--project-name', required=True, type=str) +@click.option('--description', required=True, type=str) +@click.option('--keywords', required=True, multiple=True, type=str) +@click.pass_context +def subrocrate( + ctx, + rocrate_path: pathlib.Path, + subrocrate_path: pathlib.Path, + guid: str, + name: str, + organization_name: str, + project_name: str, + description: str, + keywords: List[str] +): + """Register a new RO-Crate within an existing RO-Crate directory. + + ROCRATE_PATH: Path to the parent RO-Crate + SUBCRATE_PATH: Relative path within the parent RO-Crate where the subcrate should be created + """ + try: + # Read parent crate metadata + parent_crate = ReadROCrateMetadata(rocrate_path) + + # Construct full path for subcrate + full_subcrate_path = rocrate_path / subrocrate_path + + # Create subcrate + subcrate = GenerateROCrate( + guid=guid, + name=name, + organizationName=organization_name, + projectName=project_name, + description=description, + keywords=keywords, + path=full_subcrate_path + ) + + # Update parent crate to include reference to subcrate + with (rocrate_path / 'ro-crate-metadata.json').open('r+') as f: + parent_metadata = json.load(f) + + root_dataset = parent_metadata['@graph'][1] + + if 'hasPart' not in root_dataset: + root_dataset['hasPart'] = [] + + subcrate_ref = { + "@id": subcrate['@id'] + } + + if not any(part.get('@id') == subcrate['@id'] for part in root_dataset['hasPart']): + root_dataset['hasPart'].append(subcrate_ref) + + # Validate and write updated parent metadata + ROCrateMetadata(**parent_metadata) + f.seek(0) + f.truncate() + json.dump(parent_metadata, f, indent=2) + + click.echo(subcrate['@id']) + + except Exception as exc: + click.echo(f"ERROR: {str(exc)}") + ctx.exit(code=1) # RO Crate add subcommands @rocrate.group('add') From 23beba14a84b049506324c396b52888a841bc65a Mon Sep 17 00:00:00 2001 From: jniestroy Date: Thu, 23 Jan 2025 14:39:42 -0500 Subject: [PATCH 4/4] valid spec --- src/fairscape_cli/models/rocrate.py | 150 ++++++++++++------------ src/fairscape_cli/rocrate/rocrate.py | 103 +++++++++-------- tests/test_rocrate_api.py | 4 +- tests/test_rocrate_commands.py | 163 +++++++++++++++++++++++++++ 4 files changed, 297 insertions(+), 123 deletions(-) create mode 100644 tests/test_rocrate_commands.py diff --git a/src/fairscape_cli/models/rocrate.py b/src/fairscape_cli/models/rocrate.py index f748d40..a567055 100644 --- a/src/fairscape_cli/models/rocrate.py +++ b/src/fairscape_cli/models/rocrate.py @@ -1,6 +1,7 @@ import pathlib import shutil import json +from datetime import datetime from typing import Optional, Union, List, Literal, Dict, Any from pydantic import BaseModel, Field, ConfigDict, model_validator @@ -76,78 +77,81 @@ def validate_graph_elements(self): raise ValueError(f"Nested object under '{key}' can only contain '@id' property") def GenerateROCrate( - path: pathlib.Path, - guid: str, - name: str, - description: str, - keywords: List[str], - organizationName: str = None, - projectName: str = None, - ): - - # Generate GUID if not provided - sq = GenerateDatetimeSquid() - guid = f"ark:{NAAN}/rocrate-{name.lower().replace(' ', '-')}-{sq}" - - # Create root dataset entity - root_dataset = { - "@id": guid, - "@type": ["Dataset", "https://w3id.org/EVI#ROCrate"], - "name": name, - "keywords": keywords, - "description": description, - "hasPart": [] - } - - if 'isPartOf' not in root_dataset: - root_dataset['isPartOf'] = [] - - if organizationName: - organization_guid = f"ark:{NAAN}/organization-{organizationName.lower().replace(' ', '-')}-{GenerateDatetimeSquid()}" - root_dataset['isPartOf'] = [{ - "@id": organization_guid - }] - - if projectName: - project_guid = f"ark:{NAAN}/project-{projectName.lower().replace(' ', '-')}-{GenerateDatetimeSquid()}" - - root_dataset['isPartOf'].append({ - "@id": project_guid - }) - - metadata_descriptor = { - "@id": "ro-crate-metadata.json", - "@type": "CreativeWork", - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2-DRAFT"}, - "about": {"@id": guid} - } - - # Create full RO-Crate structure - rocrate_metadata = { - "@context": DEFAULT_CONTEXT, - "@graph": [ - metadata_descriptor, - root_dataset - ] - } - - # Validate the structure - ROCrateMetadata(**rocrate_metadata) - - # Write to file - if 'ro-crate-metadata.json' in str(path): - roCrateMetadataPath = path - if not path.parent.exists(): - path.parent.mkdir(parents=True, exist_ok=True) - else: - roCrateMetadataPath = path / 'ro-crate-metadata.json' - if not path.exists(): - path.mkdir(parents=True, exist_ok=True) - - with roCrateMetadataPath.open(mode="w") as metadataFile: - json.dump(rocrate_metadata, metadataFile, indent=2) - - return rocrate_metadata["@graph"][1] + path: pathlib.Path, + guid: str, + name: str, + description: str, + keywords: List[str], + organizationName: str = None, + projectName: str = None, + license: str = "https://creativecommons.org/licenses/by/4.0/", + datePublished: str = None, +): + # Generate GUID if not provided + sq = GenerateDatetimeSquid() + guid = f"ark:{NAAN}/rocrate-{name.lower().replace(' ', '-')}-{sq}/" + + if datePublished is None: + datePublished = datetime.now().isoformat() + + # Create root dataset entity + root_dataset = { + "@id": guid, + "@type": ["Dataset", "https://w3id.org/EVI#ROCrate"], + "name": name, + "keywords": keywords, + "description": description, + "license": license, + "datePublished": datePublished, + "hasPart": [], + "isPartOf": [] + } + + if organizationName: + organization_guid = f"ark:{NAAN}/organization-{organizationName.lower().replace(' ', '-')}-{GenerateDatetimeSquid()}" + root_dataset['isPartOf'] = [{ + "@id": organization_guid + }] + + if projectName: + project_guid = f"ark:{NAAN}/project-{projectName.lower().replace(' ', '-')}-{GenerateDatetimeSquid()}" + root_dataset['isPartOf'].append({ + "@id": project_guid + }) + + metadata_descriptor = { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2-DRAFT"}, + "about": {"@id": guid} + } + + # Create full RO-Crate structure + rocrate_metadata = { + "@context": DEFAULT_CONTEXT, + "@graph": [ + metadata_descriptor, + root_dataset + ] + } + + # Validate the structure + ROCrateMetadata(**rocrate_metadata) + + # Write to file + if 'ro-crate-metadata.json' in str(path): + roCrateMetadataPath = path + if not path.parent.exists(): + path.parent.mkdir(parents=True, exist_ok=True) + else: + roCrateMetadataPath = path / 'ro-crate-metadata.json' + if not path.exists(): + path.mkdir(parents=True, exist_ok=True) + + with roCrateMetadataPath.open(mode="w") as metadataFile: + json.dump(rocrate_metadata, metadataFile, indent=2) + + return rocrate_metadata["@graph"][1] class ROCrate(BaseModel): model_config = ConfigDict(populate_by_name=True) @@ -163,7 +167,7 @@ class ROCrate(BaseModel): def generate_guid(self) -> str: if self.guid is None: sq = GenerateDatetimeSquid() - self.guid = f"ark:{NAAN}/rocrate-{self.name.replace(' ', '-').lower()}-{sq}" + self.guid = f"ark:{NAAN}/rocrate-{self.name.replace(' ', '-').lower()}-{sq}/" return self.guid def createCrateFolder(self): diff --git a/src/fairscape_cli/rocrate/rocrate.py b/src/fairscape_cli/rocrate/rocrate.py index a3175af..eb3ee6e 100644 --- a/src/fairscape_cli/rocrate/rocrate.py +++ b/src/fairscape_cli/rocrate/rocrate.py @@ -54,66 +54,73 @@ def rocrate(): @rocrate.command('init') @click.option('--guid', required=False, type=str, default="", show_default=False) -@click.option('--name', required=True, type=str) -@click.option('--organization-name', required=True, type=str) -@click.option('--project-name', required=True, type=str) +@click.option('--name', required=True, type=str) +@click.option('--organization-name', required=True, type=str) +@click.option('--project-name', required=True, type=str) @click.option('--description', required=True, type=str) @click.option('--keywords', required=True, multiple=True, type=str) +@click.option('--license', required=False, type=str, default="https://creativecommons.org/licenses/by/4.0/") +@click.option('--date-published', required=False, type=str) def init( - guid, - name, - organization_name, - project_name, - description, - keywords + guid, + name, + organization_name, + project_name, + description, + keywords, + license, + date_published ): - """ Initalize a rocrate in the current working directory by instantiating a ro-crate-metadata.json file. - """ - - passed_crate = GenerateROCrate( - guid=guid, - name=name, - organizationName = organization_name, - projectName = project_name, - description = description, - keywords = keywords, - path = pathlib.Path.cwd(), - ) - - click.echo(passed_crate.get("@id")) - + """ Initialize a rocrate in the current working directory by instantiating a ro-crate-metadata.json file. + """ + passed_crate = GenerateROCrate( + guid=guid, + name=name, + organizationName=organization_name, + projectName=project_name, + description=description, + keywords=keywords, + license=license, + datePublished=date_published, + path=pathlib.Path.cwd(), + ) + click.echo(passed_crate.get("@id")) @rocrate.command('create') @click.option('--guid', required=False, type=str, default="", show_default=False) @click.option('--name', required=True, type=str) -@click.option('--organization-name', required=True, type=str) -@click.option('--project-name', required=True, type=str) +@click.option('--organization-name', required=True, type=str) +@click.option('--project-name', required=True, type=str) @click.option('--description', required=True, type=str) @click.option('--keywords', required=True, multiple=True, type=str) +@click.option('--license', required=False, type=str, default="https://creativecommons.org/licenses/by/4.0/") +@click.option('--date-published', required=False, type=str) @click.argument('rocrate-path', type=click.Path(exists=False, path_type=pathlib.Path)) def create( - rocrate_path, - guid, - name, - organization_name, - project_name, - description, - keywords -): - '''Create an ROCrate in a new path specified by the rocrate-path argument - ''' - - passed_crate = GenerateROCrate( - guid=guid, - name=name, - organizationName = organization_name, - projectName = project_name, - description = description, - keywords = keywords, - path = rocrate_path - ) - - click.echo(passed_crate.get("@id")) + rocrate_path, + guid, + name, + organization_name, + project_name, + description, + keywords, + license, + date_published +): + '''Create an ROCrate in a new path specified by the rocrate-path argument + ''' + passed_crate = GenerateROCrate( + guid=guid, + name=name, + organizationName=organization_name, + projectName=project_name, + description=description, + keywords=keywords, + license=license, + datePublished=date_published, + path=rocrate_path + ) + click.echo(passed_crate.get("@id")) diff --git a/tests/test_rocrate_api.py b/tests/test_rocrate_api.py index 2aec4d5..cfda2b9 100644 --- a/tests/test_rocrate_api.py +++ b/tests/test_rocrate_api.py @@ -132,7 +132,7 @@ def test_api(self): # Verify crate metadata rocrateMetadataRecord = ReadROCrateMetadata(self.rocratePath) - rocrateGUIDs = [elem.guid for elem in rocrateMetadataRecord.metadataGraph] + rocrateGUIDs = [elem["@id"] for elem in rocrateMetadataRecord["@graph"]] # Verify all dataset GUIDs are present for ds in datasetList: @@ -155,7 +155,7 @@ def test_api(self): # Final verification rocrateMetadataRecord = ReadROCrateMetadata(self.rocratePath) - rocrateGUIDs = [elem.guid for elem in rocrateMetadataRecord.metadataGraph] + rocrateGUIDs = [elem["@id"] for elem in rocrateMetadataRecord["@graph"]] self.assertIn(computation.guid, rocrateGUIDs, "Computation GUID not found in metadata") self.assertIn(software.guid, rocrateGUIDs, "Software GUID not found in metadata") diff --git a/tests/test_rocrate_commands.py b/tests/test_rocrate_commands.py new file mode 100644 index 0000000..e365042 --- /dev/null +++ b/tests/test_rocrate_commands.py @@ -0,0 +1,163 @@ +import unittest +import pathlib +import shutil +import subprocess +import json +import os + +class TestCLICommands(unittest.TestCase): + def setUp(self): + self.test_dir = pathlib.Path.cwd() / 'tests' / 'data' / 'test_cli' + self.test_dir.mkdir(parents=True, exist_ok=True) + + # Change to test directory + os.chdir(self.test_dir) + # Create files relative to test directory + pathlib.Path('input_data.csv').touch() + pathlib.Path('subcrate').mkdir(exist_ok=True) + pathlib.Path('subcrate/subcrate_data.csv').touch() + pathlib.Path('subcrate/software.py').touch() + + # def tearDown(self): + # if self.test_dir.exists(): + # shutil.rmtree(self.test_dir) + + def test_cli_workflow(self): + # Create top-level crate + top_crate_result = subprocess.run([ + 'fairscape-cli', 'rocrate', 'create', + '--name', 'Top Level Crate', + '--organization-name', 'Test Org', + '--project-name', 'Test Project', + '--description', 'Top level test crate', + '--keywords', 'test,top-level', + '.' + ], capture_output=True, text=True) + print(f"Top crate output: {top_crate_result.stdout}") + print(f"Top crate error: {top_crate_result.stderr}") + self.assertEqual(top_crate_result.returncode, 0) + top_crate_id = top_crate_result.stdout.strip() + + # Create subcrate + subcrate_result = subprocess.run([ + 'fairscape-cli', 'rocrate', 'register', 'subrocrate', + '.', 'subcrate', + '--name', 'Sub Crate', + '--organization-name', 'Test Org', + '--project-name', 'Test Project', + '--description', 'Test subcrate', + '--keywords', 'test,subcrate' + ], capture_output=True, text=True) + print(f"Subcrate output: {subcrate_result.stdout}") + print(f"Subcrate error: {subcrate_result.stderr}") + self.assertEqual(subcrate_result.returncode, 0) + subcrate_id = subcrate_result.stdout.strip() + + # Register top-level dataset + top_dataset_result = subprocess.run([ + 'fairscape-cli', 'rocrate', 'register', 'dataset', + '.', + '--name', 'Top Level Data', + '--author', 'Test Author', + '--version', '1.0', + '--date-published', '2025-01-22', + '--description', 'Top level test data', + '--keywords', 'test,data', + '--data-format', 'csv', + '--filepath', 'input_data.csv' + ], capture_output=True, text=True) + print(f"Top dataset output: {top_dataset_result.stdout}") + print(f"Top dataset error: {top_dataset_result.stderr}") + self.assertEqual(top_dataset_result.returncode, 0) + top_dataset_id = top_dataset_result.stdout.strip() + + # Register subcrate dataset + subcrate_dataset_result = subprocess.run([ + 'fairscape-cli', 'rocrate', 'register', 'dataset', + str(self.test_dir / 'subcrate'), + '--name', 'Subcrate Data', + '--author', 'Test Author', + '--version', '1.0', + '--date-published', '2025-01-22', + '--description', 'Subcrate test data', + '--keywords', 'test,data', + '--data-format', 'csv', + '--filepath', 'subcrate/subcrate_data.csv' + ], capture_output=True, text=True) + print(f"Subcrate dataset output: {subcrate_dataset_result.stdout}") + print(f"Subcrate dataset error: {subcrate_dataset_result.stderr}") + self.assertEqual(subcrate_dataset_result.returncode, 0) + subcrate_dataset_id = subcrate_dataset_result.stdout.strip() + + # Register software in subcrate + software_result = subprocess.run([ + 'fairscape-cli', 'rocrate', 'register', 'software', + str(self.test_dir / 'subcrate'), + '--name', 'Test Software', + '--author', 'Test Author', + '--version', '1.0', + '--description', 'Test analysis software', + '--keywords', 'test,software', + '--file-format', 'py', + '--filepath', 'subcrate/software.py', + '--date-modified', '2025-01-22' + ], capture_output=True, text=True) + print(f"Software output: {software_result.stdout}") + print(f"Software error: {software_result.stderr}") + self.assertEqual(software_result.returncode, 0) + software_id = software_result.stdout.strip() + + # Register computation in subcrate + computation_result = subprocess.run([ + 'fairscape-cli', 'rocrate', 'register', 'computation', + str(self.test_dir / 'subcrate'), + '--name', 'Test Computation', + '--run-by', 'Test Author', + '--date-created', '2025-01-22', + '--description', 'Test computation', + '--keywords', 'test,computation', + '--used-software', software_id, + '--used-dataset', subcrate_dataset_id, + '--command', 'python software.py subcrate_data.csv' + ], capture_output=True, text=True) + print(f"Computation output: {computation_result.stdout}") + print(f"Computation error: {computation_result.stderr}") + self.assertEqual(computation_result.returncode, 0) + computation_id = computation_result.stdout.strip() + + # Verify crate structure + with open(self.test_dir / 'ro-crate-metadata.json') as f: + top_metadata = json.load(f) + with open(self.test_dir / 'subcrate' / 'ro-crate-metadata.json') as f: + sub_metadata = json.load(f) + + # Verify top-level crate structure + top_root_id = next(item['about']['@id'] for item in top_metadata['@graph'] + if item['@id'] == 'ro-crate-metadata.json') + top_root = next(item for item in top_metadata['@graph'] + if item['@id'] == top_root_id) + + # Verify top-level relationships + self.assertIn(subcrate_id, [part['@id'] for part in top_root['hasPart']]) + self.assertIn(top_dataset_id, [part['@id'] for part in top_root['hasPart']]) + + # Verify subcrate structure + sub_root_id = next(item['about']['@id'] for item in sub_metadata['@graph'] + if item['@id'] == 'ro-crate-metadata.json') + sub_root = next(item for item in sub_metadata['@graph'] + if item['@id'] == sub_root_id) + + # Verify subcrate relationships + sub_parts = [part['@id'] for part in sub_root['hasPart']] + self.assertIn(subcrate_dataset_id, sub_parts) + self.assertIn(software_id, sub_parts) + self.assertIn(computation_id, sub_parts) + + # Verify computation relationships + computation = next(item for item in sub_metadata['@graph'] + if item['@id'] == computation_id) + self.assertIn(software_id, computation['usedSoftware']) + self.assertIn(subcrate_dataset_id, computation['usedDataset']) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file