diff --git a/src/fairscape_cli/models/__init__.py b/src/fairscape_cli/models/__init__.py index 41a2ef8..e65d747 100644 --- a/src/fairscape_cli/models/__init__.py +++ b/src/fairscape_cli/models/__init__.py @@ -8,6 +8,7 @@ from fairscape_cli.models.computation import Computation, GenerateComputation from fairscape_cli.models.rocrate import ( ROCrate, + ROCrateMetadata, GenerateROCrate, ReadROCrateMetadata, AppendCrate, @@ -26,6 +27,7 @@ 'Computation', 'GenerateComputation', 'ROCrate', + 'ROCrateMetadata', 'GenerateROCrate', 'ReadROCrateMetadata', 'AppendCrate', diff --git a/src/fairscape_cli/models/base.py b/src/fairscape_cli/models/base.py index fed329e..187f3c8 100644 --- a/src/fairscape_cli/models/base.py +++ b/src/fairscape_cli/models/base.py @@ -40,11 +40,6 @@ class FairscapeBaseModel(BaseModel): title="guid", alias="@id" ) - context: Dict[str,str] = Field( - default=default_context, - title="context", - alias="@context" - ) metadataType: str = Field( title="metadataType", alias="@type" diff --git a/src/fairscape_cli/models/rocrate.py b/src/fairscape_cli/models/rocrate.py index 275c8b1..a567055 100644 --- a/src/fairscape_cli/models/rocrate.py +++ b/src/fairscape_cli/models/rocrate.py @@ -1,10 +1,9 @@ import pathlib import shutil import json -from typing import Optional, Union, List, Literal, Dict - -from prettytable import PrettyTable -from pydantic import BaseModel, computed_field, Field +from datetime import datetime +from typing import Optional, Union, List, Literal, Dict, Any +from pydantic import BaseModel, Field, ConfigDict, model_validator from fairscape_cli.config import NAAN, DEFAULT_CONTEXT from fairscape_cli.models.software import Software @@ -12,254 +11,268 @@ from fairscape_cli.models.computation import Computation from fairscape_cli.models.guid_utils import GenerateDatetimeSquid -class ROCrateMetadata(BaseModel): - guid: Optional[str] = Field(alias="@id", default=None) - metadataType: Optional[str] = Field(alias="@type", default= "https://w3id.org/EVI#ROCrate") - context: Dict[str, str] = Field(default=DEFAULT_CONTEXT) - name: str = Field(max_length=200) - description: str = Field(min_length=10) - keywords: List[str] = Field(default=[]) - isPartOf: Optional[List[Dict]] - metadataGraph: Optional[List[Union[Dataset,Software, Computation]]] = Field(alias="@graph", default=[]) - -def GenerateROCrate( - path: pathlib.Path, - guid: str, - name: str, - description: str, - keywords: List[str], - organizationName: str = None, - projectName: str = None, - ): - - # overwrite custom GUIDs - sq = GenerateDatetimeSquid() - guid = f"ark:{NAAN}/rocrate-{name.lower().replace(' ', '-')}-{sq}" - - roCrateInstanceMetadata = { - "@id": guid, - "@type": "https://w3id.org/EVI#ROCrate", - "name": name, - "isPartOf": [], - "keywords": keywords, - "description": description, - "metadataGraph": [] - } - - if organizationName: - organizationGuid = f"ark:{NAAN}/organization-{organizationName.lower().replace(' ', '-')}-{GenerateDatetimeSquid()}" - roCrateInstanceMetadata['isPartOf'].append( - { - "@id": organizationGuid, - "@type": "Organization", - "name": organizationName - } - ) - - if projectName: - projectGuid = f"ark:{NAAN}/project-{projectName.lower().replace(' ', '-')}-{GenerateDatetimeSquid()}" - roCrateInstanceMetadata['isPartOf'].append( - { - "@id": projectGuid, - "@type": "Project", - "name": projectName - } - ) - - - rocrateInstance = ROCrateMetadata.model_validate(roCrateInstanceMetadata) +class ROCrateMetadataDescriptor(BaseModel): + model_config = ConfigDict(populate_by_name=True) - if 'ro-crate-metadata.json' in str(path): - roCrateMetadataPath = path - - # if the parent folder doesn't exist, create the parent folder - if not path.parent.exists(): - path.parent.mkdir(parents=True, exist_ok=True) - else: - roCrateMetadataPath = path / 'ro-crate-metadata.json' - - # if the parent folder doesn't exist, create the parent folder - if not path.exists(): - path.mkdir(parents=True, exist_ok=True) - - - with roCrateMetadataPath.open(mode="w") as metadataFile: - serializedMetadata = rocrateInstance.model_dump(by_alias=True) - json.dump(serializedMetadata, metadataFile, indent=2) - - return rocrateInstance + id: str = Field(default="ro-crate-metadata.json", alias="@id") + type: Literal["CreativeWork"] = Field(alias="@type") + conformsTo: Dict = Field(default={ + "@id": "https://w3id.org/ro/crate/1.2-DRAFT" + }) + about: Dict[str, str] +class ROCrateMetadata(BaseModel): + model_config = ConfigDict( + populate_by_name=True, + extra='forbid' + ) + + context: Dict[str, str] = Field( + default={ + "EVI": "https://w3id.org/EVI#", + "@vocab": "https://schema.org/" + }, + alias="@context" + ) + graph: List[Dict] = Field(alias="@graph") + + @model_validator(mode='after') + def validate_metadata(self) -> 'ROCrateMetadata': + self.validate_metadata_descriptor() + self.validate_graph_elements() + return self + + def validate_metadata_descriptor(self): + # Check for metadata descriptor + descriptors = [item for item in self.graph + if item.get("@id") == "ro-crate-metadata.json"] + if not descriptors: + raise ValueError("Missing required metadata descriptor in @graph") + + descriptor = descriptors[0] + # Validate descriptor + ROCrateMetadataDescriptor(**descriptor) + + # Validate about reference exists in graph + about_id = descriptor.get("about", {}).get("@id") + if not about_id: + raise ValueError("Metadata descriptor missing root node in about.@id") + + # Check root exists + root_items = [item for item in self.graph if item.get("@id") == about_id] + if not root_items: + raise ValueError(f"Root id {about_id} referenced in about.@id not found in @graph") + + def validate_graph_elements(self): + """Validate each element in @graph is flat and has an id""" + for item in self.graph: + if "@id" not in item or "@type" not in item: + raise ValueError("All @graph elements must have @id and @type properties") + + # Validate nested objects only contain @id + for key, value in item.items(): + if isinstance(value, dict): + allowed_keys = {"@id"} + if set(value.keys()) - allowed_keys: + raise ValueError(f"Nested object under '{key}' can only contain '@id' property") +def GenerateROCrate( + path: pathlib.Path, + guid: str, + name: str, + description: str, + keywords: List[str], + organizationName: str = None, + projectName: str = None, + license: str = "https://creativecommons.org/licenses/by/4.0/", + datePublished: str = None, +): + # Generate GUID if not provided + sq = GenerateDatetimeSquid() + guid = f"ark:{NAAN}/rocrate-{name.lower().replace(' ', '-')}-{sq}/" + + if datePublished is None: + datePublished = datetime.now().isoformat() + + # Create root dataset entity + root_dataset = { + "@id": guid, + "@type": ["Dataset", "https://w3id.org/EVI#ROCrate"], + "name": name, + "keywords": keywords, + "description": description, + "license": license, + "datePublished": datePublished, + "hasPart": [], + "isPartOf": [] + } + + if organizationName: + organization_guid = f"ark:{NAAN}/organization-{organizationName.lower().replace(' ', '-')}-{GenerateDatetimeSquid()}" + root_dataset['isPartOf'] = [{ + "@id": organization_guid + }] + + if projectName: + project_guid = f"ark:{NAAN}/project-{projectName.lower().replace(' ', '-')}-{GenerateDatetimeSquid()}" + root_dataset['isPartOf'].append({ + "@id": project_guid + }) + + metadata_descriptor = { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2-DRAFT"}, + "about": {"@id": guid} + } + + # Create full RO-Crate structure + rocrate_metadata = { + "@context": DEFAULT_CONTEXT, + "@graph": [ + metadata_descriptor, + root_dataset + ] + } + + # Validate the structure + ROCrateMetadata(**rocrate_metadata) + + # Write to file + if 'ro-crate-metadata.json' in str(path): + roCrateMetadataPath = path + if not path.parent.exists(): + path.parent.mkdir(parents=True, exist_ok=True) + else: + roCrateMetadataPath = path / 'ro-crate-metadata.json' + if not path.exists(): + path.mkdir(parents=True, exist_ok=True) + + with roCrateMetadataPath.open(mode="w") as metadataFile: + json.dump(rocrate_metadata, metadataFile, indent=2) + + return rocrate_metadata["@graph"][1] class ROCrate(BaseModel): + model_config = ConfigDict(populate_by_name=True) + guid: Optional[str] = Field(alias="@id", default=None) - metadataType: str = Field(alias="@type", default="https://w3id.org/EVI#ROCrate") name: str = Field(max_length=200) - description: str = Field(min_length=10) - keywords: List[str] = Field(...) - projectName: Optional[str] = Field(default=None) - organizationName: Optional[str] = Field(default=None) + description: str = Field(min_length=5) + keywords: List[str] + projectName: Optional[str] = None + organizationName: Optional[str] = None path: pathlib.Path - metadataGraph: Optional[List[Union[Dataset,Software, Computation]]] = Field(alias="@graph", default=[]) def generate_guid(self) -> str: if self.guid is None: sq = GenerateDatetimeSquid() - self.guid = f"ark:{NAAN}/rocrate-{self.name.replace(' ', '-').lower()}-{sq}" + self.guid = f"ark:{NAAN}/rocrate-{self.name.replace(' ', '-').lower()}-{sq}/" return self.guid - def createCrateFolder(self): self.path.mkdir(parents=True, exist_ok=True) - def initCrate(self): - """Create an rocrate at the current working directory, initilize the ro-crate-metadata.json - - """ - - # create basic rocrate metadata - if self.path.is_dir(): - ro_crate_metadata_path = self.path / 'ro-crate-metadata.json' - - # create guid if none exists + """Create an ROCrate and initialize ro-crate-metadata.json""" + ro_crate_metadata_path = self.path / 'ro-crate-metadata.json' self.generate_guid() - rocrate_metadata = { + # Create root dataset + root_dataset = { "@id": self.guid, - "@context": DEFAULT_CONTEXT, - "@type": "EVI:Dataset", + "@type": ["Dataset", "https://w3id.org/EVI#ROCrate"], "name": self.name, "description": self.description, "keywords": self.keywords, - "isPartOf": [], - "@graph": [] + "hasPart": [] } + # Add organization and project if specified if self.organizationName: organization_guid = f"ark:{NAAN}/organization-{self.organizationName.lower().replace(' ', '-')}-{GenerateDatetimeSquid()}" - rocrate_metadata['isPartOf'].append( - { - "@id": organization_guid, - "@type": "Organization", - "name": self.organizationName - } - ) + root_dataset['isPartOf'] = [{ + "@id": organization_guid, + }] if self.projectName: project_guid = f"ark:{NAAN}/project-{self.projectName.lower().replace(' ', '-')}-{GenerateDatetimeSquid()}" - rocrate_metadata['isPartOf'].append( - { - "@id": project_guid, - "@type": "Project", - "name": self.projectName - } - ) - - # write out to file - with ro_crate_metadata_path.open(mode="w") as metadata_file: - json.dump(rocrate_metadata, metadata_file, indent=2) - - #TODO add to cache - - #TODO list all contents that need to be registered as warnings - - - def copyObject(self, source_filepath: str, destination_filepath: str): - - if source_filepath == "": - raise Exception(message="source path is None") - - if destination_filepath == "": - raise Exception(message="destination path is None") - - # check if the source file exists - source_path = pathlib.Path(source_filepath) - destination_path = pathlib.Path(destination_filepath) - - if source_path.exists() != True: - raise Exception( - message =f"sourcePath: {source_path} Doesn't Exist" - ) - - # TODO check that destination path is in the rocrate + if 'isPartOf' not in root_dataset: + root_dataset['isPartOf'] = [] + root_dataset['isPartOf'].append({ + "@id": project_guid + }) + + # Create metadata descriptor + metadata_descriptor = { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.2-DRAFT"}, + "about": {"@id": self.guid} + } - # copy the file into the destinationPath - shutil.copy(source_path, destination_path) + # Create full RO-Crate structure + rocrate_metadata = { + "@context": DEFAULT_CONTEXT, + "@graph": [ + metadata_descriptor, + root_dataset + ] + } + # Validate the structure + ROCrateMetadata(**rocrate_metadata) + # Write to file + with ro_crate_metadata_path.open(mode="w") as metadata_file: + json.dump(rocrate_metadata, metadata_file, indent=2) def registerObject(self, model: Union[Dataset, Software, Computation]): - ''' Add a specified peice of metadata to the graph of an ROCrate - Marshals a given model into JSON-LD, opens the ro-crate-metadata.json, - appends the new metadata to the @graph, and overwrites the ro-crate-metadata.json - ''' - - metadata_path = pathlib.Path(self.path) + """Add metadata to the graph of an ROCrate""" + metadata_path = self.path / 'ro-crate-metadata.json' with metadata_path.open("r+") as rocrate_metadata_file: rocrate_metadata = json.load(rocrate_metadata_file) - # TODO assure no duplicative content + # Add to the @graph + model_data = model.model_dump(by_alias=True, exclude_none=True) + rocrate_metadata['@graph'].append(model_data) - # add to the @graph - rocrate_metadata['@graph'].append(model.model_dump(by_alias=True)) + # Add reference to root dataset's hasPart + root_dataset = rocrate_metadata['@graph'][1] # Second element after descriptor + if 'hasPart' not in root_dataset: + root_dataset['hasPart'] = [] + root_dataset['hasPart'].append({"@id": model_data["@id"]}) + + # Validate updated structure + ROCrateMetadata(**rocrate_metadata) + + # Write back to file rocrate_metadata_file.seek(0) + rocrate_metadata_file.truncate() json.dump(rocrate_metadata, rocrate_metadata_file, indent=2) + def registerDataset(self, dataset: Dataset): + self.registerObject(dataset) - def registerDataset(self, Dataset): - # TODO check for entailment - self.registerObject(model=Dataset) - + def registerSoftware(self, software: Software): + self.registerObject(software) - def registerSoftware(self, Software): - # TODO check for entailment - self.registerObject(model=Software) + def registerComputation(self, computation: Computation): + self.registerObject(computation) - - def registerComputation(self, Computation): - # TODO check for entailment - self.registerObject(model=Computation) - - - - def listContents(self): - rocrate_table = PrettyTable() - - rocrate_table.field_names= ['ro_crate', '@id', 'type', 'name'] - for metadata_element in self.graph: - rocrate_table.add_row( - [ - "*", - metadata_element.guid, - metadata_element.type, - metadata_element.name - ] - ) - - return rocrate_table - - - -def ReadROCrateMetadata( - cratePath: pathlib.Path -)-> ROCrateMetadata: - """ Given a path read the rocrate metadata into a pydantic model - """ - - # if cratePath has metadata.json inside - if "ro-crate-metadata.json" in str(cratePath) : - metadataCratePath = cratePath +def ReadROCrateMetadata(cratePath: pathlib.Path) -> Dict[str, Any]: + """Read and validate ROCrate metadata""" + if "ro-crate-metadata.json" in str(cratePath): + metadata_path = cratePath else: - metadataCratePath = cratePath / "ro-crate-metadata.json" - - with metadataCratePath.open("r") as metadataFile: - crateMetadata = json.load(metadataFile) - readCrate = ROCrateMetadata.model_validate(crateMetadata) - - return readCrate + metadata_path = cratePath / "ro-crate-metadata.json" + with metadata_path.open("r") as metadata_file: + crate_metadata = json.load(metadata_file) + # Validate the structure + ROCrateMetadata(**crate_metadata) + return crate_metadata def AppendCrate( cratePath: pathlib.Path, @@ -268,21 +281,28 @@ def AppendCrate( if cratePath.is_dir(): cratePath = cratePath / 'ro-crate-metadata.json' - if len(elements) == 0: + if not elements: return None with cratePath.open("r+") as rocrate_metadata_file: rocrate_metadata = json.load(rocrate_metadata_file) + + # Add elements to @graph and references to root dataset + root_dataset = rocrate_metadata['@graph'][1] # Second element after descriptor + if 'hasPart' not in root_dataset: + root_dataset['hasPart'] = [] - # add to the @graph - for register_elem in elements: - rocrate_metadata['@graph'].append( - register_elem.model_dump( - by_alias=True, - exclude_none=True - )) + for element in elements: + element_data = element.model_dump(by_alias=True, exclude_none=True) + rocrate_metadata['@graph'].append(element_data) + root_dataset['hasPart'].append({"@id": element_data["@id"]}) + # Validate updated structure + ROCrateMetadata(**rocrate_metadata) + + # Write back to file rocrate_metadata_file.seek(0) + rocrate_metadata_file.truncate() json.dump(rocrate_metadata, rocrate_metadata_file, indent=2) @@ -311,12 +331,7 @@ def UpdateCrate( cratePath: pathlib.Path, element: Union[Dataset, Software, Computation] ): - """Update an existing element in the RO-Crate metadata by matching @id - - Args: - cratePath: Path to the RO-Crate directory or metadata file - element: Updated element to replace existing one with matching @id - """ + """Update an existing element in the RO-Crate metadata""" if cratePath.is_dir(): cratePath = cratePath / 'ro-crate-metadata.json' @@ -324,15 +339,16 @@ def UpdateCrate( rocrate_metadata = json.load(rocrate_metadata_file) # Find and replace the element with matching @id + element_data = element.model_dump(by_alias=True, exclude_none=True) for i, existing in enumerate(rocrate_metadata['@graph']): - if existing.get('@id') == element.guid: - rocrate_metadata['@graph'][i] = element.model_dump( - by_alias=True, - exclude_none=True - ) + if existing.get('@id') == element_data['@id']: + rocrate_metadata['@graph'][i] = element_data break - # Write back the updated metadata + # Validate updated structure + ROCrateMetadata(**rocrate_metadata) + + # Write back to file rocrate_metadata_file.seek(0) rocrate_metadata_file.truncate() json.dump(rocrate_metadata, rocrate_metadata_file, indent=2) \ No newline at end of file diff --git a/src/fairscape_cli/rocrate/rocrate.py b/src/fairscape_cli/rocrate/rocrate.py index 7fb2c22..eb3ee6e 100644 --- a/src/fairscape_cli/rocrate/rocrate.py +++ b/src/fairscape_cli/rocrate/rocrate.py @@ -21,6 +21,7 @@ Software, Computation, ROCrate, + ROCrateMetadata, BagIt, # Generator functions @@ -53,66 +54,73 @@ def rocrate(): @rocrate.command('init') @click.option('--guid', required=False, type=str, default="", show_default=False) -@click.option('--name', required=True, type=str) -@click.option('--organization-name', required=True, type=str) -@click.option('--project-name', required=True, type=str) +@click.option('--name', required=True, type=str) +@click.option('--organization-name', required=True, type=str) +@click.option('--project-name', required=True, type=str) @click.option('--description', required=True, type=str) @click.option('--keywords', required=True, multiple=True, type=str) +@click.option('--license', required=False, type=str, default="https://creativecommons.org/licenses/by/4.0/") +@click.option('--date-published', required=False, type=str) def init( - guid, - name, - organization_name, - project_name, - description, - keywords + guid, + name, + organization_name, + project_name, + description, + keywords, + license, + date_published ): - """ Initalize a rocrate in the current working directory by instantiating a ro-crate-metadata.json file. - """ - - passed_crate = GenerateROCrate( - guid=guid, - name=name, - organizationName = organization_name, - projectName = project_name, - description = description, - keywords = keywords, - path = pathlib.Path.cwd(), - ) - - click.echo(passed_crate.guid) - + """ Initialize a rocrate in the current working directory by instantiating a ro-crate-metadata.json file. + """ + passed_crate = GenerateROCrate( + guid=guid, + name=name, + organizationName=organization_name, + projectName=project_name, + description=description, + keywords=keywords, + license=license, + datePublished=date_published, + path=pathlib.Path.cwd(), + ) + click.echo(passed_crate.get("@id")) @rocrate.command('create') @click.option('--guid', required=False, type=str, default="", show_default=False) @click.option('--name', required=True, type=str) -@click.option('--organization-name', required=True, type=str) -@click.option('--project-name', required=True, type=str) +@click.option('--organization-name', required=True, type=str) +@click.option('--project-name', required=True, type=str) @click.option('--description', required=True, type=str) @click.option('--keywords', required=True, multiple=True, type=str) +@click.option('--license', required=False, type=str, default="https://creativecommons.org/licenses/by/4.0/") +@click.option('--date-published', required=False, type=str) @click.argument('rocrate-path', type=click.Path(exists=False, path_type=pathlib.Path)) def create( - rocrate_path, - guid, - name, - organization_name, - project_name, - description, - keywords -): - '''Create an ROCrate in a new path specified by the rocrate-path argument - ''' - - passed_crate = GenerateROCrate( - guid=guid, - name=name, - organizationName = organization_name, - projectName = project_name, - description = description, - keywords = keywords, - path = rocrate_path - ) - - click.echo(passed_crate.guid) + rocrate_path, + guid, + name, + organization_name, + project_name, + description, + keywords, + license, + date_published +): + '''Create an ROCrate in a new path specified by the rocrate-path argument + ''' + passed_crate = GenerateROCrate( + guid=guid, + name=name, + organizationName=organization_name, + projectName=project_name, + description=description, + keywords=keywords, + license=license, + datePublished=date_published, + path=rocrate_path + ) + click.echo(passed_crate.get("@id")) @@ -373,7 +381,77 @@ def computation( click.echo(e) ctx.exit(code=1) - +@register.command('subrocrate') +@click.argument('rocrate-path', type=click.Path(exists=True, path_type=pathlib.Path)) +@click.argument('subrocrate-path', type=click.Path(path_type=pathlib.Path)) +@click.option('--guid', required=False, type=str, default="", show_default=False) +@click.option('--name', required=True, type=str) +@click.option('--organization-name', required=True, type=str) +@click.option('--project-name', required=True, type=str) +@click.option('--description', required=True, type=str) +@click.option('--keywords', required=True, multiple=True, type=str) +@click.pass_context +def subrocrate( + ctx, + rocrate_path: pathlib.Path, + subrocrate_path: pathlib.Path, + guid: str, + name: str, + organization_name: str, + project_name: str, + description: str, + keywords: List[str] +): + """Register a new RO-Crate within an existing RO-Crate directory. + + ROCRATE_PATH: Path to the parent RO-Crate + SUBCRATE_PATH: Relative path within the parent RO-Crate where the subcrate should be created + """ + try: + # Read parent crate metadata + parent_crate = ReadROCrateMetadata(rocrate_path) + + # Construct full path for subcrate + full_subcrate_path = rocrate_path / subrocrate_path + + # Create subcrate + subcrate = GenerateROCrate( + guid=guid, + name=name, + organizationName=organization_name, + projectName=project_name, + description=description, + keywords=keywords, + path=full_subcrate_path + ) + + # Update parent crate to include reference to subcrate + with (rocrate_path / 'ro-crate-metadata.json').open('r+') as f: + parent_metadata = json.load(f) + + root_dataset = parent_metadata['@graph'][1] + + if 'hasPart' not in root_dataset: + root_dataset['hasPart'] = [] + + subcrate_ref = { + "@id": subcrate['@id'] + } + + if not any(part.get('@id') == subcrate['@id'] for part in root_dataset['hasPart']): + root_dataset['hasPart'].append(subcrate_ref) + + # Validate and write updated parent metadata + ROCrateMetadata(**parent_metadata) + f.seek(0) + f.truncate() + json.dump(parent_metadata, f, indent=2) + + click.echo(subcrate['@id']) + + except Exception as exc: + click.echo(f"ERROR: {str(exc)}") + ctx.exit(code=1) # RO Crate add subcommands @rocrate.group('add') diff --git a/tests/test_rocrate_api.py b/tests/test_rocrate_api.py index 2aec4d5..cfda2b9 100644 --- a/tests/test_rocrate_api.py +++ b/tests/test_rocrate_api.py @@ -132,7 +132,7 @@ def test_api(self): # Verify crate metadata rocrateMetadataRecord = ReadROCrateMetadata(self.rocratePath) - rocrateGUIDs = [elem.guid for elem in rocrateMetadataRecord.metadataGraph] + rocrateGUIDs = [elem["@id"] for elem in rocrateMetadataRecord["@graph"]] # Verify all dataset GUIDs are present for ds in datasetList: @@ -155,7 +155,7 @@ def test_api(self): # Final verification rocrateMetadataRecord = ReadROCrateMetadata(self.rocratePath) - rocrateGUIDs = [elem.guid for elem in rocrateMetadataRecord.metadataGraph] + rocrateGUIDs = [elem["@id"] for elem in rocrateMetadataRecord["@graph"]] self.assertIn(computation.guid, rocrateGUIDs, "Computation GUID not found in metadata") self.assertIn(software.guid, rocrateGUIDs, "Software GUID not found in metadata") diff --git a/tests/test_rocrate_commands.py b/tests/test_rocrate_commands.py new file mode 100644 index 0000000..e365042 --- /dev/null +++ b/tests/test_rocrate_commands.py @@ -0,0 +1,163 @@ +import unittest +import pathlib +import shutil +import subprocess +import json +import os + +class TestCLICommands(unittest.TestCase): + def setUp(self): + self.test_dir = pathlib.Path.cwd() / 'tests' / 'data' / 'test_cli' + self.test_dir.mkdir(parents=True, exist_ok=True) + + # Change to test directory + os.chdir(self.test_dir) + # Create files relative to test directory + pathlib.Path('input_data.csv').touch() + pathlib.Path('subcrate').mkdir(exist_ok=True) + pathlib.Path('subcrate/subcrate_data.csv').touch() + pathlib.Path('subcrate/software.py').touch() + + # def tearDown(self): + # if self.test_dir.exists(): + # shutil.rmtree(self.test_dir) + + def test_cli_workflow(self): + # Create top-level crate + top_crate_result = subprocess.run([ + 'fairscape-cli', 'rocrate', 'create', + '--name', 'Top Level Crate', + '--organization-name', 'Test Org', + '--project-name', 'Test Project', + '--description', 'Top level test crate', + '--keywords', 'test,top-level', + '.' + ], capture_output=True, text=True) + print(f"Top crate output: {top_crate_result.stdout}") + print(f"Top crate error: {top_crate_result.stderr}") + self.assertEqual(top_crate_result.returncode, 0) + top_crate_id = top_crate_result.stdout.strip() + + # Create subcrate + subcrate_result = subprocess.run([ + 'fairscape-cli', 'rocrate', 'register', 'subrocrate', + '.', 'subcrate', + '--name', 'Sub Crate', + '--organization-name', 'Test Org', + '--project-name', 'Test Project', + '--description', 'Test subcrate', + '--keywords', 'test,subcrate' + ], capture_output=True, text=True) + print(f"Subcrate output: {subcrate_result.stdout}") + print(f"Subcrate error: {subcrate_result.stderr}") + self.assertEqual(subcrate_result.returncode, 0) + subcrate_id = subcrate_result.stdout.strip() + + # Register top-level dataset + top_dataset_result = subprocess.run([ + 'fairscape-cli', 'rocrate', 'register', 'dataset', + '.', + '--name', 'Top Level Data', + '--author', 'Test Author', + '--version', '1.0', + '--date-published', '2025-01-22', + '--description', 'Top level test data', + '--keywords', 'test,data', + '--data-format', 'csv', + '--filepath', 'input_data.csv' + ], capture_output=True, text=True) + print(f"Top dataset output: {top_dataset_result.stdout}") + print(f"Top dataset error: {top_dataset_result.stderr}") + self.assertEqual(top_dataset_result.returncode, 0) + top_dataset_id = top_dataset_result.stdout.strip() + + # Register subcrate dataset + subcrate_dataset_result = subprocess.run([ + 'fairscape-cli', 'rocrate', 'register', 'dataset', + str(self.test_dir / 'subcrate'), + '--name', 'Subcrate Data', + '--author', 'Test Author', + '--version', '1.0', + '--date-published', '2025-01-22', + '--description', 'Subcrate test data', + '--keywords', 'test,data', + '--data-format', 'csv', + '--filepath', 'subcrate/subcrate_data.csv' + ], capture_output=True, text=True) + print(f"Subcrate dataset output: {subcrate_dataset_result.stdout}") + print(f"Subcrate dataset error: {subcrate_dataset_result.stderr}") + self.assertEqual(subcrate_dataset_result.returncode, 0) + subcrate_dataset_id = subcrate_dataset_result.stdout.strip() + + # Register software in subcrate + software_result = subprocess.run([ + 'fairscape-cli', 'rocrate', 'register', 'software', + str(self.test_dir / 'subcrate'), + '--name', 'Test Software', + '--author', 'Test Author', + '--version', '1.0', + '--description', 'Test analysis software', + '--keywords', 'test,software', + '--file-format', 'py', + '--filepath', 'subcrate/software.py', + '--date-modified', '2025-01-22' + ], capture_output=True, text=True) + print(f"Software output: {software_result.stdout}") + print(f"Software error: {software_result.stderr}") + self.assertEqual(software_result.returncode, 0) + software_id = software_result.stdout.strip() + + # Register computation in subcrate + computation_result = subprocess.run([ + 'fairscape-cli', 'rocrate', 'register', 'computation', + str(self.test_dir / 'subcrate'), + '--name', 'Test Computation', + '--run-by', 'Test Author', + '--date-created', '2025-01-22', + '--description', 'Test computation', + '--keywords', 'test,computation', + '--used-software', software_id, + '--used-dataset', subcrate_dataset_id, + '--command', 'python software.py subcrate_data.csv' + ], capture_output=True, text=True) + print(f"Computation output: {computation_result.stdout}") + print(f"Computation error: {computation_result.stderr}") + self.assertEqual(computation_result.returncode, 0) + computation_id = computation_result.stdout.strip() + + # Verify crate structure + with open(self.test_dir / 'ro-crate-metadata.json') as f: + top_metadata = json.load(f) + with open(self.test_dir / 'subcrate' / 'ro-crate-metadata.json') as f: + sub_metadata = json.load(f) + + # Verify top-level crate structure + top_root_id = next(item['about']['@id'] for item in top_metadata['@graph'] + if item['@id'] == 'ro-crate-metadata.json') + top_root = next(item for item in top_metadata['@graph'] + if item['@id'] == top_root_id) + + # Verify top-level relationships + self.assertIn(subcrate_id, [part['@id'] for part in top_root['hasPart']]) + self.assertIn(top_dataset_id, [part['@id'] for part in top_root['hasPart']]) + + # Verify subcrate structure + sub_root_id = next(item['about']['@id'] for item in sub_metadata['@graph'] + if item['@id'] == 'ro-crate-metadata.json') + sub_root = next(item for item in sub_metadata['@graph'] + if item['@id'] == sub_root_id) + + # Verify subcrate relationships + sub_parts = [part['@id'] for part in sub_root['hasPart']] + self.assertIn(subcrate_dataset_id, sub_parts) + self.assertIn(software_id, sub_parts) + self.assertIn(computation_id, sub_parts) + + # Verify computation relationships + computation = next(item for item in sub_metadata['@graph'] + if item['@id'] == computation_id) + self.assertIn(software_id, computation['usedSoftware']) + self.assertIn(subcrate_dataset_id, computation['usedDataset']) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file