17 changes: 16 additions & 1 deletion src/fairscape_cli/commands/build_commands.py
@@ -19,7 +19,8 @@
from fairscape_cli.models import (
GenerateROCrate,
LinkSubcrates,
collect_subcrate_metadata
collect_subcrate_metadata,
collect_subcrate_aggregated_metrics
)

from fairscape_models.rocrate import ROCrateV1_2
@@ -182,6 +183,9 @@ def build_release(
if keyword not in combined_keywords:
combined_keywords.append(keyword)

# Collect aggregated metrics for AI-Ready scoring
aggregated_metrics = collect_subcrate_aggregated_metrics(release_directory)

parent_params = {
"guid": guid,
"name": name,
@@ -285,6 +289,17 @@ def build_release(
click.echo(f"ERROR: {e}")
ctx.exit(1)

# Add aggregated metrics as individual properties (following the evi: prefix pattern)
parent_params["evi:datasetCount"] = aggregated_metrics.dataset_count
parent_params["evi:computationCount"] = aggregated_metrics.computation_count
parent_params["evi:softwareCount"] = aggregated_metrics.software_count
parent_params["evi:schemaCount"] = aggregated_metrics.schema_count
parent_params["evi:totalContentSizeBytes"] = aggregated_metrics.total_content_size_bytes
parent_params["evi:entitiesWithSummaryStats"] = aggregated_metrics.entities_with_summary_stats
parent_params["evi:entitiesWithChecksums"] = aggregated_metrics.entities_with_checksums
parent_params["evi:totalEntities"] = aggregated_metrics.total_entities
parent_params["evi:formats"] = sorted(list(aggregated_metrics.formats))

try:
click.echo("\n=== Creating release RO-Crate ===")
parent_crate_root_dict = GenerateROCrate(**parent_params)
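For illustration only, not part of this diff: with the evi:-prefixed roll-up pattern above, the release-level parent_params might end up looking roughly like the sketch below. The ARK identifier and all counts are hypothetical.

# Hypothetical sketch of parent_params after the aggregated metrics are attached;
# identifiers and numbers are illustrative, not taken from a real release.
parent_params = {
    "guid": "ark:99999/example-release",
    "name": "Example Release",
    # ... other release metadata collected earlier in build_release ...
    "evi:datasetCount": 42,
    "evi:computationCount": 7,
    "evi:softwareCount": 5,
    "evi:schemaCount": 3,
    "evi:totalContentSizeBytes": 125_500_000_000,  # roughly "125.5 GB"
    "evi:entitiesWithSummaryStats": 18,
    "evi:entitiesWithChecksums": 40,
    "evi:totalEntities": 54,
    "evi:formats": ["application/json", "text/csv"],
}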
2 changes: 1 addition & 1 deletion src/fairscape_cli/commands/import_commands.py
@@ -325,7 +325,7 @@ def import_figshare(
@import_group.command('dataverse')
@click.argument('dataset-doi', type=str)
@click.option('--server-url', default='https://dataverse.harvard.edu', show_default=True, help='Dataverse server URL.')
@click.option('--token', required=False, type=str, help='Dataverse API token (optional, for restricted datasets).')
@click.option('--token', required=False, type=str, help='Dataverse API token.')
@generic_importer_options
@click.pass_context
def import_dataverse(
4 changes: 2 additions & 2 deletions src/fairscape_cli/commands/rocrate_commands.py
@@ -956,7 +956,7 @@ def registerModel(
"keywords": list(keywords),
"modelType": model_type,
"framework": framework,
"modelFormat": model_format,
"format": model_format,
"trainingDataset": list(training_dataset),
"generatedBy": generated_by,
"filepath": filepath,
@@ -1099,7 +1099,7 @@ def registerHuggingFaceModel(
"keywords": list(keywords) if keywords else hf_metadata.get('keywords', []),
"modelType": model_type or hf_metadata.get('model_type'),
"framework": framework or hf_metadata.get('framework'),
"modelFormat": model_format or hf_metadata.get('model_format'),
"format": model_format or hf_metadata.get('model_format'),
"trainingDataset": list(training_dataset) if training_dataset else hf_metadata.get('training_datasets', []),
"filepath": hf_metadata.get('download_url'),
"url": hf_metadata.get('landing_page_url'),
3 changes: 1 addition & 2 deletions src/fairscape_cli/data_fetcher/generic_data/research_data.py
@@ -80,8 +80,7 @@ def to_rocrate(self, output_dir: str, **kwargs) -> str:
"version": file_info.get("version", "1.0"),
"associatedPublication": self.doi or None,
"additionalDocumentation": None,
"format": file_format,
"schema": "",
"format": file_format,
"derivedFrom": [],
"usedBy": [],
"generatedBy": [],
14 changes: 9 additions & 5 deletions src/fairscape_cli/models/__init__.py
@@ -8,14 +8,16 @@

from fairscape_cli.models.computation import Computation, GenerateComputation
from fairscape_cli.models.rocrate import (
ROCrate,
ROCrate,
GenerateROCrate,
ReadROCrateMetadata,
AppendCrate,
ReadROCrateMetadata,
AppendCrate,
CopyToROCrate,
UpdateCrate,
LinkSubcrates,
collect_subcrate_metadata
collect_subcrate_metadata,
collect_subcrate_aggregated_metrics,
AggregatedMetrics
)
from fairscape_cli.models.bagit import BagIt
from fairscape_cli.models.pep import PEPtoROCrateMapper
@@ -39,5 +41,7 @@
'BagIt',
'PEPtoROCrateMapper',
'LinkSubcrates',
'collect_subcrate_metadata'
'collect_subcrate_metadata',
'collect_subcrate_aggregated_metrics',
'AggregatedMetrics'
]
1 change: 0 additions & 1 deletion src/fairscape_cli/models/biochem_entity.py
@@ -40,7 +40,6 @@ def GenerateBioChemEntity(
entityMetadata = {
"@id": guid,
"name": name,
"@type": "https://schema.org/BioChemEntity",
"description": description
}

1 change: 0 additions & 1 deletion src/fairscape_cli/models/computation.py
@@ -34,7 +34,6 @@ def GenerateComputation(
computationMetadata = {
"@id": guid,
"name": name,
"@type": "https://w3id.org/EVI#Computation"
}

for key, value in kwargs.items():
3 changes: 1 addition & 2 deletions src/fairscape_cli/models/dataset.py
@@ -41,8 +41,7 @@ def GenerateDataset(

datasetMetadata = {
"@id": guid,
"name": name,
"@type": "https://w3id.org/EVI#Dataset"
"name": name
}

content_url = None
3 changes: 1 addition & 2 deletions src/fairscape_cli/models/experiment.py
@@ -34,8 +34,7 @@ def GenerateExperiment(

experimentMetadata = {
"@id": guid,
"name": name,
"@type": "https://w3id.org/EVI#Experiment"
"name": name
}

for key, value in kwargs.items():
1 change: 0 additions & 1 deletion src/fairscape_cli/models/instrument.py
@@ -40,7 +40,6 @@ def GenerateInstrument(
instrumentMetadata = {
"@id": guid,
"name": name,
"@type": "https://w3id.org/EVI#Instrument"
}

if filepath and cratePath:
205 changes: 202 additions & 3 deletions src/fairscape_cli/models/rocrate.py
@@ -2,12 +2,13 @@
import shutil
import json
from datetime import datetime
from typing import Optional, Union, List, Literal, Dict, Any
from typing import Optional, Union, List, Literal, Dict, Any, Set
from dataclasses import dataclass, field
from pydantic import BaseModel, Field, ConfigDict, model_validator
import uuid
import mongomock

from fairscape_cli.config import NAAN, DEFAULT_CONTEXT
from fairscape_models import DEFAULT_CONTEXT, DEFAULT_ARK_NAAN as NAAN
from fairscape_cli.models.software import Software
from fairscape_cli.models.dataset import Dataset
from fairscape_cli.models.computation import Computation
@@ -530,6 +531,7 @@ def find_and_process_subcrates(directory: pathlib.Path, base_path: pathlib.Path)
print("No valid sub-crates found to link.")

return linked_sub_crate_ids

def collect_subcrate_metadata(parent_crate_path: pathlib.Path) -> dict:
"""
Collects author and keyword metadata from all subcrates in the parent crate.
@@ -584,7 +586,204 @@ def process_directory(directory)
'authors': sorted(list(authors)),
'keywords': sorted(list(keywords))
}



@dataclass
class AggregatedMetrics:
"""
Aggregated metrics from all sub-crates for AI-Ready scoring.

This class accumulates entity counts, statistics, checksums, formats,
and schema references from all sub-crates in a release to enable
efficient AI-Ready score calculation without recursive file reads.
"""

# Entity counts (for provenance scoring)
dataset_count: int = 0
computation_count: int = 0
software_count: int = 0
schema_count: int = 0

# Statistics (for characterization scoring)
total_content_size_bytes: int = 0
entities_with_summary_stats: int = 0

# Verification (for pre-model explainability)
entities_with_checksums: int = 0
total_entities: int = 0

# Computability
formats: Set[str] = field(default_factory=set)

# Standards
schemas: List[Dict[str, str]] = field(default_factory=list)


def _extract_content_size_bytes(size_str: str) -> int:
"""
Extract content size in bytes from a size string.

Args:
size_str: Size string like "125.5 GB" or "1.2 TB"

Returns:
Size in bytes as integer, or 0 if parsing fails
"""
if not size_str or not isinstance(size_str, str):
return 0

try:
size_str = size_str.strip().upper()
if "TB" in size_str:
return int(float(size_str.replace("TB", "").strip()) * 1e12)
elif "GB" in size_str:
return int(float(size_str.replace("GB", "").strip()) * 1e9)
elif "MB" in size_str:
return int(float(size_str.replace("MB", "").strip()) * 1e6)
elif "KB" in size_str:
return int(float(size_str.replace("KB", "").strip()) * 1e3)
else:
# Assume bytes if no unit
return int(float(size_str))
except (ValueError, AttributeError):
return 0


def _extract_checksum(entity: Dict[str, Any]) -> Optional[str]:
"""
Extract checksum from an entity.

Args:
entity: Entity dictionary from RO-Crate @graph

Returns:
Checksum string (e.g., "md5:abc123...") or None
"""
# Check common checksum fields
md5 = entity.get("md5") or entity.get("MD5")
if md5:
if md5.startswith("md5:"):
return md5
else:
return f"md5:{md5}"

sha256 = entity.get("sha256") or entity.get("SHA256")
if sha256:
if sha256.startswith("sha256:"):
return sha256
else:
return f"sha256:{sha256}"

return None


def _get_entity_type(entity: Dict[str, Any]) -> str:
"""
Get type from entity's @type or metadataType field.

Args:
entity: Entity dictionary from RO-Crate @graph

Returns:
Type string (last item if list), or empty string
"""
type_val = entity.get("@type") or entity.get("metadataType") or []
if isinstance(type_val, str):
return type_val
elif isinstance(type_val, list) and type_val:
return type_val[-1]
return ""


def collect_subcrate_aggregated_metrics(
parent_crate_path: pathlib.Path
) -> AggregatedMetrics:
"""
Collect aggregated metrics from all subcrates for AI-Ready scoring.

This function traverses all sub-crates in a release directory and
aggregates entity counts, statistics, checksums, formats, and schemas.
These aggregated metrics are added to the release-level RO-Crate to
enable efficient AI-Ready score calculation without requiring recursive
file system reads during scoring.

Args:
parent_crate_path: Path to the release directory containing sub-crates

Returns:
AggregatedMetrics object with all roll-up properties
"""
parent_crate_path = pathlib.Path(parent_crate_path)
metrics = AggregatedMetrics()
processed_files = set()

def process_directory(directory: pathlib.Path):
"""Recursively process directories to find and aggregate subcrate metadata."""
for path in directory.glob('**/ro-crate-metadata.json'):
if path.is_file() and str(path) not in processed_files:
processed_files.add(str(path))

subcrate_metadata = ReadROCrateMetadata(path)
graph = subcrate_metadata.get('@graph', [])

for entity in graph:
# Convert pydantic to dict
if hasattr(entity, 'model_dump'):
entity = entity.model_dump(by_alias=True)

if entity.get('@id') == 'ro-crate-metadata.json':
continue

entity_type = _get_entity_type(entity)

if "Dataset" in entity_type:
metrics.dataset_count += 1
metrics.total_entities += 1

elif "Computation" in entity_type or "Experiment" in entity_type:
metrics.computation_count += 1
metrics.total_entities += 1

elif "Software" in entity_type:
metrics.software_count += 1
metrics.total_entities += 1

elif "Schema" in entity_type:
metrics.schema_count += 1
schema_id = entity.get('@id')
if schema_id:
metrics.schemas.append({"@id": schema_id})

content_size = entity.get("contentSize")
if content_size:
size_bytes = _extract_content_size_bytes(content_size)
if size_bytes > 0:
metrics.total_content_size_bytes += size_bytes

if entity.get("hasSummaryStatistics"):
metrics.entities_with_summary_stats += 1

checksum = _extract_checksum(entity)
if checksum:
metrics.entities_with_checksums += 1

format_val = entity.get("format") or entity.get("encodingFormat")
if format_val:
if isinstance(format_val, str):
metrics.formats.add(format_val)
elif isinstance(format_val, list):
for fmt in format_val:
if isinstance(fmt, str):
metrics.formats.add(fmt)


for dir_item in parent_crate_path.iterdir():
if dir_item.is_dir():
process_directory(dir_item)

return metrics


################################
#
# Mongomock update tests
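A minimal usage sketch, not part of this diff (the release path is hypothetical): the collector walks every sub-crate's ro-crate-metadata.json under the release directory and returns one AggregatedMetrics object, which build_release then flattens into the evi:-prefixed properties shown earlier.

import pathlib
from fairscape_cli.models import collect_subcrate_aggregated_metrics

# Hypothetical release directory containing one subdirectory per sub-crate.
release_dir = pathlib.Path("./example_release")
metrics = collect_subcrate_aggregated_metrics(release_dir)

print(metrics.dataset_count, metrics.computation_count, metrics.total_entities)
print(sorted(metrics.formats))  # formats is a set; sort it before serializing

# Note: contentSize strings are normalized with decimal multipliers, so an
# entity reporting "1.2 TB" contributes int(1.2 * 1e12) bytes to
# total_content_size_bytes.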
3 changes: 1 addition & 2 deletions src/fairscape_cli/models/sample.py
@@ -39,8 +39,7 @@ def GenerateSample(

sampleMetadata = {
"@id": guid,
"name": name,
"@type": "https://w3id.org/EVI#Sample"
"name": name
}

if filepath and cratePath:
3 changes: 1 addition & 2 deletions src/fairscape_cli/models/software.py
@@ -40,8 +40,7 @@ def GenerateSoftware(

softwareMetadata = {
"@id": guid,
"name" : name,
"@type": "https://w3id.org/EVI#Software"
"name" : name
}

content_url = None