def parse_identifiers_generic(entry, xpath, prefix, ns):
    """Build one identifier record per non-empty node matched by ``xpath``.

    Args:
        entry: Element to search; only ``entry.findall(xpath, ns)`` is used.
        xpath: Path expression handed to ``findall``.
        prefix: Used as the identifier prefix (``"<prefix>:<text>"``), as the
            ``source`` value, and in the description.
        ns: Namespace mapping handed to ``findall``.

    Returns:
        A list of dicts with ``identifier``, ``source`` and ``description``
        keys, in document order. Nodes for which ``get_text`` returns a falsy
        value are skipped.
    """
    node_texts = (get_text(node) for node in entry.findall(xpath, ns))
    return [
        {
            "identifier": f"{prefix}:{text}",
            "source": prefix,
            "description": f"{prefix} accession",
        }
        for text in node_texts
        if text
    ]
Typical usage: -------------- +Use it in Berdle as: python3 src/parsers/uniprot.py \ --xml-url "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_sprot_archaea.xml.gz" \ --output-dir "./output" \ --namespace "uniprot_db" \ --batch-size 5000 + +python -m cdm_data_loader_utils.parsers.uniprot \ + --xml-url "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_sprot_archaea.xml.gz" \ + --output-dir "tests/data/uniprot_archaea" \ + --namespace "uniprot_db" \ + --batch-size 5000 + + Arguments: ---------- ---xml-url: URL to the UniProt XML .gz file +--xml-url: URL to the UniProt XML .gz file --output-dir: Output directory for Delta tables and logs (default: './output') --namespace: Delta Lake database name (default: 'uniprot_db') ---target-date: Process entries modified/updated since specific date +--target-date: Process entries modified/updated since specific date --batch-size: Number of UniProt entries to process per write batch (default: 5000) Functionality: -------------- -- Downloads the XML file if not present locally +- Downloads the XML file if not present locally - Parses UniProt entries in a memory-efficient streaming fashion - Maps parsed data into standardized CDM tables - Writes all tables as Delta Lake tables, supporting incremental import @@ -38,6 +47,7 @@ import datetime import gzip import json +import logging import os import uuid import xml.etree.ElementTree as ET @@ -46,49 +56,61 @@ import requests from delta import configure_spark_with_delta_pip from pyspark.sql import SparkSession +from pyspark.sql.functions import col, split from pyspark.sql.types import ArrayType, StringType, StructField, StructType -## XML namespace mapping for UniProt entries (used for all XPath queries) -NS = {"u": "https://uniprot.org/uniprot"} +from cdm_data_loader_utils.parsers.shared_identifiers import parse_identifiers_generic +from 
cdm_data_loader_utils.parsers.xml_utils import clean_dict, find_all_text, get_attr, get_text, parse_db_references +# --------------------------------------------------------------------- +# Logging +# --------------------------------------------------------------------- +logger = logging.getLogger(__name__) +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", +) -def load_existing_identifiers(spark, output_dir, namespace): - """ - Load the existing 'identifiers' Delta table and build a mapping from UniProt accession to CDM entity ID. - This function enables consistent mapping of accessions to CDM IDs across multiple imports, supporting upsert and idempotent workflows. - Returns: - dict: {accession: entity_id} - """ - access_to_cdm_id = {} - id_path = os.path.abspath(os.path.join(output_dir, f"{namespace}_identifiers_delta")) - if os.path.exists(id_path): - try: - # Read identifier and entity_id columns from the Delta table - df = spark.read.format("delta").load(id_path).select("identifier", "entity_id") - for row in df.collect(): - # Identifier field: UniProt:Pxxxxx, extract the actual accession part after the colon - accession = row["identifier"].split(":", 1)[1] - access_to_cdm_id[accession] = row["entity_id"] - except Exception as e: - print(f"Couldn't load identifiers table: {e}") - else: - print(f"No previous identifiers delta at {id_path}.") - return access_to_cdm_id +# --------------------------------------------------------------------- +# XML namespace mapping for UniProt entries (used for all XPath queries) +# --------------------------------------------------------------------- +NS = {"ns": "https://uniprot.org/uniprot"} -def generate_cdm_id() -> str: - """ - Generate a CDM entity_id directly from UniProt accession, using 'CDM:' prefix - Ensures that each accession is mapped to stable and unique CDM entity ID, making it easy to join across different tables by accession. 
def delta_table_path(output_dir: str, namespace: str, table: str) -> str:
    """Return the absolute form of ``<output_dir>/<namespace>/<table>``.

    Args:
        output_dir: Base output directory (may be relative).
        namespace: Sub-directory under ``output_dir``.
        table: Final path component.

    Returns:
        The joined path, made absolute with ``os.path.abspath``.
    """
    relative_location = os.path.join(output_dir, namespace, table)
    return os.path.abspath(relative_location)
def stream_uniprot_xml(filepath: str):
    """Stream ``entry`` elements from a gzipped UniProt XML file.

    The file is parsed incrementally with ``ET.iterparse``. Each element whose
    namespace-stripped tag is exactly ``entry`` is yielded once it has been
    fully parsed, then released before parsing continues.

    Args:
        filepath: Path to a gzip-compressed XML file.

    Yields:
        One ``Element`` per ``entry``. The element is cleared as soon as the
        consumer asks for the next one, so anything needed from it must be
        extracted before advancing the generator.
    """
    logger.info("Streaming UniProt XML from: %s", filepath)
    with gzip.open(filepath, "rb") as f:
        events = ET.iterparse(f, events=("start", "end"))

        # The first event is the start of the document root. Keep a handle on
        # it so that finished entries can be detached from it below.
        first = next(events, None)
        if first is None:
            return
        root = first[1]

        for event, elem in events:
            if event != "end":
                continue
            # Match the local name exactly; a suffix test would also accept
            # unrelated tags whose name merely ends in "entry".
            if elem.tag.rpartition("}")[2] != "entry":
                continue
            yield elem
            elem.clear()
            # elem.clear() empties the entry but leaves the (now empty) element
            # attached to the root, so the root's child list would grow with
            # every entry in the file. Clearing the root drops those references
            # and keeps memory use flat.
            root.clear()
+ Returns: + accession_to_entity_id: accession -> entity_id (from identifiers) + entity_id_to_created: entity_id -> created (from entities) """ - names = [] + accession_to_entity_id: dict[str, str] = {} + entity_id_to_created: dict[str, str] = {} - # Extract all top-level tags - for name_element in entry.findall("u:name", NS): - if name_element.text: - names.append( - { - "entity_id": cdm_id, - "name": name_element.text, - "description": "UniProt protein name", - "source": "UniProt", - } + id_path = os.path.join(output_dir, namespace, "identifiers") + ent_path = os.path.join(output_dir, namespace, "entities") + + if os.path.exists(id_path): + try: + df = ( + spark.read.format("delta") + .load(id_path) + .filter(col("identifier").startswith("UniProt:")) + .select( + split(col("identifier"), ":").getItem(1).alias("accession"), + col("entity_id"), + ) ) + for row in df.toLocalIterator(): + acc = row["accession"] + eid = row["entity_id"] + if acc and eid: + accession_to_entity_id[acc] = eid + logger.info( + "Loaded %d accession->entity_id from %s", + len(accession_to_entity_id), + id_path, + ) + except Exception: + logger.exception("Couldn't load identifiers from %s", id_path) - # Extract recommended and alternative names from block - protein = entry.find("u:protein", NS) - if protein is not None: - for name_type in ["recommended", "alternative"]: - # Directly use findall for simplicity (recommendedName returns single-element list) - name_blocks = protein.findall(f"u:{name_type}Name", NS) - for name in name_blocks: - for name_length in ["full", "short"]: - name_string = name.find(f"u:{name_length}Name", NS) - if name_string is None or not name_string.text: - continue + if os.path.exists(ent_path): + try: + df = spark.read.format("delta").load(ent_path).select("entity_id", "created") + for row in df.toLocalIterator(): + if row["entity_id"] and row["created"]: + entity_id_to_created[row["entity_id"]] = row["created"] + logger.info( + "Loaded %d entity_id->created from 
def parse_protein_info(entry, cdm_id: str) -> dict | None:
    """Collect protein-level fields for one UniProt entry.

    Gathers, when present:
      * ``ec_numbers``: texts found under recommendedName/ecNumber and
        alternativeName/ecNumber of the ``protein`` element, joined with ``;``.
      * ``evidence_for_existence``: the ``type`` attribute of ``proteinExistence``.
      * ``length``, ``mass``, ``checksum``, ``modified``, ``sequence_version``
        and ``sequence`` from the ``sequence`` element.
      * ``entry_modified``: the entry's ``modified`` attribute, falling back to
        ``updated``.

    Args:
        entry: Element for a single UniProt entry.
        cdm_id: Entity id stored as ``protein_id`` on the returned record.

    Returns:
        A dict of the fields above plus ``protein_id``, or ``None`` when
        nothing could be extracted.
    """
    protein_info: dict = {}

    protein = entry.find("ns:protein", NS)
    if protein is not None:
        ec_numbers: list[str] = []
        for path in ("ns:recommendedName/ns:ecNumber", "ns:alternativeName/ns:ecNumber"):
            ec_numbers.extend(find_all_text(protein, path, NS))
        if ec_numbers:
            protein_info["ec_numbers"] = ";".join(ec_numbers)

    protein_existence = entry.find("ns:proteinExistence", NS)
    if protein_existence is not None:
        protein_info["evidence_for_existence"] = get_attr(protein_existence, "type")

    seq_elem = entry.find("ns:sequence", NS)
    if seq_elem is not None:
        # NOTE(review): clean_dict presumably drops None-valued keys, so absent
        # sequence attributes are left out of the record; confirm in xml_utils.
        protein_info.update(
            clean_dict(
                {
                    "length": get_attr(seq_elem, "length"),
                    "mass": get_attr(seq_elem, "mass"),
                    "checksum": get_attr(seq_elem, "checksum"),
                    "modified": get_attr(seq_elem, "modified"),
                    "sequence_version": get_attr(seq_elem, "version"),
                    "sequence": get_text(seq_elem),
                }
            )
        )

    entry_modified = get_attr(entry, "modified") or get_attr(entry, "updated")
    if entry_modified:
        protein_info["entry_modified"] = entry_modified

    if not protein_info:
        return None

    # Attach the key to every record. It used to be set only when
    # <proteinExistence> was present, so a record built from sequence or EC
    # data alone could not be joined back to its entity.
    protein_info["protein_id"] = cdm_id
    return protein_info
def _resolve_reaction_evidence(evidence_attr: str | None, evidence_map: dict[str, dict]) -> dict:
    """Return the evidence fields for a (possibly multi-key) ``evidence`` attribute."""
    if not evidence_attr:
        return {}

    # UniProt declares ``evidence`` as a whitespace-separated list of keys
    # (e.g. "2 3"), so each key is resolved on its own.
    hits = [evidence_map[key] for key in evidence_attr.split() if key in evidence_map]
    if not hits:
        return {}
    if len(hits) == 1:
        return hits[0]

    merged: dict = {}
    for hit in hits:
        for field, value in hit.items():
            if field in ("publications", "supporting_objects"):
                # Ordered union; a fresh list is built so evidence_map is not mutated.
                bucket = merged.setdefault(field, [])
                for item in value or []:
                    if item not in bucket:
                        bucket.append(item)
            else:
                # Scalar fields such as evidence_type: first resolved key wins.
                merged.setdefault(field, value)
    return merged


def parse_reaction_association(reaction, cdm_id: str, evidence_map: dict[str, dict]) -> list[dict]:
    """Build ``catalyzes`` associations for every dbReference of a reaction.

    Args:
        reaction: ``reaction`` element; its ``evidence`` attribute and its
            ``ns:dbReference`` children are read.
        cdm_id: Entity id used as the association subject.
        evidence_map: Evidence key -> evidence fields, as produced by
            ``parse_evidence_map``.

    Returns:
        One association dict per dbReference that has both ``type`` and ``id``.
        Evidence fields are added when at least one key listed in the
        ``evidence`` attribute is present in ``evidence_map``. With several
        keys, list fields are unioned and scalar fields come from the first
        resolved key. Each dict is passed through ``clean_dict``.
    """
    # The evidence belongs to the reaction, not to the individual dbReference,
    # so resolve it once instead of once per loop iteration.
    evidence_fields = _resolve_reaction_evidence(reaction.get("evidence"), evidence_map)

    associations: list[dict] = []
    for dbref in reaction.findall("ns:dbReference", NS):
        db_type = dbref.get("type")
        db_id = dbref.get("id")
        if not db_type or not db_id:
            continue

        assoc = {
            "subject": cdm_id,
            "predicate": "catalyzes",
            "object": make_curie(db_type, db_id),
            "evidence_type": None,
            "supporting_objects": None,
            "publications": None,
        }
        assoc.update(evidence_fields)
        associations.append(clean_dict(assoc))
    return associations
+ Only keep: + - taxonomy association + - catalytic activity / cofactor associations """ - associations = [] - - def clean(d): - """Remove None-value keys from a dict.""" - return {k: v for k, v in d.items() if v is not None} + associations: list[dict] = [] # Taxonomy association - organism = entry.find("u:organism", NS) + organism = entry.find("ns:organism", NS) if organism is not None: - taxon_ref = organism.find('u:dbReference[@type="NCBI Taxonomy"]', NS) + taxon_ref = organism.find('ns:dbReference[@type="NCBI Taxonomy"]', NS) if taxon_ref is not None: - associations.append( - clean( - { - "subject": cdm_id, - "object": f"NCBITaxon:{taxon_ref.get('id')}", - "predicate": None, - "evidence_type": None, - "supporting_objects": None, - "publications": None, - } - ) - ) - - # Database cross-references with evidence - for dbref in entry.findall("u:dbReference", NS): - db_type = dbref.get("type") - db_id = dbref.get("id") - association = { - "subject": cdm_id, - "object": f"{db_type}:{db_id}", - "predicate": None, - "evidence_type": None, - "supporting_objects": None, - "publications": None, - } - evidence_key = dbref.get("evidence") - if evidence_key and evidence_key in evidence_map: - association.update(evidence_map[evidence_key]) - associations.append(clean(association)) + tax_id = taxon_ref.get("id") + if tax_id: + associations.append(_make_association(cdm_id, f"NCBITaxon:{tax_id}", predicate="in_taxon")) - # Catalytic/cofactor - for comment in entry.findall("u:comment", NS): + # Catalytic activity / cofactor + for comment in entry.findall("ns:comment", NS): comment_type = comment.get("type") if comment_type == "catalytic activity": - # extract catalytic associations - for reaction in comment.findall("u:reaction", NS): - for assoc in parse_reaction_association(reaction, cdm_id, evidence_map): - associations.append(clean(assoc)) + for reaction in comment.findall("ns:reaction", NS): + associations.extend(parse_reaction_association(reaction, cdm_id, evidence_map)) 
def parse_cross_references(entry, cdm_id: str) -> list[dict]:
    """Turn the entry's direct ``dbReference`` children into cross-reference rows.

    Args:
        entry: Element for a single UniProt entry.
        cdm_id: Entity id stored as ``entity_id`` on every row.

    Returns:
        One dict per dbReference that has both ``type`` and ``id``, with
        ``entity_id``, ``xref_type`` (the normalized prefix), ``xref_value``
        (the raw id) and ``xref``. ``xref`` is the raw id when it already
        contains a colon, otherwise ``"<xref_type>:<id>"``. Each dict is
        passed through ``clean_dict``.
    """
    rows: list[dict] = []

    for dbref in entry.findall("ns:dbReference", NS):
        db_type, db_id = dbref.get("type"), dbref.get("id")
        if db_type and db_id:
            xref_type = normalize_prefix(db_type)
            record = {
                "entity_id": cdm_id,
                "xref_type": xref_type,
                "xref_value": db_id,
                # Ids that already carry a prefix (contain ":") are kept verbatim.
                "xref": db_id if ":" in db_id else f"{xref_type}:{db_id}",
            }
            rows.append(clean_dict(record))

    return rows
= [] + for reference in entry.findall("ns:reference", NS): + citation = reference.find("ns:citation", NS) + if citation is None: + continue + + raw_pubs, _ = parse_db_references(citation, NS) + for p in raw_pubs: + up = p.upper() + if up.startswith("PUBMED:"): + _, acc = p.split(":", 1) + publications.append(f"PMID:{acc}") + elif up.startswith("DOI:"): + _, acc = p.split(":", 1) + publications.append(f"DOI:{acc}") + + return list(dict.fromkeys(publications)) + + +def parse_uniprot_entry( + entry, + cdm_id: str, + current_timestamp: str, + datasource_name: str = "UniProt import", + prev_created: str | None = None, +) -> dict: + entity_created = prev_created or current_timestamp + entity_updated = current_timestamp uniprot_created = entry.attrib.get("created") uniprot_modified = entry.attrib.get("modified") or entry.attrib.get("updated") uniprot_version = entry.attrib.get("version") + entity = { "entity_id": cdm_id, "entity_type": "protein", @@ -389,65 +575,21 @@ def parse_uniprot_entry(entry, cdm_id, current_timestamp, datasource_name="UniPr "uniprot_created": uniprot_created, "uniprot_modified": uniprot_modified, } + evidence_map = parse_evidence_map(entry) + return { "entity": entity, "identifiers": parse_identifiers(entry, cdm_id), "names": parse_names(entry, cdm_id), "protein": parse_protein_info(entry, cdm_id), "associations": parse_associations(entry, cdm_id, evidence_map), + "cross_references": parse_cross_references(entry, cdm_id), "publications": parse_publications(entry), } -def download_file(url, output_path, chunk_size=8192, overwrite=False) -> None: - """ - Download a file from a given URL to a local output path. 
- """ - # Skip download if file already exists and not overwriting - if os.path.exists(output_path) and not overwrite: - print(f"File '{output_path}' already exists.") - return - - # Stream download to avoid high memory usage - try: - with requests.get(url, stream=True, timeout=60) as response: - response.raise_for_status() - with open(output_path, "wb") as f: - for chunk in response.iter_content(chunk_size=chunk_size): - if chunk: - f.write(chunk) - print(f"Downloaded '{url}' to '{output_path}'") - except Exception as e: - print(f"Failed to download '{url}': {e}") - - if os.path.exists(output_path): - os.remove(output_path) # Remove incomplete file - raise - - -def stream_uniprot_xml(filepath): - """ - Stream and parse UniProt XML entries from a local gzipped file. - Yields each element as soon as it is parsed to avoid loading the entire XML into memory. - """ - # Open the gzipped XML file for reading in binary mode - with gzip.open(filepath, "rb") as f: - # Use iterparse to process XML incrementally, triggering on element end events - context = ET.iterparse(f, events=("end",)) - for _event, element in context: - # Check tag name, ignoring namespace - if element.tag.endswith("entry"): - yield element - element.clear() - - -## ================================ SCHEMA ================================= -""" -Defines the Spark schema for all major CDM tables derived from UniProt XML. -Each schema is tailored for protein entities, identifiers, protein details, names, associations, and linked publications. 
-""" - +# ================================ SCHEMA ================================= schema_entities = StructType( [ StructField("entity_id", StringType(), False), @@ -505,6 +647,15 @@ def stream_uniprot_xml(filepath): ] ) +schema_cross_references = StructType( + [ + StructField("entity_id", StringType(), False), + StructField("xref_type", StringType(), True), + StructField("xref_value", StringType(), True), + StructField("xref", StringType(), True), + ] +) + schema_publications = StructType( [ StructField("entity_id", StringType(), False), @@ -513,126 +664,74 @@ def stream_uniprot_xml(filepath): ) -def save_batches_to_delta(spark, tables, output_dir, namespace) -> None: - """ - Persist batches of parsed records for each CDM table into Delta Lake format. - - - Each table is saved into a Delta directory named '{namespace}_{table}_delta' in the output folder. - - If the Delta directory exists, append new records. Otherwise, overwrite it. - - Registers the table in the Spark SQL for downstream query. 
- """ - for table, (records, schema) in tables.items(): - if not records: - continue # Skip all empty tables - - delta_dir = os.path.abspath(os.path.join(output_dir, f"{namespace}_{table}_delta")) - # Use "append" mode if the Delta directory already exists, otherwise "overwrite" - mode = "append" if os.path.exists(delta_dir) else "overwrite" - - print( - f"[DEBUG] Registering table: {namespace}.{table} at {delta_dir} with mode={mode}, record count: {len(records)}" - ) - - try: - df = spark.createDataFrame(records, schema) - df.write.format("delta").mode(mode).option("overwriteSchema", "true").save(delta_dir) - spark.sql(f""" - CREATE TABLE IF NOT EXISTS {namespace}.{table} - USING DELTA - LOCATION '{delta_dir}' - """) - except Exception as e: - print(f"Failed to save {table} to Delta: {e}") - - -def prepare_local_xml(xml_url, output_dir): - """ - Download the remote UniProt XML (.xml.gz) file to the specified local output directory, - unless the file already exists locally. Returns the full local file path. - """ - # Ensure output directory exists - os.makedirs(output_dir, exist_ok=True) - local_xml_path = os.path.join(output_dir, os.path.basename(xml_url)) - # Download only if file does not exist - download_file(xml_url, local_xml_path) - return local_xml_path - - -def save_datasource_record(xml_url, output_dir): - """ - Generate and save the datasource provenance record as a JSON file in the output directory. - """ - datasource = build_datasource_record(xml_url) - os.makedirs(output_dir, exist_ok=True) # Ensure output directory exists - output_path = os.path.join(output_dir, "datasource.json") - with open(output_path, "w") as f: - json.dump(datasource, f, indent=4) - return datasource - - -def get_spark_session(namespace): - """ - Initialize SparkSession with Delta Lake support, and ensure the target database exists. 
- """ - # Build SparkSession with Delta extensions enabled - builder = ( - SparkSession.builder.appName("DeltaIngestion") - .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") - .config( - "spark.sql.catalog.spark_catalog", - "org.apache.spark.sql.delta.catalog.DeltaCatalog", - ) - ) - spark = configure_spark_with_delta_pip(builder).getOrCreate() - # Ensure the target namespace (database) exists +# ================================ DELTA WRITE ================================= +def ensure_tables_registered(spark: SparkSession, output_dir: str, namespace: str, table_names: list[str]) -> None: spark.sql(f"CREATE DATABASE IF NOT EXISTS {namespace}") - return spark - - -def load_existing_entity(spark, output_dir, namespace): - """ - Load the existing entities_delta Delta table and build a mapping of entity_id to created timestamp. - This mapping is used to support upserts and idempotent writes. - """ - old_created_dict = {} - entities_table_path = os.path.abspath(os.path.join(output_dir, f"{namespace}_entities_delta")) - if os.path.exists(entities_table_path): - try: - # Read only the required columns for efficiency - old_df = spark.read.format("delta").load(entities_table_path).select("entity_id", "created") - for row in old_df.collect(): - old_created_dict[row["entity_id"]] = row["created"] - print(f"Loaded {len(old_created_dict)} existing entity_id records for upsert.") - except Exception as e: - print(f"Couldn't load previous entities delta table: {e}") - else: - print(f"No previous entities delta at {entities_table_path}.") - return old_created_dict + for tbl in table_names: + # delta_dir = os.path.abspath(os.path.join(output_dir, namespace, tbl)) + delta_dir = delta_table_path(output_dir, namespace, tbl) + spark.sql( + f""" + CREATE TABLE IF NOT EXISTS {namespace}.{tbl} + USING DELTA + LOCATION '{delta_dir}' + """ + ) -def parse_entries(local_xml_path, target_date, batch_size, spark, tables, output_dir, namespace, current_timestamp): - """ - 
Parse UniProt XML entries, write to Delta Lake in batches - Return (processed_entry_count, skipped_entry_count). +def save_batches_to_delta( + spark: SparkSession, + tables: dict[str, tuple[list, StructType]], + output_dir: str, + namespace: str, + mode: str = "append", +) -> None: + for table_name, (records, schema) in tables.items(): + if not records: + continue - """ + # delta_dir = os.path.abspath(os.path.join(output_dir, namespace, table_name)) + delta_dir = delta_table_path(output_dir, namespace, table_name) + df = spark.createDataFrame(records, schema) + writer = df.write.format("delta").mode(mode) + + if mode == "append": + writer = writer.option("mergeSchema", "true") + if mode == "overwrite": + writer = writer.option("overwriteSchema", "true") + + writer.save(delta_dir) + + +## =============================== MAIN PARSING LOOP ================================= +def parse_entries( + local_xml_path: str, + target_date: str | None, + batch_size: int, + spark: SparkSession, + tables: dict[str, tuple[list, StructType]], + output_dir: str, + namespace: str, + current_timestamp: str, + accession_to_entity_id: dict[str, str], + entity_id_to_created: dict[str, str], + mode: str, +) -> tuple[int, int]: target_date_dt = None - - # Convert target_date string to datetime for comparison if provided if target_date: try: target_date_dt = datetime.datetime.strptime(target_date, "%Y-%m-%d") + logger.info("Target date filter enabled: >= %s", target_date) except Exception: - print(f"Invalid target date is {target_date}") + logger.warning("Invalid target date provided: %s (ignored)", target_date) + target_date_dt = None entry_count, skipped = 0, 0 - # Iterate over each element in the XML file for entry_elem in stream_uniprot_xml(local_xml_path): try: - # Get the modification date of the entry mod_date = entry_elem.attrib.get("modified") or entry_elem.attrib.get("updated") - # If target_date is set, skip entries older than target_date + if target_date_dt and mod_date: try: 
entry_date_dt = datetime.datetime.strptime(mod_date[:10], "%Y-%m-%d") @@ -643,110 +742,197 @@ def parse_entries(local_xml_path, target_date, batch_size, spark, tables, output skipped += 1 continue - # Extract main accession (skip entry if not present) - main_accession_elem = entry_elem.find("u:accession", NS) - if main_accession_elem is None or main_accession_elem.text is None: + main_accession_elem = entry_elem.find("ns:accession", NS) + if main_accession_elem is None or not main_accession_elem.text: skipped += 1 continue - # Generate a unique CDM ID (UUID) for this entry - cdm_id = generate_cdm_id() + accession = main_accession_elem.text.strip() + + cdm_id = accession_to_entity_id.get(accession) or stable_cdm_id_from_uniprot_accession(accession) + prev_created = entity_id_to_created.get(cdm_id) + + record = parse_uniprot_entry(entry_elem, cdm_id, current_timestamp, prev_created=prev_created) - # Parse all sub-objects: entity, identifiers, names, protein, associations, publications - record = parse_uniprot_entry(entry_elem, cdm_id, current_timestamp) tables["entities"][0].append(record["entity"]) tables["identifiers"][0].extend(record["identifiers"]) tables["names"][0].extend(record["names"]) if record["protein"]: tables["proteins"][0].append(record["protein"]) + tables["associations"][0].extend(record["associations"]) - tables["publications"][0].extend( - {"entity_id": record["entity"]["entity_id"], "publication": pub} for pub in record["publications"] - ) + tables["cross_references"][0].extend(record["cross_references"]) + + for pub in record["publications"]: + tables["publications"][0].append( + { + "entity_id": cdm_id, + "publication": pub, + } + ) entry_count += 1 - # Write batch to Delta and clear lists every batch_size entries + if entry_count % batch_size == 0: - save_batches_to_delta(spark, tables, output_dir, namespace) + save_batches_to_delta(spark, tables, output_dir, namespace, mode=mode) for v in tables.values(): v[0].clear() - print(f"{entry_count} 
entries processed and saved") - except Exception as e: - # If any error occurs in parsing this entry, skip it and count - print(f"Error parsing entry: {e}") + logger.info("Processed and saved %d entries...", entry_count) + + except Exception: + logger.exception("Error parsing UniProt entry, skipping") skipped += 1 - continue - # write remaining records - save_batches_to_delta(spark, tables, output_dir, namespace) + save_batches_to_delta(spark, tables, output_dir, namespace, mode=mode) return entry_count, skipped -def ingest_uniprot(xml_url, output_dir, namespace, target_date=None, batch_size=5000) -> None: - # Generate the timestamp for the current run +def ingest_uniprot( + xml_url: str, + output_dir: str, + namespace: str, + target_date: str | None = None, + batch_size: int = 5000, + mode: str = "append", + overwrite_download: bool = False, +) -> None: current_timestamp = datetime.datetime.now(datetime.UTC).isoformat() - # Prepare local XML - local_xml_path = prepare_local_xml(xml_url, output_dir) - - # Save data source meta information + local_xml_path = prepare_local_xml(xml_url, output_dir, overwrite=overwrite_download) save_datasource_record(xml_url, output_dir) - # Get Spark and the existing CDM entity_id spark = get_spark_session(namespace) + if mode == "append": + accession_to_entity_id, entity_id_to_created = load_existing_maps(spark, output_dir, namespace) + else: + accession_to_entity_id, entity_id_to_created = {}, {} - # Define the table structure (batch storage) - entities, identifiers, names, proteins, associations, publications = ( - [], - [], - [], - [], - [], - [], - ) - tables = { + # accession_to_entity_id, entity_id_to_created = load_existing_maps(spark, output_dir, namespace) + + entities: list[dict] = [] + identifiers: list[dict] = [] + names: list[dict] = [] + proteins: list[dict] = [] + associations: list[dict] = [] + cross_references: list[dict] = [] + publications: list[dict] = [] + + tables: dict[str, tuple[list, StructType]] = { 
"entities": (entities, schema_entities), "identifiers": (identifiers, schema_identifiers), "names": (names, schema_names), "proteins": (proteins, schema_proteins), "associations": (associations, schema_associations), + "cross_references": (cross_references, schema_cross_references), "publications": (publications, schema_publications), } - # Main cycle processing, transfer to current timestamp + ensure_tables_registered( + spark, + output_dir, + namespace, + [ + "entities", + "identifiers", + "names", + "proteins", + "associations", + "cross_references", + "publications", + ], + ) + + logger.info( + "Starting UniProt ingestion: xml=%s | namespace=%s | mode=%s | batch_size=%d", + xml_url, + namespace, + mode, + batch_size, + ) + entry_count, skipped = parse_entries( - local_xml_path, target_date, batch_size, spark, tables, output_dir, namespace, current_timestamp + local_xml_path=local_xml_path, + target_date=target_date, + batch_size=batch_size, + spark=spark, + tables=tables, + output_dir=output_dir, + namespace=namespace, + current_timestamp=current_timestamp, + accession_to_entity_id=accession_to_entity_id, + entity_id_to_created=entity_id_to_created, + mode=mode, ) - print(f"All entries processed ({entry_count}), skipped {skipped}, writing complete tables.") - spark.sql(f"SHOW TABLES IN {namespace}").show() - spark.sql(f"SELECT COUNT(*) FROM {namespace}.entities").show() - # make sql test in entity table - spark.sql(f"SELECT * FROM {namespace}.entities LIMIT 10").show(truncate=False) + logger.info("Completed parsing UniProt XML. 
processed=%d skipped=%d", entry_count, skipped) - spark.stop() + logger.info("Verifying Delta tables in namespace `%s`", namespace) + spark.sql(f"SHOW TABLES IN {namespace}").show(truncate=False) - print(f"All Delta tables are created and registered in Spark SQL under `{namespace}`.") + for tbl in [ + "entities", + "identifiers", + "names", + "proteins", + "associations", + "cross_references", + "publications", + ]: + logger.info("Verifying table: %s.%s", namespace, tbl) + spark.sql(f"SELECT COUNT(*) AS row_count FROM {namespace}.{tbl}").show(truncate=False) + spark.sql(f"SELECT * FROM {namespace}.{tbl} LIMIT 5").show(truncate=False) + + spark.stop() + logger.info("Done") +# ================================ CLI ================================= @click.command() @click.option("--xml-url", required=True, help="URL to UniProt XML (.xml.gz)") -@click.option("--output-dir", default="output", help="Output directory for Delta tables") -@click.option("--namespace", default="uniprot_db", help="Delta Lake database name") +@click.option( + "--output-dir", + default="output", + show_default=True, + help="Output directory for Delta tables", +) +@click.option( + "--namespace", + default="uniprot_db", + show_default=True, + help="Delta Lake database name", +) @click.option( "--target-date", default=None, help="Only process entries modified/updated since this date (YYYY-MM-DD)", ) -@click.option("--batch-size", default=5000, help="Batch size for writing Delta tables") -def main(xml_url, output_dir, namespace, target_date, batch_size) -> None: +@click.option( + "--batch-size", + default=5000, + show_default=True, + help="Batch size for writing Delta tables", +) +@click.option( + "--mode", + type=click.Choice(["append", "overwrite"]), + default="append", + show_default=True, +) +@click.option( + "--overwrite-download", + is_flag=True, + help="Force re-download XML even if file exists", +) +def main(xml_url, output_dir, namespace, target_date, batch_size, mode, overwrite_download): 
ingest_uniprot( xml_url=xml_url, output_dir=output_dir, namespace=namespace, target_date=target_date, batch_size=int(batch_size), + mode=mode, + overwrite_download=overwrite_download, ) diff --git a/src/cdm_data_loader_utils/parsers/uniref.py b/src/cdm_data_loader_utils/parsers/uniref.py index da3327c..528eb47 100644 --- a/src/cdm_data_loader_utils/parsers/uniref.py +++ b/src/cdm_data_loader_utils/parsers/uniref.py @@ -1,5 +1,4 @@ -""" -UniRef XML Cluster ETL Pipeline. +"""UniRef XML Cluster ETL Pipeline. This script downloads a UniRef100 XML file, parses cluster and member information, and writes the extracted data into Delta Lake tables for downstream analysis. @@ -22,7 +21,12 @@ --output-dir cdm-data-loader-utils/output/uniref100_clusters \ --batch-size 1000 -**Parameters:** +python3 uniref.py \ + --ftp-url https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref100/uniref100.xml.gz \ + --output-dir output_uniref \ + --batch-size 1000 + +Parameters: - --ftp-url: UniProt FTP URL to the UniRef100 gzipped XML file. - --output-dir: Output directory where Delta tables will be written. - --batch-size: Number of UniRef entries to process. 
@@ -30,206 +34,280 @@ """ import gzip + +### ===== logging setup ===== ### +import logging import os -import uuid import xml.etree.ElementTree as ET from datetime import datetime -from urllib.request import URLError, urlretrieve +from urllib.error import URLError +from urllib.request import urlretrieve import click from delta import configure_spark_with_delta_pip from pyspark.sql import SparkSession from pyspark.sql.types import StringType, StructField, StructType +from cdm_data_loader_utils.parsers.xml_utils import get_text, parse_properties + +logger = logging.getLogger(__name__) + + +UNIREF_NS = {"ns": "http://uniprot.org/uniref"} +DATA_SOURCE = "UniRef 100" + -# Generate a unique CDM entity_id based on accession -def cdm_entity_id(accession) -> str | None: - if not accession: - return None - uuid_part = uuid.uuid5(uuid.NAMESPACE_OID, accession) - return f"CDM:{uuid_part}" +PREFIX_TRANSLATION = { + "UniProtKB ID": "UniProt", + "UniProtKB accession": "UniProt", + "UniParc ID": "UniParc", + "UniRef90 ID": "UniRef90", + "UniRef50 ID": "UniRef50", + "UniRef100 ID": "UniRef100", +} -# Download a file from the specified URL to the local path if it does not already exist -def download_file(url, local_path) -> None: +def generate_dbxref(db: str, acc: str) -> str: + """Generate a database reference that uses BioRegistry prefixes.""" + return f"{PREFIX_TRANSLATION[db]}:{acc}" + + +# timestamp helper +def get_timestamps( + uniref_id: str | None, + existing_created: dict[str, str], + now: datetime | None = None, +) -> tuple[str, str]: """ - If the file is already present at local, the function does nothing. - If the download fails, any partially downloaded file will be removed. + Return (updated_time, created_time) for a given UniRef cluster ID. + + If the cluster already exists in the Delta table, + we keep its original `created` timestamp and only update `updated`. + Otherwise, both are set to `now`. 
+ """ + uniref_key = uniref_id or "" + + now_dt = now or datetime.now() + formatted_now = now_dt.strftime("%Y-%m-%dT%H:%M:%S") + + created_prev = existing_created.get(uniref_key) + if created_prev: + created_time = created_prev.split(".")[0] if "." in created_prev else created_prev + else: + created_time = formatted_now + + return formatted_now, created_time + + +def download_file(url: str, local_path: str) -> None: + """ + Download a file from URL to `local_path` if it does not already exist. + If the download fails, any partially downloaded file is removed. """ if not os.path.exists(local_path): - print(f"Downloading from URL link: {url}") + logger.info(f"Downloading from URL link: {url}") + try: urlretrieve(url, local_path) - print("Download completed!") + logger.info("Download completed!") except Exception as e: - print(f"Failed to download {url}: {e}") + logger.error(f"Failed to download {url}: {e}") if os.path.exists(local_path): os.remove(local_path) raise else: - print(f"File already exists: {local_path}") + logger.info(f"File already exists: {local_path}") -# Load mapping from data_source_entity_id to created timestamp from Delta table -def load_existing_created(spark, entity_table): - existing_created = {} +def load_existing_created(spark: SparkSession, entity_table: str | None) -> dict[str, str]: + """ + Load mapping data_source_entity_id -> created timestamp from the Entity Delta table. + Returns an empty dict if the table does not exist. 
+ """ + existing_created: dict[str, str] = {} if not entity_table: - print("Entity table path not specified.") + logger.warning("Entity table path not specified.") return existing_created try: df = spark.read.format("delta").load(entity_table).select("data_source_entity_id", "created") existing_created = {row["data_source_entity_id"]: row["created"] for row in df.collect()} - print(f"Loaded {len(existing_created)} existing created timestamps.") + logger.info(f"Loaded {len(existing_created)} existing created timestamps from {entity_table}.") except Exception as e: - print(f"No existing Delta table found at {entity_table}. Starting fresh. ({e.__class__.__name__})") + logger.warning(f"No existing Delta table found at {entity_table}. Starting fresh. ({e.__class__.__name__})") return existing_created ##### -------------- List utility function --------------- ##### - - -# Helper function to extract basic cluster info from XML entry element -def extract_cluster(elem, ns): - cluster_id = f"CDM:{uuid.uuid4()}" +def extract_cluster(elem: ET.Element, ns: dict[str, str], uniref_id: str) -> tuple[str, str]: + """Extract a new CDM cluster_id and the UniRef cluster name.""" + # cluster_id = f"CDM:{uuid.uuid4()}" + # cluster_id = cdm_entity_id(uniref_id, prefix="cdm_ccol_") + cluster_id = f"uniref:{uniref_id}" name_elem = elem.find("ns:name", ns) - name = name_elem.text if name_elem is not None else "UNKNOWN" + # TODO: leave blank if there is no name + name = get_text(name_elem, default="UNKNOWN") return cluster_id, name -# Returns tuple of (updated_time, created_time) -def get_timestamps(uniref_id, existing_created, now=None): - now_dt = now or datetime.now() - formatted_now = now_dt.strftime("%Y-%m-%dT%H:%M:%S") - created = existing_created.get(uniref_id) - created_time = (created.split(".")[0] if "." 
in created else created) if created else formatted_now - return formatted_now, created_time - - -# Extract UniProtKB accession and is_seed status from a dbReference element -def get_accession_and_seed(dbref, ns): +def get_accession_and_seed(dbref: ET.Element | None, ns: dict[str, str]) -> tuple[str | None, str | None, bool]: + """Extract UniProtKB accession and is_seed status from a dbReference element.""" if dbref is None: - return None, False - prop_elems = dbref.findall("ns:property", ns) - - props = {} - for prop in prop_elems: - t = prop.attrib["type"] - v = prop.attrib["value"] - props[t] = v - - acc = props.get("UniProtKB accession") or dbref.attrib.get("id") - is_seed = props.get("isSeed", "false").lower() == "true" - return acc, is_seed - - -# Add both representative and other cluster members into cluster_member_data list -def add_cluster_members(cluster_id, repr_db, elem, cluster_member_data, ns) -> None: - dbrefs = [] + return None, None, False + + props = parse_properties(dbref, ns) + db = dbref.attrib.get("type") + acc = dbref.attrib.get("id") + is_seed_list = props.get("isSeed", []) + is_seed = is_seed_list and is_seed_list[0].lower() == "true" + return db, acc, is_seed + + +def add_cluster_members( + cluster_id: str, + repr_db: ET.Element | None, + elem: ET.Element, + cluster_member_rows: list[tuple[str, str, str, str, str]], + ns: dict[str, str], +) -> None: + """Populate cluster_member_rows with representative + member records.""" + dbrefs: list[tuple[ET.Element, bool]] = [] if repr_db is not None: dbrefs.append((repr_db, True)) for mem in elem.findall("ns:member/ns:dbReference", ns): dbrefs.append((mem, False)) for dbref, is_representative in dbrefs: - acc, is_seed = get_accession_and_seed(dbref, ns) - if acc: - member_entity_id = cdm_entity_id(acc) - cluster_member_data.append( - (cluster_id, member_entity_id, str(is_representative).lower(), str(is_seed).lower(), "1.0") + db, acc, is_seed = get_accession_and_seed(dbref, ns) + if not acc: + 
continue + + member_entity_id = generate_dbxref(db, acc) + cluster_member_rows.append( + ( + cluster_id, + member_entity_id, + str(is_representative).lower(), + str(is_seed).lower(), + "1.0", # score placeholder ) + ) -# Extract cross-references (UniRef90/50/UniParc) from a dbReference element -def extract_cross_refs(dbref, cross_reference_data, ns) -> None: +def extract_cross_refs( + dbref: ET.Element | None, + cross_reference_rows: list[tuple[str, str, str]], + ns: dict[str, str], +) -> None: + """Extract UniRef90/50/UniParc cross references from a single element.""" if dbref is None: return - props = {p.attrib["type"]: p.attrib["value"] for p in dbref.findall("ns:property", ns)} - entity_id = cdm_entity_id(dbref.attrib.get("id")) - xref_types = ["UniRef90 ID", "UniRef50 ID", "UniParc ID"] - for i in xref_types: - if i in props: - cross_reference_data.append((entity_id, i, props[i])) + props = parse_properties(dbref, ns) + entity_db = dbref.attrib.get("type") + entity_id = dbref.attrib.get("id") + if not entity_id or not entity_db: + return -##### -------------- Parse Uniref XML --------------- ##### + entity_dbxref = generate_dbxref(entity_db, entity_id) + + for key in ("UniRef90 ID", "UniRef50 ID", "UniParc ID"): + key = PREFIX_TRANSLATION[key] + if key in props: + for val in props[key]: + cross_reference_rows.append((entity_dbxref, key, val)) -def parse_uniref_xml(local_gz, batch_size, existing_created): +def parse_uniref_entry( + elem: ET.Element, existing_created: dict[str, str], ns: dict[str, str] +) -> dict[str, list[tuple]]: """ - Parse UniRef XML (gzipped) and extract cluster, entity, cluster member, UniProtKB member, and cross-reference info. + Parse a single UniRef element into CDM-friendly row tuples. 
+ """ + cluster_rows: list[tuple[str, str, str, str | None, str]] = [] + entity_rows: list[tuple[str, str, str, str, str, str]] = [] + member_rows: list[tuple[str, str, str, str, str]] = [] + xref_rows: list[tuple[str, str, str]] = [] + + # Cluster basic info + uniref_id = elem.attrib.get("id") or "" + + cluster_id, name = extract_cluster(elem, ns, uniref_id) + updated_time, created_time = get_timestamps(uniref_id, existing_created) + + cluster_rows.append( + ( + cluster_id, + name, + "protein", + None, + DATA_SOURCE, + ) + ) - Args: - local_gz (str): Local gzipped UniRef XML path. - batch_size (int): Maximum number of entries to parse. - existing_created (dict): Mapping from UniRef cluster ID to 'created' timestamp for idempotent imports. + entity_rows.append( + ( + cluster_id, + uniref_id, + "Cluster", + DATA_SOURCE, + updated_time, + created_time, + ) + ) + + # Cross references from representative and members + repr_db = elem.find("ns:representativeMember/ns:dbReference", ns) + if repr_db is not None: + extract_cross_refs(repr_db, xref_rows, ns) + + for mem in elem.findall("ns:member/ns:dbReference", ns): + extract_cross_refs(mem, xref_rows, ns) + + # Cluster members (representative + members) + add_cluster_members(cluster_id, repr_db, elem, member_rows, ns) - Returns: - dict: Dictionary with lists for each CDM table + return { + "cluster_data": cluster_rows, + "entity_data": entity_rows, + "cluster_member_data": member_rows, + "cross_reference_data": xref_rows, + } + + +##### -------------- Parse Uniref XML --------------- ##### +def parse_uniref_xml(local_gz: str, batch_size: int, existing_created: dict[str, str]) -> dict[str, list[tuple]]: + """ + Stream-parse UniRef XML (gzipped) and extract CDM-like row tuples. 
""" - ns = {"ns": "http://uniprot.org/uniref"} # Namespace for XML parsing + ns = UNIREF_NS entry_count = 0 - # Initialize lists to collect parsed rows for different tables - cluster_data = [] - entity_data = [] - cluster_member_data = [] - cross_reference_data = [] + cluster_data: list[tuple] = [] + entity_data: list[tuple] = [] + cluster_member_data: list[tuple] = [] + cross_reference_data: list[tuple] = [] with gzip.open(local_gz, "rb") as f: - # Stream parse the XML to avoid memory issues with big files context = ET.iterparse(f, events=("end",)) for _, elem in context: - if elem.tag.endswith("entry"): - # Cluster basic info - cluster_id, name = extract_cluster(elem, ns) - - # Get UniRef cluster id and timestamps - uniref_id = elem.attrib.get("id") - updated_time, created_time = get_timestamps(uniref_id, existing_created) - - # Populate Cluster and Entity table data - cluster_data.append( - ( - cluster_id, # cluster_id - name, # cluster name - "protein", # entity_type (fixed value) - None, # description (not present) - "UniRef 100", # protocol_id - ) - ) - - entity_data.append( - ( - cluster_id, # entity_id (matches cluster_id) - uniref_id, # data_source_entity_id (UniRef100_xxx) - "Cluster", # entity_type - "UniRef 100", # data_source - updated_time, # updated - created_time, # created - ) - ) - - # Extract UniProtKB member attributes and cross-references - repr_db = elem.find("ns:representativeMember/ns:dbReference", ns) - extract_cross_refs(repr_db, cross_reference_data, ns) - - for mem in elem.findall("ns:member/ns:dbReference", ns): - extract_cross_refs(mem, cross_reference_data, ns) - - # ClusterMember table (representative + members) - add_cluster_members(cluster_id, repr_db, elem, cluster_member_data, ns) - - # Batch size limit - entry_count += 1 - if entry_count >= batch_size: - break - - # Release element to save memory - elem.clear() - - print(f"Parsed {entry_count} clusters") + if not elem.tag.endswith("entry"): + continue + + parsed = 
parse_uniref_entry(elem, existing_created, ns) + cluster_data.extend(parsed["cluster_data"]) + entity_data.extend(parsed["entity_data"]) + cluster_member_data.extend(parsed["cluster_member_data"]) + cross_reference_data.extend(parsed["cross_reference_data"]) + + entry_count += 1 + if entry_count >= batch_size: + break + + elem.clear() + + logger.info(f"Parsed {entry_count} clusters") return { "cluster_data": cluster_data, "entity_data": entity_data, @@ -239,9 +317,7 @@ def parse_uniref_xml(local_gz, batch_size, existing_created): ##### -------------- Save dalta table and print the preview --------------- ##### - - -def save_delta_tables(spark, output_dir, data_dict) -> None: +def save_delta_tables(spark, output_dir, data_dict): # Cluster cluster_schema = StructType( [ @@ -255,7 +331,7 @@ def save_delta_tables(spark, output_dir, data_dict) -> None: cluster_df = spark.createDataFrame(data_dict["cluster_data"], cluster_schema) cluster_df.write.format("delta").mode("overwrite").save(os.path.join(output_dir, "Cluster")) - print(f"Cluster Delta table written to: {os.path.join(output_dir, 'Cluster')}") + logger.info(f"Cluster Delta table written to: {os.path.join(output_dir, 'Cluster')}") # Entity entity_schema = StructType( @@ -272,7 +348,7 @@ def save_delta_tables(spark, output_dir, data_dict) -> None: entity_df = spark.createDataFrame(data_dict["entity_data"], entity_schema) entity_table_path = os.path.join(output_dir, "Entity") entity_df.write.format("delta").mode("overwrite").save(entity_table_path) - print(f"Entity Delta table written to: {entity_table_path}") + logger.info(f"Entity Delta table written to: {entity_table_path}") # ClusterMember cluster_member_schema = StructType( @@ -288,7 +364,7 @@ def save_delta_tables(spark, output_dir, data_dict) -> None: cluster_member_df = spark.createDataFrame(data_dict["cluster_member_data"], cluster_member_schema) cluster_member_path = os.path.join(output_dir, "ClusterMember") 
cluster_member_df.write.format("delta").mode("overwrite").save(cluster_member_path) - print(f"ClusterMember Delta table written to: {cluster_member_path}") + logger.info(f"ClusterMember Delta table written to: {cluster_member_path}") # CrossReference cross_reference_schema = StructType( @@ -302,22 +378,22 @@ def save_delta_tables(spark, output_dir, data_dict) -> None: cross_reference_df = spark.createDataFrame(data_dict["cross_reference_data"], cross_reference_schema) cross_reference_path = os.path.join(output_dir, "CrossReference") cross_reference_df.write.format("delta").mode("overwrite").save(cross_reference_path) - print(f"CrossReference Delta table written to: {cross_reference_path}") + logger.info(f"CrossReference Delta table written to: {cross_reference_path}") # Previews - print("Sample Clusters:") + logger.info("Sample Clusters:") cluster_df.createOrReplaceTempView("Cluster") spark.sql("SELECT * FROM Cluster LIMIT 20").show(truncate=False) - print("Sample Entities:") + logger.info("Sample Entities:") entity_df.createOrReplaceTempView("Entity") spark.sql("SELECT * FROM Entity LIMIT 20").show(truncate=False) - print("Sample ClusterMembers:") + logger.info("Sample ClusterMembers:") cluster_member_df.createOrReplaceTempView("ClusterMember") spark.sql("SELECT * FROM ClusterMember LIMIT 20").show(truncate=False) - print("Sample CrossReferences:") + logger.info("Sample CrossReferences:") cross_reference_df.createOrReplaceTempView("CrossReference") spark.sql("SELECT * FROM CrossReference LIMIT 20").show(truncate=False) @@ -327,17 +403,27 @@ def build_spark_session(): builder = ( SparkSession.builder.appName("UniRef Cluster Extractor") .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") - .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") + .config( + "spark.sql.catalog.spark_catalog", + "org.apache.spark.sql.delta.catalog.DeltaCatalog", + ) ) return 
configure_spark_with_delta_pip(builder).getOrCreate() -# Click command-line interface for parameter parsing @click.command() @click.option("--ftp-url", required=True, help="FTP URL to UniRef100 XML file") @click.option("--output-dir", required=True, help="Output directory for Delta table") @click.option("--batch-size", default=1000, help="Number of UniRef entries to parse (limit)") -def main(ftp_url, output_dir, batch_size) -> None: +def main(ftp_url, output_dir, batch_size): + # set up logging in CLI context + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] (%(name)s:%(lineno)d %(message)s", + ) + + logger.info("Starting UniRef100/90/50 Import Pipeline") + # Set local path for downloaded gzipped XML file local_gz = os.path.join("/tmp", os.path.basename(ftp_url)) @@ -345,23 +431,31 @@ def main(ftp_url, output_dir, batch_size) -> None: try: download_file(ftp_url, local_gz) except URLError as e: - print(f"Error! Cannot download file: {e.reason}") + logger.error(f"Error! 
Cannot download file: {e.reason}") return # Start Spark session with Delta Lake support + logger.info("Building Spark session:") spark = build_spark_session() # Load existing entity creation timestamps - entity_table_path = os.path.join(output_dir, "Entity") - existing_created = load_existing_created(spark, entity_table_path) + try: + entity_table_path = os.path.join(output_dir, "Entity") + existing_created = load_existing_created(spark, entity_table_path) + + # Parse the UniRef XML and extract all CDM table data + logger.info("Parsing UniRef XML:") + data_dict = parse_uniref_xml(local_gz, batch_size, existing_created) - # Parse the UniRef XML and extract all CDM table data - data_dict = parse_uniref_xml(local_gz, batch_size, existing_created) + # Write parsed data to Delta tables in output directory + logger.info("Saving Delta tables:") + save_delta_tables(spark, output_dir, data_dict) - # Write parsed data to Delta tables in output directory - save_delta_tables(spark, output_dir, data_dict) + logger.info("UniRef100/90/50 Import Pipeline completed successfully.") - spark.stop() + finally: + spark.stop() + logger.info("Spark session stopped.") if __name__ == "__main__": diff --git a/src/cdm_data_loader_utils/parsers/xml_utils.py b/src/cdm_data_loader_utils/parsers/xml_utils.py new file mode 100644 index 0000000..d916799 --- /dev/null +++ b/src/cdm_data_loader_utils/parsers/xml_utils.py @@ -0,0 +1,124 @@ +""" +Shared XML helper utilities used by UniProt and UniRef parsers. 
+ +This module centralizes common operations: +- Safe text extraction +- Safe attribute extraction +- Property parsing +- Evidence / dbReference parsing +- Cleaning dictionaries +- Deduplicating lists +""" + +import xml.etree.ElementTree as ET +from typing import Any + +# ============================================================ +# Basic Safe Accessors +# ============================================================ + + +def get_text(elem: ET.Element | None, default: str | None = None) -> str | None: + """Return elem.text if exists and non-empty.""" + if elem is None: + return default + if elem.text is None: + return default + text = elem.text.strip() + return text if text else default + + +def get_attr(elem: ET.Element | None, name: str, default: str | None = None) -> str | None: + """Return elem.get(name) safely.""" + if elem is None: + return default + val = elem.get(name) + return val.strip() if isinstance(val, str) else default + + +# ============================================================ +# List / Node Finders +# ============================================================ + + +def find_one(elem: ET.Element, xpath: str, ns: dict[str, str]): + """Return first element matching xpath or None.""" + results = elem.findall(xpath, ns) + return results[0] if results else None + + +def find_all_text(elem: ET.Element, xpath: str, ns: dict[str, str]) -> list[str]: + """Return list of text values from xpath matches (deduped).""" + texts = [] + for node in elem.findall(xpath, ns): + txt = get_text(node) + if txt: + texts.append(txt) + return list(dict.fromkeys(texts)) # preserve order, dedupe + + +def safe_list(x) -> list[Any]: + """Convert None → [].""" + if x is None: + return [] + if isinstance(x, list): + return x + return [x] + + +# ============================================================ +# dbReference / property parsing (shared by UniProt + UniRef) +# ============================================================ + + +def parse_properties(dbref: 
ET.Element | None, ns: dict[str, str]) -> dict[str, list[str]]: + """ + Extract type/value pairs from the property blocks of a dbReference element. + """ + if dbref is None: + return {} + props = {} + for prop in dbref.findall("ns:property", ns): + ptype = prop.attrib.get("type") + pval = prop.attrib.get("value") + if ptype and pval: + if ptype not in props: + props[ptype] = [] + props[ptype].append(pval) + return props + + +def parse_db_references(elem: ET.Element, ns: dict[str, str], pub_types=("PubMed", "DOI")): + """ + Generic dbReference parser: + - Identify publication IDs (PubMed, DOI) + - Identify other cross-references (dbType:dbId) + """ + publications = [] + others = [] + + for dbref in elem.findall("ns:dbReference", ns): + db_type = dbref.get("type") + db_id = dbref.get("id") + + if not db_type or not db_id: + continue + + if db_type in pub_types: + publications.append(f"{db_type.upper()}:{db_id}") + else: + others.append(f"{db_type}:{db_id}") + + return publications, others + + +# ============================================================ +# Dict Cleaning +# ============================================================ + + +def clean_dict(d: dict[str, Any]) -> dict[str, Any]: + """ + Remove keys whose value is None, an empty list, or an empty dict.
+ """ + return {k: v for k, v in d.items() if v not in (None, [], {})} diff --git a/tests/data/uniprot_archaea/datasource.json b/tests/data/uniprot_archaea/datasource.json new file mode 100644 index 0000000..227cd2d --- /dev/null +++ b/tests/data/uniprot_archaea/datasource.json @@ -0,0 +1,7 @@ +{ + "name": "UniProt import", + "source": "UniProt", + "url": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_sprot_archaea.xml.gz", + "accessed": "2025-12-24T04:06:42.594253+00:00", + "version": 115 +} \ No newline at end of file diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00000-5c7434bb-30e3-45f8-b638-415de5c389a1-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00000-5c7434bb-30e3-45f8-b638-415de5c389a1-c000.snappy.parquet.crc new file mode 100644 index 0000000..c414fa8 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00000-5c7434bb-30e3-45f8-b638-415de5c389a1-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00000-6e2a19e7-36e4-48c7-a949-9a97054f83e1-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00000-6e2a19e7-36e4-48c7-a949-9a97054f83e1-c000.snappy.parquet.crc new file mode 100644 index 0000000..2352754 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00000-6e2a19e7-36e4-48c7-a949-9a97054f83e1-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00000-c4fcd4fc-431e-4ad1-8624-792e6203933b-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00000-c4fcd4fc-431e-4ad1-8624-792e6203933b-c000.snappy.parquet.crc new file mode 100644 index 0000000..0d9bcef Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00000-c4fcd4fc-431e-4ad1-8624-792e6203933b-c000.snappy.parquet.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00000-fddfb436-dca2-49d9-a478-cec9a901a570-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00000-fddfb436-dca2-49d9-a478-cec9a901a570-c000.snappy.parquet.crc new file mode 100644 index 0000000..e699f18 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00000-fddfb436-dca2-49d9-a478-cec9a901a570-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00001-343540fa-6365-424a-94b6-16af6a4b2e63-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00001-343540fa-6365-424a-94b6-16af6a4b2e63-c000.snappy.parquet.crc new file mode 100644 index 0000000..b2507f9 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00001-343540fa-6365-424a-94b6-16af6a4b2e63-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00001-769f28af-12f9-4406-a951-2d78e45241c1-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00001-769f28af-12f9-4406-a951-2d78e45241c1-c000.snappy.parquet.crc new file mode 100644 index 0000000..cc3b339 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00001-769f28af-12f9-4406-a951-2d78e45241c1-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00001-a044609f-89b1-43da-8fad-c72c083b71f9-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00001-a044609f-89b1-43da-8fad-c72c083b71f9-c000.snappy.parquet.crc new file mode 100644 index 0000000..83e8d32 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00001-a044609f-89b1-43da-8fad-c72c083b71f9-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00001-d0cc9108-298b-44b4-80e4-8ecf39f2926d-c000.snappy.parquet.crc 
b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00001-d0cc9108-298b-44b4-80e4-8ecf39f2926d-c000.snappy.parquet.crc new file mode 100644 index 0000000..f27c4bc Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00001-d0cc9108-298b-44b4-80e4-8ecf39f2926d-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00002-33e36f42-b91d-4741-9371-d0a2b01265ea-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00002-33e36f42-b91d-4741-9371-d0a2b01265ea-c000.snappy.parquet.crc new file mode 100644 index 0000000..f7143e1 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00002-33e36f42-b91d-4741-9371-d0a2b01265ea-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00002-4af0fd89-1486-400d-b069-65d0176b17dc-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00002-4af0fd89-1486-400d-b069-65d0176b17dc-c000.snappy.parquet.crc new file mode 100644 index 0000000..3d4b235 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00002-4af0fd89-1486-400d-b069-65d0176b17dc-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00002-60a2f084-b993-4c4c-81bc-4893081c8ace-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00002-60a2f084-b993-4c4c-81bc-4893081c8ace-c000.snappy.parquet.crc new file mode 100644 index 0000000..226915c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00002-60a2f084-b993-4c4c-81bc-4893081c8ace-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00002-d6af9717-c0ef-4ef3-93bf-17593ea5ff3d-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00002-d6af9717-c0ef-4ef3-93bf-17593ea5ff3d-c000.snappy.parquet.crc new 
file mode 100644 index 0000000..30fa563 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00002-d6af9717-c0ef-4ef3-93bf-17593ea5ff3d-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00003-52d8b672-b288-4bcb-ab63-bb57677a1ecd-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00003-52d8b672-b288-4bcb-ab63-bb57677a1ecd-c000.snappy.parquet.crc new file mode 100644 index 0000000..239eedf Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00003-52d8b672-b288-4bcb-ab63-bb57677a1ecd-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00003-650a8597-77a2-4e0d-bed4-a45a77799938-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00003-650a8597-77a2-4e0d-bed4-a45a77799938-c000.snappy.parquet.crc new file mode 100644 index 0000000..e89fd63 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00003-650a8597-77a2-4e0d-bed4-a45a77799938-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00003-8a0e9158-3bdc-4059-8261-4f34b384ad8b-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00003-8a0e9158-3bdc-4059-8261-4f34b384ad8b-c000.snappy.parquet.crc new file mode 100644 index 0000000..42172da Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00003-8a0e9158-3bdc-4059-8261-4f34b384ad8b-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00003-c585c50d-d510-403f-b1af-f2590b82ec02-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00003-c585c50d-d510-403f-b1af-f2590b82ec02-c000.snappy.parquet.crc new file mode 100644 index 0000000..5dbaec2 Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00003-c585c50d-d510-403f-b1af-f2590b82ec02-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00004-a557f9be-45fe-430d-820c-67c6e98e062b-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00004-a557f9be-45fe-430d-820c-67c6e98e062b-c000.snappy.parquet.crc new file mode 100644 index 0000000..4ca6aa7 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00004-a557f9be-45fe-430d-820c-67c6e98e062b-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00004-bb84e201-0f62-4f40-b0a7-207c849f7a24-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00004-bb84e201-0f62-4f40-b0a7-207c849f7a24-c000.snappy.parquet.crc new file mode 100644 index 0000000..d0e9a84 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00004-bb84e201-0f62-4f40-b0a7-207c849f7a24-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00004-cfdc4727-b5b0-4c31-84c4-3ed85cc9f1c9-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00004-cfdc4727-b5b0-4c31-84c4-3ed85cc9f1c9-c000.snappy.parquet.crc new file mode 100644 index 0000000..2c61648 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00004-cfdc4727-b5b0-4c31-84c4-3ed85cc9f1c9-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00004-f5bf9de0-2f61-4ef9-ae15-45201fe05ae7-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00004-f5bf9de0-2f61-4ef9-ae15-45201fe05ae7-c000.snappy.parquet.crc new file mode 100644 index 0000000..453f286 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00004-f5bf9de0-2f61-4ef9-ae15-45201fe05ae7-c000.snappy.parquet.crc 
differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00005-83e03c9c-6e40-47d8-a1e5-fbdcb0abafdb-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00005-83e03c9c-6e40-47d8-a1e5-fbdcb0abafdb-c000.snappy.parquet.crc new file mode 100644 index 0000000..d60914c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00005-83e03c9c-6e40-47d8-a1e5-fbdcb0abafdb-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00005-d77d92c3-e227-463b-9cf7-50341160d938-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00005-d77d92c3-e227-463b-9cf7-50341160d938-c000.snappy.parquet.crc new file mode 100644 index 0000000..8e9aa83 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00005-d77d92c3-e227-463b-9cf7-50341160d938-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00005-dad45051-3656-45f3-b45c-4c29e143aae9-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00005-dad45051-3656-45f3-b45c-4c29e143aae9-c000.snappy.parquet.crc new file mode 100644 index 0000000..8bf8f22 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00005-dad45051-3656-45f3-b45c-4c29e143aae9-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00005-f48959c1-8814-4930-9482-909e87471c09-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00005-f48959c1-8814-4930-9482-909e87471c09-c000.snappy.parquet.crc new file mode 100644 index 0000000..51aa979 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00005-f48959c1-8814-4930-9482-909e87471c09-c000.snappy.parquet.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00006-2c4136d2-d3f6-47ea-a5f3-3d5590df1769-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00006-2c4136d2-d3f6-47ea-a5f3-3d5590df1769-c000.snappy.parquet.crc new file mode 100644 index 0000000..754e6a2 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00006-2c4136d2-d3f6-47ea-a5f3-3d5590df1769-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00006-43d069bc-67ee-4b6b-a938-17447f9f6397-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00006-43d069bc-67ee-4b6b-a938-17447f9f6397-c000.snappy.parquet.crc new file mode 100644 index 0000000..fe0bc61 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00006-43d069bc-67ee-4b6b-a938-17447f9f6397-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00006-7631ae9a-9d2c-4334-a806-fca2b784b7b6-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00006-7631ae9a-9d2c-4334-a806-fca2b784b7b6-c000.snappy.parquet.crc new file mode 100644 index 0000000..78fc033 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00006-7631ae9a-9d2c-4334-a806-fca2b784b7b6-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00006-c892b9bb-3763-45a3-84b9-5bb7d0f8106f-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00006-c892b9bb-3763-45a3-84b9-5bb7d0f8106f-c000.snappy.parquet.crc new file mode 100644 index 0000000..d23f21d Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00006-c892b9bb-3763-45a3-84b9-5bb7d0f8106f-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00007-393e6b77-99d0-4bf4-9b62-944af8f3f649-c000.snappy.parquet.crc 
b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00007-393e6b77-99d0-4bf4-9b62-944af8f3f649-c000.snappy.parquet.crc new file mode 100644 index 0000000..80c2b82 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00007-393e6b77-99d0-4bf4-9b62-944af8f3f649-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00007-9e4ad55b-542a-44fb-8598-17a9f559aed7-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00007-9e4ad55b-542a-44fb-8598-17a9f559aed7-c000.snappy.parquet.crc new file mode 100644 index 0000000..db66104 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00007-9e4ad55b-542a-44fb-8598-17a9f559aed7-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00007-d719ea24-3a34-4eb5-8620-61e7b97d14b6-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00007-d719ea24-3a34-4eb5-8620-61e7b97d14b6-c000.snappy.parquet.crc new file mode 100644 index 0000000..1530309 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00007-d719ea24-3a34-4eb5-8620-61e7b97d14b6-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/.part-00007-db1c7914-39dd-4ff4-83c0-e259ba58d1b7-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00007-db1c7914-39dd-4ff4-83c0-e259ba58d1b7-c000.snappy.parquet.crc new file mode 100644 index 0000000..3049d16 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/.part-00007-db1c7914-39dd-4ff4-83c0-e259ba58d1b7-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000000.crc.crc b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000000.crc.crc new file mode 100644 index 0000000..634b88d Binary files /dev/null 
and b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000000.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000000.json.crc b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000000.json.crc new file mode 100644 index 0000000..4e21ac9 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000000.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000001.crc.crc b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000001.crc.crc new file mode 100644 index 0000000..93c0472 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000001.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000001.json.crc b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000001.json.crc new file mode 100644 index 0000000..081722f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000001.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000002.crc.crc b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000002.crc.crc new file mode 100644 index 0000000..2562ec5 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000002.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000002.json.crc b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000002.json.crc new file mode 100644 index 0000000..ef7fc14 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000002.json.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000003.crc.crc b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000003.crc.crc new file mode 100644 index 0000000..ef7cd14 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000003.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000003.json.crc b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000003.json.crc new file mode 100644 index 0000000..3d36dee Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000003.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000004.crc.crc b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000004.crc.crc new file mode 100644 index 0000000..1add27e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000004.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000004.json.crc b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000004.json.crc new file mode 100644 index 0000000..d9f9ce8 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/.00000000000000000004.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000000.crc b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000000.crc new file mode 100644 index 0000000..a2ac8cd --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000000.crc @@ -0,0 +1 @@ 
+{"txnId":"1dc8188d-79de-4366-a22f-e27f5806ea4d","tableSizeBytes":0,"numFiles":0,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"ad248292-a815-411b-9154-113330cf7ea8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[]}","partitionColumns":[],"configuration":{},"createdTime":1766549214137},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[]} diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000000.json b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000000.json new file mode 100644 index 0000000..aed0969 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1766549214176,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"1dc8188d-79de-4366-a22f-e27f5806ea4d"}} +{"metaData":{"id":"ad248292-a815-411b-9154-113330cf7ea8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[]}","partitionColumns":[],"configuration":{},"createdTime":1766549214137}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000001.crc b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000001.crc new file mode 100644 index 0000000..6e10002 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000001.crc @@ -0,0 +1 @@ 
+{"txnId":"3a4f684e-b303-45e7-b32c-c59ee4426680","tableSizeBytes":293686,"numFiles":8,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"ad248292-a815-411b-9154-113330cf7ea8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"subject\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"object\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"predicate\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"evidence_type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"supporting_objects\",\"type\":{\"type\":\"array\",\"elementType\":\"string\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"publications\",\"type\":{\"type\":\"array\",\"elementType\":\"string\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549214137},"protocol":{"minReaderVersion":1,"minWriterVersion":2}} diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000001.json b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000001.json new file mode 100644 index 0000000..3c3febe --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000001.json @@ -0,0 +1,10 @@ +{"commitInfo":{"timestamp":1766549220763,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"37839","numOutputBytes":"293686"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"3a4f684e-b303-45e7-b32c-c59ee4426680"}} 
+{"metaData":{"id":"ad248292-a815-411b-9154-113330cf7ea8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"subject\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"object\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"predicate\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"evidence_type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"supporting_objects\",\"type\":{\"type\":\"array\",\"elementType\":\"string\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"publications\",\"type\":{\"type\":\"array\",\"elementType\":\"string\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549214137}} +{"add":{"path":"part-00000-6e2a19e7-36e4-48c7-a949-9a97054f83e1-c000.snappy.parquet","partitionValues":{},"size":45924,"modificationTime":1766549220742,"dataChange":true,"stats":"{\"numRecords\":4096,\"minValues\":{\"subject\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"object\":\"ChEBI:CHEBI:13193\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"object\":\"Rhea:RHEA:79571\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1628,\"supporting_objects\":2037,\"publications\":3687}}"}} 
+{"add":{"path":"part-00001-d0cc9108-298b-44b4-80e4-8ecf39f2926d-c000.snappy.parquet","partitionValues":{},"size":40388,"modificationTime":1766549220757,"dataChange":true,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_00030730-9147-5351-b68c\",\"object\":\"ChEBI:CHEBI:131803\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff4d0139-f3df-510c-a663\",\"object\":\"Rhea:RHEA:80651\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1311,\"supporting_objects\":1601,\"publications\":4830}}"}} +{"add":{"path":"part-00002-33e36f42-b91d-4741-9371-d0a2b01265ea-c000.snappy.parquet","partitionValues":{},"size":28250,"modificationTime":1766549220747,"dataChange":true,"stats":"{\"numRecords\":4096,\"minValues\":{\"subject\":\"cdm_prot_00a96f35-f63b-5933-9e50\",\"object\":\"ChEBI:CHEBI:13193\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fd944ff3-10f0-5e2f-95de\",\"object\":\"Rhea:RHEA:76235\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":967,\"supporting_objects\":1065,\"publications\":3998}}"}} +{"add":{"path":"part-00003-52d8b672-b288-4bcb-ab63-bb57677a1ecd-c000.snappy.parquet","partitionValues":{},"size":36564,"modificationTime":1766549220759,"dataChange":true,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"object\":\"ChEBI:CHEBI:10329\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"object\":\"Rhea:RHEA:63444\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1242,\"supporting_objects\":1515,\"publications\":4855}}"}} 
+{"add":{"path":"part-00004-bb84e201-0f62-4f40-b0a7-207c849f7a24-c000.snappy.parquet","partitionValues":{},"size":37737,"modificationTime":1766549220757,"dataChange":true,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_008bae16-5b42-5bfd-a15c\",\"object\":\"ChEBI:CHEBI:12040\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fedf6b23-d61c-56dc-b256\",\"object\":\"Rhea:RHEA:62168\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1707,\"supporting_objects\":1962,\"publications\":4871}}"}} +{"add":{"path":"part-00005-dad45051-3656-45f3-b45c-4c29e143aae9-c000.snappy.parquet","partitionValues":{},"size":45839,"modificationTime":1766549220741,"dataChange":true,"stats":"{\"numRecords\":4096,\"minValues\":{\"subject\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"object\":\"ChEBI:CHEBI:133980\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"object\":\"Rhea:RHEA:78943\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1503,\"supporting_objects\":1884,\"publications\":3841}}"}} +{"add":{"path":"part-00006-7631ae9a-9d2c-4334-a806-fca2b784b7b6-c000.snappy.parquet","partitionValues":{},"size":23051,"modificationTime":1766549220757,"dataChange":true,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_0094747f-6d42-5807-a9c1\",\"object\":\"ChEBI:CHEBI:12931\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fe6c6c04-1430-5b94-935c\",\"object\":\"Rhea:RHEA:42760\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1062,\"supporting_objects\":1159,\"publications\":5023}}"}} 
+{"add":{"path":"part-00007-9e4ad55b-542a-44fb-8598-17a9f559aed7-c000.snappy.parquet","partitionValues":{},"size":35933,"modificationTime":1766549220751,"dataChange":true,"stats":"{\"numRecords\":5071,\"minValues\":{\"subject\":\"cdm_prot_00625fdf-0992-5594-a800\",\"object\":\"ChEBI:CHEBI:128769\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"object\":\"Rhea:RHEA:84215\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1518,\"supporting_objects\":1933,\"publications\":4681}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000002.crc b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000002.crc new file mode 100644 index 0000000..cd3a1b3 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000002.crc @@ -0,0 +1 @@ 
+{"txnId":"f5844a2c-4bf9-4dd4-a93c-43d554073de4","tableSizeBytes":587015,"numFiles":16,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"ad248292-a815-411b-9154-113330cf7ea8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"subject\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"object\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"predicate\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"evidence_type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"supporting_objects\",\"type\":{\"type\":\"array\",\"elementType\":\"string\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"publications\",\"type\":{\"type\":\"array\",\"elementType\":\"string\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549214137},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[{"path":"part-00007-db1c7914-39dd-4ff4-83c0-e259ba58d1b7-c000.snappy.parquet","partitionValues":{},"size":26421,"modificationTime":1766549228703,"dataChange":false,"stats":"{\"numRecords\":4702,\"minValues\":{\"subject\":\"cdm_prot_00504a00-27c9-551f-b27f\",\"object\":\"ChEBI:CHEBI:132124\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"object\":\"Rhea:RHEA:79083\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":896,\"supporting_objects\":950,\"publications\":4648}}"},{"path":"part-00003-650a8597-77a2-4e0d-bed4-a45a77799938-c000.snappy.parquet","partitionValues":{},"size":36912,"modificationTime":1766549228706,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"object\":\"ChEBI:CHEBI:131766\",\
"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"object\":\"Rhea:RHEA:66528\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1514,\"supporting_objects\":1865,\"publications\":4769}}"},{"path":"part-00000-5c7434bb-30e3-45f8-b638-415de5c389a1-c000.snappy.parquet","partitionValues":{},"size":28874,"modificationTime":1766549228694,"dataChange":false,"stats":"{\"numRecords\":4096,\"minValues\":{\"subject\":\"cdm_prot_00c31285-6721-55e0-855a\",\"object\":\"ChEBI:CHEBI:128769\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"object\":\"Rhea:RHEA:68420\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1286,\"supporting_objects\":1677,\"publications\":3705}}"},{"path":"part-00004-a557f9be-45fe-430d-820c-67c6e98e062b-c000.snappy.parquet","partitionValues":{},"size":36062,"modificationTime":1766549228702,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_00925b61-fa22-5bcf-9dea\",\"object\":\"ChEBI:CHEBI:132124\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"object\":\"Rhea:RHEA:57720\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1622,\"supporting_objects\":1866,\"publications\":4876}}"},{"path":"part-00003-52d8b672-b288-4bcb-ab63-bb57677a1ecd-c000.snappy.parquet","partitionValues":{},"size":36564,"modificationTime":1766549220759,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"object\":\"ChEBI:CHEBI:10329\",\"predicate\":\"catalyzes\",\"evid
ence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"object\":\"Rhea:RHEA:63444\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1242,\"supporting_objects\":1515,\"publications\":4855}}"},{"path":"part-00001-d0cc9108-298b-44b4-80e4-8ecf39f2926d-c000.snappy.parquet","partitionValues":{},"size":40388,"modificationTime":1766549220757,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_00030730-9147-5351-b68c\",\"object\":\"ChEBI:CHEBI:131803\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff4d0139-f3df-510c-a663\",\"object\":\"Rhea:RHEA:80651\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1311,\"supporting_objects\":1601,\"publications\":4830}}"},{"path":"part-00005-f48959c1-8814-4930-9482-909e87471c09-c000.snappy.parquet","partitionValues":{},"size":37959,"modificationTime":1766549228670,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"object\":\"ChEBI:CHEBI:11561\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"object\":\"Rhea:RHEA:75175\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1831,\"supporting_objects\":2191,\"publications\":4760}}"},{"path":"part-00002-33e36f42-b91d-4741-9371-d0a2b01265ea-c000.snappy.parquet","partitionValues":{},"size":28250,"modificationTime":1766549220747,"dataChange":false,"stats":"{\"numRecords\":4096,\"minValues\":{\"subject\":\"cdm_prot_00a96f35-f63b-5933-9e50\",\"object\":\"ChEBI:CHEBI:13193\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"max
Values\":{\"subject\":\"cdm_prot_fd944ff3-10f0-5e2f-95de\",\"object\":\"Rhea:RHEA:76235\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":967,\"supporting_objects\":1065,\"publications\":3998}}"},{"path":"part-00000-6e2a19e7-36e4-48c7-a949-9a97054f83e1-c000.snappy.parquet","partitionValues":{},"size":45924,"modificationTime":1766549220742,"dataChange":false,"stats":"{\"numRecords\":4096,\"minValues\":{\"subject\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"object\":\"ChEBI:CHEBI:13193\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"object\":\"Rhea:RHEA:79571\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1628,\"supporting_objects\":2037,\"publications\":3687}}"},{"path":"part-00002-d6af9717-c0ef-4ef3-93bf-17593ea5ff3d-c000.snappy.parquet","partitionValues":{},"size":32281,"modificationTime":1766549228708,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"object\":\"ChEBI:CHEBI:1178\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"object\":\"Rhea:RHEA:69496\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1407,\"supporting_objects\":1689,\"publications\":4845}}"},{"path":"part-00001-a044609f-89b1-43da-8fad-c72c083b71f9-c000.snappy.parquet","partitionValues":{},"size":46717,"modificationTime":1766549228697,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"object\":\"ChEBI:CHEBI:11851\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffa1
4ace-ea75-5cc6-86f2\",\"object\":\"Rhea:RHEA:67136\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1974,\"supporting_objects\":2156,\"publications\":4938}}"},{"path":"part-00005-dad45051-3656-45f3-b45c-4c29e143aae9-c000.snappy.parquet","partitionValues":{},"size":45839,"modificationTime":1766549220741,"dataChange":false,"stats":"{\"numRecords\":4096,\"minValues\":{\"subject\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"object\":\"ChEBI:CHEBI:133980\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"object\":\"Rhea:RHEA:78943\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1503,\"supporting_objects\":1884,\"publications\":3841}}"},{"path":"part-00007-9e4ad55b-542a-44fb-8598-17a9f559aed7-c000.snappy.parquet","partitionValues":{},"size":35933,"modificationTime":1766549220751,"dataChange":false,"stats":"{\"numRecords\":5071,\"minValues\":{\"subject\":\"cdm_prot_00625fdf-0992-5594-a800\",\"object\":\"ChEBI:CHEBI:128769\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"object\":\"Rhea:RHEA:84215\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1518,\"supporting_objects\":1933,\"publications\":4681}}"},{"path":"part-00006-7631ae9a-9d2c-4334-a806-fca2b784b7b6-c000.snappy.parquet","partitionValues":{},"size":23051,"modificationTime":1766549220757,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_0094747f-6d42-5807-a9c1\",\"object\":\"ChEBI:CHEBI:12931\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fe6c6c04-1430-5b94-935c\",\"object\":
\"Rhea:RHEA:42760\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1062,\"supporting_objects\":1159,\"publications\":5023}}"},{"path":"part-00004-bb84e201-0f62-4f40-b0a7-207c849f7a24-c000.snappy.parquet","partitionValues":{},"size":37737,"modificationTime":1766549220757,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_008bae16-5b42-5bfd-a15c\",\"object\":\"ChEBI:CHEBI:12040\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fedf6b23-d61c-56dc-b256\",\"object\":\"Rhea:RHEA:62168\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1707,\"supporting_objects\":1962,\"publications\":4871}}"},{"path":"part-00006-2c4136d2-d3f6-47ea-a5f3-3d5590df1769-c000.snappy.parquet","partitionValues":{},"size":48103,"modificationTime":1766549228695,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"object\":\"ChEBI:CHEBI:10986\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"object\":\"Rhea:RHEA:78975\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":2149,\"supporting_objects\":2323,\"publications\":4950}}"}]} diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000002.json b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000002.json new file mode 100644 index 0000000..2f04fc4 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000002.json @@ -0,0 +1,9 @@ 
+{"commitInfo":{"timestamp":1766549228711,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"39518","numOutputBytes":"293329"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"f5844a2c-4bf9-4dd4-a93c-43d554073de4"}} +{"add":{"path":"part-00000-5c7434bb-30e3-45f8-b638-415de5c389a1-c000.snappy.parquet","partitionValues":{},"size":28874,"modificationTime":1766549228694,"dataChange":true,"stats":"{\"numRecords\":4096,\"minValues\":{\"subject\":\"cdm_prot_00c31285-6721-55e0-855a\",\"object\":\"ChEBI:CHEBI:128769\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"object\":\"Rhea:RHEA:68420\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1286,\"supporting_objects\":1677,\"publications\":3705}}"}} +{"add":{"path":"part-00001-a044609f-89b1-43da-8fad-c72c083b71f9-c000.snappy.parquet","partitionValues":{},"size":46717,"modificationTime":1766549228697,"dataChange":true,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"object\":\"ChEBI:CHEBI:11851\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"object\":\"Rhea:RHEA:67136\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1974,\"supporting_objects\":2156,\"publications\":4938}}"}} 
+{"add":{"path":"part-00002-d6af9717-c0ef-4ef3-93bf-17593ea5ff3d-c000.snappy.parquet","partitionValues":{},"size":32281,"modificationTime":1766549228708,"dataChange":true,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"object\":\"ChEBI:CHEBI:1178\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"object\":\"Rhea:RHEA:69496\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1407,\"supporting_objects\":1689,\"publications\":4845}}"}} +{"add":{"path":"part-00003-650a8597-77a2-4e0d-bed4-a45a77799938-c000.snappy.parquet","partitionValues":{},"size":36912,"modificationTime":1766549228706,"dataChange":true,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"object\":\"ChEBI:CHEBI:131766\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"object\":\"Rhea:RHEA:66528\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1514,\"supporting_objects\":1865,\"publications\":4769}}"}} +{"add":{"path":"part-00004-a557f9be-45fe-430d-820c-67c6e98e062b-c000.snappy.parquet","partitionValues":{},"size":36062,"modificationTime":1766549228702,"dataChange":true,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_00925b61-fa22-5bcf-9dea\",\"object\":\"ChEBI:CHEBI:132124\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"object\":\"Rhea:RHEA:57720\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1622,\"supporting_objects\":1866,\"publications\":4876}}"}} 
+{"add":{"path":"part-00005-f48959c1-8814-4930-9482-909e87471c09-c000.snappy.parquet","partitionValues":{},"size":37959,"modificationTime":1766549228670,"dataChange":true,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"object\":\"ChEBI:CHEBI:11561\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"object\":\"Rhea:RHEA:75175\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1831,\"supporting_objects\":2191,\"publications\":4760}}"}} +{"add":{"path":"part-00006-2c4136d2-d3f6-47ea-a5f3-3d5590df1769-c000.snappy.parquet","partitionValues":{},"size":48103,"modificationTime":1766549228695,"dataChange":true,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"object\":\"ChEBI:CHEBI:10986\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"object\":\"Rhea:RHEA:78975\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":2149,\"supporting_objects\":2323,\"publications\":4950}}"}} +{"add":{"path":"part-00007-db1c7914-39dd-4ff4-83c0-e259ba58d1b7-c000.snappy.parquet","partitionValues":{},"size":26421,"modificationTime":1766549228703,"dataChange":true,"stats":"{\"numRecords\":4702,\"minValues\":{\"subject\":\"cdm_prot_00504a00-27c9-551f-b27f\",\"object\":\"ChEBI:CHEBI:132124\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"object\":\"Rhea:RHEA:79083\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":896,\"supporting_objects\":950,\"publications\":4648}}"}} diff --git 
a/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000003.crc b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000003.crc new file mode 100644 index 0000000..e912bab --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000003.crc @@ -0,0 +1 @@ +{"txnId":"ae218ef0-5c8a-4b7c-81ec-e8140bee22fa","tableSizeBytes":833722,"numFiles":24,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"ad248292-a815-411b-9154-113330cf7ea8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"subject\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"object\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"predicate\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"evidence_type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"supporting_objects\",\"type\":{\"type\":\"array\",\"elementType\":\"string\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"publications\",\"type\":{\"type\":\"array\",\"elementType\":\"string\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549214137},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[{"path":"part-00007-db1c7914-39dd-4ff4-83c0-e259ba58d1b7-c000.snappy.parquet","partitionValues":{},"size":26421,"modificationTime":1766549228703,"dataChange":false,"stats":"{\"numRecords\":4702,\"minValues\":{\"subject\":\"cdm_prot_00504a00-27c9-551f-b27f\",\"object\":\"ChEBI:CHEBI:132124\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"object\":\"Rhea:RHEA:79083\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":896,\"s
upporting_objects\":950,\"publications\":4648}}"},{"path":"part-00000-5c7434bb-30e3-45f8-b638-415de5c389a1-c000.snappy.parquet","partitionValues":{},"size":28874,"modificationTime":1766549228694,"dataChange":false,"stats":"{\"numRecords\":4096,\"minValues\":{\"subject\":\"cdm_prot_00c31285-6721-55e0-855a\",\"object\":\"ChEBI:CHEBI:128769\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"object\":\"Rhea:RHEA:68420\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1286,\"supporting_objects\":1677,\"publications\":3705}}"},{"path":"part-00005-d77d92c3-e227-463b-9cf7-50341160d938-c000.snappy.parquet","partitionValues":{},"size":50753,"modificationTime":1766549235318,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"object\":\"ChEBI:CHEBI:138284\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"object\":\"Rhea:RHEA:77883\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1410,\"supporting_objects\":1476,\"publications\":1982}}"},{"path":"part-00004-cfdc4727-b5b0-4c31-84c4-3ed85cc9f1c9-c000.snappy.parquet","partitionValues":{},"size":24338,"modificationTime":1766549235347,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"object\":\"ChEBI:CHEBI:140395\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffbd3905-3836-5893-bd75\",\"object\":\"Rhea:RHEA:21248\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":633,\"supporting_objects\":647,\"publicat
ions\":2041}}"},{"path":"part-00001-d0cc9108-298b-44b4-80e4-8ecf39f2926d-c000.snappy.parquet","partitionValues":{},"size":40388,"modificationTime":1766549220757,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_00030730-9147-5351-b68c\",\"object\":\"ChEBI:CHEBI:131803\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff4d0139-f3df-510c-a663\",\"object\":\"Rhea:RHEA:80651\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1311,\"supporting_objects\":1601,\"publications\":4830}}"},{"path":"part-00001-a044609f-89b1-43da-8fad-c72c083b71f9-c000.snappy.parquet","partitionValues":{},"size":46717,"modificationTime":1766549228697,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"object\":\"ChEBI:CHEBI:11851\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"object\":\"Rhea:RHEA:67136\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1974,\"supporting_objects\":2156,\"publications\":4938}}"},{"path":"part-00001-769f28af-12f9-4406-a951-2d78e45241c1-c000.snappy.parquet","partitionValues":{},"size":21772,"modificationTime":1766549235347,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_006f3d67-56f0-5829-b099\",\"object\":\"ChEBI:CHEBI:15361\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fea2d6f2-bbe4-51c7-afbe\",\"object\":\"Rhea:RHEA:48556\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":963,\"supporting_objects\":1044,\"publications\":1967}}"},{"path":"part-00007
-9e4ad55b-542a-44fb-8598-17a9f559aed7-c000.snappy.parquet","partitionValues":{},"size":35933,"modificationTime":1766549220751,"dataChange":false,"stats":"{\"numRecords\":5071,\"minValues\":{\"subject\":\"cdm_prot_00625fdf-0992-5594-a800\",\"object\":\"ChEBI:CHEBI:128769\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"object\":\"Rhea:RHEA:84215\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1518,\"supporting_objects\":1933,\"publications\":4681}}"},{"path":"part-00006-7631ae9a-9d2c-4334-a806-fca2b784b7b6-c000.snappy.parquet","partitionValues":{},"size":23051,"modificationTime":1766549220757,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_0094747f-6d42-5807-a9c1\",\"object\":\"ChEBI:CHEBI:12931\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fe6c6c04-1430-5b94-935c\",\"object\":\"Rhea:RHEA:42760\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1062,\"supporting_objects\":1159,\"publications\":5023}}"},{"path":"part-00004-bb84e201-0f62-4f40-b0a7-207c849f7a24-c000.snappy.parquet","partitionValues":{},"size":37737,"modificationTime":1766549220757,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_008bae16-5b42-5bfd-a15c\",\"object\":\"ChEBI:CHEBI:12040\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fedf6b23-d61c-56dc-b256\",\"object\":\"Rhea:RHEA:62168\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1707,\"supporting_objects\":1962,\"publications\":4871}}"},{"path":"part-00006-2c4136d2-d3f6-47ea-a5f3-3d5590df1
769-c000.snappy.parquet","partitionValues":{},"size":48103,"modificationTime":1766549228695,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"object\":\"ChEBI:CHEBI:10986\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"object\":\"Rhea:RHEA:78975\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":2149,\"supporting_objects\":2323,\"publications\":4950}}"},{"path":"part-00003-c585c50d-d510-403f-b1af-f2590b82ec02-c000.snappy.parquet","partitionValues":{},"size":23570,"modificationTime":1766549235346,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_00fd49c4-bf85-5245-b2b6\",\"object\":\"ChEBI:CHEBI:140395\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff765eaf-0779-58e3-9069\",\"object\":\"Rhea:RHEA:58012\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":712,\"supporting_objects\":729,\"publications\":2038}}"},{"path":"part-00003-650a8597-77a2-4e0d-bed4-a45a77799938-c000.snappy.parquet","partitionValues":{},"size":36912,"modificationTime":1766549228706,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"object\":\"ChEBI:CHEBI:131766\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"object\":\"Rhea:RHEA:66528\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1514,\"supporting_objects\":1865,\"publications\":4769}}"},{"path":"part-00004-a557f9be-45fe-430d-820c-67c6e98e062b-c000.snappy.parquet","partition
Values":{},"size":36062,"modificationTime":1766549228702,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_00925b61-fa22-5bcf-9dea\",\"object\":\"ChEBI:CHEBI:132124\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"object\":\"Rhea:RHEA:57720\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1622,\"supporting_objects\":1866,\"publications\":4876}}"},{"path":"part-00003-52d8b672-b288-4bcb-ab63-bb57677a1ecd-c000.snappy.parquet","partitionValues":{},"size":36564,"modificationTime":1766549220759,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"object\":\"ChEBI:CHEBI:10329\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"object\":\"Rhea:RHEA:63444\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1242,\"supporting_objects\":1515,\"publications\":4855}}"},{"path":"part-00005-f48959c1-8814-4930-9482-909e87471c09-c000.snappy.parquet","partitionValues":{},"size":37959,"modificationTime":1766549228670,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"object\":\"ChEBI:CHEBI:11561\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"object\":\"Rhea:RHEA:75175\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1831,\"supporting_objects\":2191,\"publications\":4760}}"},{"path":"part-00002-33e36f42-b91d-4741-9371-d0a2b01265ea-c000.snappy.parquet","partitionValues":{},"size":28250,"modificat
ionTime":1766549220747,"dataChange":false,"stats":"{\"numRecords\":4096,\"minValues\":{\"subject\":\"cdm_prot_00a96f35-f63b-5933-9e50\",\"object\":\"ChEBI:CHEBI:13193\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fd944ff3-10f0-5e2f-95de\",\"object\":\"Rhea:RHEA:76235\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":967,\"supporting_objects\":1065,\"publications\":3998}}"},{"path":"part-00000-6e2a19e7-36e4-48c7-a949-9a97054f83e1-c000.snappy.parquet","partitionValues":{},"size":45924,"modificationTime":1766549220742,"dataChange":false,"stats":"{\"numRecords\":4096,\"minValues\":{\"subject\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"object\":\"ChEBI:CHEBI:13193\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"object\":\"Rhea:RHEA:79571\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1628,\"supporting_objects\":2037,\"publications\":3687}}"},{"path":"part-00002-4af0fd89-1486-400d-b069-65d0176b17dc-c000.snappy.parquet","partitionValues":{},"size":69154,"modificationTime":1766549235345,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"object\":\"ChEBI:CHEBI:15377\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000255\"},\"maxValues\":{\"subject\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"object\":\"Rhea:RHEA:42720\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000255\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1825,\"supporting_objects\":1825,\"publications\":2048}}"},{"path":"part-00002-d6af9717-c0ef-4ef3-93bf-17593ea5ff3d-c000.snappy.parquet","partitionValues":{},"size":32281,"modificationTime":1766549228708,"dataChange":
false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"object\":\"ChEBI:CHEBI:1178\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"object\":\"Rhea:RHEA:69496\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1407,\"supporting_objects\":1689,\"publications\":4845}}"},{"path":"part-00006-43d069bc-67ee-4b6b-a938-17447f9f6397-c000.snappy.parquet","partitionValues":{},"size":22319,"modificationTime":1766549235346,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_0030b85f-aa5a-55dd-a0cc\",\"object\":\"ChEBI:CHEBI:13193\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"object\":\"Rhea:RHEA:65944\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":754,\"supporting_objects\":922,\"publications\":1880}}"},{"path":"part-00000-fddfb436-dca2-49d9-a478-cec9a901a570-c000.snappy.parquet","partitionValues":{},"size":19594,"modificationTime":1766549235339,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_0132f56a-e32c-5d2c-a0df\",\"object\":\"ChEBI:CHEBI:13193\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000255\"},\"maxValues\":{\"subject\":\"cdm_prot_ffcedd0e-face-5ec6-8e0b\",\"object\":\"Rhea:RHEA:65836\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":721,\"supporting_objects\":808,\"publications\":1961}}"},{"path":"part-00007-393e6b77-99d0-4bf4-9b62-944af8f3f649-c000.snappy.parquet","partitionValues":{},"size":15207,"modificationTime":1766549235346,"dataChange":false,"stats":"{\"numRecords\":1966,\"mi
nValues\":{\"subject\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"object\":\"ChEBI:CHEBI:13193\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff00247f-b12a-5321-b6c1\",\"object\":\"Rhea:RHEA:43892\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":491,\"supporting_objects\":560,\"publications\":1897}}"},{"path":"part-00005-dad45051-3656-45f3-b45c-4c29e143aae9-c000.snappy.parquet","partitionValues":{},"size":45839,"modificationTime":1766549220741,"dataChange":false,"stats":"{\"numRecords\":4096,\"minValues\":{\"subject\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"object\":\"ChEBI:CHEBI:133980\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"object\":\"Rhea:RHEA:78943\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1503,\"supporting_objects\":1884,\"publications\":3841}}"}]} diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000003.json b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000003.json new file mode 100644 index 0000000..8cadb7b --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000003.json @@ -0,0 +1,9 @@ +{"commitInfo":{"timestamp":1766549235350,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"16302","numOutputBytes":"246707"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"ae218ef0-5c8a-4b7c-81ec-e8140bee22fa"}} 
+{"add":{"path":"part-00000-fddfb436-dca2-49d9-a478-cec9a901a570-c000.snappy.parquet","partitionValues":{},"size":19594,"modificationTime":1766549235339,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_0132f56a-e32c-5d2c-a0df\",\"object\":\"ChEBI:CHEBI:13193\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000255\"},\"maxValues\":{\"subject\":\"cdm_prot_ffcedd0e-face-5ec6-8e0b\",\"object\":\"Rhea:RHEA:65836\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":721,\"supporting_objects\":808,\"publications\":1961}}"}} +{"add":{"path":"part-00001-769f28af-12f9-4406-a951-2d78e45241c1-c000.snappy.parquet","partitionValues":{},"size":21772,"modificationTime":1766549235347,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_006f3d67-56f0-5829-b099\",\"object\":\"ChEBI:CHEBI:15361\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fea2d6f2-bbe4-51c7-afbe\",\"object\":\"Rhea:RHEA:48556\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":963,\"supporting_objects\":1044,\"publications\":1967}}"}} +{"add":{"path":"part-00002-4af0fd89-1486-400d-b069-65d0176b17dc-c000.snappy.parquet","partitionValues":{},"size":69154,"modificationTime":1766549235345,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"object\":\"ChEBI:CHEBI:15377\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000255\"},\"maxValues\":{\"subject\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"object\":\"Rhea:RHEA:42720\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000255\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1825,\"supporting_objects\":1825,\"publications\":2048}}"}} 
+{"add":{"path":"part-00003-c585c50d-d510-403f-b1af-f2590b82ec02-c000.snappy.parquet","partitionValues":{},"size":23570,"modificationTime":1766549235346,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_00fd49c4-bf85-5245-b2b6\",\"object\":\"ChEBI:CHEBI:140395\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff765eaf-0779-58e3-9069\",\"object\":\"Rhea:RHEA:58012\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":712,\"supporting_objects\":729,\"publications\":2038}}"}} +{"add":{"path":"part-00004-cfdc4727-b5b0-4c31-84c4-3ed85cc9f1c9-c000.snappy.parquet","partitionValues":{},"size":24338,"modificationTime":1766549235347,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"object\":\"ChEBI:CHEBI:140395\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffbd3905-3836-5893-bd75\",\"object\":\"Rhea:RHEA:21248\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":633,\"supporting_objects\":647,\"publications\":2041}}"}} +{"add":{"path":"part-00005-d77d92c3-e227-463b-9cf7-50341160d938-c000.snappy.parquet","partitionValues":{},"size":50753,"modificationTime":1766549235318,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"object\":\"ChEBI:CHEBI:138284\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"object\":\"Rhea:RHEA:77883\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1410,\"supporting_objects\":1476,\"publications\":1982}}"}} 
+{"add":{"path":"part-00006-43d069bc-67ee-4b6b-a938-17447f9f6397-c000.snappy.parquet","partitionValues":{},"size":22319,"modificationTime":1766549235346,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_0030b85f-aa5a-55dd-a0cc\",\"object\":\"ChEBI:CHEBI:13193\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"object\":\"Rhea:RHEA:65944\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":754,\"supporting_objects\":922,\"publications\":1880}}"}} +{"add":{"path":"part-00007-393e6b77-99d0-4bf4-9b62-944af8f3f649-c000.snappy.parquet","partitionValues":{},"size":15207,"modificationTime":1766549235346,"dataChange":true,"stats":"{\"numRecords\":1966,\"minValues\":{\"subject\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"object\":\"ChEBI:CHEBI:13193\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff00247f-b12a-5321-b6c1\",\"object\":\"Rhea:RHEA:43892\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":491,\"supporting_objects\":560,\"publications\":1897}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000004.crc b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000004.crc new file mode 100644 index 0000000..bc8219c --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000004.crc @@ -0,0 +1 @@ 
+{"txnId":"17877cf5-20b3-49c5-854e-d9648ab48600","tableSizeBytes":1079391,"numFiles":32,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"ad248292-a815-411b-9154-113330cf7ea8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"subject\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"object\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"predicate\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"evidence_type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"supporting_objects\",\"type\":{\"type\":\"array\",\"elementType\":\"string\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"publications\",\"type\":{\"type\":\"array\",\"elementType\":\"string\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549214137},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[{"path":"part-00007-db1c7914-39dd-4ff4-83c0-e259ba58d1b7-c000.snappy.parquet","partitionValues":{},"size":26421,"modificationTime":1766549228703,"dataChange":false,"stats":"{\"numRecords\":4702,\"minValues\":{\"subject\":\"cdm_prot_00504a00-27c9-551f-b27f\",\"object\":\"ChEBI:CHEBI:132124\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"object\":\"Rhea:RHEA:79083\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":896,\"supporting_objects\":950,\"publications\":4648}}"},{"path":"part-00000-5c7434bb-30e3-45f8-b638-415de5c389a1-c000.snappy.parquet","partitionValues":{},"size":28874,"modificationTime":1766549228694,"dataChange":false,"stats":"{\"numRecords\":4096,\"minValues\":{\"subject\":\"cdm_prot_00c31285-6721-55e0-855a\",\"object\":\"ChEBI:CHEBI:128769\",
\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"object\":\"Rhea:RHEA:68420\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1286,\"supporting_objects\":1677,\"publications\":3705}}"},{"path":"part-00005-d77d92c3-e227-463b-9cf7-50341160d938-c000.snappy.parquet","partitionValues":{},"size":50753,"modificationTime":1766549235318,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"object\":\"ChEBI:CHEBI:138284\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"object\":\"Rhea:RHEA:77883\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1410,\"supporting_objects\":1476,\"publications\":1982}}"},{"path":"part-00000-c4fcd4fc-431e-4ad1-8624-792e6203933b-c000.snappy.parquet","partitionValues":{},"size":11536,"modificationTime":1766549241094,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_0003b7cb-bf13-5646-9978\",\"object\":\"ChEBI:CHEBI:15378\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000255\"},\"maxValues\":{\"subject\":\"cdm_prot_fdc49789-52de-5a6c-95cc\",\"object\":\"Rhea:RHEA:23540\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000255\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":336,\"supporting_objects\":336,\"publications\":2048}}"},{"path":"part-00004-cfdc4727-b5b0-4c31-84c4-3ed85cc9f1c9-c000.snappy.parquet","partitionValues":{},"size":24338,"modificationTime":1766549235347,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"object\":\"ChEBI:CHEBI:140395\",\"predicate\":\"catalyzes\",\"evide
nce_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffbd3905-3836-5893-bd75\",\"object\":\"Rhea:RHEA:21248\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":633,\"supporting_objects\":647,\"publications\":2041}}"},{"path":"part-00001-d0cc9108-298b-44b4-80e4-8ecf39f2926d-c000.snappy.parquet","partitionValues":{},"size":40388,"modificationTime":1766549220757,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_00030730-9147-5351-b68c\",\"object\":\"ChEBI:CHEBI:131803\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff4d0139-f3df-510c-a663\",\"object\":\"Rhea:RHEA:80651\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1311,\"supporting_objects\":1601,\"publications\":4830}}"},{"path":"part-00001-343540fa-6365-424a-94b6-16af6a4b2e63-c000.snappy.parquet","partitionValues":{},"size":16278,"modificationTime":1766549241100,"dataChange":false,"stats":"{\"numRecords\":3072,\"minValues\":{\"subject\":\"cdm_prot_02975fc1-bb75-5af0-80ae\",\"object\":\"ChEBI:CHEBI:15378\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000255\"},\"maxValues\":{\"subject\":\"cdm_prot_fbe63957-4fb2-5512-85ef\",\"object\":\"Rhea:RHEA:20792\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000255\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":669,\"supporting_objects\":669,\"publications\":3072}}"},{"path":"part-00003-8a0e9158-3bdc-4059-8261-4f34b384ad8b-c000.snappy.parquet","partitionValues":{},"size":24544,"modificationTime":1766549241087,"dataChange":false,"stats":"{\"numRecords\":3072,\"minValues\":{\"subject\":\"cdm_prot_01672ce0-33eb-50c0-8bb3\",\"object\":\"ChEBI:CHEBI:136979\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValu
es\":{\"subject\":\"cdm_prot_fffe9aba-bce9-56de-b269\",\"object\":\"Rhea:RHEA:75299\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000303\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":695,\"supporting_objects\":762,\"publications\":3005}}"},{"path":"part-00001-a044609f-89b1-43da-8fad-c72c083b71f9-c000.snappy.parquet","partitionValues":{},"size":46717,"modificationTime":1766549228697,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"object\":\"ChEBI:CHEBI:11851\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"object\":\"Rhea:RHEA:67136\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1974,\"supporting_objects\":2156,\"publications\":4938}}"},{"path":"part-00001-769f28af-12f9-4406-a951-2d78e45241c1-c000.snappy.parquet","partitionValues":{},"size":21772,"modificationTime":1766549235347,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_006f3d67-56f0-5829-b099\",\"object\":\"ChEBI:CHEBI:15361\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fea2d6f2-bbe4-51c7-afbe\",\"object\":\"Rhea:RHEA:48556\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":963,\"supporting_objects\":1044,\"publications\":1967}}"},{"path":"part-00007-9e4ad55b-542a-44fb-8598-17a9f559aed7-c000.snappy.parquet","partitionValues":{},"size":35933,"modificationTime":1766549220751,"dataChange":false,"stats":"{\"numRecords\":5071,\"minValues\":{\"subject\":\"cdm_prot_00625fdf-0992-5594-a800\",\"object\":\"ChEBI:CHEBI:128769\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffcf5789
-8bfa-5ca0-914e\",\"object\":\"Rhea:RHEA:84215\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1518,\"supporting_objects\":1933,\"publications\":4681}}"},{"path":"part-00006-7631ae9a-9d2c-4334-a806-fca2b784b7b6-c000.snappy.parquet","partitionValues":{},"size":23051,"modificationTime":1766549220757,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_0094747f-6d42-5807-a9c1\",\"object\":\"ChEBI:CHEBI:12931\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fe6c6c04-1430-5b94-935c\",\"object\":\"Rhea:RHEA:42760\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1062,\"supporting_objects\":1159,\"publications\":5023}}"},{"path":"part-00004-bb84e201-0f62-4f40-b0a7-207c849f7a24-c000.snappy.parquet","partitionValues":{},"size":37737,"modificationTime":1766549220757,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_008bae16-5b42-5bfd-a15c\",\"object\":\"ChEBI:CHEBI:12040\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fedf6b23-d61c-56dc-b256\",\"object\":\"Rhea:RHEA:62168\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1707,\"supporting_objects\":1962,\"publications\":4871}}"},{"path":"part-00007-d719ea24-3a34-4eb5-8620-61e7b97d14b6-c000.snappy.parquet","partitionValues":{},"size":91789,"modificationTime":1766549241095,"dataChange":false,"stats":"{\"numRecords\":2537,\"minValues\":{\"subject\":\"cdm_prot_0048aaff-7f6b-5ca1-9274\",\"object\":\"ChEBI:CHEBI:138102\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffd6bc2d-9fdc-5fb9-a121\",\"object\":\"Rhe
a:RHEA:68792\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":2334,\"supporting_objects\":2364,\"publications\":2523}}"},{"path":"part-00006-2c4136d2-d3f6-47ea-a5f3-3d5590df1769-c000.snappy.parquet","partitionValues":{},"size":48103,"modificationTime":1766549228695,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"object\":\"ChEBI:CHEBI:10986\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"object\":\"Rhea:RHEA:78975\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":2149,\"supporting_objects\":2323,\"publications\":4950}}"},{"path":"part-00003-c585c50d-d510-403f-b1af-f2590b82ec02-c000.snappy.parquet","partitionValues":{},"size":23570,"modificationTime":1766549235346,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_00fd49c4-bf85-5245-b2b6\",\"object\":\"ChEBI:CHEBI:140395\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff765eaf-0779-58e3-9069\",\"object\":\"Rhea:RHEA:58012\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":712,\"supporting_objects\":729,\"publications\":2038}}"},{"path":"part-00003-650a8597-77a2-4e0d-bed4-a45a77799938-c000.snappy.parquet","partitionValues":{},"size":36912,"modificationTime":1766549228706,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"object\":\"ChEBI:CHEBI:131766\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"object\":\"Rhea:RHEA:66528\",\"predicate\":\"requ
ires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1514,\"supporting_objects\":1865,\"publications\":4769}}"},{"path":"part-00004-a557f9be-45fe-430d-820c-67c6e98e062b-c000.snappy.parquet","partitionValues":{},"size":36062,"modificationTime":1766549228702,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_00925b61-fa22-5bcf-9dea\",\"object\":\"ChEBI:CHEBI:132124\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"object\":\"Rhea:RHEA:57720\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1622,\"supporting_objects\":1866,\"publications\":4876}}"},{"path":"part-00006-c892b9bb-3763-45a3-84b9-5bb7d0f8106f-c000.snappy.parquet","partitionValues":{},"size":44498,"modificationTime":1766549241087,"dataChange":false,"stats":"{\"numRecords\":3072,\"minValues\":{\"subject\":\"cdm_prot_0007879e-35c8-5917-ad11\",\"object\":\"ChEBI:CHEBI:11851\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffe66051-6a0a-55cc-81b2\",\"object\":\"Rhea:RHEA:66592\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1372,\"supporting_objects\":1553,\"publications\":2891}}"},{"path":"part-00003-52d8b672-b288-4bcb-ab63-bb57677a1ecd-c000.snappy.parquet","partitionValues":{},"size":36564,"modificationTime":1766549220759,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"object\":\"ChEBI:CHEBI:10329\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"object\":\"Rhea:RHEA:63444\",\"predicate\":\"requires_cofactor\",\"evidence_type\":
\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1242,\"supporting_objects\":1515,\"publications\":4855}}"},{"path":"part-00005-f48959c1-8814-4930-9482-909e87471c09-c000.snappy.parquet","partitionValues":{},"size":37959,"modificationTime":1766549228670,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"object\":\"ChEBI:CHEBI:11561\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"object\":\"Rhea:RHEA:75175\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1831,\"supporting_objects\":2191,\"publications\":4760}}"},{"path":"part-00004-f5bf9de0-2f61-4ef9-ae15-45201fe05ae7-c000.snappy.parquet","partitionValues":{},"size":22023,"modificationTime":1766549241090,"dataChange":false,"stats":"{\"numRecords\":3072,\"minValues\":{\"subject\":\"cdm_prot_0185e048-25c8-50dd-bfbf\",\"object\":\"ChEBI:CHEBI:13193\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffab7ab7-2b79-5388-8849\",\"object\":\"Rhea:RHEA:82211\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":713,\"supporting_objects\":963,\"publications\":2822}}"},{"path":"part-00002-33e36f42-b91d-4741-9371-d0a2b01265ea-c000.snappy.parquet","partitionValues":{},"size":28250,"modificationTime":1766549220747,"dataChange":false,"stats":"{\"numRecords\":4096,\"minValues\":{\"subject\":\"cdm_prot_00a96f35-f63b-5933-9e50\",\"object\":\"ChEBI:CHEBI:13193\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fd944ff3-10f0-5e2f-95de\",\"object\":\"Rhea:RHEA:76235\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"sub
ject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":967,\"supporting_objects\":1065,\"publications\":3998}}"},{"path":"part-00000-6e2a19e7-36e4-48c7-a949-9a97054f83e1-c000.snappy.parquet","partitionValues":{},"size":45924,"modificationTime":1766549220742,"dataChange":false,"stats":"{\"numRecords\":4096,\"minValues\":{\"subject\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"object\":\"ChEBI:CHEBI:13193\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"object\":\"Rhea:RHEA:79571\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1628,\"supporting_objects\":2037,\"publications\":3687}}"},{"path":"part-00002-4af0fd89-1486-400d-b069-65d0176b17dc-c000.snappy.parquet","partitionValues":{},"size":69154,"modificationTime":1766549235345,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"object\":\"ChEBI:CHEBI:15377\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000255\"},\"maxValues\":{\"subject\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"object\":\"Rhea:RHEA:42720\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000255\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1825,\"supporting_objects\":1825,\"publications\":2048}}"},{"path":"part-00002-d6af9717-c0ef-4ef3-93bf-17593ea5ff3d-c000.snappy.parquet","partitionValues":{},"size":32281,"modificationTime":1766549228708,"dataChange":false,"stats":"{\"numRecords\":5120,\"minValues\":{\"subject\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"object\":\"ChEBI:CHEBI:1178\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"object\":\"Rhea:RHEA:69496\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0
,\"evidence_type\":1407,\"supporting_objects\":1689,\"publications\":4845}}"},{"path":"part-00005-83e03c9c-6e40-47d8-a1e5-fbdcb0abafdb-c000.snappy.parquet","partitionValues":{},"size":21367,"modificationTime":1766549241088,"dataChange":false,"stats":"{\"numRecords\":3072,\"minValues\":{\"subject\":\"cdm_prot_0234d270-648d-5275-a005\",\"object\":\"ChEBI:CHEBI:15361\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fe66808c-aab1-5f12-970e\",\"object\":\"Rhea:RHEA:55292\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1061,\"supporting_objects\":1121,\"publications\":3012}}"},{"path":"part-00006-43d069bc-67ee-4b6b-a938-17447f9f6397-c000.snappy.parquet","partitionValues":{},"size":22319,"modificationTime":1766549235346,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_0030b85f-aa5a-55dd-a0cc\",\"object\":\"ChEBI:CHEBI:13193\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"object\":\"Rhea:RHEA:65944\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":754,\"supporting_objects\":922,\"publications\":1880}}"},{"path":"part-00000-fddfb436-dca2-49d9-a478-cec9a901a570-c000.snappy.parquet","partitionValues":{},"size":19594,"modificationTime":1766549235339,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_0132f56a-e32c-5d2c-a0df\",\"object\":\"ChEBI:CHEBI:13193\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000255\"},\"maxValues\":{\"subject\":\"cdm_prot_ffcedd0e-face-5ec6-8e0b\",\"object\":\"Rhea:RHEA:65836\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":721,\"supporting_o
bjects\":808,\"publications\":1961}}"},{"path":"part-00007-393e6b77-99d0-4bf4-9b62-944af8f3f649-c000.snappy.parquet","partitionValues":{},"size":15207,"modificationTime":1766549235346,"dataChange":false,"stats":"{\"numRecords\":1966,\"minValues\":{\"subject\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"object\":\"ChEBI:CHEBI:13193\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ff00247f-b12a-5321-b6c1\",\"object\":\"Rhea:RHEA:43892\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000269\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":491,\"supporting_objects\":560,\"publications\":1897}}"},{"path":"part-00002-60a2f084-b993-4c4c-81bc-4893081c8ace-c000.snappy.parquet","partitionValues":{},"size":13634,"modificationTime":1766549241086,"dataChange":false,"stats":"{\"numRecords\":3072,\"minValues\":{\"subject\":\"cdm_prot_01c4c107-d76f-540b-a701\",\"object\":\"ChEBI:CHEBI:15378\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fef0eb9f-57f2-532d-b5b2\",\"object\":\"Rhea:RHEA:42580\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000255\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":518,\"supporting_objects\":518,\"publications\":3072}}"},{"path":"part-00005-dad45051-3656-45f3-b45c-4c29e143aae9-c000.snappy.parquet","partitionValues":{},"size":45839,"modificationTime":1766549220741,"dataChange":false,"stats":"{\"numRecords\":4096,\"minValues\":{\"subject\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"object\":\"ChEBI:CHEBI:133980\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"object\":\"Rhea:RHEA:78943\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1503,\"supporting_objects\":1884,\"publications\":3841}}"}
]} diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000004.json b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000004.json new file mode 100644 index 0000000..0fa5768 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/associations/_delta_log/00000000000000000004.json @@ -0,0 +1,9 @@ +{"commitInfo":{"timestamp":1766549241102,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":3,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"23017","numOutputBytes":"245669"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"17877cf5-20b3-49c5-854e-d9648ab48600"}} +{"add":{"path":"part-00000-c4fcd4fc-431e-4ad1-8624-792e6203933b-c000.snappy.parquet","partitionValues":{},"size":11536,"modificationTime":1766549241094,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"subject\":\"cdm_prot_0003b7cb-bf13-5646-9978\",\"object\":\"ChEBI:CHEBI:15378\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000255\"},\"maxValues\":{\"subject\":\"cdm_prot_fdc49789-52de-5a6c-95cc\",\"object\":\"Rhea:RHEA:23540\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000255\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":336,\"supporting_objects\":336,\"publications\":2048}}"}} 
+{"add":{"path":"part-00001-343540fa-6365-424a-94b6-16af6a4b2e63-c000.snappy.parquet","partitionValues":{},"size":16278,"modificationTime":1766549241100,"dataChange":true,"stats":"{\"numRecords\":3072,\"minValues\":{\"subject\":\"cdm_prot_02975fc1-bb75-5af0-80ae\",\"object\":\"ChEBI:CHEBI:15378\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000255\"},\"maxValues\":{\"subject\":\"cdm_prot_fbe63957-4fb2-5512-85ef\",\"object\":\"Rhea:RHEA:20792\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000255\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":669,\"supporting_objects\":669,\"publications\":3072}}"}} +{"add":{"path":"part-00002-60a2f084-b993-4c4c-81bc-4893081c8ace-c000.snappy.parquet","partitionValues":{},"size":13634,"modificationTime":1766549241086,"dataChange":true,"stats":"{\"numRecords\":3072,\"minValues\":{\"subject\":\"cdm_prot_01c4c107-d76f-540b-a701\",\"object\":\"ChEBI:CHEBI:15378\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fef0eb9f-57f2-532d-b5b2\",\"object\":\"Rhea:RHEA:42580\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000255\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":518,\"supporting_objects\":518,\"publications\":3072}}"}} +{"add":{"path":"part-00003-8a0e9158-3bdc-4059-8261-4f34b384ad8b-c000.snappy.parquet","partitionValues":{},"size":24544,"modificationTime":1766549241087,"dataChange":true,"stats":"{\"numRecords\":3072,\"minValues\":{\"subject\":\"cdm_prot_01672ce0-33eb-50c0-8bb3\",\"object\":\"ChEBI:CHEBI:136979\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fffe9aba-bce9-56de-b269\",\"object\":\"Rhea:RHEA:75299\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000303\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":695,\"supporting_objects\":762,\"publications\":3005}}"}} 
+{"add":{"path":"part-00004-f5bf9de0-2f61-4ef9-ae15-45201fe05ae7-c000.snappy.parquet","partitionValues":{},"size":22023,"modificationTime":1766549241090,"dataChange":true,"stats":"{\"numRecords\":3072,\"minValues\":{\"subject\":\"cdm_prot_0185e048-25c8-50dd-bfbf\",\"object\":\"ChEBI:CHEBI:13193\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffab7ab7-2b79-5388-8849\",\"object\":\"Rhea:RHEA:82211\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":713,\"supporting_objects\":963,\"publications\":2822}}"}} +{"add":{"path":"part-00005-83e03c9c-6e40-47d8-a1e5-fbdcb0abafdb-c000.snappy.parquet","partitionValues":{},"size":21367,"modificationTime":1766549241088,"dataChange":true,"stats":"{\"numRecords\":3072,\"minValues\":{\"subject\":\"cdm_prot_0234d270-648d-5275-a005\",\"object\":\"ChEBI:CHEBI:15361\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_fe66808c-aab1-5f12-970e\",\"object\":\"Rhea:RHEA:55292\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1061,\"supporting_objects\":1121,\"publications\":3012}}"}} +{"add":{"path":"part-00006-c892b9bb-3763-45a3-84b9-5bb7d0f8106f-c000.snappy.parquet","partitionValues":{},"size":44498,"modificationTime":1766549241087,"dataChange":true,"stats":"{\"numRecords\":3072,\"minValues\":{\"subject\":\"cdm_prot_0007879e-35c8-5917-ad11\",\"object\":\"ChEBI:CHEBI:11851\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffe66051-6a0a-55cc-81b2\",\"object\":\"Rhea:RHEA:66592\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":1372,\"supporting_objects\":1553,\"publications\":2891}}"}} 
+{"add":{"path":"part-00007-d719ea24-3a34-4eb5-8620-61e7b97d14b6-c000.snappy.parquet","partitionValues":{},"size":91789,"modificationTime":1766549241095,"dataChange":true,"stats":"{\"numRecords\":2537,\"minValues\":{\"subject\":\"cdm_prot_0048aaff-7f6b-5ca1-9274\",\"object\":\"ChEBI:CHEBI:138102\",\"predicate\":\"catalyzes\",\"evidence_type\":\"ECO:0000250\"},\"maxValues\":{\"subject\":\"cdm_prot_ffd6bc2d-9fdc-5fb9-a121\",\"object\":\"Rhea:RHEA:68792\",\"predicate\":\"requires_cofactor\",\"evidence_type\":\"ECO:0000305\"},\"nullCount\":{\"subject\":0,\"object\":0,\"predicate\":0,\"evidence_type\":2334,\"supporting_objects\":2364,\"publications\":2523}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00000-5c7434bb-30e3-45f8-b638-415de5c389a1-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00000-5c7434bb-30e3-45f8-b638-415de5c389a1-c000.snappy.parquet new file mode 100644 index 0000000..ebf50a2 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00000-5c7434bb-30e3-45f8-b638-415de5c389a1-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00000-6e2a19e7-36e4-48c7-a949-9a97054f83e1-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00000-6e2a19e7-36e4-48c7-a949-9a97054f83e1-c000.snappy.parquet new file mode 100644 index 0000000..fef0709 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00000-6e2a19e7-36e4-48c7-a949-9a97054f83e1-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00000-c4fcd4fc-431e-4ad1-8624-792e6203933b-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00000-c4fcd4fc-431e-4ad1-8624-792e6203933b-c000.snappy.parquet new file mode 100644 index 0000000..a2fdea2 Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/associations/part-00000-c4fcd4fc-431e-4ad1-8624-792e6203933b-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00000-fddfb436-dca2-49d9-a478-cec9a901a570-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00000-fddfb436-dca2-49d9-a478-cec9a901a570-c000.snappy.parquet new file mode 100644 index 0000000..a6b1f9a Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00000-fddfb436-dca2-49d9-a478-cec9a901a570-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00001-343540fa-6365-424a-94b6-16af6a4b2e63-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00001-343540fa-6365-424a-94b6-16af6a4b2e63-c000.snappy.parquet new file mode 100644 index 0000000..8cdb735 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00001-343540fa-6365-424a-94b6-16af6a4b2e63-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00001-769f28af-12f9-4406-a951-2d78e45241c1-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00001-769f28af-12f9-4406-a951-2d78e45241c1-c000.snappy.parquet new file mode 100644 index 0000000..12a445b Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00001-769f28af-12f9-4406-a951-2d78e45241c1-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00001-a044609f-89b1-43da-8fad-c72c083b71f9-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00001-a044609f-89b1-43da-8fad-c72c083b71f9-c000.snappy.parquet new file mode 100644 index 0000000..68b727b Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00001-a044609f-89b1-43da-8fad-c72c083b71f9-c000.snappy.parquet differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/associations/part-00001-d0cc9108-298b-44b4-80e4-8ecf39f2926d-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00001-d0cc9108-298b-44b4-80e4-8ecf39f2926d-c000.snappy.parquet new file mode 100644 index 0000000..620ff6f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00001-d0cc9108-298b-44b4-80e4-8ecf39f2926d-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00002-33e36f42-b91d-4741-9371-d0a2b01265ea-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00002-33e36f42-b91d-4741-9371-d0a2b01265ea-c000.snappy.parquet new file mode 100644 index 0000000..1596b90 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00002-33e36f42-b91d-4741-9371-d0a2b01265ea-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00002-4af0fd89-1486-400d-b069-65d0176b17dc-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00002-4af0fd89-1486-400d-b069-65d0176b17dc-c000.snappy.parquet new file mode 100644 index 0000000..f002db7 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00002-4af0fd89-1486-400d-b069-65d0176b17dc-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00002-60a2f084-b993-4c4c-81bc-4893081c8ace-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00002-60a2f084-b993-4c4c-81bc-4893081c8ace-c000.snappy.parquet new file mode 100644 index 0000000..dd35508 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00002-60a2f084-b993-4c4c-81bc-4893081c8ace-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00002-d6af9717-c0ef-4ef3-93bf-17593ea5ff3d-c000.snappy.parquet 
b/tests/data/uniprot_archaea/uniprot_db/associations/part-00002-d6af9717-c0ef-4ef3-93bf-17593ea5ff3d-c000.snappy.parquet new file mode 100644 index 0000000..ca4daca Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00002-d6af9717-c0ef-4ef3-93bf-17593ea5ff3d-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00003-52d8b672-b288-4bcb-ab63-bb57677a1ecd-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00003-52d8b672-b288-4bcb-ab63-bb57677a1ecd-c000.snappy.parquet new file mode 100644 index 0000000..06390fb Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00003-52d8b672-b288-4bcb-ab63-bb57677a1ecd-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00003-650a8597-77a2-4e0d-bed4-a45a77799938-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00003-650a8597-77a2-4e0d-bed4-a45a77799938-c000.snappy.parquet new file mode 100644 index 0000000..f3707ea Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00003-650a8597-77a2-4e0d-bed4-a45a77799938-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00003-8a0e9158-3bdc-4059-8261-4f34b384ad8b-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00003-8a0e9158-3bdc-4059-8261-4f34b384ad8b-c000.snappy.parquet new file mode 100644 index 0000000..2c760ce Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00003-8a0e9158-3bdc-4059-8261-4f34b384ad8b-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00003-c585c50d-d510-403f-b1af-f2590b82ec02-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00003-c585c50d-d510-403f-b1af-f2590b82ec02-c000.snappy.parquet new file mode 100644 index 0000000..f71808d Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/associations/part-00003-c585c50d-d510-403f-b1af-f2590b82ec02-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00004-a557f9be-45fe-430d-820c-67c6e98e062b-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00004-a557f9be-45fe-430d-820c-67c6e98e062b-c000.snappy.parquet new file mode 100644 index 0000000..d10cf49 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00004-a557f9be-45fe-430d-820c-67c6e98e062b-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00004-bb84e201-0f62-4f40-b0a7-207c849f7a24-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00004-bb84e201-0f62-4f40-b0a7-207c849f7a24-c000.snappy.parquet new file mode 100644 index 0000000..dfc340e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00004-bb84e201-0f62-4f40-b0a7-207c849f7a24-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00004-cfdc4727-b5b0-4c31-84c4-3ed85cc9f1c9-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00004-cfdc4727-b5b0-4c31-84c4-3ed85cc9f1c9-c000.snappy.parquet new file mode 100644 index 0000000..3fe0610 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00004-cfdc4727-b5b0-4c31-84c4-3ed85cc9f1c9-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00004-f5bf9de0-2f61-4ef9-ae15-45201fe05ae7-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00004-f5bf9de0-2f61-4ef9-ae15-45201fe05ae7-c000.snappy.parquet new file mode 100644 index 0000000..a0e1330 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00004-f5bf9de0-2f61-4ef9-ae15-45201fe05ae7-c000.snappy.parquet differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/associations/part-00005-83e03c9c-6e40-47d8-a1e5-fbdcb0abafdb-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00005-83e03c9c-6e40-47d8-a1e5-fbdcb0abafdb-c000.snappy.parquet new file mode 100644 index 0000000..4f8b6b3 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00005-83e03c9c-6e40-47d8-a1e5-fbdcb0abafdb-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00005-d77d92c3-e227-463b-9cf7-50341160d938-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00005-d77d92c3-e227-463b-9cf7-50341160d938-c000.snappy.parquet new file mode 100644 index 0000000..17af10d Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00005-d77d92c3-e227-463b-9cf7-50341160d938-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00005-dad45051-3656-45f3-b45c-4c29e143aae9-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00005-dad45051-3656-45f3-b45c-4c29e143aae9-c000.snappy.parquet new file mode 100644 index 0000000..d946d22 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00005-dad45051-3656-45f3-b45c-4c29e143aae9-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00005-f48959c1-8814-4930-9482-909e87471c09-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00005-f48959c1-8814-4930-9482-909e87471c09-c000.snappy.parquet new file mode 100644 index 0000000..5fefd11 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00005-f48959c1-8814-4930-9482-909e87471c09-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00006-2c4136d2-d3f6-47ea-a5f3-3d5590df1769-c000.snappy.parquet 
b/tests/data/uniprot_archaea/uniprot_db/associations/part-00006-2c4136d2-d3f6-47ea-a5f3-3d5590df1769-c000.snappy.parquet new file mode 100644 index 0000000..521e28f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00006-2c4136d2-d3f6-47ea-a5f3-3d5590df1769-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00006-43d069bc-67ee-4b6b-a938-17447f9f6397-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00006-43d069bc-67ee-4b6b-a938-17447f9f6397-c000.snappy.parquet new file mode 100644 index 0000000..008c9a9 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00006-43d069bc-67ee-4b6b-a938-17447f9f6397-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00006-7631ae9a-9d2c-4334-a806-fca2b784b7b6-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00006-7631ae9a-9d2c-4334-a806-fca2b784b7b6-c000.snappy.parquet new file mode 100644 index 0000000..d15b84d Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00006-7631ae9a-9d2c-4334-a806-fca2b784b7b6-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00006-c892b9bb-3763-45a3-84b9-5bb7d0f8106f-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00006-c892b9bb-3763-45a3-84b9-5bb7d0f8106f-c000.snappy.parquet new file mode 100644 index 0000000..65ab1ca Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00006-c892b9bb-3763-45a3-84b9-5bb7d0f8106f-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00007-393e6b77-99d0-4bf4-9b62-944af8f3f649-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00007-393e6b77-99d0-4bf4-9b62-944af8f3f649-c000.snappy.parquet new file mode 100644 index 0000000..79eb04d Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/associations/part-00007-393e6b77-99d0-4bf4-9b62-944af8f3f649-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00007-9e4ad55b-542a-44fb-8598-17a9f559aed7-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00007-9e4ad55b-542a-44fb-8598-17a9f559aed7-c000.snappy.parquet new file mode 100644 index 0000000..c5e54d9 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00007-9e4ad55b-542a-44fb-8598-17a9f559aed7-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00007-d719ea24-3a34-4eb5-8620-61e7b97d14b6-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00007-d719ea24-3a34-4eb5-8620-61e7b97d14b6-c000.snappy.parquet new file mode 100644 index 0000000..d33cd03 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00007-d719ea24-3a34-4eb5-8620-61e7b97d14b6-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/associations/part-00007-db1c7914-39dd-4ff4-83c0-e259ba58d1b7-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/associations/part-00007-db1c7914-39dd-4ff4-83c0-e259ba58d1b7-c000.snappy.parquet new file mode 100644 index 0000000..0ebf349 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/associations/part-00007-db1c7914-39dd-4ff4-83c0-e259ba58d1b7-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00000-262596ac-a964-4afa-8547-f24c3f0a871a-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00000-262596ac-a964-4afa-8547-f24c3f0a871a-c000.snappy.parquet.crc new file mode 100644 index 0000000..fa403ac Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00000-262596ac-a964-4afa-8547-f24c3f0a871a-c000.snappy.parquet.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00000-7973428f-e648-454c-a606-b64ffc73f3db-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00000-7973428f-e648-454c-a606-b64ffc73f3db-c000.snappy.parquet.crc new file mode 100644 index 0000000..a5ce3eb Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00000-7973428f-e648-454c-a606-b64ffc73f3db-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00000-8efd2d36-ce8e-4199-9258-b010a08236aa-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00000-8efd2d36-ce8e-4199-9258-b010a08236aa-c000.snappy.parquet.crc new file mode 100644 index 0000000..6990afd Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00000-8efd2d36-ce8e-4199-9258-b010a08236aa-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00000-bee3e108-f720-48d4-89d3-c75a8aa4dfaa-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00000-bee3e108-f720-48d4-89d3-c75a8aa4dfaa-c000.snappy.parquet.crc new file mode 100644 index 0000000..da4d98b Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00000-bee3e108-f720-48d4-89d3-c75a8aa4dfaa-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00001-1fc7356c-e1b3-4a0b-88c6-0bcdaf52eea6-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00001-1fc7356c-e1b3-4a0b-88c6-0bcdaf52eea6-c000.snappy.parquet.crc new file mode 100644 index 0000000..274e6cc Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00001-1fc7356c-e1b3-4a0b-88c6-0bcdaf52eea6-c000.snappy.parquet.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00001-5c0469f6-368d-4478-aa79-cb054e1ae426-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00001-5c0469f6-368d-4478-aa79-cb054e1ae426-c000.snappy.parquet.crc new file mode 100644 index 0000000..3f3aabb Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00001-5c0469f6-368d-4478-aa79-cb054e1ae426-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00001-aa535d72-deb5-4284-8f04-414aa19f182f-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00001-aa535d72-deb5-4284-8f04-414aa19f182f-c000.snappy.parquet.crc new file mode 100644 index 0000000..a71d312 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00001-aa535d72-deb5-4284-8f04-414aa19f182f-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00001-cb441f11-8637-4e34-af70-446c9277c22f-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00001-cb441f11-8637-4e34-af70-446c9277c22f-c000.snappy.parquet.crc new file mode 100644 index 0000000..f5668bc Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00001-cb441f11-8637-4e34-af70-446c9277c22f-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00002-111b3d32-a822-4c10-9e4b-aaea21460a68-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00002-111b3d32-a822-4c10-9e4b-aaea21460a68-c000.snappy.parquet.crc new file mode 100644 index 0000000..ad9d988 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00002-111b3d32-a822-4c10-9e4b-aaea21460a68-c000.snappy.parquet.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00002-17744fc6-04d1-4419-a97e-18ed02b79a6e-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00002-17744fc6-04d1-4419-a97e-18ed02b79a6e-c000.snappy.parquet.crc new file mode 100644 index 0000000..0553752 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00002-17744fc6-04d1-4419-a97e-18ed02b79a6e-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00002-4a123736-c6f3-4be0-a4ac-ebae82ba6a92-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00002-4a123736-c6f3-4be0-a4ac-ebae82ba6a92-c000.snappy.parquet.crc new file mode 100644 index 0000000..546fb80 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00002-4a123736-c6f3-4be0-a4ac-ebae82ba6a92-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00002-cc877dc2-5e4e-44db-98eb-701878729795-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00002-cc877dc2-5e4e-44db-98eb-701878729795-c000.snappy.parquet.crc new file mode 100644 index 0000000..752644c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00002-cc877dc2-5e4e-44db-98eb-701878729795-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00003-03c3d6f3-c92e-43db-9238-faffadb4184f-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00003-03c3d6f3-c92e-43db-9238-faffadb4184f-c000.snappy.parquet.crc new file mode 100644 index 0000000..f122453 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00003-03c3d6f3-c92e-43db-9238-faffadb4184f-c000.snappy.parquet.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00003-2aebe69d-245d-4cad-aac9-28aa219aa4a7-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00003-2aebe69d-245d-4cad-aac9-28aa219aa4a7-c000.snappy.parquet.crc new file mode 100644 index 0000000..4c970aa Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00003-2aebe69d-245d-4cad-aac9-28aa219aa4a7-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00003-74b068ca-634a-40e6-81ee-258b7182d0db-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00003-74b068ca-634a-40e6-81ee-258b7182d0db-c000.snappy.parquet.crc new file mode 100644 index 0000000..348fe2f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00003-74b068ca-634a-40e6-81ee-258b7182d0db-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00003-8df64f03-5154-4792-9f94-671fffad6fe6-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00003-8df64f03-5154-4792-9f94-671fffad6fe6-c000.snappy.parquet.crc new file mode 100644 index 0000000..3d63194 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00003-8df64f03-5154-4792-9f94-671fffad6fe6-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00004-3aba1d16-85c4-4a3b-a44f-edb66cb87c03-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00004-3aba1d16-85c4-4a3b-a44f-edb66cb87c03-c000.snappy.parquet.crc new file mode 100644 index 0000000..28c2f02 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00004-3aba1d16-85c4-4a3b-a44f-edb66cb87c03-c000.snappy.parquet.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00004-a0b07ff5-8921-46bc-b107-b3974f5812e0-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00004-a0b07ff5-8921-46bc-b107-b3974f5812e0-c000.snappy.parquet.crc new file mode 100644 index 0000000..fa51743 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00004-a0b07ff5-8921-46bc-b107-b3974f5812e0-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00004-b891f437-1eb9-44d4-b41b-00d9b1413c72-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00004-b891f437-1eb9-44d4-b41b-00d9b1413c72-c000.snappy.parquet.crc new file mode 100644 index 0000000..e421344 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00004-b891f437-1eb9-44d4-b41b-00d9b1413c72-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00004-fff20f27-90e3-422c-ac10-fa544591cc46-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00004-fff20f27-90e3-422c-ac10-fa544591cc46-c000.snappy.parquet.crc new file mode 100644 index 0000000..c84d062 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00004-fff20f27-90e3-422c-ac10-fa544591cc46-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00005-070d9ade-f0b9-46b6-a11b-a05c3085e84d-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00005-070d9ade-f0b9-46b6-a11b-a05c3085e84d-c000.snappy.parquet.crc new file mode 100644 index 0000000..d1d4d04 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00005-070d9ade-f0b9-46b6-a11b-a05c3085e84d-c000.snappy.parquet.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00005-5b226a11-c46b-438d-b968-70bcca53330e-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00005-5b226a11-c46b-438d-b968-70bcca53330e-c000.snappy.parquet.crc new file mode 100644 index 0000000..5d38b6f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00005-5b226a11-c46b-438d-b968-70bcca53330e-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00005-ba1ba60b-f4bf-4cf1-a328-dc5950f4925d-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00005-ba1ba60b-f4bf-4cf1-a328-dc5950f4925d-c000.snappy.parquet.crc new file mode 100644 index 0000000..b0b76b4 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00005-ba1ba60b-f4bf-4cf1-a328-dc5950f4925d-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00005-f82500ca-9132-449d-a9ac-9851bee3578c-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00005-f82500ca-9132-449d-a9ac-9851bee3578c-c000.snappy.parquet.crc new file mode 100644 index 0000000..f66ab7d Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00005-f82500ca-9132-449d-a9ac-9851bee3578c-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00006-1d2d1fa4-982f-439f-b95c-d2e33d9f2725-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00006-1d2d1fa4-982f-439f-b95c-d2e33d9f2725-c000.snappy.parquet.crc new file mode 100644 index 0000000..d72479e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00006-1d2d1fa4-982f-439f-b95c-d2e33d9f2725-c000.snappy.parquet.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00006-2bb077fe-e445-4ea6-a113-be61db1e5c67-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00006-2bb077fe-e445-4ea6-a113-be61db1e5c67-c000.snappy.parquet.crc new file mode 100644 index 0000000..3c9dda4 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00006-2bb077fe-e445-4ea6-a113-be61db1e5c67-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00006-9c0db48f-a6f0-4bc0-974d-b4b196673337-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00006-9c0db48f-a6f0-4bc0-974d-b4b196673337-c000.snappy.parquet.crc new file mode 100644 index 0000000..dfa3f5f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00006-9c0db48f-a6f0-4bc0-974d-b4b196673337-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00006-e2ec48f8-d8ef-4fe7-8f28-987148693d9e-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00006-e2ec48f8-d8ef-4fe7-8f28-987148693d9e-c000.snappy.parquet.crc new file mode 100644 index 0000000..c729757 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00006-e2ec48f8-d8ef-4fe7-8f28-987148693d9e-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00007-1af16a96-34ca-46e6-add8-cad8393ecb39-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00007-1af16a96-34ca-46e6-add8-cad8393ecb39-c000.snappy.parquet.crc new file mode 100644 index 0000000..e3c1ae9 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00007-1af16a96-34ca-46e6-add8-cad8393ecb39-c000.snappy.parquet.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00007-263935ae-f03a-4a0d-82eb-8239c2044ae4-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00007-263935ae-f03a-4a0d-82eb-8239c2044ae4-c000.snappy.parquet.crc new file mode 100644 index 0000000..3d376e6 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00007-263935ae-f03a-4a0d-82eb-8239c2044ae4-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00007-74b7bec4-aaab-4bd6-8078-75cfcf5cf393-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00007-74b7bec4-aaab-4bd6-8078-75cfcf5cf393-c000.snappy.parquet.crc new file mode 100644 index 0000000..6935281 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00007-74b7bec4-aaab-4bd6-8078-75cfcf5cf393-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00007-c778ca68-9d4c-442c-b8cf-0f9fc8b451fc-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00007-c778ca68-9d4c-442c-b8cf-0f9fc8b451fc-c000.snappy.parquet.crc new file mode 100644 index 0000000..e81e4e6 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/.part-00007-c778ca68-9d4c-442c-b8cf-0f9fc8b451fc-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000000.crc.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000000.crc.crc new file mode 100644 index 0000000..5f1283a Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000000.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000000.json.crc 
b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000000.json.crc new file mode 100644 index 0000000..2e8de8e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000000.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000001.crc.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000001.crc.crc new file mode 100644 index 0000000..ef48bb6 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000001.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000001.json.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000001.json.crc new file mode 100644 index 0000000..6dba42c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000001.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000002.crc.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000002.crc.crc new file mode 100644 index 0000000..93f8951 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000002.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000002.json.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000002.json.crc new file mode 100644 index 0000000..c7d9ec4 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000002.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000003.crc.crc 
b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000003.crc.crc new file mode 100644 index 0000000..63cdbb1 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000003.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000003.json.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000003.json.crc new file mode 100644 index 0000000..2f6d2d0 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000003.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000004.crc.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000004.crc.crc new file mode 100644 index 0000000..ddcf1c8 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000004.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000004.json.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000004.json.crc new file mode 100644 index 0000000..b09d2b2 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/.00000000000000000004.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000000.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000000.crc new file mode 100644 index 0000000..66b80eb --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000000.crc @@ -0,0 +1 @@ 
+{"txnId":"e7eb597a-5afc-4d33-9947-914936818492","tableSizeBytes":0,"numFiles":0,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"0a3243aa-0303-4d93-bb82-a850749794af","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[]}","partitionColumns":[],"configuration":{},"createdTime":1766549214447},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[]} diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000000.json b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000000.json new file mode 100644 index 0000000..6a5a73a --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1766549214483,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"e7eb597a-5afc-4d33-9947-914936818492"}} +{"metaData":{"id":"0a3243aa-0303-4d93-bb82-a850749794af","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[]}","partitionColumns":[],"configuration":{},"createdTime":1766549214447}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000001.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000001.crc new file mode 100644 index 0000000..4f6920b --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000001.crc @@ -0,0 +1 @@ 
+{"txnId":"b9250189-51ef-433e-af3e-2bb7df6cd84f","tableSizeBytes":1637195,"numFiles":8,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"0a3243aa-0303-4d93-bb82-a850749794af","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"xref_type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"xref_value\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"xref\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549214447},"protocol":{"minReaderVersion":1,"minWriterVersion":2}} diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000001.json b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000001.json new file mode 100644 index 0000000..81fea2d --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000001.json @@ -0,0 +1,10 @@ +{"commitInfo":{"timestamp":1766549221505,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"174852","numOutputBytes":"1637195"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"b9250189-51ef-433e-af3e-2bb7df6cd84f"}} 
+{"metaData":{"id":"0a3243aa-0303-4d93-bb82-a850749794af","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"xref_type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"xref_value\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"xref\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549214447}} +{"add":{"path":"part-00000-262596ac-a964-4afa-8547-f24c3f0a871a-c000.snappy.parquet","partitionValues":{},"size":202528,"modificationTime":1766549221447,"dataChange":true,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"xref_type\":\"ABCD\",\"xref_value\":\"1.1.1.-\",\"xref\":\"1.10.1140.10:FF:000002\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa4eefe-b062-5f26-8f77\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1115434\",\"xref\":\"tvo:TVG1115434\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} +{"add":{"path":"part-00001-5c0469f6-368d-4478-aa79-cb054e1ae426-c000.snappy.parquet","partitionValues":{},"size":207444,"modificationTime":1766549221439,"dataChange":true,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.122\",\"xref\":\"1.10.390.10:FF:000006\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1387115\",\"xref\":\"tvo:TVG1387115\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} 
+{"add":{"path":"part-00002-17744fc6-04d1-4419-a97e-18ed02b79a6e-c000.snappy.parquet","partitionValues":{},"size":225174,"modificationTime":1766549221484,"dataChange":true,"stats":"{\"numRecords\":22528,\"minValues\":{\"entity_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.10.10:FF:000502\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1580555\",\"xref\":\"tvo:TVG1580555\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} +{"add":{"path":"part-00003-03c3d6f3-c92e-43db-9238-faffadb4184f-c000.snappy.parquet","partitionValues":{},"size":229024,"modificationTime":1766549221440,"dataChange":true,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.-\",\"xref\":\"1.10.150.120:FF:000003\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1185779\",\"xref\":\"tvo:TVG1185779\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} +{"add":{"path":"part-00004-b891f437-1eb9-44d4-b41b-00d9b1413c72-c000.snappy.parquet","partitionValues":{},"size":193600,"modificationTime":1766549221485,"dataChange":true,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_023b8b27-1f32-5069-954b\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.3\",\"xref\":\"1.10.132.60:FF:000013\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe5e21c9-fae5-5701-b99d\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1298537\",\"xref\":\"tvo:TVG1298537\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} 
+{"add":{"path":"part-00005-5b226a11-c46b-438d-b968-70bcca53330e-c000.snappy.parquet","partitionValues":{},"size":175090,"modificationTime":1766549221499,"dataChange":true,"stats":"{\"numRecords\":22528,\"minValues\":{\"entity_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.1060.10\",\"xref\":\"1.10.1060.10:FF:000031\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1107082\",\"xref\":\"tvo:TVG1107082\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} +{"add":{"path":"part-00006-9c0db48f-a6f0-4bc0-974d-b4b196673337-c000.snappy.parquet","partitionValues":{},"size":199565,"modificationTime":1766549221433,"dataChange":true,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.261\",\"xref\":\"1.10.10.410:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1288826\",\"xref\":\"tvo:TVG1288826\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} +{"add":{"path":"part-00007-74b7bec4-aaab-4bd6-8078-75cfcf5cf393-c000.snappy.parquet","partitionValues":{},"size":204770,"modificationTime":1766549221447,"dataChange":true,"stats":"{\"numRecords\":22276,\"minValues\":{\"entity_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.-.-.-\",\"xref\":\"1.10.3860.10:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"xref_type\":\"eggNOG\",\"xref_value\":\"vmo:VMUT_1235\",\"xref\":\"vmo:VMUT_1235\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000002.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000002.crc new 
file mode 100644 index 0000000..d3f4295 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000002.crc @@ -0,0 +1 @@ +{"txnId":"123abe92-542f-4fb6-8f44-1ebec6b09b06","tableSizeBytes":3256990,"numFiles":16,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"0a3243aa-0303-4d93-bb82-a850749794af","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"xref_type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"xref_value\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"xref\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549214447},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[{"path":"part-00002-17744fc6-04d1-4419-a97e-18ed02b79a6e-c000.snappy.parquet","partitionValues":{},"size":225174,"modificationTime":1766549221484,"dataChange":false,"stats":"{\"numRecords\":22528,\"minValues\":{\"entity_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.10.10:FF:000502\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1580555\",\"xref\":\"tvo:TVG1580555\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00000-bee3e108-f720-48d4-89d3-c75a8aa4dfaa-c000.snappy.parquet","partitionValues":{},"size":197409,"modificationTime":1766549229782,"dataChange":false,"stats":"{\"numRecords\":20480,\"minValues\":{\"entity_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.23\",\"xref\":\"1.10.1060.10:FF:000026\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1357926\
",\"xref\":\"tvo:TVG1357926\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00004-3aba1d16-85c4-4a3b-a44f-edb66cb87c03-c000.snappy.parquet","partitionValues":{},"size":216341,"modificationTime":1766549229789,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.1020.10:FF:000003\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1248551\",\"xref\":\"tvo:TVG1248551\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00004-b891f437-1eb9-44d4-b41b-00d9b1413c72-c000.snappy.parquet","partitionValues":{},"size":193600,"modificationTime":1766549221485,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_023b8b27-1f32-5069-954b\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.3\",\"xref\":\"1.10.132.60:FF:000013\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe5e21c9-fae5-5701-b99d\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1298537\",\"xref\":\"tvo:TVG1298537\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00003-8df64f03-5154-4792-9f94-671fffad6fe6-c000.snappy.parquet","partitionValues":{},"size":221620,"modificationTime":1766549229794,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.299\",\"xref\":\"1.10.10.10:FF:000630\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1279013\",\"xref\":\"tvo:TVG1279013\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00007-74b7bec4-aaab-4bd6-8078-75cfcf5cf393-c000.snappy.parquet","partitionValues":{},"size":204770,"modif
icationTime":1766549221447,"dataChange":false,"stats":"{\"numRecords\":22276,\"minValues\":{\"entity_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.-.-.-\",\"xref\":\"1.10.3860.10:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"xref_type\":\"eggNOG\",\"xref_value\":\"vmo:VMUT_1235\",\"xref\":\"vmo:VMUT_1235\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00000-262596ac-a964-4afa-8547-f24c3f0a871a-c000.snappy.parquet","partitionValues":{},"size":202528,"modificationTime":1766549221447,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"xref_type\":\"ABCD\",\"xref_value\":\"1.1.1.-\",\"xref\":\"1.10.1140.10:FF:000002\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa4eefe-b062-5f26-8f77\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1115434\",\"xref\":\"tvo:TVG1115434\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00002-4a123736-c6f3-4be0-a4ac-ebae82ba6a92-c000.snappy.parquet","partitionValues":{},"size":201131,"modificationTime":1766549229786,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"xref_type\":\"ABCD\",\"xref_value\":\"1.1.1.205\",\"xref\":\"1.10.238.260:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1340828\",\"xref\":\"tvo:TVG1340828\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00005-5b226a11-c46b-438d-b968-70bcca53330e-c000.snappy.parquet","partitionValues":{},"size":175090,"modificationTime":1766549221499,"dataChange":false,"stats":"{\"numRecords\":22528,\"minValues\":{\"entity_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.1060.10\",\"xref\":\"1.10.1060.10:FF:000031\"},\"ma
xValues\":{\"entity_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1107082\",\"xref\":\"tvo:TVG1107082\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00007-1af16a96-34ca-46e6-add8-cad8393ecb39-c000.snappy.parquet","partitionValues":{},"size":179290,"modificationTime":1766549229768,"dataChange":false,"stats":"{\"numRecords\":21127,\"minValues\":{\"entity_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.275.10:FF:000012\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"xref_type\":\"eggNOG\",\"xref_value\":\"vdi:Vdis_0543\",\"xref\":\"vdi:Vdis_0543\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00006-1d2d1fa4-982f-439f-b95c-d2e33d9f2725-c000.snappy.parquet","partitionValues":{},"size":203194,"modificationTime":1766549229787,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_0102d35c-20a5-5f8d-8175\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.150.20\",\"xref\":\"1.10.287.370:FF:000013\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1514630\",\"xref\":\"tvo:TVG1514630\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00003-03c3d6f3-c92e-43db-9238-faffadb4184f-c000.snappy.parquet","partitionValues":{},"size":229024,"modificationTime":1766549221440,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.-\",\"xref\":\"1.10.150.120:FF:000003\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1185779\",\"xref\":\"tvo:TVG1185779\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}
"},{"path":"part-00001-5c0469f6-368d-4478-aa79-cb054e1ae426-c000.snappy.parquet","partitionValues":{},"size":207444,"modificationTime":1766549221439,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.122\",\"xref\":\"1.10.390.10:FF:000006\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1387115\",\"xref\":\"tvo:TVG1387115\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00006-9c0db48f-a6f0-4bc0-974d-b4b196673337-c000.snappy.parquet","partitionValues":{},"size":199565,"modificationTime":1766549221433,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.261\",\"xref\":\"1.10.10.410:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1288826\",\"xref\":\"tvo:TVG1288826\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00005-f82500ca-9132-449d-a9ac-9851bee3578c-c000.snappy.parquet","partitionValues":{},"size":205533,"modificationTime":1766549229782,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00957fe6-8bfd-5ecd-9b08\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.169\",\"xref\":\"1.10.10.10:FF:000214\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1118724\",\"xref\":\"tvo:TVG1118724\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00001-1fc7356c-e1b3-4a0b-88c6-0bcdaf52eea6-c000.snappy.parquet","partitionValues":{},"size":195277,"modificationTime":1766549229793,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"
cdm_prot_00df9ef5-d9b4-5654-9d2a\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.34\",\"xref\":\"1.10.12.10:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1430123\",\"xref\":\"tvo:TVG1430123\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}]} diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000002.json b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000002.json new file mode 100644 index 0000000..5b6169f --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000002.json @@ -0,0 +1,9 @@ +{"commitInfo":{"timestamp":1766549229797,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"170631","numOutputBytes":"1619795"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"123abe92-542f-4fb6-8f44-1ebec6b09b06"}} +{"add":{"path":"part-00000-bee3e108-f720-48d4-89d3-c75a8aa4dfaa-c000.snappy.parquet","partitionValues":{},"size":197409,"modificationTime":1766549229782,"dataChange":true,"stats":"{\"numRecords\":20480,\"minValues\":{\"entity_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.23\",\"xref\":\"1.10.1060.10:FF:000026\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1357926\",\"xref\":\"tvo:TVG1357926\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} 
+{"add":{"path":"part-00001-1fc7356c-e1b3-4a0b-88c6-0bcdaf52eea6-c000.snappy.parquet","partitionValues":{},"size":195277,"modificationTime":1766549229793,"dataChange":true,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00df9ef5-d9b4-5654-9d2a\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.34\",\"xref\":\"1.10.12.10:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1430123\",\"xref\":\"tvo:TVG1430123\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} +{"add":{"path":"part-00002-4a123736-c6f3-4be0-a4ac-ebae82ba6a92-c000.snappy.parquet","partitionValues":{},"size":201131,"modificationTime":1766549229786,"dataChange":true,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"xref_type\":\"ABCD\",\"xref_value\":\"1.1.1.205\",\"xref\":\"1.10.238.260:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1340828\",\"xref\":\"tvo:TVG1340828\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} +{"add":{"path":"part-00003-8df64f03-5154-4792-9f94-671fffad6fe6-c000.snappy.parquet","partitionValues":{},"size":221620,"modificationTime":1766549229794,"dataChange":true,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.299\",\"xref\":\"1.10.10.10:FF:000630\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1279013\",\"xref\":\"tvo:TVG1279013\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} 
+{"add":{"path":"part-00004-3aba1d16-85c4-4a3b-a44f-edb66cb87c03-c000.snappy.parquet","partitionValues":{},"size":216341,"modificationTime":1766549229789,"dataChange":true,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.1020.10:FF:000003\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1248551\",\"xref\":\"tvo:TVG1248551\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} +{"add":{"path":"part-00005-f82500ca-9132-449d-a9ac-9851bee3578c-c000.snappy.parquet","partitionValues":{},"size":205533,"modificationTime":1766549229782,"dataChange":true,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00957fe6-8bfd-5ecd-9b08\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.169\",\"xref\":\"1.10.10.10:FF:000214\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1118724\",\"xref\":\"tvo:TVG1118724\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} +{"add":{"path":"part-00006-1d2d1fa4-982f-439f-b95c-d2e33d9f2725-c000.snappy.parquet","partitionValues":{},"size":203194,"modificationTime":1766549229787,"dataChange":true,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_0102d35c-20a5-5f8d-8175\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.150.20\",\"xref\":\"1.10.287.370:FF:000013\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1514630\",\"xref\":\"tvo:TVG1514630\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} 
+{"add":{"path":"part-00007-1af16a96-34ca-46e6-add8-cad8393ecb39-c000.snappy.parquet","partitionValues":{},"size":179290,"modificationTime":1766549229768,"dataChange":true,"stats":"{\"numRecords\":21127,\"minValues\":{\"entity_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.275.10:FF:000012\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"xref_type\":\"eggNOG\",\"xref_value\":\"vdi:Vdis_0543\",\"xref\":\"vdi:Vdis_0543\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000003.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000003.crc new file mode 100644 index 0000000..5f73808 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000003.crc @@ -0,0 +1 @@ +{"txnId":"7f55d115-b189-4d98-8cce-09ce06e8eca0","tableSizeBytes":4634178,"numFiles":24,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"0a3243aa-0303-4d93-bb82-a850749794af","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"xref_type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"xref_value\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"xref\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549214447},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[{"path":"part-00004-fff20f27-90e3-422c-ac10-fa544591cc46-c000.snappy.parquet","partitionValues":{},"size":165085,"modificationTime":1766549236070,"dataChange":false,"stats":"{\"numRecords\":20480,\"minValues\":{\"entity_id\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"xref_type\
":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.1950\",\"xref\":\"1.10.10.60:FF:000024\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffbd3905-3836-5893-bd75\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1510824\",\"xref\":\"tvo:TVG1510824\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00002-cc877dc2-5e4e-44db-98eb-701878729795-c000.snappy.parquet","partitionValues":{},"size":150031,"modificationTime":1766549236070,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_0061faab-bd88-562c-8a31\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.1200.240\",\"xref\":\"1.10.1200.240:FF:000003\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff216b32-af2a-545f-8e61\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG0437979\",\"xref\":\"tvo:TVG0437979\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00000-bee3e108-f720-48d4-89d3-c75a8aa4dfaa-c000.snappy.parquet","partitionValues":{},"size":197409,"modificationTime":1766549229782,"dataChange":false,"stats":"{\"numRecords\":20480,\"minValues\":{\"entity_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.23\",\"xref\":\"1.10.1060.10:FF:000026\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1357926\",\"xref\":\"tvo:TVG1357926\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00004-b891f437-1eb9-44d4-b41b-00d9b1413c72-c000.snappy.parquet","partitionValues":{},"size":193600,"modificationTime":1766549221485,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_023b8b27-1f32-5069-954b\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.3\",\"xref\":\"1.10.132.60:FF:000013\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe5e21c9-fae5-5701-b99d\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1298537\",\"xref\":\"tv
o:TVG1298537\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00007-74b7bec4-aaab-4bd6-8078-75cfcf5cf393-c000.snappy.parquet","partitionValues":{},"size":204770,"modificationTime":1766549221447,"dataChange":false,"stats":"{\"numRecords\":22276,\"minValues\":{\"entity_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.-.-.-\",\"xref\":\"1.10.3860.10:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"xref_type\":\"eggNOG\",\"xref_value\":\"vmo:VMUT_1235\",\"xref\":\"vmo:VMUT_1235\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00005-ba1ba60b-f4bf-4cf1-a328-dc5950f4925d-c000.snappy.parquet","partitionValues":{},"size":161631,"modificationTime":1766549236098,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.10.10:FF:000449\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1311805\",\"xref\":\"tvo:TVG1311805\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00000-262596ac-a964-4afa-8547-f24c3f0a871a-c000.snappy.parquet","partitionValues":{},"size":202528,"modificationTime":1766549221447,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"xref_type\":\"ABCD\",\"xref_value\":\"1.1.1.-\",\"xref\":\"1.10.1140.10:FF:000002\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa4eefe-b062-5f26-8f77\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1115434\",\"xref\":\"tvo:TVG1115434\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00007-1af16a96-34ca-46e6-add8-cad8393ecb39-c000.snappy.parquet","partitionValues":{},"size":179290,"modificationTime":176654922976
8,"dataChange":false,"stats":"{\"numRecords\":21127,\"minValues\":{\"entity_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.275.10:FF:000012\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"xref_type\":\"eggNOG\",\"xref_value\":\"vdi:Vdis_0543\",\"xref\":\"vdi:Vdis_0543\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00006-e2ec48f8-d8ef-4fe7-8f28-987148693d9e-c000.snappy.parquet","partitionValues":{},"size":158382,"modificationTime":1766549236066,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00219a7c-c28c-5aaf-928e\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.455.10:FF:000011\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe67f279-f090-5c4e-b38d\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1338731\",\"xref\":\"tvo:TVG1338731\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00001-cb441f11-8637-4e34-af70-446c9277c22f-c000.snappy.parquet","partitionValues":{},"size":181482,"modificationTime":1766549236060,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00365069-eeac-5442-8f11\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.10.1410:FF:000002\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe82106c-a4bc-5a9c-8159\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1279046\",\"xref\":\"tvo:TVG1279046\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00006-9c0db48f-a6f0-4bc0-974d-b4b196673337-c000.snappy.parquet","partitionValues":{},"size":199565,"modificationTime":1766549221433,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.261\",\"xref\":\"1.10.10.410:FF:000001\"},\"maxValues\"
:{\"entity_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1288826\",\"xref\":\"tvo:TVG1288826\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00005-f82500ca-9132-449d-a9ac-9851bee3578c-c000.snappy.parquet","partitionValues":{},"size":205533,"modificationTime":1766549229782,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00957fe6-8bfd-5ecd-9b08\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.169\",\"xref\":\"1.10.10.10:FF:000214\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1118724\",\"xref\":\"tvo:TVG1118724\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00007-c778ca68-9d4c-442c-b8cf-0f9fc8b451fc-c000.snappy.parquet","partitionValues":{},"size":206362,"modificationTime":1766549236079,"dataChange":false,"stats":"{\"numRecords\":21122,\"minValues\":{\"entity_id\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.95\",\"xref\":\"1.10.287.990:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1046103\",\"xref\":\"tvo:TVG1046103\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00002-17744fc6-04d1-4419-a97e-18ed02b79a6e-c000.snappy.parquet","partitionValues":{},"size":225174,"modificationTime":1766549221484,"dataChange":false,"stats":"{\"numRecords\":22528,\"minValues\":{\"entity_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.10.10:FF:000502\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1580555\",\"xref\":\"tvo:TVG1580555\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":
"part-00000-7973428f-e648-454c-a606-b64ffc73f3db-c000.snappy.parquet","partitionValues":{},"size":181264,"modificationTime":1766549236090,"dataChange":false,"stats":"{\"numRecords\":20480,\"minValues\":{\"entity_id\":\"cdm_prot_00d4680f-fe29-598a-9385\",\"xref_type\":\"ABCD\",\"xref_value\":\"1.1.1.302\",\"xref\":\"1.10.10.10:FF:000083\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcedd0e-face-5ec6-8e0b\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1570739\",\"xref\":\"tvo:TVG1570739\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00004-3aba1d16-85c4-4a3b-a44f-edb66cb87c03-c000.snappy.parquet","partitionValues":{},"size":216341,"modificationTime":1766549229789,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.1020.10:FF:000003\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1248551\",\"xref\":\"tvo:TVG1248551\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00003-8df64f03-5154-4792-9f94-671fffad6fe6-c000.snappy.parquet","partitionValues":{},"size":221620,"modificationTime":1766549229794,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.299\",\"xref\":\"1.10.10.10:FF:000630\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1279013\",\"xref\":\"tvo:TVG1279013\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00002-4a123736-c6f3-4be0-a4ac-ebae82ba6a92-c000.snappy.parquet","partitionValues":{},"size":201131,"modificationTime":1766549229786,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00546ee8
-4f73-5c85-8b07\",\"xref_type\":\"ABCD\",\"xref_value\":\"1.1.1.205\",\"xref\":\"1.10.238.260:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1340828\",\"xref\":\"tvo:TVG1340828\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00005-5b226a11-c46b-438d-b968-70bcca53330e-c000.snappy.parquet","partitionValues":{},"size":175090,"modificationTime":1766549221499,"dataChange":false,"stats":"{\"numRecords\":22528,\"minValues\":{\"entity_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.1060.10\",\"xref\":\"1.10.1060.10:FF:000031\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1107082\",\"xref\":\"tvo:TVG1107082\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00006-1d2d1fa4-982f-439f-b95c-d2e33d9f2725-c000.snappy.parquet","partitionValues":{},"size":203194,"modificationTime":1766549229787,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_0102d35c-20a5-5f8d-8175\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.150.20\",\"xref\":\"1.10.287.370:FF:000013\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1514630\",\"xref\":\"tvo:TVG1514630\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00003-03c3d6f3-c92e-43db-9238-faffadb4184f-c000.snappy.parquet","partitionValues":{},"size":229024,"modificationTime":1766549221440,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.-\",\"xref\":\"1.10.150.120:FF:000003\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TV
G1185779\",\"xref\":\"tvo:TVG1185779\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00001-5c0469f6-368d-4478-aa79-cb054e1ae426-c000.snappy.parquet","partitionValues":{},"size":207444,"modificationTime":1766549221439,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.122\",\"xref\":\"1.10.390.10:FF:000006\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1387115\",\"xref\":\"tvo:TVG1387115\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00003-2aebe69d-245d-4cad-aac9-28aa219aa4a7-c000.snappy.parquet","partitionValues":{},"size":172951,"modificationTime":1766549236077,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.1690\",\"xref\":\"1.10.10.460:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG0437556\",\"xref\":\"tvo:TVG0437556\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00001-1fc7356c-e1b3-4a0b-88c6-0bcdaf52eea6-c000.snappy.parquet","partitionValues":{},"size":195277,"modificationTime":1766549229793,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00df9ef5-d9b4-5654-9d2a\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.34\",\"xref\":\"1.10.12.10:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1430123\",\"xref\":\"tvo:TVG1430123\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}]} diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000003.json 
b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000003.json new file mode 100644 index 0000000..fc7266e --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000003.json @@ -0,0 +1,9 @@ +{"commitInfo":{"timestamp":1766549236100,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"169602","numOutputBytes":"1377188"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"7f55d115-b189-4d98-8cce-09ce06e8eca0"}} +{"add":{"path":"part-00000-7973428f-e648-454c-a606-b64ffc73f3db-c000.snappy.parquet","partitionValues":{},"size":181264,"modificationTime":1766549236090,"dataChange":true,"stats":"{\"numRecords\":20480,\"minValues\":{\"entity_id\":\"cdm_prot_00d4680f-fe29-598a-9385\",\"xref_type\":\"ABCD\",\"xref_value\":\"1.1.1.302\",\"xref\":\"1.10.10.10:FF:000083\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcedd0e-face-5ec6-8e0b\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1570739\",\"xref\":\"tvo:TVG1570739\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} +{"add":{"path":"part-00001-cb441f11-8637-4e34-af70-446c9277c22f-c000.snappy.parquet","partitionValues":{},"size":181482,"modificationTime":1766549236060,"dataChange":true,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00365069-eeac-5442-8f11\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.10.1410:FF:000002\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe82106c-a4bc-5a9c-8159\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1279046\",\"xref\":\"tvo:TVG1279046\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} 
+{"add":{"path":"part-00002-cc877dc2-5e4e-44db-98eb-701878729795-c000.snappy.parquet","partitionValues":{},"size":150031,"modificationTime":1766549236070,"dataChange":true,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_0061faab-bd88-562c-8a31\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.1200.240\",\"xref\":\"1.10.1200.240:FF:000003\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff216b32-af2a-545f-8e61\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG0437979\",\"xref\":\"tvo:TVG0437979\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} +{"add":{"path":"part-00003-2aebe69d-245d-4cad-aac9-28aa219aa4a7-c000.snappy.parquet","partitionValues":{},"size":172951,"modificationTime":1766549236077,"dataChange":true,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.1690\",\"xref\":\"1.10.10.460:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG0437556\",\"xref\":\"tvo:TVG0437556\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} +{"add":{"path":"part-00004-fff20f27-90e3-422c-ac10-fa544591cc46-c000.snappy.parquet","partitionValues":{},"size":165085,"modificationTime":1766549236070,"dataChange":true,"stats":"{\"numRecords\":20480,\"minValues\":{\"entity_id\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.1950\",\"xref\":\"1.10.10.60:FF:000024\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffbd3905-3836-5893-bd75\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1510824\",\"xref\":\"tvo:TVG1510824\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} 
+{"add":{"path":"part-00005-ba1ba60b-f4bf-4cf1-a328-dc5950f4925d-c000.snappy.parquet","partitionValues":{},"size":161631,"modificationTime":1766549236098,"dataChange":true,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.10.10:FF:000449\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1311805\",\"xref\":\"tvo:TVG1311805\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} +{"add":{"path":"part-00006-e2ec48f8-d8ef-4fe7-8f28-987148693d9e-c000.snappy.parquet","partitionValues":{},"size":158382,"modificationTime":1766549236066,"dataChange":true,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00219a7c-c28c-5aaf-928e\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.455.10:FF:000011\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe67f279-f090-5c4e-b38d\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1338731\",\"xref\":\"tvo:TVG1338731\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} +{"add":{"path":"part-00007-c778ca68-9d4c-442c-b8cf-0f9fc8b451fc-c000.snappy.parquet","partitionValues":{},"size":206362,"modificationTime":1766549236079,"dataChange":true,"stats":"{\"numRecords\":21122,\"minValues\":{\"entity_id\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.95\",\"xref\":\"1.10.287.990:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1046103\",\"xref\":\"tvo:TVG1046103\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000004.crc b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000004.crc new 
file mode 100644 index 0000000..2ee4a12 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000004.crc @@ -0,0 +1 @@ +{"txnId":"4d485990-fc13-40fb-b4b9-089e4ce94b66","tableSizeBytes":6152005,"numFiles":32,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"0a3243aa-0303-4d93-bb82-a850749794af","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"xref_type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"xref_value\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"xref\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549214447},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[{"path":"part-00004-fff20f27-90e3-422c-ac10-fa544591cc46-c000.snappy.parquet","partitionValues":{},"size":165085,"modificationTime":1766549236070,"dataChange":false,"stats":"{\"numRecords\":20480,\"minValues\":{\"entity_id\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.1950\",\"xref\":\"1.10.10.60:FF:000024\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffbd3905-3836-5893-bd75\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1510824\",\"xref\":\"tvo:TVG1510824\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00000-8efd2d36-ce8e-4199-9258-b010a08236aa-c000.snappy.parquet","partitionValues":{},"size":100804,"modificationTime":1766549241857,"dataChange":false,"stats":"{\"numRecords\":16384,\"minValues\":{\"entity_id\":\"cdm_prot_0003b7cb-bf13-5646-9978\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.730.10:FF:000002\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fdc49789-52de-5a6c-95cc\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1307912
\",\"xref\":\"tvo:TVG1307912\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00002-cc877dc2-5e4e-44db-98eb-701878729795-c000.snappy.parquet","partitionValues":{},"size":150031,"modificationTime":1766549236070,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_0061faab-bd88-562c-8a31\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.1200.240\",\"xref\":\"1.10.1200.240:FF:000003\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff216b32-af2a-545f-8e61\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG0437979\",\"xref\":\"tvo:TVG0437979\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00000-bee3e108-f720-48d4-89d3-c75a8aa4dfaa-c000.snappy.parquet","partitionValues":{},"size":197409,"modificationTime":1766549229782,"dataChange":false,"stats":"{\"numRecords\":20480,\"minValues\":{\"entity_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.23\",\"xref\":\"1.10.1060.10:FF:000026\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1357926\",\"xref\":\"tvo:TVG1357926\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00007-263935ae-f03a-4a0d-82eb-8239c2044ae4-c000.snappy.parquet","partitionValues":{},"size":259863,"modificationTime":1766549241825,"dataChange":false,"stats":"{\"numRecords\":16567,\"minValues\":{\"entity_id\":\"cdm_prot_0244c324-3bb8-5603-9862\",\"xref_type\":\"ABCD\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.10.10:FF:000189\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffd6bc2d-9fdc-5fb9-a121\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG0885567\",\"xref\":\"tvo:TVG0885567\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00004-b891f437-1eb9-44d4-b41b-00d9b1413c72-c000.snappy.parquet","partitionValues":{},"size":193600,"modi
ficationTime":1766549221485,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_023b8b27-1f32-5069-954b\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.3\",\"xref\":\"1.10.132.60:FF:000013\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe5e21c9-fae5-5701-b99d\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1298537\",\"xref\":\"tvo:TVG1298537\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00007-74b7bec4-aaab-4bd6-8078-75cfcf5cf393-c000.snappy.parquet","partitionValues":{},"size":204770,"modificationTime":1766549221447,"dataChange":false,"stats":"{\"numRecords\":22276,\"minValues\":{\"entity_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.-.-.-\",\"xref\":\"1.10.3860.10:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"xref_type\":\"eggNOG\",\"xref_value\":\"vmo:VMUT_1235\",\"xref\":\"vmo:VMUT_1235\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00005-ba1ba60b-f4bf-4cf1-a328-dc5950f4925d-c000.snappy.parquet","partitionValues":{},"size":161631,"modificationTime":1766549236098,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.10.10:FF:000449\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1311805\",\"xref\":\"tvo:TVG1311805\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00000-262596ac-a964-4afa-8547-f24c3f0a871a-c000.snappy.parquet","partitionValues":{},"size":202528,"modificationTime":1766549221447,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"xref_type\":\"ABCD\",\"xref_value\":\"1.1.1.-\",\"xref\":\"1.10.1140.10:FF:000002\"},\"
maxValues\":{\"entity_id\":\"cdm_prot_ffa4eefe-b062-5f26-8f77\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1115434\",\"xref\":\"tvo:TVG1115434\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00007-1af16a96-34ca-46e6-add8-cad8393ecb39-c000.snappy.parquet","partitionValues":{},"size":179290,"modificationTime":1766549229768,"dataChange":false,"stats":"{\"numRecords\":21127,\"minValues\":{\"entity_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.275.10:FF:000012\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"xref_type\":\"eggNOG\",\"xref_value\":\"vdi:Vdis_0543\",\"xref\":\"vdi:Vdis_0543\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00004-a0b07ff5-8921-46bc-b107-b3974f5812e0-c000.snappy.parquet","partitionValues":{},"size":210680,"modificationTime":1766549241829,"dataChange":false,"stats":"{\"numRecords\":17408,\"minValues\":{\"entity_id\":\"cdm_prot_0007879e-35c8-5917-ad11\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.-.-.-\",\"xref\":\"1.10.150.20:FF:000005\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe66051-6a0a-55cc-81b2\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"vdi:Vdis_0418\",\"xref\":\"vdi:Vdis_0418\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00006-e2ec48f8-d8ef-4fe7-8f28-987148693d9e-c000.snappy.parquet","partitionValues":{},"size":158382,"modificationTime":1766549236066,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00219a7c-c28c-5aaf-928e\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.455.10:FF:000011\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe67f279-f090-5c4e-b38d\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1338731\",\"xref\":\"tvo:TVG1338731\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},
{"path":"part-00001-cb441f11-8637-4e34-af70-446c9277c22f-c000.snappy.parquet","partitionValues":{},"size":181482,"modificationTime":1766549236060,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00365069-eeac-5442-8f11\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.10.1410:FF:000002\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe82106c-a4bc-5a9c-8159\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1279046\",\"xref\":\"tvo:TVG1279046\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00003-74b068ca-634a-40e6-81ee-258b7182d0db-c000.snappy.parquet","partitionValues":{},"size":169011,"modificationTime":1766549241835,"dataChange":false,"stats":"{\"numRecords\":17408,\"minValues\":{\"entity_id\":\"cdm_prot_0185e048-25c8-50dd-bfbf\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.10.10:FF:000655\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe66808c-aab1-5f12-970e\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1381191\",\"xref\":\"tvo:TVG1381191\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00006-9c0db48f-a6f0-4bc0-974d-b4b196673337-c000.snappy.parquet","partitionValues":{},"size":199565,"modificationTime":1766549221433,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.261\",\"xref\":\"1.10.10.410:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1288826\",\"xref\":\"tvo:TVG1288826\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00005-f82500ca-9132-449d-a9ac-9851bee3578c-c000.snappy.parquet","partitionValues":{},"size":205533,"modificationTime":1766549229782,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\
"cdm_prot_00957fe6-8bfd-5ecd-9b08\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.169\",\"xref\":\"1.10.10.10:FF:000214\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1118724\",\"xref\":\"tvo:TVG1118724\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00006-2bb077fe-e445-4ea6-a113-be61db1e5c67-c000.snappy.parquet","partitionValues":{},"size":262418,"modificationTime":1766549241848,"dataChange":false,"stats":"{\"numRecords\":17408,\"minValues\":{\"entity_id\":\"cdm_prot_00666a93-0a4c-5498-8cac\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.-.-.-\",\"xref\":\"1.10.10.10:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff9e535d-7847-5448-b5f1\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG0304564\",\"xref\":\"tvo:TVG0304564\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00007-c778ca68-9d4c-442c-b8cf-0f9fc8b451fc-c000.snappy.parquet","partitionValues":{},"size":206362,"modificationTime":1766549236079,"dataChange":false,"stats":"{\"numRecords\":21122,\"minValues\":{\"entity_id\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.95\",\"xref\":\"1.10.287.990:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1046103\",\"xref\":\"tvo:TVG1046103\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00002-17744fc6-04d1-4419-a97e-18ed02b79a6e-c000.snappy.parquet","partitionValues":{},"size":225174,"modificationTime":1766549221484,"dataChange":false,"stats":"{\"numRecords\":22528,\"minValues\":{\"entity_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.10.10:FF:000502\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"xref_type\":\"iPTMnet\",\"xref_val
ue\":\"tvo:TVG1580555\",\"xref\":\"tvo:TVG1580555\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00000-7973428f-e648-454c-a606-b64ffc73f3db-c000.snappy.parquet","partitionValues":{},"size":181264,"modificationTime":1766549236090,"dataChange":false,"stats":"{\"numRecords\":20480,\"minValues\":{\"entity_id\":\"cdm_prot_00d4680f-fe29-598a-9385\",\"xref_type\":\"ABCD\",\"xref_value\":\"1.1.1.302\",\"xref\":\"1.10.10.10:FF:000083\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcedd0e-face-5ec6-8e0b\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1570739\",\"xref\":\"tvo:TVG1570739\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00004-3aba1d16-85c4-4a3b-a44f-edb66cb87c03-c000.snappy.parquet","partitionValues":{},"size":216341,"modificationTime":1766549229789,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.1020.10:FF:000003\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1248551\",\"xref\":\"tvo:TVG1248551\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00003-8df64f03-5154-4792-9f94-671fffad6fe6-c000.snappy.parquet","partitionValues":{},"size":221620,"modificationTime":1766549229794,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.299\",\"xref\":\"1.10.10.10:FF:000630\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1279013\",\"xref\":\"tvo:TVG1279013\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00002-4a123736-c6f3-4be0-a4ac-ebae82ba6a92-c000.snappy.parquet","partitionValues":{},"size
":201131,"modificationTime":1766549229786,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"xref_type\":\"ABCD\",\"xref_value\":\"1.1.1.205\",\"xref\":\"1.10.238.260:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1340828\",\"xref\":\"tvo:TVG1340828\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00005-5b226a11-c46b-438d-b968-70bcca53330e-c000.snappy.parquet","partitionValues":{},"size":175090,"modificationTime":1766549221499,"dataChange":false,"stats":"{\"numRecords\":22528,\"minValues\":{\"entity_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.1060.10\",\"xref\":\"1.10.1060.10:FF:000031\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1107082\",\"xref\":\"tvo:TVG1107082\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00006-1d2d1fa4-982f-439f-b95c-d2e33d9f2725-c000.snappy.parquet","partitionValues":{},"size":203194,"modificationTime":1766549229787,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_0102d35c-20a5-5f8d-8175\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.150.20\",\"xref\":\"1.10.287.370:FF:000013\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1514630\",\"xref\":\"tvo:TVG1514630\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00002-111b3d32-a822-4c10-9e4b-aaea21460a68-c000.snappy.parquet","partitionValues":{},"size":137797,"modificationTime":1766549241857,"dataChange":false,"stats":"{\"numRecords\":16384,\"minValues\":{\"entity_id\":\"cdm_prot_016cb1c0-8eae-5633-a80d\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.1\",\"xref\":\"1
.10.10.10:FF:000264\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9aba-bce9-56de-b269\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1467237\",\"xref\":\"tvo:TVG1467237\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00003-03c3d6f3-c92e-43db-9238-faffadb4184f-c000.snappy.parquet","partitionValues":{},"size":229024,"modificationTime":1766549221440,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.-\",\"xref\":\"1.10.150.120:FF:000003\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"tvo:TVG1185779\",\"xref\":\"tvo:TVG1185779\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00001-aa535d72-deb5-4284-8f04-414aa19f182f-c000.snappy.parquet","partitionValues":{},"size":122109,"modificationTime":1766549241825,"dataChange":false,"stats":"{\"numRecords\":17408,\"minValues\":{\"entity_id\":\"cdm_prot_01672ce0-33eb-50c0-8bb3\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.240.10\",\"xref\":\"1.10.240.10:FF:000007\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff62a17a-c48e-5c25-b1ca\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1345067\",\"xref\":\"tvo:TVG1345067\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00001-5c0469f6-368d-4478-aa79-cb054e1ae426-c000.snappy.parquet","partitionValues":{},"size":207444,"modificationTime":1766549221439,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.122\",\"xref\":\"1.10.390.10:FF:000006\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1387115\",\"xref\":\"tvo:TVG1387115\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xr
ef_value\":0,\"xref\":0}}"},{"path":"part-00003-2aebe69d-245d-4cad-aac9-28aa219aa4a7-c000.snappy.parquet","partitionValues":{},"size":172951,"modificationTime":1766549236077,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.1690\",\"xref\":\"1.10.10.460:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG0437556\",\"xref\":\"tvo:TVG0437556\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00005-070d9ade-f0b9-46b6-a11b-a05c3085e84d-c000.snappy.parquet","partitionValues":{},"size":255145,"modificationTime":1766549241818,"dataChange":false,"stats":"{\"numRecords\":16384,\"minValues\":{\"entity_id\":\"cdm_prot_003753a9-9a55-505c-8ea7\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.-.-.-\",\"xref\":\"1.10.10.10:FF:001385\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb5b181-1633-55f5-83ee\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1472127\",\"xref\":\"tvo:TVG1472127\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"},{"path":"part-00001-1fc7356c-e1b3-4a0b-88c6-0bcdaf52eea6-c000.snappy.parquet","partitionValues":{},"size":195277,"modificationTime":1766549229793,"dataChange":false,"stats":"{\"numRecords\":21504,\"minValues\":{\"entity_id\":\"cdm_prot_00df9ef5-d9b4-5654-9d2a\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.34\",\"xref\":\"1.10.12.10:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1430123\",\"xref\":\"tvo:TVG1430123\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}]} diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000004.json b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000004.json 
new file mode 100644 index 0000000..22ed2a5 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/cross_references/_delta_log/00000000000000000004.json @@ -0,0 +1,9 @@ +{"commitInfo":{"timestamp":1766549241860,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":3,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"135351","numOutputBytes":"1517827"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"4d485990-fc13-40fb-b4b9-089e4ce94b66"}} +{"add":{"path":"part-00000-8efd2d36-ce8e-4199-9258-b010a08236aa-c000.snappy.parquet","partitionValues":{},"size":100804,"modificationTime":1766549241857,"dataChange":true,"stats":"{\"numRecords\":16384,\"minValues\":{\"entity_id\":\"cdm_prot_0003b7cb-bf13-5646-9978\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.730.10:FF:000002\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fdc49789-52de-5a6c-95cc\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1307912\",\"xref\":\"tvo:TVG1307912\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} +{"add":{"path":"part-00001-aa535d72-deb5-4284-8f04-414aa19f182f-c000.snappy.parquet","partitionValues":{},"size":122109,"modificationTime":1766549241825,"dataChange":true,"stats":"{\"numRecords\":17408,\"minValues\":{\"entity_id\":\"cdm_prot_01672ce0-33eb-50c0-8bb3\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.240.10\",\"xref\":\"1.10.240.10:FF:000007\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff62a17a-c48e-5c25-b1ca\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1345067\",\"xref\":\"tvo:TVG1345067\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} 
+{"add":{"path":"part-00002-111b3d32-a822-4c10-9e4b-aaea21460a68-c000.snappy.parquet","partitionValues":{},"size":137797,"modificationTime":1766549241857,"dataChange":true,"stats":"{\"numRecords\":16384,\"minValues\":{\"entity_id\":\"cdm_prot_016cb1c0-8eae-5633-a80d\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.1.1.1\",\"xref\":\"1.10.10.10:FF:000264\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9aba-bce9-56de-b269\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1467237\",\"xref\":\"tvo:TVG1467237\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} +{"add":{"path":"part-00003-74b068ca-634a-40e6-81ee-258b7182d0db-c000.snappy.parquet","partitionValues":{},"size":169011,"modificationTime":1766549241835,"dataChange":true,"stats":"{\"numRecords\":17408,\"minValues\":{\"entity_id\":\"cdm_prot_0185e048-25c8-50dd-bfbf\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.10.10:FF:000655\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe66808c-aab1-5f12-970e\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1381191\",\"xref\":\"tvo:TVG1381191\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} +{"add":{"path":"part-00004-a0b07ff5-8921-46bc-b107-b3974f5812e0-c000.snappy.parquet","partitionValues":{},"size":210680,"modificationTime":1766549241829,"dataChange":true,"stats":"{\"numRecords\":17408,\"minValues\":{\"entity_id\":\"cdm_prot_0007879e-35c8-5917-ad11\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.-.-.-\",\"xref\":\"1.10.150.20:FF:000005\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe66051-6a0a-55cc-81b2\",\"xref_type\":\"iPTMnet\",\"xref_value\":\"vdi:Vdis_0418\",\"xref\":\"vdi:Vdis_0418\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} 
+{"add":{"path":"part-00005-070d9ade-f0b9-46b6-a11b-a05c3085e84d-c000.snappy.parquet","partitionValues":{},"size":255145,"modificationTime":1766549241818,"dataChange":true,"stats":"{\"numRecords\":16384,\"minValues\":{\"entity_id\":\"cdm_prot_003753a9-9a55-505c-8ea7\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.-.-.-\",\"xref\":\"1.10.10.10:FF:001385\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb5b181-1633-55f5-83ee\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG1472127\",\"xref\":\"tvo:TVG1472127\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} +{"add":{"path":"part-00006-2bb077fe-e445-4ea6-a113-be61db1e5c67-c000.snappy.parquet","partitionValues":{},"size":262418,"modificationTime":1766549241848,"dataChange":true,"stats":"{\"numRecords\":17408,\"minValues\":{\"entity_id\":\"cdm_prot_00666a93-0a4c-5498-8cac\",\"xref_type\":\"AlphaFoldDB\",\"xref_value\":\"1.-.-.-\",\"xref\":\"1.10.10.10:FF:000001\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff9e535d-7847-5448-b5f1\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG0304564\",\"xref\":\"tvo:TVG0304564\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} +{"add":{"path":"part-00007-263935ae-f03a-4a0d-82eb-8239c2044ae4-c000.snappy.parquet","partitionValues":{},"size":259863,"modificationTime":1766549241825,"dataChange":true,"stats":"{\"numRecords\":16567,\"minValues\":{\"entity_id\":\"cdm_prot_0244c324-3bb8-5603-9862\",\"xref_type\":\"ABCD\",\"xref_value\":\"1.10.10.10\",\"xref\":\"1.10.10.10:FF:000189\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffd6bc2d-9fdc-5fb9-a121\",\"xref_type\":\"eggNOG\",\"xref_value\":\"tvo:TVG0885567\",\"xref\":\"tvo:TVG0885567\"},\"nullCount\":{\"entity_id\":0,\"xref_type\":0,\"xref_value\":0,\"xref\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00000-262596ac-a964-4afa-8547-f24c3f0a871a-c000.snappy.parquet 
b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00000-262596ac-a964-4afa-8547-f24c3f0a871a-c000.snappy.parquet new file mode 100644 index 0000000..291cc2e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00000-262596ac-a964-4afa-8547-f24c3f0a871a-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00000-7973428f-e648-454c-a606-b64ffc73f3db-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00000-7973428f-e648-454c-a606-b64ffc73f3db-c000.snappy.parquet new file mode 100644 index 0000000..d036f3e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00000-7973428f-e648-454c-a606-b64ffc73f3db-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00000-8efd2d36-ce8e-4199-9258-b010a08236aa-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00000-8efd2d36-ce8e-4199-9258-b010a08236aa-c000.snappy.parquet new file mode 100644 index 0000000..0c8871f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00000-8efd2d36-ce8e-4199-9258-b010a08236aa-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00000-bee3e108-f720-48d4-89d3-c75a8aa4dfaa-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00000-bee3e108-f720-48d4-89d3-c75a8aa4dfaa-c000.snappy.parquet new file mode 100644 index 0000000..3b2e9a2 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00000-bee3e108-f720-48d4-89d3-c75a8aa4dfaa-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00001-1fc7356c-e1b3-4a0b-88c6-0bcdaf52eea6-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00001-1fc7356c-e1b3-4a0b-88c6-0bcdaf52eea6-c000.snappy.parquet new file mode 
100644 index 0000000..f6a569d Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00001-1fc7356c-e1b3-4a0b-88c6-0bcdaf52eea6-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00001-5c0469f6-368d-4478-aa79-cb054e1ae426-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00001-5c0469f6-368d-4478-aa79-cb054e1ae426-c000.snappy.parquet new file mode 100644 index 0000000..b357714 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00001-5c0469f6-368d-4478-aa79-cb054e1ae426-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00001-aa535d72-deb5-4284-8f04-414aa19f182f-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00001-aa535d72-deb5-4284-8f04-414aa19f182f-c000.snappy.parquet new file mode 100644 index 0000000..1c2a2df Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00001-aa535d72-deb5-4284-8f04-414aa19f182f-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00001-cb441f11-8637-4e34-af70-446c9277c22f-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00001-cb441f11-8637-4e34-af70-446c9277c22f-c000.snappy.parquet new file mode 100644 index 0000000..7b0854f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00001-cb441f11-8637-4e34-af70-446c9277c22f-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00002-111b3d32-a822-4c10-9e4b-aaea21460a68-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00002-111b3d32-a822-4c10-9e4b-aaea21460a68-c000.snappy.parquet new file mode 100644 index 0000000..68052a9 Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00002-111b3d32-a822-4c10-9e4b-aaea21460a68-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00002-17744fc6-04d1-4419-a97e-18ed02b79a6e-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00002-17744fc6-04d1-4419-a97e-18ed02b79a6e-c000.snappy.parquet new file mode 100644 index 0000000..2cad745 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00002-17744fc6-04d1-4419-a97e-18ed02b79a6e-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00002-4a123736-c6f3-4be0-a4ac-ebae82ba6a92-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00002-4a123736-c6f3-4be0-a4ac-ebae82ba6a92-c000.snappy.parquet new file mode 100644 index 0000000..5d20eb0 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00002-4a123736-c6f3-4be0-a4ac-ebae82ba6a92-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00002-cc877dc2-5e4e-44db-98eb-701878729795-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00002-cc877dc2-5e4e-44db-98eb-701878729795-c000.snappy.parquet new file mode 100644 index 0000000..4d3040e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00002-cc877dc2-5e4e-44db-98eb-701878729795-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00003-03c3d6f3-c92e-43db-9238-faffadb4184f-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00003-03c3d6f3-c92e-43db-9238-faffadb4184f-c000.snappy.parquet new file mode 100644 index 0000000..150e682 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00003-03c3d6f3-c92e-43db-9238-faffadb4184f-c000.snappy.parquet differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00003-2aebe69d-245d-4cad-aac9-28aa219aa4a7-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00003-2aebe69d-245d-4cad-aac9-28aa219aa4a7-c000.snappy.parquet new file mode 100644 index 0000000..6f6075d Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00003-2aebe69d-245d-4cad-aac9-28aa219aa4a7-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00003-74b068ca-634a-40e6-81ee-258b7182d0db-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00003-74b068ca-634a-40e6-81ee-258b7182d0db-c000.snappy.parquet new file mode 100644 index 0000000..bd94a46 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00003-74b068ca-634a-40e6-81ee-258b7182d0db-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00003-8df64f03-5154-4792-9f94-671fffad6fe6-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00003-8df64f03-5154-4792-9f94-671fffad6fe6-c000.snappy.parquet new file mode 100644 index 0000000..e168d5a Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00003-8df64f03-5154-4792-9f94-671fffad6fe6-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00004-3aba1d16-85c4-4a3b-a44f-edb66cb87c03-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00004-3aba1d16-85c4-4a3b-a44f-edb66cb87c03-c000.snappy.parquet new file mode 100644 index 0000000..a6cbc69 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00004-3aba1d16-85c4-4a3b-a44f-edb66cb87c03-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00004-a0b07ff5-8921-46bc-b107-b3974f5812e0-c000.snappy.parquet 
b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00004-a0b07ff5-8921-46bc-b107-b3974f5812e0-c000.snappy.parquet new file mode 100644 index 0000000..a251382 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00004-a0b07ff5-8921-46bc-b107-b3974f5812e0-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00004-b891f437-1eb9-44d4-b41b-00d9b1413c72-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00004-b891f437-1eb9-44d4-b41b-00d9b1413c72-c000.snappy.parquet new file mode 100644 index 0000000..a14370a Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00004-b891f437-1eb9-44d4-b41b-00d9b1413c72-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00004-fff20f27-90e3-422c-ac10-fa544591cc46-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00004-fff20f27-90e3-422c-ac10-fa544591cc46-c000.snappy.parquet new file mode 100644 index 0000000..80e34ad Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00004-fff20f27-90e3-422c-ac10-fa544591cc46-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00005-070d9ade-f0b9-46b6-a11b-a05c3085e84d-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00005-070d9ade-f0b9-46b6-a11b-a05c3085e84d-c000.snappy.parquet new file mode 100644 index 0000000..9e7dc1f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00005-070d9ade-f0b9-46b6-a11b-a05c3085e84d-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00005-5b226a11-c46b-438d-b968-70bcca53330e-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00005-5b226a11-c46b-438d-b968-70bcca53330e-c000.snappy.parquet new file mode 
100644 index 0000000..54ee3b6 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00005-5b226a11-c46b-438d-b968-70bcca53330e-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00005-ba1ba60b-f4bf-4cf1-a328-dc5950f4925d-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00005-ba1ba60b-f4bf-4cf1-a328-dc5950f4925d-c000.snappy.parquet new file mode 100644 index 0000000..fe73297 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00005-ba1ba60b-f4bf-4cf1-a328-dc5950f4925d-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00005-f82500ca-9132-449d-a9ac-9851bee3578c-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00005-f82500ca-9132-449d-a9ac-9851bee3578c-c000.snappy.parquet new file mode 100644 index 0000000..fd14a96 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00005-f82500ca-9132-449d-a9ac-9851bee3578c-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00006-1d2d1fa4-982f-439f-b95c-d2e33d9f2725-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00006-1d2d1fa4-982f-439f-b95c-d2e33d9f2725-c000.snappy.parquet new file mode 100644 index 0000000..69457bb Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00006-1d2d1fa4-982f-439f-b95c-d2e33d9f2725-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00006-2bb077fe-e445-4ea6-a113-be61db1e5c67-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00006-2bb077fe-e445-4ea6-a113-be61db1e5c67-c000.snappy.parquet new file mode 100644 index 0000000..91bb12c Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00006-2bb077fe-e445-4ea6-a113-be61db1e5c67-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00006-9c0db48f-a6f0-4bc0-974d-b4b196673337-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00006-9c0db48f-a6f0-4bc0-974d-b4b196673337-c000.snappy.parquet new file mode 100644 index 0000000..4352b52 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00006-9c0db48f-a6f0-4bc0-974d-b4b196673337-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00006-e2ec48f8-d8ef-4fe7-8f28-987148693d9e-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00006-e2ec48f8-d8ef-4fe7-8f28-987148693d9e-c000.snappy.parquet new file mode 100644 index 0000000..590c4e8 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00006-e2ec48f8-d8ef-4fe7-8f28-987148693d9e-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00007-1af16a96-34ca-46e6-add8-cad8393ecb39-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00007-1af16a96-34ca-46e6-add8-cad8393ecb39-c000.snappy.parquet new file mode 100644 index 0000000..e59476e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00007-1af16a96-34ca-46e6-add8-cad8393ecb39-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00007-263935ae-f03a-4a0d-82eb-8239c2044ae4-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00007-263935ae-f03a-4a0d-82eb-8239c2044ae4-c000.snappy.parquet new file mode 100644 index 0000000..9d31ccf Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00007-263935ae-f03a-4a0d-82eb-8239c2044ae4-c000.snappy.parquet differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00007-74b7bec4-aaab-4bd6-8078-75cfcf5cf393-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00007-74b7bec4-aaab-4bd6-8078-75cfcf5cf393-c000.snappy.parquet new file mode 100644 index 0000000..0b849bd Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00007-74b7bec4-aaab-4bd6-8078-75cfcf5cf393-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00007-c778ca68-9d4c-442c-b8cf-0f9fc8b451fc-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00007-c778ca68-9d4c-442c-b8cf-0f9fc8b451fc-c000.snappy.parquet new file mode 100644 index 0000000..8c29ad1 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/cross_references/part-00007-c778ca68-9d4c-442c-b8cf-0f9fc8b451fc-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00000-26253546-0abd-4ed6-8c55-17dfef347666-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00000-26253546-0abd-4ed6-8c55-17dfef347666-c000.snappy.parquet.crc new file mode 100644 index 0000000..3f5fa7b Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00000-26253546-0abd-4ed6-8c55-17dfef347666-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00000-573241d1-7050-4657-b105-c3c5122440d1-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00000-573241d1-7050-4657-b105-c3c5122440d1-c000.snappy.parquet.crc new file mode 100644 index 0000000..52c893f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00000-573241d1-7050-4657-b105-c3c5122440d1-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00000-75b2edbc-ef3f-4b58-a315-cc5098832067-c000.snappy.parquet.crc 
b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00000-75b2edbc-ef3f-4b58-a315-cc5098832067-c000.snappy.parquet.crc new file mode 100644 index 0000000..a2b016b Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00000-75b2edbc-ef3f-4b58-a315-cc5098832067-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00000-df990575-6363-490d-8af4-6381c1fd05de-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00000-df990575-6363-490d-8af4-6381c1fd05de-c000.snappy.parquet.crc new file mode 100644 index 0000000..663ccdc Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00000-df990575-6363-490d-8af4-6381c1fd05de-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00001-006ac691-2d26-4839-932d-bf78580789fb-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00001-006ac691-2d26-4839-932d-bf78580789fb-c000.snappy.parquet.crc new file mode 100644 index 0000000..581f90f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00001-006ac691-2d26-4839-932d-bf78580789fb-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00001-3c82d97a-4e0f-412a-8297-3bf943498c02-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00001-3c82d97a-4e0f-412a-8297-3bf943498c02-c000.snappy.parquet.crc new file mode 100644 index 0000000..3c3b91c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00001-3c82d97a-4e0f-412a-8297-3bf943498c02-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00001-94ad28aa-5ae5-48c4-b30e-a2a33529c4d4-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00001-94ad28aa-5ae5-48c4-b30e-a2a33529c4d4-c000.snappy.parquet.crc new file mode 100644 index 0000000..1719165 Binary files 
/dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00001-94ad28aa-5ae5-48c4-b30e-a2a33529c4d4-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00001-f7e2e2d6-8a2d-49d6-9d74-98af5161c3d3-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00001-f7e2e2d6-8a2d-49d6-9d74-98af5161c3d3-c000.snappy.parquet.crc new file mode 100644 index 0000000..2c37fef Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00001-f7e2e2d6-8a2d-49d6-9d74-98af5161c3d3-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00002-498d9893-5b55-4b0e-920a-537bcefe3b57-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00002-498d9893-5b55-4b0e-920a-537bcefe3b57-c000.snappy.parquet.crc new file mode 100644 index 0000000..f5b5852 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00002-498d9893-5b55-4b0e-920a-537bcefe3b57-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00002-9433b0fe-4856-48e3-b830-2f9cc2e1c4ba-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00002-9433b0fe-4856-48e3-b830-2f9cc2e1c4ba-c000.snappy.parquet.crc new file mode 100644 index 0000000..e14b6f5 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00002-9433b0fe-4856-48e3-b830-2f9cc2e1c4ba-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00002-a3c8d634-57f3-49a6-a095-689fe9930cc4-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00002-a3c8d634-57f3-49a6-a095-689fe9930cc4-c000.snappy.parquet.crc new file mode 100644 index 0000000..7ebdbcd Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00002-a3c8d634-57f3-49a6-a095-689fe9930cc4-c000.snappy.parquet.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00002-e21f5f3e-ffef-441e-9416-cddfc7945b7d-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00002-e21f5f3e-ffef-441e-9416-cddfc7945b7d-c000.snappy.parquet.crc new file mode 100644 index 0000000..47a8d57 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00002-e21f5f3e-ffef-441e-9416-cddfc7945b7d-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00003-0b4ced07-8b4e-4c9a-b8b4-fcd3b6d39a5c-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00003-0b4ced07-8b4e-4c9a-b8b4-fcd3b6d39a5c-c000.snappy.parquet.crc new file mode 100644 index 0000000..8895fac Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00003-0b4ced07-8b4e-4c9a-b8b4-fcd3b6d39a5c-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00003-103ab579-ba95-49f6-b98f-ec60b776cedf-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00003-103ab579-ba95-49f6-b98f-ec60b776cedf-c000.snappy.parquet.crc new file mode 100644 index 0000000..b56d491 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00003-103ab579-ba95-49f6-b98f-ec60b776cedf-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00003-c2c4353d-923b-4d02-bcc0-c567d8152c24-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00003-c2c4353d-923b-4d02-bcc0-c567d8152c24-c000.snappy.parquet.crc new file mode 100644 index 0000000..2bc5555 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00003-c2c4353d-923b-4d02-bcc0-c567d8152c24-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00003-dd2f1a5e-4114-41c8-b2e2-72edba0e2d16-c000.snappy.parquet.crc 
b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00003-dd2f1a5e-4114-41c8-b2e2-72edba0e2d16-c000.snappy.parquet.crc new file mode 100644 index 0000000..7de4cb4 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00003-dd2f1a5e-4114-41c8-b2e2-72edba0e2d16-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00004-7dd0a75c-fc02-4a91-b584-4fb3805c6851-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00004-7dd0a75c-fc02-4a91-b584-4fb3805c6851-c000.snappy.parquet.crc new file mode 100644 index 0000000..774a177 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00004-7dd0a75c-fc02-4a91-b584-4fb3805c6851-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00004-838d590d-0caf-48bb-935b-4d3bcdef7e6b-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00004-838d590d-0caf-48bb-935b-4d3bcdef7e6b-c000.snappy.parquet.crc new file mode 100644 index 0000000..7479b2d Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00004-838d590d-0caf-48bb-935b-4d3bcdef7e6b-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00004-889473d1-a49a-4d1a-9822-bc64152feb8a-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00004-889473d1-a49a-4d1a-9822-bc64152feb8a-c000.snappy.parquet.crc new file mode 100644 index 0000000..b0f41f2 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00004-889473d1-a49a-4d1a-9822-bc64152feb8a-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00004-9ba0b6c6-3035-4c5a-9d00-f3210e44c1be-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00004-9ba0b6c6-3035-4c5a-9d00-f3210e44c1be-c000.snappy.parquet.crc new file mode 100644 index 0000000..a1feaeb Binary files 
/dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00004-9ba0b6c6-3035-4c5a-9d00-f3210e44c1be-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00005-00c6be40-9225-40c1-a7c9-3f800a8f9280-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00005-00c6be40-9225-40c1-a7c9-3f800a8f9280-c000.snappy.parquet.crc new file mode 100644 index 0000000..c678c00 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00005-00c6be40-9225-40c1-a7c9-3f800a8f9280-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00005-16c10944-46a4-4ffb-a8eb-c98f074c4533-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00005-16c10944-46a4-4ffb-a8eb-c98f074c4533-c000.snappy.parquet.crc new file mode 100644 index 0000000..a6d0672 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00005-16c10944-46a4-4ffb-a8eb-c98f074c4533-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00005-3b0745b2-9687-4d0b-b7af-56652f5b79d3-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00005-3b0745b2-9687-4d0b-b7af-56652f5b79d3-c000.snappy.parquet.crc new file mode 100644 index 0000000..2575aac Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00005-3b0745b2-9687-4d0b-b7af-56652f5b79d3-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00005-c44b66e7-c76c-4f20-b855-f02a508ddae1-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00005-c44b66e7-c76c-4f20-b855-f02a508ddae1-c000.snappy.parquet.crc new file mode 100644 index 0000000..0fd551a Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00005-c44b66e7-c76c-4f20-b855-f02a508ddae1-c000.snappy.parquet.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00006-902c69dd-2364-4c62-aed8-bab1f9d2bf5a-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00006-902c69dd-2364-4c62-aed8-bab1f9d2bf5a-c000.snappy.parquet.crc new file mode 100644 index 0000000..07556f4 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00006-902c69dd-2364-4c62-aed8-bab1f9d2bf5a-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00006-9d7f2f92-96db-4863-87d7-a368f31c8aed-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00006-9d7f2f92-96db-4863-87d7-a368f31c8aed-c000.snappy.parquet.crc new file mode 100644 index 0000000..882777d Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00006-9d7f2f92-96db-4863-87d7-a368f31c8aed-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00006-b5e8d6ab-9bf9-4c7f-b4be-f4a4032b4399-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00006-b5e8d6ab-9bf9-4c7f-b4be-f4a4032b4399-c000.snappy.parquet.crc new file mode 100644 index 0000000..bc936f4 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00006-b5e8d6ab-9bf9-4c7f-b4be-f4a4032b4399-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00006-cad0b36c-46f2-49e7-9889-7f4a1b44cbfb-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00006-cad0b36c-46f2-49e7-9889-7f4a1b44cbfb-c000.snappy.parquet.crc new file mode 100644 index 0000000..5e5456c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00006-cad0b36c-46f2-49e7-9889-7f4a1b44cbfb-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00007-1bc667d7-2f3a-4ece-a4a2-162e8a3f33f9-c000.snappy.parquet.crc 
b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00007-1bc667d7-2f3a-4ece-a4a2-162e8a3f33f9-c000.snappy.parquet.crc new file mode 100644 index 0000000..6907308 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00007-1bc667d7-2f3a-4ece-a4a2-162e8a3f33f9-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00007-642beb72-4230-41ef-b60a-b811933fa8a9-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00007-642beb72-4230-41ef-b60a-b811933fa8a9-c000.snappy.parquet.crc new file mode 100644 index 0000000..c9012c9 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00007-642beb72-4230-41ef-b60a-b811933fa8a9-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00007-77325026-0538-440c-8df0-2d49aa067deb-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00007-77325026-0538-440c-8df0-2d49aa067deb-c000.snappy.parquet.crc new file mode 100644 index 0000000..83fe129 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00007-77325026-0538-440c-8df0-2d49aa067deb-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/.part-00007-ac2f1ac0-a676-4934-bf9f-d6cc54d7ae01-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00007-ac2f1ac0-a676-4934-bf9f-d6cc54d7ae01-c000.snappy.parquet.crc new file mode 100644 index 0000000..4be6cfa Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/.part-00007-ac2f1ac0-a676-4934-bf9f-d6cc54d7ae01-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000000.crc.crc b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000000.crc.crc new file mode 100644 index 0000000..a8c830e Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000000.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000000.json.crc b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000000.json.crc new file mode 100644 index 0000000..7275deb Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000000.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000001.crc.crc b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000001.crc.crc new file mode 100644 index 0000000..814225f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000001.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000001.json.crc b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000001.json.crc new file mode 100644 index 0000000..1e1ff9f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000001.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000002.crc.crc b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000002.crc.crc new file mode 100644 index 0000000..eab6c64 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000002.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000002.json.crc b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000002.json.crc new file mode 100644 index 0000000..419a93b Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000002.json.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000003.crc.crc b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000003.crc.crc new file mode 100644 index 0000000..bffe2dc Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000003.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000003.json.crc b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000003.json.crc new file mode 100644 index 0000000..cbf8f40 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000003.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000004.crc.crc b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000004.crc.crc new file mode 100644 index 0000000..907528b Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000004.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000004.json.crc b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000004.json.crc new file mode 100644 index 0000000..fa2def7 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/.00000000000000000004.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000000.crc b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000000.crc new file mode 100644 index 0000000..6da808c --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000000.crc @@ -0,0 +1 @@ 
+{"txnId":"867e3630-44f6-4d36-8161-d2298058c307","tableSizeBytes":0,"numFiles":0,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"458e6c0b-6b24-46fe-be71-c274092356e3","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[]}","partitionColumns":[],"configuration":{},"createdTime":1766549211931},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[]} diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000000.json b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000000.json new file mode 100644 index 0000000..71ca194 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1766549212032,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"867e3630-44f6-4d36-8161-d2298058c307"}} +{"metaData":{"id":"458e6c0b-6b24-46fe-be71-c274092356e3","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[]}","partitionColumns":[],"configuration":{},"createdTime":1766549211931}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000001.crc b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000001.crc new file mode 100644 index 0000000..4669c7a --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000001.crc @@ -0,0 +1 @@ 
+{"txnId":"ea8ee5d3-0255-499a-b857-0aba50e4de53","tableSizeBytes":233790,"numFiles":8,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"458e6c0b-6b24-46fe-be71-c274092356e3","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"entity_type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"data_source\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"created\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"updated\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"version\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"uniprot_created\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"uniprot_modified\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549211931},"protocol":{"minReaderVersion":1,"minWriterVersion":2}} diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000001.json b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000001.json new file mode 100644 index 0000000..98687e4 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000001.json @@ -0,0 +1,10 @@ +{"commitInfo":{"timestamp":1766549218901,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"5000","numOutputBytes":"233790"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"ea8ee5d3-0255-499a-b857-0aba50e4de53"}} 
+{"metaData":{"id":"458e6c0b-6b24-46fe-be71-c274092356e3","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"entity_type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"data_source\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"created\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"updated\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"version\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"uniprot_created\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"uniprot_modified\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549211931}} +{"add":{"path":"part-00000-df990575-6363-490d-8af4-6381c1fd05de-c000.snappy.parquet","partitionValues":{},"size":29133,"modificationTime":1766549218814,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1990-04-01\",\"uniprot_modified\":\"2024-11-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa4eefe-b062-5f26-8f77\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-06-18\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} 
+{"add":{"path":"part-00001-94ad28aa-5ae5-48c4-b30e-a2a33529c4d4-c000.snappy.parquet","partitionValues":{},"size":29218,"modificationTime":1766549218814,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1990-01-01\",\"uniprot_modified\":\"2024-07-24\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-04-09\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00002-9433b0fe-4856-48e3-b830-2f9cc2e1c4ba-c000.snappy.parquet","partitionValues":{},"size":29279,"modificationTime":1766549218814,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1986-07-21\",\"uniprot_modified\":\"2023-06-28\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-11-27\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} 
+{"add":{"path":"part-00003-dd2f1a5e-4114-41c8-b2e2-72edba0e2d16-c000.snappy.parquet","partitionValues":{},"size":29273,"modificationTime":1766549218814,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1991-05-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2023-05-03\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00004-7dd0a75c-fc02-4a91-b584-4fb3805c6851-c000.snappy.parquet","partitionValues":{},"size":29227,"modificationTime":1766549218814,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_008bae16-5b42-5bfd-a15c\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1986-07-21\",\"uniprot_modified\":\"2024-05-29\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fedf6b23-d61c-56dc-b256\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-04-09\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} 
+{"add":{"path":"part-00005-3b0745b2-9687-4d0b-b7af-56652f5b79d3-c000.snappy.parquet","partitionValues":{},"size":29339,"modificationTime":1766549218814,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1986-07-21\",\"uniprot_modified\":\"2023-06-28\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-06-18\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00006-902c69dd-2364-4c62-aed8-bab1f9d2bf5a-c000.snappy.parquet","partitionValues":{},"size":29134,"modificationTime":1766549218814,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-07-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-07-24\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} 
+{"add":{"path":"part-00007-1bc667d7-2f3a-4ece-a4a2-162e8a3f33f9-c000.snappy.parquet","partitionValues":{},"size":29187,"modificationTime":1766549218814,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-08-01\",\"uniprot_modified\":\"2024-07-24\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-02-05\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000002.crc b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000002.crc new file mode 100644 index 0000000..ed911d6 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000002.crc @@ -0,0 +1 @@ 
+{"txnId":"827710b0-dc20-464d-a919-4988f1ed8662","tableSizeBytes":467219,"numFiles":16,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"458e6c0b-6b24-46fe-be71-c274092356e3","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"entity_type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"data_source\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"created\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"updated\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"version\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"uniprot_created\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"uniprot_modified\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549211931},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[{"path":"part-00005-00c6be40-9225-40c1-a7c9-3f800a8f9280-c000.snappy.parquet","partitionValues":{},"size":29199,"modificationTime":1766549224077,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00957fe6-8bfd-5ecd-9b08\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-10-01\",\"uniprot_modified\":\"2024-10-02\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-03-27\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00001-94ad28aa-5ae5-48c4-b30e-a2a33529c4d4-c000.snappy.parquet","partitionValues":{},"size":29218,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1990-01-01\",\"uniprot_modified\":\"2024-07-24\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-04-09\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00004-7dd0a75c-fc02-4a91-b584-4fb3805c6851-c000.snappy.parquet","partitionValues":{},"size":29227,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_008bae16-5b42-5bfd-a15c\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1986-07-21\",\"uniprot_modified\":\"2024-05-29\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fedf6b23-d61c-56dc-b256\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-04-09\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00002-9433b0fe-4856-48e3-b830-2f9cc2e1c4ba-c000.snappy.parquet","partitionValues":{},"size":29279,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1986-07-21\",\"uniprot_modified\":\"2023-06-28\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-11-27\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00000-26253546-0abd-4ed6-8c55-17dfef347666-c000.snappy.parquet","partitionValues":{},"size":29251,"modificationTime":1766549224066,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-01-01\",\"uniprot_modified\":\"2024-07-24\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-01-24\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00001-006ac691-2d26-4839-932d-bf78580789fb-c000.snappy.parquet","partitionValues":{},"size":29022,"modificationTime":1766549224083,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00df9ef5-d9b4-5654-9d2a\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1992-12-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-11-27\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00004-889473d1-a49a-4d1a-9822-bc64152feb8a-c000.snappy.parquet","partitionValues":{},"size":29146,"modificationTime":1766549224086,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-01-01\",\"uniprot_modified\":\"2024-05-29\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2022-02-23\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00007-77325026-0538-440c-8df0-2d49aa067deb-c000.snappy.parquet","partitionValues":{},"size":29149,"modificationTime":1766549224083,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1990-01-01\",\"uniprot_modified\":\"2024-11-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-05-29\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00000-df990575-6363-490d-8af4-6381c1fd05de-c000.snappy.parquet","partitionValues":{},"size":29133,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1990-04-01\",\"uniprot_modified\":\"2024-11-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa4eefe-b062-5f26-8f77\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-06-18\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00005-3b0745b2-9687-4d0b-b7af-56652f5b79d3-c000.snappy.parquet","partitionValues":{},"size":29339,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1986-07-21\",\"uniprot_modified\":\"2023-06-28\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-06-18\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00006-902c69dd-2364-4c62-aed8-bab1f9d2bf5a-c000.snappy.parquet","partitionValues":{},"size":29134,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-07-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-07-24\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00006-cad0b36c-46f2-49e7-9889-7f4a1b44cbfb-c000.snappy.parquet","partitionValues":{},"size":29226,"modificationTime":1766549224078,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_0102d35c-20a5-5f8d-8175\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-10-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-02-05\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00002-a3c8d634-57f3-49a6-a095-689fe9930cc4-c000.snappy.parquet","partitionValues":{},"size":29103,"modificationTime":1766549224077,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1993-07-01\",\"uniprot_modified\":\"2024-11-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-06-18\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00003-dd2f1a5e-4114-41c8-b2e2-72edba0e2d16-c000.snappy.parquet","partitionValues":{},"size":29273,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1991-05-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2023-05-03\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00003-0b4ced07-8b4e-4c9a-b8b4-fcd3b6d39a5c-c000.snappy.parquet","partitionValues":{},"size":29333,"modificationTime":1766549224078,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-08-01\",\"uniprot_modified\":\"2024-11-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2023-09-13\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00007-1bc667d7-2f3a-4ece-a4a2-162e8a3f33f9-c000.snappy.parquet","partitionValues":{},"size":29187,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-08-01\",\"uniprot_modified\":\"2024-07-24\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-02-05\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}]} diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000002.json b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000002.json new file mode 100644 index 0000000..b720366 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000002.json @@ -0,0 +1,9 @@ 
+{"commitInfo":{"timestamp":1766549224090,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"5000","numOutputBytes":"233429"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"827710b0-dc20-464d-a919-4988f1ed8662"}} +{"add":{"path":"part-00000-26253546-0abd-4ed6-8c55-17dfef347666-c000.snappy.parquet","partitionValues":{},"size":29251,"modificationTime":1766549224066,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-01-01\",\"uniprot_modified\":\"2024-07-24\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-01-24\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00001-006ac691-2d26-4839-932d-bf78580789fb-c000.snappy.parquet","partitionValues":{},"size":29022,"modificationTime":1766549224083,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00df9ef5-d9b4-5654-9d2a\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1992-12-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-11-27\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00002-a3c8d634-57f3-49a6-a095-689fe9930cc4-c000.snappy.parquet","partitionValues":{},"size":29103,"modificationTime":1766549224077,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1993-07-01\",\"uniprot_modified\":\"2024-11-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-06-18\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00003-0b4ced07-8b4e-4c9a-b8b4-fcd3b6d39a5c-c000.snappy.parquet","partitionValues":{},"size":29333,"modificationTime":1766549224078,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-08-01\",\"uniprot_modified\":\"2024-11-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2023-09-13\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00004-889473d1-a49a-4d1a-9822-bc64152feb8a-c000.snappy.parquet","partitionValues":{},"size":29146,"modificationTime":1766549224086,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-01-01\",\"uniprot_modified\":\"2024-05-29\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2022-02-23\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00005-00c6be40-9225-40c1-a7c9-3f800a8f9280-c000.snappy.parquet","partitionValues":{},"size":29199,"modificationTime":1766549224077,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00957fe6-8bfd-5ecd-9b08\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-10-01\",\"uniprot_modified\":\"2024-10-02\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-03-27\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00006-cad0b36c-46f2-49e7-9889-7f4a1b44cbfb-c000.snappy.parquet","partitionValues":{},"size":29226,"modificationTime":1766549224078,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_0102d35c-20a5-5f8d-8175\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-10-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-02-05\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00007-77325026-0538-440c-8df0-2d49aa067deb-c000.snappy.parquet","partitionValues":{},"size":29149,"modificationTime":1766549224083,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1990-01-01\",\"uniprot_modified\":\"2024-11-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-05-29\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000003.crc b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000003.crc new file mode 100644 index 0000000..698ed34 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000003.crc @@ -0,0 +1 @@ 
+{"txnId":"823e6093-3ade-4f50-a3cf-01577111bdef","tableSizeBytes":699014,"numFiles":24,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"458e6c0b-6b24-46fe-be71-c274092356e3","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"entity_type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"data_source\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"created\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"updated\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"version\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"uniprot_created\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"uniprot_modified\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549211931},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[{"path":"part-00001-94ad28aa-5ae5-48c4-b30e-a2a33529c4d4-c000.snappy.parquet","partitionValues":{},"size":29218,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1990-01-01\",\"uniprot_modified\":\"2024-07-24\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-04-09\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00005-16c10944-46a4-4ffb-a8eb-c98f074c4533-c000.snappy.parquet","partitionValues":{},"size":28835,"modificationTime":1766549232977,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-11-01\",\"uniprot_modified\":\"2025-02-05\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-02-05\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00002-e21f5f3e-ffef-441e-9416-cddfc7945b7d-c000.snappy.parquet","partitionValues":{},"size":28790,"modificationTime":1766549232984,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_0061faab-bd88-562c-8a31\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-07-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe9c77d9-be27-518e-8e0c\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-02-05\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00004-7dd0a75c-fc02-4a91-b584-4fb3805c6851-c000.snappy.parquet","partitionValues":{},"size":29227,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_008bae16-5b42-5bfd-a15c\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1986-07-21\",\"uniprot_modified\":\"2024-05-29\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fedf6b23-d61c-56dc-b256\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-04-09\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00002-9433b0fe-4856-48e3-b830-2f9cc2e1c4ba-c000.snappy.parquet","partitionValues":{},"size":29279,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1986-07-21\",\"uniprot_modified\":\"2023-06-28\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-11-27\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00000-26253546-0abd-4ed6-8c55-17dfef347666-c000.snappy.parquet","partitionValues":{},"size":29251,"modificationTime":1766549224066,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-01-01\",\"uniprot_modified\":\"2024-07-24\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-01-24\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00007-77325026-0538-440c-8df0-2d49aa067deb-c000.snappy.parquet","partitionValues":{},"size":29149,"modificationTime":1766549224083,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1990-01-01\",\"uniprot_modified\":\"2024-11-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-05-29\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00000-df990575-6363-490d-8af4-6381c1fd05de-c000.snappy.parquet","partitionValues":{},"size":29133,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1990-04-01\",\"uniprot_modified\":\"2024-11-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa4eefe-b062-5f26-8f77\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-06-18\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00005-3b0745b2-9687-4d0b-b7af-56652f5b79d3-c000.snappy.parquet","partitionValues":{},"size":29339,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1986-07-21\",\"uniprot_modified\":\"2023-06-28\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-06-18\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00006-902c69dd-2364-4c62-aed8-bab1f9d2bf5a-c000.snappy.parquet","partitionValues":{},"size":29134,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-07-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-07-24\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00002-a3c8d634-57f3-49a6-a095-689fe9930cc4-c000.snappy.parquet","partitionValues":{},"size":29103,"modificationTime":1766549224077,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1993-07-01\",\"uniprot_modified\":\"2024-11-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-06-18\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00003-dd2f1a5e-4114-41c8-b2e2-72edba0e2d16-c000.snappy.parquet","partitionValues":{},"size":29273,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1991-05-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2023-05-03\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00007-1bc667d7-2f3a-4ece-a4a2-162e8a3f33f9-c000.snappy.parquet","partitionValues":{},"size":29187,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-08-01\",\"uniprot_modified\":\"2024-07-24\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-02-05\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00000-75b2edbc-ef3f-4b58-a315-cc5098832067-c000.snappy.parquet","partitionValues":{},"size":29122,"modificationTime":1766549232964,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_006f3d67-56f0-5829-b099\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1994-10-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcedd0e-face-5ec6-8e0b\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-04-09\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00004-9ba0b6c6-3035-4c5a-9d00-f3210e44c1be-c000.snappy.parquet","partitionValues":{},"size":28926,"modificationTime":1766549232963,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-07-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffbd3905-3836-5893-bd75\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2021-09-29\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00005-00c6be40-9225-40c1-a7c9-3f800a8f9280-c000.snappy.parquet","partitionValues":{},"size":29199,"modificationTime":1766549224077,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00957fe6-8bfd-5ecd-9b08\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-10-01\",\"uniprot_modified\":\"2024-10-02\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-03-27\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00003-c2c4353d-923b-4d02-bcc0-c567d8152c24-c000.snappy.parquet","partitionValues":{},"size":28897,"modificationTime":1766549232971,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-10-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-02-05\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00001-006ac691-2d26-4839-932d-bf78580789fb-c000.snappy.parquet","partitionValues":{},"size":29022,"modificationTime":1766549224083,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00df9ef5-d9b4-5654-9d2a\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1992-12-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-11-27\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00006-b5e8d6ab-9bf9-4c7f-b4be-f4a4032b4399-c000.snappy.parquet","partitionValues":{},"size":29037,"modificationTime":1766549232971,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00219a7c-c28c-5aaf-928e\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-11-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe67f279-f090-5c4e-b38d\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-03-27\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00004-889473d1-a49a-4d1a-9822-bc64152feb8a-c000.snappy.parquet","partitionValues":{},"size":29146,"modificationTime":1766549224086,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-01-01\",\"uniprot_modified\":\"2024-05-29\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2022-02-23\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00007-ac2f1ac0-a676-4934-bf9f-d6cc54d7ae01-c000.snappy.parquet","partitionValues":{},"size":29287,"modificationTime":1766549232972,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-07-01\",\"uniprot_modified\":\"2023-06-28\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-04-09\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00006-cad0b36c-46f2-49e7-9889-7f4a1b44cbfb-c000.snappy.parquet","partitionValues":{},"size":29226,"modificationTime":1766549224078,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_0102d35c-20a5-5f8d-8175\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-10-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-02-05\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00001-3c82d97a-4e0f-412a-8297-3bf943498c02-c000.snappy.parquet","partitionValues":{},"size":28901,"modificationTime":1766549232970,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00365069-eeac-5442-8f11\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-08-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff216b32-af2a-545f-8e61\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2017-03-15\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00003-0b4ced07-8b4e-4c9a-b8b4-fcd3b6d39a5c-c000.snappy.parquet","partitionValues":{},"size":29333,"modificationTime":1766549224078,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-08-01\",\"uniprot_modified\":\"2024-11-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2023-09-13\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}]} diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000003.json b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000003.json new file mode 100644 index 0000000..aafef18 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000003.json @@ -0,0 +1,9 @@ 
+{"commitInfo":{"timestamp":1766549232987,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"5000","numOutputBytes":"231795"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"823e6093-3ade-4f50-a3cf-01577111bdef"}} +{"add":{"path":"part-00000-75b2edbc-ef3f-4b58-a315-cc5098832067-c000.snappy.parquet","partitionValues":{},"size":29122,"modificationTime":1766549232964,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_006f3d67-56f0-5829-b099\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1994-10-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcedd0e-face-5ec6-8e0b\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-04-09\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00001-3c82d97a-4e0f-412a-8297-3bf943498c02-c000.snappy.parquet","partitionValues":{},"size":28901,"modificationTime":1766549232970,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00365069-eeac-5442-8f11\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-08-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff216b32-af2a-545f-8e61\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2017-03-15\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00002-e21f5f3e-ffef-441e-9416-cddfc7945b7d-c000.snappy.parquet","partitionValues":{},"size":28790,"modificationTime":1766549232984,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_0061faab-bd88-562c-8a31\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-07-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe9c77d9-be27-518e-8e0c\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-02-05\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00003-c2c4353d-923b-4d02-bcc0-c567d8152c24-c000.snappy.parquet","partitionValues":{},"size":28897,"modificationTime":1766549232971,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-10-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-02-05\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00004-9ba0b6c6-3035-4c5a-9d00-f3210e44c1be-c000.snappy.parquet","partitionValues":{},"size":28926,"modificationTime":1766549232963,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-07-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffbd3905-3836-5893-bd75\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2021-09-29\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00005-16c10944-46a4-4ffb-a8eb-c98f074c4533-c000.snappy.parquet","partitionValues":{},"size":28835,"modificationTime":1766549232977,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-11-01\",\"uniprot_modified\":\"2025-02-05\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-02-05\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00006-b5e8d6ab-9bf9-4c7f-b4be-f4a4032b4399-c000.snappy.parquet","partitionValues":{},"size":29037,"modificationTime":1766549232971,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00219a7c-c28c-5aaf-928e\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-11-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe67f279-f090-5c4e-b38d\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-03-27\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00007-ac2f1ac0-a676-4934-bf9f-d6cc54d7ae01-c000.snappy.parquet","partitionValues":{},"size":29287,"modificationTime":1766549232972,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-07-01\",\"uniprot_modified\":\"2023-06-28\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-04-09\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000004.crc b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000004.crc new file mode 100644 index 0000000..6601dbf --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000004.crc @@ -0,0 +1 @@ 
+{"txnId":"ca208a64-40af-4d46-88d3-0f231f062b00","tableSizeBytes":924545,"numFiles":32,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"458e6c0b-6b24-46fe-be71-c274092356e3","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"entity_type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"data_source\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"created\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"updated\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"version\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"uniprot_created\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"uniprot_modified\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549211931},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[{"path":"part-00001-94ad28aa-5ae5-48c4-b30e-a2a33529c4d4-c000.snappy.parquet","partitionValues":{},"size":29218,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1990-01-01\",\"uniprot_modified\":\"2024-07-24\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-04-09\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00006-9d7f2f92-96db-4863-87d7-a368f31c8aed-c000.snappy.parquet","partitionValues":{},"size":28168,"modificationTime":1766549238775,"dataChange":false,"stats":"{\"numRecords\":601,\"minValues\":{\"entity_id\":\"cdm_prot_00666a93-0a4c-5498-8cac\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-11-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffd1840d-3bb9-5f05-ad33\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2018-05-23\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00003-103ab579-ba95-49f6-b98f-ec60b776cedf-c000.snappy.parquet","partitionValues":{},"size":28369,"modificationTime":1766549238789,"dataChange":false,"stats":"{\"numRecords\":601,\"minValues\":{\"entity_id\":\"cdm_prot_0007879e-35c8-5917-ad11\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1992-08-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe66051-6a0a-55cc-81b2\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-04-09\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00005-16c10944-46a4-4ffb-a8eb-c98f074c4533-c000.snappy.parquet","partitionValues":{},"size":28835,"modificationTime":1766549232977,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-11-01\",\"uniprot_modified\":\"2025-02-05\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-02-05\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00004-838d590d-0caf-48bb-935b-4d3bcdef7e6b-c000.snappy.parquet","partitionValues":{},"size":28022,"modificationTime":1766549238775,"dataChange":false,"stats":"{\"numRecords\":601,\"minValues\":{\"entity_id\":\"cdm_prot_003753a9-9a55-505c-8ea7\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1990-04-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff634e32-999c-5ae0-8d84\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2022-02-23\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00002-e21f5f3e-ffef-441e-9416-cddfc7945b7d-c000.snappy.parquet","partitionValues":{},"size":28790,"modificationTime":1766549232984,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_0061faab-bd88-562c-8a31\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-07-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe9c77d9-be27-518e-8e0c\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-02-05\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00004-7dd0a75c-fc02-4a91-b584-4fb3805c6851-c000.snappy.parquet","partitionValues":{},"size":29227,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_008bae16-5b42-5bfd-a15c\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1986-07-21\",\"uniprot_modified\":\"2024-05-29\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fedf6b23-d61c-56dc-b256\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-04-09\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00002-9433b0fe-4856-48e3-b830-2f9cc2e1c4ba-c000.snappy.parquet","partitionValues":{},"size":29279,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1986-07-21\",\"uniprot_modified\":\"2023-06-28\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-11-27\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00000-26253546-0abd-4ed6-8c55-17dfef347666-c000.snappy.parquet","partitionValues":{},"size":29251,"modificationTime":1766549224066,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-01-01\",\"uniprot_modified\":\"2024-07-24\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-01-24\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00005-c44b66e7-c76c-4f20-b855-f02a508ddae1-c000.snappy.parquet","partitionValues":{},"size":28154,"modificationTime":1766549238779,"dataChange":false,"stats":"{\"numRecords\":601,\"minValues\":{\"entity_id\":\"cdm_prot_007a14c8-c58a-5ae1-b721\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1990-04-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb5b181-1633-55f5-83ee\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-06-18\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00007-77325026-0538-440c-8df0-2d49aa067deb-c000.snappy.parquet","partitionValues":{},"size":29149,"modificationTime":1766549224083,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1990-01-01\",\"uniprot_modified\":\"2024-11-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-05-29\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00000-df990575-6363-490d-8af4-6381c1fd05de-c000.snappy.parquet","partitionValues":{},"size":29133,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1990-04-01\",\"uniprot_modified\":\"2024-11-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa4eefe-b062-5f26-8f77\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-06-18\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00005-3b0745b2-9687-4d0b-b7af-56652f5b79d3-c000.snappy.parquet","partitionValues":{},"size":29339,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1986-07-21\",\"uniprot_modified\":\"2023-06-28\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-06-18\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00006-902c69dd-2364-4c62-aed8-bab1f9d2bf5a-c000.snappy.parquet","partitionValues":{},"size":29134,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-07-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-07-24\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00002-a3c8d634-57f3-49a6-a095-689fe9930cc4-c000.snappy.parquet","partitionValues":{},"size":29103,"modificationTime":1766549224077,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1993-07-01\",\"uniprot_modified\":\"2024-11-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-06-18\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00003-dd2f1a5e-4114-41c8-b2e2-72edba0e2d16-c000.snappy.parquet","partitionValues":{},"size":29273,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1991-05-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2023-05-03\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00007-1bc667d7-2f3a-4ece-a4a2-162e8a3f33f9-c000.snappy.parquet","partitionValues":{},"size":29187,"modificationTime":1766549218814,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-08-01\",\"uniprot_modified\":\"2024-07-24\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-02-05\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00000-75b2edbc-ef3f-4b58-a315-cc5098832067-c000.snappy.parquet","partitionValues":{},"size":29122,"modificationTime":1766549232964,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_006f3d67-56f0-5829-b099\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1994-10-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcedd0e-face-5ec6-8e0b\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-04-09\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00007-642beb72-4230-41ef-b60a-b811933fa8a9-c000.snappy.parquet","partitionValues":{},"size":28495,"modificationTime":1766549238779,"dataChange":false,"stats":"{\"numRecords\":607,\"minValues\":{\"entity_id\":\"cdm_prot_0254412b-7072-5093-9d3c\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1986-11-01\",\"uniprot_modified\":\"2023-06-28\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffd6bc2d-9fdc-5fb9-a121\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-04-09\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00004-9ba0b6c6-3035-4c5a-9d00-f3210e44c1be-c000.snappy.parquet","partitionValues":{},"size":28926,"modificationTime":1766549232963,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-07-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffbd3905-3836-5893-bd75\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2021-09-29\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00005-00c6be40-9225-40c1-a7c9-3f800a8f9280-c000.snappy.parquet","partitionValues":{},"size":29199,"modificationTime":1766549224077,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00957fe6-8bfd-5ecd-9b08\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-10-01\",\"uniprot_modified\":\"2024-10-02\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-03-27\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00003-c2c4353d-923b-4d02-bcc0-c567d8152c24-c000.snappy.parquet","partitionValues":{},"size":28897,"modificationTime":1766549232971,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-10-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-02-05\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00001-006ac691-2d26-4839-932d-bf78580789fb-c000.snappy.parquet","partitionValues":{},"size":29022,"modificationTime":1766549224083,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00df9ef5-d9b4-5654-9d2a\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1992-12-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-11-27\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00006-b5e8d6ab-9bf9-4c7f-b4be-f4a4032b4399-c000.snappy.parquet","partitionValues":{},"size":29037,"modificationTime":1766549232971,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00219a7c-c28c-5aaf-928e\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-11-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe67f279-f090-5c4e-b38d\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-03-27\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00004-889473d1-a49a-4d1a-9822-bc64152feb8a-c000.snappy.parquet","partitionValues":{},"size":29146,"modificationTime":1766549224086,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-01-01\",\"uniprot_modified\":\"2024-05-29\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2022-02-23\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00007-ac2f1ac0-a676-4934-bf9f-d6cc54d7ae01-c000.snappy.parquet","partitionValues":{},"size":29287,"modificationTime":1766549232972,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-07-01\",\"uniprot_modified\":\"2023-06-28\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-04-09\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00001-f7e2e2d6-8a2d-49d6-9d74-98af5161c3d3-c000.snappy.parquet","partitionValues":{},"size":28139,"modificationTime":1766549238775,"dataChange":false,"stats":"{\"numRecords\":601,\"minValues\":{\"entity_id\":\"cdm_prot_01672ce0-33eb-50c0-8bb3\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1990-08-01\",\"uniprot_modified\":\"2023-09-13\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9aba-bce9-56de-b269\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-11-27\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00006-cad0b36c-46f2-49e7-9889-7f4a1b44cbfb-c000.snappy.parquet","partitionValues":{},"size":29226,"modificationTime":1766549224078,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_0102d35c-20a5-5f8d-8175\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1989-10-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-02-05\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00001-3c82d97a-4e0f-412a-8297-3bf943498c02-c000.snappy.parquet","partitionValues":{},"size":28901,"modificationTime":1766549232970,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_00365069-eeac-5442-8f11\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-08-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff216b32-af2a-545f-8e61\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2017-03-15\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00003-0b4ced07-8b4e-4c9a-b8b4-fcd3b6d39a5c-c000.snappy.parquet","partitionValues":{},"size":29333,"modificationTime":1766549224078,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"entity_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-08-01\",\"uniprot_modified\":\"2024-11-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2023-09-13\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00002-498d9893-5b55-4b0e-920a-537bcefe3b57-c000.snappy.parquet","partitionValues":{},"size":28276,"modificationTime":1766549238779,"dataChange":false,"stats":"{\"numRecords\":601,\"minValues\":{\"entity_id\":\"cdm_prot_0185e048-25c8-50dd-bfbf\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-11-01\",\"uniprot_modified\":\"2023-09-13\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe66808c-aab1-5f12-970e\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-06-18\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"},{"path":"part-00000-573241d1-7050-4657-b105-c3c5122440d1-c000.snappy.parquet","partitionValues":{},"size":27908,"modificationTime":1766549238778,"dataChange":false,"stats":"{\"numRecords\":601,\"minValues\":{\"entity_id\":\"cdm_prot_0003b7cb-bf13-5646-9978\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1992-08-01\",\"uniprot_modified\":\"2024-11-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fef0eb9f-57f2-532d-b5b2\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2017-09-27\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}]} diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000004.json b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000004.json new file mode 100644 index 0000000..5eaeb77 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/entities/_delta_log/00000000000000000004.json @@ -0,0 +1,9 @@ 
+{"commitInfo":{"timestamp":1766549238792,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":3,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"4814","numOutputBytes":"225531"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"ca208a64-40af-4d46-88d3-0f231f062b00"}} +{"add":{"path":"part-00000-573241d1-7050-4657-b105-c3c5122440d1-c000.snappy.parquet","partitionValues":{},"size":27908,"modificationTime":1766549238778,"dataChange":true,"stats":"{\"numRecords\":601,\"minValues\":{\"entity_id\":\"cdm_prot_0003b7cb-bf13-5646-9978\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1992-08-01\",\"uniprot_modified\":\"2024-11-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fef0eb9f-57f2-532d-b5b2\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2017-09-27\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00001-f7e2e2d6-8a2d-49d6-9d74-98af5161c3d3-c000.snappy.parquet","partitionValues":{},"size":28139,"modificationTime":1766549238775,"dataChange":true,"stats":"{\"numRecords\":601,\"minValues\":{\"entity_id\":\"cdm_prot_01672ce0-33eb-50c0-8bb3\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1990-08-01\",\"uniprot_modified\":\"2023-09-13\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9aba-bce9-56de-b269\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2024-11-27\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00002-498d9893-5b55-4b0e-920a-537bcefe3b57-c000.snappy.parquet","partitionValues":{},"size":28276,"modificationTime":1766549238779,"dataChange":true,"stats":"{\"numRecords\":601,\"minValues\":{\"entity_id\":\"cdm_prot_0185e048-25c8-50dd-bfbf\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-11-01\",\"uniprot_modified\":\"2023-09-13\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe66808c-aab1-5f12-970e\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-06-18\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00003-103ab579-ba95-49f6-b98f-ec60b776cedf-c000.snappy.parquet","partitionValues":{},"size":28369,"modificationTime":1766549238789,"dataChange":true,"stats":"{\"numRecords\":601,\"minValues\":{\"entity_id\":\"cdm_prot_0007879e-35c8-5917-ad11\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1992-08-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe66051-6a0a-55cc-81b2\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-04-09\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00004-838d590d-0caf-48bb-935b-4d3bcdef7e6b-c000.snappy.parquet","partitionValues":{},"size":28022,"modificationTime":1766549238775,"dataChange":true,"stats":"{\"numRecords\":601,\"minValues\":{\"entity_id\":\"cdm_prot_003753a9-9a55-505c-8ea7\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1990-04-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff634e32-999c-5ae0-8d84\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2022-02-23\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00005-c44b66e7-c76c-4f20-b855-f02a508ddae1-c000.snappy.parquet","partitionValues":{},"size":28154,"modificationTime":1766549238779,"dataChange":true,"stats":"{\"numRecords\":601,\"minValues\":{\"entity_id\":\"cdm_prot_007a14c8-c58a-5ae1-b721\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1990-04-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb5b181-1633-55f5-83ee\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-06-18\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00006-9d7f2f92-96db-4863-87d7-a368f31c8aed-c000.snappy.parquet","partitionValues":{},"size":28168,"modificationTime":1766549238775,"dataChange":true,"stats":"{\"numRecords\":601,\"minValues\":{\"entity_id\":\"cdm_prot_00666a93-0a4c-5498-8cac\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1988-11-01\",\"uniprot_modified\":\"2024-03-27\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffd1840d-3bb9-5f05-ad33\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2018-05-23\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} +{"add":{"path":"part-00007-642beb72-4230-41ef-b60a-b811933fa8a9-c000.snappy.parquet","partitionValues":{},"size":28495,"modificationTime":1766549238779,"dataChange":true,"stats":"{\"numRecords\":607,\"minValues\":{\"entity_id\":\"cdm_prot_0254412b-7072-5093-9d3c\",\"entity_type\":\"protein\",\"data_source\":\"UniProt 
import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"100\",\"uniprot_created\":\"1986-11-01\",\"uniprot_modified\":\"2023-06-28\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffd6bc2d-9fdc-5fb9-a121\",\"entity_type\":\"protein\",\"data_source\":\"UniProt import\",\"created\":\"2025-12-24T04:06:38.246132+00:00\",\"updated\":\"2025-12-24T04:06:38.246132+00:00\",\"version\":\"99\",\"uniprot_created\":\"2025-04-09\",\"uniprot_modified\":\"2025-06-18\"},\"nullCount\":{\"entity_id\":0,\"entity_type\":0,\"data_source\":0,\"created\":0,\"updated\":0,\"version\":0,\"uniprot_created\":0,\"uniprot_modified\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00000-26253546-0abd-4ed6-8c55-17dfef347666-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00000-26253546-0abd-4ed6-8c55-17dfef347666-c000.snappy.parquet new file mode 100644 index 0000000..f5f8518 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00000-26253546-0abd-4ed6-8c55-17dfef347666-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00000-573241d1-7050-4657-b105-c3c5122440d1-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00000-573241d1-7050-4657-b105-c3c5122440d1-c000.snappy.parquet new file mode 100644 index 0000000..5fb61c1 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00000-573241d1-7050-4657-b105-c3c5122440d1-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00000-75b2edbc-ef3f-4b58-a315-cc5098832067-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00000-75b2edbc-ef3f-4b58-a315-cc5098832067-c000.snappy.parquet new file mode 100644 index 0000000..16b07b0 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00000-75b2edbc-ef3f-4b58-a315-cc5098832067-c000.snappy.parquet differ 
diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00000-df990575-6363-490d-8af4-6381c1fd05de-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00000-df990575-6363-490d-8af4-6381c1fd05de-c000.snappy.parquet new file mode 100644 index 0000000..4059658 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00000-df990575-6363-490d-8af4-6381c1fd05de-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00001-006ac691-2d26-4839-932d-bf78580789fb-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00001-006ac691-2d26-4839-932d-bf78580789fb-c000.snappy.parquet new file mode 100644 index 0000000..16b993e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00001-006ac691-2d26-4839-932d-bf78580789fb-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00001-3c82d97a-4e0f-412a-8297-3bf943498c02-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00001-3c82d97a-4e0f-412a-8297-3bf943498c02-c000.snappy.parquet new file mode 100644 index 0000000..296d147 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00001-3c82d97a-4e0f-412a-8297-3bf943498c02-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00001-94ad28aa-5ae5-48c4-b30e-a2a33529c4d4-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00001-94ad28aa-5ae5-48c4-b30e-a2a33529c4d4-c000.snappy.parquet new file mode 100644 index 0000000..42c37ec Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00001-94ad28aa-5ae5-48c4-b30e-a2a33529c4d4-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00001-f7e2e2d6-8a2d-49d6-9d74-98af5161c3d3-c000.snappy.parquet 
b/tests/data/uniprot_archaea/uniprot_db/entities/part-00001-f7e2e2d6-8a2d-49d6-9d74-98af5161c3d3-c000.snappy.parquet new file mode 100644 index 0000000..866c465 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00001-f7e2e2d6-8a2d-49d6-9d74-98af5161c3d3-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00002-498d9893-5b55-4b0e-920a-537bcefe3b57-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00002-498d9893-5b55-4b0e-920a-537bcefe3b57-c000.snappy.parquet new file mode 100644 index 0000000..993b713 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00002-498d9893-5b55-4b0e-920a-537bcefe3b57-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00002-9433b0fe-4856-48e3-b830-2f9cc2e1c4ba-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00002-9433b0fe-4856-48e3-b830-2f9cc2e1c4ba-c000.snappy.parquet new file mode 100644 index 0000000..2db8f4b Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00002-9433b0fe-4856-48e3-b830-2f9cc2e1c4ba-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00002-a3c8d634-57f3-49a6-a095-689fe9930cc4-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00002-a3c8d634-57f3-49a6-a095-689fe9930cc4-c000.snappy.parquet new file mode 100644 index 0000000..62286f0 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00002-a3c8d634-57f3-49a6-a095-689fe9930cc4-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00002-e21f5f3e-ffef-441e-9416-cddfc7945b7d-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00002-e21f5f3e-ffef-441e-9416-cddfc7945b7d-c000.snappy.parquet new file mode 100644 index 0000000..4b6b34d Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/entities/part-00002-e21f5f3e-ffef-441e-9416-cddfc7945b7d-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00003-0b4ced07-8b4e-4c9a-b8b4-fcd3b6d39a5c-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00003-0b4ced07-8b4e-4c9a-b8b4-fcd3b6d39a5c-c000.snappy.parquet new file mode 100644 index 0000000..856d53d Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00003-0b4ced07-8b4e-4c9a-b8b4-fcd3b6d39a5c-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00003-103ab579-ba95-49f6-b98f-ec60b776cedf-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00003-103ab579-ba95-49f6-b98f-ec60b776cedf-c000.snappy.parquet new file mode 100644 index 0000000..0c0f7e0 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00003-103ab579-ba95-49f6-b98f-ec60b776cedf-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00003-c2c4353d-923b-4d02-bcc0-c567d8152c24-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00003-c2c4353d-923b-4d02-bcc0-c567d8152c24-c000.snappy.parquet new file mode 100644 index 0000000..b21cf76 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00003-c2c4353d-923b-4d02-bcc0-c567d8152c24-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00003-dd2f1a5e-4114-41c8-b2e2-72edba0e2d16-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00003-dd2f1a5e-4114-41c8-b2e2-72edba0e2d16-c000.snappy.parquet new file mode 100644 index 0000000..dfbd146 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00003-dd2f1a5e-4114-41c8-b2e2-72edba0e2d16-c000.snappy.parquet differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/entities/part-00004-7dd0a75c-fc02-4a91-b584-4fb3805c6851-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00004-7dd0a75c-fc02-4a91-b584-4fb3805c6851-c000.snappy.parquet new file mode 100644 index 0000000..6e615bd Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00004-7dd0a75c-fc02-4a91-b584-4fb3805c6851-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00004-838d590d-0caf-48bb-935b-4d3bcdef7e6b-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00004-838d590d-0caf-48bb-935b-4d3bcdef7e6b-c000.snappy.parquet new file mode 100644 index 0000000..c5d992c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00004-838d590d-0caf-48bb-935b-4d3bcdef7e6b-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00004-889473d1-a49a-4d1a-9822-bc64152feb8a-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00004-889473d1-a49a-4d1a-9822-bc64152feb8a-c000.snappy.parquet new file mode 100644 index 0000000..f52811a Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00004-889473d1-a49a-4d1a-9822-bc64152feb8a-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00004-9ba0b6c6-3035-4c5a-9d00-f3210e44c1be-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00004-9ba0b6c6-3035-4c5a-9d00-f3210e44c1be-c000.snappy.parquet new file mode 100644 index 0000000..2a87976 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00004-9ba0b6c6-3035-4c5a-9d00-f3210e44c1be-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00005-00c6be40-9225-40c1-a7c9-3f800a8f9280-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00005-00c6be40-9225-40c1-a7c9-3f800a8f9280-c000.snappy.parquet new 
file mode 100644 index 0000000..d6da232 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00005-00c6be40-9225-40c1-a7c9-3f800a8f9280-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00005-16c10944-46a4-4ffb-a8eb-c98f074c4533-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00005-16c10944-46a4-4ffb-a8eb-c98f074c4533-c000.snappy.parquet new file mode 100644 index 0000000..5131c64 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00005-16c10944-46a4-4ffb-a8eb-c98f074c4533-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00005-3b0745b2-9687-4d0b-b7af-56652f5b79d3-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00005-3b0745b2-9687-4d0b-b7af-56652f5b79d3-c000.snappy.parquet new file mode 100644 index 0000000..9551e3b Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00005-3b0745b2-9687-4d0b-b7af-56652f5b79d3-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00005-c44b66e7-c76c-4f20-b855-f02a508ddae1-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00005-c44b66e7-c76c-4f20-b855-f02a508ddae1-c000.snappy.parquet new file mode 100644 index 0000000..deee867 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00005-c44b66e7-c76c-4f20-b855-f02a508ddae1-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00006-902c69dd-2364-4c62-aed8-bab1f9d2bf5a-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00006-902c69dd-2364-4c62-aed8-bab1f9d2bf5a-c000.snappy.parquet new file mode 100644 index 0000000..9c91b1a Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00006-902c69dd-2364-4c62-aed8-bab1f9d2bf5a-c000.snappy.parquet differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/entities/part-00006-9d7f2f92-96db-4863-87d7-a368f31c8aed-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00006-9d7f2f92-96db-4863-87d7-a368f31c8aed-c000.snappy.parquet new file mode 100644 index 0000000..4cd6dec Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00006-9d7f2f92-96db-4863-87d7-a368f31c8aed-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00006-b5e8d6ab-9bf9-4c7f-b4be-f4a4032b4399-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00006-b5e8d6ab-9bf9-4c7f-b4be-f4a4032b4399-c000.snappy.parquet new file mode 100644 index 0000000..5838653 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00006-b5e8d6ab-9bf9-4c7f-b4be-f4a4032b4399-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00006-cad0b36c-46f2-49e7-9889-7f4a1b44cbfb-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00006-cad0b36c-46f2-49e7-9889-7f4a1b44cbfb-c000.snappy.parquet new file mode 100644 index 0000000..7d8049b Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00006-cad0b36c-46f2-49e7-9889-7f4a1b44cbfb-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00007-1bc667d7-2f3a-4ece-a4a2-162e8a3f33f9-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00007-1bc667d7-2f3a-4ece-a4a2-162e8a3f33f9-c000.snappy.parquet new file mode 100644 index 0000000..4b995db Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00007-1bc667d7-2f3a-4ece-a4a2-162e8a3f33f9-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00007-642beb72-4230-41ef-b60a-b811933fa8a9-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00007-642beb72-4230-41ef-b60a-b811933fa8a9-c000.snappy.parquet new 
file mode 100644 index 0000000..1653416 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00007-642beb72-4230-41ef-b60a-b811933fa8a9-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00007-77325026-0538-440c-8df0-2d49aa067deb-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00007-77325026-0538-440c-8df0-2d49aa067deb-c000.snappy.parquet new file mode 100644 index 0000000..d432935 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00007-77325026-0538-440c-8df0-2d49aa067deb-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/entities/part-00007-ac2f1ac0-a676-4934-bf9f-d6cc54d7ae01-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/entities/part-00007-ac2f1ac0-a676-4934-bf9f-d6cc54d7ae01-c000.snappy.parquet new file mode 100644 index 0000000..ea43f5a Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/entities/part-00007-ac2f1ac0-a676-4934-bf9f-d6cc54d7ae01-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00000-3f84591a-23b6-4cdf-8090-eea8896ab8d2-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00000-3f84591a-23b6-4cdf-8090-eea8896ab8d2-c000.snappy.parquet.crc new file mode 100644 index 0000000..b6cc9f9 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00000-3f84591a-23b6-4cdf-8090-eea8896ab8d2-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00000-99cfe050-fc27-4e15-b5e4-9c72a7f20670-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00000-99cfe050-fc27-4e15-b5e4-9c72a7f20670-c000.snappy.parquet.crc new file mode 100644 index 0000000..c7c3ad7 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00000-99cfe050-fc27-4e15-b5e4-9c72a7f20670-c000.snappy.parquet.crc differ 
diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00000-cbf92e27-f9e4-4e89-a8c2-1d2755a0e39b-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00000-cbf92e27-f9e4-4e89-a8c2-1d2755a0e39b-c000.snappy.parquet.crc new file mode 100644 index 0000000..bc8d808 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00000-cbf92e27-f9e4-4e89-a8c2-1d2755a0e39b-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00000-f69d9253-5a6d-460b-bd51-59cc7a065749-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00000-f69d9253-5a6d-460b-bd51-59cc7a065749-c000.snappy.parquet.crc new file mode 100644 index 0000000..49b6b16 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00000-f69d9253-5a6d-460b-bd51-59cc7a065749-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00001-059ca701-6a3b-42ea-942a-002f081595a8-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00001-059ca701-6a3b-42ea-942a-002f081595a8-c000.snappy.parquet.crc new file mode 100644 index 0000000..06db52b Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00001-059ca701-6a3b-42ea-942a-002f081595a8-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00001-95e1738e-f636-4c46-b7f0-df7c08686f10-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00001-95e1738e-f636-4c46-b7f0-df7c08686f10-c000.snappy.parquet.crc new file mode 100644 index 0000000..f3cf3aa Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00001-95e1738e-f636-4c46-b7f0-df7c08686f10-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00001-b3fc3245-9685-4e83-84a0-46c98096547c-c000.snappy.parquet.crc 
b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00001-b3fc3245-9685-4e83-84a0-46c98096547c-c000.snappy.parquet.crc new file mode 100644 index 0000000..af7411a Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00001-b3fc3245-9685-4e83-84a0-46c98096547c-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00001-fc568fc3-e35d-4f6b-b437-c8d98de011b3-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00001-fc568fc3-e35d-4f6b-b437-c8d98de011b3-c000.snappy.parquet.crc new file mode 100644 index 0000000..879be91 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00001-fc568fc3-e35d-4f6b-b437-c8d98de011b3-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00002-39105fc1-4eff-4dd3-95a9-071db7f13100-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00002-39105fc1-4eff-4dd3-95a9-071db7f13100-c000.snappy.parquet.crc new file mode 100644 index 0000000..fd304aa Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00002-39105fc1-4eff-4dd3-95a9-071db7f13100-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00002-51ae934a-79d2-4af2-a4f8-7e87ac901025-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00002-51ae934a-79d2-4af2-a4f8-7e87ac901025-c000.snappy.parquet.crc new file mode 100644 index 0000000..cde8552 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00002-51ae934a-79d2-4af2-a4f8-7e87ac901025-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00002-72f53ac3-bd98-4dcd-9011-5267622cc106-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00002-72f53ac3-bd98-4dcd-9011-5267622cc106-c000.snappy.parquet.crc new file mode 
100644 index 0000000..6a34583 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00002-72f53ac3-bd98-4dcd-9011-5267622cc106-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00002-d62dbad4-1f60-4bbc-a56e-13375d352390-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00002-d62dbad4-1f60-4bbc-a56e-13375d352390-c000.snappy.parquet.crc new file mode 100644 index 0000000..d07b943 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00002-d62dbad4-1f60-4bbc-a56e-13375d352390-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00003-0a981679-3d99-4ba8-9da2-b2e4ace767a3-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00003-0a981679-3d99-4ba8-9da2-b2e4ace767a3-c000.snappy.parquet.crc new file mode 100644 index 0000000..cbe25f4 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00003-0a981679-3d99-4ba8-9da2-b2e4ace767a3-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00003-8c55451e-c4a7-47f7-997d-6b5c3641462d-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00003-8c55451e-c4a7-47f7-997d-6b5c3641462d-c000.snappy.parquet.crc new file mode 100644 index 0000000..3d7bfcc Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00003-8c55451e-c4a7-47f7-997d-6b5c3641462d-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00003-b73c348f-59c3-434f-9dfd-ea607ff49a07-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00003-b73c348f-59c3-434f-9dfd-ea607ff49a07-c000.snappy.parquet.crc new file mode 100644 index 0000000..a75f8bd Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00003-b73c348f-59c3-434f-9dfd-ea607ff49a07-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00003-e7063d3a-1894-4014-addf-bc6599b0b1c3-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00003-e7063d3a-1894-4014-addf-bc6599b0b1c3-c000.snappy.parquet.crc new file mode 100644 index 0000000..43cf4dc Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00003-e7063d3a-1894-4014-addf-bc6599b0b1c3-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00004-24cee708-2e14-4b70-8896-f832d3ba4448-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00004-24cee708-2e14-4b70-8896-f832d3ba4448-c000.snappy.parquet.crc new file mode 100644 index 0000000..3e25f8e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00004-24cee708-2e14-4b70-8896-f832d3ba4448-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00004-c4d85630-3f8b-4b4e-bc53-5ea71fdde5e4-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00004-c4d85630-3f8b-4b4e-bc53-5ea71fdde5e4-c000.snappy.parquet.crc new file mode 100644 index 0000000..7660cd0 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00004-c4d85630-3f8b-4b4e-bc53-5ea71fdde5e4-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00004-e232f22d-e327-4693-bdbf-9e6169f362e4-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00004-e232f22d-e327-4693-bdbf-9e6169f362e4-c000.snappy.parquet.crc new file mode 100644 index 0000000..a110d5e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00004-e232f22d-e327-4693-bdbf-9e6169f362e4-c000.snappy.parquet.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00004-ef12b3a8-d41e-47b7-ac5d-2b9195472ad6-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00004-ef12b3a8-d41e-47b7-ac5d-2b9195472ad6-c000.snappy.parquet.crc new file mode 100644 index 0000000..c637a43 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00004-ef12b3a8-d41e-47b7-ac5d-2b9195472ad6-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00005-08a4ca31-6475-42ad-a5cc-a4635414e33e-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00005-08a4ca31-6475-42ad-a5cc-a4635414e33e-c000.snappy.parquet.crc new file mode 100644 index 0000000..8e04e25 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00005-08a4ca31-6475-42ad-a5cc-a4635414e33e-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00005-5932b641-c16d-4311-a633-c5a637d6fe9d-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00005-5932b641-c16d-4311-a633-c5a637d6fe9d-c000.snappy.parquet.crc new file mode 100644 index 0000000..fa7859f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00005-5932b641-c16d-4311-a633-c5a637d6fe9d-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00005-76253b39-d9cc-4901-a96f-7e977a01b22f-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00005-76253b39-d9cc-4901-a96f-7e977a01b22f-c000.snappy.parquet.crc new file mode 100644 index 0000000..341572a Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00005-76253b39-d9cc-4901-a96f-7e977a01b22f-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00005-9906cb6f-98a5-4bf5-a278-e3700b3ad79f-c000.snappy.parquet.crc 
b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00005-9906cb6f-98a5-4bf5-a278-e3700b3ad79f-c000.snappy.parquet.crc new file mode 100644 index 0000000..bda5765 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00005-9906cb6f-98a5-4bf5-a278-e3700b3ad79f-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00006-0926903d-b4bd-4c65-8f57-c56e699c958c-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00006-0926903d-b4bd-4c65-8f57-c56e699c958c-c000.snappy.parquet.crc new file mode 100644 index 0000000..5c055cc Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00006-0926903d-b4bd-4c65-8f57-c56e699c958c-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00006-375fbc01-8bd9-437f-831c-41d8ce9bda0e-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00006-375fbc01-8bd9-437f-831c-41d8ce9bda0e-c000.snappy.parquet.crc new file mode 100644 index 0000000..6ae0367 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00006-375fbc01-8bd9-437f-831c-41d8ce9bda0e-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00006-4d5c4fa1-a97e-4c70-aa70-31ae296086ef-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00006-4d5c4fa1-a97e-4c70-aa70-31ae296086ef-c000.snappy.parquet.crc new file mode 100644 index 0000000..b6cb471 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00006-4d5c4fa1-a97e-4c70-aa70-31ae296086ef-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00006-5fa1af64-92db-4103-9b58-b5a429dc9df7-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00006-5fa1af64-92db-4103-9b58-b5a429dc9df7-c000.snappy.parquet.crc new file mode 
100644 index 0000000..a8e0368 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00006-5fa1af64-92db-4103-9b58-b5a429dc9df7-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00007-02e39958-40d8-4ec1-a9da-b09fc6e68e28-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00007-02e39958-40d8-4ec1-a9da-b09fc6e68e28-c000.snappy.parquet.crc new file mode 100644 index 0000000..5d44ddb Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00007-02e39958-40d8-4ec1-a9da-b09fc6e68e28-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00007-3e292979-e9b6-4490-abcd-bdc8c991ca93-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00007-3e292979-e9b6-4490-abcd-bdc8c991ca93-c000.snappy.parquet.crc new file mode 100644 index 0000000..e7b280b Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00007-3e292979-e9b6-4490-abcd-bdc8c991ca93-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00007-53e01b06-b778-412d-8875-ab8afb83606a-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00007-53e01b06-b778-412d-8875-ab8afb83606a-c000.snappy.parquet.crc new file mode 100644 index 0000000..b4d0605 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00007-53e01b06-b778-412d-8875-ab8afb83606a-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00007-95d9af21-41a5-4ba0-82e5-4339e909ff16-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00007-95d9af21-41a5-4ba0-82e5-4339e909ff16-c000.snappy.parquet.crc new file mode 100644 index 0000000..e4dbea8 Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/identifiers/.part-00007-95d9af21-41a5-4ba0-82e5-4339e909ff16-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000000.crc.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000000.crc.crc new file mode 100644 index 0000000..bbffabb Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000000.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000000.json.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000000.json.crc new file mode 100644 index 0000000..8a0d770 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000000.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000001.crc.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000001.crc.crc new file mode 100644 index 0000000..d097cae Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000001.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000001.json.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000001.json.crc new file mode 100644 index 0000000..29bdf4c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000001.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000002.crc.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000002.crc.crc new file mode 100644 index 0000000..c2dea0c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000002.crc.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000002.json.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000002.json.crc new file mode 100644 index 0000000..cc8b7db Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000002.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000003.crc.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000003.crc.crc new file mode 100644 index 0000000..a1e4c33 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000003.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000003.json.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000003.json.crc new file mode 100644 index 0000000..50005a5 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000003.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000004.crc.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000004.crc.crc new file mode 100644 index 0000000..df8eae8 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000004.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000004.json.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000004.json.crc new file mode 100644 index 0000000..5137425 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/.00000000000000000004.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000000.crc 
b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000000.crc new file mode 100644 index 0000000..b10bad6 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000000.crc @@ -0,0 +1 @@ +{"txnId":"93156bcc-0516-43b9-8aba-b74c099cbd22","tableSizeBytes":0,"numFiles":0,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"4f2e1d80-a7f5-402d-8dd5-04f1e168bc73","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[]}","partitionColumns":[],"configuration":{},"createdTime":1766549213086},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[]} diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000000.json b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000000.json new file mode 100644 index 0000000..932dcf1 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1766549213138,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"93156bcc-0516-43b9-8aba-b74c099cbd22"}} +{"metaData":{"id":"4f2e1d80-a7f5-402d-8dd5-04f1e168bc73","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[]}","partitionColumns":[],"configuration":{},"createdTime":1766549213086}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000001.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000001.crc new file mode 100644 index 0000000..97c8c80 --- /dev/null +++ 
b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000001.crc @@ -0,0 +1 @@ +{"txnId":"7b27cb51-6a99-42e1-9c09-024e3897a8f8","tableSizeBytes":247873,"numFiles":8,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"4f2e1d80-a7f5-402d-8dd5-04f1e168bc73","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"identifier\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"source\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"description\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549213086},"protocol":{"minReaderVersion":1,"minWriterVersion":2}} diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000001.json b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000001.json new file mode 100644 index 0000000..3d37064 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000001.json @@ -0,0 +1,10 @@ +{"commitInfo":{"timestamp":1766549219344,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"5507","numOutputBytes":"247873"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"7b27cb51-6a99-42e1-9c09-024e3897a8f8"}} 
+{"metaData":{"id":"4f2e1d80-a7f5-402d-8dd5-04f1e168bc73","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"identifier\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"source\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"description\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549213086}} +{"add":{"path":"part-00000-cbf92e27-f9e4-4e89-a8c2-1d2755a0e39b-c000.snappy.parquet","partitionValues":{},"size":30252,"modificationTime":1766549219316,"dataChange":true,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"identifier\":\"UniProt:A0A0S1X9S7\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa4eefe-b062-5f26-8f77\",\"identifier\":\"UniProt:Q9YG90\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} +{"add":{"path":"part-00001-b3fc3245-9685-4e83-84a0-46c98096547c-c000.snappy.parquet","partitionValues":{},"size":32031,"modificationTime":1766549219325,"dataChange":true,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"identifier\":\"UniProt:A0A5C0XQU4\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"identifier\":\"UniProt:Q9YG22\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} 
+{"add":{"path":"part-00002-72f53ac3-bd98-4dcd-9011-5267622cc106-c000.snappy.parquet","partitionValues":{},"size":31513,"modificationTime":1766549219316,"dataChange":true,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"identifier\":\"UniProt:A0A0E3NC87\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"identifier\":\"UniProt:Q9YFU8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} +{"add":{"path":"part-00003-8c55451e-c4a7-47f7-997d-6b5c3641462d-c000.snappy.parquet","partitionValues":{},"size":31558,"modificationTime":1766549219327,"dataChange":true,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"identifier\":\"UniProt:A0A0A1GKA2\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"identifier\":\"UniProt:Q9YFA8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} +{"add":{"path":"part-00004-24cee708-2e14-4b70-8896-f832d3ba4448-c000.snappy.parquet","partitionValues":{},"size":30590,"modificationTime":1766549219329,"dataChange":true,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_008bae16-5b42-5bfd-a15c\",\"identifier\":\"UniProt:A0A256XLS3\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fedf6b23-d61c-56dc-b256\",\"identifier\":\"UniProt:Q9YFJ2\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} 
+{"add":{"path":"part-00005-76253b39-d9cc-4901-a96f-7e977a01b22f-c000.snappy.parquet","partitionValues":{},"size":30949,"modificationTime":1766549219327,"dataChange":true,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"identifier\":\"UniProt:A0A8F5BMX8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"identifier\":\"UniProt:Q9YG32\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} +{"add":{"path":"part-00006-5fa1af64-92db-4103-9b58-b5a429dc9df7-c000.snappy.parquet","partitionValues":{},"size":30163,"modificationTime":1766549219319,"dataChange":true,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"identifier\":\"UniProt:A0A0F8XYN9\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"identifier\":\"UniProt:Q9YFS9\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} +{"add":{"path":"part-00007-02e39958-40d8-4ec1-a9da-b09fc6e68e28-c000.snappy.parquet","partitionValues":{},"size":30817,"modificationTime":1766549219318,"dataChange":true,"stats":"{\"numRecords\":691,\"minValues\":{\"entity_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"identifier\":\"UniProt:A0A0A7GEY4\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"identifier\":\"UniProt:Q9YEF5\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000002.crc 
b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000002.crc new file mode 100644 index 0000000..7b2029b --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000002.crc @@ -0,0 +1 @@ +{"txnId":"b5ac075b-041c-4cfa-86d5-7c32c57de787","tableSizeBytes":496668,"numFiles":16,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"4f2e1d80-a7f5-402d-8dd5-04f1e168bc73","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"identifier\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"source\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"description\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549213086},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[{"path":"part-00007-02e39958-40d8-4ec1-a9da-b09fc6e68e28-c000.snappy.parquet","partitionValues":{},"size":30817,"modificationTime":1766549219318,"dataChange":false,"stats":"{\"numRecords\":691,\"minValues\":{\"entity_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"identifier\":\"UniProt:A0A0A7GEY4\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"identifier\":\"UniProt:Q9YEF5\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00003-8c55451e-c4a7-47f7-997d-6b5c3641462d-c000.snappy.parquet","partitionValues":{},"size":31558,"modificationTime":1766549219327,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"identifier\":\"UniProt:A0A0A1GKA2\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"identifier\":\"UniProt:Q9YFA8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00001-fc568fc3-e35d-4f6b-b437-c8d98de011b3-c000.snappy.parquet","partitionValues":{},"size":31460,"modificationTime":1766549225699,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_00df9ef5-d9b4-5654-9d2a\",\"identifier\":\"UniProt:A0A384E143\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"identifier\":\"UniProt:Q9YF02\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00007-53e01b06-b778-412d-8875-ab8afb83606a-c000.snappy.parquet","partitionValues":{},"size":32371,"modificationTime":1766549225701,"dataChange":false,"stats":"{\"numRecords\":698,\"minValues\":{\"entity_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"identifier\":\"UniProt:A0B5B1\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"identifier\":\"UniProt:Q9YG51\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00002-d62dbad4-1f60-4bbc-a56e-13375d352390-c000.snappy.parquet","partitionValues":{},"size":31471,"modificationTime":1766549225702,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"identifier\":\"UniProt:A0A075HNX4\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"identifier\":\"UniProt:Q9YG88\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00004-24cee708-2e14-4b70-8896-f832d3ba4448-c000.snappy.parquet","partitionValues":{},"size":30590,"modificationTime":1766549219329,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_008bae16-5b42-5bfd-a15c\",\"identifier\":\"UniProt:A0A256XLS3\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fedf6b23-d61c-56dc-b256\",\"identifier\":\"UniProt:Q9YFJ2\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00003-e7063d3a-1894-4014-addf-bc6599b0b1c3-c000.snappy.parquet","partitionValues":{},"size":30123,"modificationTime":1766549225700,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"identifier\":\"UniProt:A0A0A1GNW8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"identifier\":\"UniProt:S5ZTR0\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00004-ef12b3a8-d41e-47b7-ac5d-2b9195472ad6-c000.snappy.parquet","partitionValues":{},"size":29904,"modificationTime":1766549225710,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"identifier\":\"UniProt:A0A1U8QYC0\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"identifier\":\"UniProt:Q9YGA3\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00001-b3fc3245-9685-4e83-84a0-46c98096547c-c000.snappy.parquet","partitionValues":{},"size":32031,"modificationTime":1766549219325,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"identifier\":\"UniProt:A0A5C0XQU4\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"identifier\":\"UniProt:Q9YG22\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00005-9906cb6f-98a5-4bf5-a278-e3700b3ad79f-c000.snappy.parquet","partitionValues":{},"size":31582,"modificationTime":1766549225699,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_00957fe6-8bfd-5ecd-9b08\",\"identifier\":\"UniProt:A0A0E3JT70\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"identifier\":\"UniProt:Q9YG68\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00000-cbf92e27-f9e4-4e89-a8c2-1d2755a0e39b-c000.snappy.parquet","partitionValues":{},"size":30252,"modificationTime":1766549219316,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"identifier\":\"UniProt:A0A0S1X9S7\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa4eefe-b062-5f26-8f77\",\"identifier\":\"UniProt:Q9YG90\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00000-99cfe050-fc27-4e15-b5e4-9c72a7f20670-c000.snappy.parquet","partitionValues":{},"size":30328,"modificationTime":1766549225707,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"identifier\":\"UniProt:A0A0E3NEE1\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"identifier\":\"UniProt:Q9YFQ8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00006-5fa1af64-92db-4103-9b58-b5a429dc9df7-c000.snappy.parquet","partitionValues":{},"size":30163,"modificationTime":1766549219319,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"identifier\":\"UniProt:A0A0F8XYN9\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"identifier\":\"UniProt:Q9YFS9\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00002-72f53ac3-bd98-4dcd-9011-5267622cc106-c000.snappy.parquet","partitionValues":{},"size":31513,"modificationTime":1766549219316,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"identifier\":\"UniProt:A0A0E3NC87\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"identifier\":\"UniProt:Q9YFU8\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00005-76253b39-d9cc-4901-a96f-7e977a01b22f-c000.snappy.parquet","partitionValues":{},"size":30949,"modificationTime":1766549219327,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"identifier\":\"UniProt:A0A8F5BMX8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"identifier\":\"UniProt:Q9YG32\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00006-4d5c4fa1-a97e-4c70-aa70-31ae296086ef-c000.snappy.parquet","partitionValues":{},"size":31556,"modificationTime":1766549225688,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_0102d35c-20a5-5f8d-8175\",\"identifier\":\"UniProt:A0A0F8V8L2\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"identifier\":\"UniProt:Q9YGA7\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}]} diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000002.json b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000002.json new file mode 100644 index 0000000..15c7d38 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000002.json @@ -0,0 +1,9 @@ +{"commitInfo":{"timestamp":1766549225713,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"5549","numOutputBytes":"248795"},"engineInfo":"Apache-Spark/4.0.1 
Delta-Lake/4.0.0","txnId":"b5ac075b-041c-4cfa-86d5-7c32c57de787"}} +{"add":{"path":"part-00000-99cfe050-fc27-4e15-b5e4-9c72a7f20670-c000.snappy.parquet","partitionValues":{},"size":30328,"modificationTime":1766549225707,"dataChange":true,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"identifier\":\"UniProt:A0A0E3NEE1\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"identifier\":\"UniProt:Q9YFQ8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} +{"add":{"path":"part-00001-fc568fc3-e35d-4f6b-b437-c8d98de011b3-c000.snappy.parquet","partitionValues":{},"size":31460,"modificationTime":1766549225699,"dataChange":true,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_00df9ef5-d9b4-5654-9d2a\",\"identifier\":\"UniProt:A0A384E143\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"identifier\":\"UniProt:Q9YF02\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} +{"add":{"path":"part-00002-d62dbad4-1f60-4bbc-a56e-13375d352390-c000.snappy.parquet","partitionValues":{},"size":31471,"modificationTime":1766549225702,"dataChange":true,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"identifier\":\"UniProt:A0A075HNX4\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"identifier\":\"UniProt:Q9YG88\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} 
+{"add":{"path":"part-00003-e7063d3a-1894-4014-addf-bc6599b0b1c3-c000.snappy.parquet","partitionValues":{},"size":30123,"modificationTime":1766549225700,"dataChange":true,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"identifier\":\"UniProt:A0A0A1GNW8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"identifier\":\"UniProt:S5ZTR0\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} +{"add":{"path":"part-00004-ef12b3a8-d41e-47b7-ac5d-2b9195472ad6-c000.snappy.parquet","partitionValues":{},"size":29904,"modificationTime":1766549225710,"dataChange":true,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"identifier\":\"UniProt:A0A1U8QYC0\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"identifier\":\"UniProt:Q9YGA3\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} +{"add":{"path":"part-00005-9906cb6f-98a5-4bf5-a278-e3700b3ad79f-c000.snappy.parquet","partitionValues":{},"size":31582,"modificationTime":1766549225699,"dataChange":true,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_00957fe6-8bfd-5ecd-9b08\",\"identifier\":\"UniProt:A0A0E3JT70\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"identifier\":\"UniProt:Q9YG68\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} 
+{"add":{"path":"part-00006-4d5c4fa1-a97e-4c70-aa70-31ae296086ef-c000.snappy.parquet","partitionValues":{},"size":31556,"modificationTime":1766549225688,"dataChange":true,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_0102d35c-20a5-5f8d-8175\",\"identifier\":\"UniProt:A0A0F8V8L2\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"identifier\":\"UniProt:Q9YGA7\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} +{"add":{"path":"part-00007-53e01b06-b778-412d-8875-ab8afb83606a-c000.snappy.parquet","partitionValues":{},"size":32371,"modificationTime":1766549225701,"dataChange":true,"stats":"{\"numRecords\":698,\"minValues\":{\"entity_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"identifier\":\"UniProt:A0B5B1\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"identifier\":\"UniProt:Q9YG51\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000003.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000003.crc new file mode 100644 index 0000000..6645a6a --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000003.crc @@ -0,0 +1 @@ 
+{"txnId":"21189619-f59f-4692-8217-53942cd10e4a","tableSizeBytes":744239,"numFiles":24,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"4f2e1d80-a7f5-402d-8dd5-04f1e168bc73","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"identifier\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"source\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"description\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549213086},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[{"path":"part-00007-02e39958-40d8-4ec1-a9da-b09fc6e68e28-c000.snappy.parquet","partitionValues":{},"size":30817,"modificationTime":1766549219318,"dataChange":false,"stats":"{\"numRecords\":691,\"minValues\":{\"entity_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"identifier\":\"UniProt:A0A0A7GEY4\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"identifier\":\"UniProt:Q9YEF5\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00003-8c55451e-c4a7-47f7-997d-6b5c3641462d-c000.snappy.parquet","partitionValues":{},"size":31558,"modificationTime":1766549219327,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"identifier\":\"UniProt:A0A0A1GKA2\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"identifier\":\"UniProt:Q9YFA8\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00001-fc568fc3-e35d-4f6b-b437-c8d98de011b3-c000.snappy.parquet","partitionValues":{},"size":31460,"modificationTime":1766549225699,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_00df9ef5-d9b4-5654-9d2a\",\"identifier\":\"UniProt:A0A384E143\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"identifier\":\"UniProt:Q9YF02\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00004-c4d85630-3f8b-4b4e-bc53-5ea71fdde5e4-c000.snappy.parquet","partitionValues":{},"size":29843,"modificationTime":1766549233591,"dataChange":false,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"identifier\":\"UniProt:A0A8F5BKY4\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffbd3905-3836-5893-bd75\",\"identifier\":\"UniProt:Q9YEZ1\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00002-d62dbad4-1f60-4bbc-a56e-13375d352390-c000.snappy.parquet","partitionValues":{},"size":31471,"modificationTime":1766549225702,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"identifier\":\"UniProt:A0A075HNX4\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"identifier\":\"UniProt:Q9YG88\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00002-39105fc1-4eff-4dd3-95a9-071db7f13100-c000.snappy.parquet","partitionValues":{},"size":30896,"modificationTime":1766549233591,"dataChange":false,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_0061faab-bd88-562c-8a31\",\"identifier\":\"UniProt:A0B560\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe9c77d9-be27-518e-8e0c\",\"identifier\":\"UniProt:Q9YFN1\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00003-e7063d3a-1894-4014-addf-bc6599b0b1c3-c000.snappy.parquet","partitionValues":{},"size":30123,"modificationTime":1766549225700,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"identifier\":\"UniProt:A0A0A1GNW8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"identifier\":\"UniProt:S5ZTR0\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00004-ef12b3a8-d41e-47b7-ac5d-2b9195472ad6-c000.snappy.parquet","partitionValues":{},"size":29904,"modificationTime":1766549225710,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"identifier\":\"UniProt:A0A1U8QYC0\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"identifier\":\"UniProt:Q9YGA3\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00001-059ca701-6a3b-42ea-942a-002f081595a8-c000.snappy.parquet","partitionValues":{},"size":31077,"modificationTime":1766549233599,"dataChange":false,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_00365069-eeac-5442-8f11\",\"identifier\":\"UniProt:A0B923\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff216b32-af2a-545f-8e61\",\"identifier\":\"UniProt:Q9YF98\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00003-0a981679-3d99-4ba8-9da2-b2e4ace767a3-c000.snappy.parquet","partitionValues":{},"size":31308,"modificationTime":1766549233599,"dataChange":false,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"identifier\":\"UniProt:A0B5Q5\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"identifier\":\"UniProt:Q9YFY7\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00000-cbf92e27-f9e4-4e89-a8c2-1d2755a0e39b-c000.snappy.parquet","partitionValues":{},"size":30252,"modificationTime":1766549219316,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"identifier\":\"UniProt:A0A0S1X9S7\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa4eefe-b062-5f26-8f77\",\"identifier\":\"UniProt:Q9YG90\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00000-99cfe050-fc27-4e15-b5e4-9c72a7f20670-c000.snappy.parquet","partitionValues":{},"size":30328,"modificationTime":1766549225707,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"identifier\":\"UniProt:A0A0E3NEE1\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"identifier\":\"UniProt:Q9YFQ8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00006-5fa1af64-92db-4103-9b58-b5a429dc9df7-c000.snappy.parquet","partitionValues":{},"size":30163,"modificationTime":1766549219319,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"identifier\":\"UniProt:A0A0F8XYN9\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"identifier\":\"UniProt:Q9YFS9\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00002-72f53ac3-bd98-4dcd-9011-5267622cc106-c000.snappy.parquet","partitionValues":{},"size":31513,"modificationTime":1766549219316,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"identifier\":\"UniProt:A0A0E3NC87\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"identifier\":\"UniProt:Q9YFU8\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00005-76253b39-d9cc-4901-a96f-7e977a01b22f-c000.snappy.parquet","partitionValues":{},"size":30949,"modificationTime":1766549219327,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"identifier\":\"UniProt:A0A8F5BMX8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"identifier\":\"UniProt:Q9YG32\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00006-4d5c4fa1-a97e-4c70-aa70-31ae296086ef-c000.snappy.parquet","partitionValues":{},"size":31556,"modificationTime":1766549225688,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_0102d35c-20a5-5f8d-8175\",\"identifier\":\"UniProt:A0A0F8V8L2\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"identifier\":\"UniProt:Q9YGA7\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00007-53e01b06-b778-412d-8875-ab8afb83606a-c000.snappy.parquet","partitionValues":{},"size":32371,"modificationTime":1766549225701,"dataChange":false,"stats":"{\"numRecords\":698,\"minValues\":{\"entity_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"identifier\":\"UniProt:A0B5B1\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"identifier\":\"UniProt:Q9YG51\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00004-24cee708-2e14-4b70-8896-f832d3ba4448-c000.snappy.parquet","partitionValues":{},"size":30590,"modificationTime":1766549219329,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_008bae16-5b42-5bfd-a15c\",\"identifier\":\"UniProt:A0A256XLS3\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fedf6b23-d61c-56dc-b256\",\"identifier\":\"UniProt:Q9YFJ2\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00007-95d9af21-41a5-4ba0-82e5-4339e909ff16-c000.snappy.parquet","partitionValues":{},"size":31446,"modificationTime":1766549233599,"dataChange":false,"stats":"{\"numRecords\":690,\"minValues\":{\"entity_id\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"identifier\":\"UniProt:A0A110A2W7\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"identifier\":\"UniProt:Q9YEX9\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00001-b3fc3245-9685-4e83-84a0-46c98096547c-c000.snappy.parquet","partitionValues":{},"size":32031,"modificationTime":1766549219325,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"identifier\":\"UniProt:A0A5C0XQU4\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"identifier\":\"UniProt:Q9YG22\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00005-9906cb6f-98a5-4bf5-a278-e3700b3ad79f-c000.snappy.parquet","partitionValues":{},"size":31582,"modificationTime":1766549225699,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_00957fe6-8bfd-5ecd-9b08\",\"identifier\":\"UniProt:A0A0E3JT70\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"identifier\":\"UniProt:Q9YG68\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00005-08a4ca31-6475-42ad-a5cc-a4635414e33e-c000.snappy.parquet","partitionValues":{},"size":30887,"modificationTime":1766549233593,"dataChange":false,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"identifier\":\"UniProt:A0B562\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"identifier\":\"UniProt:Q9YF81\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00006-375fbc01-8bd9-437f-831c-41d8ce9bda0e-c000.snappy.parquet","partitionValues":{},"size":31616,"modificationTime":1766549233604,"dataChange":false,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_00219a7c-c28c-5aaf-928e\",\"identifier\":\"UniProt:A0B532\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe67f279-f090-5c4e-b38d\",\"identifier\":\"UniProt:Q9YF95\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00000-f69d9253-5a6d-460b-bd51-59cc7a065749-c000.snappy.parquet","partitionValues":{},"size":30498,"modificationTime":1766549233599,"dataChange":false,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_006f3d67-56f0-5829-b099\",\"identifier\":\"UniProt:A0A384KAR1\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcedd0e-face-5ec6-8e0b\",\"identifier\":\"UniProt:Q9YFZ1\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}]} diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000003.json b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000003.json new file mode 100644 index 0000000..7ef840b --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000003.json @@ -0,0 +1,9 @@ +{"commitInfo":{"timestamp":1766549233607,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"5499","numOutputBytes":"247571"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"21189619-f59f-4692-8217-53942cd10e4a"}} +{"add":{"path":"part-00000-f69d9253-5a6d-460b-bd51-59cc7a065749-c000.snappy.parquet","partitionValues":{},"size":30498,"modificationTime":1766549233599,"dataChange":true,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_006f3d67-56f0-5829-b099\",\"identifier\":\"UniProt:A0A384KAR1\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcedd0e-face-5ec6-8e0b\",\"identifier\":\"UniProt:Q9YFZ1\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} +{"add":{"path":"part-00001-059ca701-6a3b-42ea-942a-002f081595a8-c000.snappy.parquet","partitionValues":{},"size":31077,"modificationTime":1766549233599,"dataChange":true,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_00365069-eeac-5442-8f11\",\"identifier\":\"UniProt:A0B923\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff216b32-af2a-545f-8e61\",\"identifier\":\"UniProt:Q9YF98\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} +{"add":{"path":"part-00002-39105fc1-4eff-4dd3-95a9-071db7f13100-c000.snappy.parquet","partitionValues":{},"size":30896,"modificationTime":1766549233591,"dataChange":true,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_0061faab-bd88-562c-8a31\",\"identifier\":\"UniProt:A0B560\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe9c77d9-be27-518e-8e0c\",\"identifier\":\"UniProt:Q9YFN1\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} +{"add":{"path":"part-00003-0a981679-3d99-4ba8-9da2-b2e4ace767a3-c000.snappy.parquet","partitionValues":{},"size":31308,"modificationTime":1766549233599,"dataChange":true,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"identifier\":\"UniProt:A0B5Q5\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"identifier\":\"UniProt:Q9YFY7\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} 
+{"add":{"path":"part-00004-c4d85630-3f8b-4b4e-bc53-5ea71fdde5e4-c000.snappy.parquet","partitionValues":{},"size":29843,"modificationTime":1766549233591,"dataChange":true,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"identifier\":\"UniProt:A0A8F5BKY4\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffbd3905-3836-5893-bd75\",\"identifier\":\"UniProt:Q9YEZ1\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} +{"add":{"path":"part-00005-08a4ca31-6475-42ad-a5cc-a4635414e33e-c000.snappy.parquet","partitionValues":{},"size":30887,"modificationTime":1766549233593,"dataChange":true,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"identifier\":\"UniProt:A0B562\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"identifier\":\"UniProt:Q9YF81\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} +{"add":{"path":"part-00006-375fbc01-8bd9-437f-831c-41d8ce9bda0e-c000.snappy.parquet","partitionValues":{},"size":31616,"modificationTime":1766549233604,"dataChange":true,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_00219a7c-c28c-5aaf-928e\",\"identifier\":\"UniProt:A0B532\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe67f279-f090-5c4e-b38d\",\"identifier\":\"UniProt:Q9YF95\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} 
+{"add":{"path":"part-00007-95d9af21-41a5-4ba0-82e5-4339e909ff16-c000.snappy.parquet","partitionValues":{},"size":31446,"modificationTime":1766549233599,"dataChange":true,"stats":"{\"numRecords\":690,\"minValues\":{\"entity_id\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"identifier\":\"UniProt:A0A110A2W7\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"identifier\":\"UniProt:Q9YEX9\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000004.crc b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000004.crc new file mode 100644 index 0000000..38c5b3f --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000004.crc @@ -0,0 +1 @@ +{"txnId":"904cc402-0b5d-451f-ba96-d60d9a74ffd1","tableSizeBytes":979480,"numFiles":32,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"4f2e1d80-a7f5-402d-8dd5-04f1e168bc73","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"identifier\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"source\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"description\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549213086},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[{"path":"part-00001-95e1738e-f636-4c46-b7f0-df7c08686f10-c000.snappy.parquet","partitionValues":{},"size":29344,"modificationTime":1766549239330,"dataChange":false,"stats":"{\"numRecords\":643,\"minValues\":{\"entity_id\":\"cdm_prot_01672ce0-33eb-50c0-8bb3\",\"identifier
\":\"UniProt:A0A151EH88\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9aba-bce9-56de-b269\",\"identifier\":\"UniProt:Q9YGA5\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00007-02e39958-40d8-4ec1-a9da-b09fc6e68e28-c000.snappy.parquet","partitionValues":{},"size":30817,"modificationTime":1766549219318,"dataChange":false,"stats":"{\"numRecords\":691,\"minValues\":{\"entity_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"identifier\":\"UniProt:A0A0A7GEY4\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"identifier\":\"UniProt:Q9YEF5\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00003-8c55451e-c4a7-47f7-997d-6b5c3641462d-c000.snappy.parquet","partitionValues":{},"size":31558,"modificationTime":1766549219327,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"identifier\":\"UniProt:A0A0A1GKA2\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"identifier\":\"UniProt:Q9YFA8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00001-fc568fc3-e35d-4f6b-b437-c8d98de011b3-c000.snappy.parquet","partitionValues":{},"size":31460,"modificationTime":1766549225699,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_00df9ef5-d9b4-5654-9d2a\",\"identifier\":\"UniProt:A0A384E143\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"identifier\":\"UniProt:Q9YF02\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00000-3f84591a-23b6-4cdf-8090-eea8896ab8d2-c000.snappy.parquet","partitionValues":{},"size":29677,"modificationTime":1766549239341,"dataChange":false,"stats":"{\"numRecords\":643,\"minValues\":{\"entity_id\":\"cdm_prot_0003b7cb-bf13-5646-9978\",\"identifier\":\"UniProt:A0A8F5BRT0\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fef0eb9f-57f2-532d-b5b2\",\"identifier\":\"UniProt:Q9YFY3\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00004-c4d85630-3f8b-4b4e-bc53-5ea71fdde5e4-c000.snappy.parquet","partitionValues":{},"size":29843,"modificationTime":1766549233591,"dataChange":false,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"identifier\":\"UniProt:A0A8F5BKY4\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffbd3905-3836-5893-bd75\",\"identifier\":\"UniProt:Q9YEZ1\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00002-d62dbad4-1f60-4bbc-a56e-13375d352390-c000.snappy.parquet","partitionValues":{},"size":31471,"modificationTime":1766549225702,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"identifier\":\"UniProt:A0A075HNX4\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"identifier\":\"UniProt:Q9YG88\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00002-39105fc1-4eff-4dd3-95a9-071db7f13100-c000.snappy.parquet","partitionValues":{},"size":30896,"modificationTime":1766549233591,"dataChange":false,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_0061faab-bd88-562c-8a31\",\"identifier\":\"UniProt:A0B560\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe9c77d9-be27-518e-8e0c\",\"identifier\":\"UniProt:Q9YFN1\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00003-e7063d3a-1894-4014-addf-bc6599b0b1c3-c000.snappy.parquet","partitionValues":{},"size":30123,"modificationTime":1766549225700,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"identifier\":\"UniProt:A0A0A1GNW8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"identifier\":\"UniProt:S5ZTR0\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00004-ef12b3a8-d41e-47b7-ac5d-2b9195472ad6-c000.snappy.parquet","partitionValues":{},"size":29904,"modificationTime":1766549225710,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"identifier\":\"UniProt:A0A1U8QYC0\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"identifier\":\"UniProt:Q9YGA3\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00001-059ca701-6a3b-42ea-942a-002f081595a8-c000.snappy.parquet","partitionValues":{},"size":31077,"modificationTime":1766549233599,"dataChange":false,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_00365069-eeac-5442-8f11\",\"identifier\":\"UniProt:A0B923\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff216b32-af2a-545f-8e61\",\"identifier\":\"UniProt:Q9YF98\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00003-b73c348f-59c3-434f-9dfd-ea607ff49a07-c000.snappy.parquet","partitionValues":{},"size":28811,"modificationTime":1766549239312,"dataChange":false,"stats":"{\"numRecords\":643,\"minValues\":{\"entity_id\":\"cdm_prot_0007879e-35c8-5917-ad11\",\"identifier\":\"UniProt:A0B9M2\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe66051-6a0a-55cc-81b2\",\"identifier\":\"UniProt:Q9YFG3\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00003-0a981679-3d99-4ba8-9da2-b2e4ace767a3-c000.snappy.parquet","partitionValues":{},"size":31308,"modificationTime":1766549233599,"dataChange":false,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"identifier\":\"UniProt:A0B5Q5\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"identifier\":\"UniProt:Q9YFY7\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00000-cbf92e27-f9e4-4e89-a8c2-1d2755a0e39b-c000.snappy.parquet","partitionValues":{},"size":30252,"modificationTime":1766549219316,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"identifier\":\"UniProt:A0A0S1X9S7\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa4eefe-b062-5f26-8f77\",\"identifier\":\"UniProt:Q9YG90\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00000-99cfe050-fc27-4e15-b5e4-9c72a7f20670-c000.snappy.parquet","partitionValues":{},"size":30328,"modificationTime":1766549225707,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"identifier\":\"UniProt:A0A0E3NEE1\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"identifier\":\"UniProt:Q9YFQ8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00005-5932b641-c16d-4311-a633-c5a637d6fe9d-c000.snappy.parquet","partitionValues":{},"size":29668,"modificationTime":1766549239338,"dataChange":false,"stats":"{\"numRecords\":643,\"minValues\":{\"entity_id\":\"cdm_prot_0048aaff-7f6b-5ca1-9274\",\"identifier\":\"UniProt:A0B5P4\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb5b181-1633-55f5-83ee\",\"identifier\":\"UniProt:Q9YFR7\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00006-5fa1af64-92db-4103-9b58-b5a429dc9df7-c000.snappy.parquet","partitionValues":{},"size":30163,"modificationTime":1766549219319,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"identifier\":\"UniProt:A0A0F8XYN9\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"identifier\":\"UniProt:Q9YFS9\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00002-72f53ac3-bd98-4dcd-9011-5267622cc106-c000.snappy.parquet","partitionValues":{},"size":31513,"modificationTime":1766549219316,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"identifier\":\"UniProt:A0A0E3NC87\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"identifier\":\"UniProt:Q9YFU8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00005-76253b39-d9cc-4901-a96f-7e977a01b22f-c000.snappy.parquet","partitionValues":{},"size":30949,"modificationTime":1766549219327,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"identifier\":\"UniProt:A0A8F5BMX8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"identifier\":\"UniProt:Q9YG32\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00006-4d5c4fa1-a97e-4c70-aa70-31ae296086ef-c000.snappy.parquet","partitionValues":{},"size":31556,"modificationTime":1766549225688,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_0102d35c-20a5-5f8d-8175\",\"identifier\":\"UniProt:A0A0F8V8L2\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"identifier\":\"UniProt:Q9YGA7\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00007-53e01b06-b778-412d-8875-ab8afb83606a-c000.snappy.parquet","partitionValues":{},"size":32371,"modificationTime":1766549225701,"dataChange":false,"stats":"{\"numRecords\":698,\"minValues\":{\"entity_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"identifier\":\"UniProt:A0B5B1\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"identifier\":\"UniProt:Q9YG51\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00007-3e292979-e9b6-4490-abcd-bdc8c991ca93-c000.snappy.parquet","partitionValues":{},"size":29880,"modificationTime":1766549239329,"dataChange":false,"stats":"{\"numRecords\":648,\"minValues\":{\"entity_id\":\"cdm_prot_0254412b-7072-5093-9d3c\",\"identifier\":\"UniProt:A0A384LLU8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffd6bc2d-9fdc-5fb9-a121\",\"identifier\":\"UniProt:Q9YDZ1\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00004-24cee708-2e14-4b70-8896-f832d3ba4448-c000.snappy.parquet","partitionValues":{},"size":30590,"modificationTime":1766549219329,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_008bae16-5b42-5bfd-a15c\",\"identifier\":\"UniProt:A0A256XLS3\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fedf6b23-d61c-56dc-b256\",\"identifier\":\"UniProt:Q9YFJ2\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00007-95d9af21-41a5-4ba0-82e5-4339e909ff16-c000.snappy.parquet","partitionValues":{},"size":31446,"modificationTime":1766549233599,"dataChange":false,"stats":"{\"numRecords\":690,\"minValues\":{\"entity_id\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"identifier\":\"UniProt:A0A110A2W7\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"identifier\":\"UniProt:Q9YEX9\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00001-b3fc3245-9685-4e83-84a0-46c98096547c-c000.snappy.parquet","partitionValues":{},"size":32031,"modificationTime":1766549219325,"dataChange":false,"stats":"{\"numRecords\":688,\"minValues\":{\"entity_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"identifier\":\"UniProt:A0A5C0XQU4\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"identifier\":\"UniProt:Q9YG22\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00005-9906cb6f-98a5-4bf5-a278-e3700b3ad79f-c000.snappy.parquet","partitionValues":{},"size":31582,"modificationTime":1766549225699,"dataChange":false,"stats":"{\"numRecords\":693,\"minValues\":{\"entity_id\":\"cdm_prot_00957fe6-8bfd-5ecd-9b08\",\"identifier\":\"UniProt:A0A0E3JT70\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"identifier\":\"UniProt:Q9YG68\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00004-e232f22d-e327-4693-bdbf-9e6169f362e4-c000.snappy.parquet","partitionValues":{},"size":29502,"modificationTime":1766549239331,"dataChange":false,"stats":"{\"numRecords\":643,\"minValues\":{\"entity_id\":\"cdm_prot_003753a9-9a55-505c-8ea7\",\"identifier\":\"UniProt:A0A384KM42\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff74305f-2499-5464-acc5\",\"identifier\":\"UniProt:Q9YFV8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00005-08a4ca31-6475-42ad-a5cc-a4635414e33e-c000.snappy.parquet","partitionValues":{},"size":30887,"modificationTime":1766549233593,"dataChange":false,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"identifier\":\"UniProt:A0B562\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"identifier\":\"UniProt:Q9YF81\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00006-375fbc01-8bd9-437f-831c-41d8ce9bda0e-c000.snappy.parquet","partitionValues":{},"size":31616,"modificationTime":1766549233604,"dataChange":false,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_00219a7c-c28c-5aaf-928e\",\"identifier\":\"UniProt:A0B532\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe67f279-f090-5c4e-b38d\",\"identifier\":\"UniProt:Q9YF95\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00000-f69d9253-5a6d-460b-bd51-59cc7a065749-c000.snappy.parquet","partitionValues":{},"size":30498,"modificationTime":1766549233599,"dataChange":false,"stats":"{\"numRecords\":687,\"minValues\":{\"entity_id\":\"cdm_prot_006f3d67-56f0-5829-b099\",\"identifier\":\"UniProt:A0A384KAR1\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcedd0e-face-5ec6-8e0b\",\"identifier\":\"UniProt:Q9YFZ1\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00002-51ae934a-79d2-4af2-a4f8-7e87ac901025-c000.snappy.parquet","partitionValues":{},"size":28519,"modificationTime":1766549239328,"dataChange":false,"stats":"{\"numRecords\":643,\"minValues\":{\"entity_id\":\"cdm_prot_0185e048-25c8-50dd-bfbf\",\"identifier\":\"UniProt:A0A8F5BLS2\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe66808c-aab1-5f12-970e\",\"identifier\":\"UniProt:Q9YGB5\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"},{"path":"part-00006-0926903d-b4bd-4c65-8f57-c56e699c958c-c000.snappy.parquet","partitionValues":{},"size":29840,"modificationTime":1766549239341,"dataChange":false,"stats":"{\"numRecords\":643,\"minValues\":{\"entity_id\":\"cdm_prot_00666a93-0a4c-5498-8cac\",\"identifier\":\"UniProt:A0A1U8QXI0\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffd1840d-3bb9-5f05-ad33\",\"identifier\":\"UniProt:Q9YEV5\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}]} diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000004.json b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000004.json new file mode 100644 index 0000000..ba5f9f9 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/identifiers/_delta_log/00000000000000000004.json @@ -0,0 +1,9 @@ +{"commitInfo":{"timestamp":1766549239344,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":3,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"5149","numOutputBytes":"235241"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"904cc402-0b5d-451f-ba96-d60d9a74ffd1"}} +{"add":{"path":"part-00000-3f84591a-23b6-4cdf-8090-eea8896ab8d2-c000.snappy.parquet","partitionValues":{},"size":29677,"modificationTime":1766549239341,"dataChange":true,"stats":"{\"numRecords\":643,\"minValues\":{\"entity_id\":\"cdm_prot_0003b7cb-bf13-5646-9978\",\"identifier\":\"UniProt:A0A8F5BRT0\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fef0eb9f-57f2-532d-b5b2\",\"identifier\":\"UniProt:Q9YFY3\",\"source\":\"UniProt\",\"description\":\"UniProt 
accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} +{"add":{"path":"part-00001-95e1738e-f636-4c46-b7f0-df7c08686f10-c000.snappy.parquet","partitionValues":{},"size":29344,"modificationTime":1766549239330,"dataChange":true,"stats":"{\"numRecords\":643,\"minValues\":{\"entity_id\":\"cdm_prot_01672ce0-33eb-50c0-8bb3\",\"identifier\":\"UniProt:A0A151EH88\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9aba-bce9-56de-b269\",\"identifier\":\"UniProt:Q9YGA5\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} +{"add":{"path":"part-00002-51ae934a-79d2-4af2-a4f8-7e87ac901025-c000.snappy.parquet","partitionValues":{},"size":28519,"modificationTime":1766549239328,"dataChange":true,"stats":"{\"numRecords\":643,\"minValues\":{\"entity_id\":\"cdm_prot_0185e048-25c8-50dd-bfbf\",\"identifier\":\"UniProt:A0A8F5BLS2\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe66808c-aab1-5f12-970e\",\"identifier\":\"UniProt:Q9YGB5\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} +{"add":{"path":"part-00003-b73c348f-59c3-434f-9dfd-ea607ff49a07-c000.snappy.parquet","partitionValues":{},"size":28811,"modificationTime":1766549239312,"dataChange":true,"stats":"{\"numRecords\":643,\"minValues\":{\"entity_id\":\"cdm_prot_0007879e-35c8-5917-ad11\",\"identifier\":\"UniProt:A0B9M2\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe66051-6a0a-55cc-81b2\",\"identifier\":\"UniProt:Q9YFG3\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} 
+{"add":{"path":"part-00004-e232f22d-e327-4693-bdbf-9e6169f362e4-c000.snappy.parquet","partitionValues":{},"size":29502,"modificationTime":1766549239331,"dataChange":true,"stats":"{\"numRecords\":643,\"minValues\":{\"entity_id\":\"cdm_prot_003753a9-9a55-505c-8ea7\",\"identifier\":\"UniProt:A0A384KM42\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff74305f-2499-5464-acc5\",\"identifier\":\"UniProt:Q9YFV8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} +{"add":{"path":"part-00005-5932b641-c16d-4311-a633-c5a637d6fe9d-c000.snappy.parquet","partitionValues":{},"size":29668,"modificationTime":1766549239338,"dataChange":true,"stats":"{\"numRecords\":643,\"minValues\":{\"entity_id\":\"cdm_prot_0048aaff-7f6b-5ca1-9274\",\"identifier\":\"UniProt:A0B5P4\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb5b181-1633-55f5-83ee\",\"identifier\":\"UniProt:Q9YFR7\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} +{"add":{"path":"part-00006-0926903d-b4bd-4c65-8f57-c56e699c958c-c000.snappy.parquet","partitionValues":{},"size":29840,"modificationTime":1766549239341,"dataChange":true,"stats":"{\"numRecords\":643,\"minValues\":{\"entity_id\":\"cdm_prot_00666a93-0a4c-5498-8cac\",\"identifier\":\"UniProt:A0A1U8QXI0\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffd1840d-3bb9-5f05-ad33\",\"identifier\":\"UniProt:Q9YEV5\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} 
+{"add":{"path":"part-00007-3e292979-e9b6-4490-abcd-bdc8c991ca93-c000.snappy.parquet","partitionValues":{},"size":29880,"modificationTime":1766549239329,"dataChange":true,"stats":"{\"numRecords\":648,\"minValues\":{\"entity_id\":\"cdm_prot_0254412b-7072-5093-9d3c\",\"identifier\":\"UniProt:A0A384LLU8\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffd6bc2d-9fdc-5fb9-a121\",\"identifier\":\"UniProt:Q9YDZ1\",\"source\":\"UniProt\",\"description\":\"UniProt accession\"},\"nullCount\":{\"entity_id\":0,\"identifier\":0,\"source\":0,\"description\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00000-3f84591a-23b6-4cdf-8090-eea8896ab8d2-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00000-3f84591a-23b6-4cdf-8090-eea8896ab8d2-c000.snappy.parquet new file mode 100644 index 0000000..290a667 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00000-3f84591a-23b6-4cdf-8090-eea8896ab8d2-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00000-99cfe050-fc27-4e15-b5e4-9c72a7f20670-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00000-99cfe050-fc27-4e15-b5e4-9c72a7f20670-c000.snappy.parquet new file mode 100644 index 0000000..40350e6 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00000-99cfe050-fc27-4e15-b5e4-9c72a7f20670-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00000-cbf92e27-f9e4-4e89-a8c2-1d2755a0e39b-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00000-cbf92e27-f9e4-4e89-a8c2-1d2755a0e39b-c000.snappy.parquet new file mode 100644 index 0000000..8a79a1f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00000-cbf92e27-f9e4-4e89-a8c2-1d2755a0e39b-c000.snappy.parquet differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00000-f69d9253-5a6d-460b-bd51-59cc7a065749-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00000-f69d9253-5a6d-460b-bd51-59cc7a065749-c000.snappy.parquet new file mode 100644 index 0000000..24035d0 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00000-f69d9253-5a6d-460b-bd51-59cc7a065749-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00001-059ca701-6a3b-42ea-942a-002f081595a8-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00001-059ca701-6a3b-42ea-942a-002f081595a8-c000.snappy.parquet new file mode 100644 index 0000000..c4d1b5e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00001-059ca701-6a3b-42ea-942a-002f081595a8-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00001-95e1738e-f636-4c46-b7f0-df7c08686f10-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00001-95e1738e-f636-4c46-b7f0-df7c08686f10-c000.snappy.parquet new file mode 100644 index 0000000..cffe1bd Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00001-95e1738e-f636-4c46-b7f0-df7c08686f10-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00001-b3fc3245-9685-4e83-84a0-46c98096547c-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00001-b3fc3245-9685-4e83-84a0-46c98096547c-c000.snappy.parquet new file mode 100644 index 0000000..8fdb448 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00001-b3fc3245-9685-4e83-84a0-46c98096547c-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00001-fc568fc3-e35d-4f6b-b437-c8d98de011b3-c000.snappy.parquet 
b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00001-fc568fc3-e35d-4f6b-b437-c8d98de011b3-c000.snappy.parquet new file mode 100644 index 0000000..6999b31 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00001-fc568fc3-e35d-4f6b-b437-c8d98de011b3-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00002-39105fc1-4eff-4dd3-95a9-071db7f13100-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00002-39105fc1-4eff-4dd3-95a9-071db7f13100-c000.snappy.parquet new file mode 100644 index 0000000..fac2a02 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00002-39105fc1-4eff-4dd3-95a9-071db7f13100-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00002-51ae934a-79d2-4af2-a4f8-7e87ac901025-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00002-51ae934a-79d2-4af2-a4f8-7e87ac901025-c000.snappy.parquet new file mode 100644 index 0000000..39757a6 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00002-51ae934a-79d2-4af2-a4f8-7e87ac901025-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00002-72f53ac3-bd98-4dcd-9011-5267622cc106-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00002-72f53ac3-bd98-4dcd-9011-5267622cc106-c000.snappy.parquet new file mode 100644 index 0000000..238daf0 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00002-72f53ac3-bd98-4dcd-9011-5267622cc106-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00002-d62dbad4-1f60-4bbc-a56e-13375d352390-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00002-d62dbad4-1f60-4bbc-a56e-13375d352390-c000.snappy.parquet new file mode 100644 index 0000000..e601070 Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00002-d62dbad4-1f60-4bbc-a56e-13375d352390-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00003-0a981679-3d99-4ba8-9da2-b2e4ace767a3-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00003-0a981679-3d99-4ba8-9da2-b2e4ace767a3-c000.snappy.parquet new file mode 100644 index 0000000..85f2b6e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00003-0a981679-3d99-4ba8-9da2-b2e4ace767a3-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00003-8c55451e-c4a7-47f7-997d-6b5c3641462d-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00003-8c55451e-c4a7-47f7-997d-6b5c3641462d-c000.snappy.parquet new file mode 100644 index 0000000..ffa6fec Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00003-8c55451e-c4a7-47f7-997d-6b5c3641462d-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00003-b73c348f-59c3-434f-9dfd-ea607ff49a07-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00003-b73c348f-59c3-434f-9dfd-ea607ff49a07-c000.snappy.parquet new file mode 100644 index 0000000..87789c0 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00003-b73c348f-59c3-434f-9dfd-ea607ff49a07-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00003-e7063d3a-1894-4014-addf-bc6599b0b1c3-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00003-e7063d3a-1894-4014-addf-bc6599b0b1c3-c000.snappy.parquet new file mode 100644 index 0000000..20ca8d3 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00003-e7063d3a-1894-4014-addf-bc6599b0b1c3-c000.snappy.parquet differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00004-24cee708-2e14-4b70-8896-f832d3ba4448-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00004-24cee708-2e14-4b70-8896-f832d3ba4448-c000.snappy.parquet new file mode 100644 index 0000000..b77ef14 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00004-24cee708-2e14-4b70-8896-f832d3ba4448-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00004-c4d85630-3f8b-4b4e-bc53-5ea71fdde5e4-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00004-c4d85630-3f8b-4b4e-bc53-5ea71fdde5e4-c000.snappy.parquet new file mode 100644 index 0000000..a5c78d3 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00004-c4d85630-3f8b-4b4e-bc53-5ea71fdde5e4-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00004-e232f22d-e327-4693-bdbf-9e6169f362e4-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00004-e232f22d-e327-4693-bdbf-9e6169f362e4-c000.snappy.parquet new file mode 100644 index 0000000..a4ccfeb Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00004-e232f22d-e327-4693-bdbf-9e6169f362e4-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00004-ef12b3a8-d41e-47b7-ac5d-2b9195472ad6-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00004-ef12b3a8-d41e-47b7-ac5d-2b9195472ad6-c000.snappy.parquet new file mode 100644 index 0000000..2e32170 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00004-ef12b3a8-d41e-47b7-ac5d-2b9195472ad6-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00005-08a4ca31-6475-42ad-a5cc-a4635414e33e-c000.snappy.parquet 
b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00005-08a4ca31-6475-42ad-a5cc-a4635414e33e-c000.snappy.parquet new file mode 100644 index 0000000..01f2557 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00005-08a4ca31-6475-42ad-a5cc-a4635414e33e-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00005-5932b641-c16d-4311-a633-c5a637d6fe9d-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00005-5932b641-c16d-4311-a633-c5a637d6fe9d-c000.snappy.parquet new file mode 100644 index 0000000..78c07f1 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00005-5932b641-c16d-4311-a633-c5a637d6fe9d-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00005-76253b39-d9cc-4901-a96f-7e977a01b22f-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00005-76253b39-d9cc-4901-a96f-7e977a01b22f-c000.snappy.parquet new file mode 100644 index 0000000..a89d119 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00005-76253b39-d9cc-4901-a96f-7e977a01b22f-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00005-9906cb6f-98a5-4bf5-a278-e3700b3ad79f-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00005-9906cb6f-98a5-4bf5-a278-e3700b3ad79f-c000.snappy.parquet new file mode 100644 index 0000000..c641a2f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00005-9906cb6f-98a5-4bf5-a278-e3700b3ad79f-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00006-0926903d-b4bd-4c65-8f57-c56e699c958c-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00006-0926903d-b4bd-4c65-8f57-c56e699c958c-c000.snappy.parquet new file mode 100644 index 0000000..1e56eb8 Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00006-0926903d-b4bd-4c65-8f57-c56e699c958c-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00006-375fbc01-8bd9-437f-831c-41d8ce9bda0e-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00006-375fbc01-8bd9-437f-831c-41d8ce9bda0e-c000.snappy.parquet new file mode 100644 index 0000000..f338c29 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00006-375fbc01-8bd9-437f-831c-41d8ce9bda0e-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00006-4d5c4fa1-a97e-4c70-aa70-31ae296086ef-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00006-4d5c4fa1-a97e-4c70-aa70-31ae296086ef-c000.snappy.parquet new file mode 100644 index 0000000..9a4705a Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00006-4d5c4fa1-a97e-4c70-aa70-31ae296086ef-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00006-5fa1af64-92db-4103-9b58-b5a429dc9df7-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00006-5fa1af64-92db-4103-9b58-b5a429dc9df7-c000.snappy.parquet new file mode 100644 index 0000000..0487bb0 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00006-5fa1af64-92db-4103-9b58-b5a429dc9df7-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00007-02e39958-40d8-4ec1-a9da-b09fc6e68e28-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00007-02e39958-40d8-4ec1-a9da-b09fc6e68e28-c000.snappy.parquet new file mode 100644 index 0000000..1be1e2f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00007-02e39958-40d8-4ec1-a9da-b09fc6e68e28-c000.snappy.parquet differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00007-3e292979-e9b6-4490-abcd-bdc8c991ca93-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00007-3e292979-e9b6-4490-abcd-bdc8c991ca93-c000.snappy.parquet new file mode 100644 index 0000000..4b39f56 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00007-3e292979-e9b6-4490-abcd-bdc8c991ca93-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00007-53e01b06-b778-412d-8875-ab8afb83606a-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00007-53e01b06-b778-412d-8875-ab8afb83606a-c000.snappy.parquet new file mode 100644 index 0000000..092fcb3 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00007-53e01b06-b778-412d-8875-ab8afb83606a-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00007-95d9af21-41a5-4ba0-82e5-4339e909ff16-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00007-95d9af21-41a5-4ba0-82e5-4339e909ff16-c000.snappy.parquet new file mode 100644 index 0000000..5e429f5 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/identifiers/part-00007-95d9af21-41a5-4ba0-82e5-4339e909ff16-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00000-296a81be-f30f-423c-9574-e8f006ae0fdc-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00000-296a81be-f30f-423c-9574-e8f006ae0fdc-c000.snappy.parquet.crc new file mode 100644 index 0000000..74a428b Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00000-296a81be-f30f-423c-9574-e8f006ae0fdc-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00000-7fe49477-1c19-4e2f-a85c-d935eef12414-c000.snappy.parquet.crc 
b/tests/data/uniprot_archaea/uniprot_db/names/.part-00000-7fe49477-1c19-4e2f-a85c-d935eef12414-c000.snappy.parquet.crc new file mode 100644 index 0000000..26feb79 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00000-7fe49477-1c19-4e2f-a85c-d935eef12414-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00000-a2850c69-bf2e-4d70-b897-dd8587907e97-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00000-a2850c69-bf2e-4d70-b897-dd8587907e97-c000.snappy.parquet.crc new file mode 100644 index 0000000..6124bc1 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00000-a2850c69-bf2e-4d70-b897-dd8587907e97-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00000-f44e402f-1ebd-4d1d-8142-a3d70083c403-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00000-f44e402f-1ebd-4d1d-8142-a3d70083c403-c000.snappy.parquet.crc new file mode 100644 index 0000000..e9ca607 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00000-f44e402f-1ebd-4d1d-8142-a3d70083c403-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00001-0aa26ab1-0f23-4f5f-bc76-68425d9bbc6f-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00001-0aa26ab1-0f23-4f5f-bc76-68425d9bbc6f-c000.snappy.parquet.crc new file mode 100644 index 0000000..8525921 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00001-0aa26ab1-0f23-4f5f-bc76-68425d9bbc6f-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00001-3b2d9b31-38b4-4f6c-91b0-10b1bd882505-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00001-3b2d9b31-38b4-4f6c-91b0-10b1bd882505-c000.snappy.parquet.crc new file mode 100644 index 0000000..eff741b Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/names/.part-00001-3b2d9b31-38b4-4f6c-91b0-10b1bd882505-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00001-3c105c72-f00f-4c50-a1b8-91c556ea59a2-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00001-3c105c72-f00f-4c50-a1b8-91c556ea59a2-c000.snappy.parquet.crc new file mode 100644 index 0000000..11417e0 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00001-3c105c72-f00f-4c50-a1b8-91c556ea59a2-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00001-a1a9ee73-730f-4e51-bad2-09ff9419a6fd-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00001-a1a9ee73-730f-4e51-bad2-09ff9419a6fd-c000.snappy.parquet.crc new file mode 100644 index 0000000..46b4dcd Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00001-a1a9ee73-730f-4e51-bad2-09ff9419a6fd-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00002-6650955d-faa1-43e3-a56c-51a85383c8b4-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00002-6650955d-faa1-43e3-a56c-51a85383c8b4-c000.snappy.parquet.crc new file mode 100644 index 0000000..af9a1cf Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00002-6650955d-faa1-43e3-a56c-51a85383c8b4-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00002-685e72ec-2862-40de-8cba-192b36e48b40-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00002-685e72ec-2862-40de-8cba-192b36e48b40-c000.snappy.parquet.crc new file mode 100644 index 0000000..c288044 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00002-685e72ec-2862-40de-8cba-192b36e48b40-c000.snappy.parquet.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/names/.part-00002-6e534054-9141-43f6-900b-601663a927c2-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00002-6e534054-9141-43f6-900b-601663a927c2-c000.snappy.parquet.crc new file mode 100644 index 0000000..337da6c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00002-6e534054-9141-43f6-900b-601663a927c2-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00002-a27a104f-dd3a-4fd5-98d7-e032b14d238f-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00002-a27a104f-dd3a-4fd5-98d7-e032b14d238f-c000.snappy.parquet.crc new file mode 100644 index 0000000..dd2e189 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00002-a27a104f-dd3a-4fd5-98d7-e032b14d238f-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00003-54f2d4b4-5ff7-4fcf-9373-8ce4423a8d11-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00003-54f2d4b4-5ff7-4fcf-9373-8ce4423a8d11-c000.snappy.parquet.crc new file mode 100644 index 0000000..2f0c197 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00003-54f2d4b4-5ff7-4fcf-9373-8ce4423a8d11-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00003-a0e33fc3-4700-43ee-9c85-4b0c932fb35b-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00003-a0e33fc3-4700-43ee-9c85-4b0c932fb35b-c000.snappy.parquet.crc new file mode 100644 index 0000000..0b33634 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00003-a0e33fc3-4700-43ee-9c85-4b0c932fb35b-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00003-a378c2f4-d615-4c49-8777-9a0d2b980e52-c000.snappy.parquet.crc 
b/tests/data/uniprot_archaea/uniprot_db/names/.part-00003-a378c2f4-d615-4c49-8777-9a0d2b980e52-c000.snappy.parquet.crc new file mode 100644 index 0000000..5975941 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00003-a378c2f4-d615-4c49-8777-9a0d2b980e52-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00003-a69106e5-54a9-4eb0-88f7-958856dbb6c4-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00003-a69106e5-54a9-4eb0-88f7-958856dbb6c4-c000.snappy.parquet.crc new file mode 100644 index 0000000..1f254ad Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00003-a69106e5-54a9-4eb0-88f7-958856dbb6c4-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00004-5cb8a571-8bfd-4c75-ba1b-40ca4d487f91-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00004-5cb8a571-8bfd-4c75-ba1b-40ca4d487f91-c000.snappy.parquet.crc new file mode 100644 index 0000000..198162f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00004-5cb8a571-8bfd-4c75-ba1b-40ca4d487f91-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00004-68220f37-23c4-4bdc-a763-9f588a9536c4-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00004-68220f37-23c4-4bdc-a763-9f588a9536c4-c000.snappy.parquet.crc new file mode 100644 index 0000000..41cff3c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00004-68220f37-23c4-4bdc-a763-9f588a9536c4-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00004-adba1bb2-356b-4c2f-a77e-966c8b9d6735-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00004-adba1bb2-356b-4c2f-a77e-966c8b9d6735-c000.snappy.parquet.crc new file mode 100644 index 0000000..3344f69 Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/names/.part-00004-adba1bb2-356b-4c2f-a77e-966c8b9d6735-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00004-f042aa17-bd0f-42fe-bbfa-c49c508eb8d1-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00004-f042aa17-bd0f-42fe-bbfa-c49c508eb8d1-c000.snappy.parquet.crc new file mode 100644 index 0000000..8bf7031 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00004-f042aa17-bd0f-42fe-bbfa-c49c508eb8d1-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00005-0bee7588-c84d-4380-b866-5d69e8c776f6-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00005-0bee7588-c84d-4380-b866-5d69e8c776f6-c000.snappy.parquet.crc new file mode 100644 index 0000000..75af777 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00005-0bee7588-c84d-4380-b866-5d69e8c776f6-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00005-23bb4218-5ffa-434f-a6a1-92cfd47943c5-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00005-23bb4218-5ffa-434f-a6a1-92cfd47943c5-c000.snappy.parquet.crc new file mode 100644 index 0000000..11b16a6 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00005-23bb4218-5ffa-434f-a6a1-92cfd47943c5-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00005-2deeee12-d25b-4365-b10f-babb76fbefb7-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00005-2deeee12-d25b-4365-b10f-babb76fbefb7-c000.snappy.parquet.crc new file mode 100644 index 0000000..accf473 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00005-2deeee12-d25b-4365-b10f-babb76fbefb7-c000.snappy.parquet.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/names/.part-00005-44f8dfca-d47a-4963-8d9e-094264fc524f-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00005-44f8dfca-d47a-4963-8d9e-094264fc524f-c000.snappy.parquet.crc new file mode 100644 index 0000000..d12e330 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00005-44f8dfca-d47a-4963-8d9e-094264fc524f-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00006-3d33a697-816a-4bad-bd2a-e51ffd964bec-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00006-3d33a697-816a-4bad-bd2a-e51ffd964bec-c000.snappy.parquet.crc new file mode 100644 index 0000000..277eb7d Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00006-3d33a697-816a-4bad-bd2a-e51ffd964bec-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00006-3e8fac6d-c54c-48b0-b2eb-fe99c2d39149-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00006-3e8fac6d-c54c-48b0-b2eb-fe99c2d39149-c000.snappy.parquet.crc new file mode 100644 index 0000000..6f94e34 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00006-3e8fac6d-c54c-48b0-b2eb-fe99c2d39149-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00006-60bba51f-9140-4560-9e5c-89997d480d4c-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00006-60bba51f-9140-4560-9e5c-89997d480d4c-c000.snappy.parquet.crc new file mode 100644 index 0000000..cb19df7 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00006-60bba51f-9140-4560-9e5c-89997d480d4c-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00006-680bfd0a-30aa-4bbe-b288-347d51725fb6-c000.snappy.parquet.crc 
b/tests/data/uniprot_archaea/uniprot_db/names/.part-00006-680bfd0a-30aa-4bbe-b288-347d51725fb6-c000.snappy.parquet.crc new file mode 100644 index 0000000..b49c54b Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00006-680bfd0a-30aa-4bbe-b288-347d51725fb6-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00007-5e1e889d-a46c-4994-ad50-d12aaaba1293-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00007-5e1e889d-a46c-4994-ad50-d12aaaba1293-c000.snappy.parquet.crc new file mode 100644 index 0000000..47750b2 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00007-5e1e889d-a46c-4994-ad50-d12aaaba1293-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00007-7069ff33-fcfe-411f-ac96-40cabfa3d8fe-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00007-7069ff33-fcfe-411f-ac96-40cabfa3d8fe-c000.snappy.parquet.crc new file mode 100644 index 0000000..42244c9 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00007-7069ff33-fcfe-411f-ac96-40cabfa3d8fe-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00007-b260351a-af1f-49f5-be1f-9f27441707d2-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00007-b260351a-af1f-49f5-be1f-9f27441707d2-c000.snappy.parquet.crc new file mode 100644 index 0000000..1cadd6e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/.part-00007-b260351a-af1f-49f5-be1f-9f27441707d2-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/.part-00007-b32af234-9734-4c34-88ec-1c9c77fdb751-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/names/.part-00007-b32af234-9734-4c34-88ec-1c9c77fdb751-c000.snappy.parquet.crc new file mode 100644 index 0000000..7335fb3 Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/names/.part-00007-b32af234-9734-4c34-88ec-1c9c77fdb751-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000000.crc.crc b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000000.crc.crc new file mode 100644 index 0000000..7384a23 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000000.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000000.json.crc b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000000.json.crc new file mode 100644 index 0000000..4675e96 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000000.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000001.crc.crc b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000001.crc.crc new file mode 100644 index 0000000..6a90d57 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000001.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000001.json.crc b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000001.json.crc new file mode 100644 index 0000000..58f0153 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000001.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000002.crc.crc b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000002.crc.crc new file mode 100644 index 0000000..804ddb3 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000002.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000002.json.crc 
b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000002.json.crc new file mode 100644 index 0000000..2078081 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000002.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000003.crc.crc b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000003.crc.crc new file mode 100644 index 0000000..aec80a5 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000003.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000003.json.crc b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000003.json.crc new file mode 100644 index 0000000..cbd45b1 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000003.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000004.crc.crc b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000004.crc.crc new file mode 100644 index 0000000..9b955b3 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000004.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000004.json.crc b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000004.json.crc new file mode 100644 index 0000000..3c7a704 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/.00000000000000000004.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000000.crc b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000000.crc new file mode 100644 index 0000000..421ec1c --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000000.crc @@ -0,0 +1 @@ 
+{"txnId":"83e6a9ac-b3d9-4c18-8298-0b2e29e9b979","tableSizeBytes":0,"numFiles":0,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"b59f7dc2-44ee-45c1-a0f8-e1714e8527c8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[]}","partitionColumns":[],"configuration":{},"createdTime":1766549213449},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[]} diff --git a/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000000.json b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000000.json new file mode 100644 index 0000000..9ef7319 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1766549213486,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"83e6a9ac-b3d9-4c18-8298-0b2e29e9b979"}} +{"metaData":{"id":"b59f7dc2-44ee-45c1-a0f8-e1714e8527c8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[]}","partitionColumns":[],"configuration":{},"createdTime":1766549213449}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} diff --git a/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000001.crc b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000001.crc new file mode 100644 index 0000000..ea47922 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000001.crc @@ -0,0 +1 @@ 
+{"txnId":"32493727-3c1f-426b-9874-c2c3856324e3","tableSizeBytes":301692,"numFiles":8,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"b59f7dc2-44ee-45c1-a0f8-e1714e8527c8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"description\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"source\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549213449},"protocol":{"minReaderVersion":1,"minWriterVersion":2}} diff --git a/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000001.json b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000001.json new file mode 100644 index 0000000..7aeae7c --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000001.json @@ -0,0 +1,10 @@ +{"commitInfo":{"timestamp":1766549219755,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"16576","numOutputBytes":"301692"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"32493727-3c1f-426b-9874-c2c3856324e3"}} +{"metaData":{"id":"b59f7dc2-44ee-45c1-a0f8-e1714e8527c8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"description\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"source\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549213449}} 
+{"add":{"path":"part-00000-7fe49477-1c19-4e2f-a85c-d935eef12414-c000.snappy.parquet","partitionValues":{},"size":43113,"modificationTime":1766549219750,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"name\":\"(3S)-malyl-CoA thioesterase\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"name\":\"tRNA-t(6)A37 methylthiotransfera\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00001-3b2d9b31-38b4-4f6c-91b0-10b1bd882505-c000.snappy.parquet","partitionValues":{},"size":33382,"modificationTime":1766549219728,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"name\":\"2,3-bisphosphoglycerate-independ\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcd18c6-2ffe-515f-9d18\",\"name\":\"tRNA-guanine(15) transglycosylas\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00002-a27a104f-dd3a-4fd5-98d7-e032b14d238f-c000.snappy.parquet","partitionValues":{},"size":40831,"modificationTime":1766549219731,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"name\":\"(S)-8-amino-7-oxononanoate synth\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"name\":\"tRNA-guanine(15) transglycosylas\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} 
+{"add":{"path":"part-00003-a69106e5-54a9-4eb0-88f7-958856dbb6c4-c000.snappy.parquet","partitionValues":{},"size":39289,"modificationTime":1766549219749,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"name\":\"(R)-2-hydroxyacid dehydrogenase\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"name\":\"dCTP deaminase, dUMP-forming\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00004-68220f37-23c4-4bdc-a763-9f588a9536c4-c000.snappy.parquet","partitionValues":{},"size":36158,"modificationTime":1766549219736,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_008bae16-5b42-5bfd-a15c\",\"name\":\"(S)-2,3-di-O-geranylgeranylglyce\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe5e21c9-fae5-5701-b99d\",\"name\":\"p7ss\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00005-44f8dfca-d47a-4963-8d9e-094264fc524f-c000.snappy.parquet","partitionValues":{},"size":36652,"modificationTime":1766549219728,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"name\":\"2-phospho-D-glycerate hydro-lyas\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"name\":\"tRNA-splicing endonuclease\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} 
+{"add":{"path":"part-00006-3d33a697-816a-4bad-bd2a-e51ffd964bec-c000.snappy.parquet","partitionValues":{},"size":36600,"modificationTime":1766549219733,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"name\":\"2-phosphoglycerate forming glyce\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"name\":\"sn-glycerol-1-phosphate dehydrog\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00007-5e1e889d-a46c-4994-ad50-d12aaaba1293-c000.snappy.parquet","partitionValues":{},"size":35667,"modificationTime":1766549219750,"dataChange":true,"stats":"{\"numRecords\":2240,\"minValues\":{\"entity_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"name\":\"(1->4)-alpha-D-glucan:maltose-1-\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"name\":\"p-GvpK\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000002.crc b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000002.crc new file mode 100644 index 0000000..7b47c8a --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000002.crc @@ -0,0 +1 @@ 
+{"txnId":"229e975d-2684-4543-b42d-d9d5f0de67c7","tableSizeBytes":604923,"numFiles":16,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"b59f7dc2-44ee-45c1-a0f8-e1714e8527c8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"description\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"source\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549213449},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[{"path":"part-00006-3d33a697-816a-4bad-bd2a-e51ffd964bec-c000.snappy.parquet","partitionValues":{},"size":36600,"modificationTime":1766549219733,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"name\":\"2-phosphoglycerate forming glyce\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"name\":\"sn-glycerol-1-phosphate dehydrog\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00004-68220f37-23c4-4bdc-a763-9f588a9536c4-c000.snappy.parquet","partitionValues":{},"size":36158,"modificationTime":1766549219736,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_008bae16-5b42-5bfd-a15c\",\"name\":\"(S)-2,3-di-O-geranylgeranylglyce\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe5e21c9-fae5-5701-b99d\",\"name\":\"p7ss\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00002-a27a104f-dd3a-4fd5-98d7-e032b14d238f-c000.snappy.parquet","partitionValues":{},"size":40831,"modificationTime":1766549219731,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"name\":\"(S)-8-amino-7-oxononanoate synth\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"name\":\"tRNA-guanine(15) transglycosylas\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00001-3b2d9b31-38b4-4f6c-91b0-10b1bd882505-c000.snappy.parquet","partitionValues":{},"size":33382,"modificationTime":1766549219728,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"name\":\"2,3-bisphosphoglycerate-independ\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcd18c6-2ffe-515f-9d18\",\"name\":\"tRNA-guanine(15) transglycosylas\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00000-7fe49477-1c19-4e2f-a85c-d935eef12414-c000.snappy.parquet","partitionValues":{},"size":43113,"modificationTime":1766549219750,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"name\":\"(3S)-malyl-CoA thioesterase\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"name\":\"tRNA-t(6)A37 methylthiotransfera\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00003-54f2d4b4-5ff7-4fcf-9373-8ce4423a8d11-c000.snappy.parquet","partitionValues":{},"size":39071,"modificationTime":1766549226720,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"name\":\"(3S)-malyl-CoA/beta-methylmalyl-\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"name\":\"pfMre11\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00004-f042aa17-bd0f-42fe-bbfa-c49c508eb8d1-c000.snappy.parquet","partitionValues":{},"size":35908,"modificationTime":1766549226729,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"name\":\"16S rRNA (pseudouridine-N1-)-met\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"name\":\"tRNA adenylyltransferase NcsA\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00000-f44e402f-1ebd-4d1d-8142-a3d70083c403-c000.snappy.parquet","partitionValues":{},"size":35349,"modificationTime":1766549226717,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"name\":\"1-(5-phosphoribosyl)-5-[(5-phosp\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"name\":\"p-GvpO\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00005-44f8dfca-d47a-4963-8d9e-094264fc524f-c000.snappy.parquet","partitionValues":{},"size":36652,"modificationTime":1766549219728,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"name\":\"2-phospho-D-glycerate hydro-lyas\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"name\":\"tRNA-splicing endonuclease\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00007-b32af234-9734-4c34-88ec-1c9c77fdb751-c000.snappy.parquet","partitionValues":{},"size":28222,"modificationTime":1766549226729,"dataChange":false,"stats":"{\"numRecords\":2143,\"minValues\":{\"entity_id\":\"cdm_prot_00504a00-27c9-551f-b27f\",\"name\":\"(2R,3R)-3-methylornithine syntha\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"name\":\"tRNA pseudouridine synthase Pus1\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00006-3e8fac6d-c54c-48b0-b2eb-fe99c2d39149-c000.snappy.parquet","partitionValues":{},"size":33346,"modificationTime":1766549226726,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"name\":\"(2R)-phospho-3-sulfolactate synt\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"name\":\"p46\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00003-a69106e5-54a9-4eb0-88f7-958856dbb6c4-c000.snappy.parquet","partitionValues":{},"size":39289,"modificationTime":1766549219749,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"name\":\"(R)-2-hydroxyacid dehydrogenase\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"name\":\"dCTP deaminase, dUMP-forming\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00007-5e1e889d-a46c-4994-ad50-d12aaaba1293-c000.snappy.parquet","partitionValues":{},"size":35667,"modificationTime":1766549219750,"dataChange":false,"stats":"{\"numRecords\":2240,\"minValues\":{\"entity_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"name\":\"(1->4)-alpha-D-glucan:maltose-1-\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"name\":\"p-GvpK\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00001-0aa26ab1-0f23-4f5f-bc76-68425d9bbc6f-c000.snappy.parquet","partitionValues":{},"size":35505,"modificationTime":1766549226721,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00f7712b-a738-523c-aa68\",\"name\":\"3-hexulose-6-phosphate synthase\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"name\":\"eIF-5A\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00002-6e534054-9141-43f6-900b-601663a927c2-c000.snappy.parquet","partitionValues":{},"size":42733,"modificationTime":1766549226726,"dataChange":false,"stats":"{\"numRecords\":3072,\"minValues\":{\"entity_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"name\":\"(R)-2-methylmalate dehydratase\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"name\":\"tRNA threonylcarbamoyladenosine \",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00005-23bb4218-5ffa-434f-a6a1-92cfd47943c5-c000.snappy.parquet","partitionValues":{},"size":53097,"modificationTime":1766549226717,"dataChange":false,"stats":"{\"numRecords\":3072,\"minValues\":{\"entity_id\":\"cdm_prot_00957fe6-8bfd-5ecd-9b08\",\"name\":\"2-dehydropantoate 2-reductase\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"name\":\"tRNA:m(5)C72 MTase\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}]} diff --git a/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000002.json b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000002.json new file mode 100644 index 0000000..209f06f --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000002.json @@ -0,0 +1,9 @@ 
+{"commitInfo":{"timestamp":1766549226732,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"18527","numOutputBytes":"303231"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"229e975d-2684-4543-b42d-d9d5f0de67c7"}} +{"add":{"path":"part-00000-f44e402f-1ebd-4d1d-8142-a3d70083c403-c000.snappy.parquet","partitionValues":{},"size":35349,"modificationTime":1766549226717,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"name\":\"1-(5-phosphoribosyl)-5-[(5-phosp\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"name\":\"p-GvpO\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00001-0aa26ab1-0f23-4f5f-bc76-68425d9bbc6f-c000.snappy.parquet","partitionValues":{},"size":35505,"modificationTime":1766549226721,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00f7712b-a738-523c-aa68\",\"name\":\"3-hexulose-6-phosphate synthase\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"name\":\"eIF-5A\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00002-6e534054-9141-43f6-900b-601663a927c2-c000.snappy.parquet","partitionValues":{},"size":42733,"modificationTime":1766549226726,"dataChange":true,"stats":"{\"numRecords\":3072,\"minValues\":{\"entity_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"name\":\"(R)-2-methylmalate dehydratase\",\"description\":\"UniProt alternative full 
name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"name\":\"tRNA threonylcarbamoyladenosine \",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00003-54f2d4b4-5ff7-4fcf-9373-8ce4423a8d11-c000.snappy.parquet","partitionValues":{},"size":39071,"modificationTime":1766549226720,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"name\":\"(3S)-malyl-CoA/beta-methylmalyl-\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"name\":\"pfMre11\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00004-f042aa17-bd0f-42fe-bbfa-c49c508eb8d1-c000.snappy.parquet","partitionValues":{},"size":35908,"modificationTime":1766549226729,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"name\":\"16S rRNA (pseudouridine-N1-)-met\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"name\":\"tRNA adenylyltransferase NcsA\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00005-23bb4218-5ffa-434f-a6a1-92cfd47943c5-c000.snappy.parquet","partitionValues":{},"size":53097,"modificationTime":1766549226717,"dataChange":true,"stats":"{\"numRecords\":3072,\"minValues\":{\"entity_id\":\"cdm_prot_00957fe6-8bfd-5ecd-9b08\",\"name\":\"2-dehydropantoate 2-reductase\",\"description\":\"UniProt alternative full 
name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"name\":\"tRNA:m(5)C72 MTase\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00006-3e8fac6d-c54c-48b0-b2eb-fe99c2d39149-c000.snappy.parquet","partitionValues":{},"size":33346,"modificationTime":1766549226726,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"name\":\"(2R)-phospho-3-sulfolactate synt\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"name\":\"p46\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00007-b32af234-9734-4c34-88ec-1c9c77fdb751-c000.snappy.parquet","partitionValues":{},"size":28222,"modificationTime":1766549226729,"dataChange":true,"stats":"{\"numRecords\":2143,\"minValues\":{\"entity_id\":\"cdm_prot_00504a00-27c9-551f-b27f\",\"name\":\"(2R,3R)-3-methylornithine syntha\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"name\":\"tRNA pseudouridine synthase Pus1\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000003.crc b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000003.crc new file mode 100644 index 0000000..7a12592 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000003.crc @@ -0,0 +1 @@ 
+{"txnId":"79e4f656-01b6-4d44-9800-6dbbe024d3fa","tableSizeBytes":884978,"numFiles":24,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"b59f7dc2-44ee-45c1-a0f8-e1714e8527c8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"description\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"source\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549213449},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[{"path":"part-00002-a27a104f-dd3a-4fd5-98d7-e032b14d238f-c000.snappy.parquet","partitionValues":{},"size":40831,"modificationTime":1766549219731,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"name\":\"(S)-8-amino-7-oxononanoate synth\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"name\":\"tRNA-guanine(15) transglycosylas\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00001-3b2d9b31-38b4-4f6c-91b0-10b1bd882505-c000.snappy.parquet","partitionValues":{},"size":33382,"modificationTime":1766549219728,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"name\":\"2,3-bisphosphoglycerate-independ\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcd18c6-2ffe-515f-9d18\",\"name\":\"tRNA-guanine(15) transglycosylas\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00000-7fe49477-1c19-4e2f-a85c-d935eef12414-c000.snappy.parquet","partitionValues":{},"size":43113,"modificationTime":1766549219750,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"name\":\"(3S)-malyl-CoA thioesterase\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"name\":\"tRNA-t(6)A37 methylthiotransfera\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00004-f042aa17-bd0f-42fe-bbfa-c49c508eb8d1-c000.snappy.parquet","partitionValues":{},"size":35908,"modificationTime":1766549226729,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"name\":\"16S rRNA (pseudouridine-N1-)-met\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"name\":\"tRNA adenylyltransferase NcsA\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00000-f44e402f-1ebd-4d1d-8142-a3d70083c403-c000.snappy.parquet","partitionValues":{},"size":35349,"modificationTime":1766549226717,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"name\":\"1-(5-phosphoribosyl)-5-[(5-phosp\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"name\":\"p-GvpO\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00007-b32af234-9734-4c34-88ec-1c9c77fdb751-c000.snappy.parquet","partitionValues":{},"size":28222,"modificationTime":1766549226729,"dataChange":false,"stats":"{\"numRecords\":2143,\"minValues\":{\"entity_id\":\"cdm_prot_00504a00-27c9-551f-b27f\",\"name\":\"(2R,3R)-3-methylornithine syntha\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"name\":\"tRNA pseudouridine synthase Pus1\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00006-3e8fac6d-c54c-48b0-b2eb-fe99c2d39149-c000.snappy.parquet","partitionValues":{},"size":33346,"modificationTime":1766549226726,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"name\":\"(2R)-phospho-3-sulfolactate synt\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"name\":\"p46\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00003-a378c2f4-d615-4c49-8777-9a0d2b980e52-c000.snappy.parquet","partitionValues":{},"size":35026,"modificationTime":1766549234180,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"name\":\"23S rRNA (adenine(2503)-C(2))-me\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"name\":\"rRNA (uridine-2'-O-)-methyltrans\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00003-a69106e5-54a9-4eb0-88f7-958856dbb6c4-c000.snappy.parquet","partitionValues":{},"size":39289,"modificationTime":1766549219749,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"name\":\"(R)-2-hydroxyacid dehydrogenase\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"name\":\"dCTP deaminase, dUMP-forming\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00000-296a81be-f30f-423c-9574-e8f006ae0fdc-c000.snappy.parquet","partitionValues":{},"size":34908,"modificationTime":1766549234175,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_006f3d67-56f0-5829-b099\",\"name\":\"2,5-diamino-6-(5-phospho-D-ribos\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcedd0e-face-5ec6-8e0b\",\"name\":\"pfRad50\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00005-2deeee12-d25b-4365-b10f-babb76fbefb7-c000.snappy.parquet","partitionValues":{},"size":36141,"modificationTime":1766549234183,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"name\":\"30S ribosomal protein S10\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"name\":\"Small ribosomal subunit protein \",\"description\":\"UniProt recommended full 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00006-3d33a697-816a-4bad-bd2a-e51ffd964bec-c000.snappy.parquet","partitionValues":{},"size":36600,"modificationTime":1766549219733,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"name\":\"2-phosphoglycerate forming glyce\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"name\":\"sn-glycerol-1-phosphate dehydrog\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00004-68220f37-23c4-4bdc-a763-9f588a9536c4-c000.snappy.parquet","partitionValues":{},"size":36158,"modificationTime":1766549219736,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_008bae16-5b42-5bfd-a15c\",\"name\":\"(S)-2,3-di-O-geranylgeranylglyce\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe5e21c9-fae5-5701-b99d\",\"name\":\"p7ss\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00006-680bfd0a-30aa-4bbe-b288-347d51725fb6-c000.snappy.parquet","partitionValues":{},"size":35807,"modificationTime":1766549234176,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00219a7c-c28c-5aaf-928e\",\"name\":\"(R)-S-adenosyl-L-methionine hydr\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe67f279-f090-5c4e-b38d\",\"name\":\"tRNA-splicing ligase RtcB\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00003-54f2d4b4-5ff7-4fcf-9373-8ce4423a8d11-c000.snappy.parquet","partitionValues":{},"size":39071,"modificationTime":1766549226720,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"name\":\"(3S)-malyl-CoA/beta-methylmalyl-\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"name\":\"pfMre11\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00004-adba1bb2-356b-4c2f-a77e-966c8b9d6735-c000.snappy.parquet","partitionValues":{},"size":34152,"modificationTime":1766549234170,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"name\":\"30S ribosomal protein S10\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff94038a-47fa-54b0-813d\",\"name\":\"tRNase Z\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00007-b260351a-af1f-49f5-be1f-9f27441707d2-c000.snappy.parquet","partitionValues":{},"size":35891,"modificationTime":1766549234187,"dataChange":false,"stats":"{\"numRecords\":2034,\"minValues\":{\"entity_id\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"name\":\"1,3-diaminopropane aminopropyltr\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"name\":\"tRNA threonylcarbamoyladenosine \",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00005-44f8dfca-d47a-4963-8d9e-094264fc524f-c000.snappy.parquet","partitionValues":{},"size":36652,"modificationTime":1766549219728,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"name\":\"2-phospho-D-glycerate hydro-lyas\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"name\":\"tRNA-splicing endonuclease\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00002-685e72ec-2862-40de-8cba-192b36e48b40-c000.snappy.parquet","partitionValues":{},"size":35740,"modificationTime":1766549234176,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0061faab-bd88-562c-8a31\",\"name\":\"50S ribosomal protein L1\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff216b32-af2a-545f-8e61\",\"name\":\"Ribosomal protein ML6\",\"description\":\"UniProt recommended full name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00007-5e1e889d-a46c-4994-ad50-d12aaaba1293-c000.snappy.parquet","partitionValues":{},"size":35667,"modificationTime":1766549219750,"dataChange":false,"stats":"{\"numRecords\":2240,\"minValues\":{\"entity_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"name\":\"(1->4)-alpha-D-glucan:maltose-1-\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"name\":\"p-GvpK\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00001-0aa26ab1-0f23-4f5f-bc76-68425d9bbc6f-c000.snappy.parquet","partitionValues":{},"size":35505,"modificationTime":1766549226721,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00f7712b-a738-523c-aa68\",\"name\":\"3-hexulose-6-phosphate synthase\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"name\":\"eIF-5A\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00002-6e534054-9141-43f6-900b-601663a927c2-c000.snappy.parquet","partitionValues":{},"size":42733,"modificationTime":1766549226726,"dataChange":false,"stats":"{\"numRecords\":3072,\"minValues\":{\"entity_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"name\":\"(R)-2-methylmalate dehydratase\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"name\":\"tRNA threonylcarbamoyladenosine \",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00001-3c105c72-f00f-4c50-a1b8-91c556ea59a2-c000.snappy.parquet","partitionValues":{},"size":32390,"modificationTime":1766549234179,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00365069-eeac-5442-8f11\",\"name\":\"2-iminobutanoate/2-iminopropanoa\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe82106c-a4bc-5a9c-8159\",\"name\":\"Ribosomal protein 'A'\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00005-23bb4218-5ffa-434f-a6a1-92cfd47943c5-c000.snappy.parquet","partitionValues":{},"size":53097,"modificationTime":1766549226717,"dataChange":false,"stats":"{\"numRecords\":3072,\"minValues\":{\"entity_id\":\"cdm_prot_00957fe6-8bfd-5ecd-9b08\",\"name\":\"2-dehydropantoate 2-reductase\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"name\":\"tRNA:m(5)C72 MTase\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}]} diff --git a/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000003.json b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000003.json new file mode 100644 index 0000000..4537f63 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000003.json @@ -0,0 +1,9 @@ +{"commitInfo":{"timestamp":1766549234190,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"16370","numOutputBytes":"280055"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"79e4f656-01b6-4d44-9800-6dbbe024d3fa"}} +{"add":{"path":"part-00000-296a81be-f30f-423c-9574-e8f006ae0fdc-c000.snappy.parquet","partitionValues":{},"size":34908,"modificationTime":1766549234175,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_006f3d67-56f0-5829-b099\",\"name\":\"2,5-diamino-6-(5-phospho-D-ribos\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcedd0e-face-5ec6-8e0b\",\"name\":\"pfRad50\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00001-3c105c72-f00f-4c50-a1b8-91c556ea59a2-c000.snappy.parquet","partitionValues":{},"size":32390,"modificationTime":1766549234179,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00365069-eeac-5442-8f11\",\"name\":\"2-iminobutanoate/2-iminopropanoa\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe82106c-a4bc-5a9c-8159\",\"name\":\"Ribosomal protein 'A'\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00002-685e72ec-2862-40de-8cba-192b36e48b40-c000.snappy.parquet","partitionValues":{},"size":35740,"modificationTime":1766549234176,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0061faab-bd88-562c-8a31\",\"name\":\"50S ribosomal protein L1\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff216b32-af2a-545f-8e61\",\"name\":\"Ribosomal protein ML6\",\"description\":\"UniProt recommended full name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00003-a378c2f4-d615-4c49-8777-9a0d2b980e52-c000.snappy.parquet","partitionValues":{},"size":35026,"modificationTime":1766549234180,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"name\":\"23S rRNA (adenine(2503)-C(2))-me\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"name\":\"rRNA (uridine-2'-O-)-methyltrans\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00004-adba1bb2-356b-4c2f-a77e-966c8b9d6735-c000.snappy.parquet","partitionValues":{},"size":34152,"modificationTime":1766549234170,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"name\":\"30S ribosomal protein S10\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff94038a-47fa-54b0-813d\",\"name\":\"tRNase Z\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00005-2deeee12-d25b-4365-b10f-babb76fbefb7-c000.snappy.parquet","partitionValues":{},"size":36141,"modificationTime":1766549234183,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"name\":\"30S ribosomal protein S10\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"name\":\"Small ribosomal subunit protein \",\"description\":\"UniProt recommended full name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00006-680bfd0a-30aa-4bbe-b288-347d51725fb6-c000.snappy.parquet","partitionValues":{},"size":35807,"modificationTime":1766549234176,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00219a7c-c28c-5aaf-928e\",\"name\":\"(R)-S-adenosyl-L-methionine hydr\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe67f279-f090-5c4e-b38d\",\"name\":\"tRNA-splicing ligase RtcB\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00007-b260351a-af1f-49f5-be1f-9f27441707d2-c000.snappy.parquet","partitionValues":{},"size":35891,"modificationTime":1766549234187,"dataChange":true,"stats":"{\"numRecords\":2034,\"minValues\":{\"entity_id\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"name\":\"1,3-diaminopropane aminopropyltr\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"name\":\"tRNA threonylcarbamoyladenosine \",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000004.crc b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000004.crc new file mode 100644 index 0000000..d483bd8 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000004.crc @@ -0,0 +1 @@ 
+{"txnId":"474276a4-30e2-4f9c-87c7-22bc7f10ed57","tableSizeBytes":1169203,"numFiles":32,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"b59f7dc2-44ee-45c1-a0f8-e1714e8527c8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"description\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"source\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549213449},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[{"path":"part-00003-a0e33fc3-4700-43ee-9c85-4b0c932fb35b-c000.snappy.parquet","partitionValues":{},"size":34836,"modificationTime":1766549239938,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0185e048-25c8-50dd-bfbf\",\"name\":\"(Pab)Trm-G10\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe66808c-aab1-5f12-970e\",\"name\":\"tRNA:m2G6 methyltransferase\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00002-a27a104f-dd3a-4fd5-98d7-e032b14d238f-c000.snappy.parquet","partitionValues":{},"size":40831,"modificationTime":1766549219731,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"name\":\"(S)-8-amino-7-oxononanoate synth\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"name\":\"tRNA-guanine(15) transglycosylas\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00001-3b2d9b31-38b4-4f6c-91b0-10b1bd882505-c000.snappy.parquet","partitionValues":{},"size":33382,"modificationTime":1766549219728,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"name\":\"2,3-bisphosphoglycerate-independ\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcd18c6-2ffe-515f-9d18\",\"name\":\"tRNA-guanine(15) transglycosylas\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00000-7fe49477-1c19-4e2f-a85c-d935eef12414-c000.snappy.parquet","partitionValues":{},"size":43113,"modificationTime":1766549219750,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"name\":\"(3S)-malyl-CoA thioesterase\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"name\":\"tRNA-t(6)A37 methylthiotransfera\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00004-f042aa17-bd0f-42fe-bbfa-c49c508eb8d1-c000.snappy.parquet","partitionValues":{},"size":35908,"modificationTime":1766549226729,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"name\":\"16S rRNA (pseudouridine-N1-)-met\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"name\":\"tRNA adenylyltransferase NcsA\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00000-f44e402f-1ebd-4d1d-8142-a3d70083c403-c000.snappy.parquet","partitionValues":{},"size":35349,"modificationTime":1766549226717,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"name\":\"1-(5-phosphoribosyl)-5-[(5-phosp\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"name\":\"p-GvpO\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00007-b32af234-9734-4c34-88ec-1c9c77fdb751-c000.snappy.parquet","partitionValues":{},"size":28222,"modificationTime":1766549226729,"dataChange":false,"stats":"{\"numRecords\":2143,\"minValues\":{\"entity_id\":\"cdm_prot_00504a00-27c9-551f-b27f\",\"name\":\"(2R,3R)-3-methylornithine syntha\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"name\":\"tRNA pseudouridine synthase Pus1\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00006-3e8fac6d-c54c-48b0-b2eb-fe99c2d39149-c000.snappy.parquet","partitionValues":{},"size":33346,"modificationTime":1766549226726,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"name\":\"(2R)-phospho-3-sulfolactate synt\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"name\":\"p46\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00003-a378c2f4-d615-4c49-8777-9a0d2b980e52-c000.snappy.parquet","partitionValues":{},"size":35026,"modificationTime":1766549234180,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"name\":\"23S rRNA (adenine(2503)-C(2))-me\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"name\":\"rRNA (uridine-2'-O-)-methyltrans\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00003-a69106e5-54a9-4eb0-88f7-958856dbb6c4-c000.snappy.parquet","partitionValues":{},"size":39289,"modificationTime":1766549219749,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"name\":\"(R)-2-hydroxyacid dehydrogenase\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"name\":\"dCTP deaminase, dUMP-forming\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00006-60bba51f-9140-4560-9e5c-89997d480d4c-c000.snappy.parquet","partitionValues":{},"size":58317,"modificationTime":1766549239936,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00666a93-0a4c-5498-8cac\",\"name\":\"ATP pyrophosphate-lyase\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb5b181-1633-55f5-83ee\",\"name\":\"Zinc metalloprotease MJ0392\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00000-296a81be-f30f-423c-9574-e8f006ae0fdc-c000.snappy.parquet","partitionValues":{},"size":34908,"modificationTime":1766549234175,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_006f3d67-56f0-5829-b099\",\"name\":\"2,5-diamino-6-(5-phospho-D-ribos\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcedd0e-face-5ec6-8e0b\",\"name\":\"pfRad50\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00005-2deeee12-d25b-4365-b10f-babb76fbefb7-c000.snappy.parquet","partitionValues":{},"size":36141,"modificationTime":1766549234183,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"name\":\"30S ribosomal protein S10\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"name\":\"Small ribosomal subunit protein \",\"description\":\"UniProt recommended full name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00006-3d33a697-816a-4bad-bd2a-e51ffd964bec-c000.snappy.parquet","partitionValues":{},"size":36600,"modificationTime":1766549219733,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"name\":\"2-phosphoglycerate forming glyce\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"name\":\"sn-glycerol-1-phosphate dehydrog\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00004-68220f37-23c4-4bdc-a763-9f588a9536c4-c000.snappy.parquet","partitionValues":{},"size":36158,"modificationTime":1766549219736,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_008bae16-5b42-5bfd-a15c\",\"name\":\"(S)-2,3-di-O-geranylgeranylglyce\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe5e21c9-fae5-5701-b99d\",\"name\":\"p7ss\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00006-680bfd0a-30aa-4bbe-b288-347d51725fb6-c000.snappy.parquet","partitionValues":{},"size":35807,"modificationTime":1766549234176,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00219a7c-c28c-5aaf-928e\",\"name\":\"(R)-S-adenosyl-L-methionine hydr\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe67f279-f090-5c4e-b38d\",\"name\":\"tRNA-splicing ligase RtcB\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00002-6650955d-faa1-43e3-a56c-51a85383c8b4-c000.snappy.parquet","partitionValues":{},"size":30410,"modificationTime":1766549239929,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_01672ce0-33eb-50c0-8bb3\",\"name\":\"2'cADPR synthase TcpO\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9aba-bce9-56de-b269\",\"name\":\"tcpO\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00000-a2850c69-bf2e-4d70-b897-dd8587907e97-c000.snappy.parquet","partitionValues":{},"size":14492,"modificationTime":1766549239932,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_0003b7cb-bf13-5646-9978\",\"name\":\"AspRS\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fdc49789-52de-5a6c-95cc\",\"name\":\"Tk-pheRSA\",\"description\":\"UniProt recommended full name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00003-54f2d4b4-5ff7-4fcf-9373-8ce4423a8d11-c000.snappy.parquet","partitionValues":{},"size":39071,"modificationTime":1766549226720,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"name\":\"(3S)-malyl-CoA/beta-methylmalyl-\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"name\":\"pfMre11\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00004-adba1bb2-356b-4c2f-a77e-966c8b9d6735-c000.snappy.parquet","partitionValues":{},"size":34152,"modificationTime":1766549234170,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"name\":\"30S ribosomal protein S10\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff94038a-47fa-54b0-813d\",\"name\":\"tRNase Z\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00007-b260351a-af1f-49f5-be1f-9f27441707d2-c000.snappy.parquet","partitionValues":{},"size":35891,"modificationTime":1766549234187,"dataChange":false,"stats":"{\"numRecords\":2034,\"minValues\":{\"entity_id\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"name\":\"1,3-diaminopropane aminopropyltr\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"name\":\"tRNA threonylcarbamoyladenosine \",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00005-44f8dfca-d47a-4963-8d9e-094264fc524f-c000.snappy.parquet","partitionValues":{},"size":36652,"modificationTime":1766549219728,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"name\":\"2-phospho-D-glycerate hydro-lyas\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"name\":\"tRNA-splicing endonuclease\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00001-a1a9ee73-730f-4e51-bad2-09ff9419a6fd-c000.snappy.parquet","partitionValues":{},"size":27763,"modificationTime":1766549239919,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_01c4c107-d76f-540b-a701\",\"name\":\"ArgRS\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff62a17a-c48e-5c25-b1ca\",\"name\":\"Valyl-tRNA synthetase\",\"description\":\"UniProt recommended full 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00002-685e72ec-2862-40de-8cba-192b36e48b40-c000.snappy.parquet","partitionValues":{},"size":35740,"modificationTime":1766549234176,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0061faab-bd88-562c-8a31\",\"name\":\"50S ribosomal protein L1\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff216b32-af2a-545f-8e61\",\"name\":\"Ribosomal protein ML6\",\"description\":\"UniProt recommended full name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00007-5e1e889d-a46c-4994-ad50-d12aaaba1293-c000.snappy.parquet","partitionValues":{},"size":35667,"modificationTime":1766549219750,"dataChange":false,"stats":"{\"numRecords\":2240,\"minValues\":{\"entity_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"name\":\"(1->4)-alpha-D-glucan:maltose-1-\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"name\":\"p-GvpK\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00007-7069ff33-fcfe-411f-ac96-40cabfa3d8fe-c000.snappy.parquet","partitionValues":{},"size":41335,"modificationTime":1766549239922,"dataChange":false,"stats":"{\"numRecords\":1445,\"minValues\":{\"entity_id\":\"cdm_prot_0254412b-7072-5093-9d3c\",\"name\":\"Acylphosphatase-like protein MJ0\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffd6bc2d-9fdc-5fb9-a121\",\"name\":\"Zn-ribbon RNA-binding protein\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00001-0aa26ab1-0f23-4f5f-bc76-68425d9bbc6f-c000.snappy.parquet","partitionValues":{},"size":35505,"modificationTime":1766549226721,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00f7712b-a738-523c-aa68\",\"name\":\"3-hexulose-6-phosphate synthase\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"name\":\"eIF-5A\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00004-5cb8a571-8bfd-4c75-ba1b-40ca4d487f91-c000.snappy.parquet","partitionValues":{},"size":20049,"modificationTime":1766549239918,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_0007879e-35c8-5917-ad11\",\"name\":\"16S rRNA aminocarboxypropyltrans\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe66051-6a0a-55cc-81b2\",\"name\":\"tRNA-uridine isomerase D 1\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00002-6e534054-9141-43f6-900b-601663a927c2-c000.snappy.parquet","partitionValues":{},"size":42733,"modificationTime":1766549226726,"dataChange":false,"stats":"{\"numRecords\":3072,\"minValues\":{\"entity_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"name\":\"(R)-2-methylmalate dehydratase\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"name\":\"tRNA threonylcarbamoyladenosine \",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00005-0bee7588-c84d-4380-b866-5d69e8c776f6-c000.snappy.parquet","partitionValues":{},"size":57023,"modificationTime":1766549239932,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0009897c-d1cd-572c-895e\",\"name\":\"2-oxoisovalerate ferredoxin redu\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe3617e-dce1-5614-ae51\",\"name\":\"[Protein ADP-ribosylglutamate] h\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00001-3c105c72-f00f-4c50-a1b8-91c556ea59a2-c000.snappy.parquet","partitionValues":{},"size":32390,"modificationTime":1766549234179,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00365069-eeac-5442-8f11\",\"name\":\"2-iminobutanoate/2-iminopropanoa\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe82106c-a4bc-5a9c-8159\",\"name\":\"Ribosomal protein 'A'\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"},{"path":"part-00005-23bb4218-5ffa-434f-a6a1-92cfd47943c5-c000.snappy.parquet","partitionValues":{},"size":53097,"modificationTime":1766549226717,"dataChange":false,"stats":"{\"numRecords\":3072,\"minValues\":{\"entity_id\":\"cdm_prot_00957fe6-8bfd-5ecd-9b08\",\"name\":\"2-dehydropantoate 2-reductase\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"name\":\"tRNA:m(5)C72 MTase\",\"description\":\"UniProt recommended short 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}]} diff --git a/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000004.json b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000004.json new file mode 100644 index 0000000..0985365 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/names/_delta_log/00000000000000000004.json @@ -0,0 +1,9 @@ +{"commitInfo":{"timestamp":1766549239940,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":3,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"13733","numOutputBytes":"284225"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"474276a4-30e2-4f9c-87c7-22bc7f10ed57"}} +{"add":{"path":"part-00000-a2850c69-bf2e-4d70-b897-dd8587907e97-c000.snappy.parquet","partitionValues":{},"size":14492,"modificationTime":1766549239932,"dataChange":true,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_0003b7cb-bf13-5646-9978\",\"name\":\"AspRS\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fdc49789-52de-5a6c-95cc\",\"name\":\"Tk-pheRSA\",\"description\":\"UniProt recommended full name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00001-a1a9ee73-730f-4e51-bad2-09ff9419a6fd-c000.snappy.parquet","partitionValues":{},"size":27763,"modificationTime":1766549239919,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_01c4c107-d76f-540b-a701\",\"name\":\"ArgRS\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff62a17a-c48e-5c25-b1ca\",\"name\":\"Valyl-tRNA synthetase\",\"description\":\"UniProt recommended full 
name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00002-6650955d-faa1-43e3-a56c-51a85383c8b4-c000.snappy.parquet","partitionValues":{},"size":30410,"modificationTime":1766549239929,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_01672ce0-33eb-50c0-8bb3\",\"name\":\"2'cADPR synthase TcpO\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9aba-bce9-56de-b269\",\"name\":\"tcpO\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00003-a0e33fc3-4700-43ee-9c85-4b0c932fb35b-c000.snappy.parquet","partitionValues":{},"size":34836,"modificationTime":1766549239938,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0185e048-25c8-50dd-bfbf\",\"name\":\"(Pab)Trm-G10\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fe66808c-aab1-5f12-970e\",\"name\":\"tRNA:m2G6 methyltransferase\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00004-5cb8a571-8bfd-4c75-ba1b-40ca4d487f91-c000.snappy.parquet","partitionValues":{},"size":20049,"modificationTime":1766549239918,"dataChange":true,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_0007879e-35c8-5917-ad11\",\"name\":\"16S rRNA aminocarboxypropyltrans\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe66051-6a0a-55cc-81b2\",\"name\":\"tRNA-uridine isomerase D 1\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} 
+{"add":{"path":"part-00005-0bee7588-c84d-4380-b866-5d69e8c776f6-c000.snappy.parquet","partitionValues":{},"size":57023,"modificationTime":1766549239932,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0009897c-d1cd-572c-895e\",\"name\":\"2-oxoisovalerate ferredoxin redu\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe3617e-dce1-5614-ae51\",\"name\":\"[Protein ADP-ribosylglutamate] h\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00006-60bba51f-9140-4560-9e5c-89997d480d4c-c000.snappy.parquet","partitionValues":{},"size":58317,"modificationTime":1766549239936,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00666a93-0a4c-5498-8cac\",\"name\":\"ATP pyrophosphate-lyase\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb5b181-1633-55f5-83ee\",\"name\":\"Zinc metalloprotease MJ0392\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} +{"add":{"path":"part-00007-7069ff33-fcfe-411f-ac96-40cabfa3d8fe-c000.snappy.parquet","partitionValues":{},"size":41335,"modificationTime":1766549239922,"dataChange":true,"stats":"{\"numRecords\":1445,\"minValues\":{\"entity_id\":\"cdm_prot_0254412b-7072-5093-9d3c\",\"name\":\"Acylphosphatase-like protein MJ0\",\"description\":\"UniProt alternative full name\",\"source\":\"UniProt\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffd6bc2d-9fdc-5fb9-a121\",\"name\":\"Zn-ribbon RNA-binding protein\",\"description\":\"UniProt recommended short name\",\"source\":\"UniProt\"},\"nullCount\":{\"entity_id\":0,\"name\":0,\"description\":0,\"source\":0}}"}} diff --git 
a/tests/data/uniprot_archaea/uniprot_db/names/part-00000-296a81be-f30f-423c-9574-e8f006ae0fdc-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00000-296a81be-f30f-423c-9574-e8f006ae0fdc-c000.snappy.parquet new file mode 100644 index 0000000..a2a2508 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00000-296a81be-f30f-423c-9574-e8f006ae0fdc-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00000-7fe49477-1c19-4e2f-a85c-d935eef12414-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00000-7fe49477-1c19-4e2f-a85c-d935eef12414-c000.snappy.parquet new file mode 100644 index 0000000..7d280a8 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00000-7fe49477-1c19-4e2f-a85c-d935eef12414-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00000-a2850c69-bf2e-4d70-b897-dd8587907e97-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00000-a2850c69-bf2e-4d70-b897-dd8587907e97-c000.snappy.parquet new file mode 100644 index 0000000..e27fcb4 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00000-a2850c69-bf2e-4d70-b897-dd8587907e97-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00000-f44e402f-1ebd-4d1d-8142-a3d70083c403-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00000-f44e402f-1ebd-4d1d-8142-a3d70083c403-c000.snappy.parquet new file mode 100644 index 0000000..5a6532e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00000-f44e402f-1ebd-4d1d-8142-a3d70083c403-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00001-0aa26ab1-0f23-4f5f-bc76-68425d9bbc6f-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00001-0aa26ab1-0f23-4f5f-bc76-68425d9bbc6f-c000.snappy.parquet new file mode 100644 index 0000000..9926ba1 
Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00001-0aa26ab1-0f23-4f5f-bc76-68425d9bbc6f-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00001-3b2d9b31-38b4-4f6c-91b0-10b1bd882505-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00001-3b2d9b31-38b4-4f6c-91b0-10b1bd882505-c000.snappy.parquet new file mode 100644 index 0000000..e18b580 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00001-3b2d9b31-38b4-4f6c-91b0-10b1bd882505-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00001-3c105c72-f00f-4c50-a1b8-91c556ea59a2-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00001-3c105c72-f00f-4c50-a1b8-91c556ea59a2-c000.snappy.parquet new file mode 100644 index 0000000..bb42f29 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00001-3c105c72-f00f-4c50-a1b8-91c556ea59a2-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00001-a1a9ee73-730f-4e51-bad2-09ff9419a6fd-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00001-a1a9ee73-730f-4e51-bad2-09ff9419a6fd-c000.snappy.parquet new file mode 100644 index 0000000..294763c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00001-a1a9ee73-730f-4e51-bad2-09ff9419a6fd-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00002-6650955d-faa1-43e3-a56c-51a85383c8b4-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00002-6650955d-faa1-43e3-a56c-51a85383c8b4-c000.snappy.parquet new file mode 100644 index 0000000..e13e8cf Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00002-6650955d-faa1-43e3-a56c-51a85383c8b4-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00002-685e72ec-2862-40de-8cba-192b36e48b40-c000.snappy.parquet 
b/tests/data/uniprot_archaea/uniprot_db/names/part-00002-685e72ec-2862-40de-8cba-192b36e48b40-c000.snappy.parquet new file mode 100644 index 0000000..ec2163d Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00002-685e72ec-2862-40de-8cba-192b36e48b40-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00002-6e534054-9141-43f6-900b-601663a927c2-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00002-6e534054-9141-43f6-900b-601663a927c2-c000.snappy.parquet new file mode 100644 index 0000000..4125aab Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00002-6e534054-9141-43f6-900b-601663a927c2-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00002-a27a104f-dd3a-4fd5-98d7-e032b14d238f-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00002-a27a104f-dd3a-4fd5-98d7-e032b14d238f-c000.snappy.parquet new file mode 100644 index 0000000..91883b1 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00002-a27a104f-dd3a-4fd5-98d7-e032b14d238f-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00003-54f2d4b4-5ff7-4fcf-9373-8ce4423a8d11-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00003-54f2d4b4-5ff7-4fcf-9373-8ce4423a8d11-c000.snappy.parquet new file mode 100644 index 0000000..345b61d Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00003-54f2d4b4-5ff7-4fcf-9373-8ce4423a8d11-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00003-a0e33fc3-4700-43ee-9c85-4b0c932fb35b-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00003-a0e33fc3-4700-43ee-9c85-4b0c932fb35b-c000.snappy.parquet new file mode 100644 index 0000000..e3b8674 Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/names/part-00003-a0e33fc3-4700-43ee-9c85-4b0c932fb35b-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00003-a378c2f4-d615-4c49-8777-9a0d2b980e52-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00003-a378c2f4-d615-4c49-8777-9a0d2b980e52-c000.snappy.parquet new file mode 100644 index 0000000..1a2f42c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00003-a378c2f4-d615-4c49-8777-9a0d2b980e52-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00003-a69106e5-54a9-4eb0-88f7-958856dbb6c4-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00003-a69106e5-54a9-4eb0-88f7-958856dbb6c4-c000.snappy.parquet new file mode 100644 index 0000000..2280a97 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00003-a69106e5-54a9-4eb0-88f7-958856dbb6c4-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00004-5cb8a571-8bfd-4c75-ba1b-40ca4d487f91-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00004-5cb8a571-8bfd-4c75-ba1b-40ca4d487f91-c000.snappy.parquet new file mode 100644 index 0000000..05bdd10 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00004-5cb8a571-8bfd-4c75-ba1b-40ca4d487f91-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00004-68220f37-23c4-4bdc-a763-9f588a9536c4-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00004-68220f37-23c4-4bdc-a763-9f588a9536c4-c000.snappy.parquet new file mode 100644 index 0000000..a8b5c14 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00004-68220f37-23c4-4bdc-a763-9f588a9536c4-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00004-adba1bb2-356b-4c2f-a77e-966c8b9d6735-c000.snappy.parquet 
b/tests/data/uniprot_archaea/uniprot_db/names/part-00004-adba1bb2-356b-4c2f-a77e-966c8b9d6735-c000.snappy.parquet new file mode 100644 index 0000000..ca6e791 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00004-adba1bb2-356b-4c2f-a77e-966c8b9d6735-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00004-f042aa17-bd0f-42fe-bbfa-c49c508eb8d1-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00004-f042aa17-bd0f-42fe-bbfa-c49c508eb8d1-c000.snappy.parquet new file mode 100644 index 0000000..da7ceff Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00004-f042aa17-bd0f-42fe-bbfa-c49c508eb8d1-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00005-0bee7588-c84d-4380-b866-5d69e8c776f6-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00005-0bee7588-c84d-4380-b866-5d69e8c776f6-c000.snappy.parquet new file mode 100644 index 0000000..f1e99af Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00005-0bee7588-c84d-4380-b866-5d69e8c776f6-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00005-23bb4218-5ffa-434f-a6a1-92cfd47943c5-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00005-23bb4218-5ffa-434f-a6a1-92cfd47943c5-c000.snappy.parquet new file mode 100644 index 0000000..dce0356 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00005-23bb4218-5ffa-434f-a6a1-92cfd47943c5-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00005-2deeee12-d25b-4365-b10f-babb76fbefb7-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00005-2deeee12-d25b-4365-b10f-babb76fbefb7-c000.snappy.parquet new file mode 100644 index 0000000..b53aecb Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/names/part-00005-2deeee12-d25b-4365-b10f-babb76fbefb7-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00005-44f8dfca-d47a-4963-8d9e-094264fc524f-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00005-44f8dfca-d47a-4963-8d9e-094264fc524f-c000.snappy.parquet new file mode 100644 index 0000000..532ec16 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00005-44f8dfca-d47a-4963-8d9e-094264fc524f-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00006-3d33a697-816a-4bad-bd2a-e51ffd964bec-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00006-3d33a697-816a-4bad-bd2a-e51ffd964bec-c000.snappy.parquet new file mode 100644 index 0000000..36c182a Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00006-3d33a697-816a-4bad-bd2a-e51ffd964bec-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00006-3e8fac6d-c54c-48b0-b2eb-fe99c2d39149-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00006-3e8fac6d-c54c-48b0-b2eb-fe99c2d39149-c000.snappy.parquet new file mode 100644 index 0000000..cffd359 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00006-3e8fac6d-c54c-48b0-b2eb-fe99c2d39149-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00006-60bba51f-9140-4560-9e5c-89997d480d4c-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00006-60bba51f-9140-4560-9e5c-89997d480d4c-c000.snappy.parquet new file mode 100644 index 0000000..bda9df1 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00006-60bba51f-9140-4560-9e5c-89997d480d4c-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00006-680bfd0a-30aa-4bbe-b288-347d51725fb6-c000.snappy.parquet 
b/tests/data/uniprot_archaea/uniprot_db/names/part-00006-680bfd0a-30aa-4bbe-b288-347d51725fb6-c000.snappy.parquet new file mode 100644 index 0000000..004ea59 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00006-680bfd0a-30aa-4bbe-b288-347d51725fb6-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00007-5e1e889d-a46c-4994-ad50-d12aaaba1293-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00007-5e1e889d-a46c-4994-ad50-d12aaaba1293-c000.snappy.parquet new file mode 100644 index 0000000..a88801e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00007-5e1e889d-a46c-4994-ad50-d12aaaba1293-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00007-7069ff33-fcfe-411f-ac96-40cabfa3d8fe-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00007-7069ff33-fcfe-411f-ac96-40cabfa3d8fe-c000.snappy.parquet new file mode 100644 index 0000000..7db384e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00007-7069ff33-fcfe-411f-ac96-40cabfa3d8fe-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00007-b260351a-af1f-49f5-be1f-9f27441707d2-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00007-b260351a-af1f-49f5-be1f-9f27441707d2-c000.snappy.parquet new file mode 100644 index 0000000..941a120 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/names/part-00007-b260351a-af1f-49f5-be1f-9f27441707d2-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/names/part-00007-b32af234-9734-4c34-88ec-1c9c77fdb751-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/names/part-00007-b32af234-9734-4c34-88ec-1c9c77fdb751-c000.snappy.parquet new file mode 100644 index 0000000..9523d84 Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/names/part-00007-b32af234-9734-4c34-88ec-1c9c77fdb751-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00000-207a5aa7-b459-46f4-8f11-9a0016306790-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00000-207a5aa7-b459-46f4-8f11-9a0016306790-c000.snappy.parquet.crc new file mode 100644 index 0000000..0414cb9 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00000-207a5aa7-b459-46f4-8f11-9a0016306790-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00000-9280d073-b085-4be8-85ed-536df50f62da-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00000-9280d073-b085-4be8-85ed-536df50f62da-c000.snappy.parquet.crc new file mode 100644 index 0000000..0fc88c5 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00000-9280d073-b085-4be8-85ed-536df50f62da-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00000-b4de8d2f-f466-4990-9d63-a326716cbea4-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00000-b4de8d2f-f466-4990-9d63-a326716cbea4-c000.snappy.parquet.crc new file mode 100644 index 0000000..2158b40 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00000-b4de8d2f-f466-4990-9d63-a326716cbea4-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00000-ca8a05fd-be38-4a6e-83d5-5c50f3c51145-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00000-ca8a05fd-be38-4a6e-83d5-5c50f3c51145-c000.snappy.parquet.crc new file mode 100644 index 0000000..76af17b Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00000-ca8a05fd-be38-4a6e-83d5-5c50f3c51145-c000.snappy.parquet.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00001-14de6dd7-de07-482f-be2f-bc12adc49f95-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00001-14de6dd7-de07-482f-be2f-bc12adc49f95-c000.snappy.parquet.crc new file mode 100644 index 0000000..4d57769 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00001-14de6dd7-de07-482f-be2f-bc12adc49f95-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00001-7a1679d7-407a-42ee-abba-19074356776e-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00001-7a1679d7-407a-42ee-abba-19074356776e-c000.snappy.parquet.crc new file mode 100644 index 0000000..9177de4 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00001-7a1679d7-407a-42ee-abba-19074356776e-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00001-bd46662b-bc66-4729-afd0-35417fd10fc1-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00001-bd46662b-bc66-4729-afd0-35417fd10fc1-c000.snappy.parquet.crc new file mode 100644 index 0000000..a32d640 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00001-bd46662b-bc66-4729-afd0-35417fd10fc1-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00001-d12b1474-5792-422a-a98f-7503233261c9-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00001-d12b1474-5792-422a-a98f-7503233261c9-c000.snappy.parquet.crc new file mode 100644 index 0000000..4996738 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00001-d12b1474-5792-422a-a98f-7503233261c9-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00002-6291644e-f5b3-45af-81c9-ceb443b13c28-c000.snappy.parquet.crc 
b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00002-6291644e-f5b3-45af-81c9-ceb443b13c28-c000.snappy.parquet.crc new file mode 100644 index 0000000..30bcb6c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00002-6291644e-f5b3-45af-81c9-ceb443b13c28-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00002-87a46f98-c10e-487d-b09c-260ae5569b04-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00002-87a46f98-c10e-487d-b09c-260ae5569b04-c000.snappy.parquet.crc new file mode 100644 index 0000000..db98d80 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00002-87a46f98-c10e-487d-b09c-260ae5569b04-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00002-c881f350-4b6e-449b-b6e2-2544a424c9bc-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00002-c881f350-4b6e-449b-b6e2-2544a424c9bc-c000.snappy.parquet.crc new file mode 100644 index 0000000..db7f4f9 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00002-c881f350-4b6e-449b-b6e2-2544a424c9bc-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00002-f9cbb606-eb09-4474-bd73-334b02a5e53c-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00002-f9cbb606-eb09-4474-bd73-334b02a5e53c-c000.snappy.parquet.crc new file mode 100644 index 0000000..cdfe0ca Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00002-f9cbb606-eb09-4474-bd73-334b02a5e53c-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00003-261ceeac-3a67-4da2-a828-81308d60d332-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00003-261ceeac-3a67-4da2-a828-81308d60d332-c000.snappy.parquet.crc new file mode 100644 index 0000000..a00c3e2 Binary files 
/dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00003-261ceeac-3a67-4da2-a828-81308d60d332-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00003-58a28129-ebb1-40c8-9a7e-78c52cbe7a39-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00003-58a28129-ebb1-40c8-9a7e-78c52cbe7a39-c000.snappy.parquet.crc new file mode 100644 index 0000000..03023a2 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00003-58a28129-ebb1-40c8-9a7e-78c52cbe7a39-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00003-9ceb2e14-936d-4777-923a-c7586d84aff3-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00003-9ceb2e14-936d-4777-923a-c7586d84aff3-c000.snappy.parquet.crc new file mode 100644 index 0000000..4036e75 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00003-9ceb2e14-936d-4777-923a-c7586d84aff3-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00003-cc92fe22-58d3-46d4-863e-0062067ebd2d-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00003-cc92fe22-58d3-46d4-863e-0062067ebd2d-c000.snappy.parquet.crc new file mode 100644 index 0000000..6899e91 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00003-cc92fe22-58d3-46d4-863e-0062067ebd2d-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00004-0cd337df-6b78-40e1-9e72-4d902a79f0b3-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00004-0cd337df-6b78-40e1-9e72-4d902a79f0b3-c000.snappy.parquet.crc new file mode 100644 index 0000000..d4d4667 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00004-0cd337df-6b78-40e1-9e72-4d902a79f0b3-c000.snappy.parquet.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00004-159d2ae9-d5d1-40ef-8b8c-e8462f482af1-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00004-159d2ae9-d5d1-40ef-8b8c-e8462f482af1-c000.snappy.parquet.crc new file mode 100644 index 0000000..1ca3439 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00004-159d2ae9-d5d1-40ef-8b8c-e8462f482af1-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00004-3dcf6645-ae1c-4467-863d-85acc844f886-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00004-3dcf6645-ae1c-4467-863d-85acc844f886-c000.snappy.parquet.crc new file mode 100644 index 0000000..04cc09f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00004-3dcf6645-ae1c-4467-863d-85acc844f886-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00004-638e48aa-2700-4992-be7a-7d56903efbec-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00004-638e48aa-2700-4992-be7a-7d56903efbec-c000.snappy.parquet.crc new file mode 100644 index 0000000..c03bdba Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00004-638e48aa-2700-4992-be7a-7d56903efbec-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00005-5052056a-b12d-4506-8172-512fb3e152c7-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00005-5052056a-b12d-4506-8172-512fb3e152c7-c000.snappy.parquet.crc new file mode 100644 index 0000000..25eeeb1 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00005-5052056a-b12d-4506-8172-512fb3e152c7-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00005-831e6b90-0dae-4d62-a116-f1d7b20d680e-c000.snappy.parquet.crc 
b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00005-831e6b90-0dae-4d62-a116-f1d7b20d680e-c000.snappy.parquet.crc new file mode 100644 index 0000000..415b24f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00005-831e6b90-0dae-4d62-a116-f1d7b20d680e-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00005-e1178ae8-d3d7-4662-a488-6f40c9f9eebf-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00005-e1178ae8-d3d7-4662-a488-6f40c9f9eebf-c000.snappy.parquet.crc new file mode 100644 index 0000000..e779190 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00005-e1178ae8-d3d7-4662-a488-6f40c9f9eebf-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00005-ed12e42c-224f-4e76-9a05-3aa1e3f6758a-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00005-ed12e42c-224f-4e76-9a05-3aa1e3f6758a-c000.snappy.parquet.crc new file mode 100644 index 0000000..9f8cdb3 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00005-ed12e42c-224f-4e76-9a05-3aa1e3f6758a-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00006-411b6dd6-7451-4814-9649-b1a37992bad6-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00006-411b6dd6-7451-4814-9649-b1a37992bad6-c000.snappy.parquet.crc new file mode 100644 index 0000000..2af3196 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00006-411b6dd6-7451-4814-9649-b1a37992bad6-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00006-75fa702b-36c2-40fb-a563-65206a6e1827-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00006-75fa702b-36c2-40fb-a563-65206a6e1827-c000.snappy.parquet.crc new file mode 100644 index 0000000..b4bd5e0 Binary files 
/dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00006-75fa702b-36c2-40fb-a563-65206a6e1827-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00006-c1bdb3ab-0e57-4470-8330-b13826aab8c6-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00006-c1bdb3ab-0e57-4470-8330-b13826aab8c6-c000.snappy.parquet.crc new file mode 100644 index 0000000..15b681d Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00006-c1bdb3ab-0e57-4470-8330-b13826aab8c6-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00006-e6e3deb7-a199-4e40-944a-6b52a9c809bb-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00006-e6e3deb7-a199-4e40-944a-6b52a9c809bb-c000.snappy.parquet.crc new file mode 100644 index 0000000..06e97bd Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00006-e6e3deb7-a199-4e40-944a-6b52a9c809bb-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00007-1ca7b106-08f4-4232-8b29-bf918a55a1d2-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00007-1ca7b106-08f4-4232-8b29-bf918a55a1d2-c000.snappy.parquet.crc new file mode 100644 index 0000000..20a1414 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00007-1ca7b106-08f4-4232-8b29-bf918a55a1d2-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00007-59fe7ec2-82c3-4010-b399-2192a7f6a694-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00007-59fe7ec2-82c3-4010-b399-2192a7f6a694-c000.snappy.parquet.crc new file mode 100644 index 0000000..f85f8a4 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00007-59fe7ec2-82c3-4010-b399-2192a7f6a694-c000.snappy.parquet.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00007-5c63ea60-2077-47ac-8a07-bc4d52e24659-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00007-5c63ea60-2077-47ac-8a07-bc4d52e24659-c000.snappy.parquet.crc new file mode 100644 index 0000000..25ce9d1 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00007-5c63ea60-2077-47ac-8a07-bc4d52e24659-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00007-d00f5a40-677b-43fe-9fa8-949fc983f149-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00007-d00f5a40-677b-43fe-9fa8-949fc983f149-c000.snappy.parquet.crc new file mode 100644 index 0000000..bfce075 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/.part-00007-d00f5a40-677b-43fe-9fa8-949fc983f149-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000000.crc.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000000.crc.crc new file mode 100644 index 0000000..1904d78 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000000.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000000.json.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000000.json.crc new file mode 100644 index 0000000..d202d38 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000000.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000001.crc.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000001.crc.crc new file mode 100644 index 0000000..88c3c59 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000001.crc.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000001.json.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000001.json.crc new file mode 100644 index 0000000..bdc97ad Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000001.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000002.crc.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000002.crc.crc new file mode 100644 index 0000000..57f81db Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000002.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000002.json.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000002.json.crc new file mode 100644 index 0000000..800e2a5 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000002.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000003.crc.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000003.crc.crc new file mode 100644 index 0000000..4416308 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000003.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000003.json.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000003.json.crc new file mode 100644 index 0000000..fffd85d Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000003.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000004.crc.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000004.crc.crc new file mode 
100644 index 0000000..25e9c40 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000004.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000004.json.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000004.json.crc new file mode 100644 index 0000000..19ca47c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/.00000000000000000004.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000000.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000000.crc new file mode 100644 index 0000000..e5af31c --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000000.crc @@ -0,0 +1 @@ +{"txnId":"f388a99f-68e0-4d03-bb65-73143a71106f","tableSizeBytes":0,"numFiles":0,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"cf52e24b-c8bf-4037-bc85-ecc2167e63b8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[]}","partitionColumns":[],"configuration":{},"createdTime":1766549213818},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[]} diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000000.json b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000000.json new file mode 100644 index 0000000..707ec1a --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1766549213854,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/4.0.1 
Delta-Lake/4.0.0","txnId":"f388a99f-68e0-4d03-bb65-73143a71106f"}} +{"metaData":{"id":"cf52e24b-c8bf-4037-bc85-ecc2167e63b8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[]}","partitionColumns":[],"configuration":{},"createdTime":1766549213818}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000001.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000001.crc new file mode 100644 index 0000000..09da987 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000001.crc @@ -0,0 +1 @@ +{"txnId":"ea94ed6e-a891-489a-acf3-34d08616b66a","tableSizeBytes":1708888,"numFiles":8,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"cf52e24b-c8bf-4037-bc85-ecc2167e63b8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"protein_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"ec_numbers\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"evidence_for_existence\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"length\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"mass\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"checksum\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"modified\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sequence_version\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sequence\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"entry_modified\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549213818},"protocol":{"minReaderVersion":1,"minWriterVersion":2}} diff --git 
a/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000001.json b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000001.json new file mode 100644 index 0000000..ba34fbb --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000001.json @@ -0,0 +1,10 @@ +{"commitInfo":{"timestamp":1766549220193,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"5000","numOutputBytes":"1708888"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"ea94ed6e-a891-489a-acf3-34d08616b66a"}} +{"metaData":{"id":"cf52e24b-c8bf-4037-bc85-ecc2167e63b8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"protein_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"ec_numbers\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"evidence_for_existence\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"length\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"mass\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"checksum\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"modified\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sequence_version\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sequence\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"entry_modified\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549213818}} 
+{"add":{"path":"part-00000-b4de8d2f-f466-4990-9d63-a326716cbea4-c000.snappy.parquet","partitionValues":{},"size":221094,"modificationTime":1766549220177,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"ec_numbers\":\"1.1.1.-\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"100\",\"mass\":\"10015\",\"checksum\":\"002B834BC5F9D6AD\",\"modified\":\"1991-02-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAESQLKRVIETLRRLGIEEVLKLERRDPQY\",\"entry_modified\":\"2024-11-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffa4eefe-b062-5f26-8f77\",\"ec_numbers\":\"7.1.2.2\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"990\",\"mass\":\"9995\",\"checksum\":\"FF95A3D3020413CC\",\"modified\":\"2025-04-09\",\"sequence_version\":\"4\",\"sequence\":\"NTNETKVRFTGETAKIGVSLEMLGRIFNGAGK\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":320,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} +{"add":{"path":"part-00001-bd46662b-bc66-4729-afd0-35417fd10fc1-c000.snappy.parquet","partitionValues":{},"size":194687,"modificationTime":1766549220169,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"ec_numbers\":\"1.1.1.25\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"103\",\"mass\":\"10987\",\"checksum\":\"00041F603DFDD5A3\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAIIVHGGAGTIRKEERIPKVIEGVREAVLA\",\"entry_modified\":\"2024-07-24\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"ec_numbers\":\"7.3.2.7\",\"evidence_for_existence\":\"predicted\",\"length\":\"815\",\"mass\":\"90860\",\"checksum\":\"FFB8BC932E9E15EC\",\"modified\":\"2025-02-05\",\"sequence_version\":\"4\",\"sequence\":\"MYTMELRFIRGGVCAVDGVLAAGCREGKYGVG\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":58,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} +{"add":{"path":"part-00002-c881f350-4b6e-449b-b6e2-2544a424c9bc-c000.snappy.parquet","partitionValues":{},"size":226829,"modificationTime":1766549220177,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"ec_numbers\":\"1.11.1.6\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"101\",\"mass\":\"10049\",\"checksum\":\"001DEFBE3589FDAF\",\"modified\":\"1989-07-01\",\"sequence_version\":\"1\",\"sequence\":\"GIGTLLMLIGTFYFIARGWGVTDKKAREYYAI\",\"entry_modified\":\"2023-06-28\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"ec_numbers\":\"7.6.2.8\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9979\",\"checksum\":\"FFB7F1B1543E0C36\",\"modified\":\"2015-01-07\",\"sequence_version\":\"3\",\"sequence\":\"PMILLALGLLADTDIASLFTAITMDIGMCVTG\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":195,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} 
+{"add":{"path":"part-00003-cc92fe22-58d3-46d4-863e-0062067ebd2d-c000.snappy.parquet","partitionValues":{},"size":203116,"modificationTime":1766549220178,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"ec_numbers\":\"1.1.1.337\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"102\",\"mass\":\"100984\",\"checksum\":\"005A5C21E13342B4\",\"modified\":\"1991-05-01\",\"sequence_version\":\"1\",\"sequence\":\"KKIGAIAAGSAMVASALATGVFAVEKIGDVEG\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"ec_numbers\":\"7.2.2.9\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"99\",\"mass\":\"96911\",\"checksum\":\"FFF22879BB86CECE\",\"modified\":\"2020-08-12\",\"sequence_version\":\"3\",\"sequence\":\"MYVVNPEEKVIEIMKQTGIDLAATLPCDRIKN\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":153,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} +{"add":{"path":"part-00004-159d2ae9-d5d1-40ef-8b8c-e8462f482af1-c000.snappy.parquet","partitionValues":{},"size":232956,"modificationTime":1766549220180,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_008bae16-5b42-5bfd-a15c\",\"ec_numbers\":\"1.1.1.-\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"1080\",\"mass\":\"10065\",\"checksum\":\"0060B82920BA478E\",\"modified\":\"1991-05-01\",\"sequence_version\":\"1\",\"sequence\":\"MADDDTTARRPVLSSFGTLGRGWLGVLALLLV\",\"entry_modified\":\"2024-05-29\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fedf6b23-d61c-56dc-b256\",\"ec_numbers\":\"6.5.1.7\",\"evidence_for_existence\":\"predicted\",\"length\":\"959\",\"mass\":\"92367\",\"checksum\":\"FFD61667871BE291\",\"modified\":\"2024-01-24\",\"sequence_version\":\"4\",\"sequence\":\"VEQDPYEIVIKQLERAAQYMEISE\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":67,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} +{"add":{"path":"part-00005-e1178ae8-d3d7-4662-a488-6f40c9f9eebf-c000.snappy.parquet","partitionValues":{},"size":216665,"modificationTime":1766549220176,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"ec_numbers\":\"1.16.-.-\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"102\",\"mass\":\"10006\",\"checksum\":\"000B863998A7D939\",\"modified\":\"1986-07-21\",\"sequence_version\":\"1\",\"sequence\":\"FEPYIYALLKDDSAIEEVKKITAGRHGRVVKV\",\"entry_modified\":\"2023-06-28\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"ec_numbers\":\"7.-.-.-\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9921\",\"checksum\":\"FF93ED4EDF204C7A\",\"modified\":\"2024-01-24\",\"sequence_version\":\"4\",\"sequence\":\"SVTIDYDKCKGPECAECVNACPMEVFEIQGDK\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":232,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} 
+{"add":{"path":"part-00006-e6e3deb7-a199-4e40-944a-6b52a9c809bb-c000.snappy.parquet","partitionValues":{},"size":209852,"modificationTime":1766549220187,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"ec_numbers\":\"1.1.1.261\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"102\",\"mass\":\"10129\",\"checksum\":\"0037152CDD997446\",\"modified\":\"1989-07-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAVQIDDYGPWTTEPAPRRETDLQALQARLF\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"ec_numbers\":\"6.3.5.7\",\"evidence_for_existence\":\"predicted\",\"length\":\"97\",\"mass\":\"9786\",\"checksum\":\"FFBDC4AB39BB53E1\",\"modified\":\"2024-05-29\",\"sequence_version\":\"4\",\"sequence\":\"MYTSFHRIDLPRTIVVGGGVLDKAGGYVSGVA\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":142,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} +{"add":{"path":"part-00007-59fe7ec2-82c3-4010-b399-2192a7f6a694-c000.snappy.parquet","partitionValues":{},"size":203689,"modificationTime":1766549220181,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"ec_numbers\":\"1.-.-.-\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"105\",\"mass\":\"10014\",\"checksum\":\"0034823DA6E9ED5C\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"MADELSEKSVEGTEEDGESAPAEGTTEGVPVD\",\"entry_modified\":\"2024-07-24\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"ec_numbers\":\"7.5.2.-\",\"evidence_for_existence\":\"inferred from 
homology\",\"length\":\"83\",\"mass\":\"9939\",\"checksum\":\"FFB678C2FEE2F209\",\"modified\":\"2023-02-22\",\"sequence_version\":\"4\",\"sequence\":\"MYTLVLLRHGESTWNKENRFTGWTDVDLSKDG\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":124,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000002.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000002.crc new file mode 100644 index 0000000..79986e7 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000002.crc @@ -0,0 +1 @@ +{"txnId":"c2780b81-dee9-4d15-9b42-9a2cc7fc9cc1","tableSizeBytes":3122651,"numFiles":16,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"cf52e24b-c8bf-4037-bc85-ecc2167e63b8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"protein_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"ec_numbers\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"evidence_for_existence\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"length\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"mass\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"checksum\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"modified\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sequence_version\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sequence\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"entry_modified\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549213818},"protocol":{"minReaderVersion":1,"minWriterVersio
n":2},"allFiles":[{"path":"part-00007-1ca7b106-08f4-4232-8b29-bf918a55a1d2-c000.snappy.parquet","partitionValues":{},"size":186767,"modificationTime":1766549227675,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"ec_numbers\":\"1.3.1.14\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"112\",\"mass\":\"11713\",\"checksum\":\"00177DD8DEC84097\",\"modified\":\"1991-08-01\",\"sequence_version\":\"1\",\"sequence\":\"MADLSTSVAGIRLENPLMLASGILDENGYTML\",\"entry_modified\":\"2024-11-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"ec_numbers\":\"7.3.2.1\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"89\",\"mass\":\"9696\",\"checksum\":\"FFE05E8B923ADAF9\",\"modified\":\"2020-10-07\",\"sequence_version\":\"3\",\"sequence\":\"YGEPIGVETLTK\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":11,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00005-831e6b90-0dae-4d62-a116-f1d7b20d680e-c000.snappy.parquet","partitionValues":{},"size":152395,"modificationTime":1766549227692,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00957fe6-8bfd-5ecd-9b08\",\"ec_numbers\":\"1.1.1.169\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"111\",\"mass\":\"10083\",\"checksum\":\"0033DA1187FEA713\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"DINGGGATLPQKLYQTSGVLTAGFAPYIGVGS\",\"entry_modified\":\"2024-10-02\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"ec_numbers\":\"7.1.1.-\",\"evidence_for_existence\":\"inferred from 
homology\",\"length\":\"93\",\"mass\":\"9565\",\"checksum\":\"FFEA81938BF49096\",\"modified\":\"2016-11-02\",\"sequence_version\":\"6\",\"sequence\":\"MYYHIIKSPIYPILLAGDEKGLKHLIFLKDER\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":208,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00003-9ceb2e14-936d-4777-923a-c7586d84aff3-c000.snappy.parquet","partitionValues":{},"size":189269,"modificationTime":1766549227675,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"ec_numbers\":\"1.1.1.299\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"1127\",\"mass\":\"10980\",\"checksum\":\"0065BCF4CF0118E3\",\"modified\":\"1988-08-01\",\"sequence_version\":\"1\",\"sequence\":\"AADIFAKFKTSMEVK\",\"entry_modified\":\"2024-11-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"ec_numbers\":\"7.5.2.1\",\"evidence_for_existence\":\"predicted\",\"length\":\"998\",\"mass\":\"92970\",\"checksum\":\"FF118F554983A5C4\",\"modified\":\"2014-06-11\",\"sequence_version\":\"5\",\"sequence\":\"SDTVDIYDAGKILERVII\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":58,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00003-cc92fe22-58d3-46d4-863e-0062067ebd2d-c000.snappy.parquet","partitionValues":{},"size":203116,"modificationTime":1766549220178,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"ec_numbers\":\"1.1.1.337\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"102\",\"mass\":\"100984\",\"checksum\":\"005A5C21E13342B4\",\"modified\":\"1991-05-01\",\"sequence_version\":\"1\",\"sequence\":\"KKIGAIAAGSAMVASALATGVFAVEKIGDVEG\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"ec_numbers\":\"7.2.2.9\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"99\",\"mass\":\"96911\",\"checksum\":\"FFF22879BB86CECE\",\"modified\":\"2020-08-12\",\"sequence_version\":\"3\",\"sequence\":\"MYVVNPEEKVIEIMKQTGIDLAATLPCDRIKN\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":153,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00001-14de6dd7-de07-482f-be2f-bc12adc49f95-c000.snappy.parquet","partitionValues":{},"size":172301,"modificationTime":1766549227688,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00df9ef5-d9b4-5654-9d2a\",\"ec_numbers\":\"1.1.1.383\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"102\",\"mass\":\"110945\",\"checksum\":\"0022ADF92AAB288B\",\"modified\":\"1993-10-01\",\"sequence_version\":\"1\",\"sequence\":\"EDEFGTLYAAFDNMRANLRTQISEAETAKQEA\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"ec_numbers\":\"7.2.3.1\",\"evidence_for_existence\":\"predicted\",\"length\":\"992\",\"mass\":\"927738\",\"checksum\":\"FF7E0EA466074645\",\"modified\":\"2018-11-07\",\"sequence_version\":\"3\",\"sequence\":\"MYPILVNSLLTCPIVKKGEYNYFVHPITDGIP\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":341,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00004-159d2ae9-d5d1-40ef-8b8c-e8462f482af1-c000.snappy.parquet","partitionValues":{},"size":232956,"modificationTime":1766549220180,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_008bae16-5b42-5bfd-a15c\",\"ec_numbers\":\"1.1.1.-\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"1080\",\"mass\":\"10065\",\"checksum\":\"0060B82920BA478E\",\"modified\":\"1991-05-01\",\"sequence_version\":\"1\",\"sequence\":\"MADDDTTARRPVLSSFGTLGRGWLGVLALLLV\",\"entry_modified\":\"2024-05-29\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fedf6b23-d61c-56dc-b256\",\"ec_numbers\":\"6.5.1.7\",\"evidence_for_existence\":\"predicted\",\"length\":\"959\",\"mass\":\"92367\",\"checksum\":\"FFD61667871BE291\",\"modified\":\"2024-01-24\",\"sequence_version\":\"4\",\"sequence\":\"VEQDPYEIVIKQLERAAQYMEISE\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":67,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00006-c1bdb3ab-0e57-4470-8330-b13826aab8c6-c000.snappy.parquet","partitionValues":{},"size":163700,"modificationTime":1766549227685,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_0102d35c-20a5-5f8d-8175\",\"ec_numbers\":\"1.2.1.41\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"105\",\"mass\":\"10297\",\"checksum\":\"003EF50270BA751E\",\"modified\":\"1991-02-01\",\"sequence_version\":\"1\",\"sequence\":\"DTANDPKYGSQYAPQKVNADVDQGVXXXHPEL\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"ec_numbers\":\"7.3.2.2\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9848\",\"checksum\":\"FFD19C6CC8E8091B\",\"modified\":\"2017-04-12\",\"sequence_version\":\"3\",\"sequence\":\"MYLTREEERILAGEEGEARAKALEVIVKVGEA\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":202,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00000-9280d073-b085-4be8-85ed-536df50f62da-c000.snappy.parquet","partitionValues":{},"size":196389,"modificationTime":1766549227684,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"ec_numbers\":\"1.1.1.23\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"100\",\"mass\":\"10111\",\"checksum\":\"00A04ACE5FAFE54A\",\"modified\":\"1988-01-01\",\"sequence_version\":\"1\",\"sequence\":\"AEMRNFALRDAQGNEIGVFTGKSPRQAALKAA\",\"entry_modified\":\"2024-07-24\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"ec_numbers\":\"6.2.1.56\",\"evidence_for_existence\":\"inferred from 
homology\",\"length\":\"99\",\"mass\":\"9766\",\"checksum\":\"FF5B95B523D29A12\",\"modified\":\"2018-04-25\",\"sequence_version\":\"6\",\"sequence\":\"SNTRNFVLRDEEGNEHGVFTGKQPRQAALKAA\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":72,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00005-e1178ae8-d3d7-4662-a488-6f40c9f9eebf-c000.snappy.parquet","partitionValues":{},"size":216665,"modificationTime":1766549220176,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"ec_numbers\":\"1.16.-.-\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"102\",\"mass\":\"10006\",\"checksum\":\"000B863998A7D939\",\"modified\":\"1986-07-21\",\"sequence_version\":\"1\",\"sequence\":\"FEPYIYALLKDDSAIEEVKKITAGRHGRVVKV\",\"entry_modified\":\"2023-06-28\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"ec_numbers\":\"7.-.-.-\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9921\",\"checksum\":\"FF93ED4EDF204C7A\",\"modified\":\"2024-01-24\",\"sequence_version\":\"4\",\"sequence\":\"SVTIDYDKCKGPECAECVNACPMEVFEIQGDK\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":232,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00007-59fe7ec2-82c3-4010-b399-2192a7f6a694-c000.snappy.parquet","partitionValues":{},"size":203689,"modificationTime":1766549220181,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"ec_numbers\":\"1.-.-.-\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"105\",\"mass\":\"10014\",\"checksum\":\"0034823DA6E9ED5C\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"MADELSEKSVEGTEEDGESAPAEGTTEGVPVD\",\"entry_modified\":\"2024-07-24\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"ec_numbers\":\"7.5.2.-\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"83\",\"mass\":\"9939\",\"checksum\":\"FFB678C2FEE2F209\",\"modified\":\"2023-02-22\",\"sequence_version\":\"4\",\"sequence\":\"MYTLVLLRHGESTWNKENRFTGWTDVDLSKDG\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":124,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00001-bd46662b-bc66-4729-afd0-35417fd10fc1-c000.snappy.parquet","partitionValues":{},"size":194687,"modificationTime":1766549220169,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"ec_numbers\":\"1.1.1.25\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"103\",\"mass\":\"10987\",\"checksum\":\"00041F603DFDD5A3\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAIIVHGGAGTIRKEERIPKVIEGVREAVLA\",\"entry_modified\":\"2024-07-24\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"ec_numbers\":\"7.3.2.7\",\"evidence_for_existence\":\"predicted\",\"length\":\"815\",\"mass\":\"90860\",\"checksum\":\"FFB8BC932E9E15EC\",\"modified\":\"2025-02-05\",\"sequence_version\":\"4\",\"sequence\":\"MYTMELRFIRGGVCAVDGVLAAGCREGKYGVG\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":58,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00006-e6e3deb7-a199-4e40-944a-6b52a9c809bb-c000.snappy.parquet","partitionValues":{},"size":209852,"modificationTime":1766549220187,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"ec_numbers\":\"1.1.1.261\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"102\",\"mass\":\"10129\",\"checksum\":\"0037152CDD997446\",\"modified\":\"1989-07-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAVQIDDYGPWTTEPAPRRETDLQALQARLF\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"ec_numbers\":\"6.3.5.7\",\"evidence_for_existence\":\"predicted\",\"length\":\"97\",\"mass\":\"9786\",\"checksum\":\"FFBDC4AB39BB53E1\",\"modified\":\"2024-05-29\",\"sequence_version\":\"4\",\"sequence\":\"MYTSFHRIDLPRTIVVGGGVLDKAGGYVSGVA\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":142,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00002-87a46f98-c10e-487d-b09c-260ae5569b04-c000.snappy.parquet","partitionValues":{},"size":192054,"modificationTime":1766549227674,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"ec_numbers\":\"1.1.1.205\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"139\",\"mass\":\"100393\",\"checksum\":\"009D64A921BD3176\",\"modified\":\"1994-10-01\",\"sequence_version\":\"1\",\"sequence\":\"MACSRARARAYSSAANLGPGFDALAVALDAYY\",\"entry_modified\":\"2024-11-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"ec_numbers\":\"7.2.2.6\",\"evidence_for_existence\":\"predicted\",\"length\":\"876\",\"mass\":\"99554\",\"checksum\":\"FF6978208DE0F5B1\",\"modified\":\"2023-02-22\",\"sequence_version\":\"3\",\"sequence\":\"MYVLGIESTAHTIGVGIVNERAEVLANEMHTY\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":50,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00002-c881f350-4b6e-449b-b6e2-2544a424c9bc-c000.snappy.parquet","partitionValues":{},"size":226829,"modificationTime":1766549220177,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"ec_numbers\":\"1.11.1.6\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"101\",\"mass\":\"10049\",\"checksum\":\"001DEFBE3589FDAF\",\"modified\":\"1989-07-01\",\"sequence_version\":\"1\",\"sequence\":\"GIGTLLMLIGTFYFIARGWGVTDKKAREYYAI\",\"entry_modified\":\"2023-06-28\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"ec_numbers\":\"7.6.2.8\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9979\",\"checksum\":\"FFB7F1B1543E0C36\",\"modified\":\"2015-01-07\",\"sequence_version\":\"3\",\"sequence\":\"PMILLALGLLADTDIASLFTAITMDIGMCVTG\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":195,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00000-b4de8d2f-f466-4990-9d63-a326716cbea4-c000.snappy.parquet","partitionValues":{},"size":221094,"modificationTime":1766549220177,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"ec_numbers\":\"1.1.1.-\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"100\",\"mass\":\"10015\",\"checksum\":\"002B834BC5F9D6AD\",\"modified\":\"1991-02-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAESQLKRVIETLRRLGIEEVLKLERRDPQY\",\"entry_modified\":\"2024-11-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffa4eefe-b062-5f26-8f77\",\"ec_numbers\":\"7.1.2.2\",\"evidence_for_existence\":\"inferred from 
homology\",\"length\":\"990\",\"mass\":\"9995\",\"checksum\":\"FF95A3D3020413CC\",\"modified\":\"2025-04-09\",\"sequence_version\":\"4\",\"sequence\":\"NTNETKVRFTGETAKIGVSLEMLGRIFNGAGK\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":320,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00004-0cd337df-6b78-40e1-9e72-4d902a79f0b3-c000.snappy.parquet","partitionValues":{},"size":160888,"modificationTime":1766549227664,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"ec_numbers\":\"1.12.99.-\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"100\",\"mass\":\"100504\",\"checksum\":\"0082DA6CD15CD5FD\",\"modified\":\"1988-01-01\",\"sequence_version\":\"1\",\"sequence\":\"EECEREKPPEGLAMDFIKRQFEIGEPEKDLCL\",\"entry_modified\":\"2024-05-29\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"ec_numbers\":\"7.2.1.4\",\"evidence_for_existence\":\"predicted\",\"length\":\"984\",\"mass\":\"99919\",\"checksum\":\"FF6F29ADD080060C\",\"modified\":\"2016-11-30\",\"sequence_version\":\"6\",\"sequence\":\"TLQGQKDVIELLKEEGLRDKIKVMVGGAPATQ\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":108,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}]} diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000002.json b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000002.json new file mode 100644 index 0000000..a35ea55 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000002.json @@ -0,0 +1,9 @@ 
+{"commitInfo":{"timestamp":1766549227695,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"5000","numOutputBytes":"1413763"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"c2780b81-dee9-4d15-9b42-9a2cc7fc9cc1"}} +{"add":{"path":"part-00000-9280d073-b085-4be8-85ed-536df50f62da-c000.snappy.parquet","partitionValues":{},"size":196389,"modificationTime":1766549227684,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"ec_numbers\":\"1.1.1.23\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"100\",\"mass\":\"10111\",\"checksum\":\"00A04ACE5FAFE54A\",\"modified\":\"1988-01-01\",\"sequence_version\":\"1\",\"sequence\":\"AEMRNFALRDAQGNEIGVFTGKSPRQAALKAA\",\"entry_modified\":\"2024-07-24\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"ec_numbers\":\"6.2.1.56\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"99\",\"mass\":\"9766\",\"checksum\":\"FF5B95B523D29A12\",\"modified\":\"2018-04-25\",\"sequence_version\":\"6\",\"sequence\":\"SNTRNFVLRDEEGNEHGVFTGKQPRQAALKAA\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":72,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} +{"add":{"path":"part-00001-14de6dd7-de07-482f-be2f-bc12adc49f95-c000.snappy.parquet","partitionValues":{},"size":172301,"modificationTime":1766549227688,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00df9ef5-d9b4-5654-9d2a\",\"ec_numbers\":\"1.1.1.383\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"102\",\"mass\":\"110945\",\"checksum\":\"0022ADF92AAB288B\",\"modified\":\"1993-10-01\",\"sequence_version\":\"1\",\"sequence\":\"EDEFGTLYAAFDNMRANLRTQISEAETAKQEA\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"ec_numbers\":\"7.2.3.1\",\"evidence_for_existence\":\"predicted\",\"length\":\"992\",\"mass\":\"927738\",\"checksum\":\"FF7E0EA466074645\",\"modified\":\"2018-11-07\",\"sequence_version\":\"3\",\"sequence\":\"MYPILVNSLLTCPIVKKGEYNYFVHPITDGIP\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":341,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} +{"add":{"path":"part-00002-87a46f98-c10e-487d-b09c-260ae5569b04-c000.snappy.parquet","partitionValues":{},"size":192054,"modificationTime":1766549227674,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"ec_numbers\":\"1.1.1.205\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"139\",\"mass\":\"100393\",\"checksum\":\"009D64A921BD3176\",\"modified\":\"1994-10-01\",\"sequence_version\":\"1\",\"sequence\":\"MACSRARARAYSSAANLGPGFDALAVALDAYY\",\"entry_modified\":\"2024-11-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"ec_numbers\":\"7.2.2.6\",\"evidence_for_existence\":\"predicted\",\"length\":\"876\",\"mass\":\"99554\",\"checksum\":\"FF6978208DE0F5B1\",\"modified\":\"2023-02-22\",\"sequence_version\":\"3\",\"sequence\":\"MYVLGIESTAHTIGVGIVNERAEVLANEMHTY\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":50,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} 
+{"add":{"path":"part-00003-9ceb2e14-936d-4777-923a-c7586d84aff3-c000.snappy.parquet","partitionValues":{},"size":189269,"modificationTime":1766549227675,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"ec_numbers\":\"1.1.1.299\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"1127\",\"mass\":\"10980\",\"checksum\":\"0065BCF4CF0118E3\",\"modified\":\"1988-08-01\",\"sequence_version\":\"1\",\"sequence\":\"AADIFAKFKTSMEVK\",\"entry_modified\":\"2024-11-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"ec_numbers\":\"7.5.2.1\",\"evidence_for_existence\":\"predicted\",\"length\":\"998\",\"mass\":\"92970\",\"checksum\":\"FF118F554983A5C4\",\"modified\":\"2014-06-11\",\"sequence_version\":\"5\",\"sequence\":\"SDTVDIYDAGKILERVII\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":58,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} +{"add":{"path":"part-00004-0cd337df-6b78-40e1-9e72-4d902a79f0b3-c000.snappy.parquet","partitionValues":{},"size":160888,"modificationTime":1766549227664,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"ec_numbers\":\"1.12.99.-\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"100\",\"mass\":\"100504\",\"checksum\":\"0082DA6CD15CD5FD\",\"modified\":\"1988-01-01\",\"sequence_version\":\"1\",\"sequence\":\"EECEREKPPEGLAMDFIKRQFEIGEPEKDLCL\",\"entry_modified\":\"2024-05-29\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"ec_numbers\":\"7.2.1.4\",\"evidence_for_existence\":\"predicted\",\"length\":\"984\",\"mass\":\"99919\",\"checksum\":\"FF6F29ADD080060C\",\"modified\":\"2016-11-30\",\"sequence_version\":\"6\",\"sequence\":\"TLQGQKDVIELLKEEGLRDKIKVMVGGAPATQ\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":108,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} +{"add":{"path":"part-00005-831e6b90-0dae-4d62-a116-f1d7b20d680e-c000.snappy.parquet","partitionValues":{},"size":152395,"modificationTime":1766549227692,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00957fe6-8bfd-5ecd-9b08\",\"ec_numbers\":\"1.1.1.169\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"111\",\"mass\":\"10083\",\"checksum\":\"0033DA1187FEA713\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"DINGGGATLPQKLYQTSGVLTAGFAPYIGVGS\",\"entry_modified\":\"2024-10-02\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"ec_numbers\":\"7.1.1.-\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"93\",\"mass\":\"9565\",\"checksum\":\"FFEA81938BF49096\",\"modified\":\"2016-11-02\",\"sequence_version\":\"6\",\"sequence\":\"MYYHIIKSPIYPILLAGDEKGLKHLIFLKDER\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":208,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} 
+{"add":{"path":"part-00006-c1bdb3ab-0e57-4470-8330-b13826aab8c6-c000.snappy.parquet","partitionValues":{},"size":163700,"modificationTime":1766549227685,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_0102d35c-20a5-5f8d-8175\",\"ec_numbers\":\"1.2.1.41\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"105\",\"mass\":\"10297\",\"checksum\":\"003EF50270BA751E\",\"modified\":\"1991-02-01\",\"sequence_version\":\"1\",\"sequence\":\"DTANDPKYGSQYAPQKVNADVDQGVXXXHPEL\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"ec_numbers\":\"7.3.2.2\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9848\",\"checksum\":\"FFD19C6CC8E8091B\",\"modified\":\"2017-04-12\",\"sequence_version\":\"3\",\"sequence\":\"MYLTREEERILAGEEGEARAKALEVIVKVGEA\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":202,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} +{"add":{"path":"part-00007-1ca7b106-08f4-4232-8b29-bf918a55a1d2-c000.snappy.parquet","partitionValues":{},"size":186767,"modificationTime":1766549227675,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"ec_numbers\":\"1.3.1.14\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"112\",\"mass\":\"11713\",\"checksum\":\"00177DD8DEC84097\",\"modified\":\"1991-08-01\",\"sequence_version\":\"1\",\"sequence\":\"MADLSTSVAGIRLENPLMLASGILDENGYTML\",\"entry_modified\":\"2024-11-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"ec_numbers\":\"7.3.2.1\",\"evidence_for_existence\":\"inferred from 
homology\",\"length\":\"89\",\"mass\":\"9696\",\"checksum\":\"FFE05E8B923ADAF9\",\"modified\":\"2020-10-07\",\"sequence_version\":\"3\",\"sequence\":\"YGEPIGVETLTK\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":11,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000003.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000003.crc new file mode 100644 index 0000000..8e7ae1d --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000003.crc @@ -0,0 +1 @@ +{"txnId":"f0399e2c-571a-4bc2-b23a-44b5fc6bf3b3","tableSizeBytes":4174669,"numFiles":24,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"cf52e24b-c8bf-4037-bc85-ecc2167e63b8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"protein_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"ec_numbers\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"evidence_for_existence\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"length\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"mass\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"checksum\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"modified\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sequence_version\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sequence\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"entry_modified\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549213818},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[{"p
ath":"part-00007-1ca7b106-08f4-4232-8b29-bf918a55a1d2-c000.snappy.parquet","partitionValues":{},"size":186767,"modificationTime":1766549227675,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"ec_numbers\":\"1.3.1.14\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"112\",\"mass\":\"11713\",\"checksum\":\"00177DD8DEC84097\",\"modified\":\"1991-08-01\",\"sequence_version\":\"1\",\"sequence\":\"MADLSTSVAGIRLENPLMLASGILDENGYTML\",\"entry_modified\":\"2024-11-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"ec_numbers\":\"7.3.2.1\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"89\",\"mass\":\"9696\",\"checksum\":\"FFE05E8B923ADAF9\",\"modified\":\"2020-10-07\",\"sequence_version\":\"3\",\"sequence\":\"YGEPIGVETLTK\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":11,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00003-9ceb2e14-936d-4777-923a-c7586d84aff3-c000.snappy.parquet","partitionValues":{},"size":189269,"modificationTime":1766549227675,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"ec_numbers\":\"1.1.1.299\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"1127\",\"mass\":\"10980\",\"checksum\":\"0065BCF4CF0118E3\",\"modified\":\"1988-08-01\",\"sequence_version\":\"1\",\"sequence\":\"AADIFAKFKTSMEVK\",\"entry_modified\":\"2024-11-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"ec_numbers\":\"7.5.2.1\",\"evidence_for_existence\":\"predicted\",\"length\":\"998\",\"mass\":\"92970\",\"checksum\":\"FF118F554983A5C4\",\"modified\":\"2014-06-11\",\"sequence_version\":\"5\",\"sequence\":\"SDTVDIYDAGKILERVII\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":58,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00003-cc92fe22-58d3-46d4-863e-0062067ebd2d-c000.snappy.parquet","partitionValues":{},"size":203116,"modificationTime":1766549220178,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"ec_numbers\":\"1.1.1.337\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"102\",\"mass\":\"100984\",\"checksum\":\"005A5C21E13342B4\",\"modified\":\"1991-05-01\",\"sequence_version\":\"1\",\"sequence\":\"KKIGAIAAGSAMVASALATGVFAVEKIGDVEG\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"ec_numbers\":\"7.2.2.9\",\"evidence_for_existence\":\"inferred from 
homology\",\"length\":\"99\",\"mass\":\"96911\",\"checksum\":\"FFF22879BB86CECE\",\"modified\":\"2020-08-12\",\"sequence_version\":\"3\",\"sequence\":\"MYVVNPEEKVIEIMKQTGIDLAATLPCDRIKN\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":153,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00006-c1bdb3ab-0e57-4470-8330-b13826aab8c6-c000.snappy.parquet","partitionValues":{},"size":163700,"modificationTime":1766549227685,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_0102d35c-20a5-5f8d-8175\",\"ec_numbers\":\"1.2.1.41\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"105\",\"mass\":\"10297\",\"checksum\":\"003EF50270BA751E\",\"modified\":\"1991-02-01\",\"sequence_version\":\"1\",\"sequence\":\"DTANDPKYGSQYAPQKVNADVDQGVXXXHPEL\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"ec_numbers\":\"7.3.2.2\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9848\",\"checksum\":\"FFD19C6CC8E8091B\",\"modified\":\"2017-04-12\",\"sequence_version\":\"3\",\"sequence\":\"MYLTREEERILAGEEGEARAKALEVIVKVGEA\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":202,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00000-9280d073-b085-4be8-85ed-536df50f62da-c000.snappy.parquet","partitionValues":{},"size":196389,"modificationTime":1766549227684,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"ec_numbers\":\"1.1.1.23\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"100\",\"mass\":\"10111\",\"checksum\":\"00A04ACE5FAFE54A\",\"modified\":\"1988-01-01\",\"sequence_version\":\"1\",\"sequence\":\"AEMRNFALRDAQGNEIGVFTGKSPRQAALKAA\",\"entry_modified\":\"2024-07-24\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"ec_numbers\":\"6.2.1.56\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"99\",\"mass\":\"9766\",\"checksum\":\"FF5B95B523D29A12\",\"modified\":\"2018-04-25\",\"sequence_version\":\"6\",\"sequence\":\"SNTRNFVLRDEEGNEHGVFTGKQPRQAALKAA\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":72,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00005-e1178ae8-d3d7-4662-a488-6f40c9f9eebf-c000.snappy.parquet","partitionValues":{},"size":216665,"modificationTime":1766549220176,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"ec_numbers\":\"1.16.-.-\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"102\",\"mass\":\"10006\",\"checksum\":\"000B863998A7D939\",\"modified\":\"1986-07-21\",\"sequence_version\":\"1\",\"sequence\":\"FEPYIYALLKDDSAIEEVKKITAGRHGRVVKV\",\"entry_modified\":\"2023-06-28\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"ec_numbers\":\"7.-.-.-\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9921\",\"checksum\":\"FF93ED4EDF204C7A\",\"modified\":\"2024-01-24\",\"sequence_version\":\"4\",\"sequence\":\"SVTIDYDKCKGPECAECVNACPMEVFEIQGDK\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":232,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00003-58a28129-ebb1-40c8-9a7e-78c52cbe7a39-c000.snappy.parquet","partitionValues":{},"size":109388,"modificationTime":1766549234742,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"ec_numbers\":\"2.1.1.-\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"101\",\"mass\":\"10028\",\"checksum\":\"00ACFE09073E30D2\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAEKVREIEVPQGVTVTVSGATLTTKGQKGQ\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"ec_numbers\":\"7.2.1.-\",\"evidence_for_existence\":\"inferred from 
homology\",\"length\":\"99\",\"mass\":\"9903\",\"checksum\":\"FF9BFCDE5D63705A\",\"modified\":\"2014-07-09\",\"sequence_version\":\"5\",\"sequence\":\"MYVRFEVPEDMQNEALSLLEKVRESGKVKKGT\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":462,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00006-e6e3deb7-a199-4e40-944a-6b52a9c809bb-c000.snappy.parquet","partitionValues":{},"size":209852,"modificationTime":1766549220187,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"ec_numbers\":\"1.1.1.261\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"102\",\"mass\":\"10129\",\"checksum\":\"0037152CDD997446\",\"modified\":\"1989-07-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAVQIDDYGPWTTEPAPRRETDLQALQARLF\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"ec_numbers\":\"6.3.5.7\",\"evidence_for_existence\":\"predicted\",\"length\":\"97\",\"mass\":\"9786\",\"checksum\":\"FFBDC4AB39BB53E1\",\"modified\":\"2024-05-29\",\"sequence_version\":\"4\",\"sequence\":\"MYTSFHRIDLPRTIVVGGGVLDKAGGYVSGVA\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":142,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00004-638e48aa-2700-4992-be7a-7d56903efbec-c000.snappy.parquet","partitionValues":{},"size":126900,"modificationTime":1766549234755,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"ec_numbers\":\"2.7.7.6\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"100\",\"mass\":\"100075\",\"checksum\":\"0013EF6F9053FB44\",\"modified\":\"1989-07-01\",\"sequence_version\":\"1\",\"sequence\":\"EFTSSTGDTVMIDEGALVEGTIDEDAVGAFGG\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffbd3905-3836-5893-bd75\",\"ec_numbers\":\"5.3.1.6\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9989\",\"checksum\":\"FDCB0BA604D3B38D\",\"modified\":\"2024-01-24\",\"sequence_version\":\"3\",\"sequence\":\"MYYVTPRQLVFPGDVIATADSKVEGPVYLDNG\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":117,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00002-87a46f98-c10e-487d-b09c-260ae5569b04-c000.snappy.parquet","partitionValues":{},"size":192054,"modificationTime":1766549227674,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"ec_numbers\":\"1.1.1.205\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"139\",\"mass\":\"100393\",\"checksum\":\"009D64A921BD3176\",\"modified\":\"1994-10-01\",\"sequence_version\":\"1\",\"sequence\":\"MACSRARARAYSSAANLGPGFDALAVALDAYY\",\"entry_modified\":\"2024-11-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"ec_numbers\":\"7.2.2.6\",\"evidence_for_existence\":\"predicted\",\"length\":\"876\",\"mass\":\"99554\",\"checksum\":\"FF6978208DE0F5B1\",\"modified\":\"2023-02-22\",\"sequence_version\":\"3\",\"sequence\":\"MYVLGIESTAHTIGVGIVNERAEVLANEMHTY\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":50,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00006-411b6dd6-7451-4814-9649-b1a37992bad6-c000.snappy.parquet","partitionValues":{},"size":124378,"modificationTime":1766549234744,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00219a7c-c28c-5aaf-928e\",\"ec_numbers\":\"2.1.1.-\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"123\",\"mass\":\"102456\",\"checksum\":\"009F46F13182EC92\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"ELIGAANRDTKSFSINRKDAKERVAKAAR\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fe67f279-f090-5c4e-b38d\",\"ec_numbers\":\"6.5.1.8\",\"evidence_for_existence\":\"inferred from 
homology\",\"length\":\"988\",\"mass\":\"98125\",\"checksum\":\"FFF8965F6497118A\",\"modified\":\"2011-09-21\",\"sequence_version\":\"3\",\"sequence\":\"TKSSRYCTTEGXDTALRHXPIGANQSAFKSKR\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":533,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00005-5052056a-b12d-4506-8172-512fb3e152c7-c000.snappy.parquet","partitionValues":{},"size":90406,"modificationTime":1766549234740,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"100\",\"mass\":\"11261\",\"checksum\":\"0082DFF152B8CEB8\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAAKGKKGAKAPSTKKSDYYKVEGNSVTRTK\",\"entry_modified\":\"2025-02-05\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"99\",\"mass\":\"9818\",\"checksum\":\"FE858C20D8AB32DF\",\"modified\":\"2017-06-07\",\"sequence_version\":\"4\",\"sequence\":\"MYTRNIGLDVKLPESECNDPHCPYHGKLSVRG\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":625,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00000-b4de8d2f-f466-4990-9d63-a326716cbea4-c000.snappy.parquet","partitionValues":{},"size":221094,"modificationTime":1766549220177,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"ec_numbers\":\"1.1.1.-\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"100\",\"mass\":\"10015\",\"checksum\":\"002B834BC5F9D6AD\",\"modified\":\"1991-02-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAESQLKRVIETLRRLGIEEVLKLERRDPQY\",\"entry_modified\":\"2024-11-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffa4eefe-b062-5f26-8f77\",\"ec_numbers\":\"7.1.2.2\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"990\",\"mass\":\"9995\",\"checksum\":\"FF95A3D3020413CC\",\"modified\":\"2025-04-09\",\"sequence_version\":\"4\",\"sequence\":\"NTNETKVRFTGETAKIGVSLEMLGRIFNGAGK\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":320,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00004-0cd337df-6b78-40e1-9e72-4d902a79f0b3-c000.snappy.parquet","partitionValues":{},"size":160888,"modificationTime":1766549227664,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"ec_numbers\":\"1.12.99.-\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"100\",\"mass\":\"100504\",\"checksum\":\"0082DA6CD15CD5FD\",\"modified\":\"1988-01-01\",\"sequence_version\":\"1\",\"sequence\":\"EECEREKPPEGLAMDFIKRQFEIGEPEKDLCL\",\"entry_modified\":\"2024-05-29\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"ec_numbers\":\"7.2.1.4\",\"evidence_for_existence\":\"predicted\",\"length\":\"984\",\"mass\":\"99919\",\"checksum\":\"FF6F29ADD080060C\",\"modified\":\"2016-11-30\",\"sequence_version\":\"6\",\"sequence\":\"TLQGQKDVIELLKEEGLRDKIKVMVGGAPATQ\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":108,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00000-ca8a05fd-be38-4a6e-83d5-5c50f3c51145-c000.snappy.parquet","partitionValues":{},"size":194764,"modificationTime":1766549234751,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_006f3d67-56f0-5829-b099\",\"ec_numbers\":\"1.1.1.302\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"1005\",\"mass\":\"100087\",\"checksum\":\"0087DF7906911AC1\",\"modified\":\"1995-02-01\",\"sequence_version\":\"1\",\"sequence\":\"MADQSSHQKYEFKKKLESLRGKKGRGTELISL\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffcedd0e-face-5ec6-8e0b\",\"ec_numbers\":\"6.3.4.20\",\"evidence_for_existence\":\"predicted\",\"length\":\"993\",\"mass\":\"9986\",\"checksum\":\"FFC1B0D4A0E0E2DE\",\"modified\":\"2024-01-24\",\"sequence_version\":\"5\",\"sequence\":\"MYYTRITQIERLTDEVATLYFSISLRSYPGQF\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":334,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00005-831e6b90-0dae-4d62-a116-f1d7b20d680e-c000.snappy.parquet","partitionValues":{},"size":152395,"modificationTime":1766549227692,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00957fe6-8bfd-5ecd-9b08\",\"ec_numbers\":\"1.1.1.169\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"111\",\"mass\":\"10083\",\"checksum\":\"0033DA1187FEA713\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"DINGGGATLPQKLYQTSGVLTAGFAPYIGVGS\",\"entry_modified\":\"2024-10-02\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"ec_numbers\":\"7.1.1.-\",\"evidence_for_existence\":\"inferred from 
homology\",\"length\":\"93\",\"mass\":\"9565\",\"checksum\":\"FFEA81938BF49096\",\"modified\":\"2016-11-02\",\"sequence_version\":\"6\",\"sequence\":\"MYYHIIKSPIYPILLAGDEKGLKHLIFLKDER\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":208,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00001-14de6dd7-de07-482f-be2f-bc12adc49f95-c000.snappy.parquet","partitionValues":{},"size":172301,"modificationTime":1766549227688,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00df9ef5-d9b4-5654-9d2a\",\"ec_numbers\":\"1.1.1.383\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"102\",\"mass\":\"110945\",\"checksum\":\"0022ADF92AAB288B\",\"modified\":\"1993-10-01\",\"sequence_version\":\"1\",\"sequence\":\"EDEFGTLYAAFDNMRANLRTQISEAETAKQEA\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"ec_numbers\":\"7.2.3.1\",\"evidence_for_existence\":\"predicted\",\"length\":\"992\",\"mass\":\"927738\",\"checksum\":\"FF7E0EA466074645\",\"modified\":\"2018-11-07\",\"sequence_version\":\"3\",\"sequence\":\"MYPILVNSLLTCPIVKKGEYNYFVHPITDGIP\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":341,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00004-159d2ae9-d5d1-40ef-8b8c-e8462f482af1-c000.snappy.parquet","partitionValues":{},"size":232956,"modificationTime":1766549220180,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_008bae16-5b42-5bfd-a15c\",\"ec_numbers\":\"1.1.1.-\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"1080\",\"mass\":\"10065\",\"checksum\":\"0060B82920BA478E\",\"modified\":\"1991-05-01\",\"sequence_version\":\"1\",\"sequence\":\"MADDDTTARRPVLSSFGTLGRGWLGVLALLLV\",\"entry_modified\":\"2024-05-29\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fedf6b23-d61c-56dc-b256\",\"ec_numbers\":\"6.5.1.7\",\"evidence_for_existence\":\"predicted\",\"length\":\"959\",\"mass\":\"92367\",\"checksum\":\"FFD61667871BE291\",\"modified\":\"2024-01-24\",\"sequence_version\":\"4\",\"sequence\":\"VEQDPYEIVIKQLERAAQYMEISE\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":67,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00007-59fe7ec2-82c3-4010-b399-2192a7f6a694-c000.snappy.parquet","partitionValues":{},"size":203689,"modificationTime":1766549220181,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"ec_numbers\":\"1.-.-.-\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"105\",\"mass\":\"10014\",\"checksum\":\"0034823DA6E9ED5C\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"MADELSEKSVEGTEEDGESAPAEGTTEGVPVD\",\"entry_modified\":\"2024-07-24\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"ec_numbers\":\"7.5.2.-\",\"evidence_for_existence\":\"inferred from 
homology\",\"length\":\"83\",\"mass\":\"9939\",\"checksum\":\"FFB678C2FEE2F209\",\"modified\":\"2023-02-22\",\"sequence_version\":\"4\",\"sequence\":\"MYTLVLLRHGESTWNKENRFTGWTDVDLSKDG\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":124,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00001-bd46662b-bc66-4729-afd0-35417fd10fc1-c000.snappy.parquet","partitionValues":{},"size":194687,"modificationTime":1766549220169,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"ec_numbers\":\"1.1.1.25\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"103\",\"mass\":\"10987\",\"checksum\":\"00041F603DFDD5A3\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAIIVHGGAGTIRKEERIPKVIEGVREAVLA\",\"entry_modified\":\"2024-07-24\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"ec_numbers\":\"7.3.2.7\",\"evidence_for_existence\":\"predicted\",\"length\":\"815\",\"mass\":\"90860\",\"checksum\":\"FFB8BC932E9E15EC\",\"modified\":\"2025-02-05\",\"sequence_version\":\"4\",\"sequence\":\"MYTMELRFIRGGVCAVDGVLAAGCREGKYGVG\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":58,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00001-7a1679d7-407a-42ee-abba-19074356776e-c000.snappy.parquet","partitionValues":{},"size":108189,"modificationTime":1766549234752,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00365069-eeac-5442-8f11\",\"ec_numbers\":\"2.5.1.78\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"100\",\"mass\":\"10084\",\"checksum\":\"00876DF92F290CEE\",\"modified\":\"1988-11-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAIEVGRVCIKTLGREAGNTCVIVEVLDKNF\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff216b32-af2a-545f-8e61\",\"ec_numbers\":\"6.3.2.-\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9818\",\"checksum\":\"FFBDE7DF93BAC8FB\",\"modified\":\"2017-03-15\",\"sequence_version\":\"4\",\"sequence\":\"MWRISRVELENFRSYRGAHRLELGDVNLLWGR\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":527,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00002-c881f350-4b6e-449b-b6e2-2544a424c9bc-c000.snappy.parquet","partitionValues":{},"size":226829,"modificationTime":1766549220177,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"ec_numbers\":\"1.11.1.6\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"101\",\"mass\":\"10049\",\"checksum\":\"001DEFBE3589FDAF\",\"modified\":\"1989-07-01\",\"sequence_version\":\"1\",\"sequence\":\"GIGTLLMLIGTFYFIARGWGVTDKKAREYYAI\",\"entry_modified\":\"2023-06-28\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"ec_numbers\":\"7.6.2.8\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9979\",\"checksum\":\"FFB7F1B1543E0C36\",\"modified\":\"2015-01-07\",\"sequence_version\":\"3\",\"sequence\":\"PMILLALGLLADTDIASLFTAITMDIGMCVTG\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":195,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00007-d00f5a40-677b-43fe-9fa8-949fc983f149-c000.snappy.parquet","partitionValues":{},"size":202124,"modificationTime":1766549234744,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"ec_numbers\":\"1.1.1.95\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"100\",\"mass\":\"100033\",\"checksum\":\"002511155E18D68A\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"GYVNGLESAEETLAENRESGDFGSSAAAMGNV\",\"entry_modified\":\"2023-06-28\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"ec_numbers\":\"6.2.1.5\",\"evidence_for_existence\":\"inferred from 
homology\",\"length\":\"99\",\"mass\":\"99972\",\"checksum\":\"FF11E8597C6A833B\",\"modified\":\"2023-02-22\",\"sequence_version\":\"3\",\"sequence\":\"SKLLDNLRDAVRKFLTGSSSYDKAVEDFIKEL\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":253,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00002-f9cbb606-eb09-4474-bd73-334b02a5e53c-c000.snappy.parquet","partitionValues":{},"size":95869,"modificationTime":1766549234742,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_0061faab-bd88-562c-8a31\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"100\",\"mass\":\"10031\",\"checksum\":\"00AA5F03869ECAEE\",\"modified\":\"1989-07-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAAREAEKKEEGQKQVKLYAIIRIRGRVDVH\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fe9c77d9-be27-518e-8e0c\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"99\",\"mass\":\"9974\",\"checksum\":\"FFD30262EE0545E8\",\"modified\":\"2011-12-14\",\"sequence_version\":\"4\",\"sequence\":\"MYRKMEVSLSKDLRKKYGIRSFPVIMGDVVKV\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":625,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}]} diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000003.json b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000003.json new file mode 100644 index 0000000..1595602 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000003.json @@ -0,0 +1,9 @@ 
+{"commitInfo":{"timestamp":1766549234759,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"5000","numOutputBytes":"1052018"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"f0399e2c-571a-4bc2-b23a-44b5fc6bf3b3"}} +{"add":{"path":"part-00000-ca8a05fd-be38-4a6e-83d5-5c50f3c51145-c000.snappy.parquet","partitionValues":{},"size":194764,"modificationTime":1766549234751,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_006f3d67-56f0-5829-b099\",\"ec_numbers\":\"1.1.1.302\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"1005\",\"mass\":\"100087\",\"checksum\":\"0087DF7906911AC1\",\"modified\":\"1995-02-01\",\"sequence_version\":\"1\",\"sequence\":\"MADQSSHQKYEFKKKLESLRGKKGRGTELISL\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffcedd0e-face-5ec6-8e0b\",\"ec_numbers\":\"6.3.4.20\",\"evidence_for_existence\":\"predicted\",\"length\":\"993\",\"mass\":\"9986\",\"checksum\":\"FFC1B0D4A0E0E2DE\",\"modified\":\"2024-01-24\",\"sequence_version\":\"5\",\"sequence\":\"MYYTRITQIERLTDEVATLYFSISLRSYPGQF\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":334,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} +{"add":{"path":"part-00001-7a1679d7-407a-42ee-abba-19074356776e-c000.snappy.parquet","partitionValues":{},"size":108189,"modificationTime":1766549234752,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00365069-eeac-5442-8f11\",\"ec_numbers\":\"2.5.1.78\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"100\",\"mass\":\"10084\",\"checksum\":\"00876DF92F290CEE\",\"modified\":\"1988-11-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAIEVGRVCIKTLGREAGNTCVIVEVLDKNF\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff216b32-af2a-545f-8e61\",\"ec_numbers\":\"6.3.2.-\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9818\",\"checksum\":\"FFBDE7DF93BAC8FB\",\"modified\":\"2017-03-15\",\"sequence_version\":\"4\",\"sequence\":\"MWRISRVELENFRSYRGAHRLELGDVNLLWGR\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":527,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} +{"add":{"path":"part-00002-f9cbb606-eb09-4474-bd73-334b02a5e53c-c000.snappy.parquet","partitionValues":{},"size":95869,"modificationTime":1766549234742,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_0061faab-bd88-562c-8a31\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"100\",\"mass\":\"10031\",\"checksum\":\"00AA5F03869ECAEE\",\"modified\":\"1989-07-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAAREAEKKEEGQKQVKLYAIIRIRGRVDVH\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fe9c77d9-be27-518e-8e0c\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"99\",\"mass\":\"9974\",\"checksum\":\"FFD30262EE0545E8\",\"modified\":\"2011-12-14\",\"sequence_version\":\"4\",\"sequence\":\"MYRKMEVSLSKDLRKKYGIRSFPVIMGDVVKV\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":625,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} 
+{"add":{"path":"part-00003-58a28129-ebb1-40c8-9a7e-78c52cbe7a39-c000.snappy.parquet","partitionValues":{},"size":109388,"modificationTime":1766549234742,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"ec_numbers\":\"2.1.1.-\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"101\",\"mass\":\"10028\",\"checksum\":\"00ACFE09073E30D2\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAEKVREIEVPQGVTVTVSGATLTTKGQKGQ\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"ec_numbers\":\"7.2.1.-\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"99\",\"mass\":\"9903\",\"checksum\":\"FF9BFCDE5D63705A\",\"modified\":\"2014-07-09\",\"sequence_version\":\"5\",\"sequence\":\"MYVRFEVPEDMQNEALSLLEKVRESGKVKKGT\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":462,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} +{"add":{"path":"part-00004-638e48aa-2700-4992-be7a-7d56903efbec-c000.snappy.parquet","partitionValues":{},"size":126900,"modificationTime":1766549234755,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"ec_numbers\":\"2.7.7.6\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"100\",\"mass\":\"100075\",\"checksum\":\"0013EF6F9053FB44\",\"modified\":\"1989-07-01\",\"sequence_version\":\"1\",\"sequence\":\"EFTSSTGDTVMIDEGALVEGTIDEDAVGAFGG\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffbd3905-3836-5893-bd75\",\"ec_numbers\":\"5.3.1.6\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9989\",\"checksum\":\"FDCB0BA604D3B38D\",\"modified\":\"2024-01-24\",\"sequence_version\":\"3\",\"sequence\":\"MYYVTPRQLVFPGDVIATADSKVEGPVYLDNG\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":117,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} +{"add":{"path":"part-00005-5052056a-b12d-4506-8172-512fb3e152c7-c000.snappy.parquet","partitionValues":{},"size":90406,"modificationTime":1766549234740,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"100\",\"mass\":\"11261\",\"checksum\":\"0082DFF152B8CEB8\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAAKGKKGAKAPSTKKSDYYKVEGNSVTRTK\",\"entry_modified\":\"2025-02-05\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"99\",\"mass\":\"9818\",\"checksum\":\"FE858C20D8AB32DF\",\"modified\":\"2017-06-07\",\"sequence_version\":\"4\",\"sequence\":\"MYTRNIGLDVKLPESECNDPHCPYHGKLSVRG\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":625,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} 
+{"add":{"path":"part-00006-411b6dd6-7451-4814-9649-b1a37992bad6-c000.snappy.parquet","partitionValues":{},"size":124378,"modificationTime":1766549234744,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00219a7c-c28c-5aaf-928e\",\"ec_numbers\":\"2.1.1.-\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"123\",\"mass\":\"102456\",\"checksum\":\"009F46F13182EC92\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"ELIGAANRDTKSFSINRKDAKERVAKAAR\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fe67f279-f090-5c4e-b38d\",\"ec_numbers\":\"6.5.1.8\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"988\",\"mass\":\"98125\",\"checksum\":\"FFF8965F6497118A\",\"modified\":\"2011-09-21\",\"sequence_version\":\"3\",\"sequence\":\"TKSSRYCTTEGXDTALRHXPIGANQSAFKSKR\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":533,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} +{"add":{"path":"part-00007-d00f5a40-677b-43fe-9fa8-949fc983f149-c000.snappy.parquet","partitionValues":{},"size":202124,"modificationTime":1766549234744,"dataChange":true,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"ec_numbers\":\"1.1.1.95\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"100\",\"mass\":\"100033\",\"checksum\":\"002511155E18D68A\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"GYVNGLESAEETLAENRESGDFGSSAAAMGNV\",\"entry_modified\":\"2023-06-28\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"ec_numbers\":\"6.2.1.5\",\"evidence_for_existence\":\"inferred from 
homology\",\"length\":\"99\",\"mass\":\"99972\",\"checksum\":\"FF11E8597C6A833B\",\"modified\":\"2023-02-22\",\"sequence_version\":\"3\",\"sequence\":\"SKLLDNLRDAVRKFLTGSSSYDKAVEDFIKEL\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":253,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000004.crc b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000004.crc new file mode 100644 index 0000000..b45ee8f --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000004.crc @@ -0,0 +1 @@ +{"txnId":"7ab24080-bcb5-4b76-954d-ddc338e8157b","tableSizeBytes":5727264,"numFiles":32,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"cf52e24b-c8bf-4037-bc85-ecc2167e63b8","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"protein_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"ec_numbers\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"evidence_for_existence\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"length\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"mass\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"checksum\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"modified\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sequence_version\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sequence\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"entry_modified\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549213818},"protocol":{"minReaderVersion":1,"minWriterVersi
on":2},"allFiles":[{"path":"part-00007-1ca7b106-08f4-4232-8b29-bf918a55a1d2-c000.snappy.parquet","partitionValues":{},"size":186767,"modificationTime":1766549227675,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"ec_numbers\":\"1.3.1.14\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"112\",\"mass\":\"11713\",\"checksum\":\"00177DD8DEC84097\",\"modified\":\"1991-08-01\",\"sequence_version\":\"1\",\"sequence\":\"MADLSTSVAGIRLENPLMLASGILDENGYTML\",\"entry_modified\":\"2024-11-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"ec_numbers\":\"7.3.2.1\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"89\",\"mass\":\"9696\",\"checksum\":\"FFE05E8B923ADAF9\",\"modified\":\"2020-10-07\",\"sequence_version\":\"3\",\"sequence\":\"YGEPIGVETLTK\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":11,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00003-9ceb2e14-936d-4777-923a-c7586d84aff3-c000.snappy.parquet","partitionValues":{},"size":189269,"modificationTime":1766549227675,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"ec_numbers\":\"1.1.1.299\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"1127\",\"mass\":\"10980\",\"checksum\":\"0065BCF4CF0118E3\",\"modified\":\"1988-08-01\",\"sequence_version\":\"1\",\"sequence\":\"AADIFAKFKTSMEVK\",\"entry_modified\":\"2024-11-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffb46e82-abfa-5122-818f\",\"ec_numbers\":\"7.5.2.1\",\"evidence_for_existence\":\"predicted\",\"length\":\"998\",\"mass\":\"92970\",\"checksum\":\"FF118F554983A5C4\",\"modified\":\"2014-06-11\",\"sequence_version\":\"5\",\"sequence\":\"SDTVDIYDAGKILERVII\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":58,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00003-cc92fe22-58d3-46d4-863e-0062067ebd2d-c000.snappy.parquet","partitionValues":{},"size":203116,"modificationTime":1766549220178,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"ec_numbers\":\"1.1.1.337\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"102\",\"mass\":\"100984\",\"checksum\":\"005A5C21E13342B4\",\"modified\":\"1991-05-01\",\"sequence_version\":\"1\",\"sequence\":\"KKIGAIAAGSAMVASALATGVFAVEKIGDVEG\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff2dba94-2092-57d0-9041\",\"ec_numbers\":\"7.2.2.9\",\"evidence_for_existence\":\"inferred from 
homology\",\"length\":\"99\",\"mass\":\"96911\",\"checksum\":\"FFF22879BB86CECE\",\"modified\":\"2020-08-12\",\"sequence_version\":\"3\",\"sequence\":\"MYVVNPEEKVIEIMKQTGIDLAATLPCDRIKN\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":153,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00006-c1bdb3ab-0e57-4470-8330-b13826aab8c6-c000.snappy.parquet","partitionValues":{},"size":163700,"modificationTime":1766549227685,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_0102d35c-20a5-5f8d-8175\",\"ec_numbers\":\"1.2.1.41\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"105\",\"mass\":\"10297\",\"checksum\":\"003EF50270BA751E\",\"modified\":\"1991-02-01\",\"sequence_version\":\"1\",\"sequence\":\"DTANDPKYGSQYAPQKVNADVDQGVXXXHPEL\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"ec_numbers\":\"7.3.2.2\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9848\",\"checksum\":\"FFD19C6CC8E8091B\",\"modified\":\"2017-04-12\",\"sequence_version\":\"3\",\"sequence\":\"MYLTREEERILAGEEGEARAKALEVIVKVGEA\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":202,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00000-9280d073-b085-4be8-85ed-536df50f62da-c000.snappy.parquet","partitionValues":{},"size":196389,"modificationTime":1766549227684,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"ec_numbers\":\"1.1.1.23\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"100\",\"mass\":\"10111\",\"checksum\":\"00A04ACE5FAFE54A\",\"modified\":\"1988-01-01\",\"sequence_version\":\"1\",\"sequence\":\"AEMRNFALRDAQGNEIGVFTGKSPRQAALKAA\",\"entry_modified\":\"2024-07-24\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"ec_numbers\":\"6.2.1.56\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"99\",\"mass\":\"9766\",\"checksum\":\"FF5B95B523D29A12\",\"modified\":\"2018-04-25\",\"sequence_version\":\"6\",\"sequence\":\"SNTRNFVLRDEEGNEHGVFTGKQPRQAALKAA\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":72,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00002-6291644e-f5b3-45af-81c9-ceb443b13c28-c000.snappy.parquet","partitionValues":{},"size":195679,"modificationTime":1766549240512,"dataChange":false,"stats":"{\"numRecords\":601,\"minValues\":{\"protein_id\":\"cdm_prot_0185e048-25c8-50dd-bfbf\",\"ec_numbers\":\"2.1.1.-\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"1030\",\"mass\":\"116998\",\"checksum\":\"006F146C7571695B\",\"modified\":\"1988-11-01\",\"sequence_version\":\"1\",\"sequence\":\"AEKLEPVLPLIV\",\"entry_modified\":\"2023-09-13\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fe66808c-aab1-5f12-970e\",\"ec_numbers\":\"7.2.1.4\",\"evidence_for_existence\":\"uncertain\",\"length\":\"855\",\"mass\":\"98910\",\"checksum\":\"FFCEFFF9006453C2\",\"modified\":\"2024-01-24\",\"sequence_version\":\"3\",\"sequence\":\"VAETHDHRVSEIISKKFDVVLAGGITFENVRK\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":18,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00005-e1178ae8-d3d7-4662-a488-6f40c9f9eebf-c000.snappy.parquet","partitionValues":{},"size":216665,"modificationTime":1766549220176,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"ec_numbers\":\"1.16.-.-\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"102\",\"mass\":\"10006\",\"checksum\":\"000B863998A7D939\",\"modified\":\"1986-07-21\",\"sequence_version\":\"1\",\"sequence\":\"FEPYIYALLKDDSAIEEVKKITAGRHGRVVKV\",\"entry_modified\":\"2023-06-28\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"ec_numbers\":\"7.-.-.-\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9921\",\"checksum\":\"FF93ED4EDF204C7A\",\"modified\":\"2024-01-24\",\"sequence_version\":\"4\",\"sequence\":\"SVTIDYDKCKGPECAECVNACPMEVFEIQGDK\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":232,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00005-ed12e42c-224f-4e76-9a05-3aa1e3f6758a-c000.snappy.parquet","partitionValues":{},"size":173885,"modificationTime":1766549240503,"dataChange":false,"stats":"{\"numRecords\":601,\"minValues\":{\"protein_id\":\"cdm_prot_007a14c8-c58a-5ae1-b721\",\"ec_numbers\":\"1.-.-.-\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"100\",\"mass\":\"10081\",\"checksum\":\"00472581CE3910BE\",\"modified\":\"1990-04-01\",\"sequence_version\":\"1\",\"sequence\":\"MAALIDTGIFFGFYSLKDVHHMDSVAIVVHAV\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffb5b181-1633-55f5-83ee\",\"ec_numbers\":\"7.-.-.-\",\"evidence_for_existence\":\"uncertain\",\"length\":\"99\",\"mass\":\"9988\",\"checksum\":\"FFA9F0E20910915B\",\"modified\":\"2021-04-07\",\"sequence_version\":\"3\",\"sequence\":\"MYYLKPIGVVEQNENYTVLNIFDEFVEGLDGL\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":559,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00003-58a28129-ebb1-40c8-9a7e-78c52cbe7a39-c000.snappy.parquet","partitionValues":{},"size":109388,"modificationTime":1766549234742,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"ec_numbers\":\"2.1.1.-\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"101\",\"mass\":\"10028\",\"checksum\":\"00ACFE09073E30D2\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAEKVREIEVPQGVTVTVSGATLTTKGQKGQ\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"ec_numbers\":\"7.2.1.-\",\"evidence_for_existence\":\"inferred from 
homology\",\"length\":\"99\",\"mass\":\"9903\",\"checksum\":\"FF9BFCDE5D63705A\",\"modified\":\"2014-07-09\",\"sequence_version\":\"5\",\"sequence\":\"MYVRFEVPEDMQNEALSLLEKVRESGKVKKGT\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":462,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00006-e6e3deb7-a199-4e40-944a-6b52a9c809bb-c000.snappy.parquet","partitionValues":{},"size":209852,"modificationTime":1766549220187,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"ec_numbers\":\"1.1.1.261\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"102\",\"mass\":\"10129\",\"checksum\":\"0037152CDD997446\",\"modified\":\"1989-07-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAVQIDDYGPWTTEPAPRRETDLQALQARLF\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"ec_numbers\":\"6.3.5.7\",\"evidence_for_existence\":\"predicted\",\"length\":\"97\",\"mass\":\"9786\",\"checksum\":\"FFBDC4AB39BB53E1\",\"modified\":\"2024-05-29\",\"sequence_version\":\"4\",\"sequence\":\"MYTSFHRIDLPRTIVVGGGVLDKAGGYVSGVA\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":142,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00004-638e48aa-2700-4992-be7a-7d56903efbec-c000.snappy.parquet","partitionValues":{},"size":126900,"modificationTime":1766549234755,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"ec_numbers\":\"2.7.7.6\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"100\",\"mass\":\"100075\",\"checksum\":\"0013EF6F9053FB44\",\"modified\":\"1989-07-01\",\"sequence_version\":\"1\",\"sequence\":\"EFTSSTGDTVMIDEGALVEGTIDEDAVGAFGG\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffbd3905-3836-5893-bd75\",\"ec_numbers\":\"5.3.1.6\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9989\",\"checksum\":\"FDCB0BA604D3B38D\",\"modified\":\"2024-01-24\",\"sequence_version\":\"3\",\"sequence\":\"MYYVTPRQLVFPGDVIATADSKVEGPVYLDNG\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":117,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00003-261ceeac-3a67-4da2-a828-81308d60d332-c000.snappy.parquet","partitionValues":{},"size":183530,"modificationTime":1766549240506,"dataChange":false,"stats":"{\"numRecords\":601,\"minValues\":{\"protein_id\":\"cdm_prot_0007879e-35c8-5917-ad11\",\"ec_numbers\":\"1.-.-.-\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"100\",\"mass\":\"10080\",\"checksum\":\"00C21A7773BD9813\",\"modified\":\"1996-11-01\",\"sequence_version\":\"1\",\"sequence\":\"MAADAPHPAFEGYATEAVPQAAVGSPGKDGVL\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffe66051-6a0a-55cc-81b2\",\"ec_numbers\":\"7.5.2.13\",\"evidence_for_existence\":\"predicted\",\"length\":\"98\",\"mass\":\"9958\",\"checksum\":\"FFFC36244F2C4EAE\",\"modified\":\"2023-11-08\",\"sequence_version\":\"6\",\"sequence\":\"MYRYPVEVIADTYLSKVGGYSYELDRNEIGIN\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":305,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00006-75fa702b-36c2-40fb-a563-65206a6e1827-c000.snappy.parquet","partitionValues":{},"size":174482,"modificationTime":1766549240507,"dataChange":false,"stats":"{\"numRecords\":601,\"minValues\":{\"protein_id\":\"cdm_prot_00666a93-0a4c-5498-8cac\",\"ec_numbers\":\"2.1.1.-\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"101\",\"mass\":\"10018\",\"checksum\":\"00AEC07D21973C0A\",\"modified\":\"1988-11-01\",\"sequence_version\":\"1\",\"sequence\":\"EFQHIDLNQTHIRLTTDLKNNNLEKYILNLRK\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffd1840d-3bb9-5f05-ad33\",\"ec_numbers\":\"7.1.1.2\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9983\",\"checksum\":\"FF08A53188543448\",\"modified\":\"2021-04-07\",\"sequence_version\":\"3\",\"sequence\":\"MYYKLVLSHYSKTSTLINITLIKHLINILRER\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":566,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00001-d12b1474-5792-422a-a98f-7503233261c9-c000.snappy.parquet","partitionValues":{},"size":182650,"modificationTime":1766549240503,"dataChange":false,"stats":"{\"numRecords\":601,\"minValues\":{\"protein_id\":\"cdm_prot_01672ce0-33eb-50c0-8bb3\",\"ec_numbers\":\"1.1.1.103\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"1018\",\"mass\":\"100444\",\"checksum\":\"0091F26D3326349F\",\"modified\":\"1990-08-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAERLDPWGAVEIKDYDRLLRTFGIRPFSEV\",\"entry_modified\":\"2023-09-13\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fffe9aba-bce9-56de-b269\",\"ec_numbers\":\"6.1.1.9\",\"evidence_for_existence\":\"predicted\",\"length\":\"92\",\"mass\":\"9920\",\"checksum\":\"FFE8B7A831DD2B04\",\"modified\":\"2024-01-24\",\"sequence_version\":\"3\",\"sequence\":\"VEQKEIGRTYRYVAQELELKMEPVDPKQYVPR\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":216,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00002-87a46f98-c10e-487d-b09c-260ae5569b04-c000.snappy.parquet","partitionValues":{},"size":192054,"modificationTime":1766549227674,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"ec_numbers\":\"1.1.1.205\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"139\",\"mass\":\"100393\",\"checksum\":\"009D64A921BD3176\",\"modified\":\"1994-10-01\",\"sequence_version\":\"1\",\"sequence\":\"MACSRARARAYSSAANLGPGFDALAVALDAYY\",\"entry_modified\":\"2024-11-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"ec_numbers\":\"7.2.2.6\",\"evidence_for_existence\":\"predicted\",\"length\":\"876\",\"mass\":\"99554\",\"checksum\":\"FF6978208DE0F5B1\",\"modified\":\"2023-02-22\",\"sequence_version\":\"3\",\"sequence\":\"MYVLGIESTAHTIGVGIVNERAEVLANEMHTY\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":50,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00006-411b6dd6-7451-4814-9649-b1a37992bad6-c000.snappy.parquet","partitionValues":{},"size":124378,"modificationTime":1766549234744,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00219a7c-c28c-5aaf-928e\",\"ec_numbers\":\"2.1.1.-\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"123\",\"mass\":\"102456\",\"checksum\":\"009F46F13182EC92\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"ELIGAANRDTKSFSINRKDAKERVAKAAR\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fe67f279-f090-5c4e-b38d\",\"ec_numbers\":\"6.5.1.8\",\"evidence_for_existence\":\"inferred from 
homology\",\"length\":\"988\",\"mass\":\"98125\",\"checksum\":\"FFF8965F6497118A\",\"modified\":\"2011-09-21\",\"sequence_version\":\"3\",\"sequence\":\"TKSSRYCTTEGXDTALRHXPIGANQSAFKSKR\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":533,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00005-5052056a-b12d-4506-8172-512fb3e152c7-c000.snappy.parquet","partitionValues":{},"size":90406,"modificationTime":1766549234740,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"100\",\"mass\":\"11261\",\"checksum\":\"0082DFF152B8CEB8\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAAKGKKGAKAPSTKKSDYYKVEGNSVTRTK\",\"entry_modified\":\"2025-02-05\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"99\",\"mass\":\"9818\",\"checksum\":\"FE858C20D8AB32DF\",\"modified\":\"2017-06-07\",\"sequence_version\":\"4\",\"sequence\":\"MYTRNIGLDVKLPESECNDPHCPYHGKLSVRG\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":625,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00000-b4de8d2f-f466-4990-9d63-a326716cbea4-c000.snappy.parquet","partitionValues":{},"size":221094,"modificationTime":1766549220177,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"ec_numbers\":\"1.1.1.-\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"100\",\"mass\":\"10015\",\"checksum\":\"002B834BC5F9D6AD\",\"modified\":\"1991-02-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAESQLKRVIETLRRLGIEEVLKLERRDPQY\",\"entry_modified\":\"2024-11-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffa4eefe-b062-5f26-8f77\",\"ec_numbers\":\"7.1.2.2\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"990\",\"mass\":\"9995\",\"checksum\":\"FF95A3D3020413CC\",\"modified\":\"2025-04-09\",\"sequence_version\":\"4\",\"sequence\":\"NTNETKVRFTGETAKIGVSLEMLGRIFNGAGK\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":320,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00004-0cd337df-6b78-40e1-9e72-4d902a79f0b3-c000.snappy.parquet","partitionValues":{},"size":160888,"modificationTime":1766549227664,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"ec_numbers\":\"1.12.99.-\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"100\",\"mass\":\"100504\",\"checksum\":\"0082DA6CD15CD5FD\",\"modified\":\"1988-01-01\",\"sequence_version\":\"1\",\"sequence\":\"EECEREKPPEGLAMDFIKRQFEIGEPEKDLCL\",\"entry_modified\":\"2024-05-29\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"ec_numbers\":\"7.2.1.4\",\"evidence_for_existence\":\"predicted\",\"length\":\"984\",\"mass\":\"99919\",\"checksum\":\"FF6F29ADD080060C\",\"modified\":\"2016-11-30\",\"sequence_version\":\"6\",\"sequence\":\"TLQGQKDVIELLKEEGLRDKIKVMVGGAPATQ\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":108,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00000-ca8a05fd-be38-4a6e-83d5-5c50f3c51145-c000.snappy.parquet","partitionValues":{},"size":194764,"modificationTime":1766549234751,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_006f3d67-56f0-5829-b099\",\"ec_numbers\":\"1.1.1.302\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"1005\",\"mass\":\"100087\",\"checksum\":\"0087DF7906911AC1\",\"modified\":\"1995-02-01\",\"sequence_version\":\"1\",\"sequence\":\"MADQSSHQKYEFKKKLESLRGKKGRGTELISL\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffcedd0e-face-5ec6-8e0b\",\"ec_numbers\":\"6.3.4.20\",\"evidence_for_existence\":\"predicted\",\"length\":\"993\",\"mass\":\"9986\",\"checksum\":\"FFC1B0D4A0E0E2DE\",\"modified\":\"2024-01-24\",\"sequence_version\":\"5\",\"sequence\":\"MYYTRITQIERLTDEVATLYFSISLRSYPGQF\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":334,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00005-831e6b90-0dae-4d62-a116-f1d7b20d680e-c000.snappy.parquet","partitionValues":{},"size":152395,"modificationTime":1766549227692,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00957fe6-8bfd-5ecd-9b08\",\"ec_numbers\":\"1.1.1.169\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"111\",\"mass\":\"10083\",\"checksum\":\"0033DA1187FEA713\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"DINGGGATLPQKLYQTSGVLTAGFAPYIGVGS\",\"entry_modified\":\"2024-10-02\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"ec_numbers\":\"7.1.1.-\",\"evidence_for_existence\":\"inferred from 
homology\",\"length\":\"93\",\"mass\":\"9565\",\"checksum\":\"FFEA81938BF49096\",\"modified\":\"2016-11-02\",\"sequence_version\":\"6\",\"sequence\":\"MYYHIIKSPIYPILLAGDEKGLKHLIFLKDER\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":208,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00001-14de6dd7-de07-482f-be2f-bc12adc49f95-c000.snappy.parquet","partitionValues":{},"size":172301,"modificationTime":1766549227688,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00df9ef5-d9b4-5654-9d2a\",\"ec_numbers\":\"1.1.1.383\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"102\",\"mass\":\"110945\",\"checksum\":\"0022ADF92AAB288B\",\"modified\":\"1993-10-01\",\"sequence_version\":\"1\",\"sequence\":\"EDEFGTLYAAFDNMRANLRTQISEAETAKQEA\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"ec_numbers\":\"7.2.3.1\",\"evidence_for_existence\":\"predicted\",\"length\":\"992\",\"mass\":\"927738\",\"checksum\":\"FF7E0EA466074645\",\"modified\":\"2018-11-07\",\"sequence_version\":\"3\",\"sequence\":\"MYPILVNSLLTCPIVKKGEYNYFVHPITDGIP\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":341,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00004-159d2ae9-d5d1-40ef-8b8c-e8462f482af1-c000.snappy.parquet","partitionValues":{},"size":232956,"modificationTime":1766549220180,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_008bae16-5b42-5bfd-a15c\",\"ec_numbers\":\"1.1.1.-\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"1080\",\"mass\":\"10065\",\"checksum\":\"0060B82920BA478E\",\"modified\":\"1991-05-01\",\"sequence_version\":\"1\",\"sequence\":\"MADDDTTARRPVLSSFGTLGRGWLGVLALLLV\",\"entry_modified\":\"2024-05-29\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fedf6b23-d61c-56dc-b256\",\"ec_numbers\":\"6.5.1.7\",\"evidence_for_existence\":\"predicted\",\"length\":\"959\",\"mass\":\"92367\",\"checksum\":\"FFD61667871BE291\",\"modified\":\"2024-01-24\",\"sequence_version\":\"4\",\"sequence\":\"VEQDPYEIVIKQLERAAQYMEISE\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":67,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00007-5c63ea60-2077-47ac-8a07-bc4d52e24659-c000.snappy.parquet","partitionValues":{},"size":179861,"modificationTime":1766549240504,"dataChange":false,"stats":"{\"numRecords\":607,\"minValues\":{\"protein_id\":\"cdm_prot_0254412b-7072-5093-9d3c\",\"ec_numbers\":\"1.3.1.-\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"100\",\"mass\":\"10005\",\"checksum\":\"0064010139B99F9A\",\"modified\":\"1988-11-01\",\"sequence_version\":\"1\",\"sequence\":\"AVTVTTMLVRIPIPASQGYLNFGDIMIMLVAV\",\"entry_modified\":\"2023-06-28\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffd6bc2d-9fdc-5fb9-a121\",\"ec_numbers\":\"7.-.-.-\",\"evidence_for_existence\":\"uncertain\",\"length\":\"98\",\"mass\":\"9929\",\"checksum\":\"FF488030DC364F94\",\"modified\":\"2012-05-16\",\"sequence_version\":\"4\",\"sequence\":\"MYVPEEIIETVRMIEYQHLDIRTTTLGVNLKD\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":579,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00004-3dcf6645-ae1c-4467-863d-85acc844f886-c000.snappy.parquet","partitionValues":{},"size":180543,"modificationTime":1766549240515,"dataChange":false,"stats":"{\"numRecords\":601,\"minValues\":{\"protein_id\":\"cdm_prot_003753a9-9a55-505c-8ea7\",\"ec_numbers\":\"1.-.-.-\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"100\",\"mass\":\"10007\",\"checksum\":\"0035C4E648DA607F\",\"modified\":\"1990-08-01\",\"sequence_version\":\"1\",\"sequence\":\"MADLKITWLGHAAFLLEAEKKLLIDPFISENP\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff634e32-999c-5ae0-8d84\",\"ec_numbers\":\"7.2.2.-\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9935\",\"checksum\":\"FFD86555239B25AA\",\"modified\":\"2021-04-07\",\"sequence_version\":\"3\",\"sequence\":\"MYVPEEIIETIKMIEYQNLDIRTTTLGVNLKD\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":555,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00007-59fe7ec2-82c3-4010-b399-2192a7f6a694-c000.snappy.parquet","partitionValues":{},"size":203689,"modificationTime":1766549220181,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"ec_numbers\":\"1.-.-.-\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"105\",\"mass\":\"10014\",\"checksum\":\"0034823DA6E9ED5C\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"MADELSEKSVEGTEEDGESAPAEGTTEGVPVD\",\"entry_modified\":\"2024-07-24\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"ec_numbers\":\"7.5.2.-\",\"evidence_for_existence\":\"inferred from 
homology\",\"length\":\"83\",\"mass\":\"9939\",\"checksum\":\"FFB678C2FEE2F209\",\"modified\":\"2023-02-22\",\"sequence_version\":\"4\",\"sequence\":\"MYTLVLLRHGESTWNKENRFTGWTDVDLSKDG\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":124,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00001-bd46662b-bc66-4729-afd0-35417fd10fc1-c000.snappy.parquet","partitionValues":{},"size":194687,"modificationTime":1766549220169,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"ec_numbers\":\"1.1.1.25\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"103\",\"mass\":\"10987\",\"checksum\":\"00041F603DFDD5A3\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAIIVHGGAGTIRKEERIPKVIEGVREAVLA\",\"entry_modified\":\"2024-07-24\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"ec_numbers\":\"7.3.2.7\",\"evidence_for_existence\":\"predicted\",\"length\":\"815\",\"mass\":\"90860\",\"checksum\":\"FFB8BC932E9E15EC\",\"modified\":\"2025-02-05\",\"sequence_version\":\"4\",\"sequence\":\"MYTMELRFIRGGVCAVDGVLAAGCREGKYGVG\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":58,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00001-7a1679d7-407a-42ee-abba-19074356776e-c000.snappy.parquet","partitionValues":{},"size":108189,"modificationTime":1766549234752,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00365069-eeac-5442-8f11\",\"ec_numbers\":\"2.5.1.78\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"100\",\"mass\":\"10084\",\"checksum\":\"00876DF92F290CEE\",\"modified\":\"1988-11-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAIEVGRVCIKTLGREAGNTCVIVEVLDKNF\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff216b32-af2a-545f-8e61\",\"ec_numbers\":\"6.3.2.-\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9818\",\"checksum\":\"FFBDE7DF93BAC8FB\",\"modified\":\"2017-03-15\",\"sequence_version\":\"4\",\"sequence\":\"MWRISRVELENFRSYRGAHRLELGDVNLLWGR\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":527,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00002-c881f350-4b6e-449b-b6e2-2544a424c9bc-c000.snappy.parquet","partitionValues":{},"size":226829,"modificationTime":1766549220177,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"ec_numbers\":\"1.11.1.6\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"101\",\"mass\":\"10049\",\"checksum\":\"001DEFBE3589FDAF\",\"modified\":\"1989-07-01\",\"sequence_version\":\"1\",\"sequence\":\"GIGTLLMLIGTFYFIARGWGVTDKKAREYYAI\",\"entry_modified\":\"2023-06-28\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"ec_numbers\":\"7.6.2.8\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9979\",\"checksum\":\"FFB7F1B1543E0C36\",\"modified\":\"2015-01-07\",\"sequence_version\":\"3\",\"sequence\":\"PMILLALGLLADTDIASLFTAITMDIGMCVTG\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":195,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00007-d00f5a40-677b-43fe-9fa8-949fc983f149-c000.snappy.parquet","partitionValues":{},"size":202124,"modificationTime":1766549234744,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"ec_numbers\":\"1.1.1.95\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"100\",\"mass\":\"100033\",\"checksum\":\"002511155E18D68A\",\"modified\":\"1990-01-01\",\"sequence_version\":\"1\",\"sequence\":\"GYVNGLESAEETLAENRESGDFGSSAAAMGNV\",\"entry_modified\":\"2023-06-28\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"ec_numbers\":\"6.2.1.5\",\"evidence_for_existence\":\"inferred from 
homology\",\"length\":\"99\",\"mass\":\"99972\",\"checksum\":\"FF11E8597C6A833B\",\"modified\":\"2023-02-22\",\"sequence_version\":\"3\",\"sequence\":\"SKLLDNLRDAVRKFLTGSSSYDKAVEDFIKEL\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":253,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00000-207a5aa7-b459-46f4-8f11-9a0016306790-c000.snappy.parquet","partitionValues":{},"size":281965,"modificationTime":1766549240503,"dataChange":false,"stats":"{\"numRecords\":601,\"minValues\":{\"protein_id\":\"cdm_prot_0003b7cb-bf13-5646-9978\",\"ec_numbers\":\"6.1.1.10\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"1018\",\"mass\":\"104994\",\"checksum\":\"0014BB3B6B8C17E1\",\"modified\":\"1996-10-01\",\"sequence_version\":\"1\",\"sequence\":\"MADADIRNDIYVLALENAVKHKAVPRAGAILG\",\"entry_modified\":\"2024-11-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fef0eb9f-57f2-532d-b5b2\",\"ec_numbers\":\"6.1.1.6\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"986\",\"mass\":\"97527\",\"checksum\":\"FFCE442FD587E33E\",\"modified\":\"2024-01-24\",\"sequence_version\":\"3\",\"sequence\":\"MYVLSETKSNFLKLIAEALKRRGLEVEEEELE\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":2,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"},{"path":"part-00002-f9cbb606-eb09-4474-bd73-334b02a5e53c-c000.snappy.parquet","partitionValues":{},"size":95869,"modificationTime":1766549234742,"dataChange":false,"stats":"{\"numRecords\":625,\"minValues\":{\"protein_id\":\"cdm_prot_0061faab-bd88-562c-8a31\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"100\",\"mass\":\"10031\",\"checksum\":\"00AA5F03869ECAEE\",\"modified\":\"1989-07-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAAREAEKKEEGQKQVKLYAIIRIRGRVDVH\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fe9c77d9-be27-518e-8e0c\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"99\",\"mass\":\"9974\",\"checksum\":\"FFD30262EE0545E8\",\"modified\":\"2011-12-14\",\"sequence_version\":\"4\",\"sequence\":\"MYRKMEVSLSKDLRKKYGIRSFPVIMGDVVKV\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":625,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}]} diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000004.json b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000004.json new file mode 100644 index 0000000..64ac811 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/proteins/_delta_log/00000000000000000004.json @@ -0,0 +1,9 @@ +{"commitInfo":{"timestamp":1766549240518,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":3,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"4814","numOutputBytes":"1552595"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"7ab24080-bcb5-4b76-954d-ddc338e8157b"}} +{"add":{"path":"part-00000-207a5aa7-b459-46f4-8f11-9a0016306790-c000.snappy.parquet","partitionValues":{},"size":281965,"modificationTime":1766549240503,"dataChange":true,"stats":"{\"numRecords\":601,\"minValues\":{\"protein_id\":\"cdm_prot_0003b7cb-bf13-5646-9978\",\"ec_numbers\":\"6.1.1.10\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"1018\",\"mass\":\"104994\",\"checksum\":\"0014BB3B6B8C17E1\",\"modified\":\"1996-10-01\",\"sequence_version\":\"1\",\"sequence\":\"MADADIRNDIYVLALENAVKHKAVPRAGAILG\",\"entry_modified\":\"2024-11-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fef0eb9f-57f2-532d-b5b2\",\"ec_numbers\":\"6.1.1.6\",\"evidence_for_existence\":\"inferred from homology\",\"length\":\"986\",\"mass\":\"97527\",\"checksum\":\"FFCE442FD587E33E\",\"modified\":\"2024-01-24\",\"sequence_version\":\"3\",\"sequence\":\"MYVLSETKSNFLKLIAEALKRRGLEVEEEELE\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":2,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} +{"add":{"path":"part-00001-d12b1474-5792-422a-a98f-7503233261c9-c000.snappy.parquet","partitionValues":{},"size":182650,"modificationTime":1766549240503,"dataChange":true,"stats":"{\"numRecords\":601,\"minValues\":{\"protein_id\":\"cdm_prot_01672ce0-33eb-50c0-8bb3\",\"ec_numbers\":\"1.1.1.103\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"1018\",\"mass\":\"100444\",\"checksum\":\"0091F26D3326349F\",\"modified\":\"1990-08-01\",\"sequence_version\":\"1\",\"sequence\":\"MAAERLDPWGAVEIKDYDRLLRTFGIRPFSEV\",\"entry_modified\":\"2023-09-13\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fffe9aba-bce9-56de-b269\",\"ec_numbers\":\"6.1.1.9\",\"evidence_for_existence\":\"predicted\",\"length\":\"92\",\"mass\":\"9920\",\"checksum\":\"FFE8B7A831DD2B04\",\"modified\":\"2024-01-24\",\"sequence_version\":\"3\",\"sequence\":\"VEQKEIGRTYRYVAQELELKMEPVDPKQYVPR\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":216,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} 
+{"add":{"path":"part-00002-6291644e-f5b3-45af-81c9-ceb443b13c28-c000.snappy.parquet","partitionValues":{},"size":195679,"modificationTime":1766549240512,"dataChange":true,"stats":"{\"numRecords\":601,\"minValues\":{\"protein_id\":\"cdm_prot_0185e048-25c8-50dd-bfbf\",\"ec_numbers\":\"2.1.1.-\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"1030\",\"mass\":\"116998\",\"checksum\":\"006F146C7571695B\",\"modified\":\"1988-11-01\",\"sequence_version\":\"1\",\"sequence\":\"AEKLEPVLPLIV\",\"entry_modified\":\"2023-09-13\"},\"maxValues\":{\"protein_id\":\"cdm_prot_fe66808c-aab1-5f12-970e\",\"ec_numbers\":\"7.2.1.4\",\"evidence_for_existence\":\"uncertain\",\"length\":\"855\",\"mass\":\"98910\",\"checksum\":\"FFCEFFF9006453C2\",\"modified\":\"2024-01-24\",\"sequence_version\":\"3\",\"sequence\":\"VAETHDHRVSEIISKKFDVVLAGGITFENVRK\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":18,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} +{"add":{"path":"part-00003-261ceeac-3a67-4da2-a828-81308d60d332-c000.snappy.parquet","partitionValues":{},"size":183530,"modificationTime":1766549240506,"dataChange":true,"stats":"{\"numRecords\":601,\"minValues\":{\"protein_id\":\"cdm_prot_0007879e-35c8-5917-ad11\",\"ec_numbers\":\"1.-.-.-\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"100\",\"mass\":\"10080\",\"checksum\":\"00C21A7773BD9813\",\"modified\":\"1996-11-01\",\"sequence_version\":\"1\",\"sequence\":\"MAADAPHPAFEGYATEAVPQAAVGSPGKDGVL\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffe66051-6a0a-55cc-81b2\",\"ec_numbers\":\"7.5.2.13\",\"evidence_for_existence\":\"predicted\",\"length\":\"98\",\"mass\":\"9958\",\"checksum\":\"FFFC36244F2C4EAE\",\"modified\":\"2023-11-08\",\"sequence_version\":\"6\",\"sequence\":\"MYRYPVEVIADTYLSKVGGYSYELDRNEIGIN\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":305,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} +{"add":{"path":"part-00004-3dcf6645-ae1c-4467-863d-85acc844f886-c000.snappy.parquet","partitionValues":{},"size":180543,"modificationTime":1766549240515,"dataChange":true,"stats":"{\"numRecords\":601,\"minValues\":{\"protein_id\":\"cdm_prot_003753a9-9a55-505c-8ea7\",\"ec_numbers\":\"1.-.-.-\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"100\",\"mass\":\"10007\",\"checksum\":\"0035C4E648DA607F\",\"modified\":\"1990-08-01\",\"sequence_version\":\"1\",\"sequence\":\"MADLKITWLGHAAFLLEAEKKLLIDPFISENP\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ff634e32-999c-5ae0-8d84\",\"ec_numbers\":\"7.2.2.-\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9935\",\"checksum\":\"FFD86555239B25AA\",\"modified\":\"2021-04-07\",\"sequence_version\":\"3\",\"sequence\":\"MYVPEEIIETIKMIEYQNLDIRTTTLGVNLKD\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":555,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} 
+{"add":{"path":"part-00005-ed12e42c-224f-4e76-9a05-3aa1e3f6758a-c000.snappy.parquet","partitionValues":{},"size":173885,"modificationTime":1766549240503,"dataChange":true,"stats":"{\"numRecords\":601,\"minValues\":{\"protein_id\":\"cdm_prot_007a14c8-c58a-5ae1-b721\",\"ec_numbers\":\"1.-.-.-\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"100\",\"mass\":\"10081\",\"checksum\":\"00472581CE3910BE\",\"modified\":\"1990-04-01\",\"sequence_version\":\"1\",\"sequence\":\"MAALIDTGIFFGFYSLKDVHHMDSVAIVVHAV\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffb5b181-1633-55f5-83ee\",\"ec_numbers\":\"7.-.-.-\",\"evidence_for_existence\":\"uncertain\",\"length\":\"99\",\"mass\":\"9988\",\"checksum\":\"FFA9F0E20910915B\",\"modified\":\"2021-04-07\",\"sequence_version\":\"3\",\"sequence\":\"MYYLKPIGVVEQNENYTVLNIFDEFVEGLDGL\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":559,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} +{"add":{"path":"part-00006-75fa702b-36c2-40fb-a563-65206a6e1827-c000.snappy.parquet","partitionValues":{},"size":174482,"modificationTime":1766549240507,"dataChange":true,"stats":"{\"numRecords\":601,\"minValues\":{\"protein_id\":\"cdm_prot_00666a93-0a4c-5498-8cac\",\"ec_numbers\":\"2.1.1.-\",\"evidence_for_existence\":\"evidence at protein 
level\",\"length\":\"101\",\"mass\":\"10018\",\"checksum\":\"00AEC07D21973C0A\",\"modified\":\"1988-11-01\",\"sequence_version\":\"1\",\"sequence\":\"EFQHIDLNQTHIRLTTDLKNNNLEKYILNLRK\",\"entry_modified\":\"2024-03-27\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffd1840d-3bb9-5f05-ad33\",\"ec_numbers\":\"7.1.1.2\",\"evidence_for_existence\":\"predicted\",\"length\":\"99\",\"mass\":\"9983\",\"checksum\":\"FF08A53188543448\",\"modified\":\"2021-04-07\",\"sequence_version\":\"3\",\"sequence\":\"MYYKLVLSHYSKTSTLINITLIKHLINILRER\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":566,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} +{"add":{"path":"part-00007-5c63ea60-2077-47ac-8a07-bc4d52e24659-c000.snappy.parquet","partitionValues":{},"size":179861,"modificationTime":1766549240504,"dataChange":true,"stats":"{\"numRecords\":607,\"minValues\":{\"protein_id\":\"cdm_prot_0254412b-7072-5093-9d3c\",\"ec_numbers\":\"1.3.1.-\",\"evidence_for_existence\":\"evidence at protein level\",\"length\":\"100\",\"mass\":\"10005\",\"checksum\":\"0064010139B99F9A\",\"modified\":\"1988-11-01\",\"sequence_version\":\"1\",\"sequence\":\"AVTVTTMLVRIPIPASQGYLNFGDIMIMLVAV\",\"entry_modified\":\"2023-06-28\"},\"maxValues\":{\"protein_id\":\"cdm_prot_ffd6bc2d-9fdc-5fb9-a121\",\"ec_numbers\":\"7.-.-.-\",\"evidence_for_existence\":\"uncertain\",\"length\":\"98\",\"mass\":\"9929\",\"checksum\":\"FF488030DC364F94\",\"modified\":\"2012-05-16\",\"sequence_version\":\"4\",\"sequence\":\"MYVPEEIIETVRMIEYQHLDIRTTTLGVNLKD\",\"entry_modified\":\"2025-06-18\"},\"nullCount\":{\"protein_id\":0,\"ec_numbers\":579,\"evidence_for_existence\":0,\"length\":0,\"mass\":0,\"checksum\":0,\"modified\":0,\"sequence_version\":0,\"sequence\":0,\"entry_modified\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00000-207a5aa7-b459-46f4-8f11-9a0016306790-c000.snappy.parquet 
b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00000-207a5aa7-b459-46f4-8f11-9a0016306790-c000.snappy.parquet new file mode 100644 index 0000000..55f3c1a Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00000-207a5aa7-b459-46f4-8f11-9a0016306790-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00000-9280d073-b085-4be8-85ed-536df50f62da-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00000-9280d073-b085-4be8-85ed-536df50f62da-c000.snappy.parquet new file mode 100644 index 0000000..7464966 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00000-9280d073-b085-4be8-85ed-536df50f62da-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00000-b4de8d2f-f466-4990-9d63-a326716cbea4-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00000-b4de8d2f-f466-4990-9d63-a326716cbea4-c000.snappy.parquet new file mode 100644 index 0000000..aadd52e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00000-b4de8d2f-f466-4990-9d63-a326716cbea4-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00000-ca8a05fd-be38-4a6e-83d5-5c50f3c51145-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00000-ca8a05fd-be38-4a6e-83d5-5c50f3c51145-c000.snappy.parquet new file mode 100644 index 0000000..a6a8edf Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00000-ca8a05fd-be38-4a6e-83d5-5c50f3c51145-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00001-14de6dd7-de07-482f-be2f-bc12adc49f95-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00001-14de6dd7-de07-482f-be2f-bc12adc49f95-c000.snappy.parquet new file mode 100644 index 0000000..a56983e Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00001-14de6dd7-de07-482f-be2f-bc12adc49f95-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00001-7a1679d7-407a-42ee-abba-19074356776e-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00001-7a1679d7-407a-42ee-abba-19074356776e-c000.snappy.parquet new file mode 100644 index 0000000..de8aaad Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00001-7a1679d7-407a-42ee-abba-19074356776e-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00001-bd46662b-bc66-4729-afd0-35417fd10fc1-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00001-bd46662b-bc66-4729-afd0-35417fd10fc1-c000.snappy.parquet new file mode 100644 index 0000000..d8087a2 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00001-bd46662b-bc66-4729-afd0-35417fd10fc1-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00001-d12b1474-5792-422a-a98f-7503233261c9-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00001-d12b1474-5792-422a-a98f-7503233261c9-c000.snappy.parquet new file mode 100644 index 0000000..b49f7d4 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00001-d12b1474-5792-422a-a98f-7503233261c9-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00002-6291644e-f5b3-45af-81c9-ceb443b13c28-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00002-6291644e-f5b3-45af-81c9-ceb443b13c28-c000.snappy.parquet new file mode 100644 index 0000000..fe21c66 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00002-6291644e-f5b3-45af-81c9-ceb443b13c28-c000.snappy.parquet differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00002-87a46f98-c10e-487d-b09c-260ae5569b04-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00002-87a46f98-c10e-487d-b09c-260ae5569b04-c000.snappy.parquet new file mode 100644 index 0000000..10eeaba Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00002-87a46f98-c10e-487d-b09c-260ae5569b04-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00002-c881f350-4b6e-449b-b6e2-2544a424c9bc-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00002-c881f350-4b6e-449b-b6e2-2544a424c9bc-c000.snappy.parquet new file mode 100644 index 0000000..eb2d20a Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00002-c881f350-4b6e-449b-b6e2-2544a424c9bc-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00002-f9cbb606-eb09-4474-bd73-334b02a5e53c-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00002-f9cbb606-eb09-4474-bd73-334b02a5e53c-c000.snappy.parquet new file mode 100644 index 0000000..d054b08 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00002-f9cbb606-eb09-4474-bd73-334b02a5e53c-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00003-261ceeac-3a67-4da2-a828-81308d60d332-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00003-261ceeac-3a67-4da2-a828-81308d60d332-c000.snappy.parquet new file mode 100644 index 0000000..d2b2bdf Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00003-261ceeac-3a67-4da2-a828-81308d60d332-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00003-58a28129-ebb1-40c8-9a7e-78c52cbe7a39-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00003-58a28129-ebb1-40c8-9a7e-78c52cbe7a39-c000.snappy.parquet new 
file mode 100644 index 0000000..6a709db Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00003-58a28129-ebb1-40c8-9a7e-78c52cbe7a39-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00003-9ceb2e14-936d-4777-923a-c7586d84aff3-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00003-9ceb2e14-936d-4777-923a-c7586d84aff3-c000.snappy.parquet new file mode 100644 index 0000000..8a2ec8d Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00003-9ceb2e14-936d-4777-923a-c7586d84aff3-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00003-cc92fe22-58d3-46d4-863e-0062067ebd2d-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00003-cc92fe22-58d3-46d4-863e-0062067ebd2d-c000.snappy.parquet new file mode 100644 index 0000000..a155dfa Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00003-cc92fe22-58d3-46d4-863e-0062067ebd2d-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00004-0cd337df-6b78-40e1-9e72-4d902a79f0b3-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00004-0cd337df-6b78-40e1-9e72-4d902a79f0b3-c000.snappy.parquet new file mode 100644 index 0000000..5ee0773 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00004-0cd337df-6b78-40e1-9e72-4d902a79f0b3-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00004-159d2ae9-d5d1-40ef-8b8c-e8462f482af1-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00004-159d2ae9-d5d1-40ef-8b8c-e8462f482af1-c000.snappy.parquet new file mode 100644 index 0000000..eb66e83 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00004-159d2ae9-d5d1-40ef-8b8c-e8462f482af1-c000.snappy.parquet differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00004-3dcf6645-ae1c-4467-863d-85acc844f886-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00004-3dcf6645-ae1c-4467-863d-85acc844f886-c000.snappy.parquet new file mode 100644 index 0000000..3710f23 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00004-3dcf6645-ae1c-4467-863d-85acc844f886-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00004-638e48aa-2700-4992-be7a-7d56903efbec-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00004-638e48aa-2700-4992-be7a-7d56903efbec-c000.snappy.parquet new file mode 100644 index 0000000..c1196b4 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00004-638e48aa-2700-4992-be7a-7d56903efbec-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00005-5052056a-b12d-4506-8172-512fb3e152c7-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00005-5052056a-b12d-4506-8172-512fb3e152c7-c000.snappy.parquet new file mode 100644 index 0000000..dc9728c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00005-5052056a-b12d-4506-8172-512fb3e152c7-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00005-831e6b90-0dae-4d62-a116-f1d7b20d680e-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00005-831e6b90-0dae-4d62-a116-f1d7b20d680e-c000.snappy.parquet new file mode 100644 index 0000000..ba56237 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00005-831e6b90-0dae-4d62-a116-f1d7b20d680e-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00005-e1178ae8-d3d7-4662-a488-6f40c9f9eebf-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00005-e1178ae8-d3d7-4662-a488-6f40c9f9eebf-c000.snappy.parquet new 
file mode 100644 index 0000000..944128f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00005-e1178ae8-d3d7-4662-a488-6f40c9f9eebf-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00005-ed12e42c-224f-4e76-9a05-3aa1e3f6758a-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00005-ed12e42c-224f-4e76-9a05-3aa1e3f6758a-c000.snappy.parquet new file mode 100644 index 0000000..d5dae36 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00005-ed12e42c-224f-4e76-9a05-3aa1e3f6758a-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00006-411b6dd6-7451-4814-9649-b1a37992bad6-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00006-411b6dd6-7451-4814-9649-b1a37992bad6-c000.snappy.parquet new file mode 100644 index 0000000..335882c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00006-411b6dd6-7451-4814-9649-b1a37992bad6-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00006-75fa702b-36c2-40fb-a563-65206a6e1827-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00006-75fa702b-36c2-40fb-a563-65206a6e1827-c000.snappy.parquet new file mode 100644 index 0000000..9a7da2d Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00006-75fa702b-36c2-40fb-a563-65206a6e1827-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00006-c1bdb3ab-0e57-4470-8330-b13826aab8c6-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00006-c1bdb3ab-0e57-4470-8330-b13826aab8c6-c000.snappy.parquet new file mode 100644 index 0000000..f23dc68 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00006-c1bdb3ab-0e57-4470-8330-b13826aab8c6-c000.snappy.parquet differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00006-e6e3deb7-a199-4e40-944a-6b52a9c809bb-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00006-e6e3deb7-a199-4e40-944a-6b52a9c809bb-c000.snappy.parquet new file mode 100644 index 0000000..4e6fbc4 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00006-e6e3deb7-a199-4e40-944a-6b52a9c809bb-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00007-1ca7b106-08f4-4232-8b29-bf918a55a1d2-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00007-1ca7b106-08f4-4232-8b29-bf918a55a1d2-c000.snappy.parquet new file mode 100644 index 0000000..60bac0b Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00007-1ca7b106-08f4-4232-8b29-bf918a55a1d2-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00007-59fe7ec2-82c3-4010-b399-2192a7f6a694-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00007-59fe7ec2-82c3-4010-b399-2192a7f6a694-c000.snappy.parquet new file mode 100644 index 0000000..01fc238 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00007-59fe7ec2-82c3-4010-b399-2192a7f6a694-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00007-5c63ea60-2077-47ac-8a07-bc4d52e24659-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00007-5c63ea60-2077-47ac-8a07-bc4d52e24659-c000.snappy.parquet new file mode 100644 index 0000000..0906346 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00007-5c63ea60-2077-47ac-8a07-bc4d52e24659-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/proteins/part-00007-d00f5a40-677b-43fe-9fa8-949fc983f149-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00007-d00f5a40-677b-43fe-9fa8-949fc983f149-c000.snappy.parquet new 
file mode 100644 index 0000000..a7a60ad Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/proteins/part-00007-d00f5a40-677b-43fe-9fa8-949fc983f149-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00000-0c3fd018-b5d3-4b38-8d4c-10a75c7d879a-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00000-0c3fd018-b5d3-4b38-8d4c-10a75c7d879a-c000.snappy.parquet.crc new file mode 100644 index 0000000..356e3ee Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00000-0c3fd018-b5d3-4b38-8d4c-10a75c7d879a-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00000-4a2b52be-31d1-4a2c-b12e-65ca33e408cb-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00000-4a2b52be-31d1-4a2c-b12e-65ca33e408cb-c000.snappy.parquet.crc new file mode 100644 index 0000000..6abb45a Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00000-4a2b52be-31d1-4a2c-b12e-65ca33e408cb-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00000-6ccd0024-5a9d-4740-841a-965595b8114b-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00000-6ccd0024-5a9d-4740-841a-965595b8114b-c000.snappy.parquet.crc new file mode 100644 index 0000000..35d5117 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00000-6ccd0024-5a9d-4740-841a-965595b8114b-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00000-b88f9765-c916-4da9-baba-e7fb1d2e74a0-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00000-b88f9765-c916-4da9-baba-e7fb1d2e74a0-c000.snappy.parquet.crc new file mode 100644 index 0000000..44891f6 Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00000-b88f9765-c916-4da9-baba-e7fb1d2e74a0-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00001-8c6d56b3-5bad-499b-9cb7-2c6138bc9134-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00001-8c6d56b3-5bad-499b-9cb7-2c6138bc9134-c000.snappy.parquet.crc new file mode 100644 index 0000000..e825b3f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00001-8c6d56b3-5bad-499b-9cb7-2c6138bc9134-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00001-b0701a06-5ec4-48fe-8a86-dc299bf5e361-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00001-b0701a06-5ec4-48fe-8a86-dc299bf5e361-c000.snappy.parquet.crc new file mode 100644 index 0000000..55b5aea Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00001-b0701a06-5ec4-48fe-8a86-dc299bf5e361-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00001-d224a09d-c0a7-46a1-80f7-f160f977cb32-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00001-d224a09d-c0a7-46a1-80f7-f160f977cb32-c000.snappy.parquet.crc new file mode 100644 index 0000000..fe5e9be Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00001-d224a09d-c0a7-46a1-80f7-f160f977cb32-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00001-ec1917e9-be8d-4a9f-b44d-3f3a58787ae8-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00001-ec1917e9-be8d-4a9f-b44d-3f3a58787ae8-c000.snappy.parquet.crc new file mode 100644 index 0000000..f18106f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00001-ec1917e9-be8d-4a9f-b44d-3f3a58787ae8-c000.snappy.parquet.crc 
differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00002-289cb3ff-e88a-4077-8cff-abe6e99e351c-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00002-289cb3ff-e88a-4077-8cff-abe6e99e351c-c000.snappy.parquet.crc new file mode 100644 index 0000000..1cb878d Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00002-289cb3ff-e88a-4077-8cff-abe6e99e351c-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00002-a1cf72fa-61e5-4ea3-95eb-f7247e0f25bc-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00002-a1cf72fa-61e5-4ea3-95eb-f7247e0f25bc-c000.snappy.parquet.crc new file mode 100644 index 0000000..71c4adc Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00002-a1cf72fa-61e5-4ea3-95eb-f7247e0f25bc-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00002-b7947ee9-4005-4e4b-9226-089bddb34507-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00002-b7947ee9-4005-4e4b-9226-089bddb34507-c000.snappy.parquet.crc new file mode 100644 index 0000000..7e5a696 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00002-b7947ee9-4005-4e4b-9226-089bddb34507-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00002-f94d1286-6d09-40ae-8a2e-8d53da426a8f-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00002-f94d1286-6d09-40ae-8a2e-8d53da426a8f-c000.snappy.parquet.crc new file mode 100644 index 0000000..21929c7 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00002-f94d1286-6d09-40ae-8a2e-8d53da426a8f-c000.snappy.parquet.crc differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00003-1e6a7263-53d2-4ad1-b0d1-8946b7d232ce-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00003-1e6a7263-53d2-4ad1-b0d1-8946b7d232ce-c000.snappy.parquet.crc new file mode 100644 index 0000000..239e644 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00003-1e6a7263-53d2-4ad1-b0d1-8946b7d232ce-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00003-ac774356-7cfe-4c65-b3cd-0d32b8f036df-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00003-ac774356-7cfe-4c65-b3cd-0d32b8f036df-c000.snappy.parquet.crc new file mode 100644 index 0000000..e62fc0d Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00003-ac774356-7cfe-4c65-b3cd-0d32b8f036df-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00003-b08a10e8-4e23-45d8-939c-d636a6ac5c6a-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00003-b08a10e8-4e23-45d8-939c-d636a6ac5c6a-c000.snappy.parquet.crc new file mode 100644 index 0000000..d5ab38b Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00003-b08a10e8-4e23-45d8-939c-d636a6ac5c6a-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00003-dccd9b86-0835-437b-a3ff-b4c189a6f500-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00003-dccd9b86-0835-437b-a3ff-b4c189a6f500-c000.snappy.parquet.crc new file mode 100644 index 0000000..16a11ef Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00003-dccd9b86-0835-437b-a3ff-b4c189a6f500-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00004-012aa02e-6c71-472c-aea2-843e6dfba871-c000.snappy.parquet.crc 
b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00004-012aa02e-6c71-472c-aea2-843e6dfba871-c000.snappy.parquet.crc new file mode 100644 index 0000000..1cf1413 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00004-012aa02e-6c71-472c-aea2-843e6dfba871-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00004-3adef09a-7f08-499c-873d-c688a5b87607-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00004-3adef09a-7f08-499c-873d-c688a5b87607-c000.snappy.parquet.crc new file mode 100644 index 0000000..4b59651 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00004-3adef09a-7f08-499c-873d-c688a5b87607-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00004-66534a87-6fcc-4512-8fde-d3bd0fb0b3e7-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00004-66534a87-6fcc-4512-8fde-d3bd0fb0b3e7-c000.snappy.parquet.crc new file mode 100644 index 0000000..0519fb8 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00004-66534a87-6fcc-4512-8fde-d3bd0fb0b3e7-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00004-e5bd7727-fd4d-495a-8331-baaba9c1fd56-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00004-e5bd7727-fd4d-495a-8331-baaba9c1fd56-c000.snappy.parquet.crc new file mode 100644 index 0000000..e8676af Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00004-e5bd7727-fd4d-495a-8331-baaba9c1fd56-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00005-046ab7ca-764e-4b76-aaf7-63befe2be012-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00005-046ab7ca-764e-4b76-aaf7-63befe2be012-c000.snappy.parquet.crc new 
file mode 100644 index 0000000..8be4a30 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00005-046ab7ca-764e-4b76-aaf7-63befe2be012-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00005-160203b8-7cdf-4de5-9c3e-e702684da453-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00005-160203b8-7cdf-4de5-9c3e-e702684da453-c000.snappy.parquet.crc new file mode 100644 index 0000000..d1574c1 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00005-160203b8-7cdf-4de5-9c3e-e702684da453-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00005-ee21b800-08d7-4952-8f25-62ba5c7f1b48-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00005-ee21b800-08d7-4952-8f25-62ba5c7f1b48-c000.snappy.parquet.crc new file mode 100644 index 0000000..187c9b4 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00005-ee21b800-08d7-4952-8f25-62ba5c7f1b48-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00005-f5593537-2d26-441d-987c-679181679009-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00005-f5593537-2d26-441d-987c-679181679009-c000.snappy.parquet.crc new file mode 100644 index 0000000..97f7401 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00005-f5593537-2d26-441d-987c-679181679009-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00006-52a3314a-e3e9-4fa6-b47a-b8ade082ab0b-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00006-52a3314a-e3e9-4fa6-b47a-b8ade082ab0b-c000.snappy.parquet.crc new file mode 100644 index 0000000..1b8b074 Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00006-52a3314a-e3e9-4fa6-b47a-b8ade082ab0b-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00006-6905bd59-3561-445e-96da-fd3fb75a98c4-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00006-6905bd59-3561-445e-96da-fd3fb75a98c4-c000.snappy.parquet.crc new file mode 100644 index 0000000..f134f63 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00006-6905bd59-3561-445e-96da-fd3fb75a98c4-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00006-6f7c3abb-2b03-4a44-820e-6fd9dc498f4d-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00006-6f7c3abb-2b03-4a44-820e-6fd9dc498f4d-c000.snappy.parquet.crc new file mode 100644 index 0000000..87d0941 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00006-6f7c3abb-2b03-4a44-820e-6fd9dc498f4d-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00006-d488a52c-e9d0-4038-a949-b98404b3d0a3-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00006-d488a52c-e9d0-4038-a949-b98404b3d0a3-c000.snappy.parquet.crc new file mode 100644 index 0000000..8a3e6e6 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00006-d488a52c-e9d0-4038-a949-b98404b3d0a3-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00007-3f91533c-9a7b-4ab3-82e0-9dae3b6e6ea1-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00007-3f91533c-9a7b-4ab3-82e0-9dae3b6e6ea1-c000.snappy.parquet.crc new file mode 100644 index 0000000..98ba98b Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00007-3f91533c-9a7b-4ab3-82e0-9dae3b6e6ea1-c000.snappy.parquet.crc 
differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00007-7df33c35-afd2-48d4-aa6f-7327b9a2260e-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00007-7df33c35-afd2-48d4-aa6f-7327b9a2260e-c000.snappy.parquet.crc new file mode 100644 index 0000000..2f75c30 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00007-7df33c35-afd2-48d4-aa6f-7327b9a2260e-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00007-d23331f9-be41-4003-bb73-777605e43168-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00007-d23331f9-be41-4003-bb73-777605e43168-c000.snappy.parquet.crc new file mode 100644 index 0000000..4ba3f9c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00007-d23331f9-be41-4003-bb73-777605e43168-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/.part-00007-fd957b73-5888-4d07-b174-c1539842756b-c000.snappy.parquet.crc b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00007-fd957b73-5888-4d07-b174-c1539842756b-c000.snappy.parquet.crc new file mode 100644 index 0000000..419c22e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/.part-00007-fd957b73-5888-4d07-b174-c1539842756b-c000.snappy.parquet.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000000.crc.crc b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000000.crc.crc new file mode 100644 index 0000000..0db6ed6 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000000.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000000.json.crc b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000000.json.crc new file mode 
100644 index 0000000..5094798 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000000.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000001.crc.crc b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000001.crc.crc new file mode 100644 index 0000000..7934a0a Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000001.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000001.json.crc b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000001.json.crc new file mode 100644 index 0000000..33470c4 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000001.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000002.crc.crc b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000002.crc.crc new file mode 100644 index 0000000..0cfaa8d Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000002.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000002.json.crc b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000002.json.crc new file mode 100644 index 0000000..42ddf41 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000002.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000003.crc.crc b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000003.crc.crc new file mode 100644 index 0000000..6c01c2a Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000003.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000003.json.crc b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000003.json.crc new file mode 100644 index 0000000..8c3af4f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000003.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000004.crc.crc b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000004.crc.crc new file mode 100644 index 0000000..6566fe8 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000004.crc.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000004.json.crc b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000004.json.crc new file mode 100644 index 0000000..1b86937 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/.00000000000000000004.json.crc differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000000.crc b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000000.crc new file mode 100644 index 0000000..c91c11d --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000000.crc @@ -0,0 +1 @@ 
+{"txnId":"55e0e3d5-9b75-49fc-a80c-0774dd15d6a5","tableSizeBytes":0,"numFiles":0,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"775d0442-cb4f-46a5-99e2-0df543723745","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[]}","partitionColumns":[],"configuration":{},"createdTime":1766549214750},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[]} diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000000.json b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000000.json new file mode 100644 index 0000000..86db4a7 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1766549214784,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"55e0e3d5-9b75-49fc-a80c-0774dd15d6a5"}} +{"metaData":{"id":"775d0442-cb4f-46a5-99e2-0df543723745","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[]}","partitionColumns":[],"configuration":{},"createdTime":1766549214750}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000001.crc b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000001.crc new file mode 100644 index 0000000..3b87397 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000001.crc @@ -0,0 +1 @@ 
+{"txnId":"640fd8a1-d2f3-4c29-bcc9-b62841b28686","tableSizeBytes":247034,"numFiles":8,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"775d0442-cb4f-46a5-99e2-0df543723745","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"publication\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549214750},"protocol":{"minReaderVersion":1,"minWriterVersion":2}} diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000001.json b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000001.json new file mode 100644 index 0000000..369a11f --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000001.json @@ -0,0 +1,10 @@ +{"commitInfo":{"timestamp":1766549221878,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"12603","numOutputBytes":"247034"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"640fd8a1-d2f3-4c29-bcc9-b62841b28686"}} +{"metaData":{"id":"775d0442-cb4f-46a5-99e2-0df543723745","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"publication\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549214750}} 
+{"add":{"path":"part-00000-0c3fd018-b5d3-4b38-8d4c-10a75c7d879a-c000.snappy.parquet","partitionValues":{},"size":21003,"modificationTime":1766549221866,"dataChange":true,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"publication\":\"DOI:10.1002/prot.20703\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa4eefe-b062-5f26-8f77\",\"publication\":\"PMID:9738442\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00001-d224a09d-c0a7-46a1-80f7-f160f977cb32-c000.snappy.parquet","partitionValues":{},"size":38756,"modificationTime":1766549221853,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"publication\":\"DOI:10.1002/jobm.201300741\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"publication\":\"PMID:9847077\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00002-a1cf72fa-61e5-4ea3-95eb-f7247e0f25bc-c000.snappy.parquet","partitionValues":{},"size":21210,"modificationTime":1766549221856,"dataChange":true,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"publication\":\"DOI:10.1002/j.1460-2075.1987.tb0\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"publication\":\"PMID:9878396\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00003-1e6a7263-53d2-4ad1-b0d1-8946b7d232ce-c000.snappy.parquet","partitionValues":{},"size":38315,"modificationTime":1766549221865,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"publication\":\"DOI:10.1002/(sici)1097-0290(1999\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff0114ac-fb68-5211-bc16\",\"publication\":\"PMID:9864346\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} 
+{"add":{"path":"part-00004-66534a87-6fcc-4512-8fde-d3bd0fb0b3e7-c000.snappy.parquet","partitionValues":{},"size":41663,"modificationTime":1766549221871,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"publication\":\"DOI:10.1002/bip.21096\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"publication\":\"PMID:9758761\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00005-f5593537-2d26-441d-987c-679181679009-c000.snappy.parquet","partitionValues":{},"size":21899,"modificationTime":1766549221865,"dataChange":true,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00d52fb0-9168-5276-b624\",\"publication\":\"DOI:10.1006/bbrc.1995.1080\"},\"maxValues\":{\"entity_id\":\"cdm_prot_feabb57f-0dd5-5284-bbea\",\"publication\":\"PMID:9845338\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00006-d488a52c-e9d0-4038-a949-b98404b3d0a3-c000.snappy.parquet","partitionValues":{},"size":40559,"modificationTime":1766549221870,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"publication\":\"DOI:10.1002/anie.201101832\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"publication\":\"PMID:9821972\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00007-3f91533c-9a7b-4ab3-82e0-9dae3b6e6ea1-c000.snappy.parquet","partitionValues":{},"size":23629,"modificationTime":1766549221856,"dataChange":true,"stats":"{\"numRecords\":1339,\"minValues\":{\"entity_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"publication\":\"DOI:10.1002/mbo3.120\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"publication\":\"PMID:9930671\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000002.crc 
b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000002.crc new file mode 100644 index 0000000..40f96ff --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000002.crc @@ -0,0 +1 @@ +{"txnId":"b81d454a-7878-4d85-a660-1817f02825c1","tableSizeBytes":488017,"numFiles":16,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"775d0442-cb4f-46a5-99e2-0df543723745","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"publication\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549214750},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[{"path":"part-00003-1e6a7263-53d2-4ad1-b0d1-8946b7d232ce-c000.snappy.parquet","partitionValues":{},"size":38315,"modificationTime":1766549221865,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"publication\":\"DOI:10.1002/(sici)1097-0290(1999\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff0114ac-fb68-5211-bc16\",\"publication\":\"PMID:9864346\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00000-0c3fd018-b5d3-4b38-8d4c-10a75c7d879a-c000.snappy.parquet","partitionValues":{},"size":21003,"modificationTime":1766549221866,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"publication\":\"DOI:10.1002/prot.20703\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa4eefe-b062-5f26-8f77\",\"publication\":\"PMID:9738442\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00001-8c6d56b3-5bad-499b-9cb7-2c6138bc9134-c000.snappy.parquet","partitionValues":{},"size":41308,"modificationTime":1766549230558,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"e
ntity_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"publication\":\"DOI:10.1002/(sici)1097-0134(1996\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"publication\":\"PMID:9931020\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00007-3f91533c-9a7b-4ab3-82e0-9dae3b6e6ea1-c000.snappy.parquet","partitionValues":{},"size":23629,"modificationTime":1766549221856,"dataChange":false,"stats":"{\"numRecords\":1339,\"minValues\":{\"entity_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"publication\":\"DOI:10.1002/mbo3.120\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"publication\":\"PMID:9930671\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00000-6ccd0024-5a9d-4740-841a-965595b8114b-c000.snappy.parquet","partitionValues":{},"size":15988,"modificationTime":1766549230558,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00c31285-6721-55e0-855a\",\"publication\":\"DOI:10.1006/bbrc.1999.0533\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"publication\":\"PMID:9847077\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00004-3adef09a-7f08-499c-873d-c688a5b87607-c000.snappy.parquet","partitionValues":{},"size":39468,"modificationTime":1766549230567,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"publication\":\"DOI:10.1002/2211-5463.12439\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"publication\":\"PMID:9933933\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00005-ee21b800-08d7-4952-8f25-62ba5c7f1b48-c000.snappy.parquet","partitionValues":{},"size":20263,"modificationTime":1766549230560,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_01ac737c-ed84-5684-b60d\",\"publication\":\"DOI:10.1002/jcb.10107\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\
",\"publication\":\"PMID:9761745\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00005-f5593537-2d26-441d-987c-679181679009-c000.snappy.parquet","partitionValues":{},"size":21899,"modificationTime":1766549221865,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00d52fb0-9168-5276-b624\",\"publication\":\"DOI:10.1006/bbrc.1995.1080\"},\"maxValues\":{\"entity_id\":\"cdm_prot_feabb57f-0dd5-5284-bbea\",\"publication\":\"PMID:9845338\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00006-d488a52c-e9d0-4038-a949-b98404b3d0a3-c000.snappy.parquet","partitionValues":{},"size":40559,"modificationTime":1766549221870,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"publication\":\"DOI:10.1002/anie.201101832\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"publication\":\"PMID:9821972\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00007-7df33c35-afd2-48d4-aa6f-7327b9a2260e-c000.snappy.parquet","partitionValues":{},"size":23928,"modificationTime":1766549230560,"dataChange":false,"stats":"{\"numRecords\":1122,\"minValues\":{\"entity_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"publication\":\"DOI:10.1002/anie.201106765\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"publication\":\"PMID:9679194\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00002-b7947ee9-4005-4e4b-9226-089bddb34507-c000.snappy.parquet","partitionValues":{},"size":21260,"modificationTime":1766549230559,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"publication\":\"DOI:10.1002/anie.201201708\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff1fb047-631e-56b1-9454\",\"publication\":\"PMID:9972267\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00001-d224a09d-c0a7-46a1-80f7-f160f977cb32-c000.snapp
y.parquet","partitionValues":{},"size":38756,"modificationTime":1766549221853,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"publication\":\"DOI:10.1002/jobm.201300741\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"publication\":\"PMID:9847077\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00002-a1cf72fa-61e5-4ea3-95eb-f7247e0f25bc-c000.snappy.parquet","partitionValues":{},"size":21210,"modificationTime":1766549221856,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"publication\":\"DOI:10.1002/j.1460-2075.1987.tb0\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"publication\":\"PMID:9878396\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00004-66534a87-6fcc-4512-8fde-d3bd0fb0b3e7-c000.snappy.parquet","partitionValues":{},"size":41663,"modificationTime":1766549221871,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"publication\":\"DOI:10.1002/bip.21096\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"publication\":\"PMID:9758761\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00006-52a3314a-e3e9-4fa6-b47a-b8ade082ab0b-c000.snappy.parquet","partitionValues":{},"size":40819,"modificationTime":1766549230561,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0102d35c-20a5-5f8d-8175\",\"publication\":\"DOI:10.1002/j.1460-2075.1992.tb0\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"publication\":\"PMID:9756623\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00003-dccd9b86-0835-437b-a3ff-b4c189a6f500-c000.snappy.parquet","partitionValues":{},"size":37949,"modificationTime":1766549230560,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entit
y_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"publication\":\"DOI:10.1002/1873-3468.13550\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"publication\":\"PMID:9933933\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}]} diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000002.json b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000002.json new file mode 100644 index 0000000..00cf3fa --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000002.json @@ -0,0 +1,9 @@ +{"commitInfo":{"timestamp":1766549230570,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"12386","numOutputBytes":"240983"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"b81d454a-7878-4d85-a660-1817f02825c1"}} +{"add":{"path":"part-00000-6ccd0024-5a9d-4740-841a-965595b8114b-c000.snappy.parquet","partitionValues":{},"size":15988,"modificationTime":1766549230558,"dataChange":true,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00c31285-6721-55e0-855a\",\"publication\":\"DOI:10.1006/bbrc.1999.0533\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"publication\":\"PMID:9847077\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00001-8c6d56b3-5bad-499b-9cb7-2c6138bc9134-c000.snappy.parquet","partitionValues":{},"size":41308,"modificationTime":1766549230558,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"publication\":\"DOI:10.1002/(sici)1097-0134(1996\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"publication\":\"PMID:9931020\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} 
+{"add":{"path":"part-00002-b7947ee9-4005-4e4b-9226-089bddb34507-c000.snappy.parquet","partitionValues":{},"size":21260,"modificationTime":1766549230559,"dataChange":true,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"publication\":\"DOI:10.1002/anie.201201708\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff1fb047-631e-56b1-9454\",\"publication\":\"PMID:9972267\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00003-dccd9b86-0835-437b-a3ff-b4c189a6f500-c000.snappy.parquet","partitionValues":{},"size":37949,"modificationTime":1766549230560,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"publication\":\"DOI:10.1002/1873-3468.13550\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"publication\":\"PMID:9933933\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00004-3adef09a-7f08-499c-873d-c688a5b87607-c000.snappy.parquet","partitionValues":{},"size":39468,"modificationTime":1766549230567,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"publication\":\"DOI:10.1002/2211-5463.12439\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"publication\":\"PMID:9933933\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00005-ee21b800-08d7-4952-8f25-62ba5c7f1b48-c000.snappy.parquet","partitionValues":{},"size":20263,"modificationTime":1766549230560,"dataChange":true,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_01ac737c-ed84-5684-b60d\",\"publication\":\"DOI:10.1002/jcb.10107\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"publication\":\"PMID:9761745\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} 
+{"add":{"path":"part-00006-52a3314a-e3e9-4fa6-b47a-b8ade082ab0b-c000.snappy.parquet","partitionValues":{},"size":40819,"modificationTime":1766549230561,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0102d35c-20a5-5f8d-8175\",\"publication\":\"DOI:10.1002/j.1460-2075.1992.tb0\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"publication\":\"PMID:9756623\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00007-7df33c35-afd2-48d4-aa6f-7327b9a2260e-c000.snappy.parquet","partitionValues":{},"size":23928,"modificationTime":1766549230560,"dataChange":true,"stats":"{\"numRecords\":1122,\"minValues\":{\"entity_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"publication\":\"DOI:10.1002/anie.201106765\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"publication\":\"PMID:9679194\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000003.crc b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000003.crc new file mode 100644 index 0000000..75d9e35 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000003.crc @@ -0,0 +1 @@ 
+{"txnId":"3bdf390d-1798-4812-9644-e6f83d18ca4a","tableSizeBytes":709888,"numFiles":24,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"775d0442-cb4f-46a5-99e2-0df543723745","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"publication\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549214750},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[{"path":"part-00001-ec1917e9-be8d-4a9f-b44d-3f3a58787ae8-c000.snappy.parquet","partitionValues":{},"size":37487,"modificationTime":1766549236694,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00365069-eeac-5442-8f11\",\"publication\":\"DOI:10.1002/iub.2122\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff216b32-af2a-545f-8e61\",\"publication\":\"PMID:9746351\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00003-1e6a7263-53d2-4ad1-b0d1-8946b7d232ce-c000.snappy.parquet","partitionValues":{},"size":38315,"modificationTime":1766549221865,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"publication\":\"DOI:10.1002/(sici)1097-0290(1999\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff0114ac-fb68-5211-bc16\",\"publication\":\"PMID:9864346\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00000-0c3fd018-b5d3-4b38-8d4c-10a75c7d879a-c000.snappy.parquet","partitionValues":{},"size":21003,"modificationTime":1766549221866,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"publication\":\"DOI:10.1002/prot.20703\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa4eefe-b062-5f26-8f77\",\"publication\":\"PMID:9738442\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"pa
rt-00001-8c6d56b3-5bad-499b-9cb7-2c6138bc9134-c000.snappy.parquet","partitionValues":{},"size":41308,"modificationTime":1766549230558,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"publication\":\"DOI:10.1002/(sici)1097-0134(1996\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"publication\":\"PMID:9931020\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00007-3f91533c-9a7b-4ab3-82e0-9dae3b6e6ea1-c000.snappy.parquet","partitionValues":{},"size":23629,"modificationTime":1766549221856,"dataChange":false,"stats":"{\"numRecords\":1339,\"minValues\":{\"entity_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"publication\":\"DOI:10.1002/mbo3.120\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"publication\":\"PMID:9930671\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00002-f94d1286-6d09-40ae-8a2e-8d53da426a8f-c000.snappy.parquet","partitionValues":{},"size":19038,"modificationTime":1766549236679,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_0061faab-bd88-562c-8a31\",\"publication\":\"DOI:10.1006/jmbi.1993.1419\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fd88c6d6-80a5-523a-a8a2\",\"publication\":\"PMID:9746351\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00005-ee21b800-08d7-4952-8f25-62ba5c7f1b48-c000.snappy.parquet","partitionValues":{},"size":20263,"modificationTime":1766549230560,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_01ac737c-ed84-5684-b60d\",\"publication\":\"DOI:10.1002/jcb.10107\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"publication\":\"PMID:9761745\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00005-f5593537-2d26-441d-987c-679181679009-c000.snappy.parquet","partitionValues":{},"size":21899,"modificationTime":1766549221865,"dataChange":false,"stats":
"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00d52fb0-9168-5276-b624\",\"publication\":\"DOI:10.1006/bbrc.1995.1080\"},\"maxValues\":{\"entity_id\":\"cdm_prot_feabb57f-0dd5-5284-bbea\",\"publication\":\"PMID:9845338\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00004-012aa02e-6c71-472c-aea2-843e6dfba871-c000.snappy.parquet","partitionValues":{},"size":19180,"modificationTime":1766549236679,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"publication\":\"DOI:10.1006/bbrc.1999.1480\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff94038a-47fa-54b0-813d\",\"publication\":\"PMID:9843941\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00006-d488a52c-e9d0-4038-a949-b98404b3d0a3-c000.snappy.parquet","partitionValues":{},"size":40559,"modificationTime":1766549221870,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"publication\":\"DOI:10.1002/anie.201101832\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"publication\":\"PMID:9821972\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00003-ac774356-7cfe-4c65-b3cd-0d32b8f036df-c000.snappy.parquet","partitionValues":{},"size":36149,"modificationTime":1766549236695,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"publication\":\"DOI:10.1006/bbrc.2001.5523\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"publication\":\"PMID:9829929\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00001-d224a09d-c0a7-46a1-80f7-f160f977cb32-c000.snappy.parquet","partitionValues":{},"size":38756,"modificationTime":1766549221853,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"publication\":\"DOI:10.1002/jobm.201300741\"},\"maxValues\":{\"ent
ity_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"publication\":\"PMID:9847077\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00005-160203b8-7cdf-4de5-9c3e-e702684da453-c000.snappy.parquet","partitionValues":{},"size":40105,"modificationTime":1766549236685,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"publication\":\"DOI:10.1006/bbrc.1999.1480\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"publication\":\"PMID:9679194\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00006-6f7c3abb-2b03-4a44-820e-6fd9dc498f4d-c000.snappy.parquet","partitionValues":{},"size":21843,"modificationTime":1766549236695,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00219a7c-c28c-5aaf-928e\",\"publication\":\"DOI:10.1002/anie.200800794\"},\"maxValues\":{\"entity_id\":\"cdm_prot_feb0ef0d-9115-5ccf-a414\",\"publication\":\"PMID:9845338\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00000-6ccd0024-5a9d-4740-841a-965595b8114b-c000.snappy.parquet","partitionValues":{},"size":15988,"modificationTime":1766549230558,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00c31285-6721-55e0-855a\",\"publication\":\"DOI:10.1006/bbrc.1999.0533\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"publication\":\"PMID:9847077\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00004-3adef09a-7f08-499c-873d-c688a5b87607-c000.snappy.parquet","partitionValues":{},"size":39468,"modificationTime":1766549230567,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"publication\":\"DOI:10.1002/2211-5463.12439\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"publication\":\"PMID:9933933\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00007-d2
3331f9-be41-4003-bb73-777605e43168-c000.snappy.parquet","partitionValues":{},"size":26370,"modificationTime":1766549236685,"dataChange":false,"stats":"{\"numRecords\":1284,\"minValues\":{\"entity_id\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"publication\":\"DOI:10.1002/anie.201108896\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"publication\":\"PMID:9931259\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00007-7df33c35-afd2-48d4-aa6f-7327b9a2260e-c000.snappy.parquet","partitionValues":{},"size":23928,"modificationTime":1766549230560,"dataChange":false,"stats":"{\"numRecords\":1122,\"minValues\":{\"entity_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"publication\":\"DOI:10.1002/anie.201106765\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"publication\":\"PMID:9679194\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00000-b88f9765-c916-4da9-baba-e7fb1d2e74a0-c000.snappy.parquet","partitionValues":{},"size":21699,"modificationTime":1766549236685,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00d4680f-fe29-598a-9385\",\"publication\":\"DOI:10.1002/embj.201386100\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffbb736d-e93f-58cc-8b79\",\"publication\":\"PMID:9988755\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00002-b7947ee9-4005-4e4b-9226-089bddb34507-c000.snappy.parquet","partitionValues":{},"size":21260,"modificationTime":1766549230559,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"publication\":\"DOI:10.1002/anie.201201708\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff1fb047-631e-56b1-9454\",\"publication\":\"PMID:9972267\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00002-a1cf72fa-61e5-4ea3-95eb-f7247e0f25bc-c000.snappy.parquet","partitionValues":{},"size":21210,"modificationTime":1766549221856,"dataChange":false,"stats":"{\"nu
mRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"publication\":\"DOI:10.1002/j.1460-2075.1987.tb0\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"publication\":\"PMID:9878396\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00004-66534a87-6fcc-4512-8fde-d3bd0fb0b3e7-c000.snappy.parquet","partitionValues":{},"size":41663,"modificationTime":1766549221871,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"publication\":\"DOI:10.1002/bip.21096\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"publication\":\"PMID:9758761\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00006-52a3314a-e3e9-4fa6-b47a-b8ade082ab0b-c000.snappy.parquet","partitionValues":{},"size":40819,"modificationTime":1766549230561,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0102d35c-20a5-5f8d-8175\",\"publication\":\"DOI:10.1002/j.1460-2075.1992.tb0\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"publication\":\"PMID:9756623\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00003-dccd9b86-0835-437b-a3ff-b4c189a6f500-c000.snappy.parquet","partitionValues":{},"size":37949,"modificationTime":1766549230560,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"publication\":\"DOI:10.1002/1873-3468.13550\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"publication\":\"PMID:9933933\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}]} diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000003.json b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000003.json new file mode 100644 index 0000000..d84524c --- /dev/null +++ 
b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000003.json @@ -0,0 +1,9 @@ +{"commitInfo":{"timestamp":1766549236697,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"11524","numOutputBytes":"221871"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"3bdf390d-1798-4812-9644-e6f83d18ca4a"}} +{"add":{"path":"part-00000-b88f9765-c916-4da9-baba-e7fb1d2e74a0-c000.snappy.parquet","partitionValues":{},"size":21699,"modificationTime":1766549236685,"dataChange":true,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00d4680f-fe29-598a-9385\",\"publication\":\"DOI:10.1002/embj.201386100\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffbb736d-e93f-58cc-8b79\",\"publication\":\"PMID:9988755\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00001-ec1917e9-be8d-4a9f-b44d-3f3a58787ae8-c000.snappy.parquet","partitionValues":{},"size":37487,"modificationTime":1766549236694,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00365069-eeac-5442-8f11\",\"publication\":\"DOI:10.1002/iub.2122\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff216b32-af2a-545f-8e61\",\"publication\":\"PMID:9746351\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00002-f94d1286-6d09-40ae-8a2e-8d53da426a8f-c000.snappy.parquet","partitionValues":{},"size":19038,"modificationTime":1766549236679,"dataChange":true,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_0061faab-bd88-562c-8a31\",\"publication\":\"DOI:10.1006/jmbi.1993.1419\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fd88c6d6-80a5-523a-a8a2\",\"publication\":\"PMID:9746351\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} 
+{"add":{"path":"part-00003-ac774356-7cfe-4c65-b3cd-0d32b8f036df-c000.snappy.parquet","partitionValues":{},"size":36149,"modificationTime":1766549236695,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"publication\":\"DOI:10.1006/bbrc.2001.5523\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"publication\":\"PMID:9829929\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00004-012aa02e-6c71-472c-aea2-843e6dfba871-c000.snappy.parquet","partitionValues":{},"size":19180,"modificationTime":1766549236679,"dataChange":true,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"publication\":\"DOI:10.1006/bbrc.1999.1480\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff94038a-47fa-54b0-813d\",\"publication\":\"PMID:9843941\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00005-160203b8-7cdf-4de5-9c3e-e702684da453-c000.snappy.parquet","partitionValues":{},"size":40105,"modificationTime":1766549236685,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"publication\":\"DOI:10.1006/bbrc.1999.1480\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"publication\":\"PMID:9679194\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00006-6f7c3abb-2b03-4a44-820e-6fd9dc498f4d-c000.snappy.parquet","partitionValues":{},"size":21843,"modificationTime":1766549236695,"dataChange":true,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00219a7c-c28c-5aaf-928e\",\"publication\":\"DOI:10.1002/anie.200800794\"},\"maxValues\":{\"entity_id\":\"cdm_prot_feb0ef0d-9115-5ccf-a414\",\"publication\":\"PMID:9845338\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} 
+{"add":{"path":"part-00007-d23331f9-be41-4003-bb73-777605e43168-c000.snappy.parquet","partitionValues":{},"size":26370,"modificationTime":1766549236685,"dataChange":true,"stats":"{\"numRecords\":1284,\"minValues\":{\"entity_id\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"publication\":\"DOI:10.1002/anie.201108896\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"publication\":\"PMID:9931259\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000004.crc b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000004.crc new file mode 100644 index 0000000..f5898e9 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000004.crc @@ -0,0 +1 @@ +{"txnId":"8aa17362-133f-4459-8803-751eb8f230a4","tableSizeBytes":927934,"numFiles":32,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"775d0442-cb4f-46a5-99e2-0df543723745","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"entity_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"publication\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1766549214750},"protocol":{"minReaderVersion":1,"minWriterVersion":2},"allFiles":[{"path":"part-00001-ec1917e9-be8d-4a9f-b44d-3f3a58787ae8-c000.snappy.parquet","partitionValues":{},"size":37487,"modificationTime":1766549236694,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00365069-eeac-5442-8f11\",\"publication\":\"DOI:10.1002/iub.2122\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff216b32-af2a-545f-8e61\",\"publication\":\"PMID:9746351\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00003-1e6a7263-53d2-4ad1-b0d1-8946b7d232ce-c000.snappy.parquet","partitionValues":{},"size":38315,
"modificationTime":1766549221865,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_004f59e4-6b11-58ae-a6ff\",\"publication\":\"DOI:10.1002/(sici)1097-0290(1999\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff0114ac-fb68-5211-bc16\",\"publication\":\"PMID:9864346\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00000-0c3fd018-b5d3-4b38-8d4c-10a75c7d879a-c000.snappy.parquet","partitionValues":{},"size":21003,"modificationTime":1766549221866,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00036120-577c-5bbc-ab91\",\"publication\":\"DOI:10.1002/prot.20703\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa4eefe-b062-5f26-8f77\",\"publication\":\"PMID:9738442\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00001-8c6d56b3-5bad-499b-9cb7-2c6138bc9134-c000.snappy.parquet","partitionValues":{},"size":41308,"modificationTime":1766549230558,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_004539b1-91ae-513e-bef9\",\"publication\":\"DOI:10.1002/(sici)1097-0134(1996\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffa14ace-ea75-5cc6-86f2\",\"publication\":\"PMID:9931020\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00007-fd957b73-5888-4d07-b174-c1539842756b-c000.snappy.parquet","partitionValues":{},"size":25067,"modificationTime":1766549242428,"dataChange":false,"stats":"{\"numRecords\":1127,\"minValues\":{\"entity_id\":\"cdm_prot_0315572e-8cda-5615-8d4b\",\"publication\":\"DOI:10.1002/prot.20255\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffd6bc2d-9fdc-5fb9-a121\",\"publication\":\"PMID:9847077\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00002-289cb3ff-e88a-4077-8cff-abe6e99e351c-c000.snappy.parquet","partitionValues":{},"size":41127,"modificationTime":1766549242427,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0185e048-25c8-50dd-bfbf\",\"pu
blication\":\"DOI:10.1002/pro.2039\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe66051-6a0a-55cc-81b2\",\"publication\":\"PMID:9845366\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00007-3f91533c-9a7b-4ab3-82e0-9dae3b6e6ea1-c000.snappy.parquet","partitionValues":{},"size":23629,"modificationTime":1766549221856,"dataChange":false,"stats":"{\"numRecords\":1339,\"minValues\":{\"entity_id\":\"cdm_prot_00625fdf-0992-5594-a800\",\"publication\":\"DOI:10.1002/mbo3.120\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffcf5789-8bfa-5ca0-914e\",\"publication\":\"PMID:9930671\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00002-f94d1286-6d09-40ae-8a2e-8d53da426a8f-c000.snappy.parquet","partitionValues":{},"size":19038,"modificationTime":1766549236679,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_0061faab-bd88-562c-8a31\",\"publication\":\"DOI:10.1006/jmbi.1993.1419\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fd88c6d6-80a5-523a-a8a2\",\"publication\":\"PMID:9746351\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00005-ee21b800-08d7-4952-8f25-62ba5c7f1b48-c000.snappy.parquet","partitionValues":{},"size":20263,"modificationTime":1766549230560,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_01ac737c-ed84-5684-b60d\",\"publication\":\"DOI:10.1002/jcb.10107\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffdecc1a-6ee9-59b2-9c49\",\"publication\":\"PMID:9761745\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00005-f5593537-2d26-441d-987c-679181679009-c000.snappy.parquet","partitionValues":{},"size":21899,"modificationTime":1766549221865,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00d52fb0-9168-5276-b624\",\"publication\":\"DOI:10.1006/bbrc.1995.1080\"},\"maxValues\":{\"entity_id\":\"cdm_prot_feabb57f-0dd5-5284-bbea\",\"publication\":\"PMID:9845338\"},\"nullCount\":{\"entity_id\":
0,\"publication\":0}}"},{"path":"part-00004-012aa02e-6c71-472c-aea2-843e6dfba871-c000.snappy.parquet","partitionValues":{},"size":19180,"modificationTime":1766549236679,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_005318fc-0c32-5b4a-b952\",\"publication\":\"DOI:10.1006/bbrc.1999.1480\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff94038a-47fa-54b0-813d\",\"publication\":\"PMID:9843941\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00006-d488a52c-e9d0-4038-a949-b98404b3d0a3-c000.snappy.parquet","partitionValues":{},"size":40559,"modificationTime":1766549221870,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_003df674-48d1-588a-b194\",\"publication\":\"DOI:10.1002/anie.201101832\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffeae0f1-9785-5eaf-ad23\",\"publication\":\"PMID:9821972\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00003-ac774356-7cfe-4c65-b3cd-0d32b8f036df-c000.snappy.parquet","partitionValues":{},"size":36149,"modificationTime":1766549236695,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00206b22-8b92-5222-ab7c\",\"publication\":\"DOI:10.1006/bbrc.2001.5523\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffede36e-7e42-5dcc-abde\",\"publication\":\"PMID:9829929\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00004-e5bd7727-fd4d-495a-8331-baaba9c1fd56-c000.snappy.parquet","partitionValues":{},"size":22660,"modificationTime":1766549242427,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_003753a9-9a55-505c-8ea7\",\"publication\":\"DOI:10.1002/j.1460-2075.1989.tb0\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe3617e-dce1-5614-ae51\",\"publication\":\"PMID:9811660\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00001-d224a09d-c0a7-46a1-80f7-f160f977cb32-c000.snappy.parquet","partitionValues":{},"size":38756,"modificationT
ime":1766549221853,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_000e0a71-14e8-5d5e-8144\",\"publication\":\"DOI:10.1002/jobm.201300741\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffed2e1d-fe14-5556-b067\",\"publication\":\"PMID:9847077\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00005-160203b8-7cdf-4de5-9c3e-e702684da453-c000.snappy.parquet","partitionValues":{},"size":40105,"modificationTime":1766549236685,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0013ba57-fc41-51cc-bc95\",\"publication\":\"DOI:10.1006/bbrc.1999.1480\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe27896-0bd4-50b4-9cff\",\"publication\":\"PMID:9679194\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00000-4a2b52be-31d1-4a2c-b12e-65ca33e408cb-c000.snappy.parquet","partitionValues":{},"size":22341,"modificationTime":1766549242436,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_0003b7cb-bf13-5646-9978\",\"publication\":\"DOI:10.1006/jmbi.2001.5373\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fef0eb9f-57f2-532d-b5b2\",\"publication\":\"PMID:9851985\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00006-6f7c3abb-2b03-4a44-820e-6fd9dc498f4d-c000.snappy.parquet","partitionValues":{},"size":21843,"modificationTime":1766549236695,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00219a7c-c28c-5aaf-928e\",\"publication\":\"DOI:10.1002/anie.200800794\"},\"maxValues\":{\"entity_id\":\"cdm_prot_feb0ef0d-9115-5ccf-a414\",\"publication\":\"PMID:9845338\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00006-6905bd59-3561-445e-96da-fd3fb75a98c4-c000.snappy.parquet","partitionValues":{},"size":23096,"modificationTime":1766549242426,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_014d9783-4a69-58c0-9bad\",\"publication\":\"DOI:
10.1007/bf00277350\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffd1840d-3bb9-5f05-ad33\",\"publication\":\"PMID:9860944\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00000-6ccd0024-5a9d-4740-841a-965595b8114b-c000.snappy.parquet","partitionValues":{},"size":15988,"modificationTime":1766549230558,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00c31285-6721-55e0-855a\",\"publication\":\"DOI:10.1006/bbrc.1999.0533\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff289ac2-6f01-59a2-8290\",\"publication\":\"PMID:9847077\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00004-3adef09a-7f08-499c-873d-c688a5b87607-c000.snappy.parquet","partitionValues":{},"size":39468,"modificationTime":1766549230567,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_007c34e9-4de0-5736-985f\",\"publication\":\"DOI:10.1002/2211-5463.12439\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffc892c3-0651-5e3c-998a\",\"publication\":\"PMID:9933933\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00001-b0701a06-5ec4-48fe-8a86-dc299bf5e361-c000.snappy.parquet","partitionValues":{},"size":21348,"modificationTime":1766549242426,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_01672ce0-33eb-50c0-8bb3\",\"publication\":\"DOI:10.1002/prot.20796\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9aba-bce9-56de-b269\",\"publication\":\"PMID:9847077\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00007-d23331f9-be41-4003-bb73-777605e43168-c000.snappy.parquet","partitionValues":{},"size":26370,"modificationTime":1766549236685,"dataChange":false,"stats":"{\"numRecords\":1284,\"minValues\":{\"entity_id\":\"cdm_prot_00019029-8c98-5477-9dd1\",\"publication\":\"DOI:10.1002/anie.201108896\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff606768-2e8f-543d-abc5\",\"publication\":\"PMID:9931259\"},\"nullCount\":{\"entity_id\":0,\"publ
ication\":0}}"},{"path":"part-00007-7df33c35-afd2-48d4-aa6f-7327b9a2260e-c000.snappy.parquet","partitionValues":{},"size":23928,"modificationTime":1766549230560,"dataChange":false,"stats":"{\"numRecords\":1122,\"minValues\":{\"entity_id\":\"cdm_prot_001ba1b3-667c-5321-9298\",\"publication\":\"DOI:10.1002/anie.201106765\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff86ebef-56b7-5cf7-bf3e\",\"publication\":\"PMID:9679194\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00000-b88f9765-c916-4da9-baba-e7fb1d2e74a0-c000.snappy.parquet","partitionValues":{},"size":21699,"modificationTime":1766549236685,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00d4680f-fe29-598a-9385\",\"publication\":\"DOI:10.1002/embj.201386100\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffbb736d-e93f-58cc-8b79\",\"publication\":\"PMID:9988755\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00002-b7947ee9-4005-4e4b-9226-089bddb34507-c000.snappy.parquet","partitionValues":{},"size":21260,"modificationTime":1766549230559,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00546ee8-4f73-5c85-8b07\",\"publication\":\"DOI:10.1002/anie.201201708\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff1fb047-631e-56b1-9454\",\"publication\":\"PMID:9972267\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00002-a1cf72fa-61e5-4ea3-95eb-f7247e0f25bc-c000.snappy.parquet","partitionValues":{},"size":21210,"modificationTime":1766549221856,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_00030730-9147-5351-b68c\",\"publication\":\"DOI:10.1002/j.1460-2075.1987.tb0\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fde84f0d-6520-56ad-b61f\",\"publication\":\"PMID:9878396\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00004-66534a87-6fcc-4512-8fde-d3bd0fb0b3e7-c000.snappy.parquet","partitionValues":{},"size":41663,"modificationTime":176
6549221871,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_00134cb8-e497-5d40-b662\",\"publication\":\"DOI:10.1002/bip.21096\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff5f637d-e74b-5494-9ed8\",\"publication\":\"PMID:9758761\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00005-046ab7ca-764e-4b76-aaf7-63befe2be012-c000.snappy.parquet","partitionValues":{},"size":43603,"modificationTime":1766549242424,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0048aaff-7f6b-5ca1-9274\",\"publication\":\"DOI:10.1002/j.1460-2075.1992.tb0\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb5b181-1633-55f5-83ee\",\"publication\":\"PMID:9697413\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00006-52a3314a-e3e9-4fa6-b47a-b8ade082ab0b-c000.snappy.parquet","partitionValues":{},"size":40819,"modificationTime":1766549230561,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0102d35c-20a5-5f8d-8175\",\"publication\":\"DOI:10.1002/j.1460-2075.1992.tb0\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9d9f-5871-5041-83af\",\"publication\":\"PMID:9756623\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00003-dccd9b86-0835-437b-a3ff-b4c189a6f500-c000.snappy.parquet","partitionValues":{},"size":37949,"modificationTime":1766549230560,"dataChange":false,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_000c7e85-7dff-527d-b6db\",\"publication\":\"DOI:10.1002/1873-3468.13550\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffce40a0-b890-5e5f-b455\",\"publication\":\"PMID:9933933\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"},{"path":"part-00003-b08a10e8-4e23-45d8-939c-d636a6ac5c6a-c000.snappy.parquet","partitionValues":{},"size":18804,"modificationTime":1766549242432,"dataChange":false,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_0007879e-35c8-5917-ad11\",\"publication\":\"DOI:
10.1002/pro.4815\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff8b4ca9-ca19-5bef-bc63\",\"publication\":\"PMID:9679194\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}]} diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000004.json b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000004.json new file mode 100644 index 0000000..13f72b2 --- /dev/null +++ b/tests/data/uniprot_archaea/uniprot_db/publications/_delta_log/00000000000000000004.json @@ -0,0 +1,9 @@ +{"commitInfo":{"timestamp":1766549242439,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":3,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"8","numOutputRows":"10343","numOutputBytes":"218046"},"engineInfo":"Apache-Spark/4.0.1 Delta-Lake/4.0.0","txnId":"8aa17362-133f-4459-8803-751eb8f230a4"}} +{"add":{"path":"part-00000-4a2b52be-31d1-4a2c-b12e-65ca33e408cb-c000.snappy.parquet","partitionValues":{},"size":22341,"modificationTime":1766549242436,"dataChange":true,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_0003b7cb-bf13-5646-9978\",\"publication\":\"DOI:10.1006/jmbi.2001.5373\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fef0eb9f-57f2-532d-b5b2\",\"publication\":\"PMID:9851985\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00001-b0701a06-5ec4-48fe-8a86-dc299bf5e361-c000.snappy.parquet","partitionValues":{},"size":21348,"modificationTime":1766549242426,"dataChange":true,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_01672ce0-33eb-50c0-8bb3\",\"publication\":\"DOI:10.1002/prot.20796\"},\"maxValues\":{\"entity_id\":\"cdm_prot_fffe9aba-bce9-56de-b269\",\"publication\":\"PMID:9847077\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} 
+{"add":{"path":"part-00002-289cb3ff-e88a-4077-8cff-abe6e99e351c-c000.snappy.parquet","partitionValues":{},"size":41127,"modificationTime":1766549242427,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0185e048-25c8-50dd-bfbf\",\"publication\":\"DOI:10.1002/pro.2039\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe66051-6a0a-55cc-81b2\",\"publication\":\"PMID:9845366\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00003-b08a10e8-4e23-45d8-939c-d636a6ac5c6a-c000.snappy.parquet","partitionValues":{},"size":18804,"modificationTime":1766549242432,"dataChange":true,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_0007879e-35c8-5917-ad11\",\"publication\":\"DOI:10.1002/pro.4815\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ff8b4ca9-ca19-5bef-bc63\",\"publication\":\"PMID:9679194\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00004-e5bd7727-fd4d-495a-8331-baaba9c1fd56-c000.snappy.parquet","partitionValues":{},"size":22660,"modificationTime":1766549242427,"dataChange":true,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_003753a9-9a55-505c-8ea7\",\"publication\":\"DOI:10.1002/j.1460-2075.1989.tb0\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffe3617e-dce1-5614-ae51\",\"publication\":\"PMID:9811660\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00005-046ab7ca-764e-4b76-aaf7-63befe2be012-c000.snappy.parquet","partitionValues":{},"size":43603,"modificationTime":1766549242424,"dataChange":true,"stats":"{\"numRecords\":2048,\"minValues\":{\"entity_id\":\"cdm_prot_0048aaff-7f6b-5ca1-9274\",\"publication\":\"DOI:10.1002/j.1460-2075.1992.tb0\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffb5b181-1633-55f5-83ee\",\"publication\":\"PMID:9697413\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} 
+{"add":{"path":"part-00006-6905bd59-3561-445e-96da-fd3fb75a98c4-c000.snappy.parquet","partitionValues":{},"size":23096,"modificationTime":1766549242426,"dataChange":true,"stats":"{\"numRecords\":1024,\"minValues\":{\"entity_id\":\"cdm_prot_014d9783-4a69-58c0-9bad\",\"publication\":\"DOI:10.1007/bf00277350\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffd1840d-3bb9-5f05-ad33\",\"publication\":\"PMID:9860944\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} +{"add":{"path":"part-00007-fd957b73-5888-4d07-b174-c1539842756b-c000.snappy.parquet","partitionValues":{},"size":25067,"modificationTime":1766549242428,"dataChange":true,"stats":"{\"numRecords\":1127,\"minValues\":{\"entity_id\":\"cdm_prot_0315572e-8cda-5615-8d4b\",\"publication\":\"DOI:10.1002/prot.20255\"},\"maxValues\":{\"entity_id\":\"cdm_prot_ffd6bc2d-9fdc-5fb9-a121\",\"publication\":\"PMID:9847077\"},\"nullCount\":{\"entity_id\":0,\"publication\":0}}"}} diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00000-0c3fd018-b5d3-4b38-8d4c-10a75c7d879a-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00000-0c3fd018-b5d3-4b38-8d4c-10a75c7d879a-c000.snappy.parquet new file mode 100644 index 0000000..13ad0eb Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00000-0c3fd018-b5d3-4b38-8d4c-10a75c7d879a-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00000-4a2b52be-31d1-4a2c-b12e-65ca33e408cb-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00000-4a2b52be-31d1-4a2c-b12e-65ca33e408cb-c000.snappy.parquet new file mode 100644 index 0000000..bada68c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00000-4a2b52be-31d1-4a2c-b12e-65ca33e408cb-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00000-6ccd0024-5a9d-4740-841a-965595b8114b-c000.snappy.parquet 
b/tests/data/uniprot_archaea/uniprot_db/publications/part-00000-6ccd0024-5a9d-4740-841a-965595b8114b-c000.snappy.parquet new file mode 100644 index 0000000..042c3a6 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00000-6ccd0024-5a9d-4740-841a-965595b8114b-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00000-b88f9765-c916-4da9-baba-e7fb1d2e74a0-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00000-b88f9765-c916-4da9-baba-e7fb1d2e74a0-c000.snappy.parquet new file mode 100644 index 0000000..4b6d207 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00000-b88f9765-c916-4da9-baba-e7fb1d2e74a0-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00001-8c6d56b3-5bad-499b-9cb7-2c6138bc9134-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00001-8c6d56b3-5bad-499b-9cb7-2c6138bc9134-c000.snappy.parquet new file mode 100644 index 0000000..e049e4e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00001-8c6d56b3-5bad-499b-9cb7-2c6138bc9134-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00001-b0701a06-5ec4-48fe-8a86-dc299bf5e361-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00001-b0701a06-5ec4-48fe-8a86-dc299bf5e361-c000.snappy.parquet new file mode 100644 index 0000000..7b5bfb3 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00001-b0701a06-5ec4-48fe-8a86-dc299bf5e361-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00001-d224a09d-c0a7-46a1-80f7-f160f977cb32-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00001-d224a09d-c0a7-46a1-80f7-f160f977cb32-c000.snappy.parquet new file mode 100644 index 0000000..d5b3d8f Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/publications/part-00001-d224a09d-c0a7-46a1-80f7-f160f977cb32-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00001-ec1917e9-be8d-4a9f-b44d-3f3a58787ae8-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00001-ec1917e9-be8d-4a9f-b44d-3f3a58787ae8-c000.snappy.parquet new file mode 100644 index 0000000..327d146 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00001-ec1917e9-be8d-4a9f-b44d-3f3a58787ae8-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00002-289cb3ff-e88a-4077-8cff-abe6e99e351c-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00002-289cb3ff-e88a-4077-8cff-abe6e99e351c-c000.snappy.parquet new file mode 100644 index 0000000..830725d Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00002-289cb3ff-e88a-4077-8cff-abe6e99e351c-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00002-a1cf72fa-61e5-4ea3-95eb-f7247e0f25bc-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00002-a1cf72fa-61e5-4ea3-95eb-f7247e0f25bc-c000.snappy.parquet new file mode 100644 index 0000000..97a949f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00002-a1cf72fa-61e5-4ea3-95eb-f7247e0f25bc-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00002-b7947ee9-4005-4e4b-9226-089bddb34507-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00002-b7947ee9-4005-4e4b-9226-089bddb34507-c000.snappy.parquet new file mode 100644 index 0000000..698210f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00002-b7947ee9-4005-4e4b-9226-089bddb34507-c000.snappy.parquet differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/publications/part-00002-f94d1286-6d09-40ae-8a2e-8d53da426a8f-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00002-f94d1286-6d09-40ae-8a2e-8d53da426a8f-c000.snappy.parquet new file mode 100644 index 0000000..46f208f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00002-f94d1286-6d09-40ae-8a2e-8d53da426a8f-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00003-1e6a7263-53d2-4ad1-b0d1-8946b7d232ce-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00003-1e6a7263-53d2-4ad1-b0d1-8946b7d232ce-c000.snappy.parquet new file mode 100644 index 0000000..75b2469 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00003-1e6a7263-53d2-4ad1-b0d1-8946b7d232ce-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00003-ac774356-7cfe-4c65-b3cd-0d32b8f036df-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00003-ac774356-7cfe-4c65-b3cd-0d32b8f036df-c000.snappy.parquet new file mode 100644 index 0000000..1ff6733 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00003-ac774356-7cfe-4c65-b3cd-0d32b8f036df-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00003-b08a10e8-4e23-45d8-939c-d636a6ac5c6a-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00003-b08a10e8-4e23-45d8-939c-d636a6ac5c6a-c000.snappy.parquet new file mode 100644 index 0000000..5fe8c3f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00003-b08a10e8-4e23-45d8-939c-d636a6ac5c6a-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00003-dccd9b86-0835-437b-a3ff-b4c189a6f500-c000.snappy.parquet 
b/tests/data/uniprot_archaea/uniprot_db/publications/part-00003-dccd9b86-0835-437b-a3ff-b4c189a6f500-c000.snappy.parquet new file mode 100644 index 0000000..664a7d3 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00003-dccd9b86-0835-437b-a3ff-b4c189a6f500-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00004-012aa02e-6c71-472c-aea2-843e6dfba871-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00004-012aa02e-6c71-472c-aea2-843e6dfba871-c000.snappy.parquet new file mode 100644 index 0000000..20ce335 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00004-012aa02e-6c71-472c-aea2-843e6dfba871-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00004-3adef09a-7f08-499c-873d-c688a5b87607-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00004-3adef09a-7f08-499c-873d-c688a5b87607-c000.snappy.parquet new file mode 100644 index 0000000..b1ec56b Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00004-3adef09a-7f08-499c-873d-c688a5b87607-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00004-66534a87-6fcc-4512-8fde-d3bd0fb0b3e7-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00004-66534a87-6fcc-4512-8fde-d3bd0fb0b3e7-c000.snappy.parquet new file mode 100644 index 0000000..2aaecd8 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00004-66534a87-6fcc-4512-8fde-d3bd0fb0b3e7-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00004-e5bd7727-fd4d-495a-8331-baaba9c1fd56-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00004-e5bd7727-fd4d-495a-8331-baaba9c1fd56-c000.snappy.parquet new file mode 100644 index 0000000..9c64334 Binary files /dev/null and 
b/tests/data/uniprot_archaea/uniprot_db/publications/part-00004-e5bd7727-fd4d-495a-8331-baaba9c1fd56-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00005-046ab7ca-764e-4b76-aaf7-63befe2be012-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00005-046ab7ca-764e-4b76-aaf7-63befe2be012-c000.snappy.parquet new file mode 100644 index 0000000..423bfde Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00005-046ab7ca-764e-4b76-aaf7-63befe2be012-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00005-160203b8-7cdf-4de5-9c3e-e702684da453-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00005-160203b8-7cdf-4de5-9c3e-e702684da453-c000.snappy.parquet new file mode 100644 index 0000000..5f09851 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00005-160203b8-7cdf-4de5-9c3e-e702684da453-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00005-ee21b800-08d7-4952-8f25-62ba5c7f1b48-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00005-ee21b800-08d7-4952-8f25-62ba5c7f1b48-c000.snappy.parquet new file mode 100644 index 0000000..f7b6f44 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00005-ee21b800-08d7-4952-8f25-62ba5c7f1b48-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00005-f5593537-2d26-441d-987c-679181679009-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00005-f5593537-2d26-441d-987c-679181679009-c000.snappy.parquet new file mode 100644 index 0000000..7f66031 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00005-f5593537-2d26-441d-987c-679181679009-c000.snappy.parquet differ diff --git 
a/tests/data/uniprot_archaea/uniprot_db/publications/part-00006-52a3314a-e3e9-4fa6-b47a-b8ade082ab0b-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00006-52a3314a-e3e9-4fa6-b47a-b8ade082ab0b-c000.snappy.parquet new file mode 100644 index 0000000..0ea762c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00006-52a3314a-e3e9-4fa6-b47a-b8ade082ab0b-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00006-6905bd59-3561-445e-96da-fd3fb75a98c4-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00006-6905bd59-3561-445e-96da-fd3fb75a98c4-c000.snappy.parquet new file mode 100644 index 0000000..57bb038 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00006-6905bd59-3561-445e-96da-fd3fb75a98c4-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00006-6f7c3abb-2b03-4a44-820e-6fd9dc498f4d-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00006-6f7c3abb-2b03-4a44-820e-6fd9dc498f4d-c000.snappy.parquet new file mode 100644 index 0000000..0269f48 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00006-6f7c3abb-2b03-4a44-820e-6fd9dc498f4d-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00006-d488a52c-e9d0-4038-a949-b98404b3d0a3-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00006-d488a52c-e9d0-4038-a949-b98404b3d0a3-c000.snappy.parquet new file mode 100644 index 0000000..7c3be1a Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00006-d488a52c-e9d0-4038-a949-b98404b3d0a3-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00007-3f91533c-9a7b-4ab3-82e0-9dae3b6e6ea1-c000.snappy.parquet 
b/tests/data/uniprot_archaea/uniprot_db/publications/part-00007-3f91533c-9a7b-4ab3-82e0-9dae3b6e6ea1-c000.snappy.parquet new file mode 100644 index 0000000..634818f Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00007-3f91533c-9a7b-4ab3-82e0-9dae3b6e6ea1-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00007-7df33c35-afd2-48d4-aa6f-7327b9a2260e-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00007-7df33c35-afd2-48d4-aa6f-7327b9a2260e-c000.snappy.parquet new file mode 100644 index 0000000..95b0730 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00007-7df33c35-afd2-48d4-aa6f-7327b9a2260e-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00007-d23331f9-be41-4003-bb73-777605e43168-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00007-d23331f9-be41-4003-bb73-777605e43168-c000.snappy.parquet new file mode 100644 index 0000000..168b81c Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00007-d23331f9-be41-4003-bb73-777605e43168-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_db/publications/part-00007-fd957b73-5888-4d07-b174-c1539842756b-c000.snappy.parquet b/tests/data/uniprot_archaea/uniprot_db/publications/part-00007-fd957b73-5888-4d07-b174-c1539842756b-c000.snappy.parquet new file mode 100644 index 0000000..56bab03 Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_db/publications/part-00007-fd957b73-5888-4d07-b174-c1539842756b-c000.snappy.parquet differ diff --git a/tests/data/uniprot_archaea/uniprot_sprot_archaea.xml.gz b/tests/data/uniprot_archaea/uniprot_sprot_archaea.xml.gz new file mode 100644 index 0000000..54c2d4e Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_sprot_archaea.xml.gz differ diff --git 
a/tests/data/uniprot_archaea/uniprot_sprot_archaea.xml.gz.1 b/tests/data/uniprot_archaea/uniprot_sprot_archaea.xml.gz.1 new file mode 100644 index 0000000..87ab8ff Binary files /dev/null and b/tests/data/uniprot_archaea/uniprot_sprot_archaea.xml.gz.1 differ diff --git a/tests/parsers/test_shared_identifiers.py b/tests/parsers/test_shared_identifiers.py new file mode 100644 index 0000000..b76e9af --- /dev/null +++ b/tests/parsers/test_shared_identifiers.py @@ -0,0 +1,34 @@ +import xml.etree.ElementTree as ET + +from cdm_data_loader_utils.parsers.shared_identifiers import parse_identifiers_generic + + +def test_parse_identifiers_generic_basic() -> None: + # + # P12345 + # Q99999 + # + ns = {"ns": "dummy"} + entry = ET.Element("entry") + + a1 = ET.SubElement(entry, "accession") + a1.text = "P12345" + a2 = ET.SubElement(entry, "accession") + a2.text = "Q99999" + + # Add namespace prefix to match xpath + a1.tag = "{dummy}accession" + a2.tag = "{dummy}accession" + + rows = parse_identifiers_generic( + entry=entry, + xpath="ns:accession", + prefix="UniProt", + ns=ns, + ) + + assert len(rows) == 2 + assert rows[0]["identifier"] == "UniProt:P12345" + assert rows[1]["identifier"] == "UniProt:Q99999" + assert rows[0]["source"] == "UniProt" + assert rows[0]["description"] == "UniProt accession" diff --git a/tests/parsers/test_uniprot.py b/tests/parsers/test_uniprot.py index 105fb76..fcfeb63 100644 --- a/tests/parsers/test_uniprot.py +++ b/tests/parsers/test_uniprot.py @@ -1,4 +1,4 @@ -"""Tests for the UniProt parser. +""" This file uses pytest to provide parameterized and functional tests for all major UniProt parsing utility functions, ensuring correct parsing and transformation of @@ -17,755 +17,730 @@ How to run in the terminal: PYTHONPATH=src pytest tests/test_uniprot.py + pytest tests/parsers/test_uniprot.py + PYTHONPATH=. 
pytest test_uniprot.py """ import datetime -import re +import json import xml.etree.ElementTree as ET -from typing import Any +from pathlib import Path import pytest from cdm_data_loader_utils.parsers.uniprot import ( build_datasource_record, - generate_cdm_id, parse_associations, + parse_cross_references, parse_evidence_map, parse_identifiers, parse_names, parse_protein_info, - parse_publications, - parse_uniprot_entry, + save_datasource_record, ) -# Regular expression to validate UUID format -UUID_PATTERN = re.compile(r"^[a-f0-9]{8}-[a-f0-9]{4}-[1-5][a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}$", re.IGNORECASE) +NS_URI = "https://uniprot.org/uniprot" + +@pytest.fixture( + params=[ + "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz", + "http://example.org/uniprot_test.xml.gz", + ] +) +def xml_url(request): + return request.param -@pytest.mark.parametrize("n", range(5)) -def test_generate_cdm_id_format(n: int) -> None: - uuid = generate_cdm_id() - assert uuid.startswith("CDM:") - uuid_str = uuid[4:] - assert UUID_PATTERN.match(uuid_str), f"{uuid_str} is not a valid UUID" +def test_build_datasource_record(xml_url): + record = build_datasource_record(xml_url) -## build_datasource_record ## -def test_build_datasource_record() -> None: - url = "https://example.com/uniprot.xml.gz" - record = build_datasource_record(url) + # ---- basic structure ---- assert isinstance(record, dict) - assert set(record.keys()) == {"name", "source", "url", "accessed", "version"} + + # ---- fixed fields ---- assert record["name"] == "UniProt import" assert record["source"] == "UniProt" - assert record["url"] == url - - # check accessed - accessed_dt = datetime.datetime.fromisoformat(record["accessed"]) - now = datetime.datetime.now(datetime.UTC) - delta = abs((now - accessed_dt).total_seconds()) - assert delta < 10 + assert record["url"] == xml_url assert record["version"] == 115 + # ---- accessed field ---- + accessed = 
record.get("accessed") + assert accessed is not None -@pytest.mark.parametrize("bad_url", [None, ""]) -def test_build_datasource_record_bad(bad_url: str | None) -> None: - record = build_datasource_record(bad_url) - assert record["url"] == bad_url + parsed = datetime.datetime.fromisoformat(accessed) + assert parsed.tzinfo is not None + assert parsed.tzinfo == datetime.UTC -## parse_identifiers function test ## -@pytest.mark.parametrize( - ("xml_str", "cdm_id", "expected"), - [ - ### multiple accessions, expect two dict, every dic use the same cdm_id - ### identifier according to number - ( - """ - - Q9V2L2 - G8ZFP4 - - """, - "CDM:001", - [ - { - "entity_id": "CDM:001", - "identifier": "UniProt:Q9V2L2", - "source": "UniProt", - "description": "UniProt accession", - }, - { - "entity_id": "CDM:001", - "identifier": "UniProt:G8ZFP4", - "source": "UniProt", - "description": "UniProt accession", - }, - ], - ), - ### Use single accession - ( - """ - - X00001 - - """, - "CDM:002", - [ - { - "entity_id": "CDM:002", - "identifier": "UniProt:X00001", - "source": "UniProt", - "description": "UniProt accession", - } - ], - ), - ### No accession - ( - """ - - - """, - "CDM:003", - [], - ), - ], -) -def test_parse_identifiers(xml_str: str, cdm_id: str, expected: list[dict[str, str]]) -> None: +def test_save_datasource_record(tmp_path: Path, xml_url): + """ + save_datasource_record should: + - create output directory if missing + - write datasource.json + - return the same content that is written to disk """ - This approach ensures that parse_identifiers correctly parses and structures identifier data. + output_dir = tmp_path / "output" - The parsed Element object and the provided CDM_id are passed to the parse_identifiers funtion. - The function is expected to extract all relevant identifier information from the XML and return list of dict. 
+ # ---- call function ---- + result = save_datasource_record(xml_url, str(output_dir)) - The test compares the result output with the predefined expected result using an assert statement. + # ---- return value sanity ---- + assert isinstance(result, dict) + assert result["url"] == xml_url + assert result["source"] == "UniProt" + assert result["name"] == "UniProt import" + assert "accessed" in result + assert "version" in result - """ - entry = ET.fromstring(xml_str) - result = parse_identifiers(entry, cdm_id) - assert result == expected + # ---- file existence ---- + output_file = output_dir / "datasource.json" + assert output_file.exists() + assert output_file.is_file() + # ---- file content correctness ---- + with open(output_file, encoding="utf-8") as f: + on_disk = json.load(f) -""" - This parameterized pytest function tests the correctness of the parse_names function for various UniProt XML entry scenarios. + assert on_disk == result - XML string representing a UniProt entry with different protein names: - top-level - recommended names, - alternative names, - combinations, - no names - cdm_id: CDM entry ID +def make_entry(names=None, protein_names=None): + entry = ET.Element(f"{{{NS_URI}}}entry") - Output: - A list of name records with their metadata + # + for n in names or []: + e = ET.SubElement(entry, f"{{{NS_URI}}}name") + e.text = n -""" + # block + if protein_names: + protein = ET.SubElement(entry, f"{{{NS_URI}}}protein") + + for tag, logical in [ + ("recommendedName", "recommended"), + ("alternativeName", "alternative"), + ]: + if logical not in protein_names: + continue + + block = ET.SubElement(protein, f"{{{NS_URI}}}{tag}") + for xml_tag in ["fullName", "shortName"]: + val = protein_names[logical].get(xml_tag.replace("Name", "")) + if val: + e = ET.SubElement(block, f"{{{NS_URI}}}{xml_tag}") + e.text = val + + return entry -## parse_names function test ## @pytest.mark.parametrize( - ("xml_str", "cdm_id", "expected"), + "entry_kwargs, cdm_id, 
expected", [ - # Only top-level + # Only ( - """ - MainProteinName - """, - "CDM:001", - [ - { - "entity_id": "CDM:001", - "name": "MainProteinName", - "description": "UniProt protein name", - "source": "UniProt", - } - ], - ), - # RecommendedName (fullName and shortName) - ( - """ - - - RecFullName - RecShort - - - """, - "CDM:002", - [ - { - "entity_id": "CDM:002", - "name": "RecFullName", - "description": "UniProt recommended full name", - "source": "UniProt", - }, - { - "entity_id": "CDM:002", - "name": "RecShort", - "description": "UniProt recommended short name", - "source": "UniProt", - }, - ], + {"names": ["ProteinA"]}, + "cdm_1", + { + ("ProteinA", "UniProt entry name"), + }, ), - # AlternativeName (fullName and shortName) + # entry name + recommended full name ( - """ - - - AltFullName1 - AltShort1 - - - AltFullName2 - - - """, - "CDM:003", - [ - { - "entity_id": "CDM:003", - "name": "AltFullName1", - "description": "UniProt alternative full name", - "source": "UniProt", - }, - { - "entity_id": "CDM:003", - "name": "AltShort1", - "description": "UniProt alternative short name", - "source": "UniProt", - }, - { - "entity_id": "CDM:003", - "name": "AltFullName2", - "description": "UniProt alternative full name", - "source": "UniProt", + { + "names": ["ProteinB"], + "protein_names": { + "recommended": {"full": "Rec Full B", "short": None}, }, - ], + }, + "cdm_2", + { + ("ProteinB", "UniProt entry name"), + ("Rec Full B", "UniProt recommended full name"), + }, ), - # Mixed: top-level and + # everything ( - """ - TopLevel - - - MixedFull - - - """, - "CDM:004", - [ - { - "entity_id": "CDM:004", - "name": "TopLevel", - "description": "UniProt protein name", - "source": "UniProt", - }, - { - "entity_id": "CDM:004", - "name": "MixedFull", - "description": "UniProt recommended full name", - "source": "UniProt", + { + "names": ["ProteinC"], + "protein_names": { + "recommended": {"full": "Rec Full C", "short": "Rec Short C"}, + "alternative": {"full": "Alt Full C", 
"short": "Alt Short C"}, }, - ], - ), - # No names at all - ( - """ - """, - "CDM:005", - [], + }, + "cdm_3", + { + ("ProteinC", "UniProt entry name"), + ("Rec Full C", "UniProt recommended full name"), + ("Rec Short C", "UniProt recommended short name"), + ("Alt Full C", "UniProt alternative full name"), + ("Alt Short C", "UniProt alternative short name"), + }, ), ], ) -def test_parse_names(xml_str: str, cdm_id: str, expected: list[dict[str, str]]) -> None: - entry = ET.fromstring(xml_str) - result = parse_names(entry, cdm_id) - assert result == expected +def test_parse_names_parametrized(entry_kwargs, cdm_id, expected): + entry = make_entry(**entry_kwargs) + rows = parse_names(entry, cdm_id) -""" - - This test ensures parse_protein_info works correctly for different combinations of data - Including cases with no protein info, sequence only, existence only or EC numbers - - This approach thoroughly validates that parse_protein_info can accurately extract, combine and structure metadata field. 
+ # ---- row count ---- + assert len(rows) == len(expected) - Include: - EC Number, - existence evidence, - sequence + # ---- content ---- + observed = {(r["name"], r["description"]) for r in rows} + assert observed == expected -""" + # ---- entity_id and source ---- + for r in rows: + assert r["entity_id"] == cdm_id + assert r["source"] == "UniProt" -## parse_protein_info function test ## @pytest.mark.parametrize( - ("xml_str", "cdm_id", "expected"), + "build_entry, cdm_id, expected", [ - # There are multiple ecNumbers under the recommend names - ( - """ - - - 1.2.3.4 - 5.6.7.8 - - - """, - "CDM:001", - {"ec_numbers": ["1.2.3.4", "5.6.7.8"]}, - ), - # alternativeName has EC Number + # -------------------------------------------------- + # Empty entry -> None + # -------------------------------------------------- ( - """ - - - 3.3.3.3 - - - """, - "CDM:002", - {"ec_numbers": ["3.3.3.3"]}, + lambda: ET.Element(f"{{{NS_URI}}}entry"), + "cdm_1", + None, ), - # If have both proteinExistence evidence and existence + # -------------------------------------------------- + # Only EC numbers + # -------------------------------------------------- ( - """ - - """, - "CDM:003", + lambda: ( + lambda entry: ( + ET.SubElement( + ET.SubElement( + ET.SubElement(entry, f"{{{NS_URI}}}protein"), + f"{{{NS_URI}}}recommendedName", + ), + f"{{{NS_URI}}}ecNumber", + ).__setattr__("text", "1.1.1.1"), + entry, + )[1] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_2", { - "protein_id": "CDM:003", - "evidence_for_existence": "evidence at protein level", + "ec_numbers": "1.1.1.1", }, ), - # Sequence only + # -------------------------------------------------- + # Only sequence + entry modified + # -------------------------------------------------- ( - """ - - MAGNLSKVAAVSGVAAAVLGK - - """, - "CDM:004", + lambda: ( + lambda entry: ( + entry.set("modified", "2024-01-01"), + ET.SubElement( + entry, + f"{{{NS_URI}}}sequence", + { + "length": "100", + "mass": "12345", + "checksum": "ABC", + 
"version": "2", + }, + ).__setattr__("text", "MKTIIALSY"), + entry, + )[2] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_3", { - "length": "357", + "length": "100", "mass": "12345", - "checksum": "ABCD", - "modified": "2024-05-21", + "checksum": "ABC", "sequence_version": "2", - "sequence": "MAGNLSKVAAVSGVAAAVLGK", + "sequence": "MKTIIALSY", + "entry_modified": "2024-01-01", }, ), - # Combine with three elements: proteinExistence, sequence and ecNumbers + # -------------------------------------------------- + # Everything + # -------------------------------------------------- ( - """ - - - 3.3.3.3 - - - 8.8.8.8 - - - - - MKTLLTGAAT - - """, - "CDM:005", + lambda: ( + lambda entry: ( + entry.set("modified", "2024-02-02"), + # protein + EC + ET.SubElement( + ET.SubElement( + ET.SubElement(entry, f"{{{NS_URI}}}protein"), + f"{{{NS_URI}}}recommendedName", + ), + f"{{{NS_URI}}}ecNumber", + ).__setattr__("text", "3.5.4.4"), + # proteinExistence + ET.SubElement( + entry, + f"{{{NS_URI}}}proteinExistence", + {"type": "evidence at protein level"}, + ), + # sequence + ET.SubElement( + entry, + f"{{{NS_URI}}}sequence", + { + "length": "250", + "mass": "99999", + "checksum": "XYZ", + "modified": "2023-12-01", + "version": "1", + }, + ).__setattr__("text", "MADEUPSEQUENCE"), + entry, + )[4] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_4", { - "ec_numbers": ["3.3.3.3", "8.8.8.8"], - "protein_id": "CDM:005", - "evidence_for_existence": "evidence at transcript level", - "length": "10", - "mass": "1000", + "ec_numbers": "3.5.4.4", + "protein_id": "cdm_4", + "evidence_for_existence": "evidence at protein level", + "length": "250", + "mass": "99999", "checksum": "XYZ", - "modified": "2021-12-01", + "modified": "2023-12-01", "sequence_version": "1", - "sequence": "MKTLLTGAAT", + "sequence": "MADEUPSEQUENCE", + "entry_modified": "2024-02-02", }, ), - # return None - ("""""", "CDM:006", None), ], ) -def test_parse_protein_info(xml_str: str, cdm_id: str, expected: dict[str, Any]) -> 
None: - entry = ET.fromstring(xml_str) - result = parse_protein_info(entry, cdm_id) - assert result == expected - +def test_parse_protein_info(build_entry, cdm_id, expected): + entry = build_entry() -""" - - This parameterized pytest function verifies the behavior of the parse_evidence_map function - for different UniProt XML entry structures involving evidence elements. - - xml_str: Simulates a UniProt entry with various and sub-structures, - including cases with multiple evidence elements, missing sources, or no evidence at all. - - expected: A dictionary mapping evidence keys to their extracted details—such as evidence type, - supporting objects, and publication references. - - Ensure parse_evidence_map: - Accurately extract evidence keys and types - Correctly classify supporting objects and publication references - Handle entries with absent sources or evidence elements - Represent all relevant evidence metadata in the required structure + result = parse_protein_info(entry, cdm_id) -""" + if expected is None: + assert result is None + else: + assert isinstance(result, dict) + assert result == expected -## parse_evidence_map function test ## @pytest.mark.parametrize( - ("xml_str", "expected"), + "build_xml, expected", [ - # Single evidence,include PubMed and supporting object + # -------------------------------------------------- + # No evidence elements + # -------------------------------------------------- ( - """ - - - - - - - """, - { - "1": { - "evidence_type": "ECO:0000255", - "supporting_objects": ["Ensembl:ENSG00001"], - "publications": ["PMID:123456"], - } - }, + lambda: ET.Element(f"{{{NS_URI}}}entry"), + {}, ), - # multiple evidences + # -------------------------------------------------- + # Evidence without key + # -------------------------------------------------- ( - """ - - - - - - - - - - - """, - { - "E1": { - "evidence_type": "ECO:0000313", - "supporting_objects": None, - "publications": ["PMID:654321"], - }, - "E2": { - "evidence_type": 
"ECO:0000250", - "supporting_objects": ["PDB:2N7Q"], - "publications": None, - }, - }, + lambda: ( + lambda entry: ( + ET.SubElement(entry, f"{{{NS_URI}}}evidence", {"type": "ECO:0000269"}), + entry, + )[1] + )(ET.Element(f"{{{NS_URI}}}entry")), + {}, ), - # no source + # -------------------------------------------------- + # Evidence with key, no source + # -------------------------------------------------- ( - """ - - """, + lambda: ( + lambda entry: ( + ET.SubElement( + entry, + f"{{{NS_URI}}}evidence", + {"key": "1", "type": "ECO:0000313"}, + ), + entry, + )[1] + )(ET.Element(f"{{{NS_URI}}}entry")), { - "X1": { - "evidence_type": "ECO:9999999", - "supporting_objects": None, - "publications": None, + "1": { + "evidence_type": "ECO:0000313", } }, ), - # no evidence - ( - """ - """, - {}, - ), - # one evidence with multiple supporting objects + # -------------------------------------------------- + # Evidence with PUBMED with other refs + # -------------------------------------------------- ( - """ - - - - - - - """, + lambda: ( + lambda entry: ( + lambda ev: ( + ET.SubElement( + ET.SubElement(ev, f"{{{NS_URI}}}source"), + f"{{{NS_URI}}}dbReference", + {"type": "PubMed", "id": "12345"}, + ), + ET.SubElement( + ET.SubElement(ev, f"{{{NS_URI}}}source"), + f"{{{NS_URI}}}dbReference", + {"type": "GO", "id": "GO:0008150"}, + ), + entry, + )[2] + )( + ET.SubElement( + entry, + f"{{{NS_URI}}}evidence", + {"key": "E2", "type": "ECO:0000269"}, + ) + ) + )(ET.Element(f"{{{NS_URI}}}entry")), { - "K1": { + "E2": { "evidence_type": "ECO:0000269", - "supporting_objects": ["Ensembl:ENS1", "RefSeq:RS123"], - "publications": None, + "publications": ["PMID:12345"], } }, ), ], ) -def test_parse_evidence_map(xml_str: str, expected: dict[str, Any]) -> None: - entry = ET.fromstring(xml_str) +def test_parse_evidence_map_parametrized(build_xml, expected): + entry = build_xml() result = parse_evidence_map(entry) - assert result == expected - -""" - - xml_strings: models a UniProt entry 
with different types of possible associations - cdm_id: uniquely identifies the protein being parsed - evidence_map: supplies external evidence metadata for associations - expected: list of association dictionaries - - Arg: - The function correctly links proteins to organism taxonomy. - Cross-references are properly included, evidence metadata is correctly merged. - Associations derived from catalytic activity and cofactor comments are correctly generated. - All combinations and edge cases are handled robustly. - -""" + assert isinstance(result, dict) + assert result == expected -## parse_associations function test ## @pytest.mark.parametrize( - ("xml_str", "cdm_id", "evidence_map", "expected"), + "build_xml, cdm_id, evidence_map, expected", [ - # organism association(NCBI Taxonomy dbReference) + # -------------------------------------------------- + # Taxonomy association only + # -------------------------------------------------- ( - """ - - - - """, - "CDM:1", + lambda: ( + lambda entry: ( + ET.SubElement( + ET.SubElement(entry, f"{{{NS_URI}}}organism"), + f"{{{NS_URI}}}dbReference", + {"type": "NCBI Taxonomy", "id": "1234"}, + ), + entry, + )[1] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_1", {}, - [{"subject": "CDM:1", "object": "NCBITaxon:9606"}], - ), - # dbReference with evidence key - ( - """ - - """, - "CDM:2", - { - "E1": { - "evidence_type": "ECO:0000250", - "supporting_objects": ["Ensembl:ENS1"], - "publications": ["PMID:1234"], - } - }, [ { - "subject": "CDM:2", - "object": "PDB:2N7Q", - "evidence_type": "ECO:0000250", - "supporting_objects": ["Ensembl:ENS1"], - "publications": ["PMID:1234"], + "subject": "cdm_1", + "object": "NCBITaxon:1234", + "predicate": "in_taxon", } ], ), - # comment catalytic activity (reaction) with evidence key + # -------------------------------------------------- + # Catalytic activity with evidence + # -------------------------------------------------- ( - """ - - - - - - """, - "CDM:3", + lambda: ( + lambda entry: ( + 
lambda comment: ( + lambda reaction: ( + ET.SubElement( + reaction, + f"{{{NS_URI}}}dbReference", + {"type": "Rhea", "id": "RHEA:12345"}, + ), + entry, + )[1] + )( + ET.SubElement( + comment, + f"{{{NS_URI}}}reaction", + {"evidence": "E1"}, + ) + ) + )( + ET.SubElement( + entry, + f"{{{NS_URI}}}comment", + {"type": "catalytic activity"}, + ) + ) + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_2", { - "E2": { - "evidence_type": "ECO:0000313", - "publications": ["PMID:2222"], + "E1": { + "evidence_type": "ECO:0000269", + "publications": ["PMID:12345"], } }, [ { - "subject": "CDM:3", + "subject": "cdm_2", "predicate": "catalyzes", - "object": "Rhea:12345", - "evidence_type": "ECO:0000313", - "publications": ["PMID:2222"], + "object": "Rhea:RHEA:12345", + "evidence_type": "ECO:0000269", + "publications": ["PMID:12345"], } ], ), - # Comment cofactor without evidence + # -------------------------------------------------- + # Cofactor association + # -------------------------------------------------- ( - """ - - - - - - """, - "CDM:4", + lambda: ( + lambda entry: ( + lambda comment: ( + ET.SubElement( + ET.SubElement( + comment, + f"{{{NS_URI}}}cofactor", + ), + f"{{{NS_URI}}}dbReference", + {"type": "ChEBI", "id": "CHEBI:15377"}, + ), + entry, + )[1] + )( + ET.SubElement( + entry, + f"{{{NS_URI}}}comment", + {"type": "cofactor"}, + ) + ) + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_3", {}, [ { - "subject": "CDM:4", + "subject": "cdm_3", "predicate": "requires_cofactor", "object": "ChEBI:CHEBI:15377", } ], ), - # Several relevant relationship(with organism and dbReference) - ( - """ - - - - - """, - "CDM:5", - {}, - [ - {"subject": "CDM:5", "object": "NCBITaxon:562"}, - {"subject": "CDM:5", "object": "RefSeq:NP_414543"}, - ], - ), - # if it is empty entry, return to [] - ("""""", "CDM:6", {}, []), ], ) -def test_parse_associations( - xml_str: str, cdm_id: str, evidence_map: dict[str, Any], expected: list[dict[str, str]] -) -> None: - entry = ET.fromstring(xml_str) - result 
= parse_associations(entry, cdm_id, evidence_map) - assert result == expected - +def test_parse_associations_parametrized(build_xml, cdm_id, evidence_map, expected): + entry = build_xml() -""" - - xml_str: Uniprot entry include , , - Refer: PubMed, DOI, GeneBank, DDBJ, EMBL - - Output: List of publication identifier - - Arg: - Extract publication of references - Recognize and format database types ( with prefixing “PMID:”, “DOI:”) - Handle entries with multiple or mixed publication types - Return an empty list if no publication data. + result = parse_associations(entry, cdm_id, evidence_map) -""" + assert isinstance(result, list) + assert result == expected -## parse_publications function test ## @pytest.mark.parametrize( - ("xml_str", "expected"), + "build_xml, cdm_id, expected", [ - # Single PubMed + # -------------------------------------------------- + # No dbReference + # -------------------------------------------------- ( - """ - - - - - - """, - ["PMID:12345"], + lambda: ET.Element(f"{{{NS_URI}}}entry"), + "cdm_1", + [], ), - # Multiple types include (PubMed, DOI, GenBank) + # -------------------------------------------------- + # dbReference with CURIE id + # -------------------------------------------------- ( - """ - - - - - - - - """, - ["PMID:55555", "DOI:10.1000/j.jmb.2020.01.001"], + lambda: ( + lambda entry: ( + ET.SubElement( + entry, + f"{{{NS_URI}}}dbReference", + {"type": "GO", "id": "GO:0008150"}, + ), + entry, + )[1] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_2", + [ + { + "entity_id": "cdm_2", + "xref_type": "GO", + "xref_value": "GO:0008150", + "xref": "GO:0008150", + } + ], ), - # Multiple references + # -------------------------------------------------- + # dbReference without CURIE (prefix) + # -------------------------------------------------- ( - """ - - - - - - - - - - - """, - ["DOI:10.1000/jmb.123456", "PMID:98765"], + lambda: ( + lambda entry: ( + ET.SubElement( + entry, + f"{{{NS_URI}}}dbReference", + {"type": "CDD", "id": 
"cd04253"}, + ), + entry, + )[1] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_3", + [ + { + "entity_id": "cdm_3", + "xref_type": "CDD", + "xref_value": "cd04253", + "xref": "CDD:cd04253", + } + ], + ), + # -------------------------------------------------- + # Mixed dbReferences + # -------------------------------------------------- + ( + lambda: ( + lambda entry: ( + ET.SubElement( + entry, + f"{{{NS_URI}}}dbReference", + {"type": "GO", "id": "GO:0003674"}, + ), + ET.SubElement( + entry, + f"{{{NS_URI}}}dbReference", + {"type": "PDB", "id": "1ABC"}, + ), + entry, + )[2] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_4", + [ + { + "entity_id": "cdm_4", + "xref_type": "GO", + "xref_value": "GO:0003674", + "xref": "GO:0003674", + }, + { + "entity_id": "cdm_4", + "xref_type": "PDB", + "xref_value": "1ABC", + "xref": "PDB:1ABC", + }, + ], ), - # dbReference: DDBJ and EMBL + # -------------------------------------------------- + # Missing type or id + # -------------------------------------------------- ( - """ - - - - - - - """, + lambda: ( + lambda entry: ( + ET.SubElement( + entry, + f"{{{NS_URI}}}dbReference", + {"type": "GO"}, # missing id + ), + ET.SubElement( + entry, + f"{{{NS_URI}}}dbReference", + {"id": "123"}, # missing type + ), + entry, + )[2] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_5", [], ), - # no publication - ("""""", []), ], ) -def test_parse_publications(xml_str: str, expected: list[str]) -> None: - entry = ET.fromstring(xml_str) - result = parse_publications(entry) +def test_parse_cross_references_parametrized(build_xml, cdm_id, expected): + entry = build_xml() + + result = parse_cross_references(entry, cdm_id) + + assert isinstance(result, list) assert result == expected -## parse_uniprot_entry function test ## @pytest.mark.parametrize( - ("xml_str", "datasource_name", "prev_created"), + "build_xml, cdm_id, expected", [ + # -------------------------------------------------- + # No accession + # 
-------------------------------------------------- ( - """ - - P12345 - ProteinX - - - ProteinX Full Name - - - - - - - - - - - - """, - "UniProt import", - None, + lambda: ET.Element(f"{{{NS_URI}}}entry"), + "cdm_1", + [], + ), + # -------------------------------------------------- + # Single accession + # -------------------------------------------------- + ( + lambda: ( + lambda entry: ( + ET.SubElement(entry, f"{{{NS_URI}}}accession").__setattr__("text", "P12345"), + entry, + )[1] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_2", + [ + { + "entity_id": "cdm_2", + "identifier": "UniProt:P12345", + "source": "UniProt", + "description": "UniProt accession", + } + ], + ), + # -------------------------------------------------- + # Multiple accessions + # -------------------------------------------------- + ( + lambda: ( + lambda entry: ( + ET.SubElement(entry, f"{{{NS_URI}}}accession").__setattr__("text", "Q11111"), + ET.SubElement(entry, f"{{{NS_URI}}}accession").__setattr__("text", "Q22222"), + entry, + )[2] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_3", + [ + { + "entity_id": "cdm_3", + "identifier": "UniProt:Q11111", + "source": "UniProt", + "description": "UniProt accession", + }, + { + "entity_id": "cdm_3", + "identifier": "UniProt:Q22222", + "source": "UniProt", + "description": "UniProt accession", + }, + ], + ), + # -------------------------------------------------- + # parse_identifiers_generic already sets source/description → setdefault + # -------------------------------------------------- + ( + lambda: ( + lambda entry: ( + ET.SubElement(entry, f"{{{NS_URI}}}accession").__setattr__("text", "A0A000"), + entry, + )[1] + )(ET.Element(f"{{{NS_URI}}}entry")), + "cdm_4", + [ + { + "entity_id": "cdm_4", + "identifier": "UniProt:A0A000", + "source": "UniProt", # remains + "description": "UniProt accession", # remains + } + ], ), ], ) -def test_parse_uniprot_entry(xml_str: str, datasource_name: str, prev_created: None) -> None: - entry = 
ET.fromstring(xml_str) - cdm_id = generate_cdm_id() - - current_timestamp = "2024-07-17T13:00:00Z" - - record = parse_uniprot_entry(entry, cdm_id, current_timestamp, datasource_name, prev_created) - - entity = record["entity"] - assert entity["entity_type"] == "protein" - assert entity["data_source"] == datasource_name - assert entity["version"] == "3" - assert entity["uniprot_created"] == "2020-01-01" - assert entity["uniprot_modified"] == "2021-01-01" - assert entity["entity_id"].startswith("CDM:") - - # identifiers/names/associations/publications - assert isinstance(record["identifiers"], list) - assert isinstance(record["names"], list) - assert isinstance(record["associations"], list) - assert isinstance(record["publications"], list) +def test_parse_identifiers_parametrized(build_xml, cdm_id, expected): + entry = build_xml() + + result = parse_identifiers(entry, cdm_id) + + assert isinstance(result, list) + assert result == expected diff --git a/tests/parsers/test_uniref.py b/tests/parsers/test_uniref.py index 630949c..965cedc 100644 --- a/tests/parsers/test_uniref.py +++ b/tests/parsers/test_uniref.py @@ -1,27 +1,114 @@ -"""Tests for the UniRef importer.""" +import os +import sys -import datetime as dt +sys.path.append(os.path.abspath(os.path.dirname(__file__))) + +import gzip +import tempfile import textwrap import xml.etree.ElementTree as ET - +from datetime import datetime +from types import SimpleNamespace import pytest from cdm_data_loader_utils.parsers.uniref import ( add_cluster_members, - cdm_entity_id, extract_cluster, extract_cross_refs, get_accession_and_seed, get_timestamps, + load_existing_created, + parse_uniref_entry, + parse_uniref_xml, ) +NS = {"ns": "http://uniprot.org/uniref"} + + +class FakeSparkDF: + """A fake DataFrame returned by spark.read.format().load().select().""" + + def __init__(self, rows): + self._rows = rows + + def collect(self): + return self._rows + + +class FakeSparkReader: + """Mock 
spark.read.format('delta').load().select() chain""" + + def __init__(self, rows=None, fail=False): + self._rows = rows + self._fail = fail + + def format(self, fmt): + assert fmt == "delta" + return self + + def load(self, path): + if self._fail: + raise Exception("Table does not exist") + return self + + def select(self, *cols): + return FakeSparkDF(self._rows) + @pytest.mark.parametrize( - ("accession", "expected_prefix"), - [("A0B0123456", "CDM:"), ("P01234", "CDM:"), ("", None), (None, None)], + "entity_table, expected", + [ + (None, {}), # no path + ("", {}), # empty path + ], ) -def test_cdm_entity_id(accession: str | None, expected_prefix: str | None) -> None: - """Ensure that CDM entities start with the appropriate prefix.""" +def test_load_existing_created_no_path(entity_table, expected): + """Should return empty dict when entity_table path is missing.""" + fake_spark = SimpleNamespace() + assert load_existing_created(fake_spark, entity_table) == expected + + +def test_load_existing_created_success(monkeypatch): + """Delta table exists: should return dict of id → created timestamp.""" + rows = [ + {"data_source_entity_id": "UniRef100_A", "created": "2024-01-01T00:00:00"}, + {"data_source_entity_id": "UniRef100_B", "created": "2024-01-02T00:00:00"}, + ] + + fake_reader = FakeSparkReader(rows=rows) + + # Patch spark.read to our fake reader + fake_spark = SimpleNamespace(read=fake_reader) + + result = load_existing_created(fake_spark, "/fake/path/entity") + + assert result == { + "UniRef100_A": "2024-01-01T00:00:00", + "UniRef100_B": "2024-01-02T00:00:00", + } + + +def test_load_existing_created_missing_table(monkeypatch): + """If Delta table does not exist (load fails), return empty dict.""" + fake_reader = FakeSparkReader(fail=True) + + fake_spark = SimpleNamespace(read=fake_reader) + + result = load_existing_created(fake_spark, "/fake/path/entity") + + assert result == {} + + +@pytest.mark.parametrize( + "accession,expected_prefix", + [ + ("A0B0123456", 
"CDM:"), + ("P01234", "CDM:"), + ("", None), + (None, None), + ], +) +def test_cdm_entity_id(accession, expected_prefix): result = cdm_entity_id(accession) if expected_prefix is None: assert result is None @@ -30,7 +117,7 @@ def test_cdm_entity_id(accession: str | None, expected_prefix: str | None) -> No @pytest.mark.parametrize( - ("xml_str", "expected_name"), + "xml_str, expected_name", [ ( "" @@ -43,31 +130,33 @@ def test_cdm_entity_id(accession: str | None, expected_prefix: str | None) -> No ), ], ) -def test_extract_cluster(xml_str: str, expected_name: str) -> None: - """Test cluster extraction from XML.""" +def test_extract_cluster(xml_str, expected_name): ns = {"ns": "http://uniprot.org/uniref"} elem = ET.fromstring(xml_str) - cluster_id, name = extract_cluster(elem, ns) - assert cluster_id.startswith("CDM:") + + uniref_id = elem.attrib.get("id") + cluster_id, name = extract_cluster(elem, ns, uniref_id) + + assert cluster_id.startswith("cdm_ccol_") assert isinstance(cluster_id, str) assert name == expected_name @pytest.mark.parametrize( - ("uniref_id", "existing_created", "now", "expected"), + "uniref_id, existing_created, now, expected", [ # Has existing_created ( "UniRef100_A", {"UniRef100_A": "2024-01-01T00:00:00"}, - dt.datetime(2025, 1, 1, 0, 0, 0, tzinfo=dt.UTC), + datetime(2025, 1, 1, 0, 0, 0), ("2025-01-01T00:00:00", "2024-01-01T00:00:00"), ), # There is no existing_created ( "UniRef100_B", {"UniRef100_A": "2024-01-01T00:00:00"}, - dt.datetime(2025, 1, 1, 0, 0, 0, tzinfo=dt.UTC), + datetime(2025, 1, 1, 0, 0, 0), ("2025-01-01T00:00:00", "2025-01-01T00:00:00"), ), # There is no existing_created,also not provide "now" @@ -79,8 +168,7 @@ def test_extract_cluster(xml_str: str, expected_name: str) -> None: ), ], ) -def test_get_timestamps(uniref_id: str, existing_created: str, now: dt.datetime, expected: tuple[str] | None) -> None: - """Test timestamps.""" +def test_get_timestamps(uniref_id, existing_created, now, expected): result = 
get_timestamps(uniref_id, existing_created, now) if expected is not None: assert result == expected @@ -88,11 +176,11 @@ def test_get_timestamps(uniref_id: str, existing_created: str, now: dt.datetime, formatted_now, created_time = result assert formatted_now == created_time assert isinstance(formatted_now, str) - assert len(formatted_now) == 19 # "YYYY-MM-DDTHH:MM:SS" ---> 19 bites + assert len(formatted_now) == 19 # "YYYY-MM-DDTHH:MM:SS" @pytest.mark.parametrize( - ("xml_str", "expected_acc", "expected_is_seed"), + "xml_str, expected_acc, expected_is_seed", [ # Have accession and isSeed ( @@ -105,7 +193,7 @@ def test_get_timestamps(uniref_id: str, existing_created: str, now: dt.datetime, "A0A009HJL9", True, ), - # Only accession, no isSeed + # Only accession,No isSeed ( """ @@ -115,7 +203,7 @@ def test_get_timestamps(uniref_id: str, existing_created: str, now: dt.datetime, "A0A241V597", False, ), - # No accession, only id + # No accession,only id ( """ @@ -124,11 +212,14 @@ def test_get_timestamps(uniref_id: str, existing_created: str, now: dt.datetime, False, ), # None - (None, None, False), + ( + None, + None, + False, + ), ], ) -def test_get_accession_and_seed(xml_str: str | None, expected_acc: str | None, expected_is_seed: bool) -> None: - """Test parsing of UniRef entries for accession and seed status.""" +def test_get_accession_and_seed(xml_str, expected_acc, expected_is_seed): ns = {"ns": "http://uniprot.org/uniref"} dbref = ET.fromstring(xml_str) if xml_str else None acc, is_seed = get_accession_and_seed(dbref, ns) @@ -136,9 +227,10 @@ def test_get_accession_and_seed(xml_str: str | None, expected_acc: str | None, e assert is_seed == expected_is_seed -def make_entry_with_members(member_xmls: list[str], ns_uri: str = "http://uniprot.org/uniref") -> ET.Element: +def make_entry_with_members(member_xmls, ns_uri="http://uniprot.org/uniref"): """ - Receives a list of xml strings from dbReference, generates an element with child nodes. 
+ Receives a list of xml strings from dbReference, + generates an element with child nodes """ entry_elem = ET.Element(f"{{{ns_uri}}}entry") for dbref_xml in member_xmls: @@ -149,10 +241,10 @@ def make_entry_with_members(member_xmls: list[str], ns_uri: str = "http://unipro @pytest.mark.parametrize( - ("repr_xml", "member_xmls", "expected"), + "repr_xml, member_xmls, expected", [ pytest.param( - # representative member, two members + # representative member textwrap.dedent(""" @@ -173,23 +265,27 @@ def make_entry_with_members(member_xmls: list[str], ns_uri: str = "http://unipro """), ], [ - ("CLUSTER_X", "CDM:", "true", "true", "1.0"), - ("CLUSTER_X", "CDM:", "false", "false", "1.0"), - ("CLUSTER_X", "CDM:", "false", "true", "1.0"), + # cdm_prot_ + ("CLUSTER_X", "cdm_prot_", "true", "true", "1.0"), + ("CLUSTER_X", "cdm_prot_", "false", "false", "1.0"), + ("CLUSTER_X", "cdm_prot_", "false", "true", "1.0"), ], id="with-representative-and-members", ), pytest.param( - # Only memebers, no representative member + # Only members, no representative member None, [ textwrap.dedent(""" - """) + """), + ], + [ + # cdm_prot_ + ("CLUSTER_X", "cdm_prot_", "false", "false", "1.0"), ], - [("CLUSTER_X", "CDM:", "false", "false", "1.0")], id="members-only", ), pytest.param( @@ -201,8 +297,8 @@ def make_entry_with_members(member_xmls: list[str], ns_uri: str = "http://unipro ), ], ) -def test_add_cluster_members(repr_xml: str | None, member_xmls: list[str], expected: list[tuple[str, ...]]) -> None: - """Test add_cluster_members with various representative/member combinations.""" +def test_add_cluster_members(repr_xml, member_xmls, expected): + """Test add_cluster_members with various representative/member combinations""" ns = {"ns": "http://uniprot.org/uniref"} cluster_id = "CLUSTER_X" @@ -226,11 +322,8 @@ def test_add_cluster_members(repr_xml: str | None, member_xmls: list[str], expec assert out[4] == score, f"Wrong score at idx {i}: {out[4]}" -XREF_TYPES = ["UniRef90 ID", "UniRef50 
ID", "UniParc ID"] - - @pytest.mark.parametrize( - ("dbref_props", "expected_xrefs"), + "dbref_props, expected_xrefs", [ ( # all cross-ref fields present @@ -265,20 +358,202 @@ def test_add_cluster_members(repr_xml: str | None, member_xmls: list[str], expec ), ], ) -def test_extract_cross_refs_param(dbref_props: list[tuple[str, str]], expected_xrefs: list[tuple[str, str]]) -> None: +def test_extract_cross_refs_param(dbref_props, expected_xrefs): """ - Test that extract_cross_refs correctly extracts all UniRef cross-reference fields. + Test that extract_cross_refs correctly extracts all UniRef cross-reference fields """ - dbref = ET.Element("{http://uniprot.org/uniref}dbReference", type="UniProtKB ID", id="TEST_ID") + dbref = ET.Element( + "{http://uniprot.org/uniref}dbReference", + type="UniProtKB ID", + id="TEST_ID", + ) for t, v in dbref_props: - ET.SubElement(dbref, "{http://uniprot.org/uniref}property", type=t, value=v) + ET.SubElement( + dbref, + "{http://uniprot.org/uniref}property", + type=t, + value=v, + ) ns = {"ns": "http://uniprot.org/uniref"} cross_reference_data = [] extract_cross_refs(dbref, cross_reference_data, ns) entity_id = cdm_entity_id("TEST_ID") - expected = {(entity_id, typ, val) for typ, val in expected_xrefs} + expected = set((entity_id, typ, val) for typ, val in expected_xrefs) got = set(cross_reference_data) assert got == expected + + +@pytest.mark.parametrize( + "xml_str, existing_created, expected_created, expect_member_count, expect_xref_count", + [ + pytest.param( + # CASE 1: + # The old creation time exists, it should be retained. 
+ """ + + Cluster: Example protein + + + + + + + + + + + + + + + + """, + {"UniRef100_TEST": "2020-01-01T00:00:00"}, + "2020-01-01T00:00:00", # expected created + 2, # 1 repr + 1 member + 1, # one UniParc xref + id="with_existing_created", + ), + pytest.param( + # CASE 2: + # No existing_created → created == updated + """ + + Cluster: New protein + + + + + + + + + """, + {}, + None, # meaning created == updated + 1, # only representative + 0, # no xrefs + id="no_existing_created", + ), + ], +) +def test_parse_uniref_entry_param(xml_str, existing_created, expected_created, expect_member_count, expect_xref_count): + elem = ET.fromstring(xml_str) + + result = parse_uniref_entry(elem, existing_created, NS) + + cluster_rows = result["cluster_data"] + entity_rows = result["entity_data"] + member_rows = result["cluster_member_data"] + xref_rows = result["cross_reference_data"] + + # ----------------------------- + # Validate cluster + # ----------------------------- + assert len(cluster_rows) == 1 + cluster_id = cluster_rows[0][0] + assert cluster_id.startswith("cdm_ccol_") + + # ----------------------------- + # Validate Entity + # ----------------------------- + assert len(entity_rows) == 1 + ( + ent_entity_id, + data_source_entity_id, + ent_type, + data_source, + updated, + created, + ) = entity_rows[0] + + assert ent_entity_id == cluster_id + assert data_source.startswith("UniRef") + assert ent_type == "Cluster" + + if expected_created is not None: + assert created == expected_created + assert updated != created + else: + # created == updated when no existing_created + assert created == updated + + # ----------------------------- + # Validate members + # ----------------------------- + assert len(member_rows) == expect_member_count + for row in member_rows: + cid, entity_id, _, _, _ = row + assert cid == cluster_id + assert entity_id.startswith("cdm_prot_") + + # ----------------------------- + # Validate xrefs + # ----------------------------- + assert len(xref_rows) 
== expect_xref_count + for e_id, x_type, x_val in xref_rows: + # xrefs use CDM: prefix + assert e_id.startswith("CDM:") + + +def make_fake_uniref_xml(num_entries=2): + """ + Create a minimal UniRef XML with N elements. + """ + entries = [] + for i in range(num_entries): + entries.append(f""" + + Cluster FAKE {i} + + + + + + + + """) + + xml = f""" + + {"".join(entries)} + + """ + return xml.strip().encode("utf-8") + + +@pytest.mark.parametrize("batch_size", [1, 2]) +def test_parse_uniref_xml_batch(batch_size): + # Prepare fake XML inside a gzipped temp file + with tempfile.NamedTemporaryFile(suffix=".xml.gz", delete=True) as tmp: + xml_bytes = make_fake_uniref_xml(num_entries=2) + + with gzip.open(tmp.name, "wb") as gz: + gz.write(xml_bytes) + + # No existing created timestamps + existing_created = {} + + # Call the parser + result = parse_uniref_xml(tmp.name, batch_size, existing_created) + + # Validate the number of parsed entries matches batch_size + assert len(result["cluster_data"]) == batch_size + assert len(result["entity_data"]) == batch_size + + # Member data: each entry has exactly 1 representative member → 1 row per entry + assert len(result["cluster_member_data"]) == batch_size + + # Cross references: none included in fake XML + assert len(result["cross_reference_data"]) == 0 + + # Validate cluster_id prefix + cluster_id = result["cluster_data"][0][0] + assert cluster_id.startswith("cdm_ccol_") diff --git a/tests/parsers/test_xml_utils.py b/tests/parsers/test_xml_utils.py new file mode 100644 index 0000000..fc6e3ba --- /dev/null +++ b/tests/parsers/test_xml_utils.py @@ -0,0 +1,49 @@ +import xml.etree.ElementTree as ET + +from cdm_data_loader_utils.parsers.xml_utils import ( + clean_dict, + get_attr, + get_text, + parse_db_references, +) + + +def test_get_text_and_get_attr_basic() -> None: + elem = ET.Element("tag", attrib={"id": "123"}) + elem.text = " hello " + + assert get_text(elem) == "hello" + assert get_text(None) is None + assert get_attr(elem, 
"id") == "123" + assert get_attr(elem, "missing") is None + + +def test_parse_db_references_pub_and_others() -> None: + ns = {"ns": "dummy"} + source = ET.Element("source") + db1 = ET.SubElement(source, "dbReference", attrib={"type": "PubMed", "id": "12345"}) + db2 = ET.SubElement(source, "dbReference", attrib={"type": "DOI", "id": "10.1000/xyz"}) + db3 = ET.SubElement(source, "dbReference", attrib={"type": "PDB", "id": "1ABC"}) + + db1.tag = "{dummy}dbReference" + db2.tag = "{dummy}dbReference" + db3.tag = "{dummy}dbReference" + + pubs, others = parse_db_references(source, ns) + + assert "PUBMED:12345" in pubs + assert "DOI:10.1000/xyz" in pubs + assert "PDB:1ABC" in others + + +def test_clean_dict_removes_nones_and_empty() -> None: + """Test that clean_dict removes None and empty values.""" + d = { + "a": 1, + "b": None, + "c": [], + "d": {}, + "e": "ok", + } + cleaned = clean_dict(d) + assert cleaned == {"a": 1, "e": "ok"} diff --git a/uv.lock b/uv.lock index ad945bf..3414a4e 100644 --- a/uv.lock +++ b/uv.lock @@ -281,7 +281,7 @@ wheels = [ [[package]] name = "berdl-notebook-python-base" version = "0.1.0" -source = { git = "https://github.com/BERDataLakehouse/spark_notebook_base.git#e6ff234b274a634c0a3e7cf7fdb08528bbe8a5ab" } +source = { git = "https://github.com/BERDataLakehouse/spark_notebook_base.git?rev=0.0.0-dec11#e6ff234b274a634c0a3e7cf7fdb08528bbe8a5ab" } dependencies = [ { name = "attrs" }, { name = "boto3" }, @@ -316,7 +316,7 @@ dependencies = [ [[package]] name = "berdl-notebook-utils" version = "0.0.1" -source = { git = "https://github.com/BERDataLakehouse/spark_notebook.git?subdirectory=notebook_utils#ce40d0035f3edb8a7725b0d57dfcfdfbc7d37f55" } +source = { git = "https://github.com/BERDataLakehouse/spark_notebook.git?subdirectory=notebook_utils#e3de07fe2fef2aa9097abe543cbdbc1bd4d42289" } dependencies = [ { name = "berdl-notebook-python-base" }, ] @@ -756,20 +756,20 @@ wheels = [ [[package]] name = "curies" -version = "0.12.5" +version = "0.12.7" 
source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a9/4c/fc5d51c21b99f802948a8b3079565806239c76e7b2f1f6702a603fe282f7/curies-0.12.5.tar.gz", hash = "sha256:57e4853045f8029c2564fbf2290221ff7a529034405076d1e82b7a8727b33dfc", size = 282912, upload-time = "2025-11-25T12:47:24.825Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ef/fc/8f73cbde9b2034e4b4f8524b4c5b7ce2a68d052ede8a486c0bc806c1f54d/curies-0.12.7.tar.gz", hash = "sha256:b51f422f6f8b93b35b583195222563327a00629d0ef8e889dc14606e31950e4f", size = 283292, upload-time = "2025-12-22T15:48:33.554Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8c/dd/29000adb47118edbf865a6e366fba294dcdacdf34322cedb23b8e7d30ae0/curies-0.12.5-py3-none-any.whl", hash = "sha256:e7fbb63cb49aeb389d46db64dae02f1563741084e033c2075cd1e163fdb1ead8", size = 69711, upload-time = "2025-11-25T12:47:23.058Z" }, + { url = "https://files.pythonhosted.org/packages/c6/65/c6118987bc902a1a5941d2028c49d91c2db55d5bec148b46d155a125543b/curies-0.12.7-py3-none-any.whl", hash = "sha256:9038d6afd6311328b072db51488af1ce162cb26c1a3cc497d2d00871ddb824a9", size = 70042, upload-time = "2025-12-22T15:48:32.508Z" }, ] [[package]] name = "dask" -version = "2025.11.0" +version = "2025.12.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -780,9 +780,9 @@ dependencies = [ { name = "pyyaml" }, { name = "toolz" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/db/33/eacaa72731f7fc64868caaf2d35060d50049eff889bd217263e68f76472f/dask-2025.11.0.tar.gz", hash = "sha256:23d59e624b80ee05b7cc8df858682cca58262c4c3b197ccf61da0f6543c8f7c3", size = 10984781, upload-time = "2025-11-06T16:56:51.535Z" } +sdist = { url = "https://files.pythonhosted.org/packages/49/ae/92fca08ff8fe3e8413842564dd55ee30c9cd9e07629e1bf4d347b005a5bf/dask-2025.12.0.tar.gz", hash = 
"sha256:8d478f2aabd025e2453cf733ad64559de90cf328c20209e4574e9543707c3e1b", size = 10995316, upload-time = "2025-12-12T14:59:10.885Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/54/a46920229d12c3a6e9f0081d1bdaeffad23c1826353ace95714faee926e5/dask-2025.11.0-py3-none-any.whl", hash = "sha256:08c35a8146c05c93b34f83cf651009129c42ee71762da7ca452fb7308641c2b8", size = 1477108, upload-time = "2025-11-06T16:56:44.892Z" }, + { url = "https://files.pythonhosted.org/packages/6f/3a/2121294941227c548d4b5f897a8a1b5f4c44a58f5437f239e6b86511d78e/dask-2025.12.0-py3-none-any.whl", hash = "sha256:4213ce9c5d51d6d89337cff69de35d902aa0bf6abdb8a25c942a4d0281f3a598", size = 1481293, upload-time = "2025-12-12T14:58:59.32Z" }, ] [package.optional-dependencies] @@ -814,19 +814,19 @@ dependencies = [ [[package]] name = "debugpy" -version = "1.8.18" +version = "1.8.19" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/62/1a/7cb5531840d7ba5d9329644109e62adee41f2f0083d9f8a4039f01de58cf/debugpy-1.8.18.tar.gz", hash = "sha256:02551b1b84a91faadd2db9bc4948873f2398190c95b3cc6f97dc706f43e8c433", size = 1644467, upload-time = "2025-12-10T19:48:07.236Z" } +sdist = { url = "https://files.pythonhosted.org/packages/73/75/9e12d4d42349b817cd545b89247696c67917aab907012ae5b64bbfea3199/debugpy-1.8.19.tar.gz", hash = "sha256:eea7e5987445ab0b5ed258093722d5ecb8bb72217c5c9b1e21f64efe23ddebdb", size = 1644590, upload-time = "2025-12-15T21:53:28.044Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fe/3f/45af037e91e308274a092eb6a86282865fb1f11148cdb7616e811aae33d7/debugpy-1.8.18-cp313-cp313-macosx_15_0_universal2.whl", hash = "sha256:75d14dd04b617ee38e46786394ec0dd5e1ac5e3d10ffb034fd6c7b72111174c2", size = 2538826, upload-time = "2025-12-10T19:48:29.434Z" }, - { url = 
"https://files.pythonhosted.org/packages/cc/f4/2de6bf624de05134d1bbe0a8750d484363cd212c3ade3d04f5c77d47d0ce/debugpy-1.8.18-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:1b224887af5121fa702f9f542968170d104e3f9cac827d85fdefe89702dc235c", size = 4292542, upload-time = "2025-12-10T19:48:30.836Z" }, - { url = "https://files.pythonhosted.org/packages/93/54/89de7ef84d5ac39fc64a773feaedd902536cc5295814cd22d19c6d9dea35/debugpy-1.8.18-cp313-cp313-win32.whl", hash = "sha256:636a5445a3336e4aba323a3545ca2bb373b04b0bc14084a4eb20c989db44429f", size = 5280460, upload-time = "2025-12-10T19:48:32.696Z" }, - { url = "https://files.pythonhosted.org/packages/4f/59/651329e618406229edbef6508a5aa05e43cd027f042740c5b27e46854b23/debugpy-1.8.18-cp313-cp313-win_amd64.whl", hash = "sha256:6da217ac8c1152d698b9809484d50c75bef9cc02fd6886a893a6df81ec952ff8", size = 5322399, upload-time = "2025-12-10T19:48:35.057Z" }, - { url = "https://files.pythonhosted.org/packages/36/59/5e8bf46a66ca9dfcd0ce4f35c07085aeb60d99bf5c52135973a4e197ed41/debugpy-1.8.18-cp314-cp314-macosx_15_0_universal2.whl", hash = "sha256:be7f622d250fe3429571e84572eb771023f1da22c754f28d2c60a10d74a4cc1b", size = 2537336, upload-time = "2025-12-10T19:48:36.463Z" }, - { url = "https://files.pythonhosted.org/packages/a1/5a/3b37cc266a69da83a4febaa4267bb2062d4bec5287036e2f23d9a30a788c/debugpy-1.8.18-cp314-cp314-manylinux_2_34_x86_64.whl", hash = "sha256:df8bf7cd78019d5d155213bf5a1818b36403d0c3758d669e76827d4db026b840", size = 4268696, upload-time = "2025-12-10T19:48:37.855Z" }, - { url = "https://files.pythonhosted.org/packages/de/4b/1e13586444440e5754b70055449b70afa187aaa167fa4c20c0c05d9c3b80/debugpy-1.8.18-cp314-cp314-win32.whl", hash = "sha256:32dd56d50fe15c47d0f930a7f0b9d3e5eb8ed04770bc6c313fba6d226f87e1e8", size = 5280624, upload-time = "2025-12-10T19:48:39.28Z" }, - { url = "https://files.pythonhosted.org/packages/7a/21/f8c12baa16212859269dc4c3e4b413778ec1154d332896d3c4cca96ac660/debugpy-1.8.18-cp314-cp314-win_amd64.whl", 
hash = "sha256:714b61d753cfe3ed5e7bf0aad131506d750e271726ac86e3e265fd7eeebbe765", size = 5321982, upload-time = "2025-12-10T19:48:41.086Z" }, - { url = "https://files.pythonhosted.org/packages/dc/0d/bf7ac329c132436c57124202b5b5ccd6366e5d8e75eeb184cf078c826e8d/debugpy-1.8.18-py2.py3-none-any.whl", hash = "sha256:ab8cf0abe0fe2dfe1f7e65abc04b1db8740f9be80c1274acb625855c5c3ece6e", size = 5286576, upload-time = "2025-12-10T19:48:56.071Z" }, + { url = "https://files.pythonhosted.org/packages/71/3d/388035a31a59c26f1ecc8d86af607d0c42e20ef80074147cd07b180c4349/debugpy-1.8.19-cp313-cp313-macosx_15_0_universal2.whl", hash = "sha256:91e35db2672a0abaf325f4868fcac9c1674a0d9ad9bb8a8c849c03a5ebba3e6d", size = 2538859, upload-time = "2025-12-15T21:53:50.478Z" }, + { url = "https://files.pythonhosted.org/packages/4a/19/c93a0772d0962294f083dbdb113af1a7427bb632d36e5314297068f55db7/debugpy-1.8.19-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:85016a73ab84dea1c1f1dcd88ec692993bcbe4532d1b49ecb5f3c688ae50c606", size = 4292575, upload-time = "2025-12-15T21:53:51.821Z" }, + { url = "https://files.pythonhosted.org/packages/5c/56/09e48ab796b0a77e3d7dc250f95251832b8bf6838c9632f6100c98bdf426/debugpy-1.8.19-cp313-cp313-win32.whl", hash = "sha256:b605f17e89ba0ecee994391194285fada89cee111cfcd29d6f2ee11cbdc40976", size = 5286209, upload-time = "2025-12-15T21:53:53.602Z" }, + { url = "https://files.pythonhosted.org/packages/fb/4e/931480b9552c7d0feebe40c73725dd7703dcc578ba9efc14fe0e6d31cfd1/debugpy-1.8.19-cp313-cp313-win_amd64.whl", hash = "sha256:c30639998a9f9cd9699b4b621942c0179a6527f083c72351f95c6ab1728d5b73", size = 5328206, upload-time = "2025-12-15T21:53:55.433Z" }, + { url = "https://files.pythonhosted.org/packages/f6/b9/cbec520c3a00508327476c7fce26fbafef98f412707e511eb9d19a2ef467/debugpy-1.8.19-cp314-cp314-macosx_15_0_universal2.whl", hash = "sha256:1e8c4d1bd230067bf1bbcdbd6032e5a57068638eb28b9153d008ecde288152af", size = 2537372, upload-time = "2025-12-15T21:53:57.318Z" }, + { url = 
"https://files.pythonhosted.org/packages/88/5e/cf4e4dc712a141e10d58405c58c8268554aec3c35c09cdcda7535ff13f76/debugpy-1.8.19-cp314-cp314-manylinux_2_34_x86_64.whl", hash = "sha256:d40c016c1f538dbf1762936e3aeb43a89b965069d9f60f9e39d35d9d25e6b809", size = 4268729, upload-time = "2025-12-15T21:53:58.712Z" }, + { url = "https://files.pythonhosted.org/packages/82/a3/c91a087ab21f1047db328c1d3eb5d1ff0e52de9e74f9f6f6fa14cdd93d58/debugpy-1.8.19-cp314-cp314-win32.whl", hash = "sha256:0601708223fe1cd0e27c6cce67a899d92c7d68e73690211e6788a4b0e1903f5b", size = 5286388, upload-time = "2025-12-15T21:54:00.687Z" }, + { url = "https://files.pythonhosted.org/packages/17/b8/bfdc30b6e94f1eff09f2dc9cc1f9cd1c6cde3d996bcbd36ce2d9a4956e99/debugpy-1.8.19-cp314-cp314-win_amd64.whl", hash = "sha256:8e19a725f5d486f20e53a1dde2ab8bb2c9607c40c00a42ab646def962b41125f", size = 5327741, upload-time = "2025-12-15T21:54:02.148Z" }, + { url = "https://files.pythonhosted.org/packages/25/3e/e27078370414ef35fafad2c06d182110073daaeb5d3bf734b0b1eeefe452/debugpy-1.8.19-py2.py3-none-any.whl", hash = "sha256:360ffd231a780abbc414ba0f005dad409e71c78637efe8f2bd75837132a41d38", size = 5292321, upload-time = "2025-12-15T21:54:16.024Z" }, ] [[package]] @@ -883,7 +883,7 @@ wheels = [ [[package]] name = "distributed" -version = "2025.11.0" +version = "2025.12.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -902,9 +902,9 @@ dependencies = [ { name = "urllib3" }, { name = "zict" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c5/be/085a60b627c1f2b795827ce41d4cc1e11e74ffdadcb5235ee5fb620f7929/distributed-2025.11.0.tar.gz", hash = "sha256:372c2f0c2faa890fc42188349969ba468161a9b356df49c4ca7d9a8d551a7ace", size = 2119140, upload-time = "2025-11-06T16:57:32.391Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4f/f7/25e4ed891f4b347a7c0e6ad6106b564938ddd6f1832aa03f1505d0949cb4/distributed-2025.12.0.tar.gz", hash = 
"sha256:b1e58f1b3d733885335817562ee1723379f23733e4ef3546f141080d9cb01a74", size = 2102841, upload-time = "2025-12-12T14:58:57.74Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/46/ec/da78855318971c2be94d0283a41de6941a6b9f16146fb00babc74903ae01/distributed-2025.11.0-py3-none-any.whl", hash = "sha256:1794ff25b19ba347ccce563fb1dd5898c3bb30f500b15f8c20ad373f6281b30f", size = 1009248, upload-time = "2025-11-06T16:57:28.714Z" }, + { url = "https://files.pythonhosted.org/packages/87/45/ca760deab4de448e6c0e3860fc187bcc49216eabda379f6ce68065158843/distributed-2025.12.0-py3-none-any.whl", hash = "sha256:35d18449002ea191e97f7e04a33e16f90c2243486be52d4d0f991072ea06b48a", size = 1008379, upload-time = "2025-12-12T14:58:54.195Z" }, ] [[package]] @@ -927,11 +927,11 @@ wheels = [ [[package]] name = "docutils" -version = "0.22.3" +version = "0.22.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d9/02/111134bfeb6e6c7ac4c74594e39a59f6c0195dc4846afbeac3cba60f1927/docutils-0.22.3.tar.gz", hash = "sha256:21486ae730e4ca9f622677b1412b879af1791efcfba517e4c6f60be543fc8cdd", size = 2290153, upload-time = "2025-11-06T02:35:55.655Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/b6/03bb70946330e88ffec97aefd3ea75ba575cb2e762061e0e62a213befee8/docutils-0.22.4.tar.gz", hash = "sha256:4db53b1fde9abecbb74d91230d32ab626d94f6badfc575d6db9194a49df29968", size = 2291750, upload-time = "2025-12-18T19:00:26.443Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/11/a8/c6a4b901d17399c77cd81fb001ce8961e9f5e04d3daf27e8925cb012e163/docutils-0.22.3-py3-none-any.whl", hash = "sha256:bd772e4aca73aff037958d44f2be5229ded4c09927fcf8690c577b66234d6ceb", size = 633032, upload-time = "2025-11-06T02:35:52.391Z" }, + { url = "https://files.pythonhosted.org/packages/02/10/5da547df7a391dcde17f59520a231527b8571e6f46fc8efb02ccb370ab12/docutils-0.22.4-py3-none-any.whl", hash = 
"sha256:d0013f540772d1420576855455d050a2180186c91c15779301ac2ccb3eeb68de", size = 633196, upload-time = "2025-12-18T19:00:18.077Z" }, ] [[package]] @@ -993,11 +993,11 @@ wheels = [ [[package]] name = "filelock" -version = "3.20.0" +version = "3.20.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/58/46/0028a82567109b5ef6e4d2a1f04a583fb513e6cf9527fcdd09afd817deeb/filelock-3.20.0.tar.gz", hash = "sha256:711e943b4ec6be42e1d4e6690b48dc175c822967466bb31c0c293f34334c13f4", size = 18922, upload-time = "2025-10-08T18:03:50.056Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a7/23/ce7a1126827cedeb958fc043d61745754464eb56c5937c35bbf2b8e26f34/filelock-3.20.1.tar.gz", hash = "sha256:b8360948b351b80f420878d8516519a2204b07aefcdcfd24912a5d33127f188c", size = 19476, upload-time = "2025-12-15T23:54:28.027Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/76/91/7216b27286936c16f5b4d0c530087e4a54eead683e6b0b73dd0c64844af6/filelock-3.20.0-py3-none-any.whl", hash = "sha256:339b4732ffda5cd79b13f4e2711a31b0365ce445d95d243bb996273d072546a2", size = 16054, upload-time = "2025-10-08T18:03:48.35Z" }, + { url = "https://files.pythonhosted.org/packages/e3/7f/a1a97644e39e7316d850784c642093c99df1290a460df4ede27659056834/filelock-3.20.1-py3-none-any.whl", hash = "sha256:15d9e9a67306188a44baa72f569d2bfd803076269365fdea0934385da4dc361a", size = 16666, upload-time = "2025-12-15T23:54:26.874Z" }, ] [[package]] @@ -1338,14 +1338,14 @@ wheels = [ [[package]] name = "importlib-metadata" -version = "8.7.0" +version = "8.7.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "zipp" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/76/66/650a33bd90f786193e4de4b3ad86ea60b53c89b669a5c7be931fac31cdb0/importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000", size = 56641, upload-time = "2025-04-27T15:29:01.736Z" } +sdist = { 
url = "https://files.pythonhosted.org/packages/f3/49/3b30cad09e7771a4982d9975a8cbf64f00d4a1ececb53297f1d9a7be1b10/importlib_metadata-8.7.1.tar.gz", hash = "sha256:49fef1ae6440c182052f407c8d34a68f72efc36db9ca90dc0113398f2fdde8bb", size = 57107, upload-time = "2025-12-21T10:00:19.278Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" }, + { url = "https://files.pythonhosted.org/packages/fa/5e/f8e9a1d23b9c20a551a8a02ea3637b4642e22c2626e3a13a9a29cdea99eb/importlib_metadata-8.7.1-py3-none-any.whl", hash = "sha256:5a1f80bf1daa489495071efbb095d75a634cf28a8bc299581244063b53176151", size = 27865, upload-time = "2025-12-21T10:00:18.329Z" }, ] [[package]] @@ -1849,7 +1849,7 @@ wheels = [ [[package]] name = "jupyterlab" -version = "4.5.0" +version = "4.5.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "async-lru" }, @@ -1866,9 +1866,9 @@ dependencies = [ { name = "tornado" }, { name = "traitlets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/df/e5/4fa382a796a6d8e2cd867816b64f1ff27f906e43a7a83ad9eb389e448cd8/jupyterlab-4.5.0.tar.gz", hash = "sha256:aec33d6d8f1225b495ee2cf20f0514f45e6df8e360bdd7ac9bace0b7ac5177ea", size = 23989880, upload-time = "2025-11-18T13:19:00.365Z" } +sdist = { url = "https://files.pythonhosted.org/packages/09/21/413d142686a4e8f4268d985becbdb4daf060524726248e73be4773786987/jupyterlab-4.5.1.tar.gz", hash = "sha256:09da1ddfbd9eec18b5101dbb8515612aa1e47443321fb99503725a88e93d20d9", size = 23992251, upload-time = "2025-12-15T16:58:59.361Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6c/1e/5a4d5498eba382fee667ed797cf64ae5d1b13b04356df62f067f48bb0f61/jupyterlab-4.5.0-py3-none-any.whl", hash = 
"sha256:88e157c75c1afff64c7dc4b801ec471450b922a4eae4305211ddd40da8201c8a", size = 12380641, upload-time = "2025-11-18T13:18:56.252Z" }, + { url = "https://files.pythonhosted.org/packages/af/c3/acced767eecc11a70c65c45295db5396c4f0c1937874937d5a76d7b177b6/jupyterlab-4.5.1-py3-none-any.whl", hash = "sha256:31b059de96de0754ff1f2ce6279774b6aab8c34d7082e9752db58207c99bd514", size = 12384821, upload-time = "2025-12-15T16:58:55.563Z" }, ] [[package]] @@ -2045,7 +2045,7 @@ wheels = [ [[package]] name = "langsmith" -version = "0.4.59" +version = "0.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx" }, @@ -2057,9 +2057,9 @@ dependencies = [ { name = "uuid-utils" }, { name = "zstandard" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/61/71/d61524c3205bde7ec90423d997cf1a228d8adf2811110ec91ed40c8e8a34/langsmith-0.4.59.tar.gz", hash = "sha256:6b143214c2303dafb29ab12dcd05ac50bdfc60dac01c6e0450e50cee1d2415e0", size = 992784, upload-time = "2025-12-11T02:40:52.231Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/4b/d448307e8557e36b20008d0d1cd0a58233c38d90bf978e1d093be0ca4cb2/langsmith-0.5.0.tar.gz", hash = "sha256:5cadf1ddd30e838cf61679f4a776aaef638d4b02ffbceba9f73283caebd39e1b", size = 869272, upload-time = "2025-12-16T17:35:38.78Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/63/54/4577ef9424debea2fa08af338489d593276520d2e2f8950575d292be612c/langsmith-0.4.59-py3-none-any.whl", hash = "sha256:97c26399286441a7b7b06b912e2801420fbbf3a049787e609d49dc975ab10bc5", size = 413051, upload-time = "2025-12-11T02:40:50.523Z" }, + { url = "https://files.pythonhosted.org/packages/ee/8a/d9bc95607846bc82fbe0b98d2592ffb5e036c97a362735ae926e3d519df7/langsmith-0.5.0-py3-none-any.whl", hash = "sha256:a83750cb3dccb33148d4ffe005e3e03080fad13e01671efbb74c9a68813bfef8", size = 273711, upload-time = "2025-12-16T17:35:37.165Z" }, ] [[package]] @@ -2202,14 +2202,14 @@ wheels = [ [[package]] name = "marshmallow" -version = 
"3.26.1" +version = "3.26.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "packaging" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ab/5e/5e53d26b42ab75491cda89b871dab9e97c840bf12c63ec58a1919710cd06/marshmallow-3.26.1.tar.gz", hash = "sha256:e6d8affb6cb61d39d26402096dc0aee12d5a26d490a121f118d2e81dc0719dc6", size = 221825, upload-time = "2025-02-03T15:32:25.093Z" } +sdist = { url = "https://files.pythonhosted.org/packages/55/79/de6c16cc902f4fc372236926b0ce2ab7845268dcc30fb2fbb7f71b418631/marshmallow-3.26.2.tar.gz", hash = "sha256:bbe2adb5a03e6e3571b573f42527c6fe926e17467833660bebd11593ab8dfd57", size = 222095, upload-time = "2025-12-22T06:53:53.309Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/34/75/51952c7b2d3873b44a0028b1bd26a25078c18f92f256608e8d1dc61b39fd/marshmallow-3.26.1-py3-none-any.whl", hash = "sha256:3350409f20a70a7e4e11a27661187b77cdcaeb20abca41c1454fe33636bea09c", size = 50878, upload-time = "2025-02-03T15:32:22.295Z" }, + { url = "https://files.pythonhosted.org/packages/be/2f/5108cb3ee4ba6501748c4908b908e55f42a5b66245b4cfe0c99326e1ef6e/marshmallow-3.26.2-py3-none-any.whl", hash = "sha256:013fa8a3c4c276c24d26d84ce934dc964e2aa794345a0f8c7e5a7191482c8a73", size = 50964, upload-time = "2025-12-22T06:53:51.801Z" }, ] [[package]] @@ -2226,7 +2226,7 @@ wheels = [ [[package]] name = "mcp" -version = "1.23.3" +version = "1.25.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -2244,9 +2244,9 @@ dependencies = [ { name = "typing-inspection" }, { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a7/a4/d06a303f45997e266f2c228081abe299bbcba216cb806128e2e49095d25f/mcp-1.23.3.tar.gz", hash = "sha256:b3b0da2cc949950ce1259c7bfc1b081905a51916fcd7c8182125b85e70825201", size = 600697, upload-time = "2025-12-09T16:04:37.351Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/d5/2d/649d80a0ecf6a1f82632ca44bec21c0461a9d9fc8934d38cb5b319f2db5e/mcp-1.25.0.tar.gz", hash = "sha256:56310361ebf0364e2d438e5b45f7668cbb124e158bb358333cd06e49e83a6802", size = 605387, upload-time = "2025-12-19T10:19:56.985Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/32/c6/13c1a26b47b3f3a3b480783001ada4268917c9f42d78a079c336da2e75e5/mcp-1.23.3-py3-none-any.whl", hash = "sha256:32768af4b46a1b4f7df34e2bfdf5c6011e7b63d7f1b0e321d0fdef4cd6082031", size = 231570, upload-time = "2025-12-09T16:04:35.56Z" }, + { url = "https://files.pythonhosted.org/packages/e2/fc/6dc7659c2ae5ddf280477011f4213a74f806862856b796ef08f028e664bf/mcp-1.25.0-py3-none-any.whl", hash = "sha256:b37c38144a666add0862614cc79ec276e97d72aa8ca26d622818d4e278b9721a", size = 233076, upload-time = "2025-12-19T10:19:55.416Z" }, ] [[package]] @@ -2332,7 +2332,7 @@ wheels = [ [[package]] name = "mkdocs-material" -version = "9.7.0" +version = "9.7.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "babel" }, @@ -2347,9 +2347,9 @@ dependencies = [ { name = "pymdown-extensions" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9c/3b/111b84cd6ff28d9e955b5f799ef217a17bc1684ac346af333e6100e413cb/mkdocs_material-9.7.0.tar.gz", hash = "sha256:602b359844e906ee402b7ed9640340cf8a474420d02d8891451733b6b02314ec", size = 4094546, upload-time = "2025-11-11T08:49:09.73Z" } +sdist = { url = "https://files.pythonhosted.org/packages/27/e2/2ffc356cd72f1473d07c7719d82a8f2cbd261666828614ecb95b12169f41/mkdocs_material-9.7.1.tar.gz", hash = "sha256:89601b8f2c3e6c6ee0a918cc3566cb201d40bf37c3cd3c2067e26fadb8cce2b8", size = 4094392, upload-time = "2025-12-18T09:49:00.308Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/04/87/eefe8d5e764f4cf50ed91b943f8e8f96b5efd65489d8303b7a36e2e79834/mkdocs_material-9.7.0-py3-none-any.whl", hash = 
"sha256:da2866ea53601125ff5baa8aa06404c6e07af3c5ce3d5de95e3b52b80b442887", size = 9283770, upload-time = "2025-11-11T08:49:06.26Z" }, + { url = "https://files.pythonhosted.org/packages/3e/32/ed071cb721aca8c227718cffcf7bd539620e9799bbf2619e90c757bfd030/mkdocs_material-9.7.1-py3-none-any.whl", hash = "sha256:3f6100937d7d731f87f1e3e3b021c97f7239666b9ba1151ab476cabb96c60d5c", size = 9297166, upload-time = "2025-12-18T09:48:56.664Z" }, ] [[package]] @@ -2505,7 +2505,7 @@ wheels = [ [[package]] name = "nbclient" -version = "0.10.2" +version = "0.10.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jupyter-client" }, @@ -2513,9 +2513,9 @@ dependencies = [ { name = "nbformat" }, { name = "traitlets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/87/66/7ffd18d58eae90d5721f9f39212327695b749e23ad44b3881744eaf4d9e8/nbclient-0.10.2.tar.gz", hash = "sha256:90b7fc6b810630db87a6d0c2250b1f0ab4cf4d3c27a299b0cde78a4ed3fd9193", size = 62424, upload-time = "2024-12-19T10:32:27.164Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8d/f3/1f6cf2ede4b026bc5f0b424cb41adf22f9c804e90a4dbd4fdb42291a35d5/nbclient-0.10.3.tar.gz", hash = "sha256:0baf171ee246e3bb2391da0635e719f27dc77d99aef59e0b04dcb935ee04c575", size = 62564, upload-time = "2025-12-19T15:50:09.331Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/34/6d/e7fa07f03a4a7b221d94b4d586edb754a9b0dc3c9e2c93353e9fa4e0d117/nbclient-0.10.2-py3-none-any.whl", hash = "sha256:4ffee11e788b4a27fabeb7955547e4318a5298f34342a4bfd01f2e1faaeadc3d", size = 25434, upload-time = "2024-12-19T10:32:24.139Z" }, + { url = "https://files.pythonhosted.org/packages/b2/77/0c73678f5260501a271fd7342bee5d639440f2e9e07d590f1100a056d87c/nbclient-0.10.3-py3-none-any.whl", hash = "sha256:39e9bd403504dd2484dd0fd25235bb6a683ce8cd9873356e40d880696adc9e35", size = 25473, upload-time = "2025-12-19T15:50:07.671Z" }, ] [[package]] @@ -2581,54 +2581,52 @@ wheels = [ [[package]] name = "numpy" -version = 
"2.3.5" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/76/65/21b3bc86aac7b8f2862db1e808f1ea22b028e30a225a34a5ede9bf8678f2/numpy-2.3.5.tar.gz", hash = "sha256:784db1dcdab56bf0517743e746dfb0f885fc68d948aba86eeec2cba234bdf1c0", size = 20584950, upload-time = "2025-11-16T22:52:42.067Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/db/69/9cde09f36da4b5a505341180a3f2e6fadc352fd4d2b7096ce9778db83f1a/numpy-2.3.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d0f23b44f57077c1ede8c5f26b30f706498b4862d3ff0a7298b8411dd2f043ff", size = 16728251, upload-time = "2025-11-16T22:50:19.013Z" }, - { url = "https://files.pythonhosted.org/packages/79/fb/f505c95ceddd7027347b067689db71ca80bd5ecc926f913f1a23e65cf09b/numpy-2.3.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:aa5bc7c5d59d831d9773d1170acac7893ce3a5e130540605770ade83280e7188", size = 12254652, upload-time = "2025-11-16T22:50:21.487Z" }, - { url = "https://files.pythonhosted.org/packages/78/da/8c7738060ca9c31b30e9301ee0cf6c5ffdbf889d9593285a1cead337f9a5/numpy-2.3.5-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:ccc933afd4d20aad3c00bcef049cb40049f7f196e0397f1109dba6fed63267b0", size = 5083172, upload-time = "2025-11-16T22:50:24.562Z" }, - { url = "https://files.pythonhosted.org/packages/a4/b4/ee5bb2537fb9430fd2ef30a616c3672b991a4129bb1c7dcc42aa0abbe5d7/numpy-2.3.5-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:afaffc4393205524af9dfa400fa250143a6c3bc646c08c9f5e25a9f4b4d6a903", size = 6622990, upload-time = "2025-11-16T22:50:26.47Z" }, - { url = "https://files.pythonhosted.org/packages/95/03/dc0723a013c7d7c19de5ef29e932c3081df1c14ba582b8b86b5de9db7f0f/numpy-2.3.5-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c75442b2209b8470d6d5d8b1c25714270686f14c749028d2199c54e29f20b4d", size = 14248902, upload-time = "2025-11-16T22:50:28.861Z" }, - { url = 
"https://files.pythonhosted.org/packages/f5/10/ca162f45a102738958dcec8023062dad0cbc17d1ab99d68c4e4a6c45fb2b/numpy-2.3.5-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11e06aa0af8c0f05104d56450d6093ee639e15f24ecf62d417329d06e522e017", size = 16597430, upload-time = "2025-11-16T22:50:31.56Z" }, - { url = "https://files.pythonhosted.org/packages/2a/51/c1e29be863588db58175175f057286900b4b3327a1351e706d5e0f8dd679/numpy-2.3.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ed89927b86296067b4f81f108a2271d8926467a8868e554eaf370fc27fa3ccaf", size = 16024551, upload-time = "2025-11-16T22:50:34.242Z" }, - { url = "https://files.pythonhosted.org/packages/83/68/8236589d4dbb87253d28259d04d9b814ec0ecce7cb1c7fed29729f4c3a78/numpy-2.3.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:51c55fe3451421f3a6ef9a9c1439e82101c57a2c9eab9feb196a62b1a10b58ce", size = 18533275, upload-time = "2025-11-16T22:50:37.651Z" }, - { url = "https://files.pythonhosted.org/packages/40/56/2932d75b6f13465239e3b7b7e511be27f1b8161ca2510854f0b6e521c395/numpy-2.3.5-cp313-cp313-win32.whl", hash = "sha256:1978155dd49972084bd6ef388d66ab70f0c323ddee6f693d539376498720fb7e", size = 6277637, upload-time = "2025-11-16T22:50:40.11Z" }, - { url = "https://files.pythonhosted.org/packages/0c/88/e2eaa6cffb115b85ed7c7c87775cb8bcf0816816bc98ca8dbfa2ee33fe6e/numpy-2.3.5-cp313-cp313-win_amd64.whl", hash = "sha256:00dc4e846108a382c5869e77c6ed514394bdeb3403461d25a829711041217d5b", size = 12779090, upload-time = "2025-11-16T22:50:42.503Z" }, - { url = "https://files.pythonhosted.org/packages/8f/88/3f41e13a44ebd4034ee17baa384acac29ba6a4fcc2aca95f6f08ca0447d1/numpy-2.3.5-cp313-cp313-win_arm64.whl", hash = "sha256:0472f11f6ec23a74a906a00b48a4dcf3849209696dff7c189714511268d103ae", size = 10194710, upload-time = "2025-11-16T22:50:44.971Z" }, - { url = 
"https://files.pythonhosted.org/packages/13/cb/71744144e13389d577f867f745b7df2d8489463654a918eea2eeb166dfc9/numpy-2.3.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:414802f3b97f3c1eef41e530aaba3b3c1620649871d8cb38c6eaff034c2e16bd", size = 16827292, upload-time = "2025-11-16T22:50:47.715Z" }, - { url = "https://files.pythonhosted.org/packages/71/80/ba9dc6f2a4398e7f42b708a7fdc841bb638d353be255655498edbf9a15a8/numpy-2.3.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5ee6609ac3604fa7780e30a03e5e241a7956f8e2fcfe547d51e3afa5247ac47f", size = 12378897, upload-time = "2025-11-16T22:50:51.327Z" }, - { url = "https://files.pythonhosted.org/packages/2e/6d/db2151b9f64264bcceccd51741aa39b50150de9b602d98ecfe7e0c4bff39/numpy-2.3.5-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:86d835afea1eaa143012a2d7a3f45a3adce2d7adc8b4961f0b362214d800846a", size = 5207391, upload-time = "2025-11-16T22:50:54.542Z" }, - { url = "https://files.pythonhosted.org/packages/80/ae/429bacace5ccad48a14c4ae5332f6aa8ab9f69524193511d60ccdfdc65fa/numpy-2.3.5-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:30bc11310e8153ca664b14c5f1b73e94bd0503681fcf136a163de856f3a50139", size = 6721275, upload-time = "2025-11-16T22:50:56.794Z" }, - { url = "https://files.pythonhosted.org/packages/74/5b/1919abf32d8722646a38cd527bc3771eb229a32724ee6ba340ead9b92249/numpy-2.3.5-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1062fde1dcf469571705945b0f221b73928f34a20c904ffb45db101907c3454e", size = 14306855, upload-time = "2025-11-16T22:50:59.208Z" }, - { url = "https://files.pythonhosted.org/packages/a5/87/6831980559434973bebc30cd9c1f21e541a0f2b0c280d43d3afd909b66d0/numpy-2.3.5-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ce581db493ea1a96c0556360ede6607496e8bf9b3a8efa66e06477267bc831e9", size = 16657359, upload-time = "2025-11-16T22:51:01.991Z" }, - { url = 
"https://files.pythonhosted.org/packages/dd/91/c797f544491ee99fd00495f12ebb7802c440c1915811d72ac5b4479a3356/numpy-2.3.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:cc8920d2ec5fa99875b670bb86ddeb21e295cb07aa331810d9e486e0b969d946", size = 16093374, upload-time = "2025-11-16T22:51:05.291Z" }, - { url = "https://files.pythonhosted.org/packages/74/a6/54da03253afcbe7a72785ec4da9c69fb7a17710141ff9ac5fcb2e32dbe64/numpy-2.3.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9ee2197ef8c4f0dfe405d835f3b6a14f5fee7782b5de51ba06fb65fc9b36e9f1", size = 18594587, upload-time = "2025-11-16T22:51:08.585Z" }, - { url = "https://files.pythonhosted.org/packages/80/e9/aff53abbdd41b0ecca94285f325aff42357c6b5abc482a3fcb4994290b18/numpy-2.3.5-cp313-cp313t-win32.whl", hash = "sha256:70b37199913c1bd300ff6e2693316c6f869c7ee16378faf10e4f5e3275b299c3", size = 6405940, upload-time = "2025-11-16T22:51:11.541Z" }, - { url = "https://files.pythonhosted.org/packages/d5/81/50613fec9d4de5480de18d4f8ef59ad7e344d497edbef3cfd80f24f98461/numpy-2.3.5-cp313-cp313t-win_amd64.whl", hash = "sha256:b501b5fa195cc9e24fe102f21ec0a44dffc231d2af79950b451e0d99cea02234", size = 12920341, upload-time = "2025-11-16T22:51:14.312Z" }, - { url = "https://files.pythonhosted.org/packages/bb/ab/08fd63b9a74303947f34f0bd7c5903b9c5532c2d287bead5bdf4c556c486/numpy-2.3.5-cp313-cp313t-win_arm64.whl", hash = "sha256:a80afd79f45f3c4a7d341f13acbe058d1ca8ac017c165d3fa0d3de6bc1a079d7", size = 10262507, upload-time = "2025-11-16T22:51:16.846Z" }, - { url = "https://files.pythonhosted.org/packages/ba/97/1a914559c19e32d6b2e233cf9a6a114e67c856d35b1d6babca571a3e880f/numpy-2.3.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:bf06bc2af43fa8d32d30fae16ad965663e966b1a3202ed407b84c989c3221e82", size = 16735706, upload-time = "2025-11-16T22:51:19.558Z" }, - { url = "https://files.pythonhosted.org/packages/57/d4/51233b1c1b13ecd796311216ae417796b88b0616cfd8a33ae4536330748a/numpy-2.3.5-cp314-cp314-macosx_11_0_arm64.whl", hash 
= "sha256:052e8c42e0c49d2575621c158934920524f6c5da05a1d3b9bab5d8e259e045f0", size = 12264507, upload-time = "2025-11-16T22:51:22.492Z" }, - { url = "https://files.pythonhosted.org/packages/45/98/2fe46c5c2675b8306d0b4a3ec3494273e93e1226a490f766e84298576956/numpy-2.3.5-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:1ed1ec893cff7040a02c8aa1c8611b94d395590d553f6b53629a4461dc7f7b63", size = 5093049, upload-time = "2025-11-16T22:51:25.171Z" }, - { url = "https://files.pythonhosted.org/packages/ce/0e/0698378989bb0ac5f1660c81c78ab1fe5476c1a521ca9ee9d0710ce54099/numpy-2.3.5-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:2dcd0808a421a482a080f89859a18beb0b3d1e905b81e617a188bd80422d62e9", size = 6626603, upload-time = "2025-11-16T22:51:27Z" }, - { url = "https://files.pythonhosted.org/packages/5e/a6/9ca0eecc489640615642a6cbc0ca9e10df70df38c4d43f5a928ff18d8827/numpy-2.3.5-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:727fd05b57df37dc0bcf1a27767a3d9a78cbbc92822445f32cc3436ba797337b", size = 14262696, upload-time = "2025-11-16T22:51:29.402Z" }, - { url = "https://files.pythonhosted.org/packages/c8/f6/07ec185b90ec9d7217a00eeeed7383b73d7e709dae2a9a021b051542a708/numpy-2.3.5-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fffe29a1ef00883599d1dc2c51aa2e5d80afe49523c261a74933df395c15c520", size = 16597350, upload-time = "2025-11-16T22:51:32.167Z" }, - { url = "https://files.pythonhosted.org/packages/75/37/164071d1dde6a1a84c9b8e5b414fa127981bad47adf3a6b7e23917e52190/numpy-2.3.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8f7f0e05112916223d3f438f293abf0727e1181b5983f413dfa2fefc4098245c", size = 16040190, upload-time = "2025-11-16T22:51:35.403Z" }, - { url = "https://files.pythonhosted.org/packages/08/3c/f18b82a406b04859eb026d204e4e1773eb41c5be58410f41ffa511d114ae/numpy-2.3.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2e2eb32ddb9ccb817d620ac1d8dae7c3f641c1e5f55f531a33e8ab97960a75b8", size = 
18536749, upload-time = "2025-11-16T22:51:39.698Z" }, - { url = "https://files.pythonhosted.org/packages/40/79/f82f572bf44cf0023a2fe8588768e23e1592585020d638999f15158609e1/numpy-2.3.5-cp314-cp314-win32.whl", hash = "sha256:66f85ce62c70b843bab1fb14a05d5737741e74e28c7b8b5a064de10142fad248", size = 6335432, upload-time = "2025-11-16T22:51:42.476Z" }, - { url = "https://files.pythonhosted.org/packages/a3/2e/235b4d96619931192c91660805e5e49242389742a7a82c27665021db690c/numpy-2.3.5-cp314-cp314-win_amd64.whl", hash = "sha256:e6a0bc88393d65807d751a614207b7129a310ca4fe76a74e5c7da5fa5671417e", size = 12919388, upload-time = "2025-11-16T22:51:45.275Z" }, - { url = "https://files.pythonhosted.org/packages/07/2b/29fd75ce45d22a39c61aad74f3d718e7ab67ccf839ca8b60866054eb15f8/numpy-2.3.5-cp314-cp314-win_arm64.whl", hash = "sha256:aeffcab3d4b43712bb7a60b65f6044d444e75e563ff6180af8f98dd4b905dfd2", size = 10476651, upload-time = "2025-11-16T22:51:47.749Z" }, - { url = "https://files.pythonhosted.org/packages/17/e1/f6a721234ebd4d87084cfa68d081bcba2f5cfe1974f7de4e0e8b9b2a2ba1/numpy-2.3.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:17531366a2e3a9e30762c000f2c43a9aaa05728712e25c11ce1dbe700c53ad41", size = 16834503, upload-time = "2025-11-16T22:51:50.443Z" }, - { url = "https://files.pythonhosted.org/packages/5c/1c/baf7ffdc3af9c356e1c135e57ab7cf8d247931b9554f55c467efe2c69eff/numpy-2.3.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d21644de1b609825ede2f48be98dfde4656aefc713654eeee280e37cadc4e0ad", size = 12381612, upload-time = "2025-11-16T22:51:53.609Z" }, - { url = "https://files.pythonhosted.org/packages/74/91/f7f0295151407ddc9ba34e699013c32c3c91944f9b35fcf9281163dc1468/numpy-2.3.5-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:c804e3a5aba5460c73955c955bdbd5c08c354954e9270a2c1565f62e866bdc39", size = 5210042, upload-time = "2025-11-16T22:51:56.213Z" }, - { url = 
"https://files.pythonhosted.org/packages/2e/3b/78aebf345104ec50dd50a4d06ddeb46a9ff5261c33bcc58b1c4f12f85ec2/numpy-2.3.5-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:cc0a57f895b96ec78969c34f682c602bf8da1a0270b09bc65673df2e7638ec20", size = 6724502, upload-time = "2025-11-16T22:51:58.584Z" }, - { url = "https://files.pythonhosted.org/packages/02/c6/7c34b528740512e57ef1b7c8337ab0b4f0bddf34c723b8996c675bc2bc91/numpy-2.3.5-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:900218e456384ea676e24ea6a0417f030a3b07306d29d7ad843957b40a9d8d52", size = 14308962, upload-time = "2025-11-16T22:52:01.698Z" }, - { url = "https://files.pythonhosted.org/packages/80/35/09d433c5262bc32d725bafc619e095b6a6651caf94027a03da624146f655/numpy-2.3.5-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:09a1bea522b25109bf8e6f3027bd810f7c1085c64a0c7ce050c1676ad0ba010b", size = 16655054, upload-time = "2025-11-16T22:52:04.267Z" }, - { url = "https://files.pythonhosted.org/packages/7a/ab/6a7b259703c09a88804fa2430b43d6457b692378f6b74b356155283566ac/numpy-2.3.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:04822c00b5fd0323c8166d66c701dc31b7fbd252c100acd708c48f763968d6a3", size = 16091613, upload-time = "2025-11-16T22:52:08.651Z" }, - { url = "https://files.pythonhosted.org/packages/c2/88/330da2071e8771e60d1038166ff9d73f29da37b01ec3eb43cb1427464e10/numpy-2.3.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d6889ec4ec662a1a37eb4b4fb26b6100841804dac55bd9df579e326cdc146227", size = 18591147, upload-time = "2025-11-16T22:52:11.453Z" }, - { url = "https://files.pythonhosted.org/packages/51/41/851c4b4082402d9ea860c3626db5d5df47164a712cb23b54be028b184c1c/numpy-2.3.5-cp314-cp314t-win32.whl", hash = "sha256:93eebbcf1aafdf7e2ddd44c2923e2672e1010bddc014138b229e49725b4d6be5", size = 6479806, upload-time = "2025-11-16T22:52:14.641Z" }, - { url = 
"https://files.pythonhosted.org/packages/90/30/d48bde1dfd93332fa557cff1972fbc039e055a52021fbef4c2c4b1eefd17/numpy-2.3.5-cp314-cp314t-win_amd64.whl", hash = "sha256:c8a9958e88b65c3b27e22ca2a076311636850b612d6bbfb76e8d156aacde2aaf", size = 13105760, upload-time = "2025-11-16T22:52:17.975Z" }, - { url = "https://files.pythonhosted.org/packages/2d/fd/4b5eb0b3e888d86aee4d198c23acec7d214baaf17ea93c1adec94c9518b9/numpy-2.3.5-cp314-cp314t-win_arm64.whl", hash = "sha256:6203fdf9f3dc5bdaed7319ad8698e685c7a3be10819f41d32a0723e611733b42", size = 10545459, upload-time = "2025-11-16T22:52:20.55Z" }, +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a4/7a/6a3d14e205d292b738db449d0de649b373a59edb0d0b4493821d0a3e8718/numpy-2.4.0.tar.gz", hash = "sha256:6e504f7b16118198f138ef31ba24d985b124c2c469fe8467007cf30fd992f934", size = 20685720, upload-time = "2025-12-20T16:18:19.023Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/0d/853fd96372eda07c824d24adf02e8bc92bb3731b43a9b2a39161c3667cc4/numpy-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a152d86a3ae00ba5f47b3acf3b827509fd0b6cb7d3259665e63dafbad22a75ea", size = 16649088, upload-time = "2025-12-20T16:16:31.421Z" }, + { url = "https://files.pythonhosted.org/packages/e3/37/cc636f1f2a9f585434e20a3e6e63422f70bfe4f7f6698e941db52ea1ac9a/numpy-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:39b19251dec4de8ff8496cd0806cbe27bf0684f765abb1f4809554de93785f2d", size = 12364065, upload-time = "2025-12-20T16:16:33.491Z" }, + { url = "https://files.pythonhosted.org/packages/ed/69/0b78f37ca3690969beee54103ce5f6021709134e8020767e93ba691a72f1/numpy-2.4.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:009bd0ea12d3c784b6639a8457537016ce5172109e585338e11334f6a7bb88ee", size = 5192640, upload-time = "2025-12-20T16:16:35.636Z" }, + { url = 
"https://files.pythonhosted.org/packages/1d/2a/08569f8252abf590294dbb09a430543ec8f8cc710383abfb3e75cc73aeda/numpy-2.4.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5fe44e277225fd3dff6882d86d3d447205d43532c3627313d17e754fb3905a0e", size = 6541556, upload-time = "2025-12-20T16:16:37.276Z" }, + { url = "https://files.pythonhosted.org/packages/93/e9/a949885a4e177493d61519377952186b6cbfdf1d6002764c664ba28349b5/numpy-2.4.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f935c4493eda9069851058fa0d9e39dbf6286be690066509305e52912714dbb2", size = 14396562, upload-time = "2025-12-20T16:16:38.953Z" }, + { url = "https://files.pythonhosted.org/packages/99/98/9d4ad53b0e9ef901c2ef1d550d2136f5ac42d3fd2988390a6def32e23e48/numpy-2.4.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8cfa5f29a695cb7438965e6c3e8d06e0416060cf0d709c1b1c1653a939bf5c2a", size = 16351719, upload-time = "2025-12-20T16:16:41.503Z" }, + { url = "https://files.pythonhosted.org/packages/28/de/5f3711a38341d6e8dd619f6353251a0cdd07f3d6d101a8fd46f4ef87f895/numpy-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ba0cb30acd3ef11c94dc27fbfba68940652492bc107075e7ffe23057f9425681", size = 16176053, upload-time = "2025-12-20T16:16:44.552Z" }, + { url = "https://files.pythonhosted.org/packages/2a/5b/2a3753dc43916501b4183532e7ace862e13211042bceafa253afb5c71272/numpy-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:60e8c196cd82cbbd4f130b5290007e13e6de3eca79f0d4d38014769d96a7c475", size = 18277859, upload-time = "2025-12-20T16:16:47.174Z" }, + { url = "https://files.pythonhosted.org/packages/2c/c5/a18bcdd07a941db3076ef489d036ab16d2bfc2eae0cf27e5a26e29189434/numpy-2.4.0-cp313-cp313-win32.whl", hash = "sha256:5f48cb3e88fbc294dc90e215d86fbaf1c852c63dbdb6c3a3e63f45c4b57f7344", size = 5953849, upload-time = "2025-12-20T16:16:49.554Z" }, + { url = 
"https://files.pythonhosted.org/packages/4f/f1/719010ff8061da6e8a26e1980cf090412d4f5f8060b31f0c45d77dd67a01/numpy-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:a899699294f28f7be8992853c0c60741f16ff199205e2e6cdca155762cbaa59d", size = 12302840, upload-time = "2025-12-20T16:16:51.227Z" }, + { url = "https://files.pythonhosted.org/packages/f5/5a/b3d259083ed8b4d335270c76966cb6cf14a5d1b69e1a608994ac57a659e6/numpy-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:9198f447e1dc5647d07c9a6bbe2063cc0132728cc7175b39dbc796da5b54920d", size = 10308509, upload-time = "2025-12-20T16:16:53.313Z" }, + { url = "https://files.pythonhosted.org/packages/31/01/95edcffd1bb6c0633df4e808130545c4f07383ab629ac7e316fb44fff677/numpy-2.4.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74623f2ab5cc3f7c886add4f735d1031a1d2be4a4ae63c0546cfd74e7a31ddf6", size = 12491815, upload-time = "2025-12-20T16:16:55.496Z" }, + { url = "https://files.pythonhosted.org/packages/59/ea/5644b8baa92cc1c7163b4b4458c8679852733fa74ca49c942cfa82ded4e0/numpy-2.4.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:0804a8e4ab070d1d35496e65ffd3cf8114c136a2b81f61dfab0de4b218aacfd5", size = 5320321, upload-time = "2025-12-20T16:16:57.468Z" }, + { url = "https://files.pythonhosted.org/packages/26/4e/e10938106d70bc21319bd6a86ae726da37edc802ce35a3a71ecdf1fdfe7f/numpy-2.4.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:02a2038eb27f9443a8b266a66911e926566b5a6ffd1a689b588f7f35b81e7dc3", size = 6641635, upload-time = "2025-12-20T16:16:59.379Z" }, + { url = "https://files.pythonhosted.org/packages/b3/8d/a8828e3eaf5c0b4ab116924df82f24ce3416fa38d0674d8f708ddc6c8aac/numpy-2.4.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1889b3a3f47a7b5bee16bc25a2145bd7cb91897f815ce3499db64c7458b6d91d", size = 14456053, upload-time = "2025-12-20T16:17:01.768Z" }, + { url = 
"https://files.pythonhosted.org/packages/68/a1/17d97609d87d4520aa5ae2dcfb32305654550ac6a35effb946d303e594ce/numpy-2.4.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85eef4cb5625c47ee6425c58a3502555e10f45ee973da878ac8248ad58c136f3", size = 16401702, upload-time = "2025-12-20T16:17:04.235Z" }, + { url = "https://files.pythonhosted.org/packages/18/32/0f13c1b2d22bea1118356b8b963195446f3af124ed7a5adfa8fdecb1b6ca/numpy-2.4.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6dc8b7e2f4eb184b37655195f421836cfae6f58197b67e3ffc501f1333d993fa", size = 16242493, upload-time = "2025-12-20T16:17:06.856Z" }, + { url = "https://files.pythonhosted.org/packages/ae/23/48f21e3d309fbc137c068a1475358cbd3a901b3987dcfc97a029ab3068e2/numpy-2.4.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:44aba2f0cafd287871a495fb3163408b0bd25bbce135c6f621534a07f4f7875c", size = 18324222, upload-time = "2025-12-20T16:17:09.392Z" }, + { url = "https://files.pythonhosted.org/packages/ac/52/41f3d71296a3dcaa4f456aaa3c6fc8e745b43d0552b6bde56571bb4b4a0f/numpy-2.4.0-cp313-cp313t-win32.whl", hash = "sha256:20c115517513831860c573996e395707aa9fb691eb179200125c250e895fcd93", size = 6076216, upload-time = "2025-12-20T16:17:11.437Z" }, + { url = "https://files.pythonhosted.org/packages/35/ff/46fbfe60ab0710d2a2b16995f708750307d30eccbb4c38371ea9e986866e/numpy-2.4.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b48e35f4ab6f6a7597c46e301126ceba4c44cd3280e3750f85db48b082624fa4", size = 12444263, upload-time = "2025-12-20T16:17:13.182Z" }, + { url = "https://files.pythonhosted.org/packages/a3/e3/9189ab319c01d2ed556c932ccf55064c5d75bb5850d1df7a482ce0badead/numpy-2.4.0-cp313-cp313t-win_arm64.whl", hash = "sha256:4d1cfce39e511069b11e67cd0bd78ceff31443b7c9e5c04db73c7a19f572967c", size = 10378265, upload-time = "2025-12-20T16:17:15.211Z" }, + { url = 
"https://files.pythonhosted.org/packages/ab/ed/52eac27de39d5e5a6c9aadabe672bc06f55e24a3d9010cd1183948055d76/numpy-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c95eb6db2884917d86cde0b4d4cf31adf485c8ec36bf8696dd66fa70de96f36b", size = 16647476, upload-time = "2025-12-20T16:17:17.671Z" }, + { url = "https://files.pythonhosted.org/packages/77/c0/990ce1b7fcd4e09aeaa574e2a0a839589e4b08b2ca68070f1acb1fea6736/numpy-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:65167da969cd1ec3a1df31cb221ca3a19a8aaa25370ecb17d428415e93c1935e", size = 12374563, upload-time = "2025-12-20T16:17:20.216Z" }, + { url = "https://files.pythonhosted.org/packages/37/7c/8c5e389c6ae8f5fd2277a988600d79e9625db3fff011a2d87ac80b881a4c/numpy-2.4.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:3de19cfecd1465d0dcf8a5b5ea8b3155b42ed0b639dba4b71e323d74f2a3be5e", size = 5203107, upload-time = "2025-12-20T16:17:22.47Z" }, + { url = "https://files.pythonhosted.org/packages/e6/94/ca5b3bd6a8a70a5eec9a0b8dd7f980c1eff4b8a54970a9a7fef248ef564f/numpy-2.4.0-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:6c05483c3136ac4c91b4e81903cb53a8707d316f488124d0398499a4f8e8ef51", size = 6538067, upload-time = "2025-12-20T16:17:24.001Z" }, + { url = "https://files.pythonhosted.org/packages/79/43/993eb7bb5be6761dde2b3a3a594d689cec83398e3f58f4758010f3b85727/numpy-2.4.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36667db4d6c1cea79c8930ab72fadfb4060feb4bfe724141cd4bd064d2e5f8ce", size = 14411926, upload-time = "2025-12-20T16:17:25.822Z" }, + { url = "https://files.pythonhosted.org/packages/03/75/d4c43b61de473912496317a854dac54f1efec3eeb158438da6884b70bb90/numpy-2.4.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9a818668b674047fd88c4cddada7ab8f1c298812783e8328e956b78dc4807f9f", size = 16354295, upload-time = "2025-12-20T16:17:28.308Z" }, + { url = 
"https://files.pythonhosted.org/packages/b8/0a/b54615b47ee8736a6461a4bb6749128dd3435c5a759d5663f11f0e9af4ac/numpy-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1ee32359fb7543b7b7bd0b2f46294db27e29e7bbdf70541e81b190836cd83ded", size = 16190242, upload-time = "2025-12-20T16:17:30.993Z" }, + { url = "https://files.pythonhosted.org/packages/98/ce/ea207769aacad6246525ec6c6bbd66a2bf56c72443dc10e2f90feed29290/numpy-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e493962256a38f58283de033d8af176c5c91c084ea30f15834f7545451c42059", size = 18280875, upload-time = "2025-12-20T16:17:33.327Z" }, + { url = "https://files.pythonhosted.org/packages/17/ef/ec409437aa962ea372ed601c519a2b141701683ff028f894b7466f0ab42b/numpy-2.4.0-cp314-cp314-win32.whl", hash = "sha256:6bbaebf0d11567fa8926215ae731e1d58e6ec28a8a25235b8a47405d301332db", size = 6002530, upload-time = "2025-12-20T16:17:35.729Z" }, + { url = "https://files.pythonhosted.org/packages/5f/4a/5cb94c787a3ed1ac65e1271b968686521169a7b3ec0b6544bb3ca32960b0/numpy-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:3d857f55e7fdf7c38ab96c4558c95b97d1c685be6b05c249f5fdafcbd6f9899e", size = 12435890, upload-time = "2025-12-20T16:17:37.599Z" }, + { url = "https://files.pythonhosted.org/packages/48/a0/04b89db963af9de1104975e2544f30de89adbf75b9e75f7dd2599be12c79/numpy-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:bb50ce5fb202a26fd5404620e7ef820ad1ab3558b444cb0b55beb7ef66cd2d63", size = 10591892, upload-time = "2025-12-20T16:17:39.649Z" }, + { url = "https://files.pythonhosted.org/packages/53/e5/d74b5ccf6712c06c7a545025a6a71bfa03bdc7e0568b405b0d655232fd92/numpy-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:355354388cba60f2132df297e2d53053d4063f79077b67b481d21276d61fc4df", size = 12494312, upload-time = "2025-12-20T16:17:41.714Z" }, + { url = "https://files.pythonhosted.org/packages/c2/08/3ca9cc2ddf54dfee7ae9a6479c071092a228c68aef08252aa08dac2af002/numpy-2.4.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = 
"sha256:1d8f9fde5f6dc1b6fc34df8162f3b3079365468703fee7f31d4e0cc8c63baed9", size = 5322862, upload-time = "2025-12-20T16:17:44.145Z" }, + { url = "https://files.pythonhosted.org/packages/87/74/0bb63a68394c0c1e52670cfff2e309afa41edbe11b3327d9af29e4383f34/numpy-2.4.0-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:e0434aa22c821f44eeb4c650b81c7fbdd8c0122c6c4b5a576a76d5a35625ecd9", size = 6644986, upload-time = "2025-12-20T16:17:46.203Z" }, + { url = "https://files.pythonhosted.org/packages/06/8f/9264d9bdbcf8236af2823623fe2f3981d740fc3461e2787e231d97c38c28/numpy-2.4.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:40483b2f2d3ba7aad426443767ff5632ec3156ef09742b96913787d13c336471", size = 14457958, upload-time = "2025-12-20T16:17:48.017Z" }, + { url = "https://files.pythonhosted.org/packages/8c/d9/f9a69ae564bbc7236a35aa883319364ef5fd41f72aa320cc1cbe66148fe2/numpy-2.4.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9e6a7664ddd9746e20b7325351fe1a8408d0a2bf9c63b5e898290ddc8f09544", size = 16398394, upload-time = "2025-12-20T16:17:50.409Z" }, + { url = "https://files.pythonhosted.org/packages/34/c7/39241501408dde7f885d241a98caba5421061a2c6d2b2197ac5e3aa842d8/numpy-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ecb0019d44f4cdb50b676c5d0cb4b1eae8e15d1ed3d3e6639f986fc92b2ec52c", size = 16241044, upload-time = "2025-12-20T16:17:52.661Z" }, + { url = "https://files.pythonhosted.org/packages/7c/95/cae7effd90e065a95e59fe710eeee05d7328ed169776dfdd9f789e032125/numpy-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d0ffd9e2e4441c96a9c91ec1783285d80bf835b677853fc2770a89d50c1e48ac", size = 18321772, upload-time = "2025-12-20T16:17:54.947Z" }, + { url = "https://files.pythonhosted.org/packages/96/df/3c6c279accd2bfb968a76298e5b276310bd55d243df4fa8ac5816d79347d/numpy-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:77f0d13fa87036d7553bf81f0e1fe3ce68d14c9976c9851744e4d3e91127e95f", size = 6148320, 
upload-time = "2025-12-20T16:17:57.249Z" }, + { url = "https://files.pythonhosted.org/packages/92/8d/f23033cce252e7a75cae853d17f582e86534c46404dea1c8ee094a9d6d84/numpy-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b1f5b45829ac1848893f0ddf5cb326110604d6df96cdc255b0bf9edd154104d4", size = 12623460, upload-time = "2025-12-20T16:17:58.963Z" }, + { url = "https://files.pythonhosted.org/packages/a4/4f/1f8475907d1a7c4ef9020edf7f39ea2422ec896849245f00688e4b268a71/numpy-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:23a3e9d1a6f360267e8fbb38ba5db355a6a7e9be71d7fce7ab3125e88bb646c8", size = 10661799, upload-time = "2025-12-20T16:18:01.078Z" }, ] [[package]] @@ -2646,7 +2644,7 @@ wheels = [ [[package]] name = "openai" -version = "2.9.0" +version = "2.14.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -2658,9 +2656,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/09/48/516290f38745cc1e72856f50e8afed4a7f9ac396a5a18f39e892ab89dfc2/openai-2.9.0.tar.gz", hash = "sha256:b52ec65727fc8f1eed2fbc86c8eac0998900c7ef63aa2eb5c24b69717c56fa5f", size = 608202, upload-time = "2025-12-04T18:15:09.01Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/b1/12fe1c196bea326261718eb037307c1c1fe1dedc2d2d4de777df822e6238/openai-2.14.0.tar.gz", hash = "sha256:419357bedde9402d23bf8f2ee372fca1985a73348debba94bddff06f19459952", size = 626938, upload-time = "2025-12-19T03:28:45.742Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/59/fd/ae2da789cd923dd033c99b8d544071a827c92046b150db01cfa5cea5b3fd/openai-2.9.0-py3-none-any.whl", hash = "sha256:0d168a490fbb45630ad508a6f3022013c155a68fd708069b6a1a01a5e8f0ffad", size = 1030836, upload-time = "2025-12-04T18:15:07.063Z" }, + { url = "https://files.pythonhosted.org/packages/27/4b/7c1a00c2c3fbd004253937f7520f692a9650767aa73894d7a34f0d65d3f4/openai-2.14.0-py3-none-any.whl", hash = 
"sha256:7ea40aca4ffc4c4a776e77679021b47eec1160e341f42ae086ba949c9dcc9183", size = 1067558, upload-time = "2025-12-19T03:28:43.727Z" }, ] [[package]] @@ -3249,24 +3247,24 @@ crypto = [ [[package]] name = "pymdown-extensions" -version = "10.18" +version = "10.19.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown" }, { name = "pyyaml" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d4/95/e4fa281e3f13b3d9c4aaebb21ef44879840325fa418276dd921209a5e9f9/pymdown_extensions-10.18.tar.gz", hash = "sha256:20252abe6367354b24191431617a072ee6be9f68c5afcc74ea5573508a61f9e5", size = 847697, upload-time = "2025-12-07T17:22:12.857Z" } +sdist = { url = "https://files.pythonhosted.org/packages/72/2d/9f30cee56d4d6d222430d401e85b0a6a1ae229819362f5786943d1a8c03b/pymdown_extensions-10.19.1.tar.gz", hash = "sha256:4969c691009a389fb1f9712dd8e7bd70dcc418d15a0faf70acb5117d022f7de8", size = 847839, upload-time = "2025-12-14T17:25:24.42Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/46/a4/aa2bada4a2fd648f40f19affa55d2c01dc7ff5ea9cffd3dfdeb6114951db/pymdown_extensions-10.18-py3-none-any.whl", hash = "sha256:090bca72be43f7d3186374e23c782899dbef9dc153ef24c59dcd3c346f9ffcae", size = 266703, upload-time = "2025-12-07T17:22:11.22Z" }, + { url = "https://files.pythonhosted.org/packages/fb/35/b763e8fbcd51968329b9adc52d188fc97859f85f2ee15fe9f379987d99c5/pymdown_extensions-10.19.1-py3-none-any.whl", hash = "sha256:e8698a66055b1dc0dca2a7f2c9d0ea6f5faa7834a9c432e3535ab96c0c4e509b", size = 266693, upload-time = "2025-12-14T17:25:22.999Z" }, ] [[package]] name = "pyparsing" -version = "3.2.5" +version = "3.3.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f2/a5/181488fc2b9d093e3972d2a472855aae8a03f000592dbfce716a512b3359/pyparsing-3.2.5.tar.gz", hash = "sha256:2df8d5b7b2802ef88e8d016a2eb9c7aeaa923529cd251ed0fe4608275d4105b6", size = 1099274, upload-time = 
"2025-09-21T04:11:06.277Z" } +sdist = { url = "https://files.pythonhosted.org/packages/33/c1/1d9de9aeaa1b89b0186e5fe23294ff6517fce1bc69149185577cd31016b2/pyparsing-3.3.1.tar.gz", hash = "sha256:47fad0f17ac1e2cad3de3b458570fbc9b03560aa029ed5e16ee5554da9a2251c", size = 1550512, upload-time = "2025-12-23T03:14:04.391Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/10/5e/1aa9a93198c6b64513c9d7752de7422c06402de6600a8767da1524f9570b/pyparsing-3.2.5-py3-none-any.whl", hash = "sha256:e38a4f02064cf41fe6593d328d0512495ad1f3d8a91c4f73fc401b3079a59a5e", size = 113890, upload-time = "2025-09-21T04:11:04.117Z" }, + { url = "https://files.pythonhosted.org/packages/8b/40/2614036cdd416452f5bf98ec037f38a1afb17f327cb8e6b652d4729e0af8/pyparsing-3.3.1-py3-none-any.whl", hash = "sha256:023b5e7e5520ad96642e2c6db4cb683d3970bd640cdf7115049a6e9c3682df82", size = 121793, upload-time = "2025-12-23T03:14:02.103Z" }, ] [[package]] @@ -3408,11 +3406,11 @@ wheels = [ [[package]] name = "python-multipart" -version = "0.0.20" +version = "0.0.21" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f3/87/f44d7c9f274c7ee665a29b885ec97089ec5dc034c7f3fafa03da9e39a09e/python_multipart-0.0.20.tar.gz", hash = "sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13", size = 37158, upload-time = "2024-12-16T19:45:46.972Z" } +sdist = { url = "https://files.pythonhosted.org/packages/78/96/804520d0850c7db98e5ccb70282e29208723f0964e88ffd9d0da2f52ea09/python_multipart-0.0.21.tar.gz", hash = "sha256:7137ebd4d3bbf70ea1622998f902b97a29434a9e8dc40eb203bbcf7c2a2cba92", size = 37196, upload-time = "2025-12-17T09:24:22.446Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/45/58/38b5afbc1a800eeea951b9285d3912613f2603bdf897a4ab0f4bd7f405fc/python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104", size = 24546, upload-time = "2024-12-16T19:45:44.423Z" 
}, + { url = "https://files.pythonhosted.org/packages/aa/76/03af049af4dcee5d27442f71b6924f01f3efb5d2bd34f23fcd563f2cc5f5/python_multipart-0.0.21-py3-none-any.whl", hash = "sha256:cf7a6713e01c87aa35387f4774e812c4361150938d20d232800f75ffcf266090", size = 24541, upload-time = "2025-12-17T09:24:21.153Z" }, ] [[package]] @@ -3725,11 +3723,11 @@ wheels = [ [[package]] name = "roman-numerals" -version = "3.1.0" +version = "4.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/57/5b/1bcda2c6a8acec5b310dd70f732400827b96f05d815834f0f112b91b3539/roman_numerals-3.1.0.tar.gz", hash = "sha256:384e36fc1e8d4bd361bdb3672841faae7a345b3f708aae9895d074c878332551", size = 9069, upload-time = "2025-03-12T00:41:08.837Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/f9/41dc953bbeb056c17d5f7a519f50fdf010bd0553be2d630bc69d1e022703/roman_numerals-4.1.0.tar.gz", hash = "sha256:1af8b147eb1405d5839e78aeb93131690495fe9da5c91856cb33ad55a7f1e5b2", size = 9077, upload-time = "2025-12-17T18:25:34.381Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/82/1d/7356f115a0e5faf8dc59894a3e9fc8b1821ab949163458b0072db0a12a68/roman_numerals-3.1.0-py3-none-any.whl", hash = "sha256:842ae5fd12912d62720c9aad8cab706e8c692556d01a38443e051ee6cc158d90", size = 7709, upload-time = "2025-03-12T00:41:07.626Z" }, + { url = "https://files.pythonhosted.org/packages/04/54/6f679c435d28e0a568d8e8a7c0a93a09010818634c3c3907fc98d8983770/roman_numerals-4.1.0-py3-none-any.whl", hash = "sha256:647ba99caddc2cc1e55a51e4360689115551bf4476d90e8162cf8c345fe233c7", size = 7676, upload-time = "2025-12-17T18:25:33.098Z" }, ] [[package]] @@ -3800,28 +3798,28 @@ wheels = [ [[package]] name = "ruff" -version = "0.14.8" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ed/d9/f7a0c4b3a2bf2556cd5d99b05372c29980249ef71e8e32669ba77428c82c/ruff-0.14.8.tar.gz", hash = 
"sha256:774ed0dd87d6ce925e3b8496feb3a00ac564bea52b9feb551ecd17e0a23d1eed", size = 5765385, upload-time = "2025-12-04T15:06:17.669Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/48/b8/9537b52010134b1d2b72870cc3f92d5fb759394094741b09ceccae183fbe/ruff-0.14.8-py3-none-linux_armv6l.whl", hash = "sha256:ec071e9c82eca417f6111fd39f7043acb53cd3fde9b1f95bbed745962e345afb", size = 13441540, upload-time = "2025-12-04T15:06:14.896Z" }, - { url = "https://files.pythonhosted.org/packages/24/00/99031684efb025829713682012b6dd37279b1f695ed1b01725f85fd94b38/ruff-0.14.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:8cdb162a7159f4ca36ce980a18c43d8f036966e7f73f866ac8f493b75e0c27e9", size = 13669384, upload-time = "2025-12-04T15:06:51.809Z" }, - { url = "https://files.pythonhosted.org/packages/72/64/3eb5949169fc19c50c04f28ece2c189d3b6edd57e5b533649dae6ca484fe/ruff-0.14.8-py3-none-macosx_11_0_arm64.whl", hash = "sha256:2e2fcbefe91f9fad0916850edf0854530c15bd1926b6b779de47e9ab619ea38f", size = 12806917, upload-time = "2025-12-04T15:06:08.925Z" }, - { url = "https://files.pythonhosted.org/packages/c4/08/5250babb0b1b11910f470370ec0cbc67470231f7cdc033cee57d4976f941/ruff-0.14.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9d70721066a296f45786ec31916dc287b44040f553da21564de0ab4d45a869b", size = 13256112, upload-time = "2025-12-04T15:06:23.498Z" }, - { url = "https://files.pythonhosted.org/packages/78/4c/6c588e97a8e8c2d4b522c31a579e1df2b4d003eddfbe23d1f262b1a431ff/ruff-0.14.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2c87e09b3cd9d126fc67a9ecd3b5b1d3ded2b9c7fce3f16e315346b9d05cfb52", size = 13227559, upload-time = "2025-12-04T15:06:33.432Z" }, - { url = "https://files.pythonhosted.org/packages/23/ce/5f78cea13eda8eceac71b5f6fa6e9223df9b87bb2c1891c166d1f0dce9f1/ruff-0.14.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d62cb310c4fbcb9ee4ac023fe17f984ae1e12b8a4a02e3d21489f9a2a5f730c", size = 
13896379, upload-time = "2025-12-04T15:06:02.687Z" }, - { url = "https://files.pythonhosted.org/packages/cf/79/13de4517c4dadce9218a20035b21212a4c180e009507731f0d3b3f5df85a/ruff-0.14.8-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:1af35c2d62633d4da0521178e8a2641c636d2a7153da0bac1b30cfd4ccd91344", size = 15372786, upload-time = "2025-12-04T15:06:29.828Z" }, - { url = "https://files.pythonhosted.org/packages/00/06/33df72b3bb42be8a1c3815fd4fae83fa2945fc725a25d87ba3e42d1cc108/ruff-0.14.8-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:25add4575ffecc53d60eed3f24b1e934493631b48ebbc6ebaf9d8517924aca4b", size = 14990029, upload-time = "2025-12-04T15:06:36.812Z" }, - { url = "https://files.pythonhosted.org/packages/64/61/0f34927bd90925880394de0e081ce1afab66d7b3525336f5771dcf0cb46c/ruff-0.14.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4c943d847b7f02f7db4201a0600ea7d244d8a404fbb639b439e987edcf2baf9a", size = 14407037, upload-time = "2025-12-04T15:06:39.979Z" }, - { url = "https://files.pythonhosted.org/packages/96/bc/058fe0aefc0fbf0d19614cb6d1a3e2c048f7dc77ca64957f33b12cfdc5ef/ruff-0.14.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb6e8bf7b4f627548daa1b69283dac5a296bfe9ce856703b03130732e20ddfe2", size = 14102390, upload-time = "2025-12-04T15:06:46.372Z" }, - { url = "https://files.pythonhosted.org/packages/af/a4/e4f77b02b804546f4c17e8b37a524c27012dd6ff05855d2243b49a7d3cb9/ruff-0.14.8-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:7aaf2974f378e6b01d1e257c6948207aec6a9b5ba53fab23d0182efb887a0e4a", size = 14230793, upload-time = "2025-12-04T15:06:20.497Z" }, - { url = "https://files.pythonhosted.org/packages/3f/52/bb8c02373f79552e8d087cedaffad76b8892033d2876c2498a2582f09dcf/ruff-0.14.8-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e5758ca513c43ad8a4ef13f0f081f80f08008f410790f3611a21a92421ab045b", size = 13160039, upload-time = "2025-12-04T15:06:49.06Z" }, - { 
url = "https://files.pythonhosted.org/packages/1f/ad/b69d6962e477842e25c0b11622548df746290cc6d76f9e0f4ed7456c2c31/ruff-0.14.8-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:f74f7ba163b6e85a8d81a590363bf71618847e5078d90827749bfda1d88c9cdf", size = 13205158, upload-time = "2025-12-04T15:06:54.574Z" }, - { url = "https://files.pythonhosted.org/packages/06/63/54f23da1315c0b3dfc1bc03fbc34e10378918a20c0b0f086418734e57e74/ruff-0.14.8-py3-none-musllinux_1_2_i686.whl", hash = "sha256:eed28f6fafcc9591994c42254f5a5c5ca40e69a30721d2ab18bb0bb3baac3ab6", size = 13469550, upload-time = "2025-12-04T15:05:59.209Z" }, - { url = "https://files.pythonhosted.org/packages/70/7d/a4d7b1961e4903bc37fffb7ddcfaa7beb250f67d97cfd1ee1d5cddb1ec90/ruff-0.14.8-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:21d48fa744c9d1cb8d71eb0a740c4dd02751a5de9db9a730a8ef75ca34cf138e", size = 14211332, upload-time = "2025-12-04T15:06:06.027Z" }, - { url = "https://files.pythonhosted.org/packages/5d/93/2a5063341fa17054e5c86582136e9895db773e3c2ffb770dde50a09f35f0/ruff-0.14.8-py3-none-win32.whl", hash = "sha256:15f04cb45c051159baebb0f0037f404f1dc2f15a927418f29730f411a79bc4e7", size = 13151890, upload-time = "2025-12-04T15:06:11.668Z" }, - { url = "https://files.pythonhosted.org/packages/02/1c/65c61a0859c0add13a3e1cbb6024b42de587456a43006ca2d4fd3d1618fe/ruff-0.14.8-py3-none-win_amd64.whl", hash = "sha256:9eeb0b24242b5bbff3011409a739929f497f3fb5fe3b5698aba5e77e8c833097", size = 14537826, upload-time = "2025-12-04T15:06:26.409Z" }, - { url = "https://files.pythonhosted.org/packages/6d/63/8b41cea3afd7f58eb64ac9251668ee0073789a3bc9ac6f816c8c6fef986d/ruff-0.14.8-py3-none-win_arm64.whl", hash = "sha256:965a582c93c63fe715fd3e3f8aa37c4b776777203d8e1d8aa3cc0c14424a4b99", size = 13634522, upload-time = "2025-12-04T15:06:43.212Z" }, +version = "0.14.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/57/08/52232a877978dd8f9cf2aeddce3e611b40a63287dfca29b6b8da791f5e8d/ruff-0.14.10.tar.gz", hash = "sha256:9a2e830f075d1a42cd28420d7809ace390832a490ed0966fe373ba288e77aaf4", size = 5859763, upload-time = "2025-12-18T19:28:57.98Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/01/933704d69f3f05ee16ef11406b78881733c186fe14b6a46b05cfcaf6d3b2/ruff-0.14.10-py3-none-linux_armv6l.whl", hash = "sha256:7a3ce585f2ade3e1f29ec1b92df13e3da262178df8c8bdf876f48fa0e8316c49", size = 13527080, upload-time = "2025-12-18T19:29:25.642Z" }, + { url = "https://files.pythonhosted.org/packages/df/58/a0349197a7dfa603ffb7f5b0470391efa79ddc327c1e29c4851e85b09cc5/ruff-0.14.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:674f9be9372907f7257c51f1d4fc902cb7cf014b9980152b802794317941f08f", size = 13797320, upload-time = "2025-12-18T19:29:02.571Z" }, + { url = "https://files.pythonhosted.org/packages/7b/82/36be59f00a6082e38c23536df4e71cdbc6af8d7c707eade97fcad5c98235/ruff-0.14.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d85713d522348837ef9df8efca33ccb8bd6fcfc86a2cde3ccb4bc9d28a18003d", size = 12918434, upload-time = "2025-12-18T19:28:51.202Z" }, + { url = "https://files.pythonhosted.org/packages/a6/00/45c62a7f7e34da92a25804f813ebe05c88aa9e0c25e5cb5a7d23dd7450e3/ruff-0.14.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6987ebe0501ae4f4308d7d24e2d0fe3d7a98430f5adfd0f1fead050a740a3a77", size = 13371961, upload-time = "2025-12-18T19:29:04.991Z" }, + { url = "https://files.pythonhosted.org/packages/40/31/a5906d60f0405f7e57045a70f2d57084a93ca7425f22e1d66904769d1628/ruff-0.14.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:16a01dfb7b9e4eee556fbfd5392806b1b8550c9b4a9f6acd3dbe6812b193c70a", size = 13275629, upload-time = "2025-12-18T19:29:21.381Z" }, + { url = 
"https://files.pythonhosted.org/packages/3e/60/61c0087df21894cf9d928dc04bcd4fb10e8b2e8dca7b1a276ba2155b2002/ruff-0.14.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7165d31a925b7a294465fa81be8c12a0e9b60fb02bf177e79067c867e71f8b1f", size = 14029234, upload-time = "2025-12-18T19:29:00.132Z" }, + { url = "https://files.pythonhosted.org/packages/44/84/77d911bee3b92348b6e5dab5a0c898d87084ea03ac5dc708f46d88407def/ruff-0.14.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c561695675b972effb0c0a45db233f2c816ff3da8dcfbe7dfc7eed625f218935", size = 15449890, upload-time = "2025-12-18T19:28:53.573Z" }, + { url = "https://files.pythonhosted.org/packages/e9/36/480206eaefa24a7ec321582dda580443a8f0671fdbf6b1c80e9c3e93a16a/ruff-0.14.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4bb98fcbbc61725968893682fd4df8966a34611239c9fd07a1f6a07e7103d08e", size = 15123172, upload-time = "2025-12-18T19:29:23.453Z" }, + { url = "https://files.pythonhosted.org/packages/5c/38/68e414156015ba80cef5473d57919d27dfb62ec804b96180bafdeaf0e090/ruff-0.14.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f24b47993a9d8cb858429e97bdf8544c78029f09b520af615c1d261bf827001d", size = 14460260, upload-time = "2025-12-18T19:29:27.808Z" }, + { url = "https://files.pythonhosted.org/packages/b3/19/9e050c0dca8aba824d67cc0db69fb459c28d8cd3f6855b1405b3f29cc91d/ruff-0.14.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59aabd2e2c4fd614d2862e7939c34a532c04f1084476d6833dddef4afab87e9f", size = 14229978, upload-time = "2025-12-18T19:29:11.32Z" }, + { url = "https://files.pythonhosted.org/packages/51/eb/e8dd1dd6e05b9e695aa9dd420f4577debdd0f87a5ff2fedda33c09e9be8c/ruff-0.14.10-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:213db2b2e44be8625002dbea33bb9c60c66ea2c07c084a00d55732689d697a7f", size = 14338036, upload-time = "2025-12-18T19:29:09.184Z" }, + { url = 
"https://files.pythonhosted.org/packages/6a/12/f3e3a505db7c19303b70af370d137795fcfec136d670d5de5391e295c134/ruff-0.14.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b914c40ab64865a17a9a5b67911d14df72346a634527240039eb3bd650e5979d", size = 13264051, upload-time = "2025-12-18T19:29:13.431Z" }, + { url = "https://files.pythonhosted.org/packages/08/64/8c3a47eaccfef8ac20e0484e68e0772013eb85802f8a9f7603ca751eb166/ruff-0.14.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:1484983559f026788e3a5c07c81ef7d1e97c1c78ed03041a18f75df104c45405", size = 13283998, upload-time = "2025-12-18T19:29:06.994Z" }, + { url = "https://files.pythonhosted.org/packages/12/84/534a5506f4074e5cc0529e5cd96cfc01bb480e460c7edf5af70d2bcae55e/ruff-0.14.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c70427132db492d25f982fffc8d6c7535cc2fd2c83fc8888f05caaa248521e60", size = 13601891, upload-time = "2025-12-18T19:28:55.811Z" }, + { url = "https://files.pythonhosted.org/packages/0d/1e/14c916087d8598917dbad9b2921d340f7884824ad6e9c55de948a93b106d/ruff-0.14.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5bcf45b681e9f1ee6445d317ce1fa9d6cba9a6049542d1c3d5b5958986be8830", size = 14336660, upload-time = "2025-12-18T19:29:16.531Z" }, + { url = "https://files.pythonhosted.org/packages/f2/1c/d7b67ab43f30013b47c12b42d1acd354c195351a3f7a1d67f59e54227ede/ruff-0.14.10-py3-none-win32.whl", hash = "sha256:104c49fc7ab73f3f3a758039adea978869a918f31b73280db175b43a2d9b51d6", size = 13196187, upload-time = "2025-12-18T19:29:19.006Z" }, + { url = "https://files.pythonhosted.org/packages/fb/9c/896c862e13886fae2af961bef3e6312db9ebc6adc2b156fe95e615dee8c1/ruff-0.14.10-py3-none-win_amd64.whl", hash = "sha256:466297bd73638c6bdf06485683e812db1c00c7ac96d4ddd0294a338c62fdc154", size = 14661283, upload-time = "2025-12-18T19:29:30.16Z" }, + { url = "https://files.pythonhosted.org/packages/74/31/b0e29d572670dca3674eeee78e418f20bdf97fa8aa9ea71380885e175ca0/ruff-0.14.10-py3-none-win_arm64.whl", hash = 
"sha256:e51d046cf6dda98a4633b8a8a771451107413b0f07183b2bef03f075599e44e6", size = 13729839, upload-time = "2025-12-18T19:28:48.636Z" }, ] [[package]] @@ -3925,11 +3923,11 @@ wheels = [ [[package]] name = "soupsieve" -version = "2.8" +version = "2.8.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6d/e6/21ccce3262dd4889aa3332e5a119a3491a95e8f60939870a3a035aabac0d/soupsieve-2.8.tar.gz", hash = "sha256:e2dd4a40a628cb5f28f6d4b0db8800b8f581b65bb380b97de22ba5ca8d72572f", size = 103472, upload-time = "2025-08-27T15:39:51.78Z" } +sdist = { url = "https://files.pythonhosted.org/packages/89/23/adf3796d740536d63a6fbda113d07e60c734b6ed5d3058d1e47fc0495e47/soupsieve-2.8.1.tar.gz", hash = "sha256:4cf733bc50fa805f5df4b8ef4740fc0e0fa6218cf3006269afd3f9d6d80fd350", size = 117856, upload-time = "2025-12-18T13:50:34.655Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/14/a0/bb38d3b76b8cae341dad93a2dd83ab7462e6dbcdd84d43f54ee60a8dc167/soupsieve-2.8-py3-none-any.whl", hash = "sha256:0cc76456a30e20f5d7f2e14a98a4ae2ee4e5abdc7c5ea0aafe795f344bc7984c", size = 36679, upload-time = "2025-08-27T15:39:50.179Z" }, + { url = "https://files.pythonhosted.org/packages/48/f3/b67d6ea49ca9154453b6d70b34ea22f3996b9fa55da105a79d8732227adc/soupsieve-2.8.1-py3-none-any.whl", hash = "sha256:a11fe2a6f3d76ab3cf2de04eb339c1be5b506a8a47f2ceb6d139803177f85434", size = 36710, upload-time = "2025-12-18T13:50:33.267Z" }, ] [[package]] @@ -4085,14 +4083,15 @@ wheels = [ [[package]] name = "sse-starlette" -version = "3.0.3" +version = "3.0.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, + { name = "starlette" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/db/3c/fa6517610dc641262b77cc7bf994ecd17465812c1b0585fe33e11be758ab/sse_starlette-3.0.3.tar.gz", hash = "sha256:88cfb08747e16200ea990c8ca876b03910a23b547ab3bd764c0d8eb81019b971", size = 21943, upload-time = 
"2025-10-30T18:44:20.117Z" } +sdist = { url = "https://files.pythonhosted.org/packages/17/8b/54651ad49bce99a50fd61a7f19c2b6a79fbb072e693101fbb1194c362054/sse_starlette-3.0.4.tar.gz", hash = "sha256:5e34286862e96ead0eb70f5ddd0bd21ab1f6473a8f44419dd267f431611383dd", size = 22576, upload-time = "2025-12-14T16:22:52.493Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/23/a0/984525d19ca5c8a6c33911a0c164b11490dd0f90ff7fd689f704f84e9a11/sse_starlette-3.0.3-py3-none-any.whl", hash = "sha256:af5bf5a6f3933df1d9c7f8539633dc8444ca6a97ab2e2a7cd3b6e431ac03a431", size = 11765, upload-time = "2025-10-30T18:44:18.834Z" }, + { url = "https://files.pythonhosted.org/packages/71/22/8ab1066358601163e1ac732837adba3672f703818f693e179b24e0d3b65c/sse_starlette-3.0.4-py3-none-any.whl", hash = "sha256:32c80ef0d04506ced4b0b6ab8fe300925edc37d26f666afb1874c754895f5dc3", size = 11764, upload-time = "2025-12-14T16:22:51.453Z" }, ] [[package]] @@ -4222,21 +4221,21 @@ wheels = [ [[package]] name = "tornado" -version = "6.5.3" +version = "6.5.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7f/2e/3d22d478f27cb4b41edd4db7f10cd7846d0a28ea443342de3dba97035166/tornado-6.5.3.tar.gz", hash = "sha256:16abdeb0211796ffc73765bc0a20119712d68afeeaf93d1a3f2edf6b3aee8d5a", size = 513348, upload-time = "2025-12-11T04:16:42.225Z" } +sdist = { url = "https://files.pythonhosted.org/packages/37/1d/0a336abf618272d53f62ebe274f712e213f5a03c0b2339575430b8362ef2/tornado-6.5.4.tar.gz", hash = "sha256:a22fa9047405d03260b483980635f0b041989d8bcc9a313f8fe18b411d84b1d7", size = 513632, upload-time = "2025-12-15T19:21:03.836Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d3/e9/bf22f66e1d5d112c0617974b5ce86666683b32c09b355dfcd59f8d5c8ef6/tornado-6.5.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:2dd7d7e8d3e4635447a8afd4987951e3d4e8d1fb9ad1908c54c4002aabab0520", size = 443860, upload-time = "2025-12-11T04:16:26.638Z" }, - { url 
= "https://files.pythonhosted.org/packages/ca/9c/594b631f0b8dc5977080c7093d1e96f1377c10552577d2c31bb0208c9362/tornado-6.5.3-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:5977a396f83496657779f59a48c38096ef01edfe4f42f1c0634b791dde8165d0", size = 442118, upload-time = "2025-12-11T04:16:28.32Z" }, - { url = "https://files.pythonhosted.org/packages/78/f6/685b869f5b5b9d9547571be838c6106172082751696355b60fc32a4988ed/tornado-6.5.3-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f72ac800be2ac73ddc1504f7aa21069a4137e8d70c387172c063d363d04f2208", size = 445700, upload-time = "2025-12-11T04:16:29.64Z" }, - { url = "https://files.pythonhosted.org/packages/91/4c/f0d19edf24912b7f21ae5e941f7798d132ad4d9b71441c1e70917a297265/tornado-6.5.3-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c43c4fc4f5419c6561cfb8b884a8f6db7b142787d47821e1a0e1296253458265", size = 445041, upload-time = "2025-12-11T04:16:30.799Z" }, - { url = "https://files.pythonhosted.org/packages/eb/2b/e02da94f4a4aef2bb3b923c838ef284a77548a5f06bac2a8682b36b4eead/tornado-6.5.3-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de8b3fed4b3afb65d542d7702ac8767b567e240f6a43020be8eaef59328f117b", size = 445270, upload-time = "2025-12-11T04:16:32.316Z" }, - { url = "https://files.pythonhosted.org/packages/58/e2/7a7535d23133443552719dba526dacbb7415f980157da9f14950ddb88ad6/tornado-6.5.3-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:dbc4b4c32245b952566e17a20d5c1648fbed0e16aec3fc7e19f3974b36e0e47c", size = 445957, upload-time = "2025-12-11T04:16:33.913Z" }, - { url = "https://files.pythonhosted.org/packages/a0/1f/9ff92eca81ff17a86286ec440dcd5eab0400326eb81761aa9a4eecb1ffb9/tornado-6.5.3-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:db238e8a174b4bfd0d0238b8cfcff1c14aebb4e2fcdafbf0ea5da3b81caceb4c", size = 445371, upload-time = "2025-12-11T04:16:35.093Z" }, - { url = 
"https://files.pythonhosted.org/packages/70/b1/1d03ae4526a393b0b839472a844397337f03c7f3a1e6b5c82241f0e18281/tornado-6.5.3-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:892595c100cd9b53a768cbfc109dfc55dec884afe2de5290611a566078d9692d", size = 445348, upload-time = "2025-12-11T04:16:36.679Z" }, - { url = "https://files.pythonhosted.org/packages/4b/7d/7c181feadc8941f418d0d26c3790ee34ffa4bd0a294bc5201d44ebd19c1e/tornado-6.5.3-cp39-abi3-win32.whl", hash = "sha256:88141456525fe291e47bbe1ba3ffb7982549329f09b4299a56813923af2bd197", size = 446433, upload-time = "2025-12-11T04:16:38.332Z" }, - { url = "https://files.pythonhosted.org/packages/34/98/4f7f938606e21d0baea8c6c39a7c8e95bdf8e50b0595b1bb6f0de2af7a6e/tornado-6.5.3-cp39-abi3-win_amd64.whl", hash = "sha256:ba4b513d221cc7f795a532c1e296f36bcf6a60e54b15efd3f092889458c69af1", size = 446842, upload-time = "2025-12-11T04:16:39.867Z" }, - { url = "https://files.pythonhosted.org/packages/7a/27/0e3fca4c4edf33fb6ee079e784c63961cd816971a45e5e4cacebe794158d/tornado-6.5.3-cp39-abi3-win_arm64.whl", hash = "sha256:278c54d262911365075dd45e0b6314308c74badd6ff9a54490e7daccdd5ed0ea", size = 445863, upload-time = "2025-12-11T04:16:41.099Z" }, + { url = "https://files.pythonhosted.org/packages/ab/a9/e94a9d5224107d7ce3cc1fab8d5dc97f5ea351ccc6322ee4fb661da94e35/tornado-6.5.4-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d6241c1a16b1c9e4cc28148b1cda97dd1c6cb4fb7068ac1bedc610768dff0ba9", size = 443909, upload-time = "2025-12-15T19:20:48.382Z" }, + { url = "https://files.pythonhosted.org/packages/db/7e/f7b8d8c4453f305a51f80dbb49014257bb7d28ccb4bbb8dd328ea995ecad/tornado-6.5.4-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:2d50f63dda1d2cac3ae1fa23d254e16b5e38153758470e9956cbc3d813d40843", size = 442163, upload-time = "2025-12-15T19:20:49.791Z" }, + { url = 
"https://files.pythonhosted.org/packages/ba/b5/206f82d51e1bfa940ba366a8d2f83904b15942c45a78dd978b599870ab44/tornado-6.5.4-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1cf66105dc6acb5af613c054955b8137e34a03698aa53272dbda4afe252be17", size = 445746, upload-time = "2025-12-15T19:20:51.491Z" }, + { url = "https://files.pythonhosted.org/packages/8e/9d/1a3338e0bd30ada6ad4356c13a0a6c35fbc859063fa7eddb309183364ac1/tornado-6.5.4-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50ff0a58b0dc97939d29da29cd624da010e7f804746621c78d14b80238669335", size = 445083, upload-time = "2025-12-15T19:20:52.778Z" }, + { url = "https://files.pythonhosted.org/packages/50/d4/e51d52047e7eb9a582da59f32125d17c0482d065afd5d3bc435ff2120dc5/tornado-6.5.4-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5fb5e04efa54cf0baabdd10061eb4148e0be137166146fff835745f59ab9f7f", size = 445315, upload-time = "2025-12-15T19:20:53.996Z" }, + { url = "https://files.pythonhosted.org/packages/27/07/2273972f69ca63dbc139694a3fc4684edec3ea3f9efabf77ed32483b875c/tornado-6.5.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9c86b1643b33a4cd415f8d0fe53045f913bf07b4a3ef646b735a6a86047dda84", size = 446003, upload-time = "2025-12-15T19:20:56.101Z" }, + { url = "https://files.pythonhosted.org/packages/d1/83/41c52e47502bf7260044413b6770d1a48dda2f0246f95ee1384a3cd9c44a/tornado-6.5.4-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:6eb82872335a53dd063a4f10917b3efd28270b56a33db69009606a0312660a6f", size = 445412, upload-time = "2025-12-15T19:20:57.398Z" }, + { url = "https://files.pythonhosted.org/packages/10/c7/bc96917f06cbee182d44735d4ecde9c432e25b84f4c2086143013e7b9e52/tornado-6.5.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6076d5dda368c9328ff41ab5d9dd3608e695e8225d1cd0fd1e006f05da3635a8", size = 445392, upload-time = "2025-12-15T19:20:58.692Z" }, + { url = 
"https://files.pythonhosted.org/packages/0c/1a/d7592328d037d36f2d2462f4bc1fbb383eec9278bc786c1b111cbbd44cfa/tornado-6.5.4-cp39-abi3-win32.whl", hash = "sha256:1768110f2411d5cd281bac0a090f707223ce77fd110424361092859e089b38d1", size = 446481, upload-time = "2025-12-15T19:21:00.008Z" }, + { url = "https://files.pythonhosted.org/packages/d6/6d/c69be695a0a64fd37a97db12355a035a6d90f79067a3cf936ec2b1dc38cd/tornado-6.5.4-cp39-abi3-win_amd64.whl", hash = "sha256:fa07d31e0cd85c60713f2b995da613588aa03e1303d75705dca6af8babc18ddc", size = 446886, upload-time = "2025-12-15T19:21:01.287Z" }, + { url = "https://files.pythonhosted.org/packages/50/49/8dc3fd90902f70084bd2cd059d576ddb4f8bb44c2c7c0e33a11422acb17e/tornado-6.5.4-cp39-abi3-win_arm64.whl", hash = "sha256:053e6e16701eb6cbe641f308f4c1a9541f91b6261991160391bfc342e8a551a1", size = 445910, upload-time = "2025-12-15T19:21:02.571Z" }, ] [[package]] @@ -4262,15 +4261,15 @@ wheels = [ [[package]] name = "typer-slim" -version = "0.20.0" +version = "0.20.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8e/45/81b94a52caed434b94da65729c03ad0fb7665fab0f7db9ee54c94e541403/typer_slim-0.20.0.tar.gz", hash = "sha256:9fc6607b3c6c20f5c33ea9590cbeb17848667c51feee27d9e314a579ab07d1a3", size = 106561, upload-time = "2025-10-20T17:03:46.642Z" } +sdist = { url = "https://files.pythonhosted.org/packages/3f/3d/6a4ec47010e8de34dade20c8e7bce90502b173f62a6b41619523a3fcf562/typer_slim-0.20.1.tar.gz", hash = "sha256:bb9e4f7e6dc31551c8a201383df322b81b0ce37239a5ead302598a2ebb6f7c9c", size = 106113, upload-time = "2025-12-19T16:48:54.206Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5e/dd/5cbf31f402f1cc0ab087c94d4669cfa55bd1e818688b910631e131d74e75/typer_slim-0.20.0-py3-none-any.whl", hash = "sha256:f42a9b7571a12b97dddf364745d29f12221865acef7a2680065f9bb29c7dc89d", size = 47087, upload-time = 
"2025-10-20T17:03:44.546Z" }, + { url = "https://files.pythonhosted.org/packages/d8/f9/a273c8b57c69ac1b90509ebda204972265fdc978fbbecc25980786f8c038/typer_slim-0.20.1-py3-none-any.whl", hash = "sha256:8e89c5dbaffe87a4f86f4c7a9e2f7059b5b68c66f558f298969d42ce34f10122", size = 47440, upload-time = "2025-12-19T16:48:52.678Z" }, ] [[package]] @@ -4309,11 +4308,11 @@ wheels = [ [[package]] name = "tzdata" -version = "2025.2" +version = "2025.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380, upload-time = "2025-03-23T13:54:43.652Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5e/a7/c202b344c5ca7daf398f3b8a477eeb205cf3b6f32e7ec3a6bac0629ca975/tzdata-2025.3.tar.gz", hash = "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7", size = 196772, upload-time = "2025-12-13T17:45:35.667Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload-time = "2025-03-23T13:54:41.845Z" }, + { url = "https://files.pythonhosted.org/packages/c7/b0/003792df09decd6849a5e39c28b513c06e84436a54440380862b5aeff25d/tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", size = 348521, upload-time = "2025-12-13T17:45:33.889Z" }, ] [[package]] @@ -4358,15 +4357,15 @@ wheels = [ [[package]] name = "uvicorn" -version = "0.38.0" +version = "0.40.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "h11" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/cb/ce/f06b84e2697fef4688ca63bdb2fdf113ca0a3be33f94488f2cadb690b0cf/uvicorn-0.38.0.tar.gz", hash = "sha256:fd97093bdd120a2609fc0d3afe931d4d4ad688b6e75f0f929fde1bc36fe0e91d", size = 80605, upload-time = "2025-10-18T13:46:44.63Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/d1/8f3c683c9561a4e6689dd3b1d345c815f10f86acd044ee1fb9a4dcd0b8c5/uvicorn-0.40.0.tar.gz", hash = "sha256:839676675e87e73694518b5574fd0f24c9d97b46bea16df7b8c05ea1a51071ea", size = 81761, upload-time = "2025-12-21T14:16:22.45Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/d9/d88e73ca598f4f6ff671fb5fde8a32925c2e08a637303a1d12883c7305fa/uvicorn-0.38.0-py3-none-any.whl", hash = "sha256:48c0afd214ceb59340075b4a052ea1ee91c16fbc2a9b1469cca0e54566977b02", size = 68109, upload-time = "2025-10-18T13:46:42.958Z" }, + { url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" }, ] [[package]]