-
Notifications
You must be signed in to change notification settings - Fork 4
refactor(dataset-store): extract amp-datasets-registry crate
#1563
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
4e51736 to
56d88ee
Compare
LNSD
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM ✅
Please, check my comments 🙂
| let dataset_store = { | ||
| let provider_configs_store = ProviderConfigsStore::new( | ||
| amp_object_store::new_with_prefix( | ||
| &config.providers_store_url, | ||
| config.providers_store_url.path(), | ||
| ) | ||
| .map_err(Error::ProvidersStoreCreation)?, | ||
| ); | ||
| let dataset_manifests_store = DatasetManifestsStore::new( | ||
| amp_object_store::new_with_prefix( | ||
| &config.manifests_store_url, | ||
| config.manifests_store_url.path(), | ||
| ) | ||
| .map_err(Error::ManifestsStoreCreation)?, | ||
| ); | ||
| DatasetStore::new( | ||
| metadata_db.clone(), | ||
| provider_configs_store, | ||
| dataset_manifests_store, | ||
| let provider_configs_store = ProviderConfigsStore::new( | ||
| amp_object_store::new_with_prefix( | ||
| &config.providers_store_url, | ||
| config.providers_store_url.path(), | ||
| ) | ||
| }; | ||
| .map_err(Error::ProvidersStoreCreation)?, | ||
| ); | ||
| let dataset_manifests_store = DatasetManifestsStore::new( | ||
| amp_object_store::new_with_prefix( | ||
| &config.manifests_store_url, | ||
| config.manifests_store_url.path(), | ||
| ) | ||
| .map_err(Error::ManifestsStoreCreation)?, | ||
| ); | ||
| let datasets_registry = DatasetsRegistry::new(metadata_db.clone(), dataset_manifests_store); | ||
| let dataset_store = DatasetStore::new(datasets_registry.clone(), provider_configs_store); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you respect the let = {} blocks? That makes easier to read what are these many lines for
| let dataset_store = { | ||
| let provider_configs_store = ProviderConfigsStore::new( | ||
| amp_object_store::new_with_prefix( | ||
| &config.providers_store_url, | ||
| config.providers_store_url.path(), | ||
| ) | ||
| .map_err(Error::ProvidersStoreCreation)?, | ||
| ); | ||
| let dataset_manifests_store = DatasetManifestsStore::new( | ||
| amp_object_store::new_with_prefix( | ||
| &config.manifests_store_url, | ||
| config.manifests_store_url.path(), | ||
| ) | ||
| .map_err(Error::ManifestsStoreCreation)?, | ||
| ); | ||
| DatasetStore::new( | ||
| metadata_db.clone(), | ||
| provider_configs_store, | ||
| dataset_manifests_store, | ||
| let provider_configs_store = ProviderConfigsStore::new( | ||
| amp_object_store::new_with_prefix( | ||
| &config.providers_store_url, | ||
| config.providers_store_url.path(), | ||
| ) | ||
| }; | ||
| .map_err(Error::ProvidersStoreCreation)?, | ||
| ); | ||
| let dataset_manifests_store = DatasetManifestsStore::new( | ||
| amp_object_store::new_with_prefix( | ||
| &config.manifests_store_url, | ||
| config.manifests_store_url.path(), | ||
| ) | ||
| .map_err(Error::ManifestsStoreCreation)?, | ||
| ); | ||
| let datasets_registry = DatasetsRegistry::new(metadata_db.clone(), dataset_manifests_store); | ||
| let dataset_store = DatasetStore::new(datasets_registry.clone(), provider_configs_store); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Idem
| let manifest = manifest_content | ||
| .try_into_manifest::<datasets_common::manifest::Manifest>() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice!
| let manifest_json: JsonValue = manifest_content | ||
| .try_into_manifest() | ||
| .try_into_manifest::<JsonValue>() | ||
| .map_err(Error::ParseManifest)?; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You should remove the type annotation just before the = sign.
| ## Usage | ||
|
|
||
| ### Creating a Registry | ||
|
|
||
| ```rust | ||
| use amp_datasets_registry::{DatasetsRegistry, manifests::DatasetManifestsStore}; | ||
|
|
||
| let manifests_store = DatasetManifestsStore::new(object_store); | ||
| let registry = DatasetsRegistry::new(metadata_db, manifests_store); | ||
| ``` | ||
|
|
||
| ### Registering a Manifest | ||
|
|
||
| ```rust | ||
| // Store manifest without linking to any dataset | ||
| let hash = Hash::from_str("abc123...")?; | ||
| registry.register_manifest(&hash, manifest_json).await?; | ||
| ``` | ||
|
|
||
| ### Linking and Versioning | ||
|
|
||
| ```rust | ||
| // Link manifest to dataset (also sets "dev" tag) | ||
| registry.link_manifest(&namespace, &name, &hash).await?; | ||
|
|
||
| // Set a semantic version (also updates "latest" if higher) | ||
| let version = Version::new(1, 0, 0); | ||
| registry.set_dataset_version_tag(&namespace, &name, &version, &hash).await?; | ||
| ``` | ||
|
|
||
| ### Resolving Revisions | ||
|
|
||
| ```rust | ||
| // Resolve "latest" tag | ||
| let hash = registry.resolve_latest_version_hash(&namespace, &name).await?; | ||
|
|
||
| // Resolve any revision reference | ||
| let hash_ref = registry.resolve_revision(&reference).await?; | ||
| ``` | ||
|
|
||
| ### Garbage Collection | ||
|
|
||
| ```rust | ||
| // Find orphaned manifests (not linked to any dataset) | ||
| let orphaned = registry.list_orphaned_manifests().await?; | ||
|
|
||
| // Delete orphaned manifests | ||
| for hash in orphaned { | ||
| registry.delete_manifest(&hash).await?; | ||
| } | ||
| ``` | ||
|
|
||
| ## API Reference | ||
|
|
||
| ### Manifest Management | ||
|
|
||
| | Method | Description | | ||
| |--------|-------------| | ||
| | `register_manifest(hash, content)` | Store manifest in object store and database | | ||
| | `get_manifest(hash)` | Retrieve manifest content by hash | | ||
| | `delete_manifest(hash)` | Delete unlinked manifest from both stores | | ||
| | `list_all_manifests()` | List all manifests with usage counts | | ||
| | `list_orphaned_manifests()` | List manifests with no dataset links | | ||
|
|
||
| ### Version Tags | ||
|
|
||
| | Method | Description | | ||
| |--------|-------------| | ||
| | `set_dataset_version_tag(ns, name, version, hash)` | Set semantic version, auto-update "latest" | | ||
| | `delete_dataset_version_tag(ns, name, version)` | Remove a version tag | | ||
| | `list_dataset_version_tags(ns, name)` | List all versions for a dataset | | ||
|
|
||
| ### Revision Resolution | ||
|
|
||
| | Method | Description | | ||
| |--------|-------------| | ||
| | `resolve_revision(reference)` | Resolve any revision to hash reference | | ||
| | `resolve_latest_version_hash(ns, name)` | Get hash for "latest" tag | | ||
| | `resolve_dev_version_hash(ns, name)` | Get hash for "dev" tag | | ||
| | `resolve_version_hash(ns, name, version)` | Get hash for specific version | | ||
|
|
||
| ### Dataset Links | ||
|
|
||
| | Method | Description | | ||
| |--------|-------------| | ||
| | `link_manifest(ns, name, hash)` | Link manifest to dataset, set "dev" tag | | ||
| | `unlink_dataset_manifests(ns, name)` | Remove all links for a dataset | | ||
| | `is_manifest_linked(ns, name, hash)` | Check if manifest is linked | | ||
| | `list_manifest_linked_datasets(hash)` | List datasets using a manifest | |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the "component feature docs" like this, it makes no sense to duplicate the info already present in the code and its Rustdocs.
I think you can drom this portion of the docs.
| let dataset_store = { | ||
| let provider_configs_store = ProviderConfigsStore::new( | ||
| amp_object_store::new_with_prefix( | ||
| &config.providers_store_url, | ||
| config.providers_store_url.path(), | ||
| ) | ||
| .expect("Failed to create provider configs store"), | ||
| ); | ||
| let dataset_manifests_store = DatasetManifestsStore::new( | ||
| amp_object_store::new_with_prefix( | ||
| &config.manifests_store_url, | ||
| config.manifests_store_url.path(), | ||
| ) | ||
| .expect("Failed to create manifests store"), | ||
| ); | ||
| DatasetStore::new( | ||
| sysdb.conn_pool().clone(), | ||
| provider_configs_store, | ||
| dataset_manifests_store, | ||
| let provider_configs_store = ProviderConfigsStore::new( | ||
| amp_object_store::new_with_prefix( | ||
| &config.providers_store_url, | ||
| config.providers_store_url.path(), | ||
| ) | ||
| }; | ||
| .expect("Failed to create provider configs store"), | ||
| ); | ||
| let dataset_manifests_store = DatasetManifestsStore::new( | ||
| amp_object_store::new_with_prefix( | ||
| &config.manifests_store_url, | ||
| config.manifests_store_url.path(), | ||
| ) | ||
| .expect("Failed to create manifests store"), | ||
| ); | ||
| let datasets_registry = | ||
| DatasetsRegistry::new(sysdb.conn_pool().clone(), dataset_manifests_store); | ||
| let dataset_store = | ||
| DatasetStore::new(datasets_registry.clone(), provider_configs_store); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Idem. let (dataset_store, datasets_registry) = {...}.
| --- | ||
| name: "dataset-registry" | ||
| description: "DatasetsRegistry for manifest storage, version tags (latest/dev/semantic), revision resolution. Load when working with dataset versioning or manifest management" | ||
| components: "crate:datasets-registry,crate:metadata-db" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add the crate:amp-object-store to the list? Also rename crate:amp-datasets-registry
amp-datasets-registrycrate fromamp-dataset-storeadmin-apito useDatasetsRegistrydirectly for registry operationsCloses #1557