diff --git a/docker/entity-api/Dockerfile b/docker/entity-api/Dockerfile index 9ab08a92..ddd373dd 100644 --- a/docker/entity-api/Dockerfile +++ b/docker/entity-api/Dockerfile @@ -37,27 +37,27 @@ EOF # 8 - Make the start script executable # 9 - Clean the dnf/yum cache and other locations to reduce Docker Image layer size. # Assume the base image has upgraded dnf and installed its dnf-plugins-core - RUN dnf install --assumeyes nginx && \ - # Push aside nginx default.conf files that may exist on the system - [ ! -f /etc/nginx/conf.d/default.conf ] || mv /etc/nginx/conf.d/default.conf /tmp/etc_nginx_conf.d_default.conf.ORIGINAL && \ - [ ! -f /etc/nginx/nginx.conf ] || mv /etc/nginx/nginx.conf /tmp/etc_nginx_nginx.conf.ORIGINAL && \ - # Install the nginx default.conf file just installed in WORKDIR - mv nginx/nginx.conf /etc/nginx/nginx.conf && \ - # Clean up the nginx install directory in WORKDIR - [ ! -d nginx ] || mv nginx /tmp/nginx_from_WORKDIR && \ - # Push aside the verification file from the base image which will - # no longer report correctly once uWSGI is started for the service. - [ ! -f /tmp/verify_uwsgi.sh ] || mv /tmp/verify_uwsgi.sh /tmp/verify_uwsgi.sh.ORIGINAL && \ - # Install the requirements.txt file for the service - pip3.13 install --no-cache-dir --upgrade pip -r src/requirements.txt && \ - # Make the script referenced in the CMD directive below executable. - chmod a+x start.sh && \ - # Clean up artifacts to slim down this layer of the Docker Image - dnf clean all && \ - rm -rf /var/cache/dnf \ - /var/log/dnf \ - /var/log/yum \ - /root/.cache +RUN dnf install --assumeyes nginx && \ + # Push aside nginx default.conf files that may exist on the system + [ ! -f /etc/nginx/conf.d/default.conf ] || mv /etc/nginx/conf.d/default.conf /tmp/etc_nginx_conf.d_default.conf.ORIGINAL && \ + [ ! 
-f /etc/nginx/nginx.conf ] || mv /etc/nginx/nginx.conf /tmp/etc_nginx_nginx.conf.ORIGINAL && \ + # Install the nginx default.conf file just installed in WORKDIR + mv nginx/nginx.conf /etc/nginx/nginx.conf && \ + # Clean up the nginx install directory in WORKDIR + [ ! -d nginx ] || mv nginx /tmp/nginx_from_WORKDIR && \ + # Push aside the verification file from the base image which will + # no longer report correctly once uWSGI is started for the service. + [ ! -f /tmp/verify_uwsgi.sh ] || mv /tmp/verify_uwsgi.sh /tmp/verify_uwsgi.sh.ORIGINAL && \ + # Install the requirements.txt file for the service + pip3.13 install --no-cache-dir --upgrade pip -r src/requirements.txt && \ + # Make the script referenced in the CMD directive below executable. + chmod a+x start.sh && \ + # Clean up artifacts to slim down this layer of the Docker Image + dnf clean all && \ + rm -rf /var/cache/dnf \ + /var/log/dnf \ + /var/log/yum \ + /root/.cache # The EXPOSE instruction informs Docker that the container listens on the specified network ports at runtime. # EXPOSE does not make the ports of the container accessible to the host. diff --git a/entity-api-spec.yaml b/entity-api-spec.yaml index f84dcddb..f443b094 100644 --- a/entity-api-spec.yaml +++ b/entity-api-spec.yaml @@ -937,6 +937,10 @@ components: type: string readOnly: true description: 'The email address of the person or process authenticated when creating the object.' 
+ associated_publication: + type: object + description: 'The publication associated with the given collection' + readOnly: true created_by_user_sub: type: string readOnly: true diff --git a/src/app.py b/src/app.py index 0322b702..9ef3b201 100644 --- a/src/app.py +++ b/src/app.py @@ -5403,8 +5403,6 @@ def delete_cache(entity_uuid, entity_type): dataset_upload_dict = schema_neo4j_queries.get_dataset_upload(neo4j_driver_instance, entity_uuid) # For Publication, also delete cache of the associated collection - # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. - # Still keep it in the code until further decision - Zhou if entity_type == 'Publication': publication_collection_dict = schema_neo4j_queries.get_publication_associated_collection(neo4j_driver_instance, entity_uuid) diff --git a/src/requirements.txt b/src/requirements.txt index 68b518e2..1449a271 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -1,7 +1,6 @@ Flask==3.0.3 neo4j==5.20.0 prov==2.0.1 -Werkzeug==3.0.3 deepdiff==7.0.1 # For interacting with memcached diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index 37192954..5a8fe45b 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -308,6 +308,15 @@ ENTITIES: indexed: true description: "The displayname of globus group which the user who created this entity is a member of" before_create_trigger: set_group_name #same as group_uuid, except set group_name + associated_publication: + type: json_string #dict + generated: true + indexed: true + transient: true + description: "A JSON containing the UUID, HuBMAP_ID, and Title for the associated publication" + on_read_trigger: get_collection_associated_publication + on_index_trigger: get_collection_associated_publication + ############################################# Dataset ############################################# @@ -788,9 +797,6 @@ ENTITIES: type: string indexed: true description: 'A DOI pointing to an 
Organ Mapping Antibody Panel relevant to this publication' - - # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. - # Still keep it in the code until further decision - Zhou associated_collection: type: json_string # dict generated: true diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 15106f9c..e3463947 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -839,8 +839,6 @@ def get_parent_activity_uuid_from_entity(neo4j_driver, entity_uuid): the uuid of the associated collection """ def link_publication_to_associated_collection(neo4j_driver, entity_uuid, associated_collection_uuid): - # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. - # Still keep it in the code until further decision - Zhou try: with neo4j_driver.session() as session: tx = session.begin_transaction() @@ -1109,9 +1107,6 @@ def get_next_revision_uuids(neo4j_driver, uuid): """ def get_collection_associated_datasets(neo4j_driver, uuid, property_key = None): results = [] - - # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. - # Still keep it in the code until further decision - Zhou if property_key: query = (f"MATCH (e:Entity)-[:IN_COLLECTION|:USES_DATA]->(c:Collection) " f"WHERE c.uuid = '{uuid}' " @@ -1210,9 +1205,6 @@ def get_dataset_collections(neo4j_driver, uuid, property_key = None, properties_ """ def get_publication_associated_collection(neo4j_driver, uuid): result = {} - - # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. 
- # Still keep it in the code until further decision - Zhou query = (f"MATCH (p:Publication)-[:USES_DATA]->(c:Collection) " f"WHERE p.uuid = '{uuid}' " f"RETURN c as {record_field_name}") @@ -1229,6 +1221,39 @@ def get_publication_associated_collection(neo4j_driver, uuid): return result + +""" +Get the associated publication for a given collection + +Parameters +---------- +neo4j_driver : neo4j.Driver object + The neo4j database connection pool +uuid : str + The uuid of the collection + +Returns +------- +dict + A dict containing the uuid, hubmap_id, and title of the associated publication; empty if none exists +""" +def get_collection_associated_publication(neo4j_driver, uuid): + result = {} + query = (f"MATCH (p:Publication)-[:USES_DATA]->(c:Collection) " + f"WHERE c.uuid = '{uuid}' " + f"RETURN {{uuid: p.uuid, hubmap_id: p.hubmap_id, title: p.title}} AS publication") + + logger.info("=====get_collection_associated_publication() query======") + logger.debug(query) + + with neo4j_driver.session() as session: + record = session.run(query).single() + if record: + result = record["publication"] + return result + + + """ Get the associated Upload for a given dataset @@ -2079,8 +2104,6 @@ def delete_ancestor_linkages_tx(neo4j_driver, entity_uuid, ancestor_uuids): The uuid to target publication """ def _delete_publication_associated_collection_linkages_tx(tx, uuid): - # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated 
- # Still keep it in the code until further decision - Zhou query = (f"MATCH (p:Publication)-[r:USES_DATA]->(c:Collection) " f"WHERE p.uuid = '{uuid}' " f"DELETE r") diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index a23321f9..811eed0d 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -830,6 +830,45 @@ def get_publication_associated_collection(property_key, normalized_type, request return property_key, schema_manager.normalize_entity_result_for_response(collection_dict) + +""" +TriggerTypeEnum.ON_READ + +Trigger event method of getting the associated publication for this collection + +Parameters +---------- +property_key : str + The target property key +normalized_type : str + One of the types defined in the schema yaml: Collection +request_args: ImmutableMultiDict + The Flask request.args passed in from application request +user_token: str + The user's globus nexus token +existing_data_dict : dict + A dictionary that contains all existing entity properties +new_data_dict : dict + A merged dictionary that contains all possible input data to be used + +Returns +------- +str: The target property key +dict: A dictionary containing the uuid, hubmap_id, and title of the associated publication +""" +def get_collection_associated_publication(property_key, normalized_type, request_args, user_token, existing_data_dict, new_data_dict): + if 'uuid' not in existing_data_dict: + raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'get_collection_associated_publication()' trigger method.") + + logger.info(f"Executing 'get_collection_associated_publication()' trigger method on uuid: {existing_data_dict['uuid']}") + + publication_dict = schema_neo4j_queries.get_collection_associated_publication(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid']) + + # The query already returns only uuid, hubmap_id, and title, + # so no schema-based normalization of the result is needed here + return property_key, publication_dict + + + """ TriggerTypeEnum.ON_READ