Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 21 additions & 21 deletions docker/entity-api/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -37,27 +37,27 @@ EOF
# 8 - Make the start script executable
# 9 - Clean the dnf/yum cache and other locations to reduce Docker Image layer size.
# Assume the base image has upgraded dnf and installed its dnf-plugins-core
RUN dnf install --assumeyes nginx && \
# Push aside nginx default.conf files that may exist on the system
[ ! -f /etc/nginx/conf.d/default.conf ] || mv /etc/nginx/conf.d/default.conf /tmp/etc_nginx_conf.d_default.conf.ORIGINAL && \
[ ! -f /etc/nginx/nginx.conf ] || mv /etc/nginx/nginx.conf /tmp/etc_nginx_nginx.conf.ORIGINAL && \
# Install the service's nginx.conf file from WORKDIR as the system nginx.conf
mv nginx/nginx.conf /etc/nginx/nginx.conf && \
# Clean up the nginx install directory in WORKDIR
[ ! -d nginx ] || mv nginx /tmp/nginx_from_WORKDIR && \
# Push aside the verification file from the base image which will
# no longer report correctly once uWSGI is started for the service.
[ ! -f /tmp/verify_uwsgi.sh ] || mv /tmp/verify_uwsgi.sh /tmp/verify_uwsgi.sh.ORIGINAL && \
# Install the requirements.txt file for the service
pip3.13 install --no-cache-dir --upgrade pip -r src/requirements.txt && \
# Make the script referenced in the CMD directive below executable.
chmod a+x start.sh && \
# Clean up artifacts to slim down this layer of the Docker Image
dnf clean all && \
rm -rf /var/cache/dnf \
/var/log/dnf \
/var/log/yum \
/root/.cache
RUN dnf install --assumeyes nginx && \
# Push aside nginx default.conf files that may exist on the system
[ ! -f /etc/nginx/conf.d/default.conf ] || mv /etc/nginx/conf.d/default.conf /tmp/etc_nginx_conf.d_default.conf.ORIGINAL && \
[ ! -f /etc/nginx/nginx.conf ] || mv /etc/nginx/nginx.conf /tmp/etc_nginx_nginx.conf.ORIGINAL && \
# Install the service's nginx.conf file from WORKDIR as the system nginx.conf
mv nginx/nginx.conf /etc/nginx/nginx.conf && \
# Clean up the nginx install directory in WORKDIR
[ ! -d nginx ] || mv nginx /tmp/nginx_from_WORKDIR && \
# Push aside the verification file from the base image which will
# no longer report correctly once uWSGI is started for the service.
[ ! -f /tmp/verify_uwsgi.sh ] || mv /tmp/verify_uwsgi.sh /tmp/verify_uwsgi.sh.ORIGINAL && \
# Install the requirements.txt file for the service
pip3.13 install --no-cache-dir --upgrade pip -r src/requirements.txt && \
# Make the script referenced in the CMD directive below executable.
chmod a+x start.sh && \
# Clean up artifacts to slim down this layer of the Docker Image
dnf clean all && \
rm -rf /var/cache/dnf \
/var/log/dnf \
/var/log/yum \
/root/.cache

# The EXPOSE instruction informs Docker that the container listens on the specified network ports at runtime.
# EXPOSE does not make the ports of the container accessible to the host.
Expand Down
4 changes: 4 additions & 0 deletions entity-api-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -937,6 +937,10 @@ components:
type: string
readOnly: true
description: 'The email address of the person or process authenticated when creating the object.'
associated_publication:
type: object
description: 'The publication associated with the given collection'
readOnly: true
created_by_user_sub:
type: string
readOnly: true
Expand Down
2 changes: 0 additions & 2 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5403,8 +5403,6 @@ def delete_cache(entity_uuid, entity_type):
dataset_upload_dict = schema_neo4j_queries.get_dataset_upload(neo4j_driver_instance, entity_uuid)

# For Publication, also delete cache of the associated collection
# NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated.
# Still keep it in the code until further decision - Zhou
if entity_type == 'Publication':
publication_collection_dict = schema_neo4j_queries.get_publication_associated_collection(neo4j_driver_instance, entity_uuid)

Expand Down
1 change: 0 additions & 1 deletion src/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
Flask==3.0.3
neo4j==5.20.0
prov==2.0.1
Werkzeug==3.0.3
deepdiff==7.0.1

# For interacting with memcached
Expand Down
12 changes: 9 additions & 3 deletions src/schema/provenance_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,15 @@ ENTITIES:
indexed: true
description: "The displayname of globus group which the user who created this entity is a member of"
before_create_trigger: set_group_name #same as group_uuid, except set group_name
associated_publication:
type: json_string #dict
generated: true
indexed: true
transient: true
description: "A JSON containing the UUID, HuBMAP_ID, and Title for the associated publication"
on_read_trigger: get_collection_associated_publication
on_index_trigger: get_collection_associated_publication



############################################# Dataset #############################################
Expand Down Expand Up @@ -788,9 +797,6 @@ ENTITIES:
type: string
indexed: true
description: 'A DOI pointing to an Organ Mapping Antibody Panel relevant to this publication'

# NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated.
# Still keep it in the code until further decision - Zhou
associated_collection:
type: json_string # dict
generated: true
Expand Down
45 changes: 35 additions & 10 deletions src/schema/schema_neo4j_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -839,8 +839,6 @@ def get_parent_activity_uuid_from_entity(neo4j_driver, entity_uuid):
the uuid of the associated collection
"""
def link_publication_to_associated_collection(neo4j_driver, entity_uuid, associated_collection_uuid):
# NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated.
# Still keep it in the code until further decision - Zhou
try:
with neo4j_driver.session() as session:
tx = session.begin_transaction()
Expand Down Expand Up @@ -1109,9 +1107,6 @@ def get_next_revision_uuids(neo4j_driver, uuid):
"""
def get_collection_associated_datasets(neo4j_driver, uuid, property_key = None):
results = []

# NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated.
# Still keep it in the code until further decision - Zhou
if property_key:
query = (f"MATCH (e:Entity)-[:IN_COLLECTION|:USES_DATA]->(c:Collection) "
f"WHERE c.uuid = '{uuid}' "
Expand Down Expand Up @@ -1210,9 +1205,6 @@ def get_dataset_collections(neo4j_driver, uuid, property_key = None, properties_
"""
def get_publication_associated_collection(neo4j_driver, uuid):
result = {}

# NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated.
# Still keep it in the code until further decision - Zhou
query = (f"MATCH (p:Publication)-[:USES_DATA]->(c:Collection) "
f"WHERE p.uuid = '{uuid}' "
f"RETURN c as {record_field_name}")
Expand All @@ -1229,6 +1221,41 @@ def get_publication_associated_collection(neo4j_driver, uuid):

return result


"""
Get the associated publication for a given collection

Parameters
----------
neo4j_driver : neo4j.Driver object
The neo4j database connection pool
uuid : str
The uuid of collection

Returns
-------
dict
A dictionary with the uuid, hubmap_id, and title of the associated publication, or an empty dictionary when there is none
"""
def get_collection_associated_publication(neo4j_driver, uuid):
    # Look up the Publication node linked to the given Collection through a
    # [:USES_DATA] relationship.
    #
    # neo4j_driver : neo4j.Driver object, the neo4j database connection pool
    # uuid         : str, the uuid of the collection
    #
    # Returns a dict with the `uuid`, `hubmap_id`, and `title` of the
    # publication, or an empty dict when no record comes back.
    result = {}

    # The uuid is sent as a query parameter instead of being interpolated into
    # the Cypher text, so quoting inside the value can never alter the query.
    query = ("MATCH (p:Publication)-[:USES_DATA]->(c:Collection) "
             "WHERE c.uuid = $uuid "
             "RETURN {uuid: p.uuid, hubmap_id: p.hubmap_id, title: p.title} AS publication")

    logger.info("=====get_collection_associated_publication() query======")
    # The parameter value is logged alongside the query text because it is no
    # longer embedded in the query string itself.
    logger.debug("%s [uuid=%s]", query, uuid)

    with neo4j_driver.session() as session:
        record = session.run(query, uuid=uuid).single()

        # No record means no publication is linked; keep the empty dict
        if record:
            result = record["publication"]

    return result



"""
Get the associated Upload for a given dataset

Expand Down Expand Up @@ -2079,8 +2106,6 @@ def delete_ancestor_linkages_tx(neo4j_driver, entity_uuid, ancestor_uuids):
The uuid to target publication
"""
def _delete_publication_associated_collection_linkages_tx(tx, uuid):
# NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated.
# Still keep it in the code until further decision - Zhou
query = (f"MATCH (p:Publication)-[r:USES_DATA]->(c:Collection) "
f"WHERE p.uuid = '{uuid}' "
f"DELETE r")
Expand Down
39 changes: 39 additions & 0 deletions src/schema/schema_triggers.py
Original file line number Diff line number Diff line change
Expand Up @@ -830,6 +830,45 @@ def get_publication_associated_collection(property_key, normalized_type, request
return property_key, schema_manager.normalize_entity_result_for_response(collection_dict)


"""
TriggerTypeEnum.ON_READ

Trigger event method of getting the associated publication for this collection

Parameters
----------
property_key : str
The target property key
normalized_type : str
One of the types defined in the schema yaml: Collection
request_args: ImmutableMultiDict
The Flask request.args passed in from application request
user_token: str
The user's globus nexus token
existing_data_dict : dict
A dictionary that contains all existing entity properties
new_data_dict : dict
A merged dictionary that contains all possible input data to be used

Returns
-------
str: The target property key
dict: A dictionary with the uuid, hubmap_id, and title of the associated publication (empty when there is none)
"""
def get_collection_associated_publication(property_key, normalized_type, request_args, user_token, existing_data_dict, new_data_dict):
    # The collection's own uuid is the only input this trigger needs, so fail
    # early when the caller did not supply it.
    if 'uuid' not in existing_data_dict:
        raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'get_collection_associated_publication()' trigger method.")

    collection_uuid = existing_data_dict['uuid']

    logger.info(f"Executing 'get_collection_associated_publication()' trigger method on uuid: {collection_uuid}")

    neo4j_driver = schema_manager.get_neo4j_driver_instance()
    associated_publication = schema_neo4j_queries.get_collection_associated_publication(neo4j_driver, collection_uuid)

    # The query result is handed back as-is; no schema-based normalization
    # is applied to it here.
    return property_key, associated_publication



"""
TriggerTypeEnum.ON_READ

Expand Down