Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 42 additions & 13 deletions osf/metadata/serializers/datacite/datacite_tree_walker.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
FOAF,
ORCID,
OSF,
PROV,
ROR,
SKOS,
DATACITE,
Expand Down Expand Up @@ -128,23 +129,51 @@ def _visit_identifier(self, parent_el, *, doi_override=None):
})

def _visit_creators(self, parent_el, focus_iri):
creator_iris = set(self.basket[focus_iri:DCTERMS.creator])
if (not creator_iris) and ((focus_iri, RDF.type, OSF.File) in self.basket):
creator_iris.update(self.basket[focus_iri:OSF.hasFileVersion / DCTERMS.creator])
if not creator_iris:
creator_iris.update(self.basket[focus_iri:OSF.isContainedBy / DCTERMS.creator])
if not creator_iris:
creator_iris.update(self.basket[focus_iri:DCTERMS.isPartOf / DCTERMS.creator])
if not creator_iris:
creator_iris.update(self.basket[focus_iri:DCTERMS.contributor])
if not creator_iris:
creator_iris.update(self.basket[focus_iri:OSF.isContainedBy / DCTERMS.contributor])
creator_iris = []

ordered_contributors = []
attribution_refs = list(self.basket[focus_iri:PROV.qualifiedAttribution])
for attribution_ref in attribution_refs:
try:
order_val = next(self.basket[attribution_ref:OSF.order])
except StopIteration:
# If there is no explicit order, shove it to the end
order_index = float('inf')
else:
try:
order_index = order_val.toPython()
except AttributeError:
order_index = int(order_val)
try:
agent_iri = next(self.basket[attribution_ref:PROV.agent])
except StopIteration:
continue
ordered_contributors.append((order_index, agent_iri))

if ordered_contributors:
ordered_contributors.sort(key=lambda pair: pair[0])
creator_iris.extend(agent_iri for _, agent_iri in ordered_contributors)

# Fallbacks when there is no explicit OSF ordering
if not creator_iris:
creator_iris.update(self.basket[focus_iri:DCTERMS.isPartOf / DCTERMS.contributor])
creator_iris = list(self.basket[focus_iri:DCTERMS.creator])
if (not creator_iris) and ((focus_iri, RDF.type, OSF.File) in self.basket):
creator_iris.extend(self.basket[focus_iri:OSF.hasFileVersion / DCTERMS.creator])
if not creator_iris:
creator_iris.extend(self.basket[focus_iri:OSF.isContainedBy / DCTERMS.creator])
if not creator_iris:
creator_iris.extend(self.basket[focus_iri:DCTERMS.isPartOf / DCTERMS.creator])
if not creator_iris:
creator_iris.extend(self.basket[focus_iri:DCTERMS.contributor])
if not creator_iris:
creator_iris.extend(self.basket[focus_iri:OSF.isContainedBy / DCTERMS.contributor])
if not creator_iris:
creator_iris.extend(self.basket[focus_iri:DCTERMS.isPartOf / DCTERMS.contributor])

if not creator_iris:
raise ValueError(f'gathered no creators or contributors around {focus_iri}')
creators_el = self.visit(parent_el, 'creators', is_list=True)
for creator_iri in creator_iris: # TODO: "priority order"
for creator_iri in creator_iris:
creator_el = self.visit(creators_el, 'creator')
for name in self.basket[creator_iri:FOAF.name]:
self.visit(creator_el, 'creatorName', text=name, attrib={
Expand Down
51 changes: 51 additions & 0 deletions tests/identifiers/test_datacite.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,57 @@ def test_datacite_build_metadata_for_dataarchive_registration(self, registration
assert resource_type.text == 'Pre-registration'
assert resource_type.attrib['resourceTypeGeneral'] == 'Dataset'

def test_datacite_creators_follow_osf_contributor_order(self, datacite_client):
    """DataCite ``creator`` elements must follow the visible-contributor order.

    The order is checked twice: once after two extra contributors are
    added, and once more after the registration's creator has been moved
    to index 2.
    """
    namespace = schema40.ns[None]
    xml_parser = lxml.etree.XMLParser(ns_clean=True, recover=True, encoding='utf-8')

    registration = RegistrationFactory(is_public=True)
    first = registration.creator
    second = AuthUserFactory()
    third = AuthUserFactory()
    # `third` is added ahead of `second`, so the expected contributor order
    # differs from the order in which the users were created.
    for user in (third, second):
        registration.add_contributor(user, visible=True)
    registration.save()

    def visible_fullnames():
        # Full names of the registration's visible contributors, in order.
        return [user.fullname for user in registration.visible_contributors]

    def serialized_creator_names():
        # Build the DataCite XML and read the creatorName texts in document order.
        document = lxml.etree.fromstring(
            datacite_client.build_metadata(registration),
            parser=xml_parser,
        )
        creators = document.find('{%s}creators' % namespace)
        return [
            creator.find('{%s}creatorName' % namespace).text
            for creator in creators.findall('{%s}creator' % namespace)
        ]

    # Initial order: creator first, then contributors in the order they were added.
    correct_order = visible_fullnames()
    assert correct_order == [
        first.fullname,
        third.fullname,
        second.fullname,
    ]
    assert serialized_creator_names() == correct_order

    # Move the creator to the end and confirm the serialized order follows.
    registration.move_contributor(first, auth=Auth(first), index=2, save=True)
    registration.refresh_from_db()

    new_correct_order = visible_fullnames()
    assert new_correct_order == [
        third.fullname,
        second.fullname,
        first.fullname,
    ]
    assert serialized_creator_names() == new_correct_order

def test_datacite_format_contributors(self, datacite_client):
visible_contrib = AuthUserFactory()
visible_contrib2 = AuthUserFactory()
Expand Down
Loading