Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 52 additions & 27 deletions article/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -930,56 +930,76 @@ def find_duplicated_pkg_names(cls, journal=None, journal_id=None):
params["journal"] = journal
if journal_id:
params["journal__id"] = journal_id
duplicates = (
return (
cls.objects.filter(**params)
.exclude(sps_pkg_name__isnull=True)
.exclude(sps_pkg_name="")
.exclude(data_status=choices.DATA_STATUS_DUPLICATED)
.values("sps_pkg_name")
.annotate(count=Count("id"))
.filter(count__gt=1)
.values_list("sps_pkg_name", flat=True)
)

@classmethod
def find_duplicated_pid_v2(cls, journal=None, journal_id=None):
# Filtra por journal e/ou journal_id, quando informados
Copy link

Copilot AI Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment is misleading: 'Busca em ambos os campos de ISSN' (Searches in both ISSN fields) but this method searches by journal/journal_id parameters, not ISSN fields.

Copilot uses AI. Check for mistakes.
params = {}
if journal:
params["journal"] = journal
if journal_id:
params["journal__id"] = journal_id
return (
cls.objects.filter(**params)
.exclude(pid_v2__isnull=True)
.exclude(pid_v2="")
.exclude(data_status=choices.DATA_STATUS_DUPLICATED)
.values("pid_v2")
.annotate(count=Count("id"))
.filter(count__gt=1)
.values_list("pid_v2", flat=True)
)
return list(item["sps_pkg_name"] for item in duplicates)

@classmethod
def mark_items_as_duplicated(cls, journal=None, journal_id=None):
def deduplicate_items(cls, user, journal=None, journal_id=None, mark_as_duplicated=False, deduplicate=False):
"""
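Marca como DATA_STATUS_DUPLICATED e/ou corrige os artigos com pid_v2 ou sps_pkg_name duplicados.

Args:
user: Usuário que está executando a operação.
journal: Periódico usado para filtrar os artigos (opcional).
journal_id: ID do periódico usado para filtrar os artigos (opcional).
mark_as_duplicated: Se True, marca os artigos duplicados como DATA_STATUS_DUPLICATED.
deduplicate: Se True, executa fix_duplicated_items para cada pid_v2 ou sps_pkg_name duplicado.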
"""
Comment on lines 965 to 971
Copy link

Copilot AI Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Documentation is outdated. The docstring mentions 'issns: Lista de ISSNs' but the method actually accepts 'journal' and 'journal_id' parameters, not 'issns'.

Copilot uses AI. Check for mistakes.
article_duplicated_pkg_names = cls.find_duplicated_pkg_names(
article_duplicated_pid_v2 = cls.find_duplicated_pid_v2(
journal, journal_id
)
if not article_duplicated_pkg_names:
return
cls.objects.filter(sps_pkg_name__in=article_duplicated_pkg_names).exclude(
data_status=choices.DATA_STATUS_DUPLICATED
).update(
data_status=choices.DATA_STATUS_DUPLICATED,
)
return article_duplicated_pkg_names

@classmethod
def deduplicate_items(cls, user, journal=None, journal_id=None):
"""
Corrige todos os artigos marcados como DATA_STATUS_DUPLICATED com base nos ISSNs fornecidos.
if article_duplicated_pid_v2.exists():
if mark_as_duplicated:
cls.objects.filter(pid_v2__in=article_duplicated_pid_v2).exclude(
data_status=choices.DATA_STATUS_DUPLICATED
).update(
data_status=choices.DATA_STATUS_DUPLICATED,
)
if deduplicate:
for pid_v2 in article_duplicated_pid_v2:
cls.fix_duplicated_items(user, None, pid_v2)

Args:
issns: Lista de ISSNs para verificar duplicatas.
user: Usuário que está executando a operação.
"""
article_duplicated_pkg_names = cls.find_duplicated_pkg_names(
journal, journal_id
)
for pkg_name in article_duplicated_pkg_names:
cls.fix_duplicated_pkg_name(pkg_name, user)
if article_duplicated_pkg_names.exists():
if mark_as_duplicated:
cls.objects.filter(sps_pkg_name__in=article_duplicated_pkg_names).exclude(
data_status=choices.DATA_STATUS_DUPLICATED
).update(
data_status=choices.DATA_STATUS_DUPLICATED,
)
if deduplicate:
for pkg_name in article_duplicated_pkg_names:
cls.fix_duplicated_items(user, pkg_name, None)
return article_duplicated_pkg_names

@classmethod
def fix_duplicated_pkg_name(cls, pkg_name, user):
def fix_duplicated_items(cls, user, pkg_name, pid_v2):
"""
Corrige artigos marcados como DATA_STATUS_DUPLICATED com base no pkg_name ou pid_v2 fornecido.

Expand All @@ -991,7 +1011,12 @@ def fix_duplicated_pkg_name(cls, pkg_name, user):
int: Número de artigos atualizados.
"""
try:
articles = cls.objects.filter(sps_pkg_name=pkg_name).exclude(
filters = Q()
if pkg_name:
filters |= Q(sps_pkg_name=pkg_name)
if pid_v2:
filters |= Q(pid_v2=pid_v2)
articles = cls.objects.filter(filters).exclude(
data_status=choices.DATA_STATUS_DUPLICATED
)
if articles.count() <= 1:
Expand All @@ -1014,8 +1039,8 @@ def fix_duplicated_pkg_name(cls, pkg_name, user):
UnexpectedEvent.create(
exception=exception,
exc_traceback=exc_traceback,
action="article.models.Article.fix_duplicated_pkg_name",
detail=pkg_name,
action="article.models.Article.fix_duplicated_items",
detail=pkg_name or pid_v2,
)


Expand Down
7 changes: 2 additions & 5 deletions article/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1063,11 +1063,8 @@ def task_fix_journal_articles_status(
if mark_as_public:
Article.mark_items_as_public(journal_id=journal_id)

if mark_as_duplicated:
Article.mark_items_as_duplicated(journal_id=journal_id)

if deduplicate:
Article.deduplicate_items(user, journal_id=journal_id)
if mark_as_duplicated or deduplicate:
Article.deduplicate_items(user, journal_id=journal_id, mark_as_duplicated=mark_as_duplicated, deduplicate=deduplicate)

return {
"status": "success",
Expand Down
74 changes: 52 additions & 22 deletions pid_provider/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1315,7 +1315,7 @@ def mark_items_as_invalid(cls, issns):
@profile_classmethod
def find_duplicated_pkg_names(cls, issns):
# Busca em ambos os campos de ISSN
duplicates = (
return (
cls.objects.filter(Q(issn_print__in=issns) | Q(issn_electronic__in=issns))
.exclude(pkg_name__isnull=True)
.exclude(pkg_name="")
Expand All @@ -1328,52 +1328,82 @@ def find_duplicated_pkg_names(cls, issns):
.values("pkg_name")
.annotate(count=Count("id"))
.filter(count__gt=1)
.values_list("pkg_name", flat=True)
)
return list(set(item["pkg_name"] for item in duplicates))

@classmethod

Copy link

Copilot AI Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Normal methods should have 'self', rather than 'cls', as their first parameter.

Suggested change
@classmethod

Copilot uses AI. Check for mistakes.
@profile_classmethod
def mark_items_as_duplicated(cls, issns):
ppx_duplicated_pkg_names = PidProviderXML.find_duplicated_pkg_names(issns)
if not ppx_duplicated_pkg_names:
return
cls.objects.filter(pkg_name__in=ppx_duplicated_pkg_names).exclude(
proc_status=choices.PPXML_STATUS_DUPLICATED
).update(
proc_status=choices.PPXML_STATUS_DUPLICATED,
def find_duplicated_v2(cls, issns):
    """Return the ``v2`` values that occur on more than one record.

    Args:
        issns: ISSN values matched against both ``issn_print`` and
            ``issn_electronic``.

    Returns:
        The queryset produced by ``values_list("v2", flat=True)`` over the
        groups whose record count is greater than one. Records with a null
        or empty ``v2``, or whose ``proc_status`` is
        ``PPXML_STATUS_DUPLICATED`` or ``PPXML_STATUS_INVALID``, are left
        out before grouping.
    """
    # An ISSN may be stored in either field, so both are searched.
    by_issn = Q(issn_print__in=issns) | Q(issn_electronic__in=issns)
    ignored_statuses = [
        choices.PPXML_STATUS_DUPLICATED,
        choices.PPXML_STATUS_INVALID,
    ]
    candidates = (
        cls.objects.filter(by_issn)
        .exclude(v2__isnull=True)
        .exclude(v2="")
        .exclude(proc_status__in=ignored_statuses)
    )
    grouped = candidates.values("v2").annotate(count=Count("id"))
    repeated = grouped.filter(count__gt=1)
    return repeated.values_list("v2", flat=True)
return ppx_duplicated_pkg_names

@classmethod
@profile_classmethod
def deduplicate_items(cls, user, issns):
def deduplicate_items(cls, user, issns, mark_as_duplicated=False, deduplicate=False):
"""
Marca como PPXML_STATUS_DUPLICATED e/ou corrige os registros PidProviderXML com v2 ou pkg_name duplicados para os ISSNs fornecidos.

Comment on lines +1355 to 1358
Copy link

Copilot AI Jan 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A descrição do PR menciona a criação do novo campo deprecated_pkg_name em PidProviderXML, mas não há campo ou migração correspondente no código (nenhuma ocorrência de deprecated_pkg_name no app pid_provider); ou o campo/migração está faltando, ou a descrição precisa ser ajustada para refletir a implementação atual.

Copilot uses AI. Check for mistakes.
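Args:
user: Usuário que está executando a operação.
issns: Lista de ISSNs para verificar duplicatas.
mark_as_duplicated: Se True, marca os registros duplicados como PPXML_STATUS_DUPLICATED.
deduplicate: Se True, executa fix_duplicated_items para cada v2 ou pkg_name duplicado.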
"""
Comment on lines 1356 to 1362
Copy link

Copilot AI Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Docstring needs to be updated. It still mentions 'artigos' (articles) but this method operates on PidProviderXML items, not articles. Additionally, the Args section should document the new 'mark_as_duplicated' and 'deduplicate' parameters.

Copilot uses AI. Check for mistakes.
duplicated_v2 = cls.find_duplicated_v2(issns)
if duplicated_v2.exists():
if mark_as_duplicated:
cls.objects.filter(v2__in=duplicated_v2).exclude(
proc_status=choices.PPXML_STATUS_DUPLICATED
).update(
proc_status=choices.PPXML_STATUS_DUPLICATED,
)
if deduplicate:
for v2 in duplicated_v2:
cls.fix_duplicated_items(user, None, v2)

duplicated_pkg_names = cls.find_duplicated_pkg_names(issns)
for pkg_name in duplicated_pkg_names:
cls.fix_duplicated_pkg_name(pkg_name, user)
return duplicated_pkg_names
if duplicated_pkg_names.exists():
if mark_as_duplicated:
cls.objects.filter(pkg_name__in=duplicated_pkg_names).exclude(
proc_status=choices.PPXML_STATUS_DUPLICATED
).update(
proc_status=choices.PPXML_STATUS_DUPLICATED,
)
if deduplicate:
for pkg_name in duplicated_pkg_names:
cls.fix_duplicated_items(user, pkg_name, None)

@classmethod
@profile_classmethod
def fix_duplicated_pkg_name(cls, pkg_name, user):
def fix_duplicated_items(cls, user, pkg_name, v2):
"""
Corrige items marcados como PPXML_STATUS_DUPLICATED com base no pkg_name ou v2 fornecidos.
Copy link

Copilot AI Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The docstring is incomplete. It only mentions 'pkg_name fornecido' (provided pkg_name) but the method now also accepts 'v2' parameter. The description should reflect that it can handle both pkg_name and v2 parameters.

Suggested change
Corrige items marcados como PPXML_STATUS_DUPLICATED com base no pkg_name fornecido.
Corrige items marcados como PPXML_STATUS_DUPLICATED com base no pkg_name ou v2 fornecidos.

Copilot uses AI. Check for mistakes.

Args:
pkg_name: Nome do pacote para verificar duplicatas.
user: Usuário que está executando a operação.

pkg_name: Nome do pacote para verificar duplicatas.
v2: Valor do pid v2 para verificar duplicatas.
Returns:
int: Número de items atualizados.
"""
try:
items = cls.objects.filter(pkg_name=pkg_name)
filters = Q()
if v2:
filters |= Q(v2=v2) | Q(other_pid__pid_in_xml=v2)
if pkg_name:
filters |= Q(pkg_name=pkg_name)
items = cls.objects.filter(filters)
if items.count() <= 1:
return 0

Expand Down Expand Up @@ -1409,7 +1439,7 @@ def fix_duplicated_pkg_name(cls, pkg_name, user):
UnexpectedEvent.create(
exception=exception,
exc_traceback=exc_traceback,
action="pid_provider.models.PidProviderXML.fix_duplicated_pkg_name",
action="pid_provider.models.PidProviderXML.fix_duplicated_items",
detail=pkg_name,
Copy link

Copilot AI Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The 'detail' parameter should include v2 when it's provided. Currently it only uses pkg_name, which will be None when fixing by v2.

Suggested change
detail=pkg_name,
detail=f"pkg_name={pkg_name}, v2={v2}",

Copilot uses AI. Check for mistakes.
Copy link

Copilot AI Jan 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No UnexpectedEvent.create de fix_duplicated_items, o campo detail sempre recebe apenas pkg_name, de modo que chamadas que deduplicam por v2 não registram o identificador usado; para facilitar depuração, seria melhor incluir o valor de v2 quando ele for o critério (por exemplo, algo como pkg_name or v2).

Suggested change
detail=pkg_name,
detail=pkg_name or v2,

Copilot uses AI. Check for mistakes.
)

Expand Down
23 changes: 20 additions & 3 deletions pid_provider/query_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,19 @@ def aop_pid(self):

@cached_property
def pkg_name(self):
"""Nome do pacote do documento."""
"""Nome do pacote do documento, parâmetro usado ao instanciar XMLAdapter"""
Copy link

Copilot AI Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Spelling error: 'parâmtro' should be 'parâmetro'

Suggested change
"""Nome do pacote do documento, parâmtro usado ao instanciar XMLAdapter"""
"""Nome do pacote do documento, parâmetro usado ao instanciar XMLAdapter"""

Copilot uses AI. Check for mistakes.
Copy link

Copilot AI Jan 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Há um erro de digitação em "parâmtro" na docstring de pkg_name; o correto é "parâmetro".

Suggested change
"""Nome do pacote do documento, parâmtro usado ao instanciar XMLAdapter"""
"""Nome do pacote do documento, parâmetro usado ao instanciar XMLAdapter"""

Copilot uses AI. Check for mistakes.
return self.xml_adapter.pkg_name


@cached_property
def sps_pkg_name(self):
    """Return the ``sps_pkg_name`` value exposed by the XML adapter.

    NOTE(review): the original docstring tags this name as "(deprecated)";
    confirm whether that label was meant for this property.
    """
    adapter = self.xml_adapter
    return adapter.sps_pkg_name

@cached_property
def deprecated_sps_pkg_name(self):
    """Return the document's former (deprecated) SPS package name.

    Reads ``deprecated_sps_pkg_name`` from the XML adapter, so this property
    is no longer a plain duplicate of ``sps_pkg_name``. When the adapter
    does not provide that attribute, the adapter's ``sps_pkg_name`` is
    returned instead, which is what this property returned before.

    NOTE(review): assumes the adapter shipped with packtools 4.14.0 exposes
    ``deprecated_sps_pkg_name`` -- confirm against the installed version.
    """
    adapter = self.xml_adapter
    try:
        return adapter.deprecated_sps_pkg_name
    except AttributeError:
        # Adapter without the dedicated attribute: keep the previous result.
        return adapter.sps_pkg_name
Comment on lines +76 to +77
Copy link

Copilot AI Jan 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A propriedade deprecated_sps_pkg_name está retornando self.xml_adapter.sps_pkg_name, o que a torna redundante em relação a sps_pkg_name e impede o uso do campo deprecated_sps_pkg_name exposto pelo packtools 4.14.0 para localizar registros com nomes de pacote antigos; a propriedade deve usar o atributo específico de "deprecated" do adapter (por exemplo, xml_adapter.deprecated_sps_pkg_name).

Suggested change
"""Nome do pacote do documento (deprecated)."""
return self.xml_adapter.sps_pkg_name
"""Nome antigo de pacote do documento (deprecated)."""
return self.xml_adapter.deprecated_sps_pkg_name

Copilot uses AI. Check for mistakes.

@cached_property
def main_doi(self):
"""DOI principal do documento."""
Expand Down Expand Up @@ -176,8 +186,15 @@ def identifier_queries(self):
q |= Q(v2=self.aop_pid) | Q(aop_pid=self.aop_pid)

# Package name
pkg_names = set()
if self.pkg_name:
q |= Q(pkg_name=self.pkg_name)
pkg_names.add(self.pkg_name)
if self.sps_pkg_name:
pkg_names.add(self.sps_pkg_name)
if self.deprecated_sps_pkg_name:
pkg_names.add(self.deprecated_sps_pkg_name)
if pkg_names:
q |= Q(pkg_name__in=pkg_names)

# # DOI principal
# if self.main_doi:
Expand Down
7 changes: 2 additions & 5 deletions pid_provider/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,11 +281,8 @@ def task_fix_journal_pid_provider_xmls_status(
if mark_as_invalid:
PidProviderXML.mark_items_as_invalid(journal.issns)

if mark_as_duplicated:
PidProviderXML.mark_items_as_duplicated(journal.issns)

if deduplicate:
PidProviderXML.deduplicate_items(user, journal.issns)
if mark_as_duplicated or deduplicate:
PidProviderXML.deduplicate_items(user, journal.issns, mark_as_duplicated=mark_as_duplicated, deduplicate=deduplicate)

return {
"status": "success",
Expand Down
2 changes: 1 addition & 1 deletion requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ pysolr==3.9.0 # https://pypi.org/project/pysolr/
# ------------------------------------------------------------------------------
tornado>=6.5.2 # not directly required, pinned by Snyk to avoid a vulnerability
lxml==6.0.2 # https://github.com/lxml/lxml
git+https://git@github.com/scieloorg/packtools@4.13.1#egg=packtools
git+https://git@github.com/scieloorg/packtools@4.14.0#egg=packtools

# pymongo
# ------------------------------------------------------------------------------
Expand Down
Loading