-
Notifications
You must be signed in to change notification settings - Fork 10
No pid provider, adiciona deprecated_sps_pkg_name para identificar pacotes registrados e melhora a identificação e exclusão de duplicados #1256
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
77015ff
809b488
b1b82e6
5e166b2
5d3c365
d1cc017
5693777
26d2877
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -930,56 +930,76 @@ def find_duplicated_pkg_names(cls, journal=None, journal_id=None): | |
| params["journal"] = journal | ||
| if journal_id: | ||
| params["journal__id"] = journal_id | ||
| duplicates = ( | ||
| return ( | ||
| cls.objects.filter(**params) | ||
| .exclude(sps_pkg_name__isnull=True) | ||
| .exclude(sps_pkg_name="") | ||
| .exclude(data_status=choices.DATA_STATUS_DUPLICATED) | ||
| .values("sps_pkg_name") | ||
| .annotate(count=Count("id")) | ||
| .filter(count__gt=1) | ||
| .values_list("sps_pkg_name", flat=True) | ||
| ) | ||
|
|
||
| @classmethod | ||
| def find_duplicated_pid_v2(cls, journal=None, journal_id=None): | ||
| # Busca em ambos os campos de ISSN | ||
| params = {} | ||
| if journal: | ||
| params["journal"] = journal | ||
| if journal_id: | ||
| params["journal__id"] = journal_id | ||
| return ( | ||
| cls.objects.filter(**params) | ||
| .exclude(pid_v2__isnull=True) | ||
| .exclude(pid_v2="") | ||
| .exclude(data_status=choices.DATA_STATUS_DUPLICATED) | ||
| .values("pid_v2") | ||
| .annotate(count=Count("id")) | ||
| .filter(count__gt=1) | ||
| .values_list("pid_v2", flat=True) | ||
| ) | ||
| return list(item["sps_pkg_name"] for item in duplicates) | ||
|
|
||
| @classmethod | ||
| def mark_items_as_duplicated(cls, journal=None, journal_id=None): | ||
| def deduplicate_items(cls, user, journal=None, journal_id=None, mark_as_duplicated=False, deduplicate=False): | ||
| """ | ||
| Corrige todos os artigos marcados como DATA_STATUS_DUPLICATED com base nos ISSNs fornecidos. | ||
|
|
||
| Args: | ||
| issns: Lista de ISSNs para verificar duplicatas. | ||
| user: Usuário que está executando a operação. | ||
| """ | ||
|
Comment on lines
965
to
971
|
||
| article_duplicated_pkg_names = cls.find_duplicated_pkg_names( | ||
| article_duplicated_pid_v2 = cls.find_duplicated_pid_v2( | ||
| journal, journal_id | ||
| ) | ||
| if not article_duplicated_pkg_names: | ||
| return | ||
| cls.objects.filter(sps_pkg_name__in=article_duplicated_pkg_names).exclude( | ||
| data_status=choices.DATA_STATUS_DUPLICATED | ||
| ).update( | ||
| data_status=choices.DATA_STATUS_DUPLICATED, | ||
| ) | ||
| return article_duplicated_pkg_names | ||
|
|
||
| @classmethod | ||
| def deduplicate_items(cls, user, journal=None, journal_id=None): | ||
| """ | ||
| Corrige todos os artigos marcados como DATA_STATUS_DUPLICATED com base nos ISSNs fornecidos. | ||
| if article_duplicated_pid_v2.exists(): | ||
| if mark_as_duplicated: | ||
| cls.objects.filter(pid_v2__in=article_duplicated_pid_v2).exclude( | ||
| data_status=choices.DATA_STATUS_DUPLICATED | ||
| ).update( | ||
| data_status=choices.DATA_STATUS_DUPLICATED, | ||
| ) | ||
| if deduplicate: | ||
| for pid_v2 in article_duplicated_pid_v2: | ||
| cls.fix_duplicated_items(user, None, pid_v2) | ||
|
|
||
| Args: | ||
| issns: Lista de ISSNs para verificar duplicatas. | ||
| user: Usuário que está executando a operação. | ||
| """ | ||
| article_duplicated_pkg_names = cls.find_duplicated_pkg_names( | ||
| journal, journal_id | ||
| ) | ||
| for pkg_name in article_duplicated_pkg_names: | ||
| cls.fix_duplicated_pkg_name(pkg_name, user) | ||
| if article_duplicated_pkg_names.exists(): | ||
| if mark_as_duplicated: | ||
| cls.objects.filter(sps_pkg_name__in=article_duplicated_pkg_names).exclude( | ||
| data_status=choices.DATA_STATUS_DUPLICATED | ||
| ).update( | ||
| data_status=choices.DATA_STATUS_DUPLICATED, | ||
| ) | ||
| if deduplicate: | ||
| for pkg_name in article_duplicated_pkg_names: | ||
| cls.fix_duplicated_items(user, pkg_name, None) | ||
| return article_duplicated_pkg_names | ||
|
|
||
| @classmethod | ||
| def fix_duplicated_pkg_name(cls, pkg_name, user): | ||
| def fix_duplicated_items(cls, user, pkg_name, pid_v2): | ||
| """ | ||
| Corrige artigos marcados como DATA_STATUS_DUPLICATED com base no pkg_name fornecido. | ||
|
|
||
|
|
@@ -991,7 +1011,12 @@ def fix_duplicated_pkg_name(cls, pkg_name, user): | |
| int: Número de artigos atualizados. | ||
| """ | ||
| try: | ||
| articles = cls.objects.filter(sps_pkg_name=pkg_name).exclude( | ||
| filters = Q() | ||
| if pkg_name: | ||
| filters |= Q(sps_pkg_name=pkg_name) | ||
| if pid_v2: | ||
| filters |= Q(pid_v2=pid_v2) | ||
| articles = cls.objects.filter(filters).exclude( | ||
| data_status=choices.DATA_STATUS_DUPLICATED | ||
| ) | ||
| if articles.count() <= 1: | ||
|
|
@@ -1014,8 +1039,8 @@ def fix_duplicated_pkg_name(cls, pkg_name, user): | |
| UnexpectedEvent.create( | ||
| exception=exception, | ||
| exc_traceback=exc_traceback, | ||
| action="article.models.Article.fix_duplicated_pkg_name", | ||
| detail=pkg_name, | ||
| action="article.models.Article.fix_duplicated_items", | ||
| detail=pkg_name or pid_v2, | ||
| ) | ||
|
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -1315,7 +1315,7 @@ def mark_items_as_invalid(cls, issns): | |||||||||
| @profile_classmethod | ||||||||||
| def find_duplicated_pkg_names(cls, issns): | ||||||||||
| # Busca em ambos os campos de ISSN | ||||||||||
| duplicates = ( | ||||||||||
| return ( | ||||||||||
| cls.objects.filter(Q(issn_print__in=issns) | Q(issn_electronic__in=issns)) | ||||||||||
| .exclude(pkg_name__isnull=True) | ||||||||||
| .exclude(pkg_name="") | ||||||||||
|
|
@@ -1328,52 +1328,82 @@ def find_duplicated_pkg_names(cls, issns): | |||||||||
| .values("pkg_name") | ||||||||||
| .annotate(count=Count("id")) | ||||||||||
| .filter(count__gt=1) | ||||||||||
| .values_list("pkg_name", flat=True) | ||||||||||
| ) | ||||||||||
| return list(set(item["pkg_name"] for item in duplicates)) | ||||||||||
|
|
||||||||||
| @classmethod | ||||||||||
|
|
||||||||||
|
||||||||||
| @classmethod |
Copilot
AI
Jan 22, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A descrição do PR menciona a criação do novo campo deprecated_pkg_name em PidProviderXML, mas não há campo ou migração correspondente no código (nenhuma ocorrência de deprecated_pkg_name no app pid_provider); ou o campo/migração está faltando, ou a descrição precisa ser ajustada para refletir a implementação atual.
Copilot
AI
Jan 19, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The docstring needs to be updated. It still mentions 'artigos' (articles), but this method operates on PidProviderXML items, not articles. Additionally, the Args section should document the new 'mark_as_duplicated' and 'deduplicate' parameters.
Copilot
AI
Jan 19, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The docstring is incomplete. It only mentions 'pkg_name fornecido' (provided pkg_name), but the method now also accepts a 'v2' parameter. The description should reflect that the method can handle both the pkg_name and v2 parameters.
| Corrige items marcados como PPXML_STATUS_DUPLICATED com base no pkg_name fornecido. | |
| Corrige items marcados como PPXML_STATUS_DUPLICATED com base no pkg_name ou v2 fornecidos. |
Copilot
AI
Jan 19, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The 'detail' parameter should include v2 when it's provided. Currently it only uses pkg_name, which will be None when fixing by v2.
| detail=pkg_name, | |
| detail=f"pkg_name={pkg_name}, v2={v2}", |
Copilot
AI
Jan 22, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No UnexpectedEvent.create de fix_duplicated_items, o campo detail sempre recebe apenas pkg_name, de modo que chamadas que deduplicam por v2 não registram o identificador usado; para facilitar depuração, seria melhor incluir o valor de v2 quando ele for o critério (por exemplo, algo como pkg_name or v2).
| detail=pkg_name, | |
| detail=pkg_name or v2, |
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -63,9 +63,19 @@ def aop_pid(self): | |||||||||
|
|
||||||||||
| @cached_property | ||||||||||
| def pkg_name(self): | ||||||||||
| """Nome do pacote do documento.""" | ||||||||||
| """Nome do pacote do documento, parâmtro usado ao instanciar XMLAdapter""" | ||||||||||
|
||||||||||
| """Nome do pacote do documento, parâmtro usado ao instanciar XMLAdapter""" | |
| """Nome do pacote do documento, parâmetro usado ao instanciar XMLAdapter""" |
Copilot
AI
Jan 22, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Há um erro de digitação em "parâmtro" na docstring de pkg_name; o correto é "parâmetro".
| """Nome do pacote do documento, parâmtro usado ao instanciar XMLAdapter""" | |
| """Nome do pacote do documento, parâmetro usado ao instanciar XMLAdapter""" |
Copilot
AI
Jan 22, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A propriedade deprecated_sps_pkg_name está retornando self.xml_adapter.sps_pkg_name, o que a torna redundante em relação a sps_pkg_name e impede o uso do campo deprecated_sps_pkg_name exposto pelo packtools 4.14.0 para localizar registros com nomes de pacote antigos; a propriedade deve usar o atributo específico de "deprecated" do adapter (por exemplo, xml_adapter.deprecated_sps_pkg_name).
| """Nome do pacote do documento (deprecated).""" | |
| return self.xml_adapter.sps_pkg_name | |
| """Nome antigo de pacote do documento (deprecated).""" | |
| return self.xml_adapter.deprecated_sps_pkg_name |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The comment is misleading: it says 'Busca em ambos os campos de ISSN' (Searches in both ISSN fields), but this method filters by the journal/journal_id parameters, not by ISSN fields.