From c6aae219d3ec149e754074f79a1aa3b2c5b1d369 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 18 Jun 2025 23:09:29 +0300 Subject: [PATCH 1/3] external id --- LexData/claim.py | 2 ++ LexData/entity.py | 41 +++++++++++++++++++++++++++++++++++++---- LexData/utils.py | 7 +++++++ 3 files changed, 46 insertions(+), 4 deletions(-) diff --git a/LexData/claim.py b/LexData/claim.py index bd13d36..337efe0 100644 --- a/LexData/claim.py +++ b/LexData/claim.py @@ -110,6 +110,8 @@ def pure_value(self) -> Union[str, int, float, Tuple[float, float]]: return value["id"] if vtype == "string": return value + if vtype == "external-id": + return value if vtype == "monolingualtext": return value["text"] if vtype == "quantity": diff --git a/LexData/entity.py b/LexData/entity.py index e21b6fe..906922e 100644 --- a/LexData/entity.py +++ b/LexData/entity.py @@ -3,6 +3,7 @@ from typing import Dict, List, Union from .claim import Claim +from .utils import getPropertyType from .wikidatasession import WikidataSession @@ -87,11 +88,43 @@ def __setEntityClaim__(self, idProp: str, idStr: str): :param idProp: id of the property (example: "P31") :param idItem: id of the entity (example: "Q1") """ - entityId = int(idStr[1:]) - claim_value = json.dumps({"entity-type": "item", "numeric-id": entityId}) - self.__setClaim__(idProp, claim_value) + # Check if this is an external-id property + datatype = None + try: + datatype = getPropertyType(idProp) + except Exception: + # If we can't get the property type, assume it's an entity + pass + if datatype == "external-id": + # For external-id properties, create a Claim object and use __setClaims__ + claim = Claim(propertyId=idProp, value=idStr) + self.__setClaims__([claim]) + return + # Handle entity-type properties as before + if idStr.startswith(('Q', 'P', 'L')): + entityId = int(idStr[1:]) + claim_value = json.dumps({"entity-type": "item", "numeric-id": entityId}) + self.__setClaim__(idProp, claim_value) + else: + raise ValueError(f"Invalid entity ID format: {idStr}. Expected Q, P, or L prefix.") def __setClaim__(self, idProp: str, claim_value): + from .utils import getPropertyType + import LexData.lexeme + is_lexeme = isinstance(self, LexData.lexeme.Lexeme) + if isinstance(claim_value, Claim): + datatype = getPropertyType(idProp) + if datatype == "external-id": + claim_value = claim_value.pure_value + else: + snak_data = claim_value["mainsnak"] + claim_value = json.dumps(snak_data["datavalue"]) + datatype = getPropertyType(idProp) + if datatype == "external-id": + claim_value_json = json.dumps(claim_value) + else: + claim_value_json = claim_value + # Use wbcreateclaim for both lexemes and other entities PARAMS = { "action": "wbcreateclaim", "format": "json", @@ -99,7 +132,7 @@ def __setClaim__(self, idProp: str, claim_value): "snaktype": "value", "bot": "1", "property": idProp, - "value": claim_value, + "value": claim_value_json, "token": "__AUTO__", } diff --git a/LexData/utils.py b/LexData/utils.py index a5dc28b..81649d0 100644 --- a/LexData/utils.py +++ b/LexData/utils.py @@ -39,6 +39,13 @@ def buildDataValue(datatype: str, value): raise TypeError( f"Can not convert type {type(value)} to datatype {datatype}" ) + elif datatype == "external-id": + if type(value) == str: + return {"value": value, "type": "external-id"} + else: + raise TypeError( + f"Can not convert type {type(value)} to datatype {datatype}" + ) elif datatype in [ "string", "tabular-data", From 404de1e856a9565cb015bc4a512363242f7706ad Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 19 Jun 2025 06:41:08 +0300 Subject: [PATCH 2/3] external-id support documentation --- README.md | 8 ++++++++ example.py | 7 ++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 67bc439..5810290 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,14 @@ power of the access to the internals. LexData is still in beta phase and there fore some features are missing and functions might be renamed in future. +## Features + +- Create and manage Wikidata Lexemes +- Add forms and senses to lexemes +- Add claims to lexemes, forms, and senses (including external-id properties) +- Search and find existing lexemes +- Support for various Wikidata data types (entities, strings, external-ids, etc.) + The code of AitalvivemBot was used as a starting point, but probably theres not a single line of code that wasn't rewritten. diff --git a/example.py b/example.py index ff6014d..fe71308 100644 --- a/example.py +++ b/example.py @@ -29,7 +29,7 @@ if len(L2.forms) == 0: L2.createForm("firsts", ["Q146786"]) -# …or senses, with or without additional claims +# …or senses, with or without additional claims… if len(L2.senses) == 0: L2.createSense( { @@ -38,3 +38,8 @@ }, claims={"P5137": ["Q19269277"]}, ) + +# …and add external-id claim to lexeme +if len(L2.claims.get("P12682", [])) == 0: + external_id_claim = LexData.Claim(propertyId="P12682", value="example_50bcf7bc0a0ae2bab9011b09139f6f8a") + L2.addClaims([external_id_claim]) \ No newline at end of file From 4b765c7f427c2c293bd28c034b1850ad3b416296 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 19 Jun 2025 21:04:33 +0300 Subject: [PATCH 3/3] changed external-id example to positional --- example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example.py b/example.py index fe71308..cd42d96 100644 --- a/example.py +++ b/example.py @@ -41,5 +41,5 @@ # …and add external-id claim to lexeme if len(L2.claims.get("P12682", [])) == 0: - external_id_claim = LexData.Claim(propertyId="P12682", value="example_50bcf7bc0a0ae2bab9011b09139f6f8a") + external_id_claim = LexData.Claim("P12682", "example_50bcf7bc0a0ae2bab9011b09139f6f8a") L2.addClaims([external_id_claim]) \ No newline at end of file