5 changes: 5 additions & 0 deletions DHIS2/python_skeleton/.gitignore
@@ -0,0 +1,5 @@
.env
.venv/
__pycache__/
input/
output/
55 changes: 55 additions & 0 deletions DHIS2/python_skeleton/README.md
@@ -0,0 +1,55 @@
# DHIS2 Python Skeleton

A small set of DHIS2 utilities (“use cases”):
- Generate SQL for missing attribute values (`create-missing-values`).
- Update data element UIDs and codes inside a blueprint XLSX (`update-blueprint-dataelements`).

## Installation (recommended: virtualenv)
```bash
python3 -m venv .venv
source .venv/bin/activate # Linux / macOS
# .venv\Scripts\activate # Windows

pip install openpyxl
pip install requests
```

## Quick start
Run commands from the project root.

### Update data element UIDs in a blueprint
Reads an XLSX from `input/` and writes the result to `output/` (a timestamp is appended to the output file name if it already exists).
```bash
python3 main_skeleton.py \
--use-case update-blueprint-dataelements \
--base-url https://server \
--jsessionid token \
--xlsx-file Blueprint_HFW.xlsx \
--output-xlsx-file blueprint_apvd.xlsx
```
Key parameters:
- `--sheets`: sheet names to process (default `Module 1 - APVD` and `Module 2 - APVD`).
- `--name-col`: column with the data element name (index, letter, or header; default `3`).
- `--uid-col`: column to write the UID (default `UID`; accepts index, letter, or header).
- `--code-col`: column to write the code (default `Code`; accepts index, letter, or header).
- `--data-start-row`: start row when no headers are present (default `2`; auto-adjusts if headers are detected).

What it does:
- Ensures names end with `-APVD`.
- Searches DHIS2 with a `name:like` filter; if exactly one exact match is found, writes the UID (and the code when present). See the sketch after this list.
- If there is no exact match, it lists the candidates and prompts for the UID in the console.
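
A minimal sketch of the exact-match lookup described above, reusing `dhis_get` from `dhis_utils.py`. The helper name `find_exact_data_element` and the exact query parameters are illustrative assumptions, not necessarily what `main_skeleton.py` does internally:

```python
from dhis_utils import dhis_get  # helper shipped in this folder


def find_exact_data_element(name: str, base_url: str, jsessionid: str) -> dict | None:
    """Return the single data element whose name matches exactly, or None."""
    payload = dhis_get(
        path="/api/dataElements",
        base_url=base_url,
        jsessionid=jsessionid,
        params={
            "filter": f"name:like:{name}",  # broad search, as described above
            "fields": "id,name,code",
            "paging": "false",
        },
    )
    exact = [de for de in payload.get("dataElements", []) if de.get("name") == name]
    return exact[0] if len(exact) == 1 else None
```

When the list of exact matches is empty or ambiguous, the script falls back to the manual prompt described in the last bullet.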

### Create SQL for missing attributes
Generates INSERT statements for missing tracked-entity attribute values (reads a CSV from `input/` and writes the SQL to `output/`):
```bash
python3 main_skeleton.py \
--use-case create-missing-values \
--base-url https://server \
--jsessionid token \
--input-file teis_without_storedby.csv \
--output-file insert_attr_fullname.sql
```

## Notes
- Credentials can also come from a `.env` file (`BASE_URL` and `JSESSIONID`); the script asks for confirmation before using them. See the example below.
- For more options, run `python3 main_skeleton.py --help`.
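
An illustrative `.env` (placeholder values; replace with your own server URL and session cookie):

```
BASE_URL=https://my-dhis2.example.org
JSESSIONID=0123456789abcdef0123456789abcdef
```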
253 changes: 253 additions & 0 deletions DHIS2/python_skeleton/create_missing_values_use_case.py
@@ -0,0 +1,253 @@
# create_missing_values_use_case.py

from typing import Optional, Tuple

import requests

from dhis_utils import dhis_get
from file_utils import read_csv, escape_sql_literal, write_text


# Numeric ID of the attribute in trackedentityattributevalue table
TRACKED_ENTITY_ATTRIBUTE_ID = 11364749
# Optional: UID of the attribute, for documentation/reference
TRACKED_ENTITY_ATTRIBUTE_UID = "Nf2VUgxqhmi"


class CreateMissingValuesUseCase:
"""
Use case to generate INSERT statements for missing attribute values
based on existing TEI metadata (created, lastUpdated, storedBy).
"""

def __init__(
self,
base_url: str,
jsessionid: str,
input_path: str,
output_path: str,
):
"""
Args:
base_url: DHIS2 base URL (without trailing slash).
jsessionid: JSESSIONID cookie value.
input_path: CSV file name (relative to 'input/' folder).
output_path: Output SQL file name (relative to 'output/' folder).
"""
self.base_url = base_url
self.jsessionid = jsessionid
self.input_path = input_path
self.output_path = output_path

@staticmethod
def normalize_timestamp(raw_timestamp: Optional[str]) -> Optional[str]:
"""
Normalize a DHIS2 timestamp into a format that Postgres accepts.
Example:
'2025-07-18T13:48:12.502' -> '2025-07-18 13:48:12.502'
"""
if not raw_timestamp:
return None

timestamp = raw_timestamp.rstrip("Z")
timestamp = timestamp.replace("T", " ")
return timestamp

@classmethod
def _get_attribute_template_from_tei_level(
cls,
tei_data: dict,
) -> Tuple[Optional[str], Optional[str], Optional[str]]:
"""
Try to obtain (created, lastUpdated, storedBy) from the first
attribute at TEI level.
"""
tei_level_attributes = tei_data.get("attributes") or []
if not tei_level_attributes:
return None, None, None

first_attribute = tei_level_attributes[0]
created = cls.normalize_timestamp(first_attribute.get("created"))
last_updated = cls.normalize_timestamp(first_attribute.get("lastUpdated"))
stored_by = first_attribute.get("storedBy")

return created, last_updated, stored_by

@classmethod
def _get_attribute_template_from_first_enrollment(
cls,
tei_data: dict,
) -> Tuple[Optional[str], Optional[str], Optional[str]]:
"""
Fallback: try to obtain (created, lastUpdated, storedBy) from the
first attribute of the first enrollment.
"""
enrollments = tei_data.get("enrollments") or []
if not enrollments:
return None, None, None

first_enrollment = enrollments[0]
enrollment_attributes = first_enrollment.get("attributes") or []
if not enrollment_attributes:
return None, None, None

first_enrollment_attribute = enrollment_attributes[0]
created = cls.normalize_timestamp(first_enrollment_attribute.get("created"))
last_updated = cls.normalize_timestamp(first_enrollment_attribute.get("lastUpdated"))
stored_by = first_enrollment_attribute.get("storedBy")

return created, last_updated, stored_by

def _get_attribute_template(
self,
tei_uid: str,
) -> Tuple[Optional[str], Optional[str], Optional[str]]:
"""
Get (created, lastUpdated, storedBy) to reuse as a template in the
new attribute value.

Strategy:
1) Try TEI-level attributes.
2) If none, try the first enrollment's attributes.
3) If nothing found, return (None, None, None).
"""
tei_data = dhis_get(
path=f"/api/trackedEntityInstances/{tei_uid}",
base_url=self.base_url,
jsessionid=self.jsessionid,
params={"fields": "*"},
)

created, last_updated, stored_by = self._get_attribute_template_from_tei_level(tei_data)
if created and last_updated and stored_by:
return created, last_updated, stored_by

return self._get_attribute_template_from_first_enrollment(tei_data)

@staticmethod
def _build_insert_statement(
tracked_entity_id: str,
created_timestamp: str,
last_updated_timestamp: str,
full_name: str,
stored_by: str,
) -> str:
"""
Build an INSERT statement for trackedentityattributevalue.
"""
full_name_sql = escape_sql_literal(full_name)
stored_by_sql = escape_sql_literal(stored_by)

return f"""
INSERT INTO trackedentityattributevalue (
trackedentityid,
trackedentityattributeid,
created,
lastupdated,
value,
storedby
)
VALUES (
{tracked_entity_id},
{TRACKED_ENTITY_ATTRIBUTE_ID},
'{created_timestamp}'::timestamp,
'{last_updated_timestamp}'::timestamp,
'{full_name_sql}',
'{stored_by_sql}'
);
""".strip()

def execute(self):
"""
Use case entry point.

        It expects a CSV with at least these columns:
        - trackedentityid
        - tei_uid
        - firstname and/or surname (combined into full_name)

        For each row:
        - Obtain a template (created, lastUpdated, storedBy) from the
          existing attribute values of the TEI.
        - Generate an INSERT into trackedentityattributevalue that stores full_name.
"""
inserts: list[str] = []
skipped: list[tuple[str, str]] = []

rows = read_csv(self.input_path)
print(f"Read {len(rows)} rows from input/{self.input_path}")

for row in rows:
tracked_entity_id = (row.get("trackedentityid") or "").strip()
tei_uid = (row.get("tei_uid") or "").strip()
firstname = (row.get("firstname") or "").strip()
surname = (row.get("surname") or "").strip()

            # Build full_name as:
            # firstname + " " + surname
if firstname and surname:
full_name = f"{firstname} {surname}"
elif firstname:
full_name = firstname
elif surname:
full_name = surname
else:
full_name = ""

if not tracked_entity_id or not tei_uid or not full_name:
print(f"[SKIP] Missing required data in CSV row: {row}")
skipped.append((tei_uid, "incomplete_csv_data"))
continue

print(f"Processing TEI {tei_uid} (trackedentityid={tracked_entity_id})...")

try:
created_timestamp, last_updated_timestamp, stored_by = self._get_attribute_template(
tei_uid=tei_uid,
)
except requests.HTTPError as http_error:
status_code = (
http_error.response.status_code
if http_error.response is not None
else "?"
)
print(f"[ERROR] TEI {tei_uid}: HTTP {status_code}")
skipped.append((tei_uid, f"http_{status_code}"))
continue
except Exception as unexpected_error:
print(f"[ERROR] TEI {tei_uid}: {unexpected_error}")
skipped.append((tei_uid, "unexpected_error"))
continue

if not created_timestamp or not last_updated_timestamp or not stored_by:
print(
f"[WARN] TEI {tei_uid}: no template "
"(created/lastUpdated/storedBy), skipping"
)
skipped.append((tei_uid, "no_attribute_template"))
continue

insert_sql = self._build_insert_statement(
tracked_entity_id=tracked_entity_id,
created_timestamp=created_timestamp,
last_updated_timestamp=last_updated_timestamp,
full_name=full_name,
stored_by=stored_by,
)

inserts.append(insert_sql)

print(insert_sql)

if not inserts:
print("No INSERT statements generated. Check CSV / connection.")
return

sql_script = "BEGIN;\n\n" + "\n\n".join(inserts) + "\n\nCOMMIT;\n"
final_path = write_text(self.output_path, sql_script)

print(f"\nSQL written to: {final_path}")
if skipped:
print("\nSkipped TEIs:")
for tei_uid, reason in skipped:
print(f" - {tei_uid}: {reason}")
75 changes: 75 additions & 0 deletions DHIS2/python_skeleton/dhis_utils.py
@@ -0,0 +1,75 @@
# dhis_utils.py

import sys
import requests


def dhis_get(
path: str,
base_url: str,
jsessionid: str,
params: dict | None = None,
timeout: int = 30,
) -> dict:
"""
Perform a GET request to a DHIS2 instance using a JSESSIONID cookie.

Args:
path: API path, e.g. "/api/system/info".
base_url: Base URL of the DHIS2 instance, e.g. "https://my-dhis2".
jsessionid: Value of the JSESSIONID cookie.
params: Optional query parameters.
timeout: Request timeout in seconds.

Returns:
Parsed JSON response as a Python dict.

Raises:
        requests.HTTPError: if the response has an error status (4xx/5xx).
"""
url = f"{base_url.rstrip('/')}/{path.lstrip('/')}"
cookies = {"JSESSIONID": jsessionid}
response = requests.get(url, params=params or {}, cookies=cookies, timeout=timeout)
response.raise_for_status()
return response.json()
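
# Illustrative usage of dhis_get (placeholder URL and cookie value):
#   info = dhis_get(path="/api/system/info",
#                   base_url="https://my-dhis2.example.org",
#                   jsessionid="abc123")
#   print(info.get("version"))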


def test_connection(
base_url: str,
jsessionid: str,
timeout: int = 10,
) -> dict:
"""
Test connection against /api/system/info.
If it fails, print an error and abort the script.

Returns:
system/info JSON dict if everything is OK.

Exits:
Calls sys.exit(1) on any error.
"""
try:
system_info = dhis_get(
path="/api/system/info",
base_url=base_url,
jsessionid=jsessionid,
params=None,
timeout=timeout,
)
except requests.HTTPError as http_error:
status = http_error.response.status_code if http_error.response is not None else "?"
print(f"[FATAL] HTTP error {status} while calling /api/system/info at {base_url}")
sys.exit(1)
except requests.RequestException as req_error:
print(f"[FATAL] Could not connect to {base_url} (/api/system/info): {req_error}")
sys.exit(1)
except Exception as unexpected:
print(f"[FATAL] Unexpected error while testing connection to {base_url}: {unexpected}")
sys.exit(1)

print(
f"[OK] Connected to DHIS2 at {base_url} "
f"(version={system_info.get('version', 'unknown')})"
)
return system_info