diff --git a/.github/workflows/base-lambdas-reusable-deploy-all.yml b/.github/workflows/base-lambdas-reusable-deploy-all.yml index d92a3893a..e5bd7798d 100644 --- a/.github/workflows/base-lambdas-reusable-deploy-all.yml +++ b/.github/workflows/base-lambdas-reusable-deploy-all.yml @@ -796,3 +796,17 @@ jobs: lambda_layer_names: "core_lambda_layer" secrets: AWS_ASSUME_ROLE: ${{ secrets.AWS_ASSUME_ROLE }} + + deploy_report_orchestration_lambda: + name: Deploy Search Document Review + uses: ./.github/workflows/base-lambdas-reusable-deploy.yml + with: + environment: ${{ inputs.environment }} + python_version: ${{ inputs.python_version }} + build_branch: ${{ inputs.build_branch }} + sandbox: ${{ inputs.sandbox }} + lambda_handler_name: report_orchestration_handler + lambda_aws_name: reportOrchestration + lambda_layer_names: "core_lambda_layer,reports_lambda_layer" + secrets: + AWS_ASSUME_ROLE: ${{ secrets.AWS_ASSUME_ROLE }} diff --git a/lambdas/handlers/report_orchestration_handler.py b/lambdas/handlers/report_orchestration_handler.py new file mode 100644 index 000000000..84f6372ab --- /dev/null +++ b/lambdas/handlers/report_orchestration_handler.py @@ -0,0 +1,56 @@ +import os +import tempfile +from datetime import datetime, timedelta, timezone + +from repositories.reporting.reporting_dynamo_repository import ReportingDynamoRepository +from services.reporting.excel_report_generator_service import ExcelReportGenerator +from services.reporting.report_orchestration_service import ReportOrchestrationService +from utils.audit_logging_setup import LoggingService +from utils.decorators.ensure_env_var import ensure_environment_variables +from utils.decorators.handle_lambda_exceptions import handle_lambda_exceptions +from utils.decorators.override_error_check import override_error_check +from utils.decorators.set_audit_arg import set_request_context_for_logging + +logger = LoggingService(__name__) + + +def calculate_reporting_window(): + now = datetime.now(timezone.utc) + today_7am = now.replace(hour=7, minute=0, second=0, microsecond=0) + + if now < today_7am: + today_7am -= timedelta(days=1) + + yesterday_7am = today_7am - timedelta(days=1) + + return ( + int(yesterday_7am.timestamp()), + int(today_7am.timestamp()), + ) + +@ensure_environment_variables( + names=["BULK_UPLOAD_REPORT_TABLE_NAME"] +) +@override_error_check +@handle_lambda_exceptions +@set_request_context_for_logging +def lambda_handler(event, context): + logger.info("Report orchestration lambda invoked") + table_name = os.getenv("BULK_UPLOAD_REPORT_TABLE_NAME") + + repository = ReportingDynamoRepository(table_name) + excel_generator = ExcelReportGenerator() + + service = ReportOrchestrationService( + repository=repository, + excel_generator=excel_generator, + ) + + window_start, window_end = calculate_reporting_window() + tmp_dir = tempfile.mkdtemp() + + service.process_reporting_window( + window_start_ts=window_start, + window_end_ts=window_end, + output_dir=tmp_dir, + ) diff --git a/lambdas/repositories/reporting/reporting_dynamo_repository.py b/lambdas/repositories/reporting/reporting_dynamo_repository.py new file mode 100644 index 000000000..7a694edf6 --- /dev/null +++ b/lambdas/repositories/reporting/reporting_dynamo_repository.py @@ -0,0 +1,35 @@ +from typing import Dict, List + +from boto3.dynamodb.conditions import Attr +from services.base.dynamo_service import DynamoDBService +from utils.audit_logging_setup import LoggingService + +logger = LoggingService(__name__) + + +class ReportingDynamoRepository: + def __init__(self, table_name: str): + self.table_name = table_name + self.dynamo_service = DynamoDBService() + + def get_records_for_time_window( + self, + start_timestamp: int, + end_timestamp: int, + ) -> List[Dict]: + logger.info( + f"Querying reporting table for window, " + f"table_name: {self.table_name}, " + f"start_timestamp: {start_timestamp}, " + f"end_timestamp: {end_timestamp}", + ) + + filter_expression = Attr("Timestamp").between( + start_timestamp, + end_timestamp, + ) + + return self.dynamo_service.scan_whole_table( + table_name=self.table_name, + filter_expression=filter_expression, + ) diff --git a/lambdas/services/reporting/excel_report_generator_service.py b/lambdas/services/reporting/excel_report_generator_service.py new file mode 100644 index 000000000..b8bef9a5c --- /dev/null +++ b/lambdas/services/reporting/excel_report_generator_service.py @@ -0,0 +1,58 @@ +from datetime import datetime + +from openpyxl.workbook import Workbook +from utils.audit_logging_setup import LoggingService + +logger = LoggingService(__name__) + + +class ExcelReportGenerator: + def create_report_orchestration_xlsx( + self, + ods_code: str, + records: list[dict], + output_path: str, + ) -> str: + logger.info( + f"Creating Excel report for ODS code {ods_code} and records {len(records)}" + ) + wb = Workbook() + ws = wb.active + ws.title = "Daily Upload Report" + + # Report metadata + ws.append([f"ODS Code: {ods_code}"]) + ws.append([f"Generated at (UTC): {datetime.utcnow().isoformat()}"]) + ws.append([]) + + # Header row + ws.append( + [ + "ID", + "Date", + "NHS Number", + "Uploader ODS", + "PDS ODS", + "Upload Status", + "Reason", + "File Path", + ] + ) + + for record in records: + ws.append( + [ + record.get("ID"), + record.get("Date"), + record.get("NhsNumber"), + record.get("UploaderOdsCode"), + record.get("PdsOdsCode"), + record.get("UploadStatus"), + record.get("Reason"), + record.get("FilePath"), + ] + ) + + wb.save(output_path) + logger.info(f"Excel report written successfully for for ods code {ods_code}") + return output_path diff --git a/lambdas/services/reporting/report_orchestration_service.py b/lambdas/services/reporting/report_orchestration_service.py new file mode 100644 index 000000000..a9500735b --- /dev/null +++ b/lambdas/services/reporting/report_orchestration_service.py @@ -0,0 +1,62 @@ +import tempfile +from collections import defaultdict + +from utils.audit_logging_setup import LoggingService + +logger = LoggingService(__name__) + + +class ReportOrchestrationService: + def __init__( + self, + repository, + excel_generator, + ): + self.repository = repository + self.excel_generator = excel_generator + + def process_reporting_window( + self, + window_start_ts: int, + window_end_ts: int, + output_dir: str, + ): + records = self.repository.get_records_for_time_window( + window_start_ts, + window_end_ts, + ) + if not records: + logger.info("No records found for reporting window") + return + + records_by_ods = self.group_records_by_ods(records) + + for ods_code, ods_records in records_by_ods.items(): + logger.info( + f"Generating report for ODS ods_code = {ods_code} record_count = {len(ods_records)}" + ) + self.generate_ods_report(ods_code, ods_records) + logger.info("Report orchestration completed") + + @staticmethod + def group_records_by_ods(records: list[dict]) -> dict[str, list[dict]]: + grouped = defaultdict(list) + for record in records: + ods_code = record.get("UploaderOdsCode") or "UNKNOWN" + grouped[ods_code].append(record) + return grouped + + def generate_ods_report( + self, + ods_code: str, + records: list[dict], + ): + with tempfile.NamedTemporaryFile( + suffix=f"_{ods_code}.xlsx", + delete=False, + ) as tmp: + self.excel_generator.create_report_orchestration_xlsx( + ods_code=ods_code, + records=records, + output_path=tmp.name, + ) diff --git a/lambdas/tests/unit/handlers/test_report_orchestration_handler.py b/lambdas/tests/unit/handlers/test_report_orchestration_handler.py new file mode 100644 index 000000000..c9ee2ff84 --- /dev/null +++ b/lambdas/tests/unit/handlers/test_report_orchestration_handler.py @@ -0,0 +1,74 @@ +from unittest import mock +from unittest.mock import MagicMock +import pytest +from handlers.report_orchestration_handler import lambda_handler + + +class FakeContext: + aws_request_id = "test-request-id" + + +@pytest.fixture(autouse=True) +def mock_env(monkeypatch): + monkeypatch.setenv("BULK_UPLOAD_REPORT_TABLE_NAME", "TestTable") + + +@pytest.fixture +def mock_logger(mocker): + return mocker.patch("handlers.report_orchestration_handler.logger", new=MagicMock()) + + +@pytest.fixture +def mock_repo(mocker): + return mocker.patch( + "handlers.report_orchestration_handler.ReportingDynamoRepository", + autospec=True, + ) + + +@pytest.fixture +def mock_excel_generator(mocker): + return mocker.patch( + "handlers.report_orchestration_handler.ExcelReportGenerator", + autospec=True, + ) + + +@pytest.fixture +def mock_service(mocker): + return mocker.patch( + "handlers.report_orchestration_handler.ReportOrchestrationService", + autospec=True, + ) + + +@pytest.fixture +def mock_window(mocker): + return mocker.patch( + "handlers.report_orchestration_handler.calculate_reporting_window", + return_value=(100, 200), + ) + + +def test_lambda_handler_calls_service( + mock_logger, mock_repo, mock_excel_generator, mock_service, mock_window +): + lambda_handler(event={}, context=FakeContext()) + + mock_repo.assert_called_once_with("TestTable") + mock_excel_generator.assert_called_once_with() + + mock_service.assert_called_once() + instance = mock_service.return_value + instance.process_reporting_window.assert_called_once_with( + window_start_ts=100, + window_end_ts=200, + output_dir=mock.ANY, + ) + + mock_logger.info.assert_any_call("Report orchestration lambda invoked") + + +def test_lambda_handler_calls_window_function(mock_service, mock_window): + lambda_handler(event={}, context=FakeContext()) + mock_window.assert_called_once() diff --git a/lambdas/tests/unit/repositories/reporting/test_reporting_dynamo_repository.py b/lambdas/tests/unit/repositories/reporting/test_reporting_dynamo_repository.py new file mode 100644 index 000000000..df3c55304 --- /dev/null +++ b/lambdas/tests/unit/repositories/reporting/test_reporting_dynamo_repository.py @@ -0,0 +1,41 @@ +from unittest.mock import MagicMock + +import pytest +from repositories.reporting.reporting_dynamo_repository import ReportingDynamoRepository + + +@pytest.fixture +def mock_dynamo_service(mocker): + mock_service = mocker.patch( + "repositories.reporting.reporting_dynamo_repository.DynamoDBService" + ) + instance = mock_service.return_value + instance.scan_whole_table = MagicMock() + return instance + + +@pytest.fixture +def reporting_repo(mock_dynamo_service): + return ReportingDynamoRepository(table_name="TestTable") + + +def test_get_records_for_time_window_calls_scan(mock_dynamo_service, reporting_repo): + mock_dynamo_service.scan_whole_table.return_value = [] + + reporting_repo.get_records_for_time_window(100, 200) + + mock_dynamo_service.scan_whole_table.assert_called_once() + assert "filter_expression" in mock_dynamo_service.scan_whole_table.call_args.kwargs + + +def test_get_records_for_time_window_returns_empty_list( + mock_dynamo_service, reporting_repo +): + start_ts = 0 + end_ts = 50 + mock_dynamo_service.scan_whole_table.return_value = [] + + result = reporting_repo.get_records_for_time_window(start_ts, end_ts) + + assert result == [] + mock_dynamo_service.scan_whole_table.assert_called_once() diff --git a/lambdas/tests/unit/services/reporting/test_excel_report_generator_service.py b/lambdas/tests/unit/services/reporting/test_excel_report_generator_service.py new file mode 100644 index 000000000..f16257c0f --- /dev/null +++ b/lambdas/tests/unit/services/reporting/test_excel_report_generator_service.py @@ -0,0 +1,154 @@ + +import pytest +from freezegun import freeze_time +from openpyxl import load_workbook +from services.reporting.excel_report_generator_service import ExcelReportGenerator + + +@pytest.fixture +def excel_report_generator(): + return ExcelReportGenerator() + + +@freeze_time("2025-01-01T12:00:00") +def test_create_report_orchestration_xlsx_happy_path( + excel_report_generator, + tmp_path, +): + output_file = tmp_path / "report.xlsx" + + ods_code = "Y12345" + records = [ + { + "ID": 1, + "Date": "2025-01-01", + "NhsNumber": "1234567890", + "UploaderOdsCode": "Y12345", + "PdsOdsCode": "A99999", + "UploadStatus": "SUCCESS", + "Reason": "", + "FilePath": "/path/file1.pdf", + }, + { + "ID": 2, + "Date": "2025-01-02", + "NhsNumber": "123456789", + "UploaderOdsCode": "Y12345", + "PdsOdsCode": "B88888", + "UploadStatus": "FAILED", + "Reason": "Invalid NHS number", + "FilePath": "/path/file2.pdf", + }, + ] + + result = excel_report_generator.create_report_orchestration_xlsx( + ods_code=ods_code, + records=records, + output_path=str(output_file), + ) + + # File path returned + assert result == str(output_file) + assert output_file.exists() + + wb = load_workbook(output_file) + ws = wb.active + + # Sheet name + assert ws.title == "Daily Upload Report" + + # Metadata rows + assert ws["A1"].value == f"ODS Code: {ods_code}" + assert ws["A2"].value == "Generated at (UTC): 2025-01-01T12:00:00" + assert ws["A3"].value is None # blank row + + # Header row + assert [cell.value for cell in ws[4]] == [ + "ID", + "Date", + "NHS Number", + "Uploader ODS", + "PDS ODS", + "Upload Status", + "Reason", + "File Path", + ] + + # First data row + assert [cell.value for cell in ws[5]] == [ + 1, + "2025-01-01", + "1234567890", + "Y12345", + "A99999", + "SUCCESS", + None, + "/path/file1.pdf", + ] + + # Second data row + assert [cell.value for cell in ws[6]] == [ + 2, + "2025-01-02", + "123456789", + "Y12345", + "B88888", + "FAILED", + "Invalid NHS number", + "/path/file2.pdf", + ] + + +def test_create_report_orchestration_xlsx_with_no_records( + excel_report_generator, + tmp_path, +): + output_file = tmp_path / "empty_report.xlsx" + + excel_report_generator.create_report_orchestration_xlsx( + ods_code="Y12345", + records=[], + output_path=str(output_file), + ) + + wb = load_workbook(output_file) + ws = wb.active + + # Only metadata + header rows should exist + assert ws.max_row == 4 + + +def test_create_report_orchestration_xlsx_handles_missing_fields( + excel_report_generator, + tmp_path, +): + output_file = tmp_path / "partial.xlsx" + + records = [ + { + "ID": 1, + "NhsNumber": "1234567890", + } + ] + + excel_report_generator.create_report_orchestration_xlsx( + ods_code="Y12345", + records=records, + output_path=str(output_file), + ) + + wb = load_workbook(output_file) + ws = wb.active + + row = [cell.value for cell in ws[5]] + + assert row == [ + 1, + None, + "1234567890", + None, + None, + None, + None, + None, + ] diff --git a/lambdas/tests/unit/services/reporting/test_report_orchestration_service.py b/lambdas/tests/unit/services/reporting/test_report_orchestration_service.py new file mode 100644 index 000000000..c068ba0c1 --- /dev/null +++ b/lambdas/tests/unit/services/reporting/test_report_orchestration_service.py @@ -0,0 +1,107 @@ +import pytest +from services.reporting.report_orchestration_service import ReportOrchestrationService + + +@pytest.fixture +def mock_repository(mocker): + repo = mocker.Mock() + repo.get_records_for_time_window.return_value = [] + return repo + + +@pytest.fixture +def mock_excel_generator(mocker): + return mocker.Mock() + + +@pytest.fixture +def report_orchestration_service(mock_repository, mock_excel_generator): + return ReportOrchestrationService( + repository=mock_repository, + excel_generator=mock_excel_generator, + ) + + +def test_process_reporting_window_no_records( + report_orchestration_service, mock_repository, mock_excel_generator +): + mock_repository.get_records_for_time_window.return_value = [] + + report_orchestration_service.process_reporting_window(100, 200, output_dir="/tmp") + + mock_excel_generator.create_report_orchestration_xlsx.assert_not_called() + + +def test_group_records_by_ods_groups_correctly(): + records = [ + {"UploaderOdsCode": "Y12345", "ID": 1}, + {"UploaderOdsCode": "Y12345", "ID": 2}, + {"UploaderOdsCode": "A99999", "ID": 3}, + {"ID": 4}, # missing ODS + {"UploaderOdsCode": None, "ID": 5}, # null ODS + ] + + result = ReportOrchestrationService.group_records_by_ods(records) + + assert result["Y12345"] == [ + {"UploaderOdsCode": "Y12345", "ID": 1}, + {"UploaderOdsCode": "Y12345", "ID": 2}, + ] + assert result["A99999"] == [{"UploaderOdsCode": "A99999", "ID": 3}] + assert result["UNKNOWN"] == [ + {"ID": 4}, + {"UploaderOdsCode": None, "ID": 5}, + ] + + +def test_process_reporting_window_generates_reports_per_ods( + report_orchestration_service, mock_repository, mocker +): + records = [ + {"UploaderOdsCode": "Y12345", "ID": 1}, + {"UploaderOdsCode": "Y12345", "ID": 2}, + {"UploaderOdsCode": "A99999", "ID": 3}, + ] + mock_repository.get_records_for_time_window.return_value = records + + mocked_generate = mocker.patch.object( + report_orchestration_service, "generate_ods_report" + ) + + report_orchestration_service.process_reporting_window(100, 200, output_dir="/tmp") + + mocked_generate.assert_any_call( + "Y12345", + [ + {"UploaderOdsCode": "Y12345", "ID": 1}, + {"UploaderOdsCode": "Y12345", "ID": 2}, + ], + ) + mocked_generate.assert_any_call( + "A99999", + [{"UploaderOdsCode": "A99999", "ID": 3}], + ) + assert mocked_generate.call_count == 2 + + +def test_generate_ods_report_creates_excel_report( + report_orchestration_service, mock_excel_generator, mocker +): + fake_tmp = mocker.MagicMock() + fake_tmp.__enter__.return_value = fake_tmp + fake_tmp.name = "/tmp/fake_Y12345.xlsx" + + mocker.patch( + "services.reporting.report_orchestration_service.tempfile.NamedTemporaryFile", + return_value=fake_tmp, + ) + + records = [{"ID": 1, "UploaderOdsCode": "Y12345"}] + + report_orchestration_service.generate_ods_report("Y12345", records) + + mock_excel_generator.create_report_orchestration_xlsx.assert_called_once_with( + ods_code="Y12345", + records=records, + output_path=fake_tmp.name, + )