From eb518b44af1654d02b6c52e05222069e39ccc215 Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 10:58:21 +0100 Subject: [PATCH 01/23] Add test for update_statusdb --- dataflow_transfer/tests/test_run_classes.py | 63 +++++++++++++++++++-- 1 file changed, 59 insertions(+), 4 deletions(-) diff --git a/dataflow_transfer/tests/test_run_classes.py b/dataflow_transfer/tests/test_run_classes.py index 3764333..f770841 100644 --- a/dataflow_transfer/tests/test_run_classes.py +++ b/dataflow_transfer/tests/test_run_classes.py @@ -129,10 +129,6 @@ def test_generate_rsync_command(run_fixture, final_sync, request): assert f"; echo $? > {run_obj.final_rsync_exitcode_file}" in rsync_command -def test_initiate_background_transfer(): - pass # Further tests can be implemented for initiate_background_transfer - - def test_do_final_transfer(): pass # Further tests can be implemented for do_final_transfer @@ -185,3 +181,62 @@ def get_events(self, run_id): run_obj.db = MockDB() assert run_obj.has_status(status_to_check) == expected_result + + +@pytest.mark.parametrize( + "run_fixture, existing_statuses, status_to_update", + [ + ( + "nextseq_testobj", + [], + "sequencing_started", + ), + ( + "nextseq_testobj", + [{"event_type": "sequencing_started"}], + "transfer_started", + ), + ( + "miseqseq_testobj", + [], + "sequencing_started", + ), + ( + "miseqseq_testobj", + [{"event_type": "sequencing_started"}], + "transfer_started", + ), + ], +) +def test_update_statusdb( + run_fixture, + existing_statuses, + status_to_update, + request, +): + run_obj = request.getfixturevalue(run_fixture) + + class MockDB: + def __init__(self): + self.updated_doc = None + + def get_db_doc(self, ddoc, view, run_id): + return {"events": existing_statuses, "files": {}} + + def update_db_doc(self, doc): + self.updated_doc = doc + + import dataflow_transfer.utils.filesystem as fs + + def mock_locate_metadata(metadata_list, run_dir): + return [] + + def mock_parse_metadata_files(files): + return {} + + fs.locate_metadata = mock_locate_metadata + fs.parse_metadata_files = mock_parse_metadata_files + mock_db = MockDB() + run_obj.db = mock_db + run_obj.update_statusdb(status=status_to_update) + assert mock_db.updated_doc["events"][-1]["event_type"] == status_to_update From 58040d8a97829a26fee221b17a020a07e82673ce Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 11:32:27 +0100 Subject: [PATCH 02/23] add tests for initiate_background_transfer --- dataflow_transfer/tests/test_run_classes.py | 41 +++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/dataflow_transfer/tests/test_run_classes.py b/dataflow_transfer/tests/test_run_classes.py index f770841..e072dff 100644 --- a/dataflow_transfer/tests/test_run_classes.py +++ b/dataflow_transfer/tests/test_run_classes.py @@ -129,6 +129,47 @@ def test_generate_rsync_command(run_fixture, final_sync, request): assert f"; echo $? > {run_obj.final_rsync_exitcode_file}" in rsync_command +# use parameterization for the test fixtures to test initiate_background_transfer. mock fs.rsync_is_running, fs.submit_background_process and update_statusdb +@pytest.mark.parametrize( + "run_fixture, rsync_running", + [ + ("nextseq_testobj", False), + ("nextseq_testobj", True), + ("miseqseq_testobj", False), + ("miseqseq_testobj", True), + ], +) +def test_initiate_background_transfer(run_fixture, rsync_running, request, monkeypatch): + run_obj = request.getfixturevalue(run_fixture) + + def mock_rsync_is_running(src): + return rsync_running + + def mock_submit_background_process(command_str): + mock_submit_background_process.called = True + mock_submit_background_process.command_str = command_str + + def mock_update_statusdb(status, additional_info=None): + mock_update_statusdb.called = True + mock_update_statusdb.status = status + + monkeypatch.setattr(generic_runs.fs, "rsync_is_running", mock_rsync_is_running) + monkeypatch.setattr( + generic_runs.fs, "submit_background_process", mock_submit_background_process + ) + monkeypatch.setattr(run_obj, "update_statusdb", mock_update_statusdb) + + run_obj.initiate_background_transfer() + + if rsync_running: + assert not hasattr(mock_submit_background_process, "called") + else: + assert hasattr(mock_submit_background_process, "called") + assert "rsync" in mock_submit_background_process.command_str + assert hasattr(mock_update_statusdb, "called") + assert mock_update_statusdb.status == "transfer_started" + + def test_do_final_transfer(): pass # Further tests can be implemented for do_final_transfer From 963e5a71a85bb467178d7a4378f38baea92837f2 Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 11:42:59 +0100 Subject: [PATCH 03/23] Merge of transfer methods since they overlapped a lot --- dataflow_transfer/dataflow_transfer.py | 4 +- dataflow_transfer/run_classes/generic_runs.py | 44 +++++-------------- dataflow_transfer/tests/test_run_classes.py | 23 ++++++---- 3 files changed, 29 insertions(+), 42 deletions(-) diff --git a/dataflow_transfer/dataflow_transfer.py b/dataflow_transfer/dataflow_transfer.py index e696ff8..d879f5e 100644 --- a/dataflow_transfer/dataflow_transfer.py +++ b/dataflow_transfer/dataflow_transfer.py @@ -30,7 +30,7 @@ def process_run(run_dir, sequencer, config): ## Sequencing ongoing. Start background transfer if not already running. if run.sequencing_ongoing: run.update_statusdb(status="sequencing_started") - run.initiate_background_transfer() + run.start_transfer(final=False) return ## Sequencing finished but transfer not complete. Start final transfer. @@ -41,7 +41,7 @@ def process_run(run_dir, sequencer, config): "Will attempt final transfer again." ) run.update_statusdb(status="sequencing_finished") - run.do_final_transfer() + run.start_transfer(final=True) return ## Final transfer completed successfully. Update statusdb. diff --git a/dataflow_transfer/run_classes/generic_runs.py b/dataflow_transfer/run_classes/generic_runs.py index 5d7959d..6d1812a 100644 --- a/dataflow_transfer/run_classes/generic_runs.py +++ b/dataflow_transfer/run_classes/generic_runs.py @@ -65,53 +65,33 @@ def generate_rsync_command(self, is_final_sync=False): command_str += f"; echo $? > {self.final_rsync_exitcode_file}" return command_str - def initiate_background_transfer(self): + def start_transfer(self, final=False): """Start background rsync transfer to storage.""" - background_transfer_command = self.generate_rsync_command(is_final_sync=False) + transfer_command = self.generate_rsync_command(is_final_sync=final) if fs.rsync_is_running(src=self.run_dir): logger.info( f"Rsync is already running for {self.run_dir}. Skipping background transfer initiation." ) return try: - fs.submit_background_process(background_transfer_command) + fs.submit_background_process(transfer_command) logger.info( - f"{self.run_id}: Started background rsync to {self.miarka_destination}" - + f" with the following command: '{background_transfer_command}'" + f"{self.run_id}: Started rsync to {self.miarka_destination}" + + f" with the following command: '{transfer_command}'" ) except Exception as e: - logger.error(f"Failed to start background transfer for {self.run_id}: {e}") + logger.error(f"Failed to start rsync for {self.run_id}: {e}") raise e rsync_info = { - "command": background_transfer_command, + "command": transfer_command, "destination_path": self.miarka_destination, } - self.update_statusdb(status="transfer_started", additional_info=rsync_info) - - def do_final_transfer(self): - """Start final rsync transfer to storage.""" - final_transfer_command = self.generate_rsync_command(is_final_sync=True) - if fs.rsync_is_running(src=self.run_dir): - logger.info( - f"Rsync is already running for {self.run_dir}. Skipping final transfer initiation." - ) - return - try: - fs.submit_background_process(final_transfer_command) - logger.info( - f"{self.run_id}: Started FINAL rsync to {self.miarka_destination}" - + f" with the following command: '{final_transfer_command}'" + if final: + self.update_statusdb( + status="final_transfer_started", additional_info=rsync_info ) - except Exception as e: - logger.error(f"Failed to start final transfer for {self.run_id}: {e}") - raise e - rsync_info = { - "command": final_transfer_command, - "destination_path": self.miarka_destination, - } - self.update_statusdb( - status="final_transfer_started", additional_info=rsync_info - ) + else: + self.update_statusdb(status="transfer_started", additional_info=rsync_info) @property def final_sync_successful(self): diff --git a/dataflow_transfer/tests/test_run_classes.py b/dataflow_transfer/tests/test_run_classes.py index e072dff..2e60ab1 100644 --- a/dataflow_transfer/tests/test_run_classes.py +++ b/dataflow_transfer/tests/test_run_classes.py @@ -131,15 +131,19 @@ def test_generate_rsync_command(run_fixture, final_sync, request): # use parameterization for the test fixtures to test initiate_background_transfer. mock fs.rsync_is_running, fs.submit_background_process and update_statusdb @pytest.mark.parametrize( - "run_fixture, rsync_running", + "run_fixture, rsync_running, final", [ - ("nextseq_testobj", False), - ("nextseq_testobj", True), - ("miseqseq_testobj", False), - ("miseqseq_testobj", True), + ("nextseq_testobj", False, False), + ("nextseq_testobj", True, False), + ("nextseq_testobj", False, True), + ("nextseq_testobj", True, True), + ("miseqseq_testobj", False, False), + ("miseqseq_testobj", True, False), + ("miseqseq_testobj", False, True), + ("miseqseq_testobj", True, True), ], ) -def test_initiate_background_transfer(run_fixture, rsync_running, request, monkeypatch): +def test_start_transfer(run_fixture, rsync_running, final, request, monkeypatch): run_obj = request.getfixturevalue(run_fixture) def mock_rsync_is_running(src): @@ -159,7 +163,7 @@ def mock_update_statusdb(status, additional_info=None): ) monkeypatch.setattr(run_obj, "update_statusdb", mock_update_statusdb) - run_obj.initiate_background_transfer() + run_obj.start_transfer(final=final) if rsync_running: assert not hasattr(mock_submit_background_process, "called") @@ -167,7 +171,10 @@ def mock_update_statusdb(status, additional_info=None): assert hasattr(mock_submit_background_process, "called") assert "rsync" in mock_submit_background_process.command_str assert hasattr(mock_update_statusdb, "called") - assert mock_update_statusdb.status == "transfer_started" + if final: + assert mock_update_statusdb.status == "final_transfer_started" + else: + assert mock_update_statusdb.status == "transfer_started" def test_do_final_transfer(): From 1e0d7523759eaab7717cad768254fb173fb5039c Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 11:44:11 +0100 Subject: [PATCH 04/23] Cleanup --- dataflow_transfer/tests/test_run_classes.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/dataflow_transfer/tests/test_run_classes.py b/dataflow_transfer/tests/test_run_classes.py index 2e60ab1..5177efe 100644 --- a/dataflow_transfer/tests/test_run_classes.py +++ b/dataflow_transfer/tests/test_run_classes.py @@ -57,7 +57,6 @@ def miseqseq_testobj(tmp_path): return illumina_runs.MiSeqRun(str(run_dir), config) -# mock calls to dataflow_transfer.utils.statusdb.StatusdbSession to avoid actual DB connections @pytest.fixture(autouse=True) def mock_statusdbsession(monkeypatch): class MockStatusdbSession: @@ -73,7 +72,6 @@ def update_db_doc(self, doc): monkeypatch.setattr(generic_runs, "StatusdbSession", MockStatusdbSession) -# use parameterization for the test fixtures to test confirm_run_type @pytest.mark.parametrize( "run_fixture, expected_run_type", [ @@ -129,7 +127,6 @@ def test_generate_rsync_command(run_fixture, final_sync, request): assert f"; echo $? > {run_obj.final_rsync_exitcode_file}" in rsync_command -# use parameterization for the test fixtures to test initiate_background_transfer. mock fs.rsync_is_running, fs.submit_background_process and update_statusdb @pytest.mark.parametrize( "run_fixture, rsync_running, final", [ @@ -177,10 +174,6 @@ def mock_update_statusdb(status, additional_info=None): assert mock_update_statusdb.status == "transfer_started" -def test_do_final_transfer(): - pass # Further tests can be implemented for do_final_transfer - - @pytest.mark.parametrize( "run_fixture, sync_successful", [ @@ -203,7 +196,6 @@ def test_final_sync_successful(run_fixture, sync_successful, request): assert run_obj.final_sync_successful == sync_successful -# use fixtures to test Run.has_status for differernt illumina_runs objects @pytest.mark.parametrize( "run_fixture, status_to_check, expected_result", [ From 4dea5248019f3acec456729430b9233a12374abe Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 11:50:58 +0100 Subject: [PATCH 05/23] Add fixture and tests for novaseqxplus --- dataflow_transfer/tests/test_run_classes.py | 41 +++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/dataflow_transfer/tests/test_run_classes.py b/dataflow_transfer/tests/test_run_classes.py index 5177efe..5a2fe28 100644 --- a/dataflow_transfer/tests/test_run_classes.py +++ b/dataflow_transfer/tests/test_run_classes.py @@ -3,6 +3,33 @@ from dataflow_transfer.run_classes import illumina_runs, generic_runs +# TODO: add tests for ONT and ELEMENT runs when those are implemented + + +@pytest.fixture +def novaseqxplus_testobj(tmp_path): + config = { + "log": {"file": "test.log"}, + "transfer_details": {"user": "testuser", "host": "testhost"}, + "statusdb": { + "username": "dbuser", + "password": "dbpass", + "url:": "dburl", + "database": "dbname", + }, + "sequencers": { + "NovaSeqXPlus": { + "miarka_destination": "/data/NovaSeqXPlus", + "metadata_for_statusdb": ["RunInfo.xml", "RunParameters.xml"], + "ignore_folders": ["nosync"], + "rsync_options": ["--chmod=Dg+s,g+rw"], + } + }, + } + run_id = "20251010_LH00202_0284_B22CVHTLT1" + run_dir = tmp_path / run_id + run_dir.mkdir() + return illumina_runs.NovaSeqXPlusRun(str(run_dir), config) @pytest.fixture @@ -75,6 +102,7 @@ def update_db_doc(self, doc): @pytest.mark.parametrize( "run_fixture, expected_run_type", [ + ("novaseqxplus_testobj", "NovaSeqXPlus"), ("nextseq_testobj", "NextSeq"), ("miseqseq_testobj", "MiSeq"), ], @@ -92,6 +120,7 @@ def test_confirm_run_type(run_fixture, expected_run_type, request): @pytest.mark.parametrize( "run_fixture", [ + "novaseqxplus_testobj", "nextseq_testobj", "miseqseq_testobj", ], @@ -110,6 +139,8 @@ def test_sequencing_ongoing(run_fixture, request): @pytest.mark.parametrize( "run_fixture, final_sync", [ + ("novaseqxplus_testobj", False), + ("novaseqxplus_testobj", True), ("nextseq_testobj", False), ("nextseq_testobj", True), ("miseqseq_testobj", False), @@ -130,6 +161,10 @@ def test_generate_rsync_command(run_fixture, final_sync, request): @pytest.mark.parametrize( "run_fixture, rsync_running, final", [ + ("novaseqxplus_testobj", False, False), + ("novaseqxplus_testobj", True, False), + ("novaseqxplus_testobj", False, True), + ("novaseqxplus_testobj", True, True), ("nextseq_testobj", False, False), ("nextseq_testobj", True, False), ("nextseq_testobj", False, True), @@ -177,6 +212,8 @@ def mock_update_statusdb(status, additional_info=None): @pytest.mark.parametrize( "run_fixture, sync_successful", [ + ("novaseqxplus_testobj", True), + ("novaseqxplus_testobj", False), ("nextseq_testobj", True), ("nextseq_testobj", False), ("miseqseq_testobj", True), @@ -199,6 +236,10 @@ def test_final_sync_successful(run_fixture, sync_successful, request): @pytest.mark.parametrize( "run_fixture, status_to_check, expected_result", [ + ("novaseqxplus_testobj", "sequencing_started", False), + ("novaseqxplus_testobj", "sequencing_started", True), + ("novaseqxplus_testobj", "sequencing_finished", False), + ("novaseqxplus_testobj", "sequencing_finished", True), ("nextseq_testobj", "sequencing_started", False), ("nextseq_testobj", "sequencing_started", True), ("nextseq_testobj", "sequencing_finished", False), From b54cc0f459cb7b87b73cc5d80ee4589ffe6e09d3 Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 13:16:19 +0100 Subject: [PATCH 06/23] update ci tests --- .github/workflows/python-app.yml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 2ed96a5..d835327 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -26,14 +26,10 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install flake8 pytest + pip install pytest if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Install dataflow_transfer + run: pip install -e . - name: Test with pytest run: | pytest From 6542028fd7e624a2183a8821cac83a21f3d28295 Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 13:48:28 +0100 Subject: [PATCH 07/23] Add PR version check --- .github/workflows/check-version.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .github/workflows/check-version.yml diff --git a/.github/workflows/check-version.yml b/.github/workflows/check-version.yml new file mode 100644 index 0000000..9a5f943 --- /dev/null +++ b/.github/workflows/check-version.yml @@ -0,0 +1,29 @@ +# write a github actions workflow that checks that the version in the pyproject.toml file has been updated on every pull request to master +name: Check version update +on: + pull_request: + branches: [ "master" ] +permissions: + contents: read +jobs: + check-version: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Check version update + run: | + PR_NUMBER=${{ github.event.pull_request.number }} + FILE_CHANGED=$(git diff origin/master...HEAD --name-only | grep 'pyproject.toml' || true) + if [ -z "$FILE_CHANGED" ]; then + echo "pyproject.toml was not changed in this PR. Please update the version." + exit 1 + else + echo "pyproject.toml was changed in this PR." + fi + VERSION_CHANGED=$(git diff origin/master...HEAD pyproject.toml | grep 'version =' || true) + if [ -z "$VERSION_CHANGED" ]; then + echo "Version in pyproject.toml was not updated. Please update the version." + exit 1 + else + echo "Version in pyproject.toml was updated." + fi \ No newline at end of file From 734fbed7a4a86f2cbc53b0f6b30d0db2af97c39e Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 13:49:19 +0100 Subject: [PATCH 08/23] Bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 01a528d..b181ba2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dataflow_transfer" -version = "1.0.2" +version = "1.0.3" description = "Script for transferring sequencing data from sequencers to storage" authors = [ { name = "Sara Sjunnebo", email = "sara.sjunnebo@scilifelab.se" }, From e21e7c8cc3722c73166648cbb106afa218fc58f6 Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 13:53:09 +0100 Subject: [PATCH 09/23] fixes to version check --- .github/workflows/check-version.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/check-version.yml b/.github/workflows/check-version.yml index 9a5f943..598f1af 100644 --- a/.github/workflows/check-version.yml +++ b/.github/workflows/check-version.yml @@ -13,14 +13,14 @@ jobs: - name: Check version update run: | PR_NUMBER=${{ github.event.pull_request.number }} - FILE_CHANGED=$(git diff origin/master...HEAD --name-only | grep 'pyproject.toml' || true) + FILE_CHANGED=$(git diff --name-only ${{ github.event.pull_request.base.sha }} HEAD | grep 'pyproject.toml' || true) if [ -z "$FILE_CHANGED" ]; then echo "pyproject.toml was not changed in this PR. Please update the version." exit 1 else echo "pyproject.toml was changed in this PR." fi - VERSION_CHANGED=$(git diff origin/master...HEAD pyproject.toml | grep 'version =' || true) + VERSION_CHANGED=$(git diff ${{ github.event.pull_request.base.sha }} HEAD | grep 'version =' || true) if [ -z "$VERSION_CHANGED" ]; then echo "Version in pyproject.toml was not updated. Please update the version." exit 1 From 5efb4494d02120859744f36f856785312cc2529d Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 14:02:38 +0100 Subject: [PATCH 10/23] fetch all history --- .github/workflows/check-version.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/check-version.yml b/.github/workflows/check-version.yml index 598f1af..21aee35 100644 --- a/.github/workflows/check-version.yml +++ b/.github/workflows/check-version.yml @@ -1,4 +1,3 @@ -# write a github actions workflow that checks that the version in the pyproject.toml file has been updated on every pull request to master name: Check version update on: pull_request: @@ -9,7 +8,10 @@ jobs: check-version: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - name: Checkout PR + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Fetch all history for all branches and tags - name: Check version update run: | PR_NUMBER=${{ github.event.pull_request.number }} From 21ddfe0eb026ac7a5337df3cc76ee614380e2432 Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 14:03:35 +0100 Subject: [PATCH 11/23] syntax fix --- .github/workflows/check-version.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/check-version.yml b/.github/workflows/check-version.yml index 21aee35..4a5812a 100644 --- a/.github/workflows/check-version.yml +++ b/.github/workflows/check-version.yml @@ -9,9 +9,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout PR - uses: actions/checkout@v4 - with: - fetch-depth: 0 # Fetch all history for all branches and tags + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Fetch all history for all branches and tags - name: Check version update run: | PR_NUMBER=${{ github.event.pull_request.number }} From 2fe03bbd0f8ba9a3a2682392b7550ca29339ae8b Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 14:11:50 +0100 Subject: [PATCH 12/23] add linting --- .github/workflows/lint-code.yml | 93 +++++++++++++++++++++++++++++++++ pyproject.toml | 25 +++++++++ 2 files changed, 118 insertions(+) create mode 100644 .github/workflows/lint-code.yml diff --git a/.github/workflows/lint-code.yml b/.github/workflows/lint-code.yml new file mode 100644 index 0000000..a5ecc36 --- /dev/null +++ b/.github/workflows/lint-code.yml @@ -0,0 +1,93 @@ +name: Lint code +on: [push, pull_request] + +jobs: + # Use ruff to check for code style violations + ruff-check: + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.14" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ruff + - name: ruff --> Check for style violations + # Configured in pyproject.toml + run: ruff check . + + # Use ruff to check code formatting + ruff-format: + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.14" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ruff + - name: ruff --> Check code formatting + run: ruff format --check . + + # Use mypy for static type checking + mypy-check: + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.14" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install mypy + # Start by installing type stubs + - name: mypy --> Install stubs + run: mypy --install-types --non-interactive . + + # Use pip-check-reqs/pip-missing-reqs to check for missing dependencies + requirements-check: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.14" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pip-check-reqs + + - name: Run pip-check-reqs/pip-missing-reqs + run: | + pip-missing-reqs . + + # Use Prettier to check various file formats + prettier: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Setup node + uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Install Prettier + run: npm install -g prettier + + - name: Run Prettier --check + run: prettier --check . \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index b181ba2..bb4c58d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,28 @@ +[tool.ruff.lint] +select = [ + # Ruff default rules + # ------------------------------ + "E4", # pycodestyle Imports + "E7", # pycodestyle Statements + "E9", # pycodestyle Runtime + "F", # Pyflakes + + # Additional Comment + # ------------------------------------------------------ + "I", # isort Best-practice sorting of imports + "UP", # pyupgrade Make sure syntax is up-to-date +] +ignore = [ + "E402", # Module level import not at top of file + "E722", # Do not use bare 'except' + "E741", # Ambiguous variable name +] + +[tool.mypy] +ignore_missing_imports = true +follow_imports = 'skip' +exclude = "build" + [project] name = "dataflow_transfer" version = "1.0.3" From e5c74db558449d4c6b7916a4dae44d0fc182a632 Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 14:14:23 +0100 Subject: [PATCH 13/23] remove mypy --- .github/workflows/lint-code.yml | 18 ------------------ pyproject.toml | 5 ----- 2 files changed, 23 deletions(-) diff --git a/.github/workflows/lint-code.yml b/.github/workflows/lint-code.yml index a5ecc36..0a1e617 100644 --- a/.github/workflows/lint-code.yml +++ b/.github/workflows/lint-code.yml @@ -37,24 +37,6 @@ jobs: - name: ruff --> Check code formatting run: ruff format --check . - # Use mypy for static type checking - mypy-check: - runs-on: ubuntu-latest - steps: - - name: Checkout repo - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: "3.14" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install mypy - # Start by installing type stubs - - name: mypy --> Install stubs - run: mypy --install-types --non-interactive . - # Use pip-check-reqs/pip-missing-reqs to check for missing dependencies requirements-check: runs-on: ubuntu-latest diff --git a/pyproject.toml b/pyproject.toml index bb4c58d..ac4a2ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,11 +18,6 @@ ignore = [ "E741", # Ambiguous variable name ] -[tool.mypy] -ignore_missing_imports = true -follow_imports = 'skip' -exclude = "build" - [project] name = "dataflow_transfer" version = "1.0.3" From 8bae6499460042d5aafffb8c379e17adcf81b23c Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 14:18:30 +0100 Subject: [PATCH 14/23] Ruff --- dataflow_transfer/dataflow_transfer.py | 2 +- dataflow_transfer/run_classes/generic_runs.py | 5 +++-- dataflow_transfer/run_classes/illumina_runs.py | 1 + dataflow_transfer/tests/test_run_classes.py | 3 ++- dataflow_transfer/utils/filesystem.py | 15 +++++++-------- 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/dataflow_transfer/dataflow_transfer.py b/dataflow_transfer/dataflow_transfer.py index d879f5e..3b34bb7 100644 --- a/dataflow_transfer/dataflow_transfer.py +++ b/dataflow_transfer/dataflow_transfer.py @@ -2,7 +2,7 @@ import time from dataflow_transfer.run_classes.registry import RUN_CLASS_REGISTRY -from dataflow_transfer.utils.filesystem import get_run_dir, find_runs +from dataflow_transfer.utils.filesystem import find_runs, get_run_dir logger = logging.getLogger(__name__) diff --git a/dataflow_transfer/run_classes/generic_runs.py b/dataflow_transfer/run_classes/generic_runs.py index 6d1812a..d76d0b4 100644 --- a/dataflow_transfer/run_classes/generic_runs.py +++ b/dataflow_transfer/run_classes/generic_runs.py @@ -1,9 +1,10 @@ -import os import logging +import os import re from datetime import datetime -from dataflow_transfer.utils.statusdb import StatusdbSession + import dataflow_transfer.utils.filesystem as fs +from dataflow_transfer.utils.statusdb import StatusdbSession logger = logging.getLogger(__name__) diff --git a/dataflow_transfer/run_classes/illumina_runs.py b/dataflow_transfer/run_classes/illumina_runs.py index b5004a7..b74bb4c 100644 --- a/dataflow_transfer/run_classes/illumina_runs.py +++ b/dataflow_transfer/run_classes/illumina_runs.py @@ -1,4 +1,5 @@ from dataflow_transfer.run_classes.generic_runs import Run + from .registry import register_run_class diff --git a/dataflow_transfer/tests/test_run_classes.py b/dataflow_transfer/tests/test_run_classes.py index 5a2fe28..8f46a92 100644 --- a/dataflow_transfer/tests/test_run_classes.py +++ b/dataflow_transfer/tests/test_run_classes.py @@ -1,8 +1,9 @@ import os + import pytest +from dataflow_transfer.run_classes import generic_runs, illumina_runs -from dataflow_transfer.run_classes import illumina_runs, generic_runs # TODO: add tests for ONT and ELEMENT runs when those are implemented diff --git a/dataflow_transfer/utils/filesystem.py b/dataflow_transfer/utils/filesystem.py index e38068f..a02aadb 100644 --- a/dataflow_transfer/utils/filesystem.py +++ b/dataflow_transfer/utils/filesystem.py @@ -1,9 +1,10 @@ import json import logging import os -import xmltodict import subprocess +import xmltodict + logger = logging.getLogger(__name__) @@ -40,9 +41,7 @@ def rsync_is_running(src): def submit_background_process(command_str: str): """Submit a command string as a background process.""" - background_process = subprocess.Popen( - command_str, stdout=subprocess.PIPE, shell=True - ) + subprocess.Popen(command_str, stdout=subprocess.PIPE, shell=True) def parse_metadata_files(files): @@ -52,10 +51,10 @@ def parse_metadata_files(files): for file_path in files: try: if file_path.endswith(".json"): - with open(file_path, "r") as f: + with open(file_path) as f: metadata[os.path.basename(file_path)] = json.load(f) elif file_path.endswith(".xml"): - with open(file_path, "r") as f: + with open(file_path) as f: xml_content = xmltodict.parse( f.read(), attr_prefix="", cdata_key="text" ) @@ -71,10 +70,10 @@ def parse_metadata_files(files): def check_exit_status(file_path): - """Check the exit status from a given file. + """Check the exit status from a given file. Return True if exit code is 0, else False.""" if os.path.exists(file_path): - with open(file_path, "r") as f: + with open(file_path) as f: exit_code = f.read().strip() if exit_code == "0": return True From fcc773675ec0bd52ff12d8ac89f80dd87d77be93 Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 14:19:46 +0100 Subject: [PATCH 15/23] rename github workflow --- .github/workflows/{python-app.yml => test-code.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{python-app.yml => test-code.yml} (100%) diff --git a/.github/workflows/python-app.yml b/.github/workflows/test-code.yml similarity index 100% rename from .github/workflows/python-app.yml rename to .github/workflows/test-code.yml From c7e6dbeb49b26c6366530c583675c2ee34348a1f Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 14:25:42 +0100 Subject: [PATCH 16/23] update action versions --- .github/workflows/check-version.yml | 2 +- .github/workflows/lint-code.yml | 16 ++++++++-------- .github/workflows/test-code.yml | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/check-version.yml b/.github/workflows/check-version.yml index 4a5812a..88cce6f 100644 --- a/.github/workflows/check-version.yml +++ b/.github/workflows/check-version.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout PR - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: fetch-depth: 0 # Fetch all history for all branches and tags - name: Check version update diff --git a/.github/workflows/lint-code.yml b/.github/workflows/lint-code.yml index 0a1e617..92380f9 100644 --- a/.github/workflows/lint-code.yml +++ b/.github/workflows/lint-code.yml @@ -7,9 +7,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repo - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v6 with: python-version: "3.14" - name: Install dependencies @@ -25,9 +25,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repo - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v6 with: python-version: "3.14" - name: Install dependencies @@ -42,9 +42,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v6 with: python-version: "3.14" @@ -62,9 +62,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Setup node - uses: actions/setup-node@v4 + uses: actions/setup-node@v6 with: node-version: "20" diff --git a/.github/workflows/test-code.yml b/.github/workflows/test-code.yml index d835327..2f4c15f 100644 --- a/.github/workflows/test-code.yml +++ b/.github/workflows/test-code.yml @@ -18,9 +18,9 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up Python 3.14 - uses: actions/setup-python@v3 + uses: actions/setup-python@v6 with: python-version: "3.14" - name: Install dependencies From cbcb8412ddf29d95f95b6c8bf08f1a553aac13b2 Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 14:25:52 +0100 Subject: [PATCH 17/23] add pr label check --- .github/workflows/check-pr-label.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .github/workflows/check-pr-label.yml diff --git a/.github/workflows/check-pr-label.yml b/.github/workflows/check-pr-label.yml new file mode 100644 index 0000000..b7ea439 --- /dev/null +++ b/.github/workflows/check-pr-label.yml @@ -0,0 +1,21 @@ +name: Check Label on PR +on: + pull_request: + types: [opened, synchronize, labeled, unlabeled] + +jobs: + check_pr_label: + runs-on: ubuntu-latest + steps: + - name: Checkout PR + uses: actions/checkout@v6 + with: + fetch-depth: 0 # Fetch all history for all branches and tags + + - name: Check if the PR contains the label validation or no validation + id: check_pr_label + if: | + ! contains( github.event.pull_request.labels.*.name, 'validation') && ! contains( github.event.pull_request.labels.*.name, 'no validation') + run: | + echo "Neither 'validation' nor 'no validation' labels are present." + exit 1 # Exit with a failure \ No newline at end of file From a100a91c549d742d95370ab2ee6406210234d3e1 Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 14:28:01 +0100 Subject: [PATCH 18/23] ruff --- dataflow_transfer/tests/test_filesystem.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/dataflow_transfer/tests/test_filesystem.py b/dataflow_transfer/tests/test_filesystem.py index 7bee1e7..2812b82 100644 --- a/dataflow_transfer/tests/test_filesystem.py +++ b/dataflow_transfer/tests/test_filesystem.py @@ -1,18 +1,19 @@ import json import os import tempfile -import pytest -from unittest.mock import patch from subprocess import CalledProcessError +from unittest.mock import patch + +import pytest from dataflow_transfer.utils.filesystem import ( - get_run_dir, + check_exit_status, find_runs, + get_run_dir, + locate_metadata, + parse_metadata_files, rsync_is_running, submit_background_process, - parse_metadata_files, - check_exit_status, - locate_metadata, ) From 3c9239b8355eeebef36b29b7b9eff9c913480c13 Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 14:38:57 +0100 Subject: [PATCH 19/23] ruff fixes --- dataflow_transfer/run_classes/__init__.py | 15 +++++++-------- dataflow_transfer/run_classes/element_runs.py | 1 + dataflow_transfer/run_classes/ont_runs.py | 1 + 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/dataflow_transfer/run_classes/__init__.py b/dataflow_transfer/run_classes/__init__.py index c58da30..705c5c7 100644 --- a/dataflow_transfer/run_classes/__init__.py +++ b/dataflow_transfer/run_classes/__init__.py @@ -1,12 +1,11 @@ # This adds the run classes to the registry. Do not remove. -from .registry import RUN_CLASS_REGISTRY - - +from dataflow_transfer.run_classes.element_runs import AVITIRun # noqa: F401, I001 from dataflow_transfer.run_classes.illumina_runs import ( - NovaSeqXPlusRun, - NextSeqRun, - MiSeqRun, + MiSeqRun, # noqa: F401 + NextSeqRun, # noqa: F401 + NovaSeqXPlusRun, # noqa: F401 ) -from dataflow_transfer.run_classes.ont_runs import PromethIONRun, MinIONRun -from dataflow_transfer.run_classes.element_runs import AVITIRun +from dataflow_transfer.run_classes.ont_runs import MinIONRun, PromethIONRun # noqa: F401 + +from .registry import RUN_CLASS_REGISTRY # noqa: F401 diff --git a/dataflow_transfer/run_classes/element_runs.py b/dataflow_transfer/run_classes/element_runs.py index f00fac7..97f448e 100644 --- a/dataflow_transfer/run_classes/element_runs.py +++ b/dataflow_transfer/run_classes/element_runs.py @@ -1,4 +1,5 @@ from dataflow_transfer.run_classes.generic_runs import Run + from .registry import register_run_class diff --git a/dataflow_transfer/run_classes/ont_runs.py b/dataflow_transfer/run_classes/ont_runs.py index 1bd730b..dfd11d7 100644 --- a/dataflow_transfer/run_classes/ont_runs.py +++ b/dataflow_transfer/run_classes/ont_runs.py @@ -1,4 +1,5 @@ from dataflow_transfer.run_classes.generic_runs import Run + from .registry import register_run_class From 1389b84ece1804167825e0012f2b78d65b012ca4 Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 14:39:57 +0100 Subject: [PATCH 20/23] more ruff --- dataflow_transfer/cli.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dataflow_transfer/cli.py b/dataflow_transfer/cli.py index 57c90a6..b5c7688 100644 --- a/dataflow_transfer/cli.py +++ b/dataflow_transfer/cli.py @@ -1,17 +1,18 @@ -import click -import os import logging +import os + +import click import yaml -from dataflow_transfer.dataflow_transfer import transfer_runs from dataflow_transfer import log +from dataflow_transfer.dataflow_transfer import transfer_runs from dataflow_transfer.run_classes.registry import RUN_CLASS_REGISTRY logger = logging.getLogger(__name__) def load_config(config_file_path): - with open(config_file_path, "r") as file: + with open(config_file_path) as file: config = yaml.safe_load(file) return config From d3641a9fc06f909011f39a85e506d55ffd609636 Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 14:48:59 +0100 Subject: [PATCH 21/23] remove prettier for now --- .github/workflows/lint-code.yml | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/.github/workflows/lint-code.yml b/.github/workflows/lint-code.yml index 92380f9..c082a48 100644 --- a/.github/workflows/lint-code.yml +++ b/.github/workflows/lint-code.yml @@ -56,20 +56,3 @@ jobs: - name: Run pip-check-reqs/pip-missing-reqs run: | pip-missing-reqs . - - # Use Prettier to check various file formats - prettier: - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v6 - - name: Setup node - uses: actions/setup-node@v6 - with: - node-version: "20" - - - name: Install Prettier - run: npm install -g prettier - - - name: Run Prettier --check - run: prettier --check . \ No newline at end of file From d6230bac6d420e90e1cbed072cdb5f8cf50b5c35 Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 15:02:55 +0100 Subject: [PATCH 22/23] Add code coverage check --- .github/workflows/test-code.yml | 8 ++++++-- pyproject.toml | 1 + requirements-dev.txt | 4 ++++ 3 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 requirements-dev.txt diff --git a/.github/workflows/test-code.yml b/.github/workflows/test-code.yml index 2f4c15f..f06a543 100644 --- a/.github/workflows/test-code.yml +++ b/.github/workflows/test-code.yml @@ -26,10 +26,14 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install pytest + pip install pytest, pytest-cov if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Install dataflow_transfer run: pip install -e . - name: Test with pytest run: | - pytest + pytest --cov --cov-branch --cov-report=xml + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v5 + with: + token: ${{ secrets.CODECOV_TOKEN }} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index ac4a2ea..157624c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ dependencies = [ dev = [ "ruff>=0.11.8", "pytest", + "pytest-cov", ] [project.scripts] diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..a615bde --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,4 @@ +-r requirements.txt +ruff>=0.11.8 +pytest +pytest-cov \ No newline at end of file From aebfad113a9883984ed1e8732a611f52feae1d19 Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Mon, 15 Dec 2025 15:04:25 +0100 Subject: [PATCH 23/23] fix typo --- .github/workflows/test-code.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-code.yml b/.github/workflows/test-code.yml index f06a543..8bfcb4f 100644 --- a/.github/workflows/test-code.yml +++ b/.github/workflows/test-code.yml @@ -26,7 +26,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install pytest, pytest-cov + pip install pytest pytest-cov if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Install dataflow_transfer run: pip install -e .