diff --git a/_build/logs/myst.build.json b/_build/logs/myst.build.json new file mode 100644 index 00000000..ac7e9a13 --- /dev/null +++ b/_build/logs/myst.build.json @@ -0,0 +1,29 @@ +{ + "input": { + "files": [ + "docs" + ], + "opts": { + "execute": false, + "pdf": false, + "tex": false, + "typst": false, + "docx": false, + "md": false, + "xml": false, + "meca": false, + "cff": false, + "site": false, + "html": false, + "all": false, + "doiBib": false, + "watch": false, + "force": false, + "checkLinks": false, + "strict": false, + "ci": false, + "maxSizeWebp": 1572864 + } + }, + "exports": [] +} \ No newline at end of file diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..09c9ca7b 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: patch + changes: + fixed: + - Handle empty HUGGING_FACE_TOKEN gracefully in CI environments. Empty strings are now treated the same as None, and non-interactive environments (like CI) return None instead of hanging on getpass prompt. diff --git a/policyengine_core/tools/hugging_face.py b/policyengine_core/tools/hugging_face.py index 887d4447..7964658f 100644 --- a/policyengine_core/tools/hugging_face.py +++ b/policyengine_core/tools/hugging_face.py @@ -91,21 +91,32 @@ def download_huggingface_dataset( ) -def get_or_prompt_hf_token() -> str: +def get_or_prompt_hf_token() -> str | None: """ Either get the Hugging Face token from the environment, or prompt the user for it and store it in the environment. Returns: - str: The Hugging Face token. + str | None: The Hugging Face token, or None if not available + and running non-interactively (e.g., in CI without secrets). """ token = os.environ.get("HUGGING_FACE_TOKEN") - if token is None: - token = getpass( - "Enter your Hugging Face token (or set HUGGING_FACE_TOKEN environment variable): " - ) - # Optionally store in env for subsequent calls in same session - os.environ["HUGGING_FACE_TOKEN"] = token + # Treat empty string same as None (handles CI with missing secrets) + if not token: + # Check if running interactively before prompting + if os.isatty(0): + token = getpass( + "Enter your Hugging Face token (or set HUGGING_FACE_TOKEN environment variable): " + ) + # Store in env for subsequent calls in same session + if token: + os.environ["HUGGING_FACE_TOKEN"] = token + else: + # User entered empty string - return None + return None + else: + # Non-interactive (CI) - return None instead of prompting + return None return token diff --git a/tests/core/test_microsimulation_person_accessor.py b/tests/core/test_microsimulation_person_accessor.py new file mode 100644 index 00000000..a5ef598a --- /dev/null +++ b/tests/core/test_microsimulation_person_accessor.py @@ -0,0 +1,93 @@ +""" +Test for Python 3.14 compatibility issue #407. + +The person() accessor in Microsimulation should return unweighted values +(numpy arrays), not weighted MicroSeries. This test verifies that the +isinstance check correctly identifies Microsimulation instances. + +Issue #407 reports that in Python 3.14, person() returns unweighted values +when it previously returned weighted values in Python 3.13. This test +ensures consistent behavior across Python versions. +""" + +import numpy as np +import pytest +from microdf import MicroSeries + +from policyengine_core.country_template import Microsimulation + + +class TestMicrosimulationPersonAccessor: + """Tests for person() accessor behavior in Microsimulation.""" + + def test_person_accessor_returns_unweighted_in_microsimulation(self): + """ + Verify that person() accessor returns unweighted numpy arrays. + + The person() accessor is used internally in formulas and should return + unweighted values for performance. This is the intended behavior per + the code comment: "Internal simulation code shouldn't use weights in + order to avoid performance slowdowns." + """ + sim = Microsimulation() + result = sim.person("salary", "2022-01") + + # The result should be a numpy array, not MicroSeries + assert isinstance(result, np.ndarray), ( + f"Expected numpy.ndarray but got {type(result).__name__}. " + "person() should return unweighted arrays for performance." + ) + + def test_calculate_returns_weighted_microseries(self): + """ + Verify that sim.calculate() returns weighted MicroSeries by default. + + This is the expected behavior for user-facing calculations. + """ + sim = Microsimulation() + result = sim.calculate("salary", "2022-01") + + assert isinstance(result, MicroSeries), ( + f"Expected MicroSeries but got {type(result).__name__}. " + "sim.calculate() should return weighted MicroSeries by default." + ) + + def test_isinstance_check_works_for_microsimulation(self): + """ + Directly test that isinstance check works for Microsimulation. + + This ensures the isinstance check in Population.__call__ correctly + identifies Microsimulation instances across Python versions. + """ + from policyengine_core.simulations.microsimulation import ( + Microsimulation as CoreMicrosimulation, + ) + + sim = Microsimulation() + + assert isinstance(sim, CoreMicrosimulation), ( + f"isinstance(sim, Microsimulation) returned False. " + f"sim type: {type(sim)}, MRO: {type(sim).__mro__}" + ) + + def test_person_accessor_kwargs_passed_correctly(self): + """ + Test that the person() accessor passes the correct kwargs to calculate(). + + This test verifies that use_weights=False is passed to avoid + performance issues in internal calculations. + """ + sim = Microsimulation() + + # Call person() which should pass use_weights=False + result_person = sim.person("salary", "2022-01") + + # Call calculate() with use_weights=False directly + result_calculate = sim.calculate( + "salary", "2022-01", use_weights=False + ) + + # Both should return numpy arrays with the same values + assert isinstance(result_person, np.ndarray) + assert isinstance(result_calculate, np.ndarray) + np.testing.assert_array_equal(result_person, result_calculate) diff --git a/tests/core/tools/test_hugging_face.py b/tests/core/tools/test_hugging_face.py index 3dcda613..d34175a9 100644 --- a/tests/core/tools/test_hugging_face.py +++ b/tests/core/tools/test_hugging_face.py @@ -124,39 +124,65 @@ def test_get_token_from_user_input(self): """Test retrieving token via user input when not in environment""" test_token = "user_input_token_456" - # Mock both empty environment and user input + # Mock empty environment, interactive mode, and user input with patch.dict(os.environ, {}, clear=True): - with patch( - "policyengine_core.tools.hugging_face.getpass", - return_value=test_token, - ): - result = get_or_prompt_hf_token() - assert result == test_token + with patch("os.isatty", return_value=True): + with patch( + "policyengine_core.tools.hugging_face.getpass", + return_value=test_token, + ): + result = get_or_prompt_hf_token() + assert result == test_token - # Verify token was stored in environment - assert os.environ.get("HUGGING_FACE_TOKEN") == test_token + # Verify token was stored in environment + assert os.environ.get("HUGGING_FACE_TOKEN") == test_token def test_empty_user_input(self): - """Test handling of empty user input""" + """Test handling of empty user input in interactive mode""" with patch.dict(os.environ, {}, clear=True): - with patch( - "policyengine_core.tools.hugging_face.getpass", return_value="" - ): + with patch("os.isatty", return_value=True): + with patch( + "policyengine_core.tools.hugging_face.getpass", + return_value="", + ): + result = get_or_prompt_hf_token() + # Empty input should return None (not stored) + assert result is None + # Empty token should not be stored + assert os.environ.get("HUGGING_FACE_TOKEN") is None + + def test_non_interactive_mode_returns_none(self): + """Test that non-interactive mode (CI) returns None without prompting""" + with patch.dict(os.environ, {}, clear=True): + with patch("os.isatty", return_value=False): + with patch( + "policyengine_core.tools.hugging_face.getpass" + ) as mock_getpass: + result = get_or_prompt_hf_token() + assert result is None + # getpass should not be called in non-interactive mode + mock_getpass.assert_not_called() + + def test_empty_env_token_treated_as_none(self): + """Test that empty string token in env is treated as missing""" + with patch.dict(os.environ, {"HUGGING_FACE_TOKEN": ""}, clear=True): + with patch("os.isatty", return_value=False): result = get_or_prompt_hf_token() - assert result == "" - assert os.environ.get("HUGGING_FACE_TOKEN") == "" + # Empty string should be treated as None + assert result is None def test_environment_variable_persistence(self): """Test that environment variable persists across multiple calls""" test_token = "persistence_test_token" - # First call with no environment variable + # First call with no environment variable (interactive mode) with patch.dict(os.environ, {}, clear=True): - with patch( - "policyengine_core.tools.hugging_face.getpass", - return_value=test_token, - ): - first_result = get_or_prompt_hf_token() + with patch("os.isatty", return_value=True): + with patch( + "policyengine_core.tools.hugging_face.getpass", + return_value=test_token, + ): + first_result = get_or_prompt_hf_token() # Second call should use environment variable second_result = get_or_prompt_hf_token()