From 55e805c322454a30c232c055485552ff89a538af Mon Sep 17 00:00:00 2001 From: Cedric McKeever Date: Wed, 25 Jun 2025 11:42:22 +0800 Subject: [PATCH 01/15] bring to PEP8 --- global_macro_data/__init__.py | 2 +- global_macro_data/gmd.py | 116 +++++++++++++++++++--------------- 2 files changed, 67 insertions(+), 51 deletions(-) diff --git a/global_macro_data/__init__.py b/global_macro_data/__init__.py index ff336e1..62cdb46 100644 --- a/global_macro_data/__init__.py +++ b/global_macro_data/__init__.py @@ -14,4 +14,4 @@ "list_variables", "list_countries", "VALID_VARIABLES" -] \ No newline at end of file +] diff --git a/global_macro_data/gmd.py b/global_macro_data/gmd.py index aa78e7f..79ca05d 100644 --- a/global_macro_data/gmd.py +++ b/global_macro_data/gmd.py @@ -33,6 +33,7 @@ "CurrencyCrisis", "BankingCrisis", "SovDebtCrisis" ] + def get_available_versions() -> List[str]: """Get list of available versions from GitHub""" try: @@ -43,25 +44,27 @@ def get_available_versions() -> List[str]: response = requests.get(versions_url) if response.status_code != 200: raise Exception("Could not fetch versions") - + versions_df = pd.read_csv(io.StringIO(response.text)) versions = versions_df['versions'].tolist() return sorted(versions, reverse=True) except Exception as e: raise Exception(f"Error fetching versions: {str(e)}") + def get_current_version() -> str: """Get the current version of the dataset""" versions = get_available_versions() return versions[0] if versions else None + def list_variables() -> None: """Display list of available variables and their descriptions""" print("\nAvailable variables:\n") print("-" * 90) print(f"{'Variable':<17} Description") print("-" * 90) - + descriptions = { "nGDP": "Nominal Gross Domestic Product", "rGDP": "Real Gross Domestic Product, in 2010 prices", @@ -110,12 +113,13 @@ def list_variables() -> None: "CurrencyCrisis": "Currency Crisis", "BankingCrisis": "Banking Crisis" } - + for var in sorted(VALID_VARIABLES): print(f"{var:<17} {descriptions.get(var, '')}") - + print("-" * 90) + def list_countries() -> None: """Display list of available countries and their ISO codes""" try: @@ -125,17 +129,18 @@ def list_countries() -> None: os.path.dirname(script_dir), 'isomapping.csv' ) isomapping = pd.read_csv(isomapping_path) - + print("\nCountry and territories" + " " * 27 + "Code") print("-" * 60) - + for _, row in isomapping.iterrows(): print(f"{row['countryname']:<50} {row['ISO3']}") - + print("-" * 60) except Exception as e: raise Exception(f"Error loading country list: {str(e)}") + def gmd( variables: Optional[Union[str, List[str]]] = None, country: Optional[Union[str, List[str]]] = None, @@ -146,37 +151,45 @@ def gmd( ) -> Optional[pd.DataFrame]: """ Download and filter Global Macro Data. - - Parameters: - - variables (str or list): Variable code(s) to include - (e.g., "rGDP" or ["rGDP", "unemp"]) - - country (str or list): ISO3 country code(s) - (e.g., "SGP" or ["MRT", "SGP"]) - - version (str): Dataset version in format 'YYYY_MM' - (e.g., '2025_01') - - raw (bool): If True, download raw data for a single variable - - iso (bool): If True, display list of available countries - - vars (bool): If True, display list of available variables - - Returns: - - pd.DataFrame: The requested data, or None if displaying lists + + Parameters + ---------- + variables : str or list of str, optional + Variable code(s) to include (e.g., 'rGDP' or ['rGDP', 'unemp']). + country : str or list of str, optional + ISO3 country code(s) to include (e.g., 'SGP' or ['MRT', 'SGP']). + version : str, optional + Dataset version in 'YYYY_MM' format (e.g., '2025_01'). + raw : bool, default=False + If True, download raw data for a single variable only. + iso : bool, default=False + If True, display the list of available countries and return None. + vars : bool, default=False + If True, display the list of available variables and return None. + + Returns + ------- + pd.DataFrame or None + Filtered macroeconomic data as a DataFrame, or None if displaying + metadata. """ + base_url = "https://www.globalmacrodata.com" - + # Handle special display options if iso: list_countries() return None - + if vars: list_variables() return None - + # Validate variables before proceeding if variables: if isinstance(variables, str): variables = [variables] - + # Validate variables invalid_vars = [ var for var in variables if var not in VALID_VARIABLES @@ -190,7 +203,7 @@ def gmd( "use: gmd(vars=True)" ) sys.exit(1) - + # Get current version if not specified if version is None: version = get_current_version() @@ -206,21 +219,23 @@ def gmd( print(f"Available versions are: {', '.join(available_versions)}") print(f"The current version is: {get_current_version()}") sys.exit(1) - + # Handle raw data option if raw: - if (not variables or - (isinstance(variables, list) and len(variables) > 1)): + if (not variables or + (isinstance(variables, list) and len(variables) > 1)): print("Global Macro Database by Müller et. al (2025)") print("Website: https://www.globalmacrodata.com\n") print("Warning: raw requires specifying exactly one variable") - print("Note: Raw data is only accessed variable-wise using: gmd [variable], raw") - print("To download the full data documentation: https://www.globalmacrodata.com/GMD.xlsx") + print("Note: Raw data is only accessed variable-wise using: " + "gmd [variable], raw") + print("To download the full data documentation: " + "https://www.globalmacrodata.com/GMD.xlsx") sys.exit(1) - + if isinstance(variables, list): variables = variables[0] - + data_url = f"{base_url}/{variables}_{version}.csv" print(f"Importing raw data for variable: {variables}") else: @@ -231,7 +246,7 @@ def gmd( print(f"Importing data for variable: {variables}") else: data_url = f"{base_url}/GMD_{version}.csv" - + # Download data try: response = requests.get(data_url) @@ -241,17 +256,17 @@ def gmd( print("Website: https://www.globalmacrodata.com\n") print(f"Error downloading data: {str(e)}") sys.exit(1) - + # Read the data df = pd.read_csv(io.StringIO(response.text)) - + # Filter by country if specified if country: if isinstance(country, str): country = [country] - + country = [c.upper() for c in country] - + # Validate country codes invalid_countries = [ c for c in country if c not in df["ISO3"].unique() @@ -260,35 +275,36 @@ def gmd( print("Global Macro Database by Müller et. al (2025)") print("Website: https://www.globalmacrodata.com\n") print(f"Error: Invalid country code '{invalid_countries[0]}'") - print("\nTo see the list of valid country codes, use: gmd(iso=True)") + print("\nTo see the list of valid country codes, " + "use: gmd(iso=True)") sys.exit(1) - + df = df[df["ISO3"].isin(country)] print(f"Filtered data for countries: {', '.join(country)}") - + # Filter by variables if specified if variables and not raw: if isinstance(variables, str): variables = [variables] - + # Always include identifier columns required_cols = ["ISO3", "countryname", "year"] all_cols = required_cols + [ var for var in variables if var not in required_cols ] - + # Filter to only include requested variables existing_vars = [var for var in all_cols if var in df.columns] df = df[existing_vars] - + # Clean up missing variables df = df.dropna(axis=1, how='all') - + # Display dataset information if len(df) == 0: print(f"The database has no data on {variables} for {country}") return None - + if raw: n_sources = len(df.columns) - 8 # Subtract identifier columns print(f"Final dataset: {len(df)} observations of {n_sources} sources") @@ -297,13 +313,13 @@ def gmd( f"Final dataset: {len(df)} observations of " f"{len(df.columns)} variables" ) - + print(f"Version: {version}") - + # Sort and order columns df = df.sort_values(['countryname', 'year']) id_cols = ['ISO3', 'countryname', 'year'] other_cols = [col for col in df.columns if col not in id_cols] df = df[id_cols + other_cols] - - return df \ No newline at end of file + + return df From 44969c68839da008f8b7ff9267a380c1f3159526 Mon Sep 17 00:00:00 2001 From: Cedric McKeever Date: Wed, 25 Jun 2025 11:52:29 +0800 Subject: [PATCH 02/15] remove ISO and vars from gmd. rename gmd() to get_data() --- global_macro_data/__init__.py | 4 ++-- global_macro_data/gmd.py | 19 ++----------------- tests/test_gmd.py | 26 +++++++++++++------------- 3 files changed, 17 insertions(+), 32 deletions(-) diff --git a/global_macro_data/__init__.py b/global_macro_data/__init__.py index 62cdb46..c3b9d72 100644 --- a/global_macro_data/__init__.py +++ b/global_macro_data/__init__.py @@ -1,5 +1,5 @@ from .gmd import ( - gmd, + get_data, get_available_versions, get_current_version, list_variables, @@ -8,7 +8,7 @@ ) __all__ = [ - "gmd", + "get_data", "get_available_versions", "get_current_version", "list_variables", diff --git a/global_macro_data/gmd.py b/global_macro_data/gmd.py index 79ca05d..8ed5a9a 100644 --- a/global_macro_data/gmd.py +++ b/global_macro_data/gmd.py @@ -141,14 +141,12 @@ def list_countries() -> None: raise Exception(f"Error loading country list: {str(e)}") -def gmd( +def get_data( variables: Optional[Union[str, List[str]]] = None, country: Optional[Union[str, List[str]]] = None, version: Optional[str] = None, raw: bool = False, - iso: bool = False, - vars: bool = False -) -> Optional[pd.DataFrame]: +) -> pd.DataFrame: """ Download and filter Global Macro Data. @@ -162,10 +160,6 @@ def gmd( Dataset version in 'YYYY_MM' format (e.g., '2025_01'). raw : bool, default=False If True, download raw data for a single variable only. - iso : bool, default=False - If True, display the list of available countries and return None. - vars : bool, default=False - If True, display the list of available variables and return None. Returns ------- @@ -176,15 +170,6 @@ def gmd( base_url = "https://www.globalmacrodata.com" - # Handle special display options - if iso: - list_countries() - return None - - if vars: - list_variables() - return None - # Validate variables before proceeding if variables: if isinstance(variables, str): diff --git a/tests/test_gmd.py b/tests/test_gmd.py index 1839c00..188f55f 100644 --- a/tests/test_gmd.py +++ b/tests/test_gmd.py @@ -1,7 +1,7 @@ import pytest import pandas as pd from global_macro_data import ( - gmd, + get_data, get_available_versions, get_current_version, list_variables, @@ -40,7 +40,7 @@ def test_list_countries(capsys): def test_gmd_default(): """Test default gmd call""" - df = gmd() + df = get_data() assert isinstance(df, pd.DataFrame) assert len(df) > 0 assert all(col in df.columns for col in ["ISO3", "countryname", "year"]) @@ -48,41 +48,41 @@ def test_gmd_default(): def test_gmd_version(): """Test gmd with specific version""" version = get_current_version() - df = gmd(version=version) + df = get_data(version=version) assert isinstance(df, pd.DataFrame) assert len(df) > 0 def test_gmd_country(): """Test gmd with specific country""" - df = gmd(country="USA") + df = get_data(country="USA") assert isinstance(df, pd.DataFrame) assert len(df) > 0 assert all(df["ISO3"] == "USA") def test_gmd_countries(): """Test gmd with multiple countries""" - df = gmd(country=["USA", "CHN"]) + df = get_data(country=["USA", "CHN"]) assert isinstance(df, pd.DataFrame) assert len(df) > 0 assert set(df["ISO3"].unique()) == {"USA", "CHN"} def test_gmd_variables(): """Test gmd with specific variables""" - df = gmd(variables=["rGDP", "infl"]) + df = get_data(variables=["rGDP", "infl"]) assert isinstance(df, pd.DataFrame) assert len(df) > 0 assert all(col in df.columns for col in ["rGDP", "infl"]) def test_gmd_raw(): """Test gmd with raw data option""" - df = gmd(variables="rGDP", raw=True) + df = get_data(variables="rGDP", raw=True) assert isinstance(df, pd.DataFrame) assert len(df) > 0 assert "rGDP" in df.columns def test_gmd_combinations(): """Test gmd with multiple parameters""" - df = gmd( + df = get_data( version=get_current_version(), country=["USA", "CHN"], variables=["rGDP", "infl"] @@ -95,24 +95,24 @@ def test_gmd_combinations(): def test_gmd_invalid_version(): """Test gmd with invalid version""" with pytest.raises(ValueError): - gmd(version="invalid_version") + get_data(version="invalid_version") def test_gmd_invalid_country(): """Test gmd with invalid country""" with pytest.raises(ValueError): - gmd(country="INVALID") + get_data(country="INVALID") def test_gmd_invalid_variable(): """Test gmd with invalid variable""" with pytest.raises(ValueError): - gmd(variables="INVALID") + get_data(variables="INVALID") def test_gmd_raw_multiple_variables(): """Test gmd raw option with multiple variables""" with pytest.raises(ValueError): - gmd(variables=["rGDP", "infl"], raw=True) + get_data(variables=["rGDP", "infl"], raw=True) def test_gmd_raw_no_variable(): """Test gmd raw option without variable""" with pytest.raises(ValueError): - gmd(raw=True) \ No newline at end of file + get_data(raw=True) \ No newline at end of file From 5c02e1ba4b79ad9cf7bb1737d281a843b51ca37b Mon Sep 17 00:00:00 2001 From: Cedric McKeever Date: Wed, 25 Jun 2025 13:57:41 +0800 Subject: [PATCH 03/15] replace sys.exit -> errors, print -> logger --- .gitignore | 5 +- global_macro_data/__init__.py | 2 +- global_macro_data/exceptions.py | 95 +++++++++++++++ global_macro_data/gmd.py | 205 ++++++++++++++------------------ global_macro_data/logging.py | 7 ++ 5 files changed, 198 insertions(+), 116 deletions(-) create mode 100644 global_macro_data/exceptions.py create mode 100644 global_macro_data/logging.py diff --git a/.gitignore b/.gitignore index 72b1f6a..a599f92 100644 --- a/.gitignore +++ b/.gitignore @@ -24,4 +24,7 @@ htmlcov/ Thumbs.db # Keep egg-info for package metadata -!global_macro_data.egg-info/ \ No newline at end of file +!global_macro_data.egg-info/ + +# Test file +test.py \ No newline at end of file diff --git a/global_macro_data/__init__.py b/global_macro_data/__init__.py index c3b9d72..585bf9a 100644 --- a/global_macro_data/__init__.py +++ b/global_macro_data/__init__.py @@ -1,4 +1,4 @@ -from .gmd import ( +from global_macro_data.gmd import ( get_data, get_available_versions, get_current_version, diff --git a/global_macro_data/exceptions.py b/global_macro_data/exceptions.py new file mode 100644 index 0000000..674b110 --- /dev/null +++ b/global_macro_data/exceptions.py @@ -0,0 +1,95 @@ +INTRO = ( + "\nGlobal Macro Database by Müller et al. (2025)\n" + "Website: https://www.globalmacrodata.com\n\n" +) + + +class InvalidVariableError(ValueError): + """Raised when one or more variable codes are invalid.""" + + def __init__(self, invalid_vars: list[str]): + self.invalid_vars = invalid_vars + + # Format variable list nicely + if len(invalid_vars) == 1: + var_list = invalid_vars[0] + var_intro = "Invalid variable code" + else: + var_list = ', '.join(invalid_vars) + var_intro = "Invalid variable codes" + + message = ( + INTRO + + f"{var_intro}: {var_list}\n\n" + "To see the list of valid variable codes, " + "use: gmd.list_variables()" + ) + + super().__init__(message) + + +class InvalidVersionError(ValueError): + """Raised when a requested dataset version does not exist.""" + + def __init__( + self, requested_version: str, + available: list[str], + current: str + ): + + self.requested_version = requested_version + self.available_versions = available + self.current_version = current + + message = ( + INTRO + + f"Error: '{requested_version}' is not a valid dataset version.\n" + f"Available versions: {', '.join(available)}\n" + f"Current version: {current}" + ) + super().__init__(message) + + +class RawModeError(ValueError): + """Raised when raw=True is used incorrectly.""" + + def __init__(self): + message = ( + INTRO + + "'raw=True' requires specifying exactly one variable.\n" + "Raw data is only accessed variable-wise using: " + "gmd(variable, raw=True)\n" + "For full documentation: https://www.globalmacrodata.com/GMD.xlsx" + ) + super().__init__(message) + + +class DataDownloadError(ConnectionError): + """Raised when data cannot be downloaded from the remote source.""" + + def __init__(self, original_exception: Exception): + message = ( + INTRO + + f"Error downloading data:\n{original_exception}" + ) + super().__init__(message) + self.original_exception = original_exception + + +class InvalidCountryError(ValueError): + """Raised when one or more country codes are invalid.""" + + def __init__(self, invalid_codes: list[str]): + self.invalid_codes = invalid_codes + + if len(invalid_codes) == 1: + msg = f"Invalid country code: '{invalid_codes[0]}'" + else: + msg = f"Invalid country codes: {', '.join(invalid_codes)}" + + message = ( + INTRO + + f"{msg}\n\n" + "To see the list of valid country codes, use: gmd.list_countries()" + ) + super().__init__(message) diff --git a/global_macro_data/gmd.py b/global_macro_data/gmd.py index 8ed5a9a..97600cb 100644 --- a/global_macro_data/gmd.py +++ b/global_macro_data/gmd.py @@ -1,9 +1,19 @@ +# Standard library import os -import requests -import pandas as pd import io from typing import Optional, Union, List -import sys + +# Third-party +import pandas as pd +import requests + +# Internal modules +from .logging import logger +from .exceptions import ( + InvalidVariableError, InvalidVersionError, RawModeError, + DataDownloadError, InvalidCountryError +) + # Valid variables list VALID_VARIABLES = [ @@ -58,87 +68,74 @@ def get_current_version() -> str: return versions[0] if versions else None -def list_variables() -> None: - """Display list of available variables and their descriptions""" - print("\nAvailable variables:\n") - print("-" * 90) - print(f"{'Variable':<17} Description") - print("-" * 90) - +def list_variables() -> pd.DataFrame: + """Return list of available variables and their descriptions.""" descriptions = { - "nGDP": "Nominal Gross Domestic Product", - "rGDP": "Real Gross Domestic Product, in 2010 prices", - "rGDP_pc": "Real Gross Domestic Product per Capita", - "rGDP_USD": "Real Gross Domestic Product in USD", - "deflator": "GDP deflator", - "cons": "Total Consumption", - "rcons": "Real Total Consumption", - "cons_GDP": "Total Consumption as % of GDP", - "inv": "Total Investment", - "inv_GDP": "Total Investment as % of GDP", - "finv": "Fixed Investment", - "finv_GDP": "Fixed Investment as % of GDP", - "exports": "Total Exports", - "exports_GDP": "Total Exports as % of GDP", - "imports": "Total Imports", - "imports_GDP": "Total Imports as % of GDP", - "CA": "Current Account Balance", - "CA_GDP": "Current Account Balance as % of GDP", - "USDfx": "Exchange Rate against USD", - "REER": "Real Effective Exchange Rate, 2010 = 100", - "govexp": "Government Expenditure", - "govexp_GDP": "Government Expenditure as % of GDP", - "govrev": "Government Revenue", - "govrev_GDP": "Government Revenue as % of GDP", - "govtax": "Government Tax Revenue", - "govtax_GDP": "Government Tax Revenue as % of GDP", - "govdef": "Government Deficit", - "govdef_GDP": "Government Deficit as % of GDP", - "govdebt": "Government Debt", - "govdebt_GDP": "Government Debt as % of GDP", - "HPI": "House Price Index", - "CPI": "Consumer Price Index, 2010 = 100", - "infl": "Inflation Rate", - "pop": "Population", - "unemp": "Unemployment Rate", - "strate": "Short-term Interest Rate", - "ltrate": "Long-term Interest Rate", - "cbrate": "Central Bank Policy Rate", - "M0": "M0 Money Supply", - "M1": "M1 Money Supply", - "M2": "M2 Money Supply", - "M3": "M3 Money Supply", - "M4": "M4 Money Supply", - "SovDebtCrisis": "Sovereign Debt Crisis", - "CurrencyCrisis": "Currency Crisis", - "BankingCrisis": "Banking Crisis" + 'nGDP': 'Nominal Gross Domestic Product', + 'rGDP': 'Real Gross Domestic Product, in 2010 prices', + 'rGDP_pc': 'Real Gross Domestic Product per Capita', + 'rGDP_USD': 'Real Gross Domestic Product in USD', + 'deflator': 'GDP deflator', + 'cons': 'Total Consumption', + 'rcons': 'Real Total Consumption', + 'cons_GDP': 'Total Consumption as % of GDP', + 'inv': 'Total Investment', + 'inv_GDP': 'Total Investment as % of GDP', + 'finv': 'Fixed Investment', + 'finv_GDP': 'Fixed Investment as % of GDP', + 'exports': 'Total Exports', + 'exports_GDP': 'Total Exports as % of GDP', + 'imports': 'Total Imports', + 'imports_GDP': 'Total Imports as % of GDP', + 'CA': 'Current Account Balance', + 'CA_GDP': 'Current Account Balance as % of GDP', + 'USDfx': 'Exchange Rate against USD', + 'REER': 'Real Effective Exchange Rate, 2010 = 100', + 'govexp': 'Government Expenditure', + 'govexp_GDP': 'Government Expenditure as % of GDP', + 'govrev': 'Government Revenue', + 'govrev_GDP': 'Government Revenue as % of GDP', + 'govtax': 'Government Tax Revenue', + 'govtax_GDP': 'Government Tax Revenue as % of GDP', + 'govdef': 'Government Deficit', + 'govdef_GDP': 'Government Deficit as % of GDP', + 'govdebt': 'Government Debt', + 'govdebt_GDP': 'Government Debt as % of GDP', + 'HPI': 'House Price Index', + 'CPI': 'Consumer Price Index, 2010 = 100', + 'infl': 'Inflation Rate', + 'pop': 'Population', + 'unemp': 'Unemployment Rate', + 'strate': 'Short-term Interest Rate', + 'ltrate': 'Long-term Interest Rate', + 'cbrate': 'Central Bank Policy Rate', + 'M0': 'M0 Money Supply', + 'M1': 'M1 Money Supply', + 'M2': 'M2 Money Supply', + 'M3': 'M3 Money Supply', + 'M4': 'M4 Money Supply', + 'SovDebtCrisis': 'Sovereign Debt Crisis', + 'CurrencyCrisis': 'Currency Crisis', + 'BankingCrisis': 'Banking Crisis', } - for var in sorted(VALID_VARIABLES): - print(f"{var:<17} {descriptions.get(var, '')}") - - print("-" * 90) + return pd.DataFrame({ + 'Variable': VALID_VARIABLES, + 'Description': [descriptions.get(var, '') for var in VALID_VARIABLES] + }).sort_values('Variable') -def list_countries() -> None: - """Display list of available countries and their ISO codes""" +def list_countries() -> pd.DataFrame: + """Return list of available countries and their ISO3 codes.""" try: - # Load isomapping from the package directory script_dir = os.path.dirname(os.path.abspath(__file__)) isomapping_path = os.path.join( os.path.dirname(script_dir), 'isomapping.csv' ) - isomapping = pd.read_csv(isomapping_path) - - print("\nCountry and territories" + " " * 27 + "Code") - print("-" * 60) - - for _, row in isomapping.iterrows(): - print(f"{row['countryname']:<50} {row['ISO3']}") - - print("-" * 60) + df = pd.read_csv(isomapping_path) + return df[['countryname', 'ISO3']] except Exception as e: - raise Exception(f"Error loading country list: {str(e)}") + raise RuntimeError(f'Error loading country list: {e}') def get_data( @@ -180,14 +177,7 @@ def get_data( var for var in variables if var not in VALID_VARIABLES ] if invalid_vars: - print("Global Macro Database by Müller et. al (2025)") - print("Website: https://www.globalmacrodata.com\n") - print(f"Invalid variable code: {invalid_vars[0]}") - print( - "\nTo see the list of valid variable codes, " - "use: gmd(vars=True)" - ) - sys.exit(1) + raise InvalidVariableError(invalid_vars) # Get current version if not specified if version is None: @@ -198,49 +188,39 @@ def get_data( # Check if version exists available_versions = get_available_versions() if version not in available_versions: - print("Global Macro Database by Müller et. al (2025)") - print("Website: https://www.globalmacrodata.com\n") - print(f"Error: {version} is not valid") - print(f"Available versions are: {', '.join(available_versions)}") - print(f"The current version is: {get_current_version()}") - sys.exit(1) + raise InvalidVersionError( + requested_version=version, + available=available_versions, + current=get_current_version() + ) # Handle raw data option if raw: - if (not variables or - (isinstance(variables, list) and len(variables) > 1)): - print("Global Macro Database by Müller et. al (2025)") - print("Website: https://www.globalmacrodata.com\n") - print("Warning: raw requires specifying exactly one variable") - print("Note: Raw data is only accessed variable-wise using: " - "gmd [variable], raw") - print("To download the full data documentation: " - "https://www.globalmacrodata.com/GMD.xlsx") - sys.exit(1) + if not variables or \ + (isinstance(variables, list) and len(variables) > 1): + raise RawModeError() if isinstance(variables, list): variables = variables[0] data_url = f"{base_url}/{variables}_{version}.csv" - print(f"Importing raw data for variable: {variables}") + logger.info(f'Importing raw data for variable: {variables}') else: # Handle single variable case for efficiency if isinstance(variables, list) and len(variables) == 1: variables = variables[0] data_url = f"{base_url}/{variables}_{version}.csv" - print(f"Importing data for variable: {variables}") + logger.info(f'Importing data for variable: {variables}') else: data_url = f"{base_url}/GMD_{version}.csv" + logger.info('Importing data') # Download data try: response = requests.get(data_url) response.raise_for_status() except requests.exceptions.RequestException as e: - print("Global Macro Database by Müller et. al (2025)") - print("Website: https://www.globalmacrodata.com\n") - print(f"Error downloading data: {str(e)}") - sys.exit(1) + raise DataDownloadError(e) # Read the data df = pd.read_csv(io.StringIO(response.text)) @@ -257,15 +237,10 @@ def get_data( c for c in country if c not in df["ISO3"].unique() ] if invalid_countries: - print("Global Macro Database by Müller et. al (2025)") - print("Website: https://www.globalmacrodata.com\n") - print(f"Error: Invalid country code '{invalid_countries[0]}'") - print("\nTo see the list of valid country codes, " - "use: gmd(iso=True)") - sys.exit(1) + raise InvalidCountryError(invalid_countries) df = df[df["ISO3"].isin(country)] - print(f"Filtered data for countries: {', '.join(country)}") + logger.info(f"Filtered data for countries: {', '.join(country)}") # Filter by variables if specified if variables and not raw: @@ -287,19 +262,21 @@ def get_data( # Display dataset information if len(df) == 0: - print(f"The database has no data on {variables} for {country}") + logger.warning("The database has no data on " + f"{variables} for {country}") return None if raw: n_sources = len(df.columns) - 8 # Subtract identifier columns - print(f"Final dataset: {len(df)} observations of {n_sources} sources") + logger.info(f"Final dataset: {len(df)} " + f"observations of {n_sources} sources") else: - print( + logger.info( f"Final dataset: {len(df)} observations of " f"{len(df.columns)} variables" ) - print(f"Version: {version}") + logger.info(f"Version: {version}") # Sort and order columns df = df.sort_values(['countryname', 'year']) diff --git a/global_macro_data/logging.py b/global_macro_data/logging.py new file mode 100644 index 0000000..7180a4d --- /dev/null +++ b/global_macro_data/logging.py @@ -0,0 +1,7 @@ +import logging + +logger = logging.getLogger('gmd') +logger.setLevel(logging.INFO) # Default level (can be overridden by user) + +# Add NullHandler so importing your package doesn't configure root logger +logger.addHandler(logging.NullHandler()) From a2a93ec0449729da74ca1fcbb5c85ebbb8e35f0b Mon Sep 17 00:00:00 2001 From: Cedric McKeever Date: Wed, 25 Jun 2025 14:19:46 +0800 Subject: [PATCH 04/15] add verbose logging func --- global_macro_data/__init__.py | 1 + global_macro_data/logging.py | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/global_macro_data/__init__.py b/global_macro_data/__init__.py index 585bf9a..397e02c 100644 --- a/global_macro_data/__init__.py +++ b/global_macro_data/__init__.py @@ -6,6 +6,7 @@ list_countries, VALID_VARIABLES ) +from global_macro_data.logging import enable_verbose_logging __all__ = [ "get_data", diff --git a/global_macro_data/logging.py b/global_macro_data/logging.py index 7180a4d..4353746 100644 --- a/global_macro_data/logging.py +++ b/global_macro_data/logging.py @@ -5,3 +5,11 @@ # Add NullHandler so importing your package doesn't configure root logger logger.addHandler(logging.NullHandler()) + + +def enable_verbose_logging(level=logging.INFO): + """Enable verbose logging.""" + logging.basicConfig( + level=level, + format='%(levelname)s:%(name)s:%(message)s' + ) From 8357e5022273aaee86cecd2aecac677e4db2fba0 Mon Sep 17 00:00:00 2001 From: Cedric McKeever Date: Wed, 25 Jun 2025 14:56:47 +0800 Subject: [PATCH 05/15] improve debugging --- global_macro_data/__init__.py | 4 ++-- global_macro_data/exceptions.py | 14 +++----------- global_macro_data/gmd.py | 18 +++++++++--------- tests/test_gmd.py | 4 ++-- 4 files changed, 16 insertions(+), 24 deletions(-) diff --git a/global_macro_data/__init__.py b/global_macro_data/__init__.py index 397e02c..c9da8b7 100644 --- a/global_macro_data/__init__.py +++ b/global_macro_data/__init__.py @@ -1,6 +1,6 @@ from global_macro_data.gmd import ( get_data, - get_available_versions, + list_versions, get_current_version, list_variables, list_countries, @@ -10,7 +10,7 @@ __all__ = [ "get_data", - "get_available_versions", + "list_versions", "get_current_version", "list_variables", "list_countries", diff --git a/global_macro_data/exceptions.py b/global_macro_data/exceptions.py index 674b110..d09f6c7 100644 --- a/global_macro_data/exceptions.py +++ b/global_macro_data/exceptions.py @@ -31,21 +31,13 @@ def __init__(self, invalid_vars: list[str]): class InvalidVersionError(ValueError): """Raised when a requested dataset version does not exist.""" - def __init__( - self, requested_version: str, - available: list[str], - current: str - ): + def __init__(self, requested_version: str): self.requested_version = requested_version - self.available_versions = available - self.current_version = current - message = ( INTRO + f"Error: '{requested_version}' is not a valid dataset version.\n" - f"Available versions: {', '.join(available)}\n" - f"Current version: {current}" + "To see the list of valid versions, use: gmd.list_versions()" ) super().__init__(message) @@ -58,7 +50,7 @@ def __init__(self): INTRO + "'raw=True' requires specifying exactly one variable.\n" "Raw data is only accessed variable-wise using: " - "gmd(variable, raw=True)\n" + "gmd.get_data(variable, raw=True)\n" "For full documentation: https://www.globalmacrodata.com/GMD.xlsx" ) super().__init__(message) diff --git a/global_macro_data/gmd.py b/global_macro_data/gmd.py index 97600cb..7bb8a04 100644 --- a/global_macro_data/gmd.py +++ b/global_macro_data/gmd.py @@ -44,7 +44,7 @@ ] -def get_available_versions() -> List[str]: +def list_versions() -> List[str]: """Get list of available versions from GitHub""" try: versions_url = ( @@ -64,12 +64,13 @@ def get_available_versions() -> List[str]: def get_current_version() -> str: """Get the current version of the dataset""" - versions = get_available_versions() + versions = list_versions() return versions[0] if versions else None def list_variables() -> pd.DataFrame: """Return list of available variables and their descriptions.""" + global VALID_VARIABLES descriptions = { 'nGDP': 'Nominal Gross Domestic Product', 'rGDP': 'Real Gross Domestic Product, in 2010 prices', @@ -161,10 +162,10 @@ def get_data( Returns ------- pd.DataFrame or None - Filtered macroeconomic data as a DataFrame, or None if displaying - metadata. + Filtered macroeconomic data as a DataFrame, or None if no data + available. """ - + global VALID_VARIABLES base_url = "https://www.globalmacrodata.com" # Validate variables before proceeding @@ -186,12 +187,10 @@ def get_data( version = get_current_version() else: # Check if version exists - available_versions = get_available_versions() + available_versions = list_versions() if version not in available_versions: raise InvalidVersionError( requested_version=version, - available=available_versions, - current=get_current_version() ) # Handle raw data option @@ -281,7 +280,8 @@ def get_data( # Sort and order columns df = df.sort_values(['countryname', 'year']) id_cols = ['ISO3', 'countryname', 'year'] - other_cols = [col for col in df.columns if col not in id_cols] + other_cols = [col for col in df.columns + if (col not in id_cols) and col in VALID_VARIABLES] df = df[id_cols + other_cols] return df diff --git a/tests/test_gmd.py b/tests/test_gmd.py index 188f55f..35056e9 100644 --- a/tests/test_gmd.py +++ b/tests/test_gmd.py @@ -2,7 +2,7 @@ import pandas as pd from global_macro_data import ( get_data, - get_available_versions, + list_versions, get_current_version, list_variables, list_countries, @@ -11,7 +11,7 @@ def test_get_available_versions(): """Test getting available versions""" - versions = get_available_versions() + versions = list_versions() assert isinstance(versions, list) assert len(versions) > 0 assert all(isinstance(v, str) for v in versions) From 1fe17814cab1d8c2f414f636424900a138d85a92 Mon Sep 17 00:00:00 2001 From: Cedric McKeever Date: Wed, 25 Jun 2025 15:08:34 +0800 Subject: [PATCH 06/15] add ability to call countryname, instead of ISO --- global_macro_data/gmd.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/global_macro_data/gmd.py b/global_macro_data/gmd.py index 7bb8a04..9055c64 100644 --- a/global_macro_data/gmd.py +++ b/global_macro_data/gmd.py @@ -153,7 +153,8 @@ def get_data( variables : str or list of str, optional Variable code(s) to include (e.g., 'rGDP' or ['rGDP', 'unemp']). country : str or list of str, optional - ISO3 country code(s) to include (e.g., 'SGP' or ['MRT', 'SGP']). + Country or ISO3 country code(s) to include (e.g., 'SGP' or + ['MRT', 'SGP']). version : str, optional Dataset version in 'YYYY_MM' format (e.g., '2025_01'). raw : bool, default=False @@ -229,7 +230,14 @@ def get_data( if isinstance(country, str): country = [country] - country = [c.upper() for c in country] + # Load country name to ISO3 mapping + country_df = list_countries() + country_df['countryname'] = country_df['countryname'].str.upper() + country_to_ISO = ( + country_df.set_index('countryname')['ISO3'].to_dict() + ) + + country = [country_to_ISO.get(c.upper(), c.upper()) for c in country] # Validate country codes invalid_countries = [ From 3705de56091738d5b6b34548a6985016360a2052 Mon Sep 17 00:00:00 2001 From: Cedric McKeever Date: Wed, 25 Jun 2025 15:14:14 +0800 Subject: [PATCH 07/15] Update help(enable_verbose_logging) --- global_macro_data/__init__.py | 1 + global_macro_data/logging.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/global_macro_data/__init__.py b/global_macro_data/__init__.py index c9da8b7..8ea010d 100644 --- a/global_macro_data/__init__.py +++ b/global_macro_data/__init__.py @@ -14,5 +14,6 @@ "get_current_version", "list_variables", "list_countries", + "enable_verbose_logging", "VALID_VARIABLES" ] diff --git a/global_macro_data/logging.py b/global_macro_data/logging.py index 4353746..d44faf4 100644 --- a/global_macro_data/logging.py +++ b/global_macro_data/logging.py @@ -8,7 +8,7 @@ def enable_verbose_logging(level=logging.INFO): - """Enable verbose logging.""" + """Enable verbose logging for package.""" logging.basicConfig( level=level, format='%(levelname)s:%(name)s:%(message)s' From 8a745273779f7a8a8ab5082741c30c187034a069 Mon Sep 17 00:00:00 2001 From: Cedric McKeever Date: Wed, 25 Jun 2025 16:37:41 +0800 Subject: [PATCH 08/15] Fix to package level verbose setting --- global_macro_data/logging.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/global_macro_data/logging.py b/global_macro_data/logging.py index d44faf4..d758fa3 100644 --- a/global_macro_data/logging.py +++ b/global_macro_data/logging.py @@ -8,8 +8,14 @@ def enable_verbose_logging(level=logging.INFO): - """Enable verbose logging for package.""" - logging.basicConfig( - level=level, - format='%(levelname)s:%(name)s:%(message)s' - ) + """Enable console logging specifically for the gmd package.""" + handler = logging.StreamHandler() + handler.setLevel(level) + + formatter = logging.Formatter('%(levelname)s:%(name)s:%(message)s') + handler.setFormatter(formatter) + + if not any(isinstance(h, logging.StreamHandler) for h in logger.handlers): + logger.addHandler(handler) + + logger.setLevel(level) From 907143007c358cd73c3f9bc088ea86a337407cbc Mon Sep 17 00:00:00 2001 From: Cedric McKeever Date: Wed, 25 Jun 2025 16:40:31 +0800 Subject: [PATCH 09/15] Update README --- README.md | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index ba79017..29d0eed 100644 --- a/README.md +++ b/README.md @@ -30,47 +30,52 @@ pip install global_macro_data **How to use (examples)** ```python -from global_macro_data import gmd +import global_macro_data as gmd # Get data from latest available version -df = gmd() +df = gmd.get_data() # Get data from a specific version -df = gmd(version="2025_01") +df = gmd.get_data(version="2025_01") # Get data for a specific country -df = gmd(country="USA") +df = gmd.get_data(country="USA") # Get data for multiple countries -df = gmd(country=["USA", "CHN", "DEU"]) +df = gmd.get_data(country=["USA", "CHN", "Germany"]) # Get specific variables -df = gmd(variables=["rGDP", "infl", "unemp"]) +df = gmd.get_data(variables=["rGDP", "infl", "unemp"]) # Get raw data for a single variable -df = gmd(variables="rGDP", raw=True) +df = gmd.get_data(variables="rGDP", raw=True) # List available variables and their descriptions -gmd(vars=True) +gmd.list_variables() # List available countries and their ISO codes -gmd(iso=True) +gmd.list_countries() + +# List available versions +gmd.list_versions() # Combine parameters -df = gmd( +df = gmd.get_data( version="2025_01", country=["USA", "CHN"], variables=["rGDP", "unemp", "CPI"] ) + +# Enable verbose logging (INFO-level and above) +gmd.enable_verbose_logging() ``` -## Parameters +## Parameters for `gmd.get_data()` - **variables (str or list)**: Variable code(s) to include (e.g., "rGDP" or ["rGDP", "unemp"]) -- **country (str or list)**: ISO3 country code(s) (e.g., "SGP" or ["MRT", "SGP"]) +- **country (str or list)**: Country name or ISO3 country code(s) (e.g., "SGP" or ["MRT", "SGP"]) - **version (str)**: Dataset version in format 'YYYY_MM' (e.g., '2025_01'). If None or "current", uses the latest version - **raw (bool)**: If True, download raw data for a single variable -- **iso (bool)**: If True, display list of available countries -- **vars (bool)**: If True, display list of available variables + ## Release schedule | Release Date | Details | From aafdf563ba48049ea5fc62a86a9cbb0730bb04d2 Mon Sep 17 00:00:00 2001 From: Cedric McKeever Date: Wed, 25 Jun 2025 16:51:57 +0800 Subject: [PATCH 10/15] bugfix for raw data --- README.md | 2 +- global_macro_data/gmd.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 29d0eed..4730a09 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ This repository complements paper, **Müller, Xu, Lehbib, and Chen (2025)**, whi - **Scheduled Updates**: Regular releases ensure data reliability. - **Full Transparency**: All code is open source and available in this repository. - **Accessible Formats**: Provided in `.dta`, `.csv` and as **Stata -/Python/R package**. +/Python/R packages**. ## Data access diff --git a/global_macro_data/gmd.py b/global_macro_data/gmd.py index 9055c64..dc048c6 100644 --- a/global_macro_data/gmd.py +++ b/global_macro_data/gmd.py @@ -123,7 +123,7 @@ def list_variables() -> pd.DataFrame: return pd.DataFrame({ 'Variable': VALID_VARIABLES, 'Description': [descriptions.get(var, '') for var in VALID_VARIABLES] - }).sort_values('Variable') + }).sort_values('Variable').reset_index(drop=True) def list_countries() -> pd.DataFrame: @@ -289,7 +289,7 @@ def get_data( df = df.sort_values(['countryname', 'year']) id_cols = ['ISO3', 'countryname', 'year'] other_cols = [col for col in df.columns - if (col not in id_cols) and col in VALID_VARIABLES] + if (col not in id_cols)] df = df[id_cols + other_cols] - return df + return df.drop(columns=['id'], errors='ignore').reset_index(drop=True) From 117e48e3c6acbfe2a1654735a6dacaafb6c26df8 Mon Sep 17 00:00:00 2001 From: Cedric McKeever Date: Wed, 25 Jun 2025 17:00:26 +0800 Subject: [PATCH 11/15] fix wrong substract for num of sources in raw data --- global_macro_data/gmd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/global_macro_data/gmd.py b/global_macro_data/gmd.py index dc048c6..7e7245e 100644 --- a/global_macro_data/gmd.py +++ b/global_macro_data/gmd.py @@ -274,7 +274,7 @@ def get_data( return None if raw: - n_sources = len(df.columns) - 8 # Subtract identifier columns + n_sources = len(df.columns) - 7 # Subtract identifier columns logger.info(f"Final dataset: {len(df)} " f"observations of {n_sources} sources") else: From 9f89f335d846eaaf1ee8704a37e2e39f2474ac30 Mon Sep 17 00:00:00 2001 From: Cedric McKeever Date: Thu, 26 Jun 2025 10:11:30 +0800 Subject: [PATCH 12/15] Update list_country to return dict, use JSON --- global_macro_data/gmd.py | 23 ++-- isomapping.csv | 258 -------------------------------------- isomapping.json | 259 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 271 insertions(+), 269 deletions(-) delete mode 100644 isomapping.csv create mode 100644 isomapping.json diff --git a/global_macro_data/gmd.py b/global_macro_data/gmd.py index 7e7245e..e6d41cf 100644 --- a/global_macro_data/gmd.py +++ b/global_macro_data/gmd.py @@ -2,6 +2,7 @@ import os import io from typing import Optional, Union, List +import json # Third-party import pandas as pd @@ -126,15 +127,14 @@ def list_variables() -> pd.DataFrame: }).sort_values('Variable').reset_index(drop=True) -def list_countries() -> pd.DataFrame: - """Return list of available countries and their ISO3 codes.""" +def list_countries() -> dict: + """Return dict of available countries and their ISO3 codes.""" try: script_dir = os.path.dirname(os.path.abspath(__file__)) isomapping_path = os.path.join( - os.path.dirname(script_dir), 'isomapping.csv' + os.path.dirname(script_dir), 'isomapping.json' ) - df = pd.read_csv(isomapping_path) - return df[['countryname', 'ISO3']] + return _load_json(isomapping_path) except Exception as e: raise RuntimeError(f'Error loading country list: {e}') @@ -231,12 +231,7 @@ def get_data( country = [country] # Load country name to ISO3 mapping - country_df = list_countries() - country_df['countryname'] = country_df['countryname'].str.upper() - country_to_ISO = ( - country_df.set_index('countryname')['ISO3'].to_dict() - ) - + country_to_ISO = {k.upper(): v for k, v in list_countries().items()} country = [country_to_ISO.get(c.upper(), c.upper()) for c in country] # Validate country codes @@ -293,3 +288,9 @@ def get_data( df = df[id_cols + other_cols] return df.drop(columns=['id'], errors='ignore').reset_index(drop=True) + + +def _load_json(path: str) -> dict: + """Load a JSON file as a Python dictionary.""" + with open(path, 'r', encoding='utf-8') as f: + return json.load(f) diff --git a/isomapping.csv b/isomapping.csv deleted file mode 100644 index 18d6e3c..0000000 --- a/isomapping.csv +++ /dev/null @@ -1,258 +0,0 @@ -countryname,ISO3 -Afghanistan,AFG -Albania,ALB -Algeria,DZA -American Samoa,ASM -Andorra,AND -Angola,AGO -Anguilla,AIA -Antarctica,ATA -Antigua and Barbuda,ATG -Argentina,ARG -Armenia,ARM -Aruba,ABW -Australia,AUS -Austria,AUT -Azerbaijan,AZE -Bahamas,BHS -Bahrain,BHR -Bangladesh,BGD -Barbados,BRB -Belarus,BLR -Belgium,BEL -Belize,BLZ -Benin,BEN -Bermuda,BMU -Bhutan,BTN -Bolivia,BOL -"Bonaire, Sint Eustatius and Saba",BES -Bosnia and Herzegovina,BIH -Botswana,BWA -Bouvet Island,BVT -Brazil,BRA -British Indian Ocean Territory,IOT -British Virgin Islands,VGB -Brunei,BRN -Bulgaria,BGR -Burkina Faso,BFA -Burundi,BDI -Cambodia,KHM -Cameroon,CMR -Canada,CAN -Cape Verde,CPV -Cayman Islands,CYM -Central African Republic,CAF -Chad,TCD -Chile,CHL -China,CHN -Christmas Island,CXR -Cocos (Keeling) Islands,CCK -Colombia,COL -Comoros,COM -Cook Islands,COK -Costa Rica,CRI -Croatia,HRV -Cuba,CUB -Curaçao,CUW -Cyprus,CYP -Czech Republic,CZE -Czechoslovakia,CSK -Democratic Republic of Yemen,YMD -Democratic Republic of the Congo,COD -Denmark,DNK -Djibouti,DJI -Dominica,DMA -Dominican Republic,DOM -Ecuador,ECU -Egypt,EGY -El Salvador,SLV -Equatorial Guinea,GNQ -Eritrea,ERI -Estonia,EST -Eswatini,SWZ -Ethiopia,ETH -Falkland Islands,FLK -Faroe Islands,FRO -Fiji,FJI -Finland,FIN -France,FRA -French Guiana,GUF -French Polynesia,PYF -French Southern Territories,ATF -Gabon,GAB -Gambia,GMB -Georgia,GEO -German Democratic Republic,DDR -Germany,DEU -Ghana,GHA -Gibraltar,GIB -Greece,GRC -Greenland,GRL -Grenada,GRD -Guadeloupe,GLP -Guam,GUM -Guatemala,GTM -Guernsey,GGY -Guinea,GIN -Guinea-Bissau,GNB -Guyana,GUY -Haiti,HTI -Heard Island and McDonald Islands,HMD -Holy See,VAT -Honduras,HND -Hong Kong,HKG -Hungary,HUN -Iceland,ISL -India,IND -Indonesia,IDN -Iran,IRN -Iraq,IRQ -Ireland,IRL -Isle of Man,IMN -Israel,ISR -Italy,ITA -Ivory Coast,CIV -Jamaica,JAM -Japan,JPN -Jersey,JEY -Jordan,JOR -Kazakhstan,KAZ -Kenya,KEN -Kiribati,KIR -Kosovo,XKX -Kuwait,KWT -Kyrgyzstan,KGZ -Laos,LAO -Latvia,LVA -Lebanon,LBN -Lesotho,LSO -Liberia,LBR -Libya,LBY -Liechtenstein,LIE -Lithuania,LTU -Luxembourg,LUX -Macau,MAC -Macedonia,MKD -Madagascar,MDG -Malawi,MWI -Malaysia,MYS -Maldives,MDV -Mali,MLI -Malta,MLT -Marshall Islands,MHL -Martinique,MTQ -Mauritania,MRT -Mauritius,MUS -Mayotte,MYT -Mexico,MEX -Micronesia (Federated States of),FSM -Moldova,MDA -Monaco,MCO -Mongolia,MNG -Montenegro,MNE -Montserrat,MSR -Morocco,MAR -Mozambique,MOZ -Myanmar,MMR -Namibia,NAM -Nauru,NRU -Nepal,NPL -Netherlands,NLD -Netherlands Antilles,ANT -New Caledonia,NCL -New Zealand,NZL -Nicaragua,NIC -Niger,NER -Nigeria,NGA -Niue,NIU -Norfolk Island,NFK -North Korea,PRK -Northern Mariana Islands,MNP -Norway,NOR -Oman,OMN -Pakistan,PAK -Palau,PLW -Palestine,PSE -Panama,PAN -Papua New Guinea,PNG -Paraguay,PRY -Peru,PER -Philippines,PHL -Pitcairn,PCN -Poland,POL -Portugal,PRT -Puerto Rico,PRI -Qatar,QAT -Republic of the Congo,COG -Romania,ROU -Russian Federation,RUS -Rwanda,RWA -Réunion,REU -Saint Barthélemy,BLM -"Saint Helena, Ascension and Tristan da Cunha",SHN -Saint Kitts and Nevis,KNA -Saint Lucia,LCA -Saint Martin,MAF -Saint Pierre and Miquelon,SPM -Saint Vincent and the Grenadines,VCT -Samoa,WSM -San Marino,SMR -Sao Tome and Principe,STP -Saudi Arabia,SAU -Senegal,SEN -Serbia,SRB -Serbia and Montenegro,SCG -Seychelles,SYC -Sierra Leone,SLE -Singapore,SGP -Sint Maarten,SXM -Slovakia,SVK -Slovenia,SVN -Solomon Islands,SLB -Somalia,SOM -South Africa,ZAF -South Georgia and the South Sandwich Islands,SGS -South Korea,KOR -South Sudan,SSD -Soviet Union,SUN -Spain,ESP -Sri Lanka,LKA -Sudan,SDN -Suriname,SUR -Svalbard and Jan Mayen,SJM -Sweden,SWE -Switzerland,CHE -Syria,SYR -Taiwan,TWN -Tajikistan,TJK -Tanzania,TZA -Thailand,THA -Timor-Leste,TLS -Togo,TGO -Tokelau,TKL -Tonga,TON -Trinidad and Tobago,TTO -Tunisia,TUN -Turkey,TUR -Turkmenistan,TKM -Turks and Caicos Islands,TCA -Tuvalu,TUV -US Virgin Islands,VIR -Uganda,UGA -Ukraine,UKR -United Arab Emirates,ARE -United Kingdom,GBR -United States,USA -United States Minor Outlying Islands,UMI -Uruguay,URY -Uzbekistan,UZB -Vanuatu,VUT -Venezuela,VEN -Vietnam,VNM -Wallis and Futuna,WLF -Western Sahara,ESH -Yemen,YEM -Yugoslavia,YUG -Zambia,ZMB -Zimbabwe,ZWE -Åland Åland Islands,ALA diff --git a/isomapping.json b/isomapping.json new file mode 100644 index 0000000..d2ee849 --- /dev/null +++ b/isomapping.json @@ -0,0 +1,259 @@ +{ + "Afghanistan": "AFG", + "Albania": "ALB", + "Algeria": "DZA", + "American Samoa": "ASM", + "Andorra": "AND", + "Angola": "AGO", + "Anguilla": "AIA", + "Antarctica": "ATA", + "Antigua and Barbuda": "ATG", + "Argentina": "ARG", + "Armenia": "ARM", + "Aruba": "ABW", + "Australia": "AUS", + "Austria": "AUT", + "Azerbaijan": "AZE", + "Bahamas": "BHS", + "Bahrain": "BHR", + "Bangladesh": "BGD", + "Barbados": "BRB", + "Belarus": "BLR", + "Belgium": "BEL", + "Belize": "BLZ", + "Benin": "BEN", + "Bermuda": "BMU", + "Bhutan": "BTN", + "Bolivia": "BOL", + "Bonaire, Sint Eustatius and Saba": "BES", + "Bosnia and Herzegovina": "BIH", + "Botswana": "BWA", + "Bouvet Island": "BVT", + "Brazil": "BRA", + "British Indian Ocean Territory": "IOT", + "British Virgin Islands": "VGB", + "Brunei": "BRN", + "Bulgaria": "BGR", + "Burkina Faso": "BFA", + "Burundi": "BDI", + "Cambodia": "KHM", + "Cameroon": "CMR", + "Canada": "CAN", + "Cape Verde": "CPV", + "Cayman Islands": "CYM", + "Central African Republic": "CAF", + "Chad": "TCD", + "Chile": "CHL", + "China": "CHN", + "Christmas Island": "CXR", + "Cocos (Keeling) Islands": "CCK", + "Colombia": "COL", + "Comoros": "COM", + "Cook Islands": "COK", + "Costa Rica": "CRI", + "Croatia": "HRV", + "Cuba": "CUB", + "Curaçao": "CUW", + "Cyprus": "CYP", + "Czech Republic": "CZE", + "Czechoslovakia": "CSK", + "Democratic Republic of Yemen": "YMD", + "Democratic Republic of the Congo": "COD", + "Denmark": "DNK", + "Djibouti": "DJI", + "Dominica": "DMA", + "Dominican Republic": "DOM", + "Ecuador": "ECU", + "Egypt": "EGY", + "El Salvador": "SLV", + "Equatorial Guinea": "GNQ", + "Eritrea": "ERI", + "Estonia": "EST", + "Eswatini": "SWZ", + "Ethiopia": "ETH", + "Falkland Islands": "FLK", + "Faroe Islands": "FRO", + "Fiji": "FJI", + "Finland": "FIN", + "France": "FRA", + "French Guiana": "GUF", + "French Polynesia": "PYF", + "French Southern Territories": "ATF", + "Gabon": "GAB", + "Gambia": "GMB", + "Georgia": "GEO", + "German Democratic Republic": "DDR", + "Germany": "DEU", + "Ghana": "GHA", + "Gibraltar": "GIB", + "Greece": "GRC", + "Greenland": "GRL", + "Grenada": "GRD", + "Guadeloupe": "GLP", + "Guam": "GUM", + "Guatemala": "GTM", + "Guernsey": "GGY", + "Guinea": "GIN", + "Guinea-Bissau": "GNB", + "Guyana": "GUY", + "Haiti": "HTI", + "Heard Island and McDonald Islands": "HMD", + "Holy See": "VAT", + "Honduras": "HND", + "Hong Kong": "HKG", + "Hungary": "HUN", + "Iceland": "ISL", + "India": "IND", + "Indonesia": "IDN", + "Iran": "IRN", + "Iraq": "IRQ", + "Ireland": "IRL", + "Isle of Man": "IMN", + "Israel": "ISR", + "Italy": "ITA", + "Ivory Coast": "CIV", + "Jamaica": "JAM", + "Japan": "JPN", + "Jersey": "JEY", + "Jordan": "JOR", + "Kazakhstan": "KAZ", + "Kenya": "KEN", + "Kiribati": "KIR", + "Kosovo": "XKX", + "Kuwait": "KWT", + "Kyrgyzstan": "KGZ", + "Laos": "LAO", + "Latvia": "LVA", + "Lebanon": "LBN", + "Lesotho": "LSO", + "Liberia": "LBR", + "Libya": "LBY", + "Liechtenstein": "LIE", + "Lithuania": "LTU", + "Luxembourg": "LUX", + "Macau": "MAC", + "Macedonia": "MKD", + "Madagascar": "MDG", + "Malawi": "MWI", + "Malaysia": "MYS", + "Maldives": "MDV", + "Mali": "MLI", + "Malta": "MLT", + "Marshall Islands": "MHL", + "Martinique": "MTQ", + "Mauritania": "MRT", + "Mauritius": "MUS", + "Mayotte": "MYT", + "Mexico": "MEX", + "Micronesia (Federated States of)": "FSM", + "Moldova": "MDA", + "Monaco": "MCO", + "Mongolia": "MNG", + "Montenegro": "MNE", + "Montserrat": "MSR", + "Morocco": "MAR", + "Mozambique": "MOZ", + "Myanmar": "MMR", + "Namibia": "NAM", + "Nauru": "NRU", + "Nepal": "NPL", + "Netherlands": "NLD", + "Netherlands Antilles": "ANT", + "New Caledonia": "NCL", + "New Zealand": "NZL", + "Nicaragua": "NIC", + "Niger": "NER", + "Nigeria": "NGA", + "Niue": "NIU", + "Norfolk Island": "NFK", + "North Korea": "PRK", + "Northern Mariana Islands": "MNP", + "Norway": "NOR", + "Oman": "OMN", + "Pakistan": "PAK", + "Palau": "PLW", + "Palestine": "PSE", + "Panama": "PAN", + "Papua New Guinea": "PNG", + "Paraguay": "PRY", + "Peru": "PER", + "Philippines": "PHL", + "Pitcairn": "PCN", + "Poland": "POL", + "Portugal": "PRT", + "Puerto Rico": "PRI", + "Qatar": "QAT", + "Republic of the Congo": "COG", + "Romania": "ROU", + "Russian Federation": "RUS", + "Rwanda": "RWA", + "Réunion": "REU", + "Saint Barthélemy": "BLM", + "Saint Helena, Ascension and Tristan da Cunha": "SHN", + "Saint Kitts and Nevis": "KNA", + "Saint Lucia": "LCA", + "Saint Martin": "MAF", + "Saint Pierre and Miquelon": "SPM", + "Saint Vincent and the Grenadines": "VCT", + "Samoa": "WSM", + "San Marino": "SMR", + "Sao Tome and Principe": "STP", + "Saudi Arabia": "SAU", + "Senegal": "SEN", + "Serbia": "SRB", + "Serbia and Montenegro": "SCG", + "Seychelles": "SYC", + "Sierra Leone": "SLE", + "Singapore": "SGP", + "Sint Maarten": "SXM", + "Slovakia": "SVK", + "Slovenia": "SVN", + "Solomon Islands": "SLB", + "Somalia": "SOM", + "South Africa": "ZAF", + "South Georgia and the South Sandwich Islands": "SGS", + "South Korea": "KOR", + "South Sudan": "SSD", + "Soviet Union": "SUN", + "Spain": "ESP", + "Sri Lanka": "LKA", + "Sudan": "SDN", + "Suriname": "SUR", + "Svalbard and Jan Mayen": "SJM", + "Sweden": "SWE", + "Switzerland": "CHE", + "Syria": "SYR", + "Taiwan": "TWN", + "Tajikistan": "TJK", + "Tanzania": "TZA", + "Thailand": "THA", + "Timor-Leste": "TLS", + "Togo": "TGO", + "Tokelau": "TKL", + "Tonga": "TON", + "Trinidad and Tobago": "TTO", + "Tunisia": "TUN", + "Turkey": "TUR", + "Turkmenistan": "TKM", + "Turks and Caicos Islands": "TCA", + "Tuvalu": "TUV", + "US Virgin Islands": "VIR", + "Uganda": "UGA", + "Ukraine": "UKR", + "United Arab Emirates": "ARE", + "United Kingdom": "GBR", + "United States": "USA", + "United States Minor Outlying Islands": "UMI", + "Uruguay": "URY", + "Uzbekistan": "UZB", + "Vanuatu": "VUT", + "Venezuela": "VEN", + "Vietnam": "VNM", + "Wallis and Futuna": "WLF", + "Western Sahara": "ESH", + "Yemen": "YEM", + "Yugoslavia": "YUG", + "Zambia": "ZMB", + "Zimbabwe": "ZWE", + "Åland Åland Islands": "ALA" +} \ No newline at end of file From 377e78321ecad57fa1f26eb3514f0705dc8e382f Mon Sep 17 00:00:00 2001 From: Cedric McKeever Date: Thu, 26 Jun 2025 10:23:51 +0800 Subject: [PATCH 13/15] add to_dict param to list_country and list_vars --- global_macro_data/gmd.py | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/global_macro_data/gmd.py b/global_macro_data/gmd.py index e6d41cf..314f17e 100644 --- a/global_macro_data/gmd.py +++ b/global_macro_data/gmd.py @@ -69,8 +69,10 @@ def get_current_version() -> str: return versions[0] if versions else None -def list_variables() -> pd.DataFrame: - """Return list of available variables and their descriptions.""" +def list_variables( + as_dict: bool = False +) -> Union[pd.DataFrame, dict[str, str]]: + """Return available variable codes and their descriptions.""" global VALID_VARIABLES descriptions = { 'nGDP': 'Nominal Gross Domestic Product', @@ -121,20 +123,34 @@ def list_variables() -> pd.DataFrame: 'BankingCrisis': 'Banking Crisis', } - return pd.DataFrame({ + data = pd.DataFrame({ 'Variable': VALID_VARIABLES, 'Description': [descriptions.get(var, '') for var in VALID_VARIABLES] }).sort_values('Variable').reset_index(drop=True) + if as_dict: + return data.set_index('Variable').to_dict() -def list_countries() -> dict: - """Return dict of available countries and their ISO3 codes.""" + return data + + +def list_countries( + as_dict: bool = False +) -> Union[pd.DataFrame, dict[str, str]]: + """Return countries and their ISO3 codes.""" try: - script_dir = os.path.dirname(os.path.abspath(__file__)) - isomapping_path = os.path.join( - os.path.dirname(script_dir), 'isomapping.json' + path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + '..', 'isomapping.json') + data = _load_json(path) + + if as_dict: + return data + + return pd.DataFrame( + {'Country': list(data.keys()), + 'ISO3': list(data.values()) + } ) - return _load_json(isomapping_path) except Exception as e: raise RuntimeError(f'Error loading country list: {e}') @@ -231,7 +247,9 @@ def get_data( country = [country] # Load country name to ISO3 mapping - country_to_ISO = {k.upper(): v for k, v in list_countries().items()} + country_to_ISO = { + k.upper(): v for k, v in list_countries(as_dict=True).items() + } country = [country_to_ISO.get(c.upper(), c.upper()) for c in country] # Validate country codes From 5d24b5a2f14d00d4283add8f15ba48759f75e9f2 Mon Sep 17 00:00:00 2001 From: Cedric McKeever Date: Thu, 26 Jun 2025 15:17:01 +0800 Subject: [PATCH 14/15] Successfully ran tox tests --- global_macro_data.egg-info/PKG-INFO | 123 ------------------ global_macro_data.egg-info/SOURCES.txt | 10 -- .../dependency_links.txt | 1 - global_macro_data.egg-info/requires.txt | 2 - global_macro_data.egg-info/top_level.txt | 1 - global_macro_data/gmd.py | 14 +- .../isomapping.json | 0 pyproject.toml | 33 +++++ requirements.txt | 13 +- setup.py | 34 ----- tests/conftest.py | 18 --- tests/test_gmd.py | 96 ++++++++------ tox.ini | 36 +++++ 13 files changed, 138 insertions(+), 243 deletions(-) delete mode 100644 global_macro_data.egg-info/PKG-INFO delete mode 100644 global_macro_data.egg-info/SOURCES.txt delete mode 100644 global_macro_data.egg-info/dependency_links.txt delete mode 100644 global_macro_data.egg-info/requires.txt delete mode 100644 global_macro_data.egg-info/top_level.txt rename isomapping.json => global_macro_data/isomapping.json (100%) create mode 100644 pyproject.toml delete mode 100644 setup.py delete mode 100644 tests/conftest.py create mode 100644 tox.ini diff --git a/global_macro_data.egg-info/PKG-INFO b/global_macro_data.egg-info/PKG-INFO deleted file mode 100644 index 5d877a2..0000000 --- a/global_macro_data.egg-info/PKG-INFO +++ /dev/null @@ -1,123 +0,0 @@ -Metadata-Version: 2.4 -Name: global-macro-data -Version: 0.3.1 -Summary: Global Macro Database by Karsten Müller, Chenzi Xu, Mohamed Lehbib and Ziliang Chen (2025) -Home-page: https://github.com/KMueller-Lab/Global-Macro-Database-Python -Author: Yangbo Wang -Author-email: wangyangbo@ruc.edu.cn -Classifier: Programming Language :: Python :: 3 -Classifier: License :: OSI Approved :: MIT License -Classifier: Operating System :: OS Independent -Requires-Python: >=3.6 -Description-Content-Type: text/markdown -Requires-Dist: requests -Requires-Dist: pandas -Dynamic: author -Dynamic: author-email -Dynamic: classifier -Dynamic: description -Dynamic: description-content-type -Dynamic: home-page -Dynamic: requires-dist -Dynamic: requires-python -Dynamic: summary - -# The Global Macro Database (Python Package) - - Website Badge - - -[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE) - -[Link to paper 📄](https://www.globalmacrodata.com/research-paper.html) - -This repository complements paper, **Müller, Xu, Lehbib, and Chen (2025)**, which introduces a panel dataset of **46 macroeconomic variables across 243 countries** from historical records beginning in the year **1086** until **2024**, including projections through the year **2030**. - -## Features - -- **Unparalleled Coverage**: Combines data from **32 contemporary sources** (e.g., IMF, World Bank, OECD) with **78 historical datasets**. -- **Extensive Variables**: GDP, inflation, government finance, trade, employment, interest rates, and more. -- **Harmonized Data**: Resolves inconsistencies and splices all available data together. -- **Scheduled Updates**: Regular releases ensure data reliability. -- **Full Transparency**: All code is open source and available in this repository. -- **Accessible Formats**: Provided in `.dta`, `.csv` and as **Stata -/Python/R package**. - -## Data access - -Download via website - -**Python package:** -``` -pip install global_macro_data -``` - -**How to use (examples)** -```python -from global_macro_data import gmd - -# Get data from latest available version -df = gmd() - -# Get data from a specific version -df = gmd(version="2025_01") - -# Get data for a specific country -df = gmd(country="USA") - -# Get data for multiple countries -df = gmd(country=["USA", "CHN", "DEU"]) - -# Get specific variables -df = gmd(variables=["rGDP", "infl", "unemp"]) - -# Get raw data for a single variable -df = gmd(variables="rGDP", raw=True) - -# List available variables and their descriptions -gmd(vars=True) - -# List available countries and their ISO codes -gmd(iso=True) - -# Combine parameters -df = gmd( - version="2025_01", - country=["USA", "CHN"], - variables=["rGDP", "unemp", "CPI"] -) -``` - -## Parameters -- **variables (str or list)**: Variable code(s) to include (e.g., "rGDP" or ["rGDP", "unemp"]) -- **country (str or list)**: ISO3 country code(s) (e.g., "SGP" or ["MRT", "SGP"]) -- **version (str)**: Dataset version in format 'YYYY_MM' (e.g., '2025_01'). If None or "current", uses the latest version -- **raw (bool)**: If True, download raw data for a single variable -- **iso (bool)**: If True, display list of available countries -- **vars (bool)**: If True, display list of available variables - -## Release schedule -| Release Date | Details | -|--------------|-----------------| -| 2025-01-30 | Initial release: 2025_01 | -| 2025-04-01 | 2025_03 | -| 2025-07-01 | 2025_06 | -| 2025-10-01 | 2025_09 | -| 2026-01-01 | 2025_12 | - -## Citation - -To cite this dataset, please use the following reference: - -```bibtex -@techreport{mueller2025global, - title = {The Global Macro Database: A New International Macroeconomic Dataset}, - author = {Müller, Karsten and Xu, Chenzi and Lehbib, Mohamed and Chen, Ziliang}, - year = {2025}, - type = {Working Paper} -} -``` - -## Acknowledgments - -The development of the Global Macro Database would not have been possible without the generous funding provided by the Singapore Ministry of Education (MOE) through the PYP grants (WBS A-0003319-01-00 and A-0003319-02-00), a Tier 1 grant (A-8001749- 00-00), and the NUS Risk Management Institute (A-8002360-00-00). This financial support laid the foundation for the successful completion of this extensive project. diff --git a/global_macro_data.egg-info/SOURCES.txt b/global_macro_data.egg-info/SOURCES.txt deleted file mode 100644 index 85e548b..0000000 --- a/global_macro_data.egg-info/SOURCES.txt +++ /dev/null @@ -1,10 +0,0 @@ -README.md -setup.py -global_macro_data/__init__.py -global_macro_data/gmd.py -global_macro_data.egg-info/PKG-INFO -global_macro_data.egg-info/SOURCES.txt -global_macro_data.egg-info/dependency_links.txt -global_macro_data.egg-info/requires.txt -global_macro_data.egg-info/top_level.txt -tests/test_gmd.py \ No newline at end of file diff --git a/global_macro_data.egg-info/dependency_links.txt b/global_macro_data.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- a/global_macro_data.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/global_macro_data.egg-info/requires.txt b/global_macro_data.egg-info/requires.txt deleted file mode 100644 index 65a42be..0000000 --- a/global_macro_data.egg-info/requires.txt +++ /dev/null @@ -1,2 +0,0 @@ -requests -pandas diff --git a/global_macro_data.egg-info/top_level.txt b/global_macro_data.egg-info/top_level.txt deleted file mode 100644 index d4a9e38..0000000 --- a/global_macro_data.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -global_macro_data diff --git a/global_macro_data/gmd.py b/global_macro_data/gmd.py index 314f17e..c8838bc 100644 --- a/global_macro_data/gmd.py +++ b/global_macro_data/gmd.py @@ -1,8 +1,8 @@ # Standard library -import os import io from typing import Optional, Union, List import json +import importlib.resources # Third-party import pandas as pd @@ -139,9 +139,9 @@ def list_countries( ) -> Union[pd.DataFrame, dict[str, str]]: """Return countries and their ISO3 codes.""" try: - path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - '..', 'isomapping.json') - data = _load_json(path) + with importlib.resources.files('global_macro_data')\ + .joinpath('isomapping.json').open('r', encoding='utf-8') as f: + data = json.load(f) if as_dict: return data @@ -306,9 +306,3 @@ def get_data( df = df[id_cols + other_cols] return df.drop(columns=['id'], errors='ignore').reset_index(drop=True) - - -def _load_json(path: str) -> dict: - """Load a JSON file as a Python dictionary.""" - with open(path, 'r', encoding='utf-8') as f: - return json.load(f) diff --git a/isomapping.json b/global_macro_data/isomapping.json similarity index 100% rename from isomapping.json rename to global_macro_data/isomapping.json diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..0ee8ee2 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,33 @@ +[project] +name = "global-macro-data" +version = "0.3.1" +description = "Global Macro Database by Karsten Müller, Chenzi Xu, Mohamed Lehbib and Ziliang Chen (2025)" +readme = "README.md" +authors = [ + { name = "Yangbo Wang", email = "wangyangbo@ruc.edu.cn" } +] +requires-python = ">=3.10" +license = { text = "MIT" } +dependencies = [ + "pandas>=1.5.3", + "requests>=2.31.0" +] + +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent" +] + +[build-system] +requires = ["setuptools>=61", "wheel"] +build-backend = "setuptools.build_meta" + +[tool.setuptools] +packages = ["global_macro_data"] + +[tool.setuptools.package-data] +global_macro_data = ["isomapping.json"] + +[project.urls] +Homepage = "https://github.com/KMueller-Lab/Global-Macro-Database-Python" diff --git a/requirements.txt b/requirements.txt index 4c3088f..070c2cc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,11 @@ -requests -pandas \ No newline at end of file +certifi==2025.6.15 +charset-normalizer==3.4.2 +idna==3.10 +numpy==2.3.1 +pandas==2.3.0 +python-dateutil==2.9.0.post0 +pytz==2025.2 +requests==2.32.4 +six==1.17.0 +tzdata==2025.2 +urllib3==2.5.0 diff --git a/setup.py b/setup.py deleted file mode 100644 index 0639e32..0000000 --- a/setup.py +++ /dev/null @@ -1,34 +0,0 @@ -import os - -def read_readme(): - if os.path.exists("README.md"): - with open("README.md", "r", encoding="utf-8") as f: - return f.read() - return "Global Macro Data package" - -from setuptools import setup, find_packages - -setup( - name="global-macro-data", - version="0.3.1", - packages=find_packages(), - package_data={ - "global_macro_data": ["isomapping.csv"], - }, - install_requires=[ - "requests", - "pandas" - ], - author="Yangbo Wang", - author_email="wangyangbo@ruc.edu.cn", - description="Global Macro Database by Karsten Müller, Chenzi Xu, Mohamed Lehbib and Ziliang Chen (2025)", - long_description=open("README.md", encoding="utf-8").read(), - long_description_content_type="text/markdown", - url="https://github.com/KMueller-Lab/Global-Macro-Database-Python", - classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - ], - python_requires=">=3.6", -) diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index 0208376..0000000 --- a/tests/conftest.py +++ /dev/null @@ -1,18 +0,0 @@ -import pytest -import pandas as pd -import os -import sys - -# Add the package root directory to Python path -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) - -@pytest.fixture(scope="session") -def sample_data(): - """Create a sample dataset for testing""" - return pd.DataFrame({ - 'ISO3': ['USA', 'CHN', 'DEU'] * 2, - 'countryname': ['United States', 'China', 'Germany'] * 2, - 'year': [2020, 2020, 2020, 2021, 2021, 2021], - 'rGDP': [1000, 800, 600, 1050, 850, 620], - 'infl': [2.0, 3.0, 1.5, 2.5, 3.5, 1.8] - }) \ No newline at end of file diff --git a/tests/test_gmd.py b/tests/test_gmd.py index 35056e9..ef9e4a5 100644 --- a/tests/test_gmd.py +++ b/tests/test_gmd.py @@ -1,89 +1,96 @@ import pytest import pandas as pd -from global_macro_data import ( - get_data, - list_versions, - get_current_version, - list_variables, - list_countries, - VALID_VARIABLES +import global_macro_data as gmd +from global_macro_data.exceptions import ( + InvalidVersionError, + InvalidCountryError, + InvalidVariableError, + RawModeError, ) + def test_get_available_versions(): """Test getting available versions""" - versions = list_versions() + versions = gmd.list_versions() assert isinstance(versions, list) assert len(versions) > 0 assert all(isinstance(v, str) for v in versions) assert all(len(v.split('_')) == 2 for v in versions) + def test_get_current_version(): """Test getting current version""" - version = get_current_version() + version = gmd.get_current_version() assert isinstance(version, str) assert len(version.split('_')) == 2 -def test_list_variables(capsys): - """Test listing variables""" - list_variables() - captured = capsys.readouterr() - assert "Available variables" in captured.out - for var in VALID_VARIABLES: - assert var in captured.out - -def test_list_countries(capsys): - """Test listing countries""" - list_countries() - captured = capsys.readouterr() - assert "Country and territories" in captured.out - assert "Code" in captured.out + +def test_list_variables(): + df = gmd.list_variables() + assert isinstance(df, pd.DataFrame) + assert not df.empty + for var in gmd.VALID_VARIABLES: + assert var in df['Variable'].values + + +def test_list_countries(): + df = gmd.list_countries() + assert isinstance(df, pd.DataFrame) + assert not df.empty + def test_gmd_default(): """Test default gmd call""" - df = get_data() + df = gmd.get_data() assert isinstance(df, pd.DataFrame) assert len(df) > 0 assert all(col in df.columns for col in ["ISO3", "countryname", "year"]) + def test_gmd_version(): """Test gmd with specific version""" - version = get_current_version() - df = get_data(version=version) + version = gmd.get_current_version() + df = gmd.get_data(version=version) assert isinstance(df, pd.DataFrame) assert len(df) > 0 + def test_gmd_country(): """Test gmd with specific country""" - df = get_data(country="USA") + df = gmd.get_data(country="USA") assert isinstance(df, pd.DataFrame) assert len(df) > 0 assert all(df["ISO3"] == "USA") + def test_gmd_countries(): """Test gmd with multiple countries""" - df = get_data(country=["USA", "CHN"]) + df = gmd.get_data(country=["USA", "CHN"]) assert isinstance(df, pd.DataFrame) assert len(df) > 0 assert set(df["ISO3"].unique()) == {"USA", "CHN"} + def test_gmd_variables(): """Test gmd with specific variables""" - df = get_data(variables=["rGDP", "infl"]) + df = gmd.get_data(variables=["rGDP", "infl"]) assert isinstance(df, pd.DataFrame) assert len(df) > 0 assert all(col in df.columns for col in ["rGDP", "infl"]) + def test_gmd_raw(): """Test gmd with raw data option""" - df = get_data(variables="rGDP", raw=True) + df = gmd.get_data(variables="rGDP", raw=True) assert isinstance(df, pd.DataFrame) assert len(df) > 0 assert "rGDP" in df.columns + def test_gmd_combinations(): """Test gmd with multiple parameters""" - df = get_data( - version=get_current_version(), + df = gmd.get_data( + version=gmd.get_current_version(), country=["USA", "CHN"], variables=["rGDP", "infl"] ) @@ -92,27 +99,32 @@ def test_gmd_combinations(): assert set(df["ISO3"].unique()) == {"USA", "CHN"} assert all(col in df.columns for col in ["rGDP", "infl"]) + def test_gmd_invalid_version(): """Test gmd with invalid version""" - with pytest.raises(ValueError): - get_data(version="invalid_version") + with pytest.raises(InvalidVersionError): + gmd.get_data(version="invalid_version") + def test_gmd_invalid_country(): """Test gmd with invalid country""" - with pytest.raises(ValueError): - get_data(country="INVALID") + with pytest.raises(InvalidCountryError): + gmd.get_data(country="INVALID") + def test_gmd_invalid_variable(): """Test gmd with invalid variable""" - with pytest.raises(ValueError): - get_data(variables="INVALID") + with pytest.raises(InvalidVariableError): + gmd.get_data(variables="INVALID") + def test_gmd_raw_multiple_variables(): """Test gmd raw option with multiple variables""" - with pytest.raises(ValueError): - get_data(variables=["rGDP", "infl"], raw=True) + with pytest.raises(RawModeError): + gmd.get_data(variables=["rGDP", "infl"], raw=True) + def test_gmd_raw_no_variable(): """Test gmd raw option without variable""" - with pytest.raises(ValueError): - get_data(raw=True) \ No newline at end of file + with pytest.raises(RawModeError): + gmd.get_data(raw=True) diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..8403386 --- /dev/null +++ b/tox.ini @@ -0,0 +1,36 @@ +[tox] +envlist = + py310-pandas150-requests231 + py310-pandas200-requests231 + py310-pandas220-requests232 +skip_missing_interpreters = true + +[testenv] +usedevelop = true +basepython = python3.10 +deps = + pytest + +[testenv:py310-pandas150-requests231] +deps = + {[testenv]deps} + numpy==1.23.5 + pandas==1.5.3 + requests==2.31.0 +commands = pytest tests/ + +[testenv:py310-pandas200-requests231] +deps = + {[testenv]deps} + numpy==1.24.4 + pandas==2.0.3 + requests==2.31.0 +commands = pytest tests/ + +[testenv:py310-pandas220-requests232] +deps = + {[testenv]deps} + numpy==1.26.4 + pandas==2.2.2 + requests==2.32.4 +commands = pytest tests/ From d34d18f8afd4c62ac90291be1b1aed5370003686 Mon Sep 17 00:00:00 2001 From: Cedric McKeever Date: Fri, 27 Jun 2025 16:29:11 +0800 Subject: [PATCH 15/15] Ensure sufficient backwards compatibility --- .gitignore | 9 +++++--- global_macro_data/exceptions.py | 7 +++++-- global_macro_data/gmd.py | 18 ++++++++++------ pyproject.toml | 11 +++++----- requirements.txt | 11 ---------- tox.ini | 37 +++++++++++++++++++++++++++++++-- 6 files changed, 64 insertions(+), 29 deletions(-) delete mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore index a599f92..3092ec3 100644 --- a/.gitignore +++ b/.gitignore @@ -23,8 +23,11 @@ htmlcov/ .DS_Store Thumbs.db -# Keep egg-info for package metadata -!global_macro_data.egg-info/ +# Remove egg-info +global_macro_data.egg-info/ # Test file -test.py \ No newline at end of file +test.py + +# Testing: .tox +.tox/ \ No newline at end of file diff --git a/global_macro_data/exceptions.py b/global_macro_data/exceptions.py index d09f6c7..e02f56b 100644 --- a/global_macro_data/exceptions.py +++ b/global_macro_data/exceptions.py @@ -1,3 +1,6 @@ +from typing import List + + INTRO = ( "\nGlobal Macro Database by Müller et al. (2025)\n" "Website: https://www.globalmacrodata.com\n\n" @@ -7,7 +10,7 @@ class InvalidVariableError(ValueError): """Raised when one or more variable codes are invalid.""" - def __init__(self, invalid_vars: list[str]): + def __init__(self, invalid_vars: List[str]): self.invalid_vars = invalid_vars # Format variable list nicely @@ -71,7 +74,7 @@ def __init__(self, original_exception: Exception): class InvalidCountryError(ValueError): """Raised when one or more country codes are invalid.""" - def __init__(self, invalid_codes: list[str]): + def __init__(self, invalid_codes: List[str]): self.invalid_codes = invalid_codes if len(invalid_codes) == 1: diff --git a/global_macro_data/gmd.py b/global_macro_data/gmd.py index c8838bc..42f940e 100644 --- a/global_macro_data/gmd.py +++ b/global_macro_data/gmd.py @@ -1,8 +1,12 @@ # Standard library import io -from typing import Optional, Union, List +from typing import Optional, Union, List, Dict +try: + import importlib.resources as importlib_resources +except ImportError: + import importlib_resources # Backport for Python <3.9 + import json -import importlib.resources # Third-party import pandas as pd @@ -71,7 +75,7 @@ def get_current_version() -> str: def list_variables( as_dict: bool = False -) -> Union[pd.DataFrame, dict[str, str]]: +) -> Union[pd.DataFrame, Dict[str, str]]: """Return available variable codes and their descriptions.""" global VALID_VARIABLES descriptions = { @@ -136,11 +140,13 @@ def list_variables( def list_countries( as_dict: bool = False -) -> Union[pd.DataFrame, dict[str, str]]: +) -> Union[pd.DataFrame, Dict[str, str]]: """Return countries and their ISO3 codes.""" try: - with importlib.resources.files('global_macro_data')\ - .joinpath('isomapping.json').open('r', encoding='utf-8') as f: + with importlib_resources.open_text('global_macro_data', + 'isomapping.json', + encoding='utf-8') as f: + data = json.load(f) if as_dict: diff --git a/pyproject.toml b/pyproject.toml index 0ee8ee2..727ac0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,16 +1,17 @@ [project] name = "global-macro-data" -version = "0.3.1" +version = "1.0.0" description = "Global Macro Database by Karsten Müller, Chenzi Xu, Mohamed Lehbib and Ziliang Chen (2025)" readme = "README.md" authors = [ { name = "Yangbo Wang", email = "wangyangbo@ruc.edu.cn" } ] -requires-python = ">=3.10" +requires-python = ">=3.8" license = { text = "MIT" } dependencies = [ - "pandas>=1.5.3", - "requests>=2.31.0" + "pandas>=1.0.5", + "requests>=2.20", + "importlib_resources; python_version < \"3.9\"" ] classifiers = [ @@ -20,7 +21,7 @@ classifiers = [ ] [build-system] -requires = ["setuptools>=61", "wheel"] +requires = ["setuptools>=42", "wheel"] build-backend = "setuptools.build_meta" [tool.setuptools] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 070c2cc..0000000 --- a/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -certifi==2025.6.15 -charset-normalizer==3.4.2 -idna==3.10 -numpy==2.3.1 -pandas==2.3.0 -python-dateutil==2.9.0.post0 -pytz==2025.2 -requests==2.32.4 -six==1.17.0 -tzdata==2025.2 -urllib3==2.5.0 diff --git a/tox.ini b/tox.ini index 8403386..f71124a 100644 --- a/tox.ini +++ b/tox.ini @@ -1,17 +1,39 @@ [tox] envlist = + py38-pandas130-requests230 + py39-pandas140-requests231 py310-pandas150-requests231 py310-pandas200-requests231 py310-pandas220-requests232 + py312-pandas220-requests232 skip_missing_interpreters = true [testenv] usedevelop = true -basepython = python3.10 deps = - pytest + pytest==8.2.1 + +[testenv:py38-pandas130-requests230] +basepython = python3.8 +deps = + {[testenv]deps} + numpy==1.18.0 + pandas==1.0.5 + requests==2.20.0 + importlib_resources +commands = pytest tests/ + +[testenv:py39-pandas140-requests231] +basepython = python3.9 +deps = + {[testenv]deps} + numpy==1.22.4 + pandas==1.4.4 + requests==2.31.0 +commands = pytest tests/ [testenv:py310-pandas150-requests231] +basepython = python3.10 deps = {[testenv]deps} numpy==1.23.5 @@ -20,6 +42,7 @@ deps = commands = pytest tests/ [testenv:py310-pandas200-requests231] +basepython = python3.10 deps = {[testenv]deps} numpy==1.24.4 @@ -28,6 +51,16 @@ deps = commands = pytest tests/ [testenv:py310-pandas220-requests232] +basepython = python3.10 +deps = + {[testenv]deps} + numpy==1.26.4 + pandas==2.2.2 + requests==2.32.4 +commands = pytest tests/ + +[testenv:py312-pandas220-requests232] +basepython = python3.12 deps = {[testenv]deps} numpy==1.26.4