From ffb5cdeb65945f1eaadac8c2a2661243890f7369 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Isztl?= Date: Wed, 3 Dec 2025 10:38:39 +0100 Subject: [PATCH 1/2] Fix EmptyDataError when reading empty CSV files Handle pandas.errors.EmptyDataError that occurs when pd.read_csv() encounters an existing but empty CSV file. This fix catches the error in both 'append' and 'update' modes and treats the empty file as if it doesn't exist, creating a new file with the current data. The fix prevents crashes when emission CSV files exist but are empty, which can happen if a previous write operation was interrupted. Added test cases for both append and update modes with empty files. --- codecarbon/output_methods/file.py | 20 ++++++++++++++++++-- tests/output_methods/test_file.py | 24 ++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/codecarbon/output_methods/file.py b/codecarbon/output_methods/file.py index 76297a863..83918ecf4 100644 --- a/codecarbon/output_methods/file.py +++ b/codecarbon/output_methods/file.py @@ -96,14 +96,30 @@ def out(self, total: EmissionsData, _): if not file_exists: df = new_df elif self.on_csv_write == "append": - df = pd.read_csv(self.save_file_path) + try: + df = pd.read_csv(self.save_file_path) + except pd.errors.EmptyDataError: + logger.warning( + f"File {self.save_file_path} exists but is empty. Creating new file." + ) + df = new_df + df.to_csv(self.save_file_path, index=False) + return # Filter out empty or all-NA columns, to avoid warnings from Pandas, # see https://github.com/pandas-dev/pandas/issues/55928 df = df.dropna(axis=1, how="all") new_df = new_df.dropna(axis=1, how="all") df = pd.concat([df, new_df]) else: - df = pd.read_csv(self.save_file_path) + try: + df = pd.read_csv(self.save_file_path) + except pd.errors.EmptyDataError: + logger.warning( + f"File {self.save_file_path} exists but is empty. Creating new file." + ) + df = new_df + df.to_csv(self.save_file_path, index=False) + return df_run = df.loc[df.run_id == total.run_id] if len(df_run) < 1: df = pd.concat([df, new_df]) diff --git a/tests/output_methods/test_file.py b/tests/output_methods/test_file.py index 87ef9163e..e36132680 100644 --- a/tests/output_methods/test_file.py +++ b/tests/output_methods/test_file.py @@ -173,6 +173,30 @@ def test_file_output_out_update_file_exists_one_matchingrows(self): # file_output = FileOutput("test.csv", self.temp_dir, on_csv_write="append") # file_output.out(self.emissions_data, None) + def test_file_output_out_append_empty_file_exists(self): + file_output = FileOutput("test.csv", self.temp_dir, on_csv_write="append") + # Create an empty file + with open(file_output.save_file_path, "w") as _: + pass + + # This should not raise an error + file_output.out(self.emissions_data, None) + + df = pd.read_csv(os.path.join(self.temp_dir, "test.csv")) + self.assertEqual(len(df), 1) + + def test_file_output_out_update_empty_file_exists(self): + file_output = FileOutput("test.csv", self.temp_dir, on_csv_write="update") + # Create an empty file + with open(file_output.save_file_path, "w") as _: + pass + + # This should not raise an error + file_output.out(self.emissions_data, None) + + df = pd.read_csv(os.path.join(self.temp_dir, "test.csv")) + self.assertEqual(len(df), 1) + def test_file_output_task_out(self): task_emissions_data = [ TaskEmissionsData( From 9cc0a548191fa73372d14636e36d4a0aa0973d30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Isztl?= Date: Fri, 5 Dec 2025 16:20:29 +0100 Subject: [PATCH 2/2] Fix EmptyDataError when reading empty CSV files --- codecarbon/output_methods/file.py | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/codecarbon/output_methods/file.py b/codecarbon/output_methods/file.py index 83918ecf4..211585d14 100644 --- a/codecarbon/output_methods/file.py +++ b/codecarbon/output_methods/file.py @@ -88,6 +88,11 @@ def out(self, total: EmissionsData, _): """ file_exists: bool = os.path.isfile(self.save_file_path) + if file_exists and os.path.getsize(self.save_file_path) == 0: + logger.warning( + f"File {self.save_file_path} exists but is empty. Treating as new file." + ) + file_exists = False if file_exists and not self.has_valid_headers(total): logger.warning("The CSV format has changed, backing up old emission file.") backup(self.save_file_path) @@ -96,30 +101,14 @@ def out(self, total: EmissionsData, _): if not file_exists: df = new_df elif self.on_csv_write == "append": - try: - df = pd.read_csv(self.save_file_path) - except pd.errors.EmptyDataError: - logger.warning( - f"File {self.save_file_path} exists but is empty. Creating new file." - ) - df = new_df - df.to_csv(self.save_file_path, index=False) - return + df = pd.read_csv(self.save_file_path) # Filter out empty or all-NA columns, to avoid warnings from Pandas, # see https://github.com/pandas-dev/pandas/issues/55928 df = df.dropna(axis=1, how="all") new_df = new_df.dropna(axis=1, how="all") df = pd.concat([df, new_df]) else: - try: - df = pd.read_csv(self.save_file_path) - except pd.errors.EmptyDataError: - logger.warning( - f"File {self.save_file_path} exists but is empty. Creating new file." - ) - df = new_df - df.to_csv(self.save_file_path, index=False) - return + df = pd.read_csv(self.save_file_path) df_run = df.loc[df.run_id == total.run_id] if len(df_run) < 1: df = pd.concat([df, new_df])