Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions carbonserver/carbonserver/api/infra/database/sql_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ class Emission(Base):
gpu_energy = Column(Float)
ram_energy = Column(Float)
energy_consumed = Column(Float)
cpu_utilization_percent = Column(Float, nullable=True)
gpu_utilization_percent = Column(Float, nullable=True)
ram_utilization_percent = Column(Float, nullable=True)
wue = Column(Float, nullable=False, default=0)
run_id = Column(UUID(as_uuid=True), ForeignKey("runs.id", ondelete="CASCADE"))
run = relationship("Run", back_populates="emissions")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ def add_emission(self, emission: EmissionCreate) -> UUID:
gpu_energy=emission.gpu_energy,
ram_energy=emission.ram_energy,
energy_consumed=emission.energy_consumed,
cpu_utilization_percent=emission.cpu_utilization_percent,
gpu_utilization_percent=emission.gpu_utilization_percent,
ram_utilization_percent=emission.ram_utilization_percent,
wue=emission.wue,
run_id=emission.run_id,
)
Expand Down Expand Up @@ -105,6 +108,9 @@ def map_sql_to_schema(emission: sql_models.Emission) -> Emission:
gpu_energy=emission.gpu_energy,
ram_energy=emission.ram_energy,
energy_consumed=emission.energy_consumed,
cpu_utilization_percent=emission.cpu_utilization_percent,
gpu_utilization_percent=emission.gpu_utilization_percent,
ram_utilization_percent=emission.ram_utilization_percent,
wue=emission.wue,
run_id=emission.run_id,
)
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,15 @@ def get_project_detailed_sums_by_experiment(
func.sum(SqlModelEmission.energy_consumed).label("energy_consumed"),
func.sum(SqlModelEmission.duration).label("duration"),
func.avg(SqlModelEmission.emissions_rate).label("emissions_rate"),
func.avg(SqlModelEmission.cpu_utilization_percent).label(
"cpu_utilization_percent"
),
func.avg(SqlModelEmission.gpu_utilization_percent).label(
"gpu_utilization_percent"
),
func.avg(SqlModelEmission.ram_utilization_percent).label(
"ram_utilization_percent"
),
func.count(SqlModelEmission.emissions_rate).label(
"emissions_count"
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,15 @@ def get_organization_detailed_sums(
func.sum(SqlModelEmission.energy_consumed).label("energy_consumed"),
func.sum(SqlModelEmission.duration).label("duration"),
func.avg(SqlModelEmission.emissions_rate).label("emissions_rate"),
func.avg(SqlModelEmission.cpu_utilization_percent).label(
"cpu_utilization_percent"
),
func.avg(SqlModelEmission.gpu_utilization_percent).label(
"gpu_utilization_percent"
),
func.avg(SqlModelEmission.ram_utilization_percent).label(
"ram_utilization_percent"
),
func.count(SqlModelEmission.emissions_rate).label(
"emissions_count"
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,15 @@ def get_project_detailed_sums(
func.sum(SqlModelEmission.energy_consumed).label("energy_consumed"),
func.sum(SqlModelEmission.duration).label("duration"),
func.avg(SqlModelEmission.emissions_rate).label("emissions_rate"),
func.avg(SqlModelEmission.cpu_utilization_percent).label(
"cpu_utilization_percent"
),
func.avg(SqlModelEmission.gpu_utilization_percent).label(
"gpu_utilization_percent"
),
func.avg(SqlModelEmission.ram_utilization_percent).label(
"ram_utilization_percent"
),
func.count(SqlModelEmission.emissions_rate).label(
"emissions_count"
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,15 @@ def get_experiment_detailed_sums_by_run(
func.sum(SqlModelEmission.energy_consumed).label("energy_consumed"),
func.sum(SqlModelEmission.duration).label("duration"),
func.avg(SqlModelEmission.emissions_rate).label("emissions_rate"),
func.avg(SqlModelEmission.cpu_utilization_percent).label(
"cpu_utilization_percent"
),
func.avg(SqlModelEmission.gpu_utilization_percent).label(
"gpu_utilization_percent"
),
func.avg(SqlModelEmission.ram_utilization_percent).label(
"ram_utilization_percent"
),
func.count(SqlModelEmission.emissions_rate).label(
"emissions_count"
),
Expand Down
21 changes: 21 additions & 0 deletions carbonserver/carbonserver/api/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,15 @@ class EmissionBase(BaseModel):
ram_energy: Optional[float] = Field(
..., ge=0, description="The ram_energy must be greater than zero"
)
cpu_utilization_percent: Optional[float] = Field(
None, ge=0, le=100, description="The CPU utilization must be between 0 and 100"
)
gpu_utilization_percent: Optional[float] = Field(
None, ge=0, le=100, description="The GPU utilization must be between 0 and 100"
)
ram_utilization_percent: Optional[float] = Field(
None, ge=0, le=100, description="The RAM utilization must be between 0 and 100"
)
wue: Optional[float] = Field(
default=0,
ge=0,
Expand Down Expand Up @@ -183,6 +192,9 @@ class RunReport(RunBase):
duration: float
emissions_rate: float
emissions_count: int
cpu_utilization_percent: Optional[float] = None
gpu_utilization_percent: Optional[float] = None
ram_utilization_percent: Optional[float] = None


class ExperimentBase(BaseModel):
Expand Down Expand Up @@ -246,6 +258,9 @@ class ExperimentReport(ExperimentBase):
duration: int
emissions_rate: float
emissions_count: int
cpu_utilization_percent: Optional[float] = None
gpu_utilization_percent: Optional[float] = None
ram_utilization_percent: Optional[float] = None

class Config:
schema_extra = {
Expand Down Expand Up @@ -377,6 +392,9 @@ class ProjectReport(ProjectBase):
duration: int
emissions_rate: float
emissions_count: int
cpu_utilization_percent: Optional[float] = None
gpu_utilization_percent: Optional[float] = None
ram_utilization_percent: Optional[float] = None


class OrganizationBase(BaseModel):
Expand Down Expand Up @@ -420,6 +438,9 @@ class OrganizationReport(OrganizationBase):
duration: int
emissions_rate: float
emissions_count: int
cpu_utilization_percent: Optional[float] = None
gpu_utilization_percent: Optional[float] = None
ram_utilization_percent: Optional[float] = None


class Membership(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""add_utilization_metrics_to_emissions

Revision ID: 20251119_add_utilization
Revises: 3212895acafd
Create Date: 2025-11-19 18:52:00.000000

"""

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "20251119_add_utilization"
down_revision = "3212895acafd"
branch_labels = None
depends_on = None


def upgrade():
    """
    Add the CPU, GPU and RAM utilization percentage columns to ``emissions``.

    Each column is a nullable Float, added in the order CPU, GPU, RAM.
    These fields track the average utilization of resources during emission
    tracking.
    """
    utilization_columns = (
        "cpu_utilization_percent",
        "gpu_utilization_percent",
        "ram_utilization_percent",
    )
    for column_name in utilization_columns:
        op.add_column(
            "emissions",
            sa.Column(column_name, sa.Float, nullable=True),
        )


def downgrade():
    """
    Drop the CPU, GPU and RAM utilization percentage columns from ``emissions``.

    Columns are dropped in the order RAM, GPU, CPU.
    """
    columns_to_drop = (
        "ram_utilization_percent",
        "gpu_utilization_percent",
        "cpu_utilization_percent",
    )
    for column_name in columns_to_drop:
        op.drop_column("emissions", column_name)
56 changes: 56 additions & 0 deletions codecarbon/emissions_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
from functools import wraps
from typing import Any, Callable, Dict, List, Optional, Union

import psutil

from codecarbon._version import __version__
from codecarbon.core.config import get_hierarchical_config
from codecarbon.core.emissions import Emissions
Expand Down Expand Up @@ -335,6 +337,11 @@ def __init__(
self._last_measured_time: float = time.perf_counter()
self._total_energy: Energy = Energy.from_energy(kWh=0)
self._total_water: Water = Water.from_litres(litres=0)
# CPU, GPU and RAM utilization tracking
self._cpu_utilization_history: List[float] = []
self._gpu_utilization_history: List[float] = []
self._ram_utilization_history: List[float] = []
self._ram_used_history: List[float] = []
self._total_cpu_energy: Energy = Energy.from_energy(kWh=0)
self._total_gpu_energy: Energy = Energy.from_energy(kWh=0)
self._total_ram_energy: Energy = Energy.from_energy(kWh=0)
Expand Down Expand Up @@ -482,6 +489,13 @@ def start(self) -> None:
return

self._last_measured_time = self._start_time = time.perf_counter()

# Clear utilization history for fresh measurements
self._cpu_utilization_history.clear()
self._ram_utilization_history.clear()
self._ram_used_history.clear()
self._gpu_utilization_history.clear()

# Read initial energy for hardware
for hardware in self._hardware:
hardware.start()
Expand Down Expand Up @@ -525,6 +539,13 @@ def start_task(self, task_name=None) -> None:
if task_name in self._tasks.keys():
task_name += "_" + uuid.uuid4().__str__()
self._last_measured_time = self._start_time = time.perf_counter()

# Clear utilization history for fresh measurements
self._cpu_utilization_history.clear()
self._ram_utilization_history.clear()
self._ram_used_history.clear()
self._gpu_utilization_history.clear()

# Read initial energy for hardware
for hardware in self._hardware:
hardware.start()
Expand Down Expand Up @@ -782,6 +803,26 @@ def _prepare_emissions_data(self) -> EmissionsData:
duration=duration.seconds,
emissions=emissions, # kg
emissions_rate=emissions / duration.seconds, # kg/s
cpu_utilization_percent=(
sum(self._cpu_utilization_history) / len(self._cpu_utilization_history)
if self._cpu_utilization_history
else 0
),
gpu_utilization_percent=(
sum(self._gpu_utilization_history) / len(self._gpu_utilization_history)
if self._gpu_utilization_history
else 0
),
ram_utilization_percent=(
sum(self._ram_utilization_history) / len(self._ram_utilization_history)
if self._ram_utilization_history
else 0
),
ram_used_gb=(
sum(self._ram_used_history) / len(self._ram_used_history)
if self._ram_used_history
else 0
),
cpu_power=avg_cpu_power,
gpu_power=avg_gpu_power,
ram_power=avg_ram_power,
Expand Down Expand Up @@ -855,6 +896,21 @@ def _monitor_power(self) -> None:
if isinstance(hardware, CPU):
hardware.monitor_power()

# Collect CPU and RAM utilization metrics
self._cpu_utilization_history.append(psutil.cpu_percent())
self._ram_utilization_history.append(psutil.virtual_memory().percent)
self._ram_used_history.append(psutil.virtual_memory().used / (1024**3))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we have to multiply it by 100 to get a percentage?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tested it and it's OK in the CSV: 55 for 55% of RAM used.


# Collect GPU utilization metrics
for hardware in self._hardware:
if isinstance(hardware, GPU):
gpu_details = hardware.devices.get_gpu_details()
for gpu_detail in gpu_details:
if "gpu_utilization" in gpu_detail:
self._gpu_utilization_history.append(
gpu_detail["gpu_utilization"]
)

def _do_measurements(self) -> None:
for hardware in self._hardware:
h_time = time.perf_counter()
Expand Down
8 changes: 8 additions & 0 deletions codecarbon/output_methods/emissions_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ class EmissionsData:
latitude: float
ram_total_size: float
tracking_mode: str
cpu_utilization_percent: float = 0.0
gpu_utilization_percent: float = 0.0
ram_utilization_percent: float = 0.0
ram_used_gb: float = 0.0
on_cloud: str = "N"
pue: float = 1
wue: float = 0
Expand Down Expand Up @@ -101,6 +105,10 @@ class TaskEmissionsData:
latitude: float
ram_total_size: float
tracking_mode: str
cpu_utilization_percent: float = 0.0
gpu_utilization_percent: float = 0.0
ram_utilization_percent: float = 0.0
ram_used_gb: float = 0.0
on_cloud: str = "N"

@property
Expand Down
10 changes: 9 additions & 1 deletion docs/edit/output.rst
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,16 @@ input parameter (defaults to the current directory), for each experiment tracked
| This is done for privacy protection.
* - ram_total_size
- total RAM available (GB)
* - Tracking_mode:
* - tracking_mode
- ``machine`` or ``process`` (defaults to ``machine``)
* - cpu_utilization_percent
- Average CPU utilization during tracking period (%)
* - gpu_utilization_percent
- Average GPU utilization during tracking period (%)
* - ram_utilization_percent
- Average RAM utilization during tracking period (%)
* - ram_used_gb
- Average RAM used during tracking period (GB)

.. note::

Expand Down
54 changes: 54 additions & 0 deletions test_gpu_monitoring.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env python3
"""
Simple manual script to check that GPU load monitoring works.
It runs the tracker for about 10 seconds (sleeping, without generating any GPU workload
itself) and prints the GPU, CPU and RAM utilization that was recorded.
"""

import time

from codecarbon import EmissionsTracker


def main():
    """
    Run the tracker for about 10 seconds and print the recorded utilization.

    Creates an ``EmissionsTracker`` that saves to ``test_gpu_emissions.csv``,
    sleeps for 10 seconds, stops the tracker, then prints the total emissions
    and the GPU, CPU and RAM utilization found on ``final_emissions_data``.
    """
    print("Starting GPU load monitoring test...")
    print("=" * 60)

    # Initialize the tracker
    tracker = EmissionsTracker(
        project_name="gpu_load_test",
        measure_power_secs=2,
        save_to_file=True,
        output_file="test_gpu_emissions.csv",
    )

    # Start tracking
    tracker.start()
    print("Tracker started. Running for 10 seconds...")

    # Run for a short duration to collect some metrics
    time.sleep(10)

    # Stop tracking
    emissions = tracker.stop()

    print("=" * 60)
    print("Test completed!")
    # stop() may return None when tracking produced no result; formatting
    # None with ":.6f" would raise TypeError and hide the report below.
    if emissions is None:
        print("Total emissions: not available")
    else:
        print(f"Total emissions: {emissions:.6f} kg CO2")

    # Check if GPU utilization was tracked
    data = getattr(tracker, "final_emissions_data", None)
    if data is not None:
        print(f"GPU utilization: {data.gpu_utilization_percent:.2f}%")
        print(f"CPU utilization: {data.cpu_utilization_percent:.2f}%")
        print(f"RAM utilization: {data.ram_utilization_percent:.2f}%")

        if data.gpu_utilization_percent > 0:
            print("\n✓ GPU utilization tracking is working!")
        else:
            print("\n⚠ GPU utilization is 0% (may not have GPU or no GPU workload)")

    print("\nCheck test_gpu_emissions.csv for detailed results.")


if __name__ == "__main__":
main()
Loading
Loading