From e1c58a33002b8b3721f0a1619f0ee884786c8183 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 16 Jan 2024 09:10:24 +0000 Subject: [PATCH 1/5] added amd-smi interface --- codecarbon/core/gpu.py | 193 ++++++++++++++++++++++++++------ codecarbon/core/util.py | 20 ++++ codecarbon/emissions_tracker.py | 7 +- 3 files changed, 185 insertions(+), 35 deletions(-) diff --git a/codecarbon/core/gpu.py b/codecarbon/core/gpu.py index 70a81cabc..97158c4c4 100644 --- a/codecarbon/core/gpu.py +++ b/codecarbon/core/gpu.py @@ -1,10 +1,23 @@ +from collections import namedtuple from dataclasses import dataclass, field -import pynvml - from codecarbon.core.units import Energy, Power, Time +from codecarbon.core.util import is_amd_system, is_nvidia_system from codecarbon.external.logger import logger +USE_AMDSMI = False +USE_PYNVML = False + +if is_nvidia_system(): + import pynvml + + USE_PYNVML = True + +if is_amd_system(): + import amdsmi + + USE_AMDSMI = True + @dataclass class GPUDevice: @@ -92,46 +105,105 @@ def _get_total_energy_consumption(self): """Returns total energy consumption for this GPU in millijoules (mJ) since the driver was last reloaded https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g732ab899b5bd18ac4bfb93c02de4900a """ - return pynvml.nvmlDeviceGetTotalEnergyConsumption(self.handle) + if USE_PYNVML: + return pynvml.nvmlDeviceGetTotalEnergyConsumption(self.handle) + elif USE_AMDSMI: + # returns energy in microjoules (amd-smi metric --energy) + return amdsmi.amdsmi_get_power_measure(self.handle)["energy_accumulator"] + else: + raise Exception("No GPU interface available") def _get_gpu_name(self): """Returns the name of the GPU device https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1ga5361803e044c6fdf3b08523fb6d1481 """ - name = pynvml.nvmlDeviceGetName(self.handle) + if USE_PYNVML: + name = pynvml.nvmlDeviceGetName(self.handle) + elif USE_AMDSMI: + name = amdsmi.amdsmi_get_board_info(self.handle)["manufacturer_name"] + else: + raise Exception("No GPU interface available") + return self._to_utf8(name) def _get_uuid(self): """Returns the globally unique GPU device UUID https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g72710fb20f30f0c2725ce31579832654 """ - uuid = pynvml.nvmlDeviceGetUUID(self.handle) + if USE_PYNVML: + uuid = pynvml.nvmlDeviceGetUUID(self.handle) + elif USE_AMDSMI: + uuid = amdsmi.amdsmi_get_device_uuid(self.handle) + else: + raise Exception("No GPU interface available") + return self._to_utf8(uuid) def _get_memory_info(self): """Returns memory info in bytes https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g2dfeb1db82aa1de91aa6edf941c85ca8 """ - return pynvml.nvmlDeviceGetMemoryInfo(self.handle) + if USE_PYNVML: + return pynvml.nvmlDeviceGetMemoryInfo(self.handle) + elif USE_AMDSMI: + # returns memory in megabytes (amd-smi metric --mem-usage) + memory_info = amdsmi.amdsmi_get_vram_usage(self.handle) + AMDMemory = namedtuple("AMDMemory", ["total", "used", "free"]) + return AMDMemory( + total=memory_info["vram_total"] * 1024 * 1024, + used=memory_info["vram_used"] * 1024 * 1024, + free=(memory_info["vram_total"] - memory_info["vram_used"]) + * 1024 + * 1024, + ) + else: + raise Exception("No GPU interface available") def _get_temperature(self): """Returns degrees in the Celsius scale 
https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g92d1c5182a14dd4be7090e3c1480b121 """ - return pynvml.nvmlDeviceGetTemperature(self.handle, pynvml.NVML_TEMPERATURE_GPU) + if USE_PYNVML: + return pynvml.nvmlDeviceGetTemperature( + self.handle, + sensor=pynvml.NVML_TEMPERATURE_GPU, + ) + elif USE_AMDSMI: + return amdsmi.amdsmi_dev_get_temp_metric( + self.handle, + sensor_type=amdsmi.AmdSmiTemperatureType.EDGE, + metric=amdsmi.AmdSmiTemperatureMetric.CURRENT, + ) + else: + raise Exception("No GPU interface available") def _get_power_usage(self): """Returns power usage in milliwatts https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g7ef7dff0ff14238d08a19ad7fb23fc87 """ - return pynvml.nvmlDeviceGetPowerUsage(self.handle) + if USE_PYNVML: + return pynvml.nvmlDeviceGetPowerUsage(self.handle) + elif USE_AMDSMI: + # returns power in Watts (amd-smi metric --power) + return ( + amdsmi.amdsmi_get_power_measure(self.handle)["average_socket_power"] + * 1000 + ) + else: + raise Exception("No GPU interface available") def _get_power_limit(self): """Returns max power usage in milliwatts https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g263b5bf552d5ec7fcd29a088264d10ad """ try: - return pynvml.nvmlDeviceGetEnforcedPowerLimit(self.handle) + if USE_PYNVML: + return pynvml.nvmlDeviceGetEnforcedPowerLimit(self.handle) + elif USE_AMDSMI: + # returns power limit in Watts (amd-smi static --limit) + return ( + amdsmi.amdsmi_get_power_measure(self.handle)["power_limit"] * 1000 + ) except Exception: return None @@ -139,51 +211,100 @@ def _get_gpu_utilization(self): """Returns the % of utilization of the kernels during the last sample https://docs.nvidia.com/deploy/nvml-api/structnvmlUtilization__t.html#structnvmlUtilization__t """ - return pynvml.nvmlDeviceGetUtilizationRates(self.handle).gpu + if USE_PYNVML: + return pynvml.nvmlDeviceGetUtilizationRates(self.handle).gpu + elif USE_AMDSMI: + return amdsmi.amdsmi_get_gpu_activity(self.handle)["gfx_activity"] + else: + raise Exception("No GPU interface available") def _get_compute_mode(self): """Returns the compute mode of the GPU https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceEnumvs.html#group__nvmlDeviceEnumvs_1gbed1b88f2e3ba39070d31d1db4340233 """ - return pynvml.nvmlDeviceGetComputeMode(self.handle) + if USE_PYNVML: + return pynvml.nvmlDeviceGetComputeMode(self.handle) + elif USE_AMDSMI: + return None + else: + raise Exception("No GPU interface available") def _get_compute_processes(self): - """Returns the list of processes ids having a compute context on the - device with the memory used + """Returns the list of processes ids having a compute context on the device with the memory used https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g46ceaea624d5c96e098e03c453419d68 """ try: - processes = pynvml.nvmlDeviceGetComputeRunningProcesses(self.handle) - - return [{"pid": p.pid, "used_memory": p.usedGpuMemory} for p in processes] - except pynvml.NVMLError: + if USE_PYNVML: + processes = pynvml.nvmlDeviceGetComputeRunningProcesses(self.handle) + return [ + {"pid": p.pid, "used_memory": p.usedGpuMemory} for p in processes + ] + elif USE_AMDSMI: + processes_handles = amdsmi.amdsmi_get_process_list(self.handle) + processes_info = [ + amdsmi.amdsmi_get_process_info(self.handle, p) + for p in processes_handles + ] + return [ + {"pid": p["pid"], "used_memory": 
p["memory_usage"]["vram_usage"]} + for p in processes_info + ] + except Exception: return [] def _get_graphics_processes(self): - """Returns the list of processes ids having a graphics context on the - device with the memory used + """Returns the list of processes ids having a graphics context on the device with the memory used https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g7eacf7fa7ba4f4485d166736bf31195e """ try: - processes = pynvml.nvmlDeviceGetGraphicsRunningProcesses(self.handle) - - return [{"pid": p.pid, "used_memory": p.usedGpuMemory} for p in processes] - except pynvml.NVMLError: + if USE_PYNVML: + processes = pynvml.nvmlDeviceGetGraphicsRunningProcesses(self.handle) + return [ + {"pid": p.pid, "used_memory": p.usedGpuMemory} for p in processes + ] + elif USE_AMDSMI: + processes_handles = amdsmi.amdsmi_get_process_list(self.handle) + processes_info = [ + amdsmi.amdsmi_get_process_info(self.handle, p) + for p in processes_handles + ] + return [ + {"pid": p["pid"], "used_memory": p["memory_usage"]["vram_usage"]} + for p in processes_info + if p["engine_usage"]["gfx"] > 0 + ] + except Exception: return [] class AllGPUDevices: def __init__(self): if is_gpu_details_available(): - logger.debug("GPU available. Starting setup") - self.device_count = pynvml.nvmlDeviceGetCount() + if USE_PYNVML: + logger.debug("Nvidia GPU available. Starting setup") + pynvml.nvmlInit() + self.device_count = pynvml.nvmlDeviceGetCount() + elif USE_AMDSMI: + logger.debug("AMD GPU available. Starting setup") + amdsmi.amdsmi_init() + self.device_count = len(amdsmi.amdsmi_get_device_handles()) + else: + logger.error("No GPU interface available") + self.device_count = 0 else: logger.error("There is no GPU available") self.device_count = 0 self.devices = [] for i in range(self.device_count): - handle = pynvml.nvmlDeviceGetHandleByIndex(i) - gpu_device = GPUDevice(handle=handle, gpu_index=i) + if USE_PYNVML: + handle = pynvml.nvmlDeviceGetHandleByIndex(i) + gpu_device = GPUDevice(handle=handle, gpu_index=i) + elif USE_AMDSMI: + handle = amdsmi.amdsmi_get_device_handles()[i] + gpu_device = GPUDevice(handle=handle, gpu_index=i) + else: + raise Exception("No GPU interface available") + self.devices.append(gpu_device) def get_gpu_static_info(self): @@ -206,7 +327,7 @@ def get_gpu_static_info(self): devices_static_info.append(gpu_device.get_static_details()) return devices_static_info - except pynvml.NVMLError: + except Exception: logger.warning("Failed to retrieve gpu static info", exc_info=True) return [] @@ -238,7 +359,7 @@ def get_gpu_details(self): devices_info.append(gpu_device.get_gpu_details()) return devices_info - except pynvml.NVMLError: + except Exception: logger.warning("Failed to retrieve gpu information", exc_info=True) return [] @@ -261,7 +382,7 @@ def get_delta(self, last_duration: Time): devices_info.append(gpu_device.delta(last_duration)) return devices_info - except pynvml.NVMLError: + except Exception: logger.warning("Failed to retrieve gpu information", exc_info=True) return [] @@ -269,8 +390,14 @@ def get_delta(self, last_duration: Time): def is_gpu_details_available(): """Returns True if the GPU details are available.""" try: - pynvml.nvmlInit() - return True + if USE_PYNVML: + pynvml.nvmlInit() + return True + elif USE_AMDSMI: + amdsmi.amdsmi_init() + return True + else: + return False - except pynvml.NVMLError: + except Exception: return False diff --git a/codecarbon/core/util.py b/codecarbon/core/util.py index 7bf66edb3..ef1d7b81b 100644 --- 
a/codecarbon/core/util.py +++ b/codecarbon/core/util.py @@ -117,3 +117,23 @@ def count_cpus() -> int: num_cpus = num_cpus_matches[0].replace("NumCPUs=", "") logger.debug(f"Detected {num_cpus} cpus available on SLURM.") return int(num_cpus) + + +def is_amd_system(): + """Returns True if the system has an amd-smi interface.""" + try: + # Check if amd-smi is available + subprocess.check_output(["amd-smi", "--help"]) + return True + except subprocess.CalledProcessError: + return False + + +def is_nvidia_system(): + """Returns True if the system has an nvidia-smi interface.""" + try: + # Check if nvidia-smi is available + subprocess.check_output(["nvidia-smi", "--help"]) + return True + except Exception: + return False diff --git a/codecarbon/emissions_tracker.py b/codecarbon/emissions_tracker.py index b16bbf0de..249dd8e9c 100644 --- a/codecarbon/emissions_tracker.py +++ b/codecarbon/emissions_tracker.py @@ -18,7 +18,7 @@ from codecarbon.core.config import get_hierarchical_config, parse_gpu_ids from codecarbon.core.emissions import Emissions from codecarbon.core.units import Energy, Power, Time -from codecarbon.core.util import count_cpus, suppress +from codecarbon.core.util import count_cpus, is_amd_system, is_nvidia_system, suppress from codecarbon.external.geography import CloudMetadata, GeoMetadata from codecarbon.external.hardware import CPU, GPU, RAM from codecarbon.external.logger import logger, set_logger_format, set_logger_level @@ -280,7 +280,10 @@ def __init__( # Hardware detection logger.info("[setup] GPU Tracking...") if gpu.is_gpu_details_available(): - logger.info("Tracking Nvidia GPU via pynvml") + if is_nvidia_system(): + logger.info("Tracking Nvidia GPU via pynvml") + elif is_amd_system(): + logger.info("Tracking AMD GPU via amdsmi") gpu_devices = GPU.from_utils(self._gpu_ids) self._hardware.append(gpu_devices) gpu_names = [n["name"] for n in gpu_devices.devices.get_gpu_static_info()] From fc93306ee9d8d14265b71efe75b748d4cb91d264 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 16 Jan 2024 09:46:50 +0000 Subject: [PATCH 2/5] fix energy unit --- codecarbon/core/gpu.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/codecarbon/core/gpu.py b/codecarbon/core/gpu.py index 97158c4c4..fe0edc8e7 100644 --- a/codecarbon/core/gpu.py +++ b/codecarbon/core/gpu.py @@ -108,8 +108,12 @@ def _get_total_energy_consumption(self): if USE_PYNVML: return pynvml.nvmlDeviceGetTotalEnergyConsumption(self.handle) elif USE_AMDSMI: - # returns energy in microjoules (amd-smi metric --energy) - return amdsmi.amdsmi_get_power_measure(self.handle)["energy_accumulator"] + # returns energy in "Energy Status Units" which is equivalent to 15.3 microjoules (amd-smi metric --energy) + return ( + amdsmi.amdsmi_get_power_measure(self.handle)["energy_accumulator"] + * 15.3 + / 1000 + ) else: raise Exception("No GPU interface available") From 0626e4b6a4753ff3ee9b6c684010a94702110444 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 16 Jan 2024 10:04:52 +0000 Subject: [PATCH 3/5] use counter_resolution instead of hard coding it --- codecarbon/core/gpu.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/codecarbon/core/gpu.py b/codecarbon/core/gpu.py index fe0edc8e7..6fe632a2c 100644 --- a/codecarbon/core/gpu.py +++ b/codecarbon/core/gpu.py @@ -108,12 +108,9 @@ def _get_total_energy_consumption(self): if USE_PYNVML: return pynvml.nvmlDeviceGetTotalEnergyConsumption(self.handle) elif USE_AMDSMI: - # returns energy in "Energy Status Units" which is 
equivalent to 15.3 microjoules (amd-smi metric --energy) - return ( - amdsmi.amdsmi_get_power_measure(self.handle)["energy_accumulator"] - * 15.3 - / 1000 - ) + # returns energy in "Energy Status Units" which is equivalent to around 15.3 microjoules + energy = amdsmi.amdsmi_dev_get_energy_count(self.handle) + return energy["power"] * energy["counter_resolution"] / 1000 else: raise Exception("No GPU interface available") From 37f07ecf9b4ac5781389747fd0a2a92d35ce1197 Mon Sep 17 00:00:00 2001 From: benoit-cty <4-benoit-cty@users.noreply.git.leximpact.dev> Date: Fri, 26 Jan 2024 12:40:28 +0100 Subject: [PATCH 4/5] wip : handle AMD and Nvidia at the same time --- codecarbon/core/gpu.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/codecarbon/core/gpu.py b/codecarbon/core/gpu.py index 6fe632a2c..a69c98086 100644 --- a/codecarbon/core/gpu.py +++ b/codecarbon/core/gpu.py @@ -279,34 +279,35 @@ def _get_graphics_processes(self): class AllGPUDevices: + devices = [] + device_count:int = 0 + def __init__(self): + self.devices = [] if is_gpu_details_available(): if USE_PYNVML: logger.debug("Nvidia GPU available. Starting setup") pynvml.nvmlInit() self.device_count = pynvml.nvmlDeviceGetCount() - elif USE_AMDSMI: + for i in range(self.device_count): + handle = pynvml.nvmlDeviceGetHandleByIndex(i) + gpu_device = GPUDevice(handle=handle, gpu_index=i) + self.devices.append(gpu_device) + if USE_AMDSMI: logger.debug("AMD GPU available. Starting setup") amdsmi.amdsmi_init() self.device_count = len(amdsmi.amdsmi_get_device_handles()) + for i in range(self.device_count): + handle = amdsmi.amdsmi_get_device_handles()[i] + gpu_device = GPUDevice(handle=handle, gpu_index=i) + self.devices.append(gpu_device) else: logger.error("No GPU interface available") - self.device_count = 0 else: logger.error("There is no GPU available") - self.device_count = 0 - self.devices = [] - for i in range(self.device_count): - if USE_PYNVML: - handle = pynvml.nvmlDeviceGetHandleByIndex(i) - gpu_device = GPUDevice(handle=handle, gpu_index=i) - elif USE_AMDSMI: - handle = amdsmi.amdsmi_get_device_handles()[i] - gpu_device = GPUDevice(handle=handle, gpu_index=i) - else: - raise Exception("No GPU interface available") + self.device_count = len(self.devices) - self.devices.append(gpu_device) + def get_gpu_static_info(self): """Get all GPUs static information. 
From 0002c2e57681ac3afec8b8c09f32249434deb735 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Mon, 29 Jan 2024 14:36:50 +0000 Subject: [PATCH 5/5] added support for amd and nvidia at the same time --- codecarbon/core/gpu.py | 335 +++++++++++++++----------------- codecarbon/core/util.py | 20 -- codecarbon/emissions_tracker.py | 13 +- 3 files changed, 169 insertions(+), 199 deletions(-) diff --git a/codecarbon/core/gpu.py b/codecarbon/core/gpu.py index a69c98086..c846badf9 100644 --- a/codecarbon/core/gpu.py +++ b/codecarbon/core/gpu.py @@ -1,32 +1,66 @@ +import subprocess +from typing import List, Any from collections import namedtuple from dataclasses import dataclass, field + from codecarbon.core.units import Energy, Power, Time -from codecarbon.core.util import is_amd_system, is_nvidia_system from codecarbon.external.logger import logger -USE_AMDSMI = False -USE_PYNVML = False -if is_nvidia_system(): +def is_rocm_system(): + """Returns True if the system has an rocm-smi interface.""" + try: + # Check if rocm-smi is available + subprocess.check_output(["rocm-smi", "--help"]) + return True + except subprocess.CalledProcessError: + return False + + +def is_nvidia_system(): + """Returns True if the system has an nvidia-smi interface.""" + try: + # Check if nvidia-smi is available + subprocess.check_output(["nvidia-smi", "--help"]) + return True + except Exception: + return False + + +try: import pynvml - USE_PYNVML = True + PYNVML_AVAILABLE = True +except ImportError: + if is_nvidia_system(): + logger.warning( + "Nvidia GPU detected but pynvml is not available. " + "Please install pynvml to get GPU metrics." + ) + PYNVML_AVAILABLE = False -if is_amd_system(): +try: import amdsmi - USE_AMDSMI = True + AMDSMI_AVAILABLE = True +except ImportError: + if is_rocm_system(): + logger.warning( + "AMD GPU detected but amdsmi is not available. " + "Please install amdsmi to get GPU metrics." 
+ ) + AMDSMI_AVAILABLE = False @dataclass class GPUDevice: - handle: any + handle: Any gpu_index: int - # Energy consumed in kWh - energy_delta: Energy = field(default_factory=lambda: Energy(0)) # Power based on reading power: Power = field(default_factory=lambda: Power(0)) + # Energy consumed in kWh + energy_delta: Energy = field(default_factory=lambda: Energy(0)) # Last energy reading in kWh last_energy: Energy = field(default_factory=lambda: Energy(0)) @@ -101,213 +135,184 @@ def _to_utf8(self, str_or_bytes): return str_or_bytes + +@dataclass +class NvidiaGPUDevice(GPUDevice): def _get_total_energy_consumption(self): """Returns total energy consumption for this GPU in millijoules (mJ) since the driver was last reloaded https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g732ab899b5bd18ac4bfb93c02de4900a """ - if USE_PYNVML: - return pynvml.nvmlDeviceGetTotalEnergyConsumption(self.handle) - elif USE_AMDSMI: - # returns energy in "Energy Status Units" which is equivalent to around 15.3 microjoules - energy = amdsmi.amdsmi_dev_get_energy_count(self.handle) - return energy["power"] * energy["counter_resolution"] / 1000 - else: - raise Exception("No GPU interface available") + return pynvml.nvmlDeviceGetTotalEnergyConsumption(self.handle) def _get_gpu_name(self): """Returns the name of the GPU device https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1ga5361803e044c6fdf3b08523fb6d1481 """ - if USE_PYNVML: - name = pynvml.nvmlDeviceGetName(self.handle) - elif USE_AMDSMI: - name = amdsmi.amdsmi_get_board_info(self.handle)["manufacturer_name"] - else: - raise Exception("No GPU interface available") - + name = pynvml.nvmlDeviceGetName(self.handle) return self._to_utf8(name) def _get_uuid(self): """Returns the globally unique GPU device UUID https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g72710fb20f30f0c2725ce31579832654 """ - if USE_PYNVML: - uuid = pynvml.nvmlDeviceGetUUID(self.handle) - elif USE_AMDSMI: - uuid = amdsmi.amdsmi_get_device_uuid(self.handle) - else: - raise Exception("No GPU interface available") - + uuid = pynvml.nvmlDeviceGetUUID(self.handle) return self._to_utf8(uuid) def _get_memory_info(self): """Returns memory info in bytes https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g2dfeb1db82aa1de91aa6edf941c85ca8 """ - if USE_PYNVML: - return pynvml.nvmlDeviceGetMemoryInfo(self.handle) - elif USE_AMDSMI: - # returns memory in megabytes (amd-smi metric --mem-usage) - memory_info = amdsmi.amdsmi_get_vram_usage(self.handle) - AMDMemory = namedtuple("AMDMemory", ["total", "used", "free"]) - return AMDMemory( - total=memory_info["vram_total"] * 1024 * 1024, - used=memory_info["vram_used"] * 1024 * 1024, - free=(memory_info["vram_total"] - memory_info["vram_used"]) - * 1024 - * 1024, - ) - else: - raise Exception("No GPU interface available") + return pynvml.nvmlDeviceGetMemoryInfo(self.handle) def _get_temperature(self): """Returns degrees in the Celsius scale https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g92d1c5182a14dd4be7090e3c1480b121 """ - if USE_PYNVML: - return pynvml.nvmlDeviceGetTemperature( - self.handle, - sensor=pynvml.NVML_TEMPERATURE_GPU, - ) - elif USE_AMDSMI: - return amdsmi.amdsmi_dev_get_temp_metric( - self.handle, - sensor_type=amdsmi.AmdSmiTemperatureType.EDGE, - metric=amdsmi.AmdSmiTemperatureMetric.CURRENT, - ) - else: - raise 
Exception("No GPU interface available") + return pynvml.nvmlDeviceGetTemperature( + self.handle, sensor=pynvml.NVML_TEMPERATURE_GPU + ) def _get_power_usage(self): """Returns power usage in milliwatts https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g7ef7dff0ff14238d08a19ad7fb23fc87 """ - if USE_PYNVML: - return pynvml.nvmlDeviceGetPowerUsage(self.handle) - elif USE_AMDSMI: - # returns power in Watts (amd-smi metric --power) - return ( - amdsmi.amdsmi_get_power_measure(self.handle)["average_socket_power"] - * 1000 - ) - else: - raise Exception("No GPU interface available") + return pynvml.nvmlDeviceGetPowerUsage(self.handle) def _get_power_limit(self): """Returns max power usage in milliwatts https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g263b5bf552d5ec7fcd29a088264d10ad """ - try: - if USE_PYNVML: - return pynvml.nvmlDeviceGetEnforcedPowerLimit(self.handle) - elif USE_AMDSMI: - # returns power limit in Watts (amd-smi static --limit) - return ( - amdsmi.amdsmi_get_power_measure(self.handle)["power_limit"] * 1000 - ) - except Exception: - return None + return pynvml.nvmlDeviceGetEnforcedPowerLimit(self.handle) def _get_gpu_utilization(self): """Returns the % of utilization of the kernels during the last sample https://docs.nvidia.com/deploy/nvml-api/structnvmlUtilization__t.html#structnvmlUtilization__t """ - if USE_PYNVML: - return pynvml.nvmlDeviceGetUtilizationRates(self.handle).gpu - elif USE_AMDSMI: - return amdsmi.amdsmi_get_gpu_activity(self.handle)["gfx_activity"] - else: - raise Exception("No GPU interface available") + return pynvml.nvmlDeviceGetUtilizationRates(self.handle).gpu def _get_compute_mode(self): """Returns the compute mode of the GPU https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceEnumvs.html#group__nvmlDeviceEnumvs_1gbed1b88f2e3ba39070d31d1db4340233 """ - if USE_PYNVML: - return pynvml.nvmlDeviceGetComputeMode(self.handle) - elif USE_AMDSMI: - return None - else: - raise Exception("No GPU interface available") + return pynvml.nvmlDeviceGetComputeMode(self.handle) def _get_compute_processes(self): """Returns the list of processes ids having a compute context on the device with the memory used https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g46ceaea624d5c96e098e03c453419d68 """ - try: - if USE_PYNVML: - processes = pynvml.nvmlDeviceGetComputeRunningProcesses(self.handle) - return [ - {"pid": p.pid, "used_memory": p.usedGpuMemory} for p in processes - ] - elif USE_AMDSMI: - processes_handles = amdsmi.amdsmi_get_process_list(self.handle) - processes_info = [ - amdsmi.amdsmi_get_process_info(self.handle, p) - for p in processes_handles - ] - return [ - {"pid": p["pid"], "used_memory": p["memory_usage"]["vram_usage"]} - for p in processes_info - ] - except Exception: - return [] + processes = pynvml.nvmlDeviceGetComputeRunningProcesses(self.handle) + return [{"pid": p.pid, "used_memory": p.usedGpuMemory} for p in processes] def _get_graphics_processes(self): """Returns the list of processes ids having a graphics context on the device with the memory used https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g7eacf7fa7ba4f4485d166736bf31195e """ - try: - if USE_PYNVML: - processes = pynvml.nvmlDeviceGetGraphicsRunningProcesses(self.handle) - return [ - {"pid": p.pid, "used_memory": p.usedGpuMemory} for p in processes - ] - elif USE_AMDSMI: - processes_handles = 
amdsmi.amdsmi_get_process_list(self.handle) - processes_info = [ - amdsmi.amdsmi_get_process_info(self.handle, p) - for p in processes_handles - ] - return [ - {"pid": p["pid"], "used_memory": p["memory_usage"]["vram_usage"]} - for p in processes_info - if p["engine_usage"]["gfx"] > 0 - ] - except Exception: - return [] + processes = pynvml.nvmlDeviceGetGraphicsRunningProcesses(self.handle) + return [{"pid": p.pid, "used_memory": p.usedGpuMemory} for p in processes] + + +class AMDGPUDevice(GPUDevice): + def _get_total_energy_consumption(self): + """Returns energy in "Energy Status Units" which is equivalent to around 15.3 microjoules""" + energy_count = amdsmi.amdsmi_dev_get_energy_count(self.handle) + energy = energy_count["power"] * energy_count["counter_resolution"] / 1000 + return energy + + def _get_gpu_name(self): + """Returns the name of the GPU device""" + name = amdsmi.amdsmi_get_board_info(self.handle)["manufacturer_name"] + return self._to_utf8(name) + + def _get_uuid(self): + """Returns the globally unique GPU device UUID""" + uuid = amdsmi.amdsmi_get_device_uuid(self.handle) + return self._to_utf8(uuid) + + def _get_memory_info(self): + """Returns memory info in bytes""" + memory_info = amdsmi.amdsmi_get_vram_usage(self.handle) + AMDMemory = namedtuple("AMDMemory", ["total", "used", "free"]) + return AMDMemory( + total=memory_info["vram_total"] * 1024 * 1024, + used=memory_info["vram_used"] * 1024 * 1024, + free=(memory_info["vram_total"] - memory_info["vram_used"]) * 1024 * 1024, + ) + + def _get_temperature(self): + """Returns degrees in the Celsius scale""" + return amdsmi.amdsmi_dev_get_temp_metric( + self.handle, + sensor_type=amdsmi.AmdSmiTemperatureType.EDGE, + metric=amdsmi.AmdSmiTemperatureMetric.CURRENT, + ) + + def _get_power_usage(self): + """Returns power usage in milliwatts""" + return ( + amdsmi.amdsmi_get_power_measure(self.handle)["average_socket_power"] * 1000 + ) + + def _get_power_limit(self): + """Returns max power usage in milliwatts""" + return amdsmi.amdsmi_get_power_measure(self.handle)["power_limit"] * 1000 + + def _get_gpu_utilization(self): + """Returns the % of utilization of the kernels during the last sample""" + return amdsmi.amdsmi_get_gpu_activity(self.handle)["gfx_activity"] + + def _get_compute_mode(self): + """Returns the compute mode of the GPU""" + return None + + def _get_compute_processes(self): + """Returns the list of processes ids having a compute context on the device with the memory used""" + processes_handles = amdsmi.amdsmi_get_process_list(self.handle) + processes_infos = [ + amdsmi.amdsmi_get_process_info(self.handle, p) for p in processes_handles + ] + return [ + {"pid": p["pid"], "used_memory": p["memory_usage"]["vram_mem"]} + for p in processes_infos + ] + + def _get_graphics_processes(self): + """Returns the list of processes ids having a graphics context on the device with the memory used""" + processes_handles = amdsmi.amdsmi_get_process_list(self.handle) + processes_infos = [ + amdsmi.amdsmi_get_process_info(self.handle, p) for p in processes_handles + ] + return [ + {"pid": p["pid"], "used_memory": p["memory_usage"]["vram_usage"]} + for p in processes_infos + if p["engine_usage"]["gfx"] > 0 + ] class AllGPUDevices: - devices = [] - device_count:int = 0 - + device_count: int + devices: List[GPUDevice] + def __init__(self): self.devices = [] - if is_gpu_details_available(): - if USE_PYNVML: - logger.debug("Nvidia GPU available. 
Starting setup") - pynvml.nvmlInit() - self.device_count = pynvml.nvmlDeviceGetCount() - for i in range(self.device_count): - handle = pynvml.nvmlDeviceGetHandleByIndex(i) - gpu_device = GPUDevice(handle=handle, gpu_index=i) - self.devices.append(gpu_device) - if USE_AMDSMI: - logger.debug("AMD GPU available. Starting setup") - amdsmi.amdsmi_init() - self.device_count = len(amdsmi.amdsmi_get_device_handles()) - for i in range(self.device_count): - handle = amdsmi.amdsmi_get_device_handles()[i] - gpu_device = GPUDevice(handle=handle, gpu_index=i) - self.devices.append(gpu_device) - else: - logger.error("No GPU interface available") - else: - logger.error("There is no GPU available") - self.device_count = len(self.devices) - + if is_nvidia_system() and PYNVML_AVAILABLE: + logger.debug("PyNVML available. Starting setup") + pynvml.nvmlInit() + nvidia_devices_count = pynvml.nvmlDeviceGetCount() + for i in range(nvidia_devices_count): + handle = pynvml.nvmlDeviceGetHandleByIndex(i) + nvidia_gpu_device = NvidiaGPUDevice(handle=handle, gpu_index=i) + self.devices.append(nvidia_gpu_device) + + if is_rocm_system() and AMDSMI_AVAILABLE: + logger.debug("AMDSMI available. Starting setup") + amdsmi.amdsmi_init() + amd_devices_handles = amdsmi.amdsmi_get_device_handles() + for i, handle in enumerate(amd_devices_handles): + amd_gpu_device = AMDGPUDevice(handle=handle, gpu_index=i) + self.devices.append(amd_gpu_device) + + self.device_count = len(self.devices) def get_gpu_static_info(self): """Get all GPUs static information. @@ -357,7 +362,7 @@ def get_gpu_details(self): try: devices_info = [] for i in range(self.device_count): - gpu_device: GPUDevice = self.devices[i] + gpu_device = self.devices[i] devices_info.append(gpu_device.get_gpu_details()) return devices_info @@ -380,26 +385,10 @@ def get_delta(self, last_duration: Time): try: devices_info = [] for i in range(self.device_count): - gpu_device: GPUDevice = self.devices[i] + gpu_device = self.devices[i] devices_info.append(gpu_device.delta(last_duration)) return devices_info except Exception: logger.warning("Failed to retrieve gpu information", exc_info=True) return [] - - -def is_gpu_details_available(): - """Returns True if the GPU details are available.""" - try: - if USE_PYNVML: - pynvml.nvmlInit() - return True - elif USE_AMDSMI: - amdsmi.amdsmi_init() - return True - else: - return False - - except Exception: - return False diff --git a/codecarbon/core/util.py b/codecarbon/core/util.py index ef1d7b81b..7bf66edb3 100644 --- a/codecarbon/core/util.py +++ b/codecarbon/core/util.py @@ -117,23 +117,3 @@ def count_cpus() -> int: num_cpus = num_cpus_matches[0].replace("NumCPUs=", "") logger.debug(f"Detected {num_cpus} cpus available on SLURM.") return int(num_cpus) - - -def is_amd_system(): - """Returns True if the system has an amd-smi interface.""" - try: - # Check if amd-smi is available - subprocess.check_output(["amd-smi", "--help"]) - return True - except subprocess.CalledProcessError: - return False - - -def is_nvidia_system(): - """Returns True if the system has an nvidia-smi interface.""" - try: - # Check if nvidia-smi is available - subprocess.check_output(["nvidia-smi", "--help"]) - return True - except Exception: - return False diff --git a/codecarbon/emissions_tracker.py b/codecarbon/emissions_tracker.py index 249dd8e9c..cb8469e37 100644 --- a/codecarbon/emissions_tracker.py +++ b/codecarbon/emissions_tracker.py @@ -18,7 +18,7 @@ from codecarbon.core.config import get_hierarchical_config, parse_gpu_ids from codecarbon.core.emissions 
import Emissions from codecarbon.core.units import Energy, Power, Time -from codecarbon.core.util import count_cpus, is_amd_system, is_nvidia_system, suppress +from codecarbon.core.util import count_cpus, suppress from codecarbon.external.geography import CloudMetadata, GeoMetadata from codecarbon.external.hardware import CPU, GPU, RAM from codecarbon.external.logger import logger, set_logger_format, set_logger_level @@ -279,11 +279,12 @@ def __init__( # Hardware detection logger.info("[setup] GPU Tracking...") - if gpu.is_gpu_details_available(): - if is_nvidia_system(): - logger.info("Tracking Nvidia GPU via pynvml") - elif is_amd_system(): - logger.info("Tracking AMD GPU via amdsmi") + if gpu.is_nvidia_system() or gpu.is_rocm_system(): + if gpu.is_nvidia_system(): + logger.info("Tracking Nvidia GPUs via PyNVML") + elif gpu.is_rocm_system(): + logger.info("Tracking AMD GPUs via AMDSMI") + gpu_devices = GPU.from_utils(self._gpu_ids) self._hardware.append(gpu_devices) gpu_names = [n["name"] for n in gpu_devices.devices.get_gpu_static_info()]
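
Reviewer note on PATCH 2/5 and PATCH 3/5 (energy unit): the AMD energy counter is reported in "Energy Status Units" ticks, and the series converts ticks to millijoules so that the AMD path returns the same unit as nvmlDeviceGetTotalEnergyConsumption. Below is a minimal worked example of that arithmetic, assuming the amdsmi_dev_get_energy_count() return shape used in PATCH 3/5 ("power" = accumulated ticks, "counter_resolution" = microjoules per tick, roughly 15.3 on current hardware); the sample reading is made up:

    # Illustrative only: mirrors the AMD branch of _get_total_energy_consumption.
    # Dict keys follow the shape assumed in PATCH 3/5, not an independently verified amdsmi API.
    reading = {"power": 1_000_000, "counter_resolution": 15.3}  # hypothetical counter read

    microjoules = reading["power"] * reading["counter_resolution"]  # ticks -> uJ
    millijoules = microjoules / 1000                                # uJ -> mJ (NVML's unit)
    kilowatt_hours = millijoules / 3.6e9                            # 1 kWh = 3.6e9 mJ

    print(millijoules)     # 15300.0 mJ
    print(kilowatt_hours)  # ~4.25e-06 kWh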
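
Reviewer note on PATCH 5/5 (resulting API): a minimal sketch of how the reworked AllGPUDevices container is exercised once this series is applied, on a host where pynvml and/or amdsmi import successfully. Only names defined in codecarbon/core/gpu.py after PATCH 5/5 are used; "name" is the only get_gpu_static_info() field relied on here, and the printing is illustrative:

    # Sketch against this branch; not part of the series itself.
    from codecarbon.core.gpu import AllGPUDevices

    gpus = AllGPUDevices()  # initializes NVML and/or AMD SMI, whichever is available
    print(f"tracking {gpus.device_count} GPU(s)")

    for static in gpus.get_gpu_static_info():
        print(static["name"])  # vendor-reported device/board name

    for details in gpus.get_gpu_details():
        print(details)  # per-device live metrics as returned by GPUDevice.get_gpu_details()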