summaryrefslogtreecommitdiffstats
path: root/glances/plugins/gpu/cards/nvidia.py
blob: b7b30bb1c7ae46a062d98db2c633ee64094d9a0f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# -*- coding: utf-8 -*-
#
# This file is part of Glances.
#
# SPDX-FileCopyrightText: 2024 Nicolas Hennion <nicolas@nicolargo.com>
#
# SPDX-License-Identifier: LGPL-3.0-only
#

"""NVidia Extension unit for Glances' GPU plugin."""

from glances.logger import logger
from glances.globals import nativestr
try:
    import pynvml
except Exception as e:
    import_nvidia_error_tag = True
    # Any exception raised by the import (not only ImportError) lands here:
    # log a warning and flag the failure so NvidiaGPU skips NVML entirely.
    logger.warning("Missing Python Lib ({}), Nvidia GPU plugin is disabled".format(e))
else:
    import_nvidia_error_tag = False


class NvidiaGPU:
    """Nvidia GPU cards, queried through NVML (pynvml).

    ``device_handles`` holds one NVML handle per detected card. It is an
    empty list when pynvml could not be imported or when NVML could not be
    initialized; ``get_device_stats()`` then returns an empty list.
    """

    def __init__(self):
        """Init Nvidia GPU card class."""
        self.device_handles = []
        # Set once nvmlInit() has succeeded, so exit() can pair it with
        # nvmlShutdown() even when no device handle could be retrieved.
        self._nvml_initialized = False
        if import_nvidia_error_tag:
            return
        try:
            pynvml.nvmlInit()
            self._nvml_initialized = True
            self.device_handles = get_device_handles()
        except Exception:
            logger.debug("pynvml could not be initialized.")
            self.device_handles = []

    def exit(self):
        """Close NVidia GPU class.

        Shut NVML down if it was initialized by this instance (or if device
        handles are present). A shutdown failure is logged, never raised.
        """
        # getattr() keeps exit() usable on instances built without __init__.
        nvml_initialized = getattr(self, '_nvml_initialized', False)
        if not nvml_initialized and not self.device_handles:
            return
        try:
            pynvml.nvmlShutdown()
        except Exception as e:
            logger.debug("pynvml failed to shutdown correctly ({})".format(e))
        self._nvml_initialized = False

    def get_device_stats(self):
        """Get Nvidia GPU stats.

        Return a list with one dict per device handle, in handle order.
        Values come from the module-level get_* helpers.
        """
        stats = []

        for index, device_handle in enumerate(self.device_handles):
            stats.append(
                {
                    # Dictionary key is the GPU_ID
                    'key': 'gpu_id',
                    # GPU id (for multiple GPU, start at 0)
                    'gpu_id': f'nvidia{index}',
                    # GPU name
                    'name': get_device_name(device_handle),
                    # Memory consumption in % (not available on all GPU)
                    'mem': get_mem(device_handle),
                    # Processor consumption in %
                    'proc': get_proc(device_handle),
                    # Processor temperature in °C
                    'temperature': get_temperature(device_handle),
                    # Fan speed in %
                    'fan_speed': get_fan_speed(device_handle),
                }
            )

        return stats


def get_device_handles():
    """Return the NVML device handles, one per device, ordered by index.

    NVML errors raised by pynvml (NVMLError) are not caught here.
    """
    device_count = pynvml.nvmlDeviceGetCount()
    handles = []
    for device_index in range(device_count):
        handles.append(pynvml.nvmlDeviceGetHandleByIndex(device_index))
    return handles


def get_device_name(device_handle):
    """Return the GPU device name as a native string.

    Fall back to the generic "NVIDIA" label when NVML raises an error.
    """
    try:
        raw_name = pynvml.nvmlDeviceGetName(device_handle)
        device_name = nativestr(raw_name)
    except pynvml.NVMLError:
        device_name = "NVIDIA"
    return device_name


def get_mem(device_handle):
    """Get GPU device memory consumption in percent.

    Return None when NVML raises an error for this device, or when the
    reported total memory is 0 (the percentage is then undefined).
    """
    try:
        memory_info = pynvml.nvmlDeviceGetMemoryInfo(device_handle)
    except pynvml.NVMLError:
        return None
    # Guard the division: a zero total would raise ZeroDivisionError, which
    # is not an NVMLError and would escape to the caller.
    if not memory_info.total:
        return None
    return memory_info.used * 100.0 / memory_info.total


def get_proc(device_handle):
    """Get GPU device processor (GPU) utilization in percent.

    Return None when NVML raises an error for this device.
    """
    try:
        utilization = pynvml.nvmlDeviceGetUtilizationRates(device_handle)
        gpu_usage = utilization.gpu
    except pynvml.NVMLError:
        gpu_usage = None
    return gpu_usage


def get_temperature(device_handle):
    """Get GPU device temperature in Celsius (NVML_TEMPERATURE_GPU sensor).

    Return None when NVML raises an error for this device.
    """
    try:
        celsius = pynvml.nvmlDeviceGetTemperature(device_handle, pynvml.NVML_TEMPERATURE_GPU)
    except pynvml.NVMLError:
        celsius = None
    return celsius


def get_fan_speed(device_handle):
    """Get GPU device fan speed in percent.

    Return None when NVML raises an error for this device.
    """
    try:
        fan_percent = pynvml.nvmlDeviceGetFanSpeed(device_handle)
    except pynvml.NVMLError:
        fan_percent = None
    return fan_percent