Diffstat (limited to 'glances/plugins/gpu/__init__.py')
-rw-r--r--  glances/plugins/gpu/__init__.py  346
1 file changed, 346 insertions, 0 deletions
diff --git a/glances/plugins/gpu/__init__.py b/glances/plugins/gpu/__init__.py
index e69de29b..3d3a73cf 100644
--- a/glances/plugins/gpu/__init__.py
+++ b/glances/plugins/gpu/__init__.py
@@ -0,0 +1,346 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Glances.
+#
+# Copyright (C) 2020 Kirby Banman <kirby.banman@gmail.com>
+#
+# SPDX-License-Identifier: LGPL-3.0-only
+#
+
+"""GPU plugin (limited to NVIDIA chipsets)."""
+
+from glances.globals import nativestr, to_fahrenheit
+from glances.logger import logger
+from glances.plugins.plugin.model import GlancesPluginModel
+
+# In Glances 3.1.4 or higher, we use the py3nvml lib (see issue #1523)
+try:
+ import py3nvml.py3nvml as pynvml
+except Exception as e:
+ import_error_tag = True
+ # Log a warning if the py3nvml import fails
+ logger.warning("Missing Python Lib ({}), Nvidia GPU plugin is disabled".format(e))
+else:
+ import_error_tag = False
+
+# Define the history items list
+# All items in this list will be historised if the --enable-history tag is set
+items_history_list = [
+ {'name': 'proc', 'description': 'GPU processor', 'y_unit': '%'},
+ {'name': 'mem', 'description': 'Memory consumption', 'y_unit': '%'},
+]
+
+
+class PluginModel(GlancesPluginModel):
+ """Glances GPU plugin (limited to NVIDIA chipsets).
+
+ stats is a list of dictionaries with one entry per GPU
+ """
+
+ def __init__(self, args=None, config=None):
+ """Init the plugin."""
+ super(PluginModel, self).__init__(
+ args=args, config=config, items_history_list=items_history_list, stats_init_value=[]
+ )
+
+ # Init the Nvidia API
+ self.init_nvidia()
+
+ # We want to display the stat in the curse interface
+ self.display_curse = True
+
+ def init_nvidia(self):
+ """Init the NVIDIA API."""
+ if import_error_tag:
+ self.nvml_ready = False
+ return self.nvml_ready
+
+ try:
+ pynvml.nvmlInit()
+ self.device_handles = get_device_handles()
+ self.nvml_ready = True
+ except Exception:
+ logger.debug("pynvml could not be initialized.")
+ self.nvml_ready = False
+
+ return self.nvml_ready
+
+ def get_key(self):
+ """Return the key of the list."""
+ return 'gpu_id'
+
+ @GlancesPluginModel._check_decorator
+ @GlancesPluginModel._log_result_decorator
+ def update(self):
+ """Update the GPU stats."""
+ # Init new stats
+ stats = self.get_init_value()
+
+ if not self.nvml_ready:
+ # !!!
+ # Uncomment to test on computer without GPU
+ # One GPU sample:
+ # self.stats = [
+ # {
+ # "key": "gpu_id",
+ # "gpu_id": 0,
+ # "name": "Fake GeForce GTX",
+ # "mem": 5.792331695556641,
+ # "proc": 4,
+ # "temperature": 26,
+ # "fan_speed": 30
+ # }
+ # ]
+ # Two GPU sample:
+ # self.stats = [
+ # {
+ # "key": "gpu_id",
+ # "gpu_id": 0,
+ # "name": "Fake GeForce GTX1",
+ # "mem": 5.792331695556641,
+ # "proc": 4,
+ # "temperature": 26,
+ # "fan_speed": 30
+ # },
+ # {
+ # "key": "gpu_id",
+ # "gpu_id": 1,
+ # "name": "Fake GeForce GTX2",
+ # "mem": 15,
+ # "proc": 8,
+ # "temperature": 65,
+ # "fan_speed": 75
+ # }
+ # ]
+ return self.stats
+
+ if self.input_method == 'local':
+ stats = self.get_device_stats()
+ elif self.input_method == 'snmp':
+ # not available
+ pass
+
+ # Update the stats
+ self.stats = stats
+
+ return self.stats
+
+ def update_views(self):
+ """Update stats views."""
+ # Call the parent class method
+ super(PluginModel, self).update_views()
+
+ # Add specific information
+ # Alert
+ for i in self.stats:
+ # Init the views for the current GPU
+ self.views[i[self.get_key()]] = {'proc': {}, 'mem': {}, 'temperature': {}}
+ # Processor alert
+ if 'proc' in i:
+ alert = self.get_alert(i['proc'], header='proc')
+ self.views[i[self.get_key()]]['proc']['decoration'] = alert
+ # Memory alert
+ if 'mem' in i:
+ alert = self.get_alert(i['mem'], header='mem')
+ self.views[i[self.get_key()]]['mem']['decoration'] = alert
+ # Temperature alert
+ if 'temperature' in i:
+ alert = self.get_alert(i['temperature'], header='temperature')
+ self.views[i[self.get_key()]]['temperature']['decoration'] = alert
+
+ return True
+
+ def msg_curse(self, args=None, max_width=None):
+ """Return the dict to display in the curse interface."""
+ # Init the return message
+ ret = []
+
+ # Only process if stats exist and the plugin is not disabled (issue #871)
+ if not self.stats or self.is_disabled():
+ return ret
+
+ # Check if all GPUs have the same name
+ same_name = all(s['name'] == self.stats[0]['name'] for s in self.stats)
+
+ # gpu_stats contains the first GPU in the list
+ gpu_stats = self.stats[0]
+
+ # Header
+ header = ''
+ if len(self.stats) > 1:
+ header += '{} '.format(len(self.stats))
+ if same_name:
+ header += '{} {}'.format('GPU', gpu_stats['name'])
+ else:
+ header += '{}'.format('GPU')
+ msg = header[:17]
+ ret.append(self.curse_add_line(msg, "TITLE"))
+
+ # Build the string message
+ if len(self.stats) == 1 or args.meangpu:
+ # GPU stats summary or single GPU
+ # New line
+ ret.append(self.curse_new_line())
+ # GPU PROC
+ try:
+ mean_proc = sum(s['proc'] for s in self.stats if s is not None) / len(self.stats)
+ except TypeError:
+ mean_proc_msg = '{:>4}'.format('N/A')
+ else:
+ mean_proc_msg = '{:>3.0f}%'.format(mean_proc)
+ if len(self.stats) > 1:
+ msg = '{:13}'.format('proc mean:')
+ else:
+ msg = '{:13}'.format('proc:')
+ ret.append(self.curse_add_line(msg))
+ ret.append(
+ self.curse_add_line(
+ mean_proc_msg, self.get_views(item=gpu_stats[self.get_key()], key='proc', option='decoration')
+ )
+ )
+ # New line
+ ret.append(self.curse_new_line())
+ # GPU MEM
+ try:
+ mean_mem = sum(s['mem'] for s in self.stats if s is not None) / len(self.stats)
+ except TypeError:
+ mean_mem_msg = '{:>4}'.format('N/A')
+ else:
+ mean_mem_msg = '{:>3.0f}%'.format(mean_mem)
+ if len(self.stats) > 1:
+ msg = '{:13}'.format('mem mean:')
+ else:
+ msg = '{:13}'.format('mem:')
+ ret.append(self.curse_add_line(msg))
+ ret.append(
+ self.curse_add_line(
+ mean_mem_msg, self.get_views(item=gpu_stats[self.get_key()], key='mem', option='decoration')
+ )
+ )
+ # New line
+ ret.append(self.curse_new_line())
+ # GPU TEMPERATURE
+ try:
+ mean_temperature = sum(s['temperature'] for s in self.stats if s is not None) / len(self.stats)
+ except TypeError:
+ mean_temperature_msg = '{:>4}'.format('N/A')
+ else:
+ unit = 'C'
+ if args.fahrenheit:
+ mean_temperature = to_fahrenheit(mean_temperature)
+ unit = 'F'
+ mean_temperature_msg = '{:>3.0f}{}'.format(mean_temperature, unit)
+ if len(self.stats) > 1:
+ msg = '{:13}'.format('temp mean:')
+ else:
+ msg = '{:13}'.format('temperature:')
+ ret.append(self.curse_add_line(msg))
+ ret.append(
+ self.curse_add_line(
+ mean_temperature_msg,
+ self.get_views(item=gpu_stats[self.get_key()], key='temperature', option='decoration'),
+ )
+ )
+ else:
+ # Multi GPU
+ # Temperature is not displayed in this mode...
+ for gpu_stats in self.stats:
+ # New line
+ ret.append(self.curse_new_line())
+ # GPU ID + PROC + MEM + TEMPERATURE
+ id_msg = '{}'.format(gpu_stats['gpu_id'])
+ try:
+ proc_msg = '{:>3.0f}%'.format(gpu_stats['proc'])
+ except (ValueError, TypeError):
+ proc_msg = '{:>4}'.format('N/A')
+ try:
+ mem_msg = '{:>3.0f}%'.format(gpu_stats['mem'])
+ except (ValueError, TypeError):
+ mem_msg = '{:>4}'.format('N/A')
+ msg = '{}: {} mem: {}'.format(id_msg, proc_msg, mem_msg)
+ ret.append(self.curse_add_line(msg))
+
+ return ret
+
+ def get_device_stats(self):
+ """Get GPU stats."""
+ stats = []
+
+ for index, device_handle in enumerate(self.device_handles):
+ device_stats = dict()
+ # Dictionary key is the GPU_ID
+ device_stats['key'] = self.get_key()
+ # GPU id (for multiple GPU, start at 0)
+ device_stats['gpu_id'] = index
+ # GPU name
+ device_stats['name'] = get_device_name(device_handle)
+ # Memory consumption in % (not available on all GPUs)
+ device_stats['mem'] = get_mem(device_handle)
+ # Processor consumption in %
+ device_stats['proc'] = get_proc(device_handle)
+ # Processor temperature in °C
+ device_stats['temperature'] = get_temperature(device_handle)
+ # Fan speed in %
+ device_stats['fan_speed'] = get_fan_speed(device_handle)
+ stats.append(device_stats)
+
+ return stats
+
+ def exit(self):
+ """Overwrite the exit method to close the GPU API."""
+ if self.nvml_ready:
+ try:
+ pynvml.nvmlShutdown()
+ except Exception as e:
+ logger.debug("pynvml failed to shutdown correctly ({})".format(e))
+
+ # Call the parent class exit method
+ super(PluginModel, self).exit()
+
+
+def get_device_handles():
+ """Get a list of NVML device handles, one per device.
+
+ May raise NVMLError.
+ """
+ return [pynvml.nvmlDeviceGetHandleByIndex(i) for i in range(pynvml.nvmlDeviceGetCount())]
+
+
+def get_device_name(device_handle):
+ """Get GPU device name."""
+ try:
+ return nativestr(pynvml.nvmlDeviceGetName(device_handle))
+ except pynvml.NVMLError:
+ return "NVIDIA"
+
+
+def get_mem(device_handle):
+ """Get GPU device memory consumption in percent."""
+ try:
+ memory_info = pynvml.nvmlDeviceGetMemoryInfo(device_handle)
+ return memory_info.used * 100.0 / memory_info.total
+ except pynvml.NVMLError:
+ return None
+
+
+def get_proc(device_handle):
+ """Get GPU device CPU consumption in percent."""
+ try:
+ return pynvml.nvmlDeviceGetUtilizationRates(device_handle).gpu
+ except pynvml.NVMLError:
+ return None
+
+
+def get_temperature(device_handle):
+ """Get GPU device CPU temperature in Celsius."""
+ try:
+ return pynvml.nvmlDeviceGetTemperature(device_handle, pynvml.NVML_TEMPERATURE_GPU)
+ except pynvml.NVMLError:
+ return None
+
+
+def get_fan_speed(device_handle):
+ """Get GPU device fan speed in percent."""
+ try:
+ return pynvml.nvmlDeviceGetFanSpeed(device_handle)
+ except pynvml.NVMLError:
+ return None
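
For reference, a minimal standalone sketch of the py3nvml calls wrapped by the helper functions above. It assumes the py3nvml package and an NVIDIA driver are installed; it is illustrative only and not part of this commit.

# Illustrative sketch only: query each GPU with py3nvml, mirroring the
# get_device_handles / get_proc / get_mem helpers added in this file.
import py3nvml.py3nvml as pynvml

pynvml.nvmlInit()
try:
    for i in range(pynvml.nvmlDeviceGetCount()):
        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
        name = pynvml.nvmlDeviceGetName(handle)
        # GPU utilization in percent (the plugin's 'proc' field)
        proc = pynvml.nvmlDeviceGetUtilizationRates(handle).gpu
        # Memory consumption in percent (the plugin's 'mem' field)
        mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
        mem = mem_info.used * 100.0 / mem_info.total
        print("GPU {}: {} proc={}% mem={:.1f}%".format(i, name, proc, mem))
finally:
    pynvml.nvmlShutdown()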