Diffstat (limited to 'glances/plugins/gpu/__init__.py')
-rw-r--r--  glances/plugins/gpu/__init__.py  346
1 file changed, 346 insertions, 0 deletions
diff --git a/glances/plugins/gpu/__init__.py b/glances/plugins/gpu/__init__.py
index e69de29b..3d3a73cf 100644
--- a/glances/plugins/gpu/__init__.py
+++ b/glances/plugins/gpu/__init__.py
@@ -0,0 +1,346 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Glances.
+#
+# Copyright (C) 2020 Kirby Banman <kirby.banman@gmail.com>
+#
+# SPDX-License-Identifier: LGPL-3.0-only
+#
+
+"""GPU plugin (limited to NVIDIA chipsets)."""
+
+from glances.globals import nativestr, to_fahrenheit
+from glances.logger import logger
+from glances.plugins.plugin.model import GlancesPluginModel
+
+# In Glances 3.1.4 or higher, we use the py3nvml lib (see issue #1523)
+try:
+ import py3nvml.py3nvml as pynvml
+except Exception as e:
+ import_error_tag = True
+ # Log a warning if the py3nvml import fails
+ logger.warning("Missing Python Lib ({}), Nvidia GPU plugin is disabled".format(e))
+else:
+ import_error_tag = False
+
+# Define the history items list
+# All items in this list will be historised if the --enable-history tag is set
+items_history_list = [
+ {'name': 'proc', 'description': 'GPU processor', 'y_unit': '%'},
+ {'name': 'mem', 'description': 'Memory consumption', 'y_unit': '%'},
+]
+
+
+class PluginModel(GlancesPluginModel):
+ """Glances GPU plugin (limited to NVIDIA chipsets).
+
+ stats is a list of dictionaries with one entry per GPU
+ """
+
+ def __init__(self, args=None, config=None):
+ """Init the plugin."""
+ super(PluginModel, self).__init__(
+ args=args, config=config, items_history_list=items_history_list, stats_init_value=[]
+ )
+
+ # Init the Nvidia API
+ self.init_nvidia()
+
+ # We want to display the stat in the curse interface
+ self.display_curse = True
+
+ def init_nvidia(self):
+ """Init the NVIDIA API."""
+ if import_error_tag:
+ self.nvml_ready = False
+ return self.nvml_ready
+
+ try:
+ pynvml.nvmlInit()
+ self.device_handles = get_device_handles()
+ self.nvml_ready = True
+ except Exception:
+ logger.debug("pynvml could not be initialized.")
+ self.nvml_ready = False
+
+ return self.nvml_ready
+
+ def get_key(self):
+ """Return the key of the list."""
+ return 'gpu_id'
+
+ @GlancesPluginModel._check_decorator
+ @GlancesPluginModel._log_result_decorator
+ def update(self):
+ """Update the GPU stats."""
+ # Init new stats
+ stats = self.get_init_value()
+
+ if not self.nvml_ready:
+ # !!!
+ # Uncomment to test on computer without GPU
+ # One GPU sample:
+ # self.stats = [
+ # {
+ # "key": "gpu_id",
+ # "gpu_id": 0,
+ # "name": "Fake GeForce GTX",
+ # "mem": 5.792331695556641,
+ # "proc": 4,
+ # "temperature": 26,
+ # "fan_speed": 30
+ # }
+ # ]
+ # Two GPU sample:
+ # self.stats = [
+ # {
+ # "key": "gpu_id",
+ # "gpu_id": 0,
+ # "name": "Fake GeForce GTX1",
+ # "mem": 5.792331695556641,
+ # "proc": 4,
+ # "temperature": 26,
+ # "fan_speed": 30
+ # },
+ # {
+ # "key": "gpu_id",
+ # "gpu_id": 1,
+ # "name": "Fake GeForce GTX2",
+ # "mem": 15,
+ # "proc": 8,
+ # "temperature": 65,
+ # "fan_speed": 75
+ # }
+ # ]
+ return self.stats
+
+ if self.input_method == 'local':
+ stats = self.get_device_stats()
+ elif self.input_method == 'snmp':
+ # not available
+ pass
+
+ # Update the stats
+ self.stats = stats
+
+ return self.stats
+
+ def update_views(self):
+ """Update stats views."""
+ # Call the parent class method
+ super(PluginModel, self).update_views()
+
+ # Add specific information
+ # Alert
+ for i in self.stats:
+ # Init the views for the current GPU
+ self.views[i[self.get_key()]] = {'proc': {}, 'mem': {}, 'temperature': {}}
+ # Processor alert
+ if 'proc' in i:
+ alert = self.get_alert(i['proc'], header='proc')
+ self.views[i[self.get_key()]]['proc']['decoration'] = alert
+ # Memory alert
+ if 'mem' in i:
+ alert = self.get_alert(i['mem'], header='mem')
+ self.views[i[self.get_key()]]['mem']['decoration'] = alert
+ # Temperature alert
+ if 'temperature' in i:
+ alert = self.get_alert(i['temperature'], header='temperature')
+ self.views[i[self.get_key()]]['temperature']['decoration'] = alert
+
+ return True
+
+ def msg_curse(self, args=None, max_width=None):
+ """Return the dict to display in the curse interface."""
+ # Init the return message
+ ret = []
+
+ # Only process if stats exist and the plugin is not disabled (issue #871)
+ if not self.stats or self.is_disabled():
+ return ret
+
+ # Check if all GPUs have the same name
+ same_name = all(s['name'] == self.stats[0]['name'] for s in self.stats)
+
+ # gpu_stats contains the first GPU in the list
+ gpu_stats = self.stats[0]
+
+ # Header
+ header = ''
+ if len(self.stats) > 1:
+ header += '{} '.format(len(self.stats))
+ if same_name:
+ header += '{} {}'.format('GPU', gpu_stats['name'])
+ else:
+ header += '{}'.format('GPU')
+ msg = header[:17]
+ ret.append(self.curse_add_line(msg, "TITLE"))
+
+ # Build the string message
+ if len(self.stats) == 1 or args.meangpu:
+ # GPU stats summary or single GPU
+ # New line
+ ret.append(self.curse_new_line())
+ # GPU PROC
+ try:
+ mean_proc = sum(s['proc'] for s in self.stats if s is not None) / len(self.stats)
+ except TypeError:
+ mean_proc_msg = '{:>4}'.format('N/A')
+ else:
+ mean_proc_msg = '{:>3.0f}%'.format(mean_proc)
+ if len(self.stats) > 1:
+ msg = '{:13}'.format('proc mean:')
+ else:
+ msg = '{:13}'.format('proc:')
+ ret.append(self.curse_add_line(msg))
+ ret.append(
+ self.curse_add_line(
+ mean_proc_msg, self.get_views(item=gpu_stats[self.get_key()], key='proc', option='decoration')
+ )
+ )
+ # New line
+ ret.append(self.curse_new_line())
+ # GPU MEM
+ try:
+ mean_mem = sum(s['mem'] for s in self.stats if s is not None) / len(self.stats)
+ except TypeError:
+ mean_mem_msg = '{:>4}'.format('N/A')
+ else:
+ mean_mem_msg = '{:>3.0f}%'.format(mean_mem)
+ if len(self.stats) > 1:
+ msg = '{:13}'.format('mem mean:')
+ else:
+ msg = '{:13}'.format('mem:')
+ ret.append(self.curse_add_line(msg))
+ ret.append(
+ self.curse_add_line(
+ mean_mem_msg, self.get_views(item=gpu_stats[self.get_key()], key='mem', option='decoration')
+ )
+ )
+ # New line
+ ret.append(self.curse_new_line())
+ # GPU TEMPERATURE
+ try:
+ mean_temperature = sum(s['temperature'] for s in self.stats if s is not None) / len(self.stats)
+ except TypeError:
+ mean_temperature_msg = '{:>4}'.format('N/A')
+ else:
+ unit = 'C'
+ if args.fahrenheit:
+ mean_temperature = to_fahrenheit(mean_temperature)
+ unit = 'F'
+ mean_temperature_msg = '{:>3.0f}{}'.format(mean_temperature, unit)
+ if len(self.stats) > 1:
+ msg = '{:13}'.format('temp mean:')
+ else:
+ msg = '{:13}'.format('temperature:')
+ ret.append(self.curse_add_line(msg))
+ ret.append(
+ self.curse_add_line(
+ mean_temperature_msg,
+ self.get_views(item=gpu_stats[self.get_key()], key='temperature', option='decoration'),
+ )
+ )
+ else:
+ # Multi GPU
+ # Temperature is not displayed in this mode...
+ for gpu_stats in self.stats:
+ # New line
+ ret.append(self.curse_new_line())
+ # GPU ID + PROC + MEM + TEMPERATURE
+ id_msg = '{}'.format(gpu_stats['gpu_id'])
+ try:
+ proc_msg = '{:>3.0f}%'.format(gpu_stats['proc'])
+ except (ValueError, TypeError):
+ proc_msg = '{:>4}'.format('N/A')
+ try:
+ mem_msg = '{:>3.0f}%'.format(gpu_stats['mem'])
+ except (ValueError, TypeError):
+ mem_msg = '{:>4}'.format('N/A')
+ msg = '{}: {} mem: {}'.format(id_msg, proc_msg, mem_msg)
+ ret.append(self.curse_add_line(msg))
+
+ return ret
+
+ def get_device_stats(self):
+ """Get GPU stats."""
+ stats = []
+
+ for index, device_handle in enumerate(self.device_handles):
+ device_stats = dict()
+ # Dictionary key is the GPU_ID
+ device_stats['key'] = self.get_key()
+ # GPU id (for multiple GPU, start at 0)
+ device_stats['gpu_id'] = index
+ # GPU name
+ device_stats['name'] = get_device_name(device_handle)
+ # Memory consumption in % (not available on all GPUs)
+ device_stats['mem'] = get_mem(device_handle)
+ # Processor consumption in %
+ device_stats['proc'] = get_proc(device_handle)
+ # Processor temperature in °C
+ device_stats['temperature'] = get_temperature(device_handle)
+ # Fan speed in %
+ device_stats['fan_speed'] = get_fan_speed(device_handle)
+ stats.append(device_stats)
+
+ return stats
+
+ def exit(self):
+ """Overwrite the exit method to close the GPU API."""
+ if self.nvml_ready:
+ try:
+ pynvml.nvmlShutdown()
+ except Exception as e:
+ logger.debug("pynvml failed to shutdown correctly ({})".format(e))
+
+ # Call the parent class exit method
+ super(PluginModel, self).exit()
+
+
+def get_device_handles():
+ """Get a list of NVML device handles, one per device.
+
+ May raise NVMLError.
+ """
+ return [pynvml.nvmlDeviceGetHandleByIndex(i) for i in range(pynvml.nvmlDeviceGetCount())]
+
+
+def get_device_name(device_handle):
+ """Get GPU device name."""
+ try:
+ return nativestr(pynvml.nvmlDeviceGetName(device_handle))
+ except pynvml.NVMLError:
+ return "NVIDIA"
+
+
+def get_mem(device_handle):
+ """Get GPU device memory consumption in percent."""
+ try:
+ memory_info = pynvml.nvmlDeviceGetMemoryInfo(device_handle)
+ return memory_info.used * 100.0 / memory_info.total
+ except pynvml.NVMLError:
+ return None
+
+
+def get_proc(device_handle):
+ """Get GPU device CPU consumption in percent."""
+ try:
+ return pynvml.nvmlDeviceGetUtilizationRates(device_handle).gpu
+ except pynvml.NVMLError:
+ return None
+
+
+def get_temperature(device_handle):
+ """Get GPU device CPU temperature in Celsius."""
+ try:
+ return pynvml.nvmlDeviceGetTemperature(device_handle, pynvml.NVML_TEMPERATURE_GPU)
+ except pynvml.NVMLError:
+ return None
+
+
+def get_fan_speed(device_handle):
+ """Get GPU device fan speed in percent."""
+ try:
+ return pynvml.nvmlDeviceGetFanSpeed(device_handle)
+ except pynvml.NVMLError:
+ return None
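
For reference, a minimal standalone sketch of the py3nvml calls wrapped by the helper functions above. It assumes the py3nvml package and an NVIDIA driver are installed; it is illustrative only and not part of this commit.

# Illustrative sketch only: query each GPU with py3nvml, mirroring the
# get_device_handles / get_proc / get_mem helpers added in this file.
import py3nvml.py3nvml as pynvml

pynvml.nvmlInit()
try:
    for i in range(pynvml.nvmlDeviceGetCount()):
        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
        name = pynvml.nvmlDeviceGetName(handle)
        # GPU utilization in percent (the plugin's 'proc' field)
        proc = pynvml.nvmlDeviceGetUtilizationRates(handle).gpu
        # Memory consumption in percent (the plugin's 'mem' field)
        mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
        mem = mem_info.used * 100.0 / mem_info.total
        print("GPU {}: {} proc={}% mem={:.1f}%".format(i, name, proc, mem))
finally:
    pynvml.nvmlShutdown()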