diff options
-rw-r--r-- | AppImage/README.md | 8 | ||||
-rwxr-xr-x | AppImage/make_appimage.sh | 25 | ||||
-rwxr-xr-x | AppImage/make_appimage_ubuntu_1804_docker.sh | 45 | ||||
-rw-r--r-- | README.markdown | 10 | ||||
-rw-r--r-- | include/nvtop/extract_gpuinfo_common.h | 4 | ||||
-rw-r--r-- | src/extract_gpuinfo_amdgpu.c | 12 | ||||
-rw-r--r-- | src/extract_gpuinfo_apple.m | 1 | ||||
-rw-r--r-- | src/extract_gpuinfo_ascend.c | 2 | ||||
-rw-r--r-- | src/extract_gpuinfo_intel.c | 2 | ||||
-rw-r--r-- | src/extract_gpuinfo_mali_common.c | 1 | ||||
-rw-r--r-- | src/extract_gpuinfo_msm.c | 2 | ||||
-rw-r--r-- | src/extract_gpuinfo_nvidia.c | 341 | ||||
-rw-r--r-- | src/extract_gpuinfo_panfrost.c | 1 | ||||
-rw-r--r-- | src/extract_gpuinfo_panthor.c | 1 | ||||
-rw-r--r-- | src/interface.c | 20 |
15 files changed, 333 insertions, 142 deletions
diff --git a/AppImage/README.md b/AppImage/README.md new file mode 100644 index 0000000..7d2a2b1 --- /dev/null +++ b/AppImage/README.md @@ -0,0 +1,8 @@ +# Build the AppImage + +```bash +podman pull ubuntu:18.04 +podman run --interactive --tty --rm --volume $PWD:/nvtop ubuntu:18.04 +cd nvtop +./AppImage/make_appimage.sh +```
\ No newline at end of file diff --git a/AppImage/make_appimage.sh b/AppImage/make_appimage.sh index a57bad6..d1ab74e 100755 --- a/AppImage/make_appimage.sh +++ b/AppImage/make_appimage.sh @@ -1,28 +1,43 @@ #!/usr/bin/env bash +install_deps() { + apt-get update + apt-get install -y gcc g++ libncurses5-dev libncursesw5-dev libdrm-dev wget file libudev-dev ninja-build make python3-venv +} + configure_nvtop() { - cmake -S .. -B nvtop_build -DNVIDIA_SUPPORT=ON -DAMDGPU_SUPPORT=ON -DCMAKE_INSTALL_PREFIX=/usr + cmake -B build -S . -DCMAKE_BUILD_TYPE=Release -DUSE_LIBUDEV_OVER_LIBSYSTEMD=ON -DCMAKE_INSTALL_PREFIX=/usr } build_nvtop() { - cmake --build nvtop_build + cmake --build build } install_nvtop_AppDir() { - DESTDIR=../AppDir cmake --build nvtop_build --target install + DESTDIR=$PWD/AppDir cmake --build build --target install } get_linuxdeploy() { wget -nc https://github.com/linuxdeploy/linuxdeploy/releases/download/continuous/linuxdeploy-x86_64.AppImage chmod u+x linuxdeploy-x86_64.AppImage + ./linuxdeploy-x86_64.AppImage --appimage-extract +} + +get_cmake() { + python3 -m venv .venv + source .venv/bin/activate + pip install --upgrade pip + pip install cmake } create_AppImage() { + install_deps + get_cmake configure_nvtop build_nvtop install_nvtop_AppDir get_linuxdeploy - ./linuxdeploy-x86_64.AppImage --appdir AppDir --output appimage + ./squashfs-root/AppRun --appdir AppDir --output appimage --exclude-library="*udev*" --desktop-file AppDir/usr/share/applications/nvtop.desktop --icon-file AppDir/usr/share/icons/nvtop.svg } -create_AppImage
\ No newline at end of file +create_AppImage diff --git a/AppImage/make_appimage_ubuntu_1804_docker.sh b/AppImage/make_appimage_ubuntu_1804_docker.sh deleted file mode 100755 index 63afa90..0000000 --- a/AppImage/make_appimage_ubuntu_1804_docker.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env bash - -install_deps() { - apt-get update - apt-get install -y gcc g++ libncurses5-dev libncursesw5-dev libdrm-dev wget file libudev-dev -} - -configure_nvtop() { - cmake ../.. -DNVIDIA_SUPPORT=ON -DAMDGPU_SUPPORT=ON -DINTEL_SUPPORT=ON -DUSE_LIBUDEV_OVER_LIBSYSTEMD=ON -DCMAKE_INSTALL_PREFIX=/usr -} - -build_nvtop() { - cmake --build . -} - -install_nvtop_AppDir() { - DESTDIR=../AppDir cmake --build . --target install -} - -get_linuxdeploy() { - wget -nc https://github.com/linuxdeploy/linuxdeploy/releases/download/continuous/linuxdeploy-x86_64.AppImage - chmod u+x linuxdeploy-x86_64.AppImage - ./linuxdeploy-x86_64.AppImage --appimage-extract -} - -get_cmake() { - wget -nc https://github.com/Kitware/CMake/releases/download/v3.18.0/cmake-3.18.0.tar.gz - tar zxf cmake-3.18.0.tar.gz - ./cmake-3.18.0/bootstrap --prefix=/usr && make && make install -} - -create_AppImage() { - install_deps - get_cmake - mkdir nvtop_build - cd nvtop_build - configure_nvtop - build_nvtop - install_nvtop_AppDir - cd .. - get_linuxdeploy - ./squashfs-root/AppRun --appdir AppDir --output appimage --exclude-library="*udev*" -} - -create_AppImage diff --git a/README.markdown b/README.markdown index 610cc33..4ecc6f7 100644 --- a/README.markdown +++ b/README.markdown @@ -4,9 +4,13 @@ NVTOP What is NVTOP? -------------- -NVTOP stands for Neat Videocard TOP, a (h)top like task monitor for AMD, Intel -and NVIDIA GPUs. It can handle multiple GPUs and print information -about them in a htop-familiar way. +NVTOP stands for Neat Videocard TOP, a (h)top like task monitor for GPUs and +accelerators. It can handle multiple GPUs and print information about them in a +htop-familiar way. 
+ +Currently supported vendors are AMD (Linux amdgpu driver), Apple (limited M1 & +M2 support), Huawei (Ascend), Intel (Linux i915 driver), NVIDIA (Linux +proprietary drivers), Qualcomm Adreno (Linux MSM driver). Because a picture is worth a thousand words: diff --git a/include/nvtop/extract_gpuinfo_common.h b/include/nvtop/extract_gpuinfo_common.h index 9b89d9c..917f33d 100644 --- a/include/nvtop/extract_gpuinfo_common.h +++ b/include/nvtop/extract_gpuinfo_common.h @@ -76,6 +76,7 @@ struct gpuinfo_static_info { unsigned l2cache_size; unsigned n_exec_engines; bool integrated_graphics; + bool encode_decode_shared; unsigned char valid[(gpuinfo_static_info_count + CHAR_BIT - 1) / CHAR_BIT]; }; @@ -102,6 +103,7 @@ enum gpuinfo_dynamic_info_valid { gpuinfo_gpu_temp_valid, gpuinfo_power_draw_valid, gpuinfo_power_draw_max_valid, + gpuinfo_multi_instance_mode_valid, gpuinfo_dynamic_info_count, }; @@ -125,7 +127,7 @@ struct gpuinfo_dynamic_info { unsigned int gpu_temp; // GPU temperature °celsius unsigned int power_draw; // Power usage in milliwatts unsigned int power_draw_max; // Max power usage in milliwatts - bool encode_decode_shared; // True if encode and decode is shared (Intel) + bool multi_instance_mode; // True if the GPU is in multi-instance mode unsigned char valid[(gpuinfo_dynamic_info_count + CHAR_BIT - 1) / CHAR_BIT]; }; diff --git a/src/extract_gpuinfo_amdgpu.c b/src/extract_gpuinfo_amdgpu.c index e166cc9..39b20b9 100644 --- a/src/extract_gpuinfo_amdgpu.c +++ b/src/extract_gpuinfo_amdgpu.c @@ -68,6 +68,7 @@ static typeof(drmDropMaster) *_drmDropMaster; static typeof(amdgpu_device_initialize) *_amdgpu_device_initialize; static typeof(amdgpu_device_deinitialize) *_amdgpu_device_deinitialize; static typeof(amdgpu_get_marketing_name) *_amdgpu_get_marketing_name; +static typeof(amdgpu_query_hw_ip_info) *_amdgpu_query_hw_ip_info; static typeof(amdgpu_query_gpu_info) *_amdgpu_query_gpu_info; static typeof(amdgpu_query_info) *_amdgpu_query_info; static 
typeof(amdgpu_query_sensor_info) *_amdgpu_query_sensor_info; @@ -218,6 +219,7 @@ static bool gpuinfo_amdgpu_init(void) { _amdgpu_device_initialize = dlsym(libdrm_amdgpu_handle, "amdgpu_device_initialize"); _amdgpu_device_deinitialize = dlsym(libdrm_amdgpu_handle, "amdgpu_device_deinitialize"); _amdgpu_get_marketing_name = dlsym(libdrm_amdgpu_handle, "amdgpu_get_marketing_name"); + _amdgpu_query_hw_ip_info = dlsym(libdrm_amdgpu_handle, "amdgpu_query_hw_ip_info"); _amdgpu_query_info = dlsym(libdrm_amdgpu_handle, "amdgpu_query_info"); _amdgpu_query_gpu_info = dlsym(libdrm_amdgpu_handle, "amdgpu_query_gpu_info"); _amdgpu_query_sensor_info = dlsym(libdrm_amdgpu_handle, "amdgpu_query_sensor_info"); @@ -512,6 +514,7 @@ static void gpuinfo_amdgpu_populate_static_info(struct gpu_info *_gpu_info) { const char *name = NULL; static_info->integrated_graphics = false; + static_info->encode_decode_shared = false; RESET_ALL(static_info->valid); if (libdrm_amdgpu_handle && _amdgpu_get_marketing_name) @@ -635,6 +638,14 @@ static void gpuinfo_amdgpu_populate_static_info(struct gpu_info *_gpu_info) { if (info_query_success && (info.ids_flags & AMDGPU_IDS_FLAGS_FUSION)) { static_info->integrated_graphics = true; } + + // Checking if Encode and Decode are unified:AMDGPU_INFO_HW_IP_INFO + if (_amdgpu_query_hw_ip_info) { + struct drm_amdgpu_info_hw_ip vcn_ip_info; + if (_amdgpu_query_hw_ip_info(gpu_info->amdgpu_device, AMDGPU_HW_IP_VCN_ENC, 0, &vcn_ip_info) == 0) { + static_info->encode_decode_shared = vcn_ip_info.hw_ip_version_major >= 4; + } + } } static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info) { @@ -645,7 +656,6 @@ static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info) { uint32_t out32; RESET_ALL(dynamic_info->valid); - dynamic_info->encode_decode_shared = false; if (libdrm_amdgpu_handle && _amdgpu_query_gpu_info) info_query_success = !_amdgpu_query_gpu_info(gpu_info->amdgpu_device, &info); diff --git a/src/extract_gpuinfo_apple.m 
b/src/extract_gpuinfo_apple.m index 84d8f32..8e14f7a 100644 --- a/src/extract_gpuinfo_apple.m +++ b/src/extract_gpuinfo_apple.m @@ -115,6 +115,7 @@ static void gpuinfo_apple_populate_static_info(struct gpu_info *_gpu_info) { SET_VALID(gpuinfo_device_name_valid, static_info->valid); static_info->integrated_graphics = [gpu_info->device location] == MTLDeviceLocationBuiltIn; + static_info->encode_decode_shared = true; } static void gpuinfo_apple_refresh_dynamic_info(struct gpu_info *_gpu_info) { diff --git a/src/extract_gpuinfo_ascend.c b/src/extract_gpuinfo_ascend.c index 58a529c..be8dc99 100644 --- a/src/extract_gpuinfo_ascend.c +++ b/src/extract_gpuinfo_ascend.c @@ -147,6 +147,7 @@ static void gpuinfo_ascend_populate_static_info(struct gpu_info *_gpu_info) { struct gpu_info_ascend *gpu_info = container_of(_gpu_info, struct gpu_info_ascend, base); struct gpuinfo_static_info *static_info = &gpu_info->base.static_info; static_info->integrated_graphics = false; + static_info->encode_decode_shared = true; RESET_ALL(static_info->valid); int card_id, device_id; @@ -168,7 +169,6 @@ static void gpuinfo_ascend_refresh_dynamic_info(struct gpu_info *_gpu_info) { struct gpu_info_ascend *gpu_info = container_of(_gpu_info, struct gpu_info_ascend, base); struct gpuinfo_dynamic_info *dynamic_info = &gpu_info->base.dynamic_info; RESET_ALL(dynamic_info->valid); - dynamic_info->encode_decode_shared = false; int card_id, device_id; _decode_card_device_id_from_pdev(_gpu_info->pdev, &card_id, &device_id); diff --git a/src/extract_gpuinfo_intel.c b/src/extract_gpuinfo_intel.c index f4dabc0..a120d9c 100644 --- a/src/extract_gpuinfo_intel.c +++ b/src/extract_gpuinfo_intel.c @@ -315,6 +315,7 @@ void gpuinfo_intel_populate_static_info(struct gpu_info *_gpu_info) { const char *dev_name; static_info->integrated_graphics = false; + static_info->encode_decode_shared = true; RESET_ALL(static_info->valid); if (nvtop_device_get_property_value(gpu_info->driver_device, "ID_MODEL_FROM_DATABASE", 
&dev_name) >= 0) { @@ -348,7 +349,6 @@ void gpuinfo_intel_refresh_dynamic_info(struct gpu_info *_gpu_info) { struct gpuinfo_dynamic_info *dynamic_info = &gpu_info->base.dynamic_info; RESET_ALL(dynamic_info->valid); - dynamic_info->encode_decode_shared = true; nvtop_device *card_dev_copy; const char *syspath; diff --git a/src/extract_gpuinfo_mali_common.c b/src/extract_gpuinfo_mali_common.c index 9521ad7..a1c5600 100644 --- a/src/extract_gpuinfo_mali_common.c +++ b/src/extract_gpuinfo_mali_common.c @@ -318,7 +318,6 @@ void mali_common_refresh_dynamic_info(struct gpuinfo_dynamic_info *dynamic_info, const char *meminfo_available) { RESET_ALL(dynamic_info->valid); - dynamic_info->encode_decode_shared = true; rewind(state->meminfo_file); fflush(state->meminfo_file); diff --git a/src/extract_gpuinfo_msm.c b/src/extract_gpuinfo_msm.c index 6b3bc97..f05b7fe 100644 --- a/src/extract_gpuinfo_msm.c +++ b/src/extract_gpuinfo_msm.c @@ -466,6 +466,7 @@ void gpuinfo_msm_populate_static_info(struct gpu_info *_gpu_info) { struct gpuinfo_static_info *static_info = &gpu_info->base.static_info; static_info->integrated_graphics = true; + static_info->encode_decode_shared = true; RESET_ALL(static_info->valid); uint64_t gpuid; @@ -496,7 +497,6 @@ void gpuinfo_msm_refresh_dynamic_info(struct gpu_info *_gpu_info) { struct gpuinfo_dynamic_info *dynamic_info = &gpu_info->base.dynamic_info; RESET_ALL(dynamic_info->valid); - dynamic_info->encode_decode_shared = true; // GPU clock uint64_t clock_val; diff --git a/src/extract_gpuinfo_nvidia.c b/src/extract_gpuinfo_nvidia.c index f0356ed..541318e 100644 --- a/src/extract_gpuinfo_nvidia.c +++ b/src/extract_gpuinfo_nvidia.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2021 Maxime Schmitt <maxime.schmitt91@gmail.com> + * Copyright (C) 2021-2024 Maxime Schmitt <maxime.schmitt91@gmail.com> * * This file is part of Nvtop. 
* @@ -71,6 +71,11 @@ static nvmlReturn_t (*nvmlDeviceGetMaxPcieLinkWidth)(nvmlDevice_t device, unsign typedef enum { NVML_TEMPERATURE_THRESHOLD_SHUTDOWN = 0, NVML_TEMPERATURE_THRESHOLD_SLOWDOWN = 1, + NVML_TEMPERATURE_THRESHOLD_MEM_MAX = 2, + NVML_TEMPERATURE_THRESHOLD_GPU_MAX = 3, + NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_MIN = 4, + NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_CURR = 5, + NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_MAX = 6, } nvmlTemperatureThresholds_t; static nvmlReturn_t (*nvmlDeviceGetTemperatureThreshold)(nvmlDevice_t device, nvmlTemperatureThresholds_t thresholdType, @@ -100,9 +105,18 @@ typedef struct { unsigned long long total; unsigned long long free; unsigned long long used; -} nvmlMemory_t; +} nvmlMemory_v1_t; -static nvmlReturn_t (*nvmlDeviceGetMemoryInfo)(nvmlDevice_t device, nvmlMemory_t *memory); +typedef struct { + unsigned int version; + unsigned long long total; + unsigned long long reserved; + unsigned long long free; + unsigned long long used; +} nvmlMemory_v2_t; + +static nvmlReturn_t (*nvmlDeviceGetMemoryInfo)(nvmlDevice_t device, nvmlMemory_v1_t *memory); +static nvmlReturn_t (*nvmlDeviceGetMemoryInfo_v2)(nvmlDevice_t device, nvmlMemory_v2_t *memory); static nvmlReturn_t (*nvmlDeviceGetCurrPcieLinkGeneration)(nvmlDevice_t device, unsigned int *currLinkGen); @@ -140,15 +154,56 @@ static nvmlReturn_t (*nvmlDeviceGetDecoderUtilization)(nvmlDevice_t device, unsi typedef struct { unsigned int pid; unsigned long long usedGpuMemory; - // unsigned int gpuInstanceId; // not supported by older NVIDIA drivers - // unsigned int computeInstanceId; // not supported by older NVIDIA drivers -} nvmlProcessInfo_t; +} nvmlProcessInfo_v1_t; -static nvmlReturn_t (*nvmlDeviceGetGraphicsRunningProcesses)(nvmlDevice_t device, unsigned int *infoCount, - nvmlProcessInfo_t *infos); +typedef struct { + unsigned int pid; + unsigned long long usedGpuMemory; + unsigned int gpuInstanceId; + unsigned int computeInstanceId; +} nvmlProcessInfo_v2_t; -static nvmlReturn_t 
(*nvmlDeviceGetComputeRunningProcesses)(nvmlDevice_t device, unsigned int *infoCount, - nvmlProcessInfo_t *infos); +typedef struct { + unsigned int pid; + unsigned long long usedGpuMemory; + unsigned int gpuInstanceId; + unsigned int computeInstanceId; + // This is present in https://github.com/NVIDIA/DCGM/blob/master/sdk/nvidia/nvml/nvml.h#L294 but not the latest driver nvml.h + // unsigned long long usedGpuCcProtectedMemory; +} nvmlProcessInfo_v3_t; + +static nvmlReturn_t (*nvmlDeviceGetGraphicsRunningProcesses_v1)(nvmlDevice_t device, unsigned int *infoCount, + nvmlProcessInfo_v1_t *infos); +static nvmlReturn_t (*nvmlDeviceGetGraphicsRunningProcesses_v2)(nvmlDevice_t device, unsigned int *infoCount, + nvmlProcessInfo_v2_t *infos); +static nvmlReturn_t (*nvmlDeviceGetGraphicsRunningProcesses_v3)(nvmlDevice_t device, unsigned int *infoCount, + nvmlProcessInfo_v3_t *infos); + +static nvmlReturn_t (*nvmlDeviceGetComputeRunningProcesses_v1)(nvmlDevice_t device, unsigned int *infoCount, + nvmlProcessInfo_v1_t *infos); +static nvmlReturn_t (*nvmlDeviceGetComputeRunningProcesses_v2)(nvmlDevice_t device, unsigned int *infoCount, + nvmlProcessInfo_v2_t *infos); +static nvmlReturn_t (*nvmlDeviceGetComputeRunningProcesses_v3)(nvmlDevice_t device, unsigned int *infoCount, + nvmlProcessInfo_v3_t *infos); + +static nvmlReturn_t (*nvmlDeviceGetMPSComputeRunningProcesses_v1)(nvmlDevice_t device, unsigned int *infoCount, + nvmlProcessInfo_v1_t *infos); +static nvmlReturn_t (*nvmlDeviceGetMPSComputeRunningProcesses_v2)(nvmlDevice_t device, unsigned int *infoCount, + nvmlProcessInfo_v2_t *infos); +static nvmlReturn_t (*nvmlDeviceGetMPSComputeRunningProcesses_v3)(nvmlDevice_t device, unsigned int *infoCount, + nvmlProcessInfo_v3_t *infos); + +// Common interface passing void* +static nvmlReturn_t (*nvmlDeviceGetGraphicsRunningProcesses[4])(nvmlDevice_t device, unsigned int *infoCount, + void *infos); +static nvmlReturn_t (*nvmlDeviceGetComputeRunningProcesses[4])(nvmlDevice_t 
device, unsigned int *infoCount, + void *infos); +static nvmlReturn_t (*nvmlDeviceGetMPSComputeRunningProcesses[4])(nvmlDevice_t device, unsigned int *infoCount, + void *infos); + +#define NVML_DEVICE_MIG_DISABLE 0x0 +#define NVML_DEVICE_MIG_ENABLE 0x1 +nvmlReturn_t (*nvmlDeviceGetMigMode)(nvmlDevice_t device, unsigned int *currentMode, unsigned int *pendingMode); static void *libnvidia_ml_handle; @@ -177,6 +232,7 @@ struct gpu_info_nvidia { struct list_head allocate_list; nvmlDevice_t gpuhandle; + bool isInMigMode; unsigned long long last_utilization_timestamp; }; @@ -286,8 +342,10 @@ static bool gpuinfo_nvidia_init(void) { if (!nvmlDeviceGetUtilizationRates) goto init_error_clean_exit; + // Get v2 and fallback to v1 + nvmlDeviceGetMemoryInfo_v2 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetMemoryInfo_v2"); nvmlDeviceGetMemoryInfo = dlsym(libnvidia_ml_handle, "nvmlDeviceGetMemoryInfo"); - if (!nvmlDeviceGetMemoryInfo) + if (!nvmlDeviceGetMemoryInfo_v2 && !nvmlDeviceGetMemoryInfo) goto init_error_clean_exit; nvmlDeviceGetCurrPcieLinkGeneration = dlsym(libnvidia_ml_handle, "nvmlDeviceGetCurrPcieLinkGeneration"); @@ -326,16 +384,49 @@ static bool gpuinfo_nvidia_init(void) { if (!nvmlDeviceGetDecoderUtilization) goto init_error_clean_exit; - nvmlDeviceGetGraphicsRunningProcesses = dlsym(libnvidia_ml_handle, "nvmlDeviceGetGraphicsRunningProcesses"); - if (!nvmlDeviceGetGraphicsRunningProcesses) + nvmlDeviceGetGraphicsRunningProcesses_v3 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetGraphicsRunningProcesses_v3"); + nvmlDeviceGetGraphicsRunningProcesses_v2 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetGraphicsRunningProcesses_v2"); + nvmlDeviceGetGraphicsRunningProcesses_v1 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetGraphicsRunningProcesses"); + if (!nvmlDeviceGetGraphicsRunningProcesses_v3 && !nvmlDeviceGetGraphicsRunningProcesses_v2 && + !nvmlDeviceGetGraphicsRunningProcesses_v1) goto init_error_clean_exit; - nvmlDeviceGetComputeRunningProcesses = dlsym(libnvidia_ml_handle, 
"nvmlDeviceGetComputeRunningProcesses"); - if (!nvmlDeviceGetComputeRunningProcesses) + nvmlDeviceGetGraphicsRunningProcesses[1] = + (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v1; + nvmlDeviceGetGraphicsRunningProcesses[2] = + (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v2; + nvmlDeviceGetGraphicsRunningProcesses[3] = + (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v3; + + nvmlDeviceGetComputeRunningProcesses_v3 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetComputeRunningProcesses_v3"); + nvmlDeviceGetComputeRunningProcesses_v2 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetComputeRunningProcesses_v2"); + nvmlDeviceGetComputeRunningProcesses_v1 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetComputeRunningProcesses"); + if (!nvmlDeviceGetComputeRunningProcesses_v3 && !nvmlDeviceGetComputeRunningProcesses_v2 && + !nvmlDeviceGetComputeRunningProcesses_v1) goto init_error_clean_exit; - // This one might not be available + nvmlDeviceGetComputeRunningProcesses[1] = + (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v1; + nvmlDeviceGetComputeRunningProcesses[2] = + (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v2; + nvmlDeviceGetComputeRunningProcesses[3] = + (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v3; + + // These functions were not available in older NVML libs; don't error if not present + nvmlDeviceGetMPSComputeRunningProcesses_v3 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetMPSComputeRunningProcesses_v3"); + nvmlDeviceGetMPSComputeRunningProcesses_v2 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetMPSComputeRunningProcesses_v2"); + nvmlDeviceGetMPSComputeRunningProcesses_v1 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetMPSComputeRunningProcesses"); + + nvmlDeviceGetMPSComputeRunningProcesses[1] = + 
(nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetMPSComputeRunningProcesses_v1; + nvmlDeviceGetMPSComputeRunningProcesses[2] = + (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetMPSComputeRunningProcesses_v2; + nvmlDeviceGetMPSComputeRunningProcesses[3] = + (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetMPSComputeRunningProcesses_v3; + + // These ones might not be available nvmlDeviceGetProcessUtilization = dlsym(libnvidia_ml_handle, "nvmlDeviceGetProcessUtilization"); + nvmlDeviceGetMigMode = dlsym(libnvidia_ml_handle, "nvmlDeviceGetMigMode"); last_nvml_return_status = nvmlInit(); if (last_nvml_return_status != NVML_SUCCESS) { @@ -420,6 +511,7 @@ static void gpuinfo_nvidia_populate_static_info(struct gpu_info *_gpu_info) { nvmlDevice_t device = gpu_info->gpuhandle; static_info->integrated_graphics = false; + static_info->encode_decode_shared = false; RESET_ALL(static_info->valid); last_nvml_return_status = nvmlDeviceGetName(device, static_info->device_name, MAX_DEVICE_NAME); @@ -457,7 +549,6 @@ static void gpuinfo_nvidia_refresh_dynamic_info(struct gpu_info *_gpu_info) { nvmlClockType_t getMaxClockFrom = NVML_CLOCK_GRAPHICS; RESET_ALL(dynamic_info->valid); - dynamic_info->encode_decode_shared = false; // GPU current speed // Maximum between SM and Graphical @@ -514,13 +605,28 @@ static void gpuinfo_nvidia_refresh_dynamic_info(struct gpu_info *_gpu_info) { SET_VALID(gpuinfo_decoder_rate_valid, dynamic_info->valid); // Device memory info (total,used,free) - nvmlMemory_t memory_info; - last_nvml_return_status = nvmlDeviceGetMemoryInfo(device, &memory_info); - if (last_nvml_return_status == NVML_SUCCESS) { - SET_GPUINFO_DYNAMIC(dynamic_info, total_memory, memory_info.total); - SET_GPUINFO_DYNAMIC(dynamic_info, used_memory, memory_info.used); - SET_GPUINFO_DYNAMIC(dynamic_info, free_memory, memory_info.free); - SET_GPUINFO_DYNAMIC(dynamic_info, mem_util_rate, memory_info.used * 100 / memory_info.total); + bool 
got_meminfo = false; + if (nvmlDeviceGetMemoryInfo_v2) { + nvmlMemory_v2_t memory_info; + memory_info.version = 2; + last_nvml_return_status = nvmlDeviceGetMemoryInfo_v2(device, &memory_info); + if (last_nvml_return_status == NVML_SUCCESS) { + got_meminfo = true; + SET_GPUINFO_DYNAMIC(dynamic_info, total_memory, memory_info.total); + SET_GPUINFO_DYNAMIC(dynamic_info, used_memory, memory_info.used); + SET_GPUINFO_DYNAMIC(dynamic_info, free_memory, memory_info.free); + SET_GPUINFO_DYNAMIC(dynamic_info, mem_util_rate, memory_info.used * 100 / memory_info.total); + } + } + if (!got_meminfo && nvmlDeviceGetMemoryInfo) { + nvmlMemory_v1_t memory_info; + last_nvml_return_status = nvmlDeviceGetMemoryInfo(device, &memory_info); + if (last_nvml_return_status == NVML_SUCCESS) { + SET_GPUINFO_DYNAMIC(dynamic_info, total_memory, memory_info.total); + SET_GPUINFO_DYNAMIC(dynamic_info, used_memory, memory_info.used); + SET_GPUINFO_DYNAMIC(dynamic_info, free_memory, memory_info.free); + SET_GPUINFO_DYNAMIC(dynamic_info, mem_util_rate, memory_info.used * 100 / memory_info.total); + } } // Pcie generation used by the device @@ -562,6 +668,15 @@ static void gpuinfo_nvidia_refresh_dynamic_info(struct gpu_info *_gpu_info) { last_nvml_return_status = nvmlDeviceGetEnforcedPowerLimit(device, &dynamic_info->power_draw_max); if (last_nvml_return_status == NVML_SUCCESS) SET_VALID(gpuinfo_power_draw_max_valid, dynamic_info->valid); + + // MIG mode + if (nvmlDeviceGetMigMode) { + unsigned currentMode, pendingMode; + last_nvml_return_status = nvmlDeviceGetMigMode(device, &currentMode, &pendingMode); + if (last_nvml_return_status == NVML_SUCCESS) { + SET_GPUINFO_DYNAMIC(dynamic_info, multi_instance_mode, currentMode == NVML_DEVICE_MIG_ENABLE); + } + } } static void gpuinfo_nvidia_get_process_utilization(struct gpu_info_nvidia *gpu_info, unsigned num_processes_recovered, @@ -609,69 +724,143 @@ static void gpuinfo_nvidia_get_process_utilization(struct gpu_info_nvidia *gpu_i 
gpu_info->last_utilization_timestamp = newest_timestamp_candidate; free(samples); } + // Mark the ones w/o update since last sample period to 0% usage + for (unsigned j = 0; j < num_processes_recovered; ++j) { + if (!IS_VALID(gpuinfo_process_gpu_usage_valid, processes[j].valid)) + SET_GPUINFO_PROCESS(&processes[j], gpu_usage, 0); + if (!IS_VALID(gpuinfo_process_encode_usage_valid, processes[j].valid)) + SET_GPUINFO_PROCESS(&processes[j], encode_usage, 0); + if (!IS_VALID(gpuinfo_process_decode_usage_valid, processes[j].valid)) + SET_GPUINFO_PROCESS(&processes[j], decode_usage, 0); + } } static void gpuinfo_nvidia_get_running_processes(struct gpu_info *_gpu_info) { struct gpu_info_nvidia *gpu_info = container_of(_gpu_info, struct gpu_info_nvidia, base); nvmlDevice_t device = gpu_info->gpuhandle; + bool validProcessGathering = false; + for (unsigned version = 3; !validProcessGathering && version > 0; version--) { + // Get the size of the actual function being used + size_t sizeof_nvmlProcessInfo; + switch (version) { + case 3: + sizeof_nvmlProcessInfo = sizeof(nvmlProcessInfo_v3_t); + break; + case 2: + sizeof_nvmlProcessInfo = sizeof(nvmlProcessInfo_v2_t); + break; + default: + sizeof_nvmlProcessInfo = sizeof(nvmlProcessInfo_v1_t); + break; + } - _gpu_info->processes_count = 0; - static size_t array_size = 0; - static nvmlProcessInfo_t *retrieved_infos = NULL; - unsigned graphical_count = 0, compute_count = 0, recovered_count; -retry_query_graphical: - recovered_count = array_size; - last_nvml_return_status = nvmlDeviceGetGraphicsRunningProcesses(device, &recovered_count, retrieved_infos); - if (last_nvml_return_status == NVML_ERROR_INSUFFICIENT_SIZE) { - array_size += COMMON_PROCESS_LINEAR_REALLOC_INC; - retrieved_infos = reallocarray(retrieved_infos, array_size, sizeof(*retrieved_infos)); - if (!retrieved_infos) { - perror("Could not re-allocate memory: "); - exit(EXIT_FAILURE); + _gpu_info->processes_count = 0; + static size_t array_size = 0; + static char 
*retrieved_infos = NULL; + unsigned graphical_count = 0, compute_count = 0, recovered_count; + if (nvmlDeviceGetGraphicsRunningProcesses[version]) { + retry_query_graphical: + recovered_count = array_size; + last_nvml_return_status = + nvmlDeviceGetGraphicsRunningProcesses[version](device, &recovered_count, retrieved_infos); + if (last_nvml_return_status == NVML_ERROR_INSUFFICIENT_SIZE) { + array_size += COMMON_PROCESS_LINEAR_REALLOC_INC; + retrieved_infos = reallocarray(retrieved_infos, array_size, sizeof_nvmlProcessInfo); + if (!retrieved_infos) { + perror("Could not re-allocate memory: "); + exit(EXIT_FAILURE); + } + goto retry_query_graphical; + } + if (last_nvml_return_status == NVML_SUCCESS) { + validProcessGathering = true; + graphical_count = recovered_count; + } } - goto retry_query_graphical; - } - if (last_nvml_return_status == NVML_SUCCESS) { - graphical_count = recovered_count; - } -retry_query_compute: - recovered_count = array_size - graphical_count; - last_nvml_return_status = - nvmlDeviceGetComputeRunningProcesses(device, &recovered_count, retrieved_infos + graphical_count); - if (last_nvml_return_status == NVML_ERROR_INSUFFICIENT_SIZE) { - array_size += COMMON_PROCESS_LINEAR_REALLOC_INC; - retrieved_infos = reallocarray(retrieved_infos, array_size, sizeof(*retrieved_infos)); - if (!retrieved_infos) { - perror("Could not re-allocate memory: "); - exit(EXIT_FAILURE); + + if (nvmlDeviceGetComputeRunningProcesses[version]) { + retry_query_compute: + recovered_count = array_size - graphical_count; + last_nvml_return_status = nvmlDeviceGetComputeRunningProcesses[version]( + device, &recovered_count, retrieved_infos + graphical_count * sizeof_nvmlProcessInfo); + if (last_nvml_return_status == NVML_ERROR_INSUFFICIENT_SIZE) { + array_size += COMMON_PROCESS_LINEAR_REALLOC_INC; + retrieved_infos = reallocarray(retrieved_infos, array_size, sizeof_nvmlProcessInfo); + if (!retrieved_infos) { + perror("Could not re-allocate memory: "); + exit(EXIT_FAILURE); + } 
+ goto retry_query_compute; + } + if (last_nvml_return_status == NVML_SUCCESS) { + validProcessGathering = true; + compute_count = recovered_count; + } } - goto retry_query_compute; - } - if (last_nvml_return_status == NVML_SUCCESS) { - compute_count = recovered_count; - } - _gpu_info->processes_count = graphical_count + compute_count; - if (_gpu_info->processes_count > 0) { - if (_gpu_info->processes_count > _gpu_info->processes_array_size) { - _gpu_info->processes_array_size = _gpu_info->processes_count + COMMON_PROCESS_LINEAR_REALLOC_INC; - _gpu_info->processes = - reallocarray(_gpu_info->processes, _gpu_info->processes_array_size, sizeof(*_gpu_info->processes)); - if (!_gpu_info->processes) { - perror("Could not allocate memory: "); - exit(EXIT_FAILURE); + if (nvmlDeviceGetMPSComputeRunningProcesses[version]) { + retry_query_compute_MPS: + recovered_count = array_size - graphical_count - compute_count; + last_nvml_return_status = nvmlDeviceGetMPSComputeRunningProcesses[version]( + device, &recovered_count, retrieved_infos + (graphical_count + compute_count) * sizeof_nvmlProcessInfo); + if (last_nvml_return_status == NVML_ERROR_INSUFFICIENT_SIZE) { + array_size += COMMON_PROCESS_LINEAR_REALLOC_INC; + retrieved_infos = reallocarray(retrieved_infos, array_size, sizeof_nvmlProcessInfo); + if (!retrieved_infos) { + perror("Could not re-allocate memory: "); + exit(EXIT_FAILURE); + } + goto retry_query_compute_MPS; + } + if (last_nvml_return_status == NVML_SUCCESS) { + validProcessGathering = true; + compute_count += recovered_count; } } - memset(_gpu_info->processes, 0, _gpu_info->processes_count * sizeof(*_gpu_info->processes)); - for (unsigned i = 0; i < graphical_count + compute_count; ++i) { - if (i < graphical_count) - _gpu_info->processes[i].type = gpu_process_graphical; - else - _gpu_info->processes[i].type = gpu_process_compute; - _gpu_info->processes[i].pid = retrieved_infos[i].pid; - _gpu_info->processes[i].gpu_memory_usage = 
retrieved_infos[i].usedGpuMemory; - SET_VALID(gpuinfo_process_gpu_memory_usage_valid, _gpu_info->processes[i].valid); + + if (!validProcessGathering) + continue; + + _gpu_info->processes_count = graphical_count + compute_count; + if (_gpu_info->processes_count > 0) { + if (_gpu_info->processes_count > _gpu_info->processes_array_size) { + _gpu_info->processes_array_size = _gpu_info->processes_count + COMMON_PROCESS_LINEAR_REALLOC_INC; + _gpu_info->processes = + reallocarray(_gpu_info->processes, _gpu_info->processes_array_size, sizeof(*_gpu_info->processes)); + if (!_gpu_info->processes) { + perror("Could not allocate memory: "); + exit(EXIT_FAILURE); + } + } + memset(_gpu_info->processes, 0, _gpu_info->processes_count * sizeof(*_gpu_info->processes)); + for (unsigned i = 0; i < graphical_count + compute_count; ++i) { + if (i < graphical_count) + _gpu_info->processes[i].type = gpu_process_graphical; + else + _gpu_info->processes[i].type = gpu_process_compute; + switch (version) { + case 2: { + nvmlProcessInfo_v2_t *pinfo = (nvmlProcessInfo_v2_t *)retrieved_infos; + _gpu_info->processes[i].pid = pinfo[i].pid; + _gpu_info->processes[i].gpu_memory_usage = pinfo[i].usedGpuMemory; + } break; + case 3: { + nvmlProcessInfo_v3_t *pinfo = (nvmlProcessInfo_v3_t *)retrieved_infos; + _gpu_info->processes[i].pid = pinfo[i].pid; + _gpu_info->processes[i].gpu_memory_usage = pinfo[i].usedGpuMemory; + } break; + default: { + nvmlProcessInfo_v1_t *pinfo = (nvmlProcessInfo_v1_t *)retrieved_infos; + _gpu_info->processes[i].pid = pinfo[i].pid; + _gpu_info->processes[i].gpu_memory_usage = pinfo[i].usedGpuMemory; + } break; + } + SET_VALID(gpuinfo_process_gpu_memory_usage_valid, _gpu_info->processes[i].valid); + } } } - gpuinfo_nvidia_get_process_utilization(gpu_info, _gpu_info->processes_count, _gpu_info->processes); + // If the GPU is in MIG mode; process utilization is not supported + if (!(IS_VALID(gpuinfo_multi_instance_mode_valid, gpu_info->base.dynamic_info.valid) && + 
!gpu_info->base.dynamic_info.multi_instance_mode)) + gpuinfo_nvidia_get_process_utilization(gpu_info, _gpu_info->processes_count, _gpu_info->processes); } diff --git a/src/extract_gpuinfo_panfrost.c b/src/extract_gpuinfo_panfrost.c index 46016c7..f99c7be 100644 --- a/src/extract_gpuinfo_panfrost.c +++ b/src/extract_gpuinfo_panfrost.c @@ -210,6 +210,7 @@ void gpuinfo_panfrost_populate_static_info(struct gpu_info *_gpu_info) { struct gpuinfo_static_info *static_info = &gpu_info->base.static_info; static_info->integrated_graphics = true; + static_info->encode_decode_shared = true; RESET_ALL(static_info->valid); uint64_t gpuid; diff --git a/src/extract_gpuinfo_panthor.c b/src/extract_gpuinfo_panthor.c index 18abf99..0eba |