1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
package nvidia
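// TODO(optimization): every update function below runs its own nvidia-smi
// query. Cache the most recent result together with a timestamp and refresh
// it only when it is older than a configurable interval.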
import (
"strconv"
"strings"
"time"
"github.com/rai-project/nvidia-smi"
//"github.com/NVIDIA/gpu-monitoring-tools/bindings/go/nvml"
"github.com/xxxserxxx/gotop/v3/devices"
)
// init hands the three NVIDIA update callbacks defined in this file to the
// devices package when the package is loaded.
func init() {
	// Temperature readings.
	devices.RegisterTemp(updateNvidiaTemp)
	// Memory readings.
	devices.RegisterMem(updateNvidiaMem)
	// GPU utilization is registered through the CPU hook.
	devices.RegisterCPU(updateNvidiaUsage)
}
// updateNvidiaTemp queries nvidia-smi and stores one temperature reading per
// GPU in temps, keyed by "<ProductName> <index>".
//
// GPUs that report "N/A" as their temperature are skipped. The reading is
// expected to look like "<number> C" (presumably degrees Celsius - confirm
// against the nvidia-smi output); the " C" part is removed, the rest is
// parsed as a float and truncated to an int.
//
// The returned map is never nil. It holds the query error under the key
// "nvidia" when nvidia-smi could not be read, or a parse error under the GPU's
// name for each reading that could not be converted.
func updateNvidiaTemp(temps map[string]int) map[string]error {
	errs := make(map[string]error)

	info, err := nvidiasmi.New()
	if err != nil {
		errs["nvidia"] = err
		return errs
	}
	if !info.HasGPU() {
		return errs
	}

	for i := range info.GPUS {
		gpu := info.GPUS[i]
		if gpu.GpuTemp == "N/A" {
			// The GPU does not export a temperature measure.
			continue
		}
		name := gpu.ProductName + " " + strconv.Itoa(i)
		reading := strings.ReplaceAll(gpu.GpuTemp, " C", "")
		// bitSize must be 32 or 64; the previous value (10) was not a valid
		// precision and only worked because non-32 values are treated as 64.
		temperature, err := strconv.ParseFloat(reading, 64)
		if err != nil {
			errs[name] = err
			continue
		}
		temps[name] = int(temperature)
	}
	return errs
}
// updateNvidiaMem queries nvidia-smi and stores one devices.MemoryInfo per GPU
// in mems, keyed by "<ProductName> <index>" (the same naming scheme used by
// updateNvidiaTemp and updateNvidiaUsage).
//
// GPUs that report "N/A" for memory utilization, total, or used memory are
// skipped. The three values are parsed as integers; utilization is treated as
// a percentage and copied to UsedPercent. When total and/or used is reported
// as zero, the missing figure is derived by reconcileNvidiaMem.
//
// The returned map is never nil. It holds the query error under the key
// "nvidia" when nvidia-smi could not be read, or a parse error under
// "<name>Mem", "<name>Total", or "<name>Used" for the field that failed.
func updateNvidiaMem(mems map[string]devices.MemoryInfo) map[string]error {
	errs := make(map[string]error)

	info, err := nvidiasmi.New()
	if err != nil {
		errs["nvidia"] = err
		return errs
	}
	if !info.HasGPU() {
		return errs
	}

	for i := range info.GPUS {
		gpu := info.GPUS[i]
		if gpu.MemoryUtil == "N/A" || gpu.Total == "N/A" || gpu.Used == "N/A" {
			// The GPU does not export sufficient memory measures.
			continue
		}
		// Separate the product name from the index with a space so the same
		// GPU carries the same label in the temperature, usage, and memory maps.
		name := gpu.ProductName + " " + strconv.Itoa(i)

		mem, err := strconv.Atoi(gpu.MemoryUtil)
		if err != nil {
			errs[name+"Mem"] = err
			continue
		}
		total, err := strconv.Atoi(gpu.Total)
		if err != nil {
			errs[name+"Total"] = err
			continue
		}
		used, err := strconv.Atoi(gpu.Used)
		if err != nil {
			errs[name+"Used"] = err
			continue
		}

		total, used = reconcileNvidiaMem(total, used, mem)

		mems[name] = devices.MemoryInfo{
			Total:       uint64(total),
			Used:        uint64(used),
			UsedPercent: float64(mem),
		}
	}
	return errs
}

// reconcileNvidiaMem fills in a zero total or used figure from the other one
// and the utilization percentage mem, returning the adjusted (total, used).
func reconcileNvidiaMem(total, used, mem int) (int, int) {
	switch {
	case total == 0 && used == 0:
		// Neither absolute figure is known: express usage on a 0-100 scale.
		return 100, mem
	case used == 0:
		// Only the total is known: derive used from the percentage.
		return total, int(float64(total) * (float64(mem) / 100))
	case total == 0:
		// Only used is known: derive the total from the percentage.
		if mem <= 0 {
			// A zero (or negative) percentage would divide by zero and the
			// resulting Inf-to-int conversion is implementation-defined.
			// The total cannot be smaller than what is in use, so fall back
			// to used as a lower bound.
			return used, used
		}
		return int(float64(used) / (float64(mem) / 100)), used
	}
	return total, used
}
// updateNvidiaUsage queries nvidia-smi and stores one utilization value per
// GPU in cpus, keyed by "<ProductName> <index>". The duration and bool
// arguments are ignored.
//
// GPUs that report "N/A" as their utilization are skipped; every other value
// is parsed as an integer.
//
// The returned map is never nil. It holds the query error under the key
// "nvidia" when nvidia-smi could not be read, or a parse error under the GPU's
// name for each value that could not be converted.
func updateNvidiaUsage(cpus map[string]int, _ time.Duration, _ bool) map[string]error {
	failures := make(map[string]error)

	smi, queryErr := nvidiasmi.New()
	if queryErr != nil {
		failures["nvidia"] = queryErr
		return failures
	}
	if !smi.HasGPU() {
		return failures
	}

	for idx, card := range smi.GPUS {
		// The GPU does not export a utilization measure.
		if card.GpuUtil == "N/A" {
			continue
		}
		label := card.ProductName + " " + strconv.Itoa(idx)
		if percent, convErr := strconv.Atoi(card.GpuUtil); convErr != nil {
			failures[label] = convErr
		} else {
			cpus[label] = percent
		}
	}
	return failures
}
|