1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
package nvidia
import (
"bytes"
"encoding/csv"
"os/exec"
"strconv"
"sync"
"time"
//"github.com/NVIDIA/gpu-monitoring-tools/bindings/go/nvml"
"github.com/xxxserxxx/gotop/v4/devices"
)
func init() {
_temps = make(map[string]int)
_mems = make(map[string]devices.MemoryInfo)
_cpus = make(map[string]int)
errors = make(map[string]error)
devices.RegisterTemp(updateNvidiaTemp)
devices.RegisterMem(updateNvidiaMem)
devices.RegisterCPU(updateNvidiaUsage)
lock = sync.Mutex{}
devices.RegisterStartup(startup)
}
// updateNvidiaTemp copies the cached GPU temperature readings into temps.
// Keys have the form "<name>.<index>", as built by update.
//
// The returned map is a snapshot of the package-level error map taken while
// the lock is held. The shared map itself is not handed out: the background
// poller keeps writing to it, and a Go map must not be read by one goroutine
// while another one writes to it.
func updateNvidiaTemp(temps map[string]int) map[string]error {
	lock.Lock()
	defer lock.Unlock()

	for k, v := range _temps {
		temps[k] = v
	}

	errs := make(map[string]error, len(errors))
	for k, e := range errors {
		errs[k] = e
	}
	return errs
}
// updateNvidiaMem copies the cached GPU memory readings into mems. Keys have
// the form "<name>.<index>", as built by update.
//
// The returned map is a snapshot of the package-level error map taken while
// the lock is held. The shared map itself is not handed out: the background
// poller keeps writing to it, and a Go map must not be read by one goroutine
// while another one writes to it.
func updateNvidiaMem(mems map[string]devices.MemoryInfo) map[string]error {
	lock.Lock()
	defer lock.Unlock()

	for k, v := range _mems {
		mems[k] = v
	}

	errs := make(map[string]error, len(errors))
	for k, e := range errors {
		errs[k] = e
	}
	return errs
}
// updateNvidiaUsage copies the cached GPU utilization readings into cpus.
// Keys have the form "<name>.<index>", as built by update. The boolean
// argument is ignored.
//
// The returned map is a snapshot of the package-level error map taken while
// the lock is held. The shared map itself is not handed out: the background
// poller keeps writing to it, and a Go map must not be read by one goroutine
// while another one writes to it.
func updateNvidiaUsage(cpus map[string]int, _ bool) map[string]error {
	lock.Lock()
	defer lock.Unlock()

	for k, v := range _cpus {
		cpus[k] = v
	}

	errs := make(map[string]error, len(errors))
	for k, e := range errors {
		errs[k] = e
	}
	return errs
}
// startup launches the background poller that periodically calls update.
//
// The polling interval defaults to one second. It can be overridden through
// the "nvidia-refresh" entry of vars, whose value must be accepted by
// time.ParseDuration (for example "500ms" or "2s"); a parse failure is
// returned and no poller is started.
//
// A zero or negative interval falls back to the default. time.Tick returns a
// nil channel for such durations, so without this guard the poller would
// block forever and the caches would never be refreshed.
//
// The poller goroutine has no stop mechanism; its loop never exits.
func startup(vars map[string]string) error {
	const defaultRefresh = time.Second

	refresh := defaultRefresh
	if v, ok := vars["nvidia-refresh"]; ok {
		d, err := time.ParseDuration(v)
		if err != nil {
			return err
		}
		refresh = d
	}
	if refresh <= 0 {
		refresh = defaultRefresh
	}

	go func() {
		for range time.Tick(refresh) {
			update()
		}
	}()
	return nil
}
// Package-level state shared between the background poller (update) and the
// reader callbacks (updateNvidiaTemp, updateNvidiaMem, updateNvidiaUsage).
// The metric maps are keyed by "<name>.<index>".
var (
	// lock is taken by the reader callbacks, and by update around its
	// per-row writes, while the maps below are accessed.
	lock sync.Mutex

	// _temps holds the value parsed from the temperature.gpu column.
	_temps map[string]int
	// _mems holds the values parsed from memory.total and memory.used.
	_mems map[string]devices.MemoryInfo
	// _cpus holds the value parsed from the utilization.gpu column.
	_cpus map[string]int
	// errors holds the errors recorded by update, under "nvidia" for
	// command/CSV failures or under the GPU key for field parse failures.
	errors map[string]error
)
// update updates the cached NVidia metric data: name, index,
// temperature.gpu, utilization.gpu, utilization.memory, memory.total, memory.free, memory.used
func update() {
bs, err := exec.Command(
"nvidia-smi",
"--query-gpu=name,index,temperature.gpu,utilization.gpu,memory.total,memory.used",
"--format=csv,noheader,nounits").Output()
if err != nil {
errors["nvidia"] = err
return
}
csvReader := csv.NewReader(bytes.NewReader(bs))
csvReader.TrimLeadingSpace = true
records, err := csvReader.ReadAll()
if err != nil {
errors["nvidia"] = err
return
}
lock.Lock()
defer lock.Unlock()
for _, row := range records {
name := row[0] + "." + row[1]
if _temps[name], err = strconv.Atoi(row[2]); err != nil {
errors[name] = err
}
if _cpus[name], err = strconv.Atoi(row[3]); err != nil {
errors[name] = err
}
t, err := strconv.Atoi(row[4])
if err != nil {
errors[name] = err
}
u, err := strconv.Atoi(row[5])
if err != nil {
errors[name] = err
}
_mems[name] = devices.MemoryInfo{
Total: uint64(t),
Used: uint64(u),
UsedPercent: float64(u) / float64(t),
}
}
}
|