summaryrefslogtreecommitdiffstats
path: root/src/go/collectors/go.d.plugin/modules/intelgpu/exec.go
blob: cb81bccb01724ee50dca272c0ebd64590b66e5cd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
// SPDX-License-Identifier: GPL-3.0-or-later

package intelgpu

import (
	"bufio"
	"bytes"
	"errors"
	"os/exec"
	"strconv"
	"sync"
	"time"

	"github.com/netdata/netdata/go/go.d.plugin/logger"
)

// newIntelGpuTopExec builds an intelGpuTopExec from the given ndsudo path,
// update interval and logger, and immediately starts it via run.
//
// It returns the started executor, or nil and the error reported by run when
// the start-up did not succeed.
func newIntelGpuTopExec(ndsudoPath string, updateEvery int, log *logger.Logger) (*intelGpuTopExec, error) {
	e := new(intelGpuTopExec)
	e.Logger = log
	e.ndsudoPath = ndsudoPath
	e.updateEvery = updateEvery

	err := e.run()
	if err != nil {
		return nil, err
	}

	return e, nil
}

// intelGpuTopExec runs the ndsudo "igt-json" command in the background and
// keeps the text of the most recent complete sample read from its stdout.
type intelGpuTopExec struct {
	// Embedded logger; run uses its Debugf method.
	*logger.Logger

	// ndsudoPath is the executable that run passes to exec.Command.
	ndsudoPath string
	// updateEvery is used by run to derive the --interval argument
	// (updateEvery*1000 - 500 milliseconds when greater than 1, so
	// presumably expressed in seconds).
	updateEvery int

	// cmd is the started command; set by run and reset to nil by stop.
	cmd *exec.Cmd
	// done is closed by the reader goroutine started in run when it returns.
	done chan struct{}

	// mux guards lastSample, which is written by the reader goroutine and
	// read by queryGPUSummaryJson.
	mux sync.Mutex
	// lastSample holds the text of the latest complete sample.
	lastSample string
}

// run starts the ndsudo "igt-json" command and a goroutine that splits its
// stdout into samples, then blocks until one of the following happens:
//
//   - the first complete sample has been stored (returns nil),
//   - the reader goroutine finishes first (returns an error),
//   - three seconds elapse (returns an error).
//
// In both error cases the process is stopped before returning. Errors from
// creating the stdout pipe or starting the command are returned as is.
//
// The --interval argument is in milliseconds: 900 by default, or
// updateEvery*1000 - 500 when updateEvery is greater than 1.
func (e *intelGpuTopExec) run() error {
	refresh := 900
	if e.updateEvery > 1 {
		refresh = e.updateEvery*1000 - 500 // milliseconds
	}

	cmd := exec.Command(e.ndsudoPath, "igt-json", "--interval", strconv.Itoa(refresh))

	e.Debugf("executing '%s'", cmd)

	r, err := cmd.StdoutPipe()
	if err != nil {
		return err
	}

	if err := cmd.Start(); err != nil {
		return err
	}

	// Buffered so the reader can signal without blocking even if nobody is
	// receiving anymore.
	firstSample := make(chan struct{}, 1)
	done := make(chan struct{})
	e.cmd = cmd
	e.done = done

	go func() {
		defer close(done)
		sc := bufio.NewScanner(r)
		var buf bytes.Buffer
		var n int

		for sc.Scan() {
			// Stop reading if no sample is completed within 1000 lines.
			if n++; n > 1000 {
				break
			}

			text := sc.Text()

			// Skip blank lines, and skip everything until a sample opens
			// with "{". Len (not Cap) must be used to detect "no sample in
			// progress": Reset keeps the buffer's storage, so Cap stays
			// non-zero after the first sample and the guard would never
			// apply again.
			if text == "" || (buf.Len() == 0 && text != "{") {
				continue
			}

			// Samples may be terminated by "},"; drop the trailing comma so
			// each stored sample is a standalone object.
			if text == "}," {
				text = "}"
			}

			buf.WriteString(text + "\n")

			// A line starting with '}' closes the current sample.
			if text[0] == '}' {
				e.mux.Lock()
				e.lastSample = buf.String()
				e.mux.Unlock()

				// Non-blocking: only the first notification matters.
				select {
				case firstSample <- struct{}{}:
				default:
				}

				buf.Reset()
				n = 0
			}
		}

		// Scan returns false on read errors and over-long lines as well as
		// on EOF; record the reason instead of ending silently.
		if err := sc.Err(); err != nil {
			e.Debugf("reading igt-json output: %v", err)
		}
	}()

	select {
	case <-e.done:
		_ = e.stop()
		return errors.New("process exited before the first sample was collected")
	case <-time.After(time.Second * 3):
		_ = e.stop()
		return errors.New("timed out waiting for first sample")
	case <-firstSample:
		return nil
	}
}

// queryGPUSummaryJson returns a copy of the most recently stored sample as
// bytes. It returns an error instead when the done channel is already closed,
// i.e. the reader goroutine has finished.
func (e *intelGpuTopExec) queryGPUSummaryJson() ([]byte, error) {
	// Non-blocking check of the done channel.
	exited := false
	select {
	case <-e.done:
		exited = true
	default:
	}
	if exited {
		return nil, errors.New("process has already exited")
	}

	e.mux.Lock()
	sample := e.lastSample
	e.mux.Unlock()

	return []byte(sample), nil
}

// stop kills the started process, waits for it, clears e.cmd and then waits
// up to two seconds for the done channel to be closed.
//
// It is a no-op returning nil when there is no command or no process. Errors
// from Kill and Wait are deliberately ignored; the only error returned is the
// timeout while waiting on done.
func (e *intelGpuTopExec) stop() error {
	cmd := e.cmd
	if cmd == nil {
		return nil
	}

	proc := cmd.Process
	if proc == nil {
		return nil
	}

	_ = proc.Kill()
	_, _ = proc.Wait()
	e.cmd = nil

	timeout := time.After(2 * time.Second)

	select {
	case <-timeout:
		return errors.New("timed out waiting for process to exit")
	case <-e.done:
	}

	return nil
}