mirror of
https://github.com/henrygd/beszel.git
synced 2025-12-17 02:36:17 +01:00
fix intel_gpu_top restart loop and add intel gpu pkg power (#1203)
This commit is contained in:
@@ -258,6 +258,7 @@ func (gm *GPUManager) GetCurrentData() map[string]system.GPUData {
|
||||
gpuAvg.Engines[name] = twoDecimals(engine / count)
|
||||
maxEngineUsage = max(maxEngineUsage, engine/count)
|
||||
}
|
||||
gpuAvg.PowerPkg = twoDecimals(gpu.PowerPkg / count)
|
||||
gpuAvg.Usage = twoDecimals(maxEngineUsage)
|
||||
} else {
|
||||
gpuAvg.Usage = twoDecimals(gpu.Usage / count)
|
||||
@@ -266,7 +267,7 @@ func (gm *GPUManager) GetCurrentData() map[string]system.GPUData {
|
||||
}
|
||||
|
||||
// reset accumulators in the original gpu data for next collection
|
||||
gpu.Usage, gpu.Power, gpu.Count = gpuAvg.Usage, gpuAvg.Power, 1
|
||||
gpu.Usage, gpu.Power, gpu.PowerPkg, gpu.Count = gpuAvg.Usage, gpuAvg.Power, gpuAvg.PowerPkg, 1
|
||||
gpu.Engines = gpuAvg.Engines
|
||||
|
||||
// append id to the name if there are multiple GPUs with the same name
|
||||
|
||||
@@ -17,6 +17,7 @@ const (
|
||||
|
||||
type intelGpuStats struct {
|
||||
PowerGPU float64
|
||||
PowerPkg float64
|
||||
Engines map[string]float64
|
||||
}
|
||||
|
||||
@@ -33,6 +34,7 @@ func (gm *GPUManager) updateIntelFromStats(sample *intelGpuStats) bool {
|
||||
}
|
||||
|
||||
gpuData.Power += sample.PowerGPU
|
||||
gpuData.PowerPkg += sample.PowerPkg
|
||||
|
||||
if gpuData.Engines == nil {
|
||||
gpuData.Engines = make(map[string]float64, len(sample.Engines))
|
||||
@@ -46,7 +48,7 @@ func (gm *GPUManager) updateIntelFromStats(sample *intelGpuStats) bool {
|
||||
}
|
||||
|
||||
// collectIntelStats executes intel_gpu_top in text mode (-l) and parses the output
|
||||
func (gm *GPUManager) collectIntelStats() error {
|
||||
func (gm *GPUManager) collectIntelStats() (err error) {
|
||||
cmd := exec.Command(intelGpuStatsCmd, "-s", intelGpuStatsInterval, "-l")
|
||||
// Avoid blocking if intel_gpu_top writes to stderr
|
||||
cmd.Stderr = io.Discard
|
||||
@@ -58,23 +60,28 @@ func (gm *GPUManager) collectIntelStats() error {
|
||||
return err
|
||||
}
|
||||
|
||||
// Ensure we always reap the child to avoid zombies on any return path.
|
||||
// Ensure we always reap the child to avoid zombies on any return path and
|
||||
// propagate a non-zero exit code if no other error was set.
|
||||
defer func() {
|
||||
// Best-effort close of the pipe (unblock the child if it writes)
|
||||
_ = stdout.Close()
|
||||
if cmd.ProcessState == nil || !cmd.ProcessState.Exited() {
|
||||
_ = cmd.Process.Kill()
|
||||
}
|
||||
_ = cmd.Wait()
|
||||
if waitErr := cmd.Wait(); err == nil && waitErr != nil {
|
||||
err = waitErr
|
||||
}
|
||||
}()
|
||||
|
||||
scanner := bufio.NewScanner(stdout)
|
||||
var header1 string
|
||||
var header2 string
|
||||
var engineNames []string
|
||||
var friendlyNames []string
|
||||
var preEngineCols int
|
||||
var powerIndex int
|
||||
var hadDataRow bool
|
||||
// skip first data row because it sometimes has erroneous data
|
||||
var skippedFirstDataRow bool
|
||||
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
@@ -83,24 +90,34 @@ func (gm *GPUManager) collectIntelStats() error {
|
||||
}
|
||||
|
||||
// first header line
|
||||
if header1 == "" {
|
||||
if strings.HasPrefix(line, "Freq") {
|
||||
header1 = line
|
||||
continue
|
||||
}
|
||||
|
||||
// second header line
|
||||
if header2 == "" {
|
||||
if strings.HasPrefix(line, "req") {
|
||||
engineNames, friendlyNames, powerIndex, preEngineCols = gm.parseIntelHeaders(header1, line)
|
||||
header1, header2 = "x", "x" // don't need these anymore
|
||||
continue
|
||||
}
|
||||
|
||||
// Data row
|
||||
sample := gm.parseIntelData(line, engineNames, friendlyNames, powerIndex, preEngineCols)
|
||||
if !skippedFirstDataRow {
|
||||
skippedFirstDataRow = true
|
||||
continue
|
||||
}
|
||||
sample, err := gm.parseIntelData(line, engineNames, friendlyNames, powerIndex, preEngineCols)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hadDataRow = true
|
||||
gm.updateIntelFromStats(&sample)
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return err
|
||||
if scanErr := scanner.Err(); scanErr != nil {
|
||||
return scanErr
|
||||
}
|
||||
if !hadDataRow {
|
||||
return errNoValidData
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -145,19 +162,22 @@ func (gm *GPUManager) parseIntelHeaders(header1 string, header2 string) (engineN
|
||||
return engineNames, friendlyNames, powerIndex, preEngineCols
|
||||
}
|
||||
|
||||
func (gm *GPUManager) parseIntelData(line string, engineNames []string, friendlyNames []string, powerIndex int, preEngineCols int) (sample intelGpuStats) {
|
||||
func (gm *GPUManager) parseIntelData(line string, engineNames []string, friendlyNames []string, powerIndex int, preEngineCols int) (sample intelGpuStats, err error) {
|
||||
fields := strings.Fields(line)
|
||||
if len(fields) == 0 {
|
||||
return sample
|
||||
return sample, errNoValidData
|
||||
}
|
||||
// Make sure row has enough columns for engines
|
||||
if need := preEngineCols + 3*len(engineNames); len(fields) < need {
|
||||
return sample
|
||||
return sample, errNoValidData
|
||||
}
|
||||
if powerIndex >= 0 && powerIndex < len(fields) {
|
||||
if v, perr := strconv.ParseFloat(fields[powerIndex], 64); perr == nil {
|
||||
sample.PowerGPU = v
|
||||
}
|
||||
if v, perr := strconv.ParseFloat(fields[powerIndex+1], 64); perr == nil {
|
||||
sample.PowerPkg = v
|
||||
}
|
||||
}
|
||||
if len(engineNames) > 0 {
|
||||
sample.Engines = make(map[string]float64, len(engineNames))
|
||||
@@ -175,5 +195,5 @@ func (gm *GPUManager) parseIntelData(line string, engineNames []string, friendly
|
||||
}
|
||||
}
|
||||
}
|
||||
return sample
|
||||
return sample, nil
|
||||
}
|
||||
|
||||
@@ -849,13 +849,15 @@ func TestIntelCollectorStreaming(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
os.Setenv("PATH", dir)
|
||||
|
||||
// Create a fake intel_gpu_top that prints -l format with two samples and exits
|
||||
// Create a fake intel_gpu_top that prints -l format with four samples (first will be skipped) and exits
|
||||
scriptPath := filepath.Join(dir, "intel_gpu_top")
|
||||
script := `#!/bin/sh
|
||||
echo "Freq MHz IRQ RC6 Power W IMC MiB/s RCS BCS VCS"
|
||||
echo " req act /s % gpu pkg rd wr % se wa % se wa % se wa"
|
||||
echo "373 373 224 45 1.50 4.13 2554 714 12.34 0 0 0.00 0 0 5.00 0 0"
|
||||
echo "226 223 338 58 2.00 2.69 1820 965 0.00 0 0 0.00 0 0 0.00 0 0"`
|
||||
echo "226 223 338 58 2.00 2.69 1820 965 0.00 0 0 0.00 0 0 0.00 0 0"
|
||||
echo "189 187 412 67 1.80 2.45 1950 823 8.50 2 1 15.00 1 0 22.00 0 1"
|
||||
echo "298 295 278 51 2.20 3.12 1675 942 5.75 1 2 9.50 3 1 12.00 1 0"`
|
||||
if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -864,21 +866,22 @@ echo "226 223 338 58 2.00 2.69 1820 965 0.00 0 0 0.00
|
||||
GpuDataMap: make(map[string]*system.GPUData),
|
||||
}
|
||||
|
||||
// Run the collector once; it should read two samples and return
|
||||
// Run the collector once; it should read four samples but skip the first and return
|
||||
if err := gm.collectIntelStats(); err != nil {
|
||||
t.Fatalf("collectIntelStats error: %v", err)
|
||||
}
|
||||
|
||||
gpu := gm.GpuDataMap["0"]
|
||||
require.NotNil(t, gpu)
|
||||
// Power should be sum of samples: 1.5 + 2.0 = 3.5
|
||||
assert.EqualValues(t, 3.5, gpu.Power)
|
||||
// Engines aggregated
|
||||
assert.EqualValues(t, 12.34, gpu.Engines["Render/3D"])
|
||||
assert.EqualValues(t, 5.0, gpu.Engines["Video"])
|
||||
assert.EqualValues(t, 0.0, gpu.Engines["Blitter"]) // BCS is zero in both samples
|
||||
// Count should be 2 samples
|
||||
assert.Equal(t, float64(2), gpu.Count)
|
||||
// Power should be sum of samples 2-4 (first is skipped): 2.0 + 1.8 + 2.2 = 6.0
|
||||
assert.EqualValues(t, 6.0, gpu.Power)
|
||||
assert.InDelta(t, 8.26, gpu.PowerPkg, 0.01) // Allow small floating point differences
|
||||
// Engines aggregated from samples 2-4
|
||||
assert.EqualValues(t, 14.25, gpu.Engines["Render/3D"]) // 0.00 + 8.50 + 5.75
|
||||
assert.EqualValues(t, 34.0, gpu.Engines["Video"]) // 0.00 + 22.00 + 12.00
|
||||
assert.EqualValues(t, 24.5, gpu.Engines["Blitter"]) // 0.00 + 15.00 + 9.50
|
||||
// Count should be 3 samples (first is skipped)
|
||||
assert.Equal(t, float64(3), gpu.Count)
|
||||
}
|
||||
|
||||
func TestParseIntelHeaders(t *testing.T) {
|
||||
@@ -970,6 +973,7 @@ func TestParseIntelData(t *testing.T) {
|
||||
preEngineCols int
|
||||
wantPowerGPU float64
|
||||
wantEngines map[string]float64
|
||||
wantErr error
|
||||
}{
|
||||
{
|
||||
name: "basic data with power and engines",
|
||||
@@ -1022,6 +1026,7 @@ func TestParseIntelData(t *testing.T) {
|
||||
preEngineCols: 8,
|
||||
wantPowerGPU: 0.0,
|
||||
wantEngines: nil, // empty sample returned
|
||||
wantErr: errNoValidData,
|
||||
},
|
||||
{
|
||||
name: "empty line",
|
||||
@@ -1032,6 +1037,7 @@ func TestParseIntelData(t *testing.T) {
|
||||
preEngineCols: 8,
|
||||
wantPowerGPU: 0.0,
|
||||
wantEngines: nil,
|
||||
wantErr: errNoValidData,
|
||||
},
|
||||
{
|
||||
name: "data with invalid power value",
|
||||
@@ -1076,7 +1082,8 @@ func TestParseIntelData(t *testing.T) {
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
gm := &GPUManager{}
|
||||
sample := gm.parseIntelData(tt.line, tt.engineNames, tt.friendlyNames, tt.powerIndex, tt.preEngineCols)
|
||||
sample, err := gm.parseIntelData(tt.line, tt.engineNames, tt.friendlyNames, tt.powerIndex, tt.preEngineCols)
|
||||
assert.Equal(t, tt.wantErr, err)
|
||||
|
||||
assert.Equal(t, tt.wantPowerGPU, sample.PowerGPU)
|
||||
assert.Equal(t, tt.wantEngines, sample.Engines)
|
||||
|
||||
Reference in New Issue
Block a user