From 7b128d09ac07fdfb248e3d8db220c93cd59c6381 Mon Sep 17 00:00:00 2001 From: henrygd Date: Wed, 24 Sep 2025 13:24:48 -0400 Subject: [PATCH] Update Intel GPU collector to parse plain text (`-l`) instead of JSON output (#1150) --- agent/gpu_intel.go | 169 ++++++++++++++++------- agent/gpu_test.go | 276 ++++++++++++++++++++++++++++++++------ supplemental/CHANGELOG.md | 8 ++ 3 files changed, 368 insertions(+), 85 deletions(-) diff --git a/agent/gpu_intel.go b/agent/gpu_intel.go index 48c6f756..4a9225d6 100644 --- a/agent/gpu_intel.go +++ b/agent/gpu_intel.go @@ -1,9 +1,11 @@ package agent import ( - "encoding/json" - "fmt" + "bufio" + "io" "os/exec" + "strconv" + "strings" "github.com/henrygd/beszel/internal/entities/system" ) @@ -14,12 +16,8 @@ const ( ) type intelGpuStats struct { - Power struct { - GPU float64 `json:"GPU"` - } `json:"power"` - Engines map[string]struct { - Busy float64 `json:"busy"` - } `json:"engines"` + PowerGPU float64 + Engines map[string]float64 } // updateIntelFromStats updates aggregated GPU data from a single intelGpuStats sample @@ -34,24 +32,24 @@ func (gm *GPUManager) updateIntelFromStats(sample *intelGpuStats) bool { gm.GpuDataMap["0"] = gpuData } - if sample.Power.GPU > 0 { - gpuData.Power += sample.Power.GPU - } + gpuData.Power += sample.PowerGPU if gpuData.Engines == nil { gpuData.Engines = make(map[string]float64, len(sample.Engines)) } for name, engine := range sample.Engines { - gpuData.Engines[name] += engine.Busy + gpuData.Engines[name] += engine } gpuData.Count++ return true } -// collectIntelStats executes intel_gpu_top in JSON mode and stream-decodes the array of samples +// collectIntelStats executes intel_gpu_top in text mode (-l) and parses the output func (gm *GPUManager) collectIntelStats() error { - cmd := exec.Command(intelGpuStatsCmd, "-s", intelGpuStatsInterval, "-J") + cmd := exec.Command(intelGpuStatsCmd, "-s", intelGpuStatsInterval, "-l") + // Avoid blocking if intel_gpu_top writes to stderr + cmd.Stderr = io.Discard stdout, err := cmd.StdoutPipe() if err != nil { return err @@ -60,43 +58,122 @@ func (gm *GPUManager) collectIntelStats() error { return err } - dec := json.NewDecoder(stdout) + // Ensure we always reap the child to avoid zombies on any return path. + defer func() { + // Best-effort close of the pipe (unblock the child if it writes) + _ = stdout.Close() + if cmd.ProcessState == nil || !cmd.ProcessState.Exited() { + _ = cmd.Process.Kill() + } + _ = cmd.Wait() + }() - // Expect a JSON array stream: [ { ... }, { ... }, ... ] - tok, err := dec.Token() - if err != nil { - return err - } - if delim, ok := tok.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("unexpected JSON start token: %v", tok) - } + scanner := bufio.NewScanner(stdout) + var header1 string + var header2 string + var engineNames []string + var friendlyNames []string + var preEngineCols int + var powerIndex int = -1 - var sample intelGpuStats - for { - if dec.More() { - // Clear the engines map before decoding - if sample.Engines != nil { - for k := range sample.Engines { - delete(sample.Engines, k) - } - } - - if err := dec.Decode(&sample); err != nil { - return fmt.Errorf("decode intel gpu: %w", err) - } - gm.updateIntelFromStats(&sample) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line == "" { continue } - // Attempt to read closing bracket (will only be present when process exits) - tok, err = dec.Token() - if err != nil { - // When the process is still running, decoder will block in More/Decode; any error here is terminal - return err + + // first header line + if header1 == "" { + header1 = line + continue } - if delim, ok := tok.(json.Delim); ok && delim == ']' { - break + + // second header line + if header2 == "" { + engineNames, friendlyNames, powerIndex, preEngineCols = gm.parseIntelHeaders(header1, line) + header1, header2 = "x", "x" // don't need these anymore + continue + } + + // Data row + sample := gm.parseIntelData(line, engineNames, friendlyNames, powerIndex, preEngineCols) + gm.updateIntelFromStats(&sample) + } + if err := scanner.Err(); err != nil { + return err + } + return nil +} + +func (gm *GPUManager) parseIntelHeaders(header1 string, header2 string) (engineNames []string, friendlyNames []string, powerIndex int, preEngineCols int) { + // Build indexes + h1 := strings.Fields(header1) + h2 := strings.Fields(header2) + powerIndex = -1 // Initialize to -1, will be set to actual index if found + // Collect engine names from header1 + for _, col := range h1 { + key := strings.TrimRightFunc(col, func(r rune) bool { return r >= '0' && r <= '9' }) + var friendly string + switch key { + case "RCS": + friendly = "Render/3D" + case "BCS": + friendly = "Blitter" + case "VCS": + friendly = "Video" + case "VECS": + friendly = "VideoEnhance" + case "CCS": + friendly = "Compute" + default: + continue + } + engineNames = append(engineNames, key) + friendlyNames = append(friendlyNames, friendly) + } + // find power gpu index among pre-engine columns + if n := len(engineNames); n > 0 { + preEngineCols = max(len(h2)-3*n, 0) + limit := min(len(h2), preEngineCols) + for i := range limit { + if strings.EqualFold(h2[i], "gpu") { + powerIndex = i + break + } } } - - return cmd.Wait() + return engineNames, friendlyNames, powerIndex, preEngineCols +} + +func (gm *GPUManager) parseIntelData(line string, engineNames []string, friendlyNames []string, powerIndex int, preEngineCols int) (sample intelGpuStats) { + fields := strings.Fields(line) + if len(fields) == 0 { + return sample + } + // Make sure row has enough columns for engines + if need := preEngineCols + 3*len(engineNames); len(fields) < need { + return sample + } + if powerIndex >= 0 && powerIndex < len(fields) { + if v, perr := strconv.ParseFloat(fields[powerIndex], 64); perr == nil { + sample.PowerGPU = v + } + } + if len(engineNames) > 0 { + sample.Engines = make(map[string]float64, len(engineNames)) + for k := range engineNames { + base := preEngineCols + 3*k + if base < len(fields) { + busy := 0.0 + if v, e := strconv.ParseFloat(fields[base], 64); e == nil { + busy = v + } + cur := sample.Engines[friendlyNames[k]] + sample.Engines[friendlyNames[k]] = cur + busy + } else { + sample.Engines[friendlyNames[k]] = 0 + } + } + } + return sample } diff --git a/agent/gpu_test.go b/agent/gpu_test.go index c2497275..1b5f129c 100644 --- a/agent/gpu_test.go +++ b/agent/gpu_test.go @@ -756,11 +756,11 @@ func TestAccumulation(t *testing.T) { continue } - assert.InDelta(t, expected.temperature, gpu.Temperature, 0.01, "Temperature should match") - assert.InDelta(t, expected.memoryUsed, gpu.MemoryUsed, 0.01, "Memory used should match") - assert.InDelta(t, expected.memoryTotal, gpu.MemoryTotal, 0.01, "Memory total should match") - assert.InDelta(t, expected.usage, gpu.Usage, 0.01, "Usage should match") - assert.InDelta(t, expected.power, gpu.Power, 0.01, "Power should match") + assert.EqualValues(t, expected.temperature, gpu.Temperature, "Temperature should match") + assert.EqualValues(t, expected.memoryUsed, gpu.MemoryUsed, "Memory used should match") + assert.EqualValues(t, expected.memoryTotal, gpu.MemoryTotal, "Memory total should match") + assert.EqualValues(t, expected.usage, gpu.Usage, "Usage should match") + assert.EqualValues(t, expected.power, gpu.Power, "Power should match") assert.Equal(t, expected.count, gpu.Count, "Count should match") } @@ -773,9 +773,9 @@ func TestAccumulation(t *testing.T) { continue } - assert.InDelta(t, expected.temperature, gpu.Temperature, 0.01, "Temperature in GetCurrentData should match") - assert.InDelta(t, expected.avgUsage, gpu.Usage, 0.01, "Average usage in GetCurrentData should match") - assert.InDelta(t, expected.avgPower, gpu.Power, 0.01, "Average power in GetCurrentData should match") + assert.EqualValues(t, expected.temperature, gpu.Temperature, "Temperature in GetCurrentData should match") + assert.EqualValues(t, expected.avgUsage, gpu.Usage, "Average usage in GetCurrentData should match") + assert.EqualValues(t, expected.avgPower, gpu.Power, "Average power in GetCurrentData should match") } // Verify that accumulators in the original map are reset @@ -800,14 +800,12 @@ func TestIntelUpdateFromStats(t *testing.T) { // First sample with power and two engines sample1 := intelGpuStats{ - Engines: map[string]struct { - Busy float64 `json:"busy"` - }{ - "Render/3D": {Busy: 20.0}, - "Video": {Busy: 5.0}, + PowerGPU: 10.5, + Engines: map[string]float64{ + "Render/3D": 20.0, + "Video": 5.0, }, } - sample1.Power.GPU = 10.5 ok := gm.updateIntelFromStats(&sample1) assert.True(t, ok) @@ -815,33 +813,31 @@ func TestIntelUpdateFromStats(t *testing.T) { gpu := gm.GpuDataMap["0"] require.NotNil(t, gpu) assert.Equal(t, "GPU", gpu.Name) - assert.InDelta(t, 10.5, gpu.Power, 0.001) - assert.InDelta(t, 20.0, gpu.Engines["Render/3D"], 0.001) - assert.InDelta(t, 5.0, gpu.Engines["Video"], 0.001) + assert.EqualValues(t, 10.5, gpu.Power) + assert.EqualValues(t, 20.0, gpu.Engines["Render/3D"]) + assert.EqualValues(t, 5.0, gpu.Engines["Video"]) assert.Equal(t, float64(1), gpu.Count) // Second sample with zero power (should not add) and additional engine busy sample2 := intelGpuStats{ - Engines: map[string]struct { - Busy float64 `json:"busy"` - }{ - "Render/3D": {Busy: 10.0}, - "Video": {Busy: 2.5}, - "Blitter": {Busy: 1.0}, + PowerGPU: 0.0, + Engines: map[string]float64{ + "Render/3D": 10.0, + "Video": 2.5, + "Blitter": 1.0, }, } // zero power should not increment power accumulator - sample2.Power.GPU = 0.0 ok = gm.updateIntelFromStats(&sample2) assert.True(t, ok) gpu = gm.GpuDataMap["0"] require.NotNil(t, gpu) - assert.InDelta(t, 10.5, gpu.Power, 0.001) - assert.InDelta(t, 30.0, gpu.Engines["Render/3D"], 0.001) // 20 + 10 - assert.InDelta(t, 7.5, gpu.Engines["Video"], 0.001) // 5 + 2.5 - assert.InDelta(t, 1.0, gpu.Engines["Blitter"], 0.001) + assert.EqualValues(t, 10.5, gpu.Power) + assert.EqualValues(t, 30.0, gpu.Engines["Render/3D"]) // 20 + 10 + assert.EqualValues(t, 7.5, gpu.Engines["Video"]) // 5 + 2.5 + assert.EqualValues(t, 1.0, gpu.Engines["Blitter"]) assert.Equal(t, float64(2), gpu.Count) } @@ -853,15 +849,13 @@ func TestIntelCollectorStreaming(t *testing.T) { dir := t.TempDir() os.Setenv("PATH", dir) - // Create a fake intel_gpu_top that prints a JSON array with two samples and exits + // Create a fake intel_gpu_top that prints -l format with two samples and exits scriptPath := filepath.Join(dir, "intel_gpu_top") script := `#!/bin/sh -# Ignore args -s and -J -# Emit a JSON array with two objects, separated by a comma, then exit -(echo '['; \ - echo '{"power":{"GPU":1.5},"engines":{"Render/3D":{"busy":12.34}}},'; \ - echo '{"power":{"GPU":2.0},"engines":{"Video":{"busy":5}}}'; \ - echo ']')` +echo "Freq MHz IRQ RC6 Power W IMC MiB/s RCS BCS VCS" +echo " req act /s % gpu pkg rd wr % se wa % se wa % se wa" +echo "373 373 224 45 1.50 4.13 2554 714 12.34 0 0 0.00 0 0 5.00 0 0" +echo "226 223 338 58 2.00 2.69 1820 965 0.00 0 0 0.00 0 0 0.00 0 0"` if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil { t.Fatal(err) } @@ -877,11 +871,215 @@ func TestIntelCollectorStreaming(t *testing.T) { gpu := gm.GpuDataMap["0"] require.NotNil(t, gpu) - // Power should be sum of non-zero samples: 1.5 + 2.0 = 3.5 - assert.InDelta(t, 3.5, gpu.Power, 0.001) + // Power should be sum of samples: 1.5 + 2.0 = 3.5 + assert.EqualValues(t, 3.5, gpu.Power) // Engines aggregated - assert.InDelta(t, 12.34, gpu.Engines["Render/3D"], 0.001) - assert.InDelta(t, 5.0, gpu.Engines["Video"], 0.001) + assert.EqualValues(t, 12.34, gpu.Engines["Render/3D"]) + assert.EqualValues(t, 5.0, gpu.Engines["Video"]) + assert.EqualValues(t, 0.0, gpu.Engines["Blitter"]) // BCS is zero in both samples // Count should be 2 samples assert.Equal(t, float64(2), gpu.Count) } + +func TestParseIntelHeaders(t *testing.T) { + tests := []struct { + name string + header1 string + header2 string + wantEngineNames []string + wantFriendlyNames []string + wantPowerIndex int + wantPreEngineCols int + }{ + { + name: "basic headers with RCS BCS VCS", + header1: "Freq MHz IRQ RC6 Power W IMC MiB/s RCS BCS VCS", + header2: " req act /s % gpu pkg rd wr % se wa % se wa % se wa", + wantEngineNames: []string{"RCS", "BCS", "VCS"}, + wantFriendlyNames: []string{"Render/3D", "Blitter", "Video"}, + wantPowerIndex: 4, // "gpu" is at index 4 + wantPreEngineCols: 8, // 17 total cols - 3*3 = 8 + }, + { + name: "headers with only RCS", + header1: "Freq MHz IRQ RC6 Power W IMC MiB/s RCS", + header2: " req act /s % gpu pkg rd wr % se wa", + wantEngineNames: []string{"RCS"}, + wantFriendlyNames: []string{"Render/3D"}, + wantPowerIndex: 4, + wantPreEngineCols: 8, // 11 total - 3*1 = 8 + }, + { + name: "headers with VECS and CCS", + header1: "Freq MHz IRQ RC6 Power W IMC MiB/s VECS CCS", + header2: " req act /s % gpu pkg rd wr % se wa % se wa", + wantEngineNames: []string{"VECS", "CCS"}, + wantFriendlyNames: []string{"VideoEnhance", "Compute"}, + wantPowerIndex: 4, + wantPreEngineCols: 8, // 14 total - 3*2 = 8 + }, + { + name: "no engines", + header1: "Freq MHz IRQ RC6 Power W IMC MiB/s", + header2: " req act /s % gpu pkg rd wr", + wantEngineNames: nil, // no engines found, slices remain nil + wantFriendlyNames: nil, + wantPowerIndex: -1, // no engines, so no search + wantPreEngineCols: 0, + }, + { + name: "power index not found", + header1: "Freq MHz IRQ RC6 Power W IMC MiB/s RCS", + header2: " req act /s % pkg cpu rd wr % se wa", // no "gpu" + wantEngineNames: []string{"RCS"}, + wantFriendlyNames: []string{"Render/3D"}, + wantPowerIndex: -1, // "gpu" not found + wantPreEngineCols: 8, // 11 total - 3*1 = 8 + }, + { + name: "empty headers", + header1: "", + header2: "", + wantEngineNames: nil, // empty input, slices remain nil + wantFriendlyNames: nil, + wantPowerIndex: -1, + wantPreEngineCols: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gm := &GPUManager{} + engineNames, friendlyNames, powerIndex, preEngineCols := gm.parseIntelHeaders(tt.header1, tt.header2) + + assert.Equal(t, tt.wantEngineNames, engineNames) + assert.Equal(t, tt.wantFriendlyNames, friendlyNames) + assert.Equal(t, tt.wantPowerIndex, powerIndex) + assert.Equal(t, tt.wantPreEngineCols, preEngineCols) + }) + } +} + +func TestParseIntelData(t *testing.T) { + tests := []struct { + name string + line string + engineNames []string + friendlyNames []string + powerIndex int + preEngineCols int + wantPowerGPU float64 + wantEngines map[string]float64 + }{ + { + name: "basic data with power and engines", + line: "373 373 224 45 1.50 4.13 2554 714 12.34 0 0 0.00 0 0 5.00 0 0", + engineNames: []string{"RCS", "BCS", "VCS"}, + friendlyNames: []string{"Render/3D", "Blitter", "Video"}, + powerIndex: 4, + preEngineCols: 8, + wantPowerGPU: 1.50, + wantEngines: map[string]float64{ + "Render/3D": 12.34, + "Blitter": 0.00, + "Video": 5.00, + }, + }, + { + name: "data with zero power", + line: "226 223 338 58 0.00 2.69 1820 965 0.00 0 0 0.00 0 0 0.00 0 0", + engineNames: []string{"RCS", "BCS", "VCS"}, + friendlyNames: []string{"Render/3D", "Blitter", "Video"}, + powerIndex: 4, + preEngineCols: 8, + wantPowerGPU: 0.00, + wantEngines: map[string]float64{ + "Render/3D": 0.00, + "Blitter": 0.00, + "Video": 0.00, + }, + }, + { + name: "data with no power index", + line: "373 373 224 45 1.50 4.13 2554 714 12.34 0 0 0.00 0 0 5.00 0 0", + engineNames: []string{"RCS", "BCS", "VCS"}, + friendlyNames: []string{"Render/3D", "Blitter", "Video"}, + powerIndex: -1, + preEngineCols: 8, + wantPowerGPU: 0.0, // no power parsed + wantEngines: map[string]float64{ + "Render/3D": 12.34, + "Blitter": 0.00, + "Video": 5.00, + }, + }, + { + name: "data with insufficient columns", + line: "373 373 224 45 1.50", // too few columns + engineNames: []string{"RCS", "BCS", "VCS"}, + friendlyNames: []string{"Render/3D", "Blitter", "Video"}, + powerIndex: 4, + preEngineCols: 8, + wantPowerGPU: 0.0, + wantEngines: nil, // empty sample returned + }, + { + name: "empty line", + line: "", + engineNames: []string{"RCS"}, + friendlyNames: []string{"Render/3D"}, + powerIndex: 4, + preEngineCols: 8, + wantPowerGPU: 0.0, + wantEngines: nil, + }, + { + name: "data with invalid power value", + line: "373 373 224 45 N/A 4.13 2554 714 12.34 0 0 0.00 0 0 5.00 0 0", + engineNames: []string{"RCS", "BCS", "VCS"}, + friendlyNames: []string{"Render/3D", "Blitter", "Video"}, + powerIndex: 4, + preEngineCols: 8, + wantPowerGPU: 0.0, // N/A can't be parsed + wantEngines: map[string]float64{ + "Render/3D": 12.34, + "Blitter": 0.00, + "Video": 5.00, + }, + }, + { + name: "data with invalid engine value", + line: "373 373 224 45 1.50 4.13 2554 714 N/A 0 0 0.00 0 0 5.00 0 0", + engineNames: []string{"RCS", "BCS", "VCS"}, + friendlyNames: []string{"Render/3D", "Blitter", "Video"}, + powerIndex: 4, + preEngineCols: 8, + wantPowerGPU: 1.50, + wantEngines: map[string]float64{ + "Render/3D": 0.0, // N/A becomes 0 + "Blitter": 0.00, + "Video": 5.00, + }, + }, + { + name: "data with no engines", + line: "373 373 224 45 1.50 4.13 2554 714", + engineNames: []string{}, + friendlyNames: []string{}, + powerIndex: 4, + preEngineCols: 8, + wantPowerGPU: 1.50, + wantEngines: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gm := &GPUManager{} + sample := gm.parseIntelData(tt.line, tt.engineNames, tt.friendlyNames, tt.powerIndex, tt.preEngineCols) + + assert.Equal(t, tt.wantPowerGPU, sample.PowerGPU) + assert.Equal(t, tt.wantEngines, sample.Engines) + }) + } +} diff --git a/supplemental/CHANGELOG.md b/supplemental/CHANGELOG.md index 542bdec6..65abadd3 100644 --- a/supplemental/CHANGELOG.md +++ b/supplemental/CHANGELOG.md @@ -1,5 +1,13 @@ +## 0.12.11 + +- Adjust calculation of cached memory (fixes #1187, #1196) + +- Update Intel GPU collector to parse plain text (`-l`) instead of JSON output (#1150) + ## 0.12.10 +Note that the default memory calculation changed in this release, which may cause a difference in memory usage compared to previous versions. + - Add initial support for Intel GPUs (#1150, #755) - Show connection type (WebSocket / SSH) in hub UI.