include GTT memory in AMD GPU metrics (#1569)

This commit is contained in:
henrygd
2026-02-13 20:06:37 -05:00
parent 7d6c0caafc
commit 283fa9d5c2
2 changed files with 69 additions and 32 deletions

View File

@@ -103,8 +103,17 @@ func (gm *GPUManager) updateAmdGpuData(cardPath string) bool {
// Read all sysfs values first (no lock needed - these can be slow) // Read all sysfs values first (no lock needed - these can be slow)
usage, usageErr := readSysfsFloat(filepath.Join(devicePath, "gpu_busy_percent")) usage, usageErr := readSysfsFloat(filepath.Join(devicePath, "gpu_busy_percent"))
memUsed, memUsedErr := readSysfsFloat(filepath.Join(devicePath, "mem_info_vram_used")) vramUsed, memUsedErr := readSysfsFloat(filepath.Join(devicePath, "mem_info_vram_used"))
memTotal, _ := readSysfsFloat(filepath.Join(devicePath, "mem_info_vram_total")) vramTotal, _ := readSysfsFloat(filepath.Join(devicePath, "mem_info_vram_total"))
memUsed := vramUsed
memTotal := vramTotal
// if gtt is in use (mem_info_gtt_used > 0) and mem_info_gtt_total is readable, add both to the memory used and total (https://github.com/henrygd/beszel/issues/1569#issuecomment-3837640484)
if gttUsed, err := readSysfsFloat(filepath.Join(devicePath, "mem_info_gtt_used")); err == nil && gttUsed > 0 {
if gttTotal, err := readSysfsFloat(filepath.Join(devicePath, "mem_info_gtt_total")); err == nil {
memUsed += gttUsed
memTotal += gttTotal
}
}
var temp, power float64 var temp, power float64
hwmons, _ := filepath.Glob(filepath.Join(devicePath, "hwmon/hwmon*")) hwmons, _ := filepath.Glob(filepath.Join(devicePath, "hwmon/hwmon*"))

View File

@@ -119,40 +119,68 @@ func TestAmdgpuNameCacheRoundTrip(t *testing.T) {
} }
func TestUpdateAmdGpuDataWithFakeSysfs(t *testing.T) { func TestUpdateAmdGpuDataWithFakeSysfs(t *testing.T) {
dir := t.TempDir() tests := []struct {
cardPath := filepath.Join(dir, "card0") name string
devicePath := filepath.Join(cardPath, "device") writeGTT bool
hwmonPath := filepath.Join(devicePath, "hwmon", "hwmon0") wantMemoryUsed float64
require.NoError(t, os.MkdirAll(hwmonPath, 0o755)) wantMemoryTotal float64
}{
write := func(name, content string) { {
require.NoError(t, os.WriteFile(filepath.Join(devicePath, name), []byte(content), 0o644)) name: "sums vram and gtt when gtt is present",
writeGTT: true,
wantMemoryUsed: bytesToMegabytes(1073741824 + 536870912),
wantMemoryTotal: bytesToMegabytes(2147483648 + 4294967296),
},
{
name: "falls back to vram when gtt is missing",
writeGTT: false,
wantMemoryUsed: bytesToMegabytes(1073741824),
wantMemoryTotal: bytesToMegabytes(2147483648),
},
} }
write("vendor", "0x1002")
write("device", "0x1506")
write("revision", "0xc1")
write("gpu_busy_percent", "25")
write("mem_info_vram_used", "1073741824")
write("mem_info_vram_total", "2147483648")
require.NoError(t, os.WriteFile(filepath.Join(hwmonPath, "temp1_input"), []byte("45000"), 0o644))
require.NoError(t, os.WriteFile(filepath.Join(hwmonPath, "power1_input"), []byte("20000000"), 0o644))
// Pre-cache name so getAmdGpuName returns a known value (it uses system amdgpu.ids path) for _, tt := range tests {
cacheAmdgpuName("1506", "c1", "AMD Radeon 610M Graphics", true) t.Run(tt.name, func(t *testing.T) {
dir := t.TempDir()
cardPath := filepath.Join(dir, "card0")
devicePath := filepath.Join(cardPath, "device")
hwmonPath := filepath.Join(devicePath, "hwmon", "hwmon0")
require.NoError(t, os.MkdirAll(hwmonPath, 0o755))
gm := &GPUManager{GpuDataMap: make(map[string]*system.GPUData)} write := func(name, content string) {
ok := gm.updateAmdGpuData(cardPath) require.NoError(t, os.WriteFile(filepath.Join(devicePath, name), []byte(content), 0o644))
require.True(t, ok) }
write("vendor", "0x1002")
write("device", "0x1506")
write("revision", "0xc1")
write("gpu_busy_percent", "25")
write("mem_info_vram_used", "1073741824")
write("mem_info_vram_total", "2147483648")
if tt.writeGTT {
write("mem_info_gtt_used", "536870912")
write("mem_info_gtt_total", "4294967296")
}
require.NoError(t, os.WriteFile(filepath.Join(hwmonPath, "temp1_input"), []byte("45000"), 0o644))
require.NoError(t, os.WriteFile(filepath.Join(hwmonPath, "power1_input"), []byte("20000000"), 0o644))
gpu, ok := gm.GpuDataMap["card0"] // Pre-cache name so getAmdGpuName returns a known value (it uses system amdgpu.ids path)
require.True(t, ok) cacheAmdgpuName("1506", "c1", "AMD Radeon 610M Graphics", true)
assert.Equal(t, "AMD Radeon 610M", gpu.Name)
assert.Equal(t, 25.0, gpu.Usage) gm := &GPUManager{GpuDataMap: make(map[string]*system.GPUData)}
assert.Equal(t, bytesToMegabytes(1073741824), gpu.MemoryUsed) ok := gm.updateAmdGpuData(cardPath)
assert.Equal(t, bytesToMegabytes(2147483648), gpu.MemoryTotal) require.True(t, ok)
assert.Equal(t, 45.0, gpu.Temperature)
assert.Equal(t, 20.0, gpu.Power) gpu, ok := gm.GpuDataMap["card0"]
assert.Equal(t, 1.0, gpu.Count) require.True(t, ok)
assert.Equal(t, "AMD Radeon 610M", gpu.Name)
assert.Equal(t, 25.0, gpu.Usage)
assert.Equal(t, tt.wantMemoryUsed, gpu.MemoryUsed)
assert.Equal(t, tt.wantMemoryTotal, gpu.MemoryTotal)
assert.Equal(t, 45.0, gpu.Temperature)
assert.Equal(t, 20.0, gpu.Power)
assert.Equal(t, 1.0, gpu.Count)
})
}
} }
func TestLookupAmdgpuNameInFile(t *testing.T) { func TestLookupAmdgpuNameInFile(t *testing.T) {