include GTT memory in AMD GPU metrics (#1569)

This commit is contained in:
henrygd
2026-02-13 20:06:37 -05:00
parent 7d6c0caafc
commit 283fa9d5c2
2 changed files with 69 additions and 32 deletions

View File

@@ -103,8 +103,17 @@ func (gm *GPUManager) updateAmdGpuData(cardPath string) bool {
// Read all sysfs values first (no lock needed - these can be slow) // Read all sysfs values first (no lock needed - these can be slow)
usage, usageErr := readSysfsFloat(filepath.Join(devicePath, "gpu_busy_percent")) usage, usageErr := readSysfsFloat(filepath.Join(devicePath, "gpu_busy_percent"))
memUsed, memUsedErr := readSysfsFloat(filepath.Join(devicePath, "mem_info_vram_used")) vramUsed, memUsedErr := readSysfsFloat(filepath.Join(devicePath, "mem_info_vram_used"))
memTotal, _ := readSysfsFloat(filepath.Join(devicePath, "mem_info_vram_total")) vramTotal, _ := readSysfsFloat(filepath.Join(devicePath, "mem_info_vram_total"))
memUsed := vramUsed
memTotal := vramTotal
// if GTT is in use (mem_info_gtt_used reads > 0) and mem_info_gtt_total is readable, add both to the memory used and total; otherwise report VRAM only (https://github.com/henrygd/beszel/issues/1569#issuecomment-3837640484)
if gttUsed, err := readSysfsFloat(filepath.Join(devicePath, "mem_info_gtt_used")); err == nil && gttUsed > 0 {
if gttTotal, err := readSysfsFloat(filepath.Join(devicePath, "mem_info_gtt_total")); err == nil {
memUsed += gttUsed
memTotal += gttTotal
}
}
var temp, power float64 var temp, power float64
hwmons, _ := filepath.Glob(filepath.Join(devicePath, "hwmon/hwmon*")) hwmons, _ := filepath.Glob(filepath.Join(devicePath, "hwmon/hwmon*"))

View File

@@ -119,6 +119,28 @@ func TestAmdgpuNameCacheRoundTrip(t *testing.T) {
} }
func TestUpdateAmdGpuDataWithFakeSysfs(t *testing.T) { func TestUpdateAmdGpuDataWithFakeSysfs(t *testing.T) {
tests := []struct {
name string
writeGTT bool
wantMemoryUsed float64
wantMemoryTotal float64
}{
{
name: "sums vram and gtt when gtt is present",
writeGTT: true,
wantMemoryUsed: bytesToMegabytes(1073741824 + 536870912),
wantMemoryTotal: bytesToMegabytes(2147483648 + 4294967296),
},
{
name: "falls back to vram when gtt is missing",
writeGTT: false,
wantMemoryUsed: bytesToMegabytes(1073741824),
wantMemoryTotal: bytesToMegabytes(2147483648),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
dir := t.TempDir() dir := t.TempDir()
cardPath := filepath.Join(dir, "card0") cardPath := filepath.Join(dir, "card0")
devicePath := filepath.Join(cardPath, "device") devicePath := filepath.Join(cardPath, "device")
@@ -134,6 +156,10 @@ func TestUpdateAmdGpuDataWithFakeSysfs(t *testing.T) {
write("gpu_busy_percent", "25") write("gpu_busy_percent", "25")
write("mem_info_vram_used", "1073741824") write("mem_info_vram_used", "1073741824")
write("mem_info_vram_total", "2147483648") write("mem_info_vram_total", "2147483648")
if tt.writeGTT {
write("mem_info_gtt_used", "536870912")
write("mem_info_gtt_total", "4294967296")
}
require.NoError(t, os.WriteFile(filepath.Join(hwmonPath, "temp1_input"), []byte("45000"), 0o644)) require.NoError(t, os.WriteFile(filepath.Join(hwmonPath, "temp1_input"), []byte("45000"), 0o644))
require.NoError(t, os.WriteFile(filepath.Join(hwmonPath, "power1_input"), []byte("20000000"), 0o644)) require.NoError(t, os.WriteFile(filepath.Join(hwmonPath, "power1_input"), []byte("20000000"), 0o644))
@@ -148,11 +174,13 @@ func TestUpdateAmdGpuDataWithFakeSysfs(t *testing.T) {
require.True(t, ok) require.True(t, ok)
assert.Equal(t, "AMD Radeon 610M", gpu.Name) assert.Equal(t, "AMD Radeon 610M", gpu.Name)
assert.Equal(t, 25.0, gpu.Usage) assert.Equal(t, 25.0, gpu.Usage)
assert.Equal(t, bytesToMegabytes(1073741824), gpu.MemoryUsed) assert.Equal(t, tt.wantMemoryUsed, gpu.MemoryUsed)
assert.Equal(t, bytesToMegabytes(2147483648), gpu.MemoryTotal) assert.Equal(t, tt.wantMemoryTotal, gpu.MemoryTotal)
assert.Equal(t, 45.0, gpu.Temperature) assert.Equal(t, 45.0, gpu.Temperature)
assert.Equal(t, 20.0, gpu.Power) assert.Equal(t, 20.0, gpu.Power)
assert.Equal(t, 1.0, gpu.Count) assert.Equal(t, 1.0, gpu.Count)
})
}
} }
func TestLookupAmdgpuNameInFile(t *testing.T) { func TestLookupAmdgpuNameInFile(t *testing.T) {