nvml: add rtd3 memory workaround, fix slog imports (#1587)

* NVML: only read memory usage when GPU utilization is > 0%, so an idle GPU can enter RTD3 (runtime D3) suspend (#1522)

* logging: replace golang.org/x/exp/slog with the standard library log/slog everywhere; fixes log instance inconsistencies
This commit is contained in:
crimist
2026-01-05 12:26:59 -08:00
committed by GitHub
parent 4547ff7b5d
commit 475c53a55d
5 changed files with 31 additions and 19 deletions

View File

@@ -15,7 +15,7 @@ import (
"github.com/henrygd/beszel/internal/entities/system"
"golang.org/x/exp/slog"
"log/slog"
)
const (

View File

@@ -8,7 +8,7 @@ import (
"github.com/ebitengine/purego"
"github.com/henrygd/beszel/internal/entities/system"
"golang.org/x/exp/slog"
"log/slog"
)
// NVML constants and types
@@ -180,19 +180,33 @@ func (c *nvmlCollector) collect() {
var temp uint32
nvmlDeviceGetTemperature(device, 0, &temp) // 0 is NVML_TEMPERATURE_GPU
// Memory
var usedMem, totalMem uint64
if c.isV2 {
var memory nvmlMemoryV2
memory.Version = 0x02000028 // (2 << 24) | 40 bytes
nvmlDeviceGetMemoryInfo(device, uintptr(unsafe.Pointer(&memory)))
usedMem = memory.Used
totalMem = memory.Total
// only poll memory if GPU is active to avoid resetting 21 second suspend timer
if utilization.Gpu > 0 {
var usedMem, totalMem uint64
if c.isV2 {
var memory nvmlMemoryV2
memory.Version = 0x02000028 // (2 << 24) | 40 bytes
if ret := nvmlDeviceGetMemoryInfo(device, uintptr(unsafe.Pointer(&memory))); ret != nvmlReturn(nvmlSuccess) {
slog.Debug("NVML: MemoryInfo_v2 failed", "bdf", bdf, "ret", ret)
} else {
usedMem = memory.Used
totalMem = memory.Total
}
} else {
var memory nvmlMemoryV1
if ret := nvmlDeviceGetMemoryInfo(device, uintptr(unsafe.Pointer(&memory))); ret != nvmlReturn(nvmlSuccess) {
slog.Debug("NVML: MemoryInfo failed", "bdf", bdf, "ret", ret)
} else {
usedMem = memory.Used
totalMem = memory.Total
}
}
if totalMem > 0 {
gpu.MemoryUsed = float64(usedMem) / 1024 / 1024 / mebibytesInAMegabyte
gpu.MemoryTotal = float64(totalMem) / 1024 / 1024 / mebibytesInAMegabyte
}
} else {
var memory nvmlMemoryV1
nvmlDeviceGetMemoryInfo(device, uintptr(unsafe.Pointer(&memory)))
usedMem = memory.Used
totalMem = memory.Total
slog.Debug("NVML: Skipping memory info (utilization=0)", "bdf", bdf)
}
// Power
@@ -200,8 +214,6 @@ func (c *nvmlCollector) collect() {
nvmlDeviceGetPowerUsage(device, &power)
gpu.Temperature = float64(temp)
gpu.MemoryUsed = float64(usedMem) / 1024 / 1024 / mebibytesInAMegabyte
gpu.MemoryTotal = float64(totalMem) / 1024 / 1024 / mebibytesInAMegabyte
gpu.Usage += float64(utilization.Gpu)
gpu.Power += float64(power) / 1000.0
gpu.Count++

View File

@@ -8,7 +8,7 @@ import (
"strings"
"github.com/ebitengine/purego"
"golang.org/x/exp/slog"
"log/slog"
)
func openLibrary(name string) (uintptr, error) {

View File

@@ -9,7 +9,7 @@ import (
"github.com/henrygd/beszel/internal/common"
"github.com/henrygd/beszel/internal/entities/smart"
"golang.org/x/exp/slog"
"log/slog"
)
// HandlerContext provides context for request handlers

View File

@@ -19,7 +19,7 @@ import (
"github.com/henrygd/beszel/internal/entities/smart"
"golang.org/x/exp/slog"
"log/slog"
)
// SmartManager manages data collection for SMART devices