mirror of
https://github.com/henrygd/beszel.git
synced 2026-03-27 07:56:19 +01:00
Compare commits
2 Commits
2bd85e04fc
...
nvml
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5f74438bd7 | ||
|
|
ea354ec030 |
@@ -72,7 +72,7 @@ type nvmlCollector struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (c *nvmlCollector) init() error {
|
func (c *nvmlCollector) init() error {
|
||||||
slog.Debug("NVML: Initializing")
|
slog.Info("NVML: Initializing")
|
||||||
libPath := getNVMLPath()
|
libPath := getNVMLPath()
|
||||||
|
|
||||||
lib, err := openLibrary(libPath)
|
lib, err := openLibrary(libPath)
|
||||||
@@ -159,7 +159,7 @@ func (c *nvmlCollector) collect() {
|
|||||||
gpu := c.gm.GpuDataMap[id]
|
gpu := c.gm.GpuDataMap[id]
|
||||||
|
|
||||||
if bdf != "" && !c.isGPUActive(bdf) {
|
if bdf != "" && !c.isGPUActive(bdf) {
|
||||||
slog.Debug("NVML: GPU is suspended, skipping", "bdf", bdf)
|
slog.Info("NVML: GPU is suspended, skipping", "bdf", bdf)
|
||||||
gpu.Temperature = 0
|
gpu.Temperature = 0
|
||||||
gpu.MemoryUsed = 0
|
gpu.MemoryUsed = 0
|
||||||
continue
|
continue
|
||||||
@@ -168,13 +168,13 @@ func (c *nvmlCollector) collect() {
|
|||||||
// Utilization
|
// Utilization
|
||||||
var utilization nvmlUtilization
|
var utilization nvmlUtilization
|
||||||
if ret := nvmlDeviceGetUtilizationRates(device, &utilization); ret != nvmlReturn(nvmlSuccess) {
|
if ret := nvmlDeviceGetUtilizationRates(device, &utilization); ret != nvmlReturn(nvmlSuccess) {
|
||||||
slog.Debug("NVML: Utilization failed (GPU likely suspended)", "bdf", bdf, "ret", ret)
|
slog.Info("NVML: Utilization failed (GPU likely suspended)", "bdf", bdf, "ret", ret)
|
||||||
gpu.Temperature = 0
|
gpu.Temperature = 0
|
||||||
gpu.MemoryUsed = 0
|
gpu.MemoryUsed = 0
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
slog.Debug("NVML: Collecting data for GPU", "bdf", bdf)
|
slog.Info("NVML: Collecting data for GPU", "bdf", bdf)
|
||||||
|
|
||||||
// Temperature
|
// Temperature
|
||||||
var temp uint32
|
var temp uint32
|
||||||
@@ -205,6 +205,6 @@ func (c *nvmlCollector) collect() {
|
|||||||
gpu.Usage += float64(utilization.Gpu)
|
gpu.Usage += float64(utilization.Gpu)
|
||||||
gpu.Power += float64(power) / 1000.0
|
gpu.Power += float64(power) / 1000.0
|
||||||
gpu.Count++
|
gpu.Count++
|
||||||
slog.Debug("NVML: Collected data", "gpu", gpu)
|
slog.Info("NVML: Collected data", "gpu", gpu)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -29,12 +29,12 @@ func (c *nvmlCollector) isGPUActive(bdf string) bool {
|
|||||||
statusPath := filepath.Join("/sys/bus/pci/devices", bdf, "power/runtime_status")
|
statusPath := filepath.Join("/sys/bus/pci/devices", bdf, "power/runtime_status")
|
||||||
status, err := os.ReadFile(statusPath)
|
status, err := os.ReadFile(statusPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Debug("NVML: Can't read runtime_status", "bdf", bdf, "err", err)
|
slog.Info("NVML: Can't read runtime_status", "bdf", bdf, "err", err)
|
||||||
return true // Assume active if we can't read status
|
return true // Assume active if we can't read status
|
||||||
}
|
}
|
||||||
statusStr := strings.TrimSpace(string(status))
|
statusStr := strings.TrimSpace(string(status))
|
||||||
if statusStr != "active" && statusStr != "resuming" {
|
if statusStr != "active" && statusStr != "resuming" {
|
||||||
slog.Debug("NVML: GPU not active", "bdf", bdf, "status", statusStr)
|
slog.Info("NVML: GPU is not active", "bdf", bdf, "status", statusStr)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -47,7 +47,6 @@ func (c *nvmlCollector) isGPUActive(bdf string) bool {
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
pstateStr := strings.TrimSpace(string(pstate))
|
pstateStr := strings.TrimSpace(string(pstate))
|
||||||
if pstateStr != "D0" {
|
if pstateStr != "D0" {
|
||||||
slog.Debug("NVML: GPU not in D0 state", "bdf", bdf, "pstate", pstateStr)
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user