add nvtop integration and introduce GPU_COLLECTOR env var

This commit is contained in:
henrygd
2026-02-13 17:10:16 -05:00
parent 1f1a448aef
commit 14ecb1b069
6 changed files with 834 additions and 164 deletions

View File

@@ -32,6 +32,7 @@ func (gm *GPUManager) hasAmdSysfs() bool {
// collectAmdStats collects AMD GPU metrics directly from sysfs to avoid the overhead of rocm-smi
func (gm *GPUManager) collectAmdStats() error {
+sysfsPollInterval := 3000 * time.Millisecond
cards, err := filepath.Glob("/sys/class/drm/card*")
if err != nil {
return err
@@ -70,7 +71,7 @@ func (gm *GPUManager) collectAmdStats() error {
continue
}
failures = 0
-time.Sleep(rocmSmiInterval)
+time.Sleep(sysfsPollInterval)
}
}