mirror of
https://github.com/henrygd/beszel.git
synced 2026-04-14 00:41:50 +02:00
fix: GPU ID collision between Intel and NVIDIA collectors (#1522)
- Prefix the Intel GPU ID as "i0" to avoid colliding with NVML/NVIDIA index IDs such as "0"
- Update the frontend GPU engines chart to select a GPU by ID instead of assuming g[0]
- Adjust tests to use the new Intel GPU ID
This commit is contained in:
@@ -237,10 +237,11 @@ func (gm *GPUManager) parseAmdData(output []byte) bool {
|
|||||||
totalMemory, _ := strconv.ParseFloat(v.MemoryTotal, 64)
|
totalMemory, _ := strconv.ParseFloat(v.MemoryTotal, 64)
|
||||||
usage, _ := strconv.ParseFloat(v.Usage, 64)
|
usage, _ := strconv.ParseFloat(v.Usage, 64)
|
||||||
|
|
||||||
if _, ok := gm.GpuDataMap[v.ID]; !ok {
|
id := v.ID
|
||||||
gm.GpuDataMap[v.ID] = &system.GPUData{Name: v.Name}
|
if _, ok := gm.GpuDataMap[id]; !ok {
|
||||||
|
gm.GpuDataMap[id] = &system.GPUData{Name: v.Name}
|
||||||
}
|
}
|
||||||
gpu := gm.GpuDataMap[v.ID]
|
gpu := gm.GpuDataMap[id]
|
||||||
gpu.Temperature, _ = strconv.ParseFloat(v.Temperature, 64)
|
gpu.Temperature, _ = strconv.ParseFloat(v.Temperature, 64)
|
||||||
gpu.MemoryUsed = bytesToMegabytes(memoryUsage)
|
gpu.MemoryUsed = bytesToMegabytes(memoryUsage)
|
||||||
gpu.MemoryTotal = bytesToMegabytes(totalMemory)
|
gpu.MemoryTotal = bytesToMegabytes(totalMemory)
|
||||||
|
|||||||
@@ -27,10 +27,11 @@ func (gm *GPUManager) updateIntelFromStats(sample *intelGpuStats) bool {
|
|||||||
defer gm.Unlock()
|
defer gm.Unlock()
|
||||||
|
|
||||||
// only one gpu for now - cmd doesn't provide all by default
|
// only one gpu for now - cmd doesn't provide all by default
|
||||||
gpuData, ok := gm.GpuDataMap["0"]
|
id := "i0" // prefix with i to avoid conflicts with nvidia card ids
|
||||||
|
gpuData, ok := gm.GpuDataMap[id]
|
||||||
if !ok {
|
if !ok {
|
||||||
gpuData = &system.GPUData{Name: "GPU", Engines: make(map[string]float64)}
|
gpuData = &system.GPUData{Name: "GPU", Engines: make(map[string]float64)}
|
||||||
gm.GpuDataMap["0"] = gpuData
|
gm.GpuDataMap[id] = gpuData
|
||||||
}
|
}
|
||||||
|
|
||||||
gpuData.Power += sample.PowerGPU
|
gpuData.Power += sample.PowerGPU
|
||||||
|
|||||||
@@ -1385,7 +1385,7 @@ func TestIntelUpdateFromStats(t *testing.T) {
|
|||||||
ok := gm.updateIntelFromStats(&sample1)
|
ok := gm.updateIntelFromStats(&sample1)
|
||||||
assert.True(t, ok)
|
assert.True(t, ok)
|
||||||
|
|
||||||
gpu := gm.GpuDataMap["0"]
|
gpu := gm.GpuDataMap["i0"]
|
||||||
require.NotNil(t, gpu)
|
require.NotNil(t, gpu)
|
||||||
assert.Equal(t, "GPU", gpu.Name)
|
assert.Equal(t, "GPU", gpu.Name)
|
||||||
assert.EqualValues(t, 10.5, gpu.Power)
|
assert.EqualValues(t, 10.5, gpu.Power)
|
||||||
@@ -1407,7 +1407,7 @@ func TestIntelUpdateFromStats(t *testing.T) {
|
|||||||
ok = gm.updateIntelFromStats(&sample2)
|
ok = gm.updateIntelFromStats(&sample2)
|
||||||
assert.True(t, ok)
|
assert.True(t, ok)
|
||||||
|
|
||||||
gpu = gm.GpuDataMap["0"]
|
gpu = gm.GpuDataMap["i0"]
|
||||||
require.NotNil(t, gpu)
|
require.NotNil(t, gpu)
|
||||||
assert.EqualValues(t, 10.5, gpu.Power)
|
assert.EqualValues(t, 10.5, gpu.Power)
|
||||||
assert.EqualValues(t, 30.0, gpu.Engines["Render/3D"]) // 20 + 10
|
assert.EqualValues(t, 30.0, gpu.Engines["Render/3D"]) // 20 + 10
|
||||||
@@ -1446,7 +1446,7 @@ echo "298 295 278 51 2.20 3.12 1675 942 5.75 1 2 9.50
|
|||||||
t.Fatalf("collectIntelStats error: %v", err)
|
t.Fatalf("collectIntelStats error: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
gpu := gm.GpuDataMap["0"]
|
gpu := gm.GpuDataMap["i0"]
|
||||||
require.NotNil(t, gpu)
|
require.NotNil(t, gpu)
|
||||||
// Power should be sum of samples 2-4 (first is skipped): 2.0 + 1.8 + 2.2 = 6.0
|
// Power should be sum of samples 2-4 (first is skipped): 2.0 + 1.8 + 2.2 = 6.0
|
||||||
assert.EqualValues(t, 6.0, gpu.Power)
|
assert.EqualValues(t, 6.0, gpu.Power)
|
||||||
|
|||||||
@@ -409,26 +409,18 @@ export default memo(function SystemDetail({ id }: { id: string }) {
|
|||||||
if (lastGpus) {
|
if (lastGpus) {
|
||||||
// check if there are any GPUs at all
|
// check if there are any GPUs at all
|
||||||
hasGpuData = Object.keys(lastGpus).length > 0
|
hasGpuData = Object.keys(lastGpus).length > 0
|
||||||
// check if there are any GPUs with engines
|
// check if there are any GPUs with engines or power data
|
||||||
for (let i = 0; i < systemStats.length && !hasGpuEnginesData; i++) {
|
for (let i = 0; i < systemStats.length && (!hasGpuEnginesData || !hasGpuPowerData); i++) {
|
||||||
const gpus = systemStats[i].stats?.g
|
const gpus = systemStats[i].stats?.g
|
||||||
if (!gpus) continue
|
if (!gpus) continue
|
||||||
for (const id in gpus) {
|
for (const id in gpus) {
|
||||||
if (gpus[id].e !== undefined) {
|
if (!hasGpuEnginesData && gpus[id].e !== undefined) {
|
||||||
hasGpuEnginesData = true
|
hasGpuEnginesData = true
|
||||||
break
|
|
||||||
}
|
}
|
||||||
}
|
if (!hasGpuPowerData && (gpus[id].p !== undefined || gpus[id].pp !== undefined)) {
|
||||||
}
|
|
||||||
// check if there are any GPUs with power data
|
|
||||||
for (let i = 0; i < systemStats.length && !hasGpuPowerData; i++) {
|
|
||||||
const gpus = systemStats[i].stats?.g
|
|
||||||
if (!gpus) continue
|
|
||||||
for (const id in gpus) {
|
|
||||||
if (gpus[id].p !== undefined || gpus[id].pp !== undefined) {
|
|
||||||
hasGpuPowerData = true
|
hasGpuPowerData = true
|
||||||
break
|
|
||||||
}
|
}
|
||||||
|
if (hasGpuEnginesData && hasGpuPowerData) break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -896,16 +888,30 @@ export default memo(function SystemDetail({ id }: { id: string }) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
function GpuEnginesChart({ chartData }: { chartData: ChartData }) {
|
function GpuEnginesChart({ chartData }: { chartData: ChartData }) {
|
||||||
const dataPoints: DataPoint[] = []
|
const { gpuId, engines } = useMemo(() => {
|
||||||
const engines = Object.keys(chartData.systemStats?.at(-1)?.stats.g?.[0]?.e ?? {}).sort()
|
for (let i = chartData.systemStats.length - 1; i >= 0; i--) {
|
||||||
for (const engine of engines) {
|
const gpus = chartData.systemStats[i].stats?.g
|
||||||
dataPoints.push({
|
if (!gpus) continue
|
||||||
label: engine,
|
for (const id in gpus) {
|
||||||
dataKey: ({ stats }: SystemStatsRecord) => stats?.g?.[0]?.e?.[engine] ?? 0,
|
if (gpus[id].e) {
|
||||||
color: `hsl(${140 + (((engines.indexOf(engine) * 360) / engines.length) % 360)}, 65%, 52%)`,
|
return { gpuId: id, engines: Object.keys(gpus[id].e).sort() }
|
||||||
opacity: 0.35,
|
}
|
||||||
})
|
}
|
||||||
|
}
|
||||||
|
return { gpuId: null, engines: [] }
|
||||||
|
}, [chartData.systemStats])
|
||||||
|
|
||||||
|
if (!gpuId) {
|
||||||
|
return null
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const dataPoints: DataPoint[] = engines.map((engine, i) => ({
|
||||||
|
label: engine,
|
||||||
|
dataKey: ({ stats }: SystemStatsRecord) => stats?.g?.[gpuId]?.e?.[engine] ?? 0,
|
||||||
|
color: `hsl(${140 + (((i * 360) / engines.length) % 360)}, 65%, 52%)`,
|
||||||
|
opacity: 0.35,
|
||||||
|
}))
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<LineChartDefault
|
<LineChartDefault
|
||||||
legend={true}
|
legend={true}
|
||||||
|
|||||||
Reference in New Issue
Block a user