diff --git a/agent/gpu.go b/agent/gpu.go
index 5ba70adb..205819f7 100644
--- a/agent/gpu.go
+++ b/agent/gpu.go
@@ -27,13 +27,10 @@ const (
nvidiaSmiInterval string = "4" // in seconds
tegraStatsInterval string = "3700" // in milliseconds
rocmSmiInterval time.Duration = 4300 * time.Millisecond
-
// Command retry and timeout constants
retryWaitTime time.Duration = 5 * time.Second
maxFailureRetries int = 5
- cmdBufferSize uint16 = 10 * 1024
-
// Unit Conversions
mebibytesInAMegabyte float64 = 1.024 // nvidia-smi reports memory in MiB
milliwattsInAWatt float64 = 1000.0 // tegrastats reports power in mW
@@ -42,10 +39,11 @@ const (
// GPUManager manages data collection for GPUs (either Nvidia or AMD)
type GPUManager struct {
sync.Mutex
- nvidiaSmi bool
- rocmSmi bool
- tegrastats bool
- GpuDataMap map[string]*system.GPUData
+ nvidiaSmi bool // detectGPUs sets this back to false when it enables tegrastats
+ rocmSmi bool // NOTE(review): presumably true when rocm-smi is available - the assignment is not visible in this diff
+ tegrastats bool // when true, NewGPUManager starts the tegrastats collector
+ intelGpuStats bool // set by detectGPUs when intel_gpu_top is found via exec.LookPath; NewGPUManager then starts its collector
+ GpuDataMap map[string]*system.GPUData // keyed by GPU id; parseIntelData updates it while holding the embedded mutex
}
// RocmSmiJson represents the JSON structure of rocm-smi output
@@ -66,6 +64,7 @@ type gpuCollector struct {
cmdArgs []string
parse func([]byte) bool // returns true if valid data was found
buf []byte
+ bufSize uint16
}
var errNoValidData = fmt.Errorf("no valid GPU data found") // Error for missing data
@@ -99,7 +98,7 @@ func (c *gpuCollector) collect() error {
scanner := bufio.NewScanner(stdout)
if c.buf == nil {
- c.buf = make([]byte, 0, cmdBufferSize)
+ c.buf = make([]byte, 0, c.bufSize)
}
scanner.Buffer(c.buf, bufio.MaxScanTokenSize)
@@ -244,20 +243,24 @@ func (gm *GPUManager) GetCurrentData() map[string]system.GPUData {
// copy / reset the data
gpuData := make(map[string]system.GPUData, len(gm.GpuDataMap))
for id, gpu := range gm.GpuDataMap {
- gpuAvg := *gpu
+ // avoid division by zero
+ count := max(gpu.Count, 1)
+ // average the data
+ gpuAvg := *gpu
gpuAvg.Temperature = twoDecimals(gpu.Temperature)
gpuAvg.MemoryUsed = twoDecimals(gpu.MemoryUsed)
gpuAvg.MemoryTotal = twoDecimals(gpu.MemoryTotal)
-
- // avoid division by zero
- if gpu.Count > 0 {
- gpuAvg.Usage = twoDecimals(gpu.Usage / gpu.Count)
- gpuAvg.Power = twoDecimals(gpu.Power / gpu.Count)
+ gpuAvg.Usage = twoDecimals(gpu.Usage / count)
+ gpuAvg.Power = twoDecimals(gpu.Power / count)
+ gpuAvg.Engines = make(map[string]float64, len(gpu.Engines))
+ for name, engine := range gpu.Engines {
+ gpuAvg.Engines[name] = twoDecimals(engine / count)
}
- // reset accumulators in the original
- gpu.Usage, gpu.Power, gpu.Count = 0, 0, 0
+ // reset accumulators in the original gpu data for next collection
+ gpu.Usage, gpu.Power, gpu.Count = gpuAvg.Usage, gpuAvg.Power, 1
+ gpu.Engines = gpuAvg.Engines
// append id to the name if there are multiple GPUs with the same name
if nameCounts[gpu.Name] > 1 {
@@ -284,18 +287,28 @@ func (gm *GPUManager) detectGPUs() error {
gm.tegrastats = true
gm.nvidiaSmi = false
}
- if gm.nvidiaSmi || gm.rocmSmi || gm.tegrastats {
+ if _, err := exec.LookPath(intelGpuStatsCmd); err == nil {
+ slog.Info("Intel GPU stats found")
+ gm.intelGpuStats = true
+ }
+ if gm.nvidiaSmi || gm.rocmSmi || gm.tegrastats || gm.intelGpuStats {
return nil
}
- return fmt.Errorf("no GPU found - install nvidia-smi, rocm-smi, or tegrastats")
+ return fmt.Errorf("no GPU found - install nvidia-smi, rocm-smi, tegrastats, or intel_gpu_top")
}
// startCollector starts the appropriate GPU data collector based on the command
func (gm *GPUManager) startCollector(command string) {
collector := gpuCollector{
- name: command,
+ name: command,
+ bufSize: 10 * 1024,
}
switch command {
+ case intelGpuStatsCmd:
+ slog.Info("Starting Intel GPU stats collector")
+ collector.cmdArgs = []string{"-s", intelGpuStatsInterval, "-J"}
+ collector.parse = gm.parseIntelData
+ go collector.start()
case nvidiaSmiCmd:
collector.cmdArgs = []string{
"-l", nvidiaSmiInterval,
@@ -344,6 +357,9 @@ func NewGPUManager() (*GPUManager, error) {
if gm.tegrastats {
gm.startCollector(tegraStatsCmd)
}
+ if gm.intelGpuStats {
+ gm.startCollector(intelGpuStatsCmd)
+ }
return &gm, nil
}
diff --git a/agent/gpu_intel.go b/agent/gpu_intel.go
new file mode 100644
index 00000000..f4df97c8
--- /dev/null
+++ b/agent/gpu_intel.go
@@ -0,0 +1,65 @@
+package agent
+
+import (
+	"encoding/json"
+	"log/slog"
+
+	"github.com/henrygd/beszel/internal/entities/system"
+)
+
+const (
+	intelGpuStatsCmd      string = "intel_gpu_top"
+	intelGpuStatsInterval string = "3800" // in milliseconds
+)
+
+// intelGpuStats holds the fields decoded from one intel_gpu_top JSON sample:
+// GPU power and the busy value of each engine, keyed by engine name.
+type intelGpuStats struct {
+	Power struct {
+		GPU float64 `json:"gpu"`
+	} `json:"power"`
+	Engines map[string]struct {
+		Busy float64 `json:"busy"`
+	} `json:"engines"`
+}
+
+// parseIntelData decodes one intel_gpu_top JSON sample and adds its power and
+// per-engine busy values to the accumulators stored under GPU id "0".
+// It returns false if output cannot be unmarshalled, true otherwise.
+func (gm *GPUManager) parseIntelData(output []byte) bool {
+	slog.Debug("Parsing Intel GPU stats")
+	var sample intelGpuStats
+	if err := json.Unmarshal(output, &sample); err != nil {
+		slog.Error("Error parsing Intel GPU stats", "err", err)
+		return false
+	}
+	gm.Lock()
+	defer gm.Unlock()
+
+	// only one gpu for now - cmd doesn't provide all by default
+	gpuData, ok := gm.GpuDataMap["0"]
+	if !ok {
+		gpuData = &system.GPUData{Name: "GPU"}
+		gm.GpuDataMap["0"] = gpuData
+	}
+	// Writing to a nil map panics, so make sure Engines exists even when the
+	// "0" entry was stored in GpuDataMap without one.
+	if gpuData.Engines == nil {
+		gpuData.Engines = make(map[string]float64, len(sample.Engines))
+	}
+
+	// Only positive power readings are accumulated.
+	if sample.Power.GPU > 0 {
+		gpuData.Power += sample.Power.GPU
+	}
+
+	for name, engine := range sample.Engines {
+		gpuData.Engines[name] += engine.Busy
+	}
+
+	gpuData.Count++
+
+	// Logged at debug level because this runs for every parsed sample.
+	slog.Debug("GPU Data", "gpuData", gpuData)
+	return true
+}
diff --git a/internal/entities/system/system.go b/internal/entities/system/system.go
index 47c2f0ab..7011b127 100644
--- a/internal/entities/system/system.go
+++ b/internal/entities/system/system.go
@@ -45,13 +45,14 @@ type Stats struct {
}
type GPUData struct {
- Name string `json:"n" cbor:"0,keyasint"`
- Temperature float64 `json:"-"`
- MemoryUsed float64 `json:"mu,omitempty" cbor:"1,keyasint,omitempty"`
- MemoryTotal float64 `json:"mt,omitempty" cbor:"2,keyasint,omitempty"`
- Usage float64 `json:"u" cbor:"3,keyasint"`
- Power float64 `json:"p,omitempty" cbor:"4,keyasint,omitempty"`
- Count float64 `json:"-"`
+ Name string `json:"n" cbor:"0,keyasint"`
+ Temperature float64 `json:"-"` // excluded from JSON output
+ MemoryUsed float64 `json:"mu,omitempty" cbor:"1,keyasint,omitempty"` // rounded (not averaged) by GPUManager.GetCurrentData
+ MemoryTotal float64 `json:"mt,omitempty" cbor:"2,keyasint,omitempty"` // rounded (not averaged) by GPUManager.GetCurrentData
+ Usage float64 `json:"u" cbor:"3,keyasint"` // summed by the agent's collectors; GetCurrentData divides by Count
+ Power float64 `json:"p,omitempty" cbor:"4,keyasint,omitempty"` // summed by the agent's collectors; GetCurrentData divides by Count
+ Count float64 `json:"-"` // number of samples added to the sums; excluded from JSON output
+ Engines map[string]float64 `json:"e,omitempty" cbor:"5,keyasint,omitempty"` // per-engine busy sums keyed by engine name; GetCurrentData divides by Count
}
type FsStats struct {
diff --git a/internal/site/src/components/charts/hooks.ts b/internal/site/src/components/charts/hooks.ts
index a5946026..a72d9639 100644
--- a/internal/site/src/components/charts/hooks.ts
+++ b/internal/site/src/components/charts/hooks.ts
@@ -115,7 +115,7 @@ export function useNetworkInterfaces(interfaces: SystemStats["ni"]) {
data: (index = 3) => {
return sortedKeys.map((key) => ({
label: key,
- dataKey: (stats: SystemStatsRecord) => stats.stats?.ni?.[key]?.[index],
+ dataKey: ({ stats }: SystemStatsRecord) => stats?.ni?.[key]?.[index],
color: `hsl(${220 + (((sortedKeys.indexOf(key) * 360) / sortedKeys.length) % 360)}, 70%, 50%)`,
opacity: 0.3,
@@ -123,3 +123,15 @@ export function useNetworkInterfaces(interfaces: SystemStats["ni"]) {
},
}
}
+
+/** Generates chart configurations for GPU engines: one series per engine name found in
+ * `stats.g[0].e` of the given record, sorted by name; a missing value is charted as 0. */
+export function useGpuEngines(systemStats?: SystemStatsRecord) {
+	const engines = Object.keys(systemStats?.stats.g?.[0]?.e ?? {}).sort()
+	return engines.map((engine, i) => ({
+		label: engine,
+		dataKey: ({ stats }: SystemStatsRecord) => stats?.g?.[0]?.e?.[engine] ?? 0,
+		color: `hsl(${220 + (((i * 360) / engines.length) % 360)}, 65%, 52%)`,
+		opacity: 0.35,
+	}))
+}
diff --git a/internal/site/src/components/routes/system.tsx b/internal/site/src/components/routes/system.tsx
index 345c7f57..0a47fadf 100644
--- a/internal/site/src/components/routes/system.tsx
+++ b/internal/site/src/components/routes/system.tsx
@@ -18,7 +18,7 @@ import AreaChartDefault from "@/components/charts/area-chart"
import ContainerChart from "@/components/charts/container-chart"
import DiskChart from "@/components/charts/disk-chart"
import GpuPowerChart from "@/components/charts/gpu-power-chart"
-import { useContainerChartConfigs } from "@/components/charts/hooks"
+import { useContainerChartConfigs, useGpuEngines } from "@/components/charts/hooks"
import LoadAverageChart from "@/components/charts/load-average-chart"
import MemChart from "@/components/charts/mem-chart"
import SwapChart from "@/components/charts/swap-chart"
@@ -61,6 +61,7 @@ import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from ".
import { Separator } from "../ui/separator"
import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "../ui/tooltip"
import NetworkSheet from "./system/network-sheet"
+import LineChartDefault from "../charts/line-chart"
type ChartTimeData = {
time: number
@@ -398,6 +399,7 @@ export default memo(function SystemDetail({ name }: { name: string }) {
const lastGpuVals = Object.values(systemStats.at(-1)?.stats.g ?? {})
const hasGpuData = lastGpuVals.length > 0
const hasGpuPowerData = lastGpuVals.some((gpu) => gpu.p !== undefined)
+ const hasGpuEnginesData = lastGpuVals.some((gpu) => gpu.e !== undefined)
let translatedStatus: string = system.status
if (system.status === SystemStatus.Up) {
@@ -770,6 +772,17 @@ export default memo(function SystemDetail({ name }: { name: string }) {
)}
+
+ {hasGpuEnginesData && (
+
+
+
+ )}
{/* GPU charts */}
@@ -897,6 +910,13 @@ export default memo(function SystemDetail({ name }: { name: string }) {
)
})
+function GpuEnginesChart({ chartData }: { chartData: ChartData }) {
+ const engineData = useGpuEngines(chartData.systemStats.at(-1))
+ return (
+ `${toFixedFloat(val, 2)}%`} contentFormatter={({ value }) => `${decimalString(value)}%`} />
+ )
+}
+
function FilterBar({ store = $containerFilter }: { store?: typeof $containerFilter }) {
const containerFilter = useStore(store)
const { t } = useLingui()
diff --git a/internal/site/src/types.d.ts b/internal/site/src/types.d.ts
index 5d8d25f7..4925ebbf 100644
--- a/internal/site/src/types.d.ts
+++ b/internal/site/src/types.d.ts
@@ -158,6 +158,8 @@ export interface GPUData {
u: number
/** power (w) */
p?: number
+ /** engines */
+ e?: Record<string, number>
}
export interface ExtraFsStats {