From fe30f996952631497caf7bd5d01c198f10faf156 Mon Sep 17 00:00:00 2001 From: Sven van Ginkel Date: Fri, 31 Oct 2025 22:11:22 +0100 Subject: [PATCH] [Feature] Add detailed CPU metrics (User, System, IOWait, Steal) with per-core monitoring (#1356) * Add user, system io wait * add per cpu core * add total --- agent/cpu.go | 99 +++++++++++++++ agent/system.go | 17 ++- internal/entities/system/system.go | 7 +- internal/site/src/components/charts/hooks.ts | 22 ++++ .../site/src/components/routes/system.tsx | 36 +++++- .../routes/system/cpu-cores-sheet.tsx | 119 ++++++++++++++++++ internal/site/src/types.d.ts | 10 ++ 7 files changed, 303 insertions(+), 7 deletions(-) create mode 100644 internal/site/src/components/routes/system/cpu-cores-sheet.tsx diff --git a/agent/cpu.go b/agent/cpu.go index bd4afc21..c93b0adc 100644 --- a/agent/cpu.go +++ b/agent/cpu.go @@ -8,6 +8,7 @@ import ( ) var lastCpuTimes = make(map[uint16]cpu.TimesStat) +var lastPerCoreCpuTimes = make(map[uint16][]cpu.TimesStat) // init initializes the CPU monitoring by storing the initial CPU times // for the default 60-second cache interval. @@ -15,6 +16,18 @@ func init() { if times, err := cpu.Times(false); err == nil { lastCpuTimes[60000] = times[0] } + if perCoreTimes, err := cpu.Times(true); err == nil { + lastPerCoreCpuTimes[60000] = perCoreTimes + } +} + +// CpuMetrics contains detailed CPU usage breakdown +type CpuMetrics struct { + Total float64 + User float64 + System float64 + Iowait float64 + Steal float64 } // getCpuPercent calculates the CPU usage percentage using cached previous measurements. @@ -34,6 +47,92 @@ func getCpuPercent(cacheTimeMs uint16) (float64, error) { return delta, nil } +// getCpuMetrics calculates detailed CPU usage metrics using cached previous measurements. +// It returns percentages for total, user, system, iowait, and steal time. 
+func getCpuMetrics(cacheTimeMs uint16) (CpuMetrics, error) { + times, err := cpu.Times(false) + if err != nil || len(times) == 0 { + return CpuMetrics{}, err + } + // if cacheTimeMs is not in lastCpuTimes, use 60000 as fallback lastCpuTime + if _, ok := lastCpuTimes[cacheTimeMs]; !ok { + lastCpuTimes[cacheTimeMs] = lastCpuTimes[60000] + } + + t1 := lastCpuTimes[cacheTimeMs] + t2 := times[0] + + t1All, t1Busy := getAllBusy(t1) + t2All, t2Busy := getAllBusy(t2) + + totalDelta := t2All - t1All + if totalDelta <= 0 { + return CpuMetrics{}, nil + } + + metrics := CpuMetrics{ + Total: clampPercent((t2Busy - t1Busy) / totalDelta * 100), + User: clampPercent((t2.User - t1.User) / totalDelta * 100), + System: clampPercent((t2.System - t1.System) / totalDelta * 100), + Iowait: clampPercent((t2.Iowait - t1.Iowait) / totalDelta * 100), + Steal: clampPercent((t2.Steal - t1.Steal) / totalDelta * 100), + } + + lastCpuTimes[cacheTimeMs] = times[0] + return metrics, nil +} + +// clampPercent ensures the percentage is between 0 and 100 +func clampPercent(value float64) float64 { + return math.Min(100, math.Max(0, value)) +} + +// getPerCoreCpuMetrics calculates per-core CPU usage metrics. +// Returns a map where the key is "cpu0", "cpu1", etc. and the value is an array of [user, system, iowait, steal] percentages. 
+func getPerCoreCpuMetrics(cacheTimeMs uint16) (map[string][4]float64, error) { + perCoreTimes, err := cpu.Times(true) + if err != nil || len(perCoreTimes) == 0 { + return nil, err + } + + // Initialize cache if needed + if _, ok := lastPerCoreCpuTimes[cacheTimeMs]; !ok { + lastPerCoreCpuTimes[cacheTimeMs] = lastPerCoreCpuTimes[60000] + } + + lastTimes := lastPerCoreCpuTimes[cacheTimeMs] + result := make(map[string][4]float64) + + // Calculate metrics for each core + for i, currentTime := range perCoreTimes { + if i >= len(lastTimes) { + break + } + + t1 := lastTimes[i] + t2 := currentTime + + t1All, _ := getAllBusy(t1) + t2All, _ := getAllBusy(t2) + + totalDelta := t2All - t1All + if totalDelta <= 0 { + continue + } + + // Store as [user, system, iowait, steal] + result[currentTime.CPU] = [4]float64{ + clampPercent((t2.User - t1.User) / totalDelta * 100), + clampPercent((t2.System - t1.System) / totalDelta * 100), + clampPercent((t2.Iowait - t1.Iowait) / totalDelta * 100), + clampPercent((t2.Steal - t1.Steal) / totalDelta * 100), + } + } + + lastPerCoreCpuTimes[cacheTimeMs] = perCoreTimes + return result, nil +} + // calculateBusy calculates the CPU busy percentage between two time points. // It computes the ratio of busy time to total time elapsed between t1 and t2, // returning a percentage clamped between 0 and 100. 
diff --git a/agent/system.go b/agent/system.go index dad48ed2..eb0d7c93 100644 --- a/agent/system.go +++ b/agent/system.go @@ -83,12 +83,21 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats { systemStats.Battery[1] = batteryState } - // cpu percent - cpuPercent, err := getCpuPercent(cacheTimeMs) + // cpu metrics + cpuMetrics, err := getCpuMetrics(cacheTimeMs) if err == nil { - systemStats.Cpu = twoDecimals(cpuPercent) + systemStats.Cpu = twoDecimals(cpuMetrics.Total) + systemStats.CpuUser = twoDecimals(cpuMetrics.User) + systemStats.CpuSystem = twoDecimals(cpuMetrics.System) + systemStats.CpuIowait = twoDecimals(cpuMetrics.Iowait) + systemStats.CpuSteal = twoDecimals(cpuMetrics.Steal) } else { - slog.Error("Error getting cpu percent", "err", err) + slog.Error("Error getting cpu metrics", "err", err) + } + + // per-core cpu metrics + if perCoreCpuMetrics, err := getPerCoreCpuMetrics(cacheTimeMs); err == nil && len(perCoreCpuMetrics) > 0 { + systemStats.CpuCores = perCoreCpuMetrics } // load average diff --git a/internal/entities/system/system.go b/internal/entities/system/system.go index 5177bbfb..216a7fea 100644 --- a/internal/entities/system/system.go +++ b/internal/entities/system/system.go @@ -11,7 +11,12 @@ import ( type Stats struct { Cpu float64 `json:"cpu" cbor:"0,keyasint"` MaxCpu float64 `json:"cpum,omitempty" cbor:"1,keyasint,omitempty"` - Mem float64 `json:"m" cbor:"2,keyasint"` + CpuUser float64 `json:"cpuu,omitempty" cbor:"33,keyasint,omitempty"` + CpuSystem float64 `json:"cpus,omitempty" cbor:"34,keyasint,omitempty"` + CpuIowait float64 `json:"cpui,omitempty" cbor:"35,keyasint,omitempty"` + CpuSteal float64 `json:"cpust,omitempty" cbor:"36,keyasint,omitempty"` + CpuCores map[string][4]float64 `json:"cpuc,omitempty" cbor:"37,keyasint,omitempty"` // [user, system, iowait, steal] per core + Mem float64 `json:"m" cbor:"2,keyasint"` MemUsed float64 `json:"mu" cbor:"3,keyasint"` MemPct float64 `json:"mp" cbor:"4,keyasint"` MemBuffCache 
float64 `json:"mb" cbor:"5,keyasint"` diff --git a/internal/site/src/components/charts/hooks.ts b/internal/site/src/components/charts/hooks.ts index 8bb75076..64d96343 100644 --- a/internal/site/src/components/charts/hooks.ts +++ b/internal/site/src/components/charts/hooks.ts @@ -118,6 +118,28 @@ export function useNetworkInterfaces(interfaces: SystemStats["ni"]) { dataKey: ({ stats }: SystemStatsRecord) => stats?.ni?.[key]?.[index], color: `hsl(${220 + (((sortedKeys.indexOf(key) * 360) / sortedKeys.length) % 360)}, 70%, 50%)`, + opacity: 0.3, + })) + }, + } +} + +// Assures consistent colors for CPU cores +export function useCpuCores(cores: SystemStats["cpuc"]) { + const keys = Object.keys(cores ?? {}) + // Sort cores by name (cpu0, cpu1, cpu2, etc.) + const sortedKeys = keys.sort((a, b) => { + const numA = Number.parseInt(a.replace("cpu", "")) + const numB = Number.parseInt(b.replace("cpu", "")) + return numA - numB + }) + return { + length: sortedKeys.length, + data: (index = 0) => { + return sortedKeys.map((key) => ({ + label: key, + dataKey: ({ stats }: SystemStatsRecord) => stats?.cpuc?.[key]?.[index], + color: `hsl(${(((sortedKeys.indexOf(key) * 360) / sortedKeys.length) % 360)}, 70%, 50%)`, opacity: 0.3, })) }, diff --git a/internal/site/src/components/routes/system.tsx b/internal/site/src/components/routes/system.tsx index 19e6a541..9c8d5030 100644 --- a/internal/site/src/components/routes/system.tsx +++ b/internal/site/src/components/routes/system.tsx @@ -73,6 +73,7 @@ import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from ". 
import { Separator } from "../ui/separator" import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "../ui/tooltip" import NetworkSheet from "./system/network-sheet" +import CpuCoresSheet from "./system/cpu-cores-sheet" import LineChartDefault from "../charts/line-chart" @@ -585,18 +586,49 @@ export default memo(function SystemDetail({ id }: { id: string }) { grid={grid} title={t`CPU Usage`} description={t`Average system-wide CPU utilization`} - cornerEl={maxValSelect} + cornerEl={ + <> + {maxValSelect} + + + } + legend={true} > (showMax ? stats?.cpum : stats?.cpu), color: 1, opacity: 0.4, }, + { + label: t`User`, + dataKey: ({ stats }) => stats?.cpuu, + color: 2, + opacity: 0.3, + }, + { + label: t`System`, + dataKey: ({ stats }) => stats?.cpus, + color: 3, + opacity: 0.3, + }, + { + label: t`IOWait`, + dataKey: ({ stats }) => stats?.cpui, + color: 4, + opacity: 0.3, + }, + { + label: t`Steal`, + dataKey: ({ stats }) => stats?.cpust, + color: 5, + opacity: 0.3, + }, ]} tickFormatter={(val) => `${toFixedFloat(val, 2)}%`} contentFormatter={({ value }) => `${decimalString(value)}%`} diff --git a/internal/site/src/components/routes/system/cpu-cores-sheet.tsx b/internal/site/src/components/routes/system/cpu-cores-sheet.tsx new file mode 100644 index 00000000..7fff6512 --- /dev/null +++ b/internal/site/src/components/routes/system/cpu-cores-sheet.tsx @@ -0,0 +1,119 @@ +import { t } from "@lingui/core/macro" +import { MoreHorizontalIcon } from "lucide-react" +import { memo, useRef, useState } from "react" +import AreaChartDefault from "@/components/charts/area-chart" +import ChartTimeSelect from "@/components/charts/chart-time-select" +import { Button } from "@/components/ui/button" +import { Sheet, SheetContent, SheetTrigger } from "@/components/ui/sheet" +import { DialogTitle } from "@/components/ui/dialog" +import { decimalString, toFixedFloat } from "@/lib/utils" +import type { ChartData, SystemStatsRecord } from "@/types" +import { ChartCard } from 
"../system" + +export default memo(function CpuCoresSheet({ + chartData, + dataEmpty, + grid, + maxValues, +}: { + chartData: ChartData + dataEmpty: boolean + grid: boolean + maxValues: boolean +}) { + const [cpuCoresOpen, setCpuCoresOpen] = useState(false) + const hasOpened = useRef(false) + + if (cpuCoresOpen && !hasOpened.current) { + hasOpened.current = true + } + + // Get list of CPU cores from the latest stats + const cpuCoresData = chartData.systemStats.at(-1)?.stats?.cpuc ?? {} + const coreNames = Object.keys(cpuCoresData).sort((a, b) => { + const numA = Number.parseInt(a.replace("cpu", "")) + const numB = Number.parseInt(b.replace("cpu", "")) + return numA - numB + }) + + if (coreNames.length === 0) { + return null + } + + return ( + + {t`Per-core CPU usage`} + + + + {hasOpened.current && ( + + + {coreNames.map((coreName) => ( + + { + const core = stats?.cpuc?.[coreName] + if (!core) return undefined + // Sum all metrics: user + system + iowait + steal + return core[0] + core[1] + core[2] + core[3] + }, + color: 1, + opacity: 0.4, + }, + { + label: t`User`, + dataKey: ({ stats }: SystemStatsRecord) => stats?.cpuc?.[coreName]?.[0], + color: 2, + opacity: 0.3, + }, + { + label: t`System`, + dataKey: ({ stats }: SystemStatsRecord) => stats?.cpuc?.[coreName]?.[1], + color: 3, + opacity: 0.3, + }, + { + label: t`IOWait`, + dataKey: ({ stats }: SystemStatsRecord) => stats?.cpuc?.[coreName]?.[2], + color: 4, + opacity: 0.3, + }, + { + label: t`Steal`, + dataKey: ({ stats }: SystemStatsRecord) => stats?.cpuc?.[coreName]?.[3], + color: 5, + opacity: 0.3, + }, + ]} + tickFormatter={(val) => `${toFixedFloat(val, 2)}%`} + contentFormatter={({ value }) => `${decimalString(value)}%`} + /> + + ))} + + )} + + ) +}) diff --git a/internal/site/src/types.d.ts b/internal/site/src/types.d.ts index efac7c17..337fda4d 100644 --- a/internal/site/src/types.d.ts +++ b/internal/site/src/types.d.ts @@ -84,6 +84,16 @@ export interface SystemStats { cpu: number /** peak cpu */ cpum?: 
number + /** cpu user percent */ + cpuu?: number + /** cpu system percent */ + cpus?: number + /** cpu iowait percent */ + cpui?: number + /** cpu steal percent */ + cpust?: number + /** per-core cpu metrics [user, system, iowait, steal] */ + cpuc?: Record<string, [number, number, number, number]> // TODO: remove these in future release in favor of la /** load average 1 minute */ l1?: number