add more cpu metrics (#1356)

- adds monitoring for cpu state time and per-core usage

Co-authored-by: Sven van Ginkel <svenvanginkel@icloud.com>
This commit is contained in:
henrygd
2025-11-01 12:57:48 -04:00
parent 3dbc48727e
commit 6a562ce03b
14 changed files with 407 additions and 46 deletions

View File

@@ -3,6 +3,7 @@ package system
// TODO: this is confusing, make common package with common/types common/helpers etc
import (
"encoding/json"
"time"
"github.com/henrygd/beszel/internal/entities/container"
@@ -41,9 +42,28 @@ type Stats struct {
LoadAvg [3]float64 `json:"la,omitempty" cbor:"28,keyasint"`
Battery [2]uint8 `json:"bat,omitzero" cbor:"29,keyasint,omitzero"` // [percent, charge state, current]
MaxMem float64 `json:"mm,omitempty" cbor:"30,keyasint,omitempty"`
NetworkInterfaces map[string][4]uint64 `json:"ni,omitempty" cbor:"31,keyasint,omitempty"` // [upload bytes, download bytes, total upload, total download]
DiskIO [2]uint64 `json:"dio,omitzero" cbor:"32,keyasint,omitzero"` // [read bytes, write bytes]
MaxDiskIO [2]uint64 `json:"diom,omitzero" cbor:"-"` // [max read bytes, max write bytes]
NetworkInterfaces map[string][4]uint64 `json:"ni,omitempty" cbor:"31,keyasint,omitempty"` // [upload bytes, download bytes, total upload, total download]
DiskIO [2]uint64 `json:"dio,omitzero" cbor:"32,keyasint,omitzero"` // [read bytes, write bytes]
MaxDiskIO [2]uint64 `json:"diom,omitzero" cbor:"-"` // [max read bytes, max write bytes]
CpuBreakdown []float64 `json:"cpub,omitempty" cbor:"33,keyasint,omitempty"` // [user, system, iowait, steal, idle]
CpuCoresUsage Uint8Slice `json:"cpus,omitempty" cbor:"34,keyasint,omitempty"` // per-core busy usage [CPU0..]
}
// Uint8Slice wraps []uint8 to customize JSON encoding while keeping CBOR efficient.
// JSON: encodes as array of numbers (avoids base64 string).
// CBOR: falls back to default handling for []uint8 (byte string), keeping payload small.
type Uint8Slice []uint8
func (s Uint8Slice) MarshalJSON() ([]byte, error) {
if s == nil {
return []byte("null"), nil
}
// Convert to wider ints to force array-of-numbers encoding.
arr := make([]uint16, len(s))
for i, v := range s {
arr[i] = uint16(v)
}
return json.Marshal(arr)
}
type GPUData struct {

View File

@@ -718,7 +718,9 @@ func init() {
"type": "autodate"
}
],
"indexes": [],
"indexes": [
"CREATE INDEX ` + "`" + `idx_systems_status` + "`" + ` ON ` + "`" + `systems` + "`" + ` (` + "`" + `status` + "`" + `)"
],
"system": false
},
{

View File

@@ -177,6 +177,10 @@ func (rm *RecordManager) AverageSystemStats(db dbx.Builder, records RecordIds) *
stats := &tempStats
// necessary because uint8 is not big enough for the sum
batterySum := 0
// accumulate per-core usage across records
var cpuCoresSums []uint64
// accumulate cpu breakdown [user, system, iowait, steal, idle]
var cpuBreakdownSums []float64
count := float64(len(records))
tempCount := float64(0)
@@ -194,6 +198,15 @@ func (rm *RecordManager) AverageSystemStats(db dbx.Builder, records RecordIds) *
}
sum.Cpu += stats.Cpu
// accumulate cpu time breakdowns if present
if stats.CpuBreakdown != nil {
if len(cpuBreakdownSums) < len(stats.CpuBreakdown) {
cpuBreakdownSums = append(cpuBreakdownSums, make([]float64, len(stats.CpuBreakdown)-len(cpuBreakdownSums))...)
}
for i, v := range stats.CpuBreakdown {
cpuBreakdownSums[i] += v
}
}
sum.Mem += stats.Mem
sum.MemUsed += stats.MemUsed
sum.MemPct += stats.MemPct
@@ -217,6 +230,17 @@ func (rm *RecordManager) AverageSystemStats(db dbx.Builder, records RecordIds) *
sum.DiskIO[1] += stats.DiskIO[1]
batterySum += int(stats.Battery[0])
sum.Battery[1] = stats.Battery[1]
// accumulate per-core usage if present
if stats.CpuCoresUsage != nil {
if len(cpuCoresSums) < len(stats.CpuCoresUsage) {
// extend slices to accommodate core count
cpuCoresSums = append(cpuCoresSums, make([]uint64, len(stats.CpuCoresUsage)-len(cpuCoresSums))...)
}
for i, v := range stats.CpuCoresUsage {
cpuCoresSums[i] += uint64(v)
}
}
// Set peak values
sum.MaxCpu = max(sum.MaxCpu, stats.MaxCpu, stats.Cpu)
sum.MaxMem = max(sum.MaxMem, stats.MaxMem, stats.MemUsed)
@@ -385,6 +409,25 @@ func (rm *RecordManager) AverageSystemStats(db dbx.Builder, records RecordIds) *
sum.GPUData[id] = gpu
}
}
// Average per-core usage
if len(cpuCoresSums) > 0 {
avg := make(system.Uint8Slice, len(cpuCoresSums))
for i := range cpuCoresSums {
v := math.Round(float64(cpuCoresSums[i]) / count)
avg[i] = uint8(v)
}
sum.CpuCoresUsage = avg
}
// Average CPU breakdown
if len(cpuBreakdownSums) > 0 {
avg := make([]float64, len(cpuBreakdownSums))
for i := range cpuBreakdownSums {
avg[i] = twoDecimals(cpuBreakdownSums[i] / count)
}
sum.CpuBreakdown = avg
}
}
return sum

View File

@@ -17,6 +17,7 @@ export type DataPoint = {
dataKey: (data: SystemStatsRecord) => number | undefined
color: number | string
opacity: number
stackId?: string | number
}
export default function AreaChartDefault({
@@ -29,19 +30,23 @@ export default function AreaChartDefault({
domain,
legend,
itemSorter,
reverseStackOrder = false,
hideYAxis = false,
}: // logRender = false,
{
chartData: ChartData
max?: number
maxToggled?: boolean
tickFormatter: (value: number, index: number) => string
contentFormatter: ({ value, payload }: { value: number; payload: SystemStatsRecord }) => string
dataPoints?: DataPoint[]
domain?: [number, number]
legend?: boolean
itemSorter?: (a: any, b: any) => number
// logRender?: boolean
}) {
{
chartData: ChartData
max?: number
maxToggled?: boolean
tickFormatter: (value: number, index: number) => string
contentFormatter: ({ value, payload }: { value: number; payload: SystemStatsRecord }) => string
dataPoints?: DataPoint[]
domain?: [number, number]
legend?: boolean
itemSorter?: (a: any, b: any) => number
reverseStackOrder?: boolean
hideYAxis?: boolean
// logRender?: boolean
}) {
const { yAxisWidth, updateYAxisWidth } = useYAxisWidth()
// biome-ignore lint/correctness/useExhaustiveDependencies: ignore
@@ -56,12 +61,13 @@ export default function AreaChartDefault({
<div>
<ChartContainer
className={cn("h-full w-full absolute aspect-auto bg-card opacity-0 transition-opacity", {
"opacity-100": yAxisWidth,
"opacity-100": yAxisWidth || hideYAxis,
"ps-4": hideYAxis,
})}
>
<AreaChart accessibilityLayer data={chartData.systemStats} margin={chartMargin}>
<AreaChart reverseStackOrder={reverseStackOrder} accessibilityLayer data={chartData.systemStats} margin={hideYAxis ? { ...chartMargin, left: 5 } : chartMargin}>
<CartesianGrid vertical={false} />
<YAxis
{!hideYAxis && <YAxis
direction="ltr"
orientation={chartData.orientation}
className="tracking-tighter"
@@ -70,7 +76,7 @@ export default function AreaChartDefault({
tickFormatter={(value, index) => updateYAxisWidth(tickFormatter(value, index))}
tickLine={false}
axisLine={false}
/>
/>}
{xAxis(chartData)}
<ChartTooltip
animationEasing="ease-out"
@@ -99,6 +105,7 @@ export default function AreaChartDefault({
fillOpacity={dataPoint.opacity}
stroke={color}
isAnimationActive={false}
stackId={dataPoint.stackId}
/>
)
})}

View File

@@ -69,7 +69,7 @@ export function useContainerChartConfigs(containerData: ChartData["containerData
const hue = ((i * 360) / count) % 360
chartConfig[containerName] = {
label: containerName,
color: `hsl(${hue}, 60%, 55%)`,
color: `hsl(${hue}, var(--chart-saturation), var(--chart-lightness))`,
}
}

View File

@@ -1,5 +1,5 @@
import { t } from "@lingui/core/macro"
import { Plural, Trans, useLingui } from "@lingui/react/macro"
import { Trans, useLingui } from "@lingui/react/macro"
import { useStore } from "@nanostores/react"
import { getPagePath } from "@nanostores/router"
import { timeTicks } from "d3-time"
@@ -73,6 +73,7 @@ import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from ".
import { Separator } from "../ui/separator"
import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "../ui/tooltip"
import NetworkSheet from "./system/network-sheet"
import CpuCoresSheet from "./system/cpu-sheet"
import LineChartDefault from "../charts/line-chart"
@@ -97,8 +98,8 @@ function getTimeData(chartTime: ChartTimes, lastCreated: number) {
}
}
const buffer = chartTime === "1m" ? 400 : 20_000
const now = new Date(Date.now() + buffer)
// const buffer = chartTime === "1m" ? 400 : 20_000
const now = new Date(Date.now())
const startTime = chartTimeData[chartTime].getOffset(now)
const ticks = timeTicks(startTime, now, chartTimeData[chartTime].ticks ?? 12).map((date) => date.getTime())
const data = {
@@ -585,7 +586,12 @@ export default memo(function SystemDetail({ id }: { id: string }) {
grid={grid}
title={t`CPU Usage`}
description={t`Average system-wide CPU utilization`}
cornerEl={maxValSelect}
cornerEl={
<div className="flex gap-2">
{maxValSelect}
<CpuCoresSheet chartData={chartData} dataEmpty={dataEmpty} grid={grid} maxValues={maxValues} />
</div>
}
>
<AreaChartDefault
chartData={chartData}
@@ -1119,7 +1125,7 @@ export function ChartCard({
<CardDescription>{description}</CardDescription>
{cornerEl && <div className="py-1 grid sm:justify-end sm:absolute sm:top-3.5 sm:end-3.5">{cornerEl}</div>}
</CardHeader>
<div className={cn("ps-0 w-[calc(100%-1.5em)] relative group", legend ? "h-54 md:h-56" : "h-48 md:h-52")}>
<div className={cn("ps-0 w-[calc(100%-1.3em)] relative group", legend ? "h-54 md:h-56" : "h-48 md:h-52")}>
{
<Spinner
msg={empty ? t`Waiting for enough records to display` : undefined}

View File

@@ -0,0 +1,195 @@
import { t } from "@lingui/core/macro"
import { MoreHorizontalIcon } from "lucide-react"
import { memo, useMemo, useRef, useState } from "react"
import AreaChartDefault, { DataPoint } from "@/components/charts/area-chart"
import ChartTimeSelect from "@/components/charts/chart-time-select"
import { Button } from "@/components/ui/button"
import { Sheet, SheetContent, SheetTrigger } from "@/components/ui/sheet"
import { DialogTitle } from "@/components/ui/dialog"
import { compareSemVer, decimalString, parseSemVer, toFixedFloat } from "@/lib/utils"
import type { ChartData, SystemStatsRecord } from "@/types"
import { ChartCard } from "../system"
const minAgentVersion = parseSemVer("0.15.3")
export default memo(function CpuCoresSheet({
chartData,
dataEmpty,
grid,
maxValues,
}: {
chartData: ChartData
dataEmpty: boolean
grid: boolean
maxValues: boolean
}) {
const [cpuCoresOpen, setCpuCoresOpen] = useState(false)
const hasOpened = useRef(false)
const supportsBreakdown = useMemo(() => compareSemVer(chartData.agentVersion, minAgentVersion) >= 0, [chartData.agentVersion])
if (!supportsBreakdown) {
return null
}
if (cpuCoresOpen && !hasOpened.current) {
hasOpened.current = true
}
// Latest stats snapshot
const latest = chartData.systemStats.at(-1)?.stats
const cpus = latest?.cpus ?? []
const numCores = cpus.length
const hasBreakdown = (latest?.cpub?.length ?? 0) > 0
const breakdownDataPoints = [
{
label: t`Other`,
dataKey: ({ stats }: SystemStatsRecord) => {
const total = stats?.cpub?.reduce((acc, curr) => acc + curr, 0) ?? 0
return total > 0 ? 100 - total : null
},
color: `hsl(80, 65%, 52%)`,
opacity: 0.4,
stackId: "a"
},
{
label: "Steal",
dataKey: ({ stats }: SystemStatsRecord) => stats?.cpub?.[3],
color: 5,
opacity: 0.4,
stackId: "a"
},
{
label: "Idle",
dataKey: ({ stats }: SystemStatsRecord) => stats?.cpub?.[4],
color: 2,
opacity: 0.4,
stackId: "a"
},
{
label: "IOWait",
dataKey: ({ stats }: SystemStatsRecord) => stats?.cpub?.[2],
color: 4,
opacity: 0.4,
stackId: "a"
},
{
label: "User",
dataKey: ({ stats }: SystemStatsRecord) => stats?.cpub?.[0],
color: 1,
opacity: 0.4,
stackId: "a"
},
{
label: "System",
dataKey: ({ stats }: SystemStatsRecord) => stats?.cpub?.[1],
color: 3,
opacity: 0.4,
stackId: "a"
},
] as DataPoint[]
return (
<Sheet open={cpuCoresOpen} onOpenChange={setCpuCoresOpen}>
<DialogTitle className="sr-only">{t`CPU Usage`}</DialogTitle>
<SheetTrigger asChild>
<Button
title={t`View more`}
variant="outline"
size="icon"
className="shrink-0 max-sm:absolute max-sm:top-3 max-sm:end-3"
>
<MoreHorizontalIcon />
</Button>
</SheetTrigger>
{hasOpened.current && (
<SheetContent aria-describedby={undefined} className="overflow-auto w-200 !max-w-full p-4 sm:p-6">
<ChartTimeSelect className="w-[calc(100%-2em)] bg-card" agentVersion={chartData.agentVersion} />
{hasBreakdown && (
<ChartCard
key="cpu-breakdown"
empty={dataEmpty}
grid={grid}
title={t`CPU Time Breakdown`}
description={t`Percentage of time spent in each state`}
legend={true}
className="min-h-auto"
>
<AreaChartDefault
reverseStackOrder={true}
chartData={chartData}
maxToggled={maxValues}
legend={true}
dataPoints={breakdownDataPoints}
tickFormatter={(val) => `${toFixedFloat(val, 2)}%`}
contentFormatter={({ value }) => `${decimalString(value)}%`}
itemSorter={() => 1}
domain={[0, 100]}
/>
</ChartCard>
)}
{numCores > 0 && (
<ChartCard
key="cpu-cores-all"
empty={dataEmpty}
grid={grid}
title={t`CPU Cores`}
legend={numCores < 10}
description={t`Per-core average utilization`}
className="min-h-auto"
>
<AreaChartDefault
hideYAxis={true}
chartData={chartData}
maxToggled={maxValues}
legend={numCores < 10}
dataPoints={Array.from({ length: numCores }).map((_, i) => ({
label: `CPU ${i}`,
dataKey: ({ stats }: SystemStatsRecord) => stats?.cpus?.[i] ?? 1 / (stats?.cpus?.length ?? 1),
color: `hsl(${226 + (((i * 360) / Math.max(1, numCores)) % 360)}, var(--chart-saturation), var(--chart-lightness))`,
opacity: 0.35,
stackId: "a"
}))}
tickFormatter={(val) => `${val}%`}
contentFormatter={({ value }) => `${value}%`}
reverseStackOrder={true}
itemSorter={() => 1}
/>
</ChartCard>
)}
{Array.from({ length: numCores }).map((_, i) => (
<ChartCard
key={`cpu-core-${i}`}
empty={dataEmpty}
grid={grid}
title={`CPU ${i}`}
description={t`Per-core average utilization`}
legend={false}
className="min-h-auto"
>
<AreaChartDefault
chartData={chartData}
maxToggled={maxValues}
legend={false}
dataPoints={[
{
label: t`Usage`,
dataKey: ({ stats }: SystemStatsRecord) => stats?.cpus?.[i],
color: `hsl(${226 + (((i * 360) / Math.max(1, numCores)) % 360)}, 65%, 52%)`,
opacity: 0.35,
},
]}
tickFormatter={(val) => `${val}%`}
contentFormatter={({ value }) => `${value}%`}
/>
</ChartCard>
))}
</SheetContent>
)}
</Sheet>
)
})

View File

@@ -53,7 +53,7 @@ export default memo(function NetworkSheet({
</SheetTrigger>
{hasOpened.current && (
<SheetContent aria-describedby={undefined} className="overflow-auto w-200 !max-w-full p-4 sm:p-6">
<ChartTimeSelect className="w-[calc(100%-2em)]" agentVersion={chartData.agentVersion} />
<ChartTimeSelect className="w-[calc(100%-2em)] bg-card" agentVersion={chartData.agentVersion} />
<ChartCard
empty={dataEmpty}
grid={grid}

View File

@@ -32,6 +32,8 @@
--chart-4: hsl(280 65% 60%);
--chart-5: hsl(340 75% 55%);
--table-header: hsl(225, 6%, 97%);
--chart-saturation: 65%;
--chart-lightness: 50%;
}
.dark {
@@ -51,11 +53,13 @@
--accent: hsl(220 5% 15.5%);
--accent-foreground: hsl(220 2% 98%);
--destructive: hsl(0 62% 46%);
--border: hsl(220 3% 16%);
--border: hsl(220 3% 17%);
--input: hsl(220 4% 22%);
--ring: hsl(220 4% 80%);
--table-header: hsl(220, 6%, 13%);
--radius: 0.8rem;
--chart-saturation: 60%;
--chart-lightness: 55%;
}
@theme inline {

View File

@@ -287,7 +287,7 @@ export function formatBytes(
}
}
export const chartMargin = { top: 12 }
export const chartMargin = { top: 12, right: 5 }
/**
* Retuns value of system host, truncating full path if socket.

View File

@@ -84,6 +84,10 @@ export interface SystemStats {
cpu: number
/** peak cpu */
cpum?: number
/** cpu breakdown [user, system, iowait, steal, idle] (0-100 integers) */
cpub?: number[]
/** per-core cpu usage [CPU0..] (0-100 integers) */
cpus?: number[]
// TODO: remove these in future release in favor of la
/** load average 1 minute */
l1?: number