add more cpu metrics (#1356)

- adds monitoring for cpu state time and per-core usage

Co-authored-by: Sven van Ginkel <svenvanginkel@icloud.com>
This commit is contained in:
henrygd
2025-11-01 12:57:48 -04:00
parent 3dbc48727e
commit 6a562ce03b
14 changed files with 407 additions and 46 deletions

View File

@@ -56,7 +56,7 @@ func GetBatteryStats() (batteryPercent uint8, batteryState uint8, err error) {
// if there were some errors, like missing data, skip it
continue
}
if bat.Full == 0 {
if bat == nil || bat.Full == 0 {
// skip batteries with no capacity. Charge is unlikely to ever be zero, but
// we can't guarantee that, so don't skip based on charge.
continue

View File

@@ -4,10 +4,12 @@ import (
"math"
"runtime"
"github.com/henrygd/beszel/internal/entities/system"
"github.com/shirou/gopsutil/v4/cpu"
)
var lastCpuTimes = make(map[uint16]cpu.TimesStat)
var lastPerCoreCpuTimes = make(map[uint16][]cpu.TimesStat)
// init initializes the CPU monitoring by storing the initial CPU times
// for the default 60-second cache interval.
@@ -15,23 +17,92 @@ func init() {
if times, err := cpu.Times(false); err == nil {
lastCpuTimes[60000] = times[0]
}
if perCoreTimes, err := cpu.Times(true); err == nil {
lastPerCoreCpuTimes[60000] = perCoreTimes
}
}
// getCpuPercent calculates the CPU usage percentage using cached previous measurements.
// It uses the specified cache time interval to determine the time window for calculation.
// Returns the CPU usage percentage (0-100) and any error encountered.
func getCpuPercent(cacheTimeMs uint16) (float64, error) {
// CpuMetrics contains detailed CPU usage breakdown
type CpuMetrics struct {
Total float64
User float64
System float64
Iowait float64
Steal float64
Idle float64
}
// getCpuMetrics calculates detailed CPU usage metrics using cached previous measurements.
// It returns percentages for total, user, system, iowait, and steal time.
func getCpuMetrics(cacheTimeMs uint16) (CpuMetrics, error) {
times, err := cpu.Times(false)
if err != nil || len(times) == 0 {
return 0, err
return CpuMetrics{}, err
}
// if cacheTimeMs is not in lastCpuTimes, use 60000 as fallback lastCpuTime
if _, ok := lastCpuTimes[cacheTimeMs]; !ok {
lastCpuTimes[cacheTimeMs] = lastCpuTimes[60000]
}
delta := calculateBusy(lastCpuTimes[cacheTimeMs], times[0])
t1 := lastCpuTimes[cacheTimeMs]
t2 := times[0]
t1All, _ := getAllBusy(t1)
t2All, _ := getAllBusy(t2)
totalDelta := t2All - t1All
if totalDelta <= 0 {
return CpuMetrics{}, nil
}
metrics := CpuMetrics{
Total: calculateBusy(t1, t2),
User: clampPercent((t2.User - t1.User) / totalDelta * 100),
System: clampPercent((t2.System - t1.System) / totalDelta * 100),
Iowait: clampPercent((t2.Iowait - t1.Iowait) / totalDelta * 100),
Steal: clampPercent((t2.Steal - t1.Steal) / totalDelta * 100),
Idle: clampPercent((t2.Idle - t1.Idle) / totalDelta * 100),
}
lastCpuTimes[cacheTimeMs] = times[0]
return delta, nil
return metrics, nil
}
// clampPercent ensures the percentage is between 0 and 100
func clampPercent(value float64) float64 {
return math.Min(100, math.Max(0, value))
}
// getPerCoreCpuUsage calculates per-core CPU busy usage as integer percentages (0-100).
// It uses cached previous measurements for the provided cache interval.
func getPerCoreCpuUsage(cacheTimeMs uint16) (system.Uint8Slice, error) {
perCoreTimes, err := cpu.Times(true)
if err != nil || len(perCoreTimes) == 0 {
return nil, err
}
// Initialize cache if needed
if _, ok := lastPerCoreCpuTimes[cacheTimeMs]; !ok {
lastPerCoreCpuTimes[cacheTimeMs] = lastPerCoreCpuTimes[60000]
}
lastTimes := lastPerCoreCpuTimes[cacheTimeMs]
// Limit to the number of cores available in both samples
length := len(perCoreTimes)
if len(lastTimes) < length {
length = len(lastTimes)
}
usage := make([]uint8, length)
for i := 0; i < length; i++ {
t1 := lastTimes[i]
t2 := perCoreTimes[i]
usage[i] = uint8(math.Round(calculateBusy(t1, t2)))
}
lastPerCoreCpuTimes[cacheTimeMs] = perCoreTimes
return usage, nil
}
// calculateBusy calculates the CPU busy percentage between two time points.
@@ -41,13 +112,10 @@ func calculateBusy(t1, t2 cpu.TimesStat) float64 {
t1All, t1Busy := getAllBusy(t1)
t2All, t2Busy := getAllBusy(t2)
if t2Busy <= t1Busy {
if t2All <= t1All || t2Busy <= t1Busy {
return 0
}
if t2All <= t1All {
return 100
}
return math.Min(100, math.Max(0, (t2Busy-t1Busy)/(t2All-t1All)*100))
return clampPercent((t2Busy - t1Busy) / (t2All - t1All) * 100)
}
// getAllBusy calculates the total CPU time and busy CPU time from CPU times statistics.

View File

@@ -83,12 +83,24 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats {
systemStats.Battery[1] = batteryState
}
// cpu percent
cpuPercent, err := getCpuPercent(cacheTimeMs)
// cpu metrics
cpuMetrics, err := getCpuMetrics(cacheTimeMs)
if err == nil {
systemStats.Cpu = twoDecimals(cpuPercent)
systemStats.Cpu = twoDecimals(cpuMetrics.Total)
systemStats.CpuBreakdown = []float64{
twoDecimals(cpuMetrics.User),
twoDecimals(cpuMetrics.System),
twoDecimals(cpuMetrics.Iowait),
twoDecimals(cpuMetrics.Steal),
twoDecimals(cpuMetrics.Idle),
}
} else {
slog.Error("Error getting cpu percent", "err", err)
slog.Error("Error getting cpu metrics", "err", err)
}
// per-core cpu usage
if perCoreUsage, err := getPerCoreCpuUsage(cacheTimeMs); err == nil {
systemStats.CpuCoresUsage = perCoreUsage
}
// load average