Compare commits

..

3 Commits

Author SHA1 Message Date
henrygd
fbbdd49fc2 collect top process 2025-11-03 21:50:33 -05:00
henrygd
fc0947aa04 fix windows extra disk backslash issue (#1361) 2025-11-03 17:42:08 -05:00
henrygd
1d546a4091 update nvidia dockerfile to build latest smartmontools (#1335) 2025-11-02 17:13:47 -05:00
5 changed files with 163 additions and 7 deletions

View File

@@ -2,14 +2,19 @@ package agent
import (
"math"
"path/filepath"
"runtime"
"time"
"github.com/henrygd/beszel/internal/entities/system"
"github.com/shirou/gopsutil/v4/cpu"
"github.com/shirou/gopsutil/v4/process"
)
var lastCpuTimes = make(map[uint16]cpu.TimesStat)
var lastPerCoreCpuTimes = make(map[uint16][]cpu.TimesStat)
var lastProcessCpuTimes = make(map[uint16]map[int32]float64)
var lastProcessCpuSampleTime = make(map[uint16]time.Time)
// init initializes the CPU monitoring by storing the initial CPU times
// for the default 60-second cache interval.
@@ -20,6 +25,16 @@ func init() {
if perCoreTimes, err := cpu.Times(true); err == nil {
lastPerCoreCpuTimes[60000] = perCoreTimes
}
if processes, err := process.Processes(); err == nil {
snapshot := make(map[int32]float64, len(processes))
for _, proc := range processes {
if times, err := proc.Times(); err == nil {
snapshot[proc.Pid] = times.Total()
}
}
lastProcessCpuTimes[60000] = snapshot
lastProcessCpuSampleTime[60000] = time.Now()
}
}
// CpuMetrics contains detailed CPU usage breakdown
@@ -105,6 +120,110 @@ func getPerCoreCpuUsage(cacheTimeMs uint16) (system.Uint8Slice, error) {
return usage, nil
}
// getTopCpuProcess returns the process with the highest CPU usage since the last run
// for the given cache interval.
//
// Usage is the increase in a process's total CPU time since the previous sample,
// divided by the wall-clock time elapsed multiplied by runtime.NumCPU(), expressed
// as a percentage and passed through clampPercent.
//
// It returns (nil, nil) when no baseline sample is available yet (the current
// readings are stored as the new baseline) or when no named process showed a
// positive increase. An error is returned only if the process list cannot be read.
func getTopCpuProcess(cacheTimeMs uint16) (*system.TopCpuProcess, error) {
	processes, err := process.Processes()
	if err != nil {
		return nil, err
	}
	now := time.Now()

	// Use the baseline recorded for this interval; if there is none, fall back to
	// the default 60000 ms baseline. The baseline is only read here (reading a nil
	// map is safe) and is always replaced by the fresh snapshot below, so no copy
	// or intermediate store is needed.
	lastTimes, ok := lastProcessCpuTimes[cacheTimeMs]
	if !ok {
		lastTimes = lastProcessCpuTimes[60000]
	}
	lastSample := lastProcessCpuSampleTime[cacheTimeMs]
	if lastSample.IsZero() {
		lastSample = lastProcessCpuSampleTime[60000]
	}

	// Without a previous sample time, or if the clock did not advance, no rate can
	// be computed; this call then only records a new baseline.
	elapsed := now.Sub(lastSample).Seconds()
	haveBaseline := !lastSample.IsZero() && elapsed > 0

	cpuCount := float64(runtime.NumCPU())
	if cpuCount <= 0 {
		cpuCount = 1
	}

	snapshot := make(map[int32]float64, len(processes))
	var topName string
	var topPercent float64
	for _, proc := range processes {
		times, err := proc.Times()
		if err != nil {
			// Skip processes whose CPU times cannot be read.
			continue
		}
		total := times.Total()
		pid := proc.Pid
		snapshot[pid] = total
		if !haveBaseline {
			continue
		}
		// Skip PIDs that were not in the baseline and PIDs whose total did not grow.
		lastTotal, ok := lastTimes[pid]
		if !ok || total <= lastTotal {
			continue
		}
		percent := clampPercent((total - lastTotal) / (elapsed * cpuCount) * 100)
		// Only resolve the name for a process that would become the new leader;
		// topPercent starts at 0, so this also rejects non-positive usage.
		if percent > topPercent {
			name, err := proc.Name()
			if err != nil || name == "" {
				// Fall back to the executable's base name when the name is unavailable.
				if exe, exeErr := proc.Exe(); exeErr == nil && exe != "" {
					name = filepath.Base(exe)
				}
			}
			if name == "" {
				// An unnamed process never becomes the leader.
				continue
			}
			topPercent = percent
			topName = name
		}
	}

	// The fresh readings become the baseline for the next call on this interval.
	lastProcessCpuTimes[cacheTimeMs] = snapshot
	lastProcessCpuSampleTime[cacheTimeMs] = now

	if topName == "" {
		return nil, nil
	}
	return &system.TopCpuProcess{
		Name:    topName,
		Percent: topPercent,
	}, nil
}
// calculateBusy calculates the CPU busy percentage between two time points.
// It computes the ratio of busy time to total time elapsed between t1 and t2,
// returning a percentage clamped between 0 and 100.

View File

@@ -31,6 +31,7 @@ func (a *Agent) initializeDiskInfo() {
filesystem, _ := GetEnv("FILESYSTEM")
efPath := "/extra-filesystems"
hasRoot := false
isWindows := runtime.GOOS == "windows"
partitions, err := disk.Partitions(false)
if err != nil {
@@ -38,6 +39,13 @@ func (a *Agent) initializeDiskInfo() {
}
slog.Debug("Disk", "partitions", partitions)
// trim trailing backslash for Windows devices (#1361)
if isWindows {
for i, p := range partitions {
partitions[i].Device = strings.TrimSuffix(p.Device, "\\")
}
}
// ioContext := context.WithValue(a.sensorsContext,
// common.EnvKey, common.EnvMap{common.HostProcEnvKey: "/tmp/testproc"},
// )
@@ -52,7 +60,7 @@ func (a *Agent) initializeDiskInfo() {
// Helper function to add a filesystem to fsStats if it doesn't exist
addFsStat := func(device, mountpoint string, root bool, customName ...string) {
var key string
if runtime.GOOS == "windows" {
if isWindows {
key = device
} else {
key = filepath.Base(device)

View File

@@ -98,6 +98,15 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats {
slog.Error("Error getting cpu metrics", "err", err)
}
if topProcess, err := getTopCpuProcess(cacheTimeMs); err == nil {
if topProcess != nil {
topProcess.Percent = twoDecimals(topProcess.Percent)
systemStats.TopCpuProcess = topProcess
}
} else {
slog.Error("Error getting top cpu process", "err", err)
}
// per-core cpu usage
if perCoreUsage, err := getPerCoreCpuUsage(cacheTimeMs); err == nil {
systemStats.CpuCoresUsage = perCoreUsage

View File

@@ -2,7 +2,6 @@ FROM --platform=$BUILDPLATFORM golang:alpine AS builder
WORKDIR /app
COPY ../go.mod ../go.sum ./
RUN go mod download
@@ -13,7 +12,24 @@ COPY . ./
ARG TARGETOS TARGETARCH
RUN CGO_ENABLED=0 GOGC=75 GOOS=$TARGETOS GOARCH=$TARGETARCH go build -ldflags "-w -s" -o /agent ./internal/cmd/agent
RUN rm -rf /tmp/*
# --------------------------
# Smartmontools builder stage
# --------------------------
# Builds smartmontools 7.5 from the release source tarball on the
# nvidia/cuda:12.2.2-base-ubuntu22.04 image.
FROM nvidia/cuda:12.2.2-base-ubuntu22.04 AS smartmontools-builder
# One layer: install wget and build-essential, download and unpack the 7.5
# tarball, configure with prefix /usr and sysconfdir /etc, build and install,
# then remove /smartmontools-7.5*, the build-only packages and the apt lists.
RUN apt-get update && apt-get install -y \
wget \
build-essential \
&& wget https://downloads.sourceforge.net/project/smartmontools/smartmontools/7.5/smartmontools-7.5.tar.gz \
&& tar zxvf smartmontools-7.5.tar.gz \
&& cd smartmontools-7.5 \
&& ./configure --prefix=/usr --sysconfdir=/etc \
&& make \
&& make install \
&& rm -rf /smartmontools-7.5* \
&& apt-get remove -y wget build-essential \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
# --------------------------
# Final image: GPU-enabled agent with nvidia-smi
@@ -21,10 +37,8 @@ RUN rm -rf /tmp/*
FROM nvidia/cuda:12.2.2-base-ubuntu22.04
COPY --from=builder /agent /agent
# this is so we don't need to create the /tmp directory in the scratch container
COPY --from=builder /tmp /tmp
RUN apt-get update && apt-get install -y smartmontools && rm -rf /var/lib/apt/lists/*
# Copy the smartctl binary built in the smartmontools-builder stage
COPY --from=smartmontools-builder /usr/sbin/smartctl /usr/sbin/smartctl
# Ensure data persistence across container recreations
VOLUME ["/var/lib/beszel-agent"]

View File

@@ -47,6 +47,7 @@ type Stats struct {
MaxDiskIO [2]uint64 `json:"diom,omitzero" cbor:"-"` // [max read bytes, max write bytes]
CpuBreakdown []float64 `json:"cpub,omitempty" cbor:"33,keyasint,omitempty"` // [user, system, iowait, steal, idle]
CpuCoresUsage Uint8Slice `json:"cpus,omitempty" cbor:"34,keyasint,omitempty"` // per-core busy usage [CPU0..]
TopCpuProcess *TopCpuProcess `json:"tcp,omitempty" cbor:"35,keyasint,omitempty"`
}
// Uint8Slice wraps []uint8 to customize JSON encoding while keeping CBOR efficient.
@@ -153,3 +154,8 @@ type CombinedData struct {
Info Info `json:"info" cbor:"1,keyasint"`
Containers []*container.Stats `json:"container" cbor:"2,keyasint"`
}
// TopCpuProcess describes the process that used the most CPU during a sampling
// interval. It is carried in Stats.TopCpuProcess.
type TopCpuProcess struct {
// Name is the process name; the agent falls back to the base name of the
// executable path when the process name is unavailable.
Name string `json:"n" cbor:"0,keyasint"`
// Percent is the process's CPU usage as a percentage; the agent divides by
// the CPU count and rounds to two decimals before reporting.
Percent float64 `json:"p" cbor:"1,keyasint"`
}