Compare commits

...

28 Commits

Author SHA1 Message Date
henrygd
e4e0affbc1 test(hub): add additional tests for all system alerts 2026-03-17 18:48:54 -04:00
henrygd
c3a0e645ee refactor: variable renaming in alerts package 2026-03-17 18:44:46 -04:00
henrygd
c6c3950fb0 refactor: add alertsCache to maintain active alert data in memory 2026-03-17 18:32:57 -04:00
henrygd
48ddc96a0d systemd: allow timer monitoring with SERVICE_PATTERNS (#1820) 2026-03-17 15:11:44 -04:00
henrygd
704cb86de8 refactor: change ExpiryMap.store to be a pointer 2026-03-16 17:44:45 -04:00
henrygd
2854ce882f fix(ui): centralize default layout width and update default setting 2026-03-16 15:23:32 -04:00
henrygd
ed50367f70 fix(agent): add fallback for podman container health (#1475) 2026-03-15 17:59:59 -04:00
henrygd
4ebe869591 ui: virtualize smart table 2026-03-15 15:20:07 -04:00
henrygd
c9bbbe91f2 ui: improve table col widths and hide text showing above header 2026-03-15 14:59:25 -04:00
henrygd
5bfe4f6970 agent: include ip in container port if not 0.0.0.0 or :: 2026-03-15 14:58:21 -04:00
henrygd
380d2b1091 add ports column to containers table (#1481) 2026-03-14 19:29:39 -04:00
henrygd
a7f99e7a8c agent: support new Docker API Health field (#1475) 2026-03-14 15:26:44 -04:00
henrygd
bd94a9d142 agent: improve disk discovery / IO mapping and add tests (#1811) 2026-03-13 16:03:27 -04:00
henrygd
8e2316f845 refactor: simplify/improve status alert handling (#1519)
also adds new functionality to restore any pending down alerts
that were lost by hub restart before creation
2026-03-12 15:53:40 -04:00
Sven van Ginkel
0d3dfcb207 fix(hub): check if status alert is triggered before sending up alert (#1806) 2026-03-12 13:38:42 -04:00
henrygd
b386ce5190 hub: add ExpiryMap.UpdateExpiration and sync SMART fetch intervals (#1800)
- Update smartFetchMap expiration when agent smart interval changes
- Prevent background SMART fetching before initial system details are
loaded
- Add buffer to SMART fetch timing check
- Get rid of unnecessary pointers in expirymap
2026-03-11 16:25:52 -04:00
henrygd
e527534016 ensure deprecated system fields are migrated to newer structures
also removes refs to legacy load avg fields (l1, l5, l15) that were
around for a very short period
2026-03-10 18:46:57 -04:00
Victor Eduardo
ec7ad632a9 fix: Use historical records to average disk usage for extra disk alerts (#1801)
- Introduced a new test file `alerts_disk_test.go` to validate the behavior of disk alerts using historical data for extra filesystems.
- Enhanced the `HandleSystemAlerts` function to correctly calculate disk usage for extra filesystems based on historical records.
- Updated the `SystemAlertStats` struct to include `ExtraFs` for tracking additional filesystem statistics.
2026-03-09 18:32:35 -04:00
VACInc
963fce5a33 agent: mark mdraid rebuild as warning, not failed (#1797) 2026-03-09 17:54:53 -04:00
Sven van Ginkel
d38c0da06d fix: bypass NIC auto-filter when interface is explicitly whitelisted via NICS (#1805)
Co-authored-by: henrygd <hank@henrygd.me>
2026-03-09 17:47:59 -04:00
henrygd
cae6ac4626 update go version to 1.26.1 2026-03-09 16:10:38 -04:00
henrygd
6b1ff264f2 gpu(amd): add workaround for misreported sysfs filesize (#1799) 2026-03-09 14:53:52 -04:00
henrygd
35d0e792ad refactor(expirymap): optimize performance and add StopCleaner method 2026-03-08 19:09:41 -04:00
henrygd
654cd06b19 respect SMART_INTERVAL across agent reconnects (#1800)
Move tracking of the last SMART data fetch from individual System
instances to the SystemManager using a TTL-based ExpiryMap.

This ensures that the SMART_INTERVAL is respected even if an
agent connection is dropped and re-established, preventing
redundant data collection on every reconnect.
2026-03-08 19:03:50 -04:00
henrygd
5e1b028130 refactor(smart): improve perf by skipping ata_device_statistics parsing if unnecessary 2026-03-08 15:19:50 -04:00
henrygd
638e7dc12a fix(smart): handle negative ATA device statistics values (#1791) 2026-03-08 13:34:16 -04:00
henrygd
73c262455d refactor(agent): move GetEnv to utils package 2026-03-07 14:12:17 -05:00
henrygd
0c4d2edd45 refactor(agent): add utils package; rm utils.go and fs_utils.go 2026-03-07 13:50:49 -05:00
67 changed files with 4342 additions and 1006 deletions

View File

@@ -6,7 +6,6 @@ package agent
import ( import (
"log/slog" "log/slog"
"os"
"strings" "strings"
"sync" "sync"
"time" "time"
@@ -14,6 +13,7 @@ import (
"github.com/gliderlabs/ssh" "github.com/gliderlabs/ssh"
"github.com/henrygd/beszel" "github.com/henrygd/beszel"
"github.com/henrygd/beszel/agent/deltatracker" "github.com/henrygd/beszel/agent/deltatracker"
"github.com/henrygd/beszel/agent/utils"
"github.com/henrygd/beszel/internal/common" "github.com/henrygd/beszel/internal/common"
"github.com/henrygd/beszel/internal/entities/system" "github.com/henrygd/beszel/internal/entities/system"
gossh "golang.org/x/crypto/ssh" gossh "golang.org/x/crypto/ssh"
@@ -68,11 +68,11 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {
slog.Info("Data directory", "path", agent.dataDir) slog.Info("Data directory", "path", agent.dataDir)
} }
agent.memCalc, _ = GetEnv("MEM_CALC") agent.memCalc, _ = utils.GetEnv("MEM_CALC")
agent.sensorConfig = agent.newSensorConfig() agent.sensorConfig = agent.newSensorConfig()
// Parse disk usage cache duration (e.g., "15m", "1h") to avoid waking sleeping disks // Parse disk usage cache duration (e.g., "15m", "1h") to avoid waking sleeping disks
if diskUsageCache, exists := GetEnv("DISK_USAGE_CACHE"); exists { if diskUsageCache, exists := utils.GetEnv("DISK_USAGE_CACHE"); exists {
if duration, err := time.ParseDuration(diskUsageCache); err == nil { if duration, err := time.ParseDuration(diskUsageCache); err == nil {
agent.diskUsageCacheDuration = duration agent.diskUsageCacheDuration = duration
slog.Info("DISK_USAGE_CACHE", "duration", duration) slog.Info("DISK_USAGE_CACHE", "duration", duration)
@@ -82,7 +82,7 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {
} }
// Set up slog with a log level determined by the LOG_LEVEL env var // Set up slog with a log level determined by the LOG_LEVEL env var
if logLevelStr, exists := GetEnv("LOG_LEVEL"); exists { if logLevelStr, exists := utils.GetEnv("LOG_LEVEL"); exists {
switch strings.ToLower(logLevelStr) { switch strings.ToLower(logLevelStr) {
case "debug": case "debug":
agent.debug = true agent.debug = true
@@ -103,7 +103,7 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {
agent.refreshSystemDetails() agent.refreshSystemDetails()
// SMART_INTERVAL env var to update smart data at this interval // SMART_INTERVAL env var to update smart data at this interval
if smartIntervalEnv, exists := GetEnv("SMART_INTERVAL"); exists { if smartIntervalEnv, exists := utils.GetEnv("SMART_INTERVAL"); exists {
if duration, err := time.ParseDuration(smartIntervalEnv); err == nil && duration > 0 { if duration, err := time.ParseDuration(smartIntervalEnv); err == nil && duration > 0 {
agent.systemDetails.SmartInterval = duration agent.systemDetails.SmartInterval = duration
slog.Info("SMART_INTERVAL", "duration", duration) slog.Info("SMART_INTERVAL", "duration", duration)
@@ -148,15 +148,6 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {
return agent, nil return agent, nil
} }
// GetEnv retrieves an environment variable with a "BESZEL_AGENT_" prefix, or falls back to the unprefixed key.
func GetEnv(key string) (value string, exists bool) {
if value, exists = os.LookupEnv("BESZEL_AGENT_" + key); exists {
return value, exists
}
// Fallback to the old unprefixed key
return os.LookupEnv(key)
}
func (a *Agent) gatherStats(options common.DataRequestOptions) *system.CombinedData { func (a *Agent) gatherStats(options common.DataRequestOptions) *system.CombinedData {
a.Lock() a.Lock()
defer a.Unlock() defer a.Unlock()
@@ -213,7 +204,7 @@ func (a *Agent) gatherStats(options common.DataRequestOptions) *system.CombinedD
data.Stats.ExtraFs[key] = stats data.Stats.ExtraFs[key] = stats
// Add percentages to Info struct for dashboard // Add percentages to Info struct for dashboard
if stats.DiskTotal > 0 { if stats.DiskTotal > 0 {
pct := twoDecimals((stats.DiskUsed / stats.DiskTotal) * 100) pct := utils.TwoDecimals((stats.DiskUsed / stats.DiskTotal) * 100)
data.Info.ExtraFsPct[key] = pct data.Info.ExtraFsPct[key] = pct
} }
} }

View File

@@ -14,6 +14,7 @@ import (
"time" "time"
"github.com/henrygd/beszel" "github.com/henrygd/beszel"
"github.com/henrygd/beszel/agent/utils"
"github.com/henrygd/beszel/internal/common" "github.com/henrygd/beszel/internal/common"
"github.com/fxamacker/cbor/v2" "github.com/fxamacker/cbor/v2"
@@ -43,7 +44,7 @@ type WebSocketClient struct {
// newWebSocketClient creates a new WebSocket client for the given agent. // newWebSocketClient creates a new WebSocket client for the given agent.
// It reads configuration from environment variables and validates the hub URL. // It reads configuration from environment variables and validates the hub URL.
func newWebSocketClient(agent *Agent) (client *WebSocketClient, err error) { func newWebSocketClient(agent *Agent) (client *WebSocketClient, err error) {
hubURLStr, exists := GetEnv("HUB_URL") hubURLStr, exists := utils.GetEnv("HUB_URL")
if !exists { if !exists {
return nil, errors.New("HUB_URL environment variable not set") return nil, errors.New("HUB_URL environment variable not set")
} }
@@ -72,12 +73,12 @@ func newWebSocketClient(agent *Agent) (client *WebSocketClient, err error) {
// If neither is set, it returns an error. // If neither is set, it returns an error.
func getToken() (string, error) { func getToken() (string, error) {
// get token from env var // get token from env var
token, _ := GetEnv("TOKEN") token, _ := utils.GetEnv("TOKEN")
if token != "" { if token != "" {
return token, nil return token, nil
} }
// get token from file // get token from file
tokenFile, _ := GetEnv("TOKEN_FILE") tokenFile, _ := utils.GetEnv("TOKEN_FILE")
if tokenFile == "" { if tokenFile == "" {
return "", errors.New("must set TOKEN or TOKEN_FILE") return "", errors.New("must set TOKEN or TOKEN_FILE")
} }
@@ -197,7 +198,7 @@ func (client *WebSocketClient) handleAuthChallenge(msg *common.HubRequest[cbor.R
} }
if authRequest.NeedSysInfo { if authRequest.NeedSysInfo {
response.Name, _ = GetEnv("SYSTEM_NAME") response.Name, _ = utils.GetEnv("SYSTEM_NAME")
response.Hostname = client.agent.systemDetails.Hostname response.Hostname = client.agent.systemDetails.Hostname
serverAddr := client.agent.connectionManager.serverOptions.Addr serverAddr := client.agent.connectionManager.serverOptions.Addr
_, response.Port, _ = net.SplitHostPort(serverAddr) _, response.Port, _ = net.SplitHostPort(serverAddr)

View File

@@ -6,6 +6,8 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"runtime" "runtime"
"github.com/henrygd/beszel/agent/utils"
) )
// GetDataDir returns the path to the data directory for the agent and an error // GetDataDir returns the path to the data directory for the agent and an error
@@ -16,7 +18,7 @@ func GetDataDir(dataDirs ...string) (string, error) {
return testDataDirs(dataDirs) return testDataDirs(dataDirs)
} }
dataDir, _ := GetEnv("DATA_DIR") dataDir, _ := utils.GetEnv("DATA_DIR")
if dataDir != "" { if dataDir != "" {
dataDirs = append(dataDirs, dataDir) dataDirs = append(dataDirs, dataDir)
} }

View File

@@ -8,11 +8,31 @@ import (
"strings" "strings"
"time" "time"
"github.com/henrygd/beszel/agent/utils"
"github.com/henrygd/beszel/internal/entities/system" "github.com/henrygd/beszel/internal/entities/system"
"github.com/shirou/gopsutil/v4/disk" "github.com/shirou/gopsutil/v4/disk"
) )
// fsRegistrationContext holds the shared lookup state needed to resolve a
// filesystem into the tracked fsStats key and metadata.
type fsRegistrationContext struct {
filesystem string // value of optional FILESYSTEM env var
isWindows bool
efPath string // path to extra filesystems (default "/extra-filesystems")
diskIoCounters map[string]disk.IOCountersStat
}
// diskDiscovery groups the transient state for a single initializeDiskInfo run so
// helper methods can share the same partitions, mount paths, and lookup functions
type diskDiscovery struct {
agent *Agent
rootMountPoint string
partitions []disk.PartitionStat
usageFn func(string) (*disk.UsageStat, error)
ctx fsRegistrationContext
}
// parseFilesystemEntry parses a filesystem entry in the format "device__customname" // parseFilesystemEntry parses a filesystem entry in the format "device__customname"
// Returns the device/filesystem part and the custom name part // Returns the device/filesystem part and the custom name part
func parseFilesystemEntry(entry string) (device, customName string) { func parseFilesystemEntry(entry string) (device, customName string) {
@@ -26,19 +46,230 @@ func parseFilesystemEntry(entry string) (device, customName string) {
return device, customName return device, customName
} }
// extraFilesystemPartitionInfo derives the I/O device and optional display name
// for a mounted /extra-filesystems partition. Prefer the partition device reported
// by the system and only use the folder name for custom naming metadata.
func extraFilesystemPartitionInfo(p disk.PartitionStat) (device, customName string) {
device = strings.TrimSpace(p.Device)
folderDevice, customName := parseFilesystemEntry(filepath.Base(p.Mountpoint))
if device == "" {
device = folderDevice
}
return device, customName
}
func isDockerSpecialMountpoint(mountpoint string) bool { func isDockerSpecialMountpoint(mountpoint string) bool {
switch mountpoint { switch mountpoint {
case "/etc/hosts", "/etc/resolv.conf", "/etc/hostname": case "/etc/hosts", "/etc/resolv.conf", "/etc/hostname":
return true return true
default: }
return false
}
// registerFilesystemStats resolves the tracked key and stats payload for a
// filesystem before it is inserted into fsStats.
func registerFilesystemStats(existing map[string]*system.FsStats, device, mountpoint string, root bool, customName string, ctx fsRegistrationContext) (string, *system.FsStats, bool) {
key := device
if !ctx.isWindows {
key = filepath.Base(device)
}
if root {
// Try to map root device to a diskIoCounters entry. First checks for an
// exact key match, then uses findIoDevice for normalized / prefix-based
// matching (e.g. nda0p2 -> nda0), and finally falls back to FILESYSTEM.
if _, ioMatch := ctx.diskIoCounters[key]; !ioMatch {
if matchedKey, match := findIoDevice(key, ctx.diskIoCounters); match {
key = matchedKey
} else if ctx.filesystem != "" {
if matchedKey, match := findIoDevice(ctx.filesystem, ctx.diskIoCounters); match {
key = matchedKey
}
}
if _, ioMatch = ctx.diskIoCounters[key]; !ioMatch {
slog.Warn("Root I/O unmapped; set FILESYSTEM", "device", device, "mountpoint", mountpoint)
}
}
} else {
// Check if non-root has diskstats and prefer the folder device for
// /extra-filesystems mounts when the discovered partition device is a
// mapper path (e.g. luks UUID) that obscures the underlying block device.
if _, ioMatch := ctx.diskIoCounters[key]; !ioMatch {
if strings.HasPrefix(mountpoint, ctx.efPath) {
folderDevice, _ := parseFilesystemEntry(filepath.Base(mountpoint))
if folderDevice != "" {
if matchedKey, match := findIoDevice(folderDevice, ctx.diskIoCounters); match {
key = matchedKey
}
}
}
if _, ioMatch = ctx.diskIoCounters[key]; !ioMatch {
if matchedKey, match := findIoDevice(key, ctx.diskIoCounters); match {
key = matchedKey
}
}
}
}
if _, exists := existing[key]; exists {
return "", nil, false
}
fsStats := &system.FsStats{Root: root, Mountpoint: mountpoint}
if customName != "" {
fsStats.Name = customName
}
return key, fsStats, true
}
// addFsStat inserts a discovered filesystem if it resolves to a new tracking
// key. The key selection itself lives in buildFsStatRegistration so that logic
// can stay directly unit-tested.
func (d *diskDiscovery) addFsStat(device, mountpoint string, root bool, customName string) {
key, fsStats, ok := registerFilesystemStats(d.agent.fsStats, device, mountpoint, root, customName, d.ctx)
if !ok {
return
}
d.agent.fsStats[key] = fsStats
name := key
if customName != "" {
name = customName
}
slog.Info("Detected disk", "name", name, "device", device, "mount", mountpoint, "io", key, "root", root)
}
// addConfiguredRootFs resolves FILESYSTEM against partitions first, then falls
// back to direct diskstats matching for setups like ZFS where partitions do not
// expose the physical device name.
func (d *diskDiscovery) addConfiguredRootFs() bool {
if d.ctx.filesystem == "" {
return false return false
} }
for _, p := range d.partitions {
if filesystemMatchesPartitionSetting(d.ctx.filesystem, p) {
d.addFsStat(p.Device, p.Mountpoint, true, "")
return true
}
}
// FILESYSTEM may name a physical disk absent from partitions (e.g. ZFS lists
// dataset paths like zroot/ROOT/default, not block devices).
if ioKey, match := findIoDevice(d.ctx.filesystem, d.ctx.diskIoCounters); match {
d.agent.fsStats[ioKey] = &system.FsStats{Root: true, Mountpoint: d.rootMountPoint}
return true
}
slog.Warn("Partition details not found", "filesystem", d.ctx.filesystem)
return false
}
func isRootFallbackPartition(p disk.PartitionStat, rootMountPoint string) bool {
return p.Mountpoint == rootMountPoint ||
(isDockerSpecialMountpoint(p.Mountpoint) && strings.HasPrefix(p.Device, "/dev"))
}
// addPartitionRootFs handles the non-configured root fallback path when a
// partition looks like the active root mount but still needs translating to an
// I/O device key.
func (d *diskDiscovery) addPartitionRootFs(device, mountpoint string) bool {
fs, match := findIoDevice(filepath.Base(device), d.ctx.diskIoCounters)
if !match {
return false
}
// The resolved I/O device is already known here, so use it directly to avoid
// a second fallback search inside buildFsStatRegistration.
d.addFsStat(fs, mountpoint, true, "")
return true
}
// addLastResortRootFs is only used when neither FILESYSTEM nor partition-based
// heuristics can identify root, so it picks the busiest I/O device as a final
// fallback and preserves the root mountpoint for usage collection.
func (d *diskDiscovery) addLastResortRootFs() {
rootKey := mostActiveIoDevice(d.ctx.diskIoCounters)
if rootKey != "" {
slog.Warn("Using most active device for root I/O; set FILESYSTEM to override", "device", rootKey)
} else {
rootKey = filepath.Base(d.rootMountPoint)
if _, exists := d.agent.fsStats[rootKey]; exists {
rootKey = "root"
}
slog.Warn("Root I/O device not detected; set FILESYSTEM to override")
}
d.agent.fsStats[rootKey] = &system.FsStats{Root: true, Mountpoint: d.rootMountPoint}
}
// findPartitionByFilesystemSetting matches an EXTRA_FILESYSTEMS entry against a
// discovered partition either by mountpoint or by device suffix.
func findPartitionByFilesystemSetting(filesystem string, partitions []disk.PartitionStat) (disk.PartitionStat, bool) {
for _, p := range partitions {
if strings.HasSuffix(p.Device, filesystem) || p.Mountpoint == filesystem {
return p, true
}
}
return disk.PartitionStat{}, false
}
// addConfiguredExtraFsEntry resolves one EXTRA_FILESYSTEMS entry, preferring a
// discovered partition and falling back to any path that disk.Usage accepts.
func (d *diskDiscovery) addConfiguredExtraFsEntry(filesystem, customName string) {
if p, found := findPartitionByFilesystemSetting(filesystem, d.partitions); found {
d.addFsStat(p.Device, p.Mountpoint, false, customName)
return
}
if _, err := d.usageFn(filesystem); err == nil {
d.addFsStat(filepath.Base(filesystem), filesystem, false, customName)
return
} else {
slog.Error("Invalid filesystem", "name", filesystem, "err", err)
}
}
// addConfiguredExtraFilesystems parses and registers the comma-separated
// EXTRA_FILESYSTEMS env var entries.
func (d *diskDiscovery) addConfiguredExtraFilesystems(extraFilesystems string) {
for fsEntry := range strings.SplitSeq(extraFilesystems, ",") {
filesystem, customName := parseFilesystemEntry(fsEntry)
d.addConfiguredExtraFsEntry(filesystem, customName)
}
}
// addPartitionExtraFs registers partitions mounted under /extra-filesystems so
// their display names can come from the folder name while their I/O keys still
// prefer the underlying partition device.
func (d *diskDiscovery) addPartitionExtraFs(p disk.PartitionStat) {
if !strings.HasPrefix(p.Mountpoint, d.ctx.efPath) {
return
}
device, customName := extraFilesystemPartitionInfo(p)
d.addFsStat(device, p.Mountpoint, false, customName)
}
// addExtraFilesystemFolders handles bare directories under /extra-filesystems
// that may not appear in partition discovery, while skipping mountpoints that
// were already registered from higher-fidelity sources.
func (d *diskDiscovery) addExtraFilesystemFolders(folderNames []string) {
existingMountpoints := make(map[string]bool, len(d.agent.fsStats))
for _, stats := range d.agent.fsStats {
existingMountpoints[stats.Mountpoint] = true
}
for _, folderName := range folderNames {
mountpoint := filepath.Join(d.ctx.efPath, folderName)
slog.Debug("/extra-filesystems", "mountpoint", mountpoint)
if existingMountpoints[mountpoint] {
continue
}
device, customName := parseFilesystemEntry(folderName)
d.addFsStat(device, mountpoint, false, customName)
}
} }
// Sets up the filesystems to monitor for disk usage and I/O. // Sets up the filesystems to monitor for disk usage and I/O.
func (a *Agent) initializeDiskInfo() { func (a *Agent) initializeDiskInfo() {
filesystem, _ := GetEnv("FILESYSTEM") filesystem, _ := utils.GetEnv("FILESYSTEM")
efPath := "/extra-filesystems"
hasRoot := false hasRoot := false
isWindows := runtime.GOOS == "windows" isWindows := runtime.GOOS == "windows"
@@ -55,167 +286,57 @@ func (a *Agent) initializeDiskInfo() {
} }
} }
// ioContext := context.WithValue(a.sensorsContext,
// common.EnvKey, common.EnvMap{common.HostProcEnvKey: "/tmp/testproc"},
// )
// diskIoCounters, err := disk.IOCountersWithContext(ioContext)
diskIoCounters, err := disk.IOCounters() diskIoCounters, err := disk.IOCounters()
if err != nil { if err != nil {
slog.Error("Error getting diskstats", "err", err) slog.Error("Error getting diskstats", "err", err)
} }
slog.Debug("Disk I/O", "diskstats", diskIoCounters) slog.Debug("Disk I/O", "diskstats", diskIoCounters)
ctx := fsRegistrationContext{
// Helper function to add a filesystem to fsStats if it doesn't exist filesystem: filesystem,
addFsStat := func(device, mountpoint string, root bool, customName ...string) { isWindows: isWindows,
var key string diskIoCounters: diskIoCounters,
if isWindows { efPath: "/extra-filesystems",
key = device
} else {
key = filepath.Base(device)
}
var ioMatch bool
if _, exists := a.fsStats[key]; !exists {
if root {
slog.Info("Detected root device", "name", key)
// Try to map root device to a diskIoCounters entry. First
// checks for an exact key match, then uses findIoDevice for
// normalized / prefix-based matching (e.g. nda0p2 → nda0),
// and finally falls back to the FILESYSTEM env var.
if _, ioMatch = diskIoCounters[key]; !ioMatch {
if matchedKey, match := findIoDevice(key, diskIoCounters); match {
key = matchedKey
ioMatch = true
} else if filesystem != "" {
if matchedKey, match := findIoDevice(filesystem, diskIoCounters); match {
key = matchedKey
ioMatch = true
}
}
if !ioMatch {
slog.Warn("Root I/O unmapped; set FILESYSTEM", "device", device, "mountpoint", mountpoint)
}
}
} else {
// Check if non-root has diskstats and fall back to folder name if not
// Scenario: device is encrypted and named luks-2bcb02be-999d-4417-8d18-5c61e660fb6e - not in /proc/diskstats.
// However, the device can be specified by mounting folder from luks device at /extra-filesystems/sda1
if _, ioMatch = diskIoCounters[key]; !ioMatch {
efBase := filepath.Base(mountpoint)
if _, ioMatch = diskIoCounters[efBase]; ioMatch {
key = efBase
}
}
}
fsStats := &system.FsStats{Root: root, Mountpoint: mountpoint}
if len(customName) > 0 && customName[0] != "" {
fsStats.Name = customName[0]
}
a.fsStats[key] = fsStats
}
} }
// Get the appropriate root mount point for this system // Get the appropriate root mount point for this system
rootMountPoint := a.getRootMountPoint() discovery := diskDiscovery{
agent: a,
// Use FILESYSTEM env var to find root filesystem rootMountPoint: a.getRootMountPoint(),
if filesystem != "" { partitions: partitions,
for _, p := range partitions { usageFn: disk.Usage,
if filesystemMatchesPartitionSetting(filesystem, p) { ctx: ctx,
addFsStat(p.Device, p.Mountpoint, true)
hasRoot = true
break
}
}
if !hasRoot {
// FILESYSTEM may name a physical disk absent from partitions (e.g.
// ZFS lists dataset paths like zroot/ROOT/default, not block devices).
// Try matching directly against diskIoCounters.
if ioKey, match := findIoDevice(filesystem, diskIoCounters); match {
a.fsStats[ioKey] = &system.FsStats{Root: true, Mountpoint: rootMountPoint}
hasRoot = true
} else {
slog.Warn("Partition details not found", "filesystem", filesystem)
}
}
} }
// Add EXTRA_FILESYSTEMS env var values to fsStats hasRoot = discovery.addConfiguredRootFs()
if extraFilesystems, exists := GetEnv("EXTRA_FILESYSTEMS"); exists {
for fsEntry := range strings.SplitSeq(extraFilesystems, ",") {
// Parse custom name from format: device__customname
fs, customName := parseFilesystemEntry(fsEntry)
found := false // Add EXTRA_FILESYSTEMS env var values to fsStats
for _, p := range partitions { if extraFilesystems, exists := utils.GetEnv("EXTRA_FILESYSTEMS"); exists {
if strings.HasSuffix(p.Device, fs) || p.Mountpoint == fs { discovery.addConfiguredExtraFilesystems(extraFilesystems)
addFsStat(p.Device, p.Mountpoint, false, customName)
found = true
break
}
}
// if not in partitions, test if we can get disk usage
if !found {
if _, err := disk.Usage(fs); err == nil {
addFsStat(filepath.Base(fs), fs, false, customName)
} else {
slog.Error("Invalid filesystem", "name", fs, "err", err)
}
}
}
} }
// Process partitions for various mount points // Process partitions for various mount points
for _, p := range partitions { for _, p := range partitions {
// fmt.Println(p.Device, p.Mountpoint) if !hasRoot && isRootFallbackPartition(p, discovery.rootMountPoint) {
// Binary root fallback or docker root fallback hasRoot = discovery.addPartitionRootFs(p.Device, p.Mountpoint)
if !hasRoot && (p.Mountpoint == rootMountPoint || (isDockerSpecialMountpoint(p.Mountpoint) && strings.HasPrefix(p.Device, "/dev"))) {
fs, match := findIoDevice(filepath.Base(p.Device), diskIoCounters)
if match {
addFsStat(fs, p.Mountpoint, true)
hasRoot = true
}
}
// Check if device is in /extra-filesystems
if strings.HasPrefix(p.Mountpoint, efPath) {
device, customName := parseFilesystemEntry(p.Mountpoint)
addFsStat(device, p.Mountpoint, false, customName)
} }
discovery.addPartitionExtraFs(p)
} }
// Check all folders in /extra-filesystems and add them if not already present // Check all folders in /extra-filesystems and add them if not already present
if folders, err := os.ReadDir(efPath); err == nil { if folders, err := os.ReadDir(discovery.ctx.efPath); err == nil {
existingMountpoints := make(map[string]bool) folderNames := make([]string, 0, len(folders))
for _, stats := range a.fsStats {
existingMountpoints[stats.Mountpoint] = true
}
for _, folder := range folders { for _, folder := range folders {
if folder.IsDir() { if folder.IsDir() {
mountpoint := filepath.Join(efPath, folder.Name()) folderNames = append(folderNames, folder.Name())
slog.Debug("/extra-filesystems", "mountpoint", mountpoint)
if !existingMountpoints[mountpoint] {
device, customName := parseFilesystemEntry(folder.Name())
addFsStat(device, mountpoint, false, customName)
}
} }
} }
discovery.addExtraFilesystemFolders(folderNames)
} }
// If no root filesystem set, try the most active I/O device as a last // If no root filesystem set, try the most active I/O device as a last
// resort (e.g. ZFS where dataset names are unrelated to disk names). // resort (e.g. ZFS where dataset names are unrelated to disk names).
if !hasRoot { if !hasRoot {
rootKey := mostActiveIoDevice(diskIoCounters) discovery.addLastResortRootFs()
if rootKey != "" {
slog.Warn("Using most active device for root I/O; set FILESYSTEM to override", "device", rootKey)
} else {
rootKey = filepath.Base(rootMountPoint)
if _, exists := a.fsStats[rootKey]; exists {
rootKey = "root"
}
slog.Warn("Root I/O device not detected; set FILESYSTEM to override")
}
a.fsStats[rootKey] = &system.FsStats{Root: true, Mountpoint: rootMountPoint}
} }
a.pruneDuplicateRootExtraFilesystems() a.pruneDuplicateRootExtraFilesystems()
@@ -380,6 +501,8 @@ func normalizeDeviceName(value string) string {
// Sets start values for disk I/O stats. // Sets start values for disk I/O stats.
func (a *Agent) initializeDiskIoStats(diskIoCounters map[string]disk.IOCountersStat) { func (a *Agent) initializeDiskIoStats(diskIoCounters map[string]disk.IOCountersStat) {
a.fsNames = a.fsNames[:0]
now := time.Now()
for device, stats := range a.fsStats { for device, stats := range a.fsStats {
// skip if not in diskIoCounters // skip if not in diskIoCounters
d, exists := diskIoCounters[device] d, exists := diskIoCounters[device]
@@ -388,7 +511,7 @@ func (a *Agent) initializeDiskIoStats(diskIoCounters map[string]disk.IOCountersS
continue continue
} }
// populate initial values // populate initial values
stats.Time = time.Now() stats.Time = now
stats.TotalRead = d.ReadBytes stats.TotalRead = d.ReadBytes
stats.TotalWrite = d.WriteBytes stats.TotalWrite = d.WriteBytes
// add to list of valid io device names // add to list of valid io device names
@@ -412,12 +535,12 @@ func (a *Agent) updateDiskUsage(systemStats *system.Stats) {
continue continue
} }
if d, err := disk.Usage(stats.Mountpoint); err == nil { if d, err := disk.Usage(stats.Mountpoint); err == nil {
stats.DiskTotal = bytesToGigabytes(d.Total) stats.DiskTotal = utils.BytesToGigabytes(d.Total)
stats.DiskUsed = bytesToGigabytes(d.Used) stats.DiskUsed = utils.BytesToGigabytes(d.Used)
if stats.Root { if stats.Root {
systemStats.DiskTotal = bytesToGigabytes(d.Total) systemStats.DiskTotal = utils.BytesToGigabytes(d.Total)
systemStats.DiskUsed = bytesToGigabytes(d.Used) systemStats.DiskUsed = utils.BytesToGigabytes(d.Used)
systemStats.DiskPct = twoDecimals(d.UsedPercent) systemStats.DiskPct = utils.TwoDecimals(d.UsedPercent)
} }
} else { } else {
// reset stats if error (likely unmounted) // reset stats if error (likely unmounted)
@@ -470,8 +593,8 @@ func (a *Agent) updateDiskIo(cacheTimeMs uint16, systemStats *system.Stats) {
diskIORead := (d.ReadBytes - prev.readBytes) * 1000 / msElapsed diskIORead := (d.ReadBytes - prev.readBytes) * 1000 / msElapsed
diskIOWrite := (d.WriteBytes - prev.writeBytes) * 1000 / msElapsed diskIOWrite := (d.WriteBytes - prev.writeBytes) * 1000 / msElapsed
readMbPerSecond := bytesToMegabytes(float64(diskIORead)) readMbPerSecond := utils.BytesToMegabytes(float64(diskIORead))
writeMbPerSecond := bytesToMegabytes(float64(diskIOWrite)) writeMbPerSecond := utils.BytesToMegabytes(float64(diskIOWrite))
// validate values // validate values
if readMbPerSecond > 50_000 || writeMbPerSecond > 50_000 { if readMbPerSecond > 50_000 || writeMbPerSecond > 50_000 {

View File

@@ -93,6 +93,443 @@ func TestParseFilesystemEntry(t *testing.T) {
} }
} }
func TestExtraFilesystemPartitionInfo(t *testing.T) {
t.Run("uses partition device for label-only mountpoint", func(t *testing.T) {
device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
Device: "/dev/sdc",
Mountpoint: "/extra-filesystems/Share",
})
assert.Equal(t, "/dev/sdc", device)
assert.Equal(t, "", customName)
})
t.Run("uses custom name from mountpoint suffix", func(t *testing.T) {
device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
Device: "/dev/sdc",
Mountpoint: "/extra-filesystems/sdc__Share",
})
assert.Equal(t, "/dev/sdc", device)
assert.Equal(t, "Share", customName)
})
t.Run("falls back to folder device when partition device is unavailable", func(t *testing.T) {
device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
Mountpoint: "/extra-filesystems/sdc__Share",
})
assert.Equal(t, "sdc", device)
assert.Equal(t, "Share", customName)
})
t.Run("supports custom name without folder device prefix", func(t *testing.T) {
device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
Device: "/dev/sdc",
Mountpoint: "/extra-filesystems/__Share",
})
assert.Equal(t, "/dev/sdc", device)
assert.Equal(t, "Share", customName)
})
}
func TestBuildFsStatRegistration(t *testing.T) {
t.Run("uses basename for non-windows exact io match", func(t *testing.T) {
key, stats, ok := registerFilesystemStats(
map[string]*system.FsStats{},
"/dev/sda1",
"/mnt/data",
false,
"archive",
fsRegistrationContext{
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"sda1": {Name: "sda1"},
},
},
)
assert.True(t, ok)
assert.Equal(t, "sda1", key)
assert.Equal(t, "/mnt/data", stats.Mountpoint)
assert.Equal(t, "archive", stats.Name)
assert.False(t, stats.Root)
})
t.Run("maps root partition to io device by prefix", func(t *testing.T) {
key, stats, ok := registerFilesystemStats(
map[string]*system.FsStats{},
"/dev/ada0p2",
"/",
true,
"",
fsRegistrationContext{
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"ada0": {Name: "ada0", ReadBytes: 1000, WriteBytes: 1000},
},
},
)
assert.True(t, ok)
assert.Equal(t, "ada0", key)
assert.True(t, stats.Root)
assert.Equal(t, "/", stats.Mountpoint)
})
t.Run("uses filesystem setting as root fallback", func(t *testing.T) {
key, _, ok := registerFilesystemStats(
map[string]*system.FsStats{},
"overlay",
"/",
true,
"",
fsRegistrationContext{
filesystem: "nvme0n1p2",
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"nvme0n1": {Name: "nvme0n1", ReadBytes: 1000, WriteBytes: 1000},
},
},
)
assert.True(t, ok)
assert.Equal(t, "nvme0n1", key)
})
t.Run("prefers parsed extra-filesystems device over mapper device", func(t *testing.T) {
key, stats, ok := registerFilesystemStats(
map[string]*system.FsStats{},
"/dev/mapper/luks-2bcb02be-999d-4417-8d18-5c61e660fb6e",
"/extra-filesystems/nvme0n1p2__Archive",
false,
"Archive",
fsRegistrationContext{
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"dm-1": {Name: "dm-1", Label: "luks-2bcb02be-999d-4417-8d18-5c61e660fb6e"},
"nvme0n1p2": {Name: "nvme0n1p2"},
},
},
)
assert.True(t, ok)
assert.Equal(t, "nvme0n1p2", key)
assert.Equal(t, "Archive", stats.Name)
})
t.Run("falls back to mapper io device when folder device cannot be resolved", func(t *testing.T) {
key, stats, ok := registerFilesystemStats(
map[string]*system.FsStats{},
"/dev/mapper/luks-2bcb02be-999d-4417-8d18-5c61e660fb6e",
"/extra-filesystems/Archive",
false,
"Archive",
fsRegistrationContext{
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"dm-1": {Name: "dm-1", Label: "luks-2bcb02be-999d-4417-8d18-5c61e660fb6e"},
},
},
)
assert.True(t, ok)
assert.Equal(t, "dm-1", key)
assert.Equal(t, "Archive", stats.Name)
})
t.Run("uses full device name on windows", func(t *testing.T) {
key, _, ok := registerFilesystemStats(
map[string]*system.FsStats{},
`C:`,
`C:\\`,
false,
"",
fsRegistrationContext{
isWindows: true,
diskIoCounters: map[string]disk.IOCountersStat{
`C:`: {Name: `C:`},
},
},
)
assert.True(t, ok)
assert.Equal(t, `C:`, key)
})
t.Run("skips existing key", func(t *testing.T) {
key, stats, ok := registerFilesystemStats(
map[string]*system.FsStats{"sda1": {Mountpoint: "/existing"}},
"/dev/sda1",
"/mnt/data",
false,
"",
fsRegistrationContext{
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"sda1": {Name: "sda1"},
},
},
)
assert.False(t, ok)
assert.Empty(t, key)
assert.Nil(t, stats)
})
}
func TestAddConfiguredRootFs(t *testing.T) {
t.Run("adds root from matching partition", func(t *testing.T) {
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
discovery := diskDiscovery{
agent: agent,
rootMountPoint: "/",
partitions: []disk.PartitionStat{{Device: "/dev/ada0p2", Mountpoint: "/"}},
ctx: fsRegistrationContext{
filesystem: "/dev/ada0p2",
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"ada0": {Name: "ada0", ReadBytes: 1000, WriteBytes: 1000},
},
},
}
ok := discovery.addConfiguredRootFs()
assert.True(t, ok)
stats, exists := agent.fsStats["ada0"]
assert.True(t, exists)
assert.True(t, stats.Root)
assert.Equal(t, "/", stats.Mountpoint)
})
t.Run("adds root from io device when partition is missing", func(t *testing.T) {
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
discovery := diskDiscovery{
agent: agent,
rootMountPoint: "/sysroot",
ctx: fsRegistrationContext{
filesystem: "zroot",
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"nda0": {Name: "nda0", Label: "zroot", ReadBytes: 1000, WriteBytes: 1000},
},
},
}
ok := discovery.addConfiguredRootFs()
assert.True(t, ok)
stats, exists := agent.fsStats["nda0"]
assert.True(t, exists)
assert.True(t, stats.Root)
assert.Equal(t, "/sysroot", stats.Mountpoint)
})
t.Run("returns false when filesystem cannot be resolved", func(t *testing.T) {
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
discovery := diskDiscovery{
agent: agent,
rootMountPoint: "/",
ctx: fsRegistrationContext{
filesystem: "missing-disk",
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{},
},
}
ok := discovery.addConfiguredRootFs()
assert.False(t, ok)
assert.Empty(t, agent.fsStats)
})
}
func TestAddPartitionRootFs(t *testing.T) {
t.Run("adds root from fallback partition candidate", func(t *testing.T) {
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
discovery := diskDiscovery{
agent: agent,
ctx: fsRegistrationContext{
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"nvme0n1": {Name: "nvme0n1", ReadBytes: 1000, WriteBytes: 1000},
},
},
}
ok := discovery.addPartitionRootFs("/dev/nvme0n1p2", "/")
assert.True(t, ok)
stats, exists := agent.fsStats["nvme0n1"]
assert.True(t, exists)
assert.True(t, stats.Root)
assert.Equal(t, "/", stats.Mountpoint)
})
t.Run("returns false when no io device matches", func(t *testing.T) {
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
discovery := diskDiscovery{agent: agent, ctx: fsRegistrationContext{diskIoCounters: map[string]disk.IOCountersStat{}}}
ok := discovery.addPartitionRootFs("/dev/mapper/root", "/")
assert.False(t, ok)
assert.Empty(t, agent.fsStats)
})
}
func TestAddLastResortRootFs(t *testing.T) {
t.Run("uses most active io device when available", func(t *testing.T) {
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
discovery := diskDiscovery{agent: agent, rootMountPoint: "/", ctx: fsRegistrationContext{diskIoCounters: map[string]disk.IOCountersStat{
"sda": {Name: "sda", ReadBytes: 5000, WriteBytes: 5000},
"sdb": {Name: "sdb", ReadBytes: 1000, WriteBytes: 1000},
}}}
discovery.addLastResortRootFs()
stats, exists := agent.fsStats["sda"]
assert.True(t, exists)
assert.True(t, stats.Root)
})
t.Run("falls back to root key when mountpoint basename collides", func(t *testing.T) {
agent := &Agent{fsStats: map[string]*system.FsStats{
"sysroot": {Mountpoint: "/extra-filesystems/sysroot"},
}}
discovery := diskDiscovery{agent: agent, rootMountPoint: "/sysroot", ctx: fsRegistrationContext{diskIoCounters: map[string]disk.IOCountersStat{}}}
discovery.addLastResortRootFs()
stats, exists := agent.fsStats["root"]
assert.True(t, exists)
assert.True(t, stats.Root)
assert.Equal(t, "/sysroot", stats.Mountpoint)
})
}
func TestAddConfiguredExtraFsEntry(t *testing.T) {
t.Run("uses matching partition when present", func(t *testing.T) {
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
discovery := diskDiscovery{
agent: agent,
partitions: []disk.PartitionStat{{Device: "/dev/sdb1", Mountpoint: "/mnt/backup"}},
usageFn: func(string) (*disk.UsageStat, error) {
t.Fatal("usage fallback should not be called when partition matches")
return nil, nil
},
ctx: fsRegistrationContext{
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"sdb1": {Name: "sdb1"},
},
},
}
discovery.addConfiguredExtraFsEntry("sdb1", "backup")
stats, exists := agent.fsStats["sdb1"]
assert.True(t, exists)
assert.Equal(t, "/mnt/backup", stats.Mountpoint)
assert.Equal(t, "backup", stats.Name)
})
t.Run("falls back to usage-validated path", func(t *testing.T) {
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
discovery := diskDiscovery{
agent: agent,
usageFn: func(path string) (*disk.UsageStat, error) {
assert.Equal(t, "/srv/archive", path)
return &disk.UsageStat{}, nil
},
ctx: fsRegistrationContext{
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"archive": {Name: "archive"},
},
},
}
discovery.addConfiguredExtraFsEntry("/srv/archive", "archive")
stats, exists := agent.fsStats["archive"]
assert.True(t, exists)
assert.Equal(t, "/srv/archive", stats.Mountpoint)
assert.Equal(t, "archive", stats.Name)
})
t.Run("ignores invalid filesystem entry", func(t *testing.T) {
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
discovery := diskDiscovery{
agent: agent,
usageFn: func(string) (*disk.UsageStat, error) {
return nil, os.ErrNotExist
},
}
discovery.addConfiguredExtraFsEntry("/missing/archive", "")
assert.Empty(t, agent.fsStats)
})
}
func TestAddConfiguredExtraFilesystems(t *testing.T) {
t.Run("parses and registers multiple configured filesystems", func(t *testing.T) {
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
discovery := diskDiscovery{
agent: agent,
partitions: []disk.PartitionStat{{Device: "/dev/sda1", Mountpoint: "/mnt/fast"}},
usageFn: func(path string) (*disk.UsageStat, error) {
if path == "/srv/archive" {
return &disk.UsageStat{}, nil
}
return nil, os.ErrNotExist
},
ctx: fsRegistrationContext{
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"sda1": {Name: "sda1"},
"archive": {Name: "archive"},
},
},
}
discovery.addConfiguredExtraFilesystems("sda1__fast,/srv/archive__cold")
assert.Contains(t, agent.fsStats, "sda1")
assert.Equal(t, "fast", agent.fsStats["sda1"].Name)
assert.Contains(t, agent.fsStats, "archive")
assert.Equal(t, "cold", agent.fsStats["archive"].Name)
})
}
func TestAddExtraFilesystemFolders(t *testing.T) {
t.Run("adds missing folders and skips existing mountpoints", func(t *testing.T) {
agent := &Agent{fsStats: map[string]*system.FsStats{
"existing": {Mountpoint: "/extra-filesystems/existing"},
}}
discovery := diskDiscovery{
agent: agent,
ctx: fsRegistrationContext{
isWindows: false,
efPath: "/extra-filesystems",
diskIoCounters: map[string]disk.IOCountersStat{
"newdisk": {Name: "newdisk"},
},
},
}
discovery.addExtraFilesystemFolders([]string{"existing", "newdisk__Archive"})
assert.Len(t, agent.fsStats, 2)
stats, exists := agent.fsStats["newdisk"]
assert.True(t, exists)
assert.Equal(t, "/extra-filesystems/newdisk__Archive", stats.Mountpoint)
assert.Equal(t, "Archive", stats.Name)
})
}
func TestFindIoDevice(t *testing.T) { func TestFindIoDevice(t *testing.T) {
t.Run("matches by device name", func(t *testing.T) { t.Run("matches by device name", func(t *testing.T) {
ioCounters := map[string]disk.IOCountersStat{ ioCounters := map[string]disk.IOCountersStat{
@@ -310,7 +747,7 @@ func TestInitializeDiskInfoWithCustomNames(t *testing.T) {
// Test the parsing logic by calling the relevant part // Test the parsing logic by calling the relevant part
// We'll create a simplified version to test just the parsing // We'll create a simplified version to test just the parsing
extraFilesystems := tc.envValue extraFilesystems := tc.envValue
for _, fsEntry := range strings.Split(extraFilesystems, ",") { for fsEntry := range strings.SplitSeq(extraFilesystems, ",") {
// Parse the entry // Parse the entry
fsEntry = strings.TrimSpace(fsEntry) fsEntry = strings.TrimSpace(fsEntry)
var fs, customName string var fs, customName string
@@ -506,3 +943,33 @@ func TestHasSameDiskUsage(t *testing.T) {
assert.False(t, hasSameDiskUsage(&disk.UsageStat{Total: 0, Used: 0}, &disk.UsageStat{Total: 1, Used: 1})) assert.False(t, hasSameDiskUsage(&disk.UsageStat{Total: 0, Used: 0}, &disk.UsageStat{Total: 1, Used: 1}))
}) })
} }
func TestInitializeDiskIoStatsResetsTrackedDevices(t *testing.T) {
agent := &Agent{
fsStats: map[string]*system.FsStats{
"sda": {},
"sdb": {},
},
fsNames: []string{"stale", "sda"},
}
agent.initializeDiskIoStats(map[string]disk.IOCountersStat{
"sda": {Name: "sda", ReadBytes: 10, WriteBytes: 20},
"sdb": {Name: "sdb", ReadBytes: 30, WriteBytes: 40},
})
assert.ElementsMatch(t, []string{"sda", "sdb"}, agent.fsNames)
assert.Len(t, agent.fsNames, 2)
assert.Equal(t, uint64(10), agent.fsStats["sda"].TotalRead)
assert.Equal(t, uint64(20), agent.fsStats["sda"].TotalWrite)
assert.False(t, agent.fsStats["sda"].Time.IsZero())
assert.False(t, agent.fsStats["sdb"].Time.IsZero())
agent.initializeDiskIoStats(map[string]disk.IOCountersStat{
"sdb": {Name: "sdb", ReadBytes: 50, WriteBytes: 60},
})
assert.Equal(t, []string{"sdb"}, agent.fsNames)
assert.Equal(t, uint64(50), agent.fsStats["sdb"].TotalRead)
assert.Equal(t, uint64(60), agent.fsStats["sdb"].TotalWrite)
}

View File

@@ -16,11 +16,14 @@ import (
"os" "os"
"path" "path"
"regexp" "regexp"
"sort"
"strconv"
"strings" "strings"
"sync" "sync"
"time" "time"
"github.com/henrygd/beszel/agent/deltatracker" "github.com/henrygd/beszel/agent/deltatracker"
"github.com/henrygd/beszel/agent/utils"
"github.com/henrygd/beszel/internal/entities/container" "github.com/henrygd/beszel/internal/entities/container"
"github.com/blang/semver" "github.com/blang/semver"
@@ -336,15 +339,48 @@ func validateCpuPercentage(cpuPct float64, containerName string) error {
// updateContainerStatsValues updates the final stats values // updateContainerStatsValues updates the final stats values
func updateContainerStatsValues(stats *container.Stats, cpuPct float64, usedMemory uint64, sent_delta, recv_delta uint64, readTime time.Time) { func updateContainerStatsValues(stats *container.Stats, cpuPct float64, usedMemory uint64, sent_delta, recv_delta uint64, readTime time.Time) {
stats.Cpu = twoDecimals(cpuPct) stats.Cpu = utils.TwoDecimals(cpuPct)
stats.Mem = bytesToMegabytes(float64(usedMemory)) stats.Mem = utils.BytesToMegabytes(float64(usedMemory))
stats.Bandwidth = [2]uint64{sent_delta, recv_delta} stats.Bandwidth = [2]uint64{sent_delta, recv_delta}
// TODO(0.19+): stop populating NetworkSent/NetworkRecv (deprecated in 0.18.3) // TODO(0.19+): stop populating NetworkSent/NetworkRecv (deprecated in 0.18.3)
stats.NetworkSent = bytesToMegabytes(float64(sent_delta)) stats.NetworkSent = utils.BytesToMegabytes(float64(sent_delta))
stats.NetworkRecv = bytesToMegabytes(float64(recv_delta)) stats.NetworkRecv = utils.BytesToMegabytes(float64(recv_delta))
stats.PrevReadTime = readTime stats.PrevReadTime = readTime
} }
// convertContainerPortsToString formats the ports of a container into a sorted, deduplicated string.
// ctr.Ports is nilled out after processing so the slice is not accidentally reused.
func convertContainerPortsToString(ctr *container.ApiInfo) string {
if len(ctr.Ports) == 0 {
return ""
}
sort.Slice(ctr.Ports, func(i, j int) bool {
return ctr.Ports[i].PublicPort < ctr.Ports[j].PublicPort
})
var builder strings.Builder
seenPorts := make(map[uint16]struct{})
for _, p := range ctr.Ports {
_, ok := seenPorts[p.PublicPort]
if p.PublicPort == 0 || ok {
continue
}
seenPorts[p.PublicPort] = struct{}{}
if builder.Len() > 0 {
builder.WriteString(", ")
}
switch p.IP {
case "0.0.0.0", "::":
default:
builder.WriteString(p.IP)
builder.WriteByte(':')
}
builder.WriteString(strconv.Itoa(int(p.PublicPort)))
}
// clear ports slice so it doesn't get reused and blend into next response
ctr.Ports = nil
return builder.String()
}
func parseDockerStatus(status string) (string, container.DockerHealth) { func parseDockerStatus(status string) (string, container.DockerHealth) {
trimmed := strings.TrimSpace(status) trimmed := strings.TrimSpace(status)
if trimmed == "" { if trimmed == "" {
@@ -364,22 +400,60 @@ func parseDockerStatus(status string) (string, container.DockerHealth) {
statusText = trimmed statusText = trimmed
} }
healthText := strings.ToLower(strings.TrimSpace(strings.TrimSuffix(trimmed[openIdx+1:], ")"))) healthText := strings.TrimSpace(strings.TrimSuffix(trimmed[openIdx+1:], ")"))
// Some Docker statuses include a "health:" prefix inside the parentheses. // Some Docker statuses include a "health:" prefix inside the parentheses.
// Strip it so it maps correctly to the known health states. // Strip it so it maps correctly to the known health states.
if colonIdx := strings.IndexRune(healthText, ':'); colonIdx != -1 { if colonIdx := strings.IndexRune(healthText, ':'); colonIdx != -1 {
prefix := strings.TrimSpace(healthText[:colonIdx]) prefix := strings.ToLower(strings.TrimSpace(healthText[:colonIdx]))
if prefix == "health" || prefix == "health status" { if prefix == "health" || prefix == "health status" {
healthText = strings.TrimSpace(healthText[colonIdx+1:]) healthText = strings.TrimSpace(healthText[colonIdx+1:])
} }
} }
if health, ok := container.DockerHealthStrings[healthText]; ok { if health, ok := parseDockerHealthStatus(healthText); ok {
return statusText, health return statusText, health
} }
return trimmed, container.DockerHealthNone return trimmed, container.DockerHealthNone
} }
// parseDockerHealthStatus maps Docker health status strings to container.DockerHealth values
func parseDockerHealthStatus(status string) (container.DockerHealth, bool) {
health, ok := container.DockerHealthStrings[strings.ToLower(strings.TrimSpace(status))]
return health, ok
}
// getPodmanContainerHealth fetches container health status from the container inspect endpoint.
// Used for Podman which doesn't provide health status in the /containers/json endpoint as of March 2026.
// https://github.com/containers/podman/issues/27786
func (dm *dockerManager) getPodmanContainerHealth(containerID string) (container.DockerHealth, error) {
resp, err := dm.client.Get(fmt.Sprintf("http://localhost/containers/%s/json", url.PathEscape(containerID)))
if err != nil {
return container.DockerHealthNone, err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return container.DockerHealthNone, fmt.Errorf("container inspect request failed: %s", resp.Status)
}
var inspectInfo struct {
State struct {
Health struct {
Status string
}
}
}
if err := json.NewDecoder(resp.Body).Decode(&inspectInfo); err != nil {
return container.DockerHealthNone, err
}
if health, ok := parseDockerHealthStatus(inspectInfo.State.Health.Status); ok {
return health, nil
}
return container.DockerHealthNone, nil
}
// Updates stats for individual container with cache-time-aware delta tracking // Updates stats for individual container with cache-time-aware delta tracking
func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeMs uint16) error { func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeMs uint16) error {
name := ctr.Names[0][1:] name := ctr.Names[0][1:]
@@ -389,6 +463,21 @@ func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeM
return err return err
} }
statusText, health := parseDockerStatus(ctr.Status)
// Docker exposes Health.Status on /containers/json in API 1.52+.
// Podman currently requires falling back to the inspect endpoint as of March 2026.
// https://github.com/containers/podman/issues/27786
if ctr.Health.Status != "" {
if h, ok := parseDockerHealthStatus(ctr.Health.Status); ok {
health = h
}
} else if dm.usingPodman {
if podmanHealth, err := dm.getPodmanContainerHealth(ctr.IdShort); err == nil {
health = podmanHealth
}
}
dm.containerStatsMutex.Lock() dm.containerStatsMutex.Lock()
defer dm.containerStatsMutex.Unlock() defer dm.containerStatsMutex.Unlock()
@@ -400,11 +489,13 @@ func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeM
} }
stats.Id = ctr.IdShort stats.Id = ctr.IdShort
statusText, health := parseDockerStatus(ctr.Status)
stats.Status = statusText stats.Status = statusText
stats.Health = health stats.Health = health
if len(ctr.Ports) > 0 {
stats.Ports = convertContainerPortsToString(ctr)
}
// reset current stats // reset current stats
stats.Cpu = 0 stats.Cpu = 0
stats.Mem = 0 stats.Mem = 0
@@ -487,7 +578,7 @@ func (dm *dockerManager) deleteContainerStatsSync(id string) {
// Creates a new http client for Docker or Podman API // Creates a new http client for Docker or Podman API
func newDockerManager() *dockerManager { func newDockerManager() *dockerManager {
dockerHost, exists := GetEnv("DOCKER_HOST") dockerHost, exists := utils.GetEnv("DOCKER_HOST")
if exists { if exists {
// return nil if set to empty string // return nil if set to empty string
if dockerHost == "" { if dockerHost == "" {
@@ -523,7 +614,7 @@ func newDockerManager() *dockerManager {
// configurable timeout // configurable timeout
timeout := time.Millisecond * time.Duration(dockerTimeoutMs) timeout := time.Millisecond * time.Duration(dockerTimeoutMs)
if t, set := GetEnv("DOCKER_TIMEOUT"); set { if t, set := utils.GetEnv("DOCKER_TIMEOUT"); set {
timeout, err = time.ParseDuration(t) timeout, err = time.ParseDuration(t)
if err != nil { if err != nil {
slog.Error(err.Error()) slog.Error(err.Error())
@@ -540,7 +631,7 @@ func newDockerManager() *dockerManager {
// Read container exclusion patterns from environment variable // Read container exclusion patterns from environment variable
var excludeContainers []string var excludeContainers []string
if excludeStr, set := GetEnv("EXCLUDE_CONTAINERS"); set && excludeStr != "" { if excludeStr, set := utils.GetEnv("EXCLUDE_CONTAINERS"); set && excludeStr != "" {
parts := strings.SplitSeq(excludeStr, ",") parts := strings.SplitSeq(excludeStr, ",")
for part := range parts { for part := range parts {
trimmed := strings.TrimSpace(part) trimmed := strings.TrimSpace(part)

View File

@@ -18,6 +18,7 @@ import (
"time" "time"
"github.com/henrygd/beszel/agent/deltatracker" "github.com/henrygd/beszel/agent/deltatracker"
"github.com/henrygd/beszel/agent/utils"
"github.com/henrygd/beszel/internal/entities/container" "github.com/henrygd/beszel/internal/entities/container"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
@@ -34,6 +35,12 @@ type recordingRoundTripper struct {
lastQuery map[string]string lastQuery map[string]string
} }
type roundTripFunc func(*http.Request) (*http.Response, error)
func (fn roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) {
return fn(req)
}
func (rt *recordingRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { func (rt *recordingRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
rt.called = true rt.called = true
rt.lastPath = req.URL.EscapedPath() rt.lastPath = req.URL.EscapedPath()
@@ -213,6 +220,28 @@ func TestContainerDetailsRequestsUseExpectedDockerPaths(t *testing.T) {
}) })
} }
func TestGetPodmanContainerHealth(t *testing.T) {
called := false
dm := &dockerManager{
client: &http.Client{Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
called = true
assert.Equal(t, "/containers/0123456789ab/json", req.URL.EscapedPath())
return &http.Response{
StatusCode: http.StatusOK,
Status: "200 OK",
Header: make(http.Header),
Body: io.NopCloser(strings.NewReader(`{"State":{"Health":{"Status":"healthy"}}}`)),
Request: req,
}, nil
})},
}
health, err := dm.getPodmanContainerHealth("0123456789ab")
require.NoError(t, err)
assert.True(t, called)
assert.Equal(t, container.DockerHealthHealthy, health)
}
func TestValidateCpuPercentage(t *testing.T) { func TestValidateCpuPercentage(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
@@ -298,48 +327,6 @@ func TestUpdateContainerStatsValues(t *testing.T) {
assert.Equal(t, testTime, stats.PrevReadTime) assert.Equal(t, testTime, stats.PrevReadTime)
} }
func TestTwoDecimals(t *testing.T) {
tests := []struct {
name string
input float64
expected float64
}{
{"round down", 1.234, 1.23},
{"round half up", 1.235, 1.24}, // math.Round rounds half up
{"no rounding needed", 1.23, 1.23},
{"negative number", -1.235, -1.24}, // math.Round rounds half up (more negative)
{"zero", 0.0, 0.0},
{"large number", 123.456, 123.46}, // rounds 5 up
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := twoDecimals(tt.input)
assert.Equal(t, tt.expected, result)
})
}
}
func TestBytesToMegabytes(t *testing.T) {
tests := []struct {
name string
input float64
expected float64
}{
{"1 MB", 1048576, 1.0},
{"512 KB", 524288, 0.5},
{"zero", 0, 0},
{"large value", 1073741824, 1024}, // 1 GB = 1024 MB
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := bytesToMegabytes(tt.input)
assert.Equal(t, tt.expected, result)
})
}
}
func TestInitializeCpuTracking(t *testing.T) { func TestInitializeCpuTracking(t *testing.T) {
dm := &dockerManager{ dm := &dockerManager{
lastCpuContainer: make(map[uint16]map[string]uint64), lastCpuContainer: make(map[uint16]map[string]uint64),
@@ -905,11 +892,11 @@ func TestContainerStatsEndToEndWithRealData(t *testing.T) {
updateContainerStatsValues(testStats, cpuPct, usedMemory, 1000000, 500000, testTime) updateContainerStatsValues(testStats, cpuPct, usedMemory, 1000000, 500000, testTime)
assert.Equal(t, cpuPct, testStats.Cpu) assert.Equal(t, cpuPct, testStats.Cpu)
assert.Equal(t, bytesToMegabytes(float64(usedMemory)), testStats.Mem) assert.Equal(t, utils.BytesToMegabytes(float64(usedMemory)), testStats.Mem)
assert.Equal(t, [2]uint64{1000000, 500000}, testStats.Bandwidth) assert.Equal(t, [2]uint64{1000000, 500000}, testStats.Bandwidth)
// Deprecated fields still populated for backward compatibility with older hubs // Deprecated fields still populated for backward compatibility with older hubs
assert.Equal(t, bytesToMegabytes(1000000), testStats.NetworkSent) assert.Equal(t, utils.BytesToMegabytes(1000000), testStats.NetworkSent)
assert.Equal(t, bytesToMegabytes(500000), testStats.NetworkRecv) assert.Equal(t, utils.BytesToMegabytes(500000), testStats.NetworkRecv)
assert.Equal(t, testTime, testStats.PrevReadTime) assert.Equal(t, testTime, testStats.PrevReadTime)
} }
@@ -1170,6 +1157,18 @@ func TestParseDockerStatus(t *testing.T) {
expectedStatus: "", expectedStatus: "",
expectedHealth: container.DockerHealthNone, expectedHealth: container.DockerHealthNone,
}, },
{
name: "status health with health: prefix",
input: "Up 5 minutes (health: starting)",
expectedStatus: "Up 5 minutes",
expectedHealth: container.DockerHealthStarting,
},
{
name: "status health with health status: prefix",
input: "Up 10 minutes (health status: unhealthy)",
expectedStatus: "Up 10 minutes",
expectedHealth: container.DockerHealthUnhealthy,
},
} }
for _, tt := range tests { for _, tt := range tests {
@@ -1181,6 +1180,84 @@ func TestParseDockerStatus(t *testing.T) {
} }
} }
func TestParseDockerHealthStatus(t *testing.T) {
tests := []struct {
input string
expectedHealth container.DockerHealth
expectedOk bool
}{
{"healthy", container.DockerHealthHealthy, true},
{"unhealthy", container.DockerHealthUnhealthy, true},
{"starting", container.DockerHealthStarting, true},
{"none", container.DockerHealthNone, true},
{" Healthy ", container.DockerHealthHealthy, true},
{"unknown", container.DockerHealthNone, false},
{"", container.DockerHealthNone, false},
}
for _, tt := range tests {
t.Run(tt.input, func(t *testing.T) {
health, ok := parseDockerHealthStatus(tt.input)
assert.Equal(t, tt.expectedHealth, health)
assert.Equal(t, tt.expectedOk, ok)
})
}
}
func TestUpdateContainerStatsUsesPodmanInspectHealthFallback(t *testing.T) {
var requestedPaths []string
dm := &dockerManager{
client: &http.Client{Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
requestedPaths = append(requestedPaths, req.URL.EscapedPath())
switch req.URL.EscapedPath() {
case "/containers/0123456789ab/stats":
return &http.Response{
StatusCode: http.StatusOK,
Status: "200 OK",
Header: make(http.Header),
Body: io.NopCloser(strings.NewReader(`{
"read":"2026-03-15T21:26:59Z",
"cpu_stats":{"cpu_usage":{"total_usage":1000},"system_cpu_usage":2000},
"memory_stats":{"usage":1048576,"stats":{"inactive_file":262144}},
"networks":{"eth0":{"rx_bytes":0,"tx_bytes":0}}
}`)),
Request: req,
}, nil
case "/containers/0123456789ab/json":
return &http.Response{
StatusCode: http.StatusOK,
Status: "200 OK",
Header: make(http.Header),
Body: io.NopCloser(strings.NewReader(`{"State":{"Health":{"Status":"healthy"}}}`)),
Request: req,
}, nil
default:
return nil, fmt.Errorf("unexpected path: %s", req.URL.EscapedPath())
}
})},
containerStatsMap: make(map[string]*container.Stats),
apiStats: &container.ApiStats{},
usingPodman: true,
lastCpuContainer: make(map[uint16]map[string]uint64),
lastCpuSystem: make(map[uint16]map[string]uint64),
lastCpuReadTime: make(map[uint16]map[string]time.Time),
networkSentTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
networkRecvTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
}
ctr := &container.ApiInfo{
IdShort: "0123456789ab",
Names: []string{"/beszel"},
Status: "Up 2 minutes",
Image: "beszel:latest",
}
err := dm.updateContainerStats(ctr, defaultCacheTimeMs)
require.NoError(t, err)
assert.Equal(t, []string{"/containers/0123456789ab/stats", "/containers/0123456789ab/json"}, requestedPaths)
assert.Equal(t, container.DockerHealthHealthy, dm.containerStatsMap[ctr.IdShort].Health)
assert.Equal(t, "Up 2 minutes", dm.containerStatsMap[ctr.IdShort].Status)
}
func TestConstantsAndUtilityFunctions(t *testing.T) { func TestConstantsAndUtilityFunctions(t *testing.T) {
// Test constants are properly defined // Test constants are properly defined
assert.Equal(t, uint16(60000), defaultCacheTimeMs) assert.Equal(t, uint16(60000), defaultCacheTimeMs)
@@ -1190,13 +1267,13 @@ func TestConstantsAndUtilityFunctions(t *testing.T) {
assert.Equal(t, 5*1024*1024, maxTotalLogSize) // 5MB assert.Equal(t, 5*1024*1024, maxTotalLogSize) // 5MB
// Test utility functions // Test utility functions
assert.Equal(t, 1.5, twoDecimals(1.499)) assert.Equal(t, 1.5, utils.TwoDecimals(1.499))
assert.Equal(t, 1.5, twoDecimals(1.5)) assert.Equal(t, 1.5, utils.TwoDecimals(1.5))
assert.Equal(t, 1.5, twoDecimals(1.501)) assert.Equal(t, 1.5, utils.TwoDecimals(1.501))
assert.Equal(t, 1.0, bytesToMegabytes(1048576)) // 1 MB assert.Equal(t, 1.0, utils.BytesToMegabytes(1048576)) // 1 MB
assert.Equal(t, 0.5, bytesToMegabytes(524288)) // 512 KB assert.Equal(t, 0.5, utils.BytesToMegabytes(524288)) // 512 KB
assert.Equal(t, 0.0, bytesToMegabytes(0)) assert.Equal(t, 0.0, utils.BytesToMegabytes(0))
} }
func TestDecodeDockerLogStream(t *testing.T) { func TestDecodeDockerLogStream(t *testing.T) {
@@ -1496,3 +1573,99 @@ func TestAnsiEscapePattern(t *testing.T) {
}) })
} }
} }
func TestConvertContainerPortsToString(t *testing.T) {
type port = struct {
PublicPort uint16
IP string
}
tests := []struct {
name string
ports []port
expected string
}{
{
name: "empty ports",
ports: nil,
expected: "",
},
{
name: "single port",
ports: []port{
{PublicPort: 80, IP: "0.0.0.0"},
},
expected: "80",
},
{
name: "single port with non-default IP",
ports: []port{
{PublicPort: 80, IP: "1.2.3.4"},
},
expected: "1.2.3.4:80",
},
{
name: "ipv6 default ip",
ports: []port{
{PublicPort: 80, IP: "::"},
},
expected: "80",
},
{
name: "zero PublicPort is skipped",
ports: []port{
{PublicPort: 0, IP: "0.0.0.0"},
{PublicPort: 80, IP: "0.0.0.0"},
},
expected: "80",
},
{
name: "ports sorted ascending by PublicPort",
ports: []port{
{PublicPort: 443, IP: "0.0.0.0"},
{PublicPort: 80, IP: "0.0.0.0"},
{PublicPort: 8080, IP: "0.0.0.0"},
},
expected: "80, 443, 8080",
},
{
name: "duplicates are deduplicated",
ports: []port{
{PublicPort: 80, IP: "0.0.0.0"},
{PublicPort: 80, IP: "0.0.0.0"},
{PublicPort: 443, IP: "0.0.0.0"},
},
expected: "80, 443",
},
{
name: "multiple ports with different IPs",
ports: []port{
{PublicPort: 80, IP: "0.0.0.0"},
{PublicPort: 443, IP: "1.2.3.4"},
},
expected: "80, 1.2.3.4:443",
},
{
name: "ports slice is nilled after call",
ports: []port{
{PublicPort: 8080, IP: "0.0.0.0"},
},
expected: "8080",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ctr := &container.ApiInfo{}
for _, p := range tt.ports {
ctr.Ports = append(ctr.Ports, struct {
PublicPort uint16
IP string
}{PublicPort: p.PublicPort, IP: p.IP})
}
result := convertContainerPortsToString(ctr)
assert.Equal(t, tt.expected, result)
// Ports slice must be cleared to prevent bleed-over into the next response
assert.Nil(t, ctr.Ports, "ctr.Ports should be nil after formatContainerPorts")
})
}
}

View File

@@ -8,6 +8,7 @@ import (
"strconv" "strconv"
"strings" "strings"
"github.com/henrygd/beszel/agent/utils"
"github.com/henrygd/beszel/internal/entities/smart" "github.com/henrygd/beszel/internal/entities/smart"
) )
@@ -141,9 +142,9 @@ func readEmmcHealth(blockName string) (emmcHealth, bool) {
out.lifeA = lifeA out.lifeA = lifeA
out.lifeB = lifeB out.lifeB = lifeB
out.model = readStringFile(filepath.Join(deviceDir, "name")) out.model = utils.ReadStringFile(filepath.Join(deviceDir, "name"))
out.serial = readStringFile(filepath.Join(deviceDir, "serial")) out.serial = utils.ReadStringFile(filepath.Join(deviceDir, "serial"))
out.revision = readStringFile(filepath.Join(deviceDir, "prv")) out.revision = utils.ReadStringFile(filepath.Join(deviceDir, "prv"))
if capBytes, ok := readBlockCapacityBytes(blockName); ok { if capBytes, ok := readBlockCapacityBytes(blockName); ok {
out.capacity = capBytes out.capacity = capBytes
@@ -153,7 +154,7 @@ func readEmmcHealth(blockName string) (emmcHealth, bool) {
} }
func readLifeTime(deviceDir string) (uint8, uint8, bool) { func readLifeTime(deviceDir string) (uint8, uint8, bool) {
if content, ok := readStringFileOK(filepath.Join(deviceDir, "life_time")); ok { if content, ok := utils.ReadStringFileOK(filepath.Join(deviceDir, "life_time")); ok {
a, b, ok := parseHexBytePair(content) a, b, ok := parseHexBytePair(content)
return a, b, ok return a, b, ok
} }
@@ -170,7 +171,7 @@ func readBlockCapacityBytes(blockName string) (uint64, bool) {
sizePath := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "size") sizePath := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "size")
lbsPath := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "queue", "logical_block_size") lbsPath := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "queue", "logical_block_size")
sizeStr, ok := readStringFileOK(sizePath) sizeStr, ok := utils.ReadStringFileOK(sizePath)
if !ok { if !ok {
return 0, false return 0, false
} }
@@ -179,7 +180,7 @@ func readBlockCapacityBytes(blockName string) (uint64, bool) {
return 0, false return 0, false
} }
lbsStr, ok := readStringFileOK(lbsPath) lbsStr, ok := utils.ReadStringFileOK(lbsPath)
logicalBlockSize := uint64(512) logicalBlockSize := uint64(512)
if ok { if ok {
if parsed, err := strconv.ParseUint(lbsStr, 10, 64); err == nil && parsed > 0 { if parsed, err := strconv.ParseUint(lbsStr, 10, 64); err == nil && parsed > 0 {
@@ -191,7 +192,7 @@ func readBlockCapacityBytes(blockName string) (uint64, bool) {
} }
func readHexByteFile(path string) (uint8, bool) { func readHexByteFile(path string) (uint8, bool) {
content, ok := readStringFileOK(path) content, ok := utils.ReadStringFileOK(path)
if !ok { if !ok {
return 0, false return 0, false
} }

View File

@@ -1,41 +0,0 @@
package agent
import (
"os"
"strconv"
"strings"
)
// readStringFile returns trimmed file contents or empty string on error.
func readStringFile(path string) string {
content, _ := readStringFileOK(path)
return content
}
// readStringFileOK returns trimmed file contents and read success.
func readStringFileOK(path string) (string, bool) {
b, err := os.ReadFile(path)
if err != nil {
return "", false
}
return strings.TrimSpace(string(b)), true
}
// fileExists reports whether the given path exists.
func fileExists(path string) bool {
_, err := os.Stat(path)
return err == nil
}
// readUintFile parses a decimal uint64 value from a file.
func readUintFile(path string) (uint64, bool) {
raw, ok := readStringFileOK(path)
if !ok {
return 0, false
}
parsed, err := strconv.ParseUint(raw, 10, 64)
if err != nil {
return 0, false
}
return parsed, true
}

View File

@@ -15,6 +15,7 @@ import (
"sync" "sync"
"time" "time"
"github.com/henrygd/beszel/agent/utils"
"github.com/henrygd/beszel/internal/entities/system" "github.com/henrygd/beszel/internal/entities/system"
) )
@@ -291,8 +292,8 @@ func (gm *GPUManager) parseAmdData(output []byte) bool {
} }
gpu := gm.GpuDataMap[id] gpu := gm.GpuDataMap[id]
gpu.Temperature, _ = strconv.ParseFloat(v.Temperature, 64) gpu.Temperature, _ = strconv.ParseFloat(v.Temperature, 64)
gpu.MemoryUsed = bytesToMegabytes(memoryUsage) gpu.MemoryUsed = utils.BytesToMegabytes(memoryUsage)
gpu.MemoryTotal = bytesToMegabytes(totalMemory) gpu.MemoryTotal = utils.BytesToMegabytes(totalMemory)
gpu.Usage += usage gpu.Usage += usage
gpu.Power += power gpu.Power += power
gpu.Count++ gpu.Count++
@@ -366,16 +367,16 @@ func (gm *GPUManager) calculateGPUAverage(id string, gpu *system.GPUData, cacheK
gpuAvg := *gpu gpuAvg := *gpu
deltaUsage, deltaPower, deltaPowerPkg := gm.calculateDeltas(gpu, lastSnapshot) deltaUsage, deltaPower, deltaPowerPkg := gm.calculateDeltas(gpu, lastSnapshot)
gpuAvg.Power = twoDecimals(deltaPower / float64(deltaCount)) gpuAvg.Power = utils.TwoDecimals(deltaPower / float64(deltaCount))
if gpu.Engines != nil { if gpu.Engines != nil {
// make fresh map for averaged engine metrics to avoid mutating // make fresh map for averaged engine metrics to avoid mutating
// the accumulator map stored in gm.GpuDataMap // the accumulator map stored in gm.GpuDataMap
gpuAvg.Engines = make(map[string]float64, len(gpu.Engines)) gpuAvg.Engines = make(map[string]float64, len(gpu.Engines))
gpuAvg.Usage = gm.calculateIntelGPUUsage(&gpuAvg, gpu, lastSnapshot, deltaCount) gpuAvg.Usage = gm.calculateIntelGPUUsage(&gpuAvg, gpu, lastSnapshot, deltaCount)
gpuAvg.PowerPkg = twoDecimals(deltaPowerPkg / float64(deltaCount)) gpuAvg.PowerPkg = utils.TwoDecimals(deltaPowerPkg / float64(deltaCount))
} else { } else {
gpuAvg.Usage = twoDecimals(deltaUsage / float64(deltaCount)) gpuAvg.Usage = utils.TwoDecimals(deltaUsage / float64(deltaCount))
} }
gm.lastAvgData[id] = gpuAvg gm.lastAvgData[id] = gpuAvg
@@ -410,17 +411,17 @@ func (gm *GPUManager) calculateIntelGPUUsage(gpuAvg, gpu *system.GPUData, lastSn
} else { } else {
deltaEngine = engine deltaEngine = engine
} }
gpuAvg.Engines[name] = twoDecimals(deltaEngine / float64(deltaCount)) gpuAvg.Engines[name] = utils.TwoDecimals(deltaEngine / float64(deltaCount))
maxEngineUsage = max(maxEngineUsage, deltaEngine/float64(deltaCount)) maxEngineUsage = max(maxEngineUsage, deltaEngine/float64(deltaCount))
} }
return twoDecimals(maxEngineUsage) return utils.TwoDecimals(maxEngineUsage)
} }
// updateInstantaneousValues updates values that should reflect current state, not averages // updateInstantaneousValues updates values that should reflect current state, not averages
func (gm *GPUManager) updateInstantaneousValues(gpuAvg *system.GPUData, gpu *system.GPUData) { func (gm *GPUManager) updateInstantaneousValues(gpuAvg *system.GPUData, gpu *system.GPUData) {
gpuAvg.Temperature = twoDecimals(gpu.Temperature) gpuAvg.Temperature = utils.TwoDecimals(gpu.Temperature)
gpuAvg.MemoryUsed = twoDecimals(gpu.MemoryUsed) gpuAvg.MemoryUsed = utils.TwoDecimals(gpu.MemoryUsed)
gpuAvg.MemoryTotal = twoDecimals(gpu.MemoryTotal) gpuAvg.MemoryTotal = utils.TwoDecimals(gpu.MemoryTotal)
} }
// storeSnapshot saves the current GPU state for this cache key // storeSnapshot saves the current GPU state for this cache key
@@ -687,7 +688,7 @@ func (gm *GPUManager) resolveLegacyCollectorPriority(caps gpuCapabilities) []col
priorities := make([]collectorSource, 0, 4) priorities := make([]collectorSource, 0, 4)
if caps.hasNvidiaSmi && !caps.hasTegrastats { if caps.hasNvidiaSmi && !caps.hasTegrastats {
if nvml, _ := GetEnv("NVML"); nvml == "true" { if nvml, _ := utils.GetEnv("NVML"); nvml == "true" {
priorities = append(priorities, collectorSourceNVML, collectorSourceNvidiaSMI) priorities = append(priorities, collectorSourceNVML, collectorSourceNvidiaSMI)
} else { } else {
priorities = append(priorities, collectorSourceNvidiaSMI) priorities = append(priorities, collectorSourceNvidiaSMI)
@@ -695,7 +696,7 @@ func (gm *GPUManager) resolveLegacyCollectorPriority(caps gpuCapabilities) []col
} }
if caps.hasRocmSmi { if caps.hasRocmSmi {
if val, _ := GetEnv("AMD_SYSFS"); val == "true" { if val, _ := utils.GetEnv("AMD_SYSFS"); val == "true" {
priorities = append(priorities, collectorSourceAmdSysfs) priorities = append(priorities, collectorSourceAmdSysfs)
} else { } else {
priorities = append(priorities, collectorSourceRocmSMI) priorities = append(priorities, collectorSourceRocmSMI)
@@ -728,7 +729,7 @@ func (gm *GPUManager) resolveLegacyCollectorPriority(caps gpuCapabilities) []col
// NewGPUManager creates and initializes a new GPUManager // NewGPUManager creates and initializes a new GPUManager
func NewGPUManager() (*GPUManager, error) { func NewGPUManager() (*GPUManager, error) {
if skipGPU, _ := GetEnv("SKIP_GPU"); skipGPU == "true" { if skipGPU, _ := utils.GetEnv("SKIP_GPU"); skipGPU == "true" {
return nil, nil return nil, nil
} }
var gm GPUManager var gm GPUManager
@@ -745,7 +746,7 @@ func NewGPUManager() (*GPUManager, error) {
} }
// if GPU_COLLECTOR is set, start user-defined collectors. // if GPU_COLLECTOR is set, start user-defined collectors.
if collectorConfig, ok := GetEnv("GPU_COLLECTOR"); ok && strings.TrimSpace(collectorConfig) != "" { if collectorConfig, ok := utils.GetEnv("GPU_COLLECTOR"); ok && strings.TrimSpace(collectorConfig) != "" {
priorities := parseCollectorPriority(collectorConfig) priorities := parseCollectorPriority(collectorConfig)
if gm.startCollectorsByPriority(priorities, caps) == 0 { if gm.startCollectorsByPriority(priorities, caps) == 0 {
return nil, fmt.Errorf("no configured GPU collectors are available") return nil, fmt.Errorf("no configured GPU collectors are available")

View File

@@ -13,6 +13,7 @@ import (
"sync" "sync"
"time" "time"
"github.com/henrygd/beszel/agent/utils"
"github.com/henrygd/beszel/internal/entities/system" "github.com/henrygd/beszel/internal/entities/system"
) )
@@ -32,8 +33,8 @@ func (gm *GPUManager) hasAmdSysfs() bool {
return false return false
} }
for _, vendorPath := range cards { for _, vendorPath := range cards {
vendor, err := os.ReadFile(vendorPath) vendor, err := utils.ReadStringFileLimited(vendorPath, 64)
if err == nil && strings.TrimSpace(string(vendor)) == "0x1002" { if err == nil && vendor == "0x1002" {
return true return true
} }
} }
@@ -87,12 +88,11 @@ func (gm *GPUManager) collectAmdStats() error {
// isAmdGpu checks whether a DRM card path belongs to AMD vendor ID 0x1002. // isAmdGpu checks whether a DRM card path belongs to AMD vendor ID 0x1002.
func isAmdGpu(cardPath string) bool { func isAmdGpu(cardPath string) bool {
vendorPath := filepath.Join(cardPath, "device/vendor") vendor, err := utils.ReadStringFileLimited(filepath.Join(cardPath, "device/vendor"), 64)
vendor, err := os.ReadFile(vendorPath)
if err != nil { if err != nil {
return false return false
} }
return strings.TrimSpace(string(vendor)) == "0x1002" return vendor == "0x1002"
} }
// updateAmdGpuData reads GPU metrics from sysfs and updates the GPU data map. // updateAmdGpuData reads GPU metrics from sysfs and updates the GPU data map.
@@ -144,8 +144,8 @@ func (gm *GPUManager) updateAmdGpuData(cardPath string) bool {
if usageErr == nil { if usageErr == nil {
gpu.Usage += usage gpu.Usage += usage
} }
gpu.MemoryUsed = bytesToMegabytes(memUsed) gpu.MemoryUsed = utils.BytesToMegabytes(memUsed)
gpu.MemoryTotal = bytesToMegabytes(memTotal) gpu.MemoryTotal = utils.BytesToMegabytes(memTotal)
gpu.Temperature = temp gpu.Temperature = temp
gpu.Power += power gpu.Power += power
gpu.Count++ gpu.Count++
@@ -154,11 +154,11 @@ func (gm *GPUManager) updateAmdGpuData(cardPath string) bool {
// readSysfsFloat reads and parses a numeric value from a sysfs file. // readSysfsFloat reads and parses a numeric value from a sysfs file.
func readSysfsFloat(path string) (float64, error) { func readSysfsFloat(path string) (float64, error) {
val, err := os.ReadFile(path) val, err := utils.ReadStringFileLimited(path, 64)
if err != nil { if err != nil {
return 0, err return 0, err
} }
return strconv.ParseFloat(strings.TrimSpace(string(val)), 64) return strconv.ParseFloat(val, 64)
} }
// normalizeHexID normalizes hex IDs by trimming spaces, lowercasing, and dropping 0x. // normalizeHexID normalizes hex IDs by trimming spaces, lowercasing, and dropping 0x.
@@ -273,16 +273,16 @@ func cacheMissingAmdgpuName(deviceID, revisionID string) {
// Falls back to showing the raw device ID if not found in the lookup table. // Falls back to showing the raw device ID if not found in the lookup table.
func getAmdGpuName(devicePath string) string { func getAmdGpuName(devicePath string) string {
// Try product_name first (works for some enterprise GPUs) // Try product_name first (works for some enterprise GPUs)
if prod, err := os.ReadFile(filepath.Join(devicePath, "product_name")); err == nil { if prod, err := utils.ReadStringFileLimited(filepath.Join(devicePath, "product_name"), 128); err == nil {
return strings.TrimSpace(string(prod)) return prod
} }
// Read PCI device ID and look it up // Read PCI device ID and look it up
if deviceID, err := os.ReadFile(filepath.Join(devicePath, "device")); err == nil { if deviceID, err := utils.ReadStringFileLimited(filepath.Join(devicePath, "device"), 64); err == nil {
id := normalizeHexID(string(deviceID)) id := normalizeHexID(deviceID)
revision := "" revision := ""
if revBytes, revErr := os.ReadFile(filepath.Join(devicePath, "revision")); revErr == nil { if rev, revErr := utils.ReadStringFileLimited(filepath.Join(devicePath, "revision"), 64); revErr == nil {
revision = normalizeHexID(string(revBytes)) revision = normalizeHexID(rev)
} }
if name, found, done := getCachedAmdgpuName(id, revision); found { if name, found, done := getCachedAmdgpuName(id, revision); found {

View File

@@ -7,6 +7,7 @@ import (
"path/filepath" "path/filepath"
"testing" "testing"
"github.com/henrygd/beszel/agent/utils"
"github.com/henrygd/beszel/internal/entities/system" "github.com/henrygd/beszel/internal/entities/system"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
@@ -128,14 +129,14 @@ func TestUpdateAmdGpuDataWithFakeSysfs(t *testing.T) {
{ {
name: "sums vram and gtt when gtt is present", name: "sums vram and gtt when gtt is present",
writeGTT: true, writeGTT: true,
wantMemoryUsed: bytesToMegabytes(1073741824 + 536870912), wantMemoryUsed: utils.BytesToMegabytes(1073741824 + 536870912),
wantMemoryTotal: bytesToMegabytes(2147483648 + 4294967296), wantMemoryTotal: utils.BytesToMegabytes(2147483648 + 4294967296),
}, },
{ {
name: "falls back to vram when gtt is missing", name: "falls back to vram when gtt is missing",
writeGTT: false, writeGTT: false,
wantMemoryUsed: bytesToMegabytes(1073741824), wantMemoryUsed: utils.BytesToMegabytes(1073741824),
wantMemoryTotal: bytesToMegabytes(2147483648), wantMemoryTotal: utils.BytesToMegabytes(2147483648),
}, },
} }

View File

@@ -7,6 +7,7 @@ import (
"strconv" "strconv"
"strings" "strings"
"github.com/henrygd/beszel/agent/utils"
"github.com/henrygd/beszel/internal/entities/system" "github.com/henrygd/beszel/internal/entities/system"
) )
@@ -52,7 +53,7 @@ func (gm *GPUManager) updateIntelFromStats(sample *intelGpuStats) bool {
func (gm *GPUManager) collectIntelStats() (err error) { func (gm *GPUManager) collectIntelStats() (err error) {
// Build command arguments, optionally selecting a device via -d // Build command arguments, optionally selecting a device via -d
args := []string{"-s", intelGpuStatsInterval, "-l"} args := []string{"-s", intelGpuStatsInterval, "-l"}
if dev, ok := GetEnv("INTEL_GPU_DEVICE"); ok && dev != "" { if dev, ok := utils.GetEnv("INTEL_GPU_DEVICE"); ok && dev != "" {
args = append(args, "-d", dev) args = append(args, "-d", dev)
} }
cmd := exec.Command(intelGpuStatsCmd, args...) cmd := exec.Command(intelGpuStatsCmd, args...)

View File

@@ -9,6 +9,7 @@ import (
"strings" "strings"
"time" "time"
"github.com/henrygd/beszel/agent/utils"
"github.com/henrygd/beszel/internal/entities/system" "github.com/henrygd/beszel/internal/entities/system"
) )
@@ -80,10 +81,10 @@ func (gm *GPUManager) updateNvtopSnapshots(snapshots []nvtopSnapshot) bool {
gpu.Temperature = parseNvtopNumber(*sample.Temp) gpu.Temperature = parseNvtopNumber(*sample.Temp)
} }
if sample.MemUsed != nil { if sample.MemUsed != nil {
gpu.MemoryUsed = bytesToMegabytes(parseNvtopNumber(*sample.MemUsed)) gpu.MemoryUsed = utils.BytesToMegabytes(parseNvtopNumber(*sample.MemUsed))
} }
if sample.MemTotal != nil { if sample.MemTotal != nil {
gpu.MemoryTotal = bytesToMegabytes(parseNvtopNumber(*sample.MemTotal)) gpu.MemoryTotal = utils.BytesToMegabytes(parseNvtopNumber(*sample.MemTotal))
} }
if sample.GpuUtil != nil { if sample.GpuUtil != nil {
gpu.Usage += parseNvtopNumber(*sample.GpuUtil) gpu.Usage += parseNvtopNumber(*sample.GpuUtil)

View File

@@ -10,6 +10,7 @@ import (
"testing" "testing"
"time" "time"
"github.com/henrygd/beszel/agent/utils"
"github.com/henrygd/beszel/internal/entities/system" "github.com/henrygd/beszel/internal/entities/system"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
@@ -265,8 +266,8 @@ func TestParseNvtopData(t *testing.T) {
assert.Equal(t, 48.0, g0.Temperature) assert.Equal(t, 48.0, g0.Temperature)
assert.Equal(t, 5.0, g0.Usage) assert.Equal(t, 5.0, g0.Usage)
assert.Equal(t, 13.0, g0.Power) assert.Equal(t, 13.0, g0.Power)
assert.Equal(t, bytesToMegabytes(349372416), g0.MemoryUsed) assert.Equal(t, utils.BytesToMegabytes(349372416), g0.MemoryUsed)
assert.Equal(t, bytesToMegabytes(4294967296), g0.MemoryTotal) assert.Equal(t, utils.BytesToMegabytes(4294967296), g0.MemoryTotal)
assert.Equal(t, 1.0, g0.Count) assert.Equal(t, 1.0, g0.Count)
g1, ok := gm.GpuDataMap["n1"] g1, ok := gm.GpuDataMap["n1"]
@@ -275,8 +276,8 @@ func TestParseNvtopData(t *testing.T) {
assert.Equal(t, 48.0, g1.Temperature) assert.Equal(t, 48.0, g1.Temperature)
assert.Equal(t, 12.0, g1.Usage) assert.Equal(t, 12.0, g1.Usage)
assert.Equal(t, 9.0, g1.Power) assert.Equal(t, 9.0, g1.Power)
assert.Equal(t, bytesToMegabytes(1213784064), g1.MemoryUsed) assert.Equal(t, utils.BytesToMegabytes(1213784064), g1.MemoryUsed)
assert.Equal(t, bytesToMegabytes(16929173504), g1.MemoryTotal) assert.Equal(t, utils.BytesToMegabytes(16929173504), g1.MemoryTotal)
assert.Equal(t, 1.0, g1.Count) assert.Equal(t, 1.0, g1.Count)
} }

View File

@@ -9,6 +9,7 @@ import (
"strconv" "strconv"
"strings" "strings"
"github.com/henrygd/beszel/agent/utils"
"github.com/henrygd/beszel/internal/entities/smart" "github.com/henrygd/beszel/internal/entities/smart"
) )
@@ -42,7 +43,7 @@ func scanMdraidDevices() []*DeviceInfo {
continue continue
} }
mdDir := filepath.Join(blockDir, name, "md") mdDir := filepath.Join(blockDir, name, "md")
if !fileExists(filepath.Join(mdDir, "array_state")) { if !utils.FileExists(filepath.Join(mdDir, "array_state")) {
continue continue
} }
@@ -134,24 +135,24 @@ func readMdraidHealth(blockName string) (mdraidHealth, bool) {
} }
mdDir := filepath.Join(mdraidSysfsRoot, "block", blockName, "md") mdDir := filepath.Join(mdraidSysfsRoot, "block", blockName, "md")
arrayState, okState := readStringFileOK(filepath.Join(mdDir, "array_state")) arrayState, okState := utils.ReadStringFileOK(filepath.Join(mdDir, "array_state"))
if !okState { if !okState {
return out, false return out, false
} }
out.arrayState = arrayState out.arrayState = arrayState
out.level = readStringFile(filepath.Join(mdDir, "level")) out.level = utils.ReadStringFile(filepath.Join(mdDir, "level"))
out.syncAction = readStringFile(filepath.Join(mdDir, "sync_action")) out.syncAction = utils.ReadStringFile(filepath.Join(mdDir, "sync_action"))
out.syncCompleted = readStringFile(filepath.Join(mdDir, "sync_completed")) out.syncCompleted = utils.ReadStringFile(filepath.Join(mdDir, "sync_completed"))
out.syncSpeed = readStringFile(filepath.Join(mdDir, "sync_speed")) out.syncSpeed = utils.ReadStringFile(filepath.Join(mdDir, "sync_speed"))
if val, ok := readUintFile(filepath.Join(mdDir, "raid_disks")); ok { if val, ok := utils.ReadUintFile(filepath.Join(mdDir, "raid_disks")); ok {
out.raidDisks = val out.raidDisks = val
} }
if val, ok := readUintFile(filepath.Join(mdDir, "degraded")); ok { if val, ok := utils.ReadUintFile(filepath.Join(mdDir, "degraded")); ok {
out.degraded = val out.degraded = val
} }
if val, ok := readUintFile(filepath.Join(mdDir, "mismatch_cnt")); ok { if val, ok := utils.ReadUintFile(filepath.Join(mdDir, "mismatch_cnt")); ok {
out.mismatchCnt = val out.mismatchCnt = val
} }
@@ -169,11 +170,18 @@ func mdraidSmartStatus(health mdraidHealth) string {
case "inactive", "faulty", "broken", "stopped": case "inactive", "faulty", "broken", "stopped":
return "FAILED" return "FAILED"
} }
// During rebuild/recovery, arrays are often temporarily degraded; report as
// warning instead of hard failure while synchronization is in progress.
syncAction := strings.ToLower(strings.TrimSpace(health.syncAction))
switch syncAction {
case "resync", "recover", "reshape":
return "WARNING"
}
if health.degraded > 0 { if health.degraded > 0 {
return "FAILED" return "FAILED"
} }
switch strings.ToLower(strings.TrimSpace(health.syncAction)) { switch syncAction {
case "resync", "recover", "reshape", "check", "repair": case "check", "repair":
return "WARNING" return "WARNING"
} }
switch state { switch state {
@@ -205,7 +213,7 @@ func readMdraidBlockCapacityBytes(blockName, root string) (uint64, bool) {
sizePath := filepath.Join(root, "block", blockName, "size") sizePath := filepath.Join(root, "block", blockName, "size")
lbsPath := filepath.Join(root, "block", blockName, "queue", "logical_block_size") lbsPath := filepath.Join(root, "block", blockName, "queue", "logical_block_size")
sizeStr, ok := readStringFileOK(sizePath) sizeStr, ok := utils.ReadStringFileOK(sizePath)
if !ok { if !ok {
return 0, false return 0, false
} }
@@ -215,7 +223,7 @@ func readMdraidBlockCapacityBytes(blockName, root string) (uint64, bool) {
} }
logicalBlockSize := uint64(512) logicalBlockSize := uint64(512)
if lbsStr, ok := readStringFileOK(lbsPath); ok { if lbsStr, ok := utils.ReadStringFileOK(lbsPath); ok {
if parsed, err := strconv.ParseUint(lbsStr, 10, 64); err == nil && parsed > 0 { if parsed, err := strconv.ParseUint(lbsStr, 10, 64); err == nil && parsed > 0 {
logicalBlockSize = parsed logicalBlockSize = parsed
} }

View File

@@ -85,6 +85,9 @@ func TestMdraidSmartStatus(t *testing.T) {
if got := mdraidSmartStatus(mdraidHealth{arrayState: "inactive"}); got != "FAILED" { if got := mdraidSmartStatus(mdraidHealth{arrayState: "inactive"}); got != "FAILED" {
t.Fatalf("mdraidSmartStatus(inactive) = %q, want FAILED", got) t.Fatalf("mdraidSmartStatus(inactive) = %q, want FAILED", got)
} }
if got := mdraidSmartStatus(mdraidHealth{arrayState: "active", degraded: 1, syncAction: "recover"}); got != "WARNING" {
t.Fatalf("mdraidSmartStatus(degraded+recover) = %q, want WARNING", got)
}
if got := mdraidSmartStatus(mdraidHealth{arrayState: "active", degraded: 1}); got != "FAILED" { if got := mdraidSmartStatus(mdraidHealth{arrayState: "active", degraded: 1}); got != "FAILED" {
t.Fatalf("mdraidSmartStatus(degraded) = %q, want FAILED", got) t.Fatalf("mdraidSmartStatus(degraded) = %q, want FAILED", got)
} }

View File

@@ -8,6 +8,7 @@ import (
"time" "time"
"github.com/henrygd/beszel/agent/deltatracker" "github.com/henrygd/beszel/agent/deltatracker"
"github.com/henrygd/beszel/agent/utils"
"github.com/henrygd/beszel/internal/entities/system" "github.com/henrygd/beszel/internal/entities/system"
psutilNet "github.com/shirou/gopsutil/v4/net" psutilNet "github.com/shirou/gopsutil/v4/net"
) )
@@ -94,7 +95,7 @@ func (a *Agent) initializeNetIoStats() {
a.netInterfaces = make(map[string]struct{}, 0) a.netInterfaces = make(map[string]struct{}, 0)
// parse NICS env var for whitelist / blacklist // parse NICS env var for whitelist / blacklist
nicsEnvVal, nicsEnvExists := GetEnv("NICS") nicsEnvVal, nicsEnvExists := utils.GetEnv("NICS")
var nicCfg *NicConfig var nicCfg *NicConfig
if nicsEnvExists { if nicsEnvExists {
nicCfg = newNicConfig(nicsEnvVal) nicCfg = newNicConfig(nicsEnvVal)
@@ -103,10 +104,7 @@ func (a *Agent) initializeNetIoStats() {
// get current network I/O stats and record valid interfaces // get current network I/O stats and record valid interfaces
if netIO, err := psutilNet.IOCounters(true); err == nil { if netIO, err := psutilNet.IOCounters(true); err == nil {
for _, v := range netIO { for _, v := range netIO {
if nicsEnvExists && !isValidNic(v.Name, nicCfg) { if skipNetworkInterface(v, nicCfg) {
continue
}
if a.skipNetworkInterface(v) {
continue continue
} }
slog.Info("Detected network interface", "name", v.Name, "sent", v.BytesSent, "recv", v.BytesRecv) slog.Info("Detected network interface", "name", v.Name, "sent", v.BytesSent, "recv", v.BytesRecv)
@@ -215,10 +213,8 @@ func (a *Agent) applyNetworkTotals(
totalBytesSent, totalBytesRecv uint64, totalBytesSent, totalBytesRecv uint64,
bytesSentPerSecond, bytesRecvPerSecond uint64, bytesSentPerSecond, bytesRecvPerSecond uint64,
) { ) {
networkSentPs := bytesToMegabytes(float64(bytesSentPerSecond)) if bytesSentPerSecond > 10_000_000_000 || bytesRecvPerSecond > 10_000_000_000 {
networkRecvPs := bytesToMegabytes(float64(bytesRecvPerSecond)) slog.Warn("Invalid net stats. Resetting.", "sent", bytesSentPerSecond, "recv", bytesRecvPerSecond)
if networkSentPs > 10_000 || networkRecvPs > 10_000 {
slog.Warn("Invalid net stats. Resetting.", "sent", networkSentPs, "recv", networkRecvPs)
for _, v := range netIO { for _, v := range netIO {
if _, exists := a.netInterfaces[v.Name]; !exists { if _, exists := a.netInterfaces[v.Name]; !exists {
continue continue
@@ -228,21 +224,29 @@ func (a *Agent) applyNetworkTotals(
a.initializeNetIoStats() a.initializeNetIoStats()
delete(a.netIoStats, cacheTimeMs) delete(a.netIoStats, cacheTimeMs)
delete(a.netInterfaceDeltaTrackers, cacheTimeMs) delete(a.netInterfaceDeltaTrackers, cacheTimeMs)
systemStats.NetworkSent = 0
systemStats.NetworkRecv = 0
systemStats.Bandwidth[0], systemStats.Bandwidth[1] = 0, 0 systemStats.Bandwidth[0], systemStats.Bandwidth[1] = 0, 0
return return
} }
systemStats.NetworkSent = networkSentPs
systemStats.NetworkRecv = networkRecvPs
systemStats.Bandwidth[0], systemStats.Bandwidth[1] = bytesSentPerSecond, bytesRecvPerSecond systemStats.Bandwidth[0], systemStats.Bandwidth[1] = bytesSentPerSecond, bytesRecvPerSecond
nis.BytesSent = totalBytesSent nis.BytesSent = totalBytesSent
nis.BytesRecv = totalBytesRecv nis.BytesRecv = totalBytesRecv
a.netIoStats[cacheTimeMs] = nis a.netIoStats[cacheTimeMs] = nis
} }
func (a *Agent) skipNetworkInterface(v psutilNet.IOCountersStat) bool { // skipNetworkInterface returns true if the network interface should be ignored.
func skipNetworkInterface(v psutilNet.IOCountersStat, nicCfg *NicConfig) bool {
if nicCfg != nil {
if !isValidNic(v.Name, nicCfg) {
return true
}
// In whitelist mode, we honor explicit inclusion without auto-filtering.
if !nicCfg.isBlacklist {
return false
}
// In blacklist mode, still apply the auto-filter below.
}
switch { switch {
case strings.HasPrefix(v.Name, "lo"), case strings.HasPrefix(v.Name, "lo"),
strings.HasPrefix(v.Name, "docker"), strings.HasPrefix(v.Name, "docker"),

View File

@@ -261,6 +261,39 @@ func TestNewNicConfig(t *testing.T) {
}) })
} }
} }
func TestSkipNetworkInterface(t *testing.T) {
tests := []struct {
name string
nic psutilNet.IOCountersStat
nicCfg *NicConfig
expectSkip bool
}{
{"loopback lo", psutilNet.IOCountersStat{Name: "lo", BytesSent: 100, BytesRecv: 100}, nil, true},
{"loopback lo0", psutilNet.IOCountersStat{Name: "lo0", BytesSent: 100, BytesRecv: 100}, nil, true},
{"docker prefix", psutilNet.IOCountersStat{Name: "docker0", BytesSent: 100, BytesRecv: 100}, nil, true},
{"br- prefix", psutilNet.IOCountersStat{Name: "br-lan", BytesSent: 100, BytesRecv: 100}, nil, true},
{"veth prefix", psutilNet.IOCountersStat{Name: "veth0abc", BytesSent: 100, BytesRecv: 100}, nil, true},
{"bond prefix", psutilNet.IOCountersStat{Name: "bond0", BytesSent: 100, BytesRecv: 100}, nil, true},
{"cali prefix", psutilNet.IOCountersStat{Name: "cali1234", BytesSent: 100, BytesRecv: 100}, nil, true},
{"zero BytesRecv", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 100, BytesRecv: 0}, nil, true},
{"zero BytesSent", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 0, BytesRecv: 100}, nil, true},
{"both zero", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 0, BytesRecv: 0}, nil, true},
{"normal eth0", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 100, BytesRecv: 200}, nil, false},
{"normal wlan0", psutilNet.IOCountersStat{Name: "wlan0", BytesSent: 1, BytesRecv: 1}, nil, false},
{"whitelist overrides skip (docker)", psutilNet.IOCountersStat{Name: "docker0", BytesSent: 100, BytesRecv: 100}, newNicConfig("docker0"), false},
{"whitelist overrides skip (lo)", psutilNet.IOCountersStat{Name: "lo", BytesSent: 100, BytesRecv: 100}, newNicConfig("lo"), false},
{"whitelist exclusion", psutilNet.IOCountersStat{Name: "eth1", BytesSent: 100, BytesRecv: 100}, newNicConfig("eth0"), true},
{"blacklist skip lo", psutilNet.IOCountersStat{Name: "lo", BytesSent: 100, BytesRecv: 100}, newNicConfig("-eth0"), true},
{"blacklist explicit eth0", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 100, BytesRecv: 100}, newNicConfig("-eth0"), true},
{"blacklist allow eth1", psutilNet.IOCountersStat{Name: "eth1", BytesSent: 100, BytesRecv: 100}, newNicConfig("-eth0"), false},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equal(t, tt.expectSkip, skipNetworkInterface(tt.nic, tt.nicCfg))
})
}
}
func TestEnsureNetworkInterfacesMap(t *testing.T) { func TestEnsureNetworkInterfacesMap(t *testing.T) {
var a Agent var a Agent
var stats system.Stats var stats system.Stats
@@ -383,8 +416,6 @@ func TestApplyNetworkTotals(t *testing.T) {
totalBytesSent uint64 totalBytesSent uint64
totalBytesRecv uint64 totalBytesRecv uint64
expectReset bool expectReset bool
expectedNetworkSent float64
expectedNetworkRecv float64
expectedBandwidthSent uint64 expectedBandwidthSent uint64
expectedBandwidthRecv uint64 expectedBandwidthRecv uint64
}{ }{
@@ -395,8 +426,6 @@ func TestApplyNetworkTotals(t *testing.T) {
totalBytesSent: 10000000, totalBytesSent: 10000000,
totalBytesRecv: 20000000, totalBytesRecv: 20000000,
expectReset: false, expectReset: false,
expectedNetworkSent: 0.95, // ~1 MB/s rounded to 2 decimals
expectedNetworkRecv: 1.91, // ~2 MB/s rounded to 2 decimals
expectedBandwidthSent: 1000000, expectedBandwidthSent: 1000000,
expectedBandwidthRecv: 2000000, expectedBandwidthRecv: 2000000,
}, },
@@ -424,18 +453,6 @@ func TestApplyNetworkTotals(t *testing.T) {
totalBytesRecv: 20000000, totalBytesRecv: 20000000,
expectReset: true, expectReset: true,
}, },
{
name: "Valid network stats - at threshold boundary",
bytesSentPerSecond: 10485750000, // ~9999.99 MB/s (rounds to 9999.99)
bytesRecvPerSecond: 10485750000, // ~9999.99 MB/s (rounds to 9999.99)
totalBytesSent: 10000000,
totalBytesRecv: 20000000,
expectReset: false,
expectedNetworkSent: 9999.99,
expectedNetworkRecv: 9999.99,
expectedBandwidthSent: 10485750000,
expectedBandwidthRecv: 10485750000,
},
{ {
name: "Zero values", name: "Zero values",
bytesSentPerSecond: 0, bytesSentPerSecond: 0,
@@ -443,8 +460,6 @@ func TestApplyNetworkTotals(t *testing.T) {
totalBytesSent: 0, totalBytesSent: 0,
totalBytesRecv: 0, totalBytesRecv: 0,
expectReset: false, expectReset: false,
expectedNetworkSent: 0.0,
expectedNetworkRecv: 0.0,
expectedBandwidthSent: 0, expectedBandwidthSent: 0,
expectedBandwidthRecv: 0, expectedBandwidthRecv: 0,
}, },
@@ -481,14 +496,10 @@ func TestApplyNetworkTotals(t *testing.T) {
// Should have reset network tracking state - maps cleared and stats zeroed // Should have reset network tracking state - maps cleared and stats zeroed
assert.NotContains(t, a.netIoStats, cacheTimeMs, "cache entry should be cleared after reset") assert.NotContains(t, a.netIoStats, cacheTimeMs, "cache entry should be cleared after reset")
assert.NotContains(t, a.netInterfaceDeltaTrackers, cacheTimeMs, "tracker should be cleared on reset") assert.NotContains(t, a.netInterfaceDeltaTrackers, cacheTimeMs, "tracker should be cleared on reset")
assert.Zero(t, systemStats.NetworkSent)
assert.Zero(t, systemStats.NetworkRecv)
assert.Zero(t, systemStats.Bandwidth[0]) assert.Zero(t, systemStats.Bandwidth[0])
assert.Zero(t, systemStats.Bandwidth[1]) assert.Zero(t, systemStats.Bandwidth[1])
} else { } else {
// Should have applied stats // Should have applied stats
assert.Equal(t, tt.expectedNetworkSent, systemStats.NetworkSent)
assert.Equal(t, tt.expectedNetworkRecv, systemStats.NetworkRecv)
assert.Equal(t, tt.expectedBandwidthSent, systemStats.Bandwidth[0]) assert.Equal(t, tt.expectedBandwidthSent, systemStats.Bandwidth[0])
assert.Equal(t, tt.expectedBandwidthRecv, systemStats.Bandwidth[1]) assert.Equal(t, tt.expectedBandwidthRecv, systemStats.Bandwidth[1])

View File

@@ -10,6 +10,7 @@ import (
"strings" "strings"
"unicode/utf8" "unicode/utf8"
"github.com/henrygd/beszel/agent/utils"
"github.com/henrygd/beszel/internal/entities/system" "github.com/henrygd/beszel/internal/entities/system"
"github.com/shirou/gopsutil/v4/common" "github.com/shirou/gopsutil/v4/common"
@@ -26,9 +27,9 @@ type SensorConfig struct {
} }
func (a *Agent) newSensorConfig() *SensorConfig { func (a *Agent) newSensorConfig() *SensorConfig {
primarySensor, _ := GetEnv("PRIMARY_SENSOR") primarySensor, _ := utils.GetEnv("PRIMARY_SENSOR")
sysSensors, _ := GetEnv("SYS_SENSORS") sysSensors, _ := utils.GetEnv("SYS_SENSORS")
sensorsEnvVal, sensorsSet := GetEnv("SENSORS") sensorsEnvVal, sensorsSet := utils.GetEnv("SENSORS")
skipCollection := sensorsSet && sensorsEnvVal == "" skipCollection := sensorsSet && sensorsEnvVal == ""
return a.newSensorConfigWithEnv(primarySensor, sysSensors, sensorsEnvVal, skipCollection) return a.newSensorConfigWithEnv(primarySensor, sysSensors, sensorsEnvVal, skipCollection)
@@ -135,7 +136,7 @@ func (a *Agent) updateTemperatures(systemStats *system.Stats) {
case sensorName: case sensorName:
a.systemInfo.DashboardTemp = sensor.Temperature a.systemInfo.DashboardTemp = sensor.Temperature
} }
systemStats.Temperatures[sensorName] = twoDecimals(sensor.Temperature) systemStats.Temperatures[sensorName] = utils.TwoDecimals(sensor.Temperature)
} }
} }

View File

@@ -12,6 +12,7 @@ import (
"time" "time"
"github.com/henrygd/beszel" "github.com/henrygd/beszel"
"github.com/henrygd/beszel/agent/utils"
"github.com/henrygd/beszel/internal/common" "github.com/henrygd/beszel/internal/common"
"github.com/henrygd/beszel/internal/entities/system" "github.com/henrygd/beszel/internal/entities/system"
@@ -36,7 +37,7 @@ var hubVersions map[string]semver.Version
// and begins listening for connections. Returns an error if the server // and begins listening for connections. Returns an error if the server
// is already running or if there's an issue starting the server. // is already running or if there's an issue starting the server.
func (a *Agent) StartServer(opts ServerOptions) error { func (a *Agent) StartServer(opts ServerOptions) error {
if disableSSH, _ := GetEnv("DISABLE_SSH"); disableSSH == "true" { if disableSSH, _ := utils.GetEnv("DISABLE_SSH"); disableSSH == "true" {
return errors.New("SSH disabled") return errors.New("SSH disabled")
} }
if a.server != nil { if a.server != nil {
@@ -238,11 +239,11 @@ func ParseKeys(input string) ([]gossh.PublicKey, error) {
// and finally defaults to ":45876". // and finally defaults to ":45876".
func GetAddress(addr string) string { func GetAddress(addr string) string {
if addr == "" { if addr == "" {
addr, _ = GetEnv("LISTEN") addr, _ = utils.GetEnv("LISTEN")
} }
if addr == "" { if addr == "" {
// Legacy PORT environment variable support // Legacy PORT environment variable support
addr, _ = GetEnv("PORT") addr, _ = utils.GetEnv("PORT")
} }
if addr == "" { if addr == "" {
return ":45876" return ":45876"
@@ -258,7 +259,7 @@ func GetAddress(addr string) string {
// It checks the NETWORK environment variable first, then infers from // It checks the NETWORK environment variable first, then infers from
// the address format: addresses starting with "/" are "unix", others are "tcp". // the address format: addresses starting with "/" are "unix", others are "tcp".
func GetNetwork(addr string) string { func GetNetwork(addr string) string {
if network, ok := GetEnv("NETWORK"); ok && network != "" { if network, ok := utils.GetEnv("NETWORK"); ok && network != "" {
return network return network
} }
if strings.HasPrefix(addr, "/") { if strings.HasPrefix(addr, "/") {

View File

@@ -18,6 +18,7 @@ import (
"sync" "sync"
"time" "time"
"github.com/henrygd/beszel/agent/utils"
"github.com/henrygd/beszel/internal/entities/smart" "github.com/henrygd/beszel/internal/entities/smart"
) )
@@ -156,7 +157,7 @@ func (sm *SmartManager) ScanDevices(force bool) error {
currentDevices := sm.devicesSnapshot() currentDevices := sm.devicesSnapshot()
var configuredDevices []*DeviceInfo var configuredDevices []*DeviceInfo
if configuredRaw, ok := GetEnv("SMART_DEVICES"); ok { if configuredRaw, ok := utils.GetEnv("SMART_DEVICES"); ok {
slog.Info("SMART_DEVICES", "value", configuredRaw) slog.Info("SMART_DEVICES", "value", configuredRaw)
config := strings.TrimSpace(configuredRaw) config := strings.TrimSpace(configuredRaw)
if config == "" { if config == "" {
@@ -222,7 +223,7 @@ func (sm *SmartManager) ScanDevices(force bool) error {
} }
func (sm *SmartManager) parseConfiguredDevices(config string) ([]*DeviceInfo, error) { func (sm *SmartManager) parseConfiguredDevices(config string) ([]*DeviceInfo, error) {
splitChar := os.Getenv("SMART_DEVICES_SEPARATOR") splitChar, _ := utils.GetEnv("SMART_DEVICES_SEPARATOR")
if splitChar == "" { if splitChar == "" {
splitChar = "," splitChar = ","
} }
@@ -260,7 +261,7 @@ func (sm *SmartManager) parseConfiguredDevices(config string) ([]*DeviceInfo, er
} }
func (sm *SmartManager) refreshExcludedDevices() { func (sm *SmartManager) refreshExcludedDevices() {
rawValue, _ := GetEnv("EXCLUDE_SMART") rawValue, _ := utils.GetEnv("EXCLUDE_SMART")
sm.excludedDevices = make(map[string]struct{}) sm.excludedDevices = make(map[string]struct{})
for entry := range strings.SplitSeq(rawValue, ",") { for entry := range strings.SplitSeq(rawValue, ",") {
@@ -870,15 +871,18 @@ func (sm *SmartManager) parseSmartForSata(output []byte) (bool, int) {
smartData.FirmwareVersion = data.FirmwareVersion smartData.FirmwareVersion = data.FirmwareVersion
smartData.Capacity = data.UserCapacity.Bytes smartData.Capacity = data.UserCapacity.Bytes
smartData.Temperature = data.Temperature.Current smartData.Temperature = data.Temperature.Current
if smartData.Temperature == 0 {
if temp, ok := temperatureFromAtaDeviceStatistics(data.AtaDeviceStatistics); ok {
smartData.Temperature = temp
}
}
smartData.SmartStatus = getSmartStatus(smartData.Temperature, data.SmartStatus.Passed) smartData.SmartStatus = getSmartStatus(smartData.Temperature, data.SmartStatus.Passed)
smartData.DiskName = data.Device.Name smartData.DiskName = data.Device.Name
smartData.DiskType = data.Device.Type smartData.DiskType = data.Device.Type
// get values from ata_device_statistics if necessary
var ataDeviceStats smart.AtaDeviceStatistics
if smartData.Temperature == 0 {
if temp := findAtaDeviceStatisticsValue(&data, &ataDeviceStats, 5, "Current Temperature", 0, 255); temp != nil {
smartData.Temperature = uint8(*temp)
}
}
// update SmartAttributes // update SmartAttributes
smartData.Attributes = make([]*smart.SmartAttribute, 0, len(data.AtaSmartAttributes.Table)) smartData.Attributes = make([]*smart.SmartAttribute, 0, len(data.AtaSmartAttributes.Table))
for _, attr := range data.AtaSmartAttributes.Table { for _, attr := range data.AtaSmartAttributes.Table {
@@ -913,23 +917,20 @@ func getSmartStatus(temperature uint8, passed bool) string {
} }
} }
func temperatureFromAtaDeviceStatistics(stats smart.AtaDeviceStatistics) (uint8, bool) {
entry := findAtaDeviceStatisticsEntry(stats, 5, "Current Temperature")
if entry == nil || entry.Value == nil {
return 0, false
}
if *entry.Value > 255 {
return 0, false
}
return uint8(*entry.Value), true
}
// findAtaDeviceStatisticsEntry centralizes ATA devstat lookups so additional // findAtaDeviceStatisticsEntry centralizes ATA devstat lookups so additional
// metrics can be pulled from the same structure in the future. // metrics can be pulled from the same structure in the future.
func findAtaDeviceStatisticsEntry(stats smart.AtaDeviceStatistics, pageNumber uint8, entryName string) *smart.AtaDeviceStatisticsEntry { func findAtaDeviceStatisticsValue(data *smart.SmartInfoForSata, ataDeviceStats *smart.AtaDeviceStatistics, entryNumber uint8, entryName string, minValue, maxValue int64) *int64 {
for pageIdx := range stats.Pages { if len(ataDeviceStats.Pages) == 0 {
page := &stats.Pages[pageIdx] if len(data.AtaDeviceStatistics) == 0 {
if page.Number != pageNumber { return nil
}
if err := json.Unmarshal(data.AtaDeviceStatistics, ataDeviceStats); err != nil {
return nil
}
}
for pageIdx := range ataDeviceStats.Pages {
page := &ataDeviceStats.Pages[pageIdx]
if page.Number != entryNumber {
continue continue
} }
for entryIdx := range page.Table { for entryIdx := range page.Table {
@@ -937,7 +938,10 @@ func findAtaDeviceStatisticsEntry(stats smart.AtaDeviceStatistics, pageNumber ui
if !strings.EqualFold(entry.Name, entryName) { if !strings.EqualFold(entry.Name, entryName) {
continue continue
} }
return entry if entry.Value == nil || *entry.Value < minValue || *entry.Value > maxValue {
return nil
}
return entry.Value
} }
} }
return nil return nil

View File

@@ -121,6 +121,78 @@ func TestParseSmartForSataDeviceStatisticsTemperature(t *testing.T) {
assert.Equal(t, uint8(22), deviceData.Temperature) assert.Equal(t, uint8(22), deviceData.Temperature)
} }
func TestParseSmartForSataAtaDeviceStatistics(t *testing.T) {
// tests that ata_device_statistics values are parsed correctly
jsonPayload := []byte(`{
"smartctl": {"exit_status": 0},
"device": {"name": "/dev/sdb", "type": "sat"},
"model_name": "SanDisk SSD U110 16GB",
"serial_number": "lksjfh23lhj",
"firmware_version": "U21B001",
"user_capacity": {"bytes": 16013942784},
"smart_status": {"passed": true},
"ata_smart_attributes": {"table": []},
"ata_device_statistics": {
"pages": [
{
"number": 5,
"name": "Temperature Statistics",
"table": [
{"name": "Current Temperature", "value": 43, "flags": {"valid": true}},
{"name": "Specified Minimum Operating Temperature", "value": -20, "flags": {"valid": true}}
]
}
]
}
}`)
sm := &SmartManager{SmartDataMap: make(map[string]*smart.SmartData)}
hasData, exitStatus := sm.parseSmartForSata(jsonPayload)
require.True(t, hasData)
assert.Equal(t, 0, exitStatus)
deviceData, ok := sm.SmartDataMap["lksjfh23lhj"]
require.True(t, ok, "expected smart data entry for serial lksjfh23lhj")
assert.Equal(t, uint8(43), deviceData.Temperature)
}
func TestParseSmartForSataNegativeDeviceStatistics(t *testing.T) {
// Tests that negative values in ata_device_statistics (e.g. min operating temp)
// do not cause the entire SAT parser to fail.
jsonPayload := []byte(`{
"smartctl": {"exit_status": 0},
"device": {"name": "/dev/sdb", "type": "sat"},
"model_name": "SanDisk SSD U110 16GB",
"serial_number": "NEGATIVE123",
"firmware_version": "U21B001",
"user_capacity": {"bytes": 16013942784},
"smart_status": {"passed": true},
"temperature": {"current": 38},
"ata_smart_attributes": {"table": []},
"ata_device_statistics": {
"pages": [
{
"number": 5,
"name": "Temperature Statistics",
"table": [
{"name": "Current Temperature", "value": 38, "flags": {"valid": true}},
{"name": "Specified Minimum Operating Temperature", "value": -20, "flags": {"valid": true}}
]
}
]
}
}`)
sm := &SmartManager{SmartDataMap: make(map[string]*smart.SmartData)}
hasData, exitStatus := sm.parseSmartForSata(jsonPayload)
require.True(t, hasData)
assert.Equal(t, 0, exitStatus)
deviceData, ok := sm.SmartDataMap["NEGATIVE123"]
require.True(t, ok, "expected smart data entry for serial NEGATIVE123")
assert.Equal(t, uint8(38), deviceData.Temperature)
}
func TestParseSmartForSataParentheticalRawValue(t *testing.T) { func TestParseSmartForSataParentheticalRawValue(t *testing.T) {
jsonPayload := []byte(`{ jsonPayload := []byte(`{
"smartctl": {"exit_status": 0}, "smartctl": {"exit_status": 0},
@@ -727,6 +799,182 @@ func TestIsVirtualDeviceScsi(t *testing.T) {
} }
} }
func TestFindAtaDeviceStatisticsValue(t *testing.T) {
val42 := int64(42)
val100 := int64(100)
valMinus20 := int64(-20)
tests := []struct {
name string
data smart.SmartInfoForSata
ataDeviceStats smart.AtaDeviceStatistics
entryNumber uint8
entryName string
minValue int64
maxValue int64
expectedValue *int64
}{
{
name: "value in ataDeviceStats",
ataDeviceStats: smart.AtaDeviceStatistics{
Pages: []smart.AtaDeviceStatisticsPage{
{
Number: 5,
Table: []smart.AtaDeviceStatisticsEntry{
{Name: "Current Temperature", Value: &val42},
},
},
},
},
entryNumber: 5,
entryName: "Current Temperature",
minValue: 0,
maxValue: 100,
expectedValue: &val42,
},
{
name: "value unmarshaled from data",
data: smart.SmartInfoForSata{
AtaDeviceStatistics: []byte(`{"pages":[{"number":5,"table":[{"name":"Current Temperature","value":100}]}]}`),
},
entryNumber: 5,
entryName: "Current Temperature",
minValue: 0,
maxValue: 255,
expectedValue: &val100,
},
{
name: "value out of range (too high)",
ataDeviceStats: smart.AtaDeviceStatistics{
Pages: []smart.AtaDeviceStatisticsPage{
{
Number: 5,
Table: []smart.AtaDeviceStatisticsEntry{
{Name: "Current Temperature", Value: &val100},
},
},
},
},
entryNumber: 5,
entryName: "Current Temperature",
minValue: 0,
maxValue: 50,
expectedValue: nil,
},
{
name: "value out of range (too low)",
ataDeviceStats: smart.AtaDeviceStatistics{
Pages: []smart.AtaDeviceStatisticsPage{
{
Number: 5,
Table: []smart.AtaDeviceStatisticsEntry{
{Name: "Min Temp", Value: &valMinus20},
},
},
},
},
entryNumber: 5,
entryName: "Min Temp",
minValue: 0,
maxValue: 100,
expectedValue: nil,
},
{
name: "no statistics available",
data: smart.SmartInfoForSata{},
entryNumber: 5,
entryName: "Current Temperature",
minValue: 0,
maxValue: 255,
expectedValue: nil,
},
{
name: "wrong page number",
ataDeviceStats: smart.AtaDeviceStatistics{
Pages: []smart.AtaDeviceStatisticsPage{
{
Number: 1,
Table: []smart.AtaDeviceStatisticsEntry{
{Name: "Current Temperature", Value: &val42},
},
},
},
},
entryNumber: 5,
entryName: "Current Temperature",
minValue: 0,
maxValue: 100,
expectedValue: nil,
},
{
name: "wrong entry name",
ataDeviceStats: smart.AtaDeviceStatistics{
Pages: []smart.AtaDeviceStatisticsPage{
{
Number: 5,
Table: []smart.AtaDeviceStatisticsEntry{
{Name: "Other Stat", Value: &val42},
},
},
},
},
entryNumber: 5,
entryName: "Current Temperature",
minValue: 0,
maxValue: 100,
expectedValue: nil,
},
{
name: "case insensitive name match",
ataDeviceStats: smart.AtaDeviceStatistics{
Pages: []smart.AtaDeviceStatisticsPage{
{
Number: 5,
Table: []smart.AtaDeviceStatisticsEntry{
{Name: "CURRENT TEMPERATURE", Value: &val42},
},
},
},
},
entryNumber: 5,
entryName: "Current Temperature",
minValue: 0,
maxValue: 100,
expectedValue: &val42,
},
{
name: "entry value is nil",
ataDeviceStats: smart.AtaDeviceStatistics{
Pages: []smart.AtaDeviceStatisticsPage{
{
Number: 5,
Table: []smart.AtaDeviceStatisticsEntry{
{Name: "Current Temperature", Value: nil},
},
},
},
},
entryNumber: 5,
entryName: "Current Temperature",
minValue: 0,
maxValue: 100,
expectedValue: nil,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := findAtaDeviceStatisticsValue(&tt.data, &tt.ataDeviceStats, tt.entryNumber, tt.entryName, tt.minValue, tt.maxValue)
if tt.expectedValue == nil {
assert.Nil(t, result)
} else {
require.NotNil(t, result)
assert.Equal(t, *tt.expectedValue, *result)
}
})
}
}
func TestRefreshExcludedDevices(t *testing.T) { func TestRefreshExcludedDevices(t *testing.T) {
tests := []struct { tests := []struct {
name string name string

View File

@@ -12,6 +12,7 @@ import (
"github.com/henrygd/beszel" "github.com/henrygd/beszel"
"github.com/henrygd/beszel/agent/battery" "github.com/henrygd/beszel/agent/battery"
"github.com/henrygd/beszel/agent/utils"
"github.com/henrygd/beszel/agent/zfs" "github.com/henrygd/beszel/agent/zfs"
"github.com/henrygd/beszel/internal/entities/container" "github.com/henrygd/beszel/internal/entities/container"
"github.com/henrygd/beszel/internal/entities/system" "github.com/henrygd/beszel/internal/entities/system"
@@ -127,13 +128,13 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats {
// cpu metrics // cpu metrics
cpuMetrics, err := getCpuMetrics(cacheTimeMs) cpuMetrics, err := getCpuMetrics(cacheTimeMs)
if err == nil { if err == nil {
systemStats.Cpu = twoDecimals(cpuMetrics.Total) systemStats.Cpu = utils.TwoDecimals(cpuMetrics.Total)
systemStats.CpuBreakdown = []float64{ systemStats.CpuBreakdown = []float64{
twoDecimals(cpuMetrics.User), utils.TwoDecimals(cpuMetrics.User),
twoDecimals(cpuMetrics.System), utils.TwoDecimals(cpuMetrics.System),
twoDecimals(cpuMetrics.Iowait), utils.TwoDecimals(cpuMetrics.Iowait),
twoDecimals(cpuMetrics.Steal), utils.TwoDecimals(cpuMetrics.Steal),
twoDecimals(cpuMetrics.Idle), utils.TwoDecimals(cpuMetrics.Idle),
} }
} else { } else {
slog.Error("Error getting cpu metrics", "err", err) slog.Error("Error getting cpu metrics", "err", err)
@@ -157,8 +158,8 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats {
// memory // memory
if v, err := mem.VirtualMemory(); err == nil { if v, err := mem.VirtualMemory(); err == nil {
// swap // swap
systemStats.Swap = bytesToGigabytes(v.SwapTotal) systemStats.Swap = utils.BytesToGigabytes(v.SwapTotal)
systemStats.SwapUsed = bytesToGigabytes(v.SwapTotal - v.SwapFree - v.SwapCached) systemStats.SwapUsed = utils.BytesToGigabytes(v.SwapTotal - v.SwapFree - v.SwapCached)
// cache + buffers value for default mem calculation // cache + buffers value for default mem calculation
// note: gopsutil automatically adds SReclaimable to v.Cached // note: gopsutil automatically adds SReclaimable to v.Cached
cacheBuff := v.Cached + v.Buffers - v.Shared cacheBuff := v.Cached + v.Buffers - v.Shared
@@ -181,13 +182,13 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats {
if arcSize, _ := zfs.ARCSize(); arcSize > 0 && arcSize < v.Used { if arcSize, _ := zfs.ARCSize(); arcSize > 0 && arcSize < v.Used {
v.Used = v.Used - arcSize v.Used = v.Used - arcSize
v.UsedPercent = float64(v.Used) / float64(v.Total) * 100.0 v.UsedPercent = float64(v.Used) / float64(v.Total) * 100.0
systemStats.MemZfsArc = bytesToGigabytes(arcSize) systemStats.MemZfsArc = utils.BytesToGigabytes(arcSize)
} }
} }
systemStats.Mem = bytesToGigabytes(v.Total) systemStats.Mem = utils.BytesToGigabytes(v.Total)
systemStats.MemBuffCache = bytesToGigabytes(cacheBuff) systemStats.MemBuffCache = utils.BytesToGigabytes(cacheBuff)
systemStats.MemUsed = bytesToGigabytes(v.Used) systemStats.MemUsed = utils.BytesToGigabytes(v.Used)
systemStats.MemPct = twoDecimals(v.UsedPercent) systemStats.MemPct = utils.TwoDecimals(v.UsedPercent)
} }
// disk usage // disk usage

View File

@@ -15,6 +15,7 @@ import (
"time" "time"
"github.com/coreos/go-systemd/v22/dbus" "github.com/coreos/go-systemd/v22/dbus"
"github.com/henrygd/beszel/agent/utils"
"github.com/henrygd/beszel/internal/entities/systemd" "github.com/henrygd/beszel/internal/entities/systemd"
) )
@@ -49,7 +50,7 @@ func isSystemdAvailable() bool {
// newSystemdManager creates a new systemdManager. // newSystemdManager creates a new systemdManager.
func newSystemdManager() (*systemdManager, error) { func newSystemdManager() (*systemdManager, error) {
if skipSystemd, _ := GetEnv("SKIP_SYSTEMD"); skipSystemd == "true" { if skipSystemd, _ := utils.GetEnv("SKIP_SYSTEMD"); skipSystemd == "true" {
return nil, nil return nil, nil
} }
@@ -294,13 +295,13 @@ func unescapeServiceName(name string) string {
// otherwise defaults to "*service". // otherwise defaults to "*service".
func getServicePatterns() []string { func getServicePatterns() []string {
patterns := []string{} patterns := []string{}
if envPatterns, _ := GetEnv("SERVICE_PATTERNS"); envPatterns != "" { if envPatterns, _ := utils.GetEnv("SERVICE_PATTERNS"); envPatterns != "" {
for pattern := range strings.SplitSeq(envPatterns, ",") { for pattern := range strings.SplitSeq(envPatterns, ",") {
pattern = strings.TrimSpace(pattern) pattern = strings.TrimSpace(pattern)
if pattern == "" { if pattern == "" {
continue continue
} }
if !strings.HasSuffix(pattern, ".service") { if !strings.HasSuffix(pattern, "timer") && !strings.HasSuffix(pattern, ".service") {
pattern += ".service" pattern += ".service"
} }
patterns = append(patterns, pattern) patterns = append(patterns, pattern)

View File

@@ -156,6 +156,13 @@ func TestGetServicePatterns(t *testing.T) {
expected: []string{"*nginx*.service", "*apache*.service"}, expected: []string{"*nginx*.service", "*apache*.service"},
cleanupEnvVars: true, cleanupEnvVars: true,
}, },
{
name: "opt into timer monitoring",
prefixedEnv: "nginx.service,docker,apache.timer",
unprefixedEnv: "",
expected: []string{"nginx.service", "docker.service", "apache.timer"},
cleanupEnvVars: true,
},
} }
for _, tt := range tests { for _, tt := range tests {

View File

@@ -1,15 +0,0 @@
package agent
import "math"
func bytesToMegabytes(b float64) float64 {
return twoDecimals(b / 1048576)
}
func bytesToGigabytes(b uint64) float64 {
return twoDecimals(float64(b) / 1073741824)
}
func twoDecimals(value float64) float64 {
return math.Round(value*100) / 100
}

88
agent/utils/utils.go Normal file
View File

@@ -0,0 +1,88 @@
package utils
import (
"io"
"math"
"os"
"strconv"
"strings"
)
// GetEnv retrieves an environment variable with a "BESZEL_AGENT_" prefix, or falls back to the unprefixed key.
func GetEnv(key string) (value string, exists bool) {
if value, exists = os.LookupEnv("BESZEL_AGENT_" + key); exists {
return value, exists
}
return os.LookupEnv(key)
}
// BytesToMegabytes converts bytes to megabytes and rounds to two decimal places.
func BytesToMegabytes(b float64) float64 {
return TwoDecimals(b / 1048576)
}
// BytesToGigabytes converts bytes to gigabytes and rounds to two decimal places.
func BytesToGigabytes(b uint64) float64 {
return TwoDecimals(float64(b) / 1073741824)
}
// TwoDecimals rounds a float64 value to two decimal places.
func TwoDecimals(value float64) float64 {
return math.Round(value*100) / 100
}
// func RoundFloat(val float64, precision uint) float64 {
// ratio := math.Pow(10, float64(precision))
// return math.Round(val*ratio) / ratio
// }
// ReadStringFile returns trimmed file contents or empty string on error.
func ReadStringFile(path string) string {
content, _ := ReadStringFileOK(path)
return content
}
// ReadStringFileOK returns trimmed file contents and read success.
func ReadStringFileOK(path string) (string, bool) {
b, err := os.ReadFile(path)
if err != nil {
return "", false
}
return strings.TrimSpace(string(b)), true
}
// ReadStringFileLimited reads a file into a string with a maximum size (in bytes) to avoid
// allocating large buffers and potential panics with pseudo-files when the size is misreported.
func ReadStringFileLimited(path string, maxSize int) (string, error) {
f, err := os.Open(path)
if err != nil {
return "", err
}
defer f.Close()
buf := make([]byte, maxSize)
n, err := f.Read(buf)
if err != nil && err != io.EOF {
return "", err
}
return strings.TrimSpace(string(buf[:n])), nil
}
// FileExists reports whether the given path exists.
func FileExists(path string) bool {
_, err := os.Stat(path)
return err == nil
}
// ReadUintFile parses a decimal uint64 value from a file.
func ReadUintFile(path string) (uint64, bool) {
raw, ok := ReadStringFileOK(path)
if !ok {
return 0, false
}
parsed, err := strconv.ParseUint(raw, 10, 64)
if err != nil {
return 0, false
}
return parsed, true
}

165
agent/utils/utils_test.go Normal file
View File

@@ -0,0 +1,165 @@
package utils
import (
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
)
func TestTwoDecimals(t *testing.T) {
tests := []struct {
name string
input float64
expected float64
}{
{"round down", 1.234, 1.23},
{"round half up", 1.235, 1.24}, // math.Round rounds half up
{"no rounding needed", 1.23, 1.23},
{"negative number", -1.235, -1.24}, // math.Round rounds half up (more negative)
{"zero", 0.0, 0.0},
{"large number", 123.456, 123.46}, // rounds 5 up
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := TwoDecimals(tt.input)
assert.Equal(t, tt.expected, result)
})
}
}
func TestBytesToMegabytes(t *testing.T) {
tests := []struct {
name string
input float64
expected float64
}{
{"1 MB", 1048576, 1.0},
{"512 KB", 524288, 0.5},
{"zero", 0, 0},
{"large value", 1073741824, 1024}, // 1 GB = 1024 MB
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := BytesToMegabytes(tt.input)
assert.Equal(t, tt.expected, result)
})
}
}
func TestBytesToGigabytes(t *testing.T) {
tests := []struct {
name string
input uint64
expected float64
}{
{"1 GB", 1073741824, 1.0},
{"512 MB", 536870912, 0.5},
{"0 GB", 0, 0},
{"2 GB", 2147483648, 2.0},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := BytesToGigabytes(tt.input)
assert.Equal(t, tt.expected, result)
})
}
}
func TestFileFunctions(t *testing.T) {
tmpDir := t.TempDir()
testFilePath := filepath.Join(tmpDir, "test.txt")
testContent := "hello world"
// Test FileExists (false)
assert.False(t, FileExists(testFilePath))
// Test ReadStringFileOK (false)
content, ok := ReadStringFileOK(testFilePath)
assert.False(t, ok)
assert.Empty(t, content)
// Test ReadStringFile (empty)
assert.Empty(t, ReadStringFile(testFilePath))
// Write file
err := os.WriteFile(testFilePath, []byte(testContent+"\n "), 0644)
assert.NoError(t, err)
// Test FileExists (true)
assert.True(t, FileExists(testFilePath))
// Test ReadStringFileOK (true)
content, ok = ReadStringFileOK(testFilePath)
assert.True(t, ok)
assert.Equal(t, testContent, content)
// Test ReadStringFile (content)
assert.Equal(t, testContent, ReadStringFile(testFilePath))
}
func TestReadUintFile(t *testing.T) {
tmpDir := t.TempDir()
t.Run("valid uint", func(t *testing.T) {
path := filepath.Join(tmpDir, "uint.txt")
os.WriteFile(path, []byte(" 12345\n"), 0644)
val, ok := ReadUintFile(path)
assert.True(t, ok)
assert.Equal(t, uint64(12345), val)
})
t.Run("invalid uint", func(t *testing.T) {
path := filepath.Join(tmpDir, "invalid.txt")
os.WriteFile(path, []byte("abc"), 0644)
val, ok := ReadUintFile(path)
assert.False(t, ok)
assert.Equal(t, uint64(0), val)
})
t.Run("missing file", func(t *testing.T) {
path := filepath.Join(tmpDir, "missing.txt")
val, ok := ReadUintFile(path)
assert.False(t, ok)
assert.Equal(t, uint64(0), val)
})
}
func TestGetEnv(t *testing.T) {
key := "TEST_VAR"
prefixedKey := "BESZEL_AGENT_" + key
t.Run("prefixed variable exists", func(t *testing.T) {
os.Setenv(prefixedKey, "prefixed_val")
os.Setenv(key, "unprefixed_val")
defer os.Unsetenv(prefixedKey)
defer os.Unsetenv(key)
val, exists := GetEnv(key)
assert.True(t, exists)
assert.Equal(t, "prefixed_val", val)
})
t.Run("only unprefixed variable exists", func(t *testing.T) {
os.Unsetenv(prefixedKey)
os.Setenv(key, "unprefixed_val")
defer os.Unsetenv(key)
val, exists := GetEnv(key)
assert.True(t, exists)
assert.Equal(t, "unprefixed_val", val)
})
t.Run("neither variable exists", func(t *testing.T) {
os.Unsetenv(prefixedKey)
os.Unsetenv(key)
val, exists := GetEnv(key)
assert.False(t, exists)
assert.Empty(t, val)
})
}

2
go.mod
View File

@@ -1,6 +1,6 @@
module github.com/henrygd/beszel module github.com/henrygd/beszel
go 1.26.0 go 1.26.1
require ( require (
github.com/blang/semver v3.5.1+incompatible github.com/blang/semver v3.5.1+incompatible

View File

@@ -21,9 +21,9 @@ type hubLike interface {
type AlertManager struct { type AlertManager struct {
hub hubLike hub hubLike
alertQueue chan alertTask stopOnce sync.Once
stopChan chan struct{}
pendingAlerts sync.Map pendingAlerts sync.Map
alertsCache *AlertsCache
} }
type AlertMessageData struct { type AlertMessageData struct {
@@ -40,16 +40,22 @@ type UserNotificationSettings struct {
Webhooks []string `json:"webhooks"` Webhooks []string `json:"webhooks"`
} }
type SystemAlertFsStats struct {
DiskTotal float64 `json:"d"`
DiskUsed float64 `json:"du"`
}
// Values pulled from system_stats.stats that are relevant to alerts.
type SystemAlertStats struct { type SystemAlertStats struct {
Cpu float64 `json:"cpu"` Cpu float64 `json:"cpu"`
Mem float64 `json:"mp"` Mem float64 `json:"mp"`
Disk float64 `json:"dp"` Disk float64 `json:"dp"`
NetSent float64 `json:"ns"` Bandwidth [2]uint64 `json:"b"`
NetRecv float64 `json:"nr"`
GPU map[string]SystemAlertGPUData `json:"g"` GPU map[string]SystemAlertGPUData `json:"g"`
Temperatures map[string]float32 `json:"t"` Temperatures map[string]float32 `json:"t"`
LoadAvg [3]float64 `json:"la"` LoadAvg [3]float64 `json:"la"`
Battery [2]uint8 `json:"bat"` Battery [2]uint8 `json:"bat"`
ExtraFs map[string]SystemAlertFsStats `json:"efs"`
} }
type SystemAlertGPUData struct { type SystemAlertGPUData struct {
@@ -58,7 +64,7 @@ type SystemAlertGPUData struct {
type SystemAlertData struct { type SystemAlertData struct {
systemRecord *core.Record systemRecord *core.Record
alertRecord *core.Record alertData CachedAlertData
name string name string
unit string unit string
val float64 val float64
@@ -92,12 +98,10 @@ var supportsTitle = map[string]struct{}{
// NewAlertManager creates a new AlertManager instance. // NewAlertManager creates a new AlertManager instance.
func NewAlertManager(app hubLike) *AlertManager { func NewAlertManager(app hubLike) *AlertManager {
am := &AlertManager{ am := &AlertManager{
hub: app, hub: app,
alertQueue: make(chan alertTask, 5), alertsCache: NewAlertsCache(app),
stopChan: make(chan struct{}),
} }
am.bindEvents() am.bindEvents()
go am.startWorker()
return am return am
} }
@@ -106,6 +110,19 @@ func (am *AlertManager) bindEvents() {
am.hub.OnRecordAfterUpdateSuccess("alerts").BindFunc(updateHistoryOnAlertUpdate) am.hub.OnRecordAfterUpdateSuccess("alerts").BindFunc(updateHistoryOnAlertUpdate)
am.hub.OnRecordAfterDeleteSuccess("alerts").BindFunc(resolveHistoryOnAlertDelete) am.hub.OnRecordAfterDeleteSuccess("alerts").BindFunc(resolveHistoryOnAlertDelete)
am.hub.OnRecordAfterUpdateSuccess("smart_devices").BindFunc(am.handleSmartDeviceAlert) am.hub.OnRecordAfterUpdateSuccess("smart_devices").BindFunc(am.handleSmartDeviceAlert)
am.hub.OnServe().BindFunc(func(e *core.ServeEvent) error {
// Populate all alerts into cache on startup
_ = am.alertsCache.PopulateFromDB(true)
if err := resolveStatusAlerts(e.App); err != nil {
e.App.Logger().Error("Failed to resolve stale status alerts", "err", err)
}
if err := am.restorePendingStatusAlerts(); err != nil {
e.App.Logger().Error("Failed to restore pending status alerts", "err", err)
}
return e.Next()
})
} }
// IsNotificationSilenced checks if a notification should be silenced based on configured quiet hours // IsNotificationSilenced checks if a notification should be silenced based on configured quiet hours
@@ -259,13 +276,14 @@ func (am *AlertManager) SendShoutrrrAlert(notificationUrl, title, message, link,
} }
// Add link // Add link
if scheme == "ntfy" { switch scheme {
case "ntfy":
queryParams.Add("Actions", fmt.Sprintf("view, %s, %s", linkText, link)) queryParams.Add("Actions", fmt.Sprintf("view, %s, %s", linkText, link))
} else if scheme == "lark" { case "lark":
queryParams.Add("link", link) queryParams.Add("link", link)
} else if scheme == "bark" { case "bark":
queryParams.Add("url", link) queryParams.Add("url", link)
} else { default:
message += "\n\n" + link message += "\n\n" + link
} }
@@ -298,3 +316,13 @@ func (am *AlertManager) SendTestNotification(e *core.RequestEvent) error {
} }
return e.JSON(200, map[string]bool{"err": false}) return e.JSON(200, map[string]bool{"err": false})
} }
// setAlertTriggered updates the "triggered" status of an alert record in the database
func (am *AlertManager) setAlertTriggered(alert CachedAlertData, triggered bool) error {
alertRecord, err := am.hub.FindRecordById("alerts", alert.Id)
if err != nil {
return err
}
alertRecord.Set("triggered", triggered)
return am.hub.Save(alertRecord)
}

View File

@@ -0,0 +1,177 @@
package alerts
import (
"github.com/pocketbase/dbx"
"github.com/pocketbase/pocketbase/core"
"github.com/pocketbase/pocketbase/tools/store"
)
// CachedAlertData represents the relevant fields of an alert record for status checking and updates.
type CachedAlertData struct {
Id string
SystemID string
UserID string
Name string
Value float64
Triggered bool
Min uint8
// Created types.DateTime
}
func (a *CachedAlertData) PopulateFromRecord(record *core.Record) {
a.Id = record.Id
a.SystemID = record.GetString("system")
a.UserID = record.GetString("user")
a.Name = record.GetString("name")
a.Value = record.GetFloat("value")
a.Triggered = record.GetBool("triggered")
a.Min = uint8(record.GetInt("min"))
// a.Created = record.GetDateTime("created")
}
// AlertsCache provides an in-memory cache for system alerts.
type AlertsCache struct {
app core.App
store *store.Store[string, *store.Store[string, CachedAlertData]]
populated bool
}
// NewAlertsCache creates a new instance of SystemAlertsCache.
func NewAlertsCache(app core.App) *AlertsCache {
c := AlertsCache{
app: app,
store: store.New(map[string]*store.Store[string, CachedAlertData]{}),
}
return c.bindEvents()
}
// bindEvents sets up event listeners to keep the cache in sync with database changes.
func (c *AlertsCache) bindEvents() *AlertsCache {
c.app.OnRecordAfterUpdateSuccess("alerts").BindFunc(func(e *core.RecordEvent) error {
// c.Delete(e.Record.Original()) // this would be needed if the system field on an existing alert was changed, however we don't currently allow that in the UI so we'll leave it commented out
c.Update(e.Record)
return e.Next()
})
c.app.OnRecordAfterDeleteSuccess("alerts").BindFunc(func(e *core.RecordEvent) error {
c.Delete(e.Record)
return e.Next()
})
c.app.OnRecordAfterCreateSuccess("alerts").BindFunc(func(e *core.RecordEvent) error {
c.Update(e.Record)
return e.Next()
})
return c
}
// PopulateFromDB clears current entries and loads all alerts from the database into the cache.
func (c *AlertsCache) PopulateFromDB(force bool) error {
if !force && c.populated {
return nil
}
records, err := c.app.FindAllRecords("alerts")
if err != nil {
return err
}
c.store.RemoveAll()
for _, record := range records {
c.Update(record)
}
c.populated = true
return nil
}
// Update adds or updates an alert record in the cache.
func (c *AlertsCache) Update(record *core.Record) {
systemID := record.GetString("system")
if systemID == "" {
return
}
systemStore, ok := c.store.GetOk(systemID)
if !ok {
systemStore = store.New(map[string]CachedAlertData{})
c.store.Set(systemID, systemStore)
}
var ca CachedAlertData
ca.PopulateFromRecord(record)
systemStore.Set(record.Id, ca)
}
// Delete removes an alert record from the cache.
func (c *AlertsCache) Delete(record *core.Record) {
systemID := record.GetString("system")
if systemID == "" {
return
}
if systemStore, ok := c.store.GetOk(systemID); ok {
systemStore.Remove(record.Id)
}
}
// GetSystemAlerts returns all alerts for the specified system, lazy-loading if necessary.
func (c *AlertsCache) GetSystemAlerts(systemID string) []CachedAlertData {
systemStore, ok := c.store.GetOk(systemID)
if !ok {
// Populate cache for this system
records, err := c.app.FindAllRecords("alerts", dbx.NewExp("system={:system}", dbx.Params{"system": systemID}))
if err != nil {
return nil
}
systemStore = store.New(map[string]CachedAlertData{})
for _, record := range records {
var ca CachedAlertData
ca.PopulateFromRecord(record)
systemStore.Set(record.Id, ca)
}
c.store.Set(systemID, systemStore)
}
all := systemStore.GetAll()
alerts := make([]CachedAlertData, 0, len(all))
for _, alert := range all {
alerts = append(alerts, alert)
}
return alerts
}
// GetAlert returns a specific alert by its ID from the cache.
func (c *AlertsCache) GetAlert(systemID, alertID string) (CachedAlertData, bool) {
if systemStore, ok := c.store.GetOk(systemID); ok {
return systemStore.GetOk(alertID)
}
return CachedAlertData{}, false
}
// GetAlertsByName returns all alerts of a specific type for the specified system.
func (c *AlertsCache) GetAlertsByName(systemID, alertName string) []CachedAlertData {
allAlerts := c.GetSystemAlerts(systemID)
var alerts []CachedAlertData
for _, record := range allAlerts {
if record.Name == alertName {
alerts = append(alerts, record)
}
}
return alerts
}
// GetAlertsExcludingNames returns all alerts for the specified system excluding the given types.
func (c *AlertsCache) GetAlertsExcludingNames(systemID string, excludedNames ...string) []CachedAlertData {
excludeMap := make(map[string]struct{})
for _, name := range excludedNames {
excludeMap[name] = struct{}{}
}
allAlerts := c.GetSystemAlerts(systemID)
var alerts []CachedAlertData
for _, record := range allAlerts {
if _, excluded := excludeMap[record.Name]; !excluded {
alerts = append(alerts, record)
}
}
return alerts
}
// Refresh returns the latest cached copy for an alert snapshot if it still exists.
func (c *AlertsCache) Refresh(alert CachedAlertData) (CachedAlertData, bool) {
if alert.Id == "" {
return CachedAlertData{}, false
}
return c.GetAlert(alert.SystemID, alert.Id)
}

View File

@@ -0,0 +1,215 @@
//go:build testing
package alerts_test
import (
"testing"
"github.com/henrygd/beszel/internal/alerts"
beszelTests "github.com/henrygd/beszel/internal/tests"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestSystemAlertsCachePopulateAndFilter(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
systems, err := beszelTests.CreateSystems(hub, 2, user.Id, "up")
require.NoError(t, err)
system1 := systems[0]
system2 := systems[1]
statusAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": system1.Id,
"user": user.Id,
"min": 1,
})
require.NoError(t, err)
cpuAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "CPU",
"system": system1.Id,
"user": user.Id,
"value": 80,
"min": 1,
})
require.NoError(t, err)
memoryAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Memory",
"system": system2.Id,
"user": user.Id,
"value": 90,
"min": 1,
})
require.NoError(t, err)
cache := alerts.NewAlertsCache(hub)
cache.PopulateFromDB(false)
statusAlerts := cache.GetAlertsByName(system1.Id, "Status")
require.Len(t, statusAlerts, 1)
assert.Equal(t, statusAlert.Id, statusAlerts[0].Id)
nonStatusAlerts := cache.GetAlertsExcludingNames(system1.Id, "Status")
require.Len(t, nonStatusAlerts, 1)
assert.Equal(t, cpuAlert.Id, nonStatusAlerts[0].Id)
system2Alerts := cache.GetSystemAlerts(system2.Id)
require.Len(t, system2Alerts, 1)
assert.Equal(t, memoryAlert.Id, system2Alerts[0].Id)
}
func TestSystemAlertsCacheLazyLoadUpdateAndDelete(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
require.NoError(t, err)
systemRecord := systems[0]
statusAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": systemRecord.Id,
"user": user.Id,
"min": 1,
})
require.NoError(t, err)
cache := alerts.NewAlertsCache(hub)
require.Len(t, cache.GetSystemAlerts(systemRecord.Id), 1, "first lookup should lazy-load alerts for the system")
cpuAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "CPU",
"system": systemRecord.Id,
"user": user.Id,
"value": 80,
"min": 1,
})
require.NoError(t, err)
cache.Update(cpuAlert)
nonStatusAlerts := cache.GetAlertsExcludingNames(systemRecord.Id, "Status")
require.Len(t, nonStatusAlerts, 1)
assert.Equal(t, cpuAlert.Id, nonStatusAlerts[0].Id)
cache.Delete(statusAlert)
assert.Empty(t, cache.GetAlertsByName(systemRecord.Id, "Status"), "deleted alerts should be removed from the in-memory cache")
}
func TestSystemAlertsCacheRefreshReturnsLatestCopy(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
require.NoError(t, err)
system := systems[0]
alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": system.Id,
"user": user.Id,
"min": 1,
"triggered": false,
})
require.NoError(t, err)
cache := alerts.NewAlertsCache(hub)
snapshot := cache.GetSystemAlerts(system.Id)[0]
assert.False(t, snapshot.Triggered)
alert.Set("triggered", true)
require.NoError(t, hub.Save(alert))
refreshed, ok := cache.Refresh(snapshot)
require.True(t, ok)
assert.Equal(t, snapshot.Id, refreshed.Id)
assert.True(t, refreshed.Triggered, "refresh should return the updated cached value rather than the stale snapshot")
require.NoError(t, hub.Delete(alert))
_, ok = cache.Refresh(snapshot)
assert.False(t, ok, "refresh should report false when the cached alert no longer exists")
}
func TestAlertManagerCacheLifecycle(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
require.NoError(t, err)
system := systems[0]
// Create an alert
alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "CPU",
"system": system.Id,
"user": user.Id,
"value": 80,
"min": 1,
})
require.NoError(t, err)
am := hub.AlertManager
cache := am.GetSystemAlertsCache()
// Verify it's in cache (it should be since CreateRecord triggers the event)
assert.Len(t, cache.GetSystemAlerts(system.Id), 1)
assert.Equal(t, alert.Id, cache.GetSystemAlerts(system.Id)[0].Id)
assert.EqualValues(t, 80, cache.GetSystemAlerts(system.Id)[0].Value)
// Update the alert through PocketBase to trigger events
alert.Set("value", 85)
require.NoError(t, hub.Save(alert))
// Check if updated value is reflected (or at least that it's still there)
cachedAlerts := cache.GetSystemAlerts(system.Id)
assert.Len(t, cachedAlerts, 1)
assert.EqualValues(t, 85, cachedAlerts[0].Value)
// Delete the alert through PocketBase to trigger events
require.NoError(t, hub.Delete(alert))
// Verify it's removed from cache
assert.Empty(t, cache.GetSystemAlerts(system.Id), "alert should be removed from cache after PocketBase delete")
}
// func TestAlertManagerCacheMovesAlertToNewSystemOnUpdate(t *testing.T) {
// hub, user := beszelTests.GetHubWithUser(t)
// defer hub.Cleanup()
// systems, err := beszelTests.CreateSystems(hub, 2, user.Id, "up")
// require.NoError(t, err)
// system1 := systems[0]
// system2 := systems[1]
// alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
// "name": "CPU",
// "system": system1.Id,
// "user": user.Id,
// "value": 80,
// "min": 1,
// })
// require.NoError(t, err)
// am := hub.AlertManager
// cache := am.GetSystemAlertsCache()
// // Initially in system1 cache
// assert.Len(t, cache.Get(system1.Id), 1)
// assert.Empty(t, cache.Get(system2.Id))
// // Move alert to system2
// alert.Set("system", system2.Id)
// require.NoError(t, hub.Save(alert))
// // DEBUG: print if it is found
// // fmt.Printf("system1 alerts after update: %v\n", cache.Get(system1.Id))
// // Should be removed from system1 and present in system2
// assert.Empty(t, cache.GetType(system1.Id, "CPU"), "updated alerts should be evicted from the previous system cache")
// require.Len(t, cache.Get(system2.Id), 1)
// assert.Equal(t, alert.Id, cache.Get(system2.Id)[0].Id)
// }

View File

@@ -0,0 +1,155 @@
//go:build testing
package alerts_test
import (
"encoding/json"
"testing"
"time"
"github.com/henrygd/beszel/internal/entities/system"
beszelTests "github.com/henrygd/beszel/internal/tests"
"github.com/pocketbase/dbx"
"github.com/pocketbase/pocketbase/tools/types"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestDiskAlertExtraFsMultiMinute tests that multi-minute disk alerts correctly use
// historical per-minute values for extra (non-root) filesystems, not the current live snapshot.
func TestDiskAlertExtraFsMultiMinute(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
require.NoError(t, err)
systemRecord := systems[0]
// Disk alert: threshold 80%, min=2 (requires historical averaging)
diskAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Disk",
"system": systemRecord.Id,
"user": user.Id,
"value": 80, // threshold: 80%
"min": 2, // 2 minutes - requires historical averaging
})
require.NoError(t, err)
assert.False(t, diskAlert.GetBool("triggered"), "Alert should not be triggered initially")
am := hub.GetAlertManager()
now := time.Now().UTC()
extraFsHigh := map[string]*system.FsStats{
"/mnt/data": {DiskTotal: 1000, DiskUsed: 920}, // 92% - above threshold
}
// Insert 4 historical records spread over 3 minutes (same pattern as battery tests).
// The oldest record must predate (now - 2min) so the alert time window is valid.
recordTimes := []time.Duration{
-180 * time.Second, // 3 min ago - anchors oldest record before alert.time
-90 * time.Second,
-60 * time.Second,
-30 * time.Second,
}
for _, offset := range recordTimes {
stats := system.Stats{
DiskPct: 30, // root disk at 30% - below threshold
ExtraFs: extraFsHigh,
}
statsJSON, _ := json.Marshal(stats)
recordTime := now.Add(offset)
record, err := beszelTests.CreateRecord(hub, "system_stats", map[string]any{
"system": systemRecord.Id,
"type": "1m",
"stats": string(statsJSON),
})
require.NoError(t, err)
record.SetRaw("created", recordTime.Format(types.DefaultDateLayout))
err = hub.SaveNoValidate(record)
require.NoError(t, err)
}
combinedDataHigh := &system.CombinedData{
Stats: system.Stats{
DiskPct: 30,
ExtraFs: extraFsHigh,
},
Info: system.Info{
DiskPct: 30,
},
}
systemRecord.Set("updated", now)
err = hub.SaveNoValidate(systemRecord)
require.NoError(t, err)
err = am.HandleSystemAlerts(systemRecord, combinedDataHigh)
require.NoError(t, err)
time.Sleep(20 * time.Millisecond)
diskAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": diskAlert.Id})
require.NoError(t, err)
assert.True(t, diskAlert.GetBool("triggered"),
"Alert SHOULD be triggered when extra disk average (92%%) exceeds threshold (80%%)")
// --- Resolution: extra disk drops to 50%, alert should resolve ---
extraFsLow := map[string]*system.FsStats{
"/mnt/data": {DiskTotal: 1000, DiskUsed: 500}, // 50% - below threshold
}
newNow := now.Add(2 * time.Minute)
recordTimesLow := []time.Duration{
-180 * time.Second,
-90 * time.Second,
-60 * time.Second,
-30 * time.Second,
}
for _, offset := range recordTimesLow {
stats := system.Stats{
DiskPct: 30,
ExtraFs: extraFsLow,
}
statsJSON, _ := json.Marshal(stats)
recordTime := newNow.Add(offset)
record, err := beszelTests.CreateRecord(hub, "system_stats", map[string]any{
"system": systemRecord.Id,
"type": "1m",
"stats": string(statsJSON),
})
require.NoError(t, err)
record.SetRaw("created", recordTime.Format(types.DefaultDateLayout))
err = hub.SaveNoValidate(record)
require.NoError(t, err)
}
combinedDataLow := &system.CombinedData{
Stats: system.Stats{
DiskPct: 30,
ExtraFs: extraFsLow,
},
Info: system.Info{
DiskPct: 30,
},
}
systemRecord.Set("updated", newNow)
err = hub.SaveNoValidate(systemRecord)
require.NoError(t, err)
err = am.HandleSystemAlerts(systemRecord, combinedDataLow)
require.NoError(t, err)
time.Sleep(20 * time.Millisecond)
diskAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": diskAlert.Id})
require.NoError(t, err)
assert.False(t, diskAlert.GetBool("triggered"),
"Alert should be resolved when extra disk average (50%%) drops below threshold (80%%)")
}

View File

@@ -49,7 +49,7 @@ func TestAlertSilencedOneTime(t *testing.T) {
// Get alert manager // Get alert manager
am := alerts.NewAlertManager(hub) am := alerts.NewAlertManager(hub)
defer am.StopWorker() defer am.Stop()
// Test that alert is silenced // Test that alert is silenced
silenced := am.IsNotificationSilenced(user.Id, system.Id) silenced := am.IsNotificationSilenced(user.Id, system.Id)
@@ -106,7 +106,7 @@ func TestAlertSilencedDaily(t *testing.T) {
// Get alert manager // Get alert manager
am := alerts.NewAlertManager(hub) am := alerts.NewAlertManager(hub)
defer am.StopWorker() defer am.Stop()
// Get current hour and create a window that includes current time // Get current hour and create a window that includes current time
now := time.Now().UTC() now := time.Now().UTC()
@@ -170,7 +170,7 @@ func TestAlertSilencedDailyMidnightCrossing(t *testing.T) {
// Get alert manager // Get alert manager
am := alerts.NewAlertManager(hub) am := alerts.NewAlertManager(hub)
defer am.StopWorker() defer am.Stop()
// Create a window that crosses midnight: 22:00 - 02:00 // Create a window that crosses midnight: 22:00 - 02:00
startTime := time.Date(2000, 1, 1, 22, 0, 0, 0, time.UTC) startTime := time.Date(2000, 1, 1, 22, 0, 0, 0, time.UTC)
@@ -211,7 +211,7 @@ func TestAlertSilencedGlobal(t *testing.T) {
// Get alert manager // Get alert manager
am := alerts.NewAlertManager(hub) am := alerts.NewAlertManager(hub)
defer am.StopWorker() defer am.Stop()
// Create a global quiet hours window (no system specified) // Create a global quiet hours window (no system specified)
now := time.Now().UTC() now := time.Now().UTC()
@@ -250,7 +250,7 @@ func TestAlertSilencedSystemSpecific(t *testing.T) {
// Get alert manager // Get alert manager
am := alerts.NewAlertManager(hub) am := alerts.NewAlertManager(hub)
defer am.StopWorker() defer am.Stop()
// Create a system-specific quiet hours window for system1 only // Create a system-specific quiet hours window for system1 only
now := time.Now().UTC() now := time.Now().UTC()
@@ -296,7 +296,7 @@ func TestAlertSilencedMultiUser(t *testing.T) {
// Get alert manager // Get alert manager
am := alerts.NewAlertManager(hub) am := alerts.NewAlertManager(hub)
defer am.StopWorker() defer am.Stop()
// Create a quiet hours window for user1 only // Create a quiet hours window for user1 only
now := time.Now().UTC() now := time.Now().UTC()
@@ -417,7 +417,7 @@ func TestAlertSilencedNoWindows(t *testing.T) {
// Get alert manager // Get alert manager
am := alerts.NewAlertManager(hub) am := alerts.NewAlertManager(hub)
defer am.StopWorker() defer am.Stop()
// Without any quiet hours windows, alert should NOT be silenced // Without any quiet hours windows, alert should NOT be silenced
silenced := am.IsNotificationSilenced(user.Id, system.Id) silenced := am.IsNotificationSilenced(user.Id, system.Id)

View File

@@ -5,67 +5,28 @@ import (
"strings" "strings"
"time" "time"
"github.com/pocketbase/dbx"
"github.com/pocketbase/pocketbase/core" "github.com/pocketbase/pocketbase/core"
) )
type alertTask struct {
action string // "schedule" or "cancel"
systemName string
alertRecord *core.Record
delay time.Duration
}
type alertInfo struct { type alertInfo struct {
systemName string systemName string
alertRecord *core.Record alertData CachedAlertData
expireTime time.Time expireTime time.Time
timer *time.Timer
} }
// startWorker is a long-running goroutine that processes alert tasks // Stop cancels all pending status alert timers.
// every x seconds. It must be running to process status alerts. func (am *AlertManager) Stop() {
func (am *AlertManager) startWorker() { am.stopOnce.Do(func() {
processPendingAlerts := time.Tick(15 * time.Second) am.pendingAlerts.Range(func(key, value any) bool {
info := value.(*alertInfo)
// check for status alerts that are not resolved when system comes up if info.timer != nil {
// (can be removed if we figure out core bug in #1052) info.timer.Stop()
checkStatusAlerts := time.Tick(561 * time.Second)
for {
select {
case <-am.stopChan:
return
case task := <-am.alertQueue:
switch task.action {
case "schedule":
am.pendingAlerts.Store(task.alertRecord.Id, &alertInfo{
systemName: task.systemName,
alertRecord: task.alertRecord,
expireTime: time.Now().Add(task.delay),
})
case "cancel":
am.pendingAlerts.Delete(task.alertRecord.Id)
} }
case <-checkStatusAlerts: am.pendingAlerts.Delete(key)
resolveStatusAlerts(am.hub) return true
case <-processPendingAlerts: })
// Check for expired alerts every tick })
now := time.Now()
for key, value := range am.pendingAlerts.Range {
info := value.(*alertInfo)
if now.After(info.expireTime) {
// Downtime delay has passed, process alert
am.sendStatusAlert("down", info.systemName, info.alertRecord)
am.pendingAlerts.Delete(key)
}
}
}
}
}
// StopWorker shuts down the AlertManager.worker goroutine
func (am *AlertManager) StopWorker() {
close(am.stopChan)
} }
// HandleStatusAlerts manages the logic when system status changes. // HandleStatusAlerts manages the logic when system status changes.
@@ -74,82 +35,104 @@ func (am *AlertManager) HandleStatusAlerts(newStatus string, systemRecord *core.
return nil return nil
} }
alertRecords, err := am.getSystemStatusAlerts(systemRecord.Id) alerts := am.alertsCache.GetAlertsByName(systemRecord.Id, "Status")
if err != nil { if len(alerts) == 0 {
return err
}
if len(alertRecords) == 0 {
return nil return nil
} }
systemName := systemRecord.GetString("name") systemName := systemRecord.GetString("name")
if newStatus == "down" { if newStatus == "down" {
am.handleSystemDown(systemName, alertRecords) am.handleSystemDown(systemName, alerts)
} else { } else {
am.handleSystemUp(systemName, alertRecords) am.handleSystemUp(systemName, alerts)
} }
return nil return nil
} }
// getSystemStatusAlerts retrieves all "Status" alert records for a given system ID. // handleSystemDown manages the logic when a system status changes to "down". It schedules pending alerts for each alert record.
func (am *AlertManager) getSystemStatusAlerts(systemID string) ([]*core.Record, error) { func (am *AlertManager) handleSystemDown(systemName string, alerts []CachedAlertData) {
alertRecords, err := am.hub.FindAllRecords("alerts", dbx.HashExp{ for _, alertData := range alerts {
"system": systemID, min := max(1, int(alertData.Min))
"name": "Status", am.schedulePendingStatusAlert(systemName, alertData, time.Duration(min)*time.Minute)
})
if err != nil {
return nil, err
} }
return alertRecords, nil
} }
// Schedules delayed "down" alerts for each alert record. // schedulePendingStatusAlert sets up a timer to send a "down" alert after the specified delay if the system is still down.
func (am *AlertManager) handleSystemDown(systemName string, alertRecords []*core.Record) { // It returns true if the alert was scheduled, or false if an alert was already pending for the given alert record.
for _, alertRecord := range alertRecords { func (am *AlertManager) schedulePendingStatusAlert(systemName string, alertData CachedAlertData, delay time.Duration) bool {
// Continue if alert is already scheduled alert := &alertInfo{
if _, exists := am.pendingAlerts.Load(alertRecord.Id); exists { systemName: systemName,
continue alertData: alertData,
} expireTime: time.Now().Add(delay),
// Schedule by adding to queue
min := max(1, alertRecord.GetInt("min"))
am.alertQueue <- alertTask{
action: "schedule",
systemName: systemName,
alertRecord: alertRecord,
delay: time.Duration(min) * time.Minute,
}
} }
storedAlert, loaded := am.pendingAlerts.LoadOrStore(alertData.Id, alert)
if loaded {
return false
}
stored := storedAlert.(*alertInfo)
stored.timer = time.AfterFunc(time.Until(stored.expireTime), func() {
am.processPendingAlert(alertData.Id)
})
return true
} }
// handleSystemUp manages the logic when a system status changes to "up". // handleSystemUp manages the logic when a system status changes to "up".
// It cancels any pending alerts and sends "up" alerts. // It cancels any pending alerts and sends "up" alerts.
func (am *AlertManager) handleSystemUp(systemName string, alertRecords []*core.Record) { func (am *AlertManager) handleSystemUp(systemName string, alerts []CachedAlertData) {
for _, alertRecord := range alertRecords { for _, alertData := range alerts {
alertRecordID := alertRecord.Id
// If alert exists for record, delete and continue (down alert not sent) // If alert exists for record, delete and continue (down alert not sent)
if _, exists := am.pendingAlerts.Load(alertRecordID); exists { if am.cancelPendingAlert(alertData.Id) {
am.alertQueue <- alertTask{
action: "cancel",
alertRecord: alertRecord,
}
continue continue
} }
// No alert scheduled for this record, send "up" alert if !alertData.Triggered {
if err := am.sendStatusAlert("up", systemName, alertRecord); err != nil { continue
}
if err := am.sendStatusAlert("up", systemName, alertData); err != nil {
am.hub.Logger().Error("Failed to send alert", "err", err) am.hub.Logger().Error("Failed to send alert", "err", err)
} }
} }
} }
// sendStatusAlert sends a status alert ("up" or "down") to the users associated with the alert records. // cancelPendingAlert stops the timer and removes the pending alert for the given alert ID. Returns true if a pending alert was found and cancelled.
func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, alertRecord *core.Record) error { func (am *AlertManager) cancelPendingAlert(alertID string) bool {
switch alertStatus { value, loaded := am.pendingAlerts.LoadAndDelete(alertID)
case "up": if !loaded {
alertRecord.Set("triggered", false) return false
case "down": }
alertRecord.Set("triggered", true)
info := value.(*alertInfo)
if info.timer != nil {
info.timer.Stop()
}
return true
}
// processPendingAlert sends a "down" alert if the pending alert has expired and the system is still down.
func (am *AlertManager) processPendingAlert(alertID string) {
value, loaded := am.pendingAlerts.LoadAndDelete(alertID)
if !loaded {
return
}
info := value.(*alertInfo)
refreshedAlertData, ok := am.alertsCache.Refresh(info.alertData)
if !ok || refreshedAlertData.Triggered {
return
}
if err := am.sendStatusAlert("down", info.systemName, refreshedAlertData); err != nil {
am.hub.Logger().Error("Failed to send alert", "err", err)
}
}
// sendStatusAlert sends a status alert ("up" or "down") to the users associated with the alert records.
func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, alertData CachedAlertData) error {
// Update trigger state for alert record before sending alert
triggered := alertStatus == "down"
if err := am.setAlertTriggered(alertData, triggered); err != nil {
return err
} }
am.hub.Save(alertRecord)
var emoji string var emoji string
if alertStatus == "up" { if alertStatus == "up" {
@@ -162,10 +145,10 @@ func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, a
message := strings.TrimSuffix(title, emoji) message := strings.TrimSuffix(title, emoji)
// Get system ID for the link // Get system ID for the link
systemID := alertRecord.GetString("system") systemID := alertData.SystemID
return am.SendAlert(AlertMessageData{ return am.SendAlert(AlertMessageData{
UserID: alertRecord.GetString("user"), UserID: alertData.UserID,
SystemID: systemID, SystemID: systemID,
Title: title, Title: title,
Message: message, Message: message,
@@ -174,8 +157,8 @@ func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, a
}) })
} }
// resolveStatusAlerts resolves any status alerts that weren't resolved // resolveStatusAlerts resolves any triggered status alerts that weren't resolved
// when system came up (https://github.com/henrygd/beszel/issues/1052) // when system came up (https://github.com/henrygd/beszel/issues/1052).
func resolveStatusAlerts(app core.App) error { func resolveStatusAlerts(app core.App) error {
db := app.DB() db := app.DB()
// Find all active status alerts where the system is actually up // Find all active status alerts where the system is actually up
@@ -205,3 +188,40 @@ func resolveStatusAlerts(app core.App) error {
} }
return nil return nil
} }
// restorePendingStatusAlerts re-queues untriggered status alerts for systems that
// are still down after a hub restart. This rebuilds the lost in-memory timer state.
func (am *AlertManager) restorePendingStatusAlerts() error {
type pendingStatusAlert struct {
AlertID string `db:"alert_id"`
SystemID string `db:"system_id"`
SystemName string `db:"system_name"`
}
var pending []pendingStatusAlert
err := am.hub.DB().NewQuery(`
SELECT a.id AS alert_id, a.system AS system_id, s.name AS system_name
FROM alerts a
JOIN systems s ON a.system = s.id
WHERE a.name = 'Status'
AND a.triggered = false
AND s.status = 'down'
`).All(&pending)
if err != nil {
return err
}
// Make sure cache is populated before trying to restore pending alerts
_ = am.alertsCache.PopulateFromDB(false)
for _, item := range pending {
alertData, ok := am.alertsCache.GetAlert(item.SystemID, item.AlertID)
if !ok {
continue
}
min := max(1, int(alertData.Min))
am.schedulePendingStatusAlert(item.SystemName, alertData, time.Duration(min)*time.Minute)
}
return nil
}

View File

@@ -0,0 +1,755 @@
//go:build testing
package alerts_test
import (
"testing"
"testing/synctest"
"time"
"github.com/henrygd/beszel/internal/alerts"
beszelTests "github.com/henrygd/beszel/internal/tests"
"github.com/pocketbase/dbx"
"github.com/pocketbase/pocketbase/core"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestStatusAlerts(t *testing.T) {
synctest.Test(t, func(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
systems, err := beszelTests.CreateSystems(hub, 4, user.Id, "paused")
assert.NoError(t, err)
var alerts []*core.Record
for i, system := range systems {
alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": system.Id,
"user": user.Id,
"min": i + 1,
})
assert.NoError(t, err)
alerts = append(alerts, alert)
}
time.Sleep(10 * time.Millisecond)
for _, alert := range alerts {
assert.False(t, alert.GetBool("triggered"), "Alert should not be triggered immediately")
}
if hub.TestMailer.TotalSend() != 0 {
assert.Zero(t, hub.TestMailer.TotalSend(), "Expected 0 messages, got %d", hub.TestMailer.TotalSend())
}
for _, system := range systems {
assert.EqualValues(t, "paused", system.GetString("status"), "System should be paused")
}
for _, system := range systems {
system.Set("status", "up")
err = hub.SaveNoValidate(system)
assert.NoError(t, err)
}
time.Sleep(time.Second)
assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
for _, system := range systems {
system.Set("status", "down")
err = hub.SaveNoValidate(system)
assert.NoError(t, err)
}
// after 30 seconds, should have 4 alerts in the pendingAlerts map, no triggered alerts
time.Sleep(time.Second * 30)
assert.EqualValues(t, 4, hub.GetPendingAlertsCount(), "should have 4 alerts in the pendingAlerts map")
triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.EqualValues(t, 0, triggeredCount, "should have 0 alert triggered")
assert.EqualValues(t, 0, hub.TestMailer.TotalSend(), "should have 0 messages sent")
// after 1:30 seconds, should have 1 triggered alert and 3 pending alerts
time.Sleep(time.Second * 60)
assert.EqualValues(t, 3, hub.GetPendingAlertsCount(), "should have 3 alerts in the pendingAlerts map")
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.EqualValues(t, 1, triggeredCount, "should have 1 alert triggered")
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 messages sent")
// after 2:30 seconds, should have 2 triggered alerts and 2 pending alerts
time.Sleep(time.Second * 60)
assert.EqualValues(t, 2, hub.GetPendingAlertsCount(), "should have 2 alerts in the pendingAlerts map")
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.EqualValues(t, 2, triggeredCount, "should have 2 alert triggered")
assert.EqualValues(t, 2, hub.TestMailer.TotalSend(), "should have 2 messages sent")
// now we will bring the remaning systems back up
for _, system := range systems {
system.Set("status", "up")
err = hub.SaveNoValidate(system)
assert.NoError(t, err)
}
time.Sleep(time.Second)
// should have 0 alerts in the pendingAlerts map and 0 alerts triggered
assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.Zero(t, triggeredCount, "should have 0 alert triggered")
// 4 messages sent, 2 down alerts and 2 up alerts for first 2 systems
assert.EqualValues(t, 4, hub.TestMailer.TotalSend(), "should have 4 messages sent")
})
}
func TestStatusAlertRecoveryBeforeDeadline(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
// Ensure user settings have an email
userSettings, _ := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
hub.Save(userSettings)
// Initial email count
initialEmailCount := hub.TestMailer.TotalSend()
systemCollection, _ := hub.FindCollectionByNameOrId("systems")
system := core.NewRecord(systemCollection)
system.Set("name", "test-system")
system.Set("status", "up")
system.Set("host", "127.0.0.1")
system.Set("users", []string{user.Id})
hub.Save(system)
alertCollection, _ := hub.FindCollectionByNameOrId("alerts")
alert := core.NewRecord(alertCollection)
alert.Set("user", user.Id)
alert.Set("system", system.Id)
alert.Set("name", "Status")
alert.Set("triggered", false)
alert.Set("min", 1)
hub.Save(alert)
am := hub.AlertManager
// 1. System goes down
am.HandleStatusAlerts("down", system)
assert.Equal(t, 1, am.GetPendingAlertsCount(), "Alert should be scheduled")
// 2. System goes up BEFORE delay expires
// Triggering HandleStatusAlerts("up") SHOULD NOT send an alert.
am.HandleStatusAlerts("up", system)
assert.Equal(t, 0, am.GetPendingAlertsCount(), "Alert should be canceled if system recovers before delay expires")
// Verify that NO email was sent.
assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "Recovery notification should not be sent if system never went down")
}
func TestStatusAlertNormalRecovery(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
// Ensure user settings have an email
userSettings, _ := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
hub.Save(userSettings)
systemCollection, _ := hub.FindCollectionByNameOrId("systems")
system := core.NewRecord(systemCollection)
system.Set("name", "test-system")
system.Set("status", "up")
system.Set("host", "127.0.0.1")
system.Set("users", []string{user.Id})
hub.Save(system)
alertCollection, _ := hub.FindCollectionByNameOrId("alerts")
alert := core.NewRecord(alertCollection)
alert.Set("user", user.Id)
alert.Set("system", system.Id)
alert.Set("name", "Status")
alert.Set("triggered", true) // System was confirmed DOWN
hub.Save(alert)
am := hub.AlertManager
initialEmailCount := hub.TestMailer.TotalSend()
// System goes up
am.HandleStatusAlerts("up", system)
// Verify that an email WAS sent (normal recovery).
assert.Equal(t, initialEmailCount+1, hub.TestMailer.TotalSend(), "Recovery notification should be sent if system was triggered as down")
}
func TestHandleStatusAlertsDoesNotSendRecoveryWhileDownIsOnlyPending(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
require.NoError(t, err)
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
require.NoError(t, hub.Save(userSettings))
systemCollection, err := hub.FindCollectionByNameOrId("systems")
require.NoError(t, err)
system := core.NewRecord(systemCollection)
system.Set("name", "test-system")
system.Set("status", "up")
system.Set("host", "127.0.0.1")
system.Set("users", []string{user.Id})
require.NoError(t, hub.Save(system))
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
require.NoError(t, err)
alert := core.NewRecord(alertCollection)
alert.Set("user", user.Id)
alert.Set("system", system.Id)
alert.Set("name", "Status")
alert.Set("triggered", false)
alert.Set("min", 1)
require.NoError(t, hub.Save(alert))
initialEmailCount := hub.TestMailer.TotalSend()
am := alerts.NewTestAlertManagerWithoutWorker(hub)
require.NoError(t, am.HandleStatusAlerts("down", system))
assert.Equal(t, 1, am.GetPendingAlertsCount(), "down transition should register a pending alert immediately")
require.NoError(t, am.HandleStatusAlerts("up", system))
assert.Zero(t, am.GetPendingAlertsCount(), "recovery should cancel the pending down alert")
assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "recovery notification should not be sent before a down alert triggers")
alertRecord, err := hub.FindRecordById("alerts", alert.Id)
require.NoError(t, err)
assert.False(t, alertRecord.GetBool("triggered"), "alert should remain untriggered when downtime never matured")
}
func TestStatusAlertTimerCancellationPreventsBoundaryDelivery(t *testing.T) {
synctest.Test(t, func(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
require.NoError(t, err)
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
require.NoError(t, hub.Save(userSettings))
systemCollection, err := hub.FindCollectionByNameOrId("systems")
require.NoError(t, err)
system := core.NewRecord(systemCollection)
system.Set("name", "test-system")
system.Set("status", "up")
system.Set("host", "127.0.0.1")
system.Set("users", []string{user.Id})
require.NoError(t, hub.Save(system))
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
require.NoError(t, err)
alert := core.NewRecord(alertCollection)
alert.Set("user", user.Id)
alert.Set("system", system.Id)
alert.Set("name", "Status")
alert.Set("triggered", false)
alert.Set("min", 1)
require.NoError(t, hub.Save(alert))
initialEmailCount := hub.TestMailer.TotalSend()
am := alerts.NewTestAlertManagerWithoutWorker(hub)
require.NoError(t, am.HandleStatusAlerts("down", system))
assert.Equal(t, 1, am.GetPendingAlertsCount(), "down transition should register a pending alert immediately")
require.True(t, am.ResetPendingAlertTimer(alert.Id, 25*time.Millisecond), "test should shorten the pending alert timer")
time.Sleep(10 * time.Millisecond)
require.NoError(t, am.HandleStatusAlerts("up", system))
assert.Zero(t, am.GetPendingAlertsCount(), "recovery should remove the pending alert before the timer callback runs")
time.Sleep(40 * time.Millisecond)
assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "timer callback should not deliver after recovery cancels the pending alert")
alertRecord, err := hub.FindRecordById("alerts", alert.Id)
require.NoError(t, err)
assert.False(t, alertRecord.GetBool("triggered"), "alert should remain untriggered when cancellation wins the timer race")
time.Sleep(time.Minute)
synctest.Wait()
})
}
func TestStatusAlertDownFiresAfterDelayExpires(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
require.NoError(t, err)
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
require.NoError(t, hub.Save(userSettings))
systemCollection, err := hub.FindCollectionByNameOrId("systems")
require.NoError(t, err)
system := core.NewRecord(systemCollection)
system.Set("name", "test-system")
system.Set("status", "up")
system.Set("host", "127.0.0.1")
system.Set("users", []string{user.Id})
require.NoError(t, hub.Save(system))
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
require.NoError(t, err)
alert := core.NewRecord(alertCollection)
alert.Set("user", user.Id)
alert.Set("system", system.Id)
alert.Set("name", "Status")
alert.Set("triggered", false)
alert.Set("min", 1)
require.NoError(t, hub.Save(alert))
initialEmailCount := hub.TestMailer.TotalSend()
am := alerts.NewTestAlertManagerWithoutWorker(hub)
require.NoError(t, am.HandleStatusAlerts("down", system))
assert.Equal(t, 1, am.GetPendingAlertsCount(), "alert should be pending after system goes down")
// Expire the pending alert and process it
am.ForceExpirePendingAlerts()
processed, err := am.ProcessPendingAlerts()
require.NoError(t, err)
assert.Len(t, processed, 1, "one alert should have been processed")
assert.Equal(t, 0, am.GetPendingAlertsCount(), "pending alert should be consumed after processing")
// Verify down email was sent
assert.Equal(t, initialEmailCount+1, hub.TestMailer.TotalSend(), "down notification should be sent after delay expires")
// Verify triggered flag is set in the DB
alertRecord, err := hub.FindRecordById("alerts", alert.Id)
require.NoError(t, err)
assert.True(t, alertRecord.GetBool("triggered"), "alert should be marked triggered after downtime matures")
}
func TestStatusAlertDuplicateDownCallIsIdempotent(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
require.NoError(t, err)
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
require.NoError(t, hub.Save(userSettings))
systemCollection, err := hub.FindCollectionByNameOrId("systems")
require.NoError(t, err)
system := core.NewRecord(systemCollection)
system.Set("name", "test-system")
system.Set("status", "up")
system.Set("host", "127.0.0.1")
system.Set("users", []string{user.Id})
require.NoError(t, hub.Save(system))
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
require.NoError(t, err)
alert := core.NewRecord(alertCollection)
alert.Set("user", user.Id)
alert.Set("system", system.Id)
alert.Set("name", "Status")
alert.Set("triggered", false)
alert.Set("min", 5)
require.NoError(t, hub.Save(alert))
am := alerts.NewTestAlertManagerWithoutWorker(hub)
require.NoError(t, am.HandleStatusAlerts("down", system))
require.NoError(t, am.HandleStatusAlerts("down", system))
require.NoError(t, am.HandleStatusAlerts("down", system))
assert.Equal(t, 1, am.GetPendingAlertsCount(), "repeated down calls should not schedule duplicate pending alerts")
}
func TestStatusAlertNoAlertRecord(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
systemCollection, err := hub.FindCollectionByNameOrId("systems")
require.NoError(t, err)
system := core.NewRecord(systemCollection)
system.Set("name", "test-system")
system.Set("status", "up")
system.Set("host", "127.0.0.1")
system.Set("users", []string{user.Id})
require.NoError(t, hub.Save(system))
// No Status alert record created for this system
initialEmailCount := hub.TestMailer.TotalSend()
am := alerts.NewTestAlertManagerWithoutWorker(hub)
require.NoError(t, am.HandleStatusAlerts("down", system))
assert.Equal(t, 0, am.GetPendingAlertsCount(), "no pending alert when no alert record exists")
require.NoError(t, am.HandleStatusAlerts("up", system))
assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "no email when no alert record exists")
}
func TestRestorePendingStatusAlertsRequeuesDownSystemsAfterRestart(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
require.NoError(t, err)
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
require.NoError(t, hub.Save(userSettings))
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "down")
require.NoError(t, err)
system := systems[0]
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
require.NoError(t, err)
alert := core.NewRecord(alertCollection)
alert.Set("user", user.Id)
alert.Set("system", system.Id)
alert.Set("name", "Status")
alert.Set("triggered", false)
alert.Set("min", 1)
require.NoError(t, hub.Save(alert))
initialEmailCount := hub.TestMailer.TotalSend()
am := alerts.NewTestAlertManagerWithoutWorker(hub)
require.NoError(t, am.RestorePendingStatusAlerts())
assert.Equal(t, 1, am.GetPendingAlertsCount(), "startup restore should requeue a pending down alert for a system still marked down")
am.ForceExpirePendingAlerts()
processed, err := am.ProcessPendingAlerts()
require.NoError(t, err)
assert.Len(t, processed, 1, "restored pending alert should be processable after the delay expires")
assert.Equal(t, initialEmailCount+1, hub.TestMailer.TotalSend(), "restored pending alert should send the down notification")
alertRecord, err := hub.FindRecordById("alerts", alert.Id)
require.NoError(t, err)
assert.True(t, alertRecord.GetBool("triggered"), "restored pending alert should mark the alert as triggered once delivered")
}
func TestRestorePendingStatusAlertsSkipsNonDownOrAlreadyTriggeredAlerts(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
systemsDown, err := beszelTests.CreateSystems(hub, 2, user.Id, "down")
require.NoError(t, err)
systemDownPending := systemsDown[0]
systemDownTriggered := systemsDown[1]
systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
"name": "up-system",
"users": []string{user.Id},
"host": "127.0.0.2",
"status": "up",
})
require.NoError(t, err)
_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": systemDownPending.Id,
"user": user.Id,
"min": 1,
"triggered": false,
})
require.NoError(t, err)
_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": systemUp.Id,
"user": user.Id,
"min": 1,
"triggered": false,
})
require.NoError(t, err)
_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": systemDownTriggered.Id,
"user": user.Id,
"min": 1,
"triggered": true,
})
require.NoError(t, err)
am := alerts.NewTestAlertManagerWithoutWorker(hub)
require.NoError(t, am.RestorePendingStatusAlerts())
assert.Equal(t, 1, am.GetPendingAlertsCount(), "only untriggered alerts for currently down systems should be restored")
}
func TestRestorePendingStatusAlertsIsIdempotent(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "down")
require.NoError(t, err)
system := systems[0]
_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": system.Id,
"user": user.Id,
"min": 1,
"triggered": false,
})
require.NoError(t, err)
am := alerts.NewTestAlertManagerWithoutWorker(hub)
require.NoError(t, am.RestorePendingStatusAlerts())
require.NoError(t, am.RestorePendingStatusAlerts())
assert.Equal(t, 1, am.GetPendingAlertsCount(), "restoring twice should not create duplicate pending alerts")
am.ForceExpirePendingAlerts()
processed, err := am.ProcessPendingAlerts()
require.NoError(t, err)
assert.Len(t, processed, 1, "restored alert should still be processable exactly once")
assert.Zero(t, am.GetPendingAlertsCount(), "processing the restored alert should empty the pending map")
}
func TestResolveStatusAlertsFixesStaleTriggered(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
// CreateSystems uses SaveNoValidate after initial save to bypass the
// onRecordCreate hook that forces status = "pending".
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
require.NoError(t, err)
system := systems[0]
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
require.NoError(t, err)
alert := core.NewRecord(alertCollection)
alert.Set("user", user.Id)
alert.Set("system", system.Id)
alert.Set("name", "Status")
alert.Set("triggered", true) // Stale: system is up but alert still says triggered
require.NoError(t, hub.Save(alert))
// resolveStatusAlerts should clear the stale triggered flag
require.NoError(t, alerts.ResolveStatusAlerts(hub))
alertRecord, err := hub.FindRecordById("alerts", alert.Id)
require.NoError(t, err)
assert.False(t, alertRecord.GetBool("triggered"), "stale triggered flag should be cleared when system is up")
}
func TestResolveStatusAlerts(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
// Create a systemUp
systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
"name": "test-system",
"users": []string{user.Id},
"host": "127.0.0.1",
"status": "up",
})
assert.NoError(t, err)
systemDown, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
"name": "test-system-2",
"users": []string{user.Id},
"host": "127.0.0.2",
"status": "up",
})
assert.NoError(t, err)
// Create a status alertUp for the system
alertUp, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": systemUp.Id,
"user": user.Id,
"min": 1,
})
assert.NoError(t, err)
alertDown, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": systemDown.Id,
"user": user.Id,
"min": 1,
})
assert.NoError(t, err)
// Verify alert is not triggered initially
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered initially")
// Set the system to 'up' (this should not trigger the alert)
systemUp.Set("status", "up")
err = hub.SaveNoValidate(systemUp)
assert.NoError(t, err)
systemDown.Set("status", "down")
err = hub.SaveNoValidate(systemDown)
assert.NoError(t, err)
// Wait a moment for any processing
time.Sleep(10 * time.Millisecond)
// Verify alertUp is still not triggered after setting system to up
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
assert.NoError(t, err)
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered when system is up")
// Manually set both alerts triggered to true
alertUp.Set("triggered", true)
err = hub.SaveNoValidate(alertUp)
assert.NoError(t, err)
alertDown.Set("triggered", true)
err = hub.SaveNoValidate(alertDown)
assert.NoError(t, err)
// Verify we have exactly one alert with triggered true
triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.EqualValues(t, 2, triggeredCount, "Should have exactly two alerts with triggered true")
// Verify the specific alertUp is triggered
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
assert.NoError(t, err)
assert.True(t, alertUp.GetBool("triggered"), "Alert should be triggered")
// Verify we have two unresolved alert history records
alertHistoryCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
assert.NoError(t, err)
assert.EqualValues(t, 2, alertHistoryCount, "Should have exactly two unresolved alert history records")
err = alerts.ResolveStatusAlerts(hub)
assert.NoError(t, err)
// Verify alertUp is not triggered after resolving
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
assert.NoError(t, err)
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered after resolving")
// Verify alertDown is still triggered
alertDown, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertDown.Id})
assert.NoError(t, err)
assert.True(t, alertDown.GetBool("triggered"), "Alert should still be triggered after resolving")
// Verify we have one unresolved alert history record
alertHistoryCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
assert.NoError(t, err)
assert.EqualValues(t, 1, alertHistoryCount, "Should have exactly one unresolved alert history record")
}
func TestAlertsHistoryStatus(t *testing.T) {
synctest.Test(t, func(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
// Create a system
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
assert.NoError(t, err)
system := systems[0]
// Create a status alertRecord for the system
alertRecord, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": system.Id,
"user": user.Id,
"min": 1,
})
assert.NoError(t, err)
// Verify alert is not triggered initially
assert.False(t, alertRecord.GetBool("triggered"), "Alert should not be triggered initially")
// Set the system to 'down' (this should trigger the alert)
system.Set("status", "down")
err = hub.Save(system)
assert.NoError(t, err)
time.Sleep(time.Second * 30)
synctest.Wait()
alertFresh, _ := hub.FindRecordById("alerts", alertRecord.Id)
assert.False(t, alertFresh.GetBool("triggered"), "Alert should not be triggered after 30 seconds")
time.Sleep(time.Minute)
synctest.Wait()
// Verify alert is triggered after setting system to down
alertFresh, err = hub.FindRecordById("alerts", alertRecord.Id)
assert.NoError(t, err)
assert.True(t, alertFresh.GetBool("triggered"), "Alert should be triggered after one minute")
// Verify we have one unresolved alert history record
alertHistoryCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
assert.NoError(t, err)
assert.EqualValues(t, 1, alertHistoryCount, "Should have exactly one unresolved alert history record")
// Set the system back to 'up' (this should resolve the alert)
system.Set("status", "up")
err = hub.Save(system)
assert.NoError(t, err)
time.Sleep(time.Second)
synctest.Wait()
// Verify alert is not triggered after setting system back to up
alertFresh, err = hub.FindRecordById("alerts", alertRecord.Id)
assert.NoError(t, err)
assert.False(t, alertFresh.GetBool("triggered"), "Alert should not be triggered after system recovers")
// Verify the alert history record is resolved
alertHistoryCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
assert.NoError(t, err)
assert.EqualValues(t, 0, alertHistoryCount, "Should have no unresolved alert history records")
})
}
func TestStatusAlertClearedBeforeSend(t *testing.T) {
synctest.Test(t, func(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
// Create a system
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
assert.NoError(t, err)
system := systems[0]
// Ensure user settings have an email
userSettings, _ := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
hub.Save(userSettings)
// Initial email count
initialEmailCount := hub.TestMailer.TotalSend()
// Create a status alertRecord for the system
alertRecord, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": system.Id,
"user": user.Id,
"min": 1,
})
assert.NoError(t, err)
// Verify alert is not triggered initially
assert.False(t, alertRecord.GetBool("triggered"), "Alert should not be triggered initially")
// Set the system to 'down' (this should trigger the alert)
system.Set("status", "down")
err = hub.Save(system)
assert.NoError(t, err)
time.Sleep(time.Second * 30)
synctest.Wait()
// Set system back up to clear the pending alert before it triggers
system.Set("status", "up")
err = hub.Save(system)
assert.NoError(t, err)
time.Sleep(time.Minute)
synctest.Wait()
// Verify that we have not sent any emails since the system recovered before the alert triggered
assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "No email should be sent if system recovers before alert triggers")
// Verify alert is not triggered after setting system back to up
alertFresh, err := hub.FindRecordById("alerts", alertRecord.Id)
assert.NoError(t, err)
assert.False(t, alertFresh.GetBool("triggered"), "Alert should not be triggered after system recovers")
// Verify that no alert history record was created since the alert never triggered
alertHistoryCount, err := hub.CountRecords("alerts_history")
assert.NoError(t, err)
assert.EqualValues(t, 0, alertHistoryCount, "Should have no unresolved alert history records since alert never triggered")
})
}

View File

@@ -11,15 +11,11 @@ import (
"github.com/pocketbase/dbx" "github.com/pocketbase/dbx"
"github.com/pocketbase/pocketbase/core" "github.com/pocketbase/pocketbase/core"
"github.com/pocketbase/pocketbase/tools/types" "github.com/pocketbase/pocketbase/tools/types"
"github.com/spf13/cast"
) )
func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *system.CombinedData) error { func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *system.CombinedData) error {
alertRecords, err := am.hub.FindAllRecords("alerts", alerts := am.alertsCache.GetAlertsExcludingNames(systemRecord.Id, "Status")
dbx.NewExp("system={:system} AND name!='Status'", dbx.Params{"system": systemRecord.Id}), if len(alerts) == 0 {
)
if err != nil || len(alertRecords) == 0 {
// log.Println("no alerts found for system")
return nil return nil
} }
@@ -27,8 +23,8 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
now := systemRecord.GetDateTime("updated").Time().UTC() now := systemRecord.GetDateTime("updated").Time().UTC()
oldestTime := now oldestTime := now
for _, alertRecord := range alertRecords { for _, alertData := range alerts {
name := alertRecord.GetString("name") name := alertData.Name
var val float64 var val float64
unit := "%" unit := "%"
@@ -73,8 +69,8 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
val = float64(data.Stats.Battery[0]) val = float64(data.Stats.Battery[0])
} }
triggered := alertRecord.GetBool("triggered") triggered := alertData.Triggered
threshold := alertRecord.GetFloat("value") threshold := alertData.Value
// Battery alert has inverted logic: trigger when value is BELOW threshold // Battery alert has inverted logic: trigger when value is BELOW threshold
lowAlert := isLowAlert(name) lowAlert := isLowAlert(name)
@@ -92,11 +88,11 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
} }
} }
min := max(1, cast.ToUint8(alertRecord.Get("min"))) min := max(1, alertData.Min)
alert := SystemAlertData{ alert := SystemAlertData{
systemRecord: systemRecord, systemRecord: systemRecord,
alertRecord: alertRecord, alertData: alertData,
name: name, name: name,
unit: unit, unit: unit,
val: val, val: val,
@@ -129,7 +125,7 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
Created types.DateTime `db:"created"` Created types.DateTime `db:"created"`
}{} }{}
err = am.hub.DB(). err := am.hub.DB().
Select("stats", "created"). Select("stats", "created").
From("system_stats"). From("system_stats").
Where(dbx.NewExp( Where(dbx.NewExp(
@@ -192,22 +188,24 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
case "Memory": case "Memory":
alert.val += stats.Mem alert.val += stats.Mem
case "Bandwidth": case "Bandwidth":
alert.val += stats.NetSent + stats.NetRecv alert.val += float64(stats.Bandwidth[0]+stats.Bandwidth[1]) / (1024 * 1024)
case "Disk": case "Disk":
if alert.mapSums == nil { if alert.mapSums == nil {
alert.mapSums = make(map[string]float32, len(data.Stats.ExtraFs)+1) alert.mapSums = make(map[string]float32, len(stats.ExtraFs)+1)
} }
// add root disk // add root disk
if _, ok := alert.mapSums["root"]; !ok { if _, ok := alert.mapSums["root"]; !ok {
alert.mapSums["root"] = 0.0 alert.mapSums["root"] = 0.0
} }
alert.mapSums["root"] += float32(stats.Disk) alert.mapSums["root"] += float32(stats.Disk)
// add extra disks // add extra disks from historical record
for key, fs := range data.Stats.ExtraFs { for key, fs := range stats.ExtraFs {
if _, ok := alert.mapSums[key]; !ok { if fs.DiskTotal > 0 {
alert.mapSums[key] = 0.0 if _, ok := alert.mapSums[key]; !ok {
alert.mapSums[key] = 0.0
}
alert.mapSums[key] += float32(fs.DiskUsed / fs.DiskTotal * 100)
} }
alert.mapSums[key] += float32(fs.DiskUsed / fs.DiskTotal * 100)
} }
case "Temperature": case "Temperature":
if alert.mapSums == nil { if alert.mapSums == nil {
@@ -342,13 +340,12 @@ func (am *AlertManager) sendSystemAlert(alert SystemAlertData) {
} }
body := fmt.Sprintf("%s averaged %.2f%s for the previous %v %s.", alert.descriptor, alert.val, alert.unit, alert.min, minutesLabel) body := fmt.Sprintf("%s averaged %.2f%s for the previous %v %s.", alert.descriptor, alert.val, alert.unit, alert.min, minutesLabel)
alert.alertRecord.Set("triggered", alert.triggered) if err := am.setAlertTriggered(alert.alertData, alert.triggered); err != nil {
if err := am.hub.Save(alert.alertRecord); err != nil {
// app.Logger().Error("failed to save alert record", "err", err) // app.Logger().Error("failed to save alert record", "err", err)
return return
} }
am.SendAlert(AlertMessageData{ am.SendAlert(AlertMessageData{
UserID: alert.alertRecord.GetString("user"), UserID: alert.alertData.UserID,
SystemID: alert.systemRecord.Id, SystemID: alert.systemRecord.Id,
Title: subject, Title: subject,
Message: body, Message: body,

View File

@@ -0,0 +1,218 @@
//go:build testing
package alerts_test
import (
"testing"
"testing/synctest"
"time"
"github.com/henrygd/beszel/internal/entities/system"
beszelTests "github.com/henrygd/beszel/internal/tests"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
type systemAlertValueSetter[T any] func(info *system.Info, stats *system.Stats, value T)
type systemAlertTestFixture struct {
hub *beszelTests.TestHub
alertID string
submit func(*system.CombinedData) error
}
func createCombinedData[T any](value T, setValue systemAlertValueSetter[T]) *system.CombinedData {
var data system.CombinedData
setValue(&data.Info, &data.Stats, value)
return &data
}
func newSystemAlertTestFixture(t *testing.T, alertName string, min int, threshold float64) *systemAlertTestFixture {
t.Helper()
hub, user := beszelTests.GetHubWithUser(t)
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
require.NoError(t, err)
systemRecord := systems[0]
sysManagerSystem, err := hub.GetSystemManager().GetSystemFromStore(systemRecord.Id)
require.NoError(t, err)
require.NotNil(t, sysManagerSystem)
sysManagerSystem.StopUpdater()
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
require.NoError(t, err)
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
require.NoError(t, hub.Save(userSettings))
alertRecord, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": alertName,
"system": systemRecord.Id,
"user": user.Id,
"min": min,
"value": threshold,
})
require.NoError(t, err)
assert.False(t, alertRecord.GetBool("triggered"), "Alert should not be triggered initially")
alertsCache := hub.GetAlertManager().GetSystemAlertsCache()
cachedAlerts := alertsCache.GetAlertsExcludingNames(systemRecord.Id, "Status")
assert.Len(t, cachedAlerts, 1, "Alert should be in cache")
return &systemAlertTestFixture{
hub: hub,
alertID: alertRecord.Id,
submit: func(data *system.CombinedData) error {
_, err := sysManagerSystem.CreateRecords(data)
return err
},
}
}
func (fixture *systemAlertTestFixture) cleanup() {
fixture.hub.Cleanup()
}
func submitValue[T any](fixture *systemAlertTestFixture, t *testing.T, value T, setValue systemAlertValueSetter[T]) {
t.Helper()
require.NoError(t, fixture.submit(createCombinedData(value, setValue)))
}
func (fixture *systemAlertTestFixture) assertTriggered(t *testing.T, triggered bool, message string) {
t.Helper()
alertRecord, err := fixture.hub.FindRecordById("alerts", fixture.alertID)
require.NoError(t, err)
assert.Equal(t, triggered, alertRecord.GetBool("triggered"), message)
}
func waitForSystemAlert(d time.Duration) {
time.Sleep(d)
synctest.Wait()
}
func testOneMinuteSystemAlert[T any](t *testing.T, alertName string, threshold float64, setValue systemAlertValueSetter[T], triggerValue, resolveValue T) {
t.Helper()
synctest.Test(t, func(t *testing.T) {
fixture := newSystemAlertTestFixture(t, alertName, 1, threshold)
defer fixture.cleanup()
submitValue(fixture, t, triggerValue, setValue)
waitForSystemAlert(time.Second)
fixture.assertTriggered(t, true, "Alert should be triggered")
assert.Equal(t, 1, fixture.hub.TestMailer.TotalSend(), "An email should have been sent")
submitValue(fixture, t, resolveValue, setValue)
waitForSystemAlert(time.Second)
fixture.assertTriggered(t, false, "Alert should be untriggered")
assert.Equal(t, 2, fixture.hub.TestMailer.TotalSend(), "A second email should have been sent for untriggering the alert")
waitForSystemAlert(time.Minute)
})
}
func testMultiMinuteSystemAlert[T any](t *testing.T, alertName string, threshold float64, min int, setValue systemAlertValueSetter[T], baselineValue, triggerValue, resolveValue T) {
t.Helper()
synctest.Test(t, func(t *testing.T) {
fixture := newSystemAlertTestFixture(t, alertName, min, threshold)
defer fixture.cleanup()
submitValue(fixture, t, baselineValue, setValue)
waitForSystemAlert(time.Minute + time.Second)
fixture.assertTriggered(t, false, "Alert should not be triggered yet")
submitValue(fixture, t, triggerValue, setValue)
waitForSystemAlert(time.Minute)
fixture.assertTriggered(t, false, "Alert should not be triggered until the history window is full")
submitValue(fixture, t, triggerValue, setValue)
waitForSystemAlert(time.Second)
fixture.assertTriggered(t, true, "Alert should be triggered")
assert.Equal(t, 1, fixture.hub.TestMailer.TotalSend(), "An email should have been sent")
submitValue(fixture, t, resolveValue, setValue)
waitForSystemAlert(time.Second)
fixture.assertTriggered(t, false, "Alert should be untriggered")
assert.Equal(t, 2, fixture.hub.TestMailer.TotalSend(), "A second email should have been sent for untriggering the alert")
})
}
func setCPUAlertValue(info *system.Info, stats *system.Stats, value float64) {
info.Cpu = value
stats.Cpu = value
}
func setMemoryAlertValue(info *system.Info, stats *system.Stats, value float64) {
info.MemPct = value
stats.MemPct = value
}
func setDiskAlertValue(info *system.Info, stats *system.Stats, value float64) {
info.DiskPct = value
stats.DiskPct = value
}
func setBandwidthAlertValue(info *system.Info, stats *system.Stats, value [2]uint64) {
info.BandwidthBytes = value[0] + value[1]
stats.Bandwidth = value
}
func megabytesToBytes(mb uint64) uint64 {
return mb * 1024 * 1024
}
func setGPUAlertValue(info *system.Info, stats *system.Stats, value float64) {
info.GpuPct = value
stats.GPUData = map[string]system.GPUData{
"GPU0": {Usage: value},
}
}
func setTemperatureAlertValue(info *system.Info, stats *system.Stats, value float64) {
info.DashboardTemp = value
stats.Temperatures = map[string]float64{
"Temp0": value,
}
}
func setLoadAvgAlertValue(info *system.Info, stats *system.Stats, value [3]float64) {
info.LoadAvg = value
stats.LoadAvg = value
}
func setBatteryAlertValue(info *system.Info, stats *system.Stats, value [2]uint8) {
info.Battery = value
stats.Battery = value
}
func TestSystemAlertsOneMin(t *testing.T) {
testOneMinuteSystemAlert(t, "CPU", 50, setCPUAlertValue, 51, 49)
testOneMinuteSystemAlert(t, "Memory", 50, setMemoryAlertValue, 51, 49)
testOneMinuteSystemAlert(t, "Disk", 50, setDiskAlertValue, 51, 49)
testOneMinuteSystemAlert(t, "Bandwidth", 50, setBandwidthAlertValue, [2]uint64{megabytesToBytes(26), megabytesToBytes(25)}, [2]uint64{megabytesToBytes(25), megabytesToBytes(24)})
testOneMinuteSystemAlert(t, "GPU", 50, setGPUAlertValue, 51, 49)
testOneMinuteSystemAlert(t, "Temperature", 70, setTemperatureAlertValue, 71, 69)
testOneMinuteSystemAlert(t, "LoadAvg1", 4, setLoadAvgAlertValue, [3]float64{4.1, 0, 0}, [3]float64{3.9, 0, 0})
testOneMinuteSystemAlert(t, "LoadAvg5", 4, setLoadAvgAlertValue, [3]float64{0, 4.1, 0}, [3]float64{0, 3.9, 0})
testOneMinuteSystemAlert(t, "LoadAvg15", 4, setLoadAvgAlertValue, [3]float64{0, 0, 4.1}, [3]float64{0, 0, 3.9})
testOneMinuteSystemAlert(t, "Battery", 20, setBatteryAlertValue, [2]uint8{19, 0}, [2]uint8{21, 0})
}
func TestSystemAlertsTwoMin(t *testing.T) {
testMultiMinuteSystemAlert(t, "CPU", 50, 2, setCPUAlertValue, 10, 51, 48)
testMultiMinuteSystemAlert(t, "Memory", 50, 2, setMemoryAlertValue, 10, 51, 48)
testMultiMinuteSystemAlert(t, "Disk", 50, 2, setDiskAlertValue, 10, 51, 48)
testMultiMinuteSystemAlert(t, "Bandwidth", 50, 2, setBandwidthAlertValue, [2]uint64{megabytesToBytes(10), megabytesToBytes(10)}, [2]uint64{megabytesToBytes(26), megabytesToBytes(25)}, [2]uint64{megabytesToBytes(10), megabytesToBytes(10)})
testMultiMinuteSystemAlert(t, "GPU", 50, 2, setGPUAlertValue, 10, 51, 48)
testMultiMinuteSystemAlert(t, "Temperature", 70, 2, setTemperatureAlertValue, 10, 71, 67)
testMultiMinuteSystemAlert(t, "LoadAvg1", 4, 2, setLoadAvgAlertValue, [3]float64{0, 0, 0}, [3]float64{4.1, 0, 0}, [3]float64{3.5, 0, 0})
testMultiMinuteSystemAlert(t, "LoadAvg5", 4, 2, setLoadAvgAlertValue, [3]float64{0, 2, 0}, [3]float64{0, 4.1, 0}, [3]float64{0, 3.5, 0})
testMultiMinuteSystemAlert(t, "LoadAvg15", 4, 2, setLoadAvgAlertValue, [3]float64{0, 0, 2}, [3]float64{0, 0, 4.1}, [3]float64{0, 0, 3.5})
testMultiMinuteSystemAlert(t, "Battery", 20, 2, setBatteryAlertValue, [2]uint8{21, 0}, [2]uint8{19, 0}, [2]uint8{25, 1})
}

View File

@@ -12,9 +12,9 @@ import (
"testing/synctest" "testing/synctest"
"time" "time"
"github.com/henrygd/beszel/internal/alerts"
beszelTests "github.com/henrygd/beszel/internal/tests" beszelTests "github.com/henrygd/beszel/internal/tests"
"github.com/henrygd/beszel/internal/alerts"
"github.com/pocketbase/dbx" "github.com/pocketbase/dbx"
"github.com/pocketbase/pocketbase/core" "github.com/pocketbase/pocketbase/core"
pbTests "github.com/pocketbase/pocketbase/tests" pbTests "github.com/pocketbase/pocketbase/tests"
@@ -369,87 +369,6 @@ func TestUserAlertsApi(t *testing.T) {
} }
} }
func TestStatusAlerts(t *testing.T) {
synctest.Test(t, func(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
systems, err := beszelTests.CreateSystems(hub, 4, user.Id, "paused")
assert.NoError(t, err)
var alerts []*core.Record
for i, system := range systems {
alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": system.Id,
"user": user.Id,
"min": i + 1,
})
assert.NoError(t, err)
alerts = append(alerts, alert)
}
time.Sleep(10 * time.Millisecond)
for _, alert := range alerts {
assert.False(t, alert.GetBool("triggered"), "Alert should not be triggered immediately")
}
if hub.TestMailer.TotalSend() != 0 {
assert.Zero(t, hub.TestMailer.TotalSend(), "Expected 0 messages, got %d", hub.TestMailer.TotalSend())
}
for _, system := range systems {
assert.EqualValues(t, "paused", system.GetString("status"), "System should be paused")
}
for _, system := range systems {
system.Set("status", "up")
err = hub.SaveNoValidate(system)
assert.NoError(t, err)
}
time.Sleep(time.Second)
assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
for _, system := range systems {
system.Set("status", "down")
err = hub.SaveNoValidate(system)
assert.NoError(t, err)
}
// after 30 seconds, should have 4 alerts in the pendingAlerts map, no triggered alerts
time.Sleep(time.Second * 30)
assert.EqualValues(t, 4, hub.GetPendingAlertsCount(), "should have 4 alerts in the pendingAlerts map")
triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.EqualValues(t, 0, triggeredCount, "should have 0 alert triggered")
assert.EqualValues(t, 0, hub.TestMailer.TotalSend(), "should have 0 messages sent")
// after 1:30 seconds, should have 1 triggered alert and 3 pending alerts
time.Sleep(time.Second * 60)
assert.EqualValues(t, 3, hub.GetPendingAlertsCount(), "should have 3 alerts in the pendingAlerts map")
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.EqualValues(t, 1, triggeredCount, "should have 1 alert triggered")
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 messages sent")
// after 2:30 seconds, should have 2 triggered alerts and 2 pending alerts
time.Sleep(time.Second * 60)
assert.EqualValues(t, 2, hub.GetPendingAlertsCount(), "should have 2 alerts in the pendingAlerts map")
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.EqualValues(t, 2, triggeredCount, "should have 2 alert triggered")
assert.EqualValues(t, 2, hub.TestMailer.TotalSend(), "should have 2 messages sent")
// now we will bring the remaning systems back up
for _, system := range systems {
system.Set("status", "up")
err = hub.SaveNoValidate(system)
assert.NoError(t, err)
}
time.Sleep(time.Second)
// should have 0 alerts in the pendingAlerts map and 0 alerts triggered
assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.Zero(t, triggeredCount, "should have 0 alert triggered")
// 4 messages sent, 2 down alerts and 2 up alerts for first 2 systems
assert.EqualValues(t, 4, hub.TestMailer.TotalSend(), "should have 4 messages sent")
})
}
func TestAlertsHistory(t *testing.T) { func TestAlertsHistory(t *testing.T) {
synctest.Test(t, func(t *testing.T) { synctest.Test(t, func(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t) hub, user := beszelTests.GetHubWithUser(t)
@@ -578,102 +497,46 @@ func TestAlertsHistory(t *testing.T) {
assert.EqualValues(t, 2, totalHistoryCount, "Should have 2 total alert history records") assert.EqualValues(t, 2, totalHistoryCount, "Should have 2 total alert history records")
}) })
} }
func TestResolveStatusAlerts(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t) func TestSetAlertTriggered(t *testing.T) {
hub, _ := beszelTests.NewTestHub(t.TempDir())
defer hub.Cleanup() defer hub.Cleanup()
// Create a systemUp hub.StartHub()
systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
"name": "test-system", user, _ := beszelTests.CreateUser(hub, "test@example.com", "password")
"users": []string{user.Id}, system, _ := beszelTests.CreateRecord(hub, "systems", map[string]any{
"host": "127.0.0.1", "name": "test-system",
"status": "up", "users": []string{user.Id},
"host": "127.0.0.1",
}) })
assert.NoError(t, err)
systemDown, err := beszelTests.CreateRecord(hub, "systems", map[string]any{ alertRecord, _ := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "test-system-2", "name": "CPU",
"users": []string{user.Id}, "system": system.Id,
"host": "127.0.0.2", "user": user.Id,
"status": "up", "value": 80,
"triggered": false,
}) })
am := alerts.NewAlertManager(hub)
var alert alerts.CachedAlertData
alert.PopulateFromRecord(alertRecord)
// Test triggering the alert
err := am.SetAlertTriggered(alert, true)
assert.NoError(t, err) assert.NoError(t, err)
// Create a status alertUp for the system updatedRecord, err := hub.FindRecordById("alerts", alert.Id)
alertUp, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{ assert.NoError(t, err)
"name": "Status", assert.True(t, updatedRecord.GetBool("triggered"))
"system": systemUp.Id,
"user": user.Id, // Test un-triggering the alert
"min": 1, err = am.SetAlertTriggered(alert, false)
})
assert.NoError(t, err) assert.NoError(t, err)
alertDown, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{ updatedRecord, err = hub.FindRecordById("alerts", alert.Id)
"name": "Status",
"system": systemDown.Id,
"user": user.Id,
"min": 1,
})
assert.NoError(t, err) assert.NoError(t, err)
assert.False(t, updatedRecord.GetBool("triggered"))
// Verify alert is not triggered initially
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered initially")
// Set the system to 'up' (this should not trigger the alert)
systemUp.Set("status", "up")
err = hub.SaveNoValidate(systemUp)
assert.NoError(t, err)
systemDown.Set("status", "down")
err = hub.SaveNoValidate(systemDown)
assert.NoError(t, err)
// Wait a moment for any processing
time.Sleep(10 * time.Millisecond)
// Verify alertUp is still not triggered after setting system to up
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
assert.NoError(t, err)
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered when system is up")
// Manually set both alerts triggered to true
alertUp.Set("triggered", true)
err = hub.SaveNoValidate(alertUp)
assert.NoError(t, err)
alertDown.Set("triggered", true)
err = hub.SaveNoValidate(alertDown)
assert.NoError(t, err)
// Verify we have exactly one alert with triggered true
triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.EqualValues(t, 2, triggeredCount, "Should have exactly two alerts with triggered true")
// Verify the specific alertUp is triggered
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
assert.NoError(t, err)
assert.True(t, alertUp.GetBool("triggered"), "Alert should be triggered")
// Verify we have two unresolved alert history records
alertHistoryCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
assert.NoError(t, err)
assert.EqualValues(t, 2, alertHistoryCount, "Should have exactly two unresolved alert history records")
err = alerts.ResolveStatusAlerts(hub)
assert.NoError(t, err)
// Verify alertUp is not triggered after resolving
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
assert.NoError(t, err)
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered after resolving")
// Verify alertDown is still triggered
alertDown, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertDown.Id})
assert.NoError(t, err)
assert.True(t, alertDown.GetBool("triggered"), "Alert should still be triggered after resolving")
// Verify we have one unresolved alert history record
alertHistoryCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
assert.NoError(t, err)
assert.EqualValues(t, 1, alertHistoryCount, "Should have exactly one unresolved alert history record")
} }

View File

@@ -9,6 +9,18 @@ import (
"github.com/pocketbase/pocketbase/core" "github.com/pocketbase/pocketbase/core"
) )
func NewTestAlertManagerWithoutWorker(app hubLike) *AlertManager {
return &AlertManager{
hub: app,
alertsCache: NewAlertsCache(app),
}
}
// GetSystemAlertsCache returns the internal system alerts cache.
func (am *AlertManager) GetSystemAlertsCache() *AlertsCache {
return am.alertsCache
}
func (am *AlertManager) GetAlertManager() *AlertManager { func (am *AlertManager) GetAlertManager() *AlertManager {
return am return am
} }
@@ -27,19 +39,18 @@ func (am *AlertManager) GetPendingAlertsCount() int {
} }
// ProcessPendingAlerts manually processes all expired alerts (for testing) // ProcessPendingAlerts manually processes all expired alerts (for testing)
func (am *AlertManager) ProcessPendingAlerts() ([]*core.Record, error) { func (am *AlertManager) ProcessPendingAlerts() ([]CachedAlertData, error) {
now := time.Now() now := time.Now()
var lastErr error var lastErr error
var processedAlerts []*core.Record var processedAlerts []CachedAlertData
am.pendingAlerts.Range(func(key, value any) bool { am.pendingAlerts.Range(func(key, value any) bool {
info := value.(*alertInfo) info := value.(*alertInfo)
if now.After(info.expireTime) { if now.After(info.expireTime) {
// Downtime delay has passed, process alert if info.timer != nil {
if err := am.sendStatusAlert("down", info.systemName, info.alertRecord); err != nil { info.timer.Stop()
lastErr = err
} }
processedAlerts = append(processedAlerts, info.alertRecord) am.processPendingAlert(key.(string))
am.pendingAlerts.Delete(key) processedAlerts = append(processedAlerts, info.alertData)
} }
return true return true
}) })
@@ -56,6 +67,31 @@ func (am *AlertManager) ForceExpirePendingAlerts() {
}) })
} }
func (am *AlertManager) ResetPendingAlertTimer(alertID string, delay time.Duration) bool {
value, loaded := am.pendingAlerts.Load(alertID)
if !loaded {
return false
}
info := value.(*alertInfo)
if info.timer != nil {
info.timer.Stop()
}
info.expireTime = time.Now().Add(delay)
info.timer = time.AfterFunc(delay, func() {
am.processPendingAlert(alertID)
})
return true
}
func ResolveStatusAlerts(app core.App) error { func ResolveStatusAlerts(app core.App) error {
return resolveStatusAlerts(app) return resolveStatusAlerts(app)
} }
func (am *AlertManager) RestorePendingStatusAlerts() error {
return am.restorePendingStatusAlerts()
}
func (am *AlertManager) SetAlertTriggered(alert CachedAlertData, triggered bool) error {
return am.setAlertTriggered(alert, triggered)
}

View File

@@ -9,6 +9,7 @@ import (
"github.com/henrygd/beszel" "github.com/henrygd/beszel"
"github.com/henrygd/beszel/agent" "github.com/henrygd/beszel/agent"
"github.com/henrygd/beszel/agent/health" "github.com/henrygd/beszel/agent/health"
"github.com/henrygd/beszel/agent/utils"
"github.com/spf13/pflag" "github.com/spf13/pflag"
"golang.org/x/crypto/ssh" "golang.org/x/crypto/ssh"
) )
@@ -116,12 +117,12 @@ func (opts *cmdOptions) loadPublicKeys() ([]ssh.PublicKey, error) {
} }
// Try environment variable // Try environment variable
if key, ok := agent.GetEnv("KEY"); ok && key != "" { if key, ok := utils.GetEnv("KEY"); ok && key != "" {
return agent.ParseKeys(key) return agent.ParseKeys(key)
} }
// Try key file // Try key file
keyFile, ok := agent.GetEnv("KEY_FILE") keyFile, ok := utils.GetEnv("KEY_FILE")
if !ok { if !ok {
return nil, fmt.Errorf("no key provided: must set -key flag, KEY env var, or KEY_FILE env var. Use 'beszel-agent help' for usage") return nil, fmt.Errorf("no key provided: must set -key flag, KEY env var, or KEY_FILE env var. Use 'beszel-agent help' for usage")
} }

View File

@@ -10,10 +10,19 @@ type ApiInfo struct {
Status string Status string
State string State string
Image string Image string
Health struct {
Status string
// FailingStreak int
}
Ports []struct {
// PrivatePort uint16
PublicPort uint16
IP string
// Type string
}
// ImageID string // ImageID string
// Command string // Command string
// Created int64 // Created int64
// Ports []Port
// SizeRw int64 `json:",omitempty"` // SizeRw int64 `json:",omitempty"`
// SizeRootFs int64 `json:",omitempty"` // SizeRootFs int64 `json:",omitempty"`
// Labels map[string]string // Labels map[string]string
@@ -140,6 +149,7 @@ type Stats struct {
Status string `json:"-" cbor:"6,keyasint"` Status string `json:"-" cbor:"6,keyasint"`
Id string `json:"-" cbor:"7,keyasint"` Id string `json:"-" cbor:"7,keyasint"`
Image string `json:"-" cbor:"8,keyasint"` Image string `json:"-" cbor:"8,keyasint"`
Ports string `json:"-" cbor:"10,keyasint"`
// PrevCpu [2]uint64 `json:"-"` // PrevCpu [2]uint64 `json:"-"`
CpuSystem uint64 `json:"-"` CpuSystem uint64 `json:"-"`
CpuContainer uint64 `json:"-"` CpuContainer uint64 `json:"-"`

View File

@@ -143,8 +143,8 @@ type AtaDeviceStatisticsPage struct {
} }
type AtaDeviceStatisticsEntry struct { type AtaDeviceStatisticsEntry struct {
Name string `json:"name"` Name string `json:"name"`
Value *uint64 `json:"value,omitempty"` Value *int64 `json:"value,omitempty"`
} }
type AtaSmartAttribute struct { type AtaSmartAttribute struct {
@@ -356,8 +356,8 @@ type SmartInfoForSata struct {
SmartStatus SmartStatusInfo `json:"smart_status"` SmartStatus SmartStatusInfo `json:"smart_status"`
// AtaSmartData AtaSmartData `json:"ata_smart_data"` // AtaSmartData AtaSmartData `json:"ata_smart_data"`
// AtaSctCapabilities AtaSctCapabilities `json:"ata_sct_capabilities"` // AtaSctCapabilities AtaSctCapabilities `json:"ata_sct_capabilities"`
AtaSmartAttributes AtaSmartAttributes `json:"ata_smart_attributes"` AtaSmartAttributes AtaSmartAttributes `json:"ata_smart_attributes"`
AtaDeviceStatistics AtaDeviceStatistics `json:"ata_device_statistics"` AtaDeviceStatistics json.RawMessage `json:"ata_device_statistics"`
// PowerOnTime PowerOnTimeInfo `json:"power_on_time"` // PowerOnTime PowerOnTimeInfo `json:"power_on_time"`
// PowerCycleCount uint16 `json:"power_cycle_count"` // PowerCycleCount uint16 `json:"power_cycle_count"`
Temperature TemperatureInfo `json:"temperature"` Temperature TemperatureInfo `json:"temperature"`

View File

@@ -12,8 +12,9 @@ import (
type Stats struct { type Stats struct {
Cpu float64 `json:"cpu" cbor:"0,keyasint"` Cpu float64 `json:"cpu" cbor:"0,keyasint"`
MaxCpu float64 `json:"cpum,omitempty" cbor:"1,keyasint,omitempty"` MaxCpu float64 `json:"cpum,omitempty" cbor:"-"`
Mem float64 `json:"m" cbor:"2,keyasint"` Mem float64 `json:"m" cbor:"2,keyasint"`
MaxMem float64 `json:"mm,omitempty" cbor:"-"`
MemUsed float64 `json:"mu" cbor:"3,keyasint"` MemUsed float64 `json:"mu" cbor:"3,keyasint"`
MemPct float64 `json:"mp" cbor:"4,keyasint"` MemPct float64 `json:"mp" cbor:"4,keyasint"`
MemBuffCache float64 `json:"mb" cbor:"5,keyasint"` MemBuffCache float64 `json:"mb" cbor:"5,keyasint"`
@@ -23,26 +24,25 @@ type Stats struct {
DiskTotal float64 `json:"d" cbor:"9,keyasint"` DiskTotal float64 `json:"d" cbor:"9,keyasint"`
DiskUsed float64 `json:"du" cbor:"10,keyasint"` DiskUsed float64 `json:"du" cbor:"10,keyasint"`
DiskPct float64 `json:"dp" cbor:"11,keyasint"` DiskPct float64 `json:"dp" cbor:"11,keyasint"`
DiskReadPs float64 `json:"dr" cbor:"12,keyasint"` DiskReadPs float64 `json:"dr,omitzero" cbor:"12,keyasint,omitzero"`
DiskWritePs float64 `json:"dw" cbor:"13,keyasint"` DiskWritePs float64 `json:"dw,omitzero" cbor:"13,keyasint,omitzero"`
MaxDiskReadPs float64 `json:"drm,omitempty" cbor:"14,keyasint,omitempty"` MaxDiskReadPs float64 `json:"drm,omitempty" cbor:"-"`
MaxDiskWritePs float64 `json:"dwm,omitempty" cbor:"15,keyasint,omitempty"` MaxDiskWritePs float64 `json:"dwm,omitempty" cbor:"-"`
NetworkSent float64 `json:"ns,omitzero" cbor:"16,keyasint,omitzero"` NetworkSent float64 `json:"ns,omitzero" cbor:"16,keyasint,omitzero"`
NetworkRecv float64 `json:"nr,omitzero" cbor:"17,keyasint,omitzero"` NetworkRecv float64 `json:"nr,omitzero" cbor:"17,keyasint,omitzero"`
MaxNetworkSent float64 `json:"nsm,omitempty" cbor:"18,keyasint,omitempty"` MaxNetworkSent float64 `json:"nsm,omitempty" cbor:"-"`
MaxNetworkRecv float64 `json:"nrm,omitempty" cbor:"19,keyasint,omitempty"` MaxNetworkRecv float64 `json:"nrm,omitempty" cbor:"-"`
Temperatures map[string]float64 `json:"t,omitempty" cbor:"20,keyasint,omitempty"` Temperatures map[string]float64 `json:"t,omitempty" cbor:"20,keyasint,omitempty"`
ExtraFs map[string]*FsStats `json:"efs,omitempty" cbor:"21,keyasint,omitempty"` ExtraFs map[string]*FsStats `json:"efs,omitempty" cbor:"21,keyasint,omitempty"`
GPUData map[string]GPUData `json:"g,omitempty" cbor:"22,keyasint,omitempty"` GPUData map[string]GPUData `json:"g,omitempty" cbor:"22,keyasint,omitempty"`
LoadAvg1 float64 `json:"l1,omitempty" cbor:"23,keyasint,omitempty"` // LoadAvg1 float64 `json:"l1,omitempty" cbor:"23,keyasint,omitempty"`
LoadAvg5 float64 `json:"l5,omitempty" cbor:"24,keyasint,omitempty"` // LoadAvg5 float64 `json:"l5,omitempty" cbor:"24,keyasint,omitempty"`
LoadAvg15 float64 `json:"l15,omitempty" cbor:"25,keyasint,omitempty"` // LoadAvg15 float64 `json:"l15,omitempty" cbor:"25,keyasint,omitempty"`
Bandwidth [2]uint64 `json:"b,omitzero" cbor:"26,keyasint,omitzero"` // [sent bytes, recv bytes] Bandwidth [2]uint64 `json:"b,omitzero" cbor:"26,keyasint,omitzero"` // [sent bytes, recv bytes]
MaxBandwidth [2]uint64 `json:"bm,omitzero" cbor:"27,keyasint,omitzero"` // [sent bytes, recv bytes] MaxBandwidth [2]uint64 `json:"bm,omitzero" cbor:"-"` // [sent bytes, recv bytes]
// TODO: remove other load fields in future release in favor of load avg array // TODO: remove other load fields in future release in favor of load avg array
LoadAvg [3]float64 `json:"la,omitempty" cbor:"28,keyasint"` LoadAvg [3]float64 `json:"la,omitempty" cbor:"28,keyasint"`
Battery [2]uint8 `json:"bat,omitzero" cbor:"29,keyasint,omitzero"` // [percent, charge state, current] Battery [2]uint8 `json:"bat,omitzero" cbor:"29,keyasint,omitzero"` // [percent, charge state, current]
MaxMem float64 `json:"mm,omitempty" cbor:"30,keyasint,omitempty"`
NetworkInterfaces map[string][4]uint64 `json:"ni,omitempty" cbor:"31,keyasint,omitempty"` // [upload bytes, download bytes, total upload, total download] NetworkInterfaces map[string][4]uint64 `json:"ni,omitempty" cbor:"31,keyasint,omitempty"` // [upload bytes, download bytes, total upload, total download]
DiskIO [2]uint64 `json:"dio,omitzero" cbor:"32,keyasint,omitzero"` // [read bytes, write bytes] DiskIO [2]uint64 `json:"dio,omitzero" cbor:"32,keyasint,omitzero"` // [read bytes, write bytes]
MaxDiskIO [2]uint64 `json:"diom,omitzero" cbor:"-"` // [max read bytes, max write bytes] MaxDiskIO [2]uint64 `json:"diom,omitzero" cbor:"-"` // [max read bytes, max write bytes]
@@ -90,8 +90,8 @@ type FsStats struct {
TotalWrite uint64 `json:"-"` TotalWrite uint64 `json:"-"`
DiskReadPs float64 `json:"r" cbor:"2,keyasint"` DiskReadPs float64 `json:"r" cbor:"2,keyasint"`
DiskWritePs float64 `json:"w" cbor:"3,keyasint"` DiskWritePs float64 `json:"w" cbor:"3,keyasint"`
MaxDiskReadPS float64 `json:"rm,omitempty" cbor:"4,keyasint,omitempty"` MaxDiskReadPS float64 `json:"rm,omitempty" cbor:"-"`
MaxDiskWritePS float64 `json:"wm,omitempty" cbor:"5,keyasint,omitempty"` MaxDiskWritePS float64 `json:"wm,omitempty" cbor:"-"`
// TODO: remove DiskReadPs and DiskWritePs in future release in favor of DiskReadBytes and DiskWriteBytes // TODO: remove DiskReadPs and DiskWritePs in future release in favor of DiskReadBytes and DiskWriteBytes
DiskReadBytes uint64 `json:"rb" cbor:"6,keyasint,omitempty"` DiskReadBytes uint64 `json:"rb" cbor:"6,keyasint,omitempty"`
DiskWriteBytes uint64 `json:"wb" cbor:"7,keyasint,omitempty"` DiskWriteBytes uint64 `json:"wb" cbor:"7,keyasint,omitempty"`
@@ -129,23 +129,23 @@ type Info struct {
KernelVersion string `json:"k,omitempty" cbor:"1,keyasint,omitempty"` // deprecated - moved to Details struct KernelVersion string `json:"k,omitempty" cbor:"1,keyasint,omitempty"` // deprecated - moved to Details struct
Cores int `json:"c,omitzero" cbor:"2,keyasint,omitzero"` // deprecated - moved to Details struct Cores int `json:"c,omitzero" cbor:"2,keyasint,omitzero"` // deprecated - moved to Details struct
// Threads is needed in Info struct to calculate load average thresholds // Threads is needed in Info struct to calculate load average thresholds
Threads int `json:"t,omitempty" cbor:"3,keyasint,omitempty"` Threads int `json:"t,omitempty" cbor:"3,keyasint,omitempty"`
CpuModel string `json:"m,omitempty" cbor:"4,keyasint,omitempty"` // deprecated - moved to Details struct CpuModel string `json:"m,omitempty" cbor:"4,keyasint,omitempty"` // deprecated - moved to Details struct
Uptime uint64 `json:"u" cbor:"5,keyasint"` Uptime uint64 `json:"u" cbor:"5,keyasint"`
Cpu float64 `json:"cpu" cbor:"6,keyasint"` Cpu float64 `json:"cpu" cbor:"6,keyasint"`
MemPct float64 `json:"mp" cbor:"7,keyasint"` MemPct float64 `json:"mp" cbor:"7,keyasint"`
DiskPct float64 `json:"dp" cbor:"8,keyasint"` DiskPct float64 `json:"dp" cbor:"8,keyasint"`
Bandwidth float64 `json:"b" cbor:"9,keyasint"` Bandwidth float64 `json:"b,omitzero" cbor:"9,keyasint"` // deprecated in favor of BandwidthBytes
AgentVersion string `json:"v" cbor:"10,keyasint"` AgentVersion string `json:"v" cbor:"10,keyasint"`
Podman bool `json:"p,omitempty" cbor:"11,keyasint,omitempty"` // deprecated - moved to Details struct Podman bool `json:"p,omitempty" cbor:"11,keyasint,omitempty"` // deprecated - moved to Details struct
GpuPct float64 `json:"g,omitempty" cbor:"12,keyasint,omitempty"` GpuPct float64 `json:"g,omitempty" cbor:"12,keyasint,omitempty"`
DashboardTemp float64 `json:"dt,omitempty" cbor:"13,keyasint,omitempty"` DashboardTemp float64 `json:"dt,omitempty" cbor:"13,keyasint,omitempty"`
Os Os `json:"os,omitempty" cbor:"14,keyasint,omitempty"` // deprecated - moved to Details struct Os Os `json:"os,omitempty" cbor:"14,keyasint,omitempty"` // deprecated - moved to Details struct
LoadAvg1 float64 `json:"l1,omitempty" cbor:"15,keyasint,omitempty"` // deprecated - use `la` array instead // LoadAvg1 float64 `json:"l1,omitempty" cbor:"15,keyasint,omitempty"` // deprecated - use `la` array instead
LoadAvg5 float64 `json:"l5,omitempty" cbor:"16,keyasint,omitempty"` // deprecated - use `la` array instead // LoadAvg5 float64 `json:"l5,omitempty" cbor:"16,keyasint,omitempty"` // deprecated - use `la` array instead
LoadAvg15 float64 `json:"l15,omitempty" cbor:"17,keyasint,omitempty"` // deprecated - use `la` array instead // LoadAvg15 float64 `json:"l15,omitempty" cbor:"17,keyasint,omitempty"` // deprecated - use `la` array instead
BandwidthBytes uint64 `json:"bb" cbor:"18,keyasint"`
BandwidthBytes uint64 `json:"bb" cbor:"18,keyasint"`
LoadAvg [3]float64 `json:"la,omitempty" cbor:"19,keyasint"` LoadAvg [3]float64 `json:"la,omitempty" cbor:"19,keyasint"`
ConnectionType ConnectionType `json:"ct,omitempty" cbor:"20,keyasint,omitempty,omitzero"` ConnectionType ConnectionType `json:"ct,omitempty" cbor:"20,keyasint,omitempty,omitzero"`
ExtraFsPct map[string]float64 `json:"efs,omitempty" cbor:"21,keyasint,omitempty"` ExtraFsPct map[string]float64 `json:"efs,omitempty" cbor:"21,keyasint,omitempty"`

View File

@@ -917,7 +917,7 @@ func TestAgentWebSocketIntegration(t *testing.T) {
// Wait for connection result // Wait for connection result
maxWait := 2 * time.Second maxWait := 2 * time.Second
time.Sleep(20 * time.Millisecond) time.Sleep(40 * time.Millisecond)
checkInterval := 20 * time.Millisecond checkInterval := 20 * time.Millisecond
timeout := time.After(maxWait) timeout := time.After(maxWait)
ticker := time.Tick(checkInterval) ticker := time.Tick(checkInterval)

View File

@@ -1,35 +1,39 @@
// Package expirymap provides a thread-safe map with expiring entries.
// It supports TTL-based expiration with both lazy cleanup on access
// and periodic background cleanup.
package expirymap package expirymap
import ( import (
"reflect" "sync"
"time" "time"
"github.com/pocketbase/pocketbase/tools/store" "github.com/pocketbase/pocketbase/tools/store"
) )
type val[T any] struct { type val[T comparable] struct {
value T value T
expires time.Time expires time.Time
} }
type ExpiryMap[T any] struct { type ExpiryMap[T comparable] struct {
store *store.Store[string, *val[T]] store *store.Store[string, val[T]]
cleanupInterval time.Duration stopChan chan struct{}
stopOnce sync.Once
} }
// New creates a new expiry map with custom cleanup interval // New creates a new expiry map with custom cleanup interval
func New[T any](cleanupInterval time.Duration) *ExpiryMap[T] { func New[T comparable](cleanupInterval time.Duration) *ExpiryMap[T] {
m := &ExpiryMap[T]{ m := &ExpiryMap[T]{
store: store.New(map[string]*val[T]{}), store: store.New(map[string]val[T]{}),
cleanupInterval: cleanupInterval, stopChan: make(chan struct{}),
} }
m.startCleaner() go m.startCleaner(cleanupInterval)
return m return m
} }
// Set stores a value with the given TTL // Set stores a value with the given TTL
func (m *ExpiryMap[T]) Set(key string, value T, ttl time.Duration) { func (m *ExpiryMap[T]) Set(key string, value T, ttl time.Duration) {
m.store.Set(key, &val[T]{ m.store.Set(key, val[T]{
value: value, value: value,
expires: time.Now().Add(ttl), expires: time.Now().Add(ttl),
}) })
@@ -55,7 +59,7 @@ func (m *ExpiryMap[T]) GetOk(key string) (T, bool) {
// GetByValue retrieves a value by value // GetByValue retrieves a value by value
func (m *ExpiryMap[T]) GetByValue(val T) (key string, value T, ok bool) { func (m *ExpiryMap[T]) GetByValue(val T) (key string, value T, ok bool) {
for key, v := range m.store.GetAll() { for key, v := range m.store.GetAll() {
if reflect.DeepEqual(v.value, val) { if v.value == val {
// check if expired // check if expired
if v.expires.Before(time.Now()) { if v.expires.Before(time.Now()) {
m.store.Remove(key) m.store.Remove(key)
@@ -75,7 +79,7 @@ func (m *ExpiryMap[T]) Remove(key string) {
// RemovebyValue removes a value by value // RemovebyValue removes a value by value
func (m *ExpiryMap[T]) RemovebyValue(value T) (T, bool) { func (m *ExpiryMap[T]) RemovebyValue(value T) (T, bool) {
for key, val := range m.store.GetAll() { for key, val := range m.store.GetAll() {
if reflect.DeepEqual(val.value, value) { if val.value == value {
m.store.Remove(key) m.store.Remove(key)
return val.value, true return val.value, true
} }
@@ -84,13 +88,23 @@ func (m *ExpiryMap[T]) RemovebyValue(value T) (T, bool) {
} }
// startCleaner runs the background cleanup process // startCleaner runs the background cleanup process
func (m *ExpiryMap[T]) startCleaner() { func (m *ExpiryMap[T]) startCleaner(interval time.Duration) {
go func() { tick := time.Tick(interval)
tick := time.Tick(m.cleanupInterval) for {
for range tick { select {
case <-tick:
m.cleanup() m.cleanup()
case <-m.stopChan:
return
} }
}() }
}
// StopCleaner stops the background cleanup process
func (m *ExpiryMap[T]) StopCleaner() {
m.stopOnce.Do(func() {
close(m.stopChan)
})
} }
// cleanup removes all expired entries // cleanup removes all expired entries
@@ -102,3 +116,12 @@ func (m *ExpiryMap[T]) cleanup() {
} }
} }
} }
// UpdateExpiration updates the expiration time of a key
func (m *ExpiryMap[T]) UpdateExpiration(key string, ttl time.Duration) {
value, ok := m.store.GetOk(key)
if ok {
value.expires = time.Now().Add(ttl)
m.store.Set(key, value)
}
}

View File

@@ -4,6 +4,7 @@ package expirymap
import ( import (
"testing" "testing"
"testing/synctest"
"time" "time"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
@@ -177,6 +178,33 @@ func TestExpiryMap_GenericTypes(t *testing.T) {
}) })
} }
func TestExpiryMap_UpdateExpiration(t *testing.T) {
em := New[string](time.Hour)
// Set a value with short TTL
em.Set("key1", "value1", time.Millisecond*50)
// Verify it exists
assert.True(t, em.Has("key1"))
// Update expiration to a longer TTL
em.UpdateExpiration("key1", time.Hour)
// Wait for the original TTL to pass
time.Sleep(time.Millisecond * 100)
// Should still exist because expiration was updated
assert.True(t, em.Has("key1"))
value, ok := em.GetOk("key1")
assert.True(t, ok)
assert.Equal(t, "value1", value)
// Try updating non-existent key (should not panic)
assert.NotPanics(t, func() {
em.UpdateExpiration("nonexistent", time.Hour)
})
}
func TestExpiryMap_ZeroValues(t *testing.T) { func TestExpiryMap_ZeroValues(t *testing.T) {
em := New[string](time.Hour) em := New[string](time.Hour)
@@ -473,3 +501,52 @@ func TestExpiryMap_ValueOperations_Integration(t *testing.T) {
assert.Equal(t, "unique", value) assert.Equal(t, "unique", value)
assert.Equal(t, "key2", key) assert.Equal(t, "key2", key)
} }
func TestExpiryMap_Cleaner(t *testing.T) {
synctest.Test(t, func(t *testing.T) {
em := New[string](time.Second)
defer em.StopCleaner()
em.Set("test", "value", 500*time.Millisecond)
// Wait 600ms, value is expired but cleaner hasn't run yet (interval is 1s)
time.Sleep(600 * time.Millisecond)
synctest.Wait()
// Map should still hold the value in its internal store before lazy access or cleaner
assert.Equal(t, 1, len(em.store.GetAll()), "store should still have 1 item before cleaner runs")
// Wait another 500ms so cleaner (1s interval) runs
time.Sleep(500 * time.Millisecond)
synctest.Wait() // Wait for background goroutine to process the tick
assert.Equal(t, 0, len(em.store.GetAll()), "store should be empty after cleaner runs")
})
}
func TestExpiryMap_StopCleaner(t *testing.T) {
em := New[string](time.Hour)
// Initially, stopChan is open, reading would block
select {
case <-em.stopChan:
t.Fatal("stopChan should be open initially")
default:
// success
}
em.StopCleaner()
// After StopCleaner, stopChan is closed, reading returns immediately
select {
case <-em.stopChan:
// success
default:
t.Fatal("stopChan was not closed by StopCleaner")
}
// Calling StopCleaner again should NOT panic thanks to sync.Once
assert.NotPanics(t, func() {
em.StopCleaner()
})
}

View File

@@ -48,7 +48,6 @@ type System struct {
detailsFetched atomic.Bool // True if static system details have been fetched and saved detailsFetched atomic.Bool // True if static system details have been fetched and saved
smartFetching atomic.Bool // True if SMART devices are currently being fetched smartFetching atomic.Bool // True if SMART devices are currently being fetched
smartInterval time.Duration // Interval for periodic SMART data updates smartInterval time.Duration // Interval for periodic SMART data updates
lastSmartFetch atomic.Int64 // Unix milliseconds of last SMART data fetch
} }
func (sm *SystemManager) NewSystem(systemId string) *System { func (sm *SystemManager) NewSystem(systemId string) *System {
@@ -134,19 +133,34 @@ func (sys *System) update() error {
return err return err
} }
// ensure deprecated fields from older agents are migrated to current fields
migrateDeprecatedFields(data, !sys.detailsFetched.Load())
// create system records // create system records
_, err = sys.createRecords(data) _, err = sys.createRecords(data)
// if details were included and fetched successfully, mark details as fetched and update smart interval if set by agent
if err == nil && data.Details != nil {
sys.detailsFetched.Store(true)
// update smart interval if it's set on the agent side
if data.Details.SmartInterval > 0 {
sys.smartInterval = data.Details.SmartInterval
// make sure we reset expiration of lastFetch to remain as long as the new smart interval
// to prevent premature expiration leading to new fetch if interval is different.
sys.manager.smartFetchMap.UpdateExpiration(sys.Id, sys.smartInterval+time.Minute)
}
}
// Fetch and save SMART devices when system first comes online or at intervals // Fetch and save SMART devices when system first comes online or at intervals
if backgroundSmartFetchEnabled() { if backgroundSmartFetchEnabled() && sys.detailsFetched.Load() {
if sys.smartInterval <= 0 { if sys.smartInterval <= 0 {
sys.smartInterval = time.Hour sys.smartInterval = time.Hour
} }
lastFetch := sys.lastSmartFetch.Load() lastFetch, _ := sys.manager.smartFetchMap.GetOk(sys.Id)
if time.Since(time.UnixMilli(lastFetch)) >= sys.smartInterval && sys.smartFetching.CompareAndSwap(false, true) { if time.Since(time.UnixMilli(lastFetch-1e4)) >= sys.smartInterval && sys.smartFetching.CompareAndSwap(false, true) {
go func() { go func() {
defer sys.smartFetching.Store(false) defer sys.smartFetching.Store(false)
sys.lastSmartFetch.Store(time.Now().UnixMilli()) sys.manager.smartFetchMap.Set(sys.Id, time.Now().UnixMilli(), sys.smartInterval+time.Minute)
_ = sys.FetchAndSaveSmartDevices() _ = sys.FetchAndSaveSmartDevices()
}() }()
} }
@@ -221,11 +235,6 @@ func (sys *System) createRecords(data *system.CombinedData) (*core.Record, error
if err := createSystemDetailsRecord(txApp, data.Details, sys.Id); err != nil { if err := createSystemDetailsRecord(txApp, data.Details, sys.Id); err != nil {
return err return err
} }
sys.detailsFetched.Store(true)
// update smart interval if it's set on the agent side
if data.Details.SmartInterval > 0 {
sys.smartInterval = data.Details.SmartInterval
}
} }
// update system record (do this last because it triggers alerts and we need above records to be inserted first) // update system record (do this last because it triggers alerts and we need above records to be inserted first)
@@ -309,10 +318,11 @@ func createContainerRecords(app core.App, data []*container.Stats, systemId stri
valueStrings := make([]string, 0, len(data)) valueStrings := make([]string, 0, len(data))
for i, container := range data { for i, container := range data {
suffix := fmt.Sprintf("%d", i) suffix := fmt.Sprintf("%d", i)
valueStrings = append(valueStrings, fmt.Sprintf("({:id%[1]s}, {:system}, {:name%[1]s}, {:image%[1]s}, {:status%[1]s}, {:health%[1]s}, {:cpu%[1]s}, {:memory%[1]s}, {:net%[1]s}, {:updated})", suffix)) valueStrings = append(valueStrings, fmt.Sprintf("({:id%[1]s}, {:system}, {:name%[1]s}, {:image%[1]s}, {:ports%[1]s}, {:status%[1]s}, {:health%[1]s}, {:cpu%[1]s}, {:memory%[1]s}, {:net%[1]s}, {:updated})", suffix))
params["id"+suffix] = container.Id params["id"+suffix] = container.Id
params["name"+suffix] = container.Name params["name"+suffix] = container.Name
params["image"+suffix] = container.Image params["image"+suffix] = container.Image
params["ports"+suffix] = container.Ports
params["status"+suffix] = container.Status params["status"+suffix] = container.Status
params["health"+suffix] = container.Health params["health"+suffix] = container.Health
params["cpu"+suffix] = container.Cpu params["cpu"+suffix] = container.Cpu
@@ -324,7 +334,7 @@ func createContainerRecords(app core.App, data []*container.Stats, systemId stri
params["net"+suffix] = netBytes params["net"+suffix] = netBytes
} }
queryString := fmt.Sprintf( queryString := fmt.Sprintf(
"INSERT INTO containers (id, system, name, image, status, health, cpu, memory, net, updated) VALUES %s ON CONFLICT(id) DO UPDATE SET system = excluded.system, name = excluded.name, image = excluded.image, status = excluded.status, health = excluded.health, cpu = excluded.cpu, memory = excluded.memory, net = excluded.net, updated = excluded.updated", "INSERT INTO containers (id, system, name, image, ports, status, health, cpu, memory, net, updated) VALUES %s ON CONFLICT(id) DO UPDATE SET system = excluded.system, name = excluded.name, image = excluded.image, ports = excluded.ports, status = excluded.status, health = excluded.health, cpu = excluded.cpu, memory = excluded.memory, net = excluded.net, updated = excluded.updated",
strings.Join(valueStrings, ","), strings.Join(valueStrings, ","),
) )
_, err := app.DB().NewQuery(queryString).Bind(params).Execute() _, err := app.DB().NewQuery(queryString).Bind(params).Execute()
@@ -703,3 +713,50 @@ func getJitter() <-chan time.Time {
msDelay := (interval * minPercent / 100) + rand.Intn(interval*jitterRange/100) msDelay := (interval * minPercent / 100) + rand.Intn(interval*jitterRange/100)
return time.After(time.Duration(msDelay) * time.Millisecond) return time.After(time.Duration(msDelay) * time.Millisecond)
} }
// migrateDeprecatedFields moves values from deprecated fields to their new locations if the new
// fields are not already populated. Deprecated fields and refs may be removed at least 30 days
// and one minor version release after the release that includes the migration.
//
// This is run when processing incoming system data from agents, which may be on older versions.
func migrateDeprecatedFields(cd *system.CombinedData, createDetails bool) {
// migration added 0.19.0
if cd.Stats.Bandwidth[0] == 0 && cd.Stats.Bandwidth[1] == 0 {
cd.Stats.Bandwidth[0] = uint64(cd.Stats.NetworkSent * 1024 * 1024)
cd.Stats.Bandwidth[1] = uint64(cd.Stats.NetworkRecv * 1024 * 1024)
cd.Stats.NetworkSent, cd.Stats.NetworkRecv = 0, 0
}
// migration added 0.19.0
if cd.Info.BandwidthBytes == 0 {
cd.Info.BandwidthBytes = uint64(cd.Info.Bandwidth * 1024 * 1024)
cd.Info.Bandwidth = 0
}
// migration added 0.19.0
if cd.Stats.DiskIO[0] == 0 && cd.Stats.DiskIO[1] == 0 {
cd.Stats.DiskIO[0] = uint64(cd.Stats.DiskReadPs * 1024 * 1024)
cd.Stats.DiskIO[1] = uint64(cd.Stats.DiskWritePs * 1024 * 1024)
cd.Stats.DiskReadPs, cd.Stats.DiskWritePs = 0, 0
}
// migration added 0.19.0 - Move deprecated Info fields to Details struct
if cd.Details == nil && cd.Info.Hostname != "" {
if createDetails {
cd.Details = &system.Details{
Hostname: cd.Info.Hostname,
Kernel: cd.Info.KernelVersion,
Cores: cd.Info.Cores,
Threads: cd.Info.Threads,
CpuModel: cd.Info.CpuModel,
Podman: cd.Info.Podman,
Os: cd.Info.Os,
MemoryTotal: uint64(cd.Stats.Mem * 1024 * 1024 * 1024),
}
}
// zero the deprecated fields to prevent saving them in systems.info DB json payload
cd.Info.Hostname = ""
cd.Info.KernelVersion = ""
cd.Info.Cores = 0
cd.Info.CpuModel = ""
cd.Info.Podman = false
cd.Info.Os = 0
}
}

View File

@@ -8,6 +8,7 @@ import (
"github.com/henrygd/beszel/internal/hub/ws" "github.com/henrygd/beszel/internal/hub/ws"
"github.com/henrygd/beszel/internal/entities/system" "github.com/henrygd/beszel/internal/entities/system"
"github.com/henrygd/beszel/internal/hub/expirymap"
"github.com/henrygd/beszel/internal/common" "github.com/henrygd/beszel/internal/common"
@@ -40,9 +41,10 @@ var errSystemExists = errors.New("system exists")
// SystemManager manages a collection of monitored systems and their connections. // SystemManager manages a collection of monitored systems and their connections.
// It handles system lifecycle, status updates, and maintains both SSH and WebSocket connections. // It handles system lifecycle, status updates, and maintains both SSH and WebSocket connections.
type SystemManager struct { type SystemManager struct {
hub hubLike // Hub interface for database and alert operations hub hubLike // Hub interface for database and alert operations
systems *store.Store[string, *System] // Thread-safe store of active systems systems *store.Store[string, *System] // Thread-safe store of active systems
sshConfig *ssh.ClientConfig // SSH client configuration for system connections sshConfig *ssh.ClientConfig // SSH client configuration for system connections
smartFetchMap *expirymap.ExpiryMap[int64] // Stores last SMART fetch time per system ID
} }
// hubLike defines the interface requirements for the hub dependency. // hubLike defines the interface requirements for the hub dependency.
@@ -58,8 +60,9 @@ type hubLike interface {
// The hub must implement the hubLike interface to provide database and alert functionality. // The hub must implement the hubLike interface to provide database and alert functionality.
func NewSystemManager(hub hubLike) *SystemManager { func NewSystemManager(hub hubLike) *SystemManager {
return &SystemManager{ return &SystemManager{
systems: store.New(map[string]*System{}), systems: store.New(map[string]*System{}),
hub: hub, hub: hub,
smartFetchMap: expirymap.New[int64](time.Hour),
} }
} }

View File

@@ -0,0 +1,159 @@
//go:build testing
package systems
import (
"testing"
"github.com/henrygd/beszel/internal/entities/system"
)
func TestCombinedData_MigrateDeprecatedFields(t *testing.T) {
t.Run("Migrate NetworkSent and NetworkRecv to Bandwidth", func(t *testing.T) {
cd := &system.CombinedData{
Stats: system.Stats{
NetworkSent: 1.5, // 1.5 MB
NetworkRecv: 2.5, // 2.5 MB
},
}
migrateDeprecatedFields(cd, true)
expectedSent := uint64(1.5 * 1024 * 1024)
expectedRecv := uint64(2.5 * 1024 * 1024)
if cd.Stats.Bandwidth[0] != expectedSent {
t.Errorf("expected Bandwidth[0] %d, got %d", expectedSent, cd.Stats.Bandwidth[0])
}
if cd.Stats.Bandwidth[1] != expectedRecv {
t.Errorf("expected Bandwidth[1] %d, got %d", expectedRecv, cd.Stats.Bandwidth[1])
}
if cd.Stats.NetworkSent != 0 || cd.Stats.NetworkRecv != 0 {
t.Errorf("expected NetworkSent and NetworkRecv to be reset, got %f, %f", cd.Stats.NetworkSent, cd.Stats.NetworkRecv)
}
})
t.Run("Migrate Info.Bandwidth to Info.BandwidthBytes", func(t *testing.T) {
cd := &system.CombinedData{
Info: system.Info{
Bandwidth: 10.0, // 10 MB
},
}
migrateDeprecatedFields(cd, true)
expected := uint64(10 * 1024 * 1024)
if cd.Info.BandwidthBytes != expected {
t.Errorf("expected BandwidthBytes %d, got %d", expected, cd.Info.BandwidthBytes)
}
if cd.Info.Bandwidth != 0 {
t.Errorf("expected Info.Bandwidth to be reset, got %f", cd.Info.Bandwidth)
}
})
t.Run("Migrate DiskReadPs and DiskWritePs to DiskIO", func(t *testing.T) {
cd := &system.CombinedData{
Stats: system.Stats{
DiskReadPs: 3.0, // 3 MB
DiskWritePs: 4.0, // 4 MB
},
}
migrateDeprecatedFields(cd, true)
expectedRead := uint64(3 * 1024 * 1024)
expectedWrite := uint64(4 * 1024 * 1024)
if cd.Stats.DiskIO[0] != expectedRead {
t.Errorf("expected DiskIO[0] %d, got %d", expectedRead, cd.Stats.DiskIO[0])
}
if cd.Stats.DiskIO[1] != expectedWrite {
t.Errorf("expected DiskIO[1] %d, got %d", expectedWrite, cd.Stats.DiskIO[1])
}
if cd.Stats.DiskReadPs != 0 || cd.Stats.DiskWritePs != 0 {
t.Errorf("expected DiskReadPs and DiskWritePs to be reset, got %f, %f", cd.Stats.DiskReadPs, cd.Stats.DiskWritePs)
}
})
t.Run("Migrate Info fields to Details struct", func(t *testing.T) {
cd := &system.CombinedData{
Stats: system.Stats{
Mem: 16.0, // 16 GB
},
Info: system.Info{
Hostname: "test-host",
KernelVersion: "6.8.0",
Cores: 8,
Threads: 16,
CpuModel: "Intel i7",
Podman: true,
Os: system.Linux,
},
}
migrateDeprecatedFields(cd, true)
if cd.Details == nil {
t.Fatal("expected Details struct to be created")
}
if cd.Details.Hostname != "test-host" {
t.Errorf("expected Hostname 'test-host', got '%s'", cd.Details.Hostname)
}
if cd.Details.Kernel != "6.8.0" {
t.Errorf("expected Kernel '6.8.0', got '%s'", cd.Details.Kernel)
}
if cd.Details.Cores != 8 {
t.Errorf("expected Cores 8, got %d", cd.Details.Cores)
}
if cd.Details.Threads != 16 {
t.Errorf("expected Threads 16, got %d", cd.Details.Threads)
}
if cd.Details.CpuModel != "Intel i7" {
t.Errorf("expected CpuModel 'Intel i7', got '%s'", cd.Details.CpuModel)
}
if cd.Details.Podman != true {
t.Errorf("expected Podman true, got %v", cd.Details.Podman)
}
if cd.Details.Os != system.Linux {
t.Errorf("expected Os Linux, got %d", cd.Details.Os)
}
expectedMem := uint64(16 * 1024 * 1024 * 1024)
if cd.Details.MemoryTotal != expectedMem {
t.Errorf("expected MemoryTotal %d, got %d", expectedMem, cd.Details.MemoryTotal)
}
if cd.Info.Hostname != "" || cd.Info.KernelVersion != "" || cd.Info.Cores != 0 || cd.Info.CpuModel != "" || cd.Info.Podman != false || cd.Info.Os != 0 {
t.Errorf("expected Info fields to be reset, got %+v", cd.Info)
}
})
t.Run("Do not migrate if Details already exists", func(t *testing.T) {
cd := &system.CombinedData{
Details: &system.Details{Hostname: "existing-host"},
Info: system.Info{
Hostname: "deprecated-host",
},
}
migrateDeprecatedFields(cd, true)
if cd.Details.Hostname != "existing-host" {
t.Errorf("expected Hostname 'existing-host', got '%s'", cd.Details.Hostname)
}
if cd.Info.Hostname != "deprecated-host" {
t.Errorf("expected Info.Hostname to remain 'deprecated-host', got '%s'", cd.Info.Hostname)
}
})
t.Run("Do not create details if migrateDetails is false", func(t *testing.T) {
cd := &system.CombinedData{
Info: system.Info{
Hostname: "deprecated-host",
},
}
migrateDeprecatedFields(cd, false)
if cd.Details != nil {
t.Fatal("expected Details struct to not be created")
}
if cd.Info.Hostname != "" {
t.Errorf("expected Info.Hostname to be reset, got '%s'", cd.Info.Hostname)
}
})
}

View File

@@ -7,6 +7,7 @@ import (
"fmt" "fmt"
entities "github.com/henrygd/beszel/internal/entities/system" entities "github.com/henrygd/beszel/internal/entities/system"
"github.com/pocketbase/pocketbase/core"
) )
// The hub integration tests create/replace systems and cleanup the test apps quickly. // The hub integration tests create/replace systems and cleanup the test apps quickly.
@@ -113,4 +114,14 @@ func (sm *SystemManager) RemoveAllSystems() {
for _, system := range sm.systems.GetAll() { for _, system := range sm.systems.GetAll() {
sm.RemoveSystem(system.Id) sm.RemoveSystem(system.Id)
} }
sm.smartFetchMap.StopCleaner()
}
func (s *System) StopUpdater() {
s.cancel()
}
func (s *System) CreateRecords(data *entities.CombinedData) (*core.Record, error) {
s.data = data
return s.createRecords(data)
} }

View File

@@ -977,18 +977,6 @@ func init() {
"system": false, "system": false,
"type": "number" "type": "number"
}, },
{
"hidden": false,
"id": "number3332085495",
"max": null,
"min": null,
"name": "updated",
"onlyInt": true,
"presentable": false,
"required": true,
"system": false,
"type": "number"
},
{ {
"autogeneratePattern": "", "autogeneratePattern": "",
"hidden": false, "hidden": false,
@@ -1002,6 +990,32 @@ func init() {
"required": false, "required": false,
"system": false, "system": false,
"type": "text" "type": "text"
},
{
"autogeneratePattern": "",
"hidden": false,
"id": "text2308952269",
"max": 0,
"min": 0,
"name": "ports",
"pattern": "",
"presentable": false,
"primaryKey": false,
"required": false,
"system": false,
"type": "text"
},
{
"hidden": false,
"id": "number3332085495",
"max": null,
"min": null,
"name": "updated",
"onlyInt": true,
"presentable": false,
"required": true,
"system": false,
"type": "number"
} }
], ],
"indexes": [ "indexes": [

View File

@@ -16,19 +16,16 @@ import { useYAxisWidth } from "./hooks"
export default memo(function LoadAverageChart({ chartData }: { chartData: ChartData }) { export default memo(function LoadAverageChart({ chartData }: { chartData: ChartData }) {
const { yAxisWidth, updateYAxisWidth } = useYAxisWidth() const { yAxisWidth, updateYAxisWidth } = useYAxisWidth()
const keys: { legacy: keyof SystemStats; color: string; label: string }[] = [ const keys: { color: string; label: string }[] = [
{ {
legacy: "l1",
color: "hsl(271, 81%, 60%)", // Purple color: "hsl(271, 81%, 60%)", // Purple
label: t({ message: `1 min`, comment: "Load average" }), label: t({ message: `1 min`, comment: "Load average" }),
}, },
{ {
legacy: "l5",
color: "hsl(217, 91%, 60%)", // Blue color: "hsl(217, 91%, 60%)", // Blue
label: t({ message: `5 min`, comment: "Load average" }), label: t({ message: `5 min`, comment: "Load average" }),
}, },
{ {
legacy: "l15",
color: "hsl(25, 95%, 53%)", // Orange color: "hsl(25, 95%, 53%)", // Orange
label: t({ message: `15 min`, comment: "Load average" }), label: t({ message: `15 min`, comment: "Load average" }),
}, },
@@ -66,27 +63,18 @@ export default memo(function LoadAverageChart({ chartData }: { chartData: ChartD
/> />
} }
/> />
{keys.map(({ legacy, color, label }, i) => { {keys.map(({ color, label }, i) => (
const dataKey = (value: { stats: SystemStats }) => { <Line
const { minor, patch } = chartData.agentVersion key={label}
if (minor <= 12 && patch < 1) { dataKey={(value: { stats: SystemStats }) => value.stats?.la?.[i]}
return value.stats?.[legacy] name={label}
} type="monotoneX"
return value.stats?.la?.[i] ?? value.stats?.[legacy] dot={false}
} strokeWidth={1.5}
return ( stroke={color}
<Line isAnimationActive={false}
key={label} />
dataKey={dataKey} ))}
name={label}
type="monotoneX"
dot={false}
strokeWidth={1.5}
stroke={color}
isAnimationActive={false}
/>
)
})}
<ChartLegend content={<ChartLegendContent />} /> <ChartLegend content={<ChartLegendContent />} />
</LineChart> </LineChart>
</ChartContainer> </ChartContainer>

View File

@@ -4,7 +4,6 @@ import { cn, decimalString, formatBytes, hourWithSeconds } from "@/lib/utils"
import type { ContainerRecord } from "@/types" import type { ContainerRecord } from "@/types"
import { ContainerHealth, ContainerHealthLabels } from "@/lib/enums" import { ContainerHealth, ContainerHealthLabels } from "@/lib/enums"
import { import {
ArrowUpDownIcon,
ClockIcon, ClockIcon,
ContainerIcon, ContainerIcon,
CpuIcon, CpuIcon,
@@ -13,11 +12,12 @@ import {
ServerIcon, ServerIcon,
ShieldCheckIcon, ShieldCheckIcon,
} from "lucide-react" } from "lucide-react"
import { EthernetIcon, HourglassIcon } from "../ui/icons" import { EthernetIcon, HourglassIcon, SquareArrowRightEnterIcon } from "../ui/icons"
import { Badge } from "../ui/badge" import { Badge } from "../ui/badge"
import { t } from "@lingui/core/macro" import { t } from "@lingui/core/macro"
import { $allSystemsById } from "@/lib/stores" import { $allSystemsById, $longestSystemNameLen } from "@/lib/stores"
import { useStore } from "@nanostores/react" import { useStore } from "@nanostores/react"
import { Tooltip, TooltipContent, TooltipTrigger } from "../ui/tooltip"
// Unit names and their corresponding number of seconds for converting docker status strings // Unit names and their corresponding number of seconds for converting docker status strings
const unitSeconds = [ const unitSeconds = [
@@ -63,7 +63,12 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
header: ({ column }) => <HeaderButton column={column} name={t`System`} Icon={ServerIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`System`} Icon={ServerIcon} />,
cell: ({ getValue }) => { cell: ({ getValue }) => {
const allSystems = useStore($allSystemsById) const allSystems = useStore($allSystemsById)
return <span className="ms-1.5 xl:w-34 block truncate">{allSystems[getValue() as string]?.name ?? ""}</span> const longestName = useStore($longestSystemNameLen)
return (
<div className="ms-1 max-w-40 truncate" style={{ width: `${longestName / 1.05}ch` }}>
{allSystems[getValue() as string]?.name ?? ""}
</div>
)
}, },
}, },
// { // {
@@ -82,7 +87,7 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
header: ({ column }) => <HeaderButton column={column} name={t`CPU`} Icon={CpuIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`CPU`} Icon={CpuIcon} />,
cell: ({ getValue }) => { cell: ({ getValue }) => {
const val = getValue() as number const val = getValue() as number
return <span className="ms-1.5 tabular-nums">{`${decimalString(val, val >= 10 ? 1 : 2)}%`}</span> return <span className="ms-1 tabular-nums">{`${decimalString(val, val >= 10 ? 1 : 2)}%`}</span>
}, },
}, },
{ {
@@ -94,7 +99,7 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
const val = getValue() as number const val = getValue() as number
const formatted = formatBytes(val, false, undefined, true) const formatted = formatBytes(val, false, undefined, true)
return ( return (
<span className="ms-1.5 tabular-nums">{`${decimalString(formatted.value, formatted.value >= 10 ? 1 : 2)} ${formatted.unit}`}</span> <span className="ms-1 tabular-nums">{`${decimalString(formatted.value, formatted.value >= 10 ? 1 : 2)} ${formatted.unit}`}</span>
) )
}, },
}, },
@@ -103,11 +108,12 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
accessorFn: (record) => record.net, accessorFn: (record) => record.net,
invertSorting: true, invertSorting: true,
header: ({ column }) => <HeaderButton column={column} name={t`Net`} Icon={EthernetIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`Net`} Icon={EthernetIcon} />,
minSize: 112,
cell: ({ getValue }) => { cell: ({ getValue }) => {
const val = getValue() as number const val = getValue() as number
const formatted = formatBytes(val, true, undefined, false) const formatted = formatBytes(val, true, undefined, false)
return ( return (
<span className="ms-1.5 tabular-nums">{`${decimalString(formatted.value, formatted.value >= 10 ? 1 : 2)} ${formatted.unit}`}</span> <div className="ms-1 tabular-nums">{`${decimalString(formatted.value, formatted.value >= 10 ? 1 : 2)} ${formatted.unit}`}</div>
) )
}, },
}, },
@@ -116,6 +122,7 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
invertSorting: true, invertSorting: true,
accessorFn: (record) => record.health, accessorFn: (record) => record.health,
header: ({ column }) => <HeaderButton column={column} name={t`Health`} Icon={ShieldCheckIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`Health`} Icon={ShieldCheckIcon} />,
minSize: 121,
cell: ({ getValue }) => { cell: ({ getValue }) => {
const healthValue = getValue() as number const healthValue = getValue() as number
const healthStatus = ContainerHealthLabels[healthValue] || "Unknown" const healthStatus = ContainerHealthLabels[healthValue] || "Unknown"
@@ -134,6 +141,35 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
) )
}, },
}, },
{
id: "ports",
accessorFn: (record) => record.ports || undefined,
header: ({ column }) => (
<HeaderButton
column={column}
name={t({ message: "Ports", context: "Container ports" })}
Icon={SquareArrowRightEnterIcon}
/>
),
sortingFn: (a, b) => getPortValue(a.original.ports) - getPortValue(b.original.ports),
minSize: 147,
cell: ({ getValue }) => {
const val = getValue() as string | undefined
if (!val) {
return <div className="ms-1.5 text-muted-foreground">-</div>
}
const className = "ms-1 w-27 block truncate tabular-nums"
if (val.length > 14) {
return (
<Tooltip>
<TooltipTrigger className={className}>{val}</TooltipTrigger>
<TooltipContent>{val}</TooltipContent>
</Tooltip>
)
}
return <span className={className}>{val}</span>
},
},
{ {
id: "image", id: "image",
sortingFn: (a, b) => a.original.image.localeCompare(b.original.image), sortingFn: (a, b) => a.original.image.localeCompare(b.original.image),
@@ -142,7 +178,12 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
<HeaderButton column={column} name={t({ message: "Image", context: "Docker image" })} Icon={LayersIcon} /> <HeaderButton column={column} name={t({ message: "Image", context: "Docker image" })} Icon={LayersIcon} />
), ),
cell: ({ getValue }) => { cell: ({ getValue }) => {
return <span className="ms-1.5 xl:w-40 block truncate">{getValue() as string}</span> const val = getValue() as string
return (
<div className="ms-1 xl:w-40 truncate" title={val}>
{val}
</div>
)
}, },
}, },
{ {
@@ -152,7 +193,7 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
sortingFn: (a, b) => getStatusValue(a.original.status) - getStatusValue(b.original.status), sortingFn: (a, b) => getStatusValue(a.original.status) - getStatusValue(b.original.status),
header: ({ column }) => <HeaderButton column={column} name={t`Status`} Icon={HourglassIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`Status`} Icon={HourglassIcon} />,
cell: ({ getValue }) => { cell: ({ getValue }) => {
return <span className="ms-1.5 w-25 block truncate">{getValue() as string}</span> return <span className="ms-1 w-25 block truncate">{getValue() as string}</span>
}, },
}, },
{ {
@@ -162,7 +203,7 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
header: ({ column }) => <HeaderButton column={column} name={t`Updated`} Icon={ClockIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`Updated`} Icon={ClockIcon} />,
cell: ({ getValue }) => { cell: ({ getValue }) => {
const timestamp = getValue() as number const timestamp = getValue() as number
return <span className="ms-1.5 tabular-nums">{hourWithSeconds(new Date(timestamp).toISOString())}</span> return <span className="ms-1 tabular-nums">{hourWithSeconds(new Date(timestamp).toISOString())}</span>
}, },
}, },
] ]
@@ -188,7 +229,21 @@ function HeaderButton({
> >
{Icon && <Icon className="size-4" />} {Icon && <Icon className="size-4" />}
{name} {name}
<ArrowUpDownIcon className="size-4" /> {/* <ArrowUpDownIcon className="size-4" /> */}
</Button> </Button>
) )
} }
/**
* Convert port string to a number for sorting.
* Handles formats like "80", "127.0.0.1:80", and "80, 443" (takes the first mapping).
*/
function getPortValue(ports: string | undefined): number {
if (!ports) {
return 0
}
const first = ports.includes(",") ? ports.substring(0, ports.indexOf(",")) : ports
const colonIndex = first.lastIndexOf(":")
const portStr = colonIndex === -1 ? first : first.substring(colonIndex + 1)
return Number(portStr) || 0
}

View File

@@ -1,3 +1,4 @@
/** biome-ignore-all lint/security/noDangerouslySetInnerHtml: html comes directly from docker via agent */
import { t } from "@lingui/core/macro" import { t } from "@lingui/core/macro"
import { Trans } from "@lingui/react/macro" import { Trans } from "@lingui/react/macro"
import { import {
@@ -13,7 +14,7 @@ import {
type VisibilityState, type VisibilityState,
} from "@tanstack/react-table" } from "@tanstack/react-table"
import { useVirtualizer, type VirtualItem } from "@tanstack/react-virtual" import { useVirtualizer, type VirtualItem } from "@tanstack/react-virtual"
import { memo, RefObject, useEffect, useRef, useState } from "react" import { memo, type RefObject, useEffect, useRef, useState } from "react"
import { Input } from "@/components/ui/input" import { Input } from "@/components/ui/input"
import { TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table" import { TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table"
import { pb } from "@/lib/api" import { pb } from "@/lib/api"
@@ -44,6 +45,20 @@ export default function ContainersTable({ systemId }: { systemId?: string }) {
) )
const [columnFilters, setColumnFilters] = useState<ColumnFiltersState>([]) const [columnFilters, setColumnFilters] = useState<ColumnFiltersState>([])
const [columnVisibility, setColumnVisibility] = useState<VisibilityState>({}) const [columnVisibility, setColumnVisibility] = useState<VisibilityState>({})
// Hide ports column if no ports are present
useEffect(() => {
if (data) {
const hasPorts = data.some((container) => container.ports)
setColumnVisibility((prev) => {
if (prev.ports === hasPorts) {
return prev
}
return { ...prev, ports: hasPorts }
})
}
}, [data])
const [rowSelection, setRowSelection] = useState({}) const [rowSelection, setRowSelection] = useState({})
const [globalFilter, setGlobalFilter] = useState("") const [globalFilter, setGlobalFilter] = useState("")
@@ -51,7 +66,7 @@ export default function ContainersTable({ systemId }: { systemId?: string }) {
function fetchData(systemId?: string) { function fetchData(systemId?: string) {
pb.collection<ContainerRecord>("containers") pb.collection<ContainerRecord>("containers")
.getList(0, 2000, { .getList(0, 2000, {
fields: "id,name,image,cpu,memory,net,health,status,system,updated", fields: "id,name,image,ports,cpu,memory,net,health,status,system,updated",
filter: systemId ? pb.filter("system={:system}", { system: systemId }) : undefined, filter: systemId ? pb.filter("system={:system}", { system: systemId }) : undefined,
}) })
.then(({ items }) => { .then(({ items }) => {
@@ -67,7 +82,7 @@ export default function ContainersTable({ systemId }: { systemId?: string }) {
setData((curItems) => { setData((curItems) => {
const lastUpdated = Math.max(items[0].updated, items.at(-1)?.updated ?? 0) const lastUpdated = Math.max(items[0].updated, items.at(-1)?.updated ?? 0)
const containerIds = new Set() const containerIds = new Set()
const newItems = [] const newItems: ContainerRecord[] = []
for (const item of items) { for (const item of items) {
if (Math.abs(lastUpdated - item.updated) < 70_000) { if (Math.abs(lastUpdated - item.updated) < 70_000) {
containerIds.add(item.id) containerIds.add(item.id)
@@ -134,7 +149,8 @@ export default function ContainersTable({ systemId }: { systemId?: string }) {
const status = container.status ?? "" const status = container.status ?? ""
const healthLabel = ContainerHealthLabels[container.health as ContainerHealth] ?? "" const healthLabel = ContainerHealthLabels[container.health as ContainerHealth] ?? ""
const image = container.image ?? "" const image = container.image ?? ""
const searchString = `${systemName} ${id} ${name} ${healthLabel} ${status} ${image}`.toLowerCase() const ports = container.ports ?? ""
const searchString = `${systemName} ${id} ${name} ${healthLabel} ${status} ${image} ${ports}`.toLowerCase()
return (filterValue as string) return (filterValue as string)
.toLowerCase() .toLowerCase()
@@ -300,9 +316,6 @@ function ContainerSheet({
setSheetOpen: (open: boolean) => void setSheetOpen: (open: boolean) => void
activeContainer: RefObject<ContainerRecord | null> activeContainer: RefObject<ContainerRecord | null>
}) { }) {
const container = activeContainer.current
if (!container) return null
const [logsDisplay, setLogsDisplay] = useState<string>("") const [logsDisplay, setLogsDisplay] = useState<string>("")
const [infoDisplay, setInfoDisplay] = useState<string>("") const [infoDisplay, setInfoDisplay] = useState<string>("")
const [logsFullscreenOpen, setLogsFullscreenOpen] = useState<boolean>(false) const [logsFullscreenOpen, setLogsFullscreenOpen] = useState<boolean>(false)
@@ -310,6 +323,8 @@ function ContainerSheet({
const [isRefreshingLogs, setIsRefreshingLogs] = useState<boolean>(false) const [isRefreshingLogs, setIsRefreshingLogs] = useState<boolean>(false)
const logsContainerRef = useRef<HTMLDivElement>(null) const logsContainerRef = useRef<HTMLDivElement>(null)
const container = activeContainer.current
function scrollLogsToBottom() { function scrollLogsToBottom() {
if (logsContainerRef.current) { if (logsContainerRef.current) {
logsContainerRef.current.scrollTo({ top: logsContainerRef.current.scrollHeight }) logsContainerRef.current.scrollTo({ top: logsContainerRef.current.scrollHeight })
@@ -317,6 +332,7 @@ function ContainerSheet({
} }
const refreshLogs = async () => { const refreshLogs = async () => {
if (!container) return
setIsRefreshingLogs(true) setIsRefreshingLogs(true)
const startTime = Date.now() const startTime = Date.now()
@@ -348,6 +364,8 @@ function ContainerSheet({
})() })()
}, [container]) }, [container])
if (!container) return null
return ( return (
<> <>
<LogsFullscreenDialog <LogsFullscreenDialog
@@ -378,8 +396,14 @@ function ContainerSheet({
{container.image} {container.image}
<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" /> <Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
{container.id} {container.id}
<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" /> {/* {container.ports && (
{ContainerHealthLabels[container.health as ContainerHealth]} <>
<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
{container.ports}
</>
)} */}
{/* <Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
{ContainerHealthLabels[container.health as ContainerHealth]} */}
</SheetDescription> </SheetDescription>
</SheetHeader> </SheetHeader>
<div className="px-3 pb-3 -mt-4 flex flex-col gap-3 h-full items-start"> <div className="px-3 pb-3 -mt-4 flex flex-col gap-3 h-full items-start">
@@ -438,11 +462,12 @@ function ContainerSheet({
function ContainersTableHead({ table }: { table: TableType<ContainerRecord> }) { function ContainersTableHead({ table }: { table: TableType<ContainerRecord> }) {
return ( return (
<TableHeader className="sticky top-0 z-50 w-full border-b-2"> <TableHeader className="sticky top-0 z-50 w-full border-b-2">
<div className="absolute -top-2 left-0 w-full h-4 bg-table-header z-50"></div>
{table.getHeaderGroups().map((headerGroup) => ( {table.getHeaderGroups().map((headerGroup) => (
<tr key={headerGroup.id}> <tr key={headerGroup.id}>
{headerGroup.headers.map((header) => { {headerGroup.headers.map((header) => {
return ( return (
<TableHead className="px-2" key={header.id}> <TableHead className="px-2" key={header.id} style={{ width: header.getSize() }}>
{header.isPlaceholder ? null : flexRender(header.column.columnDef.header, header.getContext())} {header.isPlaceholder ? null : flexRender(header.column.columnDef.header, header.getContext())}
</TableHead> </TableHead>
) )
@@ -474,6 +499,7 @@ const ContainerTableRow = memo(function ContainerTableRow({
className="py-0 ps-4.5" className="py-0 ps-4.5"
style={{ style={{
height: virtualRow.size, height: virtualRow.size,
width: cell.column.getSize(),
}} }}
> >
{flexRender(cell.column.columnDef.cell, cell.getContext())} {flexRender(cell.column.columnDef.cell, cell.getContext())}

View File

@@ -12,7 +12,7 @@ import Slider from "@/components/ui/slider"
import { HourFormat, Unit } from "@/lib/enums" import { HourFormat, Unit } from "@/lib/enums"
import { dynamicActivate } from "@/lib/i18n" import { dynamicActivate } from "@/lib/i18n"
import languages from "@/lib/languages" import languages from "@/lib/languages"
import { $userSettings } from "@/lib/stores" import { $userSettings, defaultLayoutWidth } from "@/lib/stores"
import { chartTimeData, currentHour12 } from "@/lib/utils" import { chartTimeData, currentHour12 } from "@/lib/utils"
import type { UserSettings } from "@/types" import type { UserSettings } from "@/types"
import { saveSettings } from "./layout" import { saveSettings } from "./layout"
@@ -21,7 +21,7 @@ export default function SettingsProfilePage({ userSettings }: { userSettings: Us
const [isLoading, setIsLoading] = useState(false) const [isLoading, setIsLoading] = useState(false)
const { i18n } = useLingui() const { i18n } = useLingui()
const currentUserSettings = useStore($userSettings) const currentUserSettings = useStore($userSettings)
const layoutWidth = currentUserSettings.layoutWidth ?? 1500 const layoutWidth = currentUserSettings.layoutWidth ?? defaultLayoutWidth
async function handleSubmit(e: React.FormEvent<HTMLFormElement>) { async function handleSubmit(e: React.FormEvent<HTMLFormElement>) {
e.preventDefault() e.preventDefault()

View File

@@ -654,7 +654,7 @@ export default memo(function SystemDetail({ id }: { id: string }) {
)} )}
{/* Load Average chart */} {/* Load Average chart */}
{chartData.agentVersion?.minor >= 12 && ( {chartData.agentVersion?.minor > 12 && (
<ChartCard <ChartCard
empty={dataEmpty} empty={dataEmpty}
grid={grid} grid={grid}

View File

@@ -3,13 +3,16 @@ import {
type ColumnDef, type ColumnDef,
type ColumnFiltersState, type ColumnFiltersState,
type Column, type Column,
type Row,
type SortingState, type SortingState,
type Table as TableType,
flexRender, flexRender,
getCoreRowModel, getCoreRowModel,
getFilteredRowModel, getFilteredRowModel,
getSortedRowModel, getSortedRowModel,
useReactTable, useReactTable,
} from "@tanstack/react-table" } from "@tanstack/react-table"
import { useVirtualizer, type VirtualItem } from "@tanstack/react-virtual"
import { import {
Activity, Activity,
Box, Box,
@@ -40,6 +43,7 @@ import {
toFixedFloat, toFixedFloat,
formatTemperature, formatTemperature,
cn, cn,
getVisualStringWidth,
secondsToString, secondsToString,
hourWithSeconds, hourWithSeconds,
formatShortDate, formatShortDate,
@@ -57,7 +61,7 @@ import {
DropdownMenuSeparator, DropdownMenuSeparator,
DropdownMenuTrigger, DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu" } from "@/components/ui/dropdown-menu"
import { useCallback, useMemo, useEffect, useState } from "react" import { memo, useCallback, useMemo, useEffect, useRef, useState } from "react"
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip" import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"
// Column definition for S.M.A.R.T. attributes table // Column definition for S.M.A.R.T. attributes table
@@ -101,7 +105,11 @@ function formatCapacity(bytes: number): string {
const SMART_DEVICE_FIELDS = "id,system,name,model,state,capacity,temp,type,hours,cycles,updated" const SMART_DEVICE_FIELDS = "id,system,name,model,state,capacity,temp,type,hours,cycles,updated"
export const columns: ColumnDef<SmartDeviceRecord>[] = [ export const createColumns = (
longestName: number,
longestModel: number,
longestDevice: number
): ColumnDef<SmartDeviceRecord>[] => [
{ {
id: "system", id: "system",
accessorFn: (record) => record.system, accessorFn: (record) => record.system,
@@ -114,7 +122,11 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
header: ({ column }) => <HeaderButton column={column} name={t`System`} Icon={ServerIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`System`} Icon={ServerIcon} />,
cell: ({ getValue }) => { cell: ({ getValue }) => {
const allSystems = useStore($allSystemsById) const allSystems = useStore($allSystemsById)
return <span className="ms-1.5 xl:w-30 block truncate">{allSystems[getValue() as string]?.name ?? ""}</span> return (
<div className="ms-1.5 max-w-40 block truncate" style={{ width: `${longestName / 1.05}ch` }}>
{allSystems[getValue() as string]?.name ?? ""}
</div>
)
}, },
}, },
{ {
@@ -122,7 +134,11 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
sortingFn: (a, b) => a.original.name.localeCompare(b.original.name), sortingFn: (a, b) => a.original.name.localeCompare(b.original.name),
header: ({ column }) => <HeaderButton column={column} name={t`Device`} Icon={HardDrive} />, header: ({ column }) => <HeaderButton column={column} name={t`Device`} Icon={HardDrive} />,
cell: ({ getValue }) => ( cell: ({ getValue }) => (
<div className="font-medium max-w-40 truncate ms-1.5" title={getValue() as string}> <div
className="font-medium max-w-40 truncate ms-1"
title={getValue() as string}
style={{ width: `${longestDevice / 1.05}ch` }}
>
{getValue() as string} {getValue() as string}
</div> </div>
), ),
@@ -132,7 +148,11 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
sortingFn: (a, b) => a.original.model.localeCompare(b.original.model), sortingFn: (a, b) => a.original.model.localeCompare(b.original.model),
header: ({ column }) => <HeaderButton column={column} name={t`Model`} Icon={Box} />, header: ({ column }) => <HeaderButton column={column} name={t`Model`} Icon={Box} />,
cell: ({ getValue }) => ( cell: ({ getValue }) => (
<div className="max-w-48 truncate ms-1.5" title={getValue() as string}> <div
className="max-w-48 truncate ms-1"
title={getValue() as string}
style={{ width: `${longestModel / 1.05}ch` }}
>
{getValue() as string} {getValue() as string}
</div> </div>
), ),
@@ -141,7 +161,7 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
accessorKey: "capacity", accessorKey: "capacity",
invertSorting: true, invertSorting: true,
header: ({ column }) => <HeaderButton column={column} name={t`Capacity`} Icon={BinaryIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`Capacity`} Icon={BinaryIcon} />,
cell: ({ getValue }) => <span className="ms-1.5">{formatCapacity(getValue() as number)}</span>, cell: ({ getValue }) => <span className="ms-1">{formatCapacity(getValue() as number)}</span>,
}, },
{ {
accessorKey: "state", accessorKey: "state",
@@ -149,9 +169,9 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
cell: ({ getValue }) => { cell: ({ getValue }) => {
const status = getValue() as string const status = getValue() as string
return ( return (
<div className="ms-1.5"> <Badge className="ms-1" variant={status === "PASSED" ? "success" : status === "FAILED" ? "danger" : "warning"}>
<Badge variant={status === "PASSED" ? "success" : status === "FAILED" ? "danger" : "warning"}>{status}</Badge> {status}
</div> </Badge>
) )
}, },
}, },
@@ -160,11 +180,9 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
sortingFn: (a, b) => a.original.type.localeCompare(b.original.type), sortingFn: (a, b) => a.original.type.localeCompare(b.original.type),
header: ({ column }) => <HeaderButton column={column} name={t`Type`} Icon={ArrowLeftRightIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`Type`} Icon={ArrowLeftRightIcon} />,
cell: ({ getValue }) => ( cell: ({ getValue }) => (
<div className="ms-1.5"> <Badge variant="outline" className="ms-1 uppercase">
<Badge variant="outline" className="uppercase"> {getValue() as string}
{getValue() as string} </Badge>
</Badge>
</div>
), ),
}, },
{ {
@@ -176,11 +194,11 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
cell: ({ getValue }) => { cell: ({ getValue }) => {
const hours = getValue() as number | undefined const hours = getValue() as number | undefined
if (hours == null) { if (hours == null) {
return <div className="text-sm text-muted-foreground ms-1.5">N/A</div> return <div className="text-sm text-muted-foreground ms-1">N/A</div>
} }
const seconds = hours * 3600 const seconds = hours * 3600
return ( return (
<div className="text-sm ms-1.5"> <div className="text-sm ms-1">
<div>{secondsToString(seconds, "hour")}</div> <div>{secondsToString(seconds, "hour")}</div>
<div className="text-muted-foreground text-xs">{secondsToString(seconds, "day")}</div> <div className="text-muted-foreground text-xs">{secondsToString(seconds, "day")}</div>
</div> </div>
@@ -196,9 +214,9 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
cell: ({ getValue }) => { cell: ({ getValue }) => {
const cycles = getValue() as number | undefined const cycles = getValue() as number | undefined
if (cycles == null) { if (cycles == null) {
return <div className="text-muted-foreground ms-1.5">N/A</div> return <div className="text-muted-foreground ms-1">N/A</div>
} }
return <span className="ms-1.5">{cycles.toLocaleString()}</span> return <span className="ms-1">{cycles.toLocaleString()}</span>
}, },
}, },
{ {
@@ -208,10 +226,10 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
cell: ({ getValue }) => { cell: ({ getValue }) => {
const temp = getValue() as number | null | undefined const temp = getValue() as number | null | undefined
if (!temp) { if (!temp) {
return <div className="text-muted-foreground ms-1.5">N/A</div> return <div className="text-muted-foreground ms-1">N/A</div>
} }
const { value, unit } = formatTemperature(temp) const { value, unit } = formatTemperature(temp)
return <span className="ms-1.5">{`${value} ${unit}`}</span> return <span className="ms-1">{`${value} ${unit}`}</span>
}, },
}, },
// { // {
@@ -236,7 +254,7 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
// if today, use hourWithSeconds, otherwise use formatShortDate // if today, use hourWithSeconds, otherwise use formatShortDate
const formatter = const formatter =
new Date(timestamp).toDateString() === new Date().toDateString() ? hourWithSeconds : formatShortDate new Date(timestamp).toDateString() === new Date().toDateString() ? hourWithSeconds : formatShortDate
return <span className="ms-1.5 tabular-nums">{formatter(timestamp)}</span> return <span className="ms-1 tabular-nums">{formatter(timestamp)}</span>
}, },
}, },
] ]
@@ -275,6 +293,36 @@ export default function DisksTable({ systemId }: { systemId?: string }) {
const [sheetOpen, setSheetOpen] = useState(false) const [sheetOpen, setSheetOpen] = useState(false)
const [rowActionState, setRowActionState] = useState<{ type: "refresh" | "delete"; id: string } | null>(null) const [rowActionState, setRowActionState] = useState<{ type: "refresh" | "delete"; id: string } | null>(null)
const [globalFilter, setGlobalFilter] = useState("") const [globalFilter, setGlobalFilter] = useState("")
const allSystems = useStore($allSystemsById)
// duplicate the devices to test with more rows
// if (
// smartDevices?.length &&
// smartDevices.length < 50 &&
// typeof window !== "undefined" &&
// window.location.hostname === "localhost"
// ) {
// setSmartDevices([...smartDevices, ...smartDevices, ...smartDevices])
// }
// Calculate the right width for the columns based on the longest strings among the displayed devices
const { longestName, longestModel, longestDevice } = useMemo(() => {
const result = { longestName: 0, longestModel: 0, longestDevice: 0 }
if (!smartDevices || Object.keys(allSystems).length === 0) {
return result
}
const seenSystems = new Set<string>()
for (const device of smartDevices) {
if (!systemId && !seenSystems.has(device.system)) {
seenSystems.add(device.system)
const name = allSystems[device.system]?.name ?? ""
result.longestName = Math.max(result.longestName, getVisualStringWidth(name))
}
result.longestModel = Math.max(result.longestModel, getVisualStringWidth(device.model ?? ""))
result.longestDevice = Math.max(result.longestDevice, getVisualStringWidth(device.name ?? ""))
}
return result
}, [smartDevices, systemId, allSystems])
const openSheet = (disk: SmartDeviceRecord) => { const openSheet = (disk: SmartDeviceRecord) => {
setActiveDiskId(disk.id) setActiveDiskId(disk.id)
@@ -440,9 +488,10 @@ export default function DisksTable({ systemId }: { systemId?: string }) {
// Filter columns based on whether systemId is provided // Filter columns based on whether systemId is provided
const tableColumns = useMemo(() => { const tableColumns = useMemo(() => {
const columns = createColumns(longestName, longestModel, longestDevice)
const baseColumns = systemId ? columns.filter((col) => col.id !== "system") : columns const baseColumns = systemId ? columns.filter((col) => col.id !== "system") : columns
return [...baseColumns, actionColumn] return [...baseColumns, actionColumn]
}, [systemId, actionColumn]) }, [systemId, actionColumn, longestName, longestModel, longestDevice])
const table = useReactTable({ const table = useReactTable({
data: smartDevices || ([] as SmartDeviceRecord[]), data: smartDevices || ([] as SmartDeviceRecord[]),
@@ -474,6 +523,7 @@ export default function DisksTable({ systemId }: { systemId?: string }) {
.every((term) => searchString.includes(term)) .every((term) => searchString.includes(term))
}, },
}) })
const rows = table.getRowModel().rows
// Hide the table on system pages if there's no data, but always show on global page // Hide the table on system pages if there's no data, but always show on global page
if (systemId && !smartDevices?.length && !columnFilters.length) { if (systemId && !smartDevices?.length && !columnFilters.length) {
@@ -513,57 +563,123 @@ export default function DisksTable({ systemId }: { systemId?: string }) {
</div> </div>
</div> </div>
</CardHeader> </CardHeader>
<div className="rounded-md border text-nowrap"> <SmartDevicesTable
<Table> table={table}
<TableHeader> rows={rows}
{table.getHeaderGroups().map((headerGroup) => ( colLength={tableColumns.length}
<TableRow key={headerGroup.id}> data={smartDevices}
{headerGroup.headers.map((header) => { openSheet={openSheet}
return ( />
<TableHead key={header.id} className="px-2">
{header.isPlaceholder ? null : flexRender(header.column.columnDef.header, header.getContext())}
</TableHead>
)
})}
</TableRow>
))}
</TableHeader>
<TableBody>
{table.getRowModel().rows?.length ? (
table.getRowModel().rows.map((row) => (
<TableRow
key={row.id}
data-state={row.getIsSelected() && "selected"}
className="cursor-pointer"
onClick={() => openSheet(row.original)}
>
{row.getVisibleCells().map((cell) => (
<TableCell key={cell.id} className="md:ps-5">
{flexRender(cell.column.columnDef.cell, cell.getContext())}
</TableCell>
))}
</TableRow>
))
) : (
<TableRow>
<TableCell colSpan={tableColumns.length} className="h-24 text-center">
{smartDevices ? (
t`No results.`
) : (
<LoaderCircleIcon className="animate-spin size-10 opacity-60 mx-auto" />
)}
</TableCell>
</TableRow>
)}
</TableBody>
</Table>
</div>
</Card> </Card>
<DiskSheet diskId={activeDiskId} open={sheetOpen} onOpenChange={setSheetOpen} /> <DiskSheet diskId={activeDiskId} open={sheetOpen} onOpenChange={setSheetOpen} />
</div> </div>
) )
} }
const SmartDevicesTable = memo(function SmartDevicesTable({
table,
rows,
colLength,
data,
openSheet,
}: {
table: TableType<SmartDeviceRecord>
rows: Row<SmartDeviceRecord>[]
colLength: number
data: SmartDeviceRecord[] | undefined
openSheet: (disk: SmartDeviceRecord) => void
}) {
const scrollRef = useRef<HTMLDivElement>(null)
const virtualizer = useVirtualizer<HTMLDivElement, HTMLTableRowElement>({
count: rows.length,
estimateSize: () => 65,
getScrollElement: () => scrollRef.current,
overscan: 5,
})
const virtualRows = virtualizer.getVirtualItems()
const paddingTop = Math.max(0, virtualRows[0]?.start ?? 0 - virtualizer.options.scrollMargin)
const paddingBottom = Math.max(0, virtualizer.getTotalSize() - (virtualRows[virtualRows.length - 1]?.end ?? 0))
return (
<div
className={cn(
"h-min max-h-[calc(100dvh-17rem)] max-w-full relative overflow-auto rounded-md border",
(!rows.length || rows.length > 2) && "min-h-50"
)}
ref={scrollRef}
>
<div style={{ height: `${virtualizer.getTotalSize() + 48}px`, paddingTop, paddingBottom }}>
<table className="w-full text-sm text-nowrap">
<SmartTableHead table={table} />
<TableBody>
{rows.length ? (
virtualRows.map((virtualRow) => {
const row = rows[virtualRow.index]
return <SmartDeviceTableRow key={row.id} row={row} virtualRow={virtualRow} openSheet={openSheet} />
})
) : (
<TableRow>
<TableCell colSpan={colLength} className="h-24 text-center pointer-events-none">
{data ? t`No results.` : <LoaderCircleIcon className="animate-spin size-10 opacity-60 mx-auto" />}
</TableCell>
</TableRow>
)}
</TableBody>
</table>
</div>
</div>
)
})
function SmartTableHead({ table }: { table: TableType<SmartDeviceRecord> }) {
return (
<TableHeader className="sticky top-0 z-50 w-full border-b-2">
<div className="absolute -top-2 left-0 w-full h-4 bg-table-header z-50"></div>
{table.getHeaderGroups().map((headerGroup) => (
<TableRow key={headerGroup.id}>
{headerGroup.headers.map((header) => (
<TableHead key={header.id} className="px-2">
{header.isPlaceholder ? null : flexRender(header.column.columnDef.header, header.getContext())}
</TableHead>
))}
</TableRow>
))}
</TableHeader>
)
}
const SmartDeviceTableRow = memo(function SmartDeviceTableRow({
row,
virtualRow,
openSheet,
}: {
row: Row<SmartDeviceRecord>
virtualRow: VirtualItem
openSheet: (disk: SmartDeviceRecord) => void
}) {
return (
<TableRow
data-state={row.getIsSelected() && "selected"}
className="cursor-pointer"
onClick={() => openSheet(row.original)}
>
{row.getVisibleCells().map((cell) => (
<TableCell
key={cell.id}
className="md:ps-5 py-0"
style={{
height: virtualRow.size,
}}
>
{flexRender(cell.column.columnDef.cell, cell.getContext())}
</TableCell>
))}
</TableRow>
)
})
function DiskSheet({ function DiskSheet({
diskId, diskId,
open, open,

View File

@@ -46,7 +46,6 @@ export default function SystemdTable({ systemId }: { systemId?: string }) {
return setData([]) return setData([])
}, [systemId]) }, [systemId])
useEffect(() => { useEffect(() => {
const lastUpdated = data[0]?.updated ?? 0 const lastUpdated = data[0]?.updated ?? 0
@@ -360,15 +359,9 @@ function SystemdSheet({
return ( return (
<> <>
{hasCurrent ? current : notAvailable} {hasCurrent ? current : notAvailable}
{hasMax && ( {hasMax && <span className="text-muted-foreground ms-1.5">{`(${t`limit`}: ${max})`}</span>}
<span className="text-muted-foreground ms-1.5">
{`(${t`limit`}: ${max})`}
</span>
)}
{max === null && ( {max === null && (
<span className="text-muted-foreground ms-1.5"> <span className="text-muted-foreground ms-1.5">{`(${t`limit`}: ${t`Unlimited`.toLowerCase()})`}</span>
{`(${t`limit`}: ${t`Unlimited`.toLowerCase()})`}
</span>
)} )}
</> </>
) )
@@ -435,7 +428,7 @@ function SystemdSheet({
</tr> </tr>
) )
} }
const capitalize = (str: string) => `${str.charAt(0).toUpperCase()}${str.slice(1).toLowerCase()}` const capitalize = (str: string) => `${str.charAt(0).toUpperCase()}${str.slice(1).toLowerCase()}`
return ( return (
@@ -621,6 +614,7 @@ function SystemdSheet({
function SystemdTableHead({ table }: { table: TableType<SystemdRecord> }) { function SystemdTableHead({ table }: { table: TableType<SystemdRecord> }) {
return ( return (
<TableHeader className="sticky top-0 z-50 w-full border-b-2"> <TableHeader className="sticky top-0 z-50 w-full border-b-2">
<div className="absolute -top-2 left-0 w-full h-4 bg-table-header z-50"></div>
{table.getHeaderGroups().map((headerGroup) => ( {table.getHeaderGroups().map((headerGroup) => (
<tr key={headerGroup.id}> <tr key={headerGroup.id}>
{headerGroup.headers.map((header) => { {headerGroup.headers.map((header) => {

View File

@@ -198,32 +198,19 @@ export function SystemsTableColumns(viewMode: "table" | "grid"): ColumnDef<Syste
}, },
{ {
id: "loadAverage", id: "loadAverage",
accessorFn: ({ info }) => { accessorFn: ({ info }) => info.la?.reduce((acc, curr) => acc + curr, 0),
const sum = info.la?.reduce((acc, curr) => acc + curr, 0)
// TODO: remove this in future release in favor of la array
if (!sum) {
return (info.l1 ?? 0) + (info.l5 ?? 0) + (info.l15 ?? 0) || undefined
}
return sum || undefined
},
name: () => t({ message: "Load Avg", comment: "Short label for load average" }), name: () => t({ message: "Load Avg", comment: "Short label for load average" }),
size: 0, size: 0,
Icon: HourglassIcon, Icon: HourglassIcon,
header: sortableHeader, header: sortableHeader,
cell(info: CellContext<SystemRecord, unknown>) { cell(info: CellContext<SystemRecord, unknown>) {
const { info: sysInfo, status } = info.row.original const { info: sysInfo, status } = info.row.original
const { major, minor } = parseSemVer(sysInfo.v)
const { colorWarn = 65, colorCrit = 90 } = useStore($userSettings, { keys: ["colorWarn", "colorCrit"] }) const { colorWarn = 65, colorCrit = 90 } = useStore($userSettings, { keys: ["colorWarn", "colorCrit"] })
// agent version const loadAverages = sysInfo.la || []
const { minor, patch } = parseSemVer(sysInfo.v)
let loadAverages = sysInfo.la
// use legacy load averages if agent version is less than 12.1.0
if (!loadAverages || (minor === 12 && patch < 1)) {
loadAverages = [sysInfo.l1 ?? 0, sysInfo.l5 ?? 0, sysInfo.l15 ?? 0]
}
const max = Math.max(...loadAverages) const max = Math.max(...loadAverages)
if (max === 0 && (status === SystemStatus.Paused || minor < 12)) { if (max === 0 && (status === SystemStatus.Paused || (major < 1 && minor < 13))) {
return null return null
} }
@@ -248,19 +235,20 @@ export function SystemsTableColumns(viewMode: "table" | "grid"): ColumnDef<Syste
}, },
}, },
{ {
accessorFn: ({ info }) => info.bb || (info.b || 0) * 1024 * 1024 || undefined, accessorFn: ({ info, status }) => (status !== SystemStatus.Up ? undefined : info.bb),
id: "net", id: "net",
name: () => t`Net`, name: () => t`Net`,
size: 0, size: 0,
Icon: EthernetIcon, Icon: EthernetIcon,
header: sortableHeader, header: sortableHeader,
sortUndefined: "last",
cell(info) { cell(info) {
const sys = info.row.original const val = info.getValue() as number | undefined
const userSettings = useStore($userSettings, { keys: ["unitNet"] }) if (val === undefined) {
if (sys.status === SystemStatus.Paused) {
return null return null
} }
const { value, unit } = formatBytes((info.getValue() || 0) as number, true, userSettings.unitNet, false) const userSettings = useStore($userSettings, { keys: ["unitNet"] })
const { value, unit } = formatBytes(val, true, userSettings.unitNet, false)
return ( return (
<span className="tabular-nums whitespace-nowrap"> <span className="tabular-nums whitespace-nowrap">
{decimalString(value, value >= 100 ? 1 : 2)} {unit} {decimalString(value, value >= 100 ? 1 : 2)} {unit}

View File

@@ -391,6 +391,7 @@ function SystemsTableHead({ table }: { table: TableType<SystemRecord> }) {
const { t } = useLingui() const { t } = useLingui()
return ( return (
<TableHeader className="sticky top-0 z-50 w-full border-b-2"> <TableHeader className="sticky top-0 z-50 w-full border-b-2">
<div className="absolute -top-2 left-0 w-full h-4 bg-table-header z-50"></div>
{table.getHeaderGroups().map((headerGroup) => ( {table.getHeaderGroups().map((headerGroup) => (
<tr key={headerGroup.id}> <tr key={headerGroup.id}>
{headerGroup.headers.map((header) => { {headerGroup.headers.map((header) => {

View File

@@ -185,3 +185,14 @@ export function PlugChargingIcon(props: SVGProps<SVGSVGElement>) {
</svg> </svg>
) )
} }
// Lucide Icons (ISC) - used for ports
export function SquareArrowRightEnterIcon(props: SVGProps<SVGSVGElement>) {
return (
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" {...props}>
<path d="m10 16 4-4-4-4" />
<path d="M3 12h11" />
<path d="M3 8V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2v14a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-3" />
</svg>
)
}

View File

@@ -3,6 +3,9 @@ import type { AlertMap, ChartTimes, SystemRecord, UserSettings } from "@/types"
import { pb } from "./api" import { pb } from "./api"
import { Unit } from "./enums" import { Unit } from "./enums"
/** Default layout width. Used as fallback when user setting is unset. */
export const defaultLayoutWidth = 1580
/** Store if user is authenticated */ /** Store if user is authenticated */
export const $authenticated = atom(pb.authStore.isValid) export const $authenticated = atom(pb.authStore.isValid)

View File

@@ -14,7 +14,14 @@ import { Toaster } from "@/components/ui/toaster.tsx"
import { alertManager } from "@/lib/alerts" import { alertManager } from "@/lib/alerts"
import { pb, updateUserSettings } from "@/lib/api.ts" import { pb, updateUserSettings } from "@/lib/api.ts"
import { dynamicActivate, getLocale } from "@/lib/i18n" import { dynamicActivate, getLocale } from "@/lib/i18n"
import { $authenticated, $copyContent, $direction, $publicKey, $userSettings } from "@/lib/stores.ts" import {
$authenticated,
$copyContent,
$direction,
$publicKey,
$userSettings,
defaultLayoutWidth,
} from "@/lib/stores.ts"
import * as systemsManager from "@/lib/systemsManager.ts" import * as systemsManager from "@/lib/systemsManager.ts"
const LoginPage = lazy(() => import("@/components/login/login.tsx")) const LoginPage = lazy(() => import("@/components/login/login.tsx"))
@@ -100,7 +107,7 @@ const Layout = () => {
<LoginPage /> <LoginPage />
</Suspense> </Suspense>
) : ( ) : (
<div style={{ "--container": `${userSettings.layoutWidth ?? 1580}px` } as React.CSSProperties}> <div style={{ "--container": `${userSettings.layoutWidth ?? defaultLayoutWidth}px` } as React.CSSProperties}>
<div className="container"> <div className="container">
<Navbar /> <Navbar />
</div> </div>

View File

@@ -45,12 +45,6 @@ export interface SystemInfo {
c: number c: number
/** cpu model */ /** cpu model */
m: string m: string
/** load average 1 minute */
l1?: number
/** load average 5 minutes */
l5?: number
/** load average 15 minutes */
l15?: number
/** load average */ /** load average */
la?: [number, number, number] la?: [number, number, number]
/** operating system */ /** operating system */
@@ -94,13 +88,6 @@ export interface SystemStats {
cpub?: number[] cpub?: number[]
/** per-core cpu usage [CPU0..] (0-100 integers) */ /** per-core cpu usage [CPU0..] (0-100 integers) */
cpus?: number[] cpus?: number[]
// TODO: remove these in future release in favor of la
/** load average 1 minute */
l1?: number
/** load average 5 minutes */
l5?: number
/** load average 15 minutes */
l15?: number
/** load average */ /** load average */
la?: [number, number, number] la?: [number, number, number]
/** total memory (gb) */ /** total memory (gb) */
@@ -267,6 +254,7 @@ export interface ContainerRecord extends RecordModel {
system: string system: string
name: string name: string
image: string image: string
ports: string
cpu: number cpu: number
memory: number memory: number
net: number net: number

View File

@@ -98,7 +98,7 @@ func ClearCollection(t testing.TB, app core.App, collectionName string) error {
} }
func (h *TestHub) Cleanup() { func (h *TestHub) Cleanup() {
h.GetAlertManager().StopWorker() h.GetAlertManager().Stop()
h.GetSystemManager().RemoveAllSystems() h.GetSystemManager().RemoveAllSystems()
h.TestApp.Cleanup() h.TestApp.Cleanup()
} }