mirror of
https://github.com/henrygd/beszel.git
synced 2026-03-22 21:46:18 +01:00
Compare commits
39 Commits
v0.18.4
...
e4e0affbc1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e4e0affbc1 | ||
|
|
c3a0e645ee | ||
|
|
c6c3950fb0 | ||
|
|
48ddc96a0d | ||
|
|
704cb86de8 | ||
|
|
2854ce882f | ||
|
|
ed50367f70 | ||
|
|
4ebe869591 | ||
|
|
c9bbbe91f2 | ||
|
|
5bfe4f6970 | ||
|
|
380d2b1091 | ||
|
|
a7f99e7a8c | ||
|
|
bd94a9d142 | ||
|
|
8e2316f845 | ||
|
|
0d3dfcb207 | ||
|
|
b386ce5190 | ||
|
|
e527534016 | ||
|
|
ec7ad632a9 | ||
|
|
963fce5a33 | ||
|
|
d38c0da06d | ||
|
|
cae6ac4626 | ||
|
|
6b1ff264f2 | ||
|
|
35d0e792ad | ||
|
|
654cd06b19 | ||
|
|
5e1b028130 | ||
|
|
638e7dc12a | ||
|
|
73c262455d | ||
|
|
0c4d2edd45 | ||
|
|
8f23fff1c9 | ||
|
|
02c1a0c13d | ||
|
|
69fdcb36ab | ||
|
|
b91eb6de40 | ||
|
|
ec69f6c6e0 | ||
|
|
a86cb91e07 | ||
|
|
004841717a | ||
|
|
096296ba7b | ||
|
|
b012df5669 | ||
|
|
12545b4b6d | ||
|
|
9e2296452b |
@@ -6,7 +6,6 @@ package agent
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -14,6 +13,7 @@ import (
|
||||
"github.com/gliderlabs/ssh"
|
||||
"github.com/henrygd/beszel"
|
||||
"github.com/henrygd/beszel/agent/deltatracker"
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/common"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
gossh "golang.org/x/crypto/ssh"
|
||||
@@ -68,11 +68,11 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {
|
||||
slog.Info("Data directory", "path", agent.dataDir)
|
||||
}
|
||||
|
||||
agent.memCalc, _ = GetEnv("MEM_CALC")
|
||||
agent.memCalc, _ = utils.GetEnv("MEM_CALC")
|
||||
agent.sensorConfig = agent.newSensorConfig()
|
||||
|
||||
// Parse disk usage cache duration (e.g., "15m", "1h") to avoid waking sleeping disks
|
||||
if diskUsageCache, exists := GetEnv("DISK_USAGE_CACHE"); exists {
|
||||
if diskUsageCache, exists := utils.GetEnv("DISK_USAGE_CACHE"); exists {
|
||||
if duration, err := time.ParseDuration(diskUsageCache); err == nil {
|
||||
agent.diskUsageCacheDuration = duration
|
||||
slog.Info("DISK_USAGE_CACHE", "duration", duration)
|
||||
@@ -82,7 +82,7 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {
|
||||
}
|
||||
|
||||
// Set up slog with a log level determined by the LOG_LEVEL env var
|
||||
if logLevelStr, exists := GetEnv("LOG_LEVEL"); exists {
|
||||
if logLevelStr, exists := utils.GetEnv("LOG_LEVEL"); exists {
|
||||
switch strings.ToLower(logLevelStr) {
|
||||
case "debug":
|
||||
agent.debug = true
|
||||
@@ -103,7 +103,7 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {
|
||||
agent.refreshSystemDetails()
|
||||
|
||||
// SMART_INTERVAL env var to update smart data at this interval
|
||||
if smartIntervalEnv, exists := GetEnv("SMART_INTERVAL"); exists {
|
||||
if smartIntervalEnv, exists := utils.GetEnv("SMART_INTERVAL"); exists {
|
||||
if duration, err := time.ParseDuration(smartIntervalEnv); err == nil && duration > 0 {
|
||||
agent.systemDetails.SmartInterval = duration
|
||||
slog.Info("SMART_INTERVAL", "duration", duration)
|
||||
@@ -148,15 +148,6 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {
|
||||
return agent, nil
|
||||
}
|
||||
|
||||
// GetEnv looks up an agent setting in the environment. The variable named
// "BESZEL_AGENT_" + key is consulted first; if it is not set, the bare key is
// looked up instead. It returns the value and whether a variable was found.
func GetEnv(key string) (value string, exists bool) {
	prefixed, found := os.LookupEnv("BESZEL_AGENT_" + key)
	if found {
		return prefixed, true
	}
	// Older configurations used the unprefixed name.
	return os.LookupEnv(key)
}
|
||||
|
||||
func (a *Agent) gatherStats(options common.DataRequestOptions) *system.CombinedData {
|
||||
a.Lock()
|
||||
defer a.Unlock()
|
||||
@@ -213,7 +204,7 @@ func (a *Agent) gatherStats(options common.DataRequestOptions) *system.CombinedD
|
||||
data.Stats.ExtraFs[key] = stats
|
||||
// Add percentages to Info struct for dashboard
|
||||
if stats.DiskTotal > 0 {
|
||||
pct := twoDecimals((stats.DiskUsed / stats.DiskTotal) * 100)
|
||||
pct := utils.TwoDecimals((stats.DiskUsed / stats.DiskTotal) * 100)
|
||||
data.Info.ExtraFsPct[key] = pct
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel"
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/common"
|
||||
|
||||
"github.com/fxamacker/cbor/v2"
|
||||
@@ -43,7 +44,7 @@ type WebSocketClient struct {
|
||||
// newWebSocketClient creates a new WebSocket client for the given agent.
|
||||
// It reads configuration from environment variables and validates the hub URL.
|
||||
func newWebSocketClient(agent *Agent) (client *WebSocketClient, err error) {
|
||||
hubURLStr, exists := GetEnv("HUB_URL")
|
||||
hubURLStr, exists := utils.GetEnv("HUB_URL")
|
||||
if !exists {
|
||||
return nil, errors.New("HUB_URL environment variable not set")
|
||||
}
|
||||
@@ -72,12 +73,12 @@ func newWebSocketClient(agent *Agent) (client *WebSocketClient, err error) {
|
||||
// If neither is set, it returns an error.
|
||||
func getToken() (string, error) {
|
||||
// get token from env var
|
||||
token, _ := GetEnv("TOKEN")
|
||||
token, _ := utils.GetEnv("TOKEN")
|
||||
if token != "" {
|
||||
return token, nil
|
||||
}
|
||||
// get token from file
|
||||
tokenFile, _ := GetEnv("TOKEN_FILE")
|
||||
tokenFile, _ := utils.GetEnv("TOKEN_FILE")
|
||||
if tokenFile == "" {
|
||||
return "", errors.New("must set TOKEN or TOKEN_FILE")
|
||||
}
|
||||
@@ -197,7 +198,7 @@ func (client *WebSocketClient) handleAuthChallenge(msg *common.HubRequest[cbor.R
|
||||
}
|
||||
|
||||
if authRequest.NeedSysInfo {
|
||||
response.Name, _ = GetEnv("SYSTEM_NAME")
|
||||
response.Name, _ = utils.GetEnv("SYSTEM_NAME")
|
||||
response.Hostname = client.agent.systemDetails.Hostname
|
||||
serverAddr := client.agent.connectionManager.serverOptions.Addr
|
||||
_, response.Port, _ = net.SplitHostPort(serverAddr)
|
||||
|
||||
@@ -14,10 +14,10 @@ var lastPerCoreCpuTimes = make(map[uint16][]cpu.TimesStat)
|
||||
// init initializes the CPU monitoring by storing the initial CPU times
|
||||
// for the default 60-second cache interval.
|
||||
func init() {
|
||||
if times, err := cpu.Times(false); err == nil {
|
||||
if times, err := cpu.Times(false); err == nil && len(times) > 0 {
|
||||
lastCpuTimes[60000] = times[0]
|
||||
}
|
||||
if perCoreTimes, err := cpu.Times(true); err == nil {
|
||||
if perCoreTimes, err := cpu.Times(true); err == nil && len(perCoreTimes) > 0 {
|
||||
lastPerCoreCpuTimes[60000] = perCoreTimes
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,6 +6,8 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
)
|
||||
|
||||
// GetDataDir returns the path to the data directory for the agent and an error
|
||||
@@ -16,7 +18,7 @@ func GetDataDir(dataDirs ...string) (string, error) {
|
||||
return testDataDirs(dataDirs)
|
||||
}
|
||||
|
||||
dataDir, _ := GetEnv("DATA_DIR")
|
||||
dataDir, _ := utils.GetEnv("DATA_DIR")
|
||||
if dataDir != "" {
|
||||
dataDirs = append(dataDirs, dataDir)
|
||||
}
|
||||
|
||||
554
agent/disk.go
554
agent/disk.go
@@ -8,11 +8,31 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
|
||||
"github.com/shirou/gopsutil/v4/disk"
|
||||
)
|
||||
|
||||
// fsRegistrationContext holds the shared lookup state needed to resolve a
|
||||
// filesystem into the tracked fsStats key and metadata.
|
||||
type fsRegistrationContext struct {
|
||||
filesystem string // value of optional FILESYSTEM env var
|
||||
isWindows bool
|
||||
efPath string // path to extra filesystems (default "/extra-filesystems")
|
||||
diskIoCounters map[string]disk.IOCountersStat
|
||||
}
|
||||
|
||||
// diskDiscovery groups the transient state for a single initializeDiskInfo run so
|
||||
// helper methods can share the same partitions, mount paths, and lookup functions
|
||||
type diskDiscovery struct {
|
||||
agent *Agent
|
||||
rootMountPoint string
|
||||
partitions []disk.PartitionStat
|
||||
usageFn func(string) (*disk.UsageStat, error)
|
||||
ctx fsRegistrationContext
|
||||
}
|
||||
|
||||
// parseFilesystemEntry parses a filesystem entry in the format "device__customname"
|
||||
// Returns the device/filesystem part and the custom name part
|
||||
func parseFilesystemEntry(entry string) (device, customName string) {
|
||||
@@ -26,19 +46,230 @@ func parseFilesystemEntry(entry string) (device, customName string) {
|
||||
return device, customName
|
||||
}
|
||||
|
||||
// extraFilesystemPartitionInfo derives the I/O device and optional display name
|
||||
// for a mounted /extra-filesystems partition. Prefer the partition device reported
|
||||
// by the system and only use the folder name for custom naming metadata.
|
||||
func extraFilesystemPartitionInfo(p disk.PartitionStat) (device, customName string) {
|
||||
device = strings.TrimSpace(p.Device)
|
||||
folderDevice, customName := parseFilesystemEntry(filepath.Base(p.Mountpoint))
|
||||
if device == "" {
|
||||
device = folderDevice
|
||||
}
|
||||
return device, customName
|
||||
}
|
||||
|
||||
// isDockerSpecialMountpoint reports whether mountpoint is exactly one of
// /etc/hosts, /etc/resolv.conf, or /etc/hostname.
func isDockerSpecialMountpoint(mountpoint string) bool {
	return mountpoint == "/etc/hosts" ||
		mountpoint == "/etc/resolv.conf" ||
		mountpoint == "/etc/hostname"
}
|
||||
|
||||
// registerFilesystemStats resolves the tracked key and stats payload for a
|
||||
// filesystem before it is inserted into fsStats.
|
||||
func registerFilesystemStats(existing map[string]*system.FsStats, device, mountpoint string, root bool, customName string, ctx fsRegistrationContext) (string, *system.FsStats, bool) {
|
||||
key := device
|
||||
if !ctx.isWindows {
|
||||
key = filepath.Base(device)
|
||||
}
|
||||
|
||||
if root {
|
||||
// Try to map root device to a diskIoCounters entry. First checks for an
|
||||
// exact key match, then uses findIoDevice for normalized / prefix-based
|
||||
// matching (e.g. nda0p2 -> nda0), and finally falls back to FILESYSTEM.
|
||||
if _, ioMatch := ctx.diskIoCounters[key]; !ioMatch {
|
||||
if matchedKey, match := findIoDevice(key, ctx.diskIoCounters); match {
|
||||
key = matchedKey
|
||||
} else if ctx.filesystem != "" {
|
||||
if matchedKey, match := findIoDevice(ctx.filesystem, ctx.diskIoCounters); match {
|
||||
key = matchedKey
|
||||
}
|
||||
}
|
||||
if _, ioMatch = ctx.diskIoCounters[key]; !ioMatch {
|
||||
slog.Warn("Root I/O unmapped; set FILESYSTEM", "device", device, "mountpoint", mountpoint)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Check if non-root has diskstats and prefer the folder device for
|
||||
// /extra-filesystems mounts when the discovered partition device is a
|
||||
// mapper path (e.g. luks UUID) that obscures the underlying block device.
|
||||
if _, ioMatch := ctx.diskIoCounters[key]; !ioMatch {
|
||||
if strings.HasPrefix(mountpoint, ctx.efPath) {
|
||||
folderDevice, _ := parseFilesystemEntry(filepath.Base(mountpoint))
|
||||
if folderDevice != "" {
|
||||
if matchedKey, match := findIoDevice(folderDevice, ctx.diskIoCounters); match {
|
||||
key = matchedKey
|
||||
}
|
||||
}
|
||||
}
|
||||
if _, ioMatch = ctx.diskIoCounters[key]; !ioMatch {
|
||||
if matchedKey, match := findIoDevice(key, ctx.diskIoCounters); match {
|
||||
key = matchedKey
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if _, exists := existing[key]; exists {
|
||||
return "", nil, false
|
||||
}
|
||||
|
||||
fsStats := &system.FsStats{Root: root, Mountpoint: mountpoint}
|
||||
if customName != "" {
|
||||
fsStats.Name = customName
|
||||
}
|
||||
return key, fsStats, true
|
||||
}
|
||||
|
||||
// addFsStat inserts a discovered filesystem if it resolves to a new tracking
|
||||
// key. The key selection itself lives in buildFsStatRegistration so that logic
|
||||
// can stay directly unit-tested.
|
||||
func (d *diskDiscovery) addFsStat(device, mountpoint string, root bool, customName string) {
|
||||
key, fsStats, ok := registerFilesystemStats(d.agent.fsStats, device, mountpoint, root, customName, d.ctx)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
d.agent.fsStats[key] = fsStats
|
||||
name := key
|
||||
if customName != "" {
|
||||
name = customName
|
||||
}
|
||||
slog.Info("Detected disk", "name", name, "device", device, "mount", mountpoint, "io", key, "root", root)
|
||||
}
|
||||
|
||||
// addConfiguredRootFs resolves FILESYSTEM against partitions first, then falls
|
||||
// back to direct diskstats matching for setups like ZFS where partitions do not
|
||||
// expose the physical device name.
|
||||
func (d *diskDiscovery) addConfiguredRootFs() bool {
|
||||
if d.ctx.filesystem == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, p := range d.partitions {
|
||||
if filesystemMatchesPartitionSetting(d.ctx.filesystem, p) {
|
||||
d.addFsStat(p.Device, p.Mountpoint, true, "")
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// FILESYSTEM may name a physical disk absent from partitions (e.g. ZFS lists
|
||||
// dataset paths like zroot/ROOT/default, not block devices).
|
||||
if ioKey, match := findIoDevice(d.ctx.filesystem, d.ctx.diskIoCounters); match {
|
||||
d.agent.fsStats[ioKey] = &system.FsStats{Root: true, Mountpoint: d.rootMountPoint}
|
||||
return true
|
||||
}
|
||||
|
||||
slog.Warn("Partition details not found", "filesystem", d.ctx.filesystem)
|
||||
return false
|
||||
}
|
||||
|
||||
func isRootFallbackPartition(p disk.PartitionStat, rootMountPoint string) bool {
|
||||
return p.Mountpoint == rootMountPoint ||
|
||||
(isDockerSpecialMountpoint(p.Mountpoint) && strings.HasPrefix(p.Device, "/dev"))
|
||||
}
|
||||
|
||||
// addPartitionRootFs handles the non-configured root fallback path when a
|
||||
// partition looks like the active root mount but still needs translating to an
|
||||
// I/O device key.
|
||||
func (d *diskDiscovery) addPartitionRootFs(device, mountpoint string) bool {
|
||||
fs, match := findIoDevice(filepath.Base(device), d.ctx.diskIoCounters)
|
||||
if !match {
|
||||
return false
|
||||
}
|
||||
// The resolved I/O device is already known here, so use it directly to avoid
|
||||
// a second fallback search inside buildFsStatRegistration.
|
||||
d.addFsStat(fs, mountpoint, true, "")
|
||||
return true
|
||||
}
|
||||
|
||||
// addLastResortRootFs is only used when neither FILESYSTEM nor partition-based
|
||||
// heuristics can identify root, so it picks the busiest I/O device as a final
|
||||
// fallback and preserves the root mountpoint for usage collection.
|
||||
func (d *diskDiscovery) addLastResortRootFs() {
|
||||
rootKey := mostActiveIoDevice(d.ctx.diskIoCounters)
|
||||
if rootKey != "" {
|
||||
slog.Warn("Using most active device for root I/O; set FILESYSTEM to override", "device", rootKey)
|
||||
} else {
|
||||
rootKey = filepath.Base(d.rootMountPoint)
|
||||
if _, exists := d.agent.fsStats[rootKey]; exists {
|
||||
rootKey = "root"
|
||||
}
|
||||
slog.Warn("Root I/O device not detected; set FILESYSTEM to override")
|
||||
}
|
||||
d.agent.fsStats[rootKey] = &system.FsStats{Root: true, Mountpoint: d.rootMountPoint}
|
||||
}
|
||||
|
||||
// findPartitionByFilesystemSetting matches an EXTRA_FILESYSTEMS entry against a
|
||||
// discovered partition either by mountpoint or by device suffix.
|
||||
func findPartitionByFilesystemSetting(filesystem string, partitions []disk.PartitionStat) (disk.PartitionStat, bool) {
|
||||
for _, p := range partitions {
|
||||
if strings.HasSuffix(p.Device, filesystem) || p.Mountpoint == filesystem {
|
||||
return p, true
|
||||
}
|
||||
}
|
||||
return disk.PartitionStat{}, false
|
||||
}
|
||||
|
||||
// addConfiguredExtraFsEntry resolves one EXTRA_FILESYSTEMS entry, preferring a
|
||||
// discovered partition and falling back to any path that disk.Usage accepts.
|
||||
func (d *diskDiscovery) addConfiguredExtraFsEntry(filesystem, customName string) {
|
||||
if p, found := findPartitionByFilesystemSetting(filesystem, d.partitions); found {
|
||||
d.addFsStat(p.Device, p.Mountpoint, false, customName)
|
||||
return
|
||||
}
|
||||
|
||||
if _, err := d.usageFn(filesystem); err == nil {
|
||||
d.addFsStat(filepath.Base(filesystem), filesystem, false, customName)
|
||||
return
|
||||
} else {
|
||||
slog.Error("Invalid filesystem", "name", filesystem, "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// addConfiguredExtraFilesystems parses and registers the comma-separated
|
||||
// EXTRA_FILESYSTEMS env var entries.
|
||||
func (d *diskDiscovery) addConfiguredExtraFilesystems(extraFilesystems string) {
|
||||
for fsEntry := range strings.SplitSeq(extraFilesystems, ",") {
|
||||
filesystem, customName := parseFilesystemEntry(fsEntry)
|
||||
d.addConfiguredExtraFsEntry(filesystem, customName)
|
||||
}
|
||||
}
|
||||
|
||||
// addPartitionExtraFs registers partitions mounted under /extra-filesystems so
|
||||
// their display names can come from the folder name while their I/O keys still
|
||||
// prefer the underlying partition device.
|
||||
func (d *diskDiscovery) addPartitionExtraFs(p disk.PartitionStat) {
|
||||
if !strings.HasPrefix(p.Mountpoint, d.ctx.efPath) {
|
||||
return
|
||||
}
|
||||
device, customName := extraFilesystemPartitionInfo(p)
|
||||
d.addFsStat(device, p.Mountpoint, false, customName)
|
||||
}
|
||||
|
||||
// addExtraFilesystemFolders handles bare directories under /extra-filesystems
|
||||
// that may not appear in partition discovery, while skipping mountpoints that
|
||||
// were already registered from higher-fidelity sources.
|
||||
func (d *diskDiscovery) addExtraFilesystemFolders(folderNames []string) {
|
||||
existingMountpoints := make(map[string]bool, len(d.agent.fsStats))
|
||||
for _, stats := range d.agent.fsStats {
|
||||
existingMountpoints[stats.Mountpoint] = true
|
||||
}
|
||||
|
||||
for _, folderName := range folderNames {
|
||||
mountpoint := filepath.Join(d.ctx.efPath, folderName)
|
||||
slog.Debug("/extra-filesystems", "mountpoint", mountpoint)
|
||||
if existingMountpoints[mountpoint] {
|
||||
continue
|
||||
}
|
||||
device, customName := parseFilesystemEntry(folderName)
|
||||
d.addFsStat(device, mountpoint, false, customName)
|
||||
}
|
||||
}
|
||||
|
||||
// Sets up the filesystems to monitor for disk usage and I/O.
|
||||
func (a *Agent) initializeDiskInfo() {
|
||||
filesystem, _ := GetEnv("FILESYSTEM")
|
||||
efPath := "/extra-filesystems"
|
||||
filesystem, _ := utils.GetEnv("FILESYSTEM")
|
||||
hasRoot := false
|
||||
isWindows := runtime.GOOS == "windows"
|
||||
|
||||
@@ -55,164 +286,223 @@ func (a *Agent) initializeDiskInfo() {
|
||||
}
|
||||
}
|
||||
|
||||
// ioContext := context.WithValue(a.sensorsContext,
|
||||
// common.EnvKey, common.EnvMap{common.HostProcEnvKey: "/tmp/testproc"},
|
||||
// )
|
||||
// diskIoCounters, err := disk.IOCountersWithContext(ioContext)
|
||||
|
||||
diskIoCounters, err := disk.IOCounters()
|
||||
if err != nil {
|
||||
slog.Error("Error getting diskstats", "err", err)
|
||||
}
|
||||
slog.Debug("Disk I/O", "diskstats", diskIoCounters)
|
||||
|
||||
// Helper function to add a filesystem to fsStats if it doesn't exist
|
||||
addFsStat := func(device, mountpoint string, root bool, customName ...string) {
|
||||
var key string
|
||||
if isWindows {
|
||||
key = device
|
||||
} else {
|
||||
key = filepath.Base(device)
|
||||
}
|
||||
var ioMatch bool
|
||||
if _, exists := a.fsStats[key]; !exists {
|
||||
if root {
|
||||
slog.Info("Detected root device", "name", key)
|
||||
// Check if root device is in /proc/diskstats. Do not guess a
|
||||
// fallback device for root: that can misattribute root I/O to a
|
||||
// different disk while usage remains tied to root mountpoint.
|
||||
if _, ioMatch = diskIoCounters[key]; !ioMatch {
|
||||
if matchedKey, match := findIoDevice(filesystem, diskIoCounters); match {
|
||||
key = matchedKey
|
||||
ioMatch = true
|
||||
} else {
|
||||
slog.Warn("Root I/O unmapped; set FILESYSTEM", "device", device, "mountpoint", mountpoint)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Check if non-root has diskstats and fall back to folder name if not
|
||||
// Scenario: device is encrypted and named luks-2bcb02be-999d-4417-8d18-5c61e660fb6e - not in /proc/diskstats.
|
||||
// However, the device can be specified by mounting folder from luks device at /extra-filesystems/sda1
|
||||
if _, ioMatch = diskIoCounters[key]; !ioMatch {
|
||||
efBase := filepath.Base(mountpoint)
|
||||
if _, ioMatch = diskIoCounters[efBase]; ioMatch {
|
||||
key = efBase
|
||||
}
|
||||
}
|
||||
}
|
||||
fsStats := &system.FsStats{Root: root, Mountpoint: mountpoint}
|
||||
if len(customName) > 0 && customName[0] != "" {
|
||||
fsStats.Name = customName[0]
|
||||
}
|
||||
a.fsStats[key] = fsStats
|
||||
}
|
||||
ctx := fsRegistrationContext{
|
||||
filesystem: filesystem,
|
||||
isWindows: isWindows,
|
||||
diskIoCounters: diskIoCounters,
|
||||
efPath: "/extra-filesystems",
|
||||
}
|
||||
|
||||
// Get the appropriate root mount point for this system
|
||||
rootMountPoint := a.getRootMountPoint()
|
||||
|
||||
// Use FILESYSTEM env var to find root filesystem
|
||||
if filesystem != "" {
|
||||
for _, p := range partitions {
|
||||
if strings.HasSuffix(p.Device, filesystem) || p.Mountpoint == filesystem {
|
||||
addFsStat(p.Device, p.Mountpoint, true)
|
||||
hasRoot = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasRoot {
|
||||
slog.Warn("Partition details not found", "filesystem", filesystem)
|
||||
}
|
||||
discovery := diskDiscovery{
|
||||
agent: a,
|
||||
rootMountPoint: a.getRootMountPoint(),
|
||||
partitions: partitions,
|
||||
usageFn: disk.Usage,
|
||||
ctx: ctx,
|
||||
}
|
||||
|
||||
// Add EXTRA_FILESYSTEMS env var values to fsStats
|
||||
if extraFilesystems, exists := GetEnv("EXTRA_FILESYSTEMS"); exists {
|
||||
for fsEntry := range strings.SplitSeq(extraFilesystems, ",") {
|
||||
// Parse custom name from format: device__customname
|
||||
fs, customName := parseFilesystemEntry(fsEntry)
|
||||
hasRoot = discovery.addConfiguredRootFs()
|
||||
|
||||
found := false
|
||||
for _, p := range partitions {
|
||||
if strings.HasSuffix(p.Device, fs) || p.Mountpoint == fs {
|
||||
addFsStat(p.Device, p.Mountpoint, false, customName)
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
// if not in partitions, test if we can get disk usage
|
||||
if !found {
|
||||
if _, err := disk.Usage(fs); err == nil {
|
||||
addFsStat(filepath.Base(fs), fs, false, customName)
|
||||
} else {
|
||||
slog.Error("Invalid filesystem", "name", fs, "err", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
// Add EXTRA_FILESYSTEMS env var values to fsStats
|
||||
if extraFilesystems, exists := utils.GetEnv("EXTRA_FILESYSTEMS"); exists {
|
||||
discovery.addConfiguredExtraFilesystems(extraFilesystems)
|
||||
}
|
||||
|
||||
// Process partitions for various mount points
|
||||
for _, p := range partitions {
|
||||
// fmt.Println(p.Device, p.Mountpoint)
|
||||
// Binary root fallback or docker root fallback
|
||||
if !hasRoot && (p.Mountpoint == rootMountPoint || (isDockerSpecialMountpoint(p.Mountpoint) && strings.HasPrefix(p.Device, "/dev"))) {
|
||||
fs, match := findIoDevice(filepath.Base(p.Device), diskIoCounters)
|
||||
if match {
|
||||
addFsStat(fs, p.Mountpoint, true)
|
||||
hasRoot = true
|
||||
}
|
||||
}
|
||||
|
||||
// Check if device is in /extra-filesystems
|
||||
if strings.HasPrefix(p.Mountpoint, efPath) {
|
||||
device, customName := parseFilesystemEntry(p.Mountpoint)
|
||||
addFsStat(device, p.Mountpoint, false, customName)
|
||||
if !hasRoot && isRootFallbackPartition(p, discovery.rootMountPoint) {
|
||||
hasRoot = discovery.addPartitionRootFs(p.Device, p.Mountpoint)
|
||||
}
|
||||
discovery.addPartitionExtraFs(p)
|
||||
}
|
||||
|
||||
// Check all folders in /extra-filesystems and add them if not already present
|
||||
if folders, err := os.ReadDir(efPath); err == nil {
|
||||
existingMountpoints := make(map[string]bool)
|
||||
for _, stats := range a.fsStats {
|
||||
existingMountpoints[stats.Mountpoint] = true
|
||||
}
|
||||
if folders, err := os.ReadDir(discovery.ctx.efPath); err == nil {
|
||||
folderNames := make([]string, 0, len(folders))
|
||||
for _, folder := range folders {
|
||||
if folder.IsDir() {
|
||||
mountpoint := filepath.Join(efPath, folder.Name())
|
||||
slog.Debug("/extra-filesystems", "mountpoint", mountpoint)
|
||||
if !existingMountpoints[mountpoint] {
|
||||
device, customName := parseFilesystemEntry(folder.Name())
|
||||
addFsStat(device, mountpoint, false, customName)
|
||||
}
|
||||
folderNames = append(folderNames, folder.Name())
|
||||
}
|
||||
}
|
||||
discovery.addExtraFilesystemFolders(folderNames)
|
||||
}
|
||||
|
||||
// If no root filesystem set, use fallback
|
||||
// If no root filesystem set, try the most active I/O device as a last
|
||||
// resort (e.g. ZFS where dataset names are unrelated to disk names).
|
||||
if !hasRoot {
|
||||
rootKey := filepath.Base(rootMountPoint)
|
||||
if _, exists := a.fsStats[rootKey]; exists {
|
||||
rootKey = "root"
|
||||
}
|
||||
slog.Warn("Root device not detected; root I/O disabled", "mountpoint", rootMountPoint)
|
||||
a.fsStats[rootKey] = &system.FsStats{Root: true, Mountpoint: rootMountPoint}
|
||||
discovery.addLastResortRootFs()
|
||||
}
|
||||
|
||||
a.pruneDuplicateRootExtraFilesystems()
|
||||
a.initializeDiskIoStats(diskIoCounters)
|
||||
}
|
||||
|
||||
// Returns matching device from /proc/diskstats.
|
||||
// bool is true if a match was found.
|
||||
func findIoDevice(filesystem string, diskIoCounters map[string]disk.IOCountersStat) (string, bool) {
|
||||
for _, d := range diskIoCounters {
|
||||
if d.Name == filesystem || (d.Label != "" && d.Label == filesystem) {
|
||||
return d.Name, true
|
||||
// Removes extra filesystems that mirror root usage (https://github.com/henrygd/beszel/issues/1428).
|
||||
func (a *Agent) pruneDuplicateRootExtraFilesystems() {
|
||||
var rootMountpoint string
|
||||
for _, stats := range a.fsStats {
|
||||
if stats != nil && stats.Root {
|
||||
rootMountpoint = stats.Mountpoint
|
||||
break
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
if rootMountpoint == "" {
|
||||
return
|
||||
}
|
||||
rootUsage, err := disk.Usage(rootMountpoint)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for name, stats := range a.fsStats {
|
||||
if stats == nil || stats.Root {
|
||||
continue
|
||||
}
|
||||
extraUsage, err := disk.Usage(stats.Mountpoint)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if hasSameDiskUsage(rootUsage, extraUsage) {
|
||||
slog.Info("Ignoring duplicate FS", "name", name, "mount", stats.Mountpoint)
|
||||
delete(a.fsStats, name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// hasSameDiskUsage compares root/extra usage with a small byte tolerance.
|
||||
func hasSameDiskUsage(a, b *disk.UsageStat) bool {
|
||||
if a == nil || b == nil || a.Total == 0 || b.Total == 0 {
|
||||
return false
|
||||
}
|
||||
// Allow minor drift between sequential disk usage calls.
|
||||
const toleranceBytes uint64 = 16 * 1024 * 1024
|
||||
return withinUsageTolerance(a.Total, b.Total, toleranceBytes) &&
|
||||
withinUsageTolerance(a.Used, b.Used, toleranceBytes)
|
||||
}
|
||||
|
||||
// withinUsageTolerance reports whether the absolute difference between a and b
// is no greater than tolerance.
func withinUsageTolerance(a, b, tolerance uint64) bool {
	// Order the operands first so the unsigned subtraction cannot wrap.
	lo, hi := a, b
	if lo > hi {
		lo, hi = hi, lo
	}
	return hi-lo <= tolerance
}
|
||||
|
||||
// ioMatchCandidate is one disk I/O device under consideration when ranking
// devices by activity.
type ioMatchCandidate struct {
	name  string // device name
	bytes uint64 // read bytes plus written bytes
	ops   uint64 // read count plus write count
}
|
||||
|
||||
// findIoDevice prefers exact device/label matches, then falls back to a
|
||||
// prefix-related candidate with the highest recent activity.
|
||||
func findIoDevice(filesystem string, diskIoCounters map[string]disk.IOCountersStat) (string, bool) {
|
||||
filesystem = normalizeDeviceName(filesystem)
|
||||
if filesystem == "" {
|
||||
return "", false
|
||||
}
|
||||
|
||||
candidates := []ioMatchCandidate{}
|
||||
|
||||
for _, d := range diskIoCounters {
|
||||
if normalizeDeviceName(d.Name) == filesystem || (d.Label != "" && normalizeDeviceName(d.Label) == filesystem) {
|
||||
return d.Name, true
|
||||
}
|
||||
if prefixRelated(normalizeDeviceName(d.Name), filesystem) ||
|
||||
(d.Label != "" && prefixRelated(normalizeDeviceName(d.Label), filesystem)) {
|
||||
candidates = append(candidates, ioMatchCandidate{
|
||||
name: d.Name,
|
||||
bytes: d.ReadBytes + d.WriteBytes,
|
||||
ops: d.ReadCount + d.WriteCount,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if len(candidates) == 0 {
|
||||
return "", false
|
||||
}
|
||||
|
||||
best := candidates[0]
|
||||
for _, c := range candidates[1:] {
|
||||
if c.bytes > best.bytes ||
|
||||
(c.bytes == best.bytes && c.ops > best.ops) ||
|
||||
(c.bytes == best.bytes && c.ops == best.ops && c.name < best.name) {
|
||||
best = c
|
||||
}
|
||||
}
|
||||
|
||||
slog.Info("Using disk I/O fallback", "requested", filesystem, "selected", best.name)
|
||||
return best.name, true
|
||||
}
|
||||
|
||||
// mostActiveIoDevice returns the device with the highest I/O activity,
|
||||
// or "" if diskIoCounters is empty.
|
||||
func mostActiveIoDevice(diskIoCounters map[string]disk.IOCountersStat) string {
|
||||
var best ioMatchCandidate
|
||||
for _, d := range diskIoCounters {
|
||||
c := ioMatchCandidate{
|
||||
name: d.Name,
|
||||
bytes: d.ReadBytes + d.WriteBytes,
|
||||
ops: d.ReadCount + d.WriteCount,
|
||||
}
|
||||
if best.name == "" || c.bytes > best.bytes ||
|
||||
(c.bytes == best.bytes && c.ops > best.ops) ||
|
||||
(c.bytes == best.bytes && c.ops == best.ops && c.name < best.name) {
|
||||
best = c
|
||||
}
|
||||
}
|
||||
return best.name
|
||||
}
|
||||
|
||||
// prefixRelated reports whether one of a and b is a proper prefix of the
// other. Empty strings and identical strings are never related.
func prefixRelated(a, b string) bool {
	switch {
	case a == "", b == "", a == b:
		return false
	case strings.HasPrefix(a, b):
		return true
	default:
		return strings.HasPrefix(b, a)
	}
}
|
||||
|
||||
// filesystemMatchesPartitionSetting checks whether a FILESYSTEM env var value
|
||||
// matches a partition by mountpoint, exact device name, or prefix relationship
|
||||
// (e.g. FILESYSTEM=ada0 matches partition /dev/ada0p2).
|
||||
func filesystemMatchesPartitionSetting(filesystem string, p disk.PartitionStat) bool {
|
||||
filesystem = strings.TrimSpace(filesystem)
|
||||
if filesystem == "" {
|
||||
return false
|
||||
}
|
||||
if p.Mountpoint == filesystem {
|
||||
return true
|
||||
}
|
||||
|
||||
fsName := normalizeDeviceName(filesystem)
|
||||
partName := normalizeDeviceName(p.Device)
|
||||
if fsName == "" || partName == "" {
|
||||
return false
|
||||
}
|
||||
if fsName == partName {
|
||||
return true
|
||||
}
|
||||
return prefixRelated(partName, fsName)
|
||||
}
|
||||
|
||||
// normalizeDeviceName reduces a device string to a form suitable for
// comparison: surrounding whitespace is trimmed and only the last path element
// is kept. Input with nothing left after trimming (for which filepath.Base
// yields ".") produces "".
func normalizeDeviceName(value string) string {
	if base := filepath.Base(strings.TrimSpace(value)); base != "." {
		return base
	}
	return ""
}
|
||||
|
||||
// Sets start values for disk I/O stats.
|
||||
func (a *Agent) initializeDiskIoStats(diskIoCounters map[string]disk.IOCountersStat) {
|
||||
a.fsNames = a.fsNames[:0]
|
||||
now := time.Now()
|
||||
for device, stats := range a.fsStats {
|
||||
// skip if not in diskIoCounters
|
||||
d, exists := diskIoCounters[device]
|
||||
@@ -221,7 +511,7 @@ func (a *Agent) initializeDiskIoStats(diskIoCounters map[string]disk.IOCountersS
|
||||
continue
|
||||
}
|
||||
// populate initial values
|
||||
stats.Time = time.Now()
|
||||
stats.Time = now
|
||||
stats.TotalRead = d.ReadBytes
|
||||
stats.TotalWrite = d.WriteBytes
|
||||
// add to list of valid io device names
|
||||
@@ -245,12 +535,12 @@ func (a *Agent) updateDiskUsage(systemStats *system.Stats) {
|
||||
continue
|
||||
}
|
||||
if d, err := disk.Usage(stats.Mountpoint); err == nil {
|
||||
stats.DiskTotal = bytesToGigabytes(d.Total)
|
||||
stats.DiskUsed = bytesToGigabytes(d.Used)
|
||||
stats.DiskTotal = utils.BytesToGigabytes(d.Total)
|
||||
stats.DiskUsed = utils.BytesToGigabytes(d.Used)
|
||||
if stats.Root {
|
||||
systemStats.DiskTotal = bytesToGigabytes(d.Total)
|
||||
systemStats.DiskUsed = bytesToGigabytes(d.Used)
|
||||
systemStats.DiskPct = twoDecimals(d.UsedPercent)
|
||||
systemStats.DiskTotal = utils.BytesToGigabytes(d.Total)
|
||||
systemStats.DiskUsed = utils.BytesToGigabytes(d.Used)
|
||||
systemStats.DiskPct = utils.TwoDecimals(d.UsedPercent)
|
||||
}
|
||||
} else {
|
||||
// reset stats if error (likely unmounted)
|
||||
@@ -303,8 +593,8 @@ func (a *Agent) updateDiskIo(cacheTimeMs uint16, systemStats *system.Stats) {
|
||||
|
||||
diskIORead := (d.ReadBytes - prev.readBytes) * 1000 / msElapsed
|
||||
diskIOWrite := (d.WriteBytes - prev.writeBytes) * 1000 / msElapsed
|
||||
readMbPerSecond := bytesToMegabytes(float64(diskIORead))
|
||||
writeMbPerSecond := bytesToMegabytes(float64(diskIOWrite))
|
||||
readMbPerSecond := utils.BytesToMegabytes(float64(diskIORead))
|
||||
writeMbPerSecond := utils.BytesToMegabytes(float64(diskIOWrite))
|
||||
|
||||
// validate values
|
||||
if readMbPerSecond > 50_000 || writeMbPerSecond > 50_000 {
|
||||
|
||||
@@ -93,6 +93,443 @@ func TestParseFilesystemEntry(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtraFilesystemPartitionInfo(t *testing.T) {
|
||||
t.Run("uses partition device for label-only mountpoint", func(t *testing.T) {
|
||||
device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
|
||||
Device: "/dev/sdc",
|
||||
Mountpoint: "/extra-filesystems/Share",
|
||||
})
|
||||
|
||||
assert.Equal(t, "/dev/sdc", device)
|
||||
assert.Equal(t, "", customName)
|
||||
})
|
||||
|
||||
t.Run("uses custom name from mountpoint suffix", func(t *testing.T) {
|
||||
device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
|
||||
Device: "/dev/sdc",
|
||||
Mountpoint: "/extra-filesystems/sdc__Share",
|
||||
})
|
||||
|
||||
assert.Equal(t, "/dev/sdc", device)
|
||||
assert.Equal(t, "Share", customName)
|
||||
})
|
||||
|
||||
t.Run("falls back to folder device when partition device is unavailable", func(t *testing.T) {
|
||||
device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
|
||||
Mountpoint: "/extra-filesystems/sdc__Share",
|
||||
})
|
||||
|
||||
assert.Equal(t, "sdc", device)
|
||||
assert.Equal(t, "Share", customName)
|
||||
})
|
||||
|
||||
t.Run("supports custom name without folder device prefix", func(t *testing.T) {
|
||||
device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
|
||||
Device: "/dev/sdc",
|
||||
Mountpoint: "/extra-filesystems/__Share",
|
||||
})
|
||||
|
||||
assert.Equal(t, "/dev/sdc", device)
|
||||
assert.Equal(t, "Share", customName)
|
||||
})
|
||||
}
|
||||
|
||||
func TestBuildFsStatRegistration(t *testing.T) {
|
||||
t.Run("uses basename for non-windows exact io match", func(t *testing.T) {
|
||||
key, stats, ok := registerFilesystemStats(
|
||||
map[string]*system.FsStats{},
|
||||
"/dev/sda1",
|
||||
"/mnt/data",
|
||||
false,
|
||||
"archive",
|
||||
fsRegistrationContext{
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"sda1": {Name: "sda1"},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "sda1", key)
|
||||
assert.Equal(t, "/mnt/data", stats.Mountpoint)
|
||||
assert.Equal(t, "archive", stats.Name)
|
||||
assert.False(t, stats.Root)
|
||||
})
|
||||
|
||||
t.Run("maps root partition to io device by prefix", func(t *testing.T) {
|
||||
key, stats, ok := registerFilesystemStats(
|
||||
map[string]*system.FsStats{},
|
||||
"/dev/ada0p2",
|
||||
"/",
|
||||
true,
|
||||
"",
|
||||
fsRegistrationContext{
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"ada0": {Name: "ada0", ReadBytes: 1000, WriteBytes: 1000},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "ada0", key)
|
||||
assert.True(t, stats.Root)
|
||||
assert.Equal(t, "/", stats.Mountpoint)
|
||||
})
|
||||
|
||||
t.Run("uses filesystem setting as root fallback", func(t *testing.T) {
|
||||
key, _, ok := registerFilesystemStats(
|
||||
map[string]*system.FsStats{},
|
||||
"overlay",
|
||||
"/",
|
||||
true,
|
||||
"",
|
||||
fsRegistrationContext{
|
||||
filesystem: "nvme0n1p2",
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"nvme0n1": {Name: "nvme0n1", ReadBytes: 1000, WriteBytes: 1000},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "nvme0n1", key)
|
||||
})
|
||||
|
||||
t.Run("prefers parsed extra-filesystems device over mapper device", func(t *testing.T) {
|
||||
key, stats, ok := registerFilesystemStats(
|
||||
map[string]*system.FsStats{},
|
||||
"/dev/mapper/luks-2bcb02be-999d-4417-8d18-5c61e660fb6e",
|
||||
"/extra-filesystems/nvme0n1p2__Archive",
|
||||
false,
|
||||
"Archive",
|
||||
fsRegistrationContext{
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"dm-1": {Name: "dm-1", Label: "luks-2bcb02be-999d-4417-8d18-5c61e660fb6e"},
|
||||
"nvme0n1p2": {Name: "nvme0n1p2"},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "nvme0n1p2", key)
|
||||
assert.Equal(t, "Archive", stats.Name)
|
||||
})
|
||||
|
||||
t.Run("falls back to mapper io device when folder device cannot be resolved", func(t *testing.T) {
|
||||
key, stats, ok := registerFilesystemStats(
|
||||
map[string]*system.FsStats{},
|
||||
"/dev/mapper/luks-2bcb02be-999d-4417-8d18-5c61e660fb6e",
|
||||
"/extra-filesystems/Archive",
|
||||
false,
|
||||
"Archive",
|
||||
fsRegistrationContext{
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"dm-1": {Name: "dm-1", Label: "luks-2bcb02be-999d-4417-8d18-5c61e660fb6e"},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "dm-1", key)
|
||||
assert.Equal(t, "Archive", stats.Name)
|
||||
})
|
||||
|
||||
t.Run("uses full device name on windows", func(t *testing.T) {
|
||||
key, _, ok := registerFilesystemStats(
|
||||
map[string]*system.FsStats{},
|
||||
`C:`,
|
||||
`C:\\`,
|
||||
false,
|
||||
"",
|
||||
fsRegistrationContext{
|
||||
isWindows: true,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
`C:`: {Name: `C:`},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, `C:`, key)
|
||||
})
|
||||
|
||||
t.Run("skips existing key", func(t *testing.T) {
|
||||
key, stats, ok := registerFilesystemStats(
|
||||
map[string]*system.FsStats{"sda1": {Mountpoint: "/existing"}},
|
||||
"/dev/sda1",
|
||||
"/mnt/data",
|
||||
false,
|
||||
"",
|
||||
fsRegistrationContext{
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"sda1": {Name: "sda1"},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
assert.False(t, ok)
|
||||
assert.Empty(t, key)
|
||||
assert.Nil(t, stats)
|
||||
})
|
||||
}
|
||||
|
||||
func TestAddConfiguredRootFs(t *testing.T) {
|
||||
t.Run("adds root from matching partition", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
|
||||
discovery := diskDiscovery{
|
||||
agent: agent,
|
||||
rootMountPoint: "/",
|
||||
partitions: []disk.PartitionStat{{Device: "/dev/ada0p2", Mountpoint: "/"}},
|
||||
ctx: fsRegistrationContext{
|
||||
filesystem: "/dev/ada0p2",
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"ada0": {Name: "ada0", ReadBytes: 1000, WriteBytes: 1000},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
ok := discovery.addConfiguredRootFs()
|
||||
|
||||
assert.True(t, ok)
|
||||
stats, exists := agent.fsStats["ada0"]
|
||||
assert.True(t, exists)
|
||||
assert.True(t, stats.Root)
|
||||
assert.Equal(t, "/", stats.Mountpoint)
|
||||
})
|
||||
|
||||
t.Run("adds root from io device when partition is missing", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
|
||||
discovery := diskDiscovery{
|
||||
agent: agent,
|
||||
rootMountPoint: "/sysroot",
|
||||
ctx: fsRegistrationContext{
|
||||
filesystem: "zroot",
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"nda0": {Name: "nda0", Label: "zroot", ReadBytes: 1000, WriteBytes: 1000},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
ok := discovery.addConfiguredRootFs()
|
||||
|
||||
assert.True(t, ok)
|
||||
stats, exists := agent.fsStats["nda0"]
|
||||
assert.True(t, exists)
|
||||
assert.True(t, stats.Root)
|
||||
assert.Equal(t, "/sysroot", stats.Mountpoint)
|
||||
})
|
||||
|
||||
t.Run("returns false when filesystem cannot be resolved", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
|
||||
discovery := diskDiscovery{
|
||||
agent: agent,
|
||||
rootMountPoint: "/",
|
||||
ctx: fsRegistrationContext{
|
||||
filesystem: "missing-disk",
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{},
|
||||
},
|
||||
}
|
||||
|
||||
ok := discovery.addConfiguredRootFs()
|
||||
|
||||
assert.False(t, ok)
|
||||
assert.Empty(t, agent.fsStats)
|
||||
})
|
||||
}
|
||||
|
||||
func TestAddPartitionRootFs(t *testing.T) {
|
||||
t.Run("adds root from fallback partition candidate", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
|
||||
discovery := diskDiscovery{
|
||||
agent: agent,
|
||||
ctx: fsRegistrationContext{
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"nvme0n1": {Name: "nvme0n1", ReadBytes: 1000, WriteBytes: 1000},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
ok := discovery.addPartitionRootFs("/dev/nvme0n1p2", "/")
|
||||
|
||||
assert.True(t, ok)
|
||||
stats, exists := agent.fsStats["nvme0n1"]
|
||||
assert.True(t, exists)
|
||||
assert.True(t, stats.Root)
|
||||
assert.Equal(t, "/", stats.Mountpoint)
|
||||
})
|
||||
|
||||
t.Run("returns false when no io device matches", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
|
||||
discovery := diskDiscovery{agent: agent, ctx: fsRegistrationContext{diskIoCounters: map[string]disk.IOCountersStat{}}}
|
||||
|
||||
ok := discovery.addPartitionRootFs("/dev/mapper/root", "/")
|
||||
|
||||
assert.False(t, ok)
|
||||
assert.Empty(t, agent.fsStats)
|
||||
})
|
||||
}
|
||||
|
||||
func TestAddLastResortRootFs(t *testing.T) {
|
||||
t.Run("uses most active io device when available", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
|
||||
discovery := diskDiscovery{agent: agent, rootMountPoint: "/", ctx: fsRegistrationContext{diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"sda": {Name: "sda", ReadBytes: 5000, WriteBytes: 5000},
|
||||
"sdb": {Name: "sdb", ReadBytes: 1000, WriteBytes: 1000},
|
||||
}}}
|
||||
|
||||
discovery.addLastResortRootFs()
|
||||
|
||||
stats, exists := agent.fsStats["sda"]
|
||||
assert.True(t, exists)
|
||||
assert.True(t, stats.Root)
|
||||
})
|
||||
|
||||
t.Run("falls back to root key when mountpoint basename collides", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: map[string]*system.FsStats{
|
||||
"sysroot": {Mountpoint: "/extra-filesystems/sysroot"},
|
||||
}}
|
||||
discovery := diskDiscovery{agent: agent, rootMountPoint: "/sysroot", ctx: fsRegistrationContext{diskIoCounters: map[string]disk.IOCountersStat{}}}
|
||||
|
||||
discovery.addLastResortRootFs()
|
||||
|
||||
stats, exists := agent.fsStats["root"]
|
||||
assert.True(t, exists)
|
||||
assert.True(t, stats.Root)
|
||||
assert.Equal(t, "/sysroot", stats.Mountpoint)
|
||||
})
|
||||
}
|
||||
|
||||
func TestAddConfiguredExtraFsEntry(t *testing.T) {
|
||||
t.Run("uses matching partition when present", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
|
||||
discovery := diskDiscovery{
|
||||
agent: agent,
|
||||
partitions: []disk.PartitionStat{{Device: "/dev/sdb1", Mountpoint: "/mnt/backup"}},
|
||||
usageFn: func(string) (*disk.UsageStat, error) {
|
||||
t.Fatal("usage fallback should not be called when partition matches")
|
||||
return nil, nil
|
||||
},
|
||||
ctx: fsRegistrationContext{
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"sdb1": {Name: "sdb1"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
discovery.addConfiguredExtraFsEntry("sdb1", "backup")
|
||||
|
||||
stats, exists := agent.fsStats["sdb1"]
|
||||
assert.True(t, exists)
|
||||
assert.Equal(t, "/mnt/backup", stats.Mountpoint)
|
||||
assert.Equal(t, "backup", stats.Name)
|
||||
})
|
||||
|
||||
t.Run("falls back to usage-validated path", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
|
||||
discovery := diskDiscovery{
|
||||
agent: agent,
|
||||
usageFn: func(path string) (*disk.UsageStat, error) {
|
||||
assert.Equal(t, "/srv/archive", path)
|
||||
return &disk.UsageStat{}, nil
|
||||
},
|
||||
ctx: fsRegistrationContext{
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"archive": {Name: "archive"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
discovery.addConfiguredExtraFsEntry("/srv/archive", "archive")
|
||||
|
||||
stats, exists := agent.fsStats["archive"]
|
||||
assert.True(t, exists)
|
||||
assert.Equal(t, "/srv/archive", stats.Mountpoint)
|
||||
assert.Equal(t, "archive", stats.Name)
|
||||
})
|
||||
|
||||
t.Run("ignores invalid filesystem entry", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
|
||||
discovery := diskDiscovery{
|
||||
agent: agent,
|
||||
usageFn: func(string) (*disk.UsageStat, error) {
|
||||
return nil, os.ErrNotExist
|
||||
},
|
||||
}
|
||||
|
||||
discovery.addConfiguredExtraFsEntry("/missing/archive", "")
|
||||
|
||||
assert.Empty(t, agent.fsStats)
|
||||
})
|
||||
}
|
||||
|
||||
func TestAddConfiguredExtraFilesystems(t *testing.T) {
|
||||
t.Run("parses and registers multiple configured filesystems", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
|
||||
discovery := diskDiscovery{
|
||||
agent: agent,
|
||||
partitions: []disk.PartitionStat{{Device: "/dev/sda1", Mountpoint: "/mnt/fast"}},
|
||||
usageFn: func(path string) (*disk.UsageStat, error) {
|
||||
if path == "/srv/archive" {
|
||||
return &disk.UsageStat{}, nil
|
||||
}
|
||||
return nil, os.ErrNotExist
|
||||
},
|
||||
ctx: fsRegistrationContext{
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"sda1": {Name: "sda1"},
|
||||
"archive": {Name: "archive"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
discovery.addConfiguredExtraFilesystems("sda1__fast,/srv/archive__cold")
|
||||
|
||||
assert.Contains(t, agent.fsStats, "sda1")
|
||||
assert.Equal(t, "fast", agent.fsStats["sda1"].Name)
|
||||
assert.Contains(t, agent.fsStats, "archive")
|
||||
assert.Equal(t, "cold", agent.fsStats["archive"].Name)
|
||||
})
|
||||
}
|
||||
|
||||
func TestAddExtraFilesystemFolders(t *testing.T) {
|
||||
t.Run("adds missing folders and skips existing mountpoints", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: map[string]*system.FsStats{
|
||||
"existing": {Mountpoint: "/extra-filesystems/existing"},
|
||||
}}
|
||||
discovery := diskDiscovery{
|
||||
agent: agent,
|
||||
ctx: fsRegistrationContext{
|
||||
isWindows: false,
|
||||
efPath: "/extra-filesystems",
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"newdisk": {Name: "newdisk"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
discovery.addExtraFilesystemFolders([]string{"existing", "newdisk__Archive"})
|
||||
|
||||
assert.Len(t, agent.fsStats, 2)
|
||||
stats, exists := agent.fsStats["newdisk"]
|
||||
assert.True(t, exists)
|
||||
assert.Equal(t, "/extra-filesystems/newdisk__Archive", stats.Mountpoint)
|
||||
assert.Equal(t, "Archive", stats.Name)
|
||||
})
|
||||
}
|
||||
|
||||
func TestFindIoDevice(t *testing.T) {
|
||||
t.Run("matches by device name", func(t *testing.T) {
|
||||
ioCounters := map[string]disk.IOCountersStat{
|
||||
@@ -116,7 +553,7 @@ func TestFindIoDevice(t *testing.T) {
|
||||
assert.Equal(t, "sda", device)
|
||||
})
|
||||
|
||||
t.Run("returns no fallback when not found", func(t *testing.T) {
|
||||
t.Run("returns no match when not found", func(t *testing.T) {
|
||||
ioCounters := map[string]disk.IOCountersStat{
|
||||
"sda": {Name: "sda"},
|
||||
"sdb": {Name: "sdb"},
|
||||
@@ -126,6 +563,106 @@ func TestFindIoDevice(t *testing.T) {
|
||||
assert.False(t, ok)
|
||||
assert.Equal(t, "", device)
|
||||
})
|
||||
|
||||
t.Run("uses uncertain unique prefix fallback", func(t *testing.T) {
|
||||
ioCounters := map[string]disk.IOCountersStat{
|
||||
"nvme0n1": {Name: "nvme0n1"},
|
||||
"sda": {Name: "sda"},
|
||||
}
|
||||
|
||||
device, ok := findIoDevice("nvme0n1p2", ioCounters)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "nvme0n1", device)
|
||||
})
|
||||
|
||||
t.Run("uses dominant activity when prefix matches are ambiguous", func(t *testing.T) {
|
||||
ioCounters := map[string]disk.IOCountersStat{
|
||||
"sda": {Name: "sda", ReadBytes: 5000, WriteBytes: 5000, ReadCount: 100, WriteCount: 100},
|
||||
"sdb": {Name: "sdb", ReadBytes: 1000, WriteBytes: 1000, ReadCount: 50, WriteCount: 50},
|
||||
}
|
||||
|
||||
device, ok := findIoDevice("sd", ioCounters)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "sda", device)
|
||||
})
|
||||
|
||||
t.Run("uses highest activity when ambiguous without dominance", func(t *testing.T) {
|
||||
ioCounters := map[string]disk.IOCountersStat{
|
||||
"sda": {Name: "sda", ReadBytes: 3000, WriteBytes: 3000, ReadCount: 50, WriteCount: 50},
|
||||
"sdb": {Name: "sdb", ReadBytes: 2500, WriteBytes: 2500, ReadCount: 40, WriteCount: 40},
|
||||
}
|
||||
|
||||
device, ok := findIoDevice("sd", ioCounters)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "sda", device)
|
||||
})
|
||||
|
||||
t.Run("matches /dev/-prefixed partition to parent disk", func(t *testing.T) {
|
||||
ioCounters := map[string]disk.IOCountersStat{
|
||||
"nda0": {Name: "nda0", ReadBytes: 1000, WriteBytes: 1000},
|
||||
}
|
||||
|
||||
device, ok := findIoDevice("/dev/nda0p2", ioCounters)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "nda0", device)
|
||||
})
|
||||
|
||||
t.Run("uses deterministic name tie-breaker", func(t *testing.T) {
|
||||
ioCounters := map[string]disk.IOCountersStat{
|
||||
"sdb": {Name: "sdb", ReadBytes: 2000, WriteBytes: 2000, ReadCount: 10, WriteCount: 10},
|
||||
"sda": {Name: "sda", ReadBytes: 2000, WriteBytes: 2000, ReadCount: 10, WriteCount: 10},
|
||||
}
|
||||
|
||||
device, ok := findIoDevice("sd", ioCounters)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "sda", device)
|
||||
})
|
||||
}
|
||||
|
||||
func TestFilesystemMatchesPartitionSetting(t *testing.T) {
|
||||
p := disk.PartitionStat{Device: "/dev/ada0p2", Mountpoint: "/"}
|
||||
|
||||
t.Run("matches mountpoint setting", func(t *testing.T) {
|
||||
assert.True(t, filesystemMatchesPartitionSetting("/", p))
|
||||
})
|
||||
|
||||
t.Run("matches exact partition setting", func(t *testing.T) {
|
||||
assert.True(t, filesystemMatchesPartitionSetting("ada0p2", p))
|
||||
assert.True(t, filesystemMatchesPartitionSetting("/dev/ada0p2", p))
|
||||
})
|
||||
|
||||
t.Run("matches prefix-style parent setting", func(t *testing.T) {
|
||||
assert.True(t, filesystemMatchesPartitionSetting("ada0", p))
|
||||
assert.True(t, filesystemMatchesPartitionSetting("/dev/ada0", p))
|
||||
})
|
||||
|
||||
t.Run("does not match unrelated device", func(t *testing.T) {
|
||||
assert.False(t, filesystemMatchesPartitionSetting("sda", p))
|
||||
assert.False(t, filesystemMatchesPartitionSetting("nvme0n1", p))
|
||||
assert.False(t, filesystemMatchesPartitionSetting("", p))
|
||||
})
|
||||
}
|
||||
|
||||
func TestMostActiveIoDevice(t *testing.T) {
|
||||
t.Run("returns most active device", func(t *testing.T) {
|
||||
ioCounters := map[string]disk.IOCountersStat{
|
||||
"nda0": {Name: "nda0", ReadBytes: 5000, WriteBytes: 5000, ReadCount: 100, WriteCount: 100},
|
||||
"nda1": {Name: "nda1", ReadBytes: 1000, WriteBytes: 1000, ReadCount: 50, WriteCount: 50},
|
||||
}
|
||||
assert.Equal(t, "nda0", mostActiveIoDevice(ioCounters))
|
||||
})
|
||||
|
||||
t.Run("uses deterministic tie-breaker", func(t *testing.T) {
|
||||
ioCounters := map[string]disk.IOCountersStat{
|
||||
"sdb": {Name: "sdb", ReadBytes: 1000, WriteBytes: 1000, ReadCount: 10, WriteCount: 10},
|
||||
"sda": {Name: "sda", ReadBytes: 1000, WriteBytes: 1000, ReadCount: 10, WriteCount: 10},
|
||||
}
|
||||
assert.Equal(t, "sda", mostActiveIoDevice(ioCounters))
|
||||
})
|
||||
|
||||
t.Run("returns empty for empty map", func(t *testing.T) {
|
||||
assert.Equal(t, "", mostActiveIoDevice(map[string]disk.IOCountersStat{}))
|
||||
})
|
||||
}
|
||||
|
||||
func TestIsDockerSpecialMountpoint(t *testing.T) {
|
||||
@@ -210,7 +747,7 @@ func TestInitializeDiskInfoWithCustomNames(t *testing.T) {
|
||||
// Test the parsing logic by calling the relevant part
|
||||
// We'll create a simplified version to test just the parsing
|
||||
extraFilesystems := tc.envValue
|
||||
for _, fsEntry := range strings.Split(extraFilesystems, ",") {
|
||||
for fsEntry := range strings.SplitSeq(extraFilesystems, ",") {
|
||||
// Parse the entry
|
||||
fsEntry = strings.TrimSpace(fsEntry)
|
||||
var fs, customName string
|
||||
@@ -372,3 +909,67 @@ func TestDiskUsageCaching(t *testing.T) {
|
||||
"lastDiskUsageUpdate should be refreshed when cache expires")
|
||||
})
|
||||
}
|
||||
|
||||
func TestHasSameDiskUsage(t *testing.T) {
|
||||
const toleranceBytes uint64 = 16 * 1024 * 1024
|
||||
|
||||
t.Run("returns true when totals and usage are equal", func(t *testing.T) {
|
||||
a := &disk.UsageStat{Total: 100 * 1024 * 1024 * 1024, Used: 42 * 1024 * 1024 * 1024}
|
||||
b := &disk.UsageStat{Total: 100 * 1024 * 1024 * 1024, Used: 42 * 1024 * 1024 * 1024}
|
||||
assert.True(t, hasSameDiskUsage(a, b))
|
||||
})
|
||||
|
||||
t.Run("returns true within tolerance", func(t *testing.T) {
|
||||
a := &disk.UsageStat{Total: 100 * 1024 * 1024 * 1024, Used: 42 * 1024 * 1024 * 1024}
|
||||
b := &disk.UsageStat{
|
||||
Total: a.Total + toleranceBytes - 1,
|
||||
Used: a.Used - toleranceBytes + 1,
|
||||
}
|
||||
assert.True(t, hasSameDiskUsage(a, b))
|
||||
})
|
||||
|
||||
t.Run("returns false when total exceeds tolerance", func(t *testing.T) {
|
||||
a := &disk.UsageStat{Total: 100 * 1024 * 1024 * 1024, Used: 42 * 1024 * 1024 * 1024}
|
||||
b := &disk.UsageStat{
|
||||
Total: a.Total + toleranceBytes + 1,
|
||||
Used: a.Used,
|
||||
}
|
||||
assert.False(t, hasSameDiskUsage(a, b))
|
||||
})
|
||||
|
||||
t.Run("returns false for nil or zero total", func(t *testing.T) {
|
||||
assert.False(t, hasSameDiskUsage(nil, &disk.UsageStat{Total: 1, Used: 1}))
|
||||
assert.False(t, hasSameDiskUsage(&disk.UsageStat{Total: 1, Used: 1}, nil))
|
||||
assert.False(t, hasSameDiskUsage(&disk.UsageStat{Total: 0, Used: 0}, &disk.UsageStat{Total: 1, Used: 1}))
|
||||
})
|
||||
}
|
||||
|
||||
func TestInitializeDiskIoStatsResetsTrackedDevices(t *testing.T) {
|
||||
agent := &Agent{
|
||||
fsStats: map[string]*system.FsStats{
|
||||
"sda": {},
|
||||
"sdb": {},
|
||||
},
|
||||
fsNames: []string{"stale", "sda"},
|
||||
}
|
||||
|
||||
agent.initializeDiskIoStats(map[string]disk.IOCountersStat{
|
||||
"sda": {Name: "sda", ReadBytes: 10, WriteBytes: 20},
|
||||
"sdb": {Name: "sdb", ReadBytes: 30, WriteBytes: 40},
|
||||
})
|
||||
|
||||
assert.ElementsMatch(t, []string{"sda", "sdb"}, agent.fsNames)
|
||||
assert.Len(t, agent.fsNames, 2)
|
||||
assert.Equal(t, uint64(10), agent.fsStats["sda"].TotalRead)
|
||||
assert.Equal(t, uint64(20), agent.fsStats["sda"].TotalWrite)
|
||||
assert.False(t, agent.fsStats["sda"].Time.IsZero())
|
||||
assert.False(t, agent.fsStats["sdb"].Time.IsZero())
|
||||
|
||||
agent.initializeDiskIoStats(map[string]disk.IOCountersStat{
|
||||
"sdb": {Name: "sdb", ReadBytes: 50, WriteBytes: 60},
|
||||
})
|
||||
|
||||
assert.Equal(t, []string{"sdb"}, agent.fsNames)
|
||||
assert.Equal(t, uint64(50), agent.fsStats["sdb"].TotalRead)
|
||||
assert.Equal(t, uint64(60), agent.fsStats["sdb"].TotalWrite)
|
||||
}
|
||||
|
||||
115
agent/docker.go
115
agent/docker.go
@@ -16,11 +16,14 @@ import (
|
||||
"os"
|
||||
"path"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/agent/deltatracker"
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/container"
|
||||
|
||||
"github.com/blang/semver"
|
||||
@@ -336,15 +339,48 @@ func validateCpuPercentage(cpuPct float64, containerName string) error {
|
||||
|
||||
// updateContainerStatsValues updates the final stats values
|
||||
func updateContainerStatsValues(stats *container.Stats, cpuPct float64, usedMemory uint64, sent_delta, recv_delta uint64, readTime time.Time) {
|
||||
stats.Cpu = twoDecimals(cpuPct)
|
||||
stats.Mem = bytesToMegabytes(float64(usedMemory))
|
||||
stats.Cpu = utils.TwoDecimals(cpuPct)
|
||||
stats.Mem = utils.BytesToMegabytes(float64(usedMemory))
|
||||
stats.Bandwidth = [2]uint64{sent_delta, recv_delta}
|
||||
// TODO(0.19+): stop populating NetworkSent/NetworkRecv (deprecated in 0.18.3)
|
||||
stats.NetworkSent = bytesToMegabytes(float64(sent_delta))
|
||||
stats.NetworkRecv = bytesToMegabytes(float64(recv_delta))
|
||||
stats.NetworkSent = utils.BytesToMegabytes(float64(sent_delta))
|
||||
stats.NetworkRecv = utils.BytesToMegabytes(float64(recv_delta))
|
||||
stats.PrevReadTime = readTime
|
||||
}
|
||||
|
||||
// convertContainerPortsToString formats the ports of a container into a sorted, deduplicated string.
|
||||
// ctr.Ports is nilled out after processing so the slice is not accidentally reused.
|
||||
func convertContainerPortsToString(ctr *container.ApiInfo) string {
|
||||
if len(ctr.Ports) == 0 {
|
||||
return ""
|
||||
}
|
||||
sort.Slice(ctr.Ports, func(i, j int) bool {
|
||||
return ctr.Ports[i].PublicPort < ctr.Ports[j].PublicPort
|
||||
})
|
||||
var builder strings.Builder
|
||||
seenPorts := make(map[uint16]struct{})
|
||||
for _, p := range ctr.Ports {
|
||||
_, ok := seenPorts[p.PublicPort]
|
||||
if p.PublicPort == 0 || ok {
|
||||
continue
|
||||
}
|
||||
seenPorts[p.PublicPort] = struct{}{}
|
||||
if builder.Len() > 0 {
|
||||
builder.WriteString(", ")
|
||||
}
|
||||
switch p.IP {
|
||||
case "0.0.0.0", "::":
|
||||
default:
|
||||
builder.WriteString(p.IP)
|
||||
builder.WriteByte(':')
|
||||
}
|
||||
builder.WriteString(strconv.Itoa(int(p.PublicPort)))
|
||||
}
|
||||
// clear ports slice so it doesn't get reused and blend into next response
|
||||
ctr.Ports = nil
|
||||
return builder.String()
|
||||
}
|
||||
|
||||
func parseDockerStatus(status string) (string, container.DockerHealth) {
|
||||
trimmed := strings.TrimSpace(status)
|
||||
if trimmed == "" {
|
||||
@@ -364,22 +400,60 @@ func parseDockerStatus(status string) (string, container.DockerHealth) {
|
||||
statusText = trimmed
|
||||
}
|
||||
|
||||
healthText := strings.ToLower(strings.TrimSpace(strings.TrimSuffix(trimmed[openIdx+1:], ")")))
|
||||
healthText := strings.TrimSpace(strings.TrimSuffix(trimmed[openIdx+1:], ")"))
|
||||
// Some Docker statuses include a "health:" prefix inside the parentheses.
|
||||
// Strip it so it maps correctly to the known health states.
|
||||
if colonIdx := strings.IndexRune(healthText, ':'); colonIdx != -1 {
|
||||
prefix := strings.TrimSpace(healthText[:colonIdx])
|
||||
prefix := strings.ToLower(strings.TrimSpace(healthText[:colonIdx]))
|
||||
if prefix == "health" || prefix == "health status" {
|
||||
healthText = strings.TrimSpace(healthText[colonIdx+1:])
|
||||
}
|
||||
}
|
||||
if health, ok := container.DockerHealthStrings[healthText]; ok {
|
||||
if health, ok := parseDockerHealthStatus(healthText); ok {
|
||||
return statusText, health
|
||||
}
|
||||
|
||||
return trimmed, container.DockerHealthNone
|
||||
}
|
||||
|
||||
// parseDockerHealthStatus maps Docker health status strings to container.DockerHealth values
|
||||
func parseDockerHealthStatus(status string) (container.DockerHealth, bool) {
|
||||
health, ok := container.DockerHealthStrings[strings.ToLower(strings.TrimSpace(status))]
|
||||
return health, ok
|
||||
}
|
||||
|
||||
// getPodmanContainerHealth fetches container health status from the container inspect endpoint.
|
||||
// Used for Podman which doesn't provide health status in the /containers/json endpoint as of March 2026.
|
||||
// https://github.com/containers/podman/issues/27786
|
||||
func (dm *dockerManager) getPodmanContainerHealth(containerID string) (container.DockerHealth, error) {
|
||||
resp, err := dm.client.Get(fmt.Sprintf("http://localhost/containers/%s/json", url.PathEscape(containerID)))
|
||||
if err != nil {
|
||||
return container.DockerHealthNone, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return container.DockerHealthNone, fmt.Errorf("container inspect request failed: %s", resp.Status)
|
||||
}
|
||||
|
||||
var inspectInfo struct {
|
||||
State struct {
|
||||
Health struct {
|
||||
Status string
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := json.NewDecoder(resp.Body).Decode(&inspectInfo); err != nil {
|
||||
return container.DockerHealthNone, err
|
||||
}
|
||||
|
||||
if health, ok := parseDockerHealthStatus(inspectInfo.State.Health.Status); ok {
|
||||
return health, nil
|
||||
}
|
||||
|
||||
return container.DockerHealthNone, nil
|
||||
}
|
||||
|
||||
// Updates stats for individual container with cache-time-aware delta tracking
|
||||
func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeMs uint16) error {
|
||||
name := ctr.Names[0][1:]
|
||||
@@ -389,6 +463,21 @@ func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeM
|
||||
return err
|
||||
}
|
||||
|
||||
statusText, health := parseDockerStatus(ctr.Status)
|
||||
|
||||
// Docker exposes Health.Status on /containers/json in API 1.52+.
|
||||
// Podman currently requires falling back to the inspect endpoint as of March 2026.
|
||||
// https://github.com/containers/podman/issues/27786
|
||||
if ctr.Health.Status != "" {
|
||||
if h, ok := parseDockerHealthStatus(ctr.Health.Status); ok {
|
||||
health = h
|
||||
}
|
||||
} else if dm.usingPodman {
|
||||
if podmanHealth, err := dm.getPodmanContainerHealth(ctr.IdShort); err == nil {
|
||||
health = podmanHealth
|
||||
}
|
||||
}
|
||||
|
||||
dm.containerStatsMutex.Lock()
|
||||
defer dm.containerStatsMutex.Unlock()
|
||||
|
||||
@@ -400,11 +489,13 @@ func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeM
|
||||
}
|
||||
|
||||
stats.Id = ctr.IdShort
|
||||
|
||||
statusText, health := parseDockerStatus(ctr.Status)
|
||||
stats.Status = statusText
|
||||
stats.Health = health
|
||||
|
||||
if len(ctr.Ports) > 0 {
|
||||
stats.Ports = convertContainerPortsToString(ctr)
|
||||
}
|
||||
|
||||
// reset current stats
|
||||
stats.Cpu = 0
|
||||
stats.Mem = 0
|
||||
@@ -487,7 +578,7 @@ func (dm *dockerManager) deleteContainerStatsSync(id string) {
|
||||
|
||||
// Creates a new http client for Docker or Podman API
|
||||
func newDockerManager() *dockerManager {
|
||||
dockerHost, exists := GetEnv("DOCKER_HOST")
|
||||
dockerHost, exists := utils.GetEnv("DOCKER_HOST")
|
||||
if exists {
|
||||
// return nil if set to empty string
|
||||
if dockerHost == "" {
|
||||
@@ -523,7 +614,7 @@ func newDockerManager() *dockerManager {
|
||||
|
||||
// configurable timeout
|
||||
timeout := time.Millisecond * time.Duration(dockerTimeoutMs)
|
||||
if t, set := GetEnv("DOCKER_TIMEOUT"); set {
|
||||
if t, set := utils.GetEnv("DOCKER_TIMEOUT"); set {
|
||||
timeout, err = time.ParseDuration(t)
|
||||
if err != nil {
|
||||
slog.Error(err.Error())
|
||||
@@ -540,7 +631,7 @@ func newDockerManager() *dockerManager {
|
||||
|
||||
// Read container exclusion patterns from environment variable
|
||||
var excludeContainers []string
|
||||
if excludeStr, set := GetEnv("EXCLUDE_CONTAINERS"); set && excludeStr != "" {
|
||||
if excludeStr, set := utils.GetEnv("EXCLUDE_CONTAINERS"); set && excludeStr != "" {
|
||||
parts := strings.SplitSeq(excludeStr, ",")
|
||||
for part := range parts {
|
||||
trimmed := strings.TrimSpace(part)
|
||||
|
||||
@@ -18,6 +18,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/agent/deltatracker"
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/container"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
@@ -34,6 +35,12 @@ type recordingRoundTripper struct {
|
||||
lastQuery map[string]string
|
||||
}
|
||||
|
||||
// roundTripFunc lets a bare function act as an http.RoundTripper, so a test
// can stub the transport of an http.Client with an inline closure.
type roundTripFunc func(*http.Request) (*http.Response, error)

// RoundTrip satisfies http.RoundTripper by handing the request to the
// underlying function and returning its result unchanged.
func (f roundTripFunc) RoundTrip(r *http.Request) (*http.Response, error) {
	resp, err := f(r)
	return resp, err
}
|
||||
|
||||
func (rt *recordingRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
|
||||
rt.called = true
|
||||
rt.lastPath = req.URL.EscapedPath()
|
||||
@@ -213,6 +220,28 @@ func TestContainerDetailsRequestsUseExpectedDockerPaths(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestGetPodmanContainerHealth(t *testing.T) {
|
||||
called := false
|
||||
dm := &dockerManager{
|
||||
client: &http.Client{Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
|
||||
called = true
|
||||
assert.Equal(t, "/containers/0123456789ab/json", req.URL.EscapedPath())
|
||||
return &http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Status: "200 OK",
|
||||
Header: make(http.Header),
|
||||
Body: io.NopCloser(strings.NewReader(`{"State":{"Health":{"Status":"healthy"}}}`)),
|
||||
Request: req,
|
||||
}, nil
|
||||
})},
|
||||
}
|
||||
|
||||
health, err := dm.getPodmanContainerHealth("0123456789ab")
|
||||
require.NoError(t, err)
|
||||
assert.True(t, called)
|
||||
assert.Equal(t, container.DockerHealthHealthy, health)
|
||||
}
|
||||
|
||||
func TestValidateCpuPercentage(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
@@ -298,48 +327,6 @@ func TestUpdateContainerStatsValues(t *testing.T) {
|
||||
assert.Equal(t, testTime, stats.PrevReadTime)
|
||||
}
|
||||
|
||||
func TestTwoDecimals(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input float64
|
||||
expected float64
|
||||
}{
|
||||
{"round down", 1.234, 1.23},
|
||||
{"round half up", 1.235, 1.24}, // math.Round rounds half up
|
||||
{"no rounding needed", 1.23, 1.23},
|
||||
{"negative number", -1.235, -1.24}, // math.Round rounds half up (more negative)
|
||||
{"zero", 0.0, 0.0},
|
||||
{"large number", 123.456, 123.46}, // rounds 5 up
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := twoDecimals(tt.input)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBytesToMegabytes(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input float64
|
||||
expected float64
|
||||
}{
|
||||
{"1 MB", 1048576, 1.0},
|
||||
{"512 KB", 524288, 0.5},
|
||||
{"zero", 0, 0},
|
||||
{"large value", 1073741824, 1024}, // 1 GB = 1024 MB
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := bytesToMegabytes(tt.input)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestInitializeCpuTracking(t *testing.T) {
|
||||
dm := &dockerManager{
|
||||
lastCpuContainer: make(map[uint16]map[string]uint64),
|
||||
@@ -905,11 +892,11 @@ func TestContainerStatsEndToEndWithRealData(t *testing.T) {
|
||||
updateContainerStatsValues(testStats, cpuPct, usedMemory, 1000000, 500000, testTime)
|
||||
|
||||
assert.Equal(t, cpuPct, testStats.Cpu)
|
||||
assert.Equal(t, bytesToMegabytes(float64(usedMemory)), testStats.Mem)
|
||||
assert.Equal(t, utils.BytesToMegabytes(float64(usedMemory)), testStats.Mem)
|
||||
assert.Equal(t, [2]uint64{1000000, 500000}, testStats.Bandwidth)
|
||||
// Deprecated fields still populated for backward compatibility with older hubs
|
||||
assert.Equal(t, bytesToMegabytes(1000000), testStats.NetworkSent)
|
||||
assert.Equal(t, bytesToMegabytes(500000), testStats.NetworkRecv)
|
||||
assert.Equal(t, utils.BytesToMegabytes(1000000), testStats.NetworkSent)
|
||||
assert.Equal(t, utils.BytesToMegabytes(500000), testStats.NetworkRecv)
|
||||
assert.Equal(t, testTime, testStats.PrevReadTime)
|
||||
}
|
||||
|
||||
@@ -1170,6 +1157,18 @@ func TestParseDockerStatus(t *testing.T) {
|
||||
expectedStatus: "",
|
||||
expectedHealth: container.DockerHealthNone,
|
||||
},
|
||||
{
|
||||
name: "status health with health: prefix",
|
||||
input: "Up 5 minutes (health: starting)",
|
||||
expectedStatus: "Up 5 minutes",
|
||||
expectedHealth: container.DockerHealthStarting,
|
||||
},
|
||||
{
|
||||
name: "status health with health status: prefix",
|
||||
input: "Up 10 minutes (health status: unhealthy)",
|
||||
expectedStatus: "Up 10 minutes",
|
||||
expectedHealth: container.DockerHealthUnhealthy,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
@@ -1181,6 +1180,84 @@ func TestParseDockerStatus(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseDockerHealthStatus(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expectedHealth container.DockerHealth
|
||||
expectedOk bool
|
||||
}{
|
||||
{"healthy", container.DockerHealthHealthy, true},
|
||||
{"unhealthy", container.DockerHealthUnhealthy, true},
|
||||
{"starting", container.DockerHealthStarting, true},
|
||||
{"none", container.DockerHealthNone, true},
|
||||
{" Healthy ", container.DockerHealthHealthy, true},
|
||||
{"unknown", container.DockerHealthNone, false},
|
||||
{"", container.DockerHealthNone, false},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.input, func(t *testing.T) {
|
||||
health, ok := parseDockerHealthStatus(tt.input)
|
||||
assert.Equal(t, tt.expectedHealth, health)
|
||||
assert.Equal(t, tt.expectedOk, ok)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateContainerStatsUsesPodmanInspectHealthFallback(t *testing.T) {
|
||||
var requestedPaths []string
|
||||
dm := &dockerManager{
|
||||
client: &http.Client{Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
|
||||
requestedPaths = append(requestedPaths, req.URL.EscapedPath())
|
||||
switch req.URL.EscapedPath() {
|
||||
case "/containers/0123456789ab/stats":
|
||||
return &http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Status: "200 OK",
|
||||
Header: make(http.Header),
|
||||
Body: io.NopCloser(strings.NewReader(`{
|
||||
"read":"2026-03-15T21:26:59Z",
|
||||
"cpu_stats":{"cpu_usage":{"total_usage":1000},"system_cpu_usage":2000},
|
||||
"memory_stats":{"usage":1048576,"stats":{"inactive_file":262144}},
|
||||
"networks":{"eth0":{"rx_bytes":0,"tx_bytes":0}}
|
||||
}`)),
|
||||
Request: req,
|
||||
}, nil
|
||||
case "/containers/0123456789ab/json":
|
||||
return &http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Status: "200 OK",
|
||||
Header: make(http.Header),
|
||||
Body: io.NopCloser(strings.NewReader(`{"State":{"Health":{"Status":"healthy"}}}`)),
|
||||
Request: req,
|
||||
}, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected path: %s", req.URL.EscapedPath())
|
||||
}
|
||||
})},
|
||||
containerStatsMap: make(map[string]*container.Stats),
|
||||
apiStats: &container.ApiStats{},
|
||||
usingPodman: true,
|
||||
lastCpuContainer: make(map[uint16]map[string]uint64),
|
||||
lastCpuSystem: make(map[uint16]map[string]uint64),
|
||||
lastCpuReadTime: make(map[uint16]map[string]time.Time),
|
||||
networkSentTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
|
||||
networkRecvTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
|
||||
}
|
||||
|
||||
ctr := &container.ApiInfo{
|
||||
IdShort: "0123456789ab",
|
||||
Names: []string{"/beszel"},
|
||||
Status: "Up 2 minutes",
|
||||
Image: "beszel:latest",
|
||||
}
|
||||
|
||||
err := dm.updateContainerStats(ctr, defaultCacheTimeMs)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, []string{"/containers/0123456789ab/stats", "/containers/0123456789ab/json"}, requestedPaths)
|
||||
assert.Equal(t, container.DockerHealthHealthy, dm.containerStatsMap[ctr.IdShort].Health)
|
||||
assert.Equal(t, "Up 2 minutes", dm.containerStatsMap[ctr.IdShort].Status)
|
||||
}
|
||||
|
||||
func TestConstantsAndUtilityFunctions(t *testing.T) {
|
||||
// Test constants are properly defined
|
||||
assert.Equal(t, uint16(60000), defaultCacheTimeMs)
|
||||
@@ -1190,13 +1267,13 @@ func TestConstantsAndUtilityFunctions(t *testing.T) {
|
||||
assert.Equal(t, 5*1024*1024, maxTotalLogSize) // 5MB
|
||||
|
||||
// Test utility functions
|
||||
assert.Equal(t, 1.5, twoDecimals(1.499))
|
||||
assert.Equal(t, 1.5, twoDecimals(1.5))
|
||||
assert.Equal(t, 1.5, twoDecimals(1.501))
|
||||
assert.Equal(t, 1.5, utils.TwoDecimals(1.499))
|
||||
assert.Equal(t, 1.5, utils.TwoDecimals(1.5))
|
||||
assert.Equal(t, 1.5, utils.TwoDecimals(1.501))
|
||||
|
||||
assert.Equal(t, 1.0, bytesToMegabytes(1048576)) // 1 MB
|
||||
assert.Equal(t, 0.5, bytesToMegabytes(524288)) // 512 KB
|
||||
assert.Equal(t, 0.0, bytesToMegabytes(0))
|
||||
assert.Equal(t, 1.0, utils.BytesToMegabytes(1048576)) // 1 MB
|
||||
assert.Equal(t, 0.5, utils.BytesToMegabytes(524288)) // 512 KB
|
||||
assert.Equal(t, 0.0, utils.BytesToMegabytes(0))
|
||||
}
|
||||
|
||||
func TestDecodeDockerLogStream(t *testing.T) {
|
||||
@@ -1496,3 +1573,99 @@ func TestAnsiEscapePattern(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertContainerPortsToString(t *testing.T) {
|
||||
type port = struct {
|
||||
PublicPort uint16
|
||||
IP string
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
ports []port
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "empty ports",
|
||||
ports: nil,
|
||||
expected: "",
|
||||
},
|
||||
{
|
||||
name: "single port",
|
||||
ports: []port{
|
||||
{PublicPort: 80, IP: "0.0.0.0"},
|
||||
},
|
||||
expected: "80",
|
||||
},
|
||||
{
|
||||
name: "single port with non-default IP",
|
||||
ports: []port{
|
||||
{PublicPort: 80, IP: "1.2.3.4"},
|
||||
},
|
||||
expected: "1.2.3.4:80",
|
||||
},
|
||||
{
|
||||
name: "ipv6 default ip",
|
||||
ports: []port{
|
||||
{PublicPort: 80, IP: "::"},
|
||||
},
|
||||
expected: "80",
|
||||
},
|
||||
{
|
||||
name: "zero PublicPort is skipped",
|
||||
ports: []port{
|
||||
{PublicPort: 0, IP: "0.0.0.0"},
|
||||
{PublicPort: 80, IP: "0.0.0.0"},
|
||||
},
|
||||
expected: "80",
|
||||
},
|
||||
{
|
||||
name: "ports sorted ascending by PublicPort",
|
||||
ports: []port{
|
||||
{PublicPort: 443, IP: "0.0.0.0"},
|
||||
{PublicPort: 80, IP: "0.0.0.0"},
|
||||
{PublicPort: 8080, IP: "0.0.0.0"},
|
||||
},
|
||||
expected: "80, 443, 8080",
|
||||
},
|
||||
{
|
||||
name: "duplicates are deduplicated",
|
||||
ports: []port{
|
||||
{PublicPort: 80, IP: "0.0.0.0"},
|
||||
{PublicPort: 80, IP: "0.0.0.0"},
|
||||
{PublicPort: 443, IP: "0.0.0.0"},
|
||||
},
|
||||
expected: "80, 443",
|
||||
},
|
||||
{
|
||||
name: "multiple ports with different IPs",
|
||||
ports: []port{
|
||||
{PublicPort: 80, IP: "0.0.0.0"},
|
||||
{PublicPort: 443, IP: "1.2.3.4"},
|
||||
},
|
||||
expected: "80, 1.2.3.4:443",
|
||||
},
|
||||
{
|
||||
name: "ports slice is nilled after call",
|
||||
ports: []port{
|
||||
{PublicPort: 8080, IP: "0.0.0.0"},
|
||||
},
|
||||
expected: "8080",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
ctr := &container.ApiInfo{}
|
||||
for _, p := range tt.ports {
|
||||
ctr.Ports = append(ctr.Ports, struct {
|
||||
PublicPort uint16
|
||||
IP string
|
||||
}{PublicPort: p.PublicPort, IP: p.IP})
|
||||
}
|
||||
result := convertContainerPortsToString(ctr)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
// Ports slice must be cleared to prevent bleed-over into the next response
|
||||
assert.Nil(t, ctr.Ports, "ctr.Ports should be nil after formatContainerPorts")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/smart"
|
||||
)
|
||||
|
||||
@@ -141,9 +142,9 @@ func readEmmcHealth(blockName string) (emmcHealth, bool) {
|
||||
out.lifeA = lifeA
|
||||
out.lifeB = lifeB
|
||||
|
||||
out.model = readStringFile(filepath.Join(deviceDir, "name"))
|
||||
out.serial = readStringFile(filepath.Join(deviceDir, "serial"))
|
||||
out.revision = readStringFile(filepath.Join(deviceDir, "prv"))
|
||||
out.model = utils.ReadStringFile(filepath.Join(deviceDir, "name"))
|
||||
out.serial = utils.ReadStringFile(filepath.Join(deviceDir, "serial"))
|
||||
out.revision = utils.ReadStringFile(filepath.Join(deviceDir, "prv"))
|
||||
|
||||
if capBytes, ok := readBlockCapacityBytes(blockName); ok {
|
||||
out.capacity = capBytes
|
||||
@@ -153,7 +154,7 @@ func readEmmcHealth(blockName string) (emmcHealth, bool) {
|
||||
}
|
||||
|
||||
func readLifeTime(deviceDir string) (uint8, uint8, bool) {
|
||||
if content, ok := readStringFileOK(filepath.Join(deviceDir, "life_time")); ok {
|
||||
if content, ok := utils.ReadStringFileOK(filepath.Join(deviceDir, "life_time")); ok {
|
||||
a, b, ok := parseHexBytePair(content)
|
||||
return a, b, ok
|
||||
}
|
||||
@@ -170,7 +171,7 @@ func readBlockCapacityBytes(blockName string) (uint64, bool) {
|
||||
sizePath := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "size")
|
||||
lbsPath := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "queue", "logical_block_size")
|
||||
|
||||
sizeStr, ok := readStringFileOK(sizePath)
|
||||
sizeStr, ok := utils.ReadStringFileOK(sizePath)
|
||||
if !ok {
|
||||
return 0, false
|
||||
}
|
||||
@@ -179,7 +180,7 @@ func readBlockCapacityBytes(blockName string) (uint64, bool) {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
lbsStr, ok := readStringFileOK(lbsPath)
|
||||
lbsStr, ok := utils.ReadStringFileOK(lbsPath)
|
||||
logicalBlockSize := uint64(512)
|
||||
if ok {
|
||||
if parsed, err := strconv.ParseUint(lbsStr, 10, 64); err == nil && parsed > 0 {
|
||||
@@ -191,7 +192,7 @@ func readBlockCapacityBytes(blockName string) (uint64, bool) {
|
||||
}
|
||||
|
||||
func readHexByteFile(path string) (uint8, bool) {
|
||||
content, ok := readStringFileOK(path)
|
||||
content, ok := utils.ReadStringFileOK(path)
|
||||
if !ok {
|
||||
return 0, false
|
||||
}
|
||||
@@ -199,19 +200,6 @@ func readHexByteFile(path string) (uint8, bool) {
|
||||
return b, ok
|
||||
}
|
||||
|
||||
func readStringFile(path string) string {
|
||||
content, _ := readStringFileOK(path)
|
||||
return content
|
||||
}
|
||||
|
||||
func readStringFileOK(path string) (string, bool) {
|
||||
b, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return "", false
|
||||
}
|
||||
return strings.TrimSpace(string(b)), true
|
||||
}
|
||||
|
||||
func hasEmmcHealthFiles(deviceDir string) bool {
|
||||
entries, err := os.ReadDir(deviceDir)
|
||||
if err != nil {
|
||||
|
||||
29
agent/gpu.go
29
agent/gpu.go
@@ -15,6 +15,7 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
)
|
||||
|
||||
@@ -291,8 +292,8 @@ func (gm *GPUManager) parseAmdData(output []byte) bool {
|
||||
}
|
||||
gpu := gm.GpuDataMap[id]
|
||||
gpu.Temperature, _ = strconv.ParseFloat(v.Temperature, 64)
|
||||
gpu.MemoryUsed = bytesToMegabytes(memoryUsage)
|
||||
gpu.MemoryTotal = bytesToMegabytes(totalMemory)
|
||||
gpu.MemoryUsed = utils.BytesToMegabytes(memoryUsage)
|
||||
gpu.MemoryTotal = utils.BytesToMegabytes(totalMemory)
|
||||
gpu.Usage += usage
|
||||
gpu.Power += power
|
||||
gpu.Count++
|
||||
@@ -366,16 +367,16 @@ func (gm *GPUManager) calculateGPUAverage(id string, gpu *system.GPUData, cacheK
|
||||
gpuAvg := *gpu
|
||||
deltaUsage, deltaPower, deltaPowerPkg := gm.calculateDeltas(gpu, lastSnapshot)
|
||||
|
||||
gpuAvg.Power = twoDecimals(deltaPower / float64(deltaCount))
|
||||
gpuAvg.Power = utils.TwoDecimals(deltaPower / float64(deltaCount))
|
||||
|
||||
if gpu.Engines != nil {
|
||||
// make fresh map for averaged engine metrics to avoid mutating
|
||||
// the accumulator map stored in gm.GpuDataMap
|
||||
gpuAvg.Engines = make(map[string]float64, len(gpu.Engines))
|
||||
gpuAvg.Usage = gm.calculateIntelGPUUsage(&gpuAvg, gpu, lastSnapshot, deltaCount)
|
||||
gpuAvg.PowerPkg = twoDecimals(deltaPowerPkg / float64(deltaCount))
|
||||
gpuAvg.PowerPkg = utils.TwoDecimals(deltaPowerPkg / float64(deltaCount))
|
||||
} else {
|
||||
gpuAvg.Usage = twoDecimals(deltaUsage / float64(deltaCount))
|
||||
gpuAvg.Usage = utils.TwoDecimals(deltaUsage / float64(deltaCount))
|
||||
}
|
||||
|
||||
gm.lastAvgData[id] = gpuAvg
|
||||
@@ -410,17 +411,17 @@ func (gm *GPUManager) calculateIntelGPUUsage(gpuAvg, gpu *system.GPUData, lastSn
|
||||
} else {
|
||||
deltaEngine = engine
|
||||
}
|
||||
gpuAvg.Engines[name] = twoDecimals(deltaEngine / float64(deltaCount))
|
||||
gpuAvg.Engines[name] = utils.TwoDecimals(deltaEngine / float64(deltaCount))
|
||||
maxEngineUsage = max(maxEngineUsage, deltaEngine/float64(deltaCount))
|
||||
}
|
||||
return twoDecimals(maxEngineUsage)
|
||||
return utils.TwoDecimals(maxEngineUsage)
|
||||
}
|
||||
|
||||
// updateInstantaneousValues updates values that should reflect current state, not averages
|
||||
func (gm *GPUManager) updateInstantaneousValues(gpuAvg *system.GPUData, gpu *system.GPUData) {
|
||||
gpuAvg.Temperature = twoDecimals(gpu.Temperature)
|
||||
gpuAvg.MemoryUsed = twoDecimals(gpu.MemoryUsed)
|
||||
gpuAvg.MemoryTotal = twoDecimals(gpu.MemoryTotal)
|
||||
gpuAvg.Temperature = utils.TwoDecimals(gpu.Temperature)
|
||||
gpuAvg.MemoryUsed = utils.TwoDecimals(gpu.MemoryUsed)
|
||||
gpuAvg.MemoryTotal = utils.TwoDecimals(gpu.MemoryTotal)
|
||||
}
|
||||
|
||||
// storeSnapshot saves the current GPU state for this cache key
|
||||
@@ -687,7 +688,7 @@ func (gm *GPUManager) resolveLegacyCollectorPriority(caps gpuCapabilities) []col
|
||||
priorities := make([]collectorSource, 0, 4)
|
||||
|
||||
if caps.hasNvidiaSmi && !caps.hasTegrastats {
|
||||
if nvml, _ := GetEnv("NVML"); nvml == "true" {
|
||||
if nvml, _ := utils.GetEnv("NVML"); nvml == "true" {
|
||||
priorities = append(priorities, collectorSourceNVML, collectorSourceNvidiaSMI)
|
||||
} else {
|
||||
priorities = append(priorities, collectorSourceNvidiaSMI)
|
||||
@@ -695,7 +696,7 @@ func (gm *GPUManager) resolveLegacyCollectorPriority(caps gpuCapabilities) []col
|
||||
}
|
||||
|
||||
if caps.hasRocmSmi {
|
||||
if val, _ := GetEnv("AMD_SYSFS"); val == "true" {
|
||||
if val, _ := utils.GetEnv("AMD_SYSFS"); val == "true" {
|
||||
priorities = append(priorities, collectorSourceAmdSysfs)
|
||||
} else {
|
||||
priorities = append(priorities, collectorSourceRocmSMI)
|
||||
@@ -728,7 +729,7 @@ func (gm *GPUManager) resolveLegacyCollectorPriority(caps gpuCapabilities) []col
|
||||
|
||||
// NewGPUManager creates and initializes a new GPUManager
|
||||
func NewGPUManager() (*GPUManager, error) {
|
||||
if skipGPU, _ := GetEnv("SKIP_GPU"); skipGPU == "true" {
|
||||
if skipGPU, _ := utils.GetEnv("SKIP_GPU"); skipGPU == "true" {
|
||||
return nil, nil
|
||||
}
|
||||
var gm GPUManager
|
||||
@@ -745,7 +746,7 @@ func NewGPUManager() (*GPUManager, error) {
|
||||
}
|
||||
|
||||
// if GPU_COLLECTOR is set, start user-defined collectors.
|
||||
if collectorConfig, ok := GetEnv("GPU_COLLECTOR"); ok && strings.TrimSpace(collectorConfig) != "" {
|
||||
if collectorConfig, ok := utils.GetEnv("GPU_COLLECTOR"); ok && strings.TrimSpace(collectorConfig) != "" {
|
||||
priorities := parseCollectorPriority(collectorConfig)
|
||||
if gm.startCollectorsByPriority(priorities, caps) == 0 {
|
||||
return nil, fmt.Errorf("no configured GPU collectors are available")
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
)
|
||||
|
||||
@@ -32,8 +33,8 @@ func (gm *GPUManager) hasAmdSysfs() bool {
|
||||
return false
|
||||
}
|
||||
for _, vendorPath := range cards {
|
||||
vendor, err := os.ReadFile(vendorPath)
|
||||
if err == nil && strings.TrimSpace(string(vendor)) == "0x1002" {
|
||||
vendor, err := utils.ReadStringFileLimited(vendorPath, 64)
|
||||
if err == nil && vendor == "0x1002" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
@@ -87,12 +88,11 @@ func (gm *GPUManager) collectAmdStats() error {
|
||||
|
||||
// isAmdGpu checks whether a DRM card path belongs to AMD vendor ID 0x1002.
|
||||
func isAmdGpu(cardPath string) bool {
|
||||
vendorPath := filepath.Join(cardPath, "device/vendor")
|
||||
vendor, err := os.ReadFile(vendorPath)
|
||||
vendor, err := utils.ReadStringFileLimited(filepath.Join(cardPath, "device/vendor"), 64)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return strings.TrimSpace(string(vendor)) == "0x1002"
|
||||
return vendor == "0x1002"
|
||||
}
|
||||
|
||||
// updateAmdGpuData reads GPU metrics from sysfs and updates the GPU data map.
|
||||
@@ -144,8 +144,8 @@ func (gm *GPUManager) updateAmdGpuData(cardPath string) bool {
|
||||
if usageErr == nil {
|
||||
gpu.Usage += usage
|
||||
}
|
||||
gpu.MemoryUsed = bytesToMegabytes(memUsed)
|
||||
gpu.MemoryTotal = bytesToMegabytes(memTotal)
|
||||
gpu.MemoryUsed = utils.BytesToMegabytes(memUsed)
|
||||
gpu.MemoryTotal = utils.BytesToMegabytes(memTotal)
|
||||
gpu.Temperature = temp
|
||||
gpu.Power += power
|
||||
gpu.Count++
|
||||
@@ -154,11 +154,11 @@ func (gm *GPUManager) updateAmdGpuData(cardPath string) bool {
|
||||
|
||||
// readSysfsFloat reads and parses a numeric value from a sysfs file.
|
||||
func readSysfsFloat(path string) (float64, error) {
|
||||
val, err := os.ReadFile(path)
|
||||
val, err := utils.ReadStringFileLimited(path, 64)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return strconv.ParseFloat(strings.TrimSpace(string(val)), 64)
|
||||
return strconv.ParseFloat(val, 64)
|
||||
}
|
||||
|
||||
// normalizeHexID normalizes hex IDs by trimming spaces, lowercasing, and dropping 0x.
|
||||
@@ -273,16 +273,16 @@ func cacheMissingAmdgpuName(deviceID, revisionID string) {
|
||||
// Falls back to showing the raw device ID if not found in the lookup table.
|
||||
func getAmdGpuName(devicePath string) string {
|
||||
// Try product_name first (works for some enterprise GPUs)
|
||||
if prod, err := os.ReadFile(filepath.Join(devicePath, "product_name")); err == nil {
|
||||
return strings.TrimSpace(string(prod))
|
||||
if prod, err := utils.ReadStringFileLimited(filepath.Join(devicePath, "product_name"), 128); err == nil {
|
||||
return prod
|
||||
}
|
||||
|
||||
// Read PCI device ID and look it up
|
||||
if deviceID, err := os.ReadFile(filepath.Join(devicePath, "device")); err == nil {
|
||||
id := normalizeHexID(string(deviceID))
|
||||
if deviceID, err := utils.ReadStringFileLimited(filepath.Join(devicePath, "device"), 64); err == nil {
|
||||
id := normalizeHexID(deviceID)
|
||||
revision := ""
|
||||
if revBytes, revErr := os.ReadFile(filepath.Join(devicePath, "revision")); revErr == nil {
|
||||
revision = normalizeHexID(string(revBytes))
|
||||
if rev, revErr := utils.ReadStringFileLimited(filepath.Join(devicePath, "revision"), 64); revErr == nil {
|
||||
revision = normalizeHexID(rev)
|
||||
}
|
||||
|
||||
if name, found, done := getCachedAmdgpuName(id, revision); found {
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
@@ -128,14 +129,14 @@ func TestUpdateAmdGpuDataWithFakeSysfs(t *testing.T) {
|
||||
{
|
||||
name: "sums vram and gtt when gtt is present",
|
||||
writeGTT: true,
|
||||
wantMemoryUsed: bytesToMegabytes(1073741824 + 536870912),
|
||||
wantMemoryTotal: bytesToMegabytes(2147483648 + 4294967296),
|
||||
wantMemoryUsed: utils.BytesToMegabytes(1073741824 + 536870912),
|
||||
wantMemoryTotal: utils.BytesToMegabytes(2147483648 + 4294967296),
|
||||
},
|
||||
{
|
||||
name: "falls back to vram when gtt is missing",
|
||||
writeGTT: false,
|
||||
wantMemoryUsed: bytesToMegabytes(1073741824),
|
||||
wantMemoryTotal: bytesToMegabytes(2147483648),
|
||||
wantMemoryUsed: utils.BytesToMegabytes(1073741824),
|
||||
wantMemoryTotal: utils.BytesToMegabytes(2147483648),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
)
|
||||
|
||||
@@ -52,7 +53,7 @@ func (gm *GPUManager) updateIntelFromStats(sample *intelGpuStats) bool {
|
||||
func (gm *GPUManager) collectIntelStats() (err error) {
|
||||
// Build command arguments, optionally selecting a device via -d
|
||||
args := []string{"-s", intelGpuStatsInterval, "-l"}
|
||||
if dev, ok := GetEnv("INTEL_GPU_DEVICE"); ok && dev != "" {
|
||||
if dev, ok := utils.GetEnv("INTEL_GPU_DEVICE"); ok && dev != "" {
|
||||
args = append(args, "-d", dev)
|
||||
}
|
||||
cmd := exec.Command(intelGpuStatsCmd, args...)
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
)
|
||||
|
||||
@@ -80,10 +81,10 @@ func (gm *GPUManager) updateNvtopSnapshots(snapshots []nvtopSnapshot) bool {
|
||||
gpu.Temperature = parseNvtopNumber(*sample.Temp)
|
||||
}
|
||||
if sample.MemUsed != nil {
|
||||
gpu.MemoryUsed = bytesToMegabytes(parseNvtopNumber(*sample.MemUsed))
|
||||
gpu.MemoryUsed = utils.BytesToMegabytes(parseNvtopNumber(*sample.MemUsed))
|
||||
}
|
||||
if sample.MemTotal != nil {
|
||||
gpu.MemoryTotal = bytesToMegabytes(parseNvtopNumber(*sample.MemTotal))
|
||||
gpu.MemoryTotal = utils.BytesToMegabytes(parseNvtopNumber(*sample.MemTotal))
|
||||
}
|
||||
if sample.GpuUtil != nil {
|
||||
gpu.Usage += parseNvtopNumber(*sample.GpuUtil)
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
@@ -265,8 +266,8 @@ func TestParseNvtopData(t *testing.T) {
|
||||
assert.Equal(t, 48.0, g0.Temperature)
|
||||
assert.Equal(t, 5.0, g0.Usage)
|
||||
assert.Equal(t, 13.0, g0.Power)
|
||||
assert.Equal(t, bytesToMegabytes(349372416), g0.MemoryUsed)
|
||||
assert.Equal(t, bytesToMegabytes(4294967296), g0.MemoryTotal)
|
||||
assert.Equal(t, utils.BytesToMegabytes(349372416), g0.MemoryUsed)
|
||||
assert.Equal(t, utils.BytesToMegabytes(4294967296), g0.MemoryTotal)
|
||||
assert.Equal(t, 1.0, g0.Count)
|
||||
|
||||
g1, ok := gm.GpuDataMap["n1"]
|
||||
@@ -275,8 +276,8 @@ func TestParseNvtopData(t *testing.T) {
|
||||
assert.Equal(t, 48.0, g1.Temperature)
|
||||
assert.Equal(t, 12.0, g1.Usage)
|
||||
assert.Equal(t, 9.0, g1.Power)
|
||||
assert.Equal(t, bytesToMegabytes(1213784064), g1.MemoryUsed)
|
||||
assert.Equal(t, bytesToMegabytes(16929173504), g1.MemoryTotal)
|
||||
assert.Equal(t, utils.BytesToMegabytes(1213784064), g1.MemoryUsed)
|
||||
assert.Equal(t, utils.BytesToMegabytes(16929173504), g1.MemoryTotal)
|
||||
assert.Equal(t, 1.0, g1.Count)
|
||||
}
|
||||
|
||||
|
||||
233
agent/mdraid_linux.go
Normal file
233
agent/mdraid_linux.go
Normal file
@@ -0,0 +1,233 @@
|
||||
//go:build linux
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/smart"
|
||||
)
|
||||
|
||||
// mdraidSysfsRoot is the sysfs mount point used for md array lookups. It is a
// variable so tests can redirect it; production value is "/sys".
var mdraidSysfsRoot = "/sys"

// mdraidHealth holds the values read for one md array from
// <sysfs>/block/<name>/md.
type mdraidHealth struct {
	level         string // contents of md/level
	arrayState    string // contents of md/array_state
	degraded      uint64 // value of md/degraded
	raidDisks     uint64 // value of md/raid_disks
	syncAction    string // contents of md/sync_action
	syncCompleted string // contents of md/sync_completed
	syncSpeed     string // contents of md/sync_speed
	mismatchCnt   uint64 // value of md/mismatch_cnt
	capacity      uint64 // size of the block device (presumably bytes; confirm against readMdraidBlockCapacityBytes)
}
|
||||
|
||||
// scanMdraidDevices discovers Linux md arrays exposed in sysfs.
|
||||
func scanMdraidDevices() []*DeviceInfo {
|
||||
blockDir := filepath.Join(mdraidSysfsRoot, "block")
|
||||
entries, err := os.ReadDir(blockDir)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
devices := make([]*DeviceInfo, 0, 2)
|
||||
for _, ent := range entries {
|
||||
name := ent.Name()
|
||||
if !isMdraidBlockName(name) {
|
||||
continue
|
||||
}
|
||||
mdDir := filepath.Join(blockDir, name, "md")
|
||||
if !utils.FileExists(filepath.Join(mdDir, "array_state")) {
|
||||
continue
|
||||
}
|
||||
|
||||
devPath := filepath.Join("/dev", name)
|
||||
devices = append(devices, &DeviceInfo{
|
||||
Name: devPath,
|
||||
Type: "mdraid",
|
||||
InfoName: devPath + " [mdraid]",
|
||||
Protocol: "MD",
|
||||
})
|
||||
}
|
||||
|
||||
return devices
|
||||
}
|
||||
|
||||
// collectMdraidHealth reads mdraid health and stores it in SmartDataMap.
|
||||
func (sm *SmartManager) collectMdraidHealth(deviceInfo *DeviceInfo) (bool, error) {
|
||||
if deviceInfo == nil || deviceInfo.Name == "" {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
base := filepath.Base(deviceInfo.Name)
|
||||
if !isMdraidBlockName(base) && !strings.EqualFold(deviceInfo.Type, "mdraid") {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
health, ok := readMdraidHealth(base)
|
||||
if !ok {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
deviceInfo.Type = "mdraid"
|
||||
key := fmt.Sprintf("mdraid:%s", base)
|
||||
status := mdraidSmartStatus(health)
|
||||
|
||||
attrs := make([]*smart.SmartAttribute, 0, 10)
|
||||
if health.arrayState != "" {
|
||||
attrs = append(attrs, &smart.SmartAttribute{Name: "ArrayState", RawString: health.arrayState})
|
||||
}
|
||||
if health.level != "" {
|
||||
attrs = append(attrs, &smart.SmartAttribute{Name: "RaidLevel", RawString: health.level})
|
||||
}
|
||||
if health.raidDisks > 0 {
|
||||
attrs = append(attrs, &smart.SmartAttribute{Name: "RaidDisks", RawValue: health.raidDisks})
|
||||
}
|
||||
if health.degraded > 0 {
|
||||
attrs = append(attrs, &smart.SmartAttribute{Name: "Degraded", RawValue: health.degraded})
|
||||
}
|
||||
if health.syncAction != "" {
|
||||
attrs = append(attrs, &smart.SmartAttribute{Name: "SyncAction", RawString: health.syncAction})
|
||||
}
|
||||
if health.syncCompleted != "" {
|
||||
attrs = append(attrs, &smart.SmartAttribute{Name: "SyncCompleted", RawString: health.syncCompleted})
|
||||
}
|
||||
if health.syncSpeed != "" {
|
||||
attrs = append(attrs, &smart.SmartAttribute{Name: "SyncSpeed", RawString: health.syncSpeed})
|
||||
}
|
||||
if health.mismatchCnt > 0 {
|
||||
attrs = append(attrs, &smart.SmartAttribute{Name: "MismatchCount", RawValue: health.mismatchCnt})
|
||||
}
|
||||
|
||||
sm.Lock()
|
||||
defer sm.Unlock()
|
||||
|
||||
if _, exists := sm.SmartDataMap[key]; !exists {
|
||||
sm.SmartDataMap[key] = &smart.SmartData{}
|
||||
}
|
||||
|
||||
data := sm.SmartDataMap[key]
|
||||
data.ModelName = "Linux MD RAID"
|
||||
if health.level != "" {
|
||||
data.ModelName = "Linux MD RAID (" + health.level + ")"
|
||||
}
|
||||
data.Capacity = health.capacity
|
||||
data.SmartStatus = status
|
||||
data.DiskName = filepath.Join("/dev", base)
|
||||
data.DiskType = "mdraid"
|
||||
data.Attributes = attrs
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// readMdraidHealth reads md array health fields from sysfs.
|
||||
func readMdraidHealth(blockName string) (mdraidHealth, bool) {
|
||||
var out mdraidHealth
|
||||
|
||||
if !isMdraidBlockName(blockName) {
|
||||
return out, false
|
||||
}
|
||||
|
||||
mdDir := filepath.Join(mdraidSysfsRoot, "block", blockName, "md")
|
||||
arrayState, okState := utils.ReadStringFileOK(filepath.Join(mdDir, "array_state"))
|
||||
if !okState {
|
||||
return out, false
|
||||
}
|
||||
|
||||
out.arrayState = arrayState
|
||||
out.level = utils.ReadStringFile(filepath.Join(mdDir, "level"))
|
||||
out.syncAction = utils.ReadStringFile(filepath.Join(mdDir, "sync_action"))
|
||||
out.syncCompleted = utils.ReadStringFile(filepath.Join(mdDir, "sync_completed"))
|
||||
out.syncSpeed = utils.ReadStringFile(filepath.Join(mdDir, "sync_speed"))
|
||||
|
||||
if val, ok := utils.ReadUintFile(filepath.Join(mdDir, "raid_disks")); ok {
|
||||
out.raidDisks = val
|
||||
}
|
||||
if val, ok := utils.ReadUintFile(filepath.Join(mdDir, "degraded")); ok {
|
||||
out.degraded = val
|
||||
}
|
||||
if val, ok := utils.ReadUintFile(filepath.Join(mdDir, "mismatch_cnt")); ok {
|
||||
out.mismatchCnt = val
|
||||
}
|
||||
|
||||
if capBytes, ok := readMdraidBlockCapacityBytes(blockName, mdraidSysfsRoot); ok {
|
||||
out.capacity = capBytes
|
||||
}
|
||||
|
||||
return out, true
|
||||
}
|
||||
|
||||
// mdraidSmartStatus maps md state/sync signals to a SMART-like status.
|
||||
func mdraidSmartStatus(health mdraidHealth) string {
|
||||
state := strings.ToLower(strings.TrimSpace(health.arrayState))
|
||||
switch state {
|
||||
case "inactive", "faulty", "broken", "stopped":
|
||||
return "FAILED"
|
||||
}
|
||||
// During rebuild/recovery, arrays are often temporarily degraded; report as
|
||||
// warning instead of hard failure while synchronization is in progress.
|
||||
syncAction := strings.ToLower(strings.TrimSpace(health.syncAction))
|
||||
switch syncAction {
|
||||
case "resync", "recover", "reshape":
|
||||
return "WARNING"
|
||||
}
|
||||
if health.degraded > 0 {
|
||||
return "FAILED"
|
||||
}
|
||||
switch syncAction {
|
||||
case "check", "repair":
|
||||
return "WARNING"
|
||||
}
|
||||
switch state {
|
||||
case "clean", "active", "active-idle", "write-pending", "read-auto", "readonly":
|
||||
return "PASSED"
|
||||
}
|
||||
return "UNKNOWN"
|
||||
}
|
||||
|
||||
// isMdraidBlockName reports whether name is a bare md block-device name: the
// literal prefix "md" followed by one or more ASCII digits (for example "md0"
// or "md127"). Partition names, paths and non-ASCII digits are rejected.
func isMdraidBlockName(name string) bool {
	const prefix = "md"
	// Must be strictly longer than the prefix so at least one digit follows.
	if len(name) <= len(prefix) || name[:len(prefix)] != prefix {
		return false
	}
	for _, b := range []byte(name[len(prefix):]) {
		if b < '0' || b > '9' {
			return false
		}
	}
	return true
}
|
||||
|
||||
// readMdraidBlockCapacityBytes converts block size metadata into bytes.
|
||||
func readMdraidBlockCapacityBytes(blockName, root string) (uint64, bool) {
|
||||
sizePath := filepath.Join(root, "block", blockName, "size")
|
||||
lbsPath := filepath.Join(root, "block", blockName, "queue", "logical_block_size")
|
||||
|
||||
sizeStr, ok := utils.ReadStringFileOK(sizePath)
|
||||
if !ok {
|
||||
return 0, false
|
||||
}
|
||||
sectors, err := strconv.ParseUint(sizeStr, 10, 64)
|
||||
if err != nil || sectors == 0 {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
logicalBlockSize := uint64(512)
|
||||
if lbsStr, ok := utils.ReadStringFileOK(lbsPath); ok {
|
||||
if parsed, err := strconv.ParseUint(lbsStr, 10, 64); err == nil && parsed > 0 {
|
||||
logicalBlockSize = parsed
|
||||
}
|
||||
}
|
||||
|
||||
return sectors * logicalBlockSize, true
|
||||
}
|
||||
103
agent/mdraid_linux_test.go
Normal file
103
agent/mdraid_linux_test.go
Normal file
@@ -0,0 +1,103 @@
|
||||
//go:build linux
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/smart"
|
||||
)
|
||||
|
||||
func TestMdraidMockSysfsScanAndCollect(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
prev := mdraidSysfsRoot
|
||||
mdraidSysfsRoot = tmp
|
||||
t.Cleanup(func() { mdraidSysfsRoot = prev })
|
||||
|
||||
mdDir := filepath.Join(tmp, "block", "md0", "md")
|
||||
queueDir := filepath.Join(tmp, "block", "md0", "queue")
|
||||
if err := os.MkdirAll(mdDir, 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.MkdirAll(queueDir, 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
write := func(path, content string) {
|
||||
t.Helper()
|
||||
if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
write(filepath.Join(mdDir, "array_state"), "active\n")
|
||||
write(filepath.Join(mdDir, "level"), "raid1\n")
|
||||
write(filepath.Join(mdDir, "raid_disks"), "2\n")
|
||||
write(filepath.Join(mdDir, "degraded"), "0\n")
|
||||
write(filepath.Join(mdDir, "sync_action"), "resync\n")
|
||||
write(filepath.Join(mdDir, "sync_completed"), "10%\n")
|
||||
write(filepath.Join(mdDir, "sync_speed"), "100M\n")
|
||||
write(filepath.Join(mdDir, "mismatch_cnt"), "0\n")
|
||||
write(filepath.Join(queueDir, "logical_block_size"), "512\n")
|
||||
write(filepath.Join(tmp, "block", "md0", "size"), "2048\n")
|
||||
|
||||
devs := scanMdraidDevices()
|
||||
if len(devs) != 1 {
|
||||
t.Fatalf("scanMdraidDevices() = %d devices, want 1", len(devs))
|
||||
}
|
||||
if devs[0].Name != "/dev/md0" || devs[0].Type != "mdraid" {
|
||||
t.Fatalf("scanMdraidDevices()[0] = %+v, want Name=/dev/md0 Type=mdraid", devs[0])
|
||||
}
|
||||
|
||||
sm := &SmartManager{SmartDataMap: map[string]*smart.SmartData{}}
|
||||
ok, err := sm.collectMdraidHealth(devs[0])
|
||||
if err != nil || !ok {
|
||||
t.Fatalf("collectMdraidHealth() = (ok=%v, err=%v), want (true,nil)", ok, err)
|
||||
}
|
||||
if len(sm.SmartDataMap) != 1 {
|
||||
t.Fatalf("SmartDataMap len=%d, want 1", len(sm.SmartDataMap))
|
||||
}
|
||||
var got *smart.SmartData
|
||||
for _, v := range sm.SmartDataMap {
|
||||
got = v
|
||||
break
|
||||
}
|
||||
if got == nil {
|
||||
t.Fatalf("SmartDataMap value nil")
|
||||
}
|
||||
if got.DiskType != "mdraid" || got.DiskName != "/dev/md0" {
|
||||
t.Fatalf("disk fields = (type=%q name=%q), want (mdraid,/dev/md0)", got.DiskType, got.DiskName)
|
||||
}
|
||||
if got.SmartStatus != "WARNING" {
|
||||
t.Fatalf("SmartStatus=%q, want WARNING", got.SmartStatus)
|
||||
}
|
||||
if got.ModelName == "" || got.Capacity == 0 {
|
||||
t.Fatalf("identity fields = (model=%q cap=%d), want non-empty model and cap>0", got.ModelName, got.Capacity)
|
||||
}
|
||||
if len(got.Attributes) < 5 {
|
||||
t.Fatalf("attributes len=%d, want >= 5", len(got.Attributes))
|
||||
}
|
||||
}
|
||||
|
||||
func TestMdraidSmartStatus(t *testing.T) {
|
||||
if got := mdraidSmartStatus(mdraidHealth{arrayState: "inactive"}); got != "FAILED" {
|
||||
t.Fatalf("mdraidSmartStatus(inactive) = %q, want FAILED", got)
|
||||
}
|
||||
if got := mdraidSmartStatus(mdraidHealth{arrayState: "active", degraded: 1, syncAction: "recover"}); got != "WARNING" {
|
||||
t.Fatalf("mdraidSmartStatus(degraded+recover) = %q, want WARNING", got)
|
||||
}
|
||||
if got := mdraidSmartStatus(mdraidHealth{arrayState: "active", degraded: 1}); got != "FAILED" {
|
||||
t.Fatalf("mdraidSmartStatus(degraded) = %q, want FAILED", got)
|
||||
}
|
||||
if got := mdraidSmartStatus(mdraidHealth{arrayState: "active", syncAction: "recover"}); got != "WARNING" {
|
||||
t.Fatalf("mdraidSmartStatus(recover) = %q, want WARNING", got)
|
||||
}
|
||||
if got := mdraidSmartStatus(mdraidHealth{arrayState: "clean"}); got != "PASSED" {
|
||||
t.Fatalf("mdraidSmartStatus(clean) = %q, want PASSED", got)
|
||||
}
|
||||
if got := mdraidSmartStatus(mdraidHealth{arrayState: "unknown"}); got != "UNKNOWN" {
|
||||
t.Fatalf("mdraidSmartStatus(unknown) = %q, want UNKNOWN", got)
|
||||
}
|
||||
}
|
||||
11
agent/mdraid_stub.go
Normal file
11
agent/mdraid_stub.go
Normal file
@@ -0,0 +1,11 @@
|
||||
//go:build !linux
|
||||
|
||||
package agent
|
||||
|
||||
// scanMdraidDevices is the stand-in used on non-Linux builds (this file is
// guarded by a !linux build constraint). It always returns nil, i.e. no mdraid
// devices are reported.
func scanMdraidDevices() []*DeviceInfo {
|
||||
return nil
|
||||
}
|
||||
|
||||
// collectMdraidHealth is the stand-in used on non-Linux builds (this file is
// guarded by a !linux build constraint). It ignores deviceInfo and always
// returns (false, nil): the device was not handled and no error occurred.
func (sm *SmartManager) collectMdraidHealth(deviceInfo *DeviceInfo) (bool, error) {
|
||||
return false, nil
|
||||
}
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/agent/deltatracker"
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
psutilNet "github.com/shirou/gopsutil/v4/net"
|
||||
)
|
||||
@@ -94,7 +95,7 @@ func (a *Agent) initializeNetIoStats() {
|
||||
a.netInterfaces = make(map[string]struct{}, 0)
|
||||
|
||||
// parse NICS env var for whitelist / blacklist
|
||||
nicsEnvVal, nicsEnvExists := GetEnv("NICS")
|
||||
nicsEnvVal, nicsEnvExists := utils.GetEnv("NICS")
|
||||
var nicCfg *NicConfig
|
||||
if nicsEnvExists {
|
||||
nicCfg = newNicConfig(nicsEnvVal)
|
||||
@@ -103,10 +104,7 @@ func (a *Agent) initializeNetIoStats() {
|
||||
// get current network I/O stats and record valid interfaces
|
||||
if netIO, err := psutilNet.IOCounters(true); err == nil {
|
||||
for _, v := range netIO {
|
||||
if nicsEnvExists && !isValidNic(v.Name, nicCfg) {
|
||||
continue
|
||||
}
|
||||
if a.skipNetworkInterface(v) {
|
||||
if skipNetworkInterface(v, nicCfg) {
|
||||
continue
|
||||
}
|
||||
slog.Info("Detected network interface", "name", v.Name, "sent", v.BytesSent, "recv", v.BytesRecv)
|
||||
@@ -215,10 +213,8 @@ func (a *Agent) applyNetworkTotals(
|
||||
totalBytesSent, totalBytesRecv uint64,
|
||||
bytesSentPerSecond, bytesRecvPerSecond uint64,
|
||||
) {
|
||||
networkSentPs := bytesToMegabytes(float64(bytesSentPerSecond))
|
||||
networkRecvPs := bytesToMegabytes(float64(bytesRecvPerSecond))
|
||||
if networkSentPs > 10_000 || networkRecvPs > 10_000 {
|
||||
slog.Warn("Invalid net stats. Resetting.", "sent", networkSentPs, "recv", networkRecvPs)
|
||||
if bytesSentPerSecond > 10_000_000_000 || bytesRecvPerSecond > 10_000_000_000 {
|
||||
slog.Warn("Invalid net stats. Resetting.", "sent", bytesSentPerSecond, "recv", bytesRecvPerSecond)
|
||||
for _, v := range netIO {
|
||||
if _, exists := a.netInterfaces[v.Name]; !exists {
|
||||
continue
|
||||
@@ -228,21 +224,29 @@ func (a *Agent) applyNetworkTotals(
|
||||
a.initializeNetIoStats()
|
||||
delete(a.netIoStats, cacheTimeMs)
|
||||
delete(a.netInterfaceDeltaTrackers, cacheTimeMs)
|
||||
systemStats.NetworkSent = 0
|
||||
systemStats.NetworkRecv = 0
|
||||
systemStats.Bandwidth[0], systemStats.Bandwidth[1] = 0, 0
|
||||
return
|
||||
}
|
||||
|
||||
systemStats.NetworkSent = networkSentPs
|
||||
systemStats.NetworkRecv = networkRecvPs
|
||||
systemStats.Bandwidth[0], systemStats.Bandwidth[1] = bytesSentPerSecond, bytesRecvPerSecond
|
||||
nis.BytesSent = totalBytesSent
|
||||
nis.BytesRecv = totalBytesRecv
|
||||
a.netIoStats[cacheTimeMs] = nis
|
||||
}
|
||||
|
||||
func (a *Agent) skipNetworkInterface(v psutilNet.IOCountersStat) bool {
|
||||
// skipNetworkInterface returns true if the network interface should be ignored.
|
||||
func skipNetworkInterface(v psutilNet.IOCountersStat, nicCfg *NicConfig) bool {
|
||||
if nicCfg != nil {
|
||||
if !isValidNic(v.Name, nicCfg) {
|
||||
return true
|
||||
}
|
||||
// In whitelist mode, we honor explicit inclusion without auto-filtering.
|
||||
if !nicCfg.isBlacklist {
|
||||
return false
|
||||
}
|
||||
// In blacklist mode, still apply the auto-filter below.
|
||||
}
|
||||
|
||||
switch {
|
||||
case strings.HasPrefix(v.Name, "lo"),
|
||||
strings.HasPrefix(v.Name, "docker"),
|
||||
|
||||
@@ -261,6 +261,39 @@ func TestNewNicConfig(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
func TestSkipNetworkInterface(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
nic psutilNet.IOCountersStat
|
||||
nicCfg *NicConfig
|
||||
expectSkip bool
|
||||
}{
|
||||
{"loopback lo", psutilNet.IOCountersStat{Name: "lo", BytesSent: 100, BytesRecv: 100}, nil, true},
|
||||
{"loopback lo0", psutilNet.IOCountersStat{Name: "lo0", BytesSent: 100, BytesRecv: 100}, nil, true},
|
||||
{"docker prefix", psutilNet.IOCountersStat{Name: "docker0", BytesSent: 100, BytesRecv: 100}, nil, true},
|
||||
{"br- prefix", psutilNet.IOCountersStat{Name: "br-lan", BytesSent: 100, BytesRecv: 100}, nil, true},
|
||||
{"veth prefix", psutilNet.IOCountersStat{Name: "veth0abc", BytesSent: 100, BytesRecv: 100}, nil, true},
|
||||
{"bond prefix", psutilNet.IOCountersStat{Name: "bond0", BytesSent: 100, BytesRecv: 100}, nil, true},
|
||||
{"cali prefix", psutilNet.IOCountersStat{Name: "cali1234", BytesSent: 100, BytesRecv: 100}, nil, true},
|
||||
{"zero BytesRecv", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 100, BytesRecv: 0}, nil, true},
|
||||
{"zero BytesSent", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 0, BytesRecv: 100}, nil, true},
|
||||
{"both zero", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 0, BytesRecv: 0}, nil, true},
|
||||
{"normal eth0", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 100, BytesRecv: 200}, nil, false},
|
||||
{"normal wlan0", psutilNet.IOCountersStat{Name: "wlan0", BytesSent: 1, BytesRecv: 1}, nil, false},
|
||||
{"whitelist overrides skip (docker)", psutilNet.IOCountersStat{Name: "docker0", BytesSent: 100, BytesRecv: 100}, newNicConfig("docker0"), false},
|
||||
{"whitelist overrides skip (lo)", psutilNet.IOCountersStat{Name: "lo", BytesSent: 100, BytesRecv: 100}, newNicConfig("lo"), false},
|
||||
{"whitelist exclusion", psutilNet.IOCountersStat{Name: "eth1", BytesSent: 100, BytesRecv: 100}, newNicConfig("eth0"), true},
|
||||
{"blacklist skip lo", psutilNet.IOCountersStat{Name: "lo", BytesSent: 100, BytesRecv: 100}, newNicConfig("-eth0"), true},
|
||||
{"blacklist explicit eth0", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 100, BytesRecv: 100}, newNicConfig("-eth0"), true},
|
||||
{"blacklist allow eth1", psutilNet.IOCountersStat{Name: "eth1", BytesSent: 100, BytesRecv: 100}, newNicConfig("-eth0"), false},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
assert.Equal(t, tt.expectSkip, skipNetworkInterface(tt.nic, tt.nicCfg))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnsureNetworkInterfacesMap(t *testing.T) {
|
||||
var a Agent
|
||||
var stats system.Stats
|
||||
@@ -383,8 +416,6 @@ func TestApplyNetworkTotals(t *testing.T) {
|
||||
totalBytesSent uint64
|
||||
totalBytesRecv uint64
|
||||
expectReset bool
|
||||
expectedNetworkSent float64
|
||||
expectedNetworkRecv float64
|
||||
expectedBandwidthSent uint64
|
||||
expectedBandwidthRecv uint64
|
||||
}{
|
||||
@@ -395,8 +426,6 @@ func TestApplyNetworkTotals(t *testing.T) {
|
||||
totalBytesSent: 10000000,
|
||||
totalBytesRecv: 20000000,
|
||||
expectReset: false,
|
||||
expectedNetworkSent: 0.95, // ~1 MB/s rounded to 2 decimals
|
||||
expectedNetworkRecv: 1.91, // ~2 MB/s rounded to 2 decimals
|
||||
expectedBandwidthSent: 1000000,
|
||||
expectedBandwidthRecv: 2000000,
|
||||
},
|
||||
@@ -424,18 +453,6 @@ func TestApplyNetworkTotals(t *testing.T) {
|
||||
totalBytesRecv: 20000000,
|
||||
expectReset: true,
|
||||
},
|
||||
{
|
||||
name: "Valid network stats - at threshold boundary",
|
||||
bytesSentPerSecond: 10485750000, // ~9999.99 MB/s (rounds to 9999.99)
|
||||
bytesRecvPerSecond: 10485750000, // ~9999.99 MB/s (rounds to 9999.99)
|
||||
totalBytesSent: 10000000,
|
||||
totalBytesRecv: 20000000,
|
||||
expectReset: false,
|
||||
expectedNetworkSent: 9999.99,
|
||||
expectedNetworkRecv: 9999.99,
|
||||
expectedBandwidthSent: 10485750000,
|
||||
expectedBandwidthRecv: 10485750000,
|
||||
},
|
||||
{
|
||||
name: "Zero values",
|
||||
bytesSentPerSecond: 0,
|
||||
@@ -443,8 +460,6 @@ func TestApplyNetworkTotals(t *testing.T) {
|
||||
totalBytesSent: 0,
|
||||
totalBytesRecv: 0,
|
||||
expectReset: false,
|
||||
expectedNetworkSent: 0.0,
|
||||
expectedNetworkRecv: 0.0,
|
||||
expectedBandwidthSent: 0,
|
||||
expectedBandwidthRecv: 0,
|
||||
},
|
||||
@@ -481,14 +496,10 @@ func TestApplyNetworkTotals(t *testing.T) {
|
||||
// Should have reset network tracking state - maps cleared and stats zeroed
|
||||
assert.NotContains(t, a.netIoStats, cacheTimeMs, "cache entry should be cleared after reset")
|
||||
assert.NotContains(t, a.netInterfaceDeltaTrackers, cacheTimeMs, "tracker should be cleared on reset")
|
||||
assert.Zero(t, systemStats.NetworkSent)
|
||||
assert.Zero(t, systemStats.NetworkRecv)
|
||||
assert.Zero(t, systemStats.Bandwidth[0])
|
||||
assert.Zero(t, systemStats.Bandwidth[1])
|
||||
} else {
|
||||
// Should have applied stats
|
||||
assert.Equal(t, tt.expectedNetworkSent, systemStats.NetworkSent)
|
||||
assert.Equal(t, tt.expectedNetworkRecv, systemStats.NetworkRecv)
|
||||
assert.Equal(t, tt.expectedBandwidthSent, systemStats.Bandwidth[0])
|
||||
assert.Equal(t, tt.expectedBandwidthRecv, systemStats.Bandwidth[1])
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
|
||||
"github.com/shirou/gopsutil/v4/common"
|
||||
@@ -26,9 +27,9 @@ type SensorConfig struct {
|
||||
}
|
||||
|
||||
func (a *Agent) newSensorConfig() *SensorConfig {
|
||||
primarySensor, _ := GetEnv("PRIMARY_SENSOR")
|
||||
sysSensors, _ := GetEnv("SYS_SENSORS")
|
||||
sensorsEnvVal, sensorsSet := GetEnv("SENSORS")
|
||||
primarySensor, _ := utils.GetEnv("PRIMARY_SENSOR")
|
||||
sysSensors, _ := utils.GetEnv("SYS_SENSORS")
|
||||
sensorsEnvVal, sensorsSet := utils.GetEnv("SENSORS")
|
||||
skipCollection := sensorsSet && sensorsEnvVal == ""
|
||||
|
||||
return a.newSensorConfigWithEnv(primarySensor, sysSensors, sensorsEnvVal, skipCollection)
|
||||
@@ -135,7 +136,7 @@ func (a *Agent) updateTemperatures(systemStats *system.Stats) {
|
||||
case sensorName:
|
||||
a.systemInfo.DashboardTemp = sensor.Temperature
|
||||
}
|
||||
systemStats.Temperatures[sensorName] = twoDecimals(sensor.Temperature)
|
||||
systemStats.Temperatures[sensorName] = utils.TwoDecimals(sensor.Temperature)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel"
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/common"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
|
||||
@@ -36,7 +37,7 @@ var hubVersions map[string]semver.Version
|
||||
// and begins listening for connections. Returns an error if the server
|
||||
// is already running or if there's an issue starting the server.
|
||||
func (a *Agent) StartServer(opts ServerOptions) error {
|
||||
if disableSSH, _ := GetEnv("DISABLE_SSH"); disableSSH == "true" {
|
||||
if disableSSH, _ := utils.GetEnv("DISABLE_SSH"); disableSSH == "true" {
|
||||
return errors.New("SSH disabled")
|
||||
}
|
||||
if a.server != nil {
|
||||
@@ -238,11 +239,11 @@ func ParseKeys(input string) ([]gossh.PublicKey, error) {
|
||||
// and finally defaults to ":45876".
|
||||
func GetAddress(addr string) string {
|
||||
if addr == "" {
|
||||
addr, _ = GetEnv("LISTEN")
|
||||
addr, _ = utils.GetEnv("LISTEN")
|
||||
}
|
||||
if addr == "" {
|
||||
// Legacy PORT environment variable support
|
||||
addr, _ = GetEnv("PORT")
|
||||
addr, _ = utils.GetEnv("PORT")
|
||||
}
|
||||
if addr == "" {
|
||||
return ":45876"
|
||||
@@ -258,7 +259,7 @@ func GetAddress(addr string) string {
|
||||
// It checks the NETWORK environment variable first, then infers from
|
||||
// the address format: addresses starting with "/" are "unix", others are "tcp".
|
||||
func GetNetwork(addr string) string {
|
||||
if network, ok := GetEnv("NETWORK"); ok && network != "" {
|
||||
if network, ok := utils.GetEnv("NETWORK"); ok && network != "" {
|
||||
return network
|
||||
}
|
||||
if strings.HasPrefix(addr, "/") {
|
||||
|
||||
@@ -18,6 +18,7 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/smart"
|
||||
)
|
||||
|
||||
@@ -156,7 +157,7 @@ func (sm *SmartManager) ScanDevices(force bool) error {
|
||||
currentDevices := sm.devicesSnapshot()
|
||||
|
||||
var configuredDevices []*DeviceInfo
|
||||
if configuredRaw, ok := GetEnv("SMART_DEVICES"); ok {
|
||||
if configuredRaw, ok := utils.GetEnv("SMART_DEVICES"); ok {
|
||||
slog.Info("SMART_DEVICES", "value", configuredRaw)
|
||||
config := strings.TrimSpace(configuredRaw)
|
||||
if config == "" {
|
||||
@@ -199,6 +200,13 @@ func (sm *SmartManager) ScanDevices(force bool) error {
|
||||
hasValidScan = true
|
||||
}
|
||||
|
||||
// Add Linux mdraid arrays by reading sysfs health fields. This does not
|
||||
// require smartctl and does not scan the whole device.
|
||||
if raidDevices := scanMdraidDevices(); len(raidDevices) > 0 {
|
||||
scannedDevices = append(scannedDevices, raidDevices...)
|
||||
hasValidScan = true
|
||||
}
|
||||
|
||||
finalDevices := mergeDeviceLists(currentDevices, scannedDevices, configuredDevices)
|
||||
finalDevices = sm.filterExcludedDevices(finalDevices)
|
||||
sm.updateSmartDevices(finalDevices)
|
||||
@@ -215,7 +223,7 @@ func (sm *SmartManager) ScanDevices(force bool) error {
|
||||
}
|
||||
|
||||
func (sm *SmartManager) parseConfiguredDevices(config string) ([]*DeviceInfo, error) {
|
||||
splitChar := os.Getenv("SMART_DEVICES_SEPARATOR")
|
||||
splitChar, _ := utils.GetEnv("SMART_DEVICES_SEPARATOR")
|
||||
if splitChar == "" {
|
||||
splitChar = ","
|
||||
}
|
||||
@@ -253,7 +261,7 @@ func (sm *SmartManager) parseConfiguredDevices(config string) ([]*DeviceInfo, er
|
||||
}
|
||||
|
||||
func (sm *SmartManager) refreshExcludedDevices() {
|
||||
rawValue, _ := GetEnv("EXCLUDE_SMART")
|
||||
rawValue, _ := utils.GetEnv("EXCLUDE_SMART")
|
||||
sm.excludedDevices = make(map[string]struct{})
|
||||
|
||||
for entry := range strings.SplitSeq(rawValue, ",") {
|
||||
@@ -450,6 +458,12 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
|
||||
return errNoValidSmartData
|
||||
}
|
||||
|
||||
// mdraid health is not exposed via SMART; Linux exposes array state in sysfs.
|
||||
if deviceInfo != nil {
|
||||
if ok, err := sm.collectMdraidHealth(deviceInfo); ok {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// eMMC health is not exposed via SMART on Linux, but the kernel provides
|
||||
// wear / EOL indicators via sysfs. Prefer that path when available.
|
||||
if deviceInfo != nil {
|
||||
@@ -857,15 +871,18 @@ func (sm *SmartManager) parseSmartForSata(output []byte) (bool, int) {
|
||||
smartData.FirmwareVersion = data.FirmwareVersion
|
||||
smartData.Capacity = data.UserCapacity.Bytes
|
||||
smartData.Temperature = data.Temperature.Current
|
||||
if smartData.Temperature == 0 {
|
||||
if temp, ok := temperatureFromAtaDeviceStatistics(data.AtaDeviceStatistics); ok {
|
||||
smartData.Temperature = temp
|
||||
}
|
||||
}
|
||||
smartData.SmartStatus = getSmartStatus(smartData.Temperature, data.SmartStatus.Passed)
|
||||
smartData.DiskName = data.Device.Name
|
||||
smartData.DiskType = data.Device.Type
|
||||
|
||||
// get values from ata_device_statistics if necessary
|
||||
var ataDeviceStats smart.AtaDeviceStatistics
|
||||
if smartData.Temperature == 0 {
|
||||
if temp := findAtaDeviceStatisticsValue(&data, &ataDeviceStats, 5, "Current Temperature", 0, 255); temp != nil {
|
||||
smartData.Temperature = uint8(*temp)
|
||||
}
|
||||
}
|
||||
|
||||
// update SmartAttributes
|
||||
smartData.Attributes = make([]*smart.SmartAttribute, 0, len(data.AtaSmartAttributes.Table))
|
||||
for _, attr := range data.AtaSmartAttributes.Table {
|
||||
@@ -900,23 +917,20 @@ func getSmartStatus(temperature uint8, passed bool) string {
|
||||
}
|
||||
}
|
||||
|
||||
func temperatureFromAtaDeviceStatistics(stats smart.AtaDeviceStatistics) (uint8, bool) {
|
||||
entry := findAtaDeviceStatisticsEntry(stats, 5, "Current Temperature")
|
||||
if entry == nil || entry.Value == nil {
|
||||
return 0, false
|
||||
}
|
||||
if *entry.Value > 255 {
|
||||
return 0, false
|
||||
}
|
||||
return uint8(*entry.Value), true
|
||||
}
|
||||
|
||||
// findAtaDeviceStatisticsEntry centralizes ATA devstat lookups so additional
|
||||
// metrics can be pulled from the same structure in the future.
|
||||
func findAtaDeviceStatisticsEntry(stats smart.AtaDeviceStatistics, pageNumber uint8, entryName string) *smart.AtaDeviceStatisticsEntry {
|
||||
for pageIdx := range stats.Pages {
|
||||
page := &stats.Pages[pageIdx]
|
||||
if page.Number != pageNumber {
|
||||
func findAtaDeviceStatisticsValue(data *smart.SmartInfoForSata, ataDeviceStats *smart.AtaDeviceStatistics, entryNumber uint8, entryName string, minValue, maxValue int64) *int64 {
|
||||
if len(ataDeviceStats.Pages) == 0 {
|
||||
if len(data.AtaDeviceStatistics) == 0 {
|
||||
return nil
|
||||
}
|
||||
if err := json.Unmarshal(data.AtaDeviceStatistics, ataDeviceStats); err != nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
for pageIdx := range ataDeviceStats.Pages {
|
||||
page := &ataDeviceStats.Pages[pageIdx]
|
||||
if page.Number != entryNumber {
|
||||
continue
|
||||
}
|
||||
for entryIdx := range page.Table {
|
||||
@@ -924,7 +938,10 @@ func findAtaDeviceStatisticsEntry(stats smart.AtaDeviceStatistics, pageNumber ui
|
||||
if !strings.EqualFold(entry.Name, entryName) {
|
||||
continue
|
||||
}
|
||||
return entry
|
||||
if entry.Value == nil || *entry.Value < minValue || *entry.Value > maxValue {
|
||||
return nil
|
||||
}
|
||||
return entry.Value
|
||||
}
|
||||
}
|
||||
return nil
|
||||
@@ -1146,9 +1163,11 @@ func NewSmartManager() (*SmartManager, error) {
|
||||
slog.Debug("smartctl", "path", path, "err", err)
|
||||
if err != nil {
|
||||
// Keep the previous fail-fast behavior unless this Linux host exposes
|
||||
// eMMC health via sysfs, in which case smartctl is optional.
|
||||
if runtime.GOOS == "linux" && len(scanEmmcDevices()) > 0 {
|
||||
return sm, nil
|
||||
// eMMC or mdraid health via sysfs, in which case smartctl is optional.
|
||||
if runtime.GOOS == "linux" {
|
||||
if len(scanEmmcDevices()) > 0 || len(scanMdraidDevices()) > 0 {
|
||||
return sm, nil
|
||||
}
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -121,6 +121,78 @@ func TestParseSmartForSataDeviceStatisticsTemperature(t *testing.T) {
|
||||
assert.Equal(t, uint8(22), deviceData.Temperature)
|
||||
}
|
||||
|
||||
func TestParseSmartForSataAtaDeviceStatistics(t *testing.T) {
|
||||
// tests that ata_device_statistics values are parsed correctly
|
||||
jsonPayload := []byte(`{
|
||||
"smartctl": {"exit_status": 0},
|
||||
"device": {"name": "/dev/sdb", "type": "sat"},
|
||||
"model_name": "SanDisk SSD U110 16GB",
|
||||
"serial_number": "lksjfh23lhj",
|
||||
"firmware_version": "U21B001",
|
||||
"user_capacity": {"bytes": 16013942784},
|
||||
"smart_status": {"passed": true},
|
||||
"ata_smart_attributes": {"table": []},
|
||||
"ata_device_statistics": {
|
||||
"pages": [
|
||||
{
|
||||
"number": 5,
|
||||
"name": "Temperature Statistics",
|
||||
"table": [
|
||||
{"name": "Current Temperature", "value": 43, "flags": {"valid": true}},
|
||||
{"name": "Specified Minimum Operating Temperature", "value": -20, "flags": {"valid": true}}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}`)
|
||||
|
||||
sm := &SmartManager{SmartDataMap: make(map[string]*smart.SmartData)}
|
||||
hasData, exitStatus := sm.parseSmartForSata(jsonPayload)
|
||||
require.True(t, hasData)
|
||||
assert.Equal(t, 0, exitStatus)
|
||||
|
||||
deviceData, ok := sm.SmartDataMap["lksjfh23lhj"]
|
||||
require.True(t, ok, "expected smart data entry for serial lksjfh23lhj")
|
||||
assert.Equal(t, uint8(43), deviceData.Temperature)
|
||||
}
|
||||
|
||||
func TestParseSmartForSataNegativeDeviceStatistics(t *testing.T) {
|
||||
// Tests that negative values in ata_device_statistics (e.g. min operating temp)
|
||||
// do not cause the entire SAT parser to fail.
|
||||
jsonPayload := []byte(`{
|
||||
"smartctl": {"exit_status": 0},
|
||||
"device": {"name": "/dev/sdb", "type": "sat"},
|
||||
"model_name": "SanDisk SSD U110 16GB",
|
||||
"serial_number": "NEGATIVE123",
|
||||
"firmware_version": "U21B001",
|
||||
"user_capacity": {"bytes": 16013942784},
|
||||
"smart_status": {"passed": true},
|
||||
"temperature": {"current": 38},
|
||||
"ata_smart_attributes": {"table": []},
|
||||
"ata_device_statistics": {
|
||||
"pages": [
|
||||
{
|
||||
"number": 5,
|
||||
"name": "Temperature Statistics",
|
||||
"table": [
|
||||
{"name": "Current Temperature", "value": 38, "flags": {"valid": true}},
|
||||
{"name": "Specified Minimum Operating Temperature", "value": -20, "flags": {"valid": true}}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}`)
|
||||
|
||||
sm := &SmartManager{SmartDataMap: make(map[string]*smart.SmartData)}
|
||||
hasData, exitStatus := sm.parseSmartForSata(jsonPayload)
|
||||
require.True(t, hasData)
|
||||
assert.Equal(t, 0, exitStatus)
|
||||
|
||||
deviceData, ok := sm.SmartDataMap["NEGATIVE123"]
|
||||
require.True(t, ok, "expected smart data entry for serial NEGATIVE123")
|
||||
assert.Equal(t, uint8(38), deviceData.Temperature)
|
||||
}
|
||||
|
||||
func TestParseSmartForSataParentheticalRawValue(t *testing.T) {
|
||||
jsonPayload := []byte(`{
|
||||
"smartctl": {"exit_status": 0},
|
||||
@@ -727,6 +799,182 @@ func TestIsVirtualDeviceScsi(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindAtaDeviceStatisticsValue(t *testing.T) {
|
||||
val42 := int64(42)
|
||||
val100 := int64(100)
|
||||
valMinus20 := int64(-20)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
data smart.SmartInfoForSata
|
||||
ataDeviceStats smart.AtaDeviceStatistics
|
||||
entryNumber uint8
|
||||
entryName string
|
||||
minValue int64
|
||||
maxValue int64
|
||||
expectedValue *int64
|
||||
}{
|
||||
{
|
||||
name: "value in ataDeviceStats",
|
||||
ataDeviceStats: smart.AtaDeviceStatistics{
|
||||
Pages: []smart.AtaDeviceStatisticsPage{
|
||||
{
|
||||
Number: 5,
|
||||
Table: []smart.AtaDeviceStatisticsEntry{
|
||||
{Name: "Current Temperature", Value: &val42},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
entryNumber: 5,
|
||||
entryName: "Current Temperature",
|
||||
minValue: 0,
|
||||
maxValue: 100,
|
||||
expectedValue: &val42,
|
||||
},
|
||||
{
|
||||
name: "value unmarshaled from data",
|
||||
data: smart.SmartInfoForSata{
|
||||
AtaDeviceStatistics: []byte(`{"pages":[{"number":5,"table":[{"name":"Current Temperature","value":100}]}]}`),
|
||||
},
|
||||
entryNumber: 5,
|
||||
entryName: "Current Temperature",
|
||||
minValue: 0,
|
||||
maxValue: 255,
|
||||
expectedValue: &val100,
|
||||
},
|
||||
{
|
||||
name: "value out of range (too high)",
|
||||
ataDeviceStats: smart.AtaDeviceStatistics{
|
||||
Pages: []smart.AtaDeviceStatisticsPage{
|
||||
{
|
||||
Number: 5,
|
||||
Table: []smart.AtaDeviceStatisticsEntry{
|
||||
{Name: "Current Temperature", Value: &val100},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
entryNumber: 5,
|
||||
entryName: "Current Temperature",
|
||||
minValue: 0,
|
||||
maxValue: 50,
|
||||
expectedValue: nil,
|
||||
},
|
||||
{
|
||||
name: "value out of range (too low)",
|
||||
ataDeviceStats: smart.AtaDeviceStatistics{
|
||||
Pages: []smart.AtaDeviceStatisticsPage{
|
||||
{
|
||||
Number: 5,
|
||||
Table: []smart.AtaDeviceStatisticsEntry{
|
||||
{Name: "Min Temp", Value: &valMinus20},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
entryNumber: 5,
|
||||
entryName: "Min Temp",
|
||||
minValue: 0,
|
||||
maxValue: 100,
|
||||
expectedValue: nil,
|
||||
},
|
||||
{
|
||||
name: "no statistics available",
|
||||
data: smart.SmartInfoForSata{},
|
||||
entryNumber: 5,
|
||||
entryName: "Current Temperature",
|
||||
minValue: 0,
|
||||
maxValue: 255,
|
||||
expectedValue: nil,
|
||||
},
|
||||
{
|
||||
name: "wrong page number",
|
||||
ataDeviceStats: smart.AtaDeviceStatistics{
|
||||
Pages: []smart.AtaDeviceStatisticsPage{
|
||||
{
|
||||
Number: 1,
|
||||
Table: []smart.AtaDeviceStatisticsEntry{
|
||||
{Name: "Current Temperature", Value: &val42},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
entryNumber: 5,
|
||||
entryName: "Current Temperature",
|
||||
minValue: 0,
|
||||
maxValue: 100,
|
||||
expectedValue: nil,
|
||||
},
|
||||
{
|
||||
name: "wrong entry name",
|
||||
ataDeviceStats: smart.AtaDeviceStatistics{
|
||||
Pages: []smart.AtaDeviceStatisticsPage{
|
||||
{
|
||||
Number: 5,
|
||||
Table: []smart.AtaDeviceStatisticsEntry{
|
||||
{Name: "Other Stat", Value: &val42},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
entryNumber: 5,
|
||||
entryName: "Current Temperature",
|
||||
minValue: 0,
|
||||
maxValue: 100,
|
||||
expectedValue: nil,
|
||||
},
|
||||
{
|
||||
name: "case insensitive name match",
|
||||
ataDeviceStats: smart.AtaDeviceStatistics{
|
||||
Pages: []smart.AtaDeviceStatisticsPage{
|
||||
{
|
||||
Number: 5,
|
||||
Table: []smart.AtaDeviceStatisticsEntry{
|
||||
{Name: "CURRENT TEMPERATURE", Value: &val42},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
entryNumber: 5,
|
||||
entryName: "Current Temperature",
|
||||
minValue: 0,
|
||||
maxValue: 100,
|
||||
expectedValue: &val42,
|
||||
},
|
||||
{
|
||||
name: "entry value is nil",
|
||||
ataDeviceStats: smart.AtaDeviceStatistics{
|
||||
Pages: []smart.AtaDeviceStatisticsPage{
|
||||
{
|
||||
Number: 5,
|
||||
Table: []smart.AtaDeviceStatisticsEntry{
|
||||
{Name: "Current Temperature", Value: nil},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
entryNumber: 5,
|
||||
entryName: "Current Temperature",
|
||||
minValue: 0,
|
||||
maxValue: 100,
|
||||
expectedValue: nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := findAtaDeviceStatisticsValue(&tt.data, &tt.ataDeviceStats, tt.entryNumber, tt.entryName, tt.minValue, tt.maxValue)
|
||||
if tt.expectedValue == nil {
|
||||
assert.Nil(t, result)
|
||||
} else {
|
||||
require.NotNil(t, result)
|
||||
assert.Equal(t, *tt.expectedValue, *result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRefreshExcludedDevices(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
|
||||
@@ -7,12 +7,13 @@ import (
|
||||
"log/slog"
|
||||
"os"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel"
|
||||
"github.com/henrygd/beszel/agent/battery"
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/agent/zfs"
|
||||
"github.com/henrygd/beszel/internal/entities/container"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
|
||||
@@ -107,7 +108,7 @@ func (a *Agent) refreshSystemDetails() {
|
||||
}
|
||||
|
||||
// zfs
|
||||
if _, err := getARCSize(); err != nil {
|
||||
if _, err := zfs.ARCSize(); err != nil {
|
||||
slog.Debug("Not monitoring ZFS ARC", "err", err)
|
||||
} else {
|
||||
a.zfs = true
|
||||
@@ -127,13 +128,13 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats {
|
||||
// cpu metrics
|
||||
cpuMetrics, err := getCpuMetrics(cacheTimeMs)
|
||||
if err == nil {
|
||||
systemStats.Cpu = twoDecimals(cpuMetrics.Total)
|
||||
systemStats.Cpu = utils.TwoDecimals(cpuMetrics.Total)
|
||||
systemStats.CpuBreakdown = []float64{
|
||||
twoDecimals(cpuMetrics.User),
|
||||
twoDecimals(cpuMetrics.System),
|
||||
twoDecimals(cpuMetrics.Iowait),
|
||||
twoDecimals(cpuMetrics.Steal),
|
||||
twoDecimals(cpuMetrics.Idle),
|
||||
utils.TwoDecimals(cpuMetrics.User),
|
||||
utils.TwoDecimals(cpuMetrics.System),
|
||||
utils.TwoDecimals(cpuMetrics.Iowait),
|
||||
utils.TwoDecimals(cpuMetrics.Steal),
|
||||
utils.TwoDecimals(cpuMetrics.Idle),
|
||||
}
|
||||
} else {
|
||||
slog.Error("Error getting cpu metrics", "err", err)
|
||||
@@ -157,8 +158,8 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats {
|
||||
// memory
|
||||
if v, err := mem.VirtualMemory(); err == nil {
|
||||
// swap
|
||||
systemStats.Swap = bytesToGigabytes(v.SwapTotal)
|
||||
systemStats.SwapUsed = bytesToGigabytes(v.SwapTotal - v.SwapFree - v.SwapCached)
|
||||
systemStats.Swap = utils.BytesToGigabytes(v.SwapTotal)
|
||||
systemStats.SwapUsed = utils.BytesToGigabytes(v.SwapTotal - v.SwapFree - v.SwapCached)
|
||||
// cache + buffers value for default mem calculation
|
||||
// note: gopsutil automatically adds SReclaimable to v.Cached
|
||||
cacheBuff := v.Cached + v.Buffers - v.Shared
|
||||
@@ -178,16 +179,16 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats {
|
||||
// }
|
||||
// subtract ZFS ARC size from used memory and add as its own category
|
||||
if a.zfs {
|
||||
if arcSize, _ := getARCSize(); arcSize > 0 && arcSize < v.Used {
|
||||
if arcSize, _ := zfs.ARCSize(); arcSize > 0 && arcSize < v.Used {
|
||||
v.Used = v.Used - arcSize
|
||||
v.UsedPercent = float64(v.Used) / float64(v.Total) * 100.0
|
||||
systemStats.MemZfsArc = bytesToGigabytes(arcSize)
|
||||
systemStats.MemZfsArc = utils.BytesToGigabytes(arcSize)
|
||||
}
|
||||
}
|
||||
systemStats.Mem = bytesToGigabytes(v.Total)
|
||||
systemStats.MemBuffCache = bytesToGigabytes(cacheBuff)
|
||||
systemStats.MemUsed = bytesToGigabytes(v.Used)
|
||||
systemStats.MemPct = twoDecimals(v.UsedPercent)
|
||||
systemStats.Mem = utils.BytesToGigabytes(v.Total)
|
||||
systemStats.MemBuffCache = utils.BytesToGigabytes(cacheBuff)
|
||||
systemStats.MemUsed = utils.BytesToGigabytes(v.Used)
|
||||
systemStats.MemPct = utils.TwoDecimals(v.UsedPercent)
|
||||
}
|
||||
|
||||
// disk usage
|
||||
@@ -250,32 +251,6 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats {
|
||||
return systemStats
|
||||
}
|
||||
|
||||
// Returns the size of the ZFS ARC memory cache in bytes
|
||||
func getARCSize() (uint64, error) {
|
||||
file, err := os.Open("/proc/spl/kstat/zfs/arcstats")
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Scan the lines
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if strings.HasPrefix(line, "size") {
|
||||
// Example line: size 4 15032385536
|
||||
fields := strings.Fields(line)
|
||||
if len(fields) < 3 {
|
||||
return 0, err
|
||||
}
|
||||
// Return the size as uint64
|
||||
return strconv.ParseUint(fields[2], 10, 64)
|
||||
}
|
||||
}
|
||||
|
||||
return 0, fmt.Errorf("failed to parse size field")
|
||||
}
|
||||
|
||||
// getOsPrettyName attempts to get the pretty OS name from /etc/os-release on Linux systems
|
||||
func getOsPrettyName() (string, error) {
|
||||
file, err := os.Open("/etc/os-release")
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/coreos/go-systemd/v22/dbus"
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/systemd"
|
||||
)
|
||||
|
||||
@@ -49,7 +50,7 @@ func isSystemdAvailable() bool {
|
||||
|
||||
// newSystemdManager creates a new systemdManager.
|
||||
func newSystemdManager() (*systemdManager, error) {
|
||||
if skipSystemd, _ := GetEnv("SKIP_SYSTEMD"); skipSystemd == "true" {
|
||||
if skipSystemd, _ := utils.GetEnv("SKIP_SYSTEMD"); skipSystemd == "true" {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -294,13 +295,13 @@ func unescapeServiceName(name string) string {
|
||||
// otherwise defaults to "*service".
|
||||
func getServicePatterns() []string {
|
||||
patterns := []string{}
|
||||
if envPatterns, _ := GetEnv("SERVICE_PATTERNS"); envPatterns != "" {
|
||||
if envPatterns, _ := utils.GetEnv("SERVICE_PATTERNS"); envPatterns != "" {
|
||||
for pattern := range strings.SplitSeq(envPatterns, ",") {
|
||||
pattern = strings.TrimSpace(pattern)
|
||||
if pattern == "" {
|
||||
continue
|
||||
}
|
||||
if !strings.HasSuffix(pattern, ".service") {
|
||||
if !strings.HasSuffix(pattern, "timer") && !strings.HasSuffix(pattern, ".service") {
|
||||
pattern += ".service"
|
||||
}
|
||||
patterns = append(patterns, pattern)
|
||||
|
||||
@@ -156,6 +156,13 @@ func TestGetServicePatterns(t *testing.T) {
|
||||
expected: []string{"*nginx*.service", "*apache*.service"},
|
||||
cleanupEnvVars: true,
|
||||
},
|
||||
{
|
||||
name: "opt into timer monitoring",
|
||||
prefixedEnv: "nginx.service,docker,apache.timer",
|
||||
unprefixedEnv: "",
|
||||
expected: []string{"nginx.service", "docker.service", "apache.timer"},
|
||||
cleanupEnvVars: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
package agent
|
||||
|
||||
import "math"
|
||||
|
||||
func bytesToMegabytes(b float64) float64 {
|
||||
return twoDecimals(b / 1048576)
|
||||
}
|
||||
|
||||
func bytesToGigabytes(b uint64) float64 {
|
||||
return twoDecimals(float64(b) / 1073741824)
|
||||
}
|
||||
|
||||
func twoDecimals(value float64) float64 {
|
||||
return math.Round(value*100) / 100
|
||||
}
|
||||
88
agent/utils/utils.go
Normal file
88
agent/utils/utils.go
Normal file
@@ -0,0 +1,88 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// GetEnv looks up key in the environment. The "BESZEL_AGENT_"-prefixed name
// takes precedence; the bare key is consulted only when the prefixed variable
// is not set. exists reports whether either variable was found (a variable set
// to the empty string still counts as found).
func GetEnv(key string) (value string, exists bool) {
	prefixed, found := os.LookupEnv("BESZEL_AGENT_" + key)
	if found {
		return prefixed, true
	}
	return os.LookupEnv(key)
}
|
||||
|
||||
// BytesToMegabytes converts bytes to megabytes and rounds to two decimal places.
|
||||
func BytesToMegabytes(b float64) float64 {
|
||||
return TwoDecimals(b / 1048576)
|
||||
}
|
||||
|
||||
// BytesToGigabytes converts bytes to gigabytes and rounds to two decimal places.
|
||||
func BytesToGigabytes(b uint64) float64 {
|
||||
return TwoDecimals(float64(b) / 1073741824)
|
||||
}
|
||||
|
||||
// TwoDecimals rounds value to two decimal places by scaling it to hundredths,
// rounding with math.Round, and scaling back.
func TwoDecimals(value float64) float64 {
	const scale = 100
	hundredths := math.Round(value * scale)
	return hundredths / scale
}
|
||||
|
||||
// func RoundFloat(val float64, precision uint) float64 {
|
||||
// ratio := math.Pow(10, float64(precision))
|
||||
// return math.Round(val*ratio) / ratio
|
||||
// }
|
||||
|
||||
// ReadStringFile returns trimmed file contents or empty string on error.
|
||||
func ReadStringFile(path string) string {
|
||||
content, _ := ReadStringFileOK(path)
|
||||
return content
|
||||
}
|
||||
|
||||
// ReadStringFileOK reads the whole file at path and returns its contents with
// leading and trailing whitespace removed. The boolean is false (and the string
// empty) when the file could not be read.
func ReadStringFileOK(path string) (string, bool) {
	raw, err := os.ReadFile(path)
	if err == nil {
		return strings.TrimSpace(string(raw)), true
	}
	return "", false
}
|
||||
|
||||
// ReadStringFileLimited reads at most maxSize bytes from the file at path and
// returns them as a whitespace-trimmed string. Capping the buffer avoids large
// allocations and potential panics with pseudo-files whose size is misreported.
//
// The file is read until the buffer is full or EOF is reached, so a short
// individual read does not truncate the result below maxSize. A maxSize of
// zero or less yields an empty string (after the file has been opened
// successfully) instead of panicking on a negative buffer length.
//
// An error is returned when the file cannot be opened or a read fails for a
// reason other than reaching the end of the file.
func ReadStringFileLimited(path string, maxSize int) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	// make([]byte, n) panics for n < 0, and there is nothing to read for 0.
	if maxSize <= 0 {
		return "", nil
	}

	buf := make([]byte, maxSize)
	// io.ReadFull keeps reading until buf is full; EOF (nothing read) and
	// ErrUnexpectedEOF (file shorter than maxSize) are both normal outcomes.
	n, err := io.ReadFull(f, buf)
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return "", err
	}
	return strings.TrimSpace(string(buf[:n])), nil
}
|
||||
|
||||
// FileExists reports whether os.Stat succeeds for path. Any Stat error,
// not only "does not exist", is reported as false.
func FileExists(path string) bool {
	if _, statErr := os.Stat(path); statErr != nil {
		return false
	}
	return true
}
|
||||
|
||||
// ReadUintFile parses a decimal uint64 value from a file.
|
||||
func ReadUintFile(path string) (uint64, bool) {
|
||||
raw, ok := ReadStringFileOK(path)
|
||||
if !ok {
|
||||
return 0, false
|
||||
}
|
||||
parsed, err := strconv.ParseUint(raw, 10, 64)
|
||||
if err != nil {
|
||||
return 0, false
|
||||
}
|
||||
return parsed, true
|
||||
}
|
||||
165
agent/utils/utils_test.go
Normal file
165
agent/utils/utils_test.go
Normal file
@@ -0,0 +1,165 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestTwoDecimals(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input float64
|
||||
expected float64
|
||||
}{
|
||||
{"round down", 1.234, 1.23},
|
||||
{"round half up", 1.235, 1.24}, // math.Round rounds half up
|
||||
{"no rounding needed", 1.23, 1.23},
|
||||
{"negative number", -1.235, -1.24}, // math.Round rounds half up (more negative)
|
||||
{"zero", 0.0, 0.0},
|
||||
{"large number", 123.456, 123.46}, // rounds 5 up
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := TwoDecimals(tt.input)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBytesToMegabytes(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input float64
|
||||
expected float64
|
||||
}{
|
||||
{"1 MB", 1048576, 1.0},
|
||||
{"512 KB", 524288, 0.5},
|
||||
{"zero", 0, 0},
|
||||
{"large value", 1073741824, 1024}, // 1 GB = 1024 MB
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := BytesToMegabytes(tt.input)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBytesToGigabytes(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input uint64
|
||||
expected float64
|
||||
}{
|
||||
{"1 GB", 1073741824, 1.0},
|
||||
{"512 MB", 536870912, 0.5},
|
||||
{"0 GB", 0, 0},
|
||||
{"2 GB", 2147483648, 2.0},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := BytesToGigabytes(tt.input)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFileFunctions(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
testFilePath := filepath.Join(tmpDir, "test.txt")
|
||||
testContent := "hello world"
|
||||
|
||||
// Test FileExists (false)
|
||||
assert.False(t, FileExists(testFilePath))
|
||||
|
||||
// Test ReadStringFileOK (false)
|
||||
content, ok := ReadStringFileOK(testFilePath)
|
||||
assert.False(t, ok)
|
||||
assert.Empty(t, content)
|
||||
|
||||
// Test ReadStringFile (empty)
|
||||
assert.Empty(t, ReadStringFile(testFilePath))
|
||||
|
||||
// Write file
|
||||
err := os.WriteFile(testFilePath, []byte(testContent+"\n "), 0644)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Test FileExists (true)
|
||||
assert.True(t, FileExists(testFilePath))
|
||||
|
||||
// Test ReadStringFileOK (true)
|
||||
content, ok = ReadStringFileOK(testFilePath)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, testContent, content)
|
||||
|
||||
// Test ReadStringFile (content)
|
||||
assert.Equal(t, testContent, ReadStringFile(testFilePath))
|
||||
}
|
||||
|
||||
func TestReadUintFile(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
|
||||
t.Run("valid uint", func(t *testing.T) {
|
||||
path := filepath.Join(tmpDir, "uint.txt")
|
||||
os.WriteFile(path, []byte(" 12345\n"), 0644)
|
||||
val, ok := ReadUintFile(path)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, uint64(12345), val)
|
||||
})
|
||||
|
||||
t.Run("invalid uint", func(t *testing.T) {
|
||||
path := filepath.Join(tmpDir, "invalid.txt")
|
||||
os.WriteFile(path, []byte("abc"), 0644)
|
||||
val, ok := ReadUintFile(path)
|
||||
assert.False(t, ok)
|
||||
assert.Equal(t, uint64(0), val)
|
||||
})
|
||||
|
||||
t.Run("missing file", func(t *testing.T) {
|
||||
path := filepath.Join(tmpDir, "missing.txt")
|
||||
val, ok := ReadUintFile(path)
|
||||
assert.False(t, ok)
|
||||
assert.Equal(t, uint64(0), val)
|
||||
})
|
||||
}
|
||||
|
||||
func TestGetEnv(t *testing.T) {
|
||||
key := "TEST_VAR"
|
||||
prefixedKey := "BESZEL_AGENT_" + key
|
||||
|
||||
t.Run("prefixed variable exists", func(t *testing.T) {
|
||||
os.Setenv(prefixedKey, "prefixed_val")
|
||||
os.Setenv(key, "unprefixed_val")
|
||||
defer os.Unsetenv(prefixedKey)
|
||||
defer os.Unsetenv(key)
|
||||
|
||||
val, exists := GetEnv(key)
|
||||
assert.True(t, exists)
|
||||
assert.Equal(t, "prefixed_val", val)
|
||||
})
|
||||
|
||||
t.Run("only unprefixed variable exists", func(t *testing.T) {
|
||||
os.Unsetenv(prefixedKey)
|
||||
os.Setenv(key, "unprefixed_val")
|
||||
defer os.Unsetenv(key)
|
||||
|
||||
val, exists := GetEnv(key)
|
||||
assert.True(t, exists)
|
||||
assert.Equal(t, "unprefixed_val", val)
|
||||
})
|
||||
|
||||
t.Run("neither variable exists", func(t *testing.T) {
|
||||
os.Unsetenv(prefixedKey)
|
||||
os.Unsetenv(key)
|
||||
|
||||
val, exists := GetEnv(key)
|
||||
assert.False(t, exists)
|
||||
assert.Empty(t, val)
|
||||
})
|
||||
}
|
||||
11
agent/zfs/zfs_freebsd.go
Normal file
11
agent/zfs/zfs_freebsd.go
Normal file
@@ -0,0 +1,11 @@
|
||||
//go:build freebsd
|
||||
|
||||
package zfs
|
||||
|
||||
import (
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func ARCSize() (uint64, error) {
|
||||
return unix.SysctlUint64("kstat.zfs.misc.arcstats.size")
|
||||
}
|
||||
34
agent/zfs/zfs_linux.go
Normal file
34
agent/zfs/zfs_linux.go
Normal file
@@ -0,0 +1,34 @@
|
||||
//go:build linux
|
||||
|
||||
// Package zfs provides functions to read ZFS statistics.
|
||||
package zfs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ARCSize returns the size of the ZFS ARC memory cache in bytes, read from the
// "size" row of /proc/spl/kstat/zfs/arcstats.
//
// It returns an error when the file cannot be opened or read, when the "size"
// row has fewer than three fields, when its value does not parse as a base-10
// uint64, or when no "size" row is present.
func ARCSize() (uint64, error) {
	file, err := os.Open("/proc/spl/kstat/zfs/arcstats")
	if err != nil {
		return 0, err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := scanner.Text()
		// Cheap prefix filter first, so most lines are never split.
		if !strings.HasPrefix(line, "size") {
			continue
		}
		// Example line: size 4 15032385536
		fields := strings.Fields(line)
		// Require the key to be exactly "size" so a row whose name merely
		// starts with "size" is not mistaken for it.
		if fields[0] != "size" {
			continue
		}
		if len(fields) < 3 {
			return 0, fmt.Errorf("unexpected arcstats size format: %s", line)
		}
		return strconv.ParseUint(fields[2], 10, 64)
	}
	// Scan returns false on both EOF and read errors; report a read error as
	// such rather than as a missing field.
	if err := scanner.Err(); err != nil {
		return 0, fmt.Errorf("reading arcstats: %w", err)
	}

	return 0, fmt.Errorf("size field not found in arcstats")
}
|
||||
9
agent/zfs/zfs_unsupported.go
Normal file
9
agent/zfs/zfs_unsupported.go
Normal file
@@ -0,0 +1,9 @@
|
||||
//go:build !linux && !freebsd
|
||||
|
||||
package zfs
|
||||
|
||||
import "errors"
|
||||
|
||||
// ARCSize is the stub used on platforms without a ZFS ARC reader in this
// package. It always returns zero and errors.ErrUnsupported.
func ARCSize() (uint64, error) {
	var size uint64
	return size, errors.ErrUnsupported
}
|
||||
2
go.mod
2
go.mod
@@ -1,6 +1,6 @@
|
||||
module github.com/henrygd/beszel
|
||||
|
||||
go 1.26.0
|
||||
go 1.26.1
|
||||
|
||||
require (
|
||||
github.com/blang/semver v3.5.1+incompatible
|
||||
|
||||
@@ -21,9 +21,9 @@ type hubLike interface {
|
||||
|
||||
type AlertManager struct {
|
||||
hub hubLike
|
||||
alertQueue chan alertTask
|
||||
stopChan chan struct{}
|
||||
stopOnce sync.Once
|
||||
pendingAlerts sync.Map
|
||||
alertsCache *AlertsCache
|
||||
}
|
||||
|
||||
type AlertMessageData struct {
|
||||
@@ -40,16 +40,22 @@ type UserNotificationSettings struct {
|
||||
Webhooks []string `json:"webhooks"`
|
||||
}
|
||||
|
||||
type SystemAlertFsStats struct {
|
||||
DiskTotal float64 `json:"d"`
|
||||
DiskUsed float64 `json:"du"`
|
||||
}
|
||||
|
||||
// Values pulled from system_stats.stats that are relevant to alerts.
|
||||
type SystemAlertStats struct {
|
||||
Cpu float64 `json:"cpu"`
|
||||
Mem float64 `json:"mp"`
|
||||
Disk float64 `json:"dp"`
|
||||
NetSent float64 `json:"ns"`
|
||||
NetRecv float64 `json:"nr"`
|
||||
Bandwidth [2]uint64 `json:"b"`
|
||||
GPU map[string]SystemAlertGPUData `json:"g"`
|
||||
Temperatures map[string]float32 `json:"t"`
|
||||
LoadAvg [3]float64 `json:"la"`
|
||||
Battery [2]uint8 `json:"bat"`
|
||||
ExtraFs map[string]SystemAlertFsStats `json:"efs"`
|
||||
}
|
||||
|
||||
type SystemAlertGPUData struct {
|
||||
@@ -58,7 +64,7 @@ type SystemAlertGPUData struct {
|
||||
|
||||
type SystemAlertData struct {
|
||||
systemRecord *core.Record
|
||||
alertRecord *core.Record
|
||||
alertData CachedAlertData
|
||||
name string
|
||||
unit string
|
||||
val float64
|
||||
@@ -92,12 +98,10 @@ var supportsTitle = map[string]struct{}{
|
||||
// NewAlertManager creates a new AlertManager instance.
|
||||
func NewAlertManager(app hubLike) *AlertManager {
|
||||
am := &AlertManager{
|
||||
hub: app,
|
||||
alertQueue: make(chan alertTask, 5),
|
||||
stopChan: make(chan struct{}),
|
||||
hub: app,
|
||||
alertsCache: NewAlertsCache(app),
|
||||
}
|
||||
am.bindEvents()
|
||||
go am.startWorker()
|
||||
return am
|
||||
}
|
||||
|
||||
@@ -106,6 +110,19 @@ func (am *AlertManager) bindEvents() {
|
||||
am.hub.OnRecordAfterUpdateSuccess("alerts").BindFunc(updateHistoryOnAlertUpdate)
|
||||
am.hub.OnRecordAfterDeleteSuccess("alerts").BindFunc(resolveHistoryOnAlertDelete)
|
||||
am.hub.OnRecordAfterUpdateSuccess("smart_devices").BindFunc(am.handleSmartDeviceAlert)
|
||||
|
||||
am.hub.OnServe().BindFunc(func(e *core.ServeEvent) error {
|
||||
// Populate all alerts into cache on startup
|
||||
_ = am.alertsCache.PopulateFromDB(true)
|
||||
|
||||
if err := resolveStatusAlerts(e.App); err != nil {
|
||||
e.App.Logger().Error("Failed to resolve stale status alerts", "err", err)
|
||||
}
|
||||
if err := am.restorePendingStatusAlerts(); err != nil {
|
||||
e.App.Logger().Error("Failed to restore pending status alerts", "err", err)
|
||||
}
|
||||
return e.Next()
|
||||
})
|
||||
}
|
||||
|
||||
// IsNotificationSilenced checks if a notification should be silenced based on configured quiet hours
|
||||
@@ -259,13 +276,14 @@ func (am *AlertManager) SendShoutrrrAlert(notificationUrl, title, message, link,
|
||||
}
|
||||
|
||||
// Add link
|
||||
if scheme == "ntfy" {
|
||||
switch scheme {
|
||||
case "ntfy":
|
||||
queryParams.Add("Actions", fmt.Sprintf("view, %s, %s", linkText, link))
|
||||
} else if scheme == "lark" {
|
||||
case "lark":
|
||||
queryParams.Add("link", link)
|
||||
} else if scheme == "bark" {
|
||||
case "bark":
|
||||
queryParams.Add("url", link)
|
||||
} else {
|
||||
default:
|
||||
message += "\n\n" + link
|
||||
}
|
||||
|
||||
@@ -298,3 +316,13 @@ func (am *AlertManager) SendTestNotification(e *core.RequestEvent) error {
|
||||
}
|
||||
return e.JSON(200, map[string]bool{"err": false})
|
||||
}
|
||||
|
||||
// setAlertTriggered updates the "triggered" status of an alert record in the database
|
||||
func (am *AlertManager) setAlertTriggered(alert CachedAlertData, triggered bool) error {
|
||||
alertRecord, err := am.hub.FindRecordById("alerts", alert.Id)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
alertRecord.Set("triggered", triggered)
|
||||
return am.hub.Save(alertRecord)
|
||||
}
|
||||
|
||||
177
internal/alerts/alerts_cache.go
Normal file
177
internal/alerts/alerts_cache.go
Normal file
@@ -0,0 +1,177 @@
|
||||
package alerts
|
||||
|
||||
import (
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
"github.com/pocketbase/pocketbase/tools/store"
|
||||
)
|
||||
|
||||
// CachedAlertData is a plain-value copy of the alert record fields that are
// needed for status checking and updates. Each field mirrors the record field
// noted beside it (see PopulateFromRecord).
type CachedAlertData struct {
	Id        string  // record id
	SystemID  string  // "system"
	UserID    string  // "user"
	Name      string  // "name"
	Value     float64 // "value"
	Triggered bool    // "triggered"
	Min       uint8   // "min"
	// Created types.DateTime
}
|
||||
|
||||
func (a *CachedAlertData) PopulateFromRecord(record *core.Record) {
|
||||
a.Id = record.Id
|
||||
a.SystemID = record.GetString("system")
|
||||
a.UserID = record.GetString("user")
|
||||
a.Name = record.GetString("name")
|
||||
a.Value = record.GetFloat("value")
|
||||
a.Triggered = record.GetBool("triggered")
|
||||
a.Min = uint8(record.GetInt("min"))
|
||||
// a.Created = record.GetDateTime("created")
|
||||
}
|
||||
|
||||
// AlertsCache provides an in-memory cache for system alerts.
|
||||
type AlertsCache struct {
|
||||
app core.App
|
||||
store *store.Store[string, *store.Store[string, CachedAlertData]]
|
||||
populated bool
|
||||
}
|
||||
|
||||
// NewAlertsCache creates a new instance of SystemAlertsCache.
|
||||
func NewAlertsCache(app core.App) *AlertsCache {
|
||||
c := AlertsCache{
|
||||
app: app,
|
||||
store: store.New(map[string]*store.Store[string, CachedAlertData]{}),
|
||||
}
|
||||
return c.bindEvents()
|
||||
}
|
||||
|
||||
// bindEvents sets up event listeners to keep the cache in sync with database changes.
|
||||
func (c *AlertsCache) bindEvents() *AlertsCache {
|
||||
c.app.OnRecordAfterUpdateSuccess("alerts").BindFunc(func(e *core.RecordEvent) error {
|
||||
// c.Delete(e.Record.Original()) // this would be needed if the system field on an existing alert was changed, however we don't currently allow that in the UI so we'll leave it commented out
|
||||
c.Update(e.Record)
|
||||
return e.Next()
|
||||
})
|
||||
c.app.OnRecordAfterDeleteSuccess("alerts").BindFunc(func(e *core.RecordEvent) error {
|
||||
c.Delete(e.Record)
|
||||
return e.Next()
|
||||
})
|
||||
c.app.OnRecordAfterCreateSuccess("alerts").BindFunc(func(e *core.RecordEvent) error {
|
||||
c.Update(e.Record)
|
||||
return e.Next()
|
||||
})
|
||||
return c
|
||||
}
|
||||
|
||||
// PopulateFromDB clears current entries and loads all alerts from the database into the cache.
|
||||
func (c *AlertsCache) PopulateFromDB(force bool) error {
|
||||
if !force && c.populated {
|
||||
return nil
|
||||
}
|
||||
records, err := c.app.FindAllRecords("alerts")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.store.RemoveAll()
|
||||
for _, record := range records {
|
||||
c.Update(record)
|
||||
}
|
||||
c.populated = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// Update adds or updates an alert record in the cache.
|
||||
func (c *AlertsCache) Update(record *core.Record) {
|
||||
systemID := record.GetString("system")
|
||||
if systemID == "" {
|
||||
return
|
||||
}
|
||||
systemStore, ok := c.store.GetOk(systemID)
|
||||
if !ok {
|
||||
systemStore = store.New(map[string]CachedAlertData{})
|
||||
c.store.Set(systemID, systemStore)
|
||||
}
|
||||
var ca CachedAlertData
|
||||
ca.PopulateFromRecord(record)
|
||||
systemStore.Set(record.Id, ca)
|
||||
}
|
||||
|
||||
// Delete removes an alert record from the cache.
|
||||
func (c *AlertsCache) Delete(record *core.Record) {
|
||||
systemID := record.GetString("system")
|
||||
if systemID == "" {
|
||||
return
|
||||
}
|
||||
if systemStore, ok := c.store.GetOk(systemID); ok {
|
||||
systemStore.Remove(record.Id)
|
||||
}
|
||||
}
|
||||
|
||||
// GetSystemAlerts returns all alerts for the specified system, lazy-loading if necessary.
|
||||
func (c *AlertsCache) GetSystemAlerts(systemID string) []CachedAlertData {
|
||||
systemStore, ok := c.store.GetOk(systemID)
|
||||
if !ok {
|
||||
// Populate cache for this system
|
||||
records, err := c.app.FindAllRecords("alerts", dbx.NewExp("system={:system}", dbx.Params{"system": systemID}))
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
systemStore = store.New(map[string]CachedAlertData{})
|
||||
for _, record := range records {
|
||||
var ca CachedAlertData
|
||||
ca.PopulateFromRecord(record)
|
||||
systemStore.Set(record.Id, ca)
|
||||
}
|
||||
c.store.Set(systemID, systemStore)
|
||||
}
|
||||
all := systemStore.GetAll()
|
||||
alerts := make([]CachedAlertData, 0, len(all))
|
||||
for _, alert := range all {
|
||||
alerts = append(alerts, alert)
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
// GetAlert returns a specific alert by its ID from the cache.
|
||||
func (c *AlertsCache) GetAlert(systemID, alertID string) (CachedAlertData, bool) {
|
||||
if systemStore, ok := c.store.GetOk(systemID); ok {
|
||||
return systemStore.GetOk(alertID)
|
||||
}
|
||||
return CachedAlertData{}, false
|
||||
}
|
||||
|
||||
// GetAlertsByName returns all alerts of a specific type for the specified system.
|
||||
func (c *AlertsCache) GetAlertsByName(systemID, alertName string) []CachedAlertData {
|
||||
allAlerts := c.GetSystemAlerts(systemID)
|
||||
var alerts []CachedAlertData
|
||||
for _, record := range allAlerts {
|
||||
if record.Name == alertName {
|
||||
alerts = append(alerts, record)
|
||||
}
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
// GetAlertsExcludingNames returns all alerts for the specified system excluding the given types.
|
||||
func (c *AlertsCache) GetAlertsExcludingNames(systemID string, excludedNames ...string) []CachedAlertData {
|
||||
excludeMap := make(map[string]struct{})
|
||||
for _, name := range excludedNames {
|
||||
excludeMap[name] = struct{}{}
|
||||
}
|
||||
allAlerts := c.GetSystemAlerts(systemID)
|
||||
var alerts []CachedAlertData
|
||||
for _, record := range allAlerts {
|
||||
if _, excluded := excludeMap[record.Name]; !excluded {
|
||||
alerts = append(alerts, record)
|
||||
}
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
// Refresh returns the latest cached copy for an alert snapshot if it still exists.
|
||||
func (c *AlertsCache) Refresh(alert CachedAlertData) (CachedAlertData, bool) {
|
||||
if alert.Id == "" {
|
||||
return CachedAlertData{}, false
|
||||
}
|
||||
return c.GetAlert(alert.SystemID, alert.Id)
|
||||
}
|
||||
215
internal/alerts/alerts_cache_test.go
Normal file
215
internal/alerts/alerts_cache_test.go
Normal file
@@ -0,0 +1,215 @@
|
||||
//go:build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/henrygd/beszel/internal/alerts"
|
||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestSystemAlertsCachePopulateAndFilter(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 2, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
system1 := systems[0]
|
||||
system2 := systems[1]
|
||||
|
||||
statusAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": system1.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
cpuAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "CPU",
|
||||
"system": system1.Id,
|
||||
"user": user.Id,
|
||||
"value": 80,
|
||||
"min": 1,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
memoryAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Memory",
|
||||
"system": system2.Id,
|
||||
"user": user.Id,
|
||||
"value": 90,
|
||||
"min": 1,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
cache := alerts.NewAlertsCache(hub)
|
||||
cache.PopulateFromDB(false)
|
||||
|
||||
statusAlerts := cache.GetAlertsByName(system1.Id, "Status")
|
||||
require.Len(t, statusAlerts, 1)
|
||||
assert.Equal(t, statusAlert.Id, statusAlerts[0].Id)
|
||||
|
||||
nonStatusAlerts := cache.GetAlertsExcludingNames(system1.Id, "Status")
|
||||
require.Len(t, nonStatusAlerts, 1)
|
||||
assert.Equal(t, cpuAlert.Id, nonStatusAlerts[0].Id)
|
||||
|
||||
system2Alerts := cache.GetSystemAlerts(system2.Id)
|
||||
require.Len(t, system2Alerts, 1)
|
||||
assert.Equal(t, memoryAlert.Id, system2Alerts[0].Id)
|
||||
}
|
||||
|
||||
func TestSystemAlertsCacheLazyLoadUpdateAndDelete(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
systemRecord := systems[0]
|
||||
|
||||
statusAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": systemRecord.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
cache := alerts.NewAlertsCache(hub)
|
||||
require.Len(t, cache.GetSystemAlerts(systemRecord.Id), 1, "first lookup should lazy-load alerts for the system")
|
||||
|
||||
cpuAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "CPU",
|
||||
"system": systemRecord.Id,
|
||||
"user": user.Id,
|
||||
"value": 80,
|
||||
"min": 1,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
cache.Update(cpuAlert)
|
||||
|
||||
nonStatusAlerts := cache.GetAlertsExcludingNames(systemRecord.Id, "Status")
|
||||
require.Len(t, nonStatusAlerts, 1)
|
||||
assert.Equal(t, cpuAlert.Id, nonStatusAlerts[0].Id)
|
||||
|
||||
cache.Delete(statusAlert)
|
||||
assert.Empty(t, cache.GetAlertsByName(systemRecord.Id, "Status"), "deleted alerts should be removed from the in-memory cache")
|
||||
}
|
||||
|
||||
func TestSystemAlertsCacheRefreshReturnsLatestCopy(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
"triggered": false,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
cache := alerts.NewAlertsCache(hub)
|
||||
snapshot := cache.GetSystemAlerts(system.Id)[0]
|
||||
assert.False(t, snapshot.Triggered)
|
||||
|
||||
alert.Set("triggered", true)
|
||||
require.NoError(t, hub.Save(alert))
|
||||
|
||||
refreshed, ok := cache.Refresh(snapshot)
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, snapshot.Id, refreshed.Id)
|
||||
assert.True(t, refreshed.Triggered, "refresh should return the updated cached value rather than the stale snapshot")
|
||||
|
||||
require.NoError(t, hub.Delete(alert))
|
||||
_, ok = cache.Refresh(snapshot)
|
||||
assert.False(t, ok, "refresh should report false when the cached alert no longer exists")
|
||||
}
|
||||
|
||||
func TestAlertManagerCacheLifecycle(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
// Create an alert
|
||||
alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "CPU",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"value": 80,
|
||||
"min": 1,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
am := hub.AlertManager
|
||||
cache := am.GetSystemAlertsCache()
|
||||
|
||||
// Verify it's in cache (it should be since CreateRecord triggers the event)
|
||||
assert.Len(t, cache.GetSystemAlerts(system.Id), 1)
|
||||
assert.Equal(t, alert.Id, cache.GetSystemAlerts(system.Id)[0].Id)
|
||||
assert.EqualValues(t, 80, cache.GetSystemAlerts(system.Id)[0].Value)
|
||||
|
||||
// Update the alert through PocketBase to trigger events
|
||||
alert.Set("value", 85)
|
||||
require.NoError(t, hub.Save(alert))
|
||||
|
||||
// Check if updated value is reflected (or at least that it's still there)
|
||||
cachedAlerts := cache.GetSystemAlerts(system.Id)
|
||||
assert.Len(t, cachedAlerts, 1)
|
||||
assert.EqualValues(t, 85, cachedAlerts[0].Value)
|
||||
|
||||
// Delete the alert through PocketBase to trigger events
|
||||
require.NoError(t, hub.Delete(alert))
|
||||
|
||||
// Verify it's removed from cache
|
||||
assert.Empty(t, cache.GetSystemAlerts(system.Id), "alert should be removed from cache after PocketBase delete")
|
||||
}
|
||||
|
||||
// func TestAlertManagerCacheMovesAlertToNewSystemOnUpdate(t *testing.T) {
|
||||
// hub, user := beszelTests.GetHubWithUser(t)
|
||||
// defer hub.Cleanup()
|
||||
|
||||
// systems, err := beszelTests.CreateSystems(hub, 2, user.Id, "up")
|
||||
// require.NoError(t, err)
|
||||
// system1 := systems[0]
|
||||
// system2 := systems[1]
|
||||
|
||||
// alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
// "name": "CPU",
|
||||
// "system": system1.Id,
|
||||
// "user": user.Id,
|
||||
// "value": 80,
|
||||
// "min": 1,
|
||||
// })
|
||||
// require.NoError(t, err)
|
||||
|
||||
// am := hub.AlertManager
|
||||
// cache := am.GetSystemAlertsCache()
|
||||
|
||||
// // Initially in system1 cache
|
||||
// assert.Len(t, cache.Get(system1.Id), 1)
|
||||
// assert.Empty(t, cache.Get(system2.Id))
|
||||
|
||||
// // Move alert to system2
|
||||
// alert.Set("system", system2.Id)
|
||||
// require.NoError(t, hub.Save(alert))
|
||||
|
||||
// // DEBUG: print if it is found
|
||||
// // fmt.Printf("system1 alerts after update: %v\n", cache.Get(system1.Id))
|
||||
|
||||
// // Should be removed from system1 and present in system2
|
||||
// assert.Empty(t, cache.GetType(system1.Id, "CPU"), "updated alerts should be evicted from the previous system cache")
|
||||
// require.Len(t, cache.Get(system2.Id), 1)
|
||||
// assert.Equal(t, alert.Id, cache.Get(system2.Id)[0].Id)
|
||||
// }
|
||||
155
internal/alerts/alerts_disk_test.go
Normal file
155
internal/alerts/alerts_disk_test.go
Normal file
@@ -0,0 +1,155 @@
|
||||
//go:build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/tools/types"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestDiskAlertExtraFsMultiMinute tests that multi-minute disk alerts correctly use
|
||||
// historical per-minute values for extra (non-root) filesystems, not the current live snapshot.
|
||||
func TestDiskAlertExtraFsMultiMinute(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
systemRecord := systems[0]
|
||||
|
||||
// Disk alert: threshold 80%, min=2 (requires historical averaging)
|
||||
diskAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Disk",
|
||||
"system": systemRecord.Id,
|
||||
"user": user.Id,
|
||||
"value": 80, // threshold: 80%
|
||||
"min": 2, // 2 minutes - requires historical averaging
|
||||
})
|
||||
require.NoError(t, err)
|
||||
assert.False(t, diskAlert.GetBool("triggered"), "Alert should not be triggered initially")
|
||||
|
||||
am := hub.GetAlertManager()
|
||||
now := time.Now().UTC()
|
||||
|
||||
extraFsHigh := map[string]*system.FsStats{
|
||||
"/mnt/data": {DiskTotal: 1000, DiskUsed: 920}, // 92% - above threshold
|
||||
}
|
||||
|
||||
// Insert 4 historical records spread over 3 minutes (same pattern as battery tests).
|
||||
// The oldest record must predate (now - 2min) so the alert time window is valid.
|
||||
recordTimes := []time.Duration{
|
||||
-180 * time.Second, // 3 min ago - anchors oldest record before alert.time
|
||||
-90 * time.Second,
|
||||
-60 * time.Second,
|
||||
-30 * time.Second,
|
||||
}
|
||||
|
||||
for _, offset := range recordTimes {
|
||||
stats := system.Stats{
|
||||
DiskPct: 30, // root disk at 30% - below threshold
|
||||
ExtraFs: extraFsHigh,
|
||||
}
|
||||
statsJSON, _ := json.Marshal(stats)
|
||||
|
||||
recordTime := now.Add(offset)
|
||||
record, err := beszelTests.CreateRecord(hub, "system_stats", map[string]any{
|
||||
"system": systemRecord.Id,
|
||||
"type": "1m",
|
||||
"stats": string(statsJSON),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
record.SetRaw("created", recordTime.Format(types.DefaultDateLayout))
|
||||
err = hub.SaveNoValidate(record)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
combinedDataHigh := &system.CombinedData{
|
||||
Stats: system.Stats{
|
||||
DiskPct: 30,
|
||||
ExtraFs: extraFsHigh,
|
||||
},
|
||||
Info: system.Info{
|
||||
DiskPct: 30,
|
||||
},
|
||||
}
|
||||
|
||||
systemRecord.Set("updated", now)
|
||||
err = hub.SaveNoValidate(systemRecord)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = am.HandleSystemAlerts(systemRecord, combinedDataHigh)
|
||||
require.NoError(t, err)
|
||||
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
|
||||
diskAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": diskAlert.Id})
|
||||
require.NoError(t, err)
|
||||
assert.True(t, diskAlert.GetBool("triggered"),
|
||||
"Alert SHOULD be triggered when extra disk average (92%%) exceeds threshold (80%%)")
|
||||
|
||||
// --- Resolution: extra disk drops to 50%, alert should resolve ---
|
||||
|
||||
extraFsLow := map[string]*system.FsStats{
|
||||
"/mnt/data": {DiskTotal: 1000, DiskUsed: 500}, // 50% - below threshold
|
||||
}
|
||||
|
||||
newNow := now.Add(2 * time.Minute)
|
||||
recordTimesLow := []time.Duration{
|
||||
-180 * time.Second,
|
||||
-90 * time.Second,
|
||||
-60 * time.Second,
|
||||
-30 * time.Second,
|
||||
}
|
||||
|
||||
for _, offset := range recordTimesLow {
|
||||
stats := system.Stats{
|
||||
DiskPct: 30,
|
||||
ExtraFs: extraFsLow,
|
||||
}
|
||||
statsJSON, _ := json.Marshal(stats)
|
||||
|
||||
recordTime := newNow.Add(offset)
|
||||
record, err := beszelTests.CreateRecord(hub, "system_stats", map[string]any{
|
||||
"system": systemRecord.Id,
|
||||
"type": "1m",
|
||||
"stats": string(statsJSON),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
record.SetRaw("created", recordTime.Format(types.DefaultDateLayout))
|
||||
err = hub.SaveNoValidate(record)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
combinedDataLow := &system.CombinedData{
|
||||
Stats: system.Stats{
|
||||
DiskPct: 30,
|
||||
ExtraFs: extraFsLow,
|
||||
},
|
||||
Info: system.Info{
|
||||
DiskPct: 30,
|
||||
},
|
||||
}
|
||||
|
||||
systemRecord.Set("updated", newNow)
|
||||
err = hub.SaveNoValidate(systemRecord)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = am.HandleSystemAlerts(systemRecord, combinedDataLow)
|
||||
require.NoError(t, err)
|
||||
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
|
||||
diskAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": diskAlert.Id})
|
||||
require.NoError(t, err)
|
||||
assert.False(t, diskAlert.GetBool("triggered"),
|
||||
"Alert should be resolved when extra disk average (50%%) drops below threshold (80%%)")
|
||||
}
|
||||
@@ -49,7 +49,7 @@ func TestAlertSilencedOneTime(t *testing.T) {
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.StopWorker()
|
||||
defer am.Stop()
|
||||
|
||||
// Test that alert is silenced
|
||||
silenced := am.IsNotificationSilenced(user.Id, system.Id)
|
||||
@@ -106,7 +106,7 @@ func TestAlertSilencedDaily(t *testing.T) {
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.StopWorker()
|
||||
defer am.Stop()
|
||||
|
||||
// Get current hour and create a window that includes current time
|
||||
now := time.Now().UTC()
|
||||
@@ -170,7 +170,7 @@ func TestAlertSilencedDailyMidnightCrossing(t *testing.T) {
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.StopWorker()
|
||||
defer am.Stop()
|
||||
|
||||
// Create a window that crosses midnight: 22:00 - 02:00
|
||||
startTime := time.Date(2000, 1, 1, 22, 0, 0, 0, time.UTC)
|
||||
@@ -211,7 +211,7 @@ func TestAlertSilencedGlobal(t *testing.T) {
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.StopWorker()
|
||||
defer am.Stop()
|
||||
|
||||
// Create a global quiet hours window (no system specified)
|
||||
now := time.Now().UTC()
|
||||
@@ -250,7 +250,7 @@ func TestAlertSilencedSystemSpecific(t *testing.T) {
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.StopWorker()
|
||||
defer am.Stop()
|
||||
|
||||
// Create a system-specific quiet hours window for system1 only
|
||||
now := time.Now().UTC()
|
||||
@@ -296,7 +296,7 @@ func TestAlertSilencedMultiUser(t *testing.T) {
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.StopWorker()
|
||||
defer am.Stop()
|
||||
|
||||
// Create a quiet hours window for user1 only
|
||||
now := time.Now().UTC()
|
||||
@@ -417,7 +417,7 @@ func TestAlertSilencedNoWindows(t *testing.T) {
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.StopWorker()
|
||||
defer am.Stop()
|
||||
|
||||
// Without any quiet hours windows, alert should NOT be silenced
|
||||
silenced := am.IsNotificationSilenced(user.Id, system.Id)
|
||||
|
||||
@@ -5,67 +5,28 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
)
|
||||
|
||||
// alertTask is a unit of work received from the alert manager's alertQueue.
// A "schedule" task registers a pending status alert keyed by the alert
// record's ID; a "cancel" task removes that pending entry again.
type alertTask struct {
|
||||
action string // "schedule" or "cancel"
|
||||
// systemName is copied into the pending entry when the task is scheduled.
systemName string
|
||||
// alertRecord identifies the alert; its Id is the pending-alerts key.
alertRecord *core.Record
|
||||
// delay is added to the current time to compute the pending entry's expiry.
delay time.Duration
|
||||
}
|
||||
|
||||
type alertInfo struct {
|
||||
systemName string
|
||||
alertRecord *core.Record
|
||||
expireTime time.Time
|
||||
systemName string
|
||||
alertData CachedAlertData
|
||||
expireTime time.Time
|
||||
timer *time.Timer
|
||||
}
|
||||
|
||||
// startWorker is a long-running goroutine that processes alert tasks
|
||||
// every x seconds. It must be running to process status alerts.
|
||||
func (am *AlertManager) startWorker() {
|
||||
processPendingAlerts := time.Tick(15 * time.Second)
|
||||
|
||||
// check for status alerts that are not resolved when system comes up
|
||||
// (can be removed if we figure out core bug in #1052)
|
||||
checkStatusAlerts := time.Tick(561 * time.Second)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-am.stopChan:
|
||||
return
|
||||
case task := <-am.alertQueue:
|
||||
switch task.action {
|
||||
case "schedule":
|
||||
am.pendingAlerts.Store(task.alertRecord.Id, &alertInfo{
|
||||
systemName: task.systemName,
|
||||
alertRecord: task.alertRecord,
|
||||
expireTime: time.Now().Add(task.delay),
|
||||
})
|
||||
case "cancel":
|
||||
am.pendingAlerts.Delete(task.alertRecord.Id)
|
||||
// Stop cancels all pending status alert timers.
|
||||
func (am *AlertManager) Stop() {
|
||||
am.stopOnce.Do(func() {
|
||||
am.pendingAlerts.Range(func(key, value any) bool {
|
||||
info := value.(*alertInfo)
|
||||
if info.timer != nil {
|
||||
info.timer.Stop()
|
||||
}
|
||||
case <-checkStatusAlerts:
|
||||
resolveStatusAlerts(am.hub)
|
||||
case <-processPendingAlerts:
|
||||
// Check for expired alerts every tick
|
||||
now := time.Now()
|
||||
for key, value := range am.pendingAlerts.Range {
|
||||
info := value.(*alertInfo)
|
||||
if now.After(info.expireTime) {
|
||||
// Downtime delay has passed, process alert
|
||||
am.sendStatusAlert("down", info.systemName, info.alertRecord)
|
||||
am.pendingAlerts.Delete(key)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// StopWorker shuts down the AlertManager.worker goroutine
|
||||
func (am *AlertManager) StopWorker() {
|
||||
close(am.stopChan)
|
||||
am.pendingAlerts.Delete(key)
|
||||
return true
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
// HandleStatusAlerts manages the logic when system status changes.
|
||||
@@ -74,82 +35,104 @@ func (am *AlertManager) HandleStatusAlerts(newStatus string, systemRecord *core.
|
||||
return nil
|
||||
}
|
||||
|
||||
alertRecords, err := am.getSystemStatusAlerts(systemRecord.Id)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(alertRecords) == 0 {
|
||||
alerts := am.alertsCache.GetAlertsByName(systemRecord.Id, "Status")
|
||||
if len(alerts) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
systemName := systemRecord.GetString("name")
|
||||
if newStatus == "down" {
|
||||
am.handleSystemDown(systemName, alertRecords)
|
||||
am.handleSystemDown(systemName, alerts)
|
||||
} else {
|
||||
am.handleSystemUp(systemName, alertRecords)
|
||||
am.handleSystemUp(systemName, alerts)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// getSystemStatusAlerts retrieves all "Status" alert records for a given system ID.
|
||||
func (am *AlertManager) getSystemStatusAlerts(systemID string) ([]*core.Record, error) {
|
||||
alertRecords, err := am.hub.FindAllRecords("alerts", dbx.HashExp{
|
||||
"system": systemID,
|
||||
"name": "Status",
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
// handleSystemDown manages the logic when a system status changes to "down". It schedules pending alerts for each alert record.
|
||||
func (am *AlertManager) handleSystemDown(systemName string, alerts []CachedAlertData) {
|
||||
for _, alertData := range alerts {
|
||||
min := max(1, int(alertData.Min))
|
||||
am.schedulePendingStatusAlert(systemName, alertData, time.Duration(min)*time.Minute)
|
||||
}
|
||||
return alertRecords, nil
|
||||
}
|
||||
|
||||
// Schedules delayed "down" alerts for each alert record.
|
||||
func (am *AlertManager) handleSystemDown(systemName string, alertRecords []*core.Record) {
|
||||
for _, alertRecord := range alertRecords {
|
||||
// Continue if alert is already scheduled
|
||||
if _, exists := am.pendingAlerts.Load(alertRecord.Id); exists {
|
||||
continue
|
||||
}
|
||||
// Schedule by adding to queue
|
||||
min := max(1, alertRecord.GetInt("min"))
|
||||
am.alertQueue <- alertTask{
|
||||
action: "schedule",
|
||||
systemName: systemName,
|
||||
alertRecord: alertRecord,
|
||||
delay: time.Duration(min) * time.Minute,
|
||||
}
|
||||
// schedulePendingStatusAlert sets up a timer to send a "down" alert after the specified delay if the system is still down.
|
||||
// It returns true if the alert was scheduled, or false if an alert was already pending for the given alert record.
|
||||
func (am *AlertManager) schedulePendingStatusAlert(systemName string, alertData CachedAlertData, delay time.Duration) bool {
|
||||
alert := &alertInfo{
|
||||
systemName: systemName,
|
||||
alertData: alertData,
|
||||
expireTime: time.Now().Add(delay),
|
||||
}
|
||||
|
||||
storedAlert, loaded := am.pendingAlerts.LoadOrStore(alertData.Id, alert)
|
||||
if loaded {
|
||||
return false
|
||||
}
|
||||
|
||||
stored := storedAlert.(*alertInfo)
|
||||
stored.timer = time.AfterFunc(time.Until(stored.expireTime), func() {
|
||||
am.processPendingAlert(alertData.Id)
|
||||
})
|
||||
return true
|
||||
}
|
||||
|
||||
// handleSystemUp manages the logic when a system status changes to "up".
|
||||
// It cancels any pending alerts and sends "up" alerts.
|
||||
func (am *AlertManager) handleSystemUp(systemName string, alertRecords []*core.Record) {
|
||||
for _, alertRecord := range alertRecords {
|
||||
alertRecordID := alertRecord.Id
|
||||
func (am *AlertManager) handleSystemUp(systemName string, alerts []CachedAlertData) {
|
||||
for _, alertData := range alerts {
|
||||
// If alert exists for record, delete and continue (down alert not sent)
|
||||
if _, exists := am.pendingAlerts.Load(alertRecordID); exists {
|
||||
am.alertQueue <- alertTask{
|
||||
action: "cancel",
|
||||
alertRecord: alertRecord,
|
||||
}
|
||||
if am.cancelPendingAlert(alertData.Id) {
|
||||
continue
|
||||
}
|
||||
// No alert scheduled for this record, send "up" alert
|
||||
if err := am.sendStatusAlert("up", systemName, alertRecord); err != nil {
|
||||
if !alertData.Triggered {
|
||||
continue
|
||||
}
|
||||
if err := am.sendStatusAlert("up", systemName, alertData); err != nil {
|
||||
am.hub.Logger().Error("Failed to send alert", "err", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sendStatusAlert sends a status alert ("up" or "down") to the users associated with the alert records.
|
||||
func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, alertRecord *core.Record) error {
|
||||
switch alertStatus {
|
||||
case "up":
|
||||
alertRecord.Set("triggered", false)
|
||||
case "down":
|
||||
alertRecord.Set("triggered", true)
|
||||
// cancelPendingAlert stops the timer and removes the pending alert for the given alert ID. Returns true if a pending alert was found and cancelled.
|
||||
func (am *AlertManager) cancelPendingAlert(alertID string) bool {
|
||||
value, loaded := am.pendingAlerts.LoadAndDelete(alertID)
|
||||
if !loaded {
|
||||
return false
|
||||
}
|
||||
|
||||
info := value.(*alertInfo)
|
||||
if info.timer != nil {
|
||||
info.timer.Stop()
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// processPendingAlert sends a "down" alert if the pending alert has expired and the system is still down.
|
||||
func (am *AlertManager) processPendingAlert(alertID string) {
|
||||
value, loaded := am.pendingAlerts.LoadAndDelete(alertID)
|
||||
if !loaded {
|
||||
return
|
||||
}
|
||||
|
||||
info := value.(*alertInfo)
|
||||
refreshedAlertData, ok := am.alertsCache.Refresh(info.alertData)
|
||||
if !ok || refreshedAlertData.Triggered {
|
||||
return
|
||||
}
|
||||
if err := am.sendStatusAlert("down", info.systemName, refreshedAlertData); err != nil {
|
||||
am.hub.Logger().Error("Failed to send alert", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// sendStatusAlert sends a status alert ("up" or "down") to the users associated with the alert records.
|
||||
func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, alertData CachedAlertData) error {
|
||||
// Update trigger state for alert record before sending alert
|
||||
triggered := alertStatus == "down"
|
||||
if err := am.setAlertTriggered(alertData, triggered); err != nil {
|
||||
return err
|
||||
}
|
||||
am.hub.Save(alertRecord)
|
||||
|
||||
var emoji string
|
||||
if alertStatus == "up" {
|
||||
@@ -162,10 +145,10 @@ func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, a
|
||||
message := strings.TrimSuffix(title, emoji)
|
||||
|
||||
// Get system ID for the link
|
||||
systemID := alertRecord.GetString("system")
|
||||
systemID := alertData.SystemID
|
||||
|
||||
return am.SendAlert(AlertMessageData{
|
||||
UserID: alertRecord.GetString("user"),
|
||||
UserID: alertData.UserID,
|
||||
SystemID: systemID,
|
||||
Title: title,
|
||||
Message: message,
|
||||
@@ -174,8 +157,8 @@ func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, a
|
||||
})
|
||||
}
|
||||
|
||||
// resolveStatusAlerts resolves any status alerts that weren't resolved
|
||||
// when system came up (https://github.com/henrygd/beszel/issues/1052)
|
||||
// resolveStatusAlerts resolves any triggered status alerts that weren't resolved
|
||||
// when system came up (https://github.com/henrygd/beszel/issues/1052).
|
||||
func resolveStatusAlerts(app core.App) error {
|
||||
db := app.DB()
|
||||
// Find all active status alerts where the system is actually up
|
||||
@@ -205,3 +188,40 @@ func resolveStatusAlerts(app core.App) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// restorePendingStatusAlerts re-queues untriggered status alerts for systems that
|
||||
// are still down after a hub restart. This rebuilds the lost in-memory timer state.
|
||||
func (am *AlertManager) restorePendingStatusAlerts() error {
|
||||
type pendingStatusAlert struct {
|
||||
AlertID string `db:"alert_id"`
|
||||
SystemID string `db:"system_id"`
|
||||
SystemName string `db:"system_name"`
|
||||
}
|
||||
|
||||
var pending []pendingStatusAlert
|
||||
err := am.hub.DB().NewQuery(`
|
||||
SELECT a.id AS alert_id, a.system AS system_id, s.name AS system_name
|
||||
FROM alerts a
|
||||
JOIN systems s ON a.system = s.id
|
||||
WHERE a.name = 'Status'
|
||||
AND a.triggered = false
|
||||
AND s.status = 'down'
|
||||
`).All(&pending)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Make sure cache is populated before trying to restore pending alerts
|
||||
_ = am.alertsCache.PopulateFromDB(false)
|
||||
|
||||
for _, item := range pending {
|
||||
alertData, ok := am.alertsCache.GetAlert(item.SystemID, item.AlertID)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
min := max(1, int(alertData.Min))
|
||||
am.schedulePendingStatusAlert(item.SystemName, alertData, time.Duration(min)*time.Minute)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
755
internal/alerts/alerts_status_test.go
Normal file
755
internal/alerts/alerts_status_test.go
Normal file
@@ -0,0 +1,755 @@
|
||||
//go:build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/alerts"
|
||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestStatusAlerts(t *testing.T) {
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 4, user.Id, "paused")
|
||||
assert.NoError(t, err)
|
||||
|
||||
var alerts []*core.Record
|
||||
for i, system := range systems {
|
||||
alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"min": i + 1,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
alerts = append(alerts, alert)
|
||||
}
|
||||
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
|
||||
for _, alert := range alerts {
|
||||
assert.False(t, alert.GetBool("triggered"), "Alert should not be triggered immediately")
|
||||
}
|
||||
if hub.TestMailer.TotalSend() != 0 {
|
||||
assert.Zero(t, hub.TestMailer.TotalSend(), "Expected 0 messages, got %d", hub.TestMailer.TotalSend())
|
||||
}
|
||||
for _, system := range systems {
|
||||
assert.EqualValues(t, "paused", system.GetString("status"), "System should be paused")
|
||||
}
|
||||
for _, system := range systems {
|
||||
system.Set("status", "up")
|
||||
err = hub.SaveNoValidate(system)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
|
||||
for _, system := range systems {
|
||||
system.Set("status", "down")
|
||||
err = hub.SaveNoValidate(system)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
// after 30 seconds, should have 4 alerts in the pendingAlerts map, no triggered alerts
|
||||
time.Sleep(time.Second * 30)
|
||||
assert.EqualValues(t, 4, hub.GetPendingAlertsCount(), "should have 4 alerts in the pendingAlerts map")
|
||||
triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 0, triggeredCount, "should have 0 alert triggered")
|
||||
assert.EqualValues(t, 0, hub.TestMailer.TotalSend(), "should have 0 messages sent")
|
||||
// after 1:30 seconds, should have 1 triggered alert and 3 pending alerts
|
||||
time.Sleep(time.Second * 60)
|
||||
assert.EqualValues(t, 3, hub.GetPendingAlertsCount(), "should have 3 alerts in the pendingAlerts map")
|
||||
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 1, triggeredCount, "should have 1 alert triggered")
|
||||
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 messages sent")
|
||||
// after 2:30 seconds, should have 2 triggered alerts and 2 pending alerts
|
||||
time.Sleep(time.Second * 60)
|
||||
assert.EqualValues(t, 2, hub.GetPendingAlertsCount(), "should have 2 alerts in the pendingAlerts map")
|
||||
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 2, triggeredCount, "should have 2 alert triggered")
|
||||
assert.EqualValues(t, 2, hub.TestMailer.TotalSend(), "should have 2 messages sent")
|
||||
// now we will bring the remaning systems back up
|
||||
for _, system := range systems {
|
||||
system.Set("status", "up")
|
||||
err = hub.SaveNoValidate(system)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
// should have 0 alerts in the pendingAlerts map and 0 alerts triggered
|
||||
assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
|
||||
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.Zero(t, triggeredCount, "should have 0 alert triggered")
|
||||
// 4 messages sent, 2 down alerts and 2 up alerts for first 2 systems
|
||||
assert.EqualValues(t, 4, hub.TestMailer.TotalSend(), "should have 4 messages sent")
|
||||
})
|
||||
}
|
||||
func TestStatusAlertRecoveryBeforeDeadline(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Ensure user settings have an email
|
||||
userSettings, _ := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
|
||||
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
|
||||
hub.Save(userSettings)
|
||||
|
||||
// Initial email count
|
||||
initialEmailCount := hub.TestMailer.TotalSend()
|
||||
|
||||
systemCollection, _ := hub.FindCollectionByNameOrId("systems")
|
||||
system := core.NewRecord(systemCollection)
|
||||
system.Set("name", "test-system")
|
||||
system.Set("status", "up")
|
||||
system.Set("host", "127.0.0.1")
|
||||
system.Set("users", []string{user.Id})
|
||||
hub.Save(system)
|
||||
|
||||
alertCollection, _ := hub.FindCollectionByNameOrId("alerts")
|
||||
alert := core.NewRecord(alertCollection)
|
||||
alert.Set("user", user.Id)
|
||||
alert.Set("system", system.Id)
|
||||
alert.Set("name", "Status")
|
||||
alert.Set("triggered", false)
|
||||
alert.Set("min", 1)
|
||||
hub.Save(alert)
|
||||
|
||||
am := hub.AlertManager
|
||||
|
||||
// 1. System goes down
|
||||
am.HandleStatusAlerts("down", system)
|
||||
assert.Equal(t, 1, am.GetPendingAlertsCount(), "Alert should be scheduled")
|
||||
|
||||
// 2. System goes up BEFORE delay expires
|
||||
// Triggering HandleStatusAlerts("up") SHOULD NOT send an alert.
|
||||
am.HandleStatusAlerts("up", system)
|
||||
|
||||
assert.Equal(t, 0, am.GetPendingAlertsCount(), "Alert should be canceled if system recovers before delay expires")
|
||||
|
||||
// Verify that NO email was sent.
|
||||
assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "Recovery notification should not be sent if system never went down")
|
||||
|
||||
}
|
||||
|
||||
func TestStatusAlertNormalRecovery(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Ensure user settings have an email
|
||||
userSettings, _ := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
|
||||
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
|
||||
hub.Save(userSettings)
|
||||
|
||||
systemCollection, _ := hub.FindCollectionByNameOrId("systems")
|
||||
system := core.NewRecord(systemCollection)
|
||||
system.Set("name", "test-system")
|
||||
system.Set("status", "up")
|
||||
system.Set("host", "127.0.0.1")
|
||||
system.Set("users", []string{user.Id})
|
||||
hub.Save(system)
|
||||
|
||||
alertCollection, _ := hub.FindCollectionByNameOrId("alerts")
|
||||
alert := core.NewRecord(alertCollection)
|
||||
alert.Set("user", user.Id)
|
||||
alert.Set("system", system.Id)
|
||||
alert.Set("name", "Status")
|
||||
alert.Set("triggered", true) // System was confirmed DOWN
|
||||
hub.Save(alert)
|
||||
|
||||
am := hub.AlertManager
|
||||
initialEmailCount := hub.TestMailer.TotalSend()
|
||||
|
||||
// System goes up
|
||||
am.HandleStatusAlerts("up", system)
|
||||
|
||||
// Verify that an email WAS sent (normal recovery).
|
||||
assert.Equal(t, initialEmailCount+1, hub.TestMailer.TotalSend(), "Recovery notification should be sent if system was triggered as down")
|
||||
|
||||
}
|
||||
|
||||
func TestHandleStatusAlertsDoesNotSendRecoveryWhileDownIsOnlyPending(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
|
||||
require.NoError(t, err)
|
||||
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
|
||||
require.NoError(t, hub.Save(userSettings))
|
||||
|
||||
systemCollection, err := hub.FindCollectionByNameOrId("systems")
|
||||
require.NoError(t, err)
|
||||
system := core.NewRecord(systemCollection)
|
||||
system.Set("name", "test-system")
|
||||
system.Set("status", "up")
|
||||
system.Set("host", "127.0.0.1")
|
||||
system.Set("users", []string{user.Id})
|
||||
require.NoError(t, hub.Save(system))
|
||||
|
||||
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
|
||||
require.NoError(t, err)
|
||||
alert := core.NewRecord(alertCollection)
|
||||
alert.Set("user", user.Id)
|
||||
alert.Set("system", system.Id)
|
||||
alert.Set("name", "Status")
|
||||
alert.Set("triggered", false)
|
||||
alert.Set("min", 1)
|
||||
require.NoError(t, hub.Save(alert))
|
||||
|
||||
initialEmailCount := hub.TestMailer.TotalSend()
|
||||
am := alerts.NewTestAlertManagerWithoutWorker(hub)
|
||||
|
||||
require.NoError(t, am.HandleStatusAlerts("down", system))
|
||||
assert.Equal(t, 1, am.GetPendingAlertsCount(), "down transition should register a pending alert immediately")
|
||||
|
||||
require.NoError(t, am.HandleStatusAlerts("up", system))
|
||||
assert.Zero(t, am.GetPendingAlertsCount(), "recovery should cancel the pending down alert")
|
||||
assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "recovery notification should not be sent before a down alert triggers")
|
||||
|
||||
alertRecord, err := hub.FindRecordById("alerts", alert.Id)
|
||||
require.NoError(t, err)
|
||||
assert.False(t, alertRecord.GetBool("triggered"), "alert should remain untriggered when downtime never matured")
|
||||
}
|
||||
|
||||
func TestStatusAlertTimerCancellationPreventsBoundaryDelivery(t *testing.T) {
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
|
||||
require.NoError(t, err)
|
||||
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
|
||||
require.NoError(t, hub.Save(userSettings))
|
||||
|
||||
systemCollection, err := hub.FindCollectionByNameOrId("systems")
|
||||
require.NoError(t, err)
|
||||
system := core.NewRecord(systemCollection)
|
||||
system.Set("name", "test-system")
|
||||
system.Set("status", "up")
|
||||
system.Set("host", "127.0.0.1")
|
||||
system.Set("users", []string{user.Id})
|
||||
require.NoError(t, hub.Save(system))
|
||||
|
||||
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
|
||||
require.NoError(t, err)
|
||||
alert := core.NewRecord(alertCollection)
|
||||
alert.Set("user", user.Id)
|
||||
alert.Set("system", system.Id)
|
||||
alert.Set("name", "Status")
|
||||
alert.Set("triggered", false)
|
||||
alert.Set("min", 1)
|
||||
require.NoError(t, hub.Save(alert))
|
||||
|
||||
initialEmailCount := hub.TestMailer.TotalSend()
|
||||
am := alerts.NewTestAlertManagerWithoutWorker(hub)
|
||||
|
||||
require.NoError(t, am.HandleStatusAlerts("down", system))
|
||||
assert.Equal(t, 1, am.GetPendingAlertsCount(), "down transition should register a pending alert immediately")
|
||||
require.True(t, am.ResetPendingAlertTimer(alert.Id, 25*time.Millisecond), "test should shorten the pending alert timer")
|
||||
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
require.NoError(t, am.HandleStatusAlerts("up", system))
|
||||
assert.Zero(t, am.GetPendingAlertsCount(), "recovery should remove the pending alert before the timer callback runs")
|
||||
|
||||
time.Sleep(40 * time.Millisecond)
|
||||
assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "timer callback should not deliver after recovery cancels the pending alert")
|
||||
|
||||
alertRecord, err := hub.FindRecordById("alerts", alert.Id)
|
||||
require.NoError(t, err)
|
||||
assert.False(t, alertRecord.GetBool("triggered"), "alert should remain untriggered when cancellation wins the timer race")
|
||||
|
||||
time.Sleep(time.Minute)
|
||||
synctest.Wait()
|
||||
})
|
||||
}
|
||||
|
||||
func TestStatusAlertDownFiresAfterDelayExpires(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
|
||||
require.NoError(t, err)
|
||||
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
|
||||
require.NoError(t, hub.Save(userSettings))
|
||||
|
||||
systemCollection, err := hub.FindCollectionByNameOrId("systems")
|
||||
require.NoError(t, err)
|
||||
system := core.NewRecord(systemCollection)
|
||||
system.Set("name", "test-system")
|
||||
system.Set("status", "up")
|
||||
system.Set("host", "127.0.0.1")
|
||||
system.Set("users", []string{user.Id})
|
||||
require.NoError(t, hub.Save(system))
|
||||
|
||||
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
|
||||
require.NoError(t, err)
|
||||
alert := core.NewRecord(alertCollection)
|
||||
alert.Set("user", user.Id)
|
||||
alert.Set("system", system.Id)
|
||||
alert.Set("name", "Status")
|
||||
alert.Set("triggered", false)
|
||||
alert.Set("min", 1)
|
||||
require.NoError(t, hub.Save(alert))
|
||||
|
||||
initialEmailCount := hub.TestMailer.TotalSend()
|
||||
am := alerts.NewTestAlertManagerWithoutWorker(hub)
|
||||
|
||||
require.NoError(t, am.HandleStatusAlerts("down", system))
|
||||
assert.Equal(t, 1, am.GetPendingAlertsCount(), "alert should be pending after system goes down")
|
||||
|
||||
// Expire the pending alert and process it
|
||||
am.ForceExpirePendingAlerts()
|
||||
processed, err := am.ProcessPendingAlerts()
|
||||
require.NoError(t, err)
|
||||
assert.Len(t, processed, 1, "one alert should have been processed")
|
||||
assert.Equal(t, 0, am.GetPendingAlertsCount(), "pending alert should be consumed after processing")
|
||||
|
||||
// Verify down email was sent
|
||||
assert.Equal(t, initialEmailCount+1, hub.TestMailer.TotalSend(), "down notification should be sent after delay expires")
|
||||
|
||||
// Verify triggered flag is set in the DB
|
||||
alertRecord, err := hub.FindRecordById("alerts", alert.Id)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, alertRecord.GetBool("triggered"), "alert should be marked triggered after downtime matures")
|
||||
}
|
||||
|
||||
func TestStatusAlertDuplicateDownCallIsIdempotent(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
|
||||
require.NoError(t, err)
|
||||
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
|
||||
require.NoError(t, hub.Save(userSettings))
|
||||
|
||||
systemCollection, err := hub.FindCollectionByNameOrId("systems")
|
||||
require.NoError(t, err)
|
||||
system := core.NewRecord(systemCollection)
|
||||
system.Set("name", "test-system")
|
||||
system.Set("status", "up")
|
||||
system.Set("host", "127.0.0.1")
|
||||
system.Set("users", []string{user.Id})
|
||||
require.NoError(t, hub.Save(system))
|
||||
|
||||
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
|
||||
require.NoError(t, err)
|
||||
alert := core.NewRecord(alertCollection)
|
||||
alert.Set("user", user.Id)
|
||||
alert.Set("system", system.Id)
|
||||
alert.Set("name", "Status")
|
||||
alert.Set("triggered", false)
|
||||
alert.Set("min", 5)
|
||||
require.NoError(t, hub.Save(alert))
|
||||
|
||||
am := alerts.NewTestAlertManagerWithoutWorker(hub)
|
||||
|
||||
require.NoError(t, am.HandleStatusAlerts("down", system))
|
||||
require.NoError(t, am.HandleStatusAlerts("down", system))
|
||||
require.NoError(t, am.HandleStatusAlerts("down", system))
|
||||
|
||||
assert.Equal(t, 1, am.GetPendingAlertsCount(), "repeated down calls should not schedule duplicate pending alerts")
|
||||
}
|
||||
|
||||
func TestStatusAlertNoAlertRecord(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systemCollection, err := hub.FindCollectionByNameOrId("systems")
|
||||
require.NoError(t, err)
|
||||
system := core.NewRecord(systemCollection)
|
||||
system.Set("name", "test-system")
|
||||
system.Set("status", "up")
|
||||
system.Set("host", "127.0.0.1")
|
||||
system.Set("users", []string{user.Id})
|
||||
require.NoError(t, hub.Save(system))
|
||||
|
||||
// No Status alert record created for this system
|
||||
initialEmailCount := hub.TestMailer.TotalSend()
|
||||
am := alerts.NewTestAlertManagerWithoutWorker(hub)
|
||||
|
||||
require.NoError(t, am.HandleStatusAlerts("down", system))
|
||||
assert.Equal(t, 0, am.GetPendingAlertsCount(), "no pending alert when no alert record exists")
|
||||
|
||||
require.NoError(t, am.HandleStatusAlerts("up", system))
|
||||
assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "no email when no alert record exists")
|
||||
}
|
||||
|
||||
func TestRestorePendingStatusAlertsRequeuesDownSystemsAfterRestart(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
|
||||
require.NoError(t, err)
|
||||
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
|
||||
require.NoError(t, hub.Save(userSettings))
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "down")
|
||||
require.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
|
||||
require.NoError(t, err)
|
||||
alert := core.NewRecord(alertCollection)
|
||||
alert.Set("user", user.Id)
|
||||
alert.Set("system", system.Id)
|
||||
alert.Set("name", "Status")
|
||||
alert.Set("triggered", false)
|
||||
alert.Set("min", 1)
|
||||
require.NoError(t, hub.Save(alert))
|
||||
|
||||
initialEmailCount := hub.TestMailer.TotalSend()
|
||||
am := alerts.NewTestAlertManagerWithoutWorker(hub)
|
||||
|
||||
require.NoError(t, am.RestorePendingStatusAlerts())
|
||||
assert.Equal(t, 1, am.GetPendingAlertsCount(), "startup restore should requeue a pending down alert for a system still marked down")
|
||||
|
||||
am.ForceExpirePendingAlerts()
|
||||
processed, err := am.ProcessPendingAlerts()
|
||||
require.NoError(t, err)
|
||||
assert.Len(t, processed, 1, "restored pending alert should be processable after the delay expires")
|
||||
assert.Equal(t, initialEmailCount+1, hub.TestMailer.TotalSend(), "restored pending alert should send the down notification")
|
||||
|
||||
alertRecord, err := hub.FindRecordById("alerts", alert.Id)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, alertRecord.GetBool("triggered"), "restored pending alert should mark the alert as triggered once delivered")
|
||||
}
|
||||
|
||||
func TestRestorePendingStatusAlertsSkipsNonDownOrAlreadyTriggeredAlerts(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systemsDown, err := beszelTests.CreateSystems(hub, 2, user.Id, "down")
|
||||
require.NoError(t, err)
|
||||
systemDownPending := systemsDown[0]
|
||||
systemDownTriggered := systemsDown[1]
|
||||
|
||||
systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "up-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.2",
|
||||
"status": "up",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": systemDownPending.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
"triggered": false,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": systemUp.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
"triggered": false,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": systemDownTriggered.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
"triggered": true,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
am := alerts.NewTestAlertManagerWithoutWorker(hub)
|
||||
require.NoError(t, am.RestorePendingStatusAlerts())
|
||||
assert.Equal(t, 1, am.GetPendingAlertsCount(), "only untriggered alerts for currently down systems should be restored")
|
||||
}
|
||||
|
||||
func TestRestorePendingStatusAlertsIsIdempotent(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "down")
|
||||
require.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
"triggered": false,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
am := alerts.NewTestAlertManagerWithoutWorker(hub)
|
||||
require.NoError(t, am.RestorePendingStatusAlerts())
|
||||
require.NoError(t, am.RestorePendingStatusAlerts())
|
||||
|
||||
assert.Equal(t, 1, am.GetPendingAlertsCount(), "restoring twice should not create duplicate pending alerts")
|
||||
am.ForceExpirePendingAlerts()
|
||||
processed, err := am.ProcessPendingAlerts()
|
||||
require.NoError(t, err)
|
||||
assert.Len(t, processed, 1, "restored alert should still be processable exactly once")
|
||||
assert.Zero(t, am.GetPendingAlertsCount(), "processing the restored alert should empty the pending map")
|
||||
}
|
||||
|
||||
func TestResolveStatusAlertsFixesStaleTriggered(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// CreateSystems uses SaveNoValidate after initial save to bypass the
|
||||
// onRecordCreate hook that forces status = "pending".
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
|
||||
require.NoError(t, err)
|
||||
alert := core.NewRecord(alertCollection)
|
||||
alert.Set("user", user.Id)
|
||||
alert.Set("system", system.Id)
|
||||
alert.Set("name", "Status")
|
||||
alert.Set("triggered", true) // Stale: system is up but alert still says triggered
|
||||
require.NoError(t, hub.Save(alert))
|
||||
|
||||
// resolveStatusAlerts should clear the stale triggered flag
|
||||
require.NoError(t, alerts.ResolveStatusAlerts(hub))
|
||||
|
||||
alertRecord, err := hub.FindRecordById("alerts", alert.Id)
|
||||
require.NoError(t, err)
|
||||
assert.False(t, alertRecord.GetBool("triggered"), "stale triggered flag should be cleared when system is up")
|
||||
}
|
||||
func TestResolveStatusAlerts(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a systemUp
|
||||
systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.1",
|
||||
"status": "up",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
systemDown, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system-2",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.2",
|
||||
"status": "up",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a status alertUp for the system
|
||||
alertUp, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": systemUp.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
alertDown, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": systemDown.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify alert is not triggered initially
|
||||
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered initially")
|
||||
|
||||
// Set the system to 'up' (this should not trigger the alert)
|
||||
systemUp.Set("status", "up")
|
||||
err = hub.SaveNoValidate(systemUp)
|
||||
assert.NoError(t, err)
|
||||
|
||||
systemDown.Set("status", "down")
|
||||
err = hub.SaveNoValidate(systemDown)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Wait a moment for any processing
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
|
||||
// Verify alertUp is still not triggered after setting system to up
|
||||
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered when system is up")
|
||||
|
||||
// Manually set both alerts triggered to true
|
||||
alertUp.Set("triggered", true)
|
||||
err = hub.SaveNoValidate(alertUp)
|
||||
assert.NoError(t, err)
|
||||
alertDown.Set("triggered", true)
|
||||
err = hub.SaveNoValidate(alertDown)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify we have exactly one alert with triggered true
|
||||
triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 2, triggeredCount, "Should have exactly two alerts with triggered true")
|
||||
|
||||
// Verify the specific alertUp is triggered
|
||||
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, alertUp.GetBool("triggered"), "Alert should be triggered")
|
||||
|
||||
// Verify we have two unresolved alert history records
|
||||
alertHistoryCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 2, alertHistoryCount, "Should have exactly two unresolved alert history records")
|
||||
|
||||
err = alerts.ResolveStatusAlerts(hub)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify alertUp is not triggered after resolving
|
||||
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered after resolving")
|
||||
// Verify alertDown is still triggered
|
||||
alertDown, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertDown.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, alertDown.GetBool("triggered"), "Alert should still be triggered after resolving")
|
||||
|
||||
// Verify we have one unresolved alert history record
|
||||
alertHistoryCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 1, alertHistoryCount, "Should have exactly one unresolved alert history record")
|
||||
|
||||
}
|
||||
|
||||
func TestAlertsHistoryStatus(t *testing.T) {
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a system
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
assert.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
// Create a status alertRecord for the system
|
||||
alertRecord, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify alert is not triggered initially
|
||||
assert.False(t, alertRecord.GetBool("triggered"), "Alert should not be triggered initially")
|
||||
|
||||
// Set the system to 'down' (this should trigger the alert)
|
||||
system.Set("status", "down")
|
||||
err = hub.Save(system)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(time.Second * 30)
|
||||
synctest.Wait()
|
||||
|
||||
alertFresh, _ := hub.FindRecordById("alerts", alertRecord.Id)
|
||||
assert.False(t, alertFresh.GetBool("triggered"), "Alert should not be triggered after 30 seconds")
|
||||
|
||||
time.Sleep(time.Minute)
|
||||
synctest.Wait()
|
||||
|
||||
// Verify alert is triggered after setting system to down
|
||||
alertFresh, err = hub.FindRecordById("alerts", alertRecord.Id)
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, alertFresh.GetBool("triggered"), "Alert should be triggered after one minute")
|
||||
|
||||
// Verify we have one unresolved alert history record
|
||||
alertHistoryCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 1, alertHistoryCount, "Should have exactly one unresolved alert history record")
|
||||
|
||||
// Set the system back to 'up' (this should resolve the alert)
|
||||
system.Set("status", "up")
|
||||
err = hub.Save(system)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(time.Second)
|
||||
synctest.Wait()
|
||||
|
||||
// Verify alert is not triggered after setting system back to up
|
||||
alertFresh, err = hub.FindRecordById("alerts", alertRecord.Id)
|
||||
assert.NoError(t, err)
|
||||
assert.False(t, alertFresh.GetBool("triggered"), "Alert should not be triggered after system recovers")
|
||||
|
||||
// Verify the alert history record is resolved
|
||||
alertHistoryCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 0, alertHistoryCount, "Should have no unresolved alert history records")
|
||||
})
|
||||
}
|
||||
|
||||
func TestStatusAlertClearedBeforeSend(t *testing.T) {
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a system
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
assert.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
// Ensure user settings have an email
|
||||
userSettings, _ := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
|
||||
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
|
||||
hub.Save(userSettings)
|
||||
|
||||
// Initial email count
|
||||
initialEmailCount := hub.TestMailer.TotalSend()
|
||||
|
||||
// Create a status alertRecord for the system
|
||||
alertRecord, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify alert is not triggered initially
|
||||
assert.False(t, alertRecord.GetBool("triggered"), "Alert should not be triggered initially")
|
||||
|
||||
// Set the system to 'down' (this should trigger the alert)
|
||||
system.Set("status", "down")
|
||||
err = hub.Save(system)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(time.Second * 30)
|
||||
synctest.Wait()
|
||||
|
||||
// Set system back up to clear the pending alert before it triggers
|
||||
system.Set("status", "up")
|
||||
err = hub.Save(system)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(time.Minute)
|
||||
synctest.Wait()
|
||||
|
||||
// Verify that we have not sent any emails since the system recovered before the alert triggered
|
||||
assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "No email should be sent if system recovers before alert triggers")
|
||||
|
||||
// Verify alert is not triggered after setting system back to up
|
||||
alertFresh, err := hub.FindRecordById("alerts", alertRecord.Id)
|
||||
assert.NoError(t, err)
|
||||
assert.False(t, alertFresh.GetBool("triggered"), "Alert should not be triggered after system recovers")
|
||||
|
||||
// Verify that no alert history record was created since the alert never triggered
|
||||
alertHistoryCount, err := hub.CountRecords("alerts_history")
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 0, alertHistoryCount, "Should have no unresolved alert history records since alert never triggered")
|
||||
})
|
||||
}
|
||||
@@ -11,15 +11,11 @@ import (
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
"github.com/pocketbase/pocketbase/tools/types"
|
||||
"github.com/spf13/cast"
|
||||
)
|
||||
|
||||
func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *system.CombinedData) error {
|
||||
alertRecords, err := am.hub.FindAllRecords("alerts",
|
||||
dbx.NewExp("system={:system} AND name!='Status'", dbx.Params{"system": systemRecord.Id}),
|
||||
)
|
||||
if err != nil || len(alertRecords) == 0 {
|
||||
// log.Println("no alerts found for system")
|
||||
alerts := am.alertsCache.GetAlertsExcludingNames(systemRecord.Id, "Status")
|
||||
if len(alerts) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -27,8 +23,8 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
|
||||
now := systemRecord.GetDateTime("updated").Time().UTC()
|
||||
oldestTime := now
|
||||
|
||||
for _, alertRecord := range alertRecords {
|
||||
name := alertRecord.GetString("name")
|
||||
for _, alertData := range alerts {
|
||||
name := alertData.Name
|
||||
var val float64
|
||||
unit := "%"
|
||||
|
||||
@@ -38,7 +34,7 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
|
||||
case "Memory":
|
||||
val = data.Info.MemPct
|
||||
case "Bandwidth":
|
||||
val = data.Info.Bandwidth
|
||||
val = float64(data.Info.BandwidthBytes) / (1024 * 1024)
|
||||
unit = " MB/s"
|
||||
case "Disk":
|
||||
maxUsedPct := data.Info.DiskPct
|
||||
@@ -73,8 +69,8 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
|
||||
val = float64(data.Stats.Battery[0])
|
||||
}
|
||||
|
||||
triggered := alertRecord.GetBool("triggered")
|
||||
threshold := alertRecord.GetFloat("value")
|
||||
triggered := alertData.Triggered
|
||||
threshold := alertData.Value
|
||||
|
||||
// Battery alert has inverted logic: trigger when value is BELOW threshold
|
||||
lowAlert := isLowAlert(name)
|
||||
@@ -92,11 +88,11 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
|
||||
}
|
||||
}
|
||||
|
||||
min := max(1, cast.ToUint8(alertRecord.Get("min")))
|
||||
min := max(1, alertData.Min)
|
||||
|
||||
alert := SystemAlertData{
|
||||
systemRecord: systemRecord,
|
||||
alertRecord: alertRecord,
|
||||
alertData: alertData,
|
||||
name: name,
|
||||
unit: unit,
|
||||
val: val,
|
||||
@@ -129,7 +125,7 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
|
||||
Created types.DateTime `db:"created"`
|
||||
}{}
|
||||
|
||||
err = am.hub.DB().
|
||||
err := am.hub.DB().
|
||||
Select("stats", "created").
|
||||
From("system_stats").
|
||||
Where(dbx.NewExp(
|
||||
@@ -192,22 +188,24 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
|
||||
case "Memory":
|
||||
alert.val += stats.Mem
|
||||
case "Bandwidth":
|
||||
alert.val += stats.NetSent + stats.NetRecv
|
||||
alert.val += float64(stats.Bandwidth[0]+stats.Bandwidth[1]) / (1024 * 1024)
|
||||
case "Disk":
|
||||
if alert.mapSums == nil {
|
||||
alert.mapSums = make(map[string]float32, len(data.Stats.ExtraFs)+1)
|
||||
alert.mapSums = make(map[string]float32, len(stats.ExtraFs)+1)
|
||||
}
|
||||
// add root disk
|
||||
if _, ok := alert.mapSums["root"]; !ok {
|
||||
alert.mapSums["root"] = 0.0
|
||||
}
|
||||
alert.mapSums["root"] += float32(stats.Disk)
|
||||
// add extra disks
|
||||
for key, fs := range data.Stats.ExtraFs {
|
||||
if _, ok := alert.mapSums[key]; !ok {
|
||||
alert.mapSums[key] = 0.0
|
||||
// add extra disks from historical record
|
||||
for key, fs := range stats.ExtraFs {
|
||||
if fs.DiskTotal > 0 {
|
||||
if _, ok := alert.mapSums[key]; !ok {
|
||||
alert.mapSums[key] = 0.0
|
||||
}
|
||||
alert.mapSums[key] += float32(fs.DiskUsed / fs.DiskTotal * 100)
|
||||
}
|
||||
alert.mapSums[key] += float32(fs.DiskUsed / fs.DiskTotal * 100)
|
||||
}
|
||||
case "Temperature":
|
||||
if alert.mapSums == nil {
|
||||
@@ -342,13 +340,12 @@ func (am *AlertManager) sendSystemAlert(alert SystemAlertData) {
|
||||
}
|
||||
body := fmt.Sprintf("%s averaged %.2f%s for the previous %v %s.", alert.descriptor, alert.val, alert.unit, alert.min, minutesLabel)
|
||||
|
||||
alert.alertRecord.Set("triggered", alert.triggered)
|
||||
if err := am.hub.Save(alert.alertRecord); err != nil {
|
||||
if err := am.setAlertTriggered(alert.alertData, alert.triggered); err != nil {
|
||||
// app.Logger().Error("failed to save alert record", "err", err)
|
||||
return
|
||||
}
|
||||
am.SendAlert(AlertMessageData{
|
||||
UserID: alert.alertRecord.GetString("user"),
|
||||
UserID: alert.alertData.UserID,
|
||||
SystemID: alert.systemRecord.Id,
|
||||
Title: subject,
|
||||
Message: body,
|
||||
|
||||
218
internal/alerts/alerts_system_test.go
Normal file
218
internal/alerts/alerts_system_test.go
Normal file
@@ -0,0 +1,218 @@
|
||||
//go:build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
type systemAlertValueSetter[T any] func(info *system.Info, stats *system.Stats, value T)
|
||||
|
||||
type systemAlertTestFixture struct {
|
||||
hub *beszelTests.TestHub
|
||||
alertID string
|
||||
submit func(*system.CombinedData) error
|
||||
}
|
||||
|
||||
func createCombinedData[T any](value T, setValue systemAlertValueSetter[T]) *system.CombinedData {
|
||||
var data system.CombinedData
|
||||
setValue(&data.Info, &data.Stats, value)
|
||||
return &data
|
||||
}
|
||||
|
||||
func newSystemAlertTestFixture(t *testing.T, alertName string, min int, threshold float64) *systemAlertTestFixture {
|
||||
t.Helper()
|
||||
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
systemRecord := systems[0]
|
||||
|
||||
sysManagerSystem, err := hub.GetSystemManager().GetSystemFromStore(systemRecord.Id)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, sysManagerSystem)
|
||||
sysManagerSystem.StopUpdater()
|
||||
|
||||
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
|
||||
require.NoError(t, err)
|
||||
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
|
||||
require.NoError(t, hub.Save(userSettings))
|
||||
|
||||
alertRecord, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": alertName,
|
||||
"system": systemRecord.Id,
|
||||
"user": user.Id,
|
||||
"min": min,
|
||||
"value": threshold,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
assert.False(t, alertRecord.GetBool("triggered"), "Alert should not be triggered initially")
|
||||
|
||||
alertsCache := hub.GetAlertManager().GetSystemAlertsCache()
|
||||
cachedAlerts := alertsCache.GetAlertsExcludingNames(systemRecord.Id, "Status")
|
||||
assert.Len(t, cachedAlerts, 1, "Alert should be in cache")
|
||||
|
||||
return &systemAlertTestFixture{
|
||||
hub: hub,
|
||||
alertID: alertRecord.Id,
|
||||
submit: func(data *system.CombinedData) error {
|
||||
_, err := sysManagerSystem.CreateRecords(data)
|
||||
return err
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (fixture *systemAlertTestFixture) cleanup() {
|
||||
fixture.hub.Cleanup()
|
||||
}
|
||||
|
||||
func submitValue[T any](fixture *systemAlertTestFixture, t *testing.T, value T, setValue systemAlertValueSetter[T]) {
|
||||
t.Helper()
|
||||
require.NoError(t, fixture.submit(createCombinedData(value, setValue)))
|
||||
}
|
||||
|
||||
func (fixture *systemAlertTestFixture) assertTriggered(t *testing.T, triggered bool, message string) {
|
||||
t.Helper()
|
||||
|
||||
alertRecord, err := fixture.hub.FindRecordById("alerts", fixture.alertID)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, triggered, alertRecord.GetBool("triggered"), message)
|
||||
}
|
||||
|
||||
func waitForSystemAlert(d time.Duration) {
|
||||
time.Sleep(d)
|
||||
synctest.Wait()
|
||||
}
|
||||
|
||||
func testOneMinuteSystemAlert[T any](t *testing.T, alertName string, threshold float64, setValue systemAlertValueSetter[T], triggerValue, resolveValue T) {
|
||||
t.Helper()
|
||||
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
fixture := newSystemAlertTestFixture(t, alertName, 1, threshold)
|
||||
defer fixture.cleanup()
|
||||
|
||||
submitValue(fixture, t, triggerValue, setValue)
|
||||
waitForSystemAlert(time.Second)
|
||||
|
||||
fixture.assertTriggered(t, true, "Alert should be triggered")
|
||||
assert.Equal(t, 1, fixture.hub.TestMailer.TotalSend(), "An email should have been sent")
|
||||
|
||||
submitValue(fixture, t, resolveValue, setValue)
|
||||
waitForSystemAlert(time.Second)
|
||||
|
||||
fixture.assertTriggered(t, false, "Alert should be untriggered")
|
||||
assert.Equal(t, 2, fixture.hub.TestMailer.TotalSend(), "A second email should have been sent for untriggering the alert")
|
||||
|
||||
waitForSystemAlert(time.Minute)
|
||||
})
|
||||
}
|
||||
|
||||
func testMultiMinuteSystemAlert[T any](t *testing.T, alertName string, threshold float64, min int, setValue systemAlertValueSetter[T], baselineValue, triggerValue, resolveValue T) {
|
||||
t.Helper()
|
||||
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
fixture := newSystemAlertTestFixture(t, alertName, min, threshold)
|
||||
defer fixture.cleanup()
|
||||
|
||||
submitValue(fixture, t, baselineValue, setValue)
|
||||
waitForSystemAlert(time.Minute + time.Second)
|
||||
fixture.assertTriggered(t, false, "Alert should not be triggered yet")
|
||||
|
||||
submitValue(fixture, t, triggerValue, setValue)
|
||||
waitForSystemAlert(time.Minute)
|
||||
fixture.assertTriggered(t, false, "Alert should not be triggered until the history window is full")
|
||||
|
||||
submitValue(fixture, t, triggerValue, setValue)
|
||||
waitForSystemAlert(time.Second)
|
||||
fixture.assertTriggered(t, true, "Alert should be triggered")
|
||||
assert.Equal(t, 1, fixture.hub.TestMailer.TotalSend(), "An email should have been sent")
|
||||
|
||||
submitValue(fixture, t, resolveValue, setValue)
|
||||
waitForSystemAlert(time.Second)
|
||||
fixture.assertTriggered(t, false, "Alert should be untriggered")
|
||||
assert.Equal(t, 2, fixture.hub.TestMailer.TotalSend(), "A second email should have been sent for untriggering the alert")
|
||||
})
|
||||
}
|
||||
|
||||
func setCPUAlertValue(info *system.Info, stats *system.Stats, value float64) {
|
||||
info.Cpu = value
|
||||
stats.Cpu = value
|
||||
}
|
||||
|
||||
func setMemoryAlertValue(info *system.Info, stats *system.Stats, value float64) {
|
||||
info.MemPct = value
|
||||
stats.MemPct = value
|
||||
}
|
||||
|
||||
func setDiskAlertValue(info *system.Info, stats *system.Stats, value float64) {
|
||||
info.DiskPct = value
|
||||
stats.DiskPct = value
|
||||
}
|
||||
|
||||
func setBandwidthAlertValue(info *system.Info, stats *system.Stats, value [2]uint64) {
|
||||
info.BandwidthBytes = value[0] + value[1]
|
||||
stats.Bandwidth = value
|
||||
}
|
||||
|
||||
// megabytesToBytes converts a size in megabytes (1 MB = 1024 * 1024 bytes)
// to a size in bytes.
func megabytesToBytes(mb uint64) uint64 {
	const bytesPerMegabyte = 1024 * 1024
	return mb * bytesPerMegabyte
}
|
||||
|
||||
func setGPUAlertValue(info *system.Info, stats *system.Stats, value float64) {
|
||||
info.GpuPct = value
|
||||
stats.GPUData = map[string]system.GPUData{
|
||||
"GPU0": {Usage: value},
|
||||
}
|
||||
}
|
||||
|
||||
func setTemperatureAlertValue(info *system.Info, stats *system.Stats, value float64) {
|
||||
info.DashboardTemp = value
|
||||
stats.Temperatures = map[string]float64{
|
||||
"Temp0": value,
|
||||
}
|
||||
}
|
||||
|
||||
func setLoadAvgAlertValue(info *system.Info, stats *system.Stats, value [3]float64) {
|
||||
info.LoadAvg = value
|
||||
stats.LoadAvg = value
|
||||
}
|
||||
|
||||
func setBatteryAlertValue(info *system.Info, stats *system.Stats, value [2]uint8) {
|
||||
info.Battery = value
|
||||
stats.Battery = value
|
||||
}
|
||||
|
||||
func TestSystemAlertsOneMin(t *testing.T) {
|
||||
testOneMinuteSystemAlert(t, "CPU", 50, setCPUAlertValue, 51, 49)
|
||||
testOneMinuteSystemAlert(t, "Memory", 50, setMemoryAlertValue, 51, 49)
|
||||
testOneMinuteSystemAlert(t, "Disk", 50, setDiskAlertValue, 51, 49)
|
||||
testOneMinuteSystemAlert(t, "Bandwidth", 50, setBandwidthAlertValue, [2]uint64{megabytesToBytes(26), megabytesToBytes(25)}, [2]uint64{megabytesToBytes(25), megabytesToBytes(24)})
|
||||
testOneMinuteSystemAlert(t, "GPU", 50, setGPUAlertValue, 51, 49)
|
||||
testOneMinuteSystemAlert(t, "Temperature", 70, setTemperatureAlertValue, 71, 69)
|
||||
testOneMinuteSystemAlert(t, "LoadAvg1", 4, setLoadAvgAlertValue, [3]float64{4.1, 0, 0}, [3]float64{3.9, 0, 0})
|
||||
testOneMinuteSystemAlert(t, "LoadAvg5", 4, setLoadAvgAlertValue, [3]float64{0, 4.1, 0}, [3]float64{0, 3.9, 0})
|
||||
testOneMinuteSystemAlert(t, "LoadAvg15", 4, setLoadAvgAlertValue, [3]float64{0, 0, 4.1}, [3]float64{0, 0, 3.9})
|
||||
testOneMinuteSystemAlert(t, "Battery", 20, setBatteryAlertValue, [2]uint8{19, 0}, [2]uint8{21, 0})
|
||||
}
|
||||
|
||||
func TestSystemAlertsTwoMin(t *testing.T) {
|
||||
testMultiMinuteSystemAlert(t, "CPU", 50, 2, setCPUAlertValue, 10, 51, 48)
|
||||
testMultiMinuteSystemAlert(t, "Memory", 50, 2, setMemoryAlertValue, 10, 51, 48)
|
||||
testMultiMinuteSystemAlert(t, "Disk", 50, 2, setDiskAlertValue, 10, 51, 48)
|
||||
testMultiMinuteSystemAlert(t, "Bandwidth", 50, 2, setBandwidthAlertValue, [2]uint64{megabytesToBytes(10), megabytesToBytes(10)}, [2]uint64{megabytesToBytes(26), megabytesToBytes(25)}, [2]uint64{megabytesToBytes(10), megabytesToBytes(10)})
|
||||
testMultiMinuteSystemAlert(t, "GPU", 50, 2, setGPUAlertValue, 10, 51, 48)
|
||||
testMultiMinuteSystemAlert(t, "Temperature", 70, 2, setTemperatureAlertValue, 10, 71, 67)
|
||||
testMultiMinuteSystemAlert(t, "LoadAvg1", 4, 2, setLoadAvgAlertValue, [3]float64{0, 0, 0}, [3]float64{4.1, 0, 0}, [3]float64{3.5, 0, 0})
|
||||
testMultiMinuteSystemAlert(t, "LoadAvg5", 4, 2, setLoadAvgAlertValue, [3]float64{0, 2, 0}, [3]float64{0, 4.1, 0}, [3]float64{0, 3.5, 0})
|
||||
testMultiMinuteSystemAlert(t, "LoadAvg15", 4, 2, setLoadAvgAlertValue, [3]float64{0, 0, 2}, [3]float64{0, 0, 4.1}, [3]float64{0, 0, 3.5})
|
||||
testMultiMinuteSystemAlert(t, "Battery", 20, 2, setBatteryAlertValue, [2]uint8{21, 0}, [2]uint8{19, 0}, [2]uint8{25, 1})
|
||||
}
|
||||
@@ -12,9 +12,9 @@ import (
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/alerts"
|
||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||
|
||||
"github.com/henrygd/beszel/internal/alerts"
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
pbTests "github.com/pocketbase/pocketbase/tests"
|
||||
@@ -369,87 +369,6 @@ func TestUserAlertsApi(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestStatusAlerts(t *testing.T) {
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 4, user.Id, "paused")
|
||||
assert.NoError(t, err)
|
||||
|
||||
var alerts []*core.Record
|
||||
for i, system := range systems {
|
||||
alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"min": i + 1,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
alerts = append(alerts, alert)
|
||||
}
|
||||
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
|
||||
for _, alert := range alerts {
|
||||
assert.False(t, alert.GetBool("triggered"), "Alert should not be triggered immediately")
|
||||
}
|
||||
if hub.TestMailer.TotalSend() != 0 {
|
||||
assert.Zero(t, hub.TestMailer.TotalSend(), "Expected 0 messages, got %d", hub.TestMailer.TotalSend())
|
||||
}
|
||||
for _, system := range systems {
|
||||
assert.EqualValues(t, "paused", system.GetString("status"), "System should be paused")
|
||||
}
|
||||
for _, system := range systems {
|
||||
system.Set("status", "up")
|
||||
err = hub.SaveNoValidate(system)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
|
||||
for _, system := range systems {
|
||||
system.Set("status", "down")
|
||||
err = hub.SaveNoValidate(system)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
// after 30 seconds, should have 4 alerts in the pendingAlerts map, no triggered alerts
|
||||
time.Sleep(time.Second * 30)
|
||||
assert.EqualValues(t, 4, hub.GetPendingAlertsCount(), "should have 4 alerts in the pendingAlerts map")
|
||||
triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 0, triggeredCount, "should have 0 alert triggered")
|
||||
assert.EqualValues(t, 0, hub.TestMailer.TotalSend(), "should have 0 messages sent")
|
||||
// after 1:30 seconds, should have 1 triggered alert and 3 pending alerts
|
||||
time.Sleep(time.Second * 60)
|
||||
assert.EqualValues(t, 3, hub.GetPendingAlertsCount(), "should have 3 alerts in the pendingAlerts map")
|
||||
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 1, triggeredCount, "should have 1 alert triggered")
|
||||
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 messages sent")
|
||||
// after 2:30 seconds, should have 2 triggered alerts and 2 pending alerts
|
||||
time.Sleep(time.Second * 60)
|
||||
assert.EqualValues(t, 2, hub.GetPendingAlertsCount(), "should have 2 alerts in the pendingAlerts map")
|
||||
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 2, triggeredCount, "should have 2 alert triggered")
|
||||
assert.EqualValues(t, 2, hub.TestMailer.TotalSend(), "should have 2 messages sent")
|
||||
// now we will bring the remaning systems back up
|
||||
for _, system := range systems {
|
||||
system.Set("status", "up")
|
||||
err = hub.SaveNoValidate(system)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
// should have 0 alerts in the pendingAlerts map and 0 alerts triggered
|
||||
assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
|
||||
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.Zero(t, triggeredCount, "should have 0 alert triggered")
|
||||
// 4 messages sent, 2 down alerts and 2 up alerts for first 2 systems
|
||||
assert.EqualValues(t, 4, hub.TestMailer.TotalSend(), "should have 4 messages sent")
|
||||
})
|
||||
}
|
||||
|
||||
func TestAlertsHistory(t *testing.T) {
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
@@ -578,102 +497,46 @@ func TestAlertsHistory(t *testing.T) {
|
||||
assert.EqualValues(t, 2, totalHistoryCount, "Should have 2 total alert history records")
|
||||
})
|
||||
}
|
||||
func TestResolveStatusAlerts(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
|
||||
func TestSetAlertTriggered(t *testing.T) {
|
||||
hub, _ := beszelTests.NewTestHub(t.TempDir())
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a systemUp
|
||||
systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.1",
|
||||
"status": "up",
|
||||
hub.StartHub()
|
||||
|
||||
user, _ := beszelTests.CreateUser(hub, "test@example.com", "password")
|
||||
system, _ := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.1",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
systemDown, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system-2",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.2",
|
||||
"status": "up",
|
||||
alertRecord, _ := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "CPU",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"value": 80,
|
||||
"triggered": false,
|
||||
})
|
||||
|
||||
am := alerts.NewAlertManager(hub)
|
||||
|
||||
var alert alerts.CachedAlertData
|
||||
alert.PopulateFromRecord(alertRecord)
|
||||
|
||||
// Test triggering the alert
|
||||
err := am.SetAlertTriggered(alert, true)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a status alertUp for the system
|
||||
alertUp, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": systemUp.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
updatedRecord, err := hub.FindRecordById("alerts", alert.Id)
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, updatedRecord.GetBool("triggered"))
|
||||
|
||||
// Test un-triggering the alert
|
||||
err = am.SetAlertTriggered(alert, false)
|
||||
assert.NoError(t, err)
|
||||
|
||||
alertDown, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": systemDown.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
updatedRecord, err = hub.FindRecordById("alerts", alert.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify alert is not triggered initially
|
||||
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered initially")
|
||||
|
||||
// Set the system to 'up' (this should not trigger the alert)
|
||||
systemUp.Set("status", "up")
|
||||
err = hub.SaveNoValidate(systemUp)
|
||||
assert.NoError(t, err)
|
||||
|
||||
systemDown.Set("status", "down")
|
||||
err = hub.SaveNoValidate(systemDown)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Wait a moment for any processing
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
|
||||
// Verify alertUp is still not triggered after setting system to up
|
||||
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered when system is up")
|
||||
|
||||
// Manually set both alerts triggered to true
|
||||
alertUp.Set("triggered", true)
|
||||
err = hub.SaveNoValidate(alertUp)
|
||||
assert.NoError(t, err)
|
||||
alertDown.Set("triggered", true)
|
||||
err = hub.SaveNoValidate(alertDown)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify we have exactly one alert with triggered true
|
||||
triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 2, triggeredCount, "Should have exactly two alerts with triggered true")
|
||||
|
||||
// Verify the specific alertUp is triggered
|
||||
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, alertUp.GetBool("triggered"), "Alert should be triggered")
|
||||
|
||||
// Verify we have two unresolved alert history records
|
||||
alertHistoryCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 2, alertHistoryCount, "Should have exactly two unresolved alert history records")
|
||||
|
||||
err = alerts.ResolveStatusAlerts(hub)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify alertUp is not triggered after resolving
|
||||
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered after resolving")
|
||||
// Verify alertDown is still triggered
|
||||
alertDown, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertDown.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, alertDown.GetBool("triggered"), "Alert should still be triggered after resolving")
|
||||
|
||||
// Verify we have one unresolved alert history record
|
||||
alertHistoryCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 1, alertHistoryCount, "Should have exactly one unresolved alert history record")
|
||||
|
||||
assert.False(t, updatedRecord.GetBool("triggered"))
|
||||
}
|
||||
|
||||
@@ -9,6 +9,18 @@ import (
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
)
|
||||
|
||||
func NewTestAlertManagerWithoutWorker(app hubLike) *AlertManager {
|
||||
return &AlertManager{
|
||||
hub: app,
|
||||
alertsCache: NewAlertsCache(app),
|
||||
}
|
||||
}
|
||||
|
||||
// GetSystemAlertsCache returns the internal system alerts cache.
|
||||
func (am *AlertManager) GetSystemAlertsCache() *AlertsCache {
|
||||
return am.alertsCache
|
||||
}
|
||||
|
||||
func (am *AlertManager) GetAlertManager() *AlertManager {
|
||||
return am
|
||||
}
|
||||
@@ -27,19 +39,18 @@ func (am *AlertManager) GetPendingAlertsCount() int {
|
||||
}
|
||||
|
||||
// ProcessPendingAlerts manually processes all expired alerts (for testing)
|
||||
func (am *AlertManager) ProcessPendingAlerts() ([]*core.Record, error) {
|
||||
func (am *AlertManager) ProcessPendingAlerts() ([]CachedAlertData, error) {
|
||||
now := time.Now()
|
||||
var lastErr error
|
||||
var processedAlerts []*core.Record
|
||||
var processedAlerts []CachedAlertData
|
||||
am.pendingAlerts.Range(func(key, value any) bool {
|
||||
info := value.(*alertInfo)
|
||||
if now.After(info.expireTime) {
|
||||
// Downtime delay has passed, process alert
|
||||
if err := am.sendStatusAlert("down", info.systemName, info.alertRecord); err != nil {
|
||||
lastErr = err
|
||||
if info.timer != nil {
|
||||
info.timer.Stop()
|
||||
}
|
||||
processedAlerts = append(processedAlerts, info.alertRecord)
|
||||
am.pendingAlerts.Delete(key)
|
||||
am.processPendingAlert(key.(string))
|
||||
processedAlerts = append(processedAlerts, info.alertData)
|
||||
}
|
||||
return true
|
||||
})
|
||||
@@ -56,6 +67,31 @@ func (am *AlertManager) ForceExpirePendingAlerts() {
|
||||
})
|
||||
}
|
||||
|
||||
func (am *AlertManager) ResetPendingAlertTimer(alertID string, delay time.Duration) bool {
|
||||
value, loaded := am.pendingAlerts.Load(alertID)
|
||||
if !loaded {
|
||||
return false
|
||||
}
|
||||
|
||||
info := value.(*alertInfo)
|
||||
if info.timer != nil {
|
||||
info.timer.Stop()
|
||||
}
|
||||
info.expireTime = time.Now().Add(delay)
|
||||
info.timer = time.AfterFunc(delay, func() {
|
||||
am.processPendingAlert(alertID)
|
||||
})
|
||||
return true
|
||||
}
|
||||
|
||||
func ResolveStatusAlerts(app core.App) error {
|
||||
return resolveStatusAlerts(app)
|
||||
}
|
||||
|
||||
func (am *AlertManager) RestorePendingStatusAlerts() error {
|
||||
return am.restorePendingStatusAlerts()
|
||||
}
|
||||
|
||||
func (am *AlertManager) SetAlertTriggered(alert CachedAlertData, triggered bool) error {
|
||||
return am.setAlertTriggered(alert, triggered)
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"github.com/henrygd/beszel"
|
||||
"github.com/henrygd/beszel/agent"
|
||||
"github.com/henrygd/beszel/agent/health"
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/spf13/pflag"
|
||||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
@@ -116,12 +117,12 @@ func (opts *cmdOptions) loadPublicKeys() ([]ssh.PublicKey, error) {
|
||||
}
|
||||
|
||||
// Try environment variable
|
||||
if key, ok := agent.GetEnv("KEY"); ok && key != "" {
|
||||
if key, ok := utils.GetEnv("KEY"); ok && key != "" {
|
||||
return agent.ParseKeys(key)
|
||||
}
|
||||
|
||||
// Try key file
|
||||
keyFile, ok := agent.GetEnv("KEY_FILE")
|
||||
keyFile, ok := utils.GetEnv("KEY_FILE")
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("no key provided: must set -key flag, KEY env var, or KEY_FILE env var. Use 'beszel-agent help' for usage")
|
||||
}
|
||||
|
||||
@@ -10,10 +10,19 @@ type ApiInfo struct {
|
||||
Status string
|
||||
State string
|
||||
Image string
|
||||
Health struct {
|
||||
Status string
|
||||
// FailingStreak int
|
||||
}
|
||||
Ports []struct {
|
||||
// PrivatePort uint16
|
||||
PublicPort uint16
|
||||
IP string
|
||||
// Type string
|
||||
}
|
||||
// ImageID string
|
||||
// Command string
|
||||
// Created int64
|
||||
// Ports []Port
|
||||
// SizeRw int64 `json:",omitempty"`
|
||||
// SizeRootFs int64 `json:",omitempty"`
|
||||
// Labels map[string]string
|
||||
@@ -140,6 +149,7 @@ type Stats struct {
|
||||
Status string `json:"-" cbor:"6,keyasint"`
|
||||
Id string `json:"-" cbor:"7,keyasint"`
|
||||
Image string `json:"-" cbor:"8,keyasint"`
|
||||
Ports string `json:"-" cbor:"10,keyasint"`
|
||||
// PrevCpu [2]uint64 `json:"-"`
|
||||
CpuSystem uint64 `json:"-"`
|
||||
CpuContainer uint64 `json:"-"`
|
||||
|
||||
@@ -143,8 +143,8 @@ type AtaDeviceStatisticsPage struct {
|
||||
}
|
||||
|
||||
type AtaDeviceStatisticsEntry struct {
|
||||
Name string `json:"name"`
|
||||
Value *uint64 `json:"value,omitempty"`
|
||||
Name string `json:"name"`
|
||||
Value *int64 `json:"value,omitempty"`
|
||||
}
|
||||
|
||||
type AtaSmartAttribute struct {
|
||||
@@ -356,8 +356,8 @@ type SmartInfoForSata struct {
|
||||
SmartStatus SmartStatusInfo `json:"smart_status"`
|
||||
// AtaSmartData AtaSmartData `json:"ata_smart_data"`
|
||||
// AtaSctCapabilities AtaSctCapabilities `json:"ata_sct_capabilities"`
|
||||
AtaSmartAttributes AtaSmartAttributes `json:"ata_smart_attributes"`
|
||||
AtaDeviceStatistics AtaDeviceStatistics `json:"ata_device_statistics"`
|
||||
AtaSmartAttributes AtaSmartAttributes `json:"ata_smart_attributes"`
|
||||
AtaDeviceStatistics json.RawMessage `json:"ata_device_statistics"`
|
||||
// PowerOnTime PowerOnTimeInfo `json:"power_on_time"`
|
||||
// PowerCycleCount uint16 `json:"power_cycle_count"`
|
||||
Temperature TemperatureInfo `json:"temperature"`
|
||||
|
||||
@@ -12,8 +12,9 @@ import (
|
||||
|
||||
type Stats struct {
|
||||
Cpu float64 `json:"cpu" cbor:"0,keyasint"`
|
||||
MaxCpu float64 `json:"cpum,omitempty" cbor:"1,keyasint,omitempty"`
|
||||
MaxCpu float64 `json:"cpum,omitempty" cbor:"-"`
|
||||
Mem float64 `json:"m" cbor:"2,keyasint"`
|
||||
MaxMem float64 `json:"mm,omitempty" cbor:"-"`
|
||||
MemUsed float64 `json:"mu" cbor:"3,keyasint"`
|
||||
MemPct float64 `json:"mp" cbor:"4,keyasint"`
|
||||
MemBuffCache float64 `json:"mb" cbor:"5,keyasint"`
|
||||
@@ -23,26 +24,25 @@ type Stats struct {
|
||||
DiskTotal float64 `json:"d" cbor:"9,keyasint"`
|
||||
DiskUsed float64 `json:"du" cbor:"10,keyasint"`
|
||||
DiskPct float64 `json:"dp" cbor:"11,keyasint"`
|
||||
DiskReadPs float64 `json:"dr" cbor:"12,keyasint"`
|
||||
DiskWritePs float64 `json:"dw" cbor:"13,keyasint"`
|
||||
MaxDiskReadPs float64 `json:"drm,omitempty" cbor:"14,keyasint,omitempty"`
|
||||
MaxDiskWritePs float64 `json:"dwm,omitempty" cbor:"15,keyasint,omitempty"`
|
||||
DiskReadPs float64 `json:"dr,omitzero" cbor:"12,keyasint,omitzero"`
|
||||
DiskWritePs float64 `json:"dw,omitzero" cbor:"13,keyasint,omitzero"`
|
||||
MaxDiskReadPs float64 `json:"drm,omitempty" cbor:"-"`
|
||||
MaxDiskWritePs float64 `json:"dwm,omitempty" cbor:"-"`
|
||||
NetworkSent float64 `json:"ns,omitzero" cbor:"16,keyasint,omitzero"`
|
||||
NetworkRecv float64 `json:"nr,omitzero" cbor:"17,keyasint,omitzero"`
|
||||
MaxNetworkSent float64 `json:"nsm,omitempty" cbor:"18,keyasint,omitempty"`
|
||||
MaxNetworkRecv float64 `json:"nrm,omitempty" cbor:"19,keyasint,omitempty"`
|
||||
MaxNetworkSent float64 `json:"nsm,omitempty" cbor:"-"`
|
||||
MaxNetworkRecv float64 `json:"nrm,omitempty" cbor:"-"`
|
||||
Temperatures map[string]float64 `json:"t,omitempty" cbor:"20,keyasint,omitempty"`
|
||||
ExtraFs map[string]*FsStats `json:"efs,omitempty" cbor:"21,keyasint,omitempty"`
|
||||
GPUData map[string]GPUData `json:"g,omitempty" cbor:"22,keyasint,omitempty"`
|
||||
LoadAvg1 float64 `json:"l1,omitempty" cbor:"23,keyasint,omitempty"`
|
||||
LoadAvg5 float64 `json:"l5,omitempty" cbor:"24,keyasint,omitempty"`
|
||||
LoadAvg15 float64 `json:"l15,omitempty" cbor:"25,keyasint,omitempty"`
|
||||
Bandwidth [2]uint64 `json:"b,omitzero" cbor:"26,keyasint,omitzero"` // [sent bytes, recv bytes]
|
||||
MaxBandwidth [2]uint64 `json:"bm,omitzero" cbor:"27,keyasint,omitzero"` // [sent bytes, recv bytes]
|
||||
// LoadAvg1 float64 `json:"l1,omitempty" cbor:"23,keyasint,omitempty"`
|
||||
// LoadAvg5 float64 `json:"l5,omitempty" cbor:"24,keyasint,omitempty"`
|
||||
// LoadAvg15 float64 `json:"l15,omitempty" cbor:"25,keyasint,omitempty"`
|
||||
Bandwidth [2]uint64 `json:"b,omitzero" cbor:"26,keyasint,omitzero"` // [sent bytes, recv bytes]
|
||||
MaxBandwidth [2]uint64 `json:"bm,omitzero" cbor:"-"` // [sent bytes, recv bytes]
|
||||
// TODO: remove other load fields in future release in favor of load avg array
|
||||
LoadAvg [3]float64 `json:"la,omitempty" cbor:"28,keyasint"`
|
||||
Battery [2]uint8 `json:"bat,omitzero" cbor:"29,keyasint,omitzero"` // [percent, charge state, current]
|
||||
MaxMem float64 `json:"mm,omitempty" cbor:"30,keyasint,omitempty"`
|
||||
Battery [2]uint8 `json:"bat,omitzero" cbor:"29,keyasint,omitzero"` // [percent, charge state, current]
|
||||
NetworkInterfaces map[string][4]uint64 `json:"ni,omitempty" cbor:"31,keyasint,omitempty"` // [upload bytes, download bytes, total upload, total download]
|
||||
DiskIO [2]uint64 `json:"dio,omitzero" cbor:"32,keyasint,omitzero"` // [read bytes, write bytes]
|
||||
MaxDiskIO [2]uint64 `json:"diom,omitzero" cbor:"-"` // [max read bytes, max write bytes]
|
||||
@@ -90,8 +90,8 @@ type FsStats struct {
|
||||
TotalWrite uint64 `json:"-"`
|
||||
DiskReadPs float64 `json:"r" cbor:"2,keyasint"`
|
||||
DiskWritePs float64 `json:"w" cbor:"3,keyasint"`
|
||||
MaxDiskReadPS float64 `json:"rm,omitempty" cbor:"4,keyasint,omitempty"`
|
||||
MaxDiskWritePS float64 `json:"wm,omitempty" cbor:"5,keyasint,omitempty"`
|
||||
MaxDiskReadPS float64 `json:"rm,omitempty" cbor:"-"`
|
||||
MaxDiskWritePS float64 `json:"wm,omitempty" cbor:"-"`
|
||||
// TODO: remove DiskReadPs and DiskWritePs in future release in favor of DiskReadBytes and DiskWriteBytes
|
||||
DiskReadBytes uint64 `json:"rb" cbor:"6,keyasint,omitempty"`
|
||||
DiskWriteBytes uint64 `json:"wb" cbor:"7,keyasint,omitempty"`
|
||||
@@ -129,23 +129,23 @@ type Info struct {
|
||||
KernelVersion string `json:"k,omitempty" cbor:"1,keyasint,omitempty"` // deprecated - moved to Details struct
|
||||
Cores int `json:"c,omitzero" cbor:"2,keyasint,omitzero"` // deprecated - moved to Details struct
|
||||
// Threads is needed in Info struct to calculate load average thresholds
|
||||
Threads int `json:"t,omitempty" cbor:"3,keyasint,omitempty"`
|
||||
CpuModel string `json:"m,omitempty" cbor:"4,keyasint,omitempty"` // deprecated - moved to Details struct
|
||||
Uptime uint64 `json:"u" cbor:"5,keyasint"`
|
||||
Cpu float64 `json:"cpu" cbor:"6,keyasint"`
|
||||
MemPct float64 `json:"mp" cbor:"7,keyasint"`
|
||||
DiskPct float64 `json:"dp" cbor:"8,keyasint"`
|
||||
Bandwidth float64 `json:"b" cbor:"9,keyasint"`
|
||||
AgentVersion string `json:"v" cbor:"10,keyasint"`
|
||||
Podman bool `json:"p,omitempty" cbor:"11,keyasint,omitempty"` // deprecated - moved to Details struct
|
||||
GpuPct float64 `json:"g,omitempty" cbor:"12,keyasint,omitempty"`
|
||||
DashboardTemp float64 `json:"dt,omitempty" cbor:"13,keyasint,omitempty"`
|
||||
Os Os `json:"os,omitempty" cbor:"14,keyasint,omitempty"` // deprecated - moved to Details struct
|
||||
LoadAvg1 float64 `json:"l1,omitempty" cbor:"15,keyasint,omitempty"` // deprecated - use `la` array instead
|
||||
LoadAvg5 float64 `json:"l5,omitempty" cbor:"16,keyasint,omitempty"` // deprecated - use `la` array instead
|
||||
LoadAvg15 float64 `json:"l15,omitempty" cbor:"17,keyasint,omitempty"` // deprecated - use `la` array instead
|
||||
BandwidthBytes uint64 `json:"bb" cbor:"18,keyasint"`
|
||||
Threads int `json:"t,omitempty" cbor:"3,keyasint,omitempty"`
|
||||
CpuModel string `json:"m,omitempty" cbor:"4,keyasint,omitempty"` // deprecated - moved to Details struct
|
||||
Uptime uint64 `json:"u" cbor:"5,keyasint"`
|
||||
Cpu float64 `json:"cpu" cbor:"6,keyasint"`
|
||||
MemPct float64 `json:"mp" cbor:"7,keyasint"`
|
||||
DiskPct float64 `json:"dp" cbor:"8,keyasint"`
|
||||
Bandwidth float64 `json:"b,omitzero" cbor:"9,keyasint"` // deprecated in favor of BandwidthBytes
|
||||
AgentVersion string `json:"v" cbor:"10,keyasint"`
|
||||
Podman bool `json:"p,omitempty" cbor:"11,keyasint,omitempty"` // deprecated - moved to Details struct
|
||||
GpuPct float64 `json:"g,omitempty" cbor:"12,keyasint,omitempty"`
|
||||
DashboardTemp float64 `json:"dt,omitempty" cbor:"13,keyasint,omitempty"`
|
||||
Os Os `json:"os,omitempty" cbor:"14,keyasint,omitempty"` // deprecated - moved to Details struct
|
||||
// LoadAvg1 float64 `json:"l1,omitempty" cbor:"15,keyasint,omitempty"` // deprecated - use `la` array instead
|
||||
// LoadAvg5 float64 `json:"l5,omitempty" cbor:"16,keyasint,omitempty"` // deprecated - use `la` array instead
|
||||
// LoadAvg15 float64 `json:"l15,omitempty" cbor:"17,keyasint,omitempty"` // deprecated - use `la` array instead
|
||||
|
||||
BandwidthBytes uint64 `json:"bb" cbor:"18,keyasint"`
|
||||
LoadAvg [3]float64 `json:"la,omitempty" cbor:"19,keyasint"`
|
||||
ConnectionType ConnectionType `json:"ct,omitempty" cbor:"20,keyasint,omitempty,omitzero"`
|
||||
ExtraFsPct map[string]float64 `json:"efs,omitempty" cbor:"21,keyasint,omitempty"`
|
||||
|
||||
@@ -917,7 +917,7 @@ func TestAgentWebSocketIntegration(t *testing.T) {
|
||||
|
||||
// Wait for connection result
|
||||
maxWait := 2 * time.Second
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
time.Sleep(40 * time.Millisecond)
|
||||
checkInterval := 20 * time.Millisecond
|
||||
timeout := time.After(maxWait)
|
||||
ticker := time.Tick(checkInterval)
|
||||
|
||||
@@ -1,35 +1,39 @@
|
||||
// Package expirymap provides a thread-safe map with expiring entries.
|
||||
// It supports TTL-based expiration with both lazy cleanup on access
|
||||
// and periodic background cleanup.
|
||||
package expirymap
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/pocketbase/pocketbase/tools/store"
|
||||
)
|
||||
|
||||
type val[T any] struct {
|
||||
type val[T comparable] struct {
|
||||
value T
|
||||
expires time.Time
|
||||
}
|
||||
|
||||
type ExpiryMap[T any] struct {
|
||||
store *store.Store[string, *val[T]]
|
||||
cleanupInterval time.Duration
|
||||
type ExpiryMap[T comparable] struct {
|
||||
store *store.Store[string, val[T]]
|
||||
stopChan chan struct{}
|
||||
stopOnce sync.Once
|
||||
}
|
||||
|
||||
// New creates a new expiry map with custom cleanup interval
|
||||
func New[T any](cleanupInterval time.Duration) *ExpiryMap[T] {
|
||||
func New[T comparable](cleanupInterval time.Duration) *ExpiryMap[T] {
|
||||
m := &ExpiryMap[T]{
|
||||
store: store.New(map[string]*val[T]{}),
|
||||
cleanupInterval: cleanupInterval,
|
||||
store: store.New(map[string]val[T]{}),
|
||||
stopChan: make(chan struct{}),
|
||||
}
|
||||
m.startCleaner()
|
||||
go m.startCleaner(cleanupInterval)
|
||||
return m
|
||||
}
|
||||
|
||||
// Set stores a value with the given TTL
|
||||
func (m *ExpiryMap[T]) Set(key string, value T, ttl time.Duration) {
|
||||
m.store.Set(key, &val[T]{
|
||||
m.store.Set(key, val[T]{
|
||||
value: value,
|
||||
expires: time.Now().Add(ttl),
|
||||
})
|
||||
@@ -55,7 +59,7 @@ func (m *ExpiryMap[T]) GetOk(key string) (T, bool) {
|
||||
// GetByValue retrieves a value by value
|
||||
func (m *ExpiryMap[T]) GetByValue(val T) (key string, value T, ok bool) {
|
||||
for key, v := range m.store.GetAll() {
|
||||
if reflect.DeepEqual(v.value, val) {
|
||||
if v.value == val {
|
||||
// check if expired
|
||||
if v.expires.Before(time.Now()) {
|
||||
m.store.Remove(key)
|
||||
@@ -75,7 +79,7 @@ func (m *ExpiryMap[T]) Remove(key string) {
|
||||
// RemovebyValue removes a value by value
|
||||
func (m *ExpiryMap[T]) RemovebyValue(value T) (T, bool) {
|
||||
for key, val := range m.store.GetAll() {
|
||||
if reflect.DeepEqual(val.value, value) {
|
||||
if val.value == value {
|
||||
m.store.Remove(key)
|
||||
return val.value, true
|
||||
}
|
||||
@@ -84,13 +88,23 @@ func (m *ExpiryMap[T]) RemovebyValue(value T) (T, bool) {
|
||||
}
|
||||
|
||||
// startCleaner runs the background cleanup process
|
||||
func (m *ExpiryMap[T]) startCleaner() {
|
||||
go func() {
|
||||
tick := time.Tick(m.cleanupInterval)
|
||||
for range tick {
|
||||
func (m *ExpiryMap[T]) startCleaner(interval time.Duration) {
|
||||
tick := time.Tick(interval)
|
||||
for {
|
||||
select {
|
||||
case <-tick:
|
||||
m.cleanup()
|
||||
case <-m.stopChan:
|
||||
return
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
// StopCleaner stops the background cleanup process
|
||||
func (m *ExpiryMap[T]) StopCleaner() {
|
||||
m.stopOnce.Do(func() {
|
||||
close(m.stopChan)
|
||||
})
|
||||
}
|
||||
|
||||
// cleanup removes all expired entries
|
||||
@@ -102,3 +116,12 @@ func (m *ExpiryMap[T]) cleanup() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// UpdateExpiration updates the expiration time of a key
|
||||
func (m *ExpiryMap[T]) UpdateExpiration(key string, ttl time.Duration) {
|
||||
value, ok := m.store.GetOk(key)
|
||||
if ok {
|
||||
value.expires = time.Now().Add(ttl)
|
||||
m.store.Set(key, value)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ package expirymap
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
@@ -177,6 +178,33 @@ func TestExpiryMap_GenericTypes(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestExpiryMap_UpdateExpiration(t *testing.T) {
|
||||
em := New[string](time.Hour)
|
||||
|
||||
// Set a value with short TTL
|
||||
em.Set("key1", "value1", time.Millisecond*50)
|
||||
|
||||
// Verify it exists
|
||||
assert.True(t, em.Has("key1"))
|
||||
|
||||
// Update expiration to a longer TTL
|
||||
em.UpdateExpiration("key1", time.Hour)
|
||||
|
||||
// Wait for the original TTL to pass
|
||||
time.Sleep(time.Millisecond * 100)
|
||||
|
||||
// Should still exist because expiration was updated
|
||||
assert.True(t, em.Has("key1"))
|
||||
value, ok := em.GetOk("key1")
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "value1", value)
|
||||
|
||||
// Try updating non-existent key (should not panic)
|
||||
assert.NotPanics(t, func() {
|
||||
em.UpdateExpiration("nonexistent", time.Hour)
|
||||
})
|
||||
}
|
||||
|
||||
func TestExpiryMap_ZeroValues(t *testing.T) {
|
||||
em := New[string](time.Hour)
|
||||
|
||||
@@ -473,3 +501,52 @@ func TestExpiryMap_ValueOperations_Integration(t *testing.T) {
|
||||
assert.Equal(t, "unique", value)
|
||||
assert.Equal(t, "key2", key)
|
||||
}
|
||||
|
||||
func TestExpiryMap_Cleaner(t *testing.T) {
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
em := New[string](time.Second)
|
||||
defer em.StopCleaner()
|
||||
|
||||
em.Set("test", "value", 500*time.Millisecond)
|
||||
|
||||
// Wait 600ms, value is expired but cleaner hasn't run yet (interval is 1s)
|
||||
time.Sleep(600 * time.Millisecond)
|
||||
synctest.Wait()
|
||||
|
||||
// Map should still hold the value in its internal store before lazy access or cleaner
|
||||
assert.Equal(t, 1, len(em.store.GetAll()), "store should still have 1 item before cleaner runs")
|
||||
|
||||
// Wait another 500ms so cleaner (1s interval) runs
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
synctest.Wait() // Wait for background goroutine to process the tick
|
||||
|
||||
assert.Equal(t, 0, len(em.store.GetAll()), "store should be empty after cleaner runs")
|
||||
})
|
||||
}
|
||||
|
||||
func TestExpiryMap_StopCleaner(t *testing.T) {
|
||||
em := New[string](time.Hour)
|
||||
|
||||
// Initially, stopChan is open, reading would block
|
||||
select {
|
||||
case <-em.stopChan:
|
||||
t.Fatal("stopChan should be open initially")
|
||||
default:
|
||||
// success
|
||||
}
|
||||
|
||||
em.StopCleaner()
|
||||
|
||||
// After StopCleaner, stopChan is closed, reading returns immediately
|
||||
select {
|
||||
case <-em.stopChan:
|
||||
// success
|
||||
default:
|
||||
t.Fatal("stopChan was not closed by StopCleaner")
|
||||
}
|
||||
|
||||
// Calling StopCleaner again should NOT panic thanks to sync.Once
|
||||
assert.NotPanics(t, func() {
|
||||
em.StopCleaner()
|
||||
})
|
||||
}
|
||||
|
||||
@@ -48,7 +48,6 @@ type System struct {
|
||||
detailsFetched atomic.Bool // True if static system details have been fetched and saved
|
||||
smartFetching atomic.Bool // True if SMART devices are currently being fetched
|
||||
smartInterval time.Duration // Interval for periodic SMART data updates
|
||||
lastSmartFetch atomic.Int64 // Unix milliseconds of last SMART data fetch
|
||||
}
|
||||
|
||||
func (sm *SystemManager) NewSystem(systemId string) *System {
|
||||
@@ -134,19 +133,34 @@ func (sys *System) update() error {
|
||||
return err
|
||||
}
|
||||
|
||||
// ensure deprecated fields from older agents are migrated to current fields
|
||||
migrateDeprecatedFields(data, !sys.detailsFetched.Load())
|
||||
|
||||
// create system records
|
||||
_, err = sys.createRecords(data)
|
||||
|
||||
// if details were included and fetched successfully, mark details as fetched and update smart interval if set by agent
|
||||
if err == nil && data.Details != nil {
|
||||
sys.detailsFetched.Store(true)
|
||||
// update smart interval if it's set on the agent side
|
||||
if data.Details.SmartInterval > 0 {
|
||||
sys.smartInterval = data.Details.SmartInterval
|
||||
// make sure we reset expiration of lastFetch to remain as long as the new smart interval
|
||||
// to prevent premature expiration leading to new fetch if interval is different.
|
||||
sys.manager.smartFetchMap.UpdateExpiration(sys.Id, sys.smartInterval+time.Minute)
|
||||
}
|
||||
}
|
||||
|
||||
// Fetch and save SMART devices when system first comes online or at intervals
|
||||
if backgroundSmartFetchEnabled() {
|
||||
if backgroundSmartFetchEnabled() && sys.detailsFetched.Load() {
|
||||
if sys.smartInterval <= 0 {
|
||||
sys.smartInterval = time.Hour
|
||||
}
|
||||
lastFetch := sys.lastSmartFetch.Load()
|
||||
if time.Since(time.UnixMilli(lastFetch)) >= sys.smartInterval && sys.smartFetching.CompareAndSwap(false, true) {
|
||||
lastFetch, _ := sys.manager.smartFetchMap.GetOk(sys.Id)
|
||||
if time.Since(time.UnixMilli(lastFetch-1e4)) >= sys.smartInterval && sys.smartFetching.CompareAndSwap(false, true) {
|
||||
go func() {
|
||||
defer sys.smartFetching.Store(false)
|
||||
sys.lastSmartFetch.Store(time.Now().UnixMilli())
|
||||
sys.manager.smartFetchMap.Set(sys.Id, time.Now().UnixMilli(), sys.smartInterval+time.Minute)
|
||||
_ = sys.FetchAndSaveSmartDevices()
|
||||
}()
|
||||
}
|
||||
@@ -221,11 +235,6 @@ func (sys *System) createRecords(data *system.CombinedData) (*core.Record, error
|
||||
if err := createSystemDetailsRecord(txApp, data.Details, sys.Id); err != nil {
|
||||
return err
|
||||
}
|
||||
sys.detailsFetched.Store(true)
|
||||
// update smart interval if it's set on the agent side
|
||||
if data.Details.SmartInterval > 0 {
|
||||
sys.smartInterval = data.Details.SmartInterval
|
||||
}
|
||||
}
|
||||
|
||||
// update system record (do this last because it triggers alerts and we need above records to be inserted first)
|
||||
@@ -309,10 +318,11 @@ func createContainerRecords(app core.App, data []*container.Stats, systemId stri
|
||||
valueStrings := make([]string, 0, len(data))
|
||||
for i, container := range data {
|
||||
suffix := fmt.Sprintf("%d", i)
|
||||
valueStrings = append(valueStrings, fmt.Sprintf("({:id%[1]s}, {:system}, {:name%[1]s}, {:image%[1]s}, {:status%[1]s}, {:health%[1]s}, {:cpu%[1]s}, {:memory%[1]s}, {:net%[1]s}, {:updated})", suffix))
|
||||
valueStrings = append(valueStrings, fmt.Sprintf("({:id%[1]s}, {:system}, {:name%[1]s}, {:image%[1]s}, {:ports%[1]s}, {:status%[1]s}, {:health%[1]s}, {:cpu%[1]s}, {:memory%[1]s}, {:net%[1]s}, {:updated})", suffix))
|
||||
params["id"+suffix] = container.Id
|
||||
params["name"+suffix] = container.Name
|
||||
params["image"+suffix] = container.Image
|
||||
params["ports"+suffix] = container.Ports
|
||||
params["status"+suffix] = container.Status
|
||||
params["health"+suffix] = container.Health
|
||||
params["cpu"+suffix] = container.Cpu
|
||||
@@ -324,7 +334,7 @@ func createContainerRecords(app core.App, data []*container.Stats, systemId stri
|
||||
params["net"+suffix] = netBytes
|
||||
}
|
||||
queryString := fmt.Sprintf(
|
||||
"INSERT INTO containers (id, system, name, image, status, health, cpu, memory, net, updated) VALUES %s ON CONFLICT(id) DO UPDATE SET system = excluded.system, name = excluded.name, image = excluded.image, status = excluded.status, health = excluded.health, cpu = excluded.cpu, memory = excluded.memory, net = excluded.net, updated = excluded.updated",
|
||||
"INSERT INTO containers (id, system, name, image, ports, status, health, cpu, memory, net, updated) VALUES %s ON CONFLICT(id) DO UPDATE SET system = excluded.system, name = excluded.name, image = excluded.image, ports = excluded.ports, status = excluded.status, health = excluded.health, cpu = excluded.cpu, memory = excluded.memory, net = excluded.net, updated = excluded.updated",
|
||||
strings.Join(valueStrings, ","),
|
||||
)
|
||||
_, err := app.DB().NewQuery(queryString).Bind(params).Execute()
|
||||
@@ -703,3 +713,50 @@ func getJitter() <-chan time.Time {
|
||||
msDelay := (interval * minPercent / 100) + rand.Intn(interval*jitterRange/100)
|
||||
return time.After(time.Duration(msDelay) * time.Millisecond)
|
||||
}
|
||||
|
||||
// migrateDeprecatedFields moves values from deprecated fields to their new locations if the new
|
||||
// fields are not already populated. Deprecated fields and refs may be removed at least 30 days
|
||||
// and one minor version release after the release that includes the migration.
|
||||
//
|
||||
// This is run when processing incoming system data from agents, which may be on older versions.
|
||||
func migrateDeprecatedFields(cd *system.CombinedData, createDetails bool) {
|
||||
// migration added 0.19.0
|
||||
if cd.Stats.Bandwidth[0] == 0 && cd.Stats.Bandwidth[1] == 0 {
|
||||
cd.Stats.Bandwidth[0] = uint64(cd.Stats.NetworkSent * 1024 * 1024)
|
||||
cd.Stats.Bandwidth[1] = uint64(cd.Stats.NetworkRecv * 1024 * 1024)
|
||||
cd.Stats.NetworkSent, cd.Stats.NetworkRecv = 0, 0
|
||||
}
|
||||
// migration added 0.19.0
|
||||
if cd.Info.BandwidthBytes == 0 {
|
||||
cd.Info.BandwidthBytes = uint64(cd.Info.Bandwidth * 1024 * 1024)
|
||||
cd.Info.Bandwidth = 0
|
||||
}
|
||||
// migration added 0.19.0
|
||||
if cd.Stats.DiskIO[0] == 0 && cd.Stats.DiskIO[1] == 0 {
|
||||
cd.Stats.DiskIO[0] = uint64(cd.Stats.DiskReadPs * 1024 * 1024)
|
||||
cd.Stats.DiskIO[1] = uint64(cd.Stats.DiskWritePs * 1024 * 1024)
|
||||
cd.Stats.DiskReadPs, cd.Stats.DiskWritePs = 0, 0
|
||||
}
|
||||
// migration added 0.19.0 - Move deprecated Info fields to Details struct
|
||||
if cd.Details == nil && cd.Info.Hostname != "" {
|
||||
if createDetails {
|
||||
cd.Details = &system.Details{
|
||||
Hostname: cd.Info.Hostname,
|
||||
Kernel: cd.Info.KernelVersion,
|
||||
Cores: cd.Info.Cores,
|
||||
Threads: cd.Info.Threads,
|
||||
CpuModel: cd.Info.CpuModel,
|
||||
Podman: cd.Info.Podman,
|
||||
Os: cd.Info.Os,
|
||||
MemoryTotal: uint64(cd.Stats.Mem * 1024 * 1024 * 1024),
|
||||
}
|
||||
}
|
||||
// zero the deprecated fields to prevent saving them in systems.info DB json payload
|
||||
cd.Info.Hostname = ""
|
||||
cd.Info.KernelVersion = ""
|
||||
cd.Info.Cores = 0
|
||||
cd.Info.CpuModel = ""
|
||||
cd.Info.Podman = false
|
||||
cd.Info.Os = 0
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"github.com/henrygd/beszel/internal/hub/ws"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
"github.com/henrygd/beszel/internal/hub/expirymap"
|
||||
|
||||
"github.com/henrygd/beszel/internal/common"
|
||||
|
||||
@@ -40,9 +41,10 @@ var errSystemExists = errors.New("system exists")
|
||||
// SystemManager manages a collection of monitored systems and their connections.
|
||||
// It handles system lifecycle, status updates, and maintains both SSH and WebSocket connections.
|
||||
type SystemManager struct {
|
||||
hub hubLike // Hub interface for database and alert operations
|
||||
systems *store.Store[string, *System] // Thread-safe store of active systems
|
||||
sshConfig *ssh.ClientConfig // SSH client configuration for system connections
|
||||
hub hubLike // Hub interface for database and alert operations
|
||||
systems *store.Store[string, *System] // Thread-safe store of active systems
|
||||
sshConfig *ssh.ClientConfig // SSH client configuration for system connections
|
||||
smartFetchMap *expirymap.ExpiryMap[int64] // Stores last SMART fetch time per system ID
|
||||
}
|
||||
|
||||
// hubLike defines the interface requirements for the hub dependency.
|
||||
@@ -58,8 +60,9 @@ type hubLike interface {
|
||||
// The hub must implement the hubLike interface to provide database and alert functionality.
|
||||
func NewSystemManager(hub hubLike) *SystemManager {
|
||||
return &SystemManager{
|
||||
systems: store.New(map[string]*System{}),
|
||||
hub: hub,
|
||||
systems: store.New(map[string]*System{}),
|
||||
hub: hub,
|
||||
smartFetchMap: expirymap.New[int64](time.Hour),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
159
internal/hub/systems/system_test.go
Normal file
159
internal/hub/systems/system_test.go
Normal file
@@ -0,0 +1,159 @@
|
||||
//go:build testing
|
||||
|
||||
package systems
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
)
|
||||
|
||||
func TestCombinedData_MigrateDeprecatedFields(t *testing.T) {
|
||||
t.Run("Migrate NetworkSent and NetworkRecv to Bandwidth", func(t *testing.T) {
|
||||
cd := &system.CombinedData{
|
||||
Stats: system.Stats{
|
||||
NetworkSent: 1.5, // 1.5 MB
|
||||
NetworkRecv: 2.5, // 2.5 MB
|
||||
},
|
||||
}
|
||||
migrateDeprecatedFields(cd, true)
|
||||
|
||||
expectedSent := uint64(1.5 * 1024 * 1024)
|
||||
expectedRecv := uint64(2.5 * 1024 * 1024)
|
||||
|
||||
if cd.Stats.Bandwidth[0] != expectedSent {
|
||||
t.Errorf("expected Bandwidth[0] %d, got %d", expectedSent, cd.Stats.Bandwidth[0])
|
||||
}
|
||||
if cd.Stats.Bandwidth[1] != expectedRecv {
|
||||
t.Errorf("expected Bandwidth[1] %d, got %d", expectedRecv, cd.Stats.Bandwidth[1])
|
||||
}
|
||||
if cd.Stats.NetworkSent != 0 || cd.Stats.NetworkRecv != 0 {
|
||||
t.Errorf("expected NetworkSent and NetworkRecv to be reset, got %f, %f", cd.Stats.NetworkSent, cd.Stats.NetworkRecv)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Migrate Info.Bandwidth to Info.BandwidthBytes", func(t *testing.T) {
|
||||
cd := &system.CombinedData{
|
||||
Info: system.Info{
|
||||
Bandwidth: 10.0, // 10 MB
|
||||
},
|
||||
}
|
||||
migrateDeprecatedFields(cd, true)
|
||||
|
||||
expected := uint64(10 * 1024 * 1024)
|
||||
if cd.Info.BandwidthBytes != expected {
|
||||
t.Errorf("expected BandwidthBytes %d, got %d", expected, cd.Info.BandwidthBytes)
|
||||
}
|
||||
if cd.Info.Bandwidth != 0 {
|
||||
t.Errorf("expected Info.Bandwidth to be reset, got %f", cd.Info.Bandwidth)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Migrate DiskReadPs and DiskWritePs to DiskIO", func(t *testing.T) {
|
||||
cd := &system.CombinedData{
|
||||
Stats: system.Stats{
|
||||
DiskReadPs: 3.0, // 3 MB
|
||||
DiskWritePs: 4.0, // 4 MB
|
||||
},
|
||||
}
|
||||
migrateDeprecatedFields(cd, true)
|
||||
|
||||
expectedRead := uint64(3 * 1024 * 1024)
|
||||
expectedWrite := uint64(4 * 1024 * 1024)
|
||||
|
||||
if cd.Stats.DiskIO[0] != expectedRead {
|
||||
t.Errorf("expected DiskIO[0] %d, got %d", expectedRead, cd.Stats.DiskIO[0])
|
||||
}
|
||||
if cd.Stats.DiskIO[1] != expectedWrite {
|
||||
t.Errorf("expected DiskIO[1] %d, got %d", expectedWrite, cd.Stats.DiskIO[1])
|
||||
}
|
||||
if cd.Stats.DiskReadPs != 0 || cd.Stats.DiskWritePs != 0 {
|
||||
t.Errorf("expected DiskReadPs and DiskWritePs to be reset, got %f, %f", cd.Stats.DiskReadPs, cd.Stats.DiskWritePs)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Migrate Info fields to Details struct", func(t *testing.T) {
|
||||
cd := &system.CombinedData{
|
||||
Stats: system.Stats{
|
||||
Mem: 16.0, // 16 GB
|
||||
},
|
||||
Info: system.Info{
|
||||
Hostname: "test-host",
|
||||
KernelVersion: "6.8.0",
|
||||
Cores: 8,
|
||||
Threads: 16,
|
||||
CpuModel: "Intel i7",
|
||||
Podman: true,
|
||||
Os: system.Linux,
|
||||
},
|
||||
}
|
||||
migrateDeprecatedFields(cd, true)
|
||||
|
||||
if cd.Details == nil {
|
||||
t.Fatal("expected Details struct to be created")
|
||||
}
|
||||
if cd.Details.Hostname != "test-host" {
|
||||
t.Errorf("expected Hostname 'test-host', got '%s'", cd.Details.Hostname)
|
||||
}
|
||||
if cd.Details.Kernel != "6.8.0" {
|
||||
t.Errorf("expected Kernel '6.8.0', got '%s'", cd.Details.Kernel)
|
||||
}
|
||||
if cd.Details.Cores != 8 {
|
||||
t.Errorf("expected Cores 8, got %d", cd.Details.Cores)
|
||||
}
|
||||
if cd.Details.Threads != 16 {
|
||||
t.Errorf("expected Threads 16, got %d", cd.Details.Threads)
|
||||
}
|
||||
if cd.Details.CpuModel != "Intel i7" {
|
||||
t.Errorf("expected CpuModel 'Intel i7', got '%s'", cd.Details.CpuModel)
|
||||
}
|
||||
if cd.Details.Podman != true {
|
||||
t.Errorf("expected Podman true, got %v", cd.Details.Podman)
|
||||
}
|
||||
if cd.Details.Os != system.Linux {
|
||||
t.Errorf("expected Os Linux, got %d", cd.Details.Os)
|
||||
}
|
||||
expectedMem := uint64(16 * 1024 * 1024 * 1024)
|
||||
if cd.Details.MemoryTotal != expectedMem {
|
||||
t.Errorf("expected MemoryTotal %d, got %d", expectedMem, cd.Details.MemoryTotal)
|
||||
}
|
||||
|
||||
if cd.Info.Hostname != "" || cd.Info.KernelVersion != "" || cd.Info.Cores != 0 || cd.Info.CpuModel != "" || cd.Info.Podman != false || cd.Info.Os != 0 {
|
||||
t.Errorf("expected Info fields to be reset, got %+v", cd.Info)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Do not migrate if Details already exists", func(t *testing.T) {
|
||||
cd := &system.CombinedData{
|
||||
Details: &system.Details{Hostname: "existing-host"},
|
||||
Info: system.Info{
|
||||
Hostname: "deprecated-host",
|
||||
},
|
||||
}
|
||||
migrateDeprecatedFields(cd, true)
|
||||
|
||||
if cd.Details.Hostname != "existing-host" {
|
||||
t.Errorf("expected Hostname 'existing-host', got '%s'", cd.Details.Hostname)
|
||||
}
|
||||
if cd.Info.Hostname != "deprecated-host" {
|
||||
t.Errorf("expected Info.Hostname to remain 'deprecated-host', got '%s'", cd.Info.Hostname)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Do not create details if migrateDetails is false", func(t *testing.T) {
|
||||
cd := &system.CombinedData{
|
||||
Info: system.Info{
|
||||
Hostname: "deprecated-host",
|
||||
},
|
||||
}
|
||||
migrateDeprecatedFields(cd, false)
|
||||
|
||||
if cd.Details != nil {
|
||||
t.Fatal("expected Details struct to not be created")
|
||||
}
|
||||
|
||||
if cd.Info.Hostname != "" {
|
||||
t.Errorf("expected Info.Hostname to be reset, got '%s'", cd.Info.Hostname)
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"fmt"
|
||||
|
||||
entities "github.com/henrygd/beszel/internal/entities/system"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
)
|
||||
|
||||
// The hub integration tests create/replace systems and cleanup the test apps quickly.
|
||||
@@ -113,4 +114,14 @@ func (sm *SystemManager) RemoveAllSystems() {
|
||||
for _, system := range sm.systems.GetAll() {
|
||||
sm.RemoveSystem(system.Id)
|
||||
}
|
||||
sm.smartFetchMap.StopCleaner()
|
||||
}
|
||||
|
||||
func (s *System) StopUpdater() {
|
||||
s.cancel()
|
||||
}
|
||||
|
||||
func (s *System) CreateRecords(data *entities.CombinedData) (*core.Record, error) {
|
||||
s.data = data
|
||||
return s.createRecords(data)
|
||||
}
|
||||
|
||||
@@ -977,18 +977,6 @@ func init() {
|
||||
"system": false,
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"hidden": false,
|
||||
"id": "number3332085495",
|
||||
"max": null,
|
||||
"min": null,
|
||||
"name": "updated",
|
||||
"onlyInt": true,
|
||||
"presentable": false,
|
||||
"required": true,
|
||||
"system": false,
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"autogeneratePattern": "",
|
||||
"hidden": false,
|
||||
@@ -1002,6 +990,32 @@ func init() {
|
||||
"required": false,
|
||||
"system": false,
|
||||
"type": "text"
|
||||
},
|
||||
{
|
||||
"autogeneratePattern": "",
|
||||
"hidden": false,
|
||||
"id": "text2308952269",
|
||||
"max": 0,
|
||||
"min": 0,
|
||||
"name": "ports",
|
||||
"pattern": "",
|
||||
"presentable": false,
|
||||
"primaryKey": false,
|
||||
"required": false,
|
||||
"system": false,
|
||||
"type": "text"
|
||||
},
|
||||
{
|
||||
"hidden": false,
|
||||
"id": "number3332085495",
|
||||
"max": null,
|
||||
"min": null,
|
||||
"name": "updated",
|
||||
"onlyInt": true,
|
||||
"presentable": false,
|
||||
"required": true,
|
||||
"system": false,
|
||||
"type": "number"
|
||||
}
|
||||
],
|
||||
"indexes": [
|
||||
@@ -16,19 +16,16 @@ import { useYAxisWidth } from "./hooks"
|
||||
export default memo(function LoadAverageChart({ chartData }: { chartData: ChartData }) {
|
||||
const { yAxisWidth, updateYAxisWidth } = useYAxisWidth()
|
||||
|
||||
const keys: { legacy: keyof SystemStats; color: string; label: string }[] = [
|
||||
const keys: { color: string; label: string }[] = [
|
||||
{
|
||||
legacy: "l1",
|
||||
color: "hsl(271, 81%, 60%)", // Purple
|
||||
label: t({ message: `1 min`, comment: "Load average" }),
|
||||
},
|
||||
{
|
||||
legacy: "l5",
|
||||
color: "hsl(217, 91%, 60%)", // Blue
|
||||
label: t({ message: `5 min`, comment: "Load average" }),
|
||||
},
|
||||
{
|
||||
legacy: "l15",
|
||||
color: "hsl(25, 95%, 53%)", // Orange
|
||||
label: t({ message: `15 min`, comment: "Load average" }),
|
||||
},
|
||||
@@ -66,27 +63,18 @@ export default memo(function LoadAverageChart({ chartData }: { chartData: ChartD
|
||||
/>
|
||||
}
|
||||
/>
|
||||
{keys.map(({ legacy, color, label }, i) => {
|
||||
const dataKey = (value: { stats: SystemStats }) => {
|
||||
const { minor, patch } = chartData.agentVersion
|
||||
if (minor <= 12 && patch < 1) {
|
||||
return value.stats?.[legacy]
|
||||
}
|
||||
return value.stats?.la?.[i] ?? value.stats?.[legacy]
|
||||
}
|
||||
return (
|
||||
<Line
|
||||
key={label}
|
||||
dataKey={dataKey}
|
||||
name={label}
|
||||
type="monotoneX"
|
||||
dot={false}
|
||||
strokeWidth={1.5}
|
||||
stroke={color}
|
||||
isAnimationActive={false}
|
||||
/>
|
||||
)
|
||||
})}
|
||||
{keys.map(({ color, label }, i) => (
|
||||
<Line
|
||||
key={label}
|
||||
dataKey={(value: { stats: SystemStats }) => value.stats?.la?.[i]}
|
||||
name={label}
|
||||
type="monotoneX"
|
||||
dot={false}
|
||||
strokeWidth={1.5}
|
||||
stroke={color}
|
||||
isAnimationActive={false}
|
||||
/>
|
||||
))}
|
||||
<ChartLegend content={<ChartLegendContent />} />
|
||||
</LineChart>
|
||||
</ChartContainer>
|
||||
|
||||
@@ -4,7 +4,6 @@ import { cn, decimalString, formatBytes, hourWithSeconds } from "@/lib/utils"
|
||||
import type { ContainerRecord } from "@/types"
|
||||
import { ContainerHealth, ContainerHealthLabels } from "@/lib/enums"
|
||||
import {
|
||||
ArrowUpDownIcon,
|
||||
ClockIcon,
|
||||
ContainerIcon,
|
||||
CpuIcon,
|
||||
@@ -13,11 +12,12 @@ import {
|
||||
ServerIcon,
|
||||
ShieldCheckIcon,
|
||||
} from "lucide-react"
|
||||
import { EthernetIcon, HourglassIcon } from "../ui/icons"
|
||||
import { EthernetIcon, HourglassIcon, SquareArrowRightEnterIcon } from "../ui/icons"
|
||||
import { Badge } from "../ui/badge"
|
||||
import { t } from "@lingui/core/macro"
|
||||
import { $allSystemsById } from "@/lib/stores"
|
||||
import { $allSystemsById, $longestSystemNameLen } from "@/lib/stores"
|
||||
import { useStore } from "@nanostores/react"
|
||||
import { Tooltip, TooltipContent, TooltipTrigger } from "../ui/tooltip"
|
||||
|
||||
// Unit names and their corresponding number of seconds for converting docker status strings
|
||||
const unitSeconds = [
|
||||
@@ -63,7 +63,12 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
|
||||
header: ({ column }) => <HeaderButton column={column} name={t`System`} Icon={ServerIcon} />,
|
||||
cell: ({ getValue }) => {
|
||||
const allSystems = useStore($allSystemsById)
|
||||
return <span className="ms-1.5 xl:w-34 block truncate">{allSystems[getValue() as string]?.name ?? ""}</span>
|
||||
const longestName = useStore($longestSystemNameLen)
|
||||
return (
|
||||
<div className="ms-1 max-w-40 truncate" style={{ width: `${longestName / 1.05}ch` }}>
|
||||
{allSystems[getValue() as string]?.name ?? ""}
|
||||
</div>
|
||||
)
|
||||
},
|
||||
},
|
||||
// {
|
||||
@@ -82,7 +87,7 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
|
||||
header: ({ column }) => <HeaderButton column={column} name={t`CPU`} Icon={CpuIcon} />,
|
||||
cell: ({ getValue }) => {
|
||||
const val = getValue() as number
|
||||
return <span className="ms-1.5 tabular-nums">{`${decimalString(val, val >= 10 ? 1 : 2)}%`}</span>
|
||||
return <span className="ms-1 tabular-nums">{`${decimalString(val, val >= 10 ? 1 : 2)}%`}</span>
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -94,7 +99,7 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
|
||||
const val = getValue() as number
|
||||
const formatted = formatBytes(val, false, undefined, true)
|
||||
return (
|
||||
<span className="ms-1.5 tabular-nums">{`${decimalString(formatted.value, formatted.value >= 10 ? 1 : 2)} ${formatted.unit}`}</span>
|
||||
<span className="ms-1 tabular-nums">{`${decimalString(formatted.value, formatted.value >= 10 ? 1 : 2)} ${formatted.unit}`}</span>
|
||||
)
|
||||
},
|
||||
},
|
||||
@@ -103,11 +108,12 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
|
||||
accessorFn: (record) => record.net,
|
||||
invertSorting: true,
|
||||
header: ({ column }) => <HeaderButton column={column} name={t`Net`} Icon={EthernetIcon} />,
|
||||
minSize: 112,
|
||||
cell: ({ getValue }) => {
|
||||
const val = getValue() as number
|
||||
const formatted = formatBytes(val, true, undefined, false)
|
||||
return (
|
||||
<span className="ms-1.5 tabular-nums">{`${decimalString(formatted.value, formatted.value >= 10 ? 1 : 2)} ${formatted.unit}`}</span>
|
||||
<div className="ms-1 tabular-nums">{`${decimalString(formatted.value, formatted.value >= 10 ? 1 : 2)} ${formatted.unit}`}</div>
|
||||
)
|
||||
},
|
||||
},
|
||||
@@ -116,6 +122,7 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
|
||||
invertSorting: true,
|
||||
accessorFn: (record) => record.health,
|
||||
header: ({ column }) => <HeaderButton column={column} name={t`Health`} Icon={ShieldCheckIcon} />,
|
||||
minSize: 121,
|
||||
cell: ({ getValue }) => {
|
||||
const healthValue = getValue() as number
|
||||
const healthStatus = ContainerHealthLabels[healthValue] || "Unknown"
|
||||
@@ -134,6 +141,35 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
|
||||
)
|
||||
},
|
||||
},
|
||||
{
|
||||
id: "ports",
|
||||
accessorFn: (record) => record.ports || undefined,
|
||||
header: ({ column }) => (
|
||||
<HeaderButton
|
||||
column={column}
|
||||
name={t({ message: "Ports", context: "Container ports" })}
|
||||
Icon={SquareArrowRightEnterIcon}
|
||||
/>
|
||||
),
|
||||
sortingFn: (a, b) => getPortValue(a.original.ports) - getPortValue(b.original.ports),
|
||||
minSize: 147,
|
||||
cell: ({ getValue }) => {
|
||||
const val = getValue() as string | undefined
|
||||
if (!val) {
|
||||
return <div className="ms-1.5 text-muted-foreground">-</div>
|
||||
}
|
||||
const className = "ms-1 w-27 block truncate tabular-nums"
|
||||
if (val.length > 14) {
|
||||
return (
|
||||
<Tooltip>
|
||||
<TooltipTrigger className={className}>{val}</TooltipTrigger>
|
||||
<TooltipContent>{val}</TooltipContent>
|
||||
</Tooltip>
|
||||
)
|
||||
}
|
||||
return <span className={className}>{val}</span>
|
||||
},
|
||||
},
|
||||
{
|
||||
id: "image",
|
||||
sortingFn: (a, b) => a.original.image.localeCompare(b.original.image),
|
||||
@@ -142,7 +178,12 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
|
||||
<HeaderButton column={column} name={t({ message: "Image", context: "Docker image" })} Icon={LayersIcon} />
|
||||
),
|
||||
cell: ({ getValue }) => {
|
||||
return <span className="ms-1.5 xl:w-40 block truncate">{getValue() as string}</span>
|
||||
const val = getValue() as string
|
||||
return (
|
||||
<div className="ms-1 xl:w-40 truncate" title={val}>
|
||||
{val}
|
||||
</div>
|
||||
)
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -152,7 +193,7 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
|
||||
sortingFn: (a, b) => getStatusValue(a.original.status) - getStatusValue(b.original.status),
|
||||
header: ({ column }) => <HeaderButton column={column} name={t`Status`} Icon={HourglassIcon} />,
|
||||
cell: ({ getValue }) => {
|
||||
return <span className="ms-1.5 w-25 block truncate">{getValue() as string}</span>
|
||||
return <span className="ms-1 w-25 block truncate">{getValue() as string}</span>
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -162,7 +203,7 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
|
||||
header: ({ column }) => <HeaderButton column={column} name={t`Updated`} Icon={ClockIcon} />,
|
||||
cell: ({ getValue }) => {
|
||||
const timestamp = getValue() as number
|
||||
return <span className="ms-1.5 tabular-nums">{hourWithSeconds(new Date(timestamp).toISOString())}</span>
|
||||
return <span className="ms-1 tabular-nums">{hourWithSeconds(new Date(timestamp).toISOString())}</span>
|
||||
},
|
||||
},
|
||||
]
|
||||
@@ -188,7 +229,21 @@ function HeaderButton({
|
||||
>
|
||||
{Icon && <Icon className="size-4" />}
|
||||
{name}
|
||||
<ArrowUpDownIcon className="size-4" />
|
||||
{/* <ArrowUpDownIcon className="size-4" /> */}
|
||||
</Button>
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Derive a numeric sort key from a container's ports string.
 * Only the first comma-separated mapping is considered, and within it only the
 * part after the last ":" (so "80", "127.0.0.1:80" and "80, 443" all yield 80).
 * Returns 0 when the string is empty, missing, or not numeric.
 */
function getPortValue(ports: string | undefined): number {
	if (!ports) {
		return 0
	}
	const commaIndex = ports.indexOf(",")
	const firstMapping = commaIndex === -1 ? ports : ports.slice(0, commaIndex)
	// lastIndexOf returns -1 when there is no ":", so slice(0) keeps the whole mapping
	const portStr = firstMapping.slice(firstMapping.lastIndexOf(":") + 1)
	return Number(portStr) || 0
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
/** biome-ignore-all lint/security/noDangerouslySetInnerHtml: html comes directly from docker via agent */
|
||||
import { t } from "@lingui/core/macro"
|
||||
import { Trans } from "@lingui/react/macro"
|
||||
import {
|
||||
@@ -13,7 +14,7 @@ import {
|
||||
type VisibilityState,
|
||||
} from "@tanstack/react-table"
|
||||
import { useVirtualizer, type VirtualItem } from "@tanstack/react-virtual"
|
||||
import { memo, RefObject, useEffect, useRef, useState } from "react"
|
||||
import { memo, type RefObject, useEffect, useRef, useState } from "react"
|
||||
import { Input } from "@/components/ui/input"
|
||||
import { TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table"
|
||||
import { pb } from "@/lib/api"
|
||||
@@ -44,6 +45,20 @@ export default function ContainersTable({ systemId }: { systemId?: string }) {
|
||||
)
|
||||
const [columnFilters, setColumnFilters] = useState<ColumnFiltersState>([])
|
||||
const [columnVisibility, setColumnVisibility] = useState<VisibilityState>({})
|
||||
|
||||
// Hide ports column if no ports are present
|
||||
useEffect(() => {
|
||||
if (data) {
|
||||
const hasPorts = data.some((container) => container.ports)
|
||||
setColumnVisibility((prev) => {
|
||||
if (prev.ports === hasPorts) {
|
||||
return prev
|
||||
}
|
||||
return { ...prev, ports: hasPorts }
|
||||
})
|
||||
}
|
||||
}, [data])
|
||||
|
||||
const [rowSelection, setRowSelection] = useState({})
|
||||
const [globalFilter, setGlobalFilter] = useState("")
|
||||
|
||||
@@ -51,7 +66,7 @@ export default function ContainersTable({ systemId }: { systemId?: string }) {
|
||||
function fetchData(systemId?: string) {
|
||||
pb.collection<ContainerRecord>("containers")
|
||||
.getList(0, 2000, {
|
||||
fields: "id,name,image,cpu,memory,net,health,status,system,updated",
|
||||
fields: "id,name,image,ports,cpu,memory,net,health,status,system,updated",
|
||||
filter: systemId ? pb.filter("system={:system}", { system: systemId }) : undefined,
|
||||
})
|
||||
.then(({ items }) => {
|
||||
@@ -67,7 +82,7 @@ export default function ContainersTable({ systemId }: { systemId?: string }) {
|
||||
setData((curItems) => {
|
||||
const lastUpdated = Math.max(items[0].updated, items.at(-1)?.updated ?? 0)
|
||||
const containerIds = new Set()
|
||||
const newItems = []
|
||||
const newItems: ContainerRecord[] = []
|
||||
for (const item of items) {
|
||||
if (Math.abs(lastUpdated - item.updated) < 70_000) {
|
||||
containerIds.add(item.id)
|
||||
@@ -134,7 +149,8 @@ export default function ContainersTable({ systemId }: { systemId?: string }) {
|
||||
const status = container.status ?? ""
|
||||
const healthLabel = ContainerHealthLabels[container.health as ContainerHealth] ?? ""
|
||||
const image = container.image ?? ""
|
||||
const searchString = `${systemName} ${id} ${name} ${healthLabel} ${status} ${image}`.toLowerCase()
|
||||
const ports = container.ports ?? ""
|
||||
const searchString = `${systemName} ${id} ${name} ${healthLabel} ${status} ${image} ${ports}`.toLowerCase()
|
||||
|
||||
return (filterValue as string)
|
||||
.toLowerCase()
|
||||
@@ -300,9 +316,6 @@ function ContainerSheet({
|
||||
setSheetOpen: (open: boolean) => void
|
||||
activeContainer: RefObject<ContainerRecord | null>
|
||||
}) {
|
||||
const container = activeContainer.current
|
||||
if (!container) return null
|
||||
|
||||
const [logsDisplay, setLogsDisplay] = useState<string>("")
|
||||
const [infoDisplay, setInfoDisplay] = useState<string>("")
|
||||
const [logsFullscreenOpen, setLogsFullscreenOpen] = useState<boolean>(false)
|
||||
@@ -310,6 +323,8 @@ function ContainerSheet({
|
||||
const [isRefreshingLogs, setIsRefreshingLogs] = useState<boolean>(false)
|
||||
const logsContainerRef = useRef<HTMLDivElement>(null)
|
||||
|
||||
const container = activeContainer.current
|
||||
|
||||
function scrollLogsToBottom() {
|
||||
if (logsContainerRef.current) {
|
||||
logsContainerRef.current.scrollTo({ top: logsContainerRef.current.scrollHeight })
|
||||
@@ -317,6 +332,7 @@ function ContainerSheet({
|
||||
}
|
||||
|
||||
const refreshLogs = async () => {
|
||||
if (!container) return
|
||||
setIsRefreshingLogs(true)
|
||||
const startTime = Date.now()
|
||||
|
||||
@@ -348,6 +364,8 @@ function ContainerSheet({
|
||||
})()
|
||||
}, [container])
|
||||
|
||||
if (!container) return null
|
||||
|
||||
return (
|
||||
<>
|
||||
<LogsFullscreenDialog
|
||||
@@ -378,8 +396,14 @@ function ContainerSheet({
|
||||
{container.image}
|
||||
<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
|
||||
{container.id}
|
||||
<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
|
||||
{ContainerHealthLabels[container.health as ContainerHealth]}
|
||||
{/* {container.ports && (
|
||||
<>
|
||||
<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
|
||||
{container.ports}
|
||||
</>
|
||||
)} */}
|
||||
{/* <Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
|
||||
{ContainerHealthLabels[container.health as ContainerHealth]} */}
|
||||
</SheetDescription>
|
||||
</SheetHeader>
|
||||
<div className="px-3 pb-3 -mt-4 flex flex-col gap-3 h-full items-start">
|
||||
@@ -438,11 +462,12 @@ function ContainerSheet({
|
||||
function ContainersTableHead({ table }: { table: TableType<ContainerRecord> }) {
|
||||
return (
|
||||
<TableHeader className="sticky top-0 z-50 w-full border-b-2">
|
||||
<div className="absolute -top-2 left-0 w-full h-4 bg-table-header z-50"></div>
|
||||
{table.getHeaderGroups().map((headerGroup) => (
|
||||
<tr key={headerGroup.id}>
|
||||
{headerGroup.headers.map((header) => {
|
||||
return (
|
||||
<TableHead className="px-2" key={header.id}>
|
||||
<TableHead className="px-2" key={header.id} style={{ width: header.getSize() }}>
|
||||
{header.isPlaceholder ? null : flexRender(header.column.columnDef.header, header.getContext())}
|
||||
</TableHead>
|
||||
)
|
||||
@@ -474,6 +499,7 @@ const ContainerTableRow = memo(function ContainerTableRow({
|
||||
className="py-0 ps-4.5"
|
||||
style={{
|
||||
height: virtualRow.size,
|
||||
width: cell.column.getSize(),
|
||||
}}
|
||||
>
|
||||
{flexRender(cell.column.columnDef.cell, cell.getContext())}
|
||||
|
||||
@@ -43,7 +43,7 @@ export function copyDockerCompose(port = "45876", publicKey: string, token: stri
|
||||
|
||||
export function copyDockerRun(port = "45876", publicKey: string, token: string) {
|
||||
copyToClipboard(
|
||||
`docker run -d --name beszel-agent --network host --restart unless-stopped -v /var/run/docker.sock:/var/run/docker.sock:ro -v ./beszel_agent_data:/var/lib/beszel-agent -e KEY="${publicKey}" -e LISTEN=${port} -e TOKEN="${token}" -e HUB_URL="${getHubURL()}" henrygd/beszel-agent`
|
||||
`docker run -d --name beszel-agent --network host --restart unless-stopped -v /var/run/docker.sock:/var/run/docker.sock:ro -v beszel_agent_data:/var/lib/beszel-agent -e KEY="${publicKey}" -e LISTEN=${port} -e TOKEN="${token}" -e HUB_URL="${getHubURL()}" henrygd/beszel-agent`
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ import Slider from "@/components/ui/slider"
|
||||
import { HourFormat, Unit } from "@/lib/enums"
|
||||
import { dynamicActivate } from "@/lib/i18n"
|
||||
import languages from "@/lib/languages"
|
||||
import { $userSettings } from "@/lib/stores"
|
||||
import { $userSettings, defaultLayoutWidth } from "@/lib/stores"
|
||||
import { chartTimeData, currentHour12 } from "@/lib/utils"
|
||||
import type { UserSettings } from "@/types"
|
||||
import { saveSettings } from "./layout"
|
||||
@@ -21,7 +21,7 @@ export default function SettingsProfilePage({ userSettings }: { userSettings: Us
|
||||
const [isLoading, setIsLoading] = useState(false)
|
||||
const { i18n } = useLingui()
|
||||
const currentUserSettings = useStore($userSettings)
|
||||
const layoutWidth = currentUserSettings.layoutWidth ?? 1500
|
||||
const layoutWidth = currentUserSettings.layoutWidth ?? defaultLayoutWidth
|
||||
|
||||
async function handleSubmit(e: React.FormEvent<HTMLFormElement>) {
|
||||
e.preventDefault()
|
||||
|
||||
@@ -654,7 +654,7 @@ export default memo(function SystemDetail({ id }: { id: string }) {
|
||||
)}
|
||||
|
||||
{/* Load Average chart */}
|
||||
{chartData.agentVersion?.minor >= 12 && (
|
||||
{chartData.agentVersion?.minor > 12 && (
|
||||
<ChartCard
|
||||
empty={dataEmpty}
|
||||
grid={grid}
|
||||
|
||||
@@ -3,13 +3,16 @@ import {
|
||||
type ColumnDef,
|
||||
type ColumnFiltersState,
|
||||
type Column,
|
||||
type Row,
|
||||
type SortingState,
|
||||
type Table as TableType,
|
||||
flexRender,
|
||||
getCoreRowModel,
|
||||
getFilteredRowModel,
|
||||
getSortedRowModel,
|
||||
useReactTable,
|
||||
} from "@tanstack/react-table"
|
||||
import { useVirtualizer, type VirtualItem } from "@tanstack/react-virtual"
|
||||
import {
|
||||
Activity,
|
||||
Box,
|
||||
@@ -40,6 +43,7 @@ import {
|
||||
toFixedFloat,
|
||||
formatTemperature,
|
||||
cn,
|
||||
getVisualStringWidth,
|
||||
secondsToString,
|
||||
hourWithSeconds,
|
||||
formatShortDate,
|
||||
@@ -57,7 +61,7 @@ import {
|
||||
DropdownMenuSeparator,
|
||||
DropdownMenuTrigger,
|
||||
} from "@/components/ui/dropdown-menu"
|
||||
import { useCallback, useMemo, useEffect, useState } from "react"
|
||||
import { memo, useCallback, useMemo, useEffect, useRef, useState } from "react"
|
||||
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"
|
||||
|
||||
// Column definition for S.M.A.R.T. attributes table
|
||||
@@ -101,7 +105,11 @@ function formatCapacity(bytes: number): string {
|
||||
|
||||
const SMART_DEVICE_FIELDS = "id,system,name,model,state,capacity,temp,type,hours,cycles,updated"
|
||||
|
||||
export const columns: ColumnDef<SmartDeviceRecord>[] = [
|
||||
export const createColumns = (
|
||||
longestName: number,
|
||||
longestModel: number,
|
||||
longestDevice: number
|
||||
): ColumnDef<SmartDeviceRecord>[] => [
|
||||
{
|
||||
id: "system",
|
||||
accessorFn: (record) => record.system,
|
||||
@@ -114,7 +122,11 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
|
||||
header: ({ column }) => <HeaderButton column={column} name={t`System`} Icon={ServerIcon} />,
|
||||
cell: ({ getValue }) => {
|
||||
const allSystems = useStore($allSystemsById)
|
||||
return <span className="ms-1.5 xl:w-30 block truncate">{allSystems[getValue() as string]?.name ?? ""}</span>
|
||||
return (
|
||||
<div className="ms-1.5 max-w-40 block truncate" style={{ width: `${longestName / 1.05}ch` }}>
|
||||
{allSystems[getValue() as string]?.name ?? ""}
|
||||
</div>
|
||||
)
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -122,7 +134,11 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
|
||||
sortingFn: (a, b) => a.original.name.localeCompare(b.original.name),
|
||||
header: ({ column }) => <HeaderButton column={column} name={t`Device`} Icon={HardDrive} />,
|
||||
cell: ({ getValue }) => (
|
||||
<div className="font-medium max-w-40 truncate ms-1.5" title={getValue() as string}>
|
||||
<div
|
||||
className="font-medium max-w-40 truncate ms-1"
|
||||
title={getValue() as string}
|
||||
style={{ width: `${longestDevice / 1.05}ch` }}
|
||||
>
|
||||
{getValue() as string}
|
||||
</div>
|
||||
),
|
||||
@@ -132,7 +148,11 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
|
||||
sortingFn: (a, b) => a.original.model.localeCompare(b.original.model),
|
||||
header: ({ column }) => <HeaderButton column={column} name={t`Model`} Icon={Box} />,
|
||||
cell: ({ getValue }) => (
|
||||
<div className="max-w-48 truncate ms-1.5" title={getValue() as string}>
|
||||
<div
|
||||
className="max-w-48 truncate ms-1"
|
||||
title={getValue() as string}
|
||||
style={{ width: `${longestModel / 1.05}ch` }}
|
||||
>
|
||||
{getValue() as string}
|
||||
</div>
|
||||
),
|
||||
@@ -141,7 +161,7 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
|
||||
accessorKey: "capacity",
|
||||
invertSorting: true,
|
||||
header: ({ column }) => <HeaderButton column={column} name={t`Capacity`} Icon={BinaryIcon} />,
|
||||
cell: ({ getValue }) => <span className="ms-1.5">{formatCapacity(getValue() as number)}</span>,
|
||||
cell: ({ getValue }) => <span className="ms-1">{formatCapacity(getValue() as number)}</span>,
|
||||
},
|
||||
{
|
||||
accessorKey: "state",
|
||||
@@ -149,9 +169,9 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
|
||||
cell: ({ getValue }) => {
|
||||
const status = getValue() as string
|
||||
return (
|
||||
<div className="ms-1.5">
|
||||
<Badge variant={status === "PASSED" ? "success" : status === "FAILED" ? "danger" : "warning"}>{status}</Badge>
|
||||
</div>
|
||||
<Badge className="ms-1" variant={status === "PASSED" ? "success" : status === "FAILED" ? "danger" : "warning"}>
|
||||
{status}
|
||||
</Badge>
|
||||
)
|
||||
},
|
||||
},
|
||||
@@ -160,11 +180,9 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
|
||||
sortingFn: (a, b) => a.original.type.localeCompare(b.original.type),
|
||||
header: ({ column }) => <HeaderButton column={column} name={t`Type`} Icon={ArrowLeftRightIcon} />,
|
||||
cell: ({ getValue }) => (
|
||||
<div className="ms-1.5">
|
||||
<Badge variant="outline" className="uppercase">
|
||||
{getValue() as string}
|
||||
</Badge>
|
||||
</div>
|
||||
<Badge variant="outline" className="ms-1 uppercase">
|
||||
{getValue() as string}
|
||||
</Badge>
|
||||
),
|
||||
},
|
||||
{
|
||||
@@ -176,11 +194,11 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
|
||||
cell: ({ getValue }) => {
|
||||
const hours = getValue() as number | undefined
|
||||
if (hours == null) {
|
||||
return <div className="text-sm text-muted-foreground ms-1.5">N/A</div>
|
||||
return <div className="text-sm text-muted-foreground ms-1">N/A</div>
|
||||
}
|
||||
const seconds = hours * 3600
|
||||
return (
|
||||
<div className="text-sm ms-1.5">
|
||||
<div className="text-sm ms-1">
|
||||
<div>{secondsToString(seconds, "hour")}</div>
|
||||
<div className="text-muted-foreground text-xs">{secondsToString(seconds, "day")}</div>
|
||||
</div>
|
||||
@@ -196,9 +214,9 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
|
||||
cell: ({ getValue }) => {
|
||||
const cycles = getValue() as number | undefined
|
||||
if (cycles == null) {
|
||||
return <div className="text-muted-foreground ms-1.5">N/A</div>
|
||||
return <div className="text-muted-foreground ms-1">N/A</div>
|
||||
}
|
||||
return <span className="ms-1.5">{cycles.toLocaleString()}</span>
|
||||
return <span className="ms-1">{cycles.toLocaleString()}</span>
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -208,10 +226,10 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
|
||||
cell: ({ getValue }) => {
|
||||
const temp = getValue() as number | null | undefined
|
||||
if (!temp) {
|
||||
return <div className="text-muted-foreground ms-1.5">N/A</div>
|
||||
return <div className="text-muted-foreground ms-1">N/A</div>
|
||||
}
|
||||
const { value, unit } = formatTemperature(temp)
|
||||
return <span className="ms-1.5">{`${value} ${unit}`}</span>
|
||||
return <span className="ms-1">{`${value} ${unit}`}</span>
|
||||
},
|
||||
},
|
||||
// {
|
||||
@@ -236,7 +254,7 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
|
||||
// if today, use hourWithSeconds, otherwise use formatShortDate
|
||||
const formatter =
|
||||
new Date(timestamp).toDateString() === new Date().toDateString() ? hourWithSeconds : formatShortDate
|
||||
return <span className="ms-1.5 tabular-nums">{formatter(timestamp)}</span>
|
||||
return <span className="ms-1 tabular-nums">{formatter(timestamp)}</span>
|
||||
},
|
||||
},
|
||||
]
|
||||
@@ -275,6 +293,36 @@ export default function DisksTable({ systemId }: { systemId?: string }) {
|
||||
const [sheetOpen, setSheetOpen] = useState(false)
|
||||
const [rowActionState, setRowActionState] = useState<{ type: "refresh" | "delete"; id: string } | null>(null)
|
||||
const [globalFilter, setGlobalFilter] = useState("")
|
||||
const allSystems = useStore($allSystemsById)
|
||||
|
||||
// duplicate the devices to test with more rows
|
||||
// if (
|
||||
// smartDevices?.length &&
|
||||
// smartDevices.length < 50 &&
|
||||
// typeof window !== "undefined" &&
|
||||
// window.location.hostname === "localhost"
|
||||
// ) {
|
||||
// setSmartDevices([...smartDevices, ...smartDevices, ...smartDevices])
|
||||
// }
|
||||
|
||||
// Calculate the right width for the columns based on the longest strings among the displayed devices
|
||||
const { longestName, longestModel, longestDevice } = useMemo(() => {
|
||||
const result = { longestName: 0, longestModel: 0, longestDevice: 0 }
|
||||
if (!smartDevices || Object.keys(allSystems).length === 0) {
|
||||
return result
|
||||
}
|
||||
const seenSystems = new Set<string>()
|
||||
for (const device of smartDevices) {
|
||||
if (!systemId && !seenSystems.has(device.system)) {
|
||||
seenSystems.add(device.system)
|
||||
const name = allSystems[device.system]?.name ?? ""
|
||||
result.longestName = Math.max(result.longestName, getVisualStringWidth(name))
|
||||
}
|
||||
result.longestModel = Math.max(result.longestModel, getVisualStringWidth(device.model ?? ""))
|
||||
result.longestDevice = Math.max(result.longestDevice, getVisualStringWidth(device.name ?? ""))
|
||||
}
|
||||
return result
|
||||
}, [smartDevices, systemId, allSystems])
|
||||
|
||||
const openSheet = (disk: SmartDeviceRecord) => {
|
||||
setActiveDiskId(disk.id)
|
||||
@@ -440,9 +488,10 @@ export default function DisksTable({ systemId }: { systemId?: string }) {
|
||||
|
||||
// Filter columns based on whether systemId is provided
|
||||
const tableColumns = useMemo(() => {
|
||||
const columns = createColumns(longestName, longestModel, longestDevice)
|
||||
const baseColumns = systemId ? columns.filter((col) => col.id !== "system") : columns
|
||||
return [...baseColumns, actionColumn]
|
||||
}, [systemId, actionColumn])
|
||||
}, [systemId, actionColumn, longestName, longestModel, longestDevice])
|
||||
|
||||
const table = useReactTable({
|
||||
data: smartDevices || ([] as SmartDeviceRecord[]),
|
||||
@@ -474,6 +523,7 @@ export default function DisksTable({ systemId }: { systemId?: string }) {
|
||||
.every((term) => searchString.includes(term))
|
||||
},
|
||||
})
|
||||
const rows = table.getRowModel().rows
|
||||
|
||||
// Hide the table on system pages if there's no data, but always show on global page
|
||||
if (systemId && !smartDevices?.length && !columnFilters.length) {
|
||||
@@ -513,57 +563,123 @@ export default function DisksTable({ systemId }: { systemId?: string }) {
|
||||
</div>
|
||||
</div>
|
||||
</CardHeader>
|
||||
<div className="rounded-md border text-nowrap">
|
||||
<Table>
|
||||
<TableHeader>
|
||||
{table.getHeaderGroups().map((headerGroup) => (
|
||||
<TableRow key={headerGroup.id}>
|
||||
{headerGroup.headers.map((header) => {
|
||||
return (
|
||||
<TableHead key={header.id} className="px-2">
|
||||
{header.isPlaceholder ? null : flexRender(header.column.columnDef.header, header.getContext())}
|
||||
</TableHead>
|
||||
)
|
||||
})}
|
||||
</TableRow>
|
||||
))}
|
||||
</TableHeader>
|
||||
<TableBody>
|
||||
{table.getRowModel().rows?.length ? (
|
||||
table.getRowModel().rows.map((row) => (
|
||||
<TableRow
|
||||
key={row.id}
|
||||
data-state={row.getIsSelected() && "selected"}
|
||||
className="cursor-pointer"
|
||||
onClick={() => openSheet(row.original)}
|
||||
>
|
||||
{row.getVisibleCells().map((cell) => (
|
||||
<TableCell key={cell.id} className="md:ps-5">
|
||||
{flexRender(cell.column.columnDef.cell, cell.getContext())}
|
||||
</TableCell>
|
||||
))}
|
||||
</TableRow>
|
||||
))
|
||||
) : (
|
||||
<TableRow>
|
||||
<TableCell colSpan={tableColumns.length} className="h-24 text-center">
|
||||
{smartDevices ? (
|
||||
t`No results.`
|
||||
) : (
|
||||
<LoaderCircleIcon className="animate-spin size-10 opacity-60 mx-auto" />
|
||||
)}
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
)}
|
||||
</TableBody>
|
||||
</Table>
|
||||
</div>
|
||||
<SmartDevicesTable
|
||||
table={table}
|
||||
rows={rows}
|
||||
colLength={tableColumns.length}
|
||||
data={smartDevices}
|
||||
openSheet={openSheet}
|
||||
/>
|
||||
</Card>
|
||||
<DiskSheet diskId={activeDiskId} open={sheetOpen} onOpenChange={setSheetOpen} />
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Scrollable, row-virtualized table of S.M.A.R.T. devices.
 *
 * Only the rows reported by the virtualizer are rendered; the space taken by
 * rows outside that window is reserved with top/bottom padding on the wrapper.
 *
 * @param table - TanStack table instance, used to render the header.
 * @param rows - Row model to display (already filtered/sorted by the caller).
 * @param colLength - Column count, used as the colSpan of the placeholder row.
 * @param data - Raw device list; `undefined` shows a spinner instead of "No results.".
 * @param openSheet - Called with the clicked row's record.
 */
const SmartDevicesTable = memo(function SmartDevicesTable({
	table,
	rows,
	colLength,
	data,
	openSheet,
}: {
	table: TableType<SmartDeviceRecord>
	rows: Row<SmartDeviceRecord>[]
	colLength: number
	data: SmartDeviceRecord[] | undefined
	openSheet: (disk: SmartDeviceRecord) => void
}) {
	const scrollRef = useRef<HTMLDivElement>(null)

	const virtualizer = useVirtualizer<HTMLDivElement, HTMLTableRowElement>({
		count: rows.length,
		estimateSize: () => 65,
		getScrollElement: () => scrollRef.current,
		overscan: 5,
	})
	const virtualRows = virtualizer.getVirtualItems()
	const totalSize = virtualizer.getTotalSize()

	// `??` binds looser than `-`, so the fallback must be parenthesized; otherwise
	// scrollMargin is only applied when there is no first row at all.
	const firstRowStart = virtualRows[0]?.start ?? 0
	const lastRowEnd = virtualRows[virtualRows.length - 1]?.end ?? 0
	const paddingTop = Math.max(0, firstRowStart - virtualizer.options.scrollMargin)
	const paddingBottom = Math.max(0, totalSize - lastRowEnd)

	return (
		<div
			className={cn(
				"h-min max-h-[calc(100dvh-17rem)] max-w-full relative overflow-auto rounded-md border",
				(!rows.length || rows.length > 2) && "min-h-50"
			)}
			ref={scrollRef}
		>
			{/* NOTE(review): the extra 48px presumably accounts for the header row height - confirm */}
			<div style={{ height: `${totalSize + 48}px`, paddingTop, paddingBottom }}>
				<table className="w-full text-sm text-nowrap">
					<SmartTableHead table={table} />
					<TableBody>
						{rows.length ? (
							virtualRows.map((virtualRow) => {
								const row = rows[virtualRow.index]
								return <SmartDeviceTableRow key={row.id} row={row} virtualRow={virtualRow} openSheet={openSheet} />
							})
						) : (
							<TableRow>
								<TableCell colSpan={colLength} className="h-24 text-center pointer-events-none">
									{data ? t`No results.` : <LoaderCircleIcon className="animate-spin size-10 opacity-60 mx-auto" />}
								</TableCell>
							</TableRow>
						)}
					</TableBody>
				</table>
			</div>
		</div>
	)
})
|
||||
|
||||
/**
 * Sticky header for the S.M.A.R.T. devices table.
 * Renders one row per header group and one cell per header; placeholder
 * headers produce an empty cell.
 */
function SmartTableHead({ table }: { table: TableType<SmartDeviceRecord> }) {
	const headerGroups = table.getHeaderGroups()

	return (
		<TableHeader className="sticky top-0 z-50 w-full border-b-2">
			{/* NOTE(review): strip positioned just above the header, presumably to hide rows scrolling behind it - confirm */}
			<div className="absolute -top-2 left-0 w-full h-4 bg-table-header z-50"></div>
			{headerGroups.map((group) => (
				<TableRow key={group.id}>
					{group.headers.map((col) => {
						const content = col.isPlaceholder ? null : flexRender(col.column.columnDef.header, col.getContext())
						return (
							<TableHead key={col.id} className="px-2">
								{content}
							</TableHead>
						)
					})}
				</TableRow>
			))}
		</TableHeader>
	)
}
|
||||
|
||||
/**
 * Memoized row of the virtualized S.M.A.R.T. devices table.
 *
 * @param row - Table row to render; its record is passed to `openSheet` on click.
 * @param virtualRow - Virtual item for this row; its size sets each cell's height.
 * @param openSheet - Called with the row's record when the row is clicked.
 */
const SmartDeviceTableRow = memo(function SmartDeviceTableRow({
	row,
	virtualRow,
	openSheet,
}: {
	row: Row<SmartDeviceRecord>
	virtualRow: VirtualItem
	openSheet: (disk: SmartDeviceRecord) => void
}) {
	const selectedState = row.getIsSelected() && "selected"
	// Every cell in the row shares the height reported by the virtualizer.
	const cellStyle = { height: virtualRow.size }
	const handleClick = () => openSheet(row.original)

	return (
		<TableRow data-state={selectedState} className="cursor-pointer" onClick={handleClick}>
			{row.getVisibleCells().map((cell) => {
				const content = flexRender(cell.column.columnDef.cell, cell.getContext())
				return (
					<TableCell key={cell.id} className="md:ps-5 py-0" style={cellStyle}>
						{content}
					</TableCell>
				)
			})}
		</TableRow>
	)
})
|
||||
|
||||
function DiskSheet({
|
||||
diskId,
|
||||
open,
|
||||
@@ -621,8 +737,8 @@ function DiskSheet({
|
||||
const deviceName = disk?.name || unknown
|
||||
const model = disk?.model || unknown
|
||||
const capacity = disk?.capacity ? formatCapacity(disk.capacity) : unknown
|
||||
const serialNumber = disk?.serial || unknown
|
||||
const firmwareVersion = disk?.firmware || unknown
|
||||
const serialNumber = disk?.serial
|
||||
const firmwareVersion = disk?.firmware
|
||||
const status = disk?.state || unknown
|
||||
|
||||
return (
|
||||
@@ -636,24 +752,32 @@ function DiskSheet({
|
||||
{model}
|
||||
<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
|
||||
{capacity}
|
||||
<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<span>{serialNumber}</span>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent>
|
||||
<Trans>Serial Number</Trans>
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<span>{firmwareVersion}</span>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent>
|
||||
<Trans>Firmware</Trans>
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
{serialNumber && (
|
||||
<>
|
||||
<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<span>{serialNumber}</span>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent>
|
||||
<Trans>Serial Number</Trans>
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</>
|
||||
)}
|
||||
{firmwareVersion && (
|
||||
<>
|
||||
<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<span>{firmwareVersion}</span>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent>
|
||||
<Trans>Firmware</Trans>
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</>
|
||||
)}
|
||||
</SheetDescription>
|
||||
</SheetHeader>
|
||||
<div className="flex-1 overflow-hidden p-4 flex flex-col gap-4">
|
||||
|
||||
@@ -46,7 +46,6 @@ export default function SystemdTable({ systemId }: { systemId?: string }) {
|
||||
return setData([])
|
||||
}, [systemId])
|
||||
|
||||
|
||||
useEffect(() => {
|
||||
const lastUpdated = data[0]?.updated ?? 0
|
||||
|
||||
@@ -360,15 +359,9 @@ function SystemdSheet({
|
||||
return (
|
||||
<>
|
||||
{hasCurrent ? current : notAvailable}
|
||||
{hasMax && (
|
||||
<span className="text-muted-foreground ms-1.5">
|
||||
{`(${t`limit`}: ${max})`}
|
||||
</span>
|
||||
)}
|
||||
{hasMax && <span className="text-muted-foreground ms-1.5">{`(${t`limit`}: ${max})`}</span>}
|
||||
{max === null && (
|
||||
<span className="text-muted-foreground ms-1.5">
|
||||
{`(${t`limit`}: ${t`Unlimited`.toLowerCase()})`}
|
||||
</span>
|
||||
<span className="text-muted-foreground ms-1.5">{`(${t`limit`}: ${t`Unlimited`.toLowerCase()})`}</span>
|
||||
)}
|
||||
</>
|
||||
)
|
||||
@@ -435,7 +428,7 @@ function SystemdSheet({
|
||||
</tr>
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
const capitalize = (str: string) => `${str.charAt(0).toUpperCase()}${str.slice(1).toLowerCase()}`
|
||||
|
||||
return (
|
||||
@@ -621,6 +614,7 @@ function SystemdSheet({
|
||||
function SystemdTableHead({ table }: { table: TableType<SystemdRecord> }) {
|
||||
return (
|
||||
<TableHeader className="sticky top-0 z-50 w-full border-b-2">
|
||||
<div className="absolute -top-2 left-0 w-full h-4 bg-table-header z-50"></div>
|
||||
{table.getHeaderGroups().map((headerGroup) => (
|
||||
<tr key={headerGroup.id}>
|
||||
{headerGroup.headers.map((header) => {
|
||||
|
||||
@@ -198,32 +198,19 @@ export function SystemsTableColumns(viewMode: "table" | "grid"): ColumnDef<Syste
|
||||
},
|
||||
{
|
||||
id: "loadAverage",
|
||||
accessorFn: ({ info }) => {
|
||||
const sum = info.la?.reduce((acc, curr) => acc + curr, 0)
|
||||
// TODO: remove this in future release in favor of la array
|
||||
if (!sum) {
|
||||
return (info.l1 ?? 0) + (info.l5 ?? 0) + (info.l15 ?? 0) || undefined
|
||||
}
|
||||
return sum || undefined
|
||||
},
|
||||
accessorFn: ({ info }) => info.la?.reduce((acc, curr) => acc + curr, 0),
|
||||
name: () => t({ message: "Load Avg", comment: "Short label for load average" }),
|
||||
size: 0,
|
||||
Icon: HourglassIcon,
|
||||
header: sortableHeader,
|
||||
cell(info: CellContext<SystemRecord, unknown>) {
|
||||
const { info: sysInfo, status } = info.row.original
|
||||
const { major, minor } = parseSemVer(sysInfo.v)
|
||||
const { colorWarn = 65, colorCrit = 90 } = useStore($userSettings, { keys: ["colorWarn", "colorCrit"] })
|
||||
// agent version
|
||||
const { minor, patch } = parseSemVer(sysInfo.v)
|
||||
let loadAverages = sysInfo.la
|
||||
|
||||
// use legacy load averages if agent version is less than 12.1.0
|
||||
if (!loadAverages || (minor === 12 && patch < 1)) {
|
||||
loadAverages = [sysInfo.l1 ?? 0, sysInfo.l5 ?? 0, sysInfo.l15 ?? 0]
|
||||
}
|
||||
const loadAverages = sysInfo.la || []
|
||||
|
||||
const max = Math.max(...loadAverages)
|
||||
if (max === 0 && (status === SystemStatus.Paused || minor < 12)) {
|
||||
if (max === 0 && (status === SystemStatus.Paused || (major < 1 && minor < 13))) {
|
||||
return null
|
||||
}
|
||||
|
||||
@@ -248,19 +235,20 @@ export function SystemsTableColumns(viewMode: "table" | "grid"): ColumnDef<Syste
|
||||
},
|
||||
},
|
||||
{
|
||||
accessorFn: ({ info }) => info.bb || (info.b || 0) * 1024 * 1024 || undefined,
|
||||
accessorFn: ({ info, status }) => (status !== SystemStatus.Up ? undefined : info.bb),
|
||||
id: "net",
|
||||
name: () => t`Net`,
|
||||
size: 0,
|
||||
Icon: EthernetIcon,
|
||||
header: sortableHeader,
|
||||
sortUndefined: "last",
|
||||
cell(info) {
|
||||
const sys = info.row.original
|
||||
const userSettings = useStore($userSettings, { keys: ["unitNet"] })
|
||||
if (sys.status === SystemStatus.Paused) {
|
||||
const val = info.getValue() as number | undefined
|
||||
if (val === undefined) {
|
||||
return null
|
||||
}
|
||||
const { value, unit } = formatBytes((info.getValue() || 0) as number, true, userSettings.unitNet, false)
|
||||
const userSettings = useStore($userSettings, { keys: ["unitNet"] })
|
||||
const { value, unit } = formatBytes(val, true, userSettings.unitNet, false)
|
||||
return (
|
||||
<span className="tabular-nums whitespace-nowrap">
|
||||
{decimalString(value, value >= 100 ? 1 : 2)} {unit}
|
||||
|
||||
@@ -391,6 +391,7 @@ function SystemsTableHead({ table }: { table: TableType<SystemRecord> }) {
|
||||
const { t } = useLingui()
|
||||
return (
|
||||
<TableHeader className="sticky top-0 z-50 w-full border-b-2">
|
||||
<div className="absolute -top-2 left-0 w-full h-4 bg-table-header z-50"></div>
|
||||
{table.getHeaderGroups().map((headerGroup) => (
|
||||
<tr key={headerGroup.id}>
|
||||
{headerGroup.headers.map((header) => {
|
||||
|
||||
@@ -185,3 +185,14 @@ export function PlugChargingIcon(props: SVGProps<SVGSVGElement>) {
|
||||
</svg>
|
||||
)
|
||||
}
|
||||
|
||||
// Lucide Icons (ISC) - used for ports
|
||||
/**
 * Square with an arrow entering from the left, drawn with `currentColor` strokes.
 * Props are spread last, so callers can override any of the default SVG attributes.
 */
export function SquareArrowRightEnterIcon(props: SVGProps<SVGSVGElement>) {
	return (
		<svg
			viewBox="0 0 24 24"
			fill="none"
			stroke="currentColor"
			strokeWidth="2"
			strokeLinecap="round"
			{...props}
		>
			{/* arrow head */}
			<path d="m10 16 4-4-4-4" />
			{/* arrow shaft */}
			<path d="M3 12h11" />
			{/* square outline, open on the left where the arrow enters */}
			<path d="M3 8V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2v14a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-3" />
		</svg>
	)
}
|
||||
|
||||
@@ -3,6 +3,9 @@ import type { AlertMap, ChartTimes, SystemRecord, UserSettings } from "@/types"
|
||||
import { pb } from "./api"
|
||||
import { Unit } from "./enums"
|
||||
|
||||
/** Default layout width. Used as fallback when user setting is unset. */
|
||||
export const defaultLayoutWidth = 1580
|
||||
|
||||
/** Store if user is authenticated */
|
||||
export const $authenticated = atom(pb.authStore.isValid)
|
||||
|
||||
|
||||
@@ -14,7 +14,14 @@ import { Toaster } from "@/components/ui/toaster.tsx"
|
||||
import { alertManager } from "@/lib/alerts"
|
||||
import { pb, updateUserSettings } from "@/lib/api.ts"
|
||||
import { dynamicActivate, getLocale } from "@/lib/i18n"
|
||||
import { $authenticated, $copyContent, $direction, $publicKey, $userSettings } from "@/lib/stores.ts"
|
||||
import {
|
||||
$authenticated,
|
||||
$copyContent,
|
||||
$direction,
|
||||
$publicKey,
|
||||
$userSettings,
|
||||
defaultLayoutWidth,
|
||||
} from "@/lib/stores.ts"
|
||||
import * as systemsManager from "@/lib/systemsManager.ts"
|
||||
|
||||
const LoginPage = lazy(() => import("@/components/login/login.tsx"))
|
||||
@@ -100,7 +107,7 @@ const Layout = () => {
|
||||
<LoginPage />
|
||||
</Suspense>
|
||||
) : (
|
||||
<div style={{ "--container": `${userSettings.layoutWidth ?? 1580}px` } as React.CSSProperties}>
|
||||
<div style={{ "--container": `${userSettings.layoutWidth ?? defaultLayoutWidth}px` } as React.CSSProperties}>
|
||||
<div className="container">
|
||||
<Navbar />
|
||||
</div>
|
||||
|
||||
14
internal/site/src/types.d.ts
vendored
14
internal/site/src/types.d.ts
vendored
@@ -45,12 +45,6 @@ export interface SystemInfo {
|
||||
c: number
|
||||
/** cpu model */
|
||||
m: string
|
||||
/** load average 1 minute */
|
||||
l1?: number
|
||||
/** load average 5 minutes */
|
||||
l5?: number
|
||||
/** load average 15 minutes */
|
||||
l15?: number
|
||||
/** load average */
|
||||
la?: [number, number, number]
|
||||
/** operating system */
|
||||
@@ -94,13 +88,6 @@ export interface SystemStats {
|
||||
cpub?: number[]
|
||||
/** per-core cpu usage [CPU0..] (0-100 integers) */
|
||||
cpus?: number[]
|
||||
// TODO: remove these in future release in favor of la
|
||||
/** load average 1 minute */
|
||||
l1?: number
|
||||
/** load average 5 minutes */
|
||||
l5?: number
|
||||
/** load average 15 minutes */
|
||||
l15?: number
|
||||
/** load average */
|
||||
la?: [number, number, number]
|
||||
/** total memory (gb) */
|
||||
@@ -267,6 +254,7 @@ export interface ContainerRecord extends RecordModel {
|
||||
system: string
|
||||
name: string
|
||||
image: string
|
||||
ports: string
|
||||
cpu: number
|
||||
memory: number
|
||||
net: number
|
||||
|
||||
@@ -98,7 +98,7 @@ func ClearCollection(t testing.TB, app core.App, collectionName string) error {
|
||||
}
|
||||
|
||||
func (h *TestHub) Cleanup() {
|
||||
h.GetAlertManager().StopWorker()
|
||||
h.GetAlertManager().Stop()
|
||||
h.GetSystemManager().RemoveAllSystems()
|
||||
h.TestApp.Cleanup()
|
||||
}
|
||||
|
||||
@@ -51,7 +51,7 @@ The [quick start guide](https://beszel.dev/guide/getting-started) and other docu
|
||||
- **GPU usage / power draw** - Nvidia, AMD, and Intel.
|
||||
- **Battery** - Host system battery charge.
|
||||
- **Containers** - Status and metrics of all running Docker / Podman containers.
|
||||
- **S.M.A.R.T.** - Host system disk health (includes eMMC wear/EOL via Linux sysfs when available).
|
||||
- **S.M.A.R.T.** - Host system disk health (includes eMMC wear/EOL and Linux mdraid array health via sysfs when available).
|
||||
|
||||
## Help and discussion
|
||||
|
||||
|
||||
@@ -374,7 +374,7 @@ else
|
||||
fi
|
||||
|
||||
# Stop existing service if it exists (for upgrades)
|
||||
if [ -f "$BIN_PATH" ]; then
|
||||
if [ "$UNINSTALL" != true ] && [ -f "$BIN_PATH" ]; then
|
||||
echo "Existing installation detected. Stopping service for upgrade..."
|
||||
if is_alpine; then
|
||||
rc-service beszel-agent stop 2>/dev/null || true
|
||||
@@ -451,7 +451,7 @@ if [ "$UNINSTALL" = true ]; then
|
||||
else
|
||||
echo "Stopping and disabling the agent service..."
|
||||
systemctl stop beszel-agent.service
|
||||
systemctl disable beszel-agent.service
|
||||
systemctl disable beszel-agent.service >/dev/null 2>&1
|
||||
|
||||
echo "Removing the systemd service file..."
|
||||
rm /etc/systemd/system/beszel-agent.service
|
||||
@@ -459,7 +459,7 @@ if [ "$UNINSTALL" = true ]; then
|
||||
# Remove the update timer and service if they exist
|
||||
echo "Removing the daily update service and timer..."
|
||||
systemctl stop beszel-agent-update.timer 2>/dev/null
|
||||
systemctl disable beszel-agent-update.timer 2>/dev/null
|
||||
systemctl disable beszel-agent-update.timer >/dev/null 2>&1
|
||||
rm -f /etc/systemd/system/beszel-agent-update.service
|
||||
rm -f /etc/systemd/system/beszel-agent-update.timer
|
||||
|
||||
@@ -549,14 +549,14 @@ else
|
||||
fi
|
||||
|
||||
# Create a dedicated user for the service if it doesn't exist
|
||||
echo "Creating a dedicated user for the Beszel Agent service..."
|
||||
echo "Configuring the dedicated user for the Beszel Agent service..."
|
||||
if is_alpine; then
|
||||
if ! id -u beszel >/dev/null 2>&1; then
|
||||
addgroup beszel
|
||||
adduser -S -D -H -s /sbin/nologin -G beszel beszel
|
||||
fi
|
||||
# Add the user to the docker group to allow access to the Docker socket if group docker exists
|
||||
if getent group docker; then
|
||||
if getent group docker >/dev/null 2>&1; then
|
||||
echo "Adding beszel to docker group"
|
||||
addgroup beszel docker
|
||||
fi
|
||||
@@ -604,12 +604,12 @@ else
|
||||
useradd --system --home-dir /nonexistent --shell /bin/false beszel
|
||||
fi
|
||||
# Add the user to the docker group to allow access to the Docker socket if group docker exists
|
||||
if getent group docker; then
|
||||
if getent group docker >/dev/null 2>&1; then
|
||||
echo "Adding beszel to docker group"
|
||||
usermod -aG docker beszel
|
||||
fi
|
||||
# Add the user to the disk group to allow access to disk devices if group disk exists
|
||||
if getent group disk; then
|
||||
if getent group disk >/dev/null 2>&1; then
|
||||
echo "Adding beszel to disk group"
|
||||
usermod -aG disk beszel
|
||||
fi
|
||||
@@ -629,7 +629,6 @@ if [ ! -d "$BIN_DIR" ]; then
|
||||
fi
|
||||
|
||||
# Download and install the Beszel Agent
|
||||
echo "Downloading and installing the agent..."
|
||||
|
||||
OS=$(uname -s | sed -e 'y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/')
|
||||
ARCH=$(detect_architecture)
|
||||
@@ -656,19 +655,29 @@ else
|
||||
INSTALL_VERSION=$(echo "$INSTALL_VERSION" | sed 's/^v//')
|
||||
fi
|
||||
|
||||
echo "Downloading and installing agent version ${INSTALL_VERSION} from ${GITHUB_URL} ..."
|
||||
echo "Downloading beszel-agent v${INSTALL_VERSION}..."
|
||||
|
||||
# Download checksums file
|
||||
TEMP_DIR=$(mktemp -d)
|
||||
cd "$TEMP_DIR" || exit 1
|
||||
CHECKSUM=$(curl -sL "$GITHUB_URL/henrygd/beszel/releases/download/v${INSTALL_VERSION}/beszel_${INSTALL_VERSION}_checksums.txt" | grep "$FILE_NAME" | cut -d' ' -f1)
|
||||
CHECKSUM=$(curl -fsSL "$GITHUB_URL/henrygd/beszel/releases/download/v${INSTALL_VERSION}/beszel_${INSTALL_VERSION}_checksums.txt" | grep "$FILE_NAME" | cut -d' ' -f1)
|
||||
if [ -z "$CHECKSUM" ] || ! echo "$CHECKSUM" | grep -qE "^[a-fA-F0-9]{64}$"; then
|
||||
echo "Failed to get checksum or invalid checksum format"
|
||||
echo "Try again with --mirror (or --mirror <url>) if GitHub is not reachable."
|
||||
rm -rf "$TEMP_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! curl -#L "$GITHUB_URL/henrygd/beszel/releases/download/v${INSTALL_VERSION}/$FILE_NAME" -o "$FILE_NAME"; then
|
||||
echo "Failed to download the agent from ""$GITHUB_URL/henrygd/beszel/releases/download/v${INSTALL_VERSION}/$FILE_NAME"
|
||||
if ! curl -fL# --retry 3 --retry-delay 2 --connect-timeout 10 "$GITHUB_URL/henrygd/beszel/releases/download/v${INSTALL_VERSION}/$FILE_NAME" -o "$FILE_NAME"; then
|
||||
echo "Failed to download the agent from $GITHUB_URL/henrygd/beszel/releases/download/v${INSTALL_VERSION}/$FILE_NAME"
|
||||
echo "Try again with --mirror (or --mirror <url>) if GitHub is not reachable."
|
||||
rm -rf "$TEMP_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! tar -tzf "$FILE_NAME" >/dev/null 2>&1; then
|
||||
echo "Downloaded archive is invalid or incomplete (possible network/proxy issue)."
|
||||
echo "Try again with --mirror (or --mirror <url>) if the download path is unstable."
|
||||
rm -rf "$TEMP_DIR"
|
||||
exit 1
|
||||
fi
|
||||
@@ -685,6 +694,12 @@ if ! tar -xzf "$FILE_NAME" beszel-agent; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -s "$TEMP_DIR/beszel-agent" ]; then
|
||||
echo "Downloaded binary is missing or empty."
|
||||
rm -rf "$TEMP_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -f "$BIN_PATH" ]; then
|
||||
echo "Backing up existing binary..."
|
||||
cp "$BIN_PATH" "$BIN_PATH.bak"
|
||||
@@ -871,6 +886,8 @@ EOF
|
||||
|
||||
elif is_freebsd; then
|
||||
echo "Checking for existing FreeBSD service configuration..."
|
||||
# Ensure rc.d directory exists on minimal FreeBSD installs
|
||||
mkdir -p /usr/local/etc/rc.d
|
||||
|
||||
# Create environment configuration file with proper permissions if it doesn't exist
|
||||
if [ ! -f "$AGENT_DIR/env" ]; then
|
||||
@@ -989,7 +1006,7 @@ EOF
|
||||
# Load and start the service
|
||||
printf "\nLoading and starting the agent service...\n"
|
||||
systemctl daemon-reload
|
||||
systemctl enable beszel-agent.service
|
||||
systemctl enable beszel-agent.service >/dev/null 2>&1
|
||||
systemctl restart beszel-agent.service
|
||||
|
||||
|
||||
@@ -1035,7 +1052,7 @@ WantedBy=timers.target
|
||||
EOF
|
||||
|
||||
systemctl daemon-reload
|
||||
systemctl enable --now beszel-agent-update.timer
|
||||
systemctl enable --now beszel-agent-update.timer >/dev/null 2>&1
|
||||
|
||||
printf "\nDaily updates have been enabled.\n"
|
||||
;;
|
||||
|
||||
@@ -156,7 +156,7 @@ fi
|
||||
|
||||
# Define default values
|
||||
PORT=8090
|
||||
GITHUB_PROXY_URL="https://ghfast.top/"
|
||||
GITHUB_URL="https://github.com"
|
||||
AUTO_UPDATE_FLAG="false"
|
||||
UNINSTALL=false
|
||||
|
||||
@@ -173,7 +173,7 @@ while [ $# -gt 0 ]; do
|
||||
printf "Options: \n"
|
||||
printf " -u : Uninstall the Beszel Hub\n"
|
||||
printf " -p <port> : Specify a port number (default: 8090)\n"
|
||||
printf " -c <url> : Use a custom GitHub mirror URL (e.g., https://ghfast.top/)\n"
|
||||
printf " -c, --mirror [URL] : Use a GitHub mirror/proxy URL (default: https://gh.beszel.dev)\n"
|
||||
printf " --auto-update : Enable automatic daily updates (disabled by default)\n"
|
||||
printf " -h, --help : Display this help message\n"
|
||||
exit 0
|
||||
@@ -183,10 +183,14 @@ while [ $# -gt 0 ]; do
|
||||
PORT="$1"
|
||||
shift
|
||||
;;
|
||||
-c)
|
||||
shift
|
||||
GITHUB_PROXY_URL=$(ensure_trailing_slash "$1")
|
||||
-c | --mirror)
|
||||
shift
|
||||
if [ -n "$1" ] && ! echo "$1" | grep -q '^-'; then
|
||||
GITHUB_URL="$(ensure_trailing_slash "$1")https://github.com"
|
||||
shift
|
||||
else
|
||||
GITHUB_URL="https://gh.beszel.dev"
|
||||
fi
|
||||
;;
|
||||
--auto-update)
|
||||
AUTO_UPDATE_FLAG="true"
|
||||
@@ -199,9 +203,6 @@ while [ $# -gt 0 ]; do
|
||||
esac
|
||||
done
|
||||
|
||||
# Ensure the proxy URL ends with a /
|
||||
GITHUB_PROXY_URL=$(ensure_trailing_slash "$GITHUB_PROXY_URL")
|
||||
|
||||
# Set paths based on operating system
|
||||
if is_freebsd; then
|
||||
HUB_DIR="/usr/local/etc/beszel"
|
||||
@@ -323,10 +324,41 @@ OS=$(uname -s | tr '[:upper:]' '[:lower:]')
|
||||
ARCH=$(detect_architecture)
|
||||
FILE_NAME="beszel_${OS}_${ARCH}.tar.gz"
|
||||
|
||||
curl -sL "${GITHUB_PROXY_URL}https://github.com/henrygd/beszel/releases/latest/download/$FILE_NAME" | tar -xz -O beszel | tee ./beszel >/dev/null
|
||||
chmod +x ./beszel
|
||||
mv ./beszel "$BIN_PATH"
|
||||
TEMP_DIR=$(mktemp -d)
|
||||
ARCHIVE_PATH="$TEMP_DIR/$FILE_NAME"
|
||||
DOWNLOAD_URL="$GITHUB_URL/henrygd/beszel/releases/latest/download/$FILE_NAME"
|
||||
|
||||
if ! curl -fL# --retry 3 --retry-delay 2 --connect-timeout 10 "$DOWNLOAD_URL" -o "$ARCHIVE_PATH"; then
|
||||
echo "Failed to download the Beszel Hub from:"
|
||||
echo "$DOWNLOAD_URL"
|
||||
echo "Try again with --mirror (or --mirror <url>) if GitHub is not reachable."
|
||||
rm -rf "$TEMP_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! tar -tzf "$ARCHIVE_PATH" >/dev/null 2>&1; then
|
||||
echo "Downloaded archive is invalid or incomplete (possible network/proxy issue)."
|
||||
echo "Try again with --mirror (or --mirror <url>) if the download path is unstable."
|
||||
rm -rf "$TEMP_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! tar -xzf "$ARCHIVE_PATH" -C "$TEMP_DIR" beszel; then
|
||||
echo "Failed to extract beszel from archive."
|
||||
rm -rf "$TEMP_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -s "$TEMP_DIR/beszel" ]; then
|
||||
echo "Downloaded binary is missing or empty."
|
||||
rm -rf "$TEMP_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
chmod +x "$TEMP_DIR/beszel"
|
||||
mv "$TEMP_DIR/beszel" "$BIN_PATH"
|
||||
chown beszel:beszel "$BIN_PATH"
|
||||
rm -rf "$TEMP_DIR"
|
||||
|
||||
if is_freebsd; then
|
||||
echo "Creating FreeBSD rc service..."
|
||||
@@ -375,8 +407,8 @@ EOF
|
||||
|
||||
else
|
||||
# Original systemd service installation code
|
||||
printf "Creating the systemd service for the Beszel Hub...\n\n"
|
||||
tee /etc/systemd/system/beszel-hub.service <<EOF
|
||||
printf "Creating the systemd service for the Beszel Hub...\n"
|
||||
cat >/etc/systemd/system/beszel-hub.service <<EOF
|
||||
[Unit]
|
||||
Description=Beszel Hub Service
|
||||
After=network.target
|
||||
@@ -393,10 +425,10 @@ WantedBy=multi-user.target
|
||||
EOF
|
||||
|
||||
# Load and start the service
|
||||
printf "\nLoading and starting the Beszel Hub service...\n"
|
||||
printf "Loading and starting the Beszel Hub service...\n"
|
||||
systemctl daemon-reload
|
||||
systemctl enable beszel-hub.service
|
||||
systemctl start beszel-hub.service
|
||||
systemctl enable --quiet beszel-hub.service
|
||||
systemctl start --quiet beszel-hub.service
|
||||
|
||||
# Wait for the service to start or fail
|
||||
sleep 2
|
||||
@@ -444,4 +476,4 @@ EOF
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "The Beszel Hub has been installed and configured successfully! It is now accessible on port $PORT."
|
||||
printf "\n\033[32mBeszel Hub has been installed successfully! It is now accessible on port $PORT.\033[0m\n"
|
||||
|
||||
Reference in New Issue
Block a user