Compare commits

...

9 Commits

Author SHA1 Message Date
henrygd
ed50367f70 fix(agent): add fallback for podman container health (#1475) 2026-03-15 17:59:59 -04:00
henrygd
4ebe869591 ui: virtualize smart table 2026-03-15 15:20:07 -04:00
henrygd
c9bbbe91f2 ui: improve table col widths and hide text showing above header 2026-03-15 14:59:25 -04:00
henrygd
5bfe4f6970 agent: include ip in container port if not 0.0.0.0 or :: 2026-03-15 14:58:21 -04:00
henrygd
380d2b1091 add ports column to containers table (#1481) 2026-03-14 19:29:39 -04:00
henrygd
a7f99e7a8c agent: support new Docker API Health field (#1475) 2026-03-14 15:26:44 -04:00
henrygd
bd94a9d142 agent: improve disk discovery / IO mapping and add tests (#1811) 2026-03-13 16:03:27 -04:00
henrygd
8e2316f845 refactor: simplify/improve status alert handling (#1519)
also adds new functionality to restore any pending down alerts
that were lost by hub restart before creation
2026-03-12 15:53:40 -04:00
Sven van Ginkel
0d3dfcb207 fix(hub): check if status alert is triggered before sending up alert (#1806) 2026-03-12 13:38:42 -04:00
21 changed files with 2158 additions and 524 deletions

View File

@@ -14,6 +14,25 @@ import (
"github.com/shirou/gopsutil/v4/disk" "github.com/shirou/gopsutil/v4/disk"
) )
// fsRegistrationContext holds the shared lookup state needed to resolve a
// filesystem into the tracked fsStats key and metadata.
type fsRegistrationContext struct {
filesystem string // value of optional FILESYSTEM env var
isWindows bool
efPath string // path to extra filesystems (default "/extra-filesystems")
diskIoCounters map[string]disk.IOCountersStat
}
// diskDiscovery groups the transient state for a single initializeDiskInfo run so
// helper methods can share the same partitions, mount paths, and lookup functions
type diskDiscovery struct {
agent *Agent
rootMountPoint string
partitions []disk.PartitionStat
usageFn func(string) (*disk.UsageStat, error)
ctx fsRegistrationContext
}
// parseFilesystemEntry parses a filesystem entry in the format "device__customname" // parseFilesystemEntry parses a filesystem entry in the format "device__customname"
// Returns the device/filesystem part and the custom name part // Returns the device/filesystem part and the custom name part
func parseFilesystemEntry(entry string) (device, customName string) { func parseFilesystemEntry(entry string) (device, customName string) {
@@ -27,19 +46,230 @@ func parseFilesystemEntry(entry string) (device, customName string) {
return device, customName return device, customName
} }
// extraFilesystemPartitionInfo derives the I/O device and optional display name
// for a mounted /extra-filesystems partition. Prefer the partition device reported
// by the system and only use the folder name for custom naming metadata.
func extraFilesystemPartitionInfo(p disk.PartitionStat) (device, customName string) {
device = strings.TrimSpace(p.Device)
folderDevice, customName := parseFilesystemEntry(filepath.Base(p.Mountpoint))
if device == "" {
device = folderDevice
}
return device, customName
}
func isDockerSpecialMountpoint(mountpoint string) bool { func isDockerSpecialMountpoint(mountpoint string) bool {
switch mountpoint { switch mountpoint {
case "/etc/hosts", "/etc/resolv.conf", "/etc/hostname": case "/etc/hosts", "/etc/resolv.conf", "/etc/hostname":
return true return true
default: }
return false
}
// registerFilesystemStats resolves the tracked key and stats payload for a
// filesystem before it is inserted into fsStats.
func registerFilesystemStats(existing map[string]*system.FsStats, device, mountpoint string, root bool, customName string, ctx fsRegistrationContext) (string, *system.FsStats, bool) {
key := device
if !ctx.isWindows {
key = filepath.Base(device)
}
if root {
// Try to map root device to a diskIoCounters entry. First checks for an
// exact key match, then uses findIoDevice for normalized / prefix-based
// matching (e.g. nda0p2 -> nda0), and finally falls back to FILESYSTEM.
if _, ioMatch := ctx.diskIoCounters[key]; !ioMatch {
if matchedKey, match := findIoDevice(key, ctx.diskIoCounters); match {
key = matchedKey
} else if ctx.filesystem != "" {
if matchedKey, match := findIoDevice(ctx.filesystem, ctx.diskIoCounters); match {
key = matchedKey
}
}
if _, ioMatch = ctx.diskIoCounters[key]; !ioMatch {
slog.Warn("Root I/O unmapped; set FILESYSTEM", "device", device, "mountpoint", mountpoint)
}
}
} else {
// Check if non-root has diskstats and prefer the folder device for
// /extra-filesystems mounts when the discovered partition device is a
// mapper path (e.g. luks UUID) that obscures the underlying block device.
if _, ioMatch := ctx.diskIoCounters[key]; !ioMatch {
if strings.HasPrefix(mountpoint, ctx.efPath) {
folderDevice, _ := parseFilesystemEntry(filepath.Base(mountpoint))
if folderDevice != "" {
if matchedKey, match := findIoDevice(folderDevice, ctx.diskIoCounters); match {
key = matchedKey
}
}
}
if _, ioMatch = ctx.diskIoCounters[key]; !ioMatch {
if matchedKey, match := findIoDevice(key, ctx.diskIoCounters); match {
key = matchedKey
}
}
}
}
if _, exists := existing[key]; exists {
return "", nil, false
}
fsStats := &system.FsStats{Root: root, Mountpoint: mountpoint}
if customName != "" {
fsStats.Name = customName
}
return key, fsStats, true
}
// addFsStat inserts a discovered filesystem if it resolves to a new tracking
// key. The key selection itself lives in buildFsStatRegistration so that logic
// can stay directly unit-tested.
func (d *diskDiscovery) addFsStat(device, mountpoint string, root bool, customName string) {
key, fsStats, ok := registerFilesystemStats(d.agent.fsStats, device, mountpoint, root, customName, d.ctx)
if !ok {
return
}
d.agent.fsStats[key] = fsStats
name := key
if customName != "" {
name = customName
}
slog.Info("Detected disk", "name", name, "device", device, "mount", mountpoint, "io", key, "root", root)
}
// addConfiguredRootFs resolves FILESYSTEM against partitions first, then falls
// back to direct diskstats matching for setups like ZFS where partitions do not
// expose the physical device name.
func (d *diskDiscovery) addConfiguredRootFs() bool {
if d.ctx.filesystem == "" {
return false return false
} }
for _, p := range d.partitions {
if filesystemMatchesPartitionSetting(d.ctx.filesystem, p) {
d.addFsStat(p.Device, p.Mountpoint, true, "")
return true
}
}
// FILESYSTEM may name a physical disk absent from partitions (e.g. ZFS lists
// dataset paths like zroot/ROOT/default, not block devices).
if ioKey, match := findIoDevice(d.ctx.filesystem, d.ctx.diskIoCounters); match {
d.agent.fsStats[ioKey] = &system.FsStats{Root: true, Mountpoint: d.rootMountPoint}
return true
}
slog.Warn("Partition details not found", "filesystem", d.ctx.filesystem)
return false
}
func isRootFallbackPartition(p disk.PartitionStat, rootMountPoint string) bool {
return p.Mountpoint == rootMountPoint ||
(isDockerSpecialMountpoint(p.Mountpoint) && strings.HasPrefix(p.Device, "/dev"))
}
// addPartitionRootFs handles the non-configured root fallback path when a
// partition looks like the active root mount but still needs translating to an
// I/O device key.
func (d *diskDiscovery) addPartitionRootFs(device, mountpoint string) bool {
fs, match := findIoDevice(filepath.Base(device), d.ctx.diskIoCounters)
if !match {
return false
}
// The resolved I/O device is already known here, so use it directly to avoid
// a second fallback search inside buildFsStatRegistration.
d.addFsStat(fs, mountpoint, true, "")
return true
}
// addLastResortRootFs is only used when neither FILESYSTEM nor partition-based
// heuristics can identify root, so it picks the busiest I/O device as a final
// fallback and preserves the root mountpoint for usage collection.
func (d *diskDiscovery) addLastResortRootFs() {
rootKey := mostActiveIoDevice(d.ctx.diskIoCounters)
if rootKey != "" {
slog.Warn("Using most active device for root I/O; set FILESYSTEM to override", "device", rootKey)
} else {
rootKey = filepath.Base(d.rootMountPoint)
if _, exists := d.agent.fsStats[rootKey]; exists {
rootKey = "root"
}
slog.Warn("Root I/O device not detected; set FILESYSTEM to override")
}
d.agent.fsStats[rootKey] = &system.FsStats{Root: true, Mountpoint: d.rootMountPoint}
}
// findPartitionByFilesystemSetting matches an EXTRA_FILESYSTEMS entry against a
// discovered partition either by mountpoint or by device suffix.
func findPartitionByFilesystemSetting(filesystem string, partitions []disk.PartitionStat) (disk.PartitionStat, bool) {
for _, p := range partitions {
if strings.HasSuffix(p.Device, filesystem) || p.Mountpoint == filesystem {
return p, true
}
}
return disk.PartitionStat{}, false
}
// addConfiguredExtraFsEntry resolves one EXTRA_FILESYSTEMS entry, preferring a
// discovered partition and falling back to any path that disk.Usage accepts.
func (d *diskDiscovery) addConfiguredExtraFsEntry(filesystem, customName string) {
if p, found := findPartitionByFilesystemSetting(filesystem, d.partitions); found {
d.addFsStat(p.Device, p.Mountpoint, false, customName)
return
}
if _, err := d.usageFn(filesystem); err == nil {
d.addFsStat(filepath.Base(filesystem), filesystem, false, customName)
return
} else {
slog.Error("Invalid filesystem", "name", filesystem, "err", err)
}
}
// addConfiguredExtraFilesystems parses and registers the comma-separated
// EXTRA_FILESYSTEMS env var entries.
func (d *diskDiscovery) addConfiguredExtraFilesystems(extraFilesystems string) {
for fsEntry := range strings.SplitSeq(extraFilesystems, ",") {
filesystem, customName := parseFilesystemEntry(fsEntry)
d.addConfiguredExtraFsEntry(filesystem, customName)
}
}
// addPartitionExtraFs registers partitions mounted under /extra-filesystems so
// their display names can come from the folder name while their I/O keys still
// prefer the underlying partition device.
func (d *diskDiscovery) addPartitionExtraFs(p disk.PartitionStat) {
if !strings.HasPrefix(p.Mountpoint, d.ctx.efPath) {
return
}
device, customName := extraFilesystemPartitionInfo(p)
d.addFsStat(device, p.Mountpoint, false, customName)
}
// addExtraFilesystemFolders handles bare directories under /extra-filesystems
// that may not appear in partition discovery, while skipping mountpoints that
// were already registered from higher-fidelity sources.
func (d *diskDiscovery) addExtraFilesystemFolders(folderNames []string) {
existingMountpoints := make(map[string]bool, len(d.agent.fsStats))
for _, stats := range d.agent.fsStats {
existingMountpoints[stats.Mountpoint] = true
}
for _, folderName := range folderNames {
mountpoint := filepath.Join(d.ctx.efPath, folderName)
slog.Debug("/extra-filesystems", "mountpoint", mountpoint)
if existingMountpoints[mountpoint] {
continue
}
device, customName := parseFilesystemEntry(folderName)
d.addFsStat(device, mountpoint, false, customName)
}
} }
// Sets up the filesystems to monitor for disk usage and I/O. // Sets up the filesystems to monitor for disk usage and I/O.
func (a *Agent) initializeDiskInfo() { func (a *Agent) initializeDiskInfo() {
filesystem, _ := utils.GetEnv("FILESYSTEM") filesystem, _ := utils.GetEnv("FILESYSTEM")
efPath := "/extra-filesystems"
hasRoot := false hasRoot := false
isWindows := runtime.GOOS == "windows" isWindows := runtime.GOOS == "windows"
@@ -56,167 +286,57 @@ func (a *Agent) initializeDiskInfo() {
} }
} }
// ioContext := context.WithValue(a.sensorsContext,
// common.EnvKey, common.EnvMap{common.HostProcEnvKey: "/tmp/testproc"},
// )
// diskIoCounters, err := disk.IOCountersWithContext(ioContext)
diskIoCounters, err := disk.IOCounters() diskIoCounters, err := disk.IOCounters()
if err != nil { if err != nil {
slog.Error("Error getting diskstats", "err", err) slog.Error("Error getting diskstats", "err", err)
} }
slog.Debug("Disk I/O", "diskstats", diskIoCounters) slog.Debug("Disk I/O", "diskstats", diskIoCounters)
ctx := fsRegistrationContext{
// Helper function to add a filesystem to fsStats if it doesn't exist filesystem: filesystem,
addFsStat := func(device, mountpoint string, root bool, customName ...string) { isWindows: isWindows,
var key string diskIoCounters: diskIoCounters,
if isWindows { efPath: "/extra-filesystems",
key = device
} else {
key = filepath.Base(device)
}
var ioMatch bool
if _, exists := a.fsStats[key]; !exists {
if root {
slog.Info("Detected root device", "name", key)
// Try to map root device to a diskIoCounters entry. First
// checks for an exact key match, then uses findIoDevice for
// normalized / prefix-based matching (e.g. nda0p2 → nda0),
// and finally falls back to the FILESYSTEM env var.
if _, ioMatch = diskIoCounters[key]; !ioMatch {
if matchedKey, match := findIoDevice(key, diskIoCounters); match {
key = matchedKey
ioMatch = true
} else if filesystem != "" {
if matchedKey, match := findIoDevice(filesystem, diskIoCounters); match {
key = matchedKey
ioMatch = true
}
}
if !ioMatch {
slog.Warn("Root I/O unmapped; set FILESYSTEM", "device", device, "mountpoint", mountpoint)
}
}
} else {
// Check if non-root has diskstats and fall back to folder name if not
// Scenario: device is encrypted and named luks-2bcb02be-999d-4417-8d18-5c61e660fb6e - not in /proc/diskstats.
// However, the device can be specified by mounting folder from luks device at /extra-filesystems/sda1
if _, ioMatch = diskIoCounters[key]; !ioMatch {
efBase := filepath.Base(mountpoint)
if _, ioMatch = diskIoCounters[efBase]; ioMatch {
key = efBase
}
}
}
fsStats := &system.FsStats{Root: root, Mountpoint: mountpoint}
if len(customName) > 0 && customName[0] != "" {
fsStats.Name = customName[0]
}
a.fsStats[key] = fsStats
}
} }
// Get the appropriate root mount point for this system // Get the appropriate root mount point for this system
rootMountPoint := a.getRootMountPoint() discovery := diskDiscovery{
agent: a,
// Use FILESYSTEM env var to find root filesystem rootMountPoint: a.getRootMountPoint(),
if filesystem != "" { partitions: partitions,
for _, p := range partitions { usageFn: disk.Usage,
if filesystemMatchesPartitionSetting(filesystem, p) { ctx: ctx,
addFsStat(p.Device, p.Mountpoint, true)
hasRoot = true
break
}
}
if !hasRoot {
// FILESYSTEM may name a physical disk absent from partitions (e.g.
// ZFS lists dataset paths like zroot/ROOT/default, not block devices).
// Try matching directly against diskIoCounters.
if ioKey, match := findIoDevice(filesystem, diskIoCounters); match {
a.fsStats[ioKey] = &system.FsStats{Root: true, Mountpoint: rootMountPoint}
hasRoot = true
} else {
slog.Warn("Partition details not found", "filesystem", filesystem)
}
}
} }
hasRoot = discovery.addConfiguredRootFs()
// Add EXTRA_FILESYSTEMS env var values to fsStats // Add EXTRA_FILESYSTEMS env var values to fsStats
if extraFilesystems, exists := utils.GetEnv("EXTRA_FILESYSTEMS"); exists { if extraFilesystems, exists := utils.GetEnv("EXTRA_FILESYSTEMS"); exists {
for fsEntry := range strings.SplitSeq(extraFilesystems, ",") { discovery.addConfiguredExtraFilesystems(extraFilesystems)
// Parse custom name from format: device__customname
fs, customName := parseFilesystemEntry(fsEntry)
found := false
for _, p := range partitions {
if strings.HasSuffix(p.Device, fs) || p.Mountpoint == fs {
addFsStat(p.Device, p.Mountpoint, false, customName)
found = true
break
}
}
// if not in partitions, test if we can get disk usage
if !found {
if _, err := disk.Usage(fs); err == nil {
addFsStat(filepath.Base(fs), fs, false, customName)
} else {
slog.Error("Invalid filesystem", "name", fs, "err", err)
}
}
}
} }
// Process partitions for various mount points // Process partitions for various mount points
for _, p := range partitions { for _, p := range partitions {
// fmt.Println(p.Device, p.Mountpoint) if !hasRoot && isRootFallbackPartition(p, discovery.rootMountPoint) {
// Binary root fallback or docker root fallback hasRoot = discovery.addPartitionRootFs(p.Device, p.Mountpoint)
if !hasRoot && (p.Mountpoint == rootMountPoint || (isDockerSpecialMountpoint(p.Mountpoint) && strings.HasPrefix(p.Device, "/dev"))) {
fs, match := findIoDevice(filepath.Base(p.Device), diskIoCounters)
if match {
addFsStat(fs, p.Mountpoint, true)
hasRoot = true
}
}
// Check if device is in /extra-filesystems
if strings.HasPrefix(p.Mountpoint, efPath) {
device, customName := parseFilesystemEntry(p.Mountpoint)
addFsStat(device, p.Mountpoint, false, customName)
} }
discovery.addPartitionExtraFs(p)
} }
// Check all folders in /extra-filesystems and add them if not already present // Check all folders in /extra-filesystems and add them if not already present
if folders, err := os.ReadDir(efPath); err == nil { if folders, err := os.ReadDir(discovery.ctx.efPath); err == nil {
existingMountpoints := make(map[string]bool) folderNames := make([]string, 0, len(folders))
for _, stats := range a.fsStats {
existingMountpoints[stats.Mountpoint] = true
}
for _, folder := range folders { for _, folder := range folders {
if folder.IsDir() { if folder.IsDir() {
mountpoint := filepath.Join(efPath, folder.Name()) folderNames = append(folderNames, folder.Name())
slog.Debug("/extra-filesystems", "mountpoint", mountpoint)
if !existingMountpoints[mountpoint] {
device, customName := parseFilesystemEntry(folder.Name())
addFsStat(device, mountpoint, false, customName)
}
} }
} }
discovery.addExtraFilesystemFolders(folderNames)
} }
// If no root filesystem set, try the most active I/O device as a last // If no root filesystem set, try the most active I/O device as a last
// resort (e.g. ZFS where dataset names are unrelated to disk names). // resort (e.g. ZFS where dataset names are unrelated to disk names).
if !hasRoot { if !hasRoot {
rootKey := mostActiveIoDevice(diskIoCounters) discovery.addLastResortRootFs()
if rootKey != "" {
slog.Warn("Using most active device for root I/O; set FILESYSTEM to override", "device", rootKey)
} else {
rootKey = filepath.Base(rootMountPoint)
if _, exists := a.fsStats[rootKey]; exists {
rootKey = "root"
}
slog.Warn("Root I/O device not detected; set FILESYSTEM to override")
}
a.fsStats[rootKey] = &system.FsStats{Root: true, Mountpoint: rootMountPoint}
} }
a.pruneDuplicateRootExtraFilesystems() a.pruneDuplicateRootExtraFilesystems()
@@ -381,6 +501,8 @@ func normalizeDeviceName(value string) string {
// Sets start values for disk I/O stats. // Sets start values for disk I/O stats.
func (a *Agent) initializeDiskIoStats(diskIoCounters map[string]disk.IOCountersStat) { func (a *Agent) initializeDiskIoStats(diskIoCounters map[string]disk.IOCountersStat) {
a.fsNames = a.fsNames[:0]
now := time.Now()
for device, stats := range a.fsStats { for device, stats := range a.fsStats {
// skip if not in diskIoCounters // skip if not in diskIoCounters
d, exists := diskIoCounters[device] d, exists := diskIoCounters[device]
@@ -389,7 +511,7 @@ func (a *Agent) initializeDiskIoStats(diskIoCounters map[string]disk.IOCountersS
continue continue
} }
// populate initial values // populate initial values
stats.Time = time.Now() stats.Time = now
stats.TotalRead = d.ReadBytes stats.TotalRead = d.ReadBytes
stats.TotalWrite = d.WriteBytes stats.TotalWrite = d.WriteBytes
// add to list of valid io device names // add to list of valid io device names

View File

@@ -93,6 +93,443 @@ func TestParseFilesystemEntry(t *testing.T) {
} }
} }
func TestExtraFilesystemPartitionInfo(t *testing.T) {
t.Run("uses partition device for label-only mountpoint", func(t *testing.T) {
device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
Device: "/dev/sdc",
Mountpoint: "/extra-filesystems/Share",
})
assert.Equal(t, "/dev/sdc", device)
assert.Equal(t, "", customName)
})
t.Run("uses custom name from mountpoint suffix", func(t *testing.T) {
device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
Device: "/dev/sdc",
Mountpoint: "/extra-filesystems/sdc__Share",
})
assert.Equal(t, "/dev/sdc", device)
assert.Equal(t, "Share", customName)
})
t.Run("falls back to folder device when partition device is unavailable", func(t *testing.T) {
device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
Mountpoint: "/extra-filesystems/sdc__Share",
})
assert.Equal(t, "sdc", device)
assert.Equal(t, "Share", customName)
})
t.Run("supports custom name without folder device prefix", func(t *testing.T) {
device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
Device: "/dev/sdc",
Mountpoint: "/extra-filesystems/__Share",
})
assert.Equal(t, "/dev/sdc", device)
assert.Equal(t, "Share", customName)
})
}
func TestBuildFsStatRegistration(t *testing.T) {
t.Run("uses basename for non-windows exact io match", func(t *testing.T) {
key, stats, ok := registerFilesystemStats(
map[string]*system.FsStats{},
"/dev/sda1",
"/mnt/data",
false,
"archive",
fsRegistrationContext{
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"sda1": {Name: "sda1"},
},
},
)
assert.True(t, ok)
assert.Equal(t, "sda1", key)
assert.Equal(t, "/mnt/data", stats.Mountpoint)
assert.Equal(t, "archive", stats.Name)
assert.False(t, stats.Root)
})
t.Run("maps root partition to io device by prefix", func(t *testing.T) {
key, stats, ok := registerFilesystemStats(
map[string]*system.FsStats{},
"/dev/ada0p2",
"/",
true,
"",
fsRegistrationContext{
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"ada0": {Name: "ada0", ReadBytes: 1000, WriteBytes: 1000},
},
},
)
assert.True(t, ok)
assert.Equal(t, "ada0", key)
assert.True(t, stats.Root)
assert.Equal(t, "/", stats.Mountpoint)
})
t.Run("uses filesystem setting as root fallback", func(t *testing.T) {
key, _, ok := registerFilesystemStats(
map[string]*system.FsStats{},
"overlay",
"/",
true,
"",
fsRegistrationContext{
filesystem: "nvme0n1p2",
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"nvme0n1": {Name: "nvme0n1", ReadBytes: 1000, WriteBytes: 1000},
},
},
)
assert.True(t, ok)
assert.Equal(t, "nvme0n1", key)
})
t.Run("prefers parsed extra-filesystems device over mapper device", func(t *testing.T) {
key, stats, ok := registerFilesystemStats(
map[string]*system.FsStats{},
"/dev/mapper/luks-2bcb02be-999d-4417-8d18-5c61e660fb6e",
"/extra-filesystems/nvme0n1p2__Archive",
false,
"Archive",
fsRegistrationContext{
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"dm-1": {Name: "dm-1", Label: "luks-2bcb02be-999d-4417-8d18-5c61e660fb6e"},
"nvme0n1p2": {Name: "nvme0n1p2"},
},
},
)
assert.True(t, ok)
assert.Equal(t, "nvme0n1p2", key)
assert.Equal(t, "Archive", stats.Name)
})
t.Run("falls back to mapper io device when folder device cannot be resolved", func(t *testing.T) {
key, stats, ok := registerFilesystemStats(
map[string]*system.FsStats{},
"/dev/mapper/luks-2bcb02be-999d-4417-8d18-5c61e660fb6e",
"/extra-filesystems/Archive",
false,
"Archive",
fsRegistrationContext{
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"dm-1": {Name: "dm-1", Label: "luks-2bcb02be-999d-4417-8d18-5c61e660fb6e"},
},
},
)
assert.True(t, ok)
assert.Equal(t, "dm-1", key)
assert.Equal(t, "Archive", stats.Name)
})
t.Run("uses full device name on windows", func(t *testing.T) {
key, _, ok := registerFilesystemStats(
map[string]*system.FsStats{},
`C:`,
`C:\\`,
false,
"",
fsRegistrationContext{
isWindows: true,
diskIoCounters: map[string]disk.IOCountersStat{
`C:`: {Name: `C:`},
},
},
)
assert.True(t, ok)
assert.Equal(t, `C:`, key)
})
t.Run("skips existing key", func(t *testing.T) {
key, stats, ok := registerFilesystemStats(
map[string]*system.FsStats{"sda1": {Mountpoint: "/existing"}},
"/dev/sda1",
"/mnt/data",
false,
"",
fsRegistrationContext{
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"sda1": {Name: "sda1"},
},
},
)
assert.False(t, ok)
assert.Empty(t, key)
assert.Nil(t, stats)
})
}
func TestAddConfiguredRootFs(t *testing.T) {
t.Run("adds root from matching partition", func(t *testing.T) {
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
discovery := diskDiscovery{
agent: agent,
rootMountPoint: "/",
partitions: []disk.PartitionStat{{Device: "/dev/ada0p2", Mountpoint: "/"}},
ctx: fsRegistrationContext{
filesystem: "/dev/ada0p2",
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"ada0": {Name: "ada0", ReadBytes: 1000, WriteBytes: 1000},
},
},
}
ok := discovery.addConfiguredRootFs()
assert.True(t, ok)
stats, exists := agent.fsStats["ada0"]
assert.True(t, exists)
assert.True(t, stats.Root)
assert.Equal(t, "/", stats.Mountpoint)
})
t.Run("adds root from io device when partition is missing", func(t *testing.T) {
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
discovery := diskDiscovery{
agent: agent,
rootMountPoint: "/sysroot",
ctx: fsRegistrationContext{
filesystem: "zroot",
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"nda0": {Name: "nda0", Label: "zroot", ReadBytes: 1000, WriteBytes: 1000},
},
},
}
ok := discovery.addConfiguredRootFs()
assert.True(t, ok)
stats, exists := agent.fsStats["nda0"]
assert.True(t, exists)
assert.True(t, stats.Root)
assert.Equal(t, "/sysroot", stats.Mountpoint)
})
t.Run("returns false when filesystem cannot be resolved", func(t *testing.T) {
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
discovery := diskDiscovery{
agent: agent,
rootMountPoint: "/",
ctx: fsRegistrationContext{
filesystem: "missing-disk",
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{},
},
}
ok := discovery.addConfiguredRootFs()
assert.False(t, ok)
assert.Empty(t, agent.fsStats)
})
}
func TestAddPartitionRootFs(t *testing.T) {
t.Run("adds root from fallback partition candidate", func(t *testing.T) {
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
discovery := diskDiscovery{
agent: agent,
ctx: fsRegistrationContext{
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"nvme0n1": {Name: "nvme0n1", ReadBytes: 1000, WriteBytes: 1000},
},
},
}
ok := discovery.addPartitionRootFs("/dev/nvme0n1p2", "/")
assert.True(t, ok)
stats, exists := agent.fsStats["nvme0n1"]
assert.True(t, exists)
assert.True(t, stats.Root)
assert.Equal(t, "/", stats.Mountpoint)
})
t.Run("returns false when no io device matches", func(t *testing.T) {
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
discovery := diskDiscovery{agent: agent, ctx: fsRegistrationContext{diskIoCounters: map[string]disk.IOCountersStat{}}}
ok := discovery.addPartitionRootFs("/dev/mapper/root", "/")
assert.False(t, ok)
assert.Empty(t, agent.fsStats)
})
}
func TestAddLastResortRootFs(t *testing.T) {
t.Run("uses most active io device when available", func(t *testing.T) {
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
discovery := diskDiscovery{agent: agent, rootMountPoint: "/", ctx: fsRegistrationContext{diskIoCounters: map[string]disk.IOCountersStat{
"sda": {Name: "sda", ReadBytes: 5000, WriteBytes: 5000},
"sdb": {Name: "sdb", ReadBytes: 1000, WriteBytes: 1000},
}}}
discovery.addLastResortRootFs()
stats, exists := agent.fsStats["sda"]
assert.True(t, exists)
assert.True(t, stats.Root)
})
t.Run("falls back to root key when mountpoint basename collides", func(t *testing.T) {
agent := &Agent{fsStats: map[string]*system.FsStats{
"sysroot": {Mountpoint: "/extra-filesystems/sysroot"},
}}
discovery := diskDiscovery{agent: agent, rootMountPoint: "/sysroot", ctx: fsRegistrationContext{diskIoCounters: map[string]disk.IOCountersStat{}}}
discovery.addLastResortRootFs()
stats, exists := agent.fsStats["root"]
assert.True(t, exists)
assert.True(t, stats.Root)
assert.Equal(t, "/sysroot", stats.Mountpoint)
})
}
func TestAddConfiguredExtraFsEntry(t *testing.T) {
t.Run("uses matching partition when present", func(t *testing.T) {
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
discovery := diskDiscovery{
agent: agent,
partitions: []disk.PartitionStat{{Device: "/dev/sdb1", Mountpoint: "/mnt/backup"}},
usageFn: func(string) (*disk.UsageStat, error) {
t.Fatal("usage fallback should not be called when partition matches")
return nil, nil
},
ctx: fsRegistrationContext{
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"sdb1": {Name: "sdb1"},
},
},
}
discovery.addConfiguredExtraFsEntry("sdb1", "backup")
stats, exists := agent.fsStats["sdb1"]
assert.True(t, exists)
assert.Equal(t, "/mnt/backup", stats.Mountpoint)
assert.Equal(t, "backup", stats.Name)
})
t.Run("falls back to usage-validated path", func(t *testing.T) {
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
discovery := diskDiscovery{
agent: agent,
usageFn: func(path string) (*disk.UsageStat, error) {
assert.Equal(t, "/srv/archive", path)
return &disk.UsageStat{}, nil
},
ctx: fsRegistrationContext{
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"archive": {Name: "archive"},
},
},
}
discovery.addConfiguredExtraFsEntry("/srv/archive", "archive")
stats, exists := agent.fsStats["archive"]
assert.True(t, exists)
assert.Equal(t, "/srv/archive", stats.Mountpoint)
assert.Equal(t, "archive", stats.Name)
})
t.Run("ignores invalid filesystem entry", func(t *testing.T) {
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
discovery := diskDiscovery{
agent: agent,
usageFn: func(string) (*disk.UsageStat, error) {
return nil, os.ErrNotExist
},
}
discovery.addConfiguredExtraFsEntry("/missing/archive", "")
assert.Empty(t, agent.fsStats)
})
}
func TestAddConfiguredExtraFilesystems(t *testing.T) {
t.Run("parses and registers multiple configured filesystems", func(t *testing.T) {
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
discovery := diskDiscovery{
agent: agent,
partitions: []disk.PartitionStat{{Device: "/dev/sda1", Mountpoint: "/mnt/fast"}},
usageFn: func(path string) (*disk.UsageStat, error) {
if path == "/srv/archive" {
return &disk.UsageStat{}, nil
}
return nil, os.ErrNotExist
},
ctx: fsRegistrationContext{
isWindows: false,
diskIoCounters: map[string]disk.IOCountersStat{
"sda1": {Name: "sda1"},
"archive": {Name: "archive"},
},
},
}
discovery.addConfiguredExtraFilesystems("sda1__fast,/srv/archive__cold")
assert.Contains(t, agent.fsStats, "sda1")
assert.Equal(t, "fast", agent.fsStats["sda1"].Name)
assert.Contains(t, agent.fsStats, "archive")
assert.Equal(t, "cold", agent.fsStats["archive"].Name)
})
}
func TestAddExtraFilesystemFolders(t *testing.T) {
t.Run("adds missing folders and skips existing mountpoints", func(t *testing.T) {
agent := &Agent{fsStats: map[string]*system.FsStats{
"existing": {Mountpoint: "/extra-filesystems/existing"},
}}
discovery := diskDiscovery{
agent: agent,
ctx: fsRegistrationContext{
isWindows: false,
efPath: "/extra-filesystems",
diskIoCounters: map[string]disk.IOCountersStat{
"newdisk": {Name: "newdisk"},
},
},
}
discovery.addExtraFilesystemFolders([]string{"existing", "newdisk__Archive"})
assert.Len(t, agent.fsStats, 2)
stats, exists := agent.fsStats["newdisk"]
assert.True(t, exists)
assert.Equal(t, "/extra-filesystems/newdisk__Archive", stats.Mountpoint)
assert.Equal(t, "Archive", stats.Name)
})
}
func TestFindIoDevice(t *testing.T) { func TestFindIoDevice(t *testing.T) {
t.Run("matches by device name", func(t *testing.T) { t.Run("matches by device name", func(t *testing.T) {
ioCounters := map[string]disk.IOCountersStat{ ioCounters := map[string]disk.IOCountersStat{
@@ -310,7 +747,7 @@ func TestInitializeDiskInfoWithCustomNames(t *testing.T) {
// Test the parsing logic by calling the relevant part // Test the parsing logic by calling the relevant part
// We'll create a simplified version to test just the parsing // We'll create a simplified version to test just the parsing
extraFilesystems := tc.envValue extraFilesystems := tc.envValue
for _, fsEntry := range strings.Split(extraFilesystems, ",") { for fsEntry := range strings.SplitSeq(extraFilesystems, ",") {
// Parse the entry // Parse the entry
fsEntry = strings.TrimSpace(fsEntry) fsEntry = strings.TrimSpace(fsEntry)
var fs, customName string var fs, customName string
@@ -506,3 +943,33 @@ func TestHasSameDiskUsage(t *testing.T) {
assert.False(t, hasSameDiskUsage(&disk.UsageStat{Total: 0, Used: 0}, &disk.UsageStat{Total: 1, Used: 1})) assert.False(t, hasSameDiskUsage(&disk.UsageStat{Total: 0, Used: 0}, &disk.UsageStat{Total: 1, Used: 1}))
}) })
} }
func TestInitializeDiskIoStatsResetsTrackedDevices(t *testing.T) {
agent := &Agent{
fsStats: map[string]*system.FsStats{
"sda": {},
"sdb": {},
},
fsNames: []string{"stale", "sda"},
}
agent.initializeDiskIoStats(map[string]disk.IOCountersStat{
"sda": {Name: "sda", ReadBytes: 10, WriteBytes: 20},
"sdb": {Name: "sdb", ReadBytes: 30, WriteBytes: 40},
})
assert.ElementsMatch(t, []string{"sda", "sdb"}, agent.fsNames)
assert.Len(t, agent.fsNames, 2)
assert.Equal(t, uint64(10), agent.fsStats["sda"].TotalRead)
assert.Equal(t, uint64(20), agent.fsStats["sda"].TotalWrite)
assert.False(t, agent.fsStats["sda"].Time.IsZero())
assert.False(t, agent.fsStats["sdb"].Time.IsZero())
agent.initializeDiskIoStats(map[string]disk.IOCountersStat{
"sdb": {Name: "sdb", ReadBytes: 50, WriteBytes: 60},
})
assert.Equal(t, []string{"sdb"}, agent.fsNames)
assert.Equal(t, uint64(50), agent.fsStats["sdb"].TotalRead)
assert.Equal(t, uint64(60), agent.fsStats["sdb"].TotalWrite)
}

View File

@@ -16,6 +16,8 @@ import (
"os" "os"
"path" "path"
"regexp" "regexp"
"sort"
"strconv"
"strings" "strings"
"sync" "sync"
"time" "time"
@@ -346,6 +348,39 @@ func updateContainerStatsValues(stats *container.Stats, cpuPct float64, usedMemo
stats.PrevReadTime = readTime stats.PrevReadTime = readTime
} }
// convertContainerPortsToString formats the ports of a container into a sorted, deduplicated string.
// ctr.Ports is nilled out after processing so the slice is not accidentally reused.
func convertContainerPortsToString(ctr *container.ApiInfo) string {
if len(ctr.Ports) == 0 {
return ""
}
sort.Slice(ctr.Ports, func(i, j int) bool {
return ctr.Ports[i].PublicPort < ctr.Ports[j].PublicPort
})
var builder strings.Builder
seenPorts := make(map[uint16]struct{})
for _, p := range ctr.Ports {
_, ok := seenPorts[p.PublicPort]
if p.PublicPort == 0 || ok {
continue
}
seenPorts[p.PublicPort] = struct{}{}
if builder.Len() > 0 {
builder.WriteString(", ")
}
switch p.IP {
case "0.0.0.0", "::":
default:
builder.WriteString(p.IP)
builder.WriteByte(':')
}
builder.WriteString(strconv.Itoa(int(p.PublicPort)))
}
// clear ports slice so it doesn't get reused and blend into next response
ctr.Ports = nil
return builder.String()
}
func parseDockerStatus(status string) (string, container.DockerHealth) { func parseDockerStatus(status string) (string, container.DockerHealth) {
trimmed := strings.TrimSpace(status) trimmed := strings.TrimSpace(status)
if trimmed == "" { if trimmed == "" {
@@ -365,22 +400,60 @@ func parseDockerStatus(status string) (string, container.DockerHealth) {
statusText = trimmed statusText = trimmed
} }
healthText := strings.ToLower(strings.TrimSpace(strings.TrimSuffix(trimmed[openIdx+1:], ")"))) healthText := strings.TrimSpace(strings.TrimSuffix(trimmed[openIdx+1:], ")"))
// Some Docker statuses include a "health:" prefix inside the parentheses. // Some Docker statuses include a "health:" prefix inside the parentheses.
// Strip it so it maps correctly to the known health states. // Strip it so it maps correctly to the known health states.
if colonIdx := strings.IndexRune(healthText, ':'); colonIdx != -1 { if colonIdx := strings.IndexRune(healthText, ':'); colonIdx != -1 {
prefix := strings.TrimSpace(healthText[:colonIdx]) prefix := strings.ToLower(strings.TrimSpace(healthText[:colonIdx]))
if prefix == "health" || prefix == "health status" { if prefix == "health" || prefix == "health status" {
healthText = strings.TrimSpace(healthText[colonIdx+1:]) healthText = strings.TrimSpace(healthText[colonIdx+1:])
} }
} }
if health, ok := container.DockerHealthStrings[healthText]; ok { if health, ok := parseDockerHealthStatus(healthText); ok {
return statusText, health return statusText, health
} }
return trimmed, container.DockerHealthNone return trimmed, container.DockerHealthNone
} }
// parseDockerHealthStatus maps Docker health status strings to container.DockerHealth values
func parseDockerHealthStatus(status string) (container.DockerHealth, bool) {
health, ok := container.DockerHealthStrings[strings.ToLower(strings.TrimSpace(status))]
return health, ok
}
// getPodmanContainerHealth fetches container health status from the container inspect endpoint.
// Used for Podman which doesn't provide health status in the /containers/json endpoint as of March 2026.
// https://github.com/containers/podman/issues/27786
func (dm *dockerManager) getPodmanContainerHealth(containerID string) (container.DockerHealth, error) {
resp, err := dm.client.Get(fmt.Sprintf("http://localhost/containers/%s/json", url.PathEscape(containerID)))
if err != nil {
return container.DockerHealthNone, err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return container.DockerHealthNone, fmt.Errorf("container inspect request failed: %s", resp.Status)
}
var inspectInfo struct {
State struct {
Health struct {
Status string
}
}
}
if err := json.NewDecoder(resp.Body).Decode(&inspectInfo); err != nil {
return container.DockerHealthNone, err
}
if health, ok := parseDockerHealthStatus(inspectInfo.State.Health.Status); ok {
return health, nil
}
return container.DockerHealthNone, nil
}
// Updates stats for individual container with cache-time-aware delta tracking // Updates stats for individual container with cache-time-aware delta tracking
func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeMs uint16) error { func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeMs uint16) error {
name := ctr.Names[0][1:] name := ctr.Names[0][1:]
@@ -390,6 +463,21 @@ func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeM
return err return err
} }
statusText, health := parseDockerStatus(ctr.Status)
// Docker exposes Health.Status on /containers/json in API 1.52+.
// Podman currently requires falling back to the inspect endpoint as of March 2026.
// https://github.com/containers/podman/issues/27786
if ctr.Health.Status != "" {
if h, ok := parseDockerHealthStatus(ctr.Health.Status); ok {
health = h
}
} else if dm.usingPodman {
if podmanHealth, err := dm.getPodmanContainerHealth(ctr.IdShort); err == nil {
health = podmanHealth
}
}
dm.containerStatsMutex.Lock() dm.containerStatsMutex.Lock()
defer dm.containerStatsMutex.Unlock() defer dm.containerStatsMutex.Unlock()
@@ -401,11 +489,13 @@ func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeM
} }
stats.Id = ctr.IdShort stats.Id = ctr.IdShort
statusText, health := parseDockerStatus(ctr.Status)
stats.Status = statusText stats.Status = statusText
stats.Health = health stats.Health = health
if len(ctr.Ports) > 0 {
stats.Ports = convertContainerPortsToString(ctr)
}
// reset current stats // reset current stats
stats.Cpu = 0 stats.Cpu = 0
stats.Mem = 0 stats.Mem = 0

View File

@@ -35,6 +35,12 @@ type recordingRoundTripper struct {
lastQuery map[string]string lastQuery map[string]string
} }
type roundTripFunc func(*http.Request) (*http.Response, error)
func (fn roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) {
return fn(req)
}
func (rt *recordingRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { func (rt *recordingRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
rt.called = true rt.called = true
rt.lastPath = req.URL.EscapedPath() rt.lastPath = req.URL.EscapedPath()
@@ -214,6 +220,28 @@ func TestContainerDetailsRequestsUseExpectedDockerPaths(t *testing.T) {
}) })
} }
func TestGetPodmanContainerHealth(t *testing.T) {
called := false
dm := &dockerManager{
client: &http.Client{Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
called = true
assert.Equal(t, "/containers/0123456789ab/json", req.URL.EscapedPath())
return &http.Response{
StatusCode: http.StatusOK,
Status: "200 OK",
Header: make(http.Header),
Body: io.NopCloser(strings.NewReader(`{"State":{"Health":{"Status":"healthy"}}}`)),
Request: req,
}, nil
})},
}
health, err := dm.getPodmanContainerHealth("0123456789ab")
require.NoError(t, err)
assert.True(t, called)
assert.Equal(t, container.DockerHealthHealthy, health)
}
func TestValidateCpuPercentage(t *testing.T) { func TestValidateCpuPercentage(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
@@ -1129,6 +1157,18 @@ func TestParseDockerStatus(t *testing.T) {
expectedStatus: "", expectedStatus: "",
expectedHealth: container.DockerHealthNone, expectedHealth: container.DockerHealthNone,
}, },
{
name: "status health with health: prefix",
input: "Up 5 minutes (health: starting)",
expectedStatus: "Up 5 minutes",
expectedHealth: container.DockerHealthStarting,
},
{
name: "status health with health status: prefix",
input: "Up 10 minutes (health status: unhealthy)",
expectedStatus: "Up 10 minutes",
expectedHealth: container.DockerHealthUnhealthy,
},
} }
for _, tt := range tests { for _, tt := range tests {
@@ -1140,6 +1180,84 @@ func TestParseDockerStatus(t *testing.T) {
} }
} }
func TestParseDockerHealthStatus(t *testing.T) {
tests := []struct {
input string
expectedHealth container.DockerHealth
expectedOk bool
}{
{"healthy", container.DockerHealthHealthy, true},
{"unhealthy", container.DockerHealthUnhealthy, true},
{"starting", container.DockerHealthStarting, true},
{"none", container.DockerHealthNone, true},
{" Healthy ", container.DockerHealthHealthy, true},
{"unknown", container.DockerHealthNone, false},
{"", container.DockerHealthNone, false},
}
for _, tt := range tests {
t.Run(tt.input, func(t *testing.T) {
health, ok := parseDockerHealthStatus(tt.input)
assert.Equal(t, tt.expectedHealth, health)
assert.Equal(t, tt.expectedOk, ok)
})
}
}
func TestUpdateContainerStatsUsesPodmanInspectHealthFallback(t *testing.T) {
var requestedPaths []string
dm := &dockerManager{
client: &http.Client{Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
requestedPaths = append(requestedPaths, req.URL.EscapedPath())
switch req.URL.EscapedPath() {
case "/containers/0123456789ab/stats":
return &http.Response{
StatusCode: http.StatusOK,
Status: "200 OK",
Header: make(http.Header),
Body: io.NopCloser(strings.NewReader(`{
"read":"2026-03-15T21:26:59Z",
"cpu_stats":{"cpu_usage":{"total_usage":1000},"system_cpu_usage":2000},
"memory_stats":{"usage":1048576,"stats":{"inactive_file":262144}},
"networks":{"eth0":{"rx_bytes":0,"tx_bytes":0}}
}`)),
Request: req,
}, nil
case "/containers/0123456789ab/json":
return &http.Response{
StatusCode: http.StatusOK,
Status: "200 OK",
Header: make(http.Header),
Body: io.NopCloser(strings.NewReader(`{"State":{"Health":{"Status":"healthy"}}}`)),
Request: req,
}, nil
default:
return nil, fmt.Errorf("unexpected path: %s", req.URL.EscapedPath())
}
})},
containerStatsMap: make(map[string]*container.Stats),
apiStats: &container.ApiStats{},
usingPodman: true,
lastCpuContainer: make(map[uint16]map[string]uint64),
lastCpuSystem: make(map[uint16]map[string]uint64),
lastCpuReadTime: make(map[uint16]map[string]time.Time),
networkSentTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
networkRecvTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
}
ctr := &container.ApiInfo{
IdShort: "0123456789ab",
Names: []string{"/beszel"},
Status: "Up 2 minutes",
Image: "beszel:latest",
}
err := dm.updateContainerStats(ctr, defaultCacheTimeMs)
require.NoError(t, err)
assert.Equal(t, []string{"/containers/0123456789ab/stats", "/containers/0123456789ab/json"}, requestedPaths)
assert.Equal(t, container.DockerHealthHealthy, dm.containerStatsMap[ctr.IdShort].Health)
assert.Equal(t, "Up 2 minutes", dm.containerStatsMap[ctr.IdShort].Status)
}
func TestConstantsAndUtilityFunctions(t *testing.T) { func TestConstantsAndUtilityFunctions(t *testing.T) {
// Test constants are properly defined // Test constants are properly defined
assert.Equal(t, uint16(60000), defaultCacheTimeMs) assert.Equal(t, uint16(60000), defaultCacheTimeMs)
@@ -1455,3 +1573,99 @@ func TestAnsiEscapePattern(t *testing.T) {
}) })
} }
} }
func TestConvertContainerPortsToString(t *testing.T) {
type port = struct {
PublicPort uint16
IP string
}
tests := []struct {
name string
ports []port
expected string
}{
{
name: "empty ports",
ports: nil,
expected: "",
},
{
name: "single port",
ports: []port{
{PublicPort: 80, IP: "0.0.0.0"},
},
expected: "80",
},
{
name: "single port with non-default IP",
ports: []port{
{PublicPort: 80, IP: "1.2.3.4"},
},
expected: "1.2.3.4:80",
},
{
name: "ipv6 default ip",
ports: []port{
{PublicPort: 80, IP: "::"},
},
expected: "80",
},
{
name: "zero PublicPort is skipped",
ports: []port{
{PublicPort: 0, IP: "0.0.0.0"},
{PublicPort: 80, IP: "0.0.0.0"},
},
expected: "80",
},
{
name: "ports sorted ascending by PublicPort",
ports: []port{
{PublicPort: 443, IP: "0.0.0.0"},
{PublicPort: 80, IP: "0.0.0.0"},
{PublicPort: 8080, IP: "0.0.0.0"},
},
expected: "80, 443, 8080",
},
{
name: "duplicates are deduplicated",
ports: []port{
{PublicPort: 80, IP: "0.0.0.0"},
{PublicPort: 80, IP: "0.0.0.0"},
{PublicPort: 443, IP: "0.0.0.0"},
},
expected: "80, 443",
},
{
name: "multiple ports with different IPs",
ports: []port{
{PublicPort: 80, IP: "0.0.0.0"},
{PublicPort: 443, IP: "1.2.3.4"},
},
expected: "80, 1.2.3.4:443",
},
{
name: "ports slice is nilled after call",
ports: []port{
{PublicPort: 8080, IP: "0.0.0.0"},
},
expected: "8080",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ctr := &container.ApiInfo{}
for _, p := range tt.ports {
ctr.Ports = append(ctr.Ports, struct {
PublicPort uint16
IP string
}{PublicPort: p.PublicPort, IP: p.IP})
}
result := convertContainerPortsToString(ctr)
assert.Equal(t, tt.expected, result)
// Ports slice must be cleared to prevent bleed-over into the next response
assert.Nil(t, ctr.Ports, "ctr.Ports should be nil after formatContainerPorts")
})
}
}

View File

@@ -21,8 +21,7 @@ type hubLike interface {
type AlertManager struct { type AlertManager struct {
hub hubLike hub hubLike
alertQueue chan alertTask stopOnce sync.Once
stopChan chan struct{}
pendingAlerts sync.Map pendingAlerts sync.Map
} }
@@ -98,12 +97,9 @@ var supportsTitle = map[string]struct{}{
// NewAlertManager creates a new AlertManager instance. // NewAlertManager creates a new AlertManager instance.
func NewAlertManager(app hubLike) *AlertManager { func NewAlertManager(app hubLike) *AlertManager {
am := &AlertManager{ am := &AlertManager{
hub: app, hub: app,
alertQueue: make(chan alertTask, 5),
stopChan: make(chan struct{}),
} }
am.bindEvents() am.bindEvents()
go am.startWorker()
return am return am
} }
@@ -112,6 +108,16 @@ func (am *AlertManager) bindEvents() {
am.hub.OnRecordAfterUpdateSuccess("alerts").BindFunc(updateHistoryOnAlertUpdate) am.hub.OnRecordAfterUpdateSuccess("alerts").BindFunc(updateHistoryOnAlertUpdate)
am.hub.OnRecordAfterDeleteSuccess("alerts").BindFunc(resolveHistoryOnAlertDelete) am.hub.OnRecordAfterDeleteSuccess("alerts").BindFunc(resolveHistoryOnAlertDelete)
am.hub.OnRecordAfterUpdateSuccess("smart_devices").BindFunc(am.handleSmartDeviceAlert) am.hub.OnRecordAfterUpdateSuccess("smart_devices").BindFunc(am.handleSmartDeviceAlert)
am.hub.OnServe().BindFunc(func(e *core.ServeEvent) error {
if err := resolveStatusAlerts(e.App); err != nil {
e.App.Logger().Error("Failed to resolve stale status alerts", "err", err)
}
if err := am.restorePendingStatusAlerts(); err != nil {
e.App.Logger().Error("Failed to restore pending status alerts", "err", err)
}
return e.Next()
})
} }
// IsNotificationSilenced checks if a notification should be silenced based on configured quiet hours // IsNotificationSilenced checks if a notification should be silenced based on configured quiet hours

View File

@@ -49,7 +49,7 @@ func TestAlertSilencedOneTime(t *testing.T) {
// Get alert manager // Get alert manager
am := alerts.NewAlertManager(hub) am := alerts.NewAlertManager(hub)
defer am.StopWorker() defer am.Stop()
// Test that alert is silenced // Test that alert is silenced
silenced := am.IsNotificationSilenced(user.Id, system.Id) silenced := am.IsNotificationSilenced(user.Id, system.Id)
@@ -106,7 +106,7 @@ func TestAlertSilencedDaily(t *testing.T) {
// Get alert manager // Get alert manager
am := alerts.NewAlertManager(hub) am := alerts.NewAlertManager(hub)
defer am.StopWorker() defer am.Stop()
// Get current hour and create a window that includes current time // Get current hour and create a window that includes current time
now := time.Now().UTC() now := time.Now().UTC()
@@ -170,7 +170,7 @@ func TestAlertSilencedDailyMidnightCrossing(t *testing.T) {
// Get alert manager // Get alert manager
am := alerts.NewAlertManager(hub) am := alerts.NewAlertManager(hub)
defer am.StopWorker() defer am.Stop()
// Create a window that crosses midnight: 22:00 - 02:00 // Create a window that crosses midnight: 22:00 - 02:00
startTime := time.Date(2000, 1, 1, 22, 0, 0, 0, time.UTC) startTime := time.Date(2000, 1, 1, 22, 0, 0, 0, time.UTC)
@@ -211,7 +211,7 @@ func TestAlertSilencedGlobal(t *testing.T) {
// Get alert manager // Get alert manager
am := alerts.NewAlertManager(hub) am := alerts.NewAlertManager(hub)
defer am.StopWorker() defer am.Stop()
// Create a global quiet hours window (no system specified) // Create a global quiet hours window (no system specified)
now := time.Now().UTC() now := time.Now().UTC()
@@ -250,7 +250,7 @@ func TestAlertSilencedSystemSpecific(t *testing.T) {
// Get alert manager // Get alert manager
am := alerts.NewAlertManager(hub) am := alerts.NewAlertManager(hub)
defer am.StopWorker() defer am.Stop()
// Create a system-specific quiet hours window for system1 only // Create a system-specific quiet hours window for system1 only
now := time.Now().UTC() now := time.Now().UTC()
@@ -296,7 +296,7 @@ func TestAlertSilencedMultiUser(t *testing.T) {
// Get alert manager // Get alert manager
am := alerts.NewAlertManager(hub) am := alerts.NewAlertManager(hub)
defer am.StopWorker() defer am.Stop()
// Create a quiet hours window for user1 only // Create a quiet hours window for user1 only
now := time.Now().UTC() now := time.Now().UTC()
@@ -417,7 +417,7 @@ func TestAlertSilencedNoWindows(t *testing.T) {
// Get alert manager // Get alert manager
am := alerts.NewAlertManager(hub) am := alerts.NewAlertManager(hub)
defer am.StopWorker() defer am.Stop()
// Without any quiet hours windows, alert should NOT be silenced // Without any quiet hours windows, alert should NOT be silenced
silenced := am.IsNotificationSilenced(user.Id, system.Id) silenced := am.IsNotificationSilenced(user.Id, system.Id)

View File

@@ -9,63 +9,25 @@ import (
"github.com/pocketbase/pocketbase/core" "github.com/pocketbase/pocketbase/core"
) )
type alertTask struct {
action string // "schedule" or "cancel"
systemName string
alertRecord *core.Record
delay time.Duration
}
type alertInfo struct { type alertInfo struct {
systemName string systemName string
alertRecord *core.Record alertRecord *core.Record
expireTime time.Time expireTime time.Time
timer *time.Timer
} }
// startWorker is a long-running goroutine that processes alert tasks // Stop cancels all pending status alert timers.
// every x seconds. It must be running to process status alerts. func (am *AlertManager) Stop() {
func (am *AlertManager) startWorker() { am.stopOnce.Do(func() {
processPendingAlerts := time.Tick(15 * time.Second) am.pendingAlerts.Range(func(key, value any) bool {
info := value.(*alertInfo)
// check for status alerts that are not resolved when system comes up if info.timer != nil {
// (can be removed if we figure out core bug in #1052) info.timer.Stop()
checkStatusAlerts := time.Tick(561 * time.Second)
for {
select {
case <-am.stopChan:
return
case task := <-am.alertQueue:
switch task.action {
case "schedule":
am.pendingAlerts.Store(task.alertRecord.Id, &alertInfo{
systemName: task.systemName,
alertRecord: task.alertRecord,
expireTime: time.Now().Add(task.delay),
})
case "cancel":
am.pendingAlerts.Delete(task.alertRecord.Id)
} }
case <-checkStatusAlerts: am.pendingAlerts.Delete(key)
resolveStatusAlerts(am.hub) return true
case <-processPendingAlerts: })
// Check for expired alerts every tick })
now := time.Now()
for key, value := range am.pendingAlerts.Range {
info := value.(*alertInfo)
if now.After(info.expireTime) {
// Downtime delay has passed, process alert
am.sendStatusAlert("down", info.systemName, info.alertRecord)
am.pendingAlerts.Delete(key)
}
}
}
}
}
// StopWorker shuts down the AlertManager.worker goroutine
func (am *AlertManager) StopWorker() {
close(am.stopChan)
} }
// HandleStatusAlerts manages the logic when system status changes. // HandleStatusAlerts manages the logic when system status changes.
@@ -103,44 +65,82 @@ func (am *AlertManager) getSystemStatusAlerts(systemID string) ([]*core.Record,
return alertRecords, nil return alertRecords, nil
} }
// Schedules delayed "down" alerts for each alert record. // handleSystemDown manages the logic when a system status changes to "down". It schedules pending alerts for each alert record.
func (am *AlertManager) handleSystemDown(systemName string, alertRecords []*core.Record) { func (am *AlertManager) handleSystemDown(systemName string, alertRecords []*core.Record) {
for _, alertRecord := range alertRecords { for _, alertRecord := range alertRecords {
// Continue if alert is already scheduled
if _, exists := am.pendingAlerts.Load(alertRecord.Id); exists {
continue
}
// Schedule by adding to queue
min := max(1, alertRecord.GetInt("min")) min := max(1, alertRecord.GetInt("min"))
am.alertQueue <- alertTask{ am.schedulePendingStatusAlert(systemName, alertRecord, time.Duration(min)*time.Minute)
action: "schedule",
systemName: systemName,
alertRecord: alertRecord,
delay: time.Duration(min) * time.Minute,
}
} }
} }
// schedulePendingStatusAlert sets up a timer to send a "down" alert after the specified delay if the system is still down.
// It returns true if the alert was scheduled, or false if an alert was already pending for the given alert record.
func (am *AlertManager) schedulePendingStatusAlert(systemName string, alertRecord *core.Record, delay time.Duration) bool {
alert := &alertInfo{
systemName: systemName,
alertRecord: alertRecord,
expireTime: time.Now().Add(delay),
}
storedAlert, loaded := am.pendingAlerts.LoadOrStore(alertRecord.Id, alert)
if loaded {
return false
}
stored := storedAlert.(*alertInfo)
stored.timer = time.AfterFunc(time.Until(stored.expireTime), func() {
am.processPendingAlert(alertRecord.Id)
})
return true
}
// handleSystemUp manages the logic when a system status changes to "up". // handleSystemUp manages the logic when a system status changes to "up".
// It cancels any pending alerts and sends "up" alerts. // It cancels any pending alerts and sends "up" alerts.
func (am *AlertManager) handleSystemUp(systemName string, alertRecords []*core.Record) { func (am *AlertManager) handleSystemUp(systemName string, alertRecords []*core.Record) {
for _, alertRecord := range alertRecords { for _, alertRecord := range alertRecords {
alertRecordID := alertRecord.Id
// If alert exists for record, delete and continue (down alert not sent) // If alert exists for record, delete and continue (down alert not sent)
if _, exists := am.pendingAlerts.Load(alertRecordID); exists { if am.cancelPendingAlert(alertRecord.Id) {
am.alertQueue <- alertTask{ continue
action: "cancel", }
alertRecord: alertRecord, if !alertRecord.GetBool("triggered") {
}
continue continue
} }
// No alert scheduled for this record, send "up" alert
if err := am.sendStatusAlert("up", systemName, alertRecord); err != nil { if err := am.sendStatusAlert("up", systemName, alertRecord); err != nil {
am.hub.Logger().Error("Failed to send alert", "err", err) am.hub.Logger().Error("Failed to send alert", "err", err)
} }
} }
} }
// cancelPendingAlert stops the timer and removes the pending alert for the given alert ID. Returns true if a pending alert was found and cancelled.
func (am *AlertManager) cancelPendingAlert(alertID string) bool {
value, loaded := am.pendingAlerts.LoadAndDelete(alertID)
if !loaded {
return false
}
info := value.(*alertInfo)
if info.timer != nil {
info.timer.Stop()
}
return true
}
// processPendingAlert sends a "down" alert if the pending alert has expired and the system is still down.
func (am *AlertManager) processPendingAlert(alertID string) {
value, loaded := am.pendingAlerts.LoadAndDelete(alertID)
if !loaded {
return
}
info := value.(*alertInfo)
if info.alertRecord.GetBool("triggered") {
return
}
if err := am.sendStatusAlert("down", info.systemName, info.alertRecord); err != nil {
am.hub.Logger().Error("Failed to send alert", "err", err)
}
}
// sendStatusAlert sends a status alert ("up" or "down") to the users associated with the alert records. // sendStatusAlert sends a status alert ("up" or "down") to the users associated with the alert records.
func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, alertRecord *core.Record) error { func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, alertRecord *core.Record) error {
switch alertStatus { switch alertStatus {
@@ -174,8 +174,8 @@ func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, a
}) })
} }
// resolveStatusAlerts resolves any status alerts that weren't resolved // resolveStatusAlerts resolves any triggered status alerts that weren't resolved
// when system came up (https://github.com/henrygd/beszel/issues/1052) // when system came up (https://github.com/henrygd/beszel/issues/1052).
func resolveStatusAlerts(app core.App) error { func resolveStatusAlerts(app core.App) error {
db := app.DB() db := app.DB()
// Find all active status alerts where the system is actually up // Find all active status alerts where the system is actually up
@@ -205,3 +205,36 @@ func resolveStatusAlerts(app core.App) error {
} }
return nil return nil
} }
// restorePendingStatusAlerts re-queues untriggered status alerts for systems that
// are still down after a hub restart. This rebuilds the lost in-memory timer state.
func (am *AlertManager) restorePendingStatusAlerts() error {
type pendingStatusAlert struct {
AlertID string `db:"alert_id"`
SystemName string `db:"system_name"`
}
var pending []pendingStatusAlert
err := am.hub.DB().NewQuery(`
SELECT a.id AS alert_id, s.name AS system_name
FROM alerts a
JOIN systems s ON a.system = s.id
WHERE a.name = 'Status'
AND a.triggered = false
AND s.status = 'down'
`).All(&pending)
if err != nil {
return err
}
for _, item := range pending {
alertRecord, err := am.hub.FindRecordById("alerts", item.AlertID)
if err != nil {
return err
}
min := max(1, alertRecord.GetInt("min"))
am.schedulePendingStatusAlert(item.SystemName, alertRecord, time.Duration(min)*time.Minute)
}
return nil
}

View File

@@ -0,0 +1,628 @@
//go:build testing
package alerts_test
import (
"testing"
"testing/synctest"
"time"
"github.com/henrygd/beszel/internal/alerts"
beszelTests "github.com/henrygd/beszel/internal/tests"
"github.com/pocketbase/dbx"
"github.com/pocketbase/pocketbase/core"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestStatusAlerts(t *testing.T) {
synctest.Test(t, func(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
systems, err := beszelTests.CreateSystems(hub, 4, user.Id, "paused")
assert.NoError(t, err)
var alerts []*core.Record
for i, system := range systems {
alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": system.Id,
"user": user.Id,
"min": i + 1,
})
assert.NoError(t, err)
alerts = append(alerts, alert)
}
time.Sleep(10 * time.Millisecond)
for _, alert := range alerts {
assert.False(t, alert.GetBool("triggered"), "Alert should not be triggered immediately")
}
if hub.TestMailer.TotalSend() != 0 {
assert.Zero(t, hub.TestMailer.TotalSend(), "Expected 0 messages, got %d", hub.TestMailer.TotalSend())
}
for _, system := range systems {
assert.EqualValues(t, "paused", system.GetString("status"), "System should be paused")
}
for _, system := range systems {
system.Set("status", "up")
err = hub.SaveNoValidate(system)
assert.NoError(t, err)
}
time.Sleep(time.Second)
assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
for _, system := range systems {
system.Set("status", "down")
err = hub.SaveNoValidate(system)
assert.NoError(t, err)
}
// after 30 seconds, should have 4 alerts in the pendingAlerts map, no triggered alerts
time.Sleep(time.Second * 30)
assert.EqualValues(t, 4, hub.GetPendingAlertsCount(), "should have 4 alerts in the pendingAlerts map")
triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.EqualValues(t, 0, triggeredCount, "should have 0 alert triggered")
assert.EqualValues(t, 0, hub.TestMailer.TotalSend(), "should have 0 messages sent")
// after 1:30 seconds, should have 1 triggered alert and 3 pending alerts
time.Sleep(time.Second * 60)
assert.EqualValues(t, 3, hub.GetPendingAlertsCount(), "should have 3 alerts in the pendingAlerts map")
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.EqualValues(t, 1, triggeredCount, "should have 1 alert triggered")
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 messages sent")
// after 2:30 seconds, should have 2 triggered alerts and 2 pending alerts
time.Sleep(time.Second * 60)
assert.EqualValues(t, 2, hub.GetPendingAlertsCount(), "should have 2 alerts in the pendingAlerts map")
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.EqualValues(t, 2, triggeredCount, "should have 2 alert triggered")
assert.EqualValues(t, 2, hub.TestMailer.TotalSend(), "should have 2 messages sent")
// now we will bring the remaning systems back up
for _, system := range systems {
system.Set("status", "up")
err = hub.SaveNoValidate(system)
assert.NoError(t, err)
}
time.Sleep(time.Second)
// should have 0 alerts in the pendingAlerts map and 0 alerts triggered
assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.Zero(t, triggeredCount, "should have 0 alert triggered")
// 4 messages sent, 2 down alerts and 2 up alerts for first 2 systems
assert.EqualValues(t, 4, hub.TestMailer.TotalSend(), "should have 4 messages sent")
})
}
func TestStatusAlertRecoveryBeforeDeadline(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
// Ensure user settings have an email
userSettings, _ := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
hub.Save(userSettings)
// Initial email count
initialEmailCount := hub.TestMailer.TotalSend()
systemCollection, _ := hub.FindCollectionByNameOrId("systems")
system := core.NewRecord(systemCollection)
system.Set("name", "test-system")
system.Set("status", "up")
system.Set("host", "127.0.0.1")
system.Set("users", []string{user.Id})
hub.Save(system)
alertCollection, _ := hub.FindCollectionByNameOrId("alerts")
alert := core.NewRecord(alertCollection)
alert.Set("user", user.Id)
alert.Set("system", system.Id)
alert.Set("name", "Status")
alert.Set("triggered", false)
alert.Set("min", 1)
hub.Save(alert)
am := hub.AlertManager
// 1. System goes down
am.HandleStatusAlerts("down", system)
assert.Equal(t, 1, am.GetPendingAlertsCount(), "Alert should be scheduled")
// 2. System goes up BEFORE delay expires
// Triggering HandleStatusAlerts("up") SHOULD NOT send an alert.
am.HandleStatusAlerts("up", system)
assert.Equal(t, 0, am.GetPendingAlertsCount(), "Alert should be canceled if system recovers before delay expires")
// Verify that NO email was sent.
assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "Recovery notification should not be sent if system never went down")
}
func TestStatusAlertNormalRecovery(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
// Ensure user settings have an email
userSettings, _ := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
hub.Save(userSettings)
systemCollection, _ := hub.FindCollectionByNameOrId("systems")
system := core.NewRecord(systemCollection)
system.Set("name", "test-system")
system.Set("status", "up")
system.Set("host", "127.0.0.1")
system.Set("users", []string{user.Id})
hub.Save(system)
alertCollection, _ := hub.FindCollectionByNameOrId("alerts")
alert := core.NewRecord(alertCollection)
alert.Set("user", user.Id)
alert.Set("system", system.Id)
alert.Set("name", "Status")
alert.Set("triggered", true) // System was confirmed DOWN
hub.Save(alert)
am := hub.AlertManager
initialEmailCount := hub.TestMailer.TotalSend()
// System goes up
am.HandleStatusAlerts("up", system)
// Verify that an email WAS sent (normal recovery).
assert.Equal(t, initialEmailCount+1, hub.TestMailer.TotalSend(), "Recovery notification should be sent if system was triggered as down")
}
func TestHandleStatusAlertsDoesNotSendRecoveryWhileDownIsOnlyPending(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
require.NoError(t, err)
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
require.NoError(t, hub.Save(userSettings))
systemCollection, err := hub.FindCollectionByNameOrId("systems")
require.NoError(t, err)
system := core.NewRecord(systemCollection)
system.Set("name", "test-system")
system.Set("status", "up")
system.Set("host", "127.0.0.1")
system.Set("users", []string{user.Id})
require.NoError(t, hub.Save(system))
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
require.NoError(t, err)
alert := core.NewRecord(alertCollection)
alert.Set("user", user.Id)
alert.Set("system", system.Id)
alert.Set("name", "Status")
alert.Set("triggered", false)
alert.Set("min", 1)
require.NoError(t, hub.Save(alert))
initialEmailCount := hub.TestMailer.TotalSend()
am := alerts.NewTestAlertManagerWithoutWorker(hub)
require.NoError(t, am.HandleStatusAlerts("down", system))
assert.Equal(t, 1, am.GetPendingAlertsCount(), "down transition should register a pending alert immediately")
require.NoError(t, am.HandleStatusAlerts("up", system))
assert.Zero(t, am.GetPendingAlertsCount(), "recovery should cancel the pending down alert")
assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "recovery notification should not be sent before a down alert triggers")
alertRecord, err := hub.FindRecordById("alerts", alert.Id)
require.NoError(t, err)
assert.False(t, alertRecord.GetBool("triggered"), "alert should remain untriggered when downtime never matured")
}
func TestStatusAlertTimerCancellationPreventsBoundaryDelivery(t *testing.T) {
synctest.Test(t, func(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
require.NoError(t, err)
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
require.NoError(t, hub.Save(userSettings))
systemCollection, err := hub.FindCollectionByNameOrId("systems")
require.NoError(t, err)
system := core.NewRecord(systemCollection)
system.Set("name", "test-system")
system.Set("status", "up")
system.Set("host", "127.0.0.1")
system.Set("users", []string{user.Id})
require.NoError(t, hub.Save(system))
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
require.NoError(t, err)
alert := core.NewRecord(alertCollection)
alert.Set("user", user.Id)
alert.Set("system", system.Id)
alert.Set("name", "Status")
alert.Set("triggered", false)
alert.Set("min", 1)
require.NoError(t, hub.Save(alert))
initialEmailCount := hub.TestMailer.TotalSend()
am := alerts.NewTestAlertManagerWithoutWorker(hub)
require.NoError(t, am.HandleStatusAlerts("down", system))
assert.Equal(t, 1, am.GetPendingAlertsCount(), "down transition should register a pending alert immediately")
require.True(t, am.ResetPendingAlertTimer(alert.Id, 25*time.Millisecond), "test should shorten the pending alert timer")
time.Sleep(10 * time.Millisecond)
require.NoError(t, am.HandleStatusAlerts("up", system))
assert.Zero(t, am.GetPendingAlertsCount(), "recovery should remove the pending alert before the timer callback runs")
time.Sleep(40 * time.Millisecond)
assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "timer callback should not deliver after recovery cancels the pending alert")
alertRecord, err := hub.FindRecordById("alerts", alert.Id)
require.NoError(t, err)
assert.False(t, alertRecord.GetBool("triggered"), "alert should remain untriggered when cancellation wins the timer race")
time.Sleep(time.Minute)
synctest.Wait()
})
}
func TestStatusAlertDownFiresAfterDelayExpires(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
require.NoError(t, err)
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
require.NoError(t, hub.Save(userSettings))
systemCollection, err := hub.FindCollectionByNameOrId("systems")
require.NoError(t, err)
system := core.NewRecord(systemCollection)
system.Set("name", "test-system")
system.Set("status", "up")
system.Set("host", "127.0.0.1")
system.Set("users", []string{user.Id})
require.NoError(t, hub.Save(system))
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
require.NoError(t, err)
alert := core.NewRecord(alertCollection)
alert.Set("user", user.Id)
alert.Set("system", system.Id)
alert.Set("name", "Status")
alert.Set("triggered", false)
alert.Set("min", 1)
require.NoError(t, hub.Save(alert))
initialEmailCount := hub.TestMailer.TotalSend()
am := alerts.NewTestAlertManagerWithoutWorker(hub)
require.NoError(t, am.HandleStatusAlerts("down", system))
assert.Equal(t, 1, am.GetPendingAlertsCount(), "alert should be pending after system goes down")
// Expire the pending alert and process it
am.ForceExpirePendingAlerts()
processed, err := am.ProcessPendingAlerts()
require.NoError(t, err)
assert.Len(t, processed, 1, "one alert should have been processed")
assert.Equal(t, 0, am.GetPendingAlertsCount(), "pending alert should be consumed after processing")
// Verify down email was sent
assert.Equal(t, initialEmailCount+1, hub.TestMailer.TotalSend(), "down notification should be sent after delay expires")
// Verify triggered flag is set in the DB
alertRecord, err := hub.FindRecordById("alerts", alert.Id)
require.NoError(t, err)
assert.True(t, alertRecord.GetBool("triggered"), "alert should be marked triggered after downtime matures")
}
func TestStatusAlertDuplicateDownCallIsIdempotent(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
require.NoError(t, err)
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
require.NoError(t, hub.Save(userSettings))
systemCollection, err := hub.FindCollectionByNameOrId("systems")
require.NoError(t, err)
system := core.NewRecord(systemCollection)
system.Set("name", "test-system")
system.Set("status", "up")
system.Set("host", "127.0.0.1")
system.Set("users", []string{user.Id})
require.NoError(t, hub.Save(system))
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
require.NoError(t, err)
alert := core.NewRecord(alertCollection)
alert.Set("user", user.Id)
alert.Set("system", system.Id)
alert.Set("name", "Status")
alert.Set("triggered", false)
alert.Set("min", 5)
require.NoError(t, hub.Save(alert))
am := alerts.NewTestAlertManagerWithoutWorker(hub)
require.NoError(t, am.HandleStatusAlerts("down", system))
require.NoError(t, am.HandleStatusAlerts("down", system))
require.NoError(t, am.HandleStatusAlerts("down", system))
assert.Equal(t, 1, am.GetPendingAlertsCount(), "repeated down calls should not schedule duplicate pending alerts")
}
func TestStatusAlertNoAlertRecord(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
systemCollection, err := hub.FindCollectionByNameOrId("systems")
require.NoError(t, err)
system := core.NewRecord(systemCollection)
system.Set("name", "test-system")
system.Set("status", "up")
system.Set("host", "127.0.0.1")
system.Set("users", []string{user.Id})
require.NoError(t, hub.Save(system))
// No Status alert record created for this system
initialEmailCount := hub.TestMailer.TotalSend()
am := alerts.NewTestAlertManagerWithoutWorker(hub)
require.NoError(t, am.HandleStatusAlerts("down", system))
assert.Equal(t, 0, am.GetPendingAlertsCount(), "no pending alert when no alert record exists")
require.NoError(t, am.HandleStatusAlerts("up", system))
assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "no email when no alert record exists")
}
func TestRestorePendingStatusAlertsRequeuesDownSystemsAfterRestart(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
require.NoError(t, err)
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
require.NoError(t, hub.Save(userSettings))
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "down")
require.NoError(t, err)
system := systems[0]
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
require.NoError(t, err)
alert := core.NewRecord(alertCollection)
alert.Set("user", user.Id)
alert.Set("system", system.Id)
alert.Set("name", "Status")
alert.Set("triggered", false)
alert.Set("min", 1)
require.NoError(t, hub.Save(alert))
initialEmailCount := hub.TestMailer.TotalSend()
am := alerts.NewTestAlertManagerWithoutWorker(hub)
require.NoError(t, am.RestorePendingStatusAlerts())
assert.Equal(t, 1, am.GetPendingAlertsCount(), "startup restore should requeue a pending down alert for a system still marked down")
am.ForceExpirePendingAlerts()
processed, err := am.ProcessPendingAlerts()
require.NoError(t, err)
assert.Len(t, processed, 1, "restored pending alert should be processable after the delay expires")
assert.Equal(t, initialEmailCount+1, hub.TestMailer.TotalSend(), "restored pending alert should send the down notification")
alertRecord, err := hub.FindRecordById("alerts", alert.Id)
require.NoError(t, err)
assert.True(t, alertRecord.GetBool("triggered"), "restored pending alert should mark the alert as triggered once delivered")
}
func TestRestorePendingStatusAlertsSkipsNonDownOrAlreadyTriggeredAlerts(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
systemsDown, err := beszelTests.CreateSystems(hub, 2, user.Id, "down")
require.NoError(t, err)
systemDownPending := systemsDown[0]
systemDownTriggered := systemsDown[1]
systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
"name": "up-system",
"users": []string{user.Id},
"host": "127.0.0.2",
"status": "up",
})
require.NoError(t, err)
_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": systemDownPending.Id,
"user": user.Id,
"min": 1,
"triggered": false,
})
require.NoError(t, err)
_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": systemUp.Id,
"user": user.Id,
"min": 1,
"triggered": false,
})
require.NoError(t, err)
_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": systemDownTriggered.Id,
"user": user.Id,
"min": 1,
"triggered": true,
})
require.NoError(t, err)
am := alerts.NewTestAlertManagerWithoutWorker(hub)
require.NoError(t, am.RestorePendingStatusAlerts())
assert.Equal(t, 1, am.GetPendingAlertsCount(), "only untriggered alerts for currently down systems should be restored")
}
func TestRestorePendingStatusAlertsIsIdempotent(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "down")
require.NoError(t, err)
system := systems[0]
_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": system.Id,
"user": user.Id,
"min": 1,
"triggered": false,
})
require.NoError(t, err)
am := alerts.NewTestAlertManagerWithoutWorker(hub)
require.NoError(t, am.RestorePendingStatusAlerts())
require.NoError(t, am.RestorePendingStatusAlerts())
assert.Equal(t, 1, am.GetPendingAlertsCount(), "restoring twice should not create duplicate pending alerts")
am.ForceExpirePendingAlerts()
processed, err := am.ProcessPendingAlerts()
require.NoError(t, err)
assert.Len(t, processed, 1, "restored alert should still be processable exactly once")
assert.Zero(t, am.GetPendingAlertsCount(), "processing the restored alert should empty the pending map")
}
func TestResolveStatusAlertsFixesStaleTriggered(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
// CreateSystems uses SaveNoValidate after initial save to bypass the
// onRecordCreate hook that forces status = "pending".
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
require.NoError(t, err)
system := systems[0]
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
require.NoError(t, err)
alert := core.NewRecord(alertCollection)
alert.Set("user", user.Id)
alert.Set("system", system.Id)
alert.Set("name", "Status")
alert.Set("triggered", true) // Stale: system is up but alert still says triggered
require.NoError(t, hub.Save(alert))
// resolveStatusAlerts should clear the stale triggered flag
require.NoError(t, alerts.ResolveStatusAlerts(hub))
alertRecord, err := hub.FindRecordById("alerts", alert.Id)
require.NoError(t, err)
assert.False(t, alertRecord.GetBool("triggered"), "stale triggered flag should be cleared when system is up")
}
func TestResolveStatusAlerts(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
// Create a systemUp
systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
"name": "test-system",
"users": []string{user.Id},
"host": "127.0.0.1",
"status": "up",
})
assert.NoError(t, err)
systemDown, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
"name": "test-system-2",
"users": []string{user.Id},
"host": "127.0.0.2",
"status": "up",
})
assert.NoError(t, err)
// Create a status alertUp for the system
alertUp, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": systemUp.Id,
"user": user.Id,
"min": 1,
})
assert.NoError(t, err)
alertDown, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": systemDown.Id,
"user": user.Id,
"min": 1,
})
assert.NoError(t, err)
// Verify alert is not triggered initially
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered initially")
// Set the system to 'up' (this should not trigger the alert)
systemUp.Set("status", "up")
err = hub.SaveNoValidate(systemUp)
assert.NoError(t, err)
systemDown.Set("status", "down")
err = hub.SaveNoValidate(systemDown)
assert.NoError(t, err)
// Wait a moment for any processing
time.Sleep(10 * time.Millisecond)
// Verify alertUp is still not triggered after setting system to up
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
assert.NoError(t, err)
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered when system is up")
// Manually set both alerts triggered to true
alertUp.Set("triggered", true)
err = hub.SaveNoValidate(alertUp)
assert.NoError(t, err)
alertDown.Set("triggered", true)
err = hub.SaveNoValidate(alertDown)
assert.NoError(t, err)
// Verify we have exactly one alert with triggered true
triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.EqualValues(t, 2, triggeredCount, "Should have exactly two alerts with triggered true")
// Verify the specific alertUp is triggered
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
assert.NoError(t, err)
assert.True(t, alertUp.GetBool("triggered"), "Alert should be triggered")
// Verify we have two unresolved alert history records
alertHistoryCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
assert.NoError(t, err)
assert.EqualValues(t, 2, alertHistoryCount, "Should have exactly two unresolved alert history records")
err = alerts.ResolveStatusAlerts(hub)
assert.NoError(t, err)
// Verify alertUp is not triggered after resolving
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
assert.NoError(t, err)
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered after resolving")
// Verify alertDown is still triggered
alertDown, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertDown.Id})
assert.NoError(t, err)
assert.True(t, alertDown.GetBool("triggered"), "Alert should still be triggered after resolving")
// Verify we have one unresolved alert history record
alertHistoryCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
assert.NoError(t, err)
assert.EqualValues(t, 1, alertHistoryCount, "Should have exactly one unresolved alert history record")
}

View File

@@ -12,7 +12,6 @@ import (
"testing/synctest" "testing/synctest"
"time" "time"
"github.com/henrygd/beszel/internal/alerts"
beszelTests "github.com/henrygd/beszel/internal/tests" beszelTests "github.com/henrygd/beszel/internal/tests"
"github.com/pocketbase/dbx" "github.com/pocketbase/dbx"
@@ -369,87 +368,6 @@ func TestUserAlertsApi(t *testing.T) {
} }
} }
func TestStatusAlerts(t *testing.T) {
synctest.Test(t, func(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
systems, err := beszelTests.CreateSystems(hub, 4, user.Id, "paused")
assert.NoError(t, err)
var alerts []*core.Record
for i, system := range systems {
alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": system.Id,
"user": user.Id,
"min": i + 1,
})
assert.NoError(t, err)
alerts = append(alerts, alert)
}
time.Sleep(10 * time.Millisecond)
for _, alert := range alerts {
assert.False(t, alert.GetBool("triggered"), "Alert should not be triggered immediately")
}
if hub.TestMailer.TotalSend() != 0 {
assert.Zero(t, hub.TestMailer.TotalSend(), "Expected 0 messages, got %d", hub.TestMailer.TotalSend())
}
for _, system := range systems {
assert.EqualValues(t, "paused", system.GetString("status"), "System should be paused")
}
for _, system := range systems {
system.Set("status", "up")
err = hub.SaveNoValidate(system)
assert.NoError(t, err)
}
time.Sleep(time.Second)
assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
for _, system := range systems {
system.Set("status", "down")
err = hub.SaveNoValidate(system)
assert.NoError(t, err)
}
// after 30 seconds, should have 4 alerts in the pendingAlerts map, no triggered alerts
time.Sleep(time.Second * 30)
assert.EqualValues(t, 4, hub.GetPendingAlertsCount(), "should have 4 alerts in the pendingAlerts map")
triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.EqualValues(t, 0, triggeredCount, "should have 0 alert triggered")
assert.EqualValues(t, 0, hub.TestMailer.TotalSend(), "should have 0 messages sent")
// after 1:30 seconds, should have 1 triggered alert and 3 pending alerts
time.Sleep(time.Second * 60)
assert.EqualValues(t, 3, hub.GetPendingAlertsCount(), "should have 3 alerts in the pendingAlerts map")
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.EqualValues(t, 1, triggeredCount, "should have 1 alert triggered")
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 messages sent")
// after 2:30 seconds, should have 2 triggered alerts and 2 pending alerts
time.Sleep(time.Second * 60)
assert.EqualValues(t, 2, hub.GetPendingAlertsCount(), "should have 2 alerts in the pendingAlerts map")
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.EqualValues(t, 2, triggeredCount, "should have 2 alert triggered")
assert.EqualValues(t, 2, hub.TestMailer.TotalSend(), "should have 2 messages sent")
// now we will bring the remaning systems back up
for _, system := range systems {
system.Set("status", "up")
err = hub.SaveNoValidate(system)
assert.NoError(t, err)
}
time.Sleep(time.Second)
// should have 0 alerts in the pendingAlerts map and 0 alerts triggered
assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.Zero(t, triggeredCount, "should have 0 alert triggered")
// 4 messages sent, 2 down alerts and 2 up alerts for first 2 systems
assert.EqualValues(t, 4, hub.TestMailer.TotalSend(), "should have 4 messages sent")
})
}
func TestAlertsHistory(t *testing.T) { func TestAlertsHistory(t *testing.T) {
synctest.Test(t, func(t *testing.T) { synctest.Test(t, func(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t) hub, user := beszelTests.GetHubWithUser(t)
@@ -578,102 +496,3 @@ func TestAlertsHistory(t *testing.T) {
assert.EqualValues(t, 2, totalHistoryCount, "Should have 2 total alert history records") assert.EqualValues(t, 2, totalHistoryCount, "Should have 2 total alert history records")
}) })
} }
func TestResolveStatusAlerts(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
// Create a systemUp
systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
"name": "test-system",
"users": []string{user.Id},
"host": "127.0.0.1",
"status": "up",
})
assert.NoError(t, err)
systemDown, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
"name": "test-system-2",
"users": []string{user.Id},
"host": "127.0.0.2",
"status": "up",
})
assert.NoError(t, err)
// Create a status alertUp for the system
alertUp, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": systemUp.Id,
"user": user.Id,
"min": 1,
})
assert.NoError(t, err)
alertDown, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": systemDown.Id,
"user": user.Id,
"min": 1,
})
assert.NoError(t, err)
// Verify alert is not triggered initially
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered initially")
// Set the system to 'up' (this should not trigger the alert)
systemUp.Set("status", "up")
err = hub.SaveNoValidate(systemUp)
assert.NoError(t, err)
systemDown.Set("status", "down")
err = hub.SaveNoValidate(systemDown)
assert.NoError(t, err)
// Wait a moment for any processing
time.Sleep(10 * time.Millisecond)
// Verify alertUp is still not triggered after setting system to up
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
assert.NoError(t, err)
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered when system is up")
// Manually set both alerts triggered to true
alertUp.Set("triggered", true)
err = hub.SaveNoValidate(alertUp)
assert.NoError(t, err)
alertDown.Set("triggered", true)
err = hub.SaveNoValidate(alertDown)
assert.NoError(t, err)
// Verify we have exactly one alert with triggered true
triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.EqualValues(t, 2, triggeredCount, "Should have exactly two alerts with triggered true")
// Verify the specific alertUp is triggered
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
assert.NoError(t, err)
assert.True(t, alertUp.GetBool("triggered"), "Alert should be triggered")
// Verify we have two unresolved alert history records
alertHistoryCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
assert.NoError(t, err)
assert.EqualValues(t, 2, alertHistoryCount, "Should have exactly two unresolved alert history records")
err = alerts.ResolveStatusAlerts(hub)
assert.NoError(t, err)
// Verify alertUp is not triggered after resolving
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
assert.NoError(t, err)
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered after resolving")
// Verify alertDown is still triggered
alertDown, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertDown.Id})
assert.NoError(t, err)
assert.True(t, alertDown.GetBool("triggered"), "Alert should still be triggered after resolving")
// Verify we have one unresolved alert history record
alertHistoryCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
assert.NoError(t, err)
assert.EqualValues(t, 1, alertHistoryCount, "Should have exactly one unresolved alert history record")
}

View File

@@ -9,6 +9,12 @@ import (
"github.com/pocketbase/pocketbase/core" "github.com/pocketbase/pocketbase/core"
) )
func NewTestAlertManagerWithoutWorker(app hubLike) *AlertManager {
return &AlertManager{
hub: app,
}
}
func (am *AlertManager) GetAlertManager() *AlertManager { func (am *AlertManager) GetAlertManager() *AlertManager {
return am return am
} }
@@ -34,12 +40,11 @@ func (am *AlertManager) ProcessPendingAlerts() ([]*core.Record, error) {
am.pendingAlerts.Range(func(key, value any) bool { am.pendingAlerts.Range(func(key, value any) bool {
info := value.(*alertInfo) info := value.(*alertInfo)
if now.After(info.expireTime) { if now.After(info.expireTime) {
// Downtime delay has passed, process alert if info.timer != nil {
if err := am.sendStatusAlert("down", info.systemName, info.alertRecord); err != nil { info.timer.Stop()
lastErr = err
} }
am.processPendingAlert(key.(string))
processedAlerts = append(processedAlerts, info.alertRecord) processedAlerts = append(processedAlerts, info.alertRecord)
am.pendingAlerts.Delete(key)
} }
return true return true
}) })
@@ -56,6 +61,27 @@ func (am *AlertManager) ForceExpirePendingAlerts() {
}) })
} }
func (am *AlertManager) ResetPendingAlertTimer(alertID string, delay time.Duration) bool {
value, loaded := am.pendingAlerts.Load(alertID)
if !loaded {
return false
}
info := value.(*alertInfo)
if info.timer != nil {
info.timer.Stop()
}
info.expireTime = time.Now().Add(delay)
info.timer = time.AfterFunc(delay, func() {
am.processPendingAlert(alertID)
})
return true
}
func ResolveStatusAlerts(app core.App) error { func ResolveStatusAlerts(app core.App) error {
return resolveStatusAlerts(app) return resolveStatusAlerts(app)
} }
func (am *AlertManager) RestorePendingStatusAlerts() error {
return am.restorePendingStatusAlerts()
}

View File

@@ -10,10 +10,19 @@ type ApiInfo struct {
Status string Status string
State string State string
Image string Image string
Health struct {
Status string
// FailingStreak int
}
Ports []struct {
// PrivatePort uint16
PublicPort uint16
IP string
// Type string
}
// ImageID string // ImageID string
// Command string // Command string
// Created int64 // Created int64
// Ports []Port
// SizeRw int64 `json:",omitempty"` // SizeRw int64 `json:",omitempty"`
// SizeRootFs int64 `json:",omitempty"` // SizeRootFs int64 `json:",omitempty"`
// Labels map[string]string // Labels map[string]string
@@ -140,6 +149,7 @@ type Stats struct {
Status string `json:"-" cbor:"6,keyasint"` Status string `json:"-" cbor:"6,keyasint"`
Id string `json:"-" cbor:"7,keyasint"` Id string `json:"-" cbor:"7,keyasint"`
Image string `json:"-" cbor:"8,keyasint"` Image string `json:"-" cbor:"8,keyasint"`
Ports string `json:"-" cbor:"10,keyasint"`
// PrevCpu [2]uint64 `json:"-"` // PrevCpu [2]uint64 `json:"-"`
CpuSystem uint64 `json:"-"` CpuSystem uint64 `json:"-"`
CpuContainer uint64 `json:"-"` CpuContainer uint64 `json:"-"`

View File

@@ -318,10 +318,11 @@ func createContainerRecords(app core.App, data []*container.Stats, systemId stri
valueStrings := make([]string, 0, len(data)) valueStrings := make([]string, 0, len(data))
for i, container := range data { for i, container := range data {
suffix := fmt.Sprintf("%d", i) suffix := fmt.Sprintf("%d", i)
valueStrings = append(valueStrings, fmt.Sprintf("({:id%[1]s}, {:system}, {:name%[1]s}, {:image%[1]s}, {:status%[1]s}, {:health%[1]s}, {:cpu%[1]s}, {:memory%[1]s}, {:net%[1]s}, {:updated})", suffix)) valueStrings = append(valueStrings, fmt.Sprintf("({:id%[1]s}, {:system}, {:name%[1]s}, {:image%[1]s}, {:ports%[1]s}, {:status%[1]s}, {:health%[1]s}, {:cpu%[1]s}, {:memory%[1]s}, {:net%[1]s}, {:updated})", suffix))
params["id"+suffix] = container.Id params["id"+suffix] = container.Id
params["name"+suffix] = container.Name params["name"+suffix] = container.Name
params["image"+suffix] = container.Image params["image"+suffix] = container.Image
params["ports"+suffix] = container.Ports
params["status"+suffix] = container.Status params["status"+suffix] = container.Status
params["health"+suffix] = container.Health params["health"+suffix] = container.Health
params["cpu"+suffix] = container.Cpu params["cpu"+suffix] = container.Cpu
@@ -333,7 +334,7 @@ func createContainerRecords(app core.App, data []*container.Stats, systemId stri
params["net"+suffix] = netBytes params["net"+suffix] = netBytes
} }
queryString := fmt.Sprintf( queryString := fmt.Sprintf(
"INSERT INTO containers (id, system, name, image, status, health, cpu, memory, net, updated) VALUES %s ON CONFLICT(id) DO UPDATE SET system = excluded.system, name = excluded.name, image = excluded.image, status = excluded.status, health = excluded.health, cpu = excluded.cpu, memory = excluded.memory, net = excluded.net, updated = excluded.updated", "INSERT INTO containers (id, system, name, image, ports, status, health, cpu, memory, net, updated) VALUES %s ON CONFLICT(id) DO UPDATE SET system = excluded.system, name = excluded.name, image = excluded.image, ports = excluded.ports, status = excluded.status, health = excluded.health, cpu = excluded.cpu, memory = excluded.memory, net = excluded.net, updated = excluded.updated",
strings.Join(valueStrings, ","), strings.Join(valueStrings, ","),
) )
_, err := app.DB().NewQuery(queryString).Bind(params).Execute() _, err := app.DB().NewQuery(queryString).Bind(params).Execute()

View File

@@ -977,18 +977,6 @@ func init() {
"system": false, "system": false,
"type": "number" "type": "number"
}, },
{
"hidden": false,
"id": "number3332085495",
"max": null,
"min": null,
"name": "updated",
"onlyInt": true,
"presentable": false,
"required": true,
"system": false,
"type": "number"
},
{ {
"autogeneratePattern": "", "autogeneratePattern": "",
"hidden": false, "hidden": false,
@@ -1002,6 +990,32 @@ func init() {
"required": false, "required": false,
"system": false, "system": false,
"type": "text" "type": "text"
},
{
"autogeneratePattern": "",
"hidden": false,
"id": "text2308952269",
"max": 0,
"min": 0,
"name": "ports",
"pattern": "",
"presentable": false,
"primaryKey": false,
"required": false,
"system": false,
"type": "text"
},
{
"hidden": false,
"id": "number3332085495",
"max": null,
"min": null,
"name": "updated",
"onlyInt": true,
"presentable": false,
"required": true,
"system": false,
"type": "number"
} }
], ],
"indexes": [ "indexes": [

View File

@@ -4,7 +4,6 @@ import { cn, decimalString, formatBytes, hourWithSeconds } from "@/lib/utils"
import type { ContainerRecord } from "@/types" import type { ContainerRecord } from "@/types"
import { ContainerHealth, ContainerHealthLabels } from "@/lib/enums" import { ContainerHealth, ContainerHealthLabels } from "@/lib/enums"
import { import {
ArrowUpDownIcon,
ClockIcon, ClockIcon,
ContainerIcon, ContainerIcon,
CpuIcon, CpuIcon,
@@ -13,11 +12,12 @@ import {
ServerIcon, ServerIcon,
ShieldCheckIcon, ShieldCheckIcon,
} from "lucide-react" } from "lucide-react"
import { EthernetIcon, HourglassIcon } from "../ui/icons" import { EthernetIcon, HourglassIcon, SquareArrowRightEnterIcon } from "../ui/icons"
import { Badge } from "../ui/badge" import { Badge } from "../ui/badge"
import { t } from "@lingui/core/macro" import { t } from "@lingui/core/macro"
import { $allSystemsById } from "@/lib/stores" import { $allSystemsById, $longestSystemNameLen } from "@/lib/stores"
import { useStore } from "@nanostores/react" import { useStore } from "@nanostores/react"
import { Tooltip, TooltipContent, TooltipTrigger } from "../ui/tooltip"
// Unit names and their corresponding number of seconds for converting docker status strings // Unit names and their corresponding number of seconds for converting docker status strings
const unitSeconds = [ const unitSeconds = [
@@ -63,7 +63,12 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
header: ({ column }) => <HeaderButton column={column} name={t`System`} Icon={ServerIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`System`} Icon={ServerIcon} />,
cell: ({ getValue }) => { cell: ({ getValue }) => {
const allSystems = useStore($allSystemsById) const allSystems = useStore($allSystemsById)
return <span className="ms-1.5 xl:w-34 block truncate">{allSystems[getValue() as string]?.name ?? ""}</span> const longestName = useStore($longestSystemNameLen)
return (
<div className="ms-1 max-w-40 truncate" style={{ width: `${longestName / 1.05}ch` }}>
{allSystems[getValue() as string]?.name ?? ""}
</div>
)
}, },
}, },
// { // {
@@ -82,7 +87,7 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
header: ({ column }) => <HeaderButton column={column} name={t`CPU`} Icon={CpuIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`CPU`} Icon={CpuIcon} />,
cell: ({ getValue }) => { cell: ({ getValue }) => {
const val = getValue() as number const val = getValue() as number
return <span className="ms-1.5 tabular-nums">{`${decimalString(val, val >= 10 ? 1 : 2)}%`}</span> return <span className="ms-1 tabular-nums">{`${decimalString(val, val >= 10 ? 1 : 2)}%`}</span>
}, },
}, },
{ {
@@ -94,7 +99,7 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
const val = getValue() as number const val = getValue() as number
const formatted = formatBytes(val, false, undefined, true) const formatted = formatBytes(val, false, undefined, true)
return ( return (
<span className="ms-1.5 tabular-nums">{`${decimalString(formatted.value, formatted.value >= 10 ? 1 : 2)} ${formatted.unit}`}</span> <span className="ms-1 tabular-nums">{`${decimalString(formatted.value, formatted.value >= 10 ? 1 : 2)} ${formatted.unit}`}</span>
) )
}, },
}, },
@@ -103,11 +108,12 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
accessorFn: (record) => record.net, accessorFn: (record) => record.net,
invertSorting: true, invertSorting: true,
header: ({ column }) => <HeaderButton column={column} name={t`Net`} Icon={EthernetIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`Net`} Icon={EthernetIcon} />,
minSize: 112,
cell: ({ getValue }) => { cell: ({ getValue }) => {
const val = getValue() as number const val = getValue() as number
const formatted = formatBytes(val, true, undefined, false) const formatted = formatBytes(val, true, undefined, false)
return ( return (
<span className="ms-1.5 tabular-nums">{`${decimalString(formatted.value, formatted.value >= 10 ? 1 : 2)} ${formatted.unit}`}</span> <div className="ms-1 tabular-nums">{`${decimalString(formatted.value, formatted.value >= 10 ? 1 : 2)} ${formatted.unit}`}</div>
) )
}, },
}, },
@@ -116,6 +122,7 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
invertSorting: true, invertSorting: true,
accessorFn: (record) => record.health, accessorFn: (record) => record.health,
header: ({ column }) => <HeaderButton column={column} name={t`Health`} Icon={ShieldCheckIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`Health`} Icon={ShieldCheckIcon} />,
minSize: 121,
cell: ({ getValue }) => { cell: ({ getValue }) => {
const healthValue = getValue() as number const healthValue = getValue() as number
const healthStatus = ContainerHealthLabels[healthValue] || "Unknown" const healthStatus = ContainerHealthLabels[healthValue] || "Unknown"
@@ -134,6 +141,35 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
) )
}, },
}, },
{
id: "ports",
accessorFn: (record) => record.ports || undefined,
header: ({ column }) => (
<HeaderButton
column={column}
name={t({ message: "Ports", context: "Container ports" })}
Icon={SquareArrowRightEnterIcon}
/>
),
sortingFn: (a, b) => getPortValue(a.original.ports) - getPortValue(b.original.ports),
minSize: 147,
cell: ({ getValue }) => {
const val = getValue() as string | undefined
if (!val) {
return <div className="ms-1.5 text-muted-foreground">-</div>
}
const className = "ms-1 w-27 block truncate tabular-nums"
if (val.length > 14) {
return (
<Tooltip>
<TooltipTrigger className={className}>{val}</TooltipTrigger>
<TooltipContent>{val}</TooltipContent>
</Tooltip>
)
}
return <span className={className}>{val}</span>
},
},
{ {
id: "image", id: "image",
sortingFn: (a, b) => a.original.image.localeCompare(b.original.image), sortingFn: (a, b) => a.original.image.localeCompare(b.original.image),
@@ -142,7 +178,12 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
<HeaderButton column={column} name={t({ message: "Image", context: "Docker image" })} Icon={LayersIcon} /> <HeaderButton column={column} name={t({ message: "Image", context: "Docker image" })} Icon={LayersIcon} />
), ),
cell: ({ getValue }) => { cell: ({ getValue }) => {
return <span className="ms-1.5 xl:w-40 block truncate">{getValue() as string}</span> const val = getValue() as string
return (
<div className="ms-1 xl:w-40 truncate" title={val}>
{val}
</div>
)
}, },
}, },
{ {
@@ -152,7 +193,7 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
sortingFn: (a, b) => getStatusValue(a.original.status) - getStatusValue(b.original.status), sortingFn: (a, b) => getStatusValue(a.original.status) - getStatusValue(b.original.status),
header: ({ column }) => <HeaderButton column={column} name={t`Status`} Icon={HourglassIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`Status`} Icon={HourglassIcon} />,
cell: ({ getValue }) => { cell: ({ getValue }) => {
return <span className="ms-1.5 w-25 block truncate">{getValue() as string}</span> return <span className="ms-1 w-25 block truncate">{getValue() as string}</span>
}, },
}, },
{ {
@@ -162,7 +203,7 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
header: ({ column }) => <HeaderButton column={column} name={t`Updated`} Icon={ClockIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`Updated`} Icon={ClockIcon} />,
cell: ({ getValue }) => { cell: ({ getValue }) => {
const timestamp = getValue() as number const timestamp = getValue() as number
return <span className="ms-1.5 tabular-nums">{hourWithSeconds(new Date(timestamp).toISOString())}</span> return <span className="ms-1 tabular-nums">{hourWithSeconds(new Date(timestamp).toISOString())}</span>
}, },
}, },
] ]
@@ -188,7 +229,21 @@ function HeaderButton({
> >
{Icon && <Icon className="size-4" />} {Icon && <Icon className="size-4" />}
{name} {name}
<ArrowUpDownIcon className="size-4" /> {/* <ArrowUpDownIcon className="size-4" /> */}
</Button> </Button>
) )
} }
/**
* Convert port string to a number for sorting.
* Handles formats like "80", "127.0.0.1:80", and "80, 443" (takes the first mapping).
*/
function getPortValue(ports: string | undefined): number {
if (!ports) {
return 0
}
const first = ports.includes(",") ? ports.substring(0, ports.indexOf(",")) : ports
const colonIndex = first.lastIndexOf(":")
const portStr = colonIndex === -1 ? first : first.substring(colonIndex + 1)
return Number(portStr) || 0
}

View File

@@ -1,3 +1,4 @@
/** biome-ignore-all lint/security/noDangerouslySetInnerHtml: html comes directly from docker via agent */
import { t } from "@lingui/core/macro" import { t } from "@lingui/core/macro"
import { Trans } from "@lingui/react/macro" import { Trans } from "@lingui/react/macro"
import { import {
@@ -13,7 +14,7 @@ import {
type VisibilityState, type VisibilityState,
} from "@tanstack/react-table" } from "@tanstack/react-table"
import { useVirtualizer, type VirtualItem } from "@tanstack/react-virtual" import { useVirtualizer, type VirtualItem } from "@tanstack/react-virtual"
import { memo, RefObject, useEffect, useRef, useState } from "react" import { memo, type RefObject, useEffect, useRef, useState } from "react"
import { Input } from "@/components/ui/input" import { Input } from "@/components/ui/input"
import { TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table" import { TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table"
import { pb } from "@/lib/api" import { pb } from "@/lib/api"
@@ -44,6 +45,20 @@ export default function ContainersTable({ systemId }: { systemId?: string }) {
) )
const [columnFilters, setColumnFilters] = useState<ColumnFiltersState>([]) const [columnFilters, setColumnFilters] = useState<ColumnFiltersState>([])
const [columnVisibility, setColumnVisibility] = useState<VisibilityState>({}) const [columnVisibility, setColumnVisibility] = useState<VisibilityState>({})
// Hide ports column if no ports are present
useEffect(() => {
if (data) {
const hasPorts = data.some((container) => container.ports)
setColumnVisibility((prev) => {
if (prev.ports === hasPorts) {
return prev
}
return { ...prev, ports: hasPorts }
})
}
}, [data])
const [rowSelection, setRowSelection] = useState({}) const [rowSelection, setRowSelection] = useState({})
const [globalFilter, setGlobalFilter] = useState("") const [globalFilter, setGlobalFilter] = useState("")
@@ -51,7 +66,7 @@ export default function ContainersTable({ systemId }: { systemId?: string }) {
function fetchData(systemId?: string) { function fetchData(systemId?: string) {
pb.collection<ContainerRecord>("containers") pb.collection<ContainerRecord>("containers")
.getList(0, 2000, { .getList(0, 2000, {
fields: "id,name,image,cpu,memory,net,health,status,system,updated", fields: "id,name,image,ports,cpu,memory,net,health,status,system,updated",
filter: systemId ? pb.filter("system={:system}", { system: systemId }) : undefined, filter: systemId ? pb.filter("system={:system}", { system: systemId }) : undefined,
}) })
.then(({ items }) => { .then(({ items }) => {
@@ -67,7 +82,7 @@ export default function ContainersTable({ systemId }: { systemId?: string }) {
setData((curItems) => { setData((curItems) => {
const lastUpdated = Math.max(items[0].updated, items.at(-1)?.updated ?? 0) const lastUpdated = Math.max(items[0].updated, items.at(-1)?.updated ?? 0)
const containerIds = new Set() const containerIds = new Set()
const newItems = [] const newItems: ContainerRecord[] = []
for (const item of items) { for (const item of items) {
if (Math.abs(lastUpdated - item.updated) < 70_000) { if (Math.abs(lastUpdated - item.updated) < 70_000) {
containerIds.add(item.id) containerIds.add(item.id)
@@ -134,7 +149,8 @@ export default function ContainersTable({ systemId }: { systemId?: string }) {
const status = container.status ?? "" const status = container.status ?? ""
const healthLabel = ContainerHealthLabels[container.health as ContainerHealth] ?? "" const healthLabel = ContainerHealthLabels[container.health as ContainerHealth] ?? ""
const image = container.image ?? "" const image = container.image ?? ""
const searchString = `${systemName} ${id} ${name} ${healthLabel} ${status} ${image}`.toLowerCase() const ports = container.ports ?? ""
const searchString = `${systemName} ${id} ${name} ${healthLabel} ${status} ${image} ${ports}`.toLowerCase()
return (filterValue as string) return (filterValue as string)
.toLowerCase() .toLowerCase()
@@ -300,9 +316,6 @@ function ContainerSheet({
setSheetOpen: (open: boolean) => void setSheetOpen: (open: boolean) => void
activeContainer: RefObject<ContainerRecord | null> activeContainer: RefObject<ContainerRecord | null>
}) { }) {
const container = activeContainer.current
if (!container) return null
const [logsDisplay, setLogsDisplay] = useState<string>("") const [logsDisplay, setLogsDisplay] = useState<string>("")
const [infoDisplay, setInfoDisplay] = useState<string>("") const [infoDisplay, setInfoDisplay] = useState<string>("")
const [logsFullscreenOpen, setLogsFullscreenOpen] = useState<boolean>(false) const [logsFullscreenOpen, setLogsFullscreenOpen] = useState<boolean>(false)
@@ -310,6 +323,8 @@ function ContainerSheet({
const [isRefreshingLogs, setIsRefreshingLogs] = useState<boolean>(false) const [isRefreshingLogs, setIsRefreshingLogs] = useState<boolean>(false)
const logsContainerRef = useRef<HTMLDivElement>(null) const logsContainerRef = useRef<HTMLDivElement>(null)
const container = activeContainer.current
function scrollLogsToBottom() { function scrollLogsToBottom() {
if (logsContainerRef.current) { if (logsContainerRef.current) {
logsContainerRef.current.scrollTo({ top: logsContainerRef.current.scrollHeight }) logsContainerRef.current.scrollTo({ top: logsContainerRef.current.scrollHeight })
@@ -317,6 +332,7 @@ function ContainerSheet({
} }
const refreshLogs = async () => { const refreshLogs = async () => {
if (!container) return
setIsRefreshingLogs(true) setIsRefreshingLogs(true)
const startTime = Date.now() const startTime = Date.now()
@@ -348,6 +364,8 @@ function ContainerSheet({
})() })()
}, [container]) }, [container])
if (!container) return null
return ( return (
<> <>
<LogsFullscreenDialog <LogsFullscreenDialog
@@ -378,8 +396,14 @@ function ContainerSheet({
{container.image} {container.image}
<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" /> <Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
{container.id} {container.id}
<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" /> {/* {container.ports && (
{ContainerHealthLabels[container.health as ContainerHealth]} <>
<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
{container.ports}
</>
)} */}
{/* <Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
{ContainerHealthLabels[container.health as ContainerHealth]} */}
</SheetDescription> </SheetDescription>
</SheetHeader> </SheetHeader>
<div className="px-3 pb-3 -mt-4 flex flex-col gap-3 h-full items-start"> <div className="px-3 pb-3 -mt-4 flex flex-col gap-3 h-full items-start">
@@ -438,11 +462,12 @@ function ContainerSheet({
function ContainersTableHead({ table }: { table: TableType<ContainerRecord> }) { function ContainersTableHead({ table }: { table: TableType<ContainerRecord> }) {
return ( return (
<TableHeader className="sticky top-0 z-50 w-full border-b-2"> <TableHeader className="sticky top-0 z-50 w-full border-b-2">
<div className="absolute -top-2 left-0 w-full h-4 bg-table-header z-50"></div>
{table.getHeaderGroups().map((headerGroup) => ( {table.getHeaderGroups().map((headerGroup) => (
<tr key={headerGroup.id}> <tr key={headerGroup.id}>
{headerGroup.headers.map((header) => { {headerGroup.headers.map((header) => {
return ( return (
<TableHead className="px-2" key={header.id}> <TableHead className="px-2" key={header.id} style={{ width: header.getSize() }}>
{header.isPlaceholder ? null : flexRender(header.column.columnDef.header, header.getContext())} {header.isPlaceholder ? null : flexRender(header.column.columnDef.header, header.getContext())}
</TableHead> </TableHead>
) )
@@ -474,6 +499,7 @@ const ContainerTableRow = memo(function ContainerTableRow({
className="py-0 ps-4.5" className="py-0 ps-4.5"
style={{ style={{
height: virtualRow.size, height: virtualRow.size,
width: cell.column.getSize(),
}} }}
> >
{flexRender(cell.column.columnDef.cell, cell.getContext())} {flexRender(cell.column.columnDef.cell, cell.getContext())}

View File

@@ -3,13 +3,16 @@ import {
type ColumnDef, type ColumnDef,
type ColumnFiltersState, type ColumnFiltersState,
type Column, type Column,
type Row,
type SortingState, type SortingState,
type Table as TableType,
flexRender, flexRender,
getCoreRowModel, getCoreRowModel,
getFilteredRowModel, getFilteredRowModel,
getSortedRowModel, getSortedRowModel,
useReactTable, useReactTable,
} from "@tanstack/react-table" } from "@tanstack/react-table"
import { useVirtualizer, type VirtualItem } from "@tanstack/react-virtual"
import { import {
Activity, Activity,
Box, Box,
@@ -40,6 +43,7 @@ import {
toFixedFloat, toFixedFloat,
formatTemperature, formatTemperature,
cn, cn,
getVisualStringWidth,
secondsToString, secondsToString,
hourWithSeconds, hourWithSeconds,
formatShortDate, formatShortDate,
@@ -57,7 +61,7 @@ import {
DropdownMenuSeparator, DropdownMenuSeparator,
DropdownMenuTrigger, DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu" } from "@/components/ui/dropdown-menu"
import { useCallback, useMemo, useEffect, useState } from "react" import { memo, useCallback, useMemo, useEffect, useRef, useState } from "react"
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip" import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"
// Column definition for S.M.A.R.T. attributes table // Column definition for S.M.A.R.T. attributes table
@@ -101,7 +105,11 @@ function formatCapacity(bytes: number): string {
const SMART_DEVICE_FIELDS = "id,system,name,model,state,capacity,temp,type,hours,cycles,updated" const SMART_DEVICE_FIELDS = "id,system,name,model,state,capacity,temp,type,hours,cycles,updated"
export const columns: ColumnDef<SmartDeviceRecord>[] = [ export const createColumns = (
longestName: number,
longestModel: number,
longestDevice: number
): ColumnDef<SmartDeviceRecord>[] => [
{ {
id: "system", id: "system",
accessorFn: (record) => record.system, accessorFn: (record) => record.system,
@@ -114,7 +122,11 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
header: ({ column }) => <HeaderButton column={column} name={t`System`} Icon={ServerIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`System`} Icon={ServerIcon} />,
cell: ({ getValue }) => { cell: ({ getValue }) => {
const allSystems = useStore($allSystemsById) const allSystems = useStore($allSystemsById)
return <span className="ms-1.5 xl:w-30 block truncate">{allSystems[getValue() as string]?.name ?? ""}</span> return (
<div className="ms-1.5 max-w-40 block truncate" style={{ width: `${longestName / 1.05}ch` }}>
{allSystems[getValue() as string]?.name ?? ""}
</div>
)
}, },
}, },
{ {
@@ -122,7 +134,11 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
sortingFn: (a, b) => a.original.name.localeCompare(b.original.name), sortingFn: (a, b) => a.original.name.localeCompare(b.original.name),
header: ({ column }) => <HeaderButton column={column} name={t`Device`} Icon={HardDrive} />, header: ({ column }) => <HeaderButton column={column} name={t`Device`} Icon={HardDrive} />,
cell: ({ getValue }) => ( cell: ({ getValue }) => (
<div className="font-medium max-w-40 truncate ms-1.5" title={getValue() as string}> <div
className="font-medium max-w-40 truncate ms-1"
title={getValue() as string}
style={{ width: `${longestDevice / 1.05}ch` }}
>
{getValue() as string} {getValue() as string}
</div> </div>
), ),
@@ -132,7 +148,11 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
sortingFn: (a, b) => a.original.model.localeCompare(b.original.model), sortingFn: (a, b) => a.original.model.localeCompare(b.original.model),
header: ({ column }) => <HeaderButton column={column} name={t`Model`} Icon={Box} />, header: ({ column }) => <HeaderButton column={column} name={t`Model`} Icon={Box} />,
cell: ({ getValue }) => ( cell: ({ getValue }) => (
<div className="max-w-48 truncate ms-1.5" title={getValue() as string}> <div
className="max-w-48 truncate ms-1"
title={getValue() as string}
style={{ width: `${longestModel / 1.05}ch` }}
>
{getValue() as string} {getValue() as string}
</div> </div>
), ),
@@ -141,7 +161,7 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
accessorKey: "capacity", accessorKey: "capacity",
invertSorting: true, invertSorting: true,
header: ({ column }) => <HeaderButton column={column} name={t`Capacity`} Icon={BinaryIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`Capacity`} Icon={BinaryIcon} />,
cell: ({ getValue }) => <span className="ms-1.5">{formatCapacity(getValue() as number)}</span>, cell: ({ getValue }) => <span className="ms-1">{formatCapacity(getValue() as number)}</span>,
}, },
{ {
accessorKey: "state", accessorKey: "state",
@@ -149,9 +169,9 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
cell: ({ getValue }) => { cell: ({ getValue }) => {
const status = getValue() as string const status = getValue() as string
return ( return (
<div className="ms-1.5"> <Badge className="ms-1" variant={status === "PASSED" ? "success" : status === "FAILED" ? "danger" : "warning"}>
<Badge variant={status === "PASSED" ? "success" : status === "FAILED" ? "danger" : "warning"}>{status}</Badge> {status}
</div> </Badge>
) )
}, },
}, },
@@ -160,11 +180,9 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
sortingFn: (a, b) => a.original.type.localeCompare(b.original.type), sortingFn: (a, b) => a.original.type.localeCompare(b.original.type),
header: ({ column }) => <HeaderButton column={column} name={t`Type`} Icon={ArrowLeftRightIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`Type`} Icon={ArrowLeftRightIcon} />,
cell: ({ getValue }) => ( cell: ({ getValue }) => (
<div className="ms-1.5"> <Badge variant="outline" className="ms-1 uppercase">
<Badge variant="outline" className="uppercase"> {getValue() as string}
{getValue() as string} </Badge>
</Badge>
</div>
), ),
}, },
{ {
@@ -176,11 +194,11 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
cell: ({ getValue }) => { cell: ({ getValue }) => {
const hours = getValue() as number | undefined const hours = getValue() as number | undefined
if (hours == null) { if (hours == null) {
return <div className="text-sm text-muted-foreground ms-1.5">N/A</div> return <div className="text-sm text-muted-foreground ms-1">N/A</div>
} }
const seconds = hours * 3600 const seconds = hours * 3600
return ( return (
<div className="text-sm ms-1.5"> <div className="text-sm ms-1">
<div>{secondsToString(seconds, "hour")}</div> <div>{secondsToString(seconds, "hour")}</div>
<div className="text-muted-foreground text-xs">{secondsToString(seconds, "day")}</div> <div className="text-muted-foreground text-xs">{secondsToString(seconds, "day")}</div>
</div> </div>
@@ -196,9 +214,9 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
cell: ({ getValue }) => { cell: ({ getValue }) => {
const cycles = getValue() as number | undefined const cycles = getValue() as number | undefined
if (cycles == null) { if (cycles == null) {
return <div className="text-muted-foreground ms-1.5">N/A</div> return <div className="text-muted-foreground ms-1">N/A</div>
} }
return <span className="ms-1.5">{cycles.toLocaleString()}</span> return <span className="ms-1">{cycles.toLocaleString()}</span>
}, },
}, },
{ {
@@ -208,10 +226,10 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
cell: ({ getValue }) => { cell: ({ getValue }) => {
const temp = getValue() as number | null | undefined const temp = getValue() as number | null | undefined
if (!temp) { if (!temp) {
return <div className="text-muted-foreground ms-1.5">N/A</div> return <div className="text-muted-foreground ms-1">N/A</div>
} }
const { value, unit } = formatTemperature(temp) const { value, unit } = formatTemperature(temp)
return <span className="ms-1.5">{`${value} ${unit}`}</span> return <span className="ms-1">{`${value} ${unit}`}</span>
}, },
}, },
// { // {
@@ -236,7 +254,7 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
// if today, use hourWithSeconds, otherwise use formatShortDate // if today, use hourWithSeconds, otherwise use formatShortDate
const formatter = const formatter =
new Date(timestamp).toDateString() === new Date().toDateString() ? hourWithSeconds : formatShortDate new Date(timestamp).toDateString() === new Date().toDateString() ? hourWithSeconds : formatShortDate
return <span className="ms-1.5 tabular-nums">{formatter(timestamp)}</span> return <span className="ms-1 tabular-nums">{formatter(timestamp)}</span>
}, },
}, },
] ]
@@ -275,6 +293,36 @@ export default function DisksTable({ systemId }: { systemId?: string }) {
const [sheetOpen, setSheetOpen] = useState(false) const [sheetOpen, setSheetOpen] = useState(false)
const [rowActionState, setRowActionState] = useState<{ type: "refresh" | "delete"; id: string } | null>(null) const [rowActionState, setRowActionState] = useState<{ type: "refresh" | "delete"; id: string } | null>(null)
const [globalFilter, setGlobalFilter] = useState("") const [globalFilter, setGlobalFilter] = useState("")
const allSystems = useStore($allSystemsById)
// duplicate the devices to test with more rows
// if (
// smartDevices?.length &&
// smartDevices.length < 50 &&
// typeof window !== "undefined" &&
// window.location.hostname === "localhost"
// ) {
// setSmartDevices([...smartDevices, ...smartDevices, ...smartDevices])
// }
// Calculate the right width for the columns based on the longest strings among the displayed devices
const { longestName, longestModel, longestDevice } = useMemo(() => {
const result = { longestName: 0, longestModel: 0, longestDevice: 0 }
if (!smartDevices || Object.keys(allSystems).length === 0) {
return result
}
const seenSystems = new Set<string>()
for (const device of smartDevices) {
if (!systemId && !seenSystems.has(device.system)) {
seenSystems.add(device.system)
const name = allSystems[device.system]?.name ?? ""
result.longestName = Math.max(result.longestName, getVisualStringWidth(name))
}
result.longestModel = Math.max(result.longestModel, getVisualStringWidth(device.model ?? ""))
result.longestDevice = Math.max(result.longestDevice, getVisualStringWidth(device.name ?? ""))
}
return result
}, [smartDevices, systemId, allSystems])
const openSheet = (disk: SmartDeviceRecord) => { const openSheet = (disk: SmartDeviceRecord) => {
setActiveDiskId(disk.id) setActiveDiskId(disk.id)
@@ -440,9 +488,10 @@ export default function DisksTable({ systemId }: { systemId?: string }) {
// Filter columns based on whether systemId is provided // Filter columns based on whether systemId is provided
const tableColumns = useMemo(() => { const tableColumns = useMemo(() => {
const columns = createColumns(longestName, longestModel, longestDevice)
const baseColumns = systemId ? columns.filter((col) => col.id !== "system") : columns const baseColumns = systemId ? columns.filter((col) => col.id !== "system") : columns
return [...baseColumns, actionColumn] return [...baseColumns, actionColumn]
}, [systemId, actionColumn]) }, [systemId, actionColumn, longestName, longestModel, longestDevice])
const table = useReactTable({ const table = useReactTable({
data: smartDevices || ([] as SmartDeviceRecord[]), data: smartDevices || ([] as SmartDeviceRecord[]),
@@ -474,6 +523,7 @@ export default function DisksTable({ systemId }: { systemId?: string }) {
.every((term) => searchString.includes(term)) .every((term) => searchString.includes(term))
}, },
}) })
const rows = table.getRowModel().rows
// Hide the table on system pages if there's no data, but always show on global page // Hide the table on system pages if there's no data, but always show on global page
if (systemId && !smartDevices?.length && !columnFilters.length) { if (systemId && !smartDevices?.length && !columnFilters.length) {
@@ -513,57 +563,123 @@ export default function DisksTable({ systemId }: { systemId?: string }) {
</div> </div>
</div> </div>
</CardHeader> </CardHeader>
<div className="rounded-md border text-nowrap"> <SmartDevicesTable
<Table> table={table}
<TableHeader> rows={rows}
{table.getHeaderGroups().map((headerGroup) => ( colLength={tableColumns.length}
<TableRow key={headerGroup.id}> data={smartDevices}
{headerGroup.headers.map((header) => { openSheet={openSheet}
return ( />
<TableHead key={header.id} className="px-2">
{header.isPlaceholder ? null : flexRender(header.column.columnDef.header, header.getContext())}
</TableHead>
)
})}
</TableRow>
))}
</TableHeader>
<TableBody>
{table.getRowModel().rows?.length ? (
table.getRowModel().rows.map((row) => (
<TableRow
key={row.id}
data-state={row.getIsSelected() && "selected"}
className="cursor-pointer"
onClick={() => openSheet(row.original)}
>
{row.getVisibleCells().map((cell) => (
<TableCell key={cell.id} className="md:ps-5">
{flexRender(cell.column.columnDef.cell, cell.getContext())}
</TableCell>
))}
</TableRow>
))
) : (
<TableRow>
<TableCell colSpan={tableColumns.length} className="h-24 text-center">
{smartDevices ? (
t`No results.`
) : (
<LoaderCircleIcon className="animate-spin size-10 opacity-60 mx-auto" />
)}
</TableCell>
</TableRow>
)}
</TableBody>
</Table>
</div>
</Card> </Card>
<DiskSheet diskId={activeDiskId} open={sheetOpen} onOpenChange={setSheetOpen} /> <DiskSheet diskId={activeDiskId} open={sheetOpen} onOpenChange={setSheetOpen} />
</div> </div>
) )
} }
const SmartDevicesTable = memo(function SmartDevicesTable({
table,
rows,
colLength,
data,
openSheet,
}: {
table: TableType<SmartDeviceRecord>
rows: Row<SmartDeviceRecord>[]
colLength: number
data: SmartDeviceRecord[] | undefined
openSheet: (disk: SmartDeviceRecord) => void
}) {
const scrollRef = useRef<HTMLDivElement>(null)
const virtualizer = useVirtualizer<HTMLDivElement, HTMLTableRowElement>({
count: rows.length,
estimateSize: () => 65,
getScrollElement: () => scrollRef.current,
overscan: 5,
})
const virtualRows = virtualizer.getVirtualItems()
const paddingTop = Math.max(0, virtualRows[0]?.start ?? 0 - virtualizer.options.scrollMargin)
const paddingBottom = Math.max(0, virtualizer.getTotalSize() - (virtualRows[virtualRows.length - 1]?.end ?? 0))
return (
<div
className={cn(
"h-min max-h-[calc(100dvh-17rem)] max-w-full relative overflow-auto rounded-md border",
(!rows.length || rows.length > 2) && "min-h-50"
)}
ref={scrollRef}
>
<div style={{ height: `${virtualizer.getTotalSize() + 48}px`, paddingTop, paddingBottom }}>
<table className="w-full text-sm text-nowrap">
<SmartTableHead table={table} />
<TableBody>
{rows.length ? (
virtualRows.map((virtualRow) => {
const row = rows[virtualRow.index]
return <SmartDeviceTableRow key={row.id} row={row} virtualRow={virtualRow} openSheet={openSheet} />
})
) : (
<TableRow>
<TableCell colSpan={colLength} className="h-24 text-center pointer-events-none">
{data ? t`No results.` : <LoaderCircleIcon className="animate-spin size-10 opacity-60 mx-auto" />}
</TableCell>
</TableRow>
)}
</TableBody>
</table>
</div>
</div>
)
})
function SmartTableHead({ table }: { table: TableType<SmartDeviceRecord> }) {
return (
<TableHeader className="sticky top-0 z-50 w-full border-b-2">
<div className="absolute -top-2 left-0 w-full h-4 bg-table-header z-50"></div>
{table.getHeaderGroups().map((headerGroup) => (
<TableRow key={headerGroup.id}>
{headerGroup.headers.map((header) => (
<TableHead key={header.id} className="px-2">
{header.isPlaceholder ? null : flexRender(header.column.columnDef.header, header.getContext())}
</TableHead>
))}
</TableRow>
))}
</TableHeader>
)
}
const SmartDeviceTableRow = memo(function SmartDeviceTableRow({
row,
virtualRow,
openSheet,
}: {
row: Row<SmartDeviceRecord>
virtualRow: VirtualItem
openSheet: (disk: SmartDeviceRecord) => void
}) {
return (
<TableRow
data-state={row.getIsSelected() && "selected"}
className="cursor-pointer"
onClick={() => openSheet(row.original)}
>
{row.getVisibleCells().map((cell) => (
<TableCell
key={cell.id}
className="md:ps-5 py-0"
style={{
height: virtualRow.size,
}}
>
{flexRender(cell.column.columnDef.cell, cell.getContext())}
</TableCell>
))}
</TableRow>
)
})
function DiskSheet({ function DiskSheet({
diskId, diskId,
open, open,

View File

@@ -46,7 +46,6 @@ export default function SystemdTable({ systemId }: { systemId?: string }) {
return setData([]) return setData([])
}, [systemId]) }, [systemId])
useEffect(() => { useEffect(() => {
const lastUpdated = data[0]?.updated ?? 0 const lastUpdated = data[0]?.updated ?? 0
@@ -360,15 +359,9 @@ function SystemdSheet({
return ( return (
<> <>
{hasCurrent ? current : notAvailable} {hasCurrent ? current : notAvailable}
{hasMax && ( {hasMax && <span className="text-muted-foreground ms-1.5">{`(${t`limit`}: ${max})`}</span>}
<span className="text-muted-foreground ms-1.5">
{`(${t`limit`}: ${max})`}
</span>
)}
{max === null && ( {max === null && (
<span className="text-muted-foreground ms-1.5"> <span className="text-muted-foreground ms-1.5">{`(${t`limit`}: ${t`Unlimited`.toLowerCase()})`}</span>
{`(${t`limit`}: ${t`Unlimited`.toLowerCase()})`}
</span>
)} )}
</> </>
) )
@@ -435,7 +428,7 @@ function SystemdSheet({
</tr> </tr>
) )
} }
const capitalize = (str: string) => `${str.charAt(0).toUpperCase()}${str.slice(1).toLowerCase()}` const capitalize = (str: string) => `${str.charAt(0).toUpperCase()}${str.slice(1).toLowerCase()}`
return ( return (
@@ -621,6 +614,7 @@ function SystemdSheet({
function SystemdTableHead({ table }: { table: TableType<SystemdRecord> }) { function SystemdTableHead({ table }: { table: TableType<SystemdRecord> }) {
return ( return (
<TableHeader className="sticky top-0 z-50 w-full border-b-2"> <TableHeader className="sticky top-0 z-50 w-full border-b-2">
<div className="absolute -top-2 left-0 w-full h-4 bg-table-header z-50"></div>
{table.getHeaderGroups().map((headerGroup) => ( {table.getHeaderGroups().map((headerGroup) => (
<tr key={headerGroup.id}> <tr key={headerGroup.id}>
{headerGroup.headers.map((header) => { {headerGroup.headers.map((header) => {

View File

@@ -391,6 +391,7 @@ function SystemsTableHead({ table }: { table: TableType<SystemRecord> }) {
const { t } = useLingui() const { t } = useLingui()
return ( return (
<TableHeader className="sticky top-0 z-50 w-full border-b-2"> <TableHeader className="sticky top-0 z-50 w-full border-b-2">
<div className="absolute -top-2 left-0 w-full h-4 bg-table-header z-50"></div>
{table.getHeaderGroups().map((headerGroup) => ( {table.getHeaderGroups().map((headerGroup) => (
<tr key={headerGroup.id}> <tr key={headerGroup.id}>
{headerGroup.headers.map((header) => { {headerGroup.headers.map((header) => {

View File

@@ -185,3 +185,14 @@ export function PlugChargingIcon(props: SVGProps<SVGSVGElement>) {
</svg> </svg>
) )
} }
// Lucide Icons (ISC) - used for ports
export function SquareArrowRightEnterIcon(props: SVGProps<SVGSVGElement>) {
return (
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" {...props}>
<path d="m10 16 4-4-4-4" />
<path d="M3 12h11" />
<path d="M3 8V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2v14a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-3" />
</svg>
)
}

View File

@@ -254,6 +254,7 @@ export interface ContainerRecord extends RecordModel {
system: string system: string
name: string name: string
image: string image: string
ports: string
cpu: number cpu: number
memory: number memory: number
net: number net: number

View File

@@ -98,7 +98,7 @@ func ClearCollection(t testing.TB, app core.App, collectionName string) error {
} }
func (h *TestHub) Cleanup() { func (h *TestHub) Cleanup() {
h.GetAlertManager().StopWorker() h.GetAlertManager().Stop()
h.GetSystemManager().RemoveAllSystems() h.GetSystemManager().RemoveAllSystems()
h.TestApp.Cleanup() h.TestApp.Cleanup()
} }