mirror of
https://github.com/henrygd/beszel.git
synced 2026-03-21 21:26:16 +01:00
fix: Use historical records to average disk usage for extra disk alerts (#1801)
- Introduced a new test file `alerts_disk_test.go` to validate the behavior of disk alerts using historical data for extra filesystems.
- Enhanced the `HandleSystemAlerts` function to correctly calculate disk usage for extra filesystems based on historical records.
- Updated the `SystemAlertStats` struct to include `ExtraFs` for tracking additional filesystem statistics.
This commit is contained in:
@@ -40,6 +40,11 @@ type UserNotificationSettings struct {
|
|||||||
Webhooks []string `json:"webhooks"`
|
Webhooks []string `json:"webhooks"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SystemAlertFsStats carries the total and used disk figures recorded for a
// single extra filesystem in alert statistics.
type SystemAlertFsStats struct {
	// DiskTotal is decoded from / encoded to the JSON key "d".
	DiskTotal float64 `json:"d"`
	// DiskUsed is decoded from / encoded to the JSON key "du".
	DiskUsed float64 `json:"du"`
}
|
||||||
|
|
||||||
type SystemAlertStats struct {
|
type SystemAlertStats struct {
|
||||||
Cpu float64 `json:"cpu"`
|
Cpu float64 `json:"cpu"`
|
||||||
Mem float64 `json:"mp"`
|
Mem float64 `json:"mp"`
|
||||||
@@ -50,6 +55,7 @@ type SystemAlertStats struct {
|
|||||||
Temperatures map[string]float32 `json:"t"`
|
Temperatures map[string]float32 `json:"t"`
|
||||||
LoadAvg [3]float64 `json:"la"`
|
LoadAvg [3]float64 `json:"la"`
|
||||||
Battery [2]uint8 `json:"bat"`
|
Battery [2]uint8 `json:"bat"`
|
||||||
|
ExtraFs map[string]SystemAlertFsStats `json:"efs"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type SystemAlertGPUData struct {
|
type SystemAlertGPUData struct {
|
||||||
|
|||||||
155
internal/alerts/alerts_disk_test.go
Normal file
155
internal/alerts/alerts_disk_test.go
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
//go:build testing
|
||||||
|
|
||||||
|
package alerts_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/henrygd/beszel/internal/entities/system"
|
||||||
|
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||||
|
|
||||||
|
"github.com/pocketbase/dbx"
|
||||||
|
"github.com/pocketbase/pocketbase/tools/types"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestDiskAlertExtraFsMultiMinute tests that multi-minute disk alerts correctly use
|
||||||
|
// historical per-minute values for extra (non-root) filesystems, not the current live snapshot.
|
||||||
|
func TestDiskAlertExtraFsMultiMinute(t *testing.T) {
|
||||||
|
hub, user := beszelTests.GetHubWithUser(t)
|
||||||
|
defer hub.Cleanup()
|
||||||
|
|
||||||
|
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||||
|
require.NoError(t, err)
|
||||||
|
systemRecord := systems[0]
|
||||||
|
|
||||||
|
// Disk alert: threshold 80%, min=2 (requires historical averaging)
|
||||||
|
diskAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||||
|
"name": "Disk",
|
||||||
|
"system": systemRecord.Id,
|
||||||
|
"user": user.Id,
|
||||||
|
"value": 80, // threshold: 80%
|
||||||
|
"min": 2, // 2 minutes - requires historical averaging
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.False(t, diskAlert.GetBool("triggered"), "Alert should not be triggered initially")
|
||||||
|
|
||||||
|
am := hub.GetAlertManager()
|
||||||
|
now := time.Now().UTC()
|
||||||
|
|
||||||
|
extraFsHigh := map[string]*system.FsStats{
|
||||||
|
"/mnt/data": {DiskTotal: 1000, DiskUsed: 920}, // 92% - above threshold
|
||||||
|
}
|
||||||
|
|
||||||
|
// Insert 4 historical records spread over 3 minutes (same pattern as battery tests).
|
||||||
|
// The oldest record must predate (now - 2min) so the alert time window is valid.
|
||||||
|
recordTimes := []time.Duration{
|
||||||
|
-180 * time.Second, // 3 min ago - anchors oldest record before alert.time
|
||||||
|
-90 * time.Second,
|
||||||
|
-60 * time.Second,
|
||||||
|
-30 * time.Second,
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, offset := range recordTimes {
|
||||||
|
stats := system.Stats{
|
||||||
|
DiskPct: 30, // root disk at 30% - below threshold
|
||||||
|
ExtraFs: extraFsHigh,
|
||||||
|
}
|
||||||
|
statsJSON, _ := json.Marshal(stats)
|
||||||
|
|
||||||
|
recordTime := now.Add(offset)
|
||||||
|
record, err := beszelTests.CreateRecord(hub, "system_stats", map[string]any{
|
||||||
|
"system": systemRecord.Id,
|
||||||
|
"type": "1m",
|
||||||
|
"stats": string(statsJSON),
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
record.SetRaw("created", recordTime.Format(types.DefaultDateLayout))
|
||||||
|
err = hub.SaveNoValidate(record)
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
combinedDataHigh := &system.CombinedData{
|
||||||
|
Stats: system.Stats{
|
||||||
|
DiskPct: 30,
|
||||||
|
ExtraFs: extraFsHigh,
|
||||||
|
},
|
||||||
|
Info: system.Info{
|
||||||
|
DiskPct: 30,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
systemRecord.Set("updated", now)
|
||||||
|
err = hub.SaveNoValidate(systemRecord)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
err = am.HandleSystemAlerts(systemRecord, combinedDataHigh)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
time.Sleep(20 * time.Millisecond)
|
||||||
|
|
||||||
|
diskAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": diskAlert.Id})
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.True(t, diskAlert.GetBool("triggered"),
|
||||||
|
"Alert SHOULD be triggered when extra disk average (92%%) exceeds threshold (80%%)")
|
||||||
|
|
||||||
|
// --- Resolution: extra disk drops to 50%, alert should resolve ---
|
||||||
|
|
||||||
|
extraFsLow := map[string]*system.FsStats{
|
||||||
|
"/mnt/data": {DiskTotal: 1000, DiskUsed: 500}, // 50% - below threshold
|
||||||
|
}
|
||||||
|
|
||||||
|
newNow := now.Add(2 * time.Minute)
|
||||||
|
recordTimesLow := []time.Duration{
|
||||||
|
-180 * time.Second,
|
||||||
|
-90 * time.Second,
|
||||||
|
-60 * time.Second,
|
||||||
|
-30 * time.Second,
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, offset := range recordTimesLow {
|
||||||
|
stats := system.Stats{
|
||||||
|
DiskPct: 30,
|
||||||
|
ExtraFs: extraFsLow,
|
||||||
|
}
|
||||||
|
statsJSON, _ := json.Marshal(stats)
|
||||||
|
|
||||||
|
recordTime := newNow.Add(offset)
|
||||||
|
record, err := beszelTests.CreateRecord(hub, "system_stats", map[string]any{
|
||||||
|
"system": systemRecord.Id,
|
||||||
|
"type": "1m",
|
||||||
|
"stats": string(statsJSON),
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
record.SetRaw("created", recordTime.Format(types.DefaultDateLayout))
|
||||||
|
err = hub.SaveNoValidate(record)
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
combinedDataLow := &system.CombinedData{
|
||||||
|
Stats: system.Stats{
|
||||||
|
DiskPct: 30,
|
||||||
|
ExtraFs: extraFsLow,
|
||||||
|
},
|
||||||
|
Info: system.Info{
|
||||||
|
DiskPct: 30,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
systemRecord.Set("updated", newNow)
|
||||||
|
err = hub.SaveNoValidate(systemRecord)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
err = am.HandleSystemAlerts(systemRecord, combinedDataLow)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
time.Sleep(20 * time.Millisecond)
|
||||||
|
|
||||||
|
diskAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": diskAlert.Id})
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.False(t, diskAlert.GetBool("triggered"),
|
||||||
|
"Alert should be resolved when extra disk average (50%%) drops below threshold (80%%)")
|
||||||
|
}
|
||||||
@@ -195,20 +195,22 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
|
|||||||
alert.val += stats.NetSent + stats.NetRecv
|
alert.val += stats.NetSent + stats.NetRecv
|
||||||
case "Disk":
|
case "Disk":
|
||||||
if alert.mapSums == nil {
|
if alert.mapSums == nil {
|
||||||
alert.mapSums = make(map[string]float32, len(data.Stats.ExtraFs)+1)
|
alert.mapSums = make(map[string]float32, len(stats.ExtraFs)+1)
|
||||||
}
|
}
|
||||||
// add root disk
|
// add root disk
|
||||||
if _, ok := alert.mapSums["root"]; !ok {
|
if _, ok := alert.mapSums["root"]; !ok {
|
||||||
alert.mapSums["root"] = 0.0
|
alert.mapSums["root"] = 0.0
|
||||||
}
|
}
|
||||||
alert.mapSums["root"] += float32(stats.Disk)
|
alert.mapSums["root"] += float32(stats.Disk)
|
||||||
// add extra disks
|
// add extra disks from historical record
|
||||||
for key, fs := range data.Stats.ExtraFs {
|
for key, fs := range stats.ExtraFs {
|
||||||
|
if fs.DiskTotal > 0 {
|
||||||
if _, ok := alert.mapSums[key]; !ok {
|
if _, ok := alert.mapSums[key]; !ok {
|
||||||
alert.mapSums[key] = 0.0
|
alert.mapSums[key] = 0.0
|
||||||
}
|
}
|
||||||
alert.mapSums[key] += float32(fs.DiskUsed / fs.DiskTotal * 100)
|
alert.mapSums[key] += float32(fs.DiskUsed / fs.DiskTotal * 100)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
case "Temperature":
|
case "Temperature":
|
||||||
if alert.mapSums == nil {
|
if alert.mapSums == nil {
|
||||||
alert.mapSums = make(map[string]float32, len(stats.Temperatures))
|
alert.mapSums = make(map[string]float32, len(stats.Temperatures))
|
||||||
|
|||||||
Reference in New Issue
Block a user