add check for status alerts which are not properly resolved (#1052)

This commit is contained in:
henrygd
2025-09-17 13:31:49 -04:00
parent 1b9e781d45
commit 59057a2ba4
8 changed files with 187 additions and 40 deletions

View File

@@ -25,7 +25,12 @@ type alertInfo struct {
// startWorker is a long-running goroutine that processes alert tasks
// every x seconds. It must be running to process status alerts.
func (am *AlertManager) startWorker() {
tick := time.Tick(15 * time.Second)
processPendingAlerts := time.Tick(15 * time.Second)
// check for status alerts that are not resolved when system comes up
// (can be removed if we figure out core bug in #1052)
checkStatusAlerts := time.Tick(561 * time.Second)
for {
select {
case <-am.stopChan:
@@ -41,7 +46,9 @@ func (am *AlertManager) startWorker() {
case "cancel":
am.pendingAlerts.Delete(task.alertRecord.Id)
}
case <-tick:
case <-checkStatusAlerts:
resolveStatusAlerts(am.hub)
case <-processPendingAlerts:
// Check for expired alerts every tick
now := time.Now()
for key, value := range am.pendingAlerts.Range {
@@ -170,3 +177,35 @@ func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, a
LinkText: "View " + systemName,
})
}
// resolveStatusAlerts resolves any status alerts that weren't resolved
// when system came up (https://github.com/henrygd/beszel/issues/1052)
func resolveStatusAlerts(app core.App) error {
db := app.DB()
// Find all active status alerts where the system is actually up
var alertIds []string
err := db.NewQuery(`
SELECT a.id
FROM alerts a
JOIN systems s ON a.system = s.id
WHERE a.name = 'Status'
AND a.triggered = true
AND s.status = 'up'
`).Column(&alertIds)
if err != nil {
return err
}
// resolve all matching alert records
for _, alertId := range alertIds {
alert, err := app.FindRecordById("alerts", alertId)
if err != nil {
return err
}
alert.Set("triggered", false)
err = app.Save(alert)
if err != nil {
return err
}
}
return nil
}

View File

@@ -13,6 +13,7 @@ import (
"testing/synctest"
"time"
"github.com/henrygd/beszel/internal/alerts"
beszelTests "github.com/henrygd/beszel/internal/tests"
"github.com/pocketbase/dbx"
@@ -369,33 +370,9 @@ func TestUserAlertsApi(t *testing.T) {
}
}
func getHubWithUser(t *testing.T) (*beszelTests.TestHub, *core.Record) {
hub, err := beszelTests.NewTestHub(t.TempDir())
assert.NoError(t, err)
hub.StartHub()
// Manually initialize the system manager to bind event hooks
err = hub.GetSystemManager().Initialize()
assert.NoError(t, err)
// Create a test user
user, err := beszelTests.CreateUser(hub, "test@example.com", "password")
assert.NoError(t, err)
// Create user settings for the test user (required for alert notifications)
userSettingsData := map[string]any{
"user": user.Id,
"settings": `{"emails":[test@example.com],"webhooks":[]}`,
}
_, err = beszelTests.CreateRecord(hub, "user_settings", userSettingsData)
assert.NoError(t, err)
return hub, user
}
func TestStatusAlerts(t *testing.T) {
synctest.Test(t, func(t *testing.T) {
hub, user := getHubWithUser(t)
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
systems, err := beszelTests.CreateSystems(hub, 4, user.Id, "paused")
@@ -476,7 +453,7 @@ func TestStatusAlerts(t *testing.T) {
func TestAlertsHistory(t *testing.T) {
synctest.Test(t, func(t *testing.T) {
hub, user := getHubWithUser(t)
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
// Create systems and alerts
@@ -602,3 +579,102 @@ func TestAlertsHistory(t *testing.T) {
assert.EqualValues(t, 2, totalHistoryCount, "Should have 2 total alert history records")
})
}
func TestResolveStatusAlerts(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
// Create a systemUp
systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
"name": "test-system",
"users": []string{user.Id},
"host": "127.0.0.1",
"status": "up",
})
assert.NoError(t, err)
systemDown, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
"name": "test-system-2",
"users": []string{user.Id},
"host": "127.0.0.2",
"status": "up",
})
assert.NoError(t, err)
// Create a status alertUp for the system
alertUp, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": systemUp.Id,
"user": user.Id,
"min": 1,
})
assert.NoError(t, err)
alertDown, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
"name": "Status",
"system": systemDown.Id,
"user": user.Id,
"min": 1,
})
assert.NoError(t, err)
// Verify alert is not triggered initially
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered initially")
// Set the system to 'up' (this should not trigger the alert)
systemUp.Set("status", "up")
err = hub.SaveNoValidate(systemUp)
assert.NoError(t, err)
systemDown.Set("status", "down")
err = hub.SaveNoValidate(systemDown)
assert.NoError(t, err)
// Wait a moment for any processing
time.Sleep(10 * time.Millisecond)
// Verify alertUp is still not triggered after setting system to up
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
assert.NoError(t, err)
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered when system is up")
// Manually set both alerts triggered to true
alertUp.Set("triggered", true)
err = hub.SaveNoValidate(alertUp)
assert.NoError(t, err)
alertDown.Set("triggered", true)
err = hub.SaveNoValidate(alertDown)
assert.NoError(t, err)
// Verify we have exactly one alert with triggered true
triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
assert.NoError(t, err)
assert.EqualValues(t, 2, triggeredCount, "Should have exactly two alerts with triggered true")
// Verify the specific alertUp is triggered
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
assert.NoError(t, err)
assert.True(t, alertUp.GetBool("triggered"), "Alert should be triggered")
// Verify we have two unresolved alert history records
alertHistoryCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
assert.NoError(t, err)
assert.EqualValues(t, 2, alertHistoryCount, "Should have exactly two unresolved alert history records")
err = alerts.ResolveStatusAlerts(hub)
assert.NoError(t, err)
// Verify alertUp is not triggered after resolving
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
assert.NoError(t, err)
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered after resolving")
// Verify alertDown is still triggered
alertDown, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertDown.Id})
assert.NoError(t, err)
assert.True(t, alertDown.GetBool("triggered"), "Alert should still be triggered after resolving")
// Verify we have one unresolved alert history record
alertHistoryCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
assert.NoError(t, err)
assert.EqualValues(t, 1, alertHistoryCount, "Should have exactly one unresolved alert history record")
}

View File

@@ -1,3 +1,6 @@
//go:build testing
// +build testing
package alerts
import (
@@ -53,3 +56,7 @@ func (am *AlertManager) ForceExpirePendingAlerts() {
return true
})
}
func ResolveStatusAlerts(app core.App) error {
return resolveStatusAlerts(app)
}

View File

@@ -175,7 +175,7 @@ func TestDeleteOldSystemStats(t *testing.T) {
}
// Run deletion
err = records.TestDeleteOldSystemStats(hub)
err = records.DeleteOldSystemStats(hub)
require.NoError(t, err)
// Verify results
@@ -268,7 +268,7 @@ func TestDeleteOldAlertsHistory(t *testing.T) {
assert.Equal(t, int64(tc.alertCount), countBefore, "Initial count should match")
// Run deletion
err = records.TestDeleteOldAlertsHistory(hub, tc.countToKeep, tc.countBeforeDeletion)
err = records.DeleteOldAlertsHistory(hub, tc.countToKeep, tc.countBeforeDeletion)
require.NoError(t, err)
// Count after deletion
@@ -332,7 +332,7 @@ func TestDeleteOldAlertsHistoryEdgeCases(t *testing.T) {
}
// Should not error and should not delete anything
err = records.TestDeleteOldAlertsHistory(hub, 10, 20)
err = records.DeleteOldAlertsHistory(hub, 10, 20)
require.NoError(t, err)
count, err := hub.CountRecords("alerts_history")
@@ -346,7 +346,7 @@ func TestDeleteOldAlertsHistoryEdgeCases(t *testing.T) {
require.NoError(t, err)
// Should not error with empty table
err = records.TestDeleteOldAlertsHistory(hub, 10, 20)
err = records.DeleteOldAlertsHistory(hub, 10, 20)
require.NoError(t, err)
})
}
@@ -376,7 +376,7 @@ func TestTwoDecimals(t *testing.T) {
}
for _, tc := range testCases {
result := records.TestTwoDecimals(tc.input)
result := records.TwoDecimals(tc.input)
assert.InDelta(t, tc.expected, result, 0.02, "twoDecimals(%f) should equal %f", tc.input, tc.expected)
}
}

View File

@@ -7,17 +7,17 @@ import (
"github.com/pocketbase/pocketbase/core"
)
// TestDeleteOldSystemStats exposes deleteOldSystemStats for testing
func TestDeleteOldSystemStats(app core.App) error {
// DeleteOldSystemStats exposes deleteOldSystemStats for testing
func DeleteOldSystemStats(app core.App) error {
return deleteOldSystemStats(app)
}
// TestDeleteOldAlertsHistory exposes deleteOldAlertsHistory for testing
func TestDeleteOldAlertsHistory(app core.App, countToKeep, countBeforeDeletion int) error {
// DeleteOldAlertsHistory exposes deleteOldAlertsHistory for testing
func DeleteOldAlertsHistory(app core.App, countToKeep, countBeforeDeletion int) error {
return deleteOldAlertsHistory(app, countToKeep, countBeforeDeletion)
}
// TestTwoDecimals exposes twoDecimals for testing
func TestTwoDecimals(value float64) float64 {
// TwoDecimals exposes twoDecimals for testing
func TwoDecimals(value float64) float64 {
return twoDecimals(value)
}

View File

@@ -125,3 +125,28 @@ func CreateSystems(app core.App, count int, userId string, status string) ([]*co
}
return systems, nil
}
// GetHubWithUser creates a test hub with a test user and user settings
func GetHubWithUser(t *testing.T) (*TestHub, *core.Record) {
hub, err := NewTestHub(t.TempDir())
assert.NoError(t, err)
hub.StartHub()
// Manually initialize the system manager to bind event hooks
err = hub.GetSystemManager().Initialize()
assert.NoError(t, err)
// Create a test user
user, err := CreateUser(hub, "test@example.com", "password")
assert.NoError(t, err)
// Create user settings for the test user (required for alert notifications)
userSettingsData := map[string]any{
"user": user.Id,
"settings": `{"emails":[test@example.com],"webhooks":[]}`,
}
_, err = CreateRecord(hub, "user_settings", userSettingsData)
assert.NoError(t, err)
return hub, user
}