mirror of
https://github.com/henrygd/beszel.git
synced 2025-12-17 02:36:17 +01:00
add check for status alerts which are not properly resolved (#1052)
This commit is contained in:
@@ -25,7 +25,12 @@ type alertInfo struct {
|
||||
// startWorker is a long-running goroutine that processes alert tasks
|
||||
// every x seconds. It must be running to process status alerts.
|
||||
func (am *AlertManager) startWorker() {
|
||||
tick := time.Tick(15 * time.Second)
|
||||
processPendingAlerts := time.Tick(15 * time.Second)
|
||||
|
||||
// check for status alerts that are not resolved when system comes up
|
||||
// (can be removed if we figure out core bug in #1052)
|
||||
checkStatusAlerts := time.Tick(561 * time.Second)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-am.stopChan:
|
||||
@@ -41,7 +46,9 @@ func (am *AlertManager) startWorker() {
|
||||
case "cancel":
|
||||
am.pendingAlerts.Delete(task.alertRecord.Id)
|
||||
}
|
||||
case <-tick:
|
||||
case <-checkStatusAlerts:
|
||||
resolveStatusAlerts(am.hub)
|
||||
case <-processPendingAlerts:
|
||||
// Check for expired alerts every tick
|
||||
now := time.Now()
|
||||
for key, value := range am.pendingAlerts.Range {
|
||||
@@ -170,3 +177,35 @@ func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, a
|
||||
LinkText: "View " + systemName,
|
||||
})
|
||||
}
|
||||
|
||||
// resolveStatusAlerts resolves any status alerts that weren't resolved
|
||||
// when system came up (https://github.com/henrygd/beszel/issues/1052)
|
||||
func resolveStatusAlerts(app core.App) error {
|
||||
db := app.DB()
|
||||
// Find all active status alerts where the system is actually up
|
||||
var alertIds []string
|
||||
err := db.NewQuery(`
|
||||
SELECT a.id
|
||||
FROM alerts a
|
||||
JOIN systems s ON a.system = s.id
|
||||
WHERE a.name = 'Status'
|
||||
AND a.triggered = true
|
||||
AND s.status = 'up'
|
||||
`).Column(&alertIds)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// resolve all matching alert records
|
||||
for _, alertId := range alertIds {
|
||||
alert, err := app.FindRecordById("alerts", alertId)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
alert.Set("triggered", false)
|
||||
err = app.Save(alert)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/alerts"
|
||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||
|
||||
"github.com/pocketbase/dbx"
|
||||
@@ -369,33 +370,9 @@ func TestUserAlertsApi(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func getHubWithUser(t *testing.T) (*beszelTests.TestHub, *core.Record) {
|
||||
hub, err := beszelTests.NewTestHub(t.TempDir())
|
||||
assert.NoError(t, err)
|
||||
hub.StartHub()
|
||||
|
||||
// Manually initialize the system manager to bind event hooks
|
||||
err = hub.GetSystemManager().Initialize()
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a test user
|
||||
user, err := beszelTests.CreateUser(hub, "test@example.com", "password")
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create user settings for the test user (required for alert notifications)
|
||||
userSettingsData := map[string]any{
|
||||
"user": user.Id,
|
||||
"settings": `{"emails":[test@example.com],"webhooks":[]}`,
|
||||
}
|
||||
_, err = beszelTests.CreateRecord(hub, "user_settings", userSettingsData)
|
||||
assert.NoError(t, err)
|
||||
|
||||
return hub, user
|
||||
}
|
||||
|
||||
func TestStatusAlerts(t *testing.T) {
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
hub, user := getHubWithUser(t)
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 4, user.Id, "paused")
|
||||
@@ -476,7 +453,7 @@ func TestStatusAlerts(t *testing.T) {
|
||||
|
||||
func TestAlertsHistory(t *testing.T) {
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
hub, user := getHubWithUser(t)
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create systems and alerts
|
||||
@@ -602,3 +579,102 @@ func TestAlertsHistory(t *testing.T) {
|
||||
assert.EqualValues(t, 2, totalHistoryCount, "Should have 2 total alert history records")
|
||||
})
|
||||
}
|
||||
func TestResolveStatusAlerts(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a systemUp
|
||||
systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.1",
|
||||
"status": "up",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
systemDown, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system-2",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.2",
|
||||
"status": "up",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a status alertUp for the system
|
||||
alertUp, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": systemUp.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
alertDown, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": systemDown.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify alert is not triggered initially
|
||||
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered initially")
|
||||
|
||||
// Set the system to 'up' (this should not trigger the alert)
|
||||
systemUp.Set("status", "up")
|
||||
err = hub.SaveNoValidate(systemUp)
|
||||
assert.NoError(t, err)
|
||||
|
||||
systemDown.Set("status", "down")
|
||||
err = hub.SaveNoValidate(systemDown)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Wait a moment for any processing
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
|
||||
// Verify alertUp is still not triggered after setting system to up
|
||||
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered when system is up")
|
||||
|
||||
// Manually set both alerts triggered to true
|
||||
alertUp.Set("triggered", true)
|
||||
err = hub.SaveNoValidate(alertUp)
|
||||
assert.NoError(t, err)
|
||||
alertDown.Set("triggered", true)
|
||||
err = hub.SaveNoValidate(alertDown)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify we have exactly one alert with triggered true
|
||||
triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 2, triggeredCount, "Should have exactly two alerts with triggered true")
|
||||
|
||||
// Verify the specific alertUp is triggered
|
||||
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, alertUp.GetBool("triggered"), "Alert should be triggered")
|
||||
|
||||
// Verify we have two unresolved alert history records
|
||||
alertHistoryCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 2, alertHistoryCount, "Should have exactly two unresolved alert history records")
|
||||
|
||||
err = alerts.ResolveStatusAlerts(hub)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify alertUp is not triggered after resolving
|
||||
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered after resolving")
|
||||
// Verify alertDown is still triggered
|
||||
alertDown, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertDown.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, alertDown.GetBool("triggered"), "Alert should still be triggered after resolving")
|
||||
|
||||
// Verify we have one unresolved alert history record
|
||||
alertHistoryCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 1, alertHistoryCount, "Should have exactly one unresolved alert history record")
|
||||
|
||||
}
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package alerts
|
||||
|
||||
import (
|
||||
@@ -53,3 +56,7 @@ func (am *AlertManager) ForceExpirePendingAlerts() {
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
func ResolveStatusAlerts(app core.App) error {
|
||||
return resolveStatusAlerts(app)
|
||||
}
|
||||
|
||||
@@ -175,7 +175,7 @@ func TestDeleteOldSystemStats(t *testing.T) {
|
||||
}
|
||||
|
||||
// Run deletion
|
||||
err = records.TestDeleteOldSystemStats(hub)
|
||||
err = records.DeleteOldSystemStats(hub)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify results
|
||||
@@ -268,7 +268,7 @@ func TestDeleteOldAlertsHistory(t *testing.T) {
|
||||
assert.Equal(t, int64(tc.alertCount), countBefore, "Initial count should match")
|
||||
|
||||
// Run deletion
|
||||
err = records.TestDeleteOldAlertsHistory(hub, tc.countToKeep, tc.countBeforeDeletion)
|
||||
err = records.DeleteOldAlertsHistory(hub, tc.countToKeep, tc.countBeforeDeletion)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Count after deletion
|
||||
@@ -332,7 +332,7 @@ func TestDeleteOldAlertsHistoryEdgeCases(t *testing.T) {
|
||||
}
|
||||
|
||||
// Should not error and should not delete anything
|
||||
err = records.TestDeleteOldAlertsHistory(hub, 10, 20)
|
||||
err = records.DeleteOldAlertsHistory(hub, 10, 20)
|
||||
require.NoError(t, err)
|
||||
|
||||
count, err := hub.CountRecords("alerts_history")
|
||||
@@ -346,7 +346,7 @@ func TestDeleteOldAlertsHistoryEdgeCases(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should not error with empty table
|
||||
err = records.TestDeleteOldAlertsHistory(hub, 10, 20)
|
||||
err = records.DeleteOldAlertsHistory(hub, 10, 20)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
}
|
||||
@@ -376,7 +376,7 @@ func TestTwoDecimals(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
result := records.TestTwoDecimals(tc.input)
|
||||
result := records.TwoDecimals(tc.input)
|
||||
assert.InDelta(t, tc.expected, result, 0.02, "twoDecimals(%f) should equal %f", tc.input, tc.expected)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,17 +7,17 @@ import (
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
)
|
||||
|
||||
// TestDeleteOldSystemStats exposes deleteOldSystemStats for testing
|
||||
func TestDeleteOldSystemStats(app core.App) error {
|
||||
// DeleteOldSystemStats exposes deleteOldSystemStats for testing
|
||||
func DeleteOldSystemStats(app core.App) error {
|
||||
return deleteOldSystemStats(app)
|
||||
}
|
||||
|
||||
// TestDeleteOldAlertsHistory exposes deleteOldAlertsHistory for testing
|
||||
func TestDeleteOldAlertsHistory(app core.App, countToKeep, countBeforeDeletion int) error {
|
||||
// DeleteOldAlertsHistory exposes deleteOldAlertsHistory for testing
|
||||
func DeleteOldAlertsHistory(app core.App, countToKeep, countBeforeDeletion int) error {
|
||||
return deleteOldAlertsHistory(app, countToKeep, countBeforeDeletion)
|
||||
}
|
||||
|
||||
// TestTwoDecimals exposes twoDecimals for testing
|
||||
func TestTwoDecimals(value float64) float64 {
|
||||
// TwoDecimals exposes twoDecimals for testing
|
||||
func TwoDecimals(value float64) float64 {
|
||||
return twoDecimals(value)
|
||||
}
|
||||
|
||||
@@ -125,3 +125,28 @@ func CreateSystems(app core.App, count int, userId string, status string) ([]*co
|
||||
}
|
||||
return systems, nil
|
||||
}
|
||||
|
||||
// GetHubWithUser creates a test hub with a test user and user settings
|
||||
func GetHubWithUser(t *testing.T) (*TestHub, *core.Record) {
|
||||
hub, err := NewTestHub(t.TempDir())
|
||||
assert.NoError(t, err)
|
||||
hub.StartHub()
|
||||
|
||||
// Manually initialize the system manager to bind event hooks
|
||||
err = hub.GetSystemManager().Initialize()
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a test user
|
||||
user, err := CreateUser(hub, "test@example.com", "password")
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create user settings for the test user (required for alert notifications)
|
||||
userSettingsData := map[string]any{
|
||||
"user": user.Id,
|
||||
"settings": `{"emails":[test@example.com],"webhooks":[]}`,
|
||||
}
|
||||
_, err = CreateRecord(hub, "user_settings", userSettingsData)
|
||||
assert.NoError(t, err)
|
||||
|
||||
return hub, user
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user