mirror of
https://github.com/henrygd/beszel.git
synced 2025-12-17 10:46:16 +01:00
add check for status alerts which are not properly resolved (#1052)
This commit is contained in:
@@ -1,3 +1,4 @@
|
|||||||
|
// Package deltatracker provides a tracker for calculating differences in numeric values over time.
|
||||||
package deltatracker
|
package deltatracker
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
// Package deltatracker provides a tracker for calculating differences in numeric values over time.
|
|
||||||
package deltatracker
|
package deltatracker
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -25,7 +25,12 @@ type alertInfo struct {
|
|||||||
// startWorker is a long-running goroutine that processes alert tasks
|
// startWorker is a long-running goroutine that processes alert tasks
|
||||||
// every x seconds. It must be running to process status alerts.
|
// every x seconds. It must be running to process status alerts.
|
||||||
func (am *AlertManager) startWorker() {
|
func (am *AlertManager) startWorker() {
|
||||||
tick := time.Tick(15 * time.Second)
|
processPendingAlerts := time.Tick(15 * time.Second)
|
||||||
|
|
||||||
|
// check for status alerts that are not resolved when system comes up
|
||||||
|
// (can be removed if we figure out core bug in #1052)
|
||||||
|
checkStatusAlerts := time.Tick(561 * time.Second)
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-am.stopChan:
|
case <-am.stopChan:
|
||||||
@@ -41,7 +46,9 @@ func (am *AlertManager) startWorker() {
|
|||||||
case "cancel":
|
case "cancel":
|
||||||
am.pendingAlerts.Delete(task.alertRecord.Id)
|
am.pendingAlerts.Delete(task.alertRecord.Id)
|
||||||
}
|
}
|
||||||
case <-tick:
|
case <-checkStatusAlerts:
|
||||||
|
resolveStatusAlerts(am.hub)
|
||||||
|
case <-processPendingAlerts:
|
||||||
// Check for expired alerts every tick
|
// Check for expired alerts every tick
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
for key, value := range am.pendingAlerts.Range {
|
for key, value := range am.pendingAlerts.Range {
|
||||||
@@ -170,3 +177,35 @@ func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, a
|
|||||||
LinkText: "View " + systemName,
|
LinkText: "View " + systemName,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// resolveStatusAlerts resolves any status alerts that weren't resolved
|
||||||
|
// when system came up (https://github.com/henrygd/beszel/issues/1052)
|
||||||
|
func resolveStatusAlerts(app core.App) error {
|
||||||
|
db := app.DB()
|
||||||
|
// Find all active status alerts where the system is actually up
|
||||||
|
var alertIds []string
|
||||||
|
err := db.NewQuery(`
|
||||||
|
SELECT a.id
|
||||||
|
FROM alerts a
|
||||||
|
JOIN systems s ON a.system = s.id
|
||||||
|
WHERE a.name = 'Status'
|
||||||
|
AND a.triggered = true
|
||||||
|
AND s.status = 'up'
|
||||||
|
`).Column(&alertIds)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// resolve all matching alert records
|
||||||
|
for _, alertId := range alertIds {
|
||||||
|
alert, err := app.FindRecordById("alerts", alertId)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
alert.Set("triggered", false)
|
||||||
|
err = app.Save(alert)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ import (
|
|||||||
"testing/synctest"
|
"testing/synctest"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/henrygd/beszel/internal/alerts"
|
||||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||||
|
|
||||||
"github.com/pocketbase/dbx"
|
"github.com/pocketbase/dbx"
|
||||||
@@ -369,33 +370,9 @@ func TestUserAlertsApi(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func getHubWithUser(t *testing.T) (*beszelTests.TestHub, *core.Record) {
|
|
||||||
hub, err := beszelTests.NewTestHub(t.TempDir())
|
|
||||||
assert.NoError(t, err)
|
|
||||||
hub.StartHub()
|
|
||||||
|
|
||||||
// Manually initialize the system manager to bind event hooks
|
|
||||||
err = hub.GetSystemManager().Initialize()
|
|
||||||
assert.NoError(t, err)
|
|
||||||
|
|
||||||
// Create a test user
|
|
||||||
user, err := beszelTests.CreateUser(hub, "test@example.com", "password")
|
|
||||||
assert.NoError(t, err)
|
|
||||||
|
|
||||||
// Create user settings for the test user (required for alert notifications)
|
|
||||||
userSettingsData := map[string]any{
|
|
||||||
"user": user.Id,
|
|
||||||
"settings": `{"emails":[test@example.com],"webhooks":[]}`,
|
|
||||||
}
|
|
||||||
_, err = beszelTests.CreateRecord(hub, "user_settings", userSettingsData)
|
|
||||||
assert.NoError(t, err)
|
|
||||||
|
|
||||||
return hub, user
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestStatusAlerts(t *testing.T) {
|
func TestStatusAlerts(t *testing.T) {
|
||||||
synctest.Test(t, func(t *testing.T) {
|
synctest.Test(t, func(t *testing.T) {
|
||||||
hub, user := getHubWithUser(t)
|
hub, user := beszelTests.GetHubWithUser(t)
|
||||||
defer hub.Cleanup()
|
defer hub.Cleanup()
|
||||||
|
|
||||||
systems, err := beszelTests.CreateSystems(hub, 4, user.Id, "paused")
|
systems, err := beszelTests.CreateSystems(hub, 4, user.Id, "paused")
|
||||||
@@ -476,7 +453,7 @@ func TestStatusAlerts(t *testing.T) {
|
|||||||
|
|
||||||
func TestAlertsHistory(t *testing.T) {
|
func TestAlertsHistory(t *testing.T) {
|
||||||
synctest.Test(t, func(t *testing.T) {
|
synctest.Test(t, func(t *testing.T) {
|
||||||
hub, user := getHubWithUser(t)
|
hub, user := beszelTests.GetHubWithUser(t)
|
||||||
defer hub.Cleanup()
|
defer hub.Cleanup()
|
||||||
|
|
||||||
// Create systems and alerts
|
// Create systems and alerts
|
||||||
@@ -602,3 +579,102 @@ func TestAlertsHistory(t *testing.T) {
|
|||||||
assert.EqualValues(t, 2, totalHistoryCount, "Should have 2 total alert history records")
|
assert.EqualValues(t, 2, totalHistoryCount, "Should have 2 total alert history records")
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
func TestResolveStatusAlerts(t *testing.T) {
|
||||||
|
hub, user := beszelTests.GetHubWithUser(t)
|
||||||
|
defer hub.Cleanup()
|
||||||
|
|
||||||
|
// Create a systemUp
|
||||||
|
systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||||
|
"name": "test-system",
|
||||||
|
"users": []string{user.Id},
|
||||||
|
"host": "127.0.0.1",
|
||||||
|
"status": "up",
|
||||||
|
})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
systemDown, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||||
|
"name": "test-system-2",
|
||||||
|
"users": []string{user.Id},
|
||||||
|
"host": "127.0.0.2",
|
||||||
|
"status": "up",
|
||||||
|
})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Create a status alertUp for the system
|
||||||
|
alertUp, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||||
|
"name": "Status",
|
||||||
|
"system": systemUp.Id,
|
||||||
|
"user": user.Id,
|
||||||
|
"min": 1,
|
||||||
|
})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
alertDown, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||||
|
"name": "Status",
|
||||||
|
"system": systemDown.Id,
|
||||||
|
"user": user.Id,
|
||||||
|
"min": 1,
|
||||||
|
})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Verify alert is not triggered initially
|
||||||
|
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered initially")
|
||||||
|
|
||||||
|
// Set the system to 'up' (this should not trigger the alert)
|
||||||
|
systemUp.Set("status", "up")
|
||||||
|
err = hub.SaveNoValidate(systemUp)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
systemDown.Set("status", "down")
|
||||||
|
err = hub.SaveNoValidate(systemDown)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Wait a moment for any processing
|
||||||
|
time.Sleep(10 * time.Millisecond)
|
||||||
|
|
||||||
|
// Verify alertUp is still not triggered after setting system to up
|
||||||
|
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered when system is up")
|
||||||
|
|
||||||
|
// Manually set both alerts triggered to true
|
||||||
|
alertUp.Set("triggered", true)
|
||||||
|
err = hub.SaveNoValidate(alertUp)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
alertDown.Set("triggered", true)
|
||||||
|
err = hub.SaveNoValidate(alertDown)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Verify we have exactly one alert with triggered true
|
||||||
|
triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.EqualValues(t, 2, triggeredCount, "Should have exactly two alerts with triggered true")
|
||||||
|
|
||||||
|
// Verify the specific alertUp is triggered
|
||||||
|
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.True(t, alertUp.GetBool("triggered"), "Alert should be triggered")
|
||||||
|
|
||||||
|
// Verify we have two unresolved alert history records
|
||||||
|
alertHistoryCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.EqualValues(t, 2, alertHistoryCount, "Should have exactly two unresolved alert history records")
|
||||||
|
|
||||||
|
err = alerts.ResolveStatusAlerts(hub)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Verify alertUp is not triggered after resolving
|
||||||
|
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered after resolving")
|
||||||
|
// Verify alertDown is still triggered
|
||||||
|
alertDown, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertDown.Id})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.True(t, alertDown.GetBool("triggered"), "Alert should still be triggered after resolving")
|
||||||
|
|
||||||
|
// Verify we have one unresolved alert history record
|
||||||
|
alertHistoryCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.EqualValues(t, 1, alertHistoryCount, "Should have exactly one unresolved alert history record")
|
||||||
|
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
//go:build testing
|
||||||
|
// +build testing
|
||||||
|
|
||||||
package alerts
|
package alerts
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@@ -53,3 +56,7 @@ func (am *AlertManager) ForceExpirePendingAlerts() {
|
|||||||
return true
|
return true
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ResolveStatusAlerts(app core.App) error {
|
||||||
|
return resolveStatusAlerts(app)
|
||||||
|
}
|
||||||
|
|||||||
@@ -175,7 +175,7 @@ func TestDeleteOldSystemStats(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Run deletion
|
// Run deletion
|
||||||
err = records.TestDeleteOldSystemStats(hub)
|
err = records.DeleteOldSystemStats(hub)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
// Verify results
|
// Verify results
|
||||||
@@ -268,7 +268,7 @@ func TestDeleteOldAlertsHistory(t *testing.T) {
|
|||||||
assert.Equal(t, int64(tc.alertCount), countBefore, "Initial count should match")
|
assert.Equal(t, int64(tc.alertCount), countBefore, "Initial count should match")
|
||||||
|
|
||||||
// Run deletion
|
// Run deletion
|
||||||
err = records.TestDeleteOldAlertsHistory(hub, tc.countToKeep, tc.countBeforeDeletion)
|
err = records.DeleteOldAlertsHistory(hub, tc.countToKeep, tc.countBeforeDeletion)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
// Count after deletion
|
// Count after deletion
|
||||||
@@ -332,7 +332,7 @@ func TestDeleteOldAlertsHistoryEdgeCases(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Should not error and should not delete anything
|
// Should not error and should not delete anything
|
||||||
err = records.TestDeleteOldAlertsHistory(hub, 10, 20)
|
err = records.DeleteOldAlertsHistory(hub, 10, 20)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
count, err := hub.CountRecords("alerts_history")
|
count, err := hub.CountRecords("alerts_history")
|
||||||
@@ -346,7 +346,7 @@ func TestDeleteOldAlertsHistoryEdgeCases(t *testing.T) {
|
|||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
// Should not error with empty table
|
// Should not error with empty table
|
||||||
err = records.TestDeleteOldAlertsHistory(hub, 10, 20)
|
err = records.DeleteOldAlertsHistory(hub, 10, 20)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -376,7 +376,7 @@ func TestTwoDecimals(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
result := records.TestTwoDecimals(tc.input)
|
result := records.TwoDecimals(tc.input)
|
||||||
assert.InDelta(t, tc.expected, result, 0.02, "twoDecimals(%f) should equal %f", tc.input, tc.expected)
|
assert.InDelta(t, tc.expected, result, 0.02, "twoDecimals(%f) should equal %f", tc.input, tc.expected)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,17 +7,17 @@ import (
|
|||||||
"github.com/pocketbase/pocketbase/core"
|
"github.com/pocketbase/pocketbase/core"
|
||||||
)
|
)
|
||||||
|
|
||||||
// TestDeleteOldSystemStats exposes deleteOldSystemStats for testing
|
// DeleteOldSystemStats exposes deleteOldSystemStats for testing
|
||||||
func TestDeleteOldSystemStats(app core.App) error {
|
func DeleteOldSystemStats(app core.App) error {
|
||||||
return deleteOldSystemStats(app)
|
return deleteOldSystemStats(app)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestDeleteOldAlertsHistory exposes deleteOldAlertsHistory for testing
|
// DeleteOldAlertsHistory exposes deleteOldAlertsHistory for testing
|
||||||
func TestDeleteOldAlertsHistory(app core.App, countToKeep, countBeforeDeletion int) error {
|
func DeleteOldAlertsHistory(app core.App, countToKeep, countBeforeDeletion int) error {
|
||||||
return deleteOldAlertsHistory(app, countToKeep, countBeforeDeletion)
|
return deleteOldAlertsHistory(app, countToKeep, countBeforeDeletion)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestTwoDecimals exposes twoDecimals for testing
|
// TwoDecimals exposes twoDecimals for testing
|
||||||
func TestTwoDecimals(value float64) float64 {
|
func TwoDecimals(value float64) float64 {
|
||||||
return twoDecimals(value)
|
return twoDecimals(value)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -125,3 +125,28 @@ func CreateSystems(app core.App, count int, userId string, status string) ([]*co
|
|||||||
}
|
}
|
||||||
return systems, nil
|
return systems, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetHubWithUser creates a test hub with a test user and user settings
|
||||||
|
func GetHubWithUser(t *testing.T) (*TestHub, *core.Record) {
|
||||||
|
hub, err := NewTestHub(t.TempDir())
|
||||||
|
assert.NoError(t, err)
|
||||||
|
hub.StartHub()
|
||||||
|
|
||||||
|
// Manually initialize the system manager to bind event hooks
|
||||||
|
err = hub.GetSystemManager().Initialize()
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Create a test user
|
||||||
|
user, err := CreateUser(hub, "test@example.com", "password")
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Create user settings for the test user (required for alert notifications)
|
||||||
|
userSettingsData := map[string]any{
|
||||||
|
"user": user.Id,
|
||||||
|
"settings": `{"emails":[test@example.com],"webhooks":[]}`,
|
||||||
|
}
|
||||||
|
_, err = CreateRecord(hub, "user_settings", userSettingsData)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
return hub, user
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user