mirror of
https://github.com/henrygd/beszel.git
synced 2025-12-17 02:36:17 +01:00
add alerts for smart failures
This commit is contained in:
@@ -104,6 +104,7 @@ func NewAlertManager(app hubLike) *AlertManager {
|
|||||||
func (am *AlertManager) bindEvents() {
|
func (am *AlertManager) bindEvents() {
|
||||||
am.hub.OnRecordAfterUpdateSuccess("alerts").BindFunc(updateHistoryOnAlertUpdate)
|
am.hub.OnRecordAfterUpdateSuccess("alerts").BindFunc(updateHistoryOnAlertUpdate)
|
||||||
am.hub.OnRecordAfterDeleteSuccess("alerts").BindFunc(resolveHistoryOnAlertDelete)
|
am.hub.OnRecordAfterDeleteSuccess("alerts").BindFunc(resolveHistoryOnAlertDelete)
|
||||||
|
am.hub.OnRecordAfterUpdateSuccess("smart_devices").BindFunc(am.handleSmartDeviceAlert)
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsNotificationSilenced checks if a notification should be silenced based on configured quiet hours
|
// IsNotificationSilenced checks if a notification should be silenced based on configured quiet hours
|
||||||
|
|||||||
67
internal/alerts/alerts_smart.go
Normal file
67
internal/alerts/alerts_smart.go
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
package alerts
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/pocketbase/pocketbase/core"
|
||||||
|
)
|
||||||
|
|
||||||
|
// handleSmartDeviceAlert sends alerts when a SMART device state changes from PASSED to FAILED.
|
||||||
|
// This is automatic and does not require user opt-in.
|
||||||
|
func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error {
|
||||||
|
oldState := e.Record.Original().GetString("state")
|
||||||
|
newState := e.Record.GetString("state")
|
||||||
|
|
||||||
|
// Only alert when transitioning from PASSED to FAILED
|
||||||
|
if oldState != "PASSED" || newState != "FAILED" {
|
||||||
|
return e.Next()
|
||||||
|
}
|
||||||
|
|
||||||
|
systemID := e.Record.GetString("system")
|
||||||
|
if systemID == "" {
|
||||||
|
return e.Next()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch the system record to get the name and users
|
||||||
|
systemRecord, err := e.App.FindRecordById("systems", systemID)
|
||||||
|
if err != nil {
|
||||||
|
e.App.Logger().Error("Failed to find system for SMART alert", "err", err, "systemID", systemID)
|
||||||
|
return e.Next()
|
||||||
|
}
|
||||||
|
|
||||||
|
systemName := systemRecord.GetString("name")
|
||||||
|
deviceName := e.Record.GetString("name")
|
||||||
|
model := e.Record.GetString("model")
|
||||||
|
|
||||||
|
// Build alert message
|
||||||
|
title := fmt.Sprintf("SMART failure on %s: %s \U0001F534", systemName, deviceName)
|
||||||
|
var message string
|
||||||
|
if model != "" {
|
||||||
|
message = fmt.Sprintf("Disk %s (%s) SMART status changed to FAILED", deviceName, model)
|
||||||
|
} else {
|
||||||
|
message = fmt.Sprintf("Disk %s SMART status changed to FAILED", deviceName)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get users associated with the system
|
||||||
|
userIDs := systemRecord.GetStringSlice("users")
|
||||||
|
if len(userIDs) == 0 {
|
||||||
|
return e.Next()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Send alert to each user
|
||||||
|
for _, userID := range userIDs {
|
||||||
|
if err := am.SendAlert(AlertMessageData{
|
||||||
|
UserID: userID,
|
||||||
|
SystemID: systemID,
|
||||||
|
Title: title,
|
||||||
|
Message: message,
|
||||||
|
Link: am.hub.MakeLink("system", systemID),
|
||||||
|
LinkText: "View " + systemName,
|
||||||
|
}); err != nil {
|
||||||
|
e.App.Logger().Error("Failed to send SMART alert", "err", err, "userID", userID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return e.Next()
|
||||||
|
}
|
||||||
|
|
||||||
196
internal/alerts/alerts_smart_test.go
Normal file
196
internal/alerts/alerts_smart_test.go
Normal file
@@ -0,0 +1,196 @@
|
|||||||
|
//go:build testing
|
||||||
|
// +build testing
|
||||||
|
|
||||||
|
package alerts_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestSmartDeviceAlert(t *testing.T) {
|
||||||
|
hub, user := beszelTests.GetHubWithUser(t)
|
||||||
|
defer hub.Cleanup()
|
||||||
|
|
||||||
|
// Create a system for the user
|
||||||
|
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||||
|
"name": "test-system",
|
||||||
|
"users": []string{user.Id},
|
||||||
|
"host": "127.0.0.1",
|
||||||
|
})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Create a smart_device with state PASSED
|
||||||
|
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
|
||||||
|
"system": system.Id,
|
||||||
|
"name": "/dev/sda",
|
||||||
|
"model": "Samsung SSD 970 EVO",
|
||||||
|
"state": "PASSED",
|
||||||
|
})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Verify no emails sent initially
|
||||||
|
assert.Zero(t, hub.TestMailer.TotalSend(), "should have 0 emails sent initially")
|
||||||
|
|
||||||
|
// Re-fetch the record so PocketBase can properly track original values
|
||||||
|
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Update the smart device state to FAILED
|
||||||
|
smartDevice.Set("state", "FAILED")
|
||||||
|
err = hub.Save(smartDevice)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Wait for the alert to be processed
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
|
||||||
|
// Verify that an email was sent
|
||||||
|
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent after state changed to FAILED")
|
||||||
|
|
||||||
|
// Check the email content
|
||||||
|
lastMessage := hub.TestMailer.LastMessage()
|
||||||
|
assert.Contains(t, lastMessage.Subject, "SMART failure on test-system")
|
||||||
|
assert.Contains(t, lastMessage.Subject, "/dev/sda")
|
||||||
|
assert.Contains(t, lastMessage.Text, "Samsung SSD 970 EVO")
|
||||||
|
assert.Contains(t, lastMessage.Text, "FAILED")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSmartDeviceAlertNoAlertOnNonPassedToFailed(t *testing.T) {
|
||||||
|
hub, user := beszelTests.GetHubWithUser(t)
|
||||||
|
defer hub.Cleanup()
|
||||||
|
|
||||||
|
// Create a system for the user
|
||||||
|
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||||
|
"name": "test-system",
|
||||||
|
"users": []string{user.Id},
|
||||||
|
"host": "127.0.0.1",
|
||||||
|
})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Create a smart_device with state UNKNOWN
|
||||||
|
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
|
||||||
|
"system": system.Id,
|
||||||
|
"name": "/dev/sda",
|
||||||
|
"model": "Samsung SSD 970 EVO",
|
||||||
|
"state": "UNKNOWN",
|
||||||
|
})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Re-fetch the record so PocketBase can properly track original values
|
||||||
|
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Update the state from UNKNOWN to FAILED - should NOT trigger alert
|
||||||
|
smartDevice.Set("state", "FAILED")
|
||||||
|
err = hub.Save(smartDevice)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
|
||||||
|
// Verify no email was sent (only PASSED -> FAILED triggers alert)
|
||||||
|
assert.Zero(t, hub.TestMailer.TotalSend(), "should have 0 emails when changing from UNKNOWN to FAILED")
|
||||||
|
|
||||||
|
// Re-fetch the record again
|
||||||
|
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Update state from FAILED to PASSED - should NOT trigger alert
|
||||||
|
smartDevice.Set("state", "PASSED")
|
||||||
|
err = hub.Save(smartDevice)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
|
||||||
|
// Verify no email was sent
|
||||||
|
assert.Zero(t, hub.TestMailer.TotalSend(), "should have 0 emails when changing from FAILED to PASSED")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSmartDeviceAlertMultipleUsers(t *testing.T) {
|
||||||
|
hub, user1 := beszelTests.GetHubWithUser(t)
|
||||||
|
defer hub.Cleanup()
|
||||||
|
|
||||||
|
// Create a second user
|
||||||
|
user2, err := beszelTests.CreateUser(hub, "test2@example.com", "password")
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Create user settings for the second user
|
||||||
|
_, err = beszelTests.CreateRecord(hub, "user_settings", map[string]any{
|
||||||
|
"user": user2.Id,
|
||||||
|
"settings": `{"emails":["test2@example.com"],"webhooks":[]}`,
|
||||||
|
})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Create a system with both users
|
||||||
|
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||||
|
"name": "shared-system",
|
||||||
|
"users": []string{user1.Id, user2.Id},
|
||||||
|
"host": "127.0.0.1",
|
||||||
|
})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Create a smart_device with state PASSED
|
||||||
|
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
|
||||||
|
"system": system.Id,
|
||||||
|
"name": "/dev/nvme0n1",
|
||||||
|
"model": "WD Black SN850",
|
||||||
|
"state": "PASSED",
|
||||||
|
})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Re-fetch the record so PocketBase can properly track original values
|
||||||
|
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Update the smart device state to FAILED
|
||||||
|
smartDevice.Set("state", "FAILED")
|
||||||
|
err = hub.Save(smartDevice)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
|
||||||
|
// Verify that two emails were sent (one for each user)
|
||||||
|
assert.EqualValues(t, 2, hub.TestMailer.TotalSend(), "should have 2 emails sent for 2 users")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSmartDeviceAlertWithoutModel(t *testing.T) {
|
||||||
|
hub, user := beszelTests.GetHubWithUser(t)
|
||||||
|
defer hub.Cleanup()
|
||||||
|
|
||||||
|
// Create a system for the user
|
||||||
|
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||||
|
"name": "test-system",
|
||||||
|
"users": []string{user.Id},
|
||||||
|
"host": "127.0.0.1",
|
||||||
|
})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Create a smart_device with state PASSED but no model
|
||||||
|
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
|
||||||
|
"system": system.Id,
|
||||||
|
"name": "/dev/sdb",
|
||||||
|
"state": "PASSED",
|
||||||
|
})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Re-fetch the record so PocketBase can properly track original values
|
||||||
|
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Update the smart device state to FAILED
|
||||||
|
smartDevice.Set("state", "FAILED")
|
||||||
|
err = hub.Save(smartDevice)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
|
||||||
|
// Verify that an email was sent
|
||||||
|
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent")
|
||||||
|
|
||||||
|
// Check that the email doesn't have empty parentheses for missing model
|
||||||
|
lastMessage := hub.TestMailer.LastMessage()
|
||||||
|
assert.NotContains(t, lastMessage.Text, "()", "should not have empty parentheses for missing model")
|
||||||
|
assert.Contains(t, lastMessage.Text, "/dev/sdb")
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user