add alerts for smart failures

This commit is contained in:
henrygd
2025-11-30 13:52:31 -05:00
parent 41a3d9359f
commit 11edabd09f
3 changed files with 264 additions and 0 deletions

View File

@@ -104,6 +104,7 @@ func NewAlertManager(app hubLike) *AlertManager {
func (am *AlertManager) bindEvents() { func (am *AlertManager) bindEvents() {
am.hub.OnRecordAfterUpdateSuccess("alerts").BindFunc(updateHistoryOnAlertUpdate) am.hub.OnRecordAfterUpdateSuccess("alerts").BindFunc(updateHistoryOnAlertUpdate)
am.hub.OnRecordAfterDeleteSuccess("alerts").BindFunc(resolveHistoryOnAlertDelete) am.hub.OnRecordAfterDeleteSuccess("alerts").BindFunc(resolveHistoryOnAlertDelete)
am.hub.OnRecordAfterUpdateSuccess("smart_devices").BindFunc(am.handleSmartDeviceAlert)
} }
// IsNotificationSilenced checks if a notification should be silenced based on configured quiet hours // IsNotificationSilenced checks if a notification should be silenced based on configured quiet hours

View File

@@ -0,0 +1,67 @@
package alerts
import (
"fmt"
"github.com/pocketbase/pocketbase/core"
)
// handleSmartDeviceAlert sends alerts when a SMART device state changes from PASSED to FAILED.
// This is automatic and does not require user opt-in.
func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error {
oldState := e.Record.Original().GetString("state")
newState := e.Record.GetString("state")
// Only alert when transitioning from PASSED to FAILED
if oldState != "PASSED" || newState != "FAILED" {
return e.Next()
}
systemID := e.Record.GetString("system")
if systemID == "" {
return e.Next()
}
// Fetch the system record to get the name and users
systemRecord, err := e.App.FindRecordById("systems", systemID)
if err != nil {
e.App.Logger().Error("Failed to find system for SMART alert", "err", err, "systemID", systemID)
return e.Next()
}
systemName := systemRecord.GetString("name")
deviceName := e.Record.GetString("name")
model := e.Record.GetString("model")
// Build alert message
title := fmt.Sprintf("SMART failure on %s: %s \U0001F534", systemName, deviceName)
var message string
if model != "" {
message = fmt.Sprintf("Disk %s (%s) SMART status changed to FAILED", deviceName, model)
} else {
message = fmt.Sprintf("Disk %s SMART status changed to FAILED", deviceName)
}
// Get users associated with the system
userIDs := systemRecord.GetStringSlice("users")
if len(userIDs) == 0 {
return e.Next()
}
// Send alert to each user
for _, userID := range userIDs {
if err := am.SendAlert(AlertMessageData{
UserID: userID,
SystemID: systemID,
Title: title,
Message: message,
Link: am.hub.MakeLink("system", systemID),
LinkText: "View " + systemName,
}); err != nil {
e.App.Logger().Error("Failed to send SMART alert", "err", err, "userID", userID)
}
}
return e.Next()
}

View File

@@ -0,0 +1,196 @@
//go:build testing
// +build testing
package alerts_test
import (
"testing"
"time"
beszelTests "github.com/henrygd/beszel/internal/tests"
"github.com/stretchr/testify/assert"
)
func TestSmartDeviceAlert(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
// Create a system for the user
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
"name": "test-system",
"users": []string{user.Id},
"host": "127.0.0.1",
})
assert.NoError(t, err)
// Create a smart_device with state PASSED
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
"system": system.Id,
"name": "/dev/sda",
"model": "Samsung SSD 970 EVO",
"state": "PASSED",
})
assert.NoError(t, err)
// Verify no emails sent initially
assert.Zero(t, hub.TestMailer.TotalSend(), "should have 0 emails sent initially")
// Re-fetch the record so PocketBase can properly track original values
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
assert.NoError(t, err)
// Update the smart device state to FAILED
smartDevice.Set("state", "FAILED")
err = hub.Save(smartDevice)
assert.NoError(t, err)
// Wait for the alert to be processed
time.Sleep(50 * time.Millisecond)
// Verify that an email was sent
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent after state changed to FAILED")
// Check the email content
lastMessage := hub.TestMailer.LastMessage()
assert.Contains(t, lastMessage.Subject, "SMART failure on test-system")
assert.Contains(t, lastMessage.Subject, "/dev/sda")
assert.Contains(t, lastMessage.Text, "Samsung SSD 970 EVO")
assert.Contains(t, lastMessage.Text, "FAILED")
}
func TestSmartDeviceAlertNoAlertOnNonPassedToFailed(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
// Create a system for the user
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
"name": "test-system",
"users": []string{user.Id},
"host": "127.0.0.1",
})
assert.NoError(t, err)
// Create a smart_device with state UNKNOWN
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
"system": system.Id,
"name": "/dev/sda",
"model": "Samsung SSD 970 EVO",
"state": "UNKNOWN",
})
assert.NoError(t, err)
// Re-fetch the record so PocketBase can properly track original values
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
assert.NoError(t, err)
// Update the state from UNKNOWN to FAILED - should NOT trigger alert
smartDevice.Set("state", "FAILED")
err = hub.Save(smartDevice)
assert.NoError(t, err)
time.Sleep(50 * time.Millisecond)
// Verify no email was sent (only PASSED -> FAILED triggers alert)
assert.Zero(t, hub.TestMailer.TotalSend(), "should have 0 emails when changing from UNKNOWN to FAILED")
// Re-fetch the record again
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
assert.NoError(t, err)
// Update state from FAILED to PASSED - should NOT trigger alert
smartDevice.Set("state", "PASSED")
err = hub.Save(smartDevice)
assert.NoError(t, err)
time.Sleep(50 * time.Millisecond)
// Verify no email was sent
assert.Zero(t, hub.TestMailer.TotalSend(), "should have 0 emails when changing from FAILED to PASSED")
}
func TestSmartDeviceAlertMultipleUsers(t *testing.T) {
hub, user1 := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
// Create a second user
user2, err := beszelTests.CreateUser(hub, "test2@example.com", "password")
assert.NoError(t, err)
// Create user settings for the second user
_, err = beszelTests.CreateRecord(hub, "user_settings", map[string]any{
"user": user2.Id,
"settings": `{"emails":["test2@example.com"],"webhooks":[]}`,
})
assert.NoError(t, err)
// Create a system with both users
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
"name": "shared-system",
"users": []string{user1.Id, user2.Id},
"host": "127.0.0.1",
})
assert.NoError(t, err)
// Create a smart_device with state PASSED
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
"system": system.Id,
"name": "/dev/nvme0n1",
"model": "WD Black SN850",
"state": "PASSED",
})
assert.NoError(t, err)
// Re-fetch the record so PocketBase can properly track original values
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
assert.NoError(t, err)
// Update the smart device state to FAILED
smartDevice.Set("state", "FAILED")
err = hub.Save(smartDevice)
assert.NoError(t, err)
time.Sleep(50 * time.Millisecond)
// Verify that two emails were sent (one for each user)
assert.EqualValues(t, 2, hub.TestMailer.TotalSend(), "should have 2 emails sent for 2 users")
}
func TestSmartDeviceAlertWithoutModel(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
// Create a system for the user
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
"name": "test-system",
"users": []string{user.Id},
"host": "127.0.0.1",
})
assert.NoError(t, err)
// Create a smart_device with state PASSED but no model
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
"system": system.Id,
"name": "/dev/sdb",
"state": "PASSED",
})
assert.NoError(t, err)
// Re-fetch the record so PocketBase can properly track original values
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
assert.NoError(t, err)
// Update the smart device state to FAILED
smartDevice.Set("state", "FAILED")
err = hub.Save(smartDevice)
assert.NoError(t, err)
time.Sleep(50 * time.Millisecond)
// Verify that an email was sent
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent")
// Check that the email doesn't have empty parentheses for missing model
lastMessage := hub.TestMailer.LastMessage()
assert.NotContains(t, lastMessage.Text, "()", "should not have empty parentheses for missing model")
assert.Contains(t, lastMessage.Text, "/dev/sdb")
}