mirror of
https://github.com/henrygd/beszel.git
synced 2025-12-17 10:46:16 +01:00
add alerts for smart failures
This commit is contained in:
@@ -104,6 +104,7 @@ func NewAlertManager(app hubLike) *AlertManager {
|
||||
func (am *AlertManager) bindEvents() {
|
||||
am.hub.OnRecordAfterUpdateSuccess("alerts").BindFunc(updateHistoryOnAlertUpdate)
|
||||
am.hub.OnRecordAfterDeleteSuccess("alerts").BindFunc(resolveHistoryOnAlertDelete)
|
||||
am.hub.OnRecordAfterUpdateSuccess("smart_devices").BindFunc(am.handleSmartDeviceAlert)
|
||||
}
|
||||
|
||||
// IsNotificationSilenced checks if a notification should be silenced based on configured quiet hours
|
||||
|
||||
67
internal/alerts/alerts_smart.go
Normal file
67
internal/alerts/alerts_smart.go
Normal file
@@ -0,0 +1,67 @@
|
||||
package alerts
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
)
|
||||
|
||||
// handleSmartDeviceAlert sends alerts when a SMART device state changes from PASSED to FAILED.
|
||||
// This is automatic and does not require user opt-in.
|
||||
func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error {
|
||||
oldState := e.Record.Original().GetString("state")
|
||||
newState := e.Record.GetString("state")
|
||||
|
||||
// Only alert when transitioning from PASSED to FAILED
|
||||
if oldState != "PASSED" || newState != "FAILED" {
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
systemID := e.Record.GetString("system")
|
||||
if systemID == "" {
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
// Fetch the system record to get the name and users
|
||||
systemRecord, err := e.App.FindRecordById("systems", systemID)
|
||||
if err != nil {
|
||||
e.App.Logger().Error("Failed to find system for SMART alert", "err", err, "systemID", systemID)
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
systemName := systemRecord.GetString("name")
|
||||
deviceName := e.Record.GetString("name")
|
||||
model := e.Record.GetString("model")
|
||||
|
||||
// Build alert message
|
||||
title := fmt.Sprintf("SMART failure on %s: %s \U0001F534", systemName, deviceName)
|
||||
var message string
|
||||
if model != "" {
|
||||
message = fmt.Sprintf("Disk %s (%s) SMART status changed to FAILED", deviceName, model)
|
||||
} else {
|
||||
message = fmt.Sprintf("Disk %s SMART status changed to FAILED", deviceName)
|
||||
}
|
||||
|
||||
// Get users associated with the system
|
||||
userIDs := systemRecord.GetStringSlice("users")
|
||||
if len(userIDs) == 0 {
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
// Send alert to each user
|
||||
for _, userID := range userIDs {
|
||||
if err := am.SendAlert(AlertMessageData{
|
||||
UserID: userID,
|
||||
SystemID: systemID,
|
||||
Title: title,
|
||||
Message: message,
|
||||
Link: am.hub.MakeLink("system", systemID),
|
||||
LinkText: "View " + systemName,
|
||||
}); err != nil {
|
||||
e.App.Logger().Error("Failed to send SMART alert", "err", err, "userID", userID)
|
||||
}
|
||||
}
|
||||
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
196
internal/alerts/alerts_smart_test.go
Normal file
196
internal/alerts/alerts_smart_test.go
Normal file
@@ -0,0 +1,196 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestSmartDeviceAlert(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a system for the user
|
||||
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.1",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a smart_device with state PASSED
|
||||
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
|
||||
"system": system.Id,
|
||||
"name": "/dev/sda",
|
||||
"model": "Samsung SSD 970 EVO",
|
||||
"state": "PASSED",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify no emails sent initially
|
||||
assert.Zero(t, hub.TestMailer.TotalSend(), "should have 0 emails sent initially")
|
||||
|
||||
// Re-fetch the record so PocketBase can properly track original values
|
||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Update the smart device state to FAILED
|
||||
smartDevice.Set("state", "FAILED")
|
||||
err = hub.Save(smartDevice)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Wait for the alert to be processed
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
// Verify that an email was sent
|
||||
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent after state changed to FAILED")
|
||||
|
||||
// Check the email content
|
||||
lastMessage := hub.TestMailer.LastMessage()
|
||||
assert.Contains(t, lastMessage.Subject, "SMART failure on test-system")
|
||||
assert.Contains(t, lastMessage.Subject, "/dev/sda")
|
||||
assert.Contains(t, lastMessage.Text, "Samsung SSD 970 EVO")
|
||||
assert.Contains(t, lastMessage.Text, "FAILED")
|
||||
}
|
||||
|
||||
func TestSmartDeviceAlertNoAlertOnNonPassedToFailed(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a system for the user
|
||||
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.1",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a smart_device with state UNKNOWN
|
||||
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
|
||||
"system": system.Id,
|
||||
"name": "/dev/sda",
|
||||
"model": "Samsung SSD 970 EVO",
|
||||
"state": "UNKNOWN",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Re-fetch the record so PocketBase can properly track original values
|
||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Update the state from UNKNOWN to FAILED - should NOT trigger alert
|
||||
smartDevice.Set("state", "FAILED")
|
||||
err = hub.Save(smartDevice)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
// Verify no email was sent (only PASSED -> FAILED triggers alert)
|
||||
assert.Zero(t, hub.TestMailer.TotalSend(), "should have 0 emails when changing from UNKNOWN to FAILED")
|
||||
|
||||
// Re-fetch the record again
|
||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Update state from FAILED to PASSED - should NOT trigger alert
|
||||
smartDevice.Set("state", "PASSED")
|
||||
err = hub.Save(smartDevice)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
// Verify no email was sent
|
||||
assert.Zero(t, hub.TestMailer.TotalSend(), "should have 0 emails when changing from FAILED to PASSED")
|
||||
}
|
||||
|
||||
func TestSmartDeviceAlertMultipleUsers(t *testing.T) {
|
||||
hub, user1 := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a second user
|
||||
user2, err := beszelTests.CreateUser(hub, "test2@example.com", "password")
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create user settings for the second user
|
||||
_, err = beszelTests.CreateRecord(hub, "user_settings", map[string]any{
|
||||
"user": user2.Id,
|
||||
"settings": `{"emails":["test2@example.com"],"webhooks":[]}`,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a system with both users
|
||||
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "shared-system",
|
||||
"users": []string{user1.Id, user2.Id},
|
||||
"host": "127.0.0.1",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a smart_device with state PASSED
|
||||
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
|
||||
"system": system.Id,
|
||||
"name": "/dev/nvme0n1",
|
||||
"model": "WD Black SN850",
|
||||
"state": "PASSED",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Re-fetch the record so PocketBase can properly track original values
|
||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Update the smart device state to FAILED
|
||||
smartDevice.Set("state", "FAILED")
|
||||
err = hub.Save(smartDevice)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
// Verify that two emails were sent (one for each user)
|
||||
assert.EqualValues(t, 2, hub.TestMailer.TotalSend(), "should have 2 emails sent for 2 users")
|
||||
}
|
||||
|
||||
func TestSmartDeviceAlertWithoutModel(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a system for the user
|
||||
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.1",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a smart_device with state PASSED but no model
|
||||
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
|
||||
"system": system.Id,
|
||||
"name": "/dev/sdb",
|
||||
"state": "PASSED",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Re-fetch the record so PocketBase can properly track original values
|
||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Update the smart device state to FAILED
|
||||
smartDevice.Set("state", "FAILED")
|
||||
err = hub.Save(smartDevice)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
// Verify that an email was sent
|
||||
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent")
|
||||
|
||||
// Check that the email doesn't have empty parentheses for missing model
|
||||
lastMessage := hub.TestMailer.LastMessage()
|
||||
assert.NotContains(t, lastMessage.Text, "()", "should not have empty parentheses for missing model")
|
||||
assert.Contains(t, lastMessage.Text, "/dev/sdb")
|
||||
}
|
||||
Reference in New Issue
Block a user