SMART: add eMMC health via sysfs (#1736)

* SMART: add eMMC health via sysfs

Read eMMC wear/EOL indicators from /sys/class/block/mmcblk*/device and expose in SMART device list. Includes mocked sysfs tests and UI tweaks for unknown temps.

* small optimizations for emmc scan and parsing

* smart: keep smartctl optional only for Linux hosts with eMMC

* update smart alerts to handle warning state

* refactor: rename binPath to smartctlPath and replace hasSmartctl with smartctlPath checks

---------

Co-authored-by: henrygd <hank@henrygd.me>
This commit is contained in:
VACInc
2026-02-12 15:27:42 -05:00
committed by GitHub
parent 2230097dc7
commit e816ea143a
11 changed files with 661 additions and 27 deletions

View File

@@ -2,18 +2,18 @@ package alerts
import (
"fmt"
"strings"
"github.com/pocketbase/pocketbase/core"
)
// handleSmartDeviceAlert sends alerts when a SMART device state changes from PASSED to FAILED.
// handleSmartDeviceAlert sends alerts when a SMART device state worsens into WARNING/FAILED.
// This is automatic and does not require user opt-in.
func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error {
oldState := e.Record.Original().GetString("state")
newState := e.Record.GetString("state")
// Only alert when transitioning from PASSED to FAILED
if oldState != "PASSED" || newState != "FAILED" {
if !shouldSendSmartDeviceAlert(oldState, newState) {
return e.Next()
}
@@ -32,14 +32,15 @@ func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error {
systemName := systemRecord.GetString("name")
deviceName := e.Record.GetString("name")
model := e.Record.GetString("model")
statusLabel := smartStateLabel(newState)
// Build alert message
title := fmt.Sprintf("SMART failure on %s: %s \U0001F534", systemName, deviceName)
title := fmt.Sprintf("SMART %s on %s: %s %s", statusLabel, systemName, deviceName, smartStateEmoji(newState))
var message string
if model != "" {
message = fmt.Sprintf("Disk %s (%s) SMART status changed to FAILED", deviceName, model)
message = fmt.Sprintf("Disk %s (%s) SMART status changed to %s", deviceName, model, newState)
} else {
message = fmt.Sprintf("Disk %s SMART status changed to FAILED", deviceName)
message = fmt.Sprintf("Disk %s SMART status changed to %s", deviceName, newState)
}
// Get users associated with the system
@@ -65,3 +66,42 @@ func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error {
return e.Next()
}
func shouldSendSmartDeviceAlert(oldState, newState string) bool {
oldSeverity := smartStateSeverity(oldState)
newSeverity := smartStateSeverity(newState)
// Ignore unknown states and recoveries; only alert on worsening transitions
// from known-good/degraded states into WARNING/FAILED.
return oldSeverity >= 1 && newSeverity > oldSeverity
}
func smartStateSeverity(state string) int {
switch state {
case "PASSED":
return 1
case "WARNING":
return 2
case "FAILED":
return 3
default:
return 0
}
}
func smartStateEmoji(state string) string {
switch state {
case "WARNING":
return "\U0001F7E0"
default:
return "\U0001F534"
}
}
func smartStateLabel(state string) string {
switch state {
case "FAILED":
return "failure"
default:
return strings.ToLower(state)
}
}

View File

@@ -58,6 +58,74 @@ func TestSmartDeviceAlert(t *testing.T) {
assert.Contains(t, lastMessage.Text, "FAILED")
}
func TestSmartDeviceAlertPassedToWarning(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
"name": "test-system",
"users": []string{user.Id},
"host": "127.0.0.1",
})
assert.NoError(t, err)
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
"system": system.Id,
"name": "/dev/mmcblk0",
"model": "eMMC",
"state": "PASSED",
})
assert.NoError(t, err)
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
assert.NoError(t, err)
smartDevice.Set("state", "WARNING")
err = hub.Save(smartDevice)
assert.NoError(t, err)
time.Sleep(50 * time.Millisecond)
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent after state changed to WARNING")
lastMessage := hub.TestMailer.LastMessage()
assert.Contains(t, lastMessage.Subject, "SMART warning on test-system")
assert.Contains(t, lastMessage.Text, "WARNING")
}
func TestSmartDeviceAlertWarningToFailed(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
"name": "test-system",
"users": []string{user.Id},
"host": "127.0.0.1",
})
assert.NoError(t, err)
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
"system": system.Id,
"name": "/dev/mmcblk0",
"model": "eMMC",
"state": "WARNING",
})
assert.NoError(t, err)
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
assert.NoError(t, err)
smartDevice.Set("state", "FAILED")
err = hub.Save(smartDevice)
assert.NoError(t, err)
time.Sleep(50 * time.Millisecond)
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent after state changed from WARNING to FAILED")
lastMessage := hub.TestMailer.LastMessage()
assert.Contains(t, lastMessage.Subject, "SMART failure on test-system")
assert.Contains(t, lastMessage.Text, "FAILED")
}
func TestSmartDeviceAlertNoAlertOnNonPassedToFailed(t *testing.T) {
hub, user := beszelTests.GetHubWithUser(t)
defer hub.Cleanup()
@@ -83,7 +151,8 @@ func TestSmartDeviceAlertNoAlertOnNonPassedToFailed(t *testing.T) {
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
assert.NoError(t, err)
// Update the state from UNKNOWN to FAILED - should NOT trigger alert
// Update the state from UNKNOWN to FAILED - should NOT trigger alert.
// We only alert from known healthy/degraded states.
smartDevice.Set("state", "FAILED")
err = hub.Save(smartDevice)
assert.NoError(t, err)

View File

@@ -206,7 +206,12 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
invertSorting: true,
header: ({ column }) => <HeaderButton column={column} name={t`Temp`} Icon={ThermometerIcon} />,
cell: ({ getValue }) => {
const { value, unit } = formatTemperature(getValue() as number)
const temp = getValue() as number | undefined | null
// Most devices won't report a real 0C temperature; treat 0 as "unknown".
if (temp == null || temp === 0) {
return <div className="text-muted-foreground ms-1.5">N/A</div>
}
const { value, unit } = formatTemperature(temp)
return <span className="ms-1.5">{`${value} ${unit}`}</span>
},
},