From e816ea143a8e53c314848a6df5f8a3afbb0c739b Mon Sep 17 00:00:00 2001
From: VACInc
Date: Thu, 12 Feb 2026 15:27:42 -0500
Subject: [PATCH] SMART: add eMMC health via sysfs (#1736)

* SMART: add eMMC health via sysfs

Read eMMC wear/EOL indicators from /sys/class/block/mmcblk*/device and expose in SMART device list. Includes mocked sysfs tests and UI tweaks for unknown temps.

* small optimizations for emmc scan and parsing

* smart: keep smartctl optional only for Linux hosts with eMMC

* update smart alerts to handle warning state

* refactor: rename binPath to smartctlPath and replace hasSmartctl with smartctlPath checks

---------

Co-authored-by: henrygd
---
 .gitignore                                    |   1 +
 agent/emmc_common.go                          |  95 ++++++++
 agent/emmc_common_test.go                     |  78 ++++++
 agent/emmc_linux.go                           | 227 ++++++++++++++++++
 agent/emmc_linux_test.go                      |  80 ++++++
 agent/emmc_stub.go                            |  14 ++
 agent/smart.go                                |  61 +++--
 internal/alerts/alerts_smart.go               |  52 +++-
 internal/alerts/alerts_smart_test.go          |  71 +++++-
 .../components/routes/system/smart-table.tsx  |   7 +-
 readme.md                                     |   2 +-
 11 files changed, 661 insertions(+), 27 deletions(-)
 create mode 100644 agent/emmc_common.go
 create mode 100644 agent/emmc_common_test.go
 create mode 100644 agent/emmc_linux.go
 create mode 100644 agent/emmc_linux_test.go
 create mode 100644 agent/emmc_stub.go

diff --git a/.gitignore b/.gitignore
index 7cc5228e..98b0fb5d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,7 @@ dist
 *.exe
 internal/cmd/hub/hub
 internal/cmd/agent/agent
+agent.test
 node_modules
 build
 *timestamp*
diff --git a/agent/emmc_common.go b/agent/emmc_common.go
new file mode 100644
index 00000000..d9956986
--- /dev/null
+++ b/agent/emmc_common.go
@@ -0,0 +1,95 @@
+package agent
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+)
+
+func isEmmcBlockName(name string) bool {
+	if !strings.HasPrefix(name, "mmcblk") {
+		return false
+	}
+	suffix := strings.TrimPrefix(name, "mmcblk")
+	if suffix == "" {
+		return false
+	}
+	for _, c := range suffix {
+		if c < '0' || c > '9' {
+			return false
+		}
+	}
+	return true
+}
+
+func parseHexOrDecByte(s string) (uint8, bool) {
+	s = strings.TrimSpace(s)
+	if s == "" {
+		return 0, false
+	}
+	base := 10
+	if strings.HasPrefix(s, "0x") || strings.HasPrefix(s, "0X") {
+		base = 16
+		s = s[2:]
+	}
+	parsed, err := strconv.ParseUint(s, base, 8)
+	if err != nil {
+		return 0, false
+	}
+	return uint8(parsed), true
+}
+
+func parseHexBytePair(s string) (uint8, uint8, bool) {
+	fields := strings.Fields(s)
+	if len(fields) < 2 {
+		return 0, 0, false
+	}
+	a, okA := parseHexOrDecByte(fields[0])
+	b, okB := parseHexOrDecByte(fields[1])
+	if !okA && !okB {
+		return 0, 0, false
+	}
+	return a, b, true
+}
+
+func emmcSmartStatus(preEOL uint8) string {
+	switch preEOL {
+	case 0x01:
+		return "PASSED"
+	case 0x02:
+		return "WARNING"
+	case 0x03:
+		return "FAILED"
+	default:
+		return "UNKNOWN"
+	}
+}
+
+func emmcPreEOLString(preEOL uint8) string {
+	switch preEOL {
+	case 0x01:
+		return "0x01 (normal)"
+	case 0x02:
+		return "0x02 (warning)"
+	case 0x03:
+		return "0x03 (urgent)"
+	default:
+		return fmt.Sprintf("0x%02x", preEOL)
+	}
+}
+
+func emmcLifeTimeString(v uint8) string {
+	// JEDEC eMMC: 0x01..0x0A => 0-100% used in 10% steps, 0x0B => exceeded.
+	switch {
+	case v == 0:
+		return "0x00 (not reported)"
+	case v >= 0x01 && v <= 0x0A:
+		low := int(v-1) * 10
+		high := int(v) * 10
+		return fmt.Sprintf("0x%02x (%d-%d%% used)", v, low, high)
+	case v == 0x0B:
+		return "0x0b (>100% used)"
+	default:
+		return fmt.Sprintf("0x%02x", v)
+	}
+}
diff --git a/agent/emmc_common_test.go b/agent/emmc_common_test.go
new file mode 100644
index 00000000..c6d0913c
--- /dev/null
+++ b/agent/emmc_common_test.go
@@ -0,0 +1,78 @@
+package agent
+
+import "testing"
+
+func TestParseHexOrDecByte(t *testing.T) {
+	tests := []struct {
+		in   string
+		want uint8
+		ok   bool
+	}{
+		{"0x01", 1, true},
+		{"0X0b", 11, true},
+		{"01", 1, true},
+		{" 3 ", 3, true},
+		{"", 0, false},
+		{"0x", 0, false},
+		{"nope", 0, false},
+	}
+
+	for _, tt := range tests {
+		got, ok := parseHexOrDecByte(tt.in)
+		if ok != tt.ok || got != tt.want {
+			t.Fatalf("parseHexOrDecByte(%q) = (%d,%v), want (%d,%v)", tt.in, got, ok, tt.want, tt.ok)
+		}
+	}
+}
+
+func TestParseHexBytePair(t *testing.T) {
+	a, b, ok := parseHexBytePair("0x01 0x02\n")
+	if !ok || a != 1 || b != 2 {
+		t.Fatalf("parseHexBytePair hex = (%d,%d,%v), want (1,2,true)", a, b, ok)
+	}
+
+	a, b, ok = parseHexBytePair("01 02")
+	if !ok || a != 1 || b != 2 {
+		t.Fatalf("parseHexBytePair dec = (%d,%d,%v), want (1,2,true)", a, b, ok)
+	}
+
+	_, _, ok = parseHexBytePair("0x01")
+	if ok {
+		t.Fatalf("parseHexBytePair short input ok=true, want false")
+	}
+}
+
+func TestEmmcSmartStatus(t *testing.T) {
+	if got := emmcSmartStatus(0x01); got != "PASSED" {
+		t.Fatalf("emmcSmartStatus(0x01) = %q, want PASSED", got)
+	}
+	if got := emmcSmartStatus(0x02); got != "WARNING" {
+		t.Fatalf("emmcSmartStatus(0x02) = %q, want WARNING", got)
+	}
+	if got := emmcSmartStatus(0x03); got != "FAILED" {
+		t.Fatalf("emmcSmartStatus(0x03) = %q, want FAILED", got)
+	}
+	if got := emmcSmartStatus(0x00); got != "UNKNOWN" {
+		t.Fatalf("emmcSmartStatus(0x00) = %q, want UNKNOWN", got)
+	}
+}
+
+func TestIsEmmcBlockName(t *testing.T) {
+	cases := []struct {
+		name string
+		ok   bool
+	}{
+		{"mmcblk0", true},
+		{"mmcblk1", true},
+		{"mmcblk10", true},
+		{"mmcblk0p1", false},
+		{"sda", false},
+		{"mmcblk", false},
+		{"mmcblkA", false},
+	}
+	for _, c := range cases {
+		if got := isEmmcBlockName(c.name); got != c.ok {
+			t.Fatalf("isEmmcBlockName(%q) = %v, want %v", c.name, got, c.ok)
+		}
+	}
+}
diff --git a/agent/emmc_linux.go b/agent/emmc_linux.go
new file mode 100644
index 00000000..10a2d82c
--- /dev/null
+++ b/agent/emmc_linux.go
@@ -0,0 +1,227 @@
+//go:build linux
+
+package agent
+
+import (
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+
+	"github.com/henrygd/beszel/internal/entities/smart"
+)
+
+// emmcSysfsRoot is a test hook; production value is "/sys".
+var emmcSysfsRoot = "/sys"
+
+type emmcHealth struct {
+	model    string
+	serial   string
+	revision string
+	capacity uint64
+	preEOL   uint8
+	lifeA    uint8
+	lifeB    uint8
+}
+
+func scanEmmcDevices() []*DeviceInfo {
+	blockDir := filepath.Join(emmcSysfsRoot, "class", "block")
+	entries, err := os.ReadDir(blockDir)
+	if err != nil {
+		return nil
+	}
+
+	devices := make([]*DeviceInfo, 0, 2)
+	for _, ent := range entries {
+		name := ent.Name()
+		if !isEmmcBlockName(name) {
+			continue
+		}
+
+		deviceDir := filepath.Join(blockDir, name, "device")
+		if !hasEmmcHealthFiles(deviceDir) {
+			continue
+		}
+
+		devPath := filepath.Join("/dev", name)
+		devices = append(devices, &DeviceInfo{
+			Name:     devPath,
+			Type:     "emmc",
+			InfoName: devPath + " [eMMC]",
+			Protocol: "MMC",
+		})
+	}
+
+	return devices
+}
+
+func (sm *SmartManager) collectEmmcHealth(deviceInfo *DeviceInfo) (bool, error) {
+	if deviceInfo == nil || deviceInfo.Name == "" {
+		return false, nil
+	}
+
+	base := filepath.Base(deviceInfo.Name)
+	if !isEmmcBlockName(base) && !strings.EqualFold(deviceInfo.Type, "emmc") && !strings.EqualFold(deviceInfo.Type, "mmc") {
+		return false, nil
+	}
+
+	health, ok := readEmmcHealth(base)
+	if !ok {
+		return false, nil
+	}
+
+	// Normalize the device type to keep pruning logic stable across refreshes.
+	deviceInfo.Type = "emmc"
+
+	key := health.serial
+	if key == "" {
+		key = filepath.Join("/dev", base)
+	}
+
+	status := emmcSmartStatus(health.preEOL)
+
+	attrs := []*smart.SmartAttribute{
+		{
+			Name:      "PreEOLInfo",
+			RawValue:  uint64(health.preEOL),
+			RawString: emmcPreEOLString(health.preEOL),
+		},
+		{
+			Name:      "DeviceLifeTimeEstA",
+			RawValue:  uint64(health.lifeA),
+			RawString: emmcLifeTimeString(health.lifeA),
+		},
+		{
+			Name:      "DeviceLifeTimeEstB",
+			RawValue:  uint64(health.lifeB),
+			RawString: emmcLifeTimeString(health.lifeB),
+		},
+	}
+
+	sm.Lock()
+	defer sm.Unlock()
+
+	if _, exists := sm.SmartDataMap[key]; !exists {
+		sm.SmartDataMap[key] = &smart.SmartData{}
+	}
+
+	data := sm.SmartDataMap[key]
+	data.ModelName = health.model
+	data.SerialNumber = health.serial
+	data.FirmwareVersion = health.revision
+	data.Capacity = health.capacity
+	data.Temperature = 0
+	data.SmartStatus = status
+	data.DiskName = filepath.Join("/dev", base)
+	data.DiskType = "emmc"
+	data.Attributes = attrs
+
+	return true, nil
+}
+
+func readEmmcHealth(blockName string) (emmcHealth, bool) {
+	var out emmcHealth
+
+	if !isEmmcBlockName(blockName) {
+		return out, false
+	}
+
+	deviceDir := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "device")
+	preEOL, okPre := readHexByteFile(filepath.Join(deviceDir, "pre_eol_info"))
+
+	// Some kernels expose EXT_CSD lifetime via "life_time" (two bytes), others as
+	// separate files. Support both.
+	lifeA, lifeB, okLife := readLifeTime(deviceDir)
+
+	if !okPre && !okLife {
+		return out, false
+	}
+
+	out.preEOL = preEOL
+	out.lifeA = lifeA
+	out.lifeB = lifeB
+
+	out.model = readStringFile(filepath.Join(deviceDir, "name"))
+	out.serial = readStringFile(filepath.Join(deviceDir, "serial"))
+	out.revision = readStringFile(filepath.Join(deviceDir, "prv"))
+
+	if capBytes, ok := readBlockCapacityBytes(blockName); ok {
+		out.capacity = capBytes
+	}
+
+	return out, true
+}
+
+func readLifeTime(deviceDir string) (uint8, uint8, bool) {
+	if content, ok := readStringFileOK(filepath.Join(deviceDir, "life_time")); ok {
+		a, b, ok := parseHexBytePair(content)
+		return a, b, ok
+	}
+
+	a, okA := readHexByteFile(filepath.Join(deviceDir, "device_life_time_est_typ_a"))
+	b, okB := readHexByteFile(filepath.Join(deviceDir, "device_life_time_est_typ_b"))
+	if okA || okB {
+		return a, b, true
+	}
+	return 0, 0, false
+}
+
+// readBlockCapacityBytes returns the capacity of the named block device in
+// bytes, or false when the sysfs "size" attribute is missing, unparsable, or
+// zero.
+//
+// The kernel always reports "size" in 512-byte sectors, regardless of the
+// device's logical block size, so the value must not be scaled by
+// queue/logical_block_size (doing so over-reports 4K-native devices by 8x).
+func readBlockCapacityBytes(blockName string) (uint64, bool) {
+	const sysfsSectorSize = 512
+
+	sizePath := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "size")
+	sizeStr, ok := readStringFileOK(sizePath)
+	if !ok {
+		return 0, false
+	}
+
+	sectors, err := strconv.ParseUint(sizeStr, 10, 64)
+	if err != nil || sectors == 0 {
+		return 0, false
+	}
+
+	return sectors * sysfsSectorSize, true
+}
+
+func readHexByteFile(path string) (uint8, bool) {
+	content, ok := readStringFileOK(path)
+	if !ok {
+		return 0, false
+	}
+	b, ok := parseHexOrDecByte(content)
+	return b, ok
+}
+
+func readStringFile(path string) string {
+	content, _ := readStringFileOK(path)
+	return content
+}
+
+func readStringFileOK(path string) (string, bool) {
+	b, err := os.ReadFile(path)
+	if err != nil {
+		return "", false
+	}
+	return strings.TrimSpace(string(b)), true
+}
+
+func hasEmmcHealthFiles(deviceDir string) bool {
+	entries, err := os.ReadDir(deviceDir)
+	if err != nil {
+		return false
+	}
+	for _, ent := range entries {
+		switch ent.Name() {
+		case "pre_eol_info", "life_time", "device_life_time_est_typ_a", "device_life_time_est_typ_b":
+			return true
+		}
+	}
+	return false
+}
diff --git a/agent/emmc_linux_test.go b/agent/emmc_linux_test.go
new file mode 100644
index 00000000..626abada
--- /dev/null
+++ b/agent/emmc_linux_test.go
@@ -0,0 +1,80 @@
+//go:build linux
+
+package agent
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/henrygd/beszel/internal/entities/smart"
+)
+
+func TestEmmcMockSysfsScanAndCollect(t *testing.T) {
+	tmp := t.TempDir()
+	prev := emmcSysfsRoot
+	emmcSysfsRoot = tmp
+	t.Cleanup(func() { emmcSysfsRoot = prev })
+
+	// Fake: /sys/class/block/mmcblk0
+	mmcDeviceDir := filepath.Join(tmp, "class", "block", "mmcblk0", "device")
+	mmcQueueDir := filepath.Join(tmp, "class", "block", "mmcblk0", "queue")
+	if err := os.MkdirAll(mmcDeviceDir, 0o755); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.MkdirAll(mmcQueueDir, 0o755); err != nil {
+		t.Fatal(err)
+	}
+
+	write := func(path, content string) {
+		t.Helper()
+		if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	write(filepath.Join(mmcDeviceDir, "pre_eol_info"), "0x02\n")
+	write(filepath.Join(mmcDeviceDir, "life_time"), "0x04 0x05\n")
+	write(filepath.Join(mmcDeviceDir, "name"), "H26M52103FMR\n")
+	write(filepath.Join(mmcDeviceDir, "serial"), "01234567\n")
+	write(filepath.Join(mmcDeviceDir, "prv"), "0x08\n")
+	write(filepath.Join(mmcQueueDir, "logical_block_size"), "4096\n")
+	write(filepath.Join(tmp, "class", "block", "mmcblk0", "size"), "1024\n") // 512-byte sectors
+
+	devs := scanEmmcDevices()
+	if len(devs) != 1 {
+		t.Fatalf("scanEmmcDevices() = %d devices, want 1", len(devs))
+	}
+	if devs[0].Name != "/dev/mmcblk0" || devs[0].Type != "emmc" {
+		t.Fatalf("scanEmmcDevices()[0] = %+v, want Name=/dev/mmcblk0 Type=emmc", devs[0])
+	}
+
+	sm := &SmartManager{SmartDataMap: map[string]*smart.SmartData{}}
+	ok, err := sm.collectEmmcHealth(devs[0])
+	if err != nil || !ok {
+		t.Fatalf("collectEmmcHealth() = (ok=%v, err=%v), want (true,nil)", ok, err)
+	}
+	if len(sm.SmartDataMap) != 1 {
+		t.Fatalf("SmartDataMap len=%d, want 1", len(sm.SmartDataMap))
+	}
+	var got *smart.SmartData
+	for _, v := range sm.SmartDataMap {
+		got = v
+		break
+	}
+	if got == nil {
+		t.Fatalf("SmartDataMap value nil")
+	}
+	if got.DiskType != "emmc" || got.DiskName != "/dev/mmcblk0" {
+		t.Fatalf("disk fields = (type=%q name=%q), want (emmc,/dev/mmcblk0)", got.DiskType, got.DiskName)
+	}
+	if got.SmartStatus != "WARNING" {
+		t.Fatalf("SmartStatus=%q, want WARNING", got.SmartStatus)
+	}
+	if got.SerialNumber != "01234567" || got.ModelName == "" || got.Capacity != 1024*512 {
+		t.Fatalf("identity fields = (model=%q serial=%q cap=%d), want non-empty model, serial 01234567, cap=%d", got.ModelName, got.SerialNumber, got.Capacity, 1024*512)
+	}
+	if len(got.Attributes) < 3 {
+		t.Fatalf("attributes len=%d, want >= 3", len(got.Attributes))
+	}
+}
diff --git a/agent/emmc_stub.go b/agent/emmc_stub.go
new file mode 100644
index 00000000..9886f757
--- /dev/null
+++ b/agent/emmc_stub.go
@@ -0,0 +1,14 @@
+//go:build !linux
+
+package agent
+
+// Non-Linux builds: eMMC health via sysfs is not available.
+ +func scanEmmcDevices() []*DeviceInfo { + return nil +} + +func (sm *SmartManager) collectEmmcHealth(deviceInfo *DeviceInfo) (bool, error) { + return false, nil +} + diff --git a/agent/smart.go b/agent/smart.go index 51b1004c..6bffea07 100644 --- a/agent/smart.go +++ b/agent/smart.go @@ -28,7 +28,7 @@ type SmartManager struct { SmartDevices []*DeviceInfo refreshMutex sync.Mutex lastScanTime time.Time - binPath string + smartctlPath string excludedDevices map[string]struct{} } @@ -170,27 +170,35 @@ func (sm *SmartManager) ScanDevices(force bool) error { configuredDevices = parsedDevices } - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - - cmd := exec.CommandContext(ctx, sm.binPath, "--scan", "-j") - output, err := cmd.Output() - var ( scanErr error scannedDevices []*DeviceInfo hasValidScan bool ) - if err != nil { - scanErr = err - } else { - scannedDevices, hasValidScan = sm.parseScan(output) - if !hasValidScan { - scanErr = errNoValidSmartData + if sm.smartctlPath != "" { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + cmd := exec.CommandContext(ctx, sm.smartctlPath, "--scan", "-j") + output, err := cmd.Output() + if err != nil { + scanErr = err + } else { + scannedDevices, hasValidScan = sm.parseScan(output) + if !hasValidScan { + scanErr = errNoValidSmartData + } } } + // Add eMMC devices (Linux only) by reading sysfs health fields. This does not + // require smartctl and does not scan the whole device. + if emmcDevices := scanEmmcDevices(); len(emmcDevices) > 0 { + scannedDevices = append(scannedDevices, emmcDevices...) 
+ hasValidScan = true + } + finalDevices := mergeDeviceLists(currentDevices, scannedDevices, configuredDevices) finalDevices = sm.filterExcludedDevices(finalDevices) sm.updateSmartDevices(finalDevices) @@ -442,6 +450,18 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error { return errNoValidSmartData } + // eMMC health is not exposed via SMART on Linux, but the kernel provides + // wear / EOL indicators via sysfs. Prefer that path when available. + if deviceInfo != nil { + if ok, err := sm.collectEmmcHealth(deviceInfo); ok { + return err + } + } + + if sm.smartctlPath == "" { + return errNoValidSmartData + } + // slog.Info("collecting SMART data", "device", deviceInfo.Name, "type", deviceInfo.Type, "has_existing_data", sm.hasDataForDevice(deviceInfo.Name)) // Check if we have any existing data for this device @@ -452,7 +472,7 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error { // Try with -n standby first if we have existing data args := sm.smartctlArgs(deviceInfo, hasExistingData) - cmd := exec.CommandContext(ctx, sm.binPath, args...) + cmd := exec.CommandContext(ctx, sm.smartctlPath, args...) output, err := cmd.CombinedOutput() // Check if device is in standby (exit status 2) @@ -465,7 +485,7 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error { ctx2, cancel2 := context.WithTimeout(context.Background(), 15*time.Second) defer cancel2() args = sm.smartctlArgs(deviceInfo, false) - cmd = exec.CommandContext(ctx2, sm.binPath, args...) + cmd = exec.CommandContext(ctx2, sm.smartctlPath, args...) output, err = cmd.CombinedOutput() } @@ -482,7 +502,7 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error { ctx3, cancel3 := context.WithTimeout(context.Background(), 15*time.Second) defer cancel3() args = sm.smartctlArgs(deviceInfo, false) - cmd = exec.CommandContext(ctx3, sm.binPath, args...) + cmd = exec.CommandContext(ctx3, sm.smartctlPath, args...) 
output, err = cmd.CombinedOutput() hasValidData = sm.parseSmartOutput(deviceInfo, output) @@ -1123,10 +1143,15 @@ func NewSmartManager() (*SmartManager, error) { } sm.refreshExcludedDevices() path, err := sm.detectSmartctl() + slog.Debug("smartctl", "path", path, "err", err) if err != nil { + // Keep the previous fail-fast behavior unless this Linux host exposes + // eMMC health via sysfs, in which case smartctl is optional. + if runtime.GOOS == "linux" && len(scanEmmcDevices()) > 0 { + return sm, nil + } return nil, err } - slog.Debug("smartctl", "path", path) - sm.binPath = path + sm.smartctlPath = path return sm, nil } diff --git a/internal/alerts/alerts_smart.go b/internal/alerts/alerts_smart.go index cf2a2f60..1de6b2fd 100644 --- a/internal/alerts/alerts_smart.go +++ b/internal/alerts/alerts_smart.go @@ -2,18 +2,18 @@ package alerts import ( "fmt" + "strings" "github.com/pocketbase/pocketbase/core" ) -// handleSmartDeviceAlert sends alerts when a SMART device state changes from PASSED to FAILED. +// handleSmartDeviceAlert sends alerts when a SMART device state worsens into WARNING/FAILED. // This is automatic and does not require user opt-in. 
func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error { oldState := e.Record.Original().GetString("state") newState := e.Record.GetString("state") - // Only alert when transitioning from PASSED to FAILED - if oldState != "PASSED" || newState != "FAILED" { + if !shouldSendSmartDeviceAlert(oldState, newState) { return e.Next() } @@ -32,14 +32,15 @@ func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error { systemName := systemRecord.GetString("name") deviceName := e.Record.GetString("name") model := e.Record.GetString("model") + statusLabel := smartStateLabel(newState) // Build alert message - title := fmt.Sprintf("SMART failure on %s: %s \U0001F534", systemName, deviceName) + title := fmt.Sprintf("SMART %s on %s: %s %s", statusLabel, systemName, deviceName, smartStateEmoji(newState)) var message string if model != "" { - message = fmt.Sprintf("Disk %s (%s) SMART status changed to FAILED", deviceName, model) + message = fmt.Sprintf("Disk %s (%s) SMART status changed to %s", deviceName, model, newState) } else { - message = fmt.Sprintf("Disk %s SMART status changed to FAILED", deviceName) + message = fmt.Sprintf("Disk %s SMART status changed to %s", deviceName, newState) } // Get users associated with the system @@ -65,3 +66,42 @@ func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error { return e.Next() } +func shouldSendSmartDeviceAlert(oldState, newState string) bool { + oldSeverity := smartStateSeverity(oldState) + newSeverity := smartStateSeverity(newState) + + // Ignore unknown states and recoveries; only alert on worsening transitions + // from known-good/degraded states into WARNING/FAILED. 
+ return oldSeverity >= 1 && newSeverity > oldSeverity +} + +func smartStateSeverity(state string) int { + switch state { + case "PASSED": + return 1 + case "WARNING": + return 2 + case "FAILED": + return 3 + default: + return 0 + } +} + +func smartStateEmoji(state string) string { + switch state { + case "WARNING": + return "\U0001F7E0" + default: + return "\U0001F534" + } +} + +func smartStateLabel(state string) string { + switch state { + case "FAILED": + return "failure" + default: + return strings.ToLower(state) + } +} diff --git a/internal/alerts/alerts_smart_test.go b/internal/alerts/alerts_smart_test.go index 17b96c41..6a2ecd15 100644 --- a/internal/alerts/alerts_smart_test.go +++ b/internal/alerts/alerts_smart_test.go @@ -58,6 +58,74 @@ func TestSmartDeviceAlert(t *testing.T) { assert.Contains(t, lastMessage.Text, "FAILED") } +func TestSmartDeviceAlertPassedToWarning(t *testing.T) { + hub, user := beszelTests.GetHubWithUser(t) + defer hub.Cleanup() + + system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{ + "name": "test-system", + "users": []string{user.Id}, + "host": "127.0.0.1", + }) + assert.NoError(t, err) + + smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{ + "system": system.Id, + "name": "/dev/mmcblk0", + "model": "eMMC", + "state": "PASSED", + }) + assert.NoError(t, err) + + smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id) + assert.NoError(t, err) + + smartDevice.Set("state", "WARNING") + err = hub.Save(smartDevice) + assert.NoError(t, err) + + time.Sleep(50 * time.Millisecond) + + assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent after state changed to WARNING") + lastMessage := hub.TestMailer.LastMessage() + assert.Contains(t, lastMessage.Subject, "SMART warning on test-system") + assert.Contains(t, lastMessage.Text, "WARNING") +} + +func TestSmartDeviceAlertWarningToFailed(t *testing.T) { + hub, user := beszelTests.GetHubWithUser(t) + defer 
hub.Cleanup() + + system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{ + "name": "test-system", + "users": []string{user.Id}, + "host": "127.0.0.1", + }) + assert.NoError(t, err) + + smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{ + "system": system.Id, + "name": "/dev/mmcblk0", + "model": "eMMC", + "state": "WARNING", + }) + assert.NoError(t, err) + + smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id) + assert.NoError(t, err) + + smartDevice.Set("state", "FAILED") + err = hub.Save(smartDevice) + assert.NoError(t, err) + + time.Sleep(50 * time.Millisecond) + + assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent after state changed from WARNING to FAILED") + lastMessage := hub.TestMailer.LastMessage() + assert.Contains(t, lastMessage.Subject, "SMART failure on test-system") + assert.Contains(t, lastMessage.Text, "FAILED") +} + func TestSmartDeviceAlertNoAlertOnNonPassedToFailed(t *testing.T) { hub, user := beszelTests.GetHubWithUser(t) defer hub.Cleanup() @@ -83,7 +151,8 @@ func TestSmartDeviceAlertNoAlertOnNonPassedToFailed(t *testing.T) { smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id) assert.NoError(t, err) - // Update the state from UNKNOWN to FAILED - should NOT trigger alert + // Update the state from UNKNOWN to FAILED - should NOT trigger alert. + // We only alert from known healthy/degraded states. 
smartDevice.Set("state", "FAILED") err = hub.Save(smartDevice) assert.NoError(t, err) diff --git a/internal/site/src/components/routes/system/smart-table.tsx b/internal/site/src/components/routes/system/smart-table.tsx index e712624d..01748f8c 100644 --- a/internal/site/src/components/routes/system/smart-table.tsx +++ b/internal/site/src/components/routes/system/smart-table.tsx @@ -206,7 +206,12 @@ export const columns: ColumnDef[] = [ invertSorting: true, header: ({ column }) => , cell: ({ getValue }) => { - const { value, unit } = formatTemperature(getValue() as number) + const temp = getValue() as number | undefined | null + // Most devices won't report a real 0C temperature; treat 0 as "unknown". + if (temp == null || temp === 0) { + return
N/A
+ } + const { value, unit } = formatTemperature(temp) return {`${value} ${unit}`} }, }, diff --git a/readme.md b/readme.md index c4b38362..6b9740fc 100644 --- a/readme.md +++ b/readme.md @@ -51,7 +51,7 @@ The [quick start guide](https://beszel.dev/guide/getting-started) and other docu - **GPU usage / power draw** - Nvidia, AMD, and Intel. - **Battery** - Host system battery charge. - **Containers** - Status and metrics of all running Docker / Podman containers. -- **S.M.A.R.T.** - Host system disk health. +- **S.M.A.R.T.** - Host system disk health (includes eMMC wear/EOL via Linux sysfs when available). ## Help and discussion