diff --git a/agent/emmc_linux.go b/agent/emmc_linux.go
index 10a2d82c..0100b1f1 100644
--- a/agent/emmc_linux.go
+++ b/agent/emmc_linux.go
@@ -199,19 +199,6 @@ func readHexByteFile(path string) (uint8, bool) {
 	return b, ok
 }
 
-func readStringFile(path string) string {
-	content, _ := readStringFileOK(path)
-	return content
-}
-
-func readStringFileOK(path string) (string, bool) {
-	b, err := os.ReadFile(path)
-	if err != nil {
-		return "", false
-	}
-	return strings.TrimSpace(string(b)), true
-}
-
 func hasEmmcHealthFiles(deviceDir string) bool {
 	entries, err := os.ReadDir(deviceDir)
 	if err != nil {
diff --git a/agent/file_utils.go b/agent/file_utils.go
new file mode 100644
index 00000000..89dc4692
--- /dev/null
+++ b/agent/file_utils.go
@@ -0,0 +1,30 @@
+package agent
+
+import (
+	"os"
+	"strings"
+)
+
+// readStringFile returns the whitespace-trimmed contents of path, or "" when
+// the file cannot be read.
+func readStringFile(path string) string {
+	content, _ := readStringFileOK(path)
+	return content
+}
+
+// readStringFileOK returns the whitespace-trimmed contents of path and true,
+// or ("", false) when the file cannot be read.
+func readStringFileOK(path string) (string, bool) {
+	b, err := os.ReadFile(path)
+	if err != nil {
+		return "", false
+	}
+	return strings.TrimSpace(string(b)), true
+}
+
+// fileExists reports whether os.Stat succeeds for path. Any Stat error,
+// including a permission error, is reported as false.
+func fileExists(path string) bool {
+	_, err := os.Stat(path)
+	return err == nil
+}
diff --git a/agent/mdraid_linux.go b/agent/mdraid_linux.go
new file mode 100644
index 00000000..08a402e4
--- /dev/null
+++ b/agent/mdraid_linux.go
@@ -0,0 +1,263 @@
+//go:build linux
+
+package agent
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+
+	"github.com/henrygd/beszel/internal/entities/smart"
+)
+
+// mdraidSysfsRoot is a test hook; production value is "/sys".
+var mdraidSysfsRoot = "/sys"
+
+// sysfsSectorSize is the unit of the sysfs block "size" file: the kernel reports
+// that value in 512-byte sectors regardless of the device's logical block size.
+const sysfsSectorSize = 512
+
+// mdraidHealth holds the values read from the sysfs md directory of one array.
+type mdraidHealth struct {
+	name          string
+	level         string
+	arrayState    string
+	degraded      uint64
+	raidDisks     uint64
+	syncAction    string
+	syncCompleted string
+	syncSpeed     string
+	mismatchCnt   uint64
+	capacity      uint64
+}
+
+// scanMdraidDevices lists md arrays found under the block directory of
+// mdraidSysfsRoot. An entry is reported when its name has the form mdN and it
+// contains an md/array_state file. It returns nil when the block directory
+// cannot be read.
+func scanMdraidDevices() []*DeviceInfo {
+	blockDir := filepath.Join(mdraidSysfsRoot, "block")
+	entries, err := os.ReadDir(blockDir)
+	if err != nil {
+		return nil
+	}
+
+	devices := make([]*DeviceInfo, 0, 2)
+	for _, ent := range entries {
+		name := ent.Name()
+		if !isMdraidBlockName(name) {
+			continue
+		}
+		mdDir := filepath.Join(blockDir, name, "md")
+		if !fileExists(filepath.Join(mdDir, "array_state")) {
+			continue
+		}
+
+		devPath := filepath.Join("/dev", name)
+		devices = append(devices, &DeviceInfo{
+			Name:     devPath,
+			Type:     "mdraid",
+			InfoName: devPath + " [mdraid]",
+			Protocol: "MD",
+		})
+	}
+
+	return devices
+}
+
+// collectMdraidHealth reads the sysfs health of the md array named by
+// deviceInfo and stores it in sm.SmartDataMap under the key "mdraid:" followed
+// by the block name.
+//
+// It returns (true, nil) when the array was read and recorded, and (false, nil)
+// when deviceInfo does not refer to a readable md array. The error result is
+// currently always nil.
+func (sm *SmartManager) collectMdraidHealth(deviceInfo *DeviceInfo) (bool, error) {
+	if deviceInfo == nil || deviceInfo.Name == "" {
+		return false, nil
+	}
+
+	base := filepath.Base(deviceInfo.Name)
+	if !isMdraidBlockName(base) && !strings.EqualFold(deviceInfo.Type, "mdraid") {
+		return false, nil
+	}
+
+	// readMdraidHealth validates the name again, so a device typed "mdraid"
+	// whose base name is not mdN is rejected here with ok == false.
+	health, ok := readMdraidHealth(base)
+	if !ok {
+		return false, nil
+	}
+
+	deviceInfo.Type = "mdraid"
+	key := fmt.Sprintf("mdraid:%s", base)
+	status := mdraidSmartStatus(health)
+
+	// Zero counters and empty strings are omitted rather than reported.
+	attrs := make([]*smart.SmartAttribute, 0, 10)
+	if health.arrayState != "" {
+		attrs = append(attrs, &smart.SmartAttribute{Name: "ArrayState", RawString: health.arrayState})
+	}
+	if health.level != "" {
+		attrs = append(attrs, &smart.SmartAttribute{Name: "RaidLevel", RawString: health.level})
+	}
+	if health.raidDisks > 0 {
+		attrs = append(attrs, &smart.SmartAttribute{Name: "RaidDisks", RawValue: health.raidDisks})
+	}
+	if health.degraded > 0 {
+		attrs = append(attrs, &smart.SmartAttribute{Name: "Degraded", RawValue: health.degraded})
+	}
+	if health.syncAction != "" {
+		attrs = append(attrs, &smart.SmartAttribute{Name: "SyncAction", RawString: health.syncAction})
+	}
+	if health.syncCompleted != "" {
+		attrs = append(attrs, &smart.SmartAttribute{Name: "SyncCompleted", RawString: health.syncCompleted})
+	}
+	if health.syncSpeed != "" {
+		attrs = append(attrs, &smart.SmartAttribute{Name: "SyncSpeed", RawString: health.syncSpeed})
+	}
+	if health.mismatchCnt > 0 {
+		attrs = append(attrs, &smart.SmartAttribute{Name: "MismatchCount", RawValue: health.mismatchCnt})
+	}
+
+	sm.Lock()
+	defer sm.Unlock()
+
+	data := sm.SmartDataMap[key]
+	if data == nil {
+		data = &smart.SmartData{}
+		sm.SmartDataMap[key] = data
+	}
+
+	data.ModelName = "Linux MD RAID"
+	if health.level != "" {
+		data.ModelName = "Linux MD RAID (" + health.level + ")"
+	}
+	data.SerialNumber = ""
+	data.FirmwareVersion = ""
+	data.Capacity = health.capacity
+	data.Temperature = 0
+	data.SmartStatus = status
+	data.DiskName = filepath.Join("/dev", base)
+	data.DiskType = "mdraid"
+	data.Attributes = attrs
+
+	return true, nil
+}
+
+// readMdraidHealth reads the md sysfs attributes of blockName. It returns
+// false when blockName is not of the form mdN or md/array_state is unreadable;
+// every other attribute is optional and left at its zero value when missing.
+func readMdraidHealth(blockName string) (mdraidHealth, bool) {
+	var out mdraidHealth
+
+	if !isMdraidBlockName(blockName) {
+		return out, false
+	}
+
+	mdDir := filepath.Join(mdraidSysfsRoot, "block", blockName, "md")
+	arrayState, okState := readStringFileOK(filepath.Join(mdDir, "array_state"))
+	if !okState {
+		return out, false
+	}
+
+	out.name = blockName
+	out.arrayState = arrayState
+	out.level = readStringFile(filepath.Join(mdDir, "level"))
+	out.syncAction = readStringFile(filepath.Join(mdDir, "sync_action"))
+	out.syncCompleted = readStringFile(filepath.Join(mdDir, "sync_completed"))
+	out.syncSpeed = readStringFile(filepath.Join(mdDir, "sync_speed"))
+
+	if val, ok := readUintFile(filepath.Join(mdDir, "raid_disks")); ok {
+		out.raidDisks = val
+	}
+	if val, ok := readUintFile(filepath.Join(mdDir, "degraded")); ok {
+		out.degraded = val
+	}
+	if val, ok := readUintFile(filepath.Join(mdDir, "mismatch_cnt")); ok {
+		out.mismatchCnt = val
+	}
+
+	if capBytes, ok := readMdraidBlockCapacityBytes(blockName, mdraidSysfsRoot); ok {
+		out.capacity = capBytes
+	}
+
+	return out, true
+}
+
+// mdraidSmartStatus maps array health onto a SMART-style status string:
+// FAILED for an inactive, faulty, broken, stopped or degraded array, WARNING
+// while a sync action is in progress, PASSED for a healthy running state, and
+// UNKNOWN otherwise.
+func mdraidSmartStatus(health mdraidHealth) string {
+	state := strings.ToLower(strings.TrimSpace(health.arrayState))
+	switch state {
+	case "inactive", "faulty", "broken", "stopped":
+		return "FAILED"
+	}
+	if health.degraded > 0 {
+		return "FAILED"
+	}
+
+	switch strings.ToLower(strings.TrimSpace(health.syncAction)) {
+	case "resync", "recover", "reshape", "check", "repair":
+		return "WARNING"
+	}
+
+	// The kernel also reports active-idle, write-pending and read-auto for
+	// arrays that are running normally.
+	switch state {
+	case "clean", "active", "active-idle", "write-pending", "readonly", "read-auto":
+		return "PASSED"
+	}
+	return "UNKNOWN"
+}
+
+// isMdraidBlockName reports whether name is "md" followed by one or more ASCII
+// digits, e.g. "md0" or "md127".
+func isMdraidBlockName(name string) bool {
+	if !strings.HasPrefix(name, "md") {
+		return false
+	}
+	suffix := strings.TrimPrefix(name, "md")
+	if suffix == "" {
+		return false
+	}
+	for _, c := range suffix {
+		if c < '0' || c > '9' {
+			return false
+		}
+	}
+	return true
+}
+
+// readUintFile parses the contents of path as a base-10 unsigned integer. It
+// returns (0, false) when the file is unreadable or not a valid number.
+func readUintFile(path string) (uint64, bool) {
+	// readStringFileOK already trims surrounding whitespace.
+	raw, ok := readStringFileOK(path)
+	if !ok {
+		return 0, false
+	}
+	parsed, err := strconv.ParseUint(raw, 10, 64)
+	if err != nil {
+		return 0, false
+	}
+	return parsed, true
+}
+
+// readMdraidBlockCapacityBytes returns the capacity in bytes of block device
+// blockName under the sysfs tree rooted at root. It returns (0, false) when the
+// size file is missing, unparsable, zero, or too large for a uint64 byte count.
+func readMdraidBlockCapacityBytes(blockName, root string) (uint64, bool) {
+	sectors, ok := readUintFile(filepath.Join(root, "block", blockName, "size"))
+	if !ok || sectors == 0 {
+		return 0, false
+	}
+
+	// sysfs reports size in 512-byte units even on 4Kn devices, so
+	// queue/logical_block_size must not be used as the multiplier.
+	const maxSectors = (1<<64 - 1) / sysfsSectorSize
+	if sectors > maxSectors {
+		return 0, false
+	}
+	return sectors * sysfsSectorSize, true
+}
diff --git a/agent/mdraid_linux_test.go b/agent/mdraid_linux_test.go
new file mode 100644
index 00000000..fcb50d6c
--- /dev/null
+++ b/agent/mdraid_linux_test.go
@@ -0,0 +1,105 @@
+//go:build linux
+
+package agent
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/henrygd/beszel/internal/entities/smart"
+)
+
+func TestMdraidMockSysfsScanAndCollect(t *testing.T) {
+	tmp := t.TempDir()
+	prev := mdraidSysfsRoot
+	mdraidSysfsRoot = tmp
+	t.Cleanup(func() { mdraidSysfsRoot = prev })
+
+	mdDir := filepath.Join(tmp, "block", "md0", "md")
+	queueDir := filepath.Join(tmp, "block", "md0", "queue")
+	if err := os.MkdirAll(mdDir, 0o755); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.MkdirAll(queueDir, 0o755); err != nil {
+		t.Fatal(err)
+	}
+
+	write := func(path, content string) {
+		t.Helper()
+		if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	write(filepath.Join(mdDir, "array_state"), "active\n")
+	write(filepath.Join(mdDir, "level"), "raid1\n")
+	write(filepath.Join(mdDir, "raid_disks"), "2\n")
+	write(filepath.Join(mdDir, "degraded"), "0\n")
+	write(filepath.Join(mdDir, "sync_action"), "resync\n")
+	write(filepath.Join(mdDir, "sync_completed"), "10%\n")
+	write(filepath.Join(mdDir, "sync_speed"), "100M\n")
+	write(filepath.Join(mdDir, "mismatch_cnt"), "0\n")
+	// A 4096-byte logical block size must not change the capacity: sysfs size
+	// is always expressed in 512-byte sectors.
+	write(filepath.Join(queueDir, "logical_block_size"), "4096\n")
+	write(filepath.Join(tmp, "block", "md0", "size"), "2048\n")
+
+	devs := scanMdraidDevices()
+	if len(devs) != 1 {
+		t.Fatalf("scanMdraidDevices() = %d devices, want 1", len(devs))
+	}
+	if devs[0].Name != "/dev/md0" || devs[0].Type != "mdraid" {
+		t.Fatalf("scanMdraidDevices()[0] = %+v, want Name=/dev/md0 Type=mdraid", devs[0])
+	}
+
+	sm := &SmartManager{SmartDataMap: map[string]*smart.SmartData{}}
+	ok, err := sm.collectMdraidHealth(devs[0])
+	if err != nil || !ok {
+		t.Fatalf("collectMdraidHealth() = (ok=%v, err=%v), want (true,nil)", ok, err)
+	}
+	if len(sm.SmartDataMap) != 1 {
+		t.Fatalf("SmartDataMap len=%d, want 1", len(sm.SmartDataMap))
+	}
+	got := sm.SmartDataMap["mdraid:md0"]
+	if got == nil {
+		t.Fatalf("SmartDataMap[%q] missing", "mdraid:md0")
+	}
+	if got.DiskType != "mdraid" || got.DiskName != "/dev/md0" {
+		t.Fatalf("disk fields = (type=%q name=%q), want (mdraid,/dev/md0)", got.DiskType, got.DiskName)
+	}
+	if got.SmartStatus != "WARNING" {
+		t.Fatalf("SmartStatus=%q, want WARNING", got.SmartStatus)
+	}
+	if got.ModelName != "Linux MD RAID (raid1)" {
+		t.Fatalf("ModelName=%q, want %q", got.ModelName, "Linux MD RAID (raid1)")
+	}
+	if want := uint64(2048 * 512); got.Capacity != want {
+		t.Fatalf("Capacity=%d, want %d", got.Capacity, want)
+	}
+	if len(got.Attributes) < 5 {
+		t.Fatalf("attributes len=%d, want >= 5", len(got.Attributes))
+	}
+}
+
+func TestMdraidSmartStatus(t *testing.T) {
+	tests := []struct {
+		name   string
+		health mdraidHealth
+		want   string
+	}{
+		{"inactive", mdraidHealth{arrayState: "inactive"}, "FAILED"},
+		{"degraded", mdraidHealth{arrayState: "active", degraded: 1}, "FAILED"},
+		{"recover", mdraidHealth{arrayState: "active", syncAction: "recover"}, "WARNING"},
+		{"clean", mdraidHealth{arrayState: "clean"}, "PASSED"},
+		{"active-idle", mdraidHealth{arrayState: "active-idle", syncAction: "idle"}, "PASSED"},
+		{"write-pending", mdraidHealth{arrayState: "write-pending"}, "PASSED"},
+		{"read-auto", mdraidHealth{arrayState: "read-auto"}, "PASSED"},
+		{"unknown", mdraidHealth{arrayState: "unknown"}, "UNKNOWN"},
+	}
+	for _, tt := range tests {
+		if got := mdraidSmartStatus(tt.health); got != tt.want {
+			t.Errorf("mdraidSmartStatus(%s) = %q, want %q", tt.name, got, tt.want)
+		}
+	}
+}
diff --git a/agent/mdraid_stub.go b/agent/mdraid_stub.go
new file mode 100644
index 00000000..4c95d20b
--- /dev/null
+++ b/agent/mdraid_stub.go
@@ -0,0 +1,13 @@
+//go:build !linux
+
+package agent
+
+// scanMdraidDevices always returns nil: md arrays are only scanned on Linux.
+func scanMdraidDevices() []*DeviceInfo {
+	return nil
+}
+
+// collectMdraidHealth always returns (false, nil) on non-Linux platforms.
+func (sm *SmartManager) collectMdraidHealth(deviceInfo *DeviceInfo) (bool, error) {
+	return false, nil
+}
diff --git a/agent/smart.go b/agent/smart.go
index 2a779589..09562d26 100644
--- a/agent/smart.go
+++ b/agent/smart.go
@@ -199,6 +199,13 @@ func (sm *SmartManager) ScanDevices(force bool) error {
 		hasValidScan = true
 	}
 
+	// Add Linux mdraid arrays by reading sysfs health fields. This does not
+	// require smartctl and does not scan the whole device.
+	if raidDevices := scanMdraidDevices(); len(raidDevices) > 0 {
+		scannedDevices = append(scannedDevices, raidDevices...)
+		hasValidScan = true
+	}
+
 	finalDevices := mergeDeviceLists(currentDevices, scannedDevices, configuredDevices)
 	finalDevices = sm.filterExcludedDevices(finalDevices)
 	sm.updateSmartDevices(finalDevices)
@@ -450,6 +457,12 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
 		return errNoValidSmartData
 	}
 
+	// mdraid health is not exposed via SMART; Linux exposes array state in sysfs.
+	if deviceInfo != nil {
+		if ok, err := sm.collectMdraidHealth(deviceInfo); ok {
+			return err
+		}
+	}
 	// eMMC health is not exposed via SMART on Linux, but the kernel provides
 	// wear / EOL indicators via sysfs. Prefer that path when available.
 	if deviceInfo != nil {
@@ -1146,9 +1159,11 @@ func NewSmartManager() (*SmartManager, error) {
 	slog.Debug("smartctl", "path", path, "err", err)
 	if err != nil {
 		// Keep the previous fail-fast behavior unless this Linux host exposes
-		// eMMC health via sysfs, in which case smartctl is optional.
-		if runtime.GOOS == "linux" && len(scanEmmcDevices()) > 0 {
-			return sm, nil
+		// eMMC or mdraid health via sysfs, in which case smartctl is optional.
+		if runtime.GOOS == "linux" {
+			if len(scanEmmcDevices()) > 0 || len(scanMdraidDevices()) > 0 {
+				return sm, nil
+			}
 		}
 		return nil, err
 	}
diff --git a/readme.md b/readme.md
index 6b9740fc..2495eaf7 100644
--- a/readme.md
+++ b/readme.md
@@ -51,7 +51,7 @@ The [quick start guide](https://beszel.dev/guide/getting-started) and other docu
 - **GPU usage / power draw** - Nvidia, AMD, and Intel.
 - **Battery** - Host system battery charge.
 - **Containers** - Status and metrics of all running Docker / Podman containers.
-- **S.M.A.R.T.** - Host system disk health (includes eMMC wear/EOL via Linux sysfs when available).
+- **S.M.A.R.T.** - Host system disk health (includes eMMC wear/EOL and Linux mdraid array health via sysfs when available).
 
 ## Help and discussion
 