SMART: add eMMC health via sysfs (#1736)

* SMART: add eMMC health via sysfs

Read eMMC wear/EOL indicators from /sys/class/block/mmcblk*/device and expose in SMART device list. Includes mocked sysfs tests and UI tweaks for unknown temps.

* small optimizations for emmc scan and parsing

* smart: keep smartctl optional only for Linux hosts with eMMC

* update smart alerts to handle warning state

* refactor: rename binPath to smartctlPath and replace hasSmartctl with smartctlPath checks

---------

Co-authored-by: henrygd <hank@henrygd.me>
This commit is contained in:
VACInc
2026-02-12 15:27:42 -05:00
committed by GitHub
parent 2230097dc7
commit e816ea143a
11 changed files with 661 additions and 27 deletions

View File

@@ -28,7 +28,7 @@ type SmartManager struct {
SmartDevices []*DeviceInfo
refreshMutex sync.Mutex
lastScanTime time.Time
binPath string
smartctlPath string
excludedDevices map[string]struct{}
}
@@ -170,27 +170,35 @@ func (sm *SmartManager) ScanDevices(force bool) error {
configuredDevices = parsedDevices
}
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
cmd := exec.CommandContext(ctx, sm.binPath, "--scan", "-j")
output, err := cmd.Output()
var (
scanErr error
scannedDevices []*DeviceInfo
hasValidScan bool
)
if err != nil {
scanErr = err
} else {
scannedDevices, hasValidScan = sm.parseScan(output)
if !hasValidScan {
scanErr = errNoValidSmartData
if sm.smartctlPath != "" {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
cmd := exec.CommandContext(ctx, sm.smartctlPath, "--scan", "-j")
output, err := cmd.Output()
if err != nil {
scanErr = err
} else {
scannedDevices, hasValidScan = sm.parseScan(output)
if !hasValidScan {
scanErr = errNoValidSmartData
}
}
}
// Add eMMC devices (Linux only) by reading sysfs health fields. This does not
// require smartctl and does not scan the whole device.
if emmcDevices := scanEmmcDevices(); len(emmcDevices) > 0 {
scannedDevices = append(scannedDevices, emmcDevices...)
hasValidScan = true
}
finalDevices := mergeDeviceLists(currentDevices, scannedDevices, configuredDevices)
finalDevices = sm.filterExcludedDevices(finalDevices)
sm.updateSmartDevices(finalDevices)
@@ -442,6 +450,18 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
return errNoValidSmartData
}
// eMMC health is not exposed via SMART on Linux, but the kernel provides
// wear / EOL indicators via sysfs. Prefer that path when available.
if deviceInfo != nil {
if ok, err := sm.collectEmmcHealth(deviceInfo); ok {
return err
}
}
if sm.smartctlPath == "" {
return errNoValidSmartData
}
// slog.Info("collecting SMART data", "device", deviceInfo.Name, "type", deviceInfo.Type, "has_existing_data", sm.hasDataForDevice(deviceInfo.Name))
// Check if we have any existing data for this device
@@ -452,7 +472,7 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
// Try with -n standby first if we have existing data
args := sm.smartctlArgs(deviceInfo, hasExistingData)
cmd := exec.CommandContext(ctx, sm.binPath, args...)
cmd := exec.CommandContext(ctx, sm.smartctlPath, args...)
output, err := cmd.CombinedOutput()
// Check if device is in standby (exit status 2)
@@ -465,7 +485,7 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
ctx2, cancel2 := context.WithTimeout(context.Background(), 15*time.Second)
defer cancel2()
args = sm.smartctlArgs(deviceInfo, false)
cmd = exec.CommandContext(ctx2, sm.binPath, args...)
cmd = exec.CommandContext(ctx2, sm.smartctlPath, args...)
output, err = cmd.CombinedOutput()
}
@@ -482,7 +502,7 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
ctx3, cancel3 := context.WithTimeout(context.Background(), 15*time.Second)
defer cancel3()
args = sm.smartctlArgs(deviceInfo, false)
cmd = exec.CommandContext(ctx3, sm.binPath, args...)
cmd = exec.CommandContext(ctx3, sm.smartctlPath, args...)
output, err = cmd.CombinedOutput()
hasValidData = sm.parseSmartOutput(deviceInfo, output)
@@ -1123,10 +1143,15 @@ func NewSmartManager() (*SmartManager, error) {
}
sm.refreshExcludedDevices()
path, err := sm.detectSmartctl()
slog.Debug("smartctl", "path", path, "err", err)
if err != nil {
// Keep the previous fail-fast behavior unless this Linux host exposes
// eMMC health via sysfs, in which case smartctl is optional.
if runtime.GOOS == "linux" && len(scanEmmcDevices()) > 0 {
return sm, nil
}
return nil, err
}
slog.Debug("smartctl", "path", path)
sm.binPath = path
sm.smartctlPath = path
return sm, nil
}