diff --git a/agent/agent.go b/agent/agent.go
index 4c395a70..5c04e86f 100644
--- a/agent/agent.go
+++ b/agent/agent.go
@@ -42,6 +42,7 @@ type Agent struct {
server *ssh.Server // SSH server
dataDir string // Directory for persisting data
keys []gossh.PublicKey // SSH public keys
+ smartManager *SmartManager // Manages SMART data
}
// NewAgent creates a new agent with the given data directory for persisting data.
@@ -100,11 +101,15 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {
// initialize docker manager
agent.dockerManager = newDockerManager(agent)
+ agent.smartManager, err = NewSmartManager()
+ if err != nil {
+ slog.Debug("SMART", "err", err)
+ }
+
// initialize GPU manager
- if gm, err := NewGPUManager(); err != nil {
+ agent.gpuManager, err = NewGPUManager()
+ if err != nil {
slog.Debug("GPU", "err", err)
- } else {
- agent.gpuManager = gm
}
// if debugging, print stats
diff --git a/agent/client.go b/agent/client.go
index c3f5a165..251eea62 100644
--- a/agent/client.go
+++ b/agent/client.go
@@ -15,6 +15,7 @@ import (
"github.com/henrygd/beszel"
"github.com/henrygd/beszel/internal/common"
+ "github.com/henrygd/beszel/internal/entities/smart"
"github.com/henrygd/beszel/internal/entities/system"
"github.com/fxamacker/cbor/v2"
@@ -273,6 +274,8 @@ func (client *WebSocketClient) sendResponse(data any, requestID *uint32) error {
response.Fingerprint = v
case string:
response.String = &v
+ case map[string]smart.SmartData:
+ response.SmartData = v
// case []byte:
// response.RawBytes = v
// case string:
diff --git a/agent/handlers.go b/agent/handlers.go
index 70e1eb9c..4d16884f 100644
--- a/agent/handlers.go
+++ b/agent/handlers.go
@@ -7,6 +7,9 @@ import (
"github.com/fxamacker/cbor/v2"
"github.com/henrygd/beszel/internal/common"
+ "github.com/henrygd/beszel/internal/entities/smart"
+
+ "golang.org/x/exp/slog"
)
// HandlerContext provides context for request handlers
@@ -46,6 +49,7 @@ func NewHandlerRegistry() *HandlerRegistry {
registry.Register(common.CheckFingerprint, &CheckFingerprintHandler{})
registry.Register(common.GetContainerLogs, &GetContainerLogsHandler{})
registry.Register(common.GetContainerInfo, &GetContainerInfoHandler{})
+ registry.Register(common.GetSmartData, &GetSmartDataHandler{})
return registry
}
@@ -152,3 +156,21 @@ func (h *GetContainerInfoHandler) Handle(hctx *HandlerContext) error {
return hctx.SendResponse(info, hctx.RequestID)
}
+
+////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////
+
+// GetSmartDataHandler answers hub requests for the agent's SMART data.
+type GetSmartDataHandler struct{}
+
+// Handle refreshes the SMART manager and sends its current data to the hub.
+// When the agent has no SMART manager an empty map is sent instead. A failed
+// refresh is only logged at debug level; whatever data the manager currently
+// holds is still sent.
+func (h *GetSmartDataHandler) Handle(hctx *HandlerContext) error {
+	manager := hctx.Agent.smartManager
+	if manager == nil {
+		return hctx.SendResponse(map[string]smart.SmartData{}, hctx.RequestID)
+	}
+	if refreshErr := manager.Refresh(); refreshErr != nil {
+		slog.Debug("smart refresh failed", "err", refreshErr)
+	}
+	return hctx.SendResponse(manager.GetCurrentData(), hctx.RequestID)
+}
diff --git a/agent/smart.go b/agent/smart.go
new file mode 100644
index 00000000..d51ba36b
--- /dev/null
+++ b/agent/smart.go
@@ -0,0 +1,402 @@
+package agent
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "os/exec"
+ "sync"
+ "time"
+
+ "github.com/henrygd/beszel/internal/entities/smart"
+
+ "golang.org/x/exp/slog"
+)
+
+// SmartManager manages data collection for SMART devices
+type SmartManager struct {
+ // The embedded mutex is held by the methods in this file whenever they read
+ // or write SmartDataMap or SmartDevices.
+ sync.Mutex
+ // SmartDataMap holds the latest parsed data per drive. The parsers store
+ // entries under the drive's serial number.
+ SmartDataMap map[string]*smart.SmartData
+ // SmartDevices is the device list produced by the most recent scan parse.
+ SmartDevices []*DeviceInfo
+ // refreshMutex is held for the whole of Refresh so refreshes do not overlap.
+ refreshMutex sync.Mutex
+}
+
+// scanOutput is the JSON shape decoded from `smartctl --scan -j`; only the
+// "devices" array is read.
+type scanOutput struct {
+ Devices []struct {
+ Name string `json:"name"`
+ Type string `json:"type"`
+ InfoName string `json:"info_name"`
+ Protocol string `json:"protocol"`
+ } `json:"devices"`
+}
+
+// DeviceInfo describes one device reported by the scan. Name is passed to
+// smartctl as the device argument and Type selects which parser is used for
+// the collected output.
+type DeviceInfo struct {
+ Name string `json:"name"`
+ Type string `json:"type"`
+ InfoName string `json:"info_name"`
+ Protocol string `json:"protocol"`
+}
+
+// errNoValidSmartData is returned when a scan or a collection produced no
+// usable SMART data and there is no more specific error to report.
+var errNoValidSmartData = fmt.Errorf("no valid SMART data found") // Error for missing data
+
+// Refresh rescans for devices and then collects SMART data from every known
+// device. Calls are serialized by refreshMutex. A failed scan is logged and
+// collection continues with whatever devices are currently known; a failed
+// collection is logged and that device is skipped. The returned error is
+// chosen by resolveRefreshError from the scan error and the last collection
+// error.
+func (sm *SmartManager) Refresh() error {
+	sm.refreshMutex.Lock()
+	defer sm.refreshMutex.Unlock()
+
+	scanErr := sm.ScanDevices()
+	if scanErr != nil {
+		slog.Warn("smartctl scan failed", "err", scanErr)
+	}
+
+	var lastCollectErr error
+	for _, dev := range sm.devicesSnapshot() {
+		if dev == nil {
+			continue
+		}
+		err := sm.CollectSmart(dev)
+		if err == nil {
+			continue
+		}
+		slog.Info("smartctl collect failed for device, skipping", "device", dev.Name, "err", err)
+		lastCollectErr = err
+	}
+
+	return sm.resolveRefreshError(scanErr, lastCollectErr)
+}
+
+// devicesSnapshot copies the device slice while holding the primary mutex, so
+// the caller can iterate over the devices without keeping that lock.
+func (sm *SmartManager) devicesSnapshot() []*DeviceInfo {
+	sm.Lock()
+	snapshot := make([]*DeviceInfo, len(sm.SmartDevices))
+	copy(snapshot, sm.SmartDevices)
+	sm.Unlock()
+	return snapshot
+}
+
+// hasSmartData reports whether any SMART data has been collected.
+// func (sm *SmartManager) hasSmartData() bool {
+// sm.Lock()
+// defer sm.Unlock()
+
+// return len(sm.SmartDataMap) > 0
+// }
+
+// resolveRefreshError picks the error Refresh should return. In order:
+//   - no devices are known and the scan failed: the scan error;
+//   - some SMART data is cached: nil;
+//   - otherwise the collection error, then the scan error, then
+//     errNoValidSmartData.
+func (sm *SmartManager) resolveRefreshError(scanErr, collectErr error) error {
+	sm.Lock()
+	deviceCount, dataCount := len(sm.SmartDevices), len(sm.SmartDataMap)
+	sm.Unlock()
+
+	switch {
+	case deviceCount == 0 && scanErr != nil:
+		return scanErr
+	case dataCount > 0:
+		return nil
+	case collectErr != nil:
+		return collectErr
+	case scanErr != nil:
+		return scanErr
+	default:
+		return errNoValidSmartData
+	}
+}
+
+// GetCurrentData returns a copy of the cached SMART data as a map of values.
+// Nil entries are left out. Each value is a shallow copy, so its Attributes
+// slice still refers to the same attribute objects as the cached entry.
+func (sm *SmartManager) GetCurrentData() map[string]smart.SmartData {
+	sm.Lock()
+	defer sm.Unlock()
+
+	snapshot := make(map[string]smart.SmartData, len(sm.SmartDataMap))
+	for key, entry := range sm.SmartDataMap {
+		if entry == nil {
+			continue
+		}
+		snapshot[key] = *entry
+	}
+	return snapshot
+}
+
+// ScanDevices runs `smartctl --scan -j` with a five second timeout and hands
+// the output to parseScan, which updates the SmartDevices slice.
+// It returns the command error if the scan fails, and errNoValidSmartData if
+// the output could not be parsed or listed no devices.
+func (sm *SmartManager) ScanDevices() error {
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	output, err := exec.CommandContext(ctx, "smartctl", "--scan", "-j").Output()
+	if err != nil {
+		return err
+	}
+	if sm.parseScan(output) {
+		return nil
+	}
+	return errNoValidSmartData
+}
+
+// CollectSmart collects SMART data for a device by running
+// `smartctl -aj -n standby <device>` and parsing the JSON it prints.
+//
+// The output is parsed even when the command reports an error, as some data
+// may still be available. On success the parser updates SmartDataMap.
+//
+// -n standby is always used first so a sleeping disk is not woken up. If the
+// device is in standby and data for it is already cached, the cache is kept
+// and nil is returned. If nothing is cached yet, the command is run again
+// without -n standby to obtain initial data.
+//
+// It returns the command error (or errNoValidSmartData when the command itself
+// succeeded) if no valid data could be parsed. Device types other than
+// scsi/sat/ata/nvme are not parsed and therefore always yield an error.
+func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
+	// Looked up first: it decides what happens if the device is in standby.
+	hasExistingData := sm.hasDataForDevice(deviceInfo.Name)
+
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	// Only stdout is captured (Output, not CombinedOutput): anything smartctl
+	// writes to stderr must not end up inside the JSON document parsed below.
+	cmd := exec.CommandContext(ctx, "smartctl", "-aj", "-n", "standby", deviceInfo.Name)
+	output, err := cmd.Output()
+
+	// Check if device is in standby (exit status 2)
+	if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 2 {
+		if hasExistingData {
+			// Device is in standby and we have cached data, keep using cache
+			slog.Debug("device in standby mode, using cached data", "device", deviceInfo.Name)
+			return nil
+		}
+		// No cached data, need to collect initial data by bypassing standby
+		slog.Debug("device in standby but no cached data, collecting initial data", "device", deviceInfo.Name)
+		ctx2, cancel2 := context.WithTimeout(context.Background(), 10*time.Second)
+		defer cancel2()
+		cmd = exec.CommandContext(ctx2, "smartctl", "-aj", deviceInfo.Name)
+		output, err = cmd.Output()
+	}
+
+	hasValidData := false
+
+	switch deviceInfo.Type {
+	case "scsi", "sat", "ata":
+		// parse SATA/SCSI/ATA devices
+		hasValidData, _ = sm.parseSmartForSata(output)
+	case "nvme":
+		// parse nvme devices
+		hasValidData, _ = sm.parseSmartForNvme(output)
+	}
+
+	if !hasValidData {
+		if err != nil {
+			return err
+		}
+		return errNoValidSmartData
+	}
+	return nil
+}
+
+// hasDataForDevice reports whether any cached SMART entry has a DiskName equal
+// to deviceName. The cache is scanned linearly because it is not keyed by
+// device name.
+func (sm *SmartManager) hasDataForDevice(deviceName string) bool {
+	sm.Lock()
+	defer sm.Unlock()
+
+	for _, entry := range sm.SmartDataMap {
+		if entry == nil || entry.DiskName != deviceName {
+			continue
+		}
+		return true
+	}
+	return false
+}
+
+// parseScan parses the output of smartctl --scan -j and replaces the
+// SmartDevices slice with the devices it lists. Cached SMART data belonging to
+// devices that are no longer listed is removed from SmartDataMap.
+//
+// It returns false (leaving SmartDevices empty) when the JSON cannot be parsed
+// or lists no devices; in that case SmartDataMap is not touched.
+func (sm *SmartManager) parseScan(output []byte) bool {
+	sm.Lock()
+	defer sm.Unlock()
+
+	sm.SmartDevices = make([]*DeviceInfo, 0)
+	scan := &scanOutput{}
+
+	if err := json.Unmarshal(output, scan); err != nil {
+		slog.Warn("Failed to parse smartctl scan JSON", "err", err)
+		return false
+	}
+
+	if len(scan.Devices) == 0 {
+		return false
+	}
+
+	scannedNames := make(map[string]struct{}, len(scan.Devices))
+
+	for _, device := range scan.Devices {
+		sm.SmartDevices = append(sm.SmartDevices, &DeviceInfo{
+			Name:     device.Name,
+			Type:     device.Type,
+			InfoName: device.InfoName,
+			Protocol: device.Protocol,
+		})
+		scannedNames[device.Name] = struct{}{}
+	}
+
+	// Remove cached data for devices that are not in the scan. SmartDataMap is
+	// keyed by serial number, not device name, so entries are matched through
+	// their DiskName; comparing the map key itself would never match and would
+	// wipe the whole cache on every scan.
+	for key, data := range sm.SmartDataMap {
+		if data != nil {
+			if _, ok := scannedNames[data.DiskName]; ok {
+				continue
+			}
+		}
+		delete(sm.SmartDataMap, key)
+	}
+
+	return true
+}
+
+// parseSmartForSata parses the output of smartctl --all -j for SATA/ATA devices
+// and stores the result in SmartDataMap under the drive's serial number.
+// Returns hasValidData and exitStatus. Output that is not valid JSON yields
+// (false, 0); output without a serial number is skipped with a warning.
+func (sm *SmartManager) parseSmartForSata(output []byte) (bool, int) {
+	var info smart.SmartInfoForSata
+	if json.Unmarshal(output, &info) != nil {
+		return false, 0
+	}
+
+	serial := info.SerialNumber
+	if serial == "" {
+		slog.Warn("device has no serial number, skipping", "device", info.Device.Name)
+		return false, info.Smartctl.ExitStatus
+	}
+
+	// Convert the attribute table before taking the lock; it only reads info.
+	table := info.AtaSmartAttributes.Table
+	attributes := make([]*smart.SmartAttribute, 0, len(table))
+	for i := range table {
+		row := &table[i]
+		attributes = append(attributes, &smart.SmartAttribute{
+			ID:         row.ID,
+			Name:       row.Name,
+			Value:      row.Value,
+			Worst:      row.Worst,
+			Threshold:  row.Thresh,
+			RawValue:   row.Raw.Value,
+			RawString:  row.Raw.String,
+			WhenFailed: row.WhenFailed,
+		})
+	}
+
+	sm.Lock()
+	defer sm.Unlock()
+
+	// Reuse the cached entry for this serial number, creating it on first sight.
+	entry, known := sm.SmartDataMap[serial]
+	if !known {
+		entry = &smart.SmartData{}
+		sm.SmartDataMap[serial] = entry
+	}
+
+	temperature := info.Temperature.Current
+	entry.ModelName = info.ModelName
+	entry.SerialNumber = serial
+	entry.FirmwareVersion = info.FirmwareVersion
+	entry.Capacity = info.UserCapacity.Bytes
+	entry.Temperature = temperature
+	entry.SmartStatus = getSmartStatus(temperature, info.SmartStatus.Passed)
+	entry.DiskName = info.Device.Name
+	entry.DiskType = info.Device.Type
+	entry.Attributes = attributes
+
+	return true, info.Smartctl.ExitStatus
+}
+
+// getSmartStatus maps the health flag and temperature to a status string:
+// "PASSED" when passed is true, otherwise "FAILED" if the temperature is
+// non-zero and "UNKNOWN" if it is zero.
+// NOTE(review): a zero temperature is presumably taken to mean that no health
+// data was read at all - confirm.
+func getSmartStatus(temperature uint8, passed bool) string {
+	switch {
+	case passed:
+		return "PASSED"
+	case temperature > 0:
+		return "FAILED"
+	default:
+		return "UNKNOWN"
+	}
+}
+
+// parseSmartForNvme parses the output of smartctl --all -j /dev/nvmeX and
+// stores the result in SmartDataMap under the drive's serial number.
+// Returns hasValidData and exitStatus. Output that is not valid JSON yields
+// (false, 0); output without a serial number is skipped with a warning.
+func (sm *SmartManager) parseSmartForNvme(output []byte) (bool, int) {
+	// Decode into a value, not a pointer to a pointer: with **T a JSON `null`
+	// would reset the pointer to nil and the field reads below would panic.
+	var data smart.SmartInfoForNvme
+
+	if err := json.Unmarshal(output, &data); err != nil {
+		return false, 0
+	}
+
+	if data.SerialNumber == "" {
+		slog.Warn("device has no serial number, skipping", "device", data.Device.Name)
+		return false, data.Smartctl.ExitStatus
+	}
+
+	sm.Lock()
+	defer sm.Unlock()
+
+	// entries are keyed by serial number
+	keyName := data.SerialNumber
+
+	// if device does not exist in SmartDataMap, initialize it
+	smartData, ok := sm.SmartDataMap[keyName]
+	if !ok {
+		smartData = &smart.SmartData{}
+		sm.SmartDataMap[keyName] = smartData
+	}
+
+	// update SmartData
+	health := data.NVMeSmartHealthInformationLog
+	smartData.ModelName = data.ModelName
+	smartData.SerialNumber = data.SerialNumber
+	smartData.FirmwareVersion = data.FirmwareVersion
+	smartData.Capacity = data.UserCapacity.Bytes
+	smartData.Temperature = health.Temperature
+	smartData.SmartStatus = getSmartStatus(smartData.Temperature, data.SmartStatus.Passed)
+	smartData.DiskName = data.Device.Name
+	smartData.DiskType = data.Device.Type
+
+	// nvme attributes does not follow the same format as ata attributes,
+	// so we manually map each field to SmartAttributes
+	smartData.Attributes = []*smart.SmartAttribute{
+		{Name: "CriticalWarning", RawValue: uint64(health.CriticalWarning)},
+		{Name: "Temperature", RawValue: uint64(health.Temperature)},
+		{Name: "AvailableSpare", RawValue: uint64(health.AvailableSpare)},
+		{Name: "AvailableSpareThreshold", RawValue: uint64(health.AvailableSpareThreshold)},
+		{Name: "PercentageUsed", RawValue: uint64(health.PercentageUsed)},
+		{Name: "DataUnitsRead", RawValue: health.DataUnitsRead},
+		{Name: "DataUnitsWritten", RawValue: health.DataUnitsWritten},
+		{Name: "HostReads", RawValue: uint64(health.HostReads)},
+		{Name: "HostWrites", RawValue: uint64(health.HostWrites)},
+		{Name: "ControllerBusyTime", RawValue: uint64(health.ControllerBusyTime)},
+		{Name: "PowerCycles", RawValue: uint64(health.PowerCycles)},
+		{Name: "PowerOnHours", RawValue: uint64(health.PowerOnHours)},
+		{Name: "UnsafeShutdowns", RawValue: uint64(health.UnsafeShutdowns)},
+		{Name: "MediaErrors", RawValue: uint64(health.MediaErrors)},
+		{Name: "NumErrLogEntries", RawValue: uint64(health.NumErrLogEntries)},
+		{Name: "WarningTempTime", RawValue: uint64(health.WarningTempTime)},
+		{Name: "CriticalCompTime", RawValue: uint64(health.CriticalCompTime)},
+	}
+
+	return true, data.Smartctl.ExitStatus
+}
+
+// detectSmartctl looks for a smartctl executable on PATH and returns an error
+// if none is found.
+func (sm *SmartManager) detectSmartctl() error {
+	_, err := exec.LookPath("smartctl")
+	if err != nil {
+		return fmt.Errorf("no smartctl found - install smartctl")
+	}
+	return nil
+}
+
+// NewSmartManager builds a SmartManager with an empty data map. It returns
+// (nil, error) when smartctl cannot be found.
+func NewSmartManager() (*SmartManager, error) {
+	manager := new(SmartManager)
+	manager.SmartDataMap = make(map[string]*smart.SmartData)
+
+	err := manager.detectSmartctl()
+	if err != nil {
+		return nil, err
+	}
+	return manager, nil
+}
diff --git a/internal/common/common-ws.go b/internal/common/common-ws.go
index 9319616a..64e96830 100644
--- a/internal/common/common-ws.go
+++ b/internal/common/common-ws.go
@@ -1,6 +1,7 @@
package common
import (
+ "github.com/henrygd/beszel/internal/entities/smart"
"github.com/henrygd/beszel/internal/entities/system"
)
@@ -15,6 +16,8 @@ const (
GetContainerLogs
// Request container info from agent
GetContainerInfo
+ // Request SMART data from agent
+ GetSmartData
// Add new actions here...
)
@@ -27,11 +30,12 @@ type HubRequest[T any] struct {
// AgentResponse defines the structure for responses sent from agent to hub.
type AgentResponse struct {
- Id *uint32 `cbor:"0,keyasint,omitempty"`
- SystemData *system.CombinedData `cbor:"1,keyasint,omitempty,omitzero"`
- Fingerprint *FingerprintResponse `cbor:"2,keyasint,omitempty,omitzero"`
- Error string `cbor:"3,keyasint,omitempty,omitzero"`
- String *string `cbor:"4,keyasint,omitempty,omitzero"`
+ Id *uint32 `cbor:"0,keyasint,omitempty"`
+ SystemData *system.CombinedData `cbor:"1,keyasint,omitempty,omitzero"`
+ Fingerprint *FingerprintResponse `cbor:"2,keyasint,omitempty,omitzero"`
+ Error string `cbor:"3,keyasint,omitempty,omitzero"`
+ String *string `cbor:"4,keyasint,omitempty,omitzero"`
+ SmartData map[string]smart.SmartData `cbor:"5,keyasint,omitempty,omitzero"`
// Logs *LogsPayload `cbor:"4,keyasint,omitempty,omitzero"`
// RawBytes []byte `cbor:"4,keyasint,omitempty,omitzero"`
}
diff --git a/internal/entities/smart/smart.go b/internal/entities/smart/smart.go
new file mode 100644
index 00000000..c1fd6aa1
--- /dev/null
+++ b/internal/entities/smart/smart.go
@@ -0,0 +1,362 @@
+package smart
+
+// Common types
+
+// VersionInfo is a pair of integers; it is the type of the "version" JSON
+// field in the smartctl info structs of this file.
+type VersionInfo [2]int
+
+// SmartctlInfo describes the smartctl invocation: tool version and build
+// details, the arguments used, and the exit status.
+// NOTE(review): no active type in this file references it; SmartInfoForSata and
+// SmartInfoForNvme use SmartctlInfoLegacy and SmartctlInfoNvme instead.
+type SmartctlInfo struct {
+ Version VersionInfo `json:"version"`
+ SvnRevision string `json:"svn_revision"`
+ PlatformInfo string `json:"platform_info"`
+ BuildInfo string `json:"build_info"`
+ Argv []string `json:"argv"`
+ ExitStatus int `json:"exit_status"`
+}
+
+// DeviceInfo is the "device" object of a smartctl report: the device name,
+// a descriptive name, and its type and protocol.
+type DeviceInfo struct {
+ Name string `json:"name"`
+ InfoName string `json:"info_name"`
+ Type string `json:"type"`
+ Protocol string `json:"protocol"`
+}
+
+// UserCapacity is the "user_capacity" object of a smartctl report, giving the
+// capacity both in blocks and in bytes.
+type UserCapacity struct {
+ Blocks uint64 `json:"blocks"`
+ Bytes uint64 `json:"bytes"`
+}
+
+// type LocalTime struct {
+// TimeT int64 `json:"time_t"`
+// Asctime string `json:"asctime"`
+// }
+
+// type WwnInfo struct {
+// Naa int `json:"naa"`
+// Oui int `json:"oui"`
+// ID int `json:"id"`
+// }
+
+// type FormFactorInfo struct {
+// AtaValue int `json:"ata_value"`
+// Name string `json:"name"`
+// }
+
+// type TrimInfo struct {
+// Supported bool `json:"supported"`
+// }
+
+// type AtaVersionInfo struct {
+// String string `json:"string"`
+// MajorValue int `json:"major_value"`
+// MinorValue int `json:"minor_value"`
+// }
+
+// type VersionStringInfo struct {
+// String string `json:"string"`
+// Value int `json:"value"`
+// }
+
+// type SpeedInfo struct {
+// SataValue int `json:"sata_value"`
+// String string `json:"string"`
+// UnitsPerSecond int `json:"units_per_second"`
+// BitsPerUnit int `json:"bits_per_unit"`
+// }
+
+// type InterfaceSpeedInfo struct {
+// Max SpeedInfo `json:"max"`
+// Current SpeedInfo `json:"current"`
+// }
+
+// SmartStatusInfo is the "smart_status" object of a SATA/ATA report. Passed is
+// false both when the report says so and when the field is absent.
+type SmartStatusInfo struct {
+ Passed bool `json:"passed"`
+}
+
+// StatusInfo is a numeric status value with its text form and a pass flag.
+// It is only referenced by commented-out types in this file.
+type StatusInfo struct {
+ Value int `json:"value"`
+ String string `json:"string"`
+ Passed bool `json:"passed"`
+}
+
+// PollingMinutes holds the "short" and "extended" polling times.
+// It is only referenced by the commented-out SelfTestInfo type in this file.
+type PollingMinutes struct {
+ Short int `json:"short"`
+ Extended int `json:"extended"`
+}
+
+// CapabilitiesInfo is a set of capability flags plus the raw values they were
+// derived from. It is only referenced by the commented-out AtaSmartData type
+// in this file.
+type CapabilitiesInfo struct {
+ Values []int `json:"values"`
+ ExecOfflineImmediateSupported bool `json:"exec_offline_immediate_supported"`
+ OfflineIsAbortedUponNewCmd bool `json:"offline_is_aborted_upon_new_cmd"`
+ OfflineSurfaceScanSupported bool `json:"offline_surface_scan_supported"`
+ SelfTestsSupported bool `json:"self_tests_supported"`
+ ConveyanceSelfTestSupported bool `json:"conveyance_self_test_supported"`
+ SelectiveSelfTestSupported bool `json:"selective_self_test_supported"`
+ AttributeAutosaveEnabled bool `json:"attribute_autosave_enabled"`
+ ErrorLoggingSupported bool `json:"error_logging_supported"`
+ GpLoggingSupported bool `json:"gp_logging_supported"`
+}
+
+// type AtaSmartData struct {
+// OfflineDataCollection OfflineDataCollectionInfo `json:"offline_data_collection"`
+// SelfTest SelfTestInfo `json:"self_test"`
+// Capabilities CapabilitiesInfo `json:"capabilities"`
+// }
+
+// type OfflineDataCollectionInfo struct {
+// Status StatusInfo `json:"status"`
+// CompletionSeconds int `json:"completion_seconds"`
+// }
+
+// type SelfTestInfo struct {
+// Status StatusInfo `json:"status"`
+// PollingMinutes PollingMinutes `json:"polling_minutes"`
+// }
+
+// type AtaSctCapabilities struct {
+// Value int `json:"value"`
+// ErrorRecoveryControlSupported bool `json:"error_recovery_control_supported"`
+// FeatureControlSupported bool `json:"feature_control_supported"`
+// DataTableSupported bool `json:"data_table_supported"`
+// }
+
+// SummaryInfo holds a log revision and an entry count. It is only referenced
+// by the commented-out error-log and self-test-log types in this file.
+type SummaryInfo struct {
+ Revision int `json:"revision"`
+ Count int `json:"count"`
+}
+
+// AtaSmartAttributes is the "ata_smart_attributes" object of a SATA/ATA
+// report; only its attribute table is decoded.
+type AtaSmartAttributes struct {
+ // Revision int `json:"revision"`
+ Table []AtaSmartAttribute `json:"table"`
+}
+
+// AtaSmartAttribute is one row of the ATA SMART attribute table: the attribute
+// id and name, its current and worst values, the threshold, the when_failed
+// marker, the attribute flags and the raw value.
+type AtaSmartAttribute struct {
+ ID uint16 `json:"id"`
+ Name string `json:"name"`
+ Value uint16 `json:"value"`
+ Worst uint16 `json:"worst"`
+ Thresh uint16 `json:"thresh"`
+ WhenFailed string `json:"when_failed"`
+ Flags AttributeFlags `json:"flags"`
+ Raw RawValue `json:"raw"`
+}
+
+// AttributeFlags is the "flags" object of an ATA SMART attribute: the numeric
+// flag value, its text form, and one boolean per individual flag.
+type AttributeFlags struct {
+ Value int `json:"value"`
+ String string `json:"string"`
+ Prefailure bool `json:"prefailure"`
+ UpdatedOnline bool `json:"updated_online"`
+ Performance bool `json:"performance"`
+ ErrorRate bool `json:"error_rate"`
+ EventCount bool `json:"event_count"`
+ AutoKeep bool `json:"auto_keep"`
+}
+
+// RawValue is the "raw" object of an ATA SMART attribute: the raw value as a
+// number and as a string.
+type RawValue struct {
+ Value uint64 `json:"value"`
+ String string `json:"string"`
+}
+
+// type PowerOnTimeInfo struct {
+// Hours uint32 `json:"hours"`
+// }
+
+// TemperatureInfo is the "temperature" object of a SATA/ATA report.
+// NOTE(review): Current is unsigned, so a negative or >255 reading in the JSON
+// would make decoding report an error - confirm such values cannot occur.
+type TemperatureInfo struct {
+ Current uint8 `json:"current"`
+}
+
+// type SelectiveSelfTestTable struct {
+// LbaMin int `json:"lba_min"`
+// LbaMax int `json:"lba_max"`
+// Status StatusInfo `json:"status"`
+// }
+
+// type SelectiveSelfTestFlags struct {
+// Value int `json:"value"`
+// RemainderScanEnabled bool `json:"remainder_scan_enabled"`
+// }
+
+// type AtaSmartSelectiveSelfTestLog struct {
+// Revision int `json:"revision"`
+// Table []SelectiveSelfTestTable `json:"table"`
+// Flags SelectiveSelfTestFlags `json:"flags"`
+// PowerUpScanResumeMinutes int `json:"power_up_scan_resume_minutes"`
+// }
+
+// BaseSmartInfo contains common fields shared between SATA and NVMe drives
+// type BaseSmartInfo struct {
+// Device DeviceInfo `json:"device"`
+// ModelName string `json:"model_name"`
+// SerialNumber string `json:"serial_number"`
+// FirmwareVersion string `json:"firmware_version"`
+// UserCapacity UserCapacity `json:"user_capacity"`
+// LogicalBlockSize int `json:"logical_block_size"`
+// LocalTime LocalTime `json:"local_time"`
+// }
+
+// SmartctlInfoLegacy is the "smartctl" object of a SATA/ATA report (see
+// SmartInfoForSata). Its fields and JSON tags are the same as SmartctlInfo's.
+type SmartctlInfoLegacy struct {
+ Version VersionInfo `json:"version"`
+ SvnRevision string `json:"svn_revision"`
+ PlatformInfo string `json:"platform_info"`
+ BuildInfo string `json:"build_info"`
+ Argv []string `json:"argv"`
+ ExitStatus int `json:"exit_status"`
+}
+
+// SmartInfoForSata is the subset of a smartctl JSON report that is decoded for
+// SATA/ATA devices. The commented-out fields are parts of the report that are
+// currently not decoded.
+type SmartInfoForSata struct {
+ // JSONFormatVersion VersionInfo `json:"json_format_version"`
+ Smartctl SmartctlInfoLegacy `json:"smartctl"`
+ Device DeviceInfo `json:"device"`
+ // ModelFamily string `json:"model_family"`
+ ModelName string `json:"model_name"`
+ SerialNumber string `json:"serial_number"`
+ // Wwn WwnInfo `json:"wwn"`
+ FirmwareVersion string `json:"firmware_version"`
+ UserCapacity UserCapacity `json:"user_capacity"`
+ // LogicalBlockSize int `json:"logical_block_size"`
+ // PhysicalBlockSize int `json:"physical_block_size"`
+ // RotationRate int `json:"rotation_rate"`
+ // FormFactor FormFactorInfo `json:"form_factor"`
+ // Trim TrimInfo `json:"trim"`
+ // InSmartctlDatabase bool `json:"in_smartctl_database"`
+ // AtaVersion AtaVersionInfo `json:"ata_version"`
+ // SataVersion VersionStringInfo `json:"sata_version"`
+ // InterfaceSpeed InterfaceSpeedInfo `json:"interface_speed"`
+ // LocalTime LocalTime `json:"local_time"`
+ SmartStatus SmartStatusInfo `json:"smart_status"`
+ // AtaSmartData AtaSmartData `json:"ata_smart_data"`
+ // AtaSctCapabilities AtaSctCapabilities `json:"ata_sct_capabilities"`
+ AtaSmartAttributes AtaSmartAttributes `json:"ata_smart_attributes"`
+ // PowerOnTime PowerOnTimeInfo `json:"power_on_time"`
+ // PowerCycleCount uint16 `json:"power_cycle_count"`
+ Temperature TemperatureInfo `json:"temperature"`
+ // AtaSmartErrorLog AtaSmartErrorLog `json:"ata_smart_error_log"`
+ // AtaSmartSelfTestLog AtaSmartSelfTestLog `json:"ata_smart_self_test_log"`
+ // AtaSmartSelectiveSelfTestLog AtaSmartSelectiveSelfTestLog `json:"ata_smart_selective_self_test_log"`
+}
+
+// type AtaSmartErrorLog struct {
+// Summary SummaryInfo `json:"summary"`
+// }
+
+// type AtaSmartSelfTestLog struct {
+// Standard SummaryInfo `json:"standard"`
+// }
+
+// SmartctlInfoNvme is the "smartctl" object of an NVMe report (see
+// SmartInfoForNvme). It has the same JSON tags as SmartctlInfo; only the Go
+// field name SVNRevision is spelled differently.
+type SmartctlInfoNvme struct {
+ Version VersionInfo `json:"version"`
+ SVNRevision string `json:"svn_revision"`
+ PlatformInfo string `json:"platform_info"`
+ BuildInfo string `json:"build_info"`
+ Argv []string `json:"argv"`
+ ExitStatus int `json:"exit_status"`
+}
+
+// type NVMePCIVendor struct {
+// ID int `json:"id"`
+// SubsystemID int `json:"subsystem_id"`
+// }
+
+// type SizeCapacityInfo struct {
+// Blocks uint64 `json:"blocks"`
+// Bytes uint64 `json:"bytes"`
+// }
+
+// type EUI64Info struct {
+// OUI int `json:"oui"`
+// ExtID int `json:"ext_id"`
+// }
+
+// type NVMeNamespace struct {
+// ID uint32 `json:"id"`
+// Size SizeCapacityInfo `json:"size"`
+// Capacity SizeCapacityInfo `json:"capacity"`
+// Utilization SizeCapacityInfo `json:"utilization"`
+// FormattedLBASize uint32 `json:"formatted_lba_size"`
+// EUI64 EUI64Info `json:"eui64"`
+// }
+
+// SmartStatusInfoNvme is the "smart_status" object of an NVMe report: the
+// overall pass flag plus the NVMe-specific status value.
+type SmartStatusInfoNvme struct {
+ Passed bool `json:"passed"`
+ NVMe SmartStatusNVMe `json:"nvme"`
+}
+
+// SmartStatusNVMe is the "nvme" object nested in an NVMe report's
+// "smart_status"; it carries a single numeric value.
+type SmartStatusNVMe struct {
+ Value int `json:"value"`
+}
+
+// NVMeSmartHealthInformationLog is the "nvme_smart_health_information_log"
+// object of an NVMe report.
+//
+// The counters are uint64 on purpose: encoding/json reports an error when a
+// number overflows its target field, so a narrower type (uint16, or uint on a
+// 32-bit build) would make decoding of the whole report fail once a drive's
+// counter grows past that range.
+type NVMeSmartHealthInformationLog struct {
+ CriticalWarning uint `json:"critical_warning"`
+ Temperature uint8 `json:"temperature"`
+ AvailableSpare uint `json:"available_spare"`
+ AvailableSpareThreshold uint `json:"available_spare_threshold"`
+ PercentageUsed uint8 `json:"percentage_used"`
+ DataUnitsRead uint64 `json:"data_units_read"`
+ DataUnitsWritten uint64 `json:"data_units_written"`
+ HostReads uint64 `json:"host_reads"`
+ HostWrites uint64 `json:"host_writes"`
+ ControllerBusyTime uint64 `json:"controller_busy_time"`
+ PowerCycles uint64 `json:"power_cycles"`
+ PowerOnHours uint64 `json:"power_on_hours"`
+ UnsafeShutdowns uint64 `json:"unsafe_shutdowns"`
+ MediaErrors uint64 `json:"media_errors"`
+ NumErrLogEntries uint64 `json:"num_err_log_entries"`
+ WarningTempTime uint64 `json:"warning_temp_time"`
+ CriticalCompTime uint64 `json:"critical_comp_time"`
+ TemperatureSensors []uint8 `json:"temperature_sensors"`
+}
+
+// SmartInfoForNvme is the subset of a smartctl JSON report that is decoded for
+// NVMe devices. The commented-out fields are parts of the report that are
+// currently not decoded.
+type SmartInfoForNvme struct {
+ // JSONFormatVersion VersionInfo `json:"json_format_version"`
+ Smartctl SmartctlInfoNvme `json:"smartctl"`
+ Device DeviceInfo `json:"device"`
+ ModelName string `json:"model_name"`
+ SerialNumber string `json:"serial_number"`
+ FirmwareVersion string `json:"firmware_version"`
+ // NVMePCIVendor NVMePCIVendor `json:"nvme_pci_vendor"`
+ // NVMeIEEEOUIIdentifier uint32 `json:"nvme_ieee_oui_identifier"`
+ // NVMeTotalCapacity uint64 `json:"nvme_total_capacity"`
+ // NVMeUnallocatedCapacity uint64 `json:"nvme_unallocated_capacity"`
+ // NVMeControllerID uint16 `json:"nvme_controller_id"`
+ // NVMeVersion VersionStringInfo `json:"nvme_version"`
+ // NVMeNumberOfNamespaces uint8 `json:"nvme_number_of_namespaces"`
+ // NVMeNamespaces []NVMeNamespace `json:"nvme_namespaces"`
+ UserCapacity UserCapacity `json:"user_capacity"`
+ // LogicalBlockSize int `json:"logical_block_size"`
+ // LocalTime LocalTime `json:"local_time"`
+ SmartStatus SmartStatusInfoNvme `json:"smart_status"`
+ NVMeSmartHealthInformationLog NVMeSmartHealthInformationLog `json:"nvme_smart_health_information_log"`
+ Temperature TemperatureInfoNvme `json:"temperature"`
+ // uint64 rather than uint16: a count above 65535 would otherwise make
+ // encoding/json report an overflow error for the whole report.
+ PowerCycleCount uint64 `json:"power_cycle_count"`
+ PowerOnTime PowerOnTimeInfoNvme `json:"power_on_time"`
+}
+
+// TemperatureInfoNvme is the top-level "temperature" object of an NVMe report.
+type TemperatureInfoNvme struct {
+ Current int `json:"current"`
+}
+
+// PowerOnTimeInfoNvme is the "power_on_time" object of an NVMe report.
+type PowerOnTimeInfoNvme struct {
+ Hours int `json:"hours"`
+}
+
+// SmartData is the compact per-drive record that is exchanged between agent
+// and hub. Fields use short JSON names and integer CBOR keys, and are omitted
+// when empty. CBOR key 0 is only used by the commented-out ModelFamily field.
+type SmartData struct {
+ // ModelFamily string `json:"mf,omitempty" cbor:"0,keyasint,omitempty"`
+ ModelName string `json:"mn,omitempty" cbor:"1,keyasint,omitempty"`
+ SerialNumber string `json:"sn,omitempty" cbor:"2,keyasint,omitempty"`
+ FirmwareVersion string `json:"fv,omitempty" cbor:"3,keyasint,omitempty"`
+ Capacity uint64 `json:"c,omitempty" cbor:"4,keyasint,omitempty"`
+ SmartStatus string `json:"s,omitempty" cbor:"5,keyasint,omitempty"`
+ DiskName string `json:"dn,omitempty" cbor:"6,keyasint,omitempty"`
+ DiskType string `json:"dt,omitempty" cbor:"7,keyasint,omitempty"`
+ Temperature uint8 `json:"t,omitempty" cbor:"8,keyasint,omitempty"`
+ Attributes []*SmartAttribute `json:"a,omitempty" cbor:"9,keyasint,omitempty"`
+}
+
+// SmartAttribute is one attribute of a SmartData record. Name and RawValue
+// carry no omitempty option and are therefore always encoded; every other
+// field is omitted when it holds its zero value.
+type SmartAttribute struct {
+ ID uint16 `json:"id,omitempty" cbor:"0,keyasint,omitempty"`
+ Name string `json:"n" cbor:"1,keyasint"`
+ Value uint16 `json:"v,omitempty" cbor:"2,keyasint,omitempty"`
+ Worst uint16 `json:"w,omitempty" cbor:"3,keyasint,omitempty"`
+ Threshold uint16 `json:"t,omitempty" cbor:"4,keyasint,omitempty"`
+ RawValue uint64 `json:"rv" cbor:"5,keyasint"`
+ RawString string `json:"rs,omitempty" cbor:"6,keyasint,omitempty"`
+ WhenFailed string `json:"wf,omitempty" cbor:"7,keyasint,omitempty"`
+}
diff --git a/internal/hub/hub.go b/internal/hub/hub.go
index 2360e79e..8156c38c 100644
--- a/internal/hub/hub.go
+++ b/internal/hub/hub.go
@@ -256,6 +256,8 @@ func (h *Hub) registerApiRoutes(se *core.ServeEvent) error {
apiAuth.GET("/containers/logs", h.getContainerLogs)
// get container info
apiAuth.GET("/containers/info", h.getContainerInfo)
+ // get SMART data
+ apiAuth.GET("/smart", h.getSmartData)
return nil
}
@@ -321,6 +323,24 @@ func (h *Hub) getContainerInfo(e *core.RequestEvent) error {
}, "info")
}
+// getSmartData handles GET /api/beszel/smart requests.
+//
+// The "system" query parameter names the system to query. It responds with
+// 400 when the parameter is missing, 404 when the system is unknown, 500 when
+// the agent could not be queried, and otherwise 200 with the SMART data as
+// JSON.
+func (h *Hub) getSmartData(e *core.RequestEvent) error {
+	systemID := e.Request.URL.Query().Get("system")
+	if systemID == "" {
+		return e.JSON(http.StatusBadRequest, map[string]string{"error": "system parameter is required"})
+	}
+	system, err := h.sm.GetSystem(systemID)
+	if err != nil {
+		return e.JSON(http.StatusNotFound, map[string]string{"error": "system not found"})
+	}
+	data, err := system.FetchSmartDataFromAgent()
+	if err != nil {
+		return e.JSON(http.StatusInternalServerError, map[string]string{"error": err.Error()})
+	}
+	// "private", not "public": this route is registered on the authenticated
+	// group, so only the requesting client's own cache may keep the response;
+	// shared caches must not store it.
+	e.Response.Header().Set("Cache-Control", "private, max-age=60")
+	return e.JSON(http.StatusOK, data)
+}
+
// generates key pair if it doesn't exist and returns signer
func (h *Hub) GetSSHKey(dataDir string) (ssh.Signer, error) {
if h.signer != nil {
diff --git a/internal/hub/systems/system.go b/internal/hub/systems/system.go
index 78615a95..cdba4f92 100644
--- a/internal/hub/systems/system.go
+++ b/internal/hub/systems/system.go
@@ -340,6 +340,45 @@ func (sys *System) FetchContainerLogsFromAgent(containerID string) (string, erro
return sys.fetchStringFromAgentViaSSH(common.GetContainerLogs, common.ContainerLogsRequest{ContainerID: containerID}, "no logs in response")
}
+// FetchSmartDataFromAgent fetches SMART data from the agent.
+//
+// A connected WebSocket is used when available (five second timeout).
+// Otherwise the request is sent over SSH: a CBOR-encoded GetSmartData request
+// is written to the session's stdin and one CBOR AgentResponse is read from
+// its stdout. The result maps each key of the agent's SmartData map to its
+// smart.SmartData value.
+func (sys *System) FetchSmartDataFromAgent() (map[string]any, error) {
+	// fetch via websocket
+	if sys.WsConn != nil && sys.WsConn.IsConnected() {
+		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+		defer cancel()
+		return sys.WsConn.RequestSmartData(ctx)
+	}
+	// fetch via SSH
+	var result map[string]any
+	err := sys.runSSHOperation(5*time.Second, 1, func(session *ssh.Session) (bool, error) {
+		stdout, err := session.StdoutPipe()
+		if err != nil {
+			return false, err
+		}
+		stdin, stdinErr := session.StdinPipe()
+		if stdinErr != nil {
+			return false, stdinErr
+		}
+		if err := session.Shell(); err != nil {
+			return false, err
+		}
+		req := common.HubRequest[any]{Action: common.GetSmartData}
+		encodeErr := cbor.NewEncoder(stdin).Encode(req)
+		// Closing stdin only marks the end of the request; its error is not
+		// interesting because the outcome is judged by the decode below.
+		_ = stdin.Close()
+		if encodeErr != nil {
+			// Report the failed send itself instead of the less specific
+			// decode error that would follow from it.
+			return false, encodeErr
+		}
+		var resp common.AgentResponse
+		if err := cbor.NewDecoder(stdout).Decode(&resp); err != nil {
+			return false, err
+		}
+		// Convert to generic map for JSON response
+		result = make(map[string]any, len(resp.SmartData))
+		for k, v := range resp.SmartData {
+			result[k] = v
+		}
+		return false, nil
+	})
+	return result, err
+}
+
// fetchDataViaSSH handles fetching data using SSH.
// This function encapsulates the original SSH logic.
// It updates sys.data directly upon successful fetch.
diff --git a/internal/hub/ws/handlers.go b/internal/hub/ws/handlers.go
index 627216eb..f879f53a 100644
--- a/internal/hub/ws/handlers.go
+++ b/internal/hub/ws/handlers.go
@@ -115,6 +115,46 @@ func (ws *WsConn) RequestContainerInfo(ctx context.Context, containerID string)
////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
+// RequestSmartData asks the agent for its SMART data over the WebSocket
+// connection and returns it as a generic map. It returns gws.ErrConnClosed
+// when the connection is not established, and otherwise any error from sending
+// the request or handling the response.
+func (ws *WsConn) RequestSmartData(ctx context.Context) (map[string]any, error) {
+	if !ws.IsConnected() {
+		return nil, gws.ErrConnClosed
+	}
+
+	req, err := ws.requestManager.SendRequest(ctx, common.GetSmartData, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	// The handler writes the converted response through this pointer.
+	var smartData map[string]any
+	var handler ResponseHandler = &smartDataHandler{result: &smartData}
+	if err = ws.handleAgentRequest(req, handler); err != nil {
+		return nil, err
+	}
+	return smartData, nil
+}
+
+// smartDataHandler parses SMART data map from AgentResponse
+type smartDataHandler struct {
+ BaseHandler
+ // result points at the caller's map variable; Handle stores the converted
+ // data through it.
+ result *map[string]any
+}
+
+// Handle copies the response's SMART data into a map[string]any and stores it
+// through h.result. A response whose SmartData is nil is rejected with an
+// error.
+// NOTE(review): AgentResponse.SmartData is tagged omitempty. If the CBOR
+// encoder drops an empty map, an agent that deliberately sends an empty map
+// arrives here as nil and is reported as an error - confirm this is intended.
+func (h *smartDataHandler) Handle(agentResponse common.AgentResponse) error {
+	smartData := agentResponse.SmartData
+	if smartData == nil {
+		return errors.New("no SMART data in response")
+	}
+
+	// generic values keep the hub layer independent of the concrete type
+	converted := make(map[string]any, len(smartData))
+	for key, entry := range smartData {
+		converted[key] = entry
+	}
+	*h.result = converted
+	return nil
+}
+
+////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////
+
// fingerprintHandler implements ResponseHandler for fingerprint requests
type fingerprintHandler struct {
result *common.FingerprintResponse
diff --git a/internal/site/src/components/routes/system.tsx b/internal/site/src/components/routes/system.tsx
index 4d314ace..7de6a389 100644
--- a/internal/site/src/components/routes/system.tsx
+++ b/internal/site/src/components/routes/system.tsx
@@ -573,6 +573,18 @@ export default memo(function SystemDetail({ id }: { id: string }) {
+
+ {/*