mirror of
https://github.com/henrygd/beszel.git
synced 2025-12-17 02:36:17 +01:00
update logs in smart.go
also change max execution time to 2 sec
This commit is contained in:
@@ -3,6 +3,7 @@ package agent
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"slices"
|
"slices"
|
||||||
@@ -60,7 +61,7 @@ func (sm *SmartManager) Refresh(forceScan bool) error {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if err := sm.CollectSmart(deviceInfo); err != nil {
|
if err := sm.CollectSmart(deviceInfo); err != nil {
|
||||||
slog.Debug("smartctl collect failed, skipping", "device", deviceInfo.Name, "err", err)
|
slog.Debug("smartctl collect failed", "device", deviceInfo.Name, "err", err)
|
||||||
collectErr = err
|
collectErr = err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -131,18 +132,18 @@ func (sm *SmartManager) GetCurrentData() map[string]smart.SmartData {
|
|||||||
// If scan fails, return error
|
// If scan fails, return error
|
||||||
// If scan succeeds, parse the output and update the SmartDevices slice
|
// If scan succeeds, parse the output and update the SmartDevices slice
|
||||||
func (sm *SmartManager) ScanDevices(force bool) error {
|
func (sm *SmartManager) ScanDevices(force bool) error {
|
||||||
if !force && time.Since(sm.lastScanTime) < 10*time.Minute {
|
if !force && time.Since(sm.lastScanTime) < 30*time.Minute {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
sm.lastScanTime = time.Now()
|
sm.lastScanTime = time.Now()
|
||||||
|
|
||||||
var configuredDevices []*DeviceInfo
|
var configuredDevices []*DeviceInfo
|
||||||
if configuredRaw, ok := GetEnv("SMART_DEVICES"); ok {
|
if configuredRaw, ok := GetEnv("SMART_DEVICES"); ok {
|
||||||
|
slog.Info("SMART_DEVICES", "value", configuredRaw)
|
||||||
config := strings.TrimSpace(configuredRaw)
|
config := strings.TrimSpace(configuredRaw)
|
||||||
if config == "" {
|
if config == "" {
|
||||||
return errNoValidSmartData
|
return errNoValidSmartData
|
||||||
}
|
}
|
||||||
slog.Info("SMART_DEVICES", "config", config)
|
|
||||||
|
|
||||||
parsedDevices, err := sm.parseConfiguredDevices(config)
|
parsedDevices, err := sm.parseConfiguredDevices(config)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -174,8 +175,10 @@ func (sm *SmartManager) ScanDevices(force bool) error {
|
|||||||
|
|
||||||
finalDevices := mergeDeviceLists(scannedDevices, configuredDevices)
|
finalDevices := mergeDeviceLists(scannedDevices, configuredDevices)
|
||||||
sm.updateSmartDevices(finalDevices)
|
sm.updateSmartDevices(finalDevices)
|
||||||
|
|
||||||
if len(finalDevices) == 0 {
|
if len(finalDevices) == 0 {
|
||||||
if scanErr != nil {
|
if scanErr != nil {
|
||||||
|
slog.Debug("smartctl scan failed", "err", scanErr)
|
||||||
return scanErr
|
return scanErr
|
||||||
}
|
}
|
||||||
return errNoValidSmartData
|
return errNoValidSmartData
|
||||||
@@ -197,7 +200,7 @@ func (sm *SmartManager) parseConfiguredDevices(config string) ([]*DeviceInfo, er
|
|||||||
|
|
||||||
name := strings.TrimSpace(parts[0])
|
name := strings.TrimSpace(parts[0])
|
||||||
if name == "" {
|
if name == "" {
|
||||||
return nil, fmt.Errorf("invalid SMART_DEVICES entry %q: device name is required", entry)
|
return nil, fmt.Errorf("invalid SMART_DEVICES entry %q", entry)
|
||||||
}
|
}
|
||||||
|
|
||||||
devType := ""
|
devType := ""
|
||||||
@@ -269,9 +272,12 @@ func (sm *SmartManager) parseSmartOutput(deviceInfo *DeviceInfo, output []byte)
|
|||||||
deviceInfo.Type = parser.Type
|
deviceInfo.Type = parser.Type
|
||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
|
} else {
|
||||||
|
slog.Debug("parser failed", "device", deviceInfo.Name, "parser", parser.Type)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
slog.Debug("parsing failed", "device", deviceInfo.Name)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -283,10 +289,12 @@ func (sm *SmartManager) parseSmartOutput(deviceInfo *DeviceInfo, output []byte)
|
|||||||
// Uses -n standby to avoid waking up sleeping disks, but bypasses standby mode
|
// Uses -n standby to avoid waking up sleeping disks, but bypasses standby mode
|
||||||
// for initial data collection when no cached data exists
|
// for initial data collection when no cached data exists
|
||||||
func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
|
func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
|
||||||
|
// slog.Info("collecting SMART data", "device", deviceInfo.Name, "type", deviceInfo.Type, "has_existing_data", sm.hasDataForDevice(deviceInfo.Name))
|
||||||
|
|
||||||
// Check if we have any existing data for this device
|
// Check if we have any existing data for this device
|
||||||
hasExistingData := sm.hasDataForDevice(deviceInfo.Name)
|
hasExistingData := sm.hasDataForDevice(deviceInfo.Name)
|
||||||
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
// Try with -n standby first if we have existing data
|
// Try with -n standby first if we have existing data
|
||||||
@@ -298,12 +306,10 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
|
|||||||
if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 2 {
|
if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 2 {
|
||||||
if hasExistingData {
|
if hasExistingData {
|
||||||
// Device is in standby and we have cached data, keep using cache
|
// Device is in standby and we have cached data, keep using cache
|
||||||
slog.Debug("device in standby mode, using cached data", "device", deviceInfo.Name)
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
// No cached data, need to collect initial data by bypassing standby
|
// No cached data, need to collect initial data by bypassing standby
|
||||||
slog.Debug("device in standby but no cached data, collecting initial data", "device", deviceInfo.Name)
|
ctx2, cancel2 := context.WithTimeout(context.Background(), 2*time.Second)
|
||||||
ctx2, cancel2 := context.WithTimeout(context.Background(), 10*time.Second)
|
|
||||||
defer cancel2()
|
defer cancel2()
|
||||||
args = sm.smartctlArgs(deviceInfo, false)
|
args = sm.smartctlArgs(deviceInfo, false)
|
||||||
cmd = exec.CommandContext(ctx2, "smartctl", args...)
|
cmd = exec.CommandContext(ctx2, "smartctl", args...)
|
||||||
@@ -314,10 +320,13 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
|
|||||||
|
|
||||||
if !hasValidData {
|
if !hasValidData {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
slog.Debug("smartctl failed", "device", deviceInfo.Name, "err", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
slog.Debug("no valid SMART data found", "device", deviceInfo.Name)
|
||||||
return errNoValidSmartData
|
return errNoValidSmartData
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -362,17 +371,17 @@ func (sm *SmartManager) parseScan(output []byte) ([]*DeviceInfo, bool) {
|
|||||||
scan := &scanOutput{}
|
scan := &scanOutput{}
|
||||||
|
|
||||||
if err := json.Unmarshal(output, scan); err != nil {
|
if err := json.Unmarshal(output, scan); err != nil {
|
||||||
slog.Debug("Failed to parse smartctl scan JSON", "err", err)
|
|
||||||
return nil, false
|
return nil, false
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(scan.Devices) == 0 {
|
if len(scan.Devices) == 0 {
|
||||||
|
slog.Debug("no devices found in smartctl scan")
|
||||||
return nil, false
|
return nil, false
|
||||||
}
|
}
|
||||||
|
|
||||||
devices := make([]*DeviceInfo, 0, len(scan.Devices))
|
devices := make([]*DeviceInfo, 0, len(scan.Devices))
|
||||||
for _, device := range scan.Devices {
|
for _, device := range scan.Devices {
|
||||||
// slog.Info("found device during scan", "name", device.Name, "type", device.Type, "protocol", device.Protocol)
|
slog.Debug("smartctl scan", "name", device.Name, "type", device.Type, "protocol", device.Protocol)
|
||||||
devices = append(devices, &DeviceInfo{
|
devices = append(devices, &DeviceInfo{
|
||||||
Name: device.Name,
|
Name: device.Name,
|
||||||
Type: device.Type,
|
Type: device.Type,
|
||||||
@@ -452,7 +461,7 @@ func (sm *SmartManager) updateSmartDevices(devices []*DeviceInfo) {
|
|||||||
for key, data := range sm.SmartDataMap {
|
for key, data := range sm.SmartDataMap {
|
||||||
if data == nil {
|
if data == nil {
|
||||||
delete(sm.SmartDataMap, key)
|
delete(sm.SmartDataMap, key)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, ok := validNames[data.DiskName]; ok {
|
if _, ok := validNames[data.DiskName]; ok {
|
||||||
@@ -496,13 +505,13 @@ func (sm *SmartManager) parseSmartForSata(output []byte) (bool, int) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if data.SerialNumber == "" {
|
if data.SerialNumber == "" {
|
||||||
slog.Debug("device has no serial number, skipping", "device", data.Device.Name)
|
slog.Debug("no serial number", "device", data.Device.Name)
|
||||||
return false, data.Smartctl.ExitStatus
|
return false, data.Smartctl.ExitStatus
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip virtual devices (e.g., Kubernetes PVCs, QEMU, VirtualBox, etc.)
|
// Skip virtual devices (e.g., Kubernetes PVCs, QEMU, VirtualBox, etc.)
|
||||||
if sm.isVirtualDevice(&data) {
|
if sm.isVirtualDevice(&data) {
|
||||||
slog.Debug("skipping virtual device", "device", data.Device.Name, "model", data.ModelName)
|
slog.Debug("skipping smart", "device", data.Device.Name, "model", data.ModelName)
|
||||||
return false, data.Smartctl.ExitStatus
|
return false, data.Smartctl.ExitStatus
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -566,7 +575,7 @@ func (sm *SmartManager) parseSmartForScsi(output []byte) (bool, int) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if data.SerialNumber == "" {
|
if data.SerialNumber == "" {
|
||||||
slog.Debug("scsi device has no serial number, skipping", "device", data.Device.Name)
|
slog.Debug("no serial number", "device", data.Device.Name)
|
||||||
return false, data.Smartctl.ExitStatus
|
return false, data.Smartctl.ExitStatus
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -633,7 +642,6 @@ func parseScsiGigabytesProcessed(value string) int64 {
|
|||||||
normalized := strings.ReplaceAll(value, ",", "")
|
normalized := strings.ReplaceAll(value, ",", "")
|
||||||
parsed, err := strconv.ParseInt(normalized, 10, 64)
|
parsed, err := strconv.ParseInt(normalized, 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Debug("failed to parse SCSI gigabytes processed", "value", value, "err", err)
|
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
return parsed
|
return parsed
|
||||||
@@ -649,7 +657,7 @@ func (sm *SmartManager) parseSmartForNvme(output []byte) (bool, int) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if data.SerialNumber == "" {
|
if data.SerialNumber == "" {
|
||||||
slog.Debug("device has no serial number, skipping", "device", data.Device.Name)
|
slog.Debug("no serial number", "device", data.Device.Name)
|
||||||
return false, data.Smartctl.ExitStatus
|
return false, data.Smartctl.ExitStatus
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -705,9 +713,11 @@ func (sm *SmartManager) parseSmartForNvme(output []byte) (bool, int) {
|
|||||||
// detectSmartctl checks if smartctl is installed, returns an error if not
|
// detectSmartctl checks if smartctl is installed, returns an error if not
|
||||||
func (sm *SmartManager) detectSmartctl() error {
|
func (sm *SmartManager) detectSmartctl() error {
|
||||||
if _, err := exec.LookPath("smartctl"); err == nil {
|
if _, err := exec.LookPath("smartctl"); err == nil {
|
||||||
|
slog.Debug("smartctl found")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return fmt.Errorf("smartctl not found")
|
slog.Debug("smartctl not found")
|
||||||
|
return errors.New("smartctl not found")
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewSmartManager creates and initializes a new SmartManager
|
// NewSmartManager creates and initializes a new SmartManager
|
||||||
|
|||||||
Reference in New Issue
Block a user