mirror of
https://github.com/henrygd/beszel.git
synced 2026-03-30 18:26:18 +02:00
Prevent temperature collection from blocking agent stats (#1839)
This commit is contained in:
@@ -2,12 +2,14 @@ package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"path"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
@@ -38,6 +40,11 @@ func (a *Agent) newSensorConfig() *SensorConfig {
|
||||
// Matches sensors.TemperaturesWithContext to allow for panic recovery (gopsutil/issues/1832)
|
||||
type getTempsFn func(ctx context.Context) ([]sensors.TemperatureStat, error)
|
||||
|
||||
var (
|
||||
errTemperatureFetchTimeout = errors.New("temperature collection timed out")
|
||||
temperatureFetchTimeout = 2 * time.Second
|
||||
)
|
||||
|
||||
// newSensorConfigWithEnv creates a SensorConfig with the provided environment variables
|
||||
// sensorsSet indicates if the SENSORS environment variable was explicitly set (even to empty string)
|
||||
func (a *Agent) newSensorConfigWithEnv(primarySensor, sysSensors, sensorsEnvVal string, skipCollection bool) *SensorConfig {
|
||||
@@ -86,10 +93,12 @@ func (a *Agent) updateTemperatures(systemStats *system.Stats) {
|
||||
// reset high temp
|
||||
a.systemInfo.DashboardTemp = 0
|
||||
|
||||
temps, err := a.getTempsWithPanicRecovery(getSensorTemps)
|
||||
temps, err := a.getTempsWithTimeout(getSensorTemps)
|
||||
if err != nil {
|
||||
// retry once on panic (gopsutil/issues/1832)
|
||||
temps, err = a.getTempsWithPanicRecovery(getSensorTemps)
|
||||
if !errors.Is(err, errTemperatureFetchTimeout) {
|
||||
temps, err = a.getTempsWithTimeout(getSensorTemps)
|
||||
}
|
||||
if err != nil {
|
||||
slog.Warn("Error updating temperatures", "err", err)
|
||||
if len(systemStats.Temperatures) > 0 {
|
||||
@@ -152,6 +161,26 @@ func (a *Agent) getTempsWithPanicRecovery(getTemps getTempsFn) (temps []sensors.
|
||||
return
|
||||
}
|
||||
|
||||
func (a *Agent) getTempsWithTimeout(getTemps getTempsFn) ([]sensors.TemperatureStat, error) {
|
||||
type result struct {
|
||||
temps []sensors.TemperatureStat
|
||||
err error
|
||||
}
|
||||
|
||||
resultCh := make(chan result, 1)
|
||||
go func() {
|
||||
temps, err := a.getTempsWithPanicRecovery(getTemps)
|
||||
resultCh <- result{temps: temps, err: err}
|
||||
}()
|
||||
|
||||
select {
|
||||
case res := <-resultCh:
|
||||
return res.temps, res.err
|
||||
case <-time.After(temperatureFetchTimeout):
|
||||
return nil, errTemperatureFetchTimeout
|
||||
}
|
||||
}
|
||||
|
||||
// isValidSensor checks if a sensor is valid based on the sensor name and the sensor config
|
||||
func isValidSensor(sensorName string, config *SensorConfig) bool {
|
||||
// if no sensors configured, everything is valid
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
|
||||
@@ -526,3 +527,66 @@ func TestGetTempsWithPanicRecovery(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetTempsWithTimeout(t *testing.T) {
|
||||
agent := &Agent{
|
||||
sensorConfig: &SensorConfig{
|
||||
context: context.Background(),
|
||||
},
|
||||
}
|
||||
|
||||
originalTimeout := temperatureFetchTimeout
|
||||
t.Cleanup(func() {
|
||||
temperatureFetchTimeout = originalTimeout
|
||||
})
|
||||
temperatureFetchTimeout = 10 * time.Millisecond
|
||||
|
||||
t.Run("returns temperatures before timeout", func(t *testing.T) {
|
||||
temps, err := agent.getTempsWithTimeout(func(ctx context.Context) ([]sensors.TemperatureStat, error) {
|
||||
return []sensors.TemperatureStat{{SensorKey: "cpu_temp", Temperature: 42}}, nil
|
||||
})
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Len(t, temps, 1)
|
||||
assert.Equal(t, "cpu_temp", temps[0].SensorKey)
|
||||
})
|
||||
|
||||
t.Run("returns timeout error when collector hangs", func(t *testing.T) {
|
||||
temps, err := agent.getTempsWithTimeout(func(ctx context.Context) ([]sensors.TemperatureStat, error) {
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
return []sensors.TemperatureStat{{SensorKey: "cpu_temp", Temperature: 42}}, nil
|
||||
})
|
||||
|
||||
assert.Nil(t, temps)
|
||||
assert.ErrorIs(t, err, errTemperatureFetchTimeout)
|
||||
})
|
||||
}
|
||||
|
||||
func TestUpdateTemperaturesSkipsOnTimeout(t *testing.T) {
|
||||
agent := &Agent{
|
||||
systemInfo: system.Info{DashboardTemp: 99},
|
||||
sensorConfig: &SensorConfig{
|
||||
context: context.Background(),
|
||||
},
|
||||
}
|
||||
|
||||
originalTimeout := temperatureFetchTimeout
|
||||
t.Cleanup(func() {
|
||||
temperatureFetchTimeout = originalTimeout
|
||||
getSensorTemps = sensors.TemperaturesWithContext
|
||||
})
|
||||
temperatureFetchTimeout = 10 * time.Millisecond
|
||||
getSensorTemps = func(ctx context.Context) ([]sensors.TemperatureStat, error) {
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
stats := &system.Stats{
|
||||
Temperatures: map[string]float64{"stale": 50},
|
||||
}
|
||||
|
||||
agent.updateTemperatures(stats)
|
||||
|
||||
assert.Equal(t, 0.0, agent.systemInfo.DashboardTemp)
|
||||
assert.Equal(t, map[string]float64{}, stats.Temperatures)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user