mirror of
https://github.com/henrygd/beszel.git
synced 2026-04-14 17:01:51 +02:00
Prevent temperature collection from blocking agent stats (#1839)
This commit is contained in:
@@ -2,12 +2,14 @@ package agent
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"path"
|
"path"
|
||||||
"runtime"
|
"runtime"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/henrygd/beszel/agent/utils"
|
"github.com/henrygd/beszel/agent/utils"
|
||||||
@@ -38,6 +40,11 @@ func (a *Agent) newSensorConfig() *SensorConfig {
|
|||||||
// Matches sensors.TemperaturesWithContext to allow for panic recovery (gopsutil/issues/1832)
|
// Matches sensors.TemperaturesWithContext to allow for panic recovery (gopsutil/issues/1832)
|
||||||
type getTempsFn func(ctx context.Context) ([]sensors.TemperatureStat, error)
|
type getTempsFn func(ctx context.Context) ([]sensors.TemperatureStat, error)
|
||||||
|
|
||||||
|
var (
|
||||||
|
errTemperatureFetchTimeout = errors.New("temperature collection timed out")
|
||||||
|
temperatureFetchTimeout = 2 * time.Second
|
||||||
|
)
|
||||||
|
|
||||||
// newSensorConfigWithEnv creates a SensorConfig with the provided environment variables
|
// newSensorConfigWithEnv creates a SensorConfig with the provided environment variables
|
||||||
// sensorsSet indicates if the SENSORS environment variable was explicitly set (even to empty string)
|
// sensorsSet indicates if the SENSORS environment variable was explicitly set (even to empty string)
|
||||||
func (a *Agent) newSensorConfigWithEnv(primarySensor, sysSensors, sensorsEnvVal string, skipCollection bool) *SensorConfig {
|
func (a *Agent) newSensorConfigWithEnv(primarySensor, sysSensors, sensorsEnvVal string, skipCollection bool) *SensorConfig {
|
||||||
@@ -86,10 +93,12 @@ func (a *Agent) updateTemperatures(systemStats *system.Stats) {
|
|||||||
// reset high temp
|
// reset high temp
|
||||||
a.systemInfo.DashboardTemp = 0
|
a.systemInfo.DashboardTemp = 0
|
||||||
|
|
||||||
temps, err := a.getTempsWithPanicRecovery(getSensorTemps)
|
temps, err := a.getTempsWithTimeout(getSensorTemps)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// retry once on panic (gopsutil/issues/1832)
|
// retry once on panic (gopsutil/issues/1832)
|
||||||
temps, err = a.getTempsWithPanicRecovery(getSensorTemps)
|
if !errors.Is(err, errTemperatureFetchTimeout) {
|
||||||
|
temps, err = a.getTempsWithTimeout(getSensorTemps)
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Warn("Error updating temperatures", "err", err)
|
slog.Warn("Error updating temperatures", "err", err)
|
||||||
if len(systemStats.Temperatures) > 0 {
|
if len(systemStats.Temperatures) > 0 {
|
||||||
@@ -152,6 +161,26 @@ func (a *Agent) getTempsWithPanicRecovery(getTemps getTempsFn) (temps []sensors.
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (a *Agent) getTempsWithTimeout(getTemps getTempsFn) ([]sensors.TemperatureStat, error) {
|
||||||
|
type result struct {
|
||||||
|
temps []sensors.TemperatureStat
|
||||||
|
err error
|
||||||
|
}
|
||||||
|
|
||||||
|
resultCh := make(chan result, 1)
|
||||||
|
go func() {
|
||||||
|
temps, err := a.getTempsWithPanicRecovery(getTemps)
|
||||||
|
resultCh <- result{temps: temps, err: err}
|
||||||
|
}()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case res := <-resultCh:
|
||||||
|
return res.temps, res.err
|
||||||
|
case <-time.After(temperatureFetchTimeout):
|
||||||
|
return nil, errTemperatureFetchTimeout
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// isValidSensor checks if a sensor is valid based on the sensor name and the sensor config
|
// isValidSensor checks if a sensor is valid based on the sensor name and the sensor config
|
||||||
func isValidSensor(sensorName string, config *SensorConfig) bool {
|
func isValidSensor(sensorName string, config *SensorConfig) bool {
|
||||||
// if no sensors configured, everything is valid
|
// if no sensors configured, everything is valid
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/henrygd/beszel/internal/entities/system"
|
"github.com/henrygd/beszel/internal/entities/system"
|
||||||
|
|
||||||
@@ -526,3 +527,66 @@ func TestGetTempsWithPanicRecovery(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGetTempsWithTimeout(t *testing.T) {
|
||||||
|
agent := &Agent{
|
||||||
|
sensorConfig: &SensorConfig{
|
||||||
|
context: context.Background(),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
originalTimeout := temperatureFetchTimeout
|
||||||
|
t.Cleanup(func() {
|
||||||
|
temperatureFetchTimeout = originalTimeout
|
||||||
|
})
|
||||||
|
temperatureFetchTimeout = 10 * time.Millisecond
|
||||||
|
|
||||||
|
t.Run("returns temperatures before timeout", func(t *testing.T) {
|
||||||
|
temps, err := agent.getTempsWithTimeout(func(ctx context.Context) ([]sensors.TemperatureStat, error) {
|
||||||
|
return []sensors.TemperatureStat{{SensorKey: "cpu_temp", Temperature: 42}}, nil
|
||||||
|
})
|
||||||
|
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, temps, 1)
|
||||||
|
assert.Equal(t, "cpu_temp", temps[0].SensorKey)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("returns timeout error when collector hangs", func(t *testing.T) {
|
||||||
|
temps, err := agent.getTempsWithTimeout(func(ctx context.Context) ([]sensors.TemperatureStat, error) {
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
return []sensors.TemperatureStat{{SensorKey: "cpu_temp", Temperature: 42}}, nil
|
||||||
|
})
|
||||||
|
|
||||||
|
assert.Nil(t, temps)
|
||||||
|
assert.ErrorIs(t, err, errTemperatureFetchTimeout)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUpdateTemperaturesSkipsOnTimeout(t *testing.T) {
|
||||||
|
agent := &Agent{
|
||||||
|
systemInfo: system.Info{DashboardTemp: 99},
|
||||||
|
sensorConfig: &SensorConfig{
|
||||||
|
context: context.Background(),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
originalTimeout := temperatureFetchTimeout
|
||||||
|
t.Cleanup(func() {
|
||||||
|
temperatureFetchTimeout = originalTimeout
|
||||||
|
getSensorTemps = sensors.TemperaturesWithContext
|
||||||
|
})
|
||||||
|
temperatureFetchTimeout = 10 * time.Millisecond
|
||||||
|
getSensorTemps = func(ctx context.Context) ([]sensors.TemperatureStat, error) {
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
stats := &system.Stats{
|
||||||
|
Temperatures: map[string]float64{"stale": 50},
|
||||||
|
}
|
||||||
|
|
||||||
|
agent.updateTemperatures(stats)
|
||||||
|
|
||||||
|
assert.Equal(t, 0.0, agent.systemInfo.DashboardTemp)
|
||||||
|
assert.Equal(t, map[string]float64{}, stats.Temperatures)
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user