mirror of
https://github.com/henrygd/beszel.git
synced 2026-03-22 21:46:18 +01:00
Compare commits
5 Commits
614-smart
...
4395520a28
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4395520a28 | ||
|
|
8c52f30a71 | ||
|
|
46316ebffa | ||
|
|
0b04f60b6c | ||
|
|
20b822d072 |
2
.github/workflows/release.yml
vendored
2
.github/workflows/release.yml
vendored
@@ -39,4 +39,4 @@ jobs:
|
||||
version: latest
|
||||
args: release --clean
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.TOKEN }}
|
||||
GITHUB_TOKEN: ${{ secrets.TOKEN || secrets.GITHUB_TOKEN }}
|
||||
|
||||
@@ -243,21 +243,26 @@ func (gm *GPUManager) GetCurrentData() map[string]system.GPUData {
|
||||
// copy / reset the data
|
||||
gpuData := make(map[string]system.GPUData, len(gm.GpuDataMap))
|
||||
for id, gpu := range gm.GpuDataMap {
|
||||
// sum the data
|
||||
gpu.Temperature = twoDecimals(gpu.Temperature)
|
||||
gpu.MemoryUsed = twoDecimals(gpu.MemoryUsed)
|
||||
gpu.MemoryTotal = twoDecimals(gpu.MemoryTotal)
|
||||
gpu.Usage = twoDecimals(gpu.Usage / gpu.Count)
|
||||
gpu.Power = twoDecimals(gpu.Power / gpu.Count)
|
||||
// reset the count
|
||||
gpu.Count = 1
|
||||
// dereference to avoid overwriting anything else
|
||||
gpuCopy := *gpu
|
||||
var gpuAvg system.GPUData
|
||||
|
||||
gpuAvg.Temperature = twoDecimals(gpu.Temperature)
|
||||
gpuAvg.MemoryUsed = twoDecimals(gpu.MemoryUsed)
|
||||
gpuAvg.MemoryTotal = twoDecimals(gpu.MemoryTotal)
|
||||
|
||||
// avoid division by zero
|
||||
if gpu.Count > 0 {
|
||||
gpuAvg.Usage = twoDecimals(gpu.Usage / gpu.Count)
|
||||
gpuAvg.Power = twoDecimals(gpu.Power / gpu.Count)
|
||||
}
|
||||
|
||||
// reset accumulators in the original
|
||||
gpu.Usage, gpu.Power, gpu.Count = 0, 0, 0
|
||||
|
||||
// append id to the name if there are multiple GPUs with the same name
|
||||
if nameCounts[gpu.Name] > 1 {
|
||||
gpuCopy.Name = fmt.Sprintf("%s %s", gpu.Name, id)
|
||||
gpuAvg.Name = fmt.Sprintf("%s %s", gpu.Name, id)
|
||||
}
|
||||
gpuData[id] = gpuCopy
|
||||
gpuData[id] = gpuAvg
|
||||
}
|
||||
slog.Debug("GPU", "data", gpuData)
|
||||
return gpuData
|
||||
|
||||
@@ -279,6 +279,19 @@ func TestParseJetsonData(t *testing.T) {
|
||||
Count: 1,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "orin nano",
|
||||
input: "06-18-2025 11:25:24 RAM 3452/7620MB (lfb 25x4MB) SWAP 1518/16384MB (cached 174MB) CPU [1%@1420,2%@1420,0%@1420,2%@1420,2%@729,1%@729] GR3D_FREQ 0% cpu@50.031C soc2@49.031C soc0@50C gpu@49.031C tj@50.25C soc1@50.25C VDD_IN 4824mW/4824mW VDD_CPU_GPU_CV 518mW/518mW VDD_SOC 1475mW/1475mW",
|
||||
wantMetrics: &system.GPUData{
|
||||
Name: "GPU",
|
||||
MemoryUsed: 3452.0,
|
||||
MemoryTotal: 7620.0,
|
||||
Usage: 0.0,
|
||||
Temperature: 50.25,
|
||||
Power: 0.518,
|
||||
Count: 1,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "missing temperature",
|
||||
input: "11-14-2024 22:54:33 RAM 4300/30698MB GR3D_FREQ 45% VDD_GPU_SOC 2171mW",
|
||||
@@ -318,44 +331,75 @@ func TestParseJetsonData(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestGetCurrentData(t *testing.T) {
|
||||
gm := &GPUManager{
|
||||
GpuDataMap: map[string]*system.GPUData{
|
||||
"0": {
|
||||
Name: "GPU1",
|
||||
Temperature: 50,
|
||||
MemoryUsed: 2048,
|
||||
MemoryTotal: 4096,
|
||||
Usage: 100, // 100 over 2 counts = 50 avg
|
||||
Power: 200, // 200 over 2 counts = 100 avg
|
||||
Count: 2,
|
||||
t.Run("calculates averages and resets accumulators", func(t *testing.T) {
|
||||
gm := &GPUManager{
|
||||
GpuDataMap: map[string]*system.GPUData{
|
||||
"0": {
|
||||
Name: "GPU1",
|
||||
Temperature: 50,
|
||||
MemoryUsed: 2048,
|
||||
MemoryTotal: 4096,
|
||||
Usage: 100, // 100 over 2 counts = 50 avg
|
||||
Power: 200, // 200 over 2 counts = 100 avg
|
||||
Count: 2,
|
||||
},
|
||||
"1": {
|
||||
Name: "GPU1",
|
||||
Temperature: 60,
|
||||
MemoryUsed: 3072,
|
||||
MemoryTotal: 8192,
|
||||
Usage: 30,
|
||||
Power: 60,
|
||||
Count: 1,
|
||||
},
|
||||
},
|
||||
"1": {
|
||||
Name: "GPU1",
|
||||
Temperature: 60,
|
||||
MemoryUsed: 3072,
|
||||
MemoryTotal: 8192,
|
||||
Usage: 30,
|
||||
Power: 60,
|
||||
Count: 1,
|
||||
}
|
||||
|
||||
result := gm.GetCurrentData()
|
||||
|
||||
// Verify name disambiguation
|
||||
assert.Equal(t, "GPU1 0", result["0"].Name)
|
||||
assert.Equal(t, "GPU1 1", result["1"].Name)
|
||||
|
||||
// Check averaged values in the result
|
||||
assert.InDelta(t, 50.0, result["0"].Usage, 0.01)
|
||||
assert.InDelta(t, 100.0, result["0"].Power, 0.01)
|
||||
assert.InDelta(t, 30.0, result["1"].Usage, 0.01)
|
||||
assert.InDelta(t, 60.0, result["1"].Power, 0.01)
|
||||
|
||||
// Verify that accumulators in the original map are reset
|
||||
assert.Equal(t, float64(0), gm.GpuDataMap["0"].Count, "GPU 0 Count should be reset")
|
||||
assert.Equal(t, float64(0), gm.GpuDataMap["0"].Usage, "GPU 0 Usage should be reset")
|
||||
assert.Equal(t, float64(0), gm.GpuDataMap["0"].Power, "GPU 0 Power should be reset")
|
||||
assert.Equal(t, float64(0), gm.GpuDataMap["1"].Count, "GPU 1 Count should be reset")
|
||||
assert.Equal(t, float64(0), gm.GpuDataMap["1"].Usage, "GPU 1 Usage should be reset")
|
||||
assert.Equal(t, float64(0), gm.GpuDataMap["1"].Power, "GPU 1 Power should be reset")
|
||||
})
|
||||
|
||||
t.Run("handles zero count without panicking", func(t *testing.T) {
|
||||
gm := &GPUManager{
|
||||
GpuDataMap: map[string]*system.GPUData{
|
||||
"0": {
|
||||
Name: "TestGPU",
|
||||
Count: 0,
|
||||
Usage: 0,
|
||||
Power: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
result := gm.GetCurrentData()
|
||||
var result map[string]system.GPUData
|
||||
assert.NotPanics(t, func() {
|
||||
result = gm.GetCurrentData()
|
||||
})
|
||||
|
||||
// Verify name disambiguation
|
||||
assert.Equal(t, "GPU1 0", result["0"].Name)
|
||||
assert.Equal(t, "GPU1 1", result["1"].Name)
|
||||
// Check that usage and power are 0
|
||||
assert.Equal(t, 0.0, result["0"].Usage)
|
||||
assert.Equal(t, 0.0, result["0"].Power)
|
||||
|
||||
// Check averaged values
|
||||
assert.InDelta(t, 50.0, result["0"].Usage, 0.01)
|
||||
assert.InDelta(t, 100.0, result["0"].Power, 0.01)
|
||||
assert.InDelta(t, 30.0, result["1"].Usage, 0.01)
|
||||
assert.InDelta(t, 60.0, result["1"].Power, 0.01)
|
||||
|
||||
// Verify reset counts
|
||||
assert.Equal(t, float64(1), gm.GpuDataMap["0"].Count)
|
||||
assert.Equal(t, float64(1), gm.GpuDataMap["1"].Count)
|
||||
// Verify reset count
|
||||
assert.Equal(t, float64(0), gm.GpuDataMap["0"].Count)
|
||||
})
|
||||
}
|
||||
|
||||
func TestDetectGPUs(t *testing.T) {
|
||||
@@ -722,6 +766,18 @@ func TestAccumulation(t *testing.T) {
|
||||
assert.InDelta(t, expected.avgUsage, gpu.Usage, 0.01, "Average usage in GetCurrentData should match")
|
||||
assert.InDelta(t, expected.avgPower, gpu.Power, 0.01, "Average power in GetCurrentData should match")
|
||||
}
|
||||
|
||||
// Verify that accumulators in the original map are reset
|
||||
for id := range tt.expectedValues {
|
||||
gpu, exists := gm.GpuDataMap[id]
|
||||
assert.True(t, exists, "GPU with ID %s should still exist after GetCurrentData", id)
|
||||
if !exists {
|
||||
continue
|
||||
}
|
||||
assert.Equal(t, float64(0), gpu.Count, "Count should be reset for GPU ID %s", id)
|
||||
assert.Equal(t, float64(0), gpu.Usage, "Usage should be reset for GPU ID %s", id)
|
||||
assert.Equal(t, float64(0), gpu.Power, "Power should be reset for GPU ID %s", id)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package agent
|
||||
import (
|
||||
"beszel/internal/entities/system"
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"path"
|
||||
"strconv"
|
||||
@@ -30,6 +31,9 @@ func (a *Agent) newSensorConfig() *SensorConfig {
|
||||
return a.newSensorConfigWithEnv(primarySensor, sysSensors, sensorsEnvVal, skipCollection)
|
||||
}
|
||||
|
||||
// getTempsFn matches the signature of sensors.TemperaturesWithContext so that the call can be wrapped for panic recovery (gopsutil/issues/1832)
|
||||
type getTempsFn func(ctx context.Context) ([]sensors.TemperatureStat, error)
|
||||
|
||||
// newSensorConfigWithEnv creates a SensorConfig with the provided environment variables
|
||||
// sensorsSet indicates if the SENSORS environment variable was explicitly set (even to empty string)
|
||||
func (a *Agent) newSensorConfigWithEnv(primarySensor, sysSensors, sensorsEnvVal string, skipCollection bool) *SensorConfig {
|
||||
@@ -78,8 +82,18 @@ func (a *Agent) updateTemperatures(systemStats *system.Stats) {
|
||||
// reset high temp
|
||||
a.systemInfo.DashboardTemp = 0
|
||||
|
||||
// get sensor data
|
||||
temps, _ := sensors.TemperaturesWithContext(a.sensorConfig.context)
|
||||
temps, err := a.getTempsWithPanicRecovery(sensors.TemperaturesWithContext)
|
||||
if err != nil {
|
||||
// retry once on panic (gopsutil/issues/1832)
|
||||
temps, err = a.getTempsWithPanicRecovery(sensors.TemperaturesWithContext)
|
||||
if err != nil {
|
||||
slog.Warn("Error updating temperatures", "err", err)
|
||||
if len(systemStats.Temperatures) > 0 {
|
||||
systemStats.Temperatures = make(map[string]float64)
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
slog.Debug("Temperature", "sensors", temps)
|
||||
|
||||
// return if no sensors
|
||||
@@ -107,15 +121,28 @@ func (a *Agent) updateTemperatures(systemStats *system.Stats) {
|
||||
continue
|
||||
}
|
||||
// set dashboard temperature
|
||||
if a.sensorConfig.primarySensor == "" {
|
||||
switch a.sensorConfig.primarySensor {
|
||||
case "":
|
||||
a.systemInfo.DashboardTemp = max(a.systemInfo.DashboardTemp, sensor.Temperature)
|
||||
} else if a.sensorConfig.primarySensor == sensorName {
|
||||
case sensorName:
|
||||
a.systemInfo.DashboardTemp = sensor.Temperature
|
||||
}
|
||||
systemStats.Temperatures[sensorName] = twoDecimals(sensor.Temperature)
|
||||
}
|
||||
}
|
||||
|
||||
// getTempsWithPanicRecovery wraps sensors.TemperaturesWithContext to recover from panics (gopsutil/issues/1832)
|
||||
func (a *Agent) getTempsWithPanicRecovery(getTemps getTempsFn) (temps []sensors.TemperatureStat, err error) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
err = fmt.Errorf("panic: %v", r)
|
||||
}
|
||||
}()
|
||||
// get sensor data (error ignored intentionally as it may be only with one sensor)
|
||||
temps, _ = getTemps(a.sensorConfig.context)
|
||||
return
|
||||
}
|
||||
|
||||
// isValidSensor checks if a sensor is valid based on the sensor name and the sensor config
|
||||
func isValidSensor(sensorName string, config *SensorConfig) bool {
|
||||
// if no sensors configured, everything is valid
|
||||
|
||||
@@ -4,11 +4,14 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"beszel/internal/entities/system"
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/shirou/gopsutil/v4/common"
|
||||
"github.com/shirou/gopsutil/v4/sensors"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
@@ -454,3 +457,97 @@ func TestScaleTemperatureLogic(t *testing.T) {
|
||||
result, expected)
|
||||
})
|
||||
}
|
||||
|
||||
func TestGetTempsWithPanicRecovery(t *testing.T) {
|
||||
agent := &Agent{
|
||||
systemInfo: system.Info{},
|
||||
sensorConfig: &SensorConfig{
|
||||
context: context.Background(),
|
||||
},
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
getTempsFn getTempsFn
|
||||
expectError bool
|
||||
errorMsg string
|
||||
}{
|
||||
{
|
||||
name: "successful_function_call",
|
||||
getTempsFn: func(ctx context.Context) ([]sensors.TemperatureStat, error) {
|
||||
return []sensors.TemperatureStat{
|
||||
{SensorKey: "test_sensor", Temperature: 45.0},
|
||||
}, nil
|
||||
},
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "function_returns_error",
|
||||
getTempsFn: func(ctx context.Context) ([]sensors.TemperatureStat, error) {
|
||||
return []sensors.TemperatureStat{
|
||||
{SensorKey: "test_sensor", Temperature: 45.0},
|
||||
}, fmt.Errorf("sensor error")
|
||||
},
|
||||
expectError: false, // getTempsWithPanicRecovery ignores errors from the function
|
||||
},
|
||||
{
|
||||
name: "function_panics_with_string",
|
||||
getTempsFn: func(ctx context.Context) ([]sensors.TemperatureStat, error) {
|
||||
panic("test panic")
|
||||
},
|
||||
expectError: true,
|
||||
errorMsg: "panic: test panic",
|
||||
},
|
||||
{
|
||||
name: "function_panics_with_error",
|
||||
getTempsFn: func(ctx context.Context) ([]sensors.TemperatureStat, error) {
|
||||
panic(fmt.Errorf("panic error"))
|
||||
},
|
||||
expectError: true,
|
||||
errorMsg: "panic:",
|
||||
},
|
||||
{
|
||||
name: "function_panics_with_index_out_of_bounds",
|
||||
getTempsFn: func(ctx context.Context) ([]sensors.TemperatureStat, error) {
|
||||
slice := []int{1, 2, 3}
|
||||
_ = slice[10] // out of bounds panic
|
||||
return nil, nil
|
||||
},
|
||||
expectError: true,
|
||||
errorMsg: "panic:",
|
||||
},
|
||||
{
|
||||
name: "function_panics_with_any_conversion",
|
||||
getTempsFn: func(ctx context.Context) ([]sensors.TemperatureStat, error) {
|
||||
var i any = "string"
|
||||
_ = i.(int) // type assertion panic
|
||||
return nil, nil
|
||||
},
|
||||
expectError: true,
|
||||
errorMsg: "panic:",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
var temps []sensors.TemperatureStat
|
||||
var err error
|
||||
|
||||
// The function should not panic, regardless of what the injected function does
|
||||
assert.NotPanics(t, func() {
|
||||
temps, err = agent.getTempsWithPanicRecovery(tt.getTempsFn)
|
||||
}, "getTempsWithPanicRecovery should not panic")
|
||||
|
||||
if tt.expectError {
|
||||
assert.Error(t, err, "Expected an error to be returned")
|
||||
if tt.errorMsg != "" {
|
||||
assert.Contains(t, err.Error(), tt.errorMsg,
|
||||
"Error message should contain expected text")
|
||||
}
|
||||
assert.Nil(t, temps, "Temps should be nil when panic occurs")
|
||||
} else {
|
||||
assert.NoError(t, err, "Should not return error for successful calls")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -182,7 +182,7 @@ function Install-BeszelAgentWithScoop {
|
||||
scoop bucket add beszel https://github.com/henrygd/beszel-scoops | Out-Null
|
||||
|
||||
Write-Host "Installing / updating beszel-agent..."
|
||||
scoop install beszel-agent
|
||||
scoop install beszel-agent | Out-Null
|
||||
|
||||
if (-not (Test-CommandExists "beszel-agent")) {
|
||||
throw "Failed to install beszel-agent"
|
||||
|
||||
@@ -316,18 +316,27 @@ fi
|
||||
# Create a dedicated user for the service if it doesn't exist
|
||||
if is_alpine; then
|
||||
if ! id -u beszel >/dev/null 2>&1; then
|
||||
echo "Creating a dedicated group for the Beszel Agent service..."
|
||||
addgroup beszel
|
||||
echo "Creating a dedicated user for the Beszel Agent service..."
|
||||
adduser -S -D -H -s /sbin/nologin beszel
|
||||
adduser -S -D -H -s /sbin/nologin -G beszel beszel
|
||||
fi
|
||||
# Add the user to the docker group to allow access to the Docker socket
|
||||
addgroup beszel docker
|
||||
# Add the user to the docker group to allow access to the Docker socket if group docker exists
|
||||
if getent group docker; then
|
||||
echo "Adding besel to docker group"
|
||||
usermod -aG docker beszel
|
||||
fi
|
||||
|
||||
else
|
||||
if ! id -u beszel >/dev/null 2>&1; then
|
||||
echo "Creating a dedicated user for the Beszel Agent service..."
|
||||
useradd --system --home-dir /nonexistent --shell /bin/false beszel
|
||||
fi
|
||||
# Add the user to the docker group to allow access to the Docker socket
|
||||
usermod -aG docker beszel
|
||||
# Add the user to the docker group to allow access to the Docker socket if group docker exists
|
||||
if getent group docker; then
|
||||
echo "Adding besel to docker group"
|
||||
usermod -aG docker beszel
|
||||
fi
|
||||
fi
|
||||
|
||||
# Create the directory for the Beszel Agent
|
||||
|
||||
Reference in New Issue
Block a user