Probable fix for Jetson GPU issue (#895)

add GITHUB_TOKEN fallback for goreleaser (#925)
Falling back to the built-in GITHUB_TOKEN when the TOKEN secret is not set lets goreleaser run in a fork without additional configuration.
2026-04-05 12:31:49 +02:00 · 2025-06-26 22:11:48 -04:00 · 2025-06-26 21:03:19 -04:00 · 2025-06-25 13:52:45 -04:00 · 2025-06-23 19:50:11 -04:00 · 2025-06-17 16:08:32 -04:00
7 changed files with 250 additions and 56 deletions
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -39,4 +39,4 @@ jobs:
          version: latest
          args: release --clean
        env:
-          GITHUB_TOKEN: ${{ secrets.TOKEN }}
+          GITHUB_TOKEN: ${{ secrets.TOKEN || secrets.GITHUB_TOKEN }}
--- a/beszel/internal/agent/gpu.go
+++ b/beszel/internal/agent/gpu.go
@@ -243,21 +243,26 @@ func (gm *GPUManager) GetCurrentData() map[string]system.GPUData {
 	// copy / reset the data
 	gpuData := make(map[string]system.GPUData, len(gm.GpuDataMap))
 	for id, gpu := range gm.GpuDataMap {
-		// sum the data
+		var gpuAvg system.GPUData
-		gpu.Temperature = twoDecimals(gpu.Temperature)
+
-		gpu.MemoryUsed = twoDecimals(gpu.MemoryUsed)
+		gpuAvg.Temperature = twoDecimals(gpu.Temperature)
-		gpu.MemoryTotal = twoDecimals(gpu.MemoryTotal)
+		gpuAvg.MemoryUsed = twoDecimals(gpu.MemoryUsed)
-		gpu.Usage = twoDecimals(gpu.Usage / gpu.Count)
+		gpuAvg.MemoryTotal = twoDecimals(gpu.MemoryTotal)
-		gpu.Power = twoDecimals(gpu.Power / gpu.Count)
+
-		// reset the count
+		// avoid division by zero
-		gpu.Count = 1
+		if gpu.Count > 0 {
-		// dereference to avoid overwriting anything else
+			gpuAvg.Usage = twoDecimals(gpu.Usage / gpu.Count)
-		gpuCopy := *gpu
+			gpuAvg.Power = twoDecimals(gpu.Power / gpu.Count)
 		}
 		// reset accumulators in the original
 		gpu.Usage, gpu.Power, gpu.Count = 0, 0, 0
 		// append id to the name if there are multiple GPUs with the same name
 		if nameCounts[gpu.Name] > 1 {
-			gpuCopy.Name = fmt.Sprintf("%s %s", gpu.Name, id)
+			gpuAvg.Name = fmt.Sprintf("%s %s", gpu.Name, id)
 		}
-		gpuData[id] = gpuCopy
+		gpuData[id] = gpuAvg
 	}
 	slog.Debug("GPU", "data", gpuData)
 	return gpuData
--- a/beszel/internal/agent/gpu_test.go
+++ b/beszel/internal/agent/gpu_test.go
@@ -279,6 +279,19 @@ func TestParseJetsonData(t *testing.T) {
 				Count:       1,
 			},
 		},
 		{
 			name:  "orin nano",
 			input: "06-18-2025 11:25:24 RAM 3452/7620MB (lfb 25x4MB) SWAP 1518/16384MB (cached 174MB) CPU [1%@1420,2%@1420,0%@1420,2%@1420,2%@729,1%@729] GR3D_FREQ 0% cpu@50.031C soc2@49.031C soc0@50C gpu@49.031C tj@50.25C soc1@50.25C VDD_IN 4824mW/4824mW VDD_CPU_GPU_CV 518mW/518mW VDD_SOC 1475mW/1475mW",
 			wantMetrics: &system.GPUData{
 				Name:        "GPU",
 				MemoryUsed:  3452.0,
 				MemoryTotal: 7620.0,
 				Usage:       0.0,
 				Temperature: 50.25,
 				Power:       0.518,
 				Count:       1,
 			},
 		},
 		{
 			name:  "missing temperature",
 			input: "11-14-2024 22:54:33 RAM 4300/30698MB GR3D_FREQ 45% VDD_GPU_SOC 2171mW",
@@ -318,44 +331,75 @@ func TestParseJetsonData(t *testing.T) {
 }
 func TestGetCurrentData(t *testing.T) {
-	gm := &GPUManager{
+	t.Run("calculates averages and resets accumulators", func(t *testing.T) {
-		GpuDataMap: map[string]*system.GPUData{
+		gm := &GPUManager{
-			"0": {
+			GpuDataMap: map[string]*system.GPUData{
-				Name:        "GPU1",
+				"0": {
-				Temperature: 50,
+					Name:        "GPU1",
-				MemoryUsed:  2048,
+					Temperature: 50,
-				MemoryTotal: 4096,
+					MemoryUsed:  2048,
-				Usage:       100, // 100 over 2 counts = 50 avg
+					MemoryTotal: 4096,
-				Power:       200, // 200 over 2 counts = 100 avg
+					Usage:       100, // 100 over 2 counts = 50 avg
-				Count:       2,
+					Power:       200, // 200 over 2 counts = 100 avg
 					Count:       2,
 				},
 				"1": {
 					Name:        "GPU1",
 					Temperature: 60,
 					MemoryUsed:  3072,
 					MemoryTotal: 8192,
 					Usage:       30,
 					Power:       60,
 					Count:       1,
 				},
 			},
-			"1": {
+		}
-				Name:        "GPU1",
+
-				Temperature: 60,
+		result := gm.GetCurrentData()
-				MemoryUsed:  3072,
+
-				MemoryTotal: 8192,
+		// Verify name disambiguation
-				Usage:       30,
+		assert.Equal(t, "GPU1 0", result["0"].Name)
-				Power:       60,
+		assert.Equal(t, "GPU1 1", result["1"].Name)
-				Count:       1,
+
 		// Check averaged values in the result
 		assert.InDelta(t, 50.0, result["0"].Usage, 0.01)
 		assert.InDelta(t, 100.0, result["0"].Power, 0.01)
 		assert.InDelta(t, 30.0, result["1"].Usage, 0.01)
 		assert.InDelta(t, 60.0, result["1"].Power, 0.01)
 		// Verify that accumulators in the original map are reset
 		assert.Equal(t, float64(0), gm.GpuDataMap["0"].Count, "GPU 0 Count should be reset")
 		assert.Equal(t, float64(0), gm.GpuDataMap["0"].Usage, "GPU 0 Usage should be reset")
 		assert.Equal(t, float64(0), gm.GpuDataMap["0"].Power, "GPU 0 Power should be reset")
 		assert.Equal(t, float64(0), gm.GpuDataMap["1"].Count, "GPU 1 Count should be reset")
 		assert.Equal(t, float64(0), gm.GpuDataMap["1"].Usage, "GPU 1 Usage should be reset")
 		assert.Equal(t, float64(0), gm.GpuDataMap["1"].Power, "GPU 1 Power should be reset")
 	})
 	t.Run("handles zero count without panicking", func(t *testing.T) {
 		gm := &GPUManager{
 			GpuDataMap: map[string]*system.GPUData{
 				"0": {
 					Name:  "TestGPU",
 					Count: 0,
 					Usage: 0,
 					Power: 0,
 				},
 			},
-		},
+		}
 	}
-	result := gm.GetCurrentData()
+		var result map[string]system.GPUData
 		assert.NotPanics(t, func() {
 			result = gm.GetCurrentData()
 		})
-	// Verify name disambiguation
+		// Check that usage and power are 0
-	assert.Equal(t, "GPU1 0", result["0"].Name)
+		assert.Equal(t, 0.0, result["0"].Usage)
-	assert.Equal(t, "GPU1 1", result["1"].Name)
+		assert.Equal(t, 0.0, result["0"].Power)
-	// Check averaged values
+		// Verify reset count
-	assert.InDelta(t, 50.0, result["0"].Usage, 0.01)
+		assert.Equal(t, float64(0), gm.GpuDataMap["0"].Count)
-	assert.InDelta(t, 100.0, result["0"].Power, 0.01)
+	})
 	assert.InDelta(t, 30.0, result["1"].Usage, 0.01)
 	assert.InDelta(t, 60.0, result["1"].Power, 0.01)
 	// Verify reset counts
 	assert.Equal(t, float64(1), gm.GpuDataMap["0"].Count)
 	assert.Equal(t, float64(1), gm.GpuDataMap["1"].Count)
 }
 func TestDetectGPUs(t *testing.T) {
@@ -722,6 +766,18 @@ func TestAccumulation(t *testing.T) {
 				assert.InDelta(t, expected.avgUsage, gpu.Usage, 0.01, "Average usage in GetCurrentData should match")
 				assert.InDelta(t, expected.avgPower, gpu.Power, 0.01, "Average power in GetCurrentData should match")
 			}
 			// Verify that accumulators in the original map are reset
 			for id := range tt.expectedValues {
 				gpu, exists := gm.GpuDataMap[id]
 				assert.True(t, exists, "GPU with ID %s should still exist after GetCurrentData", id)
 				if !exists {
 					continue
 				}
 				assert.Equal(t, float64(0), gpu.Count, "Count should be reset for GPU ID %s", id)
 				assert.Equal(t, float64(0), gpu.Usage, "Usage should be reset for GPU ID %s", id)
 				assert.Equal(t, float64(0), gpu.Power, "Power should be reset for GPU ID %s", id)
 			}
 		})
 	}
 }
--- a/beszel/internal/agent/sensors.go
+++ b/beszel/internal/agent/sensors.go
@@ -3,6 +3,7 @@ package agent
 import (
 	"beszel/internal/entities/system"
 	"context"
 	"fmt"
 	"log/slog"
 	"path"
 	"strconv"
@@ -30,6 +31,9 @@ func (a *Agent) newSensorConfig() *SensorConfig {
 	return a.newSensorConfigWithEnv(primarySensor, sysSensors, sensorsEnvVal, skipCollection)
 }
 // getTempsFn matches the signature of sensors.TemperaturesWithContext so that the real
 // function, or a stub in tests, can be passed to getTempsWithPanicRecovery, which
 // recovers from panics raised inside it (gopsutil/issues/1832).
 type getTempsFn func(ctx context.Context) ([]sensors.TemperatureStat, error)
 // newSensorConfigWithEnv creates a SensorConfig with the provided environment variables
 // sensorsSet indicates if the SENSORS environment variable was explicitly set (even to empty string)
 func (a *Agent) newSensorConfigWithEnv(primarySensor, sysSensors, sensorsEnvVal string, skipCollection bool) *SensorConfig {
@@ -78,8 +82,18 @@ func (a *Agent) updateTemperatures(systemStats *system.Stats) {
 	// reset high temp
 	a.systemInfo.DashboardTemp = 0
-	// get sensor data
+	temps, err := a.getTempsWithPanicRecovery(sensors.TemperaturesWithContext)
-	temps, _ := sensors.TemperaturesWithContext(a.sensorConfig.context)
+	if err != nil {
 		// retry once on panic (gopsutil/issues/1832)
 		temps, err = a.getTempsWithPanicRecovery(sensors.TemperaturesWithContext)
 		if err != nil {
 			slog.Warn("Error updating temperatures", "err", err)
 			if len(systemStats.Temperatures) > 0 {
 				systemStats.Temperatures = make(map[string]float64)
 			}
 			return
 		}
 	}
 	slog.Debug("Temperature", "sensors", temps)
 	// return if no sensors
@@ -107,15 +121,28 @@ func (a *Agent) updateTemperatures(systemStats *system.Stats) {
 			continue
 		}
 		// set dashboard temperature
-		if a.sensorConfig.primarySensor == "" {
+		switch a.sensorConfig.primarySensor {
 		case "":
 			a.systemInfo.DashboardTemp = max(a.systemInfo.DashboardTemp, sensor.Temperature)
-		} else if a.sensorConfig.primarySensor == sensorName {
+		case sensorName:
 			a.systemInfo.DashboardTemp = sensor.Temperature
 		}
 		systemStats.Temperatures[sensorName] = twoDecimals(sensor.Temperature)
 	}
 }
 // getTempsWithPanicRecovery calls getTemps (normally sensors.TemperaturesWithContext)
 // with the sensor config's context and turns a panic raised inside it into a returned
 // error (gopsutil/issues/1832).
 //
 // The error returned by getTemps itself is deliberately discarded, so err is non-nil
 // only when a panic was recovered; in that case temps keeps its zero value (nil).
 func (a *Agent) getTempsWithPanicRecovery(getTemps getTempsFn) (temps []sensors.TemperatureStat, err error) {
 	defer func() {
 		// the named result err is what lets the recovered value reach the caller
 		if r := recover(); r != nil {
 			err = fmt.Errorf("panic: %v", r)
 		}
 	}()
 	// get sensor data (error ignored intentionally as it may be only with one sensor)
 	temps, _ = getTemps(a.sensorConfig.context)
 	return
 }
 // isValidSensor checks if a sensor is valid based on the sensor name and the sensor config
 func isValidSensor(sensorName string, config *SensorConfig) bool {
 	// if no sensors configured, everything is valid
--- a/beszel/internal/agent/sensors_test.go
+++ b/beszel/internal/agent/sensors_test.go
@@ -4,11 +4,14 @@
 package agent
 import (
 	"beszel/internal/entities/system"
 	"context"
 	"fmt"
 	"os"
 	"testing"
 	"github.com/shirou/gopsutil/v4/common"
 	"github.com/shirou/gopsutil/v4/sensors"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
@@ -454,3 +457,97 @@ func TestScaleTemperatureLogic(t *testing.T) {
 			result, expected)
 	})
 }
 // TestGetTempsWithPanicRecovery checks that getTempsWithPanicRecovery never lets a panic
 // from the injected temperature function escape: a panic must come back as an error with
 // nil temps, while a normal return (even one carrying an error) must yield a nil error.
 func TestGetTempsWithPanicRecovery(t *testing.T) {
 	// minimal agent: the function under test only reads sensorConfig.context
 	agent := &Agent{
 		systemInfo: system.Info{},
 		sensorConfig: &SensorConfig{
 			context: context.Background(),
 		},
 	}
 	tests := []struct {
 		name        string
 		getTempsFn  getTempsFn // stub injected in place of the real sensor reader
 		expectError bool       // true when the stub is expected to panic
 		errorMsg    string     // substring that must appear in the returned error
 	}{
 		{
 			name: "successful_function_call",
 			getTempsFn: func(ctx context.Context) ([]sensors.TemperatureStat, error) {
 				return []sensors.TemperatureStat{
 					{SensorKey: "test_sensor", Temperature: 45.0},
 				}, nil
 			},
 			expectError: false,
 		},
 		{
 			name: "function_returns_error",
 			getTempsFn: func(ctx context.Context) ([]sensors.TemperatureStat, error) {
 				return []sensors.TemperatureStat{
 					{SensorKey: "test_sensor", Temperature: 45.0},
 				}, fmt.Errorf("sensor error")
 			},
 			expectError: false, // getTempsWithPanicRecovery ignores errors from the function
 		},
 		{
 			name: "function_panics_with_string",
 			getTempsFn: func(ctx context.Context) ([]sensors.TemperatureStat, error) {
 				panic("test panic")
 			},
 			expectError: true,
 			errorMsg:    "panic: test panic",
 		},
 		{
 			name: "function_panics_with_error",
 			getTempsFn: func(ctx context.Context) ([]sensors.TemperatureStat, error) {
 				panic(fmt.Errorf("panic error"))
 			},
 			expectError: true,
 			errorMsg:    "panic:", // only the prefix is checked
 		},
 		{
 			name: "function_panics_with_index_out_of_bounds",
 			getTempsFn: func(ctx context.Context) ([]sensors.TemperatureStat, error) {
 				slice := []int{1, 2, 3}
 				_ = slice[10] // out of bounds panic
 				return nil, nil
 			},
 			expectError: true,
 			errorMsg:    "panic:",
 		},
 		{
 			name: "function_panics_with_any_conversion",
 			getTempsFn: func(ctx context.Context) ([]sensors.TemperatureStat, error) {
 				var i any = "string"
 				_ = i.(int) // type assertion panic
 				return nil, nil
 			},
 			expectError: true,
 			errorMsg:    "panic:",
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			var temps []sensors.TemperatureStat
 			var err error
 			// The function should not panic, regardless of what the injected function does
 			assert.NotPanics(t, func() {
 				temps, err = agent.getTempsWithPanicRecovery(tt.getTempsFn)
 			}, "getTempsWithPanicRecovery should not panic")
 			if tt.expectError {
 				assert.Error(t, err, "Expected an error to be returned")
 				if tt.errorMsg != "" {
 					assert.Contains(t, err.Error(), tt.errorMsg,
 						"Error message should contain expected text")
 				}
 				// a panic interrupts the call before temps is assigned
 				assert.Nil(t, temps, "Temps should be nil when panic occurs")
 			} else {
 				assert.NoError(t, err, "Should not return error for successful calls")
 			}
 		})
 	}
 }
--- a/supplemental/scripts/install-agent.ps1
+++ b/supplemental/scripts/install-agent.ps1
@@ -182,7 +182,7 @@ function Install-BeszelAgentWithScoop {
    scoop bucket add beszel https://github.com/henrygd/beszel-scoops | Out-Null
    Write-Host "Installing / updating beszel-agent..."
-    scoop install beszel-agent
+    scoop install beszel-agent | Out-Null
    if (-not (Test-CommandExists "beszel-agent")) {
        throw "Failed to install beszel-agent"
--- a/supplemental/scripts/install-agent.sh
+++ b/supplemental/scripts/install-agent.sh
@@ -316,18 +316,27 @@ fi
 # Create a dedicated user for the service if it doesn't exist
 if is_alpine; then
  if ! id -u beszel >/dev/null 2>&1; then
    echo "Creating a dedicated group for the Beszel Agent service..."
    addgroup beszel
    echo "Creating a dedicated user for the Beszel Agent service..."
-    adduser -S -D -H -s /sbin/nologin beszel
+    adduser -S -D -H -s /sbin/nologin -G beszel beszel
  fi
-  # Add the user to the docker group to allow access to the Docker socket
+  # Add the user to the docker group to allow access to the Docker socket if group docker exists
-  addgroup beszel docker
+  # Only touch the docker group when it exists; getent's output is discarded so the
+  # group record is not printed into the installer output.
+  if getent group docker >/dev/null 2>&1; then
 	  echo "Adding beszel to docker group"
 	  # Alpine ships BusyBox addgroup; usermod is only available from the optional
 	  # shadow package, so use addgroup to add the existing user to the group.
 	  addgroup beszel docker
   fi
 else
  if ! id -u beszel >/dev/null 2>&1; then
    echo "Creating a dedicated user for the Beszel Agent service..."
    useradd --system --home-dir /nonexistent --shell /bin/false beszel
  fi
-  # Add the user to the docker group to allow access to the Docker socket
+# Add the user to the docker group to allow access to the Docker socket if group docker exists
-  usermod -aG docker beszel
+  # Only touch the docker group when it exists; getent's output is discarded so the
+  # group record is not printed into the installer output.
+  if getent group docker >/dev/null 2>&1; then
 	  echo "Adding beszel to docker group"
 	  usermod -aG docker beszel
   fi
 fi
 # Create the directory for the Beszel Agent
Author	SHA1	Message	Date
henrygd	4395520a28	Probable fix for Jetson gpu issue (#895 )	2025-06-26 22:11:48 -04:00
Alexander Mnich	8c52f30a71	add GITHUB_TOKEN fallback for goreleaser (#925 ) adding the fallback to the GITHUB_TOKEN allows execution of goreleaser in a fork without additional configuration	2025-06-26 21:03:19 -04:00
SSU	46316ebffa	fix(install): suppress scoop output to avoid nssm path pollution (#918 ) Suppressed the output of “scoop install beszel-agent” to ensure the NSSM service path contains only the executable location. Closes #915 Co-authored-by: suseol <suseol@geosr.com>	2025-06-25 13:52:45 -04:00
henrygd	0b04f60b6c	Add panic recovery for sensors.TemperaturesWithContext (#796 )	2025-06-23 19:50:11 -04:00
HansAndreManfredson	20b822d072	Fix missing groups #892 (#893 )	2025-06-17 16:08:32 -04:00