fix(hub): add onAfterBootstrapAndMigrations to properly queue fns after migrations

also remove error return from NewHub and improve comments in hub.go
test: update tests that use os.Setenv to t.Setenv
2026-03-22 05:36:15 +01:00 · 2026-03-20 19:32:59 -04:00 · 2026-03-20 15:00:28 -04:00 · 2026-03-20 14:39:05 -04:00 · 2026-03-19 11:36:10 -04:00 · 2026-03-18 17:44:34 -04:00
80 changed files with 5737 additions and 1418 deletions
--- a/.github/workflows/vulncheck.yml
+++ b/.github/workflows/vulncheck.yml
@@ -19,11 +19,11 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Check out code into the Go module directory
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
      - name: Set up Go
-        uses: actions/setup-go@v5
+        uses: actions/setup-go@v6
        with:
-          go-version: 1.25.x
+          go-version: 1.26.x
          # cached: false
      - name: Get official govulncheck
        run: go install golang.org/x/vuln/cmd/govulncheck@latest
--- a/agent/agent.go
+++ b/agent/agent.go
@@ -6,7 +6,6 @@ package agent

 import (
 	"log/slog"
-	"os"
 	"strings"
 	"sync"
 	"time"
@@ -14,6 +13,7 @@ import (
 	"github.com/gliderlabs/ssh"
 	"github.com/henrygd/beszel"
 	"github.com/henrygd/beszel/agent/deltatracker"
+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/common"
 	"github.com/henrygd/beszel/internal/entities/system"
 	gossh "golang.org/x/crypto/ssh"
@@ -68,11 +68,11 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {
 		slog.Info("Data directory", "path", agent.dataDir)
 	}

-	agent.memCalc, _ = GetEnv("MEM_CALC")
+	agent.memCalc, _ = utils.GetEnv("MEM_CALC")
 	agent.sensorConfig = agent.newSensorConfig()

 	// Parse disk usage cache duration (e.g., "15m", "1h") to avoid waking sleeping disks
-	if diskUsageCache, exists := GetEnv("DISK_USAGE_CACHE"); exists {
+	if diskUsageCache, exists := utils.GetEnv("DISK_USAGE_CACHE"); exists {
 		if duration, err := time.ParseDuration(diskUsageCache); err == nil {
 			agent.diskUsageCacheDuration = duration
 			slog.Info("DISK_USAGE_CACHE", "duration", duration)
@@ -82,7 +82,7 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {
 	}

 	// Set up slog with a log level determined by the LOG_LEVEL env var
-	if logLevelStr, exists := GetEnv("LOG_LEVEL"); exists {
+	if logLevelStr, exists := utils.GetEnv("LOG_LEVEL"); exists {
 		switch strings.ToLower(logLevelStr) {
 		case "debug":
 			agent.debug = true
@@ -103,7 +103,7 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {
 	agent.refreshSystemDetails()

 	// SMART_INTERVAL env var to update smart data at this interval
-	if smartIntervalEnv, exists := GetEnv("SMART_INTERVAL"); exists {
+	if smartIntervalEnv, exists := utils.GetEnv("SMART_INTERVAL"); exists {
 		if duration, err := time.ParseDuration(smartIntervalEnv); err == nil && duration > 0 {
 			agent.systemDetails.SmartInterval = duration
 			slog.Info("SMART_INTERVAL", "duration", duration)
@@ -148,15 +148,6 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {
 	return agent, nil
 }

-// GetEnv retrieves an environment variable with a "BESZEL_AGENT_" prefix, or falls back to the unprefixed key.
-func GetEnv(key string) (value string, exists bool) {
-	if value, exists = os.LookupEnv("BESZEL_AGENT_" + key); exists {
-		return value, exists
-	}
-	// Fallback to the old unprefixed key
-	return os.LookupEnv(key)
-}
-
 func (a *Agent) gatherStats(options common.DataRequestOptions) *system.CombinedData {
 	a.Lock()
 	defer a.Unlock()
@@ -213,7 +204,7 @@ func (a *Agent) gatherStats(options common.DataRequestOptions) *system.CombinedD
 			data.Stats.ExtraFs[key] = stats
 			// Add percentages to Info struct for dashboard
 			if stats.DiskTotal > 0 {
-				pct := twoDecimals((stats.DiskUsed / stats.DiskTotal) * 100)
+				pct := utils.TwoDecimals((stats.DiskUsed / stats.DiskTotal) * 100)
 				data.Info.ExtraFsPct[key] = pct
 			}
 		}
--- a/agent/client.go
+++ b/agent/client.go
@@ -14,6 +14,7 @@ import (
 	"time"

 	"github.com/henrygd/beszel"
+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/common"

 	"github.com/fxamacker/cbor/v2"
@@ -43,7 +44,7 @@ type WebSocketClient struct {
 // newWebSocketClient creates a new WebSocket client for the given agent.
 // It reads configuration from environment variables and validates the hub URL.
 func newWebSocketClient(agent *Agent) (client *WebSocketClient, err error) {
-	hubURLStr, exists := GetEnv("HUB_URL")
+	hubURLStr, exists := utils.GetEnv("HUB_URL")
 	if !exists {
 		return nil, errors.New("HUB_URL environment variable not set")
 	}
@@ -72,12 +73,12 @@ func newWebSocketClient(agent *Agent) (client *WebSocketClient, err error) {
 // If neither is set, it returns an error.
 func getToken() (string, error) {
 	// get token from env var
-	token, _ := GetEnv("TOKEN")
+	token, _ := utils.GetEnv("TOKEN")
 	if token != "" {
 		return token, nil
 	}
 	// get token from file
-	tokenFile, _ := GetEnv("TOKEN_FILE")
+	tokenFile, _ := utils.GetEnv("TOKEN_FILE")
 	if tokenFile == "" {
 		return "", errors.New("must set TOKEN or TOKEN_FILE")
 	}
@@ -197,7 +198,7 @@ func (client *WebSocketClient) handleAuthChallenge(msg *common.HubRequest[cbor.R
 	}

 	if authRequest.NeedSysInfo {
-		response.Name, _ = GetEnv("SYSTEM_NAME")
+		response.Name, _ = utils.GetEnv("SYSTEM_NAME")
 		response.Hostname = client.agent.systemDetails.Hostname
 		serverAddr := client.agent.connectionManager.serverOptions.Addr
 		_, response.Port, _ = net.SplitHostPort(serverAddr)
--- a/agent/client_test.go
+++ b/agent/client_test.go
@@ -70,19 +70,11 @@ func TestNewWebSocketClient(t *testing.T) {
 		t.Run(tc.name, func(t *testing.T) {
 			// Set up environment
 			if tc.hubURL != "" {
-				os.Setenv("BESZEL_AGENT_HUB_URL", tc.hubURL)
-			} else {
-				os.Unsetenv("BESZEL_AGENT_HUB_URL")
+				t.Setenv("BESZEL_AGENT_HUB_URL", tc.hubURL)
 			}
 			if tc.token != "" {
-				os.Setenv("BESZEL_AGENT_TOKEN", tc.token)
-			} else {
-				os.Unsetenv("BESZEL_AGENT_TOKEN")
+				t.Setenv("BESZEL_AGENT_TOKEN", tc.token)
 			}
-			defer func() {
-				os.Unsetenv("BESZEL_AGENT_HUB_URL")
-				os.Unsetenv("BESZEL_AGENT_TOKEN")
-			}()

 			client, err := newWebSocketClient(agent)

@@ -138,12 +130,8 @@ func TestWebSocketClient_GetOptions(t *testing.T) {
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
 			// Set up environment
-			os.Setenv("BESZEL_AGENT_HUB_URL", tc.inputURL)
-			os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
-			defer func() {
-				os.Unsetenv("BESZEL_AGENT_HUB_URL")
-				os.Unsetenv("BESZEL_AGENT_TOKEN")
-			}()
+			t.Setenv("BESZEL_AGENT_HUB_URL", tc.inputURL)
+			t.Setenv("BESZEL_AGENT_TOKEN", "test-token")

 			client, err := newWebSocketClient(agent)
 			require.NoError(t, err)
@@ -185,12 +173,8 @@ func TestWebSocketClient_VerifySignature(t *testing.T) {
 	require.NoError(t, err)

 	// Set up environment
-	os.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
-	os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
-	defer func() {
-		os.Unsetenv("BESZEL_AGENT_HUB_URL")
-		os.Unsetenv("BESZEL_AGENT_TOKEN")
-	}()
+	t.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
+	t.Setenv("BESZEL_AGENT_TOKEN", "test-token")

 	client, err := newWebSocketClient(agent)
 	require.NoError(t, err)
@@ -258,12 +242,8 @@ func TestWebSocketClient_HandleHubRequest(t *testing.T) {
 	agent := createTestAgent(t)

 	// Set up environment
-	os.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
-	os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
-	defer func() {
-		os.Unsetenv("BESZEL_AGENT_HUB_URL")
-		os.Unsetenv("BESZEL_AGENT_TOKEN")
-	}()
+	t.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
+	t.Setenv("BESZEL_AGENT_TOKEN", "test-token")

 	client, err := newWebSocketClient(agent)
 	require.NoError(t, err)
@@ -350,13 +330,8 @@ func TestGetUserAgent(t *testing.T) {
 func TestWebSocketClient_Close(t *testing.T) {
 	agent := createTestAgent(t)

-	// Set up environment
-	os.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
-	os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
-	defer func() {
-		os.Unsetenv("BESZEL_AGENT_HUB_URL")
-		os.Unsetenv("BESZEL_AGENT_TOKEN")
-	}()
+	t.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
+	t.Setenv("BESZEL_AGENT_TOKEN", "test-token")

 	client, err := newWebSocketClient(agent)
 	require.NoError(t, err)
@@ -371,13 +346,8 @@ func TestWebSocketClient_Close(t *testing.T) {
 func TestWebSocketClient_ConnectRateLimit(t *testing.T) {
 	agent := createTestAgent(t)

-	// Set up environment
-	os.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
-	os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
-	defer func() {
-		os.Unsetenv("BESZEL_AGENT_HUB_URL")
-		os.Unsetenv("BESZEL_AGENT_TOKEN")
-	}()
+	t.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
+	t.Setenv("BESZEL_AGENT_TOKEN", "test-token")

 	client, err := newWebSocketClient(agent)
 	require.NoError(t, err)
@@ -393,20 +363,10 @@ func TestWebSocketClient_ConnectRateLimit(t *testing.T) {

 // TestGetToken tests the getToken function with various scenarios
 func TestGetToken(t *testing.T) {
-	unsetEnvVars := func() {
-		os.Unsetenv("BESZEL_AGENT_TOKEN")
-		os.Unsetenv("TOKEN")
-		os.Unsetenv("BESZEL_AGENT_TOKEN_FILE")
-		os.Unsetenv("TOKEN_FILE")
-	}
-
 	t.Run("token from TOKEN environment variable", func(t *testing.T) {
-		unsetEnvVars()
-
 		// Set TOKEN env var
 		expectedToken := "test-token-from-env"
-		os.Setenv("TOKEN", expectedToken)
-		defer os.Unsetenv("TOKEN")
+		t.Setenv("TOKEN", expectedToken)

 		token, err := getToken()
 		assert.NoError(t, err)
@@ -414,12 +374,9 @@ func TestGetToken(t *testing.T) {
 	})

 	t.Run("token from BESZEL_AGENT_TOKEN environment variable", func(t *testing.T) {
-		unsetEnvVars()
-
 		// Set BESZEL_AGENT_TOKEN env var (should take precedence)
 		expectedToken := "test-token-from-beszel-env"
-		os.Setenv("BESZEL_AGENT_TOKEN", expectedToken)
-		defer os.Unsetenv("BESZEL_AGENT_TOKEN")
+		t.Setenv("BESZEL_AGENT_TOKEN", expectedToken)

 		token, err := getToken()
 		assert.NoError(t, err)
@@ -427,8 +384,6 @@ func TestGetToken(t *testing.T) {
 	})

 	t.Run("token from TOKEN_FILE", func(t *testing.T) {
-		unsetEnvVars()
-
 		// Create a temporary token file
 		expectedToken := "test-token-from-file"
 		tokenFile, err := os.CreateTemp("", "token-test-*.txt")
@@ -440,8 +395,7 @@ func TestGetToken(t *testing.T) {
 		tokenFile.Close()

 		// Set TOKEN_FILE env var
-		os.Setenv("TOKEN_FILE", tokenFile.Name())
-		defer os.Unsetenv("TOKEN_FILE")
+		t.Setenv("TOKEN_FILE", tokenFile.Name())

 		token, err := getToken()
 		assert.NoError(t, err)
@@ -449,8 +403,6 @@ func TestGetToken(t *testing.T) {
 	})

 	t.Run("token from BESZEL_AGENT_TOKEN_FILE", func(t *testing.T) {
-		unsetEnvVars()
-
 		// Create a temporary token file
 		expectedToken := "test-token-from-beszel-file"
 		tokenFile, err := os.CreateTemp("", "token-test-*.txt")
@@ -462,8 +414,7 @@ func TestGetToken(t *testing.T) {
 		tokenFile.Close()

 		// Set BESZEL_AGENT_TOKEN_FILE env var (should take precedence)
-		os.Setenv("BESZEL_AGENT_TOKEN_FILE", tokenFile.Name())
-		defer os.Unsetenv("BESZEL_AGENT_TOKEN_FILE")
+		t.Setenv("BESZEL_AGENT_TOKEN_FILE", tokenFile.Name())

 		token, err := getToken()
 		assert.NoError(t, err)
@@ -471,8 +422,6 @@ func TestGetToken(t *testing.T) {
 	})

 	t.Run("TOKEN takes precedence over TOKEN_FILE", func(t *testing.T) {
-		unsetEnvVars()
-
 		// Create a temporary token file
 		fileToken := "token-from-file"
 		tokenFile, err := os.CreateTemp("", "token-test-*.txt")
@@ -485,12 +434,8 @@ func TestGetToken(t *testing.T) {

 		// Set both TOKEN and TOKEN_FILE
 		envToken := "token-from-env"
-		os.Setenv("TOKEN", envToken)
-		os.Setenv("TOKEN_FILE", tokenFile.Name())
-		defer func() {
-			os.Unsetenv("TOKEN")
-			os.Unsetenv("TOKEN_FILE")
-		}()
+		t.Setenv("TOKEN", envToken)
+		t.Setenv("TOKEN_FILE", tokenFile.Name())

 		token, err := getToken()
 		assert.NoError(t, err)
@@ -498,7 +443,10 @@ func TestGetToken(t *testing.T) {
 	})

 	t.Run("error when neither TOKEN nor TOKEN_FILE is set", func(t *testing.T) {
-		unsetEnvVars()
+		t.Setenv("BESZEL_AGENT_TOKEN", "")
+		t.Setenv("TOKEN", "")
+		t.Setenv("BESZEL_AGENT_TOKEN_FILE", "")
+		t.Setenv("TOKEN_FILE", "")

 		token, err := getToken()
 		assert.Error(t, err)
@@ -507,11 +455,8 @@ func TestGetToken(t *testing.T) {
 	})

 	t.Run("error when TOKEN_FILE points to non-existent file", func(t *testing.T) {
-		unsetEnvVars()
-
 		// Set TOKEN_FILE to a non-existent file
-		os.Setenv("TOKEN_FILE", "/non/existent/file.txt")
-		defer os.Unsetenv("TOKEN_FILE")
+		t.Setenv("TOKEN_FILE", "/non/existent/file.txt")

 		token, err := getToken()
 		assert.Error(t, err)
@@ -520,8 +465,6 @@ func TestGetToken(t *testing.T) {
 	})

 	t.Run("handles empty token file", func(t *testing.T) {
-		unsetEnvVars()
-
 		// Create an empty token file
 		tokenFile, err := os.CreateTemp("", "token-test-*.txt")
 		require.NoError(t, err)
@@ -529,8 +472,7 @@ func TestGetToken(t *testing.T) {
 		tokenFile.Close()

 		// Set TOKEN_FILE env var
-		os.Setenv("TOKEN_FILE", tokenFile.Name())
-		defer os.Unsetenv("TOKEN_FILE")
+		t.Setenv("TOKEN_FILE", tokenFile.Name())

 		token, err := getToken()
 		assert.NoError(t, err)
@@ -538,8 +480,6 @@ func TestGetToken(t *testing.T) {
 	})

 	t.Run("strips whitespace from TOKEN_FILE", func(t *testing.T) {
-		unsetEnvVars()
-
 		tokenWithWhitespace := "  test-token-with-whitespace  \n\t"
 		expectedToken := "test-token-with-whitespace"
 		tokenFile, err := os.CreateTemp("", "token-test-*.txt")
@@ -550,8 +490,7 @@ func TestGetToken(t *testing.T) {
 		require.NoError(t, err)
 		tokenFile.Close()

-		os.Setenv("TOKEN_FILE", tokenFile.Name())
-		defer os.Unsetenv("TOKEN_FILE")
+		t.Setenv("TOKEN_FILE", tokenFile.Name())

 		token, err := getToken()
 		assert.NoError(t, err)
--- a/agent/connection_manager_test.go
+++ b/agent/connection_manager_test.go
@@ -7,7 +7,6 @@ import (
 	"fmt"
 	"net"
 	"net/url"
-	"os"
 	"testing"
 	"time"

@@ -183,10 +182,6 @@ func TestConnectionManager_TickerManagement(t *testing.T) {

 // TestConnectionManager_WebSocketConnectionFlow tests WebSocket connection logic
 func TestConnectionManager_WebSocketConnectionFlow(t *testing.T) {
-	if testing.Short() {
-		t.Skip("Skipping WebSocket connection test in short mode")
-	}
-
 	agent := createTestAgent(t)
 	cm := agent.connectionManager

@@ -196,19 +191,18 @@ func TestConnectionManager_WebSocketConnectionFlow(t *testing.T) {
 	assert.Equal(t, Disconnected, cm.State, "State should remain Disconnected after failed connection")

 	// Test with invalid URL
-	os.Setenv("BESZEL_AGENT_HUB_URL", "invalid-url")
-	os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
-	defer func() {
-		os.Unsetenv("BESZEL_AGENT_HUB_URL")
-		os.Unsetenv("BESZEL_AGENT_TOKEN")
-	}()
-
-	// Test with missing token
-	os.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
-	os.Unsetenv("BESZEL_AGENT_TOKEN")
+	t.Setenv("BESZEL_AGENT_HUB_URL", "1,33%")
+	t.Setenv("BESZEL_AGENT_TOKEN", "test-token")

 	_, err2 := newWebSocketClient(agent)
-	assert.Error(t, err2, "WebSocket client creation should fail without token")
+	assert.Error(t, err2, "WebSocket client creation should fail with invalid URL")
+
+	// Test with missing token
+	t.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
+	t.Setenv("BESZEL_AGENT_TOKEN", "")
+
+	_, err3 := newWebSocketClient(agent)
+	assert.Error(t, err3, "WebSocket client creation should fail without token")
 }

 // TestConnectionManager_ReconnectionLogic tests reconnection prevention logic
@@ -234,12 +228,8 @@ func TestConnectionManager_ConnectWithRateLimit(t *testing.T) {
 	cm := agent.connectionManager

 	// Set up environment for WebSocket client creation
-	os.Setenv("BESZEL_AGENT_HUB_URL", "ws://localhost:8080")
-	os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
-	defer func() {
-		os.Unsetenv("BESZEL_AGENT_HUB_URL")
-		os.Unsetenv("BESZEL_AGENT_TOKEN")
-	}()
+	t.Setenv("BESZEL_AGENT_HUB_URL", "ws://localhost:8080")
+	t.Setenv("BESZEL_AGENT_TOKEN", "test-token")

 	// Create WebSocket client
 	wsClient, err := newWebSocketClient(agent)
@@ -285,12 +275,8 @@ func TestConnectionManager_CloseWebSocket(t *testing.T) {
 	}, "Should not panic when closing nil WebSocket client")

 	// Set up environment and create WebSocket client
-	os.Setenv("BESZEL_AGENT_HUB_URL", "ws://localhost:8080")
-	os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
-	defer func() {
-		os.Unsetenv("BESZEL_AGENT_HUB_URL")
-		os.Unsetenv("BESZEL_AGENT_TOKEN")
-	}()
+	t.Setenv("BESZEL_AGENT_HUB_URL", "ws://localhost:8080")
+	t.Setenv("BESZEL_AGENT_TOKEN", "test-token")

 	wsClient, err := newWebSocketClient(agent)
 	require.NoError(t, err)
--- a/agent/data_dir.go
+++ b/agent/data_dir.go
@@ -6,6 +6,8 @@ import (
 	"os"
 	"path/filepath"
 	"runtime"
+
+	"github.com/henrygd/beszel/agent/utils"
 )

 // GetDataDir returns the path to the data directory for the agent and an error
@@ -16,7 +18,7 @@ func GetDataDir(dataDirs ...string) (string, error) {
 		return testDataDirs(dataDirs)
 	}

-	dataDir, _ := GetEnv("DATA_DIR")
+	dataDir, _ := utils.GetEnv("DATA_DIR")
 	if dataDir != "" {
 		dataDirs = append(dataDirs, dataDir)
 	}
--- a/agent/data_dir_test.go
+++ b/agent/data_dir_test.go
@@ -39,17 +39,7 @@ func TestGetDataDir(t *testing.T) {
 	t.Run("DATA_DIR environment variable", func(t *testing.T) {
 		tempDir := t.TempDir()

-		// Set environment variable
-		oldValue := os.Getenv("DATA_DIR")
-		defer func() {
-			if oldValue == "" {
-				os.Unsetenv("BESZEL_AGENT_DATA_DIR")
-			} else {
-				os.Setenv("BESZEL_AGENT_DATA_DIR", oldValue)
-			}
-		}()
-
-		os.Setenv("BESZEL_AGENT_DATA_DIR", tempDir)
+		t.Setenv("BESZEL_AGENT_DATA_DIR", tempDir)

 		result, err := GetDataDir()
 		require.NoError(t, err)
@@ -65,17 +55,6 @@ func TestGetDataDir(t *testing.T) {

 	// Test fallback behavior (empty dataDir, no env var)
 	t.Run("fallback to default directories", func(t *testing.T) {
-		// Clear DATA_DIR environment variable
-		oldValue := os.Getenv("DATA_DIR")
-		defer func() {
-			if oldValue == "" {
-				os.Unsetenv("DATA_DIR")
-			} else {
-				os.Setenv("DATA_DIR", oldValue)
-			}
-		}()
-		os.Unsetenv("DATA_DIR")
-
 		// This will try platform-specific defaults, which may or may not work
 		// We're mainly testing that it doesn't panic and returns some result
 		result, err := GetDataDir()
--- a/agent/disk.go
+++ b/agent/disk.go
@@ -8,11 +8,31 @@ import (
 	"strings"
 	"time"

+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/system"

 	"github.com/shirou/gopsutil/v4/disk"
 )

+// fsRegistrationContext holds the shared lookup state needed to resolve a
+// filesystem into the tracked fsStats key and metadata.
+type fsRegistrationContext struct {
+	filesystem     string // value of optional FILESYSTEM env var
+	isWindows      bool
+	efPath         string // path to extra filesystems (default "/extra-filesystems")
+	diskIoCounters map[string]disk.IOCountersStat
+}
+
+// diskDiscovery groups the transient state for a single initializeDiskInfo run so
+// helper methods can share the same partitions, mount paths, and lookup functions
+type diskDiscovery struct {
+	agent          *Agent
+	rootMountPoint string
+	partitions     []disk.PartitionStat
+	usageFn        func(string) (*disk.UsageStat, error)
+	ctx            fsRegistrationContext
+}
+
 // parseFilesystemEntry parses a filesystem entry in the format "device__customname"
 // Returns the device/filesystem part and the custom name part
 func parseFilesystemEntry(entry string) (device, customName string) {
@@ -26,19 +46,230 @@ func parseFilesystemEntry(entry string) (device, customName string) {
 	return device, customName
 }

+// extraFilesystemPartitionInfo derives the I/O device and optional display name
+// for a mounted /extra-filesystems partition. The partition device wins; the folder
+// name supplies the custom name and is the device fallback only when Device is blank.
+func extraFilesystemPartitionInfo(p disk.PartitionStat) (device, customName string) {
+	device = strings.TrimSpace(p.Device)
+	folderDevice, customName := parseFilesystemEntry(filepath.Base(p.Mountpoint))
+	if device == "" {
+		device = folderDevice
+	}
+	return device, customName
+}
+
 func isDockerSpecialMountpoint(mountpoint string) bool {
 	switch mountpoint {
 	case "/etc/hosts", "/etc/resolv.conf", "/etc/hostname":
 		return true
-	default:
+	}
+	return false
+}
+
+// registerFilesystemStats resolves the tracked key and stats payload for a filesystem
+// before it is inserted into fsStats. The bool is false if the key already exists.
+func registerFilesystemStats(existing map[string]*system.FsStats, device, mountpoint string, root bool, customName string, ctx fsRegistrationContext) (string, *system.FsStats, bool) {
+	key := device
+	if !ctx.isWindows {
+		key = filepath.Base(device)
+	}
+
+	if root {
+		// Try to map root device to a diskIoCounters entry. First checks for an
+		// exact key match, then uses findIoDevice for normalized / prefix-based
+		// matching (e.g. nda0p2 -> nda0), and finally falls back to FILESYSTEM.
+		if _, ioMatch := ctx.diskIoCounters[key]; !ioMatch {
+			if matchedKey, match := findIoDevice(key, ctx.diskIoCounters); match {
+				key = matchedKey
+			} else if ctx.filesystem != "" {
+				if matchedKey, match := findIoDevice(ctx.filesystem, ctx.diskIoCounters); match {
+					key = matchedKey
+				}
+			}
+			if _, ioMatch = ctx.diskIoCounters[key]; !ioMatch {
+				slog.Warn("Root I/O unmapped; set FILESYSTEM", "device", device, "mountpoint", mountpoint)
+			}
+		}
+	} else {
+		// Check if non-root has diskstats and prefer the folder device for
+		// /extra-filesystems mounts when the discovered partition device is a
+		// mapper path (e.g. luks UUID) that obscures the underlying block device.
+		if _, ioMatch := ctx.diskIoCounters[key]; !ioMatch {
+			if strings.HasPrefix(mountpoint, ctx.efPath) {
+				folderDevice, _ := parseFilesystemEntry(filepath.Base(mountpoint))
+				if folderDevice != "" {
+					if matchedKey, match := findIoDevice(folderDevice, ctx.diskIoCounters); match {
+						key = matchedKey
+					}
+				}
+			}
+			if _, ioMatch = ctx.diskIoCounters[key]; !ioMatch {
+				if matchedKey, match := findIoDevice(key, ctx.diskIoCounters); match {
+					key = matchedKey
+				}
+			}
+		}
+	}
+
+	if _, exists := existing[key]; exists {
+		return "", nil, false
+	}
+
+	fsStats := &system.FsStats{Root: root, Mountpoint: mountpoint}
+	if customName != "" {
+		fsStats.Name = customName
+	}
+	return key, fsStats, true
+}
+
+// addFsStat inserts a discovered filesystem if it resolves to a new tracking
+// key. The key selection itself lives in registerFilesystemStats so that logic
+// can stay directly unit-tested.
+func (d *diskDiscovery) addFsStat(device, mountpoint string, root bool, customName string) {
+	key, fsStats, ok := registerFilesystemStats(d.agent.fsStats, device, mountpoint, root, customName, d.ctx)
+	if !ok {
+		return
+	}
+	d.agent.fsStats[key] = fsStats
+	name := key
+	if customName != "" {
+		name = customName
+	}
+	slog.Info("Detected disk", "name", name, "device", device, "mount", mountpoint, "io", key, "root", root)
+}
+
+// addConfiguredRootFs resolves FILESYSTEM against partitions first, then falls
+// back to direct diskstats matching for setups like ZFS where partitions do not
+// expose the physical device name.
+func (d *diskDiscovery) addConfiguredRootFs() bool {
+	if d.ctx.filesystem == "" {
 		return false
 	}
+
+	for _, p := range d.partitions {
+		if filesystemMatchesPartitionSetting(d.ctx.filesystem, p) {
+			d.addFsStat(p.Device, p.Mountpoint, true, "")
+			return true
+		}
+	}
+
+	// FILESYSTEM may name a physical disk absent from partitions (e.g. ZFS lists
+	// dataset paths like zroot/ROOT/default, not block devices).
+	if ioKey, match := findIoDevice(d.ctx.filesystem, d.ctx.diskIoCounters); match {
+		d.agent.fsStats[ioKey] = &system.FsStats{Root: true, Mountpoint: d.rootMountPoint}
+		return true
+	}
+
+	slog.Warn("Partition details not found", "filesystem", d.ctx.filesystem)
+	return false
+}
+
+func isRootFallbackPartition(p disk.PartitionStat, rootMountPoint string) bool {
+	return p.Mountpoint == rootMountPoint ||
+		(isDockerSpecialMountpoint(p.Mountpoint) && strings.HasPrefix(p.Device, "/dev"))
+}
+
+// addPartitionRootFs handles the non-configured root fallback path when a
+// partition looks like the active root mount but still needs translating to an
+// I/O device key.
+func (d *diskDiscovery) addPartitionRootFs(device, mountpoint string) bool {
+	fs, match := findIoDevice(filepath.Base(device), d.ctx.diskIoCounters)
+	if !match {
+		return false
+	}
+	// The resolved I/O device is already known here, so use it directly to avoid
+	// a second fallback search inside registerFilesystemStats.
+	d.addFsStat(fs, mountpoint, true, "")
+	return true
+}
+
+// addLastResortRootFs is only used when neither FILESYSTEM nor partition-based
+// heuristics can identify root, so it picks the busiest I/O device as a final
+// fallback and preserves the root mountpoint for usage collection.
+func (d *diskDiscovery) addLastResortRootFs() {
+	rootKey := mostActiveIoDevice(d.ctx.diskIoCounters)
+	if rootKey != "" {
+		slog.Warn("Using most active device for root I/O; set FILESYSTEM to override", "device", rootKey)
+	} else {
+		rootKey = filepath.Base(d.rootMountPoint)
+		if _, exists := d.agent.fsStats[rootKey]; exists {
+			rootKey = "root"
+		}
+		slog.Warn("Root I/O device not detected; set FILESYSTEM to override")
+	}
+	d.agent.fsStats[rootKey] = &system.FsStats{Root: true, Mountpoint: d.rootMountPoint}
+}
+
+// findPartitionByFilesystemSetting matches an EXTRA_FILESYSTEMS entry against a
+// discovered partition either by mountpoint or by device suffix.
+func findPartitionByFilesystemSetting(filesystem string, partitions []disk.PartitionStat) (disk.PartitionStat, bool) {
+	for _, p := range partitions {
+		if strings.HasSuffix(p.Device, filesystem) || p.Mountpoint == filesystem {
+			return p, true
+		}
+	}
+	return disk.PartitionStat{}, false
+}
+
+// addConfiguredExtraFsEntry resolves one EXTRA_FILESYSTEMS entry, preferring a
+// discovered partition and falling back to any path that disk.Usage accepts.
+func (d *diskDiscovery) addConfiguredExtraFsEntry(filesystem, customName string) {
+	if p, found := findPartitionByFilesystemSetting(filesystem, d.partitions); found {
+		d.addFsStat(p.Device, p.Mountpoint, false, customName)
+		return
+	}
+
+	if _, err := d.usageFn(filesystem); err == nil {
+		d.addFsStat(filepath.Base(filesystem), filesystem, false, customName)
+		return
+	} else {
+		slog.Error("Invalid filesystem", "name", filesystem, "err", err)
+	}
+}
+
+// addConfiguredExtraFilesystems parses and registers the comma-separated
+// EXTRA_FILESYSTEMS env var entries.
+func (d *diskDiscovery) addConfiguredExtraFilesystems(extraFilesystems string) {
+	for fsEntry := range strings.SplitSeq(extraFilesystems, ",") {
+		filesystem, customName := parseFilesystemEntry(fsEntry)
+		d.addConfiguredExtraFsEntry(filesystem, customName)
+	}
+}
+
+// addPartitionExtraFs registers partitions mounted under /extra-filesystems so
+// their display names can come from the folder name while their I/O keys still
+// prefer the underlying partition device.
+func (d *diskDiscovery) addPartitionExtraFs(p disk.PartitionStat) {
+	if !strings.HasPrefix(p.Mountpoint, d.ctx.efPath) {
+		return
+	}
+	device, customName := extraFilesystemPartitionInfo(p)
+	d.addFsStat(device, p.Mountpoint, false, customName)
+}
+
+// addExtraFilesystemFolders handles bare directories under /extra-filesystems
+// that may not appear in partition discovery, while skipping mountpoints that
+// were already registered from higher-fidelity sources.
+func (d *diskDiscovery) addExtraFilesystemFolders(folderNames []string) {
+	existingMountpoints := make(map[string]bool, len(d.agent.fsStats))
+	for _, stats := range d.agent.fsStats {
+		existingMountpoints[stats.Mountpoint] = true
+	}
+
+	for _, folderName := range folderNames {
+		mountpoint := filepath.Join(d.ctx.efPath, folderName)
+		slog.Debug("/extra-filesystems", "mountpoint", mountpoint)
+		if existingMountpoints[mountpoint] {
+			continue
+		}
+		device, customName := parseFilesystemEntry(folderName)
+		d.addFsStat(device, mountpoint, false, customName)
+	}
 }

 // Sets up the filesystems to monitor for disk usage and I/O.
 func (a *Agent) initializeDiskInfo() {
-	filesystem, _ := GetEnv("FILESYSTEM")
-	efPath := "/extra-filesystems"
+	filesystem, _ := utils.GetEnv("FILESYSTEM")
 	hasRoot := false
 	isWindows := runtime.GOOS == "windows"

@@ -55,167 +286,57 @@ func (a *Agent) initializeDiskInfo() {
 		}
 	}

-	// ioContext := context.WithValue(a.sensorsContext,
-	// 	common.EnvKey, common.EnvMap{common.HostProcEnvKey: "/tmp/testproc"},
-	// )
-	// diskIoCounters, err := disk.IOCountersWithContext(ioContext)
-
 	diskIoCounters, err := disk.IOCounters()
 	if err != nil {
 		slog.Error("Error getting diskstats", "err", err)
 	}
 	slog.Debug("Disk I/O", "diskstats", diskIoCounters)
-
-	// Helper function to add a filesystem to fsStats if it doesn't exist
-	addFsStat := func(device, mountpoint string, root bool, customName ...string) {
-		var key string
-		if isWindows {
-			key = device
-		} else {
-			key = filepath.Base(device)
-		}
-		var ioMatch bool
-		if _, exists := a.fsStats[key]; !exists {
-			if root {
-				slog.Info("Detected root device", "name", key)
-				// Try to map root device to a diskIoCounters entry. First
-				// checks for an exact key match, then uses findIoDevice for
-				// normalized / prefix-based matching (e.g. nda0p2 → nda0),
-				// and finally falls back to the FILESYSTEM env var.
-				if _, ioMatch = diskIoCounters[key]; !ioMatch {
-					if matchedKey, match := findIoDevice(key, diskIoCounters); match {
-						key = matchedKey
-						ioMatch = true
-					} else if filesystem != "" {
-						if matchedKey, match := findIoDevice(filesystem, diskIoCounters); match {
-							key = matchedKey
-							ioMatch = true
-						}
-					}
-					if !ioMatch {
-						slog.Warn("Root I/O unmapped; set FILESYSTEM", "device", device, "mountpoint", mountpoint)
-					}
-				}
-			} else {
-				// Check if non-root has diskstats and fall back to folder name if not
-				// Scenario: device is encrypted and named luks-2bcb02be-999d-4417-8d18-5c61e660fb6e - not in /proc/diskstats.
-				// However, the device can be specified by mounting folder from luks device at /extra-filesystems/sda1
-				if _, ioMatch = diskIoCounters[key]; !ioMatch {
-					efBase := filepath.Base(mountpoint)
-					if _, ioMatch = diskIoCounters[efBase]; ioMatch {
-						key = efBase
-					}
-				}
-			}
-			fsStats := &system.FsStats{Root: root, Mountpoint: mountpoint}
-			if len(customName) > 0 && customName[0] != "" {
-				fsStats.Name = customName[0]
-			}
-			a.fsStats[key] = fsStats
-		}
+	ctx := fsRegistrationContext{
+		filesystem:     filesystem,
+		isWindows:      isWindows,
+		diskIoCounters: diskIoCounters,
+		efPath:         "/extra-filesystems",
 	}

 	// Get the appropriate root mount point for this system
-	rootMountPoint := a.getRootMountPoint()
-
-	// Use FILESYSTEM env var to find root filesystem
-	if filesystem != "" {
-		for _, p := range partitions {
-			if filesystemMatchesPartitionSetting(filesystem, p) {
-				addFsStat(p.Device, p.Mountpoint, true)
-				hasRoot = true
-				break
-			}
-		}
-		if !hasRoot {
-			// FILESYSTEM may name a physical disk absent from partitions (e.g.
-			// ZFS lists dataset paths like zroot/ROOT/default, not block devices).
-			// Try matching directly against diskIoCounters.
-			if ioKey, match := findIoDevice(filesystem, diskIoCounters); match {
-				a.fsStats[ioKey] = &system.FsStats{Root: true, Mountpoint: rootMountPoint}
-				hasRoot = true
-			} else {
-				slog.Warn("Partition details not found", "filesystem", filesystem)
-			}
-		}
+	discovery := diskDiscovery{
+		agent:          a,
+		rootMountPoint: a.getRootMountPoint(),
+		partitions:     partitions,
+		usageFn:        disk.Usage,
+		ctx:            ctx,
 	}

-	// Add EXTRA_FILESYSTEMS env var values to fsStats
-	if extraFilesystems, exists := GetEnv("EXTRA_FILESYSTEMS"); exists {
-		for fsEntry := range strings.SplitSeq(extraFilesystems, ",") {
-			// Parse custom name from format: device__customname
-			fs, customName := parseFilesystemEntry(fsEntry)
+	hasRoot = discovery.addConfiguredRootFs()

-			found := false
-			for _, p := range partitions {
-				if strings.HasSuffix(p.Device, fs) || p.Mountpoint == fs {
-					addFsStat(p.Device, p.Mountpoint, false, customName)
-					found = true
-					break
-				}
-			}
-			// if not in partitions, test if we can get disk usage
-			if !found {
-				if _, err := disk.Usage(fs); err == nil {
-					addFsStat(filepath.Base(fs), fs, false, customName)
-				} else {
-					slog.Error("Invalid filesystem", "name", fs, "err", err)
-				}
-			}
-		}
+	// Add EXTRA_FILESYSTEMS env var values to fsStats
+	if extraFilesystems, exists := utils.GetEnv("EXTRA_FILESYSTEMS"); exists {
+		discovery.addConfiguredExtraFilesystems(extraFilesystems)
 	}

 	// Process partitions for various mount points
 	for _, p := range partitions {
-		// fmt.Println(p.Device, p.Mountpoint)
-		// Binary root fallback or docker root fallback
-		if !hasRoot && (p.Mountpoint == rootMountPoint || (isDockerSpecialMountpoint(p.Mountpoint) && strings.HasPrefix(p.Device, "/dev"))) {
-			fs, match := findIoDevice(filepath.Base(p.Device), diskIoCounters)
-			if match {
-				addFsStat(fs, p.Mountpoint, true)
-				hasRoot = true
-			}
-		}
-
-		// Check if device is in /extra-filesystems
-		if strings.HasPrefix(p.Mountpoint, efPath) {
-			device, customName := parseFilesystemEntry(p.Mountpoint)
-			addFsStat(device, p.Mountpoint, false, customName)
+		if !hasRoot && isRootFallbackPartition(p, discovery.rootMountPoint) {
+			hasRoot = discovery.addPartitionRootFs(p.Device, p.Mountpoint)
 		}
+		discovery.addPartitionExtraFs(p)
 	}

 	// Check all folders in /extra-filesystems and add them if not already present
-	if folders, err := os.ReadDir(efPath); err == nil {
-		existingMountpoints := make(map[string]bool)
-		for _, stats := range a.fsStats {
-			existingMountpoints[stats.Mountpoint] = true
-		}
+	if folders, err := os.ReadDir(discovery.ctx.efPath); err == nil {
+		folderNames := make([]string, 0, len(folders))
 		for _, folder := range folders {
 			if folder.IsDir() {
-				mountpoint := filepath.Join(efPath, folder.Name())
-				slog.Debug("/extra-filesystems", "mountpoint", mountpoint)
-				if !existingMountpoints[mountpoint] {
-					device, customName := parseFilesystemEntry(folder.Name())
-					addFsStat(device, mountpoint, false, customName)
-				}
+				folderNames = append(folderNames, folder.Name())
 			}
 		}
+		discovery.addExtraFilesystemFolders(folderNames)
 	}

 	// If no root filesystem set, try the most active I/O device as a last
 	// resort (e.g. ZFS where dataset names are unrelated to disk names).
 	if !hasRoot {
-		rootKey := mostActiveIoDevice(diskIoCounters)
-		if rootKey != "" {
-			slog.Warn("Using most active device for root I/O; set FILESYSTEM to override", "device", rootKey)
-		} else {
-			rootKey = filepath.Base(rootMountPoint)
-			if _, exists := a.fsStats[rootKey]; exists {
-				rootKey = "root"
-			}
-			slog.Warn("Root I/O device not detected; set FILESYSTEM to override")
-		}
-		a.fsStats[rootKey] = &system.FsStats{Root: true, Mountpoint: rootMountPoint}
+		discovery.addLastResortRootFs()
 	}

 	a.pruneDuplicateRootExtraFilesystems()
@@ -380,6 +501,8 @@ func normalizeDeviceName(value string) string {

 // Sets start values for disk I/O stats.
 func (a *Agent) initializeDiskIoStats(diskIoCounters map[string]disk.IOCountersStat) {
+	a.fsNames = a.fsNames[:0]
+	now := time.Now()
 	for device, stats := range a.fsStats {
 		// skip if not in diskIoCounters
 		d, exists := diskIoCounters[device]
@@ -388,7 +511,7 @@ func (a *Agent) initializeDiskIoStats(diskIoCounters map[string]disk.IOCountersS
 			continue
 		}
 		// populate initial values
-		stats.Time = time.Now()
+		stats.Time = now
 		stats.TotalRead = d.ReadBytes
 		stats.TotalWrite = d.WriteBytes
 		// add to list of valid io device names
@@ -412,12 +535,12 @@ func (a *Agent) updateDiskUsage(systemStats *system.Stats) {
 			continue
 		}
 		if d, err := disk.Usage(stats.Mountpoint); err == nil {
-			stats.DiskTotal = bytesToGigabytes(d.Total)
-			stats.DiskUsed = bytesToGigabytes(d.Used)
+			stats.DiskTotal = utils.BytesToGigabytes(d.Total)
+			stats.DiskUsed = utils.BytesToGigabytes(d.Used)
 			if stats.Root {
-				systemStats.DiskTotal = bytesToGigabytes(d.Total)
-				systemStats.DiskUsed = bytesToGigabytes(d.Used)
-				systemStats.DiskPct = twoDecimals(d.UsedPercent)
+				systemStats.DiskTotal = utils.BytesToGigabytes(d.Total)
+				systemStats.DiskUsed = utils.BytesToGigabytes(d.Used)
+				systemStats.DiskPct = utils.TwoDecimals(d.UsedPercent)
 			}
 		} else {
 			// reset stats if error (likely unmounted)
@@ -470,8 +593,8 @@ func (a *Agent) updateDiskIo(cacheTimeMs uint16, systemStats *system.Stats) {

 			diskIORead := (d.ReadBytes - prev.readBytes) * 1000 / msElapsed
 			diskIOWrite := (d.WriteBytes - prev.writeBytes) * 1000 / msElapsed
-			readMbPerSecond := bytesToMegabytes(float64(diskIORead))
-			writeMbPerSecond := bytesToMegabytes(float64(diskIOWrite))
+			readMbPerSecond := utils.BytesToMegabytes(float64(diskIORead))
+			writeMbPerSecond := utils.BytesToMegabytes(float64(diskIOWrite))

 			// validate values
 			if readMbPerSecond > 50_000 || writeMbPerSecond > 50_000 {
--- a/agent/disk_test.go
+++ b/agent/disk_test.go
@@ -93,6 +93,443 @@ func TestParseFilesystemEntry(t *testing.T) {
 	}
 }

+// TestExtraFilesystemPartitionInfo checks how extraFilesystemPartitionInfo
+// derives the device and custom name for a partition mounted under
+// /extra-filesystems: the partition device is used when present, and the
+// folder name supplies the custom name (the text after "__") as well as the
+// fallback device when the partition has none.
+func TestExtraFilesystemPartitionInfo(t *testing.T) {
+	t.Run("uses partition device for label-only mountpoint", func(t *testing.T) {
+		device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
+			Device:     "/dev/sdc",
+			Mountpoint: "/extra-filesystems/Share",
+		})
+
+		assert.Equal(t, "/dev/sdc", device)
+		assert.Equal(t, "", customName)
+	})
+
+	t.Run("uses custom name from mountpoint suffix", func(t *testing.T) {
+		device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
+			Device:     "/dev/sdc",
+			Mountpoint: "/extra-filesystems/sdc__Share",
+		})
+
+		assert.Equal(t, "/dev/sdc", device)
+		assert.Equal(t, "Share", customName)
+	})
+
+	t.Run("falls back to folder device when partition device is unavailable", func(t *testing.T) {
+		// No Device set: the device must come from the folder name prefix.
+		device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
+			Mountpoint: "/extra-filesystems/sdc__Share",
+		})
+
+		assert.Equal(t, "sdc", device)
+		assert.Equal(t, "Share", customName)
+	})
+
+	t.Run("supports custom name without folder device prefix", func(t *testing.T) {
+		device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
+			Device:     "/dev/sdc",
+			Mountpoint: "/extra-filesystems/__Share",
+		})
+
+		assert.Equal(t, "/dev/sdc", device)
+		assert.Equal(t, "Share", customName)
+	})
+}
+
+// TestBuildFsStatRegistration exercises registerFilesystemStats: how the
+// fsStats key is chosen for root and non-root filesystems (exact I/O counter
+// match, prefix match, FILESYSTEM fallback, extra-filesystems folder name,
+// mapper label), how Windows device names are keyed, and that an already
+// registered key is not registered again.
+func TestBuildFsStatRegistration(t *testing.T) {
+	t.Run("uses basename for non-windows exact io match", func(t *testing.T) {
+		key, stats, ok := registerFilesystemStats(
+			map[string]*system.FsStats{},
+			"/dev/sda1",
+			"/mnt/data",
+			false,
+			"archive",
+			fsRegistrationContext{
+				isWindows: false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"sda1": {Name: "sda1"},
+				},
+			},
+		)
+
+		assert.True(t, ok)
+		assert.Equal(t, "sda1", key)
+		assert.Equal(t, "/mnt/data", stats.Mountpoint)
+		assert.Equal(t, "archive", stats.Name)
+		assert.False(t, stats.Root)
+	})
+
+	t.Run("maps root partition to io device by prefix", func(t *testing.T) {
+		key, stats, ok := registerFilesystemStats(
+			map[string]*system.FsStats{},
+			"/dev/ada0p2",
+			"/",
+			true,
+			"",
+			fsRegistrationContext{
+				isWindows: false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"ada0": {Name: "ada0", ReadBytes: 1000, WriteBytes: 1000},
+				},
+			},
+		)
+
+		assert.True(t, ok)
+		assert.Equal(t, "ada0", key)
+		assert.True(t, stats.Root)
+		assert.Equal(t, "/", stats.Mountpoint)
+	})
+
+	t.Run("uses filesystem setting as root fallback", func(t *testing.T) {
+		// "overlay" matches no I/O counter, so the FILESYSTEM value is used.
+		key, _, ok := registerFilesystemStats(
+			map[string]*system.FsStats{},
+			"overlay",
+			"/",
+			true,
+			"",
+			fsRegistrationContext{
+				filesystem: "nvme0n1p2",
+				isWindows:  false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"nvme0n1": {Name: "nvme0n1", ReadBytes: 1000, WriteBytes: 1000},
+				},
+			},
+		)
+
+		assert.True(t, ok)
+		assert.Equal(t, "nvme0n1", key)
+	})
+
+	t.Run("prefers parsed extra-filesystems device over mapper device", func(t *testing.T) {
+		key, stats, ok := registerFilesystemStats(
+			map[string]*system.FsStats{},
+			"/dev/mapper/luks-2bcb02be-999d-4417-8d18-5c61e660fb6e",
+			"/extra-filesystems/nvme0n1p2__Archive",
+			false,
+			"Archive",
+			fsRegistrationContext{
+				isWindows: false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"dm-1":      {Name: "dm-1", Label: "luks-2bcb02be-999d-4417-8d18-5c61e660fb6e"},
+					"nvme0n1p2": {Name: "nvme0n1p2"},
+				},
+			},
+		)
+
+		assert.True(t, ok)
+		assert.Equal(t, "nvme0n1p2", key)
+		assert.Equal(t, "Archive", stats.Name)
+	})
+
+	t.Run("falls back to mapper io device when folder device cannot be resolved", func(t *testing.T) {
+		key, stats, ok := registerFilesystemStats(
+			map[string]*system.FsStats{},
+			"/dev/mapper/luks-2bcb02be-999d-4417-8d18-5c61e660fb6e",
+			"/extra-filesystems/Archive",
+			false,
+			"Archive",
+			fsRegistrationContext{
+				isWindows: false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"dm-1": {Name: "dm-1", Label: "luks-2bcb02be-999d-4417-8d18-5c61e660fb6e"},
+				},
+			},
+		)
+
+		assert.True(t, ok)
+		assert.Equal(t, "dm-1", key)
+		assert.Equal(t, "Archive", stats.Name)
+	})
+
+	t.Run("uses full device name on windows", func(t *testing.T) {
+		// Raw string literals do not process escapes, so a single
+		// backslash is written as-is for the drive root.
+		key, _, ok := registerFilesystemStats(
+			map[string]*system.FsStats{},
+			`C:`,
+			`C:\`,
+			false,
+			"",
+			fsRegistrationContext{
+				isWindows: true,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					`C:`: {Name: `C:`},
+				},
+			},
+		)
+
+		assert.True(t, ok)
+		assert.Equal(t, `C:`, key)
+	})
+
+	t.Run("skips existing key", func(t *testing.T) {
+		key, stats, ok := registerFilesystemStats(
+			map[string]*system.FsStats{"sda1": {Mountpoint: "/existing"}},
+			"/dev/sda1",
+			"/mnt/data",
+			false,
+			"",
+			fsRegistrationContext{
+				isWindows: false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"sda1": {Name: "sda1"},
+				},
+			},
+		)
+
+		assert.False(t, ok)
+		assert.Empty(t, key)
+		assert.Nil(t, stats)
+	})
+}
+
+// TestAddConfiguredRootFs covers root registration driven by the configured
+// filesystem value: a matching partition, a direct match against the I/O
+// counters when no partition matches, and the unresolved case where nothing
+// is registered and false is returned.
+func TestAddConfiguredRootFs(t *testing.T) {
+	t.Run("adds root from matching partition", func(t *testing.T) {
+		agent := &Agent{fsStats: make(map[string]*system.FsStats)}
+		discovery := diskDiscovery{
+			agent:          agent,
+			rootMountPoint: "/",
+			partitions:     []disk.PartitionStat{{Device: "/dev/ada0p2", Mountpoint: "/"}},
+			ctx: fsRegistrationContext{
+				filesystem: "/dev/ada0p2",
+				isWindows:  false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"ada0": {Name: "ada0", ReadBytes: 1000, WriteBytes: 1000},
+				},
+			},
+		}
+
+		ok := discovery.addConfiguredRootFs()
+
+		assert.True(t, ok)
+		stats, exists := agent.fsStats["ada0"]
+		assert.True(t, exists)
+		assert.True(t, stats.Root)
+		assert.Equal(t, "/", stats.Mountpoint)
+	})
+
+	t.Run("adds root from io device when partition is missing", func(t *testing.T) {
+		// No partitions are supplied; "zroot" only appears as an I/O label.
+		agent := &Agent{fsStats: make(map[string]*system.FsStats)}
+		discovery := diskDiscovery{
+			agent:          agent,
+			rootMountPoint: "/sysroot",
+			ctx: fsRegistrationContext{
+				filesystem: "zroot",
+				isWindows:  false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"nda0": {Name: "nda0", Label: "zroot", ReadBytes: 1000, WriteBytes: 1000},
+				},
+			},
+		}
+
+		ok := discovery.addConfiguredRootFs()
+
+		assert.True(t, ok)
+		stats, exists := agent.fsStats["nda0"]
+		assert.True(t, exists)
+		assert.True(t, stats.Root)
+		assert.Equal(t, "/sysroot", stats.Mountpoint)
+	})
+
+	t.Run("returns false when filesystem cannot be resolved", func(t *testing.T) {
+		agent := &Agent{fsStats: make(map[string]*system.FsStats)}
+		discovery := diskDiscovery{
+			agent:          agent,
+			rootMountPoint: "/",
+			ctx: fsRegistrationContext{
+				filesystem:     "missing-disk",
+				isWindows:      false,
+				diskIoCounters: map[string]disk.IOCountersStat{},
+			},
+		}
+
+		ok := discovery.addConfiguredRootFs()
+
+		assert.False(t, ok)
+		assert.Empty(t, agent.fsStats)
+	})
+}
+
+// TestAddPartitionRootFs checks the partition-based root fallback: a
+// partition device that maps to an I/O counter entry is registered as root,
+// and nothing is registered when no I/O device matches.
+func TestAddPartitionRootFs(t *testing.T) {
+	t.Run("adds root from fallback partition candidate", func(t *testing.T) {
+		agent := &Agent{fsStats: make(map[string]*system.FsStats)}
+		discovery := diskDiscovery{
+			agent: agent,
+			ctx: fsRegistrationContext{
+				isWindows: false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"nvme0n1": {Name: "nvme0n1", ReadBytes: 1000, WriteBytes: 1000},
+				},
+			},
+		}
+
+		ok := discovery.addPartitionRootFs("/dev/nvme0n1p2", "/")
+
+		assert.True(t, ok)
+		stats, exists := agent.fsStats["nvme0n1"]
+		assert.True(t, exists)
+		assert.True(t, stats.Root)
+		assert.Equal(t, "/", stats.Mountpoint)
+	})
+
+	t.Run("returns false when no io device matches", func(t *testing.T) {
+		agent := &Agent{fsStats: make(map[string]*system.FsStats)}
+		discovery := diskDiscovery{agent: agent, ctx: fsRegistrationContext{diskIoCounters: map[string]disk.IOCountersStat{}}}
+
+		ok := discovery.addPartitionRootFs("/dev/mapper/root", "/")
+
+		assert.False(t, ok)
+		assert.Empty(t, agent.fsStats)
+	})
+}
+
+// TestAddLastResortRootFs covers the final root fallback: with I/O counters
+// available the busiest device becomes the root key, and without them the
+// key "root" is used when the root mountpoint's basename is already taken.
+func TestAddLastResortRootFs(t *testing.T) {
+	t.Run("uses most active io device when available", func(t *testing.T) {
+		agent := &Agent{fsStats: make(map[string]*system.FsStats)}
+		discovery := diskDiscovery{agent: agent, rootMountPoint: "/", ctx: fsRegistrationContext{diskIoCounters: map[string]disk.IOCountersStat{
+			"sda": {Name: "sda", ReadBytes: 5000, WriteBytes: 5000},
+			"sdb": {Name: "sdb", ReadBytes: 1000, WriteBytes: 1000},
+		}}}
+
+		discovery.addLastResortRootFs()
+
+		stats, exists := agent.fsStats["sda"]
+		assert.True(t, exists)
+		assert.True(t, stats.Root)
+	})
+
+	t.Run("falls back to root key when mountpoint basename collides", func(t *testing.T) {
+		// "sysroot" is pre-registered, so the basename of "/sysroot" collides.
+		agent := &Agent{fsStats: map[string]*system.FsStats{
+			"sysroot": {Mountpoint: "/extra-filesystems/sysroot"},
+		}}
+		discovery := diskDiscovery{agent: agent, rootMountPoint: "/sysroot", ctx: fsRegistrationContext{diskIoCounters: map[string]disk.IOCountersStat{}}}
+
+		discovery.addLastResortRootFs()
+
+		stats, exists := agent.fsStats["root"]
+		assert.True(t, exists)
+		assert.True(t, stats.Root)
+		assert.Equal(t, "/sysroot", stats.Mountpoint)
+	})
+}
+
+// TestAddConfiguredExtraFsEntry checks registration of a single configured
+// extra filesystem: a matching partition is used without consulting usageFn,
+// a path accepted by usageFn is registered under its basename, and an entry
+// rejected by usageFn is not registered.
+func TestAddConfiguredExtraFsEntry(t *testing.T) {
+	t.Run("uses matching partition when present", func(t *testing.T) {
+		agent := &Agent{fsStats: make(map[string]*system.FsStats)}
+		discovery := diskDiscovery{
+			agent:      agent,
+			partitions: []disk.PartitionStat{{Device: "/dev/sdb1", Mountpoint: "/mnt/backup"}},
+			usageFn: func(string) (*disk.UsageStat, error) {
+				t.Fatal("usage fallback should not be called when partition matches")
+				return nil, nil
+			},
+			ctx: fsRegistrationContext{
+				isWindows: false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"sdb1": {Name: "sdb1"},
+				},
+			},
+		}
+
+		discovery.addConfiguredExtraFsEntry("sdb1", "backup")
+
+		stats, exists := agent.fsStats["sdb1"]
+		assert.True(t, exists)
+		assert.Equal(t, "/mnt/backup", stats.Mountpoint)
+		assert.Equal(t, "backup", stats.Name)
+	})
+
+	t.Run("falls back to usage-validated path", func(t *testing.T) {
+		// No partitions are configured, so only the usageFn path can succeed.
+		agent := &Agent{fsStats: make(map[string]*system.FsStats)}
+		discovery := diskDiscovery{
+			agent: agent,
+			usageFn: func(path string) (*disk.UsageStat, error) {
+				assert.Equal(t, "/srv/archive", path)
+				return &disk.UsageStat{}, nil
+			},
+			ctx: fsRegistrationContext{
+				isWindows: false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"archive": {Name: "archive"},
+				},
+			},
+		}
+
+		discovery.addConfiguredExtraFsEntry("/srv/archive", "archive")
+
+		stats, exists := agent.fsStats["archive"]
+		assert.True(t, exists)
+		assert.Equal(t, "/srv/archive", stats.Mountpoint)
+		assert.Equal(t, "archive", stats.Name)
+	})
+
+	t.Run("ignores invalid filesystem entry", func(t *testing.T) {
+		agent := &Agent{fsStats: make(map[string]*system.FsStats)}
+		discovery := diskDiscovery{
+			agent: agent,
+			usageFn: func(string) (*disk.UsageStat, error) {
+				return nil, os.ErrNotExist
+			},
+		}
+
+		discovery.addConfiguredExtraFsEntry("/missing/archive", "")
+
+		assert.Empty(t, agent.fsStats)
+	})
+}
+
+// TestAddConfiguredExtraFilesystems verifies that a comma-separated
+// EXTRA_FILESYSTEMS value is split into entries and that each entry is
+// registered with its "__"-suffixed custom name.
+func TestAddConfiguredExtraFilesystems(t *testing.T) {
+	t.Run("parses and registers multiple configured filesystems", func(t *testing.T) {
+		agent := &Agent{fsStats: make(map[string]*system.FsStats)}
+		discovery := diskDiscovery{
+			agent:      agent,
+			partitions: []disk.PartitionStat{{Device: "/dev/sda1", Mountpoint: "/mnt/fast"}},
+			// Only /srv/archive is accepted by the usage stub.
+			usageFn: func(path string) (*disk.UsageStat, error) {
+				if path == "/srv/archive" {
+					return &disk.UsageStat{}, nil
+				}
+				return nil, os.ErrNotExist
+			},
+			ctx: fsRegistrationContext{
+				isWindows: false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"sda1":    {Name: "sda1"},
+					"archive": {Name: "archive"},
+				},
+			},
+		}
+
+		discovery.addConfiguredExtraFilesystems("sda1__fast,/srv/archive__cold")
+
+		assert.Contains(t, agent.fsStats, "sda1")
+		assert.Equal(t, "fast", agent.fsStats["sda1"].Name)
+		assert.Contains(t, agent.fsStats, "archive")
+		assert.Equal(t, "cold", agent.fsStats["archive"].Name)
+	})
+}
+
+// TestAddExtraFilesystemFolders verifies that folders under the
+// extra-filesystems path are registered with the device and custom name
+// parsed from the folder name, and that a folder whose mountpoint is already
+// registered is not added again.
+func TestAddExtraFilesystemFolders(t *testing.T) {
+	t.Run("adds missing folders and skips existing mountpoints", func(t *testing.T) {
+		agent := &Agent{fsStats: map[string]*system.FsStats{
+			"existing": {Mountpoint: "/extra-filesystems/existing"},
+		}}
+		discovery := diskDiscovery{
+			agent: agent,
+			ctx: fsRegistrationContext{
+				isWindows: false,
+				efPath:    "/extra-filesystems",
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"newdisk": {Name: "newdisk"},
+				},
+			},
+		}
+
+		discovery.addExtraFilesystemFolders([]string{"existing", "newdisk__Archive"})
+
+		// One pre-existing entry plus the newly added folder.
+		assert.Len(t, agent.fsStats, 2)
+		stats, exists := agent.fsStats["newdisk"]
+		assert.True(t, exists)
+		assert.Equal(t, "/extra-filesystems/newdisk__Archive", stats.Mountpoint)
+		assert.Equal(t, "Archive", stats.Name)
+	})
+}
+
 func TestFindIoDevice(t *testing.T) {
 	t.Run("matches by device name", func(t *testing.T) {
 		ioCounters := map[string]disk.IOCountersStat{
@@ -250,18 +687,8 @@ func TestIsDockerSpecialMountpoint(t *testing.T) {
 }

 func TestInitializeDiskInfoWithCustomNames(t *testing.T) {
-	// Set up environment variables
-	oldEnv := os.Getenv("EXTRA_FILESYSTEMS")
-	defer func() {
-		if oldEnv != "" {
-			os.Setenv("EXTRA_FILESYSTEMS", oldEnv)
-		} else {
-			os.Unsetenv("EXTRA_FILESYSTEMS")
-		}
-	}()
-
 	// Test with custom names
-	os.Setenv("EXTRA_FILESYSTEMS", "sda1__my-storage,/dev/sdb1__backup-drive,nvme0n1p2")
+	t.Setenv("EXTRA_FILESYSTEMS", "sda1__my-storage,/dev/sdb1__backup-drive,nvme0n1p2")

 	// Mock disk partitions (we'll just test the parsing logic)
 	// Since the actual disk operations are system-dependent, we'll focus on the parsing
@@ -289,7 +716,7 @@ func TestInitializeDiskInfoWithCustomNames(t *testing.T) {

 	for _, tc := range testCases {
 		t.Run("env_"+tc.envValue, func(t *testing.T) {
-			os.Setenv("EXTRA_FILESYSTEMS", tc.envValue)
+			t.Setenv("EXTRA_FILESYSTEMS", tc.envValue)

 			// Create mock partitions that would match our test cases
 			partitions := []disk.PartitionStat{}
@@ -310,7 +737,7 @@ func TestInitializeDiskInfoWithCustomNames(t *testing.T) {
 			// Test the parsing logic by calling the relevant part
 			// We'll create a simplified version to test just the parsing
 			extraFilesystems := tc.envValue
-			for _, fsEntry := range strings.Split(extraFilesystems, ",") {
+			for fsEntry := range strings.SplitSeq(extraFilesystems, ",") {
 				// Parse the entry
 				fsEntry = strings.TrimSpace(fsEntry)
 				var fs, customName string
@@ -506,3 +933,33 @@ func TestHasSameDiskUsage(t *testing.T) {
 		assert.False(t, hasSameDiskUsage(&disk.UsageStat{Total: 0, Used: 0}, &disk.UsageStat{Total: 1, Used: 1}))
 	})
 }
+
+// TestInitializeDiskIoStatsResetsTrackedDevices verifies that
+// initializeDiskIoStats rebuilds fsNames on every call: stale names are
+// dropped, only devices present in the supplied counters are tracked, and the
+// initial read/write totals and timestamp are populated.
+func TestInitializeDiskIoStatsResetsTrackedDevices(t *testing.T) {
+	agent := &Agent{
+		fsStats: map[string]*system.FsStats{
+			"sda": {},
+			"sdb": {},
+		},
+		// Pre-seeded with a stale entry and a duplicate candidate.
+		fsNames: []string{"stale", "sda"},
+	}
+
+	agent.initializeDiskIoStats(map[string]disk.IOCountersStat{
+		"sda": {Name: "sda", ReadBytes: 10, WriteBytes: 20},
+		"sdb": {Name: "sdb", ReadBytes: 30, WriteBytes: 40},
+	})
+
+	assert.ElementsMatch(t, []string{"sda", "sdb"}, agent.fsNames)
+	assert.Len(t, agent.fsNames, 2)
+	assert.Equal(t, uint64(10), agent.fsStats["sda"].TotalRead)
+	assert.Equal(t, uint64(20), agent.fsStats["sda"].TotalWrite)
+	assert.False(t, agent.fsStats["sda"].Time.IsZero())
+	assert.False(t, agent.fsStats["sdb"].Time.IsZero())
+
+	// Second call with fewer counters: only "sdb" should remain tracked.
+	agent.initializeDiskIoStats(map[string]disk.IOCountersStat{
+		"sdb": {Name: "sdb", ReadBytes: 50, WriteBytes: 60},
+	})
+
+	assert.Equal(t, []string{"sdb"}, agent.fsNames)
+	assert.Equal(t, uint64(50), agent.fsStats["sdb"].TotalRead)
+	assert.Equal(t, uint64(60), agent.fsStats["sdb"].TotalWrite)
+}
--- a/agent/docker.go
+++ b/agent/docker.go
@@ -16,11 +16,14 @@ import (
 	"os"
 	"path"
 	"regexp"
+	"sort"
+	"strconv"
 	"strings"
 	"sync"
 	"time"

 	"github.com/henrygd/beszel/agent/deltatracker"
+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/container"

 	"github.com/blang/semver"
@@ -336,15 +339,48 @@ func validateCpuPercentage(cpuPct float64, containerName string) error {

 // updateContainerStatsValues updates the final stats values
+//
+// It writes the CPU percentage (through utils.TwoDecimals), memory and the
+// deprecated NetworkSent/NetworkRecv fields (through utils.BytesToMegabytes),
+// the raw sent/received deltas as Bandwidth, and readTime as PrevReadTime.
 func updateContainerStatsValues(stats *container.Stats, cpuPct float64, usedMemory uint64, sent_delta, recv_delta uint64, readTime time.Time) {
-	stats.Cpu = twoDecimals(cpuPct)
-	stats.Mem = bytesToMegabytes(float64(usedMemory))
+	stats.Cpu = utils.TwoDecimals(cpuPct)
+	stats.Mem = utils.BytesToMegabytes(float64(usedMemory))
 	stats.Bandwidth = [2]uint64{sent_delta, recv_delta}
 	// TODO(0.19+): stop populating NetworkSent/NetworkRecv (deprecated in 0.18.3)
-	stats.NetworkSent = bytesToMegabytes(float64(sent_delta))
-	stats.NetworkRecv = bytesToMegabytes(float64(recv_delta))
+	stats.NetworkSent = utils.BytesToMegabytes(float64(sent_delta))
+	stats.NetworkRecv = utils.BytesToMegabytes(float64(recv_delta))
 	stats.PrevReadTime = readTime
 }

+// convertContainerPortsToString renders the published ports of ctr as a
+// ", "-separated list ordered by public port, e.g. "80, 127.0.0.1:8080".
+// Entries with public port 0 and repeated public ports are left out, and the
+// IP prefix is omitted for the addresses "0.0.0.0" and "::".
+// For a non-empty input, ctr.Ports is sorted in place and then set to nil so
+// the slice is not accidentally reused.
+func convertContainerPortsToString(ctr *container.ApiInfo) string {
+	if len(ctr.Ports) == 0 {
+		return ""
+	}
+	sort.Slice(ctr.Ports, func(a, b int) bool {
+		return ctr.Ports[a].PublicPort < ctr.Ports[b].PublicPort
+	})
+	var out strings.Builder
+	emitted := make(map[uint16]struct{})
+	for _, port := range ctr.Ports {
+		if port.PublicPort == 0 {
+			continue
+		}
+		if _, dup := emitted[port.PublicPort]; dup {
+			continue
+		}
+		emitted[port.PublicPort] = struct{}{}
+		if out.Len() > 0 {
+			out.WriteString(", ")
+		}
+		// Only show the bind address when it is not one of the catch-all IPs.
+		if port.IP != "0.0.0.0" && port.IP != "::" {
+			out.WriteString(port.IP)
+			out.WriteByte(':')
+		}
+		out.WriteString(strconv.Itoa(int(port.PublicPort)))
+	}
+	// clear ports slice so it doesn't get reused and blend into next response
+	ctr.Ports = nil
+	return out.String()
+}
+
 func parseDockerStatus(status string) (string, container.DockerHealth) {
 	trimmed := strings.TrimSpace(status)
 	if trimmed == "" {
@@ -364,22 +400,60 @@ func parseDockerStatus(status string) (string, container.DockerHealth) {
 		statusText = trimmed
 	}

-	healthText := strings.ToLower(strings.TrimSpace(strings.TrimSuffix(trimmed[openIdx+1:], ")")))
+	healthText := strings.TrimSpace(strings.TrimSuffix(trimmed[openIdx+1:], ")"))
 	// Some Docker statuses include a "health:" prefix inside the parentheses.
 	// Strip it so it maps correctly to the known health states.
 	if colonIdx := strings.IndexRune(healthText, ':'); colonIdx != -1 {
-		prefix := strings.TrimSpace(healthText[:colonIdx])
+		prefix := strings.ToLower(strings.TrimSpace(healthText[:colonIdx]))
 		if prefix == "health" || prefix == "health status" {
 			healthText = strings.TrimSpace(healthText[colonIdx+1:])
 		}
 	}
-	if health, ok := container.DockerHealthStrings[healthText]; ok {
+	if health, ok := parseDockerHealthStatus(healthText); ok {
 		return statusText, health
 	}

 	return trimmed, container.DockerHealthNone
 }

+// parseDockerHealthStatus looks up a Docker health status string in
+// container.DockerHealthStrings after trimming surrounding whitespace and
+// lower-casing it. The boolean reports whether the status was found.
+func parseDockerHealthStatus(status string) (container.DockerHealth, bool) {
+	key := strings.ToLower(strings.TrimSpace(status))
+	health, known := container.DockerHealthStrings[key]
+	return health, known
+}
+
+// getPodmanContainerHealth reads a container's health status from the
+// container inspect endpoint (/containers/{id}/json).
+// Used for Podman which doesn't provide health status in the /containers/json endpoint as of March 2026.
+// https://github.com/containers/podman/issues/27786
+//
+// It returns an error when the request fails, the response is not 200 OK, or
+// the body cannot be decoded. A missing or unrecognized State.Health.Status
+// yields container.DockerHealthNone with a nil error.
+func (dm *dockerManager) getPodmanContainerHealth(containerID string) (container.DockerHealth, error) {
+	endpoint := fmt.Sprintf("http://localhost/containers/%s/json", url.PathEscape(containerID))
+	resp, err := dm.client.Get(endpoint)
+	if err != nil {
+		return container.DockerHealthNone, err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return container.DockerHealthNone, fmt.Errorf("container inspect request failed: %s", resp.Status)
+	}
+
+	// Decode only the single field needed from the inspect payload.
+	var payload struct {
+		State struct {
+			Health struct {
+				Status string
+			}
+		}
+	}
+	if err = json.NewDecoder(resp.Body).Decode(&payload); err != nil {
+		return container.DockerHealthNone, err
+	}
+
+	health, known := parseDockerHealthStatus(payload.State.Health.Status)
+	if !known {
+		return container.DockerHealthNone, nil
+	}
+	return health, nil
+}
+
 // Updates stats for individual container with cache-time-aware delta tracking
 func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeMs uint16) error {
 	name := ctr.Names[0][1:]
@@ -389,6 +463,21 @@ func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeM
 		return err
 	}

+	statusText, health := parseDockerStatus(ctr.Status)
+
+	// Docker exposes Health.Status on /containers/json in API 1.52+.
+	// Podman currently requires falling back to the inspect endpoint as of March 2026.
+	// https://github.com/containers/podman/issues/27786
+	if ctr.Health.Status != "" {
+		if h, ok := parseDockerHealthStatus(ctr.Health.Status); ok {
+			health = h
+		}
+	} else if dm.usingPodman {
+		if podmanHealth, err := dm.getPodmanContainerHealth(ctr.IdShort); err == nil {
+			health = podmanHealth
+		}
+	}
+
 	dm.containerStatsMutex.Lock()
 	defer dm.containerStatsMutex.Unlock()

@@ -400,11 +489,13 @@ func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeM
 	}

 	stats.Id = ctr.IdShort
-
-	statusText, health := parseDockerStatus(ctr.Status)
 	stats.Status = statusText
 	stats.Health = health

+	if len(ctr.Ports) > 0 {
+		stats.Ports = convertContainerPortsToString(ctr)
+	}
+
 	// reset current stats
 	stats.Cpu = 0
 	stats.Mem = 0
@@ -487,7 +578,7 @@ func (dm *dockerManager) deleteContainerStatsSync(id string) {

 // Creates a new http client for Docker or Podman API
 func newDockerManager() *dockerManager {
-	dockerHost, exists := GetEnv("DOCKER_HOST")
+	dockerHost, exists := utils.GetEnv("DOCKER_HOST")
 	if exists {
 		// return nil if set to empty string
 		if dockerHost == "" {
@@ -523,7 +614,7 @@ func newDockerManager() *dockerManager {

 	// configurable timeout
 	timeout := time.Millisecond * time.Duration(dockerTimeoutMs)
-	if t, set := GetEnv("DOCKER_TIMEOUT"); set {
+	if t, set := utils.GetEnv("DOCKER_TIMEOUT"); set {
 		timeout, err = time.ParseDuration(t)
 		if err != nil {
 			slog.Error(err.Error())
@@ -540,7 +631,7 @@ func newDockerManager() *dockerManager {

 	// Read container exclusion patterns from environment variable
 	var excludeContainers []string
-	if excludeStr, set := GetEnv("EXCLUDE_CONTAINERS"); set && excludeStr != "" {
+	if excludeStr, set := utils.GetEnv("EXCLUDE_CONTAINERS"); set && excludeStr != "" {
 		parts := strings.SplitSeq(excludeStr, ",")
 		for part := range parts {
 			trimmed := strings.TrimSpace(part)
--- a/agent/docker_test.go
+++ b/agent/docker_test.go
@@ -18,6 +18,7 @@ import (
 	"time"

 	"github.com/henrygd/beszel/agent/deltatracker"
+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/container"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
@@ -34,6 +35,12 @@ type recordingRoundTripper struct {
 	lastQuery   map[string]string
 }

+type roundTripFunc func(*http.Request) (*http.Response, error) // lets a plain function be used as an http.Client Transport in tests
+
+func (fn roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) { // forwards the request to fn and returns its result unchanged
+	return fn(req)
+}
+
 func (rt *recordingRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
 	rt.called = true
 	rt.lastPath = req.URL.EscapedPath()
@@ -213,6 +220,28 @@ func TestContainerDetailsRequestsUseExpectedDockerPaths(t *testing.T) {
 	})
 }

+func TestGetPodmanContainerHealth(t *testing.T) { // a "healthy" inspect response must map to container.DockerHealthHealthy
+	called := false // flipped by the fake transport so the test can prove a request was sent
+	dm := &dockerManager{
+		client: &http.Client{Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
+			called = true
+			assert.Equal(t, "/containers/0123456789ab/json", req.URL.EscapedPath()) // health is expected to come from the container inspect path
+			return &http.Response{
+				StatusCode: http.StatusOK,
+				Status:     "200 OK",
+				Header:     make(http.Header),
+				Body:       io.NopCloser(strings.NewReader(`{"State":{"Health":{"Status":"healthy"}}}`)),
+				Request:    req,
+			}, nil
+		})},
+	}
+
+	health, err := dm.getPodmanContainerHealth("0123456789ab")
+	require.NoError(t, err)
+	assert.True(t, called) // guards against the health value being produced without any request
+	assert.Equal(t, container.DockerHealthHealthy, health)
+}
+
 func TestValidateCpuPercentage(t *testing.T) {
 	tests := []struct {
 		name          string
@@ -298,48 +327,6 @@ func TestUpdateContainerStatsValues(t *testing.T) {
 	assert.Equal(t, testTime, stats.PrevReadTime)
 }

-func TestTwoDecimals(t *testing.T) {
-	tests := []struct {
-		name     string
-		input    float64
-		expected float64
-	}{
-		{"round down", 1.234, 1.23},
-		{"round half up", 1.235, 1.24}, // math.Round rounds half up
-		{"no rounding needed", 1.23, 1.23},
-		{"negative number", -1.235, -1.24}, // math.Round rounds half up (more negative)
-		{"zero", 0.0, 0.0},
-		{"large number", 123.456, 123.46}, // rounds 5 up
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result := twoDecimals(tt.input)
-			assert.Equal(t, tt.expected, result)
-		})
-	}
-}
-
-func TestBytesToMegabytes(t *testing.T) {
-	tests := []struct {
-		name     string
-		input    float64
-		expected float64
-	}{
-		{"1 MB", 1048576, 1.0},
-		{"512 KB", 524288, 0.5},
-		{"zero", 0, 0},
-		{"large value", 1073741824, 1024}, // 1 GB = 1024 MB
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result := bytesToMegabytes(tt.input)
-			assert.Equal(t, tt.expected, result)
-		})
-	}
-}
-
 func TestInitializeCpuTracking(t *testing.T) {
 	dm := &dockerManager{
 		lastCpuContainer: make(map[uint16]map[string]uint64),
@@ -905,11 +892,11 @@ func TestContainerStatsEndToEndWithRealData(t *testing.T) {
 	updateContainerStatsValues(testStats, cpuPct, usedMemory, 1000000, 500000, testTime)

 	assert.Equal(t, cpuPct, testStats.Cpu)
-	assert.Equal(t, bytesToMegabytes(float64(usedMemory)), testStats.Mem)
+	assert.Equal(t, utils.BytesToMegabytes(float64(usedMemory)), testStats.Mem)
 	assert.Equal(t, [2]uint64{1000000, 500000}, testStats.Bandwidth)
 	// Deprecated fields still populated for backward compatibility with older hubs
-	assert.Equal(t, bytesToMegabytes(1000000), testStats.NetworkSent)
-	assert.Equal(t, bytesToMegabytes(500000), testStats.NetworkRecv)
+	assert.Equal(t, utils.BytesToMegabytes(1000000), testStats.NetworkSent)
+	assert.Equal(t, utils.BytesToMegabytes(500000), testStats.NetworkRecv)
 	assert.Equal(t, testTime, testStats.PrevReadTime)
 }

@@ -1170,6 +1157,18 @@ func TestParseDockerStatus(t *testing.T) {
 			expectedStatus: "",
 			expectedHealth: container.DockerHealthNone,
 		},
+		{
+			name:           "status health with health: prefix",
+			input:          "Up 5 minutes (health: starting)",
+			expectedStatus: "Up 5 minutes",
+			expectedHealth: container.DockerHealthStarting,
+		},
+		{
+			name:           "status health with health status: prefix",
+			input:          "Up 10 minutes (health status: unhealthy)",
+			expectedStatus: "Up 10 minutes",
+			expectedHealth: container.DockerHealthUnhealthy,
+		},
 	}

 	for _, tt := range tests {
@@ -1181,6 +1180,84 @@ func TestParseDockerStatus(t *testing.T) {
 	}
 }

+func TestParseDockerHealthStatus(t *testing.T) { // table test: health string -> (DockerHealth, ok)
+	tests := []struct {
+		input          string
+		expectedHealth container.DockerHealth
+		expectedOk     bool
+	}{
+		{"healthy", container.DockerHealthHealthy, true},
+		{"unhealthy", container.DockerHealthUnhealthy, true},
+		{"starting", container.DockerHealthStarting, true},
+		{"none", container.DockerHealthNone, true},
+		{" Healthy ", container.DockerHealthHealthy, true}, // surrounding whitespace and letter case are expected to be ignored
+		{"unknown", container.DockerHealthNone, false},
+		{"", container.DockerHealthNone, false}, // empty and unrecognized inputs are expected to report ok=false
+	}
+	for _, tt := range tests {
+		t.Run(tt.input, func(t *testing.T) { // the raw input doubles as the subtest name
+			health, ok := parseDockerHealthStatus(tt.input)
+			assert.Equal(t, tt.expectedHealth, health)
+			assert.Equal(t, tt.expectedOk, ok)
+		})
+	}
+}
+
+func TestUpdateContainerStatsUsesPodmanInspectHealthFallback(t *testing.T) { // with usingPodman set and no health in the list data, health must come from inspect
+	var requestedPaths []string // every path the fake transport sees, in request order
+	dm := &dockerManager{
+		client: &http.Client{Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
+			requestedPaths = append(requestedPaths, req.URL.EscapedPath())
+			switch req.URL.EscapedPath() {
+			case "/containers/0123456789ab/stats":
+				return &http.Response{
+					StatusCode: http.StatusOK,
+					Status:     "200 OK",
+					Header:     make(http.Header),
+					Body: io.NopCloser(strings.NewReader(`{
+						"read":"2026-03-15T21:26:59Z",
+						"cpu_stats":{"cpu_usage":{"total_usage":1000},"system_cpu_usage":2000},
+						"memory_stats":{"usage":1048576,"stats":{"inactive_file":262144}},
+						"networks":{"eth0":{"rx_bytes":0,"tx_bytes":0}}
+					}`)),
+					Request: req,
+				}, nil
+			case "/containers/0123456789ab/json":
+				return &http.Response{
+					StatusCode: http.StatusOK,
+					Status:     "200 OK",
+					Header:     make(http.Header),
+					Body:       io.NopCloser(strings.NewReader(`{"State":{"Health":{"Status":"healthy"}}}`)),
+					Request:    req,
+				}, nil
+			default:
+				return nil, fmt.Errorf("unexpected path: %s", req.URL.EscapedPath()) // any other request fails the call under test
+			}
+		})},
+		containerStatsMap:   make(map[string]*container.Stats),
+		apiStats:            &container.ApiStats{},
+		usingPodman:         true, // selects the inspect fallback branch when ctr.Health.Status is empty
+		lastCpuContainer:    make(map[uint16]map[string]uint64),
+		lastCpuSystem:       make(map[uint16]map[string]uint64),
+		lastCpuReadTime:     make(map[uint16]map[string]time.Time),
+		networkSentTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
+		networkRecvTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
+	}
+
+	ctr := &container.ApiInfo{
+		IdShort: "0123456789ab",
+		Names:   []string{"/beszel"},
+		Status:  "Up 2 minutes", // no health suffix and no Health field, so only the inspect call can supply health
+		Image:   "beszel:latest",
+	}
+
+	err := dm.updateContainerStats(ctr, defaultCacheTimeMs)
+	require.NoError(t, err)
+	assert.Equal(t, []string{"/containers/0123456789ab/stats", "/containers/0123456789ab/json"}, requestedPaths) // stats first, then exactly one inspect request
+	assert.Equal(t, container.DockerHealthHealthy, dm.containerStatsMap[ctr.IdShort].Health)
+	assert.Equal(t, "Up 2 minutes", dm.containerStatsMap[ctr.IdShort].Status)
+}
+
 func TestConstantsAndUtilityFunctions(t *testing.T) {
 	// Test constants are properly defined
 	assert.Equal(t, uint16(60000), defaultCacheTimeMs)
@@ -1190,13 +1267,13 @@ func TestConstantsAndUtilityFunctions(t *testing.T) {
 	assert.Equal(t, 5*1024*1024, maxTotalLogSize)               // 5MB

 	// Test utility functions
-	assert.Equal(t, 1.5, twoDecimals(1.499))
-	assert.Equal(t, 1.5, twoDecimals(1.5))
-	assert.Equal(t, 1.5, twoDecimals(1.501))
+	assert.Equal(t, 1.5, utils.TwoDecimals(1.499))
+	assert.Equal(t, 1.5, utils.TwoDecimals(1.5))
+	assert.Equal(t, 1.5, utils.TwoDecimals(1.501))

-	assert.Equal(t, 1.0, bytesToMegabytes(1048576)) // 1 MB
-	assert.Equal(t, 0.5, bytesToMegabytes(524288))  // 512 KB
-	assert.Equal(t, 0.0, bytesToMegabytes(0))
+	assert.Equal(t, 1.0, utils.BytesToMegabytes(1048576)) // 1 MB
+	assert.Equal(t, 0.5, utils.BytesToMegabytes(524288))  // 512 KB
+	assert.Equal(t, 0.0, utils.BytesToMegabytes(0))
 }

 func TestDecodeDockerLogStream(t *testing.T) {
@@ -1496,3 +1573,99 @@ func TestAnsiEscapePattern(t *testing.T) {
 		})
 	}
 }
+
+// TestConvertContainerPortsToString checks the string produced by
+// convertContainerPortsToString and that ctr.Ports is nil after the call.
+func TestConvertContainerPortsToString(t *testing.T) {
+	type port = struct {
+		PublicPort uint16
+		IP         string
+	}
+	tests := []struct {
+		name     string
+		ports    []port
+		expected string
+	}{
+		{
+			name:     "empty ports",
+			ports:    nil,
+			expected: "",
+		},
+		{
+			name: "single port",
+			ports: []port{
+				{PublicPort: 80, IP: "0.0.0.0"},
+			},
+			expected: "80",
+		},
+		{
+			name: "single port with non-default IP",
+			ports: []port{
+				{PublicPort: 80, IP: "1.2.3.4"},
+			},
+			expected: "1.2.3.4:80",
+		},
+		{
+			name: "ipv6 default ip",
+			ports: []port{
+				{PublicPort: 80, IP: "::"},
+			},
+			expected: "80",
+		},
+		{
+			name: "zero PublicPort is skipped",
+			ports: []port{
+				{PublicPort: 0, IP: "0.0.0.0"},
+				{PublicPort: 80, IP: "0.0.0.0"},
+			},
+			expected: "80",
+		},
+		{
+			name: "ports sorted ascending by PublicPort",
+			ports: []port{
+				{PublicPort: 443, IP: "0.0.0.0"},
+				{PublicPort: 80, IP: "0.0.0.0"},
+				{PublicPort: 8080, IP: "0.0.0.0"},
+			},
+			expected: "80, 443, 8080",
+		},
+		{
+			name: "duplicates are deduplicated",
+			ports: []port{
+				{PublicPort: 80, IP: "0.0.0.0"},
+				{PublicPort: 80, IP: "0.0.0.0"},
+				{PublicPort: 443, IP: "0.0.0.0"},
+			},
+			expected: "80, 443",
+		},
+		{
+			name: "multiple ports with different IPs",
+			ports: []port{
+				{PublicPort: 80, IP: "0.0.0.0"},
+				{PublicPort: 443, IP: "1.2.3.4"},
+			},
+			expected: "80, 1.2.3.4:443",
+		},
+		{
+			name: "ports slice is nilled after call",
+			ports: []port{
+				{PublicPort: 8080, IP: "0.0.0.0"},
+			},
+			expected: "8080",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			ctr := &container.ApiInfo{}
+			for _, p := range tt.ports {
+				// port aliases the same anonymous struct type, so p can be appended as-is
+				ctr.Ports = append(ctr.Ports, p)
+			}
+			result := convertContainerPortsToString(ctr)
+			assert.Equal(t, tt.expected, result)
+			// Ports slice must be cleared to prevent bleed-over into the next response
+			assert.Nil(t, ctr.Ports, "ctr.Ports should be nil after convertContainerPortsToString")
+		})
+	}
+}
--- a/agent/emmc_linux.go
+++ b/agent/emmc_linux.go
@@ -8,6 +8,7 @@ import (
 	"strconv"
 	"strings"

+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/smart"
 )

@@ -141,9 +142,9 @@ func readEmmcHealth(blockName string) (emmcHealth, bool) {
 	out.lifeA = lifeA
 	out.lifeB = lifeB

-	out.model = readStringFile(filepath.Join(deviceDir, "name"))
-	out.serial = readStringFile(filepath.Join(deviceDir, "serial"))
-	out.revision = readStringFile(filepath.Join(deviceDir, "prv"))
+	out.model = utils.ReadStringFile(filepath.Join(deviceDir, "name"))
+	out.serial = utils.ReadStringFile(filepath.Join(deviceDir, "serial"))
+	out.revision = utils.ReadStringFile(filepath.Join(deviceDir, "prv"))

 	if capBytes, ok := readBlockCapacityBytes(blockName); ok {
 		out.capacity = capBytes
@@ -153,7 +154,7 @@ func readEmmcHealth(blockName string) (emmcHealth, bool) {
 }

 func readLifeTime(deviceDir string) (uint8, uint8, bool) {
-	if content, ok := readStringFileOK(filepath.Join(deviceDir, "life_time")); ok {
+	if content, ok := utils.ReadStringFileOK(filepath.Join(deviceDir, "life_time")); ok {
 		a, b, ok := parseHexBytePair(content)
 		return a, b, ok
 	}
@@ -170,7 +171,7 @@ func readBlockCapacityBytes(blockName string) (uint64, bool) {
 	sizePath := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "size")
 	lbsPath := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "queue", "logical_block_size")

-	sizeStr, ok := readStringFileOK(sizePath)
+	sizeStr, ok := utils.ReadStringFileOK(sizePath)
 	if !ok {
 		return 0, false
 	}
@@ -179,7 +180,7 @@ func readBlockCapacityBytes(blockName string) (uint64, bool) {
 		return 0, false
 	}

-	lbsStr, ok := readStringFileOK(lbsPath)
+	lbsStr, ok := utils.ReadStringFileOK(lbsPath)
 	logicalBlockSize := uint64(512)
 	if ok {
 		if parsed, err := strconv.ParseUint(lbsStr, 10, 64); err == nil && parsed > 0 {
@@ -191,7 +192,7 @@ func readBlockCapacityBytes(blockName string) (uint64, bool) {
 }

 func readHexByteFile(path string) (uint8, bool) {
-	content, ok := readStringFileOK(path)
+	content, ok := utils.ReadStringFileOK(path)
 	if !ok {
 		return 0, false
 	}
@@ -199,19 +200,6 @@ func readHexByteFile(path string) (uint8, bool) {
 	return b, ok
 }

-func readStringFile(path string) string {
-	content, _ := readStringFileOK(path)
-	return content
-}
-
-func readStringFileOK(path string) (string, bool) {
-	b, err := os.ReadFile(path)
-	if err != nil {
-		return "", false
-	}
-	return strings.TrimSpace(string(b)), true
-}
-
 func hasEmmcHealthFiles(deviceDir string) bool {
 	entries, err := os.ReadDir(deviceDir)
 	if err != nil {
--- a/agent/gpu.go
+++ b/agent/gpu.go
@@ -15,6 +15,7 @@ import (
 	"sync"
 	"time"

+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/system"
 )

@@ -291,8 +292,8 @@ func (gm *GPUManager) parseAmdData(output []byte) bool {
 		}
 		gpu := gm.GpuDataMap[id]
 		gpu.Temperature, _ = strconv.ParseFloat(v.Temperature, 64)
-		gpu.MemoryUsed = bytesToMegabytes(memoryUsage)
-		gpu.MemoryTotal = bytesToMegabytes(totalMemory)
+		gpu.MemoryUsed = utils.BytesToMegabytes(memoryUsage)
+		gpu.MemoryTotal = utils.BytesToMegabytes(totalMemory)
 		gpu.Usage += usage
 		gpu.Power += power
 		gpu.Count++
@@ -366,16 +367,16 @@ func (gm *GPUManager) calculateGPUAverage(id string, gpu *system.GPUData, cacheK
 	gpuAvg := *gpu
 	deltaUsage, deltaPower, deltaPowerPkg := gm.calculateDeltas(gpu, lastSnapshot)

-	gpuAvg.Power = twoDecimals(deltaPower / float64(deltaCount))
+	gpuAvg.Power = utils.TwoDecimals(deltaPower / float64(deltaCount))

 	if gpu.Engines != nil {
 		// make fresh map for averaged engine metrics to avoid mutating
 		// the accumulator map stored in gm.GpuDataMap
 		gpuAvg.Engines = make(map[string]float64, len(gpu.Engines))
 		gpuAvg.Usage = gm.calculateIntelGPUUsage(&gpuAvg, gpu, lastSnapshot, deltaCount)
-		gpuAvg.PowerPkg = twoDecimals(deltaPowerPkg / float64(deltaCount))
+		gpuAvg.PowerPkg = utils.TwoDecimals(deltaPowerPkg / float64(deltaCount))
 	} else {
-		gpuAvg.Usage = twoDecimals(deltaUsage / float64(deltaCount))
+		gpuAvg.Usage = utils.TwoDecimals(deltaUsage / float64(deltaCount))
 	}

 	gm.lastAvgData[id] = gpuAvg
@@ -410,17 +411,17 @@ func (gm *GPUManager) calculateIntelGPUUsage(gpuAvg, gpu *system.GPUData, lastSn
 		} else {
 			deltaEngine = engine
 		}
-		gpuAvg.Engines[name] = twoDecimals(deltaEngine / float64(deltaCount))
+		gpuAvg.Engines[name] = utils.TwoDecimals(deltaEngine / float64(deltaCount))
 		maxEngineUsage = max(maxEngineUsage, deltaEngine/float64(deltaCount))
 	}
-	return twoDecimals(maxEngineUsage)
+	return utils.TwoDecimals(maxEngineUsage)
 }

 // updateInstantaneousValues updates values that should reflect current state, not averages
 func (gm *GPUManager) updateInstantaneousValues(gpuAvg *system.GPUData, gpu *system.GPUData) {
-	gpuAvg.Temperature = twoDecimals(gpu.Temperature)
-	gpuAvg.MemoryUsed = twoDecimals(gpu.MemoryUsed)
-	gpuAvg.MemoryTotal = twoDecimals(gpu.MemoryTotal)
+	gpuAvg.Temperature = utils.TwoDecimals(gpu.Temperature)
+	gpuAvg.MemoryUsed = utils.TwoDecimals(gpu.MemoryUsed)
+	gpuAvg.MemoryTotal = utils.TwoDecimals(gpu.MemoryTotal)
 }

 // storeSnapshot saves the current GPU state for this cache key
@@ -687,7 +688,7 @@ func (gm *GPUManager) resolveLegacyCollectorPriority(caps gpuCapabilities) []col
 	priorities := make([]collectorSource, 0, 4)

 	if caps.hasNvidiaSmi && !caps.hasTegrastats {
-		if nvml, _ := GetEnv("NVML"); nvml == "true" {
+		if nvml, _ := utils.GetEnv("NVML"); nvml == "true" {
 			priorities = append(priorities, collectorSourceNVML, collectorSourceNvidiaSMI)
 		} else {
 			priorities = append(priorities, collectorSourceNvidiaSMI)
@@ -695,7 +696,7 @@ func (gm *GPUManager) resolveLegacyCollectorPriority(caps gpuCapabilities) []col
 	}

 	if caps.hasRocmSmi {
-		if val, _ := GetEnv("AMD_SYSFS"); val == "true" {
+		if val, _ := utils.GetEnv("AMD_SYSFS"); val == "true" {
 			priorities = append(priorities, collectorSourceAmdSysfs)
 		} else {
 			priorities = append(priorities, collectorSourceRocmSMI)
@@ -728,7 +729,7 @@ func (gm *GPUManager) resolveLegacyCollectorPriority(caps gpuCapabilities) []col

 // NewGPUManager creates and initializes a new GPUManager
 func NewGPUManager() (*GPUManager, error) {
-	if skipGPU, _ := GetEnv("SKIP_GPU"); skipGPU == "true" {
+	if skipGPU, _ := utils.GetEnv("SKIP_GPU"); skipGPU == "true" {
 		return nil, nil
 	}
 	var gm GPUManager
@@ -745,7 +746,7 @@ func NewGPUManager() (*GPUManager, error) {
 	}

 	// if GPU_COLLECTOR is set, start user-defined collectors.
-	if collectorConfig, ok := GetEnv("GPU_COLLECTOR"); ok && strings.TrimSpace(collectorConfig) != "" {
+	if collectorConfig, ok := utils.GetEnv("GPU_COLLECTOR"); ok && strings.TrimSpace(collectorConfig) != "" {
 		priorities := parseCollectorPriority(collectorConfig)
 		if gm.startCollectorsByPriority(priorities, caps) == 0 {
 			return nil, fmt.Errorf("no configured GPU collectors are available")
--- a/agent/gpu_amd_linux.go
+++ b/agent/gpu_amd_linux.go
@@ -13,6 +13,7 @@ import (
 	"sync"
 	"time"

+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/system"
 )

@@ -32,8 +33,8 @@ func (gm *GPUManager) hasAmdSysfs() bool {
 		return false
 	}
 	for _, vendorPath := range cards {
-		vendor, err := os.ReadFile(vendorPath)
-		if err == nil && strings.TrimSpace(string(vendor)) == "0x1002" {
+		vendor, err := utils.ReadStringFileLimited(vendorPath, 64)
+		if err == nil && vendor == "0x1002" {
 			return true
 		}
 	}
@@ -87,12 +88,11 @@ func (gm *GPUManager) collectAmdStats() error {

 // isAmdGpu checks whether a DRM card path belongs to AMD vendor ID 0x1002.
 func isAmdGpu(cardPath string) bool {
-	vendorPath := filepath.Join(cardPath, "device/vendor")
-	vendor, err := os.ReadFile(vendorPath)
+	vendor, err := utils.ReadStringFileLimited(filepath.Join(cardPath, "device/vendor"), 64)
 	if err != nil {
 		return false
 	}
-	return strings.TrimSpace(string(vendor)) == "0x1002"
+	return vendor == "0x1002"
 }

 // updateAmdGpuData reads GPU metrics from sysfs and updates the GPU data map.
@@ -144,8 +144,8 @@ func (gm *GPUManager) updateAmdGpuData(cardPath string) bool {
 	if usageErr == nil {
 		gpu.Usage += usage
 	}
-	gpu.MemoryUsed = bytesToMegabytes(memUsed)
-	gpu.MemoryTotal = bytesToMegabytes(memTotal)
+	gpu.MemoryUsed = utils.BytesToMegabytes(memUsed)
+	gpu.MemoryTotal = utils.BytesToMegabytes(memTotal)
 	gpu.Temperature = temp
 	gpu.Power += power
 	gpu.Count++
@@ -154,11 +154,11 @@ func (gm *GPUManager) updateAmdGpuData(cardPath string) bool {

 // readSysfsFloat reads and parses a numeric value from a sysfs file.
 func readSysfsFloat(path string) (float64, error) {
-	val, err := os.ReadFile(path)
+	val, err := utils.ReadStringFileLimited(path, 64)
 	if err != nil {
 		return 0, err
 	}
-	return strconv.ParseFloat(strings.TrimSpace(string(val)), 64)
+	return strconv.ParseFloat(val, 64)
 }

 // normalizeHexID normalizes hex IDs by trimming spaces, lowercasing, and dropping 0x.
@@ -273,16 +273,16 @@ func cacheMissingAmdgpuName(deviceID, revisionID string) {
 // Falls back to showing the raw device ID if not found in the lookup table.
 func getAmdGpuName(devicePath string) string {
 	// Try product_name first (works for some enterprise GPUs)
-	if prod, err := os.ReadFile(filepath.Join(devicePath, "product_name")); err == nil {
-		return strings.TrimSpace(string(prod))
+	if prod, err := utils.ReadStringFileLimited(filepath.Join(devicePath, "product_name"), 128); err == nil {
+		return prod
 	}

 	// Read PCI device ID and look it up
-	if deviceID, err := os.ReadFile(filepath.Join(devicePath, "device")); err == nil {
-		id := normalizeHexID(string(deviceID))
+	if deviceID, err := utils.ReadStringFileLimited(filepath.Join(devicePath, "device"), 64); err == nil {
+		id := normalizeHexID(deviceID)
 		revision := ""
-		if revBytes, revErr := os.ReadFile(filepath.Join(devicePath, "revision")); revErr == nil {
-			revision = normalizeHexID(string(revBytes))
+		if rev, revErr := utils.ReadStringFileLimited(filepath.Join(devicePath, "revision"), 64); revErr == nil {
+			revision = normalizeHexID(rev)
 		}

 		if name, found, done := getCachedAmdgpuName(id, revision); found {
--- a/agent/gpu_amd_linux_test.go
+++ b/agent/gpu_amd_linux_test.go
@@ -7,6 +7,7 @@ import (
 	"path/filepath"
 	"testing"

+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/system"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
@@ -128,14 +129,14 @@ func TestUpdateAmdGpuDataWithFakeSysfs(t *testing.T) {
 		{
 			name:            "sums vram and gtt when gtt is present",
 			writeGTT:        true,
-			wantMemoryUsed:  bytesToMegabytes(1073741824 + 536870912),
-			wantMemoryTotal: bytesToMegabytes(2147483648 + 4294967296),
+			wantMemoryUsed:  utils.BytesToMegabytes(1073741824 + 536870912),
+			wantMemoryTotal: utils.BytesToMegabytes(2147483648 + 4294967296),
 		},
 		{
 			name:            "falls back to vram when gtt is missing",
 			writeGTT:        false,
-			wantMemoryUsed:  bytesToMegabytes(1073741824),
-			wantMemoryTotal: bytesToMegabytes(2147483648),
+			wantMemoryUsed:  utils.BytesToMegabytes(1073741824),
+			wantMemoryTotal: utils.BytesToMegabytes(2147483648),
 		},
 	}

--- a/agent/gpu_intel.go
+++ b/agent/gpu_intel.go
@@ -7,6 +7,7 @@ import (
 	"strconv"
 	"strings"

+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/system"
 )

@@ -52,7 +53,7 @@ func (gm *GPUManager) updateIntelFromStats(sample *intelGpuStats) bool {
 func (gm *GPUManager) collectIntelStats() (err error) {
 	// Build command arguments, optionally selecting a device via -d
 	args := []string{"-s", intelGpuStatsInterval, "-l"}
-	if dev, ok := GetEnv("INTEL_GPU_DEVICE"); ok && dev != "" {
+	if dev, ok := utils.GetEnv("INTEL_GPU_DEVICE"); ok && dev != "" {
 		args = append(args, "-d", dev)
 	}
 	cmd := exec.Command(intelGpuStatsCmd, args...)
--- a/agent/gpu_nvtop.go
+++ b/agent/gpu_nvtop.go
@@ -9,6 +9,7 @@ import (
 	"strings"
 	"time"

+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/system"
 )

@@ -80,10 +81,10 @@ func (gm *GPUManager) updateNvtopSnapshots(snapshots []nvtopSnapshot) bool {
 			gpu.Temperature = parseNvtopNumber(*sample.Temp)
 		}
 		if sample.MemUsed != nil {
-			gpu.MemoryUsed = bytesToMegabytes(parseNvtopNumber(*sample.MemUsed))
+			gpu.MemoryUsed = utils.BytesToMegabytes(parseNvtopNumber(*sample.MemUsed))
 		}
 		if sample.MemTotal != nil {
-			gpu.MemoryTotal = bytesToMegabytes(parseNvtopNumber(*sample.MemTotal))
+			gpu.MemoryTotal = utils.BytesToMegabytes(parseNvtopNumber(*sample.MemTotal))
 		}
 		if sample.GpuUtil != nil {
 			gpu.Usage += parseNvtopNumber(*sample.GpuUtil)
--- a/agent/gpu_test.go
+++ b/agent/gpu_test.go
@@ -10,6 +10,7 @@ import (
 	"testing"
 	"time"

+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/system"

 	"github.com/stretchr/testify/assert"
@@ -265,8 +266,8 @@ func TestParseNvtopData(t *testing.T) {
 	assert.Equal(t, 48.0, g0.Temperature)
 	assert.Equal(t, 5.0, g0.Usage)
 	assert.Equal(t, 13.0, g0.Power)
-	assert.Equal(t, bytesToMegabytes(349372416), g0.MemoryUsed)
-	assert.Equal(t, bytesToMegabytes(4294967296), g0.MemoryTotal)
+	assert.Equal(t, utils.BytesToMegabytes(349372416), g0.MemoryUsed)
+	assert.Equal(t, utils.BytesToMegabytes(4294967296), g0.MemoryTotal)
 	assert.Equal(t, 1.0, g0.Count)

 	g1, ok := gm.GpuDataMap["n1"]
@@ -275,8 +276,8 @@ func TestParseNvtopData(t *testing.T) {
 	assert.Equal(t, 48.0, g1.Temperature)
 	assert.Equal(t, 12.0, g1.Usage)
 	assert.Equal(t, 9.0, g1.Power)
-	assert.Equal(t, bytesToMegabytes(1213784064), g1.MemoryUsed)
-	assert.Equal(t, bytesToMegabytes(16929173504), g1.MemoryTotal)
+	assert.Equal(t, utils.BytesToMegabytes(1213784064), g1.MemoryUsed)
+	assert.Equal(t, utils.BytesToMegabytes(16929173504), g1.MemoryTotal)
 	assert.Equal(t, 1.0, g1.Count)
 }

@@ -1082,8 +1083,5 @@ func TestCalculateGPUAverage(t *testing.T) {
 
 func TestGPUCapabilitiesAndLegacyPriority(t *testing.T) {
-	// Save original PATH
-	origPath := os.Getenv("PATH")
-	defer os.Setenv("PATH", origPath)
 	hasAmdSysfs := (&GPUManager{}).hasAmdSysfs()

 	tests := []struct {
@@ -1177,7 +1176,10 @@ echo "[]"`
 		{
 			name: "no gpu tools available",
 			setupCommands: func(_ string) error {
+				// This closure captures the parent test's t. Calling t.Setenv here would register
+				// the restore on the parent and, when the parent finishes, reset PATH to this
+				// subtest's already-removed temp dir. The subtest's own t.Setenv restores PATH.
 				os.Setenv("PATH", "")
 				return nil
 			},
 			wantErr: true,
@@ -1187,7 +1186,7 @@ echo "[]"`
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			tempDir := t.TempDir()
-			os.Setenv("PATH", tempDir)
+			t.Setenv("PATH", tempDir)
 			if err := tt.setupCommands(tempDir); err != nil {
 				t.Fatal(err)
 			}
@@ -1233,13 +1232,9 @@ echo "[]"`
 }

 func TestCollectorStartHelpers(t *testing.T) {
-	// Save original PATH
-	origPath := os.Getenv("PATH")
-	defer os.Setenv("PATH", origPath)
-
 	// Set up temp dir with the commands
 	dir := t.TempDir()
-	os.Setenv("PATH", dir)
+	t.Setenv("PATH", dir)

 	tests := []struct {
 		name     string
@@ -1369,11 +1364,8 @@ echo '[{"device_name":"NVIDIA Test GPU","temp":"52C","power_draw":"31W","gpu_uti
 }

 func TestNewGPUManagerPriorityNvtopFallback(t *testing.T) {
-	origPath := os.Getenv("PATH")
-	defer os.Setenv("PATH", origPath)
-
 	dir := t.TempDir()
-	os.Setenv("PATH", dir)
+	t.Setenv("PATH", dir)
 	t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "nvtop,nvidia-smi")

 	nvtopPath := filepath.Join(dir, "nvtop")
@@ -1398,11 +1390,8 @@ echo "0, NVIDIA Priority GPU, 45, 512, 2048, 12, 25"`
 }

 func TestNewGPUManagerPriorityMixedCollectors(t *testing.T) {
-	origPath := os.Getenv("PATH")
-	defer os.Setenv("PATH", origPath)
-
 	dir := t.TempDir()
-	os.Setenv("PATH", dir)
+	t.Setenv("PATH", dir)
 	t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "intel_gpu_top,rocm-smi")

 	intelPath := filepath.Join(dir, "intel_gpu_top")
@@ -1432,11 +1421,8 @@ echo '{"card0": {"Temperature (Sensor edge) (C)": "49.0", "Current Socket Graphi
 }

 func TestNewGPUManagerPriorityNvmlFallbackToNvidiaSmi(t *testing.T) {
-	origPath := os.Getenv("PATH")
-	defer os.Setenv("PATH", origPath)
-
 	dir := t.TempDir()
-	os.Setenv("PATH", dir)
+	t.Setenv("PATH", dir)
 	t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "nvml,nvidia-smi")

 	nvidiaPath := filepath.Join(dir, "nvidia-smi")
@@ -1455,11 +1441,8 @@ echo "0, NVIDIA Fallback GPU, 41, 256, 1024, 8, 14"`
 }

 func TestNewGPUManagerConfiguredCollectorsMustStart(t *testing.T) {
-	origPath := os.Getenv("PATH")
-	defer os.Setenv("PATH", origPath)
-
 	dir := t.TempDir()
-	os.Setenv("PATH", dir)
+	t.Setenv("PATH", dir)

 	t.Run("configured valid collector unavailable", func(t *testing.T) {
 		t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "nvidia-smi")
@@ -1479,11 +1462,8 @@ func TestNewGPUManagerConfiguredCollectorsMustStart(t *testing.T) {
 }

 func TestNewGPUManagerJetsonIgnoresCollectorConfig(t *testing.T) {
-	origPath := os.Getenv("PATH")
-	defer os.Setenv("PATH", origPath)
-
 	dir := t.TempDir()
-	os.Setenv("PATH", dir)
+	t.Setenv("PATH", dir)
 	t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "nvidia-smi")

 	tegraPath := filepath.Join(dir, "tegrastats")
@@ -1718,12 +1698,8 @@ func TestIntelUpdateFromStats(t *testing.T) {
 }

 func TestIntelCollectorStreaming(t *testing.T) {
-	// Save and override PATH
-	origPath := os.Getenv("PATH")
-	defer os.Setenv("PATH", origPath)
-
 	dir := t.TempDir()
-	os.Setenv("PATH", dir)
+	t.Setenv("PATH", dir)

 	// Create a fake intel_gpu_top that prints -l format with four samples (first will be skipped) and exits
 	scriptPath := filepath.Join(dir, "intel_gpu_top")
--- a/agent/mdraid_linux.go
+++ b/agent/mdraid_linux.go
@@ -0,0 +1,233 @@
+//go:build linux
+
+package agent
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+
+	"github.com/henrygd/beszel/agent/utils"
+	"github.com/henrygd/beszel/internal/entities/smart"
+)
+
+// mdraidSysfsRoot is the sysfs root used for all md lookups; production value is "/sys" and it is a variable only so tests can point it at a fake tree.
+var mdraidSysfsRoot = "/sys"
+
+type mdraidHealth struct { // values read from one array's sysfs attributes by readMdraidHealth
+	level         string // md/level (e.g. "raid1")
+	arrayState    string // md/array_state; the only attribute that must be readable
+	degraded      uint64 // md/degraded; any non-zero value counts as degraded
+	raidDisks     uint64 // md/raid_disks
+	syncAction    string // md/sync_action (e.g. "resync", "recover", "check")
+	syncCompleted string // md/sync_completed, kept as a string and not parsed
+	syncSpeed     string // md/sync_speed, kept as a string and not parsed
+	mismatchCnt   uint64 // md/mismatch_cnt
+	capacity      uint64 // array size in bytes; 0 when it could not be determined
+}
+
+// scanMdraidDevices lists the md arrays found under <mdraidSysfsRoot>/block as DeviceInfo entries named /dev/<mdN>; it returns nil if that directory cannot be read.
+func scanMdraidDevices() []*DeviceInfo {
+	blockDir := filepath.Join(mdraidSysfsRoot, "block")
+	entries, err := os.ReadDir(blockDir)
+	if err != nil {
+		return nil // an unreadable block directory is reported as "no arrays", not as an error
+	}
+
+	devices := make([]*DeviceInfo, 0, 2)
+	for _, ent := range entries {
+		name := ent.Name()
+		if !isMdraidBlockName(name) {
+			continue
+		}
+		mdDir := filepath.Join(blockDir, name, "md")
+		if !utils.FileExists(filepath.Join(mdDir, "array_state")) {
+			continue // no md/array_state attribute: skip this entry
+		}
+
+		devPath := filepath.Join("/dev", name)
+		devices = append(devices, &DeviceInfo{
+			Name:     devPath,
+			Type:     "mdraid",
+			InfoName: devPath + " [mdraid]",
+			Protocol: "MD",
+		})
+	}
+
+	return devices
+}
+
+// collectMdraidHealth reads the md array named by deviceInfo from sysfs, sets deviceInfo.Type to "mdraid" and stores the result in sm.SmartDataMap under "mdraid:<name>". It returns (false, nil) when deviceInfo is nil or unnamed, is not an md device, or its sysfs state cannot be read; the returned error is always nil.
+func (sm *SmartManager) collectMdraidHealth(deviceInfo *DeviceInfo) (bool, error) {
+	if deviceInfo == nil || deviceInfo.Name == "" {
+		return false, nil
+	}
+
+	base := filepath.Base(deviceInfo.Name)
+	if !isMdraidBlockName(base) && !strings.EqualFold(deviceInfo.Type, "mdraid") {
+		return false, nil // neither the name nor the declared type marks this as an md array
+	}
+
+	health, ok := readMdraidHealth(base)
+	if !ok {
+		return false, nil
+	}
+
+	deviceInfo.Type = "mdraid"
+	key := fmt.Sprintf("mdraid:%s", base)
+	status := mdraidSmartStatus(health)
+
+	attrs := make([]*smart.SmartAttribute, 0, 10) // empty strings and zero counters are left out below
+	if health.arrayState != "" {
+		attrs = append(attrs, &smart.SmartAttribute{Name: "ArrayState", RawString: health.arrayState})
+	}
+	if health.level != "" {
+		attrs = append(attrs, &smart.SmartAttribute{Name: "RaidLevel", RawString: health.level})
+	}
+	if health.raidDisks > 0 {
+		attrs = append(attrs, &smart.SmartAttribute{Name: "RaidDisks", RawValue: health.raidDisks})
+	}
+	if health.degraded > 0 {
+		attrs = append(attrs, &smart.SmartAttribute{Name: "Degraded", RawValue: health.degraded})
+	}
+	if health.syncAction != "" {
+		attrs = append(attrs, &smart.SmartAttribute{Name: "SyncAction", RawString: health.syncAction})
+	}
+	if health.syncCompleted != "" {
+		attrs = append(attrs, &smart.SmartAttribute{Name: "SyncCompleted", RawString: health.syncCompleted})
+	}
+	if health.syncSpeed != "" {
+		attrs = append(attrs, &smart.SmartAttribute{Name: "SyncSpeed", RawString: health.syncSpeed})
+	}
+	if health.mismatchCnt > 0 {
+		attrs = append(attrs, &smart.SmartAttribute{Name: "MismatchCount", RawValue: health.mismatchCnt})
+	}
+
+	sm.Lock()
+	defer sm.Unlock()
+
+	if _, exists := sm.SmartDataMap[key]; !exists {
+		sm.SmartDataMap[key] = &smart.SmartData{}
+	}
+
+	data := sm.SmartDataMap[key] // existing entries are updated in place on later collections
+	data.ModelName = "Linux MD RAID"
+	if health.level != "" {
+		data.ModelName = "Linux MD RAID (" + health.level + ")"
+	}
+	data.Capacity = health.capacity
+	data.SmartStatus = status
+	data.DiskName = filepath.Join("/dev", base)
+	data.DiskType = "mdraid"
+	data.Attributes = attrs
+
+	return true, nil
+}
+
+// readMdraidHealth loads the md attributes for blockName from <mdraidSysfsRoot>/block/<blockName>/md. It returns false when blockName is not an mdN name or md/array_state cannot be read; the numeric attributes and capacity are optional and keep their zero value when unreadable.
+func readMdraidHealth(blockName string) (mdraidHealth, bool) {
+	var out mdraidHealth
+
+	if !isMdraidBlockName(blockName) {
+		return out, false
+	}
+
+	mdDir := filepath.Join(mdraidSysfsRoot, "block", blockName, "md")
+	arrayState, okState := utils.ReadStringFileOK(filepath.Join(mdDir, "array_state"))
+	if !okState {
+		return out, false // array_state is the one mandatory attribute
+	}
+
+	out.arrayState = arrayState
+	out.level = utils.ReadStringFile(filepath.Join(mdDir, "level"))
+	out.syncAction = utils.ReadStringFile(filepath.Join(mdDir, "sync_action"))
+	out.syncCompleted = utils.ReadStringFile(filepath.Join(mdDir, "sync_completed"))
+	out.syncSpeed = utils.ReadStringFile(filepath.Join(mdDir, "sync_speed"))
+
+	if val, ok := utils.ReadUintFile(filepath.Join(mdDir, "raid_disks")); ok {
+		out.raidDisks = val
+	}
+	if val, ok := utils.ReadUintFile(filepath.Join(mdDir, "degraded")); ok {
+		out.degraded = val
+	}
+	if val, ok := utils.ReadUintFile(filepath.Join(mdDir, "mismatch_cnt")); ok {
+		out.mismatchCnt = val
+	}
+
+	if capBytes, ok := readMdraidBlockCapacityBytes(blockName, mdraidSysfsRoot); ok {
+		out.capacity = capBytes
+	}
+
+	return out, true
+}
+
+// mdraidSmartStatus maps md state/sync signals to a SMART-like status, checked in this order: failed array states -> FAILED; resync/recover/reshape -> WARNING; degraded -> FAILED; check/repair -> WARNING; known healthy states -> PASSED; anything else -> UNKNOWN.
+func mdraidSmartStatus(health mdraidHealth) string {
+	state := strings.ToLower(strings.TrimSpace(health.arrayState))
+	switch state {
+	case "inactive", "faulty", "broken", "stopped":
+		return "FAILED"
+	}
+	// During rebuild/recovery, arrays are often temporarily degraded; report as
+	// warning instead of hard failure while synchronization is in progress.
+	syncAction := strings.ToLower(strings.TrimSpace(health.syncAction))
+	switch syncAction {
+	case "resync", "recover", "reshape":
+		return "WARNING"
+	}
+	if health.degraded > 0 {
+		return "FAILED" // degraded with no rebuild in progress
+	}
+	switch syncAction {
+	case "check", "repair":
+		return "WARNING"
+	}
+	switch state {
+	case "clean", "active", "active-idle", "write-pending", "read-auto", "readonly":
+		return "PASSED"
+	}
+	return "UNKNOWN"
+}
+
+// isMdraidBlockName reports whether name is "md" followed only by ASCII digits
+// (e.g. "md0", "md127"); it expects a bare block name, not a /dev path.
+func isMdraidBlockName(name string) bool {
+	digits, found := strings.CutPrefix(name, "md")
+	if !found || digits == "" {
+		return false
+	}
+	// Byte-wise scan: any non-digit byte, including every byte of a
+	// multi-byte rune, falls outside '0'..'9' and rejects the name.
+	for i := 0; i < len(digits); i++ {
+		if b := digits[i]; b < '0' || b > '9' {
+			return false
+		}
+	}
+	return true
+}
+
+// readMdraidBlockCapacityBytes returns the size in bytes of <root>/block/<blockName>, or (0, false) if the size attribute is missing, unparsable, zero, or too large for uint64.
+func readMdraidBlockCapacityBytes(blockName, root string) (uint64, bool) {
+	// The sysfs "size" attribute is always expressed in 512-byte units,
+	// independent of queue/logical_block_size, so that file must not be
+	// used as the multiplier (4Kn members would inflate the result 8x).
+	const sysfsSectorBytes = 512
+
+	sizePath := filepath.Join(root, "block", blockName, "size")
+	sizeStr, ok := utils.ReadStringFileOK(sizePath)
+	if !ok {
+		return 0, false
+	}
+	sectors, err := strconv.ParseUint(sizeStr, 10, 64)
+	if err != nil || sectors == 0 {
+		return 0, false
+	}
+	// Reject values whose byte count would wrap around uint64.
+	if sectors > ^uint64(0)/sysfsSectorBytes {
+		return 0, false
+	}
+
+	return sectors * sysfsSectorBytes, true
+}
--- a/agent/mdraid_linux_test.go
+++ b/agent/mdraid_linux_test.go
@@ -0,0 +1,103 @@
+//go:build linux
+
+package agent
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/henrygd/beszel/internal/entities/smart"
+)
+
+func TestMdraidMockSysfsScanAndCollect(t *testing.T) {
+	tmp := t.TempDir()
+	prev := mdraidSysfsRoot
+	mdraidSysfsRoot = tmp // point sysfs lookups at the fake tree built below
+	t.Cleanup(func() { mdraidSysfsRoot = prev })
+
+	mdDir := filepath.Join(tmp, "block", "md0", "md")
+	queueDir := filepath.Join(tmp, "block", "md0", "queue")
+	if err := os.MkdirAll(mdDir, 0o755); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.MkdirAll(queueDir, 0o755); err != nil {
+		t.Fatal(err)
+	}
+
+	write := func(path, content string) { // fails the test immediately if a fixture file cannot be written
+		t.Helper()
+		if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	write(filepath.Join(mdDir, "array_state"), "active\n")
+	write(filepath.Join(mdDir, "level"), "raid1\n")
+	write(filepath.Join(mdDir, "raid_disks"), "2\n")
+	write(filepath.Join(mdDir, "degraded"), "0\n")
+	write(filepath.Join(mdDir, "sync_action"), "resync\n") // a resync in progress is what makes the expected status WARNING
+	write(filepath.Join(mdDir, "sync_completed"), "10%\n")
+	write(filepath.Join(mdDir, "sync_speed"), "100M\n")
+	write(filepath.Join(mdDir, "mismatch_cnt"), "0\n")
+	write(filepath.Join(queueDir, "logical_block_size"), "512\n")
+	write(filepath.Join(tmp, "block", "md0", "size"), "2048\n")
+
+	devs := scanMdraidDevices()
+	if len(devs) != 1 {
+		t.Fatalf("scanMdraidDevices() = %d devices, want 1", len(devs))
+	}
+	if devs[0].Name != "/dev/md0" || devs[0].Type != "mdraid" {
+		t.Fatalf("scanMdraidDevices()[0] = %+v, want Name=/dev/md0 Type=mdraid", devs[0])
+	}
+
+	sm := &SmartManager{SmartDataMap: map[string]*smart.SmartData{}}
+	ok, err := sm.collectMdraidHealth(devs[0])
+	if err != nil || !ok {
+		t.Fatalf("collectMdraidHealth() = (ok=%v, err=%v), want (true,nil)", ok, err)
+	}
+	if len(sm.SmartDataMap) != 1 {
+		t.Fatalf("SmartDataMap len=%d, want 1", len(sm.SmartDataMap))
+	}
+	var got *smart.SmartData
+	for _, v := range sm.SmartDataMap { // take the single entry without depending on its key
+		got = v
+		break
+	}
+	if got == nil {
+		t.Fatalf("SmartDataMap value nil")
+	}
+	if got.DiskType != "mdraid" || got.DiskName != "/dev/md0" {
+		t.Fatalf("disk fields = (type=%q name=%q), want (mdraid,/dev/md0)", got.DiskType, got.DiskName)
+	}
+	if got.SmartStatus != "WARNING" {
+		t.Fatalf("SmartStatus=%q, want WARNING", got.SmartStatus)
+	}
+	if got.ModelName == "" || got.Capacity == 0 {
+		t.Fatalf("identity fields = (model=%q cap=%d), want non-empty model and cap>0", got.ModelName, got.Capacity)
+	}
+	if len(got.Attributes) < 5 { // the zero-valued degraded and mismatch_cnt fixtures produce no attribute
+		t.Fatalf("attributes len=%d, want >= 5", len(got.Attributes))
+	}
+}
+
+func TestMdraidSmartStatus(t *testing.T) {
+	// Each case pins one branch of the state/sync/degraded precedence.
+	cases := []struct {
+		label  string
+		health mdraidHealth
+		want   string
+	}{
+		{"inactive", mdraidHealth{arrayState: "inactive"}, "FAILED"},
+		{"degraded+recover", mdraidHealth{arrayState: "active", degraded: 1, syncAction: "recover"}, "WARNING"},
+		{"degraded", mdraidHealth{arrayState: "active", degraded: 1}, "FAILED"},
+		{"recover", mdraidHealth{arrayState: "active", syncAction: "recover"}, "WARNING"},
+		{"clean", mdraidHealth{arrayState: "clean"}, "PASSED"},
+		{"unknown", mdraidHealth{arrayState: "unknown"}, "UNKNOWN"},
+	}
+	for _, tc := range cases {
+		if got := mdraidSmartStatus(tc.health); got != tc.want {
+			t.Fatalf("mdraidSmartStatus(%s) = %q, want %s", tc.label, got, tc.want)
+		}
+	}
+}
--- a/agent/mdraid_stub.go
+++ b/agent/mdraid_stub.go
@@ -0,0 +1,11 @@
+//go:build !linux
+
+package agent
+
+func scanMdraidDevices() []*DeviceInfo {
+	return nil // stub for non-Linux builds: no md arrays are ever reported
+}
+
+func (sm *SmartManager) collectMdraidHealth(deviceInfo *DeviceInfo) (bool, error) {
+	return false, nil // stub for non-Linux builds: always reports "not handled" with no error
+}
--- a/agent/network.go
+++ b/agent/network.go
@@ -8,6 +8,7 @@ import (
 	"time"

 	"github.com/henrygd/beszel/agent/deltatracker"
+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/system"
 	psutilNet "github.com/shirou/gopsutil/v4/net"
 )
@@ -94,7 +95,7 @@ func (a *Agent) initializeNetIoStats() {
 	a.netInterfaces = make(map[string]struct{}, 0)

 	// parse NICS env var for whitelist / blacklist
-	nicsEnvVal, nicsEnvExists := GetEnv("NICS")
+	nicsEnvVal, nicsEnvExists := utils.GetEnv("NICS")
 	var nicCfg *NicConfig
 	if nicsEnvExists {
 		nicCfg = newNicConfig(nicsEnvVal)
@@ -103,10 +104,7 @@ func (a *Agent) initializeNetIoStats() {
 	// get current network I/O stats and record valid interfaces
 	if netIO, err := psutilNet.IOCounters(true); err == nil {
 		for _, v := range netIO {
-			if nicsEnvExists && !isValidNic(v.Name, nicCfg) {
-				continue
-			}
-			if a.skipNetworkInterface(v) {
+			if skipNetworkInterface(v, nicCfg) {
 				continue
 			}
 			slog.Info("Detected network interface", "name", v.Name, "sent", v.BytesSent, "recv", v.BytesRecv)
@@ -215,10 +213,8 @@ func (a *Agent) applyNetworkTotals(
 	totalBytesSent, totalBytesRecv uint64,
 	bytesSentPerSecond, bytesRecvPerSecond uint64,
 ) {
-	networkSentPs := bytesToMegabytes(float64(bytesSentPerSecond))
-	networkRecvPs := bytesToMegabytes(float64(bytesRecvPerSecond))
-	if networkSentPs > 10_000 || networkRecvPs > 10_000 {
-		slog.Warn("Invalid net stats. Resetting.", "sent", networkSentPs, "recv", networkRecvPs)
+	if bytesSentPerSecond > 10_000_000_000 || bytesRecvPerSecond > 10_000_000_000 {
+		slog.Warn("Invalid net stats. Resetting.", "sent", bytesSentPerSecond, "recv", bytesRecvPerSecond)
 		for _, v := range netIO {
 			if _, exists := a.netInterfaces[v.Name]; !exists {
 				continue
@@ -228,21 +224,29 @@ func (a *Agent) applyNetworkTotals(
 		a.initializeNetIoStats()
 		delete(a.netIoStats, cacheTimeMs)
 		delete(a.netInterfaceDeltaTrackers, cacheTimeMs)
-		systemStats.NetworkSent = 0
-		systemStats.NetworkRecv = 0
 		systemStats.Bandwidth[0], systemStats.Bandwidth[1] = 0, 0
 		return
 	}

-	systemStats.NetworkSent = networkSentPs
-	systemStats.NetworkRecv = networkRecvPs
 	systemStats.Bandwidth[0], systemStats.Bandwidth[1] = bytesSentPerSecond, bytesRecvPerSecond
 	nis.BytesSent = totalBytesSent
 	nis.BytesRecv = totalBytesRecv
 	a.netIoStats[cacheTimeMs] = nis
 }

-func (a *Agent) skipNetworkInterface(v psutilNet.IOCountersStat) bool {
+// skipNetworkInterface returns true if the network interface should be ignored.
+func skipNetworkInterface(v psutilNet.IOCountersStat, nicCfg *NicConfig) bool {
+	if nicCfg != nil {
+		if !isValidNic(v.Name, nicCfg) {
+			return true
+		}
+		// In whitelist mode, we honor explicit inclusion without auto-filtering.
+		if !nicCfg.isBlacklist {
+			return false
+		}
+		// In blacklist mode, still apply the auto-filter below.
+	}
+
 	switch {
 	case strings.HasPrefix(v.Name, "lo"),
 		strings.HasPrefix(v.Name, "docker"),
--- a/agent/network_test.go
+++ b/agent/network_test.go
@@ -261,6 +261,39 @@ func TestNewNicConfig(t *testing.T) {
 		})
 	}
 }
+func TestSkipNetworkInterface(t *testing.T) {
+	tests := []struct {
+		name       string
+		nic        psutilNet.IOCountersStat
+		nicCfg     *NicConfig
+		expectSkip bool
+	}{
+		{"loopback lo", psutilNet.IOCountersStat{Name: "lo", BytesSent: 100, BytesRecv: 100}, nil, true},
+		{"loopback lo0", psutilNet.IOCountersStat{Name: "lo0", BytesSent: 100, BytesRecv: 100}, nil, true},
+		{"docker prefix", psutilNet.IOCountersStat{Name: "docker0", BytesSent: 100, BytesRecv: 100}, nil, true},
+		{"br- prefix", psutilNet.IOCountersStat{Name: "br-lan", BytesSent: 100, BytesRecv: 100}, nil, true},
+		{"veth prefix", psutilNet.IOCountersStat{Name: "veth0abc", BytesSent: 100, BytesRecv: 100}, nil, true},
+		{"bond prefix", psutilNet.IOCountersStat{Name: "bond0", BytesSent: 100, BytesRecv: 100}, nil, true},
+		{"cali prefix", psutilNet.IOCountersStat{Name: "cali1234", BytesSent: 100, BytesRecv: 100}, nil, true},
+		{"zero BytesRecv", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 100, BytesRecv: 0}, nil, true},
+		{"zero BytesSent", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 0, BytesRecv: 100}, nil, true},
+		{"both zero", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 0, BytesRecv: 0}, nil, true},
+		{"normal eth0", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 100, BytesRecv: 200}, nil, false},
+		{"normal wlan0", psutilNet.IOCountersStat{Name: "wlan0", BytesSent: 1, BytesRecv: 1}, nil, false},
+		{"whitelist overrides skip (docker)", psutilNet.IOCountersStat{Name: "docker0", BytesSent: 100, BytesRecv: 100}, newNicConfig("docker0"), false},
+		{"whitelist overrides skip (lo)", psutilNet.IOCountersStat{Name: "lo", BytesSent: 100, BytesRecv: 100}, newNicConfig("lo"), false},
+		{"whitelist exclusion", psutilNet.IOCountersStat{Name: "eth1", BytesSent: 100, BytesRecv: 100}, newNicConfig("eth0"), true},
+		{"blacklist skip lo", psutilNet.IOCountersStat{Name: "lo", BytesSent: 100, BytesRecv: 100}, newNicConfig("-eth0"), true},
+		{"blacklist explicit eth0", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 100, BytesRecv: 100}, newNicConfig("-eth0"), true},
+		{"blacklist allow eth1", psutilNet.IOCountersStat{Name: "eth1", BytesSent: 100, BytesRecv: 100}, newNicConfig("-eth0"), false},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.Equal(t, tt.expectSkip, skipNetworkInterface(tt.nic, tt.nicCfg))
+		})
+	}
+}
+
 func TestEnsureNetworkInterfacesMap(t *testing.T) {
 	var a Agent
 	var stats system.Stats
@@ -383,8 +416,6 @@ func TestApplyNetworkTotals(t *testing.T) {
 		totalBytesSent        uint64
 		totalBytesRecv        uint64
 		expectReset           bool
-		expectedNetworkSent   float64
-		expectedNetworkRecv   float64
 		expectedBandwidthSent uint64
 		expectedBandwidthRecv uint64
 	}{
@@ -395,8 +426,6 @@ func TestApplyNetworkTotals(t *testing.T) {
 			totalBytesSent:        10000000,
 			totalBytesRecv:        20000000,
 			expectReset:           false,
-			expectedNetworkSent:   0.95, // ~1 MB/s rounded to 2 decimals
-			expectedNetworkRecv:   1.91, // ~2 MB/s rounded to 2 decimals
 			expectedBandwidthSent: 1000000,
 			expectedBandwidthRecv: 2000000,
 		},
@@ -424,18 +453,6 @@ func TestApplyNetworkTotals(t *testing.T) {
 			totalBytesRecv:     20000000,
 			expectReset:        true,
 		},
-		{
-			name:                  "Valid network stats - at threshold boundary",
-			bytesSentPerSecond:    10485750000, // ~9999.99 MB/s (rounds to 9999.99)
-			bytesRecvPerSecond:    10485750000, // ~9999.99 MB/s (rounds to 9999.99)
-			totalBytesSent:        10000000,
-			totalBytesRecv:        20000000,
-			expectReset:           false,
-			expectedNetworkSent:   9999.99,
-			expectedNetworkRecv:   9999.99,
-			expectedBandwidthSent: 10485750000,
-			expectedBandwidthRecv: 10485750000,
-		},
 		{
 			name:                  "Zero values",
 			bytesSentPerSecond:    0,
@@ -443,8 +460,6 @@ func TestApplyNetworkTotals(t *testing.T) {
 			totalBytesSent:        0,
 			totalBytesRecv:        0,
 			expectReset:           false,
-			expectedNetworkSent:   0.0,
-			expectedNetworkRecv:   0.0,
 			expectedBandwidthSent: 0,
 			expectedBandwidthRecv: 0,
 		},
@@ -481,14 +496,10 @@ func TestApplyNetworkTotals(t *testing.T) {
 				// Should have reset network tracking state - maps cleared and stats zeroed
 				assert.NotContains(t, a.netIoStats, cacheTimeMs, "cache entry should be cleared after reset")
 				assert.NotContains(t, a.netInterfaceDeltaTrackers, cacheTimeMs, "tracker should be cleared on reset")
-				assert.Zero(t, systemStats.NetworkSent)
-				assert.Zero(t, systemStats.NetworkRecv)
 				assert.Zero(t, systemStats.Bandwidth[0])
 				assert.Zero(t, systemStats.Bandwidth[1])
 			} else {
 				// Should have applied stats
-				assert.Equal(t, tt.expectedNetworkSent, systemStats.NetworkSent)
-				assert.Equal(t, tt.expectedNetworkRecv, systemStats.NetworkRecv)
 				assert.Equal(t, tt.expectedBandwidthSent, systemStats.Bandwidth[0])
 				assert.Equal(t, tt.expectedBandwidthRecv, systemStats.Bandwidth[1])

--- a/agent/sensors.go
+++ b/agent/sensors.go
@@ -10,6 +10,7 @@ import (
 	"strings"
 	"unicode/utf8"

+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/system"

 	"github.com/shirou/gopsutil/v4/common"
@@ -26,9 +27,9 @@ type SensorConfig struct {
 }

 func (a *Agent) newSensorConfig() *SensorConfig {
-	primarySensor, _ := GetEnv("PRIMARY_SENSOR")
-	sysSensors, _ := GetEnv("SYS_SENSORS")
-	sensorsEnvVal, sensorsSet := GetEnv("SENSORS")
+	primarySensor, _ := utils.GetEnv("PRIMARY_SENSOR")
+	sysSensors, _ := utils.GetEnv("SYS_SENSORS")
+	sensorsEnvVal, sensorsSet := utils.GetEnv("SENSORS")
 	skipCollection := sensorsSet && sensorsEnvVal == ""

 	return a.newSensorConfigWithEnv(primarySensor, sysSensors, sensorsEnvVal, skipCollection)
@@ -135,7 +136,7 @@ func (a *Agent) updateTemperatures(systemStats *system.Stats) {
 		case sensorName:
 			a.systemInfo.DashboardTemp = sensor.Temperature
 		}
-		systemStats.Temperatures[sensorName] = twoDecimals(sensor.Temperature)
+		systemStats.Temperatures[sensorName] = utils.TwoDecimals(sensor.Temperature)
 	}
 }

--- a/agent/sensors_test.go
+++ b/agent/sensors_test.go
@@ -5,7 +5,6 @@ package agent
 import (
 	"context"
 	"fmt"
-	"os"
 	"testing"

 	"github.com/henrygd/beszel/internal/entities/system"
@@ -329,34 +328,10 @@ func TestNewSensorConfigWithEnv(t *testing.T) {
 }

 func TestNewSensorConfig(t *testing.T) {
-	// Save original environment variables
-	originalPrimary, hasPrimary := os.LookupEnv("BESZEL_AGENT_PRIMARY_SENSOR")
-	originalSys, hasSys := os.LookupEnv("BESZEL_AGENT_SYS_SENSORS")
-	originalSensors, hasSensors := os.LookupEnv("BESZEL_AGENT_SENSORS")
-
-	// Restore environment variables after the test
-	defer func() {
-		// Clean up test environment variables
-		os.Unsetenv("BESZEL_AGENT_PRIMARY_SENSOR")
-		os.Unsetenv("BESZEL_AGENT_SYS_SENSORS")
-		os.Unsetenv("BESZEL_AGENT_SENSORS")
-
-		// Restore original values if they existed
-		if hasPrimary {
-			os.Setenv("BESZEL_AGENT_PRIMARY_SENSOR", originalPrimary)
-		}
-		if hasSys {
-			os.Setenv("BESZEL_AGENT_SYS_SENSORS", originalSys)
-		}
-		if hasSensors {
-			os.Setenv("BESZEL_AGENT_SENSORS", originalSensors)
-		}
-	}()
-
 	// Set test environment variables
-	os.Setenv("BESZEL_AGENT_PRIMARY_SENSOR", "test_primary")
-	os.Setenv("BESZEL_AGENT_SYS_SENSORS", "/test/path")
-	os.Setenv("BESZEL_AGENT_SENSORS", "test_sensor1,test_*,test_sensor3")
+	t.Setenv("BESZEL_AGENT_PRIMARY_SENSOR", "test_primary")
+	t.Setenv("BESZEL_AGENT_SYS_SENSORS", "/test/path")
+	t.Setenv("BESZEL_AGENT_SENSORS", "test_sensor1,test_*,test_sensor3")

 	agent := &Agent{}
 	result := agent.newSensorConfig()
--- a/agent/server.go
+++ b/agent/server.go
@@ -12,6 +12,7 @@ import (
 	"time"

 	"github.com/henrygd/beszel"
+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/common"
 	"github.com/henrygd/beszel/internal/entities/system"

@@ -36,7 +37,7 @@ var hubVersions map[string]semver.Version
 // and begins listening for connections. Returns an error if the server
 // is already running or if there's an issue starting the server.
 func (a *Agent) StartServer(opts ServerOptions) error {
-	if disableSSH, _ := GetEnv("DISABLE_SSH"); disableSSH == "true" {
+	if disableSSH, _ := utils.GetEnv("DISABLE_SSH"); disableSSH == "true" {
 		return errors.New("SSH disabled")
 	}
 	if a.server != nil {
@@ -238,11 +239,11 @@ func ParseKeys(input string) ([]gossh.PublicKey, error) {
 // and finally defaults to ":45876".
 func GetAddress(addr string) string {
 	if addr == "" {
-		addr, _ = GetEnv("LISTEN")
+		addr, _ = utils.GetEnv("LISTEN")
 	}
 	if addr == "" {
 		// Legacy PORT environment variable support
-		addr, _ = GetEnv("PORT")
+		addr, _ = utils.GetEnv("PORT")
 	}
 	if addr == "" {
 		return ":45876"
@@ -258,7 +259,7 @@ func GetAddress(addr string) string {
 // It checks the NETWORK environment variable first, then infers from
 // the address format: addresses starting with "/" are "unix", others are "tcp".
 func GetNetwork(addr string) string {
-	if network, ok := GetEnv("NETWORK"); ok && network != "" {
+	if network, ok := utils.GetEnv("NETWORK"); ok && network != "" {
 		return network
 	}
 	if strings.HasPrefix(addr, "/") {
--- a/agent/server_test.go
+++ b/agent/server_test.go
@@ -183,8 +183,7 @@ func TestStartServer(t *testing.T) {
 }

 func TestStartServerDisableSSH(t *testing.T) {
-	os.Setenv("BESZEL_AGENT_DISABLE_SSH", "true")
-	defer os.Unsetenv("BESZEL_AGENT_DISABLE_SSH")
+	t.Setenv("BESZEL_AGENT_DISABLE_SSH", "true")

 	agent, err := NewAgent("")
 	require.NoError(t, err)
--- a/agent/smart.go
+++ b/agent/smart.go
@@ -18,6 +18,7 @@ import (
 	"sync"
 	"time"

+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/smart"
 )

@@ -156,7 +157,7 @@ func (sm *SmartManager) ScanDevices(force bool) error {
 	currentDevices := sm.devicesSnapshot()

 	var configuredDevices []*DeviceInfo
-	if configuredRaw, ok := GetEnv("SMART_DEVICES"); ok {
+	if configuredRaw, ok := utils.GetEnv("SMART_DEVICES"); ok {
 		slog.Info("SMART_DEVICES", "value", configuredRaw)
 		config := strings.TrimSpace(configuredRaw)
 		if config == "" {
@@ -199,6 +200,13 @@ func (sm *SmartManager) ScanDevices(force bool) error {
 		hasValidScan = true
 	}

+	// Add Linux mdraid arrays by reading sysfs health fields. This does not
+	// require smartctl and does not scan the whole device.
+	if raidDevices := scanMdraidDevices(); len(raidDevices) > 0 {
+		scannedDevices = append(scannedDevices, raidDevices...)
+		hasValidScan = true
+	}
+
 	finalDevices := mergeDeviceLists(currentDevices, scannedDevices, configuredDevices)
 	finalDevices = sm.filterExcludedDevices(finalDevices)
 	sm.updateSmartDevices(finalDevices)
@@ -215,7 +223,7 @@ func (sm *SmartManager) ScanDevices(force bool) error {
 }

 func (sm *SmartManager) parseConfiguredDevices(config string) ([]*DeviceInfo, error) {
-	splitChar := os.Getenv("SMART_DEVICES_SEPARATOR")
+	splitChar, _ := utils.GetEnv("SMART_DEVICES_SEPARATOR")
 	if splitChar == "" {
 		splitChar = ","
 	}
@@ -253,7 +261,7 @@ func (sm *SmartManager) parseConfiguredDevices(config string) ([]*DeviceInfo, er
 }

 func (sm *SmartManager) refreshExcludedDevices() {
-	rawValue, _ := GetEnv("EXCLUDE_SMART")
+	rawValue, _ := utils.GetEnv("EXCLUDE_SMART")
 	sm.excludedDevices = make(map[string]struct{})

 	for entry := range strings.SplitSeq(rawValue, ",") {
@@ -450,6 +458,12 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
 		return errNoValidSmartData
 	}

+	// mdraid health is not exposed via SMART; Linux exposes array state in sysfs.
+	if deviceInfo != nil {
+		if ok, err := sm.collectMdraidHealth(deviceInfo); ok {
+			return err
+		}
+	}
 	// eMMC health is not exposed via SMART on Linux, but the kernel provides
 	// wear / EOL indicators via sysfs. Prefer that path when available.
 	if deviceInfo != nil {
@@ -857,15 +871,18 @@ func (sm *SmartManager) parseSmartForSata(output []byte) (bool, int) {
 	smartData.FirmwareVersion = data.FirmwareVersion
 	smartData.Capacity = data.UserCapacity.Bytes
 	smartData.Temperature = data.Temperature.Current
-	if smartData.Temperature == 0 {
-		if temp, ok := temperatureFromAtaDeviceStatistics(data.AtaDeviceStatistics); ok {
-			smartData.Temperature = temp
-		}
-	}
 	smartData.SmartStatus = getSmartStatus(smartData.Temperature, data.SmartStatus.Passed)
 	smartData.DiskName = data.Device.Name
 	smartData.DiskType = data.Device.Type

+	// get values from ata_device_statistics if necessary
+	var ataDeviceStats smart.AtaDeviceStatistics
+	if smartData.Temperature == 0 {
+		if temp := findAtaDeviceStatisticsValue(&data, &ataDeviceStats, 5, "Current Temperature", 0, 255); temp != nil {
+			smartData.Temperature = uint8(*temp)
+		}
+	}
+
 	// update SmartAttributes
 	smartData.Attributes = make([]*smart.SmartAttribute, 0, len(data.AtaSmartAttributes.Table))
 	for _, attr := range data.AtaSmartAttributes.Table {
@@ -900,23 +917,20 @@ func getSmartStatus(temperature uint8, passed bool) string {
 	}
 }

-func temperatureFromAtaDeviceStatistics(stats smart.AtaDeviceStatistics) (uint8, bool) {
-	entry := findAtaDeviceStatisticsEntry(stats, 5, "Current Temperature")
-	if entry == nil || entry.Value == nil {
-		return 0, false
-	}
-	if *entry.Value > 255 {
-		return 0, false
-	}
-	return uint8(*entry.Value), true
-}
-
 // findAtaDeviceStatisticsEntry centralizes ATA devstat lookups so additional
 // metrics can be pulled from the same structure in the future.
-func findAtaDeviceStatisticsEntry(stats smart.AtaDeviceStatistics, pageNumber uint8, entryName string) *smart.AtaDeviceStatisticsEntry {
-	for pageIdx := range stats.Pages {
-		page := &stats.Pages[pageIdx]
-		if page.Number != pageNumber {
+func findAtaDeviceStatisticsValue(data *smart.SmartInfoForSata, ataDeviceStats *smart.AtaDeviceStatistics, entryNumber uint8, entryName string, minValue, maxValue int64) *int64 {
+	if len(ataDeviceStats.Pages) == 0 {
+		if len(data.AtaDeviceStatistics) == 0 {
+			return nil
+		}
+		if err := json.Unmarshal(data.AtaDeviceStatistics, ataDeviceStats); err != nil {
+			return nil
+		}
+	}
+	for pageIdx := range ataDeviceStats.Pages {
+		page := &ataDeviceStats.Pages[pageIdx]
+		if page.Number != entryNumber {
 			continue
 		}
 		for entryIdx := range page.Table {
@@ -924,7 +938,10 @@ func findAtaDeviceStatisticsEntry(stats smart.AtaDeviceStatistics, pageNumber ui
 			if !strings.EqualFold(entry.Name, entryName) {
 				continue
 			}
-			return entry
+			if entry.Value == nil || *entry.Value < minValue || *entry.Value > maxValue {
+				return nil
+			}
+			return entry.Value
 		}
 	}
 	return nil
@@ -1146,9 +1163,11 @@ func NewSmartManager() (*SmartManager, error) {
 	slog.Debug("smartctl", "path", path, "err", err)
 	if err != nil {
 		// Keep the previous fail-fast behavior unless this Linux host exposes
-		// eMMC health via sysfs, in which case smartctl is optional.
-		if runtime.GOOS == "linux" && len(scanEmmcDevices()) > 0 {
-			return sm, nil
+		// eMMC or mdraid health via sysfs, in which case smartctl is optional.
+		if runtime.GOOS == "linux" {
+			if len(scanEmmcDevices()) > 0 || len(scanMdraidDevices()) > 0 {
+				return sm, nil
+			}
 		}
 		return nil, err
 	}
--- a/agent/smart_test.go
+++ b/agent/smart_test.go
@@ -121,6 +121,78 @@ func TestParseSmartForSataDeviceStatisticsTemperature(t *testing.T) {
 	assert.Equal(t, uint8(22), deviceData.Temperature)
 }

+func TestParseSmartForSataAtaDeviceStatistics(t *testing.T) {
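+	// Verifies that, when the payload has no top-level "temperature" field, Temperature is taken from the "Current Temperature" entry on ata_device_statistics page 5.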
+	jsonPayload := []byte(`{
+		"smartctl": {"exit_status": 0},
+		"device": {"name": "/dev/sdb", "type": "sat"},
+		"model_name": "SanDisk SSD U110 16GB",
+		"serial_number": "lksjfh23lhj",
+		"firmware_version": "U21B001",
+		"user_capacity": {"bytes": 16013942784},
+		"smart_status": {"passed": true},
+		"ata_smart_attributes": {"table": []},
+		"ata_device_statistics": {
+			"pages": [
+				{
+					"number": 5,
+					"name": "Temperature Statistics",
+					"table": [
+						{"name": "Current Temperature", "value": 43, "flags": {"valid": true}},
+						{"name": "Specified Minimum Operating Temperature", "value": -20, "flags": {"valid": true}}
+					]
+				}
+			]
+		}
+	}`)
+
+	sm := &SmartManager{SmartDataMap: make(map[string]*smart.SmartData)}
+	hasData, exitStatus := sm.parseSmartForSata(jsonPayload)
+	require.True(t, hasData)
+	assert.Equal(t, 0, exitStatus)
+
+	deviceData, ok := sm.SmartDataMap["lksjfh23lhj"]
+	require.True(t, ok, "expected smart data entry for serial lksjfh23lhj")
+	assert.Equal(t, uint8(43), deviceData.Temperature)
+}
+
+func TestParseSmartForSataNegativeDeviceStatistics(t *testing.T) {
+	// Tests that negative values in ata_device_statistics (e.g. min operating temp)
+	// do not cause the entire SAT parser to fail.
+	jsonPayload := []byte(`{
+		"smartctl": {"exit_status": 0},
+		"device": {"name": "/dev/sdb", "type": "sat"},
+		"model_name": "SanDisk SSD U110 16GB",
+		"serial_number": "NEGATIVE123",
+		"firmware_version": "U21B001",
+		"user_capacity": {"bytes": 16013942784},
+		"smart_status": {"passed": true},
+		"temperature": {"current": 38},
+		"ata_smart_attributes": {"table": []},
+		"ata_device_statistics": {
+			"pages": [
+				{
+					"number": 5,
+					"name": "Temperature Statistics",
+					"table": [
+						{"name": "Current Temperature", "value": 38, "flags": {"valid": true}},
+						{"name": "Specified Minimum Operating Temperature", "value": -20, "flags": {"valid": true}}
+					]
+				}
+			]
+		}
+	}`)
+
+	sm := &SmartManager{SmartDataMap: make(map[string]*smart.SmartData)}
+	hasData, exitStatus := sm.parseSmartForSata(jsonPayload)
+	require.True(t, hasData)
+	assert.Equal(t, 0, exitStatus)
+
+	deviceData, ok := sm.SmartDataMap["NEGATIVE123"]
+	require.True(t, ok, "expected smart data entry for serial NEGATIVE123")
+	assert.Equal(t, uint8(38), deviceData.Temperature)
+}
+
 func TestParseSmartForSataParentheticalRawValue(t *testing.T) {
 	jsonPayload := []byte(`{
 		"smartctl": {"exit_status": 0},
@@ -727,6 +799,182 @@ func TestIsVirtualDeviceScsi(t *testing.T) {
 	}
 }

+func TestFindAtaDeviceStatisticsValue(t *testing.T) {
+	val42 := int64(42)
+	val100 := int64(100)
+	valMinus20 := int64(-20)
+
+	tests := []struct {
+		name           string
+		data           smart.SmartInfoForSata
+		ataDeviceStats smart.AtaDeviceStatistics
+		entryNumber    uint8
+		entryName      string
+		minValue       int64
+		maxValue       int64
+		expectedValue  *int64
+	}{
+		{
+			name: "value in ataDeviceStats",
+			ataDeviceStats: smart.AtaDeviceStatistics{
+				Pages: []smart.AtaDeviceStatisticsPage{
+					{
+						Number: 5,
+						Table: []smart.AtaDeviceStatisticsEntry{
+							{Name: "Current Temperature", Value: &val42},
+						},
+					},
+				},
+			},
+			entryNumber:   5,
+			entryName:     "Current Temperature",
+			minValue:      0,
+			maxValue:      100,
+			expectedValue: &val42,
+		},
+		{
+			name: "value unmarshaled from data",
+			data: smart.SmartInfoForSata{
+				AtaDeviceStatistics: []byte(`{"pages":[{"number":5,"table":[{"name":"Current Temperature","value":100}]}]}`),
+			},
+			entryNumber:   5,
+			entryName:     "Current Temperature",
+			minValue:      0,
+			maxValue:      255,
+			expectedValue: &val100,
+		},
+		{
+			name: "value out of range (too high)",
+			ataDeviceStats: smart.AtaDeviceStatistics{
+				Pages: []smart.AtaDeviceStatisticsPage{
+					{
+						Number: 5,
+						Table: []smart.AtaDeviceStatisticsEntry{
+							{Name: "Current Temperature", Value: &val100},
+						},
+					},
+				},
+			},
+			entryNumber:   5,
+			entryName:     "Current Temperature",
+			minValue:      0,
+			maxValue:      50,
+			expectedValue: nil,
+		},
+		{
+			name: "value out of range (too low)",
+			ataDeviceStats: smart.AtaDeviceStatistics{
+				Pages: []smart.AtaDeviceStatisticsPage{
+					{
+						Number: 5,
+						Table: []smart.AtaDeviceStatisticsEntry{
+							{Name: "Min Temp", Value: &valMinus20},
+						},
+					},
+				},
+			},
+			entryNumber:   5,
+			entryName:     "Min Temp",
+			minValue:      0,
+			maxValue:      100,
+			expectedValue: nil,
+		},
+		{
+			name:          "no statistics available",
+			data:          smart.SmartInfoForSata{},
+			entryNumber:   5,
+			entryName:     "Current Temperature",
+			minValue:      0,
+			maxValue:      255,
+			expectedValue: nil,
+		},
+		{
+			name: "wrong page number",
+			ataDeviceStats: smart.AtaDeviceStatistics{
+				Pages: []smart.AtaDeviceStatisticsPage{
+					{
+						Number: 1,
+						Table: []smart.AtaDeviceStatisticsEntry{
+							{Name: "Current Temperature", Value: &val42},
+						},
+					},
+				},
+			},
+			entryNumber:   5,
+			entryName:     "Current Temperature",
+			minValue:      0,
+			maxValue:      100,
+			expectedValue: nil,
+		},
+		{
+			name: "wrong entry name",
+			ataDeviceStats: smart.AtaDeviceStatistics{
+				Pages: []smart.AtaDeviceStatisticsPage{
+					{
+						Number: 5,
+						Table: []smart.AtaDeviceStatisticsEntry{
+							{Name: "Other Stat", Value: &val42},
+						},
+					},
+				},
+			},
+			entryNumber:   5,
+			entryName:     "Current Temperature",
+			minValue:      0,
+			maxValue:      100,
+			expectedValue: nil,
+		},
+		{
+			name: "case insensitive name match",
+			ataDeviceStats: smart.AtaDeviceStatistics{
+				Pages: []smart.AtaDeviceStatisticsPage{
+					{
+						Number: 5,
+						Table: []smart.AtaDeviceStatisticsEntry{
+							{Name: "CURRENT TEMPERATURE", Value: &val42},
+						},
+					},
+				},
+			},
+			entryNumber:   5,
+			entryName:     "Current Temperature",
+			minValue:      0,
+			maxValue:      100,
+			expectedValue: &val42,
+		},
+		{
+			name: "entry value is nil",
+			ataDeviceStats: smart.AtaDeviceStatistics{
+				Pages: []smart.AtaDeviceStatisticsPage{
+					{
+						Number: 5,
+						Table: []smart.AtaDeviceStatisticsEntry{
+							{Name: "Current Temperature", Value: nil},
+						},
+					},
+				},
+			},
+			entryNumber:   5,
+			entryName:     "Current Temperature",
+			minValue:      0,
+			maxValue:      100,
+			expectedValue: nil,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := findAtaDeviceStatisticsValue(&tt.data, &tt.ataDeviceStats, tt.entryNumber, tt.entryName, tt.minValue, tt.maxValue)
+			if tt.expectedValue == nil {
+				assert.Nil(t, result)
+			} else {
+				require.NotNil(t, result)
+				assert.Equal(t, *tt.expectedValue, *result)
+			}
+		})
+	}
+}
+
 func TestRefreshExcludedDevices(t *testing.T) {
 	tests := []struct {
 		name         string
@@ -787,7 +1035,7 @@ func TestRefreshExcludedDevices(t *testing.T) {
 				t.Setenv("EXCLUDE_SMART", tt.envValue)
 			} else {
 				// Ensure env var is not set for empty test
-				os.Unsetenv("EXCLUDE_SMART")
+				t.Setenv("EXCLUDE_SMART", "")
 			}

 			sm := &SmartManager{}
--- a/agent/system.go
+++ b/agent/system.go
@@ -12,6 +12,7 @@ import (

 	"github.com/henrygd/beszel"
 	"github.com/henrygd/beszel/agent/battery"
+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/agent/zfs"
 	"github.com/henrygd/beszel/internal/entities/container"
 	"github.com/henrygd/beszel/internal/entities/system"
@@ -127,13 +128,13 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats {
 	// cpu metrics
 	cpuMetrics, err := getCpuMetrics(cacheTimeMs)
 	if err == nil {
-		systemStats.Cpu = twoDecimals(cpuMetrics.Total)
+		systemStats.Cpu = utils.TwoDecimals(cpuMetrics.Total)
 		systemStats.CpuBreakdown = []float64{
-			twoDecimals(cpuMetrics.User),
-			twoDecimals(cpuMetrics.System),
-			twoDecimals(cpuMetrics.Iowait),
-			twoDecimals(cpuMetrics.Steal),
-			twoDecimals(cpuMetrics.Idle),
+			utils.TwoDecimals(cpuMetrics.User),
+			utils.TwoDecimals(cpuMetrics.System),
+			utils.TwoDecimals(cpuMetrics.Iowait),
+			utils.TwoDecimals(cpuMetrics.Steal),
+			utils.TwoDecimals(cpuMetrics.Idle),
 		}
 	} else {
 		slog.Error("Error getting cpu metrics", "err", err)
@@ -157,8 +158,8 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats {
 	// memory
 	if v, err := mem.VirtualMemory(); err == nil {
 		// swap
-		systemStats.Swap = bytesToGigabytes(v.SwapTotal)
-		systemStats.SwapUsed = bytesToGigabytes(v.SwapTotal - v.SwapFree - v.SwapCached)
+		systemStats.Swap = utils.BytesToGigabytes(v.SwapTotal)
+		systemStats.SwapUsed = utils.BytesToGigabytes(v.SwapTotal - v.SwapFree - v.SwapCached)
 		// cache + buffers value for default mem calculation
 		// note: gopsutil automatically adds SReclaimable to v.Cached
 		cacheBuff := v.Cached + v.Buffers - v.Shared
@@ -181,13 +182,13 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats {
 			if arcSize, _ := zfs.ARCSize(); arcSize > 0 && arcSize < v.Used {
 				v.Used = v.Used - arcSize
 				v.UsedPercent = float64(v.Used) / float64(v.Total) * 100.0
-				systemStats.MemZfsArc = bytesToGigabytes(arcSize)
+				systemStats.MemZfsArc = utils.BytesToGigabytes(arcSize)
 			}
 		}
-		systemStats.Mem = bytesToGigabytes(v.Total)
-		systemStats.MemBuffCache = bytesToGigabytes(cacheBuff)
-		systemStats.MemUsed = bytesToGigabytes(v.Used)
-		systemStats.MemPct = twoDecimals(v.UsedPercent)
+		systemStats.Mem = utils.BytesToGigabytes(v.Total)
+		systemStats.MemBuffCache = utils.BytesToGigabytes(cacheBuff)
+		systemStats.MemUsed = utils.BytesToGigabytes(v.Used)
+		systemStats.MemPct = utils.TwoDecimals(v.UsedPercent)
 	}

 	// disk usage
--- a/agent/systemd.go
+++ b/agent/systemd.go
@@ -15,6 +15,7 @@ import (
 	"time"

 	"github.com/coreos/go-systemd/v22/dbus"
+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/systemd"
 )

@@ -49,7 +50,7 @@ func isSystemdAvailable() bool {

 // newSystemdManager creates a new systemdManager.
 func newSystemdManager() (*systemdManager, error) {
-	if skipSystemd, _ := GetEnv("SKIP_SYSTEMD"); skipSystemd == "true" {
+	if skipSystemd, _ := utils.GetEnv("SKIP_SYSTEMD"); skipSystemd == "true" {
 		return nil, nil
 	}

@@ -294,13 +295,13 @@ func unescapeServiceName(name string) string {
 // otherwise defaults to "*service".
 func getServicePatterns() []string {
 	patterns := []string{}
-	if envPatterns, _ := GetEnv("SERVICE_PATTERNS"); envPatterns != "" {
+	if envPatterns, _ := utils.GetEnv("SERVICE_PATTERNS"); envPatterns != "" {
 		for pattern := range strings.SplitSeq(envPatterns, ",") {
 			pattern = strings.TrimSpace(pattern)
 			if pattern == "" {
 				continue
 			}
-			if !strings.HasSuffix(pattern, ".service") {
+			if !strings.HasSuffix(pattern, "timer") && !strings.HasSuffix(pattern, ".service") {
 				pattern += ".service"
 			}
 			patterns = append(patterns, pattern)
--- a/agent/systemd_test.go
+++ b/agent/systemd_test.go
@@ -156,20 +156,23 @@ func TestGetServicePatterns(t *testing.T) {
 			expected:       []string{"*nginx*.service", "*apache*.service"},
 			cleanupEnvVars: true,
 		},
+		{
+			name:           "opt into timer monitoring",
+			prefixedEnv:    "nginx.service,docker,apache.timer",
+			unprefixedEnv:  "",
+			expected:       []string{"nginx.service", "docker.service", "apache.timer"},
+			cleanupEnvVars: true,
+		},
 	}

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			// Clean up any existing env vars
-			os.Unsetenv("BESZEL_AGENT_SERVICE_PATTERNS")
-			os.Unsetenv("SERVICE_PATTERNS")
-
 			// Set up environment variables
 			if tt.prefixedEnv != "" {
-				os.Setenv("BESZEL_AGENT_SERVICE_PATTERNS", tt.prefixedEnv)
+				t.Setenv("BESZEL_AGENT_SERVICE_PATTERNS", tt.prefixedEnv)
 			}
 			if tt.unprefixedEnv != "" {
-				os.Setenv("SERVICE_PATTERNS", tt.unprefixedEnv)
+				t.Setenv("SERVICE_PATTERNS", tt.unprefixedEnv)
 			}

 			// Run the function
@@ -177,12 +180,6 @@ func TestGetServicePatterns(t *testing.T) {

 			// Verify results
 			assert.Equal(t, tt.expected, result, "Patterns should match expected values")
-
-			// Cleanup
-			if tt.cleanupEnvVars {
-				os.Unsetenv("BESZEL_AGENT_SERVICE_PATTERNS")
-				os.Unsetenv("SERVICE_PATTERNS")
-			}
 		})
 	}
 }
--- a/agent/utils.go
+++ b/agent/utils.go
@@ -1,15 +0,0 @@
-package agent
-
-import "math"
-
-func bytesToMegabytes(b float64) float64 {
-	return twoDecimals(b / 1048576)
-}
-
-func bytesToGigabytes(b uint64) float64 {
-	return twoDecimals(float64(b) / 1073741824)
-}
-
-func twoDecimals(value float64) float64 {
-	return math.Round(value*100) / 100
-}
--- a/agent/utils/utils.go
+++ b/agent/utils/utils.go
@@ -0,0 +1,88 @@
+package utils
+
+import (
+	"io"
+	"math"
+	"os"
+	"strconv"
+	"strings"
+)
+
+// GetEnv retrieves an environment variable with a "BESZEL_AGENT_" prefix, or falls back to the unprefixed key.
+func GetEnv(key string) (value string, exists bool) {
+	if value, exists = os.LookupEnv("BESZEL_AGENT_" + key); exists {
+		return value, exists
+	}
+	return os.LookupEnv(key)
+}
+
+// BytesToMegabytes converts bytes to megabytes and rounds to two decimal places.
+func BytesToMegabytes(b float64) float64 {
+	return TwoDecimals(b / 1048576)
+}
+
+// BytesToGigabytes converts bytes to gigabytes and rounds to two decimal places.
+func BytesToGigabytes(b uint64) float64 {
+	return TwoDecimals(float64(b) / 1073741824)
+}
+
+// TwoDecimals rounds a float64 value to two decimal places.
+func TwoDecimals(value float64) float64 {
+	return math.Round(value*100) / 100
+}
+
+// func RoundFloat(val float64, precision uint) float64 {
+//     ratio := math.Pow(10, float64(precision))
+//     return math.Round(val*ratio) / ratio
+// }
+
+// ReadStringFile returns trimmed file contents or empty string on error.
+func ReadStringFile(path string) string {
+	content, _ := ReadStringFileOK(path)
+	return content
+}
+
+// ReadStringFileOK returns trimmed file contents and read success.
+func ReadStringFileOK(path string) (string, bool) {
+	b, err := os.ReadFile(path)
+	if err != nil {
+		return "", false
+	}
+	return strings.TrimSpace(string(b)), true
+}
+
+// ReadStringFileLimited reads a file into a string with a maximum size (in bytes) to avoid
+// allocating large buffers and potential panics with pseudo-files when the size is misreported.
+func ReadStringFileLimited(path string, maxSize int) (string, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+
+	buf := make([]byte, maxSize)
+	n, err := f.Read(buf)
+	if err != nil && err != io.EOF {
+		return "", err
+	}
+	return strings.TrimSpace(string(buf[:n])), nil
+}
+
+// FileExists reports whether the given path exists.
+func FileExists(path string) bool {
+	_, err := os.Stat(path)
+	return err == nil
+}
+
+// ReadUintFile parses a decimal uint64 value from a file.
+func ReadUintFile(path string) (uint64, bool) {
+	raw, ok := ReadStringFileOK(path)
+	if !ok {
+		return 0, false
+	}
+	parsed, err := strconv.ParseUint(raw, 10, 64)
+	if err != nil {
+		return 0, false
+	}
+	return parsed, true
+}
--- a/agent/utils/utils_test.go
+++ b/agent/utils/utils_test.go
@@ -0,0 +1,158 @@
+package utils
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestTwoDecimals(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    float64
+		expected float64
+	}{
+		{"round down", 1.234, 1.23},
+		{"round half up", 1.235, 1.24}, // math.Round rounds half away from zero
+		{"no rounding needed", 1.23, 1.23},
+		{"negative number", -1.235, -1.24}, // math.Round rounds half away from zero (more negative)
+		{"zero", 0.0, 0.0},
+		{"large number", 123.456, 123.46}, // third decimal (6) rounds the second decimal up
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := TwoDecimals(tt.input)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+func TestBytesToMegabytes(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    float64
+		expected float64
+	}{
+		{"1 MB", 1048576, 1.0},
+		{"512 KB", 524288, 0.5},
+		{"zero", 0, 0},
+		{"large value", 1073741824, 1024}, // 1 GB = 1024 MB
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := BytesToMegabytes(tt.input)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+func TestBytesToGigabytes(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    uint64
+		expected float64
+	}{
+		{"1 GB", 1073741824, 1.0},
+		{"512 MB", 536870912, 0.5},
+		{"0 GB", 0, 0},
+		{"2 GB", 2147483648, 2.0},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := BytesToGigabytes(tt.input)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+func TestFileFunctions(t *testing.T) {
+	tmpDir := t.TempDir()
+	testFilePath := filepath.Join(tmpDir, "test.txt")
+	testContent := "hello world"
+
+	// Test FileExists (false)
+	assert.False(t, FileExists(testFilePath))
+
+	// Test ReadStringFileOK (false)
+	content, ok := ReadStringFileOK(testFilePath)
+	assert.False(t, ok)
+	assert.Empty(t, content)
+
+	// Test ReadStringFile (empty)
+	assert.Empty(t, ReadStringFile(testFilePath))
+
+	// Write file
+	err := os.WriteFile(testFilePath, []byte(testContent+"\n "), 0644)
+	assert.NoError(t, err)
+
+	// Test FileExists (true)
+	assert.True(t, FileExists(testFilePath))
+
+	// Test ReadStringFileOK (true)
+	content, ok = ReadStringFileOK(testFilePath)
+	assert.True(t, ok)
+	assert.Equal(t, testContent, content)
+
+	// Test ReadStringFile (content)
+	assert.Equal(t, testContent, ReadStringFile(testFilePath))
+}
+
+func TestReadUintFile(t *testing.T) {
+	tmpDir := t.TempDir()
+
+	t.Run("valid uint", func(t *testing.T) {
+		path := filepath.Join(tmpDir, "uint.txt")
+		assert.NoError(t, os.WriteFile(path, []byte(" 12345\n"), 0644)) // surrounding whitespace must be trimmed before parsing
+		val, ok := ReadUintFile(path)
+		assert.True(t, ok)
+		assert.Equal(t, uint64(12345), val)
+	})
+
+	t.Run("invalid uint", func(t *testing.T) {
+		path := filepath.Join(tmpDir, "invalid.txt")
+		assert.NoError(t, os.WriteFile(path, []byte("abc"), 0644)) // fixture must exist, else this degrades to the missing-file case
+		val, ok := ReadUintFile(path)
+		assert.False(t, ok)
+		assert.Equal(t, uint64(0), val)
+	})
+
+	t.Run("missing file", func(t *testing.T) {
+		path := filepath.Join(tmpDir, "missing.txt")
+		val, ok := ReadUintFile(path)
+		assert.False(t, ok)
+		assert.Equal(t, uint64(0), val)
+	})
+}
+
+func TestGetEnv(t *testing.T) {
+	key := "TEST_VAR"
+	prefixedKey := "BESZEL_AGENT_" + key
+
+	t.Run("prefixed variable exists", func(t *testing.T) {
+		t.Setenv(prefixedKey, "prefixed_val")
+		t.Setenv(key, "unprefixed_val")
+
+		val, exists := GetEnv(key)
+		assert.True(t, exists)
+		assert.Equal(t, "prefixed_val", val)
+	})
+
+	t.Run("only unprefixed variable exists", func(t *testing.T) {
+		t.Setenv(key, "unprefixed_val")
+
+		val, exists := GetEnv(key)
+		assert.True(t, exists)
+		assert.Equal(t, "unprefixed_val", val)
+	})
+
+	t.Run("neither variable exists", func(t *testing.T) {
+		val, exists := GetEnv(key)
+		assert.False(t, exists)
+		assert.Empty(t, val)
+	})
+}
--- a/go.mod
+++ b/go.mod
@@ -1,6 +1,6 @@
 module github.com/henrygd/beszel

-go 1.26.0
+go 1.26.1

 require (
 	github.com/blang/semver v3.5.1+incompatible
--- a/internal/alerts/alerts.go
+++ b/internal/alerts/alerts.go
@@ -21,9 +21,9 @@ type hubLike interface {

 type AlertManager struct {
 	hub           hubLike
-	alertQueue    chan alertTask
-	stopChan      chan struct{}
+	stopOnce      sync.Once
 	pendingAlerts sync.Map
+	alertsCache   *AlertsCache
 }

 type AlertMessageData struct {
@@ -40,16 +40,22 @@ type UserNotificationSettings struct {
 	Webhooks []string `json:"webhooks"`
 }

+type SystemAlertFsStats struct {
+	DiskTotal float64 `json:"d"`
+	DiskUsed  float64 `json:"du"`
+}
+
+// SystemAlertStats holds the values pulled from system_stats.stats that are relevant to alerts.
 type SystemAlertStats struct {
 	Cpu          float64                       `json:"cpu"`
 	Mem          float64                       `json:"mp"`
 	Disk         float64                       `json:"dp"`
-	NetSent      float64                       `json:"ns"`
-	NetRecv      float64                       `json:"nr"`
+	Bandwidth    [2]uint64                     `json:"b"`
 	GPU          map[string]SystemAlertGPUData `json:"g"`
 	Temperatures map[string]float32            `json:"t"`
 	LoadAvg      [3]float64                    `json:"la"`
 	Battery      [2]uint8                      `json:"bat"`
+	ExtraFs      map[string]SystemAlertFsStats `json:"efs"`
 }

 type SystemAlertGPUData struct {
@@ -58,7 +64,7 @@ type SystemAlertGPUData struct {

 type SystemAlertData struct {
 	systemRecord *core.Record
-	alertRecord  *core.Record
+	alertData    CachedAlertData
 	name         string
 	unit         string
 	val          float64
@@ -92,12 +98,10 @@ var supportsTitle = map[string]struct{}{
 // NewAlertManager creates a new AlertManager instance.
 func NewAlertManager(app hubLike) *AlertManager {
 	am := &AlertManager{
-		hub:        app,
-		alertQueue: make(chan alertTask, 5),
-		stopChan:   make(chan struct{}),
+		hub:         app,
+		alertsCache: NewAlertsCache(app),
 	}
 	am.bindEvents()
-	go am.startWorker()
 	return am
 }

@@ -106,6 +110,19 @@ func (am *AlertManager) bindEvents() {
 	am.hub.OnRecordAfterUpdateSuccess("alerts").BindFunc(updateHistoryOnAlertUpdate)
 	am.hub.OnRecordAfterDeleteSuccess("alerts").BindFunc(resolveHistoryOnAlertDelete)
 	am.hub.OnRecordAfterUpdateSuccess("smart_devices").BindFunc(am.handleSmartDeviceAlert)
+
+	am.hub.OnServe().BindFunc(func(e *core.ServeEvent) error {
+		if err := am.alertsCache.PopulateFromDB(true); err != nil { // populate all alerts into cache on startup
+			e.App.Logger().Error("Failed to populate alerts cache", "err", err)
+		}
+		if err := resolveStatusAlerts(e.App); err != nil {
+			e.App.Logger().Error("Failed to resolve stale status alerts", "err", err)
+		}
+		if err := am.restorePendingStatusAlerts(); err != nil {
+			e.App.Logger().Error("Failed to restore pending status alerts", "err", err)
+		}
+		return e.Next()
+	})
 }

 // IsNotificationSilenced checks if a notification should be silenced based on configured quiet hours
@@ -259,13 +276,14 @@ func (am *AlertManager) SendShoutrrrAlert(notificationUrl, title, message, link,
 	}

 	// Add link
-	if scheme == "ntfy" {
+	switch scheme {
+	case "ntfy":
 		queryParams.Add("Actions", fmt.Sprintf("view, %s, %s", linkText, link))
-	} else if scheme == "lark" {
+	case "lark":
 		queryParams.Add("link", link)
-	} else if scheme == "bark" {
+	case "bark":
 		queryParams.Add("url", link)
-	} else {
+	default:
 		message += "\n\n" + link
 	}

@@ -298,3 +316,13 @@ func (am *AlertManager) SendTestNotification(e *core.RequestEvent) error {
 	}
 	return e.JSON(200, map[string]bool{"err": false})
 }
+
+// setAlertTriggered updates the "triggered" status of an alert record in the database
+func (am *AlertManager) setAlertTriggered(alert CachedAlertData, triggered bool) error {
+	alertRecord, err := am.hub.FindRecordById("alerts", alert.Id)
+	if err != nil {
+		return err
+	}
+	alertRecord.Set("triggered", triggered)
+	return am.hub.Save(alertRecord)
+}
--- a/internal/alerts/alerts_cache.go
+++ b/internal/alerts/alerts_cache.go
@@ -0,0 +1,177 @@
+package alerts
+
+import (
+	"github.com/pocketbase/dbx"
+	"github.com/pocketbase/pocketbase/core"
+	"github.com/pocketbase/pocketbase/tools/store"
+)
+
+// CachedAlertData represents the relevant fields of an alert record for status checking and updates.
+type CachedAlertData struct {
+	Id        string
+	SystemID  string
+	UserID    string
+	Name      string
+	Value     float64
+	Triggered bool
+	Min       uint8
+	// Created   types.DateTime
+}
+
+func (a *CachedAlertData) PopulateFromRecord(record *core.Record) {
+	a.Id = record.Id
+	a.SystemID = record.GetString("system")
+	a.UserID = record.GetString("user")
+	a.Name = record.GetString("name")
+	a.Value = record.GetFloat("value")
+	a.Triggered = record.GetBool("triggered")
+	a.Min = uint8(record.GetInt("min"))
+	// a.Created = record.GetDateTime("created")
+}
+
+// AlertsCache provides an in-memory cache for system alerts.
+type AlertsCache struct {
+	app       core.App
+	store     *store.Store[string, *store.Store[string, CachedAlertData]]
+	populated bool
+}
+
+// NewAlertsCache creates a new AlertsCache and binds the record event hooks that keep it in sync.
+func NewAlertsCache(app core.App) *AlertsCache {
+	c := AlertsCache{
+		app:   app,
+		store: store.New(map[string]*store.Store[string, CachedAlertData]{}),
+	}
+	return c.bindEvents()
+}
+
+// bindEvents sets up event listeners to keep the cache in sync with database changes.
+func (c *AlertsCache) bindEvents() *AlertsCache {
+	c.app.OnRecordAfterUpdateSuccess("alerts").BindFunc(func(e *core.RecordEvent) error {
+		// c.Delete(e.Record.Original()) // this would be needed if the system field on an existing alert was changed, however we don't currently allow that in the UI so we'll leave it commented out
+		c.Update(e.Record)
+		return e.Next()
+	})
+	c.app.OnRecordAfterDeleteSuccess("alerts").BindFunc(func(e *core.RecordEvent) error {
+		c.Delete(e.Record)
+		return e.Next()
+	})
+	c.app.OnRecordAfterCreateSuccess("alerts").BindFunc(func(e *core.RecordEvent) error {
+		c.Update(e.Record)
+		return e.Next()
+	})
+	return c
+}
+
+// PopulateFromDB reloads all alerts from the database into the cache, replacing current entries; it is a no-op if the cache is already populated and force is false.
+func (c *AlertsCache) PopulateFromDB(force bool) error {
+	if !force && c.populated {
+		return nil
+	}
+	records, err := c.app.FindAllRecords("alerts")
+	if err != nil {
+		return err
+	}
+	c.store.RemoveAll()
+	for _, record := range records {
+		c.Update(record)
+	}
+	c.populated = true
+	return nil
+}
+
+// Update adds or updates an alert record in the cache.
+func (c *AlertsCache) Update(record *core.Record) {
+	systemID := record.GetString("system")
+	if systemID == "" {
+		return
+	}
+	systemStore, ok := c.store.GetOk(systemID)
+	if !ok {
+		systemStore = store.New(map[string]CachedAlertData{})
+		c.store.Set(systemID, systemStore)
+	}
+	var ca CachedAlertData
+	ca.PopulateFromRecord(record)
+	systemStore.Set(record.Id, ca)
+}
+
+// Delete removes an alert record from the cache.
+func (c *AlertsCache) Delete(record *core.Record) {
+	systemID := record.GetString("system")
+	if systemID == "" {
+		return
+	}
+	if systemStore, ok := c.store.GetOk(systemID); ok {
+		systemStore.Remove(record.Id)
+	}
+}
+
+// GetSystemAlerts returns all alerts for the specified system, lazy-loading if necessary.
+func (c *AlertsCache) GetSystemAlerts(systemID string) []CachedAlertData {
+	systemStore, ok := c.store.GetOk(systemID)
+	if !ok {
+		// Populate cache for this system
+		records, err := c.app.FindAllRecords("alerts", dbx.NewExp("system={:system}", dbx.Params{"system": systemID}))
+		if err != nil {
+			return nil
+		}
+		systemStore = store.New(map[string]CachedAlertData{})
+		for _, record := range records {
+			var ca CachedAlertData
+			ca.PopulateFromRecord(record)
+			systemStore.Set(record.Id, ca)
+		}
+		c.store.Set(systemID, systemStore)
+	}
+	all := systemStore.GetAll()
+	alerts := make([]CachedAlertData, 0, len(all))
+	for _, alert := range all {
+		alerts = append(alerts, alert)
+	}
+	return alerts
+}
+
+// GetAlert returns a specific alert by its ID from the cache.
+func (c *AlertsCache) GetAlert(systemID, alertID string) (CachedAlertData, bool) {
+	if systemStore, ok := c.store.GetOk(systemID); ok {
+		return systemStore.GetOk(alertID)
+	}
+	return CachedAlertData{}, false
+}
+
+// GetAlertsByName returns all alerts of a specific type for the specified system.
+func (c *AlertsCache) GetAlertsByName(systemID, alertName string) []CachedAlertData {
+	allAlerts := c.GetSystemAlerts(systemID)
+	var alerts []CachedAlertData
+	for _, record := range allAlerts {
+		if record.Name == alertName {
+			alerts = append(alerts, record)
+		}
+	}
+	return alerts
+}
+
+// GetAlertsExcludingNames returns all alerts for the specified system excluding the given types.
+func (c *AlertsCache) GetAlertsExcludingNames(systemID string, excludedNames ...string) []CachedAlertData {
+	excludeMap := make(map[string]struct{})
+	for _, name := range excludedNames {
+		excludeMap[name] = struct{}{}
+	}
+	allAlerts := c.GetSystemAlerts(systemID)
+	var alerts []CachedAlertData
+	for _, record := range allAlerts {
+		if _, excluded := excludeMap[record.Name]; !excluded {
+			alerts = append(alerts, record)
+		}
+	}
+	return alerts
+}
+
+// Refresh returns the latest cached copy for an alert snapshot if it still exists.
+func (c *AlertsCache) Refresh(alert CachedAlertData) (CachedAlertData, bool) {
+	if alert.Id == "" {
+		return CachedAlertData{}, false
+	}
+	return c.GetAlert(alert.SystemID, alert.Id)
+}
--- a/internal/alerts/alerts_cache_test.go
+++ b/internal/alerts/alerts_cache_test.go
@@ -0,0 +1,215 @@
+//go:build testing
+
+package alerts_test
+
+import (
+	"testing"
+
+	"github.com/henrygd/beszel/internal/alerts"
+	beszelTests "github.com/henrygd/beszel/internal/tests"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestSystemAlertsCachePopulateAndFilter(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	systems, err := beszelTests.CreateSystems(hub, 2, user.Id, "up")
+	require.NoError(t, err)
+	system1 := systems[0]
+	system2 := systems[1]
+
+	statusAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "Status",
+		"system": system1.Id,
+		"user":   user.Id,
+		"min":    1,
+	})
+	require.NoError(t, err)
+
+	cpuAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "CPU",
+		"system": system1.Id,
+		"user":   user.Id,
+		"value":  80,
+		"min":    1,
+	})
+	require.NoError(t, err)
+
+	memoryAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "Memory",
+		"system": system2.Id,
+		"user":   user.Id,
+		"value":  90,
+		"min":    1,
+	})
+	require.NoError(t, err)
+
+	cache := alerts.NewAlertsCache(hub)
+	require.NoError(t, cache.PopulateFromDB(false)) // fail here rather than letting the lookups below lazy-load and mask a populate error
+
+	statusAlerts := cache.GetAlertsByName(system1.Id, "Status")
+	require.Len(t, statusAlerts, 1)
+	assert.Equal(t, statusAlert.Id, statusAlerts[0].Id)
+
+	nonStatusAlerts := cache.GetAlertsExcludingNames(system1.Id, "Status")
+	require.Len(t, nonStatusAlerts, 1)
+	assert.Equal(t, cpuAlert.Id, nonStatusAlerts[0].Id)
+
+	system2Alerts := cache.GetSystemAlerts(system2.Id)
+	require.Len(t, system2Alerts, 1)
+	assert.Equal(t, memoryAlert.Id, system2Alerts[0].Id)
+}
+
+func TestSystemAlertsCacheLazyLoadUpdateAndDelete(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+	require.NoError(t, err)
+	systemRecord := systems[0]
+
+	statusAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "Status",
+		"system": systemRecord.Id,
+		"user":   user.Id,
+		"min":    1,
+	})
+	require.NoError(t, err)
+
+	cache := alerts.NewAlertsCache(hub)
+	require.Len(t, cache.GetSystemAlerts(systemRecord.Id), 1, "first lookup should lazy-load alerts for the system")
+
+	cpuAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "CPU",
+		"system": systemRecord.Id,
+		"user":   user.Id,
+		"value":  80,
+		"min":    1,
+	})
+	require.NoError(t, err)
+
+	cache.Update(cpuAlert)
+
+	nonStatusAlerts := cache.GetAlertsExcludingNames(systemRecord.Id, "Status")
+	require.Len(t, nonStatusAlerts, 1)
+	assert.Equal(t, cpuAlert.Id, nonStatusAlerts[0].Id)
+
+	cache.Delete(statusAlert)
+	assert.Empty(t, cache.GetAlertsByName(systemRecord.Id, "Status"), "deleted alerts should be removed from the in-memory cache")
+}
+
+func TestSystemAlertsCacheRefreshReturnsLatestCopy(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+	require.NoError(t, err)
+	system := systems[0]
+
+	alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":      "Status",
+		"system":    system.Id,
+		"user":      user.Id,
+		"min":       1,
+		"triggered": false,
+	})
+	require.NoError(t, err)
+
+	cache := alerts.NewAlertsCache(hub)
+	snapshot := cache.GetSystemAlerts(system.Id)[0]
+	assert.False(t, snapshot.Triggered)
+
+	alert.Set("triggered", true)
+	require.NoError(t, hub.Save(alert))
+
+	refreshed, ok := cache.Refresh(snapshot)
+	require.True(t, ok)
+	assert.Equal(t, snapshot.Id, refreshed.Id)
+	assert.True(t, refreshed.Triggered, "refresh should return the updated cached value rather than the stale snapshot")
+
+	require.NoError(t, hub.Delete(alert))
+	_, ok = cache.Refresh(snapshot)
+	assert.False(t, ok, "refresh should report false when the cached alert no longer exists")
+}
+
+func TestAlertManagerCacheLifecycle(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+	require.NoError(t, err)
+	system := systems[0]
+
+	// Create an alert
+	alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "CPU",
+		"system": system.Id,
+		"user":   user.Id,
+		"value":  80,
+		"min":    1,
+	})
+	require.NoError(t, err)
+
+	am := hub.AlertManager
+	cache := am.GetSystemAlertsCache()
+
+	// Verify it's in cache (it should be since CreateRecord triggers the event)
+	assert.Len(t, cache.GetSystemAlerts(system.Id), 1)
+	assert.Equal(t, alert.Id, cache.GetSystemAlerts(system.Id)[0].Id)
+	assert.EqualValues(t, 80, cache.GetSystemAlerts(system.Id)[0].Value)
+
+	// Update the alert through PocketBase to trigger events
+	alert.Set("value", 85)
+	require.NoError(t, hub.Save(alert))
+
+	// Check if updated value is reflected (or at least that it's still there)
+	cachedAlerts := cache.GetSystemAlerts(system.Id)
+	assert.Len(t, cachedAlerts, 1)
+	assert.EqualValues(t, 85, cachedAlerts[0].Value)
+
+	// Delete the alert through PocketBase to trigger events
+	require.NoError(t, hub.Delete(alert))
+
+	// Verify it's removed from cache
+	assert.Empty(t, cache.GetSystemAlerts(system.Id), "alert should be removed from cache after PocketBase delete")
+}
+
+// func TestAlertManagerCacheMovesAlertToNewSystemOnUpdate(t *testing.T) {
+// 	hub, user := beszelTests.GetHubWithUser(t)
+// 	defer hub.Cleanup()
+
+// 	systems, err := beszelTests.CreateSystems(hub, 2, user.Id, "up")
+// 	require.NoError(t, err)
+// 	system1 := systems[0]
+// 	system2 := systems[1]
+
+// 	alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+// 		"name":   "CPU",
+// 		"system": system1.Id,
+// 		"user":   user.Id,
+// 		"value":  80,
+// 		"min":    1,
+// 	})
+// 	require.NoError(t, err)
+
+// 	am := hub.AlertManager
+// 	cache := am.GetSystemAlertsCache()
+
+// 	// Initially in system1 cache
+// 	assert.Len(t, cache.Get(system1.Id), 1)
+// 	assert.Empty(t, cache.Get(system2.Id))
+
+// 	// Move alert to system2
+// 	alert.Set("system", system2.Id)
+// 	require.NoError(t, hub.Save(alert))
+
+// 	// DEBUG: print if it is found
+// 	// fmt.Printf("system1 alerts after update: %v\n", cache.Get(system1.Id))
+
+// 	// Should be removed from system1 and present in system2
+// 	assert.Empty(t, cache.GetType(system1.Id, "CPU"), "updated alerts should be evicted from the previous system cache")
+// 	require.Len(t, cache.Get(system2.Id), 1)
+// 	assert.Equal(t, alert.Id, cache.Get(system2.Id)[0].Id)
+// }
--- a/internal/alerts/alerts_disk_test.go
+++ b/internal/alerts/alerts_disk_test.go
@@ -0,0 +1,155 @@
+//go:build testing
+
+package alerts_test
+
+import (
+	"encoding/json"
+	"testing"
+	"time"
+
+	"github.com/henrygd/beszel/internal/entities/system"
+	beszelTests "github.com/henrygd/beszel/internal/tests"
+
+	"github.com/pocketbase/dbx"
+	"github.com/pocketbase/pocketbase/tools/types"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestDiskAlertExtraFsMultiMinute verifies that a multi-minute ("min": 2) Disk alert is triggered, and later
+// resolved, by the stored per-minute usage of an extra (non-root) filesystem while the root disk stays at 30%.
+func TestDiskAlertExtraFsMultiMinute(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+	require.NoError(t, err)
+	systemRecord := systems[0]
+
+	// Disk alert: threshold 80%, min=2 (requires historical averaging)
+	diskAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "Disk",
+		"system": systemRecord.Id,
+		"user":   user.Id,
+		"value":  80, // threshold: 80%
+		"min":    2,  // 2 minutes - requires historical averaging
+	})
+	require.NoError(t, err)
+	assert.False(t, diskAlert.GetBool("triggered"), "Alert should not be triggered initially")
+
+	am := hub.GetAlertManager()
+	now := time.Now().UTC()
+
+	extraFsHigh := map[string]*system.FsStats{
+		"/mnt/data": {DiskTotal: 1000, DiskUsed: 920}, // 92% - above threshold
+	}
+
+	// Insert 4 historical records spread over 3 minutes (same pattern as battery tests).
+	// The oldest record must predate (now - 2min) so the alert time window is valid.
+	recordTimes := []time.Duration{
+		-180 * time.Second, // 3 min ago - anchors oldest record before alert.time
+		-90 * time.Second,
+		-60 * time.Second,
+		-30 * time.Second,
+	}
+
+	for _, offset := range recordTimes {
+		stats := system.Stats{
+			DiskPct: 30, // root disk at 30% - below threshold
+			ExtraFs: extraFsHigh,
+		}
+		statsJSON, _ := json.Marshal(stats)
+
+		recordTime := now.Add(offset)
+		record, err := beszelTests.CreateRecord(hub, "system_stats", map[string]any{
+			"system": systemRecord.Id,
+			"type":   "1m",
+			"stats":  string(statsJSON),
+		})
+		require.NoError(t, err)
+		record.SetRaw("created", recordTime.Format(types.DefaultDateLayout))
+		err = hub.SaveNoValidate(record)
+		require.NoError(t, err)
+	}
+
+	combinedDataHigh := &system.CombinedData{
+		Stats: system.Stats{
+			DiskPct: 30,
+			ExtraFs: extraFsHigh,
+		},
+		Info: system.Info{
+			DiskPct: 30,
+		},
+	}
+
+	systemRecord.Set("updated", now)
+	err = hub.SaveNoValidate(systemRecord)
+	require.NoError(t, err)
+
+	err = am.HandleSystemAlerts(systemRecord, combinedDataHigh)
+	require.NoError(t, err)
+
+	time.Sleep(20 * time.Millisecond)
+
+	diskAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": diskAlert.Id})
+	require.NoError(t, err)
+	assert.True(t, diskAlert.GetBool("triggered"),
+		"Alert SHOULD be triggered when extra disk average (92%) exceeds threshold (80%)")
+
+	// --- Resolution: extra disk drops to 50%, alert should resolve ---
+
+	extraFsLow := map[string]*system.FsStats{
+		"/mnt/data": {DiskTotal: 1000, DiskUsed: 500}, // 50% - below threshold
+	}
+
+	newNow := now.Add(2 * time.Minute)
+	recordTimesLow := []time.Duration{
+		-180 * time.Second,
+		-90 * time.Second,
+		-60 * time.Second,
+		-30 * time.Second,
+	}
+
+	for _, offset := range recordTimesLow {
+		stats := system.Stats{
+			DiskPct: 30,
+			ExtraFs: extraFsLow,
+		}
+		statsJSON, _ := json.Marshal(stats)
+
+		recordTime := newNow.Add(offset)
+		record, err := beszelTests.CreateRecord(hub, "system_stats", map[string]any{
+			"system": systemRecord.Id,
+			"type":   "1m",
+			"stats":  string(statsJSON),
+		})
+		require.NoError(t, err)
+		record.SetRaw("created", recordTime.Format(types.DefaultDateLayout))
+		err = hub.SaveNoValidate(record)
+		require.NoError(t, err)
+	}
+
+	combinedDataLow := &system.CombinedData{
+		Stats: system.Stats{
+			DiskPct: 30,
+			ExtraFs: extraFsLow,
+		},
+		Info: system.Info{
+			DiskPct: 30,
+		},
+	}
+
+	systemRecord.Set("updated", newNow)
+	err = hub.SaveNoValidate(systemRecord)
+	require.NoError(t, err)
+
+	err = am.HandleSystemAlerts(systemRecord, combinedDataLow)
+	require.NoError(t, err)
+
+	time.Sleep(20 * time.Millisecond)
+
+	diskAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": diskAlert.Id})
+	require.NoError(t, err)
+	assert.False(t, diskAlert.GetBool("triggered"),
+		"Alert should be resolved when extra disk average (50%) drops below threshold (80%)")
+}
--- a/internal/alerts/alerts_quiet_hours_test.go
+++ b/internal/alerts/alerts_quiet_hours_test.go
@@ -49,7 +49,7 @@ func TestAlertSilencedOneTime(t *testing.T) {

 	// Get alert manager
 	am := alerts.NewAlertManager(hub)
-	defer am.StopWorker()
+	defer am.Stop()

 	// Test that alert is silenced
 	silenced := am.IsNotificationSilenced(user.Id, system.Id)
@@ -106,7 +106,7 @@ func TestAlertSilencedDaily(t *testing.T) {

 	// Get alert manager
 	am := alerts.NewAlertManager(hub)
-	defer am.StopWorker()
+	defer am.Stop()

 	// Get current hour and create a window that includes current time
 	now := time.Now().UTC()
@@ -170,7 +170,7 @@ func TestAlertSilencedDailyMidnightCrossing(t *testing.T) {

 	// Get alert manager
 	am := alerts.NewAlertManager(hub)
-	defer am.StopWorker()
+	defer am.Stop()

 	// Create a window that crosses midnight: 22:00 - 02:00
 	startTime := time.Date(2000, 1, 1, 22, 0, 0, 0, time.UTC)
@@ -211,7 +211,7 @@ func TestAlertSilencedGlobal(t *testing.T) {

 	// Get alert manager
 	am := alerts.NewAlertManager(hub)
-	defer am.StopWorker()
+	defer am.Stop()

 	// Create a global quiet hours window (no system specified)
 	now := time.Now().UTC()
@@ -250,7 +250,7 @@ func TestAlertSilencedSystemSpecific(t *testing.T) {

 	// Get alert manager
 	am := alerts.NewAlertManager(hub)
-	defer am.StopWorker()
+	defer am.Stop()

 	// Create a system-specific quiet hours window for system1 only
 	now := time.Now().UTC()
@@ -296,7 +296,7 @@ func TestAlertSilencedMultiUser(t *testing.T) {

 	// Get alert manager
 	am := alerts.NewAlertManager(hub)
-	defer am.StopWorker()
+	defer am.Stop()

 	// Create a quiet hours window for user1 only
 	now := time.Now().UTC()
@@ -417,7 +417,7 @@ func TestAlertSilencedNoWindows(t *testing.T) {

 	// Get alert manager
 	am := alerts.NewAlertManager(hub)
-	defer am.StopWorker()
+	defer am.Stop()

 	// Without any quiet hours windows, alert should NOT be silenced
 	silenced := am.IsNotificationSilenced(user.Id, system.Id)
--- a/internal/alerts/alerts_status.go
+++ b/internal/alerts/alerts_status.go
@@ -5,67 +5,28 @@ import (
 	"strings"
 	"time"

-	"github.com/pocketbase/dbx"
 	"github.com/pocketbase/pocketbase/core"
 )

-type alertTask struct {
-	action      string // "schedule" or "cancel"
-	systemName  string
-	alertRecord *core.Record
-	delay       time.Duration
-}
-
 type alertInfo struct {
-	systemName  string
-	alertRecord *core.Record
-	expireTime  time.Time
+	systemName string          // system name handed to sendStatusAlert when the alert fires
+	alertData  CachedAlertData // cached alert captured when the pending alert was scheduled
+	expireTime time.Time       // time at which the pending "down" alert becomes due
+	timer      *time.Timer     // calls processPendingAlert when it fires; stopped on cancel or Stop
 }

-// startWorker is a long-running goroutine that processes alert tasks
-// every x seconds. It must be running to process status alerts.
-func (am *AlertManager) startWorker() {
-	processPendingAlerts := time.Tick(15 * time.Second)
-
-	// check for status alerts that are not resolved when system comes up
-	// (can be removed if we figure out core bug in #1052)
-	checkStatusAlerts := time.Tick(561 * time.Second)
-
-	for {
-		select {
-		case <-am.stopChan:
-			return
-		case task := <-am.alertQueue:
-			switch task.action {
-			case "schedule":
-				am.pendingAlerts.Store(task.alertRecord.Id, &alertInfo{
-					systemName:  task.systemName,
-					alertRecord: task.alertRecord,
-					expireTime:  time.Now().Add(task.delay),
-				})
-			case "cancel":
-				am.pendingAlerts.Delete(task.alertRecord.Id)
+// Stop stops the timer of every pending status alert and removes it from pendingAlerts; it runs inside stopOnce, so repeated calls are no-ops.
+func (am *AlertManager) Stop() {
+	am.stopOnce.Do(func() {
+		am.pendingAlerts.Range(func(key, value any) bool {
+			info := value.(*alertInfo)
+			if info.timer != nil {
+				info.timer.Stop()
 			}
-		case <-checkStatusAlerts:
-			resolveStatusAlerts(am.hub)
-		case <-processPendingAlerts:
-			// Check for expired alerts every tick
-			now := time.Now()
-			for key, value := range am.pendingAlerts.Range {
-				info := value.(*alertInfo)
-				if now.After(info.expireTime) {
-					// Downtime delay has passed, process alert
-					am.sendStatusAlert("down", info.systemName, info.alertRecord)
-					am.pendingAlerts.Delete(key)
-				}
-			}
-		}
-	}
-}
-
-// StopWorker shuts down the AlertManager.worker goroutine
-func (am *AlertManager) StopWorker() {
-	close(am.stopChan)
+			am.pendingAlerts.Delete(key)
+			return true
+		})
+	})
 }

 // HandleStatusAlerts manages the logic when system status changes.
@@ -74,82 +35,104 @@ func (am *AlertManager) HandleStatusAlerts(newStatus string, systemRecord *core.
 		return nil
 	}

-	alertRecords, err := am.getSystemStatusAlerts(systemRecord.Id)
-	if err != nil {
-		return err
-	}
-	if len(alertRecords) == 0 {
+	alerts := am.alertsCache.GetAlertsByName(systemRecord.Id, "Status")
+	if len(alerts) == 0 {
 		return nil
 	}

 	systemName := systemRecord.GetString("name")
 	if newStatus == "down" {
-		am.handleSystemDown(systemName, alertRecords)
+		am.handleSystemDown(systemName, alerts)
 	} else {
-		am.handleSystemUp(systemName, alertRecords)
+		am.handleSystemUp(systemName, alerts)
 	}
 	return nil
 }

-// getSystemStatusAlerts retrieves all "Status" alert records for a given system ID.
-func (am *AlertManager) getSystemStatusAlerts(systemID string) ([]*core.Record, error) {
-	alertRecords, err := am.hub.FindAllRecords("alerts", dbx.HashExp{
-		"system": systemID,
-		"name":   "Status",
-	})
-	if err != nil {
-		return nil, err
+// handleSystemDown runs when a system status changes to "down". It schedules one pending "down" alert per cached Status alert.
+func (am *AlertManager) handleSystemDown(systemName string, alerts []CachedAlertData) {
+	for _, alertData := range alerts {
+		min := max(1, int(alertData.Min)) // delay in minutes, clamped to at least one
+		am.schedulePendingStatusAlert(systemName, alertData, time.Duration(min)*time.Minute)
 	}
-	return alertRecords, nil
 }

-// Schedules delayed "down" alerts for each alert record.
-func (am *AlertManager) handleSystemDown(systemName string, alertRecords []*core.Record) {
-	for _, alertRecord := range alertRecords {
-		// Continue if alert is already scheduled
-		if _, exists := am.pendingAlerts.Load(alertRecord.Id); exists {
-			continue
-		}
-		// Schedule by adding to queue
-		min := max(1, alertRecord.GetInt("min"))
-		am.alertQueue <- alertTask{
-			action:      "schedule",
-			systemName:  systemName,
-			alertRecord: alertRecord,
-			delay:       time.Duration(min) * time.Minute,
-		}
+// schedulePendingStatusAlert stores a pending alert keyed by alertData.Id and starts a timer that calls processPendingAlert after delay.
+// It returns true if the alert was scheduled, or false (leaving the existing entry and its timer untouched) if one was already pending.
+func (am *AlertManager) schedulePendingStatusAlert(systemName string, alertData CachedAlertData, delay time.Duration) bool {
+	alert := &alertInfo{
+		systemName: systemName,
+		alertData:  alertData,
+		expireTime: time.Now().Add(delay),
 	}
+
+	storedAlert, loaded := am.pendingAlerts.LoadOrStore(alertData.Id, alert)
+	if loaded {
+		return false
+	}
+
+	stored := storedAlert.(*alertInfo) // NOTE(review): entry is already published; timer is set just below — confirm a concurrent cancel/Stop cannot race on it
+	stored.timer = time.AfterFunc(time.Until(stored.expireTime), func() {
+		am.processPendingAlert(alertData.Id)
+	})
+	return true
 }

 // handleSystemUp manages the logic when a system status changes to "up".
 // It cancels any pending alerts and sends "up" alerts.
-func (am *AlertManager) handleSystemUp(systemName string, alertRecords []*core.Record) {
-	for _, alertRecord := range alertRecords {
-		alertRecordID := alertRecord.Id
+func (am *AlertManager) handleSystemUp(systemName string, alerts []CachedAlertData) {
+	for _, alertData := range alerts {
 		// If alert exists for record, delete and continue (down alert not sent)
-		if _, exists := am.pendingAlerts.Load(alertRecordID); exists {
-			am.alertQueue <- alertTask{
-				action:      "cancel",
-				alertRecord: alertRecord,
-			}
+		if am.cancelPendingAlert(alertData.Id) {
 			continue
 		}
-		// No alert scheduled for this record, send "up" alert
-		if err := am.sendStatusAlert("up", systemName, alertData); err != nil {
+		if !alertData.Triggered { // cached alert is not marked triggered, so no "up" notification is sent
+			continue
+		}
+		if err := am.sendStatusAlert("up", systemName, alertData); err != nil { // clears the triggered flag, then notifies
 			am.hub.Logger().Error("Failed to send alert", "err", err)
 		}
 	}
 }

-// sendStatusAlert sends a status alert ("up" or "down") to the users associated with the alert records.
-func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, alertRecord *core.Record) error {
-	switch alertStatus {
-	case "up":
-		alertRecord.Set("triggered", false)
-	case "down":
-		alertRecord.Set("triggered", true)
+// cancelPendingAlert removes the pending alert for alertID and stops its timer if one is set. It returns true only if a pending alert was found and removed.
+func (am *AlertManager) cancelPendingAlert(alertID string) bool {
+	value, loaded := am.pendingAlerts.LoadAndDelete(alertID)
+	if !loaded {
+		return false
+	}
+
+	info := value.(*alertInfo)
+	if info.timer != nil {
+		info.timer.Stop() // a callback that already started finds no entry in processPendingAlert and returns
+	}
+	return true
+}
+
+// processPendingAlert is the timer callback: it removes the pending entry, refreshes the alert from the cache, and sends a "down" alert unless the entry was already removed, the refresh reports !ok, or the alert is already triggered.
+func (am *AlertManager) processPendingAlert(alertID string) {
+	value, loaded := am.pendingAlerts.LoadAndDelete(alertID)
+	if !loaded {
+		return
+	}
+
+	info := value.(*alertInfo)
+	refreshedAlertData, ok := am.alertsCache.Refresh(info.alertData)
+	if !ok || refreshedAlertData.Triggered {
+		return
+	}
+	if err := am.sendStatusAlert("down", info.systemName, refreshedAlertData); err != nil {
+		am.hub.Logger().Error("Failed to send alert", "err", err)
+	}
+}
+
+// sendStatusAlert sends a status alert ("up" or "down") to the users associated with the alert records.
+func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, alertData CachedAlertData) error {
+	// Update trigger state for alert record before sending alert
+	triggered := alertStatus == "down"
+	if err := am.setAlertTriggered(alertData, triggered); err != nil {
+		return err
 	}
-	am.hub.Save(alertRecord)

 	var emoji string
 	if alertStatus == "up" {
@@ -162,10 +145,10 @@ func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, a
 	message := strings.TrimSuffix(title, emoji)

 	// Get system ID for the link
-	systemID := alertRecord.GetString("system")
+	systemID := alertData.SystemID

 	return am.SendAlert(AlertMessageData{
-		UserID:   alertRecord.GetString("user"),
+		UserID:   alertData.UserID,
 		SystemID: systemID,
 		Title:    title,
 		Message:  message,
@@ -174,8 +157,8 @@ func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, a
 	})
 }

-// resolveStatusAlerts resolves any status alerts that weren't resolved
-// when system came up (https://github.com/henrygd/beszel/issues/1052)
+// resolveStatusAlerts resolves any triggered status alerts that weren't resolved
+// when system came up (https://github.com/henrygd/beszel/issues/1052).
 func resolveStatusAlerts(app core.App) error {
 	db := app.DB()
 	// Find all active status alerts where the system is actually up
@@ -205,3 +188,40 @@ func resolveStatusAlerts(app core.App) error {
 	}
 	return nil
 }
+
+// restorePendingStatusAlerts re-schedules untriggered Status alerts whose system is still "down", rebuilding the in-memory
+// timer state lost on a hub restart. Each restored alert waits its full delay (max(1, Min) minutes) again, counted from now.
+func (am *AlertManager) restorePendingStatusAlerts() error {
+	type pendingStatusAlert struct {
+		AlertID    string `db:"alert_id"`
+		SystemID   string `db:"system_id"`
+		SystemName string `db:"system_name"`
+	}
+
+	var pending []pendingStatusAlert
+	err := am.hub.DB().NewQuery(`
+		SELECT a.id AS alert_id, a.system AS system_id, s.name AS system_name
+		FROM alerts a
+		JOIN systems s ON a.system = s.id
+		WHERE a.name = 'Status'
+		AND a.triggered = false
+		AND s.status = 'down'
+	`).All(&pending)
+	if err != nil {
+		return err
+	}
+
+	// Populate the cache before the lookups below; the result is discarded — NOTE(review): confirm a failure here is safe to ignore
+	_ = am.alertsCache.PopulateFromDB(false)
+
+	for _, item := range pending {
+		alertData, ok := am.alertsCache.GetAlert(item.SystemID, item.AlertID)
+		if !ok {
+			continue // alert is not in the cache; nothing to schedule
+		}
+		min := max(1, int(alertData.Min))
+		am.schedulePendingStatusAlert(item.SystemName, alertData, time.Duration(min)*time.Minute)
+	}
+
+	return nil
+}
--- a/internal/alerts/alerts_status_test.go
+++ b/internal/alerts/alerts_status_test.go
@@ -0,0 +1,943 @@
+//go:build testing
+
+package alerts_test
+
+import (
+	"testing"
+	"testing/synctest"
+	"time"
+
+	"github.com/henrygd/beszel/internal/alerts"
+	beszelTests "github.com/henrygd/beszel/internal/tests"
+	"github.com/pocketbase/dbx"
+	"github.com/pocketbase/pocketbase/core"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func setStatusAlertEmail(t *testing.T, hub core.App, userID, email string) {
+	t.Helper()
+
+	userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": userID})
+	require.NoError(t, err)
+
+	userSettings.Set("settings", map[string]any{ // overwrite settings: a single email recipient and no webhooks
+		"emails":   []string{email},
+		"webhooks": []string{},
+	})
+	require.NoError(t, hub.Save(userSettings))
+}
+
+func TestStatusAlerts(t *testing.T) {
+	synctest.Test(t, func(t *testing.T) {
+		hub, user := beszelTests.GetHubWithUser(t)
+		defer hub.Cleanup()
+
+		systems, err := beszelTests.CreateSystems(hub, 4, user.Id, "paused")
+		assert.NoError(t, err)
+
+		var alerts []*core.Record
+		for i, system := range systems {
+			alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+				"name":   "Status",
+				"system": system.Id,
+				"user":   user.Id,
+				"min":    i + 1,
+			})
+			assert.NoError(t, err)
+			alerts = append(alerts, alert)
+		}
+
+		time.Sleep(10 * time.Millisecond)
+
+		for _, alert := range alerts {
+			assert.False(t, alert.GetBool("triggered"), "Alert should not be triggered immediately")
+		}
+		if hub.TestMailer.TotalSend() != 0 {
+			assert.Zero(t, hub.TestMailer.TotalSend(), "Expected 0 messages, got %d", hub.TestMailer.TotalSend())
+		}
+		for _, system := range systems {
+			assert.EqualValues(t, "paused", system.GetString("status"), "System should be paused")
+		}
+		for _, system := range systems {
+			system.Set("status", "up")
+			err = hub.SaveNoValidate(system)
+			assert.NoError(t, err)
+		}
+		time.Sleep(time.Second)
+		assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
+		for _, system := range systems {
+			system.Set("status", "down")
+			err = hub.SaveNoValidate(system)
+			assert.NoError(t, err)
+		}
+		// 30 seconds after going down: 4 alerts in the pendingAlerts map, no triggered alerts
+		time.Sleep(time.Second * 30)
+		assert.EqualValues(t, 4, hub.GetPendingAlertsCount(), "should have 4 alerts in the pendingAlerts map")
+		triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
+		assert.NoError(t, err)
+		assert.EqualValues(t, 0, triggeredCount, "should have 0 alert triggered")
+		assert.EqualValues(t, 0, hub.TestMailer.TotalSend(), "should have 0 messages sent")
+		// 1m30s after going down: 1 triggered alert (min=1) and 3 pending alerts
+		time.Sleep(time.Second * 60)
+		assert.EqualValues(t, 3, hub.GetPendingAlertsCount(), "should have 3 alerts in the pendingAlerts map")
+		triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
+		assert.NoError(t, err)
+		assert.EqualValues(t, 1, triggeredCount, "should have 1 alert triggered")
+		assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 messages sent")
+		// 2m30s after going down: 2 triggered alerts (min=1 and min=2) and 2 pending alerts
+		time.Sleep(time.Second * 60)
+		assert.EqualValues(t, 2, hub.GetPendingAlertsCount(), "should have 2 alerts in the pendingAlerts map")
+		triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
+		assert.NoError(t, err)
+		assert.EqualValues(t, 2, triggeredCount, "should have 2 alert triggered")
+		assert.EqualValues(t, 2, hub.TestMailer.TotalSend(), "should have 2 messages sent")
+		// now bring all four systems back up (two triggered, two still pending)
+		for _, system := range systems {
+			system.Set("status", "up")
+			err = hub.SaveNoValidate(system)
+			assert.NoError(t, err)
+		}
+		time.Sleep(time.Second)
+		// should have 0 alerts in the pendingAlerts map and 0 alerts triggered
+		assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
+		triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
+		assert.NoError(t, err)
+		assert.Zero(t, triggeredCount, "should have 0 alert triggered")
+		// 4 messages sent, 2 down alerts and 2 up alerts for first 2 systems
+		assert.EqualValues(t, 4, hub.TestMailer.TotalSend(), "should have 4 messages sent")
+	})
+}
+func TestStatusAlertRecoveryBeforeDeadline(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// Ensure user settings have an email
+	userSettings, _ := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
+	userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
+	require.NoError(t, hub.Save(userSettings))
+
+	// Initial email count
+	initialEmailCount := hub.TestMailer.TotalSend()
+
+	systemCollection, _ := hub.FindCollectionByNameOrId("systems")
+	system := core.NewRecord(systemCollection)
+	system.Set("name", "test-system")
+	system.Set("status", "up")
+	system.Set("host", "127.0.0.1")
+	system.Set("users", []string{user.Id})
+	require.NoError(t, hub.Save(system))
+
+	alertCollection, _ := hub.FindCollectionByNameOrId("alerts")
+	alert := core.NewRecord(alertCollection)
+	alert.Set("user", user.Id)
+	alert.Set("system", system.Id)
+	alert.Set("name", "Status")
+	alert.Set("triggered", false)
+	alert.Set("min", 1)
+	require.NoError(t, hub.Save(alert))
+
+	am := hub.AlertManager
+
+	// 1. System goes down: a pending alert is scheduled, nothing is sent yet
+	require.NoError(t, am.HandleStatusAlerts("down", system))
+	assert.Equal(t, 1, am.GetPendingAlertsCount(), "Alert should be scheduled")
+
+	// 2. System goes up BEFORE delay expires
+	// Triggering HandleStatusAlerts("up") SHOULD NOT send an alert.
+	require.NoError(t, am.HandleStatusAlerts("up", system))
+
+	assert.Equal(t, 0, am.GetPendingAlertsCount(), "Alert should be canceled if system recovers before delay expires")
+
+	// Verify that NO email was sent.
+	assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "Recovery notification should not be sent if system never went down")
+
+}
+
+func TestStatusAlertNormalRecovery(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// Ensure user settings have an email
+	userSettings, _ := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
+	userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
+	require.NoError(t, hub.Save(userSettings))
+
+	systemCollection, _ := hub.FindCollectionByNameOrId("systems")
+	system := core.NewRecord(systemCollection)
+	system.Set("name", "test-system")
+	system.Set("status", "up")
+	system.Set("host", "127.0.0.1")
+	system.Set("users", []string{user.Id})
+	require.NoError(t, hub.Save(system))
+
+	alertCollection, _ := hub.FindCollectionByNameOrId("alerts")
+	alert := core.NewRecord(alertCollection)
+	alert.Set("user", user.Id)
+	alert.Set("system", system.Id)
+	alert.Set("name", "Status")
+	alert.Set("triggered", true) // System was confirmed DOWN
+	require.NoError(t, hub.Save(alert))
+
+	am := hub.AlertManager
+	initialEmailCount := hub.TestMailer.TotalSend()
+
+	// System goes up: no pending alert exists and the alert is triggered, so an "up" alert is expected
+	require.NoError(t, am.HandleStatusAlerts("up", system))
+
+	// Verify that an email WAS sent (normal recovery).
+	assert.Equal(t, initialEmailCount+1, hub.TestMailer.TotalSend(), "Recovery notification should be sent if system was triggered as down")
+
+}
+
+func TestHandleStatusAlertsDoesNotSendRecoveryWhileDownIsOnlyPending(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
+	require.NoError(t, err)
+	userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
+	require.NoError(t, hub.Save(userSettings))
+
+	systemCollection, err := hub.FindCollectionByNameOrId("systems")
+	require.NoError(t, err)
+	system := core.NewRecord(systemCollection)
+	system.Set("name", "test-system")
+	system.Set("status", "up")
+	system.Set("host", "127.0.0.1")
+	system.Set("users", []string{user.Id})
+	require.NoError(t, hub.Save(system))
+
+	alertCollection, err := hub.FindCollectionByNameOrId("alerts")
+	require.NoError(t, err)
+	alert := core.NewRecord(alertCollection)
+	alert.Set("user", user.Id)
+	alert.Set("system", system.Id)
+	alert.Set("name", "Status")
+	alert.Set("triggered", false)
+	alert.Set("min", 1) // one-minute delay before a "down" alert would fire
+	require.NoError(t, hub.Save(alert))
+
+	initialEmailCount := hub.TestMailer.TotalSend()
+	am := alerts.NewTestAlertManagerWithoutWorker(hub)
+
+	require.NoError(t, am.HandleStatusAlerts("down", system))
+	assert.Equal(t, 1, am.GetPendingAlertsCount(), "down transition should register a pending alert immediately")
+
+	require.NoError(t, am.HandleStatusAlerts("up", system)) // recovery arrives while the alert is still only pending
+	assert.Zero(t, am.GetPendingAlertsCount(), "recovery should cancel the pending down alert")
+	assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "recovery notification should not be sent before a down alert triggers")
+
+	alertRecord, err := hub.FindRecordById("alerts", alert.Id)
+	require.NoError(t, err)
+	assert.False(t, alertRecord.GetBool("triggered"), "alert should remain untriggered when downtime never matured")
+}
+
+func TestStatusAlertTimerCancellationPreventsBoundaryDelivery(t *testing.T) {
+	synctest.Test(t, func(t *testing.T) {
+		hub, user := beszelTests.GetHubWithUser(t)
+		defer hub.Cleanup()
+
+		userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
+		require.NoError(t, err)
+		userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
+		require.NoError(t, hub.Save(userSettings))
+
+		systemCollection, err := hub.FindCollectionByNameOrId("systems")
+		require.NoError(t, err)
+		system := core.NewRecord(systemCollection)
+		system.Set("name", "test-system")
+		system.Set("status", "up")
+		system.Set("host", "127.0.0.1")
+		system.Set("users", []string{user.Id})
+		require.NoError(t, hub.Save(system))
+
+		alertCollection, err := hub.FindCollectionByNameOrId("alerts")
+		require.NoError(t, err)
+		alert := core.NewRecord(alertCollection)
+		alert.Set("user", user.Id)
+		alert.Set("system", system.Id)
+		alert.Set("name", "Status")
+		alert.Set("triggered", false)
+		alert.Set("min", 1) // one-minute delay before a "down" alert would fire
+		require.NoError(t, hub.Save(alert))
+
+		initialEmailCount := hub.TestMailer.TotalSend()
+		am := alerts.NewTestAlertManagerWithoutWorker(hub)
+
+		require.NoError(t, am.HandleStatusAlerts("down", system))
+		assert.Equal(t, 1, am.GetPendingAlertsCount(), "down transition should register a pending alert immediately")
+		require.True(t, am.ResetPendingAlertTimer(alert.Id, 25*time.Millisecond), "test should shorten the pending alert timer")
+
+		time.Sleep(10 * time.Millisecond) // still before the shortened 25ms deadline
+		require.NoError(t, am.HandleStatusAlerts("up", system))
+		assert.Zero(t, am.GetPendingAlertsCount(), "recovery should remove the pending alert before the timer callback runs")
+
+		time.Sleep(40 * time.Millisecond) // now past the shortened 25ms deadline
+		assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "timer callback should not deliver after recovery cancels the pending alert")
+
+		alertRecord, err := hub.FindRecordById("alerts", alert.Id)
+		require.NoError(t, err)
+		assert.False(t, alertRecord.GetBool("triggered"), "alert should remain untriggered when cancellation wins the timer race")
+
+		time.Sleep(time.Minute) // also move past the original one-minute deadline before the bubble ends
+		synctest.Wait()
+	})
+}
+
+func TestStatusAlertDownFiresAfterDelayExpires(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
+	require.NoError(t, err)
+	userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
+	require.NoError(t, hub.Save(userSettings))
+
+	systemCollection, err := hub.FindCollectionByNameOrId("systems")
+	require.NoError(t, err)
+	system := core.NewRecord(systemCollection)
+	system.Set("name", "test-system")
+	system.Set("status", "up")
+	system.Set("host", "127.0.0.1")
+	system.Set("users", []string{user.Id})
+	require.NoError(t, hub.Save(system))
+
+	alertCollection, err := hub.FindCollectionByNameOrId("alerts")
+	require.NoError(t, err)
+	alert := core.NewRecord(alertCollection)
+	alert.Set("user", user.Id)
+	alert.Set("system", system.Id)
+	alert.Set("name", "Status")
+	alert.Set("triggered", false)
+	alert.Set("min", 1) // one-minute delay before a "down" alert would fire
+	require.NoError(t, hub.Save(alert))
+
+	initialEmailCount := hub.TestMailer.TotalSend()
+	am := alerts.NewTestAlertManagerWithoutWorker(hub)
+
+	require.NoError(t, am.HandleStatusAlerts("down", system))
+	assert.Equal(t, 1, am.GetPendingAlertsCount(), "alert should be pending after system goes down")
+
+	// Force the pending alert to expire and process it via the test helpers rather than waiting out the one-minute delay
+	am.ForceExpirePendingAlerts()
+	processed, err := am.ProcessPendingAlerts()
+	require.NoError(t, err)
+	assert.Len(t, processed, 1, "one alert should have been processed")
+	assert.Equal(t, 0, am.GetPendingAlertsCount(), "pending alert should be consumed after processing")
+
+	// Verify exactly one additional email (the down notification) was sent
+	assert.Equal(t, initialEmailCount+1, hub.TestMailer.TotalSend(), "down notification should be sent after delay expires")
+
+	// Re-read the alert from the DB and verify its triggered flag is set
+	alertRecord, err := hub.FindRecordById("alerts", alert.Id)
+	require.NoError(t, err)
+	assert.True(t, alertRecord.GetBool("triggered"), "alert should be marked triggered after downtime matures")
+}
+
+func TestStatusAlertMultipleUsersRespectDifferentMinutes(t *testing.T) {
+	synctest.Test(t, func(t *testing.T) {
+		hub, user1 := beszelTests.GetHubWithUser(t)
+		defer hub.Cleanup()
+
+		setStatusAlertEmail(t, hub, user1.Id, "user1@example.com")
+
+		user2, err := beszelTests.CreateUser(hub, "user2@example.com", "password")
+		require.NoError(t, err)
+		_, err = beszelTests.CreateRecord(hub, "user_settings", map[string]any{
+			"user": user2.Id,
+			"settings": map[string]any{
+				"emails":   []string{"user2@example.com"},
+				"webhooks": []string{},
+			},
+		})
+		require.NoError(t, err)
+
+		system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
+			"name":  "shared-system",
+			"users": []string{user1.Id, user2.Id},
+			"host":  "127.0.0.1",
+		})
+		require.NoError(t, err)
+		system.Set("status", "up")
+		require.NoError(t, hub.SaveNoValidate(system))
+
+		alertUser1, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+			"name":   "Status",
+			"system": system.Id,
+			"user":   user1.Id,
+			"min":    1, // user1 is alerted after one minute of downtime
+		})
+		require.NoError(t, err)
+		alertUser2, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+			"name":   "Status",
+			"system": system.Id,
+			"user":   user2.Id,
+			"min":    2, // user2 is alerted after two minutes of downtime
+		})
+		require.NoError(t, err)
+
+		time.Sleep(10 * time.Millisecond)
+
+		system.Set("status", "down")
+		require.NoError(t, hub.SaveNoValidate(system))
+
+		assert.Equal(t, 2, hub.GetPendingAlertsCount(), "both user alerts should be pending after the system goes down")
+
+		time.Sleep(59 * time.Second) // 59s down: just before user1's one-minute deadline
+		synctest.Wait()
+		assert.Zero(t, hub.TestMailer.TotalSend(), "no messages should be sent before the earliest alert minute elapses")
+
+		time.Sleep(2 * time.Second) // 61s down: user1's deadline has passed
+		synctest.Wait()
+
+		messages := hub.TestMailer.Messages()
+		require.Len(t, messages, 1, "only the first user's alert should send after one minute")
+		require.Len(t, messages[0].To, 1)
+		assert.Equal(t, "user1@example.com", messages[0].To[0].Address)
+		assert.Contains(t, messages[0].Subject, "Connection to shared-system is down")
+		assert.Equal(t, 1, hub.GetPendingAlertsCount(), "the later user alert should still be pending")
+
+		time.Sleep(58 * time.Second) // 119s down: just before user2's two-minute deadline
+		synctest.Wait()
+		assert.Equal(t, 1, hub.TestMailer.TotalSend(), "the second user's alert should still be waiting before two minutes")
+
+		time.Sleep(2 * time.Second) // 121s down: user2's deadline has passed
+		synctest.Wait()
+
+		messages = hub.TestMailer.Messages()
+		require.Len(t, messages, 2, "both users should eventually receive their own status alert")
+		require.Len(t, messages[1].To, 1)
+		assert.Equal(t, "user2@example.com", messages[1].To[0].Address)
+		assert.Contains(t, messages[1].Subject, "Connection to shared-system is down")
+		assert.Zero(t, hub.GetPendingAlertsCount(), "all pending alerts should be consumed after both timers fire")
+
+		alertUser1, err = hub.FindRecordById("alerts", alertUser1.Id)
+		require.NoError(t, err)
+		assert.True(t, alertUser1.GetBool("triggered"), "user1 alert should be marked triggered after delivery")
+
+		alertUser2, err = hub.FindRecordById("alerts", alertUser2.Id)
+		require.NoError(t, err)
+		assert.True(t, alertUser2.GetBool("triggered"), "user2 alert should be marked triggered after delivery")
+	})
+}
+
+func TestStatusAlertMultipleUsersRecoveryBetweenMinutesOnlyAlertsEarlierUser(t *testing.T) {
+	synctest.Test(t, func(t *testing.T) {
+		hub, user1 := beszelTests.GetHubWithUser(t)
+		defer hub.Cleanup()
+
+		setStatusAlertEmail(t, hub, user1.Id, "user1@example.com")
+
+		user2, err := beszelTests.CreateUser(hub, "user2@example.com", "password")
+		require.NoError(t, err)
+		_, err = beszelTests.CreateRecord(hub, "user_settings", map[string]any{
+			"user": user2.Id,
+			"settings": map[string]any{
+				"emails":   []string{"user2@example.com"},
+				"webhooks": []string{},
+			},
+		})
+		require.NoError(t, err)
+
+		system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
+			"name":  "shared-system",
+			"users": []string{user1.Id, user2.Id},
+			"host":  "127.0.0.1",
+		})
+		require.NoError(t, err)
+		system.Set("status", "up")
+		require.NoError(t, hub.SaveNoValidate(system))
+
+		alertUser1, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+			"name":   "Status",
+			"system": system.Id,
+			"user":   user1.Id,
+			"min":    1,
+		})
+		require.NoError(t, err)
+		alertUser2, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+			"name":   "Status",
+			"system": system.Id,
+			"user":   user2.Id,
+			"min":    2,
+		})
+		require.NoError(t, err)
+
+		time.Sleep(10 * time.Millisecond)
+
+		system.Set("status", "down")
+		require.NoError(t, hub.SaveNoValidate(system))
+
+		time.Sleep(61 * time.Second)
+		synctest.Wait()
+
+		messages := hub.TestMailer.Messages()
+		require.Len(t, messages, 1, "the first user's down alert should send before recovery")
+		require.Len(t, messages[0].To, 1)
+		assert.Equal(t, "user1@example.com", messages[0].To[0].Address)
+		assert.Contains(t, messages[0].Subject, "Connection to shared-system is down")
+		assert.Equal(t, 1, hub.GetPendingAlertsCount(), "the second user's alert should still be pending")
+
+		system.Set("status", "up")
+		require.NoError(t, hub.SaveNoValidate(system))
+
+		time.Sleep(time.Second)
+		synctest.Wait()
+
+		messages = hub.TestMailer.Messages()
+		require.Len(t, messages, 2, "recovery should notify only the user whose down alert had already triggered")
+		for _, message := range messages {
+			require.Len(t, message.To, 1)
+			assert.Equal(t, "user1@example.com", message.To[0].Address)
+		}
+		assert.Contains(t, messages[1].Subject, "Connection to shared-system is up")
+		assert.Zero(t, hub.GetPendingAlertsCount(), "recovery should cancel the later user's pending alert")
+
+		time.Sleep(61 * time.Second)
+		synctest.Wait()
+
+		messages = hub.TestMailer.Messages()
+		require.Len(t, messages, 2, "user2 should never receive a down alert once recovery cancels the pending timer")
+
+		alertUser1, err = hub.FindRecordById("alerts", alertUser1.Id)
+		require.NoError(t, err)
+		assert.False(t, alertUser1.GetBool("triggered"), "user1 alert should be cleared after recovery")
+
+		alertUser2, err = hub.FindRecordById("alerts", alertUser2.Id)
+		require.NoError(t, err)
+		assert.False(t, alertUser2.GetBool("triggered"), "user2 alert should remain untriggered because it never fired")
+	})
+}
+
+func TestStatusAlertDuplicateDownCallIsIdempotent(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
+	require.NoError(t, err)
+	userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
+	require.NoError(t, hub.Save(userSettings))
+
+	systemCollection, err := hub.FindCollectionByNameOrId("systems")
+	require.NoError(t, err)
+	system := core.NewRecord(systemCollection)
+	system.Set("name", "test-system")
+	system.Set("status", "up")
+	system.Set("host", "127.0.0.1")
+	system.Set("users", []string{user.Id})
+	require.NoError(t, hub.Save(system))
+
+	alertCollection, err := hub.FindCollectionByNameOrId("alerts")
+	require.NoError(t, err)
+	alert := core.NewRecord(alertCollection)
+	alert.Set("user", user.Id)
+	alert.Set("system", system.Id)
+	alert.Set("name", "Status")
+	alert.Set("triggered", false)
+	alert.Set("min", 5)
+	require.NoError(t, hub.Save(alert))
+
+	am := alerts.NewTestAlertManagerWithoutWorker(hub)
+
+	require.NoError(t, am.HandleStatusAlerts("down", system))
+	require.NoError(t, am.HandleStatusAlerts("down", system))
+	require.NoError(t, am.HandleStatusAlerts("down", system))
+
+	assert.Equal(t, 1, am.GetPendingAlertsCount(), "repeated down calls should not schedule duplicate pending alerts")
+}
+
+func TestStatusAlertNoAlertRecord(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	systemCollection, err := hub.FindCollectionByNameOrId("systems")
+	require.NoError(t, err)
+	system := core.NewRecord(systemCollection)
+	system.Set("name", "test-system")
+	system.Set("status", "up")
+	system.Set("host", "127.0.0.1")
+	system.Set("users", []string{user.Id})
+	require.NoError(t, hub.Save(system))
+
+	// No Status alert record created for this system
+	initialEmailCount := hub.TestMailer.TotalSend()
+	am := alerts.NewTestAlertManagerWithoutWorker(hub)
+
+	require.NoError(t, am.HandleStatusAlerts("down", system))
+	assert.Equal(t, 0, am.GetPendingAlertsCount(), "no pending alert when no alert record exists")
+
+	require.NoError(t, am.HandleStatusAlerts("up", system))
+	assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "no email when no alert record exists")
+}
+
+func TestRestorePendingStatusAlertsRequeuesDownSystemsAfterRestart(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
+	require.NoError(t, err)
+	userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
+	require.NoError(t, hub.Save(userSettings))
+
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "down")
+	require.NoError(t, err)
+	system := systems[0]
+
+	alertCollection, err := hub.FindCollectionByNameOrId("alerts")
+	require.NoError(t, err)
+	alert := core.NewRecord(alertCollection)
+	alert.Set("user", user.Id)
+	alert.Set("system", system.Id)
+	alert.Set("name", "Status")
+	alert.Set("triggered", false)
+	alert.Set("min", 1)
+	require.NoError(t, hub.Save(alert))
+
+	initialEmailCount := hub.TestMailer.TotalSend()
+	am := alerts.NewTestAlertManagerWithoutWorker(hub)
+
+	require.NoError(t, am.RestorePendingStatusAlerts())
+	assert.Equal(t, 1, am.GetPendingAlertsCount(), "startup restore should requeue a pending down alert for a system still marked down")
+
+	am.ForceExpirePendingAlerts()
+	processed, err := am.ProcessPendingAlerts()
+	require.NoError(t, err)
+	assert.Len(t, processed, 1, "restored pending alert should be processable after the delay expires")
+	assert.Equal(t, initialEmailCount+1, hub.TestMailer.TotalSend(), "restored pending alert should send the down notification")
+
+	alertRecord, err := hub.FindRecordById("alerts", alert.Id)
+	require.NoError(t, err)
+	assert.True(t, alertRecord.GetBool("triggered"), "restored pending alert should mark the alert as triggered once delivered")
+}
+
+func TestRestorePendingStatusAlertsSkipsNonDownOrAlreadyTriggeredAlerts(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	systemsDown, err := beszelTests.CreateSystems(hub, 2, user.Id, "down")
+	require.NoError(t, err)
+	systemDownPending := systemsDown[0]
+	systemDownTriggered := systemsDown[1]
+
+	systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
+		"name":   "up-system",
+		"users":  []string{user.Id},
+		"host":   "127.0.0.2",
+		"status": "up",
+	})
+	require.NoError(t, err)
+
+	_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":      "Status",
+		"system":    systemDownPending.Id,
+		"user":      user.Id,
+		"min":       1,
+		"triggered": false,
+	})
+	require.NoError(t, err)
+
+	_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":      "Status",
+		"system":    systemUp.Id,
+		"user":      user.Id,
+		"min":       1,
+		"triggered": false,
+	})
+	require.NoError(t, err)
+
+	_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":      "Status",
+		"system":    systemDownTriggered.Id,
+		"user":      user.Id,
+		"min":       1,
+		"triggered": true,
+	})
+	require.NoError(t, err)
+
+	am := alerts.NewTestAlertManagerWithoutWorker(hub)
+	require.NoError(t, am.RestorePendingStatusAlerts())
+	assert.Equal(t, 1, am.GetPendingAlertsCount(), "only untriggered alerts for currently down systems should be restored")
+}
+
+func TestRestorePendingStatusAlertsIsIdempotent(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "down")
+	require.NoError(t, err)
+	system := systems[0]
+
+	_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":      "Status",
+		"system":    system.Id,
+		"user":      user.Id,
+		"min":       1,
+		"triggered": false,
+	})
+	require.NoError(t, err)
+
+	am := alerts.NewTestAlertManagerWithoutWorker(hub)
+	require.NoError(t, am.RestorePendingStatusAlerts())
+	require.NoError(t, am.RestorePendingStatusAlerts())
+
+	assert.Equal(t, 1, am.GetPendingAlertsCount(), "restoring twice should not create duplicate pending alerts")
+	am.ForceExpirePendingAlerts()
+	processed, err := am.ProcessPendingAlerts()
+	require.NoError(t, err)
+	assert.Len(t, processed, 1, "restored alert should still be processable exactly once")
+	assert.Zero(t, am.GetPendingAlertsCount(), "processing the restored alert should empty the pending map")
+}
+
+func TestResolveStatusAlertsFixesStaleTriggered(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// CreateSystems uses SaveNoValidate after initial save to bypass the
+	// onRecordCreate hook that forces status = "pending".
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+	require.NoError(t, err)
+	system := systems[0]
+
+	alertCollection, err := hub.FindCollectionByNameOrId("alerts")
+	require.NoError(t, err)
+	alert := core.NewRecord(alertCollection)
+	alert.Set("user", user.Id)
+	alert.Set("system", system.Id)
+	alert.Set("name", "Status")
+	alert.Set("triggered", true) // Stale: system is up but alert still says triggered
+	require.NoError(t, hub.Save(alert))
+
+	// resolveStatusAlerts should clear the stale triggered flag
+	require.NoError(t, alerts.ResolveStatusAlerts(hub))
+
+	alertRecord, err := hub.FindRecordById("alerts", alert.Id)
+	require.NoError(t, err)
+	assert.False(t, alertRecord.GetBool("triggered"), "stale triggered flag should be cleared when system is up")
+}
+func TestResolveStatusAlerts(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// Create the system that will remain up
+	systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
+		"name":   "test-system",
+		"users":  []string{user.Id},
+		"host":   "127.0.0.1",
+		"status": "up",
+	})
+	assert.NoError(t, err)
+
+	systemDown, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
+		"name":   "test-system-2",
+		"users":  []string{user.Id},
+		"host":   "127.0.0.2",
+		"status": "up",
+	})
+	assert.NoError(t, err)
+
+	// Create a Status alert for each system (alertUp first, then alertDown)
+	alertUp, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "Status",
+		"system": systemUp.Id,
+		"user":   user.Id,
+		"min":    1,
+	})
+	assert.NoError(t, err)
+
+	alertDown, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "Status",
+		"system": systemDown.Id,
+		"user":   user.Id,
+		"min":    1,
+	})
+	assert.NoError(t, err)
+
+	// Verify alert is not triggered initially
+	assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered initially")
+
+	// Set the system to 'up' (this should not trigger the alert)
+	systemUp.Set("status", "up")
+	err = hub.SaveNoValidate(systemUp)
+	assert.NoError(t, err)
+
+	systemDown.Set("status", "down")
+	err = hub.SaveNoValidate(systemDown)
+	assert.NoError(t, err)
+
+	// Wait a moment for any processing
+	time.Sleep(10 * time.Millisecond)
+
+	// Verify alertUp is still not triggered after setting system to up
+	alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
+	assert.NoError(t, err)
+	assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered when system is up")
+
+	// Manually set both alerts triggered to true
+	alertUp.Set("triggered", true)
+	err = hub.SaveNoValidate(alertUp)
+	assert.NoError(t, err)
+	alertDown.Set("triggered", true)
+	err = hub.SaveNoValidate(alertDown)
+	assert.NoError(t, err)
+
+	// Verify we have exactly two alerts with triggered true
+	triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
+	assert.NoError(t, err)
+	assert.EqualValues(t, 2, triggeredCount, "Should have exactly two alerts with triggered true")
+
+	// Verify the specific alertUp is triggered
+	alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
+	assert.NoError(t, err)
+	assert.True(t, alertUp.GetBool("triggered"), "Alert should be triggered")
+
+	// Verify we have two unresolved alert history records
+	alertHistoryCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
+	assert.NoError(t, err)
+	assert.EqualValues(t, 2, alertHistoryCount, "Should have exactly two unresolved alert history records")
+
+	err = alerts.ResolveStatusAlerts(hub)
+	assert.NoError(t, err)
+
+	// Verify alertUp is not triggered after resolving
+	alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
+	assert.NoError(t, err)
+	assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered after resolving")
+	// Verify alertDown is still triggered
+	alertDown, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertDown.Id})
+	assert.NoError(t, err)
+	assert.True(t, alertDown.GetBool("triggered"), "Alert should still be triggered after resolving")
+
+	// Verify we have one unresolved alert history record
+	alertHistoryCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
+	assert.NoError(t, err)
+	assert.EqualValues(t, 1, alertHistoryCount, "Should have exactly one unresolved alert history record")
+
+}
+
+func TestAlertsHistoryStatus(t *testing.T) {
+	synctest.Test(t, func(t *testing.T) {
+		hub, user := beszelTests.GetHubWithUser(t)
+		defer hub.Cleanup()
+
+		// Create a system
+		systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+		assert.NoError(t, err)
+		system := systems[0]
+
+		// Create a Status alert record for the system
+		alertRecord, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+			"name":   "Status",
+			"system": system.Id,
+			"user":   user.Id,
+			"min":    1,
+		})
+		assert.NoError(t, err)
+
+		// Verify alert is not triggered initially
+		assert.False(t, alertRecord.GetBool("triggered"), "Alert should not be triggered initially")
+
+		// Set the system to 'down' (this should trigger the alert once the 1-minute delay elapses)
+		system.Set("status", "down")
+		err = hub.Save(system)
+		assert.NoError(t, err)
+
+		time.Sleep(time.Second * 30)
+		synctest.Wait()
+
+		alertFresh, _ := hub.FindRecordById("alerts", alertRecord.Id)
+		assert.False(t, alertFresh.GetBool("triggered"), "Alert should not be triggered after 30 seconds")
+
+		time.Sleep(time.Minute)
+		synctest.Wait()
+
+		// Verify alert is triggered after setting system to down
+		alertFresh, err = hub.FindRecordById("alerts", alertRecord.Id)
+		assert.NoError(t, err)
+		assert.True(t, alertFresh.GetBool("triggered"), "Alert should be triggered after one minute")
+
+		// Verify we have one unresolved alert history record
+		alertHistoryCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
+		assert.NoError(t, err)
+		assert.EqualValues(t, 1, alertHistoryCount, "Should have exactly one unresolved alert history record")
+
+		// Set the system back to 'up' (this should resolve the alert)
+		system.Set("status", "up")
+		err = hub.Save(system)
+		assert.NoError(t, err)
+
+		time.Sleep(time.Second)
+		synctest.Wait()
+
+		// Verify alert is not triggered after setting system back to up
+		alertFresh, err = hub.FindRecordById("alerts", alertRecord.Id)
+		assert.NoError(t, err)
+		assert.False(t, alertFresh.GetBool("triggered"), "Alert should not be triggered after system recovers")
+
+		// Verify the alert history record is resolved
+		alertHistoryCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
+		assert.NoError(t, err)
+		assert.EqualValues(t, 0, alertHistoryCount, "Should have no unresolved alert history records")
+	})
+}
+
+func TestStatusAlertClearedBeforeSend(t *testing.T) {
+	synctest.Test(t, func(t *testing.T) {
+		hub, user := beszelTests.GetHubWithUser(t)
+		defer hub.Cleanup()
+
+		// Create a system
+		systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+		assert.NoError(t, err)
+		system := systems[0]
+
+		// Ensure user settings have an email
+		userSettings, _ := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
+		userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
+		hub.Save(userSettings)
+
+		// Initial email count
+		initialEmailCount := hub.TestMailer.TotalSend()
+
+		// Create a Status alert record for the system
+		alertRecord, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+			"name":   "Status",
+			"system": system.Id,
+			"user":   user.Id,
+			"min":    1,
+		})
+		assert.NoError(t, err)
+
+		// Verify alert is not triggered initially
+		assert.False(t, alertRecord.GetBool("triggered"), "Alert should not be triggered initially")
+
+		// Set the system to 'down' (this should queue a pending alert rather than send immediately)
+		system.Set("status", "down")
+		err = hub.Save(system)
+		assert.NoError(t, err)
+
+		time.Sleep(time.Second * 30)
+		synctest.Wait()
+
+		// Set system back up to clear the pending alert before it triggers
+		system.Set("status", "up")
+		err = hub.Save(system)
+		assert.NoError(t, err)
+
+		time.Sleep(time.Minute)
+		synctest.Wait()
+
+		// Verify that we have not sent any emails since the system recovered before the alert triggered
+		assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "No email should be sent if system recovers before alert triggers")
+
+		// Verify alert is not triggered after setting system back to up
+		alertFresh, err := hub.FindRecordById("alerts", alertRecord.Id)
+		assert.NoError(t, err)
+		assert.False(t, alertFresh.GetBool("triggered"), "Alert should not be triggered after system recovers")
+
+		// Verify that no alert history record was created since the alert never triggered
+		alertHistoryCount, err := hub.CountRecords("alerts_history")
+		assert.NoError(t, err)
+		assert.EqualValues(t, 0, alertHistoryCount, "Should have no unresolved alert history records since alert never triggered")
+	})
+}
--- a/internal/alerts/alerts_system.go
+++ b/internal/alerts/alerts_system.go
@@ -11,15 +11,11 @@ import (
 	"github.com/pocketbase/dbx"
 	"github.com/pocketbase/pocketbase/core"
 	"github.com/pocketbase/pocketbase/tools/types"
-	"github.com/spf13/cast"
 )

 func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *system.CombinedData) error {
-	alertRecords, err := am.hub.FindAllRecords("alerts",
-		dbx.NewExp("system={:system} AND name!='Status'", dbx.Params{"system": systemRecord.Id}),
-	)
-	if err != nil || len(alertRecords) == 0 {
-		// log.Println("no alerts found for system")
+	alerts := am.alertsCache.GetAlertsExcludingNames(systemRecord.Id, "Status")
+	if len(alerts) == 0 {
 		return nil
 	}

@@ -27,8 +23,8 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
 	now := systemRecord.GetDateTime("updated").Time().UTC()
 	oldestTime := now

-	for _, alertRecord := range alertRecords {
-		name := alertRecord.GetString("name")
+	for _, alertData := range alerts {
+		name := alertData.Name
 		var val float64
 		unit := "%"

@@ -73,8 +69,8 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
 			val = float64(data.Stats.Battery[0])
 		}

-		triggered := alertRecord.GetBool("triggered")
-		threshold := alertRecord.GetFloat("value")
+		triggered := alertData.Triggered
+		threshold := alertData.Value

 		// Battery alert has inverted logic: trigger when value is BELOW threshold
 		lowAlert := isLowAlert(name)
@@ -92,11 +88,11 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
 			}
 		}

-		min := max(1, cast.ToUint8(alertRecord.Get("min")))
+		min := max(1, alertData.Min)

 		alert := SystemAlertData{
 			systemRecord: systemRecord,
-			alertRecord:  alertRecord,
+			alertData:    alertData,
 			name:         name,
 			unit:         unit,
 			val:          val,
@@ -129,7 +125,7 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
 		Created types.DateTime `db:"created"`
 	}{}

-	err = am.hub.DB().
+	err := am.hub.DB().
 		Select("stats", "created").
 		From("system_stats").
 		Where(dbx.NewExp(
@@ -192,22 +188,24 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
 			case "Memory":
 				alert.val += stats.Mem
 			case "Bandwidth":
-				alert.val += stats.NetSent + stats.NetRecv
+				alert.val += float64(stats.Bandwidth[0]+stats.Bandwidth[1]) / (1024 * 1024)
 			case "Disk":
 				if alert.mapSums == nil {
-					alert.mapSums = make(map[string]float32, len(data.Stats.ExtraFs)+1)
+					alert.mapSums = make(map[string]float32, len(stats.ExtraFs)+1)
 				}
 				// add root disk
 				if _, ok := alert.mapSums["root"]; !ok {
 					alert.mapSums["root"] = 0.0
 				}
 				alert.mapSums["root"] += float32(stats.Disk)
-				// add extra disks
-				for key, fs := range data.Stats.ExtraFs {
-					if _, ok := alert.mapSums[key]; !ok {
-						alert.mapSums[key] = 0.0
+				// add extra disks from historical record
+				for key, fs := range stats.ExtraFs {
+					if fs.DiskTotal > 0 {
+						if _, ok := alert.mapSums[key]; !ok {
+							alert.mapSums[key] = 0.0
+						}
+						alert.mapSums[key] += float32(fs.DiskUsed / fs.DiskTotal * 100)
 					}
-					alert.mapSums[key] += float32(fs.DiskUsed / fs.DiskTotal * 100)
 				}
 			case "Temperature":
 				if alert.mapSums == nil {
@@ -342,13 +340,12 @@ func (am *AlertManager) sendSystemAlert(alert SystemAlertData) {
 	}
 	body := fmt.Sprintf("%s averaged %.2f%s for the previous %v %s.", alert.descriptor, alert.val, alert.unit, alert.min, minutesLabel)

-	alert.alertRecord.Set("triggered", alert.triggered)
-	if err := am.hub.Save(alert.alertRecord); err != nil {
+	if err := am.setAlertTriggered(alert.alertData, alert.triggered); err != nil {
 		// app.Logger().Error("failed to save alert record", "err", err)
 		return
 	}
 	am.SendAlert(AlertMessageData{
-		UserID:   alert.alertRecord.GetString("user"),
+		UserID:   alert.alertData.UserID,
 		SystemID: alert.systemRecord.Id,
 		Title:    subject,
 		Message:  body,
--- a/internal/alerts/alerts_system_test.go
+++ b/internal/alerts/alerts_system_test.go
@@ -0,0 +1,218 @@
+//go:build testing
+
+package alerts_test
+
+import (
+	"testing"
+	"testing/synctest"
+	"time"
+
+	"github.com/henrygd/beszel/internal/entities/system"
+	beszelTests "github.com/henrygd/beszel/internal/tests"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+type systemAlertValueSetter[T any] func(info *system.Info, stats *system.Stats, value T)
+
+type systemAlertTestFixture struct {
+	hub     *beszelTests.TestHub
+	alertID string
+	submit  func(*system.CombinedData) error
+}
+
+func createCombinedData[T any](value T, setValue systemAlertValueSetter[T]) *system.CombinedData {
+	var data system.CombinedData
+	setValue(&data.Info, &data.Stats, value)
+	return &data
+}
+
+func newSystemAlertTestFixture(t *testing.T, alertName string, min int, threshold float64) *systemAlertTestFixture {
+	t.Helper()
+
+	hub, user := beszelTests.GetHubWithUser(t)
+
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+	require.NoError(t, err)
+	systemRecord := systems[0]
+
+	sysManagerSystem, err := hub.GetSystemManager().GetSystemFromStore(systemRecord.Id)
+	require.NoError(t, err)
+	require.NotNil(t, sysManagerSystem)
+	sysManagerSystem.StopUpdater()
+
+	userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
+	require.NoError(t, err)
+	userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
+	require.NoError(t, hub.Save(userSettings))
+
+	alertRecord, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   alertName,
+		"system": systemRecord.Id,
+		"user":   user.Id,
+		"min":    min,
+		"value":  threshold,
+	})
+	require.NoError(t, err)
+
+	assert.False(t, alertRecord.GetBool("triggered"), "Alert should not be triggered initially")
+
+	alertsCache := hub.GetAlertManager().GetSystemAlertsCache()
+	cachedAlerts := alertsCache.GetAlertsExcludingNames(systemRecord.Id, "Status")
+	assert.Len(t, cachedAlerts, 1, "Alert should be in cache")
+
+	return &systemAlertTestFixture{
+		hub:     hub,
+		alertID: alertRecord.Id,
+		submit: func(data *system.CombinedData) error {
+			_, err := sysManagerSystem.CreateRecords(data)
+			return err
+		},
+	}
+}
+
+func (fixture *systemAlertTestFixture) cleanup() {
+	fixture.hub.Cleanup()
+}
+
+func submitValue[T any](fixture *systemAlertTestFixture, t *testing.T, value T, setValue systemAlertValueSetter[T]) {
+	t.Helper()
+	require.NoError(t, fixture.submit(createCombinedData(value, setValue)))
+}
+
+func (fixture *systemAlertTestFixture) assertTriggered(t *testing.T, triggered bool, message string) {
+	t.Helper()
+
+	alertRecord, err := fixture.hub.FindRecordById("alerts", fixture.alertID)
+	require.NoError(t, err)
+	assert.Equal(t, triggered, alertRecord.GetBool("triggered"), message)
+}
+
+func waitForSystemAlert(d time.Duration) {
+	time.Sleep(d)
+	synctest.Wait()
+}
+
+func testOneMinuteSystemAlert[T any](t *testing.T, alertName string, threshold float64, setValue systemAlertValueSetter[T], triggerValue, resolveValue T) {
+	t.Helper()
+
+	synctest.Test(t, func(t *testing.T) {
+		fixture := newSystemAlertTestFixture(t, alertName, 1, threshold)
+		defer fixture.cleanup()
+
+		submitValue(fixture, t, triggerValue, setValue)
+		waitForSystemAlert(time.Second)
+
+		fixture.assertTriggered(t, true, "Alert should be triggered")
+		assert.Equal(t, 1, fixture.hub.TestMailer.TotalSend(), "An email should have been sent")
+
+		submitValue(fixture, t, resolveValue, setValue)
+		waitForSystemAlert(time.Second)
+
+		fixture.assertTriggered(t, false, "Alert should be untriggered")
+		assert.Equal(t, 2, fixture.hub.TestMailer.TotalSend(), "A second email should have been sent for untriggering the alert")
+
+		waitForSystemAlert(time.Minute)
+	})
+}
+
+func testMultiMinuteSystemAlert[T any](t *testing.T, alertName string, threshold float64, min int, setValue systemAlertValueSetter[T], baselineValue, triggerValue, resolveValue T) {
+	t.Helper()
+
+	synctest.Test(t, func(t *testing.T) {
+		fixture := newSystemAlertTestFixture(t, alertName, min, threshold)
+		defer fixture.cleanup()
+
+		submitValue(fixture, t, baselineValue, setValue)
+		waitForSystemAlert(time.Minute + time.Second)
+		fixture.assertTriggered(t, false, "Alert should not be triggered yet")
+
+		submitValue(fixture, t, triggerValue, setValue)
+		waitForSystemAlert(time.Minute)
+		fixture.assertTriggered(t, false, "Alert should not be triggered until the history window is full")
+
+		submitValue(fixture, t, triggerValue, setValue)
+		waitForSystemAlert(time.Second)
+		fixture.assertTriggered(t, true, "Alert should be triggered")
+		assert.Equal(t, 1, fixture.hub.TestMailer.TotalSend(), "An email should have been sent")
+
+		submitValue(fixture, t, resolveValue, setValue)
+		waitForSystemAlert(time.Second)
+		fixture.assertTriggered(t, false, "Alert should be untriggered")
+		assert.Equal(t, 2, fixture.hub.TestMailer.TotalSend(), "A second email should have been sent for untriggering the alert")
+	})
+}
+
+func setCPUAlertValue(info *system.Info, stats *system.Stats, value float64) {
+	info.Cpu = value
+	stats.Cpu = value
+}
+
+func setMemoryAlertValue(info *system.Info, stats *system.Stats, value float64) {
+	info.MemPct = value
+	stats.MemPct = value
+}
+
+func setDiskAlertValue(info *system.Info, stats *system.Stats, value float64) {
+	info.DiskPct = value
+	stats.DiskPct = value
+}
+
+func setBandwidthAlertValue(info *system.Info, stats *system.Stats, value [2]uint64) {
+	info.BandwidthBytes = value[0] + value[1]
+	stats.Bandwidth = value
+}
+
+func megabytesToBytes(mb uint64) uint64 {
+	return mb * 1024 * 1024
+}
+
+func setGPUAlertValue(info *system.Info, stats *system.Stats, value float64) {
+	info.GpuPct = value
+	stats.GPUData = map[string]system.GPUData{
+		"GPU0": {Usage: value},
+	}
+}
+
+func setTemperatureAlertValue(info *system.Info, stats *system.Stats, value float64) {
+	info.DashboardTemp = value
+	stats.Temperatures = map[string]float64{
+		"Temp0": value,
+	}
+}
+
+func setLoadAvgAlertValue(info *system.Info, stats *system.Stats, value [3]float64) {
+	info.LoadAvg = value
+	stats.LoadAvg = value
+}
+
+func setBatteryAlertValue(info *system.Info, stats *system.Stats, value [2]uint8) {
+	info.Battery = value
+	stats.Battery = value
+}
+
+func TestSystemAlertsOneMin(t *testing.T) {
+	testOneMinuteSystemAlert(t, "CPU", 50, setCPUAlertValue, 51, 49)
+	testOneMinuteSystemAlert(t, "Memory", 50, setMemoryAlertValue, 51, 49)
+	testOneMinuteSystemAlert(t, "Disk", 50, setDiskAlertValue, 51, 49)
+	testOneMinuteSystemAlert(t, "Bandwidth", 50, setBandwidthAlertValue, [2]uint64{megabytesToBytes(26), megabytesToBytes(25)}, [2]uint64{megabytesToBytes(25), megabytesToBytes(24)})
+	testOneMinuteSystemAlert(t, "GPU", 50, setGPUAlertValue, 51, 49)
+	testOneMinuteSystemAlert(t, "Temperature", 70, setTemperatureAlertValue, 71, 69)
+	testOneMinuteSystemAlert(t, "LoadAvg1", 4, setLoadAvgAlertValue, [3]float64{4.1, 0, 0}, [3]float64{3.9, 0, 0})
+	testOneMinuteSystemAlert(t, "LoadAvg5", 4, setLoadAvgAlertValue, [3]float64{0, 4.1, 0}, [3]float64{0, 3.9, 0})
+	testOneMinuteSystemAlert(t, "LoadAvg15", 4, setLoadAvgAlertValue, [3]float64{0, 0, 4.1}, [3]float64{0, 0, 3.9})
+	testOneMinuteSystemAlert(t, "Battery", 20, setBatteryAlertValue, [2]uint8{19, 0}, [2]uint8{21, 0})
+}
+
+func TestSystemAlertsTwoMin(t *testing.T) {
+	testMultiMinuteSystemAlert(t, "CPU", 50, 2, setCPUAlertValue, 10, 51, 48)
+	testMultiMinuteSystemAlert(t, "Memory", 50, 2, setMemoryAlertValue, 10, 51, 48)
+	testMultiMinuteSystemAlert(t, "Disk", 50, 2, setDiskAlertValue, 10, 51, 48)
+	testMultiMinuteSystemAlert(t, "Bandwidth", 50, 2, setBandwidthAlertValue, [2]uint64{megabytesToBytes(10), megabytesToBytes(10)}, [2]uint64{megabytesToBytes(26), megabytesToBytes(25)}, [2]uint64{megabytesToBytes(10), megabytesToBytes(10)})
+	testMultiMinuteSystemAlert(t, "GPU", 50, 2, setGPUAlertValue, 10, 51, 48)
+	testMultiMinuteSystemAlert(t, "Temperature", 70, 2, setTemperatureAlertValue, 10, 71, 67)
+	testMultiMinuteSystemAlert(t, "LoadAvg1", 4, 2, setLoadAvgAlertValue, [3]float64{0, 0, 0}, [3]float64{4.1, 0, 0}, [3]float64{3.5, 0, 0})
+	testMultiMinuteSystemAlert(t, "LoadAvg5", 4, 2, setLoadAvgAlertValue, [3]float64{0, 2, 0}, [3]float64{0, 4.1, 0}, [3]float64{0, 3.5, 0})
+	testMultiMinuteSystemAlert(t, "LoadAvg15", 4, 2, setLoadAvgAlertValue, [3]float64{0, 0, 2}, [3]float64{0, 0, 4.1}, [3]float64{0, 0, 3.5})
+	testMultiMinuteSystemAlert(t, "Battery", 20, 2, setBatteryAlertValue, [2]uint8{21, 0}, [2]uint8{19, 0}, [2]uint8{25, 1})
+}
--- a/internal/alerts/alerts_test.go
+++ b/internal/alerts/alerts_test.go
@@ -12,9 +12,9 @@ import (
 	"testing/synctest"
 	"time"

-	"github.com/henrygd/beszel/internal/alerts"
 	beszelTests "github.com/henrygd/beszel/internal/tests"

+	"github.com/henrygd/beszel/internal/alerts"
 	"github.com/pocketbase/dbx"
 	"github.com/pocketbase/pocketbase/core"
 	pbTests "github.com/pocketbase/pocketbase/tests"
@@ -369,87 +369,6 @@ func TestUserAlertsApi(t *testing.T) {
 	}
 }

-func TestStatusAlerts(t *testing.T) {
-	synctest.Test(t, func(t *testing.T) {
-		hub, user := beszelTests.GetHubWithUser(t)
-		defer hub.Cleanup()
-
-		systems, err := beszelTests.CreateSystems(hub, 4, user.Id, "paused")
-		assert.NoError(t, err)
-
-		var alerts []*core.Record
-		for i, system := range systems {
-			alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
-				"name":   "Status",
-				"system": system.Id,
-				"user":   user.Id,
-				"min":    i + 1,
-			})
-			assert.NoError(t, err)
-			alerts = append(alerts, alert)
-		}
-
-		time.Sleep(10 * time.Millisecond)
-
-		for _, alert := range alerts {
-			assert.False(t, alert.GetBool("triggered"), "Alert should not be triggered immediately")
-		}
-		if hub.TestMailer.TotalSend() != 0 {
-			assert.Zero(t, hub.TestMailer.TotalSend(), "Expected 0 messages, got %d", hub.TestMailer.TotalSend())
-		}
-		for _, system := range systems {
-			assert.EqualValues(t, "paused", system.GetString("status"), "System should be paused")
-		}
-		for _, system := range systems {
-			system.Set("status", "up")
-			err = hub.SaveNoValidate(system)
-			assert.NoError(t, err)
-		}
-		time.Sleep(time.Second)
-		assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
-		for _, system := range systems {
-			system.Set("status", "down")
-			err = hub.SaveNoValidate(system)
-			assert.NoError(t, err)
-		}
-		// after 30 seconds, should have 4 alerts in the pendingAlerts map, no triggered alerts
-		time.Sleep(time.Second * 30)
-		assert.EqualValues(t, 4, hub.GetPendingAlertsCount(), "should have 4 alerts in the pendingAlerts map")
-		triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
-		assert.NoError(t, err)
-		assert.EqualValues(t, 0, triggeredCount, "should have 0 alert triggered")
-		assert.EqualValues(t, 0, hub.TestMailer.TotalSend(), "should have 0 messages sent")
-		// after 1:30 seconds, should have 1 triggered alert and 3 pending alerts
-		time.Sleep(time.Second * 60)
-		assert.EqualValues(t, 3, hub.GetPendingAlertsCount(), "should have 3 alerts in the pendingAlerts map")
-		triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
-		assert.NoError(t, err)
-		assert.EqualValues(t, 1, triggeredCount, "should have 1 alert triggered")
-		assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 messages sent")
-		// after 2:30 seconds, should have 2 triggered alerts and 2 pending alerts
-		time.Sleep(time.Second * 60)
-		assert.EqualValues(t, 2, hub.GetPendingAlertsCount(), "should have 2 alerts in the pendingAlerts map")
-		triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
-		assert.NoError(t, err)
-		assert.EqualValues(t, 2, triggeredCount, "should have 2 alert triggered")
-		assert.EqualValues(t, 2, hub.TestMailer.TotalSend(), "should have 2 messages sent")
-		// now we will bring the remaning systems back up
-		for _, system := range systems {
-			system.Set("status", "up")
-			err = hub.SaveNoValidate(system)
-			assert.NoError(t, err)
-		}
-		time.Sleep(time.Second)
-		// should have 0 alerts in the pendingAlerts map and 0 alerts triggered
-		assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
-		triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
-		assert.NoError(t, err)
-		assert.Zero(t, triggeredCount, "should have 0 alert triggered")
-		// 4 messages sent, 2 down alerts and 2 up alerts for first 2 systems
-		assert.EqualValues(t, 4, hub.TestMailer.TotalSend(), "should have 4 messages sent")
-	})
-}
-
 func TestAlertsHistory(t *testing.T) {
 	synctest.Test(t, func(t *testing.T) {
 		hub, user := beszelTests.GetHubWithUser(t)
@@ -578,102 +497,46 @@ func TestAlertsHistory(t *testing.T) {
 		assert.EqualValues(t, 2, totalHistoryCount, "Should have 2 total alert history records")
 	})
 }
-func TestResolveStatusAlerts(t *testing.T) {
-	hub, user := beszelTests.GetHubWithUser(t)
+
+func TestSetAlertTriggered(t *testing.T) {
+	hub, _ := beszelTests.NewTestHub(t.TempDir())
 	defer hub.Cleanup()

-	// Create a systemUp
-	systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
-		"name":   "test-system",
-		"users":  []string{user.Id},
-		"host":   "127.0.0.1",
-		"status": "up",
+	hub.StartHub()
+
+	user, _ := beszelTests.CreateUser(hub, "test@example.com", "password")
+	system, _ := beszelTests.CreateRecord(hub, "systems", map[string]any{
+		"name":  "test-system",
+		"users": []string{user.Id},
+		"host":  "127.0.0.1",
 	})
-	assert.NoError(t, err)

-	systemDown, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
-		"name":   "test-system-2",
-		"users":  []string{user.Id},
-		"host":   "127.0.0.2",
-		"status": "up",
+	alertRecord, _ := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":      "CPU",
+		"system":    system.Id,
+		"user":      user.Id,
+		"value":     80,
+		"triggered": false,
 	})
+
+	am := alerts.NewAlertManager(hub)
+
+	var alert alerts.CachedAlertData
+	alert.PopulateFromRecord(alertRecord)
+
+	// Test triggering the alert
+	err := am.SetAlertTriggered(alert, true)
 	assert.NoError(t, err)

-	// Create a status alertUp for the system
-	alertUp, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
-		"name":   "Status",
-		"system": systemUp.Id,
-		"user":   user.Id,
-		"min":    1,
-	})
+	updatedRecord, err := hub.FindRecordById("alerts", alert.Id)
+	assert.NoError(t, err)
+	assert.True(t, updatedRecord.GetBool("triggered"))
+
+	// Test un-triggering the alert
+	err = am.SetAlertTriggered(alert, false)
 	assert.NoError(t, err)

-	alertDown, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
-		"name":   "Status",
-		"system": systemDown.Id,
-		"user":   user.Id,
-		"min":    1,
-	})
+	updatedRecord, err = hub.FindRecordById("alerts", alert.Id)
 	assert.NoError(t, err)
-
-	// Verify alert is not triggered initially
-	assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered initially")
-
-	// Set the system to 'up' (this should not trigger the alert)
-	systemUp.Set("status", "up")
-	err = hub.SaveNoValidate(systemUp)
-	assert.NoError(t, err)
-
-	systemDown.Set("status", "down")
-	err = hub.SaveNoValidate(systemDown)
-	assert.NoError(t, err)
-
-	// Wait a moment for any processing
-	time.Sleep(10 * time.Millisecond)
-
-	// Verify alertUp is still not triggered after setting system to up
-	alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
-	assert.NoError(t, err)
-	assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered when system is up")
-
-	// Manually set both alerts triggered to true
-	alertUp.Set("triggered", true)
-	err = hub.SaveNoValidate(alertUp)
-	assert.NoError(t, err)
-	alertDown.Set("triggered", true)
-	err = hub.SaveNoValidate(alertDown)
-	assert.NoError(t, err)
-
-	// Verify we have exactly one alert with triggered true
-	triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
-	assert.NoError(t, err)
-	assert.EqualValues(t, 2, triggeredCount, "Should have exactly two alerts with triggered true")
-
-	// Verify the specific alertUp is triggered
-	alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
-	assert.NoError(t, err)
-	assert.True(t, alertUp.GetBool("triggered"), "Alert should be triggered")
-
-	// Verify we have two unresolved alert history records
-	alertHistoryCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
-	assert.NoError(t, err)
-	assert.EqualValues(t, 2, alertHistoryCount, "Should have exactly two unresolved alert history records")
-
-	err = alerts.ResolveStatusAlerts(hub)
-	assert.NoError(t, err)
-
-	// Verify alertUp is not triggered after resolving
-	alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
-	assert.NoError(t, err)
-	assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered after resolving")
-	// Verify alertDown is still triggered
-	alertDown, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertDown.Id})
-	assert.NoError(t, err)
-	assert.True(t, alertDown.GetBool("triggered"), "Alert should still be triggered after resolving")
-
-	// Verify we have one unresolved alert history record
-	alertHistoryCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
-	assert.NoError(t, err)
-	assert.EqualValues(t, 1, alertHistoryCount, "Should have exactly one unresolved alert history record")
-
+	assert.False(t, updatedRecord.GetBool("triggered"))
 }
--- a/internal/alerts/alerts_test_helpers.go
+++ b/internal/alerts/alerts_test_helpers.go
@@ -9,6 +9,18 @@ import (
 	"github.com/pocketbase/pocketbase/core"
 )

+func NewTestAlertManagerWithoutWorker(app hubLike) *AlertManager {
+	return &AlertManager{
+		hub:         app,
+		alertsCache: NewAlertsCache(app),
+	}
+}
+
+// GetSystemAlertsCache returns the internal system alerts cache.
+func (am *AlertManager) GetSystemAlertsCache() *AlertsCache {
+	return am.alertsCache
+}
+
 func (am *AlertManager) GetAlertManager() *AlertManager {
 	return am
 }
@@ -27,19 +39,18 @@ func (am *AlertManager) GetPendingAlertsCount() int {
 }

 // ProcessPendingAlerts manually processes all expired alerts (for testing)
-func (am *AlertManager) ProcessPendingAlerts() ([]*core.Record, error) {
+func (am *AlertManager) ProcessPendingAlerts() ([]CachedAlertData, error) {
 	now := time.Now()
 	var lastErr error
-	var processedAlerts []*core.Record
+	var processedAlerts []CachedAlertData
 	am.pendingAlerts.Range(func(key, value any) bool {
 		info := value.(*alertInfo)
 		if now.After(info.expireTime) {
-			// Downtime delay has passed, process alert
-			if err := am.sendStatusAlert("down", info.systemName, info.alertRecord); err != nil {
-				lastErr = err
+			if info.timer != nil {
+				info.timer.Stop()
 			}
-			processedAlerts = append(processedAlerts, info.alertRecord)
-			am.pendingAlerts.Delete(key)
+			am.processPendingAlert(key.(string))
+			processedAlerts = append(processedAlerts, info.alertData)
 		}
 		return true
 	})
@@ -56,6 +67,31 @@ func (am *AlertManager) ForceExpirePendingAlerts() {
 	})
 }

+func (am *AlertManager) ResetPendingAlertTimer(alertID string, delay time.Duration) bool {
+	value, loaded := am.pendingAlerts.Load(alertID)
+	if !loaded {
+		return false
+	}
+
+	info := value.(*alertInfo)
+	if info.timer != nil {
+		info.timer.Stop()
+	}
+	info.expireTime = time.Now().Add(delay)
+	info.timer = time.AfterFunc(delay, func() {
+		am.processPendingAlert(alertID)
+	})
+	return true
+}
+
 func ResolveStatusAlerts(app core.App) error {
 	return resolveStatusAlerts(app)
 }
+
+func (am *AlertManager) RestorePendingStatusAlerts() error {
+	return am.restorePendingStatusAlerts()
+}
+
+func (am *AlertManager) SetAlertTriggered(alert CachedAlertData, triggered bool) error {
+	return am.setAlertTriggered(alert, triggered)
+}
--- a/internal/cmd/agent/agent.go
+++ b/internal/cmd/agent/agent.go
@@ -9,6 +9,7 @@ import (
 	"github.com/henrygd/beszel"
 	"github.com/henrygd/beszel/agent"
 	"github.com/henrygd/beszel/agent/health"
+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/spf13/pflag"
 	"golang.org/x/crypto/ssh"
 )
@@ -116,12 +117,12 @@ func (opts *cmdOptions) loadPublicKeys() ([]ssh.PublicKey, error) {
 	}

 	// Try environment variable
-	if key, ok := agent.GetEnv("KEY"); ok && key != "" {
+	if key, ok := utils.GetEnv("KEY"); ok && key != "" {
 		return agent.ParseKeys(key)
 	}

 	// Try key file
-	keyFile, ok := agent.GetEnv("KEY_FILE")
+	keyFile, ok := utils.GetEnv("KEY_FILE")
 	if !ok {
 		return nil, fmt.Errorf("no key provided: must set -key flag, KEY env var, or KEY_FILE env var. Use 'beszel-agent help' for usage")
 	}
--- a/internal/cmd/hub/hub.go
+++ b/internal/cmd/hub/hub.go
@@ -28,8 +28,8 @@ func main() {
 	}

 	baseApp := getBaseApp()
-	h := hub.NewHub(baseApp)
-	if err := h.StartHub(); err != nil {
+	hub := hub.NewHub(baseApp)
+	if err := hub.StartHub(); err != nil {
 		log.Fatal(err)
 	}
 }
--- a/internal/entities/container/container.go
+++ b/internal/entities/container/container.go
@@ -10,10 +10,19 @@ type ApiInfo struct {
 	Status  string
 	State   string
 	Image   string
+	Health  struct {
+		Status string
+		// FailingStreak int
+	}
+	Ports []struct {
+		// PrivatePort uint16
+		PublicPort uint16
+		IP         string
+		// Type        string
+	}
 	// ImageID string
 	// Command string
 	// Created int64
-	// Ports      []Port
 	// SizeRw     int64 `json:",omitempty"`
 	// SizeRootFs int64 `json:",omitempty"`
 	// Labels     map[string]string
@@ -140,6 +149,7 @@ type Stats struct {
 	Status string       `json:"-" cbor:"6,keyasint"`
 	Id     string       `json:"-" cbor:"7,keyasint"`
 	Image  string       `json:"-" cbor:"8,keyasint"`
+	Ports  string       `json:"-" cbor:"10,keyasint"`
 	// PrevCpu     [2]uint64    `json:"-"`
 	CpuSystem    uint64       `json:"-"`
 	CpuContainer uint64       `json:"-"`
--- a/internal/entities/smart/smart.go
+++ b/internal/entities/smart/smart.go
@@ -143,8 +143,8 @@ type AtaDeviceStatisticsPage struct {
 }

 type AtaDeviceStatisticsEntry struct {
-	Name  string  `json:"name"`
-	Value *uint64 `json:"value,omitempty"`
+	Name  string `json:"name"`
+	Value *int64 `json:"value,omitempty"`
 }

 type AtaSmartAttribute struct {
@@ -356,8 +356,8 @@ type SmartInfoForSata struct {
 	SmartStatus SmartStatusInfo `json:"smart_status"`
 	// AtaSmartData                 AtaSmartData                 `json:"ata_smart_data"`
 	// AtaSctCapabilities           AtaSctCapabilities           `json:"ata_sct_capabilities"`
-	AtaSmartAttributes  AtaSmartAttributes  `json:"ata_smart_attributes"`
-	AtaDeviceStatistics AtaDeviceStatistics `json:"ata_device_statistics"`
+	AtaSmartAttributes  AtaSmartAttributes `json:"ata_smart_attributes"`
+	AtaDeviceStatistics json.RawMessage    `json:"ata_device_statistics"`
 	// PowerOnTime                  PowerOnTimeInfo              `json:"power_on_time"`
 	// PowerCycleCount              uint16                       `json:"power_cycle_count"`
 	Temperature TemperatureInfo `json:"temperature"`
--- a/internal/entities/system/system.go
+++ b/internal/entities/system/system.go
@@ -12,8 +12,9 @@ import (

 type Stats struct {
 	Cpu            float64             `json:"cpu" cbor:"0,keyasint"`
-	MaxCpu         float64             `json:"cpum,omitempty" cbor:"1,keyasint,omitempty"`
+	MaxCpu         float64             `json:"cpum,omitempty" cbor:"-"`
 	Mem            float64             `json:"m" cbor:"2,keyasint"`
+	MaxMem         float64             `json:"mm,omitempty" cbor:"-"`
 	MemUsed        float64             `json:"mu" cbor:"3,keyasint"`
 	MemPct         float64             `json:"mp" cbor:"4,keyasint"`
 	MemBuffCache   float64             `json:"mb" cbor:"5,keyasint"`
@@ -23,26 +24,25 @@ type Stats struct {
 	DiskTotal      float64             `json:"d" cbor:"9,keyasint"`
 	DiskUsed       float64             `json:"du" cbor:"10,keyasint"`
 	DiskPct        float64             `json:"dp" cbor:"11,keyasint"`
-	DiskReadPs     float64             `json:"dr" cbor:"12,keyasint"`
-	DiskWritePs    float64             `json:"dw" cbor:"13,keyasint"`
-	MaxDiskReadPs  float64             `json:"drm,omitempty" cbor:"14,keyasint,omitempty"`
-	MaxDiskWritePs float64             `json:"dwm,omitempty" cbor:"15,keyasint,omitempty"`
+	DiskReadPs     float64             `json:"dr,omitzero" cbor:"12,keyasint,omitzero"`
+	DiskWritePs    float64             `json:"dw,omitzero" cbor:"13,keyasint,omitzero"`
+	MaxDiskReadPs  float64             `json:"drm,omitempty" cbor:"-"`
+	MaxDiskWritePs float64             `json:"dwm,omitempty" cbor:"-"`
 	NetworkSent    float64             `json:"ns,omitzero" cbor:"16,keyasint,omitzero"`
 	NetworkRecv    float64             `json:"nr,omitzero" cbor:"17,keyasint,omitzero"`
-	MaxNetworkSent float64             `json:"nsm,omitempty" cbor:"18,keyasint,omitempty"`
-	MaxNetworkRecv float64             `json:"nrm,omitempty" cbor:"19,keyasint,omitempty"`
+	MaxNetworkSent float64             `json:"nsm,omitempty" cbor:"-"`
+	MaxNetworkRecv float64             `json:"nrm,omitempty" cbor:"-"`
 	Temperatures   map[string]float64  `json:"t,omitempty" cbor:"20,keyasint,omitempty"`
 	ExtraFs        map[string]*FsStats `json:"efs,omitempty" cbor:"21,keyasint,omitempty"`
 	GPUData        map[string]GPUData  `json:"g,omitempty" cbor:"22,keyasint,omitempty"`
-	LoadAvg1       float64             `json:"l1,omitempty" cbor:"23,keyasint,omitempty"`
-	LoadAvg5       float64             `json:"l5,omitempty" cbor:"24,keyasint,omitempty"`
-	LoadAvg15      float64             `json:"l15,omitempty" cbor:"25,keyasint,omitempty"`
-	Bandwidth      [2]uint64           `json:"b,omitzero" cbor:"26,keyasint,omitzero"`  // [sent bytes, recv bytes]
-	MaxBandwidth   [2]uint64           `json:"bm,omitzero" cbor:"27,keyasint,omitzero"` // [sent bytes, recv bytes]
+	// LoadAvg1       float64             `json:"l1,omitempty" cbor:"23,keyasint,omitempty"`
+	// LoadAvg5       float64             `json:"l5,omitempty" cbor:"24,keyasint,omitempty"`
+	// LoadAvg15      float64             `json:"l15,omitempty" cbor:"25,keyasint,omitempty"`
+	Bandwidth    [2]uint64 `json:"b,omitzero" cbor:"26,keyasint,omitzero"` // [sent bytes, recv bytes]
+	MaxBandwidth [2]uint64 `json:"bm,omitzero" cbor:"-"`                   // [sent bytes, recv bytes]
 	// TODO: remove other load fields in future release in favor of load avg array
 	LoadAvg           [3]float64           `json:"la,omitempty" cbor:"28,keyasint"`
-	Battery           [2]uint8             `json:"bat,omitzero" cbor:"29,keyasint,omitzero"` // [percent, charge state, current]
-	MaxMem            float64              `json:"mm,omitempty" cbor:"30,keyasint,omitempty"`
+	Battery           [2]uint8             `json:"bat,omitzero" cbor:"29,keyasint,omitzero"`    // [percent, charge state]
 	NetworkInterfaces map[string][4]uint64 `json:"ni,omitempty" cbor:"31,keyasint,omitempty"`   // [upload bytes, download bytes, total upload, total download]
 	DiskIO            [2]uint64            `json:"dio,omitzero" cbor:"32,keyasint,omitzero"`    // [read bytes, write bytes]
 	MaxDiskIO         [2]uint64            `json:"diom,omitzero" cbor:"-"`                      // [max read bytes, max write bytes]
@@ -90,8 +90,8 @@ type FsStats struct {
 	TotalWrite     uint64    `json:"-"`
 	DiskReadPs     float64   `json:"r" cbor:"2,keyasint"`
 	DiskWritePs    float64   `json:"w" cbor:"3,keyasint"`
-	MaxDiskReadPS  float64   `json:"rm,omitempty" cbor:"4,keyasint,omitempty"`
-	MaxDiskWritePS float64   `json:"wm,omitempty" cbor:"5,keyasint,omitempty"`
+	MaxDiskReadPS  float64   `json:"rm,omitempty" cbor:"-"`
+	MaxDiskWritePS float64   `json:"wm,omitempty" cbor:"-"`
 	// TODO: remove DiskReadPs and DiskWritePs in future release in favor of DiskReadBytes and DiskWriteBytes
 	DiskReadBytes     uint64 `json:"rb" cbor:"6,keyasint,omitempty"`
 	DiskWriteBytes    uint64 `json:"wb" cbor:"7,keyasint,omitempty"`
@@ -129,23 +129,23 @@ type Info struct {
 	KernelVersion string `json:"k,omitempty" cbor:"1,keyasint,omitempty"` // deprecated - moved to Details struct
 	Cores         int    `json:"c,omitzero" cbor:"2,keyasint,omitzero"`   // deprecated - moved to Details struct
 	// Threads is needed in Info struct to calculate load average thresholds
-	Threads        int     `json:"t,omitempty" cbor:"3,keyasint,omitempty"`
-	CpuModel       string  `json:"m,omitempty" cbor:"4,keyasint,omitempty"` // deprecated - moved to Details struct
-	Uptime         uint64  `json:"u" cbor:"5,keyasint"`
-	Cpu            float64 `json:"cpu" cbor:"6,keyasint"`
-	MemPct         float64 `json:"mp" cbor:"7,keyasint"`
-	DiskPct        float64 `json:"dp" cbor:"8,keyasint"`
-	Bandwidth      float64 `json:"b" cbor:"9,keyasint"`
-	AgentVersion   string  `json:"v" cbor:"10,keyasint"`
-	Podman         bool    `json:"p,omitempty" cbor:"11,keyasint,omitempty"` // deprecated - moved to Details struct
-	GpuPct         float64 `json:"g,omitempty" cbor:"12,keyasint,omitempty"`
-	DashboardTemp  float64 `json:"dt,omitempty" cbor:"13,keyasint,omitempty"`
-	Os             Os      `json:"os,omitempty" cbor:"14,keyasint,omitempty"`  // deprecated - moved to Details struct
-	LoadAvg1       float64 `json:"l1,omitempty" cbor:"15,keyasint,omitempty"`  // deprecated - use `la` array instead
-	LoadAvg5       float64 `json:"l5,omitempty" cbor:"16,keyasint,omitempty"`  // deprecated - use `la` array instead
-	LoadAvg15      float64 `json:"l15,omitempty" cbor:"17,keyasint,omitempty"` // deprecated - use `la` array instead
-	BandwidthBytes uint64  `json:"bb" cbor:"18,keyasint"`
+	Threads       int     `json:"t,omitempty" cbor:"3,keyasint,omitempty"`
+	CpuModel      string  `json:"m,omitempty" cbor:"4,keyasint,omitempty"` // deprecated - moved to Details struct
+	Uptime        uint64  `json:"u" cbor:"5,keyasint"`
+	Cpu           float64 `json:"cpu" cbor:"6,keyasint"`
+	MemPct        float64 `json:"mp" cbor:"7,keyasint"`
+	DiskPct       float64 `json:"dp" cbor:"8,keyasint"`
+	Bandwidth     float64 `json:"b,omitzero" cbor:"9,keyasint"` // deprecated in favor of BandwidthBytes
+	AgentVersion  string  `json:"v" cbor:"10,keyasint"`
+	Podman        bool    `json:"p,omitempty" cbor:"11,keyasint,omitempty"` // deprecated - moved to Details struct
+	GpuPct        float64 `json:"g,omitempty" cbor:"12,keyasint,omitempty"`
+	DashboardTemp float64 `json:"dt,omitempty" cbor:"13,keyasint,omitempty"`
+	Os            Os      `json:"os,omitempty" cbor:"14,keyasint,omitempty"` // deprecated - moved to Details struct
+	// LoadAvg1       float64 `json:"l1,omitempty" cbor:"15,keyasint,omitempty"`  // deprecated - use `la` array instead
+	// LoadAvg5       float64 `json:"l5,omitempty" cbor:"16,keyasint,omitempty"`  // deprecated - use `la` array instead
+	// LoadAvg15      float64 `json:"l15,omitempty" cbor:"17,keyasint,omitempty"` // deprecated - use `la` array instead

+	BandwidthBytes uint64             `json:"bb" cbor:"18,keyasint"`
 	LoadAvg        [3]float64         `json:"la,omitempty" cbor:"19,keyasint"`
 	ConnectionType ConnectionType     `json:"ct,omitempty" cbor:"20,keyasint,omitempty,omitzero"`
 	ExtraFsPct     map[string]float64 `json:"efs,omitempty" cbor:"21,keyasint,omitempty"`
--- a/internal/hub/agent_connect_test.go
+++ b/internal/hub/agent_connect_test.go
@@ -32,7 +32,7 @@ func createTestHub(t testing.TB) (*Hub, *pbtests.TestApp, error) {
 	if err != nil {
 		return nil, nil, err
 	}
-	return NewHub(testApp), testApp, nil
+	return NewHub(testApp), testApp, err
 }

 // cleanupTestHub stops background system goroutines before tearing down the app.
@@ -897,12 +897,8 @@ func TestAgentWebSocketIntegration(t *testing.T) {
 			require.NoError(t, err)

 			// Set up environment variables for the agent
-			os.Setenv("BESZEL_AGENT_HUB_URL", ts.URL)
-			os.Setenv("BESZEL_AGENT_TOKEN", tc.agentToken)
-			defer func() {
-				os.Unsetenv("BESZEL_AGENT_HUB_URL")
-				os.Unsetenv("BESZEL_AGENT_TOKEN")
-			}()
+			t.Setenv("BESZEL_AGENT_HUB_URL", ts.URL)
+			t.Setenv("BESZEL_AGENT_TOKEN", tc.agentToken)

 			// Start agent in background
 			done := make(chan error, 1)
@@ -917,7 +913,7 @@ func TestAgentWebSocketIntegration(t *testing.T) {

 			// Wait for connection result
 			maxWait := 2 * time.Second
-			time.Sleep(20 * time.Millisecond)
+			time.Sleep(40 * time.Millisecond)
 			checkInterval := 20 * time.Millisecond
 			timeout := time.After(maxWait)
 			ticker := time.Tick(checkInterval)
@@ -1080,12 +1076,8 @@ func TestMultipleSystemsWithSameUniversalToken(t *testing.T) {
 			require.NoError(t, err)

 			// Set up environment variables for the agent
-			os.Setenv("BESZEL_AGENT_HUB_URL", ts.URL)
-			os.Setenv("BESZEL_AGENT_TOKEN", universalToken)
-			defer func() {
-				os.Unsetenv("BESZEL_AGENT_HUB_URL")
-				os.Unsetenv("BESZEL_AGENT_TOKEN")
-			}()
+			t.Setenv("BESZEL_AGENT_HUB_URL", ts.URL)
+			t.Setenv("BESZEL_AGENT_TOKEN", universalToken)

 			// Count systems before connection
 			systemsBefore, err := testApp.FindRecordsByFilter("systems", "users ~ {:userId}", "", -1, 0, map[string]any{"userId": userRecord.Id})
@@ -1243,12 +1235,8 @@ func TestPermanentUniversalTokenFromDB(t *testing.T) {
 	require.NoError(t, err)

 	// Set up environment variables for the agent
-	os.Setenv("BESZEL_AGENT_HUB_URL", ts.URL)
-	os.Setenv("BESZEL_AGENT_TOKEN", universalToken)
-	defer func() {
-		os.Unsetenv("BESZEL_AGENT_HUB_URL")
-		os.Unsetenv("BESZEL_AGENT_TOKEN")
-	}()
+	t.Setenv("BESZEL_AGENT_HUB_URL", ts.URL)
+	t.Setenv("BESZEL_AGENT_TOKEN", universalToken)

 	// Start agent in background
 	done := make(chan error, 1)
--- a/internal/hub/collections.go
+++ b/internal/hub/collections.go
@@ -0,0 +1,128 @@
+package hub
+
+import "github.com/pocketbase/pocketbase/core"
+
+type collectionRules struct {
+	list   *string
+	view   *string
+	create *string
+	update *string
+	delete *string
+}
+
+// setCollectionAuthSettings applies Beszel's env-driven auth settings and API access rules to its collections.
+func setCollectionAuthSettings(app core.App) error {
+	usersCollection, err := app.FindCollectionByNameOrId("users")
+	if err != nil {
+		return err
+	}
+	superusersCollection, err := app.FindCollectionByNameOrId(core.CollectionNameSuperusers)
+	if err != nil {
+		return err
+	}
+
+	// disable password auth if the DISABLE_PASSWORD_AUTH env var is "true"
+	disablePasswordAuth, _ := GetEnv("DISABLE_PASSWORD_AUTH")
+	usersCollection.PasswordAuth.Enabled = disablePasswordAuth != "true"
+	usersCollection.PasswordAuth.IdentityFields = []string{"email"}
+	// allow oauth user creation if USER_CREATION is "true"; otherwise clear the create rule
+	if userCreation, _ := GetEnv("USER_CREATION"); userCreation == "true" {
+		cr := "@request.context = 'oauth2'"
+		usersCollection.CreateRule = &cr
+	} else {
+		usersCollection.CreateRule = nil
+	}
+
+	// enable OTP and MFA if MFA_OTP is "true" (users and superusers) or "superusers" (superusers only)
+	mfaOtp, _ := GetEnv("MFA_OTP")
+	usersCollection.OTP.Length = 6
+	superusersCollection.OTP.Length = 6
+	usersCollection.OTP.Enabled = mfaOtp == "true"
+	usersCollection.MFA.Enabled = mfaOtp == "true"
+	superusersCollection.OTP.Enabled = mfaOtp == "true" || mfaOtp == "superusers"
+	superusersCollection.MFA.Enabled = mfaOtp == "true" || mfaOtp == "superusers"
+	if err := app.Save(superusersCollection); err != nil {
+		return err
+	}
+	if err := app.Save(usersCollection); err != nil {
+		return err
+	}
+
+	// When SHARE_ALL_SYSTEMS is enabled, any authenticated user can read
+	// system-scoped data. Write rules continue to block readonly users.
+	shareAllSystems, _ := GetEnv("SHARE_ALL_SYSTEMS")
+
+	authenticatedRule := "@request.auth.id != \"\""
+	systemsMemberRule := authenticatedRule + " && users.id ?= @request.auth.id"
+	systemMemberRule := authenticatedRule + " && system.users.id ?= @request.auth.id"
+
+	systemsReadRule := systemsMemberRule
+	systemScopedReadRule := systemMemberRule
+	if shareAllSystems == "true" {
+		systemsReadRule = authenticatedRule
+		systemScopedReadRule = authenticatedRule
+	}
+	systemsWriteRule := systemsReadRule + " && @request.auth.role != \"readonly\""
+	systemScopedWriteRule := systemScopedReadRule + " && @request.auth.role != \"readonly\""
+
+	if err := applyCollectionRules(app, []string{"systems"}, collectionRules{
+		list:   &systemsReadRule,
+		view:   &systemsReadRule,
+		create: &systemsWriteRule,
+		update: &systemsWriteRule,
+		delete: &systemsWriteRule,
+	}); err != nil {
+		return err
+	}
+
+	if err := applyCollectionRules(app, []string{"containers", "container_stats", "system_stats", "systemd_services"}, collectionRules{
+		list: &systemScopedReadRule,
+	}); err != nil {
+		return err
+	}
+
+	if err := applyCollectionRules(app, []string{"smart_devices"}, collectionRules{
+		list:   &systemScopedReadRule,
+		view:   &systemScopedReadRule,
+		delete: &systemScopedWriteRule,
+	}); err != nil {
+		return err
+	}
+
+	if err := applyCollectionRules(app, []string{"fingerprints"}, collectionRules{
+		list:   &systemScopedReadRule,
+		view:   &systemScopedReadRule,
+		create: &systemScopedWriteRule,
+		update: &systemScopedWriteRule,
+		delete: &systemScopedWriteRule,
+	}); err != nil {
+		return err
+	}
+
+	if err := applyCollectionRules(app, []string{"system_details"}, collectionRules{
+		list: &systemScopedReadRule,
+		view: &systemScopedReadRule,
+	}); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func applyCollectionRules(app core.App, collectionNames []string, rules collectionRules) error {
+	for _, collectionName := range collectionNames {
+		collection, err := app.FindCollectionByNameOrId(collectionName)
+		if err != nil {
+			return err
+		}
+		collection.ListRule = rules.list
+		collection.ViewRule = rules.view
+		collection.CreateRule = rules.create
+		collection.UpdateRule = rules.update
+		collection.DeleteRule = rules.delete
+		if err := app.Save(collection); err != nil {
+			return err
+		}
+	}
+	return nil
+}
--- a/internal/hub/collections_test.go
+++ b/internal/hub/collections_test.go
@@ -0,0 +1,527 @@
+//go:build testing
+
+package hub_test
+
+import (
+	"fmt"
+	"net/http"
+	"testing"
+
+	beszelTests "github.com/henrygd/beszel/internal/tests"
+	"github.com/pocketbase/pocketbase/core"
+	pbTests "github.com/pocketbase/pocketbase/tests"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestCollectionRulesDefault verifies the auth options and API rules that a new test hub
+// applies to its collections when the test itself sets no environment overrides.
+func TestCollectionRulesDefault(t *testing.T) {
+	hub, err := beszelTests.NewTestHub(t.TempDir())
+	require.NoError(t, err, "Failed to create test hub")
+	defer hub.Cleanup()
+
+	const isUserMatchesUser = `@request.auth.id != "" && user = @request.auth.id`
+
+	const isUserInUsers = `@request.auth.id != "" && users.id ?= @request.auth.id`
+	const isUserInUsersNotReadonly = `@request.auth.id != "" && users.id ?= @request.auth.id && @request.auth.role != "readonly"`
+
+	const isUserInSystemUsers = `@request.auth.id != "" && system.users.id ?= @request.auth.id`
+	const isUserInSystemUsersNotReadonly = `@request.auth.id != "" && system.users.id ?= @request.auth.id && @request.auth.role != "readonly"`
+
+	// nilRule is the table marker for "this rule pointer must be nil"
+	const nilRule = ""
+
+	// assertRule compares one rule pointer against its expectation.
+	// The dereference is guarded so an unexpectedly nil rule is reported as a
+	// failed assertion instead of panicking and aborting the remaining checks.
+	assertRule := func(collection, action, want string, got *string) {
+		t.Helper()
+		if want == nilRule {
+			assert.Nil(t, got, "%s %s rule should be nil", collection, action)
+			return
+		}
+		if assert.NotNil(t, got, "%s %s rule should be set", collection, action) {
+			assert.Equal(t, want, *got, "%s %s rule", collection, action)
+		}
+	}
+
+	// users and superusers collections share the same expected auth options
+	for _, name := range []string{"users", core.CollectionNameSuperusers} {
+		collection, err := hub.FindCollectionByNameOrId(name)
+		// require (not assert): the checks below dereference collection
+		require.NoError(t, err, "Failed to find %s collection", name)
+		assert.True(t, collection.PasswordAuth.Enabled, "%s password auth", name)
+		assert.Equal(t, []string{"email"}, collection.PasswordAuth.IdentityFields, "%s identity fields", name)
+		assert.Nil(t, collection.CreateRule, "%s create rule", name)
+		assert.False(t, collection.MFA.Enabled, "%s MFA", name)
+	}
+
+	// expected API rules per collection
+	expectations := []struct {
+		collection                         string
+		list, view, create, update, delete string
+	}{
+		// collection, list, view, create, update, delete
+		{"alerts", isUserMatchesUser, nilRule, isUserMatchesUser, isUserMatchesUser, isUserMatchesUser},
+		{"alerts_history", isUserMatchesUser, nilRule, nilRule, nilRule, isUserMatchesUser},
+		{"containers", isUserInSystemUsers, nilRule, nilRule, nilRule, nilRule},
+		{"container_stats", isUserInSystemUsers, nilRule, nilRule, nilRule, nilRule},
+		{"fingerprints", isUserInSystemUsers, isUserInSystemUsers, isUserInSystemUsersNotReadonly, isUserInSystemUsersNotReadonly, isUserInSystemUsersNotReadonly},
+		{"quiet_hours", isUserMatchesUser, isUserMatchesUser, isUserMatchesUser, isUserMatchesUser, isUserMatchesUser},
+		{"smart_devices", isUserInSystemUsers, isUserInSystemUsers, nilRule, nilRule, isUserInSystemUsersNotReadonly},
+		{"system_details", isUserInSystemUsers, isUserInSystemUsers, nilRule, nilRule, nilRule},
+		{"system_stats", isUserInSystemUsers, nilRule, nilRule, nilRule, nilRule},
+		{"systemd_services", isUserInSystemUsers, nilRule, nilRule, nilRule, nilRule},
+		{"systems", isUserInUsers, isUserInUsers, isUserInUsersNotReadonly, isUserInUsersNotReadonly, isUserInUsersNotReadonly},
+		{"universal_tokens", nilRule, nilRule, nilRule, nilRule, nilRule},
+		{"user_settings", isUserMatchesUser, nilRule, isUserMatchesUser, isUserMatchesUser, nilRule},
+	}
+
+	for _, want := range expectations {
+		collection, err := hub.FindCollectionByNameOrId(want.collection)
+		require.NoError(t, err, "Failed to find %s collection", want.collection)
+		assertRule(want.collection, "list", want.list, collection.ListRule)
+		assertRule(want.collection, "view", want.view, collection.ViewRule)
+		assertRule(want.collection, "create", want.create, collection.CreateRule)
+		assertRule(want.collection, "update", want.update, collection.UpdateRule)
+		assertRule(want.collection, "delete", want.delete, collection.DeleteRule)
+	}
+}
+
+// TestCollectionRulesShareAllSystems verifies the API rules a new test hub applies to its
+// collections when SHARE_ALL_SYSTEMS is "true".
+func TestCollectionRulesShareAllSystems(t *testing.T) {
+	t.Setenv("SHARE_ALL_SYSTEMS", "true")
+	hub, err := beszelTests.NewTestHub(t.TempDir())
+	require.NoError(t, err, "Failed to create test hub")
+	defer hub.Cleanup()
+
+	const isUser = `@request.auth.id != ""`
+	const isUserNotReadonly = `@request.auth.id != "" && @request.auth.role != "readonly"`
+
+	const isUserMatchesUser = `@request.auth.id != "" && user = @request.auth.id`
+
+	// nilRule is the table marker for "this rule pointer must be nil"
+	const nilRule = ""
+
+	// assertRule compares one rule pointer against its expectation.
+	// The dereference is guarded so an unexpectedly nil rule is reported as a
+	// failed assertion instead of panicking and aborting the remaining checks.
+	assertRule := func(collection, action, want string, got *string) {
+		t.Helper()
+		if want == nilRule {
+			assert.Nil(t, got, "%s %s rule should be nil", collection, action)
+			return
+		}
+		if assert.NotNil(t, got, "%s %s rule should be set", collection, action) {
+			assert.Equal(t, want, *got, "%s %s rule", collection, action)
+		}
+	}
+
+	// expected API rules per collection
+	expectations := []struct {
+		collection                         string
+		list, view, create, update, delete string
+	}{
+		// collection, list, view, create, update, delete
+		{"alerts", isUserMatchesUser, nilRule, isUserMatchesUser, isUserMatchesUser, isUserMatchesUser},
+		{"alerts_history", isUserMatchesUser, nilRule, nilRule, nilRule, isUserMatchesUser},
+		{"containers", isUser, nilRule, nilRule, nilRule, nilRule},
+		{"container_stats", isUser, nilRule, nilRule, nilRule, nilRule},
+		{"fingerprints", isUser, isUser, isUserNotReadonly, isUserNotReadonly, isUserNotReadonly},
+		{"quiet_hours", isUserMatchesUser, isUserMatchesUser, isUserMatchesUser, isUserMatchesUser, isUserMatchesUser},
+		{"smart_devices", isUser, isUser, nilRule, nilRule, isUserNotReadonly},
+		{"system_details", isUser, isUser, nilRule, nilRule, nilRule},
+		{"system_stats", isUser, nilRule, nilRule, nilRule, nilRule},
+		{"systemd_services", isUser, nilRule, nilRule, nilRule, nilRule},
+		{"systems", isUser, isUser, isUserNotReadonly, isUserNotReadonly, isUserNotReadonly},
+		{"universal_tokens", nilRule, nilRule, nilRule, nilRule, nilRule},
+		{"user_settings", isUserMatchesUser, nilRule, isUserMatchesUser, isUserMatchesUser, nilRule},
+	}
+
+	for _, want := range expectations {
+		collection, err := hub.FindCollectionByNameOrId(want.collection)
+		require.NoError(t, err, "Failed to find %s collection", want.collection)
+		assertRule(want.collection, "list", want.list, collection.ListRule)
+		assertRule(want.collection, "view", want.view, collection.ViewRule)
+		assertRule(want.collection, "create", want.create, collection.CreateRule)
+		assertRule(want.collection, "update", want.update, collection.UpdateRule)
+		assertRule(want.collection, "delete", want.delete, collection.DeleteRule)
+	}
+}
+
+// TestDisablePasswordAuth verifies that password auth is disabled on the users
+// collection when DISABLE_PASSWORD_AUTH is "true".
+func TestDisablePasswordAuth(t *testing.T) {
+	t.Setenv("DISABLE_PASSWORD_AUTH", "true")
+	hub, err := beszelTests.NewTestHub(t.TempDir())
+	require.NoError(t, err)
+	defer hub.Cleanup()
+
+	usersCollection, err := hub.FindCollectionByNameOrId("users")
+	// require (not assert): a failed lookup would otherwise panic on the dereference below
+	require.NoError(t, err)
+	assert.False(t, usersCollection.PasswordAuth.Enabled)
+}
+
+// TestUserCreation verifies that the users collection create rule only allows
+// the oauth2 request context when USER_CREATION is "true".
+func TestUserCreation(t *testing.T) {
+	t.Setenv("USER_CREATION", "true")
+	hub, err := beszelTests.NewTestHub(t.TempDir())
+	require.NoError(t, err)
+	defer hub.Cleanup()
+
+	usersCollection, err := hub.FindCollectionByNameOrId("users")
+	// require (not assert): both values are dereferenced below
+	require.NoError(t, err)
+	require.NotNil(t, usersCollection.CreateRule)
+	assert.Equal(t, "@request.context = 'oauth2'", *usersCollection.CreateRule)
+}
+
+// TestMFAOtp verifies that OTP and MFA are enabled on both the users and
+// superusers collections when MFA_OTP is "true".
+func TestMFAOtp(t *testing.T) {
+	t.Setenv("MFA_OTP", "true")
+	hub, err := beszelTests.NewTestHub(t.TempDir())
+	require.NoError(t, err)
+	defer hub.Cleanup()
+
+	usersCollection, err := hub.FindCollectionByNameOrId("users")
+	// require (not assert): a failed lookup would otherwise panic on the dereference below
+	require.NoError(t, err)
+	assert.True(t, usersCollection.OTP.Enabled)
+	assert.True(t, usersCollection.MFA.Enabled)
+
+	superusersCollection, err := hub.FindCollectionByNameOrId(core.CollectionNameSuperusers)
+	require.NoError(t, err)
+	assert.True(t, superusersCollection.OTP.Enabled)
+	assert.True(t, superusersCollection.MFA.Enabled)
+}
+
+// TestApiCollectionsAuthRules exercises the systems collection list and delete rules
+// through the HTTP API for anonymous, regular and readonly users, with and without
+// SHARE_ALL_SYSTEMS. All scenarios share one test app and run in order: later
+// scenarios depend on the record counts left behind by earlier deletions.
+func TestApiCollectionsAuthRules(t *testing.T) {
+	hub, err := beszelTests.NewTestHub(t.TempDir())
+	require.NoError(t, err)
+	defer hub.Cleanup()
+
+	// NOTE(review): the return value is ignored here as in the original; presumably the
+	// call is only needed for the hooks it binds — confirm against StartHub.
+	hub.StartHub()
+
+	// Fixture errors are checked with require so a failed setup is reported directly
+	// instead of surfacing later as a nil dereference.
+	user1, err := beszelTests.CreateUser(hub, "user1@example.com", "password")
+	require.NoError(t, err)
+	user1Token, err := user1.NewAuthToken()
+	require.NoError(t, err)
+
+	user2, err := beszelTests.CreateUser(hub, "user2@example.com", "password")
+	require.NoError(t, err)
+	// user2Token, _ := user2.NewAuthToken()
+
+	userReadonly, err := beszelTests.CreateUserWithRole(hub, "userreadonly@example.com", "password", "readonly")
+	require.NoError(t, err)
+	userReadonlyToken, err := userReadonly.NewAuthToken()
+	require.NoError(t, err)
+
+	userOneSystem, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
+		"name":  "system1",
+		"users": []string{user1.Id},
+		"host":  "127.0.0.1",
+	})
+	require.NoError(t, err)
+
+	sharedSystem, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
+		"name":  "system2",
+		"users": []string{user1.Id, user2.Id},
+		"host":  "127.0.0.2",
+	})
+	require.NoError(t, err)
+
+	userTwoSystem, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
+		"name":  "system3",
+		"users": []string{user2.Id},
+		"host":  "127.0.0.3",
+	})
+	require.NoError(t, err)
+
+	userRecords, _ := hub.CountRecords("users")
+	assert.EqualValues(t, 3, userRecords, "all users should be created")
+
+	systemRecords, _ := hub.CountRecords("systems")
+	assert.EqualValues(t, 3, systemRecords, "all systems should be created")
+
+	// every scenario reuses the same app so state carries over between them
+	testAppFactory := func(t testing.TB) *pbTests.TestApp {
+		return hub.TestApp
+	}
+
+	scenarios := []beszelTests.ApiScenario{
+		{
+			Name:               "Unauthorized user cannot list systems",
+			Method:             http.MethodGet,
+			URL:                "/api/collections/systems/records",
+			ExpectedStatus:     200, // https://github.com/pocketbase/pocketbase/discussions/1570
+			TestAppFactory:     testAppFactory,
+			ExpectedContent:    []string{`"items":[]`, `"totalItems":0`},
+			NotExpectedContent: []string{userOneSystem.Id, sharedSystem.Id, userTwoSystem.Id},
+		},
+		{
+			Name:               "Unauthorized user cannot delete a system",
+			Method:             http.MethodDelete,
+			URL:                fmt.Sprintf("/api/collections/systems/records/%s", userOneSystem.Id),
+			ExpectedStatus:     404,
+			TestAppFactory:     testAppFactory,
+			ExpectedContent:    []string{"resource wasn't found"},
+			NotExpectedContent: []string{userOneSystem.Id},
+			BeforeTestFunc: func(t testing.TB, app *pbTests.TestApp, e *core.ServeEvent) {
+				systemsCount, _ := app.CountRecords("systems")
+				assert.EqualValues(t, 3, systemsCount, "should have 3 systems before deletion")
+			},
+			AfterTestFunc: func(t testing.TB, app *pbTests.TestApp, res *http.Response) {
+				systemsCount, _ := app.CountRecords("systems")
+				assert.EqualValues(t, 3, systemsCount, "should still have 3 systems after failed deletion")
+			},
+		},
+		{
+			Name:   "User 1 can list their own systems",
+			Method: http.MethodGet,
+			URL:    "/api/collections/systems/records",
+			Headers: map[string]string{
+				"Authorization": user1Token,
+			},
+			ExpectedStatus:     200,
+			ExpectedContent:    []string{userOneSystem.Id, sharedSystem.Id},
+			NotExpectedContent: []string{userTwoSystem.Id},
+			TestAppFactory:     testAppFactory,
+		},
+		{
+			Name:   "User 1 cannot list user 2's system",
+			Method: http.MethodGet,
+			URL:    "/api/collections/systems/records",
+			Headers: map[string]string{
+				"Authorization": user1Token,
+			},
+			ExpectedStatus:     200,
+			ExpectedContent:    []string{userOneSystem.Id, sharedSystem.Id},
+			NotExpectedContent: []string{userTwoSystem.Id},
+			TestAppFactory:     testAppFactory,
+		},
+		{
+			Name:   "User 1 can see user 2's system if SHARE_ALL_SYSTEMS is enabled",
+			Method: http.MethodGet,
+			URL:    "/api/collections/systems/records",
+			Headers: map[string]string{
+				"Authorization": user1Token,
+			},
+			ExpectedStatus:  200,
+			ExpectedContent: []string{userOneSystem.Id, sharedSystem.Id, userTwoSystem.Id},
+			TestAppFactory:  testAppFactory,
+			BeforeTestFunc: func(t testing.TB, app *pbTests.TestApp, e *core.ServeEvent) {
+				// re-apply the collection rules with sharing switched on
+				t.Setenv("SHARE_ALL_SYSTEMS", "true")
+				hub.SetCollectionAuthSettings()
+			},
+			AfterTestFunc: func(t testing.TB, app *pbTests.TestApp, res *http.Response) {
+				// switch sharing back off so the following scenarios see the default rules
+				t.Setenv("SHARE_ALL_SYSTEMS", "")
+				hub.SetCollectionAuthSettings()
+			},
+		},
+		{
+			Name:   "User 1 can delete their own system",
+			Method: http.MethodDelete,
+			URL:    fmt.Sprintf("/api/collections/systems/records/%s", userOneSystem.Id),
+			Headers: map[string]string{
+				"Authorization": user1Token,
+			},
+			ExpectedStatus: 204,
+			TestAppFactory: testAppFactory,
+			BeforeTestFunc: func(t testing.TB, app *pbTests.TestApp, e *core.ServeEvent) {
+				systemsCount, _ := app.CountRecords("systems")
+				assert.EqualValues(t, 3, systemsCount, "should have 3 systems before deletion")
+			},
+			AfterTestFunc: func(t testing.TB, app *pbTests.TestApp, res *http.Response) {
+				systemsCount, _ := app.CountRecords("systems")
+				assert.EqualValues(t, 2, systemsCount, "should have 2 systems after deletion")
+			},
+		},
+		{
+			Name:   "User 1 cannot delete user 2's system",
+			Method: http.MethodDelete,
+			URL:    fmt.Sprintf("/api/collections/systems/records/%s", userTwoSystem.Id),
+			Headers: map[string]string{
+				"Authorization": user1Token,
+			},
+			ExpectedStatus:  404,
+			TestAppFactory:  testAppFactory,
+			ExpectedContent: []string{"resource wasn't found"},
+			BeforeTestFunc: func(t testing.TB, app *pbTests.TestApp, e *core.ServeEvent) {
+				systemsCount, _ := app.CountRecords("systems")
+				assert.EqualValues(t, 2, systemsCount)
+			},
+			AfterTestFunc: func(t testing.TB, app *pbTests.TestApp, res *http.Response) {
+				systemsCount, _ := app.CountRecords("systems")
+				assert.EqualValues(t, 2, systemsCount)
+			},
+		},
+		{
+			Name:   "Readonly cannot delete a system even if SHARE_ALL_SYSTEMS is enabled",
+			Method: http.MethodDelete,
+			URL:    fmt.Sprintf("/api/collections/systems/records/%s", sharedSystem.Id),
+			Headers: map[string]string{
+				"Authorization": userReadonlyToken,
+			},
+			ExpectedStatus:  404,
+			ExpectedContent: []string{"resource wasn't found"},
+			TestAppFactory:  testAppFactory,
+			BeforeTestFunc: func(t testing.TB, app *pbTests.TestApp, e *core.ServeEvent) {
+				t.Setenv("SHARE_ALL_SYSTEMS", "true")
+				hub.SetCollectionAuthSettings()
+				systemsCount, _ := app.CountRecords("systems")
+				assert.EqualValues(t, 2, systemsCount)
+			},
+			AfterTestFunc: func(t testing.TB, app *pbTests.TestApp, res *http.Response) {
+				t.Setenv("SHARE_ALL_SYSTEMS", "")
+				hub.SetCollectionAuthSettings()
+				systemsCount, _ := app.CountRecords("systems")
+				assert.EqualValues(t, 2, systemsCount)
+			},
+		},
+		{
+			Name:   "User 1 can delete user 2's system if SHARE_ALL_SYSTEMS is enabled",
+			Method: http.MethodDelete,
+			URL:    fmt.Sprintf("/api/collections/systems/records/%s", userTwoSystem.Id),
+			Headers: map[string]string{
+				"Authorization": user1Token,
+			},
+			ExpectedStatus: 204,
+			TestAppFactory: testAppFactory,
+			BeforeTestFunc: func(t testing.TB, app *pbTests.TestApp, e *core.ServeEvent) {
+				t.Setenv("SHARE_ALL_SYSTEMS", "true")
+				hub.SetCollectionAuthSettings()
+				systemsCount, _ := app.CountRecords("systems")
+				assert.EqualValues(t, 2, systemsCount)
+			},
+			AfterTestFunc: func(t testing.TB, app *pbTests.TestApp, res *http.Response) {
+				t.Setenv("SHARE_ALL_SYSTEMS", "")
+				hub.SetCollectionAuthSettings()
+				systemsCount, _ := app.CountRecords("systems")
+				assert.EqualValues(t, 1, systemsCount)
+			},
+		},
+	}
+
+	for _, scenario := range scenarios {
+		scenario.Test(t)
+	}
+}
--- a/internal/hub/expirymap/expirymap.go
+++ b/internal/hub/expirymap/expirymap.go
@@ -1,35 +1,39 @@
+// Package expirymap provides a thread-safe map with expiring entries.
+// It supports TTL-based expiration with both lazy cleanup on access
+// and periodic background cleanup.
 package expirymap

 import (
-	"reflect"
+	"sync"
 	"time"

 	"github.com/pocketbase/pocketbase/tools/store"
 )

-type val[T any] struct {
+// val pairs a stored value with the time at which it expires.
+// T is comparable so stored values can be matched with == in the by-value lookups.
+type val[T comparable] struct {
 	value   T
 	expires time.Time
 }

-type ExpiryMap[T any] struct {
-	store           *store.Store[string, *val[T]]
-	cleanupInterval time.Duration
+// ExpiryMap is a string-keyed map whose entries each carry an expiration time.
+// Entries are held by value in the underlying store.
+type ExpiryMap[T comparable] struct {
+	store    *store.Store[string, val[T]]
+	// stopChan is closed by StopCleaner to end the background cleaner loop
+	stopChan chan struct{}
+	// stopOnce ensures stopChan is closed only once
+	stopOnce sync.Once
 }

 // New creates a new expiry map with custom cleanup interval
+// and launches a goroutine running startCleaner with that interval.
+// Call StopCleaner to end that goroutine once the map is no longer needed.
-func New[T any](cleanupInterval time.Duration) *ExpiryMap[T] {
+func New[T comparable](cleanupInterval time.Duration) *ExpiryMap[T] {
 	m := &ExpiryMap[T]{
-		store:           store.New(map[string]*val[T]{}),
-		cleanupInterval: cleanupInterval,
+		store:    store.New(map[string]val[T]{}),
+		stopChan: make(chan struct{}),
 	}
-	m.startCleaner()
+	// startCleaner blocks until stopped, so it needs its own goroutine
+	go m.startCleaner(cleanupInterval)
 	return m
 }

 // Set stores a value with the given TTL
 func (m *ExpiryMap[T]) Set(key string, value T, ttl time.Duration) {
-	m.store.Set(key, &val[T]{
+	m.store.Set(key, val[T]{
 		value:   value,
 		expires: time.Now().Add(ttl),
 	})
@@ -55,7 +59,7 @@ func (m *ExpiryMap[T]) GetOk(key string) (T, bool) {
 // GetByValue retrieves a value by value
 func (m *ExpiryMap[T]) GetByValue(val T) (key string, value T, ok bool) {
 	for key, v := range m.store.GetAll() {
-		if reflect.DeepEqual(v.value, val) {
+		if v.value == val {
 			// check if expired
 			if v.expires.Before(time.Now()) {
 				m.store.Remove(key)
@@ -75,7 +79,7 @@ func (m *ExpiryMap[T]) Remove(key string) {
 // RemovebyValue removes a value by value
 func (m *ExpiryMap[T]) RemovebyValue(value T) (T, bool) {
 	for key, val := range m.store.GetAll() {
-		if reflect.DeepEqual(val.value, value) {
+		if val.value == value {
 			m.store.Remove(key)
 			return val.value, true
 		}
@@ -84,13 +88,23 @@ func (m *ExpiryMap[T]) RemovebyValue(value T) (T, bool) {
 }

 // startCleaner runs the background cleanup process
+// It calls cleanup on every tick of interval and blocks until stopChan is closed,
+// so callers are expected to run it in its own goroutine (as New does).
-func (m *ExpiryMap[T]) startCleaner() {
-	go func() {
-		tick := time.Tick(m.cleanupInterval)
-		for range tick {
+func (m *ExpiryMap[T]) startCleaner(interval time.Duration) {
+	// time.Tick returns a nil channel for a non-positive interval; in that case
+	// cleanup never fires and only stopChan can end the loop.
+	tick := time.Tick(interval)
+	for {
+		select {
+		case <-tick:
 			m.cleanup()
+		case <-m.stopChan:
+			return
 		}
-	}()
+	}
+}
+
+// StopCleaner ends the background cleaner loop by closing the stop channel.
+// It may be called more than once; only the first call closes the channel.
+func (m *ExpiryMap[T]) StopCleaner() {
+	closeStop := func() { close(m.stopChan) }
+	m.stopOnce.Do(closeStop)
 }

 // cleanup removes all expired entries
@@ -102,3 +116,12 @@ func (m *ExpiryMap[T]) cleanup() {
 		}
 	}
 }
+
+// UpdateExpiration resets the expiration of key to ttl from now.
+// A missing key is left untouched. An entry whose expiration has already passed is
+// removed instead of being extended, so a stale entry awaiting cleanup is not revived.
+// NOTE(review): the read and the write-back are separate store calls, so a concurrent
+// Set or Remove on the same key between them can be overwritten — confirm callers tolerate this.
+func (m *ExpiryMap[T]) UpdateExpiration(key string, ttl time.Duration) {
+	entry, ok := m.store.GetOk(key)
+	if !ok {
+		return
+	}
+	now := time.Now()
+	// same expiry test as GetByValue: expired entries count as absent
+	if entry.expires.Before(now) {
+		m.store.Remove(key)
+		return
+	}
+	entry.expires = now.Add(ttl)
+	m.store.Set(key, entry)
+}
--- a/internal/hub/expirymap/expirymap_test.go
+++ b/internal/hub/expirymap/expirymap_test.go
@@ -4,6 +4,7 @@ package expirymap

 import (
 	"testing"
+	"testing/synctest"
 	"time"

 	"github.com/stretchr/testify/assert"
@@ -177,6 +178,33 @@ func TestExpiryMap_GenericTypes(t *testing.T) {
 	})
 }

+// TestExpiryMap_UpdateExpiration verifies that UpdateExpiration extends the lifetime of
+// an existing entry and does not panic for a key that does not exist.
+// It runs inside a synctest bubble so the original TTL elapses on the fake clock
+// rather than through a real sleep.
+func TestExpiryMap_UpdateExpiration(t *testing.T) {
+	synctest.Test(t, func(t *testing.T) {
+		em := New[string](time.Hour)
+		// the cleaner goroutine has to exit before the bubble can finish
+		defer em.StopCleaner()
+
+		// Set a value with short TTL
+		em.Set("key1", "value1", time.Millisecond*50)
+
+		// Verify it exists
+		assert.True(t, em.Has("key1"))
+
+		// Update expiration to a longer TTL
+		em.UpdateExpiration("key1", time.Hour)
+
+		// Wait for the original TTL to pass
+		time.Sleep(time.Millisecond * 100)
+
+		// Should still exist because expiration was updated
+		assert.True(t, em.Has("key1"))
+		value, ok := em.GetOk("key1")
+		assert.True(t, ok)
+		assert.Equal(t, "value1", value)
+
+		// Try updating non-existent key (should not panic)
+		assert.NotPanics(t, func() {
+			em.UpdateExpiration("nonexistent", time.Hour)
+		})
+	})
+}
+
 func TestExpiryMap_ZeroValues(t *testing.T) {
 	em := New[string](time.Hour)

@@ -473,3 +501,52 @@ func TestExpiryMap_ValueOperations_Integration(t *testing.T) {
 	assert.Equal(t, "unique", value)
 	assert.Equal(t, "key2", key)
 }
+
+// TestExpiryMap_Cleaner checks, on synctest's fake clock, that an expired entry stays in
+// the underlying store until the periodic cleaner tick removes it.
+func TestExpiryMap_Cleaner(t *testing.T) {
+	synctest.Test(t, func(t *testing.T) {
+		const (
+			cleanupEvery = time.Second
+			entryTTL     = 500 * time.Millisecond
+		)
+		expiring := New[string](cleanupEvery)
+		defer expiring.StopCleaner()
+
+		expiring.Set("test", "value", entryTTL)
+
+		// 600ms in: the entry is past its TTL, but the first cleaner tick (1s) has not fired
+		time.Sleep(600 * time.Millisecond)
+		synctest.Wait()
+
+		// Nothing has touched the key, so the raw store still holds it
+		beforeTick := len(expiring.store.GetAll())
+		assert.Equal(t, 1, beforeTick, "store should still have 1 item before cleaner runs")
+
+		// 1100ms in: the cleaner tick has fired; let its goroutine finish the pass
+		time.Sleep(500 * time.Millisecond)
+		synctest.Wait()
+
+		afterTick := len(expiring.store.GetAll())
+		assert.Equal(t, 0, afterTick, "store should be empty after cleaner runs")
+	})
+}
+
+// TestExpiryMap_StopCleaner checks that StopCleaner closes stopChan and that a
+// second call does not panic.
+func TestExpiryMap_StopCleaner(t *testing.T) {
+	em := New[string](time.Hour)
+
+	// stopClosed reports, without blocking, whether stopChan has been closed
+	stopClosed := func() bool {
+		select {
+		case <-em.stopChan:
+			return true
+		default:
+			return false
+		}
+	}
+
+	// A fresh map has an open channel, so a receive would block
+	if stopClosed() {
+		t.Fatal("stopChan should be open initially")
+	}
+
+	em.StopCleaner()
+
+	// Once closed, a receive returns immediately
+	if !stopClosed() {
+		t.Fatal("stopChan was not closed by StopCleaner")
+	}
+
+	// sync.Once makes the repeated call a no-op instead of a double close
+	assert.NotPanics(t, em.StopCleaner)
+}
--- a/internal/hub/hub.go
+++ b/internal/hub/hub.go
@@ -4,6 +4,7 @@ package hub
 import (
 	"crypto/ed25519"
 	"encoding/pem"
+	"errors"
 	"fmt"
 	"net/http"
 	"net/url"
@@ -29,6 +30,7 @@ import (
 	"golang.org/x/crypto/ssh"
 )

+// Hub is the application. It embeds the PocketBase app and keeps references to subcomponents.
 type Hub struct {
 	core.App
 	*alerts.AlertManager
@@ -46,18 +48,16 @@ var containerIDPattern = regexp.MustCompile(`^[a-fA-F0-9]{12,64}$`)

 // NewHub creates a new Hub instance with default configuration
+// It wires up the alert, user, record and system managers plus the heartbeat
+// (when heartbeat.New returns one), and schedules initialize to run once the
+// database is bootstrapped and migrated.
 func NewHub(app core.App) *Hub {
-	hub := &Hub{}
-	hub.App = app
-
+	hub := &Hub{App: app}
 	hub.AlertManager = alerts.NewAlertManager(hub)
 	hub.um = users.NewUserManager(hub)
 	hub.rm = records.NewRecordManager(hub)
 	hub.sm = systems.NewSystemManager(hub)
-	hub.appURL, _ = GetEnv("APP_URL")
 	hub.hb = heartbeat.New(app, GetEnv)
 	if hub.hb != nil {
 		hub.hbStop = make(chan struct{})
 	}
+	// NewHub has no error return, so report an initialization failure in the app log
+	// instead of discarding it. Only the already-bootstrapped path can return an error
+	// here; otherwise the function is merely queued for OnServe.
+	if err := onAfterBootstrapAndMigrations(app, hub.initialize); err != nil {
+		app.Logger().Error("Failed to initialize hub", "err", err)
+	}
 	return hub
 }

@@ -70,12 +70,28 @@ func GetEnv(key string) (value string, exists bool) {
 	return os.LookupEnv(key)
 }

-func (h *Hub) StartHub() error {
-	h.App.OnServe().BindFunc(func(e *core.ServeEvent) error {
-		// initialize settings / collections
-		if err := h.initialize(e); err != nil {
+// onAfterBootstrapAndMigrations ensures the provided function runs after the database is set up and migrations are applied.
+// This is a workaround for behavior in PocketBase where onBootstrap runs before migrations, forcing use of onServe for this purpose.
+// However, PB's tests.TestApp is already bootstrapped, generally doesn't serve, but does handle migrations.
+// So this ensures that the provided function runs at the right time either way, after DB is ready and migrations are done.
+//
+// If the app is already bootstrapped, fn runs immediately and its error is returned.
+// Otherwise fn is bound to OnServe and nil is returned; an error from fn is then
+// returned from the serve hook without calling e.Next().
+func onAfterBootstrapAndMigrations(app core.App, fn func(app core.App) error) error {
+	// pb tests.TestApp is already bootstrapped and doesn't serve
+	if app.IsBootstrapped() {
+		return fn(app)
+	}
+	// Must use OnServe because OnBootstrap appears to run before migrations, even if calling e.Next() before anything else
+	app.OnServe().BindFunc(func(e *core.ServeEvent) error {
+		if err := fn(e.App); err != nil {
 			return err
 		}
+		// continue with the remaining serve handlers
+		return e.Next()
+	})
+	return nil
+}
+
+// StartHub sets up event handlers and starts the PocketBase server
+func (h *Hub) StartHub() error {
+	h.App.OnServe().BindFunc(func(e *core.ServeEvent) error {
 		// sync systems with config
 		if err := config.SyncSystems(e); err != nil {
 			return err
@@ -110,132 +126,29 @@ func (h *Hub) StartHub() error {
 	h.App.OnRecordCreate("users").BindFunc(h.um.InitializeUserRole)
 	h.App.OnRecordCreate("user_settings").BindFunc(h.um.InitializeUserSettings)

-	if pb, ok := h.App.(*pocketbase.PocketBase); ok {
-		// log.Println("Starting pocketbase")
-		err := pb.Start()
-		if err != nil {
-			return err
-		}
+	pb, ok := h.App.(*pocketbase.PocketBase)
+	if !ok {
+		return errors.New("not a pocketbase app")
 	}
-
-	return nil
+	return pb.Start()
 }

 // initialize sets up initial configuration (collections, settings, etc.)
-func (h *Hub) initialize(e *core.ServeEvent) error {
+func (h *Hub) initialize(app core.App) error {
 	// set general settings
-	settings := e.App.Settings()
-	// batch requests (for global alerts)
+	settings := app.Settings()
+	// batch requests (for alerts)
 	settings.Batch.Enabled = true
-	// set URL if BASE_URL env is set
-	if h.appURL != "" {
-		settings.Meta.AppURL = h.appURL
+	// set URL if APP_URL env is set
+	if appURL, isSet := GetEnv("APP_URL"); isSet {
+		h.appURL = appURL
+		settings.Meta.AppURL = appURL
 	}
-	if err := e.App.Save(settings); err != nil {
+	if err := app.Save(settings); err != nil {
 		return err
 	}
 	// set auth settings
-	if err := setCollectionAuthSettings(e.App); err != nil {
-		return err
-	}
-	return nil
-}
-
-// setCollectionAuthSettings sets up default authentication settings for the app
-func setCollectionAuthSettings(app core.App) error {
-	usersCollection, err := app.FindCollectionByNameOrId("users")
-	if err != nil {
-		return err
-	}
-	superusersCollection, err := app.FindCollectionByNameOrId(core.CollectionNameSuperusers)
-	if err != nil {
-		return err
-	}
-
-	// disable email auth if DISABLE_PASSWORD_AUTH env var is set
-	disablePasswordAuth, _ := GetEnv("DISABLE_PASSWORD_AUTH")
-	usersCollection.PasswordAuth.Enabled = disablePasswordAuth != "true"
-	usersCollection.PasswordAuth.IdentityFields = []string{"email"}
-	// allow oauth user creation if USER_CREATION is set
-	if userCreation, _ := GetEnv("USER_CREATION"); userCreation == "true" {
-		cr := "@request.context = 'oauth2'"
-		usersCollection.CreateRule = &cr
-	} else {
-		usersCollection.CreateRule = nil
-	}
-
-	// enable mfaOtp mfa if MFA_OTP env var is set
-	mfaOtp, _ := GetEnv("MFA_OTP")
-	usersCollection.OTP.Length = 6
-	superusersCollection.OTP.Length = 6
-	usersCollection.OTP.Enabled = mfaOtp == "true"
-	usersCollection.MFA.Enabled = mfaOtp == "true"
-	superusersCollection.OTP.Enabled = mfaOtp == "true" || mfaOtp == "superusers"
-	superusersCollection.MFA.Enabled = mfaOtp == "true" || mfaOtp == "superusers"
-	if err := app.Save(superusersCollection); err != nil {
-		return err
-	}
-	if err := app.Save(usersCollection); err != nil {
-		return err
-	}
-
-	shareAllSystems, _ := GetEnv("SHARE_ALL_SYSTEMS")
-
-	// allow all users to access systems if SHARE_ALL_SYSTEMS is set
-	systemsCollection, err := app.FindCollectionByNameOrId("systems")
-	if err != nil {
-		return err
-	}
-	var systemsReadRule string
-	if shareAllSystems == "true" {
-		systemsReadRule = "@request.auth.id != \"\""
-	} else {
-		systemsReadRule = "@request.auth.id != \"\" && users.id ?= @request.auth.id"
-	}
-	updateDeleteRule := systemsReadRule + " && @request.auth.role != \"readonly\""
-	systemsCollection.ListRule = &systemsReadRule
-	systemsCollection.ViewRule = &systemsReadRule
-	systemsCollection.UpdateRule = &updateDeleteRule
-	systemsCollection.DeleteRule = &updateDeleteRule
-	if err := app.Save(systemsCollection); err != nil {
-		return err
-	}
-
-	// allow all users to access all containers if SHARE_ALL_SYSTEMS is set
-	containersCollection, err := app.FindCollectionByNameOrId("containers")
-	if err != nil {
-		return err
-	}
-	containersListRule := strings.Replace(systemsReadRule, "users.id", "system.users.id", 1)
-	containersCollection.ListRule = &containersListRule
-	if err := app.Save(containersCollection); err != nil {
-		return err
-	}
-
-	// allow all users to access system-related collections if SHARE_ALL_SYSTEMS is set
-	// these collections all have a "system" relation field
-	systemRelatedCollections := []string{"system_details", "smart_devices", "systemd_services"}
-	for _, collectionName := range systemRelatedCollections {
-		collection, err := app.FindCollectionByNameOrId(collectionName)
-		if err != nil {
-			return err
-		}
-		collection.ListRule = &containersListRule
-		// set viewRule for collections that need it (system_details, smart_devices)
-		if collection.ViewRule != nil {
-			collection.ViewRule = &containersListRule
-		}
-		// set deleteRule for smart_devices (allows user to dismiss disk warnings)
-		if collectionName == "smart_devices" {
-			deleteRule := containersListRule + " && @request.auth.role != \"readonly\""
-			collection.DeleteRule = &deleteRule
-		}
-		if err := app.Save(collection); err != nil {
-			return err
-		}
-	}
-
-	return nil
+	return setCollectionAuthSettings(app)
 }

 // registerCronJobs sets up scheduled tasks
@@ -247,7 +160,7 @@ func (h *Hub) registerCronJobs(_ *core.ServeEvent) error {
 	return nil
 }

-// custom middlewares
+// registerMiddlewares registers custom middlewares
 func (h *Hub) registerMiddlewares(se *core.ServeEvent) {
 	// authorizes request with user matching the provided email
 	authorizeRequestWithEmail := func(e *core.RequestEvent, email string) (err error) {
@@ -278,7 +191,7 @@ func (h *Hub) registerMiddlewares(se *core.ServeEvent) {
 	}
 }

-// custom api routes
+// registerApiRoutes registers custom API routes
 func (h *Hub) registerApiRoutes(se *core.ServeEvent) error {
 	// auth protected routes
 	apiAuth := se.Router.Group("/api/beszel")
@@ -327,7 +240,7 @@ func (h *Hub) registerApiRoutes(se *core.ServeEvent) error {
 	return nil
 }

-// Handler for universal token API endpoint (create, read, delete)
+// getUniversalToken handles the universal token API endpoint (create, read, delete)
 func (h *Hub) getUniversalToken(e *core.RequestEvent) error {
 	tokenMap := universalTokenMap.GetMap()
 	userID := e.Auth.Id
@@ -536,7 +449,7 @@ func (h *Hub) refreshSmartData(e *core.RequestEvent) error {
 	return e.JSON(http.StatusOK, map[string]string{"status": "ok"})
 }

-// generates key pair if it doesn't exist and returns signer
+// GetSSHKey generates key pair if it doesn't exist and returns signer
 func (h *Hub) GetSSHKey(dataDir string) (ssh.Signer, error) {
 	if h.signer != nil {
 		return h.signer, nil
--- a/internal/hub/hub_test.go
+++ b/internal/hub/hub_test.go
@@ -733,10 +733,8 @@ func TestFirstUserCreation(t *testing.T) {
 	})

 	t.Run("CreateUserEndpoint not available when USER_EMAIL, USER_PASSWORD are set", func(t *testing.T) {
-		os.Setenv("BESZEL_HUB_USER_EMAIL", "me@example.com")
-		os.Setenv("BESZEL_HUB_USER_PASSWORD", "password123")
-		defer os.Unsetenv("BESZEL_HUB_USER_EMAIL")
-		defer os.Unsetenv("BESZEL_HUB_USER_PASSWORD")
+		t.Setenv("BESZEL_HUB_USER_EMAIL", "me@example.com")
+		t.Setenv("BESZEL_HUB_USER_PASSWORD", "password123")

 		hub, _ := beszelTests.NewTestHub(t.TempDir())
 		defer hub.Cleanup()
@@ -852,13 +850,12 @@ func TestAutoLoginMiddleware(t *testing.T) {
 	var hubs []*beszelTests.TestHub

 	defer func() {
-		defer os.Unsetenv("AUTO_LOGIN")
 		for _, hub := range hubs {
 			hub.Cleanup()
 		}
 	}()

-	os.Setenv("AUTO_LOGIN", "user@test.com")
+	t.Setenv("AUTO_LOGIN", "user@test.com")

 	testAppFactory := func(t testing.TB) *pbTests.TestApp {
 		hub, _ := beszelTests.NewTestHub(t.TempDir())
@@ -906,13 +903,12 @@ func TestTrustedHeaderMiddleware(t *testing.T) {
 	var hubs []*beszelTests.TestHub

 	defer func() {
-		defer os.Unsetenv("TRUSTED_AUTH_HEADER")
 		for _, hub := range hubs {
 			hub.Cleanup()
 		}
 	}()

-	os.Setenv("TRUSTED_AUTH_HEADER", "X-Beszel-Trusted")
+	t.Setenv("TRUSTED_AUTH_HEADER", "X-Beszel-Trusted")

 	testAppFactory := func(t testing.TB) *pbTests.TestApp {
 		hub, _ := beszelTests.NewTestHub(t.TempDir())
@@ -961,3 +957,21 @@ func TestTrustedHeaderMiddleware(t *testing.T) {
 		scenario.Test(t)
 	}
 }
+
+func TestAppUrl(t *testing.T) {
+	t.Run("no APP_URL does't change app url", func(t *testing.T) {
+		hub, _ := beszelTests.NewTestHub(t.TempDir())
+		defer hub.Cleanup()
+
+		settings := hub.Settings()
+		assert.Equal(t, "http://localhost:8090", settings.Meta.AppURL)
+	})
+	t.Run("APP_URL changes app url", func(t *testing.T) {
+		t.Setenv("APP_URL", "http://example.com/app")
+		hub, _ := beszelTests.NewTestHub(t.TempDir())
+		defer hub.Cleanup()
+
+		settings := hub.Settings()
+		assert.Equal(t, "http://example.com/app", settings.Meta.AppURL)
+	})
+}
--- a/internal/hub/hub_test_helpers.go
+++ b/internal/hub/hub_test_helpers.go
@@ -2,7 +2,9 @@

 package hub

-import "github.com/henrygd/beszel/internal/hub/systems"
+import (
+	"github.com/henrygd/beszel/internal/hub/systems"
+)

 // TESTING ONLY: GetSystemManager returns the system manager
 func (h *Hub) GetSystemManager() *systems.SystemManager {
@@ -18,3 +20,7 @@ func (h *Hub) GetPubkey() string {
 func (h *Hub) SetPubkey(pubkey string) {
 	h.pubKey = pubkey
 }
+
+func (h *Hub) SetCollectionAuthSettings() error {
+	return setCollectionAuthSettings(h)
+}
--- a/internal/hub/systems/system.go
+++ b/internal/hub/systems/system.go
@@ -48,7 +48,6 @@ type System struct {
 	detailsFetched atomic.Bool             // True if static system details have been fetched and saved
 	smartFetching  atomic.Bool             // True if SMART devices are currently being fetched
 	smartInterval  time.Duration           // Interval for periodic SMART data updates
-	lastSmartFetch atomic.Int64            // Unix milliseconds of last SMART data fetch
 }

 func (sm *SystemManager) NewSystem(systemId string) *System {
@@ -134,19 +133,34 @@ func (sys *System) update() error {
 		return err
 	}

+	// ensure deprecated fields from older agents are migrated to current fields
+	migrateDeprecatedFields(data, !sys.detailsFetched.Load())
+
 	// create system records
 	_, err = sys.createRecords(data)

+	// if details were included and fetched successfully, mark details as fetched and update smart interval if set by agent
+	if err == nil && data.Details != nil {
+		sys.detailsFetched.Store(true)
+		// update smart interval if it's set on the agent side
+		if data.Details.SmartInterval > 0 {
+			sys.smartInterval = data.Details.SmartInterval
+			// reset the expiration of the lastFetch entry so it lasts as long as the new smart interval;
+			// otherwise it could expire early and trigger a premature fetch when the interval changes.
+			sys.manager.smartFetchMap.UpdateExpiration(sys.Id, sys.smartInterval+time.Minute)
+		}
+	}
+
 	// Fetch and save SMART devices when system first comes online or at intervals
-	if backgroundSmartFetchEnabled() {
+	if backgroundSmartFetchEnabled() && sys.detailsFetched.Load() {
 		if sys.smartInterval <= 0 {
 			sys.smartInterval = time.Hour
 		}
-		lastFetch := sys.lastSmartFetch.Load()
-		if time.Since(time.UnixMilli(lastFetch)) >= sys.smartInterval && sys.smartFetching.CompareAndSwap(false, true) {
+		lastFetch, _ := sys.manager.smartFetchMap.GetOk(sys.Id)
+		if time.Since(time.UnixMilli(lastFetch-1e4)) >= sys.smartInterval && sys.smartFetching.CompareAndSwap(false, true) {
 			go func() {
 				defer sys.smartFetching.Store(false)
-				sys.lastSmartFetch.Store(time.Now().UnixMilli())
+				sys.manager.smartFetchMap.Set(sys.Id, time.Now().UnixMilli(), sys.smartInterval+time.Minute)
 				_ = sys.FetchAndSaveSmartDevices()
 			}()
 		}
@@ -221,11 +235,6 @@ func (sys *System) createRecords(data *system.CombinedData) (*core.Record, error
 			if err := createSystemDetailsRecord(txApp, data.Details, sys.Id); err != nil {
 				return err
 			}
-			sys.detailsFetched.Store(true)
-			// update smart interval if it's set on the agent side
-			if data.Details.SmartInterval > 0 {
-				sys.smartInterval = data.Details.SmartInterval
-			}
 		}

 		// update system record (do this last because it triggers alerts and we need above records to be inserted first)
@@ -309,10 +318,11 @@ func createContainerRecords(app core.App, data []*container.Stats, systemId stri
 	valueStrings := make([]string, 0, len(data))
 	for i, container := range data {
 		suffix := fmt.Sprintf("%d", i)
-		valueStrings = append(valueStrings, fmt.Sprintf("({:id%[1]s}, {:system}, {:name%[1]s}, {:image%[1]s}, {:status%[1]s}, {:health%[1]s}, {:cpu%[1]s}, {:memory%[1]s}, {:net%[1]s}, {:updated})", suffix))
+		valueStrings = append(valueStrings, fmt.Sprintf("({:id%[1]s}, {:system}, {:name%[1]s}, {:image%[1]s}, {:ports%[1]s}, {:status%[1]s}, {:health%[1]s}, {:cpu%[1]s}, {:memory%[1]s}, {:net%[1]s}, {:updated})", suffix))
 		params["id"+suffix] = container.Id
 		params["name"+suffix] = container.Name
 		params["image"+suffix] = container.Image
+		params["ports"+suffix] = container.Ports
 		params["status"+suffix] = container.Status
 		params["health"+suffix] = container.Health
 		params["cpu"+suffix] = container.Cpu
@@ -324,7 +334,7 @@ func createContainerRecords(app core.App, data []*container.Stats, systemId stri
 		params["net"+suffix] = netBytes
 	}
 	queryString := fmt.Sprintf(
-		"INSERT INTO containers (id, system, name, image, status, health, cpu, memory, net, updated) VALUES %s ON CONFLICT(id) DO UPDATE SET system = excluded.system, name = excluded.name, image = excluded.image, status = excluded.status, health = excluded.health, cpu = excluded.cpu, memory = excluded.memory, net = excluded.net, updated = excluded.updated",
+		"INSERT INTO containers (id, system, name, image, ports, status, health, cpu, memory, net, updated) VALUES %s ON CONFLICT(id) DO UPDATE SET system = excluded.system, name = excluded.name, image = excluded.image, ports = excluded.ports, status = excluded.status, health = excluded.health, cpu = excluded.cpu, memory = excluded.memory, net = excluded.net, updated = excluded.updated",
 		strings.Join(valueStrings, ","),
 	)
 	_, err := app.DB().NewQuery(queryString).Bind(params).Execute()
@@ -703,3 +713,50 @@ func getJitter() <-chan time.Time {
 	msDelay := (interval * minPercent / 100) + rand.Intn(interval*jitterRange/100)
 	return time.After(time.Duration(msDelay) * time.Millisecond)
 }
+
+// migrateDeprecatedFields moves values from deprecated fields to their new locations if the new
+// fields are not already populated. Deprecated fields and refs may be removed at least 30 days
+// and one minor version release after the release that includes the migration.
+//
+// This is run when processing incoming system data from agents, which may be on older versions.
+func migrateDeprecatedFields(cd *system.CombinedData, createDetails bool) {
+	// migration added 0.19.0
+	if cd.Stats.Bandwidth[0] == 0 && cd.Stats.Bandwidth[1] == 0 {
+		cd.Stats.Bandwidth[0] = uint64(cd.Stats.NetworkSent * 1024 * 1024)
+		cd.Stats.Bandwidth[1] = uint64(cd.Stats.NetworkRecv * 1024 * 1024)
+		cd.Stats.NetworkSent, cd.Stats.NetworkRecv = 0, 0
+	}
+	// migration added 0.19.0
+	if cd.Info.BandwidthBytes == 0 {
+		cd.Info.BandwidthBytes = uint64(cd.Info.Bandwidth * 1024 * 1024)
+		cd.Info.Bandwidth = 0
+	}
+	// migration added 0.19.0
+	if cd.Stats.DiskIO[0] == 0 && cd.Stats.DiskIO[1] == 0 {
+		cd.Stats.DiskIO[0] = uint64(cd.Stats.DiskReadPs * 1024 * 1024)
+		cd.Stats.DiskIO[1] = uint64(cd.Stats.DiskWritePs * 1024 * 1024)
+		cd.Stats.DiskReadPs, cd.Stats.DiskWritePs = 0, 0
+	}
+	// migration added 0.19.0 - Move deprecated Info fields to Details struct
+	if cd.Details == nil && cd.Info.Hostname != "" {
+		if createDetails {
+			cd.Details = &system.Details{
+				Hostname:    cd.Info.Hostname,
+				Kernel:      cd.Info.KernelVersion,
+				Cores:       cd.Info.Cores,
+				Threads:     cd.Info.Threads,
+				CpuModel:    cd.Info.CpuModel,
+				Podman:      cd.Info.Podman,
+				Os:          cd.Info.Os,
+				MemoryTotal: uint64(cd.Stats.Mem * 1024 * 1024 * 1024),
+			}
+		}
+		// zero the deprecated fields to prevent saving them in systems.info DB json payload
+		cd.Info.Hostname = ""
+		cd.Info.KernelVersion = ""
+		cd.Info.Cores = 0
+		cd.Info.CpuModel = ""
+		cd.Info.Podman = false
+		cd.Info.Os = 0
+	}
+}
--- a/internal/hub/systems/system_manager.go
+++ b/internal/hub/systems/system_manager.go
@@ -8,6 +8,7 @@ import (
 	"github.com/henrygd/beszel/internal/hub/ws"

 	"github.com/henrygd/beszel/internal/entities/system"
+	"github.com/henrygd/beszel/internal/hub/expirymap"

 	"github.com/henrygd/beszel/internal/common"

@@ -40,9 +41,10 @@ var errSystemExists = errors.New("system exists")
 // SystemManager manages a collection of monitored systems and their connections.
 // It handles system lifecycle, status updates, and maintains both SSH and WebSocket connections.
 type SystemManager struct {
-	hub       hubLike                       // Hub interface for database and alert operations
-	systems   *store.Store[string, *System] // Thread-safe store of active systems
-	sshConfig *ssh.ClientConfig             // SSH client configuration for system connections
+	hub           hubLike                       // Hub interface for database and alert operations
+	systems       *store.Store[string, *System] // Thread-safe store of active systems
+	sshConfig     *ssh.ClientConfig             // SSH client configuration for system connections
+	smartFetchMap *expirymap.ExpiryMap[int64]   // Stores last SMART fetch time per system ID
 }

 // hubLike defines the interface requirements for the hub dependency.
@@ -58,8 +60,9 @@ type hubLike interface {
 // The hub must implement the hubLike interface to provide database and alert functionality.
 func NewSystemManager(hub hubLike) *SystemManager {
 	return &SystemManager{
-		systems: store.New(map[string]*System{}),
-		hub:     hub,
+		systems:       store.New(map[string]*System{}),
+		hub:           hub,
+		smartFetchMap: expirymap.New[int64](time.Hour),
 	}
 }

--- a/internal/hub/systems/system_test.go
+++ b/internal/hub/systems/system_test.go
@@ -0,0 +1,159 @@
+//go:build testing
+
+package systems
+
+import (
+	"testing"
+
+	"github.com/henrygd/beszel/internal/entities/system"
+)
+
+func TestCombinedData_MigrateDeprecatedFields(t *testing.T) {
+	t.Run("Migrate NetworkSent and NetworkRecv to Bandwidth", func(t *testing.T) {
+		cd := &system.CombinedData{
+			Stats: system.Stats{
+				NetworkSent: 1.5, // 1.5 MB
+				NetworkRecv: 2.5, // 2.5 MB
+			},
+		}
+		migrateDeprecatedFields(cd, true)
+
+		expectedSent := uint64(1.5 * 1024 * 1024)
+		expectedRecv := uint64(2.5 * 1024 * 1024)
+
+		if cd.Stats.Bandwidth[0] != expectedSent {
+			t.Errorf("expected Bandwidth[0] %d, got %d", expectedSent, cd.Stats.Bandwidth[0])
+		}
+		if cd.Stats.Bandwidth[1] != expectedRecv {
+			t.Errorf("expected Bandwidth[1] %d, got %d", expectedRecv, cd.Stats.Bandwidth[1])
+		}
+		if cd.Stats.NetworkSent != 0 || cd.Stats.NetworkRecv != 0 {
+			t.Errorf("expected NetworkSent and NetworkRecv to be reset, got %f, %f", cd.Stats.NetworkSent, cd.Stats.NetworkRecv)
+		}
+	})
+
+	t.Run("Migrate Info.Bandwidth to Info.BandwidthBytes", func(t *testing.T) {
+		cd := &system.CombinedData{
+			Info: system.Info{
+				Bandwidth: 10.0, // 10 MB
+			},
+		}
+		migrateDeprecatedFields(cd, true)
+
+		expected := uint64(10 * 1024 * 1024)
+		if cd.Info.BandwidthBytes != expected {
+			t.Errorf("expected BandwidthBytes %d, got %d", expected, cd.Info.BandwidthBytes)
+		}
+		if cd.Info.Bandwidth != 0 {
+			t.Errorf("expected Info.Bandwidth to be reset, got %f", cd.Info.Bandwidth)
+		}
+	})
+
+	t.Run("Migrate DiskReadPs and DiskWritePs to DiskIO", func(t *testing.T) {
+		cd := &system.CombinedData{
+			Stats: system.Stats{
+				DiskReadPs:  3.0, // 3 MB
+				DiskWritePs: 4.0, // 4 MB
+			},
+		}
+		migrateDeprecatedFields(cd, true)
+
+		expectedRead := uint64(3 * 1024 * 1024)
+		expectedWrite := uint64(4 * 1024 * 1024)
+
+		if cd.Stats.DiskIO[0] != expectedRead {
+			t.Errorf("expected DiskIO[0] %d, got %d", expectedRead, cd.Stats.DiskIO[0])
+		}
+		if cd.Stats.DiskIO[1] != expectedWrite {
+			t.Errorf("expected DiskIO[1] %d, got %d", expectedWrite, cd.Stats.DiskIO[1])
+		}
+		if cd.Stats.DiskReadPs != 0 || cd.Stats.DiskWritePs != 0 {
+			t.Errorf("expected DiskReadPs and DiskWritePs to be reset, got %f, %f", cd.Stats.DiskReadPs, cd.Stats.DiskWritePs)
+		}
+	})
+
+	t.Run("Migrate Info fields to Details struct", func(t *testing.T) {
+		cd := &system.CombinedData{
+			Stats: system.Stats{
+				Mem: 16.0, // 16 GB
+			},
+			Info: system.Info{
+				Hostname:      "test-host",
+				KernelVersion: "6.8.0",
+				Cores:         8,
+				Threads:       16,
+				CpuModel:      "Intel i7",
+				Podman:        true,
+				Os:            system.Linux,
+			},
+		}
+		migrateDeprecatedFields(cd, true)
+
+		if cd.Details == nil {
+			t.Fatal("expected Details struct to be created")
+		}
+		if cd.Details.Hostname != "test-host" {
+			t.Errorf("expected Hostname 'test-host', got '%s'", cd.Details.Hostname)
+		}
+		if cd.Details.Kernel != "6.8.0" {
+			t.Errorf("expected Kernel '6.8.0', got '%s'", cd.Details.Kernel)
+		}
+		if cd.Details.Cores != 8 {
+			t.Errorf("expected Cores 8, got %d", cd.Details.Cores)
+		}
+		if cd.Details.Threads != 16 {
+			t.Errorf("expected Threads 16, got %d", cd.Details.Threads)
+		}
+		if cd.Details.CpuModel != "Intel i7" {
+			t.Errorf("expected CpuModel 'Intel i7', got '%s'", cd.Details.CpuModel)
+		}
+		if cd.Details.Podman != true {
+			t.Errorf("expected Podman true, got %v", cd.Details.Podman)
+		}
+		if cd.Details.Os != system.Linux {
+			t.Errorf("expected Os Linux, got %d", cd.Details.Os)
+		}
+		expectedMem := uint64(16 * 1024 * 1024 * 1024)
+		if cd.Details.MemoryTotal != expectedMem {
+			t.Errorf("expected MemoryTotal %d, got %d", expectedMem, cd.Details.MemoryTotal)
+		}
+
+		if cd.Info.Hostname != "" || cd.Info.KernelVersion != "" || cd.Info.Cores != 0 || cd.Info.CpuModel != "" || cd.Info.Podman != false || cd.Info.Os != 0 {
+			t.Errorf("expected Info fields to be reset, got %+v", cd.Info)
+		}
+	})
+
+	t.Run("Do not migrate if Details already exists", func(t *testing.T) {
+		cd := &system.CombinedData{
+			Details: &system.Details{Hostname: "existing-host"},
+			Info: system.Info{
+				Hostname: "deprecated-host",
+			},
+		}
+		migrateDeprecatedFields(cd, true)
+
+		if cd.Details.Hostname != "existing-host" {
+			t.Errorf("expected Hostname 'existing-host', got '%s'", cd.Details.Hostname)
+		}
+		if cd.Info.Hostname != "deprecated-host" {
+			t.Errorf("expected Info.Hostname to remain 'deprecated-host', got '%s'", cd.Info.Hostname)
+		}
+	})
+
+	t.Run("Do not create details if migrateDetails is false", func(t *testing.T) {
+		cd := &system.CombinedData{
+			Info: system.Info{
+				Hostname: "deprecated-host",
+			},
+		}
+		migrateDeprecatedFields(cd, false)
+
+		if cd.Details != nil {
+			t.Fatal("expected Details struct to not be created")
+		}
+
+		if cd.Info.Hostname != "" {
+			t.Errorf("expected Info.Hostname to be reset, got '%s'", cd.Info.Hostname)
+		}
+	})
+}
--- a/internal/hub/systems/systems_test_helpers.go
+++ b/internal/hub/systems/systems_test_helpers.go
@@ -7,6 +7,7 @@ import (
 	"fmt"

 	entities "github.com/henrygd/beszel/internal/entities/system"
+	"github.com/pocketbase/pocketbase/core"
 )

 // The hub integration tests create/replace systems and cleanup the test apps quickly.
@@ -113,4 +114,14 @@ func (sm *SystemManager) RemoveAllSystems() {
 	for _, system := range sm.systems.GetAll() {
 		sm.RemoveSystem(system.Id)
 	}
+	sm.smartFetchMap.StopCleaner()
+}
+
+func (s *System) StopUpdater() {
+	s.cancel()
+}
+
+func (s *System) CreateRecords(data *entities.CombinedData) (*core.Record, error) {
+	s.data = data
+	return s.createRecords(data)
 }
--- a/internal/migrations/0_collections_snapshot_0_19_0_dev_1.go
+++ b/internal/migrations/0_collections_snapshot_0_19_0_dev_1.go
@@ -11,11 +11,11 @@ func init() {
 		jsonData := `[
 	{
 		"id": "elngm8x1l60zi2v",
-		"listRule": "@request.auth.id != \"\" && user.id = @request.auth.id",
-		"viewRule": "",
-		"createRule": "@request.auth.id != \"\" && user.id = @request.auth.id",
-		"updateRule": "@request.auth.id != \"\" && user.id = @request.auth.id",
-		"deleteRule": "@request.auth.id != \"\" && user.id = @request.auth.id",
+		"listRule": "@request.auth.id != \"\" && user = @request.auth.id",
+		"viewRule": null,
+		"createRule": "@request.auth.id != \"\" && user = @request.auth.id",
+		"updateRule": "@request.auth.id != \"\" && user = @request.auth.id",
+		"deleteRule": "@request.auth.id != \"\" && user = @request.auth.id",
 		"name": "alerts",
 		"type": "base",
 		"fields": [
@@ -143,11 +143,11 @@ func init() {
 	},
 	{
 		"id": "pbc_1697146157",
-		"listRule": "@request.auth.id != \"\" && user.id = @request.auth.id",
-		"viewRule": "@request.auth.id != \"\" && user.id = @request.auth.id",
+		"listRule": "@request.auth.id != \"\" && user = @request.auth.id",
+		"viewRule": null,
 		"createRule": null,
 		"updateRule": null,
-		"deleteRule": "@request.auth.id != \"\" && user.id = @request.auth.id",
+		"deleteRule": "@request.auth.id != \"\" && user = @request.auth.id",
 		"name": "alerts_history",
 		"type": "base",
 		"fields": [
@@ -261,7 +261,7 @@ func init() {
 	},
 	{
 		"id": "juohu4jipgc13v7",
-		"listRule": "@request.auth.id != \"\"",
+		"listRule": null,
 		"viewRule": null,
 		"createRule": null,
 		"updateRule": null,
@@ -351,10 +351,10 @@ func init() {
 	},
 	{
 		"id": "pbc_3663931638",
-		"listRule": "@request.auth.id != \"\" && system.users.id ?= @request.auth.id",
-		"viewRule": "@request.auth.id != \"\" && system.users.id ?= @request.auth.id",
-		"createRule": "@request.auth.id != \"\" && system.users.id ?= @request.auth.id && @request.auth.role != \"readonly\"",
-		"updateRule": "@request.auth.id != \"\" && system.users.id ?= @request.auth.id && @request.auth.role != \"readonly\"",
+		"listRule": null,
+		"viewRule": null,
+		"createRule": null,
+		"updateRule": null,
 		"deleteRule": null,
 		"name": "fingerprints",
 		"type": "base",
@@ -433,7 +433,7 @@ func init() {
 	},
 	{
 		"id": "ej9oowivz8b2mht",
-		"listRule": "@request.auth.id != \"\"",
+		"listRule": null,
 		"viewRule": null,
 		"createRule": null,
 		"updateRule": null,
@@ -523,10 +523,10 @@ func init() {
 	},
 	{
 		"id": "4afacsdnlu8q8r2",
-		"listRule": "@request.auth.id != \"\" && user.id = @request.auth.id",
+		"listRule": "@request.auth.id != \"\" && user = @request.auth.id",
 		"viewRule": null,
-		"createRule": "@request.auth.id != \"\" && user.id = @request.auth.id",
-		"updateRule": "@request.auth.id != \"\" && user.id = @request.auth.id",
+		"createRule": "@request.auth.id != \"\" && user = @request.auth.id",
+		"updateRule": "@request.auth.id != \"\" && user = @request.auth.id",
 		"deleteRule": null,
 		"name": "user_settings",
 		"type": "base",
@@ -596,11 +596,11 @@ func init() {
 	},
 	{
 		"id": "2hz5ncl8tizk5nx",
-		"listRule": "@request.auth.id != \"\" && users.id ?= @request.auth.id",
-		"viewRule": "@request.auth.id != \"\" && users.id ?= @request.auth.id",
-		"createRule": "@request.auth.id != \"\" && users.id ?= @request.auth.id && @request.auth.role != \"readonly\"",
-		"updateRule": "@request.auth.id != \"\" && users.id ?= @request.auth.id && @request.auth.role != \"readonly\"",
-		"deleteRule": "@request.auth.id != \"\" && users.id ?= @request.auth.id && @request.auth.role != \"readonly\"",
+		"listRule": null,
+		"viewRule": null,
+		"createRule": null,
+		"updateRule": null,
+		"deleteRule": null,
 		"name": "systems",
 		"type": "base",
 		"fields": [
@@ -866,7 +866,7 @@ func init() {
 	},
 	{
 		"id": "pbc_1864144027",
-		"listRule": "@request.auth.id != \"\" && system.users.id ?= @request.auth.id",
+		"listRule": null,
 		"viewRule": null,
 		"createRule": null,
 		"updateRule": null,
@@ -977,18 +977,6 @@ func init() {
 					"system": false,
 					"type": "number"
 				},
-				{
-					"hidden": false,
-					"id": "number3332085495",
-					"max": null,
-					"min": null,
-					"name": "updated",
-					"onlyInt": true,
-					"presentable": false,
-					"required": true,
-					"system": false,
-					"type": "number"
-				},
 				{
 					"autogeneratePattern": "",
 					"hidden": false,
@@ -1002,6 +990,32 @@ func init() {
 					"required": false,
 					"system": false,
 					"type": "text"
+				},
+				{
+					"autogeneratePattern": "",
+					"hidden": false,
+					"id": "text2308952269",
+					"max": 0,
+					"min": 0,
+					"name": "ports",
+					"pattern": "",
+					"presentable": false,
+					"primaryKey": false,
+					"required": false,
+					"system": false,
+					"type": "text"
+				},
+				{
+					"hidden": false,
+					"id": "number3332085495",
+					"max": null,
+					"min": null,
+					"name": "updated",
+					"onlyInt": true,
+					"presentable": false,
+					"required": true,
+					"system": false,
+					"type": "number"
 				}
 		],
 		"indexes": [
@@ -1145,7 +1159,7 @@ func init() {
 			"CREATE INDEX ` + "`" + `idx_4Z7LuLNdQb` + "`" + ` ON ` + "`" + `systemd_services` + "`" + ` (` + "`" + `system` + "`" + `)",
 			"CREATE INDEX ` + "`" + `idx_pBp1fF837e` + "`" + ` ON ` + "`" + `systemd_services` + "`" + ` (` + "`" + `updated` + "`" + `)"
 		],
-		"listRule": "@request.auth.id != \"\" && system.users.id ?= @request.auth.id",
+		"listRule": null,
 		"name": "systemd_services",
 		"system": false,
 		"type": "base",
@@ -1153,8 +1167,8 @@ func init() {
 		"viewRule": null
 	},
 	{
-		"createRule": "@request.auth.id != \"\" && user.id = @request.auth.id",
-		"deleteRule": "@request.auth.id != \"\" && user.id = @request.auth.id",
+		"createRule": "@request.auth.id != \"\" && user = @request.auth.id",
+		"deleteRule": "@request.auth.id != \"\" && user = @request.auth.id",
 		"fields": [
 			{
 				"autogeneratePattern": "[a-z0-9]{10}",
@@ -1238,16 +1252,16 @@ func init() {
 			"CREATE INDEX ` + "`" + `idx_q0iKnRP9v8` + "`" + ` ON ` + "`" + `quiet_hours` + "`" + ` (\n  ` + "`" + `user` + "`" + `,\n  ` + "`" + `system` + "`" + `\n)",
 			"CREATE INDEX ` + "`" + `idx_6T7ljT7FJd` + "`" + ` ON ` + "`" + `quiet_hours` + "`" + ` (\n  ` + "`" + `type` + "`" + `,\n  ` + "`" + `end` + "`" + `\n)"
 		],
-		"listRule": "@request.auth.id != \"\" && user.id = @request.auth.id",
+		"listRule": "@request.auth.id != \"\" && user = @request.auth.id",
 		"name": "quiet_hours",
 		"system": false,
 		"type": "base",
-		"updateRule": "@request.auth.id != \"\" && user.id = @request.auth.id",
-		"viewRule": "@request.auth.id != \"\" && user.id = @request.auth.id"
+		"updateRule": "@request.auth.id != \"\" && user = @request.auth.id",
+		"viewRule": "@request.auth.id != \"\" && user = @request.auth.id"
 	},
 	{
 		"createRule": null,
-		"deleteRule": "@request.auth.id != \"\" && system.users.id ?= @request.auth.id",
+		"deleteRule": null,
 		"fields": [
 			{
 				"autogeneratePattern": "[a-z0-9]{10}",
@@ -1433,16 +1447,16 @@ func init() {
 		"indexes": [
 			"CREATE INDEX ` + "`" + `idx_DZ9yhvgl44` + "`" + ` ON ` + "`" + `smart_devices` + "`" + ` (` + "`" + `system` + "`" + `)"
 		],
-		"listRule": "@request.auth.id != \"\" && system.users.id ?= @request.auth.id",
+		"listRule": null,
 		"name": "smart_devices",
 		"system": false,
 		"type": "base",
 		"updateRule": null,
-		"viewRule": "@request.auth.id != \"\" && system.users.id ?= @request.auth.id"
+		"viewRule": null
 	},
 	{
-		"createRule": "",
-		"deleteRule": "",
+		"createRule": null,
+		"deleteRule": null,
 		"fields": [
 			{
 				"autogeneratePattern": "[a-z0-9]{15}",
@@ -1611,12 +1625,12 @@ func init() {
 		],
 		"id": "pbc_3116237454",
 		"indexes": [],
-		"listRule": "@request.auth.id != \"\" && system.users.id ?= @request.auth.id",
 		"name": "system_details",
 		"system": false,
 		"type": "base",
-		"updateRule": "",
-		"viewRule": "@request.auth.id != \"\" && system.users.id ?= @request.auth.id"
+		"updateRule": null,
+		"listRule": null,
+		"viewRule": null
 	},
 	{
 		"createRule": null,
--- a/internal/site/src/components/charts/load-average-chart.tsx
+++ b/internal/site/src/components/charts/load-average-chart.tsx
@@ -16,19 +16,16 @@ import { useYAxisWidth } from "./hooks"
 export default memo(function LoadAverageChart({ chartData }: { chartData: ChartData }) {
 	const { yAxisWidth, updateYAxisWidth } = useYAxisWidth()

-	const keys: { legacy: keyof SystemStats; color: string; label: string }[] = [
+	const keys: { color: string; label: string }[] = [
 		{
-			legacy: "l1",
 			color: "hsl(271, 81%, 60%)", // Purple
 			label: t({ message: `1 min`, comment: "Load average" }),
 		},
 		{
-			legacy: "l5",
 			color: "hsl(217, 91%, 60%)", // Blue
 			label: t({ message: `5 min`, comment: "Load average" }),
 		},
 		{
-			legacy: "l15",
 			color: "hsl(25, 95%, 53%)", // Orange
 			label: t({ message: `15 min`, comment: "Load average" }),
 		},
@@ -66,27 +63,18 @@ export default memo(function LoadAverageChart({ chartData }: { chartData: ChartD
 							/>
 						}
 					/>
-					{keys.map(({ legacy, color, label }, i) => {
-						const dataKey = (value: { stats: SystemStats }) => {
-							const { minor, patch } = chartData.agentVersion
-							if (minor <= 12 && patch < 1) {
-								return value.stats?.[legacy]
-							}
-							return value.stats?.la?.[i] ?? value.stats?.[legacy]
-						}
-						return (
-							<Line
-								key={label}
-								dataKey={dataKey}
-								name={label}
-								type="monotoneX"
-								dot={false}
-								strokeWidth={1.5}
-								stroke={color}
-								isAnimationActive={false}
-							/>
-						)
-					})}
+					{keys.map(({ color, label }, i) => (
+						<Line
+							key={label}
+							dataKey={(value: { stats: SystemStats }) => value.stats?.la?.[i]}
+							name={label}
+							type="monotoneX"
+							dot={false}
+							strokeWidth={1.5}
+							stroke={color}
+							isAnimationActive={false}
+						/>
+					))}
 					<ChartLegend content={<ChartLegendContent />} />
 				</LineChart>
 			</ChartContainer>
--- a/internal/site/src/components/containers-table/containers-table-columns.tsx
+++ b/internal/site/src/components/containers-table/containers-table-columns.tsx
@@ -4,7 +4,6 @@ import { cn, decimalString, formatBytes, hourWithSeconds } from "@/lib/utils"
 import type { ContainerRecord } from "@/types"
 import { ContainerHealth, ContainerHealthLabels } from "@/lib/enums"
 import {
-	ArrowUpDownIcon,
 	ClockIcon,
 	ContainerIcon,
 	CpuIcon,
@@ -13,11 +12,12 @@ import {
 	ServerIcon,
 	ShieldCheckIcon,
 } from "lucide-react"
-import { EthernetIcon, HourglassIcon } from "../ui/icons"
+import { EthernetIcon, HourglassIcon, SquareArrowRightEnterIcon } from "../ui/icons"
 import { Badge } from "../ui/badge"
 import { t } from "@lingui/core/macro"
-import { $allSystemsById } from "@/lib/stores"
+import { $allSystemsById, $longestSystemNameLen } from "@/lib/stores"
 import { useStore } from "@nanostores/react"
+import { Tooltip, TooltipContent, TooltipTrigger } from "../ui/tooltip"

 // Unit names and their corresponding number of seconds for converting docker status strings
 const unitSeconds = [
@@ -63,7 +63,12 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
 		header: ({ column }) => <HeaderButton column={column} name={t`System`} Icon={ServerIcon} />,
 		cell: ({ getValue }) => {
 			const allSystems = useStore($allSystemsById)
-			return <span className="ms-1.5 xl:w-34 block truncate">{allSystems[getValue() as string]?.name ?? ""}</span>
+			const longestName = useStore($longestSystemNameLen)
+			return (
+				<div className="ms-1 max-w-40 truncate" style={{ width: `${longestName / 1.05}ch` }}>
+					{allSystems[getValue() as string]?.name ?? ""}
+				</div>
+			)
 		},
 	},
 	// {
@@ -82,7 +87,7 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
 		header: ({ column }) => <HeaderButton column={column} name={t`CPU`} Icon={CpuIcon} />,
 		cell: ({ getValue }) => {
 			const val = getValue() as number
-			return <span className="ms-1.5 tabular-nums">{`${decimalString(val, val >= 10 ? 1 : 2)}%`}</span>
+			return <span className="ms-1 tabular-nums">{`${decimalString(val, val >= 10 ? 1 : 2)}%`}</span>
 		},
 	},
 	{
@@ -94,7 +99,7 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
 			const val = getValue() as number
 			const formatted = formatBytes(val, false, undefined, true)
 			return (
-				<span className="ms-1.5 tabular-nums">{`${decimalString(formatted.value, formatted.value >= 10 ? 1 : 2)} ${formatted.unit}`}</span>
+				<span className="ms-1 tabular-nums">{`${decimalString(formatted.value, formatted.value >= 10 ? 1 : 2)} ${formatted.unit}`}</span>
 			)
 		},
 	},
@@ -103,11 +108,12 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
 		accessorFn: (record) => record.net,
 		invertSorting: true,
 		header: ({ column }) => <HeaderButton column={column} name={t`Net`} Icon={EthernetIcon} />,
+		minSize: 112,
 		cell: ({ getValue }) => {
 			const val = getValue() as number
 			const formatted = formatBytes(val, true, undefined, false)
 			return (
-				<span className="ms-1.5 tabular-nums">{`${decimalString(formatted.value, formatted.value >= 10 ? 1 : 2)} ${formatted.unit}`}</span>
+				<div className="ms-1 tabular-nums">{`${decimalString(formatted.value, formatted.value >= 10 ? 1 : 2)} ${formatted.unit}`}</div>
 			)
 		},
 	},
@@ -116,6 +122,7 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
 		invertSorting: true,
 		accessorFn: (record) => record.health,
 		header: ({ column }) => <HeaderButton column={column} name={t`Health`} Icon={ShieldCheckIcon} />,
+		minSize: 121,
 		cell: ({ getValue }) => {
 			const healthValue = getValue() as number
 			const healthStatus = ContainerHealthLabels[healthValue] || "Unknown"
@@ -134,6 +141,35 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
 			)
 		},
 	},
+	{
+		id: "ports",
+		accessorFn: (record) => record.ports || undefined,
+		header: ({ column }) => (
+			<HeaderButton
+				column={column}
+				name={t({ message: "Ports", context: "Container ports" })}
+				Icon={SquareArrowRightEnterIcon}
+			/>
+		),
+		sortingFn: (a, b) => getPortValue(a.original.ports) - getPortValue(b.original.ports),
+		minSize: 147,
+		cell: ({ getValue }) => {
+			const val = getValue() as string | undefined
+			if (!val) {
+				return <div className="ms-1.5 text-muted-foreground">-</div>
+			}
+			const className = "ms-1 w-27 block truncate tabular-nums"
+			if (val.length > 14) {
+				return (
+					<Tooltip>
+						<TooltipTrigger className={className}>{val}</TooltipTrigger>
+						<TooltipContent>{val}</TooltipContent>
+					</Tooltip>
+				)
+			}
+			return <span className={className}>{val}</span>
+		},
+	},
 	{
 		id: "image",
 		sortingFn: (a, b) => a.original.image.localeCompare(b.original.image),
@@ -142,7 +178,12 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
 			<HeaderButton column={column} name={t({ message: "Image", context: "Docker image" })} Icon={LayersIcon} />
 		),
 		cell: ({ getValue }) => {
-			return <span className="ms-1.5 xl:w-40 block truncate">{getValue() as string}</span>
+			const val = getValue() as string
+			return (
+				<div className="ms-1 xl:w-40 truncate" title={val}>
+					{val}
+				</div>
+			)
 		},
 	},
 	{
@@ -152,7 +193,7 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
 		sortingFn: (a, b) => getStatusValue(a.original.status) - getStatusValue(b.original.status),
 		header: ({ column }) => <HeaderButton column={column} name={t`Status`} Icon={HourglassIcon} />,
 		cell: ({ getValue }) => {
-			return <span className="ms-1.5 w-25 block truncate">{getValue() as string}</span>
+			return <span className="ms-1 w-25 block truncate">{getValue() as string}</span>
 		},
 	},
 	{
@@ -162,7 +203,7 @@ export const containerChartCols: ColumnDef<ContainerRecord>[] = [
 		header: ({ column }) => <HeaderButton column={column} name={t`Updated`} Icon={ClockIcon} />,
 		cell: ({ getValue }) => {
 			const timestamp = getValue() as number
-			return <span className="ms-1.5 tabular-nums">{hourWithSeconds(new Date(timestamp).toISOString())}</span>
+			return <span className="ms-1 tabular-nums">{hourWithSeconds(new Date(timestamp).toISOString())}</span>
 		},
 	},
 ]
@@ -188,7 +229,21 @@ function HeaderButton({
 		>
 			{Icon && <Icon className="size-4" />}
 			{name}
-			<ArrowUpDownIcon className="size-4" />
+			{/* <ArrowUpDownIcon className="size-4" /> */}
 		</Button>
 	)
 }
+
+/**
+ * Derive a numeric sort key from a container's port string.
+ * Uses only the first comma-separated mapping and drops any "address:" prefix; anything else sorts as 0.
+ */
+function getPortValue(ports: string | undefined): number {
+	if (!ports) {
+		return 0
+	}
+	const commaIndex = ports.indexOf(",")
+	const firstMapping = commaIndex === -1 ? ports : ports.slice(0, commaIndex)
+	const portStr = firstMapping.slice(firstMapping.lastIndexOf(":") + 1)
+	return Number(portStr) || 0
+}
--- a/internal/site/src/components/containers-table/containers-table.tsx
+++ b/internal/site/src/components/containers-table/containers-table.tsx
@@ -1,3 +1,4 @@
+/** biome-ignore-all lint/security/noDangerouslySetInnerHtml: html comes directly from docker via agent */
 import { t } from "@lingui/core/macro"
 import { Trans } from "@lingui/react/macro"
 import {
@@ -13,7 +14,7 @@ import {
 	type VisibilityState,
 } from "@tanstack/react-table"
 import { useVirtualizer, type VirtualItem } from "@tanstack/react-virtual"
-import { memo, RefObject, useEffect, useRef, useState } from "react"
+import { memo, type RefObject, useEffect, useRef, useState } from "react"
 import { Input } from "@/components/ui/input"
 import { TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table"
 import { pb } from "@/lib/api"
@@ -44,6 +45,20 @@ export default function ContainersTable({ systemId }: { systemId?: string }) {
 	)
 	const [columnFilters, setColumnFilters] = useState<ColumnFiltersState>([])
 	const [columnVisibility, setColumnVisibility] = useState<VisibilityState>({})
+
+	// Hide ports column if no ports are present
+	useEffect(() => {
+		if (data) {
+			const hasPorts = data.some((container) => container.ports)
+			setColumnVisibility((prev) => {
+				if (prev.ports === hasPorts) {
+					return prev
+				}
+				return { ...prev, ports: hasPorts }
+			})
+		}
+	}, [data])
+
 	const [rowSelection, setRowSelection] = useState({})
 	const [globalFilter, setGlobalFilter] = useState("")

@@ -51,7 +66,7 @@ export default function ContainersTable({ systemId }: { systemId?: string }) {
 		function fetchData(systemId?: string) {
 			pb.collection<ContainerRecord>("containers")
 				.getList(0, 2000, {
-					fields: "id,name,image,cpu,memory,net,health,status,system,updated",
+					fields: "id,name,image,ports,cpu,memory,net,health,status,system,updated",
 					filter: systemId ? pb.filter("system={:system}", { system: systemId }) : undefined,
 				})
 				.then(({ items }) => {
@@ -67,7 +82,7 @@ export default function ContainersTable({ systemId }: { systemId?: string }) {
 					setData((curItems) => {
 						const lastUpdated = Math.max(items[0].updated, items.at(-1)?.updated ?? 0)
 						const containerIds = new Set()
-						const newItems = []
+						const newItems: ContainerRecord[] = []
 						for (const item of items) {
 							if (Math.abs(lastUpdated - item.updated) < 70_000) {
 								containerIds.add(item.id)
@@ -134,7 +149,8 @@ export default function ContainersTable({ systemId }: { systemId?: string }) {
 			const status = container.status ?? ""
 			const healthLabel = ContainerHealthLabels[container.health as ContainerHealth] ?? ""
 			const image = container.image ?? ""
-			const searchString = `${systemName} ${id} ${name} ${healthLabel} ${status} ${image}`.toLowerCase()
+			const ports = container.ports ?? ""
+			const searchString = `${systemName} ${id} ${name} ${healthLabel} ${status} ${image} ${ports}`.toLowerCase()

 			return (filterValue as string)
 				.toLowerCase()
@@ -300,9 +316,6 @@ function ContainerSheet({
 	setSheetOpen: (open: boolean) => void
 	activeContainer: RefObject<ContainerRecord | null>
 }) {
-	const container = activeContainer.current
-	if (!container) return null
-
 	const [logsDisplay, setLogsDisplay] = useState<string>("")
 	const [infoDisplay, setInfoDisplay] = useState<string>("")
 	const [logsFullscreenOpen, setLogsFullscreenOpen] = useState<boolean>(false)
@@ -310,6 +323,8 @@ function ContainerSheet({
 	const [isRefreshingLogs, setIsRefreshingLogs] = useState<boolean>(false)
 	const logsContainerRef = useRef<HTMLDivElement>(null)

+	const container = activeContainer.current
+
 	function scrollLogsToBottom() {
 		if (logsContainerRef.current) {
 			logsContainerRef.current.scrollTo({ top: logsContainerRef.current.scrollHeight })
@@ -317,6 +332,7 @@ function ContainerSheet({
 	}

 	const refreshLogs = async () => {
+		if (!container) return
 		setIsRefreshingLogs(true)
 		const startTime = Date.now()

@@ -348,6 +364,8 @@ function ContainerSheet({
 		})()
 	}, [container])

+	if (!container) return null
+
 	return (
 		<>
 			<LogsFullscreenDialog
@@ -378,8 +396,14 @@ function ContainerSheet({
 							{container.image}
 							<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
 							{container.id}
-							<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
-							{ContainerHealthLabels[container.health as ContainerHealth]}
+							{/* {container.ports && (
+								<>
+									<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
+									{container.ports}
+								</>
+							)} */}
+							{/* <Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
+							{ContainerHealthLabels[container.health as ContainerHealth]} */}
 						</SheetDescription>
 					</SheetHeader>
 					<div className="px-3 pb-3 -mt-4 flex flex-col gap-3 h-full items-start">
@@ -438,11 +462,12 @@ function ContainerSheet({
 function ContainersTableHead({ table }: { table: TableType<ContainerRecord> }) {
 	return (
 		<TableHeader className="sticky top-0 z-50 w-full border-b-2">
+			<div className="absolute -top-2 left-0 w-full h-4 bg-table-header z-50"></div>
 			{table.getHeaderGroups().map((headerGroup) => (
 				<tr key={headerGroup.id}>
 					{headerGroup.headers.map((header) => {
 						return (
-							<TableHead className="px-2" key={header.id}>
+							<TableHead className="px-2" key={header.id} style={{ width: header.getSize() }}>
 								{header.isPlaceholder ? null : flexRender(header.column.columnDef.header, header.getContext())}
 							</TableHead>
 						)
@@ -474,6 +499,7 @@ const ContainerTableRow = memo(function ContainerTableRow({
 					className="py-0 ps-4.5"
 					style={{
 						height: virtualRow.size,
+						width: cell.column.getSize(),
 					}}
 				>
 					{flexRender(cell.column.columnDef.cell, cell.getContext())}
--- a/internal/site/src/components/routes/settings/general.tsx
+++ b/internal/site/src/components/routes/settings/general.tsx
@@ -12,7 +12,7 @@ import Slider from "@/components/ui/slider"
 import { HourFormat, Unit } from "@/lib/enums"
 import { dynamicActivate } from "@/lib/i18n"
 import languages from "@/lib/languages"
-import { $userSettings } from "@/lib/stores"
+import { $userSettings, defaultLayoutWidth } from "@/lib/stores"
 import { chartTimeData, currentHour12 } from "@/lib/utils"
 import type { UserSettings } from "@/types"
 import { saveSettings } from "./layout"
@@ -21,7 +21,7 @@ export default function SettingsProfilePage({ userSettings }: { userSettings: Us
 	const [isLoading, setIsLoading] = useState(false)
 	const { i18n } = useLingui()
 	const currentUserSettings = useStore($userSettings)
-	const layoutWidth = currentUserSettings.layoutWidth ?? 1500
+	const layoutWidth = currentUserSettings.layoutWidth ?? defaultLayoutWidth

 	async function handleSubmit(e: React.FormEvent<HTMLFormElement>) {
 		e.preventDefault()
--- a/internal/site/src/components/routes/system.tsx
+++ b/internal/site/src/components/routes/system.tsx
@@ -654,7 +654,7 @@ export default memo(function SystemDetail({ id }: { id: string }) {
 					)}

 					{/* Load Average chart */}
-					{chartData.agentVersion?.minor >= 12 && (
+					{chartData.agentVersion?.minor > 12 && (
 						<ChartCard
 							empty={dataEmpty}
 							grid={grid}
--- a/internal/site/src/components/routes/system/smart-table.tsx
+++ b/internal/site/src/components/routes/system/smart-table.tsx
@@ -3,13 +3,16 @@ import {
 	type ColumnDef,
 	type ColumnFiltersState,
 	type Column,
+	type Row,
 	type SortingState,
+	type Table as TableType,
 	flexRender,
 	getCoreRowModel,
 	getFilteredRowModel,
 	getSortedRowModel,
 	useReactTable,
 } from "@tanstack/react-table"
+import { useVirtualizer, type VirtualItem } from "@tanstack/react-virtual"
 import {
 	Activity,
 	Box,
@@ -40,6 +43,7 @@ import {
 	toFixedFloat,
 	formatTemperature,
 	cn,
+	getVisualStringWidth,
 	secondsToString,
 	hourWithSeconds,
 	formatShortDate,
@@ -57,7 +61,7 @@ import {
 	DropdownMenuSeparator,
 	DropdownMenuTrigger,
 } from "@/components/ui/dropdown-menu"
-import { useCallback, useMemo, useEffect, useState } from "react"
+import { memo, useCallback, useMemo, useEffect, useRef, useState } from "react"
 import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"

 // Column definition for S.M.A.R.T. attributes table
@@ -101,7 +105,11 @@ function formatCapacity(bytes: number): string {

 const SMART_DEVICE_FIELDS = "id,system,name,model,state,capacity,temp,type,hours,cycles,updated"

-export const columns: ColumnDef<SmartDeviceRecord>[] = [
+export const createColumns = (
+	longestName: number,
+	longestModel: number,
+	longestDevice: number
+): ColumnDef<SmartDeviceRecord>[] => [
 	{
 		id: "system",
 		accessorFn: (record) => record.system,
@@ -114,7 +122,11 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
 		header: ({ column }) => <HeaderButton column={column} name={t`System`} Icon={ServerIcon} />,
 		cell: ({ getValue }) => {
 			const allSystems = useStore($allSystemsById)
-			return <span className="ms-1.5 xl:w-30 block truncate">{allSystems[getValue() as string]?.name ?? ""}</span>
+			return (
+				<div className="ms-1.5 max-w-40 block truncate" style={{ width: `${longestName / 1.05}ch` }}>
+					{allSystems[getValue() as string]?.name ?? ""}
+				</div>
+			)
 		},
 	},
 	{
@@ -122,7 +134,11 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
 		sortingFn: (a, b) => a.original.name.localeCompare(b.original.name),
 		header: ({ column }) => <HeaderButton column={column} name={t`Device`} Icon={HardDrive} />,
 		cell: ({ getValue }) => (
-			<div className="font-medium max-w-40 truncate ms-1.5" title={getValue() as string}>
+			<div
+				className="font-medium max-w-40 truncate ms-1"
+				title={getValue() as string}
+				style={{ width: `${longestDevice / 1.05}ch` }}
+			>
 				{getValue() as string}
 			</div>
 		),
@@ -132,7 +148,11 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
 		sortingFn: (a, b) => a.original.model.localeCompare(b.original.model),
 		header: ({ column }) => <HeaderButton column={column} name={t`Model`} Icon={Box} />,
 		cell: ({ getValue }) => (
-			<div className="max-w-48 truncate ms-1.5" title={getValue() as string}>
+			<div
+				className="max-w-48 truncate ms-1"
+				title={getValue() as string}
+				style={{ width: `${longestModel / 1.05}ch` }}
+			>
 				{getValue() as string}
 			</div>
 		),
@@ -141,7 +161,7 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
 		accessorKey: "capacity",
 		invertSorting: true,
 		header: ({ column }) => <HeaderButton column={column} name={t`Capacity`} Icon={BinaryIcon} />,
-		cell: ({ getValue }) => <span className="ms-1.5">{formatCapacity(getValue() as number)}</span>,
+		cell: ({ getValue }) => <span className="ms-1">{formatCapacity(getValue() as number)}</span>,
 	},
 	{
 		accessorKey: "state",
@@ -149,9 +169,9 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
 		cell: ({ getValue }) => {
 			const status = getValue() as string
 			return (
-				<div className="ms-1.5">
-					<Badge variant={status === "PASSED" ? "success" : status === "FAILED" ? "danger" : "warning"}>{status}</Badge>
-				</div>
+				<Badge className="ms-1" variant={status === "PASSED" ? "success" : status === "FAILED" ? "danger" : "warning"}>
+					{status}
+				</Badge>
 			)
 		},
 	},
@@ -160,11 +180,9 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
 		sortingFn: (a, b) => a.original.type.localeCompare(b.original.type),
 		header: ({ column }) => <HeaderButton column={column} name={t`Type`} Icon={ArrowLeftRightIcon} />,
 		cell: ({ getValue }) => (
-			<div className="ms-1.5">
-				<Badge variant="outline" className="uppercase">
-					{getValue() as string}
-				</Badge>
-			</div>
+			<Badge variant="outline" className="ms-1 uppercase">
+				{getValue() as string}
+			</Badge>
 		),
 	},
 	{
@@ -176,11 +194,11 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
 		cell: ({ getValue }) => {
 			const hours = getValue() as number | undefined
 			if (hours == null) {
-				return <div className="text-sm text-muted-foreground ms-1.5">N/A</div>
+				return <div className="text-sm text-muted-foreground ms-1">N/A</div>
 			}
 			const seconds = hours * 3600
 			return (
-				<div className="text-sm ms-1.5">
+				<div className="text-sm ms-1">
 					<div>{secondsToString(seconds, "hour")}</div>
 					<div className="text-muted-foreground text-xs">{secondsToString(seconds, "day")}</div>
 				</div>
@@ -196,9 +214,9 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
 		cell: ({ getValue }) => {
 			const cycles = getValue() as number | undefined
 			if (cycles == null) {
-				return <div className="text-muted-foreground ms-1.5">N/A</div>
+				return <div className="text-muted-foreground ms-1">N/A</div>
 			}
-			return <span className="ms-1.5">{cycles.toLocaleString()}</span>
+			return <span className="ms-1">{cycles.toLocaleString()}</span>
 		},
 	},
 	{
@@ -208,10 +226,10 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
 		cell: ({ getValue }) => {
 			const temp = getValue() as number | null | undefined
 			if (!temp) {
-				return <div className="text-muted-foreground ms-1.5">N/A</div>
+				return <div className="text-muted-foreground ms-1">N/A</div>
 			}
 			const { value, unit } = formatTemperature(temp)
-			return <span className="ms-1.5">{`${value} ${unit}`}</span>
+			return <span className="ms-1">{`${value} ${unit}`}</span>
 		},
 	},
 	// {
@@ -236,7 +254,7 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
 			// if today, use hourWithSeconds, otherwise use formatShortDate
 			const formatter =
 				new Date(timestamp).toDateString() === new Date().toDateString() ? hourWithSeconds : formatShortDate
-			return <span className="ms-1.5 tabular-nums">{formatter(timestamp)}</span>
+			return <span className="ms-1 tabular-nums">{formatter(timestamp)}</span>
 		},
 	},
 ]
@@ -275,6 +293,36 @@ export default function DisksTable({ systemId }: { systemId?: string }) {
 	const [sheetOpen, setSheetOpen] = useState(false)
 	const [rowActionState, setRowActionState] = useState<{ type: "refresh" | "delete"; id: string } | null>(null)
 	const [globalFilter, setGlobalFilter] = useState("")
+	const allSystems = useStore($allSystemsById)
+
+	// duplicate the devices to test with more rows
+	// if (
+	// 	smartDevices?.length &&
+	// 	smartDevices.length < 50 &&
+	// 	typeof window !== "undefined" &&
+	// 	window.location.hostname === "localhost"
+	// ) {
+	// 	setSmartDevices([...smartDevices, ...smartDevices, ...smartDevices])
+	// }
+
+	// Calculate the right width for the columns based on the longest strings among the displayed devices
+	const { longestName, longestModel, longestDevice } = useMemo(() => {
+		const result = { longestName: 0, longestModel: 0, longestDevice: 0 }
+		if (!smartDevices || Object.keys(allSystems).length === 0) {
+			return result
+		}
+		const seenSystems = new Set<string>()
+		for (const device of smartDevices) {
+			if (!systemId && !seenSystems.has(device.system)) {
+				seenSystems.add(device.system)
+				const name = allSystems[device.system]?.name ?? ""
+				result.longestName = Math.max(result.longestName, getVisualStringWidth(name))
+			}
+			result.longestModel = Math.max(result.longestModel, getVisualStringWidth(device.model ?? ""))
+			result.longestDevice = Math.max(result.longestDevice, getVisualStringWidth(device.name ?? ""))
+		}
+		return result
+	}, [smartDevices, systemId, allSystems])

 	const openSheet = (disk: SmartDeviceRecord) => {
 		setActiveDiskId(disk.id)
@@ -440,9 +488,10 @@ export default function DisksTable({ systemId }: { systemId?: string }) {

 	// Filter columns based on whether systemId is provided
 	const tableColumns = useMemo(() => {
+		const columns = createColumns(longestName, longestModel, longestDevice)
 		const baseColumns = systemId ? columns.filter((col) => col.id !== "system") : columns
 		return [...baseColumns, actionColumn]
-	}, [systemId, actionColumn])
+	}, [systemId, actionColumn, longestName, longestModel, longestDevice])

 	const table = useReactTable({
 		data: smartDevices || ([] as SmartDeviceRecord[]),
@@ -474,6 +523,7 @@ export default function DisksTable({ systemId }: { systemId?: string }) {
 				.every((term) => searchString.includes(term))
 		},
 	})
+	const rows = table.getRowModel().rows

 	// Hide the table on system pages if there's no data, but always show on global page
 	if (systemId && !smartDevices?.length && !columnFilters.length) {
@@ -513,57 +563,123 @@ export default function DisksTable({ systemId }: { systemId?: string }) {
 						</div>
 					</div>
 				</CardHeader>
-				<div className="rounded-md border text-nowrap">
-					<Table>
-						<TableHeader>
-							{table.getHeaderGroups().map((headerGroup) => (
-								<TableRow key={headerGroup.id}>
-									{headerGroup.headers.map((header) => {
-										return (
-											<TableHead key={header.id} className="px-2">
-												{header.isPlaceholder ? null : flexRender(header.column.columnDef.header, header.getContext())}
-											</TableHead>
-										)
-									})}
-								</TableRow>
-							))}
-						</TableHeader>
-						<TableBody>
-							{table.getRowModel().rows?.length ? (
-								table.getRowModel().rows.map((row) => (
-									<TableRow
-										key={row.id}
-										data-state={row.getIsSelected() && "selected"}
-										className="cursor-pointer"
-										onClick={() => openSheet(row.original)}
-									>
-										{row.getVisibleCells().map((cell) => (
-											<TableCell key={cell.id} className="md:ps-5">
-												{flexRender(cell.column.columnDef.cell, cell.getContext())}
-											</TableCell>
-										))}
-									</TableRow>
-								))
-							) : (
-								<TableRow>
-									<TableCell colSpan={tableColumns.length} className="h-24 text-center">
-										{smartDevices ? (
-											t`No results.`
-										) : (
-											<LoaderCircleIcon className="animate-spin size-10 opacity-60 mx-auto" />
-										)}
-									</TableCell>
-								</TableRow>
-							)}
-						</TableBody>
-					</Table>
-				</div>
+				<SmartDevicesTable
+					table={table}
+					rows={rows}
+					colLength={tableColumns.length}
+					data={smartDevices}
+					openSheet={openSheet}
+				/>
 			</Card>
 			<DiskSheet diskId={activeDiskId} open={sheetOpen} onOpenChange={setSheetOpen} />
 		</div>
 	)
 }

+/** Virtualized S.M.A.R.T. devices table: only the rows returned by the virtualizer are rendered. */
+const SmartDevicesTable = memo(function SmartDevicesTable({
+	table,
+	rows,
+	colLength,
+	data,
+	openSheet,
+}: {
+	table: TableType<SmartDeviceRecord>
+	rows: Row<SmartDeviceRecord>[]
+	colLength: number
+	data: SmartDeviceRecord[] | undefined
+	openSheet: (disk: SmartDeviceRecord) => void
+}) {
+	const scrollRef = useRef<HTMLDivElement>(null)
+	const virtualizer = useVirtualizer<HTMLDivElement, HTMLTableRowElement>({
+		count: rows.length,
+		estimateSize: () => 65,
+		getScrollElement: () => scrollRef.current,
+		overscan: 5,
+	})
+	const virtualRows = virtualizer.getVirtualItems()
+	// Padding stands in for unrendered rows; the parentheses matter because `??` binds looser than `-`.
+	const paddingTop = Math.max(0, (virtualRows[0]?.start ?? 0) - virtualizer.options.scrollMargin)
+	const paddingBottom = Math.max(0, virtualizer.getTotalSize() - (virtualRows[virtualRows.length - 1]?.end ?? 0))
+
+	return (
+		<div
+			className={cn(
+				"h-min max-h-[calc(100dvh-17rem)] max-w-full relative overflow-auto rounded-md border",
+				(!rows.length || rows.length > 2) && "min-h-50"
+			)}
+			ref={scrollRef}
+		>
+			<div style={{ height: `${virtualizer.getTotalSize() + 48}px`, paddingTop, paddingBottom }}>
+				<table className="w-full text-sm text-nowrap">
+					<SmartTableHead table={table} />
+					<TableBody>
+						{rows.length ? (
+							virtualRows.map((virtualRow) => {
+								const row = rows[virtualRow.index]
+								return <SmartDeviceTableRow key={row.id} row={row} virtualRow={virtualRow} openSheet={openSheet} />
+							})
+						) : (
+							<TableRow>
+								<TableCell colSpan={colLength} className="h-24 text-center pointer-events-none">
+									{data ? t`No results.` : <LoaderCircleIcon className="animate-spin size-10 opacity-60 mx-auto" />}
+								</TableCell>
+							</TableRow>
+						)}
+					</TableBody>
+				</table>
+			</div>
+		</div>
+	)
+})
+
+// Sticky header for the S.M.A.R.T. devices table; renders one row per header group.
+function SmartTableHead({ table }: { table: TableType<SmartDeviceRecord> }) {
+	return (
+		<TableHeader className="sticky top-0 z-50 w-full border-b-2">
+			<div className="absolute -top-2 left-0 w-full h-4 bg-table-header z-50"></div>
+			{table.getHeaderGroups().map((group) => (
+				<TableRow key={group.id}>
+					{group.headers.map((col) => {
+						const content = col.isPlaceholder ? null : flexRender(col.column.columnDef.header, col.getContext())
+						return <TableHead key={col.id} className="px-2">{content}</TableHead>
+					})}
+				</TableRow>
+			))}
+		</TableHeader>
+	)
+}
+
+/**
+ * Memoized body row for a single S.M.A.R.T. device.
+ * Clicking the row calls openSheet with the row's record; each cell takes its height from the virtual row.
+ */
+const SmartDeviceTableRow = memo(function SmartDeviceTableRow({
+	row,
+	virtualRow,
+	openSheet,
+}: {
+	row: Row<SmartDeviceRecord>
+	virtualRow: VirtualItem
+	openSheet: (disk: SmartDeviceRecord) => void
+}) {
+	const cellStyle = { height: virtualRow.size }
+	const handleClick = () => openSheet(row.original)
+	return (
+		<TableRow
+			data-state={row.getIsSelected() && "selected"}
+			className="cursor-pointer"
+			onClick={handleClick}
+		>
+			{row.getVisibleCells().map((visibleCell) => (
+				<TableCell key={visibleCell.id} className="md:ps-5 py-0" style={cellStyle}>
+					{flexRender(visibleCell.column.columnDef.cell, visibleCell.getContext())}
+				</TableCell>
+			))}
+		</TableRow>
+	)
+})
+
 function DiskSheet({
 	diskId,
 	open,
@@ -621,8 +737,8 @@ function DiskSheet({
 	const deviceName = disk?.name || unknown
 	const model = disk?.model || unknown
 	const capacity = disk?.capacity ? formatCapacity(disk.capacity) : unknown
-	const serialNumber = disk?.serial || unknown
-	const firmwareVersion = disk?.firmware || unknown
+	const serialNumber = disk?.serial
+	const firmwareVersion = disk?.firmware
 	const status = disk?.state || unknown

 	return (
@@ -636,24 +752,32 @@ function DiskSheet({
 						{model}
 						<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
 						{capacity}
-						<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
-						<Tooltip>
-							<TooltipTrigger asChild>
-								<span>{serialNumber}</span>
-							</TooltipTrigger>
-							<TooltipContent>
-								<Trans>Serial Number</Trans>
-							</TooltipContent>
-						</Tooltip>
-						<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
-						<Tooltip>
-							<TooltipTrigger asChild>
-								<span>{firmwareVersion}</span>
-							</TooltipTrigger>
-							<TooltipContent>
-								<Trans>Firmware</Trans>
-							</TooltipContent>
-						</Tooltip>
+						{serialNumber && (
+							<>
+								<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
+								<Tooltip>
+									<TooltipTrigger asChild>
+										<span>{serialNumber}</span>
+									</TooltipTrigger>
+									<TooltipContent>
+										<Trans>Serial Number</Trans>
+									</TooltipContent>
+								</Tooltip>
+							</>
+						)}
+						{firmwareVersion && (
+							<>
+								<Separator orientation="vertical" className="h-2.5 bg-muted-foreground opacity-70" />
+								<Tooltip>
+									<TooltipTrigger asChild>
+										<span>{firmwareVersion}</span>
+									</TooltipTrigger>
+									<TooltipContent>
+										<Trans>Firmware</Trans>
+									</TooltipContent>
+								</Tooltip>
+							</>
+						)}
 					</SheetDescription>
 				</SheetHeader>
 				<div className="flex-1 overflow-hidden p-4 flex flex-col gap-4">
--- a/internal/site/src/components/systemd-table/systemd-table.tsx
+++ b/internal/site/src/components/systemd-table/systemd-table.tsx
@@ -46,7 +46,6 @@ export default function SystemdTable({ systemId }: { systemId?: string }) {
 		return setData([])
 	}, [systemId])

-
 	useEffect(() => {
 		const lastUpdated = data[0]?.updated ?? 0

@@ -360,15 +359,9 @@ function SystemdSheet({
 		return (
 			<>
 				{hasCurrent ? current : notAvailable}
-				{hasMax && (
-					<span className="text-muted-foreground ms-1.5">
-						{`(${t`limit`}: ${max})`}
-					</span>
-				)}
+				{hasMax && <span className="text-muted-foreground ms-1.5">{`(${t`limit`}: ${max})`}</span>}
 				{max === null && (
-					<span className="text-muted-foreground ms-1.5">
-						{`(${t`limit`}: ${t`Unlimited`.toLowerCase()})`}
-					</span>
+					<span className="text-muted-foreground ms-1.5">{`(${t`limit`}: ${t`Unlimited`.toLowerCase()})`}</span>
 				)}
 			</>
 		)
@@ -435,7 +428,7 @@ function SystemdSheet({
 			</tr>
 		)
 	}
-	
+
 	const capitalize = (str: string) => `${str.charAt(0).toUpperCase()}${str.slice(1).toLowerCase()}`

 	return (
@@ -621,6 +614,7 @@ function SystemdSheet({
 function SystemdTableHead({ table }: { table: TableType<SystemdRecord> }) {
 	return (
 		<TableHeader className="sticky top-0 z-50 w-full border-b-2">
+			<div className="absolute -top-2 left-0 w-full h-4 bg-table-header z-50"></div>
 			{table.getHeaderGroups().map((headerGroup) => (
 				<tr key={headerGroup.id}>
 					{headerGroup.headers.map((header) => {
--- a/internal/site/src/components/systems-table/systems-table-columns.tsx
+++ b/internal/site/src/components/systems-table/systems-table-columns.tsx
@@ -184,7 +184,8 @@ export function SystemsTableColumns(viewMode: "table" | "grid"): ColumnDef<Syste
 			accessorFn: ({ info }) => info.dp || undefined,
 			id: "disk",
 			name: () => t`Disk`,
-			cell: DiskCellWithMultiple,
+			cell: (info: CellContext<SystemRecord, unknown>) =>
+				info.row.original.info.efs ? DiskCellWithMultiple(info) : TableCellWithMeter(info),
 			Icon: HardDriveIcon,
 			header: sortableHeader,
 		},
@@ -198,32 +199,19 @@ export function SystemsTableColumns(viewMode: "table" | "grid"): ColumnDef<Syste
 		},
 		{
 			id: "loadAverage",
-			accessorFn: ({ info }) => {
-				const sum = info.la?.reduce((acc, curr) => acc + curr, 0)
-				// TODO: remove this in future release in favor of la array
-				if (!sum) {
-					return (info.l1 ?? 0) + (info.l5 ?? 0) + (info.l15 ?? 0) || undefined
-				}
-				return sum || undefined
-			},
+			accessorFn: ({ info }) => info.la?.reduce((acc, curr) => acc + curr, 0),
 			name: () => t({ message: "Load Avg", comment: "Short label for load average" }),
 			size: 0,
 			Icon: HourglassIcon,
 			header: sortableHeader,
 			cell(info: CellContext<SystemRecord, unknown>) {
 				const { info: sysInfo, status } = info.row.original
+				const { major, minor } = parseSemVer(sysInfo.v)
 				const { colorWarn = 65, colorCrit = 90 } = useStore($userSettings, { keys: ["colorWarn", "colorCrit"] })
-				// agent version
-				const { minor, patch } = parseSemVer(sysInfo.v)
-				let loadAverages = sysInfo.la
-
-				// use legacy load averages if agent version is less than 12.1.0
-				if (!loadAverages || (minor === 12 && patch < 1)) {
-					loadAverages = [sysInfo.l1 ?? 0, sysInfo.l5 ?? 0, sysInfo.l15 ?? 0]
-				}
+				const loadAverages = sysInfo.la || []

 				const max = Math.max(...loadAverages)
-				if (max === 0 && (status === SystemStatus.Paused || minor < 12)) {
+				if (max === 0 && (status === SystemStatus.Paused || (major < 1 && minor < 13))) {
 					return null
 				}

@@ -248,19 +236,20 @@ export function SystemsTableColumns(viewMode: "table" | "grid"): ColumnDef<Syste
 			},
 		},
 		{
-			accessorFn: ({ info }) => info.bb || (info.b || 0) * 1024 * 1024 || undefined,
+			accessorFn: ({ info, status }) => (status !== SystemStatus.Up ? undefined : info.bb),
 			id: "net",
 			name: () => t`Net`,
 			size: 0,
 			Icon: EthernetIcon,
 			header: sortableHeader,
+			sortUndefined: "last",
 			cell(info) {
-				const sys = info.row.original
-				const userSettings = useStore($userSettings, { keys: ["unitNet"] })
-				if (sys.status === SystemStatus.Paused) {
+				const val = info.getValue() as number | undefined
+				if (val === undefined) {
 					return null
 				}
-				const { value, unit } = formatBytes((info.getValue() || 0) as number, true, userSettings.unitNet, false)
+				const userSettings = useStore($userSettings, { keys: ["unitNet"] })
+				const { value, unit } = formatBytes(val, true, userSettings.unitNet, false)
 				return (
 					<span className="tabular-nums whitespace-nowrap">
 						{decimalString(value, value >= 100 ? 1 : 2)} {unit}
@@ -491,11 +480,6 @@ function DiskCellWithMultiple(info: CellContext<SystemRecord, unknown>) {
 	const { colorWarn = 65, colorCrit = 90 } = useStore($userSettings, { keys: ["colorWarn", "colorCrit"] })
 	const { info: sysInfo, status, id } = info.row.original
 	const extraFs = Object.entries(sysInfo.efs ?? {})
-
-	if (extraFs.length === 0) {
-		return TableCellWithMeter(info)
-	}
-
 	const rootDiskPct = sysInfo.dp

 	// sort extra disks by percentage descending
--- a/internal/site/src/components/systems-table/systems-table.tsx
+++ b/internal/site/src/components/systems-table/systems-table.tsx
@@ -391,6 +391,7 @@ function SystemsTableHead({ table }: { table: TableType<SystemRecord> }) {
 	const { t } = useLingui()
 	return (
 		<TableHeader className="sticky top-0 z-50 w-full border-b-2">
+			<div className="absolute -top-2 left-0 w-full h-4 bg-table-header z-50"></div>
 			{table.getHeaderGroups().map((headerGroup) => (
 				<tr key={headerGroup.id}>
 					{headerGroup.headers.map((header) => {
--- a/internal/site/src/components/ui/icons.tsx
+++ b/internal/site/src/components/ui/icons.tsx
@@ -185,3 +185,14 @@ export function PlugChargingIcon(props: SVGProps<SVGSVGElement>) {
 		</svg>
 	)
 }
+
+// Lucide Icons (ISC) - used for ports
+export function SquareArrowRightEnterIcon(props: SVGProps<SVGSVGElement>) {
+	return (
+		<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" {...props}>
+			<path d="m10 16 4-4-4-4" />
+			<path d="M3 12h11" />
+			<path d="M3 8V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2v14a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-3" />
+		</svg>
+	)
+}
--- a/internal/site/src/lib/stores.ts
+++ b/internal/site/src/lib/stores.ts
@@ -3,6 +3,9 @@ import type { AlertMap, ChartTimes, SystemRecord, UserSettings } from "@/types"
 import { pb } from "./api"
 import { Unit } from "./enums"

+/** Default layout width. Used as fallback when user setting is unset. */
+export const defaultLayoutWidth = 1580
+
 /** Store if user is authenticated */
 export const $authenticated = atom(pb.authStore.isValid)

--- a/internal/site/src/main.tsx
+++ b/internal/site/src/main.tsx
@@ -14,7 +14,14 @@ import { Toaster } from "@/components/ui/toaster.tsx"
 import { alertManager } from "@/lib/alerts"
 import { pb, updateUserSettings } from "@/lib/api.ts"
 import { dynamicActivate, getLocale } from "@/lib/i18n"
-import { $authenticated, $copyContent, $direction, $publicKey, $userSettings } from "@/lib/stores.ts"
+import {
+	$authenticated,
+	$copyContent,
+	$direction,
+	$publicKey,
+	$userSettings,
+	defaultLayoutWidth,
+} from "@/lib/stores.ts"
 import * as systemsManager from "@/lib/systemsManager.ts"

 const LoginPage = lazy(() => import("@/components/login/login.tsx"))
@@ -100,7 +107,7 @@ const Layout = () => {
 					<LoginPage />
 				</Suspense>
 			) : (
-				<div style={{ "--container": `${userSettings.layoutWidth ?? 1580}px` } as React.CSSProperties}>
+				<div style={{ "--container": `${userSettings.layoutWidth ?? defaultLayoutWidth}px` } as React.CSSProperties}>
 					<div className="container">
 						<Navbar />
 					</div>
--- a/internal/site/src/types.d.ts
+++ b/internal/site/src/types.d.ts
@@ -45,12 +45,6 @@ export interface SystemInfo {
 	c: number
 	/** cpu model */
 	m: string
-	/** load average 1 minute */
-	l1?: number
-	/** load average 5 minutes */
-	l5?: number
-	/** load average 15 minutes */
-	l15?: number
 	/** load average */
 	la?: [number, number, number]
 	/** operating system */
@@ -94,13 +88,6 @@ export interface SystemStats {
 	cpub?: number[]
 	/** per-core cpu usage [CPU0..] (0-100 integers) */
 	cpus?: number[]
-	// TODO: remove these in future release in favor of la
-	/** load average 1 minute */
-	l1?: number
-	/** load average 5 minutes */
-	l5?: number
-	/** load average 15 minutes */
-	l15?: number
 	/** load average */
 	la?: [number, number, number]
 	/** total memory (gb) */
@@ -267,6 +254,7 @@ export interface ContainerRecord extends RecordModel {
 	system: string
 	name: string
 	image: string
+	ports: string
 	cpu: number
 	memory: number
 	net: number
--- a/internal/tests/hub.go
+++ b/internal/tests/hub.go
@@ -77,6 +77,16 @@ func CreateUser(app core.App, email string, password string) (*core.Record, erro
 	return user, app.Save(user)
 }

+func CreateUserWithRole(app core.App, email string, password string, roleName string) (*core.Record, error) {
+	user, err := CreateUser(app, email, password)
+	if err != nil {
+		return nil, err
+	}
+
+	user.Set("role", roleName)
+	return user, app.Save(user)
+}
+
 // Helper function to create a test record
 func CreateRecord(app core.App, collectionName string, fields map[string]any) (*core.Record, error) {
 	collection, err := app.FindCachedCollectionByNameOrId(collectionName)
@@ -98,7 +108,7 @@ func ClearCollection(t testing.TB, app core.App, collectionName string) error {
 }

 func (h *TestHub) Cleanup() {
-	h.GetAlertManager().StopWorker()
+	h.GetAlertManager().Stop()
 	h.GetSystemManager().RemoveAllSystems()
 	h.TestApp.Cleanup()
 }
--- a/readme.md
+++ b/readme.md
@@ -51,7 +51,7 @@ The [quick start guide](https://beszel.dev/guide/getting-started) and other docu
 - **GPU usage / power draw** - Nvidia, AMD, and Intel.
 - **Battery** - Host system battery charge.
 - **Containers** - Status and metrics of all running Docker / Podman containers.
-- **S.M.A.R.T.** - Host system disk health (includes eMMC wear/EOL via Linux sysfs when available).
+- **S.M.A.R.T.** - Host system disk health (includes eMMC wear/EOL and Linux mdraid array health via sysfs when available).

 ## Help and discussion
Author	SHA1	Message	Date
henrygd	ff36138229	fix(hub): add onAfterBootstrapAndMigrations to properly queue fns after migrations also remove error return from NewHub and improve comments in hub.go	2026-03-20 19:32:59 -04:00
henrygd	be70840609	test: update tests that use os.Setenv to t.Setenv	2026-03-20 15:00:28 -04:00
henrygd	565162ef5f	refactor(hub): harden/enforce pb api rules and add tests - separate collection related code from hub.go - ensure hub is bootstrapped and collections updated automatically when calling NewHub	2026-03-20 14:39:05 -04:00
henrygd	adbfe7cfb7	chore: upgrade action and go versions in vulncheck workflow	2026-03-19 11:36:10 -04:00
henrygd	1ff7762c80	test(hub): add status alert tests covering multiple users	2026-03-18 17:44:34 -04:00
henrygd	0ab8a606e0	fix(ui): hooks bug in all systems table disk cell	2026-03-18 17:17:58 -04:00
henrygd	e4e0affbc1	test(hub): add additional tests for all system alerts	2026-03-17 18:48:54 -04:00
henrygd	c3a0e645ee	refactor: variable renaming in alerts package	2026-03-17 18:44:46 -04:00
henrygd	c6c3950fb0	refactor: add alertsCache to maintain active alert data in memory	2026-03-17 18:32:57 -04:00
henrygd	48ddc96a0d	systemd: allow timer monitoring with SERVICE_PATTERNS (#1820 )	2026-03-17 15:11:44 -04:00
henrygd	704cb86de8	refactor: change ExpiryMap.store to be a pointer	2026-03-16 17:44:45 -04:00
henrygd	2854ce882f	fix(ui): centralize default layout width and update default setting	2026-03-16 15:23:32 -04:00
henrygd	ed50367f70	fix(agent): add fallback for podman container health (#1475 )	2026-03-15 17:59:59 -04:00
henrygd	4ebe869591	ui: virtualize smart table	2026-03-15 15:20:07 -04:00
henrygd	c9bbbe91f2	ui: improve table col widths and hide text showing above header	2026-03-15 14:59:25 -04:00
henrygd	5bfe4f6970	agent: include ip in container port if not 0.0.0.0 or ::	2026-03-15 14:58:21 -04:00
henrygd	380d2b1091	add ports column to containers table (#1481 )	2026-03-14 19:29:39 -04:00
henrygd	a7f99e7a8c	agent: support new Docker API `Health` field (#1475 )	2026-03-14 15:26:44 -04:00
henrygd	bd94a9d142	agent: improve disk discovery / IO mapping and add tests (#1811 )	2026-03-13 16:03:27 -04:00
henrygd	8e2316f845	refactor: simplify/improve status alert handling (#1519 ) also adds new functionality to restore any pending down alerts that were lost by hub restart before creation	2026-03-12 15:53:40 -04:00
Sven van Ginkel	0d3dfcb207	fix(hub): check if status alert is triggered before sending up alert (#1806 )	2026-03-12 13:38:42 -04:00
henrygd	b386ce5190	hub: add ExpiryMap.UpdateExpiration and sync SMART fetch intervals (#1800 ) - Update smartFetchMap expiration when agent smart interval changes - Prevent background SMART fetching before initial system details are loaded - Add buffer to SMART fetch timing check - Get rid of unnecessary pointers in expirymap	2026-03-11 16:25:52 -04:00
henrygd	e527534016	ensure deprecated system fields are migrated to newer structures also removes refs to legacy load avg fields (l1, l5, l15) that were around for a very short period	2026-03-10 18:46:57 -04:00
Victor Eduardo	ec7ad632a9	fix: Use historical records to average disk usage for extra disk alerts (#1801 ) - Introduced a new test file `alerts_disk_test.go` to validate the behavior of disk alerts using historical data for extra filesystems. - Enhanced the `HandleSystemAlerts` function to correctly calculate disk usage for extra filesystems based on historical records. - Updated the `SystemAlertStats` struct to include `ExtraFs` for tracking additional filesystem statistics.	2026-03-09 18:32:35 -04:00
VACInc	963fce5a33	agent: mark mdraid rebuild as warning, not failed (#1797 )	2026-03-09 17:54:53 -04:00
Sven van Ginkel	d38c0da06d	fix: bypass NIC auto-filter when interface is explicitly whitelisted via NICS (#1805 ) Co-authored-by: henrygd <hank@henrygd.me>	2026-03-09 17:47:59 -04:00
henrygd	cae6ac4626	update go version to 1.26.1	2026-03-09 16:10:38 -04:00
henrygd	6b1ff264f2	gpu(amd): add workaround for misreported sysfs filesize (#1799 )	2026-03-09 14:53:52 -04:00
henrygd	35d0e792ad	refactor(expirymap): optimize performance and add StopCleaner method	2026-03-08 19:09:41 -04:00
henrygd	654cd06b19	respect SMART_INTERVAL across agent reconnects (#1800 ) Move tracking of the last SMART data fetch from individual System instances to the SystemManager using a TTL-based ExpiryMap. This ensures that the SMART_INTERVAL is respected even if an agent connection is dropped and re-established, preventing redundant data collection on every reconnect.	2026-03-08 19:03:50 -04:00
henrygd	5e1b028130	refactor(smart): improve perf by skipping ata_device_statistics parsing if unnecessary	2026-03-08 15:19:50 -04:00
henrygd	638e7dc12a	fix(smart): handle negative ATA device statistics values (#1791 )	2026-03-08 13:34:16 -04:00
henrygd	73c262455d	refactor(agent): move GetEnv to utils package	2026-03-07 14:12:17 -05:00
henrygd	0c4d2edd45	refactor(agent): add utils package; rm utils.go and fs_utils.go	2026-03-07 13:50:49 -05:00
henrygd	8f23fff1c9	refactor: mdraid comments and organization also hide serial / firmware in smart details if empty, remove a few unnecessary ops, and add a few more passed state values	2026-02-27 14:23:10 -05:00
VACInc	02c1a0c13d	Add Linux mdraid health monitoring (#1750 )	2026-02-27 13:42:47 -05:00