fix light flashes when refresh in dark mode (#1832 )

fix: correct DST offset handling in daily quiet hours (#1827 )
fix(hub): add onAfterBootstrapAndMigrations to properly queue fns after migrations
2026-03-22 21:46:18 +01:00 · 2026-03-22 13:35:43 -04:00 · 2026-03-22 12:50:36 -04:00 · 2026-03-20 19:32:59 -04:00 · 2026-03-20 15:00:28 -04:00 · 2026-03-20 14:39:05 -04:00
233 changed files with 28669 additions and 9236 deletions
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -0,0 +1,2 @@
+# Everything needs to be reviewed by Hank
+*   @henrygd
--- a/.github/DISCUSSION_TEMPLATE/ideas.yml
+++ b/.github/DISCUSSION_TEMPLATE/ideas.yml
@@ -0,0 +1,19 @@
+body:
+  - type: dropdown
+    id: component
+    attributes:
+      label: Component
+      description: Which part of Beszel is this about?
+      options:
+        - Hub
+        - Agent
+        - Hub & Agent
+      default: 0
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: Description
+      description: Please describe in detail what you want to share.
+    validations:
+      required: true
--- a/.github/DISCUSSION_TEMPLATE/support.yml
+++ b/.github/DISCUSSION_TEMPLATE/support.yml
@@ -1,19 +1,54 @@
 body:
-  - type: markdown
+  - type: checkboxes
+    id: terms
    attributes:
-      value: |
-        ### Before opening a discussion:
+      label: Welcome!
+      description: |
+        Thank you for reaching out to the Beszel community for support! To help us assist you better, please make sure to review the following points before submitting your request:

-        - Check the [common issues guide](https://beszel.dev/guide/common-issues).
-        - Search existing [issues](https://github.com/henrygd/beszel/issues) and [discussions](https://github.com/henrygd/beszel/discussions) (including closed).
+        Please note:
+        - For translation-related issues or requests, please use the [Crowdin project](https://crowdin.com/project/beszel).
+        - **Please do not submit support requests that are specific to ZFS. We plan to add integration with ZFS utilities in the near future.**
+
+      options:
+      - label: I have read the [Documentation](https://beszel.dev/guide/getting-started)
+        required: true
+      - label: I have checked the [Common Issues Guide](https://beszel.dev/guide/common-issues) and my problem was not mentioned there.
+        required: true
+      - label: I have searched open and closed issues and discussions and my problem was not mentioned before.
+        required: true
+      - label: I have verified I am using the latest version available. You can check the latest release [here](https://github.com/henrygd/beszel/releases).
+        required: true
+
+  - type: dropdown
+    id: component
+    attributes:
+      label: Component
+      description: Which part of Beszel is this about?
+      options:
+        - Hub
+        - Agent
+        - Hub & Agent
+      default: 0
+    validations:
+      required: true

  - type: textarea
    id: description
    attributes:
-      label: Description
-      description: A clear and concise description of the issue or question. If applicable, add screenshots to help explain your problem.
+      label: Problem Description
+      description: |
+        How to write a good bug report?
+
+        - Respect the issue template as much as possible.
+        - The title should be short and descriptive.
+        - Explain the conditions which led you to report this issue: the context.
+        - The context should lead to something, a problem that you’re facing.
+        - Remain clear and concise.
+        - Format your message with [Markdown syntax](https://help.github.com/articles/github-flavored-markdown) so the reader can focus on what matters and follow its structure.
    validations:
      required: true
+
  - type: input
    id: system
    attributes:
@@ -21,13 +56,15 @@ body:
      placeholder: linux/amd64 (agent), freebsd/arm64 (hub)
    validations:
      required: true
-  - type: input
-    id: version
-    attributes:
-      label: Beszel version
-      placeholder: 0.9.1
-    validations:
-      required: true
+  
+#  - type: input
+#    id: version
+#    attributes:
+#      label: Beszel version
+#      placeholder: 0.9.1
+#    validations:
+#      required: true
+
  - type: dropdown
    id: install-method
    attributes:
@@ -41,18 +78,21 @@ body:
        - Other (please describe above)
    validations:
      required: true
+  
  - type: textarea
    id: config
    attributes:
      label: Configuration
      description: Please provide any relevant service configuration
      render: yaml
+
  - type: textarea
    id: hub-logs
    attributes:
      label: Hub Logs
      description: Check the logs page in PocketBase (`/_/#/logs`) for relevant errors (copy JSON).
      render: json
+
  - type: textarea
    id: agent-logs
    attributes:
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -1,8 +1,30 @@
 name: 🐛 Bug report
-description: Report a new bug or issue.
+description: Use this template to report a bug or issue.
 title: '[Bug]: '
-labels: ['bug', "needs confirmation"]
+labels: ['bug']
 body:
+  - type: checkboxes
+    attributes:
+      label: Welcome!
+      description: |
+        The issue tracker is for reporting bugs and feature requests only. For end-user support questions, please use **[GitHub Discussions](https://github.com/henrygd/beszel/discussions/new?category=support)** instead.
+
+        Please note:
+        - For translation-related issues or requests, please use the [Crowdin project](https://crowdin.com/project/beszel).
+        - To request a change or feature, use the [feature request form](https://github.com/henrygd/beszel/issues/new?template=feature_request.yml).
+        - Any issues that can be resolved by consulting the documentation or by reviewing existing open or closed issues will be closed.
+        - **Please do not submit bugs that are specific to ZFS. We plan to add integration with ZFS utilities in the near future.**
+
+      options:
+      - label: I have read the [Documentation](https://beszel.dev/guide/getting-started)
+        required: true
+      - label: I have checked the [Common Issues Guide](https://beszel.dev/guide/common-issues) and my problem was not mentioned there.
+        required: true
+      - label: I have searched open and closed issues and my problem was not mentioned before.
+        required: true
+      - label: I have verified I am using the latest version available. You can check the latest release [here](https://github.com/henrygd/beszel/releases).
+        required: true
+
  - type: dropdown
    id: component
    attributes:
@@ -12,81 +34,53 @@ body:
        - Hub
        - Agent
        - Hub & Agent
+      default: 0
    validations:
      required: true
-  - type: markdown
-    attributes:
-      value: |
-        ### Thanks for taking the time to fill out this bug report!

-        - For more general support, please [start a support thread](https://github.com/henrygd/beszel/discussions/new?category=support).
-        - To request a change or feature, use the [feature request form](https://github.com/henrygd/beszel/issues/new?template=feature_request.yml).
-        - Please do not submit bugs that are specific to ZFS. We plan to add integration with ZFS utilities in the near future.
-
-        ### Before submitting a bug report:
-
-        - Check the [common issues guide](https://beszel.dev/guide/common-issues).
-        - Search existing [issues](https://github.com/henrygd/beszel/issues) and [discussions](https://github.com/henrygd/beszel/discussions) (including closed).
  - type: textarea
    id: description
    attributes:
-      label: Description
-      description: Explain the issue you experienced clearly and concisely.
-      placeholder: I went to the coffee pot and it was empty.
+      label: Problem Description
+      description: |
+        How to write a good bug report?
+
+        - Respect the issue template as much as possible.
+        - The title should be short and descriptive.
+        - Explain the conditions which led you to report this issue: the context.
+        - The context should lead to something, a problem that you’re facing.
+        - Remain clear and concise.
+        - Format your message with [Markdown syntax](https://help.github.com/articles/github-flavored-markdown) so the reader can focus on what matters and follow its structure.
    validations:
      required: true
+
  - type: textarea
    id: expected-behavior
    attributes:
      label: Expected Behavior
-      description: In a perfect world, what should have happened?
+      description: |
+        In a perfect world, what should have happened?
+        **Important:** Be specific. Vague descriptions like "it should work" are not helpful.
      placeholder: When I got to the coffee pot, it should have been full.
    validations:
      required: true
+
  - type: textarea
    id: steps-to-reproduce
    attributes:
      label: Steps to Reproduce
-      description: Describe how to reproduce the issue in repeatable steps.
+      description: |
+        Provide detailed, numbered steps that someone else can follow to reproduce the issue.
+        **Important:** Vague descriptions like "it doesn't work" or "it's broken" will result in the issue being closed.
+        Include specific actions, URLs, button clicks, and any relevant data or configuration.
      placeholder: |
        1. Go to the coffee pot.
        2. Make more coffee.
        3. Pour it into a cup.
+        4. Observe that the cup is empty instead of full.
    validations:
      required: true
-  - type: dropdown
-    id: category
-    attributes:
-      label: Category
-      description: Which category does this relate to most?
-      options:
-        - Metrics
-        - Charts & Visualization
-        - Settings & Configuration
-        - Notifications & Alerts
-        - Authentication
-        - Installation
-        - Performance
-        - UI / UX
-        - Other
-    validations:
-      required: true
-  - type: dropdown
-    id: metrics
-    attributes:
-      label: Affected Metrics
-      description: If applicable, which specific metric does this relate to most?
-      options:
-        - CPU
-        - Memory
-        - Storage
-        - Network
-        - Containers
-        - GPU
-        - Sensors
-        - Other
-    validations:
-      required: true
+
  - type: input
    id: system
    attributes:
@@ -94,6 +88,7 @@ body:
      placeholder: linux/amd64 (agent), freebsd/arm64 (hub)
    validations:
      required: true
+  
  - type: input
    id: version
    attributes:
@@ -101,6 +96,7 @@ body:
      placeholder: 0.9.1
    validations:
      required: true
+  
  - type: dropdown
    id: install-method
    attributes:
@@ -114,18 +110,21 @@ body:
        - Other (please describe above)
    validations:
      required: true
+  
  - type: textarea
    id: config
    attributes:
      label: Configuration
      description: Please provide any relevant service configuration
      render: yaml
+  
  - type: textarea
    id: hub-logs
    attributes:
      label: Hub Logs
      description: Check the logs page in PocketBase (`/_/#/logs`) for relevant errors (copy JSON).
      render: json
+  
  - type: textarea
    id: agent-logs
    attributes:
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,5 +1,8 @@
 blank_issues_enabled: false
 contact_links:
+  - name: 🗣️ Translations
+    url: https://crowdin.com/project/beszel
+    about: Please report translation issues and request new translations here. 
  - name: 💬 Support and questions
    url: https://github.com/henrygd/beszel/discussions
    about: Ask and answer questions here.
--- a/.github/ISSUE_TEMPLATE/feature_request.yml
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -1,8 +1,25 @@
 name: 🚀 Feature request
 description: Request a new feature or change.
 title: "[Feature]: "
-labels: ["enhancement", "needs review"]
+labels: ["enhancement"]
 body:
+  - type: checkboxes
+    attributes:
+      label: Welcome!
+      description: |
+        The issue tracker is for reporting bugs and feature requests only. For end-user support questions, please use **[GitHub Discussions](https://github.com/henrygd/beszel/discussions)** instead.
+
+        Please note:
+        - For **Bug reports**, use the [Bug Form](https://github.com/henrygd/beszel/issues/new?template=bug_report.yml).
+        - Requests for new translations should be made in the [Crowdin project](https://crowdin.com/project/beszel).
+        - Create one issue per feature request. This helps us keep track of requests and prioritize them accordingly.
+
+      options:
+      - label: I have searched open and closed feature requests to make sure this or a similar feature request does not already exist.
+        required: true
+      - label: This is a feature request, not a bug report or support question.
+        required: true
+
  - type: dropdown
    id: component
    attributes:
@@ -12,65 +29,29 @@ body:
        - Hub
        - Agent
        - Hub & Agent
+      default: 0
    validations:
      required: true
-  - type: markdown
-    attributes:
-      value: Before submitting, please search existing [issues](https://github.com/henrygd/beszel/issues) and [discussions](https://github.com/henrygd/beszel/discussions) (including closed).
+
  - type: textarea
+    id: description
    attributes:
-      label: Describe the feature you would like to see
+      label: Description
+      description: |
+        Describe the solution or feature you'd like. Explain what problem this solves or what value it adds.
+        **Important:** Be specific and detailed. Vague requests like "make it better" will be closed.
+      placeholder: |
+        Example:
+        - What is the feature?
+        - What problem does it solve?
+        - How should it work?
    validations:
      required: true
+
  - type: textarea
    id: motivation
    attributes:
      label: Motivation / Use Case
      description: Why do you want this feature? What problem does it solve?
-    validations:
-      required: true
-  - type: textarea
-    attributes:
-      label: Describe how you would like to see this feature implemented
-    validations:
-      required: true
-  - type: textarea
-    id: logs
-    attributes:
-      label: Screenshots
-      description: Please attach any relevant screenshots, such as images from your current solution or similar implementations.
-    validations:
-      required: false
-  - type: dropdown
-    id: category
-    attributes:
-      label: Category
-      description: Which category does this relate to most?
-      options:
-        - Metrics
-        - Charts & Visualization
-        - Settings & Configuration
-        - Notifications & Alerts
-        - Authentication
-        - Installation
-        - Performance
-        - UI / UX
-        - Other
-    validations:
-      required: true
-  - type: dropdown
-    id: metrics
-    attributes:
-      label: Affected Metrics
-      description: If applicable, which specific metric does this relate to most?
-      options:
-        - CPU
-        - Memory
-        - Storage
-        - Network
-        - Containers
-        - GPU
-        - Sensors
-        - Other
    validations:
      required: true
--- a/.github/workflows/inactivity-actions.yml
+++ b/.github/workflows/inactivity-actions.yml
@@ -6,6 +6,7 @@ on:
  workflow_dispatch:

 permissions:
+  actions: write
  issues: write
  pull-requests: write

@@ -48,11 +49,16 @@ jobs:
          # Action can not skip PRs, set it to 100 years to cover it.
          days-before-pr-stale: 36524

+          # Max GitHub API operations per run before early exit. Next run resumes from cache. GH API limit: 5000.
+          operations-per-run: 1500
+
          # Labels
          stale-issue-label: 'stale'
          remove-stale-when-updated: true
-          only-issue-labels: 'awaiting-requester'
+          any-of-labels: 'awaiting-requester'
+          exempt-issue-labels: 'enhancement' 

          # Exemptions
          exempt-assignees: true
-          exempt-milestones: true
+
+          exempt-milestones: true
--- a/.github/workflows/label-from-dropdown.yml
+++ b/.github/workflows/label-from-dropdown.yml
@@ -1,82 +0,0 @@
-name: Label issues from dropdowns
-
-on:
-  issues:
-    types: [opened]
-
-jobs:
-  label_from_dropdown:
-    runs-on: ubuntu-latest
-    permissions:
-      issues: write
-    steps:
-      - name: Apply labels based on dropdown choices
-        uses: actions/github-script@v7
-        with:
-          script: |
-
-            const issueNumber = context.issue.number;
-            const owner = context.repo.owner;
-            const repo = context.repo.repo;
-
-            // Get the issue body
-            const body = context.payload.issue.body;
-
-            // Helper to find dropdown value in the body (assuming markdown format)
-            function extractSectionValue(heading) {
-              const regex = new RegExp(`### ${heading}\\s+([\\s\\S]*?)(?:\\n###|$)`, 'i');
-              const match = body.match(regex);
-              if (match) {
-                // Get the first non-empty line after the heading
-                const lines = match[1].split('\n').map(l => l.trim()).filter(Boolean);
-                return lines[0] || null;
-              }
-              return null;
-            }
-
-            // Extract dropdown selections
-            const category = extractSectionValue('Category');
-            const metrics = extractSectionValue('Affected Metrics');
-            const component = extractSectionValue('Component');
-
-            // Build labels to add
-            let labelsToAdd = [];
-            if (category) labelsToAdd.push(category);
-            if (metrics) labelsToAdd.push(metrics);
-            if (component) labelsToAdd.push(component);
-
-            // Get existing labels in the repo
-            const { data: existingLabels } = await github.rest.issues.listLabelsForRepo({
-              owner,
-              repo,
-              per_page: 100
-            });
-            const existingLabelNames = existingLabels.map(l => l.name);
-
-            // Find labels that need to be created
-            const labelsToCreate = labelsToAdd.filter(label => !existingLabelNames.includes(label));
-
-            // Create missing labels (with a default color)
-            for (const label of labelsToCreate) {
-              try {
-                await github.rest.issues.createLabel({
-                  owner,
-                  repo,
-                  name: label,
-                  color: 'ededed' // light gray, you can pick any hex color
-                });
-              } catch (e) {
-                // Ignore if label already exists (race condition), otherwise rethrow
-                if (!e || e.status !== 422) throw e;
-              }
-            }
-
-            // Now apply all labels (they all exist now)
-            if (labelsToAdd.length > 0) {
-              await github.rest.issues.addLabels({
-                owner,
-                repo,
-                issue_number: issueNumber,
-                labels: labelsToAdd
-              });
-            } 
--- a/.github/workflows/vulncheck.yml
+++ b/.github/workflows/vulncheck.yml
@@ -19,11 +19,11 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Check out code into the Go module directory
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
      - name: Set up Go
-        uses: actions/setup-go@v5
+        uses: actions/setup-go@v6
        with:
-          go-version: 1.25.x
+          go-version: 1.26.x
          # cached: false
      - name: Get official govulncheck
        run: go install golang.org/x/vuln/cmd/govulncheck@latest
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,7 @@ dist
 *.exe
 internal/cmd/hub/hub
 internal/cmd/agent/agent
+agent.test
 node_modules
 build
 *timestamp*
--- a/.goreleaser.yml
+++ b/.goreleaser.yml
@@ -16,10 +16,21 @@ builds:
    goos:
      - linux
      - darwin
+      - windows
+      - freebsd
    goarch:
      - amd64
      - arm64
      - arm
+    ignore:
+      - goos: windows
+        goarch: arm64
+      - goos: windows
+        goarch: arm
+      - goos: freebsd
+        goarch: arm64
+      - goos: freebsd
+        goarch: arm

  - id: beszel-agent
    binary: beszel-agent
@@ -65,6 +76,18 @@ builds:
      - goos: windows
        goarch: riscv64

+  - id: beszel-agent-linux-amd64-glibc
+    binary: beszel-agent
+    main: internal/cmd/agent/agent.go
+    env:
+      - CGO_ENABLED=0
+    flags:
+      - -tags=glibc
+    goos:
+      - linux
+    goarch:
+      - amd64
+
 archives:
  - id: beszel-agent
    formats: [tar.gz]
@@ -78,6 +101,15 @@ archives:
      - goos: windows
        formats: [zip]

+  - id: beszel-agent-linux-amd64-glibc
+    formats: [tar.gz]
+    ids:
+      - beszel-agent-linux-amd64-glibc
+    name_template: >-
+      {{ .Binary }}_
+      {{- .Os }}_
+      {{- .Arch }}_glibc
+
  - id: beszel
    formats: [tar.gz]
    ids:
@@ -86,6 +118,9 @@ archives:
      {{ .Binary }}_
      {{- .Os }}_
      {{- .Arch }}
+    format_overrides:
+      - goos: windows
+        formats: [zip]

 nfpms:
  - id: beszel-agent
@@ -123,9 +158,7 @@ nfpms:
        - debconf
      scripts:
        templates: ./supplemental/debian/templates
-        # Currently broken due to a bug in goreleaser
-        # https://github.com/goreleaser/goreleaser/issues/5487
-        #config: ./supplemental/debian/config.sh
+        config: ./supplemental/debian/config.sh

 scoops:
  - ids: [beszel-agent]
--- a/41
+++ b/41
@@ -3,6 +3,40 @@ OS ?= $(shell go env GOOS)
 ARCH ?= $(shell go env GOARCH)
 # Skip building the web UI if true
 SKIP_WEB ?= false
+# Controls NVML/glibc agent build tag behavior:
+# - auto (default): enable on linux/amd64 glibc hosts
+# - true: always enable
+# - false: always disable
+NVML ?= auto
+
+# Detect glibc host for local linux/amd64 builds.
+HOST_GLIBC := $(shell \
+	if [ "$(OS)" = "linux" ] && [ "$(ARCH)" = "amd64" ]; then \
+		for p in /lib64/ld-linux-x86-64.so.2 /lib/x86_64-linux-gnu/ld-linux-x86-64.so.2 /lib/ld-linux-x86-64.so.2; do \
+			[ -e "$$p" ] && { echo true; exit 0; }; \
+		done; \
+		if command -v ldd >/dev/null 2>&1; then \
+			if ldd --version 2>&1 | tr '[:upper:]' '[:lower:]' | awk '/gnu libc|glibc/{found=1} END{exit !found}'; then \
+				echo true; \
+			else \
+				echo false; \
+			fi; \
+		else \
+			echo false; \
+		fi; \
+	else \
+		echo false; \
+	fi)
+
+# Enable glibc build tag for NVML on supported Linux builds.
+AGENT_GO_TAGS :=
+ifeq ($(NVML),true)
+AGENT_GO_TAGS := -tags glibc
+else ifeq ($(NVML),auto)
+ifeq ($(HOST_GLIBC),true)
+AGENT_GO_TAGS := -tags glibc
+endif
+endif

 # Set executable extension based on target OS
 EXE_EXT := $(if $(filter windows,$(OS)),.exe,)
@@ -17,7 +51,6 @@ clean:
 lint:
 	golangci-lint run

-test: export GOEXPERIMENT=synctest
 test:
 	go test -tags=testing ./...

@@ -54,7 +87,7 @@ fetch-smartctl-conditional:

 # Update build-agent to include conditional .NET build
 build-agent: tidy build-dotnet-conditional fetch-smartctl-conditional
-	GOOS=$(OS) GOARCH=$(ARCH) go build -o ./build/beszel-agent_$(OS)_$(ARCH)$(EXE_EXT) -ldflags "-w -s" ./internal/cmd/agent
+	GOOS=$(OS) GOARCH=$(ARCH) go build $(AGENT_GO_TAGS) -o ./build/beszel-agent_$(OS)_$(ARCH)$(EXE_EXT) -ldflags "-w -s" ./internal/cmd/agent

 build-hub: tidy $(if $(filter false,$(SKIP_WEB)),build-web-ui)
 	GOOS=$(OS) GOARCH=$(ARCH) go build -o ./build/beszel_$(OS)_$(ARCH)$(EXE_EXT) -ldflags "-w -s" ./internal/cmd/hub
@@ -90,9 +123,9 @@ dev-hub:

 dev-agent:
 	@if command -v entr >/dev/null 2>&1; then \
-		find ./internal/cmd/agent/*.go ./agent/*.go | entr -r go run github.com/henrygd/beszel/internal/cmd/agent; \
+		find ./internal/cmd/agent/*.go ./agent/*.go | entr -r go run $(AGENT_GO_TAGS) github.com/henrygd/beszel/internal/cmd/agent; \
 	else \
-		go run github.com/henrygd/beszel/internal/cmd/agent; \
+		go run $(AGENT_GO_TAGS) github.com/henrygd/beszel/internal/cmd/agent; \
 	fi
 	
 build-dotnet:
--- a/agent/agent.go
+++ b/agent/agent.go
@@ -5,19 +5,17 @@
 package agent

 import (
-	"crypto/sha256"
-	"encoding/hex"
 	"log/slog"
-	"os"
-	"path/filepath"
 	"strings"
 	"sync"
+	"time"

 	"github.com/gliderlabs/ssh"
 	"github.com/henrygd/beszel"
 	"github.com/henrygd/beszel/agent/deltatracker"
+	"github.com/henrygd/beszel/agent/utils"
+	"github.com/henrygd/beszel/internal/common"
 	"github.com/henrygd/beszel/internal/entities/system"
-	"github.com/shirou/gopsutil/v4/host"
 	gossh "golang.org/x/crypto/ssh"
 )

@@ -29,12 +27,15 @@ type Agent struct {
 	fsNames                   []string                                              // List of filesystem device names being monitored
 	fsStats                   map[string]*system.FsStats                            // Keeps track of disk stats for each filesystem
 	diskPrev                  map[uint16]map[string]prevDisk                        // Previous disk I/O counters per cache interval
+	diskUsageCacheDuration    time.Duration                                         // How long to cache disk usage (to avoid waking sleeping disks)
+	lastDiskUsageUpdate       time.Time                                             // Last time disk usage was collected
 	netInterfaces             map[string]struct{}                                   // Stores all valid network interfaces
 	netIoStats                map[uint16]system.NetIoStats                          // Keeps track of bandwidth usage per cache interval
 	netInterfaceDeltaTrackers map[uint16]*deltatracker.DeltaTracker[string, uint64] // Per-cache-time NIC delta trackers
 	dockerManager             *dockerManager                                        // Manages Docker API requests
 	sensorConfig              *SensorConfig                                         // Sensors config
-	systemInfo                system.Info                                           // Host system info
+	systemInfo                system.Info                                           // Host system info (dynamic)
+	systemDetails             system.Details                                        // Host system details (static, once-per-connection)
 	gpuManager                *GPUManager                                           // Manages GPU data
 	cache                     *systemDataCache                                      // Cache for system stats based on cache time
 	connectionManager         *ConnectionManager                                    // Channel to signal connection events
@@ -60,17 +61,28 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {
 	agent.netIoStats = make(map[uint16]system.NetIoStats)
 	agent.netInterfaceDeltaTrackers = make(map[uint16]*deltatracker.DeltaTracker[string, uint64])

-	agent.dataDir, err = getDataDir(dataDir...)
+	agent.dataDir, err = GetDataDir(dataDir...)
 	if err != nil {
 		slog.Warn("Data directory not found")
 	} else {
 		slog.Info("Data directory", "path", agent.dataDir)
 	}

-	agent.memCalc, _ = GetEnv("MEM_CALC")
+	agent.memCalc, _ = utils.GetEnv("MEM_CALC")
 	agent.sensorConfig = agent.newSensorConfig()
+
+	// Parse disk usage cache duration (e.g., "15m", "1h") to avoid waking sleeping disks
+	if diskUsageCache, exists := utils.GetEnv("DISK_USAGE_CACHE"); exists {
+		if duration, err := time.ParseDuration(diskUsageCache); err == nil {
+			agent.diskUsageCacheDuration = duration
+			slog.Info("DISK_USAGE_CACHE", "duration", duration)
+		} else {
+			slog.Warn("Invalid DISK_USAGE_CACHE", "err", err)
+		}
+	}
+
 	// Set up slog with a log level determined by the LOG_LEVEL env var
-	if logLevelStr, exists := GetEnv("LOG_LEVEL"); exists {
+	if logLevelStr, exists := utils.GetEnv("LOG_LEVEL"); exists {
 		switch strings.ToLower(logLevelStr) {
 		case "debug":
 			agent.debug = true
@@ -84,8 +96,21 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {

 	slog.Debug(beszel.Version)

+	// initialize docker manager
+	agent.dockerManager = newDockerManager()
+
 	// initialize system info
-	agent.initializeSystemInfo()
+	agent.refreshSystemDetails()
+
+	// SMART_INTERVAL env var to update smart data at this interval
+	if smartIntervalEnv, exists := utils.GetEnv("SMART_INTERVAL"); exists {
+		if duration, err := time.ParseDuration(smartIntervalEnv); err == nil && duration > 0 {
+			agent.systemDetails.SmartInterval = duration
+			slog.Info("SMART_INTERVAL", "duration", duration)
+		} else {
+			slog.Warn("Invalid SMART_INTERVAL", "err", err)
+		}
+	}

 	// initialize connection manager
 	agent.connectionManager = newConnectionManager(agent)
@@ -99,9 +124,6 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {
 	// initialize net io stats
 	agent.initializeNetIoStats()

-	// initialize docker manager
-	agent.dockerManager = newDockerManager(agent)
-
 	agent.systemdManager, err = newSystemdManager()
 	if err != nil {
 		slog.Debug("Systemd", "err", err)
@@ -120,25 +142,17 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {

 	// if debugging, print stats
 	if agent.debug {
-		slog.Debug("Stats", "data", agent.gatherStats(0))
+		slog.Debug("Stats", "data", agent.gatherStats(common.DataRequestOptions{CacheTimeMs: 60_000, IncludeDetails: true}))
 	}

 	return agent, nil
 }

-// GetEnv retrieves an environment variable with a "BESZEL_AGENT_" prefix, or falls back to the unprefixed key.
-func GetEnv(key string) (value string, exists bool) {
-	if value, exists = os.LookupEnv("BESZEL_AGENT_" + key); exists {
-		return value, exists
-	}
-	// Fallback to the old unprefixed key
-	return os.LookupEnv(key)
-}
-
-func (a *Agent) gatherStats(cacheTimeMs uint16) *system.CombinedData {
+func (a *Agent) gatherStats(options common.DataRequestOptions) *system.CombinedData {
 	a.Lock()
 	defer a.Unlock()

+	cacheTimeMs := options.CacheTimeMs
 	data, isCached := a.cache.Get(cacheTimeMs)
 	if isCached {
 		slog.Debug("Cached data", "cacheTimeMs", cacheTimeMs)
@@ -149,6 +163,12 @@ func (a *Agent) gatherStats(cacheTimeMs uint16) *system.CombinedData {
 		Stats: a.getSystemStats(cacheTimeMs),
 		Info:  a.systemInfo,
 	}
+
+	// Include static system details only when requested
+	if options.IncludeDetails {
+		data.Details = &a.systemDetails
+	}
+
 	// slog.Info("System data", "data", data, "cacheTimeMs", cacheTimeMs)

 	if a.dockerManager != nil {
@@ -184,7 +204,7 @@ func (a *Agent) gatherStats(cacheTimeMs uint16) *system.CombinedData {
 			data.Stats.ExtraFs[key] = stats
 			// Add percentages to Info struct for dashboard
 			if stats.DiskTotal > 0 {
-				pct := twoDecimals((stats.DiskUsed / stats.DiskTotal) * 100)
+				pct := utils.TwoDecimals((stats.DiskUsed / stats.DiskTotal) * 100)
 				data.Info.ExtraFsPct[key] = pct
 			}
 		}
@@ -195,37 +215,12 @@ func (a *Agent) gatherStats(cacheTimeMs uint16) *system.CombinedData {
 	return data
 }

-// StartAgent initializes and starts the agent with optional WebSocket connection
+// Start initializes and starts the agent with optional WebSocket connection
 func (a *Agent) Start(serverOptions ServerOptions) error {
 	a.keys = serverOptions.Keys
 	return a.connectionManager.Start(serverOptions)
 }

 func (a *Agent) getFingerprint() string {
-	// first look for a fingerprint in the data directory
-	if a.dataDir != "" {
-		if fp, err := os.ReadFile(filepath.Join(a.dataDir, "fingerprint")); err == nil {
-			return string(fp)
-		}
-	}
-
-	// if no fingerprint is found, generate one
-	fingerprint, err := host.HostID()
-	if err != nil || fingerprint == "" {
-		fingerprint = a.systemInfo.Hostname + a.systemInfo.CpuModel
-	}
-
-	// hash fingerprint
-	sum := sha256.Sum256([]byte(fingerprint))
-	fingerprint = hex.EncodeToString(sum[:24])
-
-	// save fingerprint to data directory
-	if a.dataDir != "" {
-		err = os.WriteFile(filepath.Join(a.dataDir, "fingerprint"), []byte(fingerprint), 0644)
-		if err != nil {
-			slog.Warn("Failed to save fingerprint", "err", err)
-		}
-	}
-
-	return fingerprint
+	return GetFingerprint(a.dataDir, a.systemDetails.Hostname, a.systemDetails.CpuModel)
 }
--- a/agent/agent_cache_test.go
+++ b/agent/agent_cache_test.go
@@ -1,5 +1,4 @@
 //go:build testing
-// +build testing

 package agent

@@ -22,7 +21,7 @@ func createTestCacheData() *system.CombinedData {
 			DiskTotal: 100000,
 		},
 		Info: system.Info{
-			Hostname: "test-host",
+			AgentVersion: "0.12.0",
 		},
 		Containers: []*container.Stats{
 			{
@@ -128,7 +127,7 @@ func TestCacheMultipleIntervals(t *testing.T) {
 				Mem: 16384,
 			},
 			Info: system.Info{
-				Hostname: "test-host-2",
+				AgentVersion: "0.12.0",
 			},
 			Containers: []*container.Stats{},
 		}
@@ -171,7 +170,7 @@ func TestCacheOverwrite(t *testing.T) {
 			Mem: 32768,
 		},
 		Info: system.Info{
-			Hostname: "updated-host",
+			AgentVersion: "0.12.0",
 		},
 		Containers: []*container.Stats{},
 	}
--- a/agent/agent_test_helpers.go
+++ b/agent/agent_test_helpers.go
@@ -1,5 +1,4 @@
 //go:build testing
-// +build testing

 package agent

--- a/agent/battery/battery.go
+++ b/agent/battery/battery.go
@@ -65,7 +65,7 @@ func GetBatteryStats() (batteryPercent uint8, batteryState uint8, err error) {
 			continue
 		}
 		totalCapacity += bat.Full
-		totalCharge += bat.Current
+		totalCharge += min(bat.Current, bat.Full)
 		if bat.State.Raw >= 0 {
 			batteryState = uint8(bat.State.Raw)
 		}
--- a/agent/client.go
+++ b/agent/client.go
@@ -14,10 +14,8 @@ import (
 	"time"

 	"github.com/henrygd/beszel"
+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/common"
-	"github.com/henrygd/beszel/internal/entities/smart"
-	"github.com/henrygd/beszel/internal/entities/system"
-	"github.com/henrygd/beszel/internal/entities/systemd"

 	"github.com/fxamacker/cbor/v2"
 	"github.com/lxzan/gws"
@@ -46,7 +44,7 @@ type WebSocketClient struct {
 // newWebSocketClient creates a new WebSocket client for the given agent.
 // It reads configuration from environment variables and validates the hub URL.
 func newWebSocketClient(agent *Agent) (client *WebSocketClient, err error) {
-	hubURLStr, exists := GetEnv("HUB_URL")
+	hubURLStr, exists := utils.GetEnv("HUB_URL")
 	if !exists {
 		return nil, errors.New("HUB_URL environment variable not set")
 	}
@@ -75,12 +73,12 @@ func newWebSocketClient(agent *Agent) (client *WebSocketClient, err error) {
 // If neither is set, it returns an error.
 func getToken() (string, error) {
 	// get token from env var
-	token, _ := GetEnv("TOKEN")
+	token, _ := utils.GetEnv("TOKEN")
 	if token != "" {
 		return token, nil
 	}
 	// get token from file
-	tokenFile, _ := GetEnv("TOKEN_FILE")
+	tokenFile, _ := utils.GetEnv("TOKEN_FILE")
 	if tokenFile == "" {
 		return "", errors.New("must set TOKEN or TOKEN_FILE")
 	}
@@ -200,8 +198,8 @@ func (client *WebSocketClient) handleAuthChallenge(msg *common.HubRequest[cbor.R
 	}

 	if authRequest.NeedSysInfo {
-		response.Name, _ = GetEnv("SYSTEM_NAME")
-		response.Hostname = client.agent.systemInfo.Hostname
+		response.Name, _ = utils.GetEnv("SYSTEM_NAME")
+		response.Hostname = client.agent.systemDetails.Hostname
 		serverAddr := client.agent.connectionManager.serverOptions.Addr
 		_, response.Port, _ = net.SplitHostPort(serverAddr)
 	}
@@ -259,40 +257,16 @@ func (client *WebSocketClient) sendMessage(data any) error {
 	return err
 }

-// sendResponse sends a response with optional request ID for the new protocol
+// sendResponse sends a response with optional request ID.
+// For ID-based requests, we must populate legacy typed fields for backward
+// compatibility with older hubs (<= 0.17) that don't read the generic Data field.
 func (client *WebSocketClient) sendResponse(data any, requestID *uint32) error {
 	if requestID != nil {
-		// New format with ID - use typed fields
-		response := common.AgentResponse{
-			Id: requestID,
-		}
-
-		// Set the appropriate typed field based on data type
-		switch v := data.(type) {
-		case *system.CombinedData:
-			response.SystemData = v
-		case *common.FingerprintResponse:
-			response.Fingerprint = v
-		case string:
-			response.String = &v
-		case map[string]smart.SmartData:
-			response.SmartData = v
-		case systemd.ServiceDetails:
-			response.ServiceInfo = v
-		// case []byte:
-		// 	response.RawBytes = v
-		// case string:
-		// 	response.RawBytes = []byte(v)
-		default:
-			// For any other type, convert to error
-			response.Error = fmt.Sprintf("unsupported response type: %T", data)
-		}
-
+		response := newAgentResponse(data, requestID)
 		return client.sendMessage(response)
-	} else {
-		// Legacy format - send data directly
-		return client.sendMessage(data)
 	}
+	// Legacy format - send data directly
+	return client.sendMessage(data)
 }

 // getUserAgent returns one of two User-Agent strings based on current time.
--- a/agent/client_test.go
+++ b/agent/client_test.go
@@ -1,5 +1,4 @@
 //go:build testing
-// +build testing

 package agent

@@ -71,19 +70,11 @@ func TestNewWebSocketClient(t *testing.T) {
 		t.Run(tc.name, func(t *testing.T) {
 			// Set up environment
 			if tc.hubURL != "" {
-				os.Setenv("BESZEL_AGENT_HUB_URL", tc.hubURL)
-			} else {
-				os.Unsetenv("BESZEL_AGENT_HUB_URL")
+				t.Setenv("BESZEL_AGENT_HUB_URL", tc.hubURL)
 			}
 			if tc.token != "" {
-				os.Setenv("BESZEL_AGENT_TOKEN", tc.token)
-			} else {
-				os.Unsetenv("BESZEL_AGENT_TOKEN")
+				t.Setenv("BESZEL_AGENT_TOKEN", tc.token)
 			}
-			defer func() {
-				os.Unsetenv("BESZEL_AGENT_HUB_URL")
-				os.Unsetenv("BESZEL_AGENT_TOKEN")
-			}()

 			client, err := newWebSocketClient(agent)

@@ -139,12 +130,8 @@ func TestWebSocketClient_GetOptions(t *testing.T) {
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
 			// Set up environment
-			os.Setenv("BESZEL_AGENT_HUB_URL", tc.inputURL)
-			os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
-			defer func() {
-				os.Unsetenv("BESZEL_AGENT_HUB_URL")
-				os.Unsetenv("BESZEL_AGENT_TOKEN")
-			}()
+			t.Setenv("BESZEL_AGENT_HUB_URL", tc.inputURL)
+			t.Setenv("BESZEL_AGENT_TOKEN", "test-token")

 			client, err := newWebSocketClient(agent)
 			require.NoError(t, err)
@@ -186,12 +173,8 @@ func TestWebSocketClient_VerifySignature(t *testing.T) {
 	require.NoError(t, err)

 	// Set up environment
-	os.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
-	os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
-	defer func() {
-		os.Unsetenv("BESZEL_AGENT_HUB_URL")
-		os.Unsetenv("BESZEL_AGENT_TOKEN")
-	}()
+	t.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
+	t.Setenv("BESZEL_AGENT_TOKEN", "test-token")

 	client, err := newWebSocketClient(agent)
 	require.NoError(t, err)
@@ -259,12 +242,8 @@ func TestWebSocketClient_HandleHubRequest(t *testing.T) {
 	agent := createTestAgent(t)

 	// Set up environment
-	os.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
-	os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
-	defer func() {
-		os.Unsetenv("BESZEL_AGENT_HUB_URL")
-		os.Unsetenv("BESZEL_AGENT_TOKEN")
-	}()
+	t.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
+	t.Setenv("BESZEL_AGENT_TOKEN", "test-token")

 	client, err := newWebSocketClient(agent)
 	require.NoError(t, err)
@@ -351,13 +330,8 @@ func TestGetUserAgent(t *testing.T) {
 func TestWebSocketClient_Close(t *testing.T) {
 	agent := createTestAgent(t)

-	// Set up environment
-	os.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
-	os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
-	defer func() {
-		os.Unsetenv("BESZEL_AGENT_HUB_URL")
-		os.Unsetenv("BESZEL_AGENT_TOKEN")
-	}()
+	t.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
+	t.Setenv("BESZEL_AGENT_TOKEN", "test-token")

 	client, err := newWebSocketClient(agent)
 	require.NoError(t, err)
@@ -372,13 +346,8 @@ func TestWebSocketClient_Close(t *testing.T) {
 func TestWebSocketClient_ConnectRateLimit(t *testing.T) {
 	agent := createTestAgent(t)

-	// Set up environment
-	os.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
-	os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
-	defer func() {
-		os.Unsetenv("BESZEL_AGENT_HUB_URL")
-		os.Unsetenv("BESZEL_AGENT_TOKEN")
-	}()
+	t.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
+	t.Setenv("BESZEL_AGENT_TOKEN", "test-token")

 	client, err := newWebSocketClient(agent)
 	require.NoError(t, err)
@@ -394,20 +363,10 @@ func TestWebSocketClient_ConnectRateLimit(t *testing.T) {

 // TestGetToken tests the getToken function with various scenarios
 func TestGetToken(t *testing.T) {
-	unsetEnvVars := func() {
-		os.Unsetenv("BESZEL_AGENT_TOKEN")
-		os.Unsetenv("TOKEN")
-		os.Unsetenv("BESZEL_AGENT_TOKEN_FILE")
-		os.Unsetenv("TOKEN_FILE")
-	}
-
 	t.Run("token from TOKEN environment variable", func(t *testing.T) {
-		unsetEnvVars()
-
 		// Set TOKEN env var
 		expectedToken := "test-token-from-env"
-		os.Setenv("TOKEN", expectedToken)
-		defer os.Unsetenv("TOKEN")
+		t.Setenv("TOKEN", expectedToken)

 		token, err := getToken()
 		assert.NoError(t, err)
@@ -415,12 +374,9 @@ func TestGetToken(t *testing.T) {
 	})

 	t.Run("token from BESZEL_AGENT_TOKEN environment variable", func(t *testing.T) {
-		unsetEnvVars()
-
 		// Set BESZEL_AGENT_TOKEN env var (should take precedence)
 		expectedToken := "test-token-from-beszel-env"
-		os.Setenv("BESZEL_AGENT_TOKEN", expectedToken)
-		defer os.Unsetenv("BESZEL_AGENT_TOKEN")
+		t.Setenv("BESZEL_AGENT_TOKEN", expectedToken)

 		token, err := getToken()
 		assert.NoError(t, err)
@@ -428,8 +384,6 @@ func TestGetToken(t *testing.T) {
 	})

 	t.Run("token from TOKEN_FILE", func(t *testing.T) {
-		unsetEnvVars()
-
 		// Create a temporary token file
 		expectedToken := "test-token-from-file"
 		tokenFile, err := os.CreateTemp("", "token-test-*.txt")
@@ -441,8 +395,7 @@ func TestGetToken(t *testing.T) {
 		tokenFile.Close()

 		// Set TOKEN_FILE env var
-		os.Setenv("TOKEN_FILE", tokenFile.Name())
-		defer os.Unsetenv("TOKEN_FILE")
+		t.Setenv("TOKEN_FILE", tokenFile.Name())

 		token, err := getToken()
 		assert.NoError(t, err)
@@ -450,8 +403,6 @@ func TestGetToken(t *testing.T) {
 	})

 	t.Run("token from BESZEL_AGENT_TOKEN_FILE", func(t *testing.T) {
-		unsetEnvVars()
-
 		// Create a temporary token file
 		expectedToken := "test-token-from-beszel-file"
 		tokenFile, err := os.CreateTemp("", "token-test-*.txt")
@@ -463,8 +414,7 @@ func TestGetToken(t *testing.T) {
 		tokenFile.Close()

 		// Set BESZEL_AGENT_TOKEN_FILE env var (should take precedence)
-		os.Setenv("BESZEL_AGENT_TOKEN_FILE", tokenFile.Name())
-		defer os.Unsetenv("BESZEL_AGENT_TOKEN_FILE")
+		t.Setenv("BESZEL_AGENT_TOKEN_FILE", tokenFile.Name())

 		token, err := getToken()
 		assert.NoError(t, err)
@@ -472,8 +422,6 @@ func TestGetToken(t *testing.T) {
 	})

 	t.Run("TOKEN takes precedence over TOKEN_FILE", func(t *testing.T) {
-		unsetEnvVars()
-
 		// Create a temporary token file
 		fileToken := "token-from-file"
 		tokenFile, err := os.CreateTemp("", "token-test-*.txt")
@@ -486,12 +434,8 @@ func TestGetToken(t *testing.T) {

 		// Set both TOKEN and TOKEN_FILE
 		envToken := "token-from-env"
-		os.Setenv("TOKEN", envToken)
-		os.Setenv("TOKEN_FILE", tokenFile.Name())
-		defer func() {
-			os.Unsetenv("TOKEN")
-			os.Unsetenv("TOKEN_FILE")
-		}()
+		t.Setenv("TOKEN", envToken)
+		t.Setenv("TOKEN_FILE", tokenFile.Name())

 		token, err := getToken()
 		assert.NoError(t, err)
@@ -499,7 +443,10 @@ func TestGetToken(t *testing.T) {
 	})

 	t.Run("error when neither TOKEN nor TOKEN_FILE is set", func(t *testing.T) {
-		unsetEnvVars()
+		t.Setenv("BESZEL_AGENT_TOKEN", "")
+		t.Setenv("TOKEN", "")
+		t.Setenv("BESZEL_AGENT_TOKEN_FILE", "")
+		t.Setenv("TOKEN_FILE", "")

 		token, err := getToken()
 		assert.Error(t, err)
@@ -508,11 +455,8 @@ func TestGetToken(t *testing.T) {
 	})

 	t.Run("error when TOKEN_FILE points to non-existent file", func(t *testing.T) {
-		unsetEnvVars()
-
 		// Set TOKEN_FILE to a non-existent file
-		os.Setenv("TOKEN_FILE", "/non/existent/file.txt")
-		defer os.Unsetenv("TOKEN_FILE")
+		t.Setenv("TOKEN_FILE", "/non/existent/file.txt")

 		token, err := getToken()
 		assert.Error(t, err)
@@ -521,8 +465,6 @@ func TestGetToken(t *testing.T) {
 	})

 	t.Run("handles empty token file", func(t *testing.T) {
-		unsetEnvVars()
-
 		// Create an empty token file
 		tokenFile, err := os.CreateTemp("", "token-test-*.txt")
 		require.NoError(t, err)
@@ -530,8 +472,7 @@ func TestGetToken(t *testing.T) {
 		tokenFile.Close()

 		// Set TOKEN_FILE env var
-		os.Setenv("TOKEN_FILE", tokenFile.Name())
-		defer os.Unsetenv("TOKEN_FILE")
+		t.Setenv("TOKEN_FILE", tokenFile.Name())

 		token, err := getToken()
 		assert.NoError(t, err)
@@ -539,8 +480,6 @@ func TestGetToken(t *testing.T) {
 	})

 	t.Run("strips whitespace from TOKEN_FILE", func(t *testing.T) {
-		unsetEnvVars()
-
 		tokenWithWhitespace := "  test-token-with-whitespace  \n\t"
 		expectedToken := "test-token-with-whitespace"
 		tokenFile, err := os.CreateTemp("", "token-test-*.txt")
@@ -551,8 +490,7 @@ func TestGetToken(t *testing.T) {
 		require.NoError(t, err)
 		tokenFile.Close()

-		os.Setenv("TOKEN_FILE", tokenFile.Name())
-		defer os.Unsetenv("TOKEN_FILE")
+		t.Setenv("TOKEN_FILE", tokenFile.Name())

 		token, err := getToken()
 		assert.NoError(t, err)
--- a/agent/connection_manager.go
+++ b/agent/connection_manager.go
@@ -1,9 +1,9 @@
 package agent

 import (
+	"context"
 	"errors"
 	"log/slog"
-	"os"
 	"os/signal"
 	"syscall"
 	"time"
@@ -91,8 +91,8 @@ func (c *ConnectionManager) Start(serverOptions ServerOptions) error {
 	c.eventChan = make(chan ConnectionEvent, 1)

 	// signal handling for shutdown
-	sigChan := make(chan os.Signal, 1)
-	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
+	sigCtx, stopSignals := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
+	defer stopSignals()

 	c.startWsTicker()
 	c.connect()
@@ -109,8 +109,8 @@ func (c *ConnectionManager) Start(serverOptions ServerOptions) error {
 			_ = c.startWebSocketConnection()
 		case <-healthTicker:
 			_ = health.Update()
-		case <-sigChan:
-			slog.Info("Shutting down")
+		case <-sigCtx.Done():
+			slog.Info("Shutting down", "cause", context.Cause(sigCtx))
 			_ = c.agent.StopServer()
 			c.closeWebSocket()
 			return health.CleanUp()
--- a/agent/connection_manager_test.go
+++ b/agent/connection_manager_test.go
@@ -1,5 +1,4 @@
 //go:build testing
-// +build testing

 package agent

@@ -8,7 +7,6 @@ import (
 	"fmt"
 	"net"
 	"net/url"
-	"os"
 	"testing"
 	"time"

@@ -184,10 +182,6 @@ func TestConnectionManager_TickerManagement(t *testing.T) {

 // TestConnectionManager_WebSocketConnectionFlow tests WebSocket connection logic
 func TestConnectionManager_WebSocketConnectionFlow(t *testing.T) {
-	if testing.Short() {
-		t.Skip("Skipping WebSocket connection test in short mode")
-	}
-
 	agent := createTestAgent(t)
 	cm := agent.connectionManager

@@ -197,19 +191,18 @@ func TestConnectionManager_WebSocketConnectionFlow(t *testing.T) {
 	assert.Equal(t, Disconnected, cm.State, "State should remain Disconnected after failed connection")

 	// Test with invalid URL
-	os.Setenv("BESZEL_AGENT_HUB_URL", "invalid-url")
-	os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
-	defer func() {
-		os.Unsetenv("BESZEL_AGENT_HUB_URL")
-		os.Unsetenv("BESZEL_AGENT_TOKEN")
-	}()
-
-	// Test with missing token
-	os.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
-	os.Unsetenv("BESZEL_AGENT_TOKEN")
+	t.Setenv("BESZEL_AGENT_HUB_URL", "1,33%")
+	t.Setenv("BESZEL_AGENT_TOKEN", "test-token")

 	_, err2 := newWebSocketClient(agent)
-	assert.Error(t, err2, "WebSocket client creation should fail without token")
+	assert.Error(t, err2, "WebSocket client creation should fail with invalid URL")
+
+	// Test with missing token
+	t.Setenv("BESZEL_AGENT_HUB_URL", "http://localhost:8080")
+	t.Setenv("BESZEL_AGENT_TOKEN", "")
+
+	_, err3 := newWebSocketClient(agent)
+	assert.Error(t, err3, "WebSocket client creation should fail without token")
 }

 // TestConnectionManager_ReconnectionLogic tests reconnection prevention logic
@@ -235,12 +228,8 @@ func TestConnectionManager_ConnectWithRateLimit(t *testing.T) {
 	cm := agent.connectionManager

 	// Set up environment for WebSocket client creation
-	os.Setenv("BESZEL_AGENT_HUB_URL", "ws://localhost:8080")
-	os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
-	defer func() {
-		os.Unsetenv("BESZEL_AGENT_HUB_URL")
-		os.Unsetenv("BESZEL_AGENT_TOKEN")
-	}()
+	t.Setenv("BESZEL_AGENT_HUB_URL", "ws://localhost:8080")
+	t.Setenv("BESZEL_AGENT_TOKEN", "test-token")

 	// Create WebSocket client
 	wsClient, err := newWebSocketClient(agent)
@@ -286,12 +275,8 @@ func TestConnectionManager_CloseWebSocket(t *testing.T) {
 	}, "Should not panic when closing nil WebSocket client")

 	// Set up environment and create WebSocket client
-	os.Setenv("BESZEL_AGENT_HUB_URL", "ws://localhost:8080")
-	os.Setenv("BESZEL_AGENT_TOKEN", "test-token")
-	defer func() {
-		os.Unsetenv("BESZEL_AGENT_HUB_URL")
-		os.Unsetenv("BESZEL_AGENT_TOKEN")
-	}()
+	t.Setenv("BESZEL_AGENT_HUB_URL", "ws://localhost:8080")
+	t.Setenv("BESZEL_AGENT_TOKEN", "test-token")

 	wsClient, err := newWebSocketClient(agent)
 	require.NoError(t, err)
--- a/agent/cpu.go
+++ b/agent/cpu.go
@@ -14,10 +14,10 @@ var lastPerCoreCpuTimes = make(map[uint16][]cpu.TimesStat)
 // init initializes the CPU monitoring by storing the initial CPU times
 // for the default 60-second cache interval.
 func init() {
-	if times, err := cpu.Times(false); err == nil {
+	if times, err := cpu.Times(false); err == nil && len(times) > 0 {
 		lastCpuTimes[60000] = times[0]
 	}
-	if perCoreTimes, err := cpu.Times(true); err == nil {
+	if perCoreTimes, err := cpu.Times(true); err == nil && len(perCoreTimes) > 0 {
 		lastPerCoreCpuTimes[60000] = perCoreTimes
 	}
 }
@@ -89,10 +89,7 @@ func getPerCoreCpuUsage(cacheTimeMs uint16) (system.Uint8Slice, error) {
 	lastTimes := lastPerCoreCpuTimes[cacheTimeMs]

 	// Limit to the number of cores available in both samples
-	length := len(perCoreTimes)
-	if len(lastTimes) < length {
-		length = len(lastTimes)
-	}
+	length := min(len(lastTimes), len(perCoreTimes))

 	usage := make([]uint8, length)
 	for i := 0; i < length; i++ {
--- a/agent/data_dir.go
+++ b/agent/data_dir.go
@@ -6,17 +6,19 @@ import (
 	"os"
 	"path/filepath"
 	"runtime"
+
+	"github.com/henrygd/beszel/agent/utils"
 )

-// getDataDir returns the path to the data directory for the agent and an error
+// GetDataDir returns the path to the data directory for the agent and an error
 // if the directory is not valid. Attempts to find the optimal data directory if
 // no data directories are provided.
-func getDataDir(dataDirs ...string) (string, error) {
+func GetDataDir(dataDirs ...string) (string, error) {
 	if len(dataDirs) > 0 {
 		return testDataDirs(dataDirs)
 	}

-	dataDir, _ := GetEnv("DATA_DIR")
+	dataDir, _ := utils.GetEnv("DATA_DIR")
 	if dataDir != "" {
 		dataDirs = append(dataDirs, dataDir)
 	}
--- a/agent/data_dir_test.go
+++ b/agent/data_dir_test.go
@@ -1,5 +1,4 @@
 //go:build testing
-// +build testing

 package agent

@@ -17,7 +16,7 @@ func TestGetDataDir(t *testing.T) {
 	// Test with explicit dataDir parameter
 	t.Run("explicit data dir", func(t *testing.T) {
 		tempDir := t.TempDir()
-		result, err := getDataDir(tempDir)
+		result, err := GetDataDir(tempDir)
 		require.NoError(t, err)
 		assert.Equal(t, tempDir, result)
 	})
@@ -26,7 +25,7 @@ func TestGetDataDir(t *testing.T) {
 	t.Run("explicit data dir - create new", func(t *testing.T) {
 		tempDir := t.TempDir()
 		newDir := filepath.Join(tempDir, "new-data-dir")
-		result, err := getDataDir(newDir)
+		result, err := GetDataDir(newDir)
 		require.NoError(t, err)
 		assert.Equal(t, newDir, result)

@@ -40,19 +39,9 @@ func TestGetDataDir(t *testing.T) {
 	t.Run("DATA_DIR environment variable", func(t *testing.T) {
 		tempDir := t.TempDir()

-		// Set environment variable
-		oldValue := os.Getenv("DATA_DIR")
-		defer func() {
-			if oldValue == "" {
-				os.Unsetenv("BESZEL_AGENT_DATA_DIR")
-			} else {
-				os.Setenv("BESZEL_AGENT_DATA_DIR", oldValue)
-			}
-		}()
+		t.Setenv("BESZEL_AGENT_DATA_DIR", tempDir)

-		os.Setenv("BESZEL_AGENT_DATA_DIR", tempDir)
-
-		result, err := getDataDir()
+		result, err := GetDataDir()
 		require.NoError(t, err)
 		assert.Equal(t, tempDir, result)
 	})
@@ -60,26 +49,15 @@ func TestGetDataDir(t *testing.T) {
 	// Test with invalid explicit dataDir
 	t.Run("invalid explicit data dir", func(t *testing.T) {
 		invalidPath := "/invalid/path/that/cannot/be/created"
-		_, err := getDataDir(invalidPath)
+		_, err := GetDataDir(invalidPath)
 		assert.Error(t, err)
 	})

 	// Test fallback behavior (empty dataDir, no env var)
 	t.Run("fallback to default directories", func(t *testing.T) {
-		// Clear DATA_DIR environment variable
-		oldValue := os.Getenv("DATA_DIR")
-		defer func() {
-			if oldValue == "" {
-				os.Unsetenv("DATA_DIR")
-			} else {
-				os.Setenv("DATA_DIR", oldValue)
-			}
-		}()
-		os.Unsetenv("DATA_DIR")
-
 		// This will try platform-specific defaults, which may or may not work
 		// We're mainly testing that it doesn't panic and returns some result
-		result, err := getDataDir()
+		result, err := GetDataDir()
 		// We don't assert success/failure here since it depends on system permissions
 		// Just verify we get a string result if no error
 		if err == nil {
--- a/agent/disk.go
+++ b/agent/disk.go
@@ -8,11 +8,31 @@ import (
 	"strings"
 	"time"

+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/system"

 	"github.com/shirou/gopsutil/v4/disk"
 )

+// fsRegistrationContext holds the shared lookup state needed to resolve a
+// filesystem into the tracked fsStats key and metadata.
+type fsRegistrationContext struct {
+	filesystem     string // value of optional FILESYSTEM env var
+	isWindows      bool // true on Windows; device names are then used as keys without filepath.Base
+	efPath         string // path to extra filesystems (default "/extra-filesystems")
+	diskIoCounters map[string]disk.IOCountersStat // I/O counters looked up by device key
+}
+
+// diskDiscovery groups the transient state for a single initializeDiskInfo run so
+// helper methods can share the same partitions, mount paths, and lookup functions.
+type diskDiscovery struct {
+	agent          *Agent // agent whose fsStats map receives the discovered filesystems
+	rootMountPoint string // mountpoint recorded on root entries that are not tied to a partition
+	partitions     []disk.PartitionStat // partitions searched when resolving FILESYSTEM / EXTRA_FILESYSTEMS
+	usageFn        func(string) (*disk.UsageStat, error) // usage probe; an error marks a configured path as invalid
+	ctx            fsRegistrationContext // lookup state handed to registerFilesystemStats
+}
+
 // parseFilesystemEntry parses a filesystem entry in the format "device__customname"
 // Returns the device/filesystem part and the custom name part
 func parseFilesystemEntry(entry string) (device, customName string) {
@@ -26,10 +46,230 @@ func parseFilesystemEntry(entry string) (device, customName string) {
 	return device, customName
 }

+// extraFilesystemPartitionInfo derives the I/O device and optional display name
+// for a mounted /extra-filesystems partition. Prefer the partition device reported
+// by the system and only use the folder name for custom naming metadata.
+func extraFilesystemPartitionInfo(p disk.PartitionStat) (device, customName string) {
+	device = strings.TrimSpace(p.Device)
+	folderDevice, customName := parseFilesystemEntry(filepath.Base(p.Mountpoint)) // folder name uses the "device__customname" format
+	if device == "" { // partition reported no device: fall back to the folder's device part
+		device = folderDevice
+	}
+	return device, customName
+}
+
+func isDockerSpecialMountpoint(mountpoint string) bool { // reports whether mountpoint is one of the /etc files listed below
+	switch mountpoint {
+	case "/etc/hosts", "/etc/resolv.conf", "/etc/hostname":
+		return true
+	}
+	return false
+}
+
+// registerFilesystemStats resolves the tracked key and stats payload for a
+// filesystem. The bool result is false when the resolved key is already in existing.
+func registerFilesystemStats(existing map[string]*system.FsStats, device, mountpoint string, root bool, customName string, ctx fsRegistrationContext) (string, *system.FsStats, bool) {
+	key := device
+	if !ctx.isWindows {
+		key = filepath.Base(device) // keep only the last path element of the device
+	}
+
+	if root {
+		// Try to map root device to a diskIoCounters entry. First checks for an
+		// exact key match, then uses findIoDevice for normalized / prefix-based
+		// matching (e.g. nda0p2 -> nda0), and finally falls back to FILESYSTEM.
+		if _, ioMatch := ctx.diskIoCounters[key]; !ioMatch {
+			if matchedKey, match := findIoDevice(key, ctx.diskIoCounters); match {
+				key = matchedKey
+			} else if ctx.filesystem != "" {
+				if matchedKey, match := findIoDevice(ctx.filesystem, ctx.diskIoCounters); match {
+					key = matchedKey
+				}
+			}
+			if _, ioMatch = ctx.diskIoCounters[key]; !ioMatch {
+				slog.Warn("Root I/O unmapped; set FILESYSTEM", "device", device, "mountpoint", mountpoint)
+			}
+		}
+	} else {
+		// Check if non-root has diskstats and prefer the folder device for
+		// /extra-filesystems mounts when the discovered partition device is a
+		// mapper path (e.g. luks UUID) that obscures the underlying block device.
+		if _, ioMatch := ctx.diskIoCounters[key]; !ioMatch {
+			if strings.HasPrefix(mountpoint, ctx.efPath) {
+				folderDevice, _ := parseFilesystemEntry(filepath.Base(mountpoint))
+				if folderDevice != "" {
+					if matchedKey, match := findIoDevice(folderDevice, ctx.diskIoCounters); match {
+						key = matchedKey
+					}
+				}
+			}
+			if _, ioMatch = ctx.diskIoCounters[key]; !ioMatch {
+				if matchedKey, match := findIoDevice(key, ctx.diskIoCounters); match {
+					key = matchedKey
+				}
+			}
+		}
+	}
+
+	if _, exists := existing[key]; exists {
+		return "", nil, false // key already tracked; nothing new to register
+	}
+
+	fsStats := &system.FsStats{Root: root, Mountpoint: mountpoint}
+	if customName != "" {
+		fsStats.Name = customName
+	}
+	return key, fsStats, true
+}
+
+// addFsStat inserts a discovered filesystem if it resolves to a new tracking
+// key. The key selection itself lives in registerFilesystemStats so that logic
+// can stay directly unit-tested.
+func (d *diskDiscovery) addFsStat(device, mountpoint string, root bool, customName string) {
+	key, fsStats, ok := registerFilesystemStats(d.agent.fsStats, device, mountpoint, root, customName, d.ctx)
+	if !ok {
+		return
+	}
+	d.agent.fsStats[key] = fsStats
+	name := key // logged name defaults to the key unless a custom name was given
+	if customName != "" {
+		name = customName
+	}
+	slog.Info("Detected disk", "name", name, "device", device, "mount", mountpoint, "io", key, "root", root)
+}
+
+// addConfiguredRootFs resolves FILESYSTEM against partitions first, then falls
+// back to direct diskstats matching for setups like ZFS where partitions do not
+// expose the physical device name. It reports whether FILESYSTEM was resolved.
+func (d *diskDiscovery) addConfiguredRootFs() bool {
+	if d.ctx.filesystem == "" {
+		return false
+	}
+
+	for _, p := range d.partitions {
+		if filesystemMatchesPartitionSetting(d.ctx.filesystem, p) {
+			d.addFsStat(p.Device, p.Mountpoint, true, "")
+			return true
+		}
+	}
+
+	// FILESYSTEM may name a physical disk absent from partitions (e.g. ZFS lists
+	// dataset paths like zroot/ROOT/default, not block devices).
+	if ioKey, match := findIoDevice(d.ctx.filesystem, d.ctx.diskIoCounters); match {
+		d.agent.fsStats[ioKey] = &system.FsStats{Root: true, Mountpoint: d.rootMountPoint}
+		return true
+	}
+
+	slog.Warn("Partition details not found", "filesystem", d.ctx.filesystem)
+	return false
+}
+
+func isRootFallbackPartition(p disk.PartitionStat, rootMountPoint string) bool {
+	return p.Mountpoint == rootMountPoint || // mounted at the root mountpoint, or
+		(isDockerSpecialMountpoint(p.Mountpoint) && strings.HasPrefix(p.Device, "/dev")) // a Docker special mountpoint whose device path starts with /dev
+}
+
+// addPartitionRootFs handles the non-configured root fallback path when a
+// partition looks like the active root mount but still needs translating to an
+// I/O device key. It returns false when no I/O device matches the partition.
+func (d *diskDiscovery) addPartitionRootFs(device, mountpoint string) bool {
+	fs, match := findIoDevice(filepath.Base(device), d.ctx.diskIoCounters)
+	if !match {
+		return false
+	}
+	// The resolved I/O device is already known here, so use it directly to avoid
+	// a second fallback search inside registerFilesystemStats.
+	d.addFsStat(fs, mountpoint, true, "")
+	return true
+}
+
+// addLastResortRootFs is only used when neither FILESYSTEM nor partition-based
+// heuristics can identify root, so it picks the busiest I/O device as a final
+// fallback and preserves the root mountpoint for usage collection.
+func (d *diskDiscovery) addLastResortRootFs() {
+	rootKey := mostActiveIoDevice(d.ctx.diskIoCounters)
+	if rootKey != "" {
+		slog.Warn("Using most active device for root I/O; set FILESYSTEM to override", "device", rootKey)
+	} else {
+		rootKey = filepath.Base(d.rootMountPoint)
+		if _, exists := d.agent.fsStats[rootKey]; exists {
+			rootKey = "root" // that key is already taken by another entry, so use a fixed name instead
+		}
+		slog.Warn("Root I/O device not detected; set FILESYSTEM to override")
+	}
+	d.agent.fsStats[rootKey] = &system.FsStats{Root: true, Mountpoint: d.rootMountPoint}
+}
+
+// findPartitionByFilesystemSetting matches an EXTRA_FILESYSTEMS entry against a
+// discovered partition either by mountpoint or by device suffix (first match wins).
+func findPartitionByFilesystemSetting(filesystem string, partitions []disk.PartitionStat) (disk.PartitionStat, bool) {
+	for _, p := range partitions {
+		// NOTE(review): HasSuffix is true for an empty suffix, so filesystem == "" matches the first partition — confirm callers never pass "".
+		if strings.HasSuffix(p.Device, filesystem) || p.Mountpoint == filesystem {
+			return p, true
+		}
+	}
+	return disk.PartitionStat{}, false // zero value when nothing matched
+}
+
+// addConfiguredExtraFsEntry resolves one EXTRA_FILESYSTEMS entry, preferring a
+// discovered partition and falling back to any path that disk.Usage accepts.
+func (d *diskDiscovery) addConfiguredExtraFsEntry(filesystem, customName string) {
+	if p, found := findPartitionByFilesystemSetting(filesystem, d.partitions); found {
+		d.addFsStat(p.Device, p.Mountpoint, false, customName)
+		return
+	}
+
+	if _, err := d.usageFn(filesystem); err == nil {
+		d.addFsStat(filepath.Base(filesystem), filesystem, false, customName) // no partition matched, but the path itself is usable
+		return
+	} else {
+		slog.Error("Invalid filesystem", "name", filesystem, "err", err) // entry is skipped; nothing is registered
+	}
+}
+
+// addConfiguredExtraFilesystems parses and registers the comma-separated
+// EXTRA_FILESYSTEMS env var entries.
+func (d *diskDiscovery) addConfiguredExtraFilesystems(extraFilesystems string) {
+	for fsEntry := range strings.SplitSeq(extraFilesystems, ",") {
+		filesystem, customName := parseFilesystemEntry(fsEntry) // each entry may use the "device__customname" format
+		d.addConfiguredExtraFsEntry(filesystem, customName)
+	}
+}
+
+// addPartitionExtraFs registers partitions mounted under /extra-filesystems so
+// their display names can come from the folder name while their I/O keys still
+// prefer the underlying partition device.
+func (d *diskDiscovery) addPartitionExtraFs(p disk.PartitionStat) {
+	if !strings.HasPrefix(p.Mountpoint, d.ctx.efPath) {
+		return // mountpoint does not start with the extra-filesystems path
+	}
+	device, customName := extraFilesystemPartitionInfo(p)
+	d.addFsStat(device, p.Mountpoint, false, customName)
+}
+
+// addExtraFilesystemFolders handles bare directories under /extra-filesystems
+// that may not appear in partition discovery, while skipping mountpoints that
+// were already registered from higher-fidelity sources.
+func (d *diskDiscovery) addExtraFilesystemFolders(folderNames []string) {
+	existingMountpoints := make(map[string]bool, len(d.agent.fsStats)) // set of mountpoints already present in fsStats
+	for _, stats := range d.agent.fsStats {
+		existingMountpoints[stats.Mountpoint] = true
+	}
+
+	for _, folderName := range folderNames {
+		mountpoint := filepath.Join(d.ctx.efPath, folderName)
+		slog.Debug("/extra-filesystems", "mountpoint", mountpoint)
+		if existingMountpoints[mountpoint] {
+			continue // this mountpoint is already tracked
+		}
+		device, customName := parseFilesystemEntry(folderName) // folder name uses the "device__customname" format
+		d.addFsStat(device, mountpoint, false, customName)
+	}
+}
+
 // Sets up the filesystems to monitor for disk usage and I/O.
 func (a *Agent) initializeDiskInfo() {
-	filesystem, _ := GetEnv("FILESYSTEM")
-	efPath := "/extra-filesystems"
+	filesystem, _ := utils.GetEnv("FILESYSTEM")
 	hasRoot := false
 	isWindows := runtime.GOOS == "windows"

@@ -46,167 +286,223 @@ func (a *Agent) initializeDiskInfo() {
 		}
 	}

-	// ioContext := context.WithValue(a.sensorsContext,
-	// 	common.EnvKey, common.EnvMap{common.HostProcEnvKey: "/tmp/testproc"},
-	// )
-	// diskIoCounters, err := disk.IOCountersWithContext(ioContext)
-
 	diskIoCounters, err := disk.IOCounters()
 	if err != nil {
 		slog.Error("Error getting diskstats", "err", err)
 	}
 	slog.Debug("Disk I/O", "diskstats", diskIoCounters)
-
-	// Helper function to add a filesystem to fsStats if it doesn't exist
-	addFsStat := func(device, mountpoint string, root bool, customName ...string) {
-		var key string
-		if isWindows {
-			key = device
-		} else {
-			key = filepath.Base(device)
-		}
-		var ioMatch bool
-		if _, exists := a.fsStats[key]; !exists {
-			if root {
-				slog.Info("Detected root device", "name", key)
-				// Check if root device is in /proc/diskstats, use fallback if not
-				if _, ioMatch = diskIoCounters[key]; !ioMatch {
-					key, ioMatch = findIoDevice(filesystem, diskIoCounters, a.fsStats)
-					if !ioMatch {
-						slog.Info("Using I/O fallback", "device", device, "mountpoint", mountpoint, "fallback", key)
-					}
-				}
-			} else {
-				// Check if non-root has diskstats and fall back to folder name if not
-				// Scenario: device is encrypted and named luks-2bcb02be-999d-4417-8d18-5c61e660fb6e - not in /proc/diskstats.
-				// However, the device can be specified by mounting folder from luks device at /extra-filesystems/sda1
-				if _, ioMatch = diskIoCounters[key]; !ioMatch {
-					efBase := filepath.Base(mountpoint)
-					if _, ioMatch = diskIoCounters[efBase]; ioMatch {
-						key = efBase
-					}
-				}
-			}
-			fsStats := &system.FsStats{Root: root, Mountpoint: mountpoint}
-			if len(customName) > 0 && customName[0] != "" {
-				fsStats.Name = customName[0]
-			}
-			a.fsStats[key] = fsStats
-		}
+	ctx := fsRegistrationContext{
+		filesystem:     filesystem,
+		isWindows:      isWindows,
+		diskIoCounters: diskIoCounters,
+		efPath:         "/extra-filesystems",
 	}

 	// Get the appropriate root mount point for this system
-	rootMountPoint := a.getRootMountPoint()
-
-	// Use FILESYSTEM env var to find root filesystem
-	if filesystem != "" {
-		for _, p := range partitions {
-			if strings.HasSuffix(p.Device, filesystem) || p.Mountpoint == filesystem {
-				addFsStat(p.Device, p.Mountpoint, true)
-				hasRoot = true
-				break
-			}
-		}
-		if !hasRoot {
-			slog.Warn("Partition details not found", "filesystem", filesystem)
-		}
+	discovery := diskDiscovery{
+		agent:          a,
+		rootMountPoint: a.getRootMountPoint(),
+		partitions:     partitions,
+		usageFn:        disk.Usage,
+		ctx:            ctx,
 	}

-	// Add EXTRA_FILESYSTEMS env var values to fsStats
-	if extraFilesystems, exists := GetEnv("EXTRA_FILESYSTEMS"); exists {
-		for _, fsEntry := range strings.Split(extraFilesystems, ",") {
-			// Parse custom name from format: device__customname
-			fs, customName := parseFilesystemEntry(fsEntry)
+	hasRoot = discovery.addConfiguredRootFs()

-			found := false
-			for _, p := range partitions {
-				if strings.HasSuffix(p.Device, fs) || p.Mountpoint == fs {
-					addFsStat(p.Device, p.Mountpoint, false, customName)
-					found = true
-					break
-				}
-			}
-			// if not in partitions, test if we can get disk usage
-			if !found {
-				if _, err := disk.Usage(fs); err == nil {
-					addFsStat(filepath.Base(fs), fs, false, customName)
-				} else {
-					slog.Error("Invalid filesystem", "name", fs, "err", err)
-				}
-			}
-		}
+	// Add EXTRA_FILESYSTEMS env var values to fsStats
+	if extraFilesystems, exists := utils.GetEnv("EXTRA_FILESYSTEMS"); exists {
+		discovery.addConfiguredExtraFilesystems(extraFilesystems)
 	}

 	// Process partitions for various mount points
 	for _, p := range partitions {
-		// fmt.Println(p.Device, p.Mountpoint)
-		// Binary root fallback or docker root fallback
-		if !hasRoot && (p.Mountpoint == rootMountPoint || (p.Mountpoint == "/etc/hosts" && strings.HasPrefix(p.Device, "/dev"))) {
-			fs, match := findIoDevice(filepath.Base(p.Device), diskIoCounters, a.fsStats)
-			if match {
-				addFsStat(fs, p.Mountpoint, true)
-				hasRoot = true
-			}
-		}
-
-		// Check if device is in /extra-filesystems
-		if strings.HasPrefix(p.Mountpoint, efPath) {
-			device, customName := parseFilesystemEntry(p.Mountpoint)
-			addFsStat(device, p.Mountpoint, false, customName)
+		if !hasRoot && isRootFallbackPartition(p, discovery.rootMountPoint) {
+			hasRoot = discovery.addPartitionRootFs(p.Device, p.Mountpoint)
 		}
+		discovery.addPartitionExtraFs(p)
 	}

 	// Check all folders in /extra-filesystems and add them if not already present
-	if folders, err := os.ReadDir(efPath); err == nil {
-		existingMountpoints := make(map[string]bool)
-		for _, stats := range a.fsStats {
-			existingMountpoints[stats.Mountpoint] = true
-		}
+	if folders, err := os.ReadDir(discovery.ctx.efPath); err == nil {
+		folderNames := make([]string, 0, len(folders))
 		for _, folder := range folders {
 			if folder.IsDir() {
-				mountpoint := filepath.Join(efPath, folder.Name())
-				slog.Debug("/extra-filesystems", "mountpoint", mountpoint)
-				if !existingMountpoints[mountpoint] {
-					device, customName := parseFilesystemEntry(folder.Name())
-					addFsStat(device, mountpoint, false, customName)
-				}
+				folderNames = append(folderNames, folder.Name())
 			}
 		}
+		discovery.addExtraFilesystemFolders(folderNames)
 	}

-	// If no root filesystem set, use fallback
+	// If no root filesystem set, try the most active I/O device as a last
+	// resort (e.g. ZFS where dataset names are unrelated to disk names).
 	if !hasRoot {
-		rootDevice, _ := findIoDevice(filepath.Base(filesystem), diskIoCounters, a.fsStats)
-		slog.Info("Root disk", "mountpoint", rootMountPoint, "io", rootDevice)
-		a.fsStats[rootDevice] = &system.FsStats{Root: true, Mountpoint: rootMountPoint}
+		discovery.addLastResortRootFs()
 	}

+	a.pruneDuplicateRootExtraFilesystems()
 	a.initializeDiskIoStats(diskIoCounters)
 }

-// Returns matching device from /proc/diskstats,
-// or the device with the most reads if no match is found.
-// bool is true if a match was found.
-func findIoDevice(filesystem string, diskIoCounters map[string]disk.IOCountersStat, fsStats map[string]*system.FsStats) (string, bool) {
-	var maxReadBytes uint64
-	maxReadDevice := "/"
-	for _, d := range diskIoCounters {
-		if d.Name == filesystem || (d.Label != "" && d.Label == filesystem) {
-			return d.Name, true
-		}
-		if d.ReadBytes > maxReadBytes {
-			// don't use if device already exists in fsStats
-			if _, exists := fsStats[d.Name]; !exists {
-				maxReadBytes = d.ReadBytes
-				maxReadDevice = d.Name
-			}
+// pruneDuplicateRootExtraFilesystems deletes non-root fsStats entries whose disk usage matches the root entry's (https://github.com/henrygd/beszel/issues/1428).
+func (a *Agent) pruneDuplicateRootExtraFilesystems() {
+	var rootMountpoint string
+	for _, stats := range a.fsStats {
+		if stats != nil && stats.Root {
+			rootMountpoint = stats.Mountpoint // first entry flagged Root wins
+			break
 		}
 	}
-	return maxReadDevice, false
+	if rootMountpoint == "" {
+		return // no root entry (or empty mountpoint): nothing to compare against
+	}
+	rootUsage, err := disk.Usage(rootMountpoint)
+	if err != nil {
+		return // best effort: leave fsStats untouched when root usage is unreadable
+	}
+	for name, stats := range a.fsStats {
+		if stats == nil || stats.Root {
+			continue
+		}
+		extraUsage, err := disk.Usage(stats.Mountpoint)
+		if err != nil {
+			continue // unreadable mountpoints are kept, not pruned
+		}
+		if hasSameDiskUsage(rootUsage, extraUsage) {
+			slog.Info("Ignoring duplicate FS", "name", name, "mount", stats.Mountpoint)
+			delete(a.fsStats, name) // deleting the current key while ranging over a map is allowed in Go
+		}
+	}
+}
+
+// hasSameDiskUsage reports whether a and b have Total and Used values that each differ by at most 16 MiB.
+func hasSameDiskUsage(a, b *disk.UsageStat) bool {
+	if a == nil || b == nil || a.Total == 0 || b.Total == 0 {
+		return false // nil or zero-sized stats never count as a match
+	}
+	// Allow minor drift between sequential disk usage calls.
+	const toleranceBytes uint64 = 16 * 1024 * 1024 // 16 MiB
+	return withinUsageTolerance(a.Total, b.Total, toleranceBytes) &&
+		withinUsageTolerance(a.Used, b.Used, toleranceBytes)
+}
+
+// withinUsageTolerance reports whether |a-b| <= tolerance for unsigned byte counts.
+func withinUsageTolerance(a, b, tolerance uint64) bool {
+	if a >= b { // subtract the smaller from the larger so the uint64 difference cannot wrap
+		return a-b <= tolerance
+	}
+	return b-a <= tolerance
+}
+
+type ioMatchCandidate struct { // one disk I/O counter entry reduced to the fields used for ranking
+	name  string // Name of the IOCountersStat entry
+	bytes uint64 // ReadBytes + WriteBytes
+	ops   uint64 // ReadCount + WriteCount
+}
+
+// findIoDevice returns the Name of the diskIoCounters entry whose name or label equals filesystem (compared by basename);
+// otherwise the prefix-related entry with the most read+write bytes (ties: ops, then smaller name). The bool is false when nothing matches.
+func findIoDevice(filesystem string, diskIoCounters map[string]disk.IOCountersStat) (string, bool) {
+	filesystem = normalizeDeviceName(filesystem) // e.g. "/dev/nda0p2" -> "nda0p2"
+	if filesystem == "" {
+		return "", false
+	}
+
+	candidates := []ioMatchCandidate{}
+
+	for _, d := range diskIoCounters {
+		if normalizeDeviceName(d.Name) == filesystem || (d.Label != "" && normalizeDeviceName(d.Label) == filesystem) {
+			return d.Name, true // exact name/label match wins immediately
+		}
+		if prefixRelated(normalizeDeviceName(d.Name), filesystem) ||
+			(d.Label != "" && prefixRelated(normalizeDeviceName(d.Label), filesystem)) {
+			candidates = append(candidates, ioMatchCandidate{
+				name:  d.Name,
+				bytes: d.ReadBytes + d.WriteBytes,
+				ops:   d.ReadCount + d.WriteCount,
+			})
+		}
+	}
+
+	if len(candidates) == 0 {
+		return "", false
+	}
+
+	best := candidates[0]
+	for _, c := range candidates[1:] {
+		if c.bytes > best.bytes ||
+			(c.bytes == best.bytes && c.ops > best.ops) ||
+			(c.bytes == best.bytes && c.ops == best.ops && c.name < best.name) { // name tie-break keeps the result independent of map iteration order
+			best = c
+		}
+	}
+
+	slog.Info("Using disk I/O fallback", "requested", filesystem, "selected", best.name)
+	return best.name, true
+}
+
+// mostActiveIoDevice returns the Name of the entry with the most read+write bytes
+// (ties: more ops, then smaller name), or "" if diskIoCounters is empty.
+func mostActiveIoDevice(diskIoCounters map[string]disk.IOCountersStat) string {
+	var best ioMatchCandidate
+	for _, d := range diskIoCounters {
+		c := ioMatchCandidate{
+			name:  d.Name,
+			bytes: d.ReadBytes + d.WriteBytes,
+			ops:   d.ReadCount + d.WriteCount,
+		}
+		if best.name == "" || c.bytes > best.bytes || // empty best.name means no candidate has been chosen yet
+			(c.bytes == best.bytes && c.ops > best.ops) ||
+			(c.bytes == best.bytes && c.ops == best.ops && c.name < best.name) {
+			best = c
+		}
+	}
+	return best.name
+}
+
+// prefixRelated reports whether one of two distinct, non-empty identifiers is a prefix of the other.
+func prefixRelated(a, b string) bool {
+	if a == "" || b == "" || a == b { // equal strings are deliberately not "related"; callers test equality separately
+		return false
+	}
+	return strings.HasPrefix(a, b) || strings.HasPrefix(b, a)
+}
+
+// filesystemMatchesPartitionSetting reports whether a FILESYSTEM env var value
+// matches partition p by mountpoint, by device basename, or by a prefix relationship
+// between the two basenames (e.g. FILESYSTEM=ada0 matches partition /dev/ada0p2).
+func filesystemMatchesPartitionSetting(filesystem string, p disk.PartitionStat) bool {
+	filesystem = strings.TrimSpace(filesystem)
+	if filesystem == "" {
+		return false
+	}
+	if p.Mountpoint == filesystem { // mountpoint is compared as given, before any basename normalization
+		return true
+	}
+
+	fsName := normalizeDeviceName(filesystem)
+	partName := normalizeDeviceName(p.Device)
+	if fsName == "" || partName == "" {
+		return false
+	}
+	if fsName == partName {
+		return true
+	}
+	return prefixRelated(partName, fsName) // NOTE(review): symmetric, so a setting longer than the device (e.g. "ada0p22" vs "ada0p2") also matches
+}
+
+// normalizeDeviceName trims surrounding whitespace and returns the last path element of value ("" for empty input).
+func normalizeDeviceName(value string) string {
+	name := filepath.Base(strings.TrimSpace(value))
+	if name == "." { // filepath.Base returns "." for an empty path
+		return ""
+	}
+	return name
 }

 // Sets start values for disk I/O stats.
 func (a *Agent) initializeDiskIoStats(diskIoCounters map[string]disk.IOCountersStat) {
+	a.fsNames = a.fsNames[:0]
+	now := time.Now()
 	for device, stats := range a.fsStats {
 		// skip if not in diskIoCounters
 		d, exists := diskIoCounters[device]
@@ -215,7 +511,7 @@ func (a *Agent) initializeDiskIoStats(diskIoCounters map[string]disk.IOCountersS
 			continue
 		}
 		// populate initial values
-		stats.Time = time.Now()
+		stats.Time = now
 		stats.TotalRead = d.ReadBytes
 		stats.TotalWrite = d.WriteBytes
 		// add to list of valid io device names
@@ -225,15 +521,26 @@ func (a *Agent) initializeDiskIoStats(diskIoCounters map[string]disk.IOCountersS

 // Updates disk usage statistics for all monitored filesystems
 func (a *Agent) updateDiskUsage(systemStats *system.Stats) {
+	// Check if we should skip extra filesystem collection to avoid waking sleeping disks.
+	// Root filesystem is always updated since it can't be sleeping while the agent runs.
+	// Always collect on first call (lastDiskUsageUpdate is zero) or if caching is disabled.
+	cacheExtraFs := a.diskUsageCacheDuration > 0 &&
+		!a.lastDiskUsageUpdate.IsZero() &&
+		time.Since(a.lastDiskUsageUpdate) < a.diskUsageCacheDuration
+
 	// disk usage
 	for _, stats := range a.fsStats {
+		// Skip non-root filesystems if caching is active
+		if cacheExtraFs && !stats.Root {
+			continue
+		}
 		if d, err := disk.Usage(stats.Mountpoint); err == nil {
-			stats.DiskTotal = bytesToGigabytes(d.Total)
-			stats.DiskUsed = bytesToGigabytes(d.Used)
+			stats.DiskTotal = utils.BytesToGigabytes(d.Total)
+			stats.DiskUsed = utils.BytesToGigabytes(d.Used)
 			if stats.Root {
-				systemStats.DiskTotal = bytesToGigabytes(d.Total)
-				systemStats.DiskUsed = bytesToGigabytes(d.Used)
-				systemStats.DiskPct = twoDecimals(d.UsedPercent)
+				systemStats.DiskTotal = utils.BytesToGigabytes(d.Total)
+				systemStats.DiskUsed = utils.BytesToGigabytes(d.Used)
+				systemStats.DiskPct = utils.TwoDecimals(d.UsedPercent)
 			}
 		} else {
 			// reset stats if error (likely unmounted)
@@ -244,6 +551,11 @@ func (a *Agent) updateDiskUsage(systemStats *system.Stats) {
 			stats.TotalWrite = 0
 		}
 	}
+
+	// Update the last disk usage update time when we've collected extra filesystems
+	if !cacheExtraFs {
+		a.lastDiskUsageUpdate = time.Now()
+	}
 }

 // Updates disk I/O statistics for all monitored filesystems
@@ -281,8 +593,8 @@ func (a *Agent) updateDiskIo(cacheTimeMs uint16, systemStats *system.Stats) {

 			diskIORead := (d.ReadBytes - prev.readBytes) * 1000 / msElapsed
 			diskIOWrite := (d.WriteBytes - prev.writeBytes) * 1000 / msElapsed
-			readMbPerSecond := bytesToMegabytes(float64(diskIORead))
-			writeMbPerSecond := bytesToMegabytes(float64(diskIOWrite))
+			readMbPerSecond := utils.BytesToMegabytes(float64(diskIORead))
+			writeMbPerSecond := utils.BytesToMegabytes(float64(diskIOWrite))

 			// validate values
 			if readMbPerSecond > 50_000 || writeMbPerSecond > 50_000 {
--- a/agent/disk_test.go
+++ b/agent/disk_test.go
@@ -1,5 +1,4 @@
 //go:build testing
-// +build testing

 package agent

@@ -7,6 +6,7 @@ import (
 	"os"
 	"strings"
 	"testing"
+	"time"

 	"github.com/henrygd/beszel/internal/entities/system"
 	"github.com/shirou/gopsutil/v4/disk"
@@ -93,19 +93,602 @@ func TestParseFilesystemEntry(t *testing.T) {
 	}
 }

-func TestInitializeDiskInfoWithCustomNames(t *testing.T) {
-	// Set up environment variables
-	oldEnv := os.Getenv("EXTRA_FILESYSTEMS")
-	defer func() {
-		if oldEnv != "" {
-			os.Setenv("EXTRA_FILESYSTEMS", oldEnv)
-		} else {
-			os.Unsetenv("EXTRA_FILESYSTEMS")
-		}
-	}()
+func TestExtraFilesystemPartitionInfo(t *testing.T) { // checks the (device, customName) pair derived from a partition under /extra-filesystems
+	t.Run("uses partition device for label-only mountpoint", func(t *testing.T) {
+		device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
+			Device:     "/dev/sdc",
+			Mountpoint: "/extra-filesystems/Share",
+		})
 
+		assert.Equal(t, "/dev/sdc", device)
+		assert.Equal(t, "", customName)
+	})
+
+	t.Run("uses custom name from mountpoint suffix", func(t *testing.T) {
+		device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
+			Device:     "/dev/sdc",
+			Mountpoint: "/extra-filesystems/sdc__Share",
+		})
+
+		assert.Equal(t, "/dev/sdc", device)
+		assert.Equal(t, "Share", customName)
+	})
+
+	t.Run("falls back to folder device when partition device is unavailable", func(t *testing.T) {
+		device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
+			Mountpoint: "/extra-filesystems/sdc__Share", // Device left empty on purpose
+		})
+
+		assert.Equal(t, "sdc", device)
+		assert.Equal(t, "Share", customName)
+	})
+
+	t.Run("supports custom name without folder device prefix", func(t *testing.T) {
+		device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
+			Device:     "/dev/sdc",
+			Mountpoint: "/extra-filesystems/__Share",
+		})
+
+		assert.Equal(t, "/dev/sdc", device)
+		assert.Equal(t, "Share", customName)
+	})
+}
+
+func TestBuildFsStatRegistration(t *testing.T) { // exercises registerFilesystemStats: which fsStats key is chosen and what stats are returned
+	t.Run("uses basename for non-windows exact io match", func(t *testing.T) {
+		key, stats, ok := registerFilesystemStats(
+			map[string]*system.FsStats{},
+			"/dev/sda1",
+			"/mnt/data",
+			false,
+			"archive",
+			fsRegistrationContext{
+				isWindows: false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"sda1": {Name: "sda1"},
+				},
+			},
+		)
+
+		assert.True(t, ok)
+		assert.Equal(t, "sda1", key)
+		assert.Equal(t, "/mnt/data", stats.Mountpoint)
+		assert.Equal(t, "archive", stats.Name)
+		assert.False(t, stats.Root)
+	})
+
+	t.Run("maps root partition to io device by prefix", func(t *testing.T) {
+		key, stats, ok := registerFilesystemStats(
+			map[string]*system.FsStats{},
+			"/dev/ada0p2",
+			"/",
+			true,
+			"",
+			fsRegistrationContext{
+				isWindows: false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"ada0": {Name: "ada0", ReadBytes: 1000, WriteBytes: 1000},
+				},
+			},
+		)
+
+		assert.True(t, ok)
+		assert.Equal(t, "ada0", key)
+		assert.True(t, stats.Root)
+		assert.Equal(t, "/", stats.Mountpoint)
+	})
+
+	t.Run("uses filesystem setting as root fallback", func(t *testing.T) {
+		key, _, ok := registerFilesystemStats(
+			map[string]*system.FsStats{},
+			"overlay",
+			"/",
+			true,
+			"",
+			fsRegistrationContext{
+				filesystem: "nvme0n1p2", // the device "overlay" has no io counter; the configured filesystem does (by prefix)
+				isWindows:  false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"nvme0n1": {Name: "nvme0n1", ReadBytes: 1000, WriteBytes: 1000},
+				},
+			},
+		)
+
+		assert.True(t, ok)
+		assert.Equal(t, "nvme0n1", key)
+	})
+
+	t.Run("prefers parsed extra-filesystems device over mapper device", func(t *testing.T) {
+		key, stats, ok := registerFilesystemStats(
+			map[string]*system.FsStats{},
+			"/dev/mapper/luks-2bcb02be-999d-4417-8d18-5c61e660fb6e",
+			"/extra-filesystems/nvme0n1p2__Archive",
+			false,
+			"Archive",
+			fsRegistrationContext{
+				isWindows: false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"dm-1":      {Name: "dm-1", Label: "luks-2bcb02be-999d-4417-8d18-5c61e660fb6e"},
+					"nvme0n1p2": {Name: "nvme0n1p2"},
+				},
+			},
+		)
+
+		assert.True(t, ok)
+		assert.Equal(t, "nvme0n1p2", key)
+		assert.Equal(t, "Archive", stats.Name)
+	})
+
+	t.Run("falls back to mapper io device when folder device cannot be resolved", func(t *testing.T) {
+		key, stats, ok := registerFilesystemStats(
+			map[string]*system.FsStats{},
+			"/dev/mapper/luks-2bcb02be-999d-4417-8d18-5c61e660fb6e",
+			"/extra-filesystems/Archive",
+			false,
+			"Archive",
+			fsRegistrationContext{
+				isWindows: false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"dm-1": {Name: "dm-1", Label: "luks-2bcb02be-999d-4417-8d18-5c61e660fb6e"}, // matched through Label, keyed by Name
+				},
+			},
+		)
+
+		assert.True(t, ok)
+		assert.Equal(t, "dm-1", key)
+		assert.Equal(t, "Archive", stats.Name)
+	})
+
+	t.Run("uses full device name on windows", func(t *testing.T) {
+		key, _, ok := registerFilesystemStats(
+			map[string]*system.FsStats{},
+			`C:`,
+			`C:\\`, // NOTE(review): raw string literal, so this mountpoint is "C:" followed by two backslashes
+			false,
+			"",
+			fsRegistrationContext{
+				isWindows: true,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					`C:`: {Name: `C:`},
+				},
+			},
+		)
+
+		assert.True(t, ok)
+		assert.Equal(t, `C:`, key)
+	})
+
+	t.Run("skips existing key", func(t *testing.T) {
+		key, stats, ok := registerFilesystemStats(
+			map[string]*system.FsStats{"sda1": {Mountpoint: "/existing"}},
+			"/dev/sda1",
+			"/mnt/data",
+			false,
+			"",
+			fsRegistrationContext{
+				isWindows: false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"sda1": {Name: "sda1"},
+				},
+			},
+		)
+
+		assert.False(t, ok)
+		assert.Empty(t, key)
+		assert.Nil(t, stats)
+	})
+}
+
+func TestAddConfiguredRootFs(t *testing.T) { // covers root registration driven by ctx.filesystem (the FILESYSTEM setting)
+	t.Run("adds root from matching partition", func(t *testing.T) {
+		agent := &Agent{fsStats: make(map[string]*system.FsStats)}
+		discovery := diskDiscovery{
+			agent:          agent,
+			rootMountPoint: "/",
+			partitions:     []disk.PartitionStat{{Device: "/dev/ada0p2", Mountpoint: "/"}},
+			ctx: fsRegistrationContext{
+				filesystem: "/dev/ada0p2",
+				isWindows:  false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"ada0": {Name: "ada0", ReadBytes: 1000, WriteBytes: 1000},
+				},
+			},
+		}
+
+		ok := discovery.addConfiguredRootFs()
+
+		assert.True(t, ok)
+		stats, exists := agent.fsStats["ada0"] // keyed by the io device, not the partition name
+		assert.True(t, exists)
+		assert.True(t, stats.Root)
+		assert.Equal(t, "/", stats.Mountpoint)
+	})
+
+	t.Run("adds root from io device when partition is missing", func(t *testing.T) {
+		agent := &Agent{fsStats: make(map[string]*system.FsStats)}
+		discovery := diskDiscovery{
+			agent:          agent,
+			rootMountPoint: "/sysroot",
+			ctx: fsRegistrationContext{
+				filesystem: "zroot",
+				isWindows:  false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"nda0": {Name: "nda0", Label: "zroot", ReadBytes: 1000, WriteBytes: 1000},
+				},
+			},
+		}
+
+		ok := discovery.addConfiguredRootFs()
+
+		assert.True(t, ok)
+		stats, exists := agent.fsStats["nda0"]
+		assert.True(t, exists)
+		assert.True(t, stats.Root)
+		assert.Equal(t, "/sysroot", stats.Mountpoint) // no partition matched, so rootMountPoint is used
+	})
+
+	t.Run("returns false when filesystem cannot be resolved", func(t *testing.T) {
+		agent := &Agent{fsStats: make(map[string]*system.FsStats)}
+		discovery := diskDiscovery{
+			agent:          agent,
+			rootMountPoint: "/",
+			ctx: fsRegistrationContext{
+				filesystem:     "missing-disk",
+				isWindows:      false,
+				diskIoCounters: map[string]disk.IOCountersStat{},
+			},
+		}
+
+		ok := discovery.addConfiguredRootFs()
+
+		assert.False(t, ok)
+		assert.Empty(t, agent.fsStats)
+	})
+}
+
+func TestAddPartitionRootFs(t *testing.T) { // covers root registration from a fallback partition (device, mountpoint)
+	t.Run("adds root from fallback partition candidate", func(t *testing.T) {
+		agent := &Agent{fsStats: make(map[string]*system.FsStats)}
+		discovery := diskDiscovery{
+			agent: agent,
+			ctx: fsRegistrationContext{
+				isWindows: false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"nvme0n1": {Name: "nvme0n1", ReadBytes: 1000, WriteBytes: 1000},
+				},
+			},
+		}
+
+		ok := discovery.addPartitionRootFs("/dev/nvme0n1p2", "/")
+
+		assert.True(t, ok)
+		stats, exists := agent.fsStats["nvme0n1"] // partition nvme0n1p2 resolves to its prefix-related io device
+		assert.True(t, exists)
+		assert.True(t, stats.Root)
+		assert.Equal(t, "/", stats.Mountpoint)
+	})
+
+	t.Run("returns false when no io device matches", func(t *testing.T) {
+		agent := &Agent{fsStats: make(map[string]*system.FsStats)}
+		discovery := diskDiscovery{agent: agent, ctx: fsRegistrationContext{diskIoCounters: map[string]disk.IOCountersStat{}}}
+
+		ok := discovery.addPartitionRootFs("/dev/mapper/root", "/")
+
+		assert.False(t, ok)
+		assert.Empty(t, agent.fsStats)
+	})
+}
+
+func TestAddLastResortRootFs(t *testing.T) { // covers the final root fallback used when nothing else identified a root device
+	t.Run("uses most active io device when available", func(t *testing.T) {
+		agent := &Agent{fsStats: make(map[string]*system.FsStats)}
+		discovery := diskDiscovery{agent: agent, rootMountPoint: "/", ctx: fsRegistrationContext{diskIoCounters: map[string]disk.IOCountersStat{
+			"sda": {Name: "sda", ReadBytes: 5000, WriteBytes: 5000},
+			"sdb": {Name: "sdb", ReadBytes: 1000, WriteBytes: 1000},
+		}}}
+
+		discovery.addLastResortRootFs()
+
+		stats, exists := agent.fsStats["sda"] // sda has the larger read+write byte total
+		assert.True(t, exists)
+		assert.True(t, stats.Root)
+	})
+
+	t.Run("falls back to root key when mountpoint basename collides", func(t *testing.T) {
+		agent := &Agent{fsStats: map[string]*system.FsStats{
+			"sysroot": {Mountpoint: "/extra-filesystems/sysroot"}, // occupies the key the root mountpoint's basename would use
+		}}
+		discovery := diskDiscovery{agent: agent, rootMountPoint: "/sysroot", ctx: fsRegistrationContext{diskIoCounters: map[string]disk.IOCountersStat{}}}
+
+		discovery.addLastResortRootFs()
+
+		stats, exists := agent.fsStats["root"]
+		assert.True(t, exists)
+		assert.True(t, stats.Root)
+		assert.Equal(t, "/sysroot", stats.Mountpoint)
+	})
+}
+
+func TestAddConfiguredExtraFsEntry(t *testing.T) { // covers registration of a single parsed EXTRA_FILESYSTEMS entry
+	t.Run("uses matching partition when present", func(t *testing.T) {
+		agent := &Agent{fsStats: make(map[string]*system.FsStats)}
+		discovery := diskDiscovery{
+			agent:      agent,
+			partitions: []disk.PartitionStat{{Device: "/dev/sdb1", Mountpoint: "/mnt/backup"}},
+			usageFn: func(string) (*disk.UsageStat, error) {
+				t.Fatal("usage fallback should not be called when partition matches")
+				return nil, nil
+			},
+			ctx: fsRegistrationContext{
+				isWindows: false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"sdb1": {Name: "sdb1"},
+				},
+			},
+		}
+
+		discovery.addConfiguredExtraFsEntry("sdb1", "backup")
+
+		stats, exists := agent.fsStats["sdb1"]
+		assert.True(t, exists)
+		assert.Equal(t, "/mnt/backup", stats.Mountpoint)
+		assert.Equal(t, "backup", stats.Name)
+	})
+
+	t.Run("falls back to usage-validated path", func(t *testing.T) {
+		agent := &Agent{fsStats: make(map[string]*system.FsStats)}
+		discovery := diskDiscovery{
+			agent: agent,
+			usageFn: func(path string) (*disk.UsageStat, error) { // stub standing in for disk.Usage
+				assert.Equal(t, "/srv/archive", path)
+				return &disk.UsageStat{}, nil
+			},
+			ctx: fsRegistrationContext{
+				isWindows: false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"archive": {Name: "archive"},
+				},
+			},
+		}
+
+		discovery.addConfiguredExtraFsEntry("/srv/archive", "archive")
+
+		stats, exists := agent.fsStats["archive"] // key is the basename of the configured path
+		assert.True(t, exists)
+		assert.Equal(t, "/srv/archive", stats.Mountpoint)
+		assert.Equal(t, "archive", stats.Name)
+	})
+
+	t.Run("ignores invalid filesystem entry", func(t *testing.T) {
+		agent := &Agent{fsStats: make(map[string]*system.FsStats)}
+		discovery := diskDiscovery{
+			agent: agent,
+			usageFn: func(string) (*disk.UsageStat, error) {
+				return nil, os.ErrNotExist
+			},
+		}
+
+		discovery.addConfiguredExtraFsEntry("/missing/archive", "")
+
+		assert.Empty(t, agent.fsStats)
+	})
+}
+
+func TestAddConfiguredExtraFilesystems(t *testing.T) { // covers parsing of the full comma-separated EXTRA_FILESYSTEMS value
+	t.Run("parses and registers multiple configured filesystems", func(t *testing.T) {
+		agent := &Agent{fsStats: make(map[string]*system.FsStats)}
+		discovery := diskDiscovery{
+			agent:      agent,
+			partitions: []disk.PartitionStat{{Device: "/dev/sda1", Mountpoint: "/mnt/fast"}},
+			usageFn: func(path string) (*disk.UsageStat, error) { // only /srv/archive is treated as a valid path
+				if path == "/srv/archive" {
+					return &disk.UsageStat{}, nil
+				}
+				return nil, os.ErrNotExist
+			},
+			ctx: fsRegistrationContext{
+				isWindows: false,
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"sda1":    {Name: "sda1"},
+					"archive": {Name: "archive"},
+				},
+			},
+		}
+
+		discovery.addConfiguredExtraFilesystems("sda1__fast,/srv/archive__cold") // one partition match, one usage-validated path
+
+		assert.Contains(t, agent.fsStats, "sda1")
+		assert.Equal(t, "fast", agent.fsStats["sda1"].Name)
+		assert.Contains(t, agent.fsStats, "archive")
+		assert.Equal(t, "cold", agent.fsStats["archive"].Name)
+	})
+}
+
+func TestAddExtraFilesystemFolders(t *testing.T) { // covers registration of bare folders found under /extra-filesystems
+	t.Run("adds missing folders and skips existing mountpoints", func(t *testing.T) {
+		agent := &Agent{fsStats: map[string]*system.FsStats{
+			"existing": {Mountpoint: "/extra-filesystems/existing"},
+		}}
+		discovery := diskDiscovery{
+			agent: agent,
+			ctx: fsRegistrationContext{
+				isWindows: false,
+				efPath:    "/extra-filesystems",
+				diskIoCounters: map[string]disk.IOCountersStat{
+					"newdisk": {Name: "newdisk"},
+				},
+			},
+		}
+
+		discovery.addExtraFilesystemFolders([]string{"existing", "newdisk__Archive"})
+
+		assert.Len(t, agent.fsStats, 2) // "existing" was not duplicated; only "newdisk" was added
+		stats, exists := agent.fsStats["newdisk"]
+		assert.True(t, exists)
+		assert.Equal(t, "/extra-filesystems/newdisk__Archive", stats.Mountpoint)
+		assert.Equal(t, "Archive", stats.Name)
+	})
+}
+
+func TestFindIoDevice(t *testing.T) { // covers exact, label, prefix and tie-break resolution in findIoDevice
+	t.Run("matches by device name", func(t *testing.T) {
+		ioCounters := map[string]disk.IOCountersStat{
+			"sda": {Name: "sda"},
+			"sdb": {Name: "sdb"},
+		}
+
+		device, ok := findIoDevice("sdb", ioCounters)
+		assert.True(t, ok)
+		assert.Equal(t, "sdb", device)
+	})
+
+	t.Run("matches by device label", func(t *testing.T) {
+		ioCounters := map[string]disk.IOCountersStat{
+			"sda": {Name: "sda", Label: "rootfs"},
+			"sdb": {Name: "sdb"},
+		}
+
+		device, ok := findIoDevice("rootfs", ioCounters)
+		assert.True(t, ok)
+		assert.Equal(t, "sda", device) // a label match returns the entry's Name
+	})
+
+	t.Run("returns no match when not found", func(t *testing.T) {
+		ioCounters := map[string]disk.IOCountersStat{
+			"sda": {Name: "sda"},
+			"sdb": {Name: "sdb"},
+		}
+
+		device, ok := findIoDevice("nvme0n1p1", ioCounters)
+		assert.False(t, ok)
+		assert.Equal(t, "", device)
+	})
+
+	t.Run("uses uncertain unique prefix fallback", func(t *testing.T) {
+		ioCounters := map[string]disk.IOCountersStat{
+			"nvme0n1": {Name: "nvme0n1"},
+			"sda":     {Name: "sda"},
+		}
+
+		device, ok := findIoDevice("nvme0n1p2", ioCounters)
+		assert.True(t, ok)
+		assert.Equal(t, "nvme0n1", device)
+	})
+
+	t.Run("uses dominant activity when prefix matches are ambiguous", func(t *testing.T) {
+		ioCounters := map[string]disk.IOCountersStat{
+			"sda": {Name: "sda", ReadBytes: 5000, WriteBytes: 5000, ReadCount: 100, WriteCount: 100},
+			"sdb": {Name: "sdb", ReadBytes: 1000, WriteBytes: 1000, ReadCount: 50, WriteCount: 50},
+		}
+
+		device, ok := findIoDevice("sd", ioCounters) // "sd" is a prefix of both entries
+		assert.True(t, ok)
+		assert.Equal(t, "sda", device)
+	})
+
+	t.Run("uses highest activity when ambiguous without dominance", func(t *testing.T) {
+		ioCounters := map[string]disk.IOCountersStat{
+			"sda": {Name: "sda", ReadBytes: 3000, WriteBytes: 3000, ReadCount: 50, WriteCount: 50},
+			"sdb": {Name: "sdb", ReadBytes: 2500, WriteBytes: 2500, ReadCount: 40, WriteCount: 40},
+		}
+
+		device, ok := findIoDevice("sd", ioCounters)
+		assert.True(t, ok)
+		assert.Equal(t, "sda", device)
+	})
+
+	t.Run("matches /dev/-prefixed partition to parent disk", func(t *testing.T) {
+		ioCounters := map[string]disk.IOCountersStat{
+			"nda0": {Name: "nda0", ReadBytes: 1000, WriteBytes: 1000},
+		}
+
+		device, ok := findIoDevice("/dev/nda0p2", ioCounters)
+		assert.True(t, ok)
+		assert.Equal(t, "nda0", device)
+	})
+
+	t.Run("uses deterministic name tie-breaker", func(t *testing.T) {
+		ioCounters := map[string]disk.IOCountersStat{
+			"sdb": {Name: "sdb", ReadBytes: 2000, WriteBytes: 2000, ReadCount: 10, WriteCount: 10},
+			"sda": {Name: "sda", ReadBytes: 2000, WriteBytes: 2000, ReadCount: 10, WriteCount: 10},
+		}
+
+		device, ok := findIoDevice("sd", ioCounters)
+		assert.True(t, ok)
+		assert.Equal(t, "sda", device) // identical bytes and ops: the lexically smaller name wins
+	})
+}
+
+func TestFilesystemMatchesPartitionSetting(t *testing.T) { // checks FILESYSTEM values against one fixed partition
+	p := disk.PartitionStat{Device: "/dev/ada0p2", Mountpoint: "/"}
+
+	t.Run("matches mountpoint setting", func(t *testing.T) {
+		assert.True(t, filesystemMatchesPartitionSetting("/", p))
+	})
+
+	t.Run("matches exact partition setting", func(t *testing.T) {
+		assert.True(t, filesystemMatchesPartitionSetting("ada0p2", p))
+		assert.True(t, filesystemMatchesPartitionSetting("/dev/ada0p2", p))
+	})
+
+	t.Run("matches prefix-style parent setting", func(t *testing.T) {
+		assert.True(t, filesystemMatchesPartitionSetting("ada0", p))
+		assert.True(t, filesystemMatchesPartitionSetting("/dev/ada0", p))
+	})
+
+	t.Run("does not match unrelated device", func(t *testing.T) {
+		assert.False(t, filesystemMatchesPartitionSetting("sda", p))
+		assert.False(t, filesystemMatchesPartitionSetting("nvme0n1", p))
+		assert.False(t, filesystemMatchesPartitionSetting("", p)) // empty setting never matches
+	})
+}
+
+func TestMostActiveIoDevice(t *testing.T) { // covers ranking, tie-breaking and the empty-map case
+	t.Run("returns most active device", func(t *testing.T) {
+		ioCounters := map[string]disk.IOCountersStat{
+			"nda0": {Name: "nda0", ReadBytes: 5000, WriteBytes: 5000, ReadCount: 100, WriteCount: 100},
+			"nda1": {Name: "nda1", ReadBytes: 1000, WriteBytes: 1000, ReadCount: 50, WriteCount: 50},
+		}
+		assert.Equal(t, "nda0", mostActiveIoDevice(ioCounters))
+	})
+
+	t.Run("uses deterministic tie-breaker", func(t *testing.T) {
+		ioCounters := map[string]disk.IOCountersStat{
+			"sdb": {Name: "sdb", ReadBytes: 1000, WriteBytes: 1000, ReadCount: 10, WriteCount: 10},
+			"sda": {Name: "sda", ReadBytes: 1000, WriteBytes: 1000, ReadCount: 10, WriteCount: 10},
+		}
+		assert.Equal(t, "sda", mostActiveIoDevice(ioCounters)) // identical counters: the lexically smaller name wins
+	})
+
+	t.Run("returns empty for empty map", func(t *testing.T) {
+		assert.Equal(t, "", mostActiveIoDevice(map[string]disk.IOCountersStat{}))
+	})
+}
+
+func TestIsDockerSpecialMountpoint(t *testing.T) { // table-driven check of isDockerSpecialMountpoint
+	testCases := []struct {
+		name       string
+		mountpoint string
+		expected   bool
+	}{
+		{name: "hosts", mountpoint: "/etc/hosts", expected: true},
+		{name: "resolv", mountpoint: "/etc/resolv.conf", expected: true},
+		{name: "hostname", mountpoint: "/etc/hostname", expected: true},
+		{name: "root", mountpoint: "/", expected: false},
+		{name: "passwd", mountpoint: "/etc/passwd", expected: false}, // other files under /etc are not special
+		{name: "extra-filesystem", mountpoint: "/extra-filesystems/sda1", expected: false},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.Equal(t, tc.expected, isDockerSpecialMountpoint(tc.mountpoint))
+		})
+	}
+}
+
+func TestInitializeDiskInfoWithCustomNames(t *testing.T) {
 	// Test with custom names
-	os.Setenv("EXTRA_FILESYSTEMS", "sda1__my-storage,/dev/sdb1__backup-drive,nvme0n1p2")
+	t.Setenv("EXTRA_FILESYSTEMS", "sda1__my-storage,/dev/sdb1__backup-drive,nvme0n1p2")

 	// Mock disk partitions (we'll just test the parsing logic)
 	// Since the actual disk operations are system-dependent, we'll focus on the parsing
@@ -133,7 +716,7 @@ func TestInitializeDiskInfoWithCustomNames(t *testing.T) {

 	for _, tc := range testCases {
 		t.Run("env_"+tc.envValue, func(t *testing.T) {
-			os.Setenv("EXTRA_FILESYSTEMS", tc.envValue)
+			t.Setenv("EXTRA_FILESYSTEMS", tc.envValue)

 			// Create mock partitions that would match our test cases
 			partitions := []disk.PartitionStat{}
@@ -154,7 +737,7 @@ func TestInitializeDiskInfoWithCustomNames(t *testing.T) {
 			// Test the parsing logic by calling the relevant part
 			// We'll create a simplified version to test just the parsing
 			extraFilesystems := tc.envValue
-			for _, fsEntry := range strings.Split(extraFilesystems, ",") {
+			for fsEntry := range strings.SplitSeq(extraFilesystems, ",") {
 				// Parse the entry
 				fsEntry = strings.TrimSpace(fsEntry)
 				var fs, customName string
@@ -233,3 +816,150 @@ func TestExtraFsKeyGeneration(t *testing.T) {
 		})
 	}
 }
+
+// TestDiskUsageCaching runs updateDiskUsage with caching disabled, with a fresh cache, on a first call, and with an expired cache.
+func TestDiskUsageCaching(t *testing.T) {
+	t.Run("caching disabled updates all filesystems", func(t *testing.T) {
+		agent := &Agent{
+			fsStats: map[string]*system.FsStats{
+				"sda": {Root: true, Mountpoint: "/"},
+				"sdb": {Root: false, Mountpoint: "/mnt/storage"},
+			},
+			diskUsageCacheDuration: 0, // caching disabled
+		}
+
+		var stats system.Stats
+		agent.updateDiskUsage(&stats)
+
+		// With caching disabled the call is expected to record lastDiskUsageUpdate.
+		// (The previous `IsZero() || !IsZero()` check was always true and verified nothing.)
+		assert.False(t, agent.lastDiskUsageUpdate.IsZero(),
+			"lastDiskUsageUpdate should be set when caching is disabled")
+	})
+
+	t.Run("caching enabled always updates root filesystem", func(t *testing.T) {
+		agent := &Agent{
+			fsStats: map[string]*system.FsStats{
+				"sda": {Root: true, Mountpoint: "/", DiskTotal: 100, DiskUsed: 50},
+				"sdb": {Root: false, Mountpoint: "/mnt/storage", DiskTotal: 200, DiskUsed: 100},
+			},
+			diskUsageCacheDuration: 1 * time.Hour,
+			lastDiskUsageUpdate:    time.Now(), // cache is fresh
+		}
+
+		// Store original extra fs values
+		originalExtraTotal := agent.fsStats["sdb"].DiskTotal
+		originalExtraUsed := agent.fsStats["sdb"].DiskUsed
+
+		var stats system.Stats
+		agent.updateDiskUsage(&stats)
+
+		// Root should be updated (systemStats populated from disk.Usage call)
+		// We can't easily check if disk.Usage was called, but we verify the flow works
+		// Extra filesystem should retain cached values (not reset)
+		assert.Equal(t, originalExtraTotal, agent.fsStats["sdb"].DiskTotal,
+			"extra filesystem DiskTotal should be unchanged when cached")
+		assert.Equal(t, originalExtraUsed, agent.fsStats["sdb"].DiskUsed,
+			"extra filesystem DiskUsed should be unchanged when cached")
+	})
+
+	t.Run("first call always updates all filesystems", func(t *testing.T) {
+		agent := &Agent{
+			fsStats: map[string]*system.FsStats{
+				"sda": {Root: true, Mountpoint: "/"},
+				"sdb": {Root: false, Mountpoint: "/mnt/storage"},
+			},
+			diskUsageCacheDuration: 1 * time.Hour,
+			// lastDiskUsageUpdate is zero (first call)
+		}
+
+		var stats system.Stats
+		agent.updateDiskUsage(&stats)
+
+		// After first call, lastDiskUsageUpdate should be set
+		assert.False(t, agent.lastDiskUsageUpdate.IsZero(),
+			"lastDiskUsageUpdate should be set after first call")
+	})
+
+	t.Run("expired cache updates extra filesystems", func(t *testing.T) {
+		agent := &Agent{
+			fsStats: map[string]*system.FsStats{
+				"sda": {Root: true, Mountpoint: "/"},
+				"sdb": {Root: false, Mountpoint: "/mnt/storage"},
+			},
+			diskUsageCacheDuration: 1 * time.Millisecond,
+			lastDiskUsageUpdate:    time.Now().Add(-1 * time.Second), // cache expired
+		}
+
+		var stats system.Stats
+		agent.updateDiskUsage(&stats)
+
+		// lastDiskUsageUpdate should be refreshed since cache expired
+		assert.True(t, time.Since(agent.lastDiskUsageUpdate) < time.Second,
+			"lastDiskUsageUpdate should be refreshed when cache expires")
+	})
+}
+
+// TestHasSameDiskUsage covers equal, within-tolerance, over-tolerance, and nil/zero-total inputs.
+func TestHasSameDiskUsage(t *testing.T) {
+	const (
+		gib            uint64 = 1024 * 1024 * 1024
+		toleranceBytes uint64 = 16 * 1024 * 1024
+	)
+	// baseline usage shared by the comparison cases: 100 GiB total, 42 GiB used
+	base := disk.UsageStat{Total: 100 * gib, Used: 42 * gib}
+
+	t.Run("returns true when totals and usage are equal", func(t *testing.T) {
+		a, b := base, base
+		assert.True(t, hasSameDiskUsage(&a, &b))
+	})
+
+	t.Run("returns true within tolerance", func(t *testing.T) {
+		a := base
+		b := disk.UsageStat{Total: a.Total + toleranceBytes - 1, Used: a.Used - toleranceBytes + 1}
+		assert.True(t, hasSameDiskUsage(&a, &b))
+	})
+
+	t.Run("returns false when total exceeds tolerance", func(t *testing.T) {
+		a := base
+		b := disk.UsageStat{Total: a.Total + toleranceBytes + 1, Used: a.Used}
+		assert.False(t, hasSameDiskUsage(&a, &b))
+	})
+
+	t.Run("returns false for nil or zero total", func(t *testing.T) {
+		one := &disk.UsageStat{Total: 1, Used: 1}
+		assert.False(t, hasSameDiskUsage(nil, one))
+		assert.False(t, hasSameDiskUsage(one, nil))
+		assert.False(t, hasSameDiskUsage(&disk.UsageStat{}, one))
+	})
+}
+
+// TestInitializeDiskIoStatsResetsTrackedDevices checks that initializeDiskIoStats rebuilds fsNames on every call.
+func TestInitializeDiskIoStatsResetsTrackedDevices(t *testing.T) {
+	agent := &Agent{
+		fsStats: map[string]*system.FsStats{
+			"sda": {},
+			"sdb": {},
+		},
+		fsNames: []string{"stale", "sda"},
+	}
+	// First call: the pre-seeded "stale" name must disappear and both devices get their counters and a timestamp.
+	agent.initializeDiskIoStats(map[string]disk.IOCountersStat{
+		"sda": {Name: "sda", ReadBytes: 10, WriteBytes: 20},
+		"sdb": {Name: "sdb", ReadBytes: 30, WriteBytes: 40},
+	})
+
+	assert.ElementsMatch(t, []string{"sda", "sdb"}, agent.fsNames)
+	assert.Len(t, agent.fsNames, 2)
+	assert.Equal(t, uint64(10), agent.fsStats["sda"].TotalRead)
+	assert.Equal(t, uint64(20), agent.fsStats["sda"].TotalWrite)
+	assert.False(t, agent.fsStats["sda"].Time.IsZero())
+	assert.False(t, agent.fsStats["sdb"].Time.IsZero())
+	// Second call with only sdb: fsNames must shrink to that one device and its totals must be replaced.
+	agent.initializeDiskIoStats(map[string]disk.IOCountersStat{
+		"sdb": {Name: "sdb", ReadBytes: 50, WriteBytes: 60},
+	})
+	assert.Equal(t, []string{"sdb"}, agent.fsNames)
+	assert.Equal(t, uint64(50), agent.fsStats["sdb"].TotalRead)
+	assert.Equal(t, uint64(60), agent.fsStats["sdb"].TotalWrite)
+}
--- a/agent/docker.go
+++ b/agent/docker.go
@@ -1,6 +1,7 @@
 package agent

 import (
+	"bufio"
 	"bytes"
 	"context"
 	"encoding/binary"
@@ -14,16 +15,25 @@ import (
 	"net/url"
 	"os"
 	"path"
+	"regexp"
+	"sort"
+	"strconv"
 	"strings"
 	"sync"
 	"time"

 	"github.com/henrygd/beszel/agent/deltatracker"
+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/container"

 	"github.com/blang/semver"
 )

+// ansiEscapePattern matches ANSI escape sequences (colors, cursor movement, etc.).
+// It covers CSI sequences (\x1b[...m, \x1b[K), BEL-terminated OSC sequences (\x1b]...\x07), and two-byte escapes.
+var ansiEscapePattern = regexp.MustCompile(`\x1b\[[0-9;]*[a-zA-Z]|\x1b\][^\x07]*\x07|\x1b[@-Z\\-_]`)
+var dockerContainerIDPattern = regexp.MustCompile(`^[a-fA-F0-9]{12,64}$`) // 12 to 64 hex characters
+
 const (
 	// Docker API timeout in milliseconds
 	dockerTimeoutMs = 2100
@@ -55,6 +65,7 @@ type dockerManager struct {
 	decoder             *json.Decoder               // Reusable JSON decoder that reads from buf
 	apiStats            *container.ApiStats         // Reusable API stats object
 	excludeContainers   []string                    // Patterns to exclude containers by name
+	usingPodman         bool                        // Whether the Docker Engine API is running on Podman

 	// Cache-time-aware tracking for CPU stats (similar to cpu.go)
 	// Maps cache time intervals to container-specific CPU usage tracking
@@ -66,6 +77,7 @@ type dockerManager struct {
 	// cacheTimeMs -> DeltaTracker for network bytes sent/received
 	networkSentTrackers map[uint16]*deltatracker.DeltaTracker[string, uint64]
 	networkRecvTrackers map[uint16]*deltatracker.DeltaTracker[string, uint64]
+	retrySleep          func(time.Duration)
 }

 // userAgentRoundTripper is a custom http.RoundTripper that adds a User-Agent header to all requests
@@ -327,13 +339,48 @@ func validateCpuPercentage(cpuPct float64, containerName string) error {

 // updateContainerStatsValues updates the final stats values
 func updateContainerStatsValues(stats *container.Stats, cpuPct float64, usedMemory uint64, sent_delta, recv_delta uint64, readTime time.Time) {
-	stats.Cpu = twoDecimals(cpuPct)
-	stats.Mem = bytesToMegabytes(float64(usedMemory))
-	stats.NetworkSent = bytesToMegabytes(float64(sent_delta))
-	stats.NetworkRecv = bytesToMegabytes(float64(recv_delta))
+	stats.Cpu = utils.TwoDecimals(cpuPct)
+	stats.Mem = utils.BytesToMegabytes(float64(usedMemory))
+	stats.Bandwidth = [2]uint64{sent_delta, recv_delta}
+	// TODO(0.19+): stop populating NetworkSent/NetworkRecv (deprecated in 0.18.3)
+	stats.NetworkSent = utils.BytesToMegabytes(float64(sent_delta))
+	stats.NetworkRecv = utils.BytesToMegabytes(float64(recv_delta))
 	stats.PrevReadTime = readTime
 }

+// convertContainerPortsToString renders the container's published ports as a ", "-separated string,
+// sorted by public port with repeats skipped. ctr.Ports is set to nil once the string is built.
+func convertContainerPortsToString(ctr *container.ApiInfo) string {
+	ports := ctr.Ports
+	if len(ports) == 0 {
+		return ""
+	}
+	sort.Slice(ports, func(a, b int) bool {
+		return ports[a].PublicPort < ports[b].PublicPort
+	})
+	var out strings.Builder
+	emitted := make(map[uint16]struct{})
+	for i := range ports {
+		port, ip := ports[i].PublicPort, ports[i].IP
+		if _, dup := emitted[port]; dup || port == 0 {
+			continue
+		}
+		emitted[port] = struct{}{}
+		if out.Len() > 0 {
+			out.WriteString(", ")
+		}
+		// wildcard binds ("0.0.0.0", "::") are written as the bare port number
+		if ip != "0.0.0.0" && ip != "::" {
+			out.WriteString(ip)
+			out.WriteByte(':')
+		}
+		out.WriteString(strconv.Itoa(int(port)))
+	}
+	// clear ports slice so it doesn't get reused and blend into next response
+	ctr.Ports = nil
+	return out.String()
+}
+
 func parseDockerStatus(status string) (string, container.DockerHealth) {
 	trimmed := strings.TrimSpace(status)
 	if trimmed == "" {
@@ -353,22 +400,60 @@ func parseDockerStatus(status string) (string, container.DockerHealth) {
 		statusText = trimmed
 	}

-	healthText := strings.ToLower(strings.TrimSpace(strings.TrimSuffix(trimmed[openIdx+1:], ")")))
+	healthText := strings.TrimSpace(strings.TrimSuffix(trimmed[openIdx+1:], ")"))
 	// Some Docker statuses include a "health:" prefix inside the parentheses.
 	// Strip it so it maps correctly to the known health states.
 	if colonIdx := strings.IndexRune(healthText, ':'); colonIdx != -1 {
-		prefix := strings.TrimSpace(healthText[:colonIdx])
+		prefix := strings.ToLower(strings.TrimSpace(healthText[:colonIdx]))
 		if prefix == "health" || prefix == "health status" {
 			healthText = strings.TrimSpace(healthText[colonIdx+1:])
 		}
 	}
-	if health, ok := container.DockerHealthStrings[healthText]; ok {
+	if health, ok := parseDockerHealthStatus(healthText); ok {
 		return statusText, health
 	}

 	return trimmed, container.DockerHealthNone
 }

+// parseDockerHealthStatus looks up status (trimmed and lower-cased) in container.DockerHealthStrings; ok is false when it is not listed.
+func parseDockerHealthStatus(status string) (container.DockerHealth, bool) {
+	health, ok := container.DockerHealthStrings[strings.ToLower(strings.TrimSpace(status))]
+	return health, ok
+}
+
+// getPodmanContainerHealth reads a container's health status from the container inspect endpoint.
+// Podman needs this because /containers/json does not report health status as of March 2026.
+// https://github.com/containers/podman/issues/27786
+func (dm *dockerManager) getPodmanContainerHealth(containerID string) (container.DockerHealth, error) {
+	endpoint := "http://localhost/containers/" + url.PathEscape(containerID) + "/json"
+	resp, err := dm.client.Get(endpoint)
+	if err != nil {
+		return container.DockerHealthNone, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return container.DockerHealthNone, fmt.Errorf("container inspect request failed: %s", resp.Status)
+	}
+
+	// only State.Health.Status is decoded from the inspect payload
+	var payload struct {
+		State struct {
+			Health struct {
+				Status string
+			}
+		}
+	}
+	if err = json.NewDecoder(resp.Body).Decode(&payload); err != nil {
+		return container.DockerHealthNone, err
+	}
+	health, known := parseDockerHealthStatus(payload.State.Health.Status)
+	if !known {
+		return container.DockerHealthNone, nil
+	}
+	return health, nil
+}
+
 // Updates stats for individual container with cache-time-aware delta tracking
 func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeMs uint16) error {
 	name := ctr.Names[0][1:]
@@ -378,6 +463,21 @@ func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeM
 		return err
 	}

+	statusText, health := parseDockerStatus(ctr.Status)
+
+	// Docker exposes Health.Status on /containers/json in API 1.52+.
+	// Podman currently requires falling back to the inspect endpoint as of March 2026.
+	// https://github.com/containers/podman/issues/27786
+	if ctr.Health.Status != "" {
+		if h, ok := parseDockerHealthStatus(ctr.Health.Status); ok {
+			health = h
+		}
+	} else if dm.usingPodman {
+		if podmanHealth, err := dm.getPodmanContainerHealth(ctr.IdShort); err == nil {
+			health = podmanHealth
+		}
+	}
+
 	dm.containerStatsMutex.Lock()
 	defer dm.containerStatsMutex.Unlock()

@@ -389,14 +489,18 @@ func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeM
 	}

 	stats.Id = ctr.IdShort
-
-	statusText, health := parseDockerStatus(ctr.Status)
 	stats.Status = statusText
 	stats.Health = health

+	if len(ctr.Ports) > 0 {
+		stats.Ports = convertContainerPortsToString(ctr)
+	}
+
 	// reset current stats
 	stats.Cpu = 0
 	stats.Mem = 0
+	stats.Bandwidth = [2]uint64{0, 0}
+	// TODO(0.19+): stop populating NetworkSent/NetworkRecv (deprecated in 0.18.3)
 	stats.NetworkSent = 0
 	stats.NetworkRecv = 0

@@ -473,8 +577,8 @@ func (dm *dockerManager) deleteContainerStatsSync(id string) {
 }

 // Creates a new http client for Docker or Podman API
-func newDockerManager(a *Agent) *dockerManager {
-	dockerHost, exists := GetEnv("DOCKER_HOST")
+func newDockerManager() *dockerManager {
+	dockerHost, exists := utils.GetEnv("DOCKER_HOST")
 	if exists {
 		// return nil if set to empty string
 		if dockerHost == "" {
@@ -510,7 +614,7 @@ func newDockerManager(a *Agent) *dockerManager {

 	// configurable timeout
 	timeout := time.Millisecond * time.Duration(dockerTimeoutMs)
-	if t, set := GetEnv("DOCKER_TIMEOUT"); set {
+	if t, set := utils.GetEnv("DOCKER_TIMEOUT"); set {
 		timeout, err = time.ParseDuration(t)
 		if err != nil {
 			slog.Error(err.Error())
@@ -527,7 +631,7 @@ func newDockerManager(a *Agent) *dockerManager {

 	// Read container exclusion patterns from environment variable
 	var excludeContainers []string
-	if excludeStr, set := GetEnv("EXCLUDE_CONTAINERS"); set && excludeStr != "" {
+	if excludeStr, set := utils.GetEnv("EXCLUDE_CONTAINERS"); set && excludeStr != "" {
 		parts := strings.SplitSeq(excludeStr, ",")
 		for part := range parts {
 			trimmed := strings.TrimSpace(part)
@@ -555,16 +659,17 @@ func newDockerManager(a *Agent) *dockerManager {
 		lastCpuReadTime:     make(map[uint16]map[string]time.Time),
 		networkSentTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
 		networkRecvTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
+		retrySleep:          time.Sleep,
 	}

 	// If using podman, return client
 	if strings.Contains(dockerHost, "podman") {
-		a.systemInfo.Podman = true
+		manager.usingPodman = true
 		manager.goodDockerVersion = true
 		return manager
 	}

-	// this can take up to 5 seconds with retry, so run in goroutine
+	// run version check in goroutine to avoid blocking (server may not be ready and requires retries)
 	go manager.checkDockerVersion()

 	// give version check a chance to complete before returning
@@ -584,18 +689,18 @@ func (dm *dockerManager) checkDockerVersion() {
 	const versionMaxTries = 2
 	for i := 1; i <= versionMaxTries; i++ {
 		resp, err = dm.client.Get("http://localhost/version")
-		if err == nil {
+		if err == nil && resp.StatusCode == http.StatusOK {
 			break
 		}
 		if resp != nil {
 			resp.Body.Close()
 		}
 		if i < versionMaxTries {
-			slog.Debug("Failed to get Docker version; retrying", "attempt", i, "error", err)
-			time.Sleep(5 * time.Second)
+			slog.Debug("Failed to get Docker version; retrying", "attempt", i, "err", err, "response", resp)
+			dm.retrySleep(5 * time.Second)
 		}
 	}
-	if err != nil {
+	if err != nil || resp.StatusCode != http.StatusOK {
 		return
 	}
 	if err := dm.decode(resp, &versionInfo); err != nil {
@@ -637,9 +742,34 @@ func getDockerHost() string {
 	return scheme + socks[0]
 }

+func validateContainerID(containerID string) error {
+	if dockerContainerIDPattern.MatchString(containerID) {
+		return nil // the whole string matched dockerContainerIDPattern
+	}
+	return fmt.Errorf("invalid container id") // rejected before the ID is placed in a request path
+}
+
+// buildDockerContainerEndpoint returns the http://localhost/containers/<id>/<action> URL with the
+// optional query string appended; it fails if containerID does not pass validateContainerID.
+func buildDockerContainerEndpoint(containerID, action string, query url.Values) (string, error) {
+	if err := validateContainerID(containerID); err != nil {
+		return "", err
+	}
+
+	endpoint := url.URL{Scheme: "http", Host: "localhost"}
+	endpoint.Path = "/containers/" + url.PathEscape(containerID) + "/" + action
+	if len(query) > 0 {
+		endpoint.RawQuery = query.Encode()
+	}
+	return endpoint.String(), nil
+}
+
 // getContainerInfo fetches the inspection data for a container
 func (dm *dockerManager) getContainerInfo(ctx context.Context, containerID string) ([]byte, error) {
-	endpoint := fmt.Sprintf("http://localhost/containers/%s/json", containerID)
+	endpoint, err := buildDockerContainerEndpoint(containerID, "json", nil)
+	if err != nil {
+		return nil, err
+	}
 	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
 	if err != nil {
 		return nil, err
@@ -670,7 +800,15 @@ func (dm *dockerManager) getContainerInfo(ctx context.Context, containerID strin

 // getLogs fetches the logs for a container
 func (dm *dockerManager) getLogs(ctx context.Context, containerID string) (string, error) {
-	endpoint := fmt.Sprintf("http://localhost/containers/%s/logs?stdout=1&stderr=1&tail=%d", containerID, dockerLogsTail)
+	query := url.Values{
+		"stdout": []string{"1"},
+		"stderr": []string{"1"},
+		"tail":   []string{fmt.Sprintf("%d", dockerLogsTail)},
+	}
+	endpoint, err := buildDockerContainerEndpoint(containerID, "logs", query)
+	if err != nil {
+		return "", err
+	}
 	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
 	if err != nil {
 		return "", err
@@ -688,17 +826,52 @@ func (dm *dockerManager) getLogs(ctx context.Context, containerID string) (strin
 	}

 	var builder strings.Builder
-	if err := decodeDockerLogStream(resp.Body, &builder); err != nil {
+	contentType := resp.Header.Get("Content-Type")
+	multiplexed := strings.HasSuffix(contentType, "multiplexed-stream")
+	logReader := io.Reader(resp.Body)
+	if !multiplexed {
+		// Podman may return multiplexed logs without Content-Type. Sniff the first frame header
+		// with a small buffered reader only when the header check fails.
+		bufferedReader := bufio.NewReaderSize(resp.Body, 8)
+		multiplexed = detectDockerMultiplexedStream(bufferedReader)
+		logReader = bufferedReader
+	}
+	if err := decodeDockerLogStream(logReader, &builder, multiplexed); err != nil {
 		return "", err
 	}

-	return builder.String(), nil
+	// Strip ANSI escape sequences from logs for clean display in web UI
+	logs := builder.String()
+	if strings.Contains(logs, "\x1b") {
+		logs = ansiEscapePattern.ReplaceAllString(logs, "")
+	}
+	return logs, nil
 }

-func decodeDockerLogStream(reader io.Reader, builder *strings.Builder) error {
+// detectDockerMultiplexedStream peeks at the next 8 bytes (without consuming them) and reports
+// whether they look like a Docker stream frame header.
+func detectDockerMultiplexedStream(reader *bufio.Reader) bool {
+	const headerSize = 8
+	hdr, err := reader.Peek(headerSize)
+	if err != nil {
+		return false
+	}
+	switch {
+	case hdr[0] != 0x01 && hdr[0] != 0x02: // first byte must be 0x01 or 0x02
+		return false
+	case hdr[1]|hdr[2]|hdr[3] != 0: // Docker's stream framing header reserves bytes 1-3 as zero
+		return false
+	}
+	return binary.BigEndian.Uint32(hdr[4:]) <= maxLogFrameSize
+}
+
+func decodeDockerLogStream(reader io.Reader, builder *strings.Builder, multiplexed bool) error {
+	if !multiplexed {
+		_, err := io.Copy(builder, io.LimitReader(reader, maxTotalLogSize))
+		return err
+	}
 	const headerSize = 8
 	var header [headerSize]byte
-	buf := make([]byte, 0, dockerLogsTail*200)
 	totalBytesRead := 0

 	for {
@@ -722,36 +895,37 @@ func decodeDockerLogStream(reader io.Reader, builder *strings.Builder) error {
 		// Check if reading this frame would exceed total log size limit
 		if totalBytesRead+int(frameLen) > maxTotalLogSize {
 			// Read and discard remaining data to avoid blocking
-			_, _ = io.Copy(io.Discard, io.LimitReader(reader, int64(frameLen)))
+			_, _ = io.CopyN(io.Discard, reader, int64(frameLen))
 			slog.Debug("Truncating logs: limit reached", "read", totalBytesRead, "limit", maxTotalLogSize)
 			return nil
 		}

-		buf = allocateBuffer(buf, int(frameLen))
-		if _, err := io.ReadFull(reader, buf[:frameLen]); err != nil {
+		n, err := io.CopyN(builder, reader, int64(frameLen))
+		if err != nil {
 			if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
-				if len(buf) > 0 {
-					builder.Write(buf[:min(int(frameLen), len(buf))])
-				}
 				return nil
 			}
 			return err
 		}
-		builder.Write(buf[:frameLen])
-		totalBytesRead += int(frameLen)
+		totalBytesRead += int(n)
 	}
 }

-func allocateBuffer(current []byte, needed int) []byte {
-	if cap(current) >= needed {
-		return current[:needed]
+// GetHostInfo fetches the system info from Docker
+func (dm *dockerManager) GetHostInfo() (info container.HostInfo, err error) {
+	resp, err := dm.client.Get("http://localhost/info")
+	if err != nil {
+		return info, err
 	}
-	return make([]byte, needed)
+	defer resp.Body.Close()
+
+	if err := json.NewDecoder(resp.Body).Decode(&info); err != nil {
+		return info, err
+	}
+
+	return info, nil
 }

-func min(a, b int) int {
-	if a < b {
-		return a
-	}
-	return b
+func (dm *dockerManager) IsPodman() bool {
+	return dm.usingPodman
 }
--- a/agent/docker_test.go
+++ b/agent/docker_test.go
@@ -1,17 +1,24 @@
 //go:build testing
-// +build testing

 package agent

 import (
 	"bytes"
+	"context"
 	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net"
+	"net/http"
+	"net/http/httptest"
 	"os"
 	"strings"
 	"testing"
 	"time"

 	"github.com/henrygd/beszel/agent/deltatracker"
+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/container"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
@@ -19,6 +26,43 @@ import (

 var defaultCacheTimeMs = uint16(60_000)

+type recordingRoundTripper struct { // test RoundTripper: returns a canned response and records the last request
+	statusCode  int               // status code placed on every response
+	body        string            // response body returned verbatim
+	contentType string            // Content-Type header; not set when empty
+	called      bool              // true once RoundTrip has run
+	lastPath    string            // escaped URL path of the most recent request
+	lastQuery   map[string]string // first value of each query parameter of the most recent request
+}
+
+type roundTripFunc func(*http.Request) (*http.Response, error) // function type given a RoundTrip method below
+
+func (fn roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) {
+	return fn(req) // delegate straight to the wrapped function
+}
+
+// RoundTrip records the request's escaped path and first query values, then returns the canned response.
+func (rt *recordingRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
+	rt.called = true
+	rt.lastPath = req.URL.EscapedPath()
+	query := req.URL.Query()
+	rt.lastQuery = make(map[string]string, len(query))
+	for key := range query {
+		rt.lastQuery[key] = query.Get(key) // first value only
+	}
+	resp := &http.Response{
+		StatusCode: rt.statusCode,
+		Status:     fmt.Sprintf("%d %s", rt.statusCode, http.StatusText(rt.statusCode)), // keep Status in step with StatusCode
+		Header:     make(http.Header),
+		Body:       io.NopCloser(strings.NewReader(rt.body)),
+		Request:    req,
+	}
+	if rt.contentType != "" {
+		resp.Header.Set("Content-Type", rt.contentType)
+	}
+	return resp, nil
+}
+
 // cycleCpuDeltas cycles the CPU tracking data for a specific cache time interval
 func (dm *dockerManager) cycleCpuDeltas(cacheTimeMs uint16) {
 	// Clear the CPU tracking maps for this cache time interval
@@ -110,6 +154,94 @@ func TestCalculateMemoryUsage(t *testing.T) {
 	}
 }

+// TestBuildDockerContainerEndpoint covers one accepted container ID and one rejected path-traversal style ID.
+func TestBuildDockerContainerEndpoint(t *testing.T) {
+	t.Run("valid container ID builds escaped endpoint", func(t *testing.T) {
+		endpoint, err := buildDockerContainerEndpoint("0123456789ab", "json", nil)
+		require.NoError(t, err)
+		assert.Equal(t, "http://localhost/containers/0123456789ab/json", endpoint)
+	})
+	t.Run("invalid container ID is rejected", func(t *testing.T) {
+		_, err := buildDockerContainerEndpoint("../../version", "json", nil)
+		require.Error(t, err)
+		assert.Contains(t, err.Error(), "invalid container id")
+	})
+}
+
+// TestContainerDetailsRequestsValidateContainerID checks that getContainerInfo rejects a bad ID without sending any request.
+func TestContainerDetailsRequestsValidateContainerID(t *testing.T) {
+	rt := &recordingRoundTripper{
+		statusCode: 200,
+		body:       `{"Config":{"Env":["SECRET=1"]}}`,
+	}
+	dm := &dockerManager{
+		client: &http.Client{Transport: rt},
+	}
+	_, err := dm.getContainerInfo(context.Background(), "../version")
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "invalid container id")
+	assert.False(t, rt.called, "request should be rejected before dispatching to Docker API")
+}
+
+// TestContainerDetailsRequestsUseExpectedDockerPaths checks the request path and query that getContainerInfo and getLogs send.
+func TestContainerDetailsRequestsUseExpectedDockerPaths(t *testing.T) {
+	t.Run("container info uses container json endpoint", func(t *testing.T) {
+		rt := &recordingRoundTripper{
+			statusCode: 200,
+			body:       `{"Config":{"Env":["SECRET=1"]},"Name":"demo"}`,
+		}
+		dm := &dockerManager{
+			client: &http.Client{Transport: rt},
+		}
+		body, err := dm.getContainerInfo(context.Background(), "0123456789ab")
+		require.NoError(t, err)
+		assert.True(t, rt.called)
+		assert.Equal(t, "/containers/0123456789ab/json", rt.lastPath)
+		assert.NotContains(t, string(body), "SECRET=1", "sensitive env vars should be removed")
+	})
+
+	t.Run("container logs uses expected endpoint and query params", func(t *testing.T) {
+		rt := &recordingRoundTripper{
+			statusCode: 200,
+			body:       "line1\nline2\n", // plain text with no Content-Type set on the response
+		}
+		dm := &dockerManager{
+			client: &http.Client{Transport: rt},
+		}
+
+		logs, err := dm.getLogs(context.Background(), "abcdef123456")
+		require.NoError(t, err)
+		assert.True(t, rt.called)
+		assert.Equal(t, "/containers/abcdef123456/logs", rt.lastPath)
+		assert.Equal(t, "1", rt.lastQuery["stdout"])
+		assert.Equal(t, "1", rt.lastQuery["stderr"])
+		assert.Equal(t, "200", rt.lastQuery["tail"])
+		assert.Equal(t, "line1\nline2\n", logs)
+	})
+}
+
+// TestGetPodmanContainerHealth checks that a "healthy" inspect response maps to container.DockerHealthHealthy.
+func TestGetPodmanContainerHealth(t *testing.T) {
+	called := false
+	dm := &dockerManager{
+		client: &http.Client{Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
+			called = true
+			assert.Equal(t, "/containers/0123456789ab/json", req.URL.EscapedPath())
+			return &http.Response{
+				StatusCode: http.StatusOK,
+				Status:     "200 OK",
+				Header:     make(http.Header),
+				Body:       io.NopCloser(strings.NewReader(`{"State":{"Health":{"Status":"healthy"}}}`)),
+				Request:    req,
+			}, nil
+		})},
+	}
+	health, err := dm.getPodmanContainerHealth("0123456789ab")
+	require.NoError(t, err)
+	assert.True(t, called)
+	assert.Equal(t, container.DockerHealthHealthy, health)
+}
+
 func TestValidateCpuPercentage(t *testing.T) {
 	tests := []struct {
 		name          string
@@ -184,58 +316,17 @@ func TestUpdateContainerStatsValues(t *testing.T) {
 	// Check memory (should be converted to MB: 1048576 bytes = 1 MB)
 	assert.Equal(t, 1.0, stats.Mem)

-	// Check network sent (should be converted to MB: 524288 bytes = 0.5 MB)
-	assert.Equal(t, 0.5, stats.NetworkSent)
+	// Check bandwidth (raw bytes)
+	assert.Equal(t, [2]uint64{524288, 262144}, stats.Bandwidth)

-	// Check network recv (should be converted to MB: 262144 bytes = 0.25 MB)
-	assert.Equal(t, 0.25, stats.NetworkRecv)
+	// Deprecated fields still populated for backward compatibility with older hubs
+	assert.Equal(t, 0.5, stats.NetworkSent)  // 524288 bytes = 0.5 MB
+	assert.Equal(t, 0.25, stats.NetworkRecv) // 262144 bytes = 0.25 MB

 	// Check read time
 	assert.Equal(t, testTime, stats.PrevReadTime)
 }

-func TestTwoDecimals(t *testing.T) {
-	tests := []struct {
-		name     string
-		input    float64
-		expected float64
-	}{
-		{"round down", 1.234, 1.23},
-		{"round half up", 1.235, 1.24}, // math.Round rounds half up
-		{"no rounding needed", 1.23, 1.23},
-		{"negative number", -1.235, -1.24}, // math.Round rounds half up (more negative)
-		{"zero", 0.0, 0.0},
-		{"large number", 123.456, 123.46}, // rounds 5 up
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result := twoDecimals(tt.input)
-			assert.Equal(t, tt.expected, result)
-		})
-	}
-}
-
-func TestBytesToMegabytes(t *testing.T) {
-	tests := []struct {
-		name     string
-		input    float64
-		expected float64
-	}{
-		{"1 MB", 1048576, 1.0},
-		{"512 KB", 524288, 0.5},
-		{"zero", 0, 0},
-		{"large value", 1073741824, 1024}, // 1 GB = 1024 MB
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result := bytesToMegabytes(tt.input)
-			assert.Equal(t, tt.expected, result)
-		})
-	}
-}
-
 func TestInitializeCpuTracking(t *testing.T) {
 	dm := &dockerManager{
 		lastCpuContainer: make(map[uint16]map[string]uint64),
@@ -378,6 +469,117 @@ func TestDockerManagerCreation(t *testing.T) {
 	assert.NotNil(t, dm.networkRecvTrackers)
 }

+// TestCheckDockerVersion checks goodDockerVersion and the number of /version requests for scripted server responses.
+func TestCheckDockerVersion(t *testing.T) {
+	tests := []struct {
+		name      string
+		responses []struct {
+			statusCode int
+			body       string
+		}
+		expectedGood     bool
+		expectedRequests int
+	}{
+		{
+			name: "200 with good version on first try",
+			responses: []struct {
+				statusCode int
+				body       string
+			}{
+				{http.StatusOK, `{"Version":"25.0.1"}`},
+			},
+			expectedGood:     true,
+			expectedRequests: 1,
+		},
+		{
+			name: "200 with old version on first try",
+			responses: []struct {
+				statusCode int
+				body       string
+			}{
+				{http.StatusOK, `{"Version":"24.0.7"}`},
+			},
+			expectedGood:     false,
+			expectedRequests: 1,
+		},
+		{
+			name: "non-200 then 200 with good version",
+			responses: []struct {
+				statusCode int
+				body       string
+			}{
+				{http.StatusServiceUnavailable, `"not ready"`},
+				{http.StatusOK, `{"Version":"25.1.0"}`},
+			},
+			expectedGood:     true,
+			expectedRequests: 2,
+		},
+		{
+			name: "non-200 on all retries",
+			responses: []struct {
+				statusCode int
+				body       string
+			}{
+				{http.StatusInternalServerError, `"error"`},
+				{http.StatusUnauthorized, `"error"`},
+			},
+			expectedGood:     false,
+			expectedRequests: 2,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			requestCount := 0
+			server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+				idx := requestCount
+				requestCount++
+				if idx >= len(tt.responses) {
+					idx = len(tt.responses) - 1 // repeat the last scripted response for any extra request
+				}
+				w.WriteHeader(tt.responses[idx].statusCode)
+				fmt.Fprint(w, tt.responses[idx].body)
+			}))
+			defer server.Close()
+
+			dm := &dockerManager{
+				client: &http.Client{
+					Transport: &http.Transport{
+						// ignore the requested address and always dial the test server
+						DialContext: func(_ context.Context, network, _ string) (net.Conn, error) {
+							return net.Dial(network, server.Listener.Addr().String())
+						},
+					},
+				},
+				retrySleep: func(time.Duration) {}, // no-op so retries do not wait
+			}
+
+			dm.checkDockerVersion()
+			assert.Equal(t, tt.expectedGood, dm.goodDockerVersion)
+			assert.Equal(t, tt.expectedRequests, requestCount)
+		})
+	}
+
+	t.Run("request error on all retries", func(t *testing.T) {
+		requestCount := 0
+		dm := &dockerManager{
+			client: &http.Client{
+				Transport: &http.Transport{
+					DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
+						requestCount++ // counts dial attempts, one per failed request
+						return nil, errors.New("connection refused")
+					},
+				},
+			},
+			retrySleep: func(time.Duration) {},
+		}
+
+		dm.checkDockerVersion()
+
+		assert.False(t, dm.goodDockerVersion)
+		assert.Equal(t, 2, requestCount)
+	})
+}
+
 func TestCycleCpuDeltas(t *testing.T) {
 	dm := &dockerManager{
 		lastCpuContainer: map[uint16]map[string]uint64{
@@ -527,8 +729,10 @@ func TestContainerStatsInitialization(t *testing.T) {

 	assert.Equal(t, 45.67, stats.Cpu)
 	assert.Equal(t, 2.0, stats.Mem)
-	assert.Equal(t, 1.0, stats.NetworkSent)
-	assert.Equal(t, 0.5, stats.NetworkRecv)
+	assert.Equal(t, [2]uint64{1048576, 524288}, stats.Bandwidth)
+	// Deprecated fields still populated for backward compatibility with older hubs
+	assert.Equal(t, 1.0, stats.NetworkSent) // 1048576 bytes = 1 MB
+	assert.Equal(t, 0.5, stats.NetworkRecv) // 524288 bytes = 0.5 MB
 	assert.Equal(t, testTime, stats.PrevReadTime)
 }

@@ -688,12 +892,50 @@ func TestContainerStatsEndToEndWithRealData(t *testing.T) {
 	updateContainerStatsValues(testStats, cpuPct, usedMemory, 1000000, 500000, testTime)

 	assert.Equal(t, cpuPct, testStats.Cpu)
-	assert.Equal(t, bytesToMegabytes(float64(usedMemory)), testStats.Mem)
-	assert.Equal(t, bytesToMegabytes(1000000), testStats.NetworkSent)
-	assert.Equal(t, bytesToMegabytes(500000), testStats.NetworkRecv)
+	assert.Equal(t, utils.BytesToMegabytes(float64(usedMemory)), testStats.Mem)
+	assert.Equal(t, [2]uint64{1000000, 500000}, testStats.Bandwidth)
+	// Deprecated fields still populated for backward compatibility with older hubs
+	assert.Equal(t, utils.BytesToMegabytes(1000000), testStats.NetworkSent)
+	assert.Equal(t, utils.BytesToMegabytes(500000), testStats.NetworkRecv)
 	assert.Equal(t, testTime, testStats.PrevReadTime)
 }

+// TestGetLogsDetectsMultiplexedWithoutContentType checks that getLogs decodes a
+// Docker multiplexed frame and returns only its payload ("Hello") when the
+// stubbed response is configured without a content type.
+func TestGetLogsDetectsMultiplexedWithoutContentType(t *testing.T) {
+	// Docker multiplexed frame: [stream][0,0,0][len(4 bytes BE)][payload]
+	frame := []byte{
+		0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05,
+		'H', 'e', 'l', 'l', 'o',
+	}
+	rt := &recordingRoundTripper{
+		statusCode: 200,
+		body:       string(frame),
+		// Intentionally omit content type to simulate Podman behavior.
+	}
+	dm := &dockerManager{
+		client: &http.Client{Transport: rt},
+	}
+
+	logs, err := dm.getLogs(context.Background(), "abcdef123456")
+	require.NoError(t, err)
+	// The 8-byte frame header must be stripped from the result.
+	assert.Equal(t, "Hello", logs)
+}
+
+// TestGetLogsDoesNotMisclassifyRawStreamAsMultiplexed checks that a body which
+// merely starts with 0x01 is returned byte-for-byte by getLogs instead of being
+// decoded as a multiplexed frame.
+func TestGetLogsDoesNotMisclassifyRawStreamAsMultiplexed(t *testing.T) {
+	// Starts with 0x01, but doesn't match Docker frame signature (reserved bytes aren't all zero).
+	raw := []byte{0x01, 0x02, 0x03, 0x04, 'r', 'a', 'w'}
+	rt := &recordingRoundTripper{
+		statusCode: 200,
+		body:       string(raw),
+	}
+	dm := &dockerManager{
+		client: &http.Client{Transport: rt},
+	}
+
+	logs, err := dm.getLogs(context.Background(), "abcdef123456")
+	require.NoError(t, err)
+	// Compare as bytes: the payload contains non-printable characters.
+	assert.Equal(t, raw, []byte(logs))
+}
+
 func TestEdgeCasesWithRealData(t *testing.T) {
 	// Test with minimal container stats
 	minimalStats := &container.ApiStats{
@@ -802,6 +1044,24 @@ func TestNetworkRateCalculationFormula(t *testing.T) {
 	}
 }

+// TestGetHostInfo unmarshals the test-data/system_info.json fixture into
+// container.HostInfo and checks the kernel version, operating system, CPU count
+// and total memory fields.
+func TestGetHostInfo(t *testing.T) {
+	data, err := os.ReadFile("test-data/system_info.json")
+	require.NoError(t, err)
+
+	var info container.HostInfo
+	err = json.Unmarshal(data, &info)
+	require.NoError(t, err)
+
+	assert.Equal(t, "6.8.0-31-generic", info.KernelVersion)
+	assert.Equal(t, "Ubuntu 24.04 LTS", info.OperatingSystem)
+	// NOTE(review): the assertions below are disabled; presumably these fields
+	// are not decoded by container.HostInfo - confirm before re-enabling.
+	// assert.Equal(t, "24.04", info.OSVersion)
+	// assert.Equal(t, "linux", info.OSType)
+	// assert.Equal(t, "x86_64", info.Architecture)
+	assert.EqualValues(t, 4, info.NCPU)
+	assert.EqualValues(t, 2095882240, info.MemTotal)
+	// assert.Equal(t, "27.0.1", info.ServerVersion)
+}
+
 func TestDeltaTrackerCacheTimeIsolation(t *testing.T) {
 	// Test that different cache times have separate DeltaTracker instances
 	dm := &dockerManager{
@@ -897,6 +1157,18 @@ func TestParseDockerStatus(t *testing.T) {
 			expectedStatus: "",
 			expectedHealth: container.DockerHealthNone,
 		},
+		{
+			name:           "status health with health: prefix",
+			input:          "Up 5 minutes (health: starting)",
+			expectedStatus: "Up 5 minutes",
+			expectedHealth: container.DockerHealthStarting,
+		},
+		{
+			name:           "status health with health status: prefix",
+			input:          "Up 10 minutes (health status: unhealthy)",
+			expectedStatus: "Up 10 minutes",
+			expectedHealth: container.DockerHealthUnhealthy,
+		},
 	}

 	for _, tt := range tests {
@@ -908,6 +1180,84 @@ func TestParseDockerStatus(t *testing.T) {
 	}
 }

+// TestParseDockerHealthStatus is a table-driven test for
+// parseDockerHealthStatus covering the four known health strings, input with
+// surrounding whitespace and mixed case, and unrecognised or empty input.
+func TestParseDockerHealthStatus(t *testing.T) {
+	tests := []struct {
+		input          string
+		expectedHealth container.DockerHealth
+		expectedOk     bool
+	}{
+		{"healthy", container.DockerHealthHealthy, true},
+		{"unhealthy", container.DockerHealthUnhealthy, true},
+		{"starting", container.DockerHealthStarting, true},
+		{"none", container.DockerHealthNone, true},
+		{" Healthy ", container.DockerHealthHealthy, true},
+		// Unrecognised input is expected to yield DockerHealthNone with ok=false.
+		{"unknown", container.DockerHealthNone, false},
+		{"", container.DockerHealthNone, false},
+	}
+	for _, tt := range tests {
+		// The input doubles as the subtest name.
+		t.Run(tt.input, func(t *testing.T) {
+			health, ok := parseDockerHealthStatus(tt.input)
+			assert.Equal(t, tt.expectedHealth, health)
+			assert.Equal(t, tt.expectedOk, ok)
+		})
+	}
+}
+
+// TestUpdateContainerStatsUsesPodmanInspectHealthFallback checks that, with
+// usingPodman set and a container Status string that carries no health
+// information, updateContainerStats requests the stats endpoint followed by the
+// inspect (/json) endpoint and stores the health reported by the latter.
+func TestUpdateContainerStatsUsesPodmanInspectHealthFallback(t *testing.T) {
+	// Records every requested path, in order, so the call sequence can be asserted.
+	var requestedPaths []string
+	dm := &dockerManager{
+		client: &http.Client{Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
+			requestedPaths = append(requestedPaths, req.URL.EscapedPath())
+			switch req.URL.EscapedPath() {
+			case "/containers/0123456789ab/stats":
+				return &http.Response{
+					StatusCode: http.StatusOK,
+					Status:     "200 OK",
+					Header:     make(http.Header),
+					Body: io.NopCloser(strings.NewReader(`{
+						"read":"2026-03-15T21:26:59Z",
+						"cpu_stats":{"cpu_usage":{"total_usage":1000},"system_cpu_usage":2000},
+						"memory_stats":{"usage":1048576,"stats":{"inactive_file":262144}},
+						"networks":{"eth0":{"rx_bytes":0,"tx_bytes":0}}
+					}`)),
+					Request: req,
+				}, nil
+			case "/containers/0123456789ab/json":
+				return &http.Response{
+					StatusCode: http.StatusOK,
+					Status:     "200 OK",
+					Header:     make(http.Header),
+					Body:       io.NopCloser(strings.NewReader(`{"State":{"Health":{"Status":"healthy"}}}`)),
+					Request:    req,
+				}, nil
+			default:
+				// Any other endpoint is a test failure surfaced as a transport error.
+				return nil, fmt.Errorf("unexpected path: %s", req.URL.EscapedPath())
+			}
+		})},
+		containerStatsMap:   make(map[string]*container.Stats),
+		apiStats:            &container.ApiStats{},
+		usingPodman:         true,
+		lastCpuContainer:    make(map[uint16]map[string]uint64),
+		lastCpuSystem:       make(map[uint16]map[string]uint64),
+		lastCpuReadTime:     make(map[uint16]map[string]time.Time),
+		networkSentTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
+		networkRecvTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
+	}
+
+	// Status has no "(healthy)"-style suffix, so health cannot come from it.
+	ctr := &container.ApiInfo{
+		IdShort: "0123456789ab",
+		Names:   []string{"/beszel"},
+		Status:  "Up 2 minutes",
+		Image:   "beszel:latest",
+	}
+
+	err := dm.updateContainerStats(ctr, defaultCacheTimeMs)
+	require.NoError(t, err)
+	assert.Equal(t, []string{"/containers/0123456789ab/stats", "/containers/0123456789ab/json"}, requestedPaths)
+	assert.Equal(t, container.DockerHealthHealthy, dm.containerStatsMap[ctr.IdShort].Health)
+	assert.Equal(t, "Up 2 minutes", dm.containerStatsMap[ctr.IdShort].Status)
+}
+
 func TestConstantsAndUtilityFunctions(t *testing.T) {
 	// Test constants are properly defined
 	assert.Equal(t, uint16(60000), defaultCacheTimeMs)
@@ -917,13 +1267,13 @@ func TestConstantsAndUtilityFunctions(t *testing.T) {
 	assert.Equal(t, 5*1024*1024, maxTotalLogSize)               // 5MB

 	// Test utility functions
-	assert.Equal(t, 1.5, twoDecimals(1.499))
-	assert.Equal(t, 1.5, twoDecimals(1.5))
-	assert.Equal(t, 1.5, twoDecimals(1.501))
+	assert.Equal(t, 1.5, utils.TwoDecimals(1.499))
+	assert.Equal(t, 1.5, utils.TwoDecimals(1.5))
+	assert.Equal(t, 1.5, utils.TwoDecimals(1.501))

-	assert.Equal(t, 1.0, bytesToMegabytes(1048576)) // 1 MB
-	assert.Equal(t, 0.5, bytesToMegabytes(524288))  // 512 KB
-	assert.Equal(t, 0.0, bytesToMegabytes(0))
+	assert.Equal(t, 1.0, utils.BytesToMegabytes(1048576)) // 1 MB
+	assert.Equal(t, 0.5, utils.BytesToMegabytes(524288))  // 512 KB
+	assert.Equal(t, 0.0, utils.BytesToMegabytes(0))
 }

 func TestDecodeDockerLogStream(t *testing.T) {
@@ -932,6 +1282,7 @@ func TestDecodeDockerLogStream(t *testing.T) {
 		input       []byte
 		expected    string
 		expectError bool
+		multiplexed bool
 	}{
 		{
 			name: "simple log entry",
@@ -942,6 +1293,7 @@ func TestDecodeDockerLogStream(t *testing.T) {
 			},
 			expected:    "Hello World",
 			expectError: false,
+			multiplexed: true,
 		},
 		{
 			name: "multiple frames",
@@ -955,6 +1307,7 @@ func TestDecodeDockerLogStream(t *testing.T) {
 			},
 			expected:    "HelloWorld",
 			expectError: false,
+			multiplexed: true,
 		},
 		{
 			name: "zero length frame",
@@ -967,12 +1320,20 @@ func TestDecodeDockerLogStream(t *testing.T) {
 			},
 			expected:    "Hello",
 			expectError: false,
+			multiplexed: true,
 		},
 		{
 			name:        "empty input",
 			input:       []byte{},
 			expected:    "",
 			expectError: false,
+			multiplexed: true,
+		},
+		{
+			name:        "raw stream (not multiplexed)",
+			input:       []byte("raw log content"),
+			expected:    "raw log content",
+			multiplexed: false,
 		},
 	}

@@ -980,7 +1341,7 @@ func TestDecodeDockerLogStream(t *testing.T) {
 		t.Run(tt.name, func(t *testing.T) {
 			reader := bytes.NewReader(tt.input)
 			var builder strings.Builder
-			err := decodeDockerLogStream(reader, &builder)
+			err := decodeDockerLogStream(reader, &builder, tt.multiplexed)

 			if tt.expectError {
 				assert.Error(t, err)
@@ -1004,7 +1365,7 @@ func TestDecodeDockerLogStreamMemoryProtection(t *testing.T) {

 		reader := bytes.NewReader(input)
 		var builder strings.Builder
-		err := decodeDockerLogStream(reader, &builder)
+		err := decodeDockerLogStream(reader, &builder, true)

 		assert.Error(t, err)
 		assert.Contains(t, err.Error(), "log frame size")
@@ -1038,7 +1399,7 @@ func TestDecodeDockerLogStreamMemoryProtection(t *testing.T) {

 		reader := bytes.NewReader(input)
 		var builder strings.Builder
-		err := decodeDockerLogStream(reader, &builder)
+		err := decodeDockerLogStream(reader, &builder, true)

 		// Should complete without error (graceful truncation)
 		assert.NoError(t, err)
@@ -1053,53 +1414,6 @@ func TestDecodeDockerLogStreamMemoryProtection(t *testing.T) {
 	})
 }

-func TestAllocateBuffer(t *testing.T) {
-	tests := []struct {
-		name          string
-		currentCap    int
-		needed        int
-		expectedCap   int
-		shouldRealloc bool
-	}{
-		{
-			name:          "buffer has enough capacity",
-			currentCap:    1024,
-			needed:        512,
-			expectedCap:   1024,
-			shouldRealloc: false,
-		},
-		{
-			name:          "buffer needs reallocation",
-			currentCap:    512,
-			needed:        1024,
-			expectedCap:   1024,
-			shouldRealloc: true,
-		},
-		{
-			name:          "buffer needs exact size",
-			currentCap:    1024,
-			needed:        1024,
-			expectedCap:   1024,
-			shouldRealloc: false,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			current := make([]byte, 0, tt.currentCap)
-			result := allocateBuffer(current, tt.needed)
-
-			assert.Equal(t, tt.needed, len(result))
-			assert.GreaterOrEqual(t, cap(result), tt.expectedCap)
-
-			if tt.shouldRealloc {
-				// If reallocation was needed, capacity should be at least the needed size
-				assert.GreaterOrEqual(t, cap(result), tt.needed)
-			}
-		})
-	}
-}
-
 func TestShouldExcludeContainer(t *testing.T) {
 	tests := []struct {
 		name          string
@@ -1203,3 +1517,155 @@ func TestShouldExcludeContainer(t *testing.T) {
 		})
 	}
 }
+
+// TestAnsiEscapePattern checks that replacing every ansiEscapePattern match
+// with the empty string removes ANSI escape sequences and leaves the remaining
+// text, including newlines, untouched.
+func TestAnsiEscapePattern(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "no ANSI codes",
+			input:    "Hello, World!",
+			expected: "Hello, World!",
+		},
+		{
+			name:     "simple color code",
+			input:    "\x1b[34mINFO\x1b[0m client mode",
+			expected: "INFO client mode",
+		},
+		{
+			name:     "multiple color codes",
+			input:    "\x1b[31mERROR\x1b[0m: \x1b[33mWarning\x1b[0m message",
+			expected: "ERROR: Warning message",
+		},
+		{
+			name:     "bold and color",
+			input:    "\x1b[1;32mSUCCESS\x1b[0m",
+			expected: "SUCCESS",
+		},
+		{
+			// A non-colour sequence: parameterless CSI ending in 'K' rather than 'm'.
+			name:     "cursor movement codes",
+			input:    "Line 1\x1b[KLine 2",
+			expected: "Line 1Line 2",
+		},
+		{
+			name:     "256 color code",
+			input:    "\x1b[38;5;196mRed text\x1b[0m",
+			expected: "Red text",
+		},
+		{
+			name:     "RGB/truecolor code",
+			input:    "\x1b[38;2;255;0;0mRed text\x1b[0m",
+			expected: "Red text",
+		},
+		{
+			name:     "mixed content with newlines",
+			input:    "\x1b[34m2024-01-01 12:00:00\x1b[0m INFO Starting\n\x1b[31m2024-01-01 12:00:01\x1b[0m ERROR Failed",
+			expected: "2024-01-01 12:00:00 INFO Starting\n2024-01-01 12:00:01 ERROR Failed",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := ansiEscapePattern.ReplaceAllString(tt.input, "")
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+// TestConvertContainerPortsToString is a table-driven test for
+// convertContainerPortsToString. It covers empty input, default (0.0.0.0 and ::)
+// versus explicit bind IPs, skipping of zero public ports, ascending ordering,
+// de-duplication, and the requirement that ctr.Ports is nil after the call.
+func TestConvertContainerPortsToString(t *testing.T) {
+	// port aliases the anonymous element type appended to container.ApiInfo.Ports.
+	type port = struct {
+		PublicPort uint16
+		IP         string
+	}
+	tests := []struct {
+		name     string
+		ports    []port
+		expected string
+	}{
+		{
+			name:     "empty ports",
+			ports:    nil,
+			expected: "",
+		},
+		{
+			name: "single port",
+			ports: []port{
+				{PublicPort: 80, IP: "0.0.0.0"},
+			},
+			expected: "80",
+		},
+		{
+			name: "single port with non-default IP",
+			ports: []port{
+				{PublicPort: 80, IP: "1.2.3.4"},
+			},
+			expected: "1.2.3.4:80",
+		},
+		{
+			name: "ipv6 default ip",
+			ports: []port{
+				{PublicPort: 80, IP: "::"},
+			},
+			expected: "80",
+		},
+		{
+			name: "zero PublicPort is skipped",
+			ports: []port{
+				{PublicPort: 0, IP: "0.0.0.0"},
+				{PublicPort: 80, IP: "0.0.0.0"},
+			},
+			expected: "80",
+		},
+		{
+			name: "ports sorted ascending by PublicPort",
+			ports: []port{
+				{PublicPort: 443, IP: "0.0.0.0"},
+				{PublicPort: 80, IP: "0.0.0.0"},
+				{PublicPort: 8080, IP: "0.0.0.0"},
+			},
+			expected: "80, 443, 8080",
+		},
+		{
+			name: "duplicates are deduplicated",
+			ports: []port{
+				{PublicPort: 80, IP: "0.0.0.0"},
+				{PublicPort: 80, IP: "0.0.0.0"},
+				{PublicPort: 443, IP: "0.0.0.0"},
+			},
+			expected: "80, 443",
+		},
+		{
+			name: "multiple ports with different IPs",
+			ports: []port{
+				{PublicPort: 80, IP: "0.0.0.0"},
+				{PublicPort: 443, IP: "1.2.3.4"},
+			},
+			expected: "80, 1.2.3.4:443",
+		},
+		{
+			name: "ports slice is nilled after call",
+			ports: []port{
+				{PublicPort: 8080, IP: "0.0.0.0"},
+			},
+			expected: "8080",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			ctr := &container.ApiInfo{}
+			// Append element by element so ctr.Ports stays nil for the empty case.
+			for _, p := range tt.ports {
+				ctr.Ports = append(ctr.Ports, p)
+			}
+			result := convertContainerPortsToString(ctr)
+			assert.Equal(t, tt.expected, result)
+			// Ports slice must be cleared to prevent bleed-over into the next response
+			assert.Nil(t, ctr.Ports, "ctr.Ports should be nil after convertContainerPortsToString")
+		})
+	}
+}
--- a/agent/emmc_common.go
+++ b/agent/emmc_common.go
@@ -0,0 +1,95 @@
+package agent
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+)
+
+func isEmmcBlockName(name string) bool {
+	if !strings.HasPrefix(name, "mmcblk") {
+		return false
+	}
+	suffix := strings.TrimPrefix(name, "mmcblk")
+	if suffix == "" {
+		return false
+	}
+	for _, c := range suffix {
+		if c < '0' || c > '9' {
+			return false
+		}
+	}
+	return true
+}
+
+// parseHexOrDecByte parses s as a single unsigned byte. Surrounding whitespace
+// is ignored. A leading "0x" or "0X" selects base 16; anything else is read as
+// base 10. The second result is false for empty input, a bare hex prefix,
+// non-numeric text, or a value that does not fit in 8 bits.
+func parseHexOrDecByte(s string) (uint8, bool) {
+	text := strings.TrimSpace(s)
+	if len(text) == 0 {
+		return 0, false
+	}
+
+	radix := 10
+	if len(text) >= 2 && text[0] == '0' && (text[1] == 'x' || text[1] == 'X') {
+		radix, text = 16, text[2:]
+	}
+
+	// bitSize 8 makes ParseUint reject anything above 255.
+	value, err := strconv.ParseUint(text, radix, 8)
+	if err != nil {
+		return 0, false
+	}
+	return uint8(value), true
+}
+
+// parseHexBytePair parses the first two whitespace-separated fields of s with
+// parseHexOrDecByte. It fails when fewer than two fields are present or when
+// neither field parses. If exactly one field parses, the call still succeeds
+// and the unparsable field is returned as 0.
+//
+// NOTE(review): accepting a half-valid pair mirrors the "okA || okB" handling
+// in readLifeTime and looks deliberate - confirm.
+func parseHexBytePair(s string) (uint8, uint8, bool) {
+	tokens := strings.Fields(s)
+	if len(tokens) < 2 {
+		return 0, 0, false
+	}
+	first, firstOK := parseHexOrDecByte(tokens[0])
+	second, secondOK := parseHexOrDecByte(tokens[1])
+	if firstOK || secondOK {
+		return first, second, true
+	}
+	return 0, 0, false
+}
+
+// emmcSmartStatus maps a PRE_EOL_INFO value to a status string:
+// 0x01 => "PASSED", 0x02 => "WARNING", 0x03 => "FAILED", and every other
+// value => "UNKNOWN".
+func emmcSmartStatus(preEOL uint8) string {
+	// Indexed by the PRE_EOL_INFO value; index 0 is intentionally left empty.
+	statuses := [...]string{0x01: "PASSED", 0x02: "WARNING", 0x03: "FAILED"}
+	if preEOL >= 0x01 && int(preEOL) < len(statuses) {
+		return statuses[preEOL]
+	}
+	return "UNKNOWN"
+}
+
+// emmcPreEOLString renders a PRE_EOL_INFO value for display. The values 0x01,
+// 0x02 and 0x03 get a "(normal)", "(warning)" or "(urgent)" suffix; any other
+// value is shown as plain two-digit lowercase hex.
+func emmcPreEOLString(preEOL uint8) string {
+	if preEOL == 0x01 {
+		return "0x01 (normal)"
+	}
+	if preEOL == 0x02 {
+		return "0x02 (warning)"
+	}
+	if preEOL == 0x03 {
+		return "0x03 (urgent)"
+	}
+	return fmt.Sprintf("0x%02x", preEOL)
+}
+
+// emmcLifeTimeString renders a device life-time estimate for display.
+//
+// JEDEC eMMC: 0x01..0x0A => 0-100% used in 10% steps, 0x0B => exceeded.
+// Zero is shown as "not reported"; values above 0x0B are shown as plain hex.
+func emmcLifeTimeString(v uint8) string {
+	if v == 0 {
+		return "0x00 (not reported)"
+	}
+	if v == 0x0B {
+		return "0x0b (>100% used)"
+	}
+	if v > 0x0B {
+		return fmt.Sprintf("0x%02x", v)
+	}
+	// v is in 0x01..0x0A here: each step covers a 10% wide band ending at v*10.
+	upper := int(v) * 10
+	return fmt.Sprintf("0x%02x (%d-%d%% used)", v, upper-10, upper)
+}
--- a/agent/emmc_common_test.go
+++ b/agent/emmc_common_test.go
@@ -0,0 +1,78 @@
+package agent
+
+import "testing"
+
+// TestParseHexOrDecByte is a table-driven test for parseHexOrDecByte covering
+// hex input with either prefix case, decimal input, surrounding whitespace, and
+// the rejected inputs: empty string, bare hex prefix and non-numeric text.
+func TestParseHexOrDecByte(t *testing.T) {
+	tests := []struct {
+		in   string
+		want uint8
+		ok   bool
+	}{
+		{"0x01", 1, true},
+		{"0X0b", 11, true},
+		{"01", 1, true},
+		{" 3 ", 3, true},
+		{"", 0, false},
+		{"0x", 0, false},
+		{"nope", 0, false},
+	}
+
+	for _, tt := range tests {
+		got, ok := parseHexOrDecByte(tt.in)
+		// Fatalf stops at the first mismatch; later rows are not evaluated.
+		if ok != tt.ok || got != tt.want {
+			t.Fatalf("parseHexOrDecByte(%q) = (%d,%v), want (%d,%v)", tt.in, got, ok, tt.want, tt.ok)
+		}
+	}
+}
+
+// TestParseHexBytePair checks parseHexBytePair with a hex pair followed by a
+// newline, a decimal pair, and a single-field input that must be rejected.
+func TestParseHexBytePair(t *testing.T) {
+	a, b, ok := parseHexBytePair("0x01 0x02\n")
+	if !ok || a != 1 || b != 2 {
+		t.Fatalf("parseHexBytePair hex = (%d,%d,%v), want (1,2,true)", a, b, ok)
+	}
+
+	a, b, ok = parseHexBytePair("01 02")
+	if !ok || a != 1 || b != 2 {
+		t.Fatalf("parseHexBytePair dec = (%d,%d,%v), want (1,2,true)", a, b, ok)
+	}
+
+	// Only one field present: must fail regardless of its value.
+	_, _, ok = parseHexBytePair("0x01")
+	if ok {
+		t.Fatalf("parseHexBytePair short input ok=true, want false")
+	}
+}
+
+// TestEmmcSmartStatus checks the status string emmcSmartStatus returns for the
+// three defined PRE_EOL_INFO values and for 0x00, which must map to UNKNOWN.
+func TestEmmcSmartStatus(t *testing.T) {
+	if got := emmcSmartStatus(0x01); got != "PASSED" {
+		t.Fatalf("emmcSmartStatus(0x01) = %q, want PASSED", got)
+	}
+	if got := emmcSmartStatus(0x02); got != "WARNING" {
+		t.Fatalf("emmcSmartStatus(0x02) = %q, want WARNING", got)
+	}
+	if got := emmcSmartStatus(0x03); got != "FAILED" {
+		t.Fatalf("emmcSmartStatus(0x03) = %q, want FAILED", got)
+	}
+	if got := emmcSmartStatus(0x00); got != "UNKNOWN" {
+		t.Fatalf("emmcSmartStatus(0x00) = %q, want UNKNOWN", got)
+	}
+}
+
+// TestIsEmmcBlockName is a table-driven test for isEmmcBlockName: whole-disk
+// names are accepted, while partition names, non-MMC names, the bare prefix and
+// a non-numeric suffix are rejected.
+func TestIsEmmcBlockName(t *testing.T) {
+	cases := []struct {
+		name string
+		ok   bool
+	}{
+		{"mmcblk0", true},
+		{"mmcblk1", true},
+		{"mmcblk10", true},
+		{"mmcblk0p1", false}, // partition, not a whole disk
+		{"sda", false},
+		{"mmcblk", false}, // prefix without an index
+		{"mmcblkA", false},
+	}
+	for _, c := range cases {
+		if got := isEmmcBlockName(c.name); got != c.ok {
+			t.Fatalf("isEmmcBlockName(%q) = %v, want %v", c.name, got, c.ok)
+		}
+	}
+}
--- a/agent/emmc_linux.go
+++ b/agent/emmc_linux.go
@@ -0,0 +1,215 @@
+//go:build linux
+
+package agent
+
+import (
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+
+	"github.com/henrygd/beszel/agent/utils"
+	"github.com/henrygd/beszel/internal/entities/smart"
+)
+
+// emmcSysfsRoot is a test hook; production value is "/sys".
+var emmcSysfsRoot = "/sys"
+
+// emmcHealth holds the identity and wear values that readEmmcHealth collects
+// from sysfs for a single eMMC block device.
+type emmcHealth struct {
+	model    string // contents of the device "name" file
+	serial   string // contents of the device "serial" file
+	revision string // contents of the device "prv" file
+	capacity uint64 // bytes, from readBlockCapacityBytes; stays 0 when unavailable
+	preEOL   uint8  // value of "pre_eol_info"
+	lifeA    uint8  // first life-time estimate, as returned by readLifeTime
+	lifeB    uint8  // second life-time estimate, as returned by readLifeTime
+}
+
+// scanEmmcDevices lists <emmcSysfsRoot>/class/block and returns one DeviceInfo
+// (Type "emmc", Protocol "MMC", Name "/dev/<entry>") for every entry that is a
+// whole eMMC disk name and whose "device" directory contains at least one of
+// the health files recognised by hasEmmcHealthFiles.
+//
+// It returns nil when the block directory cannot be read and an empty, non-nil
+// slice when the directory is readable but nothing matches.
+func scanEmmcDevices() []*DeviceInfo {
+	classBlock := filepath.Join(emmcSysfsRoot, "class", "block")
+	dirEntries, err := os.ReadDir(classBlock)
+	if err != nil {
+		return nil
+	}
+
+	found := make([]*DeviceInfo, 0, 2)
+	for _, entry := range dirEntries {
+		blockName := entry.Name()
+		// The name check runs first so non-eMMC entries never touch the filesystem.
+		if isEmmcBlockName(blockName) && hasEmmcHealthFiles(filepath.Join(classBlock, blockName, "device")) {
+			node := filepath.Join("/dev", blockName)
+			found = append(found, &DeviceInfo{
+				Name:     node,
+				Type:     "emmc",
+				InfoName: node + " [eMMC]",
+				Protocol: "MMC",
+			})
+		}
+	}
+
+	return found
+}
+
+// collectEmmcHealth reads eMMC health for deviceInfo from sysfs and stores it
+// in sm.SmartDataMap.
+//
+// It returns (false, nil) without touching the map when deviceInfo is nil or
+// unnamed, when neither the device's base name nor its Type marks it as
+// eMMC/MMC, or when readEmmcHealth finds nothing. On success it returns
+// (true, nil), sets deviceInfo.Type to "emmc", and creates or overwrites the
+// map entry keyed by the serial number (falling back to "/dev/<name>" when the
+// serial is empty). The error result is always nil.
+func (sm *SmartManager) collectEmmcHealth(deviceInfo *DeviceInfo) (bool, error) {
+	if deviceInfo == nil || deviceInfo.Name == "" {
+		return false, nil
+	}
+
+	blockName := filepath.Base(deviceInfo.Name)
+	looksLikeEmmc := isEmmcBlockName(blockName) ||
+		strings.EqualFold(deviceInfo.Type, "emmc") ||
+		strings.EqualFold(deviceInfo.Type, "mmc")
+	if !looksLikeEmmc {
+		return false, nil
+	}
+
+	health, ok := readEmmcHealth(blockName)
+	if !ok {
+		return false, nil
+	}
+
+	// Normalize the device type to keep pruning logic stable across refreshes.
+	deviceInfo.Type = "emmc"
+
+	diskName := filepath.Join("/dev", blockName)
+	mapKey := health.serial
+	if mapKey == "" {
+		mapKey = diskName
+	}
+
+	// Everything derived purely from the readings is built before taking the lock.
+	smartStatus := emmcSmartStatus(health.preEOL)
+	attributes := []*smart.SmartAttribute{
+		{
+			Name:      "PreEOLInfo",
+			RawValue:  uint64(health.preEOL),
+			RawString: emmcPreEOLString(health.preEOL),
+		},
+		{
+			Name:      "DeviceLifeTimeEstA",
+			RawValue:  uint64(health.lifeA),
+			RawString: emmcLifeTimeString(health.lifeA),
+		},
+		{
+			Name:      "DeviceLifeTimeEstB",
+			RawValue:  uint64(health.lifeB),
+			RawString: emmcLifeTimeString(health.lifeB),
+		},
+	}
+
+	sm.Lock()
+	defer sm.Unlock()
+
+	entry, known := sm.SmartDataMap[mapKey]
+	if !known {
+		entry = &smart.SmartData{}
+		sm.SmartDataMap[mapKey] = entry
+	}
+
+	entry.ModelName = health.model
+	entry.SerialNumber = health.serial
+	entry.FirmwareVersion = health.revision
+	entry.Capacity = health.capacity
+	entry.Temperature = 0
+	entry.SmartStatus = smartStatus
+	entry.DiskName = diskName
+	entry.DiskType = "emmc"
+	entry.Attributes = attributes
+
+	return true, nil
+}
+
+func readEmmcHealth(blockName string) (emmcHealth, bool) {
+	var out emmcHealth
+
+	if !isEmmcBlockName(blockName) {
+		return out, false
+	}
+
+	deviceDir := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "device")
+	preEOL, okPre := readHexByteFile(filepath.Join(deviceDir, "pre_eol_info"))
+
+	// Some kernels expose EXT_CSD lifetime via "life_time" (two bytes), others as
+	// separate files. Support both.
+	lifeA, lifeB, okLife := readLifeTime(deviceDir)
+
+	if !okPre && !okLife {
+		return out, false
+	}
+
+	out.preEOL = preEOL
+	out.lifeA = lifeA
+	out.lifeB = lifeB
+
+	out.model = utils.ReadStringFile(filepath.Join(deviceDir, "name"))
+	out.serial = utils.ReadStringFile(filepath.Join(deviceDir, "serial"))
+	out.revision = utils.ReadStringFile(filepath.Join(deviceDir, "prv"))
+
+	if capBytes, ok := readBlockCapacityBytes(blockName); ok {
+		out.capacity = capBytes
+	}
+
+	return out, true
+}
+
+// readLifeTime returns the two device life-time estimates found in deviceDir.
+//
+// If a "life_time" file can be read, its content is parsed as a pair and that
+// result is returned as-is; the per-estimate files are not consulted even when
+// parsing fails. Otherwise "device_life_time_est_typ_a" and
+// "device_life_time_est_typ_b" are read individually, and the call succeeds
+// when at least one of them yields a value (the other is reported as 0).
+func readLifeTime(deviceDir string) (uint8, uint8, bool) {
+	combined, found := utils.ReadStringFileOK(filepath.Join(deviceDir, "life_time"))
+	if found {
+		return parseHexBytePair(combined)
+	}
+
+	estA, haveA := readHexByteFile(filepath.Join(deviceDir, "device_life_time_est_typ_a"))
+	estB, haveB := readHexByteFile(filepath.Join(deviceDir, "device_life_time_est_typ_b"))
+	if !haveA && !haveB {
+		return 0, 0, false
+	}
+	return estA, estB, true
+}
+
+func readBlockCapacityBytes(blockName string) (uint64, bool) {
+	sizePath := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "size")
+	lbsPath := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "queue", "logical_block_size")
+
+	sizeStr, ok := utils.ReadStringFileOK(sizePath)
+	if !ok {
+		return 0, false
+	}
+	sectors, err := strconv.ParseUint(sizeStr, 10, 64)
+	if err != nil || sectors == 0 {
+		return 0, false
+	}
+
+	lbsStr, ok := utils.ReadStringFileOK(lbsPath)
+	logicalBlockSize := uint64(512)
+	if ok {
+		if parsed, err := strconv.ParseUint(lbsStr, 10, 64); err == nil && parsed > 0 {
+			logicalBlockSize = parsed
+		}
+	}
+
+	return sectors * logicalBlockSize, true
+}
+
+// readHexByteFile reads path via utils.ReadStringFileOK and parses its content
+// with parseHexOrDecByte. The second result is false when the read fails or the
+// content does not parse as a single byte.
+func readHexByteFile(path string) (uint8, bool) {
+	if raw, found := utils.ReadStringFileOK(path); found {
+		return parseHexOrDecByte(raw)
+	}
+	return 0, false
+}
+
+// hasEmmcHealthFiles reports whether deviceDir contains at least one entry
+// named "pre_eol_info", "life_time", "device_life_time_est_typ_a" or
+// "device_life_time_est_typ_b". An unreadable directory counts as having none.
+func hasEmmcHealthFiles(deviceDir string) bool {
+	listing, err := os.ReadDir(deviceDir)
+	if err != nil {
+		return false
+	}
+	for _, item := range listing {
+		n := item.Name()
+		if n == "pre_eol_info" || n == "life_time" ||
+			n == "device_life_time_est_typ_a" || n == "device_life_time_est_typ_b" {
+			return true
+		}
+	}
+	return false
+}
--- a/agent/emmc_linux_test.go
+++ b/agent/emmc_linux_test.go
@@ -0,0 +1,80 @@
+//go:build linux
+
+package agent
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/henrygd/beszel/internal/entities/smart"
+)
+
+// TestEmmcMockSysfsScanAndCollect builds a fake sysfs tree for a single
+// mmcblk0 device in a temp directory, points emmcSysfsRoot at it, and then
+// checks that scanEmmcDevices discovers the device and that collectEmmcHealth
+// stores the expected SMART data for it.
+func TestEmmcMockSysfsScanAndCollect(t *testing.T) {
+	tmp := t.TempDir()
+	// Redirect sysfs lookups to the temp tree and restore the root afterwards.
+	prev := emmcSysfsRoot
+	emmcSysfsRoot = tmp
+	t.Cleanup(func() { emmcSysfsRoot = prev })
+
+	// Fake: /sys/class/block/mmcblk0
+	mmcDeviceDir := filepath.Join(tmp, "class", "block", "mmcblk0", "device")
+	mmcQueueDir := filepath.Join(tmp, "class", "block", "mmcblk0", "queue")
+	if err := os.MkdirAll(mmcDeviceDir, 0o755); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.MkdirAll(mmcQueueDir, 0o755); err != nil {
+		t.Fatal(err)
+	}
+
+	// write creates a fixture file and aborts the test on failure.
+	write := func(path, content string) {
+		t.Helper()
+		if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	// pre_eol_info 0x02 is what makes the expected SmartStatus "WARNING" below.
+	write(filepath.Join(mmcDeviceDir, "pre_eol_info"), "0x02\n")
+	write(filepath.Join(mmcDeviceDir, "life_time"), "0x04 0x05\n")
+	write(filepath.Join(mmcDeviceDir, "name"), "H26M52103FMR\n")
+	write(filepath.Join(mmcDeviceDir, "serial"), "01234567\n")
+	write(filepath.Join(mmcDeviceDir, "prv"), "0x08\n")
+	write(filepath.Join(mmcQueueDir, "logical_block_size"), "512\n")
+	write(filepath.Join(tmp, "class", "block", "mmcblk0", "size"), "1024\n") // sectors
+
+	devs := scanEmmcDevices()
+	if len(devs) != 1 {
+		t.Fatalf("scanEmmcDevices() = %d devices, want 1", len(devs))
+	}
+	if devs[0].Name != "/dev/mmcblk0" || devs[0].Type != "emmc" {
+		t.Fatalf("scanEmmcDevices()[0] = %+v, want Name=/dev/mmcblk0 Type=emmc", devs[0])
+	}
+
+	sm := &SmartManager{SmartDataMap: map[string]*smart.SmartData{}}
+	ok, err := sm.collectEmmcHealth(devs[0])
+	if err != nil || !ok {
+		t.Fatalf("collectEmmcHealth() = (ok=%v, err=%v), want (true,nil)", ok, err)
+	}
+	if len(sm.SmartDataMap) != 1 {
+		t.Fatalf("SmartDataMap len=%d, want 1", len(sm.SmartDataMap))
+	}
+	// Grab the single entry without depending on which key was chosen.
+	var got *smart.SmartData
+	for _, v := range sm.SmartDataMap {
+		got = v
+		break
+	}
+	if got == nil {
+		t.Fatalf("SmartDataMap value nil")
+	}
+	if got.DiskType != "emmc" || got.DiskName != "/dev/mmcblk0" {
+		t.Fatalf("disk fields = (type=%q name=%q), want (emmc,/dev/mmcblk0)", got.DiskType, got.DiskName)
+	}
+	if got.SmartStatus != "WARNING" {
+		t.Fatalf("SmartStatus=%q, want WARNING", got.SmartStatus)
+	}
+	if got.SerialNumber != "01234567" || got.ModelName == "" || got.Capacity == 0 {
+		t.Fatalf("identity fields = (model=%q serial=%q cap=%d), want non-empty model, serial 01234567, cap>0", got.ModelName, got.SerialNumber, got.Capacity)
+	}
+	if len(got.Attributes) < 3 {
+		t.Fatalf("attributes len=%d, want >= 3", len(got.Attributes))
+	}
+}
--- a/agent/emmc_stub.go
+++ b/agent/emmc_stub.go
@@ -0,0 +1,14 @@
+//go:build !linux
+
+package agent
+
+// Non-Linux builds: eMMC health via sysfs is not available.
+
+// scanEmmcDevices is the non-Linux stub; it always returns nil.
+func scanEmmcDevices() []*DeviceInfo {
+	return nil
+}
+
+// collectEmmcHealth is the non-Linux stub; it collects nothing and always
+// returns (false, nil).
+func (sm *SmartManager) collectEmmcHealth(deviceInfo *DeviceInfo) (bool, error) {
+	return false, nil
+}
+
--- a/agent/fingerprint.go
+++ b/agent/fingerprint.go
@@ -0,0 +1,87 @@
+package agent
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+	"errors"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/shirou/gopsutil/v4/cpu"
+	"github.com/shirou/gopsutil/v4/host"
+)
+
+const fingerprintFileName = "fingerprint"
+
+// knownBadUUID is a commonly known "product_uuid" that is not unique across systems.
+const knownBadUUID = "03000200-0400-0500-0006-000700080009"
+
+// GetFingerprint returns the agent fingerprint.
+//
+// When dataDir is non-empty, a previously saved, non-empty fingerprint in that
+// directory takes precedence. Otherwise a fingerprint is generated from system
+// properties via generateFingerprint, which uses hostname and cpuModel only as
+// fallback material (looking them up itself when they are empty).
+//
+// A newly generated fingerprint is saved to dataDir when one is given; a
+// failure to save is ignored and the generated value is still returned.
+func GetFingerprint(dataDir, hostname, cpuModel string) string {
+	persist := dataDir != ""
+	if persist {
+		saved, err := readFingerprint(dataDir)
+		if err == nil {
+			return saved
+		}
+	}
+
+	generated := generateFingerprint(hostname, cpuModel)
+	if persist {
+		// Best effort: the fingerprint remains usable even if it cannot be stored.
+		_ = SaveFingerprint(dataDir, generated)
+	}
+	return generated
+}
+
+// generateFingerprint derives a fingerprint from system properties.
+//
+// The seed is host.HostID() unless that call fails, returns an empty string,
+// or returns knownBadUUID; in those cases the seed is hostname + cpuModel,
+// with empty arguments filled in from os.Hostname and the first cpu.Info entry
+// on a best-effort basis. The result is the hex encoding of the first 24 bytes
+// of the seed's SHA-256 digest.
+func generateFingerprint(hostname, cpuModel string) string {
+	seed, err := host.HostID()
+	usable := err == nil && seed != "" && seed != knownBadUUID
+	if !usable {
+		if hostname == "" {
+			// Lookup errors are ignored; an empty hostname is acceptable here.
+			hostname, _ = os.Hostname()
+		}
+		if cpuModel == "" {
+			cpus, cpuErr := cpu.Info()
+			if cpuErr == nil && len(cpus) > 0 {
+				cpuModel = cpus[0].ModelName
+			}
+		}
+		seed = hostname + cpuModel
+	}
+
+	digest := sha256.Sum256([]byte(seed))
+	return hex.EncodeToString(digest[:24])
+}
+
+// readFingerprint returns the fingerprint stored in dataDir with surrounding
+// whitespace removed. It returns the os.ReadFile error when the file cannot be
+// read, and an error when the file holds only whitespace.
+func readFingerprint(dataDir string) (string, error) {
+	raw, err := os.ReadFile(filepath.Join(dataDir, fingerprintFileName))
+	if err != nil {
+		return "", err
+	}
+	if trimmed := strings.TrimSpace(string(raw)); trimmed != "" {
+		return trimmed, nil
+	}
+	return "", errors.New("fingerprint file is empty")
+}
+
+// SaveFingerprint stores fingerprint in the fingerprint file inside dataDir,
+// replacing any existing content. The file is written with mode 0644.
+func SaveFingerprint(dataDir, fingerprint string) error {
+	target := filepath.Join(dataDir, fingerprintFileName)
+	return os.WriteFile(target, []byte(fingerprint), 0o644)
+}
+
+// DeleteFingerprint removes the fingerprint file from dataDir. A missing file
+// is not an error, so the call is idempotent; any other removal error is
+// returned unchanged.
+func DeleteFingerprint(dataDir string) error {
+	switch err := os.Remove(filepath.Join(dataDir, fingerprintFileName)); {
+	case err == nil, errors.Is(err, os.ErrNotExist):
+		return nil
+	default:
+		return err
+	}
+}
--- a/agent/fingerprint_test.go
+++ b/agent/fingerprint_test.go
@@ -0,0 +1,102 @@
+//go:build testing
+
+package agent
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestGetFingerprint covers GetFingerprint: reading and trimming a saved
+// fingerprint, generating one when no file or no data directory is available,
+// repeatability of generation, and precedence of a saved value over generation.
+func TestGetFingerprint(t *testing.T) {
+	t.Run("reads existing fingerprint from file", func(t *testing.T) {
+		dir := t.TempDir()
+		expected := "abc123def456"
+		err := os.WriteFile(filepath.Join(dir, fingerprintFileName), []byte(expected), 0644)
+		require.NoError(t, err)
+
+		fp := GetFingerprint(dir, "", "")
+		assert.Equal(t, expected, fp)
+	})
+
+	t.Run("trims whitespace from file", func(t *testing.T) {
+		dir := t.TempDir()
+		err := os.WriteFile(filepath.Join(dir, fingerprintFileName), []byte("  abc123  \n"), 0644)
+		require.NoError(t, err)
+
+		fp := GetFingerprint(dir, "", "")
+		assert.Equal(t, "abc123", fp)
+	})
+
+	t.Run("generates fingerprint when file does not exist", func(t *testing.T) {
+		dir := t.TempDir()
+		fp := GetFingerprint(dir, "", "")
+		assert.NotEmpty(t, fp)
+	})
+
+	t.Run("generates fingerprint when dataDir is empty", func(t *testing.T) {
+		fp := GetFingerprint("", "", "")
+		assert.NotEmpty(t, fp)
+	})
+
+	t.Run("generates consistent fingerprint for same inputs", func(t *testing.T) {
+		// With an empty dataDir nothing is read or saved, so both calls generate.
+		fp1 := GetFingerprint("", "myhost", "mycpu")
+		fp2 := GetFingerprint("", "myhost", "mycpu")
+		assert.Equal(t, fp1, fp2)
+	})
+
+	t.Run("prefers saved fingerprint over generated", func(t *testing.T) {
+		dir := t.TempDir()
+		require.NoError(t, SaveFingerprint(dir, "saved-fp"))
+
+		fp := GetFingerprint(dir, "anyhost", "anycpu")
+		assert.Equal(t, "saved-fp", fp)
+	})
+}
+
+// TestSaveFingerprint checks that SaveFingerprint writes the exact fingerprint
+// bytes to the fingerprint file and that a second call replaces the first value.
+func TestSaveFingerprint(t *testing.T) {
+	t.Run("saves fingerprint to file", func(t *testing.T) {
+		dir := t.TempDir()
+		err := SaveFingerprint(dir, "abc123")
+		require.NoError(t, err)
+
+		content, err := os.ReadFile(filepath.Join(dir, fingerprintFileName))
+		require.NoError(t, err)
+		assert.Equal(t, "abc123", string(content))
+	})
+
+	t.Run("overwrites existing fingerprint", func(t *testing.T) {
+		dir := t.TempDir()
+		require.NoError(t, SaveFingerprint(dir, "old"))
+		require.NoError(t, SaveFingerprint(dir, "new"))
+
+		content, err := os.ReadFile(filepath.Join(dir, fingerprintFileName))
+		require.NoError(t, err)
+		assert.Equal(t, "new", string(content))
+	})
+}
+
+// TestDeleteFingerprint checks that DeleteFingerprint removes an existing
+// fingerprint file and returns no error when the file is already absent.
+func TestDeleteFingerprint(t *testing.T) {
+	t.Run("deletes existing fingerprint", func(t *testing.T) {
+		dir := t.TempDir()
+		fp := filepath.Join(dir, fingerprintFileName)
+		err := os.WriteFile(fp, []byte("abc123"), 0644)
+		require.NoError(t, err)
+
+		err = DeleteFingerprint(dir)
+		require.NoError(t, err)
+
+		// Verify file is gone
+		_, err = os.Stat(fp)
+		assert.True(t, os.IsNotExist(err))
+	})
+
+	t.Run("no error when file does not exist", func(t *testing.T) {
+		dir := t.TempDir()
+		err := DeleteFingerprint(dir)
+		assert.NoError(t, err)
+	})
+}
--- a/agent/gpu.go
+++ b/agent/gpu.go
@@ -5,29 +5,30 @@ import (
 	"bytes"
 	"encoding/json"
 	"fmt"
+	"log/slog"
 	"maps"
 	"os/exec"
 	"regexp"
+	"runtime"
 	"strconv"
 	"strings"
 	"sync"
 	"time"

+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/system"
-
-	"golang.org/x/exp/slog"
 )

 const (
 	// Commands
-	nvidiaSmiCmd  string = "nvidia-smi"
-	rocmSmiCmd    string = "rocm-smi"
-	tegraStatsCmd string = "tegrastats"
+	nvidiaSmiCmd    string = "nvidia-smi"
+	rocmSmiCmd      string = "rocm-smi"
+	tegraStatsCmd   string = "tegrastats"
+	nvtopCmd        string = "nvtop"
+	powermetricsCmd string = "powermetrics"
+	macmonCmd       string = "macmon"
+	noGPUFoundMsg   string = "no GPU found - see https://beszel.dev/guide/gpu"

-	// Polling intervals
-	nvidiaSmiInterval  string        = "4"    // in seconds
-	tegraStatsInterval string        = "3700" // in milliseconds
-	rocmSmiInterval    time.Duration = 4300 * time.Millisecond
 	// Command retry and timeout constants
 	retryWaitTime     time.Duration = 5 * time.Second
 	maxFailureRetries int           = 5
@@ -40,11 +41,7 @@ const (
 // GPUManager manages data collection for GPUs (either Nvidia or AMD)
 type GPUManager struct {
 	sync.Mutex
-	nvidiaSmi     bool
-	rocmSmi       bool
-	tegrastats    bool
-	intelGpuStats bool
-	GpuDataMap    map[string]*system.GPUData
+	GpuDataMap map[string]*system.GPUData
 	// lastAvgData stores the last calculated averages for each GPU
 	// Used when a collection happens before new data arrives (Count == 0)
 	lastAvgData map[string]system.GPUData
@@ -85,6 +82,58 @@ type gpuCollector struct {

 var errNoValidData = fmt.Errorf("no valid GPU data found") // Error for missing data

+// collectorSource identifies a selectable GPU collector in GPU_COLLECTOR.
+type collectorSource string
+
+const (
+	collectorSourceNVTop        collectorSource = collectorSource(nvtopCmd)
+	collectorSourceNVML         collectorSource = "nvml"
+	collectorSourceNvidiaSMI    collectorSource = collectorSource(nvidiaSmiCmd)
+	collectorSourceIntelGpuTop  collectorSource = collectorSource(intelGpuStatsCmd)
+	collectorSourceAmdSysfs     collectorSource = "amd_sysfs"
+	collectorSourceRocmSMI      collectorSource = collectorSource(rocmSmiCmd)
+	collectorSourceMacmon       collectorSource = collectorSource(macmonCmd)
+	collectorSourcePowermetrics collectorSource = collectorSource(powermetricsCmd)
+	collectorGroupNvidia        string          = "nvidia"
+	collectorGroupIntel         string          = "intel"
+	collectorGroupAmd           string          = "amd"
+	collectorGroupApple         string          = "apple"
+)
+
+func isValidCollectorSource(source collectorSource) bool {
+	switch source {
+	case collectorSourceNVTop,
+		collectorSourceNVML,
+		collectorSourceNvidiaSMI,
+		collectorSourceIntelGpuTop,
+		collectorSourceAmdSysfs,
+		collectorSourceRocmSMI,
+		collectorSourceMacmon,
+		collectorSourcePowermetrics:
+		return true
+	}
+	return false
+}
+
+// gpuCapabilities describes detected GPU tooling and sysfs support on the host.
+type gpuCapabilities struct {
+	hasNvidiaSmi    bool
+	hasRocmSmi      bool
+	hasAmdSysfs     bool
+	hasTegrastats   bool
+	hasIntelGpuTop  bool
+	hasNvtop        bool
+	hasMacmon       bool
+	hasPowermetrics bool
+}
+
+type collectorDefinition struct {
+	group              string
+	available          bool
+	start              func(onFailure func()) bool
+	deprecationWarning string
+}
+
 // starts and manages the ongoing collection of GPU data for the specified GPU management utility
 func (c *gpuCollector) start() {
 	for {
@@ -136,10 +185,10 @@ func (gm *GPUManager) getJetsonParser() func(output []byte) bool {
 	// use closure to avoid recompiling the regex
 	ramPattern := regexp.MustCompile(`RAM (\d+)/(\d+)MB`)
 	gr3dPattern := regexp.MustCompile(`GR3D_FREQ (\d+)%`)
-	tempPattern := regexp.MustCompile(`tj@(\d+\.?\d*)C`)
+	tempPattern := regexp.MustCompile(`(?:tj|GPU)@(\d+\.?\d*)C`)
 	// Orin Nano / NX do not have GPU specific power monitor
 	// TODO: Maybe use VDD_IN for Nano / NX and add a total system power chart
-	powerPattern := regexp.MustCompile(`(GPU_SOC|CPU_GPU_CV) (\d+)mW`)
+	powerPattern := regexp.MustCompile(`(GPU_SOC|CPU_GPU_CV)\s+(\d+)mW|VDD_SYS_GPU\s+(\d+)/\d+`)

 	// jetson devices have only one gpu so we'll just initialize here
 	gpuData := &system.GPUData{Name: "GPU"}
@@ -168,7 +217,13 @@ func (gm *GPUManager) getJetsonParser() func(output []byte) bool {
 		// Parse power usage
 		powerMatches := powerPattern.FindSubmatch(output)
 		if powerMatches != nil {
-			power, _ := strconv.ParseFloat(string(powerMatches[2]), 64)
+			// powerMatches[2] is the "(GPU_SOC|CPU_GPU_CV) <N>mW" capture
+			// powerMatches[3] is the "VDD_SYS_GPU <N>/<N>" capture
+			powerStr := string(powerMatches[2])
+			if powerStr == "" {
+				powerStr = string(powerMatches[3])
+			}
+			power, _ := strconv.ParseFloat(powerStr, 64)
 			gpuData.Power += power / milliwattsInAWatt
 		}
 		gpuData.Count++
@@ -231,13 +286,14 @@ func (gm *GPUManager) parseAmdData(output []byte) bool {
 		totalMemory, _ := strconv.ParseFloat(v.MemoryTotal, 64)
 		usage, _ := strconv.ParseFloat(v.Usage, 64)

-		if _, ok := gm.GpuDataMap[v.ID]; !ok {
-			gm.GpuDataMap[v.ID] = &system.GPUData{Name: v.Name}
+		id := v.ID
+		if _, ok := gm.GpuDataMap[id]; !ok {
+			gm.GpuDataMap[id] = &system.GPUData{Name: v.Name}
 		}
-		gpu := gm.GpuDataMap[v.ID]
+		gpu := gm.GpuDataMap[id]
 		gpu.Temperature, _ = strconv.ParseFloat(v.Temperature, 64)
-		gpu.MemoryUsed = bytesToMegabytes(memoryUsage)
-		gpu.MemoryTotal = bytesToMegabytes(totalMemory)
+		gpu.MemoryUsed = utils.BytesToMegabytes(memoryUsage)
+		gpu.MemoryTotal = utils.BytesToMegabytes(totalMemory)
 		gpu.Usage += usage
 		gpu.Power += power
 		gpu.Count++
@@ -297,8 +353,13 @@ func (gm *GPUManager) calculateGPUAverage(id string, gpu *system.GPUData, cacheK
 	currentCount := uint32(gpu.Count)
 	deltaCount := gm.calculateDeltaCount(currentCount, lastSnapshot)

-	// If no new data arrived, use last known average
+	// If no new data arrived
 	if deltaCount == 0 {
+		// If GPU appears suspended (instantaneous values are 0), return zero values
+		// Otherwise return last known average for temporary collection gaps
+		if gpu.Temperature == 0 && gpu.MemoryUsed == 0 {
+			return system.GPUData{Name: gpu.Name}
+		}
 		return gm.lastAvgData[id] // zero value if not found
 	}

@@ -306,16 +367,16 @@ func (gm *GPUManager) calculateGPUAverage(id string, gpu *system.GPUData, cacheK
 	gpuAvg := *gpu
 	deltaUsage, deltaPower, deltaPowerPkg := gm.calculateDeltas(gpu, lastSnapshot)

-	gpuAvg.Power = twoDecimals(deltaPower / float64(deltaCount))
+	gpuAvg.Power = utils.TwoDecimals(deltaPower / float64(deltaCount))

 	if gpu.Engines != nil {
 		// make fresh map for averaged engine metrics to avoid mutating
 		// the accumulator map stored in gm.GpuDataMap
 		gpuAvg.Engines = make(map[string]float64, len(gpu.Engines))
 		gpuAvg.Usage = gm.calculateIntelGPUUsage(&gpuAvg, gpu, lastSnapshot, deltaCount)
-		gpuAvg.PowerPkg = twoDecimals(deltaPowerPkg / float64(deltaCount))
+		gpuAvg.PowerPkg = utils.TwoDecimals(deltaPowerPkg / float64(deltaCount))
 	} else {
-		gpuAvg.Usage = twoDecimals(deltaUsage / float64(deltaCount))
+		gpuAvg.Usage = utils.TwoDecimals(deltaUsage / float64(deltaCount))
 	}

 	gm.lastAvgData[id] = gpuAvg
@@ -350,17 +411,17 @@ func (gm *GPUManager) calculateIntelGPUUsage(gpuAvg, gpu *system.GPUData, lastSn
 		} else {
 			deltaEngine = engine
 		}
-		gpuAvg.Engines[name] = twoDecimals(deltaEngine / float64(deltaCount))
+		gpuAvg.Engines[name] = utils.TwoDecimals(deltaEngine / float64(deltaCount))
 		maxEngineUsage = max(maxEngineUsage, deltaEngine/float64(deltaCount))
 	}
-	return twoDecimals(maxEngineUsage)
+	return utils.TwoDecimals(maxEngineUsage)
 }

 // updateInstantaneousValues updates values that should reflect current state, not averages
 func (gm *GPUManager) updateInstantaneousValues(gpuAvg *system.GPUData, gpu *system.GPUData) {
-	gpuAvg.Temperature = twoDecimals(gpu.Temperature)
-	gpuAvg.MemoryUsed = twoDecimals(gpu.MemoryUsed)
-	gpuAvg.MemoryTotal = twoDecimals(gpu.MemoryTotal)
+	gpuAvg.Temperature = utils.TwoDecimals(gpu.Temperature)
+	gpuAvg.MemoryUsed = utils.TwoDecimals(gpu.MemoryUsed)
+	gpuAvg.MemoryTotal = utils.TwoDecimals(gpu.MemoryTotal)
 }

 // storeSnapshot saves the current GPU state for this cache key
@@ -378,105 +439,324 @@ func (gm *GPUManager) storeSnapshot(id string, gpu *system.GPUData, cacheKey uin
 	gm.lastSnapshots[cacheKey][id] = snapshot
 }

-// detectGPUs checks for the presence of GPU management tools (nvidia-smi, rocm-smi, tegrastats)
-// in the system path. It sets the corresponding flags in the GPUManager struct if any of these
-// tools are found. If none of the tools are found, it returns an error indicating that no GPU
-// management tools are available.
-func (gm *GPUManager) detectGPUs() error {
+// discoverGpuCapabilities checks for available GPU tooling and sysfs support.
+// It only reports capability presence and does not apply policy decisions.
+func (gm *GPUManager) discoverGpuCapabilities() gpuCapabilities {
+	caps := gpuCapabilities{
+		hasAmdSysfs: gm.hasAmdSysfs(),
+	}
 	if _, err := exec.LookPath(nvidiaSmiCmd); err == nil {
-		gm.nvidiaSmi = true
+		caps.hasNvidiaSmi = true
 	}
 	if _, err := exec.LookPath(rocmSmiCmd); err == nil {
-		gm.rocmSmi = true
+		caps.hasRocmSmi = true
 	}
 	if _, err := exec.LookPath(tegraStatsCmd); err == nil {
-		gm.tegrastats = true
-		gm.nvidiaSmi = false
+		caps.hasTegrastats = true
 	}
 	if _, err := exec.LookPath(intelGpuStatsCmd); err == nil {
-		gm.intelGpuStats = true
+		caps.hasIntelGpuTop = true
 	}
-	if gm.nvidiaSmi || gm.rocmSmi || gm.tegrastats || gm.intelGpuStats {
-		return nil
+	if _, err := exec.LookPath(nvtopCmd); err == nil {
+		caps.hasNvtop = true
 	}
-	return fmt.Errorf("no GPU found - install nvidia-smi, rocm-smi, tegrastats, or intel_gpu_top")
+	if runtime.GOOS == "darwin" {
+		if _, err := exec.LookPath(macmonCmd); err == nil {
+			caps.hasMacmon = true
+		}
+		if _, err := exec.LookPath(powermetricsCmd); err == nil {
+			caps.hasPowermetrics = true
+		}
+	}
+	return caps
 }

-// startCollector starts the appropriate GPU data collector based on the command
-func (gm *GPUManager) startCollector(command string) {
-	collector := gpuCollector{
-		name:    command,
-		bufSize: 10 * 1024,
-	}
-	switch command {
-	case intelGpuStatsCmd:
-		go func() {
-			failures := 0
-			for {
-				if err := gm.collectIntelStats(); err != nil {
-					failures++
-					if failures > maxFailureRetries {
-						break
-					}
-					slog.Warn("Error collecting Intel GPU data; see https://beszel.dev/guide/gpu", "err", err)
-					time.Sleep(retryWaitTime)
-					continue
+func hasAnyGpuCollector(caps gpuCapabilities) bool {
+	return caps.hasNvidiaSmi || caps.hasRocmSmi || caps.hasAmdSysfs || caps.hasTegrastats || caps.hasIntelGpuTop || caps.hasNvtop || caps.hasMacmon || caps.hasPowermetrics
+}
+
+func (gm *GPUManager) startIntelCollector() {
+	go func() {
+		failures := 0
+		for {
+			if err := gm.collectIntelStats(); err != nil {
+				failures++
+				if failures > maxFailureRetries {
+					break
 				}
+				slog.Warn("Error collecting Intel GPU data; see https://beszel.dev/guide/gpu", "err", err)
+				time.Sleep(retryWaitTime)
+				continue
 			}
-		}()
-	case nvidiaSmiCmd:
-		collector.cmdArgs = []string{
-			"-l", nvidiaSmiInterval,
+		}
+	}()
+}
+
+func (gm *GPUManager) startNvidiaSmiCollector(intervalSeconds string) {
+	collector := gpuCollector{
+		name:    nvidiaSmiCmd,
+		bufSize: 10 * 1024,
+		cmdArgs: []string{
+			"-l", intervalSeconds,
 			"--query-gpu=index,name,temperature.gpu,memory.used,memory.total,utilization.gpu,power.draw",
 			"--format=csv,noheader,nounits",
-		}
-		collector.parse = gm.parseNvidiaData
-		go collector.start()
-	case tegraStatsCmd:
-		collector.cmdArgs = []string{"--interval", tegraStatsInterval}
-		collector.parse = gm.getJetsonParser()
-		go collector.start()
-	case rocmSmiCmd:
-		collector.cmdArgs = []string{"--showid", "--showtemp", "--showuse", "--showpower", "--showproductname", "--showmeminfo", "vram", "--json"}
-		collector.parse = gm.parseAmdData
-		go func() {
-			failures := 0
-			for {
-				if err := collector.collect(); err != nil {
-					failures++
-					if failures > maxFailureRetries {
-						break
-					}
-					slog.Warn("Error collecting AMD GPU data", "err", err)
-				}
-				time.Sleep(rocmSmiInterval)
-			}
-		}()
+		},
+		parse: gm.parseNvidiaData,
 	}
+	go collector.start()
+}
+
+func (gm *GPUManager) startTegraStatsCollector(intervalMilliseconds string) {
+	collector := gpuCollector{
+		name:    tegraStatsCmd,
+		bufSize: 10 * 1024,
+		cmdArgs: []string{"--interval", intervalMilliseconds},
+		parse:   gm.getJetsonParser(),
+	}
+	go collector.start()
+}
+
+func (gm *GPUManager) startRocmSmiCollector(pollInterval time.Duration) {
+	collector := gpuCollector{
+		name:    rocmSmiCmd,
+		bufSize: 10 * 1024,
+		cmdArgs: []string{"--showid", "--showtemp", "--showuse", "--showpower", "--showproductname", "--showmeminfo", "vram", "--json"},
+		parse:   gm.parseAmdData,
+	}
+	go func() {
+		failures := 0
+		for {
+			if err := collector.collect(); err != nil {
+				failures++
+				if failures > maxFailureRetries {
+					break
+				}
+				slog.Warn("Error collecting AMD GPU data via rocm-smi", "err", err)
+			}
+			time.Sleep(pollInterval)
+		}
+	}()
+}
+
+func (gm *GPUManager) collectorDefinitions(caps gpuCapabilities) map[collectorSource]collectorDefinition {
+	return map[collectorSource]collectorDefinition{
+		collectorSourceNVML: {
+			group:     collectorGroupNvidia,
+			available: caps.hasNvidiaSmi,
+			start: func(_ func()) bool {
+				return gm.startNvmlCollector()
+			},
+		},
+		collectorSourceNvidiaSMI: {
+			group:     collectorGroupNvidia,
+			available: caps.hasNvidiaSmi,
+			start: func(_ func()) bool {
+				gm.startNvidiaSmiCollector("4") // seconds
+				return true
+			},
+		},
+		collectorSourceIntelGpuTop: {
+			group:     collectorGroupIntel,
+			available: caps.hasIntelGpuTop,
+			start: func(_ func()) bool {
+				gm.startIntelCollector()
+				return true
+			},
+		},
+		collectorSourceAmdSysfs: {
+			group:     collectorGroupAmd,
+			available: caps.hasAmdSysfs,
+			start: func(_ func()) bool {
+				return gm.startAmdSysfsCollector()
+			},
+		},
+		collectorSourceRocmSMI: {
+			group:              collectorGroupAmd,
+			available:          caps.hasRocmSmi,
+			deprecationWarning: "rocm-smi is deprecated and may be removed in a future release",
+			start: func(_ func()) bool {
+				gm.startRocmSmiCollector(4300 * time.Millisecond)
+				return true
+			},
+		},
+		collectorSourceNVTop: {
+			available: caps.hasNvtop,
+			start: func(onFailure func()) bool {
+				gm.startNvtopCollector("30", onFailure) // presumably nvtop's delay in tenths of a second (30 = 3s) - confirm in startNvtopCollector
+				return true
+			},
+		},
+		collectorSourceMacmon: {
+			group:     collectorGroupApple,
+			available: caps.hasMacmon,
+			start: func(_ func()) bool {
+				gm.startMacmonCollector()
+				return true
+			},
+		},
+		collectorSourcePowermetrics: {
+			group:     collectorGroupApple,
+			available: caps.hasPowermetrics,
+			start: func(_ func()) bool {
+				gm.startPowermetricsCollector()
+				return true
+			},
+		},
+	}
+}
+
+// parseCollectorPriority parses GPU_COLLECTOR and returns valid ordered entries.
+func parseCollectorPriority(value string) []collectorSource {
+	parts := strings.Split(value, ",")
+	priorities := make([]collectorSource, 0, len(parts))
+	for _, raw := range parts {
+		name := collectorSource(strings.TrimSpace(strings.ToLower(raw)))
+		if !isValidCollectorSource(name) {
+			if name != "" {
+				slog.Warn("Ignoring unknown GPU collector", "collector", name)
+			}
+			continue
+		}
+		priorities = append(priorities, name)
+	}
+	return priorities
+}
+
+// startNvmlCollector initializes NVML and starts its polling loop.
+func (gm *GPUManager) startNvmlCollector() bool {
+	collector := &nvmlCollector{gm: gm}
+	if err := collector.init(); err != nil {
+		slog.Warn("Failed to initialize NVML", "err", err)
+		return false
+	}
+	go collector.start()
+	return true
+}
+
+// startAmdSysfsCollector starts AMD GPU collection via sysfs.
+func (gm *GPUManager) startAmdSysfsCollector() bool {
+	go func() {
+		if err := gm.collectAmdStats(); err != nil {
+			slog.Warn("Error collecting AMD GPU data via sysfs", "err", err)
+		}
+	}()
+	return true
+}
+
+// startCollectorsByPriority starts available collectors in priority order, at most one per vendor group, and returns how many were started.
+func (gm *GPUManager) startCollectorsByPriority(priorities []collectorSource, caps gpuCapabilities) int {
+	definitions := gm.collectorDefinitions(caps)
+	selectedGroups := make(map[string]bool, 3)
+	started := 0
+	for i, source := range priorities {
+		definition, ok := definitions[source]
+		if !ok || !definition.available {
+			continue
+		}
+		// nvtop has no vendor group, so it is only used when no other collector has been selected yet (e.g. it is listed first in GPU_COLLECTOR).
+		if source == collectorSourceNVTop {
+			if len(selectedGroups) > 0 {
+				slog.Warn("Skipping nvtop because other collectors are selected")
+				continue
+			}
+			// onFailure re-runs selection with a copy of the entries listed after nvtop (presumably invoked by the nvtop collector when it fails).
+			remaining := append([]collectorSource(nil), priorities[i+1:]...)
+			if definition.start(func() {
+				gm.startCollectorsByPriority(remaining, caps)
+			}) {
+				started++
+				return started
+			}
+		}
+		group := definition.group
+		if group == "" || selectedGroups[group] {
+			continue
+		}
+		if definition.deprecationWarning != "" {
+			slog.Warn(definition.deprecationWarning)
+		}
+		if definition.start(nil) {
+			selectedGroups[group] = true
+			started++
+		}
+	}
+	return started
+}
+
+// resolveLegacyCollectorPriority builds the default collector order when GPU_COLLECTOR is unset.
+func (gm *GPUManager) resolveLegacyCollectorPriority(caps gpuCapabilities) []collectorSource {
+	priorities := make([]collectorSource, 0, 4)
+
+	if caps.hasNvidiaSmi && !caps.hasTegrastats {
+		if nvml, _ := utils.GetEnv("NVML"); nvml == "true" {
+			priorities = append(priorities, collectorSourceNVML, collectorSourceNvidiaSMI)
+		} else {
+			priorities = append(priorities, collectorSourceNvidiaSMI)
+		}
+	}
+
+	if caps.hasRocmSmi {
+		if val, _ := utils.GetEnv("AMD_SYSFS"); val == "true" {
+			priorities = append(priorities, collectorSourceAmdSysfs)
+		} else {
+			priorities = append(priorities, collectorSourceRocmSMI)
+		}
+	} else if caps.hasAmdSysfs {
+		priorities = append(priorities, collectorSourceAmdSysfs)
+	}
+
+	if caps.hasIntelGpuTop {
+		priorities = append(priorities, collectorSourceIntelGpuTop)
+	}
+
+	// Apple collectors are currently opt-in only for testing.
+	// Enable them with GPU_COLLECTOR=macmon or GPU_COLLECTOR=powermetrics.
+	// TODO: uncomment below when Apple collectors are confirmed to be working.
+	//
+	// Prefer macmon on macOS (no sudo). Fall back to powermetrics if present.
+	// if caps.hasMacmon {
+	// 	priorities = append(priorities, collectorSourceMacmon)
+	// } else if caps.hasPowermetrics {
+	// 	priorities = append(priorities, collectorSourcePowermetrics)
+	// }
+
+	// Keep nvtop as a last resort only when no vendor collector exists.
+	if len(priorities) == 0 && caps.hasNvtop {
+		priorities = append(priorities, collectorSourceNVTop)
+	}
+	return priorities
 }

 // NewGPUManager creates and initializes a new GPUManager
 func NewGPUManager() (*GPUManager, error) {
-	if skipGPU, _ := GetEnv("SKIP_GPU"); skipGPU == "true" {
+	if skipGPU, _ := utils.GetEnv("SKIP_GPU"); skipGPU == "true" {
 		return nil, nil
 	}
 	var gm GPUManager
-	if err := gm.detectGPUs(); err != nil {
-		return nil, err
+	caps := gm.discoverGpuCapabilities()
+	if !hasAnyGpuCollector(caps) {
+		return nil, fmt.Errorf(noGPUFoundMsg)
 	}
 	gm.GpuDataMap = make(map[string]*system.GPUData)

-	if gm.nvidiaSmi {
-		gm.startCollector(nvidiaSmiCmd)
+	// Jetson devices should always use tegrastats (ignore GPU_COLLECTOR).
+	if caps.hasTegrastats {
+		gm.startTegraStatsCollector("3700")
+		return &gm, nil
 	}
-	if gm.rocmSmi {
-		gm.startCollector(rocmSmiCmd)
+
+	// if GPU_COLLECTOR is set, start user-defined collectors.
+	if collectorConfig, ok := utils.GetEnv("GPU_COLLECTOR"); ok && strings.TrimSpace(collectorConfig) != "" {
+		priorities := parseCollectorPriority(collectorConfig)
+		if gm.startCollectorsByPriority(priorities, caps) == 0 {
+			return nil, fmt.Errorf("no configured GPU collectors are available")
+		}
+		return &gm, nil
 	}
-	if gm.tegrastats {
-		gm.startCollector(tegraStatsCmd)
-	}
-	if gm.intelGpuStats {
-		gm.startCollector(intelGpuStatsCmd)
+
+	// auto-detect and start collectors when GPU_COLLECTOR is unset.
+	if gm.startCollectorsByPriority(gm.resolveLegacyCollectorPriority(caps), caps) == 0 {
+		return nil, fmt.Errorf(noGPUFoundMsg)
 	}

 	return &gm, nil
--- a/agent/gpu_amd_linux.go
+++ b/agent/gpu_amd_linux.go
@@ -0,0 +1,302 @@
+//go:build linux
+
+package agent
+
+import (
+	"bufio"
+	"fmt"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/henrygd/beszel/agent/utils"
+	"github.com/henrygd/beszel/internal/entities/system"
+)
+
+var amdgpuNameCache = struct {
+	sync.RWMutex
+	hits   map[string]string
+	misses map[string]struct{}
+}{
+	hits:   make(map[string]string),
+	misses: make(map[string]struct{}),
+}
+
+// hasAmdSysfs returns true if any AMD GPU sysfs nodes are found
+func (gm *GPUManager) hasAmdSysfs() bool {
+	cards, err := filepath.Glob("/sys/class/drm/card*/device/vendor")
+	if err != nil {
+		return false
+	}
+	for _, vendorPath := range cards {
+		vendor, err := utils.ReadStringFileLimited(vendorPath, 64)
+		if err == nil && vendor == "0x1002" {
+			return true
+		}
+	}
+	return false
+}
+
+// collectAmdStats polls AMD GPU metrics directly from sysfs (avoiding the overhead of rocm-smi); it blocks and only returns on error
+func (gm *GPUManager) collectAmdStats() error {
+	sysfsPollInterval := 3000 * time.Millisecond
+	cards, err := filepath.Glob("/sys/class/drm/card*")
+	if err != nil {
+		return err
+	}
+
+	var amdGpuPaths []string
+	for _, card := range cards {
+		// Skip entries whose name contains "-" (e.g. connector nodes such as card0-DP-1) and cards that are not AMD
+		if strings.Contains(filepath.Base(card), "-") || !isAmdGpu(card) {
+			continue
+		}
+		amdGpuPaths = append(amdGpuPaths, card)
+	}
+
+	if len(amdGpuPaths) == 0 {
+		return errNoValidData
+	}
+
+	slog.Debug("Using sysfs for AMD GPU data collection")
+
+	failures := 0
+	for {
+		hasData := false
+		for _, cardPath := range amdGpuPaths {
+			if gm.updateAmdGpuData(cardPath) {
+				hasData = true
+			}
+		}
+		if !hasData {
+			failures++
+			if failures > maxFailureRetries {
+				return errNoValidData
+			}
+			slog.Warn("No AMD GPU data from sysfs", "failures", failures)
+			time.Sleep(retryWaitTime)
+			continue
+		}
+		failures = 0
+		time.Sleep(sysfsPollInterval)
+	}
+}
+
+// isAmdGpu reports whether the DRM card at cardPath has the AMD vendor ID 0x1002; an unreadable vendor file counts as not AMD.
+func isAmdGpu(cardPath string) bool {
+	vendorFile := filepath.Join(cardPath, "device/vendor")
+	if id, readErr := utils.ReadStringFileLimited(vendorFile, 64); readErr == nil {
+		return id == "0x1002"
+	}
+	return false
+}
+
+// updateAmdGpuData reads GPU metrics from sysfs and updates the GPU data map.
+// Returns true if at least some data was successfully read.
+func (gm *GPUManager) updateAmdGpuData(cardPath string) bool {
+	devicePath := filepath.Join(cardPath, "device")
+	id := filepath.Base(cardPath)
+
+	// Read all sysfs values first (no lock needed - these can be slow)
+	usage, usageErr := readSysfsFloat(filepath.Join(devicePath, "gpu_busy_percent"))
+	memUsed, memUsedErr := readSysfsFloat(filepath.Join(devicePath, "mem_info_vram_used"))
+	memTotal, _ := readSysfsFloat(filepath.Join(devicePath, "mem_info_vram_total"))
+	// if gtt is present, add it to the memory used and total (https://github.com/henrygd/beszel/issues/1569#issuecomment-3837640484)
+	if gttUsed, err := readSysfsFloat(filepath.Join(devicePath, "mem_info_gtt_used")); err == nil && gttUsed > 0 {
+		if gttTotal, err := readSysfsFloat(filepath.Join(devicePath, "mem_info_gtt_total")); err == nil {
+			memUsed += gttUsed
+			memTotal += gttTotal
+		}
+	}
+
+	var temp, power float64
+	hwmons, _ := filepath.Glob(filepath.Join(devicePath, "hwmon/hwmon*"))
+	for _, hwmonDir := range hwmons {
+		if t, err := readSysfsFloat(filepath.Join(hwmonDir, "temp1_input")); err == nil {
+			temp = t / 1000.0
+		}
+		if p, err := readSysfsFloat(filepath.Join(hwmonDir, "power1_average")); err == nil {
+			power += p / 1000000.0
+		} else if p, err := readSysfsFloat(filepath.Join(hwmonDir, "power1_input")); err == nil {
+			power += p / 1000000.0
+		}
+	}
+
+	// Check if we got any meaningful data
+	if usageErr != nil && memUsedErr != nil && temp == 0 {
+		return false
+	}
+
+	// Single lock to update all values atomically
+	gm.Lock()
+	defer gm.Unlock()
+
+	gpu, ok := gm.GpuDataMap[id]
+	if !ok {
+		gpu = &system.GPUData{Name: getAmdGpuName(devicePath)}
+		gm.GpuDataMap[id] = gpu
+	}
+
+	if usageErr == nil {
+		gpu.Usage += usage
+	}
+	gpu.MemoryUsed = utils.BytesToMegabytes(memUsed)
+	gpu.MemoryTotal = utils.BytesToMegabytes(memTotal)
+	gpu.Temperature = temp
+	gpu.Power += power
+	gpu.Count++
+	return true
+}
+
+// readSysfsFloat reads and parses a numeric value from a sysfs file.
+func readSysfsFloat(path string) (float64, error) {
+	val, err := utils.ReadStringFileLimited(path, 64)
+	if err != nil {
+		return 0, err
+	}
+	return strconv.ParseFloat(val, 64)
+}
+
+// normalizeHexID normalizes hex IDs by trimming spaces, lowercasing, and dropping 0x.
+func normalizeHexID(id string) string {
+	return strings.TrimPrefix(strings.ToLower(strings.TrimSpace(id)), "0x")
+}
+
+// cacheKeyForAmdgpu builds the cache key for a device and optional revision.
+func cacheKeyForAmdgpu(deviceID, revisionID string) string {
+	if revisionID != "" {
+		return deviceID + ":" + revisionID
+	}
+	return deviceID
+}
+
+// lookupAmdgpuNameInFile resolves an AMDGPU name from amdgpu.ids by device/revision.
+func lookupAmdgpuNameInFile(deviceID, revisionID, filePath string) (name string, exact bool, found bool) {
+	file, err := os.Open(filePath)
+	if err != nil {
+		return "", false, false
+	}
+	defer file.Close()
+
+	var byDevice string
+	scanner := bufio.NewScanner(file)
+	for scanner.Scan() {
+		line := strings.TrimSpace(scanner.Text())
+		if line == "" || strings.HasPrefix(line, "#") {
+			continue
+		}
+		parts := strings.SplitN(line, ",", 3)
+		if len(parts) != 3 {
+			continue
+		}
+
+		dev := normalizeHexID(parts[0])
+		rev := normalizeHexID(parts[1])
+		productName := strings.TrimSpace(parts[2])
+		if dev == "" || productName == "" || dev != deviceID {
+			continue
+		}
+		if byDevice == "" {
+			byDevice = productName
+		}
+		if revisionID != "" && rev == revisionID {
+			return productName, true, true
+		}
+	}
+	if byDevice != "" {
+		return byDevice, false, true
+	}
+	return "", false, false
+}
+
+// getCachedAmdgpuName returns cached hit/miss status for the given device/revision.
+func getCachedAmdgpuName(deviceID, revisionID string) (name string, found bool, done bool) {
+	// Build the list of cache keys to check. We always look up the exact device+revision key.
+	// When revisionID is set, we also look up deviceID alone, since the cache may store a
+	// device-only fallback when we couldn't resolve the exact revision.
+	keys := []string{cacheKeyForAmdgpu(deviceID, revisionID)}
+	if revisionID != "" {
+		keys = append(keys, deviceID)
+	}
+
+	knownMisses := 0
+	amdgpuNameCache.RLock()
+	defer amdgpuNameCache.RUnlock()
+	for _, key := range keys {
+		if name, ok := amdgpuNameCache.hits[key]; ok {
+			return name, true, true
+		}
+		if _, ok := amdgpuNameCache.misses[key]; ok {
+			knownMisses++
+		}
+	}
+	// done=true means "don't bother doing slow lookup": we either found a name (above) or
+	// every key we checked was already a known miss, so we've tried before and failed.
+	return "", false, knownMisses == len(keys)
+}
+
+// normalizeAmdgpuName trims standard suffixes from AMDGPU product names.
+func normalizeAmdgpuName(name string) string {
+	for _, suffix := range []string{" Graphics", " Series"} {
+		name = strings.TrimSuffix(name, suffix)
+	}
+	return name
+}
+
+// cacheAmdgpuName stores a resolved AMDGPU name in the lookup cache.
+func cacheAmdgpuName(deviceID, revisionID, name string, exact bool) {
+	name = normalizeAmdgpuName(name)
+	amdgpuNameCache.Lock()
+	defer amdgpuNameCache.Unlock()
+	if exact && revisionID != "" {
+		amdgpuNameCache.hits[cacheKeyForAmdgpu(deviceID, revisionID)] = name
+	}
+	amdgpuNameCache.hits[deviceID] = name
+}
+
+// cacheMissingAmdgpuName records unresolved device/revision lookups.
+func cacheMissingAmdgpuName(deviceID, revisionID string) {
+	amdgpuNameCache.Lock()
+	defer amdgpuNameCache.Unlock()
+	amdgpuNameCache.misses[deviceID] = struct{}{}
+	if revisionID != "" {
+		amdgpuNameCache.misses[cacheKeyForAmdgpu(deviceID, revisionID)] = struct{}{}
+	}
+}
+
+// getAmdGpuName attempts to get a descriptive GPU name for the device at devicePath.
+// First tries product_name (rarely available; ignored when empty), then looks up the PCI device ID.
+// Falls back to "AMD GPU (<id>)" if the ID is not in the lookup table, or "AMD GPU" if the ID cannot be read.
+func getAmdGpuName(devicePath string) string {
+	// Try product_name first (works for some enterprise GPUs); an empty value would give the GPU a blank name
+	if prod, err := utils.ReadStringFileLimited(filepath.Join(devicePath, "product_name"), 128); err == nil && prod != "" {
+		return prod
+	}
+
+	// Read PCI device ID and look it up
+	if deviceID, err := utils.ReadStringFileLimited(filepath.Join(devicePath, "device"), 64); err == nil {
+		id := normalizeHexID(deviceID)
+		revision := ""
+		if rev, revErr := utils.ReadStringFileLimited(filepath.Join(devicePath, "revision"), 64); revErr == nil {
+			revision = normalizeHexID(rev)
+		}
+
+		if name, found, done := getCachedAmdgpuName(id, revision); found {
+			return name
+		} else if !done {
+			if name, exact, ok := lookupAmdgpuNameInFile(id, revision, "/usr/share/libdrm/amdgpu.ids"); ok {
+				cacheAmdgpuName(id, revision, name, exact)
+				return normalizeAmdgpuName(name)
+			}
+			cacheMissingAmdgpuName(id, revision)
+		}
+
+		return fmt.Sprintf("AMD GPU (%s)", id)
+	}
+
+	return "AMD GPU"
+}
--- a/agent/gpu_amd_linux_test.go
+++ b/agent/gpu_amd_linux_test.go
@@ -0,0 +1,265 @@
+//go:build linux
+
+package agent
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/henrygd/beszel/agent/utils"
+	"github.com/henrygd/beszel/internal/entities/system"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestNormalizeHexID checks prefix stripping, lowercasing and whitespace trimming
+// performed by normalizeHexID.
+func TestNormalizeHexID(t *testing.T) {
+	cases := []struct {
+		in   string
+		want string
+	}{
+		{in: "0x1002", want: "1002"},
+		{in: "C2", want: "c2"},
+		{in: "  15BF  ", want: "15bf"},
+		{in: "0x15bf", want: "15bf"},
+		{in: "", want: ""},
+	}
+	for _, tc := range cases {
+		// Subtest names cannot be empty, so give the empty input a label
+		label := tc.in
+		if label == "" {
+			label = "empty_string"
+		}
+		t.Run(label, func(t *testing.T) {
+			assert.Equal(t, tc.want, normalizeHexID(tc.in))
+		})
+	}
+}
+
+// TestCacheKeyForAmdgpu checks that the cache key is "device:revision", or just
+// the device ID when the revision is empty.
+func TestCacheKeyForAmdgpu(t *testing.T) {
+	cases := []struct {
+		deviceID   string
+		revisionID string
+		want       string
+	}{
+		{deviceID: "1114", revisionID: "c2", want: "1114:c2"},
+		{deviceID: "15bf", revisionID: "", want: "15bf"},
+		{deviceID: "1506", revisionID: "c1", want: "1506:c1"},
+	}
+	for _, tc := range cases {
+		assert.Equal(t, tc.want, cacheKeyForAmdgpu(tc.deviceID, tc.revisionID))
+	}
+}
+
+// TestReadSysfsFloat covers readSysfsFloat for a padded decimal value, scientific
+// notation, a missing file and non-numeric content.
+func TestReadSysfsFloat(t *testing.T) {
+	dir := t.TempDir()
+
+	// Decimal value surrounded by whitespace and a trailing newline
+	validPath := filepath.Join(dir, "val")
+	require.NoError(t, os.WriteFile(validPath, []byte("  42.5  \n"), 0o644))
+	got, err := readSysfsFloat(validPath)
+	require.NoError(t, err)
+	assert.Equal(t, 42.5, got)
+
+	// Scientific notation
+	sciPath := filepath.Join(dir, "sci")
+	require.NoError(t, os.WriteFile(sciPath, []byte("1e2"), 0o644))
+	got, err = readSysfsFloat(sciPath)
+	require.NoError(t, err)
+	assert.Equal(t, 100.0, got)
+
+	// Missing file
+	_, err = readSysfsFloat(filepath.Join(dir, "missing"))
+	require.Error(t, err)
+
+	// Invalid content
+	badPath := filepath.Join(dir, "bad")
+	require.NoError(t, os.WriteFile(badPath, []byte("not a number"), 0o644))
+	_, err = readSysfsFloat(badPath)
+	require.Error(t, err)
+}
+
+// TestIsAmdGpu rewrites a fake <card>/device/vendor file and checks that isAmdGpu
+// only reports true for vendor ID 0x1002.
+func TestIsAmdGpu(t *testing.T) {
+	dir := t.TempDir()
+	deviceDir := filepath.Join(dir, "device")
+	require.NoError(t, os.MkdirAll(deviceDir, 0o755))
+
+	// AMD vendor 0x1002 -> true
+	require.NoError(t, os.WriteFile(filepath.Join(deviceDir, "vendor"), []byte("0x1002\n"), 0o644))
+	assert.True(t, isAmdGpu(dir), "vendor 0x1002 should be AMD")
+
+	// Non-AMD vendor -> false
+	require.NoError(t, os.WriteFile(filepath.Join(deviceDir, "vendor"), []byte("0x10de\n"), 0o644))
+	assert.False(t, isAmdGpu(dir), "vendor 0x10de should not be AMD")
+
+	// Missing vendor file -> false
+	require.NoError(t, os.Remove(filepath.Join(deviceDir, "vendor")))
+	assert.False(t, isAmdGpu(dir), "missing vendor file should be false")
+}
+
+// TestAmdgpuNameCacheRoundTrip stores a hit and a miss in the package-level name
+// cache and checks what getCachedAmdgpuName reports for each.
+func TestAmdgpuNameCacheRoundTrip(t *testing.T) {
+	// Cache a name and retrieve it (unique key to avoid affecting other tests)
+	deviceID, revisionID := "cachedev99", "00"
+	cacheAmdgpuName(deviceID, revisionID, "AMD Test GPU 99 Graphics", true)
+
+	// The stored name is normalized (" Graphics" suffix removed)
+	name, found, done := getCachedAmdgpuName(deviceID, revisionID)
+	assert.True(t, found)
+	assert.True(t, done)
+	assert.Equal(t, "AMD Test GPU 99", name)
+
+	// Device-only key also stored
+	name2, found2, _ := getCachedAmdgpuName(deviceID, "")
+	assert.True(t, found2)
+	assert.Equal(t, "AMD Test GPU 99", name2)
+
+	// Cache a miss
+	cacheMissingAmdgpuName("missedev99", "ab")
+	_, found3, done3 := getCachedAmdgpuName("missedev99", "ab")
+	assert.False(t, found3)
+	assert.True(t, done3, "done should be true so caller skips file lookup")
+}
+
+// TestUpdateAmdGpuDataWithFakeSysfs builds a fake card0 sysfs tree in a temp dir
+// and checks the GPUData produced by updateAmdGpuData, with and without the GTT
+// memory files present.
+func TestUpdateAmdGpuDataWithFakeSysfs(t *testing.T) {
+	tests := []struct {
+		name            string
+		writeGTT        bool
+		wantMemoryUsed  float64
+		wantMemoryTotal float64
+	}{
+		{
+			name:            "sums vram and gtt when gtt is present",
+			writeGTT:        true,
+			wantMemoryUsed:  utils.BytesToMegabytes(1073741824 + 536870912),
+			wantMemoryTotal: utils.BytesToMegabytes(2147483648 + 4294967296),
+		},
+		{
+			name:            "falls back to vram when gtt is missing",
+			writeGTT:        false,
+			wantMemoryUsed:  utils.BytesToMegabytes(1073741824),
+			wantMemoryTotal: utils.BytesToMegabytes(2147483648),
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Layout: <tmp>/card0/device/{vendor,device,...} and <tmp>/card0/device/hwmon/hwmon0/*
+			dir := t.TempDir()
+			cardPath := filepath.Join(dir, "card0")
+			devicePath := filepath.Join(cardPath, "device")
+			hwmonPath := filepath.Join(devicePath, "hwmon", "hwmon0")
+			require.NoError(t, os.MkdirAll(hwmonPath, 0o755))
+
+			// write creates a file directly under the device directory
+			write := func(name, content string) {
+				require.NoError(t, os.WriteFile(filepath.Join(devicePath, name), []byte(content), 0o644))
+			}
+			write("vendor", "0x1002")
+			write("device", "0x1506")
+			write("revision", "0xc1")
+			write("gpu_busy_percent", "25")
+			write("mem_info_vram_used", "1073741824")
+			write("mem_info_vram_total", "2147483648")
+			if tt.writeGTT {
+				write("mem_info_gtt_used", "536870912")
+				write("mem_info_gtt_total", "4294967296")
+			}
+			// Raw hwmon values; the assertions below expect 45.0 and 20.0 after conversion
+			require.NoError(t, os.WriteFile(filepath.Join(hwmonPath, "temp1_input"), []byte("45000"), 0o644))
+			require.NoError(t, os.WriteFile(filepath.Join(hwmonPath, "power1_input"), []byte("20000000"), 0o644))
+
+			// Pre-cache name so getAmdGpuName returns a known value (it uses system amdgpu.ids path)
+			cacheAmdgpuName("1506", "c1", "AMD Radeon 610M Graphics", true)
+
+			gm := &GPUManager{GpuDataMap: make(map[string]*system.GPUData)}
+			ok := gm.updateAmdGpuData(cardPath)
+			require.True(t, ok)
+
+			// The entry is keyed by the card directory name
+			gpu, ok := gm.GpuDataMap["card0"]
+			require.True(t, ok)
+			assert.Equal(t, "AMD Radeon 610M", gpu.Name)
+			assert.Equal(t, 25.0, gpu.Usage)
+			assert.Equal(t, tt.wantMemoryUsed, gpu.MemoryUsed)
+			assert.Equal(t, tt.wantMemoryTotal, gpu.MemoryTotal)
+			assert.Equal(t, 45.0, gpu.Temperature)
+			assert.Equal(t, 20.0, gpu.Power)
+			assert.Equal(t, 1.0, gpu.Count)
+		})
+	}
+}
+
+// TestLookupAmdgpuNameInFile runs lookupAmdgpuNameInFile against
+// test-data/amdgpu.ids and checks the returned name plus the exact/found flags
+// for exact, device-only and unknown lookups.
+func TestLookupAmdgpuNameInFile(t *testing.T) {
+	idsPath := filepath.Join("test-data", "amdgpu.ids")
+
+	tests := []struct {
+		name       string
+		deviceID   string
+		revisionID string
+		wantName   string
+		wantExact  bool
+		wantFound  bool
+	}{
+		{
+			name:       "exact device and revision match",
+			deviceID:   "1114",
+			revisionID: "c2",
+			wantName:   "AMD Radeon 860M Graphics",
+			wantExact:  true,
+			wantFound:  true,
+		},
+		{
+			name:       "exact match 15BF revision 01 returns 760M",
+			deviceID:   "15bf",
+			revisionID: "01",
+			wantName:   "AMD Radeon 760M Graphics",
+			wantExact:  true,
+			wantFound:  true,
+		},
+		{
+			name:       "exact match 15BF revision 00 returns 780M",
+			deviceID:   "15bf",
+			revisionID: "00",
+			wantName:   "AMD Radeon 780M Graphics",
+			wantExact:  true,
+			wantFound:  true,
+		},
+		{
+			// Empty revision: the lookup cannot be exact
+			name:       "device-only match returns first entry for device",
+			deviceID:   "1506",
+			revisionID: "",
+			wantName:   "AMD Radeon 610M",
+			wantExact:  false,
+			wantFound:  true,
+		},
+		{
+			name:       "unknown device not found",
+			deviceID:   "dead",
+			revisionID: "00",
+			wantName:   "",
+			wantExact:  false,
+			wantFound:  false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			gotName, gotExact, gotFound := lookupAmdgpuNameInFile(tt.deviceID, tt.revisionID, idsPath)
+			assert.Equal(t, tt.wantName, gotName, "name")
+			assert.Equal(t, tt.wantExact, gotExact, "exact")
+			assert.Equal(t, tt.wantFound, gotFound, "found")
+		})
+	}
+}
+
+// TestGetAmdGpuNameFromIdsFile exercises the two steps getAmdGpuName performs
+// after reading sysfs: the ids-file lookup and name normalization.
+func TestGetAmdGpuNameFromIdsFile(t *testing.T) {
+	// getAmdGpuName itself is not called here: it always reads
+	// /usr/share/libdrm/amdgpu.ids and the path cannot be injected. Instead we
+	// call lookupAmdgpuNameInFile with the test-data file and check that
+	// normalizeAmdgpuName produces the expected display name.
+	idsPath := filepath.Join("test-data", "amdgpu.ids")
+	name, exact, found := lookupAmdgpuNameInFile("1435", "ae", idsPath)
+	require.True(t, found)
+	require.True(t, exact)
+	assert.Equal(t, "AMD Custom GPU 0932", name)
+	// A name without the " Graphics" suffix is left unchanged
+	assert.Equal(t, "AMD Custom GPU 0932", normalizeAmdgpuName(name))
+
+	// " Graphics" suffix is trimmed by normalizeAmdgpuName
+	name2 := "AMD Radeon 860M Graphics"
+	assert.Equal(t, "AMD Radeon 860M", normalizeAmdgpuName(name2))
+}
--- a/agent/gpu_amd_unsupported.go
+++ b/agent/gpu_amd_unsupported.go
@@ -0,0 +1,15 @@
+//go:build !linux
+
+package agent
+
+import (
+	"errors"
+)
+
+// hasAmdSysfs always reports false in non-Linux builds (see the build constraint
+// at the top of this file).
+func (gm *GPUManager) hasAmdSysfs() bool {
+	return false
+}
+
+// collectAmdStats is the non-Linux stub; it always returns errors.ErrUnsupported.
+func (gm *GPUManager) collectAmdStats() error {
+	return errors.ErrUnsupported
+}
--- a/agent/gpu_darwin.go
+++ b/agent/gpu_darwin.go
@@ -0,0 +1,252 @@
+//go:build darwin
+
+package agent
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/json"
+	"io"
+	"log/slog"
+	"os/exec"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/henrygd/beszel/internal/entities/system"
+)
+
+const (
+	// powermetricsSampleIntervalMs is the sampling interval passed to powermetrics (-i).
+	powermetricsSampleIntervalMs = 500
+	// powermetricsPollInterval is how often we run powermetrics to collect a new sample.
+	powermetricsPollInterval = 2 * time.Second
+	// macmonIntervalMs is the sampling interval passed to macmon pipe (-i), in milliseconds.
+	macmonIntervalMs = 2500
+)
+
+// appleGPUID is the GpuDataMap key under which the single logical Apple GPU is stored.
+const appleGPUID = "0"
+
+// startPowermetricsCollector makes sure the Apple GPU entry (id "0") exists and
+// then polls powermetrics (gpu_power sampler) from a background goroutine.
+// powermetrics requires root (sudo) on macOS. The goroutine stops once more than
+// maxFailureRetries consecutive collections have failed.
+func (gm *GPUManager) startPowermetricsCollector() {
+	// Ensure single GPU entry for Apple GPU
+	if _, exists := gm.GpuDataMap[appleGPUID]; !exists {
+		gm.GpuDataMap[appleGPUID] = &system.GPUData{Name: "Apple GPU"}
+	}
+
+	go func() {
+		consecutiveFailures := 0
+		for {
+			err := gm.collectPowermetrics()
+			if err == nil {
+				// Successful sample: reset the counter and wait for the next poll
+				consecutiveFailures = 0
+				time.Sleep(powermetricsPollInterval)
+				continue
+			}
+
+			consecutiveFailures++
+			if consecutiveFailures > maxFailureRetries {
+				slog.Warn("powermetrics GPU collector failed repeatedly, stopping", "err", err)
+				return
+			}
+			slog.Warn("Error collecting macOS GPU data via powermetrics (may require sudo)", "err", err)
+			time.Sleep(retryWaitTime)
+		}
+	}()
+}
+
+// collectPowermetrics takes a single powermetrics gpu_power sample and feeds its
+// output to parsePowermetricsData. It returns the command error, or
+// errNoValidData when the output contained nothing usable.
+func (gm *GPUManager) collectPowermetrics() error {
+	cmd := exec.Command(
+		powermetricsCmd,
+		"--samplers", "gpu_power",
+		"-i", strconv.Itoa(powermetricsSampleIntervalMs),
+		"-n", "1",
+	)
+	cmd.Stderr = nil
+
+	output, runErr := cmd.Output()
+	switch {
+	case runErr != nil:
+		return runErr
+	case !gm.parsePowermetricsData(output):
+		return errNoValidData
+	default:
+		return nil
+	}
+}
+
+// parsePowermetricsData extracts the idle-residency percentage and the GPU power
+// from powermetrics gpu_power output and accumulates them into the Apple GPU
+// entry (GpuDataMap["0"]). It returns false when scanning fails or when neither
+// value could be parsed.
+//
+// The lines of interest look like:
+//
+//	GPU idle residency:  99.03%
+//	GPU Power: 4 mW
+func (gm *GPUManager) parsePowermetricsData(output []byte) bool {
+	const (
+		idlePrefix  = "GPU idle residency:"
+		powerPrefix = "GPU Power:"
+	)
+	var (
+		idlePct, milliwatts float64
+		haveIdle, havePower bool
+	)
+
+	lines := bufio.NewScanner(bytes.NewReader(output))
+	for lines.Scan() {
+		text := strings.TrimSpace(lines.Text())
+		switch {
+		case strings.HasPrefix(text, idlePrefix):
+			// First field after the prefix is the percentage, e.g. "99.03%"
+			if values := strings.Fields(strings.TrimPrefix(text, idlePrefix)); len(values) >= 1 {
+				if parsed, err := strconv.ParseFloat(strings.TrimSuffix(values[0], "%"), 64); err == nil {
+					idlePct, haveIdle = parsed, true
+				}
+			}
+		case strings.HasPrefix(text, powerPrefix):
+			// First field after the prefix is the number of milliwatts, e.g. "4"
+			if values := strings.Fields(strings.TrimPrefix(text, powerPrefix)); len(values) >= 1 {
+				if parsed, err := strconv.ParseFloat(values[0], 64); err == nil {
+					milliwatts, havePower = parsed, true
+				}
+			}
+		}
+	}
+	if lines.Err() != nil {
+		return false
+	}
+	if !haveIdle && !havePower {
+		return false
+	}
+
+	gm.Lock()
+	defer gm.Unlock()
+
+	if _, exists := gm.GpuDataMap[appleGPUID]; !exists {
+		gm.GpuDataMap[appleGPUID] = &system.GPUData{Name: "Apple GPU"}
+	}
+	appleGPU := gm.GpuDataMap[appleGPUID]
+
+	if haveIdle {
+		// Usage is the complement of idle residency (100 - 99.03 = 0.97%)
+		appleGPU.Usage += 100 - idlePct
+	}
+	if havePower {
+		// Convert milliwatts to watts
+		appleGPU.Power += milliwatts / milliwattsInAWatt
+	}
+	appleGPU.Count++
+	return true
+}
+
+// startMacmonCollector makes sure the Apple GPU entry (id "0") exists and then
+// keeps `macmon pipe` running from a background goroutine. Unlike powermetrics,
+// this collector does not require sudo. The goroutine stops once more than
+// maxFailureRetries consecutive runs have failed.
+func (gm *GPUManager) startMacmonCollector() {
+	if _, exists := gm.GpuDataMap[appleGPUID]; !exists {
+		gm.GpuDataMap[appleGPUID] = &system.GPUData{Name: "Apple GPU"}
+	}
+
+	go func() {
+		consecutiveFailures := 0
+		for {
+			if err := gm.collectMacmonPipe(); err == nil {
+				consecutiveFailures = 0
+			} else {
+				consecutiveFailures++
+				if consecutiveFailures > maxFailureRetries {
+					slog.Warn("macmon GPU collector failed repeatedly, stopping", "err", err)
+					return
+				}
+				slog.Warn("Error collecting macOS GPU data via macmon", "err", err)
+			}
+			// `macmon pipe` is long-running; whether it failed or simply returned,
+			// wait a bit before starting it again.
+			time.Sleep(retryWaitTime)
+		}
+	}()
+}
+
+// macmonTemp is the "temp" object of a macmon JSON sample; only the average GPU
+// temperature is decoded.
+type macmonTemp struct {
+	GPUTempAvg float64 `json:"gpu_temp_avg"`
+}
+
+// macmonSample holds the GPU-related fields decoded from one line of
+// `macmon pipe` output. All other fields in the JSON object are ignored.
+type macmonSample struct {
+	GPUPower    float64    `json:"gpu_power"`     // watts (macmon reports fractional values)
+	GPURAMPower float64    `json:"gpu_ram_power"` // watts
+	GPUUsage    []float64  `json:"gpu_usage"`     // [freq_mhz, usage] where usage is typically 0..1
+	Temp        macmonTemp `json:"temp"`
+}
+
+// collectMacmonPipe starts `macmon pipe`, passes every non-empty stdout line to
+// parseMacmonLine and blocks until the output stream ends.
+//
+// It returns the pipe/start error, a scanner error, or errNoValidData when no
+// line was accepted by parseMacmonLine. If none of those occurred, the error
+// from cmd.Wait (if any) is returned through the named result.
+func (gm *GPUManager) collectMacmonPipe() (err error) {
+	cmd := exec.Command(macmonCmd, "pipe", "-i", strconv.Itoa(macmonIntervalMs))
+	// Avoid blocking if macmon writes to stderr.
+	cmd.Stderr = io.Discard
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		return err
+	}
+	if err := cmd.Start(); err != nil {
+		return err
+	}
+
+	// Ensure we always reap the child to avoid zombies on any return path and
+	// propagate a non-zero exit code if no other error was set.
+	defer func() {
+		_ = stdout.Close()
+		// Wait has not been called yet at this point, so kill the process unless
+		// its state already shows it exited.
+		if cmd.ProcessState == nil || !cmd.ProcessState.Exited() {
+			_ = cmd.Process.Kill()
+		}
+		// Only surface the Wait error when the function is otherwise returning nil
+		if waitErr := cmd.Wait(); err == nil && waitErr != nil {
+			err = waitErr
+		}
+	}()
+
+	scanner := bufio.NewScanner(stdout)
+	var hadSample bool
+	for scanner.Scan() {
+		line := bytes.TrimSpace(scanner.Bytes())
+		if len(line) == 0 {
+			continue
+		}
+		if gm.parseMacmonLine(line) {
+			hadSample = true
+		}
+	}
+	if scanErr := scanner.Err(); scanErr != nil {
+		return scanErr
+	}
+	if !hadSample {
+		return errNoValidData
+	}
+	return nil
+}
+
+// parseMacmonLine decodes one JSON line emitted by `macmon pipe` and accumulates
+// its GPU usage, power and temperature into the Apple GPU entry. It returns
+// false when the line is not valid JSON or carries no GPU metric at all.
+func (gm *GPUManager) parseMacmonLine(line []byte) bool {
+	var sample macmonSample
+	if json.Unmarshal(line, &sample) != nil {
+		return false
+	}
+
+	// gpu_usage is [freq_mhz, usage]; the second element is the one we want
+	var usagePct float64
+	if len(sample.GPUUsage) > 1 {
+		usagePct = sample.GPUUsage[1]
+		// Heuristic: macmon typically reports 0..1; convert to percentage.
+		if usagePct <= 1.0 {
+			usagePct *= 100
+		}
+	}
+
+	// Consider the line valid if it contains at least one GPU metric.
+	hasMetric := usagePct != 0 || sample.GPUPower != 0 || sample.Temp.GPUTempAvg != 0
+	if !hasMetric {
+		return false
+	}
+
+	gm.Lock()
+	defer gm.Unlock()
+
+	appleGPU, exists := gm.GpuDataMap[appleGPUID]
+	if !exists {
+		appleGPU = &system.GPUData{Name: "Apple GPU"}
+		gm.GpuDataMap[appleGPUID] = appleGPU
+	}
+
+	// Temperature is overwritten with the latest reading; usage and power are summed
+	appleGPU.Temperature = sample.Temp.GPUTempAvg
+	appleGPU.Usage += usagePct
+	// macmon reports power in watts; include VRAM power if present.
+	appleGPU.Power += sample.GPUPower + sample.GPURAMPower
+	appleGPU.Count++
+	return true
+}
--- a/agent/gpu_darwin_test.go
+++ b/agent/gpu_darwin_test.go
@@ -0,0 +1,81 @@
+//go:build darwin
+
+package agent
+
+import (
+	"testing"
+
+	"github.com/henrygd/beszel/internal/entities/system"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestParsePowermetricsData feeds a full powermetrics gpu_power sample to
+// parsePowermetricsData and checks the resulting usage, power and count.
+func TestParsePowermetricsData(t *testing.T) {
+	input := `
Machine model: Mac14,10
OS version: 25D125

*** Sampled system activity (Sat Feb 14 00:42:06 2026 -0500) (503.05ms elapsed) ***

**** GPU usage ****

GPU HW active frequency: 444 MHz
GPU HW active residency:   0.97% (444 MHz: .97% 612 MHz:   0% 808 MHz:   0% 968 MHz:   0% 1110 MHz:   0% 1236 MHz:   0% 1338 MHz:   0% 1398 MHz:   0%)
GPU SW requested state: (P1 : 100% P2 :   0% P3 :   0% P4 :   0% P5 :   0% P6 :   0% P7 :   0% P8 :   0%)
GPU idle residency:  99.03%
GPU Power: 4 mW
`
+	gm := &GPUManager{
+		GpuDataMap: make(map[string]*system.GPUData),
+	}
+	valid := gm.parsePowermetricsData([]byte(input))
+	require.True(t, valid)
+
+	// The parser creates the "0" entry itself when the map is empty
+	g0, ok := gm.GpuDataMap["0"]
+	require.True(t, ok)
+	assert.Equal(t, "Apple GPU", g0.Name)
+	// Usage = 100 - 99.03 = 0.97
+	assert.InDelta(t, 0.97, g0.Usage, 0.01)
+	// 4 mW -> 0.004 W
+	assert.InDelta(t, 0.004, g0.Power, 0.0001)
+	assert.Equal(t, 1.0, g0.Count)
+}
+
+// TestParsePowermetricsDataPartial checks that output containing only the power
+// line is still accepted and recorded.
+func TestParsePowermetricsDataPartial(t *testing.T) {
+	// Only power line (e.g. older macOS or different sampler output)
+	input := `
**** GPU usage ****
GPU Power: 120 mW
`
+	gm := &GPUManager{
+		GpuDataMap: make(map[string]*system.GPUData),
+	}
+	valid := gm.parsePowermetricsData([]byte(input))
+	require.True(t, valid)
+
+	g0, ok := gm.GpuDataMap["0"]
+	require.True(t, ok)
+	assert.Equal(t, "Apple GPU", g0.Name)
+	// 120 mW -> 0.12 W
+	assert.InDelta(t, 0.12, g0.Power, 0.001)
+	assert.Equal(t, 1.0, g0.Count)
+}
+
+// TestParseMacmonLine feeds one real `macmon pipe` JSON line to parseMacmonLine
+// and checks the usage, power and temperature recorded for the Apple GPU.
+func TestParseMacmonLine(t *testing.T) {
+	input := `{"all_power":0.6468324661254883,"ane_power":0.0,"cpu_power":0.6359732151031494,"ecpu_usage":[2061,0.1726151406764984],"gpu_power":0.010859241709113121,"gpu_ram_power":0.000965250947047025,"gpu_usage":[503,0.013633215799927711],"memory":{"ram_total":17179869184,"ram_usage":12322914304,"swap_total":0,"swap_usage":0},"pcpu_usage":[1248,0.11792058497667313],"ram_power":0.14885640144348145,"sys_power":10.4955415725708,"temp":{"cpu_temp_avg":23.041261672973633,"gpu_temp_avg":29.44516944885254},"timestamp":"2026-02-17T19:34:27.942556+00:00"}`
+
+	gm := &GPUManager{
+		GpuDataMap: make(map[string]*system.GPUData),
+	}
+	valid := gm.parseMacmonLine([]byte(input))
+	require.True(t, valid)
+
+	g0, ok := gm.GpuDataMap["0"]
+	require.True(t, ok)
+	assert.Equal(t, "Apple GPU", g0.Name)
+	// macmon reports usage fraction 0..1; expect percent conversion.
+	assert.InDelta(t, 1.3633, g0.Usage, 0.05)
+	// power includes gpu_power + gpu_ram_power
+	assert.InDelta(t, 0.011824, g0.Power, 0.0005)
+	assert.InDelta(t, 29.445, g0.Temperature, 0.01)
+	assert.Equal(t, 1.0, g0.Count)
+}
--- a/agent/gpu_darwin_unsupported.go
+++ b/agent/gpu_darwin_unsupported.go
@@ -0,0 +1,9 @@
+//go:build !darwin
+
+package agent
+
+// startPowermetricsCollector does nothing on non-darwin builds; the real
+// implementation lives in gpu_darwin.go.
+func (gm *GPUManager) startPowermetricsCollector() {}
+
+// startMacmonCollector does nothing on non-darwin builds; the real
+// implementation lives in gpu_darwin.go.
+func (gm *GPUManager) startMacmonCollector() {}
--- a/agent/gpu_intel.go
+++ b/agent/gpu_intel.go
@@ -7,6 +7,7 @@ import (
 	"strconv"
 	"strings"

+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/system"
 )

@@ -27,10 +28,11 @@ func (gm *GPUManager) updateIntelFromStats(sample *intelGpuStats) bool {
 	defer gm.Unlock()

 	// only one gpu for now - cmd doesn't provide all by default
-	gpuData, ok := gm.GpuDataMap["0"]
+	id := "i0" // prefix with i to avoid conflicts with nvidia card ids
+	gpuData, ok := gm.GpuDataMap[id]
 	if !ok {
 		gpuData = &system.GPUData{Name: "GPU", Engines: make(map[string]float64)}
-		gm.GpuDataMap["0"] = gpuData
+		gm.GpuDataMap[id] = gpuData
 	}

 	gpuData.Power += sample.PowerGPU
@@ -51,7 +53,7 @@ func (gm *GPUManager) updateIntelFromStats(sample *intelGpuStats) bool {
 func (gm *GPUManager) collectIntelStats() (err error) {
 	// Build command arguments, optionally selecting a device via -d
 	args := []string{"-s", intelGpuStatsInterval, "-l"}
-	if dev, ok := GetEnv("INTEL_GPU_DEVICE"); ok && dev != "" {
+	if dev, ok := utils.GetEnv("INTEL_GPU_DEVICE"); ok && dev != "" {
 		args = append(args, "-d", dev)
 	}
 	cmd := exec.Command(intelGpuStatsCmd, args...)
--- a/agent/gpu_nvml.go
+++ b/agent/gpu_nvml.go
@@ -0,0 +1,224 @@
+//go:build amd64 && (windows || (linux && glibc))
+
+package agent
+
+import (
+	"fmt"
+	"log/slog"
+	"strings"
+	"time"
+	"unsafe"
+
+	"github.com/ebitengine/purego"
+	"github.com/henrygd/beszel/internal/entities/system"
+)
+
+// NVML constants and types
+const (
+	// nvmlSuccess is the return code this file treats as success.
+	nvmlSuccess int = 0
+)
+
+// nvmlDevice is an opaque device handle filled in by nvmlDeviceGetHandleByIndex.
+type nvmlDevice uintptr
+
+// nvmlReturn is the status code returned by every bound NVML function.
+type nvmlReturn int
+
+// nvmlMemoryV1 is the buffer passed to nvmlDeviceGetMemoryInfo when the v2
+// symbol is not available.
+type nvmlMemoryV1 struct {
+	Total uint64
+	Free  uint64
+	Used  uint64
+}
+
+// nvmlMemoryV2 is the buffer passed to nvmlDeviceGetMemoryInfo_v2. The caller
+// sets Version before the call.
+type nvmlMemoryV2 struct {
+	Version  uint32
+	Total    uint64
+	Reserved uint64
+	Free     uint64
+	Used     uint64
+}
+
+// nvmlUtilization is filled in by nvmlDeviceGetUtilizationRates.
+type nvmlUtilization struct {
+	Gpu    uint32
+	Memory uint32
+}
+
+// nvmlPciInfo is filled in by nvmlDeviceGetPciInfo. BusId is read as a
+// NUL-terminated string.
+type nvmlPciInfo struct {
+	BusId          [16]byte
+	Domain         uint32
+	Bus            uint32
+	Device         uint32
+	PciDeviceId    uint32
+	PciSubSystemId uint32
+}
+
+// NVML function signatures. These variables are nil until nvmlCollector.init
+// binds them to symbols of the loaded library via purego.RegisterLibFunc.
+// nvmlDeviceGetMemoryInfo takes a raw pointer because it is bound to either the
+// v1 or the v2 symbol, which expect different structs.
+var (
+	nvmlInit                      func() nvmlReturn
+	nvmlShutdown                  func() nvmlReturn
+	nvmlDeviceGetCount            func(count *uint32) nvmlReturn
+	nvmlDeviceGetHandleByIndex    func(index uint32, device *nvmlDevice) nvmlReturn
+	nvmlDeviceGetName             func(device nvmlDevice, name *byte, length uint32) nvmlReturn
+	nvmlDeviceGetMemoryInfo       func(device nvmlDevice, memory uintptr) nvmlReturn
+	nvmlDeviceGetUtilizationRates func(device nvmlDevice, utilization *nvmlUtilization) nvmlReturn
+	nvmlDeviceGetTemperature      func(device nvmlDevice, sensorType int, temp *uint32) nvmlReturn
+	nvmlDeviceGetPowerUsage       func(device nvmlDevice, power *uint32) nvmlReturn
+	nvmlDeviceGetPciInfo          func(device nvmlDevice, pci *nvmlPciInfo) nvmlReturn
+	nvmlErrorString               func(result nvmlReturn) string
+)
+
+// nvmlCollector polls NVIDIA GPU metrics through a dynamically loaded NVML
+// library and writes them into the GPUManager's GpuDataMap.
+type nvmlCollector struct {
+	gm      *GPUManager  // manager whose GpuDataMap is updated by collect
+	lib     uintptr      // handle returned by openLibrary
+	devices []nvmlDevice // device handles obtained in init
+	bdfs    []string     // lowercased PCI bus IDs, parallel to devices ("" when unavailable)
+	isV2    bool         // true when nvmlDeviceGetMemoryInfo is bound to the _v2 symbol
+}
+
+// init loads the NVML shared library, binds the package-level nvml* function
+// variables to its symbols, initializes NVML and records a handle plus PCI bus
+// ID for every device whose handle can be obtained.
+//
+// It returns an error if the library cannot be opened, if nvmlInit fails, or if
+// the device count cannot be read.
+func (c *nvmlCollector) init() error {
+	slog.Debug("NVML: Initializing")
+
+	libPath := getNVMLPath()
+	handle, err := openLibrary(libPath)
+	if err != nil {
+		return fmt.Errorf("failed to load %s: %w", libPath, err)
+	}
+	c.lib = handle
+
+	purego.RegisterLibFunc(&nvmlInit, handle, "nvmlInit")
+	purego.RegisterLibFunc(&nvmlShutdown, handle, "nvmlShutdown")
+	purego.RegisterLibFunc(&nvmlDeviceGetCount, handle, "nvmlDeviceGetCount")
+	purego.RegisterLibFunc(&nvmlDeviceGetHandleByIndex, handle, "nvmlDeviceGetHandleByIndex")
+	purego.RegisterLibFunc(&nvmlDeviceGetName, handle, "nvmlDeviceGetName")
+	// Prefer the v2 memory query when the library exports it, otherwise use v1
+	memoryInfoSymbol := "nvmlDeviceGetMemoryInfo"
+	if hasSymbol(handle, "nvmlDeviceGetMemoryInfo_v2") {
+		c.isV2 = true
+		memoryInfoSymbol = "nvmlDeviceGetMemoryInfo_v2"
+	}
+	purego.RegisterLibFunc(&nvmlDeviceGetMemoryInfo, handle, memoryInfoSymbol)
+	purego.RegisterLibFunc(&nvmlDeviceGetUtilizationRates, handle, "nvmlDeviceGetUtilizationRates")
+	purego.RegisterLibFunc(&nvmlDeviceGetTemperature, handle, "nvmlDeviceGetTemperature")
+	purego.RegisterLibFunc(&nvmlDeviceGetPowerUsage, handle, "nvmlDeviceGetPowerUsage")
+	purego.RegisterLibFunc(&nvmlDeviceGetPciInfo, handle, "nvmlDeviceGetPciInfo")
+	purego.RegisterLibFunc(&nvmlErrorString, handle, "nvmlErrorString")
+
+	success := nvmlReturn(nvmlSuccess)
+
+	if ret := nvmlInit(); ret != success {
+		return fmt.Errorf("nvmlInit failed: %v", ret)
+	}
+
+	var count uint32
+	if ret := nvmlDeviceGetCount(&count); ret != success {
+		return fmt.Errorf("nvmlDeviceGetCount failed: %v", ret)
+	}
+
+	for index := uint32(0); index < count; index++ {
+		var device nvmlDevice
+		if nvmlDeviceGetHandleByIndex(index, &device) != success {
+			// Devices without a handle are skipped entirely
+			continue
+		}
+		c.devices = append(c.devices, device)
+
+		// Get BDF for power state check; keep bdfs parallel to devices by
+		// storing "" when the PCI info is unavailable
+		bdf := ""
+		var pci nvmlPciInfo
+		if nvmlDeviceGetPciInfo(device, &pci) == success {
+			rawBusID := string(pci.BusId[:])
+			if end := strings.Index(rawBusID, "\x00"); end != -1 {
+				rawBusID = rawBusID[:end]
+			}
+			bdf = strings.ToLower(rawBusID)
+		}
+		c.bdfs = append(c.bdfs, bdf)
+	}
+
+	return nil
+}
+
+// start calls collect every three seconds. The loop never terminates on its
+// own, so the deferred nvmlShutdown only runs if the goroutine unwinds.
+func (c *nvmlCollector) start() {
+	defer nvmlShutdown()
+
+	for range time.Tick(3 * time.Second) {
+		c.collect()
+	}
+}
+
+// collect takes one sample from every device recorded by init and accumulates
+// it into the matching GpuDataMap entry (keyed by device index). The manager
+// lock is held for the whole pass.
+//
+// For each device:
+//   - the entry is created on first use, named after nvmlDeviceGetName with the
+//     "NVIDIA " prefix and " Laptop GPU" suffix removed;
+//   - if the GPU is reported inactive, or the utilization query fails,
+//     Temperature and MemoryUsed are zeroed and the sample is skipped;
+//   - memory is only queried while utilization is non-zero;
+//   - Temperature is overwritten, Usage and Power are summed and Count is
+//     incremented.
+func (c *nvmlCollector) collect() {
+	c.gm.Lock()
+	defer c.gm.Unlock()
+
+	for i, device := range c.devices {
+		id := fmt.Sprintf("%d", i)
+		bdf := c.bdfs[i]
+
+		// Update GPUDataMap
+		if _, ok := c.gm.GpuDataMap[id]; !ok {
+			var nameBuf [64]byte
+			if ret := nvmlDeviceGetName(device, &nameBuf[0], uint32(len(nameBuf))); ret != nvmlReturn(nvmlSuccess) {
+				continue
+			}
+			// Cut at the first NUL. If the buffer holds no terminator, use the
+			// whole buffer rather than slicing with a -1 index, which would panic.
+			name, _, _ := strings.Cut(string(nameBuf[:]), "\x00")
+			name = strings.TrimPrefix(name, "NVIDIA ")
+			c.gm.GpuDataMap[id] = &system.GPUData{Name: strings.TrimSuffix(name, " Laptop GPU")}
+		}
+		gpu := c.gm.GpuDataMap[id]
+
+		if bdf != "" && !c.isGPUActive(bdf) {
+			slog.Debug("NVML: GPU is suspended, skipping", "bdf", bdf)
+			gpu.Temperature = 0
+			gpu.MemoryUsed = 0
+			continue
+		}
+
+		// Utilization
+		var utilization nvmlUtilization
+		if ret := nvmlDeviceGetUtilizationRates(device, &utilization); ret != nvmlReturn(nvmlSuccess) {
+			slog.Debug("NVML: Utilization failed (GPU likely suspended)", "bdf", bdf, "ret", ret)
+			gpu.Temperature = 0
+			gpu.MemoryUsed = 0
+			continue
+		}
+
+		slog.Debug("NVML: Collecting data for GPU", "bdf", bdf)
+
+		// Temperature (best effort: temp stays 0 if the call fails)
+		var temp uint32
+		nvmlDeviceGetTemperature(device, 0, &temp) // 0 is NVML_TEMPERATURE_GPU
+
+		// Memory: only poll if GPU is active to avoid leaving D3cold state (#1522)
+		if utilization.Gpu > 0 {
+			var usedMem, totalMem uint64
+			if c.isV2 {
+				var memory nvmlMemoryV2
+				memory.Version = 0x02000028 // (2 << 24) | 40 bytes
+				if ret := nvmlDeviceGetMemoryInfo(device, uintptr(unsafe.Pointer(&memory))); ret != nvmlReturn(nvmlSuccess) {
+					slog.Debug("NVML: MemoryInfo_v2 failed", "bdf", bdf, "ret", ret)
+				} else {
+					usedMem = memory.Used
+					totalMem = memory.Total
+				}
+			} else {
+				var memory nvmlMemoryV1
+				if ret := nvmlDeviceGetMemoryInfo(device, uintptr(unsafe.Pointer(&memory))); ret != nvmlReturn(nvmlSuccess) {
+					slog.Debug("NVML: MemoryInfo failed", "bdf", bdf, "ret", ret)
+				} else {
+					usedMem = memory.Used
+					totalMem = memory.Total
+				}
+			}
+			// Keep the previous values when the query failed (totalMem == 0)
+			if totalMem > 0 {
+				gpu.MemoryUsed = float64(usedMem) / 1024 / 1024 / mebibytesInAMegabyte
+				gpu.MemoryTotal = float64(totalMem) / 1024 / 1024 / mebibytesInAMegabyte
+			}
+		} else {
+			slog.Debug("NVML: Skipping memory info (utilization=0)", "bdf", bdf)
+		}
+
+		// Power (best effort: power stays 0 if the call fails)
+		var power uint32
+		nvmlDeviceGetPowerUsage(device, &power)
+
+		gpu.Temperature = float64(temp)
+		gpu.Usage += float64(utilization.Gpu)
+		gpu.Power += float64(power) / 1000.0
+		gpu.Count++
+		slog.Debug("NVML: Collected data", "gpu", gpu)
+	}
+}
--- a/agent/gpu_nvml_linux.go
+++ b/agent/gpu_nvml_linux.go
@@ -0,0 +1,57 @@
+//go:build glibc && linux && amd64
+
+package agent
+
+import (
+	"log/slog"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/ebitengine/purego"
+)
+
+// openLibrary opens the named shared library with purego.Dlopen using
+// RTLD_NOW|RTLD_GLOBAL and returns its handle.
+func openLibrary(name string) (uintptr, error) {
+	return purego.Dlopen(name, purego.RTLD_NOW|purego.RTLD_GLOBAL)
+}
+
+// getNVMLPath returns the NVML library name passed to openLibrary on Linux.
+func getNVMLPath() string {
+	return "libnvidia-ml.so.1"
+}
+
+// hasSymbol reports whether purego.Dlsym can resolve symbol in lib.
+func hasSymbol(lib uintptr, symbol string) bool {
+	_, err := purego.Dlsym(lib, symbol)
+	return err == nil
+}
+
+// isGPUActive reports whether the PCI device identified by bdf looks awake,
+// based on sysfs:
+//
+//   - power/runtime_status must be "active" or "resuming"; if the file cannot
+//     be read the GPU is assumed active;
+//   - if a drm/card*/device/power_state file exists and is readable, it must
+//     contain "D0".
+func (c *nvmlCollector) isGPUActive(bdf string) bool {
+	// runtime_status
+	rawStatus, err := os.ReadFile(filepath.Join("/sys/bus/pci/devices", bdf, "power/runtime_status"))
+	if err != nil {
+		slog.Debug("NVML: Can't read runtime_status", "bdf", bdf, "err", err)
+		return true // Assume active if we can't read status
+	}
+	switch runtimeStatus := strings.TrimSpace(string(rawStatus)); runtimeStatus {
+	case "active", "resuming":
+		// awake as far as runtime PM is concerned; continue with the D-state check
+	default:
+		slog.Debug("NVML: GPU not active", "bdf", bdf, "status", runtimeStatus)
+		return false
+	}
+
+	// power_state (D0 check): look at the first drm card's power_state, if any
+	candidates, _ := filepath.Glob(filepath.Join("/sys/bus/pci/devices", bdf, "drm/card*/device/power_state"))
+	if len(candidates) == 0 {
+		return true
+	}
+	rawPowerState, err := os.ReadFile(candidates[0])
+	if err != nil {
+		// Unreadable power_state is not treated as inactive
+		return true
+	}
+	if powerState := strings.TrimSpace(string(rawPowerState)); powerState != "D0" {
+		slog.Debug("NVML: GPU not in D0 state", "bdf", bdf, "pstate", powerState)
+		return false
+	}
+
+	return true
+}
--- a/agent/gpu_nvml_unsupported.go
+++ b/agent/gpu_nvml_unsupported.go
@@ -0,0 +1,15 @@
+//go:build (!linux && !windows) || !amd64 || (linux && !glibc)
+
+package agent
+
+import "fmt"
+
+// nvmlCollector is the placeholder type for builds where NVML is not supported
+// (see the build constraint at the top of this file).
+type nvmlCollector struct {
+	gm *GPUManager
+}
+
+// init always fails on unsupported platforms.
+func (c *nvmlCollector) init() error {
+	return fmt.Errorf("nvml not supported on this platform")
+}
+
+// start is a no-op on unsupported platforms.
+func (c *nvmlCollector) start() {}
--- a/agent/gpu_nvml_windows.go
+++ b/agent/gpu_nvml_windows.go
@@ -0,0 +1,25 @@
+//go:build windows && amd64
+
+package agent
+
+import (
+	"golang.org/x/sys/windows"
+)
+
+// openLibrary loads the named DLL with windows.LoadLibrary and returns its
+// handle as a uintptr.
+func openLibrary(name string) (uintptr, error) {
+	handle, err := windows.LoadLibrary(name)
+	return uintptr(handle), err
+}
+
+// getNVMLPath returns the NVML library name passed to openLibrary on Windows.
+func getNVMLPath() string {
+	return "nvml.dll"
+}
+
+// hasSymbol reports whether windows.GetProcAddress can resolve symbol in lib.
+func hasSymbol(lib uintptr, symbol string) bool {
+	_, err := windows.GetProcAddress(windows.Handle(lib), symbol)
+	return err == nil
+}
+
+// isGPUActive always reports true on Windows; no power-state check is performed
+// here and bdf is unused.
+func (c *nvmlCollector) isGPUActive(bdf string) bool {
+	return true
+}
--- a/agent/gpu_nvtop.go
+++ b/agent/gpu_nvtop.go
@@ -0,0 +1,160 @@
+package agent
+
+import (
+	"encoding/json"
+	"io"
+	"log/slog"
+	"os/exec"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/henrygd/beszel/agent/utils"
+	"github.com/henrygd/beszel/internal/entities/system"
+)
+
+// nvtopSnapshot is one device entry of an nvtop JSON snapshot array. The metric
+// fields are strings that may carry a unit suffix (see parseNvtopNumber); they
+// are pointers and are nil-checked before use.
+type nvtopSnapshot struct {
+	DeviceName string  `json:"device_name"`
+	Temp       *string `json:"temp"`
+	PowerDraw  *string `json:"power_draw"`
+	GpuUtil    *string `json:"gpu_util"`
+	MemTotal   *string `json:"mem_total"`
+	MemUsed    *string `json:"mem_used"`
+}
+
+// parseNvtopNumber converts an nvtop value such as "48C", "13W" or "5%" to a
+// float. Surrounding whitespace is trimmed, then at most one trailing "C", one
+// "W" and one "%" are removed, in that order. Unparseable input yields 0.
+func parseNvtopNumber(raw string) float64 {
+	text := strings.TrimSpace(raw)
+	for _, unit := range [...]string{"C", "W", "%"} {
+		text = strings.TrimSuffix(text, unit)
+	}
+	// The parse error is deliberately ignored: ParseFloat's returned value is
+	// used as-is (0 for malformed input).
+	number, _ := strconv.ParseFloat(text, 64)
+	return number
+}
+
+// parseNvtopData decodes one nvtop JSON snapshot (an array of devices) and
+// applies it via updateNvtopSnapshots. It returns false for invalid JSON or an
+// empty array.
+func (gm *GPUManager) parseNvtopData(output []byte) bool {
+	var batch []nvtopSnapshot
+	if err := json.Unmarshal(output, &batch); err != nil {
+		return false
+	}
+	if len(batch) == 0 {
+		return false
+	}
+	return gm.updateNvtopSnapshots(batch)
+}
+
+// updateNvtopSnapshots applies one decoded nvtop snapshot batch to GPU accumulators.
+//
+// Each device is stored under the key "n<index>". When the slot at the device's
+// index already holds a different device name, an existing "n"-prefixed slot
+// with a matching name that has not been used in this batch is reused instead.
+// Entries with an empty device name are skipped. It returns true if at least
+// one entry was applied.
+func (gm *GPUManager) updateNvtopSnapshots(snapshots []nvtopSnapshot) bool {
+	gm.Lock()
+	defer gm.Unlock()
+
+	valid := false
+	// usedIDs tracks slots already assigned in this batch so two samples never share one
+	usedIDs := make(map[string]struct{}, len(snapshots))
+	for i, sample := range snapshots {
+		if sample.DeviceName == "" {
+			continue
+		}
+		indexID := "n" + strconv.Itoa(i)
+		id := indexID
+
+		// nvtop ordering can change, so prefer reusing an existing slot with matching device name.
+		if existingByIndex, ok := gm.GpuDataMap[indexID]; ok && existingByIndex.Name != "" && existingByIndex.Name != sample.DeviceName {
+			for existingID, gpu := range gm.GpuDataMap {
+				// Only consider slots created by the nvtop collector
+				if !strings.HasPrefix(existingID, "n") {
+					continue
+				}
+				if _, taken := usedIDs[existingID]; taken {
+					continue
+				}
+				if gpu.Name == sample.DeviceName {
+					id = existingID
+					break
+				}
+			}
+			// If no matching slot was found, id is still indexID and that slot is overwritten below
+		}
+
+		if _, ok := gm.GpuDataMap[id]; !ok {
+			gm.GpuDataMap[id] = &system.GPUData{Name: sample.DeviceName}
+		}
+		gpu := gm.GpuDataMap[id]
+		gpu.Name = sample.DeviceName
+
+		// Temperature and memory are overwritten with the latest values;
+		// usage and power are summed and Count is incremented.
+		if sample.Temp != nil {
+			gpu.Temperature = parseNvtopNumber(*sample.Temp)
+		}
+		if sample.MemUsed != nil {
+			gpu.MemoryUsed = utils.BytesToMegabytes(parseNvtopNumber(*sample.MemUsed))
+		}
+		if sample.MemTotal != nil {
+			gpu.MemoryTotal = utils.BytesToMegabytes(parseNvtopNumber(*sample.MemTotal))
+		}
+		if sample.GpuUtil != nil {
+			gpu.Usage += parseNvtopNumber(*sample.GpuUtil)
+		}
+		if sample.PowerDraw != nil {
+			gpu.Power += parseNvtopNumber(*sample.PowerDraw)
+		}
+		gpu.Count++
+		usedIDs[id] = struct{}{}
+		valid = true
+	}
+	return valid
+}
+
+// collectNvtopStats starts nvtop in loop mode (-lP -d interval) and applies
+// every JSON snapshot array it prints until the stream ends.
+//
+// It returns nil when the stream ended after at least one valid snapshot,
+// errNoValidData when it ended without one, or the pipe/start/decode error.
+func (gm *GPUManager) collectNvtopStats(interval string) error {
+	cmd := exec.Command(nvtopCmd, "-lP", "-d", interval)
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		return err
+	}
+	if startErr := cmd.Start(); startErr != nil {
+		return startErr
+	}
+	// Always close the pipe and reap the child, killing it first if it has not exited
+	defer func() {
+		_ = stdout.Close()
+		if cmd.ProcessState == nil || !cmd.ProcessState.Exited() {
+			_ = cmd.Process.Kill()
+		}
+		_ = cmd.Wait()
+	}()
+
+	stream := json.NewDecoder(stdout)
+	sawValidSnapshot := false
+	for {
+		var batch []nvtopSnapshot
+		decodeErr := stream.Decode(&batch)
+		switch {
+		case decodeErr == nil:
+			if gm.updateNvtopSnapshots(batch) {
+				sawValidSnapshot = true
+			}
+		case decodeErr != io.EOF:
+			return decodeErr
+		case sawValidSnapshot:
+			return nil
+		default:
+			return errNoValidData
+		}
+	}
+}
+
+// startNvtopCollector runs collectNvtopStats in a background goroutine.
+//
+// If onFailure is non-nil, the first collection error is logged, onFailure is
+// called and the goroutine exits without retrying. Otherwise errors are retried
+// after retryWaitTime until more than maxFailureRetries consecutive failures
+// have occurred, at which point the collector stops.
+//
+// When nvtop exits cleanly the failure counter is reset and the collector waits
+// retryWaitTime before starting it again, matching the macmon collector.
+func (gm *GPUManager) startNvtopCollector(interval string, onFailure func()) {
+	go func() {
+		failures := 0
+		for {
+			err := gm.collectNvtopStats(interval)
+			if err == nil {
+				// nvtop loop mode is long-running; if it returns, wait a bit
+				// before restarting instead of respawning it in a tight loop.
+				failures = 0
+				time.Sleep(retryWaitTime)
+				continue
+			}
+			if onFailure != nil {
+				slog.Warn("Error collecting GPU data via nvtop", "err", err)
+				onFailure()
+				return
+			}
+			failures++
+			if failures > maxFailureRetries {
+				slog.Warn("nvtop GPU collector failed repeatedly, stopping", "err", err)
+				return
+			}
+			slog.Warn("Error collecting GPU data via nvtop", "err", err)
+			time.Sleep(retryWaitTime)
+		}
+	}()
+}
--- a/agent/gpu_test.go
+++ b/agent/gpu_test.go
@@ -1,5 +1,4 @@
 //go:build testing
-// +build testing

 package agent

@@ -11,6 +10,7 @@ import (
 	"testing"
 	"time"

+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/system"

 	"github.com/stretchr/testify/assert"
@@ -250,6 +250,100 @@ func TestParseAmdData(t *testing.T) {
 	}
 }

+// TestParseNvtopData parses the test-data/nvtop.json fixture and checks every
+// field of the two GPU entries it is expected to produce ("n0" and "n1").
+func TestParseNvtopData(t *testing.T) {
+	fixture, readErr := os.ReadFile("test-data/nvtop.json")
+	require.NoError(t, readErr)
+
+	manager := &GPUManager{GpuDataMap: map[string]*system.GPUData{}}
+	require.True(t, manager.parseNvtopData(fixture))
+
+	expectations := []struct {
+		id   string
+		want system.GPUData
+	}{
+		{
+			id: "n0",
+			want: system.GPUData{
+				Name:        "NVIDIA GeForce RTX 3050 Ti Laptop GPU",
+				Temperature: 48.0,
+				Usage:       5.0,
+				Power:       13.0,
+				MemoryUsed:  utils.BytesToMegabytes(349372416),
+				MemoryTotal: utils.BytesToMegabytes(4294967296),
+				Count:       1.0,
+			},
+		},
+		{
+			id: "n1",
+			want: system.GPUData{
+				Name:        "AMD Radeon 680M",
+				Temperature: 48.0,
+				Usage:       12.0,
+				Power:       9.0,
+				MemoryUsed:  utils.BytesToMegabytes(1213784064),
+				MemoryTotal: utils.BytesToMegabytes(16929173504),
+				Count:       1.0,
+			},
+		},
+	}
+
+	for _, exp := range expectations {
+		got, found := manager.GpuDataMap[exp.id]
+		require.True(t, found)
+		assert.Equal(t, exp.want.Name, got.Name)
+		assert.Equal(t, exp.want.Temperature, got.Temperature)
+		assert.Equal(t, exp.want.Usage, got.Usage)
+		assert.Equal(t, exp.want.Power, got.Power)
+		assert.Equal(t, exp.want.MemoryUsed, got.MemoryUsed)
+		assert.Equal(t, exp.want.MemoryTotal, got.MemoryTotal)
+		assert.Equal(t, exp.want.Count, got.Count)
+	}
+}
+
+// TestUpdateNvtopSnapshotsKeepsDeviceAssociationWhenOrderChanges applies two
+// batches that list the same two devices in opposite order and verifies that
+// usage, power and count accumulate per device under the keys "n0" and "n1".
+func TestUpdateNvtopSnapshotsKeepsDeviceAssociationWhenOrderChanges(t *testing.T) {
+	const (
+		nvidiaName = "NVIDIA GeForce RTX 3050 Ti Laptop GPU"
+		amdName    = "AMD Radeon 680M"
+	)
+	// sample builds a snapshot that carries only a name, utilization and power.
+	sample := func(name, util, power string) nvtopSnapshot {
+		return nvtopSnapshot{DeviceName: name, GpuUtil: &util, PowerDraw: &power}
+	}
+
+	manager := &GPUManager{GpuDataMap: map[string]*system.GPUData{}}
+
+	inOrder := []nvtopSnapshot{
+		sample(nvidiaName, "20%", "10W"),
+		sample(amdName, "30%", "20W"),
+	}
+	swapped := []nvtopSnapshot{
+		sample(amdName, "40%", "25W"),
+		sample(nvidiaName, "50%", "15W"),
+	}
+	require.True(t, manager.updateNvtopSnapshots(inOrder))
+	require.True(t, manager.updateNvtopSnapshots(swapped))
+
+	// The device seen first keeps key n0 even though it came second later.
+	first := manager.GpuDataMap["n0"]
+	require.NotNil(t, first)
+	assert.Equal(t, nvidiaName, first.Name)
+	assert.Equal(t, 70.0, first.Usage)
+	assert.Equal(t, 25.0, first.Power)
+	assert.Equal(t, 2.0, first.Count)
+
+	second := manager.GpuDataMap["n1"]
+	require.NotNil(t, second)
+	assert.Equal(t, amdName, second.Name)
+	assert.Equal(t, 70.0, second.Usage)
+	assert.Equal(t, 45.0, second.Power)
+	assert.Equal(t, 2.0, second.Count)
+}
+
+// TestParseCollectorPriority checks that a comma-separated collector list is
+// returned in its written order, with surrounding whitespace ignored and the
+// unrecognized "bad" entry left out.
+func TestParseCollectorPriority(t *testing.T) {
+	const raw = " nvml, nvidia-smi, intel_gpu_top, amd_sysfs, nvtop, rocm-smi, bad "
+
+	expected := []collectorSource{
+		collectorSourceNVML,
+		collectorSourceNvidiaSMI,
+		collectorSourceIntelGpuTop,
+		collectorSourceAmdSysfs,
+		collectorSourceNVTop,
+		collectorSourceRocmSMI,
+	}
+	actual := parseCollectorPriority(raw)
+
+	assert.Equal(t, expected, actual)
+}
+
 func TestParseJetsonData(t *testing.T) {
 	tests := []struct {
 		name        string
@@ -307,6 +401,19 @@ func TestParseJetsonData(t *testing.T) {
 				Count:       1,
 			},
 		},
+		{
+			name:  "orin-style output with GPU@ temp and VDD_SYS_GPU power",
+			input: "RAM 3276/7859MB (lfb 5x4MB) SWAP 1626/12122MB (cached 181MB) CPU [44%@1421,49%@2031,67%@2034,17%@1420,25%@1419,8%@1420] EMC_FREQ 1%@1866 GR3D_FREQ 0%@114 APE 150 MTS fg 1% bg 1% PLL@42.5C MCPU@42.5C PMIC@50C Tboard@38C GPU@39.5C BCPU@42.5C thermal@41.3C Tdiode@39.25C VDD_SYS_GPU 182/182 VDD_SYS_SOC 730/730 VDD_4V0_WIFI 0/0 VDD_IN 5297/5297 VDD_SYS_CPU 1917/1917 VDD_SYS_DDR 1241/1241",
+			wantMetrics: &system.GPUData{
+				Name:        "GPU",
+				MemoryUsed:  3276.0,
+				MemoryTotal: 7859.0,
+				Usage:       0.0,
+				Power:       0.182, // 182mW -> 0.182W
+				Temperature: 39.5,
+				Count:       1,
+			},
+		},
 	}

 	for _, tt := range tests {
@@ -825,7 +932,7 @@ func TestInitializeSnapshots(t *testing.T) {
 }

 func TestCalculateGPUAverage(t *testing.T) {
-	t.Run("returns zero value when deltaCount is zero", func(t *testing.T) {
+	t.Run("returns cached average when deltaCount is zero", func(t *testing.T) {
 		gm := &GPUManager{
 			lastSnapshots: map[uint16]map[string]*gpuSnapshot{
 				5000: {
@@ -838,9 +945,10 @@ func TestCalculateGPUAverage(t *testing.T) {
 		}

 		gpu := &system.GPUData{
-			Count: 10.0, // Same as snapshot, so delta = 0
-			Usage: 100.0,
-			Power: 200.0,
+			Count:       10.0, // Same as snapshot, so delta = 0
+			Usage:       100.0,
+			Power:       200.0,
+			Temperature: 50.0, // Non-zero to avoid "suspended" check
 		}

 		result := gm.calculateGPUAverage("0", gpu, 5000)
@@ -849,6 +957,31 @@ func TestCalculateGPUAverage(t *testing.T) {
 		assert.Equal(t, 100.0, result.Power, "Should return cached average")
 	})

+	t.Run("returns zero value when GPU is suspended", func(t *testing.T) {
+		gm := &GPUManager{
+			lastSnapshots: map[uint16]map[string]*gpuSnapshot{
+				5000: {
+					"0": {count: 10, usage: 100, power: 200},
+				},
+			},
+			lastAvgData: map[string]system.GPUData{
+				"0": {Usage: 50.0, Power: 100.0},
+			},
+		}
+
+		gpu := &system.GPUData{
+			Name:        "Test GPU",
+			Count:       10.0,
+			Temperature: 0,
+			MemoryUsed:  0,
+		}
+
+		result := gm.calculateGPUAverage("0", gpu, 5000)
+
+		assert.Equal(t, 0.0, result.Usage, "Should return zero usage")
+		assert.Equal(t, 0.0, result.Power, "Should return zero power")
+	})
+
 	t.Run("calculates average for standard GPU", func(t *testing.T) {
 		gm := &GPUManager{
 			lastSnapshots: map[uint16]map[string]*gpuSnapshot{
@@ -948,36 +1081,33 @@ func TestCalculateGPUAverage(t *testing.T) {
 	})
 }

-func TestDetectGPUs(t *testing.T) {
+func TestGPUCapabilitiesAndLegacyPriority(t *testing.T) {
 	// Save original PATH
-	origPath := os.Getenv("PATH")
-	defer os.Setenv("PATH", origPath)
-
-	// Set up temp dir with the commands
-	tempDir := t.TempDir()
-	os.Setenv("PATH", tempDir)
+	hasAmdSysfs := (&GPUManager{}).hasAmdSysfs()

 	tests := []struct {
 		name           string
-		setupCommands  func() error
+		setupCommands  func(string) error
 		wantNvidiaSmi  bool
 		wantRocmSmi    bool
 		wantTegrastats bool
+		wantNvtop      bool
 		wantErr        bool
 	}{
 		{
 			name: "nvidia-smi not available",
-			setupCommands: func() error {
+			setupCommands: func(_ string) error {
 				return nil
 			},
 			wantNvidiaSmi:  false,
 			wantRocmSmi:    false,
 			wantTegrastats: false,
+			wantNvtop:      false,
 			wantErr:        true,
 		},
 		{
 			name: "nvidia-smi available",
-			setupCommands: func() error {
+			setupCommands: func(tempDir string) error {
 				path := filepath.Join(tempDir, "nvidia-smi")
 				script := `#!/bin/sh
 echo "test"`
@@ -989,29 +1119,14 @@ echo "test"`
 			wantNvidiaSmi:  true,
 			wantTegrastats: false,
 			wantRocmSmi:    false,
+			wantNvtop:      false,
 			wantErr:        false,
 		},
 		{
 			name: "rocm-smi available",
-			setupCommands: func() error {
+			setupCommands: func(tempDir string) error {
 				path := filepath.Join(tempDir, "rocm-smi")
 				script := `#!/bin/sh
-echo "test"`
-				if err := os.WriteFile(path, []byte(script), 0755); err != nil {
-					return err
-				}
-				return nil
-			},
-			wantNvidiaSmi:  true,
-			wantRocmSmi:    true,
-			wantTegrastats: false,
-			wantErr:        false,
-		},
-		{
-			name: "tegrastats available",
-			setupCommands: func() error {
-				path := filepath.Join(tempDir, "tegrastats")
-				script := `#!/bin/sh
 echo "test"`
 				if err := os.WriteFile(path, []byte(script), 0755); err != nil {
 					return err
@@ -1020,13 +1135,48 @@ echo "test"`
 			},
 			wantNvidiaSmi:  false,
 			wantRocmSmi:    true,
+			wantTegrastats: false,
+			wantNvtop:      false,
+			wantErr:        false,
+		},
+		{
+			name: "tegrastats available",
+			setupCommands: func(tempDir string) error {
+				path := filepath.Join(tempDir, "tegrastats")
+				script := `#!/bin/sh
+echo "test"`
+				if err := os.WriteFile(path, []byte(script), 0755); err != nil {
+					return err
+				}
+				return nil
+			},
+			wantNvidiaSmi:  false,
+			wantRocmSmi:    false,
 			wantTegrastats: true,
+			wantNvtop:      false,
+			wantErr:        false,
+		},
+		{
+			name: "nvtop available",
+			setupCommands: func(tempDir string) error {
+				path := filepath.Join(tempDir, "nvtop")
+				script := `#!/bin/sh
+echo "[]"`
+				if err := os.WriteFile(path, []byte(script), 0755); err != nil {
+					return err
+				}
+				return nil
+			},
+			wantNvidiaSmi:  false,
+			wantRocmSmi:    false,
+			wantTegrastats: false,
+			wantNvtop:      true,
 			wantErr:        false,
 		},
 		{
 			name: "no gpu tools available",
-			setupCommands: func() error {
-				os.Setenv("PATH", "")
+			setupCommands: func(_ string) error {
+				t.Setenv("PATH", "")
 				return nil
 			},
 			wantErr: true,
@@ -1035,36 +1185,56 @@ echo "test"`

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			if err := tt.setupCommands(); err != nil {
+			tempDir := t.TempDir()
+			t.Setenv("PATH", tempDir)
+			if err := tt.setupCommands(tempDir); err != nil {
 				t.Fatal(err)
 			}

 			gm := &GPUManager{}
-			err := gm.detectGPUs()
+			caps := gm.discoverGpuCapabilities()
+			var err error
+			if !hasAnyGpuCollector(caps) {
+				err = fmt.Errorf(noGPUFoundMsg)
+			}
+			priorities := gm.resolveLegacyCollectorPriority(caps)
+			hasPriority := func(source collectorSource) bool {
+				for _, s := range priorities {
+					if s == source {
+						return true
+					}
+				}
+				return false
+			}
+			gotNvidiaSmi := hasPriority(collectorSourceNvidiaSMI)
+			gotRocmSmi := hasPriority(collectorSourceRocmSMI)
+			gotTegrastats := caps.hasTegrastats
+			gotNvtop := caps.hasNvtop

-			t.Logf("nvidiaSmi: %v, rocmSmi: %v, tegrastats: %v", gm.nvidiaSmi, gm.rocmSmi, gm.tegrastats)
+			t.Logf("nvidiaSmi: %v, rocmSmi: %v, tegrastats: %v", gotNvidiaSmi, gotRocmSmi, gotTegrastats)

-			if tt.wantErr {
+			wantErr := tt.wantErr
+			if hasAmdSysfs && (tt.name == "nvidia-smi not available" || tt.name == "no gpu tools available") {
+				wantErr = false
+			}
+			if wantErr {
 				assert.Error(t, err)
 				return
 			}

 			assert.NoError(t, err)
-			assert.Equal(t, tt.wantNvidiaSmi, gm.nvidiaSmi)
-			assert.Equal(t, tt.wantRocmSmi, gm.rocmSmi)
-			assert.Equal(t, tt.wantTegrastats, gm.tegrastats)
+			assert.Equal(t, tt.wantNvidiaSmi, gotNvidiaSmi)
+			assert.Equal(t, tt.wantRocmSmi, gotRocmSmi)
+			assert.Equal(t, tt.wantTegrastats, gotTegrastats)
+			assert.Equal(t, tt.wantNvtop, gotNvtop)
 		})
 	}
 }

-func TestStartCollector(t *testing.T) {
-	// Save original PATH
-	origPath := os.Getenv("PATH")
-	defer os.Setenv("PATH", origPath)
-
+func TestCollectorStartHelpers(t *testing.T) {
 	// Set up temp dir with the commands
 	dir := t.TempDir()
-	os.Setenv("PATH", dir)
+	t.Setenv("PATH", dir)

 	tests := []struct {
 		name     string
@@ -1142,6 +1312,27 @@ echo "11-14-2024 22:54:33 RAM 1024/4096MB GR3D_FREQ 80% tj@70C VDD_GPU_SOC 1000m
 				},
 			},
 		},
+		{
+			name:    "nvtop collector",
+			command: "nvtop",
+			setup: func(t *testing.T) error {
+				path := filepath.Join(dir, "nvtop")
+				script := `#!/bin/sh
+echo '[{"device_name":"NVIDIA Test GPU","temp":"52C","power_draw":"31W","gpu_util":"37%","mem_total":"4294967296","mem_used":"536870912","processes":[]}]'`
+				if err := os.WriteFile(path, []byte(script), 0755); err != nil {
+					return err
+				}
+				return nil
+			},
+			validate: func(t *testing.T, gm *GPUManager) {
+				gpu, exists := gm.GpuDataMap["n0"]
+				assert.True(t, exists)
+				if exists {
+					assert.Equal(t, "NVIDIA Test GPU", gpu.Name)
+					assert.Equal(t, 52.0, gpu.Temperature)
+				}
+			},
+		},
 	}

 	for _, tt := range tests {
@@ -1154,13 +1345,142 @@ echo "11-14-2024 22:54:33 RAM 1024/4096MB GR3D_FREQ 80% tj@70C VDD_GPU_SOC 1000m
 					GpuDataMap: make(map[string]*system.GPUData),
 				}
 			}
-			tt.gm.startCollector(tt.command)
+			switch tt.command {
+			case nvidiaSmiCmd:
+				tt.gm.startNvidiaSmiCollector("4")
+			case rocmSmiCmd:
+				tt.gm.startRocmSmiCollector(4300 * time.Millisecond)
+			case tegraStatsCmd:
+				tt.gm.startTegraStatsCollector("3700")
+			case nvtopCmd:
+				tt.gm.startNvtopCollector("30", nil)
+			default:
+				t.Fatalf("unknown test command %q", tt.command)
+			}
 			time.Sleep(50 * time.Millisecond) // Give collector time to run
 			tt.validate(t, tt.gm)
 		})
 	}
 }

+// TestNewGPUManagerPriorityNvtopFallback configures "nvtop,nvidia-smi" as the
+// collector list, stubs nvtop so that it prints non-JSON output, and expects
+// the GPU published under key "0" to carry the values printed by the
+// nvidia-smi stub.
+func TestNewGPUManagerPriorityNvtopFallback(t *testing.T) {
+	binDir := t.TempDir()
+	t.Setenv("PATH", binDir)
+	t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "nvtop,nvidia-smi")
+
+	// install writes an executable stub into the temporary PATH directory.
+	install := func(name, script string) {
+		t.Helper()
+		require.NoError(t, os.WriteFile(filepath.Join(binDir, name), []byte(script), 0755))
+	}
+	install("nvtop", `#!/bin/sh
+echo 'not-json'`)
+	install("nvidia-smi", `#!/bin/sh
+echo "0, NVIDIA Priority GPU, 45, 512, 2048, 12, 25"`)
+
+	manager, err := NewGPUManager()
+	require.NoError(t, err)
+	require.NotNil(t, manager)
+
+	// Give the background collectors time to run before reading the map.
+	time.Sleep(150 * time.Millisecond)
+	got, found := manager.GpuDataMap["0"]
+	require.True(t, found)
+	assert.Equal(t, "Priority GPU", got.Name)
+	assert.Equal(t, 45.0, got.Temperature)
+}
+
+// TestNewGPUManagerPriorityMixedCollectors configures two collectors of
+// different kinds ("intel_gpu_top,rocm-smi"), stubs both commands in a
+// temporary PATH, and expects both to publish data: the Intel entry under
+// key "i0" and the rocm-smi entry under the GUID printed by its stub.
+func TestNewGPUManagerPriorityMixedCollectors(t *testing.T) {
+	dir := t.TempDir()
+	t.Setenv("PATH", dir)
+	t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "intel_gpu_top,rocm-smi")
+
+	// Fake intel_gpu_top: two header lines followed by two sample rows.
+	intelPath := filepath.Join(dir, "intel_gpu_top")
+	intelScript := `#!/bin/sh
+echo "Freq MHz      IRQ RC6     Power W     IMC MiB/s             RCS             VCS"
+echo " req  act       /s   %   gpu   pkg     rd     wr       %  se  wa       %  se  wa"
+echo "226  223      338  58  2.00  2.69   1820    965   0.00    0   0    0.00   0   0"
+echo "189  187      412  67  1.80  2.45   1950    823   8.50    2   1    15.00   1   0"
+`
+	require.NoError(t, os.WriteFile(intelPath, []byte(intelScript), 0755))
+
+	// Fake rocm-smi: a single JSON object describing one card with GUID 34756.
+	rocmPath := filepath.Join(dir, "rocm-smi")
+	rocmScript := `#!/bin/sh
+echo '{"card0": {"Temperature (Sensor edge) (C)": "49.0", "Current Socket Graphics Package Power (W)": "28.159", "GPU use (%)": "0", "VRAM Total Memory (B)": "536870912", "VRAM Total Used Memory (B)": "445550592", "Card Series": "Rembrandt [Radeon 680M]", "GUID": "34756"}}'
+`
+	require.NoError(t, os.WriteFile(rocmPath, []byte(rocmScript), 0755))
+
+	gm, err := NewGPUManager()
+	require.NoError(t, err)
+	require.NotNil(t, gm)
+
+	// Give the background collectors time to run before reading the map.
+	time.Sleep(150 * time.Millisecond)
+	_, intelOk := gm.GpuDataMap["i0"]
+	_, amdOk := gm.GpuDataMap["34756"]
+	assert.True(t, intelOk)
+	assert.True(t, amdOk)
+}
+
+// TestNewGPUManagerPriorityNvmlFallbackToNvidiaSmi configures
+// "nvml,nvidia-smi" with only an nvidia-smi stub on PATH and expects the GPU
+// under key "0" to carry the name printed by that stub.
+func TestNewGPUManagerPriorityNvmlFallbackToNvidiaSmi(t *testing.T) {
+	binDir := t.TempDir()
+	t.Setenv("PATH", binDir)
+	t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "nvml,nvidia-smi")
+
+	const stub = `#!/bin/sh
+echo "0, NVIDIA Fallback GPU, 41, 256, 1024, 8, 14"`
+	stubPath := filepath.Join(binDir, "nvidia-smi")
+	require.NoError(t, os.WriteFile(stubPath, []byte(stub), 0755))
+
+	manager, err := NewGPUManager()
+	require.NoError(t, err)
+	require.NotNil(t, manager)
+
+	// Give the background collector time to run before reading the map.
+	time.Sleep(150 * time.Millisecond)
+	got, found := manager.GpuDataMap["0"]
+	require.True(t, found)
+	assert.Equal(t, "Fallback GPU", got.Name)
+}
+
+// TestNewGPUManagerConfiguredCollectorsMustStart runs NewGPUManager with an
+// empty PATH directory and an explicit collector list, and expects it to fail
+// with the "no configured GPU collectors are available" error both when the
+// listed collector is known but absent and when every entry is unknown.
+func TestNewGPUManagerConfiguredCollectorsMustStart(t *testing.T) {
+	emptyBin := t.TempDir()
+	t.Setenv("PATH", emptyBin)
+
+	scenarios := []struct {
+		name       string
+		collectors string
+	}{
+		{name: "configured valid collector unavailable", collectors: "nvidia-smi"},
+		{name: "configured collector list has only unknown entries", collectors: "bad,unknown"},
+	}
+
+	for _, sc := range scenarios {
+		t.Run(sc.name, func(t *testing.T) {
+			t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", sc.collectors)
+			manager, err := NewGPUManager()
+			require.Nil(t, manager)
+			require.Error(t, err)
+			assert.Contains(t, err.Error(), "no configured GPU collectors are available")
+		})
+	}
+}
+
+// TestNewGPUManagerJetsonIgnoresCollectorConfig puts only a tegrastats stub on
+// PATH while the collector list names "nvidia-smi", and expects NewGPUManager
+// to succeed and publish the tegrastats data under key "0".
+func TestNewGPUManagerJetsonIgnoresCollectorConfig(t *testing.T) {
+	binDir := t.TempDir()
+	t.Setenv("PATH", binDir)
+	t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "nvidia-smi")
+
+	const stub = `#!/bin/sh
+echo "11-14-2024 22:54:33 RAM 1024/4096MB GR3D_FREQ 80% tj@70C VDD_GPU_SOC 1000mW"`
+	stubPath := filepath.Join(binDir, "tegrastats")
+	require.NoError(t, os.WriteFile(stubPath, []byte(stub), 0755))
+
+	manager, err := NewGPUManager()
+	require.NoError(t, err)
+	require.NotNil(t, manager)
+
+	// Give the background collector time to run before reading the map.
+	time.Sleep(100 * time.Millisecond)
+	got, found := manager.GpuDataMap["0"]
+	require.True(t, found)
+	assert.Equal(t, "GPU", got.Name)
+}
+
 // TestAccumulationTableDriven tests the accumulation behavior for all three GPU types
 func TestAccumulation(t *testing.T) {
 	type expectedGPUValues struct {
@@ -1346,7 +1666,7 @@ func TestIntelUpdateFromStats(t *testing.T) {
 	ok := gm.updateIntelFromStats(&sample1)
 	assert.True(t, ok)

-	gpu := gm.GpuDataMap["0"]
+	gpu := gm.GpuDataMap["i0"]
 	require.NotNil(t, gpu)
 	assert.Equal(t, "GPU", gpu.Name)
 	assert.EqualValues(t, 10.5, gpu.Power)
@@ -1368,7 +1688,7 @@ func TestIntelUpdateFromStats(t *testing.T) {
 	ok = gm.updateIntelFromStats(&sample2)
 	assert.True(t, ok)

-	gpu = gm.GpuDataMap["0"]
+	gpu = gm.GpuDataMap["i0"]
 	require.NotNil(t, gpu)
 	assert.EqualValues(t, 10.5, gpu.Power)
 	assert.EqualValues(t, 30.0, gpu.Engines["Render/3D"]) // 20 + 10
@@ -1378,12 +1698,8 @@ func TestIntelUpdateFromStats(t *testing.T) {
 }

 func TestIntelCollectorStreaming(t *testing.T) {
-	// Save and override PATH
-	origPath := os.Getenv("PATH")
-	defer os.Setenv("PATH", origPath)
-
 	dir := t.TempDir()
-	os.Setenv("PATH", dir)
+	t.Setenv("PATH", dir)

 	// Create a fake intel_gpu_top that prints -l format with four samples (first will be skipped) and exits
 	scriptPath := filepath.Join(dir, "intel_gpu_top")
@@ -1407,7 +1723,7 @@ echo "298  295      278  51  2.20  3.12   1675    942   5.75    1   2    9.50
 		t.Fatalf("collectIntelStats error: %v", err)
 	}

-	gpu := gm.GpuDataMap["0"]
+	gpu := gm.GpuDataMap["i0"]
 	require.NotNil(t, gpu)
 	// Power should be sum of samples 2-4 (first is skipped): 2.0 + 1.8 + 2.2 = 6.0
 	assert.EqualValues(t, 6.0, gpu.Power)
--- a/agent/handlers.go
+++ b/agent/handlers.go
@@ -9,7 +9,7 @@ import (
 	"github.com/henrygd/beszel/internal/common"
 	"github.com/henrygd/beszel/internal/entities/smart"

-	"golang.org/x/exp/slog"
+	"log/slog"
 )

 // HandlerContext provides context for request handlers
@@ -94,7 +94,7 @@ func (h *GetDataHandler) Handle(hctx *HandlerContext) error {
 	var options common.DataRequestOptions
 	_ = cbor.Unmarshal(hctx.Request.Data, &options)

-	sysStats := hctx.Agent.gatherStats(options.CacheTimeMs)
+	sysStats := hctx.Agent.gatherStats(options)
 	return hctx.SendResponse(sysStats, hctx.RequestID)
 }

--- a/agent/handlers_test.go
+++ b/agent/handlers_test.go
@@ -1,5 +1,4 @@
 //go:build testing
-// +build testing

 package agent

--- a/agent/health/health.go
+++ b/agent/health/health.go
@@ -9,11 +9,31 @@ import (
 	"log"
 	"os"
 	"path/filepath"
+	"runtime"
 	"time"
 )

 // healthFile is the path to the health file
-var healthFile = filepath.Join(os.TempDir(), "beszel_health")
+var healthFile = getHealthFilePath()
+
+// getHealthFilePath picks the location of the health file.
+//
+// On Linux it prefers /dev/shm when a file can be created there; otherwise,
+// and on every other OS, it uses os.TempDir().
+//
+// Writability is probed with a throwaway temp file instead of creating the
+// health file itself. The health file's modification time is the liveness
+// signal read by Check, and this function runs during package initialization
+// of every process that imports the package. Creating the real file here
+// would therefore refresh that timestamp without any call to Update.
+func getHealthFilePath() string {
+	const filename = "beszel_health"
+	if runtime.GOOS == "linux" {
+		const shmDir = "/dev/shm"
+		if probe, err := os.CreateTemp(shmDir, filename+"_probe_*"); err == nil {
+			// Best-effort cleanup; the probe only had to be creatable.
+			_ = probe.Close()
+			_ = os.Remove(probe.Name())
+			return filepath.Join(shmDir, filename)
+		}
+	}
+	return filepath.Join(os.TempDir(), filename)
+}
+
+// updateHealthFile creates the file at path (os.Create truncates an existing
+// file) and closes it again. It returns the create error if creation fails,
+// otherwise the result of closing the file.
+func updateHealthFile(path string) error {
+	f, createErr := os.Create(path)
+	if createErr == nil {
+		return f.Close()
+	}
+	return createErr
+}

 // Check checks if the agent is connected by checking the modification time of the health file
 func Check() error {
@@ -30,11 +50,7 @@ func Check() error {

 // Update updates the modification time of the health file
 func Update() error {
-	file, err := os.Create(healthFile)
-	if err != nil {
-		return err
-	}
-	return file.Close()
+	return updateHealthFile(healthFile)
 }

 // CleanUp removes the health file
--- a/agent/health/health_test.go
+++ b/agent/health/health_test.go
@@ -1,5 +1,4 @@
 //go:build testing
-// +build testing

 package health

@@ -37,7 +36,6 @@ func TestHealth(t *testing.T) {
 	})

 	// This test uses synctest to simulate time passing.
-	// NOTE: This test requires GOEXPERIMENT=synctest to run.
 	t.Run("check with simulated time", func(t *testing.T) {
 		synctest.Test(t, func(t *testing.T) {
 			// Update the file to set the initial timestamp.
--- a/agent/lhm/beszel_lhm.cs
+++ b/agent/lhm/beszel_lhm.cs
@@ -52,7 +52,12 @@ class Program
    foreach (var sensor in hardware.Sensors)
    {
      var validTemp = sensor.SensorType == SensorType.Temperature && sensor.Value.HasValue;
-      if (!validTemp || sensor.Name.Contains("Distance"))
+      if (!validTemp ||
+          sensor.Name.IndexOf("Distance", StringComparison.OrdinalIgnoreCase) >= 0 ||
+          sensor.Name.IndexOf("Limit", StringComparison.OrdinalIgnoreCase) >= 0 ||
+          sensor.Name.IndexOf("Critical", StringComparison.OrdinalIgnoreCase) >= 0 ||
+          sensor.Name.IndexOf("Warning", StringComparison.OrdinalIgnoreCase) >= 0 ||
+          sensor.Name.IndexOf("Resolution", StringComparison.OrdinalIgnoreCase) >= 0)
      {
        continue;
      }
--- a/agent/lhm/beszel_lhm.csproj
+++ b/agent/lhm/beszel_lhm.csproj
@@ -3,9 +3,11 @@
    <OutputType>Exe</OutputType>
    <TargetFramework>net48</TargetFramework>
    <Platforms>x64</Platforms>
+    <RuntimeIdentifier>win-x64</RuntimeIdentifier>
+    <AppendRuntimeIdentifierToOutputPath>false</AppendRuntimeIdentifierToOutputPath>
  </PropertyGroup>

  <ItemGroup>
-    <PackageReference Include="LibreHardwareMonitorLib" Version="0.9.4" />
+    <PackageReference Include="LibreHardwareMonitorLib" Version="0.9.5" />
  </ItemGroup>
 </Project>
--- a/agent/mdraid_linux.go
+++ b/agent/mdraid_linux.go
@@ -0,0 +1,233 @@
+//go:build linux
+
+package agent
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+
+	"github.com/henrygd/beszel/agent/utils"
+	"github.com/henrygd/beszel/internal/entities/smart"
+)
+
+// mdraidSysfsRoot is a test hook; production value is "/sys".
+var mdraidSysfsRoot = "/sys"
+
+// mdraidHealth is the set of values read from sysfs for a single md array.
+// String fields hold file contents as returned by the utils read helpers;
+// numeric fields stay 0 when the corresponding file could not be read.
+type mdraidHealth struct {
+	level         string // contents of md/level
+	arrayState    string // contents of md/array_state
+	degraded      uint64 // value of md/degraded
+	raidDisks     uint64 // value of md/raid_disks
+	syncAction    string // contents of md/sync_action
+	syncCompleted string // contents of md/sync_completed
+	syncSpeed     string // contents of md/sync_speed
+	mismatchCnt   uint64 // value of md/mismatch_cnt
+	capacity      uint64 // array size in bytes; 0 when it could not be determined
+}
+
+// scanMdraidDevices lists the md arrays found under <mdraidSysfsRoot>/block.
+// An entry qualifies when its name has the mdN form and it exposes an
+// md/array_state file. Each match is reported as a /dev/<name> device of type
+// "mdraid". It returns nil when the block directory cannot be read.
+func scanMdraidDevices() []*DeviceInfo {
+	blockRoot := filepath.Join(mdraidSysfsRoot, "block")
+	dirEntries, readErr := os.ReadDir(blockRoot)
+	if readErr != nil {
+		return nil
+	}
+
+	found := make([]*DeviceInfo, 0, 2)
+	for _, entry := range dirEntries {
+		blockName := entry.Name()
+		if !isMdraidBlockName(blockName) {
+			continue
+		}
+		statePath := filepath.Join(blockRoot, blockName, "md", "array_state")
+		if utils.FileExists(statePath) {
+			devNode := filepath.Join("/dev", blockName)
+			found = append(found, &DeviceInfo{
+				Name:     devNode,
+				Type:     "mdraid",
+				InfoName: devNode + " [mdraid]",
+				Protocol: "MD",
+			})
+		}
+	}
+
+	return found
+}
+
+// collectMdraidHealth reads the sysfs health of the md array named by
+// deviceInfo and stores it in sm.SmartDataMap under the key "mdraid:<name>".
+//
+// It returns (false, nil) when deviceInfo is nil or unnamed, when the device
+// is neither named like mdN nor typed "mdraid", or when the array's health
+// cannot be read. On success it sets deviceInfo.Type to "mdraid" and returns
+// (true, nil). String attributes are recorded only when non-empty and numeric
+// attributes only when greater than zero.
+func (sm *SmartManager) collectMdraidHealth(deviceInfo *DeviceInfo) (bool, error) {
+	if deviceInfo == nil || deviceInfo.Name == "" {
+		return false, nil
+	}
+
+	blockName := filepath.Base(deviceInfo.Name)
+	typedAsMdraid := strings.EqualFold(deviceInfo.Type, "mdraid")
+	if !(isMdraidBlockName(blockName) || typedAsMdraid) {
+		return false, nil
+	}
+
+	h, readable := readMdraidHealth(blockName)
+	if !readable {
+		return false, nil
+	}
+
+	deviceInfo.Type = "mdraid"
+	mapKey := fmt.Sprintf("mdraid:%s", blockName)
+	smartStatus := mdraidSmartStatus(h)
+
+	attributes := make([]*smart.SmartAttribute, 0, 10)
+	// addText records a string attribute unless its value is empty.
+	addText := func(name, value string) {
+		if value != "" {
+			attributes = append(attributes, &smart.SmartAttribute{Name: name, RawString: value})
+		}
+	}
+	// addCount records a numeric attribute unless its value is zero.
+	addCount := func(name string, value uint64) {
+		if value > 0 {
+			attributes = append(attributes, &smart.SmartAttribute{Name: name, RawValue: value})
+		}
+	}
+	addText("ArrayState", h.arrayState)
+	addText("RaidLevel", h.level)
+	addCount("RaidDisks", h.raidDisks)
+	addCount("Degraded", h.degraded)
+	addText("SyncAction", h.syncAction)
+	addText("SyncCompleted", h.syncCompleted)
+	addText("SyncSpeed", h.syncSpeed)
+	addCount("MismatchCount", h.mismatchCnt)
+
+	modelName := "Linux MD RAID"
+	if h.level != "" {
+		modelName = "Linux MD RAID (" + h.level + ")"
+	}
+
+	sm.Lock()
+	defer sm.Unlock()
+
+	entry, present := sm.SmartDataMap[mapKey]
+	if !present {
+		entry = &smart.SmartData{}
+		sm.SmartDataMap[mapKey] = entry
+	}
+	entry.ModelName = modelName
+	entry.Capacity = h.capacity
+	entry.SmartStatus = smartStatus
+	entry.DiskName = filepath.Join("/dev", blockName)
+	entry.DiskType = "mdraid"
+	entry.Attributes = attributes
+
+	return true, nil
+}
+
+// readMdraidHealth gathers the health fields of the md array blockName from
+// <mdraidSysfsRoot>/block/<blockName>/md.
+//
+// The second result is false when blockName does not have the mdN form or
+// when array_state cannot be read. All other files are optional: a missing
+// or unreadable one leaves its field at the zero value.
+func readMdraidHealth(blockName string) (mdraidHealth, bool) {
+	if !isMdraidBlockName(blockName) {
+		return mdraidHealth{}, false
+	}
+
+	mdDir := filepath.Join(mdraidSysfsRoot, "block", blockName, "md")
+	state, stateOK := utils.ReadStringFileOK(filepath.Join(mdDir, "array_state"))
+	if !stateOK {
+		return mdraidHealth{}, false
+	}
+
+	// text returns the contents of an optional string file in mdDir.
+	text := func(file string) string {
+		return utils.ReadStringFile(filepath.Join(mdDir, file))
+	}
+	// count returns the value of an optional numeric file in mdDir, or 0.
+	count := func(file string) uint64 {
+		if n, ok := utils.ReadUintFile(filepath.Join(mdDir, file)); ok {
+			return n
+		}
+		return 0
+	}
+
+	health := mdraidHealth{arrayState: state}
+	health.level = text("level")
+	health.syncAction = text("sync_action")
+	health.syncCompleted = text("sync_completed")
+	health.syncSpeed = text("sync_speed")
+	health.raidDisks = count("raid_disks")
+	health.degraded = count("degraded")
+	health.mismatchCnt = count("mismatch_cnt")
+
+	if size, sizeOK := readMdraidBlockCapacityBytes(blockName, mdraidSysfsRoot); sizeOK {
+		health.capacity = size
+	}
+
+	return health, true
+}
+
+// mdraidSmartStatus derives a SMART-like status string from an array's state,
+// sync action and degraded count. Rules are evaluated in order and the first
+// match wins:
+//
+//  1. state inactive/faulty/broken/stopped   -> "FAILED"
+//  2. sync action resync/recover/reshape      -> "WARNING"
+//  3. degraded > 0                            -> "FAILED"
+//  4. sync action check/repair                -> "WARNING"
+//  5. a known healthy state                   -> "PASSED"
+//  6. anything else                           -> "UNKNOWN"
+//
+// Rule 2 precedes rule 3 so that an array that is degraded while it is being
+// rebuilt is reported as a warning rather than a failure.
+func mdraidSmartStatus(health mdraidHealth) string {
+	// oneOf reports whether value equals any of the candidates.
+	oneOf := func(value string, candidates ...string) bool {
+		for _, candidate := range candidates {
+			if value == candidate {
+				return true
+			}
+		}
+		return false
+	}
+
+	arrayState := strings.ToLower(strings.TrimSpace(health.arrayState))
+	action := strings.ToLower(strings.TrimSpace(health.syncAction))
+
+	switch {
+	case oneOf(arrayState, "inactive", "faulty", "broken", "stopped"):
+		return "FAILED"
+	case oneOf(action, "resync", "recover", "reshape"):
+		return "WARNING"
+	case health.degraded > 0:
+		return "FAILED"
+	case oneOf(action, "check", "repair"):
+		return "WARNING"
+	case oneOf(arrayState, "clean", "active", "active-idle", "write-pending", "read-auto", "readonly"):
+		return "PASSED"
+	default:
+		return "UNKNOWN"
+	}
+}
+
+// isMdraidBlockName reports whether name is "md" followed by one or more
+// ASCII digits (for example "md0" or "md127"). Anything else, including a
+// bare "md" or a name with non-digit characters after the prefix, is
+// rejected.
+func isMdraidBlockName(name string) bool {
+	const prefix = "md"
+	if len(name) <= len(prefix) || name[:len(prefix)] != prefix {
+		return false
+	}
+	notDigit := func(r rune) bool { return r < '0' || r > '9' }
+	return strings.IndexFunc(name[len(prefix):], notDigit) == -1
+}
+
+// readMdraidBlockCapacityBytes returns the capacity in bytes of the block
+// device blockName, read from <root>/block/<blockName>/size.
+//
+// The sysfs size attribute is expressed in fixed 512-byte units regardless of
+// the device's logical block size, so the value is always scaled by 512.
+// Scaling by queue/logical_block_size would overstate the capacity of arrays
+// built on 4096-byte logical-block devices by a factor of eight.
+//
+// The second result is false when the size file cannot be read or parsed,
+// when it reports zero, or when the byte count would overflow a uint64.
+func readMdraidBlockCapacityBytes(blockName, root string) (uint64, bool) {
+	// sysfsSectorSize is the unit of the sysfs "size" attribute, in bytes.
+	const sysfsSectorSize uint64 = 512
+
+	sizeStr, ok := utils.ReadStringFileOK(filepath.Join(root, "block", blockName, "size"))
+	if !ok {
+		return 0, false
+	}
+	sectors, err := strconv.ParseUint(sizeStr, 10, 64)
+	if err != nil || sectors == 0 {
+		return 0, false
+	}
+	// Refuse to report a wrapped-around value for implausibly large counts.
+	if sectors > ^uint64(0)/sysfsSectorSize {
+		return 0, false
+	}
+
+	return sectors * sysfsSectorSize, true
+}
--- a/agent/mdraid_linux_test.go
+++ b/agent/mdraid_linux_test.go
@@ -0,0 +1,103 @@
+//go:build linux
+
+package agent
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/henrygd/beszel/internal/entities/smart"
+)
+
+// TestMdraidMockSysfsScanAndCollect builds a fake sysfs tree for a resyncing
+// two-disk raid1 array "md0", points mdraidSysfsRoot at it, and checks that
+// scanning finds the device and that collecting stores a single WARNING entry
+// with the expected identity fields.
+func TestMdraidMockSysfsScanAndCollect(t *testing.T) {
+	root := t.TempDir()
+	saved := mdraidSysfsRoot
+	mdraidSysfsRoot = root
+	t.Cleanup(func() { mdraidSysfsRoot = saved })
+
+	blockDir := filepath.Join(root, "block", "md0")
+	for _, sub := range []string{"md", "queue"} {
+		if err := os.MkdirAll(filepath.Join(blockDir, sub), 0o755); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	// Fixture files, relative to the md0 block directory.
+	fixture := []struct {
+		dir, file, content string
+	}{
+		{"md", "array_state", "active\n"},
+		{"md", "level", "raid1\n"},
+		{"md", "raid_disks", "2\n"},
+		{"md", "degraded", "0\n"},
+		{"md", "sync_action", "resync\n"},
+		{"md", "sync_completed", "10%\n"},
+		{"md", "sync_speed", "100M\n"},
+		{"md", "mismatch_cnt", "0\n"},
+		{"queue", "logical_block_size", "512\n"},
+		{"", "size", "2048\n"},
+	}
+	for _, f := range fixture {
+		target := filepath.Join(blockDir, f.dir, f.file)
+		if err := os.WriteFile(target, []byte(f.content), 0o644); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	devices := scanMdraidDevices()
+	if len(devices) != 1 {
+		t.Fatalf("scanMdraidDevices() = %d devices, want 1", len(devices))
+	}
+	if dev := devices[0]; !(dev.Name == "/dev/md0" && dev.Type == "mdraid") {
+		t.Fatalf("scanMdraidDevices()[0] = %+v, want Name=/dev/md0 Type=mdraid", dev)
+	}
+
+	manager := &SmartManager{SmartDataMap: map[string]*smart.SmartData{}}
+	collected, err := manager.collectMdraidHealth(devices[0])
+	if !collected || err != nil {
+		t.Fatalf("collectMdraidHealth() = (ok=%v, err=%v), want (true,nil)", collected, err)
+	}
+	if n := len(manager.SmartDataMap); n != 1 {
+		t.Fatalf("SmartDataMap len=%d, want 1", n)
+	}
+
+	// Grab the only entry without depending on the exact map key.
+	var entry *smart.SmartData
+	for _, value := range manager.SmartDataMap {
+		entry = value
+		break
+	}
+	if entry == nil {
+		t.Fatalf("SmartDataMap value nil")
+	}
+	if !(entry.DiskType == "mdraid" && entry.DiskName == "/dev/md0") {
+		t.Fatalf("disk fields = (type=%q name=%q), want (mdraid,/dev/md0)", entry.DiskType, entry.DiskName)
+	}
+	if entry.SmartStatus != "WARNING" {
+		t.Fatalf("SmartStatus=%q, want WARNING", entry.SmartStatus)
+	}
+	if entry.ModelName == "" || entry.Capacity == 0 {
+		t.Fatalf("identity fields = (model=%q cap=%d), want non-empty model and cap>0", entry.ModelName, entry.Capacity)
+	}
+	if n := len(entry.Attributes); n < 5 {
+		t.Fatalf("attributes len=%d, want >= 5", n)
+	}
+}
+
+// TestMdraidSmartStatus checks the status mapping for representative
+// combinations of array state, degraded count and sync action, stopping at
+// the first mismatch.
+func TestMdraidSmartStatus(t *testing.T) {
+	cases := []struct {
+		label string
+		input mdraidHealth
+		want  string
+	}{
+		{"inactive", mdraidHealth{arrayState: "inactive"}, "FAILED"},
+		{"degraded+recover", mdraidHealth{arrayState: "active", degraded: 1, syncAction: "recover"}, "WARNING"},
+		{"degraded", mdraidHealth{arrayState: "active", degraded: 1}, "FAILED"},
+		{"recover", mdraidHealth{arrayState: "active", syncAction: "recover"}, "WARNING"},
+		{"clean", mdraidHealth{arrayState: "clean"}, "PASSED"},
+		{"unknown", mdraidHealth{arrayState: "unknown"}, "UNKNOWN"},
+	}
+
+	for _, tc := range cases {
+		got := mdraidSmartStatus(tc.input)
+		if got != tc.want {
+			t.Fatalf("mdraidSmartStatus(%s) = %q, want %s", tc.label, got, tc.want)
+		}
+	}
+}
--- a/agent/mdraid_stub.go
+++ b/agent/mdraid_stub.go
@@ -0,0 +1,11 @@
+//go:build !linux
+
+package agent
+
+func scanMdraidDevices() []*DeviceInfo {
+	return nil
+}
+
+func (sm *SmartManager) collectMdraidHealth(deviceInfo *DeviceInfo) (bool, error) {
+	return false, nil
+}
--- a/agent/network.go
+++ b/agent/network.go
@@ -8,6 +8,7 @@ import (
 	"time"

 	"github.com/henrygd/beszel/agent/deltatracker"
+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/system"
 	psutilNet "github.com/shirou/gopsutil/v4/net"
 )
@@ -94,7 +95,7 @@ func (a *Agent) initializeNetIoStats() {
 	a.netInterfaces = make(map[string]struct{}, 0)

 	// parse NICS env var for whitelist / blacklist
-	nicsEnvVal, nicsEnvExists := GetEnv("NICS")
+	nicsEnvVal, nicsEnvExists := utils.GetEnv("NICS")
 	var nicCfg *NicConfig
 	if nicsEnvExists {
 		nicCfg = newNicConfig(nicsEnvVal)
@@ -103,10 +104,7 @@ func (a *Agent) initializeNetIoStats() {
 	// get current network I/O stats and record valid interfaces
 	if netIO, err := psutilNet.IOCounters(true); err == nil {
 		for _, v := range netIO {
-			if nicsEnvExists && !isValidNic(v.Name, nicCfg) {
-				continue
-			}
-			if a.skipNetworkInterface(v) {
+			if skipNetworkInterface(v, nicCfg) {
 				continue
 			}
 			slog.Info("Detected network interface", "name", v.Name, "sent", v.BytesSent, "recv", v.BytesRecv)
@@ -215,10 +213,8 @@ func (a *Agent) applyNetworkTotals(
 	totalBytesSent, totalBytesRecv uint64,
 	bytesSentPerSecond, bytesRecvPerSecond uint64,
 ) {
-	networkSentPs := bytesToMegabytes(float64(bytesSentPerSecond))
-	networkRecvPs := bytesToMegabytes(float64(bytesRecvPerSecond))
-	if networkSentPs > 10_000 || networkRecvPs > 10_000 {
-		slog.Warn("Invalid net stats. Resetting.", "sent", networkSentPs, "recv", networkRecvPs)
+	if bytesSentPerSecond > 10_000_000_000 || bytesRecvPerSecond > 10_000_000_000 {
+		slog.Warn("Invalid net stats. Resetting.", "sent", bytesSentPerSecond, "recv", bytesRecvPerSecond)
 		for _, v := range netIO {
 			if _, exists := a.netInterfaces[v.Name]; !exists {
 				continue
@@ -228,21 +224,29 @@ func (a *Agent) applyNetworkTotals(
 		a.initializeNetIoStats()
 		delete(a.netIoStats, cacheTimeMs)
 		delete(a.netInterfaceDeltaTrackers, cacheTimeMs)
-		systemStats.NetworkSent = 0
-		systemStats.NetworkRecv = 0
 		systemStats.Bandwidth[0], systemStats.Bandwidth[1] = 0, 0
 		return
 	}

-	systemStats.NetworkSent = networkSentPs
-	systemStats.NetworkRecv = networkRecvPs
 	systemStats.Bandwidth[0], systemStats.Bandwidth[1] = bytesSentPerSecond, bytesRecvPerSecond
 	nis.BytesSent = totalBytesSent
 	nis.BytesRecv = totalBytesRecv
 	a.netIoStats[cacheTimeMs] = nis
 }

-func (a *Agent) skipNetworkInterface(v psutilNet.IOCountersStat) bool {
+// skipNetworkInterface returns true if the network interface should be ignored.
+func skipNetworkInterface(v psutilNet.IOCountersStat, nicCfg *NicConfig) bool {
+	if nicCfg != nil {
+		if !isValidNic(v.Name, nicCfg) {
+			return true
+		}
+		// In whitelist mode, we honor explicit inclusion without auto-filtering.
+		if !nicCfg.isBlacklist {
+			return false
+		}
+		// In blacklist mode, still apply the auto-filter below.
+	}
+
 	switch {
 	case strings.HasPrefix(v.Name, "lo"),
 		strings.HasPrefix(v.Name, "docker"),
--- a/agent/network_test.go
+++ b/agent/network_test.go
@@ -261,6 +261,39 @@ func TestNewNicConfig(t *testing.T) {
 		})
 	}
 }
+func TestSkipNetworkInterface(t *testing.T) {
+	tests := []struct {
+		name       string
+		nic        psutilNet.IOCountersStat
+		nicCfg     *NicConfig
+		expectSkip bool
+	}{
+		{"loopback lo", psutilNet.IOCountersStat{Name: "lo", BytesSent: 100, BytesRecv: 100}, nil, true},
+		{"loopback lo0", psutilNet.IOCountersStat{Name: "lo0", BytesSent: 100, BytesRecv: 100}, nil, true},
+		{"docker prefix", psutilNet.IOCountersStat{Name: "docker0", BytesSent: 100, BytesRecv: 100}, nil, true},
+		{"br- prefix", psutilNet.IOCountersStat{Name: "br-lan", BytesSent: 100, BytesRecv: 100}, nil, true},
+		{"veth prefix", psutilNet.IOCountersStat{Name: "veth0abc", BytesSent: 100, BytesRecv: 100}, nil, true},
+		{"bond prefix", psutilNet.IOCountersStat{Name: "bond0", BytesSent: 100, BytesRecv: 100}, nil, true},
+		{"cali prefix", psutilNet.IOCountersStat{Name: "cali1234", BytesSent: 100, BytesRecv: 100}, nil, true},
+		{"zero BytesRecv", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 100, BytesRecv: 0}, nil, true},
+		{"zero BytesSent", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 0, BytesRecv: 100}, nil, true},
+		{"both zero", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 0, BytesRecv: 0}, nil, true},
+		{"normal eth0", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 100, BytesRecv: 200}, nil, false},
+		{"normal wlan0", psutilNet.IOCountersStat{Name: "wlan0", BytesSent: 1, BytesRecv: 1}, nil, false},
+		{"whitelist overrides skip (docker)", psutilNet.IOCountersStat{Name: "docker0", BytesSent: 100, BytesRecv: 100}, newNicConfig("docker0"), false},
+		{"whitelist overrides skip (lo)", psutilNet.IOCountersStat{Name: "lo", BytesSent: 100, BytesRecv: 100}, newNicConfig("lo"), false},
+		{"whitelist exclusion", psutilNet.IOCountersStat{Name: "eth1", BytesSent: 100, BytesRecv: 100}, newNicConfig("eth0"), true},
+		{"blacklist skip lo", psutilNet.IOCountersStat{Name: "lo", BytesSent: 100, BytesRecv: 100}, newNicConfig("-eth0"), true},
+		{"blacklist explicit eth0", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 100, BytesRecv: 100}, newNicConfig("-eth0"), true},
+		{"blacklist allow eth1", psutilNet.IOCountersStat{Name: "eth1", BytesSent: 100, BytesRecv: 100}, newNicConfig("-eth0"), false},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.Equal(t, tt.expectSkip, skipNetworkInterface(tt.nic, tt.nicCfg))
+		})
+	}
+}
+
 func TestEnsureNetworkInterfacesMap(t *testing.T) {
 	var a Agent
 	var stats system.Stats
@@ -383,8 +416,6 @@ func TestApplyNetworkTotals(t *testing.T) {
 		totalBytesSent        uint64
 		totalBytesRecv        uint64
 		expectReset           bool
-		expectedNetworkSent   float64
-		expectedNetworkRecv   float64
 		expectedBandwidthSent uint64
 		expectedBandwidthRecv uint64
 	}{
@@ -395,8 +426,6 @@ func TestApplyNetworkTotals(t *testing.T) {
 			totalBytesSent:        10000000,
 			totalBytesRecv:        20000000,
 			expectReset:           false,
-			expectedNetworkSent:   0.95, // ~1 MB/s rounded to 2 decimals
-			expectedNetworkRecv:   1.91, // ~2 MB/s rounded to 2 decimals
 			expectedBandwidthSent: 1000000,
 			expectedBandwidthRecv: 2000000,
 		},
@@ -424,18 +453,6 @@ func TestApplyNetworkTotals(t *testing.T) {
 			totalBytesRecv:     20000000,
 			expectReset:        true,
 		},
-		{
-			name:                  "Valid network stats - at threshold boundary",
-			bytesSentPerSecond:    10485750000, // ~9999.99 MB/s (rounds to 9999.99)
-			bytesRecvPerSecond:    10485750000, // ~9999.99 MB/s (rounds to 9999.99)
-			totalBytesSent:        10000000,
-			totalBytesRecv:        20000000,
-			expectReset:           false,
-			expectedNetworkSent:   9999.99,
-			expectedNetworkRecv:   9999.99,
-			expectedBandwidthSent: 10485750000,
-			expectedBandwidthRecv: 10485750000,
-		},
 		{
 			name:                  "Zero values",
 			bytesSentPerSecond:    0,
@@ -443,8 +460,6 @@ func TestApplyNetworkTotals(t *testing.T) {
 			totalBytesSent:        0,
 			totalBytesRecv:        0,
 			expectReset:           false,
-			expectedNetworkSent:   0.0,
-			expectedNetworkRecv:   0.0,
 			expectedBandwidthSent: 0,
 			expectedBandwidthRecv: 0,
 		},
@@ -481,14 +496,10 @@ func TestApplyNetworkTotals(t *testing.T) {
 				// Should have reset network tracking state - maps cleared and stats zeroed
 				assert.NotContains(t, a.netIoStats, cacheTimeMs, "cache entry should be cleared after reset")
 				assert.NotContains(t, a.netInterfaceDeltaTrackers, cacheTimeMs, "tracker should be cleared on reset")
-				assert.Zero(t, systemStats.NetworkSent)
-				assert.Zero(t, systemStats.NetworkRecv)
 				assert.Zero(t, systemStats.Bandwidth[0])
 				assert.Zero(t, systemStats.Bandwidth[1])
 			} else {
 				// Should have applied stats
-				assert.Equal(t, tt.expectedNetworkSent, systemStats.NetworkSent)
-				assert.Equal(t, tt.expectedNetworkRecv, systemStats.NetworkRecv)
 				assert.Equal(t, tt.expectedBandwidthSent, systemStats.Bandwidth[0])
 				assert.Equal(t, tt.expectedBandwidthRecv, systemStats.Bandwidth[1])

--- a/agent/response.go
+++ b/agent/response.go
@@ -0,0 +1,31 @@
+package agent
+
+import (
+	"github.com/fxamacker/cbor/v2"
+	"github.com/henrygd/beszel/internal/common"
+	"github.com/henrygd/beszel/internal/entities/smart"
+	"github.com/henrygd/beszel/internal/entities/system"
+	"github.com/henrygd/beszel/internal/entities/systemd"
+)
+
+// newAgentResponse creates an AgentResponse using legacy typed fields.
+// This maintains backward compatibility with <= 0.17 hubs that expect specific fields.
+func newAgentResponse(data any, requestID *uint32) common.AgentResponse {
+	response := common.AgentResponse{Id: requestID}
+	switch v := data.(type) {
+	case *system.CombinedData:
+		response.SystemData = v
+	case *common.FingerprintResponse:
+		response.Fingerprint = v
+	case string:
+		response.String = &v
+	case map[string]smart.SmartData:
+		response.SmartData = v
+	case systemd.ServiceDetails:
+		response.ServiceInfo = v
+	default:
+		// For unknown types, use the generic Data field
+		response.Data, _ = cbor.Marshal(data)
+	}
+	return response
+}
--- a/agent/sensors.go
+++ b/agent/sensors.go
@@ -10,6 +10,7 @@ import (
 	"strings"
 	"unicode/utf8"

+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/system"

 	"github.com/shirou/gopsutil/v4/common"
@@ -26,9 +27,9 @@ type SensorConfig struct {
 }

 func (a *Agent) newSensorConfig() *SensorConfig {
-	primarySensor, _ := GetEnv("PRIMARY_SENSOR")
-	sysSensors, _ := GetEnv("SYS_SENSORS")
-	sensorsEnvVal, sensorsSet := GetEnv("SENSORS")
+	primarySensor, _ := utils.GetEnv("PRIMARY_SENSOR")
+	sysSensors, _ := utils.GetEnv("SYS_SENSORS")
+	sensorsEnvVal, sensorsSet := utils.GetEnv("SENSORS")
 	skipCollection := sensorsSet && sensorsEnvVal == ""

 	return a.newSensorConfigWithEnv(primarySensor, sysSensors, sensorsEnvVal, skipCollection)
@@ -135,7 +136,7 @@ func (a *Agent) updateTemperatures(systemStats *system.Stats) {
 		case sensorName:
 			a.systemInfo.DashboardTemp = sensor.Temperature
 		}
-		systemStats.Temperatures[sensorName] = twoDecimals(sensor.Temperature)
+		systemStats.Temperatures[sensorName] = utils.TwoDecimals(sensor.Temperature)
 	}
 }

--- a/agent/sensors_test.go
+++ b/agent/sensors_test.go
@@ -1,12 +1,10 @@
 //go:build testing
-// +build testing

 package agent

 import (
 	"context"
 	"fmt"
-	"os"
 	"testing"

 	"github.com/henrygd/beszel/internal/entities/system"
@@ -330,34 +328,10 @@ func TestNewSensorConfigWithEnv(t *testing.T) {
 }

 func TestNewSensorConfig(t *testing.T) {
-	// Save original environment variables
-	originalPrimary, hasPrimary := os.LookupEnv("BESZEL_AGENT_PRIMARY_SENSOR")
-	originalSys, hasSys := os.LookupEnv("BESZEL_AGENT_SYS_SENSORS")
-	originalSensors, hasSensors := os.LookupEnv("BESZEL_AGENT_SENSORS")
-
-	// Restore environment variables after the test
-	defer func() {
-		// Clean up test environment variables
-		os.Unsetenv("BESZEL_AGENT_PRIMARY_SENSOR")
-		os.Unsetenv("BESZEL_AGENT_SYS_SENSORS")
-		os.Unsetenv("BESZEL_AGENT_SENSORS")
-
-		// Restore original values if they existed
-		if hasPrimary {
-			os.Setenv("BESZEL_AGENT_PRIMARY_SENSOR", originalPrimary)
-		}
-		if hasSys {
-			os.Setenv("BESZEL_AGENT_SYS_SENSORS", originalSys)
-		}
-		if hasSensors {
-			os.Setenv("BESZEL_AGENT_SENSORS", originalSensors)
-		}
-	}()
-
 	// Set test environment variables
-	os.Setenv("BESZEL_AGENT_PRIMARY_SENSOR", "test_primary")
-	os.Setenv("BESZEL_AGENT_SYS_SENSORS", "/test/path")
-	os.Setenv("BESZEL_AGENT_SENSORS", "test_sensor1,test_*,test_sensor3")
+	t.Setenv("BESZEL_AGENT_PRIMARY_SENSOR", "test_primary")
+	t.Setenv("BESZEL_AGENT_SYS_SENSORS", "/test/path")
+	t.Setenv("BESZEL_AGENT_SENSORS", "test_sensor1,test_*,test_sensor3")

 	agent := &Agent{}
 	result := agent.newSensorConfig()
--- a/agent/server.go
+++ b/agent/server.go
@@ -12,10 +12,9 @@ import (
 	"time"

 	"github.com/henrygd/beszel"
+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/common"
-	"github.com/henrygd/beszel/internal/entities/smart"
 	"github.com/henrygd/beszel/internal/entities/system"
-	"github.com/henrygd/beszel/internal/entities/systemd"

 	"github.com/blang/semver"
 	"github.com/fxamacker/cbor/v2"
@@ -38,6 +37,9 @@ var hubVersions map[string]semver.Version
 // and begins listening for connections. Returns an error if the server
 // is already running or if there's an issue starting the server.
 func (a *Agent) StartServer(opts ServerOptions) error {
+	if disableSSH, _ := utils.GetEnv("DISABLE_SSH"); disableSSH == "true" {
+		return errors.New("SSH disabled")
+	}
 	if a.server != nil {
 		return errors.New("server already started")
 	}
@@ -165,20 +167,9 @@ func (a *Agent) handleSSHRequest(w io.Writer, req *common.HubRequest[cbor.RawMes
 	}

 	// responder that writes AgentResponse to stdout
+	// Uses legacy typed fields for backward compatibility with hubs <= 0.17
 	sshResponder := func(data any, requestID *uint32) error {
-		response := common.AgentResponse{Id: requestID}
-		switch v := data.(type) {
-		case *system.CombinedData:
-			response.SystemData = v
-		case string:
-			response.String = &v
-		case map[string]smart.SmartData:
-			response.SmartData = v
-		case systemd.ServiceDetails:
-			response.ServiceInfo = v
-		default:
-			response.Error = fmt.Sprintf("unsupported response type: %T", data)
-		}
+		response := newAgentResponse(data, requestID)
 		return cbor.NewEncoder(w).Encode(response)
 	}

@@ -202,7 +193,7 @@ func (a *Agent) handleSSHRequest(w io.Writer, req *common.HubRequest[cbor.RawMes

 // handleLegacyStats serves the legacy one-shot stats payload for older hubs
 func (a *Agent) handleLegacyStats(w io.Writer, hubVersion semver.Version) error {
-	stats := a.gatherStats(60_000)
+	stats := a.gatherStats(common.DataRequestOptions{CacheTimeMs: 60_000})
 	return a.writeToSession(w, stats, hubVersion)
 }

@@ -248,11 +239,11 @@ func ParseKeys(input string) ([]gossh.PublicKey, error) {
 // and finally defaults to ":45876".
 func GetAddress(addr string) string {
 	if addr == "" {
-		addr, _ = GetEnv("LISTEN")
+		addr, _ = utils.GetEnv("LISTEN")
 	}
 	if addr == "" {
 		// Legacy PORT environment variable support
-		addr, _ = GetEnv("PORT")
+		addr, _ = utils.GetEnv("PORT")
 	}
 	if addr == "" {
 		return ":45876"
@@ -268,7 +259,7 @@ func GetAddress(addr string) string {
 // It checks the NETWORK environment variable first, then infers from
 // the address format: addresses starting with "/" are "unix", others are "tcp".
 func GetNetwork(addr string) string {
-	if network, ok := GetEnv("NETWORK"); ok && network != "" {
+	if network, ok := utils.GetEnv("NETWORK"); ok && network != "" {
 		return network
 	}
 	if strings.HasPrefix(addr, "/") {
--- a/agent/server_test.go
+++ b/agent/server_test.go
@@ -1,3 +1,5 @@
+//go:build testing
+
 package agent

 import (
@@ -180,6 +182,22 @@ func TestStartServer(t *testing.T) {
 	}
 }

+func TestStartServerDisableSSH(t *testing.T) {
+	t.Setenv("BESZEL_AGENT_DISABLE_SSH", "true")
+
+	agent, err := NewAgent("")
+	require.NoError(t, err)
+
+	opts := ServerOptions{
+		Network: "tcp",
+		Addr:    ":45990",
+	}
+
+	err = agent.StartServer(opts)
+	assert.Error(t, err)
+	assert.Contains(t, err.Error(), "SSH disabled")
+}
+
 /////////////////////////////////////////////////////////////////
 //////////////////// ParseKeys Tests ////////////////////////////
 /////////////////////////////////////////////////////////////////
@@ -513,7 +531,7 @@ func TestWriteToSessionEncoding(t *testing.T) {
 				err = json.Unmarshal([]byte(encodedData), &decodedJson)
 				assert.Error(t, err, "Should not be valid JSON data")

-				assert.Equal(t, testData.Info.Hostname, decodedCbor.Info.Hostname)
+				assert.Equal(t, testData.Details.Hostname, decodedCbor.Details.Hostname)
 				assert.Equal(t, testData.Stats.Cpu, decodedCbor.Stats.Cpu)
 			} else {
 				// Should be JSON - try to decode as JSON
@@ -526,7 +544,7 @@ func TestWriteToSessionEncoding(t *testing.T) {
 				assert.Error(t, err, "Should not be valid CBOR data")

 				// Verify the decoded JSON data matches our test data
-				assert.Equal(t, testData.Info.Hostname, decodedJson.Info.Hostname)
+				assert.Equal(t, testData.Details.Hostname, decodedJson.Details.Hostname)
 				assert.Equal(t, testData.Stats.Cpu, decodedJson.Stats.Cpu)

 				// Verify it looks like JSON (starts with '{' and contains readable field names)
@@ -550,13 +568,12 @@ func createTestCombinedData() *system.CombinedData {
 			DiskUsed:  549755813888,  // 512GB
 			DiskPct:   50.0,
 		},
+		Details: &system.Details{
+			Hostname: "test-host",
+		},
 		Info: system.Info{
-			Hostname:     "test-host",
-			Cores:        8,
-			CpuModel:     "Test CPU Model",
 			Uptime:       3600,
 			AgentVersion: "0.12.0",
-			Os:           system.Linux,
 		},
 		Containers: []*container.Stats{
 			{
--- a/agent/smart.go
+++ b/agent/smart.go
@@ -8,17 +8,18 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
+	"log/slog"
 	"os"
 	"os/exec"
+	"path/filepath"
 	"runtime"
 	"strconv"
 	"strings"
 	"sync"
 	"time"

+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/smart"
-
-	"golang.org/x/exp/slog"
 )

 // SmartManager manages data collection for SMART devices
@@ -28,7 +29,7 @@ type SmartManager struct {
 	SmartDevices    []*DeviceInfo
 	refreshMutex    sync.Mutex
 	lastScanTime    time.Time
-	binPath         string
+	smartctlPath    string
 	excludedDevices map[string]struct{}
 }

@@ -53,6 +54,12 @@ type DeviceInfo struct {
 	parserType string
 }

+// deviceKey is a composite key for a device, used to identify a device uniquely.
+type deviceKey struct {
+	name       string
+	deviceType string
+}
+
 var errNoValidSmartData = fmt.Errorf("no valid SMART data found") // Error for missing data

 // Refresh updates SMART data for all known devices
@@ -150,7 +157,7 @@ func (sm *SmartManager) ScanDevices(force bool) error {
 	currentDevices := sm.devicesSnapshot()

 	var configuredDevices []*DeviceInfo
-	if configuredRaw, ok := GetEnv("SMART_DEVICES"); ok {
+	if configuredRaw, ok := utils.GetEnv("SMART_DEVICES"); ok {
 		slog.Info("SMART_DEVICES", "value", configuredRaw)
 		config := strings.TrimSpace(configuredRaw)
 		if config == "" {
@@ -164,27 +171,42 @@ func (sm *SmartManager) ScanDevices(force bool) error {
 		configuredDevices = parsedDevices
 	}

-	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
-	defer cancel()
-
-	cmd := exec.CommandContext(ctx, sm.binPath, "--scan", "-j")
-	output, err := cmd.Output()
-
 	var (
 		scanErr        error
 		scannedDevices []*DeviceInfo
 		hasValidScan   bool
 	)

-	if err != nil {
-		scanErr = err
-	} else {
-		scannedDevices, hasValidScan = sm.parseScan(output)
-		if !hasValidScan {
-			scanErr = errNoValidSmartData
+	if sm.smartctlPath != "" {
+		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+		defer cancel()
+
+		cmd := exec.CommandContext(ctx, sm.smartctlPath, "--scan", "-j")
+		output, err := cmd.Output()
+		if err != nil {
+			scanErr = err
+		} else {
+			scannedDevices, hasValidScan = sm.parseScan(output)
+			if !hasValidScan {
+				scanErr = errNoValidSmartData
+			}
 		}
 	}

+	// Add eMMC devices (Linux only) by reading sysfs health fields. This does not
+	// require smartctl and does not scan the whole device.
+	if emmcDevices := scanEmmcDevices(); len(emmcDevices) > 0 {
+		scannedDevices = append(scannedDevices, emmcDevices...)
+		hasValidScan = true
+	}
+
+	// Add Linux mdraid arrays by reading sysfs health fields. This does not
+	// require smartctl and does not scan the whole device.
+	if raidDevices := scanMdraidDevices(); len(raidDevices) > 0 {
+		scannedDevices = append(scannedDevices, raidDevices...)
+		hasValidScan = true
+	}
+
 	finalDevices := mergeDeviceLists(currentDevices, scannedDevices, configuredDevices)
 	finalDevices = sm.filterExcludedDevices(finalDevices)
 	sm.updateSmartDevices(finalDevices)
@@ -201,7 +223,11 @@ func (sm *SmartManager) ScanDevices(force bool) error {
 }

 func (sm *SmartManager) parseConfiguredDevices(config string) ([]*DeviceInfo, error) {
-	entries := strings.Split(config, ",")
+	splitChar, _ := utils.GetEnv("SMART_DEVICES_SEPARATOR")
+	if splitChar == "" {
+		splitChar = ","
+	}
+	entries := strings.Split(config, splitChar)
 	devices := make([]*DeviceInfo, 0, len(entries))
 	for _, entry := range entries {
 		entry = strings.TrimSpace(entry)
@@ -235,7 +261,7 @@ func (sm *SmartManager) parseConfiguredDevices(config string) ([]*DeviceInfo, er
 }

 func (sm *SmartManager) refreshExcludedDevices() {
-	rawValue, _ := GetEnv("EXCLUDE_SMART")
+	rawValue, _ := utils.GetEnv("EXCLUDE_SMART")
 	sm.excludedDevices = make(map[string]struct{})

 	for entry := range strings.SplitSeq(rawValue, ",") {
@@ -325,6 +351,13 @@ func normalizeParserType(value string) string {
 	}
 }

+// makeDeviceKey creates a composite key from device name and type.
+// This allows multiple drives under the same device path (e.g., RAID controllers)
+// to be tracked separately.
+func makeDeviceKey(name, deviceType string) deviceKey {
+	return deviceKey{name: name, deviceType: deviceType}
+}
+
 // parseSmartOutput attempts each SMART parser, optionally detecting the type when
 // it is not provided, and updates the device info when a parser succeeds.
 func (sm *SmartManager) parseSmartOutput(deviceInfo *DeviceInfo, output []byte) bool {
@@ -425,35 +458,81 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
 		return errNoValidSmartData
 	}

+	// mdraid health is not exposed via SMART; Linux exposes array state in sysfs.
+	if deviceInfo != nil {
+		if ok, err := sm.collectMdraidHealth(deviceInfo); ok {
+			return err
+		}
+	}
+	// eMMC health is not exposed via SMART on Linux, but the kernel provides
+	// wear / EOL indicators via sysfs. Prefer that path when available.
+	if deviceInfo != nil {
+		if ok, err := sm.collectEmmcHealth(deviceInfo); ok {
+			return err
+		}
+	}
+
+	if sm.smartctlPath == "" {
+		return errNoValidSmartData
+	}
+
 	// slog.Info("collecting SMART data", "device", deviceInfo.Name, "type", deviceInfo.Type, "has_existing_data", sm.hasDataForDevice(deviceInfo.Name))

 	// Check if we have any existing data for this device
 	hasExistingData := sm.hasDataForDevice(deviceInfo.Name)

-	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
+	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
 	defer cancel()

 	// Try with -n standby first if we have existing data
-	args := sm.smartctlArgs(deviceInfo, true)
-	cmd := exec.CommandContext(ctx, sm.binPath, args...)
+	args := sm.smartctlArgs(deviceInfo, hasExistingData)
+	cmd := exec.CommandContext(ctx, sm.smartctlPath, args...)
 	output, err := cmd.CombinedOutput()

 	// Check if device is in standby (exit status 2)
-	if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 2 {
+	if exitErr, ok := errors.AsType[*exec.ExitError](err); ok && exitErr.ExitCode() == 2 {
 		if hasExistingData {
 			// Device is in standby and we have cached data, keep using cache
 			return nil
 		}
 		// No cached data, need to collect initial data by bypassing standby
-		ctx2, cancel2 := context.WithTimeout(context.Background(), 2*time.Second)
+		ctx2, cancel2 := context.WithTimeout(context.Background(), 15*time.Second)
 		defer cancel2()
 		args = sm.smartctlArgs(deviceInfo, false)
-		cmd = exec.CommandContext(ctx2, sm.binPath, args...)
+		cmd = exec.CommandContext(ctx2, sm.smartctlPath, args...)
 		output, err = cmd.CombinedOutput()
 	}

 	hasValidData := sm.parseSmartOutput(deviceInfo, output)

+	// If NVMe controller path failed, try namespace path as fallback.
+	// NVMe controllers (/dev/nvme0) don't always support SMART queries. See github.com/henrygd/beszel/issues/1504
+	if !hasValidData && err != nil && isNvmeControllerPath(deviceInfo.Name) {
+		controllerPath := deviceInfo.Name
+		namespacePath := controllerPath + "n1"
+		if !sm.isExcludedDevice(namespacePath) {
+			deviceInfo.Name = namespacePath
+
+			ctx3, cancel3 := context.WithTimeout(context.Background(), 15*time.Second)
+			defer cancel3()
+			args = sm.smartctlArgs(deviceInfo, false)
+			cmd = exec.CommandContext(ctx3, sm.smartctlPath, args...)
+			output, err = cmd.CombinedOutput()
+			hasValidData = sm.parseSmartOutput(deviceInfo, output)
+
+			// Auto-exclude the controller path so future scans don't re-add it
+			if hasValidData {
+				sm.Lock()
+				if sm.excludedDevices == nil {
+					sm.excludedDevices = make(map[string]struct{})
+				}
+				sm.excludedDevices[controllerPath] = struct{}{}
+				sm.Unlock()
+				slog.Debug("auto-excluded NVMe controller path", "path", controllerPath)
+			}
+		}
+	}
+
 	if !hasValidData {
 		if err != nil {
 			slog.Info("smartctl failed", "device", deviceInfo.Name, "err", err)
@@ -469,10 +548,12 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
 // smartctlArgs returns the arguments for the smartctl command
 // based on the device type and whether to include standby mode
 func (sm *SmartManager) smartctlArgs(deviceInfo *DeviceInfo, includeStandby bool) []string {
-	args := make([]string, 0, 7)
+	args := make([]string, 0, 9)
+	var deviceType, parserType string

 	if deviceInfo != nil {
-		deviceType := strings.ToLower(deviceInfo.Type)
+		deviceType = strings.ToLower(deviceInfo.Type)
+		parserType = strings.ToLower(deviceInfo.parserType)
 		// types sometimes misidentified in scan; see github.com/henrygd/beszel/issues/1345
 		if deviceType != "" && deviceType != "scsi" && deviceType != "ata" {
 			args = append(args, "-d", deviceInfo.Type)
@@ -480,6 +561,13 @@ func (sm *SmartManager) smartctlArgs(deviceInfo *DeviceInfo, includeStandby bool
 	}

 	args = append(args, "-a", "--json=c")
+	effectiveType := parserType
+	if effectiveType == "" {
+		effectiveType = deviceType
+	}
+	if effectiveType == "sat" || effectiveType == "ata" {
+		args = append(args, "-l", "devstat")
+	}

 	if includeStandby {
 		args = append(args, "-n", "standby")
@@ -540,6 +628,28 @@ func mergeDeviceLists(existing, scanned, configured []*DeviceInfo) []*DeviceInfo
 		return existing
 	}

+	// buildUniqueNameIndex returns devices that appear exactly once by name.
+	// It is used to safely apply name-only fallbacks without RAID ambiguity.
+	buildUniqueNameIndex := func(devices []*DeviceInfo) map[string]*DeviceInfo {
+		counts := make(map[string]int, len(devices))
+		for _, dev := range devices {
+			if dev == nil || dev.Name == "" {
+				continue
+			}
+			counts[dev.Name]++
+		}
+		unique := make(map[string]*DeviceInfo, len(counts))
+		for _, dev := range devices {
+			if dev == nil || dev.Name == "" {
+				continue
+			}
+			if counts[dev.Name] == 1 {
+				unique[dev.Name] = dev
+			}
+		}
+		return unique
+	}
+
 	// preserveVerifiedType copies the verified type/parser metadata from an existing
 	// device record so that subsequent scans/config updates never downgrade a
 	// previously verified device.
@@ -552,69 +662,90 @@ func mergeDeviceLists(existing, scanned, configured []*DeviceInfo) []*DeviceInfo
 		target.parserType = prev.parserType
 	}

-	existingIndex := make(map[string]*DeviceInfo, len(existing))
+	// applyConfiguredMetadata updates a matched device with any configured
+	// overrides, preserving verified type data when present.
+	applyConfiguredMetadata := func(existingDev, configuredDev *DeviceInfo) {
+		// Only update the type if it has not been verified yet; otherwise we
+		// keep the existing verified metadata intact.
+		if configuredDev.Type != "" && !existingDev.typeVerified {
+			newType := strings.TrimSpace(configuredDev.Type)
+			existingDev.Type = newType
+			existingDev.typeVerified = false
+			existingDev.parserType = normalizeParserType(newType)
+		}
+		if configuredDev.InfoName != "" {
+			existingDev.InfoName = configuredDev.InfoName
+		}
+		if configuredDev.Protocol != "" {
+			existingDev.Protocol = configuredDev.Protocol
+		}
+	}
+
+	existingIndex := make(map[deviceKey]*DeviceInfo, len(existing))
 	for _, dev := range existing {
 		if dev == nil || dev.Name == "" {
 			continue
 		}
-		existingIndex[dev.Name] = dev
+		existingIndex[makeDeviceKey(dev.Name, dev.Type)] = dev
 	}
+	existingByName := buildUniqueNameIndex(existing)

 	finalDevices := make([]*DeviceInfo, 0, len(scanned)+len(configured))
-	deviceIndex := make(map[string]*DeviceInfo, len(scanned)+len(configured))
+	deviceIndex := make(map[deviceKey]*DeviceInfo, len(scanned)+len(configured))

 	// Start with the newly scanned devices so we always surface fresh metadata,
 	// but ensure we retain any previously verified parser assignment.
-	for _, dev := range scanned {
-		if dev == nil || dev.Name == "" {
+	for _, scannedDevice := range scanned {
+		if scannedDevice == nil || scannedDevice.Name == "" {
 			continue
 		}

 		// Work on a copy so we can safely adjust metadata without mutating the
 		// input slices that may be reused elsewhere.
-		copyDev := *dev
-		if prev := existingIndex[copyDev.Name]; prev != nil {
+		copyDev := *scannedDevice
+		key := makeDeviceKey(copyDev.Name, copyDev.Type)
+		if prev := existingIndex[key]; prev != nil {
+			preserveVerifiedType(&copyDev, prev)
+		} else if prev := existingByName[copyDev.Name]; prev != nil {
 			preserveVerifiedType(&copyDev, prev)
 		}

 		finalDevices = append(finalDevices, &copyDev)
-		deviceIndex[copyDev.Name] = finalDevices[len(finalDevices)-1]
+		copyKey := makeDeviceKey(copyDev.Name, copyDev.Type)
+		deviceIndex[copyKey] = finalDevices[len(finalDevices)-1]
 	}
+	deviceIndexByName := buildUniqueNameIndex(finalDevices)

 	// Merge configured devices on top so users can override scan results (except
 	// for verified type information).
-	for _, dev := range configured {
-		if dev == nil || dev.Name == "" {
+	for _, configuredDevice := range configured {
+		if configuredDevice == nil || configuredDevice.Name == "" {
 			continue
 		}

-		if existingDev, ok := deviceIndex[dev.Name]; ok {
-			// Only update the type if it has not been verified yet; otherwise we
-			// keep the existing verified metadata intact.
-			if dev.Type != "" && !existingDev.typeVerified {
-				newType := strings.TrimSpace(dev.Type)
-				existingDev.Type = newType
-				existingDev.typeVerified = false
-				existingDev.parserType = normalizeParserType(newType)
-			}
-			if dev.InfoName != "" {
-				existingDev.InfoName = dev.InfoName
-			}
-			if dev.Protocol != "" {
-				existingDev.Protocol = dev.Protocol
-			}
+		key := makeDeviceKey(configuredDevice.Name, configuredDevice.Type)
+		if existingDev, ok := deviceIndex[key]; ok {
+			applyConfiguredMetadata(existingDev, configuredDevice)
+			continue
+		}
+		if existingDev := deviceIndexByName[configuredDevice.Name]; existingDev != nil {
+			applyConfiguredMetadata(existingDev, configuredDevice)
 			continue
 		}

-		copyDev := *dev
-		if prev := existingIndex[copyDev.Name]; prev != nil {
+		copyDev := *configuredDevice
+		key = makeDeviceKey(copyDev.Name, copyDev.Type)
+		if prev := existingIndex[key]; prev != nil {
+			preserveVerifiedType(&copyDev, prev)
+		} else if prev := existingByName[copyDev.Name]; prev != nil {
 			preserveVerifiedType(&copyDev, prev)
 		} else if copyDev.Type != "" {
 			copyDev.parserType = normalizeParserType(copyDev.Type)
 		}

 		finalDevices = append(finalDevices, &copyDev)
-		deviceIndex[copyDev.Name] = finalDevices[len(finalDevices)-1]
+		copyKey := makeDeviceKey(copyDev.Name, copyDev.Type)
+		deviceIndex[copyKey] = finalDevices[len(finalDevices)-1]
 	}

 	return finalDevices
@@ -632,12 +763,14 @@ func (sm *SmartManager) updateSmartDevices(devices []*DeviceInfo) {
 		return
 	}

-	validNames := make(map[string]struct{}, len(devices))
+	validKeys := make(map[deviceKey]struct{}, len(devices))
+	nameCounts := make(map[string]int, len(devices))
 	for _, device := range devices {
 		if device == nil || device.Name == "" {
 			continue
 		}
-		validNames[device.Name] = struct{}{}
+		validKeys[makeDeviceKey(device.Name, device.Type)] = struct{}{}
+		nameCounts[device.Name]++
 	}

 	for key, data := range sm.SmartDataMap {
@@ -646,7 +779,11 @@ func (sm *SmartManager) updateSmartDevices(devices []*DeviceInfo) {
 			continue
 		}

-		if _, ok := validNames[data.DiskName]; ok {
+		if data.DiskType == "" {
+			if nameCounts[data.DiskName] == 1 {
+				continue
+			}
+		} else if _, ok := validKeys[makeDeviceKey(data.DiskName, data.DiskType)]; ok {
 			continue
 		}

@@ -738,6 +875,14 @@ func (sm *SmartManager) parseSmartForSata(output []byte) (bool, int) {
 	smartData.DiskName = data.Device.Name
 	smartData.DiskType = data.Device.Type

+	// get values from ata_device_statistics if necessary
+	var ataDeviceStats smart.AtaDeviceStatistics
+	if smartData.Temperature == 0 {
+		if temp := findAtaDeviceStatisticsValue(&data, &ataDeviceStats, 5, "Current Temperature", 0, 255); temp != nil {
+			smartData.Temperature = uint8(*temp)
+		}
+	}
+
 	// update SmartAttributes
 	smartData.Attributes = make([]*smart.SmartAttribute, 0, len(data.AtaSmartAttributes.Table))
 	for _, attr := range data.AtaSmartAttributes.Table {
@@ -772,6 +917,36 @@ func getSmartStatus(temperature uint8, passed bool) string {
 	}
 }

+// findAtaDeviceStatisticsValue returns the value of the entry named entryName (matched case-insensitively) on devstat page entryNumber, or nil when it is absent, unset, or outside [minValue, maxValue].
+// ataDeviceStats is decoded from data.AtaDeviceStatistics only while it has no pages, so one decode can serve several lookups; lookups are centralized here so more metrics can be added later.
+func findAtaDeviceStatisticsValue(data *smart.SmartInfoForSata, ataDeviceStats *smart.AtaDeviceStatistics, entryNumber uint8, entryName string, minValue, maxValue int64) *int64 {
+	if len(ataDeviceStats.Pages) == 0 {
+		if len(data.AtaDeviceStatistics) == 0 {
+			return nil
+		}
+		if err := json.Unmarshal(data.AtaDeviceStatistics, ataDeviceStats); err != nil {
+			return nil
+		}
+	}
+	for pageIdx := range ataDeviceStats.Pages {
+		page := &ataDeviceStats.Pages[pageIdx]
+		if page.Number != entryNumber {
+			continue
+		}
+		for entryIdx := range page.Table {
+			entry := &page.Table[entryIdx]
+			if !strings.EqualFold(entry.Name, entryName) {
+				continue
+			}
+			if entry.Value == nil || *entry.Value < minValue || *entry.Value > maxValue {
+				return nil
+			}
+			return entry.Value
+		}
+	}
+	return nil
+}
+
 func (sm *SmartManager) parseSmartForScsi(output []byte) (bool, int) {
 	var data smart.SmartInfoForScsi

@@ -957,6 +1132,27 @@ func (sm *SmartManager) detectSmartctl() (string, error) {
 	return "", errors.New("smartctl not found")
 }

+// isNvmeControllerPath reports whether the final element of path is an NVMe controller name
+// like nvme0 or nvme1 (as in /dev/nvme0): "nvme" followed only by ASCII digits, without a namespace suffix like n1.
+func isNvmeControllerPath(path string) bool {
+	base := filepath.Base(path)
+	if !strings.HasPrefix(base, "nvme") {
+		return false
+	}
+	suffix := strings.TrimPrefix(base, "nvme")
+	if suffix == "" {
+		return false
+	}
+	// Controller paths are just "nvme" + digits (e.g., nvme0, nvme1)
+	// Namespace paths have "n" after the controller number (e.g., nvme0n1), so any non-digit rejects the path
+	for _, c := range suffix {
+		if c < '0' || c > '9' {
+			return false
+		}
+	}
+	return true
+}
+
 // NewSmartManager creates and initializes a new SmartManager
 func NewSmartManager() (*SmartManager, error) {
 	sm := &SmartManager{
@@ -964,11 +1160,17 @@ func NewSmartManager() (*SmartManager, error) {
 	}
 	sm.refreshExcludedDevices()
 	path, err := sm.detectSmartctl()
+	slog.Debug("smartctl", "path", path, "err", err)
 	if err != nil {
-		slog.Debug(err.Error())
+		// Keep the previous fail-fast behavior unless this Linux host exposes
+		// eMMC or mdraid health via sysfs, in which case smartctl is optional.
+		if runtime.GOOS == "linux" {
+			if len(scanEmmcDevices()) > 0 || len(scanMdraidDevices()) > 0 {
+				return sm, nil
+			}
+		}
 		return nil, err
 	}
-	slog.Debug("smartctl", "path", path)
-	sm.binPath = path
+	sm.smartctlPath = path
 	return sm, nil
 }
--- a/agent/smart_test.go
+++ b/agent/smart_test.go
@@ -1,5 +1,4 @@
 //go:build testing
-// +build testing

 package agent

@@ -89,6 +88,111 @@ func TestParseSmartForSata(t *testing.T) {
 	}
 }

+func TestParseSmartForSataDeviceStatisticsTemperature(t *testing.T) {
+	jsonPayload := []byte(`{
+		"smartctl": {"exit_status": 0},
+		"device": {"name": "/dev/sdb", "type": "sat"},
+		"model_name": "SanDisk SSD U110 16GB",
+		"serial_number": "DEVSTAT123",
+		"firmware_version": "U21B001",
+		"user_capacity": {"bytes": 16013942784},
+		"smart_status": {"passed": true},
+		"ata_smart_attributes": {"table": []},
+		"ata_device_statistics": {
+			"pages": [
+				{
+					"number": 5,
+					"name": "Temperature Statistics",
+					"table": [
+						{"name": "Current Temperature", "value": 22, "flags": {"valid": true}}
+					]
+				}
+			]
+		}
+	}`)
+
+	sm := &SmartManager{SmartDataMap: make(map[string]*smart.SmartData)}
+	hasData, exitStatus := sm.parseSmartForSata(jsonPayload)
+	require.True(t, hasData)
+	assert.Equal(t, 0, exitStatus)
+
+	deviceData, ok := sm.SmartDataMap["DEVSTAT123"]
+	require.True(t, ok, "expected smart data entry for serial DEVSTAT123")
+	assert.Equal(t, uint8(22), deviceData.Temperature)
+}
+
+func TestParseSmartForSataAtaDeviceStatistics(t *testing.T) {
+	// tests that ata_device_statistics values are parsed correctly
+	jsonPayload := []byte(`{
+		"smartctl": {"exit_status": 0},
+		"device": {"name": "/dev/sdb", "type": "sat"},
+		"model_name": "SanDisk SSD U110 16GB",
+		"serial_number": "lksjfh23lhj",
+		"firmware_version": "U21B001",
+		"user_capacity": {"bytes": 16013942784},
+		"smart_status": {"passed": true},
+		"ata_smart_attributes": {"table": []},
+		"ata_device_statistics": {
+			"pages": [
+				{
+					"number": 5,
+					"name": "Temperature Statistics",
+					"table": [
+						{"name": "Current Temperature", "value": 43, "flags": {"valid": true}},
+						{"name": "Specified Minimum Operating Temperature", "value": -20, "flags": {"valid": true}}
+					]
+				}
+			]
+		}
+	}`)
+
+	sm := &SmartManager{SmartDataMap: make(map[string]*smart.SmartData)}
+	hasData, exitStatus := sm.parseSmartForSata(jsonPayload)
+	require.True(t, hasData)
+	assert.Equal(t, 0, exitStatus)
+
+	deviceData, ok := sm.SmartDataMap["lksjfh23lhj"]
+	require.True(t, ok, "expected smart data entry for serial lksjfh23lhj")
+	assert.Equal(t, uint8(43), deviceData.Temperature)
+}
+
+func TestParseSmartForSataNegativeDeviceStatistics(t *testing.T) {
+	// Tests that negative values in ata_device_statistics (e.g. min operating temp)
+	// do not cause the entire SAT parser to fail.
+	jsonPayload := []byte(`{
+		"smartctl": {"exit_status": 0},
+		"device": {"name": "/dev/sdb", "type": "sat"},
+		"model_name": "SanDisk SSD U110 16GB",
+		"serial_number": "NEGATIVE123",
+		"firmware_version": "U21B001",
+		"user_capacity": {"bytes": 16013942784},
+		"smart_status": {"passed": true},
+		"temperature": {"current": 38},
+		"ata_smart_attributes": {"table": []},
+		"ata_device_statistics": {
+			"pages": [
+				{
+					"number": 5,
+					"name": "Temperature Statistics",
+					"table": [
+						{"name": "Current Temperature", "value": 38, "flags": {"valid": true}},
+						{"name": "Specified Minimum Operating Temperature", "value": -20, "flags": {"valid": true}}
+					]
+				}
+			]
+		}
+	}`)
+
+	sm := &SmartManager{SmartDataMap: make(map[string]*smart.SmartData)}
+	hasData, exitStatus := sm.parseSmartForSata(jsonPayload)
+	require.True(t, hasData)
+	assert.Equal(t, 0, exitStatus)
+
+	deviceData, ok := sm.SmartDataMap["NEGATIVE123"]
+	require.True(t, ok, "expected smart data entry for serial NEGATIVE123")
+	assert.Equal(t, uint8(38), deviceData.Temperature)
+}
+
 func TestParseSmartForSataParentheticalRawValue(t *testing.T) {
 	jsonPayload := []byte(`{
 		"smartctl": {"exit_status": 0},
@@ -195,6 +299,24 @@ func TestDevicesSnapshotReturnsCopy(t *testing.T) {
 	assert.Len(t, snapshot, 2)
 }

+func TestScanDevicesWithEnvOverrideAndSeparator(t *testing.T) {
+	t.Setenv("SMART_DEVICES_SEPARATOR", "|")
+	t.Setenv("SMART_DEVICES", "/dev/sda:jmb39x-q,0|/dev/nvme0:nvme")
+
+	sm := &SmartManager{
+		SmartDataMap: make(map[string]*smart.SmartData),
+	}
+
+	err := sm.ScanDevices(true)
+	require.NoError(t, err)
+
+	require.Len(t, sm.SmartDevices, 2)
+	assert.Equal(t, "/dev/sda", sm.SmartDevices[0].Name)
+	assert.Equal(t, "jmb39x-q,0", sm.SmartDevices[0].Type)
+	assert.Equal(t, "/dev/nvme0", sm.SmartDevices[1].Name)
+	assert.Equal(t, "nvme", sm.SmartDevices[1].Type)
+}
+
 func TestScanDevicesWithEnvOverride(t *testing.T) {
 	t.Setenv("SMART_DEVICES", "/dev/sda:sat, /dev/nvme0:nvme")

@@ -249,15 +371,21 @@ func TestSmartctlArgs(t *testing.T) {

 	sataDevice := &DeviceInfo{Name: "/dev/sda", Type: "sat"}
 	assert.Equal(t,
-		[]string{"-d", "sat", "-a", "--json=c", "-n", "standby", "/dev/sda"},
+		[]string{"-d", "sat", "-a", "--json=c", "-l", "devstat", "-n", "standby", "/dev/sda"},
 		sm.smartctlArgs(sataDevice, true),
 	)

 	assert.Equal(t,
-		[]string{"-d", "sat", "-a", "--json=c", "/dev/sda"},
+		[]string{"-d", "sat", "-a", "--json=c", "-l", "devstat", "/dev/sda"},
 		sm.smartctlArgs(sataDevice, false),
 	)

+	nvmeDevice := &DeviceInfo{Name: "/dev/nvme0", Type: "nvme"}
+	assert.Equal(t,
+		[]string{"-d", "nvme", "-a", "--json=c", "-n", "standby", "/dev/nvme0"},
+		sm.smartctlArgs(nvmeDevice, true),
+	)
+
 	assert.Equal(t,
 		[]string{"-a", "--json=c", "-n", "standby"},
 		sm.smartctlArgs(nil, true),
@@ -442,6 +570,88 @@ func TestMergeDeviceListsUpdatesTypeWhenUnverified(t *testing.T) {
 	assert.Equal(t, "", device.parserType)
 }

+func TestMergeDeviceListsHandlesDevicesWithSameNameAndDifferentTypes(t *testing.T) {
+	// There are use cases where the same device name is re-used,
+	// for example, a RAID controller with multiple drives.
+	scanned := []*DeviceInfo{
+		{Name: "/dev/sda", Type: "megaraid,0"},
+		{Name: "/dev/sda", Type: "megaraid,1"},
+		{Name: "/dev/sda", Type: "megaraid,2"},
+	}
+
+	merged := mergeDeviceLists(nil, scanned, nil)
+	require.Len(t, merged, 3, "should have 3 separate devices for RAID controller")
+
+	byKey := make(map[string]*DeviceInfo, len(merged))
+	for _, dev := range merged {
+		key := dev.Name + "|" + dev.Type
+		byKey[key] = dev
+	}
+
+	assert.Contains(t, byKey, "/dev/sda|megaraid,0")
+	assert.Contains(t, byKey, "/dev/sda|megaraid,1")
+	assert.Contains(t, byKey, "/dev/sda|megaraid,2")
+}
+
+func TestMergeDeviceListsHandlesMixedRAIDAndRegular(t *testing.T) {
+	// Test mixing RAID drives with regular devices
+	scanned := []*DeviceInfo{
+		{Name: "/dev/sda", Type: "megaraid,0"},
+		{Name: "/dev/sda", Type: "megaraid,1"},
+		{Name: "/dev/sdb", Type: "sat"},
+		{Name: "/dev/nvme0", Type: "nvme"},
+	}
+
+	merged := mergeDeviceLists(nil, scanned, nil)
+	require.Len(t, merged, 4, "should have 4 separate devices")
+
+	byKey := make(map[string]*DeviceInfo, len(merged))
+	for _, dev := range merged {
+		key := dev.Name + "|" + dev.Type
+		byKey[key] = dev
+	}
+
+	assert.Contains(t, byKey, "/dev/sda|megaraid,0")
+	assert.Contains(t, byKey, "/dev/sda|megaraid,1")
+	assert.Contains(t, byKey, "/dev/sdb|sat")
+	assert.Contains(t, byKey, "/dev/nvme0|nvme")
+}
+
+func TestUpdateSmartDevicesPreservesRAIDDrives(t *testing.T) {
+	// Test that updateSmartDevices correctly validates RAID drives using composite keys
+	sm := &SmartManager{
+		SmartDevices: []*DeviceInfo{
+			{Name: "/dev/sda", Type: "megaraid,0"},
+			{Name: "/dev/sda", Type: "megaraid,1"},
+		},
+		SmartDataMap: map[string]*smart.SmartData{
+			"serial-0": {
+				DiskName:     "/dev/sda",
+				DiskType:     "megaraid,0",
+				SerialNumber: "serial-0",
+			},
+			"serial-1": {
+				DiskName:     "/dev/sda",
+				DiskType:     "megaraid,1",
+				SerialNumber: "serial-1",
+			},
+			"serial-stale": {
+				DiskName:     "/dev/sda",
+				DiskType:     "megaraid,2",
+				SerialNumber: "serial-stale",
+			},
+		},
+	}
+
+	sm.updateSmartDevices(sm.SmartDevices)
+
+	// serial-0 and serial-1 should be preserved (matching devices exist)
+	assert.Contains(t, sm.SmartDataMap, "serial-0")
+	assert.Contains(t, sm.SmartDataMap, "serial-1")
+	// serial-stale should be removed (no matching device)
+	assert.NotContains(t, sm.SmartDataMap, "serial-stale")
+}
+
 func TestParseSmartOutputMarksVerified(t *testing.T) {
 	fixturePath := filepath.Join("test-data", "smart", "nvme0.json")
 	data, err := os.ReadFile(fixturePath)
@@ -589,6 +799,182 @@ func TestIsVirtualDeviceScsi(t *testing.T) {
 	}
 }

+func TestFindAtaDeviceStatisticsValue(t *testing.T) {
+	val42 := int64(42)
+	val100 := int64(100)
+	valMinus20 := int64(-20)
+
+	tests := []struct {
+		name           string
+		data           smart.SmartInfoForSata
+		ataDeviceStats smart.AtaDeviceStatistics
+		entryNumber    uint8
+		entryName      string
+		minValue       int64
+		maxValue       int64
+		expectedValue  *int64
+	}{
+		{
+			name: "value in ataDeviceStats",
+			ataDeviceStats: smart.AtaDeviceStatistics{
+				Pages: []smart.AtaDeviceStatisticsPage{
+					{
+						Number: 5,
+						Table: []smart.AtaDeviceStatisticsEntry{
+							{Name: "Current Temperature", Value: &val42},
+						},
+					},
+				},
+			},
+			entryNumber:   5,
+			entryName:     "Current Temperature",
+			minValue:      0,
+			maxValue:      100,
+			expectedValue: &val42,
+		},
+		{
+			name: "value unmarshaled from data",
+			data: smart.SmartInfoForSata{
+				AtaDeviceStatistics: []byte(`{"pages":[{"number":5,"table":[{"name":"Current Temperature","value":100}]}]}`),
+			},
+			entryNumber:   5,
+			entryName:     "Current Temperature",
+			minValue:      0,
+			maxValue:      255,
+			expectedValue: &val100,
+		},
+		{
+			name: "value out of range (too high)",
+			ataDeviceStats: smart.AtaDeviceStatistics{
+				Pages: []smart.AtaDeviceStatisticsPage{
+					{
+						Number: 5,
+						Table: []smart.AtaDeviceStatisticsEntry{
+							{Name: "Current Temperature", Value: &val100},
+						},
+					},
+				},
+			},
+			entryNumber:   5,
+			entryName:     "Current Temperature",
+			minValue:      0,
+			maxValue:      50,
+			expectedValue: nil,
+		},
+		{
+			name: "value out of range (too low)",
+			ataDeviceStats: smart.AtaDeviceStatistics{
+				Pages: []smart.AtaDeviceStatisticsPage{
+					{
+						Number: 5,
+						Table: []smart.AtaDeviceStatisticsEntry{
+							{Name: "Min Temp", Value: &valMinus20},
+						},
+					},
+				},
+			},
+			entryNumber:   5,
+			entryName:     "Min Temp",
+			minValue:      0,
+			maxValue:      100,
+			expectedValue: nil,
+		},
+		{
+			name:          "no statistics available",
+			data:          smart.SmartInfoForSata{},
+			entryNumber:   5,
+			entryName:     "Current Temperature",
+			minValue:      0,
+			maxValue:      255,
+			expectedValue: nil,
+		},
+		{
+			name: "wrong page number",
+			ataDeviceStats: smart.AtaDeviceStatistics{
+				Pages: []smart.AtaDeviceStatisticsPage{
+					{
+						Number: 1,
+						Table: []smart.AtaDeviceStatisticsEntry{
+							{Name: "Current Temperature", Value: &val42},
+						},
+					},
+				},
+			},
+			entryNumber:   5,
+			entryName:     "Current Temperature",
+			minValue:      0,
+			maxValue:      100,
+			expectedValue: nil,
+		},
+		{
+			name: "wrong entry name",
+			ataDeviceStats: smart.AtaDeviceStatistics{
+				Pages: []smart.AtaDeviceStatisticsPage{
+					{
+						Number: 5,
+						Table: []smart.AtaDeviceStatisticsEntry{
+							{Name: "Other Stat", Value: &val42},
+						},
+					},
+				},
+			},
+			entryNumber:   5,
+			entryName:     "Current Temperature",
+			minValue:      0,
+			maxValue:      100,
+			expectedValue: nil,
+		},
+		{
+			name: "case insensitive name match",
+			ataDeviceStats: smart.AtaDeviceStatistics{
+				Pages: []smart.AtaDeviceStatisticsPage{
+					{
+						Number: 5,
+						Table: []smart.AtaDeviceStatisticsEntry{
+							{Name: "CURRENT TEMPERATURE", Value: &val42},
+						},
+					},
+				},
+			},
+			entryNumber:   5,
+			entryName:     "Current Temperature",
+			minValue:      0,
+			maxValue:      100,
+			expectedValue: &val42,
+		},
+		{
+			name: "entry value is nil",
+			ataDeviceStats: smart.AtaDeviceStatistics{
+				Pages: []smart.AtaDeviceStatisticsPage{
+					{
+						Number: 5,
+						Table: []smart.AtaDeviceStatisticsEntry{
+							{Name: "Current Temperature", Value: nil},
+						},
+					},
+				},
+			},
+			entryNumber:   5,
+			entryName:     "Current Temperature",
+			minValue:      0,
+			maxValue:      100,
+			expectedValue: nil,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := findAtaDeviceStatisticsValue(&tt.data, &tt.ataDeviceStats, tt.entryNumber, tt.entryName, tt.minValue, tt.maxValue)
+			if tt.expectedValue == nil {
+				assert.Nil(t, result)
+			} else {
+				require.NotNil(t, result)
+				assert.Equal(t, *tt.expectedValue, *result)
+			}
+		})
+	}
+}
+
 func TestRefreshExcludedDevices(t *testing.T) {
 	tests := []struct {
 		name         string
@@ -649,7 +1035,7 @@ func TestRefreshExcludedDevices(t *testing.T) {
 				t.Setenv("EXCLUDE_SMART", tt.envValue)
 			} else {
 				// Ensure env var is not set for empty test
-				os.Unsetenv("EXCLUDE_SMART")
+				t.Setenv("EXCLUDE_SMART", "")
 			}

 			sm := &SmartManager{}
@@ -780,3 +1166,36 @@ func TestFilterExcludedDevices(t *testing.T) {
 		})
 	}
 }
+
+func TestIsNvmeControllerPath(t *testing.T) {
+	tests := []struct {
+		path     string
+		expected bool
+	}{
+		// Controller paths (should return true)
+		{"/dev/nvme0", true},
+		{"/dev/nvme1", true},
+		{"/dev/nvme10", true},
+		{"nvme0", true},
+
+		// Namespace paths (should return false)
+		{"/dev/nvme0n1", false},
+		{"/dev/nvme1n1", false},
+		{"/dev/nvme0n1p1", false},
+		{"nvme0n1", false},
+
+		// Non-NVMe paths (should return false)
+		{"/dev/sda", false},
+		{"/dev/sda1", false},
+		{"/dev/hda", false},
+		{"", false},
+		{"/dev/nvme", false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.path, func(t *testing.T) {
+			result := isNvmeControllerPath(tt.path)
+			assert.Equal(t, tt.expected, result, "path: %s", tt.path)
+		})
+	}
+}
--- a/agent/system.go
+++ b/agent/system.go
@@ -2,15 +2,19 @@ package agent

 import (
 	"bufio"
+	"errors"
 	"fmt"
 	"log/slog"
 	"os"
-	"strconv"
+	"runtime"
 	"strings"
 	"time"

 	"github.com/henrygd/beszel"
 	"github.com/henrygd/beszel/agent/battery"
+	"github.com/henrygd/beszel/agent/utils"
+	"github.com/henrygd/beszel/agent/zfs"
+	"github.com/henrygd/beszel/internal/entities/container"
 	"github.com/henrygd/beszel/internal/entities/system"

 	"github.com/shirou/gopsutil/v4/cpu"
@@ -27,46 +31,84 @@ type prevDisk struct {
 }

 // Sets initial / non-changing values about the host system
-func (a *Agent) initializeSystemInfo() {
+func (a *Agent) refreshSystemDetails() {
 	a.systemInfo.AgentVersion = beszel.Version
-	a.systemInfo.Hostname, _ = os.Hostname()
+
+	// get host info from Docker if available
+	var hostInfo container.HostInfo
+
+	if a.dockerManager != nil {
+		a.systemDetails.Podman = a.dockerManager.IsPodman()
+		hostInfo, _ = a.dockerManager.GetHostInfo()
+	}
+
+	a.systemDetails.Hostname, _ = os.Hostname()
+	if arch, err := host.KernelArch(); err == nil {
+		a.systemDetails.Arch = arch
+	} else {
+		a.systemDetails.Arch = runtime.GOARCH
+	}

 	platform, _, version, _ := host.PlatformInformation()

 	if platform == "darwin" {
-		a.systemInfo.KernelVersion = version
-		a.systemInfo.Os = system.Darwin
+		a.systemDetails.Os = system.Darwin
+		a.systemDetails.OsName = fmt.Sprintf("macOS %s", version)
 	} else if strings.Contains(platform, "indows") {
-		a.systemInfo.KernelVersion = fmt.Sprintf("%s %s", strings.Replace(platform, "Microsoft ", "", 1), version)
-		a.systemInfo.Os = system.Windows
+		a.systemDetails.Os = system.Windows
+		a.systemDetails.OsName = strings.Replace(platform, "Microsoft ", "", 1)
+		a.systemDetails.Kernel = version
 	} else if platform == "freebsd" {
-		a.systemInfo.Os = system.Freebsd
-		a.systemInfo.KernelVersion = version
+		a.systemDetails.Os = system.Freebsd
+		a.systemDetails.Kernel, _ = host.KernelVersion()
+		if prettyName, err := getOsPrettyName(); err == nil {
+			a.systemDetails.OsName = prettyName
+		} else {
+			a.systemDetails.OsName = "FreeBSD"
+		}
 	} else {
-		a.systemInfo.Os = system.Linux
-	}
-
-	if a.systemInfo.KernelVersion == "" {
-		a.systemInfo.KernelVersion, _ = host.KernelVersion()
+		a.systemDetails.Os = system.Linux
+		a.systemDetails.OsName = hostInfo.OperatingSystem
+		if a.systemDetails.OsName == "" {
+			if prettyName, err := getOsPrettyName(); err == nil {
+				a.systemDetails.OsName = prettyName
+			} else {
+				a.systemDetails.OsName = platform
+			}
+		}
+		a.systemDetails.Kernel = hostInfo.KernelVersion
+		if a.systemDetails.Kernel == "" {
+			a.systemDetails.Kernel, _ = host.KernelVersion()
+		}
 	}

 	// cpu model
 	if info, err := cpu.Info(); err == nil && len(info) > 0 {
-		a.systemInfo.CpuModel = info[0].ModelName
+		a.systemDetails.CpuModel = info[0].ModelName
 	}
 	// cores / threads
-	a.systemInfo.Cores, _ = cpu.Counts(false)
-	if threads, err := cpu.Counts(true); err == nil {
-		if threads > 0 && threads < a.systemInfo.Cores {
-			// in lxc logical cores reflects container limits, so use that as cores if lower
-			a.systemInfo.Cores = threads
-		} else {
-			a.systemInfo.Threads = threads
+	cores, _ := cpu.Counts(false)
+	threads := hostInfo.NCPU
+	if threads == 0 {
+		threads, _ = cpu.Counts(true)
+	}
+	// in lxc, logical cores reflects container limits, so use that as cores if lower
+	if threads > 0 && threads < cores {
+		cores = threads
+	}
+	a.systemDetails.Cores = cores
+	a.systemDetails.Threads = threads
+
+	// total memory
+	a.systemDetails.MemoryTotal = hostInfo.MemTotal
+	if a.systemDetails.MemoryTotal == 0 {
+		if v, err := mem.VirtualMemory(); err == nil {
+			a.systemDetails.MemoryTotal = v.Total
 		}
 	}

 	// zfs
-	if _, err := getARCSize(); err != nil {
+	if _, err := zfs.ARCSize(); err != nil {
 		slog.Debug("Not monitoring ZFS ARC", "err", err)
 	} else {
 		a.zfs = true
@@ -86,13 +128,13 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats {
 	// cpu metrics
 	cpuMetrics, err := getCpuMetrics(cacheTimeMs)
 	if err == nil {
-		systemStats.Cpu = twoDecimals(cpuMetrics.Total)
+		systemStats.Cpu = utils.TwoDecimals(cpuMetrics.Total)
 		systemStats.CpuBreakdown = []float64{
-			twoDecimals(cpuMetrics.User),
-			twoDecimals(cpuMetrics.System),
-			twoDecimals(cpuMetrics.Iowait),
-			twoDecimals(cpuMetrics.Steal),
-			twoDecimals(cpuMetrics.Idle),
+			utils.TwoDecimals(cpuMetrics.User),
+			utils.TwoDecimals(cpuMetrics.System),
+			utils.TwoDecimals(cpuMetrics.Iowait),
+			utils.TwoDecimals(cpuMetrics.Steal),
+			utils.TwoDecimals(cpuMetrics.Idle),
 		}
 	} else {
 		slog.Error("Error getting cpu metrics", "err", err)
@@ -116,8 +158,8 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats {
 	// memory
 	if v, err := mem.VirtualMemory(); err == nil {
 		// swap
-		systemStats.Swap = bytesToGigabytes(v.SwapTotal)
-		systemStats.SwapUsed = bytesToGigabytes(v.SwapTotal - v.SwapFree - v.SwapCached)
+		systemStats.Swap = utils.BytesToGigabytes(v.SwapTotal)
+		systemStats.SwapUsed = utils.BytesToGigabytes(v.SwapTotal - v.SwapFree - v.SwapCached)
 		// cache + buffers value for default mem calculation
 		// note: gopsutil automatically adds SReclaimable to v.Cached
 		cacheBuff := v.Cached + v.Buffers - v.Shared
@@ -137,16 +179,16 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats {
 		// }
 		// subtract ZFS ARC size from used memory and add as its own category
 		if a.zfs {
-			if arcSize, _ := getARCSize(); arcSize > 0 && arcSize < v.Used {
+			if arcSize, _ := zfs.ARCSize(); arcSize > 0 && arcSize < v.Used {
 				v.Used = v.Used - arcSize
 				v.UsedPercent = float64(v.Used) / float64(v.Total) * 100.0
-				systemStats.MemZfsArc = bytesToGigabytes(arcSize)
+				systemStats.MemZfsArc = utils.BytesToGigabytes(arcSize)
 			}
 		}
-		systemStats.Mem = bytesToGigabytes(v.Total)
-		systemStats.MemBuffCache = bytesToGigabytes(cacheBuff)
-		systemStats.MemUsed = bytesToGigabytes(v.Used)
-		systemStats.MemPct = twoDecimals(v.UsedPercent)
+		systemStats.Mem = utils.BytesToGigabytes(v.Total)
+		systemStats.MemBuffCache = utils.BytesToGigabytes(cacheBuff)
+		systemStats.MemUsed = utils.BytesToGigabytes(v.Used)
+		systemStats.MemPct = utils.TwoDecimals(v.UsedPercent)
 	}

 	// disk usage
@@ -195,47 +237,37 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats {
 		}
 	}

-	// update base system info
+	// update system info
 	a.systemInfo.ConnectionType = a.connectionManager.ConnectionType
 	a.systemInfo.Cpu = systemStats.Cpu
 	a.systemInfo.LoadAvg = systemStats.LoadAvg
-	// TODO: remove these in future release in favor of load avg array
-	a.systemInfo.LoadAvg1 = systemStats.LoadAvg[0]
-	a.systemInfo.LoadAvg5 = systemStats.LoadAvg[1]
-	a.systemInfo.LoadAvg15 = systemStats.LoadAvg[2]
 	a.systemInfo.MemPct = systemStats.MemPct
 	a.systemInfo.DiskPct = systemStats.DiskPct
+	a.systemInfo.Battery = systemStats.Battery
 	a.systemInfo.Uptime, _ = host.Uptime()
-	// TODO: in future release, remove MB bandwidth values in favor of bytes
-	a.systemInfo.Bandwidth = twoDecimals(systemStats.NetworkSent + systemStats.NetworkRecv)
 	a.systemInfo.BandwidthBytes = systemStats.Bandwidth[0] + systemStats.Bandwidth[1]
-	slog.Debug("sysinfo", "data", a.systemInfo)
+	a.systemInfo.Threads = a.systemDetails.Threads

 	return systemStats
 }

-// Returns the size of the ZFS ARC memory cache in bytes
-func getARCSize() (uint64, error) {
-	file, err := os.Open("/proc/spl/kstat/zfs/arcstats")
+// getOsPrettyName attempts to read the PRETTY_NAME value from /etc/os-release (used for both Linux and FreeBSD hosts)
+func getOsPrettyName() (string, error) {
+	file, err := os.Open("/etc/os-release")
 	if err != nil {
-		return 0, err
+		return "", err
 	}
 	defer file.Close()

-	// Scan the lines
 	scanner := bufio.NewScanner(file)
 	for scanner.Scan() {
 		line := scanner.Text()
-		if strings.HasPrefix(line, "size") {
-			// Example line: size 4 15032385536
-			fields := strings.Fields(line)
-			if len(fields) < 3 {
-				return 0, err
-			}
-			// Return the size as uint64
-			return strconv.ParseUint(fields[2], 10, 64)
+		if after, ok := strings.CutPrefix(line, "PRETTY_NAME="); ok {
+			value := after
+			value = strings.Trim(value, `"`)
+			return value, nil
 		}
 	}

-	return 0, fmt.Errorf("failed to parse size field")
+	return "", errors.New("pretty name not found")
 }
--- a/agent/systemd.go
+++ b/agent/systemd.go
@@ -8,18 +8,18 @@ import (
 	"log/slog"
 	"maps"
 	"math"
+	"os"
 	"strconv"
 	"strings"
 	"sync"
 	"time"

 	"github.com/coreos/go-systemd/v22/dbus"
+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/henrygd/beszel/internal/entities/systemd"
 )

-var (
-	errNoActiveTime = errors.New("no active time")
-)
+var errNoActiveTime = errors.New("no active time")

 // systemdManager manages the collection of systemd service statistics.
 type systemdManager struct {
@@ -30,11 +30,39 @@ type systemdManager struct {
 	patterns        []string
 }

+// isSystemdAvailable reports whether systemd looks reachable (/run/systemd/system or a D-Bus system bus socket exists, or PID 1 is named systemd) to avoid unnecessary connection attempts (#1548)
+func isSystemdAvailable() bool {
+	paths := []string{
+		"/run/systemd/system",
+		"/run/dbus/system_bus_socket",
+		"/var/run/dbus/system_bus_socket",
+	}
+	for _, path := range paths {
+		if _, err := os.Stat(path); err == nil {
+			return true
+		}
+	}
+	if data, err := os.ReadFile("/proc/1/comm"); err == nil {
+		return strings.TrimSpace(string(data)) == "systemd"
+	}
+	return false
+}
+
 // newSystemdManager creates a new systemdManager.
 func newSystemdManager() (*systemdManager, error) {
+	if skipSystemd, _ := utils.GetEnv("SKIP_SYSTEMD"); skipSystemd == "true" {
+		return nil, nil
+	}
+
+	// Check if systemd is available on the system before attempting connection
+	if !isSystemdAvailable() {
+		slog.Debug("Systemd not available")
+		return nil, nil
+	}
+
 	conn, err := dbus.NewSystemConnectionContext(context.Background())
 	if err != nil {
-		slog.Warn("Error connecting to systemd", "err", err, "ref", "https://beszel.dev/guide/systemd")
+		slog.Debug("Error connecting to systemd", "err", err, "ref", "https://beszel.dev/guide/systemd")
 		return nil, err
 	}

@@ -117,13 +145,27 @@ func (sm *systemdManager) getServiceStats(conn *dbus.Conn, refresh bool) []*syst
 		return nil
 	}

+	// Track which units are currently present to remove stale entries
+	currentUnits := make(map[string]struct{}, len(units))
+
 	for _, unit := range units {
+		currentUnits[unit.Name] = struct{}{}
 		service, err := sm.updateServiceStats(conn, unit)
 		if err != nil {
 			continue
 		}
 		services = append(services, service)
 	}
+
+	// Remove services that no longer exist in systemd
+	sm.Lock()
+	for unitName := range sm.serviceStatsMap {
+		if _, exists := currentUnits[unitName]; !exists {
+			delete(sm.serviceStatsMap, unitName)
+		}
+	}
+	sm.Unlock()
+
 	sm.hasFreshStats = true
 	return services
 }
@@ -253,13 +295,13 @@ func unescapeServiceName(name string) string {
 // otherwise defaults to "*service".
 func getServicePatterns() []string {
 	patterns := []string{}
-	if envPatterns, _ := GetEnv("SERVICE_PATTERNS"); envPatterns != "" {
+	if envPatterns, _ := utils.GetEnv("SERVICE_PATTERNS"); envPatterns != "" {
 		for pattern := range strings.SplitSeq(envPatterns, ",") {
 			pattern = strings.TrimSpace(pattern)
 			if pattern == "" {
 				continue
 			}
-			if !strings.HasSuffix(pattern, ".service") {
+			if !strings.HasSuffix(pattern, "timer") && !strings.HasSuffix(pattern, ".service") {
 				pattern += ".service"
 			}
 			patterns = append(patterns, pattern)
--- a/agent/systemd_nonlinux_test.go
+++ b/agent/systemd_nonlinux_test.go
@@ -19,11 +19,11 @@ func TestSystemdManagerGetServiceStats(t *testing.T) {
 	assert.NoError(t, err)

 	// Test with refresh = true
-	result := manager.getServiceStats(true)
+	result := manager.getServiceStats("any-service", true)
 	assert.Nil(t, result)

 	// Test with refresh = false
-	result = manager.getServiceStats(false)
+	result = manager.getServiceStats("any-service", false)
 	assert.Nil(t, result)
 }

--- a/agent/systemd_test.go
+++ b/agent/systemd_test.go
@@ -4,6 +4,7 @@ package agent

 import (
 	"os"
+	"strings"
 	"testing"

 	"github.com/stretchr/testify/assert"
@@ -48,6 +49,35 @@ func TestUnescapeServiceNameInvalid(t *testing.T) {
 	}
 }

+func TestIsSystemdAvailable(t *testing.T) {
+	// Note: This test's result will vary based on the actual system running the tests
+	// On systems with systemd or a D-Bus system bus socket, it should return true
+	// On systems with neither, it should return false
+	result := isSystemdAvailable()
+
+	// Mirror every signal isSystemdAvailable accepts: a marker path exists or PID 1 is systemd
+	markerExists := false
+	for _, path := range []string{"/run/systemd/system", "/run/dbus/system_bus_socket", "/var/run/dbus/system_bus_socket"} {
+		if _, err := os.Stat(path); err == nil {
+			markerExists = true
+		}
+	}
+	pid1IsSystemd := false
+	if data, err := os.ReadFile("/proc/1/comm"); err == nil {
+		pid1IsSystemd = strings.TrimSpace(string(data)) == "systemd"
+	}
+
+	expected := markerExists || pid1IsSystemd
+	assert.Equal(t, expected, result, "isSystemdAvailable should correctly detect systemd presence")
+
+	// Log the result for informational purposes
+	if result {
+		t.Log("Systemd is available on this system")
+	} else {
+		t.Log("Systemd is not available on this system")
+	}
+}
+
 func TestGetServicePatterns(t *testing.T) {
 	tests := []struct {
 		name           string
@@ -126,20 +156,23 @@ func TestGetServicePatterns(t *testing.T) {
 			expected:       []string{"*nginx*.service", "*apache*.service"},
 			cleanupEnvVars: true,
 		},
+		{
+			name:           "opt into timer monitoring",
+			prefixedEnv:    "nginx.service,docker,apache.timer",
+			unprefixedEnv:  "",
+			expected:       []string{"nginx.service", "docker.service", "apache.timer"},
+			cleanupEnvVars: true,
+		},
 	}

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			// Clean up any existing env vars
-			os.Unsetenv("BESZEL_AGENT_SERVICE_PATTERNS")
-			os.Unsetenv("SERVICE_PATTERNS")
-
 			// Set up environment variables
 			if tt.prefixedEnv != "" {
-				os.Setenv("BESZEL_AGENT_SERVICE_PATTERNS", tt.prefixedEnv)
+				t.Setenv("BESZEL_AGENT_SERVICE_PATTERNS", tt.prefixedEnv)
 			}
 			if tt.unprefixedEnv != "" {
-				os.Setenv("SERVICE_PATTERNS", tt.unprefixedEnv)
+				t.Setenv("SERVICE_PATTERNS", tt.unprefixedEnv)
 			}

 			// Run the function
@@ -147,12 +180,6 @@ func TestGetServicePatterns(t *testing.T) {

 			// Verify results
 			assert.Equal(t, tt.expected, result, "Patterns should match expected values")
-
-			// Cleanup
-			if tt.cleanupEnvVars {
-				os.Unsetenv("BESZEL_AGENT_SERVICE_PATTERNS")
-				os.Unsetenv("SERVICE_PATTERNS")
-			}
 		})
 	}
 }
--- a/agent/test-data/amdgpu.ids
+++ b/agent/test-data/amdgpu.ids
@@ -0,0 +1,700 @@
+# List of AMDGPU IDs
+#
+# Syntax:
+# device_id,	revision_id,	product_name        <-- single tab after comma
+
+1.0.0
+1114,	C2,	AMD Radeon 860M Graphics
+1114,	C3,	AMD Radeon 840M Graphics
+1114,	D2,	AMD Radeon 860M Graphics
+1114,	D3,	AMD Radeon 840M Graphics
+1309,	00,	AMD Radeon R7 Graphics
+130A,	00,	AMD Radeon R6 Graphics
+130B,	00,	AMD Radeon R4 Graphics
+130C,	00,	AMD Radeon R7 Graphics
+130D,	00,	AMD Radeon R6 Graphics
+130E,	00,	AMD Radeon R5 Graphics
+130F,	00,	AMD Radeon R7 Graphics
+130F,	D4,	AMD Radeon R7 Graphics
+130F,	D5,	AMD Radeon R7 Graphics
+130F,	D6,	AMD Radeon R7 Graphics
+130F,	D7,	AMD Radeon R7 Graphics
+1313,	00,	AMD Radeon R7 Graphics
+1313,	D4,	AMD Radeon R7 Graphics
+1313,	D5,	AMD Radeon R7 Graphics
+1313,	D6,	AMD Radeon R7 Graphics
+1315,	00,	AMD Radeon R5 Graphics
+1315,	D4,	AMD Radeon R5 Graphics
+1315,	D5,	AMD Radeon R5 Graphics
+1315,	D6,	AMD Radeon R5 Graphics
+1315,	D7,	AMD Radeon R5 Graphics
+1316,	00,	AMD Radeon R5 Graphics
+1318,	00,	AMD Radeon R5 Graphics
+131B,	00,	AMD Radeon R4 Graphics
+131C,	00,	AMD Radeon R7 Graphics
+131D,	00,	AMD Radeon R6 Graphics
+1435,	AE,	AMD Custom GPU 0932
+1506,	C1,	AMD Radeon 610M
+1506,	C2,	AMD Radeon 610M
+1506,	C3,	AMD Radeon 610M
+1506,	C4,	AMD Radeon 610M
+150E,	C1,	AMD Radeon 890M Graphics
+150E,	C4,	AMD Radeon 890M Graphics
+150E,	C5,	AMD Radeon 890M Graphics
+150E,	C6,	AMD Radeon 890M Graphics
+150E,	D1,	AMD Radeon 890M Graphics
+150E,	D2,	AMD Radeon 890M Graphics
+150E,	D3,	AMD Radeon 890M Graphics
+1586,	C1,	Radeon 8060S Graphics
+1586,	C2,	Radeon 8050S Graphics
+1586,	C4,	Radeon 8050S Graphics
+1586,	D1,	Radeon 8060S Graphics
+1586,	D2,	Radeon 8050S Graphics
+1586,	D4,	Radeon 8050S Graphics
+1586,	D5,	Radeon 8040S Graphics
+15BF,	00,	AMD Radeon 780M Graphics
+15BF,	01,	AMD Radeon 760M Graphics
+15BF,	02,	AMD Radeon 780M Graphics
+15BF,	03,	AMD Radeon 760M Graphics
+15BF,	C1,	AMD Radeon 780M Graphics
+15BF,	C2,	AMD Radeon 780M Graphics
+15BF,	C3,	AMD Radeon 760M Graphics
+15BF,	C4,	AMD Radeon 780M Graphics
+15BF,	C5,	AMD Radeon 740M Graphics
+15BF,	C6,	AMD Radeon 780M Graphics
+15BF,	C7,	AMD Radeon 780M Graphics
+15BF,	C8,	AMD Radeon 760M Graphics
+15BF,	C9,	AMD Radeon 780M Graphics
+15BF,	CA,	AMD Radeon 740M Graphics
+15BF,	CB,	AMD Radeon 760M Graphics
+15BF,	CC,	AMD Radeon 740M Graphics
+15BF,	CD,	AMD Radeon 760M Graphics
+15BF,	CF,	AMD Radeon 780M Graphics
+15BF,	D0,	AMD Radeon 780M Graphics
+15BF,	D1,	AMD Radeon 780M Graphics
+15BF,	D2,	AMD Radeon 780M Graphics
+15BF,	D3,	AMD Radeon 780M Graphics
+15BF,	D4,	AMD Radeon 780M Graphics
+15BF,	D5,	AMD Radeon 760M Graphics
+15BF,	D6,	AMD Radeon 760M Graphics
+15BF,	D7,	AMD Radeon 780M Graphics
+15BF,	D8,	AMD Radeon 740M Graphics
+15BF,	D9,	AMD Radeon 780M Graphics
+15BF,	DA,	AMD Radeon 780M Graphics
+15BF,	DB,	AMD Radeon 760M Graphics
+15BF,	DC,	AMD Radeon 760M Graphics
+15BF,	DD,	AMD Radeon 780M Graphics
+15BF,	DE,	AMD Radeon 740M Graphics
+15BF,	DF,	AMD Radeon 760M Graphics
+15BF,	F0,	AMD Radeon 760M Graphics
+15C8,	C1,	AMD Radeon 740M Graphics
+15C8,	C2,	AMD Radeon 740M Graphics
+15C8,	C3,	AMD Radeon 740M Graphics
+15C8,	C4,	AMD Radeon 740M Graphics
+15C8,	D1,	AMD Radeon 740M Graphics
+15C8,	D2,	AMD Radeon 740M Graphics
+15C8,	D3,	AMD Radeon 740M Graphics
+15C8,	D4,	AMD Radeon 740M Graphics
+15D8,	00,	AMD Radeon RX Vega 8 Graphics WS
+15D8,	91,	AMD Radeon Vega 3 Graphics
+15D8,	91,	AMD Ryzen Embedded R1606G with Radeon Vega Gfx
+15D8,	92,	AMD Radeon Vega 3 Graphics
+15D8,	92,	AMD Ryzen Embedded R1505G with Radeon Vega Gfx
+15D8,	93,	AMD Radeon Vega 1 Graphics
+15D8,	A1,	AMD Radeon Vega 10 Graphics
+15D8,	A2,	AMD Radeon Vega 8 Graphics
+15D8,	A3,	AMD Radeon Vega 6 Graphics
+15D8,	A4,	AMD Radeon Vega 3 Graphics
+15D8,	B1,	AMD Radeon Vega 10 Graphics
+15D8,	B2,	AMD Radeon Vega 8 Graphics
+15D8,	B3,	AMD Radeon Vega 6 Graphics
+15D8,	B4,	AMD Radeon Vega 3 Graphics
+15D8,	C1,	AMD Radeon Vega 10 Graphics
+15D8,	C2,	AMD Radeon Vega 8 Graphics
+15D8,	C3,	AMD Radeon Vega 6 Graphics
+15D8,	C4,	AMD Radeon Vega 3 Graphics
+15D8,	C5,	AMD Radeon Vega 3 Graphics
+15D8,	C8,	AMD Radeon Vega 11 Graphics
+15D8,	C9,	AMD Radeon Vega 8 Graphics
+15D8,	CA,	AMD Radeon Vega 11 Graphics
+15D8,	CB,	AMD Radeon Vega 8 Graphics
+15D8,	CC,	AMD Radeon Vega 3 Graphics
+15D8,	CE,	AMD Radeon Vega 3 Graphics
+15D8,	CF,	AMD Ryzen Embedded R1305G with Radeon Vega Gfx
+15D8,	D1,	AMD Radeon Vega 10 Graphics
+15D8,	D2,	AMD Radeon Vega 8 Graphics
+15D8,	D3,	AMD Radeon Vega 6 Graphics
+15D8,	D4,	AMD Radeon Vega 3 Graphics
+15D8,	D8,	AMD Radeon Vega 11 Graphics
+15D8,	D9,	AMD Radeon Vega 8 Graphics
+15D8,	DA,	AMD Radeon Vega 11 Graphics
+15D8,	DB,	AMD Radeon Vega 3 Graphics
+15D8,	DB,	AMD Radeon Vega 8 Graphics
+15D8,	DC,	AMD Radeon Vega 3 Graphics
+15D8,	DD,	AMD Radeon Vega 3 Graphics
+15D8,	DE,	AMD Radeon Vega 3 Graphics
+15D8,	DF,	AMD Radeon Vega 3 Graphics
+15D8,	E3,	AMD Radeon Vega 3 Graphics
+15D8,	E4,	AMD Ryzen Embedded R1102G with Radeon Vega Gfx
+15DD,	81,	AMD Ryzen Embedded V1807B with Radeon Vega Gfx
+15DD,	82,	AMD Ryzen Embedded V1756B with Radeon Vega Gfx
+15DD,	83,	AMD Ryzen Embedded V1605B with Radeon Vega Gfx
+15DD,	84,	AMD Radeon Vega 6 Graphics
+15DD,	85,	AMD Ryzen Embedded V1202B with Radeon Vega Gfx
+15DD,	86,	AMD Radeon Vega 11 Graphics
+15DD,	88,	AMD Radeon Vega 8 Graphics
+15DD,	C1,	AMD Radeon Vega 11 Graphics
+15DD,	C2,	AMD Radeon Vega 8 Graphics
+15DD,	C3,	AMD Radeon Vega 3 / 10 Graphics
+15DD,	C4,	AMD Radeon Vega 8 Graphics
+15DD,	C5,	AMD Radeon Vega 3 Graphics
+15DD,	C6,	AMD Radeon Vega 11 Graphics
+15DD,	C8,	AMD Radeon Vega 8 Graphics
+15DD,	C9,	AMD Radeon Vega 11 Graphics
+15DD,	CA,	AMD Radeon Vega 8 Graphics
+15DD,	CB,	AMD Radeon Vega 3 Graphics
+15DD,	CC,	AMD Radeon Vega 6 Graphics
+15DD,	CE,	AMD Radeon Vega 3 Graphics
+15DD,	CF,	AMD Radeon Vega 3 Graphics
+15DD,	D0,	AMD Radeon Vega 10 Graphics
+15DD,	D1,	AMD Radeon Vega 8 Graphics
+15DD,	D3,	AMD Radeon Vega 11 Graphics
+15DD,	D5,	AMD Radeon Vega 8 Graphics
+15DD,	D6,	AMD Radeon Vega 11 Graphics
+15DD,	D7,	AMD Radeon Vega 8 Graphics
+15DD,	D8,	AMD Radeon Vega 3 Graphics
+15DD,	D9,	AMD Radeon Vega 6 Graphics
+15DD,	E1,	AMD Radeon Vega 3 Graphics
+15DD,	E2,	AMD Radeon Vega 3 Graphics
+163F,	AE,	AMD Custom GPU 0405
+163F,	E1,	AMD Custom GPU 0405
+164E,	D8,	AMD Radeon 610M
+164E,	D9,	AMD Radeon 610M
+164E,	DA,	AMD Radeon 610M
+164E,	DB,	AMD Radeon 610M
+164E,	DC,	AMD Radeon 610M
+1681,	06,	AMD Radeon 680M
+1681,	07,	AMD Radeon 660M
+1681,	0A,	AMD Radeon 680M
+1681,	0B,	AMD Radeon 660M
+1681,	C7,	AMD Radeon 680M
+1681,	C8,	AMD Radeon 680M
+1681,	C9,	AMD Radeon 660M
+1900,	01,	AMD Radeon 780M Graphics
+1900,	02,	AMD Radeon 760M Graphics
+1900,	03,	AMD Radeon 780M Graphics
+1900,	04,	AMD Radeon 760M Graphics
+1900,	05,	AMD Radeon 780M Graphics
+1900,	06,	AMD Radeon 780M Graphics
+1900,	07,	AMD Radeon 760M Graphics
+1900,	B0,	AMD Radeon 780M Graphics
+1900,	B1,	AMD Radeon 780M Graphics
+1900,	B2,	AMD Radeon 780M Graphics
+1900,	B3,	AMD Radeon 780M Graphics
+1900,	B4,	AMD Radeon 780M Graphics
+1900,	B5,	AMD Radeon 780M Graphics
+1900,	B6,	AMD Radeon 780M Graphics
+1900,	B7,	AMD Radeon 760M Graphics
+1900,	B8,	AMD Radeon 760M Graphics
+1900,	B9,	AMD Radeon 780M Graphics
+1900,	BA,	AMD Radeon 780M Graphics
+1900,	BB,	AMD Radeon 780M Graphics
+1900,	C0,	AMD Radeon 780M Graphics
+1900,	C1,	AMD Radeon 760M Graphics
+1900,	C2,	AMD Radeon 780M Graphics
+1900,	C3,	AMD Radeon 760M Graphics
+1900,	C4,	AMD Radeon 780M Graphics
+1900,	C5,	AMD Radeon 780M Graphics
+1900,	C6,	AMD Radeon 760M Graphics
+1900,	C7,	AMD Radeon 780M Graphics
+1900,	C8,	AMD Radeon 760M Graphics
+1900,	C9,	AMD Radeon 780M Graphics
+1900,	CA,	AMD Radeon 760M Graphics
+1900,	CB,	AMD Radeon 780M Graphics
+1900,	CC,	AMD Radeon 780M Graphics
+1900,	CD,	AMD Radeon 760M Graphics
+1900,	CE,	AMD Radeon 780M Graphics
+1900,	CF,	AMD Radeon 760M Graphics
+1900,	D0,	AMD Radeon 780M Graphics
+1900,	D1,	AMD Radeon 760M Graphics
+1900,	D2,	AMD Radeon 780M Graphics
+1900,	D3,	AMD Radeon 760M Graphics
+1900,	D4,	AMD Radeon 780M Graphics
+1900,	D5,	AMD Radeon 780M Graphics
+1900,	D6,	AMD Radeon 760M Graphics
+1900,	D7,	AMD Radeon 780M Graphics
+1900,	D8,	AMD Radeon 760M Graphics
+1900,	D9,	AMD Radeon 780M Graphics
+1900,	DA,	AMD Radeon 760M Graphics
+1900,	DB,	AMD Radeon 780M Graphics
+1900,	DC,	AMD Radeon 780M Graphics
+1900,	DD,	AMD Radeon 760M Graphics
+1900,	DE,	AMD Radeon 780M Graphics
+1900,	DF,	AMD Radeon 760M Graphics
+1900,	F0,	AMD Radeon 780M Graphics
+1900,	F1,	AMD Radeon 780M Graphics
+1900,	F2,	AMD Radeon 780M Graphics
+1901,	C1,	AMD Radeon 740M Graphics
+1901,	C2,	AMD Radeon 740M Graphics
+1901,	C3,	AMD Radeon 740M Graphics
+1901,	C6,	AMD Radeon 740M Graphics
+1901,	C7,	AMD Radeon 740M Graphics
+1901,	C8,	AMD Radeon 740M Graphics
+1901,	C9,	AMD Radeon 740M Graphics
+1901,	CA,	AMD Radeon 740M Graphics
+1901,	D1,	AMD Radeon 740M Graphics
+1901,	D2,	AMD Radeon 740M Graphics
+1901,	D3,	AMD Radeon 740M Graphics
+1901,	D4,	AMD Radeon 740M Graphics
+1901,	D5,	AMD Radeon 740M Graphics
+1901,	D6,	AMD Radeon 740M Graphics
+1901,	D7,	AMD Radeon 740M Graphics
+1901,	D8,	AMD Radeon 740M Graphics
+6600,	00,	AMD Radeon HD 8600 / 8700M
+6600,	81,	AMD Radeon R7 M370
+6601,	00,	AMD Radeon HD 8500M / 8700M
+6604,	00,	AMD Radeon R7 M265 Series
+6604,	81,	AMD Radeon R7 M350
+6605,	00,	AMD Radeon R7 M260 Series
+6605,	81,	AMD Radeon R7 M340
+6606,	00,	AMD Radeon HD 8790M
+6607,	00,	AMD Radeon R5 M240
+6608,	00,	AMD FirePro W2100
+6610,	00,	AMD Radeon R7 200 Series
+6610,	81,	AMD Radeon R7 350
+6610,	83,	AMD Radeon R5 340
+6610,	87,	AMD Radeon R7 200 Series
+6611,	00,	AMD Radeon R7 200 Series
+6611,	87,	AMD Radeon R7 200 Series
+6613,	00,	AMD Radeon R7 200 Series
+6617,	00,	AMD Radeon R7 240 Series
+6617,	87,	AMD Radeon R7 200 Series
+6617,	C7,	AMD Radeon R7 240 Series
+6640,	00,	AMD Radeon HD 8950
+6640,	80,	AMD Radeon R9 M380
+6646,	00,	AMD Radeon R9 M280X
+6646,	80,	AMD Radeon R9 M385
+6646,	80,	AMD Radeon R9 M470X
+6647,	00,	AMD Radeon R9 M200X Series
+6647,	80,	AMD Radeon R9 M380
+6649,	00,	AMD FirePro W5100
+6658,	00,	AMD Radeon R7 200 Series
+665C,	00,	AMD Radeon HD 7700 Series
+665D,	00,	AMD Radeon R7 200 Series
+665F,	81,	AMD Radeon R7 360 Series
+6660,	00,	AMD Radeon HD 8600M Series
+6660,	81,	AMD Radeon R5 M335
+6660,	83,	AMD Radeon R5 M330
+6663,	00,	AMD Radeon HD 8500M Series
+6663,	83,	AMD Radeon R5 M320
+6664,	00,	AMD Radeon R5 M200 Series
+6665,	00,	AMD Radeon R5 M230 Series
+6665,	83,	AMD Radeon R5 M320
+6665,	C3,	AMD Radeon R5 M435
+6666,	00,	AMD Radeon R5 M200 Series
+6667,	00,	AMD Radeon R5 M200 Series
+666F,	00,	AMD Radeon HD 8500M
+66A1,	02,	AMD Instinct MI60 / MI50
+66A1,	06,	AMD Radeon Pro VII
+66AF,	C1,	AMD Radeon VII
+6780,	00,	AMD FirePro W9000
+6784,	00,	ATI FirePro V (FireGL V) Graphics Adapter
+6788,	00,	ATI FirePro V (FireGL V) Graphics Adapter
+678A,	00,	AMD FirePro W8000
+6798,	00,	AMD Radeon R9 200 / HD 7900 Series
+6799,	00,	AMD Radeon HD 7900 Series
+679A,	00,	AMD Radeon HD 7900 Series
+679B,	00,	AMD Radeon HD 7900 Series
+679E,	00,	AMD Radeon HD 7800 Series
+67A0,	00,	AMD Radeon FirePro W9100
+67A1,	00,	AMD Radeon FirePro W8100
+67B0,	00,	AMD Radeon R9 200 Series
+67B0,	80,	AMD Radeon R9 390 Series
+67B1,	00,	AMD Radeon R9 200 Series
+67B1,	80,	AMD Radeon R9 390 Series
+67B9,	00,	AMD Radeon R9 200 Series
+67C0,	00,	AMD Radeon Pro WX 7100 Graphics
+67C0,	80,	AMD Radeon E9550
+67C2,	01,	AMD Radeon Pro V7350x2
+67C2,	02,	AMD Radeon Pro V7300X
+67C4,	00,	AMD Radeon Pro WX 7100 Graphics
+67C4,	80,	AMD Radeon E9560 / E9565 Graphics
+67C7,	00,	AMD Radeon Pro WX 5100 Graphics
+67C7,	80,	AMD Radeon E9390 Graphics
+67D0,	01,	AMD Radeon Pro V7350x2
+67D0,	02,	AMD Radeon Pro V7300X
+67DF,	C0,	AMD Radeon Pro 580X
+67DF,	C1,	AMD Radeon RX 580 Series
+67DF,	C2,	AMD Radeon RX 570 Series
+67DF,	C3,	AMD Radeon RX 580 Series
+67DF,	C4,	AMD Radeon RX 480 Graphics
+67DF,	C5,	AMD Radeon RX 470 Graphics
+67DF,	C6,	AMD Radeon RX 570 Series
+67DF,	C7,	AMD Radeon RX 480 Graphics
+67DF,	CF,	AMD Radeon RX 470 Graphics
+67DF,	D7,	AMD Radeon RX 470 Graphics
+67DF,	E0,	AMD Radeon RX 470 Series
+67DF,	E1,	AMD Radeon RX 590 Series
+67DF,	E3,	AMD Radeon RX Series
+67DF,	E7,	AMD Radeon RX 580 Series
+67DF,	EB,	AMD Radeon Pro 580X
+67DF,	EF,	AMD Radeon RX 570 Series
+67DF,	F7,	AMD Radeon RX P30PH
+67DF,	FF,	AMD Radeon RX 470 Series
+67E0,	00,	AMD Radeon Pro WX Series
+67E3,	00,	AMD Radeon Pro WX 4100
+67E8,	00,	AMD Radeon Pro WX Series
+67E8,	01,	AMD Radeon Pro WX Series
+67E8,	80,	AMD Radeon E9260 Graphics
+67EB,	00,	AMD Radeon Pro V5300X
+67EF,	C0,	AMD Radeon RX Graphics
+67EF,	C1,	AMD Radeon RX 460 Graphics
+67EF,	C2,	AMD Radeon Pro Series
+67EF,	C3,	AMD Radeon RX Series
+67EF,	C5,	AMD Radeon RX 460 Graphics
+67EF,	C7,	AMD Radeon RX Graphics
+67EF,	CF,	AMD Radeon RX 460 Graphics
+67EF,	E0,	AMD Radeon RX 560 Series
+67EF,	E1,	AMD Radeon RX Series
+67EF,	E2,	AMD Radeon RX 560X
+67EF,	E3,	AMD Radeon RX Series
+67EF,	E5,	AMD Radeon RX 560 Series
+67EF,	E7,	AMD Radeon RX 560 Series
+67EF,	EF,	AMD Radeon 550 Series
+67EF,	FF,	AMD Radeon RX 460 Graphics
+67FF,	C0,	AMD Radeon Pro 465
+67FF,	C1,	AMD Radeon RX 560 Series
+67FF,	CF,	AMD Radeon RX 560 Series
+67FF,	EF,	AMD Radeon RX 560 Series
+67FF,	FF,	AMD Radeon RX 550 Series
+6800,	00,	AMD Radeon HD 7970M
+6801,	00,	AMD Radeon HD 8970M
+6806,	00,	AMD Radeon R9 M290X
+6808,	00,	AMD FirePro W7000
+6808,	00,	ATI FirePro V (FireGL V) Graphics Adapter
+6809,	00,	ATI FirePro W5000
+6810,	00,	AMD Radeon R9 200 Series
+6810,	81,	AMD Radeon R9 370 Series
+6811,	00,	AMD Radeon R9 200 Series
+6811,	81,	AMD Radeon R7 370 Series
+6818,	00,	AMD Radeon HD 7800 Series
+6819,	00,	AMD Radeon HD 7800 Series
+6820,	00,	AMD Radeon R9 M275X
+6820,	81,	AMD Radeon R9 M375
+6820,	83,	AMD Radeon R9 M375X
+6821,	00,	AMD Radeon R9 M200X Series
+6821,	83,	AMD Radeon R9 M370X
+6821,	87,	AMD Radeon R7 M380
+6822,	00,	AMD Radeon E8860
+6823,	00,	AMD Radeon R9 M200X Series
+6825,	00,	AMD Radeon HD 7800M Series
+6826,	00,	AMD Radeon HD 7700M Series
+6827,	00,	AMD Radeon HD 7800M Series
+6828,	00,	AMD FirePro W600
+682B,	00,	AMD Radeon HD 8800M Series
+682B,	87,	AMD Radeon R9 M360
+682C,	00,	AMD FirePro W4100
+682D,	00,	AMD Radeon HD 7700M Series
+682F,	00,	AMD Radeon HD 7700M Series
+6830,	00,	AMD Radeon 7800M Series
+6831,	00,	AMD Radeon 7700M Series
+6835,	00,	AMD Radeon R7 Series / HD 9000 Series
+6837,	00,	AMD Radeon HD 7700 Series
+683D,	00,	AMD Radeon HD 7700 Series
+683F,	00,	AMD Radeon HD 7700 Series
+684C,	00,	ATI FirePro V (FireGL V) Graphics Adapter
+6860,	00,	AMD Radeon Instinct MI25
+6860,	01,	AMD Radeon Instinct MI25
+6860,	02,	AMD Radeon Instinct MI25
+6860,	03,	AMD Radeon Pro V340
+6860,	04,	AMD Radeon Instinct MI25x2
+6860,	07,	AMD Radeon Pro V320
+6861,	00,	AMD Radeon Pro WX 9100
+6862,	00,	AMD Radeon Pro SSG
+6863,	00,	AMD Radeon Vega Frontier Edition
+6864,	03,	AMD Radeon Pro V340
+6864,	04,	AMD Radeon Instinct MI25x2
+6864,	05,	AMD Radeon Pro V340
+6868,	00,	AMD Radeon Pro WX 8200
+686C,	00,	AMD Radeon Instinct MI25 MxGPU
+686C,	01,	AMD Radeon Instinct MI25 MxGPU
+686C,	02,	AMD Radeon Instinct MI25 MxGPU
+686C,	03,	AMD Radeon Pro V340 MxGPU
+686C,	04,	AMD Radeon Instinct MI25x2 MxGPU
+686C,	05,	AMD Radeon Pro V340L MxGPU
+686C,	06,	AMD Radeon Instinct MI25 MxGPU
+687F,	01,	AMD Radeon RX Vega
+687F,	C0,	AMD Radeon RX Vega
+687F,	C1,	AMD Radeon RX Vega
+687F,	C3,	AMD Radeon RX Vega
+687F,	C7,	AMD Radeon RX Vega
+6900,	00,	AMD Radeon R7 M260
+6900,	81,	AMD Radeon R7 M360
+6900,	83,	AMD Radeon R7 M340
+6900,	C1,	AMD Radeon R5 M465 Series
+6900,	C3,	AMD Radeon R5 M445 Series
+6900,	D1,	AMD Radeon 530 Series
+6900,	D3,	AMD Radeon 530 Series
+6901,	00,	AMD Radeon R5 M255
+6902,	00,	AMD Radeon Series
+6907,	00,	AMD Radeon R5 M255
+6907,	87,	AMD Radeon R5 M315
+6920,	00,	AMD Radeon R9 M395X
+6920,	01,	AMD Radeon R9 M390X
+6921,	00,	AMD Radeon R9 M390X
+6929,	00,	AMD FirePro S7150
+6929,	01,	AMD FirePro S7100X
+692B,	00,	AMD FirePro W7100
+6938,	00,	AMD Radeon R9 200 Series
+6938,	F0,	AMD Radeon R9 200 Series
+6938,	F1,	AMD Radeon R9 380 Series
+6939,	00,	AMD Radeon R9 200 Series
+6939,	F0,	AMD Radeon R9 200 Series
+6939,	F1,	AMD Radeon R9 380 Series
+694C,	C0,	AMD Radeon RX Vega M GH Graphics
+694E,	C0,	AMD Radeon RX Vega M GL Graphics
+6980,	00,	AMD Radeon Pro WX 3100
+6981,	00,	AMD Radeon Pro WX 3200 Series
+6981,	01,	AMD Radeon Pro WX 3200 Series
+6981,	10,	AMD Radeon Pro WX 3200 Series
+6985,	00,	AMD Radeon Pro WX 3100
+6986,	00,	AMD Radeon Pro WX 2100
+6987,	80,	AMD Embedded Radeon E9171
+6987,	C0,	AMD Radeon 550X Series
+6987,	C1,	AMD Radeon RX 640
+6987,	C3,	AMD Radeon 540X Series
+6987,	C7,	AMD Radeon 540
+6995,	00,	AMD Radeon Pro WX 2100
+6997,	00,	AMD Radeon Pro WX 2100
+699F,	81,	AMD Embedded Radeon E9170 Series
+699F,	C0,	AMD Radeon 500 Series
+699F,	C1,	AMD Radeon 540 Series
+699F,	C3,	AMD Radeon 500 Series
+699F,	C7,	AMD Radeon RX 550 / 550 Series
+699F,	C9,	AMD Radeon 540
+6FDF,	E7,	AMD Radeon RX 590 GME
+6FDF,	EF,	AMD Radeon RX 580 2048SP
+7300,	C1,	AMD FirePro S9300 x2
+7300,	C8,	AMD Radeon R9 Fury Series
+7300,	C9,	AMD Radeon Pro Duo
+7300,	CA,	AMD Radeon R9 Fury Series
+7300,	CB,	AMD Radeon R9 Fury Series
+7312,	00,	AMD Radeon Pro W5700
+731E,	C6,	AMD Radeon RX 5700XTB
+731E,	C7,	AMD Radeon RX 5700B
+731F,	C0,	AMD Radeon RX 5700 XT 50th Anniversary
+731F,	C1,	AMD Radeon RX 5700 XT
+731F,	C2,	AMD Radeon RX 5600M
+731F,	C3,	AMD Radeon RX 5700M
+731F,	C4,	AMD Radeon RX 5700
+731F,	C5,	AMD Radeon RX 5700 XT
+731F,	CA,	AMD Radeon RX 5600 XT
+731F,	CB,	AMD Radeon RX 5600 OEM
+7340,	C1,	AMD Radeon RX 5500M
+7340,	C3,	AMD Radeon RX 5300M
+7340,	C5,	AMD Radeon RX 5500 XT
+7340,	C7,	AMD Radeon RX 5500
+7340,	C9,	AMD Radeon RX 5500XTB
+7340,	CF,	AMD Radeon RX 5300
+7341,	00,	AMD Radeon Pro W5500
+7347,	00,	AMD Radeon Pro W5500M
+7360,	41,	AMD Radeon Pro 5600M
+7360,	C3,	AMD Radeon Pro V520
+7362,	C1,	AMD Radeon Pro V540
+7362,	C3,	AMD Radeon Pro V520
+738C,	01,	AMD Instinct MI100
+73A1,	00,	AMD Radeon Pro V620
+73A3,	00,	AMD Radeon Pro W6800
+73A5,	C0,	AMD Radeon RX 6950 XT
+73AE,	00,	AMD Radeon Pro V620 MxGPU
+73AF,	C0,	AMD Radeon RX 6900 XT
+73BF,	C0,	AMD Radeon RX 6900 XT
+73BF,	C1,	AMD Radeon RX 6800 XT
+73BF,	C3,	AMD Radeon RX 6800
+73DF,	C0,	AMD Radeon RX 6750 XT
+73DF,	C1,	AMD Radeon RX 6700 XT
+73DF,	C2,	AMD Radeon RX 6800M
+73DF,	C3,	AMD Radeon RX 6800M
+73DF,	C5,	AMD Radeon RX 6700 XT
+73DF,	CF,	AMD Radeon RX 6700M
+73DF,	D5,	AMD Radeon RX 6750 GRE 12GB
+73DF,	D7,	AMD TDC-235
+73DF,	DF,	AMD Radeon RX 6700
+73DF,	E5,	AMD Radeon RX 6750 GRE 12GB
+73DF,	FF,	AMD Radeon RX 6700
+73E0,	00,	AMD Radeon RX 6600M
+73E1,	00,	AMD Radeon Pro W6600M
+73E3,	00,	AMD Radeon Pro W6600
+73EF,	C0,	AMD Radeon RX 6800S
+73EF,	C1,	AMD Radeon RX 6650 XT
+73EF,	C2,	AMD Radeon RX 6700S
+73EF,	C3,	AMD Radeon RX 6650M
+73EF,	C4,	AMD Radeon RX 6650M XT
+73FF,	C1,	AMD Radeon RX 6600 XT
+73FF,	C3,	AMD Radeon RX 6600M
+73FF,	C7,	AMD Radeon RX 6600
+73FF,	CB,	AMD Radeon RX 6600S
+73FF,	CF,	AMD Radeon RX 6600 LE
+73FF,	DF,	AMD Radeon RX 6750 GRE 10GB
+7408,	00,	AMD Instinct MI250X
+740C,	01,	AMD Instinct MI250X / MI250
+740F,	02,	AMD Instinct MI210
+7421,	00,	AMD Radeon Pro W6500M
+7422,	00,	AMD Radeon Pro W6400
+7423,	00,	AMD Radeon Pro W6300M
+7423,	01,	AMD Radeon Pro W6300
+7424,	00,	AMD Radeon RX 6300
+743F,	C1,	AMD Radeon RX 6500 XT
+743F,	C3,	AMD Radeon RX 6500
+743F,	C3,	AMD Radeon RX 6500M
+743F,	C7,	AMD Radeon RX 6400
+743F,	C8,	AMD Radeon RX 6500M
+743F,	CC,	AMD Radeon 6550S
+743F,	CE,	AMD Radeon RX 6450M
+743F,	CF,	AMD Radeon RX 6300M
+743F,	D3,	AMD Radeon RX 6550M
+743F,	D7,	AMD Radeon RX 6400
+7448,	00,	AMD Radeon Pro W7900
+7449,	00,	AMD Radeon Pro W7800 48GB
+744A,	00,	AMD Radeon Pro W7900 Dual Slot
+744B,	00,	AMD Radeon Pro W7900D
+744C,	C8,	AMD Radeon RX 7900 XTX
+744C,	CC,	AMD Radeon RX 7900 XT
+744C,	CE,	AMD Radeon RX 7900 GRE
+744C,	CF,	AMD Radeon RX 7900M
+745E,	CC,	AMD Radeon Pro W7800
+7460,	00,	AMD Radeon Pro V710
+7461,	00,	AMD Radeon Pro V710 MxGPU
+7470,	00,	AMD Radeon Pro W7700
+747E,	C8,	AMD Radeon RX 7800 XT
+747E,	D8,	AMD Radeon RX 7800M
+747E,	DB,	AMD Radeon RX 7700
+747E,	FF,	AMD Radeon RX 7700 XT
+7480,	00,	AMD Radeon Pro W7600
+7480,	C0,	AMD Radeon RX 7600 XT
+7480,	C1,	AMD Radeon RX 7700S
+7480,	C2,	AMD Radeon RX 7650 GRE
+7480,	C3,	AMD Radeon RX 7600S
+7480,	C7,	AMD Radeon RX 7600M XT
+7480,	CF,	AMD Radeon RX 7600
+7481,   C7,     AMD Steam Machine
+7483,	CF,	AMD Radeon RX 7600M
+7489,	00,	AMD Radeon Pro W7500
+7499,	00,	AMD Radeon Pro W7400
+7499,	C0,	AMD Radeon RX 7400
+7499,	C1,	AMD Radeon RX 7300
+74A0,	00,	AMD Instinct MI300A
+74A1,	00,	AMD Instinct MI300X
+74A2,	00,	AMD Instinct MI308X
+74A5,	00,	AMD Instinct MI325X
+74A8,	00,	AMD Instinct MI308X HF
+74A9,	00,	AMD Instinct MI300X HF
+74B5,	00,	AMD Instinct MI300X VF
+74B6,	00,	AMD Instinct MI308X
+74BD,	00,	AMD Instinct MI300X HF
+7550,	C0,	AMD Radeon RX 9070 XT
+7550,	C2,	AMD Radeon RX 9070 GRE
+7550,	C3,	AMD Radeon RX 9070
+7551,	C0,	AMD Radeon AI PRO R9700
+7590,	C0,	AMD Radeon RX 9060 XT
+7590,	C7,	AMD Radeon RX 9060
+75A0,	C0,	AMD Instinct MI350X
+75A3,	C0,	AMD Instinct MI355X
+75B0,	C0,	AMD Instinct MI350X VF
+75B3,	C0,	AMD Instinct MI355X VF
+9830,	00,	AMD Radeon HD 8400 / R3 Series
+9831,	00,	AMD Radeon HD 8400E
+9832,	00,	AMD Radeon HD 8330
+9833,	00,	AMD Radeon HD 8330E
+9834,	00,	AMD Radeon HD 8210
+9835,	00,	AMD Radeon HD 8210E
+9836,	00,	AMD Radeon HD 8200 / R3 Series
+9837,	00,	AMD Radeon HD 8280E
+9838,	00,	AMD Radeon HD 8200 / R3 series
+9839,	00,	AMD Radeon HD 8180
+983D,	00,	AMD Radeon HD 8250
+9850,	00,	AMD Radeon R3 Graphics
+9850,	03,	AMD Radeon R3 Graphics
+9850,	40,	AMD Radeon R2 Graphics
+9850,	45,	AMD Radeon R3 Graphics
+9851,	00,	AMD Radeon R4 Graphics
+9851,	01,	AMD Radeon R5E Graphics
+9851,	05,	AMD Radeon R5 Graphics
+9851,	06,	AMD Radeon R5E Graphics
+9851,	40,	AMD Radeon R4 Graphics
+9851,	45,	AMD Radeon R5 Graphics
+9852,	00,	AMD Radeon R2 Graphics
+9852,	40,	AMD Radeon E1 Graphics
+9853,	00,	AMD Radeon R2 Graphics
+9853,	01,	AMD Radeon R4E Graphics
+9853,	03,	AMD Radeon R2 Graphics
+9853,	05,	AMD Radeon R1E Graphics
+9853,	06,	AMD Radeon R1E Graphics
+9853,	07,	AMD Radeon R1E Graphics
+9853,	08,	AMD Radeon R1E Graphics
+9853,	40,	AMD Radeon R2 Graphics
+9854,	00,	AMD Radeon R3 Graphics
+9854,	01,	AMD Radeon R3E Graphics
+9854,	02,	AMD Radeon R3 Graphics
+9854,	05,	AMD Radeon R2 Graphics
+9854,	06,	AMD Radeon R4 Graphics
+9854,	07,	AMD Radeon R3 Graphics
+9855,	02,	AMD Radeon R6 Graphics
+9855,	05,	AMD Radeon R4 Graphics
+9856,	00,	AMD Radeon R2 Graphics
+9856,	01,	AMD Radeon R2E Graphics
+9856,	02,	AMD Radeon R2 Graphics
+9856,	05,	AMD Radeon R1E Graphics
+9856,	06,	AMD Radeon R2 Graphics
+9856,	07,	AMD Radeon R1E Graphics
+9856,	08,	AMD Radeon R1E Graphics
+9856,	13,	AMD Radeon R1E Graphics
+9874,	81,	AMD Radeon R6 Graphics
+9874,	84,	AMD Radeon R7 Graphics
+9874,	85,	AMD Radeon R6 Graphics
+9874,	87,	AMD Radeon R5 Graphics
+9874,	88,	AMD Radeon R7E Graphics
+9874,	89,	AMD Radeon R6E Graphics
+9874,	C4,	AMD Radeon R7 Graphics
+9874,	C5,	AMD Radeon R6 Graphics
+9874,	C6,	AMD Radeon R6 Graphics
+9874,	C7,	AMD Radeon R5 Graphics
+9874,	C8,	AMD Radeon R7 Graphics
+9874,	C9,	AMD Radeon R7 Graphics
+9874,	CA,	AMD Radeon R5 Graphics
+9874,	CB,	AMD Radeon R5 Graphics
+9874,	CC,	AMD Radeon R7 Graphics
+9874,	CD,	AMD Radeon R7 Graphics
+9874,	CE,	AMD Radeon R5 Graphics
+9874,	E1,	AMD Radeon R7 Graphics
+9874,	E2,	AMD Radeon R7 Graphics
+9874,	E3,	AMD Radeon R7 Graphics
+9874,	E4,	AMD Radeon R7 Graphics
+9874,	E5,	AMD Radeon R5 Graphics
+9874,	E6,	AMD Radeon R5 Graphics
+98E4,	80,	AMD Radeon R5E Graphics
+98E4,	81,	AMD Radeon R4E Graphics
+98E4,	83,	AMD Radeon R2E Graphics
+98E4,	84,	AMD Radeon R2E Graphics
+98E4,	86,	AMD Radeon R1E Graphics
+98E4,	C0,	AMD Radeon R4 Graphics
+98E4,	C1,	AMD Radeon R5 Graphics
+98E4,	C2,	AMD Radeon R4 Graphics
+98E4,	C4,	AMD Radeon R5 Graphics
+98E4,	C6,	AMD Radeon R5 Graphics
+98E4,	C8,	AMD Radeon R4 Graphics
+98E4,	C9,	AMD Radeon R4 Graphics
+98E4,	CA,	AMD Radeon R5 Graphics
+98E4,	D0,	AMD Radeon R2 Graphics
+98E4,	D1,	AMD Radeon R2 Graphics
+98E4,	D2,	AMD Radeon R2 Graphics
+98E4,	D4,	AMD Radeon R2 Graphics
+98E4,	D9,	AMD Radeon R5 Graphics
+98E4,	DA,	AMD Radeon R5 Graphics
+98E4,	DB,	AMD Radeon R3 Graphics
+98E4,	E1,	AMD Radeon R3 Graphics
+98E4,	E2,	AMD Radeon R3 Graphics
+98E4,	E9,	AMD Radeon R4 Graphics
+98E4,	EA,	AMD Radeon R4 Graphics
+98E4,	EB,	AMD Radeon R3 Graphics
+98E4,	EB,	AMD Radeon R4 Graphics
--- a/agent/test-data/nvtop.json
+++ b/agent/test-data/nvtop.json
@@ -0,0 +1,34 @@
+[
+  {
+   "device_name": "NVIDIA GeForce RTX 3050 Ti Laptop GPU",
+   "gpu_clock": "1485MHz",
+   "mem_clock": "6001MHz",
+   "temp": "48C",
+   "fan_speed": null,
+   "power_draw": "13W",
+   "gpu_util": "5%",
+   "encode": "0%",
+   "decode": "0%",
+   "mem_util": "8%",
+   "mem_total": "4294967296",
+   "mem_used": "349372416",
+   "mem_free": "3945594880",
+   "processes" : []
+  },
+  {
+   "device_name": "AMD Radeon 680M",
+   "gpu_clock": "2200MHz",
+   "mem_clock": "2400MHz",
+   "temp": "48C",
+   "fan_speed": "CPU Fan",
+   "power_draw": "9W",
+   "gpu_util": "12%",
+   "encode": null,
+   "decode": "0%",
+   "mem_util": "7%",
+   "mem_total": "16929173504",
+   "mem_used": "1213784064",
+   "mem_free": "15715389440",
+   "processes" : []
+  }
+]
--- a/agent/test-data/system_info.json
+++ b/agent/test-data/system_info.json
@@ -0,0 +1,17 @@
+{
+  "ID": "7TRN:IPZB:QYBB:VPBQ:UMPP:KARE:6ZNR:XE6T:7EWV:PKF4:ZOJD:TPYS",
+  "Containers": 14,
+  "ContainersRunning": 3,
+  "ContainersPaused": 1,
+  "ContainersStopped": 10,
+  "Images": 508,
+  "Driver": "overlay2",
+  "KernelVersion": "6.8.0-31-generic",
+  "OperatingSystem": "Ubuntu 24.04 LTS",
+  "OSVersion": "24.04",
+  "OSType": "linux",
+  "Architecture": "x86_64",
+  "NCPU": 4,
+  "MemTotal": 2095882240,
+  "ServerVersion": "27.0.1"
+}
--- a/agent/update.go
+++ b/agent/update.go
@@ -1,12 +1,10 @@
 package agent

 import (
-	"fmt"
 	"log"
 	"os"
 	"os/exec"
 	"runtime"
-	"strings"

 	"github.com/henrygd/beszel/internal/ghupdate"
 )
@@ -65,9 +63,9 @@ func detectRestarter() restarter {
 	if path, err := exec.LookPath("rc-service"); err == nil {
 		return &openRCRestarter{cmd: path}
 	}
-    if path, err := exec.LookPath("procd"); err == nil {
-        return &openWRTRestarter{cmd: path}
-    }
+	if path, err := exec.LookPath("procd"); err == nil {
+		return &openWRTRestarter{cmd: path}
+	}
 	if path, err := exec.LookPath("service"); err == nil {
 		if runtime.GOOS == "freebsd" {
 			return &freeBSDRestarter{cmd: path}
@@ -81,7 +79,7 @@ func detectRestarter() restarter {
 func Update(useMirror bool) error {
 	exePath, _ := os.Executable()

-	dataDir, err := getDataDir()
+	dataDir, err := GetDataDir()
 	if err != nil {
 		dataDir = os.TempDir()
 	}
@@ -108,12 +106,12 @@ func Update(useMirror bool) error {
 		}
 	}

-	// 6) Fix SELinux context if necessary
-	if err := handleSELinuxContext(exePath); err != nil {
+	// Fix SELinux context if necessary
+	if err := ghupdate.HandleSELinuxContext(exePath); err != nil {
 		ghupdate.ColorPrintf(ghupdate.ColorYellow, "Warning: SELinux context handling: %v", err)
 	}

-	// 7) Restart service if running under a recognised init system
+	// Restart service if running under a recognised init system
 	if r := detectRestarter(); r != nil {
 		if err := r.Restart(); err != nil {
 			ghupdate.ColorPrintf(ghupdate.ColorYellow, "Warning: failed to restart service: %v", err)
@@ -127,42 +125,3 @@ func Update(useMirror bool) error {

 	return nil
 }
-
-// handleSELinuxContext restores or applies the correct SELinux label to the binary.
-func handleSELinuxContext(path string) error {
-	out, err := exec.Command("getenforce").Output()
-	if err != nil {
-		// SELinux not enabled or getenforce not available
-		return nil
-	}
-	state := strings.TrimSpace(string(out))
-	if state == "Disabled" {
-		return nil
-	}
-
-	ghupdate.ColorPrint(ghupdate.ColorYellow, "SELinux is enabled; applying context…")
-	var errs []string
-
-	// Try persistent context via semanage+restorecon
-	if semanagePath, err := exec.LookPath("semanage"); err == nil {
-		if err := exec.Command(semanagePath, "fcontext", "-a", "-t", "bin_t", path).Run(); err != nil {
-			errs = append(errs, "semanage fcontext failed: "+err.Error())
-		} else if restoreconPath, err := exec.LookPath("restorecon"); err == nil {
-			if err := exec.Command(restoreconPath, "-v", path).Run(); err != nil {
-				errs = append(errs, "restorecon failed: "+err.Error())
-			}
-		}
-	}
-
-	// Fallback to temporary context via chcon
-	if chconPath, err := exec.LookPath("chcon"); err == nil {
-		if err := exec.Command(chconPath, "-t", "bin_t", path).Run(); err != nil {
-			errs = append(errs, "chcon failed: "+err.Error())
-		}
-	}
-
-	if len(errs) > 0 {
-		return fmt.Errorf("SELinux context errors: %s", strings.Join(errs, "; "))
-	}
-	return nil
-}
--- a/agent/utils.go
+++ b/agent/utils.go
@@ -1,15 +0,0 @@
-package agent
-
-import "math"
-
-func bytesToMegabytes(b float64) float64 {
-	return twoDecimals(b / 1048576)
-}
-
-func bytesToGigabytes(b uint64) float64 {
-	return twoDecimals(float64(b) / 1073741824)
-}
-
-func twoDecimals(value float64) float64 {
-	return math.Round(value*100) / 100
-}
--- a/agent/utils/utils.go
+++ b/agent/utils/utils.go
@@ -0,0 +1,88 @@
+package utils
+
+import (
+	"io"
+	"math"
+	"os"
+	"strconv"
+	"strings"
+)
+
+// GetEnv retrieves an environment variable with a "BESZEL_AGENT_" prefix, or falls back to the unprefixed key.
+func GetEnv(key string) (value string, exists bool) {
+	if value, exists = os.LookupEnv("BESZEL_AGENT_" + key); exists {
+		return value, exists
+	}
+	return os.LookupEnv(key)
+}
+
+// BytesToMegabytes converts bytes to megabytes and rounds to two decimal places.
+func BytesToMegabytes(b float64) float64 {
+	return TwoDecimals(b / 1048576)
+}
+
+// BytesToGigabytes converts bytes to gigabytes and rounds to two decimal places.
+func BytesToGigabytes(b uint64) float64 {
+	return TwoDecimals(float64(b) / 1073741824)
+}
+
+// TwoDecimals rounds a float64 value to two decimal places.
+func TwoDecimals(value float64) float64 {
+	return math.Round(value*100) / 100
+}
+
+// func RoundFloat(val float64, precision uint) float64 {
+//     ratio := math.Pow(10, float64(precision))
+//     return math.Round(val*ratio) / ratio
+// }
+
+// ReadStringFile returns trimmed file contents or empty string on error.
+func ReadStringFile(path string) string {
+	content, _ := ReadStringFileOK(path)
+	return content
+}
+
+// ReadStringFileOK returns trimmed file contents and read success.
+func ReadStringFileOK(path string) (string, bool) {
+	b, err := os.ReadFile(path)
+	if err != nil {
+		return "", false
+	}
+	return strings.TrimSpace(string(b)), true
+}
+
+// ReadStringFileLimited reads up to maxSize bytes from a file with a single Read call and returns
+// the trimmed result, avoiding large buffers and panics with pseudo-files whose size is misreported.
+func ReadStringFileLimited(path string, maxSize int) (string, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+
+	buf := make([]byte, maxSize)
+	n, err := f.Read(buf)
+	if err != nil && err != io.EOF {
+		return "", err
+	}
+	return strings.TrimSpace(string(buf[:n])), nil
+}
+
+// FileExists reports whether os.Stat succeeds for the given path; any Stat error yields false.
+func FileExists(path string) bool {
+	_, err := os.Stat(path)
+	return err == nil
+}
+
+// ReadUintFile parses a decimal uint64 value from a file.
+func ReadUintFile(path string) (uint64, bool) {
+	raw, ok := ReadStringFileOK(path)
+	if !ok {
+		return 0, false
+	}
+	parsed, err := strconv.ParseUint(raw, 10, 64)
+	if err != nil {
+		return 0, false
+	}
+	return parsed, true
+}
--- a/agent/utils/utils_test.go
+++ b/agent/utils/utils_test.go
@@ -0,0 +1,158 @@
+package utils
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// TestTwoDecimals checks rounding to two decimal places, including halfway
+// inputs on both sides of zero.
+func TestTwoDecimals(t *testing.T) {
+	cases := []struct {
+		name string
+		in   float64
+		want float64
+	}{
+		{"round down", 1.234, 1.23},
+		{"round half up", 1.235, 1.24}, // math.Round rounds half away from zero
+		{"no rounding needed", 1.23, 1.23},
+		{"negative number", -1.235, -1.24}, // away from zero, i.e. more negative
+		{"zero", 0.0, 0.0},
+		{"large number", 123.456, 123.46}, // .456 rounds up to .46
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.Equal(t, tc.want, TwoDecimals(tc.in))
+		})
+	}
+}
+
+// TestBytesToMegabytes verifies the byte-to-megabyte conversion for whole and
+// fractional multiples of 1 MB (1048576 bytes).
+func TestBytesToMegabytes(t *testing.T) {
+	cases := []struct {
+		name  string
+		bytes float64
+		want  float64
+	}{
+		{"1 MB", 1048576, 1.0},
+		{"512 KB", 524288, 0.5},
+		{"zero", 0, 0},
+		{"large value", 1073741824, 1024}, // 1 GB = 1024 MB
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.Equal(t, tc.want, BytesToMegabytes(tc.bytes))
+		})
+	}
+}
+
+// TestBytesToGigabytes verifies the byte-to-gigabyte conversion for whole and
+// fractional multiples of 1 GB (1073741824 bytes).
+func TestBytesToGigabytes(t *testing.T) {
+	cases := []struct {
+		name  string
+		bytes uint64
+		want  float64
+	}{
+		{"1 GB", 1073741824, 1.0},
+		{"512 MB", 536870912, 0.5},
+		{"0 GB", 0, 0},
+		{"2 GB", 2147483648, 2.0},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.Equal(t, tc.want, BytesToGigabytes(tc.bytes))
+		})
+	}
+}
+
+func TestFileFunctions(t *testing.T) {
+	tmpDir := t.TempDir()
+	testFilePath := filepath.Join(tmpDir, "test.txt")
+	testContent := "hello world"
+
+	// Test FileExists (false)
+	assert.False(t, FileExists(testFilePath))
+
+	// Test ReadStringFileOK (false)
+	content, ok := ReadStringFileOK(testFilePath)
+	assert.False(t, ok)
+	assert.Empty(t, content)
+
+	// Test ReadStringFile (empty)
+	assert.Empty(t, ReadStringFile(testFilePath))
+
+	// Write file
+	err := os.WriteFile(testFilePath, []byte(testContent+"\n "), 0644)
+	assert.NoError(t, err)
+
+	// Test FileExists (true)
+	assert.True(t, FileExists(testFilePath))
+
+	// Test ReadStringFileOK (true)
+	content, ok = ReadStringFileOK(testFilePath)
+	assert.True(t, ok)
+	assert.Equal(t, testContent, content)
+
+	// Test ReadStringFile (content)
+	assert.Equal(t, testContent, ReadStringFile(testFilePath))
+}
+
+// TestReadUintFile covers ReadUintFile for a valid number surrounded by
+// whitespace, non-numeric contents, and a missing file.
+func TestReadUintFile(t *testing.T) {
+	tmpDir := t.TempDir()
+
+	// writeFixture creates a file in tmpDir and aborts the subtest if the write
+	// fails, so a broken fixture is not mistaken for a ReadUintFile failure.
+	writeFixture := func(t *testing.T, name, content string) string {
+		t.Helper()
+		path := filepath.Join(tmpDir, name)
+		if err := os.WriteFile(path, []byte(content), 0644); err != nil {
+			t.Fatalf("writing fixture %s: %v", name, err)
+		}
+		return path
+	}
+
+	t.Run("valid uint", func(t *testing.T) {
+		path := writeFixture(t, "uint.txt", " 12345\n")
+		val, ok := ReadUintFile(path)
+		assert.True(t, ok)
+		assert.Equal(t, uint64(12345), val)
+	})
+
+	t.Run("invalid uint", func(t *testing.T) {
+		path := writeFixture(t, "invalid.txt", "abc")
+		val, ok := ReadUintFile(path)
+		assert.False(t, ok)
+		assert.Equal(t, uint64(0), val)
+	})
+
+	t.Run("missing file", func(t *testing.T) {
+		path := filepath.Join(tmpDir, "missing.txt")
+		val, ok := ReadUintFile(path)
+		assert.False(t, ok)
+		assert.Equal(t, uint64(0), val)
+	})
+}
+
+// TestGetEnv verifies that GetEnv prefers the BESZEL_AGENT_-prefixed variable,
+// falls back to the unprefixed one, and reports absence when neither is set.
+func TestGetEnv(t *testing.T) {
+	key := "TEST_VAR"
+	prefixedKey := "BESZEL_AGENT_" + key
+
+	// unsetEnv removes name for the duration of the calling subtest, so the
+	// outcome does not depend on the environment the tests are started in.
+	unsetEnv := func(t *testing.T, name string) {
+		t.Helper()
+		// t.Setenv registers a cleanup that restores the original state.
+		t.Setenv(name, "")
+		if err := os.Unsetenv(name); err != nil {
+			t.Fatalf("unsetting %s: %v", name, err)
+		}
+	}
+
+	t.Run("prefixed variable exists", func(t *testing.T) {
+		t.Setenv(prefixedKey, "prefixed_val")
+		t.Setenv(key, "unprefixed_val")
+
+		val, exists := GetEnv(key)
+		assert.True(t, exists)
+		assert.Equal(t, "prefixed_val", val)
+	})
+
+	t.Run("only unprefixed variable exists", func(t *testing.T) {
+		unsetEnv(t, prefixedKey)
+		t.Setenv(key, "unprefixed_val")
+
+		val, exists := GetEnv(key)
+		assert.True(t, exists)
+		assert.Equal(t, "unprefixed_val", val)
+	})
+
+	t.Run("neither variable exists", func(t *testing.T) {
+		unsetEnv(t, prefixedKey)
+		unsetEnv(t, key)
+
+		val, exists := GetEnv(key)
+		assert.False(t, exists)
+		assert.Empty(t, val)
+	})
+}
--- a/agent/zfs/zfs_freebsd.go
+++ b/agent/zfs/zfs_freebsd.go
@@ -0,0 +1,11 @@
+//go:build freebsd
+
+package zfs
+
+import (
+	"golang.org/x/sys/unix"
+)
+
+func ARCSize() (uint64, error) {
+	return unix.SysctlUint64("kstat.zfs.misc.arcstats.size")
+}
--- a/agent/zfs/zfs_linux.go
+++ b/agent/zfs/zfs_linux.go
@@ -0,0 +1,34 @@
+//go:build linux
+
+// Package zfs provides functions to read ZFS statistics.
+package zfs
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+	"strconv"
+	"strings"
+)
+
+// ARCSize returns the "size" value reported in /proc/spl/kstat/zfs/arcstats.
+//
+// The row is located by an exact match on its first whitespace-separated
+// column and the value is taken from the third column. An error is returned if
+// the file cannot be opened or read, the row is missing or has fewer than
+// three columns, or the value is not a base-10 uint64.
+func ARCSize() (uint64, error) {
+	file, err := os.Open("/proc/spl/kstat/zfs/arcstats")
+	if err != nil {
+		return 0, err
+	}
+	defer file.Close()
+
+	scanner := bufio.NewScanner(file)
+	for scanner.Scan() {
+		line := scanner.Text()
+		fields := strings.Fields(line)
+		// Compare the whole first column so that a stat whose name merely
+		// begins with "size" is never mistaken for the ARC size.
+		if len(fields) == 0 || fields[0] != "size" {
+			continue
+		}
+		if len(fields) < 3 {
+			return 0, fmt.Errorf("unexpected arcstats size format: %s", line)
+		}
+		size, err := strconv.ParseUint(fields[2], 10, 64)
+		if err != nil {
+			// Do not leak ParseUint's partial value alongside the error.
+			return 0, err
+		}
+		return size, nil
+	}
+	// Scan also stops on a read error; report that instead of "not found".
+	if err := scanner.Err(); err != nil {
+		return 0, fmt.Errorf("reading arcstats: %w", err)
+	}
+
+	return 0, fmt.Errorf("size field not found in arcstats")
+}
--- a/agent/zfs/zfs_unsupported.go
+++ b/agent/zfs/zfs_unsupported.go
@@ -0,0 +1,9 @@
+//go:build !linux && !freebsd
+
+package zfs
+
+import "errors"
+
+// ARCSize is the implementation used when this file's build constraint
+// (!linux && !freebsd) is satisfied. It always returns 0 and
+// errors.ErrUnsupported.
+func ARCSize() (uint64, error) {
+	return 0, errors.ErrUnsupported
+}
--- a/beszel.go
+++ b/beszel.go
@@ -6,7 +6,7 @@ import "github.com/blang/semver"

 const (
 	// Version is the current version of the application.
-	Version = "0.16.1"
+	Version = "0.18.4"
 	// AppName is the name of the application.
 	AppName = "beszel"
 )
--- a/go.mod
+++ b/go.mod
@@ -1,25 +1,27 @@
 module github.com/henrygd/beszel

-go 1.25.3
+go 1.26.1

 require (
 	github.com/blang/semver v3.5.1+incompatible
-	github.com/coreos/go-systemd/v22 v22.6.0
+	github.com/coreos/go-systemd/v22 v22.7.0
 	github.com/distatus/battery v0.11.0
+	github.com/ebitengine/purego v0.9.1
 	github.com/fxamacker/cbor/v2 v2.9.0
 	github.com/gliderlabs/ssh v0.3.8
 	github.com/google/uuid v1.6.0
 	github.com/lxzan/gws v1.8.9
-	github.com/nicholas-fedor/shoutrrr v0.12.0
-	github.com/pocketbase/dbx v1.11.0
-	github.com/pocketbase/pocketbase v0.33.0
-	github.com/shirou/gopsutil/v4 v4.25.10
+	github.com/nicholas-fedor/shoutrrr v0.13.2
+	github.com/pocketbase/dbx v1.12.0
+	github.com/pocketbase/pocketbase v0.36.4
+	github.com/shirou/gopsutil/v4 v4.26.1
 	github.com/spf13/cast v1.10.0
-	github.com/spf13/cobra v1.10.1
+	github.com/spf13/cobra v1.10.2
 	github.com/spf13/pflag v1.0.10
 	github.com/stretchr/testify v1.11.1
-	golang.org/x/crypto v0.44.0
-	golang.org/x/exp v0.0.0-20251113190631-e25ba8c21ef6
+	golang.org/x/crypto v0.48.0
+	golang.org/x/exp v0.0.0-20260218203240-3dfff04db8fa
+	golang.org/x/sys v0.41.0
 	gopkg.in/yaml.v3 v3.0.1
 )

@@ -31,18 +33,17 @@ require (
 	github.com/dolthub/maphash v0.1.0 // indirect
 	github.com/domodwyer/mailyak/v3 v3.6.2 // indirect
 	github.com/dustin/go-humanize v1.0.1 // indirect
-	github.com/ebitengine/purego v0.9.1 // indirect
 	github.com/fatih/color v1.18.0 // indirect
-	github.com/gabriel-vasile/mimetype v1.4.11 // indirect
+	github.com/gabriel-vasile/mimetype v1.4.13 // indirect
 	github.com/ganigeorgiev/fexpr v0.5.0 // indirect
 	github.com/go-ole/go-ole v1.3.0 // indirect
 	github.com/go-ozzo/ozzo-validation/v4 v4.3.0 // indirect
 	github.com/go-sql-driver/mysql v1.9.1 // indirect
-	github.com/godbus/dbus/v5 v5.2.0 // indirect
-	github.com/golang-jwt/jwt/v5 v5.3.0 // indirect
+	github.com/godbus/dbus/v5 v5.2.2 // indirect
+	github.com/golang-jwt/jwt/v5 v5.3.1 // indirect
 	github.com/inconshreveable/mousetrap v1.1.0 // indirect
-	github.com/klauspost/compress v1.18.1 // indirect
-	github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3 // indirect
+	github.com/klauspost/compress v1.18.4 // indirect
+	github.com/lufia/plan9stats v0.0.0-20260216142805-b3301c5f2a88 // indirect
 	github.com/mattn/go-colorable v0.1.14 // indirect
 	github.com/mattn/go-isatty v0.0.20 // indirect
 	github.com/ncruces/go-strftime v1.0.0 // indirect
@@ -53,16 +54,15 @@ require (
 	github.com/tklauser/numcpus v0.11.0 // indirect
 	github.com/x448/float16 v0.8.4 // indirect
 	github.com/yusufpapurcu/wmi v1.2.4 // indirect
-	golang.org/x/image v0.33.0 // indirect
-	golang.org/x/net v0.47.0 // indirect
-	golang.org/x/oauth2 v0.33.0 // indirect
-	golang.org/x/sync v0.18.0 // indirect
-	golang.org/x/sys v0.38.0 // indirect
-	golang.org/x/term v0.37.0 // indirect
-	golang.org/x/text v0.31.0 // indirect
+	golang.org/x/image v0.36.0 // indirect
+	golang.org/x/net v0.50.0 // indirect
+	golang.org/x/oauth2 v0.35.0 // indirect
+	golang.org/x/sync v0.19.0 // indirect
+	golang.org/x/term v0.40.0 // indirect
+	golang.org/x/text v0.34.0 // indirect
 	howett.net/plist v1.0.1 // indirect
-	modernc.org/libc v1.66.10 // indirect
+	modernc.org/libc v1.67.6 // indirect
 	modernc.org/mathutil v1.7.1 // indirect
 	modernc.org/memory v1.11.0 // indirect
-	modernc.org/sqlite v1.40.0 // indirect
+	modernc.org/sqlite v1.45.0 // indirect
 )
--- a/go.sum
+++ b/go.sum
@@ -9,8 +9,8 @@ github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3d
 github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
 github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ=
 github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
-github.com/coreos/go-systemd/v22 v22.6.0 h1:aGVa/v8B7hpb0TKl0MWoAavPDmHvobFe5R5zn0bCJWo=
-github.com/coreos/go-systemd/v22 v22.6.0/go.mod h1:iG+pp635Fo7ZmV/j14KUcmEyWF+0X7Lua8rrTWzYgWU=
+github.com/coreos/go-systemd/v22 v22.7.0 h1:LAEzFkke61DFROc7zNLX/WA2i5J8gYqe0rSj9KI28KA=
+github.com/coreos/go-systemd/v22 v22.7.0/go.mod h1:xNUYtjHu2EDXbsxz1i41wouACIwT7Ybq9o0BQhMwD0w=
 github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
@@ -33,8 +33,8 @@ github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHk
 github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
 github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM=
 github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ=
-github.com/gabriel-vasile/mimetype v1.4.11 h1:AQvxbp830wPhHTqc1u7nzoLT+ZFxGY7emj5DR5DYFik=
-github.com/gabriel-vasile/mimetype v1.4.11/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s=
+github.com/gabriel-vasile/mimetype v1.4.13 h1:46nXokslUBsAJE/wMsp5gtO500a4F3Nkz9Ufpk2AcUM=
+github.com/gabriel-vasile/mimetype v1.4.13/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s=
 github.com/ganigeorgiev/fexpr v0.5.0 h1:XA9JxtTE/Xm+g/JFI6RfZEHSiQlk+1glLvRK1Lpv/Tk=
 github.com/ganigeorgiev/fexpr v0.5.0/go.mod h1:RyGiGqmeXhEQ6+mlGdnUleLHgtzzu/VGO2WtJkF5drE=
 github.com/gliderlabs/ssh v0.3.8 h1:a4YXD1V7xMF9g5nTkdfnja3Sxy1PVDCj1Zg4Wb8vY6c=
@@ -51,15 +51,15 @@ github.com/go-sql-driver/mysql v1.9.1 h1:FrjNGn/BsJQjVRuSa8CBrM5BWA9BWoXXat3KrtS
 github.com/go-sql-driver/mysql v1.9.1/go.mod h1:qn46aNg1333BRMNU69Lq93t8du/dwxI64Gl8i5p1WMU=
 github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
 github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
-github.com/godbus/dbus/v5 v5.2.0 h1:3WexO+U+yg9T70v9FdHr9kCxYlazaAXUhx2VMkbfax8=
-github.com/godbus/dbus/v5 v5.2.0/go.mod h1:3AAv2+hPq5rdnr5txxxRwiGjPXamgoIHgz9FPBfOp3c=
-github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo=
-github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
+github.com/godbus/dbus/v5 v5.2.2 h1:TUR3TgtSVDmjiXOgAAyaZbYmIeP3DPkld3jgKGV8mXQ=
+github.com/godbus/dbus/v5 v5.2.2/go.mod h1:3AAv2+hPq5rdnr5txxxRwiGjPXamgoIHgz9FPBfOp3c=
+github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY=
+github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
 github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
 github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
 github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
-github.com/google/pprof v0.0.0-20251007162407-5df77e3f7d1d h1:KJIErDwbSHjnp/SGzE5ed8Aol7JsKiI5X7yWKAtzhM0=
-github.com/google/pprof v0.0.0-20251007162407-5df77e3f7d1d/go.mod h1:I6V7YzU0XDpsHqbsyrghnFZLO1gwK6NPTNvmetQIk9U=
+github.com/google/pprof v0.0.0-20260115054156-294ebfa9ad83 h1:z2ogiKUYzX5Is6zr/vP9vJGqPwcdqsWjOt+V8J7+bTc=
+github.com/google/pprof v0.0.0-20260115054156-294ebfa9ad83/go.mod h1:MxpfABSjhmINe3F1It9d+8exIHFvUqtLIRCdOGNXqiI=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
@@ -69,14 +69,14 @@ github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLf
 github.com/jarcoal/httpmock v1.4.1 h1:0Ju+VCFuARfFlhVXFc2HxlcQkfB+Xq12/EotHko+x2A=
 github.com/jarcoal/httpmock v1.4.1/go.mod h1:ftW1xULwo+j0R0JJkJIIi7UKigZUXCLLanykgjwBXL0=
 github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
-github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
-github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
+github.com/klauspost/compress v1.18.4 h1:RPhnKRAQ4Fh8zU2FY/6ZFDwTVTxgJ/EMydqSTzE9a2c=
+github.com/klauspost/compress v1.18.4/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
 github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
 github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
-github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3 h1:PwQumkgq4/acIiZhtifTV5OUqqiP82UAl0h87xj/l9k=
-github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3/go.mod h1:autxFIvghDt3jPTLoqZ9OZ7s9qTGNAWmYCjVFWPX/zg=
+github.com/lufia/plan9stats v0.0.0-20260216142805-b3301c5f2a88 h1:PTw+yKnXcOFCR6+8hHTyWBeQ/P4Nb7dd4/0ohEcWQuM=
+github.com/lufia/plan9stats v0.0.0-20260216142805-b3301c5f2a88/go.mod h1:autxFIvghDt3jPTLoqZ9OZ7s9qTGNAWmYCjVFWPX/zg=
 github.com/lxzan/gws v1.8.9 h1:VU3SGUeWlQrEwfUSfokcZep8mdg/BrUF+y73YYshdBM=
 github.com/lxzan/gws v1.8.9/go.mod h1:d9yHaR1eDTBHagQC6KY7ycUOaz5KWeqQtP3xu7aMK8Y=
 github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
@@ -85,19 +85,19 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE
 github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
 github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
-github.com/nicholas-fedor/shoutrrr v0.12.0 h1:8mwJdfU+uBEybSymwQJMGl/grG7lvVUKbVSNxn3XvUI=
-github.com/nicholas-fedor/shoutrrr v0.12.0/go.mod h1:WYiRalR4C43Qmd2zhPWGIFIxu633NB1hDM6Ap/DQcsA=
-github.com/onsi/ginkgo/v2 v2.27.2 h1:LzwLj0b89qtIy6SSASkzlNvX6WktqurSHwkk2ipF/Ns=
-github.com/onsi/ginkgo/v2 v2.27.2/go.mod h1:ArE1D/XhNXBXCBkKOLkbsb2c81dQHCRcF5zwn/ykDRo=
-github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A=
-github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k=
+github.com/nicholas-fedor/shoutrrr v0.13.2 h1:hfsYBIqSFYGg92pZP5CXk/g7/OJIkLYmiUnRl+AD1IA=
+github.com/nicholas-fedor/shoutrrr v0.13.2/go.mod h1:ZqzV3gY/Wj6AvWs1etlO7+yKbh4iptSbeL8avBpMQbA=
+github.com/onsi/ginkgo/v2 v2.28.1 h1:S4hj+HbZp40fNKuLUQOYLDgZLwNUVn19N3Atb98NCyI=
+github.com/onsi/ginkgo/v2 v2.28.1/go.mod h1:CLtbVInNckU3/+gC8LzkGUb9oF+e8W8TdUsxPwvdOgE=
+github.com/onsi/gomega v1.39.1 h1:1IJLAad4zjPn2PsnhH70V4DKRFlrCzGBNrNaru+Vf28=
+github.com/onsi/gomega v1.39.1/go.mod h1:hL6yVALoTOxeWudERyfppUcZXjMwIMLnuSfruD2lcfg=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
 github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/pocketbase/dbx v1.11.0 h1:LpZezioMfT3K4tLrqA55wWFw1EtH1pM4tzSVa7kgszU=
-github.com/pocketbase/dbx v1.11.0/go.mod h1:xXRCIAKTHMgUCyCKZm55pUOdvFziJjQfXaWKhu2vhMs=
-github.com/pocketbase/pocketbase v0.33.0 h1:v2EfiY3hxigzRJ/BwFuwVn0vUv7d2QQoD5zUFPaKR9o=
-github.com/pocketbase/pocketbase v0.33.0/go.mod h1:9BEs+CRV7CrS+X5LfBh4bdJQsbzQAIklft3ovGe/c5A=
+github.com/pocketbase/dbx v1.12.0 h1:/oLErM+A0b4xI0PWTGPqSDVjzix48PqI/bng2l0PzoA=
+github.com/pocketbase/dbx v1.12.0/go.mod h1:xXRCIAKTHMgUCyCKZm55pUOdvFziJjQfXaWKhu2vhMs=
+github.com/pocketbase/pocketbase v0.36.4 h1:zTjRZbp2WfTOJJfb+pFRWa200UaQwxZYt8RzkFMlAZ4=
+github.com/pocketbase/pocketbase v0.36.4/go.mod h1:9CiezhRudd9FZGa5xZa53QZBTNxc5vvw/FGG+diAECI=
 github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt9k/+g42oCprj/FisM4qX9L3sZB3upGN2ZU=
 github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
 github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
@@ -105,12 +105,12 @@ github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qq
 github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
 github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
-github.com/shirou/gopsutil/v4 v4.25.10 h1:at8lk/5T1OgtuCp+AwrDofFRjnvosn0nkN2OLQ6g8tA=
-github.com/shirou/gopsutil/v4 v4.25.10/go.mod h1:+kSwyC8DRUD9XXEHCAFjK+0nuArFJM0lva+StQAcskM=
+github.com/shirou/gopsutil/v4 v4.26.1 h1:TOkEyriIXk2HX9d4isZJtbjXbEjf5qyKPAzbzY0JWSo=
+github.com/shirou/gopsutil/v4 v4.26.1/go.mod h1:medLI9/UNAb0dOI9Q3/7yWSqKkj00u+1tgY8nvv41pc=
 github.com/spf13/cast v1.10.0 h1:h2x0u2shc1QuLHfxi+cTJvs30+ZAHOGRic8uyGTDWxY=
 github.com/spf13/cast v1.10.0/go.mod h1:jNfB8QC9IA6ZuY2ZjDp0KtFO2LZZlg4S/7bzP6qqeHo=
-github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s=
-github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0=
+github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU=
+github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4=
 github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
 github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk=
 github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
@@ -129,41 +129,41 @@ github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQ
 go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
 go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
-golang.org/x/crypto v0.44.0 h1:A97SsFvM3AIwEEmTBiaxPPTYpDC47w720rdiiUvgoAU=
-golang.org/x/crypto v0.44.0/go.mod h1:013i+Nw79BMiQiMsOPcVCB5ZIJbYkerPrGnOa00tvmc=
-golang.org/x/exp v0.0.0-20251113190631-e25ba8c21ef6 h1:zfMcR1Cs4KNuomFFgGefv5N0czO2XZpUbxGUy8i8ug0=
-golang.org/x/exp v0.0.0-20251113190631-e25ba8c21ef6/go.mod h1:46edojNIoXTNOhySWIWdix628clX9ODXwPsQuG6hsK0=
+golang.org/x/crypto v0.48.0 h1:/VRzVqiRSggnhY7gNRxPauEQ5Drw9haKdM0jqfcCFts=
+golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos=
+golang.org/x/exp v0.0.0-20260218203240-3dfff04db8fa h1:Zt3DZoOFFYkKhDT3v7Lm9FDMEV06GpzjG2jrqW+QTE0=
+golang.org/x/exp v0.0.0-20260218203240-3dfff04db8fa/go.mod h1:K79w1Vqn7PoiZn+TkNpx3BUWUQksGO3JcVX6qIjytmA=
 golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
-golang.org/x/image v0.33.0 h1:LXRZRnv1+zGd5XBUVRFmYEphyyKJjQjCRiOuAP3sZfQ=
-golang.org/x/image v0.33.0/go.mod h1:DD3OsTYT9chzuzTQt+zMcOlBHgfoKQb1gry8p76Y1sc=
-golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk=
-golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc=
+golang.org/x/image v0.36.0 h1:Iknbfm1afbgtwPTmHnS2gTM/6PPZfH+z2EFuOkSbqwc=
+golang.org/x/image v0.36.0/go.mod h1:YsWD2TyyGKiIX1kZlu9QfKIsQ4nAAK9bdgdrIsE7xy4=
+golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8=
+golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w=
 golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
-golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
-golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
-golang.org/x/oauth2 v0.33.0 h1:4Q+qn+E5z8gPRJfmRy7C2gGG3T4jIprK6aSYgTXGRpo=
-golang.org/x/oauth2 v0.33.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
-golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
-golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
+golang.org/x/net v0.50.0 h1:ucWh9eiCGyDR3vtzso0WMQinm2Dnt8cFMuQa9K33J60=
+golang.org/x/net v0.50.0/go.mod h1:UgoSli3F/pBgdJBHCTc+tp3gmrU4XswgGRgtnwWTfyM=
+golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ=
+golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
+golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
+golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
-golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
-golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU=
-golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254=
+golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k=
+golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/term v0.40.0 h1:36e4zGLqU4yhjlmxEaagx2KuYbJq3EwY8K943ZsHcvg=
+golang.org/x/term v0.40.0/go.mod h1:w2P8uVp06p2iyKKuvXIm7N/y0UCRt3UfJTfZ7oOpglM=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
-golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
-golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
+golang.org/x/text v0.34.0 h1:oL/Qq0Kdaqxa1KbNeMKwQq0reLCCaFtqu2eNuSeNHbk=
+golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
-golang.org/x/tools v0.39.0 h1:ik4ho21kwuQln40uelmciQPp9SipgNDdrafrYA4TmQQ=
-golang.org/x/tools v0.39.0/go.mod h1:JnefbkDPyD8UU2kI5fuf8ZX4/yUeh9W877ZeBONxUqQ=
+golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k=
+golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0=
 google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
-google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE=
-google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
+google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
+google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
@@ -185,10 +185,8 @@ modernc.org/gc/v3 v3.1.1 h1:k8T3gkXWY9sEiytKhcgyiZ2L0DTyCQ/nvX+LoCljoRE=
 modernc.org/gc/v3 v3.1.1/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
 modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
 modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
-modernc.org/libc v1.66.10 h1:yZkb3YeLx4oynyR+iUsXsybsX4Ubx7MQlSYEw4yj59A=
-modernc.org/libc v1.66.10/go.mod h1:8vGSEwvoUoltr4dlywvHqjtAqHBaw0j1jI7iFBTAr2I=
-modernc.org/libc v1.67.0 h1:QzL4IrKab2OFmxA3/vRYl0tLXrIamwrhD6CKD4WBVjQ=
-modernc.org/libc v1.67.0/go.mod h1:QvvnnJ5P7aitu0ReNpVIEyesuhmDLQ8kaEoyMjIFZJA=
+modernc.org/libc v1.67.6 h1:eVOQvpModVLKOdT+LvBPjdQqfrZq+pC39BygcT+E7OI=
+modernc.org/libc v1.67.6/go.mod h1:JAhxUVlolfYDErnwiqaLvUqc8nfb2r6S6slAgZOnaiE=
 modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
 modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
 modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
@@ -197,8 +195,8 @@ modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8=
 modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
 modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
 modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
-modernc.org/sqlite v1.40.0 h1:bNWEDlYhNPAUdUdBzjAvn8icAs/2gaKlj4vM+tQ6KdQ=
-modernc.org/sqlite v1.40.0/go.mod h1:9fjQZ0mB1LLP0GYrp39oOJXx/I2sxEnZtzCmEQIKvGE=
+modernc.org/sqlite v1.45.0 h1:r51cSGzKpbptxnby+EIIz5fop4VuE4qFoVEjNvWoObs=
+modernc.org/sqlite v1.45.0/go.mod h1:CzbrU2lSB1DKUusvwGz7rqEKIq+NUd8GWuBBZDs9/nA=
 modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
 modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
 modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
--- a/internal/alerts/alerts.go
+++ b/internal/alerts/alerts.go
@@ -21,13 +21,14 @@ type hubLike interface {

 type AlertManager struct {
 	hub           hubLike
-	alertQueue    chan alertTask
-	stopChan      chan struct{}
+	stopOnce      sync.Once
 	pendingAlerts sync.Map
+	alertsCache   *AlertsCache
 }

 type AlertMessageData struct {
 	UserID   string
+	SystemID string
 	Title    string
 	Message  string
 	Link     string
@@ -39,15 +40,22 @@ type UserNotificationSettings struct {
 	Webhooks []string `json:"webhooks"`
 }

+// SystemAlertFsStats holds the disk total and disk used values decoded from
+// the JSON keys "d" and "du". It is the value type of SystemAlertStats.ExtraFs.
+// NOTE(review): the units are not visible here — confirm against the code that
+// writes these stats.
+type SystemAlertFsStats struct {
+	DiskTotal float64 `json:"d"`
+	DiskUsed  float64 `json:"du"`
+}
+
+// Values pulled from system_stats.stats that are relevant to alerts.
 type SystemAlertStats struct {
 	Cpu          float64                       `json:"cpu"`
 	Mem          float64                       `json:"mp"`
 	Disk         float64                       `json:"dp"`
-	NetSent      float64                       `json:"ns"`
-	NetRecv      float64                       `json:"nr"`
+	Bandwidth    [2]uint64                     `json:"b"`
 	GPU          map[string]SystemAlertGPUData `json:"g"`
 	Temperatures map[string]float32            `json:"t"`
 	LoadAvg      [3]float64                    `json:"la"`
+	Battery      [2]uint8                      `json:"bat"`
+	ExtraFs      map[string]SystemAlertFsStats `json:"efs"`
 }

 type SystemAlertGPUData struct {
@@ -56,7 +64,7 @@ type SystemAlertGPUData struct {

 type SystemAlertData struct {
 	systemRecord *core.Record
-	alertRecord  *core.Record
+	alertData    CachedAlertData
 	name         string
 	unit         string
 	val          float64
@@ -90,12 +98,10 @@ var supportsTitle = map[string]struct{}{
 // NewAlertManager creates a new AlertManager instance.
 func NewAlertManager(app hubLike) *AlertManager {
 	am := &AlertManager{
-		hub:        app,
-		alertQueue: make(chan alertTask, 5),
-		stopChan:   make(chan struct{}),
+		hub:         app,
+		alertsCache: NewAlertsCache(app),
 	}
 	am.bindEvents()
-	go am.startWorker()
 	return am
 }

@@ -103,10 +109,97 @@ func NewAlertManager(app hubLike) *AlertManager {
 func (am *AlertManager) bindEvents() {
 	am.hub.OnRecordAfterUpdateSuccess("alerts").BindFunc(updateHistoryOnAlertUpdate)
 	am.hub.OnRecordAfterDeleteSuccess("alerts").BindFunc(resolveHistoryOnAlertDelete)
+	// Hand successfully updated smart_devices records to handleSmartDeviceAlert.
+	am.hub.OnRecordAfterUpdateSuccess("smart_devices").BindFunc(am.handleSmartDeviceAlert)
+
+	// Startup work. Failures in this hook are logged or ignored rather than
+	// returned, so e.Next() is always reached.
+	am.hub.OnServe().BindFunc(func(e *core.ServeEvent) error {
+		// Populate all alerts into cache on startup
+		// (the return value of PopulateFromDB is deliberately discarded here).
+		_ = am.alertsCache.PopulateFromDB(true)
+
+		if err := resolveStatusAlerts(e.App); err != nil {
+			e.App.Logger().Error("Failed to resolve stale status alerts", "err", err)
+		}
+		if err := am.restorePendingStatusAlerts(); err != nil {
+			e.App.Logger().Error("Failed to restore pending status alerts", "err", err)
+		}
+		return e.Next()
+	})
+}
+
+// IsNotificationSilenced reports whether the current UTC time falls inside any
+// "quiet_hours" record that applies to the given user.
+//
+// Records with an empty system field are always considered; when systemID is
+// non-empty, records for that system are considered as well. A record of type
+// "daily" is matched on the clock hour and minute of its start and end only,
+// and may wrap past midnight. Any other type is matched as a single range that
+// includes start and excludes end. If the lookup fails or finds no records,
+// the notification is not silenced.
+func (am *AlertManager) IsNotificationSilenced(userID, systemID string) bool {
+	// Global windows (empty system) always apply; widen the filter to the
+	// system-specific ones only when a system was given.
+	filter := "user={:user} AND system=''"
+	params := dbx.Params{"user": userID}
+	if systemID != "" {
+		filter = "user={:user} AND (system='' OR system={:system})"
+		params["system"] = systemID
+	}
+
+	windows, err := am.hub.FindAllRecords("quiet_hours", dbx.NewExp(filter, params))
+	if err != nil || len(windows) == 0 {
+		return false
+	}
+
+	// minuteOfDay reduces a timestamp to minutes since midnight, dropping seconds.
+	minuteOfDay := func(t time.Time) int {
+		hour, minute, _ := t.Clock()
+		return hour*60 + minute
+	}
+
+	now := time.Now().UTC()
+	nowMinute := minuteOfDay(now)
+
+	for _, window := range windows {
+		windowType := window.GetString("type")
+		start := window.GetDateTime("start").Time()
+		end := window.GetDateTime("end").Time()
+
+		if windowType != "daily" {
+			// One-time window: silenced while start <= now < end.
+			if !now.Before(start) && now.Before(end) {
+				return true
+			}
+			continue
+		}
+
+		// Daily window: start/end are full datetimes, but only HH:MM matters.
+		startMinute, endMinute := minuteOfDay(start), minuteOfDay(end)
+
+		var silenced bool
+		if endMinute < startMinute {
+			// Window crosses midnight (e.g., 23:00 - 01:00).
+			silenced = nowMinute >= startMinute || nowMinute < endMinute
+		} else {
+			// Normal case (e.g., 09:00 - 17:00).
+			silenced = nowMinute >= startMinute && nowMinute < endMinute
+		}
+		if silenced {
+			return true
+		}
+	}
+
+	return false
 }

 // SendAlert sends an alert to the user
 func (am *AlertManager) SendAlert(data AlertMessageData) error {
+	// Check if alert is silenced
+	if am.IsNotificationSilenced(data.UserID, data.SystemID) {
+		am.hub.Logger().Info("Notification silenced", "user", data.UserID, "system", data.SystemID, "title", data.Title)
+		return nil
+	}
+
 	// get user settings
 	record, err := am.hub.FindFirstRecordByFilter(
 		"user_settings", "user={:user}",
@@ -183,13 +276,14 @@ func (am *AlertManager) SendShoutrrrAlert(notificationUrl, title, message, link,
 	}

 	// Add link
-	if scheme == "ntfy" {
+	switch scheme {
+	case "ntfy":
 		queryParams.Add("Actions", fmt.Sprintf("view, %s, %s", linkText, link))
-	} else if scheme == "lark" {
+	case "lark":
 		queryParams.Add("link", link)
-	} else if scheme == "bark" {
+	case "bark":
 		queryParams.Add("url", link)
-	} else {
+	default:
 		message += "\n\n" + link
 	}

@@ -222,3 +316,13 @@ func (am *AlertManager) SendTestNotification(e *core.RequestEvent) error {
 	}
 	return e.JSON(200, map[string]bool{"err": false})
 }
+
+// setAlertTriggered looks up the alert record by alert.Id, sets its "triggered" field, and saves it; lookup or save errors are returned
+func (am *AlertManager) setAlertTriggered(alert CachedAlertData, triggered bool) error {
+	alertRecord, err := am.hub.FindRecordById("alerts", alert.Id)
+	if err != nil {
+		return err
+	}
+	alertRecord.Set("triggered", triggered)
+	return am.hub.Save(alertRecord)
+}
--- a/internal/alerts/alerts_battery_test.go
+++ b/internal/alerts/alerts_battery_test.go
@@ -0,0 +1,386 @@
+//go:build testing
+
+package alerts_test
+
+import (
+	"encoding/json"
+	"testing"
+	"time"
+
+	"github.com/henrygd/beszel/internal/entities/system"
+	beszelTests "github.com/henrygd/beszel/internal/tests"
+
+	"github.com/pocketbase/dbx"
+	"github.com/pocketbase/pocketbase/tools/types"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestBatteryAlertLogic tests that battery alerts trigger when value drops BELOW threshold (opposite of other alerts like CPU, Memory, etc.).
+// It feeds readings of 50%, 15%, then 25% against a 20% threshold and expects the "triggered" flag to be false, true, then false again.
+func TestBatteryAlertLogic(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// Create a system
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+	require.NoError(t, err)
+	systemRecord := systems[0]
+
+	// Create a battery alert with threshold of 20% and min of 1 minute (immediate trigger)
+	batteryAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "Battery",
+		"system": systemRecord.Id,
+		"user":   user.Id,
+		"value":  20, // threshold: 20%
+		"min":    1,  // 1 minute (immediate trigger for testing)
+	})
+	require.NoError(t, err)
+
+	// Verify alert is not triggered initially
+	assert.False(t, batteryAlert.GetBool("triggered"), "Alert should not be triggered initially")
+
+	// Create system stats with battery at 50% (above threshold - should NOT trigger)
+	statsHigh := system.Stats{
+		Cpu:     10,
+		MemPct:  30,
+		DiskPct: 40,
+		Battery: [2]uint8{50, 1}, // 50% battery, discharging
+	}
+	statsHighJSON, _ := json.Marshal(statsHigh)
+	_, err = beszelTests.CreateRecord(hub, "system_stats", map[string]any{
+		"system": systemRecord.Id,
+		"type":   "1m",
+		"stats":  string(statsHighJSON),
+	})
+	require.NoError(t, err)
+
+	// Create CombinedData for the alert handler
+	combinedDataHigh := &system.CombinedData{
+		Stats: statsHigh,
+		Info: system.Info{
+			AgentVersion: "0.12.0",
+			Cpu:          10,
+			MemPct:       30,
+			DiskPct:      40,
+		},
+	}
+
+	// Simulate system update time
+	systemRecord.Set("updated", time.Now().UTC())
+	err = hub.SaveNoValidate(systemRecord)
+	require.NoError(t, err)
+
+	// Handle system alerts with high battery
+	am := hub.GetAlertManager()
+	err = am.HandleSystemAlerts(systemRecord, combinedDataHigh)
+	require.NoError(t, err)
+
+	// Verify alert is still NOT triggered (battery 50% is above threshold 20%)
+	batteryAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": batteryAlert.Id})
+	require.NoError(t, err)
+	assert.False(t, batteryAlert.GetBool("triggered"), "Alert should NOT be triggered when battery (50%%) is above threshold (20%%)")
+
+	// Now create stats with battery at 15% (below threshold - should trigger)
+	statsLow := system.Stats{
+		Cpu:     10,
+		MemPct:  30,
+		DiskPct: 40,
+		Battery: [2]uint8{15, 1}, // 15% battery, discharging
+	}
+	statsLowJSON, _ := json.Marshal(statsLow)
+	_, err = beszelTests.CreateRecord(hub, "system_stats", map[string]any{
+		"system": systemRecord.Id,
+		"type":   "1m",
+		"stats":  string(statsLowJSON),
+	})
+	require.NoError(t, err)
+
+	combinedDataLow := &system.CombinedData{
+		Stats: statsLow,
+		Info: system.Info{
+			AgentVersion: "0.12.0",
+			Cpu:          10,
+			MemPct:       30,
+			DiskPct:      40,
+		},
+	}
+
+	// Update system timestamp
+	systemRecord.Set("updated", time.Now().UTC())
+	err = hub.SaveNoValidate(systemRecord)
+	require.NoError(t, err)
+
+	// Handle system alerts with low battery
+	err = am.HandleSystemAlerts(systemRecord, combinedDataLow)
+	require.NoError(t, err)
+
+	// Wait briefly (fixed 20ms) before re-reading the alert record from the database
+	time.Sleep(20 * time.Millisecond)
+
+	// Verify alert IS triggered (battery 15% is below threshold 20%)
+	batteryAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": batteryAlert.Id})
+	require.NoError(t, err)
+	assert.True(t, batteryAlert.GetBool("triggered"), "Alert SHOULD be triggered when battery (15%%) drops below threshold (20%%)")
+
+	// Now test resolution: battery goes back above threshold
+	statsRecovered := system.Stats{
+		Cpu:     10,
+		MemPct:  30,
+		DiskPct: 40,
+		Battery: [2]uint8{25, 1}, // 25% battery, discharging
+	}
+	statsRecoveredJSON, _ := json.Marshal(statsRecovered)
+	_, err = beszelTests.CreateRecord(hub, "system_stats", map[string]any{
+		"system": systemRecord.Id,
+		"type":   "1m",
+		"stats":  string(statsRecoveredJSON),
+	})
+	require.NoError(t, err)
+
+	combinedDataRecovered := &system.CombinedData{
+		Stats: statsRecovered,
+		Info: system.Info{
+			AgentVersion: "0.12.0",
+			Cpu:          10,
+			MemPct:       30,
+			DiskPct:      40,
+		},
+	}
+
+	// Update system timestamp
+	systemRecord.Set("updated", time.Now().UTC())
+	err = hub.SaveNoValidate(systemRecord)
+	require.NoError(t, err)
+
+	// Handle system alerts with recovered battery
+	err = am.HandleSystemAlerts(systemRecord, combinedDataRecovered)
+	require.NoError(t, err)
+
+	// Wait briefly (fixed 20ms) before re-reading the alert record from the database
+	time.Sleep(20 * time.Millisecond)
+
+	// Verify alert is now resolved (battery 25% is above threshold 20%)
+	batteryAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": batteryAlert.Id})
+	require.NoError(t, err)
+	assert.False(t, batteryAlert.GetBool("triggered"), "Alert should be resolved when battery (25%%) goes above threshold (20%%)")
+}
+
+// TestBatteryAlertNoBattery verifies that a Battery alert (threshold 20) stays untriggered when stats report Battery {0, 0}
+func TestBatteryAlertNoBattery(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// Create a system
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+	require.NoError(t, err)
+	systemRecord := systems[0]
+
+	// Create a battery alert
+	batteryAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "Battery",
+		"system": systemRecord.Id,
+		"user":   user.Id,
+		"value":  20,
+		"min":    1,
+	})
+	require.NoError(t, err)
+
+	// Create stats with NO battery data (Battery[0] = 0)
+	statsNoBattery := system.Stats{
+		Cpu:     10,
+		MemPct:  30,
+		DiskPct: 40,
+		Battery: [2]uint8{0, 0}, // No battery
+	}
+
+	combinedData := &system.CombinedData{
+		Stats: statsNoBattery,
+		Info: system.Info{
+			AgentVersion: "0.12.0",
+			Cpu:          10,
+			MemPct:       30,
+			DiskPct:      40,
+		},
+	}
+
+	// Simulate system update time
+	systemRecord.Set("updated", time.Now().UTC())
+	err = hub.SaveNoValidate(systemRecord)
+	require.NoError(t, err)
+
+	// Handle system alerts
+	am := hub.GetAlertManager()
+	err = am.HandleSystemAlerts(systemRecord, combinedData)
+	require.NoError(t, err)
+
+	// Wait briefly (fixed 20ms) before re-reading the alert record from the database
+	time.Sleep(20 * time.Millisecond)
+
+	// Verify alert is NOT triggered (no battery data should skip the alert)
+	batteryAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": batteryAlert.Id})
+	require.NoError(t, err)
+	assert.False(t, batteryAlert.GetBool("triggered"), "Alert should NOT be triggered when system has no battery")
+}
+
+// TestBatteryAlertAveragedSamples tests battery alerts with min > 1 (averaging multiple samples) to ensure the inverted
+// threshold logic works across averaged time windows: 15% samples must trigger a 25%/2-minute alert, later 50% samples must resolve it
+func TestBatteryAlertAveragedSamples(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// Create a system
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+	require.NoError(t, err)
+	systemRecord := systems[0]
+
+	// Create a battery alert with threshold of 25% and min of 2 minutes (requires averaging)
+	batteryAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "Battery",
+		"system": systemRecord.Id,
+		"user":   user.Id,
+		"value":  25, // threshold: 25%
+		"min":    2,  // 2 minutes - requires averaging
+	})
+	require.NoError(t, err)
+
+	// Verify alert is not triggered initially
+	assert.False(t, batteryAlert.GetBool("triggered"), "Alert should not be triggered initially")
+
+	am := hub.GetAlertManager()
+	now := time.Now().UTC()
+
+	// Create system_stats records with low battery (below threshold)
+	// The alert has min=2 minutes, so alert.time = now - 2 minutes
+	// For the alert to be valid, alert.time must be AFTER the oldest record's created time
+	// So we need records older than (now - 2 min), plus records within the window
+	// Records at: now-3min (oldest, before window), now-90s, now-60s, now-30s
+	recordTimes := []time.Duration{
+		-180 * time.Second, // 3 min ago - this makes the oldest record before alert.time
+		-90 * time.Second,
+		-60 * time.Second,
+		-30 * time.Second,
+	}
+
+	for _, offset := range recordTimes {
+		statsLow := system.Stats{
+			Cpu:     10,
+			MemPct:  30,
+			DiskPct: 40,
+			Battery: [2]uint8{15, 1}, // 15% battery (below 25% threshold)
+		}
+		statsLowJSON, _ := json.Marshal(statsLow)
+
+		recordTime := now.Add(offset)
+		record, err := beszelTests.CreateRecord(hub, "system_stats", map[string]any{
+			"system": systemRecord.Id,
+			"type":   "1m",
+			"stats":  string(statsLowJSON),
+		})
+		require.NoError(t, err)
+		// Update created time to simulate historical records - use SetRaw with formatted string
+		record.SetRaw("created", recordTime.Format(types.DefaultDateLayout))
+		err = hub.SaveNoValidate(record)
+		require.NoError(t, err)
+	}
+
+	// Create combined data with low battery
+	combinedDataLow := &system.CombinedData{
+		Stats: system.Stats{
+			Cpu:     10,
+			MemPct:  30,
+			DiskPct: 40,
+			Battery: [2]uint8{15, 1},
+		},
+		Info: system.Info{
+			AgentVersion: "0.12.0",
+			Cpu:          10,
+			MemPct:       30,
+			DiskPct:      40,
+		},
+	}
+
+	// Update system timestamp
+	systemRecord.Set("updated", now)
+	err = hub.SaveNoValidate(systemRecord)
+	require.NoError(t, err)
+
+	// Handle system alerts - should trigger because average battery is below threshold
+	err = am.HandleSystemAlerts(systemRecord, combinedDataLow)
+	require.NoError(t, err)
+
+	// Wait briefly (fixed 20ms) before re-reading the alert record from the database
+	time.Sleep(20 * time.Millisecond)
+
+	// Verify alert IS triggered (average battery 15% is below threshold 25%)
+	batteryAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": batteryAlert.Id})
+	require.NoError(t, err)
+	assert.True(t, batteryAlert.GetBool("triggered"),
+		"Alert SHOULD be triggered when average battery (15%%) is below threshold (25%%) over min period")
+
+	// Now add records with high battery to test resolution
+	// Use a new time window 2 minutes later
+	newNow := now.Add(2 * time.Minute)
+	// Records need to span before the alert time window (newNow - 2 min)
+	recordTimesHigh := []time.Duration{
+		-180 * time.Second, // 3 min before newNow - makes oldest record before alert.time
+		-90 * time.Second,
+		-60 * time.Second,
+		-30 * time.Second,
+	}
+
+	for _, offset := range recordTimesHigh {
+		statsHigh := system.Stats{
+			Cpu:     10,
+			MemPct:  30,
+			DiskPct: 40,
+			Battery: [2]uint8{50, 1}, // 50% battery (above 25% threshold)
+		}
+		statsHighJSON, _ := json.Marshal(statsHigh)
+
+		recordTime := newNow.Add(offset)
+		record, err := beszelTests.CreateRecord(hub, "system_stats", map[string]any{
+			"system": systemRecord.Id,
+			"type":   "1m",
+			"stats":  string(statsHighJSON),
+		})
+		require.NoError(t, err)
+		record.SetRaw("created", recordTime.Format(types.DefaultDateLayout))
+		err = hub.SaveNoValidate(record)
+		require.NoError(t, err)
+	}
+
+	// Create combined data with high battery
+	combinedDataHigh := &system.CombinedData{
+		Stats: system.Stats{
+			Cpu:     10,
+			MemPct:  30,
+			DiskPct: 40,
+			Battery: [2]uint8{50, 1},
+		},
+		Info: system.Info{
+			AgentVersion: "0.12.0",
+			Cpu:          10,
+			MemPct:       30,
+			DiskPct:      40,
+		},
+	}
+
+	// Update system timestamp to the new time window
+	systemRecord.Set("updated", newNow)
+	err = hub.SaveNoValidate(systemRecord)
+	require.NoError(t, err)
+
+	// Handle system alerts - should resolve because average battery is now above threshold
+	err = am.HandleSystemAlerts(systemRecord, combinedDataHigh)
+	require.NoError(t, err)
+
+	// Wait briefly (fixed 20ms) before re-reading the alert record from the database
+	time.Sleep(20 * time.Millisecond)
+
+	// Verify alert is resolved (average battery 50% is above threshold 25%)
+	batteryAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": batteryAlert.Id})
+	require.NoError(t, err)
+	assert.False(t, batteryAlert.GetBool("triggered"),
+		"Alert should be resolved when average battery (50%%) is above threshold (25%%) over min period")
+}
--- a/internal/alerts/alerts_cache.go
+++ b/internal/alerts/alerts_cache.go
@@ -0,0 +1,177 @@
+package alerts
+
+import (
+	"github.com/pocketbase/dbx"
+	"github.com/pocketbase/pocketbase/core"
+	"github.com/pocketbase/pocketbase/tools/store"
+)
+
+// CachedAlertData is a plain-value snapshot of the alert record fields needed for status checking and updates.
+type CachedAlertData struct {
+	Id        string  // alert record ID
+	SystemID  string  // value of the record's "system" field
+	UserID    string  // value of the record's "user" field
+	Name      string  // alert type from the "name" field (e.g. "CPU", "Battery")
+	Value     float64 // value of the record's "value" field
+	Triggered bool    // value of the record's "triggered" field
+	Min       uint8   // value of the record's "min" field
+	// Created   types.DateTime
+}
+
+func (a *CachedAlertData) PopulateFromRecord(record *core.Record) { // overwrites every field of a with values read from record
+	a.Id = record.Id
+	a.SystemID = record.GetString("system")
+	a.UserID = record.GetString("user")
+	a.Name = record.GetString("name")
+	a.Value = record.GetFloat("value")
+	a.Triggered = record.GetBool("triggered")
+	a.Min = uint8(record.GetInt("min")) // NOTE(review): the uint8 conversion wraps if "min" is outside 0-255
+	// a.Created = record.GetDateTime("created")
+}
+
+// AlertsCache provides an in-memory cache for system alerts, stored as system ID -> (alert ID -> CachedAlertData).
+type AlertsCache struct {
+	app       core.App
+	store     *store.Store[string, *store.Store[string, CachedAlertData]]
+	populated bool
+}
+
+// NewAlertsCache creates a new AlertsCache with an empty store and binds the record event hooks that keep it in sync.
+func NewAlertsCache(app core.App) *AlertsCache {
+	c := AlertsCache{
+		app:   app,
+		store: store.New(map[string]*store.Store[string, CachedAlertData]{}),
+	}
+	return c.bindEvents()
+}
+
+// bindEvents registers update/delete/create success hooks on the "alerts" collection so the cache follows database changes, and returns c.
+func (c *AlertsCache) bindEvents() *AlertsCache {
+	c.app.OnRecordAfterUpdateSuccess("alerts").BindFunc(func(e *core.RecordEvent) error {
+		// c.Delete(e.Record.Original()) // this would be needed if the system field on an existing alert was changed, however we don't currently allow that in the UI so we'll leave it commented out
+		c.Update(e.Record)
+		return e.Next()
+	})
+	c.app.OnRecordAfterDeleteSuccess("alerts").BindFunc(func(e *core.RecordEvent) error {
+		c.Delete(e.Record)
+		return e.Next()
+	})
+	c.app.OnRecordAfterCreateSuccess("alerts").BindFunc(func(e *core.RecordEvent) error {
+		c.Update(e.Record)
+		return e.Next()
+	})
+	return c
+}
+
+// PopulateFromDB replaces the cache contents with all alerts from the database; it is a no-op if already populated unless force is true.
+func (c *AlertsCache) PopulateFromDB(force bool) error {
+	if !force && c.populated {
+		return nil
+	}
+	records, err := c.app.FindAllRecords("alerts")
+	if err != nil {
+		return err
+	}
+	c.store.RemoveAll()
+	for _, record := range records {
+		c.Update(record)
+	}
+	c.populated = true
+	return nil
+}
+
+// Update adds or updates an alert record in the cache, creating the per-system store on first use; records with an empty "system" are ignored.
+func (c *AlertsCache) Update(record *core.Record) {
+	systemID := record.GetString("system")
+	if systemID == "" {
+		return
+	}
+	systemStore, ok := c.store.GetOk(systemID)
+	if !ok {
+		systemStore = store.New(map[string]CachedAlertData{})
+		c.store.Set(systemID, systemStore)
+	}
+	var ca CachedAlertData
+	ca.PopulateFromRecord(record)
+	systemStore.Set(record.Id, ca)
+}
+
+// Delete removes an alert record from the cache; it does nothing if the record's "system" is empty or that system is not cached.
+func (c *AlertsCache) Delete(record *core.Record) {
+	systemID := record.GetString("system")
+	if systemID == "" {
+		return
+	}
+	if systemStore, ok := c.store.GetOk(systemID); ok {
+		systemStore.Remove(record.Id)
+	}
+}
+
+// GetSystemAlerts returns all alerts for the specified system, loading them from the database on a cache miss; it returns nil if that query fails.
+func (c *AlertsCache) GetSystemAlerts(systemID string) []CachedAlertData {
+	systemStore, ok := c.store.GetOk(systemID)
+	if !ok {
+		// Cache miss: query this system's alerts and store them (an empty result is cached too)
+		records, err := c.app.FindAllRecords("alerts", dbx.NewExp("system={:system}", dbx.Params{"system": systemID}))
+		if err != nil {
+			return nil
+		}
+		systemStore = store.New(map[string]CachedAlertData{})
+		for _, record := range records {
+			var ca CachedAlertData
+			ca.PopulateFromRecord(record)
+			systemStore.Set(record.Id, ca)
+		}
+		c.store.Set(systemID, systemStore)
+	}
+	all := systemStore.GetAll()
+	alerts := make([]CachedAlertData, 0, len(all))
+	for _, alert := range all {
+		alerts = append(alerts, alert)
+	}
+	return alerts
+}
+
+// GetAlert returns a specific alert by its ID from the cache only (no database lookup); the bool is false if the system or alert is not cached.
+func (c *AlertsCache) GetAlert(systemID, alertID string) (CachedAlertData, bool) {
+	if systemStore, ok := c.store.GetOk(systemID); ok {
+		return systemStore.GetOk(alertID)
+	}
+	return CachedAlertData{}, false
+}
+
+// GetAlertsByName returns all alerts for the specified system whose Name equals alertName (a nil slice if none match).
+func (c *AlertsCache) GetAlertsByName(systemID, alertName string) []CachedAlertData {
+	allAlerts := c.GetSystemAlerts(systemID)
+	var alerts []CachedAlertData
+	for _, record := range allAlerts {
+		if record.Name == alertName {
+			alerts = append(alerts, record)
+		}
+	}
+	return alerts
+}
+
+// GetAlertsExcludingNames returns all alerts for the specified system whose Name is not in excludedNames (a nil slice if none remain).
+func (c *AlertsCache) GetAlertsExcludingNames(systemID string, excludedNames ...string) []CachedAlertData {
+	excludeMap := make(map[string]struct{})
+	for _, name := range excludedNames {
+		excludeMap[name] = struct{}{}
+	}
+	allAlerts := c.GetSystemAlerts(systemID)
+	var alerts []CachedAlertData
+	for _, record := range allAlerts {
+		if _, excluded := excludeMap[record.Name]; !excluded {
+			alerts = append(alerts, record)
+		}
+	}
+	return alerts
+}
+
+// Refresh returns the latest cached copy for an alert snapshot, looked up by its SystemID and Id; the bool is false if Id is empty or the alert is no longer cached.
+func (c *AlertsCache) Refresh(alert CachedAlertData) (CachedAlertData, bool) {
+	if alert.Id == "" {
+		return CachedAlertData{}, false
+	}
+	return c.GetAlert(alert.SystemID, alert.Id)
+}
--- a/internal/alerts/alerts_cache_test.go
+++ b/internal/alerts/alerts_cache_test.go
@@ -0,0 +1,215 @@
+//go:build testing
+
+package alerts_test
+
+import (
+	"testing"
+
+	"github.com/henrygd/beszel/internal/alerts"
+	beszelTests "github.com/henrygd/beszel/internal/tests"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestSystemAlertsCachePopulateAndFilter(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	systems, err := beszelTests.CreateSystems(hub, 2, user.Id, "up")
+	require.NoError(t, err)
+	system1 := systems[0]
+	system2 := systems[1]
+
+	statusAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "Status",
+		"system": system1.Id,
+		"user":   user.Id,
+		"min":    1,
+	})
+	require.NoError(t, err)
+
+	cpuAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "CPU",
+		"system": system1.Id,
+		"user":   user.Id,
+		"value":  80,
+		"min":    1,
+	})
+	require.NoError(t, err)
+
+	memoryAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "Memory",
+		"system": system2.Id,
+		"user":   user.Id,
+		"value":  90,
+		"min":    1,
+	})
+	require.NoError(t, err)
+
+	cache := alerts.NewAlertsCache(hub)
+	cache.PopulateFromDB(false) // NOTE(review): the returned error is not checked
+
+	statusAlerts := cache.GetAlertsByName(system1.Id, "Status")
+	require.Len(t, statusAlerts, 1)
+	assert.Equal(t, statusAlert.Id, statusAlerts[0].Id)
+
+	nonStatusAlerts := cache.GetAlertsExcludingNames(system1.Id, "Status")
+	require.Len(t, nonStatusAlerts, 1)
+	assert.Equal(t, cpuAlert.Id, nonStatusAlerts[0].Id)
+
+	system2Alerts := cache.GetSystemAlerts(system2.Id)
+	require.Len(t, system2Alerts, 1)
+	assert.Equal(t, memoryAlert.Id, system2Alerts[0].Id)
+}
+
+func TestSystemAlertsCacheLazyLoadUpdateAndDelete(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+	require.NoError(t, err)
+	systemRecord := systems[0]
+
+	statusAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "Status",
+		"system": systemRecord.Id,
+		"user":   user.Id,
+		"min":    1,
+	})
+	require.NoError(t, err)
+
+	cache := alerts.NewAlertsCache(hub)
+	require.Len(t, cache.GetSystemAlerts(systemRecord.Id), 1, "first lookup should lazy-load alerts for the system")
+
+	cpuAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "CPU",
+		"system": systemRecord.Id,
+		"user":   user.Id,
+		"value":  80,
+		"min":    1,
+	})
+	require.NoError(t, err)
+
+	cache.Update(cpuAlert) // explicit cache update with the newly created record
+
+	nonStatusAlerts := cache.GetAlertsExcludingNames(systemRecord.Id, "Status")
+	require.Len(t, nonStatusAlerts, 1)
+	assert.Equal(t, cpuAlert.Id, nonStatusAlerts[0].Id)
+
+	cache.Delete(statusAlert) // evicts from the cache only; the database record is left in place
+	assert.Empty(t, cache.GetAlertsByName(systemRecord.Id, "Status"), "deleted alerts should be removed from the in-memory cache")
+}
+
+func TestSystemAlertsCacheRefreshReturnsLatestCopy(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+	require.NoError(t, err)
+	system := systems[0]
+
+	alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":      "Status",
+		"system":    system.Id,
+		"user":      user.Id,
+		"min":       1,
+		"triggered": false,
+	})
+	require.NoError(t, err)
+
+	cache := alerts.NewAlertsCache(hub)
+	snapshot := cache.GetSystemAlerts(system.Id)[0] // value copy taken before the record is modified
+	assert.False(t, snapshot.Triggered)
+
+	alert.Set("triggered", true)
+	require.NoError(t, hub.Save(alert)) // the cache is expected to pick this up via its update hook
+
+	refreshed, ok := cache.Refresh(snapshot)
+	require.True(t, ok)
+	assert.Equal(t, snapshot.Id, refreshed.Id)
+	assert.True(t, refreshed.Triggered, "refresh should return the updated cached value rather than the stale snapshot")
+
+	require.NoError(t, hub.Delete(alert)) // the cache is expected to drop the entry via its delete hook
+	_, ok = cache.Refresh(snapshot)
+	assert.False(t, ok, "refresh should report false when the cached alert no longer exists")
+}
+
+func TestAlertManagerCacheLifecycle(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+	require.NoError(t, err)
+	system := systems[0]
+
+	// Create an alert
+	alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "CPU",
+		"system": system.Id,
+		"user":   user.Id,
+		"value":  80,
+		"min":    1,
+	})
+	require.NoError(t, err)
+
+	am := hub.AlertManager
+	cache := am.GetSystemAlertsCache()
+
+	// Verify it's in cache (it should be since CreateRecord triggers the event)
+	assert.Len(t, cache.GetSystemAlerts(system.Id), 1)
+	assert.Equal(t, alert.Id, cache.GetSystemAlerts(system.Id)[0].Id)
+	assert.EqualValues(t, 80, cache.GetSystemAlerts(system.Id)[0].Value)
+
+	// Update the alert through PocketBase to trigger events
+	alert.Set("value", 85)
+	require.NoError(t, hub.Save(alert))
+
+	// Verify the alert is still cached exactly once and now carries the updated value (85)
+	cachedAlerts := cache.GetSystemAlerts(system.Id)
+	assert.Len(t, cachedAlerts, 1)
+	assert.EqualValues(t, 85, cachedAlerts[0].Value)
+
+	// Delete the alert through PocketBase to trigger events
+	require.NoError(t, hub.Delete(alert))
+
+	// Verify it's removed from cache
+	assert.Empty(t, cache.GetSystemAlerts(system.Id), "alert should be removed from cache after PocketBase delete")
+}
+
+// func TestAlertManagerCacheMovesAlertToNewSystemOnUpdate(t *testing.T) {
+// 	hub, user := beszelTests.GetHubWithUser(t)
+// 	defer hub.Cleanup()
+
+// 	systems, err := beszelTests.CreateSystems(hub, 2, user.Id, "up")
+// 	require.NoError(t, err)
+// 	system1 := systems[0]
+// 	system2 := systems[1]
+
+// 	alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+// 		"name":   "CPU",
+// 		"system": system1.Id,
+// 		"user":   user.Id,
+// 		"value":  80,
+// 		"min":    1,
+// 	})
+// 	require.NoError(t, err)
+
+// 	am := hub.AlertManager
+// 	cache := am.GetSystemAlertsCache()
+
+// 	// Initially in system1 cache
+// 	assert.Len(t, cache.Get(system1.Id), 1)
+// 	assert.Empty(t, cache.Get(system2.Id))
+
+// 	// Move alert to system2
+// 	alert.Set("system", system2.Id)
+// 	require.NoError(t, hub.Save(alert))
+
+// 	// DEBUG: print if it is found
+// 	// fmt.Printf("system1 alerts after update: %v\n", cache.Get(system1.Id))
+
+// 	// Should be removed from system1 and present in system2
+// 	assert.Empty(t, cache.GetType(system1.Id, "CPU"), "updated alerts should be evicted from the previous system cache")
+// 	require.Len(t, cache.Get(system2.Id), 1)
+// 	assert.Equal(t, alert.Id, cache.Get(system2.Id)[0].Id)
+// }
--- a/internal/alerts/alerts_disk_test.go
+++ b/internal/alerts/alerts_disk_test.go
@@ -0,0 +1,155 @@
+//go:build testing
+
+package alerts_test
+
+import (
+	"encoding/json"
+	"testing"
+	"time"
+
+	"github.com/henrygd/beszel/internal/entities/system"
+	beszelTests "github.com/henrygd/beszel/internal/tests"
+
+	"github.com/pocketbase/dbx"
+	"github.com/pocketbase/pocketbase/tools/types"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestDiskAlertExtraFsMultiMinute tests that multi-minute disk alerts correctly use historical per-minute values for extra (non-root)
+// filesystems, not the current live snapshot. Root disk stays at 30% while /mnt/data goes 92% -> 50% against an 80%/2-minute alert.
+func TestDiskAlertExtraFsMultiMinute(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+	require.NoError(t, err)
+	systemRecord := systems[0]
+
+	// Disk alert: threshold 80%, min=2 (requires historical averaging)
+	diskAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "Disk",
+		"system": systemRecord.Id,
+		"user":   user.Id,
+		"value":  80, // threshold: 80%
+		"min":    2,  // 2 minutes - requires historical averaging
+	})
+	require.NoError(t, err)
+	assert.False(t, diskAlert.GetBool("triggered"), "Alert should not be triggered initially")
+
+	am := hub.GetAlertManager()
+	now := time.Now().UTC()
+
+	extraFsHigh := map[string]*system.FsStats{
+		"/mnt/data": {DiskTotal: 1000, DiskUsed: 920}, // 92% - above threshold
+	}
+
+	// Insert 4 historical records spread over 3 minutes (same pattern as battery tests).
+	// The oldest record must predate (now - 2min) so the alert time window is valid.
+	recordTimes := []time.Duration{
+		-180 * time.Second, // 3 min ago - anchors oldest record before alert.time
+		-90 * time.Second,
+		-60 * time.Second,
+		-30 * time.Second,
+	}
+
+	for _, offset := range recordTimes {
+		stats := system.Stats{
+			DiskPct: 30, // root disk at 30% - below threshold
+			ExtraFs: extraFsHigh,
+		}
+		statsJSON, _ := json.Marshal(stats)
+
+		recordTime := now.Add(offset)
+		record, err := beszelTests.CreateRecord(hub, "system_stats", map[string]any{
+			"system": systemRecord.Id,
+			"type":   "1m",
+			"stats":  string(statsJSON),
+		})
+		require.NoError(t, err)
+		record.SetRaw("created", recordTime.Format(types.DefaultDateLayout))
+		err = hub.SaveNoValidate(record)
+		require.NoError(t, err)
+	}
+
+	combinedDataHigh := &system.CombinedData{
+		Stats: system.Stats{
+			DiskPct: 30,
+			ExtraFs: extraFsHigh,
+		},
+		Info: system.Info{
+			DiskPct: 30,
+		},
+	}
+
+	systemRecord.Set("updated", now)
+	err = hub.SaveNoValidate(systemRecord)
+	require.NoError(t, err)
+
+	err = am.HandleSystemAlerts(systemRecord, combinedDataHigh)
+	require.NoError(t, err)
+
+	time.Sleep(20 * time.Millisecond)
+
+	diskAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": diskAlert.Id})
+	require.NoError(t, err)
+	assert.True(t, diskAlert.GetBool("triggered"),
+		"Alert SHOULD be triggered when extra disk average (92%%) exceeds threshold (80%%)")
+
+	// --- Resolution: extra disk drops to 50%, alert should resolve ---
+
+	extraFsLow := map[string]*system.FsStats{
+		"/mnt/data": {DiskTotal: 1000, DiskUsed: 500}, // 50% - below threshold
+	}
+
+	newNow := now.Add(2 * time.Minute)
+	recordTimesLow := []time.Duration{
+		-180 * time.Second,
+		-90 * time.Second,
+		-60 * time.Second,
+		-30 * time.Second,
+	}
+
+	for _, offset := range recordTimesLow {
+		stats := system.Stats{
+			DiskPct: 30,
+			ExtraFs: extraFsLow,
+		}
+		statsJSON, _ := json.Marshal(stats)
+
+		recordTime := newNow.Add(offset)
+		record, err := beszelTests.CreateRecord(hub, "system_stats", map[string]any{
+			"system": systemRecord.Id,
+			"type":   "1m",
+			"stats":  string(statsJSON),
+		})
+		require.NoError(t, err)
+		record.SetRaw("created", recordTime.Format(types.DefaultDateLayout))
+		err = hub.SaveNoValidate(record)
+		require.NoError(t, err)
+	}
+
+	combinedDataLow := &system.CombinedData{
+		Stats: system.Stats{
+			DiskPct: 30,
+			ExtraFs: extraFsLow,
+		},
+		Info: system.Info{
+			DiskPct: 30,
+		},
+	}
+
+	systemRecord.Set("updated", newNow)
+	err = hub.SaveNoValidate(systemRecord)
+	require.NoError(t, err)
+
+	err = am.HandleSystemAlerts(systemRecord, combinedDataLow)
+	require.NoError(t, err)
+
+	time.Sleep(20 * time.Millisecond)
+
+	diskAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": diskAlert.Id})
+	require.NoError(t, err)
+	assert.False(t, diskAlert.GetBool("triggered"),
+		"Alert should be resolved when extra disk average (50%%) drops below threshold (80%%)")
+}
--- a/internal/alerts/alerts_quiet_hours_test.go
+++ b/internal/alerts/alerts_quiet_hours_test.go
@@ -0,0 +1,425 @@
+//go:build testing
+
+package alerts_test
+
+import (
+	"testing"
+	"testing/synctest"
+	"time"
+
+	"github.com/henrygd/beszel/internal/alerts"
+	beszelTests "github.com/henrygd/beszel/internal/tests"
+
+	"github.com/pocketbase/dbx"
+	"github.com/stretchr/testify/assert"
+)
+
+// TestAlertSilencedOneTime checks one-time quiet hours: an active window silences
+// notifications, an expired window changes nothing, and a future window does not apply yet.
+func TestAlertSilencedOneTime(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// Create a system
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+	assert.NoError(t, err)
+	system := systems[0]
+
+	// Create an alert; only its existence matters, so the returned record is discarded
+	_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "CPU",
+		"system": system.Id,
+		"user":   user.Id,
+		"value":  80,
+		"min":    1,
+	})
+	assert.NoError(t, err)
+
+	// Create a one-time quiet hours window (current time - 1 hour to current time + 1 hour)
+	now := time.Now().UTC()
+	startTime := now.Add(-1 * time.Hour)
+	endTime := now.Add(1 * time.Hour)
+
+	_, err = beszelTests.CreateRecord(hub, "quiet_hours", map[string]any{
+		"user":   user.Id,
+		"system": system.Id,
+		"type":   "one-time",
+		"start":  startTime,
+		"end":    endTime,
+	})
+	assert.NoError(t, err)
+
+	// Get alert manager
+	am := alerts.NewAlertManager(hub)
+	defer am.Stop()
+
+	// Test that alert is silenced
+	silenced := am.IsNotificationSilenced(user.Id, system.Id)
+	assert.True(t, silenced, "Alert should be silenced during active one-time window")
+
+	// Create a window that has already ended
+	pastStart := now.Add(-3 * time.Hour)
+	pastEnd := now.Add(-2 * time.Hour)
+
+	_, err = beszelTests.CreateRecord(hub, "quiet_hours", map[string]any{
+		"user":   user.Id,
+		"system": system.Id,
+		"type":   "one-time",
+		"start":  pastStart,
+		"end":    pastEnd,
+	})
+	assert.NoError(t, err)
+
+	// Should still be silenced because of the first window
+	silenced = am.IsNotificationSilenced(user.Id, system.Id)
+	assert.True(t, silenced, "Alert should still be silenced (past window doesn't affect active window)")
+
+	// Clear all windows and create a future window
+	_, err = hub.DB().NewQuery("DELETE FROM quiet_hours").Execute()
+	assert.NoError(t, err)
+
+	futureStart := now.Add(2 * time.Hour)
+	futureEnd := now.Add(3 * time.Hour)
+
+	_, err = beszelTests.CreateRecord(hub, "quiet_hours", map[string]any{
+		"user":   user.Id,
+		"system": system.Id,
+		"type":   "one-time",
+		"start":  futureStart,
+		"end":    futureEnd,
+	})
+	assert.NoError(t, err)
+
+	// Alert should NOT be silenced (window hasn't started yet)
+	silenced = am.IsNotificationSilenced(user.Id, system.Id)
+	assert.False(t, silenced, "Alert should not be silenced (window hasn't started)")
+}
+
+func TestAlertSilencedDaily(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// Create a system
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+	assert.NoError(t, err)
+	system := systems[0]
+
+	// Create the alert manager under test
+	am := alerts.NewAlertManager(hub)
+	defer am.Stop()
+
+	// Read the current UTC hour and minute so a daily window can be built around "now"
+	now := time.Now().UTC()
+	currentHour := now.Hour()
+	currentMin := now.Minute()
+
+	// Create a window from 1 hour ago to 1 hour from now (hours wrap around midnight via % 24)
+	startHour := (currentHour - 1 + 24) % 24
+	endHour := (currentHour + 1) % 24
+
+	// Create times with just the hours/minutes we want (date doesn't matter for daily)
+	startTime := time.Date(2000, 1, 1, startHour, currentMin, 0, 0, time.UTC)
+	endTime := time.Date(2000, 1, 1, endHour, currentMin, 0, 0, time.UTC)
+
+	_, err = beszelTests.CreateRecord(hub, "quiet_hours", map[string]any{
+		"user":   user.Id,
+		"system": system.Id,
+		"type":   "daily",
+		"start":  startTime,
+		"end":    endTime,
+	})
+	assert.NoError(t, err)
+
+	// Alert should be silenced (current time is within the daily window)
+	silenced := am.IsNotificationSilenced(user.Id, system.Id)
+	assert.True(t, silenced, "Alert should be silenced during active daily window")
+
+	// Clear windows and create one that doesn't include current time
+	_, err = hub.DB().NewQuery("DELETE FROM quiet_hours").Execute()
+	assert.NoError(t, err)
+
+	// Create a window starting 6 hours and ending 12 hours after the current hour (minutes zeroed)
+	futureStartHour := (currentHour + 6) % 24
+	futureEndHour := (currentHour + 12) % 24
+
+	startTime = time.Date(2000, 1, 1, futureStartHour, 0, 0, 0, time.UTC)
+	endTime = time.Date(2000, 1, 1, futureEndHour, 0, 0, 0, time.UTC)
+
+	_, err = beszelTests.CreateRecord(hub, "quiet_hours", map[string]any{
+		"user":   user.Id,
+		"system": system.Id,
+		"type":   "daily",
+		"start":  startTime,
+		"end":    endTime,
+	})
+	assert.NoError(t, err)
+
+	// Alert should NOT be silenced (the current time lies outside the new window)
+	silenced = am.IsNotificationSilenced(user.Id, system.Id)
+	assert.False(t, silenced, "Alert should not be silenced (outside daily window)")
+}
+
+func TestAlertSilencedDailyMidnightCrossing(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// Create a system
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+	assert.NoError(t, err)
+	system := systems[0]
+
+	// Create the alert manager (not queried below; see the note before the assertions)
+	am := alerts.NewAlertManager(hub)
+	defer am.Stop()
+
+	// Create a window that crosses midnight: 22:00 - 02:00
+	startTime := time.Date(2000, 1, 1, 22, 0, 0, 0, time.UTC)
+	endTime := time.Date(2000, 1, 1, 2, 0, 0, 0, time.UTC)
+
+	_, err = beszelTests.CreateRecord(hub, "quiet_hours", map[string]any{
+		"user":   user.Id,
+		"system": system.Id,
+		"type":   "daily",
+		"start":  startTime,
+		"end":    endTime,
+	})
+	assert.NoError(t, err)
+
+	// NOTE: this test does not call IsNotificationSilenced. The current time
+	// can't be controlled here, so it only verifies that a window whose end hour
+	// is earlier than its start hour (22:00 -> 02:00) is stored as written.
+	windows, err := hub.FindAllRecords("quiet_hours", dbx.HashExp{
+		"user":   user.Id,
+		"system": system.Id,
+	})
+	assert.NoError(t, err)
+	assert.Len(t, windows, 1, "Should have created 1 window")
+
+	window := windows[0]
+	assert.Equal(t, "daily", window.GetString("type"))
+	assert.Equal(t, 22, window.GetDateTime("start").Time().Hour())
+	assert.Equal(t, 2, window.GetDateTime("end").Time().Hour())
+}
+
+func TestAlertSilencedGlobal(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// Create multiple systems
+	systems, err := beszelTests.CreateSystems(hub, 3, user.Id, "up")
+	assert.NoError(t, err)
+
+	// Create the alert manager under test
+	am := alerts.NewAlertManager(hub)
+	defer am.Stop()
+
+	// Create a global quiet hours window (no system specified) that is active right now
+	now := time.Now().UTC()
+	startTime := now.Add(-1 * time.Hour)
+	endTime := now.Add(1 * time.Hour)
+
+	_, err = beszelTests.CreateRecord(hub, "quiet_hours", map[string]any{
+		"user":  user.Id,
+		"type":  "one-time",
+		"start": startTime,
+		"end":   endTime,
+		// system field is empty/null for global windows
+	})
+	assert.NoError(t, err)
+
+	// All systems should be silenced
+	for _, system := range systems {
+		silenced := am.IsNotificationSilenced(user.Id, system.Id)
+		assert.True(t, silenced, "Alert should be silenced for system %s (global window)", system.Id)
+	}
+
+	// A global window is expected to apply to any system ID, even one that doesn't exist
+	silenced := am.IsNotificationSilenced(user.Id, "nonexistent-system")
+	assert.True(t, silenced, "Alert should be silenced for any system (global window)")
+}
+
+func TestAlertSilencedSystemSpecific(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// Create two systems owned by the same user
+	systems, err := beszelTests.CreateSystems(hub, 2, user.Id, "up")
+	assert.NoError(t, err)
+	system1 := systems[0]
+	system2 := systems[1]
+
+	// Create the alert manager under test
+	am := alerts.NewAlertManager(hub)
+	defer am.Stop()
+
+	// Create a currently active quiet hours window scoped to system1 only
+	now := time.Now().UTC()
+	startTime := now.Add(-1 * time.Hour)
+	endTime := now.Add(1 * time.Hour)
+
+	_, err = beszelTests.CreateRecord(hub, "quiet_hours", map[string]any{
+		"user":   user.Id,
+		"system": system1.Id,
+		"type":   "one-time",
+		"start":  startTime,
+		"end":    endTime,
+	})
+	assert.NoError(t, err)
+
+	// System1 should be silenced
+	silenced := am.IsNotificationSilenced(user.Id, system1.Id)
+	assert.True(t, silenced, "Alert should be silenced for system1")
+
+	// System2 should NOT be silenced (the window names system1 only)
+	silenced = am.IsNotificationSilenced(user.Id, system2.Id)
+	assert.False(t, silenced, "Alert should not be silenced for system2")
+}
+
+func TestAlertSilencedMultiUser(t *testing.T) {
+	hub, _ := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// Create two users (the default user returned with the hub is not used)
+	user1, err := beszelTests.CreateUser(hub, "user1@example.com", "password")
+	assert.NoError(t, err)
+
+	user2, err := beszelTests.CreateUser(hub, "user2@example.com", "password")
+	assert.NoError(t, err)
+
+	// Create a system accessible to both users
+	system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
+		"name":  "shared-system",
+		"users": []string{user1.Id, user2.Id},
+		"host":  "127.0.0.1",
+	})
+	assert.NoError(t, err)
+
+	// Create the alert manager under test
+	am := alerts.NewAlertManager(hub)
+	defer am.Stop()
+
+	// Create a currently active quiet hours window for user1 only
+	now := time.Now().UTC()
+	startTime := now.Add(-1 * time.Hour)
+	endTime := now.Add(1 * time.Hour)
+
+	_, err = beszelTests.CreateRecord(hub, "quiet_hours", map[string]any{
+		"user":   user1.Id,
+		"system": system.Id,
+		"type":   "one-time",
+		"start":  startTime,
+		"end":    endTime,
+	})
+	assert.NoError(t, err)
+
+	// User1 should be silenced
+	silenced := am.IsNotificationSilenced(user1.Id, system.Id)
+	assert.True(t, silenced, "Alert should be silenced for user1")
+
+	// User2 should NOT be silenced (quiet hours are per user, even on a shared system)
+	silenced = am.IsNotificationSilenced(user2.Id, system.Id)
+	assert.False(t, silenced, "Alert should not be silenced for user2")
+}
+
+func TestAlertSilencedWithActualAlert(t *testing.T) {
+	synctest.Test(t, func(t *testing.T) {
+		hub, user := beszelTests.GetHubWithUser(t)
+		defer hub.Cleanup()
+
+		// Create a system
+		systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+		assert.NoError(t, err)
+		system := systems[0]
+
+		// Create a status alert
+		_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
+			"name":   "Status",
+			"system": system.Id,
+			"user":   user.Id,
+			"min":    1,
+		})
+		assert.NoError(t, err)
+
+		// Make sure user settings with an email address exist (create them only if missing)
+		userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", dbx.Params{"user": user.Id})
+		if err != nil || userSettings == nil {
+			userSettings, err = beszelTests.CreateRecord(hub, "user_settings", map[string]any{
+				"user": user.Id,
+				"settings": map[string]any{
+					"emails": []string{"test@example.com"},
+				},
+			})
+			assert.NoError(t, err)
+		}
+
+		// Create a quiet hours window that is active right now
+		now := time.Now().UTC()
+		startTime := now.Add(-1 * time.Hour)
+		endTime := now.Add(1 * time.Hour)
+
+		_, err = beszelTests.CreateRecord(hub, "quiet_hours", map[string]any{
+			"user":   user.Id,
+			"system": system.Id,
+			"type":   "one-time",
+			"start":  startTime,
+			"end":    endTime,
+		})
+		assert.NoError(t, err)
+
+		// Get initial email count
+		initialEmailCount := hub.TestMailer.TotalSend()
+
+		// Trigger an alert by setting system to down
+		system.Set("status", "down")
+		err = hub.SaveNoValidate(system)
+		assert.NoError(t, err)
+
+		// Wait for the alert to be processed (1 minute delay from "min": 1, plus buffer)
+		time.Sleep(time.Second * 75)
+		synctest.Wait()
+
+		// Check that no email was sent (because alert is silenced)
+		finalEmailCount := hub.TestMailer.TotalSend()
+		assert.Equal(t, initialEmailCount, finalEmailCount, "No emails should be sent when alert is silenced")
+
+		// Clear quiet hours windows
+		_, err = hub.DB().NewQuery("DELETE FROM quiet_hours").Execute()
+		assert.NoError(t, err)
+
+		// Reset system to up, then down again
+		system.Set("status", "up")
+		err = hub.SaveNoValidate(system)
+		assert.NoError(t, err)
+		time.Sleep(100 * time.Millisecond)
+
+		system.Set("status", "down")
+		err = hub.SaveNoValidate(system)
+		assert.NoError(t, err)
+
+		// Wait for the alert to be processed (same delay as above)
+		time.Sleep(time.Second * 75)
+		synctest.Wait()
+
+		// Now an email should be sent, since no quiet hours window remains
+		newEmailCount := hub.TestMailer.TotalSend()
+		assert.Greater(t, newEmailCount, finalEmailCount, "Email should be sent when not silenced")
+	})
+}
+
+func TestAlertSilencedNoWindows(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// Create a system
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+	assert.NoError(t, err)
+	system := systems[0]
+
+	// Create the alert manager under test
+	am := alerts.NewAlertManager(hub)
+	defer am.Stop()
+
+	// Baseline: no quiet_hours records are created here, so the alert should NOT be silenced
+	silenced := am.IsNotificationSilenced(user.Id, system.Id)
+	assert.False(t, silenced, "Alert should not be silenced when no windows exist")
+}
--- a/internal/alerts/alerts_smart.go
+++ b/internal/alerts/alerts_smart.go
@@ -0,0 +1,107 @@
+package alerts
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/pocketbase/pocketbase/core"
+)
+
+// handleSmartDeviceAlert alerts every user of the device's system when its SMART state worsens (see shouldSendSmartDeviceAlert).
+// It is automatic (no user opt-in) and always continues the hook chain via e.Next(); lookup and send failures are only logged.
+func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error {
+	oldState := e.Record.Original().GetString("state")
+	newState := e.Record.GetString("state")
+
+	if !shouldSendSmartDeviceAlert(oldState, newState) {
+		return e.Next()
+	}
+
+	systemID := e.Record.GetString("system")
+	if systemID == "" {
+		return e.Next()
+	}
+
+	// Fetch the system record to get the name and users
+	systemRecord, err := e.App.FindRecordById("systems", systemID)
+	if err != nil {
+		e.App.Logger().Error("Failed to find system for SMART alert", "err", err, "systemID", systemID)
+		return e.Next()
+	}
+
+	systemName := systemRecord.GetString("name")
+	deviceName := e.Record.GetString("name")
+	model := e.Record.GetString("model")
+	statusLabel := smartStateLabel(newState)
+
+	// Build alert message; the model is only mentioned when the device reports one
+	title := fmt.Sprintf("SMART %s on %s: %s %s", statusLabel, systemName, deviceName, smartStateEmoji(newState))
+	var message string
+	if model != "" {
+		message = fmt.Sprintf("Disk %s (%s) SMART status changed to %s", deviceName, model, newState)
+	} else {
+		message = fmt.Sprintf("Disk %s SMART status changed to %s", deviceName, newState)
+	}
+
+	// Get users associated with the system; nothing to send if there are none
+	userIDs := systemRecord.GetStringSlice("users")
+	if len(userIDs) == 0 {
+		return e.Next()
+	}
+
+	// Send alert to each user; a failure for one user is logged and does not stop the others
+	for _, userID := range userIDs {
+		if err := am.SendAlert(AlertMessageData{
+			UserID:   userID,
+			SystemID: systemID,
+			Title:    title,
+			Message:  message,
+			Link:     am.hub.MakeLink("system", systemID),
+			LinkText: "View " + systemName,
+		}); err != nil {
+			e.App.Logger().Error("Failed to send SMART alert", "err", err, "userID", userID)
+		}
+	}
+
+	return e.Next()
+}
+
+func shouldSendSmartDeviceAlert(oldState, newState string) bool {
+	oldSeverity := smartStateSeverity(oldState)
+	newSeverity := smartStateSeverity(newState)
+
+	// Alert only on worsening transitions out of a known state (PASSED or WARNING).
+	// Unknown old states (severity 0), recoveries and unchanged states are ignored.
+	return oldSeverity >= 1 && newSeverity > oldSeverity
+}
+
+func smartStateSeverity(state string) int { // ranks SMART states; a higher value is worse
+	switch state {
+	case "PASSED":
+		return 1
+	case "WARNING":
+		return 2
+	case "FAILED":
+		return 3
+	default:
+		return 0 // unrecognized or empty state
+	}
+}
+
+func smartStateEmoji(state string) string { // emoji appended to the alert title
+	switch state {
+	case "WARNING":
+		return "\U0001F7E0" // large orange circle
+	default:
+		return "\U0001F534" // large red circle, used for every state other than WARNING
+	}
+}
+
+func smartStateLabel(state string) string { // human-readable label used in the alert title
+	switch state {
+	case "FAILED":
+		return "failure" // a noun, so the title reads "SMART failure on ..."
+	default:
+		return strings.ToLower(state) // e.g. "WARNING" -> "warning"
+	}
+}
--- a/internal/alerts/alerts_smart_test.go
+++ b/internal/alerts/alerts_smart_test.go
@@ -0,0 +1,264 @@
+//go:build testing
+
+package alerts_test
+
+import (
+	"testing"
+	"time"
+
+	beszelTests "github.com/henrygd/beszel/internal/tests"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestSmartDeviceAlert(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// Create a system for the user
+	system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
+		"name":  "test-system",
+		"users": []string{user.Id},
+		"host":  "127.0.0.1",
+	})
+	assert.NoError(t, err)
+
+	// Create a smart_device with state PASSED
+	smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
+		"system": system.Id,
+		"name":   "/dev/sda",
+		"model":  "Samsung SSD 970 EVO",
+		"state":  "PASSED",
+	})
+	assert.NoError(t, err)
+
+	// Verify no emails sent initially (creating the device must not alert)
+	assert.Zero(t, hub.TestMailer.TotalSend(), "should have 0 emails sent initially")
+
+	// Re-fetch the record so PocketBase can properly track original values
+	smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
+	assert.NoError(t, err)
+
+	// Update the smart device state to FAILED
+	smartDevice.Set("state", "FAILED")
+	err = hub.Save(smartDevice)
+	assert.NoError(t, err)
+
+	// Wait for the alert to be processed
+	time.Sleep(50 * time.Millisecond)
+
+	// Verify that exactly one email was sent
+	assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent after state changed to FAILED")
+
+	// Check the email content: subject names the system and device, body names the model and new state
+	lastMessage := hub.TestMailer.LastMessage()
+	assert.Contains(t, lastMessage.Subject, "SMART failure on test-system")
+	assert.Contains(t, lastMessage.Subject, "/dev/sda")
+	assert.Contains(t, lastMessage.Text, "Samsung SSD 970 EVO")
+	assert.Contains(t, lastMessage.Text, "FAILED")
+}
+
+func TestSmartDeviceAlertPassedToWarning(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
+		"name":  "test-system",
+		"users": []string{user.Id},
+		"host":  "127.0.0.1",
+	})
+	assert.NoError(t, err)
+
+	smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
+		"system": system.Id,
+		"name":   "/dev/mmcblk0",
+		"model":  "eMMC",
+		"state":  "PASSED",
+	})
+	assert.NoError(t, err)
+
+	smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id) // re-fetch so the original state is tracked
+	assert.NoError(t, err)
+
+	smartDevice.Set("state", "WARNING") // PASSED -> WARNING is a worsening transition
+	err = hub.Save(smartDevice)
+	assert.NoError(t, err)
+
+	time.Sleep(50 * time.Millisecond) // give the alert time to be processed
+
+	assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent after state changed to WARNING")
+	lastMessage := hub.TestMailer.LastMessage()
+	assert.Contains(t, lastMessage.Subject, "SMART warning on test-system")
+	assert.Contains(t, lastMessage.Text, "WARNING")
+}
+
+func TestSmartDeviceAlertWarningToFailed(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
+		"name":  "test-system",
+		"users": []string{user.Id},
+		"host":  "127.0.0.1",
+	})
+	assert.NoError(t, err)
+
+	smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
+		"system": system.Id,
+		"name":   "/dev/mmcblk0",
+		"model":  "eMMC",
+		"state":  "WARNING",
+	})
+	assert.NoError(t, err)
+
+	smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id) // re-fetch so the original state is tracked
+	assert.NoError(t, err)
+
+	smartDevice.Set("state", "FAILED") // WARNING -> FAILED is a worsening transition
+	err = hub.Save(smartDevice)
+	assert.NoError(t, err)
+
+	time.Sleep(50 * time.Millisecond) // give the alert time to be processed
+
+	assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent after state changed from WARNING to FAILED")
+	lastMessage := hub.TestMailer.LastMessage()
+	assert.Contains(t, lastMessage.Subject, "SMART failure on test-system")
+	assert.Contains(t, lastMessage.Text, "FAILED")
+}
+
+func TestSmartDeviceAlertNoAlertOnNonPassedToFailed(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// Create a system for the user
+	system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
+		"name":  "test-system",
+		"users": []string{user.Id},
+		"host":  "127.0.0.1",
+	})
+	assert.NoError(t, err)
+
+	// Create a smart_device with state UNKNOWN
+	smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
+		"system": system.Id,
+		"name":   "/dev/sda",
+		"model":  "Samsung SSD 970 EVO",
+		"state":  "UNKNOWN",
+	})
+	assert.NoError(t, err)
+
+	// Re-fetch the record so PocketBase can properly track original values
+	smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
+	assert.NoError(t, err)
+
+	// Update the state from UNKNOWN to FAILED - should NOT trigger alert.
+	// We only alert from known healthy/degraded states.
+	smartDevice.Set("state", "FAILED")
+	err = hub.Save(smartDevice)
+	assert.NoError(t, err)
+
+	time.Sleep(50 * time.Millisecond)
+
+	// Verify no email was sent (an UNKNOWN previous state never triggers an alert)
+	assert.Zero(t, hub.TestMailer.TotalSend(), "should have 0 emails when changing from UNKNOWN to FAILED")
+
+	// Re-fetch the record again
+	smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
+	assert.NoError(t, err)
+
+	// Update state from FAILED to PASSED - should NOT trigger alert (recovery, not worsening)
+	smartDevice.Set("state", "PASSED")
+	err = hub.Save(smartDevice)
+	assert.NoError(t, err)
+
+	time.Sleep(50 * time.Millisecond)
+
+	// Verify no email was sent
+	assert.Zero(t, hub.TestMailer.TotalSend(), "should have 0 emails when changing from FAILED to PASSED")
+}
+
+func TestSmartDeviceAlertMultipleUsers(t *testing.T) {
+	hub, user1 := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// Create a second user
+	user2, err := beszelTests.CreateUser(hub, "test2@example.com", "password")
+	assert.NoError(t, err)
+
+	// Create user settings for the second user so they have an email address to notify
+	_, err = beszelTests.CreateRecord(hub, "user_settings", map[string]any{
+		"user":     user2.Id,
+		"settings": `{"emails":["test2@example.com"],"webhooks":[]}`,
+	})
+	assert.NoError(t, err)
+
+	// Create a system with both users
+	system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
+		"name":  "shared-system",
+		"users": []string{user1.Id, user2.Id},
+		"host":  "127.0.0.1",
+	})
+	assert.NoError(t, err)
+
+	// Create a smart_device with state PASSED
+	smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
+		"system": system.Id,
+		"name":   "/dev/nvme0n1",
+		"model":  "WD Black SN850",
+		"state":  "PASSED",
+	})
+	assert.NoError(t, err)
+
+	// Re-fetch the record so PocketBase can properly track original values
+	smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
+	assert.NoError(t, err)
+
+	// Update the smart device state to FAILED
+	smartDevice.Set("state", "FAILED")
+	err = hub.Save(smartDevice)
+	assert.NoError(t, err)
+
+	time.Sleep(50 * time.Millisecond) // give the alerts time to be processed
+
+	// Verify that two emails were sent (one for each user)
+	assert.EqualValues(t, 2, hub.TestMailer.TotalSend(), "should have 2 emails sent for 2 users")
+}
+
+func TestSmartDeviceAlertWithoutModel(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// Create a system for the user
+	system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
+		"name":  "test-system",
+		"users": []string{user.Id},
+		"host":  "127.0.0.1",
+	})
+	assert.NoError(t, err)
+
+	// Create a smart_device with state PASSED but no model
+	smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
+		"system": system.Id,
+		"name":   "/dev/sdb",
+		"state":  "PASSED",
+	})
+	assert.NoError(t, err)
+
+	// Re-fetch the record so PocketBase can properly track original values
+	smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
+	assert.NoError(t, err)
+
+	// Update the smart device state to FAILED
+	smartDevice.Set("state", "FAILED")
+	err = hub.Save(smartDevice)
+	assert.NoError(t, err)
+
+	time.Sleep(50 * time.Millisecond) // give the alert time to be processed
+
+	// Verify that exactly one email was sent
+	assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent")
+
+	// Check that the message omits the "(model)" part instead of rendering empty parentheses
+	lastMessage := hub.TestMailer.LastMessage()
+	assert.NotContains(t, lastMessage.Text, "()", "should not have empty parentheses for missing model")
+	assert.Contains(t, lastMessage.Text, "/dev/sdb")
+}
--- a/internal/alerts/alerts_status.go
+++ b/internal/alerts/alerts_status.go
@@ -5,67 +5,28 @@ import (
 	"strings"
 	"time"

-	"github.com/pocketbase/dbx"
 	"github.com/pocketbase/pocketbase/core"
 )

-type alertTask struct {
-	action      string // "schedule" or "cancel"
-	systemName  string
-	alertRecord *core.Record
-	delay       time.Duration
-}
-
 type alertInfo struct {
-	systemName  string
-	alertRecord *core.Record
-	expireTime  time.Time
+	systemName string
+	alertData  CachedAlertData
+	expireTime time.Time
+	timer      *time.Timer
 }

-// startWorker is a long-running goroutine that processes alert tasks
-// every x seconds. It must be running to process status alerts.
-func (am *AlertManager) startWorker() {
-	processPendingAlerts := time.Tick(15 * time.Second)
-
-	// check for status alerts that are not resolved when system comes up
-	// (can be removed if we figure out core bug in #1052)
-	checkStatusAlerts := time.Tick(561 * time.Second)
-
-	for {
-		select {
-		case <-am.stopChan:
-			return
-		case task := <-am.alertQueue:
-			switch task.action {
-			case "schedule":
-				am.pendingAlerts.Store(task.alertRecord.Id, &alertInfo{
-					systemName:  task.systemName,
-					alertRecord: task.alertRecord,
-					expireTime:  time.Now().Add(task.delay),
-				})
-			case "cancel":
-				am.pendingAlerts.Delete(task.alertRecord.Id)
+// Stop cancels all pending status alert timers.
+func (am *AlertManager) Stop() {
+	am.stopOnce.Do(func() {
+		am.pendingAlerts.Range(func(key, value any) bool {
+			info := value.(*alertInfo)
+			if info.timer != nil {
+				info.timer.Stop()
 			}
-		case <-checkStatusAlerts:
-			resolveStatusAlerts(am.hub)
-		case <-processPendingAlerts:
-			// Check for expired alerts every tick
-			now := time.Now()
-			for key, value := range am.pendingAlerts.Range {
-				info := value.(*alertInfo)
-				if now.After(info.expireTime) {
-					// Downtime delay has passed, process alert
-					am.sendStatusAlert("down", info.systemName, info.alertRecord)
-					am.pendingAlerts.Delete(key)
-				}
-			}
-		}
-	}
-}
-
-// StopWorker shuts down the AlertManager.worker goroutine
-func (am *AlertManager) StopWorker() {
-	close(am.stopChan)
+			am.pendingAlerts.Delete(key)
+			return true
+		})
+	})
 }

 // HandleStatusAlerts manages the logic when system status changes.
@@ -74,82 +35,104 @@ func (am *AlertManager) HandleStatusAlerts(newStatus string, systemRecord *core.
 		return nil
 	}

-	alertRecords, err := am.getSystemStatusAlerts(systemRecord.Id)
-	if err != nil {
-		return err
-	}
-	if len(alertRecords) == 0 {
+	alerts := am.alertsCache.GetAlertsByName(systemRecord.Id, "Status")
+	if len(alerts) == 0 {
 		return nil
 	}

 	systemName := systemRecord.GetString("name")
 	if newStatus == "down" {
-		am.handleSystemDown(systemName, alertRecords)
+		am.handleSystemDown(systemName, alerts)
 	} else {
-		am.handleSystemUp(systemName, alertRecords)
+		am.handleSystemUp(systemName, alerts)
 	}
 	return nil
 }

-// getSystemStatusAlerts retrieves all "Status" alert records for a given system ID.
-func (am *AlertManager) getSystemStatusAlerts(systemID string) ([]*core.Record, error) {
-	alertRecords, err := am.hub.FindAllRecords("alerts", dbx.HashExp{
-		"system": systemID,
-		"name":   "Status",
-	})
-	if err != nil {
-		return nil, err
+// handleSystemDown manages the logic when a system status changes to "down". It schedules pending alerts for each alert record.
+func (am *AlertManager) handleSystemDown(systemName string, alerts []CachedAlertData) {
+	for _, alertData := range alerts {
+		min := max(1, int(alertData.Min))
+		am.schedulePendingStatusAlert(systemName, alertData, time.Duration(min)*time.Minute)
 	}
-	return alertRecords, nil
 }

-// Schedules delayed "down" alerts for each alert record.
-func (am *AlertManager) handleSystemDown(systemName string, alertRecords []*core.Record) {
-	for _, alertRecord := range alertRecords {
-		// Continue if alert is already scheduled
-		if _, exists := am.pendingAlerts.Load(alertRecord.Id); exists {
-			continue
-		}
-		// Schedule by adding to queue
-		min := max(1, alertRecord.GetInt("min"))
-		am.alertQueue <- alertTask{
-			action:      "schedule",
-			systemName:  systemName,
-			alertRecord: alertRecord,
-			delay:       time.Duration(min) * time.Minute,
-		}
+// schedulePendingStatusAlert sets up a timer to send a "down" alert after the specified delay if the system is still down.
+// It returns true if the alert was scheduled, or false if an alert was already pending for the given alert record.
+func (am *AlertManager) schedulePendingStatusAlert(systemName string, alertData CachedAlertData, delay time.Duration) bool {
+	alert := &alertInfo{
+		systemName: systemName,
+		alertData:  alertData,
+		expireTime: time.Now().Add(delay),
 	}
+	// Attach the timer before publishing the entry so cancelPendingAlert/Stop never read the field
+	// while it is being written. It is parked far in the future and armed only once we own the slot.
+	alert.timer = time.AfterFunc(time.Duration(1<<63-1), func() {
+		am.processPendingAlert(alertData.Id)
+	})
+	if _, loaded := am.pendingAlerts.LoadOrStore(alertData.Id, alert); loaded {
+		alert.timer.Stop()
+		return false
+	}
+	alert.timer.Reset(time.Until(alert.expireTime))
+	return true
 }

 // handleSystemUp manages the logic when a system status changes to "up".
 // It cancels any pending alerts and sends "up" alerts.
-func (am *AlertManager) handleSystemUp(systemName string, alertRecords []*core.Record) {
-	for _, alertRecord := range alertRecords {
-		alertRecordID := alertRecord.Id
+func (am *AlertManager) handleSystemUp(systemName string, alerts []CachedAlertData) {
+	for _, alertData := range alerts {
 		// If alert exists for record, delete and continue (down alert not sent)
-		if _, exists := am.pendingAlerts.Load(alertRecordID); exists {
-			am.alertQueue <- alertTask{
-				action:      "cancel",
-				alertRecord: alertRecord,
-			}
+		if am.cancelPendingAlert(alertData.Id) {
 			continue
 		}
-		// No alert scheduled for this record, send "up" alert
-		if err := am.sendStatusAlert("up", systemName, alertRecord); err != nil {
+		if !alertData.Triggered {
+			continue
+		}
+		if err := am.sendStatusAlert("up", systemName, alertData); err != nil {
 			am.hub.Logger().Error("Failed to send alert", "err", err)
 		}
 	}
 }

-// sendStatusAlert sends a status alert ("up" or "down") to the users associated with the alert records.
-func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, alertRecord *core.Record) error {
-	switch alertStatus {
-	case "up":
-		alertRecord.Set("triggered", false)
-	case "down":
-		alertRecord.Set("triggered", true)
+// cancelPendingAlert removes the pending alert for alertID and stops its timer. It returns true if a pending alert was found and removed, false otherwise.
+func (am *AlertManager) cancelPendingAlert(alertID string) bool {
+	value, loaded := am.pendingAlerts.LoadAndDelete(alertID)
+	if !loaded {
+		return false
+	}
+
+	info := value.(*alertInfo)
+	if info.timer != nil { // nil-safe: an entry without a timer is simply removed
+		info.timer.Stop()
+	}
+	return true
+}
+
+// processPendingAlert is the timer callback for a pending alert: it removes the pending entry and sends a "down" alert unless the alert cannot be refreshed from the cache or is already triggered.
+func (am *AlertManager) processPendingAlert(alertID string) {
+	value, loaded := am.pendingAlerts.LoadAndDelete(alertID)
+	if !loaded { // already cancelled or processed
+		return
+	}
+
+	info := value.(*alertInfo)
+	refreshedAlertData, ok := am.alertsCache.Refresh(info.alertData)
+	if !ok || refreshedAlertData.Triggered { // presumably the alert was deleted, or a "down" alert was already sent
+		return
+	}
+	if err := am.sendStatusAlert("down", info.systemName, refreshedAlertData); err != nil {
+		am.hub.Logger().Error("Failed to send alert", "err", err)
+	}
+}
+
+// sendStatusAlert sends a status alert ("up" or "down") to the users associated with the alert records.
+func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, alertData CachedAlertData) error {
+	// Update trigger state for alert record before sending alert
+	triggered := alertStatus == "down"
+	if err := am.setAlertTriggered(alertData, triggered); err != nil {
+		return err
 	}
-	am.hub.Save(alertRecord)

 	var emoji string
 	if alertStatus == "up" {
@@ -162,10 +145,11 @@ func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, a
 	message := strings.TrimSuffix(title, emoji)

 	// Get system ID for the link
-	systemID := alertRecord.GetString("system")
+	systemID := alertData.SystemID

 	return am.SendAlert(AlertMessageData{
-		UserID:   alertRecord.GetString("user"),
+		UserID:   alertData.UserID,
+		SystemID: systemID,
 		Title:    title,
 		Message:  message,
 		Link:     am.hub.MakeLink("system", systemID),
@@ -173,8 +157,8 @@ func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, a
 	})
 }

-// resolveStatusAlerts resolves any status alerts that weren't resolved
-// when system came up (https://github.com/henrygd/beszel/issues/1052)
+// resolveStatusAlerts resolves any triggered status alerts that weren't resolved
+// when system came up (https://github.com/henrygd/beszel/issues/1052).
 func resolveStatusAlerts(app core.App) error {
 	db := app.DB()
 	// Find all active status alerts where the system is actually up
@@ -204,3 +188,40 @@ func resolveStatusAlerts(app core.App) error {
 	}
 	return nil
 }
+
+// restorePendingStatusAlerts re-queues untriggered status alerts for systems that
+// are still down after a hub restart. This rebuilds the lost in-memory timer state.
+func (am *AlertManager) restorePendingStatusAlerts() error {
+	type pendingStatusAlert struct {
+		AlertID    string `db:"alert_id"`
+		SystemID   string `db:"system_id"`
+		SystemName string `db:"system_name"`
+	}
+
+	var pending []pendingStatusAlert
+	err := am.hub.DB().NewQuery(`
+		SELECT a.id AS alert_id, a.system AS system_id, s.name AS system_name
+		FROM alerts a
+		JOIN systems s ON a.system = s.id
+		WHERE a.name = 'Status'
+		AND a.triggered = false
+		AND s.status = 'down'
+	`).All(&pending)
+	if err != nil {
+		return err
+	}
+
+	// Make sure cache is populated before trying to restore pending alerts
+	_ = am.alertsCache.PopulateFromDB(false)
+
+	for _, item := range pending {
+		alertData, ok := am.alertsCache.GetAlert(item.SystemID, item.AlertID)
+		if !ok {
+			continue
+		}
+		min := max(1, int(alertData.Min))
+		am.schedulePendingStatusAlert(item.SystemName, alertData, time.Duration(min)*time.Minute)
+	}
+
+	return nil
+}
--- a/internal/alerts/alerts_status_test.go
+++ b/internal/alerts/alerts_status_test.go
@@ -0,0 +1,943 @@
+//go:build testing
+
+package alerts_test
+
+import (
+	"testing"
+	"testing/synctest"
+	"time"
+
+	"github.com/henrygd/beszel/internal/alerts"
+	beszelTests "github.com/henrygd/beszel/internal/tests"
+	"github.com/pocketbase/dbx"
+	"github.com/pocketbase/pocketbase/core"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func setStatusAlertEmail(t *testing.T, hub core.App, userID, email string) {
+	t.Helper()
+
+	userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": userID})
+	require.NoError(t, err)
+
+	userSettings.Set("settings", map[string]any{
+		"emails":   []string{email},
+		"webhooks": []string{},
+	})
+	require.NoError(t, hub.Save(userSettings))
+}
+
+func TestStatusAlerts(t *testing.T) {
+	synctest.Test(t, func(t *testing.T) {
+		hub, user := beszelTests.GetHubWithUser(t)
+		defer hub.Cleanup()
+
+		systems, err := beszelTests.CreateSystems(hub, 4, user.Id, "paused")
+		assert.NoError(t, err)
+
+		var alerts []*core.Record
+		for i, system := range systems {
+			alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+				"name":   "Status",
+				"system": system.Id,
+				"user":   user.Id,
+				"min":    i + 1,
+			})
+			assert.NoError(t, err)
+			alerts = append(alerts, alert)
+		}
+
+		time.Sleep(10 * time.Millisecond)
+
+		for _, alert := range alerts {
+			assert.False(t, alert.GetBool("triggered"), "Alert should not be triggered immediately")
+		}
+		if hub.TestMailer.TotalSend() != 0 {
+			assert.Zero(t, hub.TestMailer.TotalSend(), "Expected 0 messages, got %d", hub.TestMailer.TotalSend())
+		}
+		for _, system := range systems {
+			assert.EqualValues(t, "paused", system.GetString("status"), "System should be paused")
+		}
+		for _, system := range systems {
+			system.Set("status", "up")
+			err = hub.SaveNoValidate(system)
+			assert.NoError(t, err)
+		}
+		time.Sleep(time.Second)
+		assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
+		for _, system := range systems {
+			system.Set("status", "down")
+			err = hub.SaveNoValidate(system)
+			assert.NoError(t, err)
+		}
+		// after 30 seconds, should have 4 alerts in the pendingAlerts map, no triggered alerts
+		time.Sleep(time.Second * 30)
+		assert.EqualValues(t, 4, hub.GetPendingAlertsCount(), "should have 4 alerts in the pendingAlerts map")
+		triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
+		assert.NoError(t, err)
+		assert.EqualValues(t, 0, triggeredCount, "should have 0 alert triggered")
+		assert.EqualValues(t, 0, hub.TestMailer.TotalSend(), "should have 0 messages sent")
+		// after 1 minute 30 seconds, should have 1 triggered alert and 3 pending alerts
+		time.Sleep(time.Second * 60)
+		assert.EqualValues(t, 3, hub.GetPendingAlertsCount(), "should have 3 alerts in the pendingAlerts map")
+		triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
+		assert.NoError(t, err)
+		assert.EqualValues(t, 1, triggeredCount, "should have 1 alert triggered")
+		assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 messages sent")
+		// after 2 minutes 30 seconds, should have 2 triggered alerts and 2 pending alerts
+		time.Sleep(time.Second * 60)
+		assert.EqualValues(t, 2, hub.GetPendingAlertsCount(), "should have 2 alerts in the pendingAlerts map")
+		triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
+		assert.NoError(t, err)
+		assert.EqualValues(t, 2, triggeredCount, "should have 2 alert triggered")
+		assert.EqualValues(t, 2, hub.TestMailer.TotalSend(), "should have 2 messages sent")
+		// now we will bring the remaining systems back up
+		for _, system := range systems {
+			system.Set("status", "up")
+			err = hub.SaveNoValidate(system)
+			assert.NoError(t, err)
+		}
+		time.Sleep(time.Second)
+		// should have 0 alerts in the pendingAlerts map and 0 alerts triggered
+		assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
+		triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
+		assert.NoError(t, err)
+		assert.Zero(t, triggeredCount, "should have 0 alert triggered")
+		// 4 messages sent, 2 down alerts and 2 up alerts for first 2 systems
+		assert.EqualValues(t, 4, hub.TestMailer.TotalSend(), "should have 4 messages sent")
+	})
+}
+func TestStatusAlertRecoveryBeforeDeadline(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// Ensure user settings have an email
+	userSettings, _ := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
+	userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
+	hub.Save(userSettings)
+
+	// Initial email count
+	initialEmailCount := hub.TestMailer.TotalSend()
+
+	systemCollection, _ := hub.FindCollectionByNameOrId("systems")
+	system := core.NewRecord(systemCollection)
+	system.Set("name", "test-system")
+	system.Set("status", "up")
+	system.Set("host", "127.0.0.1")
+	system.Set("users", []string{user.Id})
+	hub.Save(system)
+
+	alertCollection, _ := hub.FindCollectionByNameOrId("alerts")
+	alert := core.NewRecord(alertCollection)
+	alert.Set("user", user.Id)
+	alert.Set("system", system.Id)
+	alert.Set("name", "Status")
+	alert.Set("triggered", false)
+	alert.Set("min", 1)
+	hub.Save(alert)
+
+	am := hub.AlertManager
+
+	// 1. System goes down
+	am.HandleStatusAlerts("down", system)
+	assert.Equal(t, 1, am.GetPendingAlertsCount(), "Alert should be scheduled")
+
+	// 2. System goes up BEFORE delay expires
+	// Triggering HandleStatusAlerts("up") SHOULD NOT send an alert.
+	am.HandleStatusAlerts("up", system)
+
+	assert.Equal(t, 0, am.GetPendingAlertsCount(), "Alert should be canceled if system recovers before delay expires")
+
+	// Verify that NO email was sent.
+	assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "Recovery notification should not be sent if system never went down")
+
+}
+
+func TestStatusAlertNormalRecovery(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// Ensure user settings have an email
+	userSettings, _ := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
+	userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
+	hub.Save(userSettings)
+
+	systemCollection, _ := hub.FindCollectionByNameOrId("systems")
+	system := core.NewRecord(systemCollection)
+	system.Set("name", "test-system")
+	system.Set("status", "up")
+	system.Set("host", "127.0.0.1")
+	system.Set("users", []string{user.Id})
+	hub.Save(system)
+
+	alertCollection, _ := hub.FindCollectionByNameOrId("alerts")
+	alert := core.NewRecord(alertCollection)
+	alert.Set("user", user.Id)
+	alert.Set("system", system.Id)
+	alert.Set("name", "Status")
+	alert.Set("triggered", true) // System was confirmed DOWN
+	hub.Save(alert)
+
+	am := hub.AlertManager
+	initialEmailCount := hub.TestMailer.TotalSend()
+
+	// System goes up
+	am.HandleStatusAlerts("up", system)
+
+	// Verify that an email WAS sent (normal recovery).
+	assert.Equal(t, initialEmailCount+1, hub.TestMailer.TotalSend(), "Recovery notification should be sent if system was triggered as down")
+
+}
+
+func TestHandleStatusAlertsDoesNotSendRecoveryWhileDownIsOnlyPending(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
+	require.NoError(t, err)
+	userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
+	require.NoError(t, hub.Save(userSettings))
+
+	systemCollection, err := hub.FindCollectionByNameOrId("systems")
+	require.NoError(t, err)
+	system := core.NewRecord(systemCollection)
+	system.Set("name", "test-system")
+	system.Set("status", "up")
+	system.Set("host", "127.0.0.1")
+	system.Set("users", []string{user.Id})
+	require.NoError(t, hub.Save(system))
+
+	alertCollection, err := hub.FindCollectionByNameOrId("alerts")
+	require.NoError(t, err)
+	alert := core.NewRecord(alertCollection)
+	alert.Set("user", user.Id)
+	alert.Set("system", system.Id)
+	alert.Set("name", "Status")
+	alert.Set("triggered", false)
+	alert.Set("min", 1)
+	require.NoError(t, hub.Save(alert))
+
+	initialEmailCount := hub.TestMailer.TotalSend()
+	am := alerts.NewTestAlertManagerWithoutWorker(hub)
+
+	require.NoError(t, am.HandleStatusAlerts("down", system))
+	assert.Equal(t, 1, am.GetPendingAlertsCount(), "down transition should register a pending alert immediately")
+
+	require.NoError(t, am.HandleStatusAlerts("up", system))
+	assert.Zero(t, am.GetPendingAlertsCount(), "recovery should cancel the pending down alert")
+	assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "recovery notification should not be sent before a down alert triggers")
+
+	alertRecord, err := hub.FindRecordById("alerts", alert.Id)
+	require.NoError(t, err)
+	assert.False(t, alertRecord.GetBool("triggered"), "alert should remain untriggered when downtime never matured")
+}
+
+func TestStatusAlertTimerCancellationPreventsBoundaryDelivery(t *testing.T) {
+	synctest.Test(t, func(t *testing.T) {
+		hub, user := beszelTests.GetHubWithUser(t)
+		defer hub.Cleanup()
+
+		userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
+		require.NoError(t, err)
+		userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
+		require.NoError(t, hub.Save(userSettings))
+
+		systemCollection, err := hub.FindCollectionByNameOrId("systems")
+		require.NoError(t, err)
+		system := core.NewRecord(systemCollection)
+		system.Set("name", "test-system")
+		system.Set("status", "up")
+		system.Set("host", "127.0.0.1")
+		system.Set("users", []string{user.Id})
+		require.NoError(t, hub.Save(system))
+
+		alertCollection, err := hub.FindCollectionByNameOrId("alerts")
+		require.NoError(t, err)
+		alert := core.NewRecord(alertCollection)
+		alert.Set("user", user.Id)
+		alert.Set("system", system.Id)
+		alert.Set("name", "Status")
+		alert.Set("triggered", false)
+		alert.Set("min", 1)
+		require.NoError(t, hub.Save(alert))
+
+		initialEmailCount := hub.TestMailer.TotalSend()
+		am := alerts.NewTestAlertManagerWithoutWorker(hub)
+
+		require.NoError(t, am.HandleStatusAlerts("down", system))
+		assert.Equal(t, 1, am.GetPendingAlertsCount(), "down transition should register a pending alert immediately")
+		require.True(t, am.ResetPendingAlertTimer(alert.Id, 25*time.Millisecond), "test should shorten the pending alert timer")
+
+		time.Sleep(10 * time.Millisecond)
+		require.NoError(t, am.HandleStatusAlerts("up", system))
+		assert.Zero(t, am.GetPendingAlertsCount(), "recovery should remove the pending alert before the timer callback runs")
+
+		time.Sleep(40 * time.Millisecond)
+		assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "timer callback should not deliver after recovery cancels the pending alert")
+
+		alertRecord, err := hub.FindRecordById("alerts", alert.Id)
+		require.NoError(t, err)
+		assert.False(t, alertRecord.GetBool("triggered"), "alert should remain untriggered when cancellation wins the timer race")
+
+		time.Sleep(time.Minute)
+		synctest.Wait()
+	})
+}
+
+func TestStatusAlertDownFiresAfterDelayExpires(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
+	require.NoError(t, err)
+	userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
+	require.NoError(t, hub.Save(userSettings))
+
+	systemCollection, err := hub.FindCollectionByNameOrId("systems")
+	require.NoError(t, err)
+	system := core.NewRecord(systemCollection)
+	system.Set("name", "test-system")
+	system.Set("status", "up")
+	system.Set("host", "127.0.0.1")
+	system.Set("users", []string{user.Id})
+	require.NoError(t, hub.Save(system))
+
+	alertCollection, err := hub.FindCollectionByNameOrId("alerts")
+	require.NoError(t, err)
+	alert := core.NewRecord(alertCollection)
+	alert.Set("user", user.Id)
+	alert.Set("system", system.Id)
+	alert.Set("name", "Status")
+	alert.Set("triggered", false)
+	alert.Set("min", 1)
+	require.NoError(t, hub.Save(alert))
+
+	initialEmailCount := hub.TestMailer.TotalSend()
+	am := alerts.NewTestAlertManagerWithoutWorker(hub)
+
+	require.NoError(t, am.HandleStatusAlerts("down", system))
+	assert.Equal(t, 1, am.GetPendingAlertsCount(), "alert should be pending after system goes down")
+
+	// Expire the pending alert and process it
+	am.ForceExpirePendingAlerts()
+	processed, err := am.ProcessPendingAlerts()
+	require.NoError(t, err)
+	assert.Len(t, processed, 1, "one alert should have been processed")
+	assert.Equal(t, 0, am.GetPendingAlertsCount(), "pending alert should be consumed after processing")
+
+	// Verify down email was sent
+	assert.Equal(t, initialEmailCount+1, hub.TestMailer.TotalSend(), "down notification should be sent after delay expires")
+
+	// Verify triggered flag is set in the DB
+	alertRecord, err := hub.FindRecordById("alerts", alert.Id)
+	require.NoError(t, err)
+	assert.True(t, alertRecord.GetBool("triggered"), "alert should be marked triggered after downtime matures")
+}
+
+func TestStatusAlertMultipleUsersRespectDifferentMinutes(t *testing.T) {
+	synctest.Test(t, func(t *testing.T) {
+		hub, user1 := beszelTests.GetHubWithUser(t)
+		defer hub.Cleanup()
+
+		setStatusAlertEmail(t, hub, user1.Id, "user1@example.com")
+
+		user2, err := beszelTests.CreateUser(hub, "user2@example.com", "password")
+		require.NoError(t, err)
+		_, err = beszelTests.CreateRecord(hub, "user_settings", map[string]any{
+			"user": user2.Id,
+			"settings": map[string]any{
+				"emails":   []string{"user2@example.com"},
+				"webhooks": []string{},
+			},
+		})
+		require.NoError(t, err)
+
+		system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
+			"name":  "shared-system",
+			"users": []string{user1.Id, user2.Id},
+			"host":  "127.0.0.1",
+		})
+		require.NoError(t, err)
+		system.Set("status", "up")
+		require.NoError(t, hub.SaveNoValidate(system))
+
+		alertUser1, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+			"name":   "Status",
+			"system": system.Id,
+			"user":   user1.Id,
+			"min":    1,
+		})
+		require.NoError(t, err)
+		alertUser2, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+			"name":   "Status",
+			"system": system.Id,
+			"user":   user2.Id,
+			"min":    2,
+		})
+		require.NoError(t, err)
+
+		time.Sleep(10 * time.Millisecond)
+
+		system.Set("status", "down")
+		require.NoError(t, hub.SaveNoValidate(system))
+
+		assert.Equal(t, 2, hub.GetPendingAlertsCount(), "both user alerts should be pending after the system goes down")
+
+		time.Sleep(59 * time.Second)
+		synctest.Wait()
+		assert.Zero(t, hub.TestMailer.TotalSend(), "no messages should be sent before the earliest alert minute elapses")
+
+		time.Sleep(2 * time.Second)
+		synctest.Wait()
+
+		messages := hub.TestMailer.Messages()
+		require.Len(t, messages, 1, "only the first user's alert should send after one minute")
+		require.Len(t, messages[0].To, 1)
+		assert.Equal(t, "user1@example.com", messages[0].To[0].Address)
+		assert.Contains(t, messages[0].Subject, "Connection to shared-system is down")
+		assert.Equal(t, 1, hub.GetPendingAlertsCount(), "the later user alert should still be pending")
+
+		time.Sleep(58 * time.Second)
+		synctest.Wait()
+		assert.Equal(t, 1, hub.TestMailer.TotalSend(), "the second user's alert should still be waiting before two minutes")
+
+		time.Sleep(2 * time.Second)
+		synctest.Wait()
+
+		messages = hub.TestMailer.Messages()
+		require.Len(t, messages, 2, "both users should eventually receive their own status alert")
+		require.Len(t, messages[1].To, 1)
+		assert.Equal(t, "user2@example.com", messages[1].To[0].Address)
+		assert.Contains(t, messages[1].Subject, "Connection to shared-system is down")
+		assert.Zero(t, hub.GetPendingAlertsCount(), "all pending alerts should be consumed after both timers fire")
+
+		alertUser1, err = hub.FindRecordById("alerts", alertUser1.Id)
+		require.NoError(t, err)
+		assert.True(t, alertUser1.GetBool("triggered"), "user1 alert should be marked triggered after delivery")
+
+		alertUser2, err = hub.FindRecordById("alerts", alertUser2.Id)
+		require.NoError(t, err)
+		assert.True(t, alertUser2.GetBool("triggered"), "user2 alert should be marked triggered after delivery")
+	})
+}
+
+func TestStatusAlertMultipleUsersRecoveryBetweenMinutesOnlyAlertsEarlierUser(t *testing.T) {
+	synctest.Test(t, func(t *testing.T) {
+		hub, user1 := beszelTests.GetHubWithUser(t)
+		defer hub.Cleanup()
+
+		setStatusAlertEmail(t, hub, user1.Id, "user1@example.com")
+
+		user2, err := beszelTests.CreateUser(hub, "user2@example.com", "password")
+		require.NoError(t, err)
+		_, err = beszelTests.CreateRecord(hub, "user_settings", map[string]any{
+			"user": user2.Id,
+			"settings": map[string]any{
+				"emails":   []string{"user2@example.com"},
+				"webhooks": []string{},
+			},
+		})
+		require.NoError(t, err)
+
+		system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
+			"name":  "shared-system",
+			"users": []string{user1.Id, user2.Id},
+			"host":  "127.0.0.1",
+		})
+		require.NoError(t, err)
+		system.Set("status", "up")
+		require.NoError(t, hub.SaveNoValidate(system))
+
+		alertUser1, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+			"name":   "Status",
+			"system": system.Id,
+			"user":   user1.Id,
+			"min":    1,
+		})
+		require.NoError(t, err)
+		alertUser2, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+			"name":   "Status",
+			"system": system.Id,
+			"user":   user2.Id,
+			"min":    2,
+		})
+		require.NoError(t, err)
+
+		time.Sleep(10 * time.Millisecond)
+
+		system.Set("status", "down")
+		require.NoError(t, hub.SaveNoValidate(system))
+
+		time.Sleep(61 * time.Second)
+		synctest.Wait()
+
+		messages := hub.TestMailer.Messages()
+		require.Len(t, messages, 1, "the first user's down alert should send before recovery")
+		require.Len(t, messages[0].To, 1)
+		assert.Equal(t, "user1@example.com", messages[0].To[0].Address)
+		assert.Contains(t, messages[0].Subject, "Connection to shared-system is down")
+		assert.Equal(t, 1, hub.GetPendingAlertsCount(), "the second user's alert should still be pending")
+
+		system.Set("status", "up")
+		require.NoError(t, hub.SaveNoValidate(system))
+
+		time.Sleep(time.Second)
+		synctest.Wait()
+
+		messages = hub.TestMailer.Messages()
+		require.Len(t, messages, 2, "recovery should notify only the user whose down alert had already triggered")
+		for _, message := range messages {
+			require.Len(t, message.To, 1)
+			assert.Equal(t, "user1@example.com", message.To[0].Address)
+		}
+		assert.Contains(t, messages[1].Subject, "Connection to shared-system is up")
+		assert.Zero(t, hub.GetPendingAlertsCount(), "recovery should cancel the later user's pending alert")
+
+		time.Sleep(61 * time.Second)
+		synctest.Wait()
+
+		messages = hub.TestMailer.Messages()
+		require.Len(t, messages, 2, "user2 should never receive a down alert once recovery cancels the pending timer")
+
+		alertUser1, err = hub.FindRecordById("alerts", alertUser1.Id)
+		require.NoError(t, err)
+		assert.False(t, alertUser1.GetBool("triggered"), "user1 alert should be cleared after recovery")
+
+		alertUser2, err = hub.FindRecordById("alerts", alertUser2.Id)
+		require.NoError(t, err)
+		assert.False(t, alertUser2.GetBool("triggered"), "user2 alert should remain untriggered because it never fired")
+	})
+}
+
+func TestStatusAlertDuplicateDownCallIsIdempotent(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
+	require.NoError(t, err)
+	userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
+	require.NoError(t, hub.Save(userSettings))
+
+	systemCollection, err := hub.FindCollectionByNameOrId("systems")
+	require.NoError(t, err)
+	system := core.NewRecord(systemCollection)
+	system.Set("name", "test-system")
+	system.Set("status", "up")
+	system.Set("host", "127.0.0.1")
+	system.Set("users", []string{user.Id})
+	require.NoError(t, hub.Save(system))
+
+	alertCollection, err := hub.FindCollectionByNameOrId("alerts")
+	require.NoError(t, err)
+	alert := core.NewRecord(alertCollection)
+	alert.Set("user", user.Id)
+	alert.Set("system", system.Id)
+	alert.Set("name", "Status")
+	alert.Set("triggered", false)
+	alert.Set("min", 5)
+	require.NoError(t, hub.Save(alert))
+
+	am := alerts.NewTestAlertManagerWithoutWorker(hub)
+
+	require.NoError(t, am.HandleStatusAlerts("down", system))
+	require.NoError(t, am.HandleStatusAlerts("down", system))
+	require.NoError(t, am.HandleStatusAlerts("down", system))
+
+	assert.Equal(t, 1, am.GetPendingAlertsCount(), "repeated down calls should not schedule duplicate pending alerts")
+}
+
+func TestStatusAlertNoAlertRecord(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	systemCollection, err := hub.FindCollectionByNameOrId("systems")
+	require.NoError(t, err)
+	system := core.NewRecord(systemCollection)
+	system.Set("name", "test-system")
+	system.Set("status", "up")
+	system.Set("host", "127.0.0.1")
+	system.Set("users", []string{user.Id})
+	require.NoError(t, hub.Save(system))
+
+	// No Status alert record created for this system
+	initialEmailCount := hub.TestMailer.TotalSend()
+	am := alerts.NewTestAlertManagerWithoutWorker(hub)
+
+	require.NoError(t, am.HandleStatusAlerts("down", system))
+	assert.Equal(t, 0, am.GetPendingAlertsCount(), "no pending alert when no alert record exists")
+
+	require.NoError(t, am.HandleStatusAlerts("up", system))
+	assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "no email when no alert record exists")
+}
+
+func TestRestorePendingStatusAlertsRequeuesDownSystemsAfterRestart(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
+	require.NoError(t, err)
+	userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
+	require.NoError(t, hub.Save(userSettings))
+
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "down")
+	require.NoError(t, err)
+	system := systems[0]
+
+	alertCollection, err := hub.FindCollectionByNameOrId("alerts")
+	require.NoError(t, err)
+	alert := core.NewRecord(alertCollection)
+	alert.Set("user", user.Id)
+	alert.Set("system", system.Id)
+	alert.Set("name", "Status")
+	alert.Set("triggered", false)
+	alert.Set("min", 1)
+	require.NoError(t, hub.Save(alert))
+
+	initialEmailCount := hub.TestMailer.TotalSend()
+	am := alerts.NewTestAlertManagerWithoutWorker(hub)
+
+	require.NoError(t, am.RestorePendingStatusAlerts())
+	assert.Equal(t, 1, am.GetPendingAlertsCount(), "startup restore should requeue a pending down alert for a system still marked down")
+
+	am.ForceExpirePendingAlerts()
+	processed, err := am.ProcessPendingAlerts()
+	require.NoError(t, err)
+	assert.Len(t, processed, 1, "restored pending alert should be processable after the delay expires")
+	assert.Equal(t, initialEmailCount+1, hub.TestMailer.TotalSend(), "restored pending alert should send the down notification")
+
+	alertRecord, err := hub.FindRecordById("alerts", alert.Id)
+	require.NoError(t, err)
+	assert.True(t, alertRecord.GetBool("triggered"), "restored pending alert should mark the alert as triggered once delivered")
+}
+
+func TestRestorePendingStatusAlertsSkipsNonDownOrAlreadyTriggeredAlerts(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	systemsDown, err := beszelTests.CreateSystems(hub, 2, user.Id, "down")
+	require.NoError(t, err)
+	systemDownPending := systemsDown[0]
+	systemDownTriggered := systemsDown[1]
+
+	systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
+		"name":   "up-system",
+		"users":  []string{user.Id},
+		"host":   "127.0.0.2",
+		"status": "up",
+	})
+	require.NoError(t, err)
+
+	_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":      "Status",
+		"system":    systemDownPending.Id,
+		"user":      user.Id,
+		"min":       1,
+		"triggered": false,
+	})
+	require.NoError(t, err)
+
+	_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":      "Status",
+		"system":    systemUp.Id,
+		"user":      user.Id,
+		"min":       1,
+		"triggered": false,
+	})
+	require.NoError(t, err)
+
+	_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":      "Status",
+		"system":    systemDownTriggered.Id,
+		"user":      user.Id,
+		"min":       1,
+		"triggered": true,
+	})
+	require.NoError(t, err)
+
+	am := alerts.NewTestAlertManagerWithoutWorker(hub)
+	require.NoError(t, am.RestorePendingStatusAlerts())
+	assert.Equal(t, 1, am.GetPendingAlertsCount(), "only untriggered alerts for currently down systems should be restored")
+}
+
+func TestRestorePendingStatusAlertsIsIdempotent(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "down")
+	require.NoError(t, err)
+	system := systems[0]
+
+	_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":      "Status",
+		"system":    system.Id,
+		"user":      user.Id,
+		"min":       1,
+		"triggered": false,
+	})
+	require.NoError(t, err)
+
+	am := alerts.NewTestAlertManagerWithoutWorker(hub)
+	require.NoError(t, am.RestorePendingStatusAlerts())
+	require.NoError(t, am.RestorePendingStatusAlerts())
+
+	assert.Equal(t, 1, am.GetPendingAlertsCount(), "restoring twice should not create duplicate pending alerts")
+	am.ForceExpirePendingAlerts()
+	processed, err := am.ProcessPendingAlerts()
+	require.NoError(t, err)
+	assert.Len(t, processed, 1, "restored alert should still be processable exactly once")
+	assert.Zero(t, am.GetPendingAlertsCount(), "processing the restored alert should empty the pending map")
+}
+
+func TestResolveStatusAlertsFixesStaleTriggered(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// CreateSystems uses SaveNoValidate after initial save to bypass the
+	// onRecordCreate hook that forces status = "pending".
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+	require.NoError(t, err)
+	system := systems[0]
+
+	alertCollection, err := hub.FindCollectionByNameOrId("alerts")
+	require.NoError(t, err)
+	alert := core.NewRecord(alertCollection)
+	alert.Set("user", user.Id)
+	alert.Set("system", system.Id)
+	alert.Set("name", "Status")
+	alert.Set("triggered", true) // Stale: system is up but alert still says triggered
+	require.NoError(t, hub.Save(alert))
+
+	// resolveStatusAlerts should clear the stale triggered flag
+	require.NoError(t, alerts.ResolveStatusAlerts(hub))
+
+	alertRecord, err := hub.FindRecordById("alerts", alert.Id)
+	require.NoError(t, err)
+	assert.False(t, alertRecord.GetBool("triggered"), "stale triggered flag should be cleared when system is up")
+}
+func TestResolveStatusAlerts(t *testing.T) {
+	hub, user := beszelTests.GetHubWithUser(t)
+	defer hub.Cleanup()
+
+	// Create a systemUp
+	systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
+		"name":   "test-system",
+		"users":  []string{user.Id},
+		"host":   "127.0.0.1",
+		"status": "up",
+	})
+	assert.NoError(t, err)
+
+	systemDown, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
+		"name":   "test-system-2",
+		"users":  []string{user.Id},
+		"host":   "127.0.0.2",
+		"status": "up",
+	})
+	assert.NoError(t, err)
+
+	// Create a status alertUp for the system
+	alertUp, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "Status",
+		"system": systemUp.Id,
+		"user":   user.Id,
+		"min":    1,
+	})
+	assert.NoError(t, err)
+
+	alertDown, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   "Status",
+		"system": systemDown.Id,
+		"user":   user.Id,
+		"min":    1,
+	})
+	assert.NoError(t, err)
+
+	// Verify alert is not triggered initially
+	assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered initially")
+
+	// Set the system to 'up' (this should not trigger the alert)
+	systemUp.Set("status", "up")
+	err = hub.SaveNoValidate(systemUp)
+	assert.NoError(t, err)
+
+	systemDown.Set("status", "down")
+	err = hub.SaveNoValidate(systemDown)
+	assert.NoError(t, err)
+
+	// Wait a moment for any processing
+	time.Sleep(10 * time.Millisecond)
+
+	// Verify alertUp is still not triggered after setting system to up
+	alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
+	assert.NoError(t, err)
+	assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered when system is up")
+
+	// Manually set both alerts triggered to true
+	alertUp.Set("triggered", true)
+	err = hub.SaveNoValidate(alertUp)
+	assert.NoError(t, err)
+	alertDown.Set("triggered", true)
+	err = hub.SaveNoValidate(alertDown)
+	assert.NoError(t, err)
+
+	// Verify we have exactly two alerts with triggered true
+	triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
+	assert.NoError(t, err)
+	assert.EqualValues(t, 2, triggeredCount, "Should have exactly two alerts with triggered true")
+
+	// Verify the specific alertUp is triggered
+	alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
+	assert.NoError(t, err)
+	assert.True(t, alertUp.GetBool("triggered"), "Alert should be triggered")
+
+	// Verify we have two unresolved alert history records
+	alertHistoryCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
+	assert.NoError(t, err)
+	assert.EqualValues(t, 2, alertHistoryCount, "Should have exactly two unresolved alert history records")
+
+	err = alerts.ResolveStatusAlerts(hub)
+	assert.NoError(t, err)
+
+	// Verify alertUp is not triggered after resolving
+	alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
+	assert.NoError(t, err)
+	assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered after resolving")
+	// Verify alertDown is still triggered
+	alertDown, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertDown.Id})
+	assert.NoError(t, err)
+	assert.True(t, alertDown.GetBool("triggered"), "Alert should still be triggered after resolving")
+
+	// Verify we have one unresolved alert history record
+	alertHistoryCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
+	assert.NoError(t, err)
+	assert.EqualValues(t, 1, alertHistoryCount, "Should have exactly one unresolved alert history record")
+
+}
+
+func TestAlertsHistoryStatus(t *testing.T) {
+	synctest.Test(t, func(t *testing.T) {
+		hub, user := beszelTests.GetHubWithUser(t)
+		defer hub.Cleanup()
+
+		// Create a system
+		systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+		assert.NoError(t, err)
+		system := systems[0]
+
+		// Create a Status alert record for the system
+		alertRecord, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+			"name":   "Status",
+			"system": system.Id,
+			"user":   user.Id,
+			"min":    1,
+		})
+		assert.NoError(t, err)
+
+		// Verify alert is not triggered initially
+		assert.False(t, alertRecord.GetBool("triggered"), "Alert should not be triggered initially")
+
+		// Set the system to 'down' (this should trigger the alert)
+		system.Set("status", "down")
+		err = hub.Save(system)
+		assert.NoError(t, err)
+
+		time.Sleep(time.Second * 30)
+		synctest.Wait()
+
+		alertFresh, _ := hub.FindRecordById("alerts", alertRecord.Id)
+		assert.False(t, alertFresh.GetBool("triggered"), "Alert should not be triggered after 30 seconds")
+
+		time.Sleep(time.Minute)
+		synctest.Wait()
+
+		// Verify alert is triggered after setting system to down
+		alertFresh, err = hub.FindRecordById("alerts", alertRecord.Id)
+		assert.NoError(t, err)
+		assert.True(t, alertFresh.GetBool("triggered"), "Alert should be triggered after one minute")
+
+		// Verify we have one unresolved alert history record
+		alertHistoryCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
+		assert.NoError(t, err)
+		assert.EqualValues(t, 1, alertHistoryCount, "Should have exactly one unresolved alert history record")
+
+		// Set the system back to 'up' (this should resolve the alert)
+		system.Set("status", "up")
+		err = hub.Save(system)
+		assert.NoError(t, err)
+
+		time.Sleep(time.Second)
+		synctest.Wait()
+
+		// Verify alert is not triggered after setting system back to up
+		alertFresh, err = hub.FindRecordById("alerts", alertRecord.Id)
+		assert.NoError(t, err)
+		assert.False(t, alertFresh.GetBool("triggered"), "Alert should not be triggered after system recovers")
+
+		// Verify the alert history record is resolved
+		alertHistoryCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
+		assert.NoError(t, err)
+		assert.EqualValues(t, 0, alertHistoryCount, "Should have no unresolved alert history records")
+	})
+}
+
+func TestStatusAlertClearedBeforeSend(t *testing.T) {
+	synctest.Test(t, func(t *testing.T) {
+		hub, user := beszelTests.GetHubWithUser(t)
+		defer hub.Cleanup()
+
+		// Create a system
+		systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+		assert.NoError(t, err)
+		system := systems[0]
+
+		// Ensure user settings have an email
+		userSettings, _ := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
+		userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
+		hub.Save(userSettings)
+
+		// Initial email count
+		initialEmailCount := hub.TestMailer.TotalSend()
+
+		// Create a Status alert record for the system
+		alertRecord, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+			"name":   "Status",
+			"system": system.Id,
+			"user":   user.Id,
+			"min":    1,
+		})
+		assert.NoError(t, err)
+
+		// Verify alert is not triggered initially
+		assert.False(t, alertRecord.GetBool("triggered"), "Alert should not be triggered initially")
+
+		// Set the system to 'down' (this should trigger the alert)
+		system.Set("status", "down")
+		err = hub.Save(system)
+		assert.NoError(t, err)
+
+		time.Sleep(time.Second * 30)
+		synctest.Wait()
+
+		// Set system back up to clear the pending alert before it triggers
+		system.Set("status", "up")
+		err = hub.Save(system)
+		assert.NoError(t, err)
+
+		time.Sleep(time.Minute)
+		synctest.Wait()
+
+		// Verify that we have not sent any emails since the system recovered before the alert triggered
+		assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "No email should be sent if system recovers before alert triggers")
+
+		// Verify alert is not triggered after setting system back to up
+		alertFresh, err := hub.FindRecordById("alerts", alertRecord.Id)
+		assert.NoError(t, err)
+		assert.False(t, alertFresh.GetBool("triggered"), "Alert should not be triggered after system recovers")
+
+		// Verify that no alert history record was created since the alert never triggered
+		alertHistoryCount, err := hub.CountRecords("alerts_history")
+		assert.NoError(t, err)
+		assert.EqualValues(t, 0, alertHistoryCount, "Should have no unresolved alert history records since alert never triggered")
+	})
+}
--- a/internal/alerts/alerts_system.go
+++ b/internal/alerts/alerts_system.go
@@ -11,15 +11,11 @@ import (
 	"github.com/pocketbase/dbx"
 	"github.com/pocketbase/pocketbase/core"
 	"github.com/pocketbase/pocketbase/tools/types"
-	"github.com/spf13/cast"
 )

 func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *system.CombinedData) error {
-	alertRecords, err := am.hub.FindAllRecords("alerts",
-		dbx.NewExp("system={:system} AND name!='Status'", dbx.Params{"system": systemRecord.Id}),
-	)
-	if err != nil || len(alertRecords) == 0 {
-		// log.Println("no alerts found for system")
+	alerts := am.alertsCache.GetAlertsExcludingNames(systemRecord.Id, "Status")
+	if len(alerts) == 0 {
 		return nil
 	}

@@ -27,8 +23,8 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
 	now := systemRecord.GetDateTime("updated").Time().UTC()
 	oldestTime := now

-	for _, alertRecord := range alertRecords {
-		name := alertRecord.GetString("name")
+	for _, alertData := range alerts {
+		name := alertData.Name
 		var val float64
 		unit := "%"

@@ -38,7 +34,7 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
 		case "Memory":
 			val = data.Info.MemPct
 		case "Bandwidth":
-			val = data.Info.Bandwidth
+			val = float64(data.Info.BandwidthBytes) / (1024 * 1024)
 			unit = " MB/s"
 		case "Disk":
 			maxUsedPct := data.Info.DiskPct
@@ -66,24 +62,37 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
 			unit = ""
 		case "GPU":
 			val = data.Info.GpuPct
+		case "Battery":
+			if data.Stats.Battery[0] == 0 {
+				continue
+			}
+			val = float64(data.Stats.Battery[0])
 		}

-		triggered := alertRecord.GetBool("triggered")
-		threshold := alertRecord.GetFloat("value")
+		triggered := alertData.Triggered
+		threshold := alertData.Value
+
+		// Battery alert has inverted logic: trigger when value is BELOW threshold
+		lowAlert := isLowAlert(name)

 		// CONTINUE
-		// IF alert is not triggered and curValue is less than threshold
-		// OR alert is triggered and curValue is greater than threshold
-		if (!triggered && val <= threshold) || (triggered && val > threshold) {
-			// log.Printf("Skipping alert %s: val %f | threshold %f | triggered %v\n", name, val, threshold, triggered)
-			continue
+		// For normal alerts: IF not triggered and curValue <= threshold, OR triggered and curValue > threshold
+		// For low alerts (Battery): IF not triggered and curValue >= threshold, OR triggered and curValue < threshold
+		if lowAlert {
+			if (!triggered && val >= threshold) || (triggered && val < threshold) {
+				continue
+			}
+		} else {
+			if (!triggered && val <= threshold) || (triggered && val > threshold) {
+				continue
+			}
 		}

-		min := max(1, cast.ToUint8(alertRecord.Get("min")))
+		min := max(1, alertData.Min)

 		alert := SystemAlertData{
 			systemRecord: systemRecord,
-			alertRecord:  alertRecord,
+			alertData:    alertData,
 			name:         name,
 			unit:         unit,
 			val:          val,
@@ -94,7 +103,11 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst

 		// send alert immediately if min is 1 - no need to sum up values.
 		if min == 1 {
-			alert.triggered = val > threshold
+			if lowAlert {
+				alert.triggered = val < threshold
+			} else {
+				alert.triggered = val > threshold
+			}
 			go am.sendSystemAlert(alert)
 			continue
 		}
@@ -112,7 +125,7 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
 		Created types.DateTime `db:"created"`
 	}{}

-	err = am.hub.DB().
+	err := am.hub.DB().
 		Select("stats", "created").
 		From("system_stats").
 		Where(dbx.NewExp(
@@ -175,22 +188,24 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
 			case "Memory":
 				alert.val += stats.Mem
 			case "Bandwidth":
-				alert.val += stats.NetSent + stats.NetRecv
+				alert.val += float64(stats.Bandwidth[0]+stats.Bandwidth[1]) / (1024 * 1024)
 			case "Disk":
 				if alert.mapSums == nil {
-					alert.mapSums = make(map[string]float32, len(data.Stats.ExtraFs)+1)
+					alert.mapSums = make(map[string]float32, len(stats.ExtraFs)+1)
 				}
 				// add root disk
 				if _, ok := alert.mapSums["root"]; !ok {
 					alert.mapSums["root"] = 0.0
 				}
 				alert.mapSums["root"] += float32(stats.Disk)
-				// add extra disks
-				for key, fs := range data.Stats.ExtraFs {
-					if _, ok := alert.mapSums[key]; !ok {
-						alert.mapSums[key] = 0.0
+				// add extra disks from historical record
+				for key, fs := range stats.ExtraFs {
+					if fs.DiskTotal > 0 {
+						if _, ok := alert.mapSums[key]; !ok {
+							alert.mapSums[key] = 0.0
+						}
+						alert.mapSums[key] += float32(fs.DiskUsed / fs.DiskTotal * 100)
 					}
-					alert.mapSums[key] += float32(fs.DiskUsed / fs.DiskTotal * 100)
 				}
 			case "Temperature":
 				if alert.mapSums == nil {
@@ -219,6 +234,8 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
 					}
 				}
 				alert.val += maxUsage
+			case "Battery":
+				alert.val += float64(stats.Battery[0])
 			default:
 				continue
 			}
@@ -256,12 +273,24 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
 		// log.Printf("%s: val %f | count %d | min-count %f | threshold %f\n", alert.name, alert.val, alert.count, minCount, alert.threshold)
 		// pass through alert if count is greater than or equal to minCount
 		if float32(alert.count) >= minCount {
-			if !alert.triggered && alert.val > alert.threshold {
-				alert.triggered = true
-				go am.sendSystemAlert(alert)
-			} else if alert.triggered && alert.val <= alert.threshold {
-				alert.triggered = false
-				go am.sendSystemAlert(alert)
+			// Battery alert has inverted logic: trigger when value is BELOW threshold
+			lowAlert := isLowAlert(alert.name)
+			if lowAlert {
+				if !alert.triggered && alert.val < alert.threshold {
+					alert.triggered = true
+					go am.sendSystemAlert(alert)
+				} else if alert.triggered && alert.val >= alert.threshold {
+					alert.triggered = false
+					go am.sendSystemAlert(alert)
+				}
+			} else {
+				if !alert.triggered && alert.val > alert.threshold {
+					alert.triggered = true
+					go am.sendSystemAlert(alert)
+				} else if alert.triggered && alert.val <= alert.threshold {
+					alert.triggered = false
+					go am.sendSystemAlert(alert)
+				}
 			}
 		}
 	}
@@ -288,10 +317,19 @@ func (am *AlertManager) sendSystemAlert(alert SystemAlertData) {
 	}

 	var subject string
+	lowAlert := isLowAlert(alert.name)
 	if alert.triggered {
-		subject = fmt.Sprintf("%s %s above threshold", systemName, titleAlertName)
+		if lowAlert {
+			subject = fmt.Sprintf("%s %s below threshold", systemName, titleAlertName)
+		} else {
+			subject = fmt.Sprintf("%s %s above threshold", systemName, titleAlertName)
+		}
 	} else {
-		subject = fmt.Sprintf("%s %s below threshold", systemName, titleAlertName)
+		if lowAlert {
+			subject = fmt.Sprintf("%s %s above threshold", systemName, titleAlertName)
+		} else {
+			subject = fmt.Sprintf("%s %s below threshold", systemName, titleAlertName)
+		}
 	}
 	minutesLabel := "minute"
 	if alert.min > 1 {
@@ -302,16 +340,20 @@ func (am *AlertManager) sendSystemAlert(alert SystemAlertData) {
 	}
 	body := fmt.Sprintf("%s averaged %.2f%s for the previous %v %s.", alert.descriptor, alert.val, alert.unit, alert.min, minutesLabel)

-	alert.alertRecord.Set("triggered", alert.triggered)
-	if err := am.hub.Save(alert.alertRecord); err != nil {
+	if err := am.setAlertTriggered(alert.alertData, alert.triggered); err != nil {
 		// app.Logger().Error("failed to save alert record", "err", err)
 		return
 	}
 	am.SendAlert(AlertMessageData{
-		UserID:   alert.alertRecord.GetString("user"),
+		UserID:   alert.alertData.UserID,
+		SystemID: alert.systemRecord.Id,
 		Title:    subject,
 		Message:  body,
 		Link:     am.hub.MakeLink("system", alert.systemRecord.Id),
 		LinkText: "View " + systemName,
 	})
 }
+
+func isLowAlert(name string) bool {
+	return name == "Battery"
+}
--- a/internal/alerts/alerts_system_test.go
+++ b/internal/alerts/alerts_system_test.go
@@ -0,0 +1,218 @@
+//go:build testing
+
+package alerts_test
+
+import (
+	"testing"
+	"testing/synctest"
+	"time"
+
+	"github.com/henrygd/beszel/internal/entities/system"
+	beszelTests "github.com/henrygd/beszel/internal/tests"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+type systemAlertValueSetter[T any] func(info *system.Info, stats *system.Stats, value T)
+
+type systemAlertTestFixture struct {
+	hub     *beszelTests.TestHub
+	alertID string
+	submit  func(*system.CombinedData) error
+}
+
+func createCombinedData[T any](value T, setValue systemAlertValueSetter[T]) *system.CombinedData {
+	var data system.CombinedData
+	setValue(&data.Info, &data.Stats, value)
+	return &data
+}
+
+func newSystemAlertTestFixture(t *testing.T, alertName string, min int, threshold float64) *systemAlertTestFixture {
+	t.Helper()
+
+	hub, user := beszelTests.GetHubWithUser(t)
+
+	systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
+	require.NoError(t, err)
+	systemRecord := systems[0]
+
+	sysManagerSystem, err := hub.GetSystemManager().GetSystemFromStore(systemRecord.Id)
+	require.NoError(t, err)
+	require.NotNil(t, sysManagerSystem)
+	sysManagerSystem.StopUpdater()
+
+	userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
+	require.NoError(t, err)
+	userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
+	require.NoError(t, hub.Save(userSettings))
+
+	alertRecord, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":   alertName,
+		"system": systemRecord.Id,
+		"user":   user.Id,
+		"min":    min,
+		"value":  threshold,
+	})
+	require.NoError(t, err)
+
+	assert.False(t, alertRecord.GetBool("triggered"), "Alert should not be triggered initially")
+
+	alertsCache := hub.GetAlertManager().GetSystemAlertsCache()
+	cachedAlerts := alertsCache.GetAlertsExcludingNames(systemRecord.Id, "Status")
+	assert.Len(t, cachedAlerts, 1, "Alert should be in cache")
+
+	return &systemAlertTestFixture{
+		hub:     hub,
+		alertID: alertRecord.Id,
+		submit: func(data *system.CombinedData) error {
+			_, err := sysManagerSystem.CreateRecords(data)
+			return err
+		},
+	}
+}
+
+func (fixture *systemAlertTestFixture) cleanup() {
+	fixture.hub.Cleanup()
+}
+
+func submitValue[T any](fixture *systemAlertTestFixture, t *testing.T, value T, setValue systemAlertValueSetter[T]) {
+	t.Helper()
+	require.NoError(t, fixture.submit(createCombinedData(value, setValue)))
+}
+
+func (fixture *systemAlertTestFixture) assertTriggered(t *testing.T, triggered bool, message string) {
+	t.Helper()
+
+	alertRecord, err := fixture.hub.FindRecordById("alerts", fixture.alertID)
+	require.NoError(t, err)
+	assert.Equal(t, triggered, alertRecord.GetBool("triggered"), message)
+}
+
+func waitForSystemAlert(d time.Duration) {
+	time.Sleep(d)
+	synctest.Wait()
+}
+
+func testOneMinuteSystemAlert[T any](t *testing.T, alertName string, threshold float64, setValue systemAlertValueSetter[T], triggerValue, resolveValue T) {
+	t.Helper()
+
+	synctest.Test(t, func(t *testing.T) {
+		fixture := newSystemAlertTestFixture(t, alertName, 1, threshold)
+		defer fixture.cleanup()
+
+		submitValue(fixture, t, triggerValue, setValue)
+		waitForSystemAlert(time.Second)
+
+		fixture.assertTriggered(t, true, "Alert should be triggered")
+		assert.Equal(t, 1, fixture.hub.TestMailer.TotalSend(), "An email should have been sent")
+
+		submitValue(fixture, t, resolveValue, setValue)
+		waitForSystemAlert(time.Second)
+
+		fixture.assertTriggered(t, false, "Alert should be untriggered")
+		assert.Equal(t, 2, fixture.hub.TestMailer.TotalSend(), "A second email should have been sent for untriggering the alert")
+
+		waitForSystemAlert(time.Minute)
+	})
+}
+
+func testMultiMinuteSystemAlert[T any](t *testing.T, alertName string, threshold float64, min int, setValue systemAlertValueSetter[T], baselineValue, triggerValue, resolveValue T) {
+	t.Helper()
+
+	synctest.Test(t, func(t *testing.T) {
+		fixture := newSystemAlertTestFixture(t, alertName, min, threshold)
+		defer fixture.cleanup()
+
+		submitValue(fixture, t, baselineValue, setValue)
+		waitForSystemAlert(time.Minute + time.Second)
+		fixture.assertTriggered(t, false, "Alert should not be triggered yet")
+
+		submitValue(fixture, t, triggerValue, setValue)
+		waitForSystemAlert(time.Minute)
+		fixture.assertTriggered(t, false, "Alert should not be triggered until the history window is full")
+
+		submitValue(fixture, t, triggerValue, setValue)
+		waitForSystemAlert(time.Second)
+		fixture.assertTriggered(t, true, "Alert should be triggered")
+		assert.Equal(t, 1, fixture.hub.TestMailer.TotalSend(), "An email should have been sent")
+
+		submitValue(fixture, t, resolveValue, setValue)
+		waitForSystemAlert(time.Second)
+		fixture.assertTriggered(t, false, "Alert should be untriggered")
+		assert.Equal(t, 2, fixture.hub.TestMailer.TotalSend(), "A second email should have been sent for untriggering the alert")
+	})
+}
+
+func setCPUAlertValue(info *system.Info, stats *system.Stats, value float64) {
+	info.Cpu = value
+	stats.Cpu = value
+}
+
+func setMemoryAlertValue(info *system.Info, stats *system.Stats, value float64) {
+	info.MemPct = value
+	stats.MemPct = value
+}
+
+func setDiskAlertValue(info *system.Info, stats *system.Stats, value float64) {
+	info.DiskPct = value
+	stats.DiskPct = value
+}
+
+func setBandwidthAlertValue(info *system.Info, stats *system.Stats, value [2]uint64) {
+	info.BandwidthBytes = value[0] + value[1]
+	stats.Bandwidth = value
+}
+
+func megabytesToBytes(mb uint64) uint64 {
+	return mb * 1024 * 1024
+}
+
+func setGPUAlertValue(info *system.Info, stats *system.Stats, value float64) {
+	info.GpuPct = value
+	stats.GPUData = map[string]system.GPUData{
+		"GPU0": {Usage: value},
+	}
+}
+
+func setTemperatureAlertValue(info *system.Info, stats *system.Stats, value float64) {
+	info.DashboardTemp = value
+	stats.Temperatures = map[string]float64{
+		"Temp0": value,
+	}
+}
+
+func setLoadAvgAlertValue(info *system.Info, stats *system.Stats, value [3]float64) {
+	info.LoadAvg = value
+	stats.LoadAvg = value
+}
+
+func setBatteryAlertValue(info *system.Info, stats *system.Stats, value [2]uint8) {
+	info.Battery = value
+	stats.Battery = value
+}
+
+func TestSystemAlertsOneMin(t *testing.T) {
+	testOneMinuteSystemAlert(t, "CPU", 50, setCPUAlertValue, 51, 49)
+	testOneMinuteSystemAlert(t, "Memory", 50, setMemoryAlertValue, 51, 49)
+	testOneMinuteSystemAlert(t, "Disk", 50, setDiskAlertValue, 51, 49)
+	testOneMinuteSystemAlert(t, "Bandwidth", 50, setBandwidthAlertValue, [2]uint64{megabytesToBytes(26), megabytesToBytes(25)}, [2]uint64{megabytesToBytes(25), megabytesToBytes(24)})
+	testOneMinuteSystemAlert(t, "GPU", 50, setGPUAlertValue, 51, 49)
+	testOneMinuteSystemAlert(t, "Temperature", 70, setTemperatureAlertValue, 71, 69)
+	testOneMinuteSystemAlert(t, "LoadAvg1", 4, setLoadAvgAlertValue, [3]float64{4.1, 0, 0}, [3]float64{3.9, 0, 0})
+	testOneMinuteSystemAlert(t, "LoadAvg5", 4, setLoadAvgAlertValue, [3]float64{0, 4.1, 0}, [3]float64{0, 3.9, 0})
+	testOneMinuteSystemAlert(t, "LoadAvg15", 4, setLoadAvgAlertValue, [3]float64{0, 0, 4.1}, [3]float64{0, 0, 3.9})
+	testOneMinuteSystemAlert(t, "Battery", 20, setBatteryAlertValue, [2]uint8{19, 0}, [2]uint8{21, 0})
+}
+
+func TestSystemAlertsTwoMin(t *testing.T) {
+	testMultiMinuteSystemAlert(t, "CPU", 50, 2, setCPUAlertValue, 10, 51, 48)
+	testMultiMinuteSystemAlert(t, "Memory", 50, 2, setMemoryAlertValue, 10, 51, 48)
+	testMultiMinuteSystemAlert(t, "Disk", 50, 2, setDiskAlertValue, 10, 51, 48)
+	testMultiMinuteSystemAlert(t, "Bandwidth", 50, 2, setBandwidthAlertValue, [2]uint64{megabytesToBytes(10), megabytesToBytes(10)}, [2]uint64{megabytesToBytes(26), megabytesToBytes(25)}, [2]uint64{megabytesToBytes(10), megabytesToBytes(10)})
+	testMultiMinuteSystemAlert(t, "GPU", 50, 2, setGPUAlertValue, 10, 51, 48)
+	testMultiMinuteSystemAlert(t, "Temperature", 70, 2, setTemperatureAlertValue, 10, 71, 67)
+	testMultiMinuteSystemAlert(t, "LoadAvg1", 4, 2, setLoadAvgAlertValue, [3]float64{0, 0, 0}, [3]float64{4.1, 0, 0}, [3]float64{3.5, 0, 0})
+	testMultiMinuteSystemAlert(t, "LoadAvg5", 4, 2, setLoadAvgAlertValue, [3]float64{0, 2, 0}, [3]float64{0, 4.1, 0}, [3]float64{0, 3.5, 0})
+	testMultiMinuteSystemAlert(t, "LoadAvg15", 4, 2, setLoadAvgAlertValue, [3]float64{0, 0, 2}, [3]float64{0, 0, 4.1}, [3]float64{0, 0, 3.5})
+	testMultiMinuteSystemAlert(t, "Battery", 20, 2, setBatteryAlertValue, [2]uint8{21, 0}, [2]uint8{19, 0}, [2]uint8{25, 1})
+}
--- a/internal/alerts/alerts_test.go
+++ b/internal/alerts/alerts_test.go
@@ -1,5 +1,4 @@
 //go:build testing
-// +build testing

 package alerts_test

@@ -13,9 +12,9 @@ import (
 	"testing/synctest"
 	"time"

-	"github.com/henrygd/beszel/internal/alerts"
 	beszelTests "github.com/henrygd/beszel/internal/tests"

+	"github.com/henrygd/beszel/internal/alerts"
 	"github.com/pocketbase/dbx"
 	"github.com/pocketbase/pocketbase/core"
 	pbTests "github.com/pocketbase/pocketbase/tests"
@@ -370,87 +369,6 @@ func TestUserAlertsApi(t *testing.T) {
 	}
 }

-func TestStatusAlerts(t *testing.T) {
-	synctest.Test(t, func(t *testing.T) {
-		hub, user := beszelTests.GetHubWithUser(t)
-		defer hub.Cleanup()
-
-		systems, err := beszelTests.CreateSystems(hub, 4, user.Id, "paused")
-		assert.NoError(t, err)
-
-		var alerts []*core.Record
-		for i, system := range systems {
-			alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
-				"name":   "Status",
-				"system": system.Id,
-				"user":   user.Id,
-				"min":    i + 1,
-			})
-			assert.NoError(t, err)
-			alerts = append(alerts, alert)
-		}
-
-		time.Sleep(10 * time.Millisecond)
-
-		for _, alert := range alerts {
-			assert.False(t, alert.GetBool("triggered"), "Alert should not be triggered immediately")
-		}
-		if hub.TestMailer.TotalSend() != 0 {
-			assert.Zero(t, hub.TestMailer.TotalSend(), "Expected 0 messages, got %d", hub.TestMailer.TotalSend())
-		}
-		for _, system := range systems {
-			assert.EqualValues(t, "paused", system.GetString("status"), "System should be paused")
-		}
-		for _, system := range systems {
-			system.Set("status", "up")
-			err = hub.SaveNoValidate(system)
-			assert.NoError(t, err)
-		}
-		time.Sleep(time.Second)
-		assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
-		for _, system := range systems {
-			system.Set("status", "down")
-			err = hub.SaveNoValidate(system)
-			assert.NoError(t, err)
-		}
-		// after 30 seconds, should have 4 alerts in the pendingAlerts map, no triggered alerts
-		time.Sleep(time.Second * 30)
-		assert.EqualValues(t, 4, hub.GetPendingAlertsCount(), "should have 4 alerts in the pendingAlerts map")
-		triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
-		assert.NoError(t, err)
-		assert.EqualValues(t, 0, triggeredCount, "should have 0 alert triggered")
-		assert.EqualValues(t, 0, hub.TestMailer.TotalSend(), "should have 0 messages sent")
-		// after 1:30 seconds, should have 1 triggered alert and 3 pending alerts
-		time.Sleep(time.Second * 60)
-		assert.EqualValues(t, 3, hub.GetPendingAlertsCount(), "should have 3 alerts in the pendingAlerts map")
-		triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
-		assert.NoError(t, err)
-		assert.EqualValues(t, 1, triggeredCount, "should have 1 alert triggered")
-		assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 messages sent")
-		// after 2:30 seconds, should have 2 triggered alerts and 2 pending alerts
-		time.Sleep(time.Second * 60)
-		assert.EqualValues(t, 2, hub.GetPendingAlertsCount(), "should have 2 alerts in the pendingAlerts map")
-		triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
-		assert.NoError(t, err)
-		assert.EqualValues(t, 2, triggeredCount, "should have 2 alert triggered")
-		assert.EqualValues(t, 2, hub.TestMailer.TotalSend(), "should have 2 messages sent")
-		// now we will bring the remaning systems back up
-		for _, system := range systems {
-			system.Set("status", "up")
-			err = hub.SaveNoValidate(system)
-			assert.NoError(t, err)
-		}
-		time.Sleep(time.Second)
-		// should have 0 alerts in the pendingAlerts map and 0 alerts triggered
-		assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
-		triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
-		assert.NoError(t, err)
-		assert.Zero(t, triggeredCount, "should have 0 alert triggered")
-		// 4 messages sent, 2 down alerts and 2 up alerts for first 2 systems
-		assert.EqualValues(t, 4, hub.TestMailer.TotalSend(), "should have 4 messages sent")
-	})
-}
-
 func TestAlertsHistory(t *testing.T) {
 	synctest.Test(t, func(t *testing.T) {
 		hub, user := beszelTests.GetHubWithUser(t)
@@ -579,102 +497,46 @@ func TestAlertsHistory(t *testing.T) {
 		assert.EqualValues(t, 2, totalHistoryCount, "Should have 2 total alert history records")
 	})
 }
-func TestResolveStatusAlerts(t *testing.T) {
-	hub, user := beszelTests.GetHubWithUser(t)
+
+func TestSetAlertTriggered(t *testing.T) {
+	hub, _ := beszelTests.NewTestHub(t.TempDir())
 	defer hub.Cleanup()

-	// Create a systemUp
-	systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
-		"name":   "test-system",
-		"users":  []string{user.Id},
-		"host":   "127.0.0.1",
-		"status": "up",
+	hub.StartHub()
+
+	user, _ := beszelTests.CreateUser(hub, "test@example.com", "password")
+	system, _ := beszelTests.CreateRecord(hub, "systems", map[string]any{
+		"name":  "test-system",
+		"users": []string{user.Id},
+		"host":  "127.0.0.1",
 	})
-	assert.NoError(t, err)

-	systemDown, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
-		"name":   "test-system-2",
-		"users":  []string{user.Id},
-		"host":   "127.0.0.2",
-		"status": "up",
+	alertRecord, _ := beszelTests.CreateRecord(hub, "alerts", map[string]any{
+		"name":      "CPU",
+		"system":    system.Id,
+		"user":      user.Id,
+		"value":     80,
+		"triggered": false,
 	})
+
+	am := alerts.NewAlertManager(hub)
+
+	var alert alerts.CachedAlertData
+	alert.PopulateFromRecord(alertRecord)
+
+	// Test triggering the alert
+	err := am.SetAlertTriggered(alert, true)
 	assert.NoError(t, err)

-	// Create a status alertUp for the system
-	alertUp, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
-		"name":   "Status",
-		"system": systemUp.Id,
-		"user":   user.Id,
-		"min":    1,
-	})
+	updatedRecord, err := hub.FindRecordById("alerts", alert.Id)
+	assert.NoError(t, err)
+	assert.True(t, updatedRecord.GetBool("triggered"))
+
+	// Test un-triggering the alert
+	err = am.SetAlertTriggered(alert, false)
 	assert.NoError(t, err)

-	alertDown, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
-		"name":   "Status",
-		"system": systemDown.Id,
-		"user":   user.Id,
-		"min":    1,
-	})
+	updatedRecord, err = hub.FindRecordById("alerts", alert.Id)
 	assert.NoError(t, err)
-
-	// Verify alert is not triggered initially
-	assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered initially")
-
-	// Set the system to 'up' (this should not trigger the alert)
-	systemUp.Set("status", "up")
-	err = hub.SaveNoValidate(systemUp)
-	assert.NoError(t, err)
-
-	systemDown.Set("status", "down")
-	err = hub.SaveNoValidate(systemDown)
-	assert.NoError(t, err)
-
-	// Wait a moment for any processing
-	time.Sleep(10 * time.Millisecond)
-
-	// Verify alertUp is still not triggered after setting system to up
-	alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
-	assert.NoError(t, err)
-	assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered when system is up")
-
-	// Manually set both alerts triggered to true
-	alertUp.Set("triggered", true)
-	err = hub.SaveNoValidate(alertUp)
-	assert.NoError(t, err)
-	alertDown.Set("triggered", true)
-	err = hub.SaveNoValidate(alertDown)
-	assert.NoError(t, err)
-
-	// Verify we have exactly one alert with triggered true
-	triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
-	assert.NoError(t, err)
-	assert.EqualValues(t, 2, triggeredCount, "Should have exactly two alerts with triggered true")
-
-	// Verify the specific alertUp is triggered
-	alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
-	assert.NoError(t, err)
-	assert.True(t, alertUp.GetBool("triggered"), "Alert should be triggered")
-
-	// Verify we have two unresolved alert history records
-	alertHistoryCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
-	assert.NoError(t, err)
-	assert.EqualValues(t, 2, alertHistoryCount, "Should have exactly two unresolved alert history records")
-
-	err = alerts.ResolveStatusAlerts(hub)
-	assert.NoError(t, err)
-
-	// Verify alertUp is not triggered after resolving
-	alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
-	assert.NoError(t, err)
-	assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered after resolving")
-	// Verify alertDown is still triggered
-	alertDown, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertDown.Id})
-	assert.NoError(t, err)
-	assert.True(t, alertDown.GetBool("triggered"), "Alert should still be triggered after resolving")
-
-	// Verify we have one unresolved alert history record
-	alertHistoryCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
-	assert.NoError(t, err)
-	assert.EqualValues(t, 1, alertHistoryCount, "Should have exactly one unresolved alert history record")
-
+	assert.False(t, updatedRecord.GetBool("triggered"))
 }
--- a/internal/alerts/alerts_test_helpers.go
+++ b/internal/alerts/alerts_test_helpers.go
@@ -1,5 +1,4 @@
 //go:build testing
-// +build testing

 package alerts

@@ -10,6 +9,18 @@ import (
 	"github.com/pocketbase/pocketbase/core"
 )

+func NewTestAlertManagerWithoutWorker(app hubLike) *AlertManager {
+	return &AlertManager{
+		hub:         app,
+		alertsCache: NewAlertsCache(app),
+	}
+}
+
+// GetSystemAlertsCache returns the internal system alerts cache.
+func (am *AlertManager) GetSystemAlertsCache() *AlertsCache {
+	return am.alertsCache
+}
+
 func (am *AlertManager) GetAlertManager() *AlertManager {
 	return am
 }
@@ -28,19 +39,18 @@ func (am *AlertManager) GetPendingAlertsCount() int {
 }

 // ProcessPendingAlerts manually processes all expired alerts (for testing)
-func (am *AlertManager) ProcessPendingAlerts() ([]*core.Record, error) {
+func (am *AlertManager) ProcessPendingAlerts() ([]CachedAlertData, error) {
 	now := time.Now()
 	var lastErr error
-	var processedAlerts []*core.Record
+	var processedAlerts []CachedAlertData
 	am.pendingAlerts.Range(func(key, value any) bool {
 		info := value.(*alertInfo)
 		if now.After(info.expireTime) {
-			// Downtime delay has passed, process alert
-			if err := am.sendStatusAlert("down", info.systemName, info.alertRecord); err != nil {
-				lastErr = err
+			if info.timer != nil {
+				info.timer.Stop()
 			}
-			processedAlerts = append(processedAlerts, info.alertRecord)
-			am.pendingAlerts.Delete(key)
+			am.processPendingAlert(key.(string))
+			processedAlerts = append(processedAlerts, info.alertData)
 		}
 		return true
 	})
@@ -57,6 +67,31 @@ func (am *AlertManager) ForceExpirePendingAlerts() {
 	})
 }

+func (am *AlertManager) ResetPendingAlertTimer(alertID string, delay time.Duration) bool {
+	value, loaded := am.pendingAlerts.Load(alertID)
+	if !loaded {
+		return false
+	}
+
+	info := value.(*alertInfo)
+	if info.timer != nil {
+		info.timer.Stop()
+	}
+	info.expireTime = time.Now().Add(delay)
+	info.timer = time.AfterFunc(delay, func() {
+		am.processPendingAlert(alertID)
+	})
+	return true
+}
+
 func ResolveStatusAlerts(app core.App) error {
 	return resolveStatusAlerts(app)
 }
+
+func (am *AlertManager) RestorePendingStatusAlerts() error {
+	return am.restorePendingStatusAlerts()
+}
+
+func (am *AlertManager) SetAlertTriggered(alert CachedAlertData, triggered bool) error {
+	return am.setAlertTriggered(alert, triggered)
+}
--- a/internal/cmd/agent/agent.go
+++ b/internal/cmd/agent/agent.go
@@ -9,6 +9,7 @@ import (
 	"github.com/henrygd/beszel"
 	"github.com/henrygd/beszel/agent"
 	"github.com/henrygd/beszel/agent/health"
+	"github.com/henrygd/beszel/agent/utils"
 	"github.com/spf13/pflag"
 	"golang.org/x/crypto/ssh"
 )
@@ -17,9 +18,8 @@ import (
 type cmdOptions struct {
 	key    string // key is the public key(s) for SSH authentication.
 	listen string // listen is the address or port to listen on.
-	// TODO: add hubURL and token
-	// hubURL string // hubURL is the URL of the hub to use.
-	// token  string // token is the token to use for authentication.
+	hubURL string // hubURL is the URL of the Beszel hub.
+	token  string // token is the token to use for authentication.
 }

 // parse parses the command line flags and populates the config struct.
@@ -32,9 +32,6 @@ func (opts *cmdOptions) parse() bool {

 	// Subcommands that don't require any pflag parsing
 	switch subcommand {
-	case "-v", "version":
-		fmt.Println(beszel.AppName+"-agent", beszel.Version)
-		return true
 	case "health":
 		err := health.Check()
 		if err != nil {
@@ -42,18 +39,22 @@ func (opts *cmdOptions) parse() bool {
 		}
 		fmt.Print("ok")
 		return true
+	case "fingerprint":
+		handleFingerprint()
+		return true
 	}

 	// pflag.CommandLine.ParseErrorsWhitelist.UnknownFlags = true
 	pflag.StringVarP(&opts.key, "key", "k", "", "Public key(s) for SSH authentication")
 	pflag.StringVarP(&opts.listen, "listen", "l", "", "Address or port to listen on")
-	// pflag.StringVarP(&opts.hubURL, "hub-url", "u", "", "URL of the hub to use")
-	// pflag.StringVarP(&opts.token, "token", "t", "", "Token to use for authentication")
+	pflag.StringVarP(&opts.hubURL, "url", "u", "", "URL of the Beszel hub")
+	pflag.StringVarP(&opts.token, "token", "t", "", "Token to use for authentication")
 	chinaMirrors := pflag.BoolP("china-mirrors", "c", false, "Use mirror for update (gh.beszel.dev) instead of GitHub")
+	version := pflag.BoolP("version", "v", false, "Show version information")
 	help := pflag.BoolP("help", "h", false, "Show this help message")

 	// Convert old single-dash long flags to double-dash for backward compatibility
-	flagsToConvert := []string{"key", "listen"}
+	flagsToConvert := []string{"key", "listen", "url", "token"}
 	for i, arg := range os.Args {
 		for _, flag := range flagsToConvert {
 			singleDash := "-" + flag
@@ -74,9 +75,9 @@ func (opts *cmdOptions) parse() bool {
 		builder.WriteString(os.Args[0])
 		builder.WriteString(" [command] [flags]\n")
 		builder.WriteString("\nCommands:\n")
-		builder.WriteString("  health    Check if the agent is running\n")
-		// builder.WriteString("  help      Display this help message\n")
-		builder.WriteString("  update    Update to the latest version\n")
+		builder.WriteString("  fingerprint  View or reset the agent fingerprint\n")
+		builder.WriteString("  health       Check if the agent is running\n")
+		builder.WriteString("  update       Update to the latest version\n")
 		builder.WriteString("\nFlags:\n")
 		fmt.Print(builder.String())
 		pflag.PrintDefaults()
@@ -87,6 +88,9 @@ func (opts *cmdOptions) parse() bool {

 	// Must run after pflag.Parse()
 	switch {
+	case *version:
+		fmt.Println(beszel.AppName+"-agent", beszel.Version)
+		return true
 	case *help || subcommand == "help":
 		pflag.Usage()
 		return true
@@ -95,6 +99,13 @@ func (opts *cmdOptions) parse() bool {
 		return true
 	}

+	// Set environment variables from CLI flags (if provided)
+	if opts.hubURL != "" {
+		os.Setenv("HUB_URL", opts.hubURL)
+	}
+	if opts.token != "" {
+		os.Setenv("TOKEN", opts.token)
+	}
 	return false
 }

@@ -106,12 +117,12 @@ func (opts *cmdOptions) loadPublicKeys() ([]ssh.PublicKey, error) {
 	}

 	// Try environment variable
-	if key, ok := agent.GetEnv("KEY"); ok && key != "" {
+	if key, ok := utils.GetEnv("KEY"); ok && key != "" {
 		return agent.ParseKeys(key)
 	}

 	// Try key file
-	keyFile, ok := agent.GetEnv("KEY_FILE")
+	keyFile, ok := utils.GetEnv("KEY_FILE")
 	if !ok {
 		return nil, fmt.Errorf("no key provided: must set -key flag, KEY env var, or KEY_FILE env var. Use 'beszel-agent help' for usage")
 	}
@@ -127,6 +138,38 @@ func (opts *cmdOptions) getAddress() string {
 	return agent.GetAddress(opts.listen)
 }

+// handleFingerprint handles the "fingerprint" command with subcommands "view" and "reset".
+func handleFingerprint() {
+	subCmd := ""
+	if len(os.Args) > 2 {
+		subCmd = os.Args[2]
+	}
+
+	switch subCmd {
+	case "", "view":
+		dataDir, _ := agent.GetDataDir()
+		fp := agent.GetFingerprint(dataDir, "", "")
+		fmt.Println(fp)
+	case "help", "-h", "--help":
+		fmt.Print(fingerprintUsage())
+	case "reset":
+		dataDir, err := agent.GetDataDir()
+		if err != nil {
+			log.Fatal(err)
+		}
+		if err := agent.DeleteFingerprint(dataDir); err != nil {
+			log.Fatal(err)
+		}
+		fmt.Println("Fingerprint reset. A new one will be generated on next start.")
+	default:
+		log.Fatalf("Unknown command: %q\n\n%s", subCmd, fingerprintUsage())
+	}
+}
+
+func fingerprintUsage() string {
+	return fmt.Sprintf("Usage: %s fingerprint [view|reset]\n\nCommands:\n  view   Print fingerprint (default)\n  reset  Reset saved fingerprint\n", os.Args[0])
+}
+
 func main() {
 	var opts cmdOptions
 	subcommandHandled := opts.parse()
--- a/internal/cmd/hub/hub.go
+++ b/internal/cmd/hub/hub.go
@@ -28,8 +28,8 @@ func main() {
 	}

 	baseApp := getBaseApp()
-	h := hub.NewHub(baseApp)
-	if err := h.StartHub(); err != nil {
+	hub := hub.NewHub(baseApp)
+	if err := hub.StartHub(); err != nil {
 		log.Fatal(err)
 	}
 }
--- a/internal/common/common-ws.go
+++ b/internal/common/common-ws.go
@@ -1,6 +1,7 @@
 package common

 import (
+	"github.com/fxamacker/cbor/v2"
 	"github.com/henrygd/beszel/internal/entities/smart"
 	"github.com/henrygd/beszel/internal/entities/system"
 	"github.com/henrygd/beszel/internal/entities/systemd"
@@ -34,14 +35,14 @@ type HubRequest[T any] struct {
 // AgentResponse defines the structure for responses sent from agent to hub.
 type AgentResponse struct {
 	Id          *uint32                    `cbor:"0,keyasint,omitempty"`
-	SystemData  *system.CombinedData       `cbor:"1,keyasint,omitempty,omitzero"`
-	Fingerprint *FingerprintResponse       `cbor:"2,keyasint,omitempty,omitzero"`
+	SystemData  *system.CombinedData       `cbor:"1,keyasint,omitempty,omitzero"` // Legacy (<= 0.17)
+	Fingerprint *FingerprintResponse       `cbor:"2,keyasint,omitempty,omitzero"` // Legacy (<= 0.17)
 	Error       string                     `cbor:"3,keyasint,omitempty,omitzero"`
-	String      *string                    `cbor:"4,keyasint,omitempty,omitzero"`
-	SmartData   map[string]smart.SmartData `cbor:"5,keyasint,omitempty,omitzero"`
-	ServiceInfo systemd.ServiceDetails     `cbor:"6,keyasint,omitempty,omitzero"`
-	// Logs        *LogsPayload         `cbor:"4,keyasint,omitempty,omitzero"`
-	// RawBytes    []byte               `cbor:"4,keyasint,omitempty,omitzero"`
+	String      *string                    `cbor:"4,keyasint,omitempty,omitzero"` // Legacy (<= 0.17)
+	SmartData   map[string]smart.SmartData `cbor:"5,keyasint,omitempty,omitzero"` // Legacy (<= 0.17)
+	ServiceInfo systemd.ServiceDetails     `cbor:"6,keyasint,omitempty,omitzero"` // Legacy (<= 0.17)
+	// Data is the generic response payload for new endpoints (0.18+)
+	Data cbor.RawMessage `cbor:"7,keyasint,omitempty,omitzero"`
 }

 type FingerprintRequest struct {
@@ -58,8 +59,8 @@ type FingerprintResponse struct {
 }

 type DataRequestOptions struct {
-	CacheTimeMs uint16 `cbor:"0,keyasint"`
-	// ResourceType uint8  `cbor:"1,keyasint,omitempty,omitzero"`
+	CacheTimeMs    uint16 `cbor:"0,keyasint"`
+	IncludeDetails bool   `cbor:"1,keyasint"`
 }

 type ContainerLogsRequest struct {
--- a/Show More
+++ b/Show More