mirror of
https://github.com/henrygd/beszel.git
synced 2026-03-25 06:56:17 +01:00
Compare commits
1 Commits
apple-gpu
...
docker-24-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
845369ab54 |
5
.github/workflows/inactivity-actions.yml
vendored
5
.github/workflows/inactivity-actions.yml
vendored
@@ -6,7 +6,6 @@ on:
|
|||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
permissions:
|
permissions:
|
||||||
actions: write
|
|
||||||
issues: write
|
issues: write
|
||||||
pull-requests: write
|
pull-requests: write
|
||||||
|
|
||||||
@@ -49,9 +48,6 @@ jobs:
|
|||||||
# Action can not skip PRs, set it to 100 years to cover it.
|
# Action can not skip PRs, set it to 100 years to cover it.
|
||||||
days-before-pr-stale: 36524
|
days-before-pr-stale: 36524
|
||||||
|
|
||||||
# Max issues to process before early exit. Next run resumes from cache. GH API limit: 5000.
|
|
||||||
operations-per-run: 1500
|
|
||||||
|
|
||||||
# Labels
|
# Labels
|
||||||
stale-issue-label: 'stale'
|
stale-issue-label: 'stale'
|
||||||
remove-stale-when-updated: true
|
remove-stale-when-updated: true
|
||||||
@@ -60,5 +56,4 @@ jobs:
|
|||||||
|
|
||||||
# Exemptions
|
# Exemptions
|
||||||
exempt-assignees: true
|
exempt-assignees: true
|
||||||
|
|
||||||
exempt-milestones: true
|
exempt-milestones: true
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -10,7 +10,6 @@ dist
|
|||||||
*.exe
|
*.exe
|
||||||
internal/cmd/hub/hub
|
internal/cmd/hub/hub
|
||||||
internal/cmd/agent/agent
|
internal/cmd/agent/agent
|
||||||
agent.test
|
|
||||||
node_modules
|
node_modules
|
||||||
build
|
build
|
||||||
*timestamp*
|
*timestamp*
|
||||||
|
|||||||
40
Makefile
40
Makefile
@@ -3,40 +3,6 @@ OS ?= $(shell go env GOOS)
|
|||||||
ARCH ?= $(shell go env GOARCH)
|
ARCH ?= $(shell go env GOARCH)
|
||||||
# Skip building the web UI if true
|
# Skip building the web UI if true
|
||||||
SKIP_WEB ?= false
|
SKIP_WEB ?= false
|
||||||
# Controls NVML/glibc agent build tag behavior:
|
|
||||||
# - auto (default): enable on linux/amd64 glibc hosts
|
|
||||||
# - true: always enable
|
|
||||||
# - false: always disable
|
|
||||||
NVML ?= auto
|
|
||||||
|
|
||||||
# Detect glibc host for local linux/amd64 builds.
|
|
||||||
HOST_GLIBC := $(shell \
|
|
||||||
if [ "$(OS)" = "linux" ] && [ "$(ARCH)" = "amd64" ]; then \
|
|
||||||
for p in /lib64/ld-linux-x86-64.so.2 /lib/x86_64-linux-gnu/ld-linux-x86-64.so.2 /lib/ld-linux-x86-64.so.2; do \
|
|
||||||
[ -e "$$p" ] && { echo true; exit 0; }; \
|
|
||||||
done; \
|
|
||||||
if command -v ldd >/dev/null 2>&1; then \
|
|
||||||
if ldd --version 2>&1 | tr '[:upper:]' '[:lower:]' | awk '/gnu libc|glibc/{found=1} END{exit !found}'; then \
|
|
||||||
echo true; \
|
|
||||||
else \
|
|
||||||
echo false; \
|
|
||||||
fi; \
|
|
||||||
else \
|
|
||||||
echo false; \
|
|
||||||
fi; \
|
|
||||||
else \
|
|
||||||
echo false; \
|
|
||||||
fi)
|
|
||||||
|
|
||||||
# Enable glibc build tag for NVML on supported Linux builds.
|
|
||||||
AGENT_GO_TAGS :=
|
|
||||||
ifeq ($(NVML),true)
|
|
||||||
AGENT_GO_TAGS := -tags glibc
|
|
||||||
else ifeq ($(NVML),auto)
|
|
||||||
ifeq ($(HOST_GLIBC),true)
|
|
||||||
AGENT_GO_TAGS := -tags glibc
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Set executable extension based on target OS
|
# Set executable extension based on target OS
|
||||||
EXE_EXT := $(if $(filter windows,$(OS)),.exe,)
|
EXE_EXT := $(if $(filter windows,$(OS)),.exe,)
|
||||||
@@ -88,7 +54,7 @@ fetch-smartctl-conditional:
|
|||||||
|
|
||||||
# Update build-agent to include conditional .NET build
|
# Update build-agent to include conditional .NET build
|
||||||
build-agent: tidy build-dotnet-conditional fetch-smartctl-conditional
|
build-agent: tidy build-dotnet-conditional fetch-smartctl-conditional
|
||||||
GOOS=$(OS) GOARCH=$(ARCH) go build $(AGENT_GO_TAGS) -o ./build/beszel-agent_$(OS)_$(ARCH)$(EXE_EXT) -ldflags "-w -s" ./internal/cmd/agent
|
GOOS=$(OS) GOARCH=$(ARCH) go build -o ./build/beszel-agent_$(OS)_$(ARCH)$(EXE_EXT) -ldflags "-w -s" ./internal/cmd/agent
|
||||||
|
|
||||||
build-hub: tidy $(if $(filter false,$(SKIP_WEB)),build-web-ui)
|
build-hub: tidy $(if $(filter false,$(SKIP_WEB)),build-web-ui)
|
||||||
GOOS=$(OS) GOARCH=$(ARCH) go build -o ./build/beszel_$(OS)_$(ARCH)$(EXE_EXT) -ldflags "-w -s" ./internal/cmd/hub
|
GOOS=$(OS) GOARCH=$(ARCH) go build -o ./build/beszel_$(OS)_$(ARCH)$(EXE_EXT) -ldflags "-w -s" ./internal/cmd/hub
|
||||||
@@ -124,9 +90,9 @@ dev-hub:
|
|||||||
|
|
||||||
dev-agent:
|
dev-agent:
|
||||||
@if command -v entr >/dev/null 2>&1; then \
|
@if command -v entr >/dev/null 2>&1; then \
|
||||||
find ./internal/cmd/agent/*.go ./agent/*.go | entr -r go run $(AGENT_GO_TAGS) github.com/henrygd/beszel/internal/cmd/agent; \
|
find ./internal/cmd/agent/*.go ./agent/*.go | entr -r go run github.com/henrygd/beszel/internal/cmd/agent; \
|
||||||
else \
|
else \
|
||||||
go run $(AGENT_GO_TAGS) github.com/henrygd/beszel/internal/cmd/agent; \
|
go run github.com/henrygd/beszel/internal/cmd/agent; \
|
||||||
fi
|
fi
|
||||||
|
|
||||||
build-dotnet:
|
build-dotnet:
|
||||||
|
|||||||
@@ -32,6 +32,10 @@ var ansiEscapePattern = regexp.MustCompile(`\x1b\[[0-9;]*[a-zA-Z]|\x1b\][^\x07]*
|
|||||||
const (
|
const (
|
||||||
// Docker API timeout in milliseconds
|
// Docker API timeout in milliseconds
|
||||||
dockerTimeoutMs = 2100
|
dockerTimeoutMs = 2100
|
||||||
|
// Number of consecutive /containers/json failures before forcing a client reset on old Docker versions
|
||||||
|
dockerClientResetFailureThreshold = 3
|
||||||
|
// Minimum time between Docker client resets to avoid reset flapping
|
||||||
|
dockerClientResetCooldown = 30 * time.Second
|
||||||
// Maximum realistic network speed (5 GB/s) to detect bad deltas
|
// Maximum realistic network speed (5 GB/s) to detect bad deltas
|
||||||
maxNetworkSpeedBps uint64 = 5e9
|
maxNetworkSpeedBps uint64 = 5e9
|
||||||
// Maximum conceivable memory usage of a container (100TB) to detect bad memory stats
|
// Maximum conceivable memory usage of a container (100TB) to detect bad memory stats
|
||||||
@@ -55,12 +59,16 @@ type dockerManager struct {
|
|||||||
containerStatsMap map[string]*container.Stats // Keeps track of container stats
|
containerStatsMap map[string]*container.Stats // Keeps track of container stats
|
||||||
validIds map[string]struct{} // Map of valid container ids, used to prune invalid containers from containerStatsMap
|
validIds map[string]struct{} // Map of valid container ids, used to prune invalid containers from containerStatsMap
|
||||||
goodDockerVersion bool // Whether docker version is at least 25.0.0 (one-shot works correctly)
|
goodDockerVersion bool // Whether docker version is at least 25.0.0 (one-shot works correctly)
|
||||||
|
versionChecked bool // Whether docker version detection completed successfully
|
||||||
isWindows bool // Whether the Docker Engine API is running on Windows
|
isWindows bool // Whether the Docker Engine API is running on Windows
|
||||||
buf *bytes.Buffer // Buffer to store and read response bodies
|
buf *bytes.Buffer // Buffer to store and read response bodies
|
||||||
decoder *json.Decoder // Reusable JSON decoder that reads from buf
|
decoder *json.Decoder // Reusable JSON decoder that reads from buf
|
||||||
apiStats *container.ApiStats // Reusable API stats object
|
apiStats *container.ApiStats // Reusable API stats object
|
||||||
excludeContainers []string // Patterns to exclude containers by name
|
excludeContainers []string // Patterns to exclude containers by name
|
||||||
usingPodman bool // Whether the Docker Engine API is running on Podman
|
usingPodman bool // Whether the Docker Engine API is running on Podman
|
||||||
|
transport *http.Transport // Base transport used by client for connection resets
|
||||||
|
consecutiveListFailures int // Number of consecutive /containers/json request failures
|
||||||
|
lastClientReset time.Time // Last time the Docker client connections were reset
|
||||||
|
|
||||||
// Cache-time-aware tracking for CPU stats (similar to cpu.go)
|
// Cache-time-aware tracking for CPU stats (similar to cpu.go)
|
||||||
// Maps cache time intervals to container-specific CPU usage tracking
|
// Maps cache time intervals to container-specific CPU usage tracking
|
||||||
@@ -72,7 +80,6 @@ type dockerManager struct {
|
|||||||
// cacheTimeMs -> DeltaTracker for network bytes sent/received
|
// cacheTimeMs -> DeltaTracker for network bytes sent/received
|
||||||
networkSentTrackers map[uint16]*deltatracker.DeltaTracker[string, uint64]
|
networkSentTrackers map[uint16]*deltatracker.DeltaTracker[string, uint64]
|
||||||
networkRecvTrackers map[uint16]*deltatracker.DeltaTracker[string, uint64]
|
networkRecvTrackers map[uint16]*deltatracker.DeltaTracker[string, uint64]
|
||||||
retrySleep func(time.Duration)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// userAgentRoundTripper is a custom http.RoundTripper that adds a User-Agent header to all requests
|
// userAgentRoundTripper is a custom http.RoundTripper that adds a User-Agent header to all requests
|
||||||
@@ -120,8 +127,10 @@ func (dm *dockerManager) shouldExcludeContainer(name string) bool {
|
|||||||
func (dm *dockerManager) getDockerStats(cacheTimeMs uint16) ([]*container.Stats, error) {
|
func (dm *dockerManager) getDockerStats(cacheTimeMs uint16) ([]*container.Stats, error) {
|
||||||
resp, err := dm.client.Get("http://localhost/containers/json")
|
resp, err := dm.client.Get("http://localhost/containers/json")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
dm.handleContainerListError(err)
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
dm.consecutiveListFailures = 0
|
||||||
|
|
||||||
dm.apiContainerList = dm.apiContainerList[:0]
|
dm.apiContainerList = dm.apiContainerList[:0]
|
||||||
if err := dm.decode(resp, &dm.apiContainerList); err != nil {
|
if err := dm.decode(resp, &dm.apiContainerList); err != nil {
|
||||||
@@ -205,6 +214,50 @@ func (dm *dockerManager) getDockerStats(cacheTimeMs uint16) ([]*container.Stats,
|
|||||||
return stats, nil
|
return stats, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (dm *dockerManager) handleContainerListError(err error) {
|
||||||
|
dm.consecutiveListFailures++
|
||||||
|
if !dm.shouldResetDockerClient(err) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
dm.resetDockerClientConnections()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (dm *dockerManager) shouldResetDockerClient(err error) bool {
|
||||||
|
if !dm.versionChecked || dm.goodDockerVersion {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if dm.consecutiveListFailures < dockerClientResetFailureThreshold {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !dm.lastClientReset.IsZero() && time.Since(dm.lastClientReset) < dockerClientResetCooldown {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return isDockerApiOverloadError(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func isDockerApiOverloadError(err error) bool {
|
||||||
|
if err == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if errors.Is(err, context.DeadlineExceeded) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
msg := err.Error()
|
||||||
|
return strings.Contains(msg, "Client.Timeout exceeded") ||
|
||||||
|
strings.Contains(msg, "request canceled") ||
|
||||||
|
strings.Contains(msg, "context deadline exceeded") ||
|
||||||
|
strings.Contains(msg, "EOF")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (dm *dockerManager) resetDockerClientConnections() {
|
||||||
|
if dm.transport == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
dm.transport.CloseIdleConnections()
|
||||||
|
dm.lastClientReset = time.Now()
|
||||||
|
slog.Warn("Reset Docker client connections after repeated /containers/json failures", "failures", dm.consecutiveListFailures)
|
||||||
|
}
|
||||||
|
|
||||||
// initializeCpuTracking initializes CPU tracking maps for a specific cache time interval
|
// initializeCpuTracking initializes CPU tracking maps for a specific cache time interval
|
||||||
func (dm *dockerManager) initializeCpuTracking(cacheTimeMs uint16) {
|
func (dm *dockerManager) initializeCpuTracking(cacheTimeMs uint16) {
|
||||||
// Initialize cache time maps if they don't exist
|
// Initialize cache time maps if they don't exist
|
||||||
@@ -554,6 +607,7 @@ func newDockerManager() *dockerManager {
|
|||||||
Timeout: timeout,
|
Timeout: timeout,
|
||||||
Transport: userAgentTransport,
|
Transport: userAgentTransport,
|
||||||
},
|
},
|
||||||
|
transport: transport,
|
||||||
containerStatsMap: make(map[string]*container.Stats),
|
containerStatsMap: make(map[string]*container.Stats),
|
||||||
sem: make(chan struct{}, 5),
|
sem: make(chan struct{}, 5),
|
||||||
apiContainerList: []*container.ApiInfo{},
|
apiContainerList: []*container.ApiInfo{},
|
||||||
@@ -566,7 +620,6 @@ func newDockerManager() *dockerManager {
|
|||||||
lastCpuReadTime: make(map[uint16]map[string]time.Time),
|
lastCpuReadTime: make(map[uint16]map[string]time.Time),
|
||||||
networkSentTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
|
networkSentTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
|
||||||
networkRecvTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
|
networkRecvTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
|
||||||
retrySleep: time.Sleep,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If using podman, return client
|
// If using podman, return client
|
||||||
@@ -576,7 +629,7 @@ func newDockerManager() *dockerManager {
|
|||||||
return manager
|
return manager
|
||||||
}
|
}
|
||||||
|
|
||||||
// run version check in goroutine to avoid blocking (server may not be ready and requires retries)
|
// this can take up to 5 seconds with retry, so run in goroutine
|
||||||
go manager.checkDockerVersion()
|
go manager.checkDockerVersion()
|
||||||
|
|
||||||
// give version check a chance to complete before returning
|
// give version check a chance to complete before returning
|
||||||
@@ -596,23 +649,24 @@ func (dm *dockerManager) checkDockerVersion() {
|
|||||||
const versionMaxTries = 2
|
const versionMaxTries = 2
|
||||||
for i := 1; i <= versionMaxTries; i++ {
|
for i := 1; i <= versionMaxTries; i++ {
|
||||||
resp, err = dm.client.Get("http://localhost/version")
|
resp, err = dm.client.Get("http://localhost/version")
|
||||||
if err == nil && resp.StatusCode == http.StatusOK {
|
if err == nil {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
if resp != nil {
|
if resp != nil {
|
||||||
resp.Body.Close()
|
resp.Body.Close()
|
||||||
}
|
}
|
||||||
if i < versionMaxTries {
|
if i < versionMaxTries {
|
||||||
slog.Debug("Failed to get Docker version; retrying", "attempt", i, "err", err, "response", resp)
|
slog.Debug("Failed to get Docker version; retrying", "attempt", i, "error", err)
|
||||||
dm.retrySleep(5 * time.Second)
|
time.Sleep(5 * time.Second)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err != nil || resp.StatusCode != http.StatusOK {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if err := dm.decode(resp, &versionInfo); err != nil {
|
if err := dm.decode(resp, &versionInfo); err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
dm.versionChecked = true
|
||||||
// if version > 24, one-shot works correctly and we can limit concurrent operations
|
// if version > 24, one-shot works correctly and we can limit concurrent operations
|
||||||
if dockerVersion, err := semver.Parse(versionInfo.Version); err == nil && dockerVersion.Major > 24 {
|
if dockerVersion, err := semver.Parse(versionInfo.Version); err == nil && dockerVersion.Major > 24 {
|
||||||
dm.goodDockerVersion = true
|
dm.goodDockerVersion = true
|
||||||
|
|||||||
@@ -5,13 +5,7 @@ package agent
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"net"
|
|
||||||
"net/http"
|
|
||||||
"net/http/httptest"
|
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
@@ -385,117 +379,6 @@ func TestDockerManagerCreation(t *testing.T) {
|
|||||||
assert.NotNil(t, dm.networkRecvTrackers)
|
assert.NotNil(t, dm.networkRecvTrackers)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCheckDockerVersion(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
responses []struct {
|
|
||||||
statusCode int
|
|
||||||
body string
|
|
||||||
}
|
|
||||||
expectedGood bool
|
|
||||||
expectedRequests int
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
name: "200 with good version on first try",
|
|
||||||
responses: []struct {
|
|
||||||
statusCode int
|
|
||||||
body string
|
|
||||||
}{
|
|
||||||
{http.StatusOK, `{"Version":"25.0.1"}`},
|
|
||||||
},
|
|
||||||
expectedGood: true,
|
|
||||||
expectedRequests: 1,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "200 with old version on first try",
|
|
||||||
responses: []struct {
|
|
||||||
statusCode int
|
|
||||||
body string
|
|
||||||
}{
|
|
||||||
{http.StatusOK, `{"Version":"24.0.7"}`},
|
|
||||||
},
|
|
||||||
expectedGood: false,
|
|
||||||
expectedRequests: 1,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "non-200 then 200 with good version",
|
|
||||||
responses: []struct {
|
|
||||||
statusCode int
|
|
||||||
body string
|
|
||||||
}{
|
|
||||||
{http.StatusServiceUnavailable, `"not ready"`},
|
|
||||||
{http.StatusOK, `{"Version":"25.1.0"}`},
|
|
||||||
},
|
|
||||||
expectedGood: true,
|
|
||||||
expectedRequests: 2,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "non-200 on all retries",
|
|
||||||
responses: []struct {
|
|
||||||
statusCode int
|
|
||||||
body string
|
|
||||||
}{
|
|
||||||
{http.StatusInternalServerError, `"error"`},
|
|
||||||
{http.StatusUnauthorized, `"error"`},
|
|
||||||
},
|
|
||||||
expectedGood: false,
|
|
||||||
expectedRequests: 2,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tt := range tests {
|
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
|
||||||
requestCount := 0
|
|
||||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
idx := requestCount
|
|
||||||
requestCount++
|
|
||||||
if idx >= len(tt.responses) {
|
|
||||||
idx = len(tt.responses) - 1
|
|
||||||
}
|
|
||||||
w.WriteHeader(tt.responses[idx].statusCode)
|
|
||||||
fmt.Fprint(w, tt.responses[idx].body)
|
|
||||||
}))
|
|
||||||
defer server.Close()
|
|
||||||
|
|
||||||
dm := &dockerManager{
|
|
||||||
client: &http.Client{
|
|
||||||
Transport: &http.Transport{
|
|
||||||
DialContext: func(_ context.Context, network, _ string) (net.Conn, error) {
|
|
||||||
return net.Dial(network, server.Listener.Addr().String())
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
retrySleep: func(time.Duration) {},
|
|
||||||
}
|
|
||||||
|
|
||||||
dm.checkDockerVersion()
|
|
||||||
|
|
||||||
assert.Equal(t, tt.expectedGood, dm.goodDockerVersion)
|
|
||||||
assert.Equal(t, tt.expectedRequests, requestCount)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
t.Run("request error on all retries", func(t *testing.T) {
|
|
||||||
requestCount := 0
|
|
||||||
dm := &dockerManager{
|
|
||||||
client: &http.Client{
|
|
||||||
Transport: &http.Transport{
|
|
||||||
DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
|
|
||||||
requestCount++
|
|
||||||
return nil, errors.New("connection refused")
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
retrySleep: func(time.Duration) {},
|
|
||||||
}
|
|
||||||
|
|
||||||
dm.checkDockerVersion()
|
|
||||||
|
|
||||||
assert.False(t, dm.goodDockerVersion)
|
|
||||||
assert.Equal(t, 2, requestCount)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestCycleCpuDeltas(t *testing.T) {
|
func TestCycleCpuDeltas(t *testing.T) {
|
||||||
dm := &dockerManager{
|
dm := &dockerManager{
|
||||||
lastCpuContainer: map[uint16]map[string]uint64{
|
lastCpuContainer: map[uint16]map[string]uint64{
|
||||||
|
|||||||
@@ -1,95 +0,0 @@
|
|||||||
package agent
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
)
|
|
||||||
|
|
||||||
func isEmmcBlockName(name string) bool {
|
|
||||||
if !strings.HasPrefix(name, "mmcblk") {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
suffix := strings.TrimPrefix(name, "mmcblk")
|
|
||||||
if suffix == "" {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
for _, c := range suffix {
|
|
||||||
if c < '0' || c > '9' {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
func parseHexOrDecByte(s string) (uint8, bool) {
|
|
||||||
s = strings.TrimSpace(s)
|
|
||||||
if s == "" {
|
|
||||||
return 0, false
|
|
||||||
}
|
|
||||||
base := 10
|
|
||||||
if strings.HasPrefix(s, "0x") || strings.HasPrefix(s, "0X") {
|
|
||||||
base = 16
|
|
||||||
s = s[2:]
|
|
||||||
}
|
|
||||||
parsed, err := strconv.ParseUint(s, base, 8)
|
|
||||||
if err != nil {
|
|
||||||
return 0, false
|
|
||||||
}
|
|
||||||
return uint8(parsed), true
|
|
||||||
}
|
|
||||||
|
|
||||||
func parseHexBytePair(s string) (uint8, uint8, bool) {
|
|
||||||
fields := strings.Fields(s)
|
|
||||||
if len(fields) < 2 {
|
|
||||||
return 0, 0, false
|
|
||||||
}
|
|
||||||
a, okA := parseHexOrDecByte(fields[0])
|
|
||||||
b, okB := parseHexOrDecByte(fields[1])
|
|
||||||
if !okA && !okB {
|
|
||||||
return 0, 0, false
|
|
||||||
}
|
|
||||||
return a, b, true
|
|
||||||
}
|
|
||||||
|
|
||||||
func emmcSmartStatus(preEOL uint8) string {
|
|
||||||
switch preEOL {
|
|
||||||
case 0x01:
|
|
||||||
return "PASSED"
|
|
||||||
case 0x02:
|
|
||||||
return "WARNING"
|
|
||||||
case 0x03:
|
|
||||||
return "FAILED"
|
|
||||||
default:
|
|
||||||
return "UNKNOWN"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func emmcPreEOLString(preEOL uint8) string {
|
|
||||||
switch preEOL {
|
|
||||||
case 0x01:
|
|
||||||
return "0x01 (normal)"
|
|
||||||
case 0x02:
|
|
||||||
return "0x02 (warning)"
|
|
||||||
case 0x03:
|
|
||||||
return "0x03 (urgent)"
|
|
||||||
default:
|
|
||||||
return fmt.Sprintf("0x%02x", preEOL)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func emmcLifeTimeString(v uint8) string {
|
|
||||||
// JEDEC eMMC: 0x01..0x0A => 0-100% used in 10% steps, 0x0B => exceeded.
|
|
||||||
switch {
|
|
||||||
case v == 0:
|
|
||||||
return "0x00 (not reported)"
|
|
||||||
case v >= 0x01 && v <= 0x0A:
|
|
||||||
low := int(v-1) * 10
|
|
||||||
high := int(v) * 10
|
|
||||||
return fmt.Sprintf("0x%02x (%d-%d%% used)", v, low, high)
|
|
||||||
case v == 0x0B:
|
|
||||||
return "0x0b (>100% used)"
|
|
||||||
default:
|
|
||||||
return fmt.Sprintf("0x%02x", v)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,78 +0,0 @@
|
|||||||
package agent
|
|
||||||
|
|
||||||
import "testing"
|
|
||||||
|
|
||||||
func TestParseHexOrDecByte(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
in string
|
|
||||||
want uint8
|
|
||||||
ok bool
|
|
||||||
}{
|
|
||||||
{"0x01", 1, true},
|
|
||||||
{"0X0b", 11, true},
|
|
||||||
{"01", 1, true},
|
|
||||||
{" 3 ", 3, true},
|
|
||||||
{"", 0, false},
|
|
||||||
{"0x", 0, false},
|
|
||||||
{"nope", 0, false},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tt := range tests {
|
|
||||||
got, ok := parseHexOrDecByte(tt.in)
|
|
||||||
if ok != tt.ok || got != tt.want {
|
|
||||||
t.Fatalf("parseHexOrDecByte(%q) = (%d,%v), want (%d,%v)", tt.in, got, ok, tt.want, tt.ok)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParseHexBytePair(t *testing.T) {
|
|
||||||
a, b, ok := parseHexBytePair("0x01 0x02\n")
|
|
||||||
if !ok || a != 1 || b != 2 {
|
|
||||||
t.Fatalf("parseHexBytePair hex = (%d,%d,%v), want (1,2,true)", a, b, ok)
|
|
||||||
}
|
|
||||||
|
|
||||||
a, b, ok = parseHexBytePair("01 02")
|
|
||||||
if !ok || a != 1 || b != 2 {
|
|
||||||
t.Fatalf("parseHexBytePair dec = (%d,%d,%v), want (1,2,true)", a, b, ok)
|
|
||||||
}
|
|
||||||
|
|
||||||
_, _, ok = parseHexBytePair("0x01")
|
|
||||||
if ok {
|
|
||||||
t.Fatalf("parseHexBytePair short input ok=true, want false")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestEmmcSmartStatus(t *testing.T) {
|
|
||||||
if got := emmcSmartStatus(0x01); got != "PASSED" {
|
|
||||||
t.Fatalf("emmcSmartStatus(0x01) = %q, want PASSED", got)
|
|
||||||
}
|
|
||||||
if got := emmcSmartStatus(0x02); got != "WARNING" {
|
|
||||||
t.Fatalf("emmcSmartStatus(0x02) = %q, want WARNING", got)
|
|
||||||
}
|
|
||||||
if got := emmcSmartStatus(0x03); got != "FAILED" {
|
|
||||||
t.Fatalf("emmcSmartStatus(0x03) = %q, want FAILED", got)
|
|
||||||
}
|
|
||||||
if got := emmcSmartStatus(0x00); got != "UNKNOWN" {
|
|
||||||
t.Fatalf("emmcSmartStatus(0x00) = %q, want UNKNOWN", got)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestIsEmmcBlockName(t *testing.T) {
|
|
||||||
cases := []struct {
|
|
||||||
name string
|
|
||||||
ok bool
|
|
||||||
}{
|
|
||||||
{"mmcblk0", true},
|
|
||||||
{"mmcblk1", true},
|
|
||||||
{"mmcblk10", true},
|
|
||||||
{"mmcblk0p1", false},
|
|
||||||
{"sda", false},
|
|
||||||
{"mmcblk", false},
|
|
||||||
{"mmcblkA", false},
|
|
||||||
}
|
|
||||||
for _, c := range cases {
|
|
||||||
if got := isEmmcBlockName(c.name); got != c.ok {
|
|
||||||
t.Fatalf("isEmmcBlockName(%q) = %v, want %v", c.name, got, c.ok)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,227 +0,0 @@
|
|||||||
//go:build linux
|
|
||||||
|
|
||||||
package agent
|
|
||||||
|
|
||||||
import (
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/henrygd/beszel/internal/entities/smart"
|
|
||||||
)
|
|
||||||
|
|
||||||
// emmcSysfsRoot is a test hook; production value is "/sys".
|
|
||||||
var emmcSysfsRoot = "/sys"
|
|
||||||
|
|
||||||
type emmcHealth struct {
|
|
||||||
model string
|
|
||||||
serial string
|
|
||||||
revision string
|
|
||||||
capacity uint64
|
|
||||||
preEOL uint8
|
|
||||||
lifeA uint8
|
|
||||||
lifeB uint8
|
|
||||||
}
|
|
||||||
|
|
||||||
func scanEmmcDevices() []*DeviceInfo {
|
|
||||||
blockDir := filepath.Join(emmcSysfsRoot, "class", "block")
|
|
||||||
entries, err := os.ReadDir(blockDir)
|
|
||||||
if err != nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
devices := make([]*DeviceInfo, 0, 2)
|
|
||||||
for _, ent := range entries {
|
|
||||||
name := ent.Name()
|
|
||||||
if !isEmmcBlockName(name) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
deviceDir := filepath.Join(blockDir, name, "device")
|
|
||||||
if !hasEmmcHealthFiles(deviceDir) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
devPath := filepath.Join("/dev", name)
|
|
||||||
devices = append(devices, &DeviceInfo{
|
|
||||||
Name: devPath,
|
|
||||||
Type: "emmc",
|
|
||||||
InfoName: devPath + " [eMMC]",
|
|
||||||
Protocol: "MMC",
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
return devices
|
|
||||||
}
|
|
||||||
|
|
||||||
func (sm *SmartManager) collectEmmcHealth(deviceInfo *DeviceInfo) (bool, error) {
|
|
||||||
if deviceInfo == nil || deviceInfo.Name == "" {
|
|
||||||
return false, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
base := filepath.Base(deviceInfo.Name)
|
|
||||||
if !isEmmcBlockName(base) && !strings.EqualFold(deviceInfo.Type, "emmc") && !strings.EqualFold(deviceInfo.Type, "mmc") {
|
|
||||||
return false, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
health, ok := readEmmcHealth(base)
|
|
||||||
if !ok {
|
|
||||||
return false, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Normalize the device type to keep pruning logic stable across refreshes.
|
|
||||||
deviceInfo.Type = "emmc"
|
|
||||||
|
|
||||||
key := health.serial
|
|
||||||
if key == "" {
|
|
||||||
key = filepath.Join("/dev", base)
|
|
||||||
}
|
|
||||||
|
|
||||||
status := emmcSmartStatus(health.preEOL)
|
|
||||||
|
|
||||||
attrs := []*smart.SmartAttribute{
|
|
||||||
{
|
|
||||||
Name: "PreEOLInfo",
|
|
||||||
RawValue: uint64(health.preEOL),
|
|
||||||
RawString: emmcPreEOLString(health.preEOL),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Name: "DeviceLifeTimeEstA",
|
|
||||||
RawValue: uint64(health.lifeA),
|
|
||||||
RawString: emmcLifeTimeString(health.lifeA),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Name: "DeviceLifeTimeEstB",
|
|
||||||
RawValue: uint64(health.lifeB),
|
|
||||||
RawString: emmcLifeTimeString(health.lifeB),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
sm.Lock()
|
|
||||||
defer sm.Unlock()
|
|
||||||
|
|
||||||
if _, exists := sm.SmartDataMap[key]; !exists {
|
|
||||||
sm.SmartDataMap[key] = &smart.SmartData{}
|
|
||||||
}
|
|
||||||
|
|
||||||
data := sm.SmartDataMap[key]
|
|
||||||
data.ModelName = health.model
|
|
||||||
data.SerialNumber = health.serial
|
|
||||||
data.FirmwareVersion = health.revision
|
|
||||||
data.Capacity = health.capacity
|
|
||||||
data.Temperature = 0
|
|
||||||
data.SmartStatus = status
|
|
||||||
data.DiskName = filepath.Join("/dev", base)
|
|
||||||
data.DiskType = "emmc"
|
|
||||||
data.Attributes = attrs
|
|
||||||
|
|
||||||
return true, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func readEmmcHealth(blockName string) (emmcHealth, bool) {
|
|
||||||
var out emmcHealth
|
|
||||||
|
|
||||||
if !isEmmcBlockName(blockName) {
|
|
||||||
return out, false
|
|
||||||
}
|
|
||||||
|
|
||||||
deviceDir := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "device")
|
|
||||||
preEOL, okPre := readHexByteFile(filepath.Join(deviceDir, "pre_eol_info"))
|
|
||||||
|
|
||||||
// Some kernels expose EXT_CSD lifetime via "life_time" (two bytes), others as
|
|
||||||
// separate files. Support both.
|
|
||||||
lifeA, lifeB, okLife := readLifeTime(deviceDir)
|
|
||||||
|
|
||||||
if !okPre && !okLife {
|
|
||||||
return out, false
|
|
||||||
}
|
|
||||||
|
|
||||||
out.preEOL = preEOL
|
|
||||||
out.lifeA = lifeA
|
|
||||||
out.lifeB = lifeB
|
|
||||||
|
|
||||||
out.model = readStringFile(filepath.Join(deviceDir, "name"))
|
|
||||||
out.serial = readStringFile(filepath.Join(deviceDir, "serial"))
|
|
||||||
out.revision = readStringFile(filepath.Join(deviceDir, "prv"))
|
|
||||||
|
|
||||||
if capBytes, ok := readBlockCapacityBytes(blockName); ok {
|
|
||||||
out.capacity = capBytes
|
|
||||||
}
|
|
||||||
|
|
||||||
return out, true
|
|
||||||
}
|
|
||||||
|
|
||||||
func readLifeTime(deviceDir string) (uint8, uint8, bool) {
|
|
||||||
if content, ok := readStringFileOK(filepath.Join(deviceDir, "life_time")); ok {
|
|
||||||
a, b, ok := parseHexBytePair(content)
|
|
||||||
return a, b, ok
|
|
||||||
}
|
|
||||||
|
|
||||||
a, okA := readHexByteFile(filepath.Join(deviceDir, "device_life_time_est_typ_a"))
|
|
||||||
b, okB := readHexByteFile(filepath.Join(deviceDir, "device_life_time_est_typ_b"))
|
|
||||||
if okA || okB {
|
|
||||||
return a, b, true
|
|
||||||
}
|
|
||||||
return 0, 0, false
|
|
||||||
}
|
|
||||||
|
|
||||||
func readBlockCapacityBytes(blockName string) (uint64, bool) {
|
|
||||||
sizePath := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "size")
|
|
||||||
lbsPath := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "queue", "logical_block_size")
|
|
||||||
|
|
||||||
sizeStr, ok := readStringFileOK(sizePath)
|
|
||||||
if !ok {
|
|
||||||
return 0, false
|
|
||||||
}
|
|
||||||
sectors, err := strconv.ParseUint(sizeStr, 10, 64)
|
|
||||||
if err != nil || sectors == 0 {
|
|
||||||
return 0, false
|
|
||||||
}
|
|
||||||
|
|
||||||
lbsStr, ok := readStringFileOK(lbsPath)
|
|
||||||
logicalBlockSize := uint64(512)
|
|
||||||
if ok {
|
|
||||||
if parsed, err := strconv.ParseUint(lbsStr, 10, 64); err == nil && parsed > 0 {
|
|
||||||
logicalBlockSize = parsed
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return sectors * logicalBlockSize, true
|
|
||||||
}
|
|
||||||
|
|
||||||
func readHexByteFile(path string) (uint8, bool) {
|
|
||||||
content, ok := readStringFileOK(path)
|
|
||||||
if !ok {
|
|
||||||
return 0, false
|
|
||||||
}
|
|
||||||
b, ok := parseHexOrDecByte(content)
|
|
||||||
return b, ok
|
|
||||||
}
|
|
||||||
|
|
||||||
func readStringFile(path string) string {
|
|
||||||
content, _ := readStringFileOK(path)
|
|
||||||
return content
|
|
||||||
}
|
|
||||||
|
|
||||||
func readStringFileOK(path string) (string, bool) {
|
|
||||||
b, err := os.ReadFile(path)
|
|
||||||
if err != nil {
|
|
||||||
return "", false
|
|
||||||
}
|
|
||||||
return strings.TrimSpace(string(b)), true
|
|
||||||
}
|
|
||||||
|
|
||||||
func hasEmmcHealthFiles(deviceDir string) bool {
|
|
||||||
entries, err := os.ReadDir(deviceDir)
|
|
||||||
if err != nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
for _, ent := range entries {
|
|
||||||
switch ent.Name() {
|
|
||||||
case "pre_eol_info", "life_time", "device_life_time_est_typ_a", "device_life_time_est_typ_b":
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
@@ -1,80 +0,0 @@
|
|||||||
//go:build linux
|
|
||||||
|
|
||||||
package agent
|
|
||||||
|
|
||||||
import (
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/henrygd/beszel/internal/entities/smart"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestEmmcMockSysfsScanAndCollect(t *testing.T) {
|
|
||||||
tmp := t.TempDir()
|
|
||||||
prev := emmcSysfsRoot
|
|
||||||
emmcSysfsRoot = tmp
|
|
||||||
t.Cleanup(func() { emmcSysfsRoot = prev })
|
|
||||||
|
|
||||||
// Fake: /sys/class/block/mmcblk0
|
|
||||||
mmcDeviceDir := filepath.Join(tmp, "class", "block", "mmcblk0", "device")
|
|
||||||
mmcQueueDir := filepath.Join(tmp, "class", "block", "mmcblk0", "queue")
|
|
||||||
if err := os.MkdirAll(mmcDeviceDir, 0o755); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
if err := os.MkdirAll(mmcQueueDir, 0o755); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
write := func(path, content string) {
|
|
||||||
t.Helper()
|
|
||||||
if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
write(filepath.Join(mmcDeviceDir, "pre_eol_info"), "0x02\n")
|
|
||||||
write(filepath.Join(mmcDeviceDir, "life_time"), "0x04 0x05\n")
|
|
||||||
write(filepath.Join(mmcDeviceDir, "name"), "H26M52103FMR\n")
|
|
||||||
write(filepath.Join(mmcDeviceDir, "serial"), "01234567\n")
|
|
||||||
write(filepath.Join(mmcDeviceDir, "prv"), "0x08\n")
|
|
||||||
write(filepath.Join(mmcQueueDir, "logical_block_size"), "512\n")
|
|
||||||
write(filepath.Join(tmp, "class", "block", "mmcblk0", "size"), "1024\n") // sectors
|
|
||||||
|
|
||||||
devs := scanEmmcDevices()
|
|
||||||
if len(devs) != 1 {
|
|
||||||
t.Fatalf("scanEmmcDevices() = %d devices, want 1", len(devs))
|
|
||||||
}
|
|
||||||
if devs[0].Name != "/dev/mmcblk0" || devs[0].Type != "emmc" {
|
|
||||||
t.Fatalf("scanEmmcDevices()[0] = %+v, want Name=/dev/mmcblk0 Type=emmc", devs[0])
|
|
||||||
}
|
|
||||||
|
|
||||||
sm := &SmartManager{SmartDataMap: map[string]*smart.SmartData{}}
|
|
||||||
ok, err := sm.collectEmmcHealth(devs[0])
|
|
||||||
if err != nil || !ok {
|
|
||||||
t.Fatalf("collectEmmcHealth() = (ok=%v, err=%v), want (true,nil)", ok, err)
|
|
||||||
}
|
|
||||||
if len(sm.SmartDataMap) != 1 {
|
|
||||||
t.Fatalf("SmartDataMap len=%d, want 1", len(sm.SmartDataMap))
|
|
||||||
}
|
|
||||||
var got *smart.SmartData
|
|
||||||
for _, v := range sm.SmartDataMap {
|
|
||||||
got = v
|
|
||||||
break
|
|
||||||
}
|
|
||||||
if got == nil {
|
|
||||||
t.Fatalf("SmartDataMap value nil")
|
|
||||||
}
|
|
||||||
if got.DiskType != "emmc" || got.DiskName != "/dev/mmcblk0" {
|
|
||||||
t.Fatalf("disk fields = (type=%q name=%q), want (emmc,/dev/mmcblk0)", got.DiskType, got.DiskName)
|
|
||||||
}
|
|
||||||
if got.SmartStatus != "WARNING" {
|
|
||||||
t.Fatalf("SmartStatus=%q, want WARNING", got.SmartStatus)
|
|
||||||
}
|
|
||||||
if got.SerialNumber != "01234567" || got.ModelName == "" || got.Capacity == 0 {
|
|
||||||
t.Fatalf("identity fields = (model=%q serial=%q cap=%d), want non-empty model, serial 01234567, cap>0", got.ModelName, got.SerialNumber, got.Capacity)
|
|
||||||
}
|
|
||||||
if len(got.Attributes) < 3 {
|
|
||||||
t.Fatalf("attributes len=%d, want >= 3", len(got.Attributes))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
//go:build !linux
|
|
||||||
|
|
||||||
package agent
|
|
||||||
|
|
||||||
// Non-Linux builds: eMMC health via sysfs is not available.
|
|
||||||
|
|
||||||
func scanEmmcDevices() []*DeviceInfo {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (sm *SmartManager) collectEmmcHealth(deviceInfo *DeviceInfo) (bool, error) {
|
|
||||||
return false, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
389
agent/gpu.go
389
agent/gpu.go
@@ -9,7 +9,6 @@ import (
|
|||||||
"maps"
|
"maps"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"regexp"
|
"regexp"
|
||||||
"runtime"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
@@ -22,12 +21,13 @@ const (
|
|||||||
// Commands
|
// Commands
|
||||||
nvidiaSmiCmd string = "nvidia-smi"
|
nvidiaSmiCmd string = "nvidia-smi"
|
||||||
rocmSmiCmd string = "rocm-smi"
|
rocmSmiCmd string = "rocm-smi"
|
||||||
|
amdgpuCmd string = "amdgpu" // internal cmd for sysfs collection
|
||||||
tegraStatsCmd string = "tegrastats"
|
tegraStatsCmd string = "tegrastats"
|
||||||
nvtopCmd string = "nvtop"
|
|
||||||
powermetricsCmd string = "powermetrics"
|
|
||||||
macmonCmd string = "macmon"
|
|
||||||
noGPUFoundMsg string = "no GPU found - see https://beszel.dev/guide/gpu"
|
|
||||||
|
|
||||||
|
// Polling intervals
|
||||||
|
nvidiaSmiInterval string = "4" // in seconds
|
||||||
|
tegraStatsInterval string = "3700" // in milliseconds
|
||||||
|
rocmSmiInterval time.Duration = 4300 * time.Millisecond
|
||||||
// Command retry and timeout constants
|
// Command retry and timeout constants
|
||||||
retryWaitTime time.Duration = 5 * time.Second
|
retryWaitTime time.Duration = 5 * time.Second
|
||||||
maxFailureRetries int = 5
|
maxFailureRetries int = 5
|
||||||
@@ -40,6 +40,12 @@ const (
|
|||||||
// GPUManager manages data collection for GPUs (either Nvidia or AMD)
|
// GPUManager manages data collection for GPUs (either Nvidia or AMD)
|
||||||
type GPUManager struct {
|
type GPUManager struct {
|
||||||
sync.Mutex
|
sync.Mutex
|
||||||
|
nvidiaSmi bool
|
||||||
|
rocmSmi bool
|
||||||
|
amdgpu bool
|
||||||
|
tegrastats bool
|
||||||
|
intelGpuStats bool
|
||||||
|
nvml bool
|
||||||
GpuDataMap map[string]*system.GPUData
|
GpuDataMap map[string]*system.GPUData
|
||||||
// lastAvgData stores the last calculated averages for each GPU
|
// lastAvgData stores the last calculated averages for each GPU
|
||||||
// Used when a collection happens before new data arrives (Count == 0)
|
// Used when a collection happens before new data arrives (Count == 0)
|
||||||
@@ -81,58 +87,6 @@ type gpuCollector struct {
|
|||||||
|
|
||||||
var errNoValidData = fmt.Errorf("no valid GPU data found") // Error for missing data
|
var errNoValidData = fmt.Errorf("no valid GPU data found") // Error for missing data
|
||||||
|
|
||||||
// collectorSource identifies a selectable GPU collector in GPU_COLLECTOR.
|
|
||||||
type collectorSource string
|
|
||||||
|
|
||||||
const (
|
|
||||||
collectorSourceNVTop collectorSource = collectorSource(nvtopCmd)
|
|
||||||
collectorSourceNVML collectorSource = "nvml"
|
|
||||||
collectorSourceNvidiaSMI collectorSource = collectorSource(nvidiaSmiCmd)
|
|
||||||
collectorSourceIntelGpuTop collectorSource = collectorSource(intelGpuStatsCmd)
|
|
||||||
collectorSourceAmdSysfs collectorSource = "amd_sysfs"
|
|
||||||
collectorSourceRocmSMI collectorSource = collectorSource(rocmSmiCmd)
|
|
||||||
collectorSourceMacmon collectorSource = collectorSource(macmonCmd)
|
|
||||||
collectorSourcePowermetrics collectorSource = collectorSource(powermetricsCmd)
|
|
||||||
collectorGroupNvidia string = "nvidia"
|
|
||||||
collectorGroupIntel string = "intel"
|
|
||||||
collectorGroupAmd string = "amd"
|
|
||||||
collectorGroupApple string = "apple"
|
|
||||||
)
|
|
||||||
|
|
||||||
func isValidCollectorSource(source collectorSource) bool {
|
|
||||||
switch source {
|
|
||||||
case collectorSourceNVTop,
|
|
||||||
collectorSourceNVML,
|
|
||||||
collectorSourceNvidiaSMI,
|
|
||||||
collectorSourceIntelGpuTop,
|
|
||||||
collectorSourceAmdSysfs,
|
|
||||||
collectorSourceRocmSMI,
|
|
||||||
collectorSourceMacmon,
|
|
||||||
collectorSourcePowermetrics:
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// gpuCapabilities records which GPU tools and sysfs interfaces were detected
// on the host. Each flag corresponds to one tool or interface.
type gpuCapabilities struct {
	hasNvidiaSmi, hasRocmSmi, hasAmdSysfs, hasTegrastats bool
	hasIntelGpuTop, hasNvtop, hasMacmon, hasPowermetrics bool
}
|
|
||||||
|
|
||||||
// collectorDefinition describes one selectable GPU collector.
type collectorDefinition struct {
	// group is the vendor group name; it may be empty.
	group string
	// available tells whether the collector can be used on this host.
	available bool
	// start launches the collector and reports whether it was started.
	// The onFailure callback may be nil.
	start func(onFailure func()) bool
	// deprecationWarning, when non-empty, is a message to log before starting.
	deprecationWarning string
}
|
|
||||||
|
|
||||||
// starts and manages the ongoing collection of GPU data for the specified GPU management utility
|
// starts and manages the ongoing collection of GPU data for the specified GPU management utility
|
||||||
func (c *gpuCollector) start() {
|
func (c *gpuCollector) start() {
|
||||||
for {
|
for {
|
||||||
@@ -438,43 +392,44 @@ func (gm *GPUManager) storeSnapshot(id string, gpu *system.GPUData, cacheKey uin
|
|||||||
gm.lastSnapshots[cacheKey][id] = snapshot
|
gm.lastSnapshots[cacheKey][id] = snapshot
|
||||||
}
|
}
|
||||||
|
|
||||||
// discoverGpuCapabilities checks for available GPU tooling and sysfs support.
|
// detectGPUs checks for the presence of GPU management tools (nvidia-smi, rocm-smi, tegrastats)
|
||||||
// It only reports capability presence and does not apply policy decisions.
|
// in the system path. It sets the corresponding flags in the GPUManager struct if any of these
|
||||||
func (gm *GPUManager) discoverGpuCapabilities() gpuCapabilities {
|
// tools are found. If none of the tools are found, it returns an error indicating that no GPU
|
||||||
caps := gpuCapabilities{
|
// management tools are available.
|
||||||
hasAmdSysfs: gm.hasAmdSysfs(),
|
func (gm *GPUManager) detectGPUs() error {
|
||||||
}
|
|
||||||
if _, err := exec.LookPath(nvidiaSmiCmd); err == nil {
|
if _, err := exec.LookPath(nvidiaSmiCmd); err == nil {
|
||||||
caps.hasNvidiaSmi = true
|
gm.nvidiaSmi = true
|
||||||
}
|
}
|
||||||
if _, err := exec.LookPath(rocmSmiCmd); err == nil {
|
if _, err := exec.LookPath(rocmSmiCmd); err == nil {
|
||||||
caps.hasRocmSmi = true
|
if val, _ := GetEnv("AMD_SYSFS"); val == "true" {
|
||||||
|
gm.amdgpu = true
|
||||||
|
} else {
|
||||||
|
gm.rocmSmi = true
|
||||||
|
}
|
||||||
|
} else if gm.hasAmdSysfs() {
|
||||||
|
gm.amdgpu = true
|
||||||
}
|
}
|
||||||
if _, err := exec.LookPath(tegraStatsCmd); err == nil {
|
if _, err := exec.LookPath(tegraStatsCmd); err == nil {
|
||||||
caps.hasTegrastats = true
|
gm.tegrastats = true
|
||||||
|
gm.nvidiaSmi = false
|
||||||
}
|
}
|
||||||
if _, err := exec.LookPath(intelGpuStatsCmd); err == nil {
|
if _, err := exec.LookPath(intelGpuStatsCmd); err == nil {
|
||||||
caps.hasIntelGpuTop = true
|
gm.intelGpuStats = true
|
||||||
}
|
}
|
||||||
if _, err := exec.LookPath(nvtopCmd); err == nil {
|
if gm.nvidiaSmi || gm.rocmSmi || gm.amdgpu || gm.tegrastats || gm.intelGpuStats || gm.nvml {
|
||||||
caps.hasNvtop = true
|
return nil
|
||||||
}
|
}
|
||||||
if runtime.GOOS == "darwin" {
|
return fmt.Errorf("no GPU found - install nvidia-smi, rocm-smi, or intel_gpu_top")
|
||||||
if _, err := exec.LookPath(macmonCmd); err == nil {
|
|
||||||
caps.hasMacmon = true
|
|
||||||
}
|
|
||||||
if _, err := exec.LookPath(powermetricsCmd); err == nil {
|
|
||||||
caps.hasPowermetrics = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return caps
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func hasAnyGpuCollector(caps gpuCapabilities) bool {
|
// startCollector starts the appropriate GPU data collector based on the command
|
||||||
return caps.hasNvidiaSmi || caps.hasRocmSmi || caps.hasAmdSysfs || caps.hasTegrastats || caps.hasIntelGpuTop || caps.hasNvtop || caps.hasMacmon || caps.hasPowermetrics
|
func (gm *GPUManager) startCollector(command string) {
|
||||||
}
|
collector := gpuCollector{
|
||||||
|
name: command,
|
||||||
func (gm *GPUManager) startIntelCollector() {
|
bufSize: 10 * 1024,
|
||||||
|
}
|
||||||
|
switch command {
|
||||||
|
case intelGpuStatsCmd:
|
||||||
go func() {
|
go func() {
|
||||||
failures := 0
|
failures := 0
|
||||||
for {
|
for {
|
||||||
@@ -489,39 +444,27 @@ func (gm *GPUManager) startIntelCollector() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
}
|
case nvidiaSmiCmd:
|
||||||
|
collector.cmdArgs = []string{
|
||||||
func (gm *GPUManager) startNvidiaSmiCollector(intervalSeconds string) {
|
"-l", nvidiaSmiInterval,
|
||||||
collector := gpuCollector{
|
|
||||||
name: nvidiaSmiCmd,
|
|
||||||
bufSize: 10 * 1024,
|
|
||||||
cmdArgs: []string{
|
|
||||||
"-l", intervalSeconds,
|
|
||||||
"--query-gpu=index,name,temperature.gpu,memory.used,memory.total,utilization.gpu,power.draw",
|
"--query-gpu=index,name,temperature.gpu,memory.used,memory.total,utilization.gpu,power.draw",
|
||||||
"--format=csv,noheader,nounits",
|
"--format=csv,noheader,nounits",
|
||||||
},
|
|
||||||
parse: gm.parseNvidiaData,
|
|
||||||
}
|
}
|
||||||
|
collector.parse = gm.parseNvidiaData
|
||||||
go collector.start()
|
go collector.start()
|
||||||
}
|
case tegraStatsCmd:
|
||||||
|
collector.cmdArgs = []string{"--interval", tegraStatsInterval}
|
||||||
func (gm *GPUManager) startTegraStatsCollector(intervalMilliseconds string) {
|
collector.parse = gm.getJetsonParser()
|
||||||
collector := gpuCollector{
|
|
||||||
name: tegraStatsCmd,
|
|
||||||
bufSize: 10 * 1024,
|
|
||||||
cmdArgs: []string{"--interval", intervalMilliseconds},
|
|
||||||
parse: gm.getJetsonParser(),
|
|
||||||
}
|
|
||||||
go collector.start()
|
go collector.start()
|
||||||
}
|
case amdgpuCmd:
|
||||||
|
go func() {
|
||||||
func (gm *GPUManager) startRocmSmiCollector(pollInterval time.Duration) {
|
if err := gm.collectAmdStats(); err != nil {
|
||||||
collector := gpuCollector{
|
slog.Warn("Error collecting AMD GPU data via sysfs", "err", err)
|
||||||
name: rocmSmiCmd,
|
|
||||||
bufSize: 10 * 1024,
|
|
||||||
cmdArgs: []string{"--showid", "--showtemp", "--showuse", "--showpower", "--showproductname", "--showmeminfo", "vram", "--json"},
|
|
||||||
parse: gm.parseAmdData,
|
|
||||||
}
|
}
|
||||||
|
}()
|
||||||
|
case rocmSmiCmd:
|
||||||
|
collector.cmdArgs = []string{"--showid", "--showtemp", "--showuse", "--showpower", "--showproductname", "--showmeminfo", "vram", "--json"}
|
||||||
|
collector.parse = gm.parseAmdData
|
||||||
go func() {
|
go func() {
|
||||||
failures := 0
|
failures := 0
|
||||||
for {
|
for {
|
||||||
@@ -532,222 +475,50 @@ func (gm *GPUManager) startRocmSmiCollector(pollInterval time.Duration) {
|
|||||||
}
|
}
|
||||||
slog.Warn("Error collecting AMD GPU data via rocm-smi", "err", err)
|
slog.Warn("Error collecting AMD GPU data via rocm-smi", "err", err)
|
||||||
}
|
}
|
||||||
time.Sleep(pollInterval)
|
time.Sleep(rocmSmiInterval)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
}
|
|
||||||
|
|
||||||
func (gm *GPUManager) collectorDefinitions(caps gpuCapabilities) map[collectorSource]collectorDefinition {
|
|
||||||
return map[collectorSource]collectorDefinition{
|
|
||||||
collectorSourceNVML: {
|
|
||||||
group: collectorGroupNvidia,
|
|
||||||
available: caps.hasNvidiaSmi,
|
|
||||||
start: func(_ func()) bool {
|
|
||||||
return gm.startNvmlCollector()
|
|
||||||
},
|
|
||||||
},
|
|
||||||
collectorSourceNvidiaSMI: {
|
|
||||||
group: collectorGroupNvidia,
|
|
||||||
available: caps.hasNvidiaSmi,
|
|
||||||
start: func(_ func()) bool {
|
|
||||||
gm.startNvidiaSmiCollector("4") // seconds
|
|
||||||
return true
|
|
||||||
},
|
|
||||||
},
|
|
||||||
collectorSourceIntelGpuTop: {
|
|
||||||
group: collectorGroupIntel,
|
|
||||||
available: caps.hasIntelGpuTop,
|
|
||||||
start: func(_ func()) bool {
|
|
||||||
gm.startIntelCollector()
|
|
||||||
return true
|
|
||||||
},
|
|
||||||
},
|
|
||||||
collectorSourceAmdSysfs: {
|
|
||||||
group: collectorGroupAmd,
|
|
||||||
available: caps.hasAmdSysfs,
|
|
||||||
start: func(_ func()) bool {
|
|
||||||
return gm.startAmdSysfsCollector()
|
|
||||||
},
|
|
||||||
},
|
|
||||||
collectorSourceRocmSMI: {
|
|
||||||
group: collectorGroupAmd,
|
|
||||||
available: caps.hasRocmSmi,
|
|
||||||
deprecationWarning: "rocm-smi is deprecated and may be removed in a future release",
|
|
||||||
start: func(_ func()) bool {
|
|
||||||
gm.startRocmSmiCollector(4300 * time.Millisecond)
|
|
||||||
return true
|
|
||||||
},
|
|
||||||
},
|
|
||||||
collectorSourceNVTop: {
|
|
||||||
available: caps.hasNvtop,
|
|
||||||
start: func(onFailure func()) bool {
|
|
||||||
gm.startNvtopCollector("30", onFailure) // tens of milliseconds
|
|
||||||
return true
|
|
||||||
},
|
|
||||||
},
|
|
||||||
collectorSourceMacmon: {
|
|
||||||
group: collectorGroupApple,
|
|
||||||
available: caps.hasMacmon,
|
|
||||||
start: func(_ func()) bool {
|
|
||||||
gm.startMacmonCollector()
|
|
||||||
return true
|
|
||||||
},
|
|
||||||
},
|
|
||||||
collectorSourcePowermetrics: {
|
|
||||||
group: collectorGroupApple,
|
|
||||||
available: caps.hasPowermetrics,
|
|
||||||
start: func(_ func()) bool {
|
|
||||||
gm.startPowermetricsCollector()
|
|
||||||
return true
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// parseCollectorPriority parses GPU_COLLECTOR and returns valid ordered entries.
|
|
||||||
func parseCollectorPriority(value string) []collectorSource {
|
|
||||||
parts := strings.Split(value, ",")
|
|
||||||
priorities := make([]collectorSource, 0, len(parts))
|
|
||||||
for _, raw := range parts {
|
|
||||||
name := collectorSource(strings.TrimSpace(strings.ToLower(raw)))
|
|
||||||
if !isValidCollectorSource(name) {
|
|
||||||
if name != "" {
|
|
||||||
slog.Warn("Ignoring unknown GPU collector", "collector", name)
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
priorities = append(priorities, name)
|
|
||||||
}
|
|
||||||
return priorities
|
|
||||||
}
|
|
||||||
|
|
||||||
// startNvmlCollector initializes NVML and starts its polling loop.
|
|
||||||
func (gm *GPUManager) startNvmlCollector() bool {
|
|
||||||
collector := &nvmlCollector{gm: gm}
|
|
||||||
if err := collector.init(); err != nil {
|
|
||||||
slog.Warn("Failed to initialize NVML", "err", err)
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
go collector.start()
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// startAmdSysfsCollector starts AMD GPU collection via sysfs.
|
|
||||||
func (gm *GPUManager) startAmdSysfsCollector() bool {
|
|
||||||
go func() {
|
|
||||||
if err := gm.collectAmdStats(); err != nil {
|
|
||||||
slog.Warn("Error collecting AMD GPU data via sysfs", "err", err)
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// startCollectorsByPriority starts collectors in order with one source per vendor group.
|
|
||||||
func (gm *GPUManager) startCollectorsByPriority(priorities []collectorSource, caps gpuCapabilities) int {
|
|
||||||
definitions := gm.collectorDefinitions(caps)
|
|
||||||
selectedGroups := make(map[string]bool, 3)
|
|
||||||
started := 0
|
|
||||||
for i, source := range priorities {
|
|
||||||
definition, ok := definitions[source]
|
|
||||||
if !ok || !definition.available {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// nvtop is not a vendor-specific collector, so should only be used if no other collectors are selected or it is first in GPU_COLLECTOR.
|
|
||||||
if source == collectorSourceNVTop {
|
|
||||||
if len(selectedGroups) > 0 {
|
|
||||||
slog.Warn("Skipping nvtop because other collectors are selected")
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// if nvtop fails, fall back to remaining collectors.
|
|
||||||
remaining := append([]collectorSource(nil), priorities[i+1:]...)
|
|
||||||
if definition.start(func() {
|
|
||||||
gm.startCollectorsByPriority(remaining, caps)
|
|
||||||
}) {
|
|
||||||
started++
|
|
||||||
return started
|
|
||||||
}
|
|
||||||
}
|
|
||||||
group := definition.group
|
|
||||||
if group == "" || selectedGroups[group] {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if definition.deprecationWarning != "" {
|
|
||||||
slog.Warn(definition.deprecationWarning)
|
|
||||||
}
|
|
||||||
if definition.start(nil) {
|
|
||||||
selectedGroups[group] = true
|
|
||||||
started++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return started
|
|
||||||
}
|
|
||||||
|
|
||||||
// resolveLegacyCollectorPriority builds the default collector order when GPU_COLLECTOR is unset.
|
|
||||||
func (gm *GPUManager) resolveLegacyCollectorPriority(caps gpuCapabilities) []collectorSource {
|
|
||||||
priorities := make([]collectorSource, 0, 4)
|
|
||||||
|
|
||||||
if caps.hasNvidiaSmi && !caps.hasTegrastats {
|
|
||||||
if nvml, _ := GetEnv("NVML"); nvml == "true" {
|
|
||||||
priorities = append(priorities, collectorSourceNVML, collectorSourceNvidiaSMI)
|
|
||||||
} else {
|
|
||||||
priorities = append(priorities, collectorSourceNvidiaSMI)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if caps.hasRocmSmi {
|
|
||||||
if val, _ := GetEnv("AMD_SYSFS"); val == "true" {
|
|
||||||
priorities = append(priorities, collectorSourceAmdSysfs)
|
|
||||||
} else {
|
|
||||||
priorities = append(priorities, collectorSourceRocmSMI)
|
|
||||||
}
|
|
||||||
} else if caps.hasAmdSysfs {
|
|
||||||
priorities = append(priorities, collectorSourceAmdSysfs)
|
|
||||||
}
|
|
||||||
|
|
||||||
if caps.hasIntelGpuTop {
|
|
||||||
priorities = append(priorities, collectorSourceIntelGpuTop)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apple collectors are currently opt-in only.
|
|
||||||
// Enable them with GPU_COLLECTOR=macmon or GPU_COLLECTOR=powermetrics.
|
|
||||||
|
|
||||||
// Keep nvtop as a last resort only when no vendor collector exists.
|
|
||||||
if len(priorities) == 0 && caps.hasNvtop {
|
|
||||||
priorities = append(priorities, collectorSourceNVTop)
|
|
||||||
}
|
|
||||||
return priorities
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewGPUManager creates and initializes a new GPUManager
|
// NewGPUManager creates and initializes a new GPUManager
|
||||||
func NewGPUManager() (*GPUManager, error) {
|
func NewGPUManager() (*GPUManager, error) {
|
||||||
if skipGPU, _ := GetEnv("SKIP_GPU"); skipGPU == "true" {
|
if skipGPU, _ := GetEnv("SKIP_GPU"); skipGPU == "true" {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
var gm GPUManager
|
var gm GPUManager
|
||||||
caps := gm.discoverGpuCapabilities()
|
if err := gm.detectGPUs(); err != nil {
|
||||||
if !hasAnyGpuCollector(caps) {
|
return nil, err
|
||||||
return nil, fmt.Errorf(noGPUFoundMsg)
|
|
||||||
}
|
}
|
||||||
gm.GpuDataMap = make(map[string]*system.GPUData)
|
gm.GpuDataMap = make(map[string]*system.GPUData)
|
||||||
|
|
||||||
// Jetson devices should always use tegrastats (ignore GPU_COLLECTOR).
|
if gm.nvidiaSmi {
|
||||||
if caps.hasTegrastats {
|
if nvml, _ := GetEnv("NVML"); nvml == "true" {
|
||||||
gm.startTegraStatsCollector("3700")
|
gm.nvml = true
|
||||||
return &gm, nil
|
gm.nvidiaSmi = false
|
||||||
|
collector := &nvmlCollector{gm: &gm}
|
||||||
|
if err := collector.init(); err == nil {
|
||||||
|
go collector.start()
|
||||||
|
} else {
|
||||||
|
slog.Warn("Failed to initialize NVML, falling back to nvidia-smi", "err", err)
|
||||||
|
gm.nvidiaSmi = true
|
||||||
|
gm.startCollector(nvidiaSmiCmd)
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
// if GPU_COLLECTOR is set, start user-defined collectors.
|
gm.startCollector(nvidiaSmiCmd)
|
||||||
if collectorConfig, ok := GetEnv("GPU_COLLECTOR"); ok && strings.TrimSpace(collectorConfig) != "" {
|
|
||||||
priorities := parseCollectorPriority(collectorConfig)
|
|
||||||
if gm.startCollectorsByPriority(priorities, caps) == 0 {
|
|
||||||
return nil, fmt.Errorf("no configured GPU collectors are available")
|
|
||||||
}
|
}
|
||||||
return &gm, nil
|
|
||||||
}
|
}
|
||||||
|
if gm.rocmSmi {
|
||||||
// auto-detect and start collectors when GPU_COLLECTOR is unset.
|
gm.startCollector(rocmSmiCmd)
|
||||||
if gm.startCollectorsByPriority(gm.resolveLegacyCollectorPriority(caps), caps) == 0 {
|
}
|
||||||
return nil, fmt.Errorf(noGPUFoundMsg)
|
if gm.amdgpu {
|
||||||
|
gm.startCollector(amdgpuCmd)
|
||||||
|
}
|
||||||
|
if gm.tegrastats {
|
||||||
|
gm.startCollector(tegraStatsCmd)
|
||||||
|
}
|
||||||
|
if gm.intelGpuStats {
|
||||||
|
gm.startCollector(intelGpuStatsCmd)
|
||||||
}
|
}
|
||||||
|
|
||||||
return &gm, nil
|
return &gm, nil
|
||||||
|
|||||||
@@ -3,7 +3,6 @@
|
|||||||
package agent
|
package agent
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
@@ -16,15 +15,6 @@ import (
|
|||||||
"github.com/henrygd/beszel/internal/entities/system"
|
"github.com/henrygd/beszel/internal/entities/system"
|
||||||
)
|
)
|
||||||
|
|
||||||
var amdgpuNameCache = struct {
|
|
||||||
sync.RWMutex
|
|
||||||
hits map[string]string
|
|
||||||
misses map[string]struct{}
|
|
||||||
}{
|
|
||||||
hits: make(map[string]string),
|
|
||||||
misses: make(map[string]struct{}),
|
|
||||||
}
|
|
||||||
|
|
||||||
// hasAmdSysfs returns true if any AMD GPU sysfs nodes are found
|
// hasAmdSysfs returns true if any AMD GPU sysfs nodes are found
|
||||||
func (gm *GPUManager) hasAmdSysfs() bool {
|
func (gm *GPUManager) hasAmdSysfs() bool {
|
||||||
cards, err := filepath.Glob("/sys/class/drm/card*/device/vendor")
|
cards, err := filepath.Glob("/sys/class/drm/card*/device/vendor")
|
||||||
@@ -42,7 +32,6 @@ func (gm *GPUManager) hasAmdSysfs() bool {
|
|||||||
|
|
||||||
// collectAmdStats collects AMD GPU metrics directly from sysfs to avoid the overhead of rocm-smi
|
// collectAmdStats collects AMD GPU metrics directly from sysfs to avoid the overhead of rocm-smi
|
||||||
func (gm *GPUManager) collectAmdStats() error {
|
func (gm *GPUManager) collectAmdStats() error {
|
||||||
sysfsPollInterval := 3000 * time.Millisecond
|
|
||||||
cards, err := filepath.Glob("/sys/class/drm/card*")
|
cards, err := filepath.Glob("/sys/class/drm/card*")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -81,11 +70,10 @@ func (gm *GPUManager) collectAmdStats() error {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
failures = 0
|
failures = 0
|
||||||
time.Sleep(sysfsPollInterval)
|
time.Sleep(rocmSmiInterval)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// isAmdGpu checks whether a DRM card path belongs to AMD vendor ID 0x1002.
|
|
||||||
func isAmdGpu(cardPath string) bool {
|
func isAmdGpu(cardPath string) bool {
|
||||||
vendorPath := filepath.Join(cardPath, "device/vendor")
|
vendorPath := filepath.Join(cardPath, "device/vendor")
|
||||||
vendor, err := os.ReadFile(vendorPath)
|
vendor, err := os.ReadFile(vendorPath)
|
||||||
@@ -103,17 +91,8 @@ func (gm *GPUManager) updateAmdGpuData(cardPath string) bool {
|
|||||||
|
|
||||||
// Read all sysfs values first (no lock needed - these can be slow)
|
// Read all sysfs values first (no lock needed - these can be slow)
|
||||||
usage, usageErr := readSysfsFloat(filepath.Join(devicePath, "gpu_busy_percent"))
|
usage, usageErr := readSysfsFloat(filepath.Join(devicePath, "gpu_busy_percent"))
|
||||||
vramUsed, memUsedErr := readSysfsFloat(filepath.Join(devicePath, "mem_info_vram_used"))
|
memUsed, memUsedErr := readSysfsFloat(filepath.Join(devicePath, "mem_info_vram_used"))
|
||||||
vramTotal, _ := readSysfsFloat(filepath.Join(devicePath, "mem_info_vram_total"))
|
memTotal, _ := readSysfsFloat(filepath.Join(devicePath, "mem_info_vram_total"))
|
||||||
memUsed := vramUsed
|
|
||||||
memTotal := vramTotal
|
|
||||||
// if gtt is present, add it to the memory used and total (https://github.com/henrygd/beszel/issues/1569#issuecomment-3837640484)
|
|
||||||
if gttUsed, err := readSysfsFloat(filepath.Join(devicePath, "mem_info_gtt_used")); err == nil && gttUsed > 0 {
|
|
||||||
if gttTotal, err := readSysfsFloat(filepath.Join(devicePath, "mem_info_gtt_total")); err == nil {
|
|
||||||
memUsed += gttUsed
|
|
||||||
memTotal += gttTotal
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var temp, power float64
|
var temp, power float64
|
||||||
hwmons, _ := filepath.Glob(filepath.Join(devicePath, "hwmon/hwmon*"))
|
hwmons, _ := filepath.Glob(filepath.Join(devicePath, "hwmon/hwmon*"))
|
||||||
@@ -154,7 +133,6 @@ func (gm *GPUManager) updateAmdGpuData(cardPath string) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// readSysfsFloat reads and parses a numeric value from a sysfs file.
|
|
||||||
func readSysfsFloat(path string) (float64, error) {
|
func readSysfsFloat(path string) (float64, error) {
|
||||||
val, err := os.ReadFile(path)
|
val, err := os.ReadFile(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -163,110 +141,6 @@ func readSysfsFloat(path string) (float64, error) {
|
|||||||
return strconv.ParseFloat(strings.TrimSpace(string(val)), 64)
|
return strconv.ParseFloat(strings.TrimSpace(string(val)), 64)
|
||||||
}
|
}
|
||||||
|
|
||||||
// normalizeHexID returns id with surrounding whitespace removed, converted to
// lowercase, and with a leading "0x" stripped if present.
func normalizeHexID(id string) string {
	lowered := strings.ToLower(strings.TrimSpace(id))
	return strings.TrimPrefix(lowered, "0x")
}
|
|
||||||
|
|
||||||
// cacheKeyForAmdgpu returns the cache key for a device: "deviceID:revisionID"
// when a revision is given, otherwise just deviceID.
func cacheKeyForAmdgpu(deviceID, revisionID string) string {
	if revisionID == "" {
		return deviceID
	}
	return deviceID + ":" + revisionID
}
|
|
||||||
|
|
||||||
// lookupAmdgpuNameInFile resolves an AMDGPU name from amdgpu.ids by device/revision.
|
|
||||||
func lookupAmdgpuNameInFile(deviceID, revisionID, filePath string) (name string, exact bool, found bool) {
|
|
||||||
file, err := os.Open(filePath)
|
|
||||||
if err != nil {
|
|
||||||
return "", false, false
|
|
||||||
}
|
|
||||||
defer file.Close()
|
|
||||||
|
|
||||||
var byDevice string
|
|
||||||
scanner := bufio.NewScanner(file)
|
|
||||||
for scanner.Scan() {
|
|
||||||
line := strings.TrimSpace(scanner.Text())
|
|
||||||
if line == "" || strings.HasPrefix(line, "#") {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
parts := strings.SplitN(line, ",", 3)
|
|
||||||
if len(parts) != 3 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
dev := normalizeHexID(parts[0])
|
|
||||||
rev := normalizeHexID(parts[1])
|
|
||||||
productName := strings.TrimSpace(parts[2])
|
|
||||||
if dev == "" || productName == "" || dev != deviceID {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if byDevice == "" {
|
|
||||||
byDevice = productName
|
|
||||||
}
|
|
||||||
if revisionID != "" && rev == revisionID {
|
|
||||||
return productName, true, true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if byDevice != "" {
|
|
||||||
return byDevice, false, true
|
|
||||||
}
|
|
||||||
return "", false, false
|
|
||||||
}
|
|
||||||
|
|
||||||
// getCachedAmdgpuName returns cached hit/miss status for the given device/revision.
|
|
||||||
func getCachedAmdgpuName(deviceID, revisionID string) (name string, found bool, done bool) {
|
|
||||||
// Build the list of cache keys to check. We always look up the exact device+revision key.
|
|
||||||
// When revisionID is set, we also look up deviceID alone, since the cache may store a
|
|
||||||
// device-only fallback when we couldn't resolve the exact revision.
|
|
||||||
keys := []string{cacheKeyForAmdgpu(deviceID, revisionID)}
|
|
||||||
if revisionID != "" {
|
|
||||||
keys = append(keys, deviceID)
|
|
||||||
}
|
|
||||||
|
|
||||||
knownMisses := 0
|
|
||||||
amdgpuNameCache.RLock()
|
|
||||||
defer amdgpuNameCache.RUnlock()
|
|
||||||
for _, key := range keys {
|
|
||||||
if name, ok := amdgpuNameCache.hits[key]; ok {
|
|
||||||
return name, true, true
|
|
||||||
}
|
|
||||||
if _, ok := amdgpuNameCache.misses[key]; ok {
|
|
||||||
knownMisses++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// done=true means "don't bother doing slow lookup": we either found a name (above) or
|
|
||||||
// every key we checked was already a known miss, so we've tried before and failed.
|
|
||||||
return "", false, knownMisses == len(keys)
|
|
||||||
}
|
|
||||||
|
|
||||||
// normalizeAmdgpuName strips surrounding whitespace from an AMDGPU product
// name and drops a trailing " Graphics" suffix if one is present.
func normalizeAmdgpuName(name string) string {
	const suffix = " Graphics"
	trimmed := strings.TrimSpace(name)
	if strings.HasSuffix(trimmed, suffix) {
		return trimmed[:len(trimmed)-len(suffix)]
	}
	return trimmed
}
|
|
||||||
|
|
||||||
// cacheAmdgpuName stores a resolved AMDGPU name in the lookup cache.
|
|
||||||
func cacheAmdgpuName(deviceID, revisionID, name string, exact bool) {
|
|
||||||
name = normalizeAmdgpuName(name)
|
|
||||||
amdgpuNameCache.Lock()
|
|
||||||
defer amdgpuNameCache.Unlock()
|
|
||||||
if exact && revisionID != "" {
|
|
||||||
amdgpuNameCache.hits[cacheKeyForAmdgpu(deviceID, revisionID)] = name
|
|
||||||
}
|
|
||||||
amdgpuNameCache.hits[deviceID] = name
|
|
||||||
}
|
|
||||||
|
|
||||||
// cacheMissingAmdgpuName records unresolved device/revision lookups.
|
|
||||||
func cacheMissingAmdgpuName(deviceID, revisionID string) {
|
|
||||||
amdgpuNameCache.Lock()
|
|
||||||
defer amdgpuNameCache.Unlock()
|
|
||||||
amdgpuNameCache.misses[deviceID] = struct{}{}
|
|
||||||
if revisionID != "" {
|
|
||||||
amdgpuNameCache.misses[cacheKeyForAmdgpu(deviceID, revisionID)] = struct{}{}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// getAmdGpuName attempts to get a descriptive GPU name.
|
// getAmdGpuName attempts to get a descriptive GPU name.
|
||||||
// First tries product_name (rarely available), then looks up the PCI device ID.
|
// First tries product_name (rarely available), then looks up the PCI device ID.
|
||||||
// Falls back to showing the raw device ID if not found in the lookup table.
|
// Falls back to showing the raw device ID if not found in the lookup table.
|
||||||
@@ -278,24 +152,33 @@ func getAmdGpuName(devicePath string) string {
|
|||||||
|
|
||||||
// Read PCI device ID and look it up
|
// Read PCI device ID and look it up
|
||||||
if deviceID, err := os.ReadFile(filepath.Join(devicePath, "device")); err == nil {
|
if deviceID, err := os.ReadFile(filepath.Join(devicePath, "device")); err == nil {
|
||||||
id := normalizeHexID(string(deviceID))
|
id := strings.TrimPrefix(strings.ToLower(strings.TrimSpace(string(deviceID))), "0x")
|
||||||
revision := ""
|
if name, ok := getRadeonNames()[id]; ok {
|
||||||
if revBytes, revErr := os.ReadFile(filepath.Join(devicePath, "revision")); revErr == nil {
|
return fmt.Sprintf("Radeon %s", name)
|
||||||
revision = normalizeHexID(string(revBytes))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if name, found, done := getCachedAmdgpuName(id, revision); found {
|
|
||||||
return name
|
|
||||||
} else if !done {
|
|
||||||
if name, exact, ok := lookupAmdgpuNameInFile(id, revision, "/usr/share/libdrm/amdgpu.ids"); ok {
|
|
||||||
cacheAmdgpuName(id, revision, name, exact)
|
|
||||||
return normalizeAmdgpuName(name)
|
|
||||||
}
|
|
||||||
cacheMissingAmdgpuName(id, revision)
|
|
||||||
}
|
|
||||||
|
|
||||||
return fmt.Sprintf("AMD GPU (%s)", id)
|
return fmt.Sprintf("AMD GPU (%s)", id)
|
||||||
}
|
}
|
||||||
|
|
||||||
return "AMD GPU"
|
return "AMD GPU"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getRadeonNames returns the table that maps lower-case PCI device IDs
// (without a "0x" prefix) to short Radeon model names. The table is built on
// first use and the same map is returned on every later call.
// Device IDs from https://pci-ids.ucw.cz/read/PC/1002
var getRadeonNames = sync.OnceValue(func() map[string]string {
	names := make(map[string]string, 11)

	names["7550"] = "RX 9070"
	names["7590"] = "RX 9060 XT"
	names["7551"] = "AI PRO R9700"

	names["744c"] = "RX 7900"

	names["1681"] = "680M"

	names["7448"] = "PRO W7900"
	names["745e"] = "PRO W7800"
	names["7470"] = "PRO W7700"
	names["73e3"] = "PRO W6600"
	names["7422"] = "PRO W6400"
	names["7341"] = "PRO W5500"

	return names
})
|
||||||
|
|||||||
@@ -1,264 +0,0 @@
|
|||||||
//go:build linux
|
|
||||||
|
|
||||||
package agent
|
|
||||||
|
|
||||||
import (
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/henrygd/beszel/internal/entities/system"
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestNormalizeHexID(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
in string
|
|
||||||
want string
|
|
||||||
}{
|
|
||||||
{"0x1002", "1002"},
|
|
||||||
{"C2", "c2"},
|
|
||||||
{" 15BF ", "15bf"},
|
|
||||||
{"0x15bf", "15bf"},
|
|
||||||
{"", ""},
|
|
||||||
}
|
|
||||||
for _, tt := range tests {
|
|
||||||
subName := tt.in
|
|
||||||
if subName == "" {
|
|
||||||
subName = "empty_string"
|
|
||||||
}
|
|
||||||
t.Run(subName, func(t *testing.T) {
|
|
||||||
got := normalizeHexID(tt.in)
|
|
||||||
assert.Equal(t, tt.want, got)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestCacheKeyForAmdgpu(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
deviceID string
|
|
||||||
revisionID string
|
|
||||||
want string
|
|
||||||
}{
|
|
||||||
{"1114", "c2", "1114:c2"},
|
|
||||||
{"15bf", "", "15bf"},
|
|
||||||
{"1506", "c1", "1506:c1"},
|
|
||||||
}
|
|
||||||
for _, tt := range tests {
|
|
||||||
got := cacheKeyForAmdgpu(tt.deviceID, tt.revisionID)
|
|
||||||
assert.Equal(t, tt.want, got)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestReadSysfsFloat(t *testing.T) {
|
|
||||||
dir := t.TempDir()
|
|
||||||
|
|
||||||
validPath := filepath.Join(dir, "val")
|
|
||||||
require.NoError(t, os.WriteFile(validPath, []byte(" 42.5 \n"), 0o644))
|
|
||||||
got, err := readSysfsFloat(validPath)
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Equal(t, 42.5, got)
|
|
||||||
|
|
||||||
// Integer and scientific
|
|
||||||
sciPath := filepath.Join(dir, "sci")
|
|
||||||
require.NoError(t, os.WriteFile(sciPath, []byte("1e2"), 0o644))
|
|
||||||
got, err = readSysfsFloat(sciPath)
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Equal(t, 100.0, got)
|
|
||||||
|
|
||||||
// Missing file
|
|
||||||
_, err = readSysfsFloat(filepath.Join(dir, "missing"))
|
|
||||||
require.Error(t, err)
|
|
||||||
|
|
||||||
// Invalid content
|
|
||||||
badPath := filepath.Join(dir, "bad")
|
|
||||||
require.NoError(t, os.WriteFile(badPath, []byte("not a number"), 0o644))
|
|
||||||
_, err = readSysfsFloat(badPath)
|
|
||||||
require.Error(t, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestIsAmdGpu(t *testing.T) {
|
|
||||||
dir := t.TempDir()
|
|
||||||
deviceDir := filepath.Join(dir, "device")
|
|
||||||
require.NoError(t, os.MkdirAll(deviceDir, 0o755))
|
|
||||||
|
|
||||||
// AMD vendor 0x1002 -> true
|
|
||||||
require.NoError(t, os.WriteFile(filepath.Join(deviceDir, "vendor"), []byte("0x1002\n"), 0o644))
|
|
||||||
assert.True(t, isAmdGpu(dir), "vendor 0x1002 should be AMD")
|
|
||||||
|
|
||||||
// Non-AMD vendor -> false
|
|
||||||
require.NoError(t, os.WriteFile(filepath.Join(deviceDir, "vendor"), []byte("0x10de\n"), 0o644))
|
|
||||||
assert.False(t, isAmdGpu(dir), "vendor 0x10de should not be AMD")
|
|
||||||
|
|
||||||
// Missing vendor file -> false
|
|
||||||
require.NoError(t, os.Remove(filepath.Join(deviceDir, "vendor")))
|
|
||||||
assert.False(t, isAmdGpu(dir), "missing vendor file should be false")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAmdgpuNameCacheRoundTrip(t *testing.T) {
|
|
||||||
// Cache a name and retrieve it (unique key to avoid affecting other tests)
|
|
||||||
deviceID, revisionID := "cachedev99", "00"
|
|
||||||
cacheAmdgpuName(deviceID, revisionID, "AMD Test GPU 99 Graphics", true)
|
|
||||||
|
|
||||||
name, found, done := getCachedAmdgpuName(deviceID, revisionID)
|
|
||||||
assert.True(t, found)
|
|
||||||
assert.True(t, done)
|
|
||||||
assert.Equal(t, "AMD Test GPU 99", name)
|
|
||||||
|
|
||||||
// Device-only key also stored
|
|
||||||
name2, found2, _ := getCachedAmdgpuName(deviceID, "")
|
|
||||||
assert.True(t, found2)
|
|
||||||
assert.Equal(t, "AMD Test GPU 99", name2)
|
|
||||||
|
|
||||||
// Cache a miss
|
|
||||||
cacheMissingAmdgpuName("missedev99", "ab")
|
|
||||||
_, found3, done3 := getCachedAmdgpuName("missedev99", "ab")
|
|
||||||
assert.False(t, found3)
|
|
||||||
assert.True(t, done3, "done should be true so caller skips file lookup")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestUpdateAmdGpuDataWithFakeSysfs(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
writeGTT bool
|
|
||||||
wantMemoryUsed float64
|
|
||||||
wantMemoryTotal float64
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
name: "sums vram and gtt when gtt is present",
|
|
||||||
writeGTT: true,
|
|
||||||
wantMemoryUsed: bytesToMegabytes(1073741824 + 536870912),
|
|
||||||
wantMemoryTotal: bytesToMegabytes(2147483648 + 4294967296),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "falls back to vram when gtt is missing",
|
|
||||||
writeGTT: false,
|
|
||||||
wantMemoryUsed: bytesToMegabytes(1073741824),
|
|
||||||
wantMemoryTotal: bytesToMegabytes(2147483648),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tt := range tests {
|
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
|
||||||
dir := t.TempDir()
|
|
||||||
cardPath := filepath.Join(dir, "card0")
|
|
||||||
devicePath := filepath.Join(cardPath, "device")
|
|
||||||
hwmonPath := filepath.Join(devicePath, "hwmon", "hwmon0")
|
|
||||||
require.NoError(t, os.MkdirAll(hwmonPath, 0o755))
|
|
||||||
|
|
||||||
write := func(name, content string) {
|
|
||||||
require.NoError(t, os.WriteFile(filepath.Join(devicePath, name), []byte(content), 0o644))
|
|
||||||
}
|
|
||||||
write("vendor", "0x1002")
|
|
||||||
write("device", "0x1506")
|
|
||||||
write("revision", "0xc1")
|
|
||||||
write("gpu_busy_percent", "25")
|
|
||||||
write("mem_info_vram_used", "1073741824")
|
|
||||||
write("mem_info_vram_total", "2147483648")
|
|
||||||
if tt.writeGTT {
|
|
||||||
write("mem_info_gtt_used", "536870912")
|
|
||||||
write("mem_info_gtt_total", "4294967296")
|
|
||||||
}
|
|
||||||
require.NoError(t, os.WriteFile(filepath.Join(hwmonPath, "temp1_input"), []byte("45000"), 0o644))
|
|
||||||
require.NoError(t, os.WriteFile(filepath.Join(hwmonPath, "power1_input"), []byte("20000000"), 0o644))
|
|
||||||
|
|
||||||
// Pre-cache name so getAmdGpuName returns a known value (it uses system amdgpu.ids path)
|
|
||||||
cacheAmdgpuName("1506", "c1", "AMD Radeon 610M Graphics", true)
|
|
||||||
|
|
||||||
gm := &GPUManager{GpuDataMap: make(map[string]*system.GPUData)}
|
|
||||||
ok := gm.updateAmdGpuData(cardPath)
|
|
||||||
require.True(t, ok)
|
|
||||||
|
|
||||||
gpu, ok := gm.GpuDataMap["card0"]
|
|
||||||
require.True(t, ok)
|
|
||||||
assert.Equal(t, "AMD Radeon 610M", gpu.Name)
|
|
||||||
assert.Equal(t, 25.0, gpu.Usage)
|
|
||||||
assert.Equal(t, tt.wantMemoryUsed, gpu.MemoryUsed)
|
|
||||||
assert.Equal(t, tt.wantMemoryTotal, gpu.MemoryTotal)
|
|
||||||
assert.Equal(t, 45.0, gpu.Temperature)
|
|
||||||
assert.Equal(t, 20.0, gpu.Power)
|
|
||||||
assert.Equal(t, 1.0, gpu.Count)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestLookupAmdgpuNameInFile(t *testing.T) {
|
|
||||||
idsPath := filepath.Join("test-data", "amdgpu.ids")
|
|
||||||
|
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
deviceID string
|
|
||||||
revisionID string
|
|
||||||
wantName string
|
|
||||||
wantExact bool
|
|
||||||
wantFound bool
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
name: "exact device and revision match",
|
|
||||||
deviceID: "1114",
|
|
||||||
revisionID: "c2",
|
|
||||||
wantName: "AMD Radeon 860M Graphics",
|
|
||||||
wantExact: true,
|
|
||||||
wantFound: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "exact match 15BF revision 01 returns 760M",
|
|
||||||
deviceID: "15bf",
|
|
||||||
revisionID: "01",
|
|
||||||
wantName: "AMD Radeon 760M Graphics",
|
|
||||||
wantExact: true,
|
|
||||||
wantFound: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "exact match 15BF revision 00 returns 780M",
|
|
||||||
deviceID: "15bf",
|
|
||||||
revisionID: "00",
|
|
||||||
wantName: "AMD Radeon 780M Graphics",
|
|
||||||
wantExact: true,
|
|
||||||
wantFound: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "device-only match returns first entry for device",
|
|
||||||
deviceID: "1506",
|
|
||||||
revisionID: "",
|
|
||||||
wantName: "AMD Radeon 610M",
|
|
||||||
wantExact: false,
|
|
||||||
wantFound: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "unknown device not found",
|
|
||||||
deviceID: "dead",
|
|
||||||
revisionID: "00",
|
|
||||||
wantName: "",
|
|
||||||
wantExact: false,
|
|
||||||
wantFound: false,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
for _, tt := range tests {
|
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
|
||||||
gotName, gotExact, gotFound := lookupAmdgpuNameInFile(tt.deviceID, tt.revisionID, idsPath)
|
|
||||||
assert.Equal(t, tt.wantName, gotName, "name")
|
|
||||||
assert.Equal(t, tt.wantExact, gotExact, "exact")
|
|
||||||
assert.Equal(t, tt.wantFound, gotFound, "found")
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestGetAmdGpuNameFromIdsFile(t *testing.T) {
|
|
||||||
// Test that getAmdGpuName resolves a name when we can't inject the ids path.
|
|
||||||
// We only verify behavior when product_name is missing and device/revision
|
|
||||||
// would be read from sysfs; the actual lookup uses /usr/share/libdrm/amdgpu.ids.
|
|
||||||
// So this test focuses on normalizeAmdgpuName and that lookupAmdgpuNameInFile
|
|
||||||
// returns the expected name for our test-data file.
|
|
||||||
idsPath := filepath.Join("test-data", "amdgpu.ids")
|
|
||||||
name, exact, found := lookupAmdgpuNameInFile("1435", "ae", idsPath)
|
|
||||||
require.True(t, found)
|
|
||||||
require.True(t, exact)
|
|
||||||
assert.Equal(t, "AMD Custom GPU 0932", name)
|
|
||||||
assert.Equal(t, "AMD Custom GPU 0932", normalizeAmdgpuName(name))
|
|
||||||
|
|
||||||
// " Graphics" suffix is trimmed by normalizeAmdgpuName
|
|
||||||
name2 := "AMD Radeon 860M Graphics"
|
|
||||||
assert.Equal(t, "AMD Radeon 860M", normalizeAmdgpuName(name2))
|
|
||||||
}
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
//go:build !darwin
|
|
||||||
|
|
||||||
package agent
|
|
||||||
|
|
||||||
// startPowermetricsCollector is a no-op on non-darwin platforms; the real implementation is in gpu_darwin.go.
|
|
||||||
func (gm *GPUManager) startPowermetricsCollector() {}
|
|
||||||
|
|
||||||
// startMacmonCollector is a no-op on non-darwin platforms; the real implementation is in gpu_darwin.go.
|
|
||||||
func (gm *GPUManager) startMacmonCollector() {}
|
|
||||||
@@ -1,252 +0,0 @@
|
|||||||
//go:build darwin
|
|
||||||
|
|
||||||
package agent
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bufio"
|
|
||||||
"bytes"
|
|
||||||
"encoding/json"
|
|
||||||
"io"
|
|
||||||
"log/slog"
|
|
||||||
"os/exec"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/henrygd/beszel/internal/entities/system"
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
	// powermetricsPollInterval is the pause between two successful
	// powermetrics runs.
	powermetricsPollInterval = 2 * time.Second
	// powermetricsSampleIntervalMs is the value given to powermetrics via -i,
	// i.e. the sampling interval of a single run.
	powermetricsSampleIntervalMs = 500
	// macmonIntervalMs is the value given to `macmon pipe` via -i, in
	// milliseconds.
	macmonIntervalMs = 2500
)
|
|
||||||
|
|
||||||
// appleGPUID is the GpuDataMap key under which the single logical Apple GPU
// is reported.
const (
	appleGPUID = "0"
)
|
|
||||||
|
|
||||||
// startPowermetricsCollector runs powermetrics --samplers gpu_power in a loop and updates
|
|
||||||
// GPU usage and power. Requires root (sudo) on macOS. A single logical GPU is reported as id "0".
|
|
||||||
func (gm *GPUManager) startPowermetricsCollector() {
|
|
||||||
// Ensure single GPU entry for Apple GPU
|
|
||||||
if _, ok := gm.GpuDataMap[appleGPUID]; !ok {
|
|
||||||
gm.GpuDataMap[appleGPUID] = &system.GPUData{Name: "Apple GPU"}
|
|
||||||
}
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
failures := 0
|
|
||||||
for {
|
|
||||||
if err := gm.collectPowermetrics(); err != nil {
|
|
||||||
failures++
|
|
||||||
if failures > maxFailureRetries {
|
|
||||||
slog.Warn("powermetrics GPU collector failed repeatedly, stopping", "err", err)
|
|
||||||
break
|
|
||||||
}
|
|
||||||
slog.Warn("Error collecting macOS GPU data via powermetrics (may require sudo)", "err", err)
|
|
||||||
time.Sleep(retryWaitTime)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
failures = 0
|
|
||||||
time.Sleep(powermetricsPollInterval)
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
|
|
||||||
// collectPowermetrics runs powermetrics once and parses GPU usage and power from its output.
|
|
||||||
func (gm *GPUManager) collectPowermetrics() error {
|
|
||||||
interval := strconv.Itoa(powermetricsSampleIntervalMs)
|
|
||||||
cmd := exec.Command(powermetricsCmd, "--samplers", "gpu_power", "-i", interval, "-n", "1")
|
|
||||||
cmd.Stderr = nil
|
|
||||||
out, err := cmd.Output()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if !gm.parsePowermetricsData(out) {
|
|
||||||
return errNoValidData
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// parsePowermetricsData parses powermetrics gpu_power output and updates GpuDataMap["0"].
|
|
||||||
// Example output:
|
|
||||||
//
|
|
||||||
// **** GPU usage ****
|
|
||||||
// GPU HW active frequency: 444 MHz
|
|
||||||
// GPU HW active residency: 0.97% (444 MHz: .97% ...
|
|
||||||
// GPU idle residency: 99.03%
|
|
||||||
// GPU Power: 4 mW
|
|
||||||
func (gm *GPUManager) parsePowermetricsData(output []byte) bool {
|
|
||||||
var idleResidency, powerMW float64
|
|
||||||
var gotIdle, gotPower bool
|
|
||||||
|
|
||||||
scanner := bufio.NewScanner(bytes.NewReader(output))
|
|
||||||
for scanner.Scan() {
|
|
||||||
line := strings.TrimSpace(scanner.Text())
|
|
||||||
if strings.HasPrefix(line, "GPU idle residency:") {
|
|
||||||
// "GPU idle residency: 99.03%"
|
|
||||||
fields := strings.Fields(strings.TrimPrefix(line, "GPU idle residency:"))
|
|
||||||
if len(fields) >= 1 {
|
|
||||||
pct := strings.TrimSuffix(fields[0], "%")
|
|
||||||
if v, err := strconv.ParseFloat(pct, 64); err == nil {
|
|
||||||
idleResidency = v
|
|
||||||
gotIdle = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if strings.HasPrefix(line, "GPU Power:") {
|
|
||||||
// "GPU Power: 4 mW"
|
|
||||||
fields := strings.Fields(strings.TrimPrefix(line, "GPU Power:"))
|
|
||||||
if len(fields) >= 1 {
|
|
||||||
if v, err := strconv.ParseFloat(fields[0], 64); err == nil {
|
|
||||||
powerMW = v
|
|
||||||
gotPower = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if err := scanner.Err(); err != nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if !gotIdle && !gotPower {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
gm.Lock()
|
|
||||||
defer gm.Unlock()
|
|
||||||
|
|
||||||
if _, ok := gm.GpuDataMap[appleGPUID]; !ok {
|
|
||||||
gm.GpuDataMap[appleGPUID] = &system.GPUData{Name: "Apple GPU"}
|
|
||||||
}
|
|
||||||
gpu := gm.GpuDataMap[appleGPUID]
|
|
||||||
|
|
||||||
if gotIdle {
|
|
||||||
// Usage = 100 - idle residency (e.g. 100 - 99.03 = 0.97%)
|
|
||||||
gpu.Usage += 100 - idleResidency
|
|
||||||
}
|
|
||||||
if gotPower {
|
|
||||||
// mW -> W
|
|
||||||
gpu.Power += powerMW / milliwattsInAWatt
|
|
||||||
}
|
|
||||||
gpu.Count++
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// startMacmonCollector runs `macmon pipe` in a loop and parses one JSON object per line.
|
|
||||||
// This collector does not require sudo. A single logical GPU is reported as id "0".
|
|
||||||
func (gm *GPUManager) startMacmonCollector() {
|
|
||||||
if _, ok := gm.GpuDataMap[appleGPUID]; !ok {
|
|
||||||
gm.GpuDataMap[appleGPUID] = &system.GPUData{Name: "Apple GPU"}
|
|
||||||
}
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
failures := 0
|
|
||||||
for {
|
|
||||||
if err := gm.collectMacmonPipe(); err != nil {
|
|
||||||
failures++
|
|
||||||
if failures > maxFailureRetries {
|
|
||||||
slog.Warn("macmon GPU collector failed repeatedly, stopping", "err", err)
|
|
||||||
break
|
|
||||||
}
|
|
||||||
slog.Warn("Error collecting macOS GPU data via macmon", "err", err)
|
|
||||||
time.Sleep(retryWaitTime)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
failures = 0
|
|
||||||
// `macmon pipe` is long-running; if it returns, wait a bit before restarting.
|
|
||||||
time.Sleep(retryWaitTime)
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
|
|
||||||
// macmonTemp is the "temp" object of a macmon JSON sample. Only the GPU
// average temperature is decoded.
type macmonTemp struct {
	// GPUTempAvg is read from the "gpu_temp_avg" key.
	GPUTempAvg float64 `json:"gpu_temp_avg"`
}
|
|
||||||
|
|
||||||
type macmonSample struct {
|
|
||||||
GPUPower float64 `json:"gpu_power"` // watts (macmon reports fractional values)
|
|
||||||
GPURAMPower float64 `json:"gpu_ram_power"` // watts
|
|
||||||
GPUUsage []float64 `json:"gpu_usage"` // [freq_mhz, usage] where usage is typically 0..1
|
|
||||||
Temp macmonTemp `json:"temp"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func (gm *GPUManager) collectMacmonPipe() (err error) {
|
|
||||||
cmd := exec.Command(macmonCmd, "pipe", "-i", strconv.Itoa(macmonIntervalMs))
|
|
||||||
// Avoid blocking if macmon writes to stderr.
|
|
||||||
cmd.Stderr = io.Discard
|
|
||||||
stdout, err := cmd.StdoutPipe()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := cmd.Start(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure we always reap the child to avoid zombies on any return path and
|
|
||||||
// propagate a non-zero exit code if no other error was set.
|
|
||||||
defer func() {
|
|
||||||
_ = stdout.Close()
|
|
||||||
if cmd.ProcessState == nil || !cmd.ProcessState.Exited() {
|
|
||||||
_ = cmd.Process.Kill()
|
|
||||||
}
|
|
||||||
if waitErr := cmd.Wait(); err == nil && waitErr != nil {
|
|
||||||
err = waitErr
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
scanner := bufio.NewScanner(stdout)
|
|
||||||
var hadSample bool
|
|
||||||
for scanner.Scan() {
|
|
||||||
line := bytes.TrimSpace(scanner.Bytes())
|
|
||||||
if len(line) == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if gm.parseMacmonLine(line) {
|
|
||||||
hadSample = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if scanErr := scanner.Err(); scanErr != nil {
|
|
||||||
return scanErr
|
|
||||||
}
|
|
||||||
if !hadSample {
|
|
||||||
return errNoValidData
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// parseMacmonLine parses a single macmon JSON line and updates Apple GPU metrics.
|
|
||||||
func (gm *GPUManager) parseMacmonLine(line []byte) bool {
|
|
||||||
var sample macmonSample
|
|
||||||
if err := json.Unmarshal(line, &sample); err != nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
usage := 0.0
|
|
||||||
if len(sample.GPUUsage) >= 2 {
|
|
||||||
usage = sample.GPUUsage[1]
|
|
||||||
// Heuristic: macmon typically reports 0..1; convert to percentage.
|
|
||||||
if usage <= 1.0 {
|
|
||||||
usage *= 100
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Consider the line valid if it contains at least one GPU metric.
|
|
||||||
if usage == 0 && sample.GPUPower == 0 && sample.Temp.GPUTempAvg == 0 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
gm.Lock()
|
|
||||||
defer gm.Unlock()
|
|
||||||
|
|
||||||
gpu, ok := gm.GpuDataMap[appleGPUID]
|
|
||||||
if !ok {
|
|
||||||
gpu = &system.GPUData{Name: "Apple GPU"}
|
|
||||||
gm.GpuDataMap[appleGPUID] = gpu
|
|
||||||
}
|
|
||||||
gpu.Temperature = sample.Temp.GPUTempAvg
|
|
||||||
gpu.Usage += usage
|
|
||||||
// macmon reports power in watts; include VRAM power if present.
|
|
||||||
gpu.Power += sample.GPUPower + sample.GPURAMPower
|
|
||||||
gpu.Count++
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
@@ -1,81 +0,0 @@
|
|||||||
//go:build darwin
|
|
||||||
|
|
||||||
package agent
|
|
||||||
|
|
||||||
import (
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/henrygd/beszel/internal/entities/system"
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestParsePowermetricsData(t *testing.T) {
|
|
||||||
input := `
|
|
||||||
Machine model: Mac14,10
|
|
||||||
OS version: 25D125
|
|
||||||
|
|
||||||
*** Sampled system activity (Sat Feb 14 00:42:06 2026 -0500) (503.05ms elapsed) ***
|
|
||||||
|
|
||||||
**** GPU usage ****
|
|
||||||
|
|
||||||
GPU HW active frequency: 444 MHz
|
|
||||||
GPU HW active residency: 0.97% (444 MHz: .97% 612 MHz: 0% 808 MHz: 0% 968 MHz: 0% 1110 MHz: 0% 1236 MHz: 0% 1338 MHz: 0% 1398 MHz: 0%)
|
|
||||||
GPU SW requested state: (P1 : 100% P2 : 0% P3 : 0% P4 : 0% P5 : 0% P6 : 0% P7 : 0% P8 : 0%)
|
|
||||||
GPU idle residency: 99.03%
|
|
||||||
GPU Power: 4 mW
|
|
||||||
`
|
|
||||||
gm := &GPUManager{
|
|
||||||
GpuDataMap: make(map[string]*system.GPUData),
|
|
||||||
}
|
|
||||||
valid := gm.parsePowermetricsData([]byte(input))
|
|
||||||
require.True(t, valid)
|
|
||||||
|
|
||||||
g0, ok := gm.GpuDataMap["0"]
|
|
||||||
require.True(t, ok)
|
|
||||||
assert.Equal(t, "Apple GPU", g0.Name)
|
|
||||||
// Usage = 100 - 99.03 = 0.97
|
|
||||||
assert.InDelta(t, 0.97, g0.Usage, 0.01)
|
|
||||||
// 4 mW -> 0.004 W
|
|
||||||
assert.InDelta(t, 0.004, g0.Power, 0.0001)
|
|
||||||
assert.Equal(t, 1.0, g0.Count)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParsePowermetricsDataPartial(t *testing.T) {
|
|
||||||
// Only power line (e.g. older macOS or different sampler output)
|
|
||||||
input := `
|
|
||||||
**** GPU usage ****
|
|
||||||
GPU Power: 120 mW
|
|
||||||
`
|
|
||||||
gm := &GPUManager{
|
|
||||||
GpuDataMap: make(map[string]*system.GPUData),
|
|
||||||
}
|
|
||||||
valid := gm.parsePowermetricsData([]byte(input))
|
|
||||||
require.True(t, valid)
|
|
||||||
|
|
||||||
g0, ok := gm.GpuDataMap["0"]
|
|
||||||
require.True(t, ok)
|
|
||||||
assert.Equal(t, "Apple GPU", g0.Name)
|
|
||||||
assert.InDelta(t, 0.12, g0.Power, 0.001)
|
|
||||||
assert.Equal(t, 1.0, g0.Count)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParseMacmonLine(t *testing.T) {
|
|
||||||
input := `{"all_power":0.6468324661254883,"ane_power":0.0,"cpu_power":0.6359732151031494,"ecpu_usage":[2061,0.1726151406764984],"gpu_power":0.010859241709113121,"gpu_ram_power":0.000965250947047025,"gpu_usage":[503,0.013633215799927711],"memory":{"ram_total":17179869184,"ram_usage":12322914304,"swap_total":0,"swap_usage":0},"pcpu_usage":[1248,0.11792058497667313],"ram_power":0.14885640144348145,"sys_power":10.4955415725708,"temp":{"cpu_temp_avg":23.041261672973633,"gpu_temp_avg":29.44516944885254},"timestamp":"2026-02-17T19:34:27.942556+00:00"}`
|
|
||||||
|
|
||||||
gm := &GPUManager{
|
|
||||||
GpuDataMap: make(map[string]*system.GPUData),
|
|
||||||
}
|
|
||||||
valid := gm.parseMacmonLine([]byte(input))
|
|
||||||
require.True(t, valid)
|
|
||||||
|
|
||||||
g0, ok := gm.GpuDataMap["0"]
|
|
||||||
require.True(t, ok)
|
|
||||||
assert.Equal(t, "Apple GPU", g0.Name)
|
|
||||||
// macmon reports usage fraction 0..1; expect percent conversion.
|
|
||||||
assert.InDelta(t, 1.3633, g0.Usage, 0.05)
|
|
||||||
// power includes gpu_power + gpu_ram_power
|
|
||||||
assert.InDelta(t, 0.011824, g0.Power, 0.0005)
|
|
||||||
assert.InDelta(t, 29.445, g0.Temperature, 0.01)
|
|
||||||
assert.Equal(t, 1.0, g0.Count)
|
|
||||||
}
|
|
||||||
@@ -13,3 +13,21 @@ func (c *nvmlCollector) init() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (c *nvmlCollector) start() {}
|
func (c *nvmlCollector) start() {}
|
||||||
|
|
||||||
|
// collect does nothing in this stub implementation.
func (c *nvmlCollector) collect() {
}
|
||||||
|
|
||||||
|
// openLibrary is the stub used where NVML is unavailable: it ignores name and
// always returns a zero handle together with an error.
func openLibrary(name string) (uintptr, error) {
	var noHandle uintptr
	return noHandle, fmt.Errorf("nvml not supported on this platform")
}
|
||||||
|
|
||||||
|
// getNVMLPath is a stub that always returns an empty path.
func getNVMLPath() string {
	var none string
	return none
}
|
||||||
|
|
||||||
|
// hasSymbol is a stub that ignores its arguments and always reports false.
func hasSymbol(lib uintptr, symbol string) bool {
	const present = false
	return present
}
|
||||||
|
|
||||||
|
func (c *nvmlCollector) isGPUActive(bdf string) bool {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,159 +0,0 @@
|
|||||||
package agent
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
"io"
|
|
||||||
"log/slog"
|
|
||||||
"os/exec"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/henrygd/beszel/internal/entities/system"
|
|
||||||
)
|
|
||||||
|
|
||||||
// nvtopSnapshot is one device entry of an nvtop JSON snapshot. All metric
// fields are optional strings; a nil pointer means the key was absent or
// null in the JSON.
type nvtopSnapshot struct {
	// DeviceName is read from "device_name".
	DeviceName string `json:"device_name"`
	// Temp is read from "temp".
	Temp *string `json:"temp"`
	// PowerDraw is read from "power_draw".
	PowerDraw *string `json:"power_draw"`
	// GpuUtil is read from "gpu_util".
	GpuUtil *string `json:"gpu_util"`
	// MemTotal is read from "mem_total".
	MemTotal *string `json:"mem_total"`
	// MemUsed is read from "mem_used".
	MemUsed *string `json:"mem_used"`
}
|
|
||||||
|
|
||||||
// parseNvtopNumber converts an nvtop value string to a float. Surrounding
// whitespace is removed, then a trailing "C", "W" and "%" are stripped in
// that order. The parse error is ignored, so text that is not a number
// yields 0.
func parseNvtopNumber(raw string) float64 {
	text := strings.TrimSpace(raw)
	for _, unit := range [...]string{"C", "W", "%"} {
		text = strings.TrimSuffix(text, unit)
	}
	number, _ := strconv.ParseFloat(text, 64)
	return number
}
|
|
||||||
|
|
||||||
// parseNvtopData parses a single nvtop JSON snapshot payload.
|
|
||||||
func (gm *GPUManager) parseNvtopData(output []byte) bool {
|
|
||||||
var snapshots []nvtopSnapshot
|
|
||||||
if err := json.Unmarshal(output, &snapshots); err != nil || len(snapshots) == 0 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return gm.updateNvtopSnapshots(snapshots)
|
|
||||||
}
|
|
||||||
|
|
||||||
// updateNvtopSnapshots applies one decoded nvtop snapshot batch to GPU accumulators.
|
|
||||||
func (gm *GPUManager) updateNvtopSnapshots(snapshots []nvtopSnapshot) bool {
|
|
||||||
gm.Lock()
|
|
||||||
defer gm.Unlock()
|
|
||||||
|
|
||||||
valid := false
|
|
||||||
usedIDs := make(map[string]struct{}, len(snapshots))
|
|
||||||
for i, sample := range snapshots {
|
|
||||||
if sample.DeviceName == "" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
indexID := "n" + strconv.Itoa(i)
|
|
||||||
id := indexID
|
|
||||||
|
|
||||||
// nvtop ordering can change, so prefer reusing an existing slot with matching device name.
|
|
||||||
if existingByIndex, ok := gm.GpuDataMap[indexID]; ok && existingByIndex.Name != "" && existingByIndex.Name != sample.DeviceName {
|
|
||||||
for existingID, gpu := range gm.GpuDataMap {
|
|
||||||
if !strings.HasPrefix(existingID, "n") {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if _, taken := usedIDs[existingID]; taken {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if gpu.Name == sample.DeviceName {
|
|
||||||
id = existingID
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if _, ok := gm.GpuDataMap[id]; !ok {
|
|
||||||
gm.GpuDataMap[id] = &system.GPUData{Name: sample.DeviceName}
|
|
||||||
}
|
|
||||||
gpu := gm.GpuDataMap[id]
|
|
||||||
gpu.Name = sample.DeviceName
|
|
||||||
|
|
||||||
if sample.Temp != nil {
|
|
||||||
gpu.Temperature = parseNvtopNumber(*sample.Temp)
|
|
||||||
}
|
|
||||||
if sample.MemUsed != nil {
|
|
||||||
gpu.MemoryUsed = bytesToMegabytes(parseNvtopNumber(*sample.MemUsed))
|
|
||||||
}
|
|
||||||
if sample.MemTotal != nil {
|
|
||||||
gpu.MemoryTotal = bytesToMegabytes(parseNvtopNumber(*sample.MemTotal))
|
|
||||||
}
|
|
||||||
if sample.GpuUtil != nil {
|
|
||||||
gpu.Usage += parseNvtopNumber(*sample.GpuUtil)
|
|
||||||
}
|
|
||||||
if sample.PowerDraw != nil {
|
|
||||||
gpu.Power += parseNvtopNumber(*sample.PowerDraw)
|
|
||||||
}
|
|
||||||
gpu.Count++
|
|
||||||
usedIDs[id] = struct{}{}
|
|
||||||
valid = true
|
|
||||||
}
|
|
||||||
return valid
|
|
||||||
}
|
|
||||||
|
|
||||||
// collectNvtopStats runs nvtop loop mode and continuously decodes JSON snapshots.
|
|
||||||
func (gm *GPUManager) collectNvtopStats(interval string) error {
|
|
||||||
cmd := exec.Command(nvtopCmd, "-lP", "-d", interval)
|
|
||||||
stdout, err := cmd.StdoutPipe()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := cmd.Start(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer func() {
|
|
||||||
_ = stdout.Close()
|
|
||||||
if cmd.ProcessState == nil || !cmd.ProcessState.Exited() {
|
|
||||||
_ = cmd.Process.Kill()
|
|
||||||
}
|
|
||||||
_ = cmd.Wait()
|
|
||||||
}()
|
|
||||||
|
|
||||||
decoder := json.NewDecoder(stdout)
|
|
||||||
foundValid := false
|
|
||||||
for {
|
|
||||||
var snapshots []nvtopSnapshot
|
|
||||||
if err := decoder.Decode(&snapshots); err != nil {
|
|
||||||
if err == io.EOF {
|
|
||||||
if foundValid {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return errNoValidData
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if gm.updateNvtopSnapshots(snapshots) {
|
|
||||||
foundValid = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// startNvtopCollector starts nvtop collection with retry or fallback callback handling.
|
|
||||||
func (gm *GPUManager) startNvtopCollector(interval string, onFailure func()) {
|
|
||||||
go func() {
|
|
||||||
failures := 0
|
|
||||||
for {
|
|
||||||
if err := gm.collectNvtopStats(interval); err != nil {
|
|
||||||
if onFailure != nil {
|
|
||||||
slog.Warn("Error collecting GPU data via nvtop", "err", err)
|
|
||||||
onFailure()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
failures++
|
|
||||||
if failures > maxFailureRetries {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
slog.Warn("Error collecting GPU data via nvtop", "err", err)
|
|
||||||
time.Sleep(retryWaitTime)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
@@ -250,100 +250,6 @@ func TestParseAmdData(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseNvtopData(t *testing.T) {
|
|
||||||
input, err := os.ReadFile("test-data/nvtop.json")
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
gm := &GPUManager{
|
|
||||||
GpuDataMap: make(map[string]*system.GPUData),
|
|
||||||
}
|
|
||||||
valid := gm.parseNvtopData(input)
|
|
||||||
require.True(t, valid)
|
|
||||||
|
|
||||||
g0, ok := gm.GpuDataMap["n0"]
|
|
||||||
require.True(t, ok)
|
|
||||||
assert.Equal(t, "NVIDIA GeForce RTX 3050 Ti Laptop GPU", g0.Name)
|
|
||||||
assert.Equal(t, 48.0, g0.Temperature)
|
|
||||||
assert.Equal(t, 5.0, g0.Usage)
|
|
||||||
assert.Equal(t, 13.0, g0.Power)
|
|
||||||
assert.Equal(t, bytesToMegabytes(349372416), g0.MemoryUsed)
|
|
||||||
assert.Equal(t, bytesToMegabytes(4294967296), g0.MemoryTotal)
|
|
||||||
assert.Equal(t, 1.0, g0.Count)
|
|
||||||
|
|
||||||
g1, ok := gm.GpuDataMap["n1"]
|
|
||||||
require.True(t, ok)
|
|
||||||
assert.Equal(t, "AMD Radeon 680M", g1.Name)
|
|
||||||
assert.Equal(t, 48.0, g1.Temperature)
|
|
||||||
assert.Equal(t, 12.0, g1.Usage)
|
|
||||||
assert.Equal(t, 9.0, g1.Power)
|
|
||||||
assert.Equal(t, bytesToMegabytes(1213784064), g1.MemoryUsed)
|
|
||||||
assert.Equal(t, bytesToMegabytes(16929173504), g1.MemoryTotal)
|
|
||||||
assert.Equal(t, 1.0, g1.Count)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestUpdateNvtopSnapshotsKeepsDeviceAssociationWhenOrderChanges(t *testing.T) {
|
|
||||||
strPtr := func(s string) *string { return &s }
|
|
||||||
|
|
||||||
gm := &GPUManager{
|
|
||||||
GpuDataMap: make(map[string]*system.GPUData),
|
|
||||||
}
|
|
||||||
|
|
||||||
firstBatch := []nvtopSnapshot{
|
|
||||||
{
|
|
||||||
DeviceName: "NVIDIA GeForce RTX 3050 Ti Laptop GPU",
|
|
||||||
GpuUtil: strPtr("20%"),
|
|
||||||
PowerDraw: strPtr("10W"),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
DeviceName: "AMD Radeon 680M",
|
|
||||||
GpuUtil: strPtr("30%"),
|
|
||||||
PowerDraw: strPtr("20W"),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
secondBatchSwapped := []nvtopSnapshot{
|
|
||||||
{
|
|
||||||
DeviceName: "AMD Radeon 680M",
|
|
||||||
GpuUtil: strPtr("40%"),
|
|
||||||
PowerDraw: strPtr("25W"),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
DeviceName: "NVIDIA GeForce RTX 3050 Ti Laptop GPU",
|
|
||||||
GpuUtil: strPtr("50%"),
|
|
||||||
PowerDraw: strPtr("15W"),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
require.True(t, gm.updateNvtopSnapshots(firstBatch))
|
|
||||||
require.True(t, gm.updateNvtopSnapshots(secondBatchSwapped))
|
|
||||||
|
|
||||||
nvidia := gm.GpuDataMap["n0"]
|
|
||||||
require.NotNil(t, nvidia)
|
|
||||||
assert.Equal(t, "NVIDIA GeForce RTX 3050 Ti Laptop GPU", nvidia.Name)
|
|
||||||
assert.Equal(t, 70.0, nvidia.Usage)
|
|
||||||
assert.Equal(t, 25.0, nvidia.Power)
|
|
||||||
assert.Equal(t, 2.0, nvidia.Count)
|
|
||||||
|
|
||||||
amd := gm.GpuDataMap["n1"]
|
|
||||||
require.NotNil(t, amd)
|
|
||||||
assert.Equal(t, "AMD Radeon 680M", amd.Name)
|
|
||||||
assert.Equal(t, 70.0, amd.Usage)
|
|
||||||
assert.Equal(t, 45.0, amd.Power)
|
|
||||||
assert.Equal(t, 2.0, amd.Count)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParseCollectorPriority(t *testing.T) {
|
|
||||||
got := parseCollectorPriority(" nvml, nvidia-smi, intel_gpu_top, amd_sysfs, nvtop, rocm-smi, bad ")
|
|
||||||
want := []collectorSource{
|
|
||||||
collectorSourceNVML,
|
|
||||||
collectorSourceNvidiaSMI,
|
|
||||||
collectorSourceIntelGpuTop,
|
|
||||||
collectorSourceAmdSysfs,
|
|
||||||
collectorSourceNVTop,
|
|
||||||
collectorSourceRocmSMI,
|
|
||||||
}
|
|
||||||
assert.Equal(t, want, got)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParseJetsonData(t *testing.T) {
|
func TestParseJetsonData(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
@@ -1081,35 +987,36 @@ func TestCalculateGPUAverage(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGPUCapabilitiesAndLegacyPriority(t *testing.T) {
|
func TestDetectGPUs(t *testing.T) {
|
||||||
// Save original PATH
|
// Save original PATH
|
||||||
origPath := os.Getenv("PATH")
|
origPath := os.Getenv("PATH")
|
||||||
defer os.Setenv("PATH", origPath)
|
defer os.Setenv("PATH", origPath)
|
||||||
hasAmdSysfs := (&GPUManager{}).hasAmdSysfs()
|
|
||||||
|
// Set up temp dir with the commands
|
||||||
|
tempDir := t.TempDir()
|
||||||
|
os.Setenv("PATH", tempDir)
|
||||||
|
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
setupCommands func(string) error
|
setupCommands func() error
|
||||||
wantNvidiaSmi bool
|
wantNvidiaSmi bool
|
||||||
wantRocmSmi bool
|
wantRocmSmi bool
|
||||||
wantTegrastats bool
|
wantTegrastats bool
|
||||||
wantNvtop bool
|
|
||||||
wantErr bool
|
wantErr bool
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
name: "nvidia-smi not available",
|
name: "nvidia-smi not available",
|
||||||
setupCommands: func(_ string) error {
|
setupCommands: func() error {
|
||||||
return nil
|
return nil
|
||||||
},
|
},
|
||||||
wantNvidiaSmi: false,
|
wantNvidiaSmi: false,
|
||||||
wantRocmSmi: false,
|
wantRocmSmi: false,
|
||||||
wantTegrastats: false,
|
wantTegrastats: false,
|
||||||
wantNvtop: false,
|
|
||||||
wantErr: true,
|
wantErr: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "nvidia-smi available",
|
name: "nvidia-smi available",
|
||||||
setupCommands: func(tempDir string) error {
|
setupCommands: func() error {
|
||||||
path := filepath.Join(tempDir, "nvidia-smi")
|
path := filepath.Join(tempDir, "nvidia-smi")
|
||||||
script := `#!/bin/sh
|
script := `#!/bin/sh
|
||||||
echo "test"`
|
echo "test"`
|
||||||
@@ -1121,12 +1028,11 @@ echo "test"`
|
|||||||
wantNvidiaSmi: true,
|
wantNvidiaSmi: true,
|
||||||
wantTegrastats: false,
|
wantTegrastats: false,
|
||||||
wantRocmSmi: false,
|
wantRocmSmi: false,
|
||||||
wantNvtop: false,
|
|
||||||
wantErr: false,
|
wantErr: false,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "rocm-smi available",
|
name: "rocm-smi available",
|
||||||
setupCommands: func(tempDir string) error {
|
setupCommands: func() error {
|
||||||
path := filepath.Join(tempDir, "rocm-smi")
|
path := filepath.Join(tempDir, "rocm-smi")
|
||||||
script := `#!/bin/sh
|
script := `#!/bin/sh
|
||||||
echo "test"`
|
echo "test"`
|
||||||
@@ -1135,15 +1041,14 @@ echo "test"`
|
|||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
},
|
},
|
||||||
wantNvidiaSmi: false,
|
wantNvidiaSmi: true,
|
||||||
wantRocmSmi: true,
|
wantRocmSmi: true,
|
||||||
wantTegrastats: false,
|
wantTegrastats: false,
|
||||||
wantNvtop: false,
|
|
||||||
wantErr: false,
|
wantErr: false,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "tegrastats available",
|
name: "tegrastats available",
|
||||||
setupCommands: func(tempDir string) error {
|
setupCommands: func() error {
|
||||||
path := filepath.Join(tempDir, "tegrastats")
|
path := filepath.Join(tempDir, "tegrastats")
|
||||||
script := `#!/bin/sh
|
script := `#!/bin/sh
|
||||||
echo "test"`
|
echo "test"`
|
||||||
@@ -1153,31 +1058,13 @@ echo "test"`
|
|||||||
return nil
|
return nil
|
||||||
},
|
},
|
||||||
wantNvidiaSmi: false,
|
wantNvidiaSmi: false,
|
||||||
wantRocmSmi: false,
|
wantRocmSmi: true,
|
||||||
wantTegrastats: true,
|
wantTegrastats: true,
|
||||||
wantNvtop: false,
|
|
||||||
wantErr: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "nvtop available",
|
|
||||||
setupCommands: func(tempDir string) error {
|
|
||||||
path := filepath.Join(tempDir, "nvtop")
|
|
||||||
script := `#!/bin/sh
|
|
||||||
echo "[]"`
|
|
||||||
if err := os.WriteFile(path, []byte(script), 0755); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
},
|
|
||||||
wantNvidiaSmi: false,
|
|
||||||
wantRocmSmi: false,
|
|
||||||
wantTegrastats: false,
|
|
||||||
wantNvtop: true,
|
|
||||||
wantErr: false,
|
wantErr: false,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "no gpu tools available",
|
name: "no gpu tools available",
|
||||||
setupCommands: func(_ string) error {
|
setupCommands: func() error {
|
||||||
os.Setenv("PATH", "")
|
os.Setenv("PATH", "")
|
||||||
return nil
|
return nil
|
||||||
},
|
},
|
||||||
@@ -1187,53 +1074,29 @@ echo "[]"`
|
|||||||
|
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
tempDir := t.TempDir()
|
if err := tt.setupCommands(); err != nil {
|
||||||
os.Setenv("PATH", tempDir)
|
|
||||||
if err := tt.setupCommands(tempDir); err != nil {
|
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
gm := &GPUManager{}
|
gm := &GPUManager{}
|
||||||
caps := gm.discoverGpuCapabilities()
|
err := gm.detectGPUs()
|
||||||
var err error
|
|
||||||
if !hasAnyGpuCollector(caps) {
|
|
||||||
err = fmt.Errorf(noGPUFoundMsg)
|
|
||||||
}
|
|
||||||
priorities := gm.resolveLegacyCollectorPriority(caps)
|
|
||||||
hasPriority := func(source collectorSource) bool {
|
|
||||||
for _, s := range priorities {
|
|
||||||
if s == source {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
gotNvidiaSmi := hasPriority(collectorSourceNvidiaSMI)
|
|
||||||
gotRocmSmi := hasPriority(collectorSourceRocmSMI)
|
|
||||||
gotTegrastats := caps.hasTegrastats
|
|
||||||
gotNvtop := caps.hasNvtop
|
|
||||||
|
|
||||||
t.Logf("nvidiaSmi: %v, rocmSmi: %v, tegrastats: %v", gotNvidiaSmi, gotRocmSmi, gotTegrastats)
|
t.Logf("nvidiaSmi: %v, rocmSmi: %v, tegrastats: %v", gm.nvidiaSmi, gm.rocmSmi, gm.tegrastats)
|
||||||
|
|
||||||
wantErr := tt.wantErr
|
if tt.wantErr {
|
||||||
if hasAmdSysfs && (tt.name == "nvidia-smi not available" || tt.name == "no gpu tools available") {
|
|
||||||
wantErr = false
|
|
||||||
}
|
|
||||||
if wantErr {
|
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Equal(t, tt.wantNvidiaSmi, gotNvidiaSmi)
|
assert.Equal(t, tt.wantNvidiaSmi, gm.nvidiaSmi)
|
||||||
assert.Equal(t, tt.wantRocmSmi, gotRocmSmi)
|
assert.Equal(t, tt.wantRocmSmi, gm.rocmSmi)
|
||||||
assert.Equal(t, tt.wantTegrastats, gotTegrastats)
|
assert.Equal(t, tt.wantTegrastats, gm.tegrastats)
|
||||||
assert.Equal(t, tt.wantNvtop, gotNvtop)
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCollectorStartHelpers(t *testing.T) {
|
func TestStartCollector(t *testing.T) {
|
||||||
// Save original PATH
|
// Save original PATH
|
||||||
origPath := os.Getenv("PATH")
|
origPath := os.Getenv("PATH")
|
||||||
defer os.Setenv("PATH", origPath)
|
defer os.Setenv("PATH", origPath)
|
||||||
@@ -1318,27 +1181,6 @@ echo "11-14-2024 22:54:33 RAM 1024/4096MB GR3D_FREQ 80% tj@70C VDD_GPU_SOC 1000m
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "nvtop collector",
|
|
||||||
command: "nvtop",
|
|
||||||
setup: func(t *testing.T) error {
|
|
||||||
path := filepath.Join(dir, "nvtop")
|
|
||||||
script := `#!/bin/sh
|
|
||||||
echo '[{"device_name":"NVIDIA Test GPU","temp":"52C","power_draw":"31W","gpu_util":"37%","mem_total":"4294967296","mem_used":"536870912","processes":[]}]'`
|
|
||||||
if err := os.WriteFile(path, []byte(script), 0755); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
},
|
|
||||||
validate: func(t *testing.T, gm *GPUManager) {
|
|
||||||
gpu, exists := gm.GpuDataMap["n0"]
|
|
||||||
assert.True(t, exists)
|
|
||||||
if exists {
|
|
||||||
assert.Equal(t, "NVIDIA Test GPU", gpu.Name)
|
|
||||||
assert.Equal(t, 52.0, gpu.Temperature)
|
|
||||||
}
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
@@ -1351,157 +1193,13 @@ echo '[{"device_name":"NVIDIA Test GPU","temp":"52C","power_draw":"31W","gpu_uti
|
|||||||
GpuDataMap: make(map[string]*system.GPUData),
|
GpuDataMap: make(map[string]*system.GPUData),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
switch tt.command {
|
tt.gm.startCollector(tt.command)
|
||||||
case nvidiaSmiCmd:
|
|
||||||
tt.gm.startNvidiaSmiCollector("4")
|
|
||||||
case rocmSmiCmd:
|
|
||||||
tt.gm.startRocmSmiCollector(4300 * time.Millisecond)
|
|
||||||
case tegraStatsCmd:
|
|
||||||
tt.gm.startTegraStatsCollector("3700")
|
|
||||||
case nvtopCmd:
|
|
||||||
tt.gm.startNvtopCollector("30", nil)
|
|
||||||
default:
|
|
||||||
t.Fatalf("unknown test command %q", tt.command)
|
|
||||||
}
|
|
||||||
time.Sleep(50 * time.Millisecond) // Give collector time to run
|
time.Sleep(50 * time.Millisecond) // Give collector time to run
|
||||||
tt.validate(t, tt.gm)
|
tt.validate(t, tt.gm)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNewGPUManagerPriorityNvtopFallback(t *testing.T) {
|
|
||||||
origPath := os.Getenv("PATH")
|
|
||||||
defer os.Setenv("PATH", origPath)
|
|
||||||
|
|
||||||
dir := t.TempDir()
|
|
||||||
os.Setenv("PATH", dir)
|
|
||||||
t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "nvtop,nvidia-smi")
|
|
||||||
|
|
||||||
nvtopPath := filepath.Join(dir, "nvtop")
|
|
||||||
nvtopScript := `#!/bin/sh
|
|
||||||
echo 'not-json'`
|
|
||||||
require.NoError(t, os.WriteFile(nvtopPath, []byte(nvtopScript), 0755))
|
|
||||||
|
|
||||||
nvidiaPath := filepath.Join(dir, "nvidia-smi")
|
|
||||||
nvidiaScript := `#!/bin/sh
|
|
||||||
echo "0, NVIDIA Priority GPU, 45, 512, 2048, 12, 25"`
|
|
||||||
require.NoError(t, os.WriteFile(nvidiaPath, []byte(nvidiaScript), 0755))
|
|
||||||
|
|
||||||
gm, err := NewGPUManager()
|
|
||||||
require.NoError(t, err)
|
|
||||||
require.NotNil(t, gm)
|
|
||||||
|
|
||||||
time.Sleep(150 * time.Millisecond)
|
|
||||||
gpu, ok := gm.GpuDataMap["0"]
|
|
||||||
require.True(t, ok)
|
|
||||||
assert.Equal(t, "Priority GPU", gpu.Name)
|
|
||||||
assert.Equal(t, 45.0, gpu.Temperature)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestNewGPUManagerPriorityMixedCollectors(t *testing.T) {
|
|
||||||
origPath := os.Getenv("PATH")
|
|
||||||
defer os.Setenv("PATH", origPath)
|
|
||||||
|
|
||||||
dir := t.TempDir()
|
|
||||||
os.Setenv("PATH", dir)
|
|
||||||
t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "intel_gpu_top,rocm-smi")
|
|
||||||
|
|
||||||
intelPath := filepath.Join(dir, "intel_gpu_top")
|
|
||||||
intelScript := `#!/bin/sh
|
|
||||||
echo "Freq MHz IRQ RC6 Power W IMC MiB/s RCS VCS"
|
|
||||||
echo " req act /s % gpu pkg rd wr % se wa % se wa"
|
|
||||||
echo "226 223 338 58 2.00 2.69 1820 965 0.00 0 0 0.00 0 0"
|
|
||||||
echo "189 187 412 67 1.80 2.45 1950 823 8.50 2 1 15.00 1 0"
|
|
||||||
`
|
|
||||||
require.NoError(t, os.WriteFile(intelPath, []byte(intelScript), 0755))
|
|
||||||
|
|
||||||
rocmPath := filepath.Join(dir, "rocm-smi")
|
|
||||||
rocmScript := `#!/bin/sh
|
|
||||||
echo '{"card0": {"Temperature (Sensor edge) (C)": "49.0", "Current Socket Graphics Package Power (W)": "28.159", "GPU use (%)": "0", "VRAM Total Memory (B)": "536870912", "VRAM Total Used Memory (B)": "445550592", "Card Series": "Rembrandt [Radeon 680M]", "GUID": "34756"}}'
|
|
||||||
`
|
|
||||||
require.NoError(t, os.WriteFile(rocmPath, []byte(rocmScript), 0755))
|
|
||||||
|
|
||||||
gm, err := NewGPUManager()
|
|
||||||
require.NoError(t, err)
|
|
||||||
require.NotNil(t, gm)
|
|
||||||
|
|
||||||
time.Sleep(150 * time.Millisecond)
|
|
||||||
_, intelOk := gm.GpuDataMap["i0"]
|
|
||||||
_, amdOk := gm.GpuDataMap["34756"]
|
|
||||||
assert.True(t, intelOk)
|
|
||||||
assert.True(t, amdOk)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestNewGPUManagerPriorityNvmlFallbackToNvidiaSmi(t *testing.T) {
|
|
||||||
origPath := os.Getenv("PATH")
|
|
||||||
defer os.Setenv("PATH", origPath)
|
|
||||||
|
|
||||||
dir := t.TempDir()
|
|
||||||
os.Setenv("PATH", dir)
|
|
||||||
t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "nvml,nvidia-smi")
|
|
||||||
|
|
||||||
nvidiaPath := filepath.Join(dir, "nvidia-smi")
|
|
||||||
nvidiaScript := `#!/bin/sh
|
|
||||||
echo "0, NVIDIA Fallback GPU, 41, 256, 1024, 8, 14"`
|
|
||||||
require.NoError(t, os.WriteFile(nvidiaPath, []byte(nvidiaScript), 0755))
|
|
||||||
|
|
||||||
gm, err := NewGPUManager()
|
|
||||||
require.NoError(t, err)
|
|
||||||
require.NotNil(t, gm)
|
|
||||||
|
|
||||||
time.Sleep(150 * time.Millisecond)
|
|
||||||
gpu, ok := gm.GpuDataMap["0"]
|
|
||||||
require.True(t, ok)
|
|
||||||
assert.Equal(t, "Fallback GPU", gpu.Name)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestNewGPUManagerConfiguredCollectorsMustStart(t *testing.T) {
|
|
||||||
origPath := os.Getenv("PATH")
|
|
||||||
defer os.Setenv("PATH", origPath)
|
|
||||||
|
|
||||||
dir := t.TempDir()
|
|
||||||
os.Setenv("PATH", dir)
|
|
||||||
|
|
||||||
t.Run("configured valid collector unavailable", func(t *testing.T) {
|
|
||||||
t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "nvidia-smi")
|
|
||||||
gm, err := NewGPUManager()
|
|
||||||
require.Nil(t, gm)
|
|
||||||
require.Error(t, err)
|
|
||||||
assert.Contains(t, err.Error(), "no configured GPU collectors are available")
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("configured collector list has only unknown entries", func(t *testing.T) {
|
|
||||||
t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "bad,unknown")
|
|
||||||
gm, err := NewGPUManager()
|
|
||||||
require.Nil(t, gm)
|
|
||||||
require.Error(t, err)
|
|
||||||
assert.Contains(t, err.Error(), "no configured GPU collectors are available")
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestNewGPUManagerJetsonIgnoresCollectorConfig(t *testing.T) {
|
|
||||||
origPath := os.Getenv("PATH")
|
|
||||||
defer os.Setenv("PATH", origPath)
|
|
||||||
|
|
||||||
dir := t.TempDir()
|
|
||||||
os.Setenv("PATH", dir)
|
|
||||||
t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "nvidia-smi")
|
|
||||||
|
|
||||||
tegraPath := filepath.Join(dir, "tegrastats")
|
|
||||||
tegraScript := `#!/bin/sh
|
|
||||||
echo "11-14-2024 22:54:33 RAM 1024/4096MB GR3D_FREQ 80% tj@70C VDD_GPU_SOC 1000mW"`
|
|
||||||
require.NoError(t, os.WriteFile(tegraPath, []byte(tegraScript), 0755))
|
|
||||||
|
|
||||||
gm, err := NewGPUManager()
|
|
||||||
require.NoError(t, err)
|
|
||||||
require.NotNil(t, gm)
|
|
||||||
|
|
||||||
time.Sleep(100 * time.Millisecond)
|
|
||||||
gpu, ok := gm.GpuDataMap["0"]
|
|
||||||
require.True(t, ok)
|
|
||||||
assert.Equal(t, "GPU", gpu.Name)
|
|
||||||
}
|
|
||||||
|
|
||||||
// TestAccumulationTableDriven tests the accumulation behavior for all three GPU types
|
// TestAccumulationTableDriven tests the accumulation behavior for all three GPU types
|
||||||
func TestAccumulation(t *testing.T) {
|
func TestAccumulation(t *testing.T) {
|
||||||
type expectedGPUValues struct {
|
type expectedGPUValues struct {
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ type SmartManager struct {
|
|||||||
SmartDevices []*DeviceInfo
|
SmartDevices []*DeviceInfo
|
||||||
refreshMutex sync.Mutex
|
refreshMutex sync.Mutex
|
||||||
lastScanTime time.Time
|
lastScanTime time.Time
|
||||||
smartctlPath string
|
binPath string
|
||||||
excludedDevices map[string]struct{}
|
excludedDevices map[string]struct{}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -170,18 +170,18 @@ func (sm *SmartManager) ScanDevices(force bool) error {
|
|||||||
configuredDevices = parsedDevices
|
configuredDevices = parsedDevices
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
cmd := exec.CommandContext(ctx, sm.binPath, "--scan", "-j")
|
||||||
|
output, err := cmd.Output()
|
||||||
|
|
||||||
var (
|
var (
|
||||||
scanErr error
|
scanErr error
|
||||||
scannedDevices []*DeviceInfo
|
scannedDevices []*DeviceInfo
|
||||||
hasValidScan bool
|
hasValidScan bool
|
||||||
)
|
)
|
||||||
|
|
||||||
if sm.smartctlPath != "" {
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
cmd := exec.CommandContext(ctx, sm.smartctlPath, "--scan", "-j")
|
|
||||||
output, err := cmd.Output()
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
scanErr = err
|
scanErr = err
|
||||||
} else {
|
} else {
|
||||||
@@ -190,14 +190,6 @@ func (sm *SmartManager) ScanDevices(force bool) error {
|
|||||||
scanErr = errNoValidSmartData
|
scanErr = errNoValidSmartData
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Add eMMC devices (Linux only) by reading sysfs health fields. This does not
|
|
||||||
// require smartctl and does not scan the whole device.
|
|
||||||
if emmcDevices := scanEmmcDevices(); len(emmcDevices) > 0 {
|
|
||||||
scannedDevices = append(scannedDevices, emmcDevices...)
|
|
||||||
hasValidScan = true
|
|
||||||
}
|
|
||||||
|
|
||||||
finalDevices := mergeDeviceLists(currentDevices, scannedDevices, configuredDevices)
|
finalDevices := mergeDeviceLists(currentDevices, scannedDevices, configuredDevices)
|
||||||
finalDevices = sm.filterExcludedDevices(finalDevices)
|
finalDevices = sm.filterExcludedDevices(finalDevices)
|
||||||
@@ -450,18 +442,6 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
|
|||||||
return errNoValidSmartData
|
return errNoValidSmartData
|
||||||
}
|
}
|
||||||
|
|
||||||
// eMMC health is not exposed via SMART on Linux, but the kernel provides
|
|
||||||
// wear / EOL indicators via sysfs. Prefer that path when available.
|
|
||||||
if deviceInfo != nil {
|
|
||||||
if ok, err := sm.collectEmmcHealth(deviceInfo); ok {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if sm.smartctlPath == "" {
|
|
||||||
return errNoValidSmartData
|
|
||||||
}
|
|
||||||
|
|
||||||
// slog.Info("collecting SMART data", "device", deviceInfo.Name, "type", deviceInfo.Type, "has_existing_data", sm.hasDataForDevice(deviceInfo.Name))
|
// slog.Info("collecting SMART data", "device", deviceInfo.Name, "type", deviceInfo.Type, "has_existing_data", sm.hasDataForDevice(deviceInfo.Name))
|
||||||
|
|
||||||
// Check if we have any existing data for this device
|
// Check if we have any existing data for this device
|
||||||
@@ -472,7 +452,7 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
|
|||||||
|
|
||||||
// Try with -n standby first if we have existing data
|
// Try with -n standby first if we have existing data
|
||||||
args := sm.smartctlArgs(deviceInfo, hasExistingData)
|
args := sm.smartctlArgs(deviceInfo, hasExistingData)
|
||||||
cmd := exec.CommandContext(ctx, sm.smartctlPath, args...)
|
cmd := exec.CommandContext(ctx, sm.binPath, args...)
|
||||||
output, err := cmd.CombinedOutput()
|
output, err := cmd.CombinedOutput()
|
||||||
|
|
||||||
// Check if device is in standby (exit status 2)
|
// Check if device is in standby (exit status 2)
|
||||||
@@ -485,7 +465,7 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
|
|||||||
ctx2, cancel2 := context.WithTimeout(context.Background(), 15*time.Second)
|
ctx2, cancel2 := context.WithTimeout(context.Background(), 15*time.Second)
|
||||||
defer cancel2()
|
defer cancel2()
|
||||||
args = sm.smartctlArgs(deviceInfo, false)
|
args = sm.smartctlArgs(deviceInfo, false)
|
||||||
cmd = exec.CommandContext(ctx2, sm.smartctlPath, args...)
|
cmd = exec.CommandContext(ctx2, sm.binPath, args...)
|
||||||
output, err = cmd.CombinedOutput()
|
output, err = cmd.CombinedOutput()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -502,7 +482,7 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
|
|||||||
ctx3, cancel3 := context.WithTimeout(context.Background(), 15*time.Second)
|
ctx3, cancel3 := context.WithTimeout(context.Background(), 15*time.Second)
|
||||||
defer cancel3()
|
defer cancel3()
|
||||||
args = sm.smartctlArgs(deviceInfo, false)
|
args = sm.smartctlArgs(deviceInfo, false)
|
||||||
cmd = exec.CommandContext(ctx3, sm.smartctlPath, args...)
|
cmd = exec.CommandContext(ctx3, sm.binPath, args...)
|
||||||
output, err = cmd.CombinedOutput()
|
output, err = cmd.CombinedOutput()
|
||||||
hasValidData = sm.parseSmartOutput(deviceInfo, output)
|
hasValidData = sm.parseSmartOutput(deviceInfo, output)
|
||||||
|
|
||||||
@@ -1143,15 +1123,10 @@ func NewSmartManager() (*SmartManager, error) {
|
|||||||
}
|
}
|
||||||
sm.refreshExcludedDevices()
|
sm.refreshExcludedDevices()
|
||||||
path, err := sm.detectSmartctl()
|
path, err := sm.detectSmartctl()
|
||||||
slog.Debug("smartctl", "path", path, "err", err)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Keep the previous fail-fast behavior unless this Linux host exposes
|
|
||||||
// eMMC health via sysfs, in which case smartctl is optional.
|
|
||||||
if runtime.GOOS == "linux" && len(scanEmmcDevices()) > 0 {
|
|
||||||
return sm, nil
|
|
||||||
}
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
sm.smartctlPath = path
|
slog.Debug("smartctl", "path", path)
|
||||||
|
sm.binPath = path
|
||||||
return sm, nil
|
return sm, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,700 +0,0 @@
|
|||||||
# List of AMDGPU IDs
|
|
||||||
#
|
|
||||||
# Syntax:
|
|
||||||
# device_id, revision_id, product_name <-- single tab after comma
|
|
||||||
|
|
||||||
1.0.0
|
|
||||||
1114, C2, AMD Radeon 860M Graphics
|
|
||||||
1114, C3, AMD Radeon 840M Graphics
|
|
||||||
1114, D2, AMD Radeon 860M Graphics
|
|
||||||
1114, D3, AMD Radeon 840M Graphics
|
|
||||||
1309, 00, AMD Radeon R7 Graphics
|
|
||||||
130A, 00, AMD Radeon R6 Graphics
|
|
||||||
130B, 00, AMD Radeon R4 Graphics
|
|
||||||
130C, 00, AMD Radeon R7 Graphics
|
|
||||||
130D, 00, AMD Radeon R6 Graphics
|
|
||||||
130E, 00, AMD Radeon R5 Graphics
|
|
||||||
130F, 00, AMD Radeon R7 Graphics
|
|
||||||
130F, D4, AMD Radeon R7 Graphics
|
|
||||||
130F, D5, AMD Radeon R7 Graphics
|
|
||||||
130F, D6, AMD Radeon R7 Graphics
|
|
||||||
130F, D7, AMD Radeon R7 Graphics
|
|
||||||
1313, 00, AMD Radeon R7 Graphics
|
|
||||||
1313, D4, AMD Radeon R7 Graphics
|
|
||||||
1313, D5, AMD Radeon R7 Graphics
|
|
||||||
1313, D6, AMD Radeon R7 Graphics
|
|
||||||
1315, 00, AMD Radeon R5 Graphics
|
|
||||||
1315, D4, AMD Radeon R5 Graphics
|
|
||||||
1315, D5, AMD Radeon R5 Graphics
|
|
||||||
1315, D6, AMD Radeon R5 Graphics
|
|
||||||
1315, D7, AMD Radeon R5 Graphics
|
|
||||||
1316, 00, AMD Radeon R5 Graphics
|
|
||||||
1318, 00, AMD Radeon R5 Graphics
|
|
||||||
131B, 00, AMD Radeon R4 Graphics
|
|
||||||
131C, 00, AMD Radeon R7 Graphics
|
|
||||||
131D, 00, AMD Radeon R6 Graphics
|
|
||||||
1435, AE, AMD Custom GPU 0932
|
|
||||||
1506, C1, AMD Radeon 610M
|
|
||||||
1506, C2, AMD Radeon 610M
|
|
||||||
1506, C3, AMD Radeon 610M
|
|
||||||
1506, C4, AMD Radeon 610M
|
|
||||||
150E, C1, AMD Radeon 890M Graphics
|
|
||||||
150E, C4, AMD Radeon 890M Graphics
|
|
||||||
150E, C5, AMD Radeon 890M Graphics
|
|
||||||
150E, C6, AMD Radeon 890M Graphics
|
|
||||||
150E, D1, AMD Radeon 890M Graphics
|
|
||||||
150E, D2, AMD Radeon 890M Graphics
|
|
||||||
150E, D3, AMD Radeon 890M Graphics
|
|
||||||
1586, C1, Radeon 8060S Graphics
|
|
||||||
1586, C2, Radeon 8050S Graphics
|
|
||||||
1586, C4, Radeon 8050S Graphics
|
|
||||||
1586, D1, Radeon 8060S Graphics
|
|
||||||
1586, D2, Radeon 8050S Graphics
|
|
||||||
1586, D4, Radeon 8050S Graphics
|
|
||||||
1586, D5, Radeon 8040S Graphics
|
|
||||||
15BF, 00, AMD Radeon 780M Graphics
|
|
||||||
15BF, 01, AMD Radeon 760M Graphics
|
|
||||||
15BF, 02, AMD Radeon 780M Graphics
|
|
||||||
15BF, 03, AMD Radeon 760M Graphics
|
|
||||||
15BF, C1, AMD Radeon 780M Graphics
|
|
||||||
15BF, C2, AMD Radeon 780M Graphics
|
|
||||||
15BF, C3, AMD Radeon 760M Graphics
|
|
||||||
15BF, C4, AMD Radeon 780M Graphics
|
|
||||||
15BF, C5, AMD Radeon 740M Graphics
|
|
||||||
15BF, C6, AMD Radeon 780M Graphics
|
|
||||||
15BF, C7, AMD Radeon 780M Graphics
|
|
||||||
15BF, C8, AMD Radeon 760M Graphics
|
|
||||||
15BF, C9, AMD Radeon 780M Graphics
|
|
||||||
15BF, CA, AMD Radeon 740M Graphics
|
|
||||||
15BF, CB, AMD Radeon 760M Graphics
|
|
||||||
15BF, CC, AMD Radeon 740M Graphics
|
|
||||||
15BF, CD, AMD Radeon 760M Graphics
|
|
||||||
15BF, CF, AMD Radeon 780M Graphics
|
|
||||||
15BF, D0, AMD Radeon 780M Graphics
|
|
||||||
15BF, D1, AMD Radeon 780M Graphics
|
|
||||||
15BF, D2, AMD Radeon 780M Graphics
|
|
||||||
15BF, D3, AMD Radeon 780M Graphics
|
|
||||||
15BF, D4, AMD Radeon 780M Graphics
|
|
||||||
15BF, D5, AMD Radeon 760M Graphics
|
|
||||||
15BF, D6, AMD Radeon 760M Graphics
|
|
||||||
15BF, D7, AMD Radeon 780M Graphics
|
|
||||||
15BF, D8, AMD Radeon 740M Graphics
|
|
||||||
15BF, D9, AMD Radeon 780M Graphics
|
|
||||||
15BF, DA, AMD Radeon 780M Graphics
|
|
||||||
15BF, DB, AMD Radeon 760M Graphics
|
|
||||||
15BF, DC, AMD Radeon 760M Graphics
|
|
||||||
15BF, DD, AMD Radeon 780M Graphics
|
|
||||||
15BF, DE, AMD Radeon 740M Graphics
|
|
||||||
15BF, DF, AMD Radeon 760M Graphics
|
|
||||||
15BF, F0, AMD Radeon 760M Graphics
|
|
||||||
15C8, C1, AMD Radeon 740M Graphics
|
|
||||||
15C8, C2, AMD Radeon 740M Graphics
|
|
||||||
15C8, C3, AMD Radeon 740M Graphics
|
|
||||||
15C8, C4, AMD Radeon 740M Graphics
|
|
||||||
15C8, D1, AMD Radeon 740M Graphics
|
|
||||||
15C8, D2, AMD Radeon 740M Graphics
|
|
||||||
15C8, D3, AMD Radeon 740M Graphics
|
|
||||||
15C8, D4, AMD Radeon 740M Graphics
|
|
||||||
15D8, 00, AMD Radeon RX Vega 8 Graphics WS
|
|
||||||
15D8, 91, AMD Radeon Vega 3 Graphics
|
|
||||||
15D8, 91, AMD Ryzen Embedded R1606G with Radeon Vega Gfx
|
|
||||||
15D8, 92, AMD Radeon Vega 3 Graphics
|
|
||||||
15D8, 92, AMD Ryzen Embedded R1505G with Radeon Vega Gfx
|
|
||||||
15D8, 93, AMD Radeon Vega 1 Graphics
|
|
||||||
15D8, A1, AMD Radeon Vega 10 Graphics
|
|
||||||
15D8, A2, AMD Radeon Vega 8 Graphics
|
|
||||||
15D8, A3, AMD Radeon Vega 6 Graphics
|
|
||||||
15D8, A4, AMD Radeon Vega 3 Graphics
|
|
||||||
15D8, B1, AMD Radeon Vega 10 Graphics
|
|
||||||
15D8, B2, AMD Radeon Vega 8 Graphics
|
|
||||||
15D8, B3, AMD Radeon Vega 6 Graphics
|
|
||||||
15D8, B4, AMD Radeon Vega 3 Graphics
|
|
||||||
15D8, C1, AMD Radeon Vega 10 Graphics
|
|
||||||
15D8, C2, AMD Radeon Vega 8 Graphics
|
|
||||||
15D8, C3, AMD Radeon Vega 6 Graphics
|
|
||||||
15D8, C4, AMD Radeon Vega 3 Graphics
|
|
||||||
15D8, C5, AMD Radeon Vega 3 Graphics
|
|
||||||
15D8, C8, AMD Radeon Vega 11 Graphics
|
|
||||||
15D8, C9, AMD Radeon Vega 8 Graphics
|
|
||||||
15D8, CA, AMD Radeon Vega 11 Graphics
|
|
||||||
15D8, CB, AMD Radeon Vega 8 Graphics
|
|
||||||
15D8, CC, AMD Radeon Vega 3 Graphics
|
|
||||||
15D8, CE, AMD Radeon Vega 3 Graphics
|
|
||||||
15D8, CF, AMD Ryzen Embedded R1305G with Radeon Vega Gfx
|
|
||||||
15D8, D1, AMD Radeon Vega 10 Graphics
|
|
||||||
15D8, D2, AMD Radeon Vega 8 Graphics
|
|
||||||
15D8, D3, AMD Radeon Vega 6 Graphics
|
|
||||||
15D8, D4, AMD Radeon Vega 3 Graphics
|
|
||||||
15D8, D8, AMD Radeon Vega 11 Graphics
|
|
||||||
15D8, D9, AMD Radeon Vega 8 Graphics
|
|
||||||
15D8, DA, AMD Radeon Vega 11 Graphics
|
|
||||||
15D8, DB, AMD Radeon Vega 3 Graphics
|
|
||||||
15D8, DB, AMD Radeon Vega 8 Graphics
|
|
||||||
15D8, DC, AMD Radeon Vega 3 Graphics
|
|
||||||
15D8, DD, AMD Radeon Vega 3 Graphics
|
|
||||||
15D8, DE, AMD Radeon Vega 3 Graphics
|
|
||||||
15D8, DF, AMD Radeon Vega 3 Graphics
|
|
||||||
15D8, E3, AMD Radeon Vega 3 Graphics
|
|
||||||
15D8, E4, AMD Ryzen Embedded R1102G with Radeon Vega Gfx
|
|
||||||
15DD, 81, AMD Ryzen Embedded V1807B with Radeon Vega Gfx
|
|
||||||
15DD, 82, AMD Ryzen Embedded V1756B with Radeon Vega Gfx
|
|
||||||
15DD, 83, AMD Ryzen Embedded V1605B with Radeon Vega Gfx
|
|
||||||
15DD, 84, AMD Radeon Vega 6 Graphics
|
|
||||||
15DD, 85, AMD Ryzen Embedded V1202B with Radeon Vega Gfx
|
|
||||||
15DD, 86, AMD Radeon Vega 11 Graphics
|
|
||||||
15DD, 88, AMD Radeon Vega 8 Graphics
|
|
||||||
15DD, C1, AMD Radeon Vega 11 Graphics
|
|
||||||
15DD, C2, AMD Radeon Vega 8 Graphics
|
|
||||||
15DD, C3, AMD Radeon Vega 3 / 10 Graphics
|
|
||||||
15DD, C4, AMD Radeon Vega 8 Graphics
|
|
||||||
15DD, C5, AMD Radeon Vega 3 Graphics
|
|
||||||
15DD, C6, AMD Radeon Vega 11 Graphics
|
|
||||||
15DD, C8, AMD Radeon Vega 8 Graphics
|
|
||||||
15DD, C9, AMD Radeon Vega 11 Graphics
|
|
||||||
15DD, CA, AMD Radeon Vega 8 Graphics
|
|
||||||
15DD, CB, AMD Radeon Vega 3 Graphics
|
|
||||||
15DD, CC, AMD Radeon Vega 6 Graphics
|
|
||||||
15DD, CE, AMD Radeon Vega 3 Graphics
|
|
||||||
15DD, CF, AMD Radeon Vega 3 Graphics
|
|
||||||
15DD, D0, AMD Radeon Vega 10 Graphics
|
|
||||||
15DD, D1, AMD Radeon Vega 8 Graphics
|
|
||||||
15DD, D3, AMD Radeon Vega 11 Graphics
|
|
||||||
15DD, D5, AMD Radeon Vega 8 Graphics
|
|
||||||
15DD, D6, AMD Radeon Vega 11 Graphics
|
|
||||||
15DD, D7, AMD Radeon Vega 8 Graphics
|
|
||||||
15DD, D8, AMD Radeon Vega 3 Graphics
|
|
||||||
15DD, D9, AMD Radeon Vega 6 Graphics
|
|
||||||
15DD, E1, AMD Radeon Vega 3 Graphics
|
|
||||||
15DD, E2, AMD Radeon Vega 3 Graphics
|
|
||||||
163F, AE, AMD Custom GPU 0405
|
|
||||||
163F, E1, AMD Custom GPU 0405
|
|
||||||
164E, D8, AMD Radeon 610M
|
|
||||||
164E, D9, AMD Radeon 610M
|
|
||||||
164E, DA, AMD Radeon 610M
|
|
||||||
164E, DB, AMD Radeon 610M
|
|
||||||
164E, DC, AMD Radeon 610M
|
|
||||||
1681, 06, AMD Radeon 680M
|
|
||||||
1681, 07, AMD Radeon 660M
|
|
||||||
1681, 0A, AMD Radeon 680M
|
|
||||||
1681, 0B, AMD Radeon 660M
|
|
||||||
1681, C7, AMD Radeon 680M
|
|
||||||
1681, C8, AMD Radeon 680M
|
|
||||||
1681, C9, AMD Radeon 660M
|
|
||||||
1900, 01, AMD Radeon 780M Graphics
|
|
||||||
1900, 02, AMD Radeon 760M Graphics
|
|
||||||
1900, 03, AMD Radeon 780M Graphics
|
|
||||||
1900, 04, AMD Radeon 760M Graphics
|
|
||||||
1900, 05, AMD Radeon 780M Graphics
|
|
||||||
1900, 06, AMD Radeon 780M Graphics
|
|
||||||
1900, 07, AMD Radeon 760M Graphics
|
|
||||||
1900, B0, AMD Radeon 780M Graphics
|
|
||||||
1900, B1, AMD Radeon 780M Graphics
|
|
||||||
1900, B2, AMD Radeon 780M Graphics
|
|
||||||
1900, B3, AMD Radeon 780M Graphics
|
|
||||||
1900, B4, AMD Radeon 780M Graphics
|
|
||||||
1900, B5, AMD Radeon 780M Graphics
|
|
||||||
1900, B6, AMD Radeon 780M Graphics
|
|
||||||
1900, B7, AMD Radeon 760M Graphics
|
|
||||||
1900, B8, AMD Radeon 760M Graphics
|
|
||||||
1900, B9, AMD Radeon 780M Graphics
|
|
||||||
1900, BA, AMD Radeon 780M Graphics
|
|
||||||
1900, BB, AMD Radeon 780M Graphics
|
|
||||||
1900, C0, AMD Radeon 780M Graphics
|
|
||||||
1900, C1, AMD Radeon 760M Graphics
|
|
||||||
1900, C2, AMD Radeon 780M Graphics
|
|
||||||
1900, C3, AMD Radeon 760M Graphics
|
|
||||||
1900, C4, AMD Radeon 780M Graphics
|
|
||||||
1900, C5, AMD Radeon 780M Graphics
|
|
||||||
1900, C6, AMD Radeon 760M Graphics
|
|
||||||
1900, C7, AMD Radeon 780M Graphics
|
|
||||||
1900, C8, AMD Radeon 760M Graphics
|
|
||||||
1900, C9, AMD Radeon 780M Graphics
|
|
||||||
1900, CA, AMD Radeon 760M Graphics
|
|
||||||
1900, CB, AMD Radeon 780M Graphics
|
|
||||||
1900, CC, AMD Radeon 780M Graphics
|
|
||||||
1900, CD, AMD Radeon 760M Graphics
|
|
||||||
1900, CE, AMD Radeon 780M Graphics
|
|
||||||
1900, CF, AMD Radeon 760M Graphics
|
|
||||||
1900, D0, AMD Radeon 780M Graphics
|
|
||||||
1900, D1, AMD Radeon 760M Graphics
|
|
||||||
1900, D2, AMD Radeon 780M Graphics
|
|
||||||
1900, D3, AMD Radeon 760M Graphics
|
|
||||||
1900, D4, AMD Radeon 780M Graphics
|
|
||||||
1900, D5, AMD Radeon 780M Graphics
|
|
||||||
1900, D6, AMD Radeon 760M Graphics
|
|
||||||
1900, D7, AMD Radeon 780M Graphics
|
|
||||||
1900, D8, AMD Radeon 760M Graphics
|
|
||||||
1900, D9, AMD Radeon 780M Graphics
|
|
||||||
1900, DA, AMD Radeon 760M Graphics
|
|
||||||
1900, DB, AMD Radeon 780M Graphics
|
|
||||||
1900, DC, AMD Radeon 780M Graphics
|
|
||||||
1900, DD, AMD Radeon 760M Graphics
|
|
||||||
1900, DE, AMD Radeon 780M Graphics
|
|
||||||
1900, DF, AMD Radeon 760M Graphics
|
|
||||||
1900, F0, AMD Radeon 780M Graphics
|
|
||||||
1900, F1, AMD Radeon 780M Graphics
|
|
||||||
1900, F2, AMD Radeon 780M Graphics
|
|
||||||
1901, C1, AMD Radeon 740M Graphics
|
|
||||||
1901, C2, AMD Radeon 740M Graphics
|
|
||||||
1901, C3, AMD Radeon 740M Graphics
|
|
||||||
1901, C6, AMD Radeon 740M Graphics
|
|
||||||
1901, C7, AMD Radeon 740M Graphics
|
|
||||||
1901, C8, AMD Radeon 740M Graphics
|
|
||||||
1901, C9, AMD Radeon 740M Graphics
|
|
||||||
1901, CA, AMD Radeon 740M Graphics
|
|
||||||
1901, D1, AMD Radeon 740M Graphics
|
|
||||||
1901, D2, AMD Radeon 740M Graphics
|
|
||||||
1901, D3, AMD Radeon 740M Graphics
|
|
||||||
1901, D4, AMD Radeon 740M Graphics
|
|
||||||
1901, D5, AMD Radeon 740M Graphics
|
|
||||||
1901, D6, AMD Radeon 740M Graphics
|
|
||||||
1901, D7, AMD Radeon 740M Graphics
|
|
||||||
1901, D8, AMD Radeon 740M Graphics
|
|
||||||
6600, 00, AMD Radeon HD 8600 / 8700M
|
|
||||||
6600, 81, AMD Radeon R7 M370
|
|
||||||
6601, 00, AMD Radeon HD 8500M / 8700M
|
|
||||||
6604, 00, AMD Radeon R7 M265 Series
|
|
||||||
6604, 81, AMD Radeon R7 M350
|
|
||||||
6605, 00, AMD Radeon R7 M260 Series
|
|
||||||
6605, 81, AMD Radeon R7 M340
|
|
||||||
6606, 00, AMD Radeon HD 8790M
|
|
||||||
6607, 00, AMD Radeon R5 M240
|
|
||||||
6608, 00, AMD FirePro W2100
|
|
||||||
6610, 00, AMD Radeon R7 200 Series
|
|
||||||
6610, 81, AMD Radeon R7 350
|
|
||||||
6610, 83, AMD Radeon R5 340
|
|
||||||
6610, 87, AMD Radeon R7 200 Series
|
|
||||||
6611, 00, AMD Radeon R7 200 Series
|
|
||||||
6611, 87, AMD Radeon R7 200 Series
|
|
||||||
6613, 00, AMD Radeon R7 200 Series
|
|
||||||
6617, 00, AMD Radeon R7 240 Series
|
|
||||||
6617, 87, AMD Radeon R7 200 Series
|
|
||||||
6617, C7, AMD Radeon R7 240 Series
|
|
||||||
6640, 00, AMD Radeon HD 8950
|
|
||||||
6640, 80, AMD Radeon R9 M380
|
|
||||||
6646, 00, AMD Radeon R9 M280X
|
|
||||||
6646, 80, AMD Radeon R9 M385
|
|
||||||
6646, 80, AMD Radeon R9 M470X
|
|
||||||
6647, 00, AMD Radeon R9 M200X Series
|
|
||||||
6647, 80, AMD Radeon R9 M380
|
|
||||||
6649, 00, AMD FirePro W5100
|
|
||||||
6658, 00, AMD Radeon R7 200 Series
|
|
||||||
665C, 00, AMD Radeon HD 7700 Series
|
|
||||||
665D, 00, AMD Radeon R7 200 Series
|
|
||||||
665F, 81, AMD Radeon R7 360 Series
|
|
||||||
6660, 00, AMD Radeon HD 8600M Series
|
|
||||||
6660, 81, AMD Radeon R5 M335
|
|
||||||
6660, 83, AMD Radeon R5 M330
|
|
||||||
6663, 00, AMD Radeon HD 8500M Series
|
|
||||||
6663, 83, AMD Radeon R5 M320
|
|
||||||
6664, 00, AMD Radeon R5 M200 Series
|
|
||||||
6665, 00, AMD Radeon R5 M230 Series
|
|
||||||
6665, 83, AMD Radeon R5 M320
|
|
||||||
6665, C3, AMD Radeon R5 M435
|
|
||||||
6666, 00, AMD Radeon R5 M200 Series
|
|
||||||
6667, 00, AMD Radeon R5 M200 Series
|
|
||||||
666F, 00, AMD Radeon HD 8500M
|
|
||||||
66A1, 02, AMD Instinct MI60 / MI50
|
|
||||||
66A1, 06, AMD Radeon Pro VII
|
|
||||||
66AF, C1, AMD Radeon VII
|
|
||||||
6780, 00, AMD FirePro W9000
|
|
||||||
6784, 00, ATI FirePro V (FireGL V) Graphics Adapter
|
|
||||||
6788, 00, ATI FirePro V (FireGL V) Graphics Adapter
|
|
||||||
678A, 00, AMD FirePro W8000
|
|
||||||
6798, 00, AMD Radeon R9 200 / HD 7900 Series
|
|
||||||
6799, 00, AMD Radeon HD 7900 Series
|
|
||||||
679A, 00, AMD Radeon HD 7900 Series
|
|
||||||
679B, 00, AMD Radeon HD 7900 Series
|
|
||||||
679E, 00, AMD Radeon HD 7800 Series
|
|
||||||
67A0, 00, AMD Radeon FirePro W9100
|
|
||||||
67A1, 00, AMD Radeon FirePro W8100
|
|
||||||
67B0, 00, AMD Radeon R9 200 Series
|
|
||||||
67B0, 80, AMD Radeon R9 390 Series
|
|
||||||
67B1, 00, AMD Radeon R9 200 Series
|
|
||||||
67B1, 80, AMD Radeon R9 390 Series
|
|
||||||
67B9, 00, AMD Radeon R9 200 Series
|
|
||||||
67C0, 00, AMD Radeon Pro WX 7100 Graphics
|
|
||||||
67C0, 80, AMD Radeon E9550
|
|
||||||
67C2, 01, AMD Radeon Pro V7350x2
|
|
||||||
67C2, 02, AMD Radeon Pro V7300X
|
|
||||||
67C4, 00, AMD Radeon Pro WX 7100 Graphics
|
|
||||||
67C4, 80, AMD Radeon E9560 / E9565 Graphics
|
|
||||||
67C7, 00, AMD Radeon Pro WX 5100 Graphics
|
|
||||||
67C7, 80, AMD Radeon E9390 Graphics
|
|
||||||
67D0, 01, AMD Radeon Pro V7350x2
|
|
||||||
67D0, 02, AMD Radeon Pro V7300X
|
|
||||||
67DF, C0, AMD Radeon Pro 580X
|
|
||||||
67DF, C1, AMD Radeon RX 580 Series
|
|
||||||
67DF, C2, AMD Radeon RX 570 Series
|
|
||||||
67DF, C3, AMD Radeon RX 580 Series
|
|
||||||
67DF, C4, AMD Radeon RX 480 Graphics
|
|
||||||
67DF, C5, AMD Radeon RX 470 Graphics
|
|
||||||
67DF, C6, AMD Radeon RX 570 Series
|
|
||||||
67DF, C7, AMD Radeon RX 480 Graphics
|
|
||||||
67DF, CF, AMD Radeon RX 470 Graphics
|
|
||||||
67DF, D7, AMD Radeon RX 470 Graphics
|
|
||||||
67DF, E0, AMD Radeon RX 470 Series
|
|
||||||
67DF, E1, AMD Radeon RX 590 Series
|
|
||||||
67DF, E3, AMD Radeon RX Series
|
|
||||||
67DF, E7, AMD Radeon RX 580 Series
|
|
||||||
67DF, EB, AMD Radeon Pro 580X
|
|
||||||
67DF, EF, AMD Radeon RX 570 Series
|
|
||||||
67DF, F7, AMD Radeon RX P30PH
|
|
||||||
67DF, FF, AMD Radeon RX 470 Series
|
|
||||||
67E0, 00, AMD Radeon Pro WX Series
|
|
||||||
67E3, 00, AMD Radeon Pro WX 4100
|
|
||||||
67E8, 00, AMD Radeon Pro WX Series
|
|
||||||
67E8, 01, AMD Radeon Pro WX Series
|
|
||||||
67E8, 80, AMD Radeon E9260 Graphics
|
|
||||||
67EB, 00, AMD Radeon Pro V5300X
|
|
||||||
67EF, C0, AMD Radeon RX Graphics
|
|
||||||
67EF, C1, AMD Radeon RX 460 Graphics
|
|
||||||
67EF, C2, AMD Radeon Pro Series
|
|
||||||
67EF, C3, AMD Radeon RX Series
|
|
||||||
67EF, C5, AMD Radeon RX 460 Graphics
|
|
||||||
67EF, C7, AMD Radeon RX Graphics
|
|
||||||
67EF, CF, AMD Radeon RX 460 Graphics
|
|
||||||
67EF, E0, AMD Radeon RX 560 Series
|
|
||||||
67EF, E1, AMD Radeon RX Series
|
|
||||||
67EF, E2, AMD Radeon RX 560X
|
|
||||||
67EF, E3, AMD Radeon RX Series
|
|
||||||
67EF, E5, AMD Radeon RX 560 Series
|
|
||||||
67EF, E7, AMD Radeon RX 560 Series
|
|
||||||
67EF, EF, AMD Radeon 550 Series
|
|
||||||
67EF, FF, AMD Radeon RX 460 Graphics
|
|
||||||
67FF, C0, AMD Radeon Pro 465
|
|
||||||
67FF, C1, AMD Radeon RX 560 Series
|
|
||||||
67FF, CF, AMD Radeon RX 560 Series
|
|
||||||
67FF, EF, AMD Radeon RX 560 Series
|
|
||||||
67FF, FF, AMD Radeon RX 550 Series
|
|
||||||
6800, 00, AMD Radeon HD 7970M
|
|
||||||
6801, 00, AMD Radeon HD 8970M
|
|
||||||
6806, 00, AMD Radeon R9 M290X
|
|
||||||
6808, 00, AMD FirePro W7000
|
|
||||||
6808, 00, ATI FirePro V (FireGL V) Graphics Adapter
|
|
||||||
6809, 00, ATI FirePro W5000
|
|
||||||
6810, 00, AMD Radeon R9 200 Series
|
|
||||||
6810, 81, AMD Radeon R9 370 Series
|
|
||||||
6811, 00, AMD Radeon R9 200 Series
|
|
||||||
6811, 81, AMD Radeon R7 370 Series
|
|
||||||
6818, 00, AMD Radeon HD 7800 Series
|
|
||||||
6819, 00, AMD Radeon HD 7800 Series
|
|
||||||
6820, 00, AMD Radeon R9 M275X
|
|
||||||
6820, 81, AMD Radeon R9 M375
|
|
||||||
6820, 83, AMD Radeon R9 M375X
|
|
||||||
6821, 00, AMD Radeon R9 M200X Series
|
|
||||||
6821, 83, AMD Radeon R9 M370X
|
|
||||||
6821, 87, AMD Radeon R7 M380
|
|
||||||
6822, 00, AMD Radeon E8860
|
|
||||||
6823, 00, AMD Radeon R9 M200X Series
|
|
||||||
6825, 00, AMD Radeon HD 7800M Series
|
|
||||||
6826, 00, AMD Radeon HD 7700M Series
|
|
||||||
6827, 00, AMD Radeon HD 7800M Series
|
|
||||||
6828, 00, AMD FirePro W600
|
|
||||||
682B, 00, AMD Radeon HD 8800M Series
|
|
||||||
682B, 87, AMD Radeon R9 M360
|
|
||||||
682C, 00, AMD FirePro W4100
|
|
||||||
682D, 00, AMD Radeon HD 7700M Series
|
|
||||||
682F, 00, AMD Radeon HD 7700M Series
|
|
||||||
6830, 00, AMD Radeon 7800M Series
|
|
||||||
6831, 00, AMD Radeon 7700M Series
|
|
||||||
6835, 00, AMD Radeon R7 Series / HD 9000 Series
|
|
||||||
6837, 00, AMD Radeon HD 7700 Series
|
|
||||||
683D, 00, AMD Radeon HD 7700 Series
|
|
||||||
683F, 00, AMD Radeon HD 7700 Series
|
|
||||||
684C, 00, ATI FirePro V (FireGL V) Graphics Adapter
|
|
||||||
6860, 00, AMD Radeon Instinct MI25
|
|
||||||
6860, 01, AMD Radeon Instinct MI25
|
|
||||||
6860, 02, AMD Radeon Instinct MI25
|
|
||||||
6860, 03, AMD Radeon Pro V340
|
|
||||||
6860, 04, AMD Radeon Instinct MI25x2
|
|
||||||
6860, 07, AMD Radeon Pro V320
|
|
||||||
6861, 00, AMD Radeon Pro WX 9100
|
|
||||||
6862, 00, AMD Radeon Pro SSG
|
|
||||||
6863, 00, AMD Radeon Vega Frontier Edition
|
|
||||||
6864, 03, AMD Radeon Pro V340
|
|
||||||
6864, 04, AMD Radeon Instinct MI25x2
|
|
||||||
6864, 05, AMD Radeon Pro V340
|
|
||||||
6868, 00, AMD Radeon Pro WX 8200
|
|
||||||
686C, 00, AMD Radeon Instinct MI25 MxGPU
|
|
||||||
686C, 01, AMD Radeon Instinct MI25 MxGPU
|
|
||||||
686C, 02, AMD Radeon Instinct MI25 MxGPU
|
|
||||||
686C, 03, AMD Radeon Pro V340 MxGPU
|
|
||||||
686C, 04, AMD Radeon Instinct MI25x2 MxGPU
|
|
||||||
686C, 05, AMD Radeon Pro V340L MxGPU
|
|
||||||
686C, 06, AMD Radeon Instinct MI25 MxGPU
|
|
||||||
687F, 01, AMD Radeon RX Vega
|
|
||||||
687F, C0, AMD Radeon RX Vega
|
|
||||||
687F, C1, AMD Radeon RX Vega
|
|
||||||
687F, C3, AMD Radeon RX Vega
|
|
||||||
687F, C7, AMD Radeon RX Vega
|
|
||||||
6900, 00, AMD Radeon R7 M260
|
|
||||||
6900, 81, AMD Radeon R7 M360
|
|
||||||
6900, 83, AMD Radeon R7 M340
|
|
||||||
6900, C1, AMD Radeon R5 M465 Series
|
|
||||||
6900, C3, AMD Radeon R5 M445 Series
|
|
||||||
6900, D1, AMD Radeon 530 Series
|
|
||||||
6900, D3, AMD Radeon 530 Series
|
|
||||||
6901, 00, AMD Radeon R5 M255
|
|
||||||
6902, 00, AMD Radeon Series
|
|
||||||
6907, 00, AMD Radeon R5 M255
|
|
||||||
6907, 87, AMD Radeon R5 M315
|
|
||||||
6920, 00, AMD Radeon R9 M395X
|
|
||||||
6920, 01, AMD Radeon R9 M390X
|
|
||||||
6921, 00, AMD Radeon R9 M390X
|
|
||||||
6929, 00, AMD FirePro S7150
|
|
||||||
6929, 01, AMD FirePro S7100X
|
|
||||||
692B, 00, AMD FirePro W7100
|
|
||||||
6938, 00, AMD Radeon R9 200 Series
|
|
||||||
6938, F0, AMD Radeon R9 200 Series
|
|
||||||
6938, F1, AMD Radeon R9 380 Series
|
|
||||||
6939, 00, AMD Radeon R9 200 Series
|
|
||||||
6939, F0, AMD Radeon R9 200 Series
|
|
||||||
6939, F1, AMD Radeon R9 380 Series
|
|
||||||
694C, C0, AMD Radeon RX Vega M GH Graphics
|
|
||||||
694E, C0, AMD Radeon RX Vega M GL Graphics
|
|
||||||
6980, 00, AMD Radeon Pro WX 3100
|
|
||||||
6981, 00, AMD Radeon Pro WX 3200 Series
|
|
||||||
6981, 01, AMD Radeon Pro WX 3200 Series
|
|
||||||
6981, 10, AMD Radeon Pro WX 3200 Series
|
|
||||||
6985, 00, AMD Radeon Pro WX 3100
|
|
||||||
6986, 00, AMD Radeon Pro WX 2100
|
|
||||||
6987, 80, AMD Embedded Radeon E9171
|
|
||||||
6987, C0, AMD Radeon 550X Series
|
|
||||||
6987, C1, AMD Radeon RX 640
|
|
||||||
6987, C3, AMD Radeon 540X Series
|
|
||||||
6987, C7, AMD Radeon 540
|
|
||||||
6995, 00, AMD Radeon Pro WX 2100
|
|
||||||
6997, 00, AMD Radeon Pro WX 2100
|
|
||||||
699F, 81, AMD Embedded Radeon E9170 Series
|
|
||||||
699F, C0, AMD Radeon 500 Series
|
|
||||||
699F, C1, AMD Radeon 540 Series
|
|
||||||
699F, C3, AMD Radeon 500 Series
|
|
||||||
699F, C7, AMD Radeon RX 550 / 550 Series
|
|
||||||
699F, C9, AMD Radeon 540
|
|
||||||
6FDF, E7, AMD Radeon RX 590 GME
|
|
||||||
6FDF, EF, AMD Radeon RX 580 2048SP
|
|
||||||
7300, C1, AMD FirePro S9300 x2
|
|
||||||
7300, C8, AMD Radeon R9 Fury Series
|
|
||||||
7300, C9, AMD Radeon Pro Duo
|
|
||||||
7300, CA, AMD Radeon R9 Fury Series
|
|
||||||
7300, CB, AMD Radeon R9 Fury Series
|
|
||||||
7312, 00, AMD Radeon Pro W5700
|
|
||||||
731E, C6, AMD Radeon RX 5700XTB
|
|
||||||
731E, C7, AMD Radeon RX 5700B
|
|
||||||
731F, C0, AMD Radeon RX 5700 XT 50th Anniversary
|
|
||||||
731F, C1, AMD Radeon RX 5700 XT
|
|
||||||
731F, C2, AMD Radeon RX 5600M
|
|
||||||
731F, C3, AMD Radeon RX 5700M
|
|
||||||
731F, C4, AMD Radeon RX 5700
|
|
||||||
731F, C5, AMD Radeon RX 5700 XT
|
|
||||||
731F, CA, AMD Radeon RX 5600 XT
|
|
||||||
731F, CB, AMD Radeon RX 5600 OEM
|
|
||||||
7340, C1, AMD Radeon RX 5500M
|
|
||||||
7340, C3, AMD Radeon RX 5300M
|
|
||||||
7340, C5, AMD Radeon RX 5500 XT
|
|
||||||
7340, C7, AMD Radeon RX 5500
|
|
||||||
7340, C9, AMD Radeon RX 5500XTB
|
|
||||||
7340, CF, AMD Radeon RX 5300
|
|
||||||
7341, 00, AMD Radeon Pro W5500
|
|
||||||
7347, 00, AMD Radeon Pro W5500M
|
|
||||||
7360, 41, AMD Radeon Pro 5600M
|
|
||||||
7360, C3, AMD Radeon Pro V520
|
|
||||||
7362, C1, AMD Radeon Pro V540
|
|
||||||
7362, C3, AMD Radeon Pro V520
|
|
||||||
738C, 01, AMD Instinct MI100
|
|
||||||
73A1, 00, AMD Radeon Pro V620
|
|
||||||
73A3, 00, AMD Radeon Pro W6800
|
|
||||||
73A5, C0, AMD Radeon RX 6950 XT
|
|
||||||
73AE, 00, AMD Radeon Pro V620 MxGPU
|
|
||||||
73AF, C0, AMD Radeon RX 6900 XT
|
|
||||||
73BF, C0, AMD Radeon RX 6900 XT
|
|
||||||
73BF, C1, AMD Radeon RX 6800 XT
|
|
||||||
73BF, C3, AMD Radeon RX 6800
|
|
||||||
73DF, C0, AMD Radeon RX 6750 XT
|
|
||||||
73DF, C1, AMD Radeon RX 6700 XT
|
|
||||||
73DF, C2, AMD Radeon RX 6800M
|
|
||||||
73DF, C3, AMD Radeon RX 6800M
|
|
||||||
73DF, C5, AMD Radeon RX 6700 XT
|
|
||||||
73DF, CF, AMD Radeon RX 6700M
|
|
||||||
73DF, D5, AMD Radeon RX 6750 GRE 12GB
|
|
||||||
73DF, D7, AMD TDC-235
|
|
||||||
73DF, DF, AMD Radeon RX 6700
|
|
||||||
73DF, E5, AMD Radeon RX 6750 GRE 12GB
|
|
||||||
73DF, FF, AMD Radeon RX 6700
|
|
||||||
73E0, 00, AMD Radeon RX 6600M
|
|
||||||
73E1, 00, AMD Radeon Pro W6600M
|
|
||||||
73E3, 00, AMD Radeon Pro W6600
|
|
||||||
73EF, C0, AMD Radeon RX 6800S
|
|
||||||
73EF, C1, AMD Radeon RX 6650 XT
|
|
||||||
73EF, C2, AMD Radeon RX 6700S
|
|
||||||
73EF, C3, AMD Radeon RX 6650M
|
|
||||||
73EF, C4, AMD Radeon RX 6650M XT
|
|
||||||
73FF, C1, AMD Radeon RX 6600 XT
|
|
||||||
73FF, C3, AMD Radeon RX 6600M
|
|
||||||
73FF, C7, AMD Radeon RX 6600
|
|
||||||
73FF, CB, AMD Radeon RX 6600S
|
|
||||||
73FF, CF, AMD Radeon RX 6600 LE
|
|
||||||
73FF, DF, AMD Radeon RX 6750 GRE 10GB
|
|
||||||
7408, 00, AMD Instinct MI250X
|
|
||||||
740C, 01, AMD Instinct MI250X / MI250
|
|
||||||
740F, 02, AMD Instinct MI210
|
|
||||||
7421, 00, AMD Radeon Pro W6500M
|
|
||||||
7422, 00, AMD Radeon Pro W6400
|
|
||||||
7423, 00, AMD Radeon Pro W6300M
|
|
||||||
7423, 01, AMD Radeon Pro W6300
|
|
||||||
7424, 00, AMD Radeon RX 6300
|
|
||||||
743F, C1, AMD Radeon RX 6500 XT
|
|
||||||
743F, C3, AMD Radeon RX 6500
|
|
||||||
743F, C3, AMD Radeon RX 6500M
|
|
||||||
743F, C7, AMD Radeon RX 6400
|
|
||||||
743F, C8, AMD Radeon RX 6500M
|
|
||||||
743F, CC, AMD Radeon 6550S
|
|
||||||
743F, CE, AMD Radeon RX 6450M
|
|
||||||
743F, CF, AMD Radeon RX 6300M
|
|
||||||
743F, D3, AMD Radeon RX 6550M
|
|
||||||
743F, D7, AMD Radeon RX 6400
|
|
||||||
7448, 00, AMD Radeon Pro W7900
|
|
||||||
7449, 00, AMD Radeon Pro W7800 48GB
|
|
||||||
744A, 00, AMD Radeon Pro W7900 Dual Slot
|
|
||||||
744B, 00, AMD Radeon Pro W7900D
|
|
||||||
744C, C8, AMD Radeon RX 7900 XTX
|
|
||||||
744C, CC, AMD Radeon RX 7900 XT
|
|
||||||
744C, CE, AMD Radeon RX 7900 GRE
|
|
||||||
744C, CF, AMD Radeon RX 7900M
|
|
||||||
745E, CC, AMD Radeon Pro W7800
|
|
||||||
7460, 00, AMD Radeon Pro V710
|
|
||||||
7461, 00, AMD Radeon Pro V710 MxGPU
|
|
||||||
7470, 00, AMD Radeon Pro W7700
|
|
||||||
747E, C8, AMD Radeon RX 7800 XT
|
|
||||||
747E, D8, AMD Radeon RX 7800M
|
|
||||||
747E, DB, AMD Radeon RX 7700
|
|
||||||
747E, FF, AMD Radeon RX 7700 XT
|
|
||||||
7480, 00, AMD Radeon Pro W7600
|
|
||||||
7480, C0, AMD Radeon RX 7600 XT
|
|
||||||
7480, C1, AMD Radeon RX 7700S
|
|
||||||
7480, C2, AMD Radeon RX 7650 GRE
|
|
||||||
7480, C3, AMD Radeon RX 7600S
|
|
||||||
7480, C7, AMD Radeon RX 7600M XT
|
|
||||||
7480, CF, AMD Radeon RX 7600
|
|
||||||
7481, C7, AMD Steam Machine
|
|
||||||
7483, CF, AMD Radeon RX 7600M
|
|
||||||
7489, 00, AMD Radeon Pro W7500
|
|
||||||
7499, 00, AMD Radeon Pro W7400
|
|
||||||
7499, C0, AMD Radeon RX 7400
|
|
||||||
7499, C1, AMD Radeon RX 7300
|
|
||||||
74A0, 00, AMD Instinct MI300A
|
|
||||||
74A1, 00, AMD Instinct MI300X
|
|
||||||
74A2, 00, AMD Instinct MI308X
|
|
||||||
74A5, 00, AMD Instinct MI325X
|
|
||||||
74A8, 00, AMD Instinct MI308X HF
|
|
||||||
74A9, 00, AMD Instinct MI300X HF
|
|
||||||
74B5, 00, AMD Instinct MI300X VF
|
|
||||||
74B6, 00, AMD Instinct MI308X
|
|
||||||
74BD, 00, AMD Instinct MI300X HF
|
|
||||||
7550, C0, AMD Radeon RX 9070 XT
|
|
||||||
7550, C2, AMD Radeon RX 9070 GRE
|
|
||||||
7550, C3, AMD Radeon RX 9070
|
|
||||||
7551, C0, AMD Radeon AI PRO R9700
|
|
||||||
7590, C0, AMD Radeon RX 9060 XT
|
|
||||||
7590, C7, AMD Radeon RX 9060
|
|
||||||
75A0, C0, AMD Instinct MI350X
|
|
||||||
75A3, C0, AMD Instinct MI355X
|
|
||||||
75B0, C0, AMD Instinct MI350X VF
|
|
||||||
75B3, C0, AMD Instinct MI355X VF
|
|
||||||
9830, 00, AMD Radeon HD 8400 / R3 Series
|
|
||||||
9831, 00, AMD Radeon HD 8400E
|
|
||||||
9832, 00, AMD Radeon HD 8330
|
|
||||||
9833, 00, AMD Radeon HD 8330E
|
|
||||||
9834, 00, AMD Radeon HD 8210
|
|
||||||
9835, 00, AMD Radeon HD 8210E
|
|
||||||
9836, 00, AMD Radeon HD 8200 / R3 Series
|
|
||||||
9837, 00, AMD Radeon HD 8280E
|
|
||||||
9838, 00, AMD Radeon HD 8200 / R3 series
|
|
||||||
9839, 00, AMD Radeon HD 8180
|
|
||||||
983D, 00, AMD Radeon HD 8250
|
|
||||||
9850, 00, AMD Radeon R3 Graphics
|
|
||||||
9850, 03, AMD Radeon R3 Graphics
|
|
||||||
9850, 40, AMD Radeon R2 Graphics
|
|
||||||
9850, 45, AMD Radeon R3 Graphics
|
|
||||||
9851, 00, AMD Radeon R4 Graphics
|
|
||||||
9851, 01, AMD Radeon R5E Graphics
|
|
||||||
9851, 05, AMD Radeon R5 Graphics
|
|
||||||
9851, 06, AMD Radeon R5E Graphics
|
|
||||||
9851, 40, AMD Radeon R4 Graphics
|
|
||||||
9851, 45, AMD Radeon R5 Graphics
|
|
||||||
9852, 00, AMD Radeon R2 Graphics
|
|
||||||
9852, 40, AMD Radeon E1 Graphics
|
|
||||||
9853, 00, AMD Radeon R2 Graphics
|
|
||||||
9853, 01, AMD Radeon R4E Graphics
|
|
||||||
9853, 03, AMD Radeon R2 Graphics
|
|
||||||
9853, 05, AMD Radeon R1E Graphics
|
|
||||||
9853, 06, AMD Radeon R1E Graphics
|
|
||||||
9853, 07, AMD Radeon R1E Graphics
|
|
||||||
9853, 08, AMD Radeon R1E Graphics
|
|
||||||
9853, 40, AMD Radeon R2 Graphics
|
|
||||||
9854, 00, AMD Radeon R3 Graphics
|
|
||||||
9854, 01, AMD Radeon R3E Graphics
|
|
||||||
9854, 02, AMD Radeon R3 Graphics
|
|
||||||
9854, 05, AMD Radeon R2 Graphics
|
|
||||||
9854, 06, AMD Radeon R4 Graphics
|
|
||||||
9854, 07, AMD Radeon R3 Graphics
|
|
||||||
9855, 02, AMD Radeon R6 Graphics
|
|
||||||
9855, 05, AMD Radeon R4 Graphics
|
|
||||||
9856, 00, AMD Radeon R2 Graphics
|
|
||||||
9856, 01, AMD Radeon R2E Graphics
|
|
||||||
9856, 02, AMD Radeon R2 Graphics
|
|
||||||
9856, 05, AMD Radeon R1E Graphics
|
|
||||||
9856, 06, AMD Radeon R2 Graphics
|
|
||||||
9856, 07, AMD Radeon R1E Graphics
|
|
||||||
9856, 08, AMD Radeon R1E Graphics
|
|
||||||
9856, 13, AMD Radeon R1E Graphics
|
|
||||||
9874, 81, AMD Radeon R6 Graphics
|
|
||||||
9874, 84, AMD Radeon R7 Graphics
|
|
||||||
9874, 85, AMD Radeon R6 Graphics
|
|
||||||
9874, 87, AMD Radeon R5 Graphics
|
|
||||||
9874, 88, AMD Radeon R7E Graphics
|
|
||||||
9874, 89, AMD Radeon R6E Graphics
|
|
||||||
9874, C4, AMD Radeon R7 Graphics
|
|
||||||
9874, C5, AMD Radeon R6 Graphics
|
|
||||||
9874, C6, AMD Radeon R6 Graphics
|
|
||||||
9874, C7, AMD Radeon R5 Graphics
|
|
||||||
9874, C8, AMD Radeon R7 Graphics
|
|
||||||
9874, C9, AMD Radeon R7 Graphics
|
|
||||||
9874, CA, AMD Radeon R5 Graphics
|
|
||||||
9874, CB, AMD Radeon R5 Graphics
|
|
||||||
9874, CC, AMD Radeon R7 Graphics
|
|
||||||
9874, CD, AMD Radeon R7 Graphics
|
|
||||||
9874, CE, AMD Radeon R5 Graphics
|
|
||||||
9874, E1, AMD Radeon R7 Graphics
|
|
||||||
9874, E2, AMD Radeon R7 Graphics
|
|
||||||
9874, E3, AMD Radeon R7 Graphics
|
|
||||||
9874, E4, AMD Radeon R7 Graphics
|
|
||||||
9874, E5, AMD Radeon R5 Graphics
|
|
||||||
9874, E6, AMD Radeon R5 Graphics
|
|
||||||
98E4, 80, AMD Radeon R5E Graphics
|
|
||||||
98E4, 81, AMD Radeon R4E Graphics
|
|
||||||
98E4, 83, AMD Radeon R2E Graphics
|
|
||||||
98E4, 84, AMD Radeon R2E Graphics
|
|
||||||
98E4, 86, AMD Radeon R1E Graphics
|
|
||||||
98E4, C0, AMD Radeon R4 Graphics
|
|
||||||
98E4, C1, AMD Radeon R5 Graphics
|
|
||||||
98E4, C2, AMD Radeon R4 Graphics
|
|
||||||
98E4, C4, AMD Radeon R5 Graphics
|
|
||||||
98E4, C6, AMD Radeon R5 Graphics
|
|
||||||
98E4, C8, AMD Radeon R4 Graphics
|
|
||||||
98E4, C9, AMD Radeon R4 Graphics
|
|
||||||
98E4, CA, AMD Radeon R5 Graphics
|
|
||||||
98E4, D0, AMD Radeon R2 Graphics
|
|
||||||
98E4, D1, AMD Radeon R2 Graphics
|
|
||||||
98E4, D2, AMD Radeon R2 Graphics
|
|
||||||
98E4, D4, AMD Radeon R2 Graphics
|
|
||||||
98E4, D9, AMD Radeon R5 Graphics
|
|
||||||
98E4, DA, AMD Radeon R5 Graphics
|
|
||||||
98E4, DB, AMD Radeon R3 Graphics
|
|
||||||
98E4, E1, AMD Radeon R3 Graphics
|
|
||||||
98E4, E2, AMD Radeon R3 Graphics
|
|
||||||
98E4, E9, AMD Radeon R4 Graphics
|
|
||||||
98E4, EA, AMD Radeon R4 Graphics
|
|
||||||
98E4, EB, AMD Radeon R3 Graphics
|
|
||||||
98E4, EB, AMD Radeon R4 Graphics
|
|
||||||
@@ -1,34 +0,0 @@
|
|||||||
[
|
|
||||||
{
|
|
||||||
"device_name": "NVIDIA GeForce RTX 3050 Ti Laptop GPU",
|
|
||||||
"gpu_clock": "1485MHz",
|
|
||||||
"mem_clock": "6001MHz",
|
|
||||||
"temp": "48C",
|
|
||||||
"fan_speed": null,
|
|
||||||
"power_draw": "13W",
|
|
||||||
"gpu_util": "5%",
|
|
||||||
"encode": "0%",
|
|
||||||
"decode": "0%",
|
|
||||||
"mem_util": "8%",
|
|
||||||
"mem_total": "4294967296",
|
|
||||||
"mem_used": "349372416",
|
|
||||||
"mem_free": "3945594880",
|
|
||||||
"processes" : []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"device_name": "AMD Radeon 680M",
|
|
||||||
"gpu_clock": "2200MHz",
|
|
||||||
"mem_clock": "2400MHz",
|
|
||||||
"temp": "48C",
|
|
||||||
"fan_speed": "CPU Fan",
|
|
||||||
"power_draw": "9W",
|
|
||||||
"gpu_util": "12%",
|
|
||||||
"encode": null,
|
|
||||||
"decode": "0%",
|
|
||||||
"mem_util": "7%",
|
|
||||||
"mem_total": "16929173504",
|
|
||||||
"mem_used": "1213784064",
|
|
||||||
"mem_free": "15715389440",
|
|
||||||
"processes" : []
|
|
||||||
}
|
|
||||||
]
|
|
||||||
@@ -2,18 +2,18 @@ package alerts
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/pocketbase/pocketbase/core"
|
"github.com/pocketbase/pocketbase/core"
|
||||||
)
|
)
|
||||||
|
|
||||||
// handleSmartDeviceAlert sends alerts when a SMART device state worsens into WARNING/FAILED.
|
// handleSmartDeviceAlert sends alerts when a SMART device state changes from PASSED to FAILED.
|
||||||
// This is automatic and does not require user opt-in.
|
// This is automatic and does not require user opt-in.
|
||||||
func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error {
|
func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error {
|
||||||
oldState := e.Record.Original().GetString("state")
|
oldState := e.Record.Original().GetString("state")
|
||||||
newState := e.Record.GetString("state")
|
newState := e.Record.GetString("state")
|
||||||
|
|
||||||
if !shouldSendSmartDeviceAlert(oldState, newState) {
|
// Only alert when transitioning from PASSED to FAILED
|
||||||
|
if oldState != "PASSED" || newState != "FAILED" {
|
||||||
return e.Next()
|
return e.Next()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -32,15 +32,14 @@ func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error {
|
|||||||
systemName := systemRecord.GetString("name")
|
systemName := systemRecord.GetString("name")
|
||||||
deviceName := e.Record.GetString("name")
|
deviceName := e.Record.GetString("name")
|
||||||
model := e.Record.GetString("model")
|
model := e.Record.GetString("model")
|
||||||
statusLabel := smartStateLabel(newState)
|
|
||||||
|
|
||||||
// Build alert message
|
// Build alert message
|
||||||
title := fmt.Sprintf("SMART %s on %s: %s %s", statusLabel, systemName, deviceName, smartStateEmoji(newState))
|
title := fmt.Sprintf("SMART failure on %s: %s \U0001F534", systemName, deviceName)
|
||||||
var message string
|
var message string
|
||||||
if model != "" {
|
if model != "" {
|
||||||
message = fmt.Sprintf("Disk %s (%s) SMART status changed to %s", deviceName, model, newState)
|
message = fmt.Sprintf("Disk %s (%s) SMART status changed to FAILED", deviceName, model)
|
||||||
} else {
|
} else {
|
||||||
message = fmt.Sprintf("Disk %s SMART status changed to %s", deviceName, newState)
|
message = fmt.Sprintf("Disk %s SMART status changed to FAILED", deviceName)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get users associated with the system
|
// Get users associated with the system
|
||||||
@@ -66,42 +65,3 @@ func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error {
|
|||||||
return e.Next()
|
return e.Next()
|
||||||
}
|
}
|
||||||
|
|
||||||
func shouldSendSmartDeviceAlert(oldState, newState string) bool {
|
|
||||||
oldSeverity := smartStateSeverity(oldState)
|
|
||||||
newSeverity := smartStateSeverity(newState)
|
|
||||||
|
|
||||||
// Ignore unknown states and recoveries; only alert on worsening transitions
|
|
||||||
// from known-good/degraded states into WARNING/FAILED.
|
|
||||||
return oldSeverity >= 1 && newSeverity > oldSeverity
|
|
||||||
}
|
|
||||||
|
|
||||||
func smartStateSeverity(state string) int {
|
|
||||||
switch state {
|
|
||||||
case "PASSED":
|
|
||||||
return 1
|
|
||||||
case "WARNING":
|
|
||||||
return 2
|
|
||||||
case "FAILED":
|
|
||||||
return 3
|
|
||||||
default:
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func smartStateEmoji(state string) string {
|
|
||||||
switch state {
|
|
||||||
case "WARNING":
|
|
||||||
return "\U0001F7E0"
|
|
||||||
default:
|
|
||||||
return "\U0001F534"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func smartStateLabel(state string) string {
|
|
||||||
switch state {
|
|
||||||
case "FAILED":
|
|
||||||
return "failure"
|
|
||||||
default:
|
|
||||||
return strings.ToLower(state)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -58,74 +58,6 @@ func TestSmartDeviceAlert(t *testing.T) {
|
|||||||
assert.Contains(t, lastMessage.Text, "FAILED")
|
assert.Contains(t, lastMessage.Text, "FAILED")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestSmartDeviceAlertPassedToWarning(t *testing.T) {
|
|
||||||
hub, user := beszelTests.GetHubWithUser(t)
|
|
||||||
defer hub.Cleanup()
|
|
||||||
|
|
||||||
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
|
||||||
"name": "test-system",
|
|
||||||
"users": []string{user.Id},
|
|
||||||
"host": "127.0.0.1",
|
|
||||||
})
|
|
||||||
assert.NoError(t, err)
|
|
||||||
|
|
||||||
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
|
|
||||||
"system": system.Id,
|
|
||||||
"name": "/dev/mmcblk0",
|
|
||||||
"model": "eMMC",
|
|
||||||
"state": "PASSED",
|
|
||||||
})
|
|
||||||
assert.NoError(t, err)
|
|
||||||
|
|
||||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
|
||||||
assert.NoError(t, err)
|
|
||||||
|
|
||||||
smartDevice.Set("state", "WARNING")
|
|
||||||
err = hub.Save(smartDevice)
|
|
||||||
assert.NoError(t, err)
|
|
||||||
|
|
||||||
time.Sleep(50 * time.Millisecond)
|
|
||||||
|
|
||||||
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent after state changed to WARNING")
|
|
||||||
lastMessage := hub.TestMailer.LastMessage()
|
|
||||||
assert.Contains(t, lastMessage.Subject, "SMART warning on test-system")
|
|
||||||
assert.Contains(t, lastMessage.Text, "WARNING")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestSmartDeviceAlertWarningToFailed(t *testing.T) {
|
|
||||||
hub, user := beszelTests.GetHubWithUser(t)
|
|
||||||
defer hub.Cleanup()
|
|
||||||
|
|
||||||
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
|
||||||
"name": "test-system",
|
|
||||||
"users": []string{user.Id},
|
|
||||||
"host": "127.0.0.1",
|
|
||||||
})
|
|
||||||
assert.NoError(t, err)
|
|
||||||
|
|
||||||
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
|
|
||||||
"system": system.Id,
|
|
||||||
"name": "/dev/mmcblk0",
|
|
||||||
"model": "eMMC",
|
|
||||||
"state": "WARNING",
|
|
||||||
})
|
|
||||||
assert.NoError(t, err)
|
|
||||||
|
|
||||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
|
||||||
assert.NoError(t, err)
|
|
||||||
|
|
||||||
smartDevice.Set("state", "FAILED")
|
|
||||||
err = hub.Save(smartDevice)
|
|
||||||
assert.NoError(t, err)
|
|
||||||
|
|
||||||
time.Sleep(50 * time.Millisecond)
|
|
||||||
|
|
||||||
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent after state changed from WARNING to FAILED")
|
|
||||||
lastMessage := hub.TestMailer.LastMessage()
|
|
||||||
assert.Contains(t, lastMessage.Subject, "SMART failure on test-system")
|
|
||||||
assert.Contains(t, lastMessage.Text, "FAILED")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestSmartDeviceAlertNoAlertOnNonPassedToFailed(t *testing.T) {
|
func TestSmartDeviceAlertNoAlertOnNonPassedToFailed(t *testing.T) {
|
||||||
hub, user := beszelTests.GetHubWithUser(t)
|
hub, user := beszelTests.GetHubWithUser(t)
|
||||||
defer hub.Cleanup()
|
defer hub.Cleanup()
|
||||||
@@ -151,8 +83,7 @@ func TestSmartDeviceAlertNoAlertOnNonPassedToFailed(t *testing.T) {
|
|||||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
// Update the state from UNKNOWN to FAILED - should NOT trigger alert.
|
// Update the state from UNKNOWN to FAILED - should NOT trigger alert
|
||||||
// We only alert from known healthy/degraded states.
|
|
||||||
smartDevice.Set("state", "FAILED")
|
smartDevice.Set("state", "FAILED")
|
||||||
err = hub.Save(smartDevice)
|
err = hub.Save(smartDevice)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|||||||
@@ -23,9 +23,6 @@ COPY --from=builder /agent /agent
|
|||||||
# this is so we don't need to create the /tmp directory in the scratch container
|
# this is so we don't need to create the /tmp directory in the scratch container
|
||||||
COPY --from=builder /tmp /tmp
|
COPY --from=builder /tmp /tmp
|
||||||
|
|
||||||
# AMD GPU name lookup (used by agent on Linux when /usr/share/libdrm/amdgpu.ids is read)
|
|
||||||
COPY --from=builder /app/agent/test-data/amdgpu.ids /usr/share/libdrm/amdgpu.ids
|
|
||||||
|
|
||||||
# Ensure data persistence across container recreations
|
# Ensure data persistence across container recreations
|
||||||
VOLUME ["/var/lib/beszel-agent"]
|
VOLUME ["/var/lib/beszel-agent"]
|
||||||
|
|
||||||
|
|||||||
@@ -20,9 +20,6 @@ RUN rm -rf /tmp/*
|
|||||||
FROM alpine:3.23
|
FROM alpine:3.23
|
||||||
COPY --from=builder /agent /agent
|
COPY --from=builder /agent /agent
|
||||||
|
|
||||||
# AMD GPU name lookup (used by agent on Linux when /usr/share/libdrm/amdgpu.ids is read)
|
|
||||||
COPY --from=builder /app/agent/test-data/amdgpu.ids /usr/share/libdrm/amdgpu.ids
|
|
||||||
|
|
||||||
RUN apk add --no-cache smartmontools
|
RUN apk add --no-cache smartmontools
|
||||||
|
|
||||||
# Ensure data persistence across container recreations
|
# Ensure data persistence across container recreations
|
||||||
|
|||||||
@@ -37,9 +37,6 @@ RUN apt-get update && apt-get install -y \
|
|||||||
FROM nvidia/cuda:12.2.2-base-ubuntu22.04
|
FROM nvidia/cuda:12.2.2-base-ubuntu22.04
|
||||||
COPY --from=builder /agent /agent
|
COPY --from=builder /agent /agent
|
||||||
|
|
||||||
# AMD GPU name lookup (used by agent on hybrid laptops when /usr/share/libdrm/amdgpu.ids is read)
|
|
||||||
COPY --from=builder /app/agent/test-data/amdgpu.ids /usr/share/libdrm/amdgpu.ids
|
|
||||||
|
|
||||||
# Copy smartmontools binaries and config files
|
# Copy smartmontools binaries and config files
|
||||||
COPY --from=smartmontools-builder /usr/sbin/smartctl /usr/sbin/smartctl
|
COPY --from=smartmontools-builder /usr/sbin/smartctl /usr/sbin/smartctl
|
||||||
|
|
||||||
|
|||||||
@@ -1,303 +0,0 @@
|
|||||||
// Package heartbeat sends periodic outbound pings to an external monitoring
|
|
||||||
// endpoint (e.g. BetterStack, Uptime Kuma, Healthchecks.io) so operators can
|
|
||||||
// monitor Beszel without exposing it to the internet.
|
|
||||||
package heartbeat
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"net/http"
|
|
||||||
"net/url"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/henrygd/beszel"
|
|
||||||
"github.com/pocketbase/pocketbase/core"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Default values for heartbeat configuration.
|
|
||||||
const (
|
|
||||||
defaultInterval = 60 // seconds
|
|
||||||
httpTimeout = 10 * time.Second
|
|
||||||
)
|
|
||||||
|
|
||||||
// Payload is the JSON body sent with each heartbeat request.
|
|
||||||
type Payload struct {
|
|
||||||
// Status is "ok" when all non-paused systems are up, "warn" when alerts
|
|
||||||
// are triggered but no systems are down, and "error" when any system is down.
|
|
||||||
Status string `json:"status"`
|
|
||||||
Timestamp string `json:"timestamp"`
|
|
||||||
Msg string `json:"msg"`
|
|
||||||
Systems SystemsSummary `json:"systems"`
|
|
||||||
Down []SystemInfo `json:"down_systems,omitempty"`
|
|
||||||
Alerts []AlertInfo `json:"triggered_alerts,omitempty"`
|
|
||||||
Version string `json:"beszel_version"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// SystemsSummary contains counts of systems by status.
|
|
||||||
type SystemsSummary struct {
|
|
||||||
Total int `json:"total"`
|
|
||||||
Up int `json:"up"`
|
|
||||||
Down int `json:"down"`
|
|
||||||
Paused int `json:"paused"`
|
|
||||||
Pending int `json:"pending"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// SystemInfo identifies a system that is currently down.
|
|
||||||
type SystemInfo struct {
|
|
||||||
ID string `json:"id" db:"id"`
|
|
||||||
Name string `json:"name" db:"name"`
|
|
||||||
Host string `json:"host" db:"host"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// AlertInfo describes a currently triggered alert.
|
|
||||||
type AlertInfo struct {
|
|
||||||
SystemID string `json:"system_id"`
|
|
||||||
SystemName string `json:"system_name"`
|
|
||||||
AlertName string `json:"alert_name"`
|
|
||||||
Threshold float64 `json:"threshold"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// Config holds heartbeat settings read from environment variables.
|
|
||||||
type Config struct {
|
|
||||||
URL string // endpoint to ping
|
|
||||||
Interval int // seconds between pings
|
|
||||||
Method string // HTTP method (GET or POST, default POST)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Heartbeat manages the periodic outbound health check.
|
|
||||||
type Heartbeat struct {
|
|
||||||
app core.App
|
|
||||||
config Config
|
|
||||||
client *http.Client
|
|
||||||
}
|
|
||||||
|
|
||||||
// New creates a Heartbeat if configuration is present.
|
|
||||||
// Returns nil if HEARTBEAT_URL is not set (feature disabled).
|
|
||||||
func New(app core.App, getEnv func(string) (string, bool)) *Heartbeat {
|
|
||||||
url, _ := getEnv("HEARTBEAT_URL")
|
|
||||||
url = strings.TrimSpace(url)
|
|
||||||
if app == nil || url == "" {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
interval := defaultInterval
|
|
||||||
if v, ok := getEnv("HEARTBEAT_INTERVAL"); ok {
|
|
||||||
if parsed, err := strconv.Atoi(v); err == nil && parsed > 0 {
|
|
||||||
interval = parsed
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
method := http.MethodPost
|
|
||||||
if v, ok := getEnv("HEARTBEAT_METHOD"); ok {
|
|
||||||
v = strings.ToUpper(strings.TrimSpace(v))
|
|
||||||
if v == http.MethodGet || v == http.MethodHead {
|
|
||||||
method = v
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return &Heartbeat{
|
|
||||||
app: app,
|
|
||||||
config: Config{
|
|
||||||
URL: url,
|
|
||||||
Interval: interval,
|
|
||||||
Method: method,
|
|
||||||
},
|
|
||||||
client: &http.Client{Timeout: httpTimeout},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start begins the heartbeat loop. It blocks and should be called in a goroutine.
|
|
||||||
// The loop runs until the provided stop channel is closed.
|
|
||||||
func (hb *Heartbeat) Start(stop <-chan struct{}) {
|
|
||||||
sanitizedURL := sanitizeHeartbeatURL(hb.config.URL)
|
|
||||||
hb.app.Logger().Info("Heartbeat enabled",
|
|
||||||
"url", sanitizedURL,
|
|
||||||
"interval", fmt.Sprintf("%ds", hb.config.Interval),
|
|
||||||
"method", hb.config.Method,
|
|
||||||
)
|
|
||||||
|
|
||||||
// Send an initial heartbeat immediately on startup.
|
|
||||||
hb.send()
|
|
||||||
|
|
||||||
ticker := time.NewTicker(time.Duration(hb.config.Interval) * time.Second)
|
|
||||||
defer ticker.Stop()
|
|
||||||
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-stop:
|
|
||||||
return
|
|
||||||
case <-ticker.C:
|
|
||||||
hb.send()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send performs a single heartbeat ping. Exposed for the test-heartbeat API endpoint.
|
|
||||||
func (hb *Heartbeat) Send() error {
|
|
||||||
return hb.send()
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetConfig returns the current heartbeat configuration.
|
|
||||||
func (hb *Heartbeat) GetConfig() Config {
|
|
||||||
return hb.config
|
|
||||||
}
|
|
||||||
|
|
||||||
func (hb *Heartbeat) send() error {
|
|
||||||
var req *http.Request
|
|
||||||
var err error
|
|
||||||
method := normalizeMethod(hb.config.Method)
|
|
||||||
|
|
||||||
if method == http.MethodGet || method == http.MethodHead {
|
|
||||||
req, err = http.NewRequest(method, hb.config.URL, nil)
|
|
||||||
} else {
|
|
||||||
payload, payloadErr := hb.buildPayload()
|
|
||||||
if payloadErr != nil {
|
|
||||||
hb.app.Logger().Error("Heartbeat: failed to build payload", "err", payloadErr)
|
|
||||||
return payloadErr
|
|
||||||
}
|
|
||||||
|
|
||||||
body, jsonErr := json.Marshal(payload)
|
|
||||||
if jsonErr != nil {
|
|
||||||
hb.app.Logger().Error("Heartbeat: failed to marshal payload", "err", jsonErr)
|
|
||||||
return jsonErr
|
|
||||||
}
|
|
||||||
req, err = http.NewRequest(http.MethodPost, hb.config.URL, bytes.NewReader(body))
|
|
||||||
if err == nil {
|
|
||||||
req.Header.Set("Content-Type", "application/json")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
hb.app.Logger().Error("Heartbeat: failed to create request", "err", err)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
req.Header.Set("User-Agent", "Beszel-Heartbeat")
|
|
||||||
|
|
||||||
resp, err := hb.client.Do(req)
|
|
||||||
if err != nil {
|
|
||||||
hb.app.Logger().Error("Heartbeat: request failed", "url", sanitizeHeartbeatURL(hb.config.URL), "err", err)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer resp.Body.Close()
|
|
||||||
|
|
||||||
if resp.StatusCode >= 400 {
|
|
||||||
hb.app.Logger().Warn("Heartbeat: non-success response",
|
|
||||||
"url", sanitizeHeartbeatURL(hb.config.URL),
|
|
||||||
"status", resp.StatusCode,
|
|
||||||
)
|
|
||||||
return fmt.Errorf("heartbeat endpoint returned status %d", resp.StatusCode)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (hb *Heartbeat) buildPayload() (*Payload, error) {
|
|
||||||
db := hb.app.DB()
|
|
||||||
|
|
||||||
// Count systems by status.
|
|
||||||
var systemCounts []struct {
|
|
||||||
Status string `db:"status"`
|
|
||||||
Count int `db:"cnt"`
|
|
||||||
}
|
|
||||||
err := db.NewQuery("SELECT status, COUNT(*) as cnt FROM systems GROUP BY status").All(&systemCounts)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("query system counts: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
summary := SystemsSummary{}
|
|
||||||
for _, sc := range systemCounts {
|
|
||||||
switch sc.Status {
|
|
||||||
case "up":
|
|
||||||
summary.Up = sc.Count
|
|
||||||
case "down":
|
|
||||||
summary.Down = sc.Count
|
|
||||||
case "paused":
|
|
||||||
summary.Paused = sc.Count
|
|
||||||
case "pending":
|
|
||||||
summary.Pending = sc.Count
|
|
||||||
}
|
|
||||||
summary.Total += sc.Count
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get names of down systems.
|
|
||||||
var downSystems []SystemInfo
|
|
||||||
if summary.Down > 0 {
|
|
||||||
err = db.NewQuery("SELECT id, name, host FROM systems WHERE status = 'down'").All(&downSystems)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("query down systems: %w", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get triggered alerts with system names.
|
|
||||||
var triggeredAlerts []struct {
|
|
||||||
SystemID string `db:"system"`
|
|
||||||
SystemName string `db:"system_name"`
|
|
||||||
AlertName string `db:"name"`
|
|
||||||
Value float64 `db:"value"`
|
|
||||||
}
|
|
||||||
err = db.NewQuery(`
|
|
||||||
SELECT a.system, s.name as system_name, a.name, a.value
|
|
||||||
FROM alerts a
|
|
||||||
JOIN systems s ON a.system = s.id
|
|
||||||
WHERE a.triggered = true
|
|
||||||
`).All(&triggeredAlerts)
|
|
||||||
if err != nil {
|
|
||||||
// Non-fatal: alerts info is supplementary.
|
|
||||||
triggeredAlerts = nil
|
|
||||||
}
|
|
||||||
|
|
||||||
alerts := make([]AlertInfo, 0, len(triggeredAlerts))
|
|
||||||
for _, ta := range triggeredAlerts {
|
|
||||||
alerts = append(alerts, AlertInfo{
|
|
||||||
SystemID: ta.SystemID,
|
|
||||||
SystemName: ta.SystemName,
|
|
||||||
AlertName: ta.AlertName,
|
|
||||||
Threshold: ta.Value,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Determine overall status.
|
|
||||||
status := "ok"
|
|
||||||
msg := "All systems operational"
|
|
||||||
if summary.Down > 0 {
|
|
||||||
status = "error"
|
|
||||||
names := make([]string, len(downSystems))
|
|
||||||
for i, ds := range downSystems {
|
|
||||||
names[i] = ds.Name
|
|
||||||
}
|
|
||||||
msg = fmt.Sprintf("%d system(s) down: %s", summary.Down, strings.Join(names, ", "))
|
|
||||||
} else if len(alerts) > 0 {
|
|
||||||
status = "warn"
|
|
||||||
msg = fmt.Sprintf("%d alert(s) triggered", len(alerts))
|
|
||||||
}
|
|
||||||
|
|
||||||
return &Payload{
|
|
||||||
Status: status,
|
|
||||||
Timestamp: time.Now().UTC().Format(time.RFC3339),
|
|
||||||
Msg: msg,
|
|
||||||
Systems: summary,
|
|
||||||
Down: downSystems,
|
|
||||||
Alerts: alerts,
|
|
||||||
Version: beszel.Version,
|
|
||||||
}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func normalizeMethod(method string) string {
|
|
||||||
upper := strings.ToUpper(strings.TrimSpace(method))
|
|
||||||
if upper == http.MethodGet || upper == http.MethodHead || upper == http.MethodPost {
|
|
||||||
return upper
|
|
||||||
}
|
|
||||||
return http.MethodPost
|
|
||||||
}
|
|
||||||
|
|
||||||
func sanitizeHeartbeatURL(rawURL string) string {
|
|
||||||
parsed, err := url.Parse(strings.TrimSpace(rawURL))
|
|
||||||
if err != nil || parsed.Scheme == "" || parsed.Host == "" {
|
|
||||||
return "<invalid-url>"
|
|
||||||
}
|
|
||||||
return parsed.Scheme + "://" + parsed.Host
|
|
||||||
}
|
|
||||||
@@ -1,258 +0,0 @@
|
|||||||
//go:build testing
|
|
||||||
// +build testing
|
|
||||||
|
|
||||||
package heartbeat_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
"io"
|
|
||||||
"net/http"
|
|
||||||
"net/http/httptest"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/henrygd/beszel/internal/hub/heartbeat"
|
|
||||||
beszeltests "github.com/henrygd/beszel/internal/tests"
|
|
||||||
"github.com/pocketbase/pocketbase/core"
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestNew(t *testing.T) {
|
|
||||||
t.Run("returns nil when app is missing", func(t *testing.T) {
|
|
||||||
hb := heartbeat.New(nil, envGetter(map[string]string{
|
|
||||||
"HEARTBEAT_URL": "https://heartbeat.example.com/ping",
|
|
||||||
}))
|
|
||||||
assert.Nil(t, hb)
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("returns nil when URL is missing", func(t *testing.T) {
|
|
||||||
app := newTestHub(t)
|
|
||||||
hb := heartbeat.New(app.App, func(string) (string, bool) {
|
|
||||||
return "", false
|
|
||||||
})
|
|
||||||
assert.Nil(t, hb)
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("parses and normalizes config values", func(t *testing.T) {
|
|
||||||
app := newTestHub(t)
|
|
||||||
env := map[string]string{
|
|
||||||
"HEARTBEAT_URL": " https://heartbeat.example.com/ping ",
|
|
||||||
"HEARTBEAT_INTERVAL": "90",
|
|
||||||
"HEARTBEAT_METHOD": "head",
|
|
||||||
}
|
|
||||||
getEnv := func(key string) (string, bool) {
|
|
||||||
v, ok := env[key]
|
|
||||||
return v, ok
|
|
||||||
}
|
|
||||||
|
|
||||||
hb := heartbeat.New(app.App, getEnv)
|
|
||||||
require.NotNil(t, hb)
|
|
||||||
cfg := hb.GetConfig()
|
|
||||||
assert.Equal(t, "https://heartbeat.example.com/ping", cfg.URL)
|
|
||||||
assert.Equal(t, 90, cfg.Interval)
|
|
||||||
assert.Equal(t, http.MethodHead, cfg.Method)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestSendGETDoesNotRequireAppOrDB(t *testing.T) {
|
|
||||||
app := newTestHub(t)
|
|
||||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
assert.Equal(t, http.MethodGet, r.Method)
|
|
||||||
assert.Equal(t, "Beszel-Heartbeat", r.Header.Get("User-Agent"))
|
|
||||||
w.WriteHeader(http.StatusOK)
|
|
||||||
}))
|
|
||||||
defer server.Close()
|
|
||||||
|
|
||||||
hb := heartbeat.New(app.App, envGetter(map[string]string{
|
|
||||||
"HEARTBEAT_URL": server.URL,
|
|
||||||
"HEARTBEAT_METHOD": "GET",
|
|
||||||
}))
|
|
||||||
require.NotNil(t, hb)
|
|
||||||
|
|
||||||
require.NoError(t, hb.Send())
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestSendReturnsErrorOnHTTPFailureStatus(t *testing.T) {
|
|
||||||
app := newTestHub(t)
|
|
||||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
w.WriteHeader(http.StatusInternalServerError)
|
|
||||||
}))
|
|
||||||
defer server.Close()
|
|
||||||
|
|
||||||
hb := heartbeat.New(app.App, envGetter(map[string]string{
|
|
||||||
"HEARTBEAT_URL": server.URL,
|
|
||||||
"HEARTBEAT_METHOD": "GET",
|
|
||||||
}))
|
|
||||||
require.NotNil(t, hb)
|
|
||||||
|
|
||||||
err := hb.Send()
|
|
||||||
require.Error(t, err)
|
|
||||||
assert.ErrorContains(t, err, "heartbeat endpoint returned status 500")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestSendPOSTBuildsExpectedStatuses(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
setup func(t *testing.T, app *beszeltests.TestHub, user *core.Record)
|
|
||||||
expectStatus string
|
|
||||||
expectMsgPart string
|
|
||||||
expectDown int
|
|
||||||
expectAlerts int
|
|
||||||
expectTotal int
|
|
||||||
expectUp int
|
|
||||||
expectPaused int
|
|
||||||
expectPending int
|
|
||||||
expectDownSumm int
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
name: "error when at least one system is down",
|
|
||||||
setup: func(t *testing.T, app *beszeltests.TestHub, user *core.Record) {
|
|
||||||
downSystem := createTestSystem(t, app, user.Id, "db-1", "10.0.0.1", "down")
|
|
||||||
_ = createTestSystem(t, app, user.Id, "web-1", "10.0.0.2", "up")
|
|
||||||
createTriggeredAlert(t, app, user.Id, downSystem.Id, "CPU", 95)
|
|
||||||
},
|
|
||||||
expectStatus: "error",
|
|
||||||
expectMsgPart: "1 system(s) down",
|
|
||||||
expectDown: 1,
|
|
||||||
expectAlerts: 1,
|
|
||||||
expectTotal: 2,
|
|
||||||
expectUp: 1,
|
|
||||||
expectDownSumm: 1,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "warn when only alerts are triggered",
|
|
||||||
setup: func(t *testing.T, app *beszeltests.TestHub, user *core.Record) {
|
|
||||||
system := createTestSystem(t, app, user.Id, "api-1", "10.1.0.1", "up")
|
|
||||||
createTriggeredAlert(t, app, user.Id, system.Id, "CPU", 90)
|
|
||||||
},
|
|
||||||
expectStatus: "warn",
|
|
||||||
expectMsgPart: "1 alert(s) triggered",
|
|
||||||
expectDown: 0,
|
|
||||||
expectAlerts: 1,
|
|
||||||
expectTotal: 1,
|
|
||||||
expectUp: 1,
|
|
||||||
expectDownSumm: 0,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "ok when no down systems and no alerts",
|
|
||||||
setup: func(t *testing.T, app *beszeltests.TestHub, user *core.Record) {
|
|
||||||
_ = createTestSystem(t, app, user.Id, "node-1", "10.2.0.1", "up")
|
|
||||||
_ = createTestSystem(t, app, user.Id, "node-2", "10.2.0.2", "paused")
|
|
||||||
_ = createTestSystem(t, app, user.Id, "node-3", "10.2.0.3", "pending")
|
|
||||||
},
|
|
||||||
expectStatus: "ok",
|
|
||||||
expectMsgPart: "All systems operational",
|
|
||||||
expectDown: 0,
|
|
||||||
expectAlerts: 0,
|
|
||||||
expectTotal: 3,
|
|
||||||
expectUp: 1,
|
|
||||||
expectPaused: 1,
|
|
||||||
expectPending: 1,
|
|
||||||
expectDownSumm: 0,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tt := range tests {
|
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
|
||||||
app := newTestHub(t)
|
|
||||||
user := createTestUser(t, app)
|
|
||||||
tt.setup(t, app, user)
|
|
||||||
|
|
||||||
type requestCapture struct {
|
|
||||||
method string
|
|
||||||
userAgent string
|
|
||||||
contentType string
|
|
||||||
payload heartbeat.Payload
|
|
||||||
}
|
|
||||||
|
|
||||||
captured := make(chan requestCapture, 1)
|
|
||||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
defer r.Body.Close()
|
|
||||||
body, err := io.ReadAll(r.Body)
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
var payload heartbeat.Payload
|
|
||||||
require.NoError(t, json.Unmarshal(body, &payload))
|
|
||||||
captured <- requestCapture{
|
|
||||||
method: r.Method,
|
|
||||||
userAgent: r.Header.Get("User-Agent"),
|
|
||||||
contentType: r.Header.Get("Content-Type"),
|
|
||||||
payload: payload,
|
|
||||||
}
|
|
||||||
w.WriteHeader(http.StatusNoContent)
|
|
||||||
}))
|
|
||||||
defer server.Close()
|
|
||||||
|
|
||||||
hb := heartbeat.New(app.App, envGetter(map[string]string{
|
|
||||||
"HEARTBEAT_URL": server.URL,
|
|
||||||
"HEARTBEAT_METHOD": "POST",
|
|
||||||
}))
|
|
||||||
require.NotNil(t, hb)
|
|
||||||
require.NoError(t, hb.Send())
|
|
||||||
|
|
||||||
req := <-captured
|
|
||||||
assert.Equal(t, http.MethodPost, req.method)
|
|
||||||
assert.Equal(t, "Beszel-Heartbeat", req.userAgent)
|
|
||||||
assert.Equal(t, "application/json", req.contentType)
|
|
||||||
|
|
||||||
assert.Equal(t, tt.expectStatus, req.payload.Status)
|
|
||||||
assert.Contains(t, req.payload.Msg, tt.expectMsgPart)
|
|
||||||
assert.Equal(t, tt.expectDown, len(req.payload.Down))
|
|
||||||
assert.Equal(t, tt.expectAlerts, len(req.payload.Alerts))
|
|
||||||
assert.Equal(t, tt.expectTotal, req.payload.Systems.Total)
|
|
||||||
assert.Equal(t, tt.expectUp, req.payload.Systems.Up)
|
|
||||||
assert.Equal(t, tt.expectDownSumm, req.payload.Systems.Down)
|
|
||||||
assert.Equal(t, tt.expectPaused, req.payload.Systems.Paused)
|
|
||||||
assert.Equal(t, tt.expectPending, req.payload.Systems.Pending)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func newTestHub(t *testing.T) *beszeltests.TestHub {
|
|
||||||
t.Helper()
|
|
||||||
app, err := beszeltests.NewTestHub(t.TempDir())
|
|
||||||
require.NoError(t, err)
|
|
||||||
t.Cleanup(app.Cleanup)
|
|
||||||
return app
|
|
||||||
}
|
|
||||||
|
|
||||||
func createTestUser(t *testing.T, app *beszeltests.TestHub) *core.Record {
|
|
||||||
t.Helper()
|
|
||||||
user, err := beszeltests.CreateUser(app.App, "admin@example.com", "password123")
|
|
||||||
require.NoError(t, err)
|
|
||||||
return user
|
|
||||||
}
|
|
||||||
|
|
||||||
func createTestSystem(t *testing.T, app *beszeltests.TestHub, userID, name, host, status string) *core.Record {
|
|
||||||
t.Helper()
|
|
||||||
system, err := beszeltests.CreateRecord(app.App, "systems", map[string]any{
|
|
||||||
"name": name,
|
|
||||||
"host": host,
|
|
||||||
"port": "45876",
|
|
||||||
"users": []string{userID},
|
|
||||||
"status": status,
|
|
||||||
})
|
|
||||||
require.NoError(t, err)
|
|
||||||
return system
|
|
||||||
}
|
|
||||||
|
|
||||||
func createTriggeredAlert(t *testing.T, app *beszeltests.TestHub, userID, systemID, name string, threshold float64) *core.Record {
|
|
||||||
t.Helper()
|
|
||||||
alert, err := beszeltests.CreateRecord(app.App, "alerts", map[string]any{
|
|
||||||
"name": name,
|
|
||||||
"system": systemID,
|
|
||||||
"user": userID,
|
|
||||||
"value": threshold,
|
|
||||||
"min": 0,
|
|
||||||
"triggered": true,
|
|
||||||
})
|
|
||||||
require.NoError(t, err)
|
|
||||||
return alert
|
|
||||||
}
|
|
||||||
|
|
||||||
func envGetter(values map[string]string) func(string) (string, bool) {
|
|
||||||
return func(key string) (string, bool) {
|
|
||||||
v, ok := values[key]
|
|
||||||
return v, ok
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -15,7 +15,6 @@ import (
|
|||||||
"github.com/henrygd/beszel"
|
"github.com/henrygd/beszel"
|
||||||
"github.com/henrygd/beszel/internal/alerts"
|
"github.com/henrygd/beszel/internal/alerts"
|
||||||
"github.com/henrygd/beszel/internal/hub/config"
|
"github.com/henrygd/beszel/internal/hub/config"
|
||||||
"github.com/henrygd/beszel/internal/hub/heartbeat"
|
|
||||||
"github.com/henrygd/beszel/internal/hub/systems"
|
"github.com/henrygd/beszel/internal/hub/systems"
|
||||||
"github.com/henrygd/beszel/internal/records"
|
"github.com/henrygd/beszel/internal/records"
|
||||||
"github.com/henrygd/beszel/internal/users"
|
"github.com/henrygd/beszel/internal/users"
|
||||||
@@ -34,8 +33,6 @@ type Hub struct {
|
|||||||
um *users.UserManager
|
um *users.UserManager
|
||||||
rm *records.RecordManager
|
rm *records.RecordManager
|
||||||
sm *systems.SystemManager
|
sm *systems.SystemManager
|
||||||
hb *heartbeat.Heartbeat
|
|
||||||
hbStop chan struct{}
|
|
||||||
pubKey string
|
pubKey string
|
||||||
signer ssh.Signer
|
signer ssh.Signer
|
||||||
appURL string
|
appURL string
|
||||||
@@ -51,10 +48,6 @@ func NewHub(app core.App) *Hub {
|
|||||||
hub.rm = records.NewRecordManager(hub)
|
hub.rm = records.NewRecordManager(hub)
|
||||||
hub.sm = systems.NewSystemManager(hub)
|
hub.sm = systems.NewSystemManager(hub)
|
||||||
hub.appURL, _ = GetEnv("APP_URL")
|
hub.appURL, _ = GetEnv("APP_URL")
|
||||||
hub.hb = heartbeat.New(app, GetEnv)
|
|
||||||
if hub.hb != nil {
|
|
||||||
hub.hbStop = make(chan struct{})
|
|
||||||
}
|
|
||||||
return hub
|
return hub
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -95,10 +88,6 @@ func (h *Hub) StartHub() error {
|
|||||||
if err := h.sm.Initialize(); err != nil {
|
if err := h.sm.Initialize(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// start heartbeat if configured
|
|
||||||
if h.hb != nil {
|
|
||||||
go h.hb.Start(h.hbStop)
|
|
||||||
}
|
|
||||||
return e.Next()
|
return e.Next()
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -298,9 +287,6 @@ func (h *Hub) registerApiRoutes(se *core.ServeEvent) error {
|
|||||||
})
|
})
|
||||||
// send test notification
|
// send test notification
|
||||||
apiAuth.POST("/test-notification", h.SendTestNotification)
|
apiAuth.POST("/test-notification", h.SendTestNotification)
|
||||||
// heartbeat status and test
|
|
||||||
apiAuth.GET("/heartbeat-status", h.getHeartbeatStatus)
|
|
||||||
apiAuth.POST("/test-heartbeat", h.testHeartbeat)
|
|
||||||
// get config.yml content
|
// get config.yml content
|
||||||
apiAuth.GET("/config-yaml", config.GetYamlConfig)
|
apiAuth.GET("/config-yaml", config.GetYamlConfig)
|
||||||
// handle agent websocket connection
|
// handle agent websocket connection
|
||||||
@@ -417,42 +403,6 @@ func (h *Hub) getUniversalToken(e *core.RequestEvent) error {
|
|||||||
return e.JSON(http.StatusOK, response)
|
return e.JSON(http.StatusOK, response)
|
||||||
}
|
}
|
||||||
|
|
||||||
// getHeartbeatStatus returns current heartbeat configuration and whether it's enabled
|
|
||||||
func (h *Hub) getHeartbeatStatus(e *core.RequestEvent) error {
|
|
||||||
if e.Auth.GetString("role") != "admin" {
|
|
||||||
return e.ForbiddenError("Requires admin role", nil)
|
|
||||||
}
|
|
||||||
if h.hb == nil {
|
|
||||||
return e.JSON(http.StatusOK, map[string]any{
|
|
||||||
"enabled": false,
|
|
||||||
"msg": "Set HEARTBEAT_URL to enable outbound heartbeat monitoring",
|
|
||||||
})
|
|
||||||
}
|
|
||||||
cfg := h.hb.GetConfig()
|
|
||||||
return e.JSON(http.StatusOK, map[string]any{
|
|
||||||
"enabled": true,
|
|
||||||
"url": cfg.URL,
|
|
||||||
"interval": cfg.Interval,
|
|
||||||
"method": cfg.Method,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// testHeartbeat triggers a single heartbeat ping and returns the result
|
|
||||||
func (h *Hub) testHeartbeat(e *core.RequestEvent) error {
|
|
||||||
if e.Auth.GetString("role") != "admin" {
|
|
||||||
return e.ForbiddenError("Requires admin role", nil)
|
|
||||||
}
|
|
||||||
if h.hb == nil {
|
|
||||||
return e.JSON(http.StatusOK, map[string]any{
|
|
||||||
"err": "Heartbeat not configured. Set HEARTBEAT_URL environment variable.",
|
|
||||||
})
|
|
||||||
}
|
|
||||||
if err := h.hb.Send(); err != nil {
|
|
||||||
return e.JSON(http.StatusOK, map[string]any{"err": err.Error()})
|
|
||||||
}
|
|
||||||
return e.JSON(http.StatusOK, map[string]any{"err": false})
|
|
||||||
}
|
|
||||||
|
|
||||||
// containerRequestHandler handles both container logs and info requests
|
// containerRequestHandler handles both container logs and info requests
|
||||||
func (h *Hub) containerRequestHandler(e *core.RequestEvent, fetchFunc func(*systems.System, string) (string, error), responseKey string) error {
|
func (h *Hub) containerRequestHandler(e *core.RequestEvent, fetchFunc func(*systems.System, string) (string, error), responseKey string) error {
|
||||||
systemID := e.Request.URL.Query().Get("system")
|
systemID := e.Request.URL.Query().Get("system")
|
||||||
|
|||||||
@@ -362,58 +362,6 @@ func TestApiRoutesAuthentication(t *testing.T) {
|
|||||||
ExpectedContent: []string{"test-system"},
|
ExpectedContent: []string{"test-system"},
|
||||||
TestAppFactory: testAppFactory,
|
TestAppFactory: testAppFactory,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
Name: "GET /heartbeat-status - no auth should fail",
|
|
||||||
Method: http.MethodGet,
|
|
||||||
URL: "/api/beszel/heartbeat-status",
|
|
||||||
ExpectedStatus: 401,
|
|
||||||
ExpectedContent: []string{"requires valid"},
|
|
||||||
TestAppFactory: testAppFactory,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Name: "GET /heartbeat-status - with user auth should fail",
|
|
||||||
Method: http.MethodGet,
|
|
||||||
URL: "/api/beszel/heartbeat-status",
|
|
||||||
Headers: map[string]string{
|
|
||||||
"Authorization": userToken,
|
|
||||||
},
|
|
||||||
ExpectedStatus: 403,
|
|
||||||
ExpectedContent: []string{"Requires admin role"},
|
|
||||||
TestAppFactory: testAppFactory,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Name: "GET /heartbeat-status - with admin auth should succeed",
|
|
||||||
Method: http.MethodGet,
|
|
||||||
URL: "/api/beszel/heartbeat-status",
|
|
||||||
Headers: map[string]string{
|
|
||||||
"Authorization": adminUserToken,
|
|
||||||
},
|
|
||||||
ExpectedStatus: 200,
|
|
||||||
ExpectedContent: []string{`"enabled":false`},
|
|
||||||
TestAppFactory: testAppFactory,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Name: "POST /test-heartbeat - with user auth should fail",
|
|
||||||
Method: http.MethodPost,
|
|
||||||
URL: "/api/beszel/test-heartbeat",
|
|
||||||
Headers: map[string]string{
|
|
||||||
"Authorization": userToken,
|
|
||||||
},
|
|
||||||
ExpectedStatus: 403,
|
|
||||||
ExpectedContent: []string{"Requires admin role"},
|
|
||||||
TestAppFactory: testAppFactory,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Name: "POST /test-heartbeat - with admin auth should report disabled state",
|
|
||||||
Method: http.MethodPost,
|
|
||||||
URL: "/api/beszel/test-heartbeat",
|
|
||||||
Headers: map[string]string{
|
|
||||||
"Authorization": adminUserToken,
|
|
||||||
},
|
|
||||||
ExpectedStatus: 200,
|
|
||||||
ExpectedContent: []string{"Heartbeat not configured"},
|
|
||||||
TestAppFactory: testAppFactory,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
Name: "GET /universal-token - no auth should fail",
|
Name: "GET /universal-token - no auth should fail",
|
||||||
Method: http.MethodGet,
|
Method: http.MethodGet,
|
||||||
|
|||||||
@@ -54,7 +54,8 @@ export default function ContainersTable({ systemId }: { systemId?: string }) {
|
|||||||
fields: "id,name,image,cpu,memory,net,health,status,system,updated",
|
fields: "id,name,image,cpu,memory,net,health,status,system,updated",
|
||||||
filter: systemId ? pb.filter("system={:system}", { system: systemId }) : undefined,
|
filter: systemId ? pb.filter("system={:system}", { system: systemId }) : undefined,
|
||||||
})
|
})
|
||||||
.then(({ items }) => {
|
.then(
|
||||||
|
({ items }) => {
|
||||||
if (items.length === 0) {
|
if (items.length === 0) {
|
||||||
setData((curItems) => {
|
setData((curItems) => {
|
||||||
if (systemId) {
|
if (systemId) {
|
||||||
@@ -81,7 +82,8 @@ export default function ContainersTable({ systemId }: { systemId?: string }) {
|
|||||||
}
|
}
|
||||||
return newItems
|
return newItems
|
||||||
})
|
})
|
||||||
})
|
}
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// initial load
|
// initial load
|
||||||
@@ -283,7 +285,7 @@ async function getInfoHtml(container: ContainerRecord): Promise<string> {
|
|||||||
])
|
])
|
||||||
try {
|
try {
|
||||||
info = JSON.stringify(JSON.parse(info), null, 2)
|
info = JSON.stringify(JSON.parse(info), null, 2)
|
||||||
} catch (_) {}
|
} catch (_) { }
|
||||||
return info ? highlighter.codeToHtml(info, { lang: "json", theme: syntaxTheme }) : t`No results.`
|
return info ? highlighter.codeToHtml(info, { lang: "json", theme: syntaxTheme }) : t`No results.`
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(error)
|
console.error(error)
|
||||||
@@ -340,7 +342,7 @@ function ContainerSheet({
|
|||||||
setLogsDisplay("")
|
setLogsDisplay("")
|
||||||
setInfoDisplay("")
|
setInfoDisplay("")
|
||||||
if (!container) return
|
if (!container) return
|
||||||
;(async () => {
|
; (async () => {
|
||||||
const [logsHtml, infoHtml] = await Promise.all([getLogsHtml(container), getInfoHtml(container)])
|
const [logsHtml, infoHtml] = await Promise.all([getLogsHtml(container), getInfoHtml(container)])
|
||||||
setLogsDisplay(logsHtml)
|
setLogsDisplay(logsHtml)
|
||||||
setInfoDisplay(infoHtml)
|
setInfoDisplay(infoHtml)
|
||||||
@@ -471,7 +473,7 @@ const ContainerTableRow = memo(function ContainerTableRow({
|
|||||||
{row.getVisibleCells().map((cell) => (
|
{row.getVisibleCells().map((cell) => (
|
||||||
<TableCell
|
<TableCell
|
||||||
key={cell.id}
|
key={cell.id}
|
||||||
className="py-0 ps-4.5"
|
className="py-0"
|
||||||
style={{
|
style={{
|
||||||
height: virtualRow.size,
|
height: virtualRow.size,
|
||||||
}}
|
}}
|
||||||
|
|||||||
@@ -1,205 +0,0 @@
|
|||||||
import { t } from "@lingui/core/macro"
|
|
||||||
import { Trans } from "@lingui/react/macro"
|
|
||||||
import { redirectPage } from "@nanostores/router"
|
|
||||||
import clsx from "clsx"
|
|
||||||
import { LoaderCircleIcon, SendIcon } from "lucide-react"
|
|
||||||
import { useEffect, useState } from "react"
|
|
||||||
import { $router } from "@/components/router"
|
|
||||||
import { Badge } from "@/components/ui/badge"
|
|
||||||
import { Button } from "@/components/ui/button"
|
|
||||||
import { Separator } from "@/components/ui/separator"
|
|
||||||
import { toast } from "@/components/ui/use-toast"
|
|
||||||
import { isAdmin, pb } from "@/lib/api"
|
|
||||||
|
|
||||||
interface HeartbeatStatus {
|
|
||||||
enabled: boolean
|
|
||||||
url?: string
|
|
||||||
interval?: number
|
|
||||||
method?: string
|
|
||||||
msg?: string
|
|
||||||
}
|
|
||||||
|
|
||||||
export default function HeartbeatSettings() {
|
|
||||||
const [status, setStatus] = useState<HeartbeatStatus | null>(null)
|
|
||||||
const [isLoading, setIsLoading] = useState(true)
|
|
||||||
const [isTesting, setIsTesting] = useState(false)
|
|
||||||
|
|
||||||
if (!isAdmin()) {
|
|
||||||
redirectPage($router, "settings", { name: "general" })
|
|
||||||
}
|
|
||||||
|
|
||||||
useEffect(() => {
|
|
||||||
fetchStatus()
|
|
||||||
}, [])
|
|
||||||
|
|
||||||
async function fetchStatus() {
|
|
||||||
try {
|
|
||||||
setIsLoading(true)
|
|
||||||
const res = await pb.send<HeartbeatStatus>("/api/beszel/heartbeat-status", {})
|
|
||||||
setStatus(res)
|
|
||||||
} catch (error: any) {
|
|
||||||
toast({
|
|
||||||
title: t`Error`,
|
|
||||||
description: error.message,
|
|
||||||
variant: "destructive",
|
|
||||||
})
|
|
||||||
} finally {
|
|
||||||
setIsLoading(false)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async function sendTestHeartbeat() {
|
|
||||||
setIsTesting(true)
|
|
||||||
try {
|
|
||||||
const res = await pb.send<{ err: string | false }>("/api/beszel/test-heartbeat", {
|
|
||||||
method: "POST",
|
|
||||||
})
|
|
||||||
if ("err" in res && !res.err) {
|
|
||||||
toast({
|
|
||||||
title: t`Heartbeat sent successfully`,
|
|
||||||
description: t`Check your monitoring service`,
|
|
||||||
})
|
|
||||||
} else {
|
|
||||||
toast({
|
|
||||||
title: t`Error`,
|
|
||||||
description: (res.err as string) ?? t`Failed to send heartbeat`,
|
|
||||||
variant: "destructive",
|
|
||||||
})
|
|
||||||
}
|
|
||||||
} catch (error: any) {
|
|
||||||
toast({
|
|
||||||
title: t`Error`,
|
|
||||||
description: error.message,
|
|
||||||
variant: "destructive",
|
|
||||||
})
|
|
||||||
} finally {
|
|
||||||
setIsTesting(false)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const TestIcon = isTesting ? LoaderCircleIcon : SendIcon
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div>
|
|
||||||
<div>
|
|
||||||
<h3 className="text-xl font-medium mb-2">
|
|
||||||
<Trans>Heartbeat Monitoring</Trans>
|
|
||||||
</h3>
|
|
||||||
<p className="text-sm text-muted-foreground leading-relaxed">
|
|
||||||
<Trans>
|
|
||||||
Send periodic outbound pings to an external monitoring service so you can monitor Beszel without exposing it
|
|
||||||
to the internet.
|
|
||||||
</Trans>
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
<Separator className="my-4" />
|
|
||||||
|
|
||||||
{isLoading ? (
|
|
||||||
<div className="flex items-center gap-2 text-muted-foreground py-4">
|
|
||||||
<LoaderCircleIcon className="h-4 w-4 animate-spin" />
|
|
||||||
<Trans>Loading heartbeat status...</Trans>
|
|
||||||
</div>
|
|
||||||
) : status?.enabled ? (
|
|
||||||
<div className="space-y-5">
|
|
||||||
<div className="flex items-center gap-2">
|
|
||||||
<Badge variant="success">
|
|
||||||
<Trans>Active</Trans>
|
|
||||||
</Badge>
|
|
||||||
</div>
|
|
||||||
<div className="grid gap-4 sm:grid-cols-2">
|
|
||||||
<ConfigItem label={t`Endpoint URL`} value={status.url ?? ""} mono />
|
|
||||||
<ConfigItem label={t`Interval`} value={`${status.interval}s`} />
|
|
||||||
<ConfigItem label={t`HTTP Method`} value={status.method ?? "POST"} />
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<Separator />
|
|
||||||
|
|
||||||
<div>
|
|
||||||
<h4 className="text-base font-medium mb-1">
|
|
||||||
<Trans>Test heartbeat</Trans>
|
|
||||||
</h4>
|
|
||||||
<p className="text-sm text-muted-foreground leading-relaxed mb-3">
|
|
||||||
<Trans>Send a single heartbeat ping to verify your endpoint is working.</Trans>
|
|
||||||
</p>
|
|
||||||
<Button
|
|
||||||
type="button"
|
|
||||||
variant="outline"
|
|
||||||
className="flex items-center gap-1.5"
|
|
||||||
onClick={sendTestHeartbeat}
|
|
||||||
disabled={isTesting}
|
|
||||||
>
|
|
||||||
<TestIcon className={clsx("h-4 w-4", isTesting && "animate-spin")} />
|
|
||||||
<Trans>Send test heartbeat</Trans>
|
|
||||||
</Button>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<Separator />
|
|
||||||
|
|
||||||
<div>
|
|
||||||
<h4 className="text-base font-medium mb-2">
|
|
||||||
<Trans>Payload format</Trans>
|
|
||||||
</h4>
|
|
||||||
<p className="text-sm text-muted-foreground leading-relaxed mb-2">
|
|
||||||
<Trans>
|
|
||||||
When using POST, each heartbeat includes a JSON payload with system status summary, list of down
|
|
||||||
systems, and triggered alerts.
|
|
||||||
</Trans>
|
|
||||||
</p>
|
|
||||||
<p className="text-sm text-muted-foreground leading-relaxed">
|
|
||||||
<Trans>
|
|
||||||
The overall status is <code className="bg-muted rounded-sm px-1 text-primary">ok</code> when all systems
|
|
||||||
are up, <code className="bg-muted rounded-sm px-1 text-primary">warn</code> when alerts are triggered,
|
|
||||||
and <code className="bg-muted rounded-sm px-1 text-primary">error</code> when any system is down.
|
|
||||||
</Trans>
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
) : (
|
|
||||||
<div className="grid gap-4">
|
|
||||||
<div>
|
|
||||||
<p className="text-sm text-muted-foreground leading-relaxed mb-3">
|
|
||||||
<Trans>Set the following environment variables on your Beszel hub to enable heartbeat monitoring:</Trans>
|
|
||||||
</p>
|
|
||||||
<div className="grid gap-2.5">
|
|
||||||
<EnvVarItem
|
|
||||||
name="HEARTBEAT_URL"
|
|
||||||
description={t`Endpoint URL to ping (required)`}
|
|
||||||
example="https://uptime.betterstack.com/api/v1/heartbeat/xxxx"
|
|
||||||
/>
|
|
||||||
<EnvVarItem name="HEARTBEAT_INTERVAL" description={t`Seconds between pings (default: 60)`} example="60" />
|
|
||||||
<EnvVarItem
|
|
||||||
name="HEARTBEAT_METHOD"
|
|
||||||
description={t`HTTP method: POST, GET, or HEAD (default: POST)`}
|
|
||||||
example="POST"
|
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<p className="text-sm text-muted-foreground leading-relaxed">
|
|
||||||
<Trans>After setting the environment variables, restart your Beszel hub for changes to take effect.</Trans>
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
function ConfigItem({ label, value, mono }: { label: string; value: string; mono?: boolean }) {
|
|
||||||
return (
|
|
||||||
<div>
|
|
||||||
<p className="text-sm font-medium mb-0.5">{label}</p>
|
|
||||||
<p className={clsx("text-sm text-muted-foreground break-all", mono && "font-mono")}>{value}</p>
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
function EnvVarItem({ name, description, example }: { name: string; description: string; example: string }) {
|
|
||||||
return (
|
|
||||||
<div className="bg-muted/50 rounded-md px-3 py-2 grid gap-1.5">
|
|
||||||
<code className="text-sm font-mono text-primary font-medium leading-tight">{name}</code>
|
|
||||||
<p className="text-sm text-muted-foreground">{description}</p>
|
|
||||||
<p className="text-xs text-muted-foreground">
|
|
||||||
<Trans>Example:</Trans> <code className="font-mono">{example}</code>
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
}
|
|
||||||
@@ -2,14 +2,7 @@ import { t } from "@lingui/core/macro"
|
|||||||
import { Trans, useLingui } from "@lingui/react/macro"
|
import { Trans, useLingui } from "@lingui/react/macro"
|
||||||
import { useStore } from "@nanostores/react"
|
import { useStore } from "@nanostores/react"
|
||||||
import { getPagePath, redirectPage } from "@nanostores/router"
|
import { getPagePath, redirectPage } from "@nanostores/router"
|
||||||
import {
|
import { AlertOctagonIcon, BellIcon, FileSlidersIcon, FingerprintIcon, SettingsIcon } from "lucide-react"
|
||||||
AlertOctagonIcon,
|
|
||||||
BellIcon,
|
|
||||||
FileSlidersIcon,
|
|
||||||
FingerprintIcon,
|
|
||||||
HeartPulseIcon,
|
|
||||||
SettingsIcon,
|
|
||||||
} from "lucide-react"
|
|
||||||
import { lazy, useEffect } from "react"
|
import { lazy, useEffect } from "react"
|
||||||
import { $router } from "@/components/router.tsx"
|
import { $router } from "@/components/router.tsx"
|
||||||
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card.tsx"
|
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card.tsx"
|
||||||
@@ -25,14 +18,12 @@ const notificationsSettingsImport = () => import("./notifications.tsx")
|
|||||||
const configYamlSettingsImport = () => import("./config-yaml.tsx")
|
const configYamlSettingsImport = () => import("./config-yaml.tsx")
|
||||||
const fingerprintsSettingsImport = () => import("./tokens-fingerprints.tsx")
|
const fingerprintsSettingsImport = () => import("./tokens-fingerprints.tsx")
|
||||||
const alertsHistoryDataTableSettingsImport = () => import("./alerts-history-data-table.tsx")
|
const alertsHistoryDataTableSettingsImport = () => import("./alerts-history-data-table.tsx")
|
||||||
const heartbeatSettingsImport = () => import("./heartbeat.tsx")
|
|
||||||
|
|
||||||
const GeneralSettings = lazy(generalSettingsImport)
|
const GeneralSettings = lazy(generalSettingsImport)
|
||||||
const NotificationsSettings = lazy(notificationsSettingsImport)
|
const NotificationsSettings = lazy(notificationsSettingsImport)
|
||||||
const ConfigYamlSettings = lazy(configYamlSettingsImport)
|
const ConfigYamlSettings = lazy(configYamlSettingsImport)
|
||||||
const FingerprintsSettings = lazy(fingerprintsSettingsImport)
|
const FingerprintsSettings = lazy(fingerprintsSettingsImport)
|
||||||
const AlertsHistoryDataTableSettings = lazy(alertsHistoryDataTableSettingsImport)
|
const AlertsHistoryDataTableSettings = lazy(alertsHistoryDataTableSettingsImport)
|
||||||
const HeartbeatSettings = lazy(heartbeatSettingsImport)
|
|
||||||
|
|
||||||
export async function saveSettings(newSettings: Partial<UserSettings>) {
|
export async function saveSettings(newSettings: Partial<UserSettings>) {
|
||||||
try {
|
try {
|
||||||
@@ -97,13 +88,6 @@ export default function SettingsLayout() {
|
|||||||
admin: true,
|
admin: true,
|
||||||
preload: configYamlSettingsImport,
|
preload: configYamlSettingsImport,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
title: t`Heartbeat`,
|
|
||||||
href: getPagePath($router, "settings", { name: "heartbeat" }),
|
|
||||||
icon: HeartPulseIcon,
|
|
||||||
admin: true,
|
|
||||||
preload: heartbeatSettingsImport,
|
|
||||||
},
|
|
||||||
]
|
]
|
||||||
|
|
||||||
const page = useStore($router)
|
const page = useStore($router)
|
||||||
@@ -157,7 +141,5 @@ function SettingsContent({ name }: { name: string }) {
|
|||||||
return <FingerprintsSettings />
|
return <FingerprintsSettings />
|
||||||
case "alert-history":
|
case "alert-history":
|
||||||
return <AlertsHistoryDataTableSettings />
|
return <AlertsHistoryDataTableSettings />
|
||||||
case "heartbeat":
|
|
||||||
return <HeartbeatSettings />
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ import { FreeBsdIcon, TuxIcon, WebSocketIcon, WindowsIcon } from "@/components/u
|
|||||||
import { Separator } from "@/components/ui/separator"
|
import { Separator } from "@/components/ui/separator"
|
||||||
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"
|
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"
|
||||||
import { ConnectionType, connectionTypeLabels, Os, SystemStatus } from "@/lib/enums"
|
import { ConnectionType, connectionTypeLabels, Os, SystemStatus } from "@/lib/enums"
|
||||||
import { cn, formatBytes, getHostDisplayValue, secondsToUptimeString, toFixedFloat } from "@/lib/utils"
|
import { cn, formatBytes, getHostDisplayValue, secondsToString, toFixedFloat } from "@/lib/utils"
|
||||||
import type { ChartData, SystemDetailsRecord, SystemRecord } from "@/types"
|
import type { ChartData, SystemDetailsRecord, SystemRecord } from "@/types"
|
||||||
|
|
||||||
export default function InfoBar({
|
export default function InfoBar({
|
||||||
@@ -77,6 +77,14 @@ export default function InfoBar({
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let uptime: string
|
||||||
|
if (system.info.u < 3600) {
|
||||||
|
uptime = secondsToString(system.info.u, "minute")
|
||||||
|
} else if (system.info.u < 360000) {
|
||||||
|
uptime = secondsToString(system.info.u, "hour")
|
||||||
|
} else {
|
||||||
|
uptime = secondsToString(system.info.u, "day")
|
||||||
|
}
|
||||||
const info = [
|
const info = [
|
||||||
{ value: getHostDisplayValue(system), Icon: GlobeIcon },
|
{ value: getHostDisplayValue(system), Icon: GlobeIcon },
|
||||||
{
|
{
|
||||||
@@ -86,7 +94,7 @@ export default function InfoBar({
|
|||||||
// hide if hostname is same as host or name
|
// hide if hostname is same as host or name
|
||||||
hide: hostname === system.host || hostname === system.name,
|
hide: hostname === system.host || hostname === system.name,
|
||||||
},
|
},
|
||||||
{ value: secondsToUptimeString(system.info.u), Icon: ClockArrowUp, label: t`Uptime`, hide: !system.info.u },
|
{ value: uptime, Icon: ClockArrowUp, label: t`Uptime`, hide: !system.info.u },
|
||||||
osInfo[os],
|
osInfo[os],
|
||||||
{
|
{
|
||||||
value: cpuModel,
|
value: cpuModel,
|
||||||
|
|||||||
@@ -174,8 +174,8 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
|
|||||||
<HeaderButton column={column} name={t({ message: "Power On", comment: "Power On Time" })} Icon={Clock} />
|
<HeaderButton column={column} name={t({ message: "Power On", comment: "Power On Time" })} Icon={Clock} />
|
||||||
),
|
),
|
||||||
cell: ({ getValue }) => {
|
cell: ({ getValue }) => {
|
||||||
const hours = getValue() as number | undefined
|
const hours = (getValue() ?? 0) as number
|
||||||
if (hours == null) {
|
if (!hours && hours !== 0) {
|
||||||
return <div className="text-sm text-muted-foreground ms-1.5">N/A</div>
|
return <div className="text-sm text-muted-foreground ms-1.5">N/A</div>
|
||||||
}
|
}
|
||||||
const seconds = hours * 3600
|
const seconds = hours * 3600
|
||||||
@@ -195,7 +195,7 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
|
|||||||
),
|
),
|
||||||
cell: ({ getValue }) => {
|
cell: ({ getValue }) => {
|
||||||
const cycles = getValue() as number | undefined
|
const cycles = getValue() as number | undefined
|
||||||
if (cycles == null) {
|
if (!cycles && cycles !== 0) {
|
||||||
return <div className="text-muted-foreground ms-1.5">N/A</div>
|
return <div className="text-muted-foreground ms-1.5">N/A</div>
|
||||||
}
|
}
|
||||||
return <span className="ms-1.5">{cycles.toLocaleString()}</span>
|
return <span className="ms-1.5">{cycles.toLocaleString()}</span>
|
||||||
@@ -206,11 +206,7 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
|
|||||||
invertSorting: true,
|
invertSorting: true,
|
||||||
header: ({ column }) => <HeaderButton column={column} name={t`Temp`} Icon={ThermometerIcon} />,
|
header: ({ column }) => <HeaderButton column={column} name={t`Temp`} Icon={ThermometerIcon} />,
|
||||||
cell: ({ getValue }) => {
|
cell: ({ getValue }) => {
|
||||||
const temp = getValue() as number | null | undefined
|
const { value, unit } = formatTemperature(getValue() as number)
|
||||||
if (!temp) {
|
|
||||||
return <div className="text-muted-foreground ms-1.5">N/A</div>
|
|
||||||
}
|
|
||||||
const { value, unit } = formatTemperature(temp)
|
|
||||||
return <span className="ms-1.5">{`${value} ${unit}`}</span>
|
return <span className="ms-1.5">{`${value} ${unit}`}</span>
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -308,7 +304,7 @@ export default function DisksTable({ systemId }: { systemId?: string }) {
|
|||||||
? { fields: SMART_DEVICE_FIELDS, filter: pb.filter("system = {:system}", { system: systemId }) }
|
? { fields: SMART_DEVICE_FIELDS, filter: pb.filter("system = {:system}", { system: systemId }) }
|
||||||
: { fields: SMART_DEVICE_FIELDS }
|
: { fields: SMART_DEVICE_FIELDS }
|
||||||
|
|
||||||
;(async () => {
|
; (async () => {
|
||||||
try {
|
try {
|
||||||
unsubscribe = await pb.collection("smart_devices").subscribe(
|
unsubscribe = await pb.collection("smart_devices").subscribe(
|
||||||
"*",
|
"*",
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ import {
|
|||||||
formatTemperature,
|
formatTemperature,
|
||||||
getMeterState,
|
getMeterState,
|
||||||
parseSemVer,
|
parseSemVer,
|
||||||
secondsToUptimeString,
|
secondsToString,
|
||||||
} from "@/lib/utils"
|
} from "@/lib/utils"
|
||||||
import { batteryStateTranslations } from "@/lib/i18n"
|
import { batteryStateTranslations } from "@/lib/i18n"
|
||||||
import type { SystemRecord } from "@/types"
|
import type { SystemRecord } from "@/types"
|
||||||
@@ -154,7 +154,11 @@ export function SystemsTableColumns(viewMode: "table" | "grid"): ColumnDef<Syste
|
|||||||
{name}
|
{name}
|
||||||
</Link>
|
</Link>
|
||||||
</span>
|
</span>
|
||||||
<Link href={linkUrl} className="inset-0 absolute size-full" aria-label={name}></Link>
|
<Link
|
||||||
|
href={linkUrl}
|
||||||
|
className="inset-0 absolute size-full"
|
||||||
|
aria-label={name}
|
||||||
|
></Link>
|
||||||
</>
|
</>
|
||||||
)
|
)
|
||||||
},
|
},
|
||||||
@@ -378,13 +382,20 @@ export function SystemsTableColumns(viewMode: "table" | "grid"): ColumnDef<Syste
|
|||||||
size: 50,
|
size: 50,
|
||||||
Icon: ClockArrowUp,
|
Icon: ClockArrowUp,
|
||||||
header: sortableHeader,
|
header: sortableHeader,
|
||||||
hideSort: true,
|
|
||||||
cell(info) {
|
cell(info) {
|
||||||
const uptime = info.getValue() as number
|
const uptime = info.getValue() as number
|
||||||
if (!uptime) {
|
if (!uptime) {
|
||||||
return null
|
return null
|
||||||
}
|
}
|
||||||
return <span className="tabular-nums whitespace-nowrap">{secondsToUptimeString(uptime)}</span>
|
let formatted: string
|
||||||
|
if (uptime < 3600) {
|
||||||
|
formatted = secondsToString(uptime, "minute")
|
||||||
|
} else if (uptime < 360000) {
|
||||||
|
formatted = secondsToString(uptime, "hour")
|
||||||
|
} else {
|
||||||
|
formatted = secondsToString(uptime, "day")
|
||||||
|
}
|
||||||
|
return <span className="tabular-nums whitespace-nowrap">{formatted}</span>
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -434,7 +434,7 @@ const SystemTableRow = memo(
|
|||||||
width: cell.column.getSize(),
|
width: cell.column.getSize(),
|
||||||
height: virtualRow.size,
|
height: virtualRow.size,
|
||||||
}}
|
}}
|
||||||
className="py-0 ps-4.5"
|
className="py-0"
|
||||||
>
|
>
|
||||||
{flexRender(cell.column.columnDef.cell, cell.getContext())}
|
{flexRender(cell.column.columnDef.cell, cell.getContext())}
|
||||||
</TableCell>
|
</TableCell>
|
||||||
|
|||||||
@@ -466,14 +466,3 @@ export function secondsToString(seconds: number, unit: "hour" | "minute" | "day"
|
|||||||
return plural(count, { one: `${countString} day`, other: `${countString} days` })
|
return plural(count, { one: `${countString} day`, other: `${countString} days` })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Format seconds to uptime string - "X minutes", "X hours", "X days" */
|
|
||||||
export function secondsToUptimeString(seconds: number): string {
|
|
||||||
if (seconds < 3600) {
|
|
||||||
return secondsToString(seconds, "minute")
|
|
||||||
} else if (seconds < 360000) {
|
|
||||||
return secondsToString(seconds, "hour")
|
|
||||||
} else {
|
|
||||||
return secondsToString(seconds, "day")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -51,7 +51,7 @@ The [quick start guide](https://beszel.dev/guide/getting-started) and other docu
|
|||||||
- **GPU usage / power draw** - Nvidia, AMD, and Intel.
|
- **GPU usage / power draw** - Nvidia, AMD, and Intel.
|
||||||
- **Battery** - Host system battery charge.
|
- **Battery** - Host system battery charge.
|
||||||
- **Containers** - Status and metrics of all running Docker / Podman containers.
|
- **Containers** - Status and metrics of all running Docker / Podman containers.
|
||||||
- **S.M.A.R.T.** - Host system disk health (includes eMMC wear/EOL via Linux sysfs when available).
|
- **S.M.A.R.T.** - Host system disk health.
|
||||||
|
|
||||||
## Help and discussion
|
## Help and discussion
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +1,3 @@
|
|||||||
## Unreleased
|
|
||||||
|
|
||||||
- Add outbound heartbeat monitoring to external services (BetterStack, Uptime Kuma, Healthchecks.io, etc.) with system status summary payload. Configured via `BESZEL_HUB_HEARTBEAT_URL`, `BESZEL_HUB_HEARTBEAT_INTERVAL`, and `BESZEL_HUB_HEARTBEAT_METHOD` environment variables.
|
|
||||||
|
|
||||||
- Add Heartbeat settings page to the admin UI with status display, configuration reference, and test button.
|
|
||||||
|
|
||||||
## 0.18.3
|
## 0.18.3
|
||||||
|
|
||||||
- Add experimental sysfs AMD GPU collector. (#737, #1569)
|
- Add experimental sysfs AMD GPU collector. (#737, #1569)
|
||||||
|
|||||||
Reference in New Issue
Block a user