diff --git a/agent/pve.go b/agent/pve.go
index bc4b1212..7ae00c95 100644
--- a/agent/pve.go
+++ b/agent/pve.go
@@ -18,9 +18,11 @@ type pveManager struct {
nodeName string // Cluster node name
cpuCount int // CPU count on node
nodeStatsMap map[string]*container.PveNodeStats // Keeps track of pve node stats
+ lastInitTry time.Time // Last time node initialization was attempted
}
-// Creates a new PVE manager - may return nil if required environment variables are not set or if there is an error connecting to the API
+// newPVEManager creates a new PVE manager - may return nil if required environment variables
+// are not set. It does not contact the API; the node CPU count is fetched later by ensureInitialized.
func newPVEManager() *pveManager {
url, exists := GetEnv("PROXMOX_URL")
if !exists {
@@ -63,22 +65,41 @@ func newPVEManager() *pveManager {
nodeStatsMap: make(map[string]*container.PveNodeStats),
}
- // Retrieve node cpu count
- node, err := client.Node(context.Background(), nodeName)
- if err != nil {
- slog.Error("Error connecting to Proxmox", "err", err)
- return nil
- } else {
- pveManager.cpuCount = node.CPUInfo.CPUs
- }
-
return &pveManager
}
-// Returns stats for all running VMs/LXCs
-func (pm *pveManager) getPVEStats() ([]*container.PveNodeStats, error) {
+// ensureInitialized lazily completes setup of the PVE manager by looking up the node's CPU
+// count through the Proxmox client the first time it is needed.
+//
+// It returns nil once cpuCount has been populated. Otherwise it returns an error when no
+// client is configured, when the previous attempt was made less than 30 seconds ago, when
+// the node lookup fails, or when the node reports a non-positive CPU count.
+func (pm *pveManager) ensureInitialized(ctx context.Context) error {
+	switch {
+	case pm.client == nil:
+		return errors.New("PVE client not configured")
+	case pm.cpuCount > 0:
+		// A positive CPU count means an earlier lookup already succeeded.
+		return nil
+	}
+
+	// Space out attempts so a failing lookup is not repeated on every call.
+	if sinceLastTry := time.Since(pm.lastInitTry); sinceLastTry < 30*time.Second {
+		return errors.New("PVE initialization retry pending")
+	}
+	// Stamp the attempt before the request so a failed lookup also starts the wait.
+	pm.lastInitTry = time.Now()
+
+	nodeInfo, lookupErr := pm.client.Node(ctx, pm.nodeName)
+	if lookupErr != nil {
+		return lookupErr
+	}
+
+	cpus := nodeInfo.CPUInfo.CPUs
+	if cpus <= 0 {
+		return errors.New("node returned zero CPUs")
+	}
+	pm.cpuCount = cpus
+	return nil
+}
+
+// getPVEStats returns stats for all running VMs/LXCs
+func (pm *pveManager) getPVEStats() ([]*container.PveNodeStats, error) {
+ if err := pm.ensureInitialized(context.Background()); err != nil {
+ slog.Warn("Proxmox API unavailable", "err", err)
+ return nil, err
}
cluster, err := pm.client.Cluster(context.Background())
if err != nil {
diff --git a/agent/pve_test.go b/agent/pve_test.go
new file mode 100644
index 00000000..0fae078c
--- /dev/null
+++ b/agent/pve_test.go
@@ -0,0 +1,92 @@
+package agent
+
+import (
+ "errors"
+ "fmt"
+ "net/http"
+ "net/http/httptest"
+ "sync/atomic"
+ "testing"
+ "time"
+
+ "github.com/henrygd/beszel/internal/entities/container"
+ "github.com/luthermonson/go-proxmox"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+// TestNewPVEManagerDoesNotConnectAtStartup checks that newPVEManager returns a non-nil
+// manager whose cpuCount is still zero when all four Proxmox environment variables are set
+// and the URL targets 127.0.0.1:1 (presumably a port with no API listening).
+func TestNewPVEManagerDoesNotConnectAtStartup(t *testing.T) {
+	env := [][2]string{
+		{"BESZEL_AGENT_PROXMOX_URL", "https://127.0.0.1:1/api2/json"},
+		{"BESZEL_AGENT_PROXMOX_NODE", "pve"},
+		{"BESZEL_AGENT_PROXMOX_TOKENID", "root@pam!test"},
+		{"BESZEL_AGENT_PROXMOX_SECRET", "secret"},
+	}
+	for _, kv := range env {
+		t.Setenv(kv[0], kv[1])
+	}
+
+	manager := newPVEManager()
+	require.NotNil(t, manager)
+	assert.Zero(t, manager.cpuCount)
+}
+
+// TestPVEManagerRetriesInitialization drives getPVEStats against a stub Proxmox API.
+//
+// The client's transport rejects the first node status request, so the first getPVEStats
+// call must fail and leave cpuCount at zero. After lastInitTry is moved back past the
+// 30-second retry window, the second call must reach the server exactly once for the node
+// status and once for the cluster resources, and return the single running VM.
+func TestPVEManagerRetriesInitialization(t *testing.T) {
+	var nodeRequests atomic.Int32
+	var clusterRequests atomic.Int32
+
+	server := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		switch r.URL.Path {
+		case "/api2/json/nodes/pve/status":
+			nodeRequests.Add(1)
+			fmt.Fprint(w, `{"data":{"cpuinfo":{"cpus":8}}}`)
+		case "/api2/json/cluster/status":
+			fmt.Fprint(w, `{"data":[{"type":"cluster","name":"test-cluster","id":"test-cluster","version":1,"quorate":1}]}`)
+		case "/api2/json/cluster/resources":
+			clusterRequests.Add(1)
+			fmt.Fprint(w, `{"data":[{"id":"qemu/101","type":"qemu","node":"pve","status":"running","name":"vm-101","cpu":0.5,"maxcpu":4,"maxmem":4096,"mem":2048,"netin":1024,"netout":2048,"diskread":10,"diskwrite":20,"maxdisk":8192,"uptime":60}]}`)
+		default:
+			// The handler runs on a server goroutine, where t.Fatalf (FailNow) must not be
+			// called. Record the failure with Errorf and answer the request so the client
+			// gets a response instead of a torn-down connection.
+			t.Errorf("unexpected path: %s", r.URL.Path)
+			http.NotFound(w, r)
+		}
+	}))
+	defer server.Close()
+
+	pm := &pveManager{
+		client: proxmox.NewClient(server.URL+"/api2/json",
+			proxmox.WithHTTPClient(&http.Client{
+				Transport: &failOnceRoundTripper{
+					base: server.Client().Transport,
+				},
+			}),
+			proxmox.WithAPIToken("root@pam!test", "secret"),
+		),
+		nodeName:     "pve",
+		nodeStatsMap: make(map[string]*container.PveNodeStats),
+	}
+
+	// First attempt: the transport fails the node status request.
+	stats, err := pm.getPVEStats()
+	require.Error(t, err)
+	assert.Nil(t, stats)
+	assert.Zero(t, pm.cpuCount)
+
+	// Rewind the last attempt so the retry window has elapsed without sleeping.
+	pm.lastInitTry = time.Now().Add(-31 * time.Second)
+	stats, err = pm.getPVEStats()
+	require.NoError(t, err)
+	require.Len(t, stats, 1)
+	assert.Equal(t, int32(1), nodeRequests.Load())
+	assert.Equal(t, int32(1), clusterRequests.Load())
+	assert.Equal(t, 8, pm.cpuCount)
+	assert.Equal(t, "qemu/101", stats[0].Id)
+	assert.Equal(t, 25.0, stats[0].Cpu)
+	assert.Equal(t, uint64(1024), stats[0].NetIn)
+	assert.Equal(t, uint64(2048), stats[0].NetOut)
+}
+
+// failOnceRoundTripper is an http.RoundTripper test double: its RoundTrip method fails the
+// first request to the node status endpoint and forwards every other request to base.
+type failOnceRoundTripper struct {
+ // base is the transport that forwarded requests are delegated to.
+ base http.RoundTripper
+ // failed is set to true when the single simulated failure is returned.
+ failed atomic.Bool
+}
+
+// RoundTrip implements http.RoundTripper. The first request whose path is
+// /api2/json/nodes/pve/status fails with a simulated connection-refused error; every other
+// request, including later ones to that path, is passed to rt.base.
+func (rt *failOnceRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
+	if req.URL.Path == "/api2/json/nodes/pve/status" && !rt.failed.Swap(true) {
+		// The http.RoundTripper contract requires the request body to be closed even
+		// when an error is returned.
+		if req.Body != nil {
+			// The close error is irrelevant here: the request is being failed anyway.
+			_ = req.Body.Close()
+		}
+		return nil, errors.New("dial tcp 127.0.0.1:8006: connect: connection refused")
+	}
+	return rt.base.RoundTrip(req)
+}
+
+// Compile-time check that *failOnceRoundTripper satisfies http.RoundTripper.
+var _ http.RoundTripper = (*failOnceRoundTripper)(nil)
diff --git a/internal/site/src/components/routes/system.tsx b/internal/site/src/components/routes/system.tsx
index 5c00b8c5..bd5730b7 100644
--- a/internal/site/src/components/routes/system.tsx
+++ b/internal/site/src/components/routes/system.tsx
@@ -952,6 +952,8 @@ export default memo(function SystemDetail({ id }: { id: string }) {