From cdfd116da09bbf49dfa7917b84924e1280a398ae Mon Sep 17 00:00:00 2001 From: Amir Moradi <1281163+amirhmoradi@users.noreply.github.com> Date: Tue, 17 Feb 2026 21:48:20 +0100 Subject: [PATCH] Add outbound heartbeat monitoring (#1729) * feat: add outbound heartbeat monitoring to external endpoints Allow Beszel hub to periodically ping an external monitoring service (e.g. BetterStack, Uptime Kuma, Healthchecks.io) with system status summaries, enabling monitoring without exposing Beszel to the internet. Configuration via environment variables: - BESZEL_HUB_HEARTBEAT_URL: endpoint to ping (required to enable) - BESZEL_HUB_HEARTBEAT_INTERVAL: seconds between pings (default: 60) - BESZEL_HUB_HEARTBEAT_METHOD: HTTP method - POST/GET/HEAD (default: POST) --- internal/hub/heartbeat/heartbeat.go | 280 ++++++++++++++++++ internal/hub/hub.go | 56 +++- .../components/routes/settings/heartbeat.tsx | 215 ++++++++++++++ .../src/components/routes/settings/layout.tsx | 20 +- supplemental/CHANGELOG.md | 6 + 5 files changed, 570 insertions(+), 7 deletions(-) create mode 100644 internal/hub/heartbeat/heartbeat.go create mode 100644 internal/site/src/components/routes/settings/heartbeat.tsx diff --git a/internal/hub/heartbeat/heartbeat.go b/internal/hub/heartbeat/heartbeat.go new file mode 100644 index 00000000..f577529e --- /dev/null +++ b/internal/hub/heartbeat/heartbeat.go @@ -0,0 +1,280 @@ +// Package heartbeat sends periodic outbound pings to an external monitoring +// endpoint (e.g. BetterStack, Uptime Kuma, Healthchecks.io) so operators can +// monitor Beszel without exposing it to the internet. +package heartbeat + +import ( + "bytes" + "encoding/json" + "fmt" + "net/http" + "strconv" + "strings" + "time" + + "github.com/henrygd/beszel" + "github.com/pocketbase/pocketbase/core" +) + +// Default values for heartbeat configuration. +const ( + defaultInterval = 60 // seconds + httpTimeout = 10 * time.Second +) + +// Payload is the JSON body sent with each heartbeat request. 
+type Payload struct {
+	// Status is "error" when at least one system is down, "warn" when no
+	// system is down but at least one alert is triggered, and "ok" otherwise.
+	// Paused and pending systems do not affect the status.
+	Status string `json:"status"`
+	// Timestamp is the build time of the payload, UTC, RFC 3339.
+	Timestamp string `json:"timestamp"`
+	// Msg is a one-line human-readable summary of Status.
+	Msg     string         `json:"msg"`
+	Systems SystemsSummary `json:"systems"`
+	// Down and Alerts are left out of the JSON entirely when empty.
+	Down    []SystemInfo `json:"down_systems,omitempty"`
+	Alerts  []AlertInfo  `json:"triggered_alerts,omitempty"`
+	Version string       `json:"beszel_version"`
+}
+
+// SystemsSummary contains counts of systems by status.
+// Total counts every row, including any status not listed here.
+type SystemsSummary struct {
+	Total   int `json:"total"`
+	Up      int `json:"up"`
+	Down    int `json:"down"`
+	Paused  int `json:"paused"`
+	Pending int `json:"pending"`
+}
+
+// SystemInfo identifies a system that is currently down.
+// The db tags let it be scanned directly from the systems table.
+type SystemInfo struct {
+	ID   string `json:"id" db:"id"`
+	Name string `json:"name" db:"name"`
+	Host string `json:"host" db:"host"`
+}
+
+// AlertInfo describes a currently triggered alert.
+type AlertInfo struct {
+	SystemID   string `json:"system_id"`
+	SystemName string `json:"system_name"`
+	AlertName  string `json:"alert_name"`
+	// Threshold is copied from the alert's "value" column.
+	Threshold float64 `json:"threshold"`
+}
+
+// Config holds heartbeat settings read from environment variables.
+type Config struct {
+	URL      string // endpoint to ping
+	Interval int    // seconds between pings (always > 0; default 60)
+	Method   string // HTTP method: "POST" (default), "GET" or "HEAD"
+}
+
+// Heartbeat manages the periodic outbound health check.
+type Heartbeat struct {
+	app    core.App     // used for logging and database access
+	config Config       // fixed at construction time
+	client *http.Client // shared client with a request timeout
+}
+
+// New creates a Heartbeat if configuration is present.
+// Returns nil if HEARTBEAT_URL is not set (feature disabled).
+func New(app core.App, getEnv func(string) (string, bool)) *Heartbeat {
+	// Trim before the emptiness check so a whitespace-only value disables the
+	// feature instead of enabling it with an unusable URL.
+	rawURL, ok := getEnv("HEARTBEAT_URL")
+	endpoint := strings.TrimSpace(rawURL)
+	if !ok || endpoint == "" {
+		return nil
+	}
+
+	// Anything that is not a positive integer keeps the default. The value is
+	// trimmed first, like the method below, so " 30 " is accepted.
+	interval := defaultInterval
+	if raw, ok := getEnv("HEARTBEAT_INTERVAL"); ok {
+		if secs, err := strconv.Atoi(strings.TrimSpace(raw)); err == nil && secs > 0 {
+			interval = secs
+		}
+	}
+
+	// Only GET and HEAD override the default; any other value means POST.
+	method := http.MethodPost
+	if raw, ok := getEnv("HEARTBEAT_METHOD"); ok {
+		switch m := strings.ToUpper(strings.TrimSpace(raw)); m {
+		case http.MethodGet, http.MethodHead:
+			method = m
+		}
+	}
+
+	return &Heartbeat{
+		app: app,
+		config: Config{
+			URL:      endpoint,
+			Interval: interval,
+			Method:   method,
+		},
+		client: &http.Client{Timeout: httpTimeout},
+	}
+}
+
+// Start begins the heartbeat loop. It blocks and should be called in a goroutine.
+// The loop runs until the provided stop channel is closed.
+func (hb *Heartbeat) Start(stop <-chan struct{}) {
+	hb.app.Logger().Info("Heartbeat enabled",
+		"url", hb.config.URL,
+		"interval", fmt.Sprintf("%ds", hb.config.Interval),
+		"method", hb.config.Method,
+	)
+
+	// Send an initial heartbeat immediately on startup. The returned error is
+	// deliberately discarded here and below: send logs its own failures.
+	_ = hb.send()
+
+	ticker := time.NewTicker(time.Duration(hb.config.Interval) * time.Second)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-stop:
+			return
+		case <-ticker.C:
+			_ = hb.send()
+		}
+	}
+}
+
+// Send performs a single heartbeat ping. Exposed for the test-heartbeat API endpoint.
+func (hb *Heartbeat) Send() error {
+	return hb.send()
+}
+
+// GetConfig returns the current heartbeat configuration.
+func (hb *Heartbeat) GetConfig() Config {
+	return hb.config
+}
+
+// send performs one heartbeat request against the configured URL.
+//
+// GET and HEAD requests are sent without a body. Any other method is sent as
+// a POST carrying the JSON-encoded Payload. Every failure is logged here and
+// also returned, because the periodic loop discards the return value while
+// the test endpoint reports it to the caller. A response status of 400 or
+// above counts as a failure.
+func (hb *Heartbeat) send() error {
+	var (
+		req *http.Request
+		err error
+	)
+
+	switch hb.config.Method {
+	case http.MethodGet, http.MethodHead:
+		// No body is sent, so skip the database queries in buildPayload; a
+		// query error must not suppress a ping that carries no query data.
+		req, err = http.NewRequest(hb.config.Method, hb.config.URL, nil)
+	default:
+		payload, buildErr := hb.buildPayload()
+		if buildErr != nil {
+			hb.app.Logger().Error("Heartbeat: failed to build payload", "err", buildErr)
+			return buildErr
+		}
+		body, jsonErr := json.Marshal(payload)
+		if jsonErr != nil {
+			hb.app.Logger().Error("Heartbeat: failed to marshal payload", "err", jsonErr)
+			return jsonErr
+		}
+		req, err = http.NewRequest(http.MethodPost, hb.config.URL, bytes.NewReader(body))
+		if err == nil {
+			req.Header.Set("Content-Type", "application/json")
+		}
+	}
+
+	if err != nil {
+		hb.app.Logger().Error("Heartbeat: failed to create request", "err", err)
+		return err
+	}
+
+	req.Header.Set("User-Agent", "Beszel-Heartbeat")
+
+	resp, err := hb.client.Do(req)
+	if err != nil {
+		hb.app.Logger().Error("Heartbeat: request failed", "url", hb.config.URL, "err", err)
+		return err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode >= 400 {
+		hb.app.Logger().Warn("Heartbeat: non-success response",
+			"url", hb.config.URL,
+			"status", resp.StatusCode,
+		)
+		return fmt.Errorf("heartbeat endpoint returned status %d", resp.StatusCode)
+	}
+
+	return nil
+}
+
+// buildPayload queries the database and assembles the heartbeat body.
+//
+// It returns an error if the system counts or the list of down systems cannot
+// be read. A failure to read triggered alerts is logged and treated as "no
+// alerts", since that information is supplementary.
+func (hb *Heartbeat) buildPayload() (*Payload, error) {
+	db := hb.app.DB()
+
+	// Count systems by status.
+	var systemCounts []struct {
+		Status string `db:"status"`
+		Count  int    `db:"cnt"`
+	}
+	err := db.NewQuery("SELECT status, COUNT(*) as cnt FROM systems GROUP BY status").All(&systemCounts)
+	if err != nil {
+		return nil, fmt.Errorf("query system counts: %w", err)
+	}
+
+	summary := SystemsSummary{}
+	for _, sc := range systemCounts {
+		switch sc.Status {
+		case "up":
+			summary.Up = sc.Count
+		case "down":
+			summary.Down = sc.Count
+		case "paused":
+			summary.Paused = sc.Count
+		case "pending":
+			summary.Pending = sc.Count
+		}
+		// Unrecognized statuses still count toward the total.
+		summary.Total += sc.Count
+	}
+
+	// Get names of down systems.
+	var downSystems []SystemInfo
+	err = db.NewQuery("SELECT id, name, host FROM systems WHERE status = 'down'").All(&downSystems)
+	if err != nil {
+		return nil, fmt.Errorf("query down systems: %w", err)
+	}
+
+	// Get triggered alerts with system names.
+	var triggeredAlerts []struct {
+		SystemID   string  `db:"system"`
+		SystemName string  `db:"system_name"`
+		AlertName  string  `db:"name"`
+		Value      float64 `db:"value"`
+	}
+	err = db.NewQuery(`
+		SELECT a.system, s.name as system_name, a.name, a.value
+		FROM alerts a
+		JOIN systems s ON a.system = s.id
+		WHERE a.triggered = true
+	`).All(&triggeredAlerts)
+	if err != nil {
+		// Non-fatal: alerts info is supplementary. Log it so an empty alert
+		// list caused by a query failure is not mistaken for "no alerts".
+		hb.app.Logger().Warn("Heartbeat: failed to query triggered alerts", "err", err)
+		triggeredAlerts = nil
+	}
+
+	alerts := make([]AlertInfo, 0, len(triggeredAlerts))
+	for _, ta := range triggeredAlerts {
+		alerts = append(alerts, AlertInfo{
+			SystemID:   ta.SystemID,
+			SystemName: ta.SystemName,
+			AlertName:  ta.AlertName,
+			Threshold:  ta.Value,
+		})
+	}
+
+	// Determine overall status: down systems outrank triggered alerts.
+	status := "ok"
+	msg := "All systems operational"
+	switch {
+	case summary.Down > 0:
+		status = "error"
+		names := make([]string, len(downSystems))
+		for i, ds := range downSystems {
+			names[i] = ds.Name
+		}
+		msg = fmt.Sprintf("%d system(s) down: %s", summary.Down, strings.Join(names, ", "))
+	case len(alerts) > 0:
+		status = "warn"
+		msg = fmt.Sprintf("%d alert(s) triggered", len(alerts))
+	}
+
+	return &Payload{
+		Status:    status,
+		Timestamp: time.Now().UTC().Format(time.RFC3339),
+		Msg:       msg,
+		Systems:   summary,
+		Down:      downSystems,
+		Alerts:    alerts,
+		Version:   beszel.Version,
+	}, nil
+}
diff --git a/internal/hub/hub.go b/internal/hub/hub.go index 82b21f44..70d96d93 100644 --- a/internal/hub/hub.go +++ b/internal/hub/hub.go @@ -15,6 +15,7 @@ import ( "github.com/henrygd/beszel" "github.com/henrygd/beszel/internal/alerts" "github.com/henrygd/beszel/internal/hub/config" + "github.com/henrygd/beszel/internal/hub/heartbeat" "github.com/henrygd/beszel/internal/hub/systems" "github.com/henrygd/beszel/internal/records" "github.com/henrygd/beszel/internal/users" @@ -30,12 +31,14 @@ import ( type Hub struct { core.App *alerts.AlertManager - um *users.UserManager - rm *records.RecordManager - sm *systems.SystemManager - pubKey string - signer ssh.Signer - appURL string + um *users.UserManager + rm *records.RecordManager + sm *systems.SystemManager + hb *heartbeat.Heartbeat + hbStop chan struct{} + pubKey string + signer ssh.Signer + appURL string } // NewHub creates a new Hub instance with default configuration @@ -48,6 +51,10 @@ func NewHub(app core.App) *Hub { hub.rm = records.NewRecordManager(hub) hub.sm = systems.NewSystemManager(hub) hub.appURL, _ = GetEnv("APP_URL") + hub.hb = heartbeat.New(app, GetEnv) + if hub.hb != nil { + hub.hbStop = make(chan struct{}) + } return hub } @@ -88,6 +95,10 @@ func (h *Hub) StartHub() error { if err := h.sm.Initialize(); err != nil { return err } + // start heartbeat if configured + if h.hb != nil { + go h.hb.Start(h.hbStop) + }
return e.Next() }) @@ -287,6 +298,9 @@ func (h *Hub) registerApiRoutes(se *core.ServeEvent) error { }) // send test notification apiAuth.POST("/test-notification", h.SendTestNotification) + // heartbeat status and test + apiAuth.GET("/heartbeat-status", h.getHeartbeatStatus) + apiAuth.POST("/test-heartbeat", h.testHeartbeat) // get config.yml content apiAuth.GET("/config-yaml", config.GetYamlConfig) // handle agent websocket connection @@ -403,6 +417,36 @@ func (h *Hub) getUniversalToken(e *core.RequestEvent) error { return e.JSON(http.StatusOK, response) }
+// getHeartbeatStatus returns current heartbeat configuration and whether it's enabled.
+//
+// Restricted to admins: the settings page is hidden from other users in the
+// UI, but that is only a client-side check, and the configured URL may embed
+// a secret token for the monitoring service.
+func (h *Hub) getHeartbeatStatus(e *core.RequestEvent) error {
+	// NOTE(review): assumes the auth record keeps its role in a "role" field
+	// with the value "admin" — confirm against the other admin-only handlers.
+	if e.Auth == nil || e.Auth.GetString("role") != "admin" {
+		return e.ForbiddenError("Requires admin role", nil)
+	}
+	if h.hb == nil {
+		return e.JSON(http.StatusOK, map[string]any{
+			"enabled": false,
+			"msg":     "Set BESZEL_HUB_HEARTBEAT_URL to enable outbound heartbeat monitoring",
+		})
+	}
+	cfg := h.hb.GetConfig()
+	return e.JSON(http.StatusOK, map[string]any{
+		"enabled":  true,
+		"url":      cfg.URL,
+		"interval": cfg.Interval,
+		"method":   cfg.Method,
+	})
+}
+
+// testHeartbeat triggers a single heartbeat ping and returns the result.
+//
+// Restricted to admins because it makes the hub send an outbound request.
+// The response is always 200 for an authorized caller: "err" is false on
+// success and holds the error message otherwise.
+func (h *Hub) testHeartbeat(e *core.RequestEvent) error {
+	// NOTE(review): same "role" == "admin" assumption as getHeartbeatStatus.
+	if e.Auth == nil || e.Auth.GetString("role") != "admin" {
+		return e.ForbiddenError("Requires admin role", nil)
+	}
+	if h.hb == nil {
+		return e.JSON(http.StatusOK, map[string]any{
+			"err": "Heartbeat not configured. Set BESZEL_HUB_HEARTBEAT_URL environment variable.",
+		})
+	}
+	if err := h.hb.Send(); err != nil {
+		return e.JSON(http.StatusOK, map[string]any{"err": err.Error()})
+	}
+	return e.JSON(http.StatusOK, map[string]any{"err": false})
+}
+
// containerRequestHandler handles both container logs and info requests func (h *Hub) containerRequestHandler(e *core.RequestEvent, fetchFunc func(*systems.System, string) (string, error), responseKey string) error { systemID := e.Request.URL.Query().Get("system") diff --git a/internal/site/src/components/routes/settings/heartbeat.tsx b/internal/site/src/components/routes/settings/heartbeat.tsx new file mode 100644 index 00000000..00fc5af1 --- /dev/null +++ b/internal/site/src/components/routes/settings/heartbeat.tsx @@ -0,0 +1,215 @@ +import { t } from "@lingui/core/macro" +import { Trans } from "@lingui/react/macro" +import { redirectPage } from "@nanostores/router" +import clsx from "clsx" +import { LoaderCircleIcon, SendIcon } from "lucide-react" +import { useEffect, useState } from "react" +import { $router } from "@/components/router" +import { Badge } from "@/components/ui/badge" +import { Button } from "@/components/ui/button" +import { Separator } from "@/components/ui/separator" +import { toast } from "@/components/ui/use-toast" +import { isAdmin, pb } from "@/lib/api" + +interface HeartbeatStatus { + enabled: boolean + url?: string + interval?: number + method?: string + msg?: string +} + +export default function HeartbeatSettings() { + const [status, setStatus] = useState(null) + const [isLoading, setIsLoading] = useState(true) + const [isTesting, setIsTesting] = useState(false) + + if (!isAdmin()) { + redirectPage($router, "settings", { name: "general" }) + } + + useEffect(() => { + fetchStatus() + }, []) + + async function fetchStatus() { + try { + setIsLoading(true) + const res = await pb.send("/api/beszel/heartbeat-status", {}) + setStatus(res) + } catch (error: any) { + toast({ + title: t`Error`, + description: 
error.message, + variant: "destructive", + }) + } finally { + setIsLoading(false) + } + } + + async function sendTestHeartbeat() { + setIsTesting(true) + try { + const res = await pb.send<{ err: string | false }>("/api/beszel/test-heartbeat", { + method: "POST", + }) + if ("err" in res && !res.err) { + toast({ + title: t`Heartbeat sent successfully`, + description: t`Check your monitoring service`, + }) + } else { + toast({ + title: t`Error`, + description: (res.err as string) ?? t`Failed to send heartbeat`, + variant: "destructive", + }) + } + } catch (error: any) { + toast({ + title: t`Error`, + description: error.message, + variant: "destructive", + }) + } finally { + setIsTesting(false) + } + } + + const TestIcon = isTesting ? LoaderCircleIcon : SendIcon + + return ( +
+
+

+ Heartbeat Monitoring +

+

+ + Send periodic outbound pings to an external monitoring service so you can monitor Beszel without exposing it + to the internet. + +

+
+ + + {isLoading ? ( +
+ + Loading heartbeat status... +
+ ) : status?.enabled ? ( +
+
+ + Active + +
+
+ + + +
+ + + +
+

+ Test heartbeat +

+

+ Send a single heartbeat ping to verify your endpoint is working. +

+ +
+ + + +
+

+ Payload format +

+

+ + When using POST, each heartbeat includes a JSON payload with system status summary, list of down + systems, and triggered alerts. + +

+

+ + The overall status is ok when all systems + are up, warn when alerts are triggered, + and error when any system is down. + +

+
+
+ ) : ( +
+

+ Heartbeat monitoring is not configured. +

+
+

+ Configuration +

+

+ Set the following environment variables on your Beszel hub to enable heartbeat monitoring: +

+
+ + + +
+
+

+ After setting the environment variables, restart your Beszel hub for changes to take effect. +

+
+ )} +
+ ) +} + +function ConfigItem({ label, value, mono }: { label: string; value: string; mono?: boolean }) { + return ( +
+

{label}

+

{value}

+
+ ) +} + +function EnvVarItem({ name, description, example }: { name: string; description: string; example: string }) { + return ( +
+ {name} +

{description}

+

+ Example: {example} +

+
+ ) +} diff --git a/internal/site/src/components/routes/settings/layout.tsx b/internal/site/src/components/routes/settings/layout.tsx index e1ea4b85..33488e2d 100644 --- a/internal/site/src/components/routes/settings/layout.tsx +++ b/internal/site/src/components/routes/settings/layout.tsx @@ -2,7 +2,14 @@ import { t } from "@lingui/core/macro" import { Trans, useLingui } from "@lingui/react/macro" import { useStore } from "@nanostores/react" import { getPagePath, redirectPage } from "@nanostores/router" -import { AlertOctagonIcon, BellIcon, FileSlidersIcon, FingerprintIcon, SettingsIcon } from "lucide-react" +import { + AlertOctagonIcon, + BellIcon, + FileSlidersIcon, + FingerprintIcon, + HeartPulseIcon, + SettingsIcon, +} from "lucide-react" import { lazy, useEffect } from "react" import { $router } from "@/components/router.tsx" import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card.tsx" @@ -18,12 +25,14 @@ const notificationsSettingsImport = () => import("./notifications.tsx") const configYamlSettingsImport = () => import("./config-yaml.tsx") const fingerprintsSettingsImport = () => import("./tokens-fingerprints.tsx") const alertsHistoryDataTableSettingsImport = () => import("./alerts-history-data-table.tsx") +const heartbeatSettingsImport = () => import("./heartbeat.tsx") const GeneralSettings = lazy(generalSettingsImport) const NotificationsSettings = lazy(notificationsSettingsImport) const ConfigYamlSettings = lazy(configYamlSettingsImport) const FingerprintsSettings = lazy(fingerprintsSettingsImport) const AlertsHistoryDataTableSettings = lazy(alertsHistoryDataTableSettingsImport) +const HeartbeatSettings = lazy(heartbeatSettingsImport) export async function saveSettings(newSettings: Partial) { try { @@ -88,6 +97,13 @@ export default function SettingsLayout() { admin: true, preload: configYamlSettingsImport, }, + { + title: t`Heartbeat`, + href: getPagePath($router, "settings", { name: "heartbeat" }), + icon: 
HeartPulseIcon, + admin: true, + preload: heartbeatSettingsImport, + }, ] const page = useStore($router) @@ -141,5 +157,7 @@ function SettingsContent({ name }: { name: string }) { return case "alert-history": return + case "heartbeat": + return } } diff --git a/supplemental/CHANGELOG.md b/supplemental/CHANGELOG.md index ef1c8ab7..f04ffc1b 100644 --- a/supplemental/CHANGELOG.md +++ b/supplemental/CHANGELOG.md @@ -1,3 +1,9 @@ +## Unreleased + +- Add outbound heartbeat monitoring to external services (BetterStack, Uptime Kuma, Healthchecks.io, etc.) with system status summary payload. Configured via `BESZEL_HUB_HEARTBEAT_URL`, `BESZEL_HUB_HEARTBEAT_INTERVAL`, and `BESZEL_HUB_HEARTBEAT_METHOD` environment variables. + +- Add Heartbeat settings page to the admin UI with status display, configuration reference, and test button. + ## 0.18.3 - Add experimental sysfs AMD GPU collector. (#737, #1569)