diff --git a/internal/hub/heartbeat/heartbeat.go b/internal/hub/heartbeat/heartbeat.go
new file mode 100644
index 00000000..f577529e
--- /dev/null
+++ b/internal/hub/heartbeat/heartbeat.go
@@ -0,0 +1,298 @@
+// Package heartbeat sends periodic outbound pings to an external monitoring
+// endpoint (e.g. BetterStack, Uptime Kuma, Healthchecks.io) so operators can
+// monitor Beszel without exposing it to the internet.
+package heartbeat
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/henrygd/beszel"
+	"github.com/pocketbase/pocketbase/core"
+)
+
+// Default values for heartbeat configuration.
+const (
+	defaultInterval = 60 // seconds
+	httpTimeout     = 10 * time.Second
+)
+
+// maxDrainBytes caps how much of a response body is read before it is closed.
+const maxDrainBytes = 64 << 10
+
+// Payload is the JSON body sent with each POST heartbeat request.
+type Payload struct {
+	// Status is "error" when any system is down, "warn" when no system is
+	// down but at least one alert is triggered, and "ok" otherwise.
+	Status    string         `json:"status"`
+	Timestamp string         `json:"timestamp"`
+	Msg       string         `json:"msg"`
+	Systems   SystemsSummary `json:"systems"`
+	Down      []SystemInfo   `json:"down_systems,omitempty"`
+	Alerts    []AlertInfo    `json:"triggered_alerts,omitempty"`
+	Version   string         `json:"beszel_version"`
+}
+
+// SystemsSummary contains counts of systems by status.
+type SystemsSummary struct {
+	Total   int `json:"total"`
+	Up      int `json:"up"`
+	Down    int `json:"down"`
+	Paused  int `json:"paused"`
+	Pending int `json:"pending"`
+}
+
+// SystemInfo identifies a system that is currently down.
+type SystemInfo struct {
+	ID   string `json:"id" db:"id"`
+	Name string `json:"name" db:"name"`
+	Host string `json:"host" db:"host"`
+}
+
+// AlertInfo describes a currently triggered alert.
+type AlertInfo struct {
+	SystemID   string  `json:"system_id"`
+	SystemName string  `json:"system_name"`
+	AlertName  string  `json:"alert_name"`
+	Threshold  float64 `json:"threshold"`
+}
+
+// Config holds heartbeat settings read from environment variables.
+type Config struct {
+	URL      string // endpoint to ping
+	Interval int    // seconds between pings
+	Method   string // HTTP method (GET, HEAD or POST; default POST)
+}
+
+// Heartbeat manages the periodic outbound health check.
+type Heartbeat struct {
+	app    core.App
+	config Config
+	client *http.Client
+}
+
+// New creates a Heartbeat from environment configuration.
+// It returns nil if HEARTBEAT_URL is unset or blank (feature disabled).
+// HEARTBEAT_INTERVAL is used when it parses as a positive number of seconds
+// and HEARTBEAT_METHOD when it is GET or HEAD; otherwise the defaults apply.
+func New(app core.App, getEnv func(string) (string, bool)) *Heartbeat {
+	url, ok := getEnv("HEARTBEAT_URL")
+	url = strings.TrimSpace(url)
+	if !ok || url == "" {
+		return nil
+	}
+
+	interval := defaultInterval
+	if v, ok := getEnv("HEARTBEAT_INTERVAL"); ok {
+		if parsed, err := strconv.Atoi(strings.TrimSpace(v)); err == nil && parsed > 0 {
+			interval = parsed
+		}
+	}
+
+	method := "POST"
+	if v, ok := getEnv("HEARTBEAT_METHOD"); ok {
+		v = strings.ToUpper(strings.TrimSpace(v))
+		if v == "GET" || v == "HEAD" {
+			method = v
+		}
+	}
+
+	return &Heartbeat{
+		app: app,
+		config: Config{
+			URL:      url,
+			Interval: interval,
+			Method:   method,
+		},
+		client: &http.Client{Timeout: httpTimeout},
+	}
+}
+
+// Start begins the heartbeat loop. It blocks and should be called in a goroutine.
+// The loop runs until the provided stop channel is closed.
+func (hb *Heartbeat) Start(stop <-chan struct{}) {
+	hb.app.Logger().Info("Heartbeat enabled",
+		"url", hb.config.URL,
+		"interval", fmt.Sprintf("%ds", hb.config.Interval),
+		"method", hb.config.Method,
+	)
+
+	// Send an initial heartbeat immediately on startup. send logs its own
+	// failures, so the returned error is not needed here.
+	hb.send()
+
+	ticker := time.NewTicker(time.Duration(hb.config.Interval) * time.Second)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-stop:
+			return
+		case <-ticker.C:
+			hb.send()
+		}
+	}
+}
+
+// Send performs a single heartbeat ping. Exposed for the test-heartbeat API endpoint.
+func (hb *Heartbeat) Send() error {
+	return hb.send()
+}
+
+// GetConfig returns the current heartbeat configuration.
+func (hb *Heartbeat) GetConfig() Config {
+	return hb.config
+}
+
+// send builds the status payload and issues one request to the configured URL.
+// GET and HEAD requests carry no body; POST sends the payload as JSON. The
+// payload is built for every method, so a failing system query aborts the ping
+// regardless of method. Failures are logged here because the loop in Start
+// discards the returned error. A response status of 400 or above is an error.
+func (hb *Heartbeat) send() error {
+	payload, err := hb.buildPayload()
+	if err != nil {
+		hb.app.Logger().Error("Heartbeat: failed to build payload", "err", err)
+		return err
+	}
+
+	var req *http.Request
+	if hb.config.Method == "GET" || hb.config.Method == "HEAD" {
+		req, err = http.NewRequest(hb.config.Method, hb.config.URL, nil)
+	} else {
+		body, jsonErr := json.Marshal(payload)
+		if jsonErr != nil {
+			hb.app.Logger().Error("Heartbeat: failed to marshal payload", "err", jsonErr)
+			return jsonErr
+		}
+		req, err = http.NewRequest("POST", hb.config.URL, bytes.NewReader(body))
+		if err == nil {
+			req.Header.Set("Content-Type", "application/json")
+		}
+	}
+	if err != nil {
+		hb.app.Logger().Error("Heartbeat: failed to create request", "err", err)
+		return err
+	}
+
+	req.Header.Set("User-Agent", "Beszel-Heartbeat")
+
+	resp, err := hb.client.Do(req)
+	if err != nil {
+		hb.app.Logger().Error("Heartbeat: request failed", "url", hb.config.URL, "err", err)
+		return err
+	}
+	defer resp.Body.Close()
+
+	// Read a bounded amount of the body before it is closed so the transport
+	// can reuse the connection for the next ping. A read failure here does not
+	// change the heartbeat outcome, so it is deliberately ignored.
+	_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, maxDrainBytes))
+
+	if resp.StatusCode >= 400 {
+		hb.app.Logger().Warn("Heartbeat: non-success response",
+			"url", hb.config.URL,
+			"status", resp.StatusCode,
+		)
+		return fmt.Errorf("heartbeat endpoint returned status %d", resp.StatusCode)
+	}
+
+	return nil
+}
+
+// buildPayload reads the systems and alerts tables and assembles the heartbeat
+// payload. It returns an error only when the systems query fails; a failing
+// alerts query is logged and the payload is sent without alert details.
+func (hb *Heartbeat) buildPayload() (*Payload, error) {
+	db := hb.app.DB()
+
+	// Load every system in one query and derive both the per-status counts and
+	// the list of down systems from the same rows, so the two always agree.
+	var systemRows []struct {
+		ID     string `db:"id"`
+		Name   string `db:"name"`
+		Host   string `db:"host"`
+		Status string `db:"status"`
+	}
+	err := db.NewQuery("SELECT id, name, host, status FROM systems").All(&systemRows)
+	if err != nil {
+		return nil, fmt.Errorf("query systems: %w", err)
+	}
+
+	summary := SystemsSummary{Total: len(systemRows)}
+	var downSystems []SystemInfo
+	for _, row := range systemRows {
+		switch row.Status {
+		case "up":
+			summary.Up++
+		case "down":
+			summary.Down++
+			downSystems = append(downSystems, SystemInfo{ID: row.ID, Name: row.Name, Host: row.Host})
+		case "paused":
+			summary.Paused++
+		case "pending":
+			summary.Pending++
+		}
+	}
+
+	// Get triggered alerts with system names.
+	var triggeredAlerts []struct {
+		SystemID   string  `db:"system"`
+		SystemName string  `db:"system_name"`
+		AlertName  string  `db:"name"`
+		Value      float64 `db:"value"`
+	}
+	err = db.NewQuery(`
+		SELECT a.system, s.name as system_name, a.name, a.value
+		FROM alerts a
+		JOIN systems s ON a.system = s.id
+		WHERE a.triggered = true
+	`).All(&triggeredAlerts)
+	if err != nil {
+		// Non-fatal: alert details are supplementary, so log the failure and
+		// send the heartbeat without them.
+		hb.app.Logger().Warn("Heartbeat: failed to query triggered alerts", "err", err)
+		triggeredAlerts = nil
+	}
+
+	alerts := make([]AlertInfo, 0, len(triggeredAlerts))
+	for _, ta := range triggeredAlerts {
+		alerts = append(alerts, AlertInfo{
+			SystemID:   ta.SystemID,
+			SystemName: ta.SystemName,
+			AlertName:  ta.AlertName,
+			Threshold:  ta.Value,
+		})
+	}
+
+	// Determine overall status: a down system outranks a triggered alert.
+	status := "ok"
+	msg := "All systems operational"
+	if summary.Down > 0 {
+		status = "error"
+		names := make([]string, len(downSystems))
+		for i, ds := range downSystems {
+			names[i] = ds.Name
+		}
+		msg = fmt.Sprintf("%d system(s) down: %s", summary.Down, strings.Join(names, ", "))
+	} else if len(alerts) > 0 {
+		status = "warn"
+		msg = fmt.Sprintf("%d alert(s) triggered", len(alerts))
+	}
+
+	return &Payload{
+		Status:    status,
+		Timestamp: time.Now().UTC().Format(time.RFC3339),
+		Msg:       msg,
+		Systems:   summary,
+		Down:      downSystems,
+		Alerts:    alerts,
+		Version:   beszel.Version,
+	}, nil
+}
diff --git a/internal/hub/hub.go b/internal/hub/hub.go
index 82b21f44..70d96d93 100644
--- a/internal/hub/hub.go
+++ b/internal/hub/hub.go
@@ -15,6 +15,7 @@ import (
 	"github.com/henrygd/beszel"
 	"github.com/henrygd/beszel/internal/alerts"
 	"github.com/henrygd/beszel/internal/hub/config"
+	"github.com/henrygd/beszel/internal/hub/heartbeat"
 	"github.com/henrygd/beszel/internal/hub/systems"
 	"github.com/henrygd/beszel/internal/records"
 	"github.com/henrygd/beszel/internal/users"
@@ -30,12 +31,14 @@ import (
 type Hub struct {
 	core.App
 	*alerts.AlertManager
-	um     *users.UserManager
-	rm     *records.RecordManager
-	sm     *systems.SystemManager
-	pubKey string
-	signer ssh.Signer
-	appURL string
+	um     *users.UserManager
+	rm     *records.RecordManager
+	sm     *systems.SystemManager
+	hb     *heartbeat.Heartbeat
+	hbStop chan struct{}
+	pubKey string
+	signer ssh.Signer
+	appURL string
 }
 
 // NewHub creates a new Hub instance with default configuration
@@ -48,6 +51,10 @@ func NewHub(app core.App) *Hub {
 	hub.rm = records.NewRecordManager(hub)
 	hub.sm = systems.NewSystemManager(hub)
 	hub.appURL, _ = GetEnv("APP_URL")
+	hub.hb = heartbeat.New(app, GetEnv)
+	if hub.hb != nil {
+		hub.hbStop = make(chan struct{})
+	}
 	return hub
 }
 
@@ -88,6 +95,10 @@ func (h *Hub) StartHub() error {
 		if err := h.sm.Initialize(); err != nil {
 			return err
 		}
+		// start heartbeat if configured
+		if h.hb != nil {
+			go h.hb.Start(h.hbStop)
+		}
return e.Next() }) @@ -287,6 +298,9 @@ func (h *Hub) registerApiRoutes(se *core.ServeEvent) error { }) // send test notification apiAuth.POST("/test-notification", h.SendTestNotification) + // heartbeat status and test + apiAuth.GET("/heartbeat-status", h.getHeartbeatStatus) + apiAuth.POST("/test-heartbeat", h.testHeartbeat) // get config.yml content apiAuth.GET("/config-yaml", config.GetYamlConfig) // handle agent websocket connection @@ -403,6 +417,36 @@ func (h *Hub) getUniversalToken(e *core.RequestEvent) error { return e.JSON(http.StatusOK, response) } +// getHeartbeatStatus returns current heartbeat configuration and whether it's enabled +func (h *Hub) getHeartbeatStatus(e *core.RequestEvent) error { + if h.hb == nil { + return e.JSON(http.StatusOK, map[string]any{ + "enabled": false, + "msg": "Set BESZEL_HUB_HEARTBEAT_URL to enable outbound heartbeat monitoring", + }) + } + cfg := h.hb.GetConfig() + return e.JSON(http.StatusOK, map[string]any{ + "enabled": true, + "url": cfg.URL, + "interval": cfg.Interval, + "method": cfg.Method, + }) +} + +// testHeartbeat triggers a single heartbeat ping and returns the result +func (h *Hub) testHeartbeat(e *core.RequestEvent) error { + if h.hb == nil { + return e.JSON(http.StatusOK, map[string]any{ + "err": "Heartbeat not configured. 
Set BESZEL_HUB_HEARTBEAT_URL environment variable.", + }) + } + if err := h.hb.Send(); err != nil { + return e.JSON(http.StatusOK, map[string]any{"err": err.Error()}) + } + return e.JSON(http.StatusOK, map[string]any{"err": false}) +} + // containerRequestHandler handles both container logs and info requests func (h *Hub) containerRequestHandler(e *core.RequestEvent, fetchFunc func(*systems.System, string) (string, error), responseKey string) error { systemID := e.Request.URL.Query().Get("system") diff --git a/internal/site/src/components/routes/settings/heartbeat.tsx b/internal/site/src/components/routes/settings/heartbeat.tsx new file mode 100644 index 00000000..00fc5af1 --- /dev/null +++ b/internal/site/src/components/routes/settings/heartbeat.tsx @@ -0,0 +1,215 @@ +import { t } from "@lingui/core/macro" +import { Trans } from "@lingui/react/macro" +import { redirectPage } from "@nanostores/router" +import clsx from "clsx" +import { LoaderCircleIcon, SendIcon } from "lucide-react" +import { useEffect, useState } from "react" +import { $router } from "@/components/router" +import { Badge } from "@/components/ui/badge" +import { Button } from "@/components/ui/button" +import { Separator } from "@/components/ui/separator" +import { toast } from "@/components/ui/use-toast" +import { isAdmin, pb } from "@/lib/api" + +interface HeartbeatStatus { + enabled: boolean + url?: string + interval?: number + method?: string + msg?: string +} + +export default function HeartbeatSettings() { + const [status, setStatus] = useState(null) + const [isLoading, setIsLoading] = useState(true) + const [isTesting, setIsTesting] = useState(false) + + if (!isAdmin()) { + redirectPage($router, "settings", { name: "general" }) + } + + useEffect(() => { + fetchStatus() + }, []) + + async function fetchStatus() { + try { + setIsLoading(true) + const res = await pb.send("/api/beszel/heartbeat-status", {}) + setStatus(res) + } catch (error: any) { + toast({ + title: t`Error`, + description: 
error.message, + variant: "destructive", + }) + } finally { + setIsLoading(false) + } + } + + async function sendTestHeartbeat() { + setIsTesting(true) + try { + const res = await pb.send<{ err: string | false }>("/api/beszel/test-heartbeat", { + method: "POST", + }) + if ("err" in res && !res.err) { + toast({ + title: t`Heartbeat sent successfully`, + description: t`Check your monitoring service`, + }) + } else { + toast({ + title: t`Error`, + description: (res.err as string) ?? t`Failed to send heartbeat`, + variant: "destructive", + }) + } + } catch (error: any) { + toast({ + title: t`Error`, + description: error.message, + variant: "destructive", + }) + } finally { + setIsTesting(false) + } + } + + const TestIcon = isTesting ? LoaderCircleIcon : SendIcon + + return ( +
+
+

+ Heartbeat Monitoring +

+

+ + Send periodic outbound pings to an external monitoring service so you can monitor Beszel without exposing it + to the internet. + +

+
+ + + {isLoading ? ( +
+ + Loading heartbeat status... +
+ ) : status?.enabled ? ( +
+
+ + Active + +
+
+ + + +
+ + + +
+

+ Test heartbeat +

+

+ Send a single heartbeat ping to verify your endpoint is working. +

+ +
+ + + +
+

+ Payload format +

+

+ + When using POST, each heartbeat includes a JSON payload with system status summary, list of down + systems, and triggered alerts. + +

+

+ + The overall status is ok when all systems + are up, warn when alerts are triggered, + and error when any system is down. + +

+
+
+ ) : ( +
+

+ Heartbeat monitoring is not configured. +

+
+

+ Configuration +

+

+ Set the following environment variables on your Beszel hub to enable heartbeat monitoring: +

+
+ + + +
+
+

+ After setting the environment variables, restart your Beszel hub for changes to take effect. +

+
+ )} +
+ ) +} + +function ConfigItem({ label, value, mono }: { label: string; value: string; mono?: boolean }) { + return ( +
+

{label}

+

{value}

+
+ ) +} + +function EnvVarItem({ name, description, example }: { name: string; description: string; example: string }) { + return ( +
+ {name} +

{description}

+

+ Example: {example} +

+
+ ) +} diff --git a/internal/site/src/components/routes/settings/layout.tsx b/internal/site/src/components/routes/settings/layout.tsx index e1ea4b85..33488e2d 100644 --- a/internal/site/src/components/routes/settings/layout.tsx +++ b/internal/site/src/components/routes/settings/layout.tsx @@ -2,7 +2,14 @@ import { t } from "@lingui/core/macro" import { Trans, useLingui } from "@lingui/react/macro" import { useStore } from "@nanostores/react" import { getPagePath, redirectPage } from "@nanostores/router" -import { AlertOctagonIcon, BellIcon, FileSlidersIcon, FingerprintIcon, SettingsIcon } from "lucide-react" +import { + AlertOctagonIcon, + BellIcon, + FileSlidersIcon, + FingerprintIcon, + HeartPulseIcon, + SettingsIcon, +} from "lucide-react" import { lazy, useEffect } from "react" import { $router } from "@/components/router.tsx" import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card.tsx" @@ -18,12 +25,14 @@ const notificationsSettingsImport = () => import("./notifications.tsx") const configYamlSettingsImport = () => import("./config-yaml.tsx") const fingerprintsSettingsImport = () => import("./tokens-fingerprints.tsx") const alertsHistoryDataTableSettingsImport = () => import("./alerts-history-data-table.tsx") +const heartbeatSettingsImport = () => import("./heartbeat.tsx") const GeneralSettings = lazy(generalSettingsImport) const NotificationsSettings = lazy(notificationsSettingsImport) const ConfigYamlSettings = lazy(configYamlSettingsImport) const FingerprintsSettings = lazy(fingerprintsSettingsImport) const AlertsHistoryDataTableSettings = lazy(alertsHistoryDataTableSettingsImport) +const HeartbeatSettings = lazy(heartbeatSettingsImport) export async function saveSettings(newSettings: Partial) { try { @@ -88,6 +97,13 @@ export default function SettingsLayout() { admin: true, preload: configYamlSettingsImport, }, + { + title: t`Heartbeat`, + href: getPagePath($router, "settings", { name: "heartbeat" }), + icon: 
HeartPulseIcon, + admin: true, + preload: heartbeatSettingsImport, + }, ] const page = useStore($router) @@ -141,5 +157,7 @@ function SettingsContent({ name }: { name: string }) { return case "alert-history": return + case "heartbeat": + return } } diff --git a/supplemental/CHANGELOG.md b/supplemental/CHANGELOG.md index ef1c8ab7..f04ffc1b 100644 --- a/supplemental/CHANGELOG.md +++ b/supplemental/CHANGELOG.md @@ -1,3 +1,9 @@ +## Unreleased + +- Add outbound heartbeat monitoring to external services (BetterStack, Uptime Kuma, Healthchecks.io, etc.) with system status summary payload. Configured via `BESZEL_HUB_HEARTBEAT_URL`, `BESZEL_HUB_HEARTBEAT_INTERVAL`, and `BESZEL_HUB_HEARTBEAT_METHOD` environment variables. + +- Add Heartbeat settings page to the admin UI with status display, configuration reference, and test button. + ## 0.18.3 - Add experimental sysfs AMD GPU collector. (#737, #1569)