Add outbound heartbeat monitoring (#1729)

* feat: add outbound heartbeat monitoring to external endpoints

Allow Beszel hub to periodically ping an external monitoring service
(e.g. BetterStack, Uptime Kuma, Healthchecks.io) with system status
summaries, enabling monitoring without exposing Beszel to the internet.

Configuration via environment variables:
- BESZEL_HUB_HEARTBEAT_URL: endpoint to ping (required to enable)
- BESZEL_HUB_HEARTBEAT_INTERVAL: seconds between pings (default: 60)
- BESZEL_HUB_HEARTBEAT_METHOD: HTTP method - POST/GET/HEAD (default: POST)
This commit is contained in:
Amir Moradi
2026-02-17 21:48:20 +01:00
committed by GitHub
parent 283fa9d5c2
commit cdfd116da0
5 changed files with 570 additions and 7 deletions

View File

@@ -0,0 +1,280 @@
// Package heartbeat sends periodic outbound pings to an external monitoring
// endpoint (e.g. BetterStack, Uptime Kuma, Healthchecks.io) so operators can
// monitor Beszel without exposing it to the internet.
package heartbeat
import (
"bytes"
"encoding/json"
"fmt"
"net/http"
"strconv"
"strings"
"time"
"github.com/henrygd/beszel"
"github.com/pocketbase/pocketbase/core"
)
// Default values for heartbeat configuration.
const (
defaultInterval = 60 // seconds
httpTimeout = 10 * time.Second
)
// Payload is the JSON body sent with each heartbeat request.
type Payload struct {
// Status is "ok" when all non-paused systems are up, "warn" when alerts
// are triggered but no systems are down, and "error" when any system is down.
Status string `json:"status"`
Timestamp string `json:"timestamp"`
Msg string `json:"msg"`
Systems SystemsSummary `json:"systems"`
Down []SystemInfo `json:"down_systems,omitempty"`
Alerts []AlertInfo `json:"triggered_alerts,omitempty"`
Version string `json:"beszel_version"`
}
// SystemsSummary contains counts of systems by status.
type SystemsSummary struct {
Total int `json:"total"`
Up int `json:"up"`
Down int `json:"down"`
Paused int `json:"paused"`
Pending int `json:"pending"`
}
// SystemInfo identifies a system that is currently down.
type SystemInfo struct {
ID string `json:"id" db:"id"`
Name string `json:"name" db:"name"`
Host string `json:"host" db:"host"`
}
// AlertInfo describes a currently triggered alert.
type AlertInfo struct {
SystemID string `json:"system_id"`
SystemName string `json:"system_name"`
AlertName string `json:"alert_name"`
Threshold float64 `json:"threshold"`
}
// Config holds heartbeat settings read from environment variables.
type Config struct {
URL string // endpoint to ping
Interval int // seconds between pings
Method string // HTTP method (GET or POST, default POST)
}
// Heartbeat manages the periodic outbound health check.
type Heartbeat struct {
app core.App
config Config
client *http.Client
}
// New creates a Heartbeat if configuration is present.
// Returns nil if HEARTBEAT_URL is not set (feature disabled).
func New(app core.App, getEnv func(string) (string, bool)) *Heartbeat {
url, ok := getEnv("HEARTBEAT_URL")
if !ok || url == "" {
return nil
}
interval := defaultInterval
if v, ok := getEnv("HEARTBEAT_INTERVAL"); ok {
if parsed, err := strconv.Atoi(v); err == nil && parsed > 0 {
interval = parsed
}
}
method := "POST"
if v, ok := getEnv("HEARTBEAT_METHOD"); ok {
v = strings.ToUpper(strings.TrimSpace(v))
if v == "GET" || v == "HEAD" {
method = v
}
}
return &Heartbeat{
app: app,
config: Config{
URL: url,
Interval: interval,
Method: method,
},
client: &http.Client{Timeout: httpTimeout},
}
}
// Start begins the heartbeat loop. It blocks and should be called in a goroutine.
// The loop runs until the provided stop channel is closed.
func (hb *Heartbeat) Start(stop <-chan struct{}) {
hb.app.Logger().Info("Heartbeat enabled",
"url", hb.config.URL,
"interval", fmt.Sprintf("%ds", hb.config.Interval),
"method", hb.config.Method,
)
// Send an initial heartbeat immediately on startup.
hb.send()
ticker := time.NewTicker(time.Duration(hb.config.Interval) * time.Second)
defer ticker.Stop()
for {
select {
case <-stop:
return
case <-ticker.C:
hb.send()
}
}
}
// Send performs a single heartbeat ping. Exposed for the test-heartbeat API endpoint.
func (hb *Heartbeat) Send() error {
return hb.send()
}
// GetConfig returns the current heartbeat configuration.
func (hb *Heartbeat) GetConfig() Config {
return hb.config
}
func (hb *Heartbeat) send() error {
payload, err := hb.buildPayload()
if err != nil {
hb.app.Logger().Error("Heartbeat: failed to build payload", "err", err)
return err
}
var req *http.Request
if hb.config.Method == "GET" || hb.config.Method == "HEAD" {
req, err = http.NewRequest(hb.config.Method, hb.config.URL, nil)
} else {
body, jsonErr := json.Marshal(payload)
if jsonErr != nil {
hb.app.Logger().Error("Heartbeat: failed to marshal payload", "err", jsonErr)
return jsonErr
}
req, err = http.NewRequest("POST", hb.config.URL, bytes.NewReader(body))
if err == nil {
req.Header.Set("Content-Type", "application/json")
}
}
if err != nil {
hb.app.Logger().Error("Heartbeat: failed to create request", "err", err)
return err
}
req.Header.Set("User-Agent", "Beszel-Heartbeat")
resp, err := hb.client.Do(req)
if err != nil {
hb.app.Logger().Error("Heartbeat: request failed", "url", hb.config.URL, "err", err)
return err
}
defer resp.Body.Close()
if resp.StatusCode >= 400 {
hb.app.Logger().Warn("Heartbeat: non-success response",
"url", hb.config.URL,
"status", resp.StatusCode,
)
return fmt.Errorf("heartbeat endpoint returned status %d", resp.StatusCode)
}
return nil
}
func (hb *Heartbeat) buildPayload() (*Payload, error) {
db := hb.app.DB()
// Count systems by status.
var systemCounts []struct {
Status string `db:"status"`
Count int `db:"cnt"`
}
err := db.NewQuery("SELECT status, COUNT(*) as cnt FROM systems GROUP BY status").All(&systemCounts)
if err != nil {
return nil, fmt.Errorf("query system counts: %w", err)
}
summary := SystemsSummary{}
for _, sc := range systemCounts {
switch sc.Status {
case "up":
summary.Up = sc.Count
case "down":
summary.Down = sc.Count
case "paused":
summary.Paused = sc.Count
case "pending":
summary.Pending = sc.Count
}
summary.Total += sc.Count
}
// Get names of down systems.
var downSystems []SystemInfo
err = db.NewQuery("SELECT id, name, host FROM systems WHERE status = 'down'").All(&downSystems)
if err != nil {
return nil, fmt.Errorf("query down systems: %w", err)
}
// Get triggered alerts with system names.
var triggeredAlerts []struct {
SystemID string `db:"system"`
SystemName string `db:"system_name"`
AlertName string `db:"name"`
Value float64 `db:"value"`
}
err = db.NewQuery(`
SELECT a.system, s.name as system_name, a.name, a.value
FROM alerts a
JOIN systems s ON a.system = s.id
WHERE a.triggered = true
`).All(&triggeredAlerts)
if err != nil {
// Non-fatal: alerts info is supplementary.
triggeredAlerts = nil
}
alerts := make([]AlertInfo, 0, len(triggeredAlerts))
for _, ta := range triggeredAlerts {
alerts = append(alerts, AlertInfo{
SystemID: ta.SystemID,
SystemName: ta.SystemName,
AlertName: ta.AlertName,
Threshold: ta.Value,
})
}
// Determine overall status.
status := "ok"
msg := "All systems operational"
if summary.Down > 0 {
status = "error"
names := make([]string, len(downSystems))
for i, ds := range downSystems {
names[i] = ds.Name
}
msg = fmt.Sprintf("%d system(s) down: %s", summary.Down, strings.Join(names, ", "))
} else if len(alerts) > 0 {
status = "warn"
msg = fmt.Sprintf("%d alert(s) triggered", len(alerts))
}
return &Payload{
Status: status,
Timestamp: time.Now().UTC().Format(time.RFC3339),
Msg: msg,
Systems: summary,
Down: downSystems,
Alerts: alerts,
Version: beszel.Version,
}, nil
}

View File

@@ -15,6 +15,7 @@ import (
"github.com/henrygd/beszel"
"github.com/henrygd/beszel/internal/alerts"
"github.com/henrygd/beszel/internal/hub/config"
"github.com/henrygd/beszel/internal/hub/heartbeat"
"github.com/henrygd/beszel/internal/hub/systems"
"github.com/henrygd/beszel/internal/records"
"github.com/henrygd/beszel/internal/users"
@@ -30,12 +31,14 @@ import (
type Hub struct {
core.App
*alerts.AlertManager
um *users.UserManager
rm *records.RecordManager
sm *systems.SystemManager
pubKey string
signer ssh.Signer
appURL string
um *users.UserManager
rm *records.RecordManager
sm *systems.SystemManager
hb *heartbeat.Heartbeat
hbStop chan struct{}
pubKey string
signer ssh.Signer
appURL string
}
// NewHub creates a new Hub instance with default configuration
@@ -48,6 +51,10 @@ func NewHub(app core.App) *Hub {
hub.rm = records.NewRecordManager(hub)
hub.sm = systems.NewSystemManager(hub)
hub.appURL, _ = GetEnv("APP_URL")
hub.hb = heartbeat.New(app, GetEnv)
if hub.hb != nil {
hub.hbStop = make(chan struct{})
}
return hub
}
@@ -88,6 +95,10 @@ func (h *Hub) StartHub() error {
if err := h.sm.Initialize(); err != nil {
return err
}
// start heartbeat if configured
if h.hb != nil {
go h.hb.Start(h.hbStop)
}
return e.Next()
})
@@ -287,6 +298,9 @@ func (h *Hub) registerApiRoutes(se *core.ServeEvent) error {
})
// send test notification
apiAuth.POST("/test-notification", h.SendTestNotification)
// heartbeat status and test
apiAuth.GET("/heartbeat-status", h.getHeartbeatStatus)
apiAuth.POST("/test-heartbeat", h.testHeartbeat)
// get config.yml content
apiAuth.GET("/config-yaml", config.GetYamlConfig)
// handle agent websocket connection
@@ -403,6 +417,36 @@ func (h *Hub) getUniversalToken(e *core.RequestEvent) error {
return e.JSON(http.StatusOK, response)
}
// getHeartbeatStatus returns current heartbeat configuration and whether it's enabled
func (h *Hub) getHeartbeatStatus(e *core.RequestEvent) error {
if h.hb == nil {
return e.JSON(http.StatusOK, map[string]any{
"enabled": false,
"msg": "Set BESZEL_HUB_HEARTBEAT_URL to enable outbound heartbeat monitoring",
})
}
cfg := h.hb.GetConfig()
return e.JSON(http.StatusOK, map[string]any{
"enabled": true,
"url": cfg.URL,
"interval": cfg.Interval,
"method": cfg.Method,
})
}
// testHeartbeat triggers a single heartbeat ping and returns the result
func (h *Hub) testHeartbeat(e *core.RequestEvent) error {
if h.hb == nil {
return e.JSON(http.StatusOK, map[string]any{
"err": "Heartbeat not configured. Set BESZEL_HUB_HEARTBEAT_URL environment variable.",
})
}
if err := h.hb.Send(); err != nil {
return e.JSON(http.StatusOK, map[string]any{"err": err.Error()})
}
return e.JSON(http.StatusOK, map[string]any{"err": false})
}
// containerRequestHandler handles both container logs and info requests
func (h *Hub) containerRequestHandler(e *core.RequestEvent, fetchFunc func(*systems.System, string) (string, error), responseKey string) error {
systemID := e.Request.URL.Query().Get("system")

View File

@@ -0,0 +1,215 @@
import { t } from "@lingui/core/macro"
import { Trans } from "@lingui/react/macro"
import { redirectPage } from "@nanostores/router"
import clsx from "clsx"
import { LoaderCircleIcon, SendIcon } from "lucide-react"
import { useEffect, useState } from "react"
import { $router } from "@/components/router"
import { Badge } from "@/components/ui/badge"
import { Button } from "@/components/ui/button"
import { Separator } from "@/components/ui/separator"
import { toast } from "@/components/ui/use-toast"
import { isAdmin, pb } from "@/lib/api"
interface HeartbeatStatus {
enabled: boolean
url?: string
interval?: number
method?: string
msg?: string
}
export default function HeartbeatSettings() {
const [status, setStatus] = useState<HeartbeatStatus | null>(null)
const [isLoading, setIsLoading] = useState(true)
const [isTesting, setIsTesting] = useState(false)
if (!isAdmin()) {
redirectPage($router, "settings", { name: "general" })
}
useEffect(() => {
fetchStatus()
}, [])
async function fetchStatus() {
try {
setIsLoading(true)
const res = await pb.send<HeartbeatStatus>("/api/beszel/heartbeat-status", {})
setStatus(res)
} catch (error: any) {
toast({
title: t`Error`,
description: error.message,
variant: "destructive",
})
} finally {
setIsLoading(false)
}
}
async function sendTestHeartbeat() {
setIsTesting(true)
try {
const res = await pb.send<{ err: string | false }>("/api/beszel/test-heartbeat", {
method: "POST",
})
if ("err" in res && !res.err) {
toast({
title: t`Heartbeat sent successfully`,
description: t`Check your monitoring service`,
})
} else {
toast({
title: t`Error`,
description: (res.err as string) ?? t`Failed to send heartbeat`,
variant: "destructive",
})
}
} catch (error: any) {
toast({
title: t`Error`,
description: error.message,
variant: "destructive",
})
} finally {
setIsTesting(false)
}
}
const TestIcon = isTesting ? LoaderCircleIcon : SendIcon
return (
<div>
<div>
<h3 className="text-xl font-medium mb-2">
<Trans>Heartbeat Monitoring</Trans>
</h3>
<p className="text-sm text-muted-foreground leading-relaxed">
<Trans>
Send periodic outbound pings to an external monitoring service so you can monitor Beszel without exposing it
to the internet.
</Trans>
</p>
</div>
<Separator className="my-4" />
{isLoading ? (
<div className="flex items-center gap-2 text-muted-foreground py-4">
<LoaderCircleIcon className="h-4 w-4 animate-spin" />
<Trans>Loading heartbeat status...</Trans>
</div>
) : status?.enabled ? (
<div className="space-y-5">
<div className="flex items-center gap-2">
<Badge variant="success">
<Trans>Active</Trans>
</Badge>
</div>
<div className="grid gap-4 sm:grid-cols-2">
<ConfigItem label={t`Endpoint URL`} value={status.url ?? ""} mono />
<ConfigItem label={t`Interval`} value={`${status.interval}s`} />
<ConfigItem label={t`HTTP Method`} value={status.method ?? "POST"} />
</div>
<Separator />
<div>
<h4 className="text-base font-medium mb-1">
<Trans>Test heartbeat</Trans>
</h4>
<p className="text-sm text-muted-foreground leading-relaxed mb-3">
<Trans>Send a single heartbeat ping to verify your endpoint is working.</Trans>
</p>
<Button
type="button"
variant="outline"
className="flex items-center gap-1.5"
onClick={sendTestHeartbeat}
disabled={isTesting}
>
<TestIcon className={clsx("h-4 w-4", isTesting && "animate-spin")} />
<Trans>Send test heartbeat</Trans>
</Button>
</div>
<Separator />
<div>
<h4 className="text-base font-medium mb-1">
<Trans>Payload format</Trans>
</h4>
<p className="text-sm text-muted-foreground leading-relaxed mb-2">
<Trans>
When using POST, each heartbeat includes a JSON payload with system status summary, list of down
systems, and triggered alerts.
</Trans>
</p>
<p className="text-sm text-muted-foreground leading-relaxed">
<Trans>
The overall status is <code className="bg-muted rounded-sm px-1 text-primary">ok</code> when all systems
are up, <code className="bg-muted rounded-sm px-1 text-primary">warn</code> when alerts are triggered,
and <code className="bg-muted rounded-sm px-1 text-primary">error</code> when any system is down.
</Trans>
</p>
</div>
</div>
) : (
<div className="space-y-4">
<p className="text-sm text-muted-foreground leading-relaxed">
<Trans>Heartbeat monitoring is not configured.</Trans>
</p>
<div>
<h4 className="text-base font-medium mb-1">
<Trans>Configuration</Trans>
</h4>
<p className="text-sm text-muted-foreground leading-relaxed mb-3">
<Trans>Set the following environment variables on your Beszel hub to enable heartbeat monitoring:</Trans>
</p>
<div className="grid gap-2">
<EnvVarItem
name="BESZEL_HUB_HEARTBEAT_URL"
description={t`Endpoint URL to ping (required)`}
example="https://uptime.betterstack.com/api/v1/heartbeat/xxxx"
/>
<EnvVarItem
name="BESZEL_HUB_HEARTBEAT_INTERVAL"
description={t`Seconds between pings (default: 60)`}
example="60"
/>
<EnvVarItem
name="BESZEL_HUB_HEARTBEAT_METHOD"
description={t`HTTP method: POST, GET, or HEAD (default: POST)`}
example="POST"
/>
</div>
</div>
<p className="text-sm text-muted-foreground leading-relaxed">
<Trans>After setting the environment variables, restart your Beszel hub for changes to take effect.</Trans>
</p>
</div>
)}
</div>
)
}
function ConfigItem({ label, value, mono }: { label: string; value: string; mono?: boolean }) {
return (
<div>
<p className="text-sm font-medium mb-0.5">{label}</p>
<p className={clsx("text-sm text-muted-foreground break-all", mono && "font-mono")}>{value}</p>
</div>
)
}
function EnvVarItem({ name, description, example }: { name: string; description: string; example: string }) {
return (
<div className="bg-muted/50 rounded-md px-3 py-2">
<code className="text-sm font-mono text-primary">{name}</code>
<p className="text-sm text-muted-foreground mt-0.5">{description}</p>
<p className="text-xs text-muted-foreground mt-0.5">
<Trans>Example:</Trans> <code className="font-mono">{example}</code>
</p>
</div>
)
}

View File

@@ -2,7 +2,14 @@ import { t } from "@lingui/core/macro"
import { Trans, useLingui } from "@lingui/react/macro"
import { useStore } from "@nanostores/react"
import { getPagePath, redirectPage } from "@nanostores/router"
import { AlertOctagonIcon, BellIcon, FileSlidersIcon, FingerprintIcon, SettingsIcon } from "lucide-react"
import {
AlertOctagonIcon,
BellIcon,
FileSlidersIcon,
FingerprintIcon,
HeartPulseIcon,
SettingsIcon,
} from "lucide-react"
import { lazy, useEffect } from "react"
import { $router } from "@/components/router.tsx"
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card.tsx"
@@ -18,12 +25,14 @@ const notificationsSettingsImport = () => import("./notifications.tsx")
const configYamlSettingsImport = () => import("./config-yaml.tsx")
const fingerprintsSettingsImport = () => import("./tokens-fingerprints.tsx")
const alertsHistoryDataTableSettingsImport = () => import("./alerts-history-data-table.tsx")
const heartbeatSettingsImport = () => import("./heartbeat.tsx")
const GeneralSettings = lazy(generalSettingsImport)
const NotificationsSettings = lazy(notificationsSettingsImport)
const ConfigYamlSettings = lazy(configYamlSettingsImport)
const FingerprintsSettings = lazy(fingerprintsSettingsImport)
const AlertsHistoryDataTableSettings = lazy(alertsHistoryDataTableSettingsImport)
const HeartbeatSettings = lazy(heartbeatSettingsImport)
export async function saveSettings(newSettings: Partial<UserSettings>) {
try {
@@ -88,6 +97,13 @@ export default function SettingsLayout() {
admin: true,
preload: configYamlSettingsImport,
},
{
title: t`Heartbeat`,
href: getPagePath($router, "settings", { name: "heartbeat" }),
icon: HeartPulseIcon,
admin: true,
preload: heartbeatSettingsImport,
},
]
const page = useStore($router)
@@ -141,5 +157,7 @@ function SettingsContent({ name }: { name: string }) {
return <FingerprintsSettings />
case "alert-history":
return <AlertsHistoryDataTableSettings />
case "heartbeat":
return <HeartbeatSettings />
}
}

View File

@@ -1,3 +1,9 @@
## Unreleased
- Add outbound heartbeat monitoring to external services (BetterStack, Uptime Kuma, Healthchecks.io, etc.) with system status summary payload. Configured via `BESZEL_HUB_HEARTBEAT_URL`, `BESZEL_HUB_HEARTBEAT_INTERVAL`, and `BESZEL_HUB_HEARTBEAT_METHOD` environment variables.
- Add Heartbeat settings page to the admin UI with status display, configuration reference, and test button.
## 0.18.3
- Add experimental sysfs AMD GPU collector. (#737, #1569)