This commit is contained in:
henrygd
2026-04-22 19:19:34 -04:00
parent a35cc6ef39
commit 1c5808f430
8 changed files with 95 additions and 52 deletions

View File

@@ -13,11 +13,23 @@ import (
"github.com/henrygd/beszel/internal/entities/probe" "github.com/henrygd/beszel/internal/entities/probe"
) )
// Probe functionality overview:
// Probes run at user-defined intervals (e.g., every 10s).
// To keep memory usage low and constant, data is stored in two layers:
// 1. Raw samples: The most recent individual results (kept for probeRawRetention).
// 2. Minute buckets: A fixed-size ring buffer of 61 buckets, each representing one
// wall-clock minute. Samples collected within the same minute are aggregated
// (sum, min, max, count) into a single bucket.
//
// Short-term requests (<= 2m) use raw samples for perfect accuracy.
// Long-term requests (up to 1h) use the minute buckets to avoid storing thousands
// of individual data points.
const ( const (
probeRawRetention = 2 * time.Minute // probeRawRetention is the duration to keep individual samples for high-precision short-term requests
probeMinuteBucketTTL = time.Hour probeRawRetention = 80 * time.Second
probeMinuteBucketLen = int(probeMinuteBucketTTL/time.Minute) + 1 // probeMinuteBucketLen is the number of 1-minute buckets to keep (1 hour + 1 for partials)
probeHourWindow = time.Hour probeMinuteBucketLen int32 = 61
) )
// ProbeManager manages network probe tasks. // ProbeManager manages network probe tasks.
@@ -44,7 +56,7 @@ type probeSample struct {
// probeBucket stores one minute of aggregated probe data. // probeBucket stores one minute of aggregated probe data.
type probeBucket struct { type probeBucket struct {
minute int64 minute int32
filled bool filled bool
stats probeAggregate stats probeAggregate
} }
@@ -121,7 +133,7 @@ func (agg probeAggregate) result() probe.Result {
avg, avg,
minMs, minMs,
math.Round(agg.maxMs*100) / 100, math.Round(agg.maxMs*100) / 100,
math.Round(float64(agg.totalCount-agg.successCount)/float64(agg.totalCount)*10000) / 100, agg.lossPercentage(),
} }
} }
@@ -133,6 +145,14 @@ func (agg probeAggregate) avgResponse() float64 {
return math.Round(agg.sumMs/float64(agg.successCount)*100) / 100 return math.Round(agg.sumMs/float64(agg.successCount)*100) / 100
} }
// lossPercentage returns the rounded failure rate for the aggregate.
func (agg probeAggregate) lossPercentage() float64 {
if agg.totalCount == 0 {
return 0
}
return math.Round(float64(agg.totalCount-agg.successCount)/float64(agg.totalCount)*10000) / 100
}
// SyncProbes replaces all probe tasks with the given configs. // SyncProbes replaces all probe tasks with the given configs.
func (pm *ProbeManager) SyncProbes(configs []probe.Config) { func (pm *ProbeManager) SyncProbes(configs []probe.Config) {
pm.mu.Lock() pm.mu.Lock()
@@ -180,7 +200,7 @@ func (pm *ProbeManager) GetResults(durationMs uint16) map[string]probe.Result {
task.mu.Lock() task.mu.Lock()
agg := task.aggregateLocked(duration, now) agg := task.aggregateLocked(duration, now)
// The live request window still controls avg/loss, but the range fields are always 1h. // The live request window still controls avg/loss, but the range fields are always 1h.
hourAgg := task.aggregateLocked(probeHourWindow, now) hourAgg := task.aggregateLocked(time.Hour, now)
task.mu.Unlock() task.mu.Unlock()
if !agg.hasData() { if !agg.hasData() {
@@ -189,16 +209,17 @@ func (pm *ProbeManager) GetResults(durationMs uint16) map[string]probe.Result {
result := agg.result() result := agg.result()
hourAvg := hourAgg.avgResponse() hourAvg := hourAgg.avgResponse()
hourLoss := hourAgg.lossPercentage()
if hourAgg.successCount > 0 { if hourAgg.successCount > 0 {
result = probe.Result{ result = probe.Result{
result[0], result[0],
hourAvg, hourAvg,
math.Round(hourAgg.minMs*100) / 100, math.Round(hourAgg.minMs*100) / 100,
math.Round(hourAgg.maxMs*100) / 100, math.Round(hourAgg.maxMs*100) / 100,
result[3], hourLoss,
} }
} else { } else {
result = probe.Result{result[0], hourAvg, 0, 0, result[3]} result = probe.Result{result[0], hourAvg, 0, 0, hourLoss}
} }
results[key] = result results[key] = result
} }
@@ -262,8 +283,8 @@ func aggregateSamplesSince(samples []probeSample, cutoff time.Time) probeAggrega
// aggregateBucketsSince aggregates minute buckets overlapping the requested window. // aggregateBucketsSince aggregates minute buckets overlapping the requested window.
func aggregateBucketsSince(buckets []probeBucket, cutoff, now time.Time) probeAggregate { func aggregateBucketsSince(buckets []probeBucket, cutoff, now time.Time) probeAggregate {
agg := newProbeAggregate() agg := newProbeAggregate()
startMinute := cutoff.Unix() / 60 startMinute := int32(cutoff.Unix() / 60)
endMinute := now.Unix() / 60 endMinute := int32(now.Unix() / 60)
for _, bucket := range buckets { for _, bucket := range buckets {
if !bucket.filled || bucket.minute < startMinute || bucket.minute > endMinute { if !bucket.filled || bucket.minute < startMinute || bucket.minute > endMinute {
continue continue
@@ -292,9 +313,9 @@ func (task *probeTask) addSampleLocked(sample probeSample) {
} }
task.samples = append(task.samples, sample) task.samples = append(task.samples, sample)
minute := sample.timestamp.Unix() / 60 minute := int32(sample.timestamp.Unix() / 60)
// Each slot stores one wall-clock minute, so the ring stays fixed-size at ~1h per probe. // Each slot stores one wall-clock minute, so the ring stays fixed-size at ~1h per probe.
bucket := &task.buckets[int(minute%int64(probeMinuteBucketLen))] bucket := &task.buckets[minute%probeMinuteBucketLen]
if !bucket.filled || bucket.minute != minute { if !bucket.filled || bucket.minute != minute {
bucket.minute = minute bucket.minute = minute
bucket.filled = true bucket.filled = true

View File

@@ -85,5 +85,5 @@ func TestProbeManagerGetResultsIncludesHourResponseRange(t *testing.T) {
assert.Equal(t, 25.0, result[1]) assert.Equal(t, 25.0, result[1])
assert.Equal(t, 10.0, result[2]) assert.Equal(t, 10.0, result[2])
assert.Equal(t, 40.0, result[3]) assert.Equal(t, 40.0, result[3])
assert.Equal(t, 0.0, result[4]) assert.Equal(t, 20.0, result[4])
} }

View File

@@ -20,7 +20,7 @@ type Config struct {
// //
// 3: max response over the last hour in ms // 3: max response over the last hour in ms
// //
// 4: packet loss percentage (0-100) // 4: packet loss percentage over the last hour (0-100)
type Result []float64 type Result []float64
// Key returns the map key used for this probe config (e.g. "icmp:1.1.1.1", "tcp:host:443", "http:https://example.com"). // Key returns the map key used for this probe config (e.g. "icmp:1.1.1.1", "tcp:host:443", "http:https://example.com").

View File

@@ -335,7 +335,7 @@ func updateNetworkProbesRecords(app core.App, data map[string]probe.Result, syst
if !realtimeActive { if !realtimeActive {
db = app.DB() db = app.DB()
nowString = time.Now().UTC().Format(types.DefaultDateLayout) nowString = time.Now().UTC().Format(types.DefaultDateLayout)
sql := fmt.Sprintf("UPDATE %s SET resAvg={:resAvg}, resMin1h={:resMin1h}, resMax1h={:resMax1h}, resAvg1h={:resAvg1h}, loss={:loss}, updated={:updated} WHERE id={:id}", collectionName) sql := fmt.Sprintf("UPDATE %s SET resAvg={:resAvg}, resMin1h={:resMin1h}, resMax1h={:resMax1h}, resAvg1h={:resAvg1h}, loss1h={:loss1h}, updated={:updated} WHERE id={:id}", collectionName)
updateQuery = db.NewQuery(sql) updateQuery = db.NewQuery(sql)
} }
@@ -376,7 +376,7 @@ func updateNetworkProbesRecords(app core.App, data map[string]probe.Result, syst
record.Set("resAvg1h", probeMetric(values, 1)) record.Set("resAvg1h", probeMetric(values, 1))
record.Set("resMin1h", probeMetric(values, 2)) record.Set("resMin1h", probeMetric(values, 2))
record.Set("resMax1h", probeMetric(values, 3)) record.Set("resMax1h", probeMetric(values, 3))
record.Set("loss", probeMetric(values, 4)) record.Set("loss1h", probeMetric(values, 4))
err = app.SaveNoValidate(record) err = app.SaveNoValidate(record)
} }
default: default:
@@ -386,7 +386,7 @@ func updateNetworkProbesRecords(app core.App, data map[string]probe.Result, syst
"resAvg1h": probeMetric(values, 1), "resAvg1h": probeMetric(values, 1),
"resMin1h": probeMetric(values, 2), "resMin1h": probeMetric(values, 2),
"resMax1h": probeMetric(values, 3), "resMax1h": probeMetric(values, 3),
"loss": probeMetric(values, 4), "loss1h": probeMetric(values, 4),
"updated": nowString, "updated": nowString,
}).Execute() }).Execute()
} }

View File

@@ -1,10 +1,9 @@
import type { Column, ColumnDef } from "@tanstack/react-table" import type { CellContext, Column, ColumnDef } from "@tanstack/react-table"
import { Button } from "@/components/ui/button" import { Button } from "@/components/ui/button"
import { cn, decimalString, hourWithSeconds } from "@/lib/utils" import { cn, decimalString, hourWithSeconds } from "@/lib/utils"
import { import {
GlobeIcon, GlobeIcon,
TimerIcon, TimerIcon,
ActivityIcon,
WifiOffIcon, WifiOffIcon,
Trash2Icon, Trash2Icon,
ArrowLeftRightIcon, ArrowLeftRightIcon,
@@ -12,6 +11,7 @@ import {
ServerIcon, ServerIcon,
ClockIcon, ClockIcon,
NetworkIcon, NetworkIcon,
RefreshCwIcon,
} from "lucide-react" } from "lucide-react"
import { t } from "@lingui/core/macro" import { t } from "@lingui/core/macro"
import type { NetworkProbeRecord } from "@/types" import type { NetworkProbeRecord } from "@/types"
@@ -61,7 +61,7 @@ export function getProbeColumns(longestName = 0, longestTarget = 0): ColumnDef<N
header: ({ column }) => <HeaderButton column={column} name={t`System`} Icon={ServerIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`System`} Icon={ServerIcon} />,
cell: ({ getValue }) => { cell: ({ getValue }) => {
const allSystems = useStore($allSystemsById) const allSystems = useStore($allSystemsById)
return <span className="ms-1.5 xl:w-34 block truncate">{allSystems[getValue() as string]?.name ?? ""}</span> return <span className="ms-1.5 xl:w-20 block truncate">{allSystems[getValue() as string]?.name ?? ""}</span>
}, },
}, },
{ {
@@ -91,33 +91,36 @@ export function getProbeColumns(longestName = 0, longestTarget = 0): ColumnDef<N
{ {
id: "interval", id: "interval",
accessorFn: (record) => record.interval, accessorFn: (record) => record.interval,
header: ({ column }) => <HeaderButton column={column} name={t`Interval`} Icon={TimerIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`Interval`} Icon={RefreshCwIcon} />,
cell: ({ getValue }) => <span className="ms-1.5 tabular-nums">{getValue() as number}s</span>, cell: ({ getValue }) => <span className="ms-1.5 tabular-nums">{getValue() as number}s</span>,
}, },
{ {
id: "response", id: "res",
accessorFn: (record) => record.response, accessorFn: (record) => record.res,
invertSorting: true, invertSorting: true,
header: ({ column }) => <HeaderButton column={column} name={t`Response`} Icon={ActivityIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`Response`} Icon={TimerIcon} />,
cell: ({ row }) => { cell: responseTimeCell,
const val = row.original.response },
if (!val) { {
return <span className="ms-1.5 text-muted-foreground">-</span> id: "res1h",
} accessorFn: (record) => record.resAvg1h,
let color = "bg-green-500" invertSorting: true,
if (val > 200) { header: ({ column }) => <HeaderButton column={column} name={t`Avg 1h`} Icon={TimerIcon} />,
color = "bg-yellow-500" cell: responseTimeCell,
} },
if (val > 2000) { {
color = "bg-red-500" id: "max1h",
} accessorFn: (record) => record.resMax1h,
return ( invertSorting: true,
<span className="ms-1.5 tabular-nums flex gap-2 items-center"> header: ({ column }) => <HeaderButton column={column} name={t`Max 1h`} Icon={TimerIcon} />,
<span className={cn("shrink-0 size-2 rounded-full", color)} /> cell: responseTimeCell,
{decimalString(val, val < 100 ? 2 : 1).toLocaleString()} ms },
</span> {
) id: "min1h",
}, accessorFn: (record) => record.resMin1h,
invertSorting: true,
header: ({ column }) => <HeaderButton column={column} name={t`Min 1h`} Icon={TimerIcon} />,
cell: responseTimeCell,
}, },
{ {
id: "loss", id: "loss",
@@ -125,8 +128,8 @@ export function getProbeColumns(longestName = 0, longestTarget = 0): ColumnDef<N
invertSorting: true, invertSorting: true,
header: ({ column }) => <HeaderButton column={column} name={t`Loss`} Icon={WifiOffIcon} />, header: ({ column }) => <HeaderButton column={column} name={t`Loss`} Icon={WifiOffIcon} />,
cell: ({ row }) => { cell: ({ row }) => {
const { loss, response } = row.original const { loss, res } = row.original
if (loss === undefined || (!response && !loss)) { if (loss === undefined || (!res && !loss)) {
return <span className="ms-1.5 text-muted-foreground">-</span> return <span className="ms-1.5 text-muted-foreground">-</span>
} }
let color = "bg-green-500" let color = "bg-green-500"
@@ -188,6 +191,25 @@ export function getProbeColumns(longestName = 0, longestTarget = 0): ColumnDef<N
}, },
] ]
} }
function responseTimeCell(cell: CellContext<NetworkProbeRecord, unknown>) {
const val = cell.getValue() as number | undefined
if (!val) {
return <span className="ms-1.5 text-muted-foreground">-</span>
}
let color = "bg-green-500"
if (val > 200) {
color = "bg-yellow-500"
}
if (val > 2000) {
color = "bg-red-500"
}
return (
<span className="ms-1.5 tabular-nums flex gap-2 items-center">
<span className={cn("shrink-0 size-2 rounded-full", color)} />
{decimalString(val, val < 100 ? 2 : 1).toLocaleString()}ms
</span>
)
}
function HeaderButton({ function HeaderButton({
column, column,

View File

@@ -44,7 +44,7 @@ function ProbeChart({
const filter = useStore($filter) const filter = useStore($filter)
const { dataPoints, visibleKeys } = useMemo(() => { const { dataPoints, visibleKeys } = useMemo(() => {
const sortedProbes = [...probes].sort((a, b) => b.response - a.response) const sortedProbes = [...probes].sort((a, b) => b.resAvg1h - a.resAvg1h)
const count = sortedProbes.length const count = sortedProbes.length
const points: DataPoint<NetworkProbeStatsRecord>[] = [] const points: DataPoint<NetworkProbeStatsRecord>[] = []
const visibleKeys: string[] = [] const visibleKeys: string[] = []
@@ -115,7 +115,7 @@ export function ResponseChart({ probeStats, grid, probes, chartData, empty }: Pr
empty={empty} empty={empty}
valueIndex={0} valueIndex={0}
title={t`Response`} title={t`Response`}
description={t`Average round-trip time (ms)`} description={t`Average response time (ms)`}
tickFormatter={(value) => `${toFixedFloat(value, value >= 10 ? 0 : 1)} ms`} tickFormatter={(value) => `${toFixedFloat(value, value >= 10 ? 0 : 1)} ms`}
contentFormatter={({ value }) => { contentFormatter={({ value }) => {
if (typeof value !== "number") { if (typeof value !== "number") {
@@ -137,7 +137,7 @@ export function LossChart({ probeStats, grid, probes, chartData, empty }: ProbeC
probes={probes} probes={probes}
chartData={chartData} chartData={chartData}
empty={empty} empty={empty}
valueIndex={3} valueIndex={4}
title={t`Loss`} title={t`Loss`}
description={t`Packet loss (%)`} description={t`Packet loss (%)`}
domain={[0, 100]} domain={[0, 100]}

View File

@@ -32,7 +32,7 @@ function appendCacheValue(
} }
const NETWORK_PROBE_FIELDS = const NETWORK_PROBE_FIELDS =
"id,name,system,target,protocol,port,interval,response,resMin1h,resMax1h,resAvg1h,loss,enabled,updated" "id,name,system,target,protocol,port,interval,res,resMin1h,resMax1h,resAvg1h,loss,enabled,updated"
interface UseNetworkProbesProps { interface UseNetworkProbesProps {
systemId?: string systemId?: string
@@ -254,7 +254,7 @@ function probesToStats(probes: NetworkProbeRecord[]): NetworkProbeStatsRecord["s
const stats: NetworkProbeStatsRecord["stats"] = {} const stats: NetworkProbeStatsRecord["stats"] = {}
for (const probe of probes) { for (const probe of probes) {
const key = probeKey(probe) const key = probeKey(probe)
stats[key] = [probe.response, 0, 0, probe.loss] stats[key] = [probe.res, probe.resAvg1h, probe.resMin1h, probe.resMax1h, probe.loss]
} }
return stats return stats
} }

View File

@@ -552,7 +552,7 @@ export interface NetworkProbeRecord {
target: string target: string
protocol: "icmp" | "tcp" | "http" protocol: "icmp" | "tcp" | "http"
port: number port: number
response: number res: number
resMin1h: number resMin1h: number
resMax1h: number resMax1h: number
resAvg1h: number resAvg1h: number