This commit is contained in:
henrygd
2026-04-22 19:19:34 -04:00
parent a35cc6ef39
commit 1c5808f430
8 changed files with 95 additions and 52 deletions

View File

@@ -13,11 +13,23 @@ import (
"github.com/henrygd/beszel/internal/entities/probe"
)
// Probe functionality overview:
// Probes run at user-defined intervals (e.g., every 10s).
// To keep memory usage low and constant, data is stored in two layers:
// 1. Raw samples: The most recent individual results (kept for probeRawRetention).
// 2. Minute buckets: A fixed-size ring buffer of 61 buckets, each representing one
// wall-clock minute. Samples collected within the same minute are aggregated
// (sum, min, max, count) into a single bucket.
//
// Short-term requests (<= 2m) use raw samples for perfect accuracy.
// Long-term requests (up to 1h) use the minute buckets to avoid storing thousands
// of individual data points.
const (
probeRawRetention = 2 * time.Minute
probeMinuteBucketTTL = time.Hour
probeMinuteBucketLen = int(probeMinuteBucketTTL/time.Minute) + 1
probeHourWindow = time.Hour
// probeRawRetention is the duration to keep individual samples for high-precision short-term requests
probeRawRetention = 80 * time.Second
// probeMinuteBucketLen is the number of 1-minute buckets to keep (1 hour + 1 for partials)
probeMinuteBucketLen int32 = 61
)
// ProbeManager manages network probe tasks.
@@ -44,7 +56,7 @@ type probeSample struct {
// probeBucket stores one minute of aggregated probe data.
type probeBucket struct {
minute int64
minute int32
filled bool
stats probeAggregate
}
@@ -121,7 +133,7 @@ func (agg probeAggregate) result() probe.Result {
avg,
minMs,
math.Round(agg.maxMs*100) / 100,
math.Round(float64(agg.totalCount-agg.successCount)/float64(agg.totalCount)*10000) / 100,
agg.lossPercentage(),
}
}
@@ -133,6 +145,14 @@ func (agg probeAggregate) avgResponse() float64 {
return math.Round(agg.sumMs/float64(agg.successCount)*100) / 100
}
// lossPercentage returns the rounded failure rate for the aggregate.
func (agg probeAggregate) lossPercentage() float64 {
if agg.totalCount == 0 {
return 0
}
return math.Round(float64(agg.totalCount-agg.successCount)/float64(agg.totalCount)*10000) / 100
}
// SyncProbes replaces all probe tasks with the given configs.
func (pm *ProbeManager) SyncProbes(configs []probe.Config) {
pm.mu.Lock()
@@ -180,7 +200,7 @@ func (pm *ProbeManager) GetResults(durationMs uint16) map[string]probe.Result {
task.mu.Lock()
agg := task.aggregateLocked(duration, now)
// The live request window still controls avg/loss, but the range fields are always 1h.
hourAgg := task.aggregateLocked(probeHourWindow, now)
hourAgg := task.aggregateLocked(time.Hour, now)
task.mu.Unlock()
if !agg.hasData() {
@@ -189,16 +209,17 @@ func (pm *ProbeManager) GetResults(durationMs uint16) map[string]probe.Result {
result := agg.result()
hourAvg := hourAgg.avgResponse()
hourLoss := hourAgg.lossPercentage()
if hourAgg.successCount > 0 {
result = probe.Result{
result[0],
hourAvg,
math.Round(hourAgg.minMs*100) / 100,
math.Round(hourAgg.maxMs*100) / 100,
result[3],
hourLoss,
}
} else {
result = probe.Result{result[0], hourAvg, 0, 0, result[3]}
result = probe.Result{result[0], hourAvg, 0, 0, hourLoss}
}
results[key] = result
}
@@ -262,8 +283,8 @@ func aggregateSamplesSince(samples []probeSample, cutoff time.Time) probeAggrega
// aggregateBucketsSince aggregates minute buckets overlapping the requested window.
func aggregateBucketsSince(buckets []probeBucket, cutoff, now time.Time) probeAggregate {
agg := newProbeAggregate()
startMinute := cutoff.Unix() / 60
endMinute := now.Unix() / 60
startMinute := int32(cutoff.Unix() / 60)
endMinute := int32(now.Unix() / 60)
for _, bucket := range buckets {
if !bucket.filled || bucket.minute < startMinute || bucket.minute > endMinute {
continue
@@ -292,9 +313,9 @@ func (task *probeTask) addSampleLocked(sample probeSample) {
}
task.samples = append(task.samples, sample)
minute := sample.timestamp.Unix() / 60
minute := int32(sample.timestamp.Unix() / 60)
// Each slot stores one wall-clock minute, so the ring stays fixed-size at ~1h per probe.
bucket := &task.buckets[int(minute%int64(probeMinuteBucketLen))]
bucket := &task.buckets[minute%probeMinuteBucketLen]
if !bucket.filled || bucket.minute != minute {
bucket.minute = minute
bucket.filled = true

View File

@@ -85,5 +85,5 @@ func TestProbeManagerGetResultsIncludesHourResponseRange(t *testing.T) {
assert.Equal(t, 25.0, result[1])
assert.Equal(t, 10.0, result[2])
assert.Equal(t, 40.0, result[3])
assert.Equal(t, 0.0, result[4])
assert.Equal(t, 20.0, result[4])
}

View File

@@ -20,7 +20,7 @@ type Config struct {
//
// 3: max response over the last hour in ms
//
// 4: packet loss percentage (0-100)
// 4: packet loss percentage over the last hour (0-100)
type Result []float64
// Key returns the map key used for this probe config (e.g. "icmp:1.1.1.1", "tcp:host:443", "http:https://example.com").

View File

@@ -335,7 +335,7 @@ func updateNetworkProbesRecords(app core.App, data map[string]probe.Result, syst
if !realtimeActive {
db = app.DB()
nowString = time.Now().UTC().Format(types.DefaultDateLayout)
sql := fmt.Sprintf("UPDATE %s SET resAvg={:resAvg}, resMin1h={:resMin1h}, resMax1h={:resMax1h}, resAvg1h={:resAvg1h}, loss={:loss}, updated={:updated} WHERE id={:id}", collectionName)
sql := fmt.Sprintf("UPDATE %s SET resAvg={:resAvg}, resMin1h={:resMin1h}, resMax1h={:resMax1h}, resAvg1h={:resAvg1h}, loss1h={:loss1h}, updated={:updated} WHERE id={:id}", collectionName)
updateQuery = db.NewQuery(sql)
}
@@ -376,7 +376,7 @@ func updateNetworkProbesRecords(app core.App, data map[string]probe.Result, syst
record.Set("resAvg1h", probeMetric(values, 1))
record.Set("resMin1h", probeMetric(values, 2))
record.Set("resMax1h", probeMetric(values, 3))
record.Set("loss", probeMetric(values, 4))
record.Set("loss1h", probeMetric(values, 4))
err = app.SaveNoValidate(record)
}
default:
@@ -386,7 +386,7 @@ func updateNetworkProbesRecords(app core.App, data map[string]probe.Result, syst
"resAvg1h": probeMetric(values, 1),
"resMin1h": probeMetric(values, 2),
"resMax1h": probeMetric(values, 3),
"loss": probeMetric(values, 4),
"loss1h": probeMetric(values, 4),
"updated": nowString,
}).Execute()
}

View File

@@ -1,10 +1,9 @@
import type { Column, ColumnDef } from "@tanstack/react-table"
import type { CellContext, Column, ColumnDef } from "@tanstack/react-table"
import { Button } from "@/components/ui/button"
import { cn, decimalString, hourWithSeconds } from "@/lib/utils"
import {
GlobeIcon,
TimerIcon,
ActivityIcon,
WifiOffIcon,
Trash2Icon,
ArrowLeftRightIcon,
@@ -12,6 +11,7 @@ import {
ServerIcon,
ClockIcon,
NetworkIcon,
RefreshCwIcon,
} from "lucide-react"
import { t } from "@lingui/core/macro"
import type { NetworkProbeRecord } from "@/types"
@@ -61,7 +61,7 @@ export function getProbeColumns(longestName = 0, longestTarget = 0): ColumnDef<N
header: ({ column }) => <HeaderButton column={column} name={t`System`} Icon={ServerIcon} />,
cell: ({ getValue }) => {
const allSystems = useStore($allSystemsById)
return <span className="ms-1.5 xl:w-34 block truncate">{allSystems[getValue() as string]?.name ?? ""}</span>
return <span className="ms-1.5 xl:w-20 block truncate">{allSystems[getValue() as string]?.name ?? ""}</span>
},
},
{
@@ -91,33 +91,36 @@ export function getProbeColumns(longestName = 0, longestTarget = 0): ColumnDef<N
{
id: "interval",
accessorFn: (record) => record.interval,
header: ({ column }) => <HeaderButton column={column} name={t`Interval`} Icon={TimerIcon} />,
header: ({ column }) => <HeaderButton column={column} name={t`Interval`} Icon={RefreshCwIcon} />,
cell: ({ getValue }) => <span className="ms-1.5 tabular-nums">{getValue() as number}s</span>,
},
{
id: "response",
accessorFn: (record) => record.response,
id: "res",
accessorFn: (record) => record.res,
invertSorting: true,
header: ({ column }) => <HeaderButton column={column} name={t`Response`} Icon={ActivityIcon} />,
cell: ({ row }) => {
const val = row.original.response
if (!val) {
return <span className="ms-1.5 text-muted-foreground">-</span>
}
let color = "bg-green-500"
if (val > 200) {
color = "bg-yellow-500"
}
if (val > 2000) {
color = "bg-red-500"
}
return (
<span className="ms-1.5 tabular-nums flex gap-2 items-center">
<span className={cn("shrink-0 size-2 rounded-full", color)} />
{decimalString(val, val < 100 ? 2 : 1).toLocaleString()} ms
</span>
)
},
header: ({ column }) => <HeaderButton column={column} name={t`Response`} Icon={TimerIcon} />,
cell: responseTimeCell,
},
{
id: "res1h",
accessorFn: (record) => record.resAvg1h,
invertSorting: true,
header: ({ column }) => <HeaderButton column={column} name={t`Avg 1h`} Icon={TimerIcon} />,
cell: responseTimeCell,
},
{
id: "max1h",
accessorFn: (record) => record.resMax1h,
invertSorting: true,
header: ({ column }) => <HeaderButton column={column} name={t`Max 1h`} Icon={TimerIcon} />,
cell: responseTimeCell,
},
{
id: "min1h",
accessorFn: (record) => record.resMin1h,
invertSorting: true,
header: ({ column }) => <HeaderButton column={column} name={t`Min 1h`} Icon={TimerIcon} />,
cell: responseTimeCell,
},
{
id: "loss",
@@ -125,8 +128,8 @@ export function getProbeColumns(longestName = 0, longestTarget = 0): ColumnDef<N
invertSorting: true,
header: ({ column }) => <HeaderButton column={column} name={t`Loss`} Icon={WifiOffIcon} />,
cell: ({ row }) => {
const { loss, response } = row.original
if (loss === undefined || (!response && !loss)) {
const { loss, res } = row.original
if (loss === undefined || (!res && !loss)) {
return <span className="ms-1.5 text-muted-foreground">-</span>
}
let color = "bg-green-500"
@@ -188,6 +191,25 @@ export function getProbeColumns(longestName = 0, longestTarget = 0): ColumnDef<N
},
]
}
function responseTimeCell(cell: CellContext<NetworkProbeRecord, unknown>) {
const val = cell.getValue() as number | undefined
if (!val) {
return <span className="ms-1.5 text-muted-foreground">-</span>
}
let color = "bg-green-500"
if (val > 200) {
color = "bg-yellow-500"
}
if (val > 2000) {
color = "bg-red-500"
}
return (
<span className="ms-1.5 tabular-nums flex gap-2 items-center">
<span className={cn("shrink-0 size-2 rounded-full", color)} />
{decimalString(val, val < 100 ? 2 : 1).toLocaleString()}ms
</span>
)
}
function HeaderButton({
column,

View File

@@ -44,7 +44,7 @@ function ProbeChart({
const filter = useStore($filter)
const { dataPoints, visibleKeys } = useMemo(() => {
const sortedProbes = [...probes].sort((a, b) => b.response - a.response)
const sortedProbes = [...probes].sort((a, b) => b.resAvg1h - a.resAvg1h)
const count = sortedProbes.length
const points: DataPoint<NetworkProbeStatsRecord>[] = []
const visibleKeys: string[] = []
@@ -115,7 +115,7 @@ export function ResponseChart({ probeStats, grid, probes, chartData, empty }: Pr
empty={empty}
valueIndex={0}
title={t`Response`}
description={t`Average round-trip time (ms)`}
description={t`Average response time (ms)`}
tickFormatter={(value) => `${toFixedFloat(value, value >= 10 ? 0 : 1)} ms`}
contentFormatter={({ value }) => {
if (typeof value !== "number") {
@@ -137,7 +137,7 @@ export function LossChart({ probeStats, grid, probes, chartData, empty }: ProbeC
probes={probes}
chartData={chartData}
empty={empty}
valueIndex={3}
valueIndex={4}
title={t`Loss`}
description={t`Packet loss (%)`}
domain={[0, 100]}

View File

@@ -32,7 +32,7 @@ function appendCacheValue(
}
const NETWORK_PROBE_FIELDS =
"id,name,system,target,protocol,port,interval,response,resMin1h,resMax1h,resAvg1h,loss,enabled,updated"
"id,name,system,target,protocol,port,interval,res,resMin1h,resMax1h,resAvg1h,loss,enabled,updated"
interface UseNetworkProbesProps {
systemId?: string
@@ -254,7 +254,7 @@ function probesToStats(probes: NetworkProbeRecord[]): NetworkProbeStatsRecord["s
const stats: NetworkProbeStatsRecord["stats"] = {}
for (const probe of probes) {
const key = probeKey(probe)
stats[key] = [probe.response, 0, 0, probe.loss]
stats[key] = [probe.res, probe.resAvg1h, probe.resMin1h, probe.resMax1h, probe.loss]
}
return stats
}

View File

@@ -552,7 +552,7 @@ export interface NetworkProbeRecord {
target: string
protocol: "icmp" | "tcp" | "http"
port: number
response: number
res: number
resMin1h: number
resMax1h: number
resAvg1h: number