From 1c5808f43015217b57606b995bd29d67b1889fa1 Mon Sep 17 00:00:00 2001 From: henrygd Date: Wed, 22 Apr 2026 19:19:34 -0400 Subject: [PATCH] update --- agent/probe.go | 47 +++++++---- agent/probe_test.go | 2 +- internal/entities/probe/probe.go | 2 +- internal/hub/systems/system.go | 6 +- .../network-probes-columns.tsx | 78 ++++++++++++------- .../routes/system/charts/probes-charts.tsx | 6 +- internal/site/src/lib/use-network-probes.ts | 4 +- internal/site/src/types.d.ts | 2 +- 8 files changed, 95 insertions(+), 52 deletions(-) diff --git a/agent/probe.go b/agent/probe.go index f978e588..b5285947 100644 --- a/agent/probe.go +++ b/agent/probe.go @@ -13,11 +13,23 @@ import ( "github.com/henrygd/beszel/internal/entities/probe" ) +// Probe functionality overview: +// Probes run at user-defined intervals (e.g., every 10s). +// To keep memory usage low and constant, data is stored in two layers: +// 1. Raw samples: The most recent individual results (kept for probeRawRetention). +// 2. Minute buckets: A fixed-size ring buffer of 61 buckets, each representing one +// wall-clock minute. Samples collected within the same minute are aggregated +// (sum, min, max, count) into a single bucket. +// +// Short-term requests (<= 2m) use raw samples for perfect accuracy. +// Long-term requests (up to 1h) use the minute buckets to avoid storing thousands +// of individual data points. + const ( - probeRawRetention = 2 * time.Minute - probeMinuteBucketTTL = time.Hour - probeMinuteBucketLen = int(probeMinuteBucketTTL/time.Minute) + 1 - probeHourWindow = time.Hour + // probeRawRetention is the duration to keep individual samples for high-precision short-term requests + probeRawRetention = 80 * time.Second + // probeMinuteBucketLen is the number of 1-minute buckets to keep (1 hour + 1 for partials) + probeMinuteBucketLen int32 = 61 ) // ProbeManager manages network probe tasks. @@ -44,7 +56,7 @@ type probeSample struct { // probeBucket stores one minute of aggregated probe data. type probeBucket struct { - minute int64 + minute int32 filled bool stats probeAggregate } @@ -121,7 +133,7 @@ func (agg probeAggregate) result() probe.Result { avg, minMs, math.Round(agg.maxMs*100) / 100, - math.Round(float64(agg.totalCount-agg.successCount)/float64(agg.totalCount)*10000) / 100, + agg.lossPercentage(), } } @@ -133,6 +145,14 @@ func (agg probeAggregate) avgResponse() float64 { return math.Round(agg.sumMs/float64(agg.successCount)*100) / 100 } +// lossPercentage returns the rounded failure rate for the aggregate. +func (agg probeAggregate) lossPercentage() float64 { + if agg.totalCount == 0 { + return 0 + } + return math.Round(float64(agg.totalCount-agg.successCount)/float64(agg.totalCount)*10000) / 100 +} + // SyncProbes replaces all probe tasks with the given configs. func (pm *ProbeManager) SyncProbes(configs []probe.Config) { pm.mu.Lock() @@ -180,7 +200,7 @@ func (pm *ProbeManager) GetResults(durationMs uint16) map[string]probe.Result { task.mu.Lock() agg := task.aggregateLocked(duration, now) // The live request window still controls avg/loss, but the range fields are always 1h. - hourAgg := task.aggregateLocked(probeHourWindow, now) + hourAgg := task.aggregateLocked(time.Hour, now) task.mu.Unlock() if !agg.hasData() { @@ -189,16 +209,17 @@ func (pm *ProbeManager) GetResults(durationMs uint16) map[string]probe.Result { result := agg.result() hourAvg := hourAgg.avgResponse() + hourLoss := hourAgg.lossPercentage() if hourAgg.successCount > 0 { result = probe.Result{ result[0], hourAvg, math.Round(hourAgg.minMs*100) / 100, math.Round(hourAgg.maxMs*100) / 100, - result[3], + hourLoss, } } else { - result = probe.Result{result[0], hourAvg, 0, 0, result[3]} + result = probe.Result{result[0], hourAvg, 0, 0, hourLoss} } results[key] = result } @@ -262,8 +283,8 @@ func aggregateSamplesSince(samples []probeSample, cutoff time.Time) probeAggrega // aggregateBucketsSince aggregates minute buckets overlapping the requested window. func aggregateBucketsSince(buckets []probeBucket, cutoff, now time.Time) probeAggregate { agg := newProbeAggregate() - startMinute := cutoff.Unix() / 60 - endMinute := now.Unix() / 60 + startMinute := int32(cutoff.Unix() / 60) + endMinute := int32(now.Unix() / 60) for _, bucket := range buckets { if !bucket.filled || bucket.minute < startMinute || bucket.minute > endMinute { continue @@ -292,9 +313,9 @@ func (task *probeTask) addSampleLocked(sample probeSample) { } task.samples = append(task.samples, sample) - minute := sample.timestamp.Unix() / 60 + minute := int32(sample.timestamp.Unix() / 60) // Each slot stores one wall-clock minute, so the ring stays fixed-size at ~1h per probe. - bucket := &task.buckets[int(minute%int64(probeMinuteBucketLen))] + bucket := &task.buckets[minute%probeMinuteBucketLen] if !bucket.filled || bucket.minute != minute { bucket.minute = minute bucket.filled = true diff --git a/agent/probe_test.go b/agent/probe_test.go index 92d48f09..865a4b24 100644 --- a/agent/probe_test.go +++ b/agent/probe_test.go @@ -85,5 +85,5 @@ func TestProbeManagerGetResultsIncludesHourResponseRange(t *testing.T) { assert.Equal(t, 25.0, result[1]) assert.Equal(t, 10.0, result[2]) assert.Equal(t, 40.0, result[3]) - assert.Equal(t, 0.0, result[4]) + assert.Equal(t, 20.0, result[4]) } diff --git a/internal/entities/probe/probe.go b/internal/entities/probe/probe.go index 3a6845c4..b7dd3c46 100644 --- a/internal/entities/probe/probe.go +++ b/internal/entities/probe/probe.go @@ -20,7 +20,7 @@ type Config struct { // // 3: max response over the last hour in ms // -// 4: packet loss percentage (0-100) +// 4: packet loss percentage over the last hour (0-100) type Result []float64 // Key returns the map key used for this probe config (e.g. "icmp:1.1.1.1", "tcp:host:443", "http:https://example.com"). diff --git a/internal/hub/systems/system.go b/internal/hub/systems/system.go index 24e51f97..6c6e1496 100644 --- a/internal/hub/systems/system.go +++ b/internal/hub/systems/system.go @@ -335,7 +335,7 @@ func updateNetworkProbesRecords(app core.App, data map[string]probe.Result, syst if !realtimeActive { db = app.DB() nowString = time.Now().UTC().Format(types.DefaultDateLayout) - sql := fmt.Sprintf("UPDATE %s SET resAvg={:resAvg}, resMin1h={:resMin1h}, resMax1h={:resMax1h}, resAvg1h={:resAvg1h}, loss={:loss}, updated={:updated} WHERE id={:id}", collectionName) + sql := fmt.Sprintf("UPDATE %s SET resAvg={:resAvg}, resMin1h={:resMin1h}, resMax1h={:resMax1h}, resAvg1h={:resAvg1h}, loss1h={:loss1h}, updated={:updated} WHERE id={:id}", collectionName) updateQuery = db.NewQuery(sql) } @@ -376,7 +376,7 @@ func updateNetworkProbesRecords(app core.App, data map[string]probe.Result, syst record.Set("resAvg1h", probeMetric(values, 1)) record.Set("resMin1h", probeMetric(values, 2)) record.Set("resMax1h", probeMetric(values, 3)) - record.Set("loss", probeMetric(values, 4)) + record.Set("loss1h", probeMetric(values, 4)) err = app.SaveNoValidate(record) } default: @@ -386,7 +386,7 @@ func updateNetworkProbesRecords(app core.App, data map[string]probe.Result, syst "resAvg1h": probeMetric(values, 1), "resMin1h": probeMetric(values, 2), "resMax1h": probeMetric(values, 3), - "loss": probeMetric(values, 4), + "loss1h": probeMetric(values, 4), "updated": nowString, }).Execute() } diff --git a/internal/site/src/components/network-probes-table/network-probes-columns.tsx b/internal/site/src/components/network-probes-table/network-probes-columns.tsx index 3f6f3695..0c036d33 100644 --- a/internal/site/src/components/network-probes-table/network-probes-columns.tsx +++ b/internal/site/src/components/network-probes-table/network-probes-columns.tsx @@ -1,10 +1,9 @@ -import type { Column, ColumnDef } from "@tanstack/react-table" +import type { CellContext, Column, ColumnDef } from "@tanstack/react-table" import { Button } from "@/components/ui/button" import { cn, decimalString, hourWithSeconds } from "@/lib/utils" import { GlobeIcon, TimerIcon, - ActivityIcon, WifiOffIcon, Trash2Icon, ArrowLeftRightIcon, @@ -12,6 +11,7 @@ import { ServerIcon, ClockIcon, NetworkIcon, + RefreshCwIcon, } from "lucide-react" import { t } from "@lingui/core/macro" import type { NetworkProbeRecord } from "@/types" @@ -61,7 +61,7 @@ export function getProbeColumns(longestName = 0, longestTarget = 0): ColumnDef , cell: ({ getValue }) => { const allSystems = useStore($allSystemsById) - return {allSystems[getValue() as string]?.name ?? ""} + return {allSystems[getValue() as string]?.name ?? ""} }, }, { @@ -91,33 +91,36 @@ export function getProbeColumns(longestName = 0, longestTarget = 0): ColumnDef record.interval, - header: ({ column }) => , + header: ({ column }) => , cell: ({ getValue }) => {getValue() as number}s, }, { - id: "response", - accessorFn: (record) => record.response, + id: "res", + accessorFn: (record) => record.res, invertSorting: true, - header: ({ column }) => , - cell: ({ row }) => { - const val = row.original.response - if (!val) { - return - - } - let color = "bg-green-500" - if (val > 200) { - color = "bg-yellow-500" - } - if (val > 2000) { - color = "bg-red-500" - } - return ( - - - {decimalString(val, val < 100 ? 2 : 1).toLocaleString()} ms - - ) - }, + header: ({ column }) => , + cell: responseTimeCell, + }, + { + id: "res1h", + accessorFn: (record) => record.resAvg1h, + invertSorting: true, + header: ({ column }) => , + cell: responseTimeCell, + }, + { + id: "max1h", + accessorFn: (record) => record.resMax1h, + invertSorting: true, + header: ({ column }) => , + cell: responseTimeCell, + }, + { + id: "min1h", + accessorFn: (record) => record.resMin1h, + invertSorting: true, + header: ({ column }) => , + cell: responseTimeCell, }, { id: "loss", @@ -125,8 +128,8 @@ export function getProbeColumns(longestName = 0, longestTarget = 0): ColumnDef , cell: ({ row }) => { - const { loss, response } = row.original - if (loss === undefined || (!response && !loss)) { + const { loss, res } = row.original + if (loss === undefined || (!res && !loss)) { return - } let color = "bg-green-500" @@ -188,6 +191,25 @@ export function getProbeColumns(longestName = 0, longestTarget = 0): ColumnDef) { + const val = cell.getValue() as number | undefined + if (!val) { + return - + } + let color = "bg-green-500" + if (val > 200) { + color = "bg-yellow-500" + } + if (val > 2000) { + color = "bg-red-500" + } + return ( + + + {decimalString(val, val < 100 ? 2 : 1).toLocaleString()}ms + + ) +} function HeaderButton({ column, diff --git a/internal/site/src/components/routes/system/charts/probes-charts.tsx b/internal/site/src/components/routes/system/charts/probes-charts.tsx index b21359df..c8c25103 100644 --- a/internal/site/src/components/routes/system/charts/probes-charts.tsx +++ b/internal/site/src/components/routes/system/charts/probes-charts.tsx @@ -44,7 +44,7 @@ function ProbeChart({ const filter = useStore($filter) const { dataPoints, visibleKeys } = useMemo(() => { - const sortedProbes = [...probes].sort((a, b) => b.response - a.response) + const sortedProbes = [...probes].sort((a, b) => b.resAvg1h - a.resAvg1h) const count = sortedProbes.length const points: DataPoint[] = [] const visibleKeys: string[] = [] @@ -115,7 +115,7 @@ export function ResponseChart({ probeStats, grid, probes, chartData, empty }: Pr empty={empty} valueIndex={0} title={t`Response`} - description={t`Average round-trip time (ms)`} + description={t`Average response time (ms)`} tickFormatter={(value) => `${toFixedFloat(value, value >= 10 ? 0 : 1)} ms`} contentFormatter={({ value }) => { if (typeof value !== "number") { @@ -137,7 +137,7 @@ export function LossChart({ probeStats, grid, probes, chartData, empty }: ProbeC probes={probes} chartData={chartData} empty={empty} - valueIndex={3} + valueIndex={4} title={t`Loss`} description={t`Packet loss (%)`} domain={[0, 100]} diff --git a/internal/site/src/lib/use-network-probes.ts b/internal/site/src/lib/use-network-probes.ts index 4c09f8e0..8ac6c59d 100644 --- a/internal/site/src/lib/use-network-probes.ts +++ b/internal/site/src/lib/use-network-probes.ts @@ -32,7 +32,7 @@ function appendCacheValue( } const NETWORK_PROBE_FIELDS = - "id,name,system,target,protocol,port,interval,response,resMin1h,resMax1h,resAvg1h,loss,enabled,updated" + "id,name,system,target,protocol,port,interval,res,resMin1h,resMax1h,resAvg1h,loss,enabled,updated" interface UseNetworkProbesProps { systemId?: string @@ -254,7 +254,7 @@ function probesToStats(probes: NetworkProbeRecord[]): NetworkProbeStatsRecord["s const stats: NetworkProbeStatsRecord["stats"] = {} for (const probe of probes) { const key = probeKey(probe) - stats[key] = [probe.response, 0, 0, probe.loss] + stats[key] = [probe.res, probe.resAvg1h, probe.resMin1h, probe.resMax1h, probe.loss] } return stats } diff --git a/internal/site/src/types.d.ts b/internal/site/src/types.d.ts index 596da15b..f0f487c8 100644 --- a/internal/site/src/types.d.ts +++ b/internal/site/src/types.d.ts @@ -552,7 +552,7 @@ export interface NetworkProbeRecord { target: string protocol: "icmp" | "tcp" | "http" port: number - response: number + res: number resMin1h: number resMax1h: number resAvg1h: number