This commit is contained in:
henrygd
2026-04-26 17:19:15 -04:00
parent af49ebf2df
commit f830665984
8 changed files with 113 additions and 91 deletions

View File

@@ -31,7 +31,7 @@ import (
const (
// probeRawRetention is the duration to keep individual samples for high-precision short-term requests
probeRawRetention = 80 * time.Second
probeRawRetention = 70 * time.Second
// probeMinuteBucketLen is the number of 1-minute buckets to keep (1 hour + 1 for partials)
probeMinuteBucketLen int32 = 61
)
@@ -54,7 +54,7 @@ type probeTask struct {
// probeSample stores one probe attempt and its collection time.
type probeSample struct {
responseMs float64 // -1 means loss
responseUs int64 // -1 means loss
timestamp time.Time
}
@@ -67,11 +67,11 @@ type probeBucket struct {
// probeAggregate accumulates successful response stats and total sample counts.
type probeAggregate struct {
sumMs float64
minMs float64
maxMs float64
totalCount int
successCount int
sumUs int64
minUs int64
maxUs int64
totalCount int64
successCount int64
}
func newProbeManager() *ProbeManager {
@@ -104,22 +104,22 @@ func newProbeTaskFromExisting(config probe.Config, existing *probeTask) *probeTa
// newProbeAggregate initializes an aggregate with an unset minimum value.
func newProbeAggregate() probeAggregate {
return probeAggregate{minMs: math.MaxFloat64}
return probeAggregate{minUs: math.MaxInt64}
}
// addResponse folds a single probe sample into the aggregate.
func (agg *probeAggregate) addResponse(responseMs float64) {
func (agg *probeAggregate) addResponse(responseUs int64) {
agg.totalCount++
if responseMs < 0 {
if responseUs < 0 {
return
}
agg.successCount++
agg.sumMs += responseMs
if responseMs < agg.minMs {
agg.minMs = responseMs
agg.sumUs += responseUs
if responseUs < agg.minUs {
agg.minUs = responseUs
}
if responseMs > agg.maxMs {
agg.maxMs = responseMs
if responseUs > agg.maxUs {
agg.maxUs = responseUs
}
}
@@ -130,15 +130,15 @@ func (agg *probeAggregate) addAggregate(other probeAggregate) {
}
agg.totalCount += other.totalCount
agg.successCount += other.successCount
agg.sumMs += other.sumMs
agg.sumUs += other.sumUs
if other.successCount == 0 {
return
}
if agg.minMs == math.MaxFloat64 || other.minMs < agg.minMs {
agg.minMs = other.minMs
if agg.minUs == math.MaxInt64 || other.minUs < agg.minUs {
agg.minUs = other.minUs
}
if other.maxMs > agg.maxMs {
agg.maxMs = other.maxMs
if other.maxUs > agg.maxUs {
agg.maxUs = other.maxUs
}
}
@@ -150,14 +150,14 @@ func (agg probeAggregate) hasData() bool {
// result converts the aggregate into the probe result slice format.
func (agg probeAggregate) result() probe.Result {
avg := agg.avgResponse()
minMs := 0.0
minUs := 0.0
if agg.successCount > 0 {
minMs = math.Round(agg.minMs*100) / 100
minUs = float64(agg.minUs)
}
return probe.Result{
avg,
minMs,
math.Round(agg.maxMs*100) / 100,
minUs,
float64(agg.maxUs),
agg.lossPercentage(),
}
}
@@ -167,7 +167,8 @@ func (agg probeAggregate) avgResponse() float64 {
if agg.successCount == 0 {
return 0
}
return math.Round(agg.sumMs/float64(agg.successCount)*100) / 100
return float64(agg.sumUs / agg.successCount)
}
// lossPercentage returns the rounded failure rate for the aggregate.
@@ -406,8 +407,8 @@ func (task *probeTask) resultLocked(duration time.Duration, now time.Time) (prob
return probe.Result{
result[0],
hourAvg,
math.Round(hourAgg.minMs*100) / 100,
math.Round(hourAgg.maxMs*100) / 100,
float64(hourAgg.minUs),
float64(hourAgg.maxUs),
hourLoss,
}, true
}
@@ -421,7 +422,7 @@ func aggregateSamplesSince(samples []probeSample, cutoff time.Time) probeAggrega
if sample.timestamp.Before(cutoff) {
continue
}
agg.addResponse(sample.responseMs)
agg.addResponse(sample.responseUs)
}
return agg
}
@@ -467,27 +468,27 @@ func (task *probeTask) addSampleLocked(sample probeSample) {
bucket.filled = true
bucket.stats = newProbeAggregate()
}
bucket.stats.addResponse(sample.responseMs)
bucket.stats.addResponse(sample.responseUs)
}
// executeProbe runs the configured probe and records the sample.
func (pm *ProbeManager) executeProbe(task *probeTask) {
var responseMs float64
var responseUs int64
switch task.config.Protocol {
case "icmp":
responseMs = probeICMP(task.config.Target)
responseUs = probeICMP(task.config.Target)
case "tcp":
responseMs = probeTCP(task.config.Target, task.config.Port)
responseUs = probeTCP(task.config.Target, task.config.Port)
case "http":
responseMs = probeHTTP(pm.httpClient, task.config.Target)
responseUs = probeHTTP(pm.httpClient, task.config.Target)
default:
slog.Warn("unknown probe protocol", "protocol", task.config.Protocol)
return
}
sample := probeSample{
responseMs: responseMs,
responseUs: responseUs,
timestamp: time.Now(),
}
@@ -498,7 +499,7 @@ func (pm *ProbeManager) executeProbe(task *probeTask) {
// probeTCP measures pure TCP handshake response in microseconds (excluding DNS resolution).
// Returns -1 on failure.
func probeTCP(target string, port uint16) float64 {
func probeTCP(target string, port uint16) int64 {
// Resolve DNS first, outside the timing window
ips, err := net.LookupHost(target)
if err != nil || len(ips) == 0 {
@@ -513,11 +514,11 @@ func probeTCP(target string, port uint16) float64 {
return -1
}
conn.Close()
return float64(time.Since(start).Microseconds()) / 1000.0
return time.Since(start).Microseconds()
}
// probeHTTP measures HTTP GET request response. Returns -1 on failure.
func probeHTTP(client *http.Client, url string) float64 {
// probeHTTP measures HTTP GET request response in microseconds. Returns -1 on failure.
func probeHTTP(client *http.Client, url string) int64 {
if client == nil {
client = http.DefaultClient
}
@@ -530,5 +531,5 @@ func probeHTTP(client *http.Client, url string) float64 {
if resp.StatusCode >= 400 {
return -1
}
return float64(time.Since(start).Microseconds()) / 1000.0
return time.Since(start).Microseconds()
}

View File

@@ -1,6 +1,7 @@
package agent
import (
"math"
"net"
"os"
"os/exec"
@@ -75,8 +76,8 @@ var (
// Supports both IPv4 and IPv6 targets. The ICMP method (raw socket,
// unprivileged datagram, or exec fallback) is detected once per address
// family and cached for subsequent probes.
// Returns response in milliseconds, or -1 on failure.
func probeICMP(target string) float64 {
// Returns response in microseconds, or -1 on failure.
func probeICMP(target string) int64 {
family, ip := resolveICMPTarget(target)
if family == nil {
return -1
@@ -150,7 +151,7 @@ func detectICMPMode(family *icmpFamily, listen func(network, listenAddr string)
}
// probeICMPNative sends an ICMP echo request using Go's x/net/icmp package.
// Returns response in microseconds, or -1 on failure.
func probeICMPNative(network string, family *icmpFamily, dst net.Addr) float64 {
func probeICMPNative(network string, family *icmpFamily, dst net.Addr) int64 {
conn, err := icmp.ListenPacket(network, family.listenAddr)
if err != nil {
return -1
@@ -194,14 +195,14 @@ func probeICMPNative(network string, family *icmpFamily, dst net.Addr) float64 {
}
if reply.Type == family.replyType {
return float64(time.Since(start).Microseconds()) / 1000.0
return time.Since(start).Microseconds()
}
// Ignore non-echo-reply messages (e.g. destination unreachable) and keep reading
}
}
// probeICMPExec falls back to the system ping command.
// Returns response in microseconds, or -1 on failure.
func probeICMPExec(target string, isIPv6 bool) float64 {
func probeICMPExec(target string, isIPv6 bool) int64 {
var cmd *exec.Cmd
switch runtime.GOOS {
case "windows":
@@ -230,13 +231,13 @@ func probeICMPExec(target string, isIPv6 bool) float64 {
matches := pingTimeRegex.FindSubmatch(output)
if len(matches) >= 2 {
if ms, err := strconv.ParseFloat(string(matches[1]), 64); err == nil {
return ms
return int64(math.Round(ms * 1000))
}
}
// Fallback: use wall clock time if ping succeeded but parsing failed
if err == nil {
return float64(time.Since(start).Microseconds()) / 1000.0
return time.Since(start).Microseconds()
}
return -1
}

View File

@@ -16,9 +16,9 @@ func TestProbeTaskAggregateLockedUsesRawSamplesForShortWindows(t *testing.T) {
now := time.Date(2026, time.April, 21, 12, 0, 0, 0, time.UTC)
task := &probeTask{}
task.addSampleLocked(probeSample{responseMs: 10, timestamp: now.Add(-90 * time.Second)})
task.addSampleLocked(probeSample{responseMs: 20, timestamp: now.Add(-30 * time.Second)})
task.addSampleLocked(probeSample{responseMs: -1, timestamp: now.Add(-10 * time.Second)})
task.addSampleLocked(probeSample{responseUs: 10, timestamp: now.Add(-90 * time.Second)})
task.addSampleLocked(probeSample{responseUs: 20, timestamp: now.Add(-30 * time.Second)})
task.addSampleLocked(probeSample{responseUs: -1, timestamp: now.Add(-10 * time.Second)})
agg := task.aggregateLocked(time.Minute, now)
require.True(t, agg.hasData())
@@ -34,11 +34,11 @@ func TestProbeTaskAggregateLockedUsesMinuteBucketsForLongWindows(t *testing.T) {
now := time.Date(2026, time.April, 21, 12, 0, 30, 0, time.UTC)
task := &probeTask{}
task.addSampleLocked(probeSample{responseMs: 10, timestamp: now.Add(-11 * time.Minute)})
task.addSampleLocked(probeSample{responseMs: 20, timestamp: now.Add(-9 * time.Minute)})
task.addSampleLocked(probeSample{responseMs: 40, timestamp: now.Add(-5 * time.Minute)})
task.addSampleLocked(probeSample{responseMs: -1, timestamp: now.Add(-90 * time.Second)})
task.addSampleLocked(probeSample{responseMs: 30, timestamp: now.Add(-30 * time.Second)})
task.addSampleLocked(probeSample{responseUs: 10, timestamp: now.Add(-11 * time.Minute)})
task.addSampleLocked(probeSample{responseUs: 20, timestamp: now.Add(-9 * time.Minute)})
task.addSampleLocked(probeSample{responseUs: 40, timestamp: now.Add(-5 * time.Minute)})
task.addSampleLocked(probeSample{responseUs: -1, timestamp: now.Add(-90 * time.Second)})
task.addSampleLocked(probeSample{responseUs: 30, timestamp: now.Add(-30 * time.Second)})
agg := task.aggregateLocked(10*time.Minute, now)
require.True(t, agg.hasData())
@@ -54,11 +54,11 @@ func TestProbeTaskAddSampleLockedTrimsRawSamplesButKeepsBucketHistory(t *testing
now := time.Date(2026, time.April, 21, 12, 0, 0, 0, time.UTC)
task := &probeTask{}
task.addSampleLocked(probeSample{responseMs: 10, timestamp: now.Add(-10 * time.Minute)})
task.addSampleLocked(probeSample{responseMs: 20, timestamp: now})
task.addSampleLocked(probeSample{responseUs: 10, timestamp: now.Add(-10 * time.Minute)})
task.addSampleLocked(probeSample{responseUs: 20, timestamp: now})
require.Len(t, task.samples, 1)
assert.Equal(t, 20.0, task.samples[0].responseMs)
assert.Equal(t, int64(20), task.samples[0].responseUs)
agg := task.aggregateLocked(10*time.Minute, now)
require.True(t, agg.hasData())
@@ -73,11 +73,11 @@ func TestProbeTaskAddSampleLockedTrimsRawSamplesButKeepsBucketHistory(t *testing
func TestProbeManagerGetResultsIncludesHourResponseRange(t *testing.T) {
now := time.Now().UTC()
task := &probeTask{config: probe.Config{ID: "probe-1"}}
task.addSampleLocked(probeSample{responseMs: 10, timestamp: now.Add(-30 * time.Minute)})
task.addSampleLocked(probeSample{responseMs: 20, timestamp: now.Add(-9 * time.Minute)})
task.addSampleLocked(probeSample{responseMs: 40, timestamp: now.Add(-5 * time.Minute)})
task.addSampleLocked(probeSample{responseMs: -1, timestamp: now.Add(-90 * time.Second)})
task.addSampleLocked(probeSample{responseMs: 30, timestamp: now.Add(-30 * time.Second)})
task.addSampleLocked(probeSample{responseUs: 10, timestamp: now.Add(-30 * time.Minute)})
task.addSampleLocked(probeSample{responseUs: 20, timestamp: now.Add(-9 * time.Minute)})
task.addSampleLocked(probeSample{responseUs: 40, timestamp: now.Add(-5 * time.Minute)})
task.addSampleLocked(probeSample{responseUs: -1, timestamp: now.Add(-90 * time.Second)})
task.addSampleLocked(probeSample{responseUs: 30, timestamp: now.Add(-30 * time.Second)})
pm := &ProbeManager{probes: map[string]*probeTask{"icmp:example.com": task}}
@@ -95,8 +95,8 @@ func TestProbeManagerGetResultsIncludesHourResponseRange(t *testing.T) {
func TestProbeManagerGetResultsIncludesLossOnlyHourData(t *testing.T) {
now := time.Now().UTC()
task := &probeTask{config: probe.Config{ID: "probe-1"}}
task.addSampleLocked(probeSample{responseMs: -1, timestamp: now.Add(-30 * time.Second)})
task.addSampleLocked(probeSample{responseMs: -1, timestamp: now.Add(-10 * time.Second)})
task.addSampleLocked(probeSample{responseUs: -1, timestamp: now.Add(-30 * time.Second)})
task.addSampleLocked(probeSample{responseUs: -1, timestamp: now.Add(-10 * time.Second)})
pm := &ProbeManager{probes: map[string]*probeTask{"icmp:example.com": task}}
@@ -222,8 +222,8 @@ func TestProbeManagerUpsertProbeKeepsHistoryWhenOnlyIntervalChanges(t *testing.T
now := time.Now().UTC()
existingTask := &probeTask{config: originalCfg, cancel: make(chan struct{})}
existingTask.addSampleLocked(probeSample{responseMs: 12, timestamp: now.Add(-50 * time.Minute)})
existingTask.addSampleLocked(probeSample{responseMs: 24, timestamp: now.Add(-30 * time.Second)})
existingTask.addSampleLocked(probeSample{responseUs: 12, timestamp: now.Add(-50 * time.Minute)})
existingTask.addSampleLocked(probeSample{responseUs: 24, timestamp: now.Add(-30 * time.Second)})
pm := &ProbeManager{
probes: map[string]*probeTask{originalCfg.ID: existingTask},
@@ -243,7 +243,7 @@ func TestProbeManagerUpsertProbeKeepsHistoryWhenOnlyIntervalChanges(t *testing.T
updatedTask.mu.Lock()
defer updatedTask.mu.Unlock()
require.Len(t, updatedTask.samples, 1)
assert.Equal(t, 24.0, updatedTask.samples[0].responseMs)
assert.Equal(t, int64(24), updatedTask.samples[0].responseUs)
agg := updatedTask.aggregateLocked(time.Hour, now)
require.True(t, agg.hasData())
@@ -296,8 +296,8 @@ func TestProbeHTTP(t *testing.T) {
}))
defer server.Close()
responseMs := probeHTTP(server.Client(), server.URL)
assert.GreaterOrEqual(t, responseMs, 0.0)
responseUs := probeHTTP(server.Client(), server.URL)
assert.GreaterOrEqual(t, responseUs, int64(0))
})
t.Run("server error", func(t *testing.T) {
@@ -306,7 +306,7 @@ func TestProbeHTTP(t *testing.T) {
}))
defer server.Close()
assert.Equal(t, -1.0, probeHTTP(server.Client(), server.URL))
assert.Equal(t, int64(-1), probeHTTP(server.Client(), server.URL))
})
}
@@ -326,8 +326,8 @@ func TestProbeTCP(t *testing.T) {
}()
port := uint16(listener.Addr().(*net.TCPAddr).Port)
responseMs := probeTCP("127.0.0.1", port)
assert.GreaterOrEqual(t, responseMs, 0.0)
responseUs := probeTCP("127.0.0.1", port)
assert.GreaterOrEqual(t, responseUs, int64(0))
<-accepted
})
@@ -338,6 +338,6 @@ func TestProbeTCP(t *testing.T) {
port := uint16(listener.Addr().(*net.TCPAddr).Port)
require.NoError(t, listener.Close())
assert.Equal(t, -1.0, probeTCP("127.0.0.1", port))
assert.Equal(t, int64(-1), probeTCP("127.0.0.1", port))
})
}

View File

@@ -36,13 +36,13 @@ type SyncResponse struct {
// Result holds aggregated probe results for a single target.
//
// 0: avg response in ms
// 0: avg response in microseconds
//
// 1: average response over the last hour in ms
// 1: average response over the last hour in microseconds
//
// 2: min response over the last hour in ms
// 2: min response over the last hour in microseconds
//
// 3: max response over the last hour in ms
// 3: max response over the last hour in microseconds
//
// 4: packet loss percentage over the last hour (0-100)
type Result []float64

View File

@@ -1,6 +1,6 @@
import type { CellContext, Column, ColumnDef } from "@tanstack/react-table"
import { Button } from "@/components/ui/button"
import { cn, decimalString, hourWithSeconds } from "@/lib/utils"
import { cn, formatMicroseconds, hourWithSeconds } from "@/lib/utils"
import {
GlobeIcon,
TimerIcon,
@@ -290,9 +290,9 @@ export function getProbeColumns(
}
const responseTimeThresholds = {
http: { warning: 800, critical: 3000 },
tcp: { warning: 500, critical: 2000 },
icmp: { warning: 100, critical: 500 },
http: { warning: 800_000, critical: 3_000_000 },
tcp: { warning: 500_000, critical: 2_000_000 },
icmp: { warning: 100_000, critical: 500_000 },
}
function responseTimeCell(cell: CellContext<NetworkProbeRecord, unknown>) {
@@ -317,7 +317,7 @@ function responseTimeCell(cell: CellContext<NetworkProbeRecord, unknown>) {
return (
<span className="ms-1.5 tabular-nums flex gap-2 items-center">
<span className={cn("shrink-0 size-2 rounded-full", color)} />
{decimalString(responseTime, responseTime < 100 ? 2 : 1).toLocaleString()}ms
{formatMicroseconds(responseTime)}
</span>
)
}

View File

@@ -1,6 +1,6 @@
import LineChartDefault from "@/components/charts/line-chart"
import type { DataPoint } from "@/components/charts/line-chart"
import { toFixedFloat, decimalString } from "@/lib/utils"
import { decimalString, formatMicroseconds, toFixedFloat } from "@/lib/utils"
import { useLingui } from "@lingui/react/macro"
import { ChartCard, FilterBar } from "../chart-card"
import type { ChartData, NetworkProbeRecord, NetworkProbeStatsRecord } from "@/types"
@@ -116,13 +116,13 @@ export function ResponseChart({ probeStats, grid, probes, chartData, empty }: Pr
empty={empty}
valueIndex={0}
title={t`Response`}
description={t`Average response time (ms)`}
tickFormatter={(value) => `${toFixedFloat(value, value >= 10 ? 0 : 1)} ms`}
description={t`Average response time`}
tickFormatter={(value) => formatMicroseconds(value, false)}
contentFormatter={({ value }) => {
if (typeof value !== "number") {
return value
}
return `${decimalString(value, 2)} ms`
return formatMicroseconds(value)
}}
/>
)

View File

@@ -199,6 +199,26 @@ export function decimalString(num: number, digits = 2) {
return formatter.format(num)
}
/**
 * Format a duration given in microseconds as a short string with a unit
 * suffix, picking μs, ms, or s based on magnitude.
 *
 * @param microseconds duration in microseconds
 * @param showDigits when false, ms and s values are formatted with zero fraction digits
 * @returns the formatted value with its unit, or "-" if the input is not a finite number
 */
export function formatMicroseconds(microseconds: number, showDigits = true): string {
	if (!Number.isFinite(microseconds)) {
		return "-"
	}
	// Round to whole microseconds before choosing a unit so fractional inputs
	// (e.g. averaged values or chart axis ticks) never render with raw float
	// precision such as "123.45678900000001μs".
	const wholeMicroseconds = Math.round(microseconds)
	if (wholeMicroseconds < 1000) {
		return `${wholeMicroseconds}μs`
	}
	if (wholeMicroseconds < 1_000_000) {
		const milliseconds = wholeMicroseconds / 1000
		// Fewer fraction digits once the value reaches double digits
		const digits = milliseconds >= 10 ? 1 : 2
		return `${decimalString(milliseconds, showDigits ? digits : 0)}ms`
	}
	const seconds = wholeMicroseconds / 1_000_000
	const digits = seconds >= 10 ? 1 : 2
	return `${decimalString(seconds, showDigits ? digits : 0)}s`
}
/** Get value from local or session storage */
function getStorageValue(key: string, defaultValue: unknown, storageInterface: Storage = localStorage) {
const saved = storageInterface?.getItem(key)

View File

@@ -563,15 +563,15 @@ export interface NetworkProbeRecord {
}
/**
* 0: avg 1 minute response in ms
* 0: avg 1 minute response in microseconds
*
* 1: avg response over 1 hour in ms
* 1: avg response over 1 hour in microseconds
*
* 2: min response over the last hour in ms
* 2: min response over the last hour in microseconds
*
* 3: max response over the last hour in ms
* 3: max response over the last hour in microseconds
*
* 4: packet loss in %
* 4: packet loss over 1 hour in %
*/
type ProbeResult = number[]