Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

* [CHANGE]
* [FEATURE]
* [ENHANCEMENT]
* [ENHANCEMENT] mountstats: Add `--collector.mountstats.nfs.aggregate-transports` flag to aggregate NFS transport metrics across connections into a single series per mount, reducing the series cardinality caused by `nconnect`
* [BUGFIX]

## 1.11.1 / 2026-04-07
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ ksmd | Exposes kernel and system statistics from `/sys/kernel/mm/ksm`. | Linux
lnstat | Exposes stats from `/proc/net/stat/`. | Linux
logind | Exposes session counts from [logind](http://www.freedesktop.org/wiki/Software/systemd/logind/). | Linux
meminfo\_numa | Exposes memory statistics from `/sys/devices/system/node/node[0-9]*/meminfo`, `/sys/devices/system/node/node[0-9]*/numastat`. | Linux
mountstats | Exposes filesystem statistics from `/proc/self/mountstats`. Exposes detailed NFS client statistics. | Linux
mountstats | Exposes filesystem statistics from `/proc/self/mountstats`. Exposes detailed NFS client statistics. Use `--collector.mountstats.nfs.aggregate-transports` to aggregate NFS transport metrics across connections (e.g. `nconnect`) and emit a single series per mount instead of one per transport: counters are summed, idle time is the minimum across transports, and maximum RPC slots is the maximum across transports. | Linux
network_route | Exposes the routing table as metrics | Linux
pcidevice | Exposes pci devices' information including their link status and parent devices. | Linux
perf | Exposes perf based metrics (Warning: Metrics are dependent on kernel configuration and settings). | Linux
Expand Down
191 changes: 119 additions & 72 deletions collector/mountstats_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,19 @@ import (
"log/slog"
"strconv"

"github.com/alecthomas/kingpin/v2"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/procfs"
)

var (
// float64Mantissa is 2^53, the largest range over which float64 represents
// every unsigned integer exactly. The transport idle time is reduced modulo
// this value before it is converted to float64 for export.
// 64-bit float mantissa: https://en.wikipedia.org/wiki/Double-precision_floating-point_format
float64Mantissa uint64 = 9007199254740992

// mountStatsNFSAggregateTransports is the boolean command-line flag
// (default "false") that switches the NFS transport metrics from one series
// per transport index to one aggregated series per mount. When it is set,
// the "transport" label is dropped and the values of all transports of a
// mount are combined: counters are summed, while idle time takes the
// minimum and maximum RPC slots takes the maximum across transports.
mountStatsNFSAggregateTransports = kingpin.Flag(
"collector.mountstats.nfs.aggregate-transports",
"Sum NFS transport statistics across all connections (e.g. nconnect) and emit a single value per mount, instead of one series per transport index.",
).Default("false").Bool()
)

type mountStatsCollector struct {
Expand Down Expand Up @@ -132,6 +138,12 @@ func NewMountStatsCollector(logger *slog.Logger) (Collector, error) {
translabels = []string{"export", "protocol", "mountaddr", "transport"}
)

// When aggregating transports (e.g. NFS nconnect) into a single series per
// mount, drop the per-transport index label to avoid cardinality explosion.
if *mountStatsNFSAggregateTransports {
translabels = labels
}

return &mountStatsCollector{
NFSAgeSecondsTotal: prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "age_seconds_total"),
Expand Down Expand Up @@ -618,78 +630,13 @@ func (c *mountStatsCollector) updateNFSStats(ch chan<- prometheus.Metric, s *pro
labelValues...,
)

for i := range s.Transport {
translabelValues := []string{export, protocol, mountAddress, strconv.Itoa(i)}

ch <- prometheus.MustNewConstMetric(
c.NFSTransportBindTotal,
prometheus.CounterValue,
float64(s.Transport[i].Bind),
translabelValues...,
)

ch <- prometheus.MustNewConstMetric(
c.NFSTransportConnectTotal,
prometheus.CounterValue,
float64(s.Transport[i].Connect),
translabelValues...,
)

ch <- prometheus.MustNewConstMetric(
c.NFSTransportIdleTimeSeconds,
prometheus.GaugeValue,
float64(s.Transport[i].IdleTimeSeconds%float64Mantissa),
translabelValues...,
)

ch <- prometheus.MustNewConstMetric(
c.NFSTransportSendsTotal,
prometheus.CounterValue,
float64(s.Transport[i].Sends),
translabelValues...,
)

ch <- prometheus.MustNewConstMetric(
c.NFSTransportReceivesTotal,
prometheus.CounterValue,
float64(s.Transport[i].Receives),
translabelValues...,
)

ch <- prometheus.MustNewConstMetric(
c.NFSTransportBadTransactionIDsTotal,
prometheus.CounterValue,
float64(s.Transport[i].BadTransactionIDs),
translabelValues...,
)

ch <- prometheus.MustNewConstMetric(
c.NFSTransportBacklogQueueTotal,
prometheus.CounterValue,
float64(s.Transport[i].CumulativeBacklog),
translabelValues...,
)

ch <- prometheus.MustNewConstMetric(
c.NFSTransportMaximumRPCSlots,
prometheus.GaugeValue,
float64(s.Transport[i].MaximumRPCSlotsUsed),
translabelValues...,
)

ch <- prometheus.MustNewConstMetric(
c.NFSTransportSendingQueueTotal,
prometheus.CounterValue,
float64(s.Transport[i].CumulativeSendingQueue),
translabelValues...,
)

ch <- prometheus.MustNewConstMetric(
c.NFSTransportPendingQueueTotal,
prometheus.CounterValue,
float64(s.Transport[i].CumulativePendingQueue),
translabelValues...,
)
if *mountStatsNFSAggregateTransports {
c.emitAggregatedTransportStats(ch, s.Transport, labelValues)
} else {
for i := range s.Transport {
translabelValues := []string{export, protocol, mountAddress, strconv.Itoa(i)}
c.emitTransportStats(ch, &s.Transport[i], translabelValues)
}
}

for _, op := range s.Operations {
Expand Down Expand Up @@ -934,3 +881,103 @@ func (c *mountStatsCollector) updateNFSStats(ch chan<- prometheus.Metric, s *pro
labelValues...,
)
}

// emitTransportStats sends the ten NFS transport metrics derived from t on
// ch, attaching labelValues to every one of them.
//
// Metrics are sent in a fixed order: bind, connect, idle time, sends,
// receives, bad transaction IDs, backlog queue, maximum RPC slots, sending
// queue and pending queue. Idle time and maximum RPC slots are exported as
// gauges; everything else is exported as a counter.
//
// labelValues is passed through unchanged, so its length has to match the
// label set the transport descriptors were created with (presumably
// translabels in NewMountStatsCollector; confirm there). A mismatch makes
// prometheus.MustNewConstMetric panic.
func (c *mountStatsCollector) emitTransportStats(ch chan<- prometheus.Metric, t *procfs.NFSTransportStats, labelValues []string) {
	samples := [...]struct {
		desc  *prometheus.Desc
		kind  prometheus.ValueType
		value float64
	}{
		{c.NFSTransportBindTotal, prometheus.CounterValue, float64(t.Bind)},
		{c.NFSTransportConnectTotal, prometheus.CounterValue, float64(t.Connect)},
		// The idle time is wrapped at float64Mantissa before the conversion
		// to float64.
		{c.NFSTransportIdleTimeSeconds, prometheus.GaugeValue, float64(t.IdleTimeSeconds % float64Mantissa)},
		{c.NFSTransportSendsTotal, prometheus.CounterValue, float64(t.Sends)},
		{c.NFSTransportReceivesTotal, prometheus.CounterValue, float64(t.Receives)},
		{c.NFSTransportBadTransactionIDsTotal, prometheus.CounterValue, float64(t.BadTransactionIDs)},
		{c.NFSTransportBacklogQueueTotal, prometheus.CounterValue, float64(t.CumulativeBacklog)},
		{c.NFSTransportMaximumRPCSlots, prometheus.GaugeValue, float64(t.MaximumRPCSlotsUsed)},
		{c.NFSTransportSendingQueueTotal, prometheus.CounterValue, float64(t.CumulativeSendingQueue)},
		{c.NFSTransportPendingQueueTotal, prometheus.CounterValue, float64(t.CumulativePendingQueue)},
	}

	for _, s := range samples {
		ch <- prometheus.MustNewConstMetric(s.desc, s.kind, s.value, labelValues...)
	}
}

// emitAggregatedTransportStats folds all transports of one mount into a single
// procfs.NFSTransportStats value and emits it through emitTransportStats with
// labelValues, producing one series per metric instead of one per transport.
//
// Aggregation rules:
//   - Bind, Connect, Sends, Receives, BadTransactionIDs and the three
//     cumulative queue fields are summed.
//   - IdleTimeSeconds is the minimum over all transports, i.e. the idle time
//     of the most recently active transport.
//   - MaximumRPCSlotsUsed is the maximum over all transports.
//
// Nothing is emitted when transports is empty.
func (c *mountStatsCollector) emitAggregatedTransportStats(ch chan<- prometheus.Metric, transports []procfs.NFSTransportStats, labelValues []string) {
	if len(transports) == 0 {
		return
	}

	var total procfs.NFSTransportStats
	// Seed the running minimum with a real sample; the zero value would
	// otherwise always win the comparison below.
	total.IdleTimeSeconds = transports[0].IdleTimeSeconds

	for i := range transports {
		cur := &transports[i]

		// Running minimum and maximum for the two gauge-style fields.
		if idle := cur.IdleTimeSeconds; idle < total.IdleTimeSeconds {
			total.IdleTimeSeconds = idle
		}
		if slots := cur.MaximumRPCSlotsUsed; slots > total.MaximumRPCSlotsUsed {
			total.MaximumRPCSlotsUsed = slots
		}

		// Plain sums for the counter-style fields.
		total.Bind += cur.Bind
		total.Connect += cur.Connect
		total.Sends += cur.Sends
		total.Receives += cur.Receives
		total.BadTransactionIDs += cur.BadTransactionIDs
		total.CumulativeBacklog += cur.CumulativeBacklog
		total.CumulativeSendingQueue += cur.CumulativeSendingQueue
		total.CumulativePendingQueue += cur.CumulativePendingQueue
	}

	c.emitTransportStats(ch, &total, labelValues)
}