Skip to content

Commit 41626bb

Browse files
Codex and claude committed
feat(gpu): add full 1/6/12/24h stats for temp, power, PCIe and unified-memory support
- What: Add PCIe TX/RX history (RingBuffer + TimeWindowAggregator) to GpuHistory, recording per-tick throughput when available. - What: Expand GPU Detail stats to 2 lines per time window showing: util avg/max, temp avg/max, mem avg/max, power avg/max with cumulative energy (kWh), and PCIe TX/RX avg/max (discrete GPUs only). - What: Handle unified-memory GPUs (DGX Spark GB10, GH200) where NVML reports utilization_memory=0% and mem_clock=N/A. Uses known-specs lookup table for theoretical bandwidth and shows "no HW counter" instead of misleading 0 GB/s. - What: Add memory_type_label() for unified-memory type identification (LPDDR5X/LPDDR5/LPDDR4x). - What: Update README with expanded GPU monitoring metrics. - Why: Provide complete historical analysis of GPU thermal/power/PCIe behavior and honest bandwidth reporting on unified-memory architectures. Tests: cargo fmt, clippy, test all pass. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 099b9b1 commit 41626bb

3 files changed

Lines changed: 209 additions & 66 deletions

File tree

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,9 @@ See [Installation](#installation) for more options.
3737
|----------|---------|
3838
| **Utilization** | GPU %, Memory controller %, per-process SM utilization |
3939
| **Memory** | VRAM used/total/free, BAR1 usage, per-process GPU memory |
40-
| **Bandwidth** | Memory bandwidth utilization (actual/theoretical GB/s), PCIe TX/RX throughput |
41-
| **Thermal** | Temperature (with slowdown/shutdown thresholds), fan speed |
42-
| **Power** | Draw/limit (watts), usage %, total energy consumption (kWh) |
40+
| **Bandwidth** | Memory bandwidth utilization (actual/theoretical GB/s), unified-memory auto-detection (LPDDR5X), PCIe TX/RX throughput with 1/6/12/24h avg/max stats |
41+
| **Thermal** | Temperature (with slowdown/shutdown thresholds), fan speed, 1/6/12/24h avg/max |
42+
| **Power** | Draw/limit (watts), usage %, total energy consumption (kWh), 1/6/12/24h avg/max/cumulative energy |
4343
| **Clock** | Graphics, SM, Memory, Video frequencies (current/max MHz) |
4444
| **State** | Performance state (P0–P15), throttle reasons, compute mode, persistence mode |
4545
| **Health** | ECC errors (corrected/uncorrected), retired pages (SBE/DBE) |
@@ -59,7 +59,7 @@ See [Installation](#installation) for more options.
5959
### Time-Series Analytics
6060

6161
- **Line charts** for GPU utilization, memory, temperature, and power with Braille markers
62-
- **1/6/12/24h statistics** — average and max for CPU, Memory, GPU; avg R/W for Disk; cumulative ↓/↑ for Network
62+
- **1/6/12/24h statistics** — avg/max for CPU, Memory, GPU utilization, temperature, power (with cumulative kWh), memory, PCIe TX/RX; avg R/W for Disk; cumulative ↓/↑ for Network
6363
- **Progressive display** — stats windows only appear after enough data has been collected
6464
- **Minute-resolution aggregation** — memory-efficient 24h storage (~28KB per metric)
6565

src/domain/gpu.rs

Lines changed: 87 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,20 +72,83 @@ impl GpuStats {
7272
}
7373

7474
/// Theoretical peak memory bandwidth in GB/s.
75-
/// Formula: mem_clock_mhz * bus_width_bits * 2 (DDR) / 8 / 1000
75+
///
76+
/// For discrete GPUs: `mem_clock_mhz * bus_width_bits * 2 (DDR) / 8 / 1000`.
77+
/// For unified-memory GPUs (GH200, GB10, etc.): uses known specs lookup
78+
/// since NVML does not report mem clock or bus width on these architectures.
7679
pub fn theoretical_mem_bandwidth_gbps(&self) -> Option<f64> {
77-
let bus_width = self.memory_bus_width_bits? as f64;
78-
if self.clock_memory_mhz <= 0.0 || bus_width <= 0.0 {
79-
return None;
80+
// Try standard formula first (discrete GPUs)
81+
if let Some(bus_width) = self.memory_bus_width_bits {
82+
let bw = bus_width as f64;
83+
if self.clock_memory_mhz > 0.0 && bw > 0.0 {
84+
return Some(self.clock_memory_mhz * bw * 2.0 / 8.0 / 1000.0);
85+
}
86+
}
87+
88+
// Unified-memory fallback: lookup by GPU name
89+
if self.memory_is_shared {
90+
return self.unified_memory_bandwidth_gbps();
8091
}
81-
Some(self.clock_memory_mhz * bus_width * 2.0 / 8.0 / 1000.0)
92+
93+
None
8294
}
8395

8496
/// Estimated actual memory bandwidth in GB/s based on utilization.
97+
/// Returns None on unified-memory GPUs where NVML always reports 0%.
8598
pub fn actual_mem_bandwidth_gbps(&self) -> Option<f64> {
8699
let theoretical = self.theoretical_mem_bandwidth_gbps()?;
100+
// On unified memory, NVML reports utilization_memory = 0 always.
101+
// Return None so the UI knows the value is not meaningful.
102+
if self.memory_is_shared && self.utilization_memory <= 0.0 {
103+
return None;
104+
}
87105
Some(theoretical * self.utilization_memory / 100.0)
88106
}
107+
108+
/// Known memory bandwidth for unified-memory GPU models.
109+
/// These GPUs use LPDDR5X shared with the CPU; NVML cannot report
110+
/// mem clock or bus width, so we use published specs.
111+
fn unified_memory_bandwidth_gbps(&self) -> Option<f64> {
112+
let lower = self.name.to_ascii_lowercase();
113+
// DGX Spark / Jetson Thor — GB10 (Blackwell) with LPDDR5X
114+
if lower.contains("gb10") {
115+
return Some(273.0); // 256-bit LPDDR5X-8533 (256 bit × 8533 MT/s ÷ 8 ≈ 273 GB/s)
116+
}
117+
// GH200 Grace Hopper — LPDDR5X
118+
if lower.contains("gh200") || lower.contains("grace hopper") {
119+
return Some(546.0); // 512 GB LPDDR5X
120+
}
121+
// GH100 in Grace Hopper Superchip
122+
if lower.contains("gh100") {
123+
return Some(546.0);
124+
}
125+
// Jetson AGX Orin — LPDDR5
126+
if lower.contains("orin") {
127+
return Some(204.8); // 256-bit LPDDR5
128+
}
129+
// Jetson AGX Xavier — LPDDR4x
130+
if lower.contains("xavier") && !lower.contains("nx") {
131+
return Some(136.5);
132+
}
133+
None
134+
}
135+
136+
/// Descriptive string for the memory type.
137+
pub fn memory_type_label(&self) -> &'static str {
138+
if !self.memory_is_shared {
139+
return "GDDR/HBM";
140+
}
141+
let lower = self.name.to_ascii_lowercase();
142+
if lower.contains("gb10") || lower.contains("gh200") || lower.contains("gh100") {
143+
"LPDDR5X (Unified)"
144+
} else if lower.contains("orin") {
145+
"LPDDR5 (Unified)"
146+
} else if lower.contains("xavier") {
147+
"LPDDR4x (Unified)"
148+
} else {
149+
"Unified Memory"
150+
}
151+
}
89152
}
90153

91154
/// Per-GPU history buffers for chart/sparkline rendering and long-term aggregation.
@@ -95,10 +158,14 @@ pub struct GpuHistory {
95158
pub temperature: RingBuffer,
96159
pub power: RingBuffer,
97160
pub memory_usage: RingBuffer,
161+
pub pcie_tx: RingBuffer,
162+
pub pcie_rx: RingBuffer,
98163
pub utilization_agg: TimeWindowAggregator,
99164
pub temperature_agg: TimeWindowAggregator,
100165
pub power_agg: TimeWindowAggregator,
101166
pub memory_agg: TimeWindowAggregator,
167+
pub pcie_tx_agg: TimeWindowAggregator,
168+
pub pcie_rx_agg: TimeWindowAggregator,
102169
}
103170

104171
impl GpuHistory {
@@ -108,10 +175,14 @@ impl GpuHistory {
108175
temperature: RingBuffer::new(capacity),
109176
power: RingBuffer::new(capacity),
110177
memory_usage: RingBuffer::new(capacity),
178+
pcie_tx: RingBuffer::new(capacity),
179+
pcie_rx: RingBuffer::new(capacity),
111180
utilization_agg: TimeWindowAggregator::new(),
112181
temperature_agg: TimeWindowAggregator::new(),
113182
power_agg: TimeWindowAggregator::new(),
114183
memory_agg: TimeWindowAggregator::new(),
184+
pcie_tx_agg: TimeWindowAggregator::new(),
185+
pcie_rx_agg: TimeWindowAggregator::new(),
115186
}
116187
}
117188

@@ -130,6 +201,17 @@ impl GpuHistory {
130201
self.temperature_agg.push(temp);
131202
self.power_agg.push(power);
132203
self.memory_agg.push(mem);
204+
205+
if let Some(tx) = stats.pcie_tx_bytes_per_sec {
206+
let tx_f = tx as f64;
207+
self.pcie_tx.push(tx_f);
208+
self.pcie_tx_agg.push(tx_f);
209+
}
210+
if let Some(rx) = stats.pcie_rx_bytes_per_sec {
211+
let rx_f = rx as f64;
212+
self.pcie_rx.push(rx_f);
213+
self.pcie_rx_agg.push(rx_f);
214+
}
133215
}
134216
}
135217

src/ui/views/gpu_detail.rs

Lines changed: 118 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -105,22 +105,40 @@ fn render_metrics_column(
105105
theme,
106106
));
107107

108+
// Memory type (show for unified memory)
109+
if gpu.memory_is_shared {
110+
lines.push(metric_line(
111+
"Type",
112+
gpu.memory_type_label(),
113+
theme.text_dim,
114+
theme,
115+
));
116+
}
117+
108118
// Memory bandwidth
109-
lines.push(metric_line(
110-
"MemBW",
111-
&format!(
112-
"{:.0}% util{}",
113-
gpu.utilization_memory,
114-
gpu.actual_mem_bandwidth_gbps()
115-
.map(|bw| format!(
116-
" ~{bw:.0}/{:.0} GB/s",
117-
gpu.theoretical_mem_bandwidth_gbps().unwrap_or(0.0)
118-
))
119-
.unwrap_or_default()
120-
),
121-
theme.percent_color(gpu.utilization_memory),
122-
theme,
123-
));
119+
let bw_str = match (
120+
gpu.actual_mem_bandwidth_gbps(),
121+
gpu.theoretical_mem_bandwidth_gbps(),
122+
) {
123+
(Some(actual), Some(peak)) => {
124+
// Discrete GPU: NVML reports real utilization
125+
format!(
126+
"{:.0}% util ~{actual:.0}/{peak:.0} GB/s",
127+
gpu.utilization_memory
128+
)
129+
}
130+
(None, Some(peak)) if gpu.memory_is_shared => {
131+
// Unified memory: NVML cannot measure real-time bandwidth
132+
format!("peak {peak:.0} GB/s (no HW counter)")
133+
}
134+
_ => format!("{:.0}% util", gpu.utilization_memory),
135+
};
136+
let bw_color = if gpu.memory_is_shared && gpu.utilization_memory <= 0.0 {
137+
theme.text_dim
138+
} else {
139+
theme.percent_color(gpu.utilization_memory)
140+
};
141+
lines.push(metric_line("MemBW", &bw_str, bw_color, theme));
124142

125143
if let (Some(used), Some(total)) = (gpu.bar1_used_bytes, gpu.bar1_total_bytes) {
126144
let bar1_pct = if total > 0 {
@@ -407,19 +425,24 @@ fn render_charts_column(
407425
None => return,
408426
};
409427

428+
// Stats: 2 lines per active window (max 4 windows = 8 lines)
429+
let elapsed = history.utilization_agg.elapsed_hours();
430+
let num_windows = [1, 6, 12, 24]
431+
.iter()
432+
.filter(|&&h| elapsed >= h as f64)
433+
.count() as u16;
434+
let stats_h = num_windows * 2;
435+
410436
// Decide how many charts fit: each needs label(1) + chart(min 2) = 3
411437
let available = area.height;
412-
let stats_h: u16 = 2; // stats at bottom
413438
let chart_budget = available.saturating_sub(stats_h);
414439
// 4 charts: util, mem, temp, power. Each gets label(1) + Fill
415440
let num_charts = (chart_budget / 3).min(4) as usize;
416441

417442
if num_charts == 0 {
418443
// Just show stats
419-
if available >= stats_h {
420-
let lines = build_gpu_stats_lines(history, theme);
421-
frame.render_widget(Paragraph::new(lines), area);
422-
}
444+
let lines = build_gpu_stats_lines(history, theme);
445+
frame.render_widget(Paragraph::new(lines), area);
423446
return;
424447
}
425448

@@ -569,44 +592,82 @@ fn build_gpu_stats_lines<'a>(history: &GpuHistory, theme: &Theme) -> Vec<Line<'a
569592
return vec![];
570593
}
571594

572-
windows
573-
.iter()
574-
.map(|&h| {
575-
let label = format!("{h:>2}h");
576-
let u_avg = history.utilization_agg.average_over_hours(h);
577-
let u_max = history.utilization_agg.max_over_hours(h);
578-
let t_avg = history.temperature_agg.average_over_hours(h);
579-
let t_max = history.temperature_agg.max_over_hours(h);
580-
let p_avg = history.power_agg.average_over_hours(h);
581-
let m_avg = history.memory_agg.average_over_hours(h);
582-
583-
Line::from(vec![
584-
Span::styled(format!(" {label}"), Style::default().fg(theme.text_muted)),
585-
Span::styled(" util ", Style::default().fg(theme.text_dim)),
586-
Span::styled(
587-
format!("{u_avg:.0}%"),
588-
Style::default().fg(theme.percent_color(u_avg)),
589-
),
590-
Span::styled(format!("/{u_max:.0}%"), Style::default().fg(theme.text_dim)),
591-
Span::styled(" temp ", Style::default().fg(theme.text_dim)),
592-
Span::styled(
593-
format!("{t_avg:.0}°"),
594-
Style::default().fg(theme.temp_color(t_avg)),
595-
),
596-
Span::styled(
597-
format!("/{t_max:.0}°"),
598-
Style::default().fg(theme.temp_color(t_max)),
599-
),
600-
Span::styled(" pwr ", Style::default().fg(theme.text_dim)),
601-
Span::styled(format!("{p_avg:.0}W"), Style::default().fg(theme.text)),
602-
Span::styled(" mem ", Style::default().fg(theme.text_dim)),
603-
Span::styled(
604-
format!("{m_avg:.0}%"),
605-
Style::default().fg(theme.percent_color(m_avg)),
595+
let has_pcie = !history.pcie_tx.is_empty();
596+
let mut lines = Vec::new();
597+
598+
for &h in &windows {
599+
let label = format!("{h:>2}h");
600+
let u_avg = history.utilization_agg.average_over_hours(h);
601+
let u_max = history.utilization_agg.max_over_hours(h);
602+
let t_avg = history.temperature_agg.average_over_hours(h);
603+
let t_max = history.temperature_agg.max_over_hours(h);
604+
let p_avg = history.power_agg.average_over_hours(h);
605+
let p_max = history.power_agg.max_over_hours(h);
606+
// Energy: avg_watts * hours = Wh → kWh
607+
let energy_kwh = p_avg * h as f64 / 1000.0;
608+
let m_avg = history.memory_agg.average_over_hours(h);
609+
let m_max = history.memory_agg.max_over_hours(h);
610+
611+
// Line 1: util + temp + mem
612+
lines.push(Line::from(vec![
613+
Span::styled(format!(" {label}"), Style::default().fg(theme.text_muted)),
614+
Span::styled(" util ", Style::default().fg(theme.text_dim)),
615+
Span::styled(
616+
format!("{u_avg:.0}/{u_max:.0}%"),
617+
Style::default().fg(theme.percent_color(u_avg)),
618+
),
619+
Span::styled(" temp ", Style::default().fg(theme.text_dim)),
620+
Span::styled(
621+
format!("{t_avg:.0}°"),
622+
Style::default().fg(theme.temp_color(t_avg)),
623+
),
624+
Span::styled(
625+
format!("/{t_max:.0}°"),
626+
Style::default().fg(theme.temp_color(t_max)),
627+
),
628+
Span::styled(" mem ", Style::default().fg(theme.text_dim)),
629+
Span::styled(
630+
format!("{m_avg:.0}/{m_max:.0}%"),
631+
Style::default().fg(theme.percent_color(m_avg)),
632+
),
633+
]));
634+
635+
// Line 2: power avg/max + energy + PCIe
636+
let mut pwr_spans = vec![
637+
Span::styled(" ", Style::default()),
638+
Span::styled("pwr ", Style::default().fg(theme.text_dim)),
639+
Span::styled(
640+
format!("{p_avg:.0}/{p_max:.0}W",),
641+
Style::default().fg(theme.text),
642+
),
643+
Span::styled(
644+
format!(" ({energy_kwh:.3}kWh)"),
645+
Style::default().fg(theme.text_muted),
646+
),
647+
];
648+
649+
if has_pcie {
650+
let tx_avg = history.pcie_tx_agg.average_over_hours(h);
651+
let rx_avg = history.pcie_rx_agg.average_over_hours(h);
652+
let tx_max = history.pcie_tx_agg.max_over_hours(h);
653+
let rx_max = history.pcie_rx_agg.max_over_hours(h);
654+
pwr_spans.push(Span::styled(" PCIe ", Style::default().fg(theme.text_dim)));
655+
pwr_spans.push(Span::styled(
656+
format!(
657+
"TX:{}/{} RX:{}/{}",
658+
format_rate(tx_avg),
659+
format_rate(tx_max),
660+
format_rate(rx_avg),
661+
format_rate(rx_max),
606662
),
607-
])
608-
})
609-
.collect()
663+
Style::default().fg(theme.text_dim),
664+
));
665+
}
666+
667+
lines.push(Line::from(pwr_spans));
668+
}
669+
670+
lines
610671
}
611672

612673
fn format_rate(bytes_per_sec: f64) -> String {

0 commit comments

Comments
 (0)