Skip to content

Commit 688805a

Browse files
TangoRoxy authored and facebook-github-bot committed
Add additional logging for collection/write time (#8248)
Summary: Pull Request resolved: #8248. Add reporting for (1) total collection loop time and (2) store write time. Removed the Arc, as stats doesn't have to be shared. Reviewed By: lnyng. Differential Revision: D74594590. fbshipit-source-id: 5c848d9d32fa43843bca70be2d5d57b88fa62300
1 parent bf75b6d commit 688805a

2 files changed

Lines changed: 23 additions & 17 deletions

File tree

below/src/main.rs

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,6 @@ pub struct WorkerTask {
553553
fn start_store_writer_thread(
554554
logger: slog::Logger,
555555
mut store: store::StoreWriter,
556-
_stats: Arc<Mutex<statistics::Statistics>>,
557556
store_size_limit: Option<u64>,
558557
retention: Option<Duration>,
559558
writer_buffer_size: usize,
@@ -563,6 +562,7 @@ fn start_store_writer_thread(
563562
.name("store_writer".to_owned())
564563
.spawn(move || {
565564
loop {
565+
let loop_start_time = Instant::now();
566566
match recv_task.recv() {
567567
Ok(write_task) => {
568568
match store.put(write_task.post_collect_sys_time, &write_task.data) {
@@ -573,11 +573,18 @@ fn start_store_writer_thread(
573573
store_size_limit,
574574
/* retention */ None,
575575
)
576-
.expect("cleanup_store failed")
576+
.expect("cleanup_store failed");
577577
}
578578
Ok(/* new shard */ false) => {}
579-
Err(e) => error!(logger, "{:#}", e),
579+
Err(e) => {
580+
error!(logger, "{:#}", e);
581+
// no need to report/cleanup
582+
continue;
583+
}
580584
}
585+
statistics::report_writer_time_ms(
586+
Instant::now().duration_since(loop_start_time),
587+
);
581588
}
582589
Err(_) => {
583590
warn!(
@@ -1095,12 +1102,11 @@ fn record(
10951102
store::Format::Cbor,
10961103
)?;
10971104

1098-
let shared_stats = Arc::new(Mutex::new(statistics::Statistics::new(init)));
1105+
let mut stats = statistics::Statistics::new(init);
10991106

11001107
let (writer_thread, send_task) = start_store_writer_thread(
11011108
logger.clone(),
11021109
store,
1103-
Arc::clone(&shared_stats),
11041110
store_size_limit,
11051111
retention,
11061112
writer_buffer_size,
@@ -1153,17 +1159,13 @@ fn record(
11531159
collection_skew.as_millis(),
11541160
skew_detection_threshold.as_millis()
11551161
);
1156-
shared_stats
1157-
.lock()
1158-
.expect("error acquired stats lock")
1159-
.report_collection_skew();
1162+
statistics::report_collection_skew();
11601163
}
11611164

11621165
match collected_sample {
11631166
Ok(s) => {
11641167
if below_config.enable_gpu_stats {
1165-
let mut lock = shared_stats.lock().expect("error acquiring stats lock");
1166-
lock.report_nr_accelerators(&s);
1168+
stats.report_nr_accelerators(&s);
11671169
}
11681170
send_task
11691171
.send(WorkerTask {
@@ -1182,12 +1184,11 @@ fn record(
11821184
}
11831185
};
11841186

1185-
{
1186-
let mut lock = shared_stats.lock().expect("error acquired stats lock");
1187-
lock.report_store_size(below_config.store_dir.as_path());
1188-
}
1187+
stats.report_store_size(below_config.store_dir.as_path());
11891188

11901189
let collect_duration = Instant::now().duration_since(collect_instant);
1190+
statistics::report_collection_time_ms(collect_duration);
1191+
11911192
// Sleep for at least 1s to avoid sample collision
11921193
let sleep_duration = if interval > collect_duration {
11931194
std::cmp::max(Duration::from_secs(1), interval - collect_duration)

below/src/open_source/statistics.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
// limitations under the License.
1414

1515
use std::path::Path;
16+
use std::time::Duration;
1617

1718
use crate::init;
1819

@@ -25,7 +26,11 @@ impl Statistics {
2526

2627
/// Accepts a directory path; the body is empty, so `_dir` is unused and nothing is reported.
pub fn report_store_size<P: AsRef<Path>>(&mut self, _dir: P) {}
2728

28-
pub fn report_collection_skew(&mut self) {}
29-
3029
/// Accepts a collected sample; the body is empty, so `_sample` is unused and nothing is reported.
pub fn report_nr_accelerators(&mut self, _sample: &model::Sample) {}
3130
}
31+
32+
/// Hook invoked when a collection skew is detected.
///
/// Free function with no receiver and no arguments. The body is empty, so
/// calling it records nothing.
pub fn report_collection_skew() {
    // Empty body: nothing is recorded.
}
33+
34+
/// Accepts the elapsed time of one collection loop iteration.
///
/// Despite the `_ms` suffix in the function name, the argument is a
/// [`Duration`], not an integer millisecond count. The body is empty, so the
/// value is dropped without being recorded. The parameter carries a leading
/// underscore, like the other unused parameters in this file, to avoid an
/// `unused_variables` warning.
pub fn report_collection_time_ms(_duration: Duration) {}
35+
36+
/// Accepts the elapsed time of one store-writer loop iteration.
///
/// Despite the `_ms` suffix in the function name, the argument is a
/// [`Duration`], not an integer millisecond count. The body is empty, so the
/// value is dropped without being recorded. The parameter carries a leading
/// underscore, like the other unused parameters in this file, to avoid an
/// `unused_variables` warning.
pub fn report_writer_time_ms(_duration: Duration) {}

0 commit comments

Comments
 (0)