proglog 0.4.0

Thread safe progress logging
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
//! This is a simple, thread-safe, count-based, progress logger.
//!
//! This progress logger is intended to be as low-overhead as possible so that it can be used in [hot-loops](#things-to-know).
//!
//! # Synopsis
//!
//! `proglog` hooks into your existing `log` implementation (i.e. `env_logger`) and will output a log message every `unit` number of items it has seen.
//! There are two primary methods, `record()` and `record_with(...)`.
//! `record()` simply increments the counter and will cause a log message to output when `counter % unit == 0`.
//! `record_with(Fn() -> impl Display)` takes a function that outputs anything implementing display which will be appended to the log message.
//!
//! # Things to Know
//!
//! If `unit` is too small, and your loop is too tight, this will output many log messages which will slow your program down in the same way any logging would slow a program down in a hot loop.
//! If `unit` is sufficiently large, this should be safe to put in a hot loop as all it does increment update an atomic `u64`.
//!
//! If your loop is tight, `unit` is small, _and_ you are using rayon / updating from multiple threads your log messages may end up out of order.
//! There is no guaranteed ordering of the submission of the log message to the logger.
//! So thread A could hit the first `unit` break, thread B could hit the second point at the same time, but thread B gets to submit its log message first.
//! Having sufficiently large `unit` will mitigate this, but you should not be depending on the log output order here.
//! The tradeoff made is for speed of incrementing so this can be put in hot loops over guaranteed output ordering.
//!
//! # Example
//!
//! ```rust
//! use proglog::ProgLogBuilder;
//!
//! // Note a `log` backend needs to be globally initialized first
//! env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
//!
//! let logger = ProgLogBuilder::new().build();
//! for i in 0..10_000 {
//!     logger.record_with(|| format!("Logged item: {}", i));
//! }
//! // The logger will flush when it is dropped, writing a final progress message no mater the count.
//! // Alternatively you can call .flush() or .flush_with().
//! ```
//!
//! # Features
//!
//! ## `pretty_counts`
//!
//! The `pretty_counts` features turns on the ability to format the numbers in the log messages.
//! Set the [`ProgLogBuilder::count_formatter`] to one of the [`CountFormatterKind`]s and numbers will
//! be formatted accordingly. i.e. `100000000` -> `100_000_000` with [`CountFormatterKind::Underscore`].
//! ```
#![deny(missing_docs, unsafe_code)]
use log::{log, Level};
use std::{
    fmt::Display,
    sync::{
        atomic::{AtomicU64, Ordering},
        Arc,
    },
};
#[cfg(feature = "pretty_counts")]
use thousands::{
    policies::{COMMA_SEPARATOR, DOT_SEPARATOR, HEX_FOUR, SPACE_SEPARATOR, UNDERSCORE_SEPARATOR},
    Separable,
};

static DEFAULT_NAME: &str = "proglog";
static DEFAULT_NOUN: &str = "records";
static DEFAULT_VERB: &str = "Processed";
static DEFAULT_UNIT: u64 = 100_000;
static DEFAULT_LEVEL: Level = Level::Info;

/// The types of formatting separators that can be applied to counts.
#[cfg(feature = "pretty_counts")]
pub enum CountFormatterKind {
    /// Delimit counter with a `,`.
    Comma,
    /// Delimit counter with a `.`.
    Dot,
    /// Delimit counter with a ` ` every four hexadecimal digits.
    HexFour,
    /// Delimit counter with a ` `.
    Space,
    /// Delimit counter with an `_`.
    Underscore,
    /// Don't delimit counter.
    Nothing,
}

#[cfg(feature = "pretty_counts")]
impl CountFormatterKind {
    fn fmt(&self, count: u64) -> String {
        match self {
            CountFormatterKind::Nothing => count.to_string(),
            CountFormatterKind::Comma => count.separate_by_policy(COMMA_SEPARATOR),
            CountFormatterKind::Dot => count.separate_by_policy(DOT_SEPARATOR),
            CountFormatterKind::HexFour => count.separate_by_policy(HEX_FOUR),
            CountFormatterKind::Space => count.separate_by_policy(SPACE_SEPARATOR),
            CountFormatterKind::Underscore => count.separate_by_policy(UNDERSCORE_SEPARATOR),
        }
    }
}

/// [`ProgLog`] is the the progress logger.
///
/// `ProgLog` hooks into your underlying logger implementation and will emit a
/// log message every time the counter hits a multiple of `unit` at the indicated
/// `level`.
///
/// There are two primary methods for incrementing the counter:
///
/// - [`ProgLog::record`]
/// - [`ProgLog::record_with`]
///
/// Both of these methods will increment the counter and check to see if a log
/// message should be emitted.
///
/// The structure of output messages will look like:
///
/// ```text
/// [{name}] {verb} {seen} {noun}: {meta}
/// ```
///
/// where `meta` is anything returned by the closure given to [`ProgLog::record_with`].
/// `seen` is the number of items counted so far.
///
/// A log message can be force-written by calling [`ProgLog::flush`]/[`ProgLog::flush_with`].
/// Calling flush does not end the logger, another log message will be written on drop.
/// Additionally, flush will be called on drop.
///
/// **Note**: `unit` should be adjusted so that you emit ~1 log message every 15 seconds.
/// If `unit` is too small and this is in a hot-loop logging will happen too frequently
/// and impact performance.
pub struct ProgLog {
    /// The counter tracks the number of items seen by the logger.
    counter: Arc<AtomicU64>,
    /// The name of the logger, used so that multiple progress loggers can run at once.
    name: String,
    /// The noun used in the log output string format, ideally lowercase and plural.
    noun: String,
    /// The verb used in the log output string format, ideally capitalized.
    verb: String,
    /// How many items must be seen before emitting a log message.
    unit: u64,
    /// The [`log::Level`] at which to emit log messages.
    level: Level,
    /// The formatter to use for outputting the current count.
    #[cfg(feature = "pretty_counts")]
    count_formatter: CountFormatterKind,
}

impl Default for ProgLog {
    /// Default for [`ProgLog`].
    fn default() -> Self {
        Self {
            counter: Default::default(),
            name: String::from(DEFAULT_NAME),
            noun: String::from(DEFAULT_NOUN),
            verb: String::from(DEFAULT_VERB),
            unit: DEFAULT_UNIT,
            level: DEFAULT_LEVEL,
            #[cfg(feature = "pretty_counts")]
            count_formatter: CountFormatterKind::Nothing,
        }
    }
}

impl ProgLog {
    /// Create a new [`ProgLog`].
    ///
    /// The [`ProgLogBuilder`] should be preferred.
    #[allow(clippy::must_use_candidate)]
    pub fn new(
        name: String,
        noun: String,
        verb: String,
        unit: u64,
        level: Level,
        #[cfg(feature = "pretty_counts")] count_formatter: CountFormatterKind,
    ) -> Self {
        Self {
            counter: Arc::new(AtomicU64::new(0)),
            name,
            noun,
            verb,
            unit,
            level,
            #[cfg(feature = "pretty_counts")]
            count_formatter,
        }
    }

    /// Get the number of items seen so far.
    ///
    /// This should be treated with some caution as it is using the
    /// atomic load with [`Ordering::Relaxed`].
    pub fn seen(&self) -> u64 {
        self.counter.load(Ordering::Relaxed)
    }

    /// Helper method to pull out log formatting .
    #[inline]
    fn log_it(&self, total: u64) {
        #[cfg(feature = "pretty_counts")]
        {
            log!(
                self.level,
                "[{name}] {verb} {seen} {noun}",
                name = &self.name,
                verb = &self.verb,
                seen = self.count_formatter.fmt(total),
                noun = &self.noun
            );
        }
        #[cfg(not(feature = "pretty_counts"))]
        {
            log!(
                self.level,
                "[{name}] {verb} {seen} {noun}",
                name = &self.name,
                verb = &self.verb,
                seen = total,
                noun = &self.noun
            );
        }
    }

    /// Helper method to pull out log formatting with custom user closure.
    #[inline]
    fn log_it_with<F, T>(&self, f: F, total: u64)
    where
        F: Fn() -> T,
        T: Display,
    {
        #[cfg(feature = "pretty_counts")]
        {
            log!(
                self.level,
                "[{name}] {verb} {seen} {noun}: {extra}",
                name = &self.name,
                verb = &self.verb,
                seen = self.count_formatter.fmt(total),
                noun = &self.noun,
                extra = f()
            );
        }

        #[cfg(not(feature = "pretty_counts"))]
        {
            log!(
                self.level,
                "[{name}] {verb} {seen} {noun}: {extra}",
                name = &self.name,
                verb = &self.verb,
                seen = total,
                noun = &self.noun,
                extra = f()
            );
        }
    }

    fn count(&self) -> u64 {
        self.counter.load(Ordering::Relaxed)
    }

    fn fetch_add(&self, count: u64) -> u64 {
        self.counter.fetch_add(count, Ordering::Relaxed)
    }

    /// Increment the progress logger by 1 and check if a new message should be emitted.
    ///
    /// Returns `true` if total seen after incrementing is a multiple of `unit`.
    pub fn record(&self) -> bool {
        let prev: u64 = self.fetch_add(1);
        let total = prev + 1;
        if total % self.unit == 0 {
            self.log_it(total);
            true
        } else {
            false
        }
    }

    /// Increment the progress logger by `count`, checking if new message(s) should be emitted.
    /// Emits `count // unit` messages, there is no carry over of remainder counts between calls.
    ///
    /// Returns the number of additional `unit`s we've seen after incrementing.
    ///
    /// **Note**: Calling `record` 10 times with `unit` of 2 will emit 5 messages, but calling this with `count=5` twice will emit 4 messages.
    pub fn record_count(&self, mut count: u64) -> usize {
        let mut retval = 0;
        while count > self.unit {
            self.log_it(self.count() + self.unit);
            retval += 1;
            count -= self.unit;
            self.fetch_add(self.unit);
        }
        if (count + (self.count() % self.unit)) > self.unit {
            self.log_it(self.count() + count);
            retval += 1;
        }
        self.fetch_add(count);
        retval
    }

    /// Increment the progress logger by 1 and check if a new message should be emitted.
    ///
    /// The returned displayable from the passed in closure will be appended to the log message.
    ///
    /// Returns `true` if total seen after incrementing is a multiple of `unit`.
    ///
    /// # Example
    ///
    /// ```rust
    /// use proglog::ProgLogBuilder;
    ///
    /// // Note a `log` backend needs to be globally initialized first
    /// env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
    ///
    /// let logger = ProgLogBuilder::new().build();
    /// for i in 0..10_000 {
    ///     logger.record_with(|| format!("Logged item: {}", i));
    /// }
    /// // The logger will flush when it is dropped, writing a final progress message no mater the count.
    /// // Alternatively you can call .flush() or .flush_with().
    /// ```
    pub fn record_with<T, F>(&self, f: F) -> bool
    where
        F: Fn() -> T,
        T: Display,
    {
        let prev = self.fetch_add(1);
        let total = prev + 1;
        if total % self.unit == 0 {
            self.log_it_with(f, total);
            true
        } else {
            false
        }
    }
    /// Increment the progress logger by `count`, checking if new message(s) should be emitted.
    /// This is equivalent to calling the `record` function `count` number of times.
    ///
    /// The returned displayable from the passed in closure will be appended to each log message.
    ///
    /// Returns the number of additional `unit`s we've seen after incrementing.
    ///
    /// **Note**: Calling `record` 10 times with `unit` of 2 will emit 5 messages, but calling this with `count=5` twice will emit 4 messages.
    pub fn record_count_with<T, F>(&self, mut count: u64, f: F) -> usize
    where
        F: Fn() -> T,
        T: Display,
    {
        let mut retval = 0usize;
        while count > self.unit {
            self.log_it_with(&f, self.count() + self.unit);
            retval += 1;
            count -= self.unit;
            self.fetch_add(self.unit);
        }
        self.fetch_add(count);
        if (count + (self.count() % self.unit)) > self.unit {
            self.log_it_with(f, self.count() + count);
            retval += 1;
        }
        retval
    }

    /// Force the output of a log message, including the output of the input closure.
    ///
    /// This does not increment the counter.
    /// This does not close the logger.
    pub fn flush_with<T, F>(&self, f: F)
    where
        F: Fn() -> T,
        T: Display,
    {
        let total = self.count();
        if total % self.unit != 0 {
            self.log_it_with(f, total);
        }
    }

    /// Force the output of a log message.
    ///
    /// This does not increment the counter.
    /// This does not close the logger.
    pub fn flush(&self) {
        let total = self.count();
        if total % self.unit != 0 {
            self.log_it(total);
        }
    }
}

impl Drop for ProgLog {
    /// Drop the logger, calling flush before dropping.
    fn drop(&mut self) {
        self.flush();
    }
}

/// The builder for [`ProgLog`].
pub struct ProgLogBuilder {
    name: String,
    noun: String,
    verb: String,
    unit: u64,
    level: Level,
    /// The formatter to use for outputting the current count.
    #[cfg(feature = "pretty_counts")]
    count_formatter: CountFormatterKind,
}

impl ProgLogBuilder {
    /// Create a new [`ProgLogBuilder`].
    pub fn new() -> Self {
        Self::default()
    }

    /// The name of the logger, used so that multiple progress loggers can run at once.
    pub fn name(mut self, name: impl Into<String>) -> Self {
        self.name = name.into();
        self
    }

    /// The noun used in the log output string format, ideally lowercase and plural.
    pub fn noun(mut self, noun: impl Into<String>) -> Self {
        self.noun = noun.into();
        self
    }

    /// The verb used in the log output string format, ideally capitalized.
    pub fn verb(mut self, verb: impl Into<String>) -> Self {
        self.verb = verb.into();
        self
    }

    /// How many items must be seen before emitting a log message.
    pub fn unit(mut self, unit: u64) -> Self {
        self.unit = unit;
        self
    }

    /// The [`log::Level`] at which to emit log messages.
    pub fn level(mut self, level: Level) -> Self {
        self.level = level;
        self
    }

    /// The formatter to use for outputting the current count.
    #[cfg(feature = "pretty_counts")]
    pub fn count_formatter(mut self, formatter: CountFormatterKind) -> Self {
        self.count_formatter = formatter;
        self
    }

    /// Build the [`ProgLog`] instance.
    pub fn build(self) -> ProgLog {
        ProgLog::new(
            self.name,
            self.noun,
            self.verb,
            self.unit,
            self.level,
            #[cfg(feature = "pretty_counts")]
            self.count_formatter,
        )
    }
}

impl Default for ProgLogBuilder {
    fn default() -> Self {
        Self {
            name: String::from(DEFAULT_NAME),
            noun: String::from(DEFAULT_NOUN),
            verb: String::from(DEFAULT_VERB),
            unit: DEFAULT_UNIT,
            level: DEFAULT_LEVEL,
            #[cfg(feature = "pretty_counts")]
            count_formatter: CountFormatterKind::Nothing,
        }
    }
}

#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use super::*;
    use logtest::Logger;
    use rayon::prelude::*;

    fn drain_logger(logger: &mut Logger) {
        while let Some(_msg) = logger.pop() {}
        assert_eq!(logger.len(), 0);
    }

    /// This function drives all other tests since the logtest Logger is global :(.
    ///
    /// Each function called is expected to drain the logger.
    #[test]
    fn test_log_messages() {
        let mut logger = logtest::Logger::start();
        assert_eq!(logger.len(), 0);
        test_simple_case(&mut logger);
        drain_logger(&mut logger);

        test_rayon(&mut logger);
        drain_logger(&mut logger);

        test_messages_simple(&mut logger);
        drain_logger(&mut logger);

        test_messages_simple_verify_unit(&mut logger);
        drain_logger(&mut logger);

        test_messages_rayon(&mut logger);
        drain_logger(&mut logger);

        test_count(&mut logger);
        drain_logger(&mut logger);

        test_messages_count(&mut logger);
        drain_logger(&mut logger);
        #[cfg(feature = "pretty_counts")]
        {
            test_pretty_counts(&mut logger);
            drain_logger(&mut logger);
        }
    }

    fn test_simple_case(logger: &mut Logger) {
        let my_logger = ProgLogBuilder::new().build();
        for _i in 0..101 {
            my_logger.record();
        }
        assert_eq!(my_logger.seen(), 101);
        assert_eq!(logger.len(), 0);
    }

    fn test_count(logger: &mut Logger) {
        let my_logger = ProgLogBuilder::new().unit(2).build();
        for _i in 0..101 {
            assert_eq!(my_logger.record_count(5), 2);
        }
        assert_eq!(my_logger.seen(), 505);
        assert_eq!(logger.len(), 202);
    }

    fn test_rayon(logger: &mut Logger) {
        let my_logger = ProgLogBuilder::new().build();
        (0..1_000_000).par_bridge().for_each(|_i| {
            my_logger.record();
        });
        assert_eq!(my_logger.seen(), 1_000_000);
        assert_eq!(logger.len(), 10);
    }

    fn test_messages_simple(logger: &mut Logger) {
        let my_logger = ProgLogBuilder::new().unit(1).build();
        my_logger.record_with(|| "This is a test");
        assert_eq!(logger.len(), 1);
        assert!(logger.pop().unwrap().args().ends_with("This is a test"));
    }

    fn test_messages_count(logger: &mut Logger) {
        let my_logger = ProgLogBuilder::new().unit(2).build();
        assert_eq!(my_logger.record_count_with(5, || "This is a test"), 2);
        assert_eq!(logger.len(), 2);
        assert!(logger.pop().unwrap().args().ends_with("This is a test"));
    }

    fn test_messages_simple_verify_unit(logger: &mut Logger) {
        let my_logger = ProgLogBuilder::new().unit(10).build();
        for _ in 0..9 {
            my_logger.record_with(|| "This is a test");
        }
        assert_eq!(logger.len(), 0);
        my_logger.record_with(|| "The 10th");
        assert_eq!(logger.len(), 1);
        assert!(logger.pop().unwrap().args().ends_with("The 10th"));
    }

    fn test_messages_rayon(logger: &mut Logger) {
        let pool = rayon::ThreadPoolBuilder::new()
            .num_threads(2)
            .build()
            .expect("Unable to create rayon global thread pool.");
        let my_logger = ProgLogBuilder::new().unit(100_000).build();

        // Note - it just so happens the log messages are in the correct order here,
        // if the loop is tight enough, and the unit is too small, and depending how
        // rayon breaks things up the logger internal queue / print buffer can get
        // out of order.
        pool.install(|| {
            (1..=1_000_000).par_bridge().for_each(|i| {
                my_logger.record_with(|| format!("Logged {}", i));
            });
        });
        assert_eq!(my_logger.seen(), 1_000_000);

        assert_eq!(logger.len(), 10);

        let mut expected: HashSet<String> = HashSet::new();
        let mut len_expected = 0;
        let out_of_ordering = 10;
        for msg in (100_000_usize..=1_000_000).step_by(100_000) {
            len_expected += 1;
            for i in 0..=out_of_ordering {
                // Handle +- 1 for relaxed ordering
                expected.insert(format!(
                    "[proglog] Processed {} records: Logged {}",
                    msg,
                    msg + i
                ));
                expected.insert(format!(
                    "[proglog] Processed {} records: Logged {}",
                    msg,
                    msg.saturating_sub(i)
                ));
            }
        }
        assert_eq!(
            logger.len(),
            10,
            "Didn't collect expected number of messages."
        );

        for _ in 0..len_expected {
            let found = logger.pop().unwrap();
            assert!(
                expected.remove(found.args()),
                "`{}` not in expected",
                found.args()
            );
        }
    }

    #[cfg(feature = "pretty_counts")]
    fn test_pretty_counts(logger: &mut Logger) {
        let my_logger = ProgLogBuilder::new()
            .unit(100_000)
            .count_formatter(CountFormatterKind::Underscore)
            .build();
        for _ in 0..99_999 {
            my_logger.record_with(|| "This is a test");
        }
        assert_eq!(logger.len(), 0);
        my_logger.record_with(|| "The 100,000th");
        assert_eq!(logger.len(), 1);
        assert_eq!(
            logger.pop().unwrap().args(),
            "[proglog] Processed 100_000 records: The 100,000th"
        );
    }
}