Skip to main content

dsc/
utils.rs

1use anyhow::{Context, Result};
2use std::collections::HashMap;
3use std::fs;
4use std::io::IsTerminal;
5use std::path::{Path, PathBuf};
6
/// Return `baseurl` with every trailing `/` removed.
///
/// Only the end of the string is touched; an input without a trailing slash
/// (including the empty string) is returned as an owned copy.
pub fn normalize_baseurl(baseurl: &str) -> String {
    let without_trailing_slashes = baseurl.trim_end_matches('/');
    String::from(without_trailing_slashes)
}
11
12/// Create a URL-safe slug from arbitrary input.
13///
14/// Wraps the [`slug`] crate, which transliterates Unicode (so `"Café"`
15/// becomes `"cafe"`, Cyrillic and CJK get sensible romanisations) and
16/// emits the standard kebab-case shape used across most slug-generating
17/// tooling. Returns `"untitled"` when the slug would otherwise be empty
18/// (the `slug` crate itself returns an empty string for input that has
19/// no transliterable characters).
20pub fn slugify(input: &str) -> String {
21    let s = slug::slugify(input);
22    if s.is_empty() {
23        "untitled".to_string()
24    } else {
25        s
26    }
27}
28
29/// Ensure a directory exists.
30pub fn ensure_dir(path: &Path) -> Result<()> {
31    fs::create_dir_all(path).with_context(|| format!("creating {}", path.display()))?;
32    Ok(())
33}
34
35/// Resolve a topic path from a user-provided path and a topic title.
36pub fn resolve_topic_path(
37    provided: Option<&Path>,
38    title: &str,
39    default_dir: &Path,
40) -> Result<PathBuf> {
41    let filename = format!("{}.md", slugify(title));
42    match provided {
43        Some(path) if path.exists() && path.is_dir() => Ok(path.join(filename)),
44        Some(path) if path.extension().is_some() => Ok(path.to_path_buf()),
45        Some(path) => Ok(path.join(filename)),
46        None => Ok(default_dir.join(filename)),
47    }
48}
49
50/// Read a Markdown file.
51pub fn read_markdown(path: &Path) -> Result<String> {
52    let raw = fs::read_to_string(path).with_context(|| format!("reading {}", path.display()))?;
53    Ok(raw)
54}
55
56/// Write a Markdown file, creating parent directories if needed.
57pub fn write_markdown(path: &Path, content: &str) -> Result<()> {
58    if let Some(parent) = path.parent() {
59        ensure_dir(parent)?;
60    }
61    fs::write(path, content).with_context(|| format!("writing {}", path.display()))?;
62    Ok(())
63}
64
/// Quote a YAML scalar if it contains characters that would confuse the
/// parser. Keeps simple values unquoted. Shared by every command that
/// writes YAML front matter (`topic pull --full`, `category pull`).
///
/// A value is wrapped in double quotes when it:
///
/// - is empty;
/// - contains `:` or `#`, or a line break (`\n` / `\r`);
/// - starts with a YAML indicator character (`-`, `?`, `!`, `&`, `*`, `|`,
///   `>`, `@`, `` ` ``, `%`, `'`, `"`, `[`, `]`, `{`, `}`, `,`);
/// - starts or ends with whitespace, which a plain scalar would lose.
///
/// Inside the quotes, `\` and `"` are backslash-escaped and line breaks are
/// written as the escapes `\n` / `\r`, so the result always stays on a
/// single line.
pub fn yaml_scalar(value: &str) -> String {
    // Characters that change the meaning of a plain scalar when they lead it.
    const LEADING_INDICATORS: &[char] = &[
        '-', '?', '!', '&', '*', '|', '>', '@', '`', '%', '\'', '"', '[', ']', '{', '}', ',',
    ];

    let needs_quoting = value.is_empty()
        || value.contains(':')
        || value.contains('#')
        || value.contains(|c: char| matches!(c, '\n' | '\r'))
        || value.starts_with(LEADING_INDICATORS)
        || value.starts_with(char::is_whitespace)
        || value.ends_with(char::is_whitespace);
    if !needs_quoting {
        return value.to_string();
    }

    let mut quoted = String::with_capacity(value.len() + 2);
    quoted.push('"');
    for c in value.chars() {
        match c {
            '\\' => quoted.push_str("\\\\"),
            '"' => quoted.push_str("\\\""),
            // A raw line break would split the value across front-matter
            // lines; emit the double-quoted escape form instead.
            '\n' => quoted.push_str("\\n"),
            '\r' => quoted.push_str("\\r"),
            other => quoted.push(other),
        }
    }
    quoted.push('"');
    quoted
}
82
83/// Split a Markdown document into its leading YAML front matter (if any) and
84/// the body that follows.
85///
86/// Front matter is recognised only when the file's very first line is exactly
87/// `---` (an optional leading BOM is tolerated), terminated by a later line
88/// that is exactly `---`. The fenced block is parsed shallowly into a flat
89/// `key → value` map (one `key: value` pair per line; lines without a colon
90/// are ignored) — `dsc` front matter is intentionally shallow (`title`,
91/// `topic_id`, `url`, `pulled_at`), so a full YAML parse is unnecessary and a
92/// flat scan keeps the body intact.
93///
94/// Returns `(map, body)`. When there is no recognisable front matter the map
95/// is empty and the body is the original content unchanged, so callers can
96/// treat "no front matter" and "empty front matter" identically. One blank
97/// line separating the closing fence from the body is consumed (it mirrors
98/// what the `pull` side writes), giving a stable pull → push round-trip.
99///
100/// Note the inherent ambiguity shared with Jekyll/Hugo: a file with no front
101/// matter whose body genuinely opens with a `---` thematic break followed by
102/// another `---` will be misread as front matter. This is accepted; real
103/// snapshots written by `dsc` always carry proper front matter.
104pub fn strip_frontmatter(raw: &str) -> (HashMap<String, String>, String) {
105    let mut map = HashMap::new();
106    let text = raw.strip_prefix('\u{feff}').unwrap_or(raw);
107
108    let mut lines = text.lines();
109    if lines.next().map(str::trim_end) != Some("---") {
110        return (map, raw.to_string());
111    }
112
113    let mut body_lines: Vec<&str> = Vec::new();
114    let mut closed = false;
115    for line in &mut lines {
116        if line.trim_end() == "---" {
117            closed = true;
118            break;
119        }
120        if let Some((key, value)) = line.split_once(':') {
121            map.insert(key.trim().to_string(), unquote_yaml_scalar(value.trim()));
122        }
123    }
124
125    if !closed {
126        // Opening fence with no matching close: not front matter after all.
127        return (HashMap::new(), raw.to_string());
128    }
129
130    body_lines.extend(lines);
131    // Consume a single conventional blank line between fence and body.
132    if body_lines.first() == Some(&"") {
133        body_lines.remove(0);
134    }
135    let mut body = body_lines.join("\n");
136    if raw.ends_with('\n') && !body.is_empty() {
137        body.push('\n');
138    }
139    (map, body)
140}
141
/// Inverse of [`yaml_scalar`]'s quoting: if `value` is wrapped in double
/// quotes, strip them and decode the backslash escapes inside. Bare values
/// pass through unchanged, so a value that was never quoted (an integer, a
/// URL) is untouched.
///
/// Decoded escapes are `\"`, `\\`, and the standard YAML double-quoted
/// escapes `\n`, `\r` and `\t`. Any other backslash sequence, and a lone
/// trailing backslash, is kept verbatim rather than rejected.
fn unquote_yaml_scalar(value: &str) -> String {
    // A single `"` is not a quoted scalar: after stripping the prefix there
    // is nothing left to carry the closing quote.
    let inner = match value
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
    {
        Some(inner) => inner,
        None => return value.to_string(),
    };

    let mut out = String::with_capacity(inner.len());
    let mut chars = inner.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        match chars.next() {
            Some('"') => out.push('"'),
            Some('\\') => out.push('\\'),
            Some('n') => out.push('\n'),
            Some('r') => out.push('\r'),
            Some('t') => out.push('\t'),
            Some(other) => {
                // Unknown escape: preserve it exactly as written.
                out.push('\\');
                out.push(other);
            }
            None => out.push('\\'),
        }
    }
    out
}
171
172/// Current time in `YYYY-MM-DDTHH:MM:SSZ` form, derived directly from
173/// `SystemTime` to avoid a chrono dependency where one is not otherwise
174/// needed. Used for the `pulled_at` front-matter stamp.
175pub fn current_utc_iso8601() -> String {
176    use std::time::{SystemTime, UNIX_EPOCH};
177    let secs = SystemTime::now()
178        .duration_since(UNIX_EPOCH)
179        .map(|d| d.as_secs())
180        .unwrap_or(0);
181    // Days-from-epoch arithmetic (proleptic Gregorian via the standard
182    // 1970-01-01 epoch). Good for any year `dsc` will plausibly run in.
183    let days = (secs / 86_400) as i64;
184    let secs_of_day = secs % 86_400;
185    let hh = secs_of_day / 3600;
186    let mm = (secs_of_day % 3600) / 60;
187    let ss = secs_of_day % 60;
188    let (y, m, d) = civil_from_days(days);
189    format!("{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z", y, m, d, hh, mm, ss)
190}
191
/// Convert a day count relative to 1970-01-01 into `(year, month, day)` in
/// the proleptic Gregorian calendar.
///
/// Reference: Howard Hinnant, "chrono-Compatible Low-Level Date Algorithms".
/// The algorithm works in 400-year "eras" of 146 097 days, with the year
/// treated as starting on 1 March so the leap day falls at the end.
fn civil_from_days(z: i64) -> (i32, u32, u32) {
    const DAYS_PER_ERA: i64 = 146_097;

    // Shift the origin from 1970-01-01 to 0000-03-01.
    let shifted = z + 719_468;
    // Floor division / non-negative remainder, so negative day counts work.
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA) as u64; // [0, 146096]

    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365; // [0, 399]
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100); // [0, 365]
    let shifted_month = (5 * day_of_year + 2) / 153; // [0, 11], 0 = March

    let day = day_of_year - (153 * shifted_month + 2) / 5 + 1; // [1, 31]
    let month = if shifted_month < 10 {
        shifted_month + 3
    } else {
        shifted_month - 9
    }; // [1, 12]

    // January and February belong to the following civil year.
    let march_based_year = year_of_era as i64 + era * 400;
    let year = if month <= 2 {
        march_based_year + 1
    } else {
        march_based_year
    };

    (year as i32, month as u32, day as u32)
}
207
/// Read the colour preference from the `DSC_COLOR` environment variable.
///
/// The value is trimmed and compared case-insensitively. Returns `"always"`
/// or `"never"` when the variable holds exactly that word, and `"auto"` for
/// anything else — including an unset or unreadable variable.
fn color_mode() -> &'static str {
    let setting = std::env::var("DSC_COLOR")
        .map(|raw| raw.trim().to_ascii_lowercase())
        .unwrap_or_default();
    if setting == "always" {
        "always"
    } else if setting == "never" {
        "never"
    } else {
        "auto"
    }
}
218
219fn color_allowed_for_stdout() -> bool {
220    if std::env::var_os("NO_COLOR").is_some() {
221        return false;
222    }
223    match color_mode() {
224        "always" => true,
225        "never" => false,
226        _ => std::io::stdout().is_terminal(),
227    }
228}
229
/// Pick an ANSI foreground colour code for `key`.
///
/// The key's bytes are folded into a wrapping polynomial hash (multiplier
/// 31) and the hash indexes a fixed 12-entry palette, so the same key always
/// yields the same code. An empty key hashes to 0 and gets the first entry.
fn discourse_color_code(key: &str) -> u8 {
    const COLORS: [u8; 12] = [31, 32, 33, 34, 35, 36, 91, 92, 93, 94, 95, 96];

    let mut hash: usize = 0;
    for byte in key.bytes() {
        hash = hash.wrapping_mul(31).wrapping_add(usize::from(byte));
    }
    COLORS[hash % COLORS.len()]
}
237
238pub fn color_discourse_label(label: &str, key: &str) -> String {
239    if !color_allowed_for_stdout() {
240        return label.to_string();
241    }
242    let code = discourse_color_code(key);
243    format!("\x1b[1;{}m{}\x1b[0m", code, label)
244}
245
246/// Parse a `--since`-style value. Accepts either a relative duration
247/// (`7d`, `24h`, `30m`, `1w`, `90s`) or an ISO-8601 absolute timestamp
248/// (`2026-04-01`, `2026-04-01T12:00:00Z`). Returns the resulting cutoff
249/// instant (now - duration, or the ISO value itself).
250pub fn parse_since_cutoff(input: &str) -> anyhow::Result<chrono::DateTime<chrono::Utc>> {
251    use anyhow::anyhow;
252    let trimmed = input.trim();
253    if trimmed.is_empty() {
254        return Err(anyhow!("empty --since value"));
255    }
256
257    if let Some(duration) = parse_relative_duration(trimmed) {
258        return Ok(chrono::Utc::now() - duration);
259    }
260
261    // Try RFC3339 (full timestamp).
262    if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(trimmed) {
263        return Ok(dt.with_timezone(&chrono::Utc));
264    }
265    // Try date-only — treat as midnight UTC.
266    if let Ok(d) = chrono::NaiveDate::parse_from_str(trimmed, "%Y-%m-%d") {
267        return Ok(
268            chrono::NaiveDateTime::new(d, chrono::NaiveTime::from_hms_opt(0, 0, 0).unwrap())
269                .and_utc(),
270        );
271    }
272
273    Err(anyhow!(
274        "unrecognised --since value: {:?} (expected e.g. `7d`, `24h`, `30m`, `1w`, or an ISO-8601 timestamp)",
275        input
276    ))
277}
278
279/// Parse a relative duration like `7d`, `24h`, `1w`, `1m`, `90s`, `1y`.
280///
281/// Calendar units (`m`, `y`) are imprecise; for windows we use these
282/// conventions:
283///
284/// - `s` — seconds
285/// - `min` — minutes (use this rather than `m` to avoid the months-vs-minutes
286///   ambiguity)
287/// - `h` — hours
288/// - `d` — days
289/// - `w` — weeks (= 7 days)
290/// - `m` — **months** (= 30 days; matches what most users mean by "1m" in
291///   analytics windows)
292/// - `y` — years (= 365 days)
293///
294/// For exact calendar math, pass an ISO-8601 timestamp instead.
295pub fn parse_relative_duration(input: &str) -> Option<chrono::Duration> {
296    let s = input.trim();
297    if s.len() < 2 {
298        return None;
299    }
300    // Order matters: `min` must be tried before `m` so we don't read
301    // "10min" as "10mi" + "n".
302    let multi_char_units = [("min", 60i64)];
303    for (suffix, secs_per_unit) in multi_char_units {
304        if let Some(digits) = s.strip_suffix(suffix) {
305            let n: i64 = digits.parse().ok()?;
306            return Some(chrono::Duration::seconds(n * secs_per_unit));
307        }
308    }
309    let (digits, unit) = s.split_at(s.len() - 1);
310    let n: i64 = digits.parse().ok()?;
311    match unit {
312        "s" => Some(chrono::Duration::seconds(n)),
313        "h" => Some(chrono::Duration::hours(n)),
314        "d" => Some(chrono::Duration::days(n)),
315        "w" => Some(chrono::Duration::weeks(n)),
316        "m" => Some(chrono::Duration::days(n * 30)),
317        "y" => Some(chrono::Duration::days(n * 365)),
318        _ => None,
319    }
320}
321
#[cfg(test)]
mod tests {
    use super::*;

    // ---- slugify ---------------------------------------------------------

    #[test]
    fn slugify_simple_ascii() {
        let slug = slugify("Hello World");
        assert_eq!(slug, "hello-world");
    }

    #[test]
    fn slugify_collapses_runs_of_non_alnum() {
        let slug = slugify("a   b___c!!!d");
        assert_eq!(slug, "a-b-c-d");
    }

    #[test]
    fn slugify_trims_leading_and_trailing_dashes() {
        for (input, expected) in [("   hello   ", "hello"), ("!!!foo!!!", "foo")] {
            assert_eq!(slugify(input), expected);
        }
    }

    #[test]
    fn slugify_empty_input_returns_untitled() {
        for input in ["", "   ", "!!!"] {
            assert_eq!(slugify(input), "untitled");
        }
    }

    #[test]
    fn slugify_preserves_numbers() {
        let slug = slugify("Topic 42 - intro");
        assert_eq!(slug, "topic-42-intro");
    }

    #[test]
    fn slugify_lowercases() {
        let slug = slugify("ABCxyz");
        assert_eq!(slug, "abcxyz");
    }

    #[test]
    fn slugify_transliterates_unicode() {
        // This is why the `slug` crate was adopted: ASCII behaviour is
        // unchanged, while accented Latin, Cyrillic and CJK input now yield
        // meaningful slugs rather than "untitled".
        let cases = [
            ("Café Tonight", "cafe-tonight"),
            ("Привет мир", "privet-mir"),
            ("日本語", "ri-ben-yu"),
        ];
        for (input, expected) in cases {
            assert_eq!(slugify(input), expected);
        }
    }

    #[test]
    fn slugify_trims_both_ends_of_dashes() {
        // Regression guard for a contributor PR that trimmed trailing
        // dashes only; the `slug` crate handles both ends.
        for (input, expected) in [("-foo-", "foo"), ("---foo---bar---", "foo-bar")] {
            assert_eq!(slugify(input), expected);
        }
    }

    // ---- normalize_baseurl -----------------------------------------------

    #[test]
    fn normalize_baseurl_strips_trailing_slashes() {
        let inputs = [
            "https://example.com/",
            "https://example.com///",
            "https://example.com",
        ];
        for input in inputs {
            assert_eq!(normalize_baseurl(input), "https://example.com");
        }
    }

    #[test]
    fn normalize_baseurl_preserves_no_trailing() {
        let normalized = normalize_baseurl("");
        assert_eq!(normalized, "");
    }

    // ---- resolve_topic_path ----------------------------------------------

    #[test]
    fn resolve_topic_path_uses_title_when_no_path_given() {
        let base = Path::new("/tmp/dsc-test");
        let resolved = resolve_topic_path(None, "Hello World", base).unwrap();
        assert_eq!(resolved, base.join("hello-world.md"));
    }

    #[test]
    fn resolve_topic_path_uses_given_path_with_extension() {
        let base = Path::new("/tmp/dsc-test");
        let target = Path::new("/tmp/custom.md");
        let resolved = resolve_topic_path(Some(target), "Ignored", base).unwrap();
        assert_eq!(resolved, target);
    }

    // ---- parse_relative_duration -----------------------------------------

    #[test]
    fn parse_relative_duration_common_units() {
        let cases = [
            ("7d", chrono::Duration::days(7)),
            ("24h", chrono::Duration::hours(24)),
            ("30min", chrono::Duration::minutes(30)),
            ("1w", chrono::Duration::weeks(1)),
            ("90s", chrono::Duration::seconds(90)),
        ];
        for (input, expected) in cases {
            assert_eq!(parse_relative_duration(input), Some(expected));
        }
    }

    #[test]
    fn parse_relative_duration_rejects_nonsense() {
        // "3M" checks that unit suffixes are case-sensitive.
        for input in ["", "d", "7x", "abc", "3M"] {
            assert!(parse_relative_duration(input).is_none());
        }
    }

    #[test]
    fn parse_relative_duration_treats_m_as_months() {
        // `m` means months (30 days each): that is what users typing `1m`
        // for an analytics window expect. Minutes are spelled `min`.
        for (input, days) in [("1m", 30), ("3m", 90)] {
            assert_eq!(
                parse_relative_duration(input),
                Some(chrono::Duration::days(days))
            );
        }
    }

    #[test]
    fn parse_relative_duration_minutes_via_min_suffix() {
        for (input, minutes) in [("5min", 5), ("90min", 90)] {
            assert_eq!(
                parse_relative_duration(input),
                Some(chrono::Duration::minutes(minutes))
            );
        }
    }

    #[test]
    fn parse_relative_duration_accepts_years_as_365d() {
        for (input, days) in [("1y", 365), ("2y", 730)] {
            assert_eq!(
                parse_relative_duration(input),
                Some(chrono::Duration::days(days))
            );
        }
    }

    // ---- parse_since_cutoff ----------------------------------------------

    #[test]
    fn parse_since_cutoff_iso_date() {
        let parsed = parse_since_cutoff("2026-01-01").unwrap();
        assert_eq!(parsed.to_rfc3339(), "2026-01-01T00:00:00+00:00");
    }

    #[test]
    fn parse_since_cutoff_iso_timestamp() {
        let parsed = parse_since_cutoff("2026-04-15T12:30:00Z").unwrap();
        assert_eq!(parsed.to_rfc3339(), "2026-04-15T12:30:00+00:00");
    }

    #[test]
    fn parse_since_cutoff_relative_is_in_the_past() {
        let before = chrono::Utc::now();
        let cutoff = parse_since_cutoff("7d").unwrap();
        let elapsed = before - cutoff;
        // The delta should be seven days, give or take scheduling jitter.
        let drift_secs = (elapsed - chrono::Duration::days(7)).num_seconds().abs();
        assert!(drift_secs < 2, "expected ~7 day delta, got {}", elapsed);
    }

    #[test]
    fn parse_since_cutoff_rejects_garbage() {
        for input in ["not a date", ""] {
            assert!(parse_since_cutoff(input).is_err());
        }
    }

    // ---- yaml_scalar -----------------------------------------------------

    #[test]
    fn yaml_scalar_leaves_simple_values_bare() {
        for plain in ["Dependency management", "Topic 42"] {
            assert_eq!(yaml_scalar(plain), plain);
        }
    }

    #[test]
    fn yaml_scalar_quotes_when_needed() {
        assert_eq!(yaml_scalar("a: b"), "\"a: b\"");
        assert_eq!(yaml_scalar("# hash"), "\"# hash\"");
        assert_eq!(yaml_scalar("- leading dash"), "\"- leading dash\"");
        // An interior quote is not a trigger by itself; when another trigger
        // (the colon below) forces quoting, the quote is escaped inside.
        assert_eq!(yaml_scalar("she said \"hi\""), "she said \"hi\"");
        assert_eq!(yaml_scalar("a: \"b\""), "\"a: \\\"b\\\"\"");
    }

    // ---- strip_frontmatter -----------------------------------------------

    #[test]
    fn strip_frontmatter_parses_block_and_body() {
        let doc = "---\ntitle: Dependency management\ntopic_id: 412\nurl: https://forum.rcpch.tech/t/dependency-management/412\npulled_at: 2026-06-22T09:19:00Z\n---\n\nBody line one.\nBody line two.\n";
        let (fields, body) = strip_frontmatter(doc);
        let field = |name: &str| fields.get(name).map(String::as_str);
        assert_eq!(field("topic_id"), Some("412"));
        assert_eq!(field("title"), Some("Dependency management"));
        assert_eq!(
            field("url"),
            Some("https://forum.rcpch.tech/t/dependency-management/412")
        );
        assert_eq!(body, "Body line one.\nBody line two.\n");
    }

    #[test]
    fn strip_frontmatter_absent_returns_empty_map_and_full_body() {
        let doc = "# Heading\n\nNo front matter here.\n";
        let (fields, body) = strip_frontmatter(doc);
        assert!(fields.is_empty());
        assert_eq!(body, doc);
    }

    #[test]
    fn strip_frontmatter_unclosed_fence_is_not_front_matter() {
        // An opening `---` with no closing fence: everything is body.
        let doc = "---\ntitle: oops\nstill body, no closing fence\n";
        let (fields, body) = strip_frontmatter(doc);
        assert!(fields.is_empty());
        assert_eq!(body, doc);
    }

    #[test]
    fn strip_frontmatter_preserves_horizontal_rules_in_body() {
        // A `---` that appears after the real closing fence belongs to the
        // body and must come through untouched.
        let doc = "---\ntopic_id: 7\n---\n\nIntro.\n\n---\n\nAfter the rule.\n";
        let (fields, body) = strip_frontmatter(doc);
        assert_eq!(fields.get("topic_id").map(String::as_str), Some("7"));
        assert_eq!(body, "Intro.\n\n---\n\nAfter the rule.\n");
    }

    #[test]
    fn strip_frontmatter_unquotes_yaml_scalar_values() {
        // A title with a colon gets quoted by yaml_scalar; stripping must
        // give the original title back.
        let title = "Intro: getting started";
        let doc = format!(
            "---\ntitle: {}\ntopic_id: 3\n---\n\nbody\n",
            yaml_scalar(title)
        );
        let (fields, body) = strip_frontmatter(&doc);
        assert_eq!(fields.get("title").map(String::as_str), Some(title));
        assert_eq!(fields.get("topic_id").map(String::as_str), Some("3"));
        assert_eq!(body, "body\n");
    }

    #[test]
    fn strip_frontmatter_leaves_url_with_colons_intact() {
        // URLs are written bare, and only the first colon on a line splits
        // key from value, so the scheme's colon has to survive.
        let doc = "---\nurl: https://forum.rcpch.tech/t/x/9\n---\n\nbody\n";
        let (fields, _) = strip_frontmatter(doc);
        assert_eq!(
            fields.get("url").map(String::as_str),
            Some("https://forum.rcpch.tech/t/x/9")
        );
    }

    #[test]
    fn strip_frontmatter_tolerates_leading_bom() {
        let doc = "\u{feff}---\ntopic_id: 99\n---\n\nbody\n";
        let (fields, body) = strip_frontmatter(doc);
        assert_eq!(fields.get("topic_id").map(String::as_str), Some("99"));
        assert_eq!(body, "body\n");
    }

    // ---- timestamps ------------------------------------------------------

    #[test]
    fn current_utc_iso8601_has_expected_shape() {
        let stamp = current_utc_iso8601();
        assert_eq!(stamp.len(), 20, "got {stamp:?}");
        assert!(stamp.ends_with('Z'));
        assert_eq!(&stamp[4..5], "-");
        assert_eq!(&stamp[10..11], "T");
    }

    #[test]
    fn civil_from_days_matches_known_dates() {
        let cases = [
            // Day 0 is the epoch itself.
            (0, (1970, 1, 1)),
            // 2026-06-10 is 20614 days after the epoch (checked with cal / date).
            (20614, (2026, 6, 10)),
            // Leap day: 2024-02-29.
            (19782, (2024, 2, 29)),
        ];
        for (days, expected) in cases {
            assert_eq!(civil_from_days(days), expected);
        }
    }
}
617