Skip to main content

mkit_cli/commands/
log.rs

//! `mkit log [<rev> | <A>..<B> | <A>...<B>]` — walk commit history.
2//!
3//! With no argument the walk starts at `HEAD`. A single `<rev>` starts there
4//! instead; a range `A..B` shows commits reachable from `B` but not `A`
5//! (empty side = `HEAD`, so `A..` is `A..HEAD` and `..B` is `HEAD..B`).
6//! An `A...B` symmetric range shows commits reachable from `A` or `B` but not
7//! their common ancestors (the merge base). Commits are ordered
8//! reverse-chronologically with a topological tie-break (a parent never
9//! precedes a child) — git's `--date-order`. This is identical to git's
10//! default for linear history and monotonic-timestamp merges; it can differ
11//! only on merge DAGs with non-monotonic (skewed or imported) timestamps.
12//!
13//! Output modes:
14//!
15//! - default — human-oriented multi-line per commit on stdout. The
16//!   full commit message body is printed indented (four spaces) and the
17//!   timestamp is rendered as a stable UTC date
18//!   (`YYYY-MM-DD HH:MM:SS +0000`), not the raw integer.
19//! - `--oneline` — `<abbrev-hex> <title>` per commit on stdout. The
20//!   abbreviation length defaults to 7 (`DEFAULT_ABBREV`) and is
21//!   overridable with `--abbrev[=N]`.
22//! - `--format=json` — JSONL, one self-contained JSON object per
23//!   commit. Suitable for piping into `jq`.
24//!
25//! `--graph` is silently accepted as a no-op pending Phase 10.
26//!
27//! Argument parsing is delegated to clap-derive via
28//! [`crate::clap_shim::parse`]; clap emits standard diagnostics on
29//! errors and the shim maps them to mkit sysexits (`USAGE` for
30//! unknown flags, `DATAERR` for malformed `-n` values, etc.).
31
32use std::cmp::Ordering;
33use std::collections::{BinaryHeap, HashMap, HashSet};
34use std::io::Write;
35
36use clap::{Parser, ValueEnum};
37use mkit_core::Hash;
38use mkit_core::object::{Commit, Object};
39use mkit_core::ops::graph::collect_ancestor_set;
40use mkit_core::ops::merge::find_merge_base;
41use mkit_core::refs;
42use mkit_core::store::ObjectStore;
43
44use super::revspec;
45use crate::clap_shim;
46use crate::exit;
47use crate::format;
48use crate::signal;
49
/// Default abbreviated-hash length, matching git's nominal `core.abbrev`
/// starting point. Overridable with `--abbrev[=N]`.
///
/// Used when abbreviation is requested without an explicit length
/// (`--abbrev-commit`, or the one-line format).
const DEFAULT_ABBREV: usize = 7;
53
// Output format selected by `--format=<value>` (or implied by `--oneline`).
//
// NOTE: plain `//` comments are used on purpose. Doc comments on a
// `ValueEnum` are picked up by clap as help text, so adding `///` here
// would change `--help` output.
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
enum Format {
    // Multi-line, human-oriented record per commit.
    Default,
    // `<abbrev-hex> <title>` on a single line per commit.
    Oneline,
    // One self-contained JSON object per commit (JSONL).
    Json,
}
60
// Command-line options for `mkit log`, parsed by clap-derive.
//
// NOTE: the `///` comments on the fields below are consumed by clap as the
// per-flag help text, so they are user-facing strings — edit them with the
// same care as any other program output. Maintainer notes belong in `//`
// comments like this one.
#[derive(Debug, Parser)]
#[command(
    name = "mkit log",
    about = "Show commit history.",
    disable_help_flag = false,
    disable_version_flag = true
)]
struct LogOpts {
    /// Compact one-line-per-commit output. Equivalent to
    /// `--format=oneline`; if both are given, `--format` wins.
    #[arg(long)]
    oneline: bool,

    /// Output format.
    #[arg(long, value_enum)]
    format: Option<Format>,

    /// Cap the number of commits printed.
    // `None` means "no cap"; `run` maps it to `usize::MAX`.
    #[arg(short = 'n')]
    limit: Option<usize>,

    /// Abbreviate commit hashes in the default format (implied by
    /// `--oneline`).
    #[arg(long = "abbrev-commit")]
    abbrev_commit: bool,

    /// Minimum length of abbreviated hashes. Bare `--abbrev` uses the
    /// default (7); `--abbrev=N` sets the length.
    // The literal "7" below mirrors `DEFAULT_ABBREV`; keep the two in sync.
    #[arg(long, value_name = "N", num_args = 0..=1, default_missing_value = "7")]
    abbrev: Option<usize>,

    /// Render an ASCII graph. Accepted for compatibility; Phase-10
    /// follow-up.
    // Parsed but never acted on: `run` reads the flag and discards it.
    #[arg(long)]
    graph: bool,

    /// Optional starting revision (`<rev>`), range (`A..B`, `A..`, `..B`), or
    /// symmetric range (`A...B`). Defaults to `HEAD`; an empty range side
    /// means `HEAD`.
    // Kept as raw text here; `parse_rev_arg` splits it into a `RevSelection`.
    start: Option<String>,
}
102
103impl LogOpts {
104    /// Resolve `(oneline, format)` into the single `Format` the
105    /// renderer consumes. Explicit `--format` wins over `--oneline`.
106    fn render_format(&self) -> Format {
107        match self.format {
108            Some(f) => f,
109            None if self.oneline => Format::Oneline,
110            None => Format::Default,
111        }
112    }
113
114    /// Abbreviation length for commit ids, or `None` to print the full
115    /// 64-hex hash. `--abbrev=N` sets the length (and implies
116    /// abbreviation); `--abbrev-commit` (or the `Oneline` format)
117    /// abbreviates at `DEFAULT_ABBREV`. `short_hash` clamps the length
118    /// to `[4, 64]`, so out-of-range `N` is harmless.
119    fn abbrev_len(&self) -> Option<usize> {
120        if let Some(n) = self.abbrev {
121            return Some(n);
122        }
123        if self.abbrev_commit || self.render_format() == Format::Oneline {
124            return Some(DEFAULT_ABBREV);
125        }
126        None
127    }
128}
129
130#[must_use]
131pub fn run(args: &[String]) -> u8 {
132    let opts = match clap_shim::parse::<LogOpts>("mkit log", args) {
133        Ok(o) => o,
134        Err(code) => return code,
135    };
136    let fmt = opts.render_format();
137    let abbrev = opts.abbrev_len();
138    let _ = opts.graph; // accepted, currently no-op.
139
140    let cwd = match std::env::current_dir() {
141        Ok(p) => p,
142        Err(e) => return emit_err(&format!("cwd: {e}"), exit::NOINPUT),
143    };
144    let mkit_dir = cwd.join(mkit_core::MKIT_DIR);
145    let store = match ObjectStore::open(&cwd) {
146        Ok(s) => s,
147        Err(e) => return emit_err(&format!("not a mkit repo: {e}"), exit::GENERAL_ERROR),
148    };
149    // Resolve the revision selection: default HEAD, a single `<rev>`, a
150    // `A..B` range, or an `A...B` symmetric range (empty side = HEAD).
151    let selection = parse_rev_arg(opts.start.as_deref());
152    let (tips, excluded) = match resolve_selection(&store, &mkit_dir, &selection) {
153        Ok(Some(v)) => v,
154        Ok(None) => {
155            // No HEAD yet and no explicit revision → nothing to show.
156            if opts.start.is_none() && matches!(fmt, Format::Default | Format::Oneline) {
157                let mut stderr = std::io::stderr().lock();
158                let _ = writeln!(stderr, "no commits yet");
159            }
160            return exit::OK;
161        }
162        Err(msg) => return emit_err(&msg, exit::DATAERR),
163    };
164
165    let ordered = match ordered_commits(&store, &tips, &excluded) {
166        Ok(v) => v,
167        Err(code) => return code,
168    };
169
170    let mut stdout = std::io::stdout().lock();
171    let limit = opts.limit.unwrap_or(usize::MAX);
172    for (hash, c) in ordered.iter().take(limit) {
173        if signal::is_shutdown() {
174            return exit::TEMPFAIL;
175        }
176        render_commit(&mut stdout, fmt, abbrev, hash, c);
177    }
178    exit::OK
179}
180
/// The include tips to walk plus the excluded ancestor set, resolved from a
/// [`RevSelection`].
///
/// Tuple layout: `.0` is the list of starting commits, `.1` is the set of
/// commit hashes that must not appear in the output.
type WalkSet = (Vec<Hash>, HashSet<Hash>);
184
/// A parsed `log` revision selection.
///
/// The `String` payloads are unresolved revision specs exactly as typed,
/// except that `parse_rev_arg` substitutes `"HEAD"` for an empty range side.
enum RevSelection {
    /// No argument → walk `HEAD`.
    Default,
    /// A single `<rev>` → walk its history.
    Single(String),
    /// `A..B` → reachable from `B` but not `A`.
    Range { exclude: String, include: String },
    /// `A...B` → reachable from `A` or `B` but not their common ancestors.
    Symmetric { a: String, b: String },
}
196
197/// Parse the optional `<rev>` / `A..B` / `A...B` positional. An empty range
198/// side resolves to `HEAD`.
199fn parse_rev_arg(arg: Option<&str>) -> RevSelection {
200    let Some(s) = arg else {
201        return RevSelection::Default;
202    };
203    let to_spec = |side: &str| {
204        if side.is_empty() {
205            "HEAD".to_string()
206        } else {
207            side.to_string()
208        }
209    };
210    // Check `...` before `..` since the former contains the latter.
211    if let Some((a, b)) = s.split_once("...") {
212        return RevSelection::Symmetric {
213            a: to_spec(a),
214            b: to_spec(b),
215        };
216    }
217    if let Some((a, b)) = s.split_once("..") {
218        return RevSelection::Range {
219            exclude: to_spec(a),
220            include: to_spec(b),
221        };
222    }
223    RevSelection::Single(s.to_string())
224}
225
226/// Resolve a [`RevSelection`] into the set of include tips to walk and the
227/// excluded ancestor set. `Ok(None)` means there is nothing to show (e.g. a
228/// HEAD-less repo with no explicit revision).
229fn resolve_selection(
230    store: &ObjectStore,
231    mkit_dir: &std::path::Path,
232    sel: &RevSelection,
233) -> Result<Option<WalkSet>, String> {
234    let mut excluded: HashSet<Hash> = HashSet::new();
235    let tips: Vec<Hash> = match sel {
236        RevSelection::Default => match resolve_tip(store, mkit_dir, None)? {
237            Some(h) => vec![h],
238            None => return Ok(None),
239        },
240        RevSelection::Single(spec) => match resolve_tip(store, mkit_dir, Some(spec))? {
241            Some(h) => vec![h],
242            None => return Ok(None),
243        },
244        RevSelection::Range { exclude, include } => {
245            let Some(inc) = resolve_tip(store, mkit_dir, Some(include))? else {
246                return Ok(None);
247            };
248            if let Some(a) = resolve_tip(store, mkit_dir, Some(exclude))? {
249                collect_ancestor_set(store, a, &mut excluded)
250                    .map_err(|e| format!("walk range base: {e}"))?;
251            }
252            vec![inc]
253        }
254        RevSelection::Symmetric { a, b } => {
255            let ra = resolve_tip(store, mkit_dir, Some(a))?;
256            let rb = resolve_tip(store, mkit_dir, Some(b))?;
257            // Exclude the common ancestors (ancestors of the merge base).
258            if let (Some(x), Some(y)) = (ra, rb)
259                && let Some(mb) =
260                    find_merge_base(store, x, y).map_err(|e| format!("merge base: {e}"))?
261            {
262                collect_ancestor_set(store, mb, &mut excluded)
263                    .map_err(|e| format!("walk merge base: {e}"))?;
264            }
265            let tips: Vec<Hash> = ra.into_iter().chain(rb).collect();
266            if tips.is_empty() {
267                return Ok(None);
268            }
269            tips
270        }
271    };
272    Ok(Some((tips, excluded)))
273}
274
275/// Resolve a tip spec to a commit hash. `None` spec = HEAD (which may be
276/// absent → `Ok(None)`). An explicit spec that fails to resolve is an error.
277/// The resolved hash is peeled through annotated/signed tag objects so
278/// `log <tag>` / `<tag>..HEAD` walk the tagged commit, like git.
279fn resolve_tip(
280    store: &ObjectStore,
281    mkit_dir: &std::path::Path,
282    spec: Option<&str>,
283) -> Result<Option<Hash>, String> {
284    let raw = match spec {
285        None | Some("HEAD") => refs::resolve_head(mkit_dir).ok().flatten(),
286        Some(s) => Some(
287            revspec::resolve_revision(store, mkit_dir, s)
288                .map_err(|e| format!("bad revision '{s}': {e}"))?,
289        ),
290    };
291    Ok(raw.map(|h| peel_tags(store, h)))
292}
293
294/// Maximum tag-of-tag chain length to follow when peeling (cycle guard).
295const MAX_TAG_DEPTH: usize = 16;
296
297/// Follow `Object::Tag` targets to the first non-tag object, so an
298/// annotated/signed tag resolves to the commit it points at. A non-tag (or
299/// unreadable) object stops the peel and is returned as-is.
300fn peel_tags(store: &ObjectStore, mut h: Hash) -> Hash {
301    for _ in 0..MAX_TAG_DEPTH {
302        match store.read_object(&h) {
303            Ok(Object::Tag(t)) => h = t.target,
304            _ => break,
305        }
306    }
307    h
308}
309
310/// A commit ready to emit, ordered by timestamp (newest first) with the hash
311/// as a deterministic tiebreak.
312struct HeapItem {
313    timestamp: u64,
314    hash: Hash,
315}
316
317impl Ord for HeapItem {
318    fn cmp(&self, other: &Self) -> Ordering {
319        self.timestamp
320            .cmp(&other.timestamp)
321            .then_with(|| self.hash.cmp(&other.hash))
322    }
323}
324impl PartialOrd for HeapItem {
325    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
326        Some(self.cmp(other))
327    }
328}
329impl PartialEq for HeapItem {
330    fn eq(&self, other: &Self) -> bool {
331        self.cmp(other) == Ordering::Equal
332    }
333}
334impl Eq for HeapItem {}
335
336/// Hard cap on commits collected for one `log` invocation.
337const MAX_LOG_COMMITS: usize = 1_000_000;
338
339/// Collect the commits reachable from any of `tips` (minus `excluded`) in
340/// git's `--date-order`: reverse-chronological by commit timestamp, with a
341/// parent never shown before any of its children (topological tie-break). Uses
342/// an in-degree + max-heap revwalk so equal-timestamp linear history keeps its
343/// natural child→parent order. Matches git's *default* order for linear and
344/// monotonic-timestamp history.
345fn ordered_commits(
346    store: &ObjectStore,
347    tips: &[Hash],
348    excluded: &HashSet<Hash>,
349) -> Result<Vec<(Hash, Commit)>, u8> {
350    // 1. Collect the candidate commit set (DFS over parents, skip excluded).
351    let mut commits: HashMap<Hash, Commit> = HashMap::new();
352    let mut stack: Vec<Hash> = tips.to_vec();
353    while let Some(h) = stack.pop() {
354        if excluded.contains(&h) || commits.contains_key(&h) {
355            continue;
356        }
357        if commits.len() >= MAX_LOG_COMMITS {
358            break;
359        }
360        let c = match store.read_object(&h) {
361            Ok(Object::Commit(c)) => c,
362            Ok(_) => {
363                return Err(emit_err(
364                    &format!("not a commit: {}", format::hex_hash(&h)),
365                    exit::DATAERR,
366                ));
367            }
368            Err(e) => {
369                return Err(emit_err(
370                    &format!("read {}: {e}", format::hex_hash(&h)),
371                    exit::DATAERR,
372                ));
373            }
374        };
375        for p in &c.parents {
376            if !excluded.contains(p) {
377                stack.push(*p);
378            }
379        }
380        commits.insert(h, c);
381    }
382
383    // 2. In-degree = number of children within the candidate set.
384    let mut indeg: HashMap<Hash, usize> = commits.keys().map(|h| (*h, 0usize)).collect();
385    for c in commits.values() {
386        for p in &c.parents {
387            if let Some(d) = indeg.get_mut(p) {
388                *d += 1;
389            }
390        }
391    }
392
393    // 3. Max-heap (by timestamp) over commits whose children are all emitted.
394    let mut heap: BinaryHeap<HeapItem> = BinaryHeap::new();
395    for (h, c) in &commits {
396        if indeg[h] == 0 {
397            heap.push(HeapItem {
398                timestamp: c.timestamp,
399                hash: *h,
400            });
401        }
402    }
403    let mut out: Vec<(Hash, Commit)> = Vec::with_capacity(commits.len());
404    while let Some(item) = heap.pop() {
405        let c = commits[&item.hash].clone();
406        for p in &c.parents {
407            if let Some(d) = indeg.get_mut(p) {
408                *d -= 1;
409                if *d == 0 {
410                    heap.push(HeapItem {
411                        timestamp: commits[p].timestamp,
412                        hash: *p,
413                    });
414                }
415            }
416        }
417        out.push((item.hash, c));
418    }
419    Ok(out)
420}
421
422/// Render one commit in the selected format.
423fn render_commit(
424    out: &mut impl Write,
425    fmt: Format,
426    abbrev: Option<usize>,
427    hash: &Hash,
428    c: &Commit,
429) {
430    let full_message: String = String::from_utf8_lossy(&c.message).into_owned();
431    let title = full_message.lines().next().unwrap_or("");
432    match fmt {
433        Format::Oneline => {
434            let id = format::short_hash(hash, abbrev.unwrap_or(DEFAULT_ABBREV));
435            let _ = writeln!(out, "{id} {title}");
436        }
437        Format::Default => {
438            let id = match abbrev {
439                Some(n) => format::short_hash(hash, n),
440                None => format::hex_hash(hash),
441            };
442            let _ = writeln!(out, "commit {id}");
443            let _ = writeln!(out, "Author: {}", format::short_identity(&c.author));
444            let _ = writeln!(out, "Date:   {}", format::human_date_utc(c.timestamp));
445            let _ = writeln!(out);
446            // Full message body, indented like git. Each line is prefixed
447            // with four spaces; blank lines stay blank.
448            for line in full_message.lines() {
449                if line.is_empty() {
450                    let _ = writeln!(out);
451                } else {
452                    let _ = writeln!(out, "    {line}");
453                }
454            }
455            let _ = writeln!(out);
456        }
457        Format::Json => {
458            emit_json_entry(out, hash, c, title, &full_message);
459        }
460    }
461}
462
463/// Emit one JSONL record for a commit. Schema:
464///
465/// ```json
466/// {
467///   "hash": "<64-hex>",
468///   "parents": ["<64-hex>", ...],
469///   "tree": "<64-hex>",
470///   "author": "<identity-string>",
471///   "timestamp": <unix-seconds>,
472///   "title": "<first line of message>",
473///   "message": "<full message, JSON-escaped>"
474/// }
475/// ```
476///
477/// Keys are written in a deterministic order so the output is
478/// reproducible and easy to snapshot-test.
479fn emit_json_entry(
480    out: &mut impl Write,
481    hash: &mkit_core::Hash,
482    c: &mkit_core::object::Commit,
483    title: &str,
484    full_message: &str,
485) {
486    let _ = out.write_all(b"{");
487    let _ = write!(out, "\"hash\":\"{}\"", format::hex_hash(hash));
488    let _ = out.write_all(b",\"parents\":[");
489    for (i, p) in c.parents.iter().enumerate() {
490        if i > 0 {
491            let _ = out.write_all(b",");
492        }
493        let _ = write!(out, "\"{}\"", format::hex_hash(p));
494    }
495    let _ = out.write_all(b"]");
496    let _ = write!(out, ",\"tree\":\"{}\"", format::hex_hash(&c.tree_hash));
497    let _ = write!(
498        out,
499        ",\"author\":\"{}\"",
500        format::json_escape(&format::full_identity(&c.author))
501    );
502    let _ = write!(out, ",\"timestamp\":{}", c.timestamp);
503    let _ = write!(out, ",\"title\":\"{}\"", format::json_escape(title));
504    let _ = write!(
505        out,
506        ",\"message\":\"{}\"",
507        format::json_escape(full_message)
508    );
509    let _ = out.write_all(b"}\n");
510}
511
/// Print `error: <msg>` on stderr and hand `code` straight back, so callers
/// can write `return emit_err(..., exit::X)`. A failure to write the
/// message is ignored.
fn emit_err(msg: &str, code: u8) -> u8 {
    let _ = writeln!(std::io::stderr().lock(), "error: {msg}");
    code
}
517
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `LogOpts` from the four fields that influence format and
    /// abbreviation selection; every other flag is left "not given".
    fn opts(
        oneline: bool,
        format: Option<Format>,
        abbrev_commit: bool,
        abbrev: Option<usize>,
    ) -> LogOpts {
        LogOpts {
            oneline,
            format,
            limit: None,
            abbrev_commit,
            abbrev,
            graph: false,
            start: None,
        }
    }

    /// Shorthand for the abbreviation tests, which never pass `--format`.
    fn opts_for_abbrev(oneline: bool, abbrev_commit: bool, abbrev: Option<usize>) -> LogOpts {
        opts(oneline, None, abbrev_commit, abbrev)
    }

    #[test]
    fn render_format_explicit_format_wins_over_oneline() {
        let o = opts(true, Some(Format::Default), false, None);
        assert_eq!(o.render_format(), Format::Default);
    }

    #[test]
    fn render_format_oneline_alone_resolves_to_oneline() {
        let o = opts(true, None, false, None);
        assert_eq!(o.render_format(), Format::Oneline);
    }

    #[test]
    fn render_format_default_when_no_flags() {
        let o = opts(false, None, false, None);
        assert_eq!(o.render_format(), Format::Default);
    }

    #[test]
    fn render_format_json_via_format_flag() {
        let o = opts(false, Some(Format::Json), false, None);
        assert_eq!(o.render_format(), Format::Json);
    }

    #[test]
    fn abbrev_len_off_by_default() {
        assert_eq!(opts_for_abbrev(false, false, None).abbrev_len(), None);
    }

    #[test]
    fn abbrev_len_default_for_oneline_and_abbrev_commit() {
        assert_eq!(
            opts_for_abbrev(true, false, None).abbrev_len(),
            Some(DEFAULT_ABBREV)
        );
        assert_eq!(
            opts_for_abbrev(false, true, None).abbrev_len(),
            Some(DEFAULT_ABBREV)
        );
    }

    #[test]
    fn abbrev_len_default_for_format_oneline() {
        // `--format=oneline` must imply abbreviation just like `--oneline`.
        let o = opts(false, Some(Format::Oneline), false, None);
        assert_eq!(o.abbrev_len(), Some(DEFAULT_ABBREV));
    }

    #[test]
    fn abbrev_len_not_implied_when_format_overrides_oneline() {
        // `--oneline --format=json` renders JSON, so nothing is abbreviated.
        let o = opts(true, Some(Format::Json), false, None);
        assert_eq!(o.abbrev_len(), None);
    }

    #[test]
    fn abbrev_len_explicit_value_wins() {
        assert_eq!(
            opts_for_abbrev(true, false, Some(12)).abbrev_len(),
            Some(12)
        );
    }

    #[test]
    fn abbrev_len_explicit_value_implies_abbreviation() {
        assert_eq!(
            opts_for_abbrev(false, false, Some(10)).abbrev_len(),
            Some(10)
        );
    }

    #[test]
    fn parse_rev_arg_absent_is_default() {
        assert!(matches!(parse_rev_arg(None), RevSelection::Default));
    }

    #[test]
    fn parse_rev_arg_plain_revision_is_single() {
        assert!(matches!(
            parse_rev_arg(Some("main")),
            RevSelection::Single(s) if s == "main"
        ));
    }

    #[test]
    fn parse_rev_arg_two_dot_range() {
        assert!(matches!(
            parse_rev_arg(Some("a..b")),
            RevSelection::Range { exclude, include } if exclude == "a" && include == "b"
        ));
    }

    #[test]
    fn parse_rev_arg_empty_range_side_is_head() {
        assert!(matches!(
            parse_rev_arg(Some("a..")),
            RevSelection::Range { exclude, include } if exclude == "a" && include == "HEAD"
        ));
        assert!(matches!(
            parse_rev_arg(Some("..b")),
            RevSelection::Range { exclude, include } if exclude == "HEAD" && include == "b"
        ));
    }

    #[test]
    fn parse_rev_arg_three_dot_is_symmetric_not_range() {
        assert!(matches!(
            parse_rev_arg(Some("a...b")),
            RevSelection::Symmetric { a, b } if a == "a" && b == "b"
        ));
        assert!(matches!(
            parse_rev_arg(Some("...b")),
            RevSelection::Symmetric { a, b } if a == "HEAD" && b == "b"
        ));
    }
}