ostool 0.15.0

A tool for operating system development
Documentation
use std::time::{Duration, Instant};

use anyhow::anyhow;
use colored::Colorize;
use regex::Regex;

/// Offset added to `Instant::now()` when a pattern matches to form the match's
/// `deadline`; `ByteStreamMatcher::should_stop` reports `true` once it has passed.
// NOTE(review): the name suggests this lets trailing output drain after a match — confirm with callers.
pub(crate) const MATCH_DRAIN_DURATION: Duration = Duration::from_millis(500);
/// Maximum number of most-recent bytes kept in the matcher's sliding window.
const MAX_MATCH_WINDOW_BYTES: usize = 2048;
/// Built-in failure patterns that `compile_regexes` appends to the caller's
/// fail patterns unless an identical pattern string is already present.
const DEFAULT_FAIL_PATTERNS: &[&str] = &[r"(?i)\bpanic(?:ked)?\b", r"(?i)kernel panic"];
/// Amount of context, in characters, requested on each side of the match when
/// building an error excerpt.
const MATCH_EXCERPT_CONTEXT_CHARS: usize = 120;
/// Character limit applied to an error excerpt before the truncation marker
/// (`...`) is appended.
const MATCH_EXCERPT_MAX_CHARS: usize = 240;

/// Which class of pattern produced a [`StreamMatch`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StreamMatchKind {
    /// One of the success regexes matched the observed output.
    Success,
    /// One of the fail regexes matched the observed output.
    Fail,
}

impl StreamMatchKind {
    pub(crate) fn into_result(self, matched: &StreamMatch) -> anyhow::Result<()> {
        match self {
            StreamMatchKind::Success => Ok(()),
            StreamMatchKind::Fail => Err(anyhow!(
                "Fail pattern matched '{}': {}",
                matched.matched_regex,
                match_excerpt(&matched.matched_text, &matched.matched_regex)
            )),
        }
    }
}

/// Details of the first pattern match found by a [`ByteStreamMatcher`].
#[derive(Debug, Clone)]
pub struct StreamMatch {
    /// Whether a success or a fail pattern matched.
    pub kind: StreamMatchKind,
    /// Source text of the regex that matched (`Regex::as_str`).
    pub matched_regex: String,
    /// Contents of the match window at the moment of the match, decoded
    /// lossily as UTF-8 and with ANSI escape sequences stripped.
    pub matched_text: String,
    /// Instant of the match plus `MATCH_DRAIN_DURATION`; once reached,
    /// `ByteStreamMatcher::should_stop` returns `true`.
    pub deadline: Instant,
}

pub(crate) fn compile_regexes(
    success_patterns: &[String],
    fail_patterns: &[String],
) -> anyhow::Result<(Vec<Regex>, Vec<Regex>)> {
    let success_regex = success_patterns
        .iter()
        .map(|p| Regex::new(p).map_err(|e| anyhow!("success regex error: {e}")))
        .collect::<Result<Vec<_>, _>>()?;

    let mut merged_fail_patterns = fail_patterns.to_vec();
    for pattern in DEFAULT_FAIL_PATTERNS {
        if !merged_fail_patterns
            .iter()
            .any(|existing| existing == pattern)
        {
            merged_fail_patterns.push((*pattern).to_string());
        }
    }

    let fail_regex = merged_fail_patterns
        .iter()
        .map(|p| Regex::new(p).map_err(|e| anyhow!("fail regex error: {e}")))
        .collect::<Result<Vec<_>, _>>()?;

    Ok((success_regex, fail_regex))
}

/// Prints a coloured banner to stdout announcing which pattern matched.
///
/// Success banners are green and fail banners are red. Both use the same
/// `=== ... ===` framing so they are easy to spot (and grep for) in the
/// surrounding stream output.
pub(crate) fn print_match_event(matched: &StreamMatch) {
    match matched.kind {
        StreamMatchKind::Success => println!(
            "{}",
            format!(
                "\n=== SUCCESS PATTERN MATCHED: {} ===",
                matched.matched_regex
            )
            .green()
        ),
        // The closing " ===" was previously missing here, leaving the fail
        // banner unbalanced compared with the success banner.
        StreamMatchKind::Fail => println!(
            "{}",
            format!(
                "\n=== FAIL PATTERN MATCHED: {} ===",
                matched.matched_regex
            )
            .red()
        ),
    }
}

/// Internal state of a [`ByteStreamMatcher`].
#[derive(Debug, Clone)]
enum StreamMatchState {
    /// No pattern has matched yet; incoming bytes are still being tested.
    Pending,
    /// A pattern matched; the recorded match is kept and no further matching is done.
    Matched(StreamMatch),
}

/// Feeds a byte stream through success/fail regexes, one byte at a time,
/// and records the first match.
pub struct ByteStreamMatcher {
    /// Patterns whose match yields `StreamMatchKind::Success`.
    success_regex: Vec<Regex>,
    /// Patterns whose match yields `StreamMatchKind::Fail`; tested before the success patterns.
    fail_regex: Vec<Regex>,
    /// Sliding window of the most recent bytes, capped at `MAX_MATCH_WINDOW_BYTES`.
    match_buf: Vec<u8>,
    /// Whether a match has been recorded yet.
    state: StreamMatchState,
}

impl ByteStreamMatcher {
    pub fn new(success_regex: Vec<Regex>, fail_regex: Vec<Regex>) -> Self {
        Self {
            success_regex,
            fail_regex,
            match_buf: Vec::with_capacity(MAX_MATCH_WINDOW_BYTES),
            state: StreamMatchState::Pending,
        }
    }

    pub fn observe_byte(&mut self, byte: u8) -> Option<StreamMatch> {
        self.match_buf.push(byte);
        if self.match_buf.len() > MAX_MATCH_WINDOW_BYTES {
            let overflow = self.match_buf.len() - MAX_MATCH_WINDOW_BYTES;
            self.match_buf.drain(..overflow);
        }

        match self.state {
            StreamMatchState::Pending => {
                let text = String::from_utf8_lossy(&self.match_buf);
                let text = strip_ansi_escape_sequences(&text);

                let matched = self
                    .fail_regex
                    .iter()
                    .find(|regex| regex.is_match(&text))
                    .map(|regex| StreamMatch {
                        kind: StreamMatchKind::Fail,
                        matched_regex: regex.as_str().to_string(),
                        matched_text: text.to_string(),
                        deadline: Instant::now() + MATCH_DRAIN_DURATION,
                    })
                    .or_else(|| {
                        self.success_regex
                            .iter()
                            .find(|regex| regex.is_match(&text))
                            .map(|regex| StreamMatch {
                                kind: StreamMatchKind::Success,
                                matched_regex: regex.as_str().to_string(),
                                matched_text: text.to_string(),
                                deadline: Instant::now() + MATCH_DRAIN_DURATION,
                            })
                    });

                if let Some(matched) = matched {
                    self.state = StreamMatchState::Matched(matched.clone());
                    Some(matched)
                } else {
                    None
                }
            }
            StreamMatchState::Matched(_) => None,
        }
    }

    pub fn matched(&self) -> Option<&StreamMatch> {
        match &self.state {
            StreamMatchState::Pending => None,
            StreamMatchState::Matched(matched) => Some(matched),
        }
    }

    pub fn should_stop(&self) -> bool {
        self.matched()
            .is_some_and(|matched| Instant::now() >= matched.deadline)
    }
}

/// Removes CSI escape sequences (`ESC [` ... final byte) from `input`.
///
/// After `ESC [`, everything up to and including the first character in the
/// range `0x40..=0x7e` is dropped. A sequence that is never terminated
/// swallows the rest of the input. An `ESC` that is not followed by `[` is
/// kept as-is.
fn strip_ansi_escape_sequences(input: &str) -> String {
    let mut cleaned = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();

    while let Some(ch) = chars.next() {
        let starts_csi = ch == '\u{1b}' && chars.peek() == Some(&'[');
        if !starts_csi {
            cleaned.push(ch);
            continue;
        }

        // Consume the '[' and then everything through the final byte.
        chars.next();
        for skipped in chars.by_ref() {
            if ('\u{40}'..='\u{7e}').contains(&skipped) {
                break;
            }
        }
    }

    cleaned
}

/// Builds a short, single-line excerpt of `text` around the first match of `matched_regex`.
///
/// All whitespace runs (including line breaks) are collapsed to single spaces
/// first. If the regex does not compile or does not match the collapsed text,
/// the start of the collapsed text is returned instead. `...` marks every
/// side on which text was cut off.
fn match_excerpt(text: &str, matched_regex: &str) -> String {
    let flattened = text.split_whitespace().collect::<Vec<_>>().join(" ");

    let hit = Regex::new(matched_regex)
        .ok()
        .and_then(|regex| regex.find(&flattened).map(|m| (m.start(), m.end())));
    let Some((hit_start, hit_end)) = hit else {
        return truncate_chars(&flattened, MATCH_EXCERPT_MAX_CHARS);
    };

    let from = char_boundary_before(&flattened, hit_start, MATCH_EXCERPT_CONTEXT_CHARS);
    let to = char_boundary_after(&flattened, hit_end, MATCH_EXCERPT_CONTEXT_CHARS);

    let lead = if from > 0 { "..." } else { "" };
    let trail = if to < flattened.len() { "..." } else { "" };
    let rendered = format!("{lead}{}{trail}", flattened[from..to].trim());

    truncate_chars(&rendered, MATCH_EXCERPT_MAX_CHARS)
}

/// Returns the byte index that lies `chars` characters before `byte_index` in `text`.
///
/// The result is always a char boundary, so `text[result..byte_index]` holds
/// exactly `chars` characters, or the whole prefix (result `0`) when fewer
/// than `chars` characters precede `byte_index`. With `chars == 0` the result
/// is `byte_index` itself.
///
/// # Panics
///
/// Panics if `byte_index` is not on a char boundary of `text` or is out of range.
fn char_boundary_before(text: &str, byte_index: usize, chars: usize) -> usize {
    // Walk backwards over at most `chars` characters; the last one visited is
    // where the context starts. (`nth(chars)` would land one character too
    // far back, giving `chars + 1` characters of context.)
    text[..byte_index]
        .char_indices()
        .rev()
        .take(chars)
        .last()
        .map(|(index, _)| index)
        .unwrap_or(byte_index)
}

/// Returns the byte index that lies `chars` characters after `byte_index` in `text`.
///
/// If fewer than `chars` characters follow `byte_index`, the result is
/// `text.len()`.
///
/// # Panics
///
/// Panics if `byte_index` is not on a char boundary of `text` or is out of range.
fn char_boundary_after(text: &str, byte_index: usize, chars: usize) -> usize {
    let tail = &text[byte_index..];
    // Total encoded width of the first `chars` characters of the tail.
    let advance: usize = tail.chars().take(chars).map(char::len_utf8).sum();
    byte_index + advance
}

/// Limits `text` to its first `max_chars` characters.
///
/// When anything was cut off, `...` is appended (so the result may be up to
/// three characters longer than `max_chars`); otherwise the text is returned
/// unchanged.
fn truncate_chars(text: &str, max_chars: usize) -> String {
    // The byte offset of character number `max_chars` exists only if the text is too long.
    match text.char_indices().nth(max_chars) {
        Some((cut, _)) => format!("{}...", &text[..cut]),
        None => text.to_string(),
    }
}

/// Unit tests for ANSI stripping, stream matching, regex compilation and excerpt building.
#[cfg(test)]
mod tests {
    use super::{
        ByteStreamMatcher, StreamMatchKind, compile_regexes, match_excerpt,
        strip_ansi_escape_sequences,
    };
    use regex::Regex;

    /// Colour-setting and reset sequences are both removed, leaving only the text.
    #[test]
    fn strips_basic_csi_sequences() {
        assert_eq!(
            strip_ansi_escape_sequences("\u{1b}[31mpanicked at test\u{1b}[m"),
            "panicked at test"
        );
    }

    /// A word-boundary panic pattern still fires when the text is preceded by a colour code.
    #[test]
    fn fail_matcher_ignores_ansi_sequences() {
        let mut matcher =
            ByteStreamMatcher::new(vec![], vec![Regex::new("(?i)\\bpanic(?:ked)?\\b").unwrap()]);

        let input = "\u{1b}[31mpanicked at os/arceos/foo.rs:1:1\n";
        let mut matched = None;
        // `observe_byte` reports the match only once, so keep the first `Some`.
        for byte in input.bytes() {
            matched = matcher.observe_byte(byte).or(matched);
        }

        let matched = matched.expect("expected panic match");
        assert_eq!(matched.kind, StreamMatchKind::Fail);
        assert!(matched.matched_text.to_ascii_lowercase().contains("panic"));
    }

    /// The window spans line breaks, so a pattern on a later line is still found.
    #[test]
    fn matcher_detects_fail_pattern_across_multiple_lines() {
        // Only the custom pattern is installed here; the built-in panic
        // patterns are added by `compile_regexes`, which this test bypasses.
        let mut matcher = ByteStreamMatcher::new(
            vec![],
            vec![Regex::new("Failed to load VM images").unwrap()],
        );

        let input = "line one\nline two\npanicked at foo\nFailed to load VM images: AxErrorKind::NotFound\n";
        let mut matched = None;
        for byte in input.bytes() {
            matched = matcher.observe_byte(byte).or(matched);
        }

        let matched = matched.expect("expected match");
        assert_eq!(matched.kind, StreamMatchKind::Fail);
        assert!(matched.matched_text.contains("Failed to load VM images"));
    }

    /// With no user patterns, both built-in panic patterns end up in the fail list.
    #[test]
    fn compile_regexes_appends_builtin_panic_patterns() {
        let (_success, fail) = compile_regexes(&[], &[]).unwrap();

        assert!(
            fail.iter()
                .any(|regex| regex.as_str() == r"(?i)\bpanic(?:ked)?\b")
        );
        assert!(
            fail.iter()
                .any(|regex| regex.as_str() == r"(?i)kernel panic")
        );
    }

    /// The excerpt contains the matched word and is at most the input plus one `...` marker.
    #[test]
    fn match_excerpt_returns_local_context_only() {
        let text = "prefix text one two three panic happened in vm loader because image missing suffix text four five";
        let excerpt = match_excerpt(text, r"(?i)\bpanic\b");

        assert!(excerpt.contains("panic"));
        assert!(!excerpt.is_empty());
        assert!(excerpt.len() <= text.len() + 3);
    }
}