formal-ai 0.200.0

Formal symbolic AI implementation with OpenAI-compatible APIs
use std::collections::BTreeSet;

#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) struct WordSpan {
    pub word: String,
    pub start: usize,
    pub end: usize,
}

pub(super) fn normalized_word_spans(text: &str) -> Vec<WordSpan> {
    let mut spans = Vec::new();
    let mut current_start = None;
    let mut current = String::new();
    let mut current_end = 0usize;

    for (index, character) in text.char_indices() {
        if character.is_alphanumeric() {
            if current_start.is_none() {
                current_start = Some(index);
            }
            current.extend(character.to_lowercase());
            current_end = index + character.len_utf8();
        } else if let Some(start) = current_start.take() {
            spans.push(WordSpan {
                word: std::mem::take(&mut current),
                start,
                end: current_end,
            });
        }
    }

    if let Some(start) = current_start {
        spans.push(WordSpan {
            word: current,
            start,
            end: current_end,
        });
    }
    spans
}

pub(super) fn extract_email_addresses(input: &str) -> Vec<String> {
    input
        .split_whitespace()
        .map(clean_email_candidate)
        .filter(|candidate| looks_like_email(candidate))
        .map(ToOwned::to_owned)
        .collect()
}

fn clean_email_candidate(candidate: &str) -> &str {
    candidate
        .trim_matches(|character: char| {
            !(character.is_ascii_alphanumeric() || matches!(character, '@' | '.' | '_' | '-' | '+'))
        })
        .trim_matches('.')
}

fn looks_like_email(candidate: &str) -> bool {
    if candidate
        .chars()
        .filter(|character| *character == '@')
        .count()
        != 1
    {
        return false;
    }
    let Some((local, domain)) = candidate.split_once('@') else {
        return false;
    };
    !local.is_empty()
        && domain.contains('.')
        && domain
            .split('.')
            .all(|segment| !segment.is_empty() && segment.chars().all(is_email_domain_char))
}

const fn is_email_domain_char(character: char) -> bool {
    character.is_ascii_alphanumeric() || character == '-'
}

pub(super) fn extract_urls(input: &str) -> Vec<String> {
    input
        .split_whitespace()
        .map(clean_url_candidate)
        .filter(|candidate| {
            candidate.starts_with("http://")
                || candidate.starts_with("https://")
                || candidate.starts_with("www.")
        })
        .map(ToOwned::to_owned)
        .collect()
}

fn clean_url_candidate(candidate: &str) -> &str {
    candidate
        .trim_matches(|character: char| {
            matches!(
                character,
                '"' | '\'' | '`' | '<' | '>' | '(' | ')' | '[' | ']' | '{' | '}' | ',' | ';'
            )
        })
        .trim_end_matches(['.', '!', '?', ':'])
}

pub(super) fn extract_numbers(input: &str) -> Vec<String> {
    let mut numbers = Vec::new();
    let chars = input.char_indices().collect::<Vec<_>>();
    let mut index = 0usize;
    while index < chars.len() {
        let (start, character) = chars[index];
        let mut number_start = start;
        if matches!(character, '-' | '+')
            && chars
                .get(index + 1)
                .is_some_and(|(_, ch)| ch.is_ascii_digit())
            && !previous_char_is_alphanumeric(input, start)
        {
            index += 1;
        } else if character.is_ascii_digit() && !previous_char_is_alphanumeric(input, start) {
            number_start = start;
        } else {
            index += 1;
            continue;
        }

        while chars.get(index).is_some_and(|(_, ch)| ch.is_ascii_digit()) {
            index += 1;
        }
        if chars.get(index).is_some_and(|(_, ch)| *ch == '.')
            && chars
                .get(index + 1)
                .is_some_and(|(_, ch)| ch.is_ascii_digit())
        {
            index += 1;
            while chars.get(index).is_some_and(|(_, ch)| ch.is_ascii_digit()) {
                index += 1;
            }
        }
        let end = chars
            .get(index)
            .map_or(input.len(), |(char_start, _)| *char_start);
        if chars.get(index).is_some_and(|(_, ch)| ch.is_alphanumeric()) {
            continue;
        }
        numbers.push(input[number_start..end].to_owned());
    }
    numbers
}

fn previous_char_is_alphanumeric(input: &str, byte_index: usize) -> bool {
    input[..byte_index]
        .chars()
        .next_back()
        .is_some_and(char::is_alphanumeric)
}

pub(super) fn count_unique_words(input: &str) -> usize {
    input
        .split_whitespace()
        .map(clean_word)
        .filter(|word| !word.is_empty())
        .collect::<BTreeSet<_>>()
        .len()
}

pub(super) fn count_words(input: &str) -> usize {
    input
        .split_whitespace()
        .map(clean_word)
        .filter(|word| !word.is_empty())
        .count()
}

fn clean_word(word: &str) -> String {
    word.trim_matches(|character: char| !character.is_alphanumeric())
        .to_owned()
}

pub(super) fn title_case(input: &str) -> String {
    case_words(input)
        .iter()
        .map(|word| capitalize_word(word))
        .collect::<Vec<_>>()
        .join(" ")
}

pub(super) fn sentence_case(input: &str) -> String {
    let mut out = String::new();
    let mut capitalized = false;
    for character in input.chars().flat_map(char::to_lowercase) {
        if !capitalized && character.is_alphanumeric() {
            out.extend(character.to_uppercase());
            capitalized = true;
        } else {
            out.push(character);
        }
    }
    out
}

pub(super) fn delimiter_case(input: &str, delimiter: &str) -> String {
    case_words(input).join(delimiter)
}

pub(super) fn camel_case(input: &str) -> String {
    let mut words = case_words(input).into_iter();
    let Some(first) = words.next() else {
        return String::new();
    };

    let mut out = first;
    for word in words {
        out.push_str(&capitalize_word(&word));
    }
    out
}

pub(super) fn pascal_case(input: &str) -> String {
    case_words(input)
        .iter()
        .map(|word| capitalize_word(word))
        .collect::<String>()
}

fn case_words(input: &str) -> Vec<String> {
    normalized_word_spans(input)
        .into_iter()
        .map(|span| span.word)
        .collect()
}

fn capitalize_word(word: &str) -> String {
    let mut chars = word.chars();
    let Some(first) = chars.next() else {
        return String::new();
    };

    let mut out = first.to_uppercase().collect::<String>();
    out.extend(chars.flat_map(char::to_lowercase));
    out
}

pub(super) fn remove_punctuation(input: &str) -> String {
    input
        .chars()
        .filter(|character| character.is_alphanumeric() || character.is_whitespace())
        .collect()
}

pub(super) fn sort_words(input: &str) -> String {
    let mut words = input.split_whitespace().collect::<Vec<_>>();
    words.sort_unstable();
    words.join(" ")
}

pub(super) fn strip_empty_lines(input: &str) -> Vec<String> {
    input
        .lines()
        .filter(|line| !line.trim().is_empty())
        .map(str::to_owned)
        .collect()
}

pub(super) fn join_lines(input: &str) -> String {
    input
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}

pub(super) fn number_lines(input: &str) -> Vec<String> {
    input
        .lines()
        .enumerate()
        .map(|(index, line)| format!("{}. {line}", index + 1))
        .collect()
}

pub(super) fn reverse_lines(input: &str) -> String {
    input.lines().rev().collect::<Vec<_>>().join("\n")
}

pub(super) fn comment_lines(input: &str) -> String {
    input
        .lines()
        .map(|line| format!("// {line}"))
        .collect::<Vec<_>>()
        .join("\n")
}

pub(super) fn uncomment_lines(input: &str) -> String {
    input
        .lines()
        .map(uncomment_line)
        .collect::<Vec<_>>()
        .join("\n")
}

fn uncomment_line(line: &str) -> String {
    let indent_len = line.len() - line.trim_start().len();
    let (indent, rest) = line.split_at(indent_len);
    for prefix in ["// ", "//", "# ", "#"] {
        if let Some(stripped) = rest.strip_prefix(prefix) {
            return format!("{indent}{stripped}");
        }
    }
    line.to_owned()
}

pub(super) fn outdent_line(line: &str) -> &str {
    line.strip_prefix("    ")
        .or_else(|| line.strip_prefix('\t'))
        .unwrap_or(line)
}

pub(super) fn deduplicate_lines(input: &str) -> Vec<String> {
    let mut seen = BTreeSet::new();
    let mut lines = Vec::new();
    for line in input.lines() {
        if seen.insert(line.to_owned()) {
            lines.push(line.to_owned());
        }
    }
    lines
}