use regex::Regex;
use std::sync::LazyLock;
/// Lazily compiled pattern for an abbreviation definition line.
///
/// The pattern is anchored at both ends of the input and matches `*[`,
/// one or more characters other than `]` (capture group 1, the term),
/// `]:`, optional whitespace, and then the remainder of the line
/// (capture group 2, the definition, which may be empty).
static ABBREVIATION_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    // The literal is a fixed pattern, so compilation failure would be a programming error.
    Regex::new(r"^\*\[([^\]]+)\]:\s*(.*)$").unwrap()
});
/// A single abbreviation definition of the form `*[ABBR]: Definition`.
#[derive(Clone, Debug, PartialEq)]
pub struct Abbreviation {
    /// The abbreviated term, i.e. the text between `*[` and `]`.
    pub abbr: String,
    /// The expansion text that follows `]:`; may be empty.
    pub definition: String,
    /// Line number supplied by the caller when the definition was parsed.
    pub line: usize,
}
/// Returns `true` when `line` is an abbreviation definition (`*[ABBR]: ...`).
///
/// A cheap prefix test runs first so that most lines never reach the regex.
/// The prefix test ignores leading whitespace, but the regex is anchored at
/// the very start of the line, so an indented `*[` line still yields `false`.
#[inline]
pub fn is_abbreviation_definition(line: &str) -> bool {
    // Short-circuit: the regex is only evaluated when the quick check passes.
    line.trim_start().starts_with("*[") && ABBREVIATION_PATTERN.is_match(line)
}
/// Cheap heuristic: could `line` be an abbreviation definition?
///
/// Leading whitespace is ignored. The line qualifies when what remains
/// starts with `*[` and contains `]:` somewhere. No regex is involved, so a
/// `true` result does not guarantee the line actually parses.
#[inline]
pub fn might_be_abbreviation(line: &str) -> bool {
    // `]:` can never overlap the two-character `*[` prefix, so searching only
    // the text after the prefix is equivalent to searching the trimmed line.
    line.trim_start()
        .strip_prefix("*[")
        .is_some_and(|rest| rest.contains("]:"))
}
/// Parses one line as an abbreviation definition.
///
/// Returns `None` when `line` does not match the abbreviation pattern.
/// Otherwise returns an [`Abbreviation`] holding the term (capture group 1),
/// the definition text (capture group 2, or an empty string if that group
/// did not participate), and `line_num` stored unchanged in `line`.
pub fn parse_abbreviation(line: &str, line_num: usize) -> Option<Abbreviation> {
    let captures = ABBREVIATION_PATTERN.captures(line)?;
    let term = captures.get(1)?.as_str().to_owned();
    let expansion = captures
        .get(2)
        .map_or_else(String::new, |m| m.as_str().to_owned());

    Some(Abbreviation {
        abbr: term,
        definition: expansion,
        line: line_num,
    })
}
/// Collects every abbreviation definition found in `content`.
///
/// The text is scanned line by line; results are returned in document order
/// and each entry carries its 1-based line number.
pub fn extract_abbreviations(content: &str) -> Vec<Abbreviation> {
    content
        .lines()
        .enumerate()
        // `enumerate` counts from zero; reported line numbers start at one.
        .filter_map(|(index, text)| parse_abbreviation(text, index + 1))
        .collect()
}
/// Reports whether `position` falls inside an abbreviation definition line.
///
/// Returns `true` only when `line` is an abbreviation definition and
/// `position` is strictly less than the byte length of `line`.
pub fn is_in_abbreviation_definition(line: &str, position: usize) -> bool {
    is_abbreviation_definition(line) && position < line.len()
}
/// Returns just the abbreviated terms defined in `content`, in document order.
///
/// Definitions are discovered with [`extract_abbreviations`]; the expansion
/// text and line numbers are discarded.
pub fn get_abbreviation_terms(content: &str) -> Vec<String> {
    let definitions = extract_abbreviations(content);
    let mut terms = Vec::with_capacity(definitions.len());
    for entry in definitions {
        terms.push(entry.abbr);
    }
    terms
}
pub fn is_defined_abbreviation(content: &str, word: &str) -> bool {
for line in content.lines() {
if let Some(abbr) = parse_abbreviation(line, 0)
&& abbr.abbr == word
{
return true;
}
}
false
}
// Unit tests for the abbreviation helpers defined in this module.
#[cfg(test)]
mod tests {
use super::*;
// Valid `*[ABBR]: ...` lines (including empty definitions) are recognised;
// headings, plain text, and near-miss syntaxes are rejected.
#[test]
fn test_is_abbreviation_definition() {
assert!(is_abbreviation_definition("*[HTML]: Hypertext Markup Language"));
assert!(is_abbreviation_definition("*[CSS]: Cascading Style Sheets"));
assert!(is_abbreviation_definition("*[W3C]: World Wide Web Consortium"));
assert!(is_abbreviation_definition("*[CSS3]: CSS Level 3"));
assert!(is_abbreviation_definition("*[abbr]: definition"));
assert!(is_abbreviation_definition("*[HTML]:"));
assert!(is_abbreviation_definition("*[HTML]: "));
assert!(!is_abbreviation_definition("# Heading"));
assert!(!is_abbreviation_definition("Regular text"));
assert!(!is_abbreviation_definition("[HTML]: Not an abbr"));
assert!(!is_abbreviation_definition("*HTML: Not an abbr"));
assert!(!is_abbreviation_definition("*[HTML] Not an abbr"));
}
// Parsing yields the term, the definition text, and the supplied line number;
// a non-matching line yields `None`.
#[test]
fn test_parse_abbreviation() {
let abbr = parse_abbreviation("*[HTML]: Hypertext Markup Language", 1);
assert!(abbr.is_some());
let abbr = abbr.unwrap();
assert_eq!(abbr.abbr, "HTML");
assert_eq!(abbr.definition, "Hypertext Markup Language");
assert_eq!(abbr.line, 1);
let abbr = parse_abbreviation("*[CSS3]: CSS Level 3", 5);
assert!(abbr.is_some());
let abbr = abbr.unwrap();
assert_eq!(abbr.abbr, "CSS3");
assert_eq!(abbr.definition, "CSS Level 3");
assert_eq!(abbr.line, 5);
let abbr = parse_abbreviation("Not an abbreviation", 1);
assert!(abbr.is_none());
}
// All definitions in a multi-line document are extracted in document order.
#[test]
fn test_extract_abbreviations() {
let content = r#"# Document
The HTML specification is maintained by the W3C.
CSS is used for styling.
*[HTML]: Hypertext Markup Language
*[W3C]: World Wide Web Consortium
*[CSS]: Cascading Style Sheets
"#;
let abbreviations = extract_abbreviations(content);
assert_eq!(abbreviations.len(), 3);
assert_eq!(abbreviations[0].abbr, "HTML");
assert_eq!(abbreviations[0].definition, "Hypertext Markup Language");
assert_eq!(abbreviations[1].abbr, "W3C");
assert_eq!(abbreviations[1].definition, "World Wide Web Consortium");
assert_eq!(abbreviations[2].abbr, "CSS");
assert_eq!(abbreviations[2].definition, "Cascading Style Sheets");
}
// Lookup succeeds only for defined terms and is case-sensitive
// ("html" does not match the definition for "HTML").
#[test]
fn test_is_defined_abbreviation() {
let content = r#"Some text.
*[HTML]: Hypertext Markup Language
*[CSS]: Cascading Style Sheets
"#;
assert!(is_defined_abbreviation(content, "HTML"));
assert!(is_defined_abbreviation(content, "CSS"));
assert!(!is_defined_abbreviation(content, "W3C"));
assert!(!is_defined_abbreviation(content, "html")); }
// Only the terms are returned, in the order their definitions appear.
#[test]
fn test_get_abbreviation_terms() {
let content = r#"Text here.
*[HTML]: Hypertext Markup Language
*[CSS]: Cascading Style Sheets
*[W3C]: World Wide Web Consortium
"#;
let terms = get_abbreviation_terms(content);
assert_eq!(terms, vec!["HTML", "CSS", "W3C"]);
}
// The quick heuristic accepts a leading space before `*[` and rejects
// lines lacking either the `*[` prefix or the `]:` separator.
#[test]
fn test_might_be_abbreviation() {
assert!(might_be_abbreviation("*[HTML]: Definition"));
assert!(might_be_abbreviation(" *[HTML]: Definition")); assert!(!might_be_abbreviation("*HTML: Not abbr"));
assert!(!might_be_abbreviation("[HTML]: Not abbr"));
assert!(!might_be_abbreviation("Regular text"));
}
// Terms may contain punctuation such as `+` and `.`.
#[test]
fn test_abbreviation_with_special_characters() {
let abbr = parse_abbreviation("*[C++]: C Plus Plus", 1);
assert!(abbr.is_some());
let abbr = abbr.unwrap();
assert_eq!(abbr.abbr, "C++");
let abbr = parse_abbreviation("*[.NET]: Dot NET Framework", 1);
assert!(abbr.is_some());
let abbr = abbr.unwrap();
assert_eq!(abbr.abbr, ".NET");
}
// A definition consisting of several words is captured in full.
#[test]
fn test_multi_word_definitions() {
let abbr = parse_abbreviation("*[API]: Application Programming Interface", 1);
assert!(abbr.is_some());
let abbr = abbr.unwrap();
assert_eq!(abbr.definition, "Application Programming Interface");
}
// A definition with nothing after `]:` parses with an empty definition string.
#[test]
fn test_empty_definition() {
let abbr = parse_abbreviation("*[HTML]:", 1);
assert!(abbr.is_some());
let abbr = abbr.unwrap();
assert_eq!(abbr.abbr, "HTML");
assert_eq!(abbr.definition, "");
}
}