use codebook::{
parser::{TextRange, WordLocation},
queries::LanguageType,
};
mod utils;
/// A single misspelling in a plain Markdown paragraph is reported exactly once,
/// together with the byte range it occupies in the input.
#[test]
fn test_markdown_paragraph() {
    utils::init_logging();
    let processor = utils::get_processor();
    let sample_text = "Some paragraph text with a misspeled word.\n";

    // "misspeled" starts at byte 27 of the sample and is 9 bytes long.
    let expected_range = TextRange {
        start_byte: 27,
        end_byte: 36,
    };
    let expected = WordLocation::new("misspeled".to_string(), vec![expected_range]);

    let misspelled = processor
        .spell_check(sample_text, Some(LanguageType::Markdown), None)
        .to_vec();
    println!("Misspelled words: {misspelled:?}");

    assert_eq!(misspelled.len(), 1);
    let reported = &misspelled[0];
    assert_eq!(reported.word, expected.word);
    assert_eq!(reported.locations, expected.locations);
}
/// Both misspellings in a `#` heading line are flagged.
#[test]
fn test_markdown_heading() {
    utils::init_logging();
    let processor = utils::get_processor();
    let sample_text = "# A headng with a tyypo\n";
    let results = processor
        .spell_check(sample_text, Some(LanguageType::Markdown), None)
        .to_vec();

    // Gather just the flagged words for printing and lookup.
    let mut words: Vec<&str> = Vec::new();
    for entry in results.iter() {
        words.push(entry.word.as_str());
    }
    println!("Misspelled words: {words:?}");

    let is_flagged = |candidate: &str| words.iter().any(|&seen| seen == candidate);
    assert!(is_flagged("headng"));
    assert!(is_flagged("tyypo"));
}
/// Inside a fenced block tagged `bash`, neither `mkdir` nor `dir` may be reported.
#[test]
fn test_markdown_fenced_code_block_known_lang() {
    utils::init_logging();
    let processor = utils::get_processor();
    let sample_text = r#"# Hello World
Some correct text here.
```bash
mkdir some_dir
```
More correct text here.
"#;
    let results = processor
        .spell_check(sample_text, Some(LanguageType::Markdown), None)
        .to_vec();
    let words = results
        .iter()
        .map(|entry| entry.word.as_str())
        .collect::<Vec<&str>>();
    println!("Misspelled words: {words:?}");

    let is_flagged = |candidate: &str| words.iter().any(|&seen| seen == candidate);
    assert!(!is_flagged("mkdir"));
    assert!(!is_flagged("dir"));
}
/// A fenced block tagged with an unrecognised language name must not contribute
/// `badwwword` to the results.
#[test]
fn test_markdown_fenced_code_block_unknown_lang_skipped() {
    utils::init_logging();
    let processor = utils::get_processor();
    let sample_text = r#"Some text.
```unknownlang
badwwword_in_code
```
More text.
"#;
    let results = processor
        .spell_check(sample_text, Some(LanguageType::Markdown), None)
        .to_vec();

    let mut words: Vec<&str> = Vec::new();
    words.extend(results.iter().map(|entry| entry.word.as_str()));
    println!("Misspelled words: {words:?}");

    let found_in_code = words.iter().any(|&seen| seen == "badwwword");
    assert!(!found_in_code);
}
/// A fenced block with no language tag must not contribute `badwwword` to the results.
#[test]
fn test_markdown_fenced_code_block_no_lang_skipped() {
    utils::init_logging();
    let processor = utils::get_processor();
    let sample_text = r#"Some text.
```
badwwword_in_code
```
More text.
"#;
    let results = processor
        .spell_check(sample_text, Some(LanguageType::Markdown), None)
        .to_vec();
    let words = results
        .iter()
        .map(|entry| entry.word.as_str())
        .collect::<Vec<&str>>();
    println!("Misspelled words: {words:?}");

    let found_in_code = words.iter().any(|&seen| seen == "badwwword");
    assert!(!found_in_code);
}
/// Misspellings are expected both in the surrounding paragraphs (`paragrap`, `tyypo`)
/// and inside the `python` fenced block (`functin`, part of an identifier).
#[test]
fn test_markdown_code_block_uses_language_grammar() {
    utils::init_logging();
    let processor = utils::get_processor();
    let sample_text = r#"A paragrap with a tyypo.
```python
def some_functin():
pass
```
Another paragrap with a tyypo.
"#;
    let results = processor
        .spell_check(sample_text, Some(LanguageType::Markdown), None)
        .to_vec();

    let mut words: Vec<&str> = Vec::new();
    for entry in results.iter() {
        words.push(entry.word.as_str());
    }
    println!("Misspelled words: {words:?}");

    let is_flagged = |candidate: &str| words.iter().any(|&seen| seen == candidate);
    // Prose typos.
    assert!(is_flagged("paragrap"));
    assert!(is_flagged("tyypo"));
    // Typo inside the code block's function name.
    assert!(is_flagged("functin"));
}
/// With a `bash` block and an `unknownlang` block interleaved with prose, the prose
/// typos are flagged while `mkdir` and `badspel` from the code blocks are not.
#[test]
fn test_markdown_multiple_code_blocks() {
    utils::init_logging();
    let processor = utils::get_processor();
    let sample_text = r#"Some text with a tyypo.
```bash
mkdir somedir
```
Middle text is corect.
```unknownlang
badspel = True
```
End text is also corect.
"#;
    let results = processor
        .spell_check(sample_text, Some(LanguageType::Markdown), None)
        .to_vec();
    let words = results
        .iter()
        .map(|entry| entry.word.as_str())
        .collect::<Vec<&str>>();
    println!("Misspelled words: {words:?}");

    let is_flagged = |candidate: &str| words.iter().any(|&seen| seen == candidate);
    // Typos in the prose between the fences must be reported.
    assert!(is_flagged("tyypo"));
    assert!(is_flagged("corect"));
    // Words that only occur inside the fenced blocks must not be.
    assert!(!is_flagged("mkdir"));
    assert!(!is_flagged("badspel"));
}
/// Both misspellings inside a `>` block quote are flagged.
#[test]
fn test_markdown_block_quote() {
    utils::init_logging();
    let processor = utils::get_processor();
    let sample_text = "> A block quoet with a tyypo.\n";
    let results = processor
        .spell_check(sample_text, Some(LanguageType::Markdown), None)
        .to_vec();

    let mut words: Vec<&str> = Vec::new();
    words.extend(results.iter().map(|entry| entry.word.as_str()));
    println!("Misspelled words: {words:?}");

    let is_flagged = |candidate: &str| words.iter().any(|&seen| seen == candidate);
    assert!(is_flagged("quoet"));
    assert!(is_flagged("tyypo"));
}
/// Fenced blocks tagged with the short names `py` and `js` each contain `hello_wrld`;
/// `wrld` must be reported with one location per block.
#[test]
fn test_markdown_code_block_alias_resolution() {
    utils::init_logging();
    let processor = utils::get_processor();
    let sample_text = r#"Some text.
```py
def hello_wrld():
pass
```
```js
function hello_wrld() {}
```
More text.
"#;
    let misspelled = processor
        .spell_check(sample_text, Some(LanguageType::Markdown), None)
        .to_vec();
    println!("Misspelled words: {misspelled:?}");

    // Fail with a descriptive message if the word was not reported at all.
    let wrld = misspelled
        .iter()
        .find(|w| w.word == "wrld")
        .expect("wrld should be flagged");
    assert_eq!(
        wrld.locations.len(),
        2,
        "wrld should have 2 locations (one from py block, one from js block)"
    );
}
/// Byte offsets reported for a word inside a fenced `python` block must index into the
/// full Markdown document, not into the code block's own text.
#[test]
fn test_markdown_injected_region_byte_offsets() {
    utils::init_logging();
    let processor = utils::get_processor();
    let sample_text = "# OK\n\n```python\ndef some_functin(): pass\n```\n";
    let misspelled = processor
        .spell_check(sample_text, Some(LanguageType::Markdown), None)
        .to_vec();
    println!("Misspelled words: {misspelled:?}");

    let functin = misspelled
        .iter()
        .find(|w| w.word == "functin")
        .expect("Expected 'functin' to be flagged");
    let loc = functin
        .locations
        .first()
        .expect("'functin' should have at least one location");

    // `str::get` yields `None` for out-of-range or non-char-boundary offsets, so bad
    // offsets fail this assertion with its message instead of panicking while slicing.
    assert_eq!(
        sample_text.get(loc.start_byte..loc.end_byte),
        Some("functin"),
        "Byte offsets should map back to 'functin' in the original document"
    );
}
/// No reported word may list the same span twice for text inside a block quote.
#[test]
fn test_markdown_no_duplicate_spans() {
    utils::init_logging();
    let processor = utils::get_processor();
    let sample_text = "> A tyypo in a block quoet.\n";
    let misspelled = processor
        .spell_check(sample_text, Some(LanguageType::Markdown), None)
        .to_vec();

    // Without any results the loop below would pass without checking anything.
    assert!(
        !misspelled.is_empty(),
        "Expected at least one misspelled word so the duplicate-span check is meaningful"
    );

    for result in &misspelled {
        // Compare the raw number of spans with the number of distinct spans.
        let total_count = result.locations.len();
        let distinct: std::collections::HashSet<_> = result.locations.iter().collect();
        assert_eq!(
            total_count,
            distinct.len(),
            "Word '{}' has duplicate spans: {:?}",
            result.word,
            result.locations
        );
    }
}