Skip to main content

harper_core/expr/
anchor_end.rs

1use crate::Token;
2
3use super::Step;
4
/// A [`Step`] that anchors a match to the end of the token stream.
///
/// It succeeds with a zero-width match when the cursor is at or past the last non-whitespace
/// token, and fails otherwise (including when the stream holds no non-whitespace token at all).
/// Trailing whitespace tokens are therefore ignored when deciding where the "end" is.
///
/// For example, if you built `SequenceExpr::default().t_aco("word").then(AnchorEnd)` and ran it
/// on `This is a word`, the resulting `Span` would only cover the final word token.
#[derive(Debug, Clone, Copy, Default)]
pub struct AnchorEnd;
10
11impl Step for AnchorEnd {
12    fn step(&self, tokens: &[Token], cursor: usize, _source: &[char]) -> Option<isize> {
13        let last_non_ws = tokens
14            .iter()
15            .enumerate()
16            .rev()
17            .filter(|(_, t)| !t.kind.is_whitespace())
18            .map(|(i, _)| i)
19            .next();
20
21        // Match if cursor is at or past the last non-whitespace token
22        // This allows AnchorEnd to work in sequences where the cursor has advanced
23        // past the matched content, including when cursor is past the end of the token stream
24        if let Some(last) = last_non_ws
25            && cursor >= last
26        {
27            return Some(0);
28        }
29
30        None
31    }
32}
33
#[cfg(test)]
mod tests {
    use crate::expr::{AnchorStart, ExprExt, SequenceExpr};
    use crate::{Document, Span, TokenStringExt};

    use super::AnchorEnd;

    /// Used alone, the anchor yields exactly one empty span, located at the final period token.
    #[test]
    fn matches_period() {
        let doc = Document::new_markdown_default_curated("This is a test.");
        let spans = AnchorEnd.iter_matches_in_doc(&doc).collect::<Vec<_>>();

        assert_eq!(spans, vec![Span::new(7, 7)]);
    }

    /// An empty document has no non-whitespace token, so nothing can match.
    #[test]
    fn does_not_match_empty() {
        let doc = Document::new_markdown_default_curated("");
        let spans = AnchorEnd.iter_matches_in_doc(&doc).collect::<Vec<_>>();

        assert!(spans.is_empty());
    }

    /// A word that is the last token of the document matches when followed by the anchor.
    #[test]
    fn test_word_at_end_of_document() {
        let pattern = SequenceExpr::default()
            .then_any_capitalization_of("end")
            .then(AnchorEnd);
        let doc = Document::new_plain_english_curated("This is the end");

        let spans = pattern.iter_matches_in_doc(&doc).collect::<Vec<_>>();

        // Whitespace counts as tokens, so "end" is the token at index 6.
        assert_eq!(spans.len(), 1);
        assert_eq!(spans.first().map(|span| span.start), Some(6));
    }

    /// A word followed by further content must not satisfy the anchor.
    #[test]
    fn test_word_not_at_end_of_doc() {
        let pattern = SequenceExpr::default()
            .then_any_capitalization_of("end")
            .then(AnchorEnd);
        let doc = Document::new_plain_english_curated("This is the end, really");

        // "end" is followed by ", really", so it is not the last non-whitespace token of the
        // document and no match is expected.
        let hit_count = pattern.iter_matches_in_doc(&doc).count();
        assert_eq!(hit_count, 0);
    }

    /// The anchor is relative to the token slice it is run on, so it also works per chunk.
    #[test]
    fn test_word_at_end_of_chunk() {
        // Chunks are split at commas, which puts "hello" alone in the first chunk of
        // "hello, world".
        let doc = Document::new_plain_english_curated("hello, world");
        let pattern = SequenceExpr::default()
            .then_any_capitalization_of("hello")
            .then(AnchorEnd);

        // Run the expression against the first chunk only, not the whole document.
        let source = doc.get_source();
        let leading_chunk = doc.iter_chunks().next().unwrap();
        let hit_count = pattern.iter_matches(leading_chunk, source).count();

        // "hello" closes its chunk, so exactly one match is expected.
        assert_eq!(hit_count, 1);
    }

    /// Reference case: the mirror-image `AnchorStart` matches a word at the document start.
    #[test]
    fn test_compare_with_anchor_start() {
        let pattern = SequenceExpr::default()
            .then(AnchorStart)
            .then_any_capitalization_of("start");
        let doc = Document::new_plain_english_curated("Start here");

        let spans = pattern.iter_matches_in_doc(&doc).collect::<Vec<_>>();

        assert_eq!(spans.len(), 1);
        assert_eq!(spans.first().map(|span| span.start), Some(0));
    }

    /// The anchor still matches after trailing whitespace has been consumed by a prior step.
    #[test]
    fn test_word_with_trailing_whitespace_at_end_of_doc() {
        let pattern = SequenceExpr::default()
            .then_any_capitalization_of("foo")
            .then_whitespace()
            .then(AnchorEnd);
        let doc = Document::new_plain_english_curated("foo ");

        let spans = pattern.iter_matches_in_doc(&doc).collect::<Vec<_>>();

        // After "foo" and the whitespace are consumed the cursor is past the token stream; the
        // anchor must still succeed, giving a span that ends at index 2.
        assert_eq!(spans.len(), 1);
        assert_eq!(spans.first().map(|span| span.end), Some(2));
    }
}