doyoumarkdown/
lib.rs

1#![deny(rust_2018_idioms)]
2
3use nom::branch::alt;
4use nom::bytes::complete::{is_not, tag, take_until};
5use nom::multi::fold_many0;
6use nom::sequence::{delimited, pair, terminated, tuple};
7use nom::IResult;
8pub use nom_locate::LocatedSpan;
9
// Square-bracket delimiters: wrap the visible text of a markdown link.
/// Opening delimiter of a link's text.
const LEFT_BRACKET: &str = "[";
/// Closing delimiter of a link's text or an image's alt text.
const RIGHT_BRACKET: &str = "]";
/// A link text with nothing between the brackets.
const EMPTY_BRACKETS: &str = "[]";

// Image delimiters: the same brackets, introduced by `!`.
/// Opening delimiter of an image's alt text.
const LEFT_MARKDOWN_IMAGE_BRACKET: &str = "![";
/// An image alt text with nothing between the brackets.
const EMPTY_IMAGE_BRACKETS: &str = "![]";

// Parenthesis delimiters: wrap the target of a link or image.
/// Opening delimiter of a link or image target.
const LEFT_PARENS: &str = "(";
/// Closing delimiter of a link or image target.
const RIGHT_PARENS: &str = ")";
17
18pub type Span<'a> = LocatedSpan<&'a str>;
19
20fn left_parens<'a>(s: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
21    tag(LEFT_PARENS)(s)
22}
23
24fn right_parens<'a>(s: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
25    tag(RIGHT_PARENS)(s)
26}
27
28fn non_empty_parens<'a>(s: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
29    delimited(left_parens, is_not(RIGHT_PARENS), right_parens)(s)
30}
31
32fn empty_parens_pair<'a>(s: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
33    terminated(left_parens, right_parens)(s)
34}
35
36fn right_bracket<'a>(s: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
37    tag(RIGHT_BRACKET)(s)
38}
39
40fn left_bracket<'a>(s: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
41    tag(LEFT_BRACKET)(s)
42}
43
44fn non_empty_brackets<'a>(s: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
45    delimited(left_bracket, is_not(RIGHT_BRACKET), right_bracket)(s)
46}
47
48fn empty_brackets_pair<'a>(s: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
49    terminated(left_bracket, right_bracket)(s)
50}
51
52fn left_markdown_image_bracket<'a>(s: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
53    tag(LEFT_MARKDOWN_IMAGE_BRACKET)(s)
54}
55
56fn empty_markdown_image_bracket_pair<'a>(s: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
57    terminated(left_markdown_image_bracket, right_bracket)(s)
58}
59
60fn non_empty_markdown_image_bracket_pair<'a>(s: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
61    delimited(
62        left_markdown_image_bracket,
63        is_not(RIGHT_BRACKET),
64        right_bracket,
65    )(s)
66}
67
68fn markdown_image_brackets<'a>(s: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
69    alt((
70        empty_markdown_image_bracket_pair,
71        non_empty_markdown_image_bracket_pair,
72    ))(s)
73}
74
75fn brackets<'a>(s: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
76    alt((empty_brackets_pair, non_empty_brackets))(s)
77}
78
79fn parens<'a>(s: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
80    alt((empty_parens_pair, non_empty_parens))(s)
81}
82
83fn markdown_url<'a>(s: Span<'a>) -> IResult<Span<'a>, (Span<'a>, Span<'a>)> {
84    tuple((brackets, parens))(s)
85}
86
87fn markdown_image<'a>(s: Span<'a>) -> IResult<Span<'a>, (Span<'a>, Span<'a>)> {
88    tuple((markdown_image_brackets, parens))(s)
89}
90
91pub fn all_markdown_images<'a>(s: Span<'a>) -> IResult<Span<'a>, Vec<MarkdownUrl<'a>>> {
92    fold_many0(
93        pair(take_until(LEFT_MARKDOWN_IMAGE_BRACKET), markdown_image),
94        Vec::new,
95        |mut acc: Vec<_>, item| {
96            //here we want to inspect what we took_until with `take_until` so we can verify
97            //we want to actually accumulate this instead of skipping it
98            //since it might be a markdown image instead of a url
99            let position = item.1 .0;
100            //println!("{:?}", item.1);
101            let url = MarkdownUrl {
102                issue_type: MarkdownUrlIssueType::FoundImage(position),
103                href: &item.1 .1,
104            };
105            acc.push(url);
106            acc
107        },
108    )(s)
109}
110
111pub fn all_markdown_urls<'a>(s: Span<'a>) -> IResult<Span<'a>, Vec<MarkdownUrl<'a>>> {
112    fold_many0(
113        pair(take_until(LEFT_BRACKET), markdown_url),
114        Vec::new,
115        |mut acc: Vec<_>, item| {
116            //here we want to inspect what we took_until with `take_until` so we can verify
117            //we want to actually accumulate this instead of skipping it
118            //since it might be a markdown image instead of a url
119            if !item.0.ends_with('!') {
120                let position = item.1 .0;
121                let href = match item.1 .1.fragment().eq(&"(") {
122                    true => "",
123                    _ => &item.1 .1,
124                };
125                //println!("FOUND: {:?}", item.1 .1);
126                let url = MarkdownUrl {
127                    issue_type: MarkdownUrlIssueType::FoundUrl(position),
128                    href,
129                };
130                //println!("FOUND URL: {:?}", url);
131                acc.push(url);
132            }
133            acc
134        },
135    )(s)
136}
137
138pub fn all_empty_alt_text_markdown_images<'a>(
139    s: Span<'a>,
140) -> IResult<Span<'a>, Vec<MarkdownUrl<'a>>> {
141    fold_many0(
142        pair(take_until(EMPTY_IMAGE_BRACKETS), markdown_image),
143        Vec::new,
144        |mut acc: Vec<_>, item| {
145            //here we want to inspect what we took_until with `take_until` so we can verify
146            //we want to actually accumulate this instead of skipping it
147            //since it might be a markdown image instead of a url
148            let position = item.1 .0;
149            // println!("FOUND: {:?}", item.1);
150            let url = MarkdownUrl {
151                issue_type: MarkdownUrlIssueType::EmptyImageAltText(position),
152                href: &item.1 .1,
153            };
154            //println!("{:?}", url.position);
155            acc.push(url);
156            acc
157        },
158    )(s)
159}
160
161pub fn all_empty_href_markdown_urls<'a>(s: Span<'a>) -> IResult<Span<'a>, Vec<MarkdownUrl<'a>>> {
162    fold_many0(
163        pair(take_until(LEFT_BRACKET), markdown_url),
164        Vec::new,
165        |mut acc: Vec<_>, item| {
166            //here we want to inspect what we took_until with `take_until` so we can verify
167            //we want to actually accumulate this instead of skipping it
168            //since it might be a markdown image instead of a url
169            if !item.0.ends_with('!') && item.1 .1.fragment().eq(&"(") {
170                //println!("FOUND: {:?}", item.1 .1);
171                let position = item.1 .0;
172                //(_, possible_empty_href) = tag(EMPTY_PARENS)(position);
173
174                let url = MarkdownUrl {
175                    issue_type: MarkdownUrlIssueType::EmptyAnchorHref(position),
176                    href: "",
177                };
178                //println!("{:?}", url.position);
179                acc.push(url);
180            }
181            acc
182        },
183    )(s)
184}
185
186pub fn all_empty_href_markdown_images<'a>(s: Span<'a>) -> IResult<Span<'a>, Vec<MarkdownUrl<'a>>> {
187    fold_many0(
188        pair(take_until(LEFT_MARKDOWN_IMAGE_BRACKET), markdown_image),
189        Vec::new,
190        |mut acc: Vec<_>, item| {
191            //here we want to inspect what we took_until with `take_until` so we can verify
192            //we want to actually accumulate this instead of skipping it
193            //since it might be a markdown image instead of a url
194            if item.1 .1.fragment().eq(&"(") {
195                let position = item.1 .0;
196                //println!("{:?}", item.1);
197                let url = MarkdownUrl {
198                    issue_type: MarkdownUrlIssueType::FoundImage(position),
199                    href: "",
200                };
201                acc.push(url);
202            }
203            acc
204        },
205    )(s)
206}
207
208pub fn all_empty_anchor_text_markdown_urls<'a>(
209    s: Span<'a>,
210) -> IResult<Span<'a>, Vec<MarkdownUrl<'a>>> {
211    fold_many0(
212        pair(take_until(EMPTY_BRACKETS), markdown_url),
213        Vec::new,
214        |mut acc: Vec<_>, item| {
215            //here we want to inspect what we took_until with `take_until` so we can verify
216            //we want to actually accumulate this instead of skipping it
217            //since it might be a markdown image instead of a url
218            if !item.0.ends_with('!') {
219                let position = item.1 .0;
220                //println!("{:?}", item.1);
221                let url = MarkdownUrl {
222                    issue_type: MarkdownUrlIssueType::EmptyAnchorText(position),
223                    href: &item.1 .1,
224                };
225                //println!("{:?}", url.position);
226                acc.push(url);
227            }
228            acc
229        },
230    )(s)
231}
232
233pub fn all_low_alt_text_markdown_images<'a>(
234    s: Span<'a>,
235) -> IResult<Span<'a>, Vec<MarkdownUrl<'a>>> {
236    fold_many0(
237        pair(take_until(LEFT_MARKDOWN_IMAGE_BRACKET), markdown_image),
238        Vec::new,
239        |mut acc: Vec<_>, item| {
240            //here we want to inspect what we took_until with `take_until` so we can verify
241            //we want to actually accumulate this instead of skipping it
242            //since it might be a markdown image instead of a url
243            let position = item.1 .0;
244            //here we wan't to test if the number of words in the alt-text is < 5 (arbitrarily
245            //picked and open to update)
246            //if so, we warn that the alt text lacks fidelity
247            let alt_text = *position.fragment();
248            let count = alt_text.split_whitespace().count();
249            if count < 5 {
250                let url = MarkdownUrl {
251                    issue_type: MarkdownUrlIssueType::LowImageAltText(position),
252                    href: &item.1 .1,
253                };
254                //println!("{:?}", url);
255                //println!("Found a low alt text image: {:?}", url);
256                acc.push(url);
257            }
258            acc
259        },
260    )(s)
261}
262
263#[derive(Debug, Copy, Clone)]
264pub struct MarkdownUrl<'a> {
265    pub issue_type: MarkdownUrlIssueType<'a>,
266    pub href: &'a str,
267}
268
269#[derive(Debug, Copy, Clone)]
270pub enum MarkdownUrlIssueType<'a> {
271    FoundImage(Span<'a>),
272    FoundUrl(Span<'a>),
273    EmptyAnchorText(Span<'a>),
274    EmptyAnchorHref(Span<'a>),
275    EmptyImageAltText(Span<'a>),
276    LowImageAltText(Span<'a>),
277}
278
#[cfg(test)]
mod unit_tests {
    //! Unit tests for the token parsers and the document scanners.
    //!
    //! Scanner tests check both the number of findings and the variant and span
    //! of each finding, so a scanner that starts reporting the wrong issue type
    //! fails loudly instead of slipping past a non-matching `if let`.

    use super::*;

    #[test]
    fn test_left_markdown_image_bracket() -> anyhow::Result<()> {
        let input = Span::new("![]()");

        let (_, token) = left_markdown_image_bracket(input)?;

        assert_eq!(token.fragment(), &"![");

        Ok(())
    }

    #[test]
    fn test_empty_parens_pair() -> anyhow::Result<()> {
        let input = Span::new("()");

        let (_, token) = empty_parens_pair(input)?;

        // An empty pair has no inner text, so the opening token is returned.
        assert_eq!(token.fragment(), &"(");

        Ok(())
    }

    #[test]
    fn test_non_empty_parens() -> anyhow::Result<()> {
        let input = Span::new("(abc)");

        let (_, token) = non_empty_parens(input)?;

        assert_eq!(token.fragment(), &"abc");

        Ok(())
    }

    #[test]
    fn test_right_parens() -> anyhow::Result<()> {
        let input = Span::new(")abc)");

        let (_, token) = right_parens(input)?;

        assert_eq!(token.fragment(), &")");

        Ok(())
    }

    #[test]
    fn test_left_parens() -> anyhow::Result<()> {
        let input = Span::new("(abc)");

        let (_, token) = left_parens(input)?;

        assert_eq!(token.fragment(), &"(");

        Ok(())
    }

    #[test]
    fn test_right_bracket() -> anyhow::Result<()> {
        let input = Span::new("]abc]");

        let (_, token) = right_bracket(input)?;

        assert_eq!(token.fragment(), &"]");

        Ok(())
    }

    #[test]
    fn test_left_bracket() -> anyhow::Result<()> {
        let input = Span::new("[abc]");

        let (_, token) = left_bracket(input)?;

        assert_eq!(token.fragment(), &"[");

        Ok(())
    }

    #[test]
    fn test_find_all_images() -> anyhow::Result<()> {
        let input = Span::new(
            r#"# Hi there
              ## A heading

              ![an image](some img url)
              ![an image](some img url)
              ![an image](some img url)

              [a regular url](please don't find me)"#,
        );

        let (_, results) = all_markdown_images(input)?;

        for url in &results {
            match url.issue_type {
                MarkdownUrlIssueType::FoundImage(v) => assert_eq!(v.fragment(), &"an image"),
                other => panic!("unexpected issue type: {:?}", other),
            }
        }

        assert_eq!(results.len(), 3);

        Ok(())
    }

    /// Links with empty anchor text are found; images and links with text are not.
    #[test]
    fn test_find_all_links_empty_alt_text() -> anyhow::Result<()> {
        let input = Span::new(
            r#"# Hi there\n\r
              ## A heading\n

              ![an image](please don't find me!)
                       [](please find me!)
              [](please find me!)

              [a regular url](find me)
    [another regular url](don't find me)
              "#,
        );

        let (_, results) = all_empty_anchor_text_markdown_urls(input)?;

        for url in &results {
            match url.issue_type {
                // With no anchor text to point at, the span is the opening bracket.
                MarkdownUrlIssueType::EmptyAnchorText(v) => assert_eq!(v.fragment(), &"["),
                other => panic!("unexpected issue type: {:?}", other),
            }
        }

        assert_eq!(results.len(), 2);

        Ok(())
    }

    #[test]
    fn test_find_all_images_empty_alt_text() -> anyhow::Result<()> {
        let input = Span::new(
            r#"# Hi there
              ## A heading

              ![](some img url)
              ![an image](some img url)
              ![](some img url)
              ![an image with a decent alt text](some img url)
              ![](some img url)

              [a regular url](please don't find me)
              [a regular url](please don't find me)
              [a regular url](please don't find me)

              [a regular url but with enough alt text](def shouldn't find this one)
              [a regular url with enough](def shouldn't find this one)
              "#,
        );

        let (_, results) = all_empty_alt_text_markdown_images(input)?;

        for url in &results {
            match url.issue_type {
                // With no alt text to point at, the span is the image opener.
                MarkdownUrlIssueType::EmptyImageAltText(v) => assert_eq!(v.fragment(), &"!["),
                other => panic!("unexpected issue type: {:?}", other),
            }
        }

        assert_eq!(results.len(), 3);

        Ok(())
    }

    /// Images whose alt text has fewer than five words are found; links never are.
    #[test]
    fn test_find_all_links_low_alt_text() -> anyhow::Result<()> {
        let input = Span::new(
            r#"# Hi there
              ## A heading

              ![an image](some img url)
              ![an image](some img url)
              ![an image](some img url)
              ![an image with a decent alt text](some img url)
              ![another with just enough alt](some img url)

              [a regular url](please don't find me)
              [a regular url](please don't find me)
              [a regular url](please don't find me)

              [a regular url but with enough alt text](def shouldn't find this one)
              [a regular url with enough](def shouldn't find this one)
              "#,
        );

        let (_, results) = all_low_alt_text_markdown_images(input)?;

        for url in &results {
            match url.issue_type {
                MarkdownUrlIssueType::LowImageAltText(v) => assert_eq!(v.fragment(), &"an image"),
                other => panic!("unexpected issue type: {:?}", other),
            }
        }

        assert_eq!(results.len(), 3);

        Ok(())
    }

    #[test]
    fn test_find_all_links() -> anyhow::Result<()> {
        let input = Span::new(
            r#"# Hi there
              ## A heading

              ![an image](please don't find me!)

              [a regular url](find me)
    [another regular url](find me)
              "#,
        );

        let (_, results) = all_markdown_urls(input)?;

        for url in &results {
            assert!(
                matches!(url.issue_type, MarkdownUrlIssueType::FoundUrl(_)),
                "unexpected issue type: {:?}",
                url.issue_type
            );
            assert_eq!(url.href, "find me");
        }

        assert_eq!(results.len(), 2);

        Ok(())
    }

    #[test]
    fn test_find_all_links_with_empty_hrefs() -> anyhow::Result<()> {
        let input = Span::new(
            r#"# Hi there
              ## A heading

              ![an image](please don't find me!)

              [find me]()
    [another regular url](don't find me)
              "#,
        );

        let (_, results) = all_empty_href_markdown_urls(input)?;

        for url in &results {
            match url.issue_type {
                MarkdownUrlIssueType::EmptyAnchorHref(v) => assert_eq!(v.fragment(), &"find me"),
                other => panic!("unexpected issue type: {:?}", other),
            }
            assert_eq!(url.href, "");
        }

        assert_eq!(results.len(), 1);

        Ok(())
    }
}