// texted 1.2.1 - Docs.rs

use std::io::ErrorKind;
use std::path::{Path, PathBuf};
use std::str::Lines;
use std::{fs, io};

use chrono::{DateTime, Utc};
use lazy_static::lazy_static;
use regex::Regex;
use uuid::Uuid;

use crate::content::content_renderer::RenderOptions;
use crate::content::{ContentHeader, PostId};
use crate::text_utils::parse_date_time;
use crate::util::os_helper::get_name;

/// Parses the texted metadata header found at the top of a post.
///
/// The header is a run of `[KEY]: # (value)` lines. The keys `ID`, `DATE`, `AUTHOR` and
/// `TAGS` are recognised; any other key is accepted and ignored. The header may optionally
/// be wrapped in an HTML comment whose opening `<!--` and closing `-->` each sit on a line
/// of their own. Blank (empty or whitespace-only) lines are skipped everywhere.
///
/// # Returns
///
/// `(header, lines, maybe_line)` where `maybe_line` is the line at which parsing stopped
/// and `lines` yields everything after it:
/// * header wrapped in a comment: `maybe_line` is the closing `-->` line;
/// * otherwise: `maybe_line` is the first non-blank line that is not a header entry, or
///   `None` when the input is exhausted.
///
/// # Errors
///
/// Returns `ErrorKind::InvalidData` when
/// * the header opens with `<!--` but no `-->` line follows,
/// * none of the four known keys is present (`"Invalid texted header"`),
/// * `parse_date_time` rejects the `DATE` value.
pub fn parse_texted_header<'a>(file_name: &Path, lines: Lines<'a>) -> io::Result<(ContentHeader, Lines<'a>, Option<&'a str>)> {
    let mut id = String::new();
    let mut date = String::new();
    let mut author = String::new();
    let mut tags = String::new();

    let mut lines = lines;

    // Leading blank lines are allowed before the header.
    let mut maybe_line = lines.find(|line| !line.trim().is_empty());

    // The header may be hidden inside an HTML comment; step past the opening marker.
    let start_with_comment = maybe_line.map_or(false, |line| line.trim() == "<!--");
    if start_with_comment {
        maybe_line = lines.next();
    }

    // Collect `[KEY]: # (value)` entries until the first non-blank line that is not one.
    while let Some(line) = maybe_line {
        // Trim before testing so that whitespace-only lines are skipped as well.
        if !line.trim().is_empty() {
            match extract_texted_header(line) {
                Some(("ID", val)) => id = val.to_string(),
                Some(("DATE", val)) => date = val.to_string(),
                Some(("AUTHOR", val)) => author = val.to_string(),
                Some(("TAGS", val)) => tags = val.to_string(),
                // Unknown keys are tolerated.
                Some(_) => {}
                None => break,
            }
        }
        maybe_line = lines.next();
    }

    if start_with_comment {
        // Skip forward to the closing marker. The cursor is intentionally left ON the
        // `-->` line, which is what gets handed back to the caller.
        loop {
            match maybe_line {
                Some(line) if line.trim() == "-->" => break,
                Some(_) => maybe_line = lines.next(),
                None => {
                    // `display()` instead of `to_str().unwrap()`: a non-UTF-8 path must
                    // not turn this error into a panic.
                    return Err(io::Error::new(
                        ErrorKind::InvalidData,
                        format!("End of comment in the header is missing - file={}", file_name.display()),
                    ));
                }
            }
        }
    }

    if id.is_empty() && date.is_empty() && author.is_empty() && tags.is_empty() {
        return Err(io::Error::new(ErrorKind::InvalidData, "Invalid texted header".to_string()));
    }

    let tags = extract_tags(&tags);
    let date = parse_date_time(&date).map_err(|e| {
        io::Error::new(ErrorKind::InvalidData, format!("{} - file={}", e, file_name.display()))
    })?;

    let header = ContentHeader {
        file_name: file_name.to_path_buf(),
        id: PostId(id),
        date,
        author,
        tags,
    };

    Ok((header, lines, maybe_line))
}

/// Scans forward for the first Markdown level-1 heading (a line beginning with `"# "`).
///
/// Scanning starts at `maybe_line` and continues through `lines`.
///
/// Returns `(title, lines, maybe_line)`:
/// * `title` is the heading text after the `"# "` prefix, or an empty string when the
///   input runs out before a heading is found;
/// * `maybe_line` is the heading line itself, or `None` when no heading was found;
/// * `lines` yields whatever follows `maybe_line`.
pub fn parse_title_markdown<'a>(lines: Lines<'a>, mut maybe_line: Option<&'a str>) -> (String, Lines<'a>, Option<&'a str>) {
    let mut remaining = lines;
    let mut title = String::new();

    while let Some(current) = maybe_line {
        if let Some(text) = current.strip_prefix("# ") {
            title.push_str(text);
            break;
        }
        maybe_line = remaining.next();
    }

    (title, remaining, maybe_line)
}

/// Builds a fresh [`ContentHeader`] for a file that carries no texted header.
///
/// * `id` is a newly generated version-4 UUID.
/// * `date` is the file's last-modification time, converted to a naive UTC timestamp.
/// * `author` is whatever `get_name()` returns (presumably the current OS user — confirm
///   against `os_helper`).
/// * `tags` is empty.
///
/// # Errors
///
/// Propagates the I/O error when the file metadata or its modification time cannot be read.
pub fn generate_header_from_file(file_name: &PathBuf) -> io::Result<ContentHeader> {
    let modified_at: DateTime<Utc> = fs::metadata(file_name)?.modified()?.into();

    Ok(ContentHeader {
        file_name: file_name.clone(),
        id: PostId(Uuid::new_v4().to_string()),
        date: modified_at.naive_utc(),
        author: get_name(),
        tags: vec![],
    })
}


/// Scans forward for the first line containing an `<h1>…</h1>` or `<h2>…</h2>` element.
///
/// Scanning starts at `maybe_line` and continues through `lines`.
///
/// Returns `(title, lines, maybe_line)`:
/// * `title` is the text captured between the heading tags, or an empty string when the
///   input runs out before a heading is found;
/// * `maybe_line` is the line that matched, or `None` when nothing matched;
/// * `lines` yields whatever follows `maybe_line`.
pub fn parse_title_html<'a>(lines: Lines<'a>, mut maybe_line: Option<&'a str>) -> (String, Lines<'a>, Option<&'a str>) {
    lazy_static! {
        static ref TITLE_REGEX: Regex = Regex::new(r"<h[12]>(?P<title>.+)</h[12]>").unwrap();
    }

    let mut remaining = lines;
    let mut title = String::new();

    while let Some(current) = maybe_line {
        let heading = TITLE_REGEX
            .captures(current)
            .and_then(|caps| caps.name("title").map(|m| m.as_str()));

        if let Some(text) = heading {
            title.push_str(text);
            break;
        }
        maybe_line = remaining.next();
    }

    (title, remaining, maybe_line)
}

/// Joins `lines` back into a single string, terminating every kept line with `'\n'`.
///
/// * `RenderOptions::FullContent` keeps every line.
/// * `RenderOptions::PreviewOnly` stops at the first line whose zero-based index reaches
///   `max_line_count` (when one is set) or which contains the preview tag
///   (`tag_based.0`); that line and everything after it are dropped.
pub fn extract_content(lines: Lines, render_options: &RenderOptions) -> String {
    let mut content = String::new();

    match render_options {
        RenderOptions::FullContent => {
            lines.for_each(|line| {
                content.push_str(line);
                content.push('\n');
            });
        }
        RenderOptions::PreviewOnly(preview_opt, _img_prefix) => {
            for (index, line) in lines.enumerate() {
                // The line limit is checked before the tag, as both end the preview.
                let limit_reached = preview_opt
                    .max_line_count
                    .as_ref()
                    .map_or(false, |max| index as i32 >= max.0);

                if limit_reached || line.contains(&preview_opt.tag_based.0) {
                    break;
                }

                content.push_str(line);
                content.push('\n');
            }
        }
    }

    content
}

/// Splits the value of a `TAGS` header into individual tags.
///
/// Tags are separated by any run of whitespace (spaces, tabs, …), so repeated or mixed
/// separators never produce empty tags or tags with embedded whitespace. An empty or
/// all-whitespace input yields an empty vector.
fn extract_tags(tags_str: &str) -> Vec<String> {
    tags_str.split_whitespace().map(str::to_string).collect()
}

fn extract_texted_header(line: &str) -> Option<(&str, &str)> {
    lazy_static! {
            static ref HEADER_REGEX : Regex = Regex::new(r"\[(?P<key>\w+)\]: # \((?P<value>.+)\)").unwrap();
        }
    extract_header_key_val(line, &HEADER_REGEX)
}

/// Applies `header_regex` to `line` and returns the text of its `key` and `value` named
/// capture groups.
///
/// Returns `None` when the regex does not match or either group did not participate in
/// the match.
fn extract_header_key_val<'a>(line: &'a str, header_regex: &Regex) -> Option<(&'a str, &'a str)> {
    let caps = header_regex.captures(line)?;
    let key = caps.name("key")?;
    let value = caps.name("value")?;

    Some((key.as_str(), value.as_str()))
}

/// Returns `md_post` with every HTML comment (`<!-- … -->`) removed.
///
/// Text outside comments is copied through unchanged, including text between two
/// adjacent comments.
///
/// # Errors
///
/// Returns `ErrorKind::InvalidData` when a `<!--` has no matching `-->` after it.
pub fn remove_comments(md_post: &str) -> io::Result<String> {
    const COMMENT_OPEN: &str = "<!--";
    const COMMENT_CLOSE: &str = "-->";

    let mut output = String::new();
    let mut rest = md_post;

    // Each pass copies the text before the next comment and resumes after its end.
    while let Some((before, after_open)) = rest.split_once(COMMENT_OPEN) {
        output.push_str(before);

        match after_open.split_once(COMMENT_CLOSE) {
            Some((_comment, after_close)) => rest = after_close,
            None => {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    "Error finding end of comment",
                ));
            }
        }
    }

    // Whatever remains contains no further comment opener.
    output.push_str(rest);
    Ok(output)
}

// Unit tests for the header-parsing helpers and for `remove_comments`.
#[cfg(test)]
mod tests {
    use chrono::{NaiveDate, NaiveDateTime, NaiveTime};

    use super::*;

    // Each well-formed `[KEY]: # (value)` line yields its key and value; a line without
    // the `# ` part is rejected.
    #[test]
    fn test_extract_texted_header() {
        let res = extract_texted_header("[ID]: # (a63bd715-a3fe-4788-b0e1-2a3153778544)");
        assert_eq!(res, Some(("ID", "a63bd715-a3fe-4788-b0e1-2a3153778544")));
        let res = extract_texted_header("[DATE]: # (2022-04-02 12:05:00.000)");
        assert_eq!(res, Some(("DATE", "2022-04-02 12:05:00.000")));
        let res = extract_texted_header("[AUTHOR]: # (thiago)");
        assert_eq!(res, Some(("AUTHOR", "thiago")));
        let res = extract_texted_header("[TAGS]: # (rust something-else)");
        assert_eq!(res, Some(("TAGS", "rust something-else")));

        let res = extract_texted_header("[AUTHOR]: (thiago)");
        assert!(res.is_none());
    }

    // Repeated spaces between tags must not produce empty entries.
    #[test]
    fn test_extract_tags() {
        let tags_str = "one two three   four";
        let tags = extract_tags(tags_str);
        assert_eq!(tags, ["one", "two", "three", "four"]);
    }

    // A header wrapped in an HTML comment, with blank lines between the entries and
    // trailing spaces after `-->`, is parsed; with no TAGS entry the tag list is empty.
    #[test]
    fn test_lines_texted() {
        let file_name = PathBuf::from("posts/20200522_how_to_write_a_code_review/index.md");
        let content = r##"

<!--

[ID]: # (21c1e9ad-4ebb-4168-a543-fbf77cc35a85)

[DATE]: # (2024-02-12 22:54:00.000)

[AUTHOR]: # (thiago)

-->        "##;

        let (header, _lines, _next_line) = parse_texted_header(&file_name, content.lines()).unwrap();
        let date = NaiveDate::from_ymd_opt(2024, 02, 12).unwrap();
        let time = NaiveTime::from_hms_opt(22, 54, 00).unwrap();
        let expected = ContentHeader {
            file_name: PathBuf::from("posts/20200522_how_to_write_a_code_review/index.md"),
            id: PostId("21c1e9ad-4ebb-4168-a543-fbf77cc35a85".to_string()),
            date: NaiveDateTime::new(date, time),
            author: "thiago".to_string(),
            tags: vec![],
        };
        assert_eq!(header, expected);
    }

    // Input that holds only a closing `-->` has no header entries and must be reported
    // as `InvalidData` with the "Invalid texted header" message.
    #[test]
    fn test_no_header() {
        let file_name = PathBuf::from("posts/20200522_how_to_write_a_code_review/index.md");
        let content = r##"
-->        "##;

        let res = parse_texted_header(&file_name, content.lines());
        assert!(res.is_err());
        if let Err(err) = res {
            assert_eq!(err.kind(), ErrorKind::InvalidData);
            assert_eq!(err.to_string(), "Invalid texted header");
        }
    }

    // Covers comments embedded in text, input without comments, empty input,
    // back-to-back comments, and input that is a single comment.
    #[test]
    fn test_parse_removes_comment() {
        let content = r#"Some text.<!-- more -->Wo<!-- xyz -->rd"#;
        let res = remove_comments(content).unwrap();
        assert_eq!(res, "Some text.Word");

        let content = r#"Some text.Word"#;
        let res = remove_comments(content).unwrap();
        assert_eq!(res, "Some text.Word");

        let content = r#""#;
        let res = remove_comments(content).unwrap();
        assert_eq!(res, "");

        let content = r#"<!-- more --><!-- xyz -->"#;
        let res = remove_comments(content).unwrap();
        assert_eq!(res, "");

        let content = r#"<!-- more -->"#;
        let res = remove_comments(content).unwrap();
        assert_eq!(res, "");
    }
}