Skip to main content

memo_cli/preprocess/
validate.rs

1use super::{ContentType, ValidationError, ValidationResult};
2
3pub fn validate_content(content_type: ContentType, input: &str) -> ValidationResult {
4    match content_type {
5        ContentType::Url => validate_url(input),
6        ContentType::Json => validate_json(input),
7        ContentType::Yaml => validate_yaml(input),
8        ContentType::Xml => validate_xml(input),
9        ContentType::Markdown => validate_markdown(input),
10        ContentType::Text => ValidationResult::skipped(),
11        ContentType::Unknown => ValidationResult::unknown(),
12    }
13}
14
/// Cheap shape test: does `input` look like `scheme://rest`?
///
/// Surrounding whitespace is ignored. The trimmed text must contain no
/// whitespace at all and must have at least one character before the first
/// `://`. Nothing else (scheme characters, host) is checked here.
pub(crate) fn looks_like_url(input: &str) -> bool {
    let trimmed = input.trim();
    if trimmed.chars().any(char::is_whitespace) {
        return false;
    }
    // An empty string has no "://", so it falls out here as `false` too.
    matches!(trimmed.split_once("://"), Some((scheme, _)) if !scheme.is_empty())
}
28
29fn validate_url(input: &str) -> ValidationResult {
30    let candidate = input.trim();
31    if !looks_like_url(candidate) {
32        return invalid(
33            "invalid-url",
34            "URL must include a scheme and host, e.g. https://example.com",
35            None,
36        );
37    }
38
39    let Some((scheme, remainder)) = candidate.split_once("://") else {
40        return invalid(
41            "invalid-url",
42            "URL must include a scheme and host, e.g. https://example.com",
43            None,
44        );
45    };
46    if !is_valid_url_scheme(scheme) {
47        return invalid(
48            "invalid-url",
49            "URL scheme contains unsupported characters",
50            Some("scheme".to_string()),
51        );
52    }
53
54    let host_end = remainder.find(['/', '?', '#']).unwrap_or(remainder.len());
55    let host = &remainder[..host_end];
56    if host.is_empty() {
57        return invalid(
58            "invalid-url",
59            "URL host is missing",
60            Some("host".to_string()),
61        );
62    }
63    if host.starts_with('.') || host.ends_with('.') || host.contains("..") {
64        return invalid(
65            "invalid-url",
66            "URL host is malformed",
67            Some("host".to_string()),
68        );
69    }
70    if host.chars().any(|ch| ch.is_whitespace()) || host.contains('@') {
71        return invalid(
72            "invalid-url",
73            "URL host is malformed",
74            Some("host".to_string()),
75        );
76    }
77
78    ValidationResult::valid()
79}
80
/// Returns `true` when `scheme` is an ASCII letter followed by any number of
/// ASCII letters, ASCII digits, `+`, `-` or `.`.
///
/// The empty string is rejected.
fn is_valid_url_scheme(scheme: &str) -> bool {
    // Working on bytes is safe here: every accepted character is ASCII, and
    // each byte of a multi-byte UTF-8 character is >= 0x80, so it fails both
    // checks below.
    match scheme.as_bytes() {
        [] => false,
        [head, tail @ ..] => {
            head.is_ascii_alphabetic()
                && tail
                    .iter()
                    .all(|&b| b.is_ascii_alphanumeric() || matches!(b, b'+' | b'-' | b'.'))
        }
    }
}
91
92fn validate_json(input: &str) -> ValidationResult {
93    let candidate = input.trim();
94    match serde_json::from_str::<serde_json::Value>(candidate) {
95        Ok(_) => ValidationResult::valid(),
96        Err(err) => {
97            let line = err.line();
98            let column = err.column();
99            let path = if line > 0 && column > 0 {
100                Some(format!("line:{line},column:{column}"))
101            } else {
102                None
103            };
104            invalid("invalid-json", format!("invalid JSON syntax: {err}"), path)
105        }
106    }
107}
108
109fn validate_yaml(input: &str) -> ValidationResult {
110    let lines: Vec<(usize, &str)> = input
111        .lines()
112        .enumerate()
113        .filter_map(|(index, line)| {
114            let trimmed = line.trim_start();
115            if trimmed.is_empty() || trimmed.starts_with('#') {
116                return None;
117            }
118            Some((index + 1, line))
119        })
120        .collect();
121
122    if lines.is_empty() {
123        return invalid("invalid-yaml", "YAML content is empty", None);
124    }
125
126    let mut saw_yaml_token = false;
127    for (line_no, raw_line) in &lines {
128        if raw_line.contains('\t') {
129            return invalid(
130                "invalid-yaml",
131                "YAML indentation must use spaces, not tabs",
132                Some(format!("line:{line_no}")),
133            );
134        }
135
136        let line = raw_line.trim_start();
137        if line == "---" || line == "..." {
138            saw_yaml_token = true;
139            continue;
140        }
141        if line.starts_with("- ") {
142            saw_yaml_token = true;
143            if line.trim() == "-" {
144                return invalid(
145                    "invalid-yaml",
146                    "YAML list item is missing a value",
147                    Some(format!("line:{line_no}")),
148                );
149            }
150            continue;
151        }
152        if let Some((key, _)) = line.split_once(':') {
153            saw_yaml_token = true;
154            if key.trim().is_empty() {
155                return invalid(
156                    "invalid-yaml",
157                    "YAML mapping key cannot be empty",
158                    Some(format!("line:{line_no}")),
159                );
160            }
161            continue;
162        }
163        if lines.len() == 1 {
164            return ValidationResult::valid();
165        }
166        return invalid(
167            "invalid-yaml",
168            "YAML line is not parseable in mapping/list form",
169            Some(format!("line:{line_no}")),
170        );
171    }
172
173    if saw_yaml_token || lines.len() == 1 {
174        return ValidationResult::valid();
175    }
176
177    invalid("invalid-yaml", "YAML content is malformed", None)
178}
179
180fn validate_xml(input: &str) -> ValidationResult {
181    let candidate = input.trim();
182    if candidate.is_empty() {
183        return invalid("invalid-xml", "XML content is empty", None);
184    }
185
186    let mut stack: Vec<String> = Vec::new();
187    let mut cursor = 0usize;
188    let mut saw_element = false;
189
190    while let Some(rel_start) = candidate[cursor..].find('<') {
191        let start = cursor + rel_start;
192        let token_start = start + 1;
193        let Some(rel_end) = candidate[token_start..].find('>') else {
194            return invalid(
195                "invalid-xml",
196                "XML contains an unclosed tag",
197                Some(format!("byte:{token_start}")),
198            );
199        };
200        let token_end = token_start + rel_end;
201        let token = candidate[token_start..token_end].trim();
202        cursor = token_end + 1;
203
204        if token.is_empty() {
205            return invalid(
206                "invalid-xml",
207                "XML tag cannot be empty",
208                Some(format!("byte:{token_start}")),
209            );
210        }
211        if token.starts_with("!--") {
212            if !token.ends_with("--") {
213                return invalid(
214                    "invalid-xml",
215                    "XML comment is malformed",
216                    Some(format!("byte:{token_start}")),
217                );
218            }
219            continue;
220        }
221        if token.starts_with('?') || token.starts_with('!') {
222            continue;
223        }
224
225        if let Some(rest) = token.strip_prefix('/') {
226            let name = match parse_xml_tag_name(rest) {
227                Ok(name) => name.to_string(),
228                Err(err) => return ValidationResult::invalid(vec![err]),
229            };
230            saw_element = true;
231            let Some(open_name) = stack.pop() else {
232                return invalid(
233                    "invalid-xml",
234                    "XML has a closing tag without a matching opening tag",
235                    Some(format!("/{name}")),
236                );
237            };
238            if open_name != name {
239                return invalid(
240                    "invalid-xml",
241                    format!("XML closing tag does not match opening tag: expected </{open_name}>"),
242                    Some(format!("/{name}")),
243                );
244            }
245            continue;
246        }
247
248        let self_closing = token.ends_with('/');
249        let open_token = if self_closing {
250            token[..token.len() - 1].trim_end()
251        } else {
252            token
253        };
254
255        let name = match parse_xml_tag_name(open_token) {
256            Ok(name) => name.to_string(),
257            Err(err) => return ValidationResult::invalid(vec![err]),
258        };
259        saw_element = true;
260        if !self_closing {
261            stack.push(name);
262        }
263    }
264
265    if !saw_element {
266        return invalid("invalid-xml", "XML does not contain any element tags", None);
267    }
268    if !stack.is_empty() {
269        return invalid(
270            "invalid-xml",
271            "XML has unclosed tags",
272            Some(format!("/{}", stack.join("/"))),
273        );
274    }
275
276    ValidationResult::valid()
277}
278
279fn parse_xml_tag_name(token: &str) -> Result<&str, ValidationError> {
280    let name = token.split_whitespace().next().unwrap_or("");
281    if name.is_empty() {
282        return Err(ValidationError::new(
283            "invalid-xml",
284            "XML tag name cannot be empty",
285        ));
286    }
287
288    let mut chars = name.chars();
289    let Some(first) = chars.next() else {
290        return Err(ValidationError::new(
291            "invalid-xml",
292            "XML tag name cannot be empty",
293        ));
294    };
295    if !(first.is_ascii_alphabetic() || matches!(first, '_' | ':')) {
296        return Err(ValidationError::new(
297            "invalid-xml",
298            "XML tag name contains invalid characters",
299        ));
300    }
301    if !chars.all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '_' | '-' | '.' | ':')) {
302        return Err(ValidationError::new(
303            "invalid-xml",
304            "XML tag name contains invalid characters",
305        ));
306    }
307    Ok(name)
308}
309
310fn validate_markdown(input: &str) -> ValidationResult {
311    let mut fence_count = 0usize;
312    let mut last_fence_line = None;
313
314    for (index, raw_line) in input.lines().enumerate() {
315        let line_no = index + 1;
316        let line = raw_line.trim_start();
317
318        if line.starts_with("```") {
319            fence_count += 1;
320            last_fence_line = Some(line_no);
321        }
322
323        if let Some(link_start) = raw_line.find("](") {
324            let candidate = &raw_line[link_start + 2..];
325            if !candidate.contains(')') {
326                return invalid(
327                    "invalid-markdown",
328                    "Markdown link is missing a closing parenthesis",
329                    Some(format!("line:{line_no}")),
330                );
331            }
332        }
333    }
334
335    if fence_count % 2 == 1 {
336        return invalid(
337            "invalid-markdown",
338            "Markdown fenced code block is not closed",
339            last_fence_line.map(|line_no| format!("line:{line_no}")),
340        );
341    }
342
343    ValidationResult::valid()
344}
345
346fn invalid(
347    code: impl Into<String>,
348    message: impl Into<String>,
349    path: Option<String>,
350) -> ValidationResult {
351    let mut err = ValidationError::new(code, message);
352    if let Some(path) = path {
353        err = err.with_path(path);
354    }
355    ValidationResult::invalid(vec![err])
356}