1use std::path::Path;
4use std::sync::LazyLock;
5
6use regex::Regex;
7
8use crate::frontmatter::{self, Frontmatter};
9use crate::index::types::{LinkType, NoteType};
10
11#[derive(Debug, Clone)]
13pub struct ExtractedNote {
14 pub title: String,
16 pub note_type: NoteType,
18 pub frontmatter_json: Option<String>,
20 pub links: Vec<ExtractedLink>,
22}
23
24#[derive(Debug, Clone)]
26pub struct ExtractedLink {
27 pub target: String,
29 pub text: Option<String>,
31 pub link_type: LinkType,
33 pub line_number: u32,
35 pub context: Option<String>,
37}
38
39static WIKILINK_RE: LazyLock<Regex> = LazyLock::new(|| {
41 Regex::new(r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").unwrap()
44});
45
46static MARKDOWN_LINK_RE: LazyLock<Regex> = LazyLock::new(|| {
47 Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap()
50});
51
52pub fn extract_note(content: &str, file_path: &Path) -> ExtractedNote {
54 let parsed = frontmatter::parse(content).unwrap_or_else(|_| {
56 crate::frontmatter::ParsedDocument {
57 frontmatter: None,
58 body: content.to_string(),
59 }
60 });
61
62 let note_type = parsed
64 .frontmatter
65 .as_ref()
66 .and_then(|fm| fm.fields.get("type"))
67 .and_then(|v| v.as_str())
68 .map(|s| s.parse().unwrap_or_default())
69 .unwrap_or_default();
70
71 let title = extract_title(&parsed.frontmatter, &parsed.body, file_path);
73
74 let frontmatter_json = parsed
76 .frontmatter
77 .as_ref()
78 .map(|fm| serde_json::to_string(&fm.fields).unwrap_or_default());
79
80 let mut links = extract_links(&parsed.body);
82
83 let fm_links = extract_frontmatter_links(&parsed.frontmatter);
85 links.extend(fm_links);
86
87 ExtractedNote { title, note_type, frontmatter_json, links }
88}
89
90fn extract_title(fm: &Option<Frontmatter>, body: &str, file_path: &Path) -> String {
91 if let Some(fm) = fm
93 && let Some(title) = fm.fields.get("title").and_then(|v| v.as_str())
94 {
95 return title.to_string();
96 }
97
98 for line in body.lines() {
100 let trimmed = line.trim();
101 if let Some(heading) = trimmed.strip_prefix('#') {
102 let heading = heading.trim_start_matches('#').trim();
103 if !heading.is_empty() {
104 return heading.to_string();
105 }
106 }
107 }
108
109 file_path.file_stem().and_then(|s| s.to_str()).unwrap_or("Untitled").to_string()
111}
112
113fn extract_links(body: &str) -> Vec<ExtractedLink> {
114 let mut links = Vec::new();
115
116 for (line_num, line) in body.lines().enumerate() {
117 let line_number = (line_num + 1) as u32;
118
119 for cap in WIKILINK_RE.captures_iter(line) {
121 let target = cap.get(1).map(|m| m.as_str()).unwrap_or("");
122 let alias = cap.get(2).map(|m| m.as_str().to_string());
123
124 links.push(ExtractedLink {
125 target: target.to_string(),
126 text: alias,
127 link_type: LinkType::Wikilink,
128 line_number,
129 context: Some(truncate_context(line, 100)),
130 });
131 }
132
133 for cap in MARKDOWN_LINK_RE.captures_iter(line) {
135 let text = cap.get(1).map(|m| m.as_str()).unwrap_or("");
136 let url = cap.get(2).map(|m| m.as_str()).unwrap_or("");
137
138 if url.starts_with("http://") || url.starts_with("https://") {
140 continue;
141 }
142
143 if !url.ends_with(".md") && !is_likely_note_reference(url) {
145 continue;
146 }
147
148 links.push(ExtractedLink {
149 target: url.to_string(),
150 text: Some(text.to_string()),
151 link_type: LinkType::Markdown,
152 line_number,
153 context: Some(truncate_context(line, 100)),
154 });
155 }
156 }
157
158 links
159}
160
/// Decides whether a Markdown link destination looks like a reference to another note.
///
/// Only the final path segment is inspected, after dropping any `?query` or `#fragment`
/// suffix, so dots in directory components (`./`, `../`, `v1.2/notes`) do not disqualify
/// a link. The destination qualifies when that segment either has no extension at all
/// or has an `.md` extension (case-insensitive). Any other extension (images, PDFs,
/// archives, ...) is rejected, as are dot-only or dot-leading segments such as `.`,
/// `..` and `.hidden`.
fn is_likely_note_reference(url: &str) -> bool {
    // Strip `#section` / `?query` so `note.md#intro` is judged by `note.md`.
    let path = url.split(['#', '?']).next().unwrap_or(url);
    // Judge only the last segment; earlier segments are directories.
    let file_name = path.rsplit('/').next().unwrap_or(path);

    match file_name.rsplit_once('.') {
        // No dot in the file name: an extension-less note reference.
        None => true,
        // A real `name.ext`: only Markdown files count as notes.
        Some((stem, ext)) => !stem.is_empty() && ext.eq_ignore_ascii_case("md"),
    }
}
182
183fn extract_frontmatter_links(fm: &Option<Frontmatter>) -> Vec<ExtractedLink> {
184 let mut links = Vec::new();
185
186 let fm = match fm {
187 Some(fm) => fm,
188 None => return links,
189 };
190
191 let ref_fields = ["project", "parent", "related", "blocks", "blocked_by"];
193
194 for field in &ref_fields {
195 if let Some(value) = fm.fields.get(*field) {
196 if let Some(s) = value.as_str() {
198 links.push(ExtractedLink {
199 target: s.to_string(),
200 text: Some(format!("{}: {}", field, s)),
201 link_type: LinkType::Frontmatter,
202 line_number: 0, context: None,
204 });
205 }
206 if let Some(arr) = value.as_sequence() {
208 for item in arr {
209 if let Some(s) = item.as_str() {
210 links.push(ExtractedLink {
211 target: s.to_string(),
212 text: Some(format!("{}: {}", field, s)),
213 link_type: LinkType::Frontmatter,
214 line_number: 0,
215 context: None,
216 });
217 }
218 }
219 }
220 }
221 }
222
223 links
224}
225
/// Returns `line` shortened to at most `max_len` bytes, with `...` appended when cut.
///
/// `max_len` is a byte count. If that offset falls inside a multi-byte UTF-8 character,
/// the cut moves back to the previous character boundary, so the function never panics
/// on non-ASCII text and never splits a character. Lines that already fit are returned
/// unchanged, without an ellipsis.
fn truncate_context(line: &str, max_len: usize) -> String {
    if line.len() <= max_len {
        return line.to_string();
    }

    // Slicing at a non-boundary byte panics, so back up to a boundary first.
    // Offset 0 is always a boundary, which guarantees the loop terminates.
    let mut end = max_len;
    while !line.is_char_boundary(end) {
        end -= 1;
    }

    format!("{}...", &line[..end])
}
233
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the extractor on `content` as if it had been read from the file `name`.
    fn note_from(content: &str, name: &str) -> ExtractedNote {
        extract_note(content, Path::new(name))
    }

    /// Wikilinks with and without aliases, and with path-like targets, are all found.
    #[test]
    fn test_extract_wikilinks() {
        let content = r#"---
title: Test Note
---
# Heading

This links to [[other-note]] and [[another|with alias]].
Also [[path/to/note]] works.
"#;
        let note = note_from(content, "test.md");

        let targets: Vec<&str> = note.links.iter().map(|link| link.target.as_str()).collect();
        assert_eq!(targets, ["other-note", "another", "path/to/note"]);

        let plain = &note.links[0];
        assert!(plain.text.is_none());
        assert_eq!(plain.link_type, LinkType::Wikilink);

        let aliased = &note.links[1];
        assert_eq!(aliased.text.as_deref(), Some("with alias"));
    }

    /// Only Markdown links that point at notes are kept; URLs and images are dropped.
    #[test]
    fn test_extract_markdown_links() {
        let content = r#"# Note

See [this note](./other.md) for details.
Also [external](https://example.com) should be skipped.
And [image](./pic.png) should be skipped too.
"#;
        let note = note_from(content, "test.md");

        assert_eq!(note.links.len(), 1);
        let only = &note.links[0];
        assert_eq!(only.target, "./other.md");
        assert_eq!(only.text.as_deref(), Some("this note"));
        assert_eq!(only.link_type, LinkType::Markdown);
    }

    /// Scalar and list-valued reference fields in frontmatter both produce links.
    #[test]
    fn test_extract_frontmatter_links() {
        let content = r#"---
title: Task
type: task
project: my-project
related:
  - note-a
  - note-b
---
# Task content
"#;
        let note = note_from(content, "task.md");

        let targets: Vec<&str> = note
            .links
            .iter()
            .filter(|link| link.link_type == LinkType::Frontmatter)
            .map(|link| link.target.as_str())
            .collect();

        assert_eq!(targets.len(), 3);
        for expected in ["my-project", "note-a", "note-b"] {
            assert!(targets.contains(&expected));
        }
    }

    /// The frontmatter `title` wins over a heading in the body.
    #[test]
    fn test_extract_title_from_frontmatter() {
        let content = r#"---
title: My Title
---
# Heading
"#;
        assert_eq!(note_from(content, "file.md").title, "My Title");
    }

    /// Without frontmatter the first heading becomes the title.
    #[test]
    fn test_extract_title_from_heading() {
        let note = note_from("# First Heading\n\nContent here.", "file.md");
        assert_eq!(note.title, "First Heading");
    }

    /// With neither frontmatter nor heading the file stem is used.
    #[test]
    fn test_extract_title_from_filename() {
        let note = note_from("No frontmatter, no heading.", "my-note.md");
        assert_eq!(note.title, "my-note");
    }

    /// The frontmatter `type` field is parsed into the matching `NoteType`.
    #[test]
    fn test_extract_note_type() {
        let content = r#"---
type: task
---
# Task
"#;
        assert_eq!(note_from(content, "task.md").note_type, NoteType::Task);
    }

    /// Notes without a `type` field get the default note type.
    #[test]
    fn test_extract_note_type_default() {
        let note = note_from("# Just a note", "note.md");
        assert_eq!(note.note_type, NoteType::None);
    }

    /// Line numbers are 1-based and refer to the line holding the link.
    #[test]
    fn test_line_numbers() {
        let content = r#"Line 1
Line 2 with [[link1]]
Line 3
Line 4 with [[link2]]
"#;
        let note = note_from(content, "test.md");

        let numbers: Vec<u32> = note.links.iter().map(|link| link.line_number).collect();
        assert_eq!(numbers, [2, 4]);
    }

    /// A `#section` suffix stays part of the wikilink target.
    #[test]
    fn test_wikilink_with_section() {
        let note = note_from("Link to [[note#section]] here.", "test.md");

        assert_eq!(note.links.len(), 1);
        assert_eq!(note.links[0].target, "note#section");
    }
}