Skip to main content

mdql_core/
migrate.rs

1//! Field migration operations on markdown files.
2//!
3//! Both frontmatter keys and H2 sections are "fields" in MDQL.
4
5use std::path::Path;
6
7use regex::Regex;
8use std::sync::LazyLock;
9
10use crate::parser::{normalize_heading};
11use crate::txn::atomic_write;
12
13static FENCE_OPEN_RE: LazyLock<Regex> =
14    LazyLock::new(|| Regex::new(r"^(`{3,}|~{3,})").unwrap());
15static H2_RE: LazyLock<Regex> =
16    LazyLock::new(|| Regex::new(r"^##\s+(.+)$").unwrap());
17
18// ── Section span detection ────────────────────────────────────────────────
19
/// Location of one H2 section inside a file that has been split into lines.
///
/// The section covers the half-open line range
/// `heading_line_idx..end_line_idx`.
struct SectionSpan {
    /// Heading text exactly as captured from the `## ...` line (trimmed).
    // Kept for debugging/future use; nothing in this module reads it yet.
    #[allow(dead_code)]
    raw_heading: String,
    /// Heading text used for name comparisons (normalized when requested,
    /// otherwise identical to `raw_heading`).
    normalized_heading: String,
    /// Index of the `## ...` heading line.
    heading_line_idx: usize,
    /// Index one past the last line of the section (exclusive bound).
    end_line_idx: usize,
}
27
28fn find_sections(lines: &[String], normalize: bool) -> Vec<SectionSpan> {
29    let mut sections = Vec::new();
30    let mut in_fence = false;
31    let mut fence_char: Option<char> = None;
32    let mut fence_width: usize = 0;
33
34    // Skip frontmatter
35    let mut start = 0;
36    if !lines.is_empty() && lines[0].trim() == "---" {
37        for i in 1..lines.len() {
38            if lines[i].trim() == "---" {
39                start = i + 1;
40                break;
41            }
42        }
43    }
44
45    for i in start..lines.len() {
46        let line = &lines[i];
47
48        if let Some(caps) = FENCE_OPEN_RE.captures(line) {
49            let marker = caps.get(1).unwrap().as_str();
50            let char = marker.chars().next().unwrap();
51            let width = marker.len();
52            if !in_fence {
53                in_fence = true;
54                fence_char = Some(char);
55                fence_width = width;
56                continue;
57            } else if Some(char) == fence_char && width >= fence_width && line.trim() == marker {
58                in_fence = false;
59                fence_char = None;
60                fence_width = 0;
61                continue;
62            }
63        }
64
65        if in_fence {
66            continue;
67        }
68
69        if let Some(caps) = H2_RE.captures(line) {
70            let raw_h = caps.get(1).unwrap().as_str().trim().to_string();
71            let norm_h = if normalize {
72                normalize_heading(&raw_h)
73            } else {
74                raw_h.clone()
75            };
76            sections.push(SectionSpan {
77                raw_heading: raw_h,
78                normalized_heading: norm_h,
79                heading_line_idx: i,
80                end_line_idx: lines.len(),
81            });
82        }
83    }
84
85    // Fix up end indices
86    for i in 0..sections.len().saturating_sub(1) {
87        let next_start = sections[i + 1].heading_line_idx;
88        sections[i].end_line_idx = next_start;
89    }
90
91    sections
92}
93
94// ── Frontmatter field operations ────────────────────────────────────────
95
/// Returns the half-open line range `(start, end)` of the frontmatter body.
///
/// The first line must be `---` (ignoring surrounding whitespace). `start`
/// is always `1`, and `end` is the index of the first later line that is
/// also `---`. Returns `None` when `lines` is empty, does not start with
/// `---`, or has no closing `---`.
fn find_frontmatter_bounds(lines: &[String]) -> Option<(usize, usize)> {
    let (opening, rest) = lines.split_first()?;
    if opening.trim() != "---" {
        return None;
    }
    rest.iter()
        .position(|line| line.trim() == "---")
        // `rest` starts at index 1 of `lines`, hence the offset.
        .map(|offset| (1, offset + 1))
}
107
108pub fn rename_frontmatter_key_in_file(
109    path: &Path,
110    old_key: &str,
111    new_key: &str,
112) -> crate::errors::Result<bool> {
113    let text = std::fs::read_to_string(path)?;
114    let mut lines: Vec<String> = text.split('\n').map(|s| s.to_string()).collect();
115    let bounds = match find_frontmatter_bounds(&lines) {
116        Some(b) => b,
117        None => return Ok(false),
118    };
119
120    let pattern = Regex::new(&format!(r"^{}(\s*:.*)$", regex::escape(old_key))).unwrap();
121
122    let mut changed = false;
123    for i in bounds.0..bounds.1 {
124        if let Some(caps) = pattern.captures(&lines[i].clone()) {
125            lines[i] = format!("{}{}", new_key, caps.get(1).unwrap().as_str());
126            changed = true;
127            break;
128        }
129    }
130
131    if changed {
132        atomic_write(path, &lines.join("\n"))?;
133    }
134    Ok(changed)
135}
136
137pub fn drop_frontmatter_key_in_file(
138    path: &Path,
139    key: &str,
140) -> crate::errors::Result<bool> {
141    let text = std::fs::read_to_string(path)?;
142    let mut lines: Vec<String> = text.split('\n').map(|s| s.to_string()).collect();
143    let bounds = match find_frontmatter_bounds(&lines) {
144        Some(b) => b,
145        None => return Ok(false),
146    };
147
148    let pattern = Regex::new(&format!(r"^{}\s*:", regex::escape(key))).unwrap();
149
150    let mut key_start = None;
151    let mut key_end = None;
152    for i in bounds.0..bounds.1 {
153        if pattern.is_match(&lines[i]) {
154            key_start = Some(i);
155            key_end = Some(i + 1);
156            while key_end.unwrap() < bounds.1
157                && (lines[key_end.unwrap()].starts_with(' ')
158                    || lines[key_end.unwrap()].starts_with('\t'))
159            {
160                key_end = Some(key_end.unwrap() + 1);
161            }
162            break;
163        }
164    }
165
166    match (key_start, key_end) {
167        (Some(start), Some(end)) => {
168            lines.drain(start..end);
169            atomic_write(path, &lines.join("\n"))?;
170            Ok(true)
171        }
172        _ => Ok(false),
173    }
174}
175
176// ── Section operations ──────────────────────────────────────────────────
177
178pub fn rename_section_in_file(
179    path: &Path,
180    old_name: &str,
181    new_name: &str,
182    normalize: bool,
183) -> crate::errors::Result<bool> {
184    let text = std::fs::read_to_string(path)?;
185    let mut lines: Vec<String> = text.split('\n').map(|s| s.to_string()).collect();
186    let sections = find_sections(&lines, normalize);
187
188    let mut changed = false;
189    for sec in &sections {
190        if sec.normalized_heading == old_name {
191            lines[sec.heading_line_idx] = format!("## {}", new_name);
192            changed = true;
193        }
194    }
195
196    if changed {
197        atomic_write(path, &lines.join("\n"))?;
198    }
199    Ok(changed)
200}
201
202pub fn drop_section_in_file(
203    path: &Path,
204    section_name: &str,
205    normalize: bool,
206) -> crate::errors::Result<bool> {
207    let text = std::fs::read_to_string(path)?;
208    let mut lines: Vec<String> = text.split('\n').map(|s| s.to_string()).collect();
209    let sections = find_sections(&lines, normalize);
210
211    let to_remove: Vec<_> = sections
212        .iter()
213        .filter(|s| s.normalized_heading == section_name)
214        .collect();
215
216    if to_remove.is_empty() {
217        return Ok(false);
218    }
219
220    // Remove from bottom up
221    for sec in to_remove.iter().rev() {
222        let mut start = sec.heading_line_idx;
223        let end = sec.end_line_idx;
224        if start > 0 && lines[start - 1].trim().is_empty() {
225            start -= 1;
226        }
227        lines.drain(start..end);
228    }
229
230    atomic_write(path, &lines.join("\n"))?;
231    Ok(true)
232}
233
234pub fn merge_sections_in_file(
235    path: &Path,
236    source_names: &[String],
237    into: &str,
238    normalize: bool,
239) -> crate::errors::Result<bool> {
240    let text = std::fs::read_to_string(path)?;
241    let mut lines: Vec<String> = text.split('\n').map(|s| s.to_string()).collect();
242    let sections = find_sections(&lines, normalize);
243
244    let all_names: std::collections::HashSet<&str> =
245        source_names.iter().map(|s| s.as_str()).collect();
246    let matching: Vec<_> = sections
247        .iter()
248        .filter(|s| all_names.contains(s.normalized_heading.as_str()))
249        .collect();
250
251    if matching.len() < 2 {
252        return Ok(false);
253    }
254
255    // Collect bodies
256    let mut bodies = Vec::new();
257    for sec in &matching {
258        let body_lines = &lines[sec.heading_line_idx + 1..sec.end_line_idx];
259        let body = body_lines.join("\n").trim().to_string();
260        if !body.is_empty() {
261            bodies.push(body);
262        }
263    }
264
265    let merged_body = bodies.join("\n\n");
266
267    // Replace first, delete rest
268    let target = matching[0];
269    let to_delete: Vec<_> = matching[1..].iter().collect();
270
271    let target_replacement = vec![
272        format!("## {}", into),
273        String::new(),
274        merged_body,
275        String::new(),
276    ];
277
278    let old_span = target.end_line_idx - target.heading_line_idx;
279    let new_span = target_replacement.len();
280
281    lines.splice(
282        target.heading_line_idx..target.end_line_idx,
283        target_replacement,
284    );
285
286    let mut shift = new_span as i64 - old_span as i64;
287
288    for sec in to_delete.iter().rev() {
289        let mut adj_start = (sec.heading_line_idx as i64 + shift) as usize;
290        let adj_end = (sec.end_line_idx as i64 + shift) as usize;
291        if adj_start > 0 && lines[adj_start - 1].trim().is_empty() {
292            adj_start -= 1;
293        }
294        let removed = adj_end - adj_start;
295        lines.drain(adj_start..adj_end);
296        shift -= removed as i64;
297    }
298
299    atomic_write(path, &lines.join("\n"))?;
300    Ok(true)
301}
302
303// ── Schema update ─────────────────────────────────────────────────────────
304
305pub fn update_schema(
306    schema_path: &Path,
307    rename_frontmatter: Option<(&str, &str)>,
308    drop_frontmatter: Option<&str>,
309    rename_section: Option<(&str, &str)>,
310    drop_section: Option<&str>,
311    merge_sections: Option<(&[String], &str)>,
312) -> crate::errors::Result<()> {
313    let text = std::fs::read_to_string(schema_path)?;
314    let file_lines: Vec<&str> = text.split('\n').collect();
315
316    if file_lines.is_empty() || file_lines[0].trim() != "---" {
317        return Ok(());
318    }
319
320    let mut end_idx = None;
321    for i in 1..file_lines.len() {
322        if file_lines[i].trim() == "---" {
323            end_idx = Some(i);
324            break;
325        }
326    }
327
328    let end_idx = match end_idx {
329        Some(i) => i,
330        None => return Ok(()),
331    };
332
333    let fm_text = file_lines[1..end_idx].join("\n");
334    let mut fm: serde_yaml::Value =
335        serde_yaml::from_str(&fm_text).unwrap_or(serde_yaml::Value::Mapping(serde_yaml::Mapping::new()));
336
337    let fm_map = fm.as_mapping_mut().unwrap();
338
339    // Frontmatter field operations
340    let fm_key = serde_yaml::Value::String("frontmatter".into());
341    let fm_fields = fm_map
342        .entry(fm_key.clone())
343        .or_insert(serde_yaml::Value::Mapping(serde_yaml::Mapping::new()));
344
345    if let Some(fields_map) = fm_fields.as_mapping_mut() {
346        if let Some((old, new)) = rename_frontmatter {
347            let old_key = serde_yaml::Value::String(old.to_string());
348            let new_key = serde_yaml::Value::String(new.to_string());
349            if let Some(val) = fields_map.remove(&old_key) {
350                fields_map.insert(new_key, val);
351            }
352        }
353        if let Some(key) = drop_frontmatter {
354            fields_map.remove(&serde_yaml::Value::String(key.to_string()));
355        }
356    }
357
358    // Section operations
359    let sec_key = serde_yaml::Value::String("sections".into());
360    let sections = fm_map
361        .entry(sec_key.clone())
362        .or_insert(serde_yaml::Value::Mapping(serde_yaml::Mapping::new()));
363
364    if let Some(sections_map) = sections.as_mapping_mut() {
365        if let Some((old, new)) = rename_section {
366            let old_key = serde_yaml::Value::String(old.to_string());
367            let new_key = serde_yaml::Value::String(new.to_string());
368            if let Some(val) = sections_map.remove(&old_key) {
369                sections_map.insert(new_key, val);
370            }
371        }
372        if let Some(key) = drop_section {
373            sections_map.remove(&serde_yaml::Value::String(key.to_string()));
374        }
375        if let Some((sources, target)) = merge_sections {
376            let mut target_config = None;
377            for s in sources {
378                let k = serde_yaml::Value::String(s.clone());
379                if target_config.is_none() {
380                    target_config = sections_map.get(&k).cloned();
381                }
382            }
383            let target_config = target_config.unwrap_or_else(|| {
384                let mut m = serde_yaml::Mapping::new();
385                m.insert(
386                    serde_yaml::Value::String("type".into()),
387                    serde_yaml::Value::String("markdown".into()),
388                );
389                m.insert(
390                    serde_yaml::Value::String("required".into()),
391                    serde_yaml::Value::Bool(false),
392                );
393                serde_yaml::Value::Mapping(m)
394            });
395            for s in sources {
396                sections_map.remove(&serde_yaml::Value::String(s.clone()));
397            }
398            sections_map.insert(
399                serde_yaml::Value::String(target.to_string()),
400                target_config,
401            );
402        }
403    }
404
405    // Re-serialize
406    let new_fm = serde_yaml::to_string(&fm).unwrap_or_default();
407    let new_fm = new_fm.trim_end();
408
409    let mut new_lines = vec!["---".to_string(), new_fm.to_string(), "---".to_string()];
410    for line in &file_lines[end_idx + 1..] {
411        new_lines.push(line.to_string());
412    }
413
414    atomic_write(schema_path, &new_lines.join("\n"))?;
415    Ok(())
416}