Skip to main content

anyback_reader/
markdown.rs

1use std::fs;
2use std::path::Path;
3use std::{collections::HashMap, hash::RandomState};
4
5use anyhow::{Context, Result, anyhow, bail};
6use prost::Message;
7use prost_types::{Struct, value::Kind};
8
9use crate::archive::ArchiveReader;
10use anytype_rpc::{
11    anytype::SnapshotWithType,
12    model::{
13        Block, Range, SmartBlockType,
14        block::{
15            ContentValue,
16            content::{
17                Bookmark, Div, File, Latex, Link, Table, TableColumn, TableRow, Text,
18                div::Style as DivStyle,
19                file::{State as FileState, Type as FileType},
20                layout::Style as LayoutStyle,
21                text::{Mark, Style as TextStyle, mark::Type as MarkType},
22            },
23        },
24    },
25};
26use serde_json::Value as JsonValue;
27
/// Metadata used to resolve object links and file names during markdown rendering.
///
/// Entries are produced by `build_archive_object_index` from each snapshot's details map.
#[derive(Debug, Clone)]
pub struct ArchiveObjectInfo {
    /// Object identifier taken from the `id` detail; also used as the index key.
    pub id: String,
    /// Value of the `name` detail (empty when absent); preferred link title.
    pub name: String,
    /// Value of the `snippet` detail (empty when absent); fallback title when `name` is empty.
    pub snippet: String,
    /// Numeric layout parsed from the `layout` detail, falling back to `resolvedLayout`.
    /// Values in `8..=12` are treated as file objects when link targets are built.
    pub layout: Option<i64>,
    /// Value of the `fileExt` detail, if present. A leading dot is stripped by the consumer.
    pub file_ext: Option<String>,
}
37
/// Form in which an object was written out; returned by `save_archive_object`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SavedObjectKind {
    /// NOTE(review): presumably the object was rendered to markdown text — confirm in `save_archive_object`.
    Markdown,
    /// NOTE(review): presumably the object's raw file payload was saved unchanged — confirm in `save_archive_object`.
    Raw,
}
43
/// Rendering context threaded through the block-tree walk.
#[derive(Debug, Clone, Default)]
struct RenderState {
    /// Whitespace prefix written before each rendered block.
    indent: String,
    /// Set after a bulleted or numbered item is rendered; cleared when another text style follows.
    list_opened: bool,
    /// Number of the most recently emitted numbered-list item.
    list_number: usize,
}

impl RenderState {
    /// Returns a copy of this state whose indent is extended by `extra`.
    fn deeper(&self, extra: &str) -> Self {
        let mut indent = String::with_capacity(self.indent.len() + extra.len());
        indent.push_str(&self.indent);
        indent.push_str(extra);
        Self {
            indent,
            list_opened: self.list_opened,
            list_number: self.list_number,
        }
    }

    /// Copy of this state indented by four additional spaces.
    fn with_space_indent(&self) -> Self {
        self.deeper("    ")
    }

    /// Copy of this state indented by two additional spaces.
    fn with_nb_indent(&self) -> Self {
        self.deeper("  ")
    }
}
64
/// Renders a tree of blocks to markdown text.
#[derive(Debug)]
struct MarkdownConverter<'a> {
    /// Lookup from block id to block; used to resolve `children_ids` and table cells.
    blocks_by_id: HashMap<String, &'a Block>,
    /// Object metadata consulted when building link titles and link targets.
    docs: &'a HashMap<String, ArchiveObjectInfo, RandomState>,
}
70
71impl MarkdownConverter<'_> {
72    fn render(&self, root: &Block) -> String {
73        let mut out = String::new();
74        let mut state = RenderState::default();
75        self.render_children(&mut out, &mut state, root);
76        out
77    }
78
79    fn render_children(&self, out: &mut String, state: &mut RenderState, parent: &Block) {
80        for child_id in &parent.children_ids {
81            let Some(block) = self.blocks_by_id.get(child_id) else {
82                continue;
83            };
84            self.render_block(out, state, block);
85        }
86    }
87
88    fn render_block(&self, out: &mut String, state: &mut RenderState, block: &Block) {
89        match block.content_value.as_ref() {
90            Some(ContentValue::Text(text)) => self.render_text(out, state, block, text),
91            Some(ContentValue::File(file)) => self.render_file(out, state, file),
92            Some(ContentValue::Bookmark(bookmark)) => self.render_bookmark(out, state, bookmark),
93            Some(ContentValue::Table(_)) => self.render_table(out, state, block),
94            Some(ContentValue::Div(div)) => {
95                if matches!(
96                    DivStyle::try_from(div.style).ok(),
97                    Some(DivStyle::Dots | DivStyle::Line)
98                ) {
99                    out.push_str(" --- \n");
100                }
101                self.render_children(out, state, block);
102            }
103            Some(ContentValue::Link(link)) => self.render_link(out, state, link),
104            Some(ContentValue::Latex(latex)) => self.render_latex(out, state, latex),
105            _ => self.render_children(out, state, block),
106        }
107    }
108
109    fn render_text(&self, out: &mut String, state: &mut RenderState, block: &Block, text: &Text) {
110        let style = TextStyle::try_from(text.style).unwrap_or(TextStyle::Paragraph);
111        if state.list_opened && !matches!(style, TextStyle::Marked | TextStyle::Numbered) {
112            out.push_str("   \n");
113            state.list_opened = false;
114            state.list_number = 0;
115        }
116
117        out.push_str(&state.indent);
118        match style {
119            TextStyle::Header1 | TextStyle::ToggleHeader1 | TextStyle::Title => {
120                out.push_str("# ");
121                self.render_text_content(out, text);
122                let mut nested = state.with_space_indent();
123                self.render_children(out, &mut nested, block);
124            }
125            TextStyle::Header2 | TextStyle::ToggleHeader2 => {
126                out.push_str("## ");
127                self.render_text_content(out, text);
128                let mut nested = state.with_space_indent();
129                self.render_children(out, &mut nested, block);
130            }
131            TextStyle::Header3 | TextStyle::ToggleHeader3 => {
132                out.push_str("### ");
133                self.render_text_content(out, text);
134                let mut nested = state.with_space_indent();
135                self.render_children(out, &mut nested, block);
136            }
137            TextStyle::Header4 => {
138                out.push_str("#### ");
139                self.render_text_content(out, text);
140                let mut nested = state.with_space_indent();
141                self.render_children(out, &mut nested, block);
142            }
143            TextStyle::Quote | TextStyle::Toggle => {
144                out.push_str("> ");
145                out.push_str(&text.text.replace('\n', "   \n> "));
146                out.push_str("   \n\n");
147                self.render_children(out, state, block);
148            }
149            TextStyle::Code => {
150                out.push_str("```\n");
151                out.push_str(&state.indent);
152                out.push_str(&text.text.replace("```", "\\`\\`\\`"));
153                out.push('\n');
154                out.push_str(&state.indent);
155                out.push_str("```\n");
156                self.render_children(out, state, block);
157            }
158            TextStyle::Checkbox => {
159                if text.checked {
160                    out.push_str("- [x] ");
161                } else {
162                    out.push_str("- [ ] ");
163                }
164                self.render_text_content(out, text);
165                let mut nested = state.with_nb_indent();
166                self.render_children(out, &mut nested, block);
167            }
168            TextStyle::Marked => {
169                out.push_str("- ");
170                self.render_text_content(out, text);
171                let mut nested = state.with_space_indent();
172                self.render_children(out, &mut nested, block);
173                state.list_opened = true;
174            }
175            TextStyle::Numbered => {
176                state.list_number += 1;
177                out.push_str(&format!("{}. ", state.list_number));
178                self.render_text_content(out, text);
179                let mut nested = state.with_space_indent();
180                self.render_children(out, &mut nested, block);
181                state.list_opened = true;
182            }
183            _ => {
184                self.render_text_content(out, text);
185                let mut nested = state.with_nb_indent();
186                self.render_children(out, &mut nested, block);
187            }
188        }
189    }
190
191    fn render_text_content(&self, out: &mut String, text: &Text) {
192        let mut marks = MarksWriter::new(self, text);
193        let chars: Vec<char> = text.text.chars().collect();
194        for (idx, ch) in chars.iter().enumerate() {
195            marks.write_marks(out, idx);
196            escape_markdown_char(*ch, out);
197        }
198        marks.write_marks(out, chars.len());
199        out.push_str("   \n");
200    }
201
202    fn render_file(&self, out: &mut String, state: &RenderState, file: &File) {
203        if !matches!(FileState::try_from(file.state).ok(), Some(FileState::Done)) {
204            return;
205        }
206        let (title, filename) = self.link_info_for_file(file);
207        if title.is_empty() || filename.is_empty() {
208            return;
209        }
210        out.push_str(&state.indent);
211        if matches!(FileType::try_from(file.r#type).ok(), Some(FileType::Image)) {
212            out.push_str(&format!("![{title}]({filename})    \n"));
213        } else {
214            out.push_str(&format!("[{title}]({filename})    \n"));
215        }
216    }
217
218    #[allow(clippy::unused_self)]
219    fn render_bookmark(&self, out: &mut String, state: &RenderState, bookmark: &Bookmark) {
220        if bookmark.url.is_empty() {
221            return;
222        }
223        out.push_str(&state.indent);
224        let title = if bookmark.title.is_empty() {
225            bookmark.url.clone()
226        } else {
227            escape_markdown_string(&bookmark.title)
228        };
229        out.push_str(&format!("[{}]({})    \n", title, bookmark.url));
230    }
231
232    fn render_link(&self, out: &mut String, state: &RenderState, link: &Link) {
233        if link.target_block_id.is_empty() {
234            return;
235        }
236        let Some((title, filename)) = self.link_info(&link.target_block_id) else {
237            return;
238        };
239        out.push_str(&state.indent);
240        out.push_str(&format!(
241            "[{}]({})    \n",
242            escape_markdown_string(&title),
243            filename
244        ));
245    }
246
247    #[allow(clippy::unused_self)]
248    fn render_latex(&self, out: &mut String, state: &RenderState, latex: &Latex) {
249        out.push_str(&state.indent);
250        out.push_str("\n$$\n");
251        out.push_str(&latex.text);
252        out.push_str("\n$$\n");
253    }
254
255    fn render_table(&self, out: &mut String, state: &mut RenderState, table_block: &Block) {
256        let mut column_ids: Vec<String> = Vec::new();
257        let mut row_ids: Vec<String> = Vec::new();
258
259        for child_id in &table_block.children_ids {
260            let Some(child) = self.blocks_by_id.get(child_id) else {
261                continue;
262            };
263            match child.content_value.as_ref() {
264                Some(ContentValue::Layout(layout)) => {
265                    match LayoutStyle::try_from(layout.style).ok() {
266                        Some(LayoutStyle::TableColumns) => {
267                            column_ids.clone_from(&child.children_ids);
268                        }
269                        Some(LayoutStyle::TableRows) => {
270                            row_ids.clone_from(&child.children_ids);
271                        }
272                        _ => {}
273                    }
274                }
275                Some(ContentValue::TableRow(_)) => row_ids.push(child.id.clone()),
276                Some(ContentValue::TableColumn(_)) => column_ids.push(child.id.clone()),
277                _ => {}
278            }
279        }
280
281        if row_ids.is_empty() {
282            self.render_children(out, state, table_block);
283            return;
284        }
285
286        let rows = self.build_table_rows(&row_ids, &column_ids);
287        write_markdown_table(out, &state.indent, rows);
288    }
289
290    fn build_table_rows(&self, row_ids: &[String], column_ids: &[String]) -> Vec<Vec<String>> {
291        let mut rows: Vec<Vec<String>> = Vec::new();
292        for row_id in row_ids {
293            let Some(row_block) = self.blocks_by_id.get(row_id) else {
294                continue;
295            };
296            let mut by_col: HashMap<String, String> = HashMap::new();
297            let mut unordered: Vec<String> = Vec::new();
298
299            for cell_id in &row_block.children_ids {
300                let Some(cell_block) = self.blocks_by_id.get(cell_id) else {
301                    continue;
302                };
303                let content = self.render_cell(cell_block);
304                if let Some(col_id) = cell_id.strip_prefix(&format!("{row_id}-")) {
305                    by_col.insert(col_id.to_string(), content);
306                } else {
307                    unordered.push(content);
308                }
309            }
310
311            if column_ids.is_empty() {
312                if by_col.is_empty() {
313                    rows.push(unordered);
314                } else {
315                    let mut pairs: Vec<(String, String)> = by_col.into_iter().collect();
316                    pairs.sort_by(|a, b| a.0.cmp(&b.0));
317                    rows.push(pairs.into_iter().map(|(_, v)| v).collect());
318                }
319                continue;
320            }
321
322            let mut row = Vec::with_capacity(column_ids.len());
323            for (idx, col_id) in column_ids.iter().enumerate() {
324                if let Some(cell) = by_col.remove(col_id) {
325                    row.push(cell);
326                } else if let Some(cell) = unordered.get(idx) {
327                    row.push(cell.clone());
328                } else {
329                    row.push(" ".to_string());
330                }
331            }
332            rows.push(row);
333        }
334        rows
335    }
336
337    fn render_cell(&self, block: &Block) -> String {
338        let mut text = String::new();
339        let mut state = RenderState::default();
340        self.render_block(&mut text, &mut state, block);
341        text = text.replace("\r\n", " ").replace('\n', " ");
342        let trimmed = text.trim();
343        if trimmed.is_empty() {
344            " ".to_string()
345        } else {
346            trimmed.to_string()
347        }
348    }
349
350    fn link_info_for_file(&self, file: &File) -> (String, String) {
351        if !file.target_object_id.is_empty() {
352            if let Some((title, filename)) = self.link_info(&file.target_object_id) {
353                return (title, filename);
354            }
355            let fallback_title = path_basename(&file.name).to_string();
356            let fallback_ext = file_ext_from_name(&file.name).unwrap_or_default();
357            let filename =
358                file_name_for_file(&file.target_object_id, &fallback_title, &fallback_ext);
359            return (fallback_title, filename);
360        }
361
362        let title = path_basename(&file.name).to_string();
363        let ext = file_ext_from_name(&file.name).unwrap_or_default();
364        let filename = file_name_for_file(&file.hash, &title, &ext);
365        (title, filename)
366    }
367
368    fn link_info(&self, object_id: &str) -> Option<(String, String)> {
369        let info = self.docs.get(object_id)?;
370        let mut title = info.name.clone();
371        if title.is_empty() {
372            title.clone_from(&info.snippet);
373        }
374        if title.is_empty() {
375            title = object_id.to_string();
376        }
377
378        let is_file = matches!(info.layout, Some(8..=12));
379        if is_file {
380            let ext = info
381                .file_ext
382                .as_deref()
383                .map(|ext| format!(".{}", ext.trim_start_matches('.')))
384                .unwrap_or_default();
385            let file_title = title.trim_end_matches(&ext).to_string();
386            let filename = file_name_for_file(object_id, &file_title, &ext);
387            return Some((file_title, filename));
388        }
389
390        let filename = file_name_for_doc(object_id, &title);
391        Some((title, filename))
392    }
393}
394
/// One inline mark together with the text positions it spans.
#[derive(Debug, Clone)]
struct MarkRange {
    /// Position at which the mark's opening marker is written.
    from: usize,
    /// Position at which the mark's closing marker is written.
    to: usize,
    /// The source mark (its type and parameter drive the emitted markup).
    mark: Mark,
}
401
/// Emits opening and closing markdown markers for the inline marks of one text block.
#[derive(Debug)]
struct MarksWriter<'a, 'b> {
    /// Converter used to resolve mention/object marks to link targets.
    converter: &'a MarkdownConverter<'b>,
    /// Marks grouped by their start position.
    starts: HashMap<usize, Vec<MarkRange>>,
    /// Marks grouped by their end position.
    ends: HashMap<usize, Vec<MarkRange>>,
    /// Marks that have been opened and not yet closed, in opening order.
    open: Vec<MarkRange>,
}
409
410impl<'a, 'b> MarksWriter<'a, 'b> {
411    fn new(converter: &'a MarkdownConverter<'b>, text: &Text) -> Self {
412        let mut starts: HashMap<usize, Vec<MarkRange>> = HashMap::new();
413        let mut ends: HashMap<usize, Vec<MarkRange>> = HashMap::new();
414        if let Some(marks) = text.marks.as_ref() {
415            for mark in &marks.marks {
416                let Some(range) = mark.range.as_ref() else {
417                    continue;
418                };
419                if range.from == range.to || range.from < 0 || range.to < 0 {
420                    continue;
421                }
422                #[allow(clippy::cast_sign_loss)]
423                let item = MarkRange {
424                    from: range.from as usize,
425                    to: range.to as usize,
426                    mark: mark.clone(),
427                };
428                starts.entry(item.from).or_default().push(item.clone());
429                ends.entry(item.to).or_default().push(item);
430            }
431        }
432        for values in starts.values_mut() {
433            values.sort_by(|a, b| {
434                let la = a.to.saturating_sub(a.from);
435                let lb = b.to.saturating_sub(b.from);
436                lb.cmp(&la).then_with(|| a.mark.r#type.cmp(&b.mark.r#type))
437            });
438        }
439        for values in ends.values_mut() {
440            values.sort_by(|a, b| {
441                let la = a.to.saturating_sub(a.from);
442                let lb = b.to.saturating_sub(b.from);
443                lb.cmp(&la).then_with(|| a.mark.r#type.cmp(&b.mark.r#type))
444            });
445        }
446        Self {
447            converter,
448            starts,
449            ends,
450            open: Vec::new(),
451        }
452    }
453
454    fn write_marks(&mut self, out: &mut String, pos: usize) {
455        if let Some(ends) = self.ends.get(&pos).cloned() {
456            for item in ends.iter().rev() {
457                if let Some(last) = self.open.pop()
458                    && (last.from != item.from || last.to != item.to || last.mark != item.mark)
459                {
460                    self.open.push(last.clone());
461                }
462                self.write_mark(out, &item.mark, false);
463            }
464        }
465        if let Some(starts) = self.starts.get(&pos).cloned() {
466            for item in &starts {
467                self.write_mark(out, &item.mark, true);
468                self.open.push(item.clone());
469            }
470        }
471    }
472
473    fn write_mark(&self, out: &mut String, mark: &Mark, start: bool) {
474        let kind = MarkType::try_from(mark.r#type).ok();
475        match kind {
476            Some(MarkType::Strikethrough) => out.push_str("~~"),
477            Some(MarkType::Italic) => out.push('*'),
478            Some(MarkType::Bold) => out.push_str("**"),
479            Some(MarkType::Keyboard) => out.push('`'),
480            Some(MarkType::Link) => {
481                if start {
482                    out.push('[');
483                } else {
484                    out.push_str(&format!("]({})", mark.param));
485                }
486            }
487            Some(MarkType::Mention | MarkType::Object) => {
488                if let Some((_, filename)) = self.converter.link_info(&mark.param) {
489                    if start {
490                        out.push('[');
491                    } else {
492                        out.push_str(&format!("]({filename})"));
493                    }
494                }
495            }
496            Some(MarkType::Emoji) => {
497                if start {
498                    out.push_str(&mark.param);
499                }
500            }
501            _ => {}
502        }
503    }
504}
505
/// Appends `rows` to `out` as a left-aligned markdown table, each line prefixed by `indent`.
///
/// The first row is the header and is followed by a `|:---|` separator line. Short
/// rows are padded with single-space cells to the widest row. Every column is at
/// least three characters wide. Nothing is written when there are no rows or no
/// cells. A blank line terminates the table.
///
/// Column widths are measured in characters, not bytes, because the `{:<width$}`
/// padding used below counts characters; measuring bytes over-pads any column that
/// contains non-ASCII text.
fn write_markdown_table(out: &mut String, indent: &str, mut rows: Vec<Vec<String>>) {
    let cols = rows.iter().map(Vec::len).max().unwrap_or(0);
    if cols == 0 {
        return;
    }
    for row in &mut rows {
        row.resize_with(cols, || " ".to_string());
    }

    let mut widths = vec![3usize; cols];
    for row in &rows {
        for (width, cell) in widths.iter_mut().zip(row) {
            *width = (*width).max(cell.chars().count());
        }
    }

    for (idx, row) in rows.iter().enumerate() {
        out.push_str(indent);
        out.push('|');
        for (cell, &width) in row.iter().zip(&widths) {
            out.push_str(&format!(" {cell:<width$} |"));
        }
        out.push('\n');

        // The separator goes directly under the header row.
        if idx == 0 {
            out.push_str(indent);
            out.push('|');
            for width in &widths {
                out.push(':');
                out.push_str(&"-".repeat(width.saturating_add(1)));
                out.push('|');
            }
            out.push('\n');
        }
    }
    out.push('\n');
}
548
/// Appends `ch` to `out`, preceded by a backslash when it is a markdown control character.
fn escape_markdown_char(ch: char, out: &mut String) {
    /// Characters that get a leading backslash.
    const SPECIAL: &str = "\\`*_{}[]()#+-.!|>~";

    if SPECIAL.contains(ch) {
        out.push('\\');
    }
    out.push(ch);
}
574
575fn escape_markdown_string(value: &str) -> String {
576    let mut out = String::with_capacity(value.len() + 8);
577    for ch in value.chars() {
578        escape_markdown_char(ch, &mut out);
579    }
580    out
581}
582
/// Returns the final component of `path`, or `path` itself when it has no file-name
/// component or that component is not valid UTF-8.
fn path_basename(path: &str) -> &str {
    match Path::new(path).file_name() {
        Some(name) => name.to_str().unwrap_or(path),
        None => path,
    }
}
589
/// Returns the extension of `name` with a leading dot (e.g. `".png"`), or `None`
/// when the name has no extension.
fn file_ext_from_name(name: &str) -> Option<String> {
    let ext = Path::new(name).extension()?.to_str()?;
    Some(format!(".{ext}"))
}
596
/// Reduces `input` to a lowercase ASCII file-name stem.
///
/// ASCII letters and digits plus `-`, `_` and `.` are kept (lower-cased); whitespace
/// and path separators become `_`; every other character is dropped. Leading and
/// trailing underscores are trimmed, and an empty result becomes `"untitled"`.
fn sanitize_filename(input: &str) -> String {
    let cleaned: String = input
        .chars()
        .filter_map(|ch| match ch {
            c if c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.') => {
                Some(c.to_ascii_lowercase())
            }
            c if c.is_whitespace() || matches!(c, '/' | '\\') => Some('_'),
            _ => None,
        })
        .collect();

    match cleaned.trim_matches('_') {
        "" => "untitled".to_string(),
        stem => stem.to_string(),
    }
}
613
614fn file_name_for_doc(id: &str, title: &str) -> String {
615    let base = sanitize_filename(title);
616    format!("{base}_{id}.md")
617}
618
619fn file_name_for_file(id: &str, title: &str, ext: &str) -> String {
620    let base = sanitize_filename(title);
621    format!("files/{base}_{id}{ext}")
622}
623
624fn struct_field_as_string(details: &Struct, key: &str) -> Option<String> {
625    let value = details.fields.get(key)?;
626    match value.kind.as_ref()? {
627        Kind::StringValue(v) => Some(v.clone()),
628        Kind::NumberValue(v) => Some(v.to_string()),
629        Kind::BoolValue(v) => Some(v.to_string()),
630        _ => None,
631    }
632}
633
634/// Build a lightweight archive-wide object metadata index used for markdown link rendering.
635pub fn build_archive_object_index(
636    reader: &ArchiveReader,
637) -> Result<HashMap<String, ArchiveObjectInfo>> {
638    let mut out = HashMap::new();
639    for file in reader.list_files()? {
640        let lower = file.path.to_ascii_lowercase();
641        #[allow(clippy::case_sensitive_file_extension_comparisons)]
642        if !lower.ends_with(".pb") && !lower.ends_with(".pb.json") {
643            continue;
644        }
645        let Ok(bytes) = reader.read_bytes(&file.path) else {
646            continue;
647        };
648        let Ok(details) = parse_snapshot_details_to_map(&file.path, &bytes) else {
649            continue;
650        };
651        let Some(id) = details.get("id").cloned().filter(|v| !v.is_empty()) else {
652            continue;
653        };
654        let info = ArchiveObjectInfo {
655            id: id.clone(),
656            name: details.get("name").cloned().unwrap_or_default(),
657            snippet: details.get("snippet").cloned().unwrap_or_default(),
658            layout: details
659                .get("layout")
660                .and_then(|v| v.parse::<i64>().ok())
661                .or_else(|| {
662                    details
663                        .get("resolvedLayout")
664                        .and_then(|v| v.parse::<i64>().ok())
665                }),
666            file_ext: details.get("fileExt").cloned(),
667        };
668        out.insert(info.id.clone(), info);
669    }
670    Ok(out)
671}
672
673fn find_snapshot_path(reader: &ArchiveReader, object_id: &str) -> Option<String> {
674    let pb = format!("{object_id}.pb");
675    let pb_json = format!("{object_id}.pb.json");
676    let files = reader.list_files().ok()?;
677    files.iter().find_map(|f| {
678        let lower = f.path.to_ascii_lowercase();
679        if lower.ends_with(&pb) || lower.ends_with(&pb_json) {
680            Some(f.path.clone())
681        } else {
682            None
683        }
684    })
685}
686
687/// Convert a snapshot file (`objects/<id>.pb` or `objects/<id>.pb.json`) to markdown text,
688/// using a prebuilt object index for link/name resolution.
689pub fn convert_archive_snapshot_to_markdown(
690    reader: &ArchiveReader,
691    snapshot_path: &str,
692    object_index: &HashMap<String, ArchiveObjectInfo, RandomState>,
693) -> Result<String> {
694    let snapshot_bytes = reader
695        .read_bytes(snapshot_path)
696        .with_context(|| format!("failed reading snapshot from archive: {snapshot_path}"))?;
697    convert_snapshot_bytes_to_markdown(snapshot_path, &snapshot_bytes, object_index)
698}
699
700/// Convert raw snapshot bytes (`*.pb`/`*.pb.json`) to markdown using a prebuilt object index.
701pub fn convert_snapshot_bytes_to_markdown(
702    snapshot_path: &str,
703    snapshot_bytes: &[u8],
704    object_index: &HashMap<String, ArchiveObjectInfo, RandomState>,
705) -> Result<String> {
706    let lower = snapshot_path.to_ascii_lowercase();
707    #[allow(clippy::case_sensitive_file_extension_comparisons)]
708    if lower.ends_with(".pb") {
709        return convert_pb_snapshot_to_markdown(snapshot_bytes, object_index);
710    }
711    if lower.ends_with(".pb.json") {
712        return convert_pb_json_snapshot_to_markdown(snapshot_bytes, object_index);
713    }
714    bail!("unsupported snapshot format: {snapshot_path}")
715}
716
717fn parse_snapshot_details_to_map(path: &str, bytes: &[u8]) -> Result<HashMap<String, String>> {
718    let lower = path.to_ascii_lowercase();
719    #[allow(clippy::case_sensitive_file_extension_comparisons)]
720    if lower.ends_with(".pb") {
721        let snapshot =
722            SnapshotWithType::decode(bytes).context("failed to decode protobuf snapshot")?;
723        let data = snapshot
724            .snapshot
725            .and_then(|v| v.data)
726            .ok_or_else(|| anyhow!("snapshot payload missing data"))?;
727        let Some(details) = data.details else {
728            return Ok(HashMap::new());
729        };
730        let mut map = HashMap::new();
731        for (k, v) in details.fields {
732            if let Some(value) = prost_value_to_string(&v) {
733                map.insert(k, value);
734            }
735        }
736        return Ok(map);
737    }
738    if lower.ends_with(".pb.json") {
739        let root: JsonValue = serde_json::from_slice(bytes).context("invalid pb-json")?;
740        let details = root
741            .get("snapshot")
742            .and_then(|v| v.get("data"))
743            .and_then(|v| v.get("details"))
744            .and_then(JsonValue::as_object)
745            .ok_or_else(|| anyhow!("pb-json snapshot missing details object"))?;
746        let mut map = HashMap::new();
747        for (k, v) in details {
748            if let Some(value) = json_value_to_string(v) {
749                map.insert(k.clone(), value);
750            }
751        }
752        return Ok(map);
753    }
754    bail!("unsupported snapshot format: {path}")
755}
756
757fn prost_value_to_string(v: &prost_types::Value) -> Option<String> {
758    match v.kind.as_ref()? {
759        Kind::StringValue(s) => Some(s.clone()),
760        Kind::NumberValue(n) => Some(n.to_string()),
761        Kind::BoolValue(b) => Some(b.to_string()),
762        _ => None,
763    }
764}
765
766fn json_value_to_string(v: &JsonValue) -> Option<String> {
767    if let Some(s) = v.as_str() {
768        return Some(s.to_string());
769    }
770    if let Some(n) = v.as_i64() {
771        return Some(n.to_string());
772    }
773    if let Some(n) = v.as_f64() {
774        return Some(n.to_string());
775    }
776    if let Some(b) = v.as_bool() {
777        return Some(b.to_string());
778    }
779    None
780}
781
782fn infer_raw_payload_path(
783    object_id: &str,
784    details: &HashMap<String, String>,
785    files: &[crate::archive::ArchiveFileEntry],
786) -> Option<String> {
787    let mut tokens = Vec::<String>::new();
788    if !object_id.is_empty() {
789        tokens.push(object_id.to_ascii_lowercase());
790    }
791    for key in [
792        "source",
793        "fileHash",
794        "hash",
795        "fileObjectId",
796        "targetObjectId",
797        "fileName",
798        "name",
799        "oldAnytypeID",
800    ] {
801        if let Some(value) = details.get(key) {
802            let token = value.trim().to_ascii_lowercase();
803            if !token.is_empty() {
804                tokens.push(token);
805            }
806        }
807    }
808    if let Some(ext) = details.get("fileExt") {
809        let token = ext.trim().trim_start_matches('.').to_ascii_lowercase();
810        if !token.is_empty() {
811            tokens.push(format!(".{token}"));
812        }
813    }
814
815    let mut best: Option<(&str, i32)> = None;
816    for file in files {
817        let path_lc = file.path.to_ascii_lowercase();
818        #[allow(clippy::case_sensitive_file_extension_comparisons)]
819        if path_lc.ends_with(".pb") || path_lc.ends_with(".pb.json") || path_lc == "manifest.json" {
820            continue;
821        }
822        let mut score = 0;
823        if path_lc.starts_with("files/") {
824            score += 30;
825        }
826        for token in &tokens {
827            if token.len() < 3 {
828                continue;
829            }
830            if path_lc.contains(token) {
831                score += 25;
832            }
833        }
834        if score == 0 {
835            continue;
836        }
837        match best {
838            Some((_, best_score)) if best_score >= score => {}
839            _ => best = Some((file.path.as_str(), score)),
840        }
841    }
842    best.map(|(path, _)| path.to_string())
843}
844
845fn should_skip_export(sb_type: SmartBlockType) -> bool {
846    matches!(
847        sb_type,
848        SmartBlockType::StType
849            | SmartBlockType::StRelation
850            | SmartBlockType::StRelationOption
851            | SmartBlockType::Participant
852            | SmartBlockType::SpaceView
853            | SmartBlockType::ChatObjectDeprecated
854            | SmartBlockType::ChatDerivedObject
855    )
856}
857
858/// Convert one archive object snapshot (`objects/<id>.pb`) to markdown text.
859///
860/// This reads the target object snapshot from the archive and builds a lightweight
861/// index of object details from sibling `*.pb` snapshots so object/file links can be
862/// rendered as markdown links.
863pub fn convert_archive_object_pb_to_markdown(
864    archive_path: &Path,
865    object_id: &str,
866) -> Result<String> {
867    let reader = ArchiveReader::from_path(archive_path)?;
868    let snapshot_path = find_snapshot_path(&reader, object_id)
869        .ok_or_else(|| anyhow!("snapshot not found in archive for object: {object_id}"))?;
870    if !snapshot_path.to_ascii_lowercase().ends_with(".pb") {
871        bail!("markdown conversion currently supports protobuf snapshots (*.pb) only");
872    }
873    let snapshot_bytes = reader
874        .read_bytes(&snapshot_path)
875        .with_context(|| format!("failed reading snapshot from archive: {snapshot_path}"))?;
876    let object_index = build_archive_object_index(&reader)?;
877    convert_pb_snapshot_to_markdown(&snapshot_bytes, &object_index)
878}
879
880/// Convert one archive object snapshot (`objects/<id>.pb` or `objects/<id>.pb.json`) to markdown.
881pub fn convert_archive_object_to_markdown(archive_path: &Path, object_id: &str) -> Result<String> {
882    let reader = ArchiveReader::from_path(archive_path)?;
883    let snapshot_path = find_snapshot_path(&reader, object_id)
884        .ok_or_else(|| anyhow!("snapshot not found in archive for object: {object_id}"))?;
885    let object_index = build_archive_object_index(&reader)?;
886    convert_archive_snapshot_to_markdown(&reader, &snapshot_path, &object_index)
887}
888
889pub fn save_archive_object(
890    archive_path: &Path,
891    object_id: &str,
892    dest: &Path,
893) -> Result<SavedObjectKind> {
894    let reader = ArchiveReader::from_path(archive_path)?;
895    let files = reader.list_files()?;
896    let snapshot_path = find_snapshot_path(&reader, object_id)
897        .ok_or_else(|| anyhow!("snapshot not found in archive for object: {object_id}"))?;
898    let snapshot_bytes = reader
899        .read_bytes(&snapshot_path)
900        .with_context(|| format!("failed reading snapshot from archive: {snapshot_path}"))?;
901    let details = parse_snapshot_details_to_map(&snapshot_path, &snapshot_bytes)?;
902
903    if !is_file_layout_from_details(&details) {
904        let markdown = convert_archive_object_to_markdown(archive_path, object_id)?;
905        fs::write(dest, markdown)
906            .with_context(|| format!("failed writing markdown to {}", dest.display()))?;
907        return Ok(SavedObjectKind::Markdown);
908    }
909
910    let payload = infer_raw_payload_path(object_id, &details, &files)
911        .ok_or_else(|| anyhow!("could not resolve raw payload for object: {object_id}"))?;
912    let bytes = reader
913        .read_bytes(&payload)
914        .with_context(|| format!("failed reading payload from archive: {payload}"))?;
915    fs::write(dest, bytes)
916        .with_context(|| format!("failed writing raw payload to {}", dest.display()))?;
917    Ok(SavedObjectKind::Raw)
918}
919
/// Reports whether the snapshot details carry a layout value in the range `8..=12`.
///
/// `layout` is consulted first; `resolvedLayout` is used only when `layout` is
/// missing or does not parse as an integer.
fn is_file_layout_from_details(details: &HashMap<String, String>) -> bool {
    ["layout", "resolvedLayout"]
        .iter()
        .find_map(|key| details.get(*key)?.parse::<i64>().ok())
        .is_some_and(|layout| (8..=12).contains(&layout))
}
927
928fn convert_pb_json_snapshot_to_markdown(
929    snapshot_bytes: &[u8],
930    object_index: &HashMap<String, ArchiveObjectInfo>,
931) -> Result<String> {
932    let root: JsonValue = serde_json::from_slice(snapshot_bytes).context("invalid pb-json")?;
933    let sb_type = parse_json_smart_block_type(&root);
934    if let Some(sb_type) = sb_type
935        && should_skip_export(sb_type)
936    {
937        return Ok(String::new());
938    }
939    let data = root
940        .get("snapshot")
941        .and_then(|v| v.get("data"))
942        .ok_or_else(|| anyhow!("pb-json snapshot missing snapshot.data"))?;
943    let blocks_json = data
944        .get("blocks")
945        .and_then(JsonValue::as_array)
946        .ok_or_else(|| anyhow!("pb-json snapshot missing snapshot.data.blocks"))?;
947    if blocks_json.is_empty() {
948        return Ok(String::new());
949    }
950    let blocks: Vec<Block> = blocks_json
951        .iter()
952        .map(parse_json_block)
953        .collect::<Result<Vec<_>>>()?;
954    if blocks.is_empty() {
955        return Ok(String::new());
956    }
957
958    let mut blocks_by_id = HashMap::<String, &Block>::with_capacity(blocks.len());
959    for block in &blocks {
960        blocks_by_id.insert(block.id.clone(), block);
961    }
962
963    let root_id = data
964        .get("details")
965        .and_then(JsonValue::as_object)
966        .and_then(|details| details.get("id"))
967        .and_then(JsonValue::as_str)
968        .map_or_else(|| blocks[0].id.clone(), ToString::to_string);
969    let Some(root) = blocks_by_id.get(&root_id) else {
970        bail!("root block not found: {root_id}");
971    };
972    if root.children_ids.is_empty() {
973        return Ok(String::new());
974    }
975
976    let converter = MarkdownConverter {
977        blocks_by_id,
978        docs: object_index,
979    };
980    let root = converter
981        .blocks_by_id
982        .get(&root_id)
983        .ok_or_else(|| anyhow!("root block not found after converter init: {root_id}"))?;
984    Ok(converter.render(root))
985}
986
987fn parse_json_smart_block_type(root: &JsonValue) -> Option<SmartBlockType> {
988    let sb = root.get("sbType")?;
989    if let Some(name) = sb.as_str() {
990        return SmartBlockType::from_str_name(name);
991    }
992    if let Some(value) = sb.as_i64().and_then(|n| i32::try_from(n).ok()) {
993        return SmartBlockType::try_from(value).ok();
994    }
995    None
996}
997
998fn parse_json_block(value: &JsonValue) -> Result<Block> {
999    let obj = value
1000        .as_object()
1001        .ok_or_else(|| anyhow!("pb-json block is not an object"))?;
1002    let id = obj
1003        .get("id")
1004        .and_then(JsonValue::as_str)
1005        .ok_or_else(|| anyhow!("pb-json block missing id"))?
1006        .to_string();
1007    let children_ids = obj
1008        .get("childrenIds")
1009        .and_then(JsonValue::as_array)
1010        .map_or_else(Vec::new, |items| {
1011            items
1012                .iter()
1013                .filter_map(JsonValue::as_str)
1014                .map(ToString::to_string)
1015                .collect()
1016        });
1017    let background_color = obj
1018        .get("backgroundColor")
1019        .and_then(JsonValue::as_str)
1020        .unwrap_or_default()
1021        .to_string();
1022    let align = obj
1023        .get("align")
1024        .map_or(0, |v| parse_block_align(v).unwrap_or_default());
1025    let vertical_align = obj
1026        .get("verticalAlign")
1027        .map_or(0, |v| parse_block_vertical_align(v).unwrap_or_default());
1028    let content_value = parse_json_content_value(obj)?;
1029
1030    Ok(Block {
1031        id,
1032        fields: None,
1033        restrictions: None,
1034        children_ids,
1035        background_color,
1036        align,
1037        vertical_align,
1038        content_value,
1039    })
1040}
1041
1042fn parse_json_content_value(
1043    obj: &serde_json::Map<String, JsonValue>,
1044) -> Result<Option<ContentValue>> {
1045    if let Some(v) = obj.get("text") {
1046        return Ok(Some(ContentValue::Text(parse_json_text(v)?)));
1047    }
1048    if let Some(v) = obj.get("file") {
1049        return Ok(Some(ContentValue::File(parse_json_file(v)?)));
1050    }
1051    if let Some(v) = obj.get("bookmark") {
1052        return Ok(Some(ContentValue::Bookmark(parse_json_bookmark(v))));
1053    }
1054    if let Some(v) = obj.get("link") {
1055        return Ok(Some(ContentValue::Link(parse_json_link(v))));
1056    }
1057    if let Some(v) = obj.get("latex") {
1058        return Ok(Some(ContentValue::Latex(parse_json_latex(v))));
1059    }
1060    if let Some(v) = obj.get("div") {
1061        return Ok(Some(ContentValue::Div(parse_json_div(v))));
1062    }
1063    if obj.contains_key("table") {
1064        return Ok(Some(ContentValue::Table(Table {})));
1065    }
1066    if obj.contains_key("tableColumn") {
1067        return Ok(Some(ContentValue::TableColumn(TableColumn {})));
1068    }
1069    if let Some(v) = obj.get("tableRow") {
1070        return Ok(Some(ContentValue::TableRow(parse_json_table_row(v))));
1071    }
1072    Ok(None)
1073}
1074
1075fn parse_json_text(value: &JsonValue) -> Result<Text> {
1076    let obj = value
1077        .as_object()
1078        .ok_or_else(|| anyhow!("pb-json text block is not an object"))?;
1079    let style = obj
1080        .get("style")
1081        .map_or(0, |v| parse_text_style(v).unwrap_or(0));
1082    let marks = obj
1083        .get("marks")
1084        .map(parse_json_marks)
1085        .transpose()?
1086        .or_else(|| Some(anytype_rpc::model::block::content::text::Marks { marks: Vec::new() }));
1087
1088    Ok(Text {
1089        text: obj
1090            .get("text")
1091            .and_then(JsonValue::as_str)
1092            .unwrap_or_default()
1093            .to_string(),
1094        style,
1095        marks,
1096        checked: obj
1097            .get("checked")
1098            .and_then(JsonValue::as_bool)
1099            .unwrap_or(false),
1100        color: obj
1101            .get("color")
1102            .and_then(JsonValue::as_str)
1103            .unwrap_or_default()
1104            .to_string(),
1105        icon_emoji: obj
1106            .get("iconEmoji")
1107            .and_then(JsonValue::as_str)
1108            .unwrap_or_default()
1109            .to_string(),
1110        icon_image: obj
1111            .get("iconImage")
1112            .and_then(JsonValue::as_str)
1113            .unwrap_or_default()
1114            .to_string(),
1115    })
1116}
1117
1118fn parse_json_marks(value: &JsonValue) -> Result<anytype_rpc::model::block::content::text::Marks> {
1119    let obj = value
1120        .as_object()
1121        .ok_or_else(|| anyhow!("pb-json marks is not an object"))?;
1122    let marks = obj
1123        .get("marks")
1124        .and_then(JsonValue::as_array)
1125        .map_or_else(Vec::new, |items| {
1126            items.iter().filter_map(parse_json_mark).collect()
1127        });
1128    Ok(anytype_rpc::model::block::content::text::Marks { marks })
1129}
1130
1131fn parse_json_mark(value: &JsonValue) -> Option<Mark> {
1132    let obj = value.as_object()?;
1133    let range = obj.get("range").and_then(parse_json_range);
1134    let r#type = obj
1135        .get("type")
1136        .map_or(0, |v| parse_mark_type(v).unwrap_or(0));
1137    let param = obj
1138        .get("param")
1139        .and_then(JsonValue::as_str)
1140        .unwrap_or_default()
1141        .to_string();
1142    Some(Mark {
1143        range,
1144        r#type,
1145        param,
1146    })
1147}
1148
1149fn parse_json_range(value: &JsonValue) -> Option<Range> {
1150    let obj = value.as_object()?;
1151    let from = obj.get("from").and_then(JsonValue::as_i64)?;
1152    let to = obj.get("to").and_then(JsonValue::as_i64)?;
1153    Some(Range {
1154        from: i32::try_from(from).ok()?,
1155        to: i32::try_from(to).ok()?,
1156    })
1157}
1158
1159fn parse_json_file(value: &JsonValue) -> Result<File> {
1160    let obj = value
1161        .as_object()
1162        .ok_or_else(|| anyhow!("pb-json file block is not an object"))?;
1163    Ok(File {
1164        hash: obj
1165            .get("hash")
1166            .and_then(JsonValue::as_str)
1167            .unwrap_or_default()
1168            .to_string(),
1169        name: obj
1170            .get("name")
1171            .and_then(JsonValue::as_str)
1172            .unwrap_or_default()
1173            .to_string(),
1174        r#type: obj
1175            .get("type")
1176            .map_or(0, |v| parse_file_type(v).unwrap_or(0)),
1177        mime: obj
1178            .get("mime")
1179            .and_then(JsonValue::as_str)
1180            .unwrap_or_default()
1181            .to_string(),
1182        size: obj.get("size").and_then(JsonValue::as_i64).unwrap_or(0),
1183        added_at: obj.get("addedAt").and_then(JsonValue::as_i64).unwrap_or(0),
1184        target_object_id: obj
1185            .get("targetObjectId")
1186            .and_then(JsonValue::as_str)
1187            .unwrap_or_default()
1188            .to_string(),
1189        state: obj
1190            .get("state")
1191            .map_or(FileState::Done as i32, |v| parse_file_state(v).unwrap_or(0)),
1192        style: obj
1193            .get("style")
1194            .map_or(0, |v| parse_file_style(v).unwrap_or(0)),
1195    })
1196}
1197
1198fn parse_json_bookmark(value: &JsonValue) -> Bookmark {
1199    let obj = value.as_object();
1200    Bookmark {
1201        url: obj
1202            .and_then(|o| o.get("url"))
1203            .and_then(JsonValue::as_str)
1204            .unwrap_or_default()
1205            .to_string(),
1206        title: obj
1207            .and_then(|o| o.get("title"))
1208            .and_then(JsonValue::as_str)
1209            .unwrap_or_default()
1210            .to_string(),
1211        description: obj
1212            .and_then(|o| o.get("description"))
1213            .and_then(JsonValue::as_str)
1214            .unwrap_or_default()
1215            .to_string(),
1216        image_hash: obj
1217            .and_then(|o| o.get("imageHash"))
1218            .and_then(JsonValue::as_str)
1219            .unwrap_or_default()
1220            .to_string(),
1221        favicon_hash: obj
1222            .and_then(|o| o.get("faviconHash"))
1223            .and_then(JsonValue::as_str)
1224            .unwrap_or_default()
1225            .to_string(),
1226        r#type: 0,
1227        target_object_id: obj
1228            .and_then(|o| o.get("targetObjectId"))
1229            .and_then(JsonValue::as_str)
1230            .unwrap_or_default()
1231            .to_string(),
1232        state: 0,
1233    }
1234}
1235
1236fn parse_json_link(value: &JsonValue) -> Link {
1237    let obj = value.as_object();
1238    Link {
1239        target_block_id: obj
1240            .and_then(|o| o.get("targetBlockId"))
1241            .and_then(JsonValue::as_str)
1242            .unwrap_or_default()
1243            .to_string(),
1244        style: obj
1245            .and_then(|o| o.get("style"))
1246            .map_or(0, |v| parse_link_style(v).unwrap_or(0)),
1247        fields: None,
1248        icon_size: obj
1249            .and_then(|o| o.get("iconSize"))
1250            .map_or(0, |v| parse_link_icon_size(v).unwrap_or(0)),
1251        card_style: obj
1252            .and_then(|o| o.get("cardStyle"))
1253            .map_or(0, |v| parse_link_card_style(v).unwrap_or(0)),
1254        description: obj
1255            .and_then(|o| o.get("description"))
1256            .map_or(0, |v| parse_link_description(v).unwrap_or(0)),
1257        relations: obj
1258            .and_then(|o| o.get("relations"))
1259            .and_then(JsonValue::as_array)
1260            .map_or_else(Vec::new, |arr| {
1261                arr.iter()
1262                    .filter_map(JsonValue::as_str)
1263                    .map(ToString::to_string)
1264                    .collect()
1265            }),
1266    }
1267}
1268
1269fn parse_json_latex(value: &JsonValue) -> Latex {
1270    let obj = value.as_object();
1271    Latex {
1272        text: obj
1273            .and_then(|o| o.get("text"))
1274            .and_then(JsonValue::as_str)
1275            .unwrap_or_default()
1276            .to_string(),
1277        processor: 0,
1278    }
1279}
1280
1281fn parse_json_div(value: &JsonValue) -> Div {
1282    let style = value
1283        .as_object()
1284        .and_then(|o| o.get("style"))
1285        .and_then(parse_div_style)
1286        .unwrap_or(0);
1287    Div { style }
1288}
1289
1290fn parse_json_table_row(value: &JsonValue) -> TableRow {
1291    let is_header = value
1292        .as_object()
1293        .and_then(|o| o.get("isHeader"))
1294        .and_then(JsonValue::as_bool)
1295        .unwrap_or(false);
1296    TableRow { is_header }
1297}
1298
1299fn parse_block_align(value: &JsonValue) -> Option<i32> {
1300    if let Some(name) = value.as_str() {
1301        return anytype_rpc::model::block::Align::from_str_name(name).map(|v| v as i32);
1302    }
1303    value.as_i64().and_then(|n| i32::try_from(n).ok())
1304}
1305
1306fn parse_block_vertical_align(value: &JsonValue) -> Option<i32> {
1307    if let Some(name) = value.as_str() {
1308        return anytype_rpc::model::block::VerticalAlign::from_str_name(name).map(|v| v as i32);
1309    }
1310    value.as_i64().and_then(|n| i32::try_from(n).ok())
1311}
1312
1313fn parse_text_style(value: &JsonValue) -> Option<i32> {
1314    if let Some(name) = value.as_str() {
1315        return TextStyle::from_str_name(name).map(|v| v as i32);
1316    }
1317    value.as_i64().and_then(|n| i32::try_from(n).ok())
1318}
1319
1320fn parse_mark_type(value: &JsonValue) -> Option<i32> {
1321    if let Some(name) = value.as_str() {
1322        return MarkType::from_str_name(name).map(|v| v as i32);
1323    }
1324    value.as_i64().and_then(|n| i32::try_from(n).ok())
1325}
1326
1327fn parse_file_type(value: &JsonValue) -> Option<i32> {
1328    if let Some(name) = value.as_str() {
1329        return FileType::from_str_name(name).map(|v| v as i32);
1330    }
1331    value.as_i64().and_then(|n| i32::try_from(n).ok())
1332}
1333
1334fn parse_file_state(value: &JsonValue) -> Option<i32> {
1335    if let Some(name) = value.as_str() {
1336        return FileState::from_str_name(name).map(|v| v as i32);
1337    }
1338    value.as_i64().and_then(|n| i32::try_from(n).ok())
1339}
1340
1341fn parse_file_style(value: &JsonValue) -> Option<i32> {
1342    if let Some(name) = value.as_str() {
1343        return anytype_rpc::model::block::content::file::Style::from_str_name(name)
1344            .map(|v| v as i32);
1345    }
1346    value.as_i64().and_then(|n| i32::try_from(n).ok())
1347}
1348
1349fn parse_link_style(value: &JsonValue) -> Option<i32> {
1350    if let Some(name) = value.as_str() {
1351        return anytype_rpc::model::block::content::link::Style::from_str_name(name)
1352            .map(|v| v as i32);
1353    }
1354    value.as_i64().and_then(|n| i32::try_from(n).ok())
1355}
1356
1357fn parse_link_icon_size(value: &JsonValue) -> Option<i32> {
1358    if let Some(name) = value.as_str() {
1359        return anytype_rpc::model::block::content::link::IconSize::from_str_name(name)
1360            .map(|v| v as i32);
1361    }
1362    value.as_i64().and_then(|n| i32::try_from(n).ok())
1363}
1364
1365fn parse_link_card_style(value: &JsonValue) -> Option<i32> {
1366    if let Some(name) = value.as_str() {
1367        return anytype_rpc::model::block::content::link::CardStyle::from_str_name(name)
1368            .map(|v| v as i32);
1369    }
1370    value.as_i64().and_then(|n| i32::try_from(n).ok())
1371}
1372
1373fn parse_link_description(value: &JsonValue) -> Option<i32> {
1374    if let Some(name) = value.as_str() {
1375        return anytype_rpc::model::block::content::link::Description::from_str_name(name)
1376            .map(|v| v as i32);
1377    }
1378    value.as_i64().and_then(|n| i32::try_from(n).ok())
1379}
1380
1381fn parse_div_style(value: &JsonValue) -> Option<i32> {
1382    if let Some(name) = value.as_str() {
1383        return DivStyle::from_str_name(name).map(|v| v as i32);
1384    }
1385    value.as_i64().and_then(|n| i32::try_from(n).ok())
1386}
1387
1388/// Convert a protobuf snapshot payload to markdown.
1389///
1390/// `object_index` should include object metadata (name/layout/file extension) for
1391/// linked objects so mentions/files can be rendered as markdown links.
1392pub fn convert_pb_snapshot_to_markdown(
1393    snapshot_bytes: &[u8],
1394    object_index: &HashMap<String, ArchiveObjectInfo, RandomState>,
1395) -> Result<String> {
1396    let snapshot =
1397        SnapshotWithType::decode(snapshot_bytes).context("failed to decode protobuf snapshot")?;
1398    let sb_type = SmartBlockType::try_from(snapshot.sb_type).unwrap_or(SmartBlockType::Page);
1399    if should_skip_export(sb_type) {
1400        return Ok(String::new());
1401    }
1402    let data = snapshot
1403        .snapshot
1404        .and_then(|v| v.data)
1405        .ok_or_else(|| anyhow!("snapshot payload missing data"))?;
1406    if data.blocks.is_empty() {
1407        return Ok(String::new());
1408    }
1409
1410    let mut blocks_by_id = HashMap::<String, &Block>::with_capacity(data.blocks.len());
1411    for block in &data.blocks {
1412        blocks_by_id.insert(block.id.clone(), block);
1413    }
1414
1415    let root_id = data
1416        .details
1417        .as_ref()
1418        .and_then(|details| struct_field_as_string(details, "id"))
1419        .unwrap_or_else(|| data.blocks[0].id.clone());
1420    let Some(root) = blocks_by_id.get(&root_id) else {
1421        bail!("root block not found: {root_id}");
1422    };
1423    if root.children_ids.is_empty() {
1424        return Ok(String::new());
1425    }
1426
1427    let converter = MarkdownConverter {
1428        blocks_by_id,
1429        docs: object_index,
1430    };
1431    let root = converter
1432        .blocks_by_id
1433        .get(&root_id)
1434        .ok_or_else(|| anyhow!("root block not found after converter init: {root_id}"))?;
1435    Ok(converter.render(root))
1436}
1437
#[cfg(test)]
mod tests {
    use super::*;

    // Relative paths of the sample archives the tests read.
    const PB_ARCHIVE: &str = "samples/getting-started-pb";
    const JSON_ARCHIVE: &str = "samples/getting-started-json";

    // Object ids looked up inside the sample archives.
    const WIDGETS_OBJECT_ID: &str = "bafyreidgyug7rj6lweslb5rbeavhc44ytr5osfwj6w5snlspnjnsqa6ytm";
    const TABLE_OBJECT_ID: &str = "bafyreihs3oyibcjqhwjuynp6j6aaqjhz6quijsy4vgakv4223exvruc5wi";

    /// The protobuf sample renders its title, a level-two heading, and at least one
    /// line ending in three spaces.
    #[test]
    fn convert_sample_pb_object_to_markdown_contains_headings() {
        let rendered =
            convert_archive_object_pb_to_markdown(Path::new(PB_ARCHIVE), WIDGETS_OBJECT_ID)
                .unwrap();
        assert!(rendered.contains("How Widgets Work"));
        assert!(rendered.contains("## "));
        assert!(!rendered.is_empty());
        assert!(rendered.contains("   \n"));
    }

    /// The protobuf sample containing a table renders pipe-delimited rows with an
    /// alignment marker.
    #[test]
    fn convert_sample_pb_object_renders_markdown_tables() {
        let rendered =
            convert_archive_object_pb_to_markdown(Path::new(PB_ARCHIVE), TABLE_OBJECT_ID).unwrap();
        assert!(rendered.contains("Simple table 3x2"));
        assert!(rendered.contains('|'));
        assert!(rendered.contains(":-"));
    }

    /// The pb-json sample renders its title and a level-two heading.
    #[test]
    fn convert_sample_pb_json_object_to_markdown_contains_headings() {
        let rendered =
            convert_archive_object_to_markdown(Path::new(JSON_ARCHIVE), WIDGETS_OBJECT_ID).unwrap();
        assert!(rendered.contains("How Widgets Work"));
        assert!(rendered.contains("## "));
        assert!(!rendered.is_empty());
    }

    /// Saving a document object from the pb-json sample writes markdown to disk.
    #[test]
    fn save_sample_pb_json_document_writes_markdown() {
        let scratch = tempfile::tempdir().unwrap();
        let dest = scratch.path().join("out.md");
        let kind =
            save_archive_object(Path::new(JSON_ARCHIVE), WIDGETS_OBJECT_ID, &dest).unwrap();
        assert_eq!(kind, SavedObjectKind::Markdown);
        let written = fs::read_to_string(dest).unwrap();
        assert!(written.contains("How Widgets Work"));
    }
}