Skip to main content

pebble_cms/cli/
import_ghost.rs

1use crate::models::{ContentStatus, ContentType, CreateContent};
2use crate::services::{content, html_to_markdown};
3use crate::Config;
4use anyhow::Result;
5use serde_json::Value;
6use std::collections::HashMap;
7use std::path::Path;
8
9pub async fn run(config_path: &Path, file: &Path, overwrite: bool) -> Result<()> {
10    let config = Config::load(config_path)?;
11    let db = crate::Database::open(&config.database.path)?;
12    db.migrate()?;
13
14    if !file.exists() {
15        anyhow::bail!("Ghost export file not found: {}", file.display());
16    }
17
18    let json_content = std::fs::read_to_string(file)?;
19    let export: Value = serde_json::from_str(&json_content)?;
20
21    // Ghost export format: { "db": [{ "data": { "posts": [...], "tags": [...], "posts_tags": [...] } }] }
22    let data = export
23        .get("db")
24        .and_then(|db| db.as_array())
25        .and_then(|arr| arr.first())
26        .and_then(|first| first.get("data"))
27        .ok_or_else(|| anyhow::anyhow!("Invalid Ghost export format: missing db[0].data"))?;
28
29    let posts = data
30        .get("posts")
31        .and_then(|p| p.as_array())
32        .cloned()
33        .unwrap_or_default();
34
35    let ghost_tags = data
36        .get("tags")
37        .and_then(|t| t.as_array())
38        .cloned()
39        .unwrap_or_default();
40
41    let posts_tags = data
42        .get("posts_tags")
43        .and_then(|pt| pt.as_array())
44        .cloned()
45        .unwrap_or_default();
46
47    // Build tag lookup: ghost tag id -> tag name
48    let mut tag_map: HashMap<String, String> = HashMap::new();
49    for tag in &ghost_tags {
50        let id = tag.get("id").and_then(|v| v.as_str()).unwrap_or_default().to_string();
51        let name = tag.get("name").and_then(|v| v.as_str()).unwrap_or_default().to_string();
52        if !id.is_empty() && !name.is_empty() {
53            tag_map.insert(id, name);
54        }
55    }
56
57    // Build post -> tags mapping
58    let mut post_tags_map: HashMap<String, Vec<String>> = HashMap::new();
59    for pt in &posts_tags {
60        let post_id = pt.get("post_id").and_then(|v| v.as_str()).unwrap_or_default().to_string();
61        let tag_id = pt.get("tag_id").and_then(|v| v.as_str()).unwrap_or_default().to_string();
62        if let Some(tag_name) = tag_map.get(&tag_id) {
63            post_tags_map
64                .entry(post_id)
65                .or_default()
66                .push(tag_name.clone());
67        }
68    }
69
70    tracing::info!("Found {} posts/pages in Ghost export", posts.len());
71
72    let mut posts_imported = 0;
73    let mut pages_imported = 0;
74    let mut skipped = 0;
75
76    for post in &posts {
77        let title = post.get("title").and_then(|v| v.as_str()).unwrap_or("Untitled").to_string();
78        let slug = post.get("slug").and_then(|v| v.as_str()).unwrap_or_default().to_string();
79        let ghost_status = post.get("status").and_then(|v| v.as_str()).unwrap_or("draft");
80        let post_type = post.get("type").and_then(|v| v.as_str()).unwrap_or("post");
81        let ghost_id = post.get("id").and_then(|v| v.as_str()).unwrap_or_default().to_string();
82
83        let content_type = match post_type {
84            "post" => ContentType::Post,
85            "page" => ContentType::Page,
86            _ => {
87                skipped += 1;
88                continue;
89            }
90        };
91
92        let status = match ghost_status {
93            "published" => ContentStatus::Published,
94            "scheduled" => ContentStatus::Scheduled,
95            _ => ContentStatus::Draft,
96        };
97
98        // Get the content: prefer html, fall back to mobiledoc
99        let html = post.get("html").and_then(|v| v.as_str()).unwrap_or_default();
100        let body_html = if html.is_empty() {
101            // Try mobiledoc
102            extract_mobiledoc_text(post.get("mobiledoc").and_then(|v| v.as_str()).unwrap_or(""))
103        } else {
104            html.to_string()
105        };
106
107        let markdown = html_to_markdown::convert(&body_html);
108
109        let slug = if slug.is_empty() {
110            crate::services::slug::generate_slug(&title)
111        } else {
112            slug
113        };
114
115        // Check for existing content
116        if let Ok(Some(_)) = content::get_content_by_slug(&db, &slug) {
117            if !overwrite {
118                tracing::info!("Skipping existing: {}", slug);
119                skipped += 1;
120                continue;
121            }
122            let conn = db.get()?;
123            let _ = conn.execute("DELETE FROM content WHERE slug = ?", [&slug]);
124        }
125
126        let tags = post_tags_map.get(&ghost_id).cloned().unwrap_or_default();
127
128        let input = CreateContent {
129            title,
130            slug: Some(slug.clone()),
131            content_type: content_type.clone(),
132            body_markdown: markdown,
133            status,
134            scheduled_at: None,
135            excerpt: post.get("custom_excerpt").and_then(|v| v.as_str()).map(|s| s.to_string()),
136            featured_image: post.get("feature_image").and_then(|v| v.as_str()).map(|s| s.to_string()),
137            tags,
138            metadata: None,
139        };
140
141        match content::create_content(&db, input, None, config.content.excerpt_length) {
142            Ok(_) => {
143                match content_type {
144                    ContentType::Post => posts_imported += 1,
145                    ContentType::Page => pages_imported += 1,
146                    _ => {}
147                }
148                tracing::info!("Imported: {} ({})", slug, content_type);
149            }
150            Err(e) => {
151                tracing::warn!("Failed to import {}: {}", slug, e);
152                skipped += 1;
153            }
154        }
155    }
156
157    tracing::info!(
158        "Ghost import complete: {} posts, {} pages imported, {} skipped",
159        posts_imported,
160        pages_imported,
161        skipped
162    );
163    Ok(())
164}
165
166/// Extract plain text from Ghost's mobiledoc format.
167/// Mobiledoc is a JSON-based document format used by Ghost.
168fn extract_mobiledoc_text(mobiledoc_str: &str) -> String {
169    if mobiledoc_str.is_empty() {
170        return String::new();
171    }
172
173    let mobiledoc: Value = match serde_json::from_str(mobiledoc_str) {
174        Ok(v) => v,
175        Err(_) => return String::new(),
176    };
177
178    let mut parts = Vec::new();
179
180    // Extract text from sections
181    if let Some(sections) = mobiledoc.get("sections").and_then(|s| s.as_array()) {
182        for section in sections {
183            if let Some(arr) = section.as_array() {
184                // [1, "p", [[0, [], 0, "text"]]]  -- markup section
185                if arr.len() >= 3 {
186                    if let Some(markers) = arr.get(2).and_then(|m| m.as_array()) {
187                        for marker in markers {
188                            if let Some(m_arr) = marker.as_array() {
189                                // Last element is the text
190                                if let Some(text) = m_arr.last().and_then(|t| t.as_str()) {
191                                    parts.push(text.to_string());
192                                }
193                            }
194                        }
195                    }
196                }
197            }
198        }
199    }
200
201    // Extract from cards
202    if let Some(cards) = mobiledoc.get("cards").and_then(|c| c.as_array()) {
203        for card in cards {
204            if let Some(arr) = card.as_array() {
205                if arr.len() >= 2 {
206                    let card_type = arr.first().and_then(|t| t.as_str()).unwrap_or("");
207                    let payload = arr.get(1);
208                    match card_type {
209                        "html" => {
210                            if let Some(html) = payload.and_then(|p| p.get("html")).and_then(|h| h.as_str()) {
211                                parts.push(html.to_string());
212                            }
213                        }
214                        "markdown" => {
215                            if let Some(md) = payload.and_then(|p| p.get("markdown")).and_then(|m| m.as_str()) {
216                                parts.push(md.to_string());
217                            }
218                        }
219                        "image" => {
220                            if let Some(src) = payload.and_then(|p| p.get("src")).and_then(|s| s.as_str()) {
221                                let alt = payload.and_then(|p| p.get("alt")).and_then(|a| a.as_str()).unwrap_or("");
222                                parts.push(format!("<img src=\"{}\" alt=\"{}\" />", src, alt));
223                            }
224                        }
225                        _ => {}
226                    }
227                }
228            }
229        }
230    }
231
232    parts.join("\n\n")
233}