pebble_cms/cli/
import_wordpress.rs1use crate::models::{ContentStatus, ContentType, CreateContent};
2use crate::services::{content, html_to_markdown};
3use crate::Config;
4use anyhow::Result;
5use quick_xml::events::Event;
6use quick_xml::Reader;
7use std::path::Path;
8
/// One `<item>` element extracted from a WordPress WXR export.
///
/// All values are kept as the raw strings found in the export; mapping them
/// onto the CMS's own types happens in the importer.
// `published_at` is collected by the parser but not read by the importer,
// so the dead-code lint has to be silenced for this struct.
#[allow(dead_code)]
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct WxrItem {
    /// Text of the item's `<title>` element.
    title: String,
    /// Text of `<wp:post_name>`; may be empty, in which case the importer
    /// derives a slug from the title.
    slug: String,
    /// Raw HTML body taken from `<content:encoded>`.
    content_html: String,
    /// Text of `<wp:status>` (the importer recognises `publish`, `draft`
    /// and `private`).
    status: String,
    /// Text of `<wp:post_type>` (the importer recognises `post` and `page`).
    post_type: String,
    /// Text of `<wp:post_date>`, stored verbatim when present.
    published_at: Option<String>,
    /// Names of `<category domain="post_tag">` entries attached to the item.
    tags: Vec<String>,
}
19
20pub async fn run(config_path: &Path, file: &Path, overwrite: bool) -> Result<()> {
21 let config = Config::load(config_path)?;
22 let db = crate::Database::open(&config.database.path)?;
23 db.migrate()?;
24
25 if !file.exists() {
26 anyhow::bail!("WordPress export file not found: {}", file.display());
27 }
28
29 let xml_content = std::fs::read_to_string(file)?;
30 let items = parse_wxr(&xml_content)?;
31
32 tracing::info!(
33 "Found {} items in WordPress export",
34 items.len()
35 );
36
37 let mut posts_imported = 0;
38 let mut pages_imported = 0;
39 let mut skipped = 0;
40
41 for item in items {
42 let content_type = match item.post_type.as_str() {
43 "post" => ContentType::Post,
44 "page" => ContentType::Page,
45 _ => {
46 skipped += 1;
47 continue;
48 }
49 };
50
51 let status = match item.status.as_str() {
52 "publish" => ContentStatus::Published,
53 "draft" => ContentStatus::Draft,
54 "private" => ContentStatus::Draft,
55 _ => ContentStatus::Draft,
56 };
57
58 let markdown = html_to_markdown::convert(&item.content_html);
59
60 let slug = if item.slug.is_empty() {
61 crate::services::slug::generate_slug(&item.title)
62 } else {
63 item.slug.clone()
64 };
65
66 if let Ok(Some(_)) = content::get_content_by_slug(&db, &slug) {
68 if !overwrite {
69 tracing::info!("Skipping existing: {}", slug);
70 skipped += 1;
71 continue;
72 }
73 let conn = db.get()?;
75 let _ = conn.execute("DELETE FROM content WHERE slug = ?", [&slug]);
76 }
77
78 let input = CreateContent {
79 title: item.title,
80 slug: Some(slug.clone()),
81 content_type: content_type.clone(),
82 body_markdown: markdown,
83 status,
84 scheduled_at: None,
85 excerpt: None,
86 featured_image: None,
87 tags: item.tags,
88 metadata: None,
89 };
90
91 match content::create_content(&db, input, None, config.content.excerpt_length) {
92 Ok(_) => {
93 match content_type {
94 ContentType::Post => posts_imported += 1,
95 ContentType::Page => pages_imported += 1,
96 _ => {}
97 }
98 tracing::info!("Imported: {} ({})", slug, content_type);
99 }
100 Err(e) => {
101 tracing::warn!("Failed to import {}: {}", slug, e);
102 skipped += 1;
103 }
104 }
105 }
106
107 tracing::info!(
108 "WordPress import complete: {} posts, {} pages imported, {} skipped",
109 posts_imported,
110 pages_imported,
111 skipped
112 );
113 Ok(())
114}
115
116fn parse_wxr(xml: &str) -> Result<Vec<WxrItem>> {
117 let mut reader = Reader::from_str(xml);
118 reader.config_mut().trim_text(true);
119
120 let mut items = Vec::new();
121 let mut buf = Vec::new();
122
123 let mut in_item = false;
125 let mut current_tag = String::new();
126 let mut title = String::new();
127 let mut slug = String::new();
128 let mut content_html = String::new();
129 let mut status = String::new();
130 let mut post_type = String::new();
131 let mut published_at = Option::<String>::None;
132 let mut tags: Vec<String> = Vec::new();
133 let mut _in_content_encoded = false;
134
135 loop {
136 match reader.read_event_into(&mut buf) {
137 Ok(Event::Start(ref e)) => {
138 let local_name = e.local_name();
139 let tag_name = std::str::from_utf8(local_name.as_ref()).unwrap_or("");
140
141 if tag_name == "item" {
142 in_item = true;
143 title.clear();
144 slug.clear();
145 content_html.clear();
146 status.clear();
147 post_type.clear();
148 published_at = None;
149 tags.clear();
150 } else if in_item {
151 current_tag = tag_name.to_string();
152
153 let qname = e.name();
156 let full_name = std::str::from_utf8(qname.as_ref()).unwrap_or("");
157 if full_name.contains("post_name") {
158 current_tag = "wp:post_name".to_string();
159 } else if full_name.contains("status") && full_name.contains("wp") {
160 current_tag = "wp:status".to_string();
161 } else if full_name.contains("post_type") {
162 current_tag = "wp:post_type".to_string();
163 } else if full_name.contains("post_date") && !full_name.contains("gmt") {
164 current_tag = "wp:post_date".to_string();
165 } else if full_name.contains("encoded") {
166 _in_content_encoded = true;
167 current_tag = "content:encoded".to_string();
168 }
169
170 if tag_name == "category" {
172 let domain = e.attributes()
173 .filter_map(|a| a.ok())
174 .find(|a| a.key.as_ref() == b"domain")
175 .and_then(|a| String::from_utf8(a.value.to_vec()).ok());
176 if domain.as_deref() == Some("post_tag") {
177 current_tag = "post_tag".to_string();
178 }
179 }
180 }
181 }
182 Ok(Event::CData(ref e)) => {
183 if in_item {
184 let text = std::str::from_utf8(e.as_ref()).unwrap_or("");
185 match current_tag.as_str() {
186 "content:encoded" => content_html.push_str(text),
187 "title" => title.push_str(text),
188 _ => {}
189 }
190 }
191 }
192 Ok(Event::Text(ref e)) => {
193 if in_item {
194 let text = e.unescape().unwrap_or_default();
195 match current_tag.as_str() {
196 "title" => title.push_str(&text),
197 "wp:post_name" => slug.push_str(&text),
198 "content:encoded" => content_html.push_str(&text),
199 "wp:status" => status.push_str(&text),
200 "wp:post_type" => post_type.push_str(&text),
201 "wp:post_date" => published_at = Some(text.to_string()),
202 "post_tag" => {
203 let tag = text.trim().to_string();
204 if !tag.is_empty() {
205 tags.push(tag);
206 }
207 }
208 _ => {}
209 }
210 }
211 }
212 Ok(Event::End(ref e)) => {
213 let local_name = e.local_name();
214 let tag_name = std::str::from_utf8(local_name.as_ref()).unwrap_or("");
215
216 if tag_name == "item" && in_item {
217 if !title.is_empty() {
218 items.push(WxrItem {
219 title: title.clone(),
220 slug: slug.clone(),
221 content_html: content_html.clone(),
222 status: status.clone(),
223 post_type: post_type.clone(),
224 published_at: published_at.clone(),
225 tags: tags.clone(),
226 });
227 }
228 in_item = false;
229 }
230
231 let end_qname = e.name();
232 let full_name = std::str::from_utf8(end_qname.as_ref()).unwrap_or("");
233 if full_name.contains("encoded") {
234 _in_content_encoded = false;
235 }
236 current_tag.clear();
237 }
238 Ok(Event::Eof) => break,
239 Err(e) => {
240 tracing::warn!("XML parsing error: {}", e);
241 break;
242 }
243 _ => {}
244 }
245 buf.clear();
246 }
247
248 Ok(items)
249}