ppt_rs/web2ppt/
converter.rs1use super::{Web2PptError, Result, Web2PptConfig, WebContent, ContentType};
4use crate::{create_pptx_with_content, SlideContent, SlideLayout};
5
/// Shortens `text` to roughly `max_len` bytes and appends `"..."`.
///
/// Text whose byte length is already within `max_len` is returned unchanged.
/// Otherwise the cut point is the largest UTF-8 character boundary that does
/// not exceed `max_len`; if the last space before that point lies past the
/// halfway mark (`max_len / 2`), the cut moves back to that space so a word is
/// not split. Trailing whitespace is trimmed before the ellipsis is appended,
/// so the result may be up to three bytes longer than `max_len`.
fn truncate_text(text: &str, max_len: usize) -> String {
    if text.len() <= max_len {
        return text.to_string();
    }

    // Walk downwards from `max_len` to the nearest char boundary. Index 0 is
    // always a boundary, so the fallback is never actually taken.
    let boundary = (0..=max_len)
        .rev()
        .find(|&idx| text.is_char_boundary(idx))
        .unwrap_or(0);

    // Prefer breaking at a space, but only when that keeps more than half of
    // the allowed length.
    let cut = match text[..boundary].rfind(' ') {
        Some(space) if space > max_len / 2 => space,
        _ => boundary,
    };

    let mut shortened = String::with_capacity(cut + 3);
    shortened.push_str(text[..cut].trim_end());
    shortened.push_str("...");
    shortened
}
27
/// Per-conversion settings supplied alongside the parsed web content.
#[derive(Debug, Clone)]
pub struct ConversionOptions {
    /// Overrides the page title; when `None` the title of the web content is
    /// used for the deck and its title slide.
    pub title: Option<String>,
    /// Author name. Stored by the builder; not read by the slide-building
    /// code in this file.
    pub author: Option<String>,
    /// When `true`, the title slide gets a `Source: <url>` bullet.
    pub include_source_url: bool,
    /// Page-number flag. Stored by the builder; not read by the slide-building
    /// code in this file.
    pub add_page_numbers: bool,
}
40
41impl Default for ConversionOptions {
42 fn default() -> Self {
43 ConversionOptions {
44 title: None,
45 author: None,
46 include_source_url: true,
47 add_page_numbers: false,
48 }
49 }
50}
51
52impl ConversionOptions {
53 pub fn new() -> Self {
55 Self::default()
56 }
57
58 pub fn title(mut self, title: &str) -> Self {
60 self.title = Some(title.to_string());
61 self
62 }
63
64 pub fn author(mut self, author: &str) -> Self {
66 self.author = Some(author.to_string());
67 self
68 }
69
70 pub fn with_source_url(mut self, include: bool) -> Self {
72 self.include_source_url = include;
73 self
74 }
75
76 pub fn with_page_numbers(mut self, add: bool) -> Self {
78 self.add_page_numbers = add;
79 self
80 }
81}
82
83pub struct Web2Ppt {
85 config: Web2PptConfig,
86}
87
88impl Web2Ppt {
89 pub fn new() -> Self {
91 Self::with_config(Web2PptConfig::default())
92 }
93
94 pub fn with_config(config: Web2PptConfig) -> Self {
96 Web2Ppt { config }
97 }
98
99 pub fn convert(&self, content: &WebContent, options: &ConversionOptions) -> Result<Vec<u8>> {
101 let slides = self.build_slides(content, options)?;
102 let title = options.title.as_ref().unwrap_or(&content.title);
103
104 create_pptx_with_content(title, slides)
105 .map_err(|e| Web2PptError::GenerationError(e.to_string()))
106 }
107
108 fn build_slides(&self, content: &WebContent, options: &ConversionOptions) -> Result<Vec<SlideContent>> {
110 let mut slides = Vec::new();
111
112 let title = options.title.as_ref().unwrap_or(&content.title);
114 let mut title_slide = SlideContent::new(title)
115 .layout(SlideLayout::CenteredTitle);
116
117 if let Some(desc) = &content.description {
118 title_slide = title_slide.add_bullet(desc);
119 }
120
121 if options.include_source_url {
122 title_slide = title_slide.add_bullet(&format!("Source: {}", content.url));
123 }
124
125 slides.push(title_slide);
126
127 if self.config.group_by_headings {
129 self.build_grouped_slides(content, &mut slides)?;
130 } else {
131 self.build_linear_slides(content, &mut slides)?;
132 }
133
134 if slides.len() > self.config.max_slides {
136 slides.truncate(self.config.max_slides);
137 }
138
139 Ok(slides)
140 }
141
142 fn build_grouped_slides(&self, content: &WebContent, slides: &mut Vec<SlideContent>) -> Result<()> {
144 let groups = content.grouped_by_headings();
145
146 if groups.is_empty() {
148 return self.build_linear_slides(content, slides);
149 }
150
151 for (heading, blocks) in groups {
152 if slides.len() >= self.config.max_slides {
153 break;
154 }
155
156 let mut slide = SlideContent::new(&heading.text)
157 .layout(SlideLayout::TitleAndContent);
158
159 let mut bullet_count = 0;
160
161 for block in blocks {
162 if bullet_count >= self.config.max_bullets_per_slide {
163 slides.push(slide);
165 slide = SlideContent::new(&format!("{} (cont.)", heading.text))
166 .layout(SlideLayout::TitleAndContent);
167 bullet_count = 0;
168
169 if slides.len() >= self.config.max_slides {
170 break;
171 }
172 }
173
174 match &block.content_type {
175 ContentType::Paragraph => {
176 let text = truncate_text(&block.text, 200);
178 slide = slide.add_bullet(&text);
179 bullet_count += 1;
180 }
181 ContentType::ListItem => {
182 let text = truncate_text(&block.text, 180);
183 slide = slide.add_bullet(&format!("• {}", text));
184 bullet_count += 1;
185 }
186 ContentType::Quote => {
187 let text = truncate_text(&block.text, 180);
188 slide = slide.add_bullet(&format!("\"{}\"", text));
189 bullet_count += 1;
190 }
191 ContentType::Code => {
192 if self.config.include_code {
193 let code = truncate_text(&block.text, 150);
194 slide = slide.add_bullet(&format!("[Code] {}", code));
195 bullet_count += 1;
196 }
197 }
198 ContentType::Table(rows) => {
199 if self.config.include_tables && !rows.is_empty() {
200 let summary = format!("[Table: {} rows × {} cols]",
201 rows.len(),
202 rows.first().map(|r| r.len()).unwrap_or(0)
203 );
204 slide = slide.add_bullet(&summary);
205 bullet_count += 1;
206 }
207 }
208 ContentType::Image { alt, .. } => {
209 if self.config.include_images && !alt.is_empty() {
210 slide = slide.add_bullet(&format!("[Image: {}]", alt));
211 bullet_count += 1;
212 }
213 }
214 _ => {}
215 }
216 }
217
218 if bullet_count > 0 {
220 slides.push(slide);
221 }
222 }
223
224 Ok(())
225 }
226
227 fn build_linear_slides(&self, content: &WebContent, slides: &mut Vec<SlideContent>) -> Result<()> {
229 let mut current_slide: Option<SlideContent> = None;
230 let mut bullet_count = 0;
231
232 if content.blocks.is_empty() {
234 if let Some(desc) = &content.description {
235 let slide = SlideContent::new("Content")
236 .layout(SlideLayout::TitleAndContent)
237 .add_bullet(desc);
238 slides.push(slide);
239 }
240 return Ok(());
241 }
242
243 for block in &content.blocks {
244 if slides.len() >= self.config.max_slides {
245 break;
246 }
247
248 match &block.content_type {
249 ContentType::Title | ContentType::Heading(_) => {
250 if let Some(slide) = current_slide.take() {
252 if bullet_count > 0 {
253 slides.push(slide);
254 }
255 }
256
257 current_slide = Some(
259 SlideContent::new(&block.text)
260 .layout(SlideLayout::TitleAndContent)
261 );
262 bullet_count = 0;
263 }
264 ContentType::Paragraph => {
265 if current_slide.is_none() {
267 current_slide = Some(
268 SlideContent::new("Overview")
269 .layout(SlideLayout::TitleAndContent)
270 );
271 }
272
273 if let Some(ref mut slide) = current_slide {
274 if bullet_count < self.config.max_bullets_per_slide {
275 let text = truncate_text(&block.text, 200);
276 *slide = slide.clone().add_bullet(&text);
277 bullet_count += 1;
278 } else {
279 slides.push(slide.clone());
281 let title = slide.title.clone();
282 *slide = SlideContent::new(&format!("{} (cont.)", title))
283 .layout(SlideLayout::TitleAndContent);
284 let text = truncate_text(&block.text, 200);
285 *slide = slide.clone().add_bullet(&text);
286 bullet_count = 1;
287 }
288 }
289 }
290 ContentType::ListItem => {
291 if current_slide.is_none() {
292 current_slide = Some(
293 SlideContent::new("Key Points")
294 .layout(SlideLayout::TitleAndContent)
295 );
296 }
297
298 if let Some(ref mut slide) = current_slide {
299 if bullet_count < self.config.max_bullets_per_slide {
300 let text = truncate_text(&block.text, 180);
301 *slide = slide.clone().add_bullet(&format!("• {}", text));
302 bullet_count += 1;
303 }
304 }
305 }
306 ContentType::Quote => {
307 if let Some(ref mut slide) = current_slide {
308 if bullet_count < self.config.max_bullets_per_slide {
309 let text = truncate_text(&block.text, 180);
310 *slide = slide.clone().add_bullet(&format!("\"{}\"", text));
311 bullet_count += 1;
312 }
313 }
314 }
315 _ => {}
316 }
317 }
318
319 if let Some(slide) = current_slide {
321 if bullet_count > 0 {
322 slides.push(slide);
323 }
324 }
325
326 Ok(())
327 }
328
329 pub fn config(&self) -> &Web2PptConfig {
331 &self.config
332 }
333}
334
335impl Default for Web2Ppt {
336 fn default() -> Self {
337 Self::new()
338 }
339}
340
/// Fetches `url` and converts the page to PPTX bytes using the default
/// configuration and default conversion options.
///
/// # Errors
///
/// Returns whatever error [`url_to_pptx_with_options`] reports.
#[cfg(feature = "web2ppt")]
pub fn url_to_pptx(url: &str) -> Result<Vec<u8>> {
    let config = Web2PptConfig::default();
    let options = ConversionOptions::default();
    url_to_pptx_with_options(url, config, options)
}
346
/// Fetches `url`, parses the returned HTML and converts it to PPTX bytes.
///
/// The same `config` is handed to the fetcher, the parser and the converter.
///
/// # Errors
///
/// Propagates errors from constructing the fetcher, fetching, parsing, and
/// the final conversion.
#[cfg(feature = "web2ppt")]
pub fn url_to_pptx_with_options(
    url: &str,
    config: Web2PptConfig,
    options: ConversionOptions,
) -> Result<Vec<u8>> {
    use super::{WebFetcher, WebParser};

    // Fetch -> parse -> convert; each stage gets its own copy of the config,
    // the last one takes ownership.
    let html = WebFetcher::with_config(config.clone())?.fetch(url)?;
    let content = WebParser::with_config(config.clone()).parse(&html, url)?;

    Web2Ppt::with_config(config).convert(&content, &options)
}
368
369pub fn html_to_pptx(html: &str, url: &str) -> Result<Vec<u8>> {
371 html_to_pptx_with_options(html, url, Web2PptConfig::default(), ConversionOptions::default())
372}
373
374pub fn html_to_pptx_with_options(
376 html: &str,
377 url: &str,
378 config: Web2PptConfig,
379 options: ConversionOptions,
380) -> Result<Vec<u8>> {
381 use super::WebParser;
382
383 let parser = WebParser::with_config(config.clone());
385 let content = parser.parse(html, url)?;
386
387 let converter = Web2Ppt::with_config(config);
389 converter.convert(&content, &options)
390}
391
#[cfg(test)]
mod tests {
    use super::*;

    /// Builder setters store the values they are given.
    #[test]
    fn test_conversion_options() {
        let options = ConversionOptions::new()
            .title("Custom Title")
            .author("Test Author")
            .with_source_url(false);

        assert_eq!(options.title.as_deref(), Some("Custom Title"));
        assert_eq!(options.author.as_deref(), Some("Test Author"));
        assert!(!options.include_source_url);
    }

    /// A small HTML page converts successfully into non-empty PPTX bytes.
    #[test]
    fn test_html_to_pptx() {
        let html = r#"
            <!DOCTYPE html>
            <html>
            <head><title>Test Page</title></head>
            <body>
                <h1>Main Title</h1>
                <p>This is a paragraph with enough text to be included in the presentation.</p>
                <h2>Section 1</h2>
                <p>Section 1 content with enough text to be included in the presentation.</p>
                <ul>
                    <li>Item 1</li>
                    <li>Item 2</li>
                </ul>
            </body>
            </html>
        "#;

        let pptx = html_to_pptx(html, "https://example.com")
            .expect("conversion of a simple page should succeed");
        assert!(!pptx.is_empty());
    }

    /// Limits set on the config are visible through `Web2Ppt::config`.
    #[test]
    fn test_web2ppt_config() {
        let converter = Web2Ppt::with_config(Web2PptConfig::new().max_slides(5).max_bullets(3));
        let config = converter.config();

        assert_eq!(config.max_slides, 5);
        assert_eq!(config.max_bullets_per_slide, 3);
    }
}