Skip to main content

ppt_rs/web2ppt/
converter.rs

1//! Converter from web content to PowerPoint
2
3use super::{Web2PptError, Result, Web2PptConfig, WebContent, ContentType};
4use crate::{create_pptx_with_content, SlideContent, SlideLayout};
5
/// Shorten `text` to at most `max_len` bytes of content and append `"..."`.
///
/// Text that already fits in `max_len` bytes is returned unchanged. Otherwise
/// the cut point is moved back to the nearest UTF-8 char boundary and, when a
/// space exists in the second half of the allowed window, back to that space
/// so a word is not split. Trailing whitespace before the ellipsis is removed.
/// The returned string can therefore be up to three bytes longer than
/// `max_len`.
fn truncate_text(text: &str, max_len: usize) -> String {
    if text.len() <= max_len {
        return text.to_owned();
    }

    // Largest index <= max_len that does not split a multi-byte character.
    // Index 0 is always a boundary, so the search cannot come up empty.
    let boundary = (0..=max_len)
        .rev()
        .find(|&idx| text.is_char_boundary(idx))
        .unwrap_or(0);

    // Prefer the last space, but only if it keeps more than half the budget.
    let cut = match text[..boundary].rfind(' ') {
        Some(space_idx) if space_idx > max_len / 2 => space_idx,
        _ => boundary,
    };

    let kept = text[..cut].trim_end();
    let mut shortened = String::with_capacity(kept.len() + 3);
    shortened.push_str(kept);
    shortened.push_str("...");
    shortened
}
27
/// Per-conversion settings, built with chained setter calls.
#[derive(Clone, Debug)]
pub struct ConversionOptions {
    /// Presentation title; when set it is used instead of the page title.
    pub title: Option<String>,
    /// Author name.
    pub author: Option<String>,
    /// Whether the source URL is added to slides.
    pub include_source_url: bool,
    /// Whether page numbers are added.
    pub add_page_numbers: bool,
}

impl Default for ConversionOptions {
    /// No title override, no author, source URL shown, page numbers off.
    fn default() -> Self {
        Self {
            title: None,
            author: None,
            include_source_url: true,
            add_page_numbers: false,
        }
    }
}

impl ConversionOptions {
    /// Start from the default options.
    pub fn new() -> Self {
        Default::default()
    }

    /// Use `title` as the presentation title.
    pub fn title(self, title: &str) -> Self {
        Self {
            title: Some(String::from(title)),
            ..self
        }
    }

    /// Record `author` as the author name.
    pub fn author(self, author: &str) -> Self {
        Self {
            author: Some(String::from(author)),
            ..self
        }
    }

    /// Choose whether the source URL is included.
    pub fn with_source_url(self, include: bool) -> Self {
        Self {
            include_source_url: include,
            ..self
        }
    }

    /// Choose whether page numbers are added.
    pub fn with_page_numbers(self, add: bool) -> Self {
        Self {
            add_page_numbers: add,
            ..self
        }
    }
}
82
83/// Web to PowerPoint converter
84pub struct Web2Ppt {
85    config: Web2PptConfig,
86}
87
88impl Web2Ppt {
89    /// Create a new converter with default config
90    pub fn new() -> Self {
91        Self::with_config(Web2PptConfig::default())
92    }
93
94    /// Create a new converter with custom config
95    pub fn with_config(config: Web2PptConfig) -> Self {
96        Web2Ppt { config }
97    }
98
99    /// Convert web content to PowerPoint bytes
100    pub fn convert(&self, content: &WebContent, options: &ConversionOptions) -> Result<Vec<u8>> {
101        let slides = self.build_slides(content, options)?;
102        let title = options.title.as_ref().unwrap_or(&content.title);
103
104        create_pptx_with_content(title, slides)
105            .map_err(|e| Web2PptError::GenerationError(e.to_string()))
106    }
107
108    /// Build slides from web content
109    fn build_slides(&self, content: &WebContent, options: &ConversionOptions) -> Result<Vec<SlideContent>> {
110        let mut slides = Vec::new();
111
112        // Title slide
113        let title = options.title.as_ref().unwrap_or(&content.title);
114        let mut title_slide = SlideContent::new(title)
115            .layout(SlideLayout::CenteredTitle);
116
117        if let Some(desc) = &content.description {
118            title_slide = title_slide.add_bullet(desc);
119        }
120
121        if options.include_source_url {
122            title_slide = title_slide.add_bullet(&format!("Source: {}", content.url));
123        }
124
125        slides.push(title_slide);
126
127        // Content slides
128        if self.config.group_by_headings {
129            self.build_grouped_slides(content, &mut slides)?;
130        } else {
131            self.build_linear_slides(content, &mut slides)?;
132        }
133
134        // Limit slides
135        if slides.len() > self.config.max_slides {
136            slides.truncate(self.config.max_slides);
137        }
138
139        Ok(slides)
140    }
141
142    /// Build slides grouped by headings
143    fn build_grouped_slides(&self, content: &WebContent, slides: &mut Vec<SlideContent>) -> Result<()> {
144        let groups = content.grouped_by_headings();
145
146        // If no groups found, fall back to linear mode
147        if groups.is_empty() {
148            return self.build_linear_slides(content, slides);
149        }
150
151        for (heading, blocks) in groups {
152            if slides.len() >= self.config.max_slides {
153                break;
154            }
155
156            let mut slide = SlideContent::new(&heading.text)
157                .layout(SlideLayout::TitleAndContent);
158
159            let mut bullet_count = 0;
160
161            for block in blocks {
162                if bullet_count >= self.config.max_bullets_per_slide {
163                    // Start a new slide for overflow
164                    slides.push(slide);
165                    slide = SlideContent::new(&format!("{} (cont.)", heading.text))
166                        .layout(SlideLayout::TitleAndContent);
167                    bullet_count = 0;
168
169                    if slides.len() >= self.config.max_slides {
170                        break;
171                    }
172                }
173
174                match &block.content_type {
175                    ContentType::Paragraph => {
176                        // Truncate long paragraphs - use char_indices for safe slicing
177                        let text = truncate_text(&block.text, 200);
178                        slide = slide.add_bullet(&text);
179                        bullet_count += 1;
180                    }
181                    ContentType::ListItem => {
182                        let text = truncate_text(&block.text, 180);
183                        slide = slide.add_bullet(&format!("• {}", text));
184                        bullet_count += 1;
185                    }
186                    ContentType::Quote => {
187                        let text = truncate_text(&block.text, 180);
188                        slide = slide.add_bullet(&format!("\"{}\"", text));
189                        bullet_count += 1;
190                    }
191                    ContentType::Code => {
192                        if self.config.include_code {
193                            let code = truncate_text(&block.text, 150);
194                            slide = slide.add_bullet(&format!("[Code] {}", code));
195                            bullet_count += 1;
196                        }
197                    }
198                    ContentType::Table(rows) => {
199                        if self.config.include_tables && !rows.is_empty() {
200                            let summary = format!("[Table: {} rows × {} cols]", 
201                                rows.len(), 
202                                rows.first().map(|r| r.len()).unwrap_or(0)
203                            );
204                            slide = slide.add_bullet(&summary);
205                            bullet_count += 1;
206                        }
207                    }
208                    ContentType::Image { alt, .. } => {
209                        if self.config.include_images && !alt.is_empty() {
210                            slide = slide.add_bullet(&format!("[Image: {}]", alt));
211                            bullet_count += 1;
212                        }
213                    }
214                    _ => {}
215                }
216            }
217
218            // Only add slide if it has content
219            if bullet_count > 0 {
220                slides.push(slide);
221            }
222        }
223
224        Ok(())
225    }
226
227    /// Build slides linearly (not grouped)
228    fn build_linear_slides(&self, content: &WebContent, slides: &mut Vec<SlideContent>) -> Result<()> {
229        let mut current_slide: Option<SlideContent> = None;
230        let mut bullet_count = 0;
231
232        // If no content blocks, create a slide with description
233        if content.blocks.is_empty() {
234            if let Some(desc) = &content.description {
235                let slide = SlideContent::new("Content")
236                    .layout(SlideLayout::TitleAndContent)
237                    .add_bullet(desc);
238                slides.push(slide);
239            }
240            return Ok(());
241        }
242
243        for block in &content.blocks {
244            if slides.len() >= self.config.max_slides {
245                break;
246            }
247
248            match &block.content_type {
249                ContentType::Title | ContentType::Heading(_) => {
250                    // Save current slide if it has content
251                    if let Some(slide) = current_slide.take() {
252                        if bullet_count > 0 {
253                            slides.push(slide);
254                        }
255                    }
256
257                    // Start new slide
258                    current_slide = Some(
259                        SlideContent::new(&block.text)
260                            .layout(SlideLayout::TitleAndContent)
261                    );
262                    bullet_count = 0;
263                }
264                ContentType::Paragraph => {
265                    // If no current slide, create one
266                    if current_slide.is_none() {
267                        current_slide = Some(
268                            SlideContent::new("Overview")
269                                .layout(SlideLayout::TitleAndContent)
270                        );
271                    }
272                    
273                    if let Some(ref mut slide) = current_slide {
274                        if bullet_count < self.config.max_bullets_per_slide {
275                            let text = truncate_text(&block.text, 200);
276                            *slide = slide.clone().add_bullet(&text);
277                            bullet_count += 1;
278                        } else {
279                            // Start new continuation slide
280                            slides.push(slide.clone());
281                            let title = slide.title.clone();
282                            *slide = SlideContent::new(&format!("{} (cont.)", title))
283                                .layout(SlideLayout::TitleAndContent);
284                            let text = truncate_text(&block.text, 200);
285                            *slide = slide.clone().add_bullet(&text);
286                            bullet_count = 1;
287                        }
288                    }
289                }
290                ContentType::ListItem => {
291                    if current_slide.is_none() {
292                        current_slide = Some(
293                            SlideContent::new("Key Points")
294                                .layout(SlideLayout::TitleAndContent)
295                        );
296                    }
297                    
298                    if let Some(ref mut slide) = current_slide {
299                        if bullet_count < self.config.max_bullets_per_slide {
300                            let text = truncate_text(&block.text, 180);
301                            *slide = slide.clone().add_bullet(&format!("• {}", text));
302                            bullet_count += 1;
303                        }
304                    }
305                }
306                ContentType::Quote => {
307                    if let Some(ref mut slide) = current_slide {
308                        if bullet_count < self.config.max_bullets_per_slide {
309                            let text = truncate_text(&block.text, 180);
310                            *slide = slide.clone().add_bullet(&format!("\"{}\"", text));
311                            bullet_count += 1;
312                        }
313                    }
314                }
315                _ => {}
316            }
317        }
318
319        // Save last slide
320        if let Some(slide) = current_slide {
321            if bullet_count > 0 {
322                slides.push(slide);
323            }
324        }
325
326        Ok(())
327    }
328
329    /// Get config
330    pub fn config(&self) -> &Web2PptConfig {
331        &self.config
332    }
333}
334
335impl Default for Web2Ppt {
336    fn default() -> Self {
337        Self::new()
338    }
339}
340
/// Fetch `url` and convert the page to PPTX bytes using the default
/// [`Web2PptConfig`] and default [`ConversionOptions`].
///
/// Only compiled when the `web2ppt` feature is enabled. Any error from
/// [`url_to_pptx_with_options`] is returned unchanged.
#[cfg(feature = "web2ppt")]
pub fn url_to_pptx(url: &str) -> Result<Vec<u8>> {
    let default_config = Web2PptConfig::default();
    let default_options = ConversionOptions::default();
    url_to_pptx_with_options(url, default_config, default_options)
}
346
/// Fetch `url`, parse the returned HTML and convert it to PPTX bytes.
///
/// The same `config` is handed to the fetcher, the parser and the converter.
/// Only compiled when the `web2ppt` feature is enabled.
///
/// # Errors
///
/// Returns the first error raised while creating the fetcher, fetching,
/// parsing or converting.
#[cfg(feature = "web2ppt")]
pub fn url_to_pptx_with_options(
    url: &str,
    config: Web2PptConfig,
    options: ConversionOptions,
) -> Result<Vec<u8>> {
    use super::{WebFetcher, WebParser};

    // Step 1: download the page.
    let page_fetcher = WebFetcher::with_config(config.clone())?;
    let raw_html = page_fetcher.fetch(url)?;

    // Step 2: extract structured content from the markup.
    let page_parser = WebParser::with_config(config.clone());
    let page = page_parser.parse(&raw_html, url)?;

    // Step 3: render the slides; the last user of `config` takes ownership.
    Web2Ppt::with_config(config).convert(&page, &options)
}
368
369/// Convert HTML string to PPTX bytes
370pub fn html_to_pptx(html: &str, url: &str) -> Result<Vec<u8>> {
371    html_to_pptx_with_options(html, url, Web2PptConfig::default(), ConversionOptions::default())
372}
373
374/// Convert HTML string to PPTX bytes with options
375pub fn html_to_pptx_with_options(
376    html: &str,
377    url: &str,
378    config: Web2PptConfig,
379    options: ConversionOptions,
380) -> Result<Vec<u8>> {
381    use super::WebParser;
382
383    // Parse
384    let parser = WebParser::with_config(config.clone());
385    let content = parser.parse(html, url)?;
386
387    // Convert
388    let converter = Web2Ppt::with_config(config);
389    converter.convert(&content, &options)
390}
391
#[cfg(test)]
mod tests {
    use super::*;

    /// The builder setters store exactly the values they are given.
    #[test]
    fn test_conversion_options() {
        let built = ConversionOptions::new()
            .title("Custom Title")
            .author("Test Author")
            .with_source_url(false);

        assert_eq!(built.title.as_deref(), Some("Custom Title"));
        assert_eq!(built.author.as_deref(), Some("Test Author"));
        assert_eq!(built.include_source_url, false);
    }

    /// A small document with headings, paragraphs and a list converts
    /// successfully and yields a non-empty byte buffer.
    #[test]
    fn test_html_to_pptx() {
        let html = r#"
            <!DOCTYPE html>
            <html>
            <head><title>Test Page</title></head>
            <body>
                <h1>Main Title</h1>
                <p>This is a paragraph with enough text to be included in the presentation.</p>
                <h2>Section 1</h2>
                <p>Section 1 content with enough text to be included in the presentation.</p>
                <ul>
                    <li>Item 1</li>
                    <li>Item 2</li>
                </ul>
            </body>
            </html>
        "#;

        match html_to_pptx(html, "https://example.com") {
            Ok(pptx) => assert!(!pptx.is_empty()),
            Err(_) => panic!("conversion of the sample document failed"),
        }
    }

    /// Limits set on the config are visible through the converter's getter.
    #[test]
    fn test_web2ppt_config() {
        let limits = Web2PptConfig::new()
            .max_slides(5)
            .max_bullets(3);

        let converter = Web2Ppt::with_config(limits);
        let seen = converter.config();
        assert_eq!(seen.max_slides, 5);
        assert_eq!(seen.max_bullets_per_slide, 3);
    }
}
440        let converter = Web2Ppt::with_config(config);
441        assert_eq!(converter.config().max_slides, 5);
442        assert_eq!(converter.config().max_bullets_per_slide, 3);
443    }
444}