linch_docx_rs/document/
run.rs

1//! Run element (w:r) - a contiguous run of text with uniform formatting
2
3use crate::error::Result;
4use crate::xml::{get_w_val, parse_bool, RawXmlElement, RawXmlNode};
5use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event};
6use quick_xml::{Reader, Writer};
7use std::io::BufRead;
8
9/// Run element (w:r)
10#[derive(Clone, Debug, Default)]
11pub struct Run {
12    /// Run properties
13    pub properties: Option<RunProperties>,
14    /// Run content
15    pub content: Vec<RunContent>,
16    /// Unknown attributes (preserved)
17    pub unknown_attrs: Vec<(String, String)>,
18    /// Unknown children (preserved)
19    pub unknown_children: Vec<RawXmlNode>,
20}
21
22/// Content within a run
23#[derive(Clone, Debug)]
24pub enum RunContent {
25    /// Text (w:t)
26    Text(String),
27    /// Tab (w:tab)
28    Tab,
29    /// Break (w:br)
30    Break(BreakType),
31    /// Carriage return (w:cr)
32    CarriageReturn,
33    /// Soft hyphen
34    SoftHyphen,
35    /// Non-breaking hyphen
36    NoBreakHyphen,
37    /// Unknown (preserved)
38    Unknown(RawXmlNode),
39}
40
/// Kind of break carried by a `w:br` element.
///
/// The type is a plain field-less enum, so it is `Copy` and comparable;
/// callers can test `break_type == BreakType::Page` directly.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum BreakType {
    /// Ordinary line break; used when `w:br` has no (or an unrecognised)
    /// `type` attribute.
    #[default]
    TextWrapping,
    /// Page break (`w:type="page"`).
    Page,
    /// Column break (`w:type="column"`).
    Column,
}
49
50/// Run properties (w:rPr)
51#[derive(Clone, Debug, Default)]
52pub struct RunProperties {
53    /// Style ID
54    pub style: Option<String>,
55    /// Bold
56    pub bold: Option<bool>,
57    /// Italic
58    pub italic: Option<bool>,
59    /// Underline type
60    pub underline: Option<String>,
61    /// Strike-through
62    pub strike: Option<bool>,
63    /// Double strike-through
64    pub double_strike: Option<bool>,
65    /// Font size (in half-points, e.g., 24 = 12pt)
66    pub size: Option<u32>,
67    /// Color (RGB hex)
68    pub color: Option<String>,
69    /// Highlight color
70    pub highlight: Option<String>,
71    /// Font (ASCII)
72    pub font_ascii: Option<String>,
73    /// Font (East Asia)
74    pub font_east_asia: Option<String>,
75    /// Vertical alignment (superscript/subscript)
76    pub vertical_align: Option<String>,
77    /// Unknown children (preserved)
78    pub unknown_children: Vec<RawXmlNode>,
79}
80
81impl Run {
82    /// Parse from reader (after w:r start tag)
83    pub fn from_reader<R: BufRead>(reader: &mut Reader<R>, start: &BytesStart) -> Result<Self> {
84        let mut run = Run::default();
85
86        // Parse attributes
87        for attr in start.attributes().filter_map(|a| a.ok()) {
88            let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
89            let value = String::from_utf8_lossy(&attr.value).to_string();
90            run.unknown_attrs.push((key, value));
91        }
92
93        let mut buf = Vec::new();
94
95        loop {
96            match reader.read_event_into(&mut buf)? {
97                Event::Start(e) => {
98                    let name = e.name();
99                    let local = name.local_name();
100
101                    match local.as_ref() {
102                        b"rPr" => {
103                            run.properties = Some(RunProperties::from_reader(reader)?);
104                        }
105                        b"t" => {
106                            // Read text content
107                            let text = read_text_content(reader)?;
108                            run.content.push(RunContent::Text(text));
109                        }
110                        _ => {
111                            // Unknown - preserve
112                            let raw = RawXmlElement::from_reader(reader, &e)?;
113                            run.content
114                                .push(RunContent::Unknown(RawXmlNode::Element(raw)));
115                        }
116                    }
117                }
118                Event::Empty(e) => {
119                    let name = e.name();
120                    let local = name.local_name();
121
122                    match local.as_ref() {
123                        b"t" => {
124                            // Empty text element
125                            run.content.push(RunContent::Text(String::new()));
126                        }
127                        b"tab" => {
128                            run.content.push(RunContent::Tab);
129                        }
130                        b"br" => {
131                            let break_type = match crate::xml::get_attr(&e, "w:type")
132                                .or_else(|| crate::xml::get_attr(&e, "type"))
133                                .as_deref()
134                            {
135                                Some("page") => BreakType::Page,
136                                Some("column") => BreakType::Column,
137                                _ => BreakType::TextWrapping,
138                            };
139                            run.content.push(RunContent::Break(break_type));
140                        }
141                        b"cr" => {
142                            run.content.push(RunContent::CarriageReturn);
143                        }
144                        b"softHyphen" => {
145                            run.content.push(RunContent::SoftHyphen);
146                        }
147                        b"noBreakHyphen" => {
148                            run.content.push(RunContent::NoBreakHyphen);
149                        }
150                        _ => {
151                            // Unknown - preserve
152                            let raw = RawXmlElement {
153                                name: String::from_utf8_lossy(e.name().as_ref()).to_string(),
154                                attributes: e
155                                    .attributes()
156                                    .filter_map(|a| a.ok())
157                                    .map(|a| {
158                                        (
159                                            String::from_utf8_lossy(a.key.as_ref()).to_string(),
160                                            String::from_utf8_lossy(&a.value).to_string(),
161                                        )
162                                    })
163                                    .collect(),
164                                children: Vec::new(),
165                                self_closing: true,
166                            };
167                            run.content
168                                .push(RunContent::Unknown(RawXmlNode::Element(raw)));
169                        }
170                    }
171                }
172                Event::End(e) => {
173                    if e.name().local_name().as_ref() == b"r" {
174                        break;
175                    }
176                }
177                Event::Eof => break,
178                _ => {}
179            }
180            buf.clear();
181        }
182
183        Ok(run)
184    }
185
186    /// Create from empty element
187    pub fn from_empty(start: &BytesStart) -> Result<Self> {
188        let mut run = Run::default();
189
190        for attr in start.attributes().filter_map(|a| a.ok()) {
191            let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
192            let value = String::from_utf8_lossy(&attr.value).to_string();
193            run.unknown_attrs.push((key, value));
194        }
195
196        Ok(run)
197    }
198
199    /// Get all text in this run
200    pub fn text(&self) -> String {
201        let mut result = String::new();
202        for content in &self.content {
203            match content {
204                RunContent::Text(t) => result.push_str(t),
205                RunContent::Tab => result.push('\t'),
206                RunContent::Break(BreakType::TextWrapping) => result.push('\n'),
207                RunContent::CarriageReturn => result.push('\n'),
208                _ => {}
209            }
210        }
211        result
212    }
213
214    /// Check if bold
215    pub fn bold(&self) -> bool {
216        self.properties
217            .as_ref()
218            .and_then(|p| p.bold)
219            .unwrap_or(false)
220    }
221
222    /// Check if italic
223    pub fn italic(&self) -> bool {
224        self.properties
225            .as_ref()
226            .and_then(|p| p.italic)
227            .unwrap_or(false)
228    }
229
230    /// Get font size in points (None if not specified)
231    pub fn font_size_pt(&self) -> Option<f32> {
232        self.properties.as_ref()?.size.map(|s| s as f32 / 2.0)
233    }
234
235    /// Get color (RGB hex string)
236    pub fn color(&self) -> Option<&str> {
237        self.properties.as_ref()?.color.as_deref()
238    }
239
240    /// Get underline type
241    pub fn underline(&self) -> Option<&str> {
242        self.properties.as_ref()?.underline.as_deref()
243    }
244
245    /// Check if has strike-through
246    pub fn strike(&self) -> bool {
247        self.properties
248            .as_ref()
249            .and_then(|p| p.strike)
250            .unwrap_or(false)
251    }
252
253    /// Write to XML writer
254    pub fn write_to<W: std::io::Write>(&self, writer: &mut Writer<W>) -> Result<()> {
255        let mut start = BytesStart::new("w:r");
256        for (key, value) in &self.unknown_attrs {
257            start.push_attribute((key.as_str(), value.as_str()));
258        }
259
260        // Check if run is empty (no properties, no content, no unknown children)
261        let is_empty = self.properties.is_none()
262            && self.content.is_empty()
263            && self.unknown_children.is_empty();
264
265        if is_empty {
266            writer.write_event(Event::Empty(start))?;
267        } else {
268            writer.write_event(Event::Start(start))?;
269
270            // Write properties
271            if let Some(props) = &self.properties {
272                props.write_to(writer)?;
273            }
274
275            // Write content
276            for content in &self.content {
277                content.write_to(writer)?;
278            }
279
280            // Write unknown children
281            for child in &self.unknown_children {
282                child.write_to(writer)?;
283            }
284
285            writer.write_event(Event::End(BytesEnd::new("w:r")))?;
286        }
287
288        Ok(())
289    }
290
291    /// Create a new run with text
292    pub fn new(text: impl Into<String>) -> Self {
293        Run {
294            content: vec![RunContent::Text(text.into())],
295            ..Default::default()
296        }
297    }
298
299    /// Set bold
300    pub fn set_bold(&mut self, bold: bool) {
301        self.properties.get_or_insert_with(Default::default).bold = Some(bold);
302    }
303
304    /// Set italic
305    pub fn set_italic(&mut self, italic: bool) {
306        self.properties.get_or_insert_with(Default::default).italic = Some(italic);
307    }
308
309    /// Set font size in points
310    pub fn set_font_size_pt(&mut self, size: f32) {
311        self.properties.get_or_insert_with(Default::default).size = Some((size * 2.0) as u32);
312    }
313
314    /// Set color (RGB hex string)
315    pub fn set_color(&mut self, color: impl Into<String>) {
316        self.properties.get_or_insert_with(Default::default).color = Some(color.into());
317    }
318}
319
320impl RunContent {
321    /// Write to XML writer
322    pub fn write_to<W: std::io::Write>(&self, writer: &mut Writer<W>) -> Result<()> {
323        match self {
324            RunContent::Text(text) => {
325                let mut start = BytesStart::new("w:t");
326                // Preserve space if text has leading/trailing whitespace
327                if text.starts_with(' ') || text.ends_with(' ') || text.contains("  ") {
328                    start.push_attribute(("xml:space", "preserve"));
329                }
330                writer.write_event(Event::Start(start))?;
331                writer.write_event(Event::Text(BytesText::new(text)))?;
332                writer.write_event(Event::End(BytesEnd::new("w:t")))?;
333            }
334            RunContent::Tab => {
335                writer.write_event(Event::Empty(BytesStart::new("w:tab")))?;
336            }
337            RunContent::Break(break_type) => {
338                let mut start = BytesStart::new("w:br");
339                match break_type {
340                    BreakType::Page => start.push_attribute(("w:type", "page")),
341                    BreakType::Column => start.push_attribute(("w:type", "column")),
342                    BreakType::TextWrapping => {}
343                }
344                writer.write_event(Event::Empty(start))?;
345            }
346            RunContent::CarriageReturn => {
347                writer.write_event(Event::Empty(BytesStart::new("w:cr")))?;
348            }
349            RunContent::SoftHyphen => {
350                writer.write_event(Event::Empty(BytesStart::new("w:softHyphen")))?;
351            }
352            RunContent::NoBreakHyphen => {
353                writer.write_event(Event::Empty(BytesStart::new("w:noBreakHyphen")))?;
354            }
355            RunContent::Unknown(node) => {
356                node.write_to(writer)?;
357            }
358        }
359        Ok(())
360    }
361}
362
363impl RunProperties {
364    /// Parse from reader (after w:rPr start tag)
365    pub fn from_reader<R: BufRead>(reader: &mut Reader<R>) -> Result<Self> {
366        let mut props = RunProperties::default();
367        let mut buf = Vec::new();
368
369        loop {
370            match reader.read_event_into(&mut buf)? {
371                Event::Start(e) => {
372                    let name = e.name();
373                    let local = name.local_name();
374
375                    match local.as_ref() {
376                        b"rFonts" => {
377                            // Read font info then skip
378                            props.font_ascii = crate::xml::get_attr(&e, "w:ascii")
379                                .or_else(|| crate::xml::get_attr(&e, "ascii"));
380                            props.font_east_asia = crate::xml::get_attr(&e, "w:eastAsia")
381                                .or_else(|| crate::xml::get_attr(&e, "eastAsia"));
382                            // Skip to end
383                            skip_element(reader, &e)?;
384                        }
385                        _ => {
386                            // Unknown - preserve
387                            let raw = RawXmlElement::from_reader(reader, &e)?;
388                            props.unknown_children.push(RawXmlNode::Element(raw));
389                        }
390                    }
391                }
392                Event::Empty(e) => {
393                    let name = e.name();
394                    let local = name.local_name();
395
396                    match local.as_ref() {
397                        b"rStyle" => {
398                            props.style = get_w_val(&e);
399                        }
400                        b"b" => {
401                            props.bold = Some(parse_bool(&e));
402                        }
403                        b"bCs" => {
404                            // Complex script bold - ignore for now
405                        }
406                        b"i" => {
407                            props.italic = Some(parse_bool(&e));
408                        }
409                        b"iCs" => {
410                            // Complex script italic - ignore for now
411                        }
412                        b"u" => {
413                            props.underline = get_w_val(&e).or(Some("single".into()));
414                        }
415                        b"strike" => {
416                            props.strike = Some(parse_bool(&e));
417                        }
418                        b"dstrike" => {
419                            props.double_strike = Some(parse_bool(&e));
420                        }
421                        b"sz" => {
422                            props.size = get_w_val(&e).and_then(|v| v.parse().ok());
423                        }
424                        b"szCs" => {
425                            // Complex script size - ignore for now
426                        }
427                        b"color" => {
428                            props.color = get_w_val(&e);
429                        }
430                        b"highlight" => {
431                            props.highlight = get_w_val(&e);
432                        }
433                        b"vertAlign" => {
434                            props.vertical_align = get_w_val(&e);
435                        }
436                        b"rFonts" => {
437                            props.font_ascii = crate::xml::get_attr(&e, "w:ascii")
438                                .or_else(|| crate::xml::get_attr(&e, "ascii"));
439                            props.font_east_asia = crate::xml::get_attr(&e, "w:eastAsia")
440                                .or_else(|| crate::xml::get_attr(&e, "eastAsia"));
441                        }
442                        _ => {
443                            // Unknown - preserve
444                            let raw = RawXmlElement {
445                                name: String::from_utf8_lossy(e.name().as_ref()).to_string(),
446                                attributes: e
447                                    .attributes()
448                                    .filter_map(|a| a.ok())
449                                    .map(|a| {
450                                        (
451                                            String::from_utf8_lossy(a.key.as_ref()).to_string(),
452                                            String::from_utf8_lossy(&a.value).to_string(),
453                                        )
454                                    })
455                                    .collect(),
456                                children: Vec::new(),
457                                self_closing: true,
458                            };
459                            props.unknown_children.push(RawXmlNode::Element(raw));
460                        }
461                    }
462                }
463                Event::End(e) => {
464                    if e.name().local_name().as_ref() == b"rPr" {
465                        break;
466                    }
467                }
468                Event::Eof => break,
469                _ => {}
470            }
471            buf.clear();
472        }
473
474        Ok(props)
475    }
476
477    /// Write to XML writer
478    pub fn write_to<W: std::io::Write>(&self, writer: &mut Writer<W>) -> Result<()> {
479        // Check if there are any properties to write
480        let has_content = self.style.is_some()
481            || self.bold.is_some()
482            || self.italic.is_some()
483            || self.underline.is_some()
484            || self.strike.is_some()
485            || self.double_strike.is_some()
486            || self.size.is_some()
487            || self.color.is_some()
488            || self.highlight.is_some()
489            || self.font_ascii.is_some()
490            || self.vertical_align.is_some()
491            || !self.unknown_children.is_empty();
492
493        if !has_content {
494            return Ok(());
495        }
496
497        writer.write_event(Event::Start(BytesStart::new("w:rPr")))?;
498
499        // Style
500        if let Some(style) = &self.style {
501            let mut elem = BytesStart::new("w:rStyle");
502            elem.push_attribute(("w:val", style.as_str()));
503            writer.write_event(Event::Empty(elem))?;
504        }
505
506        // Fonts
507        if self.font_ascii.is_some() || self.font_east_asia.is_some() {
508            let mut elem = BytesStart::new("w:rFonts");
509            if let Some(font) = &self.font_ascii {
510                elem.push_attribute(("w:ascii", font.as_str()));
511            }
512            if let Some(font) = &self.font_east_asia {
513                elem.push_attribute(("w:eastAsia", font.as_str()));
514            }
515            writer.write_event(Event::Empty(elem))?;
516        }
517
518        // Bold
519        if let Some(bold) = self.bold {
520            let mut elem = BytesStart::new("w:b");
521            if !bold {
522                elem.push_attribute(("w:val", "0"));
523            }
524            writer.write_event(Event::Empty(elem))?;
525        }
526
527        // Italic
528        if let Some(italic) = self.italic {
529            let mut elem = BytesStart::new("w:i");
530            if !italic {
531                elem.push_attribute(("w:val", "0"));
532            }
533            writer.write_event(Event::Empty(elem))?;
534        }
535
536        // Strike
537        if let Some(strike) = self.strike {
538            let mut elem = BytesStart::new("w:strike");
539            if !strike {
540                elem.push_attribute(("w:val", "0"));
541            }
542            writer.write_event(Event::Empty(elem))?;
543        }
544
545        // Double strike
546        if let Some(dstrike) = self.double_strike {
547            let mut elem = BytesStart::new("w:dstrike");
548            if !dstrike {
549                elem.push_attribute(("w:val", "0"));
550            }
551            writer.write_event(Event::Empty(elem))?;
552        }
553
554        // Underline
555        if let Some(underline) = &self.underline {
556            let mut elem = BytesStart::new("w:u");
557            elem.push_attribute(("w:val", underline.as_str()));
558            writer.write_event(Event::Empty(elem))?;
559        }
560
561        // Color
562        if let Some(color) = &self.color {
563            let mut elem = BytesStart::new("w:color");
564            elem.push_attribute(("w:val", color.as_str()));
565            writer.write_event(Event::Empty(elem))?;
566        }
567
568        // Size
569        if let Some(size) = self.size {
570            let mut elem = BytesStart::new("w:sz");
571            elem.push_attribute(("w:val", size.to_string().as_str()));
572            writer.write_event(Event::Empty(elem))?;
573        }
574
575        // Highlight
576        if let Some(highlight) = &self.highlight {
577            let mut elem = BytesStart::new("w:highlight");
578            elem.push_attribute(("w:val", highlight.as_str()));
579            writer.write_event(Event::Empty(elem))?;
580        }
581
582        // Vertical align
583        if let Some(valign) = &self.vertical_align {
584            let mut elem = BytesStart::new("w:vertAlign");
585            elem.push_attribute(("w:val", valign.as_str()));
586            writer.write_event(Event::Empty(elem))?;
587        }
588
589        // Unknown children
590        for child in &self.unknown_children {
591            child.write_to(writer)?;
592        }
593
594        writer.write_event(Event::End(BytesEnd::new("w:rPr")))?;
595        Ok(())
596    }
597}
598
599/// Read text content from w:t element
600fn read_text_content<R: BufRead>(reader: &mut Reader<R>) -> Result<String> {
601    let mut text = String::new();
602    let mut buf = Vec::new();
603
604    loop {
605        match reader.read_event_into(&mut buf)? {
606            Event::Text(t) => {
607                text.push_str(&t.unescape()?);
608            }
609            Event::End(e) => {
610                if e.name().local_name().as_ref() == b"t" {
611                    break;
612                }
613            }
614            Event::Eof => break,
615            _ => {}
616        }
617        buf.clear();
618    }
619
620    Ok(text)
621}
622
623/// Skip to end of element
624fn skip_element<R: BufRead>(reader: &mut Reader<R>, start: &BytesStart) -> Result<()> {
625    let target_name = start.name().as_ref().to_vec();
626    let mut depth = 1;
627    let mut buf = Vec::new();
628
629    loop {
630        match reader.read_event_into(&mut buf)? {
631            Event::Start(e) if e.name().as_ref() == target_name => depth += 1,
632            Event::End(e) if e.name().as_ref() == target_name => {
633                depth -= 1;
634                if depth == 0 {
635                    break;
636                }
637            }
638            Event::Eof => break,
639            _ => {}
640        }
641        buf.clear();
642    }
643
644    Ok(())
645}