ruvector_scipix/output/
docx.rs1use super::{LineData, OcrResult};
12use std::io::Write;
13
14#[allow(dead_code)]
16pub struct DocxFormatter {
17 include_styles: bool,
18 page_size: PageSize,
19 margins: Margins,
20}
21
/// Page dimensions for a generated document.
///
/// The preset values are consistent with twips (1/1440 inch), e.g. US Letter
/// is 12240 x 15840 = 8.5 x 11 in. NOTE(review): confirm the unit against the
/// code that eventually consumes these fields.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PageSize {
    /// Page width.
    pub width: u32,
    /// Page height.
    pub height: u32,
}
27
28impl PageSize {
29 pub fn letter() -> Self {
30 Self {
31 width: 12240, height: 15840, }
34 }
35
36 pub fn a4() -> Self {
37 Self {
38 width: 11906, height: 16838, }
41 }
42}
43
/// Page margins for a generated document, one value per side.
///
/// NOTE(review): presumably expressed in the same unit as [`PageSize`]
/// (twips, 1/1440 inch) — confirm against the consumer of these fields.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Margins {
    /// Top margin.
    pub top: u32,
    /// Right margin.
    pub right: u32,
    /// Bottom margin.
    pub bottom: u32,
    /// Left margin.
    pub left: u32,
}
51
52impl Margins {
53 pub fn normal() -> Self {
54 Self {
55 top: 1440, right: 1440,
57 bottom: 1440,
58 left: 1440,
59 }
60 }
61}
62
63impl DocxFormatter {
64 pub fn new() -> Self {
65 Self {
66 include_styles: true,
67 page_size: PageSize::letter(),
68 margins: Margins::normal(),
69 }
70 }
71
72 pub fn with_page_size(mut self, page_size: PageSize) -> Self {
73 self.page_size = page_size;
74 self
75 }
76
77 pub fn with_margins(mut self, margins: Margins) -> Self {
78 self.margins = margins;
79 self
80 }
81
82 pub fn latex_to_mathml(&self, latex: &str) -> String {
85 format!(
88 r#"<m:oMathPara>
89 <m:oMath>
90 <m:r>
91 <m:t>{}</m:t>
92 </m:r>
93 </m:oMath>
94</m:oMathPara>"#,
95 self.escape_xml(latex)
96 )
97 }
98
99 pub fn generate_document_xml(&self, lines: &[LineData]) -> String {
101 let mut xml = String::from(r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
102<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
103 xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math">
104 <w:body>
105"#);
106
107 for line in lines {
108 xml.push_str(&self.format_line(line));
109 }
110
111 xml.push_str(" </w:body>\n</w:document>");
112 xml
113 }
114
115 fn format_line(&self, line: &LineData) -> String {
116 match line.line_type.as_str() {
117 "text" => self.format_paragraph(&line.text),
118 "math" | "equation" => {
119 let latex = line.latex.as_ref().unwrap_or(&line.text);
120 self.format_math(latex)
121 }
122 "heading" => self.format_heading(&line.text, 1),
123 _ => self.format_paragraph(&line.text),
124 }
125 }
126
127 fn format_paragraph(&self, text: &str) -> String {
128 format!(
129 r#" <w:p>
130 <w:r>
131 <w:t>{}</w:t>
132 </w:r>
133 </w:p>
134"#,
135 self.escape_xml(text)
136 )
137 }
138
139 fn format_heading(&self, text: &str, level: u32) -> String {
140 format!(
141 r#" <w:p>
142 <w:pPr>
143 <w:pStyle w:val="Heading{}"/>
144 </w:pPr>
145 <w:r>
146 <w:t>{}</w:t>
147 </w:r>
148 </w:p>
149"#,
150 level,
151 self.escape_xml(text)
152 )
153 }
154
155 fn format_math(&self, latex: &str) -> String {
156 let mathml = self.latex_to_mathml(latex);
157 format!(
158 r#" <w:p>
159 <w:r>
160 {}
161 </w:r>
162 </w:p>
163"#,
164 mathml
165 )
166 }
167
168 fn escape_xml(&self, text: &str) -> String {
169 text.replace('&', "&")
170 .replace('<', "<")
171 .replace('>', ">")
172 .replace('"', """)
173 .replace('\'', "'")
174 }
175
176 pub fn save_to_file<W: Write>(
178 &self,
179 _writer: &mut W,
180 _result: &OcrResult,
181 ) -> Result<(), String> {
182 Err("DOCX binary format generation not implemented. Use docx-rs library for full implementation.".to_string())
183 }
184
185 pub fn generate_styles_xml(&self) -> String {
187 r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
188<w:styles xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
189 <w:style w:type="paragraph" w:styleId="Normal">
190 <w:name w:val="Normal"/>
191 <w:qFormat/>
192 </w:style>
193 <w:style w:type="paragraph" w:styleId="Heading1">
194 <w:name w:val="Heading 1"/>
195 <w:basedOn w:val="Normal"/>
196 <w:qFormat/>
197 <w:pPr>
198 <w:keepNext/>
199 <w:keepLines/>
200 </w:pPr>
201 <w:rPr>
202 <w:b/>
203 <w:sz w:val="32"/>
204 </w:rPr>
205 </w:style>
206</w:styles>"#.to_string()
207 }
208}
209
210impl Default for DocxFormatter {
211 fn default() -> Self {
212 Self::new()
213 }
214}
215
#[cfg(test)]
mod tests {
    use super::*;
    use crate::output::BoundingBox;

    /// Letter is 12240 wide and A4 is narrower.
    #[test]
    fn test_page_sizes() {
        let letter = PageSize::letter();
        assert_eq!(letter.width, 12240);

        let a4 = PageSize::a4();
        assert!(a4.width < letter.width);
    }

    /// Special characters must come out as XML entity references, and no
    /// raw markup character may survive in the escaped text.
    #[test]
    fn test_escape_xml() {
        let formatter = DocxFormatter::new();
        let result = formatter.escape_xml("Test <tag> & \"quote\"");

        assert!(result.contains("&lt;"));
        assert!(result.contains("&gt;"));
        assert!(result.contains("&amp;"));
        assert!(result.contains("&quot;"));
        assert!(!result.contains('<'));
        assert!(!result.contains('>'));
        assert!(!result.contains('"'));
    }

    /// A plain paragraph wraps the text in `w:p` / `w:t`.
    #[test]
    fn test_format_paragraph() {
        let formatter = DocxFormatter::new();
        let result = formatter.format_paragraph("Hello World");

        assert!(result.contains("<w:p>"));
        assert!(result.contains("<w:t>Hello World</w:t>"));
    }

    /// A level-1 heading references the `Heading1` style.
    #[test]
    fn test_format_heading() {
        let formatter = DocxFormatter::new();
        let result = formatter.format_heading("Chapter 1", 1);

        assert!(result.contains("Heading1"));
        assert!(result.contains("Chapter 1"));
    }

    /// The LaTeX source is carried verbatim inside an `m:oMath` element.
    #[test]
    fn test_latex_to_mathml() {
        let formatter = DocxFormatter::new();
        let result = formatter.latex_to_mathml("E = mc^2");

        assert!(result.contains("<m:oMath>"));
        assert!(result.contains("mc^2"));
    }

    /// A one-line document has the XML prolog, the root element and the text.
    #[test]
    fn test_generate_document_xml() {
        let formatter = DocxFormatter::new();
        let lines = vec![LineData {
            line_type: "text".to_string(),
            text: "Hello".to_string(),
            latex: None,
            bbox: BoundingBox::new(0.0, 0.0, 100.0, 20.0),
            confidence: 0.95,
            words: None,
        }];

        let xml = formatter.generate_document_xml(&lines);
        assert!(xml.contains("<?xml"));
        assert!(xml.contains("<w:document"));
        assert!(xml.contains("Hello"));
    }

    /// The styles part defines both the `Normal` and `Heading 1` styles.
    #[test]
    fn test_generate_styles_xml() {
        let formatter = DocxFormatter::new();
        let xml = formatter.generate_styles_xml();

        assert!(xml.contains("<w:styles"));
        assert!(xml.contains("Normal"));
        assert!(xml.contains("Heading 1"));
    }
}