docx_handlebars/
utils.rs

1//! 实用工具函数
2
3use crate::errors::DocxError;
4
5/// 验证 DOCX 文件格式
6/// 检查文件是否为有效的 ZIP 格式,并包含必需的 DOCX 文件结构
7pub fn validate_docx_format(file_data: &[u8]) -> Result<(), DocxError> {
8    // 检查文件大小
9    if file_data.len() < 22 {
10        return Err(DocxError::InvalidZipFormat);
11    }
12    
13    // 检查 ZIP 文件签名
14    // ZIP 文件的签名通常是 0x504B0304 (PK..) 或 0x504B0506 (PK.. 空文件)
15    // 或者 0x504B0708 (PK.. 分割压缩包)
16    let signature = u32::from_le_bytes([
17        file_data[0], file_data[1], file_data[2], file_data[3]
18    ]);
19    
20    match signature {
21        0x04034b50 | 0x06054b50 | 0x08074b50 => {
22            // 有效的 ZIP 签名
23        },
24        _ => return Err(DocxError::InvalidZipFormat),
25    }
26    
27    Ok(())
28}
29
// Convert XML escape sequences back to their literal characters (disabled; kept for reference).
31// pub fn xml_escape_to_normal(xml_content: String) -> String {
32//     xml_content
33//         .replace("&lt;", "<")
34//         .replace("&gt;", ">")
35//         .replace("&amp;", "&")
36//         .replace("&quot;", "\"")
37//         .replace("&apos;", "'")
38// }
39
/// Re-joins Handlebars expressions that were split apart by XML tags.
///
/// The input is cut into alternating tag (`<...>`) and text segments. Text is
/// copied to the output while the running balance of `{` and `}` is tracked;
/// tags are copied only while that balance is zero. As a result, every tag
/// that sits *inside* an open Handlebars expression is removed and the pieces
/// of the expression end up adjacent.
///
/// Text that lies between two tags is kept only when the preceding tag is an
/// opening tag, the following tag is a closing tag, and both carry the same
/// name (for example `<w:t>text</w:t>`). Any other text between two tags
/// (typically indentation) is discarded. Text before the first tag or after
/// the last tag is always kept.
///
/// If `xml_content` contains no `{` at all it is returned unchanged.
///
/// # Errors
///
/// The current implementation never returns `Err`; the `Result` is part of
/// the public signature.
pub fn merge_handlebars_in_xml(xml_content: String) -> Result<String, Box<dyn std::error::Error>> {
    // Nothing to merge when there is no brace at all.
    if !xml_content.contains('{') {
        return Ok(xml_content);
    }

    let segments = split_xml_segments(&xml_content);
    let mut merged = String::with_capacity(xml_content.len());

    // Number of currently unclosed `{` characters seen in kept text.
    let mut brace_depth: i32 = 0;

    for (index, &(kind, part)) in segments.iter().enumerate() {
        match kind {
            XmlSegmentKind::Tag => {
                // Tags inside an open expression are what split it; drop them.
                if brace_depth == 0 {
                    merged.push_str(part);
                }
            }
            XmlSegmentKind::Text => {
                if is_discardable_text(&segments, index) {
                    continue;
                }
                for ch in part.chars() {
                    match ch {
                        '{' => brace_depth += 1,
                        '}' => brace_depth -= 1,
                        _ => {}
                    }
                }
                merged.push_str(part);
            }
        }
    }

    Ok(merged)
}

/// Kind of a slice produced by `split_xml_segments`.
#[derive(Clone, Copy, PartialEq, Eq)]
enum XmlSegmentKind {
    /// Character data outside of `<...>`.
    Text,
    /// A `<...>` span, including both angle brackets.
    Tag,
}

/// Splits `xml` into consecutive tag and text slices that together cover the
/// whole input, in order.
fn split_xml_segments(xml: &str) -> Vec<(XmlSegmentKind, &str)> {
    let mut segments = Vec::new();
    let mut start = 0;
    let mut in_tag = false;

    for (idx, ch) in xml.char_indices() {
        match ch {
            '<' if !in_tag => {
                if start < idx {
                    segments.push((XmlSegmentKind::Text, &xml[start..idx]));
                }
                start = idx;
                in_tag = true;
            }
            // A `>` outside of a tag is ordinary character data, so it is only
            // treated as a terminator while a tag is open.
            '>' if in_tag => {
                segments.push((XmlSegmentKind::Tag, &xml[start..=idx]));
                start = idx + 1;
                in_tag = false;
            }
            _ => {}
        }
    }

    // Flush whatever follows the last complete tag so no input is lost.
    if start < xml.len() {
        let kind = if in_tag {
            XmlSegmentKind::Tag
        } else {
            XmlSegmentKind::Text
        };
        segments.push((kind, &xml[start..]));
    }

    segments
}

/// Returns the first whitespace-delimited token of a tag with the surrounding
/// angle brackets removed (a closing tag keeps its leading `/`).
fn xml_tag_name(tag: &str) -> &str {
    tag.trim_start_matches('<')
        .trim_end_matches('>')
        .split_whitespace()
        .next()
        .unwrap_or("")
}

/// Decides whether the text segment at `index` must be dropped because it
/// does not sit between a matching opening/closing tag pair.
fn is_discardable_text(segments: &[(XmlSegmentKind, &str)], index: usize) -> bool {
    // Leading and trailing text has only one neighbour and is always kept.
    if index == 0 || index + 1 >= segments.len() {
        return false;
    }

    let (prev_kind, prev) = segments[index - 1];
    let (next_kind, next) = segments[index + 1];
    if prev_kind != XmlSegmentKind::Tag || next_kind != XmlSegmentKind::Tag {
        return false;
    }

    // Two opening tags or two closing tags around the text: filler.
    let prev_closes = prev.starts_with("</");
    let next_closes = next.starts_with("</");
    if prev_closes == next_closes {
        return true;
    }

    // One opening and one closing tag: keep only when the names pair up.
    let prev_name = xml_tag_name(prev);
    let next_name = xml_tag_name(next);
    prev_name == next_name || prev_name != next_name.trim_start_matches('/')
}
149
150/// 注册基础的 Handlebars helper 函数
151pub fn register_basic_helpers(handlebars: &mut handlebars::Handlebars) -> Result<(), Box<dyn std::error::Error>> {
152    use handlebars::handlebars_helper;
153    use serde_json::Value;
154    
155    // 注册 eq helper (相等比较)
156    handlebars_helper!(eq: |x: Value, y: Value| x == y);
157    handlebars.register_helper("eq", Box::new(eq));
158    
159    // 注册 ne helper (不等比较)  
160    handlebars_helper!(ne: |x: Value, y: Value| x != y);
161    handlebars.register_helper("ne", Box::new(ne));
162    
163    // 注册 gt helper (大于)
164    handlebars_helper!(gt: |x: i64, y: i64| x > y);
165    handlebars.register_helper("gt", Box::new(gt));
166    
167    // 注册 lt helper (小于)
168    handlebars_helper!(lt: |x: i64, y: i64| x < y);
169    handlebars.register_helper("lt", Box::new(lt));
170    
171    // 注册 upper helper (转大写)
172    handlebars_helper!(upper: |s: String| s.to_uppercase());
173    handlebars.register_helper("upper", Box::new(upper));
174    
175    // 注册 lower helper (转小写)
176    handlebars_helper!(lower: |s: String| s.to_lowercase());
177    handlebars.register_helper("lower", Box::new(lower));
178    
179    // 注册 len helper (数组/字符串长度)
180    handlebars_helper!(len: |x: Value| {
181        match x {
182            Value::Array(arr) => arr.len(),
183            Value::String(s) => s.chars().count(),
184            Value::Object(obj) => obj.len(),
185            _ => 0
186        }
187    });
188    handlebars.register_helper("len", Box::new(len));
189    
190    Ok(())
191}
192
/// Reads the pixel dimensions of a PNG image.
///
/// Returns `None` when fewer than 24 bytes are available or when the buffer
/// does not begin with the 8-byte PNG signature. Otherwise returns
/// `(width, height)` decoded as big-endian `u32` values from byte offsets
/// 16..20 and 20..24.
fn get_png_dimensions(data: &[u8]) -> Option<(u32, u32)> {
    // Signature (8 bytes) + chunk length and type (8 bytes) + width/height (8 bytes).
    let header = data.get(..24)?;
    if &header[..8] != b"\x89PNG\r\n\x1a\n" {
        return None;
    }

    let be_u32 = |at: usize| {
        u32::from_be_bytes([header[at], header[at + 1], header[at + 2], header[at + 3]])
    };

    Some((be_u32(16), be_u32(20)))
}
203
/// Reads the pixel dimensions of a JPEG image.
///
/// The buffer must start with the SOI marker (`FF D8`). Segments are then
/// walked until a start-of-frame marker is found, whose header carries the
/// height and width as big-endian `u16` values.
///
/// Returns `Some((width, height))`, or `None` when the buffer is not a JPEG,
/// is truncated, or reaches the scan data / end of image without a
/// start-of-frame segment.
fn get_jpeg_dimensions(data: &[u8]) -> Option<(u16, u16)> {
    // Without the SOI check, arbitrary binary data containing `FF C0` would be
    // misreported as a JPEG.
    if data.len() < 2 || data[0] != 0xFF || data[1] != 0xD8 {
        return None;
    }

    let mut pos = 2;
    while pos + 1 < data.len() {
        if data[pos] != 0xFF {
            // Not at a marker: resynchronise byte by byte.
            pos += 1;
            continue;
        }

        let marker = data[pos + 1];
        match marker {
            // `FF FF`: the first byte is padding before the real marker.
            0xFF => {
                pos += 1;
                continue;
            }
            // Stuffed byte, TEM, RSTn and SOI carry no length field.
            0x00 | 0x01 | 0xD0..=0xD8 => {
                pos += 2;
                continue;
            }
            // EOI or SOS: the frame header must come before these, so there is
            // nothing left to find (and scan data must not be parsed as segments).
            0xD9 | 0xDA => return None,
            _ => {}
        }

        let length_bytes = data.get(pos + 2..pos + 4)?;
        let segment_len = usize::from(u16::from_be_bytes([length_bytes[0], length_bytes[1]]));

        // SOF0..SOF15, excluding DHT (C4), JPG (C8) and DAC (CC), which share the range.
        let is_start_of_frame =
            matches!(marker, 0xC0..=0xCF) && !matches!(marker, 0xC4 | 0xC8 | 0xCC);
        if is_start_of_frame {
            // Layout after the marker: length (2), precision (1), height (2), width (2).
            let dims = data.get(pos + 5..pos + 9)?;
            let height = u16::from_be_bytes([dims[0], dims[1]]);
            let width = u16::from_be_bytes([dims[2], dims[3]]);
            return Some((width, height));
        }

        // Skip the marker itself plus the segment (the length includes its own two bytes).
        pos += 2 + segment_len;
    }

    None
}
224
/// Reads the pixel dimensions of a WebP image.
///
/// Requires at least 30 bytes and a `RIFF....WEBP` container header, then
/// dispatches on the first chunk's FourCC:
///
/// * `VP8X` (extended): 24-bit little-endian canvas width-1 / height-1.
/// * `VP8 ` (lossy): 14-bit width / height from the key-frame header.
/// * `VP8L` (lossless): 14-bit width-1 / height-1 packed after the signature byte.
///
/// Returns `None` for any other input.
fn get_webp_dimensions(data: &[u8]) -> Option<(u32, u32)> {
    if data.len() < 30 || &data[0..4] != b"RIFF" || &data[8..12] != b"WEBP" {
        return None;
    }

    let fourcc = &data[12..16];

    if fourcc == b"VP8X" {
        // Canvas size is stored minus one, three bytes each.
        let width = 1 + u32::from_le_bytes([data[24], data[25], data[26], 0]);
        let height = 1 + u32::from_le_bytes([data[27], data[28], data[29], 0]);
        return Some((width, height));
    }

    if fourcc == b"VP8 " {
        // Each 16-bit field is 14 bits of size plus 2 bits of upscaling factor
        // in the top bits; only the size bits belong to the dimension.
        const SIZE_MASK: u16 = 0x3FFF;
        let width = u16::from_le_bytes([data[26], data[27]]) & SIZE_MASK;
        let height = u16::from_le_bytes([data[28], data[29]]) & SIZE_MASK;
        return Some((u32::from(width), u32::from(height)));
    }

    if fourcc == b"VP8L" {
        // Four bytes after the one-byte signature: 14 bits width-1, then 14 bits height-1.
        let b = &data[21..25];
        let width = 1 + (((b[1] & 0x3F) as u32) << 8 | b[0] as u32);
        let height = 1
            + (((b[3] & 0xF) as u32) << 10 | (b[2] as u32) << 2 | ((b[1] & 0xC0) as u32) >> 6);
        return Some((width, height));
    }

    None
}
252
/// Reads the pixel dimensions of a BMP image.
///
/// Requires at least 26 bytes and the `BM` signature. Width and height are
/// read as little-endian signed 32-bit integers from offsets 18..22 and
/// 22..26. A negative height marks a top-down bitmap, so its absolute value
/// is reported. A negative width is invalid and yields `None`.
fn get_bmp_dimensions(data: &[u8]) -> Option<(u32, u32)> {
    if data.len() < 26 || &data[0..2] != b"BM" {
        return None;
    }

    let width = i32::from_le_bytes([data[18], data[19], data[20], data[21]]);
    let height = i32::from_le_bytes([data[22], data[23], data[24], data[25]]);

    if width < 0 {
        return None;
    }

    // `width` is known to be non-negative here, so the cast is lossless.
    Some((width as u32, height.unsigned_abs()))
}
263
/// Reads the pixel dimensions of a TIFF image.
///
/// The header selects the byte order (`II` little-endian, `MM` big-endian)
/// and must carry the magic number 42. The first image file directory is then
/// scanned for tag 256 (ImageWidth) and tag 257 (ImageLength), each accepted
/// as SHORT (type 3) or LONG (type 4).
///
/// Returns `None` when the header is invalid, the directory lies outside the
/// buffer, or either tag is missing. All offsets are bounds-checked with
/// overflow-safe arithmetic, so a hostile offset cannot panic even where
/// `usize` is 32 bits wide.
fn get_tiff_dimensions(data: &[u8]) -> Option<(u32, u32)> {
    if data.len() < 8 {
        return None;
    }

    // Byte order mark.
    let little_endian = &data[0..2] == b"II";
    let big_endian = &data[0..2] == b"MM";
    if !little_endian && !big_endian {
        return None;
    }

    let decode_u16 = |d: &[u8]| {
        if little_endian {
            u16::from_le_bytes([d[0], d[1]])
        } else {
            u16::from_be_bytes([d[0], d[1]])
        }
    };
    let decode_u32 = |d: &[u8]| {
        if little_endian {
            u32::from_le_bytes([d[0], d[1], d[2], d[3]])
        } else {
            u32::from_be_bytes([d[0], d[1], d[2], d[3]])
        }
    };

    // Magic number.
    if decode_u16(&data[2..4]) != 42 {
        return None;
    }

    // Locate the first IFD; `get` + `checked_add` reject out-of-range offsets.
    let ifd_offset = decode_u32(&data[4..8]) as usize;
    let entries_start = ifd_offset.checked_add(2)?;
    let entry_count = decode_u16(data.get(ifd_offset..entries_start)?) as usize;

    let mut width = None;
    let mut height = None;

    for index in 0..entry_count {
        // Each directory entry is 12 bytes: tag (2), type (2), count (4), value (4).
        let start = match entries_start.checked_add(index * 12) {
            Some(start) => start,
            None => break,
        };
        let entry = match start.checked_add(12).and_then(|end| data.get(start..end)) {
            Some(entry) => entry,
            None => break, // directory is truncated
        };

        let tag = decode_u16(&entry[0..2]);
        let field_type = decode_u16(&entry[2..4]);
        let value = &entry[8..12];

        let slot = match tag {
            256 => &mut width,  // ImageWidth
            257 => &mut height, // ImageLength
            _ => continue,
        };
        *slot = Some(match field_type {
            3 => u32::from(decode_u16(value)), // SHORT
            4 => decode_u32(value),            // LONG
            _ => continue,
        });

        if width.is_some() && height.is_some() {
            break;
        }
    }

    Some((width?, height?))
}
330
/// Reads the pixel dimensions of a GIF image.
///
/// Returns `None` when fewer than 10 bytes are available or when the buffer
/// does not start with `GIF87a` or `GIF89a`. Otherwise returns
/// `(width, height)` decoded as little-endian `u16` values from the four
/// bytes that follow the signature.
fn get_gif_dimensions(data: &[u8]) -> Option<(u32, u32)> {
    let header = data.get(..10)?;
    let (signature, screen) = header.split_at(6);

    if signature != b"GIF87a" && signature != b"GIF89a" {
        return None;
    }

    let width = u16::from_le_bytes([screen[0], screen[1]]);
    let height = u16::from_le_bytes([screen[2], screen[3]]);
    Some((u32::from(width), u32::from(height)))
}
339
340// 获取图片的宽高
341/// 支持 PNG, JPEG, WebP, BMP, TIFF, GIF 格式
342pub fn get_image_dimensions(data: &[u8]) -> Option<(u32, u32)> {
343    if let Some((w, h)) = get_png_dimensions(data) {
344        Some((w, h))
345    } else if let Some((w, h)) = get_jpeg_dimensions(data).map(|(w, h)| (w as u32, h as u32)) {
346        Some((w, h))
347    } else if let Some((w, h)) = get_webp_dimensions(data) {
348        Some((w, h))
349    } else if let Some((w, h)) = get_bmp_dimensions(data) {
350        Some((w, h))
351    } else if let Some((w, h)) = get_tiff_dimensions(data) {
352        Some((w, h))
353    } else if let Some((w, h)) = get_gif_dimensions(data) {
354        Some((w, h))
355    } else {
356        None
357    }
358}