Skip to main content

zai_rs/tool/file_parser_create/
request.rs

1//! File parser creation request models and types.
2//!
3//! This module provides data structures for file parser creation requests,
4//! supporting multiple file formats and parsing tools.
5
6use std::path::Path;
7
8use serde::{Deserialize, Serialize};
9
10/// Parsing tool types with different capabilities.
11#[derive(Debug, Clone, Serialize, Deserialize)]
12#[serde(rename_all = "lowercase")]
13pub enum ToolType {
14    /// Lite parser with basic file format support
15    Lite,
16    /// Expert parser optimized for PDF files
17    Expert,
18    /// Prime parser with extensive file format support
19    Prime,
20}
21
22/// Supported file types for parsing.
23#[derive(Debug, Clone, Serialize, Deserialize)]
24#[serde(rename_all = "UPPERCASE")]
25pub enum FileType {
26    /// PDF documents
27    PDF,
28    /// Word documents (.docx)
29    DOCX,
30    /// Word documents (.doc)
31    DOC,
32    /// Excel spreadsheets (.xls)
33    XLS,
34    /// Excel spreadsheets (.xlsx)
35    XLSX,
36    /// PowerPoint presentations (.ppt)
37    PPT,
38    /// PowerPoint presentations (.pptx)
39    PPTX,
40    /// PNG images
41    PNG,
42    /// JPG images
43    JPG,
44    /// JPEG images
45    JPEG,
46    /// CSV files
47    CSV,
48    /// Text files
49    TXT,
50    /// Markdown files
51    MD,
52    /// HTML files
53    HTML,
54    /// EPUB files
55    EPUB,
56    /// BMP images
57    BMP,
58    /// GIF images
59    GIF,
60    /// WEBP images
61    WEBP,
62    /// HEIC images
63    HEIC,
64    /// EPS files
65    EPS,
66    /// ICNS files
67    ICNS,
68    /// IM images
69    IM,
70    /// PCX images
71    PCX,
72    /// PPM images
73    PPM,
74    /// TIFF images
75    TIFF,
76    /// XBM images
77    XBM,
78    /// HEIF images
79    HEIF,
80    /// JP2 images
81    JP2,
82}
83
84impl FileType {
85    /// Check if this file type is supported by the given tool type.
86    pub fn is_supported_by(&self, tool_type: &ToolType) -> bool {
87        match tool_type {
88            ToolType::Lite => {
89                matches!(
90                    self,
91                    FileType::PDF
92                        | FileType::DOCX
93                        | FileType::DOC
94                        | FileType::XLS
95                        | FileType::XLSX
96                        | FileType::PPT
97                        | FileType::PPTX
98                        | FileType::PNG
99                        | FileType::JPG
100                        | FileType::JPEG
101                        | FileType::CSV
102                        | FileType::TXT
103                        | FileType::MD
104                )
105            },
106            ToolType::Expert => matches!(self, FileType::PDF),
107            ToolType::Prime => true, // Prime supports all file types
108        }
109    }
110
111    /// Get the file extension for this file type.
112    pub fn extension(&self) -> &'static str {
113        match self {
114            FileType::PDF => "pdf",
115            FileType::DOCX => "docx",
116            FileType::DOC => "doc",
117            FileType::XLS => "xls",
118            FileType::XLSX => "xlsx",
119            FileType::PPT => "ppt",
120            FileType::PPTX => "pptx",
121            FileType::PNG => "png",
122            FileType::JPG => "jpg",
123            FileType::JPEG => "jpeg",
124            FileType::CSV => "csv",
125            FileType::TXT => "txt",
126            FileType::MD => "md",
127            FileType::HTML => "html",
128            FileType::EPUB => "epub",
129            FileType::BMP => "bmp",
130            FileType::GIF => "gif",
131            FileType::WEBP => "webp",
132            FileType::HEIC => "heic",
133            FileType::EPS => "eps",
134            FileType::ICNS => "icns",
135            FileType::IM => "im",
136            FileType::PCX => "pcx",
137            FileType::PPM => "ppm",
138            FileType::TIFF => "tiff",
139            FileType::XBM => "xbm",
140            FileType::HEIF => "heif",
141            FileType::JP2 => "jp2",
142        }
143    }
144
145    /// Try to infer file type from file path.
146    pub fn from_path(path: &Path) -> Option<Self> {
147        path.extension()
148            .and_then(|ext| ext.to_str())
149            .and_then(|ext_str| match ext_str.to_lowercase().as_str() {
150                "pdf" => Some(FileType::PDF),
151                "docx" => Some(FileType::DOCX),
152                "doc" => Some(FileType::DOC),
153                "xls" => Some(FileType::XLS),
154                "xlsx" => Some(FileType::XLSX),
155                "ppt" => Some(FileType::PPT),
156                "pptx" => Some(FileType::PPTX),
157                "png" => Some(FileType::PNG),
158                "jpg" => Some(FileType::JPG),
159                "jpeg" => Some(FileType::JPEG),
160                "csv" => Some(FileType::CSV),
161                "txt" => Some(FileType::TXT),
162                "md" => Some(FileType::MD),
163                "html" => Some(FileType::HTML),
164                "htm" => Some(FileType::HTML),
165                "epub" => Some(FileType::EPUB),
166                "bmp" => Some(FileType::BMP),
167                "gif" => Some(FileType::GIF),
168                "webp" => Some(FileType::WEBP),
169                "heic" => Some(FileType::HEIC),
170                "eps" => Some(FileType::EPS),
171                "icns" => Some(FileType::ICNS),
172                "im" => Some(FileType::IM),
173                "pcx" => Some(FileType::PCX),
174                "ppm" => Some(FileType::PPM),
175                "tiff" => Some(FileType::TIFF),
176                "tif" => Some(FileType::TIFF),
177                "xbm" => Some(FileType::XBM),
178                "heif" => Some(FileType::HEIF),
179                "jp2" => Some(FileType::JP2),
180                _ => None,
181            })
182    }
183}