1use crate::types::DocumentExtractor;
28
/// Static description of a document-extraction plugin.
///
/// Every field is a `'static` value, so a manifest can be written as a plain
/// struct literal and cloned cheaply.
#[derive(Debug, Clone)]
pub struct PluginManifest {
    /// Identifier of the plugin, e.g. `"docx-v1"`.
    pub plugin_id: &'static str,

    /// Human-readable name of the plugin, e.g. `"Microsoft Word (DOCX)"`.
    pub display_name: &'static str,

    /// File extensions handled by the plugin, written without a leading dot
    /// (e.g. `["html", "htm"]`).
    pub extensions: &'static [&'static str],

    /// Who provides the plugin.
    pub author: &'static str,

    /// License the plugin is distributed under, e.g. `"Apache-2.0"`.
    pub license: &'static str,

    /// `true` when the plugin is a built-in one.
    pub builtin: bool,

    /// Free-text statement about how the plugin treats document content.
    pub privacy_note: &'static str,
}
48
49pub struct PluginExtractor {
51 pub manifest: PluginManifest,
52 pub extractor: Box<dyn DocumentExtractor>,
53}
54
55impl PluginExtractor {
56 pub fn builtin(manifest: PluginManifest, extractor: Box<dyn DocumentExtractor>) -> Self {
58 debug_assert!(
59 manifest.builtin,
60 "use PluginExtractor::external for non-built-in plugins"
61 );
62 Self {
63 manifest,
64 extractor,
65 }
66 }
67}
68
69pub struct PluginRegistry {
74 plugins: Vec<PluginExtractor>,
75}
76
77impl Default for PluginRegistry {
78 fn default() -> Self {
79 use crate::docx::DocxExtractor;
80 use crate::html::HtmlExtractor;
81 use crate::markdown::MarkdownExtractor;
82 use crate::pdf::PdfExtractor;
83 use crate::text::PlainTextExtractor;
84 let mut reg = Self {
85 plugins: Vec::new(),
86 };
87 reg.register_builtin(
88 PluginManifest {
89 plugin_id: "docx-v1",
90 display_name: "Microsoft Word (DOCX)",
91 extensions: &["docx"],
92 author: "orbok built-in",
93 license: "Apache-2.0",
94 builtin: true,
95 privacy_note: "Does not transmit content externally.",
96 },
97 Box::new(DocxExtractor),
98 );
99 reg.register_builtin(
100 PluginManifest {
101 plugin_id: "html-v1",
102 display_name: "HTML",
103 extensions: &["html", "htm"],
104 author: "orbok built-in",
105 license: "Apache-2.0",
106 builtin: true,
107 privacy_note: "Does not transmit content externally.",
108 },
109 Box::new(HtmlExtractor),
110 );
111 reg.register_builtin(
112 PluginManifest {
113 plugin_id: "markdown-v1",
114 display_name: "Markdown",
115 extensions: &["md", "markdown"],
116 author: "orbok built-in",
117 license: "Apache-2.0",
118 builtin: true,
119 privacy_note: "Does not transmit content externally.",
120 },
121 Box::new(MarkdownExtractor),
122 );
123 reg.register_builtin(
124 PluginManifest {
125 plugin_id: "plain-text-v1",
126 display_name: "Plain Text",
127 extensions: &[
128 "txt", "log", "rs", "py", "js", "ts", "go", "sql", "toml", "yaml", "yml",
129 "json", "xml", "css", "html", "htm",
130 ],
131 author: "orbok built-in",
132 license: "Apache-2.0",
133 builtin: true,
134 privacy_note: "Does not transmit content externally.",
135 },
136 Box::new(PlainTextExtractor),
137 );
138 reg.register_builtin(
139 PluginManifest {
140 plugin_id: "pdf-lopdf-v1",
141 display_name: "PDF (lopdf)",
142 extensions: &["pdf"],
143 author: "orbok built-in",
144 license: "Apache-2.0",
145 builtin: true,
146 privacy_note: "Extracts text locally. Does not transmit content externally.",
147 },
148 Box::new(PdfExtractor),
149 );
150 reg
151 }
152}
153
154impl PluginRegistry {
155 fn register_builtin(
156 &mut self,
157 manifest: PluginManifest,
158 extractor: Box<dyn DocumentExtractor>,
159 ) {
160 self.plugins
161 .push(PluginExtractor::builtin(manifest, extractor));
162 }
163
164 pub fn find_for_extension(&self, ext: &str) -> Option<&PluginExtractor> {
166 let ext_lower = ext.to_ascii_lowercase();
167 self.plugins
168 .iter()
169 .find(|p| p.manifest.extensions.contains(&ext_lower.as_str()))
170 }
171
172 pub fn manifests(&self) -> Vec<&PluginManifest> {
174 self.plugins.iter().map(|p| &p.manifest).collect()
175 }
176
177 pub fn len(&self) -> usize {
179 self.plugins.len()
180 }
181}