1use crate::types::DocumentExtractor;
28
/// Static, human-readable description of a document-extraction plugin.
///
/// Every field is `'static`, so a manifest is cheap to clone and can be
/// written as a plain literal. Equality is derived so manifests can be
/// compared directly (for example with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PluginManifest {
    /// Identifier of the plugin, e.g. `"docx-v1"`.
    pub plugin_id: &'static str,
    /// Name suitable for showing to a user, e.g. `"Microsoft Word (DOCX)"`.
    pub display_name: &'static str,
    /// File extensions this plugin handles, written lowercase and without a
    /// leading dot (e.g. `"pdf"`).
    pub extensions: &'static [&'static str],
    /// Attribution string for whoever provides the plugin.
    pub author: &'static str,
    /// License the plugin is distributed under, e.g. `"Apache-2.0"`.
    pub license: &'static str,
    /// Marks the plugin as built-in; `PluginExtractor::builtin` debug-asserts
    /// that this flag is set.
    pub builtin: bool,
    /// Free-text statement about how the plugin treats document content,
    /// e.g. whether anything is transmitted externally.
    pub privacy_note: &'static str,
}
48
49pub struct PluginExtractor {
51 pub manifest: PluginManifest,
52 pub extractor: Box<dyn DocumentExtractor>,
53}
54
55impl PluginExtractor {
56 pub fn builtin(manifest: PluginManifest, extractor: Box<dyn DocumentExtractor>) -> Self {
58 debug_assert!(manifest.builtin, "use PluginExtractor::external for non-built-in plugins");
59 Self { manifest, extractor }
60 }
61}
62
63pub struct PluginRegistry {
68 plugins: Vec<PluginExtractor>,
69}
70
71impl Default for PluginRegistry {
72 fn default() -> Self {
73 use crate::docx::DocxExtractor;
74 use crate::html::HtmlExtractor;
75 use crate::markdown::MarkdownExtractor;
76 use crate::pdf::PdfExtractor;
77 use crate::text::PlainTextExtractor;
78 let mut reg = Self { plugins: Vec::new() };
79 reg.register_builtin(
80 PluginManifest {
81 plugin_id: "docx-v1",
82 display_name: "Microsoft Word (DOCX)",
83 extensions: &["docx"],
84 author: "orbok built-in",
85 license: "Apache-2.0",
86 builtin: true,
87 privacy_note: "Does not transmit content externally.",
88 },
89 Box::new(DocxExtractor),
90 );
91 reg.register_builtin(
92 PluginManifest {
93 plugin_id: "html-v1",
94 display_name: "HTML",
95 extensions: &["html", "htm"],
96 author: "orbok built-in",
97 license: "Apache-2.0",
98 builtin: true,
99 privacy_note: "Does not transmit content externally.",
100 },
101 Box::new(HtmlExtractor),
102 );
103 reg.register_builtin(
104 PluginManifest {
105 plugin_id: "markdown-v1",
106 display_name: "Markdown",
107 extensions: &["md", "markdown"],
108 author: "orbok built-in",
109 license: "Apache-2.0",
110 builtin: true,
111 privacy_note: "Does not transmit content externally.",
112 },
113 Box::new(MarkdownExtractor),
114 );
115 reg.register_builtin(
116 PluginManifest {
117 plugin_id: "plain-text-v1",
118 display_name: "Plain Text",
119 extensions: &["txt", "log", "rs", "py", "js", "ts", "go", "sql", "toml",
120 "yaml", "yml", "json", "xml", "css", "html", "htm"],
121 author: "orbok built-in",
122 license: "Apache-2.0",
123 builtin: true,
124 privacy_note: "Does not transmit content externally.",
125 },
126 Box::new(PlainTextExtractor),
127 );
128 reg.register_builtin(
129 PluginManifest {
130 plugin_id: "pdf-lopdf-v1",
131 display_name: "PDF (lopdf)",
132 extensions: &["pdf"],
133 author: "orbok built-in",
134 license: "Apache-2.0",
135 builtin: true,
136 privacy_note: "Extracts text locally. Does not transmit content externally.",
137 },
138 Box::new(PdfExtractor),
139 );
140 reg
141 }
142}
143
144impl PluginRegistry {
145 fn register_builtin(&mut self, manifest: PluginManifest, extractor: Box<dyn DocumentExtractor>) {
146 self.plugins.push(PluginExtractor::builtin(manifest, extractor));
147 }
148
149 pub fn find_for_extension(&self, ext: &str) -> Option<&PluginExtractor> {
151 let ext_lower = ext.to_ascii_lowercase();
152 self.plugins
153 .iter()
154 .find(|p| p.manifest.extensions.contains(&ext_lower.as_str()))
155 }
156
157 pub fn manifests(&self) -> Vec<&PluginManifest> {
159 self.plugins.iter().map(|p| &p.manifest).collect()
160 }
161
162 pub fn len(&self) -> usize {
164 self.plugins.len()
165 }
166}