orbok_extract/plugin.rs
//! Plugin extractor interface (RFC-028 §7).
//!
//! This module defines the security-boundary types for external
//! extractor plugins. In v0.8, plugin *loading* is not yet implemented
//! (dynamic linking is deferred), but the interface is defined so that:
//!
//! 1. Built-in extractors can be registered with the same manifest format.
//! 2. The security contract is formalized before any loading code exists.
//!
//! ## Security model (RFC-028 §6)
//!
//! - A plugin extractor receives only a `ValidatedPath` — it cannot
//!   request arbitrary filesystem access. The PathGuard boundary
//!   (RFC-003 §8) applies before any plugin receives a path.
//! - Plugin failures are isolated: a panic in a plugin extractor must
//!   not crash the orbok process (RFC-005 §13).
//! - User consent is required before a non-built-in plugin is used;
//!   the manifest provides the metadata for that consent dialog.
//! - Plugin logging must follow NFR-014: no document contents logged.
//!
//! ## Dynamic loading (future)
//!
//! When RFC-028 is fully activated, plugin `.so`/`.dll` files will be
//! located via the `PluginRegistry`. Until then, `PluginRegistry` only
//! holds the built-in extractors.
26
27use crate::types::DocumentExtractor;
28
/// Metadata attached to every extractor plugin for display and consent.
///
/// Every field is `'static`, so a manifest can be written as a plain
/// literal and cloned cheaply.
#[derive(Debug, Clone)]
pub struct PluginManifest {
    /// Stable identifier (e.g. `"excel-xlsx-v1"`). Must be unique.
    pub plugin_id: &'static str,
    /// Human-readable display name.
    pub display_name: &'static str,
    /// File extensions handled by this plugin, one slice entry per
    /// extension (e.g. `&["md", "markdown"]`). The built-in manifests use
    /// lowercase entries without a leading dot.
    pub extensions: &'static [&'static str],
    /// Author name.
    pub author: &'static str,
    /// License (user sees this in the consent dialog).
    pub license: &'static str,
    /// Whether this plugin is built-in (no user consent required) or
    /// external (user must explicitly allow).
    pub builtin: bool,
    /// Privacy statement: what the plugin does NOT do.
    pub privacy_note: &'static str,
}
48
49/// A plugin extractor: manifest metadata + the extraction implementation.
50pub struct PluginExtractor {
51 pub manifest: PluginManifest,
52 pub extractor: Box<dyn DocumentExtractor>,
53}
54
55impl PluginExtractor {
56 /// Wrap a built-in extractor with its manifest.
57 pub fn builtin(manifest: PluginManifest, extractor: Box<dyn DocumentExtractor>) -> Self {
58 debug_assert!(manifest.builtin, "use PluginExtractor::external for non-built-in plugins");
59 Self { manifest, extractor }
60 }
61}
62
63/// The plugin registry (RFC-028 §8).
64///
65/// In v0.8, only built-in plugins are registered. Dynamic loading is
66/// gated behind `RFC-028` being fully activated.
67pub struct PluginRegistry {
68 plugins: Vec<PluginExtractor>,
69}
70
71impl Default for PluginRegistry {
72 fn default() -> Self {
73 use crate::markdown::MarkdownExtractor;
74 use crate::pdf::PdfExtractor;
75 use crate::text::PlainTextExtractor;
76 let mut reg = Self { plugins: Vec::new() };
77 reg.register_builtin(
78 PluginManifest {
79 plugin_id: "markdown-v1",
80 display_name: "Markdown",
81 extensions: &["md", "markdown"],
82 author: "orbok built-in",
83 license: "Apache-2.0",
84 builtin: true,
85 privacy_note: "Does not transmit content externally.",
86 },
87 Box::new(MarkdownExtractor),
88 );
89 reg.register_builtin(
90 PluginManifest {
91 plugin_id: "plain-text-v1",
92 display_name: "Plain Text",
93 extensions: &["txt", "log", "rs", "py", "js", "ts", "go", "sql", "toml",
94 "yaml", "yml", "json", "xml", "css", "html", "htm"],
95 author: "orbok built-in",
96 license: "Apache-2.0",
97 builtin: true,
98 privacy_note: "Does not transmit content externally.",
99 },
100 Box::new(PlainTextExtractor),
101 );
102 reg.register_builtin(
103 PluginManifest {
104 plugin_id: "pdf-lopdf-v1",
105 display_name: "PDF (lopdf)",
106 extensions: &["pdf"],
107 author: "orbok built-in",
108 license: "Apache-2.0",
109 builtin: true,
110 privacy_note: "Extracts text locally. Does not transmit content externally.",
111 },
112 Box::new(PdfExtractor),
113 );
114 reg
115 }
116}
117
118impl PluginRegistry {
119 fn register_builtin(&mut self, manifest: PluginManifest, extractor: Box<dyn DocumentExtractor>) {
120 self.plugins.push(PluginExtractor::builtin(manifest, extractor));
121 }
122
123 /// Find the plugin that handles the given extension.
124 pub fn find_for_extension(&self, ext: &str) -> Option<&PluginExtractor> {
125 let ext_lower = ext.to_ascii_lowercase();
126 self.plugins
127 .iter()
128 .find(|p| p.manifest.extensions.contains(&ext_lower.as_str()))
129 }
130
131 /// All registered plugin manifests (for the Models/Settings view).
132 pub fn manifests(&self) -> Vec<&PluginManifest> {
133 self.plugins.iter().map(|p| &p.manifest).collect()
134 }
135
136 /// Number of registered plugins.
137 pub fn len(&self) -> usize {
138 self.plugins.len()
139 }
140}