Skip to main content

orbok_extract/
plugin.rs

1//! Plugin extractor interface (RFC-028 §7).
2//!
3//! This module defines the security-boundary types for external
4//! extractor plugins. In v0.8, plugin *loading* is not yet implemented
5//! (dynamic linking is deferred), but the interface is defined so that:
6//!
7//! 1. Built-in extractors can be registered with the same manifest.
8//! 2. The security contract is formalized before any loading code exists.
9//!
10//! ## Security model (RFC-028 §6)
11//!
12//! - A plugin extractor receives only a `ValidatedPath` — it cannot
13//!   request arbitrary filesystem access. The PathGuard boundary
14//!   (RFC-003 §8) applies before any plugin receives a path.
15//! - Plugin failures are isolated: a panic in a plugin extractor must
16//!   not crash the orbok process (RFC-005 §13).
17//! - User consent is required before a non-built-in plugin is used;
18//!   the manifest provides the metadata for that consent dialog.
19//! - Plugin logging must follow NFR-014: no document contents logged.
20//!
21//! ## Dynamic loading (future)
22//!
23//! When RFC-028 is fully activated, plugin `.so`/`.dll` files will be
24//! located via the `PluginRegistry`. Until then, `PluginRegistry` only
25//! holds the built-in extractors.
26
27use crate::types::DocumentExtractor;
28
/// Metadata attached to every extractor plugin for display and consent.
///
/// Every field is `'static` data, so a manifest is cheap to clone and two
/// manifests can be compared field-by-field with `==`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PluginManifest {
    /// Stable identifier (e.g. `"excel-xlsx-v1"`). Must be unique.
    pub plugin_id: &'static str,
    /// Human-readable display name.
    pub display_name: &'static str,
    /// File extensions handled by this plugin, one slice entry per extension
    /// (e.g. `&["md", "markdown"]`). The built-in manifests use lowercase
    /// entries without a leading dot.
    pub extensions: &'static [&'static str],
    /// Author name.
    pub author: &'static str,
    /// License (user sees this in the consent dialog).
    pub license: &'static str,
    /// Whether this plugin is built-in (no user consent required) or
    /// external (user must explicitly allow).
    pub builtin: bool,
    /// Privacy statement: what the plugin does NOT do.
    pub privacy_note: &'static str,
}
48
/// A plugin extractor: manifest metadata + the extraction implementation.
///
/// Both fields are public, so a value can also be assembled with a plain
/// struct literal instead of going through a constructor.
pub struct PluginExtractor {
    /// Display and consent metadata describing this plugin.
    pub manifest: PluginManifest,
    /// The extraction implementation, held as a boxed trait object.
    pub extractor: Box<dyn DocumentExtractor>,
}
54
55impl PluginExtractor {
56    /// Wrap a built-in extractor with its manifest.
57    pub fn builtin(manifest: PluginManifest, extractor: Box<dyn DocumentExtractor>) -> Self {
58        debug_assert!(manifest.builtin, "use PluginExtractor::external for non-built-in plugins");
59        Self { manifest, extractor }
60    }
61}
62
/// The plugin registry (RFC-028 §8).
///
/// In v0.8, only built-in plugins are registered. Dynamic loading is
/// gated behind `RFC-028` being fully activated.
pub struct PluginRegistry {
    /// Registered plugins in registration order. Extension lookups scan this
    /// list front to back, so the first matching plugin is the one returned.
    plugins: Vec<PluginExtractor>,
}
70
71impl Default for PluginRegistry {
72    fn default() -> Self {
73        use crate::markdown::MarkdownExtractor;
74        use crate::pdf::PdfExtractor;
75        use crate::text::PlainTextExtractor;
76        let mut reg = Self { plugins: Vec::new() };
77        reg.register_builtin(
78            PluginManifest {
79                plugin_id: "markdown-v1",
80                display_name: "Markdown",
81                extensions: &["md", "markdown"],
82                author: "orbok built-in",
83                license: "Apache-2.0",
84                builtin: true,
85                privacy_note: "Does not transmit content externally.",
86            },
87            Box::new(MarkdownExtractor),
88        );
89        reg.register_builtin(
90            PluginManifest {
91                plugin_id: "plain-text-v1",
92                display_name: "Plain Text",
93                extensions: &["txt", "log", "rs", "py", "js", "ts", "go", "sql", "toml",
94                              "yaml", "yml", "json", "xml", "css", "html", "htm"],
95                author: "orbok built-in",
96                license: "Apache-2.0",
97                builtin: true,
98                privacy_note: "Does not transmit content externally.",
99            },
100            Box::new(PlainTextExtractor),
101        );
102        reg.register_builtin(
103            PluginManifest {
104                plugin_id: "pdf-lopdf-v1",
105                display_name: "PDF (lopdf)",
106                extensions: &["pdf"],
107                author: "orbok built-in",
108                license: "Apache-2.0",
109                builtin: true,
110                privacy_note: "Extracts text locally. Does not transmit content externally.",
111            },
112            Box::new(PdfExtractor),
113        );
114        reg
115    }
116}
117
118impl PluginRegistry {
119    fn register_builtin(&mut self, manifest: PluginManifest, extractor: Box<dyn DocumentExtractor>) {
120        self.plugins.push(PluginExtractor::builtin(manifest, extractor));
121    }
122
123    /// Find the plugin that handles the given extension.
124    pub fn find_for_extension(&self, ext: &str) -> Option<&PluginExtractor> {
125        let ext_lower = ext.to_ascii_lowercase();
126        self.plugins
127            .iter()
128            .find(|p| p.manifest.extensions.contains(&ext_lower.as_str()))
129    }
130
131    /// All registered plugin manifests (for the Models/Settings view).
132    pub fn manifests(&self) -> Vec<&PluginManifest> {
133        self.plugins.iter().map(|p| &p.manifest).collect()
134    }
135
136    /// Number of registered plugins.
137    pub fn len(&self) -> usize {
138        self.plugins.len()
139    }
140}