use omniparse::core::{Content, ExtractionResult, Metadata, MetadataValue, Result};
use omniparse::parsers::{Parser, ParserRegistry};
use std::io::Read;
/// Example parser for the toy `application/x-myformat` text format.
///
/// A document in this format is a block of `HEADER: ` / `AUTHOR: ` lines,
/// a line consisting solely of `---`, and then the free-text body.
///
/// The type is a stateless unit struct, so it is trivially copyable and can
/// be created with `MyFormatParser` or `MyFormatParser::default()`.
#[derive(Debug, Clone, Copy, Default)]
struct MyFormatParser;
impl Parser for MyFormatParser {
    /// Returns the MIME types this parser handles: only
    /// `application/x-myformat`.
    fn supported_types(&self) -> &[&str] {
        &["application/x-myformat"]
    }

    /// Parses an in-memory document into text content plus metadata.
    ///
    /// Layout understood by this parser:
    ///
    /// * Header lines, read up to the first line that is exactly `---`.
    ///   `HEADER: <text>` is stored under the `title` key and
    ///   `AUTHOR: <text>` under the `author` key; any other header line is
    ///   ignored.
    /// * Everything after that first separator is the body. Later `---`
    ///   lines belong to the body and are preserved.
    ///
    /// If the input contains no separator, the whole input is treated as
    /// header and the body is empty.
    ///
    /// Two derived metadata entries are always added:
    ///
    /// * `line_count` — number of body lines.
    /// * `character_count` — number of Unicode scalar values in the body
    ///   (lines joined with `\n`), not its UTF-8 byte length.
    ///
    /// `mime_type` is copied verbatim into the result and
    /// `detection_confidence` is fixed at `1.0`.
    fn parse(&self, data: &[u8], mime_type: &str) -> Result<ExtractionResult> {
        // Invalid UTF-8 is replaced with U+FFFD instead of being rejected.
        let text = String::from_utf8_lossy(data);
        let mut metadata = Metadata::new();

        // Consume header lines through the first separator only; whatever is
        // left in the iterator afterwards is the body.
        let mut lines = text.lines();
        for line in lines.by_ref() {
            if line == "---" {
                break;
            }
            if let Some(title) = line.strip_prefix("HEADER: ") {
                metadata.insert(
                    "title".to_string(),
                    MetadataValue::Text(title.to_string()),
                );
            } else if let Some(author) = line.strip_prefix("AUTHOR: ") {
                metadata.insert(
                    "author".to_string(),
                    MetadataValue::Text(author.to_string()),
                );
            }
        }

        let content_lines: Vec<&str> = lines.collect();
        let content = content_lines.join("\n");

        // Both counts are bounded by an in-memory length, which never
        // exceeds `isize::MAX`, so the casts to `i64` cannot truncate.
        metadata.insert(
            "line_count".to_string(),
            MetadataValue::Number(content_lines.len() as i64),
        );
        metadata.insert(
            "character_count".to_string(),
            // `str::len` would report bytes; count characters instead.
            MetadataValue::Number(content.chars().count() as i64),
        );

        Ok(ExtractionResult {
            mime_type: mime_type.to_string(),
            content: Content::Text(content),
            metadata,
            detection_confidence: 1.0,
        })
    }

    /// Reads `reader` to its end into a buffer and delegates to
    /// [`Parser::parse`].
    ///
    /// The entire stream is held in memory before parsing starts.
    ///
    /// # Errors
    ///
    /// Propagates the error from `read_to_end` if reading fails.
    fn parse_stream(&self, reader: &mut dyn Read, mime_type: &str) -> Result<ExtractionResult> {
        let mut buffer = Vec::new();
        reader.read_to_end(&mut buffer)?;
        self.parse(&buffer, mime_type)
    }

    /// Returns the parser's name, `"MyFormatParser"`.
    fn name(&self) -> &str {
        "MyFormatParser"
    }
}
/// Runs the custom-parser demo: prints a sample document, registers
/// `MyFormatParser` in a fresh `ParserRegistry`, looks the parser up by MIME
/// type, parses the sample and prints the extracted metadata and content,
/// then prints a short list of tips.
///
/// Returns the parse error, if any, to the caller.
fn main() -> Result<()> {
    // Divider lines reused throughout the output.
    let heavy_rule = "=".repeat(60);
    let light_rule = "-".repeat(60);
    let mime = "application/x-myformat";

    println!("Custom Parser Example");
    println!("{}", heavy_rule);

    // The raw string body must stay at column 0: the parser matches the
    // `HEADER: ` / `AUTHOR: ` prefixes at the very start of each line.
    let sample_data = r#"HEADER: My Custom Document
AUTHOR: John Doe
---
This is the content of my custom format file.
It can have multiple lines.
And the parser will extract both the metadata and content.
"#;
    println!("Sample data:");
    println!("{}", sample_data);
    println!("{}", heavy_rule);

    let mut registry = ParserRegistry::new();
    registry.register(Box::new(MyFormatParser));
    println!("\n✅ Registered custom parser for: application/x-myformat");

    if let Some(parser) = registry.get_parser(mime) {
        println!("📦 Parser name: {}", parser.name());

        let extracted = parser.parse(sample_data.as_bytes(), mime)?;

        println!("\n📄 Extraction Results:");
        println!("{}", light_rule);
        println!("MIME Type: {}", extracted.mime_type);
        println!("Confidence: {:.0}%", extracted.detection_confidence * 100.0);

        println!("\n📋 Metadata:");
        for key in extracted.metadata.keys() {
            if let Some(value) = extracted.metadata.get(key) {
                println!(" • {}: {:?}", key, value);
            }
        }

        println!("\n📝 Content:");
        if let Content::Text(body) = extracted.content {
            println!("{}", body);
        }
    }

    println!("\n{}", heavy_rule);
    println!("💡 Tips for creating custom parsers:");
    let tips = [
        " 1. Implement the Parser trait with your parsing logic",
        " 2. Register your parser with ParserRegistry",
        " 3. Optionally add magic byte patterns for auto-detection",
        " 4. Consider implementing streaming for large files",
    ];
    for tip in tips.iter() {
        println!("{}", tip);
    }

    Ok(())
}