// omnivore_core/intelligence/entity.rs
use crate::Result;
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4
/// A single span of input text that was recognized as an entity.
///
/// As populated by `EntityRecognizer::recognize`, `text` is the matched
/// substring and `start`/`end` come straight from the regex match positions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Entity {
    /// The matched text, copied out of the input as an owned `String`.
    pub text: String,
    /// Category assigned to this span.
    pub entity_type: EntityType,
    /// Score attached by the recognizer. `recognize` assigns a fixed `0.95`
    /// to its regex matches.
    /// NOTE(review): presumably meant to lie in `0.0..=1.0` — confirm.
    pub confidence: f32,
    /// Start position of the match in the input, as reported by the regex
    /// match (`Match::start`, a byte offset rather than a character index).
    pub start: usize,
    /// End position of the match in the input, as reported by the regex
    /// match (`Match::end`, a byte offset rather than a character index).
    pub end: usize,
}
13
/// Category of a recognized [`Entity`].
///
/// Because of `rename_all = "UPPERCASE"`, serde reads and writes each variant
/// as its upper-cased name (for example `Email` becomes `"EMAIL"`).
///
/// The `recognize` function in this file currently produces only `Email` and
/// `Url`; the remaining variants are not emitted by any code visible here.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "UPPERCASE")]
pub enum EntityType {
    Person,
    Organization,
    Location,
    Date,
    /// Produced by `recognize` for e-mail address matches.
    Email,
    Phone,
    /// Produced by `recognize` for `http://` / `https://` matches.
    Url,
    Money,
    Other,
}
27
/// Field-less type that groups the entity-recognition functions; it carries
/// no state, and `recognize` is an associated function that takes no `self`.
pub struct EntityRecognizer;
29
30impl EntityRecognizer {
31 pub fn recognize(text: &str) -> Result<Vec<Entity>> {
32 let mut entities = Vec::new();
33
34 let email_regex = Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap();
35 for mat in email_regex.find_iter(text) {
36 entities.push(Entity {
37 text: mat.as_str().to_string(),
38 entity_type: EntityType::Email,
39 confidence: 0.95,
40 start: mat.start(),
41 end: mat.end(),
42 });
43 }
44
45 let url_regex = Regex::new(r"https?://[^\s]+").unwrap();
46 for mat in url_regex.find_iter(text) {
47 entities.push(Entity {
48 text: mat.as_str().to_string(),
49 entity_type: EntityType::Url,
50 confidence: 0.95,
51 start: mat.start(),
52 end: mat.end(),
53 });
54 }
55
56 Ok(entities)
57 }
58}