1use std::path::Path;
2
3use composable::Composable;
4use gliner::model::input::relation::schema::RelationSchema;
5use gliner::model::input::text::TextInput;
6use gliner::model::output::decoded::SpanOutput;
7use gliner::model::output::relation::RelationOutput;
8use gliner::model::params::Parameters;
9use gliner::model::pipeline::relation::RelationPipeline;
10use gliner::model::pipeline::token::TokenPipeline;
11use orp::model::Model;
12use orp::params::RuntimeParameters;
13use orp::pipeline::Pipeline;
14
15use crate::ner::ExtractedEntity;
16use crate::relclf::RelationClassifier;
17use crate::relex::RelexEngine;
18use crate::schema::ExtractionSchema;
19
/// A single (head, relation, tail) triple extracted from text, with a
/// confidence score (heuristic rules and models assign different ranges).
#[derive(Debug, Clone)]
pub struct ExtractedRelation {
    // Surface text of the head (subject) entity.
    pub head: String,
    // Relation label, e.g. "depends_on", "replaced", "chose".
    pub relation: String,
    // Surface text of the tail (object) entity.
    pub tail: String,
    // Extraction confidence in roughly 0.0..=1.0.
    pub confidence: f64,
}
28
/// Relation-extraction backend: either a loaded model pipeline or a
/// keyword-based heuristic fallback when no model files are available.
pub enum RelEngine {
    // Runs NER + relation pipelines against a loaded model.
    ModelBased(ModelBasedRelEngine),
    // Pure keyword/position heuristics (see `heuristic_relations`).
    Heuristic,
}
42
43static RELEX_ENGINE: std::sync::OnceLock<Option<RelexEngine>> = std::sync::OnceLock::new();
45
46static RELCLF_ENGINE: std::sync::OnceLock<Option<RelationClassifier>> = std::sync::OnceLock::new();
48
/// Holds a loaded model plus the parameters and tokenizer path needed to
/// build the NER and relation pipelines on each `extract` call.
pub struct ModelBasedRelEngine {
    model: Model,
    params: Parameters,
    // Kept as an owned String; pipeline constructors are given this path.
    tokenizer_path: String,
}
57
58impl ModelBasedRelEngine {
59 pub fn new(model_path: &Path, tokenizer_path: &Path) -> Result<Self, RelError> {
60 let runtime_params = RuntimeParameters::default();
61 let model = Model::new(
62 model_path
63 .to_str()
64 .ok_or(RelError::InvalidPath(model_path.display().to_string()))?,
65 runtime_params,
66 )
67 .map_err(|e| RelError::ModelLoad(e.to_string()))?;
68
69 Ok(Self {
70 model,
71 params: Parameters::default(),
72 tokenizer_path: tokenizer_path
73 .to_str()
74 .ok_or(RelError::InvalidPath(
75 tokenizer_path.display().to_string(),
76 ))?
77 .to_string(),
78 })
79 }
80
81 pub fn extract(
82 &self,
83 text: &str,
84 labels: &[&str],
85 schema: &ExtractionSchema,
86 ) -> Result<(Vec<ExtractedEntity>, Vec<ExtractedRelation>), RelError> {
87 let mut relation_schema = RelationSchema::new();
89 for (rel_name, spec) in &schema.relation_types {
90 let heads: Vec<&str> = spec.head.iter().map(|s| s.as_str()).collect();
91 let tails: Vec<&str> = spec.tail.iter().map(|s| s.as_str()).collect();
92 relation_schema.push_with_allowed_labels(rel_name, &heads, &tails);
93 }
94
95 let input = TextInput::from_str(&[text], labels)
96 .map_err(|e| RelError::Inference(e.to_string()))?;
97
98 let ner_pipeline = TokenPipeline::new(&self.tokenizer_path)
100 .map_err(|e| RelError::Inference(e.to_string()))?;
101 let ner_composable = ner_pipeline.to_composable(&self.model, &self.params);
102 let ner_output: SpanOutput = ner_composable
103 .apply(input)
104 .map_err(|e| RelError::Inference(e.to_string()))?;
105
106 let mut entities = Vec::new();
108 for sequence_spans in &ner_output.spans {
109 for span in sequence_spans {
110 let (start, end) = span.offsets();
111 entities.push(ExtractedEntity {
112 text: span.text().to_string(),
113 entity_type: span.class().to_string(),
114 span_start: start,
115 span_end: end,
116 confidence: span.probability() as f64,
117 });
118 }
119 }
120
121 let rel_pipeline =
123 RelationPipeline::default(&self.tokenizer_path, &relation_schema)
124 .map_err(|e| RelError::Inference(e.to_string()))?;
125 let rel_composable = rel_pipeline.to_composable(&self.model, &self.params);
126 let rel_output: RelationOutput = rel_composable
127 .apply(ner_output)
128 .map_err(|e| RelError::Inference(e.to_string()))?;
129
130 let mut relations = Vec::new();
132 for sequence_rels in &rel_output.relations {
133 for rel in sequence_rels {
134 relations.push(ExtractedRelation {
135 head: rel.subject().to_string(),
136 relation: rel.class().to_string(),
137 tail: rel.object().to_string(),
138 confidence: rel.probability() as f64,
139 });
140 }
141 }
142
143 Ok((entities, relations))
144 }
145}
146
147impl RelEngine {
148 pub fn new(model_path: Option<&Path>, tokenizer_path: Option<&Path>) -> Result<Self, RelError> {
151 match (model_path, tokenizer_path) {
152 (Some(mp), Some(tp)) if mp.exists() && tp.exists() => {
153 let engine = ModelBasedRelEngine::new(mp, tp)?;
154 Ok(Self::ModelBased(engine))
155 }
156 _ => Ok(Self::Heuristic),
157 }
158 }
159
160 pub fn extract(
167 &self,
168 text: &str,
169 entities: &[ExtractedEntity],
170 schema: &ExtractionSchema,
171 ) -> Result<Vec<ExtractedRelation>, RelError> {
172 let mut relations = heuristic_relations(text, entities, schema);
173
174 if std::env::var("CTXGRAPH_RELEX").is_ok() {
176 let relex = RELEX_ENGINE.get_or_init(|| {
177 let mgr = crate::model_manager::ModelManager::new().ok()?;
178 let (model_path, tok_path) = mgr.find_relex_model()?;
179 RelexEngine::new(&model_path, &tok_path).ok()
180 });
181
182 if let Some(engine) = relex {
183 let entity_labels: Vec<&str> = schema.entity_labels();
184 let relation_labels: Vec<&str> = schema.relation_labels();
185 if let Ok(result) = engine.extract(text, &entity_labels, &relation_labels, 0.5, 0.5, schema) {
186 let existing: std::collections::HashSet<(String, String)> = relations
187 .iter()
188 .map(|r| (r.head.clone(), r.tail.clone()))
189 .collect();
190
191 for rel in &result.relations {
192 if rel.confidence < 0.80 {
193 continue;
194 }
195
196 let mapped_head = map_span_to_entity(&rel.head, entities);
197 let mapped_tail = map_span_to_entity(&rel.tail, entities);
198
199 if let (Some(head), Some(tail)) = (mapped_head, mapped_tail) {
200 if head != tail
201 && !existing.contains(&(head.clone(), tail.clone()))
202 && !existing.contains(&(tail.clone(), head.clone()))
203 {
204 relations.push(ExtractedRelation {
205 head,
206 relation: rel.relation.clone(),
207 tail,
208 confidence: rel.confidence,
209 });
210 }
211 }
212 }
213 }
214 }
215 }
216
217 let relclf = RELCLF_ENGINE.get_or_init(|| {
219 let mgr = crate::model_manager::ModelManager::new().ok()?;
220 let model_path = mgr.find_relation_classifier()?;
221 RelationClassifier::new(&model_path).ok()
222 });
223
224 if let Some(classifier) = relclf {
225 static EMBED_ENGINE: std::sync::OnceLock<
227 Option<fastembed::TextEmbedding>,
228 > = std::sync::OnceLock::new();
229 let embed = EMBED_ENGINE.get_or_init(|| {
230 fastembed::TextEmbedding::try_new(
231 fastembed::InitOptions::new(fastembed::EmbeddingModel::AllMiniLML6V2),
232 )
233 .ok()
234 });
235
236 if let Some(embed_model) = embed {
237 let embed_fn = |text: &str| -> Result<Vec<f32>, RelError> {
238 let mut vecs = embed_model
239 .embed(vec![text], None)
240 .map_err(|e| RelError::Inference(e.to_string()))?;
241 vecs.pop()
242 .ok_or_else(|| RelError::Inference("empty embedding".into()))
243 };
244
245 if let Ok(clf_relations) =
246 classifier.classify_batch(text, entities, &embed_fn)
247 {
248 let existing: std::collections::HashSet<(String, String)> = relations
249 .iter()
250 .map(|r| (r.head.clone(), r.tail.clone()))
251 .collect();
252
253 for rel in clf_relations {
254 if rel.confidence < 0.70 {
255 continue;
256 }
257 if !existing.contains(&(rel.head.clone(), rel.tail.clone()))
259 && !existing.contains(&(rel.tail.clone(), rel.head.clone()))
260 {
261 relations.push(rel);
262 }
263 }
264 }
265 }
266 }
267
268 Ok(relations)
269 }
270}
271
272fn heuristic_relations(
280 text: &str,
281 entities: &[ExtractedEntity],
282 schema: &ExtractionSchema,
283) -> Vec<ExtractedRelation> {
284 let patterns: &[(&str, &[&str])] = &[
291 ("chose", &[
292 "chose", "choose", "select", "picked", "went with", "adopt",
293 "decided to use", "decided to add", "opted for", "settled on",
294 "standardiz", "switched to",
295 "decided that",
296 ]),
297 ("rejected", &[
298 "reject", "ruled out", "decided against", "dropped",
299 "abandon", "discard", "veto",
300 ]),
301 ("replaced", &[
302 "replac", "swapped",
306 "in favor of", "instead of", "in place of",
307 "over the legacy", "over the old",
308 ]),
309 ("depends_on", &[
310 "depend", "relies on", "rely on", "built on",
311 " uses ", " use ", "using ",
312 "connect", "backed by", "powered by",
313 "running on", "runs on", "deployed on", "hosted on",
314 "integrat", "communicat",
315 "proxied by", "proxies", "in front of",
316 "managed by", "orchestrat",
317 "reads from", "writes to", "persist",
318 "publish", "subscrib", "consum",
319 "sends to", "receives from",
320 "queries", "fetches from",
321 "leverag", "switched to",
322 "scraped", "scrapes",
323 "flow through", "flows through",
324 "target", "written in", "implemented in",
325 "-based", "caching layer",
326 "counter in", "stored in", "cache to",
327 "stitched", "backed by",
328 "local cache", "sync when",
329 "used by", "via ",
331 "package manager",
332 "is down",
333 "goroutine",
335 "scraped by", "dashboards",
337 ]),
338 ("fixed", &[
339 "fixed", "fixing", "resolv", "patched", "repaired",
340 "debugged", "addressed", "correct ",
341 "eliminat", "mitigat", "diagnos", "root-caus",
342 "identified", "found ",
343 "traced", "investigated",
344 "patch ",
345 ]),
346 ("introduced", &[
347 "introduc", " add ", "added", "implement", "created", "built",
348 "set up", "deploy", "enabl", "integrat", "install",
349 "configur", "establish", "rolled out", "launched",
350 "onboard", "provision", "stood up", "spun up",
351 "upgrad", "extract",
352 "chosen to enforc", "chosen to implement",
354 ]),
355 ("deprecated", &[
356 "deprecat", "removed", "removing", "phased out", "phase out",
357 "sunset", "decommission", "retired", "killed", "shut down",
358 "tore down", "ripped out", "turned off",
359 ]),
360 ("caused", &[
361 "caused", "causing", "resulted in", "led to", "trigger",
362 "contributed to",
363 "dropped to", "reduced to", "improved to", "decreased to",
364 "reduced by", "improved by", "increased by", "decreased by",
365 "spiked", "spike",
366 ]),
367 ("constrained_by", &[
368 "constrain", "blocked by", "due to",
369 "required to", "has to", "have to",
370 "cannot exceed", "comply", "enforc",
371 "subject to", "bound by", "governed by",
372 "mandated", "driven by", "must comply",
373 "guarantee", "accepted",
374 "rate limit", "forbidden", "must not",
375 "exceed", "scoped", "capped at", "cap at",
376 "broke", "break ", "breaking",
377 "zero-trust", "least privilege",
378 "cannot handle",
379 "exactly-once",
381 " sla ",
382 "memory safety",
383 ]),
384 ];
385
386 let sentence_ranges = split_sentences(text);
387
388 let mut relations = Vec::new();
389 let mut seen = std::collections::HashSet::<(String, String, String)>::new();
390
391 detect_from_to_pattern(text, entities, schema, &mut relations, &mut seen);
395
396 for (sent_idx, &(sent_start, sent_end)) in sentence_ranges.iter().enumerate() {
397 let sent_text = &text[sent_start..sent_end];
398 let sent_lower = sent_text.to_lowercase();
399
400 let sent_entities: Vec<&ExtractedEntity> = entities
402 .iter()
403 .filter(|e| e.span_start >= sent_start && e.span_start < sent_end)
404 .collect();
405
406 if sent_entities.len() < 2 {
407 }
410
411 let window_start = if sent_idx > 0 { sentence_ranges[sent_idx - 1].0 } else { sent_start };
413 let window_end = if sent_idx + 1 < sentence_ranges.len() {
414 sentence_ranges[sent_idx + 1].1
415 } else {
416 sent_end
417 };
418
419 let window_entities: Vec<&ExtractedEntity> = entities
420 .iter()
421 .filter(|e| e.span_start >= window_start && e.span_start < window_end)
422 .collect();
423
424 for (relation, keywords) in patterns {
425 if !keywords.iter().any(|kw| sent_lower.contains(kw)) {
426 continue;
427 }
428
429 let rel_spec = schema.relation_types.get(*relation);
431
432 let kw_pos = keywords.iter()
434 .filter_map(|kw| sent_lower.find(kw).map(|p| p + kw.len() / 2))
435 .min()
436 .unwrap_or(sent_lower.len() / 2);
437 let kw_abs_pos = sent_start + kw_pos;
438
439 let mut candidates: Vec<(f64, &ExtractedEntity, &ExtractedEntity)> = Vec::new();
441
442 for &head in &sent_entities {
443 for &tail in &window_entities {
444 if std::ptr::eq(head, tail) || head.text == tail.text {
445 continue;
446 }
447
448 if is_reference_entity(&head.text)
451 || is_reference_entity(&tail.text)
452 {
453 continue;
454 }
455
456 let schema_valid = rel_spec
458 .map(|spec| {
459 spec.head.contains(&head.entity_type)
460 && spec.tail.contains(&tail.entity_type)
461 })
462 .unwrap_or(false);
463
464 if !schema_valid {
465 continue;
466 }
467
468 let both_person = head.entity_type == "Person" && tail.entity_type == "Person";
470 if both_person && *relation != "chose" && *relation != "rejected" {
471 continue;
472 }
473
474 let in_same_sentence =
475 tail.span_start >= sent_start && tail.span_start < sent_end;
476
477 let head_dist = (head.span_start as f64 - kw_abs_pos as f64).abs();
479 let tail_dist = (tail.span_start as f64 - kw_abs_pos as f64).abs();
480 let avg_dist = (head_dist + tail_dist) / 2.0;
481 let proximity = 1.0 / (1.0 + avg_dist / 50.0);
482
483 let base = if in_same_sentence { 0.65 } else { 0.45 };
484 let confidence = base * proximity;
485
486 candidates.push((confidence, head, tail));
487 }
488 }
489
490 candidates.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
493
494 for (confidence, head, tail) in candidates.into_iter().take(1) {
495 let (actual_head, actual_tail) = determine_direction(
496 relation, head, tail, rel_spec, &sent_lower, sent_start,
497 );
498
499 let key = (actual_head.clone(), relation.to_string(), actual_tail.clone());
500 if seen.insert(key) {
501 relations.push(ExtractedRelation {
502 head: actual_head.clone(),
503 relation: relation.to_string(),
504 tail: actual_tail.clone(),
505 confidence,
506 });
507 }
508 }
509 }
510
511 if let Some(over_pos) = sent_lower.find(" over ") {
514 let rejected_spec = schema.relation_types.get("rejected");
515 let over_abs = sent_start + over_pos;
518 let mut rejected_entity: Option<&ExtractedEntity> = None;
519 let mut chooser: Option<&ExtractedEntity> = None;
520
521 for ent in &sent_entities {
522 if is_reference_entity(&ent.text) {
523 continue;
524 }
525 if ent.span_start > over_abs && ent.span_start <= over_abs + 15 {
527 rejected_entity = Some(ent);
528 }
529 if ent.entity_type == "Person" {
531 chooser = Some(ent);
532 }
533 }
534
535 if let (Some(rej), Some(ch)) = (rejected_entity, chooser) {
536 if rej.text != ch.text {
537 let schema_valid = rejected_spec
539 .map(|spec| {
540 spec.head.contains(&ch.entity_type)
541 && spec.tail.contains(&rej.entity_type)
542 })
543 .unwrap_or(true);
544
545 if schema_valid {
546 let key = (ch.text.clone(), "rejected".to_string(), rej.text.clone());
547 if seen.insert(key) {
548 relations.push(ExtractedRelation {
549 head: ch.text.clone(),
550 relation: "rejected".to_string(),
551 tail: rej.text.clone(),
552 confidence: 0.60,
553 });
554 }
555 }
556 }
557 }
558 }
559 }
560
561 relations.retain(|r| {
563 if r.relation == "replaced" {
566 let h = r.head.to_lowercase();
567 let t = r.tail.to_lowercase();
568 let h_base = h.trim_end_matches(|c: char| c.is_ascii_digit() || c == '.' || c == ' ');
570 let t_base = t.trim_end_matches(|c: char| c.is_ascii_digit() || c == '.' || c == ' ');
571 if !h_base.is_empty() && h_base == t_base {
572 return false;
573 }
574 }
575 true
576 });
577
578 let conflicts: &[(&str, &str)] = &[
582 ("chose", "rejected"), ("introduced", "deprecated"), ("replaced", "depends_on"), ("introduced", "depends_on"), ("introduced", "replaced"), ("chose", "depends_on"), ("caused", "fixed"), ("introduced", "caused"), ];
591
592 let mut to_remove = std::collections::HashSet::new();
593 for (i, r1) in relations.iter().enumerate() {
594 for (j, r2) in relations.iter().enumerate() {
595 if i >= j { continue; }
596 let same_pair = (r1.head == r2.head && r1.tail == r2.tail)
598 || (r1.head == r2.tail && r1.tail == r2.head);
599 if !same_pair { continue; }
600
601 for &(a, b) in conflicts {
602 if (r1.relation == a && r2.relation == b)
603 || (r1.relation == b && r2.relation == a)
604 {
605 if r1.confidence >= r2.confidence {
607 to_remove.insert(j);
608 } else {
609 to_remove.insert(i);
610 }
611 }
612 }
613 }
614 }
615
616 if !to_remove.is_empty() {
617 let mut idx = 0;
618 relations.retain(|_| {
619 let keep = !to_remove.contains(&idx);
620 idx += 1;
621 keep
622 });
623 }
624
625 relations
626}
627
628fn map_span_to_entity(span_text: &str, entities: &[ExtractedEntity]) -> Option<String> {
632 let span_lower = span_text.to_lowercase();
633
634 for ent in entities {
636 if ent.text.to_lowercase() == span_lower {
637 return Some(ent.text.clone());
638 }
639 }
640
641 let mut best: Option<(&ExtractedEntity, f64)> = None;
643 for ent in entities {
644 let ent_lower = ent.text.to_lowercase();
645 if ent_lower.contains(&span_lower) || span_lower.contains(&ent_lower) {
646 let score = span_lower.len().min(ent_lower.len()) as f64
647 / span_lower.len().max(ent_lower.len()) as f64;
648 if score >= 0.3 {
649 if best.as_ref().is_none_or(|(_, s)| score > *s) {
650 best = Some((ent, score));
651 }
652 }
653 }
654 }
655
656 best.map(|(ent, _)| ent.text.clone())
657}
658
659fn detect_from_to_pattern(
665 text: &str,
666 entities: &[ExtractedEntity],
667 schema: &ExtractionSchema,
668 relations: &mut Vec<ExtractedRelation>,
669 seen: &mut std::collections::HashSet<(String, String, String)>,
670) {
671 let text_lower = text.to_lowercase();
672 let rel_spec = schema.relation_types.get("replaced");
673
674 let mut search_start = 0;
676 while let Some(from_pos) = text_lower[search_start..].find("from ") {
677 let abs_from = search_start + from_pos;
678
679 for old_ent in entities.iter() {
681 if old_ent.span_start < abs_from + 4 || old_ent.span_start > abs_from + 40 {
682 continue;
683 }
684
685 let after_old = old_ent.span_end;
687 if let Some(to_rel_pos) = text_lower[after_old..].find(" to ") {
688 let abs_to = after_old + to_rel_pos;
689
690 for new_ent in entities.iter() {
692 if new_ent.span_start < abs_to + 3 || new_ent.span_start > abs_to + 40 {
693 continue;
694 }
695 if new_ent.text == old_ent.text {
696 continue;
697 }
698 if is_reference_entity(&new_ent.text) || is_reference_entity(&old_ent.text) {
699 continue;
700 }
701
702 let schema_valid = rel_spec
704 .map(|spec| {
705 (spec.head.contains(&new_ent.entity_type)
706 && spec.tail.contains(&old_ent.entity_type))
707 || (spec.head.contains(&old_ent.entity_type)
708 && spec.tail.contains(&new_ent.entity_type))
709 })
710 .unwrap_or(true);
711
712 if !schema_valid {
713 continue;
714 }
715
716 let key = (new_ent.text.clone(), "replaced".to_string(), old_ent.text.clone());
718 if seen.insert(key) {
719 relations.push(ExtractedRelation {
720 head: new_ent.text.clone(),
721 relation: "replaced".to_string(),
722 tail: old_ent.text.clone(),
723 confidence: 0.75,
724 });
725 }
726 }
727 }
728 }
729
730 search_start = abs_from + 5;
731 }
732}
733
/// Returns true for document-reference mentions (ADR ids, pull requests,
/// issues) that should never participate in extracted relations.
fn is_reference_entity(text: &str) -> bool {
    const REF_PREFIXES: [&str; 6] = ["ADR-", "ADR ", "PR #", "PR:", "Issue #", "Issue:"];
    text == "PR" || REF_PREFIXES.iter().any(|p| text.starts_with(p))
}
749
/// Decides which of `head`/`tail` is the actual relation head, using lexical
/// cues in the (lowercased) sentence.
///
/// The two entities are first reordered by text position (`first` precedes
/// `second`); the rules below are stated in terms of that order. Relation-
/// specific cues are tried first ("replaced", "deprecated", "depends_on");
/// if none fire, schema head/tail type validity decides; the final fallback
/// is plain textual order.
///
/// Returns `(head_text, tail_text)`.
fn determine_direction(
    relation: &str,
    head: &ExtractedEntity,
    tail: &ExtractedEntity,
    rel_spec: Option<&crate::schema::RelationSpec>,
    sent_lower: &str,
    _sent_start: usize,
) -> (String, String) {
    // Order the pair by position in the text.
    let (first, second) = if head.span_start <= tail.span_start {
        (head, tail)
    } else {
        (tail, head)
    };

    if relation == "replaced" {
        let first_lower = first.text.to_lowercase();
        let second_lower = second.text.to_lowercase();

        // "from A ... to B": B (the migration target) replaced A.
        let from_to = sent_lower.contains(&format!("from {}", first_lower))
            && (sent_lower.contains(&format!("to {}", second_lower))
                || sent_lower.contains(&format!("to the {}", second_lower)));
        if from_to {
            return (second.text.clone(), first.text.clone());
        }

        // Reverse order: "from B ... to A" means A replaced B.
        let rev_from_to = sent_lower.contains(&format!("from {}", second_lower))
            && (sent_lower.contains(&format!("to {}", first_lower))
                || sent_lower.contains(&format!("to the {}", first_lower)));
        if rev_from_to {
            return (first.text.clone(), second.text.clone());
        }

        // "fallback" cue: the entity mentioned after "fallback" is the thing
        // being replaced (the old option), so the other entity is the head.
        if sent_lower.contains("fallback") {
            if let Some(fb_pos) = sent_lower.find("fallback") {
                let after_fb = &sent_lower[fb_pos..];
                let first_in_fb = after_fb.find(&first_lower);
                let second_in_fb = after_fb.find(&second_lower);
                match (first_in_fb, second_in_fb) {
                    (Some(_), None) => {
                        return (first.text.clone(), second.text.clone());
                    }
                    (None, Some(_)) => {
                        return (second.text.clone(), first.text.clone());
                    }
                    (Some(fp), Some(sp)) => {
                        // Both appear after "fallback": the earlier one wins
                        // the head slot.
                        if fp < sp {
                            return (first.text.clone(), second.text.clone());
                        } else {
                            return (second.text.clone(), first.text.clone());
                        }
                    }
                    _ => {}
                }
            }
        }

        // "replaced it with X": X (mentioned after "replac") is the new thing,
        // i.e. the head of the relation.
        if let Some(pos) = sent_lower.find("replac") {
            let after_replace = &sent_lower[pos..];
            if after_replace.contains(" it ") && after_replace.contains(" with ") {
                let first_in_replace = after_replace.find(&first_lower);
                let second_in_replace = after_replace.find(&second_lower);
                match (first_in_replace, second_in_replace) {
                    (Some(_), None) => {
                        return (first.text.clone(), second.text.clone());
                    }
                    (None, Some(_)) => {
                        return (second.text.clone(), first.text.clone());
                    }
                    _ => {} }
            }

            // "replaced A with B" style: the entity mentioned FIRST after
            // "replac" is the old one (tail); the later one is the new head.
            let first_in_replace = after_replace.find(&first_lower);
            let second_in_replace = after_replace.find(&second_lower);
            if let (Some(fp), Some(sp)) = (first_in_replace, second_in_replace) {
                if fp < sp {
                    return (second.text.clone(), first.text.clone());
                } else {
                    return (first.text.clone(), second.text.clone());
                }
            }
        }

        // "dropped A in favor of B": the entity after "in favor of" is the
        // new choice, i.e. the head.
        if let Some(pos) = sent_lower.find("in favor of") {
            let favor_text = &sent_lower[pos..];
            if favor_text.contains(&second_lower) {
                return (second.text.clone(), first.text.clone());
            }
            if favor_text.contains(&first_lower) {
                return (first.text.clone(), second.text.clone());
            }
        }

        // "A replaces B": entity before "replac" is the head, entity after is
        // the tail.
        if let Some(rp) = sent_lower.find("replac") {
            let first_pos = sent_lower.find(&first_lower);
            let second_pos = sent_lower.find(&second_lower);
            if let (Some(fp), Some(sp)) = (first_pos, second_pos) {
                if fp < rp && sp > rp {
                    return (first.text.clone(), second.text.clone());
                }
                if sp < rp && fp > rp {
                    return (second.text.clone(), first.text.clone());
                }
            }
        }
    }

    if relation == "deprecated" {
        let first_lower = first.text.to_lowercase();
        let second_lower = second.text.to_lowercase();

        // Passive voice marks the TAIL (the thing deprecated). Note operator
        // precedence: the "-based" clause groups as `contains("{x}-based") &&
        // contains("deprecated")` — the first two alternatives stand alone.
        let first_passive = sent_lower.contains(&format!("{} is deprecated", first_lower))
            || sent_lower.contains(&format!("{} are deprecated", first_lower))
            || sent_lower.contains(&format!("{}-based", first_lower))
            && sent_lower.contains("deprecated");
        let second_passive = sent_lower.contains(&format!("{} is deprecated", second_lower))
            || sent_lower.contains(&format!("{} are deprecated", second_lower))
            || sent_lower.contains(&format!("{}-based", second_lower))
            && sent_lower.contains("deprecated");

        if first_passive && !second_passive {
            return (second.text.clone(), first.text.clone());
        }
        if second_passive && !first_passive {
            return (first.text.clone(), second.text.clone());
        }
    }

    if relation == "depends_on" {
        let first_lower = first.text.to_lowercase();
        let second_lower = second.text.to_lowercase();

        // "A used by B": passive marker flips direction — the entity AFTER
        // the marker is the dependent (head).
        let passive_markers = ["used by", "scraped by", "managed by", "orchestrated by", "handled by"];
        for marker in passive_markers {
            if let Some(pos) = sent_lower.find(marker) {
                let first_pos = sent_lower.find(&first_lower);
                let second_pos = sent_lower.find(&second_lower);
                if let (Some(fp), Some(sp)) = (first_pos, second_pos) {
                    if fp < pos && sp > pos {
                        return (second.text.clone(), first.text.clone());
                    }
                    if sp < pos && fp > pos {
                        return (first.text.clone(), second.text.clone());
                    }
                }
            }
        }

        // "A ... via B": A depends on B (entity before "via" is the head).
        if let Some(pos) = sent_lower.find(" via ") {
            let first_pos = sent_lower.find(&first_lower);
            let second_pos = sent_lower.find(&second_lower);
            if let (Some(fp), Some(sp)) = (first_pos, second_pos) {
                if fp < pos && sp > pos {
                    return (first.text.clone(), second.text.clone());
                }
                if sp < pos && fp > pos {
                    return (second.text.clone(), first.text.clone());
                }
            }
        }

        // "X-based ...": the entity fused with "-based" is the technology
        // being depended upon (tail).
        if sent_lower.contains("-based") {
            let first_pos = sent_lower.find(&first_lower);
            let second_pos = sent_lower.find(&second_lower);
            if let Some(based_pos) = sent_lower.find("-based") {
                if let (Some(fp), Some(sp)) = (first_pos, second_pos) {
                    // "-based" must directly follow the entity mention.
                    if fp < based_pos && based_pos <= fp + first_lower.len() + 1 {
                        return (second.text.clone(), first.text.clone());
                    }
                    if sp < based_pos && based_pos <= sp + second_lower.len() + 1 {
                        return (first.text.clone(), second.text.clone());
                    }
                }
            }
        }

        // Type-based heuristic: services/components depend on databases/
        // infrastructure/languages, not vice versa.
        let consumer_types = ["Service", "Component"];
        let provider_types = ["Database", "Infrastructure", "Language"];

        let first_is_consumer = consumer_types.contains(&first.entity_type.as_str());
        let second_is_consumer = consumer_types.contains(&second.entity_type.as_str());
        let first_is_provider = provider_types.contains(&first.entity_type.as_str());
        let second_is_provider = provider_types.contains(&second.entity_type.as_str());

        if first_is_consumer && second_is_provider {
            return (first.text.clone(), second.text.clone());
        }
        if second_is_consumer && first_is_provider {
            return (second.text.clone(), first.text.clone());
        }

        // Weaker fallback: a lone Service-like entity is assumed to be the
        // dependent side.
        let first_is_service = first.entity_type == "Service"
            || first.text.ends_with("Service");
        let second_is_service = second.entity_type == "Service"
            || second.text.ends_with("Service");

        if first_is_service && !second_is_service {
            return (first.text.clone(), second.text.clone());
        }
        if second_is_service && !first_is_service {
            return (second.text.clone(), first.text.clone());
        }
    }

    // Schema fallback: if exactly one direction is type-valid, use it.
    // Note this uses the ORIGINAL head/tail pair, not the reordered one.
    if let Some(spec) = rel_spec {
        let fwd_valid = spec.head.contains(&head.entity_type)
            && spec.tail.contains(&tail.entity_type);
        let rev_valid = spec.head.contains(&tail.entity_type)
            && spec.tail.contains(&head.entity_type);
        match (fwd_valid, rev_valid) {
            (true, false) => return (head.text.clone(), tail.text.clone()),
            (false, true) => return (tail.text.clone(), head.text.clone()),
            _ => {}
        }
    }

    // Last resort: textual order.
    (first.text.clone(), second.text.clone())
}
1018
/// Splits `text` into byte ranges approximating sentences.
///
/// A boundary is a `.`, `!` or `?` followed by a space (the range ends after
/// the punctuation, before the space), or a blank line (`\n\n`, ending before
/// the first newline). The trailing remainder becomes the final range; empty
/// input yields a single `(0, 0)` range. Only ASCII bytes are compared, so
/// every returned index falls on a UTF-8 character boundary.
fn split_sentences(text: &str) -> Vec<(usize, usize)> {
    let bytes = text.as_bytes();
    let total = text.len();
    let mut spans = Vec::new();
    let mut start = 0usize;
    let mut pos = 0usize;

    while pos < total {
        let next = pos + 1;
        // End index of the sentence if a boundary starts at `pos`.
        let split_at = if next >= total {
            None
        } else if matches!(bytes[pos], b'.' | b'!' | b'?') && bytes[next] == b' ' {
            Some(next)
        } else if bytes[pos] == b'\n' && bytes[next] == b'\n' {
            Some(pos)
        } else {
            None
        };

        match split_at {
            Some(end) => {
                spans.push((start, end));
                // Skip the separator byte and continue after it.
                start = end + 1;
                pos = start;
            }
            None => pos += 1,
        }
    }

    if start < total {
        spans.push((start, total));
    }
    if spans.is_empty() {
        spans.push((0, total));
    }
    spans
}
1055
/// Errors produced by the relation-extraction engines in this module.
#[derive(Debug, thiserror::Error)]
pub enum RelError {
    /// A model or tokenizer path was not valid UTF-8.
    #[error("invalid path: {0}")]
    InvalidPath(String),

    /// The underlying model failed to load.
    #[error("failed to load model: {0}")]
    ModelLoad(String),

    /// A pipeline stage or embedding call failed during inference.
    #[error("inference error: {0}")]
    Inference(String),
}