lattix 0.7.0

Knowledge graph substrate: core types + basic algorithms + formats
//! Triple type for knowledge graphs.
//!
//! A triple represents a (subject, predicate, object) statement.

use crate::{EntityId, Error, RelationType, Result};
use serde::{Deserialize, Serialize};
use std::fmt;

/// A (subject, predicate, object) triple.
///
/// This is the fundamental unit of a knowledge graph: one statement that
/// relates a subject entity to an object entity through a predicate.
///
/// All fields are private. Read them through the accessor methods and attach
/// the optional metadata with `with_confidence` / `with_source`.
///
/// Equality is derived, so two triples compare equal only when every field
/// matches, including the optional confidence and source.
///
/// # Example
///
/// ```rust
/// use lattix::Triple;
///
/// let triple = Triple::new("Apple", "founded_by", "Steve Jobs");
/// assert_eq!(triple.subject().as_str(), "Apple");
/// assert_eq!(triple.predicate().as_str(), "founded_by");
/// assert_eq!(triple.object().as_str(), "Steve Jobs");
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Triple {
    /// Subject entity (the thing the statement is about).
    subject: EntityId,

    /// Predicate (relation type) linking the subject to the object.
    predicate: RelationType,

    /// Object entity. When produced by the N-Triples parser this may also hold
    /// a literal (kept with its quotes) or a blank-node label (`_:id`).
    object: EntityId,

    /// Optional confidence score in `[0.0, 1.0]`. Defaults to `None`, which is
    /// treated as full confidence (`1.0`) when the triple is added to a
    /// [`KnowledgeGraph`].
    confidence: Option<f32>,

    /// Source document or provenance. `None` until set explicitly.
    source: Option<String>,
}

impl Triple {
    /// Create a new triple.
    pub fn new(
        subject: impl Into<EntityId>,
        predicate: impl Into<RelationType>,
        object: impl Into<EntityId>,
    ) -> Self {
        Self {
            subject: subject.into(),
            predicate: predicate.into(),
            object: object.into(),
            confidence: None,
            source: None,
        }
    }

    /// Get the subject entity.
    pub fn subject(&self) -> &EntityId {
        &self.subject
    }

    /// Get the predicate (relation type).
    pub fn predicate(&self) -> &RelationType {
        &self.predicate
    }

    /// Get the object entity.
    pub fn object(&self) -> &EntityId {
        &self.object
    }

    /// Get the confidence score.
    pub fn confidence(&self) -> Option<f32> {
        self.confidence
    }

    /// Get the source/provenance.
    pub fn source(&self) -> Option<&str> {
        self.source.as_deref()
    }

    /// Set confidence score.
    pub fn with_confidence(mut self, confidence: f32) -> Self {
        self.confidence = Some(confidence.clamp(0.0, 1.0));
        self
    }

    /// Set source/provenance.
    pub fn with_source(mut self, source: impl Into<String>) -> Self {
        self.source = Some(source.into());
        self
    }

    /// Parse from N-Triples format.
    ///
    /// Format: `<subject> <predicate> <object> .`
    ///
    /// # Example
    ///
    /// ```rust
    /// use lattix::Triple;
    ///
    /// let line = r#"<http://example.org/Apple> <http://example.org/founded_by> <http://example.org/Steve_Jobs> ."#;
    /// let triple = Triple::from_ntriples(line).unwrap();
    /// ```
    pub fn from_ntriples(line: &str) -> Result<Self> {
        let line = line.trim();
        if line.is_empty() || line.starts_with('#') {
            return Err(Error::ParseTriple("Empty or comment line".into()));
        }

        // Simple N-Triples parser
        // Format: <subject> <predicate> <object> .
        // Handles IRIs (<...>), blank nodes (_:xxx), and literals ("...")
        let mut parts = Vec::new();
        let mut current = String::new();
        let mut in_uri = false;
        let mut in_literal = false;
        let mut in_bnode = false;
        let mut escape_next = false;

        for c in line.chars() {
            if escape_next {
                current.push(c);
                escape_next = false;
                continue;
            }

            match c {
                '\\' => {
                    escape_next = true;
                    current.push(c);
                }
                '<' if !in_literal && !in_bnode && current.is_empty() => {
                    in_uri = true;
                }
                // Datatype IRI in literal suffix: "42"^^<xsd:integer>
                '<' if !in_literal && !in_bnode && !current.is_empty() => {
                    current.push(c);
                }
                '>' if in_uri && !in_literal => {
                    in_uri = false;
                    parts.push(current.clone());
                    current.clear();
                }
                // Closing > for datatype IRI in literal suffix
                '>' if !in_uri && !in_literal && !current.is_empty() => {
                    current.push(c);
                }
                '"' if !in_uri && !in_bnode => {
                    in_literal = !in_literal;
                    current.push(c);
                }
                // Blank node: starts with _: when not inside another term
                '_' if !in_uri && !in_literal && !in_bnode && current.is_empty() => {
                    in_bnode = true;
                    current.push(c);
                }
                // Blank node ends at whitespace
                ' ' | '\t' if in_bnode => {
                    in_bnode = false;
                    parts.push(current.clone());
                    current.clear();
                }
                // Whitespace outside any term: flush accumulated token (literal suffix, etc.)
                ' ' | '\t' if !in_uri && !in_literal && !in_bnode && !current.is_empty() => {
                    parts.push(current.clone());
                    current.clear();
                }
                '.' if !in_uri && !in_literal && !in_bnode && current.is_empty() => {
                    // End of triple
                    break;
                }
                _ if in_uri || in_literal || in_bnode => {
                    current.push(c);
                }
                // Accumulate literal suffixes (@en, ^^<datatype>) after closing quote
                _ if !current.is_empty() => {
                    current.push(c);
                }
                _ => {}
            }
        }

        if parts.len() < 3 {
            return Err(Error::InvalidNTriples(format!(
                "Expected 3 parts, got {}: {}",
                parts.len(),
                line
            )));
        }

        Ok(Self::new(
            parts[0].clone(),
            parts[1].clone(),
            parts[2].clone(),
        ))
    }

    /// Convert to N-Triples format.
    pub fn to_ntriples(&self) -> String {
        // Minimal RDF-ish rendering:
        // - Named nodes (IRIs) are written as `<iri>`
        // - Blank nodes are written as `_:id`
        // - Literals are written as-is if `self.object` already starts with `"`
        //
        // This keeps `Triple` usable in both “simple KG” mode and the `formats` module
        // (which may store literals in the object position).
        fn render_iri_or_blank(s: &str) -> String {
            if s.starts_with("_:") {
                s.to_string()
            } else if s.starts_with('<') && s.ends_with('>') {
                // Accept bracketed form defensively.
                s.to_string()
            } else {
                format!("<{}>", s)
            }
        }
        fn render_object(s: &str) -> String {
            if s.starts_with('"') || s.starts_with("_:") || (s.starts_with('<') && s.ends_with('>'))
            {
                s.to_string()
            } else {
                format!("<{}>", s)
            }
        }

        format!(
            "{} {} {} .",
            render_iri_or_blank(self.subject.as_str()),
            render_iri_or_blank(self.predicate.as_str()),
            render_object(self.object.as_str())
        )
    }
}

impl fmt::Display for Triple {
    /// Writes the triple as `(subject, predicate, object)`.
    ///
    /// Only the three terms are shown; confidence and source are omitted.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            subject,
            predicate,
            object,
            ..
        } = self;
        f.write_fmt(format_args!("({}, {}, {})", subject, predicate, object))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Terms passed to `Triple::new` are readable back verbatim.
    #[test]
    fn test_triple_creation() {
        let triple = Triple::new("Apple", "founded_by", "Steve Jobs");
        let terms = (
            triple.subject().as_str(),
            triple.predicate().as_str(),
            triple.object().as_str(),
        );
        assert_eq!(terms, ("Apple", "founded_by", "Steve Jobs"));
    }

    /// Rendering to N-Triples and parsing the result yields the same terms.
    #[test]
    fn test_ntriples_roundtrip() {
        let before = Triple::new(
            "http://example.org/Apple",
            "http://example.org/founded_by",
            "http://example.org/Steve_Jobs",
        );

        let after = Triple::from_ntriples(&before.to_ntriples()).unwrap();

        assert_eq!(before.subject(), after.subject());
        assert_eq!(before.predicate(), after.predicate());
        assert_eq!(before.object(), after.object());
    }

    /// IRIs are stored without their surrounding angle brackets.
    #[test]
    fn test_parse_ntriples() {
        const LINE: &str = r#"<http://example.org/Apple> <http://example.org/type> <http://example.org/Company> ."#;

        let parsed = Triple::from_ntriples(LINE).unwrap();
        let terms = (
            parsed.subject().as_str(),
            parsed.predicate().as_str(),
            parsed.object().as_str(),
        );

        assert_eq!(
            terms,
            (
                "http://example.org/Apple",
                "http://example.org/type",
                "http://example.org/Company",
            )
        );
    }
}