use anyhow::Result;
use super::context::TrigParserState;
/// Returns `line` with a trailing `#` comment removed.
///
/// A `#` starts a comment only when it appears outside a double-quoted string
/// and outside an IRI reference (`<...>`). IRIs routinely contain `#` as the
/// fragment separator (for example `<http://www.w3.org/1999/02/22-rdf-syntax-ns#>`),
/// so they must not be cut there.
///
/// The returned slice borrows from `line`; whitespace preceding the comment is
/// kept. If no comment is found, `line` is returned unchanged.
pub fn strip_inline_comment(line: &str) -> &str {
    let mut in_string = false;
    let mut in_iri = false;
    let mut escape_next = false;
    let mut chars = line.char_indices().peekable();
    while let Some((idx, ch)) = chars.next() {
        if escape_next {
            // The character after a backslash inside a string is taken literally.
            escape_next = false;
            continue;
        }
        if in_iri {
            // An IRI reference ends at `>`. It cannot contain whitespace, so
            // whitespace also ends the scan; this keeps an unterminated `<`
            // from hiding a real comment later on the line.
            if ch == '>' || ch.is_whitespace() {
                in_iri = false;
            }
            continue;
        }
        match ch {
            '\\' if in_string => escape_next = true,
            '"' => in_string = !in_string,
            '<' if !in_string => {
                if matches!(chars.peek(), Some(&(_, '<'))) {
                    // `<<` opens a quoted triple, not an IRI reference.
                    chars.next();
                } else {
                    in_iri = true;
                }
            }
            // `idx` is the byte offset of `#`, always a valid char boundary.
            '#' if !in_string => return &line[..idx],
            _ => {}
        }
    }
    line
}
/// Reports whether `statement` contains a terminated Turtle statement.
///
/// * A directive (trimmed text starting with `@prefix` or `@base`) is complete
///   exactly when the trimmed text ends with `.`.
/// * Any other statement is complete as soon as a `.` is found that is
///   outside double-quoted strings, IRI references (`<...>`), quoted triples
///   (`<< ... >>`) and annotation blocks (`{| ... |}`), and that is not
///   immediately followed by an ASCII digit (so the dot in `0.9` is skipped).
///
/// Returns `false` if no such terminator is found.
pub fn is_complete_turtle_statement(statement: &str) -> bool {
    let trimmed = statement.trim();
    if trimmed.starts_with("@prefix") || trimmed.starts_with("@base") {
        return trimmed.ends_with('.');
    }

    let mut in_string = false;
    let mut escape_next = false;
    let mut quoted_triple_depth: i32 = 0;
    let mut annotation_depth: i32 = 0;
    let mut chars = statement.chars().peekable();

    while let Some(ch) = chars.next() {
        if escape_next {
            // The character after a backslash inside a string is taken literally.
            escape_next = false;
            continue;
        }
        match ch {
            '\\' if in_string => escape_next = true,
            '"' => in_string = !in_string,
            '<' if !in_string && chars.peek() == Some(&'<') => {
                chars.next();
                quoted_triple_depth += 1;
            }
            '<' if !in_string => {
                // A single `<` opens an IRI reference. Skip its body so that
                // dots such as the one in `example.org` are not mistaken for
                // the statement terminator. An IRI cannot contain whitespace,
                // so stop there as well if the closing `>` is missing.
                for c in chars.by_ref() {
                    if c == '>' || c.is_whitespace() {
                        break;
                    }
                }
            }
            '>' if !in_string && quoted_triple_depth > 0 && chars.peek() == Some(&'>') => {
                chars.next();
                quoted_triple_depth -= 1;
            }
            '{' if !in_string && quoted_triple_depth == 0 && chars.peek() == Some(&'|') => {
                chars.next();
                annotation_depth += 1;
            }
            '|' if !in_string
                && quoted_triple_depth == 0
                && annotation_depth > 0
                && chars.peek() == Some(&'}') =>
            {
                chars.next();
                annotation_depth -= 1;
            }
            '.' if !in_string && quoted_triple_depth == 0 && annotation_depth == 0 => {
                // A dot directly followed by a digit is treated as part of a
                // numeric literal rather than as the terminator.
                if chars.peek().is_some_and(|c| c.is_ascii_digit()) {
                    continue;
                }
                return true;
            }
            _ => {}
        }
    }
    false
}
/// Reports whether `statement` forms a complete unit of TriG input.
///
/// Returns `true` when any of the following holds:
///
/// * the trimmed text starts with `@prefix` or `@base` and ends with `.`
///   (a directive that does not end with `.` yields `false` immediately);
/// * the trimmed text is exactly `}`;
/// * a `.` is found outside double-quoted strings, IRI references (`<...>`),
///   quoted triples (`<< ... >>`) and braces, and it is not immediately
///   followed by an ASCII digit;
/// * after the scan, more `{` than `}` were seen and `state.in_graph_block`
///   is `false`;
/// * after the scan, the brace count is zero, `state.in_graph_block` is
///   `true`, and the trimmed text ends with `}`.
///
/// Side effect: every `{` encountered outside strings, IRI references and
/// quoted triples while `state.in_graph_block` is `false` sets
/// `state.parsing_graph_name` to `false`.
pub fn is_complete_trig_statement(statement: &str, state: &mut TrigParserState) -> bool {
    let trimmed = statement.trim();
    if trimmed.starts_with("@prefix") || trimmed.starts_with("@base") {
        return trimmed.ends_with('.');
    }
    if trimmed == "}" {
        return true;
    }

    let mut brace_count: i32 = 0;
    let mut in_string = false;
    let mut escape_next = false;
    let mut quoted_triple_depth: i32 = 0;
    let mut chars = statement.chars().peekable();

    while let Some(ch) = chars.next() {
        if escape_next {
            // The character after a backslash inside a string is taken literally.
            escape_next = false;
            continue;
        }
        match ch {
            '\\' if in_string => escape_next = true,
            '"' => in_string = !in_string,
            '<' if !in_string && chars.peek() == Some(&'<') => {
                chars.next();
                quoted_triple_depth += 1;
            }
            '<' if !in_string => {
                // A single `<` opens an IRI reference. Skip its body so that
                // dots such as the one in `example.org` are not mistaken for
                // the statement terminator (which would also return before a
                // following `{` is examined). An IRI cannot contain
                // whitespace, so stop there too if the closing `>` is missing.
                for c in chars.by_ref() {
                    if c == '>' || c.is_whitespace() {
                        break;
                    }
                }
            }
            '>' if !in_string && quoted_triple_depth > 0 && chars.peek() == Some(&'>') => {
                chars.next();
                quoted_triple_depth -= 1;
            }
            '{' if !in_string && quoted_triple_depth == 0 => {
                brace_count += 1;
                if !state.in_graph_block {
                    state.parsing_graph_name = false;
                }
            }
            '}' if !in_string && quoted_triple_depth == 0 => {
                // NOTE(review): on an `i32` this saturates at `i32::MIN`, not
                // at zero, so an unmatched `}` leaves the count negative and
                // both `brace_count == 0` checks below fail. Confirm whether
                // clamping at zero was intended.
                brace_count = brace_count.saturating_sub(1);
            }
            '.' if !in_string && quoted_triple_depth == 0 && brace_count == 0 => {
                // A dot directly followed by a digit is treated as part of a
                // numeric literal rather than as the terminator.
                if chars.peek().is_some_and(|c| c.is_ascii_digit()) {
                    continue;
                }
                return true;
            }
            _ => {}
        }
    }

    // An unclosed `{` seen while not inside a graph block.
    if brace_count > 0 && !state.in_graph_block {
        return true;
    }
    // Balanced braces ending in `}` while inside a graph block.
    if brace_count == 0 && state.in_graph_block && trimmed.ends_with('}') {
        return true;
    }
    false
}
/// Splits `pattern` into whitespace-separated terms.
///
/// Whitespace (space, tab, carriage return, line feed) separates terms only
/// when it occurs outside a double-quoted string and outside a quoted triple
/// (`<< ... >>`); a string literal or a whole quoted triple therefore stays a
/// single term. Each emitted term is trimmed and empty terms are dropped.
fn split_terms(pattern: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut current = String::new();
    let mut chars = pattern.chars().peekable();
    // Nesting level of `<< ... >>`. Unsigned and saturating, so a stray `>>`
    // cannot push it below zero and thereby disable splitting for the rest of
    // the input.
    let mut depth: usize = 0;
    let mut in_string = false;
    let mut escape_next = false;

    while let Some(ch) = chars.next() {
        if escape_next {
            // Keep the escaped character verbatim; it cannot end the string.
            current.push(ch);
            escape_next = false;
            continue;
        }
        match ch {
            '\\' if in_string => {
                escape_next = true;
                current.push(ch);
            }
            '"' => {
                in_string = !in_string;
                current.push(ch);
            }
            '<' if !in_string && chars.peek() == Some(&'<') => {
                chars.next();
                depth += 1;
                current.push_str("<<");
            }
            '>' if !in_string && chars.peek() == Some(&'>') => {
                chars.next();
                depth = depth.saturating_sub(1);
                current.push_str(">>");
            }
            // Line breaks separate terms just like spaces and tabs, so input
            // that spans several lines is split correctly.
            ' ' | '\t' | '\r' | '\n' if !in_string && depth == 0 => {
                let term = current.trim();
                if !term.is_empty() {
                    tokens.push(term.to_string());
                    current.clear();
                }
            }
            _ => current.push(ch),
        }
    }

    // Flush whatever is left after the last separator.
    let term = current.trim();
    if !term.is_empty() {
        tokens.push(term.to_string());
    }
    tokens
}

/// Tokenizes a triple pattern into its terms.
///
/// Terms are separated by whitespace; double-quoted strings and quoted
/// triples (`<< ... >>`) are each kept as one term. The number of terms is
/// not validated here.
///
/// # Errors
///
/// This implementation never returns `Err`; the `Result` return type is kept
/// so existing callers continue to compile.
pub fn tokenize_triple(pattern: &str) -> Result<Vec<String>> {
    Ok(split_terms(pattern))
}

/// Tokenizes a quad pattern into its terms.
///
/// Uses the same splitting rules as [`tokenize_triple`]: terms are separated
/// by whitespace; double-quoted strings and quoted triples (`<< ... >>`) are
/// each kept as one term. The number of terms is not validated here.
///
/// # Errors
///
/// This implementation never returns `Err`; the `Result` return type is kept
/// so existing callers continue to compile.
pub fn tokenize_quad(pattern: &str) -> Result<Vec<String>> {
    Ok(split_terms(pattern))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Comments are stripped only when `#` lies outside a string literal.
    #[test]
    fn test_strip_inline_comment() {
        let cases = [
            ("ex:foo ex:bar .", "ex:foo ex:bar ."),
            ("ex:foo ex:bar . # comment", "ex:foo ex:bar . "),
            (
                "ex:foo ex:bar \"value # not a comment\" .",
                "ex:foo ex:bar \"value # not a comment\" .",
            ),
            (
                "ex:foo ex:bar \"value\\\" # still in string\" . # comment",
                "ex:foo ex:bar \"value\\\" # still in string\" . ",
            ),
        ];
        for (input, expected) in cases {
            assert_eq!(strip_inline_comment(input), expected);
        }
    }

    /// Each input is paired with the completeness verdict it must produce.
    #[test]
    fn test_is_complete_turtle_statement() {
        let cases = [
            ("ex:alice ex:knows ex:bob .", true),
            ("@prefix ex: <http://example.org/> .", true),
            ("ex:alice ex:knows ex:bob", false),
            ("@prefix ex: <http://example.org/>", false),
            ("<< ex:alice ex:knows ex:bob >> ex:certainty 0.9 .", true),
            ("<< ex:alice ex:knows ex:bob >> ex:certainty 0.9", false),
            ("ex:alice ex:knows ex:bob {| ex:since 2020 |} .", true),
            ("ex:alice ex:knows ex:bob {| ex:since 2020 |}", false),
        ];
        for (input, expected) in cases {
            assert_eq!(
                is_complete_turtle_statement(input),
                expected,
                "input: {input:?}"
            );
        }
    }

    /// Plain terms, a quoted triple, and a string literal with a space.
    #[test]
    fn test_tokenize_triple() {
        let plain = tokenize_triple("ex:alice ex:knows ex:bob").unwrap();
        assert_eq!(plain, vec!["ex:alice", "ex:knows", "ex:bob"]);

        let quoted = tokenize_triple("<< ex:alice ex:knows ex:bob >> ex:certainty 0.9").unwrap();
        assert_eq!(
            quoted,
            vec!["<< ex:alice ex:knows ex:bob >>", "ex:certainty", "0.9"]
        );

        let literal = tokenize_triple("ex:alice ex:name \"Alice Wonder\"").unwrap();
        assert_eq!(literal, vec!["ex:alice", "ex:name", "\"Alice Wonder\""]);
    }

    /// Four-term patterns, with and without a quoted triple as first term.
    #[test]
    fn test_tokenize_quad() {
        let plain = tokenize_quad("ex:alice ex:knows ex:bob ex:graph1").unwrap();
        assert_eq!(plain, vec!["ex:alice", "ex:knows", "ex:bob", "ex:graph1"]);

        let quoted =
            tokenize_quad("<< ex:alice ex:knows ex:bob >> ex:certainty 0.9 ex:graph1").unwrap();
        let expected = vec![
            "<< ex:alice ex:knows ex:bob >>",
            "ex:certainty",
            "0.9",
            "ex:graph1",
        ];
        assert_eq!(quoted, expected);
    }

    /// Covers a triple, a directive, a graph opener, a graph closer and an
    /// unterminated triple.
    #[test]
    fn test_is_complete_trig_statement() {
        // The first two checks deliberately share one state value.
        let mut shared = TrigParserState::new();
        for input in ["ex:alice ex:knows ex:bob .", "@prefix ex: <http://example.org/> ."] {
            assert!(
                is_complete_trig_statement(input, &mut shared),
                "input: {input:?}"
            );
        }

        let mut opening = TrigParserState::new();
        assert!(is_complete_trig_statement("ex:graph1 {", &mut opening));

        let mut inside_graph = TrigParserState::new();
        inside_graph.in_graph_block = true;
        assert!(is_complete_trig_statement("}", &mut inside_graph));

        let mut unterminated = TrigParserState::new();
        assert!(!is_complete_trig_statement(
            "ex:alice ex:knows ex:bob",
            &mut unterminated
        ));
    }
}