use plg_frontend::{Parser, ProgramDirectives};
use plg_shared::{Clause, StringInterner, Term};
use tower_lsp::lsp_types::Position;
/// Converts an LSP [`Position`] (zero-based line, UTF-16 code-unit column)
/// into a byte offset into `content`.
///
/// Returns `None` when `position.line` lies past the last line of `content`.
///
/// A column beyond the end of the line is clamped to the end of that line.
/// The line terminator is not considered part of the line: for a line ending
/// in `\r\n` the clamped offset points at the `\r`, not between `\r` and `\n`.
/// A column that falls inside a surrogate pair resolves to the start of the
/// following character (or the end of the line if there is none).
pub fn position_to_byte_offset(content: &str, position: Position) -> Option<usize> {
    let line_start = nth_line_byte_offset(content, position.line as usize)?;

    // Isolate the text of the requested line, without its terminator.
    let rest = &content[line_start..];
    let line = rest.find('\n').map_or(rest, |newline_at| &rest[..newline_at]);
    // On CRLF documents the `\r` belongs to the terminator, so it must not be
    // counted as a column or included in the clamped end-of-line offset.
    let line = line.strip_suffix('\r').unwrap_or(line);

    let target = position.character as usize;
    let mut consumed_utf16: usize = 0;
    for (offset, ch) in line.char_indices() {
        if consumed_utf16 >= target {
            return Some(line_start + offset);
        }
        // Characters outside the BMP occupy two UTF-16 code units.
        consumed_utf16 += ch.len_utf16();
    }

    // Column at or past the end of the line: clamp to the line's end.
    Some(line_start + line.len())
}
/// Returns the byte offset at which zero-based line `n` starts in `content`.
///
/// Only `\n` is treated as a line separator. Line 0 always starts at offset
/// 0; line `n > 0` starts immediately after the `n`-th `\n`. Returns `None`
/// when `content` contains fewer than `n` newlines.
fn nth_line_byte_offset(content: &str, n: usize) -> Option<usize> {
    match n.checked_sub(1) {
        // n == 0: the first line starts at the very beginning.
        None => Some(0),
        // Otherwise the line starts one byte past the n-th newline.
        Some(preceding) => content
            .match_indices('\n')
            .nth(preceding)
            .map(|(newline_at, _)| newline_at + 1),
    }
}
/// Parses `content`, falling back to chunk-by-chunk recovery when the text
/// as a whole does not parse.
///
/// The whole input is tried first; if that succeeds its clauses, directives
/// and interner are returned as-is. Otherwise the input is split with
/// `clause_chunks`, each chunk is parsed on its own against one shared, fresh
/// interner, and chunks that fail to parse are skipped. From every chunk that
/// parses, the clauses and the `dynamic` entries of its directives are
/// accumulated; no other part of a chunk's directives is merged.
///
/// Returns `None` only when the fallback recovers neither a clause nor a
/// `dynamic` entry.
pub fn parse_best_effort(
    content: &str,
) -> Option<(Vec<Clause>, ProgramDirectives, StringInterner)> {
    // Fast path: the document is well-formed as a whole.
    let mut whole_interner = StringInterner::new();
    if let Ok((clauses, directives)) =
        Parser::parse_program_with_directives(content, &mut whole_interner)
    {
        return Some((clauses, directives, whole_interner));
    }

    // Recovery path: start over with a clean interner so that nothing from
    // the failed whole-document attempt carries over.
    let mut interner = StringInterner::new();
    let mut recovered_clauses: Vec<Clause> = Vec::new();
    let mut recovered_directives = ProgramDirectives::default();

    for piece in clause_chunks(content) {
        let Ok((clauses, directives)) =
            Parser::parse_program_with_directives(piece, &mut interner)
        else {
            // A chunk that does not parse is dropped.
            continue;
        };
        recovered_clauses.extend(clauses);
        recovered_directives.dynamic.extend(directives.dynamic);
    }

    let recovered_nothing =
        recovered_clauses.is_empty() && recovered_directives.dynamic.is_empty();
    if recovered_nothing {
        return None;
    }
    Some((recovered_clauses, recovered_directives, interner))
}
/// Splits `content` into contiguous slices at likely clause boundaries.
///
/// A boundary is placed directly after:
/// * a `.` that is followed by a space, tab, `\r`, `\n`, or the end of the
///   input; and
/// * a `\n` whose next byte is an ASCII lowercase letter, `_`, or `:`.
///
/// The scan is purely byte-based and makes no attempt to recognise quoting
/// or comments. Any text after the last boundary is not returned.
fn clause_chunks(content: &str) -> Vec<&str> {
    let bytes = content.as_bytes();
    let mut pieces: Vec<&str> = Vec::new();
    let mut piece_start = 0;

    for (idx, &byte) in bytes.iter().enumerate() {
        let following = bytes.get(idx + 1).copied();
        let ends_piece = match byte {
            // Clause terminator: a dot followed by whitespace or end of input.
            b'.' => matches!(following, None | Some(b' ' | b'\t' | b'\r' | b'\n')),
            // A line break followed by something that looks like the start of
            // a new clause or directive at column zero.
            b'\n' => following
                .is_some_and(|next| next.is_ascii_lowercase() || next == b'_' || next == b':'),
            _ => false,
        };
        if ends_piece {
            // Boundaries always follow an ASCII byte, so slicing here stays
            // on a UTF-8 character boundary. `piece_start <= idx` always
            // holds, so the slice is never empty.
            let piece_end = idx + 1;
            pieces.push(&content[piece_start..piece_end]);
            piece_start = piece_end;
        }
    }

    pieces
}
/// Returns the identifier-like word that touches `position` in `content`.
///
/// A word is a maximal run of alphanumeric characters and `_`. The run is
/// grown in both directions from the cursor, so a cursor placed directly
/// before, inside, or directly after a word selects that word. Returns
/// `None` when the position cannot be resolved to a byte offset or when no
/// word character is adjacent to the cursor.
pub fn word_at_position(content: &str, position: Position) -> Option<String> {
    let cursor = position_to_byte_offset(content, position)?;
    let (before, after) = content.split_at(cursor);
    let is_word_char = |c: char| c == '_' || c.is_alphanumeric();

    // Walk left to the nearest non-word character; the word begins right
    // after it. A line break is never a word character, so the scan cannot
    // leave the cursor's line.
    let word_start = before
        .char_indices()
        .rev()
        .find(|&(_, c)| !is_word_char(c))
        .map_or(0, |(idx, c)| idx + c.len_utf8());

    // Walk right to the nearest non-word character; the word ends just
    // before it, or at the end of the input when there is none.
    let word_end = after
        .find(|c: char| !is_word_char(c))
        .map_or(content.len(), |idx| cursor + idx);

    if word_start == word_end {
        return None;
    }
    Some(content[word_start..word_end].to_string())
}
/// Derives the `(name, arity)` pair for a clause head.
///
/// An atom yields its resolved name with arity 0; a compound term yields its
/// resolved functor name with the number of arguments as arity. Any other
/// kind of term yields `None`.
pub fn predicate_indicator(head: &Term, interner: &StringInterner) -> Option<(String, usize)> {
    let (name, arity) = match head {
        Term::Atom(id) => (interner.resolve(*id), 0),
        Term::Compound { functor, args } => (interner.resolve(*functor), args.len()),
        _ => return None,
    };
    Some((name.to_string(), arity))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building an LSP position from a line and a column.
    fn pos(line: u32, col: u32) -> Position {
        Position {
            line,
            character: col,
        }
    }

    /// Runs `parse_best_effort` on `src` (which must recover something) and
    /// returns the predicate names of all recovered clause heads.
    fn recovered_names(src: &str) -> Vec<String> {
        let (clauses, _, interner) = parse_best_effort(src).expect("recovery");
        clauses
            .iter()
            .filter_map(|clause| predicate_indicator(&clause.head, &interner))
            .map(|(name, _)| name)
            .collect()
    }

    #[test]
    fn position_to_byte_offset_clamps_past_line_end() {
        let off = position_to_byte_offset("hello\nworld", pos(0, 99));
        assert_eq!(off, Some(5));
    }

    #[test]
    fn position_to_byte_offset_handles_emoji_surrogate_pair() {
        // The emoji is 4 bytes in UTF-8 and 2 code units in UTF-16.
        let content = "🐱x";
        assert_eq!(position_to_byte_offset(content, pos(0, 2)), Some(4));
        assert_eq!(position_to_byte_offset(content, pos(0, 3)), Some(5));
    }

    #[test]
    fn position_to_byte_offset_handles_multibyte_ascii_mix() {
        // A multibyte character on the previous line must not skew offsets
        // measured within the following ASCII-only line.
        let content = "% é\nfoo";
        let line1_start = position_to_byte_offset(content, pos(1, 0)).unwrap();
        let at_3 = position_to_byte_offset(content, pos(1, 3)).unwrap();
        assert_eq!(at_3 - line1_start, 3);
    }

    #[test]
    fn position_to_byte_offset_returns_none_for_line_past_end() {
        assert_eq!(position_to_byte_offset("one line", pos(5, 0)), None);
    }

    #[test]
    fn parse_best_effort_recovers_after_unfinished_trailing_clause() {
        let names = recovered_names("p(foo).\nq(bar) :- mem");
        assert!(names.iter().any(|n| n == "p"));
    }

    #[test]
    fn parse_best_effort_recovers_above_and_below_a_broken_middle_clause() {
        let names = recovered_names("above(X) :- field(X).\nbroken :- mem\nbelow(Y).\n");
        assert!(
            names.iter().any(|n| n == "above"),
            "expected `above`, got {:?}",
            names
        );
        assert!(
            names.iter().any(|n| n == "below"),
            "expected `below` (below the broken clause), got {:?}",
            names
        );
        assert!(
            !names.iter().any(|n| n == "broken"),
            "broken clause must not parse, got {:?}",
            names
        );
    }

    #[test]
    fn parse_best_effort_handles_multiple_broken_clauses() {
        let names = recovered_names("a(1).\nb :- ?\nc(2).\nd :- $\ne(3).\n");
        for expected in ["a", "c", "e"] {
            assert!(names.iter().any(|n| n == expected), "got {:?}", names);
        }
    }
}