use std::collections::HashMap;
use crate::bib::{ParsedEntry, Span};
/// Parses a RIS document into full bibliography entries.
///
/// Each `TY` … `ER` record that carries a non-empty `ID` tag yields one
/// entry; records without such a tag are skipped silently.
///
/// # Errors
///
/// Returns an error message when the input is structurally invalid, when it
/// contains no records at all, or when extracting any single record fails
/// (the first failure wins).
pub fn parse_ris_full(input: &str) -> Result<Vec<ParsedEntry>, String> {
    let parsed = parse_records(input)?;
    if parsed.is_empty() {
        return Err("RIS file contains no records".to_string());
    }
    // `transpose` turns `Ok(None)` into `None` so `filter_map` drops records
    // without an id, while `Err` is kept and short-circuits the `collect`.
    parsed
        .iter()
        .filter_map(|rec| extract_full_entry(rec).transpose())
        .collect()
}
/// Parses a RIS document and returns only the entry identifiers.
///
/// Every record with a non-empty `ID` tag contributes its id together with
/// the byte span of that id inside `input`; records without one are skipped.
///
/// # Errors
///
/// Returns an error message when the input is structurally invalid, when it
/// contains no records at all, or when any record fails id extraction (the
/// first failure wins).
pub fn parse_ris_entries(input: &str) -> Result<Vec<(String, Span)>, String> {
    let parsed = parse_records(input)?;
    if parsed.is_empty() {
        return Err("RIS file contains no records".to_string());
    }
    // `Ok(None)` (no id) is filtered out; the first `Err` aborts the collect.
    parsed
        .iter()
        .filter_map(|rec| extract_id(rec).transpose())
        .collect()
}
/// Checks that `input` is a well-formed RIS document with at least one record.
///
/// # Errors
///
/// Propagates any structural error reported while splitting the input into
/// records, and reports `"RIS file contains no records"` when none are found.
pub fn validate_ris(input: &str) -> Result<(), String> {
    if parse_records(input)?.is_empty() {
        Err("RIS file contains no records".to_string())
    } else {
        Ok(())
    }
}
/// One `TY` … `ER` record collected from a RIS document.
#[derive(Debug)]
struct RisRecord {
    /// Tags in document order; the first is always `TY` and, once the record
    /// is complete, the last is `ER`.
    tags: Vec<RisTag>,
    /// Byte offset in the input of the start of the `TY` line.
    #[allow(dead_code)]
    start: usize,
    /// Byte offset just past the most recent tag line (line terminator excluded).
    end: usize,
}

/// A single `XX - value` tag line, plus any continuation text folded into it.
#[derive(Debug)]
struct RisTag {
    /// Two-letter ASCII-uppercase tag name, e.g. `TY`.
    name: String,
    /// Value with surrounding whitespace removed. Continuation lines are
    /// appended to it, separated by a single space.
    value: String,
    /// Byte offset in the input where the value starts on the tag line.
    value_start: usize,
    /// Byte offset just past the value on the tag line. Continuation lines
    /// are not reflected here.
    value_end: usize,
}

/// Splits `input` into `TY` … `ER` records.
///
/// Lines are classified as:
/// * tag lines (see [`parse_tag_line`]) — `TY` opens a record, `ER` closes
///   it, anything else is appended to the open record;
/// * indented non-blank lines — continuation text appended to the previous
///   tag's value (ignored when that tag is `ID`, `TY` or `ER`);
/// * blank lines — skipped.
///
/// All offsets stored in the returned records are byte offsets into `input`.
///
/// # Errors
///
/// Returns a message when a record is not closed by `ER`, when `ER` or another
/// tag appears outside a record, or when a line is neither a tag, a
/// continuation, nor blank.
fn parse_records(input: &str) -> Result<Vec<RisRecord>, String> {
    let mut records = Vec::new();
    let mut current_record: Option<RisRecord> = None;
    let mut offset = 0;

    // `split_inclusive` keeps the terminator on each piece, so the running
    // offset stays exact for both `\n` and `\r\n` line endings. (Adding a
    // fixed 1 per line would drift by one byte per CRLF line.)
    for raw in input.split_inclusive('\n') {
        let line_start = offset;
        offset += raw.len();
        // Strip the terminator the same way `str::lines` does: `\n`, and a
        // `\r` only when it directly precedes that `\n`.
        let line = match raw.strip_suffix('\n') {
            Some(body) => body.strip_suffix('\r').unwrap_or(body),
            None => raw,
        };
        let line_end = line_start + line.len();

        if let Some(tag) = parse_tag_line(line, line_start)? {
            if tag.name == "TY" {
                // A new record may not start while another is still open.
                if current_record.is_some() {
                    return Err("RIS record missing ER tag".to_string());
                }
                current_record = Some(RisRecord {
                    tags: vec![tag],
                    start: line_start,
                    end: line_end,
                });
            } else if tag.name == "ER" {
                match current_record.take() {
                    Some(mut record) => {
                        record.tags.push(tag);
                        record.end = line_end;
                        records.push(record);
                    }
                    None => {
                        return Err("RIS record has ER tag without TY tag".to_string());
                    }
                }
            } else {
                match current_record.as_mut() {
                    Some(record) => {
                        record.tags.push(tag);
                        record.end = line_end;
                    }
                    None => {
                        return Err("RIS record contains tags outside TY/ER block".to_string());
                    }
                }
            }
            continue;
        }

        let content = line.trim();
        if content.is_empty() {
            continue;
        }
        // Non-blank, non-tag text is only legal as an indented continuation.
        if !line.starts_with(char::is_whitespace) {
            return Err("RIS record contains invalid content".to_string());
        }
        let record = match current_record.as_mut() {
            Some(record) => record,
            None => return Err("RIS record contains invalid content".to_string()),
        };
        // Structural tags and the id never absorb continuation text.
        let target = record
            .tags
            .last_mut()
            .filter(|tag| !matches!(tag.name.as_str(), "ID" | "TY" | "ER"));
        if let Some(last_tag) = target {
            if !last_tag.value.is_empty() {
                last_tag.value.push(' ');
            }
            last_tag.value.push_str(content);
        }
    }

    if current_record.is_some() {
        return Err("RIS record missing ER tag".to_string());
    }
    Ok(records)
}

/// Tries to interpret `line` as a RIS tag line of the form `XX - value`.
///
/// `line` must not include its line terminator; `line_start` is the byte
/// offset of `line` within the whole input and is used to compute the
/// absolute span of the value.
///
/// Returns `Ok(None)` when the line (after trimming) does not begin with two
/// ASCII uppercase letters followed, after optional whitespace, by `-`.
/// The function currently never returns `Err`.
fn parse_tag_line(line: &str, line_start: usize) -> Result<Option<RisTag>, String> {
    let trimmed = line.trim();
    let bytes = trimmed.as_bytes();
    if bytes.len() < 2 || !bytes[0].is_ascii_uppercase() || !bytes[1].is_ascii_uppercase() {
        return Ok(None);
    }
    // Both leading bytes are ASCII, so index 2 is a valid char boundary.
    let (name, rest) = trimmed.split_at(2);
    let value = match rest.trim_start().strip_prefix('-') {
        Some(after_dash) => after_dash.trim_start(),
        None => return Ok(None),
    };

    // `value` is a suffix of `trimmed`, which itself starts right after the
    // line's leading whitespace. Deriving the offset from lengths avoids a
    // text search, which would match an earlier occurrence of the same text
    // (e.g. the `D` of `ID` in `ID  - D`) or offset 0 for an empty value.
    let leading_ws = line.len() - line.trim_start().len();
    let value_start = line_start + leading_ws + (trimmed.len() - value.len());
    let value_end = value_start + value.len();

    Ok(Some(RisTag {
        name: name.to_string(),
        value: value.to_string(),
        value_start,
        value_end,
    }))
}
/// Builds a full bibliography entry from one parsed record.
///
/// * `TY` supplies the entry type.
/// * The first `ID` tag with a non-empty value supplies the id and its span;
///   empty or repeated `ID` tags are ignored, matching `extract_id`.
/// * Every other tag except `ER` is stored in the field map under its tag
///   name. When a tag name repeats, the later value replaces the earlier one.
///
/// Returns `Ok(None)` when the record has no usable `ID`.
///
/// # Errors
///
/// Returns a message when the record lacks a `TY` or an `ER` tag.
fn extract_full_entry(record: &RisRecord) -> Result<Option<ParsedEntry>, String> {
    let mut has_ty = false;
    let mut has_er = false;
    let mut id_value: Option<(String, Span)> = None;
    let mut entry_type: Option<String> = None;
    let mut fields: HashMap<String, String> = HashMap::new();

    for tag in &record.tags {
        match tag.name.as_str() {
            "TY" => {
                has_ty = true;
                entry_type = Some(tag.value.clone());
            }
            "ER" => has_er = true,
            // `ID` gets its own unguarded arm: with a match guard, an empty
            // or duplicate `ID` would fall through to the catch-all arm and
            // end up in `fields`.
            "ID" => {
                if id_value.is_none() && !tag.value.is_empty() {
                    id_value = Some((
                        tag.value.clone(),
                        Span {
                            start: tag.value_start,
                            end: tag.value_end,
                        },
                    ));
                }
            }
            _ => {
                fields.insert(tag.name.clone(), tag.value.clone());
            }
        }
    }

    if !has_ty {
        return Err("RIS record missing TY tag".to_string());
    }
    if !has_er {
        return Err("RIS record missing ER tag".to_string());
    }
    Ok(id_value.map(|(id, span)| (id, entry_type, fields, span)))
}
/// Extracts the id of one parsed record together with its byte span.
///
/// The id is the value of the first `ID` tag whose value is non-empty;
/// `Ok(None)` is returned when the record has no such tag.
///
/// # Errors
///
/// Returns a message when the record lacks a `TY` tag or, failing that
/// check, an `ER` tag.
fn extract_id(record: &RisRecord) -> Result<Option<(String, Span)>, String> {
    let contains = |wanted: &str| record.tags.iter().any(|t| t.name == wanted);

    if !contains("TY") {
        return Err("RIS record missing TY tag".to_string());
    }
    if !contains("ER") {
        return Err("RIS record missing ER tag".to_string());
    }

    let first_id = record
        .tags
        .iter()
        .find(|t| t.name == "ID" && !t.value.is_empty());

    Ok(first_id.map(|t| {
        let span = Span {
            start: t.value_start,
            end: t.value_end,
        };
        (t.value.clone(), span)
    }))
}
#[cfg(test)]
mod tests {
    use super::*;

    // Inputs are assembled with `concat!`, one piece per RIS line, so that
    // significant leading whitespace (continuation lines) is spelled out
    // explicitly and cannot be lost when the source file is re-indented.

    /// A single well-formed record yields its id, type and fields.
    #[test]
    fn test_basic_ris() {
        let input = concat!(
            "TY - JOUR\n",
            "ID - Smith2020\n",
            "AU - Smith, John\n",
            "TI - Test Article\n",
            "PY - 2020\n",
            "ER -\n",
        );
        let result = parse_ris_full(input).unwrap();
        assert_eq!(result.len(), 1);
        let (id, entry_type, fields, _span) = &result[0];
        assert_eq!(id, "Smith2020");
        assert_eq!(entry_type, &Some("JOUR".to_string()));
        assert_eq!(fields.get("AU"), Some(&"Smith, John".to_string()));
        assert_eq!(fields.get("TI"), Some(&"Test Article".to_string()));
        assert_eq!(fields.get("PY"), Some(&"2020".to_string()));
    }

    /// Indented lines are folded into the preceding tag's value, joined by
    /// single spaces.
    #[test]
    fn test_multiline_value() {
        let input = concat!(
            "TY - JOUR\n",
            "ID - Test\n",
            "TI - First line\n",
            // Continuation lines must start with whitespace to be accepted.
            "  Second line\n",
            "  Third line\n",
            "ER -\n",
        );
        let result = parse_ris_full(input).unwrap();
        assert_eq!(result.len(), 1);
        let (_id, _entry_type, fields, _span) = &result[0];
        assert_eq!(
            fields.get("TI"),
            Some(&"First line Second line Third line".to_string())
        );
    }

    /// A record that is never closed by `ER` is rejected.
    #[test]
    fn test_missing_er() {
        let input = concat!("TY - JOUR\n", "ID - Test\n");
        let result = parse_ris_full(input);
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("missing ER"));
    }

    /// Tags that appear before any `TY` are rejected.
    #[test]
    fn test_missing_ty() {
        let input = concat!("ID - Test\n", "ER -\n");
        let result = parse_ris_full(input);
        assert!(result.is_err());
    }
}