use std::collections::{HashMap, HashSet};
/// A single set declared by a `set` line.
#[derive(Debug, Clone)]
pub struct VennSet {
    /// Set identifier, stored after quote/whitespace normalization by the parser.
    pub id: String,
    /// Optional display label; `None` when the line carried no (non-empty) label.
    pub label: Option<String>,
}
/// An overlap region declared by a `union` line.
#[derive(Debug, Clone)]
pub struct VennIntersection {
    /// Identifiers of the participating sets; the parser stores them sorted.
    pub sets: Vec<String>,
    /// Optional display label for the region.
    pub label: Option<String>,
}
/// A free-standing text annotation declared by a `text` line.
///
/// NOTE(review): no reader of these fields is visible in this part of the file,
/// which is presumably why each one carries a dead-code allowance — confirm.
#[derive(Debug, Clone)]
pub struct VennTextNode {
    /// Identifiers of the sets the text is associated with; stored sorted by the parser.
    #[allow(dead_code)]
    pub sets: Vec<String>,
    /// Identifier of the text node itself.
    #[allow(dead_code)]
    pub id: String,
    /// Optional bracketed label attached to the node.
    #[allow(dead_code)]
    pub label: Option<String>,
}
/// One `style` line: a group of targets and the declarations applied to them.
#[derive(Debug, Clone)]
pub struct VennStyleEntry {
    /// Non-empty target names the styles apply to; stored sorted by the parser.
    pub targets: Vec<String>,
    /// Style declarations keyed by property name (the text before the `:`).
    pub styles: HashMap<String, String>,
}
/// Parsed representation of a whole venn diagram, as produced by `parse`.
#[derive(Debug)]
pub struct VennDiagram {
    /// Diagram title; `Some("")` when a bare `title` line was given, `None` when absent.
    pub title: Option<String>,
    /// Sets in the order their `set` lines appeared.
    pub sets: Vec<VennSet>,
    /// Overlap regions in the order their `union` lines appeared.
    pub intersections: Vec<VennIntersection>,
    /// Text annotations in the order their `text` lines appeared.
    // NOTE(review): no reader of this field is visible in this part of the file.
    #[allow(dead_code)]
    pub text_nodes: Vec<VennTextNode>,
    /// Style entries in the order their `style` lines appeared.
    pub style_entries: Vec<VennStyleEntry>,
}
pub fn parse(input: &str) -> crate::error::ParseResult<VennDiagram> {
let mut title: Option<String> = None;
let mut sets: Vec<VennSet> = Vec::new();
let mut intersections: Vec<VennIntersection> = Vec::new();
let mut text_nodes: Vec<VennTextNode> = Vec::new();
let mut style_entries: Vec<VennStyleEntry> = Vec::new();
let mut known_sets: HashSet<String> = HashSet::new();
let mut in_header = true;
for raw in input.lines() {
let line = if let Some(p) = raw.find("%%") {
&raw[..p]
} else {
raw
};
let trimmed = line.trim();
if trimmed.is_empty() {
continue;
}
if in_header {
if trimmed.starts_with("venn") {
in_header = false;
}
continue;
}
if let Some(rest) = trimmed
.strip_prefix("title ")
.or_else(|| trimmed.strip_prefix("title\t"))
{
title = Some(rest.trim().to_string());
continue;
}
if trimmed == "title" {
title = Some(String::new());
continue;
}
if trimmed.starts_with("accTitle") || trimmed.starts_with("accDescr") {
continue;
}
if let Some(rest) = trimmed
.strip_prefix("set ")
.or_else(|| trimmed.strip_prefix("set\t"))
{
let (id, label, _size) = parse_set_line(rest.trim());
let norm_id = normalize_text(&id);
known_sets.insert(norm_id.clone());
sets.push(VennSet {
id: norm_id,
label: label.map(|l| normalize_text(&l)),
});
continue;
}
if let Some(rest) = trimmed
.strip_prefix("union ")
.or_else(|| trimmed.strip_prefix("union\t"))
{
let (ids, label, _size) = parse_union_line(rest.trim());
let mut norm_ids: Vec<String> = ids.iter().map(|id| normalize_text(id)).collect();
norm_ids.sort();
intersections.push(VennIntersection {
sets: norm_ids,
label: label.map(|l| normalize_text(&l)),
});
continue;
}
if let Some(rest) = trimmed
.strip_prefix("text ")
.or_else(|| trimmed.strip_prefix("text\t"))
{
if let Some(tn) = parse_text_line(rest.trim()) {
text_nodes.push(tn);
}
continue;
}
if let Some(rest) = trimmed
.strip_prefix("style ")
.or_else(|| trimmed.strip_prefix("style\t"))
{
if let Some(se) = parse_style_line(rest.trim()) {
style_entries.push(se);
}
continue;
}
}
crate::error::ParseResult::ok(VennDiagram {
title,
sets,
intersections,
text_nodes,
style_entries,
})
}
/// Parses the argument part of a `set` line into `(id, label, size)`.
///
/// This is a direct pass-through to `parse_id_label_size`.
fn parse_set_line(s: &str) -> (String, Option<String>, Option<f64>) {
parse_id_label_size(s)
}
/// Parses the argument part of a `union` line into `(ids, label, size)`.
///
/// The leading comma-separated id list is separated from the rest with
/// `split_ids_from_rest`; each id is trimmed, stripped of surrounding `"` characters,
/// and dropped if empty. The remainder is handed to `parse_label_size`.
fn parse_union_line(s: &str) -> (Vec<String>, Option<String>, Option<f64>) {
    let (id_list, tail) = split_ids_from_rest(s);

    let mut ids: Vec<String> = Vec::new();
    for piece in id_list.split(',') {
        let name = piece.trim().trim_matches('"');
        if !name.is_empty() {
            ids.push(name.to_string());
        }
    }

    let (label, size) = parse_label_size(tail.trim());
    (ids, label, size)
}
/// Splits `s` just before the first `[`, `"` or `:`.
///
/// Returns `(before, from_delimiter_on)`; the delimiter stays at the start of the second
/// half. When none of the three characters occurs, the result is `(s, "")`.
fn split_ids_from_rest(s: &str) -> (&str, &str) {
    match s.find(|c: char| matches!(c, '[' | '"' | ':')) {
        Some(cut) => s.split_at(cut),
        None => (s, ""),
    }
}
/// Parses `id`, optional label and optional size from one string.
///
/// The id runs up to the first whitespace character, `[` or `:` (or to the end of the
/// string); it is trimmed and stripped of surrounding `"` characters. Whatever follows is
/// trimmed and passed to `parse_label_size`.
fn parse_id_label_size(s: &str) -> (String, Option<String>, Option<f64>) {
    let ends_id = |c: char| c.is_whitespace() || matches!(c, '[' | ':');
    let (raw_id, tail) = match s.find(ends_id) {
        Some(cut) => s.split_at(cut),
        None => (s, ""),
    };

    let (label, size) = parse_label_size(tail.trim());
    (raw_id.trim().trim_matches('"').to_string(), label, size)
}
/// Parses an optional label followed by an optional `:size` suffix.
///
/// A label is only looked for when `s` starts with `[` or `"`; it is extracted with
/// `extract_bracket_label` and passed through `normalize_text`. After that, if what is
/// left starts with `:`, the text behind the colon is trimmed and parsed as `f64`
/// (an unparsable number yields `None`).
fn parse_label_size(s: &str) -> (Option<String>, Option<f64>) {
    let mut cursor = s;

    let label = if cursor.starts_with(|c: char| c == '[' || c == '"') {
        let found = extract_bracket_label(&mut cursor).map(|text| normalize_text(&text));
        // Whitespace between the label and a following `:` is skipped.
        cursor = cursor.trim();
        found
    } else {
        None
    };

    let size = cursor
        .strip_prefix(':')
        .and_then(|number| number.trim().parse::<f64>().ok());

    (label, size)
}
/// Extracts a leading `[label]` or `"label"` from `*s` and advances `*s` past it.
///
/// Leading whitespace is skipped. For the bracket form the text up to the first `]` is
/// trimmed and stripped of surrounding `"` characters; for the quote form the text up to
/// the next `"` is taken verbatim.
///
/// Returns `None` for an empty label (with `*s` still advanced past the closing
/// delimiter). When there is no opening or no closing delimiter, returns `None` and
/// leaves `*s` untouched.
fn extract_bracket_label(s: &mut &str) -> Option<String> {
    let input = s.trim_start();

    let (inner, remainder) = if let Some(body) = input.strip_prefix('[') {
        let close = body.find(']')?;
        (body[..close].trim().trim_matches('"'), &body[close + 1..])
    } else if let Some(body) = input.strip_prefix('"') {
        let close = body.find('"')?;
        (&body[..close], &body[close + 1..])
    } else {
        return None;
    };

    *s = remainder;
    if inner.is_empty() {
        None
    } else {
        Some(inner.to_string())
    }
}
/// Parses the argument part of a `text` line into a [`VennTextNode`].
///
/// Everything from the first `[` on is treated as the optional bracketed label. The part
/// before it is split on commas; the last whitespace-separated word of the last non-empty
/// comma token is the node id, and every other word names a set. Set names are normalized
/// and sorted.
///
/// A lone word (e.g. `A`) has no separate set names; it then serves as both the id and the
/// single set entry.
///
/// Returns `None` when there is nothing but whitespace/commas before the label.
fn parse_text_line(s: &str) -> Option<VennTextNode> {
    let (body, bracket_label) = match s.find('[') {
        Some(open) => (&s[..open], extract_bracket_label_from_str(&s[open..])),
        None => (s, None),
    };

    let tokens: Vec<&str> = body
        .split(',')
        .map(str::trim)
        .filter(|t| !t.is_empty())
        .collect();
    let (&id_token, leading_tokens) = tokens.split_last()?;

    // The final token may read `<set> <id>`: its last word is the id, any earlier words
    // are additional set names.
    let mut trailing_words: Vec<&str> = id_token.split_whitespace().collect();
    let actual_id = trailing_words.pop()?;

    // Collect set names exactly once. Previously a single comma-free token such as
    // `A foo` contributed all of its words as sets *and* its leading words again,
    // yielding `[A, A, foo]` instead of `[A]`.
    let mut sets: Vec<String> = leading_tokens
        .iter()
        .flat_map(|t| t.split_whitespace())
        .chain(trailing_words)
        .map(normalize_text)
        .collect();
    if sets.is_empty() {
        // Bare `text A`: keep the existing behaviour of using the word as the set too.
        sets.push(normalize_text(actual_id));
    }
    sets.sort();

    let id = normalize_text(actual_id);
    let label = bracket_label.map(|l| normalize_text(&l));
    Some(VennTextNode { sets, id, label })
}
/// Returns the label of a leading `[...]` group in `s`, if any.
///
/// `s` is trimmed first and must then start with `[`. The text up to the first `]` is
/// trimmed and stripped of surrounding `"` characters. Returns `None` when there is no
/// opening bracket, no closing bracket, or the resulting label is empty.
fn extract_bracket_label_from_str(s: &str) -> Option<String> {
    let body = s.trim().strip_prefix('[')?;
    let close = body.find(']')?;
    let inner = body[..close].trim().trim_matches('"');
    if inner.is_empty() {
        None
    } else {
        Some(inner.to_string())
    }
}
/// Parses the argument part of a `style` line into a [`VennStyleEntry`].
///
/// The input is split on commas. Segments are read as target names until the first
/// segment of the form `key:value` whose key is non-empty, consists only of alphanumeric
/// characters and `-`, and is preceded by at least one collected target. From that
/// segment on, every `key:value` segment becomes a style declaration and segments without
/// a colon are ignored.
///
/// Before that switch, a segment containing a colon that does not qualify contributes the
/// text before its colon as a target name.
/// NOTE(review): this means `B fill:#f00` yields a target named `B fill` and drops the
/// value — confirm that a space-separated form is not expected here.
///
/// Targets are normalized, sorted, and empty names removed; `None` is returned when no
/// target remains.
fn parse_style_line(s: &str) -> Option<VennStyleEntry> {
    let mut targets: Vec<String> = Vec::new();
    let mut styles: HashMap<String, String> = HashMap::new();
    let mut in_declarations = false;

    for segment in s.split(',').map(str::trim) {
        let (key, value) = match segment.find(':') {
            Some(colon) => (segment[..colon].trim(), segment[colon + 1..].trim()),
            None => {
                if !in_declarations {
                    targets.push(normalize_text(segment));
                }
                continue;
            }
        };

        if !in_declarations {
            let css_like =
                !key.is_empty() && key.chars().all(|c| c.is_alphanumeric() || c == '-');
            // Empty names still count here; they are only filtered out at the end.
            if !css_like || targets.is_empty() {
                targets.push(normalize_text(key));
                continue;
            }
            in_declarations = true;
        }

        styles.insert(key.to_string(), normalize_style_val(value));
    }

    targets.sort();
    targets.retain(|name| !name.is_empty());
    if targets.is_empty() {
        return None;
    }
    Some(VennStyleEntry { targets, styles })
}
/// Trims `s` and removes one pair of enclosing double quotes, if present.
///
/// Only a leading *and* trailing `"` (on a string of at least two characters) are
/// removed; a lone or unmatched quote is kept as is.
fn normalize_text(s: &str) -> String {
    let text = s.trim();
    text.strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .unwrap_or(text)
        .to_string()
}
/// Normalizes a style declaration value.
///
/// Currently a direct pass-through to `normalize_text`.
fn normalize_style_val(s: &str) -> String {
normalize_text(s)
}