Skip to main content

bids_core/
hed.rs

//! HED (Hierarchical Event Descriptors) tag parsing.
//!
//! BIDS events files may include a `HED` column containing HED tags that
//! describe events in a structured, hierarchical vocabulary. This module
//! provides basic parsing and access to HED tag strings.
//!
//! See: <https://www.hedtags.org/> and
//! <https://bids-specification.readthedocs.io/en/stable/appendices/hed.html>
//!
//! # Example
//!
//! ```
//! use bids_core::hed::{parse_hed_string, HedTag};
//!
//! let tags = parse_hed_string("Sensory-event, Visual-presentation, (Item/Object/Man-made/Vehicle/Car, Color/Red)");
//! assert_eq!(tags.len(), 3);
//! assert_eq!(tags[0].tag, "Sensory-event");
//! assert!(tags[2].is_group());
//! ```
20
/// One element of a HED annotation: either a plain tag or a parenthesised
/// tag group.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct HedTag {
    /// Textual form of this element: the tag itself (e.g. `"Sensory-event"`)
    /// or, for a group, its rendered form such as `"(A, B)"`.
    pub tag: String,
    /// Members of the group when this element is a `(...)` group; empty for
    /// a plain tag.
    pub children: Vec<HedTag>,
}
29
30impl HedTag {
31    /// Create a simple (non-group) tag.
32    #[must_use]
33    pub fn simple(tag: &str) -> Self {
34        Self {
35            tag: tag.trim().to_string(),
36            children: Vec::new(),
37        }
38    }
39
40    /// Create a group tag containing child tags.
41    #[must_use]
42    pub fn group(children: Vec<HedTag>) -> Self {
43        let tag = format!(
44            "({})",
45            children
46                .iter()
47                .map(|c| c.tag.as_str())
48                .collect::<Vec<_>>()
49                .join(", ")
50        );
51        Self { tag, children }
52    }
53
54    /// Returns `true` if this is a tag group (has children).
55    #[must_use]
56    pub fn is_group(&self) -> bool {
57        !self.children.is_empty()
58    }
59
60    /// Get the leaf tag name (last component of a hierarchical path).
61    ///
62    /// For `"Item/Object/Man-made/Vehicle/Car"` returns `"Car"`.
63    #[must_use]
64    pub fn leaf(&self) -> &str {
65        self.tag.rsplit('/').next().unwrap_or(&self.tag)
66    }
67
68    /// Get all path components of a hierarchical tag.
69    ///
70    /// For `"Item/Object/Vehicle/Car"` returns `["Item", "Object", "Vehicle", "Car"]`.
71    #[must_use]
72    pub fn components(&self) -> Vec<&str> {
73        self.tag.split('/').collect()
74    }
75
76    /// Check if this tag starts with a given prefix path.
77    #[must_use]
78    pub fn starts_with(&self, prefix: &str) -> bool {
79        self.tag.starts_with(prefix)
80    }
81}
82
83impl std::fmt::Display for HedTag {
84    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
85        write!(f, "{}", self.tag)
86    }
87}
88
89/// Parse a HED annotation string into a list of tags and tag groups.
90///
91/// HED strings use commas to separate tags, and parentheses to group
92/// related tags. Hierarchical levels are separated by `/`.
93///
94/// # Example
95///
96/// ```
97/// use bids_core::hed::parse_hed_string;
98///
99/// let tags = parse_hed_string("Sensory-event, (Item/Object/Car, Color/Red)");
100/// assert_eq!(tags.len(), 2);
101/// assert!(!tags[0].is_group());
102/// assert!(tags[1].is_group());
103/// assert_eq!(tags[1].children.len(), 2);
104/// ```
105#[must_use]
106pub fn parse_hed_string(hed: &str) -> Vec<HedTag> {
107    let hed = hed.trim();
108    if hed.is_empty() {
109        return Vec::new();
110    }
111
112    let mut tags = Vec::new();
113    let mut depth = 0usize;
114    let mut start = 0;
115
116    for (i, ch) in hed.char_indices() {
117        match ch {
118            '(' => {
119                if depth == 0 {
120                    // Flush any pending simple tag before the group
121                    let before = hed[start..i].trim();
122                    if !before.is_empty() && before != "," {
123                        for part in before.split(',') {
124                            let part = part.trim();
125                            if !part.is_empty() {
126                                tags.push(HedTag::simple(part));
127                            }
128                        }
129                    }
130                    start = i + 1;
131                }
132                depth += 1;
133            }
134            ')' => {
135                depth = depth.saturating_sub(1);
136                if depth == 0 {
137                    // Parse the group contents
138                    let group_str = &hed[start..i];
139                    let children: Vec<HedTag> = group_str
140                        .split(',')
141                        .map(|s| s.trim())
142                        .filter(|s| !s.is_empty())
143                        .map(HedTag::simple)
144                        .collect();
145                    tags.push(HedTag::group(children));
146                    start = i + 1;
147                }
148            }
149            _ => {}
150        }
151    }
152
153    // Handle remaining text after last group/comma
154    let remaining = hed[start..].trim();
155    if !remaining.is_empty() {
156        for part in remaining.split(',') {
157            let part = part.trim();
158            if !part.is_empty() {
159                tags.push(HedTag::simple(part));
160            }
161        }
162    }
163
164    tags
165}
166
167/// Extract all HED tags from an events TSV column as a flat list of tag strings.
168///
169/// This is useful for frequency analysis or filtering events by HED tag.
170#[must_use]
171pub fn extract_all_tags(hed_strings: &[&str]) -> Vec<String> {
172    let mut all = Vec::new();
173    for s in hed_strings {
174        for tag in parse_hed_string(s) {
175            if tag.is_group() {
176                for child in &tag.children {
177                    all.push(child.tag.clone());
178                }
179            } else {
180                all.push(tag.tag);
181            }
182        }
183    }
184    all
185}
186
#[cfg(test)]
mod tests {
    use super::*;

    /// Two comma-separated tags come back as two plain entries, in order.
    #[test]
    fn test_simple_tags() {
        let parsed = parse_hed_string("Sensory-event, Visual-presentation");
        let names: Vec<&str> = parsed.iter().map(|t| t.tag.as_str()).collect();
        assert_eq!(names, ["Sensory-event", "Visual-presentation"]);
        assert!(!parsed[0].is_group());
    }

    /// A single parenthesised group yields one group entry with two children.
    #[test]
    fn test_group() {
        let parsed = parse_hed_string("(Item/Object/Car, Color/Red)");
        assert_eq!(parsed.len(), 1);

        let group = &parsed[0];
        assert!(group.is_group());
        assert_eq!(group.children.len(), 2);

        let first = &group.children[0];
        assert_eq!(first.tag, "Item/Object/Car");
        assert_eq!(first.leaf(), "Car");
    }

    /// Plain tags before and after a group are kept in their positions.
    #[test]
    fn test_mixed() {
        let kinds: Vec<bool> =
            parse_hed_string("Sensory-event, (Item/Car, Color/Red), Agent-action")
                .iter()
                .map(HedTag::is_group)
                .collect();
        assert_eq!(kinds, [false, true, false]);
    }

    /// Empty and whitespace-only inputs produce no tags.
    #[test]
    fn test_empty() {
        for blank in ["", "  "] {
            assert!(parse_hed_string(blank).is_empty());
        }
    }

    /// Path helpers on a hierarchical tag: leaf, components, and prefix check.
    #[test]
    fn test_hierarchical() {
        let car = HedTag::simple("Item/Object/Man-made/Vehicle/Car");
        assert_eq!(car.leaf(), "Car");
        assert_eq!(
            car.components(),
            vec!["Item", "Object", "Man-made", "Vehicle", "Car"]
        );
        assert!(car.starts_with("Item/Object"));
    }

    /// Flattening several HED strings lists plain tags and group members.
    #[test]
    fn test_extract_all() {
        let inputs = ["Sensory-event, Visual", "(Motor, Hand)"];
        let flat = extract_all_tags(&inputs);
        assert_eq!(flat, vec!["Sensory-event", "Visual", "Motor", "Hand"]);
    }
}