Skip to main content

swc_neuron/
lib.rs

1//! Rust library for reading, writing, and manipulating SWC files for neuronal morphology.
2//! Also includes a CLI for basic validation, sorting, and reindexing (with optional feature `cli`).
3//!
4//! The format was originally proposed in [Cannon, et al. 1998](http://dx.doi.org/10.1016/S0165-0270(98)00091-0),
5//! with an implementation in [dohalloran/SWC_BATCH_CHECK](https://github.com/dohalloran/SWC_BATCH_CHECK).
6//!
7//! While commonly used, many variants exist; this implementation tries to cover the "standardised" version described
8//! [here](http://www.neuronland.org/NLMorphologyConverter/MorphologyFormats/SWC/Spec.html),
9//! with some ambiguities resolved by the [SWCplus specification](https://neuroinformatics.nl/swcPlus/).
10//!
11//! The header is a series of `#`-prefixed lines starting at the beginning of the file.
12//! Blank lines (i.e. without a `#` or any other non-whitespace content) do not interrupt the header,
13//! but are also not included in the parsed header.
14//! All other `#`-prefixed and all whitespace-only lines in the file after the first sample are ignored.
15//! Samples define a sample ID, a structure represented by that sample, its location, radius,
16//! and the sample ID of the sample's parent in the neuron's tree structure (the root's parent is `-1`).
17//!
18//! The [SwcNeuron] is generic over implementors of [StructureIdentifier] and [Header],
19//! and can be parsed from [Read]ers and dumped to [Write]rs.
20//! [StructureIdentifier] is implemented for some known sub-specifications.
21//! [Header] is implemented for [String] (i.e. a free-text header field).
22//! [AnySwc] is a [SwcNeuron] with any structure integer and string header.
23
24#![warn(missing_docs)]
25use std::cmp::Reverse;
26use std::collections::{HashMap, HashSet};
27use std::convert::{TryFrom, TryInto};
28use std::error::Error;
29use std::fmt::{Debug, Display};
30use std::io::{self, BufRead, Lines, Write};
31use std::iter::FromIterator;
32use std::marker::PhantomData;
33use std::str::FromStr;
34
35pub mod structures;
36
37pub use crate::structures::{AnyStructure, StructureIdentifier};
38
39pub mod header;
40pub use crate::header::Header;
41
42/// Type used for sample IDs.
43///
44/// SWC files use -1 for the parent ID of the root sample,
45/// but this package uses a signed ID type with the root's parent as `None`.
46pub type SampleId = u64;
47
48/// Float type used for locations and radii.
49pub type Precision = f64;
50type Radius = Precision;
51
52/// Maximally flexible [SwcNeuron] with any structure specification and a free-text header.
53pub type AnySwc = SwcNeuron<AnyStructure, String>;
54
55pub(crate) const HEADER_BUF_LEN: usize = 512;
56
57/// Error in parsing a SWC sample (row).
58#[derive(thiserror::Error, Debug)]
59pub enum SampleParseError {
60    /// A field which should be an integer could not be parsed.
61    #[error("Integer field parse error")]
62    IntField(#[from] std::num::ParseIntError),
63    /// A field which should be a float/ decimal could not be parsed.
64    #[error("Float field parse error")]
65    FloatField(#[from] std::num::ParseFloatError),
66    /// The value of the `structure` field was not recognised.
67    /// Not checked for structure enums with a catch-all variant.
68    #[error("Reference to unknown structure {0}")]
69    UnknownStructure(isize),
70    /// Line terminated early; more fields expected.
71    #[error("Incorrect number of fields: found {0}")]
72    IncorrectNumFields(usize),
73}
74
75/// Struct representing a row of a SWC file (a single sample from the neuron),
76/// which gives information about a single node in the tree.
77///
78/// If the `parent_id` is None, this is a root node.
79#[derive(Debug, Clone, Copy)]
80pub struct SwcSample<S: StructureIdentifier> {
81    /// ID of the SWC sample
82    pub sample_id: SampleId,
83    /// SWC structure enum
84    pub structure: S,
85    /// X location
86    pub x: Precision,
87    /// Y location
88    pub y: Precision,
89    /// Z location
90    pub z: Precision,
91    /// Radius of the neuron at this sample
92    pub radius: Radius,
93    /// ID of the parent sample (None if this is the root)
94    pub parent_id: Option<SampleId>,
95}
96
97impl<S: StructureIdentifier> FromStr for SwcSample<S> {
98    type Err = SampleParseError;
99
100    fn from_str(s: &str) -> Result<Self, Self::Err> {
101        let trimmed = s.trim();
102        let mut items = trimmed.split_whitespace();
103
104        let sample_id = items
105            .next()
106            .ok_or(SampleParseError::IncorrectNumFields(0))?
107            .parse()?;
108
109        let struct_int = items
110            .next()
111            .ok_or(SampleParseError::IncorrectNumFields(1))?
112            .parse::<isize>()?;
113        let structure =
114            S::try_from(struct_int).map_err(|_e| SampleParseError::UnknownStructure(struct_int))?;
115        let x = items
116            .next()
117            .ok_or(SampleParseError::IncorrectNumFields(2))?
118            .parse::<Precision>()?;
119        let y = items
120            .next()
121            .ok_or(SampleParseError::IncorrectNumFields(3))?
122            .parse::<Precision>()?;
123        let z = items
124            .next()
125            .ok_or(SampleParseError::IncorrectNumFields(4))?
126            .parse::<Precision>()?;
127        let radius = items
128            .next()
129            .ok_or(SampleParseError::IncorrectNumFields(5))?
130            .parse::<Radius>()?;
131        let parent_id = match items.next() {
132            Some("-1") => None,
133            Some(p_str) => Some(p_str.parse()?),
134            None => None,
135        };
136
137        let count: usize = items.fold(0, |x, _| x + 1);
138        if count > 0 {
139            return Err(SampleParseError::IncorrectNumFields(7 + count));
140        }
141
142        Ok(Self {
143            sample_id,
144            structure,
145            x,
146            y,
147            z,
148            radius,
149            parent_id,
150        })
151    }
152}
153
154impl<S: StructureIdentifier> Display for SwcSample<S> {
155    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
156        let structure: isize = self.structure.into();
157        write!(
158            f,
159            "{} {} {} {} {} {} {}",
160            self.sample_id,
161            structure,
162            self.x,
163            self.y,
164            self.z,
165            self.radius,
166            self.parent_id.map_or("-1".to_string(), |p| p.to_string()),
167        )
168    }
169}
170
171impl<S: StructureIdentifier> SwcSample<S> {
172    /// Create a new [SwcSample] with  different sample and parent IDs.
173    ///
174    /// The other features are the same.
175    fn with_ids(self, sample: SampleId, parent: Option<SampleId>) -> Self {
176        SwcSample {
177            sample_id: sample,
178            structure: self.structure,
179            x: self.x,
180            y: self.y,
181            z: self.z,
182            radius: self.radius,
183            parent_id: parent,
184        }
185    }
186}
187
188/// Struct representing a neuron skeleton as a tree of [SwcSample]s.
189#[derive(Debug, Clone)]
190pub struct SwcNeuron<S: StructureIdentifier, H: Header> {
191    /// Samples present in the SWC file.
192    pub samples: Vec<SwcSample<S>>,
193    /// Header of the SWC file
194    pub header: Option<H>,
195}
196
197impl<S: StructureIdentifier, H: Header> FromIterator<SwcSample<S>> for SwcNeuron<S, H> {
198    fn from_iter<I: IntoIterator<Item = SwcSample<S>>>(iter: I) -> Self {
199        SwcNeuron {
200            samples: iter.into_iter().collect(),
201            header: None,
202        }
203    }
204}
205
206/// Error for a SWC file which cannot be parsed
207#[derive(thiserror::Error, Debug)]
208pub enum SwcParseError {
209    /// A line could not be read from the file.
210    #[error("Line read error")]
211    Read(#[from] std::io::Error),
212    /// The line could not be parsed as a SwcSample.
213    #[error("Sample parse error")]
214    SampleParse(#[from] SampleParseError),
215    /// The SWC header could not be parsed.
216    #[error("Header parse error")]
217    HeaderParse(String),
218}
219
220/// Error where a sample refers to an unknown parent sample.
221#[derive(thiserror::Error, Debug)]
222#[error("Parent sample {parent_sample} does not exist")]
223pub struct MissingSampleError {
224    parent_sample: SampleId,
225}
226
227/// Error where a neuron represented by a SWC file is inconsistent.
228#[derive(thiserror::Error, Debug)]
229pub enum InconsistentNeuronError {
230    /// Neuron is not a tree as it has multiple roots (parentless nodes).
231    #[error("Neuron has >1 root")]
232    MultipleRoots,
233    /// Neuron is not a tree as it has several disconnected parts.
234    #[error("Neuron has >1 connected component")]
235    Disconnected,
236    /// Neuron has no root: it could be empty or be cyclic.
237    #[error("Neuron has no root")]
238    NoRootError,
239    /// Samples refer to unknown parent samples.
240    #[error("Neuron has missing sample(s)")]
241    MissingSample(#[from] MissingSampleError),
242    /// Samples have clashing IDs.
243    #[error("Neuron has multiple samples numbered {0}")]
244    DuplicateSample(SampleId),
245}
246
247impl<S: StructureIdentifier, H: Header> SwcNeuron<S, H> {
248    /// Number of samples in the neuron.
249    pub fn len(&self) -> usize {
250        self.samples.len()
251    }
252
253    /// Whether there are any samples in the neuron.
254    pub fn is_empty(&self) -> bool {
255        self.samples.is_empty()
256    }
257
258    /// Sort the neuron's samples by their index.
259    pub fn sort_index(mut self) -> Self {
260        self.samples
261            .sort_unstable_by(|a, b| a.sample_id.cmp(&b.sample_id));
262        self
263    }
264
265    /// Re-index the neuron's samples in order of occurrence, starting at the given ID and incrementing.
266    /// Returns error if the SWC is missing a parent sample
267    pub fn reindex_from(self, first_id: SampleId) -> Result<Self, MissingSampleError> {
268        let old_to_new: HashMap<SampleId, SampleId> = self
269            .samples
270            .iter()
271            .enumerate()
272            .map(|(idx, row)| (row.sample_id, idx as SampleId + first_id))
273            .collect();
274
275        let mut samples = Vec::with_capacity(self.samples.len());
276        for (idx, row) in self.samples.into_iter().enumerate() {
277            let parent;
278            if let Some(old) = row.parent_id {
279                parent = Some(
280                    *old_to_new
281                        .get(&old)
282                        .ok_or(MissingSampleError { parent_sample: old })?,
283                );
284            } else {
285                parent = None;
286            }
287            samples.push(row.with_ids(idx as u64 + first_id, parent));
288        }
289
290        Ok(Self {
291            samples,
292            header: self.header,
293        })
294    }
295
296    /// Re-index the neuron's samples in order of occurrence, starting at 1.
297    /// Returns error if the SWC is missing a parent sample
298    pub fn reindex(self) -> Result<Self, MissingSampleError> {
299        self.reindex_from(1)
300    }
301
302    /// Re-order its samples in pre-order depth first search.
303    /// Children are visited in the order of their original sample number.
304    ///
305    /// Returns an error if the neuron is not a self-consistent tree.
306    pub fn sort_topo(self, reindex: bool) -> Result<Self, InconsistentNeuronError> {
307        let mut parent_to_children: HashMap<SampleId, Vec<SampleId>> =
308            HashMap::with_capacity(self.samples.len());
309        let mut id_to_sample: HashMap<SampleId, SwcSample<S>> =
310            HashMap::with_capacity(self.samples.len());
311        let mut root = None;
312
313        for row in self.samples {
314            if let Some(p) = row.parent_id {
315                let entry = parent_to_children.entry(p).or_default();
316                entry.push(row.sample_id);
317                if id_to_sample.insert(row.sample_id, row).is_some() {
318                    return Err(InconsistentNeuronError::DuplicateSample(row.sample_id));
319                }
320            } else if root.is_some() {
321                return Err(InconsistentNeuronError::MultipleRoots);
322            } else {
323                root = Some(row);
324            }
325        }
326
327        let mut samples = Vec::with_capacity(id_to_sample.len() + 1);
328
329        let mut to_visit = vec![];
330        let mut next_id: SampleId = 1;
331
332        if let Some(r) = root {
333            let children = parent_to_children.remove(&r.sample_id).map_or_else(
334                || Vec::with_capacity(0),
335                |mut v| {
336                    v.sort_unstable_by_key(|&x| Reverse(x));
337                    v
338                },
339            );
340            if reindex {
341                samples.push(r.with_ids(next_id, None));
342                to_visit.extend(children.into_iter().map(|c| (next_id, c)));
343                next_id += 1;
344            } else {
345                samples.push(r);
346                to_visit.extend(children.into_iter().map(|c| (r.sample_id, c)));
347            }
348        } else {
349            return Err(InconsistentNeuronError::NoRootError);
350        }
351
352        while let Some((parent_id, row_id)) = to_visit.pop() {
353            let row = id_to_sample
354                .remove(&row_id)
355                .expect("Just constructed this vec");
356            let children = parent_to_children.remove(&row.sample_id).map_or_else(
357                || Vec::with_capacity(0),
358                |mut v| {
359                    v.sort_unstable_by_key(|&id| Reverse(id));
360                    v
361                },
362            );
363            if reindex {
364                samples.push(row.with_ids(next_id, Some(parent_id)));
365                to_visit.extend(children.into_iter().map(|c| (next_id, c)));
366                next_id += 1;
367            } else {
368                samples.push(row);
369                to_visit.extend(children.into_iter().map(|c| (row.sample_id, c)));
370            }
371        }
372
373        if id_to_sample.is_empty() {
374            Ok(SwcNeuron {
375                samples,
376                header: self.header,
377            })
378        } else {
379            Err(InconsistentNeuronError::Disconnected)
380        }
381        // todo: possible MissingSampleError
382    }
383
384    /// Ensure that [SwcNeuron] is a self-consistent tree.
385    ///
386    /// If `validate_order` is `false`, samples may be defined earlier in the file than their parents.
387    pub fn validate(&self, validate_order: bool) -> Result<(), InconsistentNeuronError> {
388        use InconsistentNeuronError::*;
389
390        // use sample IDs
391        let mut parent_to_children: HashMap<SampleId, Vec<SampleId>> =
392            HashMap::with_capacity(self.samples.len());
393        let mut sample_ids: HashSet<SampleId> = HashSet::with_capacity(self.samples.len());
394        let mut root = None;
395
396        for row in self.samples.iter() {
397            if !sample_ids.insert(row.sample_id) {
398                return Err(DuplicateSample(row.sample_id));
399            }
400            if let Some(p) = row.parent_id {
401                if validate_order && !sample_ids.contains(&p) {
402                    return Err(MissingSample(MissingSampleError { parent_sample: p }));
403                }
404                let entry = parent_to_children.entry(p).or_default();
405                entry.push(row.sample_id);
406            } else if root.is_some() {
407                return Err(MultipleRoots);
408            } else {
409                root = Some(row.sample_id);
410            }
411        }
412
413        let Some(rt) = root else {
414            return Err(NoRootError);
415        };
416
417        let mut to_visit = Vec::default();
418        to_visit.push(rt);
419
420        while let Some(sample_id) = to_visit.pop() {
421            let children = parent_to_children
422                .remove(&sample_id)
423                .unwrap_or_else(|| Vec::with_capacity(0));
424            to_visit.extend(children.into_iter());
425        }
426
427        if parent_to_children.is_empty() {
428            Ok(())
429        } else if let Some((unknown_p, _)) = parent_to_children
430            .iter()
431            .take_while(|(p, _)| !sample_ids.contains(p))
432            .next()
433        {
434            Err(MissingSample(MissingSampleError {
435                parent_sample: *unknown_p,
436            }))
437        } else {
438            Err(Disconnected)
439        }
440    }
441
442    /// Parse a [SwcNeuron] from a [Read]er.
443    ///
444    /// Does not check the neuron for consistency, but does check for valid structures.
445    pub fn from_reader<R: BufRead>(reader: R) -> Result<Self, SwcParseError> {
446        SwcLines::<S, R>::new(reader)?.try_into()
447    }
448
449    /// Dump this [SwcNeuron] to a [Write]r.
450    ///
451    /// These writes are quite small: consider wrapping it in a [BufWriter].
452    pub fn to_writer<W: Write>(&self, writer: &mut W) -> Result<(), std::io::Error> {
453        if let Some(h) = self.header.clone() {
454            for line in h.to_string().lines() {
455                writeln!(writer, "#{}", line)?;
456            }
457        }
458        for s in self.samples.iter() {
459            writeln!(writer, "{}", s)?;
460        }
461        Ok(())
462    }
463
464    /// Replace the existing header with a new one, returning the existing.
465    pub fn replace_header(&mut self, header: Option<H>) -> Option<H> {
466        std::mem::replace(&mut self.header, header)
467    }
468
469    /// Create a new header by applying a function to this [SwcNeuron],
470    /// then create a new neuron with that header.
471    pub fn try_map_header<H2: Header, E: Error, F: Fn(&Self) -> Result<Option<H2>, E>>(
472        self,
473        f: F,
474    ) -> Result<SwcNeuron<S, H2>, E> {
475        let header = f(&self)?;
476        Ok(SwcNeuron {
477            header,
478            samples: self.samples,
479        })
480    }
481
482    /// Map every sample's structure to another type.
483    pub fn try_map_structure<S2: StructureIdentifier, E: Error, F: Fn(&S) -> Result<S2, E>>(
484        self,
485        f: F,
486    ) -> Result<SwcNeuron<S2, H>, E> {
487        let samples: Result<Vec<_>, E> = self
488            .samples
489            .into_iter()
490            .map(|s| {
491                Ok(SwcSample {
492                    sample_id: s.sample_id,
493                    structure: f(&s.structure)?,
494                    x: s.x,
495                    y: s.y,
496                    z: s.z,
497                    radius: s.radius,
498                    parent_id: s.parent_id,
499                })
500            })
501            .collect();
502        Ok(SwcNeuron {
503            samples: samples?,
504            header: self.header,
505        })
506    }
507
508    /// Get the highest sample ID in the neuron, if any samples exist.
509    pub fn max_idx(&self) -> Option<SampleId> {
510        self.samples.iter().map(|s| s.sample_id).max()
511    }
512}
513
514impl<S: StructureIdentifier, H: Header, R: BufRead> TryFrom<SwcLines<S, R>> for SwcNeuron<S, H> {
515    type Error = SwcParseError;
516
517    fn try_from(mut lines: SwcLines<S, R>) -> Result<Self, Self::Error> {
518        let header: Option<H> = if let Some(h_res) = lines.header() {
519            let h = h_res.map_err(|_e| SwcParseError::HeaderParse(lines.header_string.clone()))?;
520            Some(h)
521        } else {
522            None
523        };
524
525        let mut samples = Vec::default();
526        for ln_res in lines {
527            match ln_res? {
528                SwcLine::Comment(_) => (),
529                SwcLine::Sample(s) => samples.push(s),
530            }
531        }
532        Ok(Self { samples, header })
533    }
534}
535
536/// [Iterator] which eagerly reads the header on construction and then lazily reads the remaining lines.
537pub struct SwcLines<S: StructureIdentifier, R: BufRead> {
538    inner: SwcFileLines<S, R>,
539    header_string: String,
540    next_row: Option<<SwcFileLines<S, R> as Iterator>::Item>,
541}
542
543impl<S: StructureIdentifier, R: BufRead> SwcLines<S, R> {
544    /// Create a new [SwcLines] iterator from a reader.
545    pub fn new(reader: R) -> Result<Self, io::Error> {
546        let mut inner = SwcFileLines::new(reader);
547        let mut header_string = String::with_capacity(HEADER_BUF_LEN);
548        let mut next_row = None;
549
550        loop {
551            let Some(ln) = inner.next() else {
552                break;
553            };
554            match ln {
555                Ok(SwcLine::Comment(c)) => {
556                    header_string.push('\n');
557                    header_string.push_str(&c);
558                }
559                _ => {
560                    next_row = Some(ln);
561                    break;
562                }
563            }
564        }
565
566        Ok(Self {
567            inner,
568            header_string,
569            next_row,
570        })
571    }
572
573    /// Parse the header string into a [Header] struct.
574    pub fn header<H: Header>(&mut self) -> Option<Result<H, <H as std::str::FromStr>::Err>> {
575        if self.header_string.is_empty() {
576            None
577        } else {
578            Some(self.header_string.parse::<H>())
579        }
580    }
581}
582
583impl<S: StructureIdentifier, R: BufRead> Iterator for SwcLines<S, R> {
584    type Item = Result<SwcLine<S>, SwcParseError>;
585
586    fn next(&mut self) -> Option<Self::Item> {
587        if let Some(o) = self.next_row.take() {
588            return Some(o);
589        }
590        self.inner.next()
591    }
592}
593
594/// Line in a SWC file, which might be a comment or sample.
595pub enum SwcLine<S: StructureIdentifier> {
596    /// Comment line, which may be part of the header
597    Comment(String),
598    /// Sample line
599    Sample(SwcSample<S>),
600}
601
602/// [Iterator] which produces [SwcLine]s from a [Read]er.
603struct SwcFileLines<S: StructureIdentifier, R: BufRead> {
604    lines: Lines<R>,
605    pub is_header: bool,
606    _s: PhantomData<S>,
607}
608
609impl<S: StructureIdentifier, R: BufRead> SwcFileLines<S, R> {
610    fn new(reader: R) -> Self {
611        Self {
612            lines: reader.lines(),
613            is_header: true,
614            _s: PhantomData,
615        }
616    }
617}
618
619impl<S: StructureIdentifier, R: BufRead> Iterator for SwcFileLines<S, R> {
620    type Item = Result<SwcLine<S>, SwcParseError>;
621
622    fn next(&mut self) -> Option<Self::Item> {
623        loop {
624            let raw_line = match self.lines.next()? {
625                Ok(s) => s,
626                Err(e) => return Some(Err(SwcParseError::Read(e))),
627            };
628
629            let line = raw_line.trim_end();
630
631            if line.is_empty() {
632                continue;
633            } else if let Some(remainder) = line.strip_prefix('#') {
634                return Some(Ok(SwcLine::Comment(remainder.to_string())));
635            } else {
636                return match SwcSample::from_str(line) {
637                    Ok(sample) => {
638                        self.is_header = false;
639                        Some(Ok(SwcLine::Sample(sample)))
640                    }
641                    Err(err) => Some(Err(SwcParseError::SampleParse(err))),
642                };
643            }
644        }
645    }
646}
647
648/// Parse lines from a SWC file.
649///
650/// Skips blank lines and treats all `#`-prefixed lines as comments.
651/// Does not check the neuron for consistency, but does check for valid structures.
652pub fn parse_swc_lines<R: BufRead, S: StructureIdentifier>(
653    reader: R,
654) -> impl Iterator<Item = Result<SwcLine<S>, SwcParseError>> {
655    SwcFileLines::new(reader)
656}
657
658#[cfg(test)]
659mod tests {
660    use super::*;
661    use std::fs::{read_dir, File};
662    use std::io::BufReader;
663    use std::path::PathBuf;
664
665    fn data_dir() -> PathBuf {
666        let mut p = PathBuf::from_str(env!("CARGO_MANIFEST_DIR")).unwrap();
667        p.push("data");
668        p
669    }
670
671    fn data_paths() -> impl IntoIterator<Item = PathBuf> {
672        let root = data_dir();
673        read_dir(&root).unwrap().filter_map(|er| {
674            let e = er.unwrap();
675            let p = e.path();
676            if p.is_file() && p.ends_with(".swc") {
677                Some(p)
678            } else {
679                None
680            }
681        })
682    }
683
684    fn all_swcs() -> impl IntoIterator<Item = AnySwc> {
685        data_paths().into_iter().map(|p| {
686            let f = File::open(&p).unwrap();
687            AnySwc::from_reader(BufReader::new(f))
688                .unwrap_or_else(|_| panic!("Could not read {:?}", p.as_os_str()))
689        })
690    }
691
692    #[test]
693    fn can_read() {
694        for s in all_swcs() {
695            assert!(matches!(s.header, Some(h) if !h.is_empty()));
696            assert!(!s.samples.is_empty());
697        }
698    }
699
700    #[test]
701    fn can_validate() {
702        for swc in all_swcs() {
703            swc.validate(true).expect("Invalid SWC");
704        }
705    }
706}