1#![warn(missing_docs)]
25use std::cmp::Reverse;
26use std::collections::{HashMap, HashSet};
27use std::convert::{TryFrom, TryInto};
28use std::error::Error;
29use std::fmt::{Debug, Display};
30use std::io::{self, BufRead, Lines, Write};
31use std::iter::FromIterator;
32use std::marker::PhantomData;
33use std::str::FromStr;
34
35pub mod structures;
36
37pub use crate::structures::{AnyStructure, StructureIdentifier};
38
39pub mod header;
40pub use crate::header::Header;
41
42pub type SampleId = u64;
47
48pub type Precision = f64;
50type Radius = Precision;
51
52pub type AnySwc = SwcNeuron<AnyStructure, String>;
54
55pub(crate) const HEADER_BUF_LEN: usize = 512;
56
57#[derive(thiserror::Error, Debug)]
59pub enum SampleParseError {
60 #[error("Integer field parse error")]
62 IntField(#[from] std::num::ParseIntError),
63 #[error("Float field parse error")]
65 FloatField(#[from] std::num::ParseFloatError),
66 #[error("Reference to unknown structure {0}")]
69 UnknownStructure(isize),
70 #[error("Incorrect number of fields: found {0}")]
72 IncorrectNumFields(usize),
73}
74
75#[derive(Debug, Clone, Copy)]
80pub struct SwcSample<S: StructureIdentifier> {
81 pub sample_id: SampleId,
83 pub structure: S,
85 pub x: Precision,
87 pub y: Precision,
89 pub z: Precision,
91 pub radius: Radius,
93 pub parent_id: Option<SampleId>,
95}
96
97impl<S: StructureIdentifier> FromStr for SwcSample<S> {
98 type Err = SampleParseError;
99
100 fn from_str(s: &str) -> Result<Self, Self::Err> {
101 let trimmed = s.trim();
102 let mut items = trimmed.split_whitespace();
103
104 let sample_id = items
105 .next()
106 .ok_or(SampleParseError::IncorrectNumFields(0))?
107 .parse()?;
108
109 let struct_int = items
110 .next()
111 .ok_or(SampleParseError::IncorrectNumFields(1))?
112 .parse::<isize>()?;
113 let structure =
114 S::try_from(struct_int).map_err(|_e| SampleParseError::UnknownStructure(struct_int))?;
115 let x = items
116 .next()
117 .ok_or(SampleParseError::IncorrectNumFields(2))?
118 .parse::<Precision>()?;
119 let y = items
120 .next()
121 .ok_or(SampleParseError::IncorrectNumFields(3))?
122 .parse::<Precision>()?;
123 let z = items
124 .next()
125 .ok_or(SampleParseError::IncorrectNumFields(4))?
126 .parse::<Precision>()?;
127 let radius = items
128 .next()
129 .ok_or(SampleParseError::IncorrectNumFields(5))?
130 .parse::<Radius>()?;
131 let parent_id = match items.next() {
132 Some("-1") => None,
133 Some(p_str) => Some(p_str.parse()?),
134 None => None,
135 };
136
137 let count: usize = items.fold(0, |x, _| x + 1);
138 if count > 0 {
139 return Err(SampleParseError::IncorrectNumFields(7 + count));
140 }
141
142 Ok(Self {
143 sample_id,
144 structure,
145 x,
146 y,
147 z,
148 radius,
149 parent_id,
150 })
151 }
152}
153
154impl<S: StructureIdentifier> Display for SwcSample<S> {
155 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
156 let structure: isize = self.structure.into();
157 write!(
158 f,
159 "{} {} {} {} {} {} {}",
160 self.sample_id,
161 structure,
162 self.x,
163 self.y,
164 self.z,
165 self.radius,
166 self.parent_id.map_or("-1".to_string(), |p| p.to_string()),
167 )
168 }
169}
170
171impl<S: StructureIdentifier> SwcSample<S> {
172 fn with_ids(self, sample: SampleId, parent: Option<SampleId>) -> Self {
176 SwcSample {
177 sample_id: sample,
178 structure: self.structure,
179 x: self.x,
180 y: self.y,
181 z: self.z,
182 radius: self.radius,
183 parent_id: parent,
184 }
185 }
186}
187
188#[derive(Debug, Clone)]
190pub struct SwcNeuron<S: StructureIdentifier, H: Header> {
191 pub samples: Vec<SwcSample<S>>,
193 pub header: Option<H>,
195}
196
197impl<S: StructureIdentifier, H: Header> FromIterator<SwcSample<S>> for SwcNeuron<S, H> {
198 fn from_iter<I: IntoIterator<Item = SwcSample<S>>>(iter: I) -> Self {
199 SwcNeuron {
200 samples: iter.into_iter().collect(),
201 header: None,
202 }
203 }
204}
205
206#[derive(thiserror::Error, Debug)]
208pub enum SwcParseError {
209 #[error("Line read error")]
211 Read(#[from] std::io::Error),
212 #[error("Sample parse error")]
214 SampleParse(#[from] SampleParseError),
215 #[error("Header parse error")]
217 HeaderParse(String),
218}
219
220#[derive(thiserror::Error, Debug)]
222#[error("Parent sample {parent_sample} does not exist")]
223pub struct MissingSampleError {
224 parent_sample: SampleId,
225}
226
227#[derive(thiserror::Error, Debug)]
229pub enum InconsistentNeuronError {
230 #[error("Neuron has >1 root")]
232 MultipleRoots,
233 #[error("Neuron has >1 connected component")]
235 Disconnected,
236 #[error("Neuron has no root")]
238 NoRootError,
239 #[error("Neuron has missing sample(s)")]
241 MissingSample(#[from] MissingSampleError),
242 #[error("Neuron has multiple samples numbered {0}")]
244 DuplicateSample(SampleId),
245}
246
247impl<S: StructureIdentifier, H: Header> SwcNeuron<S, H> {
248 pub fn len(&self) -> usize {
250 self.samples.len()
251 }
252
253 pub fn is_empty(&self) -> bool {
255 self.samples.is_empty()
256 }
257
258 pub fn sort_index(mut self) -> Self {
260 self.samples
261 .sort_unstable_by(|a, b| a.sample_id.cmp(&b.sample_id));
262 self
263 }
264
265 pub fn reindex_from(self, first_id: SampleId) -> Result<Self, MissingSampleError> {
268 let old_to_new: HashMap<SampleId, SampleId> = self
269 .samples
270 .iter()
271 .enumerate()
272 .map(|(idx, row)| (row.sample_id, idx as SampleId + first_id))
273 .collect();
274
275 let mut samples = Vec::with_capacity(self.samples.len());
276 for (idx, row) in self.samples.into_iter().enumerate() {
277 let parent;
278 if let Some(old) = row.parent_id {
279 parent = Some(
280 *old_to_new
281 .get(&old)
282 .ok_or(MissingSampleError { parent_sample: old })?,
283 );
284 } else {
285 parent = None;
286 }
287 samples.push(row.with_ids(idx as u64 + first_id, parent));
288 }
289
290 Ok(Self {
291 samples,
292 header: self.header,
293 })
294 }
295
296 pub fn reindex(self) -> Result<Self, MissingSampleError> {
299 self.reindex_from(1)
300 }
301
302 pub fn sort_topo(self, reindex: bool) -> Result<Self, InconsistentNeuronError> {
307 let mut parent_to_children: HashMap<SampleId, Vec<SampleId>> =
308 HashMap::with_capacity(self.samples.len());
309 let mut id_to_sample: HashMap<SampleId, SwcSample<S>> =
310 HashMap::with_capacity(self.samples.len());
311 let mut root = None;
312
313 for row in self.samples {
314 if let Some(p) = row.parent_id {
315 let entry = parent_to_children.entry(p).or_default();
316 entry.push(row.sample_id);
317 if id_to_sample.insert(row.sample_id, row).is_some() {
318 return Err(InconsistentNeuronError::DuplicateSample(row.sample_id));
319 }
320 } else if root.is_some() {
321 return Err(InconsistentNeuronError::MultipleRoots);
322 } else {
323 root = Some(row);
324 }
325 }
326
327 let mut samples = Vec::with_capacity(id_to_sample.len() + 1);
328
329 let mut to_visit = vec![];
330 let mut next_id: SampleId = 1;
331
332 if let Some(r) = root {
333 let children = parent_to_children.remove(&r.sample_id).map_or_else(
334 || Vec::with_capacity(0),
335 |mut v| {
336 v.sort_unstable_by_key(|&x| Reverse(x));
337 v
338 },
339 );
340 if reindex {
341 samples.push(r.with_ids(next_id, None));
342 to_visit.extend(children.into_iter().map(|c| (next_id, c)));
343 next_id += 1;
344 } else {
345 samples.push(r);
346 to_visit.extend(children.into_iter().map(|c| (r.sample_id, c)));
347 }
348 } else {
349 return Err(InconsistentNeuronError::NoRootError);
350 }
351
352 while let Some((parent_id, row_id)) = to_visit.pop() {
353 let row = id_to_sample
354 .remove(&row_id)
355 .expect("Just constructed this vec");
356 let children = parent_to_children.remove(&row.sample_id).map_or_else(
357 || Vec::with_capacity(0),
358 |mut v| {
359 v.sort_unstable_by_key(|&id| Reverse(id));
360 v
361 },
362 );
363 if reindex {
364 samples.push(row.with_ids(next_id, Some(parent_id)));
365 to_visit.extend(children.into_iter().map(|c| (next_id, c)));
366 next_id += 1;
367 } else {
368 samples.push(row);
369 to_visit.extend(children.into_iter().map(|c| (row.sample_id, c)));
370 }
371 }
372
373 if id_to_sample.is_empty() {
374 Ok(SwcNeuron {
375 samples,
376 header: self.header,
377 })
378 } else {
379 Err(InconsistentNeuronError::Disconnected)
380 }
381 }
383
384 pub fn validate(&self, validate_order: bool) -> Result<(), InconsistentNeuronError> {
388 use InconsistentNeuronError::*;
389
390 let mut parent_to_children: HashMap<SampleId, Vec<SampleId>> =
392 HashMap::with_capacity(self.samples.len());
393 let mut sample_ids: HashSet<SampleId> = HashSet::with_capacity(self.samples.len());
394 let mut root = None;
395
396 for row in self.samples.iter() {
397 if !sample_ids.insert(row.sample_id) {
398 return Err(DuplicateSample(row.sample_id));
399 }
400 if let Some(p) = row.parent_id {
401 if validate_order && !sample_ids.contains(&p) {
402 return Err(MissingSample(MissingSampleError { parent_sample: p }));
403 }
404 let entry = parent_to_children.entry(p).or_default();
405 entry.push(row.sample_id);
406 } else if root.is_some() {
407 return Err(MultipleRoots);
408 } else {
409 root = Some(row.sample_id);
410 }
411 }
412
413 let Some(rt) = root else {
414 return Err(NoRootError);
415 };
416
417 let mut to_visit = Vec::default();
418 to_visit.push(rt);
419
420 while let Some(sample_id) = to_visit.pop() {
421 let children = parent_to_children
422 .remove(&sample_id)
423 .unwrap_or_else(|| Vec::with_capacity(0));
424 to_visit.extend(children.into_iter());
425 }
426
427 if parent_to_children.is_empty() {
428 Ok(())
429 } else if let Some((unknown_p, _)) = parent_to_children
430 .iter()
431 .take_while(|(p, _)| !sample_ids.contains(p))
432 .next()
433 {
434 Err(MissingSample(MissingSampleError {
435 parent_sample: *unknown_p,
436 }))
437 } else {
438 Err(Disconnected)
439 }
440 }
441
442 pub fn from_reader<R: BufRead>(reader: R) -> Result<Self, SwcParseError> {
446 SwcLines::<S, R>::new(reader)?.try_into()
447 }
448
449 pub fn to_writer<W: Write>(&self, writer: &mut W) -> Result<(), std::io::Error> {
453 if let Some(h) = self.header.clone() {
454 for line in h.to_string().lines() {
455 writeln!(writer, "#{}", line)?;
456 }
457 }
458 for s in self.samples.iter() {
459 writeln!(writer, "{}", s)?;
460 }
461 Ok(())
462 }
463
464 pub fn replace_header(&mut self, header: Option<H>) -> Option<H> {
466 std::mem::replace(&mut self.header, header)
467 }
468
469 pub fn try_map_header<H2: Header, E: Error, F: Fn(&Self) -> Result<Option<H2>, E>>(
472 self,
473 f: F,
474 ) -> Result<SwcNeuron<S, H2>, E> {
475 let header = f(&self)?;
476 Ok(SwcNeuron {
477 header,
478 samples: self.samples,
479 })
480 }
481
482 pub fn try_map_structure<S2: StructureIdentifier, E: Error, F: Fn(&S) -> Result<S2, E>>(
484 self,
485 f: F,
486 ) -> Result<SwcNeuron<S2, H>, E> {
487 let samples: Result<Vec<_>, E> = self
488 .samples
489 .into_iter()
490 .map(|s| {
491 Ok(SwcSample {
492 sample_id: s.sample_id,
493 structure: f(&s.structure)?,
494 x: s.x,
495 y: s.y,
496 z: s.z,
497 radius: s.radius,
498 parent_id: s.parent_id,
499 })
500 })
501 .collect();
502 Ok(SwcNeuron {
503 samples: samples?,
504 header: self.header,
505 })
506 }
507
508 pub fn max_idx(&self) -> Option<SampleId> {
510 self.samples.iter().map(|s| s.sample_id).max()
511 }
512}
513
514impl<S: StructureIdentifier, H: Header, R: BufRead> TryFrom<SwcLines<S, R>> for SwcNeuron<S, H> {
515 type Error = SwcParseError;
516
517 fn try_from(mut lines: SwcLines<S, R>) -> Result<Self, Self::Error> {
518 let header: Option<H> = if let Some(h_res) = lines.header() {
519 let h = h_res.map_err(|_e| SwcParseError::HeaderParse(lines.header_string.clone()))?;
520 Some(h)
521 } else {
522 None
523 };
524
525 let mut samples = Vec::default();
526 for ln_res in lines {
527 match ln_res? {
528 SwcLine::Comment(_) => (),
529 SwcLine::Sample(s) => samples.push(s),
530 }
531 }
532 Ok(Self { samples, header })
533 }
534}
535
536pub struct SwcLines<S: StructureIdentifier, R: BufRead> {
538 inner: SwcFileLines<S, R>,
539 header_string: String,
540 next_row: Option<<SwcFileLines<S, R> as Iterator>::Item>,
541}
542
543impl<S: StructureIdentifier, R: BufRead> SwcLines<S, R> {
544 pub fn new(reader: R) -> Result<Self, io::Error> {
546 let mut inner = SwcFileLines::new(reader);
547 let mut header_string = String::with_capacity(HEADER_BUF_LEN);
548 let mut next_row = None;
549
550 loop {
551 let Some(ln) = inner.next() else {
552 break;
553 };
554 match ln {
555 Ok(SwcLine::Comment(c)) => {
556 header_string.push('\n');
557 header_string.push_str(&c);
558 }
559 _ => {
560 next_row = Some(ln);
561 break;
562 }
563 }
564 }
565
566 Ok(Self {
567 inner,
568 header_string,
569 next_row,
570 })
571 }
572
573 pub fn header<H: Header>(&mut self) -> Option<Result<H, <H as std::str::FromStr>::Err>> {
575 if self.header_string.is_empty() {
576 None
577 } else {
578 Some(self.header_string.parse::<H>())
579 }
580 }
581}
582
583impl<S: StructureIdentifier, R: BufRead> Iterator for SwcLines<S, R> {
584 type Item = Result<SwcLine<S>, SwcParseError>;
585
586 fn next(&mut self) -> Option<Self::Item> {
587 if let Some(o) = self.next_row.take() {
588 return Some(o);
589 }
590 self.inner.next()
591 }
592}
593
594pub enum SwcLine<S: StructureIdentifier> {
596 Comment(String),
598 Sample(SwcSample<S>),
600}
601
602struct SwcFileLines<S: StructureIdentifier, R: BufRead> {
604 lines: Lines<R>,
605 pub is_header: bool,
606 _s: PhantomData<S>,
607}
608
609impl<S: StructureIdentifier, R: BufRead> SwcFileLines<S, R> {
610 fn new(reader: R) -> Self {
611 Self {
612 lines: reader.lines(),
613 is_header: true,
614 _s: PhantomData,
615 }
616 }
617}
618
619impl<S: StructureIdentifier, R: BufRead> Iterator for SwcFileLines<S, R> {
620 type Item = Result<SwcLine<S>, SwcParseError>;
621
622 fn next(&mut self) -> Option<Self::Item> {
623 loop {
624 let raw_line = match self.lines.next()? {
625 Ok(s) => s,
626 Err(e) => return Some(Err(SwcParseError::Read(e))),
627 };
628
629 let line = raw_line.trim_end();
630
631 if line.is_empty() {
632 continue;
633 } else if let Some(remainder) = line.strip_prefix('#') {
634 return Some(Ok(SwcLine::Comment(remainder.to_string())));
635 } else {
636 return match SwcSample::from_str(line) {
637 Ok(sample) => {
638 self.is_header = false;
639 Some(Ok(SwcLine::Sample(sample)))
640 }
641 Err(err) => Some(Err(SwcParseError::SampleParse(err))),
642 };
643 }
644 }
645 }
646}
647
648pub fn parse_swc_lines<R: BufRead, S: StructureIdentifier>(
653 reader: R,
654) -> impl Iterator<Item = Result<SwcLine<S>, SwcParseError>> {
655 SwcFileLines::new(reader)
656}
657
658#[cfg(test)]
659mod tests {
660 use super::*;
661 use std::fs::{read_dir, File};
662 use std::io::BufReader;
663 use std::path::PathBuf;
664
665 fn data_dir() -> PathBuf {
666 let mut p = PathBuf::from_str(env!("CARGO_MANIFEST_DIR")).unwrap();
667 p.push("data");
668 p
669 }
670
671 fn data_paths() -> impl IntoIterator<Item = PathBuf> {
672 let root = data_dir();
673 read_dir(&root).unwrap().filter_map(|er| {
674 let e = er.unwrap();
675 let p = e.path();
676 if p.is_file() && p.ends_with(".swc") {
677 Some(p)
678 } else {
679 None
680 }
681 })
682 }
683
684 fn all_swcs() -> impl IntoIterator<Item = AnySwc> {
685 data_paths().into_iter().map(|p| {
686 let f = File::open(&p).unwrap();
687 AnySwc::from_reader(BufReader::new(f))
688 .unwrap_or_else(|_| panic!("Could not read {:?}", p.as_os_str()))
689 })
690 }
691
692 #[test]
693 fn can_read() {
694 for s in all_swcs() {
695 assert!(matches!(s.header, Some(h) if !h.is_empty()));
696 assert!(!s.samples.is_empty());
697 }
698 }
699
700 #[test]
701 fn can_validate() {
702 for swc in all_swcs() {
703 swc.validate(true).expect("Invalid SWC");
704 }
705 }
706}