1#![doc = include_str!("../README.md")]
2
3#[macro_use]
4extern crate pyo3_built;
5extern crate nafcodec;
6extern crate pyo3;
7
8mod pyfile;
9
10use std::borrow::Cow;
11use std::convert::Infallible;
12use std::io::BufReader;
13use std::ops::DerefMut;
14
15use nafcodec::DecoderBuilder;
16use pyo3::exceptions::PyFileNotFoundError;
17use pyo3::exceptions::PyIsADirectoryError;
18use pyo3::exceptions::PyOSError;
19use pyo3::exceptions::PyRuntimeError;
20use pyo3::exceptions::PyUnicodeError;
21use pyo3::exceptions::PyValueError;
22use pyo3::prelude::*;
23use pyo3::types::PyDict;
24use pyo3::types::PyList;
25use pyo3::types::PyString;
26use pyo3::PyTypeInfo;
27
28use self::pyfile::PyFileRead;
29use self::pyfile::PyFileReadWrapper;
30use self::pyfile::PyFileWrite;
31use self::pyfile::PyFileWriteWrapper;
32
// Build-time metadata module: textually includes the `built.rs` file located
// in Cargo's `OUT_DIR` (presumably emitted by the crate's build script --
// confirm against build.rs). `init` passes this module to `pyo3_built!` to
// populate the `__build__` attribute; items it does not read are unused,
// hence the `dead_code` allowance.
#[allow(dead_code)]
mod build {
    include!(concat!(env!("OUT_DIR"), "/built.rs"));
}
37
38fn convert_error(_py: Python, error: nafcodec::error::Error, path: Option<&str>) -> PyErr {
40 use nafcodec::error::Error;
41
42 match error {
43 Error::Utf8(_utf8_error) => PyUnicodeError::new_err("failed to decode UTF-8 data"),
44 Error::Nom(nom_error) => {
45 PyValueError::new_err(format!("parser failed: {:?}", nom_error.code))
46 }
47 Error::MissingField(field) => {
48 PyValueError::new_err(format!("missing record field: {:?}", field))
49 }
50 Error::InvalidLength => PyValueError::new_err("inconsistent sequence length"),
51 Error::InvalidSequence => PyValueError::new_err("invalid characters found in sequence"),
52 Error::Io(io_error) => {
53 let desc = io_error.to_string();
54 if let Some(p) = path.map(str::to_string) {
55 match io_error.raw_os_error() {
56 Some(2) => PyFileNotFoundError::new_err((p,)),
57 #[cfg(target_os = "windows")]
58 Some(3) => PyFileNotFoundError::new_err((p,)),
59 #[cfg(not(target_os = "windows"))]
60 Some(21) => PyIsADirectoryError::new_err((p,)),
61 Some(code) => PyOSError::new_err((code, desc, p)),
62 None => PyOSError::new_err((desc,)),
63 }
64 } else {
65 match io_error.raw_os_error() {
66 Some(2) => PyFileNotFoundError::new_err((desc,)),
67 #[cfg(target_os = "windows")]
68 Some(3) => PyFileNotFoundError::new_err((desc,)),
69 #[cfg(not(target_os = "windows"))]
70 Some(21) => PyIsADirectoryError::new_err((desc,)),
71 Some(code) => PyOSError::new_err((code, desc)),
72 None => PyOSError::new_err((desc,)),
73 }
74 }
75 }
76 }
77}
78
/// Newtype around [`nafcodec::SequenceType`] so that the Python conversion
/// traits (`FromPyObject` / `IntoPyObject`) can be implemented for it in this
/// crate. On the Python side the value is one of the strings `"dna"`,
/// `"rna"`, `"protein"` or `"text"`.
pub struct SequenceType(nafcodec::SequenceType);
82
83impl<'py> FromPyObject<'py> for SequenceType {
84 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
85 let py = ob.py();
86 match ob.downcast::<PyString>()?.to_string_lossy().as_ref() {
87 "dna" => Ok(SequenceType(nafcodec::SequenceType::Dna)),
88 "rna" => Ok(SequenceType(nafcodec::SequenceType::Rna)),
89 "protein" => Ok(SequenceType(nafcodec::SequenceType::Protein)),
90 "text" => Ok(SequenceType(nafcodec::SequenceType::Text)),
91 other => {
92 let msg = PyString::new(py, "expected 'dna', 'rna', 'protein' or 'text', got {!r}")
93 .call_method1("format", (other,))?
94 .unbind()
95 .into_any();
96 Err(PyValueError::new_err(msg))
97 }
98 }
99 }
100}
101
102impl<'py> IntoPyObject<'py> for SequenceType {
103 type Target = PyString;
104 type Output = Bound<'py, Self::Target>;
105 type Error = Infallible;
106
107 fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
108 let tag = match self.0 {
109 nafcodec::SequenceType::Dna => pyo3::intern!(py, "dna"),
110 nafcodec::SequenceType::Rna => pyo3::intern!(py, "rna"),
111 nafcodec::SequenceType::Protein => pyo3::intern!(py, "protein"),
112 nafcodec::SequenceType::Text => pyo3::intern!(py, "text"),
113 };
114 Ok(tag.clone())
115 }
116}
117
118impl From<nafcodec::SequenceType> for SequenceType {
119 fn from(ty: nafcodec::SequenceType) -> Self {
120 Self(ty)
121 }
122}
123
124impl From<SequenceType> for nafcodec::SequenceType {
125 fn from(ty: SequenceType) -> Self {
126 ty.0
127 }
128}
129
/// Mode in which a file is opened: `Read` corresponds to the Python mode
/// string `"r"`, `Write` to `"w"`.
///
/// `Debug` and `Eq` are derived in addition to `Clone`/`Copy`/`PartialEq` so
/// the type can be printed in diagnostics and used with `assert_eq!`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum OpenMode {
    /// Open for reading.
    Read,
    /// Open for writing.
    Write,
}
137
138impl<'py> FromPyObject<'py> for OpenMode {
139 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
140 let py = ob.py();
141 match ob.downcast::<PyString>()?.to_string_lossy().as_ref() {
142 "r" => Ok(OpenMode::Read),
143 "w" => Ok(OpenMode::Write),
144 other => {
145 let msg = PyString::new(py, "expected 'r' or 'w', got {!r}")
146 .call_method1("format", (other,))?
147 .unbind()
148 .into_any();
149 Err(PyValueError::new_err(msg))
150 }
151 }
152 }
153}
154
/// A single sequence record exposed to Python.
///
/// Every field is optional and is readable and writable from Python. The
/// constructor checks that `sequence`, `quality` and `length` agree with each
/// other; later attribute assignments are not re-validated by this struct.
#[pyclass(module = "nafcodec")]
#[derive(Clone, Debug)]
pub struct Record {
    /// The record identifier, if any.
    #[pyo3(get, set)]
    id: Option<Py<PyString>>,
    /// The record comment, if any.
    #[pyo3(get, set)]
    comment: Option<Py<PyString>>,
    /// The record sequence, if any.
    #[pyo3(get, set)]
    sequence: Option<Py<PyString>>,
    /// The record quality string, if any.
    #[pyo3(get, set)]
    quality: Option<Py<PyString>>,
    /// The record length, if known; the constructor derives it from
    /// `sequence` or `quality` when it is not given explicitly.
    #[pyo3(get, set)]
    length: Option<u64>,
}
177
178impl Record {
179 pub fn from_py<'py>(py: Python<'py>, record: nafcodec::Record) -> Self {
180 let id = record.id.map(|x| PyString::new(py, &x).into());
181 let sequence = record.sequence.map(|x| PyString::new(py, &x).into());
182 let comment = record.comment.map(|x| PyString::new(py, &x).into());
183 let quality = record.quality.map(|x| PyString::new(py, &x).into());
184 let length = record.length;
185 Self {
186 id,
187 sequence,
188 comment,
189 quality,
190 length,
191 }
192 }
193}
194
195#[pymethods]
196impl Record {
197 #[new]
198 #[pyo3(signature = (*, id=None, comment=None, sequence=None, quality=None, length=None))]
199 fn __init__<'py>(
200 py: Python<'py>,
201 id: Option<Py<PyString>>,
202 comment: Option<Py<PyString>>,
203 sequence: Option<Py<PyString>>,
204 quality: Option<Py<PyString>>,
205 mut length: Option<u64>,
206 ) -> PyResult<PyClassInitializer<Self>> {
207 if let Some(seq) = sequence.as_ref() {
209 if let Some(qual) = quality.as_ref() {
210 if seq.bind(py).len()? != qual.bind(py).len()? {
211 return Err(PyValueError::new_err(
212 "lengths of sequence and quality don't match",
213 ));
214 }
215 }
216 if let Some(&l) = length.as_ref() {
217 if seq.bind(py).len()? != l as usize {
218 return Err(PyValueError::new_err(
219 "length of sequence and record length don't match",
220 ));
221 }
222 } else {
223 length = Some(seq.bind(py).len()? as u64);
224 }
225 }
226 if let Some(qual) = quality.as_ref() {
227 if let Some(&l) = length.as_ref() {
228 if qual.bind(py).len()? != l as usize {
229 return Err(PyValueError::new_err(
230 "length of quality and record length don't match",
231 ));
232 }
233 } else {
234 length = Some(qual.bind(py).len()? as u64);
235 }
236 }
237
238 Ok(PyClassInitializer::from(Record {
239 id,
240 comment,
241 sequence,
242 quality,
243 length,
244 }))
245 }
246
247 fn __repr__<'py>(slf: &Bound<'py, Self>) -> PyResult<Bound<'py, PyAny>> {
248 let py = slf.py();
249 let format = pyo3::intern!(py, "format");
250 let args = PyList::empty(py);
251 let record = slf.borrow();
252 if let Some(id) = &record.id {
253 args.append(pyo3::intern!(py, "id={!r}").call_method1(format, (id,))?)?;
254 }
255 if let Some(comment) = &record.comment {
256 args.append(pyo3::intern!(py, "comment={!r}").call_method1(format, (comment,))?)?;
257 }
258 if let Some(sequence) = &record.sequence {
259 args.append(pyo3::intern!(py, "sequence={!r}").call_method1(format, (sequence,))?)?;
260 }
261 if let Some(quality) = &record.quality {
262 args.append(pyo3::intern!(py, "quality={!r}").call_method1(format, (quality,))?)?;
263 }
264 if let Some(length) = &record.length {
265 args.append(format!("length={}", length))?;
266 }
267 pyo3::intern!(py, "{}({})").call_method1(
268 format,
269 (
270 slf.get_type().name()?,
271 pyo3::intern!(py, ", ").call_method1("join", (args,))?,
272 ),
273 )
274 }
275}
276
277impl TryFrom<&Record> for nafcodec::Record<'static> {
278 type Error = PyErr;
279 fn try_from(value: &Record) -> Result<Self, PyErr> {
280 Python::with_gil(|py| {
281 let id = value
282 .id
283 .as_ref()
284 .map(|s| s.to_str(py))
285 .transpose()?
286 .map(String::from)
287 .map(Cow::Owned);
288 let comment = value
289 .comment
290 .as_ref()
291 .map(|s| s.to_str(py))
292 .transpose()?
293 .map(String::from)
294 .map(Cow::Owned);
295 let sequence = value
296 .sequence
297 .as_ref()
298 .map(|s| s.to_str(py))
299 .transpose()?
300 .map(String::from)
301 .map(Cow::Owned);
302 let quality = value
303 .quality
304 .as_ref()
305 .map(|s| s.to_str(py))
306 .transpose()?
307 .map(String::from)
308 .map(Cow::Owned);
309 let length = value.length.clone();
310 Ok(nafcodec::Record {
311 id,
312 comment,
313 sequence,
314 quality,
315 length,
316 })
317 })
318 }
319}
320
/// Python-visible streaming decoder: a thin wrapper around
/// [`nafcodec::Decoder`] reading through a `BufReader` from either a Python
/// file-like object or a file opened from a path (see `PyFileReadWrapper`).
#[pyclass(module = "nafcodec")]
pub struct Decoder {
    /// The wrapped native decoder.
    decoder: nafcodec::Decoder<'static, BufReader<PyFileReadWrapper>>,
}
328
329#[pymethods]
330impl Decoder {
331 #[new]
332 #[pyo3(signature = (file, *, id=true, comment=true, sequence=true, quality=true, mask=true, buffer_size=None))]
333 pub fn __init__<'py>(
334 file: Bound<'py, PyAny>,
335 id: bool,
336 comment: bool,
337 sequence: bool,
338 quality: bool,
339 mask: bool,
340 buffer_size: Option<usize>,
341 ) -> PyResult<PyClassInitializer<Self>> {
342 let py = file.py();
343
344 let mut builder = DecoderBuilder::new();
345 builder.id(id);
346 builder.comment(comment);
347 builder.sequence(sequence);
348 builder.quality(quality);
349 builder.mask(mask);
350 builder.buffer_size(buffer_size.map(Ok).unwrap_or_else(|| {
351 py.import(pyo3::intern!(py, "io"))?
352 .getattr(pyo3::intern!(py, "DEFAULT_BUFFER_SIZE"))?
353 .extract::<usize>()
354 })?);
355
356 let decoder = match PyFileRead::from_ref(&file) {
357 Ok(handle) => {
358 let wrapper = PyFileReadWrapper::PyFile(handle);
359 builder
360 .with_reader(std::io::BufReader::new(wrapper))
361 .map_err(|e| convert_error(py, e, None))?
362 }
363 Err(_e) => {
364 let path = py
365 .import("os")?
366 .call_method1(pyo3::intern!(py, "fspath"), (file,))?
367 .extract::<Bound<'_, PyString>>()?;
368 let path_str = path.to_str()?;
369 let wrapper = std::fs::File::open(path_str)
370 .map_err(nafcodec::error::Error::Io)
371 .map_err(|e| convert_error(py, e, Some(path_str)))
372 .map(PyFileReadWrapper::File)?;
373 builder
374 .with_reader(std::io::BufReader::new(wrapper))
375 .map_err(|e| convert_error(py, e, Some(path_str)))?
376 }
377 };
378
379 Ok(Decoder { decoder }.into())
380 }
381
382 pub fn __iter__(slf: PyRef<'_, Self>) -> PyResult<PyRef<'_, Self>> {
383 Ok(slf)
384 }
385
386 pub fn __len__(slf: PyRef<'_, Self>) -> PyResult<usize> {
387 Ok(slf.decoder.len())
388 }
389
390 pub fn __next__(mut slf: PyRefMut<'_, Self>) -> PyResult<Option<Record>> {
391 let py = slf.py();
392 let result = slf.deref_mut().decoder.next().transpose();
393 match result {
394 Ok(None) => Ok(None),
395 Ok(Some(record)) => Ok(Some(Record::from_py(py, record))),
396 Err(e) => Err(convert_error(py, e, None)),
397 }
398 }
399
400 pub fn __enter__<'py>(slf: PyRef<'py, Self>) -> PyRef<'py, Self> {
401 slf
402 }
403
404 #[allow(unused)]
405 pub fn __exit__<'py>(
406 slf: PyRefMut<'py, Self>,
407 exc_type: Bound<'py, PyAny>,
408 exc_value: Bound<'py, PyAny>,
409 traceback: Bound<'py, PyAny>,
410 ) -> PyResult<bool> {
411 Ok(false)
412 }
413
414 #[getter]
416 pub fn sequence_type(slf: PyRef<'_, Self>) -> SequenceType {
417 SequenceType(slf.decoder.sequence_type())
418 }
419
420 #[getter]
422 pub fn format_version(slf: PyRef<'_, Self>) -> &Bound<'_, PyString> {
423 use nafcodec::FormatVersion;
424 let py = slf.py();
425 match slf.decoder.header().format_version() {
426 FormatVersion::V1 => pyo3::intern!(py, "v1"),
427 FormatVersion::V2 => pyo3::intern!(py, "v2"),
428 }
429 }
430
431 #[getter]
433 pub fn line_length(slf: PyRef<'_, Self>) -> u64 {
434 slf.decoder.header().line_length()
435 }
436
437 #[getter]
439 pub fn name_separator(slf: PyRef<'_, Self>) -> char {
440 slf.decoder.header().name_separator()
441 }
442
443 #[getter]
445 pub fn number_of_sequences(slf: PyRef<'_, Self>) -> u64 {
446 slf.decoder.header().number_of_sequences()
447 }
448
449 pub fn read(mut slf: PyRefMut<'_, Self>) -> PyResult<Option<Record>> {
453 let py = slf.py();
454 let result = slf.deref_mut().decoder.next().transpose();
455 match result {
456 Ok(None) => Ok(None),
457 Ok(Some(record)) => Ok(Some(Record::from_py(py, record))),
458 Err(e) => Err(convert_error(py, e, None)),
459 }
460 }
461}
462
/// Python-visible encoder: records are accumulated by an in-memory
/// [`nafcodec::Encoder`] and written out to `file` when `close` is called.
#[pyclass(module = "nafcodec")]
pub struct Encoder {
    /// The wrapped native encoder; `None` once `close` has taken it.
    encoder: Option<nafcodec::Encoder<'static, nafcodec::Memory>>,
    /// Destination that `close` writes the encoded archive to.
    file: PyFileWriteWrapper,
}
471
472#[pymethods]
473impl Encoder {
474 #[new]
475 #[pyo3(signature=(
476 file,
477 sequence_type=SequenceType(nafcodec::SequenceType::Dna),
478 *,
479 id = false,
480 comment = false,
481 sequence = false,
482 quality = false,
483 compression_level = 0,
484 ))]
485 pub fn __init__<'py>(
486 file: Bound<'py, PyAny>,
487 sequence_type: SequenceType,
488 id: bool,
489 comment: bool,
490 sequence: bool,
491 quality: bool,
492 compression_level: i32,
493 ) -> PyResult<PyClassInitializer<Self>> {
494 let py = file.py();
495 let file = match PyFileWrite::from_ref(&file) {
496 Ok(handle) => PyFileWriteWrapper::PyFile(handle),
497 Err(_e) => {
498 let path = py
499 .import("os")?
500 .call_method1(pyo3::intern!(py, "fspath"), (file,))?
501 .extract::<Bound<'_, PyString>>()?;
502 let path_str = path.to_str()?;
503 std::fs::File::create(path_str)
504 .map_err(nafcodec::error::Error::Io)
505 .map_err(|e| convert_error(py, e, Some(path_str)))
506 .map(PyFileWriteWrapper::File)?
507 }
508 };
509 let encoder = nafcodec::EncoderBuilder::new(sequence_type.0)
510 .id(id)
511 .comment(comment)
512 .quality(quality)
513 .sequence(sequence)
514 .compression_level(compression_level)
515 .with_memory()
516 .map(Some)
517 .map_err(|e| convert_error(py, e, None))?;
518 Ok(Self { file, encoder }.into())
519 }
520
521 pub fn __enter__<'py>(slf: PyRef<'py, Self>) -> PyRef<'py, Self> {
522 slf
523 }
524
525 #[allow(unused)]
526 pub fn __exit__<'py>(
527 slf: PyRefMut<'py, Self>,
528 exc_type: Bound<'py, PyAny>,
529 exc_value: Bound<'py, PyAny>,
530 traceback: Bound<'py, PyAny>,
531 ) -> PyResult<bool> {
532 Encoder::close(slf)?;
533 Ok(false)
534 }
535
536 pub fn write<'py>(mut slf: PyRefMut<'py, Self>, record: &'py Record) -> PyResult<()> {
537 let py = slf.py();
538
539 macro_rules! borrow_field {
557 ($field:ident) => {
558 #[allow(unused_assignments)]
559 let mut borrowed = None;
560 let mut $field = None;
561 if let Some(x) = record.$field.as_ref() {
562 let s = x.bind(py);
563 let b = s.as_borrowed();
564 borrowed = Some(b);
565 $field = borrowed.as_ref().map(|b| b.to_cow()).transpose()?;
566 }
567 };
568 }
569
570 if let Some(encoder) = slf.encoder.as_mut() {
571 borrow_field!(id);
572 borrow_field!(comment);
573 borrow_field!(sequence);
574 borrow_field!(quality);
575 let r = nafcodec::Record {
576 id,
577 comment,
578 sequence,
579 quality,
580 length: record.length.clone(),
581 };
582 encoder.push(&r).map_err(|err| convert_error(py, err, None))
583 } else {
584 Err(PyRuntimeError::new_err("operation on closed encoder."))
585 }
586 }
587
588 pub fn close<'py>(mut slf: PyRefMut<'py, Self>) -> PyResult<()> {
589 let py = slf.py();
590 if let Some(encoder) = slf.encoder.take() {
591 encoder
592 .write(&mut slf.file)
593 .map_err(|e| convert_error(py, e, None))?;
594 }
595 Ok(())
596 }
597}
598
599#[pymodule]
601#[pyo3(name = "lib")]
602pub fn init<'py>(py: Python<'py>, m: &Bound<'py, PyModule>) -> PyResult<()> {
603 m.add("__package__", "nafcodec")?;
604 m.add("__version__", env!("CARGO_PKG_VERSION"))?;
605 m.add("__author__", env!("CARGO_PKG_AUTHORS").replace(':', "\n"))?;
606 m.add("__build__", pyo3_built!(py, build))?;
607
608 m.add_class::<Decoder>()?;
609 m.add_class::<Encoder>()?;
610 m.add_class::<Record>()?;
611
612 #[pyfn(m)]
642 #[pyo3(signature = (file, mode = OpenMode::Read, **options))]
643 fn open<'py>(
644 file: &Bound<'py, PyAny>,
645 mode: OpenMode,
646 options: Option<&Bound<'py, PyDict>>,
647 ) -> PyResult<Bound<'py, PyAny>> {
648 let py = file.py();
649 match mode {
650 OpenMode::Read => Decoder::type_object(py).call((file,), options),
651 OpenMode::Write => Encoder::type_object(py).call((file,), options),
652 }
653 }
654
655 Ok(())
656}