1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
//! VCF/BCF writing with type-safe alleles and zero-allocation encoding.
//!
//! Use [`Writer`] with [`OutputFormat`] for all VCF/BCF output:
//! - `.vcf` → plain text VCF
//! - `.vcf.gz` → BGZF-compressed VCF with automatic TBI index co-production
//! - `.bcf` → BCF binary with automatic CSI index co-production
//!
//! A single [`Writer`] type handles all output formats (VCF, VCF.gz, BCF).
//!
//! Records are encoded through a typestate [`RecordEncoder`] that enforces
//! the correct field ordering at compile time. The full chain of states is
//! `Begun` → `Filtered` → `WithSamples` → `emit()`; a record without sample
//! columns can call `emit()` directly from `Filtered`, as the last example
//! below does.
//!
//! # Writing VCF records
//!
//! ```
//! use seqair_types::{Base, Pos1};
//! use seqair::vcf::{
//! Alleles, ContigDef, Genotype, Number, OutputFormat, ValueType, VcfHeader, Writer, FormatGt, FormatInt, InfoInt,
//! record_encoder::{FormatFieldDef, Gt, InfoFieldDef, Scalar},
//! };
//! use std::sync::Arc;
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! // 1. Build header — typed field handles are resolved once at setup, not per record.
//! // In production use VcfHeaderBuilder::from_bam_header() to copy contig info.
//! let mut builder = VcfHeader::builder();
//! let chr1 = builder.register_contig("chr1", ContigDef { length: Some(248_956_422) })?;
//! let mut builder = builder.infos();
//! let dp_info: InfoInt = builder.register_info(
//! &InfoFieldDef::<Scalar<i32>>::new("DP", Number::Count(1), ValueType::Integer, "Total read depth")
//! )?;
//! let mut builder = builder.formats();
//! let gt_fmt: FormatGt = builder.register_format(
//! &FormatFieldDef::<Gt>::new("GT", Number::Count(1), ValueType::String, "Genotype")
//! )?;
//! let dp_fmt: FormatInt = builder.register_format(
//! &FormatFieldDef::<Scalar<i32>>::new("DP", Number::Count(1), ValueType::Integer, "Sample depth")
//! )?;
//! let mut builder = builder.samples();
//! builder.add_sample("sample1")?;
//! let header = Arc::new(builder.build()?);
//!
//! // 2. Write to an in-memory buffer (or any `impl Write`)
//! let mut output = Vec::new();
//! let writer = Writer::new(&mut output, OutputFormat::Vcf);
//! let mut writer = writer.write_header(&header)?;
//!
//! // 3. Encode one record — the typestate chain is enforced at compile time
//! let alleles = Alleles::snv(Base::A, Base::T)?;
//! let enc = writer.begin_record(&chr1, Pos1::new(12345).unwrap(), &alleles, Some(30.0))?;
//! let mut enc = enc.filter_pass(); // Begun → Filtered
//! dp_info.encode(&mut enc, 50);
//! let mut enc = enc.begin_samples(); // Filtered → WithSamples
//! gt_fmt.encode(&mut enc, &[Genotype::unphased(0, 1)])?;
//! dp_fmt.encode(&mut enc, &[45])?;
//! enc.emit()?;
//!
//! writer.finish()?;
//! # let vcf = String::from_utf8(output)?;
//! # assert!(vcf.contains("chr1\t12345\t.\tA\tT\t30\tPASS\tDP=50"));
//! # Ok(())
//! # }
//! ```
//!
//! # Writing BCF (binary VCF)
//!
//! Switch [`OutputFormat::Vcf`] to [`OutputFormat::Bcf`] — the encoding API is
//! identical. Pre-resolved handles perform direct BCF encoding with no per-record
//! allocations or string dictionary lookups.
//!
//! ```
//! use seqair_types::{Base, Pos1};
//! use seqair::vcf::{
//! Alleles, ContigDef, Genotype, Number, OutputFormat, ValueType, VcfHeader, Writer, FormatGt, InfoInt,
//! record_encoder::{FormatFieldDef, Gt, InfoFieldDef, Scalar}
//! };
//! use std::sync::Arc;
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! let mut builder = VcfHeader::builder();
//! let contig = builder.register_contig("chr1", ContigDef { length: Some(1000) })?;
//! let mut builder = builder.infos();
//! let dp: InfoInt = builder.register_info(
//! &InfoFieldDef::<Scalar<i32>>::new("DP", Number::Count(1), ValueType::Integer, "Depth")
//! )?;
//! let mut builder = builder.formats();
//! let gt: FormatGt = builder.register_format(
//! &FormatFieldDef::<Gt>::new("GT", Number::Count(1), ValueType::String, "Genotype")
//! )?;
//! let mut builder = builder.samples();
//! builder.add_sample("sample1")?;
//! let header = Arc::new(builder.build()?);
//!
//! let mut buf = Vec::new();
//! let writer = Writer::new(&mut buf, OutputFormat::Bcf);
//! let mut writer = writer.write_header(&header)?;
//!
//! let alleles = Alleles::snv(Base::A, Base::T)?;
//! let enc = writer.begin_record(&contig, Pos1::new(100).unwrap(), &alleles, Some(30.0))?;
//! let mut enc = enc.filter_pass();
//! dp.encode(&mut enc, 50);
//! let mut enc = enc.begin_samples();
//! gt.encode(&mut enc, &[Genotype::unphased(0, 1)])?;
//! enc.emit()?;
//!
//! writer.finish()?;
//! # assert!(!buf.is_empty());
//! # Ok(())
//! # }
//! ```
//!
//! # Custom fields and collecting field defs in a struct
//!
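//! A field's header definition can be co-located with the custom type that
//! encodes it (here `Depth::DEF` next to the `EncodeInfo` impl), and the keys
//! returned by registration can be collected in a single struct.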
//!
//! ```rust
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! use seqair_types::{Base, Pos1};
//! use seqair::vcf::{self, EncodeInfo as _};
//!
//! // define type
//! struct Depth(i32);
//! impl Depth {
//! // header metadata
//! const DEF: vcf::InfoFieldDef<vcf::Scalar<i32>> = vcf::InfoFieldDef::new(
//! "DP", vcf::Number::Count(1), vcf::ValueType::Integer, "Combined depth"
//! );
//! }
//! impl vcf::EncodeInfo for Depth {
//! type Key = vcf::InfoInt;
//! // custom encoding logic
//! fn encode_info(&self, enc: &mut dyn vcf::InfoEncoder, key: &Self::Key) {
//! key.encode(enc, self.0);
//! }
//! }
//!
//! // define all fields your app will use
//! struct MyInfoFields {
//! depth: vcf::InfoInt,
//! }
//!
//! // default header setup
//! let mut builder = vcf::VcfHeader::builder();
//! let contig =
//! builder.register_contig("chr1", vcf::ContigDef { length: Some(1000) })?;
//! let mut builder = builder.infos();
//! // register our field
//! let depth_key = builder.register_info(&Depth::DEF)?;
//! let header = builder.build()?;
//! // collect fields
//! let my_fields = MyInfoFields { depth: depth_key };
//!
//! let mut buf = Vec::new();
//! let writer = vcf::Writer::new(&mut buf, vcf::OutputFormat::Bcf);
//!
//! // start by writing header
//! let mut writer = writer.write_header(&header)?;
//!
//! // new record
//! let mut enc = writer.begin_record(&contig,
//! Pos1::new(1).ok_or("invalid position")?,
//! &vcf::Alleles::snv(Base::A, Base::T).unwrap(),
//! None
//! )?;
//! // PASS
//! let mut enc = enc.filter_pass();
//! // add custom info field
//! my_fields.depth.encode(&mut enc, Depth(30).0);
//! // …or through the `EncodeInfo` impl (note: setting the same field twice overwrites it)
//! Depth(30).encode_info(&mut enc, &my_fields.depth);
//! // write record
//! enc.emit()?;
//!
//! writer.finish()?;
//! # assert!(!buf.is_empty());
//! # Ok(()) }
//! ```
pub
pub
pub
pub use Alleles;
pub use ;
pub use ;
pub use Genotype;
pub use ;
pub use ;
/// Output format for [`Writer`].