1use std::{
2 io::{Error, ErrorKind, Read, Result, Seek},
3 {fs::File, io::Write},
4};
5
6use d4_framefile::{Directory, Stream};
7
8use crate::{
9 stab::{
10 RangeRecord, Record, RecordBlockParsingState, SparseArraryMetadata,
11 SECONDARY_TABLE_METADATA_NAME, SECONDARY_TABLE_NAME,
12 },
13 Header,
14};
15
/// One record of the secondary frame index.
///
/// Values of this type are written to and read from the index stream as raw
/// bytes, so the in-memory layout *is* the on-disk format. The layout is
/// therefore pinned with `repr(C, packed)`: fields are stored in declaration
/// order with no padding (30 bytes per entry). Plain `repr(packed)` keeps the
/// default Rust representation, whose field order is not guaranteed and may
/// differ between compiler versions.
///
/// NOTE(review): index blobs produced by a build in which the compiler picked a
/// different field order would not be readable with this layout — confirm
/// against existing files before release.
#[derive(Clone, Copy)]
#[repr(C, packed)]
struct FrameIndexEntry {
    /// Position of the chromosome in the header's chromosome list.
    chrom_id: u32,
    /// Start of the effective range of the first record indexed by this entry.
    start_pos: u32,
    /// End of the effective range of the last record indexed by this entry.
    end_pos: u32,
    /// Byte offset of the frame relative to the secondary table's base address.
    offset: u64,
    /// Size of the frame in bytes.
    frame_size: u64,
    /// Offset of the first record inside the frame, as reported by the parser
    /// state before the frame was parsed.
    record_offset: u8,
    /// `true` when the frame is the first frame of its stream.
    first_frame: bool,
}
27
/// Location of a frame inside the secondary table, plus the information needed
/// to start decoding records from it.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct RecordFrameAddress {
    /// Byte offset of the frame, as stored in the frame index.
    pub frame_offset: u64,
    /// Size of the frame in bytes.
    pub frame_size: usize,
    /// Offset of the first record within the frame.
    pub record_offset: usize,
    /// Whether the frame is the first frame of its stream.
    pub first_frame: bool,
}
35
36impl RecordFrameAddress {
37 pub fn open_stream<T: Seek + Read>(&self, stab_root: &Directory<T>) -> Result<Stream<T>> {
38 stab_root.open_stream_by_offset(self.frame_offset, self.frame_size)
39 }
40 fn from_frame_index_entry(entry: &FrameIndexEntry) -> Self {
41 Self {
42 frame_offset: entry.offset,
43 frame_size: entry.frame_size as usize,
44 record_offset: entry.record_offset as usize,
45 first_frame: entry.first_frame,
46 }
47 }
48}
49
50impl FrameIndexEntry {
51 fn ensure_byte_odering(&mut self) {
52 self.chrom_id = self.chrom_id.to_le();
53 self.start_pos = self.start_pos.to_le();
54 self.end_pos = self.end_pos.to_le();
55 self.offset = self.offset.to_le();
56 self.frame_size = self.frame_size.to_le();
57 self.record_offset = self.record_offset.to_le();
58 }
59}
60
61pub struct SecondaryFrameIndex {
62 header: Header,
63 items: Vec<FrameIndexEntry>,
64}
65impl SecondaryFrameIndex {
66 pub const STREAM_NAME: &'static str = "secondary_frame_index";
67
68 pub fn print_secondary_table_index<W: Write>(&self, mut writer: W) -> Result<()> {
69 let chrom_list = self.header.chrom_list();
70 for item in self.items.iter() {
71 let chr_name = chrom_list[item.chrom_id as usize].name.as_str();
72 writeln!(
73 writer,
74 "{is_head}{offset:8x}({size:4x})+{rec_ofs}\t{chr_name}:{begin}-{end}",
75 chr_name = chr_name,
76 begin = { item.start_pos },
77 end = { item.end_pos },
78 offset = { item.offset },
79 size = { item.frame_size },
80 rec_ofs = item.record_offset,
81 is_head = if item.first_frame { "H" } else { "T" },
82 )?;
83 }
84 Ok(())
85 }
86
87 pub fn find_partial_seconary_table(
88 &self,
89 chr: &str,
90 from: u32,
91 ) -> Result<Option<RecordFrameAddress>> {
92 if let Some((chr_id, _)) = self
93 .header
94 .chrom_list()
95 .iter()
96 .enumerate()
97 .find(|(_, c)| &c.name == chr)
98 {
99 let ret = match self
100 .items
101 .binary_search_by_key(&(chr_id as u32, from), |item| {
102 (item.chrom_id, item.start_pos)
103 }) {
104 Ok(idx) => RecordFrameAddress::from_frame_index_entry(&self.items[idx]),
105 Err(idx) if !self.items.is_empty() => {
106 let prev_idx = if idx > 0 { idx - 1 } else { 0 };
107 if self.items[prev_idx].chrom_id == chr_id as u32 {
108 RecordFrameAddress::from_frame_index_entry(&self.items[prev_idx])
109 } else {
110 return Ok(None);
111 }
112 }
113 _ => {
114 return Ok(None);
115 }
116 };
117 Ok(Some(ret))
118 } else {
119 Ok(None)
120 }
121 }
122
123 pub(crate) fn from_reader<R: Read>(mut reader: R, header: Header) -> Result<Self> {
124 let mut size_buf = [0; std::mem::size_of::<u64>()];
125 reader.read_exact(&mut size_buf)?;
126 let size = u64::from_le_bytes(size_buf) as usize;
127 let mut ret = SecondaryFrameIndex {
128 items: Vec::with_capacity(size),
129 header,
130 };
131
132 let mut buffer = vec![0; std::mem::size_of::<FrameIndexEntry>() * size];
133 reader.read_exact(&mut buffer)?;
134 let items = unsafe {
135 std::slice::from_raw_parts_mut(buffer.as_mut_ptr() as *mut FrameIndexEntry, size)
136 };
137 items.iter_mut().for_each(|item| item.ensure_byte_odering());
138 ret.items.extend_from_slice(items);
139
140 Ok(ret)
141 }
142 pub(crate) fn write<W: Write>(&self, mut out: W) -> Result<()> {
143 out.write_all(&(self.items.len() as u64).to_le_bytes())?;
144 for item in self.items.iter() {
145 let mut item = *item;
146 item.ensure_byte_odering();
147 let bytes = unsafe {
148 std::slice::from_raw_parts(
149 &item as *const _ as *const u8,
150 std::mem::size_of::<FrameIndexEntry>(),
151 )
152 };
153 out.write_all(bytes)?;
154 }
155 Ok(())
156 }
157 pub(crate) fn get_blob_size(&self) -> usize {
158 std::mem::size_of::<FrameIndexEntry>() * self.items.len() + std::mem::size_of::<usize>()
159 }
160 pub(crate) fn from_data_track(track_root: &Directory<File>) -> Result<Self> {
161 let header = Header::read(track_root.open_stream(Header::HEADER_STREAM_NAME)?)?;
162 let stab_root = track_root.map_directory(SECONDARY_TABLE_NAME)?;
163 let stab_metadata = {
164 let stream = stab_root
165 .open_stream(SECONDARY_TABLE_METADATA_NAME)
166 .unwrap();
167 let mut stream_content = Vec::new();
168 stream.copy_content(&mut stream_content);
169 let raw_metadata = String::from_utf8_lossy(&stream_content);
170 serde_json::from_str::<SparseArraryMetadata>(raw_metadata.trim_end_matches('\0'))
171 .map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?
172 };
173
174 let mut items = Vec::<FrameIndexEntry>::new();
175 let chrom_list = header.chrom_list();
176
177 for stream in stab_metadata.streams() {
178 let chrom_id = if let Some((idx, _)) = chrom_list
179 .iter()
180 .enumerate()
181 .find(|(_, c)| c.name == stream.chr)
182 {
183 idx as u32
184 } else {
185 return Err(Error::new(ErrorKind::Other, "No such chrom"));
186 };
187 let data_stream = stab_root.open_stream(&stream.id).unwrap();
188 let mut frame = data_stream.get_primary_frame();
189 let mut state =
190 RecordBlockParsingState::<RangeRecord>::new(stab_metadata.compression());
191 let mut parsing_buf = vec![];
192 let mut first_frame = true;
193 loop {
194 let offset = unsafe { frame.offset_from(stab_root.get_base_addr()) };
195 assert!(offset >= 0);
196 let offset = offset as u64;
197
198 parsing_buf.clear();
199 let rec_offset = state.first_record_offset();
200 state.parse_frame(frame.as_ref(), &mut parsing_buf);
201 let frame_size = std::mem::size_of_val(frame);
202
203 let has_next_frame = if let Some(next_frame) = frame.next_frame() {
204 frame = next_frame;
205 true
206 } else {
207 false
208 };
209
210 for (id, block) in parsing_buf.iter().enumerate() {
211 let mut recs = block.as_ref();
212 if !has_next_frame {
213 while let Some(last) = recs.last() {
214 if !last.is_valid() {
215 recs = &recs[..recs.len() - 1];
216 } else {
217 break;
218 }
219 }
220 }
221 if !recs.is_empty() {
222 if id == 0 && block.is_single_record() {
223 if let Some(last_entry) = items.last_mut() {
224 last_entry.end_pos = recs[0].effective_range().1;
225 }
226 } else {
227 items.push(FrameIndexEntry {
228 chrom_id,
229 start_pos: recs[0].effective_range().0,
230 end_pos: recs[recs.len() - 1].effective_range().1,
231 offset,
232 frame_size: frame_size as u64,
233 record_offset: rec_offset as u8,
234 first_frame,
235 });
236 }
237 } else {
238 break;
239 }
240 }
241
242 if !has_next_frame {
243 break;
244 }
245 first_frame = false;
246 }
247 }
248
249 Ok(Self { items, header })
250 }
251}