d4/index/
sfi.rs

1use std::{
2    io::{Error, ErrorKind, Read, Result, Seek},
3    {fs::File, io::Write},
4};
5
6use d4_framefile::{Directory, Stream};
7
8use crate::{
9    stab::{
10        RangeRecord, Record, RecordBlockParsingState, SparseArraryMetadata,
11        SECONDARY_TABLE_METADATA_NAME, SECONDARY_TABLE_NAME,
12    },
13    Header,
14};
15
/// One entry of the secondary frame index, stored on disk as its raw in-memory bytes.
///
/// The serialized form is produced and consumed by reinterpreting this struct as a byte
/// slice, so the layout must be fully specified: `repr(C)` pins the fields to their
/// declaration order and `packed` removes all padding (30 bytes per entry, alignment 1).
/// Plain `repr(packed)` keeps the default Rust representation, which gives no guarantee
/// about field order.
///
/// Because the struct is packed, multi-byte fields must be copied out by value
/// (`{ entry.offset }`) rather than borrowed.
#[derive(Clone, Copy)]
#[repr(C, packed)]
struct FrameIndexEntry {
    /// Index of the chromosome in the header's chromosome list.
    chrom_id: u32,
    /// Start of the effective range of the first record covered by this entry.
    start_pos: u32,
    /// End of the effective range of the last record covered by this entry.
    end_pos: u32,
    /// Offset of the frame relative to the base address of the secondary table directory.
    offset: u64,
    /// Size of the frame in bytes.
    frame_size: u64,
    /// Offset of the first record inside the frame, as reported by the block parser.
    record_offset: u8,
    /// `true` when the frame is the first (primary) frame of its stream.
    first_frame: bool,
}
27
/// Location of a secondary-table frame, plus where to start decoding records in it.
///
/// Values of this type are handed out by the secondary frame index so that a reader can
/// jump straight to the frame returned for a query position.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct RecordFrameAddress {
    /// Offset of the frame, passed verbatim to `Directory::open_stream_by_offset`.
    pub frame_offset: u64,
    /// Size of the frame in bytes.
    pub frame_size: usize,
    /// Offset of the first record inside the frame.
    pub record_offset: usize,
    /// Whether the frame is the first frame of its stream.
    pub first_frame: bool,
}
35
36impl RecordFrameAddress {
37    pub fn open_stream<T: Seek + Read>(&self, stab_root: &Directory<T>) -> Result<Stream<T>> {
38        stab_root.open_stream_by_offset(self.frame_offset, self.frame_size)
39    }
40    fn from_frame_index_entry(entry: &FrameIndexEntry) -> Self {
41        Self {
42            frame_offset: entry.offset,
43            frame_size: entry.frame_size as usize,
44            record_offset: entry.record_offset as usize,
45            first_frame: entry.first_frame,
46        }
47    }
48}
49
50impl FrameIndexEntry {
51    fn ensure_byte_odering(&mut self) {
52        self.chrom_id = self.chrom_id.to_le();
53        self.start_pos = self.start_pos.to_le();
54        self.end_pos = self.end_pos.to_le();
55        self.offset = self.offset.to_le();
56        self.frame_size = self.frame_size.to_le();
57        self.record_offset = self.record_offset.to_le();
58    }
59}
60
61pub struct SecondaryFrameIndex {
62    header: Header,
63    items: Vec<FrameIndexEntry>,
64}
65impl SecondaryFrameIndex {
66    pub const STREAM_NAME: &'static str = "secondary_frame_index";
67
68    pub fn print_secondary_table_index<W: Write>(&self, mut writer: W) -> Result<()> {
69        let chrom_list = self.header.chrom_list();
70        for item in self.items.iter() {
71            let chr_name = chrom_list[item.chrom_id as usize].name.as_str();
72            writeln!(
73                writer,
74                "{is_head}{offset:8x}({size:4x})+{rec_ofs}\t{chr_name}:{begin}-{end}",
75                chr_name = chr_name,
76                begin = { item.start_pos },
77                end = { item.end_pos },
78                offset = { item.offset },
79                size = { item.frame_size },
80                rec_ofs = item.record_offset,
81                is_head = if item.first_frame { "H" } else { "T" },
82            )?;
83        }
84        Ok(())
85    }
86
87    pub fn find_partial_seconary_table(
88        &self,
89        chr: &str,
90        from: u32,
91    ) -> Result<Option<RecordFrameAddress>> {
92        if let Some((chr_id, _)) = self
93            .header
94            .chrom_list()
95            .iter()
96            .enumerate()
97            .find(|(_, c)| &c.name == chr)
98        {
99            let ret = match self
100                .items
101                .binary_search_by_key(&(chr_id as u32, from), |item| {
102                    (item.chrom_id, item.start_pos)
103                }) {
104                Ok(idx) => RecordFrameAddress::from_frame_index_entry(&self.items[idx]),
105                Err(idx) if !self.items.is_empty() => {
106                    let prev_idx = if idx > 0 { idx - 1 } else { 0 };
107                    if  self.items[prev_idx].chrom_id == chr_id as u32 {
108                        RecordFrameAddress::from_frame_index_entry(&self.items[prev_idx])
109                    } else {
110                        return Ok(None);
111                    }
112                }
113                _ => {
114                    return Ok(None);
115                }
116            };
117            Ok(Some(ret))
118        } else {
119            Ok(None)
120        }
121    }
122
123    pub(crate) fn from_reader<R: Read>(mut reader: R, header: Header) -> Result<Self> {
124        let mut size_buf = [0; std::mem::size_of::<u64>()];
125        reader.read_exact(&mut size_buf)?;
126        let size = u64::from_le_bytes(size_buf) as usize;
127        let mut ret = SecondaryFrameIndex {
128            items: Vec::with_capacity(size),
129            header,
130        };
131
132        let mut buffer = vec![0; std::mem::size_of::<FrameIndexEntry>() * size];
133        reader.read_exact(&mut buffer)?;
134        let items = unsafe {
135            std::slice::from_raw_parts_mut(buffer.as_mut_ptr() as *mut FrameIndexEntry, size)
136        };
137        items.iter_mut().for_each(|item| item.ensure_byte_odering());
138        ret.items.extend_from_slice(items);
139
140        Ok(ret)
141    }
142    pub(crate) fn write<W: Write>(&self, mut out: W) -> Result<()> {
143        out.write_all(&(self.items.len() as u64).to_le_bytes())?;
144        for item in self.items.iter() {
145            let mut item = *item;
146            item.ensure_byte_odering();
147            let bytes = unsafe {
148                std::slice::from_raw_parts(
149                    &item as *const _ as *const u8,
150                    std::mem::size_of::<FrameIndexEntry>(),
151                )
152            };
153            out.write_all(bytes)?;
154        }
155        Ok(())
156    }
157    pub(crate) fn get_blob_size(&self) -> usize {
158        std::mem::size_of::<FrameIndexEntry>() * self.items.len() + std::mem::size_of::<usize>()
159    }
160    pub(crate) fn from_data_track(track_root: &Directory<File>) -> Result<Self> {
161        let header = Header::read(track_root.open_stream(Header::HEADER_STREAM_NAME)?)?;
162        let stab_root = track_root.map_directory(SECONDARY_TABLE_NAME)?;
163        let stab_metadata = {
164            let stream = stab_root
165                .open_stream(SECONDARY_TABLE_METADATA_NAME)
166                .unwrap();
167            let mut stream_content = Vec::new();
168            stream.copy_content(&mut stream_content);
169            let raw_metadata = String::from_utf8_lossy(&stream_content);
170            serde_json::from_str::<SparseArraryMetadata>(raw_metadata.trim_end_matches('\0'))
171                .map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?
172        };
173
174        let mut items = Vec::<FrameIndexEntry>::new();
175        let chrom_list = header.chrom_list();
176
177        for stream in stab_metadata.streams() {
178            let chrom_id = if let Some((idx, _)) = chrom_list
179                .iter()
180                .enumerate()
181                .find(|(_, c)| c.name == stream.chr)
182            {
183                idx as u32
184            } else {
185                return Err(Error::new(ErrorKind::Other, "No such chrom"));
186            };
187            let data_stream = stab_root.open_stream(&stream.id).unwrap();
188            let mut frame = data_stream.get_primary_frame();
189            let mut state =
190                RecordBlockParsingState::<RangeRecord>::new(stab_metadata.compression());
191            let mut parsing_buf = vec![];
192            let mut first_frame = true;
193            loop {
194                let offset = unsafe { frame.offset_from(stab_root.get_base_addr()) };
195                assert!(offset >= 0);
196                let offset = offset as u64;
197
198                parsing_buf.clear();
199                let rec_offset = state.first_record_offset();
200                state.parse_frame(frame.as_ref(), &mut parsing_buf);
201                let frame_size = std::mem::size_of_val(frame);
202
203                let has_next_frame = if let Some(next_frame) = frame.next_frame() {
204                    frame = next_frame;
205                    true
206                } else {
207                    false
208                };
209
210                for (id, block) in parsing_buf.iter().enumerate() {
211                    let mut recs = block.as_ref();
212                    if !has_next_frame {
213                        while let Some(last) = recs.last() {
214                            if !last.is_valid() {
215                                recs = &recs[..recs.len() - 1];
216                            } else {
217                                break;
218                            }
219                        }
220                    }
221                    if !recs.is_empty() {
222                        if id == 0 && block.is_single_record() {
223                            if let Some(last_entry) = items.last_mut() {
224                                last_entry.end_pos = recs[0].effective_range().1;
225                            }
226                        } else {
227                            items.push(FrameIndexEntry {
228                                chrom_id,
229                                start_pos: recs[0].effective_range().0,
230                                end_pos: recs[recs.len() - 1].effective_range().1,
231                                offset,
232                                frame_size: frame_size as u64,
233                                record_offset: rec_offset as u8,
234                                first_frame,
235                            });
236                        }
237                    } else {
238                        break;
239                    }
240                }
241
242                if !has_next_frame {
243                    break;
244                }
245                first_frame = false;
246            }
247        }
248
249        Ok(Self { items, header })
250    }
251}