Skip to main content

patchouly_build/
extract.rs

1use std::{
2    collections::{BTreeMap, HashMap, hash_map::Entry},
3    error::Error,
4    fs::File,
5    io::{Read, Seek, SeekFrom},
6    ops::Range,
7    path::Path,
8};
9
10use memmap2::Mmap;
11use object::{
12    Object, ObjectSection, ObjectSymbol, Relocation, RelocationEncoding, RelocationKind,
13    RelocationTarget, Section, Symbol, SymbolKind, read::archive::ArchiveFile,
14};
15use patchouly_core::{
16    Stencil,
17    relocation::{
18        PatchKind, Relocation as StencilRelocation, RelocationEncoding as StencilRelocationEncoding,
19    },
20    stencils::{io_to_index, stencils_len},
21};
22use smallvec::SmallVec;
23
24use crate::structs::StencilArgs;
25
/// Name of the data symbol whose bytes `extract` reads as the library name.
26const LIB_NAME_SYMBOL: &[u8] = b"__STENCIL_API_NAME";
/// Prefix that `extract` strips from text-symbol names to recognise stencil
/// functions; it is also used to build the `<prefix><family>__meta` symbol name.
27const STENCIL_FUNC_PREFIX: &str = "__patchouly__";
28
29enum FileContents {
30    Mmap(Mmap),
31    Vec(Vec<u8>),
32}
33impl FileContents {
34    fn open(path: &Path) -> Result<FileContents, Box<dyn Error>> {
35        let mut file = File::open(path)?;
36        if let Ok(mmap) = unsafe { Mmap::map(&file) } {
37            Ok(FileContents::Mmap(mmap))
38        } else {
39            let mut ar_data = Vec::with_capacity(file.metadata()?.len() as usize);
40            file.seek(SeekFrom::Start(0))?;
41            file.read_to_end(&mut ar_data)?;
42            Ok(FileContents::Vec(ar_data))
43        }
44    }
45    fn as_slice(&self) -> &[u8] {
46        match self {
47            FileContents::Mmap(mmap) => &mmap[..],
48            FileContents::Vec(data) => data.as_slice(),
49        }
50    }
51}
52
53#[allow(non_snake_case)]
54pub struct StencilFamilyBuild {
55    pub IN: usize,
56    pub OUT: usize,
57    pub MAX_REGS: usize,
58    pub HOLES: usize,
59    pub JUMPS: usize,
60    pub relocation_data: Vec<StencilRelocation>,
61    pub stencils: Vec<Stencil<0, 0, 0, 0>>,
62}
63struct StencilFamilyBuilder {
64    family: StencilFamilyBuild,
65    existing_relocations: HashMap<SmallVec<[StencilRelocation; 8]>, usize>,
66}
67impl StencilFamilyBuilder {
68    fn new(metadata: Metadata) -> Self {
69        let mut family = StencilFamilyBuild {
70            IN: metadata.inputs as usize,
71            OUT: metadata.outputs as usize,
72            MAX_REGS: metadata.max_regs as usize,
73            HOLES: metadata.holes as usize,
74            JUMPS: metadata.jumps as usize,
75            relocation_data: Default::default(),
76            stencils: vec![],
77        };
78        let mut new_len = stencils_len(family.IN, family.OUT, family.MAX_REGS);
79        if metadata.holes > 0 {
80            new_len *= 2;
81        }
82        family.stencils.resize(new_len, Default::default());
83        Self {
84            family,
85            existing_relocations: Default::default(),
86        }
87    }
88
89    fn add_stencil(
90        &mut self,
91        name: &str,
92        code: Range<usize>,
93        io: StencilArgs,
94        wide: bool,
95        relocations: SmallVec<[StencilRelocation; 8]>,
96    ) -> Result<(), Box<dyn Error>> {
97        let holes = relocations
98            .iter()
99            .filter_map(|reloc| {
100                if let PatchKind::Hole = reloc.patch_kind() {
101                    Some(reloc.patch_id() + 1)
102                } else {
103                    None
104                }
105            })
106            .max()
107            .unwrap_or(0) as usize;
108        let jumps = relocations
109            .iter()
110            .filter_map(|reloc| {
111                if let PatchKind::Target = reloc.patch_kind() {
112                    Some(reloc.patch_id() + 1)
113                } else {
114                    None
115                }
116            })
117            .max()
118            .unwrap_or(0) as usize;
119
120        if self.family.IN != io.inputs.len()
121            || self.family.OUT != io.outputs.len()
122            || self.family.HOLES < holes
123            || self.family.JUMPS < jumps
124        {
125            return Err(format!(
126                r#"stencil family mismatch ({}):
127(inputs, outputs, holes, jumps): expected({}, {}, {}, {}), got ({}, {}, {}, {}) {:?}"#,
128                name,
129                self.family.IN,
130                self.family.OUT,
131                self.family.HOLES,
132                self.family.JUMPS,
133                io.inputs.len(),
134                io.outputs.len(),
135                holes,
136                jumps,
137                relocations,
138            )
139            .into());
140        }
141
142        let relocation_start = match self.existing_relocations.entry(relocations) {
143            Entry::Occupied(occupied_entry) => *occupied_entry.get(),
144            Entry::Vacant(vacant_entry) => {
145                let start = self.family.relocation_data.len();
146                let data = &mut self.family.relocation_data;
147                data.extend(vacant_entry.key());
148                data.push(StencilRelocation::new());
149                vacant_entry.insert(start);
150                start
151            }
152        };
153
154        let stencil = Stencil {
155            code_index: code.start.try_into()?,
156            code_len: code.len().try_into()?,
157            relocation_index: relocation_start.try_into()?,
158        };
159        let index = io_to_index(&io.inputs, &io.outputs, self.family.MAX_REGS, wide);
160        self.family.stencils[index] = stencil;
161
162        Ok(())
163    }
164
165    fn finalize(self) -> StencilFamilyBuild {
166        self.family
167    }
168}
169
170pub struct Extraction {
171    pub lib_name: String,
172    pub all_code: Vec<u8>,
173    pub max_regs: usize,
174    pub families: BTreeMap<String, StencilFamilyBuild>,
175}
176pub fn extract(rlib_path: &Path) -> Result<Extraction, Box<dyn Error>> {
177    let rlib_file = FileContents::open(rlib_path)?;
178    let ar_data = rlib_file.as_slice();
179    let ar = ArchiveFile::parse(ar_data)?;
180
181    let mut lib_name = None;
182    let mut max_regs = None;
183    let mut all_code = Vec::with_capacity(ar_data.len() / 2);
184    let mut stencils = HashMap::new();
185
186    for entry in ar.members() {
187        let entry = entry?;
188        let name = entry.name();
189        if !name.ends_with(b".o") {
190            continue;
191        }
192        let data = entry.data(ar_data)?;
193        let file = object::File::parse(data)?;
194
195        'next: for symbol in file.symbols() {
196            let kind = symbol.kind();
197            if kind != SymbolKind::Text {
198                if kind == SymbolKind::Data
199                    && let Ok(LIB_NAME_SYMBOL) = symbol.name_bytes()
200                    && let Some((_, data)) = get_data(&file, &symbol)
201                {
202                    lib_name = Some(String::from_utf8_lossy(data).to_string());
203                }
204                continue;
205            }
206
207            let sym_name = if let Ok(name) = symbol.name()
208                && let Some(after) = name.strip_prefix(STENCIL_FUNC_PREFIX)
209            {
210                after
211            } else {
212                continue;
213            };
214            let Some((name, wide, io)) = parse_name(sym_name) else {
215                continue;
216            };
217            let Some((section, code)) = get_data(&file, &symbol) else {
218                continue;
219            };
220
221            let mut relocations = SmallVec::<[_; 8]>::new();
222            for (offset, info) in section.relocations() {
223                let RelocationTarget::Symbol(target) = info.target() else {
224                    continue 'next;
225                };
226                let Ok(sym) = file.symbol_by_index(target) else {
227                    continue 'next;
228                };
229                if sym.section_index().is_some() {
230                    // relocation to a defined function/data
231                    return Err(format!(
232                        "all function calls/data in stencils must be inlined: {}@{}",
233                        sym_name, name,
234                    )
235                    .into());
236                }
237
238                let Some(reloc) = sym.name().ok() else {
239                    continue 'next;
240                };
241                let Some(patch) = get_patch_type(name, reloc) else {
242                    continue 'next;
243                };
244                let reloc = new_relocation(offset, &info, patch)?;
245                relocations.push(reloc);
246            }
247            relocations.sort_by_key(|reloc| reloc.offset());
248
249            let start = all_code.len();
250            all_code.extend_from_slice(code);
251
252            let mut anchor = None;
253            match stencils.entry(name) {
254                Entry::Occupied(occupied_entry) => anchor.get_or_insert(occupied_entry).get_mut(),
255                Entry::Vacant(vacant_entry) => {
256                    let metadata = file
257                        .symbol_by_name(&format!("{}{}__meta", STENCIL_FUNC_PREFIX, name))
258                        .and_then(|symbol| get_data(&file, &symbol))
259                        .and_then(|(_, data)| Metadata::unpack(data));
260                    let Some(metadata) = metadata else {
261                        return Err(format!("no meta symbol for {}", name).into());
262                    };
263                    if let Some(max_regs) = max_regs {
264                        if max_regs != metadata.max_regs {
265                            return Err(format!(
266                                "all stencils must have the same max_regs: {}",
267                                name,
268                            )
269                            .into());
270                        }
271                    } else {
272                        max_regs = Some(metadata.max_regs);
273                    }
274                    let builder = StencilFamilyBuilder::new(metadata);
275                    vacant_entry.insert(builder)
276                }
277            }
278            .add_stencil(sym_name, start..all_code.len(), io, wide, relocations)?;
279        }
280    }
281
282    Ok(Extraction {
283        lib_name: lib_name.unwrap(),
284        all_code,
285        max_regs: max_regs.unwrap_or(0) as usize,
286        families: stencils
287            .into_iter()
288            .map(|(name, builder)| (name.to_string(), builder.finalize()))
289            .collect(),
290    })
291}
292
293fn get_data<'file>(
294    file: &'file object::File<'file>,
295    symbol: &Symbol,
296) -> Option<(Section<'file, 'file>, &'file [u8])> {
297    let section = symbol.section_index()?;
298    let section = file.section_by_index(section).ok()?;
299    section
300        .data_range(symbol.address(), symbol.size())
301        .ok()
302        .flatten()
303        .map(|data| (section, data))
304}
305
/// Per-family metadata decoded from a `__meta` symbol.
#[derive(Debug, Clone, Copy)]
struct Metadata {
    inputs: u16,
    outputs: u16,
    max_regs: u16,
    holes: u16,
    jumps: u16,
}

impl Metadata {
    /// Decodes five consecutive little-endian `u16` values, in field order.
    ///
    /// Returns `None` unless `data` is exactly 10 bytes long.
    ///
    /// See `stencil.rs` in `patchouly-macros`
    fn unpack(data: &[u8]) -> Option<Self> {
        const FIELD_COUNT: usize = 5;
        if data.len() != FIELD_COUNT * 2 {
            return None;
        }
        let mut fields = data
            .chunks_exact(2)
            .map(|pair| u16::from_le_bytes([pair[0], pair[1]]));
        // Struct fields are evaluated in the order written, matching the layout.
        Some(Metadata {
            inputs: fields.next()?,
            outputs: fields.next()?,
            max_regs: fields.next()?,
            holes: fields.next()?,
            jumps: fields.next()?,
        })
    }
}
329
330fn parse_name(sym_name: &str) -> Option<(&str, bool, StencilArgs)> {
331    if sym_name.is_empty() {
332        return None;
333    }
334
335    struct SplitIter<'a>(usize, &'a str);
336    impl<'a> Iterator for SplitIter<'a> {
337        type Item = &'a str;
338        fn next(&mut self) -> Option<Self::Item> {
339            if self.0 > self.1.len() {
340                return None;
341            }
342            let prev = self.0;
343            let start = self.0.max(1);
344            let Some(end) = self.1[start..].find("__") else {
345                self.0 = self.1.len() + 1;
346                return Some(&self.1[prev..]);
347            };
348            self.0 = start + end + 2;
349            Some(&self.1[prev..start + end])
350        }
351    }
352
353    let mut split = SplitIter(0, sym_name);
354    let name = split.next()?;
355    let inputs = split.next()?;
356    let outputs = split.next()?;
357    let args = StencilArgs::parse(inputs, outputs)?;
358    let wide = matches!(split.next(), Some("wide"));
359
360    Some((name, wide, args))
361}
362
/// Kind of patch a relocation symbol name stands for.
#[derive(Debug, Clone, Copy)]
enum PatchType {
    /// `<name>__hole<N>`
    Hole(u16),
    /// `<name>__stack<N>`
    Stack(u16),
    /// `<name>__target<N>`
    Target(u16),
    /// The literal symbol `copy_and_patch_next`.
    Next,
}

/// Classifies the relocation symbol `reloc` of the stencil family `name`.
///
/// `copy_and_patch_next` maps to [`PatchType::Next`]. Otherwise `reloc` must
/// be `<name>__<kind><N>...` with `kind` one of `stack`, `target` or `hole`;
/// `N` is the run of ASCII digits directly after the kind and anything
/// following it is ignored. Returns `None` for an unknown kind or when the
/// digits are missing or do not fit in a `u16`.
///
/// # Panics
/// Panics when `reloc` is not `copy_and_patch_next` and does not start with
/// `name` followed by `__`.
fn get_patch_type(name: &str, reloc: &str) -> Option<PatchType> {
    /// Parse ints with trailing chars
    fn leading_int(s: &str) -> Option<u16> {
        let digits = s
            .find(|c: char| !c.is_ascii_digit())
            .map_or(s, |end| &s[..end]);
        digits.parse().ok()
    }

    /// Recognised kind prefixes and the variant each one builds.
    const KINDS: [(&str, fn(u16) -> PatchType); 3] = [
        ("stack", PatchType::Stack),
        ("target", PatchType::Target),
        ("hole", PatchType::Hole),
    ];

    if reloc == "copy_and_patch_next" {
        return Some(PatchType::Next);
    }
    assert!(reloc.starts_with(name), "reloc: {reloc}, name: {name}");
    let name = &reloc[name.len()..];
    assert!(name.starts_with("__"));
    let suffix = &name[2..];

    // The first matching prefix decides the outcome, even if its number is bad.
    KINDS
        .iter()
        .find_map(|&(prefix, build)| {
            suffix
                .strip_prefix(prefix)
                .map(|rest| leading_int(rest).map(build))
        })
        .flatten()
}
400
401fn new_relocation(
402    offset: u64,
403    relocation: &Relocation,
404    patch: PatchType,
405) -> Result<StencilRelocation, Box<dyn Error>> {
406    let mut reloc = StencilRelocation::new();
407
408    let offset: u16 = offset.try_into()?;
409    reloc.checked_set_offset(offset)?;
410
411    let encoding = match relocation.encoding() {
412        RelocationEncoding::Generic => StencilRelocationEncoding::Generic,
413        RelocationEncoding::X86Signed => StencilRelocationEncoding::X86Signed,
414        _ => return Err("unsupported relocation encoding".into()),
415    };
416    reloc.checked_set_encoding(encoding)?;
417
418    let relative = match relocation.kind() {
419        RelocationKind::Absolute => false,
420        RelocationKind::Relative | RelocationKind::PltRelative => true,
421        _ => return Err("unsupported relocation kind".into()),
422    };
423    reloc.checked_set_relative(relative)?;
424
425    let size = relocation.size();
426    reloc.checked_set_size(size)?;
427
428    let addend = relocation.addend().try_into()?;
429    reloc.checked_set_addend(addend)?;
430
431    let (kind, extra) = match patch {
432        PatchType::Hole(i) => (PatchKind::Hole, i),
433        PatchType::Stack(i) => (PatchKind::Stack, i),
434        PatchType::Target(i) => (PatchKind::Target, i),
435        PatchType::Next => (PatchKind::Target, 0),
436    };
437    reloc.checked_set_patch_kind(kind)?;
438    reloc.checked_set_patch_id(extra)?;
439
440    Ok(reloc)
441}
442
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks family name, `wide` flag and resulting table index for a few
    /// symbol names, including families whose names start with `__`.
    #[test]
    fn test_parse_name() {
        let max_args = 10;
        // (symbol, expected family, expected wide flag, expected table index)
        let cases = [
            ("add_const__1__0", "add_const", false, 10),
            ("add_const__9__0__wide", "add_const", true, 190),
            ("__empty____", "__empty", false, 0),
            ("__move__1__1", "__move", false, 11),
        ];
        for (symbol, expected_name, expected_wide, expected_index) in cases {
            let (family, parsed_wide, args) = parse_name(symbol).unwrap();
            assert_eq!(family, expected_name);
            assert_eq!(parsed_wide, expected_wide);
            let index = io_to_index(&args.inputs, &args.outputs, max_args, expected_wide);
            assert_eq!(index, expected_index);
        }
    }
}