git_internal/internal/
index.rs

1use std::collections::BTreeMap;
2use std::fmt::{Display, Formatter};
3use std::fs::{self, File};
4use std::io;
5use std::io::{BufReader, Read, Write};
6#[cfg(unix)]
7use std::os::unix::fs::MetadataExt;
8use std::path::{Path, PathBuf};
9use std::time::{SystemTime, UNIX_EPOCH};
10
11use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
12use sha1::{Digest, Sha1};
13
14use crate::errors::GitError;
15use crate::hash::SHA1;
16use crate::internal::pack::wrapper::Wrapper;
17use crate::utils;
18
19#[derive(PartialEq, Eq, Debug, Clone)]
20pub struct Time {
21    seconds: u32,
22    nanos: u32,
23}
24impl Time {
25    pub fn from_stream(stream: &mut impl Read) -> Result<Self, GitError> {
26        let seconds = stream.read_u32::<BigEndian>()?;
27        let nanos = stream.read_u32::<BigEndian>()?;
28        Ok(Time { seconds, nanos })
29    }
30
31    #[allow(dead_code)]
32    fn to_system_time(&self) -> SystemTime {
33        UNIX_EPOCH + std::time::Duration::new(self.seconds.into(), self.nanos)
34    }
35
36    pub fn from_system_time(system_time: SystemTime) -> Self {
37        match system_time.duration_since(UNIX_EPOCH) {
38            Ok(duration) => {
39                let seconds = duration
40                    .as_secs()
41                    .try_into()
42                    .expect("Time is too far in the future");
43                let nanos = duration.subsec_nanos();
44                Time { seconds, nanos }
45            }
46            Err(_) => panic!("Time is before the UNIX epoch"),
47        }
48    }
49}
50impl Display for Time {
51    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
52        write!(f, "{}:{}", self.seconds, self.nanos)
53    }
54}
55
/// Decoded form of the 16-bit flags field of an index entry.
///
/// Bit layout, most significant bit first: 1 bit `assume_valid`, 1 bit
/// `extended`, 2 bits `stage`, 12 bits `name_length`.
#[derive(Debug)]
pub struct Flags {
    pub assume_valid: bool,
    pub extended: bool,   // must be 0 in v2
    pub stage: u8,        // 2-bit during merge
    pub name_length: u16, // 12-bit
}

impl From<u16> for Flags {
    /// Splits a raw 16-bit flags word into its four fields.
    fn from(flags: u16) -> Self {
        Flags {
            assume_valid: flags & 0x8000 != 0,
            extended: flags & 0x4000 != 0,
            stage: ((flags & 0x3000) >> 12) as u8,
            name_length: flags & 0xFFF,
        }
    }
}

impl TryFrom<&Flags> for u16 {
    type Error = &'static str;

    /// Packs the fields back into a raw 16-bit flags word.
    ///
    /// `TryInto<u16> for &Flags` remains available through the standard
    /// blanket implementation, so `(&flags).try_into()` keeps working.
    ///
    /// # Errors
    /// - `"Name length is too long"` when `name_length` exceeds 12 bits.
    /// - `"Stage is out of range"` when `stage` exceeds 2 bits; encoding such a
    ///   value would otherwise spill into the `extended`/`assume_valid` bits.
    fn try_from(flags: &Flags) -> Result<Self, Self::Error> {
        if flags.name_length > 0xFFF {
            return Err("Name length is too long");
        }
        if flags.stage > 0b11 {
            return Err("Stage is out of range");
        }

        let mut raw = 0u16;
        if flags.assume_valid {
            raw |= 0x8000; // bit 15
        }
        if flags.extended {
            raw |= 0x4000; // bit 14
        }
        raw |= u16::from(flags.stage) << 12; // bits 12-13
        raw |= flags.name_length; // bits 0-11
        Ok(raw)
    }
}

impl Flags {
    /// Flags for a freshly created stage-0 entry whose name is `name_len`
    /// bytes long.
    ///
    /// Lengths that do not fit in 12 bits are stored as `0xFFF`, which is how
    /// the index format marks "name is 0xFFF bytes or longer".
    pub fn new(name_len: u16) -> Self {
        Flags {
            // NOTE(review): new entries start with assume-valid set; confirm
            // this is intended, since that bit normally marks files whose
            // changes should be ignored.
            assume_valid: true,
            extended: false,
            stage: 0,
            name_length: name_len.min(0xFFF),
        }
    }
}
105
106pub struct IndexEntry {
107    pub ctime: Time,
108    pub mtime: Time,
109    pub dev: u32,  // 0 for windows
110    pub ino: u32,  // 0 for windows
111    pub mode: u32, // 0o100644 // 4-bit object type + 3-bit unused + 9-bit unix permission
112    pub uid: u32,  // 0 for windows
113    pub gid: u32,  // 0 for windows
114    pub size: u32,
115    pub hash: SHA1,
116    pub flags: Flags,
117    pub name: String,
118}
119impl Display for IndexEntry {
120    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
121        write!(
122            f,
123            "IndexEntry {{ ctime: {}, mtime: {}, dev: {}, ino: {}, mode: {:o}, uid: {}, gid: {}, size: {}, hash: {}, flags: {:?}, name: {} }}",
124            self.ctime,
125            self.mtime,
126            self.dev,
127            self.ino,
128            self.mode,
129            self.uid,
130            self.gid,
131            self.size,
132            self.hash,
133            self.flags,
134            self.name
135        )
136    }
137}
138
139impl IndexEntry {
140    /** Metadata must be got by [fs::symlink_metadata] to avoid following symlink */
141    pub fn new(meta: &fs::Metadata, hash: SHA1, name: String) -> Self {
142        let mut entry = IndexEntry {
143            ctime: Time::from_system_time(meta.created().unwrap()),
144            mtime: Time::from_system_time(meta.modified().unwrap()),
145            dev: 0,
146            ino: 0,
147            uid: 0,
148            gid: 0,
149            size: meta.len() as u32,
150            hash,
151            flags: Flags::new(name.len() as u16),
152            name,
153            mode: 0o100644,
154        };
155        #[cfg(unix)]
156        {
157            entry.dev = meta.dev() as u32;
158            entry.ino = meta.ino() as u32;
159            entry.uid = meta.uid();
160            entry.gid = meta.gid();
161
162            entry.mode = match meta.mode() & 0o170000/* file mode */ {
163                0o100000 => {
164                    match meta.mode() & 0o111 {
165                        0 => 0o100644, // no execute permission
166                        _ => 0o100755, // with execute permission
167                    }
168                }
169                0o120000 => 0o120000, // symlink
170                _ =>  entry.mode, // keep the original mode
171            }
172        }
173        #[cfg(windows)]
174        {
175            if meta.is_symlink() {
176                entry.mode = 0o120000;
177            }
178        }
179        entry
180    }
181
182    /// - `file`: **to workdir path**
183    /// - `workdir`: absolute or relative path
184    pub fn new_from_file(file: &Path, hash: SHA1, workdir: &Path) -> io::Result<Self> {
185        let name = file.to_str().unwrap().to_string();
186        let file_abs = workdir.join(file);
187        let meta = fs::symlink_metadata(file_abs)?; // without following symlink
188        let index = IndexEntry::new(&meta, hash, name);
189        Ok(index)
190    }
191
192    pub fn new_from_blob(name: String, hash: SHA1, size: u32) -> Self {
193        IndexEntry {
194            ctime: Time {
195                seconds: 0,
196                nanos: 0,
197            },
198            mtime: Time {
199                seconds: 0,
200                nanos: 0,
201            },
202            dev: 0,
203            ino: 0,
204            mode: 0o100644,
205            uid: 0,
206            gid: 0,
207            size,
208            hash,
209            flags: Flags::new(name.len() as u16),
210            name,
211        }
212    }
213}
214
215/// see [index-format](https://git-scm.com/docs/index-format)
216/// <br> to Working Dir relative path
217pub struct Index {
218    entries: BTreeMap<(String, u8), IndexEntry>,
219}
220
221impl Index {
222    pub fn new() -> Self {
223        Index {
224            entries: BTreeMap::new(),
225        }
226    }
227
228    fn check_header(file: &mut impl Read) -> Result<u32, GitError> {
229        let mut magic = [0; 4];
230        file.read_exact(&mut magic)?;
231        if magic != *b"DIRC" {
232            return Err(GitError::InvalidIndexHeader(
233                String::from_utf8_lossy(&magic).to_string(),
234            ));
235        }
236
237        let version = file.read_u32::<BigEndian>()?;
238        // only support v2 now
239        if version != 2 {
240            return Err(GitError::InvalidIndexHeader(version.to_string()));
241        }
242
243        let entries = file.read_u32::<BigEndian>()?;
244        Ok(entries)
245    }
246
247    pub fn size(&self) -> usize {
248        self.entries.len()
249    }
250
251    pub fn from_file(path: impl AsRef<Path>) -> Result<Self, GitError> {
252        let file = File::open(path.as_ref())?; // read-only
253        let total_size = file.metadata()?.len();
254        let file = &mut Wrapper::new(BufReader::new(file)); // TODO move Wrapper & utils to a common module
255
256        let num = Index::check_header(file)?;
257        let mut index = Index::new();
258
259        for _ in 0..num {
260            let mut entry = IndexEntry {
261                ctime: Time::from_stream(file)?,
262                mtime: Time::from_stream(file)?,
263                dev: file.read_u32::<BigEndian>()?, //utils::read_u32_be(file)?,
264                ino: file.read_u32::<BigEndian>()?,
265                mode: file.read_u32::<BigEndian>()?,
266                uid: file.read_u32::<BigEndian>()?,
267                gid: file.read_u32::<BigEndian>()?,
268                size: file.read_u32::<BigEndian>()?,
269                hash: utils::read_sha1(file)?,
270                flags: Flags::from(file.read_u16::<BigEndian>()?),
271                name: String::new(),
272            };
273            let name_len = entry.flags.name_length as usize;
274            let mut name = vec![0; name_len];
275            file.read_exact(&mut name)?;
276            // The exact encoding is undefined, but the '.' and '/' characters are encoded in 7-bit ASCII
277            entry.name =
278                String::from_utf8(name).map_err(|e| GitError::ConversionError(e.to_string()))?; // TODO check the encoding
279            index
280                .entries
281                .insert((entry.name.clone(), entry.flags.stage), entry);
282
283            // 1-8 nul bytes as necessary to pad the entry to a multiple of eight bytes
284            // while keeping the name NUL-terminated. // so at least 1 byte nul
285            let padding = 8 - ((22 + name_len) % 8); // 22 = sha1 + flags, others are 40 % 8 == 0
286            utils::read_bytes(file, padding)?;
287        }
288
289        // Extensions
290        while file.bytes_read() + SHA1::SIZE < total_size as usize {
291            // The remaining 20 bytes must be checksum
292            let sign = utils::read_bytes(file, 4)?;
293            println!(
294                "{:?}",
295                String::from_utf8(sign.clone())
296                    .map_err(|e| GitError::ConversionError(e.to_string()))?
297            );
298            // If the first byte is 'A'...'Z' the extension is optional and can be ignored.
299            if sign[0] >= b'A' && sign[0] <= b'Z' {
300                // Optional extension
301                let size = file.read_u32::<BigEndian>()?;
302                utils::read_bytes(file, size as usize)?; // Ignore the extension
303            } else {
304                // 'link' or 'sdir' extension
305                return Err(GitError::InvalidIndexFile(
306                    "Unsupported extension".to_string(),
307                ));
308            }
309        }
310
311        // check sum
312        let file_hash = file.final_hash();
313        let check_sum = utils::read_sha1(file)?;
314        if file_hash != check_sum {
315            return Err(GitError::InvalidIndexFile("Check sum failed".to_string()));
316        }
317        assert_eq!(index.size(), num as usize);
318        Ok(index)
319    }
320
321    pub fn to_file(&self, path: impl AsRef<Path>) -> Result<(), GitError> {
322        let mut file = File::create(path)?;
323        let mut hash = Sha1::new();
324
325        let mut header = Vec::new();
326        header.write_all(b"DIRC")?;
327        header.write_u32::<BigEndian>(2u32)?; // version 2
328        header.write_u32::<BigEndian>(self.entries.len() as u32)?;
329        file.write_all(&header)?;
330        hash.update(&header);
331
332        for (_, entry) in self.entries.iter() {
333            let mut entry_bytes = Vec::new();
334            entry_bytes.write_u32::<BigEndian>(entry.ctime.seconds)?;
335            entry_bytes.write_u32::<BigEndian>(entry.ctime.nanos)?;
336            entry_bytes.write_u32::<BigEndian>(entry.mtime.seconds)?;
337            entry_bytes.write_u32::<BigEndian>(entry.mtime.nanos)?;
338            entry_bytes.write_u32::<BigEndian>(entry.dev)?;
339            entry_bytes.write_u32::<BigEndian>(entry.ino)?;
340            entry_bytes.write_u32::<BigEndian>(entry.mode)?;
341            entry_bytes.write_u32::<BigEndian>(entry.uid)?;
342            entry_bytes.write_u32::<BigEndian>(entry.gid)?;
343            entry_bytes.write_u32::<BigEndian>(entry.size)?;
344            entry_bytes.write_all(&entry.hash.0)?;
345            entry_bytes.write_u16::<BigEndian>((&entry.flags).try_into().unwrap())?;
346            entry_bytes.write_all(entry.name.as_bytes())?;
347            let padding = 8 - ((22 + entry.name.len()) % 8);
348            entry_bytes.write_all(&vec![0; padding])?;
349
350            file.write_all(&entry_bytes)?;
351            hash.update(&entry_bytes);
352        }
353
354        // Extensions
355
356        // check sum
357        let file_hash: [u8; 20] = hash.finalize().into();
358        file.write_all(&file_hash)?;
359        Ok(())
360    }
361
362    pub fn refresh(&mut self, file: impl AsRef<Path>, workdir: &Path) -> Result<bool, GitError> {
363        let path = file.as_ref();
364        let name = path
365            .to_str()
366            .ok_or(GitError::InvalidPathError(format!("{path:?}")))?;
367
368        if let Some(entry) = self.entries.get_mut(&(name.to_string(), 0)) {
369            let abs_path = workdir.join(path);
370            let meta = fs::symlink_metadata(&abs_path)?;
371            // Try creation time; on error, warn and use modification time (or now)
372            let new_ctime = Time::from_system_time(Self::time_or_now(
373                "creation time",
374                &abs_path,
375                meta.created(),
376            ));
377            let new_mtime = Time::from_system_time(Self::time_or_now(
378                "modification time",
379                &abs_path,
380                meta.modified(),
381            ));
382            let new_size = meta.len() as u32;
383
384            // re-calculate SHA1
385            let mut file = File::open(&abs_path)?;
386            let mut hasher = Sha1::new();
387            io::copy(&mut file, &mut hasher)?;
388            let new_hash = SHA1::from_bytes(&hasher.finalize());
389
390            // refresh index
391            if entry.ctime != new_ctime
392                || entry.mtime != new_mtime
393                || entry.size != new_size
394                || entry.hash != new_hash
395            {
396                entry.ctime = new_ctime;
397                entry.mtime = new_mtime;
398                entry.size = new_size;
399                entry.hash = new_hash;
400                return Ok(true);
401            }
402        }
403        Ok(false)
404    }
405
406    /// Try to get a timestamp, logging on error, and finally falling back to now.
407    fn time_or_now(what: &str, path: &Path, res: io::Result<SystemTime>) -> SystemTime {
408        match res {
409            Ok(ts) => ts,
410            Err(e) => {
411                eprintln!(
412                    "warning: failed to get {what} for {path:?}: {e}; using SystemTime::now()",
413                    what = what,
414                    path = path.display()
415                );
416                SystemTime::now()
417            }
418        }
419    }
420}
421
422impl Default for Index {
423    fn default() -> Self {
424        Self::new()
425    }
426}
427
428impl Index {
429    /// Load index. If it does not exist, return an empty index.
430    pub fn load(index_file: impl AsRef<Path>) -> Result<Self, GitError> {
431        let path = index_file.as_ref();
432        if !path.exists() {
433            return Ok(Index::new());
434        }
435        Index::from_file(path)
436    }
437
438    pub fn update(&mut self, entry: IndexEntry) {
439        self.add(entry)
440    }
441
442    pub fn add(&mut self, entry: IndexEntry) {
443        self.entries
444            .insert((entry.name.clone(), entry.flags.stage), entry);
445    }
446
447    pub fn remove(&mut self, name: &str, stage: u8) -> Option<IndexEntry> {
448        self.entries.remove(&(name.to_string(), stage))
449    }
450
451    pub fn get(&self, name: &str, stage: u8) -> Option<&IndexEntry> {
452        self.entries.get(&(name.to_string(), stage))
453    }
454
455    pub fn tracked(&self, name: &str, stage: u8) -> bool {
456        self.entries.contains_key(&(name.to_string(), stage))
457    }
458
459    pub fn get_hash(&self, file: &str, stage: u8) -> Option<SHA1> {
460        self.get(file, stage).map(|entry| entry.hash)
461    }
462
463    pub fn verify_hash(&self, file: &str, stage: u8, hash: &SHA1) -> bool {
464        let inner_hash = self.get_hash(file, stage);
465        if let Some(inner_hash) = inner_hash {
466            &inner_hash == hash
467        } else {
468            false
469        }
470    }
471    /// is file modified after last `add` (need hash to confirm content change)
472    /// - `workdir` is used to rebuild absolute file path
473    pub fn is_modified(&self, file: &str, stage: u8, workdir: &Path) -> bool {
474        if let Some(entry) = self.get(file, stage) {
475            let path_abs = workdir.join(file);
476            let meta = path_abs.symlink_metadata().unwrap();
477            // TODO more fields
478            let same = entry.ctime
479                == Time::from_system_time(meta.created().unwrap_or(SystemTime::now()))
480                && entry.mtime
481                    == Time::from_system_time(meta.modified().unwrap_or(SystemTime::now()))
482                && entry.size == meta.len() as u32;
483
484            !same
485        } else {
486            panic!("File not found in index");
487        }
488    }
489
490    /// Get all entries with the same stage
491    pub fn tracked_entries(&self, stage: u8) -> Vec<&IndexEntry> {
492        // ? should use stage or not
493        self.entries
494            .iter()
495            .filter(|(_, entry)| entry.flags.stage == stage)
496            .map(|(_, entry)| entry)
497            .collect()
498    }
499
500    /// Get all tracked files(stage = 0)
501    pub fn tracked_files(&self) -> Vec<PathBuf> {
502        self.tracked_entries(0)
503            .iter()
504            .map(|entry| PathBuf::from(&entry.name))
505            .collect()
506    }
507
508    /// Judge if the file(s) of `dir` is in the index
509    /// - false if `dir` is a file
510    pub fn contains_dir_file(&self, dir: &str) -> bool {
511        let dir = Path::new(dir);
512        self.entries.iter().any(|((name, _), _)| {
513            let path = Path::new(name);
514            path.starts_with(dir) && path != dir // TODO change to is_sub_path!
515        })
516    }
517
518    /// remove all files in `dir` from index
519    /// - do nothing if `dir` is a file
520    pub fn remove_dir_files(&mut self, dir: &str) -> Vec<String> {
521        let dir = Path::new(dir);
522        let mut removed = Vec::new();
523        self.entries.retain(|(name, _), _| {
524            let path = Path::new(name);
525            if path.starts_with(dir) && path != dir {
526                removed.push(name.clone());
527                false
528            } else {
529                true
530            }
531        });
532        removed
533    }
534
535    /// saved to index file
536    pub fn save(&self, index_file: impl AsRef<Path>) -> Result<(), GitError> {
537        self.to_file(index_file)
538    }
539}
540
#[cfg(test)]
mod tests {
    use super::*;

    /// Path of a fixture below `tests/data/index` in the crate directory.
    fn fixture(name: &str) -> PathBuf {
        let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        source.push("tests/data/index");
        source.push(name);
        source
    }

    /// The epoch survives a round trip through `SystemTime`.
    #[test]
    fn test_time() {
        let time = Time {
            seconds: 0,
            nanos: 0,
        };
        let system_time = time.to_system_time();
        let new_time = Time::from_system_time(system_time);
        assert_eq!(time, new_time);
    }

    /// The header of the two-entry fixture reports two entries.
    #[test]
    fn test_check_header() {
        let file = File::open(fixture("index-2")).unwrap();
        let entries = Index::check_header(&mut BufReader::new(file)).unwrap();
        assert_eq!(entries, 2);
    }

    /// The 760-entry fixture parses completely.
    #[test]
    fn test_index() {
        let index = Index::from_file(fixture("index-760")).unwrap();
        assert_eq!(index.size(), 760);
        for (_, entry) in index.entries.iter() {
            println!("{entry}");
        }
    }

    /// Writing an index and reading it back keeps the entry count.
    #[test]
    fn test_index_to_file() {
        let index = Index::from_file(fixture("index-760")).unwrap();

        // Use the platform temp dir and a per-process name instead of a fixed
        // `/tmp` path, so the test runs on every OS and concurrent runs do
        // not overwrite each other's file.
        let target = std::env::temp_dir().join(format!("index-760-{}", std::process::id()));
        index.to_file(&target).unwrap();
        let new_index = Index::from_file(&target);
        let _ = fs::remove_file(&target); // best-effort cleanup before asserting
        assert_eq!(index.size(), new_index.unwrap().size());
    }

    /// An entry can be created from a real file on disk.
    #[test]
    fn test_index_entry_create() {
        let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        source.push("Cargo.toml");

        let file = source.as_path(); // use as a normal file
        let hash = SHA1::from_bytes(&[0; 20]);
        // `file` is absolute, so joining it onto `workdir` yields `file` itself.
        let workdir = Path::new("../");
        let entry = IndexEntry::new_from_file(file, hash, workdir).unwrap();
        println!("{entry}");
    }
}