1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
use breezy_osutils::rand_chars;
use lazy_static::lazy_static;
use regex::bytes::Regex;
use std::time::{SystemTime, UNIX_EPOCH};
lazy_static! {
    /// Suffix used by `next_id_suffix` when the caller does not supply one.
    /// It is produced by `gen_file_id_suffix()` the first time it is read.
    static ref GEN_FILE_ID_SUFFIX: String = gen_file_id_suffix();

    // The two patterns below match the characters that must be *removed*
    // from an identifier: weird characters are not escaped, they are simply
    // pulled out.

    /// Everything except word characters and `.`.
    static ref FILE_ID_CHARS_RE: Regex = Regex::new(r"[^\w.]").unwrap();
    /// Everything except word characters and `-`, `.`, `+`, `@`.
    static ref REV_ID_CHARS_RE: Regex = Regex::new(r"[^-\w.+@]").unwrap();
}
/// Build the default file id suffix.
///
/// The result has the shape `-<date>-<random>-`, where `<date>` is
/// `breezy_osutils::time::compact_date` applied to the current time in
/// seconds since the Unix epoch and `<random>` is `rand_chars(16)`.
///
/// # Panics
///
/// Panics if the system clock reports a time before the Unix epoch.
fn gen_file_id_suffix() -> String {
    let seconds_since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_secs();
    let entropy = rand_chars(16);
    let date = breezy_osutils::time::compact_date(seconds_since_epoch);
    format!("-{}-{}-", date, entropy)
}
/// Return the next id suffix as bytes.
///
/// The result is `suffix` (or the process-wide `GEN_FILE_ID_SUFFIX` when
/// `suffix` is `None`) followed by a decimal serial number. The serial is
/// shared by every caller in the process, starts at 1 and grows by one on each
/// call, whichever suffix is used.
pub fn next_id_suffix(suffix: Option<&str>) -> Vec<u8> {
    use std::sync::atomic::{AtomicU64, Ordering};

    // An atomic counter keeps the serial unique even when several threads
    // generate ids concurrently, without any `unsafe` code.
    static GEN_FILE_ID_SERIAL: AtomicU64 = AtomicU64::new(0);

    // XXX TODO: change breezy.add.smart_add_tree to call workingtree.add() rather
    //           than having to move the id randomness out of the inner loop like this.
    // XXX TODO: for the global randomness this uses we should add the thread-id
    //           before the serial #.
    // XXX TODO: jam 20061102 I think it would be good to reset every 100 or
    //           1000 calls, or perhaps if time.time() increases by a certain
    //           amount. time.time() shouldn't be terribly expensive to call,
    //           and it means that long-lived processes wouldn't use the same
    //           suffix forever.

    // `fetch_add` returns the previous value; add one so the first id is 1.
    // Only the uniqueness of the value matters, so relaxed ordering suffices.
    let serial = GEN_FILE_ID_SERIAL
        .fetch_add(1, Ordering::Relaxed)
        .wrapping_add(1);

    // Only touch the lazily built default suffix when it is actually needed.
    let prefix = match suffix {
        Some(explicit) => explicit,
        None => GEN_FILE_ID_SUFFIX.as_str(),
    };
    format!("{}{}", prefix, serial).into_bytes()
}
pub fn gen_file_id(name: &str) -> Vec<u8> {
// The real randomness is in the _next_id_suffix, the
// rest of the identifier is just to be nice.
// So we:
// 1) Remove non-ascii word characters to keep the ids portable
// 2) squash to lowercase, so the file id doesn't have to
// be escaped (case insensitive filesystems would bork for ids
// that only differ in case without escaping).
// 3) truncate the filename to 20 chars. Long filenames also bork on some
// filesystems
// 4) Removing starting '.' characters to prevent the file ids from
// being considered hidden.
let name_bytes = name
.chars()
.filter(|c| c.is_ascii())
.collect::<String>()
.to_ascii_lowercase()
.as_bytes()
.to_vec();
let ascii_word_only = FILE_ID_CHARS_RE
.replace_all(&name_bytes, |_: ®ex::bytes::Captures| b"")
.to_vec();
let without_dots = ascii_word_only
.into_iter()
.skip_while(|c| *c == b'.')
.collect::<Vec<u8>>();
let short = without_dots.iter().take(20).cloned().collect::<Vec<u8>>();
let suffix = next_id_suffix(None);
[short, suffix].concat()
}
/// Generate a new file id for a tree root.
///
/// This is `gen_file_id` applied to the fixed name `"tree_root"`.
pub fn gen_root_id() -> Vec<u8> {
    const ROOT_NAME: &str = "tree_root";
    gen_file_id(ROOT_NAME)
}
fn get_identifier(s: &str) -> Vec<u8> {
let mut identifier = s.to_string();
if let Some(start) = s.find('<') {
let end = s.rfind('>');
if end.is_some()
&& start < end.unwrap()
&& end.unwrap() == s.len() - 1
&& s[start..].find('@').is_some()
{
identifier = s[start + 1..end.unwrap()].to_string();
}
}
let identifier: String = identifier
.to_ascii_lowercase()
.replace(' ', "_")
.chars()
.filter(|c| c.is_ascii())
.collect();
REV_ID_CHARS_RE
.replace_all(identifier.as_bytes(), |_: ®ex::bytes::Captures| b"")
.to_vec()
}
/// Build a new revision id of the form `<user>-<date>-<random>`.
///
/// * `<user>` is `get_identifier(username)`.
/// * `<date>` is `breezy_osutils::time::compact_date` applied to `timestamp`,
///   or to the current time in seconds since the Unix epoch when `timestamp`
///   is `None`.
/// * `<random>` is `breezy_osutils::rand_chars(16)`.
///
/// # Panics
///
/// Panics if `timestamp` is `None` and the system clock reports a time before
/// the Unix epoch.
pub fn gen_revision_id(username: &str, timestamp: Option<u64>) -> Vec<u8> {
    let mut revision_id = get_identifier(username);

    // This gives 36^16 ~= 2^82.7 ~= 83 bits of entropy
    let entropy = breezy_osutils::rand_chars(16);

    let when = match timestamp {
        Some(seconds) => seconds,
        None => SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs(),
    };
    let date = breezy_osutils::time::compact_date(when);

    // Append the remaining parts, each preceded by a `-` separator.
    revision_id.push(b'-');
    revision_id.extend_from_slice(date.as_bytes());
    revision_id.push(b'-');
    revision_id.extend_from_slice(entropy.as_bytes());
    revision_id
}