pyreq_rs/parser/
requirement_specifier.rs

//! Parser for [requirement specifiers](https://pip.pypa.io/en/stable/reference/requirement-specifiers).
//! Refer to <https://peps.python.org/pep-0508/> for the complete Parsley grammar.
//! In that grammar, `-> pythonExpression` gives the Python value returned for the syntax matched before the `->`.
4use crate::requirements::{Comparison, MarkerExpr, MarkerOp, RequirementSpecifier, VersionSpec};
5use nom::{
6    branch::alt,
7    bytes::complete::{tag, take_while, take_while_m_n},
8    character::{
9        complete::{char as nomchar, digit0, digit1, hex_digit1, satisfy, space0, space1},
10        is_alphabetic, is_alphanumeric, is_digit, is_hex_digit, is_space,
11    },
12    combinator::{eof, map, opt, recognize},
13    multi::{count, many0, many1, many_m_n},
14    sequence::{delimited, preceded, separated_pair, terminated, tuple},
15    IResult, Parser,
16};
17
18use super::version::{dev, epoch, local, post, pre, release};
19
20// wsp* = space0
21
22pub fn version_cmp(input: &str) -> IResult<&str, Comparison> {
23    map(
24        preceded(
25            space0,
26            alt((
27                tag("<="),
28                tag("<"),
29                tag("!="),
30                tag("=="),
31                tag(">="),
32                tag(">"),
33                tag("~="),
34                tag("==="),
35            )),
36        ),
37        |s| Comparison::try_from(s).unwrap(),
38    )(input)
39}
40
// The version part can be refined into four categories; see `_version_regex_str` in the
// `Specifier` class of https://github.com/pypa/packaging/blob/main/src/packaging/specifiers.py.
// The accepted syntax depends on the operator.
44
45pub fn version_arbitraryequal(input: &str) -> IResult<&str, String> {
46    map(
47        preceded(
48            space0,
49            take_while(|c: char| !c.is_whitespace() && c != ';' && c != ')'),
50        ),
51        |s: &str| s.to_string(),
52    )(input)
53}
54
55pub fn version_equal_notequal(input: &str) -> IResult<&str, String> {
56    map(
57        preceded(
58            space0,
59            recognize(tuple((
60                opt(nomchar('v')),
61                opt(epoch),
62                release,
63                opt(alt((
64                    tag(".*"),
65                    recognize(tuple((opt(pre), opt(post), opt(dev), opt(local)))),
66                ))),
67            ))),
68        ),
69        |s: &str| s.to_string(),
70    )(input)
71}
72
73pub fn version_compatiblerelease(input: &str) -> IResult<&str, String> {
74    map(
75        preceded(
76            space0,
77            recognize(tuple((
78                opt(nomchar('v')),
79                opt(epoch),
80                digit1.and(many1(preceded(nomchar('.'), digit1))),
81                opt(pre),
82                opt(post),
83                opt(dev),
84            ))),
85        ),
86        |s: &str| s.to_string(),
87    )(input)
88}
89
90pub fn version_other_operator(input: &str) -> IResult<&str, String> {
91    map(
92        preceded(
93            space0,
94            recognize(tuple((
95                opt(nomchar('v')),
96                opt(epoch),
97                release,
98                opt(pre),
99                opt(post),
100                opt(dev),
101            ))),
102        ),
103        |s: &str| s.to_string(),
104    )(input)
105}
106
107pub fn version_one(input: &str) -> IResult<&str, VersionSpec> {
108    let (input, operator) = version_cmp(input)?;
109    match operator {
110        Comparison::ArbitraryEqual => terminated(version_arbitraryequal, space0)
111            .map(|v| (operator, v).into())
112            .parse(input),
113        Comparison::Equal | Comparison::NotEqual => terminated(version_equal_notequal, space0)
114            .map(|v| (operator, v).into())
115            .parse(input),
116        Comparison::CompatibleRelease => terminated(version_compatiblerelease, space0)
117            .map(|v| (operator, v).into())
118            .parse(input),
119        _ => terminated(version_other_operator, space0)
120            .map(|v| (operator, v).into())
121            .parse(input),
122    }
123}
124
125pub fn version_many(input: &str) -> IResult<&str, Vec<VersionSpec>> {
126    version_one
127        .and(many0(preceded(space0.and(nomchar(',')), version_one)))
128        .map(|(one, mut v)| {
129            v.insert(0, one);
130            v
131        })
132        .parse(input)
133}
134
135pub fn versionspec(input: &str) -> IResult<&str, Vec<VersionSpec>> {
136    delimited(nomchar('('), version_many, nomchar(')'))
137        .or(version_many)
138        .parse(input)
139}
140
141pub fn urlspec(input: &str) -> IResult<&str, &str> {
142    preceded(nomchar('@').and(space0), uri_reference)(input)
143}
144
145pub fn marker_op(input: &str) -> IResult<&str, MarkerOp> {
146    alt((
147        version_cmp.map(|cmp| cmp.into()),
148        preceded(space0, tag("in")).map(|_| MarkerOp::In),
149        preceded(space0, tag("not"))
150            .and(preceded(space1, tag("in")))
151            .map(|_| MarkerOp::NotIn),
152    ))(input)
153}
154
/// Returns `true` if `c` may appear inside a quoted marker string (quote characters excluded).
///
/// Accepted characters are space, tab, ASCII letters and digits, and the punctuation listed
/// in the literal below.
///
/// The check is done on the `char` itself. Casting to `u8` first would truncate the code
/// point, so that e.g. U+0141 (low byte `0x41`, `A`) or U+0120 (low byte `0x20`, space)
/// would wrongly be accepted.
pub fn is_python_str_c(c: char) -> bool {
    matches!(c, ' ' | '\t')
        || c.is_ascii_alphanumeric()
        || "().{}-_*#:;,/?[]!~`@$%^&=+|<>".contains(c)
}
158
159pub fn python_str(input: &str) -> IResult<&str, &str> {
160    delimited(
161        nomchar('\''),
162        take_while(|c| is_python_str_c(c) || c == '"'),
163        nomchar('\''),
164    )
165    .or(delimited(
166        nomchar('"'),
167        take_while(|c| is_python_str_c(c) || c == '\''),
168        nomchar('"'),
169    ))
170    .parse(input)
171}
172
173pub fn env_var(input: &str) -> IResult<&str, &str> {
174    alt((
175        tag("python_version"),
176        tag("python_full_version"),
177        tag("os_name"),
178        tag("sys_platform"),
179        tag("platform_release"),
180        tag("platform_system"),
181        tag("platform_version"),
182        tag("platform_machine"),
183        tag("platform_python_implementation"),
184        tag("implementation_name"),
185        tag("implementation_version"),
186        tag("extra"),
187    ))(input)
188}
189
190pub fn marker_var(input: &str) -> IResult<&str, &str> {
191    preceded(space0, env_var.or(python_str))(input)
192}
193
// Expression precedence: op > () > and > or
// marker_expr = basic | '(' marker_or ')'
// marker_and = marker_expr 'and' marker_expr | marker_expr
// marker_or = marker_and 'or' marker_and | marker_and
// marker = marker_or
// Reading these rules from the bottom up makes them easier to follow.
200
201pub fn marker_expr(input: &str) -> IResult<&str, MarkerExpr> {
202    alt((
203        // 不用考虑空格的问题,因为marker_var和marker_op都是只吃前边的空格,后边的空格不管
204        tuple((marker_var, marker_op, marker_var))
205            .map(|(left, op, right)| MarkerExpr::Basic(left.to_string(), op, right.to_string())),
206        delimited(
207            preceded(space0, nomchar('(')),
208            marker_or,
209            preceded(space0, nomchar(')')),
210        ),
211    ))(input)
212}
213
214pub fn marker_and(input: &str) -> IResult<&str, MarkerExpr> {
215    alt((
216        separated_pair(
217            marker_expr,
218            delimited(space0, tag("and"), space0),
219            marker_expr,
220        )
221        .map(|(left, right)| MarkerExpr::And(Box::new(left), Box::new(right))),
222        marker_expr,
223    ))(input)
224}
225
226pub fn marker_or(input: &str) -> IResult<&str, MarkerExpr> {
227    alt((
228        separated_pair(marker_and, delimited(space0, tag("or"), space0), marker_and)
229            .map(|(left, right)| MarkerExpr::Or(Box::new(left), Box::new(right))),
230        marker_and,
231    ))(input)
232}
233
234// marker = marker_or
235
236pub fn quoted_marker(input: &str) -> IResult<&str, MarkerExpr> {
237    preceded(nomchar(';').and(space0), marker_or)(input)
238}
239
240pub fn identifier_end(input: &str) -> IResult<&str, &str> {
241    recognize(take_while(|c| "-_.".contains(c)).and(satisfy(|u| is_alphanumeric(u as u8))))(input)
242}
243
244// name = identifier
245pub fn identifier(input: &str) -> IResult<&str, String> {
246    recognize(satisfy(|u| is_alphanumeric(u as u8)).and(many0(identifier_end)))
247        .map(|s| s.to_string())
248        .parse(input)
249}
250
251pub fn extras_list(input: &str) -> IResult<&str, Vec<String>> {
252    identifier
253        .and(many0(preceded(
254            delimited(space0, nomchar(','), space0),
255            identifier,
256        )))
257        .map(|(first, mut rest)| {
258            rest.insert(0, first);
259            rest
260        })
261        .parse(input)
262}
263
264pub fn extras(input: &str) -> IResult<&str, Option<Vec<String>>> {
265    delimited(
266        nomchar('[').and(space0),
267        opt(extras_list),
268        space0.and(nomchar(']')),
269    )
270    .parse(input)
271}
272
273pub fn name_req(input: &str) -> IResult<&str, RequirementSpecifier> {
274    tuple((
275        identifier,
276        space0,
277        opt(extras),
278        space0,
279        opt(versionspec),
280        space0,
281        opt(quoted_marker),
282    ))
283    .map(|(i, _, e, _, v, _, m)| RequirementSpecifier {
284        name: i,
285        extras: if let Some(Some(j)) = e { j } else { vec![] },
286        version_specs: if let Some(j) = v { j } else { vec![] },
287        marker_expr: m,
288        ..Default::default()
289    })
290    .parse(input)
291}
292
293pub fn url_req(input: &str) -> IResult<&str, RequirementSpecifier> {
294    tuple((
295        identifier,
296        space0,
297        opt(extras),
298        space0,
299        urlspec,
300        alt((space1, eof)),
301        opt(quoted_marker),
302    ))
303    .map(|(i, _, e, _, v, _, m)| RequirementSpecifier {
304        name: i,
305        extras: if let Some(Some(j)) = e { j } else { vec![] },
306        urlspec: Some(v.to_string()),
307        marker_expr: m,
308        ..Default::default()
309    })
310    .parse(input)
311}
312
313pub fn specification(input: &str) -> IResult<&str, RequirementSpecifier> {
314    delimited(space0, alt((url_req, name_req)), space0)(input)
315}
316
317// following is URI rules. https://www.rfc-editor.org/rfc/rfc3986#appendix-A
318// ABNF syntax is at https://www.rfc-editor.org/rfc/rfc5234
319
320pub fn uri(input: &str) -> IResult<&str, &str> {
321    recognize(tuple((
322        scheme,
323        nomchar(':'),
324        hier_part,
325        opt(nomchar('?').and(query)),
326        opt(nomchar('#').and(fragment)),
327    )))(input)
328}
329
330pub fn hier_part(input: &str) -> IResult<&str, &str> {
331    alt((
332        recognize(tuple((tag("//"), authority, path_abempty))),
333        path_absolute,
334        path_rootless,
335        path_empty,
336    ))(input)
337}
338
339pub fn uri_reference(input: &str) -> IResult<&str, &str> {
340    alt((uri, relative_ref))(input)
341}
342
343pub fn absolute_uri(input: &str) -> IResult<&str, &str> {
344    recognize(tuple((
345        scheme,
346        nomchar(':'),
347        hier_part,
348        opt(nomchar('?').and(query)),
349    )))(input)
350}
351
352pub fn relative_ref(input: &str) -> IResult<&str, &str> {
353    recognize(tuple((
354        relative_part,
355        opt(nomchar('?').and(query)),
356        opt(nomchar('#').and(fragment)),
357    )))(input)
358}
359
360pub fn relative_part(input: &str) -> IResult<&str, &str> {
361    alt((
362        recognize(tuple((tag("//"), authority, path_abempty))),
363        path_absolute,
364        path_noscheme,
365        path_empty,
366    ))(input)
367}
368
369pub fn scheme(input: &str) -> IResult<&str, &str> {
370    recognize(satisfy(|c| is_alphabetic(c as u8)).and(take_while(|c| {
371        is_alphanumeric(c as u8) || c == '+' || c == '-' || c == '.'
372    })))(input)
373}
374
375pub fn authority(input: &str) -> IResult<&str, &str> {
376    recognize(tuple((
377        opt(userinfo.and(nomchar('@'))),
378        host,
379        opt(nomchar(':').and(digit0)),
380    )))(input)
381}
382
383pub fn userinfo(input: &str) -> IResult<&str, &str> {
384    recognize(many0(alt((unreserved, pct_encoded, sub_delims, tag(":")))))(input)
385}
386
387pub fn host(input: &str) -> IResult<&str, &str> {
388    alt((ip_literal, ipv4address, reg_name))(input)
389}
390
391// port = digit0
392
393pub fn ip_literal(input: &str) -> IResult<&str, &str> {
394    recognize(delimited(
395        nomchar('['),
396        alt((ipv6address, ipvfuture)),
397        nomchar(']'),
398    ))(input)
399}
400
401pub fn ipvfuture(input: &str) -> IResult<&str, &str> {
402    recognize(tuple((
403        nomchar('v'),
404        hex_digit1,
405        nomchar('.'),
406        many1(alt((unreserved, sub_delims, tag(":")))),
407    )))(input)
408}
409
410pub fn ipv6address(input: &str) -> IResult<&str, &str> {
411    alt((
412        recognize(count(h16.and(nomchar(':')), 6).and(ls32)),
413        recognize(tuple((tag("::"), count(h16.and(nomchar(':')), 5), ls32))),
414        recognize(tuple((
415            opt(h16),
416            tag("::"),
417            count(h16.and(nomchar(':')), 4),
418            ls32,
419        ))),
420        recognize(tuple((
421            opt(many_m_n(0, 1, h16.and(nomchar(':'))).and(h16)),
422            tag("::"),
423            count(h16.and(nomchar(':')), 3),
424            ls32,
425        ))),
426        recognize(tuple((
427            opt(many_m_n(0, 2, h16.and(nomchar(':'))).and(h16)),
428            tag("::"),
429            count(h16.and(nomchar(':')), 2),
430            ls32,
431        ))),
432        recognize(tuple((
433            opt(many_m_n(0, 3, h16.and(nomchar(':'))).and(h16)),
434            tag("::"),
435            h16.and(nomchar(':')),
436            ls32,
437        ))),
438        recognize(tuple((
439            opt(many_m_n(0, 4, h16.and(nomchar(':'))).and(h16)),
440            tag("::"),
441            ls32,
442        ))),
443        recognize(tuple((
444            opt(many_m_n(0, 5, h16.and(nomchar(':'))).and(h16)),
445            tag("::"),
446            h16,
447        ))),
448        recognize(tuple((
449            opt(many_m_n(0, 6, h16.and(nomchar(':'))).and(h16)),
450            tag("::"),
451        ))),
452    ))(input)
453}
454
455pub fn h16(input: &str) -> IResult<&str, &str> {
456    take_while_m_n(1, 4, |c| is_hex_digit(c as u8))(input)
457}
458
459pub fn ls32(input: &str) -> IResult<&str, &str> {
460    alt((recognize(tuple((h16, nomchar(':'), h16))), ipv4address))(input)
461}
462
463pub fn ipv4address(input: &str) -> IResult<&str, &str> {
464    recognize(tuple((
465        dec_octet,
466        nomchar('.'),
467        dec_octet,
468        nomchar('.'),
469        dec_octet,
470        nomchar('.'),
471        dec_octet,
472    )))(input)
473}
474
475pub fn dec_octet(input: &str) -> IResult<&str, &str> {
476    alt((
477        take_while_m_n(1, 1, |c| is_digit(c as u8)),
478        recognize(tuple((
479            satisfy(|c| "123456789".contains(c)),
480            satisfy(|c| is_digit(c as u8)),
481        ))),
482        recognize(tuple((
483            nomchar('1'),
484            take_while_m_n(2, 2, |c| is_digit(c as u8)),
485        ))),
486        recognize(tuple((
487            nomchar('2'),
488            satisfy(|c| "01234".contains(c)),
489            satisfy(|c| is_digit(c as u8)),
490        ))),
491        recognize(tuple((tag("25"), satisfy(|c| "012345".contains(c))))),
492    ))(input)
493}
494
495pub fn reg_name(input: &str) -> IResult<&str, &str> {
496    recognize(many0(alt((unreserved, pct_encoded, sub_delims))))(input)
497}
498
499pub fn path(input: &str) -> IResult<&str, &str> {
500    alt((
501        path_abempty,
502        path_absolute,
503        path_noscheme,
504        path_rootless,
505        path_empty,
506    ))(input)
507}
508
509pub fn path_abempty(input: &str) -> IResult<&str, &str> {
510    recognize(many0(nomchar('/').and(segment)))(input)
511}
512
513pub fn path_absolute(input: &str) -> IResult<&str, &str> {
514    recognize(nomchar('/').and(opt(segment_nz.and(many0(nomchar('/').and(segment))))))(input)
515}
516
517pub fn path_noscheme(input: &str) -> IResult<&str, &str> {
518    recognize(segment_nz_nc.and(many0(nomchar('/').and(segment))))(input)
519}
520
521pub fn path_rootless(input: &str) -> IResult<&str, &str> {
522    recognize(segment_nz.and(many0(nomchar('/').and(segment))))(input)
523}
524
525pub fn path_empty(input: &str) -> IResult<&str, &str> {
526    tag("")(input)
527}
528
529pub fn segment(input: &str) -> IResult<&str, &str> {
530    recognize(many0(pchar))(input)
531}
532
533pub fn segment_nz(input: &str) -> IResult<&str, &str> {
534    recognize(many1(pchar))(input)
535}
536
537pub fn segment_nz_nc(input: &str) -> IResult<&str, &str> {
538    recognize(many1(alt((unreserved, pct_encoded, sub_delims, tag("@")))))(input)
539}
540
541pub fn pchar(input: &str) -> IResult<&str, &str> {
542    alt((unreserved, pct_encoded, sub_delims, tag(":"), tag("@")))(input)
543}
544
545pub fn query(input: &str) -> IResult<&str, &str> {
546    recognize(many0(alt((pchar, tag("/"), tag("?")))))(input)
547}
548
549pub fn fragment(input: &str) -> IResult<&str, &str> {
550    recognize(many0(alt((pchar, tag("/"), tag("?")))))(input)
551}
552
553pub fn pct_encoded(input: &str) -> IResult<&str, &str> {
554    recognize(tag("%").and(take_while_m_n(2, 2, |c| is_hex_digit(c as u8))))(input)
555}
556
557pub fn unreserved(input: &str) -> IResult<&str, &str> {
558    take_while_m_n(1, 1, |c| is_alphanumeric(c as u8) || "-._~".contains(c))(input)
559}
560
561pub fn reserved(input: &str) -> IResult<&str, &str> {
562    alt((gen_delims, sub_delims))(input)
563}
564
565pub fn gen_delims(input: &str) -> IResult<&str, &str> {
566    alt((
567        tag(":"),
568        tag("/"),
569        tag("?"),
570        tag("#"),
571        tag("["),
572        tag("]"),
573        tag("@"),
574    ))(input)
575}
576
577pub fn sub_delims(input: &str) -> IResult<&str, &str> {
578    alt((
579        tag("!"),
580        tag("$"),
581        tag("&"),
582        tag("'"),
583        tag("("),
584        tag(")"),
585        tag("*"),
586        tag("+"),
587        tag(","),
588        tag(";"),
589        tag("="),
590    ))(input)
591}