1use bstr::{BStr, ByteSlice};
2
3use winnow::{
4 ascii::{digit1, line_ending, space1, Caseless},
5 combinator::{alt, delimited, repeat},
6 error::ErrMode,
7 stream::Stream,
8 token::{literal, one_of, take_till, take_until, take_while},
9 Parser,
10};
11
12use crate::{errors::VB6ErrorKind, language::VB6Token, parsers::VB6Stream};
13
/// Result type returned by every parser in this module: either the parsed
/// value or a winnow [`ErrMode`] wrapping a [`VB6ErrorKind`].
pub type VB6Result<T> = Result<T, ErrMode<VB6ErrorKind>>;
15
16pub fn line_comment_parse<'a>(input: &mut VB6Stream<'a>) -> VB6Result<&'a BStr> {
50 let comment = ('\'', take_till(0.., (b"\r\n", b"\n", b"\r")))
51 .take()
52 .parse_next(input)?;
53
54 Ok(comment)
55}
56
57pub fn variable_name_parse<'a>(input: &mut VB6Stream<'a>) -> VB6Result<&'a BStr> {
84 let variable_name = (
85 one_of(('a'..='z', 'A'..='Z', 128..=255)),
86 take_while(0.., ('_', 'a'..='z', 'A'..='Z', '0'..='9', 128..=255)),
87 )
88 .take()
89 .parse_next(input)?;
90
91 if variable_name.len() >= 255 {
92 return Err(ErrMode::Cut(VB6ErrorKind::VariableNameTooLong));
93 }
94
95 Ok(variable_name)
96}
97
98pub fn take_until_line_ending<'a>(input: &mut VB6Stream<'a>) -> VB6Result<&'a BStr> {
99 alt((take_until(1.., "\r\n"), take_until(1.., "\n"))).parse_next(input)
100}
101
102pub fn keyword_parse<'a>(
140 keyword: &'static str,
141) -> impl FnMut(&mut VB6Stream<'a>) -> VB6Result<&'a BStr> {
142 move |input: &mut VB6Stream<'a>| -> VB6Result<&'a BStr> {
143 let checkpoint = input.checkpoint();
144
145 let word = Caseless(keyword).parse_next(input)?;
146
147 if one_of::<VB6Stream, _, VB6ErrorKind>(('_', 'a'..='z', 'A'..='Z', '0'..='9'))
148 .parse_next(input)
149 .is_ok()
150 {
151 input.reset(&checkpoint);
152
153 return Err(ErrMode::Backtrack(VB6ErrorKind::KeywordNotFound));
154 }
155
156 Ok(word)
157 }
158}
159
160pub fn string_parse<'a>(input: &mut VB6Stream<'a>) -> VB6Result<&'a BStr> {
183 let mut build_string =
189 repeat(0.., string_fragment_parse).fold(Vec::new, |mut string, fragment| {
190 match fragment {
191 StringFragment::Literal(literal) => {
192 string.extend_from_slice(literal.as_bytes());
193 }
194 StringFragment::EscapedDoubleQuote(double_qoutes) => {
195 string.extend_from_slice(double_qoutes.as_bytes());
196 }
197 }
198 string
199 });
200
201 "\"".parse_next(input)?;
202 let start_index = input.index;
203
204 build_string.parse_next(input)?;
205
206 let end_index = input.index;
207 "\"".parse_next(input)?;
208
209 Ok(&input.stream[start_index..end_index])
210}
211
212enum StringFragment<'a> {
213 Literal(&'a BStr),
214 EscapedDoubleQuote(&'a BStr),
215}
216
217fn string_fragment_parse<'a>(input: &mut VB6Stream<'a>) -> VB6Result<StringFragment<'a>> {
218 let fragment = alt((
219 "\"\"".take().map(StringFragment::EscapedDoubleQuote),
220 take_until(1.., "\"").map(StringFragment::Literal),
221 ))
222 .parse_next(input)?;
223
224 Ok(fragment)
225}
226
227pub fn vb6_parse<'a>(input: &mut VB6Stream<'a>) -> VB6Result<Vec<VB6Token<'a>>> {
263 let mut tokens = Vec::new();
264
265 if !is_english_code(input.stream) {
266 return Err(ErrMode::Cut(VB6ErrorKind::LikelyNonEnglishCharacterSet));
267 }
268
269 while !input.is_empty() {
270 if literal::<_, _, VB6ErrorKind>('\0')
272 .parse_next(input)
273 .is_ok()
274 {
275 break;
276 }
277
278 if let Ok(token) = line_ending::<VB6Stream<'a>, VB6ErrorKind>.parse_next(input) {
279 let token = VB6Token::Newline(token);
280 tokens.push(token);
281 continue;
282 }
283
284 if let Ok(token) = line_comment_parse.parse_next(input) {
285 let token = VB6Token::Comment(token);
286 tokens.push(token);
287 continue;
288 }
289
290 if let Ok(token) = delimited::<VB6Stream<'a>, _, &BStr, _, VB6ErrorKind, _, _, _>(
291 '\"',
292 take_till(0.., '\"'),
293 '\"',
294 )
295 .take()
296 .parse_next(input)
297 {
298 let token = VB6Token::StringLiteral(token);
299 tokens.push(token);
300 continue;
301 }
302
303 if let Ok(token) = vb6_token_parse.parse_next(input) {
304 tokens.push(token);
305 continue;
306 }
307
308 return Err(ErrMode::Cut(VB6ErrorKind::UnknownToken));
309 }
310
311 Ok(tokens)
312}
313
314#[must_use]
315pub fn is_english_code(content: &BStr) -> bool {
316 let character_count = content.len();
318 let higher_half_character_count = content.iter().filter(|&c| *c >= 128).count();
319
320 higher_half_character_count == 0 || (100 * higher_half_character_count / character_count) < 1
321}
322
323fn vb6_keyword_parse<'a>(input: &mut VB6Stream<'a>) -> VB6Result<VB6Token<'a>> {
324 alt((
327 alt((
328 keyword_parse("Type").map(|token: &BStr| VB6Token::TypeKeyword(token)),
329 keyword_parse("Optional").map(|token: &BStr| VB6Token::OptionalKeyword(token)),
330 keyword_parse("Option").map(|token: &BStr| VB6Token::OptionKeyword(token)),
331 keyword_parse("Explicit").map(|token: &BStr| VB6Token::ExplicitKeyword(token)),
332 keyword_parse("Private").map(|token: &BStr| VB6Token::PrivateKeyword(token)),
333 keyword_parse("Public").map(|token: &BStr| VB6Token::PublicKeyword(token)),
334 keyword_parse("Dim").map(|token: &BStr| VB6Token::DimKeyword(token)),
335 keyword_parse("With").map(|token: &BStr| VB6Token::WithKeyword(token)),
336 keyword_parse("Declare").map(|token: &BStr| VB6Token::DeclareKeyword(token)),
337 keyword_parse("Lib").map(|token: &BStr| VB6Token::LibKeyword(token)),
338 keyword_parse("Const").map(|token: &BStr| VB6Token::ConstKeyword(token)),
339 keyword_parse("As").map(|token: &BStr| VB6Token::AsKeyword(token)),
340 keyword_parse("Enum").map(|token: &BStr| VB6Token::EnumKeyword(token)),
341 keyword_parse("Long").map(|token: &BStr| VB6Token::LongKeyword(token)),
342 keyword_parse("Integer").map(|token: &BStr| VB6Token::IntegerKeyword(token)),
343 keyword_parse("Boolean").map(|token: &BStr| VB6Token::BooleanKeyword(token)),
344 keyword_parse("Byte").map(|token: &BStr| VB6Token::ByteKeyword(token)),
345 keyword_parse("Single").map(|token: &BStr| VB6Token::SingleKeyword(token)),
346 keyword_parse("String").map(|token: &BStr| VB6Token::StringKeyword(token)),
347 )),
348 alt((
349 keyword_parse("True").map(|token: &BStr| VB6Token::TrueKeyword(token)),
350 keyword_parse("False").map(|token: &BStr| VB6Token::FalseKeyword(token)),
351 keyword_parse("Function").map(|token: &BStr| VB6Token::FunctionKeyword(token)),
352 keyword_parse("Sub").map(|token: &BStr| VB6Token::SubKeyword(token)),
353 keyword_parse("End").map(|token: &BStr| VB6Token::EndKeyword(token)),
354 keyword_parse("If").map(|token: &BStr| VB6Token::IfKeyword(token)),
355 keyword_parse("Else").map(|token: &BStr| VB6Token::ElseKeyword(token)),
356 keyword_parse("And").map(|token: &BStr| VB6Token::AndKeyword(token)),
357 keyword_parse("Or").map(|token: &BStr| VB6Token::OrKeyword(token)),
358 keyword_parse("Not").map(|token: &BStr| VB6Token::NotKeyword(token)),
359 keyword_parse("Then").map(|token: &BStr| VB6Token::ThenKeyword(token)),
360 keyword_parse("For").map(|token: &BStr| VB6Token::ForKeyword(token)),
361 keyword_parse("To").map(|token: &BStr| VB6Token::ToKeyword(token)),
362 keyword_parse("Step").map(|token: &BStr| VB6Token::StepKeyword(token)),
363 keyword_parse("Next").map(|token: &BStr| VB6Token::NextKeyword(token)),
364 keyword_parse("ReDim").map(|token: &BStr| VB6Token::ReDimKeyword(token)),
365 keyword_parse("ByVal").map(|token: &BStr| VB6Token::ByValKeyword(token)),
366 keyword_parse("ByRef").map(|token: &BStr| VB6Token::ByRefKeyword(token)),
367 keyword_parse("Goto").map(|token: &BStr| VB6Token::GotoKeyword(token)),
368 keyword_parse("Exit").map(|token: &BStr| VB6Token::ExitKeyword(token)),
369 )),
370 ))
371 .parse_next(input)
372}
373
374fn vb6_symbol_parse<'a>(input: &mut VB6Stream<'a>) -> VB6Result<VB6Token<'a>> {
375 alt((
378 alt((
379 "=".map(|token: &BStr| VB6Token::EqualityOperator(token)),
380 "$".map(|token: &BStr| VB6Token::DollarSign(token)),
381 "_".map(|token: &BStr| VB6Token::Underscore(token)),
382 "&".map(|token: &BStr| VB6Token::Ampersand(token)),
383 "%".map(|token: &BStr| VB6Token::Percent(token)),
384 "#".map(|token: &BStr| VB6Token::Octothorpe(token)),
385 "<".map(|token: &BStr| VB6Token::LessThanOperator(token)),
386 ">".map(|token: &BStr| VB6Token::GreaterThanOperator(token)),
387 "(".map(|token: &BStr| VB6Token::LeftParanthesis(token)),
388 ")".map(|token: &BStr| VB6Token::RightParanthesis(token)),
389 ",".map(|token: &BStr| VB6Token::Comma(token)),
390 "+".map(|token: &BStr| VB6Token::AdditionOperator(token)),
391 "-".map(|token: &BStr| VB6Token::SubtractionOperator(token)),
392 "*".map(|token: &BStr| VB6Token::MultiplicationOperator(token)),
393 "\\".map(|token: &BStr| VB6Token::BackwardSlashOperator(token)),
394 "/".map(|token: &BStr| VB6Token::DivisionOperator(token)),
395 ".".map(|token: &BStr| VB6Token::PeriodOperator(token)),
396 ":".map(|token: &BStr| VB6Token::ColonOperator(token)),
397 "^".map(|token: &BStr| VB6Token::ExponentiationOperator(token)),
398 )),
399 alt((
400 "!".map(|token: &BStr| VB6Token::ExclamationMark(token)),
401 "[".map(|token: &BStr| VB6Token::LeftSquareBracket(token)),
402 "]".map(|token: &BStr| VB6Token::RightSquareBracket(token)),
403 ";".map(|token: &BStr| VB6Token::Semicolon(token)),
404 "@".map(|token: &BStr| VB6Token::AtSign(token)),
405 )),
406 ))
407 .parse_next(input)
408}
409
410fn vb6_token_parse<'a>(input: &mut VB6Stream<'a>) -> VB6Result<VB6Token<'a>> {
411 alt((
414 (line_comment_parse).map(|token: &BStr| VB6Token::Comment(token)),
415 vb6_keyword_parse,
416 vb6_symbol_parse,
417 alt((
418 digit1.map(|token: &BStr| VB6Token::Number(token)),
419 variable_name_parse.map(|token: &BStr| VB6Token::VariableName(token)),
420 space1.map(|token: &BStr| VB6Token::Whitespace(token)),
421 )),
422 ))
423 .parse_next(input)
424}
425
// Unit tests for the tokenizer building blocks and the top-level `vb6_parse`.
#[cfg(test)]
mod test {
    use super::*;
    use bstr::ByteSlice;

    // A plain string literal yields the text between the delimiting quotes.
    #[test]
    fn no_escaped_double_quote_string_parse() {
        let input_line = b"\"This is a string\"\r\n";
        let mut stream = VB6Stream::new("", input_line);
        let string = string_parse(&mut stream).unwrap();

        assert_eq!(string, "This is a string");
    }

    // Doubled quotes inside a literal are kept as written in the result.
    #[test]
    fn contains_escaped_double_quote_string_parse() {
        let input_line = b"\"This is also \"\"a\"\" string\"\r\n";
        let mut stream = VB6Stream::new("", input_line);
        let string = string_parse(&mut stream).unwrap();

        assert_eq!(string, "This is also \"\"a\"\" string");
    }

    // A keyword must match a whole word: "op" is rejected as a prefix of
    // "option" but accepted when followed by a space.
    #[test]
    fn keyword() {
        let mut input1 = VB6Stream::new("", "option".as_bytes());
        let mut input2 = VB6Stream::new("", "op do".as_bytes());

        let mut op_parse = keyword_parse("op");

        let keyword = op_parse(&mut input1);
        let keyword2 = op_parse(&mut input2);

        assert!(keyword.is_err());
        assert!(keyword2.is_ok());
        assert_eq!(keyword2.unwrap(), b"op".as_bstr());
    }

    // Comment terminated by "\r\n": the line ending is not part of the comment.
    #[test]
    fn eol_comment_carriage_return_newline() {
        use crate::parsers::VB6Stream;
        use crate::vb6::line_comment_parse;

        let mut input = VB6Stream::new("", "' This is a comment\r\n".as_bytes());
        let comment = line_comment_parse(&mut input).unwrap();

        assert_eq!(comment, "' This is a comment");
    }

    // Comment terminated by a bare "\n".
    #[test]
    fn eol_comment_newline() {
        use crate::parsers::VB6Stream;
        use crate::vb6::line_comment_parse;

        let mut input = VB6Stream::new("", "' This is a comment\n".as_bytes());
        let comment = line_comment_parse(&mut input).unwrap();

        assert_eq!(comment, "' This is a comment");
    }

    // Comment terminated by a bare "\r".
    #[test]
    fn eol_comment_carriage_return() {
        use crate::parsers::VB6Stream;
        use crate::vb6::line_comment_parse;

        let mut input = VB6Stream::new("", "' This is a comment\r".as_bytes());
        let comment = line_comment_parse(&mut input).unwrap();

        assert_eq!(comment, "' This is a comment");
    }

    // Comment that runs to the end of the input with no line ending.
    #[test]
    fn eol_comment_eof() {
        use crate::parsers::VB6Stream;
        use crate::vb6::line_comment_parse;

        let mut input = VB6Stream::new("", "' This is a comment".as_bytes());
        let comment = line_comment_parse(&mut input).unwrap();

        assert_eq!(comment, "' This is a comment");
    }

    // Underscores are accepted inside a variable name.
    #[test]
    fn variable_name() {
        use crate::parsers::VB6Stream;
        use crate::vb6::variable_name_parse;

        let mut input = VB6Stream::new("", "variable_name".as_bytes());

        let variable_name = variable_name_parse(&mut input).unwrap();

        assert_eq!(variable_name, "variable_name");
    }

    // End-to-end tokenization of a simple declaration, whitespace included.
    #[test]
    fn vb6_parse() {
        use crate::parsers::VB6Stream;
        use crate::vb6::{vb6_parse, VB6Token};

        let mut input = VB6Stream::new("", "Dim x As Integer".as_bytes());
        let tokens = vb6_parse(&mut input).unwrap();

        assert_eq!(tokens.len(), 7);
        assert_eq!(tokens[0], VB6Token::DimKeyword("Dim".into()));
        assert_eq!(tokens[1], VB6Token::Whitespace(" ".into()));
        assert_eq!(tokens[2], VB6Token::VariableName("x".into()));
        assert_eq!(tokens[3], VB6Token::Whitespace(" ".into()));
        assert_eq!(tokens[4], VB6Token::AsKeyword("As".into()));
        assert_eq!(tokens[5], VB6Token::Whitespace(" ".into()));
        assert_eq!(tokens[6], VB6Token::IntegerKeyword("Integer".into()));
    }

    // Source dominated by non-ASCII bytes must be rejected up front with
    // `LikelyNonEnglishCharacterSet`. The fixture below is intentionally
    // mis-decoded text; its exact bytes matter, so do not "clean it up".
    #[test]
    fn non_english_parse() {
        use crate::vb6::vb6_parse;
        use crate::vb6::VB6Stream;

        let code = "Option Explicit\r
Public app_path As String '���|�]�w�X\r
Public ����H����ԤH��(1 To 2, 1 To 2) As Integer '�����Ԩ���H�Ƭ�����(1.�ϥΪ�/2.�q��,1.�`�@�H��/2.�ثe�ĴX��)\r
Public ����ݾ��H��������(1 To 2, 1 To 3) As Integer '����ݾ�����H���s��������(1.�ϥΪ�/2.�q��,1.���W����/2~3.�ݾ������n��s��)\r
Public �Ĥ@���Ұ�Ū�J�{�ǼаO As Boolean '�Ĥ@���Ұʵ{��Ū�J�{�ǼаO��\r
Attribute �Ĥ@���Ұ�Ū�J�{�ǼаO.VB_VarUserMemId = 1073741834\r
Public �����ˬd����ؼм� As Integer '�����ˬd����p�ƾ��ؼм�\r
Attribute �����ˬd����ؼм�.VB_VarUserMemId = 1073741836\r
Public �q������O�_�w�X�{ As Boolean '���ҳq������O�_�w�g�X�{�Ȯ��ܼ�\r
Attribute �q������O�_�w�X�{.VB_VarUserMemId = 1073741837\r
Public ProgramIsOnWine As Boolean '�{���O�_�B��Wine���ҤU����\r
Attribute ProgramIsOnWine.VB_VarUserMemId = 1073741838";

        let mut input = VB6Stream::new("", code.as_bytes());

        let result = vb6_parse(&mut input);

        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            ErrMode::Cut(VB6ErrorKind::LikelyNonEnglishCharacterSet)
        ));
    }

    // Several keywords can be parsed in sequence from the same stream, with
    // the whitespace between them skipped explicitly.
    #[test]
    fn multi_keyword() {
        use crate::vb6::keyword_parse;

        let mut input = VB6Stream::new("", "Option As Integer".as_bytes());

        let key1 = keyword_parse("Option").parse_next(&mut input).unwrap();

        let _ = space1::<_, VB6ErrorKind>.parse_next(&mut input);

        let key2 = keyword_parse("As").parse_next(&mut input).unwrap();

        let _ = space1::<_, VB6ErrorKind>.parse_next(&mut input);

        let key3 = keyword_parse("Integer").parse_next(&mut input).unwrap();

        assert_eq!(key1, "Option");
        assert_eq!(key2, "As");
        assert_eq!(key3, "Integer");
    }
}