/// Source text of the "Regex-like Utilities" script library (regex-style text processing).
///
/// The value is a program written in the embedded scripting language
/// (`Func` / `Set` / `While` / `If` / `Elif` / `Return`), not Rust code. As its own
/// header states, it is not a real regular-expression engine; it is a set of
/// string-operation helpers grouped as follows:
///
/// - wildcard matching: `REGEX_WILDCARD_MATCH` (`*` = any sequence, `?` = one character)
/// - searching: `REGEX_FIND_BETWEEN`, `REGEX_FIND_LINES_STARTING_WITH`,
///   `REGEX_FIND_LINES_CONTAINING`
/// - replacing: `REGEX_REPLACE_PATTERN`, `REGEX_REMOVE_PATTERN`
/// - extraction: `REGEX_EXTRACT_NUMBERS`, `REGEX_EXTRACT_WORDS`, `REGEX_EXTRACT_EMAILS`,
///   `REGEX_EXTRACT_URLS`
/// - validation: `REGEX_IS_EMAIL`, `REGEX_IS_URL`, `REGEX_IS_NUMERIC`, `REGEX_IS_ALPHA`,
///   `REGEX_IS_ALPHANUMERIC`, `REGEX_IS_PHONE`, `REGEX_IS_ID_CARD`
/// - character classes: `REGEX_IS_DIGIT`, `REGEX_IS_ALPHA_CHAR`, `REGEX_IS_WHITESPACE`
/// - cleanup: `REGEX_REMOVE_WHITESPACE`, `REGEX_REMOVE_NON_ALPHANUMERIC`,
///   `REGEX_REMOVE_DIGITS`, `REGEX_KEEP_CHARS`
/// - splitting: `REGEX_SPLIT_SENTENCES`, `REGEX_SPLIT_PARAGRAPHS`
/// - statistics: `REGEX_COUNT_MATCHES`, `REGEX_COUNT_CHARS`
///
/// Notes for maintainers:
///
/// - The `//` comments inside the literal are part of the string value (they are
///   comments of the embedded script) and are intentionally left as written.
/// - Every physical line of the literal ends with a `\` continuation, so the line
///   breaks used to lay out this declaration are not part of the value. Without
///   them a break in the middle of a script `//` comment would turn the rest of
///   that comment into a bare statement.
/// - `REGEX_FIND_BETWEEN` returns an empty list when both delimiters are empty, and
///   `REGEX_COUNT_MATCHES` returns 0 for an empty pattern. Both loops only advance
///   by the length of what they matched, so those inputs would otherwise never
///   terminate (assuming `INDEXOF` reports offset 0 for an empty needle).
pub const REGEX_UTILS: &str = "// ============================================\n// Regex-like Utilities - \u{6b63}\u{5219}\u{98ce}\u{683c}\u{6587}\u{672c}\u{5904}\u{7406}\u{5de5}\u{5177}\n// ============================================\n// \u{63d0}\u{4f9b}\u{6587}\u{672c}\u{6a21}\u{5f0f}\u{5339}\u{914d}\u{548c}\u{5904}\u{7406}\u{529f}\u{80fd}\u{ff08}\u{57fa}\u{4e8e}\u{5b57}\u{7b26}\u{4e32}\u{64cd}\u{4f5c}\u{ff09}\n// \u{6ce8}\u{610f}\u{ff1a}\u{8fd9}\u{4e0d}\u{662f}\u{771f}\u{6b63}\u{7684}\u{6b63}\u{5219}\u{8868}\u{8fbe}\u{5f0f}\u{5f15}\u{64ce}\u{ff0c}\u{800c}\u{662f}\u{5e38}\u{7528}\u{6a21}\u{5f0f}\u{7684}\u{5b9e}\u{7528}\u{5de5}\u{5177}\n// \u{652f}\u{6301}\u{ff1a}\n// - \u{901a}\u{914d}\u{7b26}\u{5339}\u{914d}\n// - \u{6a21}\u{5f0f}\u{67e5}\u{627e}\u{548c}\u{66ff}\u{6362}\n// - \u{6587}\u{672c}\u{63d0}\u{53d6}\n// - \u{5e38}\u{7528}\u{9a8c}\u{8bc1}\u{ff08}\u{90ae}\u{7bb1}\u{3001}URL\u{3001}\u{7535}\u{8bdd}\u{7b49}\u{ff09}\n// ============================================\n\n// ============================================\n// \u{901a}\u{914d}\u{7b26}\u{5339}\u{914d}\n// ============================================\n\n// \u{901a}\u{914d}\u{7b26}\u{5339}\u{914d}\u{ff08}\u{652f}\u{6301} * \u{548c} ?\u{ff09}\n// * \u{5339}\u{914d}\u{4efb}\u{610f}\u{5b57}\u{7b26}\u{5e8f}\u{5217}\n// ? \
\u{5339}\u{914d}\u{5355}\u{4e2a}\u{5b57}\u{7b26}\nFunc REGEX_WILDCARD_MATCH(TEXT, PATTERN) {\n Return REGEX_WILDCARD_MATCH_IMPL(TEXT, PATTERN, 0, 0)\n}\n\nFunc REGEX_WILDCARD_MATCH_IMPL(TEXT, PATTERN, TEXT_IDX, PAT_IDX) {\n Set TEXT_LEN LEN(TEXT)\n Set PAT_LEN LEN(PATTERN)\n \n // \u{90fd}\u{5230}\u{672b}\u{5c3e}\u{ff0c}\u{5339}\u{914d}\u{6210}\u{529f}\n If ((TEXT_IDX == TEXT_LEN) And (PAT_IDX == PAT_LEN)) {\n Return True\n }\n \n // \u{6a21}\u{5f0f}\u{5230}\u{672b}\u{5c3e}\u{4f46}\u{6587}\u{672c}\u{672a}\u{5b8c}\u{ff0c}\u{5931}\u{8d25}\n If ((PAT_IDX == PAT_LEN) And (TEXT_IDX < TEXT_LEN)) {\n Return False\n }\n \n // \u{6587}\u{672c}\u{5230}\u{672b}\u{5c3e}\u{4f46}\u{6a21}\u{5f0f}\u{6709}\u{975e}*\u{5b57}\u{7b26}\u{ff0c}\u{5931}\u{8d25}\n If ((TEXT_IDX == TEXT_LEN) And (PAT_IDX < PAT_LEN)) {\n Set I PAT_IDX\n While (I < PAT_LEN) {\n If (CHARAT(PATTERN, I) != \"*\") {\n Return False\n }\n Set I (I + 1)\n }\n Return True\n }\n \n // \u{83b7}\u{53d6}\u{5f53}\u{524d}\u{5b57}\u{7b26}\n Set PAT_CHAR CHARAT(PATTERN, PAT_IDX)\n Set TEXT_CHAR CHARAT(TEXT, TEXT_IDX)\n \n // ? \
\u{5339}\u{914d}\u{4efb}\u{610f}\u{5355}\u{4e2a}\u{5b57}\u{7b26}\n If (PAT_CHAR == \"?\") {\n Return REGEX_WILDCARD_MATCH_IMPL(TEXT, PATTERN, (TEXT_IDX + 1), (PAT_IDX + 1))\n }\n \n // * \u{5339}\u{914d}\u{4efb}\u{610f}\u{5e8f}\u{5217}\n If (PAT_CHAR == \"*\") {\n // \u{5c1d}\u{8bd5}\u{5339}\u{914d}0\u{4e2a}\u{5b57}\u{7b26}\n If (REGEX_WILDCARD_MATCH_IMPL(TEXT, PATTERN, TEXT_IDX, (PAT_IDX + 1))) {\n Return True\n }\n // \u{5c1d}\u{8bd5}\u{5339}\u{914d}1\u{4e2a}\u{6216}\u{591a}\u{4e2a}\u{5b57}\u{7b26}\n Return REGEX_WILDCARD_MATCH_IMPL(TEXT, PATTERN, (TEXT_IDX + 1), PAT_IDX)\n }\n \n // \u{666e}\u{901a}\u{5b57}\u{7b26}\u{5fc5}\u{987b}\u{5b8c}\u{5168}\u{5339}\u{914d}\n If (PAT_CHAR == TEXT_CHAR) {\n Return REGEX_WILDCARD_MATCH_IMPL(TEXT, PATTERN, (TEXT_IDX + 1), (PAT_IDX + 1))\n }\n \n Return False\n}\n\n// ============================================\n// \u{6a21}\u{5f0f}\u{67e5}\u{627e}\n// ============================================\n\n// \u{67e5}\u{627e}\u{6240}\u{6709}\u{5339}\u{914d}\u{6307}\u{5b9a}\u{524d}\u{7f00}\u{548c}\u{540e}\u{7f00}\u{7684}\u{5b50}\u{4e32}\nFunc REGEX_FIND_BETWEEN(TEXT, PREFIX, SUFFIX) {\n Set RESULTS []\n Set START 0\n \n // Guard: with both delimiters empty START would never advance, causing an endless loop\n If ((LEN(PREFIX) == 0) And (LEN(SUFFIX) == 0)) {\n Return RESULTS\n }\n \n While (START < LEN(TEXT)) {\n Set PREFIX_POS INDEXOF(STRSLICE(TEXT, START, LEN(TEXT)), PREFIX)\n If (PREFIX_POS < 0) {\n Break\n }\n Set PREFIX_POS (PREFIX_POS + START)\n \n Set CONTENT_START (PREFIX_POS + LEN(PREFIX))\n Set SUFFIX_POS INDEXOF(STRSLICE(TEXT, CONTENT_START, LEN(TEXT)), SUFFIX)\n \n If (SUFFIX_POS < 0) {\n Break\n }\n Set SUFFIX_POS (SUFFIX_POS + CONTENT_START)\n \n // \u{63d0}\u{53d6}\u{5185}\u{5bb9}\n Set CONTENT STRSLICE(TEXT, CONTENT_START, SUFFIX_POS)\n PUSH(RESULTS, CONTENT)\n \n Set START (SUFFIX_POS + LEN(SUFFIX))\n }\n \n Return RESULTS\n}\n\n// \u{67e5}\u{627e}\u{6240}\u{6709}\u{4ee5}\u{6307}\u{5b9a}\u{524d}\u{7f00}\u{5f00}\u{59cb}\u{7684}\u{884c}\nFunc REGEX_FIND_LINES_STARTING_WITH(TEXT, PREFIX) {\n Set LINES SPLIT(TEXT, \"\\n\")\n Set RESULTS []\n \n Set I 0\n While (I < LEN(LINES)) {\n Set \
LINE LINES[I]\n If (STARTS_WITH(TRIM(LINE), PREFIX)) {\n PUSH(RESULTS, LINE)\n }\n Set I (I + 1)\n }\n \n Return RESULTS\n}\n\n// \u{67e5}\u{627e}\u{6240}\u{6709}\u{5305}\u{542b}\u{6307}\u{5b9a}\u{6587}\u{672c}\u{7684}\u{884c}\nFunc REGEX_FIND_LINES_CONTAINING(TEXT, NEEDLE) {\n Set LINES SPLIT(TEXT, \"\\n\")\n Set RESULTS []\n \n Set I 0\n While (I < LEN(LINES)) {\n Set LINE LINES[I]\n If (CONTAINS(LINE, NEEDLE)) {\n PUSH(RESULTS, LINE)\n }\n Set I (I + 1)\n }\n \n Return RESULTS\n}\n\n// ============================================\n// \u{6a21}\u{5f0f}\u{66ff}\u{6362}\n// ============================================\n\n// \u{66ff}\u{6362}\u{6240}\u{6709}\u{5339}\u{914d}\u{901a}\u{914d}\u{7b26}\u{6a21}\u{5f0f}\u{7684}\u{6587}\u{672c}\nFunc REGEX_REPLACE_PATTERN(TEXT, PATTERN, REPLACEMENT) {\n // \u{7b80}\u{5316}\u{5b9e}\u{73b0}\u{ff1a}\u{5982}\u{679c}\u{6a21}\u{5f0f}\u{662f}\u{7eaf}\u{6587}\u{672c}\u{ff08}\u{65e0}\u{901a}\u{914d}\u{7b26}\u{ff09}\u{ff0c}\u{4f7f}\u{7528} REPLACE_ALL\n If ((Not CONTAINS(PATTERN, \"*\")) And (Not CONTAINS(PATTERN, \"?\"))) {\n Return REPLACE(TEXT, PATTERN, REPLACEMENT)\n }\n \n // \u{5bf9}\u{4e8e}\u{901a}\u{914d}\u{7b26}\u{6a21}\u{5f0f}\u{ff0c}\u{9010}\u{8bcd}\u{68c0}\u{67e5}\u{ff08}\u{7b80}\u{5316}\u{5b9e}\u{73b0}\u{ff09}\n Set WORDS SPLIT(TEXT, \" \")\n Set RESULTS []\n \n Set I 0\n While (I < LEN(WORDS)) {\n Set WORD WORDS[I]\n If (REGEX_WILDCARD_MATCH(WORD, PATTERN)) {\n PUSH(RESULTS, REPLACEMENT)\n } Else {\n PUSH(RESULTS, WORD)\n }\n Set I (I + 1)\n }\n \n Return JOIN(RESULTS, \" \")\n}\n\n// \u{5220}\u{9664}\u{5339}\u{914d}\u{6a21}\u{5f0f}\u{7684}\u{6240}\u{6709}\u{6587}\u{672c}\nFunc REGEX_REMOVE_PATTERN(TEXT, PATTERN) {\n Return REGEX_REPLACE_PATTERN(TEXT, PATTERN, \"\")\n}\n\n// ============================================\n// \u{6587}\u{672c}\u{63d0}\u{53d6}\n// ============================================\n\n// \u{63d0}\u{53d6}\u{6240}\u{6709}\u{6570}\u{5b57}\nFunc REGEX_EXTRACT_NUMBERS(TEXT) {\n Set RESULTS []\n Set \
CURRENT \"\"\n Set IN_NUMBER False\n \n Set I 0\n While (I < LEN(TEXT)) {\n Set CHAR CHARAT(TEXT, I)\n \n If (REGEX_IS_DIGIT(CHAR)) {\n Set CURRENT (CURRENT + CHAR)\n Set IN_NUMBER True\n } Elif ((CHAR == \".\") And IN_NUMBER) {\n // \u{652f}\u{6301}\u{5c0f}\u{6570}\u{70b9}\n Set CURRENT (CURRENT + CHAR)\n } Else {\n If (IN_NUMBER) {\n PUSH(RESULTS, TO_NUMBER(CURRENT))\n Set CURRENT \"\"\n Set IN_NUMBER False\n }\n }\n \n Set I (I + 1)\n }\n \n // \u{5904}\u{7406}\u{672b}\u{5c3e}\u{7684}\u{6570}\u{5b57}\n If (IN_NUMBER) {\n PUSH(RESULTS, TO_NUMBER(CURRENT))\n }\n \n Return RESULTS\n}\n\n// \u{63d0}\u{53d6}\u{6240}\u{6709}\u{5355}\u{8bcd}\u{ff08}\u{5b57}\u{6bcd}\u{5e8f}\u{5217}\u{ff09}\nFunc REGEX_EXTRACT_WORDS(TEXT) {\n Set RESULTS []\n Set CURRENT \"\"\n \n Set I 0\n While (I < LEN(TEXT)) {\n Set CHAR CHARAT(TEXT, I)\n \n If (REGEX_IS_ALPHA(CHAR)) {\n Set CURRENT (CURRENT + CHAR)\n } Else {\n If (LEN(CURRENT) > 0) {\n PUSH(RESULTS, CURRENT)\n Set CURRENT \"\"\n }\n }\n \n Set I (I + 1)\n }\n \n // \u{5904}\u{7406}\u{672b}\u{5c3e}\u{7684}\u{5355}\u{8bcd}\n If (LEN(CURRENT) > 0) {\n PUSH(RESULTS, CURRENT)\n }\n \n Return RESULTS\n}\n\n// \u{63d0}\u{53d6}\u{6240}\u{6709}\u{90ae}\u{7bb1}\u{5730}\u{5740}\u{ff08}\u{7b80}\u{5316}\u{7248}\u{ff09}\nFunc REGEX_EXTRACT_EMAILS(TEXT) {\n Set RESULTS []\n Set WORDS SPLIT(TEXT, \" \")\n \n Set I 0\n While (I < LEN(WORDS)) {\n Set WORD TRIM(WORDS[I])\n If (REGEX_IS_EMAIL(WORD)) {\n PUSH(RESULTS, WORD)\n }\n Set I (I + 1)\n }\n \n Return RESULTS\n}\n\n// \u{63d0}\u{53d6}\u{6240}\u{6709} URL\u{ff08}\u{7b80}\u{5316}\u{7248}\u{ff09}\nFunc REGEX_EXTRACT_URLS(TEXT) {\n Set RESULTS []\n Set WORDS SPLIT(TEXT, \" \")\n \n Set I 0\n While (I < LEN(WORDS)) {\n Set WORD TRIM(WORDS[I])\n If (REGEX_IS_URL(WORD)) {\n PUSH(RESULTS, WORD)\n }\n Set I (I + 1)\n }\n \n Return RESULTS\n}\n\n// ============================================\n// \u{9a8c}\u{8bc1}\u{51fd}\u{6570}\n// ============================================\n\n// \
\u{9a8c}\u{8bc1}\u{90ae}\u{7bb1}\u{683c}\u{5f0f}\nFunc REGEX_IS_EMAIL(TEXT) {\n // \u{7b80}\u{5355}\u{9a8c}\u{8bc1}\u{ff1a}\u{5305}\u{542b} @ \u{4e14}\u{4e24}\u{8fb9}\u{90fd}\u{6709}\u{5185}\u{5bb9}\n If (Not CONTAINS(TEXT, \"@\")) {\n Return False\n }\n \n Set PARTS SPLIT(TEXT, \"@\")\n If (LEN(PARTS) != 2) {\n Return False\n }\n \n Set LOCAL PARTS[0]\n Set DOMAIN PARTS[1]\n \n If ((LEN(LOCAL) == 0) Or (LEN(DOMAIN) == 0)) {\n Return False\n }\n \n // \u{57df}\u{540d}\u{5fc5}\u{987b}\u{5305}\u{542b}\u{81f3}\u{5c11}\u{4e00}\u{4e2a}\u{70b9}\n If (Not CONTAINS(DOMAIN, \".\")) {\n Return False\n }\n \n Return True\n}\n\n// \u{9a8c}\u{8bc1} URL \u{683c}\u{5f0f}\nFunc REGEX_IS_URL(TEXT) {\n Set LOWER_TEXT LOWER(TEXT)\n \n If (STARTS_WITH(LOWER_TEXT, \"http://\")) {\n Return True\n }\n If (STARTS_WITH(LOWER_TEXT, \"https://\")) {\n Return True\n }\n If (STARTS_WITH(LOWER_TEXT, \"ftp://\")) {\n Return True\n }\n \n Return False\n}\n\n// \u{9a8c}\u{8bc1}\u{662f}\u{5426}\u{53ea}\u{5305}\u{542b}\u{6570}\u{5b57}\nFunc REGEX_IS_NUMERIC(TEXT) {\n If (LEN(TEXT) == 0) {\n Return False\n }\n \n Set I 0\n While (I < LEN(TEXT)) {\n Set CHAR CHARAT(TEXT, I)\n If (Not REGEX_IS_DIGIT(CHAR)) {\n Return False\n }\n Set I (I + 1)\n }\n \n Return True\n}\n\n// \u{9a8c}\u{8bc1}\u{662f}\u{5426}\u{53ea}\u{5305}\u{542b}\u{5b57}\u{6bcd}\nFunc REGEX_IS_ALPHA(TEXT) {\n If (LEN(TEXT) == 0) {\n Return False\n }\n \n Set I 0\n While (I < LEN(TEXT)) {\n Set CHAR CHARAT(TEXT, I)\n If (Not REGEX_IS_ALPHA_CHAR(CHAR)) {\n Return False\n }\n Set I (I + 1)\n }\n \n Return True\n}\n\n// \u{9a8c}\u{8bc1}\u{662f}\u{5426}\u{53ea}\u{5305}\u{542b}\u{5b57}\u{6bcd}\u{548c}\u{6570}\u{5b57}\nFunc REGEX_IS_ALPHANUMERIC(TEXT) {\n If (LEN(TEXT) == 0) {\n Return False\n }\n \n Set I 0\n While (I < LEN(TEXT)) {\n Set CHAR CHARAT(TEXT, I)\n If ((Not REGEX_IS_ALPHA_CHAR(CHAR)) And (Not REGEX_IS_DIGIT(CHAR))) {\n Return False\n }\n Set I (I + 1)\n }\n \n Return True\n}\n\n// \
\u{9a8c}\u{8bc1}\u{7535}\u{8bdd}\u{53f7}\u{7801}\u{ff08}\u{7b80}\u{5316}\u{ff0c}\u{652f}\u{6301}\u{4e2d}\u{56fd}\u{683c}\u{5f0f}\u{ff09}\nFunc REGEX_IS_PHONE(TEXT) {\n // \u{53bb}\u{9664}\u{5e38}\u{89c1}\u{5206}\u{9694}\u{7b26}\n Set CLEANED REPLACE(TEXT, \"-\", \"\")\n Set CLEANED REPLACE(CLEANED, \" \", \"\")\n Set CLEANED REPLACE(CLEANED, \"(\", \"\")\n Set CLEANED REPLACE(CLEANED, \")\", \"\")\n \n // \u{68c0}\u{67e5}\u{662f}\u{5426}\u{5168}\u{662f}\u{6570}\u{5b57}\n If (Not REGEX_IS_NUMERIC(CLEANED)) {\n Return False\n }\n \n // \u{957f}\u{5ea6}\u{68c0}\u{67e5}\u{ff08}\u{4e2d}\u{56fd}\u{624b}\u{673a}\u{53f7}11\u{4f4d}\u{ff0c}\u{5ea7}\u{673a}8-12\u{4f4d}\u{ff09}\n Set LEN_VAL LEN(CLEANED)\n Return ((LEN_VAL >= 8) And (LEN_VAL <= 12))\n}\n\n// \u{9a8c}\u{8bc1}\u{8eab}\u{4efd}\u{8bc1}\u{53f7}\u{ff08}\u{4e2d}\u{56fd}18\u{4f4d}\u{ff09}\nFunc REGEX_IS_ID_CARD(TEXT) {\n If (LEN(TEXT) != 18) {\n Return False\n }\n \n // \u{524d}17\u{4f4d}\u{5fc5}\u{987b}\u{662f}\u{6570}\u{5b57}\n Set I 0\n While (I < 17) {\n Set CHAR CHARAT(TEXT, I)\n If (Not REGEX_IS_DIGIT(CHAR)) {\n Return False\n }\n Set I (I + 1)\n }\n \n // \u{6700}\u{540e}\u{4e00}\u{4f4d}\u{53ef}\u{4ee5}\u{662f}\u{6570}\u{5b57}\u{6216} X\n Set LAST UPPER(CHARAT(TEXT, 17))\n Return (REGEX_IS_DIGIT(LAST) Or (LAST == \"X\"))\n}\n\n// ============================================\n// \u{5b57}\u{7b26}\u{7c7b}\u{578b}\u{68c0}\u{67e5}\u{ff08}\u{8f85}\u{52a9}\u{51fd}\u{6570}\u{ff09}\n// ============================================\n\nFunc REGEX_IS_DIGIT(CHAR) {\n Return ((CHAR >= \"0\") And (CHAR <= \"9\"))\n}\n\nFunc REGEX_IS_ALPHA_CHAR(CHAR) {\n Set UPPER_CHAR UPPER(CHAR)\n Return ((UPPER_CHAR >= \"A\") And (UPPER_CHAR <= \"Z\"))\n}\n\nFunc REGEX_IS_WHITESPACE(CHAR) {\n Return ((CHAR == \" \") Or (CHAR == \"\\t\") Or (CHAR == \"\\n\") Or (CHAR == \"\\r\"))\n}\n\n// ============================================\n// \u{6587}\u{672c}\u{6e05}\u{7406}\n// ============================================\n\n// \
\u{5220}\u{9664}\u{6240}\u{6709}\u{7a7a}\u{767d}\u{5b57}\u{7b26}\nFunc REGEX_REMOVE_WHITESPACE(TEXT) {\n Set RESULT \"\"\n Set I 0\n While (I < LEN(TEXT)) {\n Set CHAR CHARAT(TEXT, I)\n If (Not REGEX_IS_WHITESPACE(CHAR)) {\n Set RESULT (RESULT + CHAR)\n }\n Set I (I + 1)\n }\n Return RESULT\n}\n\n// \u{5220}\u{9664}\u{6240}\u{6709}\u{975e}\u{5b57}\u{6bcd}\u{6570}\u{5b57}\u{5b57}\u{7b26}\nFunc REGEX_REMOVE_NON_ALPHANUMERIC(TEXT) {\n Set RESULT \"\"\n Set I 0\n While (I < LEN(TEXT)) {\n Set CHAR CHARAT(TEXT, I)\n If (REGEX_IS_ALPHA_CHAR(CHAR) Or REGEX_IS_DIGIT(CHAR)) {\n Set RESULT (RESULT + CHAR)\n }\n Set I (I + 1)\n }\n Return RESULT\n}\n\n// \u{5220}\u{9664}\u{6240}\u{6709}\u{6570}\u{5b57}\nFunc REGEX_REMOVE_DIGITS(TEXT) {\n Set RESULT \"\"\n Set I 0\n While (I < LEN(TEXT)) {\n Set CHAR CHARAT(TEXT, I)\n If (Not REGEX_IS_DIGIT(CHAR)) {\n Set RESULT (RESULT + CHAR)\n }\n Set I (I + 1)\n }\n Return RESULT\n}\n\n// \u{4fdd}\u{7559}\u{6307}\u{5b9a}\u{5b57}\u{7b26}\u{96c6}\nFunc REGEX_KEEP_CHARS(TEXT, ALLOWED) {\n Set RESULT \"\"\n Set I 0\n While (I < LEN(TEXT)) {\n Set CHAR CHARAT(TEXT, I)\n If (CONTAINS(ALLOWED, CHAR)) {\n Set RESULT (RESULT + CHAR)\n }\n Set I (I + 1)\n }\n Return RESULT\n}\n\n// ============================================\n// \u{5206}\u{8bcd}\u{548c}\u{5206}\u{6bb5}\n// ============================================\n\n// \u{6309}\u{53e5}\u{5b50}\u{5206}\u{5272}\u{ff08}\u{57fa}\u{4e8e}\u{6807}\u{70b9}\u{ff09}\nFunc REGEX_SPLIT_SENTENCES(TEXT) {\n Set RESULT []\n Set CURRENT \"\"\n \n Set I 0\n While (I < LEN(TEXT)) {\n Set CHAR CHARAT(TEXT, I)\n Set CURRENT (CURRENT + CHAR)\n \n // \u{53e5}\u{5b50}\u{7ed3}\u{675f}\u{7b26}\n If ((CHAR == \".\") Or (CHAR == \"!\") Or (CHAR == \"?\") Or (CHAR == \"\u{3002}\") Or (CHAR == \"\u{ff01}\") Or (CHAR == \"\u{ff1f}\")) {\n Set TRIMMED TRIM(CURRENT)\n If (LEN(TRIMMED) > 0) {\n PUSH(RESULT, TRIMMED)\n }\n Set CURRENT \"\"\n }\n \n Set I (I + 1)\n }\n \n // \u{5904}\u{7406}\u{5269}\u{4f59}\u{6587}\u{672c}\n Set \
TRIMMED TRIM(CURRENT)\n If (LEN(TRIMMED) > 0) {\n PUSH(RESULT, TRIMMED)\n }\n \n Return RESULT\n}\n\n// \u{6309}\u{6bb5}\u{843d}\u{5206}\u{5272}\nFunc REGEX_SPLIT_PARAGRAPHS(TEXT) {\n Set LINES SPLIT(TEXT, \"\\n\")\n Set RESULT []\n Set CURRENT \"\"\n \n Set I 0\n While (I < LEN(LINES)) {\n Set LINE TRIM(LINES[I])\n \n If (LEN(LINE) == 0) {\n // \u{7a7a}\u{884c}\u{ff0c}\u{7ed3}\u{675f}\u{5f53}\u{524d}\u{6bb5}\u{843d}\n If (LEN(CURRENT) > 0) {\n PUSH(RESULT, TRIM(CURRENT))\n Set CURRENT \"\"\n }\n } Else {\n If (LEN(CURRENT) > 0) {\n Set CURRENT (CURRENT + \" \")\n }\n Set CURRENT (CURRENT + LINE)\n }\n \n Set I (I + 1)\n }\n \n // \u{5904}\u{7406}\u{6700}\u{540e}\u{4e00}\u{4e2a}\u{6bb5}\u{843d}\n If (LEN(CURRENT) > 0) {\n PUSH(RESULT, TRIM(CURRENT))\n }\n \n Return RESULT\n}\n\n// ============================================\n// \u{7edf}\u{8ba1}\u{51fd}\u{6570}\n// ============================================\n\n// \u{7edf}\u{8ba1}\u{5339}\u{914d}\u{6b21}\u{6570}\nFunc REGEX_COUNT_MATCHES(TEXT, PATTERN) {\n Set COUNT 0\n Set POS 0\n \n // Guard: an empty PATTERN has length 0, so POS would never advance, causing an endless loop\n If (LEN(PATTERN) == 0) {\n Return COUNT\n }\n \n While (POS < LEN(TEXT)) {\n Set FOUND INDEXOF(STRSLICE(TEXT, POS, LEN(TEXT)), PATTERN)\n If (FOUND < 0) {\n Break\n }\n Set COUNT (COUNT + 1)\n Set POS (POS + FOUND + LEN(PATTERN))\n }\n \n Return COUNT\n}\n\n// \u{7edf}\u{8ba1}\u{5b57}\u{7b26}\u{7c7b}\u{578b}\nFunc REGEX_COUNT_CHARS(TEXT) {\n Set STATS {}\n Set STATS[\"total\"] LEN(TEXT)\n Set STATS[\"alpha\"] 0\n Set STATS[\"digit\"] 0\n Set STATS[\"space\"] 0\n Set STATS[\"other\"] 0\n \n Set I 0\n While (I < LEN(TEXT)) {\n Set CHAR CHARAT(TEXT, I)\n \n If (REGEX_IS_ALPHA_CHAR(CHAR)) {\n Set STATS[\"alpha\"] (STATS[\"alpha\"] + 1)\n } Elif (REGEX_IS_DIGIT(CHAR)) {\n Set STATS[\"digit\"] (STATS[\"digit\"] + 1)\n } Elif (REGEX_IS_WHITESPACE(CHAR)) {\n Set STATS[\"space\"] (STATS[\"space\"] + 1)\n } Else {\n Set STATS[\"other\"] (STATS[\"other\"] + 1)\n }\n \n Set I (I + 1)\n }\n \n Return STATS\n}\n\n";
正则风格文本处理