REGEX_UTILS

Constant REGEX_UTILS 

Source
/// Embedded script source for the "regex-like" text utilities library.
///
/// The value is the complete text of a script that defines string-based
/// pattern helpers. As the script's own header states, this is not a real
/// regular-expression engine; everything is built on plain string operations.
///
/// Functions defined by the script:
/// - wildcard matching (`*` / `?`): `REGEX_WILDCARD_MATCH`,
///   `REGEX_WILDCARD_MATCH_IMPL`
/// - searching: `REGEX_FIND_BETWEEN`, `REGEX_FIND_LINES_STARTING_WITH`,
///   `REGEX_FIND_LINES_CONTAINING`
/// - replacing: `REGEX_REPLACE_PATTERN`, `REGEX_REMOVE_PATTERN`
/// - extraction: `REGEX_EXTRACT_NUMBERS`, `REGEX_EXTRACT_WORDS`,
///   `REGEX_EXTRACT_EMAILS`, `REGEX_EXTRACT_URLS`
/// - validation: `REGEX_IS_EMAIL`, `REGEX_IS_URL`, `REGEX_IS_NUMERIC`,
///   `REGEX_IS_ALPHA`, `REGEX_IS_ALPHANUMERIC`, `REGEX_IS_PHONE`,
///   `REGEX_IS_ID_CARD`
/// - character classes: `REGEX_IS_DIGIT`, `REGEX_IS_ALPHA_CHAR`,
///   `REGEX_IS_WHITESPACE`
/// - cleaning: `REGEX_REMOVE_WHITESPACE`, `REGEX_REMOVE_NON_ALPHANUMERIC`,
///   `REGEX_REMOVE_DIGITS`, `REGEX_KEEP_CHARS`
/// - splitting: `REGEX_SPLIT_SENTENCES`, `REGEX_SPLIT_PARAGRAPHS`
/// - statistics: `REGEX_COUNT_MATCHES`, `REGEX_COUNT_CHARS`
///
/// Layout note: the literal is wrapped with `\` string continuations, so the
/// physical line breaks below are NOT part of the value; every newline in the
/// script is an explicit `\n` escape. A continuation also skips leading
/// whitespace on the next physical line, so each continued line must start
/// with a non-whitespace character.
///
/// `REGEX_COUNT_MATCHES` returns 0 for an empty `PATTERN`, and
/// `REGEX_FIND_BETWEEN` returns an empty list when both `PREFIX` and `SUFFIX`
/// are empty. Without these guards the scan position would never advance
/// (assuming `INDEXOF` reports position 0 for an empty needle — TODO confirm
/// against the interpreter's `INDEXOF`).
pub const REGEX_UTILS: &str = "// ============================================\n// Regex-like Utilities - \u{6b63}\u{5219}\u{98ce}\u{683c}\u{6587}\u{672c}\u{5904}\u{7406}\u{5de5}\u{5177}\n// ============================================\n// \u{63d0}\u{4f9b}\u{6587}\u{672c}\u{6a21}\u{5f0f}\u{5339}\u{914d}\u{548c}\u{5904}\u{7406}\u{529f}\u{80fd}\u{ff08}\u{57fa}\u{4e8e}\u{5b57}\u{7b26}\u{4e32}\u{64cd}\u{4f5c}\u{ff09}\n// \u{6ce8}\u{610f}\u{ff1a}\u{8fd9}\u{4e0d}\u{662f}\u{771f}\u{6b63}\u{7684}\u{6b63}\u{5219}\u{8868}\u{8fbe}\u{5f0f}\u{5f15}\u{64ce}\u{ff0c}\u{800c}\u{662f}\u{5e38}\u{7528}\u{6a21}\u{5f0f}\u{7684}\u{5b9e}\u{7528}\u{5de5}\u{5177}\n// \u{652f}\u{6301}\u{ff1a}\n// - \u{901a}\u{914d}\u{7b26}\u{5339}\u{914d}\n// - \u{6a21}\u{5f0f}\u{67e5}\u{627e}\u{548c}\u{66ff}\u{6362}\n// - \u{6587}\u{672c}\u{63d0}\u{53d6}\n// - \u{5e38}\u{7528}\u{9a8c}\u{8bc1}\u{ff08}\u{90ae}\u{7bb1}\u{3001}URL\u{3001}\u{7535}\u{8bdd}\u{7b49}\u{ff09}\n// ============================================\n\n// ============================================\n// \u{901a}\u{914d}\u{7b26}\u{5339}\u{914d}\n// ============================================\n\n// \u{901a}\u{914d}\u{7b26}\u{5339}\u{914d}\u{ff08}\u{652f}\u{6301} * \u{548c} ?\u{ff09}\n// * \u{5339}\u{914d}\u{4efb}\u{610f}\u{5b57}\u{7b26}\u{5e8f}\u{5217}\n// ? \
\u{5339}\u{914d}\u{5355}\u{4e2a}\u{5b57}\u{7b26}\nFunc REGEX_WILDCARD_MATCH(TEXT, PATTERN) {\n    Return REGEX_WILDCARD_MATCH_IMPL(TEXT, PATTERN, 0, 0)\n}\n\nFunc REGEX_WILDCARD_MATCH_IMPL(TEXT, PATTERN, TEXT_IDX, PAT_IDX) {\n    Set TEXT_LEN LEN(TEXT)\n    Set PAT_LEN LEN(PATTERN)\n    \n    // \u{90fd}\u{5230}\u{672b}\u{5c3e}\u{ff0c}\u{5339}\u{914d}\u{6210}\u{529f}\n    If ((TEXT_IDX == TEXT_LEN) And (PAT_IDX == PAT_LEN)) {\n        Return True\n    }\n    \n    // \u{6a21}\u{5f0f}\u{5230}\u{672b}\u{5c3e}\u{4f46}\u{6587}\u{672c}\u{672a}\u{5b8c}\u{ff0c}\u{5931}\u{8d25}\n    If ((PAT_IDX == PAT_LEN) And (TEXT_IDX < TEXT_LEN)) {\n        Return False\n    }\n    \n    // \u{6587}\u{672c}\u{5230}\u{672b}\u{5c3e}\u{4f46}\u{6a21}\u{5f0f}\u{6709}\u{975e}*\u{5b57}\u{7b26}\u{ff0c}\u{5931}\u{8d25}\n    If ((TEXT_IDX == TEXT_LEN) And (PAT_IDX < PAT_LEN)) {\n        Set I PAT_IDX\n        While (I < PAT_LEN) {\n            If (CHARAT(PATTERN, I) != \"*\") {\n                Return False\n            }\n            Set I (I + 1)\n        }\n        Return True\n    }\n    \n    // \u{83b7}\u{53d6}\u{5f53}\u{524d}\u{5b57}\u{7b26}\n    Set PAT_CHAR CHARAT(PATTERN, PAT_IDX)\n    Set TEXT_CHAR CHARAT(TEXT, TEXT_IDX)\n    \n    // ? \
\u{5339}\u{914d}\u{4efb}\u{610f}\u{5355}\u{4e2a}\u{5b57}\u{7b26}\n    If (PAT_CHAR == \"?\") {\n        Return REGEX_WILDCARD_MATCH_IMPL(TEXT, PATTERN, (TEXT_IDX + 1), (PAT_IDX + 1))\n    }\n    \n    // * \u{5339}\u{914d}\u{4efb}\u{610f}\u{5e8f}\u{5217}\n    If (PAT_CHAR == \"*\") {\n        // \u{5c1d}\u{8bd5}\u{5339}\u{914d}0\u{4e2a}\u{5b57}\u{7b26}\n        If (REGEX_WILDCARD_MATCH_IMPL(TEXT, PATTERN, TEXT_IDX, (PAT_IDX + 1))) {\n            Return True\n        }\n        // \u{5c1d}\u{8bd5}\u{5339}\u{914d}1\u{4e2a}\u{6216}\u{591a}\u{4e2a}\u{5b57}\u{7b26}\n        Return REGEX_WILDCARD_MATCH_IMPL(TEXT, PATTERN, (TEXT_IDX + 1), PAT_IDX)\n    }\n    \n    // \u{666e}\u{901a}\u{5b57}\u{7b26}\u{5fc5}\u{987b}\u{5b8c}\u{5168}\u{5339}\u{914d}\n    If (PAT_CHAR == TEXT_CHAR) {\n        Return REGEX_WILDCARD_MATCH_IMPL(TEXT, PATTERN, (TEXT_IDX + 1), (PAT_IDX + 1))\n    }\n    \n    Return False\n}\n\n// ============================================\n// \u{6a21}\u{5f0f}\u{67e5}\u{627e}\n// ============================================\n\n// \u{67e5}\u{627e}\u{6240}\u{6709}\u{5339}\u{914d}\u{6307}\u{5b9a}\u{524d}\u{7f00}\u{548c}\u{540e}\u{7f00}\u{7684}\u{5b50}\u{4e32}\nFunc REGEX_FIND_BETWEEN(TEXT, PREFIX, SUFFIX) {\n    Set RESULTS []\n    Set START 0\n    \n    // Guard: with both PREFIX and SUFFIX empty, START would never advance\n    If ((LEN(PREFIX) == 0) And (LEN(SUFFIX) == 0)) {\n        Return RESULTS\n    }\n    \n    While (START < LEN(TEXT)) {\n        Set PREFIX_POS INDEXOF(STRSLICE(TEXT, START, LEN(TEXT)), PREFIX)\n        If (PREFIX_POS < 0) {\n            Break\n        }\n        Set PREFIX_POS (PREFIX_POS + START)\n        \n        Set CONTENT_START (PREFIX_POS + LEN(PREFIX))\n        Set SUFFIX_POS INDEXOF(STRSLICE(TEXT, CONTENT_START, LEN(TEXT)), SUFFIX)\n        \n        If (SUFFIX_POS < 0) {\n            Break\n        }\n        Set SUFFIX_POS (SUFFIX_POS + CONTENT_START)\n        \n        // \u{63d0}\u{53d6}\u{5185}\u{5bb9}\n        Set CONTENT STRSLICE(TEXT, CONTENT_START, SUFFIX_POS)\n        PUSH(RESULTS, CONTENT)\n        \n        Set START (SUFFIX_POS + LEN(SUFFIX))\n    }\n    \n    Return \
RESULTS\n}\n\n// \u{67e5}\u{627e}\u{6240}\u{6709}\u{4ee5}\u{6307}\u{5b9a}\u{524d}\u{7f00}\u{5f00}\u{59cb}\u{7684}\u{884c}\nFunc REGEX_FIND_LINES_STARTING_WITH(TEXT, PREFIX) {\n    Set LINES SPLIT(TEXT, \"\\n\")\n    Set RESULTS []\n    \n    Set I 0\n    While (I < LEN(LINES)) {\n        Set LINE LINES[I]\n        If (STARTS_WITH(TRIM(LINE), PREFIX)) {\n            PUSH(RESULTS, LINE)\n        }\n        Set I (I + 1)\n    }\n    \n    Return RESULTS\n}\n\n// \u{67e5}\u{627e}\u{6240}\u{6709}\u{5305}\u{542b}\u{6307}\u{5b9a}\u{6587}\u{672c}\u{7684}\u{884c}\nFunc REGEX_FIND_LINES_CONTAINING(TEXT, NEEDLE) {\n    Set LINES SPLIT(TEXT, \"\\n\")\n    Set RESULTS []\n    \n    Set I 0\n    While (I < LEN(LINES)) {\n        Set LINE LINES[I]\n        If (CONTAINS(LINE, NEEDLE)) {\n            PUSH(RESULTS, LINE)\n        }\n        Set I (I + 1)\n    }\n    \n    Return RESULTS\n}\n\n// ============================================\n// \u{6a21}\u{5f0f}\u{66ff}\u{6362}\n// ============================================\n\n// \u{66ff}\u{6362}\u{6240}\u{6709}\u{5339}\u{914d}\u{901a}\u{914d}\u{7b26}\u{6a21}\u{5f0f}\u{7684}\u{6587}\u{672c}\nFunc REGEX_REPLACE_PATTERN(TEXT, PATTERN, REPLACEMENT) {\n    // \u{7b80}\u{5316}\u{5b9e}\u{73b0}\u{ff1a}\u{5982}\u{679c}\u{6a21}\u{5f0f}\u{662f}\u{7eaf}\u{6587}\u{672c}\u{ff08}\u{65e0}\u{901a}\u{914d}\u{7b26}\u{ff09}\u{ff0c}\u{4f7f}\u{7528} REPLACE_ALL\n    If ((Not CONTAINS(PATTERN, \"*\")) And (Not CONTAINS(PATTERN, \"?\"))) {\n        Return REPLACE(TEXT, PATTERN, REPLACEMENT)\n    }\n    \n    // \u{5bf9}\u{4e8e}\u{901a}\u{914d}\u{7b26}\u{6a21}\u{5f0f}\u{ff0c}\u{9010}\u{8bcd}\u{68c0}\u{67e5}\u{ff08}\u{7b80}\u{5316}\u{5b9e}\u{73b0}\u{ff09}\n    Set WORDS SPLIT(TEXT, \" \")\n    Set RESULTS []\n    \n    Set I 0\n    While (I < LEN(WORDS)) {\n        Set WORD WORDS[I]\n        If (REGEX_WILDCARD_MATCH(WORD, PATTERN)) {\n            PUSH(RESULTS, REPLACEMENT)\n        } Else {\n            PUSH(RESULTS, WORD)\n        }\n        Set I (I + \
1)\n    }\n    \n    Return JOIN(RESULTS, \" \")\n}\n\n// \u{5220}\u{9664}\u{5339}\u{914d}\u{6a21}\u{5f0f}\u{7684}\u{6240}\u{6709}\u{6587}\u{672c}\nFunc REGEX_REMOVE_PATTERN(TEXT, PATTERN) {\n    Return REGEX_REPLACE_PATTERN(TEXT, PATTERN, \"\")\n}\n\n// ============================================\n// \u{6587}\u{672c}\u{63d0}\u{53d6}\n// ============================================\n\n// \u{63d0}\u{53d6}\u{6240}\u{6709}\u{6570}\u{5b57}\nFunc REGEX_EXTRACT_NUMBERS(TEXT) {\n    Set RESULTS []\n    Set CURRENT \"\"\n    Set IN_NUMBER False\n    \n    Set I 0\n    While (I < LEN(TEXT)) {\n        Set CHAR CHARAT(TEXT, I)\n        \n        If (REGEX_IS_DIGIT(CHAR)) {\n            Set CURRENT (CURRENT + CHAR)\n            Set IN_NUMBER True\n        } Elif ((CHAR == \".\") And IN_NUMBER) {\n            // \u{652f}\u{6301}\u{5c0f}\u{6570}\u{70b9}\n            Set CURRENT (CURRENT + CHAR)\n        } Else {\n            If (IN_NUMBER) {\n                PUSH(RESULTS, TO_NUMBER(CURRENT))\n                Set CURRENT \"\"\n                Set IN_NUMBER False\n            }\n        }\n        \n        Set I (I + 1)\n    }\n    \n    // \u{5904}\u{7406}\u{672b}\u{5c3e}\u{7684}\u{6570}\u{5b57}\n    If (IN_NUMBER) {\n        PUSH(RESULTS, TO_NUMBER(CURRENT))\n    }\n    \n    Return RESULTS\n}\n\n// \u{63d0}\u{53d6}\u{6240}\u{6709}\u{5355}\u{8bcd}\u{ff08}\u{5b57}\u{6bcd}\u{5e8f}\u{5217}\u{ff09}\nFunc REGEX_EXTRACT_WORDS(TEXT) {\n    Set RESULTS []\n    Set CURRENT \"\"\n    \n    Set I 0\n    While (I < LEN(TEXT)) {\n        Set CHAR CHARAT(TEXT, I)\n        \n        If (REGEX_IS_ALPHA(CHAR)) {\n            Set CURRENT (CURRENT + CHAR)\n        } Else {\n            If (LEN(CURRENT) > 0) {\n                PUSH(RESULTS, CURRENT)\n                Set CURRENT \"\"\n            }\n        }\n        \n        Set I (I + 1)\n    }\n    \n    // \u{5904}\u{7406}\u{672b}\u{5c3e}\u{7684}\u{5355}\u{8bcd}\n    If (LEN(CURRENT) > 0) {\n        PUSH(RESULTS, CURRENT)\n    }\n    \n    \
Return RESULTS\n}\n\n// \u{63d0}\u{53d6}\u{6240}\u{6709}\u{90ae}\u{7bb1}\u{5730}\u{5740}\u{ff08}\u{7b80}\u{5316}\u{7248}\u{ff09}\nFunc REGEX_EXTRACT_EMAILS(TEXT) {\n    Set RESULTS []\n    Set WORDS SPLIT(TEXT, \" \")\n    \n    Set I 0\n    While (I < LEN(WORDS)) {\n        Set WORD TRIM(WORDS[I])\n        If (REGEX_IS_EMAIL(WORD)) {\n            PUSH(RESULTS, WORD)\n        }\n        Set I (I + 1)\n    }\n    \n    Return RESULTS\n}\n\n// \u{63d0}\u{53d6}\u{6240}\u{6709} URL\u{ff08}\u{7b80}\u{5316}\u{7248}\u{ff09}\nFunc REGEX_EXTRACT_URLS(TEXT) {\n    Set RESULTS []\n    Set WORDS SPLIT(TEXT, \" \")\n    \n    Set I 0\n    While (I < LEN(WORDS)) {\n        Set WORD TRIM(WORDS[I])\n        If (REGEX_IS_URL(WORD)) {\n            PUSH(RESULTS, WORD)\n        }\n        Set I (I + 1)\n    }\n    \n    Return RESULTS\n}\n\n// ============================================\n// \u{9a8c}\u{8bc1}\u{51fd}\u{6570}\n// ============================================\n\n// \u{9a8c}\u{8bc1}\u{90ae}\u{7bb1}\u{683c}\u{5f0f}\nFunc REGEX_IS_EMAIL(TEXT) {\n    // \u{7b80}\u{5355}\u{9a8c}\u{8bc1}\u{ff1a}\u{5305}\u{542b} @ \u{4e14}\u{4e24}\u{8fb9}\u{90fd}\u{6709}\u{5185}\u{5bb9}\n    If (Not CONTAINS(TEXT, \"@\")) {\n        Return False\n    }\n    \n    Set PARTS SPLIT(TEXT, \"@\")\n    If (LEN(PARTS) != 2) {\n        Return False\n    }\n    \n    Set LOCAL PARTS[0]\n    Set DOMAIN PARTS[1]\n    \n    If ((LEN(LOCAL) == 0) Or (LEN(DOMAIN) == 0)) {\n        Return False\n    }\n    \n    // \u{57df}\u{540d}\u{5fc5}\u{987b}\u{5305}\u{542b}\u{81f3}\u{5c11}\u{4e00}\u{4e2a}\u{70b9}\n    If (Not CONTAINS(DOMAIN, \".\")) {\n        Return False\n    }\n    \n    Return True\n}\n\n// \u{9a8c}\u{8bc1} URL \u{683c}\u{5f0f}\nFunc REGEX_IS_URL(TEXT) {\n    Set LOWER_TEXT LOWER(TEXT)\n    \n    If (STARTS_WITH(LOWER_TEXT, \"http://\")) {\n        Return True\n    }\n    If (STARTS_WITH(LOWER_TEXT, \"https://\")) {\n        Return True\n    }\n    If (STARTS_WITH(LOWER_TEXT, \"ftp://\")) {\n        \
Return True\n    }\n    \n    Return False\n}\n\n// \u{9a8c}\u{8bc1}\u{662f}\u{5426}\u{53ea}\u{5305}\u{542b}\u{6570}\u{5b57}\nFunc REGEX_IS_NUMERIC(TEXT) {\n    If (LEN(TEXT) == 0) {\n        Return False\n    }\n    \n    Set I 0\n    While (I < LEN(TEXT)) {\n        Set CHAR CHARAT(TEXT, I)\n        If (Not REGEX_IS_DIGIT(CHAR)) {\n            Return False\n        }\n        Set I (I + 1)\n    }\n    \n    Return True\n}\n\n// \u{9a8c}\u{8bc1}\u{662f}\u{5426}\u{53ea}\u{5305}\u{542b}\u{5b57}\u{6bcd}\nFunc REGEX_IS_ALPHA(TEXT) {\n    If (LEN(TEXT) == 0) {\n        Return False\n    }\n    \n    Set I 0\n    While (I < LEN(TEXT)) {\n        Set CHAR CHARAT(TEXT, I)\n        If (Not REGEX_IS_ALPHA_CHAR(CHAR)) {\n            Return False\n        }\n        Set I (I + 1)\n    }\n    \n    Return True\n}\n\n// \u{9a8c}\u{8bc1}\u{662f}\u{5426}\u{53ea}\u{5305}\u{542b}\u{5b57}\u{6bcd}\u{548c}\u{6570}\u{5b57}\nFunc REGEX_IS_ALPHANUMERIC(TEXT) {\n    If (LEN(TEXT) == 0) {\n        Return False\n    }\n    \n    Set I 0\n    While (I < LEN(TEXT)) {\n        Set CHAR CHARAT(TEXT, I)\n        If ((Not REGEX_IS_ALPHA_CHAR(CHAR)) And (Not REGEX_IS_DIGIT(CHAR))) {\n            Return False\n        }\n        Set I (I + 1)\n    }\n    \n    Return True\n}\n\n// \u{9a8c}\u{8bc1}\u{7535}\u{8bdd}\u{53f7}\u{7801}\u{ff08}\u{7b80}\u{5316}\u{ff0c}\u{652f}\u{6301}\u{4e2d}\u{56fd}\u{683c}\u{5f0f}\u{ff09}\nFunc REGEX_IS_PHONE(TEXT) {\n    // \u{53bb}\u{9664}\u{5e38}\u{89c1}\u{5206}\u{9694}\u{7b26}\n    Set CLEANED REPLACE(TEXT, \"-\", \"\")\n    Set CLEANED REPLACE(CLEANED, \" \", \"\")\n    Set CLEANED REPLACE(CLEANED, \"(\", \"\")\n    Set CLEANED REPLACE(CLEANED, \")\", \"\")\n    \n    // \u{68c0}\u{67e5}\u{662f}\u{5426}\u{5168}\u{662f}\u{6570}\u{5b57}\n    If (Not REGEX_IS_NUMERIC(CLEANED)) {\n        Return False\n    }\n    \n    // \u{957f}\u{5ea6}\u{68c0}\u{67e5}\u{ff08}\u{4e2d}\u{56fd}\u{624b}\u{673a}\u{53f7}11\u{4f4d}\u{ff0c}\u{5ea7}\u{673a}8-12\u{4f4d}\u{ff09}\n    Set LEN_VAL \
LEN(CLEANED)\n    Return ((LEN_VAL >= 8) And (LEN_VAL <= 12))\n}\n\n// \u{9a8c}\u{8bc1}\u{8eab}\u{4efd}\u{8bc1}\u{53f7}\u{ff08}\u{4e2d}\u{56fd}18\u{4f4d}\u{ff09}\nFunc REGEX_IS_ID_CARD(TEXT) {\n    If (LEN(TEXT) != 18) {\n        Return False\n    }\n    \n    // \u{524d}17\u{4f4d}\u{5fc5}\u{987b}\u{662f}\u{6570}\u{5b57}\n    Set I 0\n    While (I < 17) {\n        Set CHAR CHARAT(TEXT, I)\n        If (Not REGEX_IS_DIGIT(CHAR)) {\n            Return False\n        }\n        Set I (I + 1)\n    }\n    \n    // \u{6700}\u{540e}\u{4e00}\u{4f4d}\u{53ef}\u{4ee5}\u{662f}\u{6570}\u{5b57}\u{6216} X\n    Set LAST UPPER(CHARAT(TEXT, 17))\n    Return (REGEX_IS_DIGIT(LAST) Or (LAST == \"X\"))\n}\n\n// ============================================\n// \u{5b57}\u{7b26}\u{7c7b}\u{578b}\u{68c0}\u{67e5}\u{ff08}\u{8f85}\u{52a9}\u{51fd}\u{6570}\u{ff09}\n// ============================================\n\nFunc REGEX_IS_DIGIT(CHAR) {\n    Return ((CHAR >= \"0\") And (CHAR <= \"9\"))\n}\n\nFunc REGEX_IS_ALPHA_CHAR(CHAR) {\n    Set UPPER_CHAR UPPER(CHAR)\n    Return ((UPPER_CHAR >= \"A\") And (UPPER_CHAR <= \"Z\"))\n}\n\nFunc REGEX_IS_WHITESPACE(CHAR) {\n    Return ((CHAR == \" \") Or (CHAR == \"\\t\") Or (CHAR == \"\\n\") Or (CHAR == \"\\r\"))\n}\n\n// ============================================\n// \u{6587}\u{672c}\u{6e05}\u{7406}\n// ============================================\n\n// \u{5220}\u{9664}\u{6240}\u{6709}\u{7a7a}\u{767d}\u{5b57}\u{7b26}\nFunc REGEX_REMOVE_WHITESPACE(TEXT) {\n    Set RESULT \"\"\n    Set I 0\n    While (I < LEN(TEXT)) {\n        Set CHAR CHARAT(TEXT, I)\n        If (Not REGEX_IS_WHITESPACE(CHAR)) {\n            Set RESULT (RESULT + CHAR)\n        }\n        Set I (I + 1)\n    }\n    Return RESULT\n}\n\n// \u{5220}\u{9664}\u{6240}\u{6709}\u{975e}\u{5b57}\u{6bcd}\u{6570}\u{5b57}\u{5b57}\u{7b26}\nFunc REGEX_REMOVE_NON_ALPHANUMERIC(TEXT) {\n    Set RESULT \"\"\n    Set I 0\n    While (I < LEN(TEXT)) {\n        Set CHAR CHARAT(TEXT, I)\n        If \
(REGEX_IS_ALPHA_CHAR(CHAR) Or REGEX_IS_DIGIT(CHAR)) {\n            Set RESULT (RESULT + CHAR)\n        }\n        Set I (I + 1)\n    }\n    Return RESULT\n}\n\n// \u{5220}\u{9664}\u{6240}\u{6709}\u{6570}\u{5b57}\nFunc REGEX_REMOVE_DIGITS(TEXT) {\n    Set RESULT \"\"\n    Set I 0\n    While (I < LEN(TEXT)) {\n        Set CHAR CHARAT(TEXT, I)\n        If (Not REGEX_IS_DIGIT(CHAR)) {\n            Set RESULT (RESULT + CHAR)\n        }\n        Set I (I + 1)\n    }\n    Return RESULT\n}\n\n// \u{4fdd}\u{7559}\u{6307}\u{5b9a}\u{5b57}\u{7b26}\u{96c6}\nFunc REGEX_KEEP_CHARS(TEXT, ALLOWED) {\n    Set RESULT \"\"\n    Set I 0\n    While (I < LEN(TEXT)) {\n        Set CHAR CHARAT(TEXT, I)\n        If (CONTAINS(ALLOWED, CHAR)) {\n            Set RESULT (RESULT + CHAR)\n        }\n        Set I (I + 1)\n    }\n    Return RESULT\n}\n\n// ============================================\n// \u{5206}\u{8bcd}\u{548c}\u{5206}\u{6bb5}\n// ============================================\n\n// \u{6309}\u{53e5}\u{5b50}\u{5206}\u{5272}\u{ff08}\u{57fa}\u{4e8e}\u{6807}\u{70b9}\u{ff09}\nFunc REGEX_SPLIT_SENTENCES(TEXT) {\n    Set RESULT []\n    Set CURRENT \"\"\n    \n    Set I 0\n    While (I < LEN(TEXT)) {\n        Set CHAR CHARAT(TEXT, I)\n        Set CURRENT (CURRENT + CHAR)\n        \n        // \u{53e5}\u{5b50}\u{7ed3}\u{675f}\u{7b26}\n        If ((CHAR == \".\") Or (CHAR == \"!\") Or (CHAR == \"?\") Or (CHAR == \"\u{3002}\") Or (CHAR == \"\u{ff01}\") Or (CHAR == \"\u{ff1f}\")) {\n            Set TRIMMED TRIM(CURRENT)\n            If (LEN(TRIMMED) > 0) {\n                PUSH(RESULT, TRIMMED)\n            }\n            Set CURRENT \"\"\n        }\n        \n        Set I (I + 1)\n    }\n    \n    // \u{5904}\u{7406}\u{5269}\u{4f59}\u{6587}\u{672c}\n    Set TRIMMED TRIM(CURRENT)\n    If (LEN(TRIMMED) > 0) {\n        PUSH(RESULT, TRIMMED)\n    }\n    \n    Return RESULT\n}\n\n// \u{6309}\u{6bb5}\u{843d}\u{5206}\u{5272}\nFunc REGEX_SPLIT_PARAGRAPHS(TEXT) {\n    Set LINES SPLIT(TEXT, \"\\n\")\n    \
Set RESULT []\n    Set CURRENT \"\"\n    \n    Set I 0\n    While (I < LEN(LINES)) {\n        Set LINE TRIM(LINES[I])\n        \n        If (LEN(LINE) == 0) {\n            // \u{7a7a}\u{884c}\u{ff0c}\u{7ed3}\u{675f}\u{5f53}\u{524d}\u{6bb5}\u{843d}\n            If (LEN(CURRENT) > 0) {\n                PUSH(RESULT, TRIM(CURRENT))\n                Set CURRENT \"\"\n            }\n        } Else {\n            If (LEN(CURRENT) > 0) {\n                Set CURRENT (CURRENT + \" \")\n            }\n            Set CURRENT (CURRENT + LINE)\n        }\n        \n        Set I (I + 1)\n    }\n    \n    // \u{5904}\u{7406}\u{6700}\u{540e}\u{4e00}\u{4e2a}\u{6bb5}\u{843d}\n    If (LEN(CURRENT) > 0) {\n        PUSH(RESULT, TRIM(CURRENT))\n    }\n    \n    Return RESULT\n}\n\n// ============================================\n// \u{7edf}\u{8ba1}\u{51fd}\u{6570}\n// ============================================\n\n// \u{7edf}\u{8ba1}\u{5339}\u{914d}\u{6b21}\u{6570}\nFunc REGEX_COUNT_MATCHES(TEXT, PATTERN) {\n    // Guard: with an empty PATTERN, POS would never advance\n    If (LEN(PATTERN) == 0) {\n        Return 0\n    }\n    \n    Set COUNT 0\n    Set POS 0\n    \n    While (POS < LEN(TEXT)) {\n        Set FOUND INDEXOF(STRSLICE(TEXT, POS, LEN(TEXT)), PATTERN)\n        If (FOUND < 0) {\n            Break\n        }\n        Set COUNT (COUNT + 1)\n        Set POS (POS + FOUND + LEN(PATTERN))\n    }\n    \n    Return COUNT\n}\n\n// \u{7edf}\u{8ba1}\u{5b57}\u{7b26}\u{7c7b}\u{578b}\nFunc REGEX_COUNT_CHARS(TEXT) {\n    Set STATS {}\n    Set STATS[\"total\"] LEN(TEXT)\n    Set STATS[\"alpha\"] 0\n    Set STATS[\"digit\"] 0\n    Set STATS[\"space\"] 0\n    Set STATS[\"other\"] 0\n    \n    Set I 0\n    While (I < LEN(TEXT)) {\n        Set CHAR CHARAT(TEXT, I)\n        \n        If (REGEX_IS_ALPHA_CHAR(CHAR)) {\n            Set STATS[\"alpha\"] (STATS[\"alpha\"] + 1)\n        } Elif (REGEX_IS_DIGIT(CHAR)) {\n            Set STATS[\"digit\"] (STATS[\"digit\"] + 1)\n        } Elif (REGEX_IS_WHITESPACE(CHAR)) {\n            Set STATS[\"space\"] (STATS[\"space\"] + 1)\n        } Else {\n            Set \
STATS[\"other\"] (STATS[\"other\"] + 1)\n        }\n        \n        Set I (I + 1)\n    }\n    \n    Return STATS\n}\n\n";
Expand description

正则风格文本处理