grex 1.4.6

grex generates regular expressions from user-provided test cases.
Documentation
#
# Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import inspect
import pytest
import re

from grex import RegExpBuilder


@pytest.mark.parametrize(
    "test_cases,expected_pattern",
    [
        pytest.param(["abc", "abd", "abe"], "^ab[c-e]$"),
    ]
)
def test_default_settings(test_cases, expected_pattern):
    pattern = RegExpBuilder.from_test_cases(test_cases).build()
    assert pattern == expected_pattern
    for test_case in test_cases:
        assert re.match(pattern, test_case)


@pytest.mark.parametrize(
    "test_cases,expected_pattern",
    [
        pytest.param(["My ♥ and 💩 is yours."], "^My \\u2665 and \\U0001f4a9 is yours\\.$"),
    ]
)
def test_escaping(test_cases, expected_pattern):
    pattern = (RegExpBuilder.from_test_cases(test_cases)
               .with_escaping_of_non_ascii_chars(use_surrogate_pairs=False)
               .build())
    assert pattern == expected_pattern
    for test_case in test_cases:
        assert re.match(pattern, test_case)


@pytest.mark.parametrize(
    "test_cases,expected_pattern",
    [
        pytest.param(["My ♥ and 💩 is yours."], "^My \\u2665 and \\ud83d\\udca9 is yours\\.$"),
    ]
)
def test_escaping_with_surrogate_pairs(test_cases, expected_pattern):
    pattern = (RegExpBuilder.from_test_cases(test_cases)
               .with_escaping_of_non_ascii_chars(use_surrogate_pairs=True)
               .build())
    assert pattern == expected_pattern
    # module re does not support matching surrogate pairs


@pytest.mark.parametrize(
    "test_cases,expected_pattern",
    [
        pytest.param(["efgh", "abcxy", "abcw"], "^(abc(xy|w)|efgh)$"),
    ]
)
def test_capturing_groups(test_cases, expected_pattern):
    pattern = (RegExpBuilder.from_test_cases(test_cases)
               .with_capturing_groups()
               .build())
    assert pattern == expected_pattern
    for test_case in test_cases:
        assert re.match(pattern, test_case)


@pytest.mark.parametrize(
    "test_cases,expected_pattern",
    [
        pytest.param(["efgh", "abcxy", "abcw"], "(?:abc(?:xy|w)|efgh)"),
    ]
)
def test_without_anchors(test_cases, expected_pattern):
    pattern = (RegExpBuilder.from_test_cases(test_cases)
               .without_anchors()
               .build())
    assert pattern == expected_pattern
    for test_case in test_cases:
        assert re.match(pattern, test_case)


@pytest.mark.parametrize(
    "test_cases,expected_pattern",
    [
        pytest.param(["ABC", "zBC", "abc", "AbC", "aBc"], "(?i)^[az]bc$"),
    ]
)
def test_case_insensitive_matching(test_cases, expected_pattern):
    pattern = (RegExpBuilder.from_test_cases(test_cases)
               .with_case_insensitive_matching()
               .build())
    assert pattern == expected_pattern
    for test_case in test_cases:
        assert re.match(pattern, test_case)


@pytest.mark.parametrize(
    "test_cases,expected_pattern",
    [
        pytest.param(
            ["[a-z]", "(d,e,f)"],
            inspect.cleandoc("""
                (?x)
                ^
                  (?:
                    \\(d,e,f\\)
                    |
                    \\[a\\-z\\]
                  )
                $
                """)
        ),
    ]
)
def test_verbose_mode(test_cases, expected_pattern):
    pattern = (RegExpBuilder.from_test_cases(test_cases)
               .with_verbose_mode()
               .build())
    assert pattern == expected_pattern
    for test_case in test_cases:
        assert re.match(pattern, test_case)


@pytest.mark.parametrize(
    "test_cases,expected_pattern",
    [
        pytest.param(
            ["Ä@Ö€Ü", "ä@ö€ü", "Ä@ö€Ü", "ä@Ö€ü"],
            inspect.cleandoc("""
                (?ix)
                ^
                  ä@ö€ü
                $
                """)
        )
    ]
)
def test_case_insensitive_matching_and_verbose_mode(test_cases, expected_pattern):
    pattern = (RegExpBuilder.from_test_cases(test_cases)
               .with_case_insensitive_matching()
               .with_verbose_mode()
               .build())
    assert pattern == expected_pattern
    for test_case in test_cases:
        assert re.match(pattern, test_case)


@pytest.mark.parametrize(
    "test_cases,expected_pattern",
    [
        pytest.param(["a", "b\nx\nx", "c"], "^(?:b(?:\\nx){2}|[ac])$"),
    ]
)
def test_conversion_of_repetitions(test_cases, expected_pattern):
    pattern = (RegExpBuilder.from_test_cases(test_cases)
               .with_conversion_of_repetitions()
               .build())
    assert pattern == expected_pattern
    for test_case in test_cases:
        assert re.match(pattern, test_case)


@pytest.mark.parametrize(
    "test_cases,expected_pattern",
    [
        pytest.param(["My ♥♥♥ and 💩💩 is yours."], "^My \\u2665{3} and \\U0001f4a9{2} is yours\\.$"),
    ]
)
def test_escaping_and_conversion_of_repetitions(test_cases, expected_pattern):
    pattern = (RegExpBuilder.from_test_cases(test_cases)
               .with_escaping_of_non_ascii_chars(use_surrogate_pairs=False)
               .with_conversion_of_repetitions()
               .build())
    assert pattern == expected_pattern
    for test_case in test_cases:
        assert re.match(pattern, test_case)


@pytest.mark.parametrize(
    "test_cases,expected_pattern",
    [
        pytest.param(["a1b2c3"], "^a\\db\\dc\\d$"),
    ]
)
def test_conversion_of_digits(test_cases, expected_pattern):
    pattern = (RegExpBuilder.from_test_cases(test_cases)
               .with_conversion_of_digits()
               .build())
    assert pattern == expected_pattern
    for test_case in test_cases:
        assert re.match(pattern, test_case)


@pytest.mark.parametrize(
    "test_cases,expected_pattern",
    [
        pytest.param(["a1b2c3"], "^\\D1\\D2\\D3$"),
    ]
)
def test_conversion_of_non_digits(test_cases, expected_pattern):
    pattern = (RegExpBuilder.from_test_cases(test_cases)
               .with_conversion_of_non_digits()
               .build())
    assert pattern == expected_pattern
    for test_case in test_cases:
        assert re.match(pattern, test_case)


@pytest.mark.parametrize(
    "test_cases,expected_pattern",
    [
        pytest.param(["\n\t", "\r"], "^\\s(?:\\s)?$"),
    ]
)
def test_conversion_of_whitespace(test_cases, expected_pattern):
    pattern = (RegExpBuilder.from_test_cases(test_cases)
               .with_conversion_of_whitespace()
               .build())
    assert pattern == expected_pattern
    for test_case in test_cases:
        assert re.match(pattern, test_case)


@pytest.mark.parametrize(
    "test_cases,expected_pattern",
    [
        pytest.param(["a1 b2 c3"], "^\\S\\S \\S\\S \\S\\S$"),
    ]
)
def test_conversion_of_non_whitespace(test_cases, expected_pattern):
    pattern = (RegExpBuilder.from_test_cases(test_cases)
               .with_conversion_of_non_whitespace()
               .build())
    assert pattern == expected_pattern
    for test_case in test_cases:
        assert re.match(pattern, test_case)


@pytest.mark.parametrize(
    "test_cases,expected_pattern",
    [
        pytest.param(["abc", "1234"], "^\\w\\w\\w(?:\\w)?$"),
    ]
)
def test_conversion_of_words(test_cases, expected_pattern):
    pattern = (RegExpBuilder.from_test_cases(test_cases)
               .with_conversion_of_words()
               .build())
    assert pattern == expected_pattern
    for test_case in test_cases:
        assert re.match(pattern, test_case)


@pytest.mark.parametrize(
    "test_cases,expected_pattern",
    [
        pytest.param(["abc 1234"], "^abc\\W1234$"),
    ]
)
def test_conversion_of_non_words(test_cases, expected_pattern):
    pattern = (RegExpBuilder.from_test_cases(test_cases)
               .with_conversion_of_non_words()
               .build())
    assert pattern == expected_pattern
    for test_case in test_cases:
        assert re.match(pattern, test_case)


@pytest.mark.parametrize(
    "test_cases,expected_pattern",
    [
        pytest.param(["aababab"], "^aababab$"),
        pytest.param(["aabababab"], "^a(?:ab){4}$")
    ]
)
def test_minimum_repetitions(test_cases, expected_pattern):
    pattern = (RegExpBuilder.from_test_cases(test_cases)
               .with_conversion_of_repetitions()
               .with_minimum_repetitions(3)
               .build())
    assert pattern == expected_pattern
    for test_case in test_cases:
        assert re.match(pattern, test_case)


@pytest.mark.parametrize(
    "test_cases,expected_pattern",
    [
        pytest.param(["ababab"], "^ababab$"),
        pytest.param(["abcabcabc"], "^(?:abc){3}$")
    ]
)
def test_minimum_substring_length(test_cases, expected_pattern):
    pattern = (RegExpBuilder.from_test_cases(test_cases)
               .with_conversion_of_repetitions()
               .with_minimum_substring_length(3)
               .build())
    assert pattern == expected_pattern
    for test_case in test_cases:
        assert re.match(pattern, test_case)


def test_error_for_empty_test_cases():
    with pytest.raises(ValueError) as exception_info:
        RegExpBuilder.from_test_cases([])
    assert (
        exception_info.value.args[0] ==
        "No test cases have been provided for regular expression generation"
    )


def test_error_for_invalid_minimum_repetitions():
    with pytest.raises(ValueError) as exception_info:
        RegExpBuilder.from_test_cases(["abcd"]).with_minimum_repetitions(-4)
    assert (
        exception_info.value.args[0] ==
        "Quantity of minimum repetitions must be greater than zero"
    )


def test_error_for_invalid_minimum_substring_length():
    with pytest.raises(ValueError) as exception_info:
        RegExpBuilder.from_test_cases(["abcd"]).with_minimum_substring_length(-2)
    assert (
        exception_info.value.args[0] ==
        "Minimum substring length must be greater than zero"
    )