#3 Add tests for the OpamString type
Merged 4 years ago by jjames. Opened 4 years ago by defolos.
defolos/opam2rpm more_tests  into  master

file modified
+21 -13
@@ -42,10 +42,15 @@ 

  file.

  

  """

+ from __future__ import annotations

  

  from distutils.sysconfig import get_python_lib

  from enum import IntEnum

  from functools import partial

+ from typing import (

+     Any, AnyStr, Callable, Dict, Iterator, List, Match, NoReturn, Optional,

+     Pattern, Protocol, Set, Tuple, Union

+ )

  import os

  import platform

  import re
@@ -145,6 +150,7 @@ 

      return token_type == TokenType.EQ \

          or TokenType.PLUSEQ <= token_type <= TokenType.EQPLUSEQ

  

+ 

  class OpamString(pp.Token):

      """Token for matching strings that are delimited by quoting characters.

      The opam file format allows for a small set of escape sequences.
@@ -152,8 +158,9 @@ 

      This code was largely borrowed from pyparsing.QuotedString.

  

      """

+     strRepr: Optional[str]

  

-     def __init__(self, quote_char):

+     def __init__(self, quote_char: str) -> None:

          """

          Initialize an opam string.

  
@@ -169,9 +176,9 @@ 

                            SyntaxWarning, stacklevel=2)

              raise SyntaxError()

  

-         self.quote_char = quote_char

-         self.quote_char_len = len(quote_char)

-         self.first_quote_char = quote_char[0]

+         self.quote_char: str = quote_char

+         self.quote_char_len: int = len(quote_char)

+         self.first_quote_char: str = quote_char[0]

          self.flags = re.MULTILINE | re.DOTALL

          self.pattern = r'%s(?:[^%s]' \

              % (re.escape(self.quote_char),
@@ -188,19 +195,20 @@ 

          self.pattern += (r')*%s' % re.escape(self.quote_char))

  

          try:

-             self.re = re.compile(self.pattern, self.flags)

-             self.re_match = self.re.match

+             self.re: Pattern = re.compile(self.pattern, self.flags)

+             self.re_match: Callable[[AnyStr, int], Optional[Match]] \

+                 = self.re.match

          except re.error:

-             warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,

+             warnings.warn(f"invalid pattern ({self.pattern}) passed to Regex",

                            SyntaxWarning, stacklevel=2)

              raise

  

-         self.name = str(self)

-         self.errmsg = "Expected " + self.name

+         self.name: str = str(self)

+         self.errmsg: str = "Expected " + self.name

          self.mayIndexError = False

          self.mayReturnEmpty = True

  

-     def parseImpl(self, instring, loc, doActions=True):

+     def parseImpl(self, instring, loc, doActions=True) -> Tuple[int, str]:

          """Parse an opam string."""

          result = self.re_match(instring, loc) \

              if instring[loc] == self.first_quote_char else None
@@ -232,7 +240,7 @@ 

              idx = ret.find('\\x')

              while idx >= 0:

                  digits = ret[(idx + 2):(idx + 4)]

-                 ret = ret[:idx] + int(digits, 16) + ret[idx+4:]

+                 ret = ret[:idx] + chr(int(digits, 16)) + ret[idx+4:]

                  idx = ret.find('\\x', idx + 1)

  

              # replace decimal character literals
@@ -243,12 +251,12 @@ 

                      char = int(digits)

                      if char < 0 or char > 255:

                          raise ValueError('illegal escape sequence')

-                     ret = ret[:idx] + char + ret[idx+4:]

+                     ret = ret[:idx] + chr(char) + ret[idx+4:]

                  idx = ret.find('\\', idx + 1)

  

          return loc, ret

  

-     def __str__(self):

+     def __str__(self) -> str:

          try:

              return super(OpamString, self).__str__()

          except Exception:

@@ -0,0 +1,90 @@ 

+ from opam2rpm.opamparser import OpamString

+ 

+ from pyparsing import ParseException

+ import pytest

+ 

+ 

+ class TestOpamString:

+     def test_throws_on_empty_strings(self):

+         with pytest.raises(SyntaxError):

+             OpamString("")

+ 

+     def test_throws_on_whitespace(self):

+         with pytest.raises(SyntaxError):

+             OpamString("   ")

+ 

+ 

+ class TestParseImpl:

+     single_quote = OpamString('"')

+     pipe_quote = OpamString("|")

+     crazy_quote = OpamString("fedora!!")

+ 

+     def test_throws_on_invalid_input(self):

+         with pytest.raises(ParseException):

+             TestParseImpl.single_quote.parseImpl("foo", 0)

+         with pytest.raises(ParseException):

+             TestParseImpl.pipe_quote.parseImpl('"', 0)

+ 

+     def test_extracts_quoted_string(self):

+         assert TestParseImpl.single_quote.parseImpl('"foo"', 0) == (5, "foo")

+         assert TestParseImpl.single_quote.parseImpl('"foo"  ', 0) == (5, "foo")

+         assert TestParseImpl.single_quote.parseImpl('"fooo"asdf', 0) == (

+             6,

+             "fooo",

+         )

+ 

+     def test_start_parsing_later(self):

+         assert TestParseImpl.single_quote.parseImpl('as"foo"', 2) == (7, "foo")

+         assert TestParseImpl.single_quote.parseImpl('|||"foo"yada', 3) == (

+             8,

+             "foo",

+         )

+ 

+     def test_multiple_quotes(self):

+         assert TestParseImpl.pipe_quote.parseImpl("|abc|def|xyz|", 0) == (

+             5,

+             "abc",

+         )

+         assert TestParseImpl.pipe_quote.parseImpl("|abc|def|xyz|", 4) == (

+             9,

+             "def",

+         )

+         assert TestParseImpl.pipe_quote.parseImpl("|abc|def|xyz|", 8) == (

+             13,

+             "xyz",

+         )

+ 

+     def test_quoted_whitespace(self):

+         assert TestParseImpl.pipe_quote.parseImpl("|\t|", 0) == (3, "\t")

+         assert TestParseImpl.pipe_quote.parseImpl("|  |", 0) == (4, "  ")

+ 

+     def test_empty_string(self):

+         assert TestParseImpl.pipe_quote.parseImpl("||", 0) == (2, "")

+ 

+     def test_string_with_escapes(self):

+         assert TestParseImpl.pipe_quote.parseImpl("|\\t\\r\\n|", 0) == (

+             8,

+             "\t\r\n",

+         )

+ 

+     def test_string_with_hex_escapes(self):

+         assert TestParseImpl.pipe_quote.parseImpl("|abc\\x90efg|", 0) == (

+             12,

+             "abc\x90efg",

+         )

+         assert TestParseImpl.pipe_quote.parseImpl("|abc\\x5aefg|", 0) == (

+             12,

+             r"abcZefg",

+         )

+ 

+     def test_string_with_decimal_escapes(self):

+         assert TestParseImpl.single_quote.parseImpl(

+             '"\\101\\102\\103"', 0

+         ) == (14, "efg")

+ 

+     def test_string_with_too_large_decimal_escapes(self):

+         with pytest.raises(ValueError, match="illegal escape sequence"):

+             TestParseImpl.single_quote.parseImpl('"\\301"', 0)

+ 

+     def test_string_with_nondecimal_escapes(self):

+         assert TestParseImpl.single_quote.parseImpl('"\\as"', 0) == (5, "\\as")

I have added type hints and tests to the OpamString type and while doing that realized that decoding of escape strings does not work properly. I have therefore changed lines 80 & 89 in opamparser.py to properly handle escape sequences.

However, I am not entirely sure whether that is the correct result. Please take a look at the unit tests, whether the results are what you expect.

Yes, that looks correct. Thanks for the tests and bug fixes!

Pull-Request has been merged by jjames

4 years ago