From 8e7c78b21ae5da1a2b4c99db6593d724491831dd Mon Sep 17 00:00:00 2001
From: Christof Schulze <christof.schulze@fau.de>
Date: Fri, 7 Feb 2020 16:17:47 +0100
Subject: [PATCH] Fix #1: DEFAULT_CONFIG and config parameters were all
 transformed to strings in `parsing.to_text`; remove workaround in `Display`

---
 display.py           |   4 +-
 parsing/__init__.py  | 180 ++++++++++++++++++++-----------------------
 requirements.txt     |   2 +-
 requirements_opt.txt |   1 +
 4 files changed, 87 insertions(+), 100 deletions(-)
 create mode 100644 requirements_opt.txt

diff --git a/display.py b/display.py
index c83bd56..7bc6802 100644
--- a/display.py
+++ b/display.py
@@ -201,9 +201,7 @@ class Display(with_metaclass(Singleton, object)):
         """ """
         to_stderr = C.VERBOSE_TO_STDERR
 
-        # print(type(self.verbosity), "\n----\n", type(caplevel))
-        # FIXME verbosity is somehow changing from int at __init__ to string
-        if int(self.verbosity) > caplevel:
+        if self.verbosity > caplevel:
             if host is None:
                 self.display(msg, color=C.COLOR_VERBOSE, stderr=to_stderr)
             else:
diff --git a/parsing/__init__.py b/parsing/__init__.py
index 233b0e9..2a4007c 100644
--- a/parsing/__init__.py
+++ b/parsing/__init__.py
@@ -16,48 +16,69 @@
 # You should have received a copy of the GNU Lesser General Public License
 # along with Ammsml.  If not, see <http://www.gnu.org/licenses/>.
 
-import codecs
-
 BOOLEANS_TRUE = frozenset(('y', 'yes', 'on', '1', 'true', 't', 1, 1.0, True))
 BOOLEANS_FALSE = frozenset(('n', 'no', 'off', '0', 'false', 'f', 0, 0.0, False))
 BOOLEANS = BOOLEANS_TRUE.union(BOOLEANS_FALSE)
 
 
-try:
-    codecs.lookup_error('surrogateescape')
-    HAS_SURROGATEESCAPE = True
-except LookupError:
-    HAS_SURROGATEESCAPE = False
+def to_text(obj, encoding='utf-8', nonstring: str='simplerepr'):
+    """
+    Make sure that a string is a text string
+
+    :param obj:  An object to make sure is a text string.  In most cases this
+        will be either a text string or a byte string.  However, with
+        ``nonstring='simplerepr'``, this can be used as a traceback-free
+        version of ``str(obj)``.
+    :param encoding:  The encoding to use to transform from a byte string to
+        a text string.  Defaults to using 'utf-8'.
 
-_COMPOSED_ERROR_HANDLERS = frozenset((None, 'surrogate_or_replace',
-                                      'surrogate_or_strict',
-                                      'surrogate_then_replace'))
+    :param nonstring: The strategy to use if a nonstring is specified in
+        ``obj``.  Default is 'simplerepr'.  Valid values are:
 
+        :simplerepr: The default.  This takes the ``str`` of the object and
+            then returns the text version of that string.
+        :empty: Return an empty text string
+        :passthru: Return the object passed in
+        :strict: Raise a :exc:`TypeError`
 
-def to_text(obj, encoding='utf-8'):
-    """
+    :return: Typically this returns a text string.  If a nonstring object is
+        passed in this may be a different type depending on the strategy
+        specified by nonstring.  This will never return a byte string.
     """
 
     if isinstance(obj, str):
         return obj
 
-    if isinstance(obj, bytes):
-        # Note: We don't need special handling for surrogate_then_replace
-        # because all bytes will either be made into surrogates or are valid
-        # to decode.
-        return obj.decode(encoding)
+    # The error handler to use if the byte string is not decodable using the specified encoding.
+    errors = 'surrogateescape'
 
-    try:
-        value = str(obj)
+    if isinstance(obj, bytes):
+        # Note: We don't need special handling for surrogateescape because
+        # all bytes will either be made into surrogates or are valid to decode.
+        return obj.decode(encoding, errors)
 
-    except UnicodeError:
+    # Note: We do these last even though we have to call to_text again on the
+    # value because we're optimizing the common case
+    if nonstring == 'simplerepr':
         try:
-            value = repr(obj)
+            value = str(obj)
+
         except UnicodeError:
-            # Giving up
-            raise TypeError('Invalid value %s for to_text\'s nonstring parameter' % value)
+            try:
+                value = repr(obj)
+            except UnicodeError:
+                # Giving up
+                return ''
+    elif nonstring == 'passthru':
+        return obj
+    elif nonstring == 'empty':
+        return ''
+    elif nonstring == 'strict':
+        raise TypeError('obj must be a string type')
+    else:
+        raise TypeError('Invalid value %s for to_text\'s nonstring parameter' % nonstring)
 
-    return to_text(value)
+    return to_text(value, encoding, errors)
 
 
 def boolean(value):
@@ -77,39 +98,7 @@ def boolean(value):
                     to_text(value), ', '.join(repr(i) for i in BOOLEANS)))
 
 
-def unquote(data):
-    """
-    removes first and last quotes from a string,
-    if the string starts and ends with the same quotes
-    """
-    if is_quoted(data):
-        return data[1:-1]
-
-    return data
-
-
-def is_quoted(data):
-    return len(data) > 1 and data[0] == data[-1] and data[0] in ('"', "'") and data[-2] != '\\'
-
-
-def humanize_time(d) -> str:
-    """
-    Return a human-friendly description of elapsed time
-    :param d:
-    :return:
-    """
-    human = ""
-    if d >= 3600:
-        human += "%dh" % (int(d) // 3600)
-        d %= 3600
-    if d >= 60:
-        human += "%dm" % (int(d) // 60)
-        d %= 60
-    human += ("%.3fs" % d)
-    return human
-
-
-def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
+def to_bytes(obj, encoding='utf-8', nonstring='simplerepr'):
     """
     Make sure that a string is a byte string
     :param obj: An object to make sure is a byte string.  In most cases this
@@ -118,33 +107,6 @@ def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
         version of ``str(obj)``.
     :param encoding: The encoding to use to transform from a text string to
         a byte string.  Defaults to using 'utf-8'.
-    :param errors: The error handler to use if the text string is not
-        encodable using the specified encoding.  Any valid `codecs error
-        handler <https://docs.python.org/2/library/codecs.html#codec-base-classes>`_
-        may be specified. There are three additional error strategies
-        specifically aimed at helping people to port code.  The first two are:
-            :surrogate_or_strict: Will use ``surrogateescape`` if it is a valid
-                handler, otherwise it will use ``strict``
-            :surrogate_or_replace: Will use ``surrogateescape`` if it is a valid
-                handler, otherwise it will use ``replace``.
-        Because ``surrogateescape`` was added in Python3 this usually means that
-        Python3 will use ``surrogateescape``
-
-        FIXME and Python2 will use the fallback
-        error handler. Note that the code checks for ``surrogateescape`` when the
-        module is imported.  If you have a backport of ``surrogateescape`` for
-        Python2, be sure to register the error handler prior to importing this
-        module.
-        The last error handler is:
-            :surrogate_then_replace: Will use ``surrogateescape`` if it is a valid
-                handler.  If encoding with ``surrogateescape`` would traceback,
-                surrogates are first replaced with a replacement characters
-                and then the string is encoded using ``replace`` (which replaces
-                the rest of the nonencodable bytes).
-                FIXME (Python 2 stuff) If ``surrogateescape`` is not present it will simply use ``replace``.
-                This strategy is designed to never traceback when it attempts
-                to encode a string.
-        The default is ``surrogate_then_replace``.
     :param nonstring: The strategy to use if a nonstring is specified in
         ``obj``.  Default is 'simplerepr'.  Valid values are:
         :simplerepr: The default.  This takes the ``str`` of the object and
@@ -159,22 +121,17 @@ def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
         string is valid in the specified encoding.  If it's important that the
         byte string is in the specified encoding do::
             encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8')
-    .. version_changed:: 2.3
-        Added the ``surrogate_then_replace`` error handler and made it the default error handler.
     """
+
     if isinstance(obj, bytes):
         return obj
 
+    # The error handler to use if the text string is not encodable using the specified encoding.
+    errors = 'surrogateescape'
+
     # We're given a text string
     # If it has surrogates, we know because it will decode
     original_errors = errors
-    if errors in _COMPOSED_ERROR_HANDLERS:
-        if HAS_SURROGATEESCAPE:
-            errors = 'surrogateescape'
-        elif errors == 'surrogate_or_strict':
-            errors = 'strict'
-        else:
-            errors = 'replace'
 
     if isinstance(obj, str):
         try:
@@ -187,8 +144,8 @@ def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
 
                 # Slow but works
                 return_string = obj.encode('utf-8', 'surrogateescape')
-                return_string = return_string.decode('utf-8', 'replace')
-                return return_string.encode(encoding, 'replace')
+        #         return_string = return_string.decode('utf-8', 'replace')
+        #         return return_string.encode(encoding, 'replace')
             raise
 
     # Note: We do these last even though we have to call to_bytes again on the
@@ -205,7 +162,6 @@ def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
     elif nonstring == 'passthru':
         return obj
     elif nonstring == 'empty':
-        # python2.4 doesn't have b''
         return to_bytes('')
     elif nonstring == 'strict':
         raise TypeError('obj must be a string type')
@@ -213,3 +169,35 @@ def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
         raise TypeError('Invalid value %s for to_bytes\' nonstring parameter' % nonstring)
 
     return to_bytes(value, encoding, errors)
+
+
+def unquote(data):
+    """
+    removes first and last quotes from a string,
+    if the string starts and ends with the same quotes
+    """
+    if is_quoted(data):
+        return data[1:-1]
+
+    return data
+
+
+def is_quoted(data):
+    return len(data) > 1 and data[0] == data[-1] and data[0] in ('"', "'") and data[-2] != '\\'
+
+
+def humanize_time(d) -> str:
+    """
+    Return a human-friendly description of elapsed time
+    :param d:
+    :return:
+    """
+    human = ""
+    if d >= 3600:
+        human += "%dh" % (int(d) // 3600)
+        d %= 3600
+    if d >= 60:
+        human += "%dm" % (int(d) // 60)
+        d %= 60
+    human += ("%.3fs" % d)
+    return human
diff --git a/requirements.txt b/requirements.txt
index a1b1204..496efad 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 Jinja2==2.10.3
 MarkupSafe==1.1.1
-PyYAML==5.1.2
+PyYAML>=5.1.2
diff --git a/requirements_opt.txt b/requirements_opt.txt
new file mode 100644
index 0000000..3b6f072
--- /dev/null
+++ b/requirements_opt.txt
@@ -0,0 +1 @@
+argcomplete==1.11.1
-- 
GitLab