From 39e843ec2422c4d5c0deb7d0276bb758687205b8 Mon Sep 17 00:00:00 2001
From: Christof Schulze <christof.schulze@fau.de>
Date: Tue, 4 Feb 2020 22:56:29 +0100
Subject: [PATCH] added `to_bytes` for piping config dump and list to `less`
 workaround for issue #1

---
 display.py          |   7 ++-
 parsing/__init__.py | 117 ++++++++++++++++++++++++++++++++++++++++++++
 singleton.py        |   3 +-
 3 files changed, 124 insertions(+), 3 deletions(-)

diff --git a/display.py b/display.py
index b89f4a1..c83bd56 100644
--- a/display.py
+++ b/display.py
@@ -197,10 +197,13 @@ class Display(with_metaclass(Singleton, object)):
             else:
                 self.display("%6d %0.5f [%s]: %s" % (os.getpid(), time.time(), host, msg), color=C.COLOR_DEBUG)
 
-    def verbose(self, msg, host=None, caplevel=2):
+    def verbose(self, msg, host=None, caplevel: int = 2):
         """ """
         to_stderr = C.VERBOSE_TO_STDERR
-        if self.verbosity > caplevel:
+
+        # FIXME: verbosity is an int at __init__ but somehow turns into a str
+        # later on; coerce it here until the cause is found.
+        if int(self.verbosity) > caplevel:
             if host is None:
                 self.display(msg, color=C.COLOR_VERBOSE, stderr=to_stderr)
             else:
diff --git a/parsing/__init__.py b/parsing/__init__.py
index 1a1e5df..233b0e9 100644
--- a/parsing/__init__.py
+++ b/parsing/__init__.py
@@ -16,11 +16,24 @@
 # You should have received a copy of the GNU Lesser General Public License
 # along with Ammsml.  If not, see <http://www.gnu.org/licenses/>.
 
+import codecs
+
 BOOLEANS_TRUE = frozenset(('y', 'yes', 'on', '1', 'true', 't', 1, 1.0, True))
 BOOLEANS_FALSE = frozenset(('n', 'no', 'off', '0', 'false', 'f', 0, 0.0, False))
 BOOLEANS = BOOLEANS_TRUE.union(BOOLEANS_FALSE)
 
 
+try:
+    codecs.lookup_error('surrogateescape')
+except LookupError:
+    HAS_SURROGATEESCAPE = False
+else:
+    HAS_SURROGATEESCAPE = True
+
+_COMPOSED_ERROR_HANDLERS = frozenset(  # pseudo-handlers resolved by to_bytes()
+    {None, 'surrogate_or_replace', 'surrogate_or_strict', 'surrogate_then_replace'})
+
+
 def to_text(obj, encoding='utf-8'):
     """
     """
@@ -96,3 +109,107 @@ def humanize_time(d) -> str:
     return human
 
 
+def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
+    """
+    Return ``obj`` as a byte string.
+
+    :param obj: The object to convert.  Usually a text or byte string; byte
+        strings are returned unchanged.  With ``nonstring='simplerepr'``
+        this can be used as a traceback-free version of ``str(obj)``.
+    :param encoding: The encoding used to transform a text string into a
+        byte string.  Defaults to 'utf-8'.
+    :param errors: The error handler to use if the text string is not
+        encodable using the specified encoding.  Any valid `codecs error
+        handler <https://docs.python.org/3/library/codecs.html#error-handlers>`_
+        may be specified.  Three additional composite strategies are
+        understood:
+            :surrogate_or_strict: Will use ``surrogateescape`` if it is a
+                registered handler, otherwise ``strict``.
+            :surrogate_or_replace: Will use ``surrogateescape`` if it is a
+                registered handler, otherwise ``replace``.
+            :surrogate_then_replace: Will use ``surrogateescape`` if it is a
+                registered handler.  If encoding with ``surrogateescape``
+                raises, the escaped surrogates are first replaced with a
+                replacement character and the string is then encoded
+                using ``replace`` (which replaces the rest of the
+                nonencodable characters).  If ``surrogateescape`` is not
+                registered it will simply use ``replace``.
+        ``None`` (the default) behaves like ``surrogate_then_replace``.
+        Whether ``surrogateescape`` is available is checked once, when this
+        module is imported; a backported handler must be registered before
+        the import to be picked up.
+    :param nonstring: The strategy to use if ``obj`` is neither a text nor
+        a byte string.  Default is 'simplerepr'.  Valid values are:
+        :simplerepr: The default.  Takes ``str(obj)`` (falling back to
+            ``repr(obj)``, then to an empty byte string, on
+            :exc:`UnicodeError`) and returns the bytes version of it.
+        :empty: Return an empty byte string
+        :passthru: Return the object passed in
+        :strict: Raise a :exc:`TypeError`
+    :return: Typically a byte string.  With ``nonstring='passthru'`` a
+        nonstring ``obj`` is returned as-is, so the result may be of a
+        different type.  This will never return a text string.
+    :raises TypeError: If ``obj`` is a nonstring and ``nonstring`` is
+        'strict' or is not one of the values listed above.
+    :raises UnicodeEncodeError: Re-raised when encoding fails and ``errors``
+        is neither ``None`` nor ``surrogate_then_replace``.
+
+    .. note:: If passed a byte string, this function does not check that the
+        string is valid in the specified encoding.  If it's important that the
+        byte string is in the specified encoding do::
+            encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8')
+
+    .. version_changed:: 2.3
+        Added the ``surrogate_then_replace`` error handler and made it the default error handler.
+    """
+    if isinstance(obj, bytes):
+        return obj
+
+    # Resolve the composite handler names to a real codec error handler, but
+    # remember what the caller asked for: the fallbacks below depend on it.
+    original_errors = errors
+    if errors in _COMPOSED_ERROR_HANDLERS:
+        if HAS_SURROGATEESCAPE:
+            errors = 'surrogateescape'
+        elif errors == 'surrogate_or_strict':
+            errors = 'strict'
+        else:
+            errors = 'replace'
+
+    if isinstance(obj, str):
+        try:
+            # Try this first as it's the fastest
+            return obj.encode(encoding, errors)
+        except UnicodeEncodeError:
+            if original_errors in (None, 'surrogate_then_replace'):
+                # Reached when the caller asked for surrogate_then_replace (or the
+                # default) and surrogateescape could not encode the text.
+
+                # Slow but works
+                return_string = obj.encode('utf-8', 'surrogateescape')
+                return_string = return_string.decode('utf-8', 'replace')
+                return return_string.encode(encoding, 'replace')
+            raise
+
+    # Nonstrings are handled last to keep the common (string) case fast.  The
+    # recursive call gets original_errors so composite handlers keep their fallback.
+    if nonstring == 'simplerepr':
+        try:
+            value = str(obj)
+        except UnicodeError:
+            try:
+                value = repr(obj)
+            except UnicodeError:
+                # Neither str() nor repr() worked: give up
+                return b''
+    elif nonstring == 'passthru':
+        return obj
+    elif nonstring == 'empty':
+        # Nothing to encode: hand back an empty byte string
+        return b''
+    elif nonstring == 'strict':
+        raise TypeError('obj must be a string type')
+    else:
+        raise TypeError('Invalid value %s for to_bytes\' nonstring parameter' % nonstring)
+
+    return to_bytes(value, encoding, original_errors)
diff --git a/singleton.py b/singleton.py
index bb4b69e..79a745a 100644
--- a/singleton.py
+++ b/singleton.py
@@ -58,6 +58,7 @@ def with_metaclass(meta, *bases):
             return meta.__prepare__(name, bases)
     return type.__new__(metaclass, 'temporary_class', (), {})
 
+
 def add_metaclass(metaclass):
     """Class decorator for creating a class with a metaclass."""
     def wrapper(cls):
@@ -71,4 +72,4 @@ def add_metaclass(metaclass):
         orig_vars.pop('__dict__', None)
         orig_vars.pop('__weakref__', None)
         return metaclass(cls.__name__, cls.__bases__, orig_vars)
-    return wrapper
\ No newline at end of file
+    return wrapper
-- 
GitLab