From 39e843ec2422c4d5c0deb7d0276bb758687205b8 Mon Sep 17 00:00:00 2001 From: Christof Schulze Date: Tue, 4 Feb 2020 22:56:29 +0100 Subject: [PATCH] added `to_bytes` for piping config dump and list to `less` workaround for issue #1 --- display.py | 7 ++- parsing/__init__.py | 117 ++++++++++++++++++++++++++++++++++++++++++++ singleton.py | 3 +- 3 files changed, 124 insertions(+), 3 deletions(-) diff --git a/display.py b/display.py index b89f4a1..c83bd56 100644 --- a/display.py +++ b/display.py @@ -197,10 +197,13 @@ class Display(with_metaclass(Singleton, object)): else: self.display("%6d %0.5f [%s]: %s" % (os.getpid(), time.time(), host, msg), color=C.COLOR_DEBUG) - def verbose(self, msg, host=None, caplevel=2): + def verbose(self, msg, host=None, caplevel: int = 2): """ """ to_stderr = C.VERBOSE_TO_STDERR - if self.verbosity > caplevel: + + # print(type(self.verbosity), "\n----\n", type(caplevel)) + # FIXME verbosity is somehow changing from int at __init__ to string + if int(self.verbosity) > caplevel: if host is None: self.display(msg, color=C.COLOR_VERBOSE, stderr=to_stderr) else: diff --git a/parsing/__init__.py b/parsing/__init__.py index 1a1e5df..233b0e9 100644 --- a/parsing/__init__.py +++ b/parsing/__init__.py @@ -16,11 +16,24 @@ # You should have received a copy of the GNU Lesser General Public License # along with Ammsml. If not, see . 
+import codecs + BOOLEANS_TRUE = frozenset(('y', 'yes', 'on', '1', 'true', 't', 1, 1.0, True)) BOOLEANS_FALSE = frozenset(('n', 'no', 'off', '0', 'false', 'f', 0, 0.0, False)) BOOLEANS = BOOLEANS_TRUE.union(BOOLEANS_FALSE) +try: + codecs.lookup_error('surrogateescape') + HAS_SURROGATEESCAPE = True +except LookupError: + HAS_SURROGATEESCAPE = False + +_COMPOSED_ERROR_HANDLERS = frozenset((None, 'surrogate_or_replace', + 'surrogate_or_strict', + 'surrogate_then_replace')) + + def to_text(obj, encoding='utf-8'): """ """ @@ -96,3 +109,107 @@ def humanize_time(d) -> str: return human +def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'): + """ + Make sure that a string is a byte string + :param obj: An object to make sure is a byte string. In most cases this + will be either a text string or a byte string. However, with + ``nonstring='simplerepr'``, this can be used as a traceback-free + version of ``str(obj)``. + :param encoding: The encoding to use to transform from a text string to + a byte string. Defaults to using 'utf-8'. + :param errors: The error handler to use if the text string is not + encodable using the specified encoding. Any valid `codecs error + handler `_ + may be specified. There are three additional error strategies + specifically aimed at helping people to port code. The first two are: + :surrogate_or_strict: Will use ``surrogateescape`` if it is a valid + handler, otherwise it will use ``strict`` + :surrogate_or_replace: Will use ``surrogateescape`` if it is a valid + handler, otherwise it will use ``replace``. + Because ``surrogateescape`` was added in Python3 this usually means that + Python3 will use ``surrogateescape`` + + FIXME and Python2 will use the fallback + error handler. Note that the code checks for ``surrogateescape`` when the + module is imported. If you have a backport of ``surrogateescape`` for + Python2, be sure to register the error handler prior to importing this + module. 
+ The last error handler is: + :surrogate_then_replace: Will use ``surrogateescape`` if it is a valid + handler. If encoding with ``surrogateescape`` would traceback, + surrogates are first replaced with replacement characters + and then the string is encoded using ``replace`` (which replaces + the rest of the nonencodable bytes). + FIXME (Python 2 stuff) If ``surrogateescape`` is not present it will simply use ``replace``. + This strategy is designed to never traceback when it attempts + to encode a string. + The default is ``surrogate_then_replace``. + :param nonstring: The strategy to use if a nonstring is specified in + ``obj``. Default is 'simplerepr'. Valid values are: + :simplerepr: The default. This takes the ``str`` of the object and + then returns the bytes version of that string. + :empty: Return an empty byte string + :passthru: Return the object passed in + :strict: Raise a :exc:`TypeError` + :return: Typically this returns a byte string. If a nonstring object is + passed in this may be a different type depending on the strategy + specified by nonstring. This will never return a text string. + .. note:: If passed a byte string, this function does not check that the + string is valid in the specified encoding. If it's important that the + byte string is in the specified encoding do:: + encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8') + .. version_changed:: 2.3 + Added the ``surrogate_then_replace`` error handler and made it the default error handler. 
+ """ + if isinstance(obj, bytes): + return obj + + # We're given a text string + # If it has surrogates, we know because it will decode + original_errors = errors + if errors in _COMPOSED_ERROR_HANDLERS: + if HAS_SURROGATEESCAPE: + errors = 'surrogateescape' + elif errors == 'surrogate_or_strict': + errors = 'strict' + else: + errors = 'replace' + + if isinstance(obj, str): + try: + # Try this first as it's the fastest + return obj.encode(encoding, errors) + except UnicodeEncodeError: + if original_errors in (None, 'surrogate_then_replace'): + # We should only reach this if encoding was non-utf8 original_errors was + # surrogate_then_escape and errors was surrogateescape + + # Slow but works + return_string = obj.encode('utf-8', 'surrogateescape') + return_string = return_string.decode('utf-8', 'replace') + return return_string.encode(encoding, 'replace') + raise + + # Note: We do these last even though we have to call to_bytes again on the + # value because we're optimizing the common case + if nonstring == 'simplerepr': + try: + value = str(obj) + except UnicodeError: + try: + value = repr(obj) + except UnicodeError: + # Giving up + return to_bytes('') + elif nonstring == 'passthru': + return obj + elif nonstring == 'empty': + # python2.4 doesn't have b'' + return to_bytes('') + elif nonstring == 'strict': + raise TypeError('obj must be a string type') + else: + raise TypeError('Invalid value %s for to_bytes\' nonstring parameter' % nonstring) + + return to_bytes(value, encoding, errors) diff --git a/singleton.py b/singleton.py index bb4b69e..79a745a 100644 --- a/singleton.py +++ b/singleton.py @@ -58,6 +58,7 @@ def with_metaclass(meta, *bases): return meta.__prepare__(name, bases) return type.__new__(metaclass, 'temporary_class', (), {}) + def add_metaclass(metaclass): """Class decorator for creating a class with a metaclass.""" def wrapper(cls): @@ -71,4 +72,4 @@ def add_metaclass(metaclass): orig_vars.pop('__dict__', None) orig_vars.pop('__weakref__', None) 
return metaclass(cls.__name__, cls.__bases__, orig_vars) - return wrapper \ No newline at end of file + return wrapper -- GitLab