From 8e7c78b21ae5da1a2b4c99db6593d724491831dd Mon Sep 17 00:00:00 2001 From: Christof Schulze Date: Fri, 7 Feb 2020 16:17:47 +0100 Subject: [PATCH] fixed #1 DEFAULT_CONFIG and config parameter all transformed to String in `parsing.to_text`, removed workaround in `Display` --- display.py | 4 +- parsing/__init__.py | 180 ++++++++++++++++++++----------------------- requirements.txt | 2 +- requirements_opt.txt | 1 + 4 files changed, 87 insertions(+), 100 deletions(-) create mode 100644 requirements_opt.txt diff --git a/display.py b/display.py index c83bd56..7bc6802 100644 --- a/display.py +++ b/display.py @@ -201,9 +201,7 @@ class Display(with_metaclass(Singleton, object)): """ """ to_stderr = C.VERBOSE_TO_STDERR - # print(type(self.verbosity), "\n----\n", type(caplevel)) - # FIXME verbosity is somehow changing from int at __init__ to string - if int(self.verbosity) > caplevel: + if self.verbosity > caplevel: if host is None: self.display(msg, color=C.COLOR_VERBOSE, stderr=to_stderr) else: diff --git a/parsing/__init__.py b/parsing/__init__.py index 233b0e9..2a4007c 100644 --- a/parsing/__init__.py +++ b/parsing/__init__.py @@ -16,48 +16,69 @@ # You should have received a copy of the GNU Lesser General Public License # along with Ammsml. If not, see . -import codecs - BOOLEANS_TRUE = frozenset(('y', 'yes', 'on', '1', 'true', 't', 1, 1.0, True)) BOOLEANS_FALSE = frozenset(('n', 'no', 'off', '0', 'false', 'f', 0, 0.0, False)) BOOLEANS = BOOLEANS_TRUE.union(BOOLEANS_FALSE) -try: - codecs.lookup_error('surrogateescape') - HAS_SURROGATEESCAPE = True -except LookupError: - HAS_SURROGATEESCAPE = False +def to_text(obj, encoding='utf-8', nonstring: str='simplerepr'): + """ + Make sure that a string is a text string + + :param obj: An object to make sure is a text string. In most cases this + will be either a text string or a byte string. However, with + ``nonstring='simplerepr'``, this can be used as a traceback-free + version of ``str(obj)``. + :param encoding: The encoding to use to transform from a byte string to + a text string. Defaults to using 'utf-8'. -_COMPOSED_ERROR_HANDLERS = frozenset((None, 'surrogate_or_replace', - 'surrogate_or_strict', - 'surrogate_then_replace')) + :param nonstring: The strategy to use if a nonstring is specified in + ``obj``. Default is 'simplerepr'. Valid values are: + :simplerepr: The default. This takes the ``str`` of the object and + then returns the text version of that string. + :empty: Return an empty text string + :passthru: Return the object passed in + :strict: Raise a :exc:`TypeError` -def to_text(obj, encoding='utf-8'): - """ + :return: Typically this returns a text string. If a nonstring object is + passed in this may be a different type depending on the strategy + specified by nonstring. This will never return a byte string. """ if isinstance(obj, str): return obj - if isinstance(obj, bytes): - # Note: We don't need special handling for surrogate_then_replace - # because all bytes will either be made into surrogates or are valid - # to decode. - return obj.decode(encoding) + # The error handler to use if the byte string is not decodable using the specified encoding. + errors = 'surrogateescape' - try: - value = str(obj) + if isinstance(obj, bytes): + # Note: We don't need special handling for surrogateescape because + # all bytes will either be made into surrogates or are valid to decode. + return obj.decode(encoding, errors) - except UnicodeError: + # Note: We do these last even though we have to call to_text again on the + # value because we're optimizing the common case + if nonstring == 'simplerepr': try: - value = repr(obj) + value = str(obj) + except UnicodeError: - # Giving up - raise TypeError('Invalid value %s for to_text\'s nonstring parameter' % value) + try: + value = repr(obj) + except UnicodeError: + # Giving up + return '' + elif nonstring == 'passthru': + return obj + elif nonstring == 'empty': + return '' + elif nonstring == 'strict': + raise TypeError('obj must be a string type') + else: + raise TypeError('Invalid value %s for to_text\'s nonstring parameter' % nonstring) - return to_text(value) + return to_text(value, encoding, errors) def boolean(value): @@ -77,39 +98,7 @@ def boolean(value): to_text(value), ', '.join(repr(i) for i in BOOLEANS))) -def unquote(data): - """ - removes first and last quotes from a string, - if the string starts and ends with the same quotes - """ - if is_quoted(data): - return data[1:-1] - - return data - - -def is_quoted(data): - return len(data) > 1 and data[0] == data[-1] and data[0] in ('"', "'") and data[-2] != '\\' - - -def humanize_time(d) -> str: - """ - Return a human-friendly description of elapsed time - :param d: - :return: - """ - human = "" - if d >= 3600: - human += "%dh" % (int(d) // 3600) - d %= 3600 - if d >= 60: - human += "%dm" % (int(d) // 60) - d %= 60 - human += ("%.3fs" % d) - return human - - -def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'): +def to_bytes(obj, encoding='utf-8', nonstring='simplerepr'): """ Make sure that a string is a byte string :param obj: An object to make sure is a byte string. In most cases this @@ -118,33 +107,6 @@ def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'): version of ``str(obj)``. :param encoding: The encoding to use to transform from a text string to a byte string. Defaults to using 'utf-8'. - :param errors: The error handler to use if the text string is not - encodable using the specified encoding. Any valid `codecs error - handler `_ - may be specified. There are three additional error strategies - specifically aimed at helping people to port code. The first two are: - :surrogate_or_strict: Will use ``surrogateescape`` if it is a valid - handler, otherwise it will use ``strict`` - :surrogate_or_replace: Will use ``surrogateescape`` if it is a valid - handler, otherwise it will use ``replace``. - Because ``surrogateescape`` was added in Python3 this usually means that - Python3 will use ``surrogateescape`` - - FIXME and Python2 will use the fallback - error handler. Note that the code checks for ``surrogateescape`` when the - module is imported. If you have a backport of ``surrogateescape`` for - Python2, be sure to register the error handler prior to importing this - module. - The last error handler is: - :surrogate_then_replace: Will use ``surrogateescape`` if it is a valid - handler. If encoding with ``surrogateescape`` would traceback, - surrogates are first replaced with a replacement characters - and then the string is encoded using ``replace`` (which replaces - the rest of the nonencodable bytes). - FIXME (Python 2 stuff) If ``surrogateescape`` is not present it will simply use ``replace``. - This strategy is designed to never traceback when it attempts - to encode a string. - The default is ``surrogate_then_replace``. :param nonstring: The strategy to use if a nonstring is specified in ``obj``. Default is 'simplerepr'. Valid values are: :simplerepr: The default. This takes the ``str`` of the object and @@ -159,22 +121,17 @@ def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'): string is valid in the specified encoding. If it's important that the byte string is in the specified encoding do:: encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8') - .. version_changed:: 2.3 - Added the ``surrogate_then_replace`` error handler and made it the default error handler. """ + if isinstance(obj, bytes): return obj + # The error handler to use if the byte string is not decodable using the specified encoding. + errors = 'surrogateescape' + # We're given a text string # If it has surrogates, we know because it will decode original_errors = errors - if errors in _COMPOSED_ERROR_HANDLERS: - if HAS_SURROGATEESCAPE: - errors = 'surrogateescape' - elif errors == 'surrogate_or_strict': - errors = 'strict' - else: - errors = 'replace' if isinstance(obj, str): try: @@ -187,8 +144,8 @@ def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'): # Slow but works return_string = obj.encode('utf-8', 'surrogateescape') - return_string = return_string.decode('utf-8', 'replace') - return return_string.encode(encoding, 'replace') + # return_string = return_string.decode('utf-8', 'replace') + # return return_string.encode(encoding, 'replace') raise # Note: We do these last even though we have to call to_bytes again on the @@ -205,7 +162,6 @@ def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'): elif nonstring == 'passthru': return obj elif nonstring == 'empty': - # python2.4 doesn't have b'' return to_bytes('') elif nonstring == 'strict': raise TypeError('obj must be a string type') @@ -213,3 +169,35 @@ def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'): raise TypeError('Invalid value %s for to_bytes\' nonstring parameter' % nonstring) return to_bytes(value, encoding, errors) + + +def unquote(data): + """ + removes first and last quotes from a string, + if the string starts and ends with the same quotes + """ + if is_quoted(data): + return data[1:-1] + + return data + + +def is_quoted(data): + return len(data) > 1 and data[0] == data[-1] and data[0] in ('"', "'") and data[-2] != '\\' + + +def humanize_time(d) -> str: + """ + Return a human-friendly description of elapsed time + :param d: + :return: + """ + human = "" + if d >= 3600: + human += "%dh" % (int(d) // 3600) + d %= 3600 + if d >= 60: + human += "%dm" % (int(d) // 60) + d %= 60 + human += ("%.3fs" % d) + return human diff --git a/requirements.txt b/requirements.txt index a1b1204..496efad 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ Jinja2==2.10.3 MarkupSafe==1.1.1 -PyYAML==5.1.2 +PyYAML>=5.1.2 diff --git a/requirements_opt.txt b/requirements_opt.txt new file mode 100644 index 0000000..3b6f072 --- /dev/null +++ b/requirements_opt.txt @@ -0,0 +1 @@ +argcomplete==1.11.1 -- GitLab