Loading display.py +5 −2 Original line number Diff line number Diff line Loading @@ -197,10 +197,13 @@ class Display(with_metaclass(Singleton, object)): else: self.display("%6d %0.5f [%s]: %s" % (os.getpid(), time.time(), host, msg), color=C.COLOR_DEBUG) def verbose(self, msg, host=None, caplevel=2): def verbose(self, msg, host=None, caplevel: int = 2): """ """ to_stderr = C.VERBOSE_TO_STDERR if self.verbosity > caplevel: # print(type(self.verbosity), "\n----\n", type(caplevel)) # FIXME verbosity is somehow changing from int at __init__ to string if int(self.verbosity) > caplevel: if host is None: self.display(msg, color=C.COLOR_VERBOSE, stderr=to_stderr) else: Loading parsing/__init__.py +117 −0 Original line number Diff line number Diff line Loading @@ -16,11 +16,24 @@ # You should have received a copy of the GNU Lesser General Public License # along with Ammsml. If not, see <http://www.gnu.org/licenses/>. import codecs BOOLEANS_TRUE = frozenset(('y', 'yes', 'on', '1', 'true', 't', 1, 1.0, True)) BOOLEANS_FALSE = frozenset(('n', 'no', 'off', '0', 'false', 'f', 0, 0.0, False)) BOOLEANS = BOOLEANS_TRUE.union(BOOLEANS_FALSE) try: codecs.lookup_error('surrogateescape') HAS_SURROGATEESCAPE = True except LookupError: HAS_SURROGATEESCAPE = False _COMPOSED_ERROR_HANDLERS = frozenset((None, 'surrogate_or_replace', 'surrogate_or_strict', 'surrogate_then_replace')) def to_text(obj, encoding='utf-8'): """ """ Loading Loading @@ -96,3 +109,107 @@ def humanize_time(d) -> str: return human def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'): """ Make sure that a string is a byte string :param obj: An object to make sure is a byte string. In most cases this will be either a text string or a byte string. However, with ``nonstring='simplerepr'``, this can be used as a traceback-free version of ``str(obj)``. :param encoding: The encoding to use to transform from a text string to a byte string. Defaults to using 'utf-8'. 
:param errors: The error handler to use if the text string is not encodable using the specified encoding. Any valid `codecs error handler <https://docs.python.org/2/library/codecs.html#codec-base-classes>`_ may be specified. There are three additional error strategies specifically aimed at helping people to port code. The first two are: :surrogate_or_strict: Will use ``surrogateescape`` if it is a valid handler, otherwise it will use ``strict`` :surrogate_or_replace: Will use ``surrogateescape`` if it is a valid handler, otherwise it will use ``replace``. Because ``surrogateescape`` was added in Python3 this usually means that Python3 will use ``surrogateescape`` FIXME and Python2 will use the fallback error handler. Note that the code checks for ``surrogateescape`` when the module is imported. If you have a backport of ``surrogateescape`` for Python2, be sure to register the error handler prior to importing this module. The last error handler is: :surrogate_then_replace: Will use ``surrogateescape`` if it is a valid handler. If encoding with ``surrogateescape`` would traceback, surrogates are first replaced with a replacement characters and then the string is encoded using ``replace`` (which replaces the rest of the nonencodable bytes). FIXME (Python 2 stuff) If ``surrogateescape`` is not present it will simply use ``replace``. This strategy is designed to never traceback when it attempts to encode a string. The default is ``surrogate_then_replace``. :param nonstring: The strategy to use if a nonstring is specified in ``obj``. Default is 'simplerepr'. Valid values are: :simplerepr: The default. This takes the ``str`` of the object and then returns the bytes version of that string. :empty: Return an empty byte string :passthru: Return the object passed in :strict: Raise a :exc:`TypeError` :return: Typically this returns a byte string. If a nonstring object is passed in this may be a different type depending on the strategy specified by nonstring. 
This will never return a text string. .. note:: If passed a byte string, this function does not check that the string is valid in the specified encoding. If it's important that the byte string is in the specified encoding do:: encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8') .. version_changed:: 2.3 Added the ``surrogate_then_replace`` error handler and made it the default error handler. """ if isinstance(obj, bytes): return obj # We're given a text string # If it has surrogates, we know because it will decode original_errors = errors if errors in _COMPOSED_ERROR_HANDLERS: if HAS_SURROGATEESCAPE: errors = 'surrogateescape' elif errors == 'surrogate_or_strict': errors = 'strict' else: errors = 'replace' if isinstance(obj, str): try: # Try this first as it's the fastest return obj.encode(encoding, errors) except UnicodeEncodeError: if original_errors in (None, 'surrogate_then_replace'): # We should only reach this if encoding was non-utf8 original_errors was # surrogate_then_escape and errors was surrogateescape # Slow but works return_string = obj.encode('utf-8', 'surrogateescape') return_string = return_string.decode('utf-8', 'replace') return return_string.encode(encoding, 'replace') raise # Note: We do these last even though we have to call to_bytes again on the # value because we're optimizing the common case if nonstring == 'simplerepr': try: value = str(obj) except UnicodeError: try: value = repr(obj) except UnicodeError: # Giving up return to_bytes('') elif nonstring == 'passthru': return obj elif nonstring == 'empty': # python2.4 doesn't have b'' return to_bytes('') elif nonstring == 'strict': raise TypeError('obj must be a string type') else: raise TypeError('Invalid value %s for to_bytes\' nonstring parameter' % nonstring) return to_bytes(value, encoding, errors) singleton.py +2 −1 Original line number Diff line number Diff line Loading @@ -58,6 +58,7 @@ def with_metaclass(meta, *bases): return meta.__prepare__(name, bases) return 
type.__new__(metaclass, 'temporary_class', (), {}) def add_metaclass(metaclass): """Class decorator for creating a class with a metaclass.""" def wrapper(cls): Loading Loading
display.py +5 −2 Original line number Diff line number Diff line Loading @@ -197,10 +197,13 @@ class Display(with_metaclass(Singleton, object)): else: self.display("%6d %0.5f [%s]: %s" % (os.getpid(), time.time(), host, msg), color=C.COLOR_DEBUG) def verbose(self, msg, host=None, caplevel=2): def verbose(self, msg, host=None, caplevel: int = 2): """ """ to_stderr = C.VERBOSE_TO_STDERR if self.verbosity > caplevel: # print(type(self.verbosity), "\n----\n", type(caplevel)) # FIXME verbosity is somehow changing from int at __init__ to string if int(self.verbosity) > caplevel: if host is None: self.display(msg, color=C.COLOR_VERBOSE, stderr=to_stderr) else: Loading
# parsing/__init__.py -- excerpt recovered from a collapsed diff view
# (hunks @@ -16,11 +16,24 @@ and @@ -96,3 +109,107 @@).
#
# You should have received a copy of the GNU Lesser General Public License
# along with Ammsml. If not, see <http://www.gnu.org/licenses/>.

import codecs

BOOLEANS_TRUE = frozenset(('y', 'yes', 'on', '1', 'true', 't', 1, 1.0, True))
BOOLEANS_FALSE = frozenset(('n', 'no', 'off', '0', 'false', 'f', 0, 0.0, False))
BOOLEANS = BOOLEANS_TRUE.union(BOOLEANS_FALSE)

# Probe once, at import time, whether the ``surrogateescape`` codecs error
# handler is registered; ``to_bytes`` consults this flag on every call.
try:
    codecs.lookup_error('surrogateescape')
    HAS_SURROGATEESCAPE = True
except LookupError:
    HAS_SURROGATEESCAPE = False

# Values of ``errors`` that are not real codecs handlers but composed
# strategies which ``to_bytes`` translates into one.
_COMPOSED_ERROR_HANDLERS = frozenset((None, 'surrogate_or_replace',
                                      'surrogate_or_strict',
                                      'surrogate_then_replace'))

# NOTE: ``to_text(obj, encoding='utf-8')`` and ``humanize_time(d) -> str``
# appear in this file between the constants above and ``to_bytes`` below.
# The diff view collapsed their bodies, so they are not reproduced here.


def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
    """
    Make sure that a string is a byte string.

    :param obj: An object to make sure is a byte string.  In most cases this
        will be either a text string or a byte string.  However, with
        ``nonstring='simplerepr'``, this can be used as a traceback-free
        version of ``str(obj)``.
    :param encoding: The encoding to use to transform from a text string to
        a byte string.  Defaults to using 'utf-8'.
    :param errors: The error handler to use if the text string is not
        encodable using the specified encoding.  Any valid `codecs error
        handler <https://docs.python.org/3/library/codecs.html#error-handlers>`_
        may be specified.  There are three additional, composed strategies:

        :surrogate_or_strict: Will use ``surrogateescape`` if it is a valid
            handler, otherwise it will use ``strict``.
        :surrogate_or_replace: Will use ``surrogateescape`` if it is a valid
            handler, otherwise it will use ``replace``.
        :surrogate_then_replace: Will use ``surrogateescape`` if it is a
            valid handler.  If encoding with ``surrogateescape`` would
            traceback, surrogates are first replaced with a replacement
            character and then the string is encoded using ``replace``
            (which replaces the rest of the nonencodable characters).  If
            ``surrogateescape`` is not present it will simply use
            ``replace``.  This strategy is designed to never traceback when
            it attempts to encode a string.

        Whether ``surrogateescape`` is available is checked once, when this
        module is imported.  The default (``None``) is
        ``surrogate_then_replace``.
    :param nonstring: The strategy to use if a nonstring is specified in
        ``obj``.  Default is 'simplerepr'.  Valid values are:

        :simplerepr: The default.  This takes the ``str`` of the object
            (falling back to its ``repr``) and then returns the bytes
            version of that string, encoded with the same ``encoding`` and
            ``errors`` strategy the caller asked for.
        :empty: Return an empty byte string.
        :passthru: Return the object passed in.
        :strict: Raise a :exc:`TypeError`.

    :return: Typically this returns a byte string.  If a nonstring object is
        passed in this may be a different type depending on the strategy
        specified by nonstring.  This will never return a text string.
    :raises TypeError: If ``obj`` is not a string and ``nonstring`` is
        ``'strict'`` or is not one of the valid values.
    :raises UnicodeEncodeError: If ``obj`` cannot be encoded and ``errors``
        is neither ``None`` nor ``'surrogate_then_replace'``.

    .. note:: If passed a byte string, this function does not check that the
        string is valid in the specified encoding.  If it's important that
        the byte string is in the specified encoding do::

            encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8')

    .. version_changed:: 2.3

        Added the ``surrogate_then_replace`` error handler and made it the
        default error handler.
    """
    if isinstance(obj, bytes):
        return obj

    # Translate a composed strategy into a real codecs handler, but remember
    # what the caller asked for: the fallbacks below depend on it.
    original_errors = errors
    if errors in _COMPOSED_ERROR_HANDLERS:
        if HAS_SURROGATEESCAPE:
            errors = 'surrogateescape'
        elif errors == 'surrogate_or_strict':
            errors = 'strict'
        else:
            errors = 'replace'

    if isinstance(obj, str):
        try:
            # Try this first as it's the fastest
            return obj.encode(encoding, errors)
        except UnicodeEncodeError:
            if original_errors not in (None, 'surrogate_then_replace'):
                raise
            # Only the never-traceback strategy reaches this point.
            # Slow but works: turn escaped surrogates back into raw bytes,
            # decode them leniently, then encode with ``replace``.
            try:
                raw = obj.encode('utf-8', 'surrogateescape')
            except UnicodeEncodeError:
                # ``surrogateescape`` only handles U+DC80..U+DCFF; any other
                # lone surrogate would raise again here, so replace directly.
                return obj.encode(encoding, 'replace')
            return raw.decode('utf-8', 'replace').encode(encoding, 'replace')

    # Note: We do these last even though we have to call to_bytes again on the
    # value because we're optimizing the common case
    if nonstring == 'simplerepr':
        try:
            value = str(obj)
        except UnicodeError:
            try:
                value = repr(obj)
            except UnicodeError:
                # Giving up
                return b''
    elif nonstring == 'passthru':
        return obj
    elif nonstring == 'empty':
        return b''
    elif nonstring == 'strict':
        raise TypeError('obj must be a string type')
    else:
        raise TypeError('Invalid value %s for to_bytes\' nonstring parameter' % nonstring)

    # Pass the caller's original strategy, not the translated handler, so a
    # non-string object gets exactly the same fallbacks as a text string.
    return to_bytes(value, encoding, original_errors)
singleton.py +2 −1 Original line number Diff line number Diff line Loading @@ -58,6 +58,7 @@ def with_metaclass(meta, *bases): return meta.__prepare__(name, bases) return type.__new__(metaclass, 'temporary_class', (), {}) def add_metaclass(metaclass): """Class decorator for creating a class with a metaclass.""" def wrapper(cls): Loading