diff --git a/.gitignore b/.gitignore index 3ae773a035237a7734ced3c53b1b206860f3f87b..b02f9e243aa3ff69d8edf4cfa65833bcfb3dce88 100644 --- a/.gitignore +++ b/.gitignore @@ -129,10 +129,6 @@ dmypy.json cython_debug/ # PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ +.idea/ diff --git a/README.md b/README.md index e1c32bfa157e4159db1e8b492990744dcf075439..1860fec4a48f7a739f4661ccb9ed404f29d84383 100644 --- a/README.md +++ b/README.md @@ -8,12 +8,43 @@ A collection of simple python utilities each file can be used as it's own. Drop in replacement for a better Display class - - display(msg: str, log_only=None) +### `display(msg: str, log_only=None)` Print to stdout - - display(msg: str, wrap_text=None) +### `error(msg: str, wrap_text=None)` Print to stderr +## parsing.py +Basic functions to help parse Text and booleans. +[TODO] need more documentation + +### `to_text(obj, encoding='utf-8', nonstring: str = 'simplerepr')` + +### `to_bytes(obj, encoding='utf-8', nonstring: str = 'simplerepr')` + Make sure that a string is a byte string + :param `obj`: An object to make sure is a byte string. In most cases this + will be either a text string or a byte string. However, with + ``nonstring='simplerepr'``, this can be used as a traceback-free + version of ``str(obj)``. + :param `encoding`: The encoding to use to transform from a text string to + a byte string. Defaults to using 'utf-8'. + :param `nonstring`: The strategy to use if a nonstring is specified in + `obj`. Default is 'simplerepr'. Valid values are: + :simplerepr: The default. This takes the `str` of the object and + then returns the bytes version of that string.
+ :empty: Return an empty byte string + :passthru: Return the object passed in + :strict: Raise a :exc:`TypeError` + :return: Typically this returns a byte string. If a nonstring object is + passed in this may be a different type depending on the strategy + specified by nonstring. This will never return a text string. + .. note:: If passed a byte string, this function does not check that the + string is valid in the specified encoding. If it's important that the + byte string is in the specified encoding do:: + `encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8')` + +### `boolean(value)` + ## License This collection is free software: you can redistribute it and/or modify it under the terms of the MIT License as published by the Free Software Foundation. diff --git a/parsing.py b/parsing.py new file mode 100644 index 0000000000000000000000000000000000000000..011b5f2bf2bd9fd0629c9c035da5a2e09317db95 --- /dev/null +++ b/parsing.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python3 +# (c) 2023, Christof Schulze <christof.schulze@fau.de>, WW8 +# +# parsing.py is free software: you can redistribute it and/or modify it +# under the terms of the MIT License as published by the Free Software Foundation. +# +# parsing.py is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY OF ANY KIND; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the MIT License for more details. +# +# You should have received a copy of the MIT License along with parsing.py. +# If not, see <http://www.gnu.org/licenses/mit>. 
#
# Current versions of parsing.py can be found on:
# https://simlab.ww.uni-erlangen.de/ww8it/python/simple-utils
"""Helpers for text/bytes conversion, boolean parsing, quoting and durations."""

# Values accepted as "true" / "false" by boolean().  Strings are compared
# after lower-casing and stripping, so only lower-case spellings are listed.
BOOLEANS_TRUE = frozenset(('y', 'yes', 'on', '1', 'true', 't', 1, 1.0, True))
BOOLEANS_FALSE = frozenset(('n', 'no', 'off', '0', 'false', 'f', 0, 0.0, False))
BOOLEANS = BOOLEANS_TRUE.union(BOOLEANS_FALSE)

# The error handler to use if the byte string is not decodable using the
# specified encoding.  to_bytes() encodes with the same handler so that bytes
# which went through to_text() can be converted back unchanged.
DEFAULT_DECODING_ERROR = 'surrogateescape'


def _simplerepr(obj) -> str:
    """Return ``str(obj)``, falling back to ``repr(obj)`` and finally to ``''``.

    Only :exc:`UnicodeError` triggers the fallbacks; any other exception
    raised by the object's ``__str__``/``__repr__`` propagates.
    """
    try:
        return str(obj)
    except UnicodeError:
        try:
            return repr(obj)
        except UnicodeError:
            # Giving up
            return ''


def to_text(obj, encoding: str = 'utf-8', nonstring: str = 'simplerepr'):
    """
    Make sure that a string is a text string

    :param obj: An object to make sure is a text string. In most cases this
        will be either a text string or a byte string. However, with
        ``nonstring='simplerepr'``, this can be used as a traceback-free
        version of ``str(obj)``.
    :param encoding: The encoding to use to transform from a byte string to
        a text string. Defaults to using 'utf-8'.
    :param nonstring: The strategy to use if a nonstring is specified in
        ``obj``. Default is 'simplerepr'. Valid values are:

        :simplerepr: The default. This takes the ``str`` of the object and
            then returns the text version of that string.
        :empty: Return an empty text string
        :passthru: Return the object passed in
        :strict: Raise a :exc:`TypeError`

    :return: Typically, this returns a text string. If a nonstring object is
        passed in this may be a different type depending on the strategy
        specified by nonstring.
    :raises TypeError: if ``obj`` is not a string and ``nonstring`` is
        'strict' or not one of the valid values.
    """
    if isinstance(obj, str):
        return obj

    if isinstance(obj, bytes):
        # Undecodable bytes become lone surrogates instead of raising.
        return obj.decode(encoding, DEFAULT_DECODING_ERROR)

    # Non-string input: handled last because strings are the common case.
    if nonstring == 'simplerepr':
        # The result is already text, so no further conversion is needed.
        return _simplerepr(obj)
    if nonstring == 'passthru':
        return obj
    if nonstring == 'empty':
        return ''
    if nonstring == 'strict':
        raise TypeError('obj must be a string type')
    raise TypeError(f"Invalid value {nonstring} for to_text's nonstring parameter")


def boolean(value) -> bool:
    """
    Make sure that a boolean is a boolean

    :param value: An object to make sure is a boolean. In most cases this
        will be a boolean or either a text string or a byte string containing
        boolean information. Strings are matched case-insensitively and with
        surrounding whitespace ignored.
    :return: ``True`` or ``False``.
    :raises TypeError: if ``value`` is not one of the values in ``BOOLEANS``.
    """
    if isinstance(value, bool):
        return value

    normalized_value = value
    if isinstance(value, (str, bytes)):
        normalized_value = to_text(value).lower().strip()

    try:
        if normalized_value in BOOLEANS_TRUE:
            return True
        if normalized_value in BOOLEANS_FALSE:
            return False
    except TypeError:
        # Unhashable values (e.g. lists) cannot be looked up in a frozenset;
        # they are simply not valid booleans, so report them as such below.
        pass

    # Sort so the message is stable; set iteration order varies between runs.
    valid = ', '.join(sorted(repr(i) for i in BOOLEANS))
    raise TypeError("The value '%s' is not a valid boolean. Valid booleans include: %s" % (
        to_text(value), valid))


def to_bytes(obj, encoding: str = 'utf-8', nonstring: str = 'simplerepr'):
    """
    Make sure that a string is a byte string

    :param obj: An object to make sure is a byte string. In most cases this
        will be either a text string or a byte string. However, with
        ``nonstring='simplerepr'``, this can be used as a traceback-free
        version of ``str(obj)``.
    :param encoding: The encoding to use to transform from a text string to
        a byte string. Defaults to using 'utf-8'.
    :param nonstring: The strategy to use if a nonstring is specified in
        ``obj``. Default is 'simplerepr'. Valid values are:

        :simplerepr: The default. This takes the ``str`` of the object and
            then returns the bytes version of that string.
        :empty: Return an empty byte string
        :passthru: Return the object passed in
        :strict: Raise a :exc:`TypeError`

    :return: Typically this returns a byte string. If a nonstring object is
        passed in this may be a different type depending on the strategy
        specified by nonstring.
    :raises TypeError: if ``obj`` is not a string and ``nonstring`` is
        'strict' or not one of the valid values.
    :raises UnicodeEncodeError: if the text cannot be represented in
        ``encoding``.

    .. note:: If passed a byte string, this function does not check that the
        string is valid in the specified encoding. If it's important that the
        byte string is in the specified encoding do::

            encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8')
    """
    if isinstance(obj, bytes):
        return obj

    if isinstance(obj, str):
        # Lone surrogates produced by to_text() for undecodable bytes are
        # turned back into the original bytes; anything else that cannot be
        # encoded still raises UnicodeEncodeError.
        return obj.encode(encoding, DEFAULT_DECODING_ERROR)

    # Non-string input: handled last because strings are the common case.
    if nonstring == 'simplerepr':
        return _simplerepr(obj).encode(encoding, DEFAULT_DECODING_ERROR)
    if nonstring == 'passthru':
        return obj
    if nonstring == 'empty':
        return b''
    if nonstring == 'strict':
        raise TypeError('obj must be a string type')
    raise TypeError(f"Invalid value {nonstring} for to_bytes' nonstring parameter")


def unquote(data: str) -> str:
    """
    Remove the first and last character of a string if it is quoted.

    A string counts as quoted when is_quoted() says so; anything else is
    returned unchanged.
    """
    if is_quoted(data):
        return data[1:-1]

    return data


def is_quoted(data: str) -> bool:
    """
    Check if the string starts and ends with the same quote character.

    Both ``"`` and ``'`` are recognised. A closing quote that is directly
    preceded by a backslash does not count.
    """
    return len(data) > 1 and data[0] == data[-1] and data[0] in ('"', "'") and data[-2] != '\\'


def humanize_time(duration: float) -> str:
    """
    Return a human-friendly description of elapsed time

    :param duration: time period in seconds (whole or fractional)
    :return: text such as ``'1h2m5.500s'``. The hour and minute parts are
        left out when they are zero; seconds always have three decimals.
    """
    # Round to the displayed precision first; otherwise a value like 59.9996
    # would skip the minute branch and be printed as "60.000s".
    duration = round(duration, 3)

    human = ""
    if duration >= 3600:
        human += "%dh" % (int(duration) // 3600)
        duration %= 3600
    if duration >= 60:
        human += "%dm" % (int(duration) // 60)
        duration %= 60
    human += "%.3fs" % duration
    return human