Commit ca6e3ec8 authored by Christof Schulze's avatar Christof Schulze 😎
Browse files

added parsing.py

parent d9c8ec12
Loading
Loading
Loading
Loading
Loading
+1 −5
Original line number Diff line number Diff line
@@ -129,10 +129,6 @@ dmypy.json
cython_debug/

# PyCharm
#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
#  and can be added to the global gitignore or merged into this file.  For a more nuclear
#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.idea/

+33 −2
Original line number Diff line number Diff line
@@ -8,12 +8,43 @@ A collection of simple Python utilities; each file can be used on its own.

  Drop in replacement for a better Display class
  
  - display(msg: str, log_only=None)
### `display(msg: str, log_only=None)`
    Print to stdout

  - display(msg: str, wrap_text=None)
### `error(msg: str, wrap_text=None)`
    Print to stderr

## parsing.py
Basic functions to help parse Text and booleans.
[TODO] need more documentation

###  `to_text(obj, encoding='utf-8', nonstring: str = 'simplerepr')`

### `to_bytes(obj, encoding='utf-8', nonstring: str = 'simplerepr')`
   Make sure that a string is a byte string  
    :param `obj`: An object to make sure is a byte string.  In most cases this  
        will be either a text string or a byte string.  However, with  
        ``nonstring='simplerepr'``, this can be used as a traceback-free  
        version of ``str(obj)``.  
    :param `encoding`: The encoding to use to transform from a text string to  
        a byte string.  Defaults to using 'utf-8'.  
    :param `nonstring`: The strategy to use if a nonstring is specified in  
        `obj`.  Default is 'simplerepr'.  Valid values are:  
        :simplerepr: The default.  This takes the `str` of the object and  
            then returns the bytes version of that string.  
        :empty: Return an empty byte string  
        :passthru: Return the object passed in  
        :strict: Raise a :exc:`TypeError`  
    :return: Typically this returns a byte string. If a nonstring object is  
        passed in this may be a different type depending on the strategy  
        specified by nonstring.  This will never return a text string.  
    .. note:: If passed a byte string, this function does not check that the  
        string is valid in the specified encoding. If it's important that the  
        byte string is in the specified encoding do::  
            `encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8')`  

### `boolean(value)`

## License
This collection is free software: you can redistribute it and/or modify it
under the terms of the MIT License.

parsing.py

0 → 100644
+198 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3
# (c) 2023, Christof Schulze <christof.schulze@fau.de>, WW8
#
# parsing.py is free software: you can redistribute it and/or modify it
# under the terms of the MIT License.
#
# parsing.py is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY OF ANY KIND; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the MIT License for more details.
#
# You should have received a copy of the MIT License along with parsing.py.
# If not, see <https://opensource.org/licenses/MIT>.
#
# Current versions of parsing.py can be found on:
# https://simlab.ww.uni-erlangen.de/ww8it/python/simple-utils

# Values that boolean() accepts as True / False.  String entries are compared
# after lower-casing and stripping the input.  Note that 1, 1.0 and True (and
# likewise 0, 0.0 and False) compare and hash equal, so each frozenset keeps
# only a single entry for them.
BOOLEANS_TRUE = frozenset(('y', 'yes', 'on', '1', 'true', 't', 1, 1.0, True))
BOOLEANS_FALSE = frozenset(('n', 'no', 'off', '0', 'false', 'f', 0, 0.0, False))
# Every accepted value; used by boolean() to list the valid inputs in its error message.
BOOLEANS = BOOLEANS_TRUE.union(BOOLEANS_FALSE)

# The error handler to use if the byte string is not decodable using the specified encoding.
# 'surrogateescape' never raises on decode: undecodable bytes are turned into
# lone surrogate code points instead.
DEFAULT_DECODING_ERROR = 'surrogateescape'


def to_text(obj, encoding: str = 'utf-8', nonstring: str = 'simplerepr'):
    """
    Make sure that a string is a text string

    :param obj:  An object to make sure is a text string.  In most cases this
        will be either a text string or a byte string.  However, with
        ``nonstring='simplerepr'``, this can be used as a traceback-free
        version of ``str(obj)``.
    :param encoding:  The encoding to use to transform from a byte string to
        a text string.  Defaults to using 'utf-8'.  Byte strings are decoded
        with the ``DEFAULT_DECODING_ERROR`` error handler.

    :param nonstring: The strategy to use if a nonstring is specified in
        ``obj``.  Default is 'simplerepr'.  Valid values are:

        :simplerepr: The default.  This takes the ``str`` of the object and
            returns it; if that fails with a :exc:`UnicodeError` the ``repr``
            is tried, and an empty text string is returned as a last resort.
        :empty: Return an empty text string
        :passthru: Return the object passed in
        :strict: Raise a :exc:`TypeError`

    :return: Typically, this returns a text string.  If a nonstring object is
        passed in this may be a different type depending on the strategy
        specified by nonstring.  This will never return a byte string.
    :raises TypeError: If ``obj`` is not a string and ``nonstring`` is
        'strict', or if ``nonstring`` is not one of the valid values.
    """

    # Common cases first: already text, or bytes that only need decoding.
    if isinstance(obj, str):
        return obj

    if isinstance(obj, bytes):
        return obj.decode(encoding, DEFAULT_DECODING_ERROR)

    if nonstring == 'simplerepr':
        # str()/repr() already produce text, so the result is returned as is;
        # no further conversion pass is needed.
        try:
            return str(obj)
        except UnicodeError:
            try:
                return repr(obj)
            except UnicodeError:
                # Giving up
                return ''
    if nonstring == 'passthru':
        return obj
    if nonstring == 'empty':
        return ''
    if nonstring == 'strict':
        raise TypeError('obj must be a string type')

    # f-string instead of %-formatting so that a tuple passed by mistake is
    # reported properly instead of breaking the formatting itself.
    raise TypeError(f"Invalid value {nonstring} for to_text's nonstring parameter")


def boolean(value):
    """
    Make sure that a boolean is a boolean

    :param value:  An object to make sure is a boolean.  In most cases this
        will be a boolean or either a text string or a byte string containing
        boolean information.  Strings are compared case-insensitively and
        with surrounding whitespace removed.
    :return: ``True`` if the value is one of ``BOOLEANS_TRUE``, ``False`` if
        it is one of ``BOOLEANS_FALSE``.
    :raises TypeError: If the value is not a recognised boolean
        representation.
    """
    if isinstance(value, bool):
        return value

    normalized_value = value
    if isinstance(value, (str, bytes)):
        normalized_value = to_text(value).lower().strip()

    try:
        if normalized_value in BOOLEANS_TRUE:
            return True
        if normalized_value in BOOLEANS_FALSE:
            return False
    except TypeError:
        # Unhashable values (list, dict, ...) cannot be looked up in a
        # frozenset.  They are simply not valid booleans, so fall through to
        # the descriptive error below instead of leaking "unhashable type".
        pass

    raise TypeError("The value '%s' is not a valid boolean.  Valid booleans include: %s" % (
                    to_text(value), ', '.join(repr(i) for i in BOOLEANS)))


def to_bytes(obj, encoding: str = 'utf-8', nonstring: str = 'simplerepr'):
    """
    Make sure that a string is a byte string

    :param obj: An object to make sure is a byte string.  In most cases this
        will be either a text string or a byte string.  However, with
        ``nonstring='simplerepr'``, this can be used as a traceback-free
        version of ``str(obj)``.
    :param encoding: The encoding to use to transform from a text string to
        a byte string.  Defaults to using 'utf-8'.  Text is encoded with the
        'surrogateescape' error handler, so text obtained by decoding
        arbitrary bytes with that handler converts back to the original bytes.
    :param nonstring: The strategy to use if a nonstring is specified in
        ``obj``.  Default is 'simplerepr'.  Valid values are:

        :simplerepr: The default.  This takes the ``str`` of the object and
            then returns the bytes version of that string.
        :empty: Return an empty byte string
        :passthru: Return the object passed in
        :strict: Raise a :exc:`TypeError`

    :return: Typically this returns a byte string. If a nonstring object is
        passed in this may be a different type depending on the strategy
        specified by nonstring.  This will never return a text string.
    :raises UnicodeEncodeError: If the text contains characters (other than
        escaped surrogates) that cannot be represented in ``encoding``.
    :raises TypeError: If ``obj`` is not a string and ``nonstring`` is
        'strict', or if ``nonstring`` is not one of the valid values.

    .. note:: If passed a byte string, this function does not check that the
        string is valid in the specified encoding. If it's important that the
        byte string is in the specified encoding do::

            encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8')
    """

    if isinstance(obj, bytes):
        return obj

    if isinstance(obj, str):
        # 'surrogateescape' only affects lone surrogates in U+DC80..U+DCFF,
        # which are what decoding undecodable bytes with the same handler
        # produces; every other string encodes exactly as with strict mode.
        return obj.encode(encoding, 'surrogateescape')

    if nonstring == 'simplerepr':
        try:
            value = str(obj)
        except UnicodeError:
            try:
                value = repr(obj)
            except UnicodeError:
                # Giving up
                return b''
        return value.encode(encoding, 'surrogateescape')
    if nonstring == 'passthru':
        return obj
    if nonstring == 'empty':
        return b''
    if nonstring == 'strict':
        raise TypeError('obj must be a string type')

    raise TypeError(f"Invalid value {nonstring} for to_bytes' nonstring parameter")


def unquote(data):
    """
    Strip one pair of enclosing quotes from a string.

    :param data: The string to process.
    :return: ``data`` without its first and last character when
        ``is_quoted(data)`` is true, otherwise ``data`` unchanged.
    """
    return data[1:-1] if is_quoted(data) else data


def is_quoted(data):
    """
    Tell whether a string is wrapped in a matching pair of quotes.

    :param data: The string to inspect.
    :return: ``True`` when ``data`` has at least two characters, starts and
        ends with the same quote character (``"`` or ``'``) and the closing
        quote is not preceded by a backslash; ``False`` otherwise.
    """
    if len(data) <= 1:
        return False

    opening, closing = data[0], data[-1]
    if opening != closing:
        return False
    if opening not in ('"', "'"):
        return False

    # A backslash right before the closing quote means the quote is escaped.
    return data[-2] != '\\'


def humanize_time(duration: float) -> str:
    """
    Return a human-friendly description of elapsed time

    The result has the form ``[<H>h][<M>m]<S.mmm>s``; the hour and minute
    parts are only present when they apply.  Durations below one minute
    (including negative values) are rendered as plain seconds.

    :param duration: time period in seconds (whole or fractional)
    :return: the formatted duration, e.g. ``'1h2m5.500s'``
    """
    # Round to the displayed precision (milliseconds) *before* splitting into
    # units.  Otherwise a value such as 3599.9999 would be split into 59m plus
    # 59.9999s and then be printed as the impossible "59m60.000s".
    seconds = round(duration, 3)

    parts = []
    if seconds >= 3600:
        hours, seconds = divmod(seconds, 3600)
        parts.append("%dh" % hours)
    if seconds >= 60:
        minutes, seconds = divmod(seconds, 60)
        parts.append("%dm" % minutes)
    parts.append("%.3fs" % seconds)
    return "".join(parts)
+8 −2
Original line number Diff line number Diff line
#!/usr/bin/env python3
# (c) 2023, Christof Schulze <christof.schulze@fau.de>
# (c) 2023, Christof Schulze <christof.schulze@fau.de>, WW8
#
# simpledisplay is free software: you can redistribute it and/or modify it
# under the terms of the MIT License as published by the Free Software Foundation.
@@ -18,7 +18,7 @@
import sys


class SimpleDisplay(object):
class SimpleDisplay:
    """
    OUTPUT OF LAST RESORT

@@ -28,7 +28,13 @@ class SimpleDisplay(object):
    functions, with colors and parallel/partial loging to files etc.
    """
    def display(self, msg, log_only=None):
        """
        [TODO]
        """
        print(msg, file=sys.stdout)

    def error(self, msg, wrap_text=None):
        """
        [TODO]
        """
        print(msg, file=sys.stderr)