added parsing.py (ca6e3ec8) · Commits · ww8it / python / simple Utils

.gitignore

+1 −5

Original line number	Diff line number	Diff line
		@@ -129,10 +129,6 @@ dmypy.json
		cython_debug/

		# PyCharm
		# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
		# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
		# and can be added to the global gitignore or merged into this file. For a more nuclear
		# option (not recommended) you can uncomment the following to ignore the entire idea folder.
		#.idea/
		.idea/

README.md

+33 −2

Original line number	Diff line number	Diff line
		@@ -8,12 +8,43 @@ A collection of simple python utilities each file can be used as it's own.

		Drop in replacement for a better Display class

		- display(msg: str, log_only=None)
		### `display(msg: str, log_only=None)`
		Print to stdout

		- display(msg: str, wrap_text=None)
		### `error(msg: str, wrap_text=None)`
		Print to stderr

		## parsing.py
		Basic helper functions for converting between text and byte strings and for parsing booleans.
		[TODO] need more documentation

		### `to_text(obj, encoding='utf-8', nonstring: str = 'simplerepr')`

		### `to_bytes(obj, encoding='utf-8', nonstring: str = 'simplerepr')`
		Make sure that a string is a byte string
		:param `obj`: An object to make sure is a byte string. In most cases this
		will be either a text string or a byte string. However, with
		``nonstring='simplerepr'``, this can be used as a traceback-free
		version of ``str(obj)``.
		:param `encoding`: The encoding to use to transform from a text string to
		a byte string. Defaults to using 'utf-8'.
		:param `nonstring`: The strategy to use if a nonstring is specified in
		`obj`. Default is 'simplerepr'. Valid values are:
		:simplerepr: The default. This takes the `str` of the object and
		then returns the bytes version of that string.
		:empty: Return an empty byte string
		:passthru: Return the object passed in
		:strict: Raise a :exc:`TypeError`
		:return: Typically this returns a byte string. If a nonstring object is
		passed in this may be a different type depending on the strategy
		specified by nonstring. This will never return a text string.
		.. note:: If passed a byte string, this function does not check that the
		string is valid in the specified encoding. If it's important that the
		byte string is in the specified encoding do::
		`encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8')`

		### `boolean(value)`

		## License
		This collection is free software: you can redistribute it and/or modify it
		under the terms of the MIT License.

parsing.py

0 → 100644

+198 −0

Original line number	Diff line number	Diff line
		#!/usr/bin/env python3
		# (c) 2023, Christof Schulze <christof.schulze@fau.de>, WW8
		#
		# parsing.py is free software: you can redistribute it and/or modify it
		# under the terms of the MIT License as published by the Free Software Foundation.
		#
		# parsing.py is distributed in the hope that it will be useful,
		# but WITHOUT ANY WARRANTY OF ANY KIND; without even the implied warranty of
		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
		# See the MIT License for more details.
		#
		# You should have received a copy of the MIT License along with parsing.py.
		# If not, see <http://www.gnu.org/licenses/mit>.
		#
		# Current versions of parsing.py can be found on:
		# https://simlab.ww.uni-erlangen.de/ww8it/python/simple-utils

		# Values accepted as "true" / "false". Equal members such as 1, 1.0 and True
		# collapse into a single frozenset entry.
		BOOLEANS_TRUE = frozenset(
		    ('y', 'yes', 'on', '1', 'true', 't', 1, 1.0, True)
		)
		BOOLEANS_FALSE = frozenset(
		    ('n', 'no', 'off', '0', 'false', 'f', 0, 0.0, False)
		)
		BOOLEANS = BOOLEANS_TRUE | BOOLEANS_FALSE

		# Error handler passed to bytes.decode() for byte strings that are not
		# decodable using the specified encoding.
		DEFAULT_DECODING_ERROR = 'surrogateescape'


		def to_text(obj, encoding: str = 'utf-8', nonstring: str = 'simplerepr'):
		    """
		    Make sure that a string is a text string.

		    :param obj: An object to make sure is a text string. In most cases this
		        will be either a text string or a byte string. However, with
		        ``nonstring='simplerepr'``, this can be used as a traceback-free
		        version of ``str(obj)``.
		    :param encoding: The encoding to use to transform from a byte string to
		        a text string. Defaults to using 'utf-8'.
		    :param nonstring: The strategy to use if a nonstring is specified in
		        ``obj``. Default is 'simplerepr'. Valid values are:

		        :simplerepr: The default. This takes the ``str`` of the object
		            (falling back to its ``repr``, then to an empty string, if
		            that raises :exc:`UnicodeError`).
		        :empty: Return an empty text string
		        :passthru: Return the object passed in
		        :strict: Raise a :exc:`TypeError`

		    :return: Typically, this returns a text string. If a nonstring object is
		        passed in this may be a different type depending on the strategy
		        specified by nonstring. This will never return a byte string.
		    :raises TypeError: If ``obj`` is not a string and ``nonstring`` is
		        'strict', or if ``nonstring`` is not one of the valid values.
		    """
		    if isinstance(obj, str):
		        return obj

		    if isinstance(obj, bytes):
		        # Decode with the module-wide error handler so that bytes which are
		        # not valid in `encoding` are handled by that handler instead of
		        # the default strict behaviour.
		        return obj.decode(encoding, DEFAULT_DECODING_ERROR)

		    # Non-string handling comes last because str/bytes are the common case.
		    if nonstring == 'simplerepr':
		        # str() and repr() always produce text, so the result can be
		        # returned directly; no second pass through to_text() is needed.
		        try:
		            return str(obj)
		        except UnicodeError:
		            try:
		                return repr(obj)
		            except UnicodeError:
		                # Giving up
		                return ''
		    if nonstring == 'passthru':
		        return obj
		    if nonstring == 'empty':
		        return ''
		    if nonstring == 'strict':
		        raise TypeError('obj must be a string type')

		    raise TypeError(f"Invalid value {nonstring} for to_text's nonstring parameter")


		def boolean(value):
		    """
		    Make sure that a boolean is a boolean.

		    :param value: An object to make sure is a boolean. In most cases this
		        will be a boolean or either a text string or a byte string containing
		        boolean information. Strings are compared case-insensitively and
		        with surrounding whitespace removed.
		    :return: ``True`` if the value is one of ``BOOLEANS_TRUE``, ``False`` if
		        it is one of ``BOOLEANS_FALSE``.
		    :raises TypeError: If the value is in neither set.
		    """
		    if isinstance(value, bool):
		        return value

		    normalized_value = value
		    if isinstance(value, (str, bytes)):
		        normalized_value = to_text(value).lower().strip()

		    try:
		        if normalized_value in BOOLEANS_TRUE:
		            return True
		        if normalized_value in BOOLEANS_FALSE:
		            return False
		    except TypeError:
		        # Unhashable values (list, dict, ...) cannot be looked up in a
		        # frozenset; fall through to the descriptive error below instead of
		        # leaking "unhashable type".
		        pass

		    # Sort the listing so the message is stable between runs; set iteration
		    # order is not.
		    valid_values = ', '.join(sorted(repr(i) for i in BOOLEANS))
		    raise TypeError("The value '%s' is not a valid boolean. Valid booleans include: %s" % (
		        to_text(value), valid_values))


		def to_bytes(obj, encoding: str = 'utf-8', nonstring: str = 'simplerepr'):
		    """
		    Coerce ``obj`` to a byte string.

		    :param obj: The object to convert. Usually a text string or a byte
		        string; with ``nonstring='simplerepr'`` any object is accepted and
		        its ``str()`` form is encoded.
		    :param encoding: Encoding used to turn a text string into bytes.
		        Defaults to 'utf-8'.
		    :param nonstring: What to do when ``obj`` is neither text nor bytes.
		        Default is 'simplerepr'. Valid values are:

		        :simplerepr: Encode ``str(obj)`` (falling back to ``repr(obj)``,
		            then to an empty byte string, on :exc:`UnicodeError`).
		        :empty: Return an empty byte string
		        :passthru: Return the object passed in
		        :strict: Raise a :exc:`TypeError`

		    :return: Normally a byte string. With a nonstring ``obj`` the result
		        depends on the chosen strategy. Never returns a text string.

		    .. note:: A byte string is returned untouched; it is not checked
		        against ``encoding``. To force a particular encoding do::

		            encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8')
		    """
		    # Common cases first: bytes pass through, text is encoded as-is and any
		    # encoding error propagates to the caller.
		    if isinstance(obj, bytes):
		        return obj
		    if isinstance(obj, str):
		        return obj.encode(encoding)

		    # Everything below deals with non-string objects.
		    if nonstring == 'passthru':
		        return obj
		    if nonstring == 'empty':
		        return b''
		    if nonstring == 'strict':
		        raise TypeError('obj must be a string type')
		    if nonstring != 'simplerepr':
		        raise TypeError(f"Invalid value {nonstring} for to_bytes\' nonstring parameter")

		    try:
		        text = str(obj)
		    except UnicodeError:
		        try:
		            text = repr(obj)
		        except UnicodeError:
		            # Giving up
		            return b''

		    return text.encode(encoding)


		def unquote(data):
		    """
		    Strip one pair of surrounding quotes from ``data``.

		    The first and last characters are dropped only when ``is_quoted``
		    reports that ``data`` is quoted; otherwise ``data`` is returned as is.
		    """
		    return data[1:-1] if is_quoted(data) else data


		def is_quoted(data):
		    """
		    Tell whether ``data`` starts and ends with the same quote character.

		    Both ``"`` and ``'`` count as quotes. A closing quote that is directly
		    preceded by a backslash does not count.
		    """
		    # At least two characters are needed for an opening and a closing quote.
		    if len(data) < 2:
		        return False

		    opening, closing = data[0], data[-1]
		    if opening != closing:
		        return False
		    if opening not in ('"', "'"):
		        return False

		    return data[-2] != '\\'


		def humanize_time(duration: int) -> str:
		    """
		    Return a human-friendly description of elapsed time.

		    :param duration: time period in seconds
		    :return: the duration as ``<H>h<M>m<S>s``; the hour and minute parts
		        are left out when the remaining duration is below one hour or one
		        minute respectively, and seconds are shown with three decimals.
		    """
		    pieces = []
		    # Largest unit first; each unit consumes its share of the duration.
		    for unit_seconds, unit_format in ((3600, "%dh"), (60, "%dm")):
		        if duration >= unit_seconds:
		            pieces.append(unit_format % (int(duration) // unit_seconds))
		            duration %= unit_seconds
		    pieces.append("%.3fs" % duration)
		    return "".join(pieces)

simpledisplay.py

+8 −2

Original line number	Diff line number	Diff line
		#!/usr/bin/env python3
		# (c) 2023, Christof Schulze <christof.schulze@fau.de>
		# (c) 2023, Christof Schulze <christof.schulze@fau.de>, WW8
		#
		# simpledisplay is free software: you can redistribute it and/or modify it
		# under the terms of the MIT License as published by the Free Software Foundation.
		@@ -18,7 +18,7 @@
		import sys


		class SimpleDisplay(object):
		class SimpleDisplay:
		"""
		OUTPUT OF LAST RESORT

		@@ -28,7 +28,13 @@ class SimpleDisplay(object):
		functions, with colors and parallel/partial loging to files etc.
		"""
		def display(self, msg, log_only=None):
		"""
		[TODO]
		"""
		print(msg, file=sys.stdout)

		def error(self, msg, wrap_text=None):
		"""
		[TODO]
		"""
		print(msg, file=sys.stderr)