Cleanup Python 2 workarounds closing #2 (9e3848e1) · Commits · Ammsml / utils

parsing/__init__.py

+10 −23

Original line number	Diff line number	Diff line
		@@ -20,6 +20,9 @@ BOOLEANS_TRUE = frozenset(('y', 'yes', 'on', '1', 'true', 't', 1, 1.0, True))
		BOOLEANS_FALSE = frozenset(('n', 'no', 'off', '0', 'false', 'f', 0, 0.0, False))
		BOOLEANS = BOOLEANS_TRUE.union(BOOLEANS_FALSE)

		# The error handler to use if the byte string is not decodable using the specified encoding.
		decoding_error = 'surrogateescape'


		def to_text(obj, encoding='utf-8', nonstring: str = 'simplerepr'):
		"""
		@@ -49,13 +52,10 @@ def to_text(obj, encoding='utf-8', nonstring: str='simplerepr'):
		if isinstance(obj, str):
		return obj

		# The error handler to use if the byte string is not decodable using the specified encoding.
		errors = 'surrogateescape'

		if isinstance(obj, bytes):
		# Note: We don't need special handling for surrogateescape because
		# all bytes will either be made into surrogates or are valid to decode.
		return obj.decode(encoding, errors)
		return obj.decode(encoding, decoding_error)

		# Note: We do these last even though we have to call to_text again on the
		# value because we're optimizing the common case
		@@ -78,7 +78,7 @@ def to_text(obj, encoding='utf-8', nonstring: str='simplerepr'):
		else:
		raise TypeError('Invalid value %s for to_text\'s nonstring parameter' % nonstring)

		return to_text(value, encoding, errors)
		return to_text(value, encoding, decoding_error)


		def boolean(value):
		@@ -126,26 +126,13 @@ def to_bytes(obj, encoding='utf-8', nonstring='simplerepr'):
		if isinstance(obj, bytes):
		return obj

		# The error handler to use if the byte string is not decodable using the specified encoding.
		errors = 'surrogateescape'

		# We're given a text string
		# If it has surrogates, we know because it will decode
		original_errors = errors

		if isinstance(obj, str):
		try:
		# Try this first as it's the fastest
		return obj.encode(encoding, errors)
		return obj.encode(encoding)
		except UnicodeEncodeError:
		if original_errors in (None, 'surrogate_then_replace'):
		# We should only reach this if encoding was non-utf8, original_errors was
		# surrogate_then_replace, and errors was surrogateescape

		# Slow but works
		return_string = obj.encode('utf-8', 'surrogateescape')
		# return_string = return_string.decode('utf-8', 'replace')
		# return return_string.encode(encoding, 'replace')
		raise

		# Note: We do these last even though we have to call to_bytes again on the
		@@ -168,7 +155,7 @@ def to_bytes(obj, encoding='utf-8', nonstring='simplerepr'):
		else:
		raise TypeError('Invalid value %s for to_bytes\' nonstring parameter' % nonstring)

		return to_bytes(value, encoding, errors)
		return to_bytes(value, encoding)


		def unquote(data):