Loading parsing/__init__.py +10 −23 Original line number Diff line number Diff line Loading @@ -20,6 +20,9 @@ BOOLEANS_TRUE = frozenset(('y', 'yes', 'on', '1', 'true', 't', 1, 1.0, True)) BOOLEANS_FALSE = frozenset(('n', 'no', 'off', '0', 'false', 'f', 0, 0.0, False)) BOOLEANS = BOOLEANS_TRUE.union(BOOLEANS_FALSE) # The error handler to use if the byte string is not decodable using the specified encoding. decoding_error = 'surrogateescape' def to_text(obj, encoding='utf-8', nonstring: str = 'simplerepr'): """ Loading Loading @@ -49,13 +52,10 @@ def to_text(obj, encoding='utf-8', nonstring: str='simplerepr'): if isinstance(obj, str): return obj # The error handler to use if the byte string is not decodable using the specified encoding. errors = 'surrogateescape' if isinstance(obj, bytes): # Note: We don't need special handling for surrogateescape because # all bytes will either be made into surrogates or are valid to decode. return obj.decode(encoding, errors) return obj.decode(encoding, decoding_error) # Note: We do these last even though we have to call to_text again on the # value because we're optimizing the common case Loading @@ -78,7 +78,7 @@ def to_text(obj, encoding='utf-8', nonstring: str='simplerepr'): else: raise TypeError('Invalid value %s for to_text\'s nonstring parameter' % nonstring) return to_text(value, encoding, errors) return to_text(value, encoding, decoding_error) def boolean(value): Loading Loading @@ -126,26 +126,13 @@ def to_bytes(obj, encoding='utf-8', nonstring='simplerepr'): if isinstance(obj, bytes): return obj # The error handler to use if the byte string is not decodable using the specified encoding. errors = 'surrogateescape' # We're given a text string # If it has surrogates, we know because it will decode original_errors = errors if isinstance(obj, str): try: # Try this first as it's the fastest return obj.encode(encoding, errors) return obj.encode(encoding) except UnicodeEncodeError: if original_errors in (None, 'surrogate_then_replace'): # We should only reach this if encoding was non-utf8 original_errors was # surrogate_then_escape and errors was surrogateescape # Slow but works return_string = obj.encode('utf-8', 'surrogateescape') # return_string = return_string.decode('utf-8', 'replace') # return return_string.encode(encoding, 'replace') raise # Note: We do these last even though we have to call to_bytes again on the Loading @@ -168,7 +155,7 @@ def to_bytes(obj, encoding='utf-8', nonstring='simplerepr'): else: raise TypeError('Invalid value %s for to_bytes\' nonstring parameter' % nonstring) return to_bytes(value, encoding, errors) return to_bytes(value, encoding) def unquote(data): Loading Loading
parsing/__init__.py +10 −23 Original line number Diff line number Diff line Loading @@ -20,6 +20,9 @@ BOOLEANS_TRUE = frozenset(('y', 'yes', 'on', '1', 'true', 't', 1, 1.0, True)) BOOLEANS_FALSE = frozenset(('n', 'no', 'off', '0', 'false', 'f', 0, 0.0, False)) BOOLEANS = BOOLEANS_TRUE.union(BOOLEANS_FALSE) # The error handler to use if the byte string is not decodable using the specified encoding. decoding_error = 'surrogateescape' def to_text(obj, encoding='utf-8', nonstring: str = 'simplerepr'): """ Loading Loading @@ -49,13 +52,10 @@ def to_text(obj, encoding='utf-8', nonstring: str='simplerepr'): if isinstance(obj, str): return obj # The error handler to use if the byte string is not decodable using the specified encoding. errors = 'surrogateescape' if isinstance(obj, bytes): # Note: We don't need special handling for surrogateescape because # all bytes will either be made into surrogates or are valid to decode. return obj.decode(encoding, errors) return obj.decode(encoding, decoding_error) # Note: We do these last even though we have to call to_text again on the # value because we're optimizing the common case Loading @@ -78,7 +78,7 @@ def to_text(obj, encoding='utf-8', nonstring: str='simplerepr'): else: raise TypeError('Invalid value %s for to_text\'s nonstring parameter' % nonstring) return to_text(value, encoding, errors) return to_text(value, encoding, decoding_error) def boolean(value): Loading Loading @@ -126,26 +126,13 @@ def to_bytes(obj, encoding='utf-8', nonstring='simplerepr'): if isinstance(obj, bytes): return obj # The error handler to use if the byte string is not decodable using the specified encoding. errors = 'surrogateescape' # We're given a text string # If it has surrogates, we know because it will decode original_errors = errors if isinstance(obj, str): try: # Try this first as it's the fastest return obj.encode(encoding, errors) return obj.encode(encoding) except UnicodeEncodeError: if original_errors in (None, 'surrogate_then_replace'): # We should only reach this if encoding was non-utf8 original_errors was # surrogate_then_escape and errors was surrogateescape # Slow but works return_string = obj.encode('utf-8', 'surrogateescape') # return_string = return_string.decode('utf-8', 'replace') # return return_string.encode(encoding, 'replace') raise # Note: We do these last even though we have to call to_bytes again on the Loading @@ -168,7 +155,7 @@ def to_bytes(obj, encoding='utf-8', nonstring='simplerepr'): else: raise TypeError('Invalid value %s for to_bytes\' nonstring parameter' % nonstring) return to_bytes(value, encoding, errors) return to_bytes(value, encoding) def unquote(data): Loading