Loading parsing/__init__.py +10 −23 Original line number Diff line number Diff line Loading @@ -20,8 +20,11 @@ BOOLEANS_TRUE = frozenset(('y', 'yes', 'on', '1', 'true', 't', 1, 1.0, True)) BOOLEANS_FALSE = frozenset(('n', 'no', 'off', '0', 'false', 'f', 0, 0.0, False)) BOOLEANS = BOOLEANS_TRUE.union(BOOLEANS_FALSE) # The error handler to use if the byte string is not decodable using the specified encoding. decoding_error = 'surrogateescape' def to_text(obj, encoding='utf-8', nonstring: str='simplerepr'): def to_text(obj, encoding='utf-8', nonstring: str = 'simplerepr'): """ Make sure that a string is a text string Loading Loading @@ -49,13 +52,10 @@ def to_text(obj, encoding='utf-8', nonstring: str='simplerepr'): if isinstance(obj, str): return obj # The error handler to use if the byte string is not decodable using the specified encoding. errors = 'surrogateescape' if isinstance(obj, bytes): # Note: We don't need special handling for surrogateescape because # all bytes will either be made into surrogates or are valid to decode. return obj.decode(encoding, errors) return obj.decode(encoding, decoding_error) # Note: We do these last even though we have to call to_text again on the # value because we're optimizing the common case Loading @@ -78,7 +78,7 @@ def to_text(obj, encoding='utf-8', nonstring: str='simplerepr'): else: raise TypeError('Invalid value %s for to_text\'s nonstring parameter' % nonstring) return to_text(value, encoding, errors) return to_text(value, encoding, decoding_error) def boolean(value): Loading Loading @@ -114,11 +114,11 @@ def to_bytes(obj, encoding='utf-8', nonstring='simplerepr'): :empty: Return an empty byte string :passthru: Return the object passed in :strict: Raise a :exc:`TypeError` :return: Typically this returns a byte string. If a nonstring object is :return: Typically this returns a byte string. If a nonstring object is passed in this may be a different type depending on the strategy specified by nonstring. This will never return a text string. .. note:: If passed a byte string, this function does not check that the string is valid in the specified encoding. If it's important that the string is valid in the specified encoding. If it's important that the byte string is in the specified encoding do:: encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8') """ Loading @@ -126,26 +126,13 @@ def to_bytes(obj, encoding='utf-8', nonstring='simplerepr'): if isinstance(obj, bytes): return obj # The error handler to use if the byte string is not decodable using the specified encoding. errors = 'surrogateescape' # We're given a text string # If it has surrogates, we know because it will decode original_errors = errors if isinstance(obj, str): try: # Try this first as it's the fastest return obj.encode(encoding, errors) return obj.encode(encoding) except UnicodeEncodeError: if original_errors in (None, 'surrogate_then_replace'): # We should only reach this if encoding was non-utf8 original_errors was # surrogate_then_escape and errors was surrogateescape # Slow but works return_string = obj.encode('utf-8', 'surrogateescape') # return_string = return_string.decode('utf-8', 'replace') # return return_string.encode(encoding, 'replace') raise # Note: We do these last even though we have to call to_bytes again on the Loading @@ -168,7 +155,7 @@ def to_bytes(obj, encoding='utf-8', nonstring='simplerepr'): else: raise TypeError('Invalid value %s for to_bytes\' nonstring parameter' % nonstring) return to_bytes(value, encoding, errors) return to_bytes(value, encoding) def unquote(data): Loading Loading
parsing/__init__.py +10 −23 Original line number Diff line number Diff line Loading @@ -20,8 +20,11 @@ BOOLEANS_TRUE = frozenset(('y', 'yes', 'on', '1', 'true', 't', 1, 1.0, True)) BOOLEANS_FALSE = frozenset(('n', 'no', 'off', '0', 'false', 'f', 0, 0.0, False)) BOOLEANS = BOOLEANS_TRUE.union(BOOLEANS_FALSE) # The error handler to use if the byte string is not decodable using the specified encoding. decoding_error = 'surrogateescape' def to_text(obj, encoding='utf-8', nonstring: str='simplerepr'): def to_text(obj, encoding='utf-8', nonstring: str = 'simplerepr'): """ Make sure that a string is a text string Loading Loading @@ -49,13 +52,10 @@ def to_text(obj, encoding='utf-8', nonstring: str='simplerepr'): if isinstance(obj, str): return obj # The error handler to use if the byte string is not decodable using the specified encoding. errors = 'surrogateescape' if isinstance(obj, bytes): # Note: We don't need special handling for surrogateescape because # all bytes will either be made into surrogates or are valid to decode. return obj.decode(encoding, errors) return obj.decode(encoding, decoding_error) # Note: We do these last even though we have to call to_text again on the # value because we're optimizing the common case Loading @@ -78,7 +78,7 @@ def to_text(obj, encoding='utf-8', nonstring: str='simplerepr'): else: raise TypeError('Invalid value %s for to_text\'s nonstring parameter' % nonstring) return to_text(value, encoding, errors) return to_text(value, encoding, decoding_error) def boolean(value): Loading Loading @@ -114,11 +114,11 @@ def to_bytes(obj, encoding='utf-8', nonstring='simplerepr'): :empty: Return an empty byte string :passthru: Return the object passed in :strict: Raise a :exc:`TypeError` :return: Typically this returns a byte string. If a nonstring object is :return: Typically this returns a byte string. If a nonstring object is passed in this may be a different type depending on the strategy specified by nonstring. This will never return a text string. .. note:: If passed a byte string, this function does not check that the string is valid in the specified encoding. If it's important that the string is valid in the specified encoding. If it's important that the byte string is in the specified encoding do:: encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8') """ Loading @@ -126,26 +126,13 @@ def to_bytes(obj, encoding='utf-8', nonstring='simplerepr'): if isinstance(obj, bytes): return obj # The error handler to use if the byte string is not decodable using the specified encoding. errors = 'surrogateescape' # We're given a text string # If it has surrogates, we know because it will decode original_errors = errors if isinstance(obj, str): try: # Try this first as it's the fastest return obj.encode(encoding, errors) return obj.encode(encoding) except UnicodeEncodeError: if original_errors in (None, 'surrogate_then_replace'): # We should only reach this if encoding was non-utf8 original_errors was # surrogate_then_escape and errors was surrogateescape # Slow but works return_string = obj.encode('utf-8', 'surrogateescape') # return_string = return_string.decode('utf-8', 'replace') # return return_string.encode(encoding, 'replace') raise # Note: We do these last even though we have to call to_bytes again on the Loading @@ -168,7 +155,7 @@ def to_bytes(obj, encoding='utf-8', nonstring='simplerepr'): else: raise TypeError('Invalid value %s for to_bytes\' nonstring parameter' % nonstring) return to_bytes(value, encoding, errors) return to_bytes(value, encoding) def unquote(data): Loading