From 9e3848e1a4d0067b8179914eae63910a7a1868fa Mon Sep 17 00:00:00 2001
From: Christof Schulze <christof.schulze@fau.de>
Date: Sat, 15 Feb 2020 22:32:24 +0100
Subject: [PATCH] Clean up Python 2 workarounds, closing #2

---
 parsing/__init__.py | 33 ++++++++++-----------------------
 1 file changed, 10 insertions(+), 23 deletions(-)

diff --git a/parsing/__init__.py b/parsing/__init__.py
index 2a4007c..85731b7 100644
--- a/parsing/__init__.py
+++ b/parsing/__init__.py
@@ -20,8 +20,11 @@ BOOLEANS_TRUE = frozenset(('y', 'yes', 'on', '1', 'true', 't', 1, 1.0, True))
 BOOLEANS_FALSE = frozenset(('n', 'no', 'off', '0', 'false', 'f', 0, 0.0, False))
 BOOLEANS = BOOLEANS_TRUE.union(BOOLEANS_FALSE)
 
+# The error handler to use if the byte string is not decodable using the specified encoding.
+decoding_error = 'surrogateescape'
 
-def to_text(obj, encoding='utf-8', nonstring: str='simplerepr'):
+
+def to_text(obj, encoding='utf-8', nonstring: str = 'simplerepr'):
     """
     Make sure that a string is a text string
 
@@ -49,13 +52,10 @@ def to_text(obj, encoding='utf-8', nonstring: str='simplerepr'):
     if isinstance(obj, str):
         return obj
 
-    # The error handler to use if the byte string is not decodable using the specified encoding.
-    errors = 'surrogateescape'
-
     if isinstance(obj, bytes):
         # Note: We don't need special handling for surrogateescape because
         # all bytes will either be made into surrogates or are valid to decode.
-        return obj.decode(encoding, errors)
+        return obj.decode(encoding, decoding_error)
 
     # Note: We do these last even though we have to call to_text again on the
     # value because we're optimizing the common case
@@ -78,7 +78,7 @@ def to_text(obj, encoding='utf-8', nonstring: str='simplerepr'):
     else:
         raise TypeError('Invalid value %s for to_text\'s nonstring parameter' % nonstring)
 
-    return to_text(value, encoding, errors)
+    return to_text(value, encoding)
 
 
 def boolean(value):
@@ -114,11 +114,11 @@ def to_bytes(obj, encoding='utf-8', nonstring='simplerepr'):
         :empty: Return an empty byte string
         :passthru: Return the object passed in
         :strict: Raise a :exc:`TypeError`
-    :return: Typically this returns a byte string.  If a nonstring object is
+    :return: Typically this returns a byte string. If a nonstring object is
         passed in this may be a different type depending on the strategy
         specified by nonstring.  This will never return a text string.
     .. note:: If passed a byte string, this function does not check that the
-        string is valid in the specified encoding.  If it's important that the
+        string is valid in the specified encoding. If it's important that the
         byte string is in the specified encoding do::
             encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8')
     """
@@ -126,26 +126,13 @@ def to_bytes(obj, encoding='utf-8', nonstring='simplerepr'):
     if isinstance(obj, bytes):
         return obj
 
-    # The error handler to use if the byte string is not decodable using the specified encoding.
-    errors = 'surrogateescape'
-
     # We're given a text string
     # If it has surrogates, we know because it will decode
-    original_errors = errors
-
     if isinstance(obj, str):
         try:
             # Try this first as it's the fastest
-            return obj.encode(encoding, errors)
+            return obj.encode(encoding, decoding_error)
         except UnicodeEncodeError:
-            if original_errors in (None, 'surrogate_then_replace'):
-                # We should only reach this if encoding was non-utf8 original_errors was
-                # surrogate_then_escape and errors was surrogateescape
-
-                # Slow but works
-                return_string = obj.encode('utf-8', 'surrogateescape')
-        #         return_string = return_string.decode('utf-8', 'replace')
-        #         return return_string.encode(encoding, 'replace')
             raise
 
     # Note: We do these last even though we have to call to_bytes again on the
@@ -168,7 +155,7 @@ def to_bytes(obj, encoding='utf-8', nonstring='simplerepr'):
     else:
         raise TypeError('Invalid value %s for to_bytes\' nonstring parameter' % nonstring)
 
-    return to_bytes(value, encoding, errors)
+    return to_bytes(value, encoding)
 
 
 def unquote(data):
-- 
GitLab