Merge pull request #2695 from pallets/backport-2691-json-encoding
detect UTF encodings when loading json
This commit is contained in:
commit
b178e89e44
3 changed files with 67 additions and 23 deletions
|
|
@ -8,6 +8,7 @@
|
|||
:copyright: (c) 2015 by Armin Ronacher.
|
||||
:license: BSD, see LICENSE for more details.
|
||||
"""
|
||||
import codecs
|
||||
import io
|
||||
import uuid
|
||||
from datetime import date
|
||||
|
|
@ -108,6 +109,49 @@ def _load_arg_defaults(kwargs):
|
|||
kwargs.setdefault('cls', JSONDecoder)
|
||||
|
||||
|
||||
def detect_encoding(data):
|
||||
"""Detect which UTF codec was used to encode the given bytes.
|
||||
|
||||
The latest JSON standard (:rfc:`8259`) suggests that only UTF-8 is
|
||||
accepted. Older documents allowed 8, 16, or 32. 16 and 32 can be big
|
||||
or little endian. Some editors or libraries may prepend a BOM.
|
||||
|
||||
:param data: Bytes in unknown UTF encoding.
|
||||
:return: UTF encoding name
|
||||
"""
|
||||
head = data[:4]
|
||||
|
||||
if head[:3] == codecs.BOM_UTF8:
|
||||
return 'utf-8-sig'
|
||||
|
||||
if b'\x00' not in head:
|
||||
return 'utf-8'
|
||||
|
||||
if head in (codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE):
|
||||
return 'utf-32'
|
||||
|
||||
if head[:2] in (codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE):
|
||||
return 'utf-16'
|
||||
|
||||
if len(head) == 4:
|
||||
if head[:3] == b'\x00\x00\x00':
|
||||
return 'utf-32-be'
|
||||
|
||||
if head[::2] == b'\x00\x00':
|
||||
return 'utf-16-be'
|
||||
|
||||
if head[1:] == b'\x00\x00\x00':
|
||||
return 'utf-32-le'
|
||||
|
||||
if head[1::2] == b'\x00\x00':
|
||||
return 'utf-16-le'
|
||||
|
||||
if len(head) == 2:
|
||||
return 'utf-16-be' if head.startswith(b'\x00') else 'utf-16-le'
|
||||
|
||||
return 'utf-8'
|
||||
|
||||
|
||||
def dumps(obj, **kwargs):
|
||||
"""Serialize ``obj`` to a JSON formatted ``str`` by using the application's
|
||||
configured encoder (:attr:`~flask.Flask.json_encoder`) if there is an
|
||||
|
|
@ -142,7 +186,10 @@ def loads(s, **kwargs):
|
|||
"""
|
||||
_load_arg_defaults(kwargs)
|
||||
if isinstance(s, bytes):
|
||||
s = s.decode(kwargs.pop('encoding', None) or 'utf-8')
|
||||
encoding = kwargs.pop('encoding', None)
|
||||
if encoding is None:
|
||||
encoding = detect_encoding(s)
|
||||
s = s.decode(encoding)
|
||||
return _json.loads(s, **kwargs)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -144,17 +144,10 @@ class Request(RequestBase):
|
|||
if not (force or self.is_json):
|
||||
return None
|
||||
|
||||
# We accept a request charset against the specification as
|
||||
# certain clients have been using this in the past. This
|
||||
# fits our general approach of being nice in what we accept
|
||||
# and strict in what we send out.
|
||||
request_charset = self.mimetype_params.get('charset')
|
||||
data = _get_data(self, cache)
|
||||
|
||||
try:
|
||||
data = _get_data(self, cache)
|
||||
if request_charset is not None:
|
||||
rv = json.loads(data, encoding=request_charset)
|
||||
else:
|
||||
rv = json.loads(data)
|
||||
rv = json.loads(data)
|
||||
except ValueError as e:
|
||||
if silent:
|
||||
rv = None
|
||||
|
|
|
|||
|
|
@ -21,6 +21,8 @@ from werkzeug.datastructures import Range
|
|||
from werkzeug.exceptions import BadRequest, NotFound
|
||||
from werkzeug.http import parse_cache_control_header, parse_options_header
|
||||
from werkzeug.http import http_date
|
||||
|
||||
from flask import json
|
||||
from flask._compat import StringIO, text_type
|
||||
|
||||
|
||||
|
|
@ -34,6 +36,20 @@ def has_encoding(name):
|
|||
|
||||
|
||||
class TestJSON(object):
|
||||
@pytest.mark.parametrize('value', (
|
||||
1, 't', True, False, None,
|
||||
[], [1, 2, 3],
|
||||
{}, {'foo': u'🐍'},
|
||||
))
|
||||
@pytest.mark.parametrize('encoding', (
|
||||
'utf-8', 'utf-8-sig',
|
||||
'utf-16-le', 'utf-16-be', 'utf-16',
|
||||
'utf-32-le', 'utf-32-be', 'utf-32',
|
||||
))
|
||||
def test_detect_encoding(self, value, encoding):
|
||||
data = json.dumps(value).encode(encoding)
|
||||
assert json.detect_encoding(data) == encoding
|
||||
assert json.loads(data) == value
|
||||
|
||||
def test_ignore_cached_json(self):
|
||||
app = flask.Flask(__name__)
|
||||
|
|
@ -85,18 +101,6 @@ class TestJSON(object):
|
|||
rv = c.post('/json', data='"foo"', content_type='application/x+json')
|
||||
assert rv.data == b'foo'
|
||||
|
||||
def test_json_body_encoding(self):
|
||||
app = flask.Flask(__name__)
|
||||
app.testing = True
|
||||
@app.route('/')
|
||||
def index():
|
||||
return flask.request.get_json()
|
||||
|
||||
c = app.test_client()
|
||||
resp = c.get('/', data=u'"Hällo Wörld"'.encode('iso-8859-15'),
|
||||
content_type='application/json; charset=iso-8859-15')
|
||||
assert resp.data == u'Hällo Wörld'.encode('utf-8')
|
||||
|
||||
def test_json_as_unicode(self):
|
||||
app = flask.Flask(__name__)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue