Merge pull request #2691 from pallets/json-encoding
detect UTF encodings when loading json
This commit is contained in:
commit
465b48ed4e
4 changed files with 82 additions and 26 deletions
|
|
@ -147,6 +147,9 @@ unreleased
|
||||||
when it is registered with the app. (`#2629`_)
|
when it is registered with the app. (`#2629`_)
|
||||||
- :meth:`Request.get_json() <flask.Request.get_json>` doesn't cache the
|
- :meth:`Request.get_json() <flask.Request.get_json>` doesn't cache the
|
||||||
result if parsing fails when ``silent`` is true. (`#2651`_)
|
result if parsing fails when ``silent`` is true. (`#2651`_)
|
||||||
|
- :func:`request.get_json <flask.Request.get_json>` no longer accepts
|
||||||
|
arbitrary encodings. Incoming JSON should be encoded using UTF-8 per
|
||||||
|
:rfc:`8259`, but Flask will autodetect UTF-8, -16, or -32. (`#2691`_)
|
||||||
|
|
||||||
.. _pallets/meta#24: https://github.com/pallets/meta/issues/24
|
.. _pallets/meta#24: https://github.com/pallets/meta/issues/24
|
||||||
.. _#1421: https://github.com/pallets/flask/issues/1421
|
.. _#1421: https://github.com/pallets/flask/issues/1421
|
||||||
|
|
@ -192,6 +195,7 @@ unreleased
|
||||||
.. _#2635: https://github.com/pallets/flask/pull/2635
|
.. _#2635: https://github.com/pallets/flask/pull/2635
|
||||||
.. _#2629: https://github.com/pallets/flask/pull/2629
|
.. _#2629: https://github.com/pallets/flask/pull/2629
|
||||||
.. _#2651: https://github.com/pallets/flask/issues/2651
|
.. _#2651: https://github.com/pallets/flask/issues/2651
|
||||||
|
.. _#2691: https://github.com/pallets/flask/pull/2691
|
||||||
|
|
||||||
|
|
||||||
Version 0.12.2
|
Version 0.12.2
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ flask.json
|
||||||
:copyright: © 2010 by the Pallets team.
|
:copyright: © 2010 by the Pallets team.
|
||||||
:license: BSD, see LICENSE for more details.
|
:license: BSD, see LICENSE for more details.
|
||||||
"""
|
"""
|
||||||
|
import codecs
|
||||||
import io
|
import io
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import date, datetime
|
from datetime import date, datetime
|
||||||
|
|
@ -121,6 +121,49 @@ def _load_arg_defaults(kwargs):
|
||||||
kwargs.setdefault('cls', JSONDecoder)
|
kwargs.setdefault('cls', JSONDecoder)
|
||||||
|
|
||||||
|
|
||||||
|
def detect_encoding(data):
    """Detect which UTF codec was used to encode the given bytes.

    The latest JSON standard (:rfc:`8259`) requires UTF-8 for JSON
    exchanged between systems. Older documents allowed UTF-8, -16, or
    -32, where 16 and 32 can be big or little endian. Some editors or
    libraries may prepend a BOM.

    A BOM, if present, decides the result. Otherwise the position of
    null bytes in the first character is used. This relies on the first
    character being ASCII, which holds for any valid JSON document, but
    makes no assumption about the second character, so a top-level
    string such as ``"\\u4e2d"`` encoded as UTF-16 is still recognized.

    :param data: Bytes in unknown UTF encoding.
    :return: UTF encoding name
    """
    head = data[:4]

    # Check BOMs before looking at null bytes. UTF-32 must be tested
    # before UTF-16 because the UTF-32-LE BOM starts with the UTF-16-LE
    # BOM.
    if head in (codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE):
        return 'utf-32'

    if head[:2] in (codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE):
        return 'utf-16'

    if head[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'

    # Compare one-byte slices instead of indexing so the checks behave
    # the same on Python 2 (str items) and Python 3 (int items).
    null = b'\x00'

    if len(head) == 4:
        if head[:1] == null:
            # 00 00 -- --  utf-32-be
            # 00 XX -- --  utf-16-be
            return 'utf-32-be' if head[1:2] == null else 'utf-16-be'

        if head[1:2] == null:
            # XX 00 00 00  utf-32-le
            # XX 00 00 XX  utf-16-le
            # XX 00 XX --  utf-16-le
            return 'utf-32-le' if head[2:] == null * 2 else 'utf-16-le'
    elif len(head) == 2:
        # A single UTF-16 code unit, for example the document ``1``.
        if head[:1] == null:
            return 'utf-16-be'

        if head[1:] == null:
            return 'utf-16-le'

    return 'utf-8'
|
||||||
|
|
||||||
|
|
||||||
def dumps(obj, **kwargs):
|
def dumps(obj, **kwargs):
|
||||||
"""Serialize ``obj`` to a JSON formatted ``str`` by using the application's
|
"""Serialize ``obj`` to a JSON formatted ``str`` by using the application's
|
||||||
configured encoder (:attr:`~flask.Flask.json_encoder`) if there is an
|
configured encoder (:attr:`~flask.Flask.json_encoder`) if there is an
|
||||||
|
|
@ -155,7 +198,10 @@ def loads(s, **kwargs):
|
||||||
"""
|
"""
|
||||||
_load_arg_defaults(kwargs)
|
_load_arg_defaults(kwargs)
|
||||||
if isinstance(s, bytes):
|
if isinstance(s, bytes):
|
||||||
s = s.decode(kwargs.pop('encoding', None) or 'utf-8')
|
encoding = kwargs.pop('encoding', None)
|
||||||
|
if encoding is None:
|
||||||
|
encoding = detect_encoding(s)
|
||||||
|
s = s.decode(encoding)
|
||||||
return _json.loads(s, **kwargs)
|
return _json.loads(s, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -50,15 +50,17 @@ class JSONMixin(object):
|
||||||
return self.get_data(cache=cache)
|
return self.get_data(cache=cache)
|
||||||
|
|
||||||
def get_json(self, force=False, silent=False, cache=True):
|
def get_json(self, force=False, silent=False, cache=True):
|
||||||
"""Parse and return the data as JSON. If the mimetype does not indicate
|
"""Parse and return the data as JSON. If the mimetype does not
|
||||||
JSON (:mimetype:`application/json`, see :meth:`is_json`), this returns
|
indicate JSON (:mimetype:`application/json`, see
|
||||||
``None`` unless ``force`` is true. If parsing fails,
|
:meth:`is_json`), this returns ``None`` unless ``force`` is
|
||||||
:meth:`on_json_loading_failed` is called and its return value is used
|
true. If parsing fails, :meth:`on_json_loading_failed` is called
|
||||||
as the return value.
|
and its return value is used as the return value.
|
||||||
|
|
||||||
:param force: Ignore the mimetype and always try to parse JSON.
|
:param force: Ignore the mimetype and always try to parse JSON.
|
||||||
:param silent: Silence parsing errors and return ``None`` instead.
|
:param silent: Silence parsing errors and return ``None``
|
||||||
:param cache: Store the parsed JSON to return for subsequent calls.
|
instead.
|
||||||
|
:param cache: Store the parsed JSON to return for subsequent
|
||||||
|
calls.
|
||||||
"""
|
"""
|
||||||
if cache and self._cached_json[silent] is not Ellipsis:
|
if cache and self._cached_json[silent] is not Ellipsis:
|
||||||
return self._cached_json[silent]
|
return self._cached_json[silent]
|
||||||
|
|
@ -66,14 +68,10 @@ class JSONMixin(object):
|
||||||
if not (force or self.is_json):
|
if not (force or self.is_json):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# We accept MIME charset against the specification as certain clients
|
data = self._get_data_for_json(cache=cache)
|
||||||
# have used this in the past. For responses, we assume that if the
|
|
||||||
# charset is set then the data has been encoded correctly as well.
|
|
||||||
charset = self.mimetype_params.get('charset')
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
data = self._get_data_for_json(cache=cache)
|
rv = json.loads(data)
|
||||||
rv = json.loads(data, encoding=charset)
|
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
if silent:
|
if silent:
|
||||||
rv = None
|
rv = None
|
||||||
|
|
|
||||||
|
|
@ -16,10 +16,13 @@ import uuid
|
||||||
import pytest
|
import pytest
|
||||||
from werkzeug.datastructures import Range
|
from werkzeug.datastructures import Range
|
||||||
from werkzeug.exceptions import BadRequest, NotFound
|
from werkzeug.exceptions import BadRequest, NotFound
|
||||||
from werkzeug.http import http_date, parse_cache_control_header, \
|
from werkzeug.http import (
|
||||||
|
http_date, parse_cache_control_header,
|
||||||
parse_options_header
|
parse_options_header
|
||||||
|
)
|
||||||
|
|
||||||
import flask
|
import flask
|
||||||
|
from flask import json
|
||||||
from flask._compat import StringIO, text_type
|
from flask._compat import StringIO, text_type
|
||||||
from flask.helpers import get_debug_flag, get_env
|
from flask.helpers import get_debug_flag, get_env
|
||||||
|
|
||||||
|
|
@ -55,6 +58,21 @@ class FixedOffset(datetime.tzinfo):
|
||||||
|
|
||||||
|
|
||||||
class TestJSON(object):
|
class TestJSON(object):
|
||||||
|
@pytest.mark.parametrize('value', (
    1, 't', True, False, None,
    [], [1, 2, 3],
    {}, {'foo': u'🐍'},
))
@pytest.mark.parametrize('encoding', (
    'utf-8', 'utf-8-sig',
    'utf-16-le', 'utf-16-be', 'utf-16',
    'utf-32-le', 'utf-32-be', 'utf-32',
))
def test_detect_encoding(self, value, encoding):
    """Round-trip ``value`` through each UTF codec.

    The serialized value is encoded with ``encoding``; the detected
    codec name must match it, and loading the raw bytes must give back
    the original value.
    """
    encoded = json.dumps(value).encode(encoding)
    detected = json.detect_encoding(encoded)
    assert detected == encoding
    # ``loads`` receives bytes with no explicit encoding argument.
    decoded = json.loads(encoded)
    assert decoded == value
|
||||||
|
|
||||||
def test_ignore_cached_json(self, app):
|
def test_ignore_cached_json(self, app):
|
||||||
with app.test_request_context('/', method='POST', data='malformed',
|
with app.test_request_context('/', method='POST', data='malformed',
|
||||||
content_type='application/json'):
|
content_type='application/json'):
|
||||||
|
|
@ -121,16 +139,6 @@ class TestJSON(object):
|
||||||
rv = client.post('/json', data='"foo"', content_type='application/x+json')
|
rv = client.post('/json', data='"foo"', content_type='application/x+json')
|
||||||
assert rv.data == b'foo'
|
assert rv.data == b'foo'
|
||||||
|
|
||||||
def test_json_body_encoding(self, app, client):
|
|
||||||
|
|
||||||
@app.route('/')
|
|
||||||
def index():
|
|
||||||
return flask.request.get_json()
|
|
||||||
|
|
||||||
resp = client.get('/', data=u'"Hällo Wörld"'.encode('iso-8859-15'),
|
|
||||||
content_type='application/json; charset=iso-8859-15')
|
|
||||||
assert resp.data == u'Hällo Wörld'.encode('utf-8')
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('test_value,expected', [(True, '"\\u2603"'), (False, u'"\u2603"')])
|
@pytest.mark.parametrize('test_value,expected', [(True, '"\\u2603"'), (False, u'"\u2603"')])
|
||||||
def test_json_as_unicode(self, test_value, expected, app, app_ctx):
|
def test_json_as_unicode(self, test_value, expected, app, app_ctx):
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue