detect UTF encodings when loading json
This commit is contained in:
parent
0cbe698958
commit
ab4142215d
4 changed files with 82 additions and 26 deletions
|
|
@ -6,7 +6,7 @@ flask.json
|
|||
:copyright: © 2010 by the Pallets team.
|
||||
:license: BSD, see LICENSE for more details.
|
||||
"""
|
||||
|
||||
import codecs
|
||||
import io
|
||||
import uuid
|
||||
from datetime import date, datetime
|
||||
|
|
@ -121,6 +121,49 @@ def _load_arg_defaults(kwargs):
|
|||
kwargs.setdefault('cls', JSONDecoder)
|
||||
|
||||
|
||||
def detect_encoding(data):
    """Detect which UTF codec was used to encode the given bytes.

    The latest JSON standard (:rfc:`8259`) suggests that only UTF-8 is
    accepted. Older documents allowed 8, 16, or 32. 16 and 32 can be big
    or little endian. Some editors or libraries may prepend a BOM.

    Without a BOM, detection relies on the position of null bytes around
    the first character only. The first character of a JSON text is
    always ASCII, but later characters need not be (for example the
    document ``"\\u4e2d"`` written with the literal character), so the
    second 16-bit code unit is not required to contain a null byte.

    :param data: Bytes in unknown UTF encoding.
    :return: UTF encoding name
    """
    # Only the first four bytes are needed: that covers every BOM and
    # one full UTF-32 code unit.
    head = data[:4]

    if head[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'

    # UTF-16/32 encode an ASCII first character with at least one null
    # byte, so no null byte in the head means UTF-8.
    if b'\x00' not in head:
        return 'utf-8'

    # The UTF-32-LE BOM starts with the UTF-16-LE BOM, so UTF-32 must be
    # tested first.
    if head in (codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE):
        return 'utf-32'

    if head[:2] in (codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE):
        return 'utf-16'

    # Slices (not indexes) are compared so the checks behave the same on
    # Python 2 ``str`` and Python 3 ``bytes``.
    if len(head) == 4:
        if head[:1] == b'\x00':
            # 00 00 -- --  utf-32-be
            # 00 XX -- --  utf-16-be
            return 'utf-32-be' if head[1:2] == b'\x00' else 'utf-16-be'

        if head[1:2] == b'\x00':
            # XX 00 00 00  utf-32-le
            # XX 00 00 XX  utf-16-le
            # XX 00 XX --  utf-16-le
            return 'utf-32-le' if head[2:] == b'\x00\x00' else 'utf-16-le'

    if len(head) == 2:
        # A single UTF-16 code unit; the null byte marks the high byte.
        return 'utf-16-be' if head.startswith(b'\x00') else 'utf-16-le'

    return 'utf-8'
|
||||
|
||||
|
||||
def dumps(obj, **kwargs):
|
||||
"""Serialize ``obj`` to a JSON formatted ``str`` by using the application's
|
||||
configured encoder (:attr:`~flask.Flask.json_encoder`) if there is an
|
||||
|
|
@ -155,7 +198,10 @@ def loads(s, **kwargs):
|
|||
"""
|
||||
_load_arg_defaults(kwargs)
|
||||
if isinstance(s, bytes):
|
||||
s = s.decode(kwargs.pop('encoding', None) or 'utf-8')
|
||||
encoding = kwargs.pop('encoding', None)
|
||||
if encoding is None:
|
||||
encoding = detect_encoding(s)
|
||||
s = s.decode(encoding)
|
||||
return _json.loads(s, **kwargs)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -50,15 +50,17 @@ class JSONMixin(object):
|
|||
return self.get_data(cache=cache)
|
||||
|
||||
def get_json(self, force=False, silent=False, cache=True):
|
||||
"""Parse and return the data as JSON. If the mimetype does not indicate
|
||||
JSON (:mimetype:`application/json`, see :meth:`is_json`), this returns
|
||||
``None`` unless ``force`` is true. If parsing fails,
|
||||
:meth:`on_json_loading_failed` is called and its return value is used
|
||||
as the return value.
|
||||
"""Parse and return the data as JSON. If the mimetype does not
|
||||
indicate JSON (:mimetype:`application/json`, see
|
||||
:meth:`is_json`), this returns ``None`` unless ``force`` is
|
||||
true. If parsing fails, :meth:`on_json_loading_failed` is called
|
||||
and its return value is used as the return value.
|
||||
|
||||
:param force: Ignore the mimetype and always try to parse JSON.
|
||||
:param silent: Silence parsing errors and return ``None`` instead.
|
||||
:param cache: Store the parsed JSON to return for subsequent calls.
|
||||
:param silent: Silence parsing errors and return ``None``
|
||||
instead.
|
||||
:param cache: Store the parsed JSON to return for subsequent
|
||||
calls.
|
||||
"""
|
||||
if cache and self._cached_json[silent] is not Ellipsis:
|
||||
return self._cached_json[silent]
|
||||
|
|
@ -66,14 +68,10 @@ class JSONMixin(object):
|
|||
if not (force or self.is_json):
|
||||
return None
|
||||
|
||||
# We accept MIME charset against the specification as certain clients
|
||||
# have used this in the past. For responses, we assume that if the
|
||||
# charset is set then the data has been encoded correctly as well.
|
||||
charset = self.mimetype_params.get('charset')
|
||||
data = self._get_data_for_json(cache=cache)
|
||||
|
||||
try:
|
||||
data = self._get_data_for_json(cache=cache)
|
||||
rv = json.loads(data, encoding=charset)
|
||||
rv = json.loads(data)
|
||||
except ValueError as e:
|
||||
if silent:
|
||||
rv = None
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue