Commit 68382246 authored by Denis Laxalde
Browse files

Prevent UnicodeEncodeError in serialize_links()

When non-ASCII characters are present in "links" data, the previous
code (as it used bytes for string formatting) would try to encode this
data and could thus fail with a UnicodeEncodeError. To prevent this, we
use unicode strings when formatting link parameters (i.e. add a u''
prefix). Nevertheless, we must still return a "native" string to comply
with the WSGI specification (this is either bytes on Python 2 or a
unicode string on Python 3):

  https://www.python.org/dev/peps/pep-3333/#a-note-on-string-types
  https://docs.pylonsproject.org/projects/webtest/en/latest/api.html#module-webtest.lint

Added regression tests accordingly.
parent a0ef5166e34c
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
import re import re
import six
from webob.multidict import MultiDict from webob.multidict import MultiDict
...@@ -30,11 +31,17 @@ def serialize_links(link_dict): ...@@ -30,11 +31,17 @@ def serialize_links(link_dict):
for link in links: for link in links:
link['rel'] = rel link['rel'] = rel
href = link.pop('href') href = link.pop('href')
params = '; '.join('{}="{}"'.format(k, v) params = u'; '.join(u'{}="{}"'.format(k, v)
for k, v in sorted(link.items())) for k, v in sorted(link.items()))
ldos.append({"href": href, "params": params}) ldos.append({"href": href, "params": params})
ldos.sort(key=lambda ldo: (ldo["href"], ldo["params"])) ldos.sort(key=lambda ldo: (ldo["href"], ldo["params"]))
return ', '.join('<{href}>; {params}'.format(**ldo) for ldo in ldos) header = u', '.join(u'<{href}>; {params}'.format(**ldo) for ldo in ldos)
# Return a "native" string per WSGI specification (i.e. bytes on Python2
# but unicode on Python 3):
# https://www.python.org/dev/peps/pep-3333/#a-note-on-string-types
if six.PY2:
header = header.encode('latin1')
return header
def _split_unquotted(delimiter, string): def _split_unquotted(delimiter, string):
......
...@@ -29,11 +29,11 @@ class LinkHeaderTest(TestCase): ...@@ -29,11 +29,11 @@ class LinkHeaderTest(TestCase):
def test_serialize(self): def test_serialize(self):
links = MultiDict() links = MultiDict()
links.add('describedby', {'href': 'LINK1'}) links.add('describedby', {'href': 'LINK1'})
links.add('describedby', {'href': 'LINK2'}) links.add('describedby', {'href': 'LINK2', 'title': u'\xe9'})
links['up'] = {'href': 'LINK3'} links['up'] = {'href': 'LINK3'}
expected = ('<LINK1>; rel="describedby", ' expected = ('<LINK1>; rel="describedby", '
'<LINK2>; rel="describedby", ' '<LINK2>; rel="describedby"; title="\xe9", '
'<LINK3>; rel="up"') '<LINK3>; rel="up"')
self.assertEqual(serialize_links(links), expected) self.assertEqual(serialize_links(links), expected)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment