Commit 68382246 authored by Denis Laxalde
Browse files

Prevent UnicodeEncodeError in serialize_links()

When non-ASCII characters are present in "links" data, the previous
code (as it used byte strings for string formatting) would try to
encode this data and could thus fail with a UnicodeEncodeError. To
prevent this, we use unicode strings when formatting link parameters
(i.e. we add a u'' prefix). Nevertheless, we must still return a
"native" string to comply with the WSGI specification (that is, bytes
on Python 2 and a unicode string on Python 3).

Added regression tests accordingly.
parent a0ef5166e34c
......@@ -18,6 +18,7 @@
import re
import six
from webob.multidict import MultiDict
......@@ -30,11 +31,17 @@ def serialize_links(link_dict):
for link in links:
link['rel'] = rel
href = link.pop('href')
params = '; '.join('{}="{}"'.format(k, v)
params = u'; '.join(u'{}="{}"'.format(k, v)
for k, v in sorted(link.items()))
ldos.append({"href": href, "params": params})
ldos.sort(key=lambda ldo: (ldo["href"], ldo["params"]))
return ', '.join('<{href}>; {params}'.format(**ldo) for ldo in ldos)
header = u', '.join(u'<{href}>; {params}'.format(**ldo) for ldo in ldos)
# Return a "native" string per WSGI specification (i.e. bytes on Python2
# but unicode on Python 3):
if six.PY2:
header = header.encode('latin1')
return header
def _split_unquotted(delimiter, string):
......@@ -29,11 +29,11 @@ class LinkHeaderTest(TestCase):
def test_serialize(self):
links = MultiDict()
links.add('describedby', {'href': 'LINK1'})
links.add('describedby', {'href': 'LINK2'})
links.add('describedby', {'href': 'LINK2', 'title': u'\xe9'})
links['up'] = {'href': 'LINK3'}
expected = ('<LINK1>; rel="describedby", '
'<LINK2>; rel="describedby", '
'<LINK2>; rel="describedby"; title="\xe9", '
'<LINK3>; rel="up"')
self.assertEqual(serialize_links(links), expected)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment