Skip to content

Commit e91581f

Browse files
authored
Merge pull request #64 from methane/speedup-pep393
Use PEP 393 new APIs
2 parents cf009f7 + 3fe4a11 commit e91581f

3 files changed

Lines changed: 228 additions & 7 deletions

File tree

bench/runbench.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
"""
33
Runs the benchmarks
44
"""
5+
from __future__ import print_function
56
import sys
67
import os
78
import re
@@ -30,13 +31,13 @@ def run_bench(name):
3031

3132

3233
def main():
33-
print '=' * 80
34-
print 'Running benchmark for MarkupSafe'
35-
print '-' * 80
34+
print('=' * 80)
35+
print('Running benchmark for MarkupSafe')
36+
print('-' * 80)
3637
os.chdir(bench_directory)
3738
for bench in list_benchmarks():
3839
run_bench(bench)
39-
print '-' * 80
40+
print('-' * 80)
4041

4142

4243
if __name__ == '__main__':

markupsafe/_speedups.c

Lines changed: 188 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,24 @@
88
* :copyright: © 2010 by the Pallets team.
99
* :license: BSD, see LICENSE for more details.
1010
*/
11-
1211
#include <Python.h>
1312

13+
#if PY_MAJOR_VERSION < 3
1414
#define ESCAPED_CHARS_TABLE_SIZE 63
1515
/*
 * UNICHR(x): decode the NUL-terminated ASCII C string `x` into a unicode
 * object and yield a pointer to its Py_UNICODE buffer.
 *
 * The expansion is a plain expression with no trailing semicolon, so the
 * caller supplies the statement terminator.
 *
 * NOTE(review): the object returned by PyUnicode_DecodeASCII is neither
 * checked for NULL nor released here; the buffer is only valid while that
 * object stays alive.
 */
#define UNICHR(x) (PyUnicode_AS_UNICODE((PyUnicodeObject*)PyUnicode_DecodeASCII(x, strlen(x), NULL)))
1616

17-
static PyObject* markup;
1817
static Py_ssize_t escaped_chars_delta_len[ESCAPED_CHARS_TABLE_SIZE];
1918
static Py_UNICODE *escaped_chars_repl[ESCAPED_CHARS_TABLE_SIZE];
19+
#endif
20+
21+
static PyObject* markup;
2022

2123
static int
2224
init_constants(void)
2325
{
2426
PyObject *module;
27+
28+
#if PY_MAJOR_VERSION < 3
2529
/* mapping of characters to replace */
2630
escaped_chars_repl['"'] = UNICHR("&#34;");
2731
escaped_chars_repl['\''] = UNICHR("&#39;");
@@ -34,6 +38,7 @@ init_constants(void)
3438
escaped_chars_delta_len['"'] = escaped_chars_delta_len['\''] = \
3539
escaped_chars_delta_len['&'] = 4;
3640
escaped_chars_delta_len['<'] = escaped_chars_delta_len['>'] = 3;
41+
#endif
3742

3843
/* import markup type so that we can mark the return value */
3944
module = PyImport_ImportModule("markupsafe");
@@ -45,6 +50,7 @@ init_constants(void)
4550
return 1;
4651
}
4752

53+
#if PY_MAJOR_VERSION < 3
4854
static PyObject*
4955
escape_unicode(PyUnicodeObject *in)
5056
{
@@ -105,13 +111,192 @@ escape_unicode(PyUnicodeObject *in)
105111

106112
return (PyObject*)out;
107113
}
114+
#else /* PY_MAJOR_VERSION < 3 */
115+
116+
/*
 * GET_DELTA(inp, inp_end, delta)
 *
 * Scan the code units in [inp, inp_end) and add to `delta` the number of
 * extra code units needed to escape them: 4 for each '"', '\'' and '&'
 * (replaced by a 5-character entity) and 3 for each '<' and '>' (replaced
 * by a 4-character entity).
 *
 * `inp` and `delta` must be modifiable lvalues. `inp` is advanced and
 * equals `inp_end` afterwards, so callers must reset it before reuse.
 * `delta` is accumulated into, not reset.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement,
 * including inside an unbraced if/else.
 */
#define GET_DELTA(inp, inp_end, delta) \
    do { \
        while ((inp) < (inp_end)) { \
            switch (*(inp)++) { \
            case '"': \
            case '\'': \
            case '&': \
                (delta) += 4; \
                break; \
            case '<': \
            case '>': \
                (delta) += 3; \
                break; \
            default: \
                break; \
            } \
        } \
    } while (0)
108130

131+
/*
 * DO_ESCAPE_FLUSH(inp, outp, run)
 *
 * Copy the `run` pending unescaped code units that end just before `inp`
 * to `outp`, advance `outp` past them and reset `run` to zero.
 */
#define DO_ESCAPE_FLUSH(inp, outp, run) \
    do { \
        memcpy((outp), (inp) - (run), sizeof(*(outp)) * (run)); \
        (outp) += (run); \
        (run) = 0; \
    } while (0)

/*
 * DO_ESCAPE(inp, inp_end, outp)
 *
 * Copy the code units in [inp, inp_end) to `outp`, replacing
 *     '"'  -> &#34;     '\'' -> &#39;     '&' -> &amp;
 *     '<'  -> &lt;      '>'  -> &gt;
 *
 * Runs of ordinary code units are not copied one by one: their length is
 * counted and the whole run is copied with a single memcpy when the next
 * special character (or the end of input) is reached.
 *
 * `inp` and `outp` must be modifiable lvalues pointing to elements of the
 * same size. The caller must provide room at `outp` for the input length
 * plus the value computed by GET_DELTA. Afterwards `inp` equals `inp_end`
 * and `outp` points just past the last code unit written.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement,
 * including inside an unbraced if/else.
 */
#define DO_ESCAPE(inp, inp_end, outp) \
    do { \
        size_t do_escape_run = 0; \
        while ((inp) < (inp_end)) { \
            switch (*(inp)) { \
            case '"': \
                DO_ESCAPE_FLUSH(inp, outp, do_escape_run); \
                *(outp)++ = '&'; \
                *(outp)++ = '#'; \
                *(outp)++ = '3'; \
                *(outp)++ = '4'; \
                *(outp)++ = ';'; \
                break; \
            case '\'': \
                DO_ESCAPE_FLUSH(inp, outp, do_escape_run); \
                *(outp)++ = '&'; \
                *(outp)++ = '#'; \
                *(outp)++ = '3'; \
                *(outp)++ = '9'; \
                *(outp)++ = ';'; \
                break; \
            case '&': \
                DO_ESCAPE_FLUSH(inp, outp, do_escape_run); \
                *(outp)++ = '&'; \
                *(outp)++ = 'a'; \
                *(outp)++ = 'm'; \
                *(outp)++ = 'p'; \
                *(outp)++ = ';'; \
                break; \
            case '<': \
                DO_ESCAPE_FLUSH(inp, outp, do_escape_run); \
                *(outp)++ = '&'; \
                *(outp)++ = 'l'; \
                *(outp)++ = 't'; \
                *(outp)++ = ';'; \
                break; \
            case '>': \
                DO_ESCAPE_FLUSH(inp, outp, do_escape_run); \
                *(outp)++ = '&'; \
                *(outp)++ = 'g'; \
                *(outp)++ = 't'; \
                *(outp)++ = ';'; \
                break; \
            default: \
                /* ordinary code unit: extend the pending run */ \
                do_escape_run++; \
                break; \
            } \
            (inp)++; \
        } \
        /* copy whatever ordinary tail is still pending */ \
        DO_ESCAPE_FLUSH(inp, outp, do_escape_run); \
    } while (0)
186+
187+
static PyObject*
188+
escape_unicode_kind1(PyUnicodeObject *in)
189+
{
190+
Py_UCS1 *inp = PyUnicode_1BYTE_DATA(in);
191+
Py_UCS1 *inp_end = inp + PyUnicode_GET_LENGTH(in);
192+
Py_UCS1 *outp;
193+
PyObject *out;
194+
Py_ssize_t delta = 0;
195+
196+
GET_DELTA(inp, inp_end, delta);
197+
if (!delta) {
198+
Py_INCREF(in);
199+
return (PyObject*)in;
200+
}
201+
202+
out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta,
203+
PyUnicode_IS_ASCII(in) ? 127 : 255);
204+
if (!out)
205+
return NULL;
206+
207+
inp = PyUnicode_1BYTE_DATA(in);
208+
outp = PyUnicode_1BYTE_DATA(out);
209+
DO_ESCAPE(inp, inp_end, outp);
210+
return out;
211+
}
212+
213+
static PyObject*
214+
escape_unicode_kind2(PyUnicodeObject *in)
215+
{
216+
Py_UCS2 *inp = PyUnicode_2BYTE_DATA(in);
217+
Py_UCS2 *inp_end = inp + PyUnicode_GET_LENGTH(in);
218+
Py_UCS2 *outp;
219+
PyObject *out;
220+
Py_ssize_t delta = 0;
221+
222+
GET_DELTA(inp, inp_end, delta);
223+
if (!delta) {
224+
Py_INCREF(in);
225+
return (PyObject*)in;
226+
}
227+
228+
out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 65535);
229+
if (!out)
230+
return NULL;
231+
232+
inp = PyUnicode_2BYTE_DATA(in);
233+
outp = PyUnicode_2BYTE_DATA(out);
234+
DO_ESCAPE(inp, inp_end, outp);
235+
return out;
236+
}
237+
238+
239+
static PyObject*
240+
escape_unicode_kind4(PyUnicodeObject *in)
241+
{
242+
Py_UCS4 *inp = PyUnicode_4BYTE_DATA(in);
243+
Py_UCS4 *inp_end = inp + PyUnicode_GET_LENGTH(in);
244+
Py_UCS4 *outp;
245+
PyObject *out;
246+
Py_ssize_t delta = 0;
247+
248+
GET_DELTA(inp, inp_end, delta);
249+
if (!delta) {
250+
Py_INCREF(in);
251+
return (PyObject*)in;
252+
}
253+
254+
out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 1114111);
255+
if (!out)
256+
return NULL;
257+
258+
inp = PyUnicode_4BYTE_DATA(in);
259+
outp = PyUnicode_4BYTE_DATA(out);
260+
DO_ESCAPE(inp, inp_end, outp);
261+
return out;
262+
}
263+
264+
static PyObject*
265+
escape_unicode(PyUnicodeObject *in)
266+
{
267+
if (PyUnicode_READY(in))
268+
return NULL;
269+
270+
switch (PyUnicode_KIND(in)) {
271+
case PyUnicode_1BYTE_KIND:
272+
return escape_unicode_kind1(in);
273+
case PyUnicode_2BYTE_KIND:
274+
return escape_unicode_kind2(in);
275+
case PyUnicode_4BYTE_KIND:
276+
return escape_unicode_kind4(in);
277+
}
278+
assert(0); /* shouldn't happen */
279+
return NULL;
280+
}
281+
#endif /* PY_MAJOR_VERSION < 3 */
109282

110283
static PyObject*
111284
escape(PyObject *self, PyObject *text)
112285
{
286+
static PyObject *id_html;
113287
PyObject *s = NULL, *rv = NULL, *html;
114288

289+
if (id_html == NULL) {
290+
#if PY_MAJOR_VERSION < 3
291+
id_html = PyString_InternFromString("__html__");
292+
#else
293+
id_html = PyUnicode_InternFromString("__html__");
294+
#endif
295+
if (id_html == NULL) {
296+
return NULL;
297+
}
298+
}
299+
115300
/* we don't have to escape integers, bools or floats */
116301
if (PyLong_CheckExact(text) ||
117302
#if PY_MAJOR_VERSION < 3
@@ -122,7 +307,7 @@ escape(PyObject *self, PyObject *text)
122307
return PyObject_CallFunctionObjArgs(markup, text, NULL);
123308

124309
/* if the object has an __html__ method that performs the escaping */
125-
html = PyObject_GetAttrString(text, "__html__");
310+
html = PyObject_GetAttr(text ,id_html);
126311
if (html) {
127312
s = PyObject_CallObject(html, NULL);
128313
Py_DECREF(html);

tests/test_escape.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# -*- coding: utf-8 -*-
2+
import pytest
3+
4+
from markupsafe import Markup, _native
5+
6+
try:
7+
from markupsafe import _speedups
8+
except ImportError:
9+
_speedups = None
10+
11+
12+
@pytest.mark.parametrize('mod', (
    _native,
    # the C extension case is skipped when the module could not be imported
    pytest.param(
        _speedups,
        marks=pytest.mark.skipif(
            _speedups is None, reason='speedups unavailable'),
    ),
))
@pytest.mark.parametrize(('value', 'expect'), (
    # empty
    (u'', u''),
    # ascii
    (u'abcd&><\'"efgh', u'abcd&amp;&gt;&lt;&#39;&#34;efgh'),
    (u'&><\'"efgh', u'&amp;&gt;&lt;&#39;&#34;efgh'),
    (u'abcd&><\'"', u'abcd&amp;&gt;&lt;&#39;&#34;'),
    # 2 byte
    (u'こんにちは&><\'"こんばんは',
     u'こんにちは&amp;&gt;&lt;&#39;&#34;こんばんは'),
    (u'&><\'"こんばんは', u'&amp;&gt;&lt;&#39;&#34;こんばんは'),
    (u'こんにちは&><\'"', u'こんにちは&amp;&gt;&lt;&#39;&#34;'),
    # 4 byte
    (u'\U0001F363\U0001F362&><\'"\U0001F37A xyz',
     u'\U0001F363\U0001F362&amp;&gt;&lt;&#39;&#34;\U0001F37A xyz'),
    (u'&><\'"\U0001F37A xyz', u'&amp;&gt;&lt;&#39;&#34;\U0001F37A xyz'),
    (u'\U0001F363\U0001F362&><\'"',
     u'\U0001F363\U0001F362&amp;&gt;&lt;&#39;&#34;'),
))
def test_escape(mod, value, expect):
    """Check that ``mod.escape(value)`` equals ``Markup(expect)``.

    Each case puts the five special characters at the start, middle or end
    of text made of ASCII, 2-byte or 4-byte characters.
    """
    escaped = mod.escape(value)
    assert escaped == Markup(expect)

0 commit comments

Comments
 (0)