diff --git a/.github/workflows/coding-standards.yml b/.github/workflows/coding-standards.yml
deleted file mode 100644
index 2cdfa1d..0000000
--- a/.github/workflows/coding-standards.yml
+++ /dev/null
@@ -1,35 +0,0 @@
-name: "CS"
-
-on:
- pull_request:
- push:
- branches:
- - master
-
-jobs:
- coding-standards:
- name: "CS Fixer"
- runs-on: "ubuntu-20.04"
-
- steps:
- - name: "Checkout"
- uses: "actions/checkout@v2"
-
- - name: "Install PHP"
- uses: "shivammathur/setup-php@v2"
- with:
- coverage: "none"
- php-version: "7.4"
- tools: cs2pr, pecl, composer:v2
- extensions: tidy
- ini-values: "date.timezone=Europe/Paris"
- env:
- COMPOSER_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
- - name: "Install dependencies with Composer"
- uses: "ramsey/composer-install@v2"
- with:
- composer-options: "--optimize-autoloader --prefer-dist"
-
- - name: "RUN PHP CS Fixer"
- run: "php vendor/bin/php-cs-fixer fix --verbose --dry-run --format=checkstyle | cs2pr"
diff --git a/htmLawed.php b/htmLawed.php
index bedad6e..6676cff 100755
--- a/htmLawed.php
+++ b/htmLawed.php
@@ -1,1165 +1,1593 @@
+ * @copyright (c) 2007-, Santosh Patnaik
+ * @dependency None
+ * @license LGPL 3 and GPL 2+ dual license
+ * @link https://bioinformatics.org/phplabware/internal_utilities/htmLawed
+ * @package htmLawed
+ * @php >=4.4
+ * @time 2023-01-23
+ * @version 1.2.11
*/
-function htmLawed($t, $C = 1, $S = [])
+/**
+ * Main function.
+ * Calls all other functions (alphabetically ordered further below).
+ *
+ * @param string $t HTM.
+ * @param mixed $C $config configuration option.
+ * @param mixed $S $spec specification option.
+ * @return string Filtered/sanitized $t.
+ */
+function htmLawed($t, $C=1, $S=array())
{
- $C = is_array($C) ? $C : [];
- if (!empty($C['valid_xhtml'])) {
- $C['elements'] = empty($C['elements']) ? '*-acronym-big-center-dir-font-isindex-s-strike-tt' : $C['elements'];
- $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 2;
- $C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 2;
- }
- // config eles
- $e = ['a' => 1, 'abbr' => 1, 'acronym' => 1, 'address' => 1, 'applet' => 1, 'area' => 1, 'article' => 1, 'aside' => 1, 'audio' => 1, 'b' => 1, 'bdi' => 1, 'bdo' => 1, 'big' => 1, 'blockquote' => 1, 'br' => 1, 'button' => 1, 'canvas' => 1, 'caption' => 1, 'center' => 1, 'cite' => 1, 'code' => 1, 'col' => 1, 'colgroup' => 1, 'command' => 1, 'data' => 1, 'datalist' => 1, 'dd' => 1, 'del' => 1, 'details' => 1, 'dialog' => 1, 'dfn' => 1, 'dir' => 1, 'div' => 1, 'dl' => 1, 'dt' => 1, 'em' => 1, 'embed' => 1, 'fieldset' => 1, 'figcaption' => 1, 'figure' => 1, 'font' => 1, 'footer' => 1, 'form' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'header' => 1, 'hgroup' => 1, 'hr' => 1, 'i' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'ins' => 1, 'isindex' => 1, 'kbd' => 1, 'keygen' => 1, 'label' => 1, 'legend' => 1, 'li' => 1, 'link' => 1, 'main' => 1, 'map' => 1, 'mark' => 1, 'menu' => 1, 'meta' => 1, 'meter' => 1, 'nav' => 1, 'noscript' => 1, 'object' => 1, 'ol' => 1, 'optgroup' => 1, 'option' => 1, 'output' => 1, 'p' => 1, 'param' => 1, 'picture' => 1, 'pre' => 1, 'progress' => 1, 'q' => 1, 'rb' => 1, 'rbc' => 1, 'rp' => 1, 'rt' => 1, 'rtc' => 1, 'ruby' => 1, 's' => 1, 'samp' => 1, 'script' => 1, 'section' => 1, 'select' => 1, 'slot' => 1, 'small' => 1, 'source' => 1, 'span' => 1, 'strike' => 1, 'strong' => 1, 'style' => 1, 'sub' => 1, 'summary' => 1, 'sup' => 1, 'table' => 1, 'tbody' => 1, 'td' => 1, 'template' => 1, 'textarea' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'time' => 1, 'tr' => 1, 'track' => 1, 'tt' => 1, 'u' => 1, 'ul' => 1, 'var' => 1, 'video' => 1, 'wbr' => 1]; // 122 incl. deprecated & some Ruby
+ // Standard elements including deprecated.
- if (!empty($C['safe'])) {
- unset($e['applet'], $e['audio'], $e['canvas'], $e['dialog'], $e['embed'], $e['iframe'], $e['object'], $e['script'], $e['video']);
- }
- $x = !empty($C['elements']) ? str_replace(["\n", "\r", "\t", ' '], '', strtolower($C['elements'])) : '*';
- if ('-*' === $x) {
- $e = [];
- } elseif (false === strpos($x, '*')) {
- $e = array_flip(explode(',', $x));
- } else {
- if (isset($x[1])) {
- if (strpos($x, '(')) {
- $x = preg_replace_callback('`\([^()]+\)`', function ($m) {return str_replace(['(', ')', '-'], ['', '', 'A'], $m[0]); }, $x);
- }
+ $eleAr = array('a'=>1, 'abbr'=>1, 'acronym'=>1, 'address'=>1, 'applet'=>1, 'area'=>1, 'article'=>1, 'aside'=>1, 'audio'=>1, 'b'=>1, 'bdi'=>1, 'bdo'=>1, 'big'=>1, 'blockquote'=>1, 'br'=>1, 'button'=>1, 'canvas'=>1, 'caption'=>1, 'center'=>1, 'cite'=>1, 'code'=>1, 'col'=>1, 'colgroup'=>1, 'command'=>1, 'data'=>1, 'datalist'=>1, 'dd'=>1, 'del'=>1, 'details'=>1, 'dialog'=>1, 'dfn'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'dt'=>1, 'em'=>1, 'embed'=>1, 'fieldset'=>1, 'figcaption'=>1, 'figure'=>1, 'font'=>1, 'footer'=>1, 'form'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'header'=>1, 'hgroup'=>1, 'hr'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'isindex'=>1, 'kbd'=>1, 'keygen'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'link'=>1, 'main'=>1, 'map'=>1, 'mark'=>1, 'menu'=>1, 'meta'=>1, 'meter'=>1, 'nav'=>1, 'noscript'=>1, 'object'=>1, 'ol'=>1, 'optgroup'=>1, 'option'=>1, 'output'=>1, 'p'=>1, 'param'=>1, 'picture'=>1, 'pre'=>1, 'progress'=>1, 'q'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'script'=>1, 'section'=>1, 'select'=>1, 'slot'=>1, 'small'=>1, 'source'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'style'=>1, 'sub'=>1, 'summary'=>1, 'sup'=>1, 'table'=>1, 'tbody'=>1, 'td'=>1, 'template'=>1, 'textarea'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'time'=>1, 'tr'=>1, 'track'=>1, 'tt'=>1, 'u'=>1, 'ul'=>1, 'var'=>1, 'video'=>1, 'wbr'=>1);
- preg_match_all('`(?:^|-|\+)[^\-+]+?(?=-|\+|$)`', $x, $m, \PREG_SET_ORDER);
- for ($i = count($m); --$i >= 0;) {
- $m[$i] = $m[$i][0];
- }
- foreach ($m as $v) {
- $v = str_replace('A', '-', $v);
- if ('+' === $v[0]) {
- $e[substr($v, 1)] = 1;
- } elseif ('-' === $v[0]) {
- if (strpos($v, '-', 1)) {
- $e[$v] = 1;
- } elseif (isset($e[($v = substr($v, 1))]) && !in_array('+' . $v, $m, true)) {
- unset($e[$v]);
- }
- }
- }
- }
- }
- $C['elements'] = &$e;
- // config attrs
- $x = !empty($C['deny_attribute']) ? strtolower(preg_replace('"\s+-"', '/', trim($C['deny_attribute']))) : '';
- $x = array_flip((isset($x[0]) && '*' === $x[0]) ? explode('/', $x) : explode(',', $x . (!empty($C['safe']) ? ',on*' : '')));
- $C['deny_attribute'] = $x;
- // config URLs
- $x = (isset($C['schemes'][2]) && strpos($C['schemes'], ':')) ? strtolower($C['schemes']) : 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, tel, telnet' . (empty($C['safe']) ? ', app, javascript; *: data, javascript, ' : '; *:') . 'file, http, https';
- $C['schemes'] = [];
- foreach (explode(';', trim(str_replace([' ', "\t", "\r", "\n"], '', $x), ';')) as $v) {
- $x = $x2 = null;
- list($x, $x2) = explode(':', $v, 2);
- if ($x2) {
- $C['schemes'][$x] = array_flip(explode(',', $x2));
- }
- }
- if (!isset($C['schemes']['*'])) {
- $C['schemes']['*'] = ['file' => 1, 'http' => 1, 'https' => 1];
- if (empty($C['safe'])) {
- $C['schemes']['*'] += ['data' => 1, 'javascript' => 1];
- }
- }
- if (!empty($C['safe']) && empty($C['schemes']['style'])) {
- $C['schemes']['style'] = ['!' => 1];
- }
- $C['abs_url'] = isset($C['abs_url']) ? $C['abs_url'] : 0;
- if (!isset($C['base_url']) || !preg_match('`^[a-zA-Z\d.+\-]+://[^/]+/(.+?/)?$`', $C['base_url'])) {
- $C['base_url'] = $C['abs_url'] = 0;
- }
- // config rest
- $C['and_mark'] = empty($C['and_mark']) ? 0 : 1;
- $C['anti_link_spam'] = (isset($C['anti_link_spam']) && is_array($C['anti_link_spam']) && 2 === count($C['anti_link_spam']) && (empty($C['anti_link_spam'][0]) || hl_regex($C['anti_link_spam'][0])) && (empty($C['anti_link_spam'][1]) || hl_regex($C['anti_link_spam'][1]))) ? $C['anti_link_spam'] : 0;
- $C['anti_mail_spam'] = isset($C['anti_mail_spam']) ? $C['anti_mail_spam'] : 0;
- $C['any_custom_element'] = (!isset($C['any_custom_element']) || !empty($C['any_custom_element'])) ? 1 : 0;
- $C['balance'] = isset($C['balance']) ? (bool) $C['balance'] : 1;
- $C['cdata'] = isset($C['cdata']) ? $C['cdata'] : (empty($C['safe']) ? 3 : 0);
- $C['clean_ms_char'] = empty($C['clean_ms_char']) ? 0 : $C['clean_ms_char'];
- $C['comment'] = isset($C['comment']) ? $C['comment'] : (empty($C['safe']) ? 3 : 0);
- $C['css_expression'] = empty($C['css_expression']) ? 0 : 1;
- $C['direct_list_nest'] = empty($C['direct_list_nest']) ? 0 : 1;
- $C['hexdec_entity'] = isset($C['hexdec_entity']) ? $C['hexdec_entity'] : 1;
- $C['hook'] = (!empty($C['hook']) && function_exists($C['hook'])) ? $C['hook'] : 0;
- $C['hook_tag'] = (!empty($C['hook_tag']) && function_exists($C['hook_tag'])) ? $C['hook_tag'] : 0;
- $C['keep_bad'] = isset($C['keep_bad']) ? $C['keep_bad'] : 6;
- $C['lc_std_val'] = isset($C['lc_std_val']) ? (bool) $C['lc_std_val'] : 1;
- $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 1;
- $C['named_entity'] = isset($C['named_entity']) ? (bool) $C['named_entity'] : 1;
- $C['no_deprecated_attr'] = isset($C['no_deprecated_attr']) ? $C['no_deprecated_attr'] : 1;
- $C['parent'] = isset($C['parent'][0]) ? strtolower($C['parent']) : 'body';
- $C['show_setting'] = !empty($C['show_setting']) ? $C['show_setting'] : 0;
- $C['style_pass'] = empty($C['style_pass']) ? 0 : 1;
- $C['tidy'] = empty($C['tidy']) ? 0 : $C['tidy'];
- $C['unique_ids'] = isset($C['unique_ids']) && (!preg_match('`\W`', $C['unique_ids'])) ? $C['unique_ids'] : 1;
- $C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 0;
-
- if (isset($GLOBALS['C'])) {
- $reC = $GLOBALS['C'];
- }
- $GLOBALS['C'] = $C;
- $S = is_array($S) ? $S : hl_spec($S);
- if (isset($GLOBALS['S'])) {
- $reS = $GLOBALS['S'];
- }
- $GLOBALS['S'] = $S;
+ // Set $C array ($config), using default parameters as needed.
- $t = preg_replace('`[\x00-\x08\x0b-\x0c\x0e-\x1f]`', '', $t);
- if ($C['clean_ms_char']) {
- $x = ["\x7f" => '', "\x80" => '€', "\x81" => '', "\x83" => 'ƒ', "\x85" => '…', "\x86" => '†', "\x87" => '‡', "\x88" => 'ˆ', "\x89" => '‰', "\x8a" => 'Š', "\x8b" => '‹', "\x8c" => 'Œ', "\x8d" => '', "\x8e" => 'Ž', "\x8f" => '', "\x90" => '', "\x95" => '•', "\x96" => '–', "\x97" => '—', "\x98" => '˜', "\x99" => '™', "\x9a" => 'š', "\x9b" => '›', "\x9c" => 'œ', "\x9d" => '', "\x9e" => 'ž', "\x9f" => 'Ÿ'];
- $x = $x + (1 === $C['clean_ms_char'] ? ["\x82" => '‚', "\x84" => '„', "\x91" => '‘', "\x92" => '’', "\x93" => '“', "\x94" => '”'] : ["\x82" => '\'', "\x84" => '"', "\x91" => '\'', "\x92" => '\'', "\x93" => '"', "\x94" => '"']);
- $t = strtr($t, $x);
- }
- if ($C['cdata'] || $C['comment']) {
- $t = preg_replace_callback('``sm', 'hl_cmtcd', $t);
- }
- $t = preg_replace_callback('`&([a-zA-Z][a-zA-Z0-9]{1,30}|#(?:[0-9]{1,8}|[Xx][0-9A-Fa-f]{1,7}));`', 'hl_ent', str_replace('&', '&', $t));
- if ($C['unique_ids'] && !isset($GLOBALS['hl_Ids'])) {
- $GLOBALS['hl_Ids'] = [];
- }
- if ($C['hook']) {
- $t = $C['hook']($t, $C, $S);
- }
- if ($C['show_setting'] && preg_match('`^[a-z][a-z0-9_]*$`i', $C['show_setting'])) {
- $GLOBALS[$C['show_setting']] = ['config' => $C, 'spec' => $S, 'time' => microtime()];
- }
- // main
- $t = preg_replace_callback('`<(?:(?:\s|$)|(?:[^>]*(?:>|$)))|>`m', 'hl_tag', $t);
- $t = $C['balance'] ? hl_bal($t, $C['keep_bad'], $C['parent']) : $t;
- $t = (($C['cdata'] || $C['comment']) && false !== strpos($t, "\x01")) ? str_replace(["\x01", "\x02", "\x03", "\x04", "\x05"], ['', '', '&', '<', '>'], $t) : $t;
- $t = $C['tidy'] ? hl_tidy($t, $C['tidy'], $C['parent']) : $t;
- unset($C, $e);
- if (isset($reC)) {
- $GLOBALS['C'] = $reC;
- }
- if (isset($reS)) {
- $GLOBALS['S'] = $reS;
- }
+ $C = is_array($C) ? $C : array();
+ if (!empty($C['valid_xhtml'])) {
+ $C['elements'] = empty($C['elements']) ? '*-acronym-big-center-dir-font-isindex-s-strike-tt' : $C['elements'];
+ $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 2;
+ $C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 2;
+ }
- return $t;
-}
+ // -- Configure for elements.
-function hl_attrval($a, $t, $p)
-{
- // check attr val against $S
- static $ma = ['accesskey', 'class', 'itemtype', 'rel'];
- $s = in_array($a, $ma, true) ? ' ' : ('srcset' === $a ? ',' : '');
- $r = [];
- $t = !empty($s) ? explode($s, $t) : [$t];
- foreach ($t as $tk => $tv) {
- $o = 1;
- $tv = trim($tv);
- $l = strlen($tv);
- foreach ($p as $k => $v) {
- if (!$l) {
- continue;
- }
- switch ($k) {
- case 'maxlen':
- if ($l > $v) {
- $o = 0;
- }
- break;
- case 'minlen':
- if ($l < $v) {
- $o = 0;
- }
- break;
- case 'maxval':
- if ((float) ($tv) > $v) {
- $o = 0;
- }
- break;
- case 'minval':
- if ((float) ($tv) < $v) {
- $o = 0;
- }
- break;
- case 'match':
- if (!preg_match($v, $tv)) {
- $o = 0;
- }
- break;
- case 'nomatch':
- if (preg_match($v, $tv)) {
- $o = 0;
- }
- break;
- case 'oneof':
- $m = 0;
- foreach (explode('|', $v) as $n) {
- if ($tv === $n) {
- $m = 1;
- break;
- }
- }
- $o = $m;
- break;
- case 'noneof':
- $m = 1;
- foreach (explode('|', $v) as $n) {
- if ($tv === $n) {
- $m = 0;
- break;
- }
- }
- $o = $m;
- break;
- default:
- break;
- }
- if (!$o) {
- break;
- }
- }
- if ($o) {
- $r[] = $tv;
+ if (!empty($C['safe'])) {
+ unset($eleAr['applet'], $eleAr['audio'], $eleAr['canvas'], $eleAr['dialog'], $eleAr['embed'], $eleAr['iframe'], $eleAr['object'], $eleAr['script'], $eleAr['video']);
+ }
+ $x = !empty($C['elements']) ? str_replace(array("\n", "\r", "\t", ' '), '', strtolower($C['elements'])) : '*';
+ if ($x == '-*') {
+ $eleAr = array();
+ } elseif (strpos($x, '*') === false) {
+ $eleAr = array_flip(explode(',', $x));
+ } else {
+ if (isset($x[1])) {
+ if (strpos($x, '(')) { // Temporarily replace hyphen of custom element, minus being special character
+ $x =
+ preg_replace_callback(
+ '`\([^()]+\)`',
+ function ($m) {
+ return str_replace(array('(', ')', '-'), array('', '', 'A'), $m[0]);
+ },
+ $x);
+ }
+ preg_match_all('`(?:^|-|\+)[^\-+]+?(?=-|\+|$)`', $x, $m, PREG_SET_ORDER);
+ for ($i=count($m); --$i>=0;) {
+ $m[$i] = $m[$i][0];
+ }
+ foreach ($m as $v) {
+ $v = str_replace('A', '-', $v);
+ if ($v[0] == '+') {
+ $eleAr[substr($v, 1)] = 1;
+ } elseif ($v[0] == '-') {
+ if (strpos($v, '-', 1)) {
+ $eleAr[$v] = 1;
+ } elseif (isset($eleAr[($v = substr($v, 1))]) && !in_array('+'. $v, $m)) {
+ unset($eleAr[$v]);
+ }
}
+ }
}
- if (',' === $s) {
- $s = ', ';
- }
- $r = implode($s, $r);
+ }
+ $C['elements'] =& $eleAr;
- return isset($r[0]) ? $r : (isset($p['default']) ? $p['default'] : 0);
-}
+ // -- Configure for attributes.
-function hl_bal($t, $do = 1, $in = 'div')
-{
- // balance tags
- // by content
- $cB = ['blockquote' => 1, 'form' => 1, 'map' => 1, 'noscript' => 1]; // Block
- $cE = ['area' => 1, 'br' => 1, 'col' => 1, 'command' => 1, 'embed' => 1, 'hr' => 1, 'img' => 1, 'input' => 1, 'isindex' => 1, 'keygen' => 1, 'link' => 1, 'meta' => 1, 'param' => 1, 'source' => 1, 'track' => 1, 'wbr' => 1]; // Empty
- $cF = ['a' => 1, 'article' => 1, 'aside' => 1, 'audio' => 1, 'button' => 1, 'canvas' => 1, 'del' => 1, 'details' => 1, 'dialog' => 1, 'div' => 1, 'dd' => 1, 'fieldset' => 1, 'figure' => 1, 'footer' => 1, 'header' => 1, 'iframe' => 1, 'ins' => 1, 'li' => 1, 'main' => 1, 'menu' => 1, 'nav' => 1, 'noscript' => 1, 'object' => 1, 'section' => 1, 'slot' => 1, 'style' => 1, 'td' => 1, 'template' => 1, 'th' => 1, 'video' => 1]; // Flow; later context-wise dynamic move of ins & del to $cI
- $cI = ['abbr' => 1, 'acronym' => 1, 'address' => 1, 'b' => 1, 'bdi' => 1, 'bdo' => 1, 'big' => 1, 'caption' => 1, 'cite' => 1, 'code' => 1, 'data' => 1, 'datalist' => 1, 'dfn' => 1, 'dt' => 1, 'em' => 1, 'figcaption' => 1, 'font' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'hgroup' => 1, 'i' => 1, 'kbd' => 1, 'label' => 1, 'legend' => 1, 'mark' => 1, 'meter' => 1, 'output' => 1, 'p' => 1, 'picture' => 1, 'pre' => 1, 'progress' => 1, 'q' => 1, 'rb' => 1, 'rt' => 1, 's' => 1, 'samp' => 1, 'small' => 1, 'span' => 1, 'strike' => 1, 'strong' => 1, 'sub' => 1, 'summary' => 1, 'sup' => 1, 'time' => 1, 'tt' => 1, 'u' => 1, 'var' => 1]; // Inline
- $cN = ['a' => ['a' => 1, 'address' => 1, 'button' => 1, 'details' => 1, 'embed' => 1, 'keygen' => 1, 'label' => 1, 'select' => 1, 'textarea' => 1], 'address' => ['address' => 1, 'article' => 1, 'aside' => 1, 'header' => 1, 'keygen' => 1, 'footer' => 1, 'nav' => 1, 'section' => 1], 'button' => ['a' => 1, 'address' => 1, 'button' => 1, 'details' => 1, 'embed' => 1, 'fieldset' => 1, 'form' => 1, 'iframe' => 1, 'input' => 1, 'keygen' => 1, 'label' => 1, 'select' => 1, 'textarea' => 1], 'fieldset' => ['fieldset' => 1], 'footer' => ['header' => 1, 'footer' => 1], 'form' => ['form' => 1], 'header' => ['header' => 1, 'footer' => 1], 'label' => ['label' => 1], 'main' => ['main' => 1], 'meter' => ['meter' => 1], 'noscript' => ['script' => 1], 'pre' => ['big' => 1, 'font' => 1, 'img' => 1, 'object' => 1, 'script' => 1, 'small' => 1, 'sub' => 1, 'sup' => 1], 'progress' => ['progress' => 1], 'rb' => ['ruby' => 1], 'rt' => ['ruby' => 1], 'time' => ['time' => 1]]; // Illegal
- $cN2 = array_keys($cN);
- $cS = ['colgroup' => ['col' => 1], 'datalist' => ['option' => 1], 'dir' => ['li' => 1], 'dl' => ['dd' => 1, 'dt' => 1], 'hgroup' => ['h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1], 'menu' => ['li' => 1], 'ol' => ['li' => 1], 'optgroup' => ['option' => 1], 'option' => ['#pcdata' => 1], 'rbc' => ['rb' => 1], 'rp' => ['#pcdata' => 1], 'rtc' => ['rt' => 1], 'ruby' => ['rb' => 1, 'rbc' => 1, 'rp' => 1, 'rt' => 1, 'rtc' => 1, '#pcdata' => 1], 'select' => ['optgroup' => 1, 'option' => 1], 'script' => ['#pcdata' => 1], 'table' => ['caption' => 1, 'col' => 1, 'colgroup' => 1, 'tfoot' => 1, 'tbody' => 1, 'tr' => 1, 'thead' => 1], 'tbody' => ['tr' => 1], 'tfoot' => ['tr' => 1], 'textarea' => ['#pcdata' => 1], 'thead' => ['tr' => 1], 'tr' => ['td' => 1, 'th' => 1], 'ul' => ['li' => 1]]; // Specific - immediate parent-child
- if ($GLOBALS['C']['direct_list_nest']) {
- $cS['ol'] = $cS['ul'] = $cS['menu'] += ['menu' => 1, 'ol' => 1, 'ul' => 1];
- }
- $cO = ['address' => ['p' => 1], 'applet' => ['param' => 1], 'audio' => ['source' => 1, 'track' => 1], 'blockquote' => ['script' => 1], 'details' => ['summary' => 1], 'fieldset' => ['legend' => 1, '#pcdata' => 1], 'figure' => ['figcaption' => 1], 'form' => ['script' => 1], 'map' => ['area' => 1], 'object' => ['param' => 1, 'embed' => 1], 'video' => ['source' => 1, 'track' => 1]]; // Other
- $cT = ['colgroup' => 1, 'dd' => 1, 'dt' => 1, 'li' => 1, 'option' => 1, 'p' => 1, 'td' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'tr' => 1]; // Omitable closing
- // block/inline type; a/ins/del both type; #pcdata: text
- $eB = ['a' => 1, 'address' => 1, 'article' => 1, 'aside' => 1, 'blockquote' => 1, 'center' => 1, 'del' => 1, 'details' => 1, 'dialog' => 1, 'dir' => 1, 'dl' => 1, 'div' => 1, 'fieldset' => 1, 'figure' => 1, 'footer' => 1, 'form' => 1, 'ins' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'header' => 1, 'hr' => 1, 'isindex' => 1, 'main' => 1, 'menu' => 1, 'nav' => 1, 'noscript' => 1, 'ol' => 1, 'p' => 1, 'pre' => 1, 'section' => 1, 'slot' => 1, 'style' => 1, 'table' => 1, 'template' => 1, 'ul' => 1];
- $eI = ['#pcdata' => 1, 'a' => 1, 'abbr' => 1, 'acronym' => 1, 'applet' => 1, 'audio' => 1, 'b' => 1, 'bdi' => 1, 'bdo' => 1, 'big' => 1, 'br' => 1, 'button' => 1, 'canvas' => 1, 'cite' => 1, 'code' => 1, 'command' => 1, 'data' => 1, 'datalist' => 1, 'del' => 1, 'dfn' => 1, 'em' => 1, 'embed' => 1, 'figcaption' => 1, 'font' => 1, 'i' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'ins' => 1, 'kbd' => 1, 'label' => 1, 'link' => 1, 'map' => 1, 'mark' => 1, 'meta' => 1, 'meter' => 1, 'object' => 1, 'output' => 1, 'picture' => 1, 'progress' => 1, 'q' => 1, 'ruby' => 1, 's' => 1, 'samp' => 1, 'select' => 1, 'script' => 1, 'small' => 1, 'span' => 1, 'strike' => 1, 'strong' => 1, 'sub' => 1, 'summary' => 1, 'sup' => 1, 'textarea' => 1, 'time' => 1, 'tt' => 1, 'u' => 1, 'var' => 1, 'video' => 1, 'wbr' => 1];
- $eN = ['a' => 1, 'address' => 1, 'article' => 1, 'aside' => 1, 'big' => 1, 'button' => 1, 'details' => 1, 'embed' => 1, 'fieldset' => 1, 'font' => 1, 'footer' => 1, 'form' => 1, 'header' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'keygen' => 1, 'label' => 1, 'meter' => 1, 'nav' => 1, 'object' => 1, 'progress' => 1, 'ruby' => 1, 'script' => 1, 'select' => 1, 'small' => 1, 'sub' => 1, 'sup' => 1, 'textarea' => 1, 'time' => 1]; // Exclude from specific ele; $cN values
- $eO = ['area' => 1, 'caption' => 1, 'col' => 1, 'colgroup' => 1, 'command' => 1, 'dd' => 1, 'dt' => 1, 'hgroup' => 1, 'keygen' => 1, 'legend' => 1, 'li' => 1, 'optgroup' => 1, 'option' => 1, 'param' => 1, 'rb' => 1, 'rbc' => 1, 'rp' => 1, 'rt' => 1, 'rtc' => 1, 'script' => 1, 'source' => 1, 'tbody' => 1, 'td' => 1, 'tfoot' => 1, 'thead' => 1, 'th' => 1, 'tr' => 1, 'track' => 1]; // Missing in $eB & $eI
- $eF = $eB + $eI;
-
- // $in sets allowed child
- $in = ((isset($eF[$in]) && '#pcdata' !== $in) || isset($eO[$in])) ? $in : 'div';
- if (isset($cE[$in])) {
- return !$do ? '' : str_replace(['<', '>'], ['<', '>'], $t);
- }
- if (isset($cS[$in])) {
- $inOk = $cS[$in];
- } elseif (isset($cI[$in])) {
- $inOk = $eI;
- $cI['del'] = 1;
- $cI['ins'] = 1;
- } elseif (isset($cF[$in])) {
- $inOk = $eF;
- unset($cI['del'], $cI['ins']);
- } elseif (isset($cB[$in])) {
- $inOk = $eB;
- unset($cI['del'], $cI['ins']);
- }
- if (isset($cO[$in])) {
- $inOk = $inOk + $cO[$in];
- }
- if (isset($cN[$in])) {
- $inOk = array_diff_assoc($inOk, $cN[$in]);
- }
- if (strpos($in, '-')) {
- $inOk = ['*' => 1, '#pcdata' => 1];
- } // custom ele
+ $x = !empty($C['deny_attribute']) ? strtolower(preg_replace('"\s+-"', '/', trim($C['deny_attribute']))) : '';
+ $x = str_replace(array(' ', "\t", "\r", "\n"), '', $x);
+ $x =
+ array_flip(
+ (isset($x[0]) && $x[0] == '*')
+ ? preg_replace(
+ '`^[^*]`',
+ '-'. '\\0',
+ explode(
+ '/',
+ (!empty($C['safe']) ? preg_replace('`/on[^/]+`', '', $x) : $x)))
+ : array_filter(explode(',', $x. (!empty($C['safe']) ? ',on*' : ''))));
+ $C['deny_attribute'] = $x;
- $t = explode('<', $t);
- $ok = $q = []; // $q seq list of open non-empty ele
- ob_start();
+ // -- Configure URL handling.
- for ($i = -1, $ci = count($t); ++$i < $ci;) {
- // allowed $ok in parent $p
- if ($ql = count($q)) {
- $p = array_pop($q);
- $q[] = $p;
- if (isset($cS[$p])) {
- $ok = $cS[$p];
- } elseif (isset($cI[$p])) {
- $ok = $eI;
- $cI['del'] = 1;
- $cI['ins'] = 1;
- } elseif (isset($cF[$p])) {
- $ok = $eF;
- unset($cI['del'], $cI['ins']);
- } elseif (isset($cB[$p])) {
- $ok = $eB;
- unset($cI['del'], $cI['ins']);
- }
- if (isset($cO[$p])) {
- $ok = $ok + $cO[$p];
- }
- if (isset($cN[$p])) {
- $ok = array_diff_assoc($ok, $cN[$p]);
- }
- if (strpos($p, '-')) {
- $ok = ['*' => 1, '#pcdata' => 1];
- }
- } else {
- $ok = $inOk;
- unset($cI['del'], $cI['ins']);
- }
- // bad tags, & ele content
- if (isset($e) && (1 === $do || (isset($ok['#pcdata']) && (3 === $do || 5 === $do)))) {
- echo '<', $s, $e, $a, '>';
- }
- if (isset($x[0])) {
- if (strlen(trim($x)) && (($ql && isset($cB[$p])) || (isset($cB[$in]) && !$ql))) {
- echo '
', $x, '
';
- } elseif ($do < 3 || isset($ok['#pcdata'])) {
- echo $x;
- } elseif (strpos($x, "\x02\x04")) {
- foreach (preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY) as $v) {
- echo "\x01\x02" === substr($v, 0, 2) ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : '');
- }
- } elseif ($do > 4) {
- echo preg_replace('`\S`', '', $x);
- }
- }
- // get markup
- if (!preg_match('`^(/?)([a-z][^ >]*)([^>]*)>(.*)`sm', $t[$i], $r)) {
- $x = $t[$i];
- continue;
+ $x = (isset($C['schemes'][2]) && strpos($C['schemes'], ':')) // A usable custom scheme list is taken exactly as given
+ ? strtolower($C['schemes'])
+ : ('href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, tel, telnet' // Defaults are assembled only as the fallback
+ . (empty($C['safe'])
+ ? ', app, javascript; *: data, javascript, '
+ : '; *:')
+ . 'file, http, https');
+ $C['schemes'] = array();
+ foreach (explode(';', trim(str_replace(array(' ', "\t", "\r", "\n"), '', $x), ';')) as $v) {
+ if(strpos($v, ':')) {
+ list($x, $y) = explode(':', $v, 2);
+ $C['schemes'][$x] = array_flip(explode(',', $y));
+ }
+ }
+ if (!isset($C['schemes']['*'])) {
+ $C['schemes']['*'] = array('file'=>1, 'http'=>1, 'https'=>1);
+ if (empty($C['safe'])) {
+ $C['schemes']['*'] += array('data'=>1, 'javascript'=>1);
+ }
+ }
+ if (!empty($C['safe']) && empty($C['schemes']['style'])) {
+ $C['schemes']['style'] = array('!'=>1);
+ }
+ $C['abs_url'] = isset($C['abs_url']) ? $C['abs_url'] : 0;
+ if (!isset($C['base_url']) || !preg_match('`^[a-zA-Z\d.+\-]+://[^/]+/(.+?/)?$`', $C['base_url'])) {
+ $C['base_url'] = $C['abs_url'] = 0;
+ }
+
+ // -- Configure other parameters.
+
+ $C['and_mark'] = empty($C['and_mark']) ? 0 : 1;
+ $C['anti_link_spam'] =
+ (isset($C['anti_link_spam'])
+ && is_array($C['anti_link_spam'])
+ && count($C['anti_link_spam']) == 2
+ && (empty($C['anti_link_spam'][0])
+ || hl_regex($C['anti_link_spam'][0]))
+ && (empty($C['anti_link_spam'][1])
+ || hl_regex($C['anti_link_spam'][1])))
+ ? $C['anti_link_spam']
+ : 0;
+ $C['anti_mail_spam'] = isset($C['anti_mail_spam']) ? $C['anti_mail_spam'] : 0;
+ $C['any_custom_element'] = (!isset($C['any_custom_element']) || !empty($C['any_custom_element'])) ? 1 : 0;
+ $C['balance'] = isset($C['balance']) ? (bool)$C['balance'] : 1;
+ $C['cdata'] = isset($C['cdata']) ? $C['cdata'] : (empty($C['safe']) ? 3 : 0);
+ $C['clean_ms_char'] = empty($C['clean_ms_char']) ? 0 : $C['clean_ms_char'];
+ $C['comment'] = isset($C['comment']) ? $C['comment'] : (empty($C['safe']) ? 3 : 0);
+ $C['css_expression'] = empty($C['css_expression']) ? 0 : 1;
+ $C['direct_list_nest'] = empty($C['direct_list_nest']) ? 0 : 1;
+ $C['hexdec_entity'] = isset($C['hexdec_entity']) ? $C['hexdec_entity'] : 1;
+ $C['hook'] = (!empty($C['hook']) && is_callable($C['hook'])) ? $C['hook'] : 0;
+ $C['hook_tag'] = (!empty($C['hook_tag']) && is_callable($C['hook_tag'])) ? $C['hook_tag'] : 0;
+ $C['keep_bad'] = isset($C['keep_bad']) ? $C['keep_bad'] : 6;
+ $C['lc_std_val'] = isset($C['lc_std_val']) ? (bool)$C['lc_std_val'] : 1;
+ $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 1;
+ $C['named_entity'] = isset($C['named_entity']) ? (bool)$C['named_entity'] : 1;
+ $C['no_deprecated_attr'] = isset($C['no_deprecated_attr']) ? $C['no_deprecated_attr'] : 1;
+ $C['parent'] = isset($C['parent'][0]) ? strtolower($C['parent']) : 'body';
+ $C['show_setting'] = !empty($C['show_setting']) ? $C['show_setting'] : 0;
+ $C['style_pass'] = empty($C['style_pass']) ? 0 : 1;
+ $C['tidy'] = empty($C['tidy']) ? 0 : $C['tidy'];
+ $C['unique_ids'] = isset($C['unique_ids']) && (!preg_match('`\W`', $C['unique_ids'])) ? $C['unique_ids'] : 1;
+ $C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 0;
+
+ if (isset($GLOBALS['C'])) {
+ $oldC = $GLOBALS['C'];
+ }
+ $GLOBALS['C'] = $C;
+
+ // Set $S array ($spec).
+
+ $S = is_array($S) ? $S : hl_spec($S);
+ if (isset($GLOBALS['S'])) {
+ $oldS = $GLOBALS['S'];
+ }
+ $GLOBALS['S'] = $S;
+
+ // Handle characters.
+
+ $t = preg_replace('`[\x00-\x08\x0b-\x0c\x0e-\x1f]`', '', $t); // Remove illegal
+ if ($C['clean_ms_char']) { // Convert MS Windows CP-1252
+ $x = array("\x7f"=>'', "\x80"=>'€', "\x81"=>'', "\x83"=>'ƒ', "\x85"=>'…', "\x86"=>'†', "\x87"=>'‡', "\x88"=>'ˆ', "\x89"=>'‰', "\x8a"=>'Š', "\x8b"=>'‹', "\x8c"=>'Œ', "\x8d"=>'', "\x8e"=>'Ž', "\x8f"=>'', "\x90"=>'', "\x95"=>'•', "\x96"=>'–', "\x97"=>'—', "\x98"=>'˜', "\x99"=>'™', "\x9a"=>'š', "\x9b"=>'›', "\x9c"=>'œ', "\x9d"=>'', "\x9e"=>'ž', "\x9f"=>'Ÿ');
+ $x = $x
+ + ($C['clean_ms_char'] == 1
+ ? array("\x82"=>'‚', "\x84"=>'„', "\x91"=>'‘', "\x92"=>'’', "\x93"=>'“', "\x94"=>'”')
+ : array("\x82"=>'\'', "\x84"=>'"', "\x91"=>'\'', "\x92"=>'\'', "\x93"=>'"', "\x94"=>'"'));
+ $t = strtr($t, $x);
+ }
+
+ // Handle CDATA, comments, and entities.
+
+ if ($C['cdata'] || $C['comment']) {
+ $t = preg_replace_callback('`<!(?:(?:--.*?--)|(?:\[CDATA\[.*?\]\]))>`sm', 'hl_commentCdata', $t); // Match <!--...--> comments and <![CDATA[...]]> sections
+ }
+ $t =
+ preg_replace_callback(
+ '`&amp;([a-zA-Z][a-zA-Z0-9]{1,30}|#(?:[0-9]{1,8}|[Xx][0-9A-Fa-f]{1,7}));`',
+ 'hl_entity',
+ str_replace('&', '&amp;', $t)); // Escape every & first; the pattern then picks out the &amp;name; and &amp;#num; forms
+ if ($C['unique_ids'] && !isset($GLOBALS['hl_Ids'])) {
+ $GLOBALS['hl_Ids'] = array();
+ }
+
+ if ($C['hook']) {
+ $t = call_user_func($C['hook'], $t, $C, $S);
+ }
+
+ // Handle remaining text.
+
+ $t = preg_replace_callback('`<(?:(?:\s|$)|(?:[^>]*(?:>|$)))|>`m', 'hl_tag', $t);
+ $t = $C['balance'] ? hl_balance($t, $C['keep_bad'], $C['parent']) : $t;
+ $t = (($C['cdata'] || $C['comment']) && strpos($t, "\x01") !== false)
+ ? str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05"), array('', '', '&', '<', '>'), $t)
+ : $t;
+ $t = $C['tidy'] ? hl_tidy($t, $C['tidy'], $C['parent']) : $t;
+
+ // Cleanup.
+
+ if ($C['show_setting'] && preg_match('`^[a-z][a-z0-9_]*$`i', $C['show_setting'])) {
+ $GLOBALS[$C['show_setting']] = array('config'=>$C, 'spec'=>$S, 'time'=>microtime(true), 'version'=>hl_version());
+ }
+ unset($C, $eleAr);
+ if (isset($oldC)) {
+ $GLOBALS['C'] = $oldC;
+ }
+ if (isset($oldS)) {
+ $GLOBALS['S'] = $oldS;
+ }
+ return $t;
+}
+
+/**
+ * Validate attribute value and possibly reset to a default.
+ *
+ * @param string $attr Attribute name.
+ * @param string $value Attribute value.
+ * @param array $ruleAr Array of rules derived from $spec.
+ * @param string $ele Element.
+ * @return mixed 0 if invalid $value,
+ * or string with validated or default value.
+ */
+function hl_attributeValue($attr, $value, $ruleAr, $ele)
+{
+ static $spacedValsAttrAr = array('accesskey', 'class', 'itemtype', 'rel'); // Some attributes have multiple values
+ $valSep =
+ (in_array($attr, $spacedValsAttrAr) || ($attr == 'archive' && $ele == 'object'))
+ ? ' '
+ : (($attr == 'srcset' || ($attr == 'archive' && $ele == 'applet'))
+ ? ','
+ : '');
+ $out = array();
+ $valAr = !empty($valSep) ? explode($valSep, $value) : array($value);
+ foreach ($valAr as $v) {
+ $ok = 1;
+ $v = trim($v);
+ $lengthVal = strlen($v);
+ foreach ($ruleAr as $ruleType=>$ruleVal) {
+ if (!$lengthVal) {
+ continue;
+ }
+ switch ($ruleType) {
+ case 'maxlen': if ($lengthVal > $ruleVal) {
+ $ok = 0;
}
- $s = null;
- $e = null;
- $a = null;
- $x = null;
- list($all, $s, $e, $a, $x) = $r;
- // close tag
- if ($s) {
- if (isset($cE[$e]) || !in_array($e, $q, true)) {
- continue;
- } // Empty/unopen
- if ($p === $e) {
- array_pop($q);
- echo '', $e, '>';
- unset($e);
- continue;
- } // Last open
- $add = ''; // Nesting - close open tags that need to be
- for ($j = -1, $cj = count($q); ++$j < $cj;) {
- if (($d = array_pop($q)) === $e) {
- break;
- }
- $add .= "{$d}>";
- }
- echo $add, '', $e, '>';
- unset($e);
- continue;
+ break; case 'minlen': if ($lengthVal < $ruleVal) {
+ $ok = 0;
}
- // open tag
- // $cB ele needs $eB ele as child
- if (isset($cB[$e]) && strlen(trim($x))) {
- $t[$i] = "{$e}{$a}>";
- array_splice($t, $i + 1, 0, 'div>' . $x);
- unset($e, $x);
- ++$ci;
- --$i;
- continue;
+ break; case 'maxval': if ((float)($v) > $ruleVal) {
+ $ok = 0;
}
- if (strpos($e, '-')) {
- $ok[$e] = 1;
+ break; case 'minval': if ((float)($v) < $ruleVal) {
+ $ok = 0;
}
- if ((($ql && isset($cB[$p])) || (isset($cB[$in]) && !$ql)) && !isset($eB[$e]) && !isset($ok[$e]) && !isset($ok['*'])) {
- array_splice($t, $i, 0, 'div>');
- unset($e, $x);
- ++$ci;
- --$i;
- continue;
+ break; case 'match': if (!preg_match($ruleVal, $v)) {
+ $ok = 0;
}
- // if no open ele, $in = parent; mostly immediate parent-child relation should hold
- if (!$ql || !isset($eN[$e]) || !array_intersect($q, $cN2)) {
- if (!isset($ok[$e]) && !isset($ok['*'])) {
- if ($ql && isset($cT[$p])) {
- echo '', array_pop($q), '>';
- unset($e, $x);
- --$i;
- }
- continue;
- }
- if (!isset($cE[$e])) {
- $q[] = $e;
- }
- echo '<', $e, $a, '>';
- unset($e);
- continue;
+ break; case 'nomatch': if (preg_match($ruleVal, $v)) {
+ $ok = 0;
}
- // specific parent-child
- if (isset($cS[$p][$e])) {
- if (!isset($cE[$e])) {
- $q[] = $e;
- }
- echo '<', $e, $a, '>';
- unset($e);
- continue;
+ break; case 'oneof': if(!in_array($v, explode('|', $ruleVal))) {
+ $ok = 0;
}
- // nesting
- $add = '';
- $q2 = [];
- for ($k = -1, $kc = count($q); ++$k < $kc;) {
- $d = $q[$k];
- $ok2 = [];
- if (isset($cS[$d])) {
- $q2[] = $d;
- continue;
- }
- $ok2 = isset($cI[$d]) ? $eI : $eF;
- if (isset($cO[$d])) {
- $ok2 = $ok2 + $cO[$d];
- }
- if (isset($cN[$d])) {
- $ok2 = array_diff_assoc($ok2, $cN[$d]);
- }
- if (!isset($ok2[$e]) && !strpos($e, '-')) {
- if (!$k && !isset($inOk[$e]) && !isset($inOk['*'])) {
- continue 2;
- }
- $add = "{$d}>";
- for (; ++$k < $kc;) {
- $add = "{$q[$k]}>{$add}";
- }
- break;
- }
- $q2[] = $d;
+ break; case 'noneof': if(in_array($v, explode('|', $ruleVal))) {
+ $ok = 0;
}
- $q = $q2;
- if (!isset($cE[$e])) {
- $q[] = $e;
+ break; default:
+ break;
+ }
+ if (!$ok) {
+ break;
+ }
+ }
+ if ($ok) {
+ $out[] = $v;
+ }
+ }
+ $out = implode($valSep == ',' ? ', ' : ' ', $out);
+ return (isset($out[0]) ? $out : (isset($ruleAr['default']) ? $ruleAr['default'] : 0));
+}
+
+/*
+ * Enforce parent-child validity of elements and balance tags.
+ *
+ * Walks the '<'-separated pieces of $t, keeping a stack of open elements.
+ * Missing closing tags are inserted. A closing tag with no matching open
+ * element, or an element not permitted in its parent, is handled as per $act.
+ * Output is collected through output buffering. Reads
+ * $GLOBALS['C']['direct_list_nest'].
+ *
+ * @param string $t HTM. Previously partly sanitized/filtered. CDATA
+ * and comment sections have > characters hidden.
+ * @param int $act $config's keep_bad parameter. If 1, an invalid
+ * tag is output with its angle brackets as &lt; and
+ * &gt;; if 3 or 5, likewise, but only where plain
+ * text is permitted. Text in a parent not permitting
+ * it is kept if $act < 3, else dropped, with its
+ * whitespace retained if $act > 4.
+ * @param string $parentEle $t's parent element option.
+ * @return string $t with valid nesting and balanced tags.
+ */
+function hl_balance($t, $act=1, $parentEle='div')
+{
+ // Group elements in different ways.
+
+ $closingTagOmitableEleAr = array('caption'=>1, 'colgroup'=>1, 'dd'=>1, 'dt'=>1, 'li'=>1, 'optgroup'=>1, 'option'=>1, 'p'=>1, 'rp'=>1, 'rt'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1);
+
+ // -- Block, inline, etc.
+
+ $blockEleAr = array('a'=>1, 'address'=>1, 'article'=>1, 'aside'=>1, 'blockquote'=>1, 'center'=>1, 'del'=>1, 'details'=>1, 'dialog'=>1, 'dir'=>1, 'dl'=>1, 'div'=>1, 'fieldset'=>1, 'figure'=>1, 'footer'=>1, 'form'=>1, 'ins'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'header'=>1, 'hr'=>1, 'isindex'=>1, 'main'=>1, 'menu'=>1, 'nav'=>1, 'noscript'=>1, 'ol'=>1, 'p'=>1, 'pre'=>1, 'section'=>1, 'slot'=>1, 'style'=>1, 'table'=>1, 'template'=>1, 'ul'=>1);
+ $inlineEleAr = array('#pcdata'=>1, 'a'=>1, 'abbr'=>1, 'acronym'=>1, 'applet'=>1, 'audio'=>1, 'b'=>1, 'bdi'=>1, 'bdo'=>1, 'big'=>1, 'br'=>1, 'button'=>1, 'canvas'=>1, 'cite'=>1, 'code'=>1, 'command'=>1, 'data'=>1, 'datalist'=>1, 'del'=>1, 'dfn'=>1, 'em'=>1, 'embed'=>1, 'figcaption'=>1, 'font'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'kbd'=>1, 'label'=>1, 'link'=>1, 'map'=>1, 'mark'=>1, 'meta'=>1, 'meter'=>1, 'object'=>1, 'output'=>1, 'picture'=>1, 'progress'=>1, 'q'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'select'=>1, 'script'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'summary'=>1, 'sup'=>1, 'textarea'=>1, 'time'=>1, 'tt'=>1, 'u'=>1, 'var'=>1, 'video'=>1, 'wbr'=>1);
+ $otherEleAr = array('area'=>1, 'caption'=>1, 'col'=>1, 'colgroup'=>1, 'command'=>1, 'dd'=>1, 'dt'=>1, 'hgroup'=>1, 'keygen'=>1, 'legend'=>1, 'li'=>1, 'optgroup'=>1, 'option'=>1, 'param'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'script'=>1, 'source'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'thead'=>1, 'th'=>1, 'tr'=>1, 'track'=>1);
+ $flowEleAr = $blockEleAr + $inlineEleAr;
+
+ // -- Type of child allowed.
+
+ $blockKidEleAr = array('blockquote'=>1, 'form'=>1, 'map'=>1, 'noscript'=>1);
+ $flowKidEleAr = array('a'=>1, 'article'=>1, 'aside'=>1, 'audio'=>1, 'button'=>1, 'canvas'=>1, 'del'=>1, 'details'=>1, 'dialog'=>1, 'div'=>1, 'dd'=>1, 'fieldset'=>1, 'figure'=>1, 'footer'=>1, 'header'=>1, 'iframe'=>1, 'ins'=>1, 'li'=>1, 'main'=>1, 'menu'=>1, 'nav'=>1, 'noscript'=>1, 'object'=>1, 'section'=>1, 'slot'=>1, 'style'=>1, 'td'=>1, 'template'=>1, 'th'=>1, 'video'=>1); // Later context-wise dynamic move of ins & del to $inlineKidEleAr
+ $inlineKidEleAr = array('abbr'=>1, 'acronym'=>1, 'address'=>1, 'b'=>1, 'bdi'=>1, 'bdo'=>1, 'big'=>1, 'caption'=>1, 'cite'=>1, 'code'=>1, 'data'=>1, 'datalist'=>1, 'dfn'=>1, 'dt'=>1, 'em'=>1, 'figcaption'=>1, 'font'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hgroup'=>1, 'i'=>1, 'kbd'=>1, 'label'=>1, 'legend'=>1, 'mark'=>1, 'meter'=>1, 'output'=>1, 'p'=>1, 'picture'=>1, 'pre'=>1, 'progress'=>1, 'q'=>1, 'rb'=>1, 'rt'=>1, 's'=>1, 'samp'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'summary'=>1, 'sup'=>1, 'time'=>1, 'tt'=>1, 'u'=>1, 'var'=>1);
+ $noKidEleAr = array('area'=>1, 'br'=>1, 'col'=>1, 'command'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'keygen'=>1, 'link'=>1, 'meta'=>1, 'param'=>1, 'source'=>1, 'track'=>1, 'wbr'=>1);
+
+ // Special parent-child relations.
+
+ $invalidMomKidAr = array('a'=>array('a'=>1, 'address'=>1, 'button'=>1, 'details'=>1, 'embed'=>1, 'iframe'=>1, 'keygen'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'address'=>array('address'=>1, 'article'=>1, 'aside'=>1, 'footer'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'header'=>1, 'hgroup'=>1, 'keygen'=>1, 'nav'=>1, 'section'=>1), 'audio'=>array('audio'=>1, 'video'=>1), 'button'=>array('a'=>1, 'address'=>1, 'button'=>1, 'details'=>1, 'embed'=>1, 'iframe'=>1, 'keygen'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'dfn'=>array('dfn'=>1), 'fieldset'=>array('fieldset'=>1), 'footer'=>array('footer'=>1, 'header'=>1), 'form'=>array('form'=>1), 'header'=>array('footer'=>1, 'header'=>1), 'label'=>array('label'=>1), 'main'=>array('main'=>1), 'meter'=>array('meter'=>1), 'noscript'=>array('script'=>1), 'progress'=>array('progress'=>1), 'rb'=>array('ruby'=>1), 'rt'=>array('ruby'=>1), 'time'=>array('time'=>1), 'video'=>array('audio'=>1, 'video'=>1));
+ $invalidKidEleAr = array('a'=>1, 'address'=>1, 'article'=>1, 'aside'=>1, 'audio'=>1, 'button'=>1, 'details'=>1, 'dfn'=>1, 'embed'=>1, 'fieldset'=>1, 'footer'=>1, 'form'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'header'=>1, 'hgroup'=>1, 'iframe'=>1, 'keygen'=>1, 'label'=>1, 'main'=>1, 'meter'=>1, 'nav'=>1, 'progress'=>1, 'ruby'=>1, 'script'=>1, 'section'=>1, 'select'=>1, 'textarea'=>1, 'time'=>1, 'video'=>1); // $invalidMomKidAr values
+ $invalidMomEleAr = array_keys($invalidMomKidAr);
+ $validMomKidAr = array('colgroup'=>array('col'=>1, 'template'=>1), 'datalist'=>array('option'=>1, 'script'=>1), 'details'=>array('summary'=>1), 'dir'=>array('li'=>1), 'dl'=>array('dd'=>1, 'div'=>1, 'dt'=>1), 'hgroup'=>array('h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1), 'menu'=>array('li'=>1, 'script'=>1, 'template'=>1), 'ol'=>array('li'=>1, 'script'=>1, 'template'=>1), 'optgroup'=>array('option'=>1, 'script'=>1, 'template'=>1), 'option'=>array('#pcdata'=>1), 'picture'=>array('img'=>1, 'script'=>1, 'source'=>1, 'template'=>1), 'rbc'=>array('rb'=>1), 'rp'=>array('#pcdata'=>1), 'rtc'=>array('rp'=>1, 'rt'=>1), 'ruby'=>array('rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, '#pcdata'=>1), 'select'=>array('optgroup'=>1, 'option'=>1), 'script'=>array('#pcdata'=>1), 'table'=>array('caption'=>1, 'col'=>1, 'colgroup'=>1, 'script'=>1, 'tbody'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'template'=>1), 'tbody'=>array('script'=>1, 'template'=>1, 'tr'=>1), 'tfoot'=>array('tr'=>1), 'textarea'=>array('#pcdata'=>1), 'thead'=>array('script'=>1, 'template'=>1, 'tr'=>1), 'tr'=>array('script'=>1, 'td'=>1, 'template'=>1, 'th'=>1), 'ul'=>array('li'=>1, 'script'=>1, 'template'=>1)); // Immediate parent-child relation
+ if ($GLOBALS['C']['direct_list_nest']) {
+ $validMomKidAr['ol'] = $validMomKidAr['ul'] = $validMomKidAr['menu'] += array('menu'=>1, 'ol'=>1, 'ul'=>1);
+ }
+ $otherValidMomKidAr = array('address'=>array('p'=>1), 'applet'=>array('param'=>1), 'audio'=>array('source'=>1, 'track'=>1), 'blockquote'=>array('script'=>1), 'details'=>array('summary'=>1), 'fieldset'=>array('legend'=>1, '#pcdata'=>1), 'figure'=>array('figcaption'=>1),'form'=>array('script'=>1), 'map'=>array('area'=>1), 'legend'=>array('h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1), 'object'=>array('param'=>1, 'embed'=>1), 'summary'=>array('h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hgroup'=>1), 'video'=>array('source'=>1, 'track'=>1));
+
+ // Valid elements for top-level parent.
+
+ $mom = ((isset($flowEleAr[$parentEle]) && $parentEle != '#pcdata')
+ || isset($otherEleAr[$parentEle]))
+ ? $parentEle
+ : 'div';
+ if (isset($noKidEleAr[$mom])) { // Parent cannot have content, so no markup may remain
+ return (!$act ? '' : str_replace(array('<', '>'), array('&lt;', '&gt;'), $t));
+ }
+ if (isset($validMomKidAr[$mom])) {
+ $validInMomEleAr = $validMomKidAr[$mom];
+ } elseif (isset($inlineKidEleAr[$mom])) {
+ $validInMomEleAr = $inlineEleAr;
+ $inlineKidEleAr['del'] = 1;
+ $inlineKidEleAr['ins'] = 1;
+ } elseif (isset($flowKidEleAr[$mom])) {
+ $validInMomEleAr = $flowEleAr;
+ unset($inlineKidEleAr['del'], $inlineKidEleAr['ins']);
+ } elseif (isset($blockKidEleAr[$mom])) {
+ $validInMomEleAr = $blockEleAr;
+ unset($inlineKidEleAr['del'], $inlineKidEleAr['ins']);
+ }
+ if (isset($otherValidMomKidAr[$mom])) {
+ $validInMomEleAr = $validInMomEleAr + $otherValidMomKidAr[$mom];
+ }
+ if (isset($invalidMomKidAr[$mom])) {
+ $validInMomEleAr = array_diff_assoc($validInMomEleAr, $invalidMomKidAr[$mom]);
+ }
+ if (strpos($mom, '-')) { // Custom element
+ $validInMomEleAr = array('*' => 1, '#pcdata' =>1);
+ }
+
+ // Loop over elements.
+
+ $t = explode('<', $t);
+ $validKidsOfMom = $openEleQueue = array(); // Queue of opened elements
+ ob_start();
+ for ($i=-1, $eleCount=count($t); ++$i<$eleCount;) {
+
+ // Check element validity as child. Same code as section: Finishing (below).
+
+ if ($queueLength = count($openEleQueue)) {
+ $eleNow = array_pop($openEleQueue);
+ $openEleQueue[] = $eleNow;
+ if (isset($validMomKidAr[$eleNow])) {
+ $validKidsOfMom = $validMomKidAr[$eleNow];
+ } elseif (isset($inlineKidEleAr[$eleNow])) {
+ $validKidsOfMom = $inlineEleAr;
+ $inlineKidEleAr['del'] = 1;
+ $inlineKidEleAr['ins'] = 1;
+ } elseif (isset($flowKidEleAr[$eleNow])) {
+ $validKidsOfMom = $flowEleAr;
+ unset($inlineKidEleAr['del'], $inlineKidEleAr['ins']);
+ } elseif (isset($blockKidEleAr[$eleNow])) {
+ $validKidsOfMom = $blockEleAr;
+ unset($inlineKidEleAr['del'], $inlineKidEleAr['ins']);
+ }
+ if (isset($otherValidMomKidAr[$eleNow])) {
+ $validKidsOfMom = $validKidsOfMom + $otherValidMomKidAr[$eleNow];
+ }
+ if (isset($invalidMomKidAr[$eleNow])) {
+ $validKidsOfMom = array_diff_assoc($validKidsOfMom, $invalidMomKidAr[$eleNow]);
+ }
+ if (strpos($eleNow, '-')) { // Custom element
+ $validKidsOfMom = array('*'=>1, '#pcdata'=>1);
+ }
+ } else {
+ $validKidsOfMom = $validInMomEleAr;
+ unset($inlineKidEleAr['del'], $inlineKidEleAr['ins']);
+ }
+ if (
+ isset($ele)
+ && ($act == 1
+ || (isset($validKidsOfMom['#pcdata'])
+ && ($act == 3
+ || $act == 5)))
+ ) {
+ echo '&lt;', $slash, $ele, $attrs, '&gt;'; // $ele still set means last tag was rejected; neutralize it
+ }
+ if (isset($content[0])) {
+ if (strlen(trim($content))
+ && (($queueLength && isset($blockKidEleAr[$eleNow]))
+ || (isset($blockKidEleAr[$mom]) && !$queueLength))
+ ) {
+ echo '<div>', $content, '</div>';
+ } elseif ($act < 3 || isset($validKidsOfMom['#pcdata'])) {
+ echo $content;
+ } elseif (strpos($content, "\x02\x04")) {
+ foreach (
+ preg_split(
+ '`(\x01\x02[^\x01\x02]+\x02\x01)`', $content, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $m) {
+ echo(
+ substr($m, 0, 2) == "\x01\x02"
+ ? $m
+ : ($act > 4
+ ? preg_replace('`\S`', '', $m)
+ : ''));
 }
- echo $add, '<', $e, $a, '>';
- unset($e);
+ } elseif ($act > 4) {
+ echo preg_replace('`\S`', '', $content);
+ }
+ } // End: Check element validity as child
+
+ // Get parts of element.
+
+ if (!preg_match('`^(/?)([a-z][^ >]*)([^>]*)>(.*)`sm', $t[$i], $m)) {
+ $content = $t[$i];
+ continue;
+ }
+ $slash = null; // Closing tag's slash
+ $ele = null; // Name
+ $attrs = null; // Attribute string
+ $content = null; // Content
+ list($all, $slash, $ele, $attrs, $content) = $m;
+
+ // Handle closing tag.
+
+ if ($slash) {
+ if (isset($noKidEleAr[$ele]) || !in_array($ele, $openEleQueue)) { // Element empty type or unopened
+ continue;
+ }
+ if ($eleNow == $ele) { // Last open tag
+ array_pop($openEleQueue);
+ echo '</', $ele, '>';
+ unset($ele);
 continue;
+ }
+ $closedTags = ''; // Nesting, so close open elements as necessary
+ for ($j=-1, $cj=count($openEleQueue); ++$j<$cj;) {
+ if (($closableEle = array_pop($openEleQueue)) == $ele) {
+ break;
+ } else {
+ $closedTags .= "</{$closableEle}>";
+ }
+ }
+ echo $closedTags, '</', $ele, '>';
+ unset($ele);
+ continue;
 }
- // end
- if ($ql = count($q)) {
- $p = array_pop($q);
- $q[] = $p;
- if (isset($cS[$p])) {
- $ok = $cS[$p];
- } elseif (isset($cI[$p])) {
- $ok = $eI;
- $cI['del'] = 1;
- $cI['ins'] = 1;
- } elseif (isset($cF[$p])) {
- $ok = $eF;
- unset($cI['del'], $cI['ins']);
- } elseif (isset($cB[$p])) {
- $ok = $eB;
- unset($cI['del'], $cI['ins']);
- }
- if (isset($cO[$p])) {
- $ok = $ok + $cO[$p];
- }
- if (isset($cN[$p])) {
- $ok = array_diff_assoc($ok, $cN[$p]);
+ // Handle opening tag.
+
+ if (isset($blockKidEleAr[$ele]) && strlen(trim($content))) { // $blockKidEleAr element needs $blockEleAr element
+ $t[$i] = "{$ele}{$attrs}>";
+ array_splice($t, $i+1, 0, 'div>'. $content);
+ unset($ele, $content);
+ ++$eleCount;
+ --$i;
+ continue;
+ }
+ if (strpos($ele, '-')) { // Custom element
+ $validKidsOfMom[$ele] = 1;
+ }
+ if ((($queueLength && isset($blockKidEleAr[$eleNow]))
+ || (isset($blockKidEleAr[$mom]) && !$queueLength))
+ && !isset($blockEleAr[$ele])
+ && !isset($validKidsOfMom[$ele])
+ && !isset($validKidsOfMom['*'])
+ ) {
+ array_splice($t, $i, 0, 'div>');
+ unset($ele, $content);
+ ++$eleCount;
+ --$i;
+ continue;
+ }
+ if (
+ !$queueLength
+ || !isset($invalidKidEleAr[$ele])
+ || !array_intersect($openEleQueue, $invalidMomEleAr)
+ ) { // If no open element; mostly immediate parent-child relation should hold
+ if (!isset($validKidsOfMom[$ele]) && !isset($validKidsOfMom['*'])) {
+ if ($queueLength && isset($closingTagOmitableEleAr[$eleNow])) {
+ echo '</', array_pop($openEleQueue), '>';
+ unset($ele, $content);
+ --$i;
 }
- if (strpos($p, '-')) {
- $ok = ['*' => 1, '#pcdata' => 1];
+ continue;
+ }
+ if (!isset($noKidEleAr[$ele])) {
+ $openEleQueue[] = $ele;
+ }
+ echo '<', $ele, $attrs, '>';
+ unset($ele);
+ continue;
+ }
+ if (isset($validMomKidAr[$eleNow][$ele])) { // Specific parent-child relation
+ if (!isset($noKidEleAr[$ele])) {
+ $openEleQueue[] = $ele;
+ }
+ echo '<', $ele, $attrs, '>';
+ unset($ele);
+ continue;
+ }
+ $closedTags = ''; // Nesting, so close open elements as needed
+ $openEleQueue2 = array();
+ for ($k=-1, $kc=count($openEleQueue); ++$k<$kc;) {
+ $closableEle = $openEleQueue[$k];
+ $validKids2 = array();
+ if (isset($validMomKidAr[$closableEle])) {
+ $openEleQueue2[] = $closableEle;
+ continue;
+ }
+ $validKids2 = isset($inlineKidEleAr[$closableEle]) ? $inlineEleAr : $flowEleAr;
+ if (isset($otherValidMomKidAr[$closableEle])) {
+ $validKids2 = $validKids2 + $otherValidMomKidAr[$closableEle];
+ }
+ if (isset($invalidMomKidAr[$closableEle])) {
+ $validKids2 = array_diff_assoc($validKids2, $invalidMomKidAr[$closableEle]);
+ }
+ if (!isset($validKids2[$ele]) && !strpos($ele, '-')) {
+ if (!$k && !isset($validInMomEleAr[$ele]) && !isset($validInMomEleAr['*'])) {
+ continue 2;
 }
- } else {
- $ok = $inOk;
- unset($cI['del'], $cI['ins']);
- }
- if (isset($e) && (1 === $do || (isset($ok['#pcdata']) && (3 === $do || 5 === $do)))) {
- echo '<', $s, $e, $a, '>';
- }
- if (isset($x[0])) {
- if (strlen(trim($x)) && (($ql && isset($cB[$p])) || (isset($cB[$in]) && !$ql))) {
- echo '', $x, '
';
- } elseif ($do < 3 || isset($ok['#pcdata'])) {
- echo $x;
- } elseif (strpos($x, "\x02\x04")) {
- foreach (preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY) as $v) {
- echo "\x01\x02" === substr($v, 0, 2) ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : '');
- }
- } elseif ($do > 4) {
- echo preg_replace('`\S`', '', $x);
+ $closedTags = "</{$closableEle}>";
+ for (;++$k<$kc;) {
+ $closedTags = "</{$openEleQueue[$k]}>{$closedTags}";
 }
- }
- while (!empty($q) && ($e = array_pop($q))) {
- echo '', $e, '>';
- }
- $o = ob_get_contents();
- ob_end_clean();
+ break;
+ } else {
+ $openEleQueue2[] = $closableEle;
+ }
+ }
+ $openEleQueue = $openEleQueue2;
+ if (!isset($noKidEleAr[$ele])) {
+ $openEleQueue[] = $ele;
+ }
+ echo $closedTags, '<', $ele, $attrs, '>';
+ unset($ele);
+ continue;
+ } // End of For: loop over elements
- return $o;
-}
+ // Finishing. Same code as: 'Check element validity as child'.
-function hl_cmtcd($t)
-{
- // comment/CDATA sec handler
- $t = $t[0];
- global $C;
- if (!($v = $C[$n = '-' === $t[3] ? 'comment' : 'cdata'])) {
- return $t;
- }
- if (1 === $v) {
- return '';
- }
- if ('comment' === $n && $v < 4) {
- if (' ' !== substr(($t = preg_replace('`--+`', '-', substr($t, 4, -3))), -1)) {
- $t .= ' ';
- }
- } else {
- $t = substr($t, 1, -1);
- }
- $t = 2 === $v ? str_replace(['&', '<', '>'], ['&', '<', '>'], $t) : $t;
+ if ($queueLength = count($openEleQueue)) {
+ $eleNow = array_pop($openEleQueue);
+ $openEleQueue[] = $eleNow;
+ if (isset($validMomKidAr[$eleNow])) {
+ $validKidsOfMom = $validMomKidAr[$eleNow];
+ } elseif (isset($inlineKidEleAr[$eleNow])) {
+ $validKidsOfMom = $inlineEleAr;
+ $inlineKidEleAr['del'] = 1;
+ $inlineKidEleAr['ins'] = 1;
+ } elseif (isset($flowKidEleAr[$eleNow])) {
+ $validKidsOfMom = $flowEleAr;
+ unset($inlineKidEleAr['del'], $inlineKidEleAr['ins']);
+ } elseif (isset($blockKidEleAr[$eleNow])) {
+ $validKidsOfMom = $blockEleAr;
+ unset($inlineKidEleAr['del'], $inlineKidEleAr['ins']);
+ }
+ if (isset($otherValidMomKidAr[$eleNow])) {
+ $validKidsOfMom = $validKidsOfMom + $otherValidMomKidAr[$eleNow];
+ }
+ if (isset($invalidMomKidAr[$eleNow])) {
+ $validKidsOfMom = array_diff_assoc($validKidsOfMom, $invalidMomKidAr[$eleNow]);
+ }
+ if (strpos($eleNow, '-')) { // Custom element
+ $validKidsOfMom = array('*'=>1, '#pcdata'=>1);
+ }
+ } else {
+ $validKidsOfMom = $validInMomEleAr;
+ unset($inlineKidEleAr['del'], $inlineKidEleAr['ins']);
+ }
+ if (
+ isset($ele)
+ && ($act == 1
+ || (isset($validKidsOfMom['#pcdata'])
+ && ($act == 3
+ || $act == 5)))
+ ) {
+ echo '&lt;', $slash, $ele, $attrs, '&gt;'; // $ele still set means last tag was rejected; neutralize it
+ }
+ if (isset($content[0])) {
+ if (
+ strlen(trim($content))
+ && (($queueLength && isset($blockKidEleAr[$eleNow]))
+ || (isset($blockKidEleAr[$mom]) && !$queueLength))
+ ) {
+ echo '<div>', $content, '</div>';
+ } elseif ($act < 3 || isset($validKidsOfMom['#pcdata'])) {
+ echo $content;
+ } elseif (strpos($content, "\x02\x04")) {
+ foreach (
+ preg_split(
+ '`(\x01\x02[^\x01\x02]+\x02\x01)`', $content, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $m) {
+ echo(
+ substr($m, 0, 2) == "\x01\x02"
+ ? $m
+ : ($act > 4
+ ? preg_replace('`\S`', '', $m)
+ : ''));
+ }
+ } elseif ($act > 4) {
+ echo preg_replace('`\S`', '', $content);
+ }
+ } // End: Finishing
- return str_replace(['&', '<', '>'], ["\x03", "\x04", "\x05"], ('comment' === $n ? "\x01\x02\x04!--$t--\x05\x02\x01" : "\x01\x01\x04$t\x05\x01\x01"));
+ while (!empty($openEleQueue) && ($ele = array_pop($openEleQueue))) { // Close whatever is still open
+ echo '</', $ele, '>';
+ }
+ $o = ob_get_contents();
+ ob_end_clean();
+ return $o;
 }
-function hl_ent($t)
+/**
+ * Handle comment/CDATA section.
+ *
+ * Filter/sanitize as per $config and disguise special characters.
+ *
+ * @param array $t Array result of preg_replace, with potential comment/CDATA.
+ * @return string Sanitized comment/CDATA with hidden special characters.
+ */
+function hl_commentCdata($t)
{
- // entitity handler
- global $C;
- $t = $t[1];
- static $U = ['quot' => 1, 'amp' => 1, 'lt' => 1, 'gt' => 1];
- static $N = ['fnof' => '402', 'Alpha' => '913', 'Beta' => '914', 'Gamma' => '915', 'Delta' => '916', 'Epsilon' => '917', 'Zeta' => '918', 'Eta' => '919', 'Theta' => '920', 'Iota' => '921', 'Kappa' => '922', 'Lambda' => '923', 'Mu' => '924', 'Nu' => '925', 'Xi' => '926', 'Omicron' => '927', 'Pi' => '928', 'Rho' => '929', 'Sigma' => '931', 'Tau' => '932', 'Upsilon' => '933', 'Phi' => '934', 'Chi' => '935', 'Psi' => '936', 'Omega' => '937', 'alpha' => '945', 'beta' => '946', 'gamma' => '947', 'delta' => '948', 'epsilon' => '949', 'zeta' => '950', 'eta' => '951', 'theta' => '952', 'iota' => '953', 'kappa' => '954', 'lambda' => '955', 'mu' => '956', 'nu' => '957', 'xi' => '958', 'omicron' => '959', 'pi' => '960', 'rho' => '961', 'sigmaf' => '962', 'sigma' => '963', 'tau' => '964', 'upsilon' => '965', 'phi' => '966', 'chi' => '967', 'psi' => '968', 'omega' => '969', 'thetasym' => '977', 'upsih' => '978', 'piv' => '982', 'bull' => '8226', 'hellip' => '8230', 'prime' => '8242', 'Prime' => '8243', 'oline' => '8254', 'frasl' => '8260', 'weierp' => '8472', 'image' => '8465', 'real' => '8476', 'trade' => '8482', 'alefsym' => '8501', 'larr' => '8592', 'uarr' => '8593', 'rarr' => '8594', 'darr' => '8595', 'harr' => '8596', 'crarr' => '8629', 'lArr' => '8656', 'uArr' => '8657', 'rArr' => '8658', 'dArr' => '8659', 'hArr' => '8660', 'forall' => '8704', 'part' => '8706', 'exist' => '8707', 'empty' => '8709', 'nabla' => '8711', 'isin' => '8712', 'notin' => '8713', 'ni' => '8715', 'prod' => '8719', 'sum' => '8721', 'minus' => '8722', 'lowast' => '8727', 'radic' => '8730', 'prop' => '8733', 'infin' => '8734', 'ang' => '8736', 'and' => '8743', 'or' => '8744', 'cap' => '8745', 'cup' => '8746', 'int' => '8747', 'there4' => '8756', 'sim' => '8764', 'cong' => '8773', 'asymp' => '8776', 'ne' => '8800', 'equiv' => '8801', 'le' => '8804', 'ge' => '8805', 'sub' => '8834', 'sup' => '8835', 'nsub' => '8836', 'sube' => '8838', 'supe' => '8839', 'oplus' => '8853', 'otimes' => '8855', 'perp' => 
'8869', 'sdot' => '8901', 'lceil' => '8968', 'rceil' => '8969', 'lfloor' => '8970', 'rfloor' => '8971', 'lang' => '9001', 'rang' => '9002', 'loz' => '9674', 'spades' => '9824', 'clubs' => '9827', 'hearts' => '9829', 'diams' => '9830', 'apos' => '39', 'OElig' => '338', 'oelig' => '339', 'Scaron' => '352', 'scaron' => '353', 'Yuml' => '376', 'circ' => '710', 'tilde' => '732', 'ensp' => '8194', 'emsp' => '8195', 'thinsp' => '8201', 'zwnj' => '8204', 'zwj' => '8205', 'lrm' => '8206', 'rlm' => '8207', 'ndash' => '8211', 'mdash' => '8212', 'lsquo' => '8216', 'rsquo' => '8217', 'sbquo' => '8218', 'ldquo' => '8220', 'rdquo' => '8221', 'bdquo' => '8222', 'dagger' => '8224', 'Dagger' => '8225', 'permil' => '8240', 'lsaquo' => '8249', 'rsaquo' => '8250', 'euro' => '8364', 'nbsp' => '160', 'iexcl' => '161', 'cent' => '162', 'pound' => '163', 'curren' => '164', 'yen' => '165', 'brvbar' => '166', 'sect' => '167', 'uml' => '168', 'copy' => '169', 'ordf' => '170', 'laquo' => '171', 'not' => '172', 'shy' => '173', 'reg' => '174', 'macr' => '175', 'deg' => '176', 'plusmn' => '177', 'sup2' => '178', 'sup3' => '179', 'acute' => '180', 'micro' => '181', 'para' => '182', 'middot' => '183', 'cedil' => '184', 'sup1' => '185', 'ordm' => '186', 'raquo' => '187', 'frac14' => '188', 'frac12' => '189', 'frac34' => '190', 'iquest' => '191', 'Agrave' => '192', 'Aacute' => '193', 'Acirc' => '194', 'Atilde' => '195', 'Auml' => '196', 'Aring' => '197', 'AElig' => '198', 'Ccedil' => '199', 'Egrave' => '200', 'Eacute' => '201', 'Ecirc' => '202', 'Euml' => '203', 'Igrave' => '204', 'Iacute' => '205', 'Icirc' => '206', 'Iuml' => '207', 'ETH' => '208', 'Ntilde' => '209', 'Ograve' => '210', 'Oacute' => '211', 'Ocirc' => '212', 'Otilde' => '213', 'Ouml' => '214', 'times' => '215', 'Oslash' => '216', 'Ugrave' => '217', 'Uacute' => '218', 'Ucirc' => '219', 'Uuml' => '220', 'Yacute' => '221', 'THORN' => '222', 'szlig' => '223', 'agrave' => '224', 'aacute' => '225', 'acirc' => '226', 'atilde' => '227', 'auml' 
=> '228', 'aring' => '229', 'aelig' => '230', 'ccedil' => '231', 'egrave' => '232', 'eacute' => '233', 'ecirc' => '234', 'euml' => '235', 'igrave' => '236', 'iacute' => '237', 'icirc' => '238', 'iuml' => '239', 'eth' => '240', 'ntilde' => '241', 'ograve' => '242', 'oacute' => '243', 'ocirc' => '244', 'otilde' => '245', 'ouml' => '246', 'divide' => '247', 'oslash' => '248', 'ugrave' => '249', 'uacute' => '250', 'ucirc' => '251', 'uuml' => '252', 'yacute' => '253', 'thorn' => '254', 'yuml' => '255'];
- if ('#' !== $t[0]) {
- return ($C['and_mark'] ? "\x06" : '&') . (isset($U[$t]) ? $t : (isset($N[$t]) ? (!$C['named_entity'] ? '#' . ($C['hexdec_entity'] > 1 ? 'x' . dechex($N[$t]) : $N[$t]) : $t) : 'amp;' . $t)) . ';';
- }
- if (($n = ctype_digit($t = substr($t, 1)) ? (int) $t : hexdec(substr($t, 1))) < 9 || ($n > 13 && $n < 32) || 11 === $n || 12 === $n || ($n > 126 && $n < 160 && 133 !== $n) || ($n > 55295 && ($n < 57344 || ($n > 64975 && $n < 64992) || 65534 === $n || 65535 === $n || $n > 1114111))) {
- return ($C['and_mark'] ? "\x06" : '&') . "amp;#{$t};";
- }
-
- return ($C['and_mark'] ? "\x06" : '&') . '#' . (((ctype_digit($t) && $C['hexdec_entity'] < 2) || !$C['hexdec_entity']) ? $n : 'x' . dechex($n)) . ';';
+ // $t[0] is the whole matched comment or CDATA section. The character at
+ // index 3 is '-' for a comment; anything else is taken as CDATA.
+ $t = $t[0];
+ global $C;
+ if (!($rule = $C[$type = $t[3] == '-' ? 'comment' : 'cdata'])) { // Falsy rule: leave section as is
+ return $t;
+ }
+ if ($rule == 1) { // Remove section
+ return '';
+ }
+ if ($type == 'comment') {
+ // Keep only the comment's inner text, with hyphen runs reduced to one
+ // hyphen, and end it with a space unless rule is 4.
+ if (substr(($t = preg_replace('`--+`', '-', substr($t, 4, -3))), -1) != ' ') {
+ $t .= $rule == 4 ? '' : ' ';
+ }
+ } else {
+ $t = substr($t, 1, -1); // Drop first and last characters; re-added below as \x04 and \x05
+ }
+ // Rule 2: turn the special characters into entities.
+ $t = $rule == 2 ? str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $t) : $t;
+ // Hide remaining special characters as control characters and wrap the
+ // section in marker bytes.
+ return
+ str_replace(
+ array('&', '<', '>'),
+ array("\x03", "\x04", "\x05"),
+ ($type == 'comment' ? "\x01\x02\x04!--$t--\x05\x02\x01" : "\x01\x01\x04$t\x05\x01\x01"));
 }
-function hl_prot($p, $c = null)
+/**
+ * Transform deprecated element, with any attribute, into a new element.
+ *
+ * $ele is changed in place to the name of the replacement element. Only
+ * for 'font' is $attrStr read and changed: its color, size and face
+ * attributes are removed from it and returned as CSS declarations.
+ *
+ * @param string $ele Deprecated element.
+ * @param string $attrStr Attribute string of element.
+ * @param int $act If 2, an element with no transformation here
+ * has $ele set to 0 and 0 is returned.
+ * @return mixed CSS declarations (may be an empty string) or 0.
+ */
+function hl_deprecatedElement(&$ele, &$attrStr, $act=1)
 {
- // check URL scheme
- global $C;
- $b = $a = '';
- if (null === $c) {
- $c = 'style';
- $b = $p[1];
- $a = $p[3];
- $p = trim($p[2]);
- }
- $c = isset($C['schemes'][$c]) ? $C['schemes'][$c] : $C['schemes']['*'];
- static $d = 'denied:';
- if (isset($c['!']) && substr($p, 0, 7) !== $d) {
- $p = "$d$p";
- }
- if (isset($c['*']) || !strcspn($p, '#?;') || (substr($p, 0, 7) === $d)) {
- return "{$b}{$p}{$a}";
- } // All ok, frag, query, param
- if (preg_match('`^([^:?[@!$()*,=/\'\]]+?)(:|(58|x3a);|%3a|\\\\0{0,4}3a).`i', $p, $m) && !isset($c[strtolower($m[1])])) { // Denied prot
- return "{$b}{$d}{$p}{$a}";
- }
- if ($C['abs_url']) {
- if (-1 === $C['abs_url'] && 0 === strpos($p, $C['base_url'])) { // Make url rel
- $p = substr($p, strlen($C['base_url']));
- } elseif (empty($m[1])) { // Make URL abs
- if ('//' === substr($p, 0, 2)) {
- $p = substr($C['base_url'], 0, strpos($C['base_url'], ':') + 1) . $p;
- } elseif ('/' === $p[0]) {
- $p = preg_replace('`(^.+?://[^/]+)(.*)`', '$1', $C['base_url']) . $p;
- } elseif (strcspn($p, './')) {
- $p = $C['base_url'] . $p;
- } else {
- preg_match('`^([a-zA-Z\d\-+.]+://[^/]+)(.*)`', $C['base_url'], $m);
- $p = preg_replace('`(?<=/)\./`', '', $m[2] . $p);
- while (preg_match('`(?<=/)([^/]{3,}|[^/.]+?|\.[^/.]|[^/.]\.)/\.\./`', $p)) {
- $p = preg_replace('`(?<=/)([^/]{3,}|[^/.]+?|\.[^/.]|[^/.]\.)/\.\./`', '', $p);
- }
- $p = $m[1] . $p;
- }
- }
- }
+ if ($ele == 'big') {
+ $ele = 'span';
+ return 'font-size: larger;';
+ }
+ if ($ele == 's' || $ele == 'strike') {
+ $ele = 'span';
+ return 'text-decoration: line-through;';
+ }
+ if ($ele == 'tt') {
+ $ele = 'code';
+ return '';
+ }
+ if ($ele == 'center') {
+ $ele = 'div';
+ return 'text-align: center;';
+ }
+ static $fontSizeAr = array('0'=>'xx-small', '1'=>'xx-small', '2'=>'small', '3'=>'medium', '4'=>'large', '5'=>'x-large', '6'=>'xx-large', '7'=>'300%', '-1'=>'smaller', '-2'=>'60%', '+1'=>'larger', '+2'=>'150%', '+3'=>'200%', '+4'=>'300%'); // Font element's size value => CSS font-size value
+ if ($ele == 'font') {
+ $attrStrNew = '';
+ while (preg_match('`(^|\s)(color|size)\s*=\s*(\'|")?(.+?)(\\3|\s|$)`i', $attrStr, $m)) { // Each color or size attribute
+ $attrStr = str_replace($m[0], ' ', $attrStr) ;
+ $attrStrNew .=
+ strtolower($m[2]) == 'color'
+ ? ' color: '. str_replace(array('"', ';', ':'), '\'', trim($m[4])). ';'
+ : (isset($fontSizeAr[($m = trim($m[4]))])
+ ? ' font-size: '. $fontSizeAr[$m]. ';'
+ : ''); // Size value not in $fontSizeAr adds nothing
+ }
+ while (
+ preg_match('`(^|\s)face\s*=\s*(\'|")?([^=]+?)\\2`i', $attrStr, $m)
+ || preg_match('`(^|\s)face\s*=(\s*)(\S+)`i', $attrStr, $m)
+ ) { // Each face attribute
+ $attrStr = str_replace($m[0], ' ', $attrStr) ;
+ $attrStrNew .= ' font-family: '. str_replace(array('"', ';', ':'), '\'', trim($m[3])). ';';
+ }
+ $ele = 'span';
+ return ltrim(str_replace('<', '', $attrStrNew)); // Any '<' is removed from the result
+ }
+ if ($ele == 'acronym') {
+ $ele = 'abbr';
+ return '';
+ }
+ if ($ele == 'dir') {
+ $ele = 'ul';
+ return '';
+ }
+ if ($act == 2) { // No transformation matched above
+ $ele = 0;
+ return 0;
+ }
+ return '';
+}
- return "{$b}{$p}{$a}";
+/**
+ * Handle entity.
+ *
+ * As needed, convert to named/hexadecimal form, or neutralize '&' as '&amp;'.
+ *
+ * @param array $t Array result of preg_replace, with potential entity.
+ * @return string Neutralized or converted entity.
+ */
+function hl_entity($t)
+{
+ global $C;
+ $t = $t[1];
+ static $reservedEntAr = array('amp'=>1, 'AMP'=>1, 'gt'=>1, 'GT'=>1, 'lt'=>1, 'LT'=>1, 'quot'=>1, 'QUOT'=>1);
+ static $commonEntNameAr = array('Aacute'=>'193', 'aacute'=>'225', 'Acirc'=>'194', 'acirc'=>'226', 'acute'=>'180', 'AElig'=>'198', 'aelig'=>'230', 'Agrave'=>'192', 'agrave'=>'224', 'alefsym'=>'8501', 'Alpha'=>'913', 'alpha'=>'945', 'and'=>'8743', 'ang'=>'8736', 'apos'=>'39', 'Aring'=>'197', 'aring'=>'229', 'asymp'=>'8776', 'Atilde'=>'195', 'atilde'=>'227', 'Auml'=>'196', 'auml'=>'228', 'bdquo'=>'8222', 'Beta'=>'914', 'beta'=>'946', 'brvbar'=>'166', 'bull'=>'8226', 'cap'=>'8745', 'Ccedil'=>'199', 'ccedil'=>'231', 'cedil'=>'184', 'cent'=>'162', 'Chi'=>'935', 'chi'=>'967', 'circ'=>'710', 'clubs'=>'9827', 'cong'=>'8773', 'copy'=>'169', 'crarr'=>'8629', 'cup'=>'8746', 'curren'=>'164', 'dagger'=>'8224', 'Dagger'=>'8225', 'darr'=>'8595', 'dArr'=>'8659', 'deg'=>'176', 'Delta'=>'916', 'delta'=>'948', 'diams'=>'9830', 'divide'=>'247', 'Eacute'=>'201', 'eacute'=>'233', 'Ecirc'=>'202', 'ecirc'=>'234', 'Egrave'=>'200', 'egrave'=>'232', 'empty'=>'8709', 'emsp'=>'8195', 'ensp'=>'8194', 'Epsilon'=>'917', 'epsilon'=>'949', 'equiv'=>'8801', 'Eta'=>'919', 'eta'=>'951', 'ETH'=>'208', 'eth'=>'240', 'Euml'=>'203', 'euml'=>'235', 'euro'=>'8364', 'exist'=>'8707', 'fnof'=>'402', 'forall'=>'8704', 'frac12'=>'189', 'frac14'=>'188', 'frac34'=>'190', 'frasl'=>'8260', 'Gamma'=>'915', 'gamma'=>'947', 'ge'=>'8805', 'harr'=>'8596', 'hArr'=>'8660', 'hearts'=>'9829', 'hellip'=>'8230', 'Iacute'=>'205', 'iacute'=>'237', 'Icirc'=>'206', 'icirc'=>'238', 'iexcl'=>'161', 'Igrave'=>'204', 'igrave'=>'236', 'image'=>'8465', 'infin'=>'8734', 'int'=>'8747', 'Iota'=>'921', 'iota'=>'953', 'iquest'=>'191', 'isin'=>'8712', 'Iuml'=>'207', 'iuml'=>'239', 'Kappa'=>'922', 'kappa'=>'954', 'Lambda'=>'923', 'lambda'=>'955', 'laquo'=>'171', 'larr'=>'8592', 'lArr'=>'8656', 'lceil'=>'8968', 'ldquo'=>'8220', 'le'=>'8804', 'lfloor'=>'8970', 'lowast'=>'8727', 'loz'=>'9674', 'lrm'=>'8206', 'lsaquo'=>'8249', 'lsquo'=>'8216', 'macr'=>'175', 'mdash'=>'8212', 'micro'=>'181', 'middot'=>'183', 'minus'=>'8722', 'Mu'=>'924', 
'mu'=>'956', 'nabla'=>'8711', 'nbsp'=>'160', 'ndash'=>'8211', 'ne'=>'8800', 'ni'=>'8715', 'not'=>'172', 'notin'=>'8713', 'nsub'=>'8836', 'Ntilde'=>'209', 'ntilde'=>'241', 'Nu'=>'925', 'nu'=>'957', 'Oacute'=>'211', 'oacute'=>'243', 'Ocirc'=>'212', 'ocirc'=>'244', 'OElig'=>'338', 'oelig'=>'339', 'Ograve'=>'210', 'ograve'=>'242', 'oline'=>'8254', 'Omega'=>'937', 'omega'=>'969', 'Omicron'=>'927', 'omicron'=>'959', 'oplus'=>'8853', 'or'=>'8744', 'ordf'=>'170', 'ordm'=>'186', 'Oslash'=>'216', 'oslash'=>'248', 'Otilde'=>'213', 'otilde'=>'245', 'otimes'=>'8855', 'Ouml'=>'214', 'ouml'=>'246', 'para'=>'182', 'part'=>'8706', 'permil'=>'8240', 'perp'=>'8869', 'Phi'=>'934', 'phi'=>'966', 'Pi'=>'928', 'pi'=>'960', 'piv'=>'982', 'plusmn'=>'177', 'pound'=>'163', 'prime'=>'8242', 'Prime'=>'8243', 'prod'=>'8719', 'prop'=>'8733', 'Psi'=>'936', 'psi'=>'968', 'radic'=>'8730', 'raquo'=>'187', 'rarr'=>'8594', 'rArr'=>'8658', 'rceil'=>'8969', 'rdquo'=>'8221', 'real'=>'8476', 'reg'=>'174', 'rfloor'=>'8971', 'Rho'=>'929', 'rho'=>'961', 'rlm'=>'8207', 'rsaquo'=>'8250', 'rsquo'=>'8217', 'sbquo'=>'8218', 'Scaron'=>'352', 'scaron'=>'353', 'sdot'=>'8901', 'sect'=>'167', 'shy'=>'173', 'Sigma'=>'931', 'sigma'=>'963', 'sigmaf'=>'962', 'sim'=>'8764', 'spades'=>'9824', 'sub'=>'8834', 'sube'=>'8838', 'sum'=>'8721', 'sup'=>'8835', 'sup1'=>'185', 'sup2'=>'178', 'sup3'=>'179', 'supe'=>'8839', 'szlig'=>'223', 'Tau'=>'932', 'tau'=>'964', 'there4'=>'8756', 'Theta'=>'920', 'theta'=>'952', 'thetasym'=>'977', 'thinsp'=>'8201', 'THORN'=>'222', 'thorn'=>'254', 'tilde'=>'732', 'times'=>'215', 'trade'=>'8482', 'Uacute'=>'218', 'uacute'=>'250', 'uarr'=>'8593', 'uArr'=>'8657', 'Ucirc'=>'219', 'ucirc'=>'251', 'Ugrave'=>'217', 'ugrave'=>'249', 'uml'=>'168', 'upsih'=>'978', 'Upsilon'=>'933', 'upsilon'=>'965', 'Uuml'=>'220', 'uuml'=>'252', 'weierp'=>'8472', 'Xi'=>'926', 'xi'=>'958', 'Yacute'=>'221', 'yacute'=>'253', 'yen'=>'165', 'yuml'=>'255', 'Yuml'=>'376', 'Zeta'=>'918', 'zeta'=>'950', 'zwj'=>'8205', 'zwnj'=>'8204');
+ static $rareEntNameAr = array('Abreve'=>'258', 'abreve'=>'259', 'ac'=>'8766', 'acd'=>'8767', 'Acy'=>'1040', 'acy'=>'1072', 'af'=>'8289', 'Afr'=>'120068', 'afr'=>'120094', 'aleph'=>'8501', 'Amacr'=>'256', 'amacr'=>'257', 'amalg'=>'10815', 'And'=>'10835', 'andand'=>'10837', 'andd'=>'10844', 'andslope'=>'10840', 'andv'=>'10842', 'ange'=>'10660', 'angle'=>'8736', 'angmsd'=>'8737', 'angmsdaa'=>'10664', 'angmsdab'=>'10665', 'angmsdac'=>'10666', 'angmsdad'=>'10667', 'angmsdae'=>'10668', 'angmsdaf'=>'10669', 'angmsdag'=>'10670', 'angmsdah'=>'10671', 'angrt'=>'8735', 'angrtvb'=>'8894', 'angrtvbd'=>'10653', 'angsph'=>'8738', 'angst'=>'197', 'angzarr'=>'9084', 'Aogon'=>'260', 'aogon'=>'261', 'Aopf'=>'120120', 'aopf'=>'120146', 'ap'=>'8776', 'apacir'=>'10863', 'apE'=>'10864', 'ape'=>'8778', 'apid'=>'8779', 'ApplyFunction'=>'8289', 'approx'=>'8776', 'approxeq'=>'8778', 'Ascr'=>'119964', 'ascr'=>'119990', 'Assign'=>'8788', 'ast'=>'42', 'asympeq'=>'8781', 'awconint'=>'8755', 'awint'=>'10769', 'backcong'=>'8780', 'backepsilon'=>'1014', 'backprime'=>'8245', 'backsim'=>'8765', 'backsimeq'=>'8909', 'Backslash'=>'8726', 'Barv'=>'10983', 'barvee'=>'8893', 'barwed'=>'8965', 'Barwed'=>'8966', 'barwedge'=>'8965', 'bbrk'=>'9141', 'bbrktbrk'=>'9142', 'bcong'=>'8780', 'Bcy'=>'1041', 'bcy'=>'1073', 'becaus'=>'8757', 'because'=>'8757', 'Because'=>'8757', 'bemptyv'=>'10672', 'bepsi'=>'1014', 'bernou'=>'8492', 'Bernoullis'=>'8492', 'beth'=>'8502', 'between'=>'8812', 'Bfr'=>'120069', 'bfr'=>'120095', 'bigcap'=>'8898', 'bigcirc'=>'9711', 'bigcup'=>'8899', 'bigodot'=>'10752', 'bigoplus'=>'10753', 'bigotimes'=>'10754', 'bigsqcup'=>'10758', 'bigstar'=>'9733', 'bigtriangledown'=>'9661', 'bigtriangleup'=>'9651', 'biguplus'=>'10756', 'bigvee'=>'8897', 'bigwedge'=>'8896', 'bkarow'=>'10509', 'blacklozenge'=>'10731', 'blacksquare'=>'9642', 'blacktriangle'=>'9652', 'blacktriangledown'=>'9662', 'blacktriangleleft'=>'9666', 'blacktriangleright'=>'9656', 'blank'=>'9251', 'blk12'=>'9618', 'blk14'=>'9617', 
'blk34'=>'9619', 'block'=>'9608', 'bNot'=>'10989', 'bnot'=>'8976', 'Bopf'=>'120121', 'bopf'=>'120147', 'bot'=>'8869', 'bottom'=>'8869', 'bowtie'=>'8904', 'boxbox'=>'10697', 'boxdl'=>'9488', 'boxdL'=>'9557', 'boxDl'=>'9558', 'boxDL'=>'9559', 'boxdr'=>'9484', 'boxdR'=>'9554', 'boxDr'=>'9555', 'boxDR'=>'9556', 'boxh'=>'9472', 'boxH'=>'9552', 'boxhd'=>'9516', 'boxHd'=>'9572', 'boxhD'=>'9573', 'boxHD'=>'9574', 'boxhu'=>'9524', 'boxHu'=>'9575', 'boxhU'=>'9576', 'boxHU'=>'9577', 'boxminus'=>'8863', 'boxplus'=>'8862', 'boxtimes'=>'8864', 'boxul'=>'9496', 'boxuL'=>'9563', 'boxUl'=>'9564', 'boxUL'=>'9565', 'boxur'=>'9492', 'boxuR'=>'9560', 'boxUr'=>'9561', 'boxUR'=>'9562', 'boxv'=>'9474', 'boxV'=>'9553', 'boxvh'=>'9532', 'boxvH'=>'9578', 'boxVh'=>'9579', 'boxVH'=>'9580', 'boxvl'=>'9508', 'boxvL'=>'9569', 'boxVl'=>'9570', 'boxVL'=>'9571', 'boxvr'=>'9500', 'boxvR'=>'9566', 'boxVr'=>'9567', 'boxVR'=>'9568', 'bprime'=>'8245', 'breve'=>'728', 'Breve'=>'728', 'bscr'=>'119991', 'Bscr'=>'8492', 'bsemi'=>'8271', 'bsim'=>'8765', 'bsime'=>'8909', 'bsol'=>'92', 'bsolb'=>'10693', 'bsolhsub'=>'10184', 'bullet'=>'8226', 'bump'=>'8782', 'bumpE'=>'10926', 'bumpe'=>'8783', 'Bumpeq'=>'8782', 'bumpeq'=>'8783', 'Cacute'=>'262', 'cacute'=>'263', 'Cap'=>'8914', 'capand'=>'10820', 'capbrcup'=>'10825', 'capcap'=>'10827', 'capcup'=>'10823', 'capdot'=>'10816', 'CapitalDifferentialD'=>'8517', 'caret'=>'8257', 'caron'=>'711', 'Cayleys'=>'8493', 'ccaps'=>'10829', 'Ccaron'=>'268', 'ccaron'=>'269', 'Ccirc'=>'264', 'ccirc'=>'265', 'Cconint'=>'8752', 'ccups'=>'10828', 'ccupssm'=>'10832', 'Cdot'=>'266', 'cdot'=>'267', 'Cedilla'=>'184', 'cemptyv'=>'10674', 'centerdot'=>'183', 'CenterDot'=>'183', 'cfr'=>'120096', 'Cfr'=>'8493', 'CHcy'=>'1063', 'chcy'=>'1095', 'check'=>'10003', 'checkmark'=>'10003', 'cir'=>'9675', 'circeq'=>'8791', 'circlearrowleft'=>'8634', 'circlearrowright'=>'8635', 'circledast'=>'8859', 'circledcirc'=>'8858', 'circleddash'=>'8861', 'CircleDot'=>'8857', 'circledR'=>'174', 'circledS'=>'9416', 
'CircleMinus'=>'8854', 'CirclePlus'=>'8853', 'CircleTimes'=>'8855', 'cirE'=>'10691', 'cire'=>'8791', 'cirfnint'=>'10768', 'cirmid'=>'10991', 'cirscir'=>'10690', 'ClockwiseContourIntegral'=>'8754', 'CloseCurlyDoubleQuote'=>'8221', 'CloseCurlyQuote'=>'8217', 'clubsuit'=>'9827', 'colon'=>'58', 'Colon'=>'8759', 'Colone'=>'10868', 'colone'=>'8788', 'coloneq'=>'8788', 'comma'=>'44', 'commat'=>'64', 'comp'=>'8705', 'compfn'=>'8728', 'complement'=>'8705', 'complexes'=>'8450', 'congdot'=>'10861', 'Congruent'=>'8801', 'conint'=>'8750', 'Conint'=>'8751', 'ContourIntegral'=>'8750', 'copf'=>'120148', 'Copf'=>'8450', 'coprod'=>'8720', 'Coproduct'=>'8720', 'COPY'=>'169', 'copysr'=>'8471', 'CounterClockwiseContourIntegral'=>'8755', 'cross'=>'10007', 'Cross'=>'10799', 'Cscr'=>'119966', 'cscr'=>'119992', 'csub'=>'10959', 'csube'=>'10961', 'csup'=>'10960', 'csupe'=>'10962', 'ctdot'=>'8943', 'cudarrl'=>'10552', 'cudarrr'=>'10549', 'cuepr'=>'8926', 'cuesc'=>'8927', 'cularr'=>'8630', 'cularrp'=>'10557', 'Cup'=>'8915', 'cupbrcap'=>'10824', 'cupcap'=>'10822', 'CupCap'=>'8781', 'cupcup'=>'10826', 'cupdot'=>'8845', 'cupor'=>'10821', 'curarr'=>'8631', 'curarrm'=>'10556', 'curlyeqprec'=>'8926', 'curlyeqsucc'=>'8927', 'curlyvee'=>'8910', 'curlywedge'=>'8911', 'curvearrowleft'=>'8630', 'curvearrowright'=>'8631', 'cuvee'=>'8910', 'cuwed'=>'8911', 'cwconint'=>'8754', 'cwint'=>'8753', 'cylcty'=>'9005', 'daleth'=>'8504', 'Darr'=>'8609', 'dash'=>'8208', 'Dashv'=>'10980', 'dashv'=>'8867', 'dbkarow'=>'10511', 'dblac'=>'733', 'Dcaron'=>'270', 'dcaron'=>'271', 'Dcy'=>'1044', 'dcy'=>'1076', 'DD'=>'8517', 'dd'=>'8518', 'ddagger'=>'8225', 'ddarr'=>'8650', 'DDotrahd'=>'10513', 'ddotseq'=>'10871', 'Del'=>'8711', 'demptyv'=>'10673', 'dfisht'=>'10623', 'Dfr'=>'120071', 'dfr'=>'120097', 'dHar'=>'10597', 'dharl'=>'8643', 'dharr'=>'8642', 'DiacriticalAcute'=>'180', 'DiacriticalDot'=>'729', 'DiacriticalDoubleAcute'=>'733', 'DiacriticalGrave'=>'96', 'DiacriticalTilde'=>'732', 'diam'=>'8900', 'diamond'=>'8900', 
'Diamond'=>'8900', 'diamondsuit'=>'9830', 'die'=>'168', 'DifferentialD'=>'8518', 'digamma'=>'989', 'disin'=>'8946', 'div'=>'247', 'divideontimes'=>'8903', 'divonx'=>'8903', 'DJcy'=>'1026', 'djcy'=>'1106', 'dlcorn'=>'8990', 'dlcrop'=>'8973', 'dollar'=>'36', 'Dopf'=>'120123', 'dopf'=>'120149', 'Dot'=>'168', 'dot'=>'729', 'DotDot'=>'8412', 'doteq'=>'8784', 'doteqdot'=>'8785', 'DotEqual'=>'8784', 'dotminus'=>'8760', 'dotplus'=>'8724', 'dotsquare'=>'8865', 'doublebarwedge'=>'8966', 'DoubleContourIntegral'=>'8751', 'DoubleDot'=>'168', 'DoubleDownArrow'=>'8659', 'DoubleLeftArrow'=>'8656', 'DoubleLeftRightArrow'=>'8660', 'DoubleLeftTee'=>'10980', 'DoubleLongLeftArrow'=>'10232', 'DoubleLongLeftRightArrow'=>'10234', 'DoubleLongRightArrow'=>'10233', 'DoubleRightArrow'=>'8658', 'DoubleRightTee'=>'8872', 'DoubleUpArrow'=>'8657', 'DoubleUpDownArrow'=>'8661', 'DoubleVerticalBar'=>'8741', 'downarrow'=>'8595', 'DownArrow'=>'8595', 'Downarrow'=>'8659', 'DownArrowBar'=>'10515', 'DownArrowUpArrow'=>'8693', 'DownBreve'=>'785', 'downdownarrows'=>'8650', 'downharpoonleft'=>'8643', 'downharpoonright'=>'8642', 'DownLeftRightVector'=>'10576', 'DownLeftTeeVector'=>'10590', 'DownLeftVector'=>'8637', 'DownLeftVectorBar'=>'10582', 'DownRightTeeVector'=>'10591', 'DownRightVector'=>'8641', 'DownRightVectorBar'=>'10583', 'DownTee'=>'8868', 'DownTeeArrow'=>'8615', 'drbkarow'=>'10512', 'drcorn'=>'8991', 'drcrop'=>'8972', 'Dscr'=>'119967', 'dscr'=>'119993', 'DScy'=>'1029', 'dscy'=>'1109', 'dsol'=>'10742', 'Dstrok'=>'272', 'dstrok'=>'273', 'dtdot'=>'8945', 'dtri'=>'9663', 'dtrif'=>'9662', 'duarr'=>'8693', 'duhar'=>'10607', 'dwangle'=>'10662', 'DZcy'=>'1039', 'dzcy'=>'1119', 'dzigrarr'=>'10239', 'easter'=>'10862', 'Ecaron'=>'282', 'ecaron'=>'283', 'ecir'=>'8790', 'ecolon'=>'8789', 'Ecy'=>'1069', 'ecy'=>'1101', 'eDDot'=>'10871', 'Edot'=>'278', 'edot'=>'279', 'eDot'=>'8785', 'ee'=>'8519', 'efDot'=>'8786', 'Efr'=>'120072', 'efr'=>'120098', 'eg'=>'10906', 'egs'=>'10902', 'egsdot'=>'10904', 'el'=>'10905', 
'Element'=>'8712', 'elinters'=>'9191', 'ell'=>'8467', 'els'=>'10901', 'elsdot'=>'10903', 'Emacr'=>'274', 'emacr'=>'275', 'emptyset'=>'8709', 'EmptySmallSquare'=>'9723', 'emptyv'=>'8709', 'EmptyVerySmallSquare'=>'9643', 'emsp13'=>'8196', 'emsp14'=>'8197', 'ENG'=>'330', 'eng'=>'331', 'Eogon'=>'280', 'eogon'=>'281', 'Eopf'=>'120124', 'eopf'=>'120150', 'epar'=>'8917', 'eparsl'=>'10723', 'eplus'=>'10865', 'epsi'=>'949', 'epsiv'=>'1013', 'eqcirc'=>'8790', 'eqcolon'=>'8789', 'eqsim'=>'8770', 'eqslantgtr'=>'10902', 'eqslantless'=>'10901', 'Equal'=>'10869', 'equals'=>'61', 'EqualTilde'=>'8770', 'equest'=>'8799', 'Equilibrium'=>'8652', 'equivDD'=>'10872', 'eqvparsl'=>'10725', 'erarr'=>'10609', 'erDot'=>'8787', 'escr'=>'8495', 'Escr'=>'8496', 'esdot'=>'8784', 'Esim'=>'10867', 'esim'=>'8770', 'excl'=>'33', 'Exists'=>'8707', 'expectation'=>'8496', 'exponentiale'=>'8519', 'ExponentialE'=>'8519', 'fallingdotseq'=>'8786', 'Fcy'=>'1060', 'fcy'=>'1092', 'female'=>'9792', 'ffilig'=>'64259', 'fflig'=>'64256', 'ffllig'=>'64260', 'Ffr'=>'120073', 'ffr'=>'120099', 'filig'=>'64257', 'FilledSmallSquare'=>'9724', 'FilledVerySmallSquare'=>'9642', 'flat'=>'9837', 'fllig'=>'64258', 'fltns'=>'9649', 'Fopf'=>'120125', 'fopf'=>'120151', 'ForAll'=>'8704', 'fork'=>'8916', 'forkv'=>'10969', 'Fouriertrf'=>'8497', 'fpartint'=>'10765', 'frac13'=>'8531', 'frac15'=>'8533', 'frac16'=>'8537', 'frac18'=>'8539', 'frac23'=>'8532', 'frac25'=>'8534', 'frac35'=>'8535', 'frac38'=>'8540', 'frac45'=>'8536', 'frac56'=>'8538', 'frac58'=>'8541', 'frac78'=>'8542', 'frown'=>'8994', 'fscr'=>'119995', 'Fscr'=>'8497', 'gacute'=>'501', 'Gammad'=>'988', 'gammad'=>'989', 'gap'=>'10886', 'Gbreve'=>'286', 'gbreve'=>'287', 'Gcedil'=>'290', 'Gcirc'=>'284', 'gcirc'=>'285', 'Gcy'=>'1043', 'gcy'=>'1075', 'Gdot'=>'288', 'gdot'=>'289', 'gE'=>'8807', 'gEl'=>'10892', 'gel'=>'8923', 'geq'=>'8805', 'geqq'=>'8807', 'geqslant'=>'10878', 'ges'=>'10878', 'gescc'=>'10921', 'gesdot'=>'10880', 'gesdoto'=>'10882', 'gesdotol'=>'10884', 
'gesles'=>'10900', 'Gfr'=>'120074', 'gfr'=>'120100', 'gg'=>'8811', 'Gg'=>'8921', 'ggg'=>'8921', 'gimel'=>'8503', 'GJcy'=>'1027', 'gjcy'=>'1107', 'gl'=>'8823', 'gla'=>'10917', 'glE'=>'10898', 'glj'=>'10916', 'gnap'=>'10890', 'gnapprox'=>'10890', 'gne'=>'10888', 'gnE'=>'8809', 'gneq'=>'10888', 'gneqq'=>'8809', 'gnsim'=>'8935', 'Gopf'=>'120126', 'gopf'=>'120152', 'grave'=>'96', 'GreaterEqual'=>'8805', 'GreaterEqualLess'=>'8923', 'GreaterFullEqual'=>'8807', 'GreaterGreater'=>'10914', 'GreaterLess'=>'8823', 'GreaterSlantEqual'=>'10878', 'GreaterTilde'=>'8819', 'Gscr'=>'119970', 'gscr'=>'8458', 'gsim'=>'8819', 'gsime'=>'10894', 'gsiml'=>'10896', 'Gt'=>'8811', 'gtcc'=>'10919', 'gtcir'=>'10874', 'gtdot'=>'8919', 'gtlPar'=>'10645', 'gtquest'=>'10876', 'gtrapprox'=>'10886', 'gtrarr'=>'10616', 'gtrdot'=>'8919', 'gtreqless'=>'8923', 'gtreqqless'=>'10892', 'gtrless'=>'8823', 'gtrsim'=>'8819', 'Hacek'=>'711', 'hairsp'=>'8202', 'half'=>'189', 'hamilt'=>'8459', 'HARDcy'=>'1066', 'hardcy'=>'1098', 'harrcir'=>'10568', 'harrw'=>'8621', 'Hat'=>'94', 'hbar'=>'8463', 'Hcirc'=>'292', 'hcirc'=>'293', 'heartsuit'=>'9829', 'hercon'=>'8889', 'hfr'=>'120101', 'Hfr'=>'8460', 'HilbertSpace'=>'8459', 'hksearow'=>'10533', 'hkswarow'=>'10534', 'hoarr'=>'8703', 'homtht'=>'8763', 'hookleftarrow'=>'8617', 'hookrightarrow'=>'8618', 'hopf'=>'120153', 'Hopf'=>'8461', 'horbar'=>'8213', 'HorizontalLine'=>'9472', 'hscr'=>'119997', 'Hscr'=>'8459', 'hslash'=>'8463', 'Hstrok'=>'294', 'hstrok'=>'295', 'HumpDownHump'=>'8782', 'HumpEqual'=>'8783', 'hybull'=>'8259', 'hyphen'=>'8208', 'ic'=>'8291', 'Icy'=>'1048', 'icy'=>'1080', 'Idot'=>'304', 'IEcy'=>'1045', 'iecy'=>'1077', 'iff'=>'8660', 'ifr'=>'120102', 'Ifr'=>'8465', 'ii'=>'8520', 'iiiint'=>'10764', 'iiint'=>'8749', 'iinfin'=>'10716', 'iiota'=>'8489', 'IJlig'=>'306', 'ijlig'=>'307', 'Im'=>'8465', 'Imacr'=>'298', 'imacr'=>'299', 'ImaginaryI'=>'8520', 'imagline'=>'8464', 'imagpart'=>'8465', 'imath'=>'305', 'imof'=>'8887', 'imped'=>'437', 'Implies'=>'8658', 
'in'=>'8712', 'incare'=>'8453', 'infintie'=>'10717', 'inodot'=>'305', 'Int'=>'8748', 'intcal'=>'8890', 'integers'=>'8484', 'Integral'=>'8747', 'intercal'=>'8890', 'Intersection'=>'8898', 'intlarhk'=>'10775', 'intprod'=>'10812', 'InvisibleComma'=>'8291', 'InvisibleTimes'=>'8290', 'IOcy'=>'1025', 'iocy'=>'1105', 'Iogon'=>'302', 'iogon'=>'303', 'Iopf'=>'120128', 'iopf'=>'120154', 'iprod'=>'10812', 'iscr'=>'119998', 'Iscr'=>'8464', 'isindot'=>'8949', 'isinE'=>'8953', 'isins'=>'8948', 'isinsv'=>'8947', 'isinv'=>'8712', 'it'=>'8290', 'Itilde'=>'296', 'itilde'=>'297', 'Iukcy'=>'1030', 'iukcy'=>'1110', 'Jcirc'=>'308', 'jcirc'=>'309', 'Jcy'=>'1049', 'jcy'=>'1081', 'Jfr'=>'120077', 'jfr'=>'120103', 'jmath'=>'567', 'Jopf'=>'120129', 'jopf'=>'120155', 'Jscr'=>'119973', 'jscr'=>'119999', 'Jsercy'=>'1032', 'jsercy'=>'1112', 'Jukcy'=>'1028', 'jukcy'=>'1108', 'kappav'=>'1008', 'Kcedil'=>'310', 'kcedil'=>'311', 'Kcy'=>'1050', 'kcy'=>'1082', 'Kfr'=>'120078', 'kfr'=>'120104', 'kgreen'=>'312', 'KHcy'=>'1061', 'khcy'=>'1093', 'KJcy'=>'1036', 'kjcy'=>'1116', 'Kopf'=>'120130', 'kopf'=>'120156', 'Kscr'=>'119974', 'kscr'=>'120000', 'lAarr'=>'8666', 'Lacute'=>'313', 'lacute'=>'314', 'laemptyv'=>'10676', 'lagran'=>'8466', 'lang'=>'10216', 'Lang'=>'10218', 'langd'=>'10641', 'langle'=>'10216', 'lap'=>'10885', 'Laplacetrf'=>'8466', 'Larr'=>'8606', 'larrb'=>'8676', 'larrbfs'=>'10527', 'larrfs'=>'10525', 'larrhk'=>'8617', 'larrlp'=>'8619', 'larrpl'=>'10553', 'larrsim'=>'10611', 'larrtl'=>'8610', 'lat'=>'10923', 'latail'=>'10521', 'lAtail'=>'10523', 'late'=>'10925', 'lbarr'=>'10508', 'lBarr'=>'10510', 'lbbrk'=>'10098', 'lbrace'=>'123', 'lbrack'=>'91', 'lbrke'=>'10635', 'lbrksld'=>'10639', 'lbrkslu'=>'10637', 'Lcaron'=>'317', 'lcaron'=>'318', 'Lcedil'=>'315', 'lcedil'=>'316', 'lcub'=>'123', 'Lcy'=>'1051', 'lcy'=>'1083', 'ldca'=>'10550', 'ldquor'=>'8222', 'ldrdhar'=>'10599', 'ldrushar'=>'10571', 'ldsh'=>'8626', 'lE'=>'8806', 'LeftAngleBracket'=>'10216', 'leftarrow'=>'8592', 'LeftArrow'=>'8592', 
'Leftarrow'=>'8656', 'LeftArrowBar'=>'8676', 'LeftArrowRightArrow'=>'8646', 'leftarrowtail'=>'8610', 'LeftCeiling'=>'8968', 'LeftDoubleBracket'=>'10214', 'LeftDownTeeVector'=>'10593', 'LeftDownVector'=>'8643', 'LeftDownVectorBar'=>'10585', 'LeftFloor'=>'8970', 'leftharpoondown'=>'8637', 'leftharpoonup'=>'8636', 'leftleftarrows'=>'8647', 'leftrightarrow'=>'8596', 'LeftRightArrow'=>'8596', 'Leftrightarrow'=>'8660', 'leftrightarrows'=>'8646', 'leftrightharpoons'=>'8651', 'leftrightsquigarrow'=>'8621', 'LeftRightVector'=>'10574', 'LeftTee'=>'8867', 'LeftTeeArrow'=>'8612', 'LeftTeeVector'=>'10586', 'leftthreetimes'=>'8907', 'LeftTriangle'=>'8882', 'LeftTriangleBar'=>'10703', 'LeftTriangleEqual'=>'8884', 'LeftUpDownVector'=>'10577', 'LeftUpTeeVector'=>'10592', 'LeftUpVector'=>'8639', 'LeftUpVectorBar'=>'10584', 'LeftVector'=>'8636', 'LeftVectorBar'=>'10578', 'lEg'=>'10891', 'leg'=>'8922', 'leq'=>'8804', 'leqq'=>'8806', 'leqslant'=>'10877', 'les'=>'10877', 'lescc'=>'10920', 'lesdot'=>'10879', 'lesdoto'=>'10881', 'lesdotor'=>'10883', 'lesges'=>'10899', 'lessapprox'=>'10885', 'lessdot'=>'8918', 'lesseqgtr'=>'8922', 'lesseqqgtr'=>'10891', 'LessEqualGreater'=>'8922', 'LessFullEqual'=>'8806', 'LessGreater'=>'8822', 'lessgtr'=>'8822', 'LessLess'=>'10913', 'lesssim'=>'8818', 'LessSlantEqual'=>'10877', 'LessTilde'=>'8818', 'lfisht'=>'10620', 'Lfr'=>'120079', 'lfr'=>'120105', 'lg'=>'8822', 'lgE'=>'10897', 'lHar'=>'10594', 'lhard'=>'8637', 'lharu'=>'8636', 'lharul'=>'10602', 'lhblk'=>'9604', 'LJcy'=>'1033', 'ljcy'=>'1113', 'll'=>'8810', 'Ll'=>'8920', 'llarr'=>'8647', 'llcorner'=>'8990', 'Lleftarrow'=>'8666', 'llhard'=>'10603', 'lltri'=>'9722', 'Lmidot'=>'319', 'lmidot'=>'320', 'lmoust'=>'9136', 'lmoustache'=>'9136', 'lnap'=>'10889', 'lnapprox'=>'10889', 'lne'=>'10887', 'lnE'=>'8808', 'lneq'=>'10887', 'lneqq'=>'8808', 'lnsim'=>'8934', 'loang'=>'10220', 'loarr'=>'8701', 'lobrk'=>'10214', 'longleftarrow'=>'10229', 'LongLeftArrow'=>'10229', 'Longleftarrow'=>'10232', 
'longleftrightarrow'=>'10231', 'LongLeftRightArrow'=>'10231', 'Longleftrightarrow'=>'10234', 'longmapsto'=>'10236', 'longrightarrow'=>'10230', 'LongRightArrow'=>'10230', 'Longrightarrow'=>'10233', 'looparrowleft'=>'8619', 'looparrowright'=>'8620', 'lopar'=>'10629', 'Lopf'=>'120131', 'lopf'=>'120157', 'loplus'=>'10797', 'lotimes'=>'10804', 'lowbar'=>'95', 'LowerLeftArrow'=>'8601', 'LowerRightArrow'=>'8600', 'lozenge'=>'9674', 'lozf'=>'10731', 'lpar'=>'40', 'lparlt'=>'10643', 'lrarr'=>'8646', 'lrcorner'=>'8991', 'lrhar'=>'8651', 'lrhard'=>'10605', 'lrtri'=>'8895', 'lscr'=>'120001', 'Lscr'=>'8466', 'lsh'=>'8624', 'Lsh'=>'8624', 'lsim'=>'8818', 'lsime'=>'10893', 'lsimg'=>'10895', 'lsqb'=>'91', 'lsquor'=>'8218', 'Lstrok'=>'321', 'lstrok'=>'322', 'Lt'=>'8810', 'ltcc'=>'10918', 'ltcir'=>'10873', 'ltdot'=>'8918', 'lthree'=>'8907', 'ltimes'=>'8905', 'ltlarr'=>'10614', 'ltquest'=>'10875', 'ltri'=>'9667', 'ltrie'=>'8884', 'ltrif'=>'9666', 'ltrPar'=>'10646', 'lurdshar'=>'10570', 'luruhar'=>'10598', 'male'=>'9794', 'malt'=>'10016', 'maltese'=>'10016', 'Map'=>'10501', 'map'=>'8614', 'mapsto'=>'8614', 'mapstodown'=>'8615', 'mapstoleft'=>'8612', 'mapstoup'=>'8613', 'marker'=>'9646', 'mcomma'=>'10793', 'Mcy'=>'1052', 'mcy'=>'1084', 'mDDot'=>'8762', 'measuredangle'=>'8737', 'MediumSpace'=>'8287', 'Mellintrf'=>'8499', 'Mfr'=>'120080', 'mfr'=>'120106', 'mho'=>'8487', 'mid'=>'8739', 'midast'=>'42', 'midcir'=>'10992', 'minusb'=>'8863', 'minusd'=>'8760', 'minusdu'=>'10794', 'MinusPlus'=>'8723', 'mlcp'=>'10971', 'mldr'=>'8230', 'mnplus'=>'8723', 'models'=>'8871', 'Mopf'=>'120132', 'mopf'=>'120158', 'mp'=>'8723', 'mscr'=>'120002', 'Mscr'=>'8499', 'mstpos'=>'8766', 'multimap'=>'8888', 'mumap'=>'8888', 'Nacute'=>'323', 'nacute'=>'324', 'nap'=>'8777', 'napos'=>'329', 'napprox'=>'8777', 'natur'=>'9838', 'natural'=>'9838', 'naturals'=>'8469', 'ncap'=>'10819', 'Ncaron'=>'327', 'ncaron'=>'328', 'Ncedil'=>'325', 'ncedil'=>'326', 'ncong'=>'8775', 'ncup'=>'10818', 'Ncy'=>'1053', 'ncy'=>'1085', 
'nearhk'=>'10532', 'nearr'=>'8599', 'neArr'=>'8663', 'nearrow'=>'8599', 'NegativeMediumSpace'=>'8203', 'NegativeThickSpace'=>'8203', 'NegativeThinSpace'=>'8203', 'NegativeVeryThinSpace'=>'8203', 'nequiv'=>'8802', 'nesear'=>'10536', 'NestedGreaterGreater'=>'8811', 'NestedLessLess'=>'8810', 'NewLine'=>'10', 'nexist'=>'8708', 'nexists'=>'8708', 'Nfr'=>'120081', 'nfr'=>'120107', 'nge'=>'8817', 'ngeq'=>'8817', 'ngsim'=>'8821', 'ngt'=>'8815', 'ngtr'=>'8815', 'nharr'=>'8622', 'nhArr'=>'8654', 'nhpar'=>'10994', 'nis'=>'8956', 'nisd'=>'8954', 'niv'=>'8715', 'NJcy'=>'1034', 'njcy'=>'1114', 'nlarr'=>'8602', 'nlArr'=>'8653', 'nldr'=>'8229', 'nle'=>'8816', 'nleftarrow'=>'8602', 'nLeftarrow'=>'8653', 'nleftrightarrow'=>'8622', 'nLeftrightarrow'=>'8654', 'nleq'=>'8816', 'nless'=>'8814', 'nlsim'=>'8820', 'nlt'=>'8814', 'nltri'=>'8938', 'nltrie'=>'8940', 'nmid'=>'8740', 'NoBreak'=>'8288', 'NonBreakingSpace'=>'160', 'nopf'=>'120159', 'Nopf'=>'8469', 'Not'=>'10988', 'NotCongruent'=>'8802', 'NotCupCap'=>'8813', 'NotDoubleVerticalBar'=>'8742', 'NotElement'=>'8713', 'NotEqual'=>'8800', 'NotExists'=>'8708', 'NotGreater'=>'8815', 'NotGreaterEqual'=>'8817', 'NotGreaterLess'=>'8825', 'NotGreaterTilde'=>'8821', 'notinva'=>'8713', 'notinvb'=>'8951', 'notinvc'=>'8950', 'NotLeftTriangle'=>'8938', 'NotLeftTriangleEqual'=>'8940', 'NotLess'=>'8814', 'NotLessEqual'=>'8816', 'NotLessGreater'=>'8824', 'NotLessTilde'=>'8820', 'notni'=>'8716', 'notniva'=>'8716', 'notnivb'=>'8958', 'notnivc'=>'8957', 'NotPrecedes'=>'8832', 'NotPrecedesSlantEqual'=>'8928', 'NotReverseElement'=>'8716', 'NotRightTriangle'=>'8939', 'NotRightTriangleEqual'=>'8941', 'NotSquareSubsetEqual'=>'8930', 'NotSquareSupersetEqual'=>'8931', 'NotSubsetEqual'=>'8840', 'NotSucceeds'=>'8833', 'NotSucceedsSlantEqual'=>'8929', 'NotSupersetEqual'=>'8841', 'NotTilde'=>'8769', 'NotTildeEqual'=>'8772', 'NotTildeFullEqual'=>'8775', 'NotTildeTilde'=>'8777', 'NotVerticalBar'=>'8740', 'npar'=>'8742', 'nparallel'=>'8742', 'npolint'=>'10772', 
'npr'=>'8832', 'nprcue'=>'8928', 'nprec'=>'8832', 'nrarr'=>'8603', 'nrArr'=>'8655', 'nrightarrow'=>'8603', 'nRightarrow'=>'8655', 'nrtri'=>'8939', 'nrtrie'=>'8941', 'nsc'=>'8833', 'nsccue'=>'8929', 'Nscr'=>'119977', 'nscr'=>'120003', 'nshortmid'=>'8740', 'nshortparallel'=>'8742', 'nsim'=>'8769', 'nsime'=>'8772', 'nsimeq'=>'8772', 'nsmid'=>'8740', 'nspar'=>'8742', 'nsqsube'=>'8930', 'nsqsupe'=>'8931', 'nsube'=>'8840', 'nsubseteq'=>'8840', 'nsucc'=>'8833', 'nsup'=>'8837', 'nsupe'=>'8841', 'nsupseteq'=>'8841', 'ntgl'=>'8825', 'ntlg'=>'8824', 'ntriangleleft'=>'8938', 'ntrianglelefteq'=>'8940', 'ntriangleright'=>'8939', 'ntrianglerighteq'=>'8941', 'num'=>'35', 'numero'=>'8470', 'numsp'=>'8199', 'nvdash'=>'8876', 'nvDash'=>'8877', 'nVdash'=>'8878', 'nVDash'=>'8879', 'nvHarr'=>'10500', 'nvinfin'=>'10718', 'nvlArr'=>'10498', 'nvrArr'=>'10499', 'nwarhk'=>'10531', 'nwarr'=>'8598', 'nwArr'=>'8662', 'nwarrow'=>'8598', 'nwnear'=>'10535', 'oast'=>'8859', 'ocir'=>'8858', 'Ocy'=>'1054', 'ocy'=>'1086', 'odash'=>'8861', 'Odblac'=>'336', 'odblac'=>'337', 'odiv'=>'10808', 'odot'=>'8857', 'odsold'=>'10684', 'ofcir'=>'10687', 'Ofr'=>'120082', 'ofr'=>'120108', 'ogon'=>'731', 'ogt'=>'10689', 'ohbar'=>'10677', 'ohm'=>'937', 'oint'=>'8750', 'olarr'=>'8634', 'olcir'=>'10686', 'olcross'=>'10683', 'olt'=>'10688', 'Omacr'=>'332', 'omacr'=>'333', 'omid'=>'10678', 'ominus'=>'8854', 'Oopf'=>'120134', 'oopf'=>'120160', 'opar'=>'10679', 'OpenCurlyDoubleQuote'=>'8220', 'OpenCurlyQuote'=>'8216', 'operp'=>'10681', 'Or'=>'10836', 'orarr'=>'8635', 'ord'=>'10845', 'order'=>'8500', 'orderof'=>'8500', 'origof'=>'8886', 'oror'=>'10838', 'orslope'=>'10839', 'orv'=>'10843', 'oS'=>'9416', 'Oscr'=>'119978', 'oscr'=>'8500', 'osol'=>'8856', 'Otimes'=>'10807', 'otimesas'=>'10806', 'ovbar'=>'9021', 'OverBar'=>'8254', 'OverBrace'=>'9182', 'OverBracket'=>'9140', 'OverParenthesis'=>'9180', 'par'=>'8741', 'parallel'=>'8741', 'parsim'=>'10995', 'parsl'=>'11005', 'PartialD'=>'8706', 'Pcy'=>'1055', 'pcy'=>'1087', 
'percnt'=>'37', 'period'=>'46', 'pertenk'=>'8241', 'Pfr'=>'120083', 'pfr'=>'120109', 'phiv'=>'981', 'phmmat'=>'8499', 'phone'=>'9742', 'pitchfork'=>'8916', 'planck'=>'8463', 'planckh'=>'8462', 'plankv'=>'8463', 'plus'=>'43', 'plusacir'=>'10787', 'plusb'=>'8862', 'pluscir'=>'10786', 'plusdo'=>'8724', 'plusdu'=>'10789', 'pluse'=>'10866', 'PlusMinus'=>'177', 'plussim'=>'10790', 'plustwo'=>'10791', 'pm'=>'177', 'Poincareplane'=>'8460', 'pointint'=>'10773', 'popf'=>'120161', 'Popf'=>'8473', 'Pr'=>'10939', 'pr'=>'8826', 'prap'=>'10935', 'prcue'=>'8828', 'pre'=>'10927', 'prE'=>'10931', 'prec'=>'8826', 'precapprox'=>'10935', 'preccurlyeq'=>'8828', 'Precedes'=>'8826', 'PrecedesEqual'=>'10927', 'PrecedesSlantEqual'=>'8828', 'PrecedesTilde'=>'8830', 'preceq'=>'10927', 'precnapprox'=>'10937', 'precneqq'=>'10933', 'precnsim'=>'8936', 'precsim'=>'8830', 'primes'=>'8473', 'prnap'=>'10937', 'prnE'=>'10933', 'prnsim'=>'8936', 'Product'=>'8719', 'profalar'=>'9006', 'profline'=>'8978', 'profsurf'=>'8979', 'Proportion'=>'8759', 'Proportional'=>'8733', 'propto'=>'8733', 'prsim'=>'8830', 'prurel'=>'8880', 'Pscr'=>'119979', 'pscr'=>'120005', 'puncsp'=>'8200', 'Qfr'=>'120084', 'qfr'=>'120110', 'qint'=>'10764', 'qopf'=>'120162', 'Qopf'=>'8474', 'qprime'=>'8279', 'Qscr'=>'119980', 'qscr'=>'120006', 'quaternions'=>'8461', 'quatint'=>'10774', 'quest'=>'63', 'questeq'=>'8799', 'rAarr'=>'8667', 'Racute'=>'340', 'racute'=>'341', 'raemptyv'=>'10675', 'rang'=>'10217', 'Rang'=>'10219', 'rangd'=>'10642', 'range'=>'10661', 'rangle'=>'10217', 'Rarr'=>'8608', 'rarrap'=>'10613', 'rarrb'=>'8677', 'rarrbfs'=>'10528', 'rarrc'=>'10547', 'rarrfs'=>'10526', 'rarrhk'=>'8618', 'rarrlp'=>'8620', 'rarrpl'=>'10565', 'rarrsim'=>'10612', 'Rarrtl'=>'10518', 'rarrtl'=>'8611', 'rarrw'=>'8605', 'ratail'=>'10522', 'rAtail'=>'10524', 'ratio'=>'8758', 'rationals'=>'8474', 'rbarr'=>'10509', 'rBarr'=>'10511', 'RBarr'=>'10512', 'rbbrk'=>'10099', 'rbrace'=>'125', 'rbrack'=>'93', 'rbrke'=>'10636', 'rbrksld'=>'10638', 
'rbrkslu'=>'10640', 'Rcaron'=>'344', 'rcaron'=>'345', 'Rcedil'=>'342', 'rcedil'=>'343', 'rcub'=>'125', 'Rcy'=>'1056', 'rcy'=>'1088', 'rdca'=>'10551', 'rdldhar'=>'10601', 'rdquor'=>'8221', 'rdsh'=>'8627', 'Re'=>'8476', 'realine'=>'8475', 'realpart'=>'8476', 'reals'=>'8477', 'rect'=>'9645', 'REG'=>'174', 'ReverseElement'=>'8715', 'ReverseEquilibrium'=>'8651', 'ReverseUpEquilibrium'=>'10607', 'rfisht'=>'10621', 'rfr'=>'120111', 'Rfr'=>'8476', 'rHar'=>'10596', 'rhard'=>'8641', 'rharu'=>'8640', 'rharul'=>'10604', 'rhov'=>'1009', 'RightAngleBracket'=>'10217', 'rightarrow'=>'8594', 'RightArrow'=>'8594', 'Rightarrow'=>'8658', 'RightArrowBar'=>'8677', 'RightArrowLeftArrow'=>'8644', 'rightarrowtail'=>'8611', 'RightCeiling'=>'8969', 'RightDoubleBracket'=>'10215', 'RightDownTeeVector'=>'10589', 'RightDownVector'=>'8642', 'RightDownVectorBar'=>'10581', 'RightFloor'=>'8971', 'rightharpoondown'=>'8641', 'rightharpoonup'=>'8640', 'rightleftarrows'=>'8644', 'rightleftharpoons'=>'8652', 'rightrightarrows'=>'8649', 'rightsquigarrow'=>'8605', 'RightTee'=>'8866', 'RightTeeArrow'=>'8614', 'RightTeeVector'=>'10587', 'rightthreetimes'=>'8908', 'RightTriangle'=>'8883', 'RightTriangleBar'=>'10704', 'RightTriangleEqual'=>'8885', 'RightUpDownVector'=>'10575', 'RightUpTeeVector'=>'10588', 'RightUpVector'=>'8638', 'RightUpVectorBar'=>'10580', 'RightVector'=>'8640', 'RightVectorBar'=>'10579', 'ring'=>'730', 'risingdotseq'=>'8787', 'rlarr'=>'8644', 'rlhar'=>'8652', 'rmoust'=>'9137', 'rmoustache'=>'9137', 'rnmid'=>'10990', 'roang'=>'10221', 'roarr'=>'8702', 'robrk'=>'10215', 'ropar'=>'10630', 'ropf'=>'120163', 'Ropf'=>'8477', 'roplus'=>'10798', 'rotimes'=>'10805', 'RoundImplies'=>'10608', 'rpar'=>'41', 'rpargt'=>'10644', 'rppolint'=>'10770', 'rrarr'=>'8649', 'Rrightarrow'=>'8667', 'rscr'=>'120007', 'Rscr'=>'8475', 'rsh'=>'8625', 'Rsh'=>'8625', 'rsqb'=>'93', 'rsquor'=>'8217', 'rthree'=>'8908', 'rtimes'=>'8906', 'rtri'=>'9657', 'rtrie'=>'8885', 'rtrif'=>'9656', 'rtriltri'=>'10702', 
'RuleDelayed'=>'10740', 'ruluhar'=>'10600', 'rx'=>'8478', 'Sacute'=>'346', 'sacute'=>'347', 'Sc'=>'10940', 'sc'=>'8827', 'scap'=>'10936', 'sccue'=>'8829', 'sce'=>'10928', 'scE'=>'10932', 'Scedil'=>'350', 'scedil'=>'351', 'Scirc'=>'348', 'scirc'=>'349', 'scnap'=>'10938', 'scnE'=>'10934', 'scnsim'=>'8937', 'scpolint'=>'10771', 'scsim'=>'8831', 'Scy'=>'1057', 'scy'=>'1089', 'sdotb'=>'8865', 'sdote'=>'10854', 'searhk'=>'10533', 'searr'=>'8600', 'seArr'=>'8664', 'searrow'=>'8600', 'semi'=>'59', 'seswar'=>'10537', 'setminus'=>'8726', 'setmn'=>'8726', 'sext'=>'10038', 'Sfr'=>'120086', 'sfr'=>'120112', 'sfrown'=>'8994', 'sharp'=>'9839', 'SHCHcy'=>'1065', 'shchcy'=>'1097', 'SHcy'=>'1064', 'shcy'=>'1096', 'ShortDownArrow'=>'8595', 'ShortLeftArrow'=>'8592', 'shortmid'=>'8739', 'shortparallel'=>'8741', 'ShortRightArrow'=>'8594', 'ShortUpArrow'=>'8593', 'sigmav'=>'962', 'simdot'=>'10858', 'sime'=>'8771', 'simeq'=>'8771', 'simg'=>'10910', 'simgE'=>'10912', 'siml'=>'10909', 'simlE'=>'10911', 'simne'=>'8774', 'simplus'=>'10788', 'simrarr'=>'10610', 'slarr'=>'8592', 'SmallCircle'=>'8728', 'smallsetminus'=>'8726', 'smashp'=>'10803', 'smeparsl'=>'10724', 'smid'=>'8739', 'smile'=>'8995', 'smt'=>'10922', 'smte'=>'10924', 'SOFTcy'=>'1068', 'softcy'=>'1100', 'sol'=>'47', 'solb'=>'10692', 'solbar'=>'9023', 'Sopf'=>'120138', 'sopf'=>'120164', 'spadesuit'=>'9824', 'spar'=>'8741', 'sqcap'=>'8851', 'sqcup'=>'8852', 'Sqrt'=>'8730', 'sqsub'=>'8847', 'sqsube'=>'8849', 'sqsubset'=>'8847', 'sqsubseteq'=>'8849', 'sqsup'=>'8848', 'sqsupe'=>'8850', 'sqsupset'=>'8848', 'sqsupseteq'=>'8850', 'squ'=>'9633', 'square'=>'9633', 'Square'=>'9633', 'SquareIntersection'=>'8851', 'SquareSubset'=>'8847', 'SquareSubsetEqual'=>'8849', 'SquareSuperset'=>'8848', 'SquareSupersetEqual'=>'8850', 'SquareUnion'=>'8852', 'squarf'=>'9642', 'squf'=>'9642', 'srarr'=>'8594', 'Sscr'=>'119982', 'sscr'=>'120008', 'ssetmn'=>'8726', 'ssmile'=>'8995', 'sstarf'=>'8902', 'Star'=>'8902', 'star'=>'9734', 'starf'=>'9733', 
'straightepsilon'=>'1013', 'straightphi'=>'981', 'strns'=>'175', 'Sub'=>'8912', 'subdot'=>'10941', 'subE'=>'10949', 'subedot'=>'10947', 'submult'=>'10945', 'subnE'=>'10955', 'subne'=>'8842', 'subplus'=>'10943', 'subrarr'=>'10617', 'subset'=>'8834', 'Subset'=>'8912', 'subseteq'=>'8838', 'subseteqq'=>'10949', 'SubsetEqual'=>'8838', 'subsetneq'=>'8842', 'subsetneqq'=>'10955', 'subsim'=>'10951', 'subsub'=>'10965', 'subsup'=>'10963', 'succ'=>'8827', 'succapprox'=>'10936', 'succcurlyeq'=>'8829', 'Succeeds'=>'8827', 'SucceedsEqual'=>'10928', 'SucceedsSlantEqual'=>'8829', 'SucceedsTilde'=>'8831', 'succeq'=>'10928', 'succnapprox'=>'10938', 'succneqq'=>'10934', 'succnsim'=>'8937', 'succsim'=>'8831', 'SuchThat'=>'8715', 'Sum'=>'8721', 'sung'=>'9834', 'Sup'=>'8913', 'supdot'=>'10942', 'supdsub'=>'10968', 'supE'=>'10950', 'supedot'=>'10948', 'Superset'=>'8835', 'SupersetEqual'=>'8839', 'suphsol'=>'10185', 'suphsub'=>'10967', 'suplarr'=>'10619', 'supmult'=>'10946', 'supnE'=>'10956', 'supne'=>'8843', 'supplus'=>'10944', 'supset'=>'8835', 'Supset'=>'8913', 'supseteq'=>'8839', 'supseteqq'=>'10950', 'supsetneq'=>'8843', 'supsetneqq'=>'10956', 'supsim'=>'10952', 'supsub'=>'10964', 'supsup'=>'10966', 'swarhk'=>'10534', 'swarr'=>'8601', 'swArr'=>'8665', 'swarrow'=>'8601', 'swnwar'=>'10538', 'Tab'=>'9', 'target'=>'8982', 'tbrk'=>'9140', 'Tcaron'=>'356', 'tcaron'=>'357', 'Tcedil'=>'354', 'tcedil'=>'355', 'Tcy'=>'1058', 'tcy'=>'1090', 'tdot'=>'8411', 'telrec'=>'8981', 'Tfr'=>'120087', 'tfr'=>'120113', 'therefore'=>'8756', 'Therefore'=>'8756', 'thetav'=>'977', 'thickapprox'=>'8776', 'thicksim'=>'8764', 'ThinSpace'=>'8201', 'thkap'=>'8776', 'thksim'=>'8764', 'Tilde'=>'8764', 'TildeEqual'=>'8771', 'TildeFullEqual'=>'8773', 'TildeTilde'=>'8776', 'timesb'=>'8864', 'timesbar'=>'10801', 'timesd'=>'10800', 'tint'=>'8749', 'toea'=>'10536', 'top'=>'8868', 'topbot'=>'9014', 'topcir'=>'10993', 'Topf'=>'120139', 'topf'=>'120165', 'topfork'=>'10970', 'tosa'=>'10537', 'tprime'=>'8244', 'TRADE'=>'8482', 
'triangle'=>'9653', 'triangledown'=>'9663', 'triangleleft'=>'9667', 'trianglelefteq'=>'8884', 'triangleq'=>'8796', 'triangleright'=>'9657', 'trianglerighteq'=>'8885', 'tridot'=>'9708', 'trie'=>'8796', 'triminus'=>'10810', 'TripleDot'=>'8411', 'triplus'=>'10809', 'trisb'=>'10701', 'tritime'=>'10811', 'trpezium'=>'9186', 'Tscr'=>'119983', 'tscr'=>'120009', 'TScy'=>'1062', 'tscy'=>'1094', 'TSHcy'=>'1035', 'tshcy'=>'1115', 'Tstrok'=>'358', 'tstrok'=>'359', 'twixt'=>'8812', 'twoheadleftarrow'=>'8606', 'twoheadrightarrow'=>'8608', 'Uarr'=>'8607', 'Uarrocir'=>'10569', 'Ubrcy'=>'1038', 'ubrcy'=>'1118', 'Ubreve'=>'364', 'ubreve'=>'365', 'Ucy'=>'1059', 'ucy'=>'1091', 'udarr'=>'8645', 'Udblac'=>'368', 'udblac'=>'369', 'udhar'=>'10606', 'ufisht'=>'10622', 'Ufr'=>'120088', 'ufr'=>'120114', 'uHar'=>'10595', 'uharl'=>'8639', 'uharr'=>'8638', 'uhblk'=>'9600', 'ulcorn'=>'8988', 'ulcorner'=>'8988', 'ulcrop'=>'8975', 'ultri'=>'9720', 'Umacr'=>'362', 'umacr'=>'363', 'UnderBar'=>'95', 'UnderBrace'=>'9183', 'UnderBracket'=>'9141', 'UnderParenthesis'=>'9181', 'Union'=>'8899', 'UnionPlus'=>'8846', 'Uogon'=>'370', 'uogon'=>'371', 'Uopf'=>'120140', 'uopf'=>'120166', 'uparrow'=>'8593', 'UpArrow'=>'8593', 'Uparrow'=>'8657', 'UpArrowBar'=>'10514', 'UpArrowDownArrow'=>'8645', 'updownarrow'=>'8597', 'UpDownArrow'=>'8597', 'Updownarrow'=>'8661', 'UpEquilibrium'=>'10606', 'upharpoonleft'=>'8639', 'upharpoonright'=>'8638', 'uplus'=>'8846', 'UpperLeftArrow'=>'8598', 'UpperRightArrow'=>'8599', 'upsi'=>'965', 'Upsi'=>'978', 'UpTee'=>'8869', 'UpTeeArrow'=>'8613', 'upuparrows'=>'8648', 'urcorn'=>'8989', 'urcorner'=>'8989', 'urcrop'=>'8974', 'Uring'=>'366', 'uring'=>'367', 'urtri'=>'9721', 'Uscr'=>'119984', 'uscr'=>'120010', 'utdot'=>'8944', 'Utilde'=>'360', 'utilde'=>'361', 'utri'=>'9653', 'utrif'=>'9652', 'uuarr'=>'8648', 'uwangle'=>'10663', 'vangrt'=>'10652', 'varepsilon'=>'1013', 'varkappa'=>'1008', 'varnothing'=>'8709', 'varphi'=>'981', 'varpi'=>'982', 'varpropto'=>'8733', 'varr'=>'8597', 
'vArr'=>'8661', 'varrho'=>'1009', 'varsigma'=>'962', 'vartheta'=>'977', 'vartriangleleft'=>'8882', 'vartriangleright'=>'8883', 'vBar'=>'10984', 'Vbar'=>'10987', 'vBarv'=>'10985', 'Vcy'=>'1042', 'vcy'=>'1074', 'vdash'=>'8866', 'vDash'=>'8872', 'Vdash'=>'8873', 'VDash'=>'8875', 'Vdashl'=>'10982', 'vee'=>'8744', 'Vee'=>'8897', 'veebar'=>'8891', 'veeeq'=>'8794', 'vellip'=>'8942', 'verbar'=>'124', 'Verbar'=>'8214', 'vert'=>'124', 'Vert'=>'8214', 'VerticalBar'=>'8739', 'VerticalLine'=>'124', 'VerticalSeparator'=>'10072', 'VerticalTilde'=>'8768', 'VeryThinSpace'=>'8202', 'Vfr'=>'120089', 'vfr'=>'120115', 'vltri'=>'8882', 'Vopf'=>'120141', 'vopf'=>'120167', 'vprop'=>'8733', 'vrtri'=>'8883', 'Vscr'=>'119985', 'vscr'=>'120011', 'Vvdash'=>'8874', 'vzigzag'=>'10650', 'Wcirc'=>'372', 'wcirc'=>'373', 'wedbar'=>'10847', 'wedge'=>'8743', 'Wedge'=>'8896', 'wedgeq'=>'8793', 'Wfr'=>'120090', 'wfr'=>'120116', 'Wopf'=>'120142', 'wopf'=>'120168', 'wp'=>'8472', 'wr'=>'8768', 'wreath'=>'8768', 'Wscr'=>'119986', 'wscr'=>'120012', 'xcap'=>'8898', 'xcirc'=>'9711', 'xcup'=>'8899', 'xdtri'=>'9661', 'Xfr'=>'120091', 'xfr'=>'120117', 'xharr'=>'10231', 'xhArr'=>'10234', 'xlarr'=>'10229', 'xlArr'=>'10232', 'xmap'=>'10236', 'xnis'=>'8955', 'xodot'=>'10752', 'Xopf'=>'120143', 'xopf'=>'120169', 'xoplus'=>'10753', 'xotime'=>'10754', 'xrarr'=>'10230', 'xrArr'=>'10233', 'Xscr'=>'119987', 'xscr'=>'120013', 'xsqcup'=>'10758', 'xuplus'=>'10756', 'xutri'=>'9651', 'xvee'=>'8897', 'xwedge'=>'8896', 'YAcy'=>'1071', 'yacy'=>'1103', 'Ycirc'=>'374', 'ycirc'=>'375', 'Ycy'=>'1067', 'ycy'=>'1099', 'Yfr'=>'120092', 'yfr'=>'120118', 'YIcy'=>'1031', 'yicy'=>'1111', 'Yopf'=>'120144', 'yopf'=>'120170', 'Yscr'=>'119988', 'yscr'=>'120014', 'YUcy'=>'1070', 'yucy'=>'1102', 'Zacute'=>'377', 'zacute'=>'378', 'Zcaron'=>'381', 'zcaron'=>'382', 'Zcy'=>'1047', 'zcy'=>'1079', 'Zdot'=>'379', 'zdot'=>'380', 'zeetrf'=>'8488', 'ZeroWidthSpace'=>'8203', 'zfr'=>'120119', 'Zfr'=>'8488', 'ZHcy'=>'1046', 'zhcy'=>'1078', 'zigrarr'=>'8669', 
'zopf'=>'120171', 'Zopf'=>'8484', 'Zscr'=>'119989', 'zscr'=>'120015');
+ if ($t[0] != '#') {
+ return
+ ($C['and_mark'] ? "\x06" : '&')
+ . (isset($reservedEntAr[$t])
+ ? $t
+ : (isset($commonEntNameAr[$t])
+ ? (!$C['named_entity']
+ ? '#'. ($C['hexdec_entity'] > 1
+ ? 'x'. dechex($commonEntNameAr[$t])
+ : $commonEntNameAr[$t])
+ : $t)
+ : (isset($rareEntNameAr[$t])
+ ? (!$C['named_entity']
+ ? '#'. ($C['hexdec_entity'] > 1
+ ? 'x'. dechex($rareEntNameAr[$t])
+ : $rareEntNameAr[$t])
+ : $t)
+ : 'amp;'. $t)))
+ . ';';
+ }
+ if (
+ ($n = ctype_digit($t = substr($t, 1)) ? intval($t) : hexdec(substr($t, 1))) < 9
+ || ($n > 13 && $n < 32)
+ || $n == 11
+ || $n == 12
+ || ($n > 126 && $n < 160 && $n != 133)
+ || ($n > 55295
+ && ($n < 57344
+ || ($n > 64975 && $n < 64992)
+ || $n == 65534
+ || $n == 65535
+ || $n > 1114111))
+ ) {
+ return ($C['and_mark'] ? "\x06" : '&'). "amp;#{$t};";
+ }
+ return
+ ($C['and_mark'] ? "\x06" : '&')
+ . '#'
+ . (((ctype_digit($t) && $C['hexdec_entity'] < 2)
+ || !$C['hexdec_entity'])
+ ? $n
+ : 'x'. dechex($n))
+ . ';';
}
-function hl_regex($p)
+/**
+ * Check regex pattern for PHP error.
+ *
+ * The pattern is compiled by running preg_match() against an empty string
+ * while display_errors is switched off. A pattern counts as invalid when
+ * preg_match() returns false or when PHP records an error for the call.
+ * The display_errors and track_errors settings are put back afterwards.
+ *
+ * @param string $t Pattern including limiters/modifiers.
+ * @return int 0 or 1 if pattern is invalid or valid, respectively.
+ */
+function hl_regex($t)
 {
- // check regex
- if (empty($p)) {
- return 0;
- }
- if ($v = function_exists('error_clear_last') && function_exists('error_get_last')) {
- error_clear_last();
+ // Empty and non-string values are never valid patterns.
+ if (empty($t) || !is_string($t)) {
+ return 0;
+ }
+ // Use error_clear_last()/error_get_last() when available; otherwise rely on $php_errormsg.
+ if ($funcsExist = function_exists('error_clear_last') && function_exists('error_get_last')) {
+ error_clear_last();
+ } else {
+ if ($valTrackErr = ini_get('track_errors')) {
+ $valMsgErr = isset($php_errormsg) ? $php_errormsg : null;
 } else {
- if ($t = ini_get('track_errors')) {
- $o = isset($php_errormsg) ? $php_errormsg : null;
- } else {
- ini_set('track_errors', 1);
- }
- unset($php_errormsg);
- }
- if (($d = ini_get('display_errors'))) {
- ini_set('display_errors', 0);
- }
- preg_match($p, '');
- if ($v) {
- $r = null === error_get_last() ? 1 : 0;
+ ini_set('track_errors', '1');
+ }
+ unset($php_errormsg);
+ }
+ // Keep the warning raised for a bad pattern from being displayed.
+ if (($valShowErr = ini_get('display_errors'))) {
+ ini_set('display_errors', '0');
+ }
+ // A false return means the pattern did not compile; checking it as well keeps the
+ // result right when an installed error handler prevents the error from being recorded.
+ $matched = preg_match($t, '');
+ if ($funcsExist) {
+ $out = ($matched !== false && error_get_last() === null) ? 1 : 0;
+ } else {
+ $out = ($matched !== false && !isset($php_errormsg)) ? 1 : 0;
+ if ($valTrackErr) {
+ $php_errormsg = isset($valMsgErr) ? $valMsgErr : null;
 } else {
- $r = isset($php_errormsg) ? 0 : 1;
- if ($t) {
- $php_errormsg = isset($o) ? $o : null;
- } else {
- ini_set('track_errors', 0);
- }
+ ini_set('track_errors', '0');
 }
- if ($d) {
- ini_set('display_errors', 1);
- }
-
- return $r;
+ }
+ // Restore the value read earlier (it may be e.g. 'stderr'), not a hard-coded '1'.
+ if ($valShowErr) {
+ ini_set('display_errors', $valShowErr);
+ }
+ return $out;
 }
+/**
+ * Parse $spec htmLawed argument as array.
+ *
+ * Entries are separated by semi-colons and have the form "tags=attributes",
+ * with tags and attributes separated by commas. An attribute may carry rules
+ * in parentheses, separated by slashes, each of the form "rule=value". An
+ * attribute prefixed with "-" is recorded as denied under the 'deny' key
+ * ("-*" is recorded as '*'). Attribute names starting with "data-xml" are
+ * skipped. An attribute without rules, or with a malformed rule, maps to 1.
+ * 'match' and 'nomatch' rules whose pattern fails hl_regex() are dropped.
+ *
+ * @param string $t Value of $spec.
+ * @return array Multidimensional array of form: tag -> attribute -> rule.
+ */
 function hl_spec($t)
 {
- // final $spec
- $s = [];
- if (!function_exists('hl_aux1')) {
- function hl_aux1($m)
- {
- return substr(str_replace([';', '|', '~', ' ', ',', '/', '(', ')', '`"'], ["\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08", '"'], $m[0]), 1, -1);
+ $out = array();
+
+ // Hide special characters used for rules.
+ // Inside double-quoted values they are swapped for control characters (and `" for ")
+ // so that the splitting below does not cut through a value.
+
+ if (!function_exists('hl_aux1')) {
+ function hl_aux1($x) {
+ return
+ substr(
+ str_replace(
+ array(";", "|", "~", " ", ",", "/", "(", ")", '`"'),
+ array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08", '"'),
+ $x[0]),
+ 1, -1);
+ }
+ }
+ $t =
+ str_replace(
+ array("\t", "\r", "\n", ' '),
+ '',
+ preg_replace_callback('/"(?>(`.|[^"])*)"/sm', 'hl_aux1', trim($t)));
+
+ // Tag, attribute, and rule separators: semi-colon, comma, and slash respectively.
+
+ for ($i = count(($t = explode(';', $t))); --$i>=0;) {
+ $ele = $t[$i];
+ if (
+ empty($ele)
+ || ($tagPos = strpos($ele, '=')) === false
+ || !strlen(($tagSpec = substr($ele, $tagPos + 1)))
+ ) {
+ continue;
+ }
+ $ruleAr = $denyAttrAr = array();
+ foreach (explode(',', $tagSpec) as $v) {
+ if (!preg_match('`^(-?data-[^:=]+|[a-z:\-\*]+)(?:\((.*?)\))?`i', $v, $m)
+ || preg_match('`^-?data-xml`i', $m[1])) {
+ continue;
+ }
+ if (($attr = strtolower($m[1])) == '-*') {
+ $denyAttrAr['*'] = 1;
+ continue;
+ }
+ if ($attr[0] == '-') {
+ $denyAttrAr[substr($attr, 1)] = 1;
+ continue;
+ }
+ if (!isset($m[2])) {
+ $ruleAr[$attr] = 1;
+ continue;
+ }
+ // Gather this attribute's rules in a local array. Writing straight into
+ // $ruleAr[$attr] fails once that entry holds the scalar 1 (attribute listed
+ // earlier without rules, or an earlier malformed rule): PHP 8 throws an Error.
+ $attrRuleAr = (isset($ruleAr[$attr]) && is_array($ruleAr[$attr])) ? $ruleAr[$attr] : array();
+ foreach (explode('/', $m[2]) as $ruleStr) {
+ if (empty($ruleStr)
+ || ($rulePos = strpos($ruleStr, '=')) === false
+ || $rulePos < 5 // Shortest rule: oneof
+ ) {
+ // A malformed rule leaves the attribute without any rules.
+ $attrRuleAr = 1;
+ break;
 }
+ $rule = strtolower(substr($ruleStr, 0, $rulePos));
+ $attrRuleAr[$rule] =
+ str_replace(
+ array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08"),
+ array(";", "|", "~", " ", ",", "/", "(", ")"),
+ substr($ruleStr, $rulePos + 1));
+ }
+ // Drop regex rules whose pattern does not compile.
+ if (isset($attrRuleAr['match']) && !hl_regex($attrRuleAr['match'])) {
+ unset($attrRuleAr['match']);
+ }
+ if (isset($attrRuleAr['nomatch']) && !hl_regex($attrRuleAr['nomatch'])) {
+ unset($attrRuleAr['nomatch']);
+ }
+ $ruleAr[$attr] = $attrRuleAr;
 }
- $t = str_replace(["\t", "\r", "\n", ' '], '', preg_replace_callback('/"(?>(`.|[^"])*)"/sm', 'hl_aux1', trim($t)));
- for ($i = count(($t = explode(';', $t))); --$i >= 0;) {
- $w = $t[$i];
- if (empty($w) || ($e = strpos($w, '=')) === false || !strlen(($a = substr($w, $e + 1)))) {
- continue;
- }
- $y = $n = [];
- foreach (explode(',', $a) as $v) {
- if (!preg_match('`^([a-z:\-\*]+)(?:\((.*?)\))?`i', $v, $m)) {
- continue;
- }
- if (($x = strtolower($m[1])) === '-*') {
- $n['*'] = 1;
- continue;
- }
- if ('-' === $x[0]) {
- $n[substr($x, 1)] = 1;
- continue;
- }
- if (!isset($m[2])) {
- $y[$x] = 1;
- continue;
- }
- foreach (explode('/', $m[2]) as $m) {
- if (empty($m) || ($p = strpos($m, '=')) === 0 || $p < 5) {
- $y[$x] = 1;
- continue;
- }
- $y[$x][strtolower(substr($m, 0, $p))] = str_replace(["\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08"], [';', '|', '~', ' ', ',', '/', '(', ')'], substr($m, $p + 1));
- }
- if (isset($y[$x]['match']) && !hl_regex($y[$x]['match'])) {
- unset($y[$x]['match']);
- }
- if (isset($y[$x]['nomatch']) && !hl_regex($y[$x]['nomatch'])) {
- unset($y[$x]['nomatch']);
- }
- }
- if (!count($y) && !count($n)) {
- continue;
- }
- foreach (explode(',', substr($w, 0, $e)) as $v) {
- if (!strlen(($v = strtolower($v)))) {
- continue;
- }
- if (count($y)) {
- if (!isset($s[$v])) {
- $s[$v] = $y;
- } else {
- $s[$v] = array_merge($s[$v], $y);
- }
- }
- if (count($n)) {
- if (!isset($s[$v]['n'])) {
- $s[$v]['n'] = $n;
- } else {
- $s[$v]['n'] = array_merge($s[$v]['n'], $n);
- }
- }
- }
+
+ if (!count($ruleAr) && !count($denyAttrAr)) {
+ continue;
 }
+ // Apply the collected rules and denials to every tag named before the "=".
+ foreach (explode(',', substr($ele, 0, $tagPos)) as $tag) {
+ if (!strlen(($tag = strtolower($tag)))) {
+ continue;
+ }
+ if (count($ruleAr)) {
+ $out[$tag] = !isset($out[$tag]) ? $ruleAr : array_merge($out[$tag], $ruleAr);
+ }
+ if (count($denyAttrAr)) {
+ $out[$tag]['deny'] = !isset($out[$tag]['deny']) ? $denyAttrAr : array_merge($out[$tag]['deny'], $denyAttrAr);
+ }
+ }
+ }
- return $s;
+ return $out;
 }
+/**
+ * Handle tag text with < and > limiters, and attributes in opening tags.
+ *
+ * @param array $t Array from preg_replace call.
+ * @return string Tag with any attribute,
+ * or text with < and > neutralized into entities, or empty.
+ */
function hl_tag($t)
{
- // tag/attribute handler
- global $C;
- $t = $t[0];
- // invalid < >
- if ('< ' === $t) {
- return '< ';
- }
- if ('>' === $t) {
- return '>';
- }
- if (!preg_match('`^<(/?)([a-zA-Z][^\s>]*)([^>]*?)\s?>$`m', $t, $m)) {
- return str_replace(['<', '>'], ['<', '>'], $t);
- }
- $e = strtolower($m[2]);
- static $eIC = ['annotation-xml' => 1, 'color-profile' => 1, 'font-face' => 1, 'font-face-src' => 1, 'font-face-uri' => 1, 'font-face-format' => 1, 'font-face-name' => 1, 'missing-glyph' => 1]; // Illegal cust ele
- if ((!strpos($e, '-') && !isset($C['elements'][$e])) || (strpos($e, '-') && (isset($C['elements']['-' . $e]) || (!$C['any_custom_element'] && !isset($C['elements'][$e])) || isset($eIC[$e]) || preg_match('`[^-._0-9a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\x{2ff}\x{370}-\x{37d}\x{37f}-\x{1fff}\x{200c}-\x{200d}\x{2070}-\x{218f}\x{2c00}-\x{2fef}\x{3001}-\x{d7ff}\x{f900}-\x{fdcf}\x{fdf0}-\x{fffd}\x{10000}-\x{effff}]`u', $e)))) {
- return ($C['keep_bad'] % 2) ? str_replace(['<', '>'], ['<', '>'], $t) : '';
- }
- // attr string
- $a = str_replace(["\n", "\r", "\t"], ' ', trim($m[3]));
- // tag transform
- static $eD = ['acronym' => 1, 'applet' => 1, 'big' => 1, 'center' => 1, 'dir' => 1, 'font' => 1, 'isindex' => 1, 's' => 1, 'strike' => 1, 'tt' => 1]; // Deprecated
- if ($C['make_tag_strict'] && isset($eD[$e])) {
- $trt = hl_tag2($e, $a, $C['make_tag_strict']);
- if (!$e) {
- return ($C['keep_bad'] % 2) ? str_replace(['<', '>'], ['<', '>'], $t) : '';
- }
- }
- // close tag
- static $eE = ['area' => 1, 'br' => 1, 'col' => 1, 'command' => 1, 'embed' => 1, 'hr' => 1, 'img' => 1, 'input' => 1, 'isindex' => 1, 'keygen' => 1, 'link' => 1, 'meta' => 1, 'param' => 1, 'source' => 1, 'track' => 1, 'wbr' => 1]; // Empty ele
- if (!empty($m[1])) {
- return !isset($eE[$e]) ? (empty($C['hook_tag']) ? "$e>" : $C['hook_tag']($e)) : (($C['keep_bad']) % 2 ? str_replace(['<', '>'], ['<', '>'], $t) : '');
- }
+ $t = $t[0];
+ global $C;
- // open tag & attr
- static $aN = ['abbr' => ['td' => 1, 'th' => 1], 'accept' => ['form' => 1, 'input' => 1], 'accept-charset' => ['form' => 1], 'action' => ['form' => 1], 'align' => ['applet' => 1, 'caption' => 1, 'col' => 1, 'colgroup' => 1, 'div' => 1, 'embed' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'hr' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'legend' => 1, 'object' => 1, 'p' => 1, 'table' => 1, 'tbody' => 1, 'td' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'tr' => 1], 'allowfullscreen' => ['iframe' => 1], 'alt' => ['applet' => 1, 'area' => 1, 'img' => 1, 'input' => 1], 'archive' => ['applet' => 1, 'object' => 1], 'async' => ['script' => 1], 'autocomplete' => ['form' => 1, 'input' => 1], 'autofocus' => ['button' => 1, 'input' => 1, 'keygen' => 1, 'select' => 1, 'textarea' => 1], 'autoplay' => ['audio' => 1, 'video' => 1], 'axis' => ['td' => 1, 'th' => 1], 'bgcolor' => ['embed' => 1, 'table' => 1, 'td' => 1, 'th' => 1, 'tr' => 1], 'border' => ['img' => 1, 'object' => 1, 'table' => 1], 'bordercolor' => ['table' => 1, 'td' => 1, 'tr' => 1], 'cellpadding' => ['table' => 1], 'cellspacing' => ['table' => 1], 'challenge' => ['keygen' => 1], 'char' => ['col' => 1, 'colgroup' => 1, 'tbody' => 1, 'td' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'tr' => 1], 'charoff' => ['col' => 1, 'colgroup' => 1, 'tbody' => 1, 'td' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'tr' => 1], 'charset' => ['a' => 1, 'script' => 1], 'checked' => ['command' => 1, 'input' => 1], 'cite' => ['blockquote' => 1, 'del' => 1, 'ins' => 1, 'q' => 1], 'classid' => ['object' => 1], 'clear' => ['br' => 1], 'code' => ['applet' => 1], 'codebase' => ['applet' => 1, 'object' => 1], 'codetype' => ['object' => 1], 'color' => ['font' => 1], 'cols' => ['textarea' => 1], 'colspan' => ['td' => 1, 'th' => 1], 'compact' => ['dir' => 1, 'dl' => 1, 'menu' => 1, 'ol' => 1, 'ul' => 1], 'content' => ['meta' => 1], 'controls' => ['audio' => 1, 'video' => 1], 'coords' => ['a' => 1, 'area' => 1], 
'crossorigin' => ['img' => 1], 'data' => ['object' => 1], 'datetime' => ['del' => 1, 'ins' => 1, 'time' => 1], 'declare' => ['object' => 1], 'default' => ['track' => 1], 'defer' => ['script' => 1], 'dirname' => ['input' => 1, 'textarea' => 1], 'disabled' => ['button' => 1, 'command' => 1, 'fieldset' => 1, 'input' => 1, 'keygen' => 1, 'optgroup' => 1, 'option' => 1, 'select' => 1, 'textarea' => 1], 'download' => ['a' => 1], 'enctype' => ['form' => 1], 'face' => ['font' => 1], 'flashvars' => ['embed' => 1], 'for' => ['label' => 1, 'output' => 1], 'form' => ['button' => 1, 'fieldset' => 1, 'input' => 1, 'keygen' => 1, 'label' => 1, 'object' => 1, 'output' => 1, 'select' => 1, 'textarea' => 1], 'formaction' => ['button' => 1, 'input' => 1], 'formenctype' => ['button' => 1, 'input' => 1], 'formmethod' => ['button' => 1, 'input' => 1], 'formnovalidate' => ['button' => 1, 'input' => 1], 'formtarget' => ['button' => 1, 'input' => 1], 'frame' => ['table' => 1], 'frameborder' => ['iframe' => 1], 'headers' => ['td' => 1, 'th' => 1], 'height' => ['applet' => 1, 'canvas' => 1, 'embed' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'object' => 1, 'td' => 1, 'th' => 1, 'video' => 1], 'high' => ['meter' => 1], 'href' => ['a' => 1, 'area' => 1, 'link' => 1], 'hreflang' => ['a' => 1, 'area' => 1, 'link' => 1], 'hspace' => ['applet' => 1, 'embed' => 1, 'img' => 1, 'object' => 1], 'icon' => ['command' => 1], 'ismap' => ['img' => 1, 'input' => 1], 'keyparams' => ['keygen' => 1], 'keytype' => ['keygen' => 1], 'kind' => ['track' => 1], 'label' => ['command' => 1, 'menu' => 1, 'option' => 1, 'optgroup' => 1, 'track' => 1], 'language' => ['script' => 1], 'list' => ['input' => 1], 'longdesc' => ['img' => 1, 'iframe' => 1], 'loop' => ['audio' => 1, 'video' => 1], 'low' => ['meter' => 1], 'marginheight' => ['iframe' => 1], 'marginwidth' => ['iframe' => 1], 'max' => ['input' => 1, 'meter' => 1, 'progress' => 1], 'maxlength' => ['input' => 1, 'textarea' => 1], 'media' => ['a' => 1, 'area' => 1, 
'link' => 1, 'source' => 1, 'style' => 1], 'mediagroup' => ['audio' => 1, 'video' => 1], 'method' => ['form' => 1], 'min' => ['input' => 1, 'meter' => 1], 'model' => ['embed' => 1], 'multiple' => ['input' => 1, 'select' => 1], 'muted' => ['audio' => 1, 'video' => 1], 'name' => ['a' => 1, 'applet' => 1, 'button' => 1, 'embed' => 1, 'fieldset' => 1, 'form' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'keygen' => 1, 'map' => 1, 'object' => 1, 'output' => 1, 'param' => 1, 'select' => 1, 'slot' => 1, 'textarea' => 1], 'nohref' => ['area' => 1], 'noshade' => ['hr' => 1], 'novalidate' => ['form' => 1], 'nowrap' => ['td' => 1, 'th' => 1], 'object' => ['applet' => 1], 'open' => ['details' => 1, 'dialog' => 1], 'optimum' => ['meter' => 1], 'pattern' => ['input' => 1], 'ping' => ['a' => 1, 'area' => 1], 'placeholder' => ['input' => 1, 'textarea' => 1], 'pluginspage' => ['embed' => 1], 'pluginurl' => ['embed' => 1], 'poster' => ['video' => 1], 'pqg' => ['keygen' => 1], 'preload' => ['audio' => 1, 'video' => 1], 'prompt' => ['isindex' => 1], 'pubdate' => ['time' => 1], 'radiogroup' => ['command' => 1], 'readonly' => ['input' => 1, 'textarea' => 1], 'referrerpolicy' => ['a' => 1, 'area' => 1, 'img' => 1, 'iframe' => 1, 'link' => 1], 'rel' => ['a' => 1, 'area' => 1, 'link' => 1], 'required' => ['input' => 1, 'select' => 1, 'textarea' => 1], 'rev' => ['a' => 1], 'reversed' => ['ol' => 1], 'rows' => ['textarea' => 1], 'rowspan' => ['td' => 1, 'th' => 1], 'rules' => ['table' => 1], 'sandbox' => ['iframe' => 1], 'scope' => ['td' => 1, 'th' => 1], 'scoped' => ['style' => 1], 'scrolling' => ['iframe' => 1], 'seamless' => ['iframe' => 1], 'selected' => ['option' => 1], 'shape' => ['a' => 1, 'area' => 1], 'size' => ['font' => 1, 'hr' => 1, 'input' => 1, 'select' => 1], 'sizes' => ['link' => 1], 'span' => ['col' => 1, 'colgroup' => 1], 'src' => ['audio' => 1, 'embed' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'script' => 1, 'source' => 1, 'track' => 1, 'video' => 1], 'srcdoc' => 
['iframe' => 1], 'srclang' => ['track' => 1], 'srcset' => ['img' => 1], 'standby' => ['object' => 1], 'start' => ['ol' => 1], 'step' => ['input' => 1], 'summary' => ['table' => 1], 'target' => ['a' => 1, 'area' => 1, 'form' => 1], 'type' => ['a' => 1, 'area' => 1, 'button' => 1, 'command' => 1, 'embed' => 1, 'input' => 1, 'li' => 1, 'link' => 1, 'menu' => 1, 'object' => 1, 'ol' => 1, 'param' => 1, 'script' => 1, 'source' => 1, 'style' => 1, 'ul' => 1], 'typemustmatch' => ['object' => 1], 'usemap' => ['img' => 1, 'input' => 1, 'object' => 1], 'valign' => ['col' => 1, 'colgroup' => 1, 'tbody' => 1, 'td' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'tr' => 1], 'value' => ['button' => 1, 'data' => 1, 'input' => 1, 'li' => 1, 'meter' => 1, 'option' => 1, 'param' => 1, 'progress' => 1], 'valuetype' => ['param' => 1], 'vspace' => ['applet' => 1, 'embed' => 1, 'img' => 1, 'object' => 1], 'width' => ['applet' => 1, 'canvas' => 1, 'col' => 1, 'colgroup' => 1, 'embed' => 1, 'hr' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'object' => 1, 'pre' => 1, 'table' => 1, 'td' => 1, 'th' => 1, 'video' => 1], 'wmode' => ['embed' => 1], 'wrap' => ['textarea' => 1]]; // Ele-specific
- static $aNA = ['aria-activedescendant' => 1, 'aria-atomic' => 1, 'aria-autocomplete' => 1, 'aria-braillelabel' => 1, 'aria-brailleroledescription' => 1, 'aria-busy' => 1, 'aria-checked' => 1, 'aria-colcount' => 1, 'aria-colindex' => 1, 'aria-colindextext' => 1, 'aria-colspan' => 1, 'aria-controls' => 1, 'aria-current' => 1, 'aria-describedby' => 1, 'aria-description' => 1, 'aria-details' => 1, 'aria-disabled' => 1, 'aria-dropeffect' => 1, 'aria-errormessage' => 1, 'aria-expanded' => 1, 'aria-flowto' => 1, 'aria-grabbed' => 1, 'aria-haspopup' => 1, 'aria-hidden' => 1, 'aria-invalid' => 1, 'aria-keyshortcuts' => 1, 'aria-label' => 1, 'aria-labelledby' => 1, 'aria-level' => 1, 'aria-live' => 1, 'aria-multiline' => 1, 'aria-multiselectable' => 1, 'aria-orientation' => 1, 'aria-owns' => 1, 'aria-placeholder' => 1, 'aria-posinset' => 1, 'aria-pressed' => 1, 'aria-readonly' => 1, 'aria-relevant' => 1, 'aria-required' => 1, 'aria-roledescription' => 1, 'aria-rowcount' => 1, 'aria-rowindex' => 1, 'aria-rowindextext' => 1, 'aria-rowspan' => 1, 'aria-selected' => 1, 'aria-setsize' => 1, 'aria-sort' => 1, 'aria-valuemax' => 1, 'aria-valuemin' => 1, 'aria-valuenow' => 1, 'aria-valuetext' => 1]; // ARIA
- static $aNE = ['allowfullscreen' => 1, 'checkbox' => 1, 'checked' => 1, 'command' => 1, 'compact' => 1, 'declare' => 1, 'defer' => 1, 'default' => 1, 'disabled' => 1, 'hidden' => 1, 'inert' => 1, 'ismap' => 1, 'itemscope' => 1, 'multiple' => 1, 'nohref' => 1, 'noresize' => 1, 'noshade' => 1, 'nowrap' => 1, 'open' => 1, 'radio' => 1, 'readonly' => 1, 'required' => 1, 'reversed' => 1, 'selected' => 1]; // Empty
- static $aNO = ['onabort' => 1, 'onblur' => 1, 'oncanplay' => 1, 'oncanplaythrough' => 1, 'onchange' => 1, 'onclick' => 1, 'oncontextmenu' => 1, 'oncopy' => 1, 'oncuechange' => 1, 'oncut' => 1, 'ondblclick' => 1, 'ondrag' => 1, 'ondragend' => 1, 'ondragenter' => 1, 'ondragleave' => 1, 'ondragover' => 1, 'ondragstart' => 1, 'ondrop' => 1, 'ondurationchange' => 1, 'onemptied' => 1, 'onended' => 1, 'onerror' => 1, 'onfocus' => 1, 'onformchange' => 1, 'onforminput' => 1, 'oninput' => 1, 'oninvalid' => 1, 'onkeydown' => 1, 'onkeypress' => 1, 'onkeyup' => 1, 'onload' => 1, 'onloadeddata' => 1, 'onloadedmetadata' => 1, 'onloadstart' => 1, 'onlostpointercapture' => 1, 'onmousedown' => 1, 'onmousemove' => 1, 'onmouseout' => 1, 'onmouseover' => 1, 'onmouseup' => 1, 'onmousewheel' => 1, 'onpaste' => 1, 'onpause' => 1, 'onplay' => 1, 'onplaying' => 1, 'onpointercancel' => 1, 'ongotpointercapture' => 1, 'onpointerdown' => 1, 'onpointerenter' => 1, 'onpointerleave' => 1, 'onpointermove' => 1, 'onpointerout' => 1, 'onpointerover' => 1, 'onpointerup' => 1, 'onprogress' => 1, 'onratechange' => 1, 'onreadystatechange' => 1, 'onreset' => 1, 'onsearch' => 1, 'onscroll' => 1, 'onseeked' => 1, 'onseeking' => 1, 'onselect' => 1, 'onshow' => 1, 'onstalled' => 1, 'onsubmit' => 1, 'onsuspend' => 1, 'ontimeupdate' => 1, 'ontoggle' => 1, 'ontouchcancel' => 1, 'ontouchend' => 1, 'ontouchmove' => 1, 'ontouchstart' => 1, 'onvolumechange' => 1, 'onwaiting' => 1, 'onwheel' => 1, 'onauxclick' => 1, 'oncancel' => 1, 'onclose' => 1, 'oncontextlost' => 1, 'oncontextrestored' => 1, 'onformdata' => 1, 'onmouseenter' => 1, 'onmouseleave' => 1, 'onresize' => 1, 'onsecuritypolicyviolation' => 1, 'onslotchange' => 1]; // Event
- static $aNP = ['action' => 1, 'cite' => 1, 'classid' => 1, 'codebase' => 1, 'data' => 1, 'href' => 1, 'itemtype' => 1, 'longdesc' => 1, 'model' => 1, 'pluginspage' => 1, 'pluginurl' => 1, 'src' => 1, 'srcset' => 1, 'usemap' => 1]; // Need scheme check; excludes style, on*
- static $aNU = ['accesskey' => 1, 'autocapitalize' => 1, 'autofocus' => 1, 'class' => 1, 'contenteditable' => 1, 'contextmenu' => 1, 'dir' => 1, 'draggable' => 1, 'dropzone' => 1, 'enterkeyhint' => 1, 'hidden' => 1, 'id' => 1, 'inert' => 1, 'inputmode' => 1, 'is' => 1, 'itemid' => 1, 'itemprop' => 1, 'itemref' => 1, 'itemscope' => 1, 'itemtype' => 1, 'lang' => 1, 'nonce' => 1, 'role' => 1, 'slot' => 1, 'spellcheck' => 1, 'style' => 1, 'tabindex' => 1, 'title' => 1, 'translate' => 1, 'xmlns' => 1, 'xml:base' => 1, 'xml:lang' => 1, 'xml:space' => 1]; // Univ; excludes on*, aria*
-
- if ($C['lc_std_val']) {
- // predef attr vals for $eAL & $aNE ele
- static $aNL = ['all' => 1, 'auto' => 1, 'baseline' => 1, 'bottom' => 1, 'button' => 1, 'captions' => 1, 'center' => 1, 'chapters' => 1, 'char' => 1, 'checkbox' => 1, 'circle' => 1, 'col' => 1, 'colgroup' => 1, 'color' => 1, 'cols' => 1, 'data' => 1, 'date' => 1, 'datetime' => 1, 'datetime-local' => 1, 'default' => 1, 'descriptions' => 1, 'email' => 1, 'file' => 1, 'get' => 1, 'groups' => 1, 'hidden' => 1, 'image' => 1, 'justify' => 1, 'left' => 1, 'ltr' => 1, 'metadata' => 1, 'middle' => 1, 'month' => 1, 'none' => 1, 'number' => 1, 'object' => 1, 'password' => 1, 'poly' => 1, 'post' => 1, 'preserve' => 1, 'radio' => 1, 'range' => 1, 'rect' => 1, 'ref' => 1, 'reset' => 1, 'right' => 1, 'row' => 1, 'rowgroup' => 1, 'rows' => 1, 'rtl' => 1, 'search' => 1, 'submit' => 1, 'subtitles' => 1, 'tel' => 1, 'text' => 1, 'time' => 1, 'top' => 1, 'url' => 1, 'week' => 1];
- static $eAL = ['a' => 1, 'area' => 1, 'bdo' => 1, 'button' => 1, 'col' => 1, 'fieldset' => 1, 'form' => 1, 'img' => 1, 'input' => 1, 'object' => 1, 'ol' => 1, 'optgroup' => 1, 'option' => 1, 'param' => 1, 'script' => 1, 'select' => 1, 'table' => 1, 'td' => 1, 'textarea' => 1, 'tfoot' => 1, 'th' => 1, 'thead' => 1, 'tr' => 1, 'track' => 1, 'xml:space' => 1];
- $lcase = isset($eAL[$e]) ? 1 : 0;
- }
+ // Check if > character not in tag.
+
+ if ($t == '< ') {
+ return '< ';
+ }
+ if ($t == '>') {
+ return '>';
+ }
+ if (!preg_match('`^<(/?)([a-zA-Z][^\s>]*)([^>]*?)\s?>$`m', $t, $m)) { // Get tag with element name and attributes
+ return str_replace(array('<', '>'), array('<', '>'), $t);
+ }
+
+ // Check if element not permitted. Custom element names have certain requirements.
+
+ $ele = strtolower($m[2]);
+ static $invalidCustomEleAr = array('annotation-xml'=>1, 'color-profile'=>1, 'font-face'=>1, 'font-face-src'=>1, 'font-face-uri'=>1, 'font-face-format'=>1, 'font-face-name'=>1, 'missing-glyph'=>1);
+ if (
+ (!strpos($ele, '-')
+ && !isset($C['elements'][$ele])) // Not custom element
+ || (strpos($ele, '-')
+ && (isset($C['elements']['-' . $ele])
+ || (!$C['any_custom_element']
+ && !isset($C['elements'][$ele]))
+ || isset($invalidCustomEleAr[$ele])
+ || preg_match(
+ '`[^-._0-9a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\x{2ff}'
+ . '\x{370}-\x{37d}\x{37f}-\x{1fff}\x{200c}-\x{200d}\x{2070}-\x{218f}'
+ . '\x{2c00}-\x{2fef}\x{3001}-\x{d7ff}\x{f900}-\x{fdcf}\x{fdf0}-\x{fffd}\x{10000}-\x{effff}]`u'
+ , $ele)))
+ ) {
+ return (($C['keep_bad']%2) ? str_replace(array('<', '>'), array('<', '>'), $t) : '');
+ }
+
+ // Attribute string.
- $depTr = 0;
- if ($C['no_deprecated_attr']) {
- // depr attr:applicable ele
- static $aND = ['align' => ['caption' => 1, 'div' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'hr' => 1, 'img' => 1, 'input' => 1, 'legend' => 1, 'object' => 1, 'p' => 1, 'table' => 1], 'bgcolor' => ['table' => 1, 'td' => 1, 'th' => 1, 'tr' => 1], 'border' => ['object' => 1], 'bordercolor' => ['table' => 1, 'td' => 1, 'tr' => 1], 'cellspacing' => ['table' => 1], 'clear' => ['br' => 1], 'compact' => ['dl' => 1, 'ol' => 1, 'ul' => 1], 'height' => ['td' => 1, 'th' => 1], 'hspace' => ['img' => 1, 'object' => 1], 'language' => ['script' => 1], 'name' => ['a' => 1, 'form' => 1, 'iframe' => 1, 'img' => 1, 'map' => 1], 'noshade' => ['hr' => 1], 'nowrap' => ['td' => 1, 'th' => 1], 'size' => ['hr' => 1], 'vspace' => ['img' => 1, 'object' => 1], 'width' => ['hr' => 1, 'pre' => 1, 'table' => 1, 'td' => 1, 'th' => 1]];
- static $eAD = ['a' => 1, 'br' => 1, 'caption' => 1, 'div' => 1, 'dl' => 1, 'form' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'hr' => 1, 'iframe' => 1, 'img' => 1, 'input' => 1, 'legend' => 1, 'map' => 1, 'object' => 1, 'ol' => 1, 'p' => 1, 'pre' => 1, 'script' => 1, 'table' => 1, 'td' => 1, 'th' => 1, 'tr' => 1, 'ul' => 1];
- $depTr = isset($eAD[$e]) ? 1 : 0;
+ $attrStr = str_replace(array("\n", "\r", "\t"), ' ', trim($m[3]));
+
+ // Transform deprecated element.
+
+ static $deprecatedEleAr = array('acronym'=>1, 'applet'=>1, 'big'=>1, 'center'=>1, 'dir'=>1, 'font'=>1, 'isindex'=>1, 's'=>1, 'strike'=>1, 'tt'=>1);
+ if ($C['make_tag_strict'] && isset($deprecatedEleAr[$ele])) {
+ $eleTransformed = hl_deprecatedElement($ele, $attrStr, $C['make_tag_strict']); // hl_deprecatedElement uses referencing
+ if (!$ele) {
+ return (($C['keep_bad'] % 2) ? str_replace(array('<', '>'), array('<', '>'), $t) : '');
}
+ }
- // attr name-vals
- if (false !== strpos($a, "\x01")) {
- $a = preg_replace('`\x01[^\x01]*\x01`', '', $a);
- } // No comment/CDATA sec
- $mode = 0;
- $a = trim($a, ' /');
- $aA = [];
- while (strlen($a)) {
- $w = 0;
- switch ($mode) {
- case 0: // Name
- if (preg_match('`^[^=\s/\x7f-\x9f]+`', $a, $m)) {
- $nm = strtolower($m[0]);
- $w = $mode = 1;
- $a = ltrim(substr_replace($a, '', 0, strlen($m[0])));
- }
- break;
- case 1:
- if ('=' === $a[0]) { // =
- $w = 1;
- $mode = 2;
- $a = ltrim($a, '= ');
- } else { // No val
- $w = 1;
- $mode = 0;
- $a = ltrim($a);
- $aA[$nm] = '';
- }
- break;
- case 2: // Val
- if (preg_match('`^((?:"[^"]*")|(?:\'[^\']*\')|(?:\s*[^\s"\']+))(.*)`', $a, $m)) {
- $a = ltrim($m[2]);
- $m = $m[1];
- $w = 1;
- $mode = 0;
- $aA[$nm] = trim(str_replace('<', '<', ('"' === $m[0] || '\'' === $m[0]) ? substr($m, 1, -1) : $m));
- }
- break;
- }
- if (0 === $w) { // Parse errs, deal with space, " & '
- $a = preg_replace('`^(?:"[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*`', '', $a);
- $mode = 0;
+ // Handle closing tag.
+
+ static $emptyEleAr = array('area'=>1, 'br'=>1, 'col'=>1, 'command'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'keygen'=>1, 'link'=>1, 'meta'=>1, 'param'=>1, 'source'=>1, 'track'=>1, 'wbr'=>1);
+ if (!empty($m[1])) {
+ return(
+ !isset($emptyEleAr[$ele])
+ ? (empty($C['hook_tag'])
+ ? "$ele>"
+ : call_user_func($C['hook_tag'], $ele, 0))
+ : ($C['keep_bad'] % 2
+ ? str_replace(array('<', '>'), array('<', '>'), $t)
+ : ''));
+ }
+
+ // Handle opening tag.
+
+ // -- Sets of possible attributes.
+
+ // .. Element-specific non-global.
+
+ static $attrEleAr = array('abbr'=>array('td'=>1, 'th'=>1), 'accept'=>array('form'=>1, 'input'=>1), 'accept-charset'=>array('form'=>1), 'action'=>array('form'=>1), 'align'=>array('applet'=>1, 'caption'=>1, 'col'=>1, 'colgroup'=>1, 'div'=>1, 'embed'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'legend'=>1, 'object'=>1, 'p'=>1, 'table'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'allowfullscreen'=>array('iframe'=>1), 'alt'=>array('applet'=>1, 'area'=>1, 'img'=>1, 'input'=>1), 'archive'=>array('applet'=>1, 'object'=>1), 'async'=>array('script'=>1), 'autocomplete'=>array('form'=>1, 'input'=>1), 'autofocus'=>array('button'=>1, 'input'=>1, 'keygen'=>1, 'select'=>1, 'textarea'=>1), 'autoplay'=>array('audio'=>1, 'video'=>1), 'axis'=>array('td'=>1, 'th'=>1), 'bgcolor'=>array('embed'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'tr'=>1), 'border'=>array('img'=>1, 'object'=>1, 'table'=>1), 'bordercolor'=>array('table'=>1, 'td'=>1, 'tr'=>1), 'cellpadding'=>array('table'=>1), 'cellspacing'=>array('table'=>1), 'challenge'=>array('keygen'=>1), 'char'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charoff'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charset'=>array('a'=>1, 'script'=>1), 'checked'=>array('command'=>1, 'input'=>1), 'cite'=>array('blockquote'=>1, 'del'=>1, 'ins'=>1, 'q'=>1), 'classid'=>array('object'=>1), 'clear'=>array('br'=>1), 'code'=>array('applet'=>1), 'codebase'=>array('applet'=>1, 'object'=>1), 'codetype'=>array('object'=>1), 'color'=>array('font'=>1), 'cols'=>array('textarea'=>1), 'colspan'=>array('td'=>1, 'th'=>1), 'compact'=>array('dir'=>1, 'dl'=>1, 'menu'=>1, 'ol'=>1, 'ul'=>1), 'content'=>array('meta'=>1), 'controls'=>array('audio'=>1, 'video'=>1), 'coords'=>array('a'=>1, 'area'=>1), 'crossorigin'=>array('img'=>1), 'data'=>array('object'=>1), 'datetime'=>array('del'=>1, 'ins'=>1, 
'time'=>1), 'declare'=>array('object'=>1), 'default'=>array('track'=>1), 'defer'=>array('script'=>1), 'dirname'=>array('input'=>1, 'textarea'=>1), 'disabled'=>array('button'=>1, 'command'=>1, 'fieldset'=>1, 'input'=>1, 'keygen'=>1, 'optgroup'=>1, 'option'=>1, 'select'=>1, 'textarea'=>1), 'download'=>array('a'=>1), 'enctype'=>array('form'=>1), 'face'=>array('font'=>1), 'flashvars'=>array('embed'=>1), 'for'=>array('label'=>1, 'output'=>1), 'form'=>array('button'=>1, 'fieldset'=>1, 'input'=>1, 'keygen'=>1, 'label'=>1, 'object'=>1, 'output'=>1, 'select'=>1, 'textarea'=>1), 'formaction'=>array('button'=>1, 'input'=>1), 'formenctype'=>array('button'=>1, 'input'=>1), 'formmethod'=>array('button'=>1, 'input'=>1), 'formnovalidate'=>array('button'=>1, 'input'=>1), 'formtarget'=>array('button'=>1, 'input'=>1), 'frame'=>array('table'=>1), 'frameborder'=>array('iframe'=>1), 'headers'=>array('td'=>1, 'th'=>1), 'height'=>array('applet'=>1, 'canvas'=>1, 'embed'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'td'=>1, 'th'=>1, 'video'=>1), 'high'=>array('meter'=>1), 'href'=>array('a'=>1, 'area'=>1, 'link'=>1), 'hreflang'=>array('a'=>1, 'area'=>1, 'link'=>1), 'hspace'=>array('applet'=>1, 'embed'=>1, 'img'=>1, 'object'=>1), 'icon'=>array('command'=>1), 'ismap'=>array('img'=>1, 'input'=>1), 'keyparams'=>array('keygen'=>1), 'keytype'=>array('keygen'=>1), 'kind'=>array('track'=>1), 'label'=>array('command'=>1, 'menu'=>1, 'option'=>1, 'optgroup'=>1, 'track'=>1), 'language'=>array('script'=>1), 'list'=>array('input'=>1), 'longdesc'=>array('img'=>1, 'iframe'=>1), 'loop'=>array('audio'=>1, 'video'=>1), 'low'=>array('meter'=>1), 'marginheight'=>array('iframe'=>1), 'marginwidth'=>array('iframe'=>1), 'max'=>array('input'=>1, 'meter'=>1, 'progress'=>1), 'maxlength'=>array('input'=>1, 'textarea'=>1), 'media'=>array('a'=>1, 'area'=>1, 'link'=>1, 'source'=>1, 'style'=>1), 'mediagroup'=>array('audio'=>1, 'video'=>1), 'method'=>array('form'=>1), 'min'=>array('input'=>1, 'meter'=>1), 
'model'=>array('embed'=>1), 'multiple'=>array('input'=>1, 'select'=>1), 'muted'=>array('audio'=>1, 'video'=>1), 'name'=>array('a'=>1, 'applet'=>1, 'button'=>1, 'embed'=>1, 'fieldset'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'keygen'=>1, 'map'=>1, 'object'=>1, 'output'=>1, 'param'=>1, 'select'=>1, 'slot'=>1, 'textarea'=>1), 'nohref'=>array('area'=>1), 'noshade'=>array('hr'=>1), 'novalidate'=>array('form'=>1), 'nowrap'=>array('td'=>1, 'th'=>1), 'object'=>array('applet'=>1), 'open'=>array('details'=>1, 'dialog'=>1), 'optimum'=>array('meter'=>1), 'pattern'=>array('input'=>1), 'ping'=>array('a'=>1, 'area'=>1), 'placeholder'=>array('input'=>1, 'textarea'=>1), 'pluginspage'=>array('embed'=>1), 'pluginurl'=>array('embed'=>1), 'poster'=>array('video'=>1), 'pqg'=>array('keygen'=>1), 'preload'=>array('audio'=>1, 'video'=>1), 'prompt'=>array('isindex'=>1), 'pubdate'=>array('time'=>1), 'radiogroup'=>array('command'=>1), 'readonly'=>array('input'=>1, 'textarea'=>1), 'referrerpolicy' => array('a'=>1,'area'=>1,'img'=>1,'iframe'=>1,'link'=>1), 'rel'=>array('a'=>1, 'area'=>1, 'link'=>1), 'required'=>array('input'=>1, 'select'=>1, 'textarea'=>1), 'rev'=>array('a'=>1), 'reversed'=>array('ol'=>1), 'rows'=>array('textarea'=>1), 'rowspan'=>array('td'=>1, 'th'=>1), 'rules'=>array('table'=>1), 'sandbox'=>array('iframe'=>1), 'scope'=>array('td'=>1, 'th'=>1), 'scoped'=>array('style'=>1), 'scrolling'=>array('iframe'=>1), 'seamless'=>array('iframe'=>1), 'selected'=>array('option'=>1), 'shape'=>array('a'=>1, 'area'=>1), 'size'=>array('font'=>1, 'hr'=>1, 'input'=>1, 'select'=>1), 'sizes'=>array('link'=>1), 'span'=>array('col'=>1, 'colgroup'=>1), 'src'=>array('audio'=>1, 'embed'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'script'=>1, 'source'=>1, 'track'=>1, 'video'=>1), 'srcdoc'=>array('iframe'=>1), 'srclang'=>array('track'=>1), 'srcset'=>array('img'=>1), 'standby'=>array('object'=>1), 'start'=>array('ol'=>1), 'step'=>array('input'=>1), 'summary'=>array('table'=>1), 'target'=>array('a'=>1, 
'area'=>1, 'form'=>1), 'type'=>array('a'=>1, 'area'=>1, 'button'=>1, 'command'=>1, 'embed'=>1, 'input'=>1, 'li'=>1, 'link'=>1, 'menu'=>1, 'object'=>1, 'ol'=>1, 'param'=>1, 'script'=>1, 'source'=>1, 'style'=>1, 'ul'=>1), 'typemustmatch'=>array('object'=>1), 'usemap'=>array('img'=>1, 'input'=>1, 'object'=>1), 'valign'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'value'=>array('button'=>1, 'data'=>1, 'input'=>1, 'li'=>1, 'meter'=>1, 'option'=>1, 'param'=>1, 'progress'=>1), 'valuetype'=>array('param'=>1), 'vspace'=>array('applet'=>1, 'embed'=>1, 'img'=>1, 'object'=>1), 'width'=>array('applet'=>1, 'canvas'=>1, 'col'=>1, 'colgroup'=>1, 'embed'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'pre'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'video'=>1), 'wmode'=>array('embed'=>1), 'wrap'=>array('textarea'=>1));
+
+ // .. Empty.
+
+ static $emptyAttrAr = array('allowfullscreen'=>1, 'checkbox'=>1, 'checked'=>1, 'command'=>1, 'compact'=>1, 'declare'=>1, 'defer'=>1, 'default'=>1, 'disabled'=>1, 'hidden'=>1, 'inert'=>1, 'ismap'=>1, 'itemscope'=>1, 'multiple'=>1, 'nohref'=>1, 'noresize'=>1, 'noshade'=>1, 'nowrap'=>1, 'open'=>1, 'radio'=>1, 'readonly'=>1, 'required'=>1, 'reversed'=>1, 'selected'=>1);
+
+ // .. Global.
+
+ static $globalAttrAr = array(
+
+ // .... General.
+
+ 'accesskey'=>1, 'autocapitalize'=>1, 'autofocus'=>1, 'class'=>1, 'contenteditable'=>1, 'contextmenu'=>1, 'dir'=>1, 'draggable'=>1, 'dropzone'=>1, 'enterkeyhint'=>1, 'hidden'=>1, 'id'=>1, 'inert'=>1, 'inputmode'=>1, 'is'=>1, 'itemid'=>1, 'itemprop'=>1, 'itemref'=>1, 'itemscope'=>1, 'itemtype'=>1, 'lang'=>1, 'nonce'=>1, 'role'=>1, 'slot'=>1, 'spellcheck'=>1, 'style'=>1, 'tabindex'=>1, 'title'=>1, 'translate'=>1, 'xmlns'=>1, 'xml:base'=>1, 'xml:lang'=>1, 'xml:space'=>1,
+
+ // .... Event.
+
+ 'onabort'=>1, 'onauxclick'=>1, 'onblur'=>1, 'oncancel'=>1, 'oncanplay'=>1, 'oncanplaythrough'=>1, 'onchange'=>1, 'onclick'=>1, 'onclose'=>1, 'oncontextlost'=>1, 'oncontextmenu'=>1, 'oncontextrestored'=>1, 'oncopy'=>1, 'oncuechange'=>1, 'oncut'=>1, 'ondblclick'=>1, 'ondrag'=>1, 'ondragend'=>1, 'ondragenter'=>1, 'ondragleave'=>1, 'ondragover'=>1, 'ondragstart'=>1, 'ondrop'=>1, 'ondurationchange'=>1, 'onemptied'=>1, 'onended'=>1, 'onerror'=>1, 'onfocus'=>1, 'onformchange'=>1, 'onformdata'=>1, 'onforminput'=>1, 'ongotpointercapture'=>1, 'oninput'=>1, 'oninvalid'=>1, 'onkeydown'=>1, 'onkeypress'=>1, 'onkeyup'=>1, 'onload'=>1, 'onloadeddata'=>1, 'onloadedmetadata'=>1, 'onloadend'=>1, 'onloadstart'=>1, 'onlostpointercapture'=>1, 'onmousedown'=>1, 'onmouseenter'=>1, 'onmouseleave'=>1, 'onmousemove'=>1, 'onmouseout'=>1, 'onmouseover'=>1, 'onmouseup'=>1, 'onmousewheel'=>1, 'onpaste'=>1, 'onpause'=>1, 'onplay'=>1, 'onplaying'=>1, 'onpointercancel'=>1, 'onpointerdown'=>1, 'onpointerenter'=>1, 'onpointerleave'=>1, 'onpointermove'=>1, 'onpointerout'=>1, 'onpointerover'=>1, 'onpointerup'=>1, 'onprogress'=>1, 'onratechange'=>1, 'onreadystatechange'=>1, 'onreset'=>1, 'onresize'=>1, 'onscroll'=>1, 'onsearch'=>1, 'onsecuritypolicyviolation'=>1, 'onseeked'=>1, 'onseeking'=>1, 'onselect'=>1, 'onshow'=>1, 'onslotchange'=>1, 'onstalled'=>1, 'onsubmit'=>1, 'onsuspend'=>1, 'ontimeupdate'=>1, 'ontoggle'=>1, 'ontouchcancel'=>1, 'ontouchend'=>1, 'ontouchmove'=>1, 'ontouchstart'=>1, 'onvolumechange'=>1, 'onwaiting'=>1, 'onwheel'=>1,
+
+ // .... Aria.
+
+ 'aria-activedescendant'=>1, 'aria-atomic'=>1, 'aria-autocomplete'=>1, 'aria-braillelabel'=>1, 'aria-brailleroledescription'=>1, 'aria-busy'=>1, 'aria-checked'=>1, 'aria-colcount'=>1, 'aria-colindex'=>1, 'aria-colindextext'=>1, 'aria-colspan'=>1, 'aria-controls'=>1, 'aria-current'=>1, 'aria-describedby'=>1, 'aria-description'=>1, 'aria-details'=>1, 'aria-disabled'=>1, 'aria-dropeffect'=>1, 'aria-errormessage'=>1, 'aria-expanded'=>1, 'aria-flowto'=>1, 'aria-grabbed'=>1, 'aria-haspopup'=>1, 'aria-hidden'=>1, 'aria-invalid'=>1, 'aria-keyshortcuts'=>1, 'aria-label'=>1, 'aria-labelledby'=>1, 'aria-level'=>1, 'aria-live'=>1, 'aria-multiline'=>1, 'aria-multiselectable'=>1, 'aria-orientation'=>1, 'aria-owns'=>1, 'aria-placeholder'=>1, 'aria-posinset'=>1, 'aria-pressed'=>1, 'aria-readonly'=>1, 'aria-relevant'=>1, 'aria-required'=>1, 'aria-roledescription'=>1, 'aria-rowcount'=>1, 'aria-rowindex'=>1, 'aria-rowindextext'=>1, 'aria-rowspan'=>1, 'aria-selected'=>1, 'aria-setsize'=>1, 'aria-sort'=>1, 'aria-valuemax'=>1, 'aria-valuemin'=>1, 'aria-valuenow'=>1, 'aria-valuetext'=>1);
+
+ static $urlAttrAr = array('action'=>1, 'archive'=>1, 'cite'=>1, 'classid'=>1, 'codebase'=>1, 'data'=>1, 'href'=>1, 'itemtype'=>1, 'longdesc'=>1, 'model'=>1, 'pluginspage'=>1, 'pluginurl'=>1, 'poster'=>1, 'src'=>1, 'srcset'=>1, 'usemap'=>1); // Excludes style and on*
+
+ // .. Deprecated.
+
+ $alterDeprecAttr = 0;
+ if ($C['no_deprecated_attr']) {
+ static $deprecAttrEleAr = array('align'=>array('caption'=>1, 'div'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'legend'=>1, 'object'=>1, 'p'=>1, 'table'=>1), 'bgcolor'=>array('table'=>1, 'td'=>1, 'th'=>1, 'tr'=>1), 'border'=>array('object'=>1), 'bordercolor'=>array('table'=>1, 'td'=>1, 'tr'=>1), 'cellspacing'=>array('table'=>1), 'clear'=>array('br'=>1), 'compact'=>array('dl'=>1, 'ol'=>1, 'ul'=>1), 'height'=>array('td'=>1, 'th'=>1), 'hspace'=>array('img'=>1, 'object'=>1), 'language'=>array('script'=>1), 'name'=>array('a'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'map'=>1), 'noshade'=>array('hr'=>1), 'nowrap'=>array('td'=>1, 'th'=>1), 'size'=>array('hr'=>1), 'vspace'=>array('img'=>1, 'object'=>1), 'width'=>array('hr'=>1, 'pre'=>1, 'table'=>1, 'td'=>1, 'th'=>1));
+ static $deprecAttrPossibleEleAr = array('a'=>1, 'br'=>1, 'caption'=>1, 'div'=>1, 'dl'=>1, 'form'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'legend'=>1, 'map'=>1, 'object'=>1, 'ol'=>1, 'p'=>1, 'pre'=>1, 'script'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'tr'=>1, 'ul'=>1);
+ $alterDeprecAttr = isset($deprecAttrPossibleEleAr[$ele]) ? 1 : 0;
+ }
+
+ // -- Standard attribute values that may need lowercasing.
+
+ if ($C['lc_std_val']) {
+ static $lCaseStdAttrValAr = array('all'=>1, 'auto'=>1, 'baseline'=>1, 'bottom'=>1, 'button'=>1, 'captions'=>1, 'center'=>1, 'chapters'=>1, 'char'=>1, 'checkbox'=>1, 'circle'=>1, 'col'=>1, 'colgroup'=>1, 'color'=>1, 'cols'=>1, 'data'=>1, 'date'=>1, 'datetime'=>1, 'datetime-local'=>1, 'default'=>1, 'descriptions'=>1, 'email'=>1, 'file'=>1, 'get'=>1, 'groups'=>1, 'hidden'=>1, 'image'=>1, 'justify'=>1, 'left'=>1, 'ltr'=>1, 'metadata'=>1, 'middle'=>1, 'month'=>1, 'none'=>1, 'number'=>1, 'object'=>1, 'password'=>1, 'poly'=>1, 'post'=>1, 'preserve'=>1, 'radio'=>1, 'range'=>1, 'rect'=>1, 'ref'=>1, 'reset'=>1, 'right'=>1, 'row'=>1, 'rowgroup'=>1, 'rows'=>1, 'rtl'=>1, 'search'=>1, 'submit'=>1, 'subtitles'=>1, 'tel'=>1, 'text'=>1, 'time'=>1, 'top'=>1, 'url'=>1, 'week'=>1);
+ static $lCaseStdAttrValPossibleEleAr = array('a'=>1, 'area'=>1, 'bdo'=>1, 'button'=>1, 'col'=>1, 'fieldset'=>1, 'form'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'ol'=>1, 'optgroup'=>1, 'option'=>1, 'param'=>1, 'script'=>1, 'select'=>1, 'table'=>1, 'td'=>1, 'textarea'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1, 'track'=>1, 'xml:space'=>1);
+ $lCaseStdAttrVal = isset($lCaseStdAttrValPossibleEleAr[$ele]) ? 1 : 0;
+ }
+
+ // -- Get attribute name-value pairs.
+
+ if (strpos($attrStr, "\x01") !== false) { // Remove CDATA/comment
+ $attrStr = preg_replace('`\x01[^\x01]*\x01`', '', $attrStr);
+ }
+ $attrStr = trim($attrStr, ' /');
+ $attrAr = array();
+ $state = 0;
+ while (strlen($attrStr)) {
+ $ok = 0; // For parsing errors, to deal with space, ", and ' characters
+ switch ($state) {
+ case 0: if (preg_match('`^[^=\s/\x7f-\x9f]+`', $attrStr, $m)) { // Name
+ $attr = strtolower($m[0]);
+ $ok = $state = 1;
+ $attrStr = ltrim(substr_replace($attrStr, '', 0, strlen($m[0])));
+ }
+ break; case 1: if ($attrStr[0] == '=') {
+ $ok = 1;
+ $state = 2;
+ $attrStr = ltrim($attrStr, '= ');
+ } else { // No value
+ $ok = 1;
+ $state = 0;
+ $attrStr = ltrim($attrStr);
+ $attrAr[$attr] = '';
+ }
+ break; case 2: if (preg_match('`^((?:"[^"]*")|(?:\'[^\']*\')|(?:\s*[^\s"\']+))(.*)`', $attrStr, $m)) { // Value
+ $attrStr = ltrim($m[2]);
+ $m = $m[1];
+ $ok = 1;
+ $state = 0;
+ $attrAr[$attr] =
+ trim(
+ str_replace('<', '<',
+ ($m[0] == '"' || $m[0] == '\'')
+ ? substr($m, 1, -1)
+ : $m));
+ }
+ break;
+ }
+ if (!$ok) {
+ $attrStr = preg_replace('`^(?:"[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*`', '', $attrStr);
+ $state = 0;
+ }
+ }
+ if ($state == 1) {
+ $attrAr[$attr] = '';
+ }
+
+ // -- Clean attributes.
+
+ global $S;
+ $eleSpec = isset($S[$ele]) ? $S[$ele] : array();
+ $filtAttrAr = array(); // Finalized attributes
+ $deniedAttrAr = $C['deny_attribute'];
+
+ foreach ($attrAr as $attr=>$v) {
+
+ // .. Check if attribute is permitted.
+
+ if (
+
+ // .... Valid attribute.
+
+ ((isset($attrEleAr[$attr][$ele])
+ || isset($globalAttrAr[$attr])
+ || preg_match('`data-((?!xml)[^:]+$)`', $attr)
+ || (strpos($ele, '-')
+ && strpos($attr, 'data-xml') !== 0))
+
+ // .... No denial through $spec.
+
+ && (empty($eleSpec)
+ || (!isset($eleSpec['deny'])
+ || (!isset($eleSpec['deny']['*'])
+ && !isset($eleSpec['deny'][$attr])
+ && !isset($eleSpec['deny'][preg_replace('`^(on|aria|data).+`', '\\1', $attr). '*']))))
+
+ // .... No denial through $config.
+
+ && (empty($deniedAttrAr)
+ || (isset($deniedAttrAr['*'])
+ ? (isset($deniedAttrAr["-$attr"])
+ || isset($deniedAttrAr['-'. preg_replace('`^(on|aria|data)..+`', '\\1', $attr). '*']))
+ : (!isset($deniedAttrAr[$attr])
+ && !isset($deniedAttrAr[preg_replace('`^(on|aria|data).+`', '\\1', $attr). '*'])))))
+
+ // .... Permit if permission through $spec.
+
+ || (!empty($eleSpec)
+ && (isset($eleSpec[$attr])
+ || (isset($globalAttrAr[$attr])
+ && isset($eleSpec[preg_replace('`^(on|aria|data).+`', '\\1', $attr). '*']))))
+ ) {
+
+ // .. Attribute with no value or standard value.
+
+ if (isset($emptyAttrAr[$attr])) {
+ $v = $attr;
+ } elseif (
+ !empty($lCaseStdAttrVal) // ! Rather loose but should be ok
+ && (($ele != 'button' || $ele != 'input')
+ || $attr == 'type')
+ ) {
+ $v = (isset($lCaseStdAttrValAr[($vNew = strtolower($v))])) ? $vNew : $v;
+ }
+
+ // .. URLs and CSS expressions in style attribute.
+
+ if ($attr == 'style' && !$C['style_pass']) {
+ if (false !== strpos($v, '')) { // Change any entity to character
+ static $entityAr = array(' '=>' ', ' '=>' ', ':'=>':', ':'=>':', '"'=>'"', '"'=>'"', '('=>'(', '('=>'(', ')'=>')', ')'=>')', '*'=>'*', '*'=>'*', '/'=>'/', '/'=>'/', '\'=>'\\', '\'=>'\\', 'e'=>'e', 'E'=>'e', 'E'=>'e', 'e'=>'e', 'i'=>'i', 'I'=>'i', 'I'=>'i', 'i'=>'i', 'l'=>'l', 'L'=>'l', 'L'=>'l', 'l'=>'l', 'n'=>'n', 'N'=>'n', 'N'=>'n', 'n'=>'n', 'o'=>'o', 'O'=>'o', 'O'=>'o', 'o'=>'o', 'p'=>'p', 'P'=>'p', 'P'=>'p', 'p'=>'p', 'r'=>'r', 'R'=>'r', 'R'=>'r', 'r'=>'r', 's'=>'s', 'S'=>'s', 'S'=>'s', 's'=>'s', 'u'=>'u', 'U'=>'u', 'U'=>'u', 'u'=>'u', 'x'=>'x', 'X'=>'x', 'X'=>'x', 'x'=>'x', '''=>"'", '''=>"'");
+ $v = strtr($v, $entityAr);
}
- }
- if (1 === $mode) {
- $aA[$nm] = '';
- }
+ $v =
+ preg_replace_callback(
+ '`(url(?:\()(?: )*(?:\'|"|&(?:quot|apos);)?)(.+?)((?:\'|"|&(?:quot|apos);)?(?: )*(?:\)))`iS',
+ 'hl_url',
+ $v);
+ $v = !$C['css_expression']
+ ? preg_replace('`expression`i', ' ', preg_replace('`\\\\\S|(/|(%2f))(\*|(%2a))`i', ' ', $v))
+ : $v;
- // clean attrs
- global $S;
- $rl = isset($S[$e]) ? $S[$e] : [];
- $a = [];
- $nfr = 0;
- $d = $C['deny_attribute'];
- foreach ($aA as $k => $v) {
- if (((isset($d['*']) ? isset($d[$k]) : !isset($d[$k])) && (isset($aN[$k][$e]) || isset($aNU[$k]) || (isset($aNO[$k]) && !isset($d['on*'])) || (isset($aNA[$k]) && !isset($d['aria*'])) || (!isset($d['data*']) && preg_match('`data-((?!xml)[^:]+$)`', $k)) || strpos($e, '-')) && !isset($rl['n'][$k]) && !isset($rl['n']['*'])) || isset($rl[$k])) {
- if (isset($aNE[$k])) {
- $v = $k;
- } elseif (!empty($lcase) && (('button' !== $e || 'input' !== $e) || 'type' === $k)) { // Rather loose but ?not cause issues
- $v = (isset($aNL[($v2 = strtolower($v))])) ? $v2 : $v;
- }
- if ('style' === $k && !$C['style_pass']) {
- if (false !== strpos($v, '')) {
- static $sC = [' ' => ' ', ' ' => ' ', 'E' => 'e', 'E' => 'e', 'e' => 'e', 'e' => 'e', 'X' => 'x', 'X' => 'x', 'x' => 'x', 'x' => 'x', 'P' => 'p', 'P' => 'p', 'p' => 'p', 'p' => 'p', 'S' => 's', 'S' => 's', 's' => 's', 's' => 's', 'I' => 'i', 'I' => 'i', 'i' => 'i', 'i' => 'i', 'O' => 'o', 'O' => 'o', 'o' => 'o', 'o' => 'o', 'N' => 'n', 'N' => 'n', 'n' => 'n', 'n' => 'n', 'U' => 'u', 'U' => 'u', 'u' => 'u', 'u' => 'u', 'R' => 'r', 'R' => 'r', 'r' => 'r', 'r' => 'r', 'L' => 'l', 'L' => 'l', 'l' => 'l', 'l' => 'l', '(' => '(', '(' => '(', ')' => ')', ')' => ')', ' ' => ':', ' ' => ':', '"' => '"', '"' => '"', ''' => "'", ''' => "'", '/' => '/', '/' => '/', '*' => '*', '*' => '*', '\' => '\\', '\' => '\\'];
- $v = strtr($v, $sC);
- }
- $v = preg_replace_callback('`(url(?:\()(?: )*(?:\'|"|&(?:quot|apos);)?)(.+?)((?:\'|"|&(?:quot|apos);)?(?: )*(?:\)))`iS', 'hl_prot', $v);
- $v = !$C['css_expression'] ? preg_replace('`expression`i', ' ', preg_replace('`\\\\\S|(/|(%2f))(\*|(%2a))`i', ' ', $v)) : $v;
- } elseif (isset($aNP[$k]) || isset($aNO[$k])) {
- $v = str_replace('', ' ', (false !== strpos($v, '&') ? str_replace(['', '', ''], ' ', $v) : $v)); // double-quoted char: soft-hyphen; appears here as "" or hyphen or something else depending on viewing software
- if ('srcset' === $k) {
- $v2 = '';
- // Following pattern tries to implement srcset spec
- // See https://html.spec.whatwg.org/dev/images.html#srcset-attributes
- // See https://html.spec.whatwg.org/#parse-a-srcset-attribute
- $pattern = "/(?:\s*(?:[^,\s][^\s]*[^,\s])(?:\s*\S*\s*))(?:,|$)/";
- preg_match_all($pattern, $v, $matches);
- $matches = call_user_func_array('array_merge', $matches);
- foreach ($matches as $k1 => $v1) {
- $v1 = explode(' ', trim($v1, ', '), 2);
- $k1 = isset($v1[1]) ? trim($v1[1]) : '';
- if ('' !== $k1 && !preg_match('/(?:\d+(?:\.\d*)?[wx])/', $k1)) {
- // We remove candidates with an invalid descriptor
- continue;
- }
- $v1 = trim($v1[0]);
- if (isset($v1[0])) {
- $v2 .= hl_prot($v1, $k) . (empty($k1) ? '' : ' ' . $k1) . ', ';
- }
- }
- $v = trim($v2, ', ');
- }
- if ('itemtype' === $k) {
- $v2 = '';
- foreach (explode(' ', $v) as $v1) {
- if (isset($v1[0])) {
- $v2 .= hl_prot($v1, $k) . ' ';
- }
- }
- $v = trim($v2, ' ');
- } else {
- $v = hl_prot($v, $k);
- }
- if ('href' === $k) { // X-spam
- if ($C['anti_mail_spam'] && 0 === strpos($v, 'mailto:')) {
- $v = str_replace('@', htmlspecialchars($C['anti_mail_spam']), $v);
- } elseif ($C['anti_link_spam']) {
- $r1 = $C['anti_link_spam'][1];
- if (!empty($r1) && preg_match($r1, $v)) {
- continue;
- }
- $r0 = $C['anti_link_spam'][0];
- if (!empty($r0) && preg_match($r0, $v)) {
- if (isset($a['rel'])) {
- if (!preg_match('`\bnofollow\b`i', $a['rel'])) {
- $a['rel'] .= ' nofollow';
- }
- } elseif (isset($aA['rel'])) {
- if (!preg_match('`\bnofollow\b`i', $aA['rel'])) {
- $nfr = 1;
- }
- } else {
- $a['rel'] = 'nofollow';
- }
- }
- }
- }
+ // .. URLs in other attributes.
+
+ } elseif (isset($urlAttrAr[$attr]) || (isset($globalAttrAr[$attr]) && strpos($attr, 'on') === 0)) {
+ $v =
+ str_replace("", ' ',
+ (strpos($v, '&') !== false // ! Double-quoted character = soft-hyphen
+ ? str_replace(array('', '', ''), ' ', $v)
+ : $v));
+ if ($attr == 'srcset' || ($attr == 'archive' && $ele == 'applet')) {
+ $vNew = '';
+ // Following pattern tries to implement srcset spec
+ // See https://html.spec.whatwg.org/dev/images.html#srcset-attributes
+ // See https://html.spec.whatwg.org/#parse-a-srcset-attribute
+ $pattern = "/(?:\s*(?:[^,\s][^\s]*[^,\s])(?:\s*\S*\s*))(?:,|$)/";
+ preg_match_all($pattern, $v, $matches);
+ $matches = call_user_func_array('array_merge', $matches);
+ foreach ($matches as $k=>$x) {
+ $x = explode(' ', trim($x, ', '), 2);
+ $k = isset($x[1]) ? trim($x[1]) : '';
+ if ('' !== $k && !preg_match('/(?:\d+(?:\.\d*)?[wx])/', $k)) {
+ // We remove candidates with an invalid descriptor
+ continue;
}
- if (isset($rl[$k]) && is_array($rl[$k]) && ($v = hl_attrval($k, $v, $rl[$k])) === 0) {
- continue;
+ $x = trim($x[0]);
+ if (isset($x[0])) {
+ $vNew .= hl_url($x, $attr). (empty($k) ? '' : ' '. $k). ', ';
}
- $a[$k] = str_replace('"', '"', $v);
+ }
+ $v = trim($vNew, ', ');
}
- }
- if ($nfr) {
- $a['rel'] = isset($a['rel']) ? $a['rel'] . ' nofollow' : 'nofollow';
- }
-
- // rqd attr
- static $eAR = ['area' => ['alt' => 'area'], 'bdo' => ['dir' => 'ltr'], 'command' => ['label' => ''], 'form' => ['action' => ''], 'img' => ['src' => 'data:image/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==', 'alt' => 'image'], 'map' => ['name' => ''], 'optgroup' => ['label' => ''], 'param' => ['name' => ''], 'style' => ['scoped' => ''], 'textarea' => ['rows' => '10', 'cols' => '50']];
- if (isset($eAR[$e])) {
- foreach ($eAR[$e] as $k => $v) {
- if (!isset($a[$k])) {
- $a[$k] = isset($v[0]) ? $v : $k;
+ if ($attr == 'itemtype' || ($attr == 'archive' && $ele == 'object')) {
+ $vNew = '';
+ foreach (explode(' ', $v) as $x) {
+ if (isset($x[0])) {
+ $vNew .= hl_url($x, $attr). ' ';
}
+ }
+ $v = trim($vNew, ' ');
+ } else {
+ $v = hl_url($v, $attr);
}
- }
- // depr attr
- if ($depTr) {
- $c = [];
- foreach ($a as $k => $v) {
- if ('style' === $k || !isset($aND[$k][$e])) {
- continue;
+ // Anti-spam measure.
+
+ if ($attr == 'href') {
+ if ($C['anti_mail_spam'] && strpos($v, 'mailto:') === 0) {
+ $v = str_replace('@', htmlspecialchars($C['anti_mail_spam']), $v);
+ } elseif ($C['anti_link_spam']) {
+ $x = $C['anti_link_spam'][1];
+ if (!empty($x) && preg_match($x, $v)) {
+ continue;
}
- $v = str_replace(['\\', ':', ';', ''], '', $v);
- if ('align' === $k) {
- unset($a['align']);
- if ('img' === $e && ('left' === $v || 'right' === $v)) {
- $c[] = 'float: ' . $v;
- } elseif (('div' === $e || 'table' === $e) && 'center' === $v) {
- $c[] = 'margin: auto';
- } else {
- $c[] = 'text-align: ' . $v;
- }
- } elseif ('bgcolor' === $k) {
- unset($a['bgcolor']);
- $c[] = 'background-color: ' . $v;
- } elseif ('border' === $k) {
- unset($a['border']);
- $c[] = "border: {$v}px";
- } elseif ('bordercolor' === $k) {
- unset($a['bordercolor']);
- $c[] = 'border-color: ' . $v;
- } elseif ('cellspacing' === $k) {
- unset($a['cellspacing']);
- $c[] = "border-spacing: {$v}px";
- } elseif ('clear' === $k) {
- unset($a['clear']);
- $c[] = 'clear: ' . ('all' !== $v ? $v : 'both');
- } elseif ('compact' === $k) {
- unset($a['compact']);
- $c[] = 'font-size: 85%';
- } elseif ('height' === $k || 'width' === $k) {
- unset($a[$k]);
- $c[] = $k . ': ' . (isset($v[0]) && '*' !== $v[0] ? $v . (ctype_digit($v) ? 'px' : '') : 'auto');
- } elseif ('hspace' === $k) {
- unset($a['hspace']);
- $c[] = "margin-left: {$v}px; margin-right: {$v}px";
- } elseif ('language' === $k && !isset($a['type'])) {
- unset($a['language']);
- $a['type'] = 'text/' . strtolower($v);
- } elseif ('name' === $k) {
- if (2 === $C['no_deprecated_attr'] || ('a' !== $e && 'map' !== $e)) {
- unset($a['name']);
+ $x = $C['anti_link_spam'][0];
+ if (!empty($x) && preg_match($x, $v)) {
+ if (isset($filtAttrAr['rel'])) {
+ if (!preg_match('`\bnofollow\b`i', $filtAttrAr['rel'])) {
+ $filtAttrAr['rel'] .= ' nofollow';
}
- if (!isset($a['id']) && !preg_match('`\W`', $v)) {
- $a['id'] = $v;
+ } elseif (isset($attrAr['rel'])) {
+ if (!preg_match('`\bnofollow\b`i', $attrAr['rel'])) {
+ $addNofollow = 1;
}
- } elseif ('noshade' === $k) {
- unset($a['noshade']);
- $c[] = 'border-style: none; border: 0; background-color: gray; color: gray';
- } elseif ('nowrap' === $k) {
- unset($a['nowrap']);
- $c[] = 'white-space: nowrap';
- } elseif ('size' === $k) {
- unset($a['size']);
- $c[] = 'size: ' . $v . 'px';
- } elseif ('vspace' === $k) {
- unset($a['vspace']);
- $c[] = "margin-top: {$v}px; margin-bottom: {$v}px";
- }
- }
- if (count($c)) {
- $c = implode('; ', $c);
- $a['style'] = isset($a['style']) ? rtrim($a['style'], ' ;') . '; ' . $c . ';' : $c . ';';
- }
- }
- // unique ID
- if ($C['unique_ids'] && isset($a['id'])) {
- if (preg_match('`\s`', ($id = $a['id'])) || (isset($GLOBALS['hl_Ids'][$id]) && 1 === $C['unique_ids'])) {
- unset($a['id']);
- } else {
- while (isset($GLOBALS['hl_Ids'][$id])) {
- $id = $C['unique_ids'] . $id;
+ } else {
+ $filtAttrAr['rel'] = 'nofollow';
+ }
}
- $GLOBALS['hl_Ids'][($a['id'] = $id)] = 1;
- }
- }
- // xml:lang
- if ($C['xml:lang'] && isset($a['lang'])) {
- $a['xml:lang'] = isset($a['xml:lang']) ? $a['xml:lang'] : $a['lang'];
- if (2 === $C['xml:lang']) {
- unset($a['lang']);
- }
- }
- // for transformed tag
- if (!empty($trt)) {
- $a['style'] = isset($a['style']) ? rtrim($a['style'], ' ;') . '; ' . $trt : $trt;
- }
- // return with empty ele /
- if (empty($C['hook_tag'])) {
- $aA = '';
- foreach ($a as $k => $v) {
- $aA .= " {$k}=\"{$v}\"";
+ }
}
+ }
- return "<{$e}{$aA}" . (isset($eE[$e]) ? ' /' : '') . '>';
+ // .. Check attribute value against any $spec rule.
+
+ if (isset($eleSpec[$attr])
+ && is_array($eleSpec[$attr])
+ && ($v = hl_attributeValue($attr, $v, $eleSpec[$attr], $ele)) === 0) {
+ continue;
+ }
+
+ $filtAttrAr[$attr] = str_replace('"', '"', $v);
}
+ }
- return $C['hook_tag']($e, $a);
-}
+ // -- Add nofollow.
-function hl_tag2(&$e, &$a, $t = 1)
-{
- // transform tag
- if ('big' === $e) {
- $e = 'span';
+ if (isset($addNofollow)) {
+ $filtAttrAr['rel'] = isset($filtAttrAr['rel']) ? $filtAttrAr['rel']. ' nofollow' : 'nofollow';
+ }
- return 'font-size: larger;';
- }
- if ('s' === $e || 'strike' === $e) {
- $e = 'span';
+ // -- Add required attributes.
- return 'text-decoration: line-through;';
- }
- if ('tt' === $e) {
- $e = 'code';
+ static $requiredAttrAr = array('area'=>array('alt'=>'area'), 'bdo'=>array('dir'=>'ltr'), 'command'=>array('label'=>''), 'form'=>array('action'=>''), 'img'=>array('src'=>'data:image/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==', 'alt'=>'image'), 'map'=>array('name'=>''), 'optgroup'=>array('label'=>''), 'param'=>array('name'=>''), 'style'=>array('scoped'=>''), 'textarea'=>array('rows'=>'10', 'cols'=>'50'));
+ if (isset($requiredAttrAr[$ele])) {
+ foreach ($requiredAttrAr[$ele] as $k=>$v) {
+ if (!isset($filtAttrAr[$k])) {
+ $filtAttrAr[$k] = isset($v[0]) ? $v : $k;
+ }
+ }
+ }
- return '';
- }
- if ('center' === $e) {
- $e = 'div';
+ // -- Transform deprecated attributes into CSS declarations in style attribute.
- return 'text-align: center;';
- }
- static $fs = ['0' => 'xx-small', '1' => 'xx-small', '2' => 'small', '3' => 'medium', '4' => 'large', '5' => 'x-large', '6' => 'xx-large', '7' => '300%', '-1' => 'smaller', '-2' => '60%', '+1' => 'larger', '+2' => '150%', '+3' => '200%', '+4' => '300%'];
- if ('font' === $e) {
- $a2 = '';
- while (preg_match('`(^|\s)(color|size)\s*=\s*(\'|")?(.+?)(\\3|\s|$)`i', $a, $m)) {
- $a = str_replace($m[0], ' ', $a);
- $a2 .= 'color' === strtolower($m[2]) ? (' color: ' . str_replace(['"', ';', ':'], '\'', trim($m[4])) . ';') : (isset($fs[($m = trim($m[4]))]) ? (' font-size: ' . $fs[$m] . ';') : '');
+ if ($alterDeprecAttr) {
+ $css = array();
+ foreach ($filtAttrAr as $name=>$val) {
+ if ($name == 'style' || !isset($deprecAttrEleAr[$name][$ele])) {
+ continue;
+ }
+ $val = str_replace(array('\\', ':', ';', ''), '', $val);
+ if ($name == 'align') {
+ unset($filtAttrAr['align']);
+ if ($ele == 'img' && ($val == 'left' || $val == 'right')) {
+ $css[] = 'float: '. $val;
+ } elseif (($ele == 'div' || $ele == 'table') && $val == 'center') {
+ $css[] = 'margin: auto';
+ } else {
+ $css[] = 'text-align: '. $val;
+ }
+ } elseif ($name == 'bgcolor') {
+ unset($filtAttrAr['bgcolor']);
+ $css[] = 'background-color: '. $val;
+ } elseif ($name == 'border') {
+ unset($filtAttrAr['border']);
+ $css[] = "border: {$val}px";
+ } elseif ($name == 'bordercolor') {
+ unset($filtAttrAr['bordercolor']);
+ $css[] = 'border-color: '. $val;
+ } elseif ($name == 'cellspacing') {
+ unset($filtAttrAr['cellspacing']);
+ $css[] = "border-spacing: {$val}px";
+ } elseif ($name == 'clear') {
+ unset($filtAttrAr['clear']);
+ $css[] = 'clear: '. ($val != 'all' ? $val : 'both');
+ } elseif ($name == 'compact') {
+ unset($filtAttrAr['compact']);
+ $css[] = 'font-size: 85%';
+ } elseif ($name == 'height' || $name == 'width') {
+ unset($filtAttrAr[$name]);
+ $css[] =
+ $name
+ . ': '
+ . ((isset($val[0]) && $val[0] != '*')
+ ? $val. (ctype_digit($val) ? 'px' : '')
+ : 'auto');
+ } elseif ($name == 'hspace') {
+ unset($filtAttrAr['hspace']);
+ $css[] = "margin-left: {$val}px; margin-right: {$val}px";
+ } elseif ($name == 'language' && !isset($filtAttrAr['type'])) {
+ unset($filtAttrAr['language']);
+ $filtAttrAr['type'] = 'text/'. strtolower($val);
+ } elseif ($name == 'name') {
+ if ($C['no_deprecated_attr'] == 2 || ($ele != 'a' && $ele != 'map')) {
+ unset($filtAttrAr['name']);
}
- while (preg_match('`(^|\s)face\s*=\s*(\'|")?([^=]+?)\\2`i', $a, $m) || preg_match('`(^|\s)face\s*=(\s*)(\S+)`i', $a, $m)) {
- $a = str_replace($m[0], ' ', $a);
- $a2 .= ' font-family: ' . str_replace(['"', ';', ':'], '\'', trim($m[3])) . ';';
+ if (!isset($filtAttrAr['id']) && !preg_match('`\W`', $val)) {
+ $filtAttrAr['id'] = $val;
}
- $e = 'span';
+ } elseif ($name == 'noshade') {
+ unset($filtAttrAr['noshade']);
+ $css[] = 'border-style: none; border: 0; background-color: gray; color: gray';
+ } elseif ($name == 'nowrap') {
+ unset($filtAttrAr['nowrap']);
+ $css[] = 'white-space: nowrap';
+ } elseif ($name == 'size') {
+ unset($filtAttrAr['size']);
+ $css[] = 'size: '. $val. 'px';
+ } elseif ($name == 'vspace') {
+ unset($filtAttrAr['vspace']);
+ $css[] = "margin-top: {$val}px; margin-bottom: {$val}px";
+ }
+ }
+ if (count($css)) {
+ $css = implode('; ', $css);
+ $filtAttrAr['style'] =
+ isset($filtAttrAr['style'])
+ ? rtrim($filtAttrAr['style'], ' ;'). '; '. $css. ';'
+ : $css. ';';
+ }
+ }
- return ltrim(str_replace('<', '', $a2));
- }
- if ('acronym' === $e) {
- $e = 'abbr';
+ // -- Enforce unique id attribute values.
- return '';
+ if ($C['unique_ids'] && isset($filtAttrAr['id'])) {
+ if (preg_match('`\s`', ($id = $filtAttrAr['id'])) || (isset($GLOBALS['hl_Ids'][$id]) && $C['unique_ids'] == 1)) {
+ unset($filtAttrAr['id']);
+ } else {
+ while (isset($GLOBALS['hl_Ids'][$id])) {
+ $id = $C['unique_ids']. $id;
+ }
+ $GLOBALS['hl_Ids'][($filtAttrAr['id'] = $id)] = 1;
}
- if ('dir' === $e) {
- $e = 'ul';
+ }
- return '';
- }
- if (2 === $t) {
- $e = 0;
+ // -- Handle lang attributes.
- return 0;
+ if ($C['xml:lang'] && isset($filtAttrAr['lang'])) {
+ $filtAttrAr['xml:lang'] = isset($filtAttrAr['xml:lang']) ? $filtAttrAr['xml:lang'] : $filtAttrAr['lang'];
+ if ($C['xml:lang'] == 2) {
+ unset($filtAttrAr['lang']);
}
+ }
- return '';
+ // -- If transformed element, modify style attribute.
+
+ if (!empty($eleTransformed)) {
+ $filtAttrAr['style'] =
+ isset($filtAttrAr['style'])
+ ? rtrim($filtAttrAr['style'], ' ;'). '; '. $eleTransformed
+ : $eleTransformed;
+ }
+
+ // -- Return opening tag with attributes.
+
+ if (empty($C['hook_tag'])) {
+ $attrStr = '';
+ foreach ($filtAttrAr as $k=>$v) {
+ $attrStr .= " {$k}=\"{$v}\"";
+ }
+ return "<{$ele}{$attrStr}". (isset($emptyEleAr[$ele]) ? ' /' : ''). '>';
+ } else {
+ return call_user_func($C['hook_tag'], $ele, $filtAttrAr);
+ }
}
-function hl_tidy($t, $w, $p)
+/**
+ * Tidy/beautify HTM by adding newline and other spaces (padding),
+ * or compact by removing unnecessary spaces.
+ *
+ * @param string $t HTM.
+ * @param mixed $format -1 (compact) or string (type of padding).
+ * @param string $parentEle Parent element of $t.
+ * @return string Padded or compacted HTM, or $t unchanged when the parent element is exempt from tidying.
+ */
+function hl_tidy($t, $format, $parentEle)
{
- // tidy/compact HTM
- if (strpos(' pre,script,textarea', "$p,")) {
- return $t;
- }
- if (!function_exists('hl_aux2')) {
- function hl_aux2($m)
- {
- return $m[1] . str_replace(['<', '>', "\n", "\r", "\t", ' '], ["\x01", "\x02", "\x03", "\x04", "\x05", "\x07"], $m[3]) . $m[4];
+ if (strpos(' pre,script,textarea', "$parentEle,")) {
+ return $t;
+ }
+
+ // Hide markup and whitespace characters inside CDATA sections, comments, and pre/script/textarea content.
+
+ if (!function_exists('hl_aux2')) {
+ function hl_aux2($x) {
+ return
+ $x[1]
+ . str_replace(
+ array("<", ">", "\n", "\r", "\t", ' '),
+ array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"),
+ $x[3])
+ . $x[4];
+ }
+ }
+ $t =
+ preg_replace(
+ array('`(<\w[^>]*(?)\s+`', '`\s+`', '`(<\w[^>]*(?) `'),
+ array(' $1', ' ', '$1'),
+ preg_replace_callback(
+ array('`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea)[^>]*?>)(.+?)(\2>)`sm'),
+ 'hl_aux2',
+ $t));
+
+ if (($format = strtolower($format)) == -1) {
+ return
+ str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t);
+ }
+ $padChar = strpos(" $format", 't') ? "\t" : ' ';
+ $padStr =
+ preg_match('`\d`', $format, $m)
+ ? str_repeat($padChar, intval($m[0]))
+ : str_repeat($padChar, ($padChar == "\t" ? 1 : 2));
+ $leadN = preg_match('`[ts]([1-9])`', $format, $m) ? intval($m[1]) : 0;
+
+ // Group elements by line-break requirement.
+
+ $postCloseEleAr = array('br'=>1); // After closing
+ $preEleAr = array('button'=>1, 'command'=>1, 'input'=>1, 'option'=>1, 'param'=>1, 'track'=>1); // Before opening or closing
+ $preOpenPostCloseEleAr = array('audio'=>1, 'canvas'=>1, 'caption'=>1, 'dd'=>1, 'dt'=>1, 'figcaption'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'isindex'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'object'=>1, 'p'=>1, 'pre'=>1, 'style'=>1, 'summary'=>1, 'td'=>1, 'textarea'=>1, 'th'=>1, 'video'=>1); // Before opening and after closing
+ $prePostEleAr = array('address'=>1, 'article'=>1, 'aside'=>1, 'blockquote'=>1, 'center'=>1, 'colgroup'=>1, 'datalist'=>1, 'details'=>1, 'dialog'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'fieldset'=>1, 'figure'=>1, 'footer'=>1, 'form'=>1, 'header'=>1, 'hgroup'=>1, 'hr'=>1, 'iframe'=>1, 'main'=>1, 'map'=>1, 'menu'=>1, 'nav'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'picture'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'script'=>1, 'section'=>1, 'select'=>1, 'table'=>1, 'tbody'=>1, 'template'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1); // Before and after opening and closing
+
+ $doPad = 1;
+ $t = explode('<', $t);
+ while ($doPad) {
+ $n = $leadN;
+ $eleAr = $t;
+ ob_start();
+ if (isset($prePostEleAr[$parentEle])) {
+ echo str_repeat($padStr, ++$n);
+ }
+ echo ltrim(array_shift($eleAr));
+ for ($i=-1, $j=count($eleAr); ++$i<$j;) {
+ $rest = '';
+ list($tag, $rest) = explode('>', $eleAr[$i]);
+ $open = $tag[0] == '/' ? 0 : (substr($tag, -1) == '/' ? 1 : ($tag[0] != '!' ? 2 : -1));
+ $ele = !$open ? ltrim($tag, '/') : ($open > 0 ? substr($tag, 0, strcspn($tag, ' ')) : 0);
+ $tag = "<$tag>";
+ if (isset($prePostEleAr[$ele])) {
+ if (!$open) {
+ if ($n) {
+ echo "\n", str_repeat($padStr, --$n), "$tag\n", str_repeat($padStr, $n);
+ } else {
+ ++$leadN;
+ ob_end_clean();
+ continue 2;
+ }
+ } else {
+ echo "\n", str_repeat($padStr, $n), "$tag\n", str_repeat($padStr, ($open != 1 ? ++$n : $n));
}
- }
- $t = preg_replace(['`(<\w[^>]*(?)\s+`', '`\s+`', '`(<\w[^>]*(?) `'], [' $1', ' ', '$1'], preg_replace_callback(['`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea)[^>]*?>)(.+?)(\2>)`sm'], 'hl_aux2', $t));
- if (($w = strtolower($w)) === -1) {
- return str_replace(["\x01", "\x02", "\x03", "\x04", "\x05", "\x07"], ['<', '>', "\n", "\r", "\t", ' '], $t);
- }
- $s = strpos(" $w", 't') ? "\t" : ' ';
- $s = preg_match('`\d`', $w, $m) ? str_repeat($s, $m[0]) : str_repeat($s, ("\t" === $s ? 1 : 2));
- $N = preg_match('`[ts]([1-9])`', $w, $m) ? $m[1] : 0;
- $a = ['br' => 1];
- $b = ['button' => 1, 'command' => 1, 'input' => 1, 'option' => 1, 'param' => 1, 'track' => 1];
- $c = ['audio' => 1, 'canvas' => 1, 'caption' => 1, 'dd' => 1, 'dt' => 1, 'figcaption' => 1, 'h1' => 1, 'h2' => 1, 'h3' => 1, 'h4' => 1, 'h5' => 1, 'h6' => 1, 'isindex' => 1, 'label' => 1, 'legend' => 1, 'li' => 1, 'object' => 1, 'p' => 1, 'pre' => 1, 'style' => 1, 'summary' => 1, 'td' => 1, 'textarea' => 1, 'th' => 1, 'video' => 1];
- $d = ['address' => 1, 'article' => 1, 'aside' => 1, 'blockquote' => 1, 'center' => 1, 'colgroup' => 1, 'datalist' => 1, 'details' => 1, 'dialog' => 1, 'dir' => 1, 'div' => 1, 'dl' => 1, 'fieldset' => 1, 'figure' => 1, 'footer' => 1, 'form' => 1, 'header' => 1, 'hgroup' => 1, 'hr' => 1, 'iframe' => 1, 'main' => 1, 'map' => 1, 'menu' => 1, 'nav' => 1, 'noscript' => 1, 'ol' => 1, 'optgroup' => 1, 'picture' => 1, 'rbc' => 1, 'rtc' => 1, 'ruby' => 1, 'script' => 1, 'section' => 1, 'select' => 1, 'table' => 1, 'tbody' => 1, 'template' => 1, 'tfoot' => 1, 'thead' => 1, 'tr' => 1, 'ul' => 1];
- $T = explode('<', $t);
- $X = 1;
- while ($X) {
- $n = $N;
- $t = $T;
- ob_start();
- if (isset($d[$p])) {
- echo str_repeat($s, ++$n);
+ echo $rest;
+ continue;
+ }
+ $pad = "\n". str_repeat($padStr, $n);
+ if (isset($preOpenPostCloseEleAr[$ele])) {
+ if (!$open) {
+ echo $tag, $pad, $rest;
+ } else {
+ echo $pad, $tag, $rest;
}
- echo ltrim(array_shift($t));
- for ($i = -1, $j = count($t); ++$i < $j;) {
- $r = '';
- list($e, $r) = explode('>', $t[$i]);
- $x = '/' === $e[0] ? 0 : ('/' === substr($e, -1) ? 1 : ('!' !== $e[0] ? 2 : -1));
- $y = !$x ? ltrim($e, '/') : ($x > 0 ? substr($e, 0, strcspn($e, ' ')) : 0);
- $e = "<$e>";
- if (isset($d[$y])) {
- if (!$x) {
- if ($n) {
- echo "\n", str_repeat($s, --$n), "$e\n", str_repeat($s, $n);
- } else {
- ++$N;
- ob_end_clean();
- continue 2;
- }
- } else {
- echo "\n", str_repeat($s, $n), "$e\n", str_repeat($s, (1 !== $x ? ++$n : $n));
- }
- echo $r;
- continue;
- }
- $f = "\n" . str_repeat($s, $n);
- if (isset($c[$y])) {
- if (!$x) {
- echo $e, $f, $r;
- } else {
- echo $f, $e, $r;
- }
- } elseif (isset($b[$y])) {
- echo $f, $e, $r;
- } elseif (isset($a[$y])) {
- echo $e, $f, $r;
- } elseif (!$y) {
- echo $f, $e, $f, $r;
- } else {
- echo $e, $r;
- }
+ } elseif (isset($preEleAr[$ele])) {
+ echo $pad, $tag, $rest;
+ } elseif (isset($postCloseEleAr[$ele])) {
+ echo $tag, $pad, $rest;
+ } elseif (!$ele) {
+ echo $pad, $tag, $pad, $rest;
+ } else {
+ echo $tag, $rest;
+ }
+ }
+ $doPad = 0;
+ }
+ $t = str_replace(array("\n ", " \n"), "\n", preg_replace('`[\n]\s*?[\n]+`', "\n", ob_get_contents()));
+ ob_end_clean();
+ if (($newline = strpos(" $format", 'r') ? (strpos(" $format", 'n') ? "\r\n" : "\r") : 0)) {
+ $t = str_replace("\n", $newline, $t);
+ }
+ return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t);
+}
+
+/**
+ * Handle URL to convert to relative/absolute type,
+ * block scheme, or add anti-spam text.
+ *
+ * @param mixed $url URL string, or array with URL value (if $attr is null).
+ * @param mixed $attr Attribute name string, or null (if $url is array).
+ * @return string With URL after any conversion/obfuscation.
+ */
+function hl_url($url, $attr=null)
+{
+ global $C;
+ $preUrl = $postUrl = '';
+ static $blocker = 'denied:';
+ if ($attr == null) { // style attribute value
+ $attr = 'style';
+ $preUrl = $url[1];
+ $postUrl = $url[3];
+ $url = trim($url[2]);
+ }
+ $okSchemeAr = isset($C['schemes'][$attr]) ? $C['schemes'][$attr] : $C['schemes']['*'];
+ if (isset($okSchemeAr['!']) && substr($url, 0, 7) != $blocker) {
+ $url = "{$blocker}{$url}";
+ }
+ if (isset($okSchemeAr['*'])
+ || !strcspn($url, '#?;')
+ || substr($url, 0, strlen($blocker)) == $blocker
+ ) {
+ return "{$preUrl}{$url}{$postUrl}";
+ }
+ if (preg_match('`^([^:?[@!$()*,=/\'\]]+?)(:|&(#(58|x3a)|colon);|%3a|\\\\0{0,4}3a).`i', $url, $m)
+ && !isset($okSchemeAr[strtolower($m[1])]) // Special crafting suggests malice
+ ) {
+ return "{$preUrl}{$blocker}{$url}{$postUrl}";
+ }
+ if ($C['abs_url']) {
+ if ($C['abs_url'] == -1 && strpos($url, $C['base_url']) === 0) { // Make URL relative
+ $url = substr($url, strlen($C['base_url']));
+ } elseif (empty($m[1])) { // Make URL absolute
+ if (substr($url, 0, 2) == '//') {
+ $url = substr($C['base_url'], 0, strpos($C['base_url'], ':') + 1). $url;
+ } elseif ($url[0] == '/') {
+ $url = preg_replace('`(^.+?://[^/]+)(.*)`', '$1', $C['base_url']). $url;
+ } elseif (strcspn($url, './')) {
+ $url = $C['base_url']. $url;
+ } else {
+ preg_match('`^([a-zA-Z\d\-+.]+://[^/]+)(.*)`', $C['base_url'], $m);
+ $url = preg_replace('`(?<=/)\./`', '', $m[2]. $url);
+ while (preg_match('`(?<=/)([^/]{3,}|[^/.]+?|\.[^/.]|[^/.]\.)/\.\./`', $url)) {
+ $url = preg_replace('`(?<=/)([^/]{3,}|[^/.]+?|\.[^/.]|[^/.]\.)/\.\./`', '', $url);
}
- $X = 0;
- }
- $t = str_replace(["\n ", " \n"], "\n", preg_replace('`[\n]\s*?[\n]+`', "\n", ob_get_contents()));
- ob_end_clean();
- if (($l = strpos(" $w", 'r') ? (strpos(" $w", 'n') ? "\r\n" : "\r") : 0)) {
- $t = str_replace("\n", $l, $t);
+ $url = $m[1]. $url;
+ }
}
-
- return str_replace(["\x01", "\x02", "\x03", "\x04", "\x05", "\x07"], ['<', '>', "\n", "\r", "\t", ' '], $t);
+ }
+ return "{$preUrl}{$url}{$postUrl}";
}
+/**
+ * Report version.
+ *
+ * @return string Version.
+ */
function hl_version()
{
- // version
- return '1.2.7';
+ return '1.2.11';
}
diff --git a/htmLawed_README.htm b/htmLawed_README.htm
index cc88e4e..26513df 100644
--- a/htmLawed_README.htm
+++ b/htmLawed_README.htm
@@ -7,40 +7,40 @@
htmLawed documentation | htmLawed PHP software is a free, open-source, customizable HTML input purifier and filter
@@ -75,6 +75,7 @@ htmLawed documentation
3.3.3 Tag balancing & proper nesting
3.3.4 Elements requiring child elements
3.3.5 Beautify or compact HTML
+ 3.3.6 Custom elements
3.4 Attributes
3.4.1 Auto-addition of XHTML-required attributes
3.4.2 Duplicate/invalid id values
@@ -111,11 +112,11 @@ htmLawed documentation
-
@@ -152,7 +153,7 @@ htmLawed documentation
htmLawed:
* makes input more secure and standard-compliant for HTML as well as generic XML documents ^
- * supports markup for HTML 5 and microdata, ARIA, Ruby, custom attributes, etc. ^
+ * supports markup for HTML 5, custom elements, and microdata, ARIA, Ruby, custom attributes, etc. ^
* can beautify or compact HTML ~
* works with input of almost any character encoding and does not affect it
* has good tolerance for ill-written HTML
@@ -259,7 +260,7 @@ htmLawed documentation
1.6 Availability
(to top)
- htmLawed can be downloaded for free at its website. Besides the htmLawed.php file, the download has the htmLawed documentation (this document) in plain text and HTML formats, a script for testing, and a text file for test-cases. htmLawed is also available as a PHP class (OOP code) at its website.
+ htmLawed can be downloaded for free at its website. Besides the htmLawed.php file, the download has the htmLawed documentation (this document) in plain text and HTML formats, a script for testing, and a text file for test-cases. htmLawed can be installed with Composer, and is also available as a PHP class (OOP code) – see the website. Official htmLawed releases are also put up on Sourceforge.
@@ -353,6 +354,12 @@ htmLawed documentation
0 - no measure taken *
word - @ in mail address in href attribute value is replaced with specified word
+ any_custom_element
+ Permit any custom element; regardless of this setting, specific custom elements can be denied or permitted through $config["elements"]; see section 3.3.6
+
+ 0 - no
+ 1 - yes *
+
balance
Balance tags for well-formedness and proper nesting; see section 3.3.3
@@ -409,7 +416,7 @@ htmLawed documentation
all - *^
* -acronym -big -center -dir -font -isindex -s -strike -tt - ~^
- applet, audio, canvas, embed, iframe, object, script, and video elements not allowed - "^
+ applet, audio, canvas, dialog, embed, iframe, object, script, and video elements not allowed - "^
hexdec_entity
Allow hexadecimal numeric entities and do not convert to the more widely accepted decimal ones, or convert decimal to hexadecimal ones; see section 3.2
@@ -538,7 +545,7 @@ htmLawed documentation
A rule begins with an HTML element name(s) (rule-element), for which the rule applies, followed by an equal-to (=) sign. A rule-element may represent multiple elements if comma (,)-separated element names are used. E.g., th,td,tr=.
- Rest of the rule consists of comma-separated HTML attribute names. A minus (-) character before an attribute means that the attribute is not permitted inside the rule-element. E.g., -width. To deny all attributes, -* can be used.
+ Rest of the rule consists of comma-separated HTML attribute names, which can be the wildcard references *, aria*, data*, and on* for the sets of all standard, Aria, data-*, and event (on*) attributes, respectively. A minus (-) character before an attribute means that the attribute is not permitted inside the rule-element. E.g., -width. To deny all attributes, -* can be used. All Aria, data-*, and event (on*) attributes can similarly be denied using -aria*, -data*, and -on*, respectively.
Following shows examples of rule excerpts with rule-element a and the attributes that are being permitted:
@@ -549,8 +556,9 @@ htmLawed documentation
* a=-* - none
* a=-*, href, title - none except href and title
* a=-*, -id, href, title - none except href and title
+ * a=-on*, -id, href, onclick, title - all except id and on* other than onclick
- Rules regarding attribute values are optionally specified inside round brackets after attribute names in solidus (/)-separated parameter = value pairs. E.g., title(maxlen=30/minlen=5). None or one or more of the following parameters may be specified:
+ Rules regarding attribute values are optionally specified inside round brackets after attribute names – which cannot be wildcard references like * or data* – in solidus (/)-separated parameter = value pairs. E.g., title(maxlen=30/minlen=5). None or one or more of the following parameters may be specified:
* oneof - one or more choices separated by | that the value should match; if only one choice is provided, then the value must match that choice; matching is case-sensitive
@@ -582,13 +590,15 @@ htmLawed documentation
Special characters: The characters ;, ,, /, (, ), |, ~ and space have special meanings in the rules. Words in the rules that use such characters, or the characters themselves, should be escaped by enclosing in pairs of double-quotes ("). A back-tick (`) can be used to escape a literal ". An example rule illustrating this is input=value(maxlen=30/match="/^\w/"/default="your `"ID`"").
- Attributes that accept multiple values: If an attribute is accesskey, class, itemtype or rel, which can have multiple, space-separated values, or srcset, which can have multiple, comma-separated values, htmLawed will parse the attribute value for such multiple values and will individually test each of them.
+ Attributes that accept multiple values: If an attribute is accesskey, class, itemtype or rel, or archive in case of object element, which can have multiple, space-separated values, or archive in case of applet element and srcset, which can have multiple, comma-separated values, htmLawed will parse the attribute value for such multiple values and will individually test each of them. The parsing is performed after any URL assessment of the attribute values (section 3.4.3).
Note: To deny an attribute for all elements for which it is legal, $config["deny_attribute"] (see section 3.4) can be used instead of $spec. Also, attributes can be allowed element-specifically through $spec while being denied globally through $config["deny_attribute"]. The hook_tag parameter (section 3.4.9) can also be possibly used to implement a functionality like that achieved using $spec functionality.
- Note: Attributes' specifications for an element may be set through multiple rules. In case of conflict, the attribute specification in the first rule will get precedence.
+ Note: Attributes permitted through $spec are permitted regardless of any denial through $config. An attribute for which $spec indicates both permission and denial will be permitted. E.g., onclick with $spec value of a = *, -onclick, onclick, a = -on*, onclick or a = on*, -onclick will be permitted inside a.
+
+ Note: Attributes' specifications for an element may be (inadvertently) set through multiple rules. In case of conflict, the attribute specification in the first rule will get precedence.
- $spec can also be used to permit custom, non-standard attributes as well as custom rules for standard attributes. Thus, the following value of $spec will permit the custom uses of the standard rel attribute in input (not permitted as per standards) and of a non-standard attribute, vFlag, in img.
+ $spec can also be used to permit custom or non-standard attributes. Thus, the following value of $spec will permit the custom uses of the standard rel attribute in input (not permitted as per standards) and of a non-standard attribute, vFlag, in img.
$spec = 'img=vFlag; input=rel'
@@ -612,7 +622,7 @@ htmLawed documentation
When setting the parameters/arguments (like those to allow certain HTML elements) for use with htmLawed, one should bear in mind that the setting may let through potentially dangerous HTML code which is meant to steal user-data, deface a website, render a page non-functional, etc. Unless end-users, either people or software, supplying the content are completely trusted, security issues arising from the degree of HTML usage permitted through htmLawed's setting should be considered. For example, following increase security risks:
- * Allowing script, applet, embed, iframe, canvas, audio, video or object elements, or certain of their attributes like allowscriptaccess
+ * Allowing script, applet, embed, iframe, canvas, audio, video, dialog or object elements, or certain of their attributes like allowscriptaccess
* Allowing HTML comments (some Internet Explorer versions are vulnerable with, e.g., <!--[if gte IE 4]><script>alert("xss");</script><![endif]-->
@@ -781,7 +791,7 @@ htmLawed documentation
It should be borne in mind that no browser application is 100% standard-compliant, standard specifications continue to evolve, and many browsers accept commonly used non-standard HTML. Regarding security, note that unsafe HTML code is not legally invalid per se.
- * By default, htmLawed will not strictly adhere to the current HTML standard. Admins can configure htmLawed to be more strict about standard compliance. Standard specification for HTML is continuously evolving. There are two bodies (W3C and WHATWG) that specify the standard and their specifications are not identical. E.g., as in mid-2013, the border attribute is valid in table as per W3C but not WHATWG. Thus, htmLawed may not be fully compliant with the standard of a specific group. The HTML standards/rules that htmLawed uses in its logic are a mix of the W3C and WHATWG standards, and can be lax because of the laxity of HTML interpreters (browsers) regarding standards.
+ * htmLawed might not strictly adhere to current HTML standards as standard specification for HTML by WHATWG is continuously evolving, and there is laxity among HTML interpreters (browsers) regarding standards. Admins can configure htmLawed to be more strict about standard compliance.
* In general, htmLawed processes input to generate output that is most likely to be standard-compatible in most users' browsers. Thus, for example, it does not enforce the required value of 0 on border attribute of img (an HTML version 5 specification).
@@ -791,9 +801,9 @@ htmLawed documentation
* By default, htmLawed won't check many attribute values for standard compliance. E.g., width="20m" with the dimension in non-standard m is let through. Implementing universal and strict attribute value checks can make htmLawed slow and resource-intensive. Admins should look at the hook_tag parameter (section 3.4.9) or $spec to enforce finer checks on attribute values.
- * By default, htmLawed considers all ARIA, data-*, event and microdata attributes as global attributes and permits them in all elements. This is not strictly standard-compliant. E.g., the itemtype microdata attribute is permitted only in elements that also have the itemscope attribute. Admins can configure htmLawed to be more strict about this (section 2.3).
+ * By default, htmLawed considers all ARIA, data-*, event, and microdata attributes as global attributes and permits them in all elements. This is not strictly standard-compliant. E.g., the itemtype microdata attribute is permitted only in elements that also have the itemscope attribute. Admins can configure htmLawed to be more strict about this (section 2.3).
- * The attributes, deprecated (which can be transformed too) or not, that it supports are largely those that are in the specifications. Only a few of the proprietary attributes are supported. However, $spec can be used to allow custom attributes (section 2.3).
+ * The attributes, whether deprecated (which can be transformed by htmLawed) or not, that it supports are largely those that are in the specifications. Only a few of the proprietary attributes are supported. However, $spec can be used to allow custom attributes (section 2.3).
* Except for contained URLs and dynamic expressions (also optional), htmLawed does not check CSS style property values. Admins should look at using the hook_tag parameter (section 3.4.9) or $spec for finer checks. Perhaps the best option is to disallow style but allow class attributes with the right oneof or match values for class, and have the various class style properties in .css CSS stylesheet files.
@@ -817,7 +827,7 @@ htmLawed documentation
* htmLawed does not correct certain possible attribute-based security vulnerabilities (e.g., <a href="http://x%22+style=%22background-image:xss">x</a>). These arise when browsers mis-identify markup in escaped text, defeating the very purpose of escaping text (a bad browser will read the given example as <a href="http://x" style="background-image:xss">x</a>).
- * Because of poor Unicode support in PHP, htmLawed does not remove the high value HTML-invalid characters with multi-byte code-points. Such characters however are extremely unlikely to be in the input. (see section 3.1).
+ * Because of inadequate Unicode support in PHP, htmLawed does not remove the high value HTML-invalid characters with multi-byte code-points. Such characters however are extremely unlikely to be in the input. (see section 3.1).
* htmLawed does not check or correct the character encoding of the input it receives. In conjunction with permitting circumstances such as when the character encoding is left undefined through HTTP headers or HTML meta tags, this can permit an exploit (like Google's UTF-7/XSS vulnerability of the past). Also, htmLawed can mangle input text if it is not well-formed in terms of character encoding. Administrators can consider using code available elsewhere to check well-formedness of input text characters to correct any defect.
@@ -974,7 +984,7 @@ htmLawed documentation
Valid character entities take the form &*; where * is #x followed by a hexadecimal number (hexadecimal numeric entity; like   for non-breaking space), or alphanumeric like gt (external or named entity; like for non-breaking space), or # followed by a number (decimal numeric entity; like   for non-breaking space). Character entities referring to the soft-hyphen character (the ­ or \xad character; hexadecimal code-point ad [decimal 173]) in URL-accepting attribute values are always replaced with spaces; soft-hyphens in attribute values introduce vulnerabilities in some older versions of the Opera and Mozilla [Firefox] browsers.
- htmLawed (function hl_ent()):
+ htmLawed (function hl_entity()):
* Neutralizes entities with multiple leading zeroes or missing semi-colons (potentially dangerous)
@@ -984,7 +994,7 @@ htmLawed documentation
* Neutralizes entities referring to characters that are HTML-discouraged (code-points, hexadecimally, 7f to 84, 86 to 9f, and fdd0 to fddf, or decimally, 127 to 132, 134 to 159, and 64991 to 64976). Entities referring to the remaining discouraged characters (see section 5.1 for a full list) are let through.
- * Neutralizes named entities that are not in the specifications
+ * Neutralizes named entities that are not in the HTML5 specification
* Optionally converts valid HTML-specific named entities except >, <, ", and & to decimal numeric ones (hexadecimal if $config["hexdec_entity"] is 2) for generic XML-compliance. For this, $config["named_entity"] should be 1.
@@ -1033,17 +1043,25 @@ htmLawed documentation
See section 3.3.3 for differences between the various non-zero $config["keep_bad"] values.
- htmLawed by default permits these 118 HTML elements:
+ htmLawed by default permits these 122 HTML elements:
- a, abbr, acronym, address, applet, area, article, aside, audio, b, bdi, bdo, big, blockquote, br, button, canvas, caption, center, cite, code, col, colgroup, command, data, datalist, dd, del, details, dfn, dir, div, dl, dt, em, embed, fieldset, figcaption, figure, font, footer, form, h1, h2, h3, h4, h5, h6, header, hgroup, hr, i, iframe, img, input, ins, isindex, kbd, keygen, label, legend, li, link, main, map, mark, menu, meta, meter, nav, noscript, object, ol, optgroup, option, output, p, param, pre, progress, q, rb, rbc, rp, rt, rtc, ruby, s, samp, script, section, select, small, source, span, strike, strong, style, sub, summary, sup, table, tbody, td, textarea, tfoot, th, thead, time, tr, track, tt, u, ul, var, video, wbr
+ a, abbr, acronym, address, applet, area, article, aside, audio, b, bdi, bdo, big, blockquote, br, button, canvas, caption, center, cite, code, col, colgroup, command, data, datalist, dd, del, details, dfn, dialog, dir, div, dl, dt, em, embed, fieldset, figcaption, figure, font, footer, form, h1, h2, h3, h4, h5, h6, header, hgroup, hr, i, iframe, img, input, ins, isindex, kbd, keygen, label, legend, li, link, main, map, mark, menu, meta, meter, nav, noscript, object, ol, optgroup, option, output, p, param, picture, pre, progress, q, rb, rbc, rp, rt, rtc, ruby, s, samp, script, section, select, slot, small, source, span, strike, strong, style, sub, summary, sup, table, tbody, td, template, textarea, tfoot, th, thead, time, tr, track, tt, u, ul, var, video, wbr
+
+
+ htmLawed also supports use of custom HTML elements, but this support can be turned off when $config is appropriately set (i.e., in default configuration, such elements are permitted); see section 3.3.6.
+
+ Elements math and svg are not supported. They and their content will get filtered unless a strategy like in section 3.9 is used.
+
+ Elements like acronym, applet, basefont, bgsound, big, blink, center, command, dir, font, hgroup, image, keygen, marquee, menuitem, nobr, noembed, rb, rtc, shadow, spacer, strike, tt, and xmp are currently obsolete/deprecated. Some of them, like acronym and keygen, are supported in htmLawed (see above list). Tag transformation is possible for improving compliance with HTML standards -- most, but not all, of the obsolete/deprecated elements are converted to valid ones; see section 3.3.2.
+ These 16 htmLawed-supported elements are empty elements that have an opening tag with possible content but no element content (thus, no closing tag): area, br, col, command, embed, hr, img, input, isindex, keygen, link, meta, param, source, track, and wbr.
- The HTML version 4 elements acronym, applet, big, center, dir, font, strike, and tt are obsolete/deprecated in HTML version 5. On the other hand, the obsolete/deprecated HTML 4 elements embed, menu and u are no longer so in HTML 5. Elements new to HTML 5 are article, aside, audio, bdi, canvas, command, data, datalist, details, figure, figcaption, footer, header, hgroup, keygen, link, main, mark, meta, meter, nav, output, progress, section, source, style, summary, time, track, video, and wbr. The link, meta and style elements exist in HTML 4 but are not allowed in the HTML body. These 16 elements are empty elements that have an opening tag with possible content but no element content (thus, no closing tag): area, br, col, command, embed, hr, img, input, isindex, keygen, link, meta, param, source, track, and wbr.
+ As per standards, closing tags are optional for these elements under certain conditions: caption, colgroup, dd, dt, li, optgroup, option, p, rp, rt, tbody, td, tfoot, th, thead, and tr. By default, htmLawed will add a missing closing tag for such elements, unless balancing (section 3.3.3) is turned off.
- With $config["safe"] = 1, the default set will exclude applet, audio, canvas, embed, iframe, object, script and video; see section 3.6.
+ With $config["safe"] = 1, the default set of htmLawed-supported elements will exclude applet, audio, canvas, dialog, embed, iframe, object, script and video; see section 3.6.
- When $config["elements"], which specifies allowed elements, is properly defined, and neither empty nor set to 0 or *, the default set is not used. To have elements added to or removed from the default set, a +/- notation is used. E.g., *-script-object implies that only script and object are disallowed, whereas *+embed means that noembed is also allowed. Elements can also be specified as comma separated names. E.g., a, b, i means only a, b and i are permitted. In this notation, *, + and - have no significance and can actually cause a mis-reading.
+ When $config["elements"], which specifies allowed elements, is properly defined, and neither empty nor set to 0 or *, the default set is not used. To have elements added to or removed from the default set, a +/- notation is used. E.g., *-script-object implies that only script and object are disallowed, whereas *+noembed means that noembed is also allowed. For an element with a hyphen in name, use round brackets around the name; e.g., (my-custom-element). Elements can also be specified as comma separated names. E.g., a, b, i means only a, b and i are permitted. In this notation, *, + and - have no significance and can actually cause a mis-reading.
Some more examples of $config["elements"] values indicating permitted elements (note that empty spaces are liberally allowed for clarity):
@@ -1051,6 +1069,7 @@ htmLawed documentation
* *-script -- all excluding script
* * -acronym -big -center -dir -font -isindex -s -strike -tt -- only non-obsolete/deprecated elements of HTML5
* *+noembed-script -- all including noembed excluding script
+ * *+noembed+(my-custom-element) -- all including noembed and my-custom-element
Some mis-usages (and the resulting permitted elements) that can be avoided:
@@ -1064,11 +1083,9 @@ htmLawed documentation
Basically, when using the +/- notation, commas (,) should not be used, and vice versa, and * should be used with the former but not the latter.
- Note: Even if an element that is not in the default set is allowed through $config["elements"], like noembed in the last example, it will eventually be removed during tag balancing unless such balancing is turned off ($config["balance"] set to 0). Currently, the only way around this, which actually is simple, is to edit htmLawed's PHP code which define various arrays in the function hl_bal() to accommodate the element and its nesting properties.
+ Note: Even if an element that is not in the default set is allowed through $config["elements"], like noembed in the last example, it will eventually be removed during tag balancing unless such balancing is turned off ($config["balance"] set to 0). Currently, the only way around this, which actually is simple, is to edit htmLawed's PHP code that defines various arrays in the function hl_balance() to accommodate the element and its nesting properties.
- A possible second way to specify allowed elements is to set $config["parent"] to an element name that supposedly will hold the input, and to set $config["balance"] to 1. During tag balancing (see section 3.3.3), all elements that cannot legally nest inside the parent element will be removed. The parent element is auto-reset to div if $config["parent"] is empty, body, or an element not in htmLawed's default set of 118 elements.
-
- Tag transformation is possible for improving compliance with HTML standards -- most of the obsolete/deprecated elements of HTML version 5 are converted to valid ones; see section 3.3.2.
+ A possible second way to specify allowed elements is to set $config["parent"] to an element name that supposedly will hold the input, and to set $config["balance"] to 1. During tag balancing (see section 3.3.3), all elements that cannot legally nest inside the parent element will be removed. The parent element is auto-reset to div if $config["parent"] is empty, body, or an element not in htmLawed's default set of 122 elements.
3.3.1 Handling of comments & CDATA sections
@@ -1076,7 +1093,7 @@ htmLawed documentation
CDATA sections have the format <![CDATA[...anything but not "]]>"...]]>, and HTML comments, <!--...anything but not "-->"... -->. Neither HTML comments nor CDATA sections can reside inside tags. HTML comments can exist anywhere else, but CDATA sections can exist only where plain text is allowed (e.g., immediately inside td element content but not immediately inside tr element content).
- htmLawed (function hl_cmtcd()) handles HTML comments or CDATA sections depending on the values of $config["comment"] or $config["cdata"]. If 0, such markup is not looked for and the text is processed like plain text. If 1, it is removed completely. If 2, it is preserved but any <, > and & inside are changed to entities. If 3 for $config["cdata"], or 3 or 4 for $config["comment"], they are left as such. When $config["comment"] is set to 4, htmLawed will not force a space character before the --> comment-closing marker. While such a space is required for standard-compliance, it can corrupt marker code put in HTML by some software (such as Microsoft Outlook).
+ htmLawed (function hl_commentCdata()) handles HTML comments or CDATA sections depending on the values of $config["comment"] or $config["cdata"]. If 0, such markup is not looked for and the text is processed like plain text. If 1, it is removed completely. If 2, it is preserved but any <, > and & inside are changed to entities. If 3 for $config["cdata"], or 3 or 4 for $config["comment"], they are left as such. When $config["comment"] is set to 4, htmLawed will not force a space character before the --> comment-closing marker. While such a space is required for standard-compliance, it can corrupt marker code put in HTML by some software (such as Microsoft Outlook).
Note that for the last two cases, HTML comments and CDATA sections will always be removed from tag content (function hl_tag()).
@@ -1120,14 +1137,14 @@ htmLawed documentation
3.3.2 Tag-transformation for better compliance with standards
(to top)
- If
$config["make_tag_strict"] is set and not
0, following deprecated elements (and attributes), as per HTML 5 specification, even if admin-permitted, are mutated as indicated (element content remains intact; function
hl_tag2()):
+ If
$config["make_tag_strict"] is set and not
0, following deprecated elements (and attributes), even if admin-permitted, are mutated as indicated (element content remains intact; function
hl_deprecatedElement()):
* acronym -
abbr
* applet - based on
$config["make_tag_strict"], unchanged (
1) or removed (
2)
* big -
span style="font-size: larger;"
* center -
div style="text-align: center;"
* dir -
ul
- * font (face, size, color) -
span style="font-family: ; font-size: ; color: ;" (size transformation
reference)
+ * font (face, size, color) -
span style="font-family: ; font-size: ; color: ;" (size transformation
reference)
* isindex - based on
$config["make_tag_strict"], unchanged (
1) or removed (
2)
* s -
span style="text-decoration: line-through;"
* strike -
span style="text-decoration: line-through;"
@@ -1153,7 +1170,7 @@
htmLawed documentation
<div style="text-align: center;">
-
The PHP <span style="text-decoration: line-through;">software</span> script used for this <span style="text-decoration: line-through;">web-page</span> web-page is <span style="font-weight: bold; font-family: arial; color: red; font-size: 200%;">htmLawedTest.php</span>, from <span style="color:green; text-decoration: underline;">PHP Labware</span>.
+
The PHP <span style="text-decoration: line-through;">software</span> script used for this <span style="text-decoration: line-through;">web-page</span> web-page is <span style="font-weight: bold; font-size: 200%; color: red; font-family: arial;">htmLawedTest.php</span>, from <u style="color:green">PHP Labware</u>.
</div>
@@ -1164,7 +1181,7 @@
htmLawed documentation
3.3.3 Tag balancing & proper nesting
(to top)
- If
$config["balance"] is set to
1, htmLawed (function
hl_bal()) checks and corrects the input to have properly balanced tags and legal element content (i.e., any element nesting should be valid, and plain text may be present only in the content of elements that allow them).
+ If
$config["balance"] is set to
1, htmLawed (function
hl_balance()) checks and corrects the input to have properly balanced tags and legal element content (i.e., any element nesting should be valid, and plain text may be present only in the content of elements that allow them).
Depending on the value of
$config["keep_bad"] (see
section 2.2 and
section 3.3), illegal content may be removed or neutralized to plain text by converting < and > to entities:
@@ -1258,7 +1275,7 @@
htmLawed documentation
Note: In the example above, unlike
<*>,
<xml> gets considered as a tag (even though there is no HTML element named
xml). Thus, the
keep_bad parameter's value affects
<xml> but not
<*>. In general, text matching the regular expression pattern
<(/?)([a-zA-Z][a-zA-Z1-6]*)([^>]*?)\s?> is considered a tag (phrase enclosed by the angled brackets
< and
>, and starting [with an optional slash preceding] with an alphanumeric word that starts with an alphabet...), and is subjected to the
keep_bad value.
- Nesting/content rules for each of the 118 elements in htmLawed's default set (see
section 3.3) are defined in function
hl_bal(). This means that if a non-standard element besides
embed is being permitted through
$config["elements"], the element's tag content will end up getting removed if
$config["balance"] is set to
1.
+ Nesting/content rules for each of the 122 standard elements in htmLawed's default set (see
section 3.3) are defined in function
hl_balance(). Any custom element (
section 3.3.6) is permitted to be within and to contain any other element.
Plain text and/or certain elements nested inside
blockquote,
form,
map and
noscript need to be in block-level elements. This point is often missed during manual writing of HTML code. htmLawed attempts to address this during balancing. E.g., if the parent container is set as
form, the input
B:<input type="text" value="b" />C:<input type="text" value="c" /> is converted to
<div>B:<input type="text" value="b" />C:<input type="text" value="c" /></div>.
@@ -1288,34 +1305,57 @@
htmLawed documentation
As per the HTML standards, spaces, tabs and line-breaks in web-pages (except those inside
pre elements) are all considered equivalent, and referred to as
white-spaces. Browser applications are supposed to consider contiguous white-spaces as just a single space, and to disregard white-spaces trailing opening tags or preceding closing tags. This white-space
normalization allows the use of text/code beautifully formatted with indentations and line-spacings for readability. Such
pretty HTML can, however, increase the size of web-pages, or make the extraction or scraping of plain text cumbersome.
- With the
$config parameter
tidy, htmLawed can be used to beautify or compact the input text. Input with just plain text and no HTML markup is also subject to this. Besides
pre, the
script and
textarea elements, CDATA sections, and HTML comments are not subjected to the tidying process.
+ With the
$config parameter
tidy, htmLawed can be used to beautify or compact the input text. Input with just plain text and no HTML markup is also subject to this. Besides
pre, the
script, and
textarea elements, CDATA sections, and HTML comments are not subjected to the tidying process.
+
+ Any custom HTML element (
section 3.3.6) is treated like an inline element, like
strong, during tidying.
To
compact, use
$config["tidy"] = -1; single instances or runs of white-spaces are replaced with a single space, and white-spaces trailing and leading open and closing tags, respectively, are removed.
To
beautify,
$config["tidy"] is set as
1, or for customized tidying, as a string like
2s2n. The
s or
t character specifies the use of spaces or tabs for indentation. The first and third characters, any of the digits 0-9, specify the number of spaces or tabs per indentation, and any parental lead spacing (extra indenting of the whole block of input text). The
r and
n characters are used to specify line-break characters:
n for
\n (Unix/Mac OS X line-breaks),
rn or
nr for
\r\n (Windows/DOS line-breaks), or
r for
\r.
+ For instance, with
$config["tidy"] set as
3s2n, 3 space characters are used per indentation level, the entire block of text (HTML code) gets a lead (left spacing) of 2 space characters, and line-breaks use the
\n character.
+
The
$config["tidy"] value of
1 is equivalent to
2s0n. Other
$config["tidy"] values are read loosely: a value of
4 is equivalent to
4s0n;
t2, to
1t2n;
s, to
2s0n;
2TR, to
2t0r;
T1, to
1t1n;
nr3, to
3s0nr, and so on. Except in the indentations and line-spacings, runs of white-spaces are replaced with a single space during beautification.
Input formatting using
$config["tidy"] is not recommended when input text has mixed markup (like HTML + PHP).
+
+
+3.3.6 Custom HTML elements
+
(to top)
+
+ Custom elements are HTML elements whose properties/behaviors are defined by the
author, instead of being
universal (i.e., defined by the HTML interpreter like a browser). Their names must begin with a lowercased a-z character, contain at least one hyphen (-), and cannot be:
annotation-xml, color-profile, font-face, font-face-src, font-face-uri, font-face-format, font-face-name, missing-glyph. A huge variety of characters is permitted in the name.
+
+
+
0-9 | . | _ | #xB7 | #xC0-#xD6 | #xD8-#xF6 | #xF8-#x37D | #x37F-#x1FFF | #x200C-#x200D | #x203F-#x2040 | #x2070-#x218F | #x2C00-#x2FEF | #x3001-#xD7FF | #xF900-#xFDCF | #xFDF0-#xFFFD | [#x10000-#xEFFFF]
+
+
+ With
$config["any_custom_element"] set to
0, no custom element is permitted, whereas with a value of
1 (default value), any such element is permitted. Regardless of the setting, specific custom elements can be denied or permitted through
$config["elements"] (see
section 3.3.1).
+
+ Any custom HTML element is treated like an inline element, like
strong, during tidying (
section 3.3.5). During tag balancing (
section 3.3.3), any custom element is permitted to be within and to contain any other element. These laxities are necessitated because, by definition, custom elements are parochial.
+
+ Custom elements are permitted to have attributes of any name consisting of any character except a few such as equal, forward slash, and most control characters (unless denied through
$spec) and satisfying any
data attribute name requirement.
+
3.4 Attributes
(to top)
- In its default setting, htmLawed will only permit attributes described in the HTML specifications (including deprecated ones). A list of the attributes and the elements they are allowed in is in
section 5.2. Using the
$spec argument, htmLawed can be forced to permit custom, non-standard attributes as well as custom rules for standard attributes (
section 2.3).
+
+
In its default setting, htmLawed will only permit attributes described in the HTML specifications (including deprecated ones). A list of the attributes and the elements they are allowed in is in section 5.2. Using the $spec argument, htmLawed can be forced to permit custom, non-standard attributes as well as custom rules for standard attributes (section 2.3).
+
Custom
data-* (
data-star) attributes, where the first three characters of the value of
star (*) after lower-casing do not equal
xml, and the value of
star does not have a colon (:), equal-to (=), newline, solidus (/), space or tab character, or any upper-case A-Z character are allowed in all elements. ARIA, event and microdata attributes like
aria-live,
onclick and
itemid are also considered global attributes (
section 5.2).
- When
$config["deny_attribute"] is not set, or set to
0, or empty (
""), all attributes are permitted. Otherwise,
$config["deny_attribute"] can be set as a list of comma-separated names of the denied attributes.
on* can be used to refer to the group of potentially dangerous, script-accepting event attributes like
onblur and
onchange that have
on at the beginning of their names. Similarly,
aria* and
data* can be used to respectively refer to the set of all ARIA and data-* attributes.
+ When
$config["deny_attribute"] is not set, or set to
0, or empty (
""), all attributes are permitted as per standards. Otherwise,
$config["deny_attribute"] can be set in two different ways. One way is as a list of comma-separated names of the denied attributes.
on* can be used to refer to the group of potentially dangerous, script-accepting event attributes like
onchange that have
on at the beginning of their names. Similarly,
aria* and
data* can be used to respectively refer to the set of all ARIA and data-* attributes. The second way to set
$config["deny_attribute"] permits the denying of all but a few attributes globally. The notation is
* -attribute1 -attribute2 .... Thus, a value of
* -title -href implies that except
href and
title (where allowed as per standards) all other attributes are to be removed. Terms
aria*, data*, and
on* can be used in this notation, and a whitespace character is necessary before the
- character.
- With
$config["safe"] = 1 (
section 3.6), the
on* event attributes are automatically disallowed even if a value for
$config["deny_attribute"] has been manually provided.
+ With
$config["safe"] = 1 (
section 3.6), any
on* event attribute is disallowed even if
$config["deny_attribute"] is set otherwise (such as
* -style -on*).
- Note that attributes specified in
$config["deny_attribute"] are denied globally, for all elements. To deny attributes for only specific elements,
$spec (see
section 2.3) can be used.
$spec can also be used to element-specifically permit an attribute otherwise denied through
$config["deny_attribute"].
+ The attribute restrictions specified with
$config["deny_attribute"] apply to all elements. To deny attributes for only specific elements,
$spec (see
section 2.3) can be used.
$spec can also be used to element-specifically permit an attribute otherwise denied through
$config["deny_attribute"].
- Finer restrictions on attributes can also be put into effect through
$config["deny_attribute"] (
section).
+ Finer restrictions on attributes can also be put into effect through
$config["hook_tag"] (
section 3.4.9).
-
Note: To deny all but a few attributes globally, a simpler way to specify
$config["deny_attribute"] would be to use the notation
* -attribute1 -attribute2 .... Thus, a value of
* -title -href implies that except
href and
title (where allowed as per standards) all other attributes are to be removed. With this notation, the value for the parameter
safe (
section 3.6) will have no effect on
deny_attribute. Values of
aria* data*, and
on* cannot be used in this notation to refer to the sets of all ARIA, data-*, and on* attributes respectively.
+ Custom elements are permitted to have attributes of any name consisting of any character except a few such as equal, forward slash, and most control characters (unless denied through
$spec) and satisfying any
data attribute name requirement.
htmLawed (function
hl_tag()) also:
@@ -1323,6 +1363,7 @@
htmLawed documentation
* Removes duplicate attributes (last one stays)
* Gives attributes the form
name="value" and single-spaces them, removing unnecessary white-spacing
* Provides
required attributes (see
section 3.4.1)
+ * Optionally lowercases certain standard attribute values (see
section 3.4.5)
* Double-quotes values and escapes any
" inside them
* Replaces the possibly dangerous soft-hyphen characters (hexadecimal code-point
ad) in the values with spaces
* Allows custom function to additionally filter/modify attribute values (see
section 3.4.9)
@@ -1380,11 +1421,13 @@
htmLawed documentation
Also, only
data,
file,
http,
https and
javascript are permitted in these attributes that accept URLs:
-
action, cite, classid, codebase, data, itemtype, longdesc, model, pluginspage, pluginurl, src, srcset, style, usemap, and event attributes like onclick
+
action, archive, cite, classid, codebase, data, itemtype, longdesc, model, pluginspage, pluginurl, poster, src, srcset, style, usemap, and event attributes like onclick
With
$config["safe"] = 1 (
section 3.6), the above is changed to disallow
app,
data and
javascript.
+
Note: URLs in
data-* attribute values are not checked, but $spec (
section 2.3) or
$config["hook_tag"] (
section 3.4.9) can be used for this purpose.
+
These default sets are used when
$config["schemes"] is not set (see
section 2.2). To over-ride the defaults,
$config["schemes"] is defined as a string of semi-colon-separated sub-strings of type
attribute: comma-separated schemes. E.g.,
href: mailto, http, https; onclick: javascript; src: http, https. For unspecified attributes,
data,
file,
http,
https and
javascript are permitted. This can be changed by passing schemes for
* in
$config["schemes"]. E.g.,
href: mailto, http, https; *: http, https.
* (asterisk) can be put in the list of schemes to permit all protocols. E.g.,
style: *; img: http, https results in protocols not being checked in
style attribute values. However, in such cases, any relative-to-absolute URL conversion, or vice versa, (
section 3.4.4) is not done. When an attribute is explicitly listed in
$config["schemes"], then filtering is dictated by the setting for the attribute, with no effect of the setting for asterisk. That is, the set of attributes that asterisk refers to no longer includes the listed attribute.
@@ -1395,8 +1438,6 @@
htmLawed documentation
! can be put in the list of schemes to disallow all protocols as well as
local URLs. Thus, with
href: http; style: !,
<a href="http://cnn.com" style="background-image: url(local.jpg);">CNN</a> will become
<a href="http://cnn.com" style="background-image: url(denied:local.jpg);">CNN</a>
-
Note: If URL-accepting attributes other than those listed above are being allowed, then the scheme will not be checked unless the attribute name contains the string
src (e.g.,
dynsrc) or starts with
o (e.g.,
onbeforecopy).
-
With
$config["safe"] = 1, all URLs are disallowed in the
style attribute values.
@@ -1606,11 +1647,9 @@ htmLawed documentation
3.4.9 Hook function for tag content
(to top)
- It is possible to utilize a custom hook function to alter the tag content htmLawed has finalized (i.e., after it has checked/corrected for required attributes, transformed attributes, lower-cased attribute names, etc.).
+ It is possible to utilize a custom hook function to alter the tag content htmLawed has finalized (i.e., after it has checked/corrected for required attributes, transformed attributes, lower-cased attribute names, etc.). The function should have two arguments, the first receiving an element name and the second receiving either 0 (in case of a closing tag) or an array of attribute name-value pairs (opening tag). It should return a string with full HTML markup, either an opening or a closing tag with element name and any string of attributes.
- When $config parameter hook_tag is set to the name of a function, htmLawed (function hl_tag()) will pass on the element name, and the finalized attribute name-value pairs as array elements to the function. The function, after completing a task such as filtering or tag transformation, will typically return an empty string, the full opening tag string like <element_name attribute_1_name="attribute_1_value"...> (for empty elements like img and input, the element-closing slash / should also be included), etc.
-
- Any hook_tag function, since htmLawed version 1.1.11, also receives names of elements in closing tags, such as a in the closing </a> tag of the element <a href="http://cnn.com">CNN</a>. No other value is passed to the function since a closing tag contains only element names. Typically, the function will return an empty string or a full closing tag (like </a>).
+ When $config parameter hook_tag is set to the name of a function or class method, htmLawed (function hl_tag()) will pass on the element name, and the finalized attribute name-value pairs as array elements to the function. The function, after completing a task such as filtering or tag transformation, will typically return an empty string, the full opening tag string like <element_name attribute_1_name="attribute_1_value"...> (for empty elements like img and input, the element-closing slash / should also be included), etc.
This is a powerful functionality that can be exploited for various objectives: consolidate-and-convert inline style attributes to class, convert embed elements to object, permit only one caption element in a table element, disallow embedding of certain types of media, inject HTML, use CSSTidy to sanitize style attribute values, etc.
@@ -1705,7 +1744,7 @@ htmLawed documentation
The hook_tag parameter is different from the hook parameter (section 3.7).
- Snippets of hook function code developed by others may be available on the htmLawed website.
+ Snippets of hook function code developed by others may be available on the htmLawed website.
@@ -1733,13 +1772,13 @@ htmLawed documentation
deny_attribute - on*
- elements - * -applet -audio -canvas -embed -iframe -object -script -video
+ elements - * -applet -audio -canvas -dialog -embed -iframe -object -script -video
schemes - href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, tel, telnet; style: !; *:file, http, https
- With safe set to 1, htmLawed considers CDATA sections and HTML comments as plain text, and prohibits the applet, audio, canvas, embed, iframe, object, script and video elements, and the on* attributes like onclick. ( There are $config parameters like css_expression that are not affected by the value set for safe but whose default values still contribute towards a more safe output.) Further, unless overridden by the value for parameter schemes (see section 3.4.3), the schemes app, data and javascript are not permitted, and URLs with schemes are neutralized so that, e.g., style="moz-binding:url(http://danger)" becomes style="moz-binding:url(denied:http://danger)".
+ With safe set to 1, htmLawed considers CDATA sections and HTML comments as plain text, and prohibits the applet, audio, canvas, dialog, embed, iframe, object, script and video elements, and the on* attributes like onclick. ( There are $config parameters like css_expression that are not affected by the value set for safe but whose default values still contribute towards a more safe output.) Further, unless overridden by the value for parameter schemes (see section 3.4.3), the schemes app, data and javascript are not permitted, and URLs with schemes are neutralized so that, e.g., style="moz-binding:url(http://danger)" becomes style="moz-binding:url(denied:http://danger)".
Admins, however, may still want to completely deny the style attribute, e.g., with code like
@@ -1751,27 +1790,27 @@ htmLawed documentation
If a value for a parameter auto-set through safe is still manually provided, then that value can over-ride the auto-set value. E.g., with $config["safe"] = 1 and $config["elements"] = "* +script", script, but not applet, is allowed. Such over-ride does not occur for deny_attribute (for legacy reasons) when comma-separated attribute names are provided as the value for this parameter (section 3.4); instead htmLawed will add on* to the value provided for deny_attribute.
- A page illustrating the efficacy of htmLawed's anti-XSS abilities with safe set to 1 against XSS vectors listed by RSnake may be available here.
+ A page illustrating the efficacy of htmLawed's anti-XSS abilities with safe set to 1 against XSS vectors listed by RSnake may be available here.
3.7 Using a hook function
(to top)
- If
$config["hook"] is not set to
0, then htmLawed will allow preliminarily processed input to be altered by a hook function named by
$config["hook"] before starting the main work (but after handling of characters, entities, HTML comments and
CDATA sections -- see code for function
htmLawed()).
+ If
$config["hook"] is not set to
0, then htmLawed will allow preliminarily processed input to be altered by a function or class method named by
$config["hook"] before starting the main work (but after handling of characters, entities, HTML comments and
CDATA sections -- see code for function
htmLawed()). The function should have three arguments – the processed input string, and the finalized
$config and
$spec arrays, in order – and it should return the string after any manipulation.
The hook function also allows one to alter the
finalized values of
$config and
$spec.
Note that the
hook parameter is different from the
hook_tag parameter (
section 3.4.9).
- Snippets of hook function code developed by others may be available on the
htmLawed website.
+ Snippets of hook function code developed by others may be available on the
htmLawed website.
3.8 Obtaining finalized parameter values
(to top)
- htmLawed can assign the
finalized $config and
$spec values to a variable named by
$config["show_setting"]. The variable, made global by htmLawed, is set as an array with three keys:
config, with the
$config value,
spec, with the
$spec value, and
time, with a value that is the Unix time (the output of PHP's
microtime() function) when the value was assigned. Admins should use a PHP-compliant variable name (e.g., one that does not begin with a numerical digit) that does not conflict with variable names in their non-htmLawed code.
+ htmLawed can assign the
finalized $config and
$spec values to a variable named by
$config["show_setting"]. The variable, made global by htmLawed, is set as an array with four keys:
config, with the
$config value,
spec, with the
$spec value,
time, with a value that is the Unix time (the output of PHP's
microtime function) when htmLawed completed filtering, and
version, with htmLawed version. Admins should use a PHP-compliant variable name (e.g., one that does not begin with a numerical digit) that does not conflict with variable names in their non-htmLawed code.
The values, which are also post-hook function (if any), can be used to auto-generate information (on, e.g., the elements that are permitted) for input writers.
@@ -1809,7 +1848,7 @@
htmLawed documentation
4.1 Support
(to top)
- Software updates and forum-based community-support may be found at
http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed. For general PHP issues (not htmLawed-specific), support may be found through internet searches and at
http://php.net.
+ Software updates and forum-based community-support may be found at
https://bioinformatics.org/phplabware/internal_utilities/htmLawed.
@@ -1827,6 +1866,20 @@ htmLawed documentation
Version number - Release date. Notes
+ 1.2.11 - 23 January 2023. Fixes an XSS vulnerability arising from a lack of inspection for the alphabetical HTML entity for colon character in URLs
+
+ 1.2.10 - 5 November 2022. Class methods can now be specified as $config hook and hook_tag functions; corrects a PHP notice if $config["schemes"] mistakenly lacks colons.
+
+ 1.2.9 - 2 July 2022. Improves parsing of $config["deny_attribute"] to permit spaces flanking comma characters and allow references to sets of all ARIA, data-* and event attributes; fixes parsing of $spec for data-* attribute rules; now permits use of aria*, data*, and on* in $spec; now covers all named HTML entities of current standard specification (this increased htmLawed code size by ~40%); recognizes that closing tag may be omitted for caption, optgroup, rp, rt, and tbody as well; recognizes that archive and poster attribute values can have URLs, which can be multiple; recognizes onloadend as global attribute; renames some internal functions; improved standards-compliance for element nesting.
+
+ 1.2.8 - 6 June 2022. Fixes incorrect formatting of HTML comments when $config["comment"] = 4; fixes misreading of entity-fied colon characters in style attribute values; $config["show_setting"] now includes htmLawed version; improved PHP 8.2 code compatibility, and readability
+
+ 1.2.7 - 10 April 2022. Support for elements dialog, picture, slot, and template; support for custom HTML elements; support for global attributes autocapitalize, autofocus, enterkeyhint, inputmode, is, and nonce; support for 17 additional ARIA and 11 additional on* event handler attributes; support for attributes with names not beginning with a-z; fix for a minor bug arising during deprecated height/width attribute transformation
+
+ 1.2.6 - 4 September 2021. Fixes a bug that arises when $config["deny_attribute"] has a data-* attribute with > 1 hyphen character
+
+ 1.2.5 - 24 September 2019. Fixes two bugs in font tag transformation
+
1.2.4.2 - 16 May 2019. Corrects a PHP notice if a semi-colon is present in $config["schemes"]
1.2.4.1 - 12 September 2017. Corrects a function re-declaration bug introduced in version 1.2.4
@@ -1911,7 +1964,7 @@ htmLawed documentation
1.0.2 - 13 February 2008. Improved implementation of $config["keep_bad"]
- 1.0.1 - 7 November 2007. Improved regex for identifying URLs, protocols and dynamic expressions (hl_tag() and hl_prot()); no error display with hl_regex()
+ 1.0.1 - 7 November 2007. Improved regex for identifying URLs, protocols and dynamic expressions; no error display during regex testing
1.0 - 2 November 2007. First release
@@ -1937,14 +1990,14 @@ htmLawed documentation
(3) From version older than 1.2 to later, if htmLawed is used without $config["safe"] set to 1: Unlike previous versions, htmLawed version 1.2 and later permit data and javascript URL schemes by default (see section 3.4.3).
- Old versions of htmLawed may be available online. E.g., for version 1.0, check http://www.bioinformatics.org/phplabware/downloads/htmLawed1.zip; for 1.1.1, http://www.bioinformatics.org/phplabware/downloads/htmLawed111.zip; and for 1.1.22, http://www.bioinformatics.org/phplabware/downloads/htmLawed1122.zip.
+ Old versions of htmLawed may be available online. E.g., for version 1.0, check https://bioinformatics.org/phplabware/downloads/htmLawed1.zip; for 1.1.1, https://bioinformatics.org/phplabware/downloads/htmLawed111.zip; and for 1.1.22, https://bioinformatics.org/phplabware/downloads/htmLawed1122.zip.
4.6 Comparison with HTMLPurifier
(to top)
- The HTMLPurifier PHP library by Edward Yang is a very good HTML filtering script that uses object oriented PHP code. Compared to htmLawed, it (as of year 2015):
+ The
HTMLPurifier PHP library by Edward Yang is a good HTML filtering script that uses object-oriented PHP code. Compared to htmLawed, as of year 2015, HTMLPurifier:
* does not support PHP versions older than 5.0 (HTMLPurifier dropped PHP 4 support after version 2)
@@ -1952,27 +2005,29 @@
htmLawed documentation
* consumes 10-15 times more RAM memory (just including the HTMLPurifier files without calling the filter requires a few MBs of memory)
- * is expectedly slower
+ * is expectedly considerably slower
* lacks many of the extra features of htmLawed (like entity conversions and code compaction/beautification)
* has poor documentation
- However, HTMLPurifier has finer checks for character encodings and attribute values, and can log warnings and errors. Visit the HTMLPurifier
website for updated information.
+ * may have finer checks for character encodings and attribute values
+
+ * can log warnings and errors
4.7 Use through application plug-ins/modules
(to top)
- Plug-ins/modules to implement htmLawed in applications such as Drupal may have been developed. Check the application websites and the htmLawed
forum.
+ Plug-ins/modules to implement htmLawed in applications such as Drupal may have been developed. Check the application websites and the htmLawed
forum.
4.8 Use in non-PHP applications
(to top)
- Non-PHP applications written in Python, Ruby, etc., may be able to use htmLawed through system calls to the PHP engine. Such code may have been documented on the internet. Also check the forum on the htmLawed
site.
+ Non-PHP applications written in Python, Ruby, etc., may be able to use htmLawed through system calls to the PHP engine. Such code may have been documented on the internet. Also check the forum on the htmLawed
site.
@@ -1986,7 +2041,7 @@ htmLawed documentation
4.10 Acknowledgements
(to top)
- Nicholas Alipaz, Bryan Blakey, Pádraic Brady, Dac Chartrand, Alexandre Chouinard, Ulf Harnhammer, Gareth Heyes, Hakre, Klaus Leithoff, Lukasz Pilorz, Shelley Powers, Psych0tr1a, Lincoln Russell, Tomas Sykorka, Harro Verton, Edward Yang, and many anonymous users.
+ Nicholas Alipaz, Bryan Blakey, Pádraic Brady, Michael Butler, Dac Chartrand, Alexandre Chouinard, NinCollin, Ulf Harnhammer, Gareth Heyes, Hakre, Klaus Leithoff, Hideki Mitsuda, Lukasz Pilorz, Shelley Powers, Psych0tr1a, Lincoln Russell, Tomas Sykorka, Harro Verton, walrusmoose, Edward Yang, and many others.
Thank you!
@@ -2008,11 +2063,11 @@
htmLawed documentation
5.2 Valid attribute-element combinations
(to top)
- * includes deprecated attributes (marked
^), attributes for microdata (marked
*), the non-standard
bordercolor, and new-in-HTML5 attributes (marked
~); can have multiple comma-separated values (marked
%); can have multiple space-separated values (marked
$)
+ * includes deprecated attributes (marked
^), attributes for microdata (marked
*), some non-standard attributes for
embed (marked
**), and the non-standard
bordercolor; can have multiple comma-separated values (marked
%); can have multiple space-separated values (marked
$)
* only non-frameset, HTML body elements
*
name for
a and
map, and
lang are invalid in XHTML 1.1
- *
target is valid for
a in XHTML 1.1 and higher
*
xml:space is only for XHTML 1.1
+ * excludes data-* and author-specified, non-standard attributes of custom elements
abbr - td, th
accept - form, input
@@ -2022,17 +2077,17 @@
htmLawed documentation
allowfullscreen - iframe
alt - applet, area, img, input
archive - applet, object
- async~ - script
- autocomplete~ - input
- autofocus~ - button, input, keygen, select, textarea
- autoplay~ - audio, video
+ async - script
+ autocomplete - input
+ autofocus - button, input, keygen, select, textarea
+ autoplay - audio, video
axis - td, th
bgcolor - embed, table^, td^, th^, tr^
border - img, object^, table
bordercolor - table, td, tr
cellpadding - table
cellspacing - table
- challenge~ - keygen
+ challenge - keygen
char - col, colgroup, tbody, td, tfoot, th, thead, tr
charoff - col, colgroup, tbody, td, tfoot, th, thead, tr
charset - a, script
@@ -2048,94 +2103,94 @@
htmLawed documentation
colspan - td, th
compact - dir, dl^, menu, ol^, ul^
content - meta
- controls~ - audio, video
+ controls - audio, video
coords - area, a
- crossorigin~ - img
+ crossorigin - img
data - object
datetime - del, ins, time
declare - object
- default~ - track
+ default - track
defer - script
dir - bdo
- dirname~ - input, textarea
+ dirname - input, textarea
disabled - button, command, fieldset, input, keygen, optgroup, option, select, textarea
- download~ - a
+ download - a
enctype - form
face - font
flashvars** - embed
for - label, output
- form~ - button, fieldset, input, keygen, label, object, output, select, textarea
- formaction~ - button, input
- formenctype~ - button, input
- formmethod~ - button, input
- formnovalidate~ - button, input
- formtarget~ - button, input
+ form - button, fieldset, input, keygen, label, object, output, select, textarea
+ formaction - button, input
+ formenctype - button, input
+ formmethod - button, input
+ formnovalidate - button, input
+ formtarget - button, input
frame - table
frameborder - iframe
headers - td, th
height - applet, canvas, embed, iframe, img, input, object, td^, th^, video
- high~ - meter
+ high - meter
href - a, area, link
hreflang - a, area, link
hspace - applet, embed, img^, object^
- icon~ - command
+ icon - command
ismap - img, input
- keytype~ - keygen
- keyparams~ - keygen
- kind~ - track
+ keytype - keygen
+ keyparams - keygen
+ kind - track
label - command, menu, option, optgroup, track
language - script^
- list~ - input
+ list - input
longdesc - img, iframe
- loop~ - audio, video
- low~ - meter
+ loop - audio, video
+ low - meter
marginheight - iframe
marginwidth - iframe
- max~ - input, meter, progress
+ max - input, meter, progress
maxlength - input, textarea
- media~ - a, area, link, source, style
- mediagroup~ - audio, video
+ media - a, area, link, source, style
+ mediagroup - audio, video
method - form
- min~ - input, meter
+ min - input, meter
model** - embed
multiple - input, select
- muted~ - audio, video
- name - a^, applet^, button, embed, fieldset, form^, iframe^, img^, input, keygen, map^, object, output, param, select, textarea
+ muted - audio, video
+ name - a^, applet^, button, embed, fieldset, form^, iframe^, img^, input, keygen, map^, object, output, param, select, slot, textarea
nohref - area
noshade - hr^
- novalidate~ - form
+ novalidate - form
nowrap - td^, th^
object - applet
- open~ - details
- optimum~ - meter
- pattern~ - input
- ping~ - a, area
- placeholder~ - input, textarea
+ open - details, dialog
+ optimum - meter
+ pattern - input
+ ping - a, area
+ placeholder - input, textarea
pluginspage** - embed
pluginurl** - embed
- poster~ - video
- pqg~ - keygen
- preload~ - audio, video
+ poster - video
+ pqg - keygen
+ preload - audio, video
prompt - isindex
- pubdate~ - time
+ pubdate - time
radiogroup* - command
readonly - input, textarea
- required~ - input, select, textarea
+ required - input, select, textarea
rel$ - a, area, link
rev - a
- reversed~ - old
+ reversed - ol
rows - textarea
rowspan - td, th
rules - table
- sandbox~ - iframe
+ sandbox - iframe
scope - td, th
- scoped~ - style
+ scoped - style
scrolling - iframe
- seamless~ - iframe
+ seamless - iframe
selected - option
shape - area, a
size - font, hr^, input, select
- sizes~ - link
+ sizes - link
span - col, colgroup
src - audio, embed, iframe, img, input, script, source, track, video
srcdoc~ - iframe
@@ -2159,7 +2214,7 @@
htmLawed documentation
The following attributes, including event-specific ones and attributes of ARIA and microdata specifications, are considered global and allowed in all elements:
- accesskey, aria-activedescendant, aria-atomic, aria-autocomplete, aria-busy, aria-checked, aria-controls, aria-describedby, aria-disabled, aria-dropeffect, aria-expanded, aria-flowto, aria-grabbed, aria-haspopup, aria-hidden, aria-invalid, aria-label, aria-labelledby, aria-level, aria-live, aria-multiline, aria-multiselectable, aria-orientation, aria-owns, aria-posinset, aria-pressed, aria-readonly, aria-relevant, aria-required, aria-selected, aria-setsize, aria-sort, aria-valuemax, aria-valuemin, aria-valuenow, aria-valuetext, class$, contenteditable, contextmenu, dir, draggable, dropzone, hidden, id, inert, itemid, itemprop, itemref, itemscope, itemtype, lang, onabort, onblur, oncanplay, oncanplaythrough, onchange, onclick, oncontextmenu, oncopy, oncuechange, oncut, ondblclick, ondrag, ondragend, ondragenter, ondragleave, ondragover, ondragstart, ondrop, ondurationchange, onemptied, onended, onerror, onfocus, onformchange, onforminput, oninput, oninvalid, onkeydown, onkeypress, onkeyup, onload, onloadeddata, onloadedmetadata, onloadstart, onlostpointercapture, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, onmousewheel, onpaste, onpause, onplay, onplaying, onpointercancel, ongotpointercapture, onpointerdown, onpointerenter, onpointerleave, onpointermove, onpointerout, onpointerover, onpointerup, onprogress, onratechange, onreadystatechange, onreset, onsearch, onscroll, onseeked, onseeking, onselect, onshow, onstalled, onsubmit, onsuspend, ontimeupdate, ontoggle, ontouchcancel, ontouchend, ontouchmove, ontouchstart, onvolumechange, onwaiting, onwheel, role, spellcheck, style, tabindex, title, translate, xmlns, xml:base, xml:lang, xml:space
+ accesskey, autocapitalize, autofocus, aria-activedescendant, aria-atomic, aria-autocomplete, aria-braillelabel, aria-brailleroledescription, aria-busy, aria-checked, aria-colcount, aria-colindex, aria-colindextext, aria-colspan, aria-controls, aria-current, aria-describedby, aria-description, aria-details, aria-disabled, aria-dropeffect, aria-errormessage, aria-expanded, aria-flowto, aria-grabbed, aria-haspopup, aria-hidden, aria-invalid, aria-keyshortcuts, aria-label, aria-labelledby, aria-level, aria-live, aria-multiline, aria-multiselectable, aria-orientation, aria-owns, aria-placeholder, aria-posinset, aria-pressed, aria-readonly, aria-relevant, aria-required, aria-roledescription, aria-rowcount, aria-rowindex, aria-rowindextext, aria-rowspan, aria-selected, aria-setsize, aria-sort, aria-valuemax, aria-valuemin, aria-valuenow, aria-valuetext, class, contenteditable, contextmenu, dir, draggable, dropzone, enterkeyhint, hidden, id, inert, inputmode, is, itemid, itemprop, itemref, itemscope, itemtype, lang, nonce, onabort, onblur, oncanplay, oncanplaythrough, onchange, onclick, oncontextmenu, oncopy, oncuechange, oncut, ondblclick, ondrag, ondragend, ondragenter, ondragleave, ondragover, ondragstart, ondrop, ondurationchange, onemptied, onended, onerror, onfocus, onformchange, onforminput, oninput, oninvalid, onkeydown, onkeypress, onkeyup, onload, onloadeddata, onloadedmetadata, onloadend, onloadstart, onlostpointercapture, onmousedown, onmousemove, onmouseout, onmouseover, onmouseup, onmousewheel, onpaste, onpause, onplay, onplaying, onpointercancel, ongotpointercapture, onpointerdown, onpointerenter, onpointerleave, onpointermove, onpointerout, onpointerover, onpointerup, onprogress, onratechange, onreadystatechange, onreset, onsearch, onscroll, onseeked, onseeking, onselect, onshow, onstalled, onsubmit, onsuspend, ontimeupdate, ontoggle, ontouchcancel, ontouchend, ontouchmove, ontouchstart, onvolumechange, onwaiting, onwheel, onauxclick, oncancel, onclose, 
oncontextlost, oncontextrestored, onformdata, onmouseenter, onmouseleave, onresize, onsecuritypolicyviolation, onslotchange, role, slot, spellcheck, style, tabindex, title, translate, xmlns, xml:base, xml:lang, xml:space
Custom
data-* attributes, where the first three characters of the value of
star (*) after lower-casing do not equal
xml and the value of
star does not have a colon (:), equal-to (=), newline, solidus (/), space, tab, or any A-Z character, are also considered global and allowed in all elements.
@@ -2257,22 +2312,22 @@
htmLawed documentation
Except for the main
htmLawed() function, htmLawed's functions are
name-spaced using the
hl_ prefix. The
functions and their roles are:
- *
hl_attrval - check attribute values against
$spec
- *
hl_bal - balance tags and ensure proper nesting
- *
hl_cmtcd - handle CDATA sections and HTML comments
- *
hl_ent - handle character entities
- *
hl_prot - check a URL scheme/protocol
+ *
hl_attributeValue - check attribute values against
$spec rules
+ *
hl_balance - balance tags and ensure proper nesting
+ *
hl_commentCdata - handle CDATA sections and HTML comments
+ *
hl_deprecatedElement - transform deprecated element tags
+ *
hl_entity - handle character entities
*
hl_regex - check syntax of a regular expression
- *
hl_spec - convert user-supplied
$spec value to one used internally
+ *
hl_spec - convert
$spec value to one used internally
*
hl_tag - handle element tags and attributes
- *
hl_tag2 - transform element tags
*
hl_tidy - compact/beautify HTML
+ *
hl_url - check URL-containing values
*
hl_version - report htmLawed version
*
htmLawed - main function
-
htmLawed() finalizes
$spec (with the help of
hl_spec()) and
$config, and globalizes them. Finalization of
$config involves setting default values if an inappropriate or invalid one is supplied. This includes calling
hl_regex() to check well-formedness of regular expression patterns if such expressions are user-supplied through
$config.
htmLawed() then removes invalid characters like nulls and
x01 and appropriately handles entities using
hl_ent(). HTML comments and CDATA sections are identified and treated as per
$config with the help of
hl_cmtcd(). When retained, the
< and
> characters identifying them, and the
<,
> and
& characters inside them, are replaced with control characters (code-points
1 to
5) till any tag balancing is completed.
+
htmLawed() finalizes
$spec (with the help of
hl_spec()) and
$config, and globalizes them. Finalization of
$config involves setting default values if an inappropriate or invalid one is supplied. This includes calling
hl_regex() to check well-formedness of regular expression patterns if such expressions are user-supplied through
$config.
htmLawed() then removes invalid characters like nulls and
x01 and appropriately handles entities using
hl_entity(). HTML comments and CDATA sections are identified and treated as per
$config with the help of
hl_commentCdata(). When retained, the
< and
> characters identifying them, and the
<,
> and
& characters inside them, are replaced with control characters (code-points
1 to
5) till any tag balancing is completed.
- After this
initial processing htmLawed() identifies tags using regex and processes them with the help of
hl_tag() -- a large function that analyzes tag content, filtering it as per HTML standards,
$config and
$spec. Among other things,
hl_tag() transforms deprecated elements using
hl_tag2(), removes attributes from closing tags, checks attribute values as per
$spec rules using
hl_attrval(), and checks URL protocols using
hl_prot().
htmLawed() performs tag balancing and nesting checks with a call to
hl_bal(), and optionally compacts/beautifies the output with proper white-spacing with a call to
hl_tidy(). The latter temporarily replaces white-space, and
<,
> and
& characters inside
pre,
script and
textarea elements, and HTML comments and CDATA sections with control characters (code-points
1 to
5, and
7).
+ After this
initial processing, htmLawed() identifies tags using regex and processes them with the help of
hl_tag() -- a large function that analyzes tag content, filtering it as per HTML standards,
$config and
$spec. Among other things,
hl_tag() transforms deprecated elements using
hl_deprecatedElement(), removes attributes from closing tags, checks attribute values as per
$spec rules using
hl_attributeValue(), and checks URL protocols using
hl_url().
htmLawed() performs tag balancing and nesting checks with a call to
hl_balance(), and optionally compacts/beautifies the output with proper white-spacing with a call to
hl_tidy(). The latter temporarily replaces white-space, and
<,
> and
& characters inside
pre,
script and
textarea elements, and HTML comments and CDATA sections with control characters (code-points
1 to
5, and
7).
htmLawed permits the use of custom code or
hook functions at two stages. The first, called inside
htmLawed(), allows the input text as well as the finalized
$config and
$spec values to be altered right after the initial processing (see
section 3.7). The second is called by
hl_tag() once the tag content is finalized (see
section 3.4.9).
@@ -2280,8 +2335,8 @@
htmLawed documentation
-
HTM version of htmLawed_README.txt generated on 16 May, 2019 using rTxt2htm from PHP Labware
+
HTM version of htmLawed_README.txt generated on 23 Jan, 2023 using rTxt2htm from PHP Labware
-htmLawed 1.2.4.2, 16 May 2019
+
+htmLawed 1.2.11
Copyright Santosh Patnaik
Dual licensed with LGPL 3 and GPL 2+
-A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed