Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 21 additions & 24 deletions src/wp-includes/formatting.php
Original file line number Diff line number Diff line change
Expand Up @@ -250,8 +250,20 @@ function wptexturize( $text, $reset = false ) {
} else {
// This is an HTML element delimiter.

// Replace each & with & unless it already looks like an entity.
$curl = preg_replace( '/&(?!#(?:\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&', $curl );
/*
* Replace each & with & unless it already looks like an entity,
* but preserve & inside quoted attribute values (e.g. Tailwind
* arbitrary variants like [&>.swiper-pagination] in class attrs).
*/
$curl = preg_replace_callback(
'/"[^"]*"|\'[^\']*\'|(&(?!#(?:\d+|x[a-f0-9]+);|[a-z1-4]{1,8};))/i',
static function ( array $matches ): string {
// Quoted strings (captured by first two alternations): return unchanged.
// Bare & (captured in group 1): replace with &.
return isset( $matches[1] ) && '' !== $matches[1] ? '&' : $matches[0];
},
$curl
);

_wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );
}
Expand Down Expand Up @@ -663,7 +675,7 @@ function get_html_split_regex() {
. '(?' // Conditional expression follows.
. $escaped // Find end of escaped element.
. '|' // ...else...
. '[^>]*>?' // Find end of normal element.
. '(?:"[^"]*"|\'[^\']*\'|[^<>])*+>' // Find end of element, allowing > inside quoted strings.
. ')'
. ')/';
// phpcs:enable
Expand All @@ -684,27 +696,12 @@ function get_html_split_regex() {
* @return string The regular expression.
*/
function _get_wptexturize_split_regex( $shortcode_regex = '' ) {
static $html_regex;

if ( ! isset( $html_regex ) ) {
// phpcs:disable Squiz.Strings.ConcatenationSpacing.PaddingFound -- don't remove regex indentation
$comment_regex =
'!' // Start of comment, after the <.
. '(?:' // Unroll the loop: Consume everything until --> is found.
. '-(?!->)' // Dash not followed by end of comment.
. '[^\-]*+' // Consume non-dashes.
. ')*+' // Loop possessively.
. '(?:-->)?'; // End of comment. If not found, match all input.

$html_regex = // Needs replaced with wp_html_split() per Shortcode API Roadmap.
'<' // Find start of element.
. '(?(?=!--)' // Is this a comment?
. $comment_regex // Find end of comment.
. '|'
. '[^>]*>?' // Find end of element. If not found, match all input.
. ')';
// phpcs:enable
}
/*
* Derive the HTML pattern from get_html_split_regex() instead of
* maintaining a separate copy. Strip the outer '/(' prefix and
* ')/' suffix to get the inner matching pattern.
*/
$html_regex = substr( get_html_split_regex(), 2, -2 );

if ( empty( $shortcode_regex ) ) {
$regex = '/(' . $html_regex . ')/';
Expand Down
33 changes: 33 additions & 0 deletions tests/phpunit/tests/formatting/wpHtmlSplit.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,39 @@ public function data_basic_features() {
);
}

/**
 * Ensure that > inside quoted attribute values does not cause premature
 * tag splitting in wp_html_split().
 *
 * @ticket 63997
 * @dataProvider data_gt_in_quoted_attribute_values
 *
 * @param string   $input  Markup handed to wp_html_split().
 * @param string[] $output Segments wp_html_split() is expected to return.
 */
public function test_gt_in_quoted_attribute_values( string $input, array $output ): void {
	// Declared void: PHP rejects `return <expr>;` here, so assert as a plain statement.
	$this->assertSame( $output, wp_html_split( $input ) );
}

/**
 * Data provider for {@see self::test_gt_in_quoted_attribute_values()}.
 *
 * Each data set is a positional pair: the input markup first, then the
 * five segments expected back from wp_html_split().
 *
 * @return list<array{0: string, 1: array{ string, string, string, string, string }}>
 */
public function data_gt_in_quoted_attribute_values(): array {
	return array(
		// Literal > inside a double-quoted attribute value.
		array(
			'<div data-test="a > b">content</div>',
			array( '', '<div data-test="a > b">', 'content', '</div>', '' ),
		),
		// Literal > inside a single-quoted attribute value.
		array(
			'<div data-test=\'a > b\'>content</div>',
			array( '', '<div data-test=\'a > b\'>', 'content', '</div>', '' ),
		),
		// Literal > in one attribute alongside an already-escaped &gt; in another.
		array(
			'<div data-test="a > b" data-other="c &gt; d">content</div>',
			array( '', '<div data-test="a > b" data-other="c &gt; d">', 'content', '</div>', '' ),
		),
	);
}

/**
* Automated performance testing of the main regex.
*
Expand Down
77 changes: 67 additions & 10 deletions tests/phpunit/tests/formatting/wpTexturize.php
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ public function data_opening_single_quote() {
),
array(
"word <'word word", // Invalid HTML.
"word <'word word",
'word <&#8217;word word',
),
array(
"word &lt;'word word", // Valid HTML input makes curly quotes.
Expand Down Expand Up @@ -412,7 +412,7 @@ public function data_opening_single_quote() {
),
array(
"word<'word word",
"word<'word word",
'word<&#8217;word word',
),
array(
"word&lt;'word word",
Expand Down Expand Up @@ -440,7 +440,7 @@ public function data_opening_single_quote() {
),
array(
"word <' word word",
"word <' word word",
'word <&#8217; word word',
),
array(
"word &lt;' word word",
Expand Down Expand Up @@ -468,7 +468,7 @@ public function data_opening_single_quote() {
),
array(
"word<' word word",
"word<' word word",
'word<&#8217; word word',
),
array(
"word&lt;' word word",
Expand Down Expand Up @@ -620,7 +620,7 @@ public function data_opening_quote() {
),
array(
'word <"word word', // Invalid HTML.
'word <"word word',
'word <&#8221;word word',
),
array(
'word &lt;"word word',
Expand Down Expand Up @@ -652,7 +652,7 @@ public function data_opening_quote() {
),
array(
'word<"word word',
'word<"word word',
'word<&#8221;word word',
),
array(
'word&lt;"word word',
Expand Down Expand Up @@ -1278,11 +1278,11 @@ public function data_tag_avoidance() {
),
array(
'[ photos by <a href="http://example.com/?a[]=1&a[]=2"> this guy & that guy </a> ]',
'[ photos by <a href="http://example.com/?a[]=1&#038;a[]=2"> this guy &#038; that guy </a> ]',
'[ photos by <a href="http://example.com/?a[]=1&a[]=2"> this guy &#038; that guy </a> ]',
),
array(
'[photos by <a href="http://example.com/?a[]=1&a[]=2"> this guy & that guy </a>]',
'[photos by <a href="http://example.com/?a[]=1&#038;a[]=2"> this guy &#038; that guy </a>]',
'[photos by <a href="http://example.com/?a[]=1&a[]=2"> this guy &#038; that guy </a>]',
),
array(
'& <script>&&</script>',
Expand Down Expand Up @@ -1326,7 +1326,7 @@ public function data_tag_avoidance() {
),
array(
'<br [gallery ...] ... /',
'<br [gallery ...] ... /',
'<br [gallery ...] &#8230; /',
),
array(
'<br ... />',
Expand Down Expand Up @@ -1366,7 +1366,7 @@ public function data_tag_avoidance() {
),
array(
'<br [[gallery ...]] ... /',
'<br [[gallery ...]] ... /',
'<br [[gallery ...]] &#8230; /',
),
array(
'[[gallery ...]]...[[gallery ...]]',
Expand Down Expand Up @@ -2086,6 +2086,7 @@ public function data_primes_quotes_translation() {
*
* @covers ::_get_wptexturize_split_regex
* @covers ::_get_wptexturize_shortcode_regex
* @covers ::get_html_split_regex
*/
public function test_pcre_performance( $input ) {
global $shortcode_tags;
Expand All @@ -2111,6 +2112,62 @@ public function test_trailing_less_than() {
$this->assertSame( 'F&#8211;oo<', wptexturize( 'F--oo<', true ) );
}

/**
 * Verify that a > inside a quoted attribute value does not end the tag
 * early, which would leave the quotes exposed to texturization.
 *
 * @ticket 63997
 * @dataProvider data_gt_in_quoted_attribute_values
 *
 * @param string $input  Markup handed to wptexturize().
 * @param string $output Markup wptexturize() is expected to return.
 */
public function test_gt_in_quoted_attribute_values( $input, $output ) {
	$texturized = wptexturize( $input );

	$this->assertSame( $output, $texturized );
}

/**
 * Data provider for test_gt_in_quoted_attribute_values().
 *
 * Every data set expects the markup to come back unchanged, so each entry
 * below is listed once and emitted as an ( input, expected ) pair.
 *
 * @return array[] List of two-element arrays: input markup, expected markup.
 */
public function data_gt_in_quoted_attribute_values() {
	$unchanged_markup = array(
		// > and double quotes inside a single-quoted attribute.
		'<div data-test=\'a > 0 ? "yes" : "no"\'>content</div>',
		// > and single quotes inside a double-quoted attribute.
		"<div data-test=\"a > 0 ? 'yes' : 'no'\">content</div>",
		// Several attributes, only one of which contains >.
		'<div data-a="x > y" data-b="z">content</div>',
		// An escaped &gt; with nested quotes should still work as before.
		'<div data-test=\'a &gt; 0 ? "yes" : "no"\'>content</div>',
		// Ticket 63997.
		'<div data-template="Label <% value %>" data-label="{A \'message\' string}">Content</div>',
		// Ticket 63426.
		'<iframe srcdoc="<body></body>"></iframe>',
		// Ticket 43785.
		'<script type="text/javascript">if(a<b)window&&document</script>',
		// Ticket 57381.
		'<div class="swiper [&>.swiper-pagination]:static">content</div>',
	);

	return array_map(
		static function ( $markup ) {
			return array( $markup, $markup );
		},
		$unchanged_markup
	);
}

public function data_whole_posts() {
require_once DIR_TESTDATA . '/formatting/whole-posts.php';
return data_whole_posts();
Expand Down
2 changes: 1 addition & 1 deletion tests/phpunit/tests/rest-api/rest-comments-controller.php
Original file line number Diff line number Diff line change
Expand Up @@ -3166,7 +3166,7 @@ public function test_comment_roundtrip_as_superadmin() {
array(
'content' => array(
'raw' => '\\\&\\\ &amp; &invalid; < &lt; &amp;lt;',
'rendered' => '<p>\\\&#038;\\\ &amp; &invalid; < &lt; &amp;lt;' . "\n</p>",
'rendered' => '<p>\\\&#038;\\\ &amp; &invalid; < &lt; &amp;lt;</p>',
),
'author_name' => '\\\&amp;\\\ &amp; &amp;invalid; &lt; &lt; &amp;lt;',
'author_user_agent' => '\\\&\\\ &amp; &invalid; &lt; &lt; &amp;lt;',
Expand Down
Loading