Skips contents of script tags.
Source
/**
 * Skips contents of script tags.
 *
 * Scans forward from the current parse position while tracking the three
 * script-content states used below ("unescaped", "escaped", and
 * "double-escaped") until a closing script tag ends the script region.
 *
 * On success `$this->tag_name_starts_at` points at the "<" of the closing
 * tag and `$this->bytes_already_parsed` points just past its ">".
 *
 * @since 6.2.0
 *
 * @return bool True if a closing script tag terminated by ">" was found;
 *              false if the document ended first. When the document ends
 *              while scanning the closing tag itself, the parser state is
 *              also set to `self::STATE_INCOMPLETE_INPUT`.
 */
private function skip_script_data() {
	$state      = 'unescaped';
	$html       = $this->html;
	$doc_length = strlen( $html );
	$at         = $this->bytes_already_parsed;

	while ( false !== $at && $at < $doc_length ) {
		// Jump to the next byte which could start a state change.
		$at += strcspn( $html, '-<', $at );

		/*
		 * For all script states a "-->" transitions
		 * back into the normal unescaped script mode,
		 * even if that's the current state.
		 */
		if (
			$at + 2 < $doc_length &&
			'-' === $html[ $at ] &&
			'-' === $html[ $at + 1 ] &&
			'>' === $html[ $at + 2 ]
		) {
			$at   += 3;
			$state = 'unescaped';
			continue;
		}

		/*
		 * With fewer than two bytes remaining nothing below can match.
		 * Bail out here instead of continuing: when the scan stopped on
		 * the final byte of the document, continuing without advancing
		 * would re-examine that same byte forever.
		 */
		if ( $at + 1 >= $doc_length ) {
			return false;
		}

		/*
		 * Everything of interest past here starts with "<".
		 * The position advances past this byte whether or not it matches.
		 */
		if ( '<' !== $html[ $at++ ] ) {
			continue;
		}

		/*
		 * Unlike with "-->", the "<!--" only transitions
		 * into the escaped mode if not already there.
		 *
		 * Inside the escaped modes it will be ignored; and
		 * should never break out of the double-escaped
		 * mode and back into the escaped mode.
		 *
		 * While this requires a mode change, it does not
		 * impact the parsing otherwise, so continue
		 * parsing after updating the state.
		 */
		if (
			$at + 2 < $doc_length &&
			'!' === $html[ $at ] &&
			'-' === $html[ $at + 1 ] &&
			'-' === $html[ $at + 2 ]
		) {
			$at   += 3;
			$state = 'unescaped' === $state ? 'escaped' : $state;
			continue;
		}

		if ( '/' === $html[ $at ] ) {
			// Remember where the "<" sits; $at has already moved past it.
			$closer_potentially_starts_at = $at - 1;
			$is_closing                   = true;
			++$at;
		} else {
			$is_closing = false;
		}

		/*
		 * At this point the only remaining state-changes occur with the
		 * <script> and </script> tags; unless one of these appears next,
		 * proceed scanning to the next potential token in the text.
		 *
		 * The length check also guarantees that the byte following the
		 * six-letter tag name exists, so it can be read safely below.
		 */
		if ( ! (
			$at + 6 < $doc_length &&
			( 's' === $html[ $at ] || 'S' === $html[ $at ] ) &&
			( 'c' === $html[ $at + 1 ] || 'C' === $html[ $at + 1 ] ) &&
			( 'r' === $html[ $at + 2 ] || 'R' === $html[ $at + 2 ] ) &&
			( 'i' === $html[ $at + 3 ] || 'I' === $html[ $at + 3 ] ) &&
			( 'p' === $html[ $at + 4 ] || 'P' === $html[ $at + 4 ] ) &&
			( 't' === $html[ $at + 5 ] || 'T' === $html[ $at + 5 ] )
		) ) {
			++$at;
			continue;
		}

		/*
		 * Ensure that the script tag terminates to avoid matching on
		 * substrings of a non-match. For example, the sequence
		 * "<script123" should not end a script region even though
		 * "<script" is found within the text.
		 */
		$at += 6;
		$c   = $html[ $at ];
		if ( ' ' !== $c && "\t" !== $c && "\r" !== $c && "\n" !== $c && '/' !== $c && '>' !== $c ) {
			++$at;
			continue;
		}

		// An opening script tag inside the escaped mode enters double-escaped mode.
		if ( 'escaped' === $state && ! $is_closing ) {
			$state = 'double-escaped';
			continue;
		}

		// A closing script tag inside double-escaped mode only drops back to escaped mode.
		if ( 'double-escaped' === $state && $is_closing ) {
			$state = 'escaped';
			continue;
		}

		if ( $is_closing ) {
			$this->bytes_already_parsed = $closer_potentially_starts_at;
			$this->tag_name_starts_at   = $closer_potentially_starts_at;
			if ( $this->bytes_already_parsed >= $doc_length ) {
				return false;
			}

			// Consume everything the attribute parser accepts within the closing tag.
			while ( $this->parse_next_attribute() ) {
				continue;
			}

			if ( $this->bytes_already_parsed >= $doc_length ) {
				$this->parser_state = self::STATE_INCOMPLETE_INPUT;

				return false;
			}

			if ( '>' === $html[ $this->bytes_already_parsed ] ) {
				++$this->bytes_already_parsed;
				return true;
			}
		}

		++$at;
	}

	return false;
}
Changelog
Version | Description |
---|---|
6.2.0 | Introduced. |
User Contributed Notes
You must log in before being able to contribute a note or feedback.