WP_HTML_Tag_Processor::base_class_next_token(): bool

In this article

This function’s access is marked private. This means it is not intended for use by plugin or theme developers, only in other core functions. It is listed here for completeness.

Internal method which finds the next token in the HTML document.

Description

This method is a private internal function which implements the logic for finding the next token in a document. It exists so that the parser can update its state without affecting the location of the cursor in the document and without triggering subclass methods for things like next_token(), e.g. when applying patches before searching for the next token.

Return

bool Whether a token was parsed.

Source

/**
 * Internal method which finds the next token in the HTML document.
 *
 * Advances the parser by one token and records the result in
 * `$this->parser_state`. When the remaining input ends before a tag (or the
 * closer of a special element such as SCRIPT or TEXTAREA) is complete, the
 * state is set to STATE_INCOMPLETE_INPUT and the cursor
 * (`$this->bytes_already_parsed`) is rewound to where it was on entry.
 *
 * For the special elements handled in the `switch` below, the opening tag,
 * its contents, and its closing tag are reported together as a single token.
 *
 * @since 6.5.0
 *
 * @access private
 *
 * @return bool Whether a token was parsed.
 */
private function base_class_next_token() {
	// Remember the cursor on entry so it can be restored if the input turns out to be incomplete.
	$was_at = $this->bytes_already_parsed;
	// NOTE(review): presumably finalizes work left over from the previous token; confirm against after_tag().
	$this->after_tag();

	// Don't proceed if there's nothing more to scan.
	if (
		self::STATE_COMPLETE === $this->parser_state ||
		self::STATE_INCOMPLETE_INPUT === $this->parser_state
	) {
		return false;
	}

	/*
	 * The next step in the parsing loop determines the parsing state;
	 * clear it so that state doesn't linger from the previous step.
	 */
	$this->parser_state = self::STATE_READY;

	// The cursor has reached (or passed) the end of the document: nothing is left to tokenize.
	if ( $this->bytes_already_parsed >= strlen( $this->html ) ) {
		$this->parser_state = self::STATE_COMPLETE;
		return false;
	}

	// Find the next tag if it exists.
	if ( false === $this->parse_next_tag() ) {
		// Only rewind when the failure was caused by truncated input; otherwise leave the cursor alone.
		if ( self::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
			$this->bytes_already_parsed = $was_at;
		}

		return false;
	}

	/*
	 * For legacy reasons the rest of this function handles tags and their
	 * attributes. If the processor has reached the end of the document
	 * or if it matched any other token then it should return here to avoid
	 * attempting to process tag-specific syntax.
	 *
	 * Put differently: execution continues past this check only when the
	 * state is STATE_INCOMPLETE_INPUT, STATE_COMPLETE, or STATE_MATCHED_TAG.
	 */
	if (
		self::STATE_INCOMPLETE_INPUT !== $this->parser_state &&
		self::STATE_COMPLETE !== $this->parser_state &&
		self::STATE_MATCHED_TAG !== $this->parser_state
	) {
		return true;
	}

	/*
	 * Parse all of its attributes. The loop body is intentionally empty:
	 * the loop simply calls parse_next_attribute() until it returns a
	 * falsy value.
	 */
	while ( $this->parse_next_attribute() ) {
		continue;
	}

	// Ensure that the tag closes before the end of the document.
	if (
		self::STATE_INCOMPLETE_INPUT === $this->parser_state ||
		$this->bytes_already_parsed >= strlen( $this->html )
	) {
		// Does this appropriately clear state (parsed attributes)?
		$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
		$this->bytes_already_parsed = $was_at;

		return false;
	}

	// Locate the '>' that terminates the tag; without one the tag is truncated.
	$tag_ends_at = strpos( $this->html, '>', $this->bytes_already_parsed );
	if ( false === $tag_ends_at ) {
		$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
		$this->bytes_already_parsed = $was_at;

		return false;
	}
	/*
	 * Record the matched tag. Note that `token_length` spans from the start
	 * of the token up to, but not including, the terminating '>' byte, while
	 * the cursor is moved to the byte immediately after that '>'.
	 */
	$this->parser_state         = self::STATE_MATCHED_TAG;
	$this->token_length         = $tag_ends_at - $this->token_starts_at;
	$this->bytes_already_parsed = $tag_ends_at + 1;

	/*
	 * For non-DATA sections which might contain text that looks like HTML tags but
	 * isn't, scan with the appropriate alternative mode. Looking at the first letter
	 * of the tag name as a pre-check avoids a string allocation when it's not needed.
	 *
	 * The letters checked (I, N, S, T, X) are the initials of every tag name
	 * handled in the `switch` below. Closing tags never start such a section.
	 */
	$t = $this->html[ $this->tag_name_starts_at ];
	if (
		$this->is_closing_tag ||
		! (
			'i' === $t || 'I' === $t ||
			'n' === $t || 'N' === $t ||
			's' === $t || 'S' === $t ||
			't' === $t || 'T' === $t ||
			'x' === $t || 'X' === $t
		)
	) {
		return true;
	}

	// The `case` labels below are upper-case, so get_tag() is expected to return an upper-case name.
	$tag_name = $this->get_tag();

	/*
	 * Preserve the opening tag pointers, as these will be overwritten
	 * when finding the closing tag. They will be reset after finding
	 * the closing tag to point to the opening of the special atomic
	 * tag sequence.
	 *
	 * `$tag_ends_at` is recomputed from the token fields; given the
	 * assignments above it is again the offset of the opening tag's '>'.
	 */
	$tag_name_starts_at   = $this->tag_name_starts_at;
	$tag_name_length      = $this->tag_name_length;
	$tag_ends_at          = $this->token_starts_at + $this->token_length;
	$attributes           = $this->attributes;
	$duplicate_attributes = $this->duplicate_attributes;

	// Find the closing tag if necessary.
	$found_closer = false;
	switch ( $tag_name ) {
		case 'SCRIPT':
			$found_closer = $this->skip_script_data();
			break;

		case 'TEXTAREA':
		case 'TITLE':
			$found_closer = $this->skip_rcdata( $tag_name );
			break;

		/*
		 * In the browser this list would include the NOSCRIPT element,
		 * but the Tag Processor is an environment with the scripting
		 * flag disabled, meaning that it needs to descend into the
		 * NOSCRIPT element to be able to properly process what will be
		 * sent to a browser.
		 *
		 * Note that this rule makes HTML5 syntax incompatible with XML,
		 * because the parsing of this token depends on client application.
		 * The NOSCRIPT element cannot be represented in the XHTML syntax.
		 */
		case 'IFRAME':
		case 'NOEMBED':
		case 'NOFRAMES':
		case 'STYLE':
		case 'XMP':
			$found_closer = $this->skip_rawtext( $tag_name );
			break;

		// No other tags should be treated in their entirety here.
		default:
			return true;
	}

	// Without a closer the special element is incomplete: flag it and rewind to the entry position.
	if ( ! $found_closer ) {
		$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
		$this->bytes_already_parsed = $was_at;
		return false;
	}

	/*
	 * The values here look like they reference the opening tag but they reference
	 * the closing tag instead. This is why the opening tag values were stored
	 * above in a variable. It reads confusingly here, but that's because the
	 * functions that skip the contents have moved all the internal cursors past
	 * the inner content of the tag.
	 *
	 * After these assignments the token spans from the entry position to the
	 * current cursor, the text span starts one byte after the opening tag's '>',
	 * and the tag name and attribute fields describe the opening tag again.
	 *
	 * NOTE(review): `text_length` is computed from `tag_name_starts_at` as left
	 * behind by the skip_*() helper, before it is restored two lines later.
	 * Confirm which offset those helpers store there.
	 */
	$this->token_starts_at      = $was_at;
	$this->token_length         = $this->bytes_already_parsed - $this->token_starts_at;
	$this->text_starts_at       = $tag_ends_at + 1;
	$this->text_length          = $this->tag_name_starts_at - $this->text_starts_at;
	$this->tag_name_starts_at   = $tag_name_starts_at;
	$this->tag_name_length      = $tag_name_length;
	$this->attributes           = $attributes;
	$this->duplicate_attributes = $duplicate_attributes;

	return true;
}

Changelog

Version | Description
6.5.0 | Introduced.

User Contributed Notes

You must log in before being able to contribute a note or feedback.