WP_HTML_Tag_Processor::base_class_next_token(): bool

In this article

This function’s access is marked private. This means it is not intended for use by plugin or theme developers, only in other core functions. It is listed here for completeness.

Internal method which finds the next token in the HTML document.

Description

This method is a private internal function which implements the logic for finding the next token in a document. It exists so that the parser can update its state without affecting the location of the cursor in the document and without triggering subclass methods for things like next_token(), e.g. when applying patches before searching for the next token.

Return

bool Whether a token was parsed.

Source


	$this->parsing_namespace = $new_namespace;
	return true;
}

/**
 * Finds the next tag matching the $query.
 *
 * @since 6.2.0
 * @since 6.5.0 No longer processes incomplete tokens at end of document; pauses the processor at start of token.
 *
 * @param array|string|null $query {
 *     Optional. Which tag name to find, having which class, etc. Default is to find any tag.
 *
 *     @type string|null $tag_name     Which tag to find, or `null` for "any tag."
 *     @type int|null    $match_offset Find the Nth tag matching all search criteria.
 *                                     1 for "first" tag, 3 for "third," etc.
 *                                     Defaults to first tag.
 *     @type string|null $class_name   Tag must contain this whole class name to match.
 *     @type string|null $tag_closers  "visit" or "skip": whether to stop on tag closers, e.g. </div>.
 * }
 * @return bool Whether a tag was matched.
 */
public function next_tag( $query = null ): bool {
	// Store the search criteria on the processor before scanning.
	$this->parse_query( $query );

	// Number of matching tags seen so far during this call.
	$match_count = 0;

	while ( true ) {
		// Stop as soon as no further token can be produced.
		if ( false === $this->next_token() ) {
			return false;
		}

		// Only tokens that are tags and that satisfy the query are counted.
		if ( self::STATE_MATCHED_TAG === $this->parser_state && $this->matches() ) {
			++$match_count;
		}

		/*
		 * The offset check runs after every token, including non-tag tokens,
		 * so the scan ends once enough matches have been counted.
		 */
		if ( $match_count >= $this->sought_match_offset ) {
			return true;
		}
	}
}

/**
 * Finds the next token in the HTML document.
 *
 * An HTML document can be viewed as a stream of tokens,
 * where tokens are things like HTML tags, HTML comments,
 * text nodes, etc. This method finds the next token in
 * the HTML document and returns whether it found one.
 *
 * If it starts parsing a token and reaches the end of the
 * document then it will seek to the start of the last
 * token and pause, returning `false` to indicate that it
 * failed to find a complete token.
 *
 * Possible token types, based on the HTML specification:
 *
 *  - an HTML tag, whether opening, closing, or void.
 *  - a text node - the plaintext inside tags.
 *  - an HTML comment.
 *  - a DOCTYPE declaration.
 *  - a processing instruction, e.g. `<?xml version="1.0" ?>`.
 *
 * The Tag Processor currently only supports the tag token.
 *
 * @since 6.5.0
 * @since 6.7.0 Recognizes CDATA sections within foreign content.
 *
 * @return bool Whether a token was parsed.
 */
public function next_token(): bool {
	// Delegate to the internal implementation and report its result unchanged.
	$found_token = $this->base_class_next_token();

	return $found_token;
}

/**
 * Internal method which finds the next token in the HTML document.
 *
 * This method is a private internal function which implements the logic for
 * finding the next token in a document. It exists so that the parser can update
 * its state without affecting the location of the cursor in the document and
 * without triggering subclass methods for things like `next_token()`, e.g. when
 * applying patches before searching for the next token.
 *
 * @since 6.5.0
 *
 * @access private
 *
 * @return bool Whether a token was parsed.
 */
private function base_class_next_token(): bool {
	$was_at = $this->bytes_already_parsed;
	$this->after_tag();

	// Don't proceed if there's nothing more to scan.
	if (
		self::STATE_COMPLETE === $this->parser_state ||
		self::STATE_INCOMPLETE_INPUT === $this->parser_state
	) {
		return false;
	}

	/*
	 * The next step in the parsing loop determines the parsing state;
	 * clear it so that state doesn't linger from the previous step.
	 */
	$this->parser_state = self::STATE_READY;

	if ( $this->bytes_already_parsed >= strlen( $this->html ) ) {
		$this->parser_state = self::STATE_COMPLETE;
		return false;
	}

	// Find the next tag if it exists.
	if ( false === $this->parse_next_tag() ) {
		if ( self::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
			$this->bytes_already_parsed = $was_at;
		}

		return false;
	}

	/*
	 * For legacy reasons the rest of this function handles tags and their
	 * attributes. If the processor has reached the end of the document
	 * or if it matched any other token then it should return here to avoid
	 * attempting to process tag-specific syntax.
	 */
	if (
		self::STATE_INCOMPLETE_INPUT !== $this->parser_state &&
		self::STATE_COMPLETE !== $this->parser_state &&
		self::STATE_MATCHED_TAG !== $this->parser_state
	) {
		return true;
	}

	// Parse all of its attributes.
	while ( $this->parse_next_attribute() ) {
		continue;
	}

	// Ensure that the tag closes before the end of the document.
	if (
		self::STATE_INCOMPLETE_INPUT === $this->parser_state ||
		$this->bytes_already_parsed >= strlen( $this->html )
	) {
		// Does this appropriately clear state (parsed attributes)?
		$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
		$this->bytes_already_parsed = $was_at;

		return false;
	}

	$tag_ends_at = strpos( $this->html, '>', $this->bytes_already_parsed );
	if ( false === $tag_ends_at ) {
		$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
		$this->bytes_already_parsed = $was_at;

		return false;
	}
	$this->parser_state         = self::STATE_MATCHED_TAG;
	$this->bytes_already_parsed = $tag_ends_at + 1;
	$this->token_length         = $this->bytes_already_parsed - $this->token_starts_at;

	/*
	 * Certain tags require additional processing. The first-letter pre-check

Changelog

Version | Description
6.5.0 | Introduced.

User Contributed Notes

You must log in before being able to contribute a note or feedback.