WP_HTML_Tag_Processor::parse_next_attribute(): bool

In this article

This function’s access is marked private. This means it is not intended for use by plugin or theme developers, only in other core functions. It is listed here for completeness.

Parses the next attribute.

Return

bool Whether an attribute was found before the end of the document.

Source

	/*
	 * </> is a missing end tag name, which is ignored.
	 *
	 * This was also known as the "presumptuous empty tag"
	 * in early discussions as it was proposed to close
	 * the nearest previous opening tag.
	 *
	 * See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name
	 */
	if ( '>' === $html[ $at + 1 ] ) {
		// `<>` is interpreted as plaintext.
		if ( ! $this->is_closing_tag ) {
			++$at;
			continue;
		}

		$this->parser_state         = self::STATE_PRESUMPTUOUS_TAG;
		$this->token_length         = $at + 2 - $this->token_starts_at;
		$this->bytes_already_parsed = $at + 2;
		return true;
	}

	/*
	 * `<?` transitions to a bogus comment state – skip to the nearest >
	 * See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
	 */
	if ( ! $this->is_closing_tag && '?' === $html[ $at + 1 ] ) {
		$closer_at = strpos( $html, '>', $at + 2 );
		if ( false === $closer_at ) {
			$this->parser_state = self::STATE_INCOMPLETE_INPUT;

			return false;
		}

		$this->parser_state         = self::STATE_COMMENT;
		$this->comment_type         = self::COMMENT_AS_INVALID_HTML;
		$this->token_length         = $closer_at + 1 - $this->token_starts_at;
		$this->text_starts_at       = $this->token_starts_at + 2;
		$this->text_length          = $closer_at - $this->text_starts_at;
		$this->bytes_already_parsed = $closer_at + 1;

		/*
		 * Identify a Processing Instruction node were HTML to have them.
		 *
		 * This section must occur after identifying the bogus comment end
		 * because in an HTML parser it will span to the nearest `>`, even
		 * if there's no `?>` as would be required in an XML document. It
		 * is therefore not possible to parse a Processing Instruction node
		 * containing a `>` in the HTML syntax.
		 *
		 * XML allows for more target names, but this code only identifies
		 * those with ASCII-representable target names. This means that it
		 * may identify some Processing Instruction nodes as bogus comments,
		 * but it will not misinterpret the HTML structure. By limiting the
		 * identification to these target names the Tag Processor can avoid
		 * the need to start parsing UTF-8 sequences.
		 *
		 * > NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] |
		 *                     [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] |
		 *                     [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
		 *                     [#x10000-#xEFFFF]
		 * > NameChar      ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
		 *
		 * @todo Processing instruction nodes in SGML may contain any kind of markup. XML defines a
		 *       special case with `<?xml ... ?>` syntax, but the `?` is part of the bogus comment.
		 *
		 * @see https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PITarget
		 */
		if ( $this->token_length >= 5 && '?' === $html[ $closer_at - 1 ] ) {
			$comment_text     = substr( $html, $this->token_starts_at + 2, $this->token_length - 4 );
			$pi_target_length = strspn( $comment_text, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ:_' );

			if ( 0 < $pi_target_length ) {
				$pi_target_length += strspn( $comment_text, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:_-.', $pi_target_length );

				$this->comment_type       = self::COMMENT_AS_PI_NODE_LOOKALIKE;
				$this->tag_name_starts_at = $this->token_starts_at + 2;
				$this->tag_name_length    = $pi_target_length;
				$this->text_starts_at    += $pi_target_length;
				$this->text_length       -= $pi_target_length + 1;
			}
		}

		return true;
	}

	/*
	 * If a non-alpha starts the tag name in a tag closer it's a comment.
	 * Find the first `>`, which closes the comment.
	 *
	 * This parser classifies these particular comments as special "funky comments"
	 * which are made available for further processing.
	 *
	 * See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name
	 */
	if ( $this->is_closing_tag ) {
		// No chance of finding a closer.
		if ( $at + 3 > $doc_length ) {
			$this->parser_state = self::STATE_INCOMPLETE_INPUT;

			return false;
		}

		$closer_at = strpos( $html, '>', $at + 2 );
		if ( false === $closer_at ) {
			$this->parser_state = self::STATE_INCOMPLETE_INPUT;

			return false;
		}

		$this->parser_state         = self::STATE_FUNKY_COMMENT;
		$this->token_length         = $closer_at + 1 - $this->token_starts_at;
		$this->text_starts_at       = $this->token_starts_at + 2;
		$this->text_length          = $closer_at - $this->text_starts_at;
		$this->bytes_already_parsed = $closer_at + 1;
		return true;
	}

	++$at;
}

/*
 * This does not imply an incomplete parse; it indicates that there
 * can be nothing left in the document other than a #text node.
 */

Changelog

Version | Description
6.2.0 | Introduced.

User Contributed Notes

You must log in before being able to contribute a note or feedback.