wptexturize( string $text, bool $reset = false )

Replaces common plain text characters with formatted entities.


Description Description

Returns given text with transformations of quotes into smart quotes, apostrophes, dashes, ellipses, the trademark symbol, and the multiplication symbol.

As an example,

'cause today's effort makes it worth tomorrow's "holiday" ...

Becomes:

’cause today’s effort makes it worth tomorrow’s “holiday” …

Code within certain HTML blocks are skipped.

Do not use this function before the ‘init’ action hook; everything will break.


Top ↑

Parameters Parameters

$text

(string) (Required) The text to be formatted.

$reset

(bool) (Optional) Set to true for unit testing. Translated patterns will reset.

Default value: false


Top ↑

Return Return

(string) The string replaced with HTML entities.


Top ↑

More Information More Information

  • Text enclosed in the tags <pre>, <code>, <kbd>, <style>, <script>, and <tt> will be skipped. This list of tags can be changed with the no_texturize_tags filter.
  • Text in the ‘' shortcode will also be ignored. The list of shortcodes can be changed with the no_texturize_shortcodes filter.
  • Do not use this function before the init action hook. All of the settings must be initialized before the first call to wptexturize or it will fail on every subsequent use.
  • Opening and closing quotes can be customized in a WordPress translation file. Here are some of the text transformations:
source text transformed text symbol name
"---" "—" em-dash
" -- " "—" em-dash
"--" "–" en-dash
" - " "–" en-dash
"..." "…" ellipsis
`` opening quote
"hello “hello opening quote
'hello ‘hello opening quote
'' closing quote
world." world.” closing quote
world.' world.’ closing quote
" (tm)" " ™" trademark symbol
1234" 1234″ double prime symbol
1234' 1234′ prime symbol
'99 ’99 apostrophe before abbreviated year
Webster's Webster’s apostrophe in a word
1234x1234 1234×1234 multiplication symbol
  • There is a small "cockney" list of transformations, as well. They can be replaced if the variable $wp_cockneyreplace is defined and contains an associative array with the keys containing the source strings and the values containing the transformed strings. By default the following strings will be transformed:
  • 'tain't
  • 'twere
  • 'twas
  • 'tis
  • 'twill
  • 'til
  • 'bout
  • 'nuff
  • 'round
  • 'cause


Top ↑

Source Source

File: wp-includes/formatting.php

function wptexturize( $text, $reset = false ) {
	global $wp_cockneyreplace, $shortcode_tags;
	static $static_characters            = null,
		$static_replacements             = null,
		$dynamic_characters              = null,
		$dynamic_replacements            = null,
		$default_no_texturize_tags       = null,
		$default_no_texturize_shortcodes = null,
		$run_texturize                   = true,
		$apos                            = null,
		$prime                           = null,
		$double_prime                    = null,
		$opening_quote                   = null,
		$closing_quote                   = null,
		$opening_single_quote            = null,
		$closing_single_quote            = null,
		$open_q_flag                     = '<!--oq-->',
		$open_sq_flag                    = '<!--osq-->',
		$apos_flag                       = '<!--apos-->';

	// If there's nothing to do, just stop.
	if ( empty( $text ) || false === $run_texturize ) {
		return $text;
	}

	// Set up static variables. Run once only.
	if ( $reset || ! isset( $static_characters ) ) {
		/**
		 * Filters whether to skip running wptexturize().
		 *
		 * Passing false to the filter will effectively short-circuit wptexturize().
		 * returning the original text passed to the function instead.
		 *
		 * The filter runs only once, the first time wptexturize() is called.
		 *
		 * @since 4.0.0
		 *
		 * @see wptexturize()
		 *
		 * @param bool $run_texturize Whether to short-circuit wptexturize().
		 */
		$run_texturize = apply_filters( 'run_wptexturize', $run_texturize );
		if ( false === $run_texturize ) {
			return $text;
		}

		/* translators: Opening curly double quote. */
		$opening_quote = _x( '&#8220;', 'opening curly double quote' );
		/* translators: Closing curly double quote. */
		$closing_quote = _x( '&#8221;', 'closing curly double quote' );

		/* translators: Apostrophe, for example in 'cause or can't. */
		$apos = _x( '&#8217;', 'apostrophe' );

		/* translators: Prime, for example in 9' (nine feet). */
		$prime = _x( '&#8242;', 'prime' );
		/* translators: Double prime, for example in 9" (nine inches). */
		$double_prime = _x( '&#8243;', 'double prime' );

		/* translators: Opening curly single quote. */
		$opening_single_quote = _x( '&#8216;', 'opening curly single quote' );
		/* translators: Closing curly single quote. */
		$closing_single_quote = _x( '&#8217;', 'closing curly single quote' );

		/* translators: En dash. */
		$en_dash = _x( '&#8211;', 'en dash' );
		/* translators: Em dash. */
		$em_dash = _x( '&#8212;', 'em dash' );

		$default_no_texturize_tags       = array( 'pre', 'code', 'kbd', 'style', 'script', 'tt' );
		$default_no_texturize_shortcodes = array( 'code' );

		// If a plugin has provided an autocorrect array, use it.
		if ( isset( $wp_cockneyreplace ) ) {
			$cockney        = array_keys( $wp_cockneyreplace );
			$cockneyreplace = array_values( $wp_cockneyreplace );
		} else {
			/*
			 * translators: This is a comma-separated list of words that defy the syntax of quotations in normal use,
			 * for example... 'We do not have enough words yet'... is a typical quoted phrase. But when we write
			 * lines of code 'til we have enough of 'em, then we need to insert apostrophes instead of quotes.
			 */
			$cockney = explode(
				',',
				_x(
					"'tain't,'twere,'twas,'tis,'twill,'til,'bout,'nuff,'round,'cause,'em",
					'Comma-separated list of words to texturize in your language'
				)
			);

			$cockneyreplace = explode(
				',',
				_x(
					'&#8217;tain&#8217;t,&#8217;twere,&#8217;twas,&#8217;tis,&#8217;twill,&#8217;til,&#8217;bout,&#8217;nuff,&#8217;round,&#8217;cause,&#8217;em',
					'Comma-separated list of replacement words in your language'
				)
			);
		}

		$static_characters   = array_merge( array( '...', '``', '\'\'', ' (tm)' ), $cockney );
		$static_replacements = array_merge( array( '&#8230;', $opening_quote, $closing_quote, ' &#8482;' ), $cockneyreplace );

		// Pattern-based replacements of characters.
		// Sort the remaining patterns into several arrays for performance tuning.
		$dynamic_characters   = array(
			'apos'  => array(),
			'quote' => array(),
			'dash'  => array(),
		);
		$dynamic_replacements = array(
			'apos'  => array(),
			'quote' => array(),
			'dash'  => array(),
		);
		$dynamic              = array();
		$spaces               = wp_spaces_regexp();

		// '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation.
		if ( "'" !== $apos || "'" !== $closing_single_quote ) {
			$dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|&gt;|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_single_quote;
		}
		if ( "'" !== $apos || '"' !== $closing_quote ) {
			$dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|&gt;|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_quote;
		}

		// '99 '99s '99's (apostrophe)  But never '9 or '99% or '999 or '99.0.
		if ( "'" !== $apos ) {
			$dynamic['/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/'] = $apos_flag;
		}

		// Quoted numbers like '0.42'.
		if ( "'" !== $opening_single_quote && "'" !== $closing_single_quote ) {
			$dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $open_sq_flag . '$1' . $closing_single_quote;
		}

		// Single quote at start, or preceded by (, {, <, [, ", -, or spaces.
		if ( "'" !== $opening_single_quote ) {
			$dynamic[ '/(?<=\A|[([{"\-]|&lt;|' . $spaces . ')\'/' ] = $open_sq_flag;
		}

		// Apostrophe in a word. No spaces, double apostrophes, or other punctuation.
		if ( "'" !== $apos ) {
			$dynamic[ '/(?<!' . $spaces . ')\'(?!\Z|[.,:;!?"\'(){}[\]\-]|&[lg]t;|' . $spaces . ')/' ] = $apos_flag;
		}

		$dynamic_characters['apos']   = array_keys( $dynamic );
		$dynamic_replacements['apos'] = array_values( $dynamic );
		$dynamic                      = array();

		// Quoted numbers like "42".
		if ( '"' !== $opening_quote && '"' !== $closing_quote ) {
			$dynamic[ '/(?<=\A|' . $spaces . ')"(\d[.,\d]*)"/' ] = $open_q_flag . '$1' . $closing_quote;
		}

		// Double quote at start, or preceded by (, {, <, [, -, or spaces, and not followed by spaces.
		if ( '"' !== $opening_quote ) {
			$dynamic[ '/(?<=\A|[([{\-]|&lt;|' . $spaces . ')"(?!' . $spaces . ')/' ] = $open_q_flag;
		}

		$dynamic_characters['quote']   = array_keys( $dynamic );
		$dynamic_replacements['quote'] = array_values( $dynamic );
		$dynamic                       = array();

		// Dashes and spaces.
		$dynamic['/---/'] = $em_dash;
		$dynamic[ '/(?<=^|' . $spaces . ')--(?=$|' . $spaces . ')/' ] = $em_dash;
		$dynamic['/(?<!xn)--/']                                       = $en_dash;
		$dynamic[ '/(?<=^|' . $spaces . ')-(?=$|' . $spaces . ')/' ]  = $en_dash;

		$dynamic_characters['dash']   = array_keys( $dynamic );
		$dynamic_replacements['dash'] = array_values( $dynamic );
	}

	// Must do this every time in case plugins use these filters in a context sensitive manner.
	/**
	 * Filters the list of HTML elements not to texturize.
	 *
	 * @since 2.8.0
	 *
	 * @param string[] $default_no_texturize_tags An array of HTML element names.
	 */
	$no_texturize_tags = apply_filters( 'no_texturize_tags', $default_no_texturize_tags );
	/**
	 * Filters the list of shortcodes not to texturize.
	 *
	 * @since 2.8.0
	 *
	 * @param string[] $default_no_texturize_shortcodes An array of shortcode names.
	 */
	$no_texturize_shortcodes = apply_filters( 'no_texturize_shortcodes', $default_no_texturize_shortcodes );

	$no_texturize_tags_stack       = array();
	$no_texturize_shortcodes_stack = array();

	// Look for shortcodes and HTML elements.

	preg_match_all( '@\[/?([^<>&/\[\]\x00-\x20=]++)@', $text, $matches );
	$tagnames         = array_intersect( array_keys( $shortcode_tags ), $matches[1] );
	$found_shortcodes = ! empty( $tagnames );
	$shortcode_regex  = $found_shortcodes ? _get_wptexturize_shortcode_regex( $tagnames ) : '';
	$regex            = _get_wptexturize_split_regex( $shortcode_regex );

	$textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );

	foreach ( $textarr as &$curl ) {
		// Only call _wptexturize_pushpop_element if $curl is a delimiter.
		$first = $curl[0];
		if ( '<' === $first ) {
			if ( '<!--' === substr( $curl, 0, 4 ) ) {
				// This is an HTML comment delimiter.
				continue;
			} else {
				// This is an HTML element delimiter.

				// Replace each & with &#038; unless it already looks like an entity.
				$curl = preg_replace( '/&(?!#(?:\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&#038;', $curl );

				_wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );
			}
		} elseif ( '' === trim( $curl ) ) {
			// This is a newline between delimiters. Performance improves when we check this.
			continue;

		} elseif ( '[' === $first && $found_shortcodes && 1 === preg_match( '/^' . $shortcode_regex . '$/', $curl ) ) {
			// This is a shortcode delimiter.

			if ( '[[' !== substr( $curl, 0, 2 ) && ']]' !== substr( $curl, -2 ) ) {
				// Looks like a normal shortcode.
				_wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes );
			} else {
				// Looks like an escaped shortcode.
				continue;
			}
		} elseif ( empty( $no_texturize_shortcodes_stack ) && empty( $no_texturize_tags_stack ) ) {
			// This is neither a delimiter, nor is this content inside of no_texturize pairs. Do texturize.

			$curl = str_replace( $static_characters, $static_replacements, $curl );

			if ( false !== strpos( $curl, "'" ) ) {
				$curl = preg_replace( $dynamic_characters['apos'], $dynamic_replacements['apos'], $curl );
				$curl = wptexturize_primes( $curl, "'", $prime, $open_sq_flag, $closing_single_quote );
				$curl = str_replace( $apos_flag, $apos, $curl );
				$curl = str_replace( $open_sq_flag, $opening_single_quote, $curl );
			}
			if ( false !== strpos( $curl, '"' ) ) {
				$curl = preg_replace( $dynamic_characters['quote'], $dynamic_replacements['quote'], $curl );
				$curl = wptexturize_primes( $curl, '"', $double_prime, $open_q_flag, $closing_quote );
				$curl = str_replace( $open_q_flag, $opening_quote, $curl );
			}
			if ( false !== strpos( $curl, '-' ) ) {
				$curl = preg_replace( $dynamic_characters['dash'], $dynamic_replacements['dash'], $curl );
			}

			// 9x9 (times), but never 0x9999.
			if ( 1 === preg_match( '/(?<=\d)x\d/', $curl ) ) {
				// Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one!
				$curl = preg_replace( '/\b(\d(?(?<=0)[\d\.,]+|[\d\.,]*))x(\d[\d\.,]*)\b/', '$1&#215;$2', $curl );
			}

			// Replace each & with &#038; unless it already looks like an entity.
			$curl = preg_replace( '/&(?!#(?:\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&#038;', $curl );
		}
	}

	return implode( '', $textarr );
}

Top ↑

Changelog Changelog

Changelog
Version Description
0.71 Introduced.

Top ↑

User Contributed Notes User Contributed Notes

You must log in before being able to contribute a note or feedback.