Plural_Forms::parse( string $str )

In this article

Parse a Plural-Forms string into tokens.

Description

Uses the shunting-yard algorithm to convert the string to Reverse Polish Notation tokens.

Parameters

$str (string, required)
String to parse.

Source

/**
 * Parse a Plural-Forms string into tokens.
 *
 * Uses the shunting-yard algorithm to convert the infix expression into
 * Reverse Polish Notation. The resulting list is stored in `$this->tokens`;
 * each token is one of `array( 'var' )`, `array( 'value', int )` or
 * `array( 'op', string )`.
 *
 * @since 4.9.0
 *
 * @throws Exception If the string contains an unknown symbol or operator,
 *                   mismatched parentheses, or a ":" without a matching "?"
 *                   in the same parenthesised group.
 *
 * @param string $str String to parse.
 */
protected function parse( $str ) {
	$pos = 0;
	$len = strlen( $str );

	// Convert infix operators to postfix using the shunting-yard algorithm.
	$output = array();
	$stack  = array();
	while ( $pos < $len ) {
		$next = substr( $str, $pos, 1 );

		switch ( $next ) {
			// Ignore whitespace.
			case ' ':
			case "\t":
				++$pos;
				break;

			// Variable (n).
			case 'n':
				$output[] = array( 'var' );
				++$pos;
				break;

			// Parentheses.
			case '(':
				$stack[] = $next;
				++$pos;
				break;

			case ')':
				// Flush pending operators to the output until the matching open paren.
				$found = false;
				while ( ! empty( $stack ) ) {
					$o2 = $stack[ count( $stack ) - 1 ];
					if ( '(' !== $o2 ) {
						$output[] = array( 'op', array_pop( $stack ) );
						continue;
					}

					// Discard open paren.
					array_pop( $stack );
					$found = true;
					break;
				}

				if ( ! $found ) {
					throw new Exception( 'Mismatched parentheses' );
				}

				++$pos;
				break;

			// Operators.
			case '|':
			case '&':
			case '>':
			case '<':
			case '!':
			case '=':
			case '%':
			case '?':
				// Consume the whole run of operator characters so multi-character operators are read as one token.
				$end_operator = strspn( $str, self::OP_CHARS, $pos );
				$operator     = substr( $str, $pos, $end_operator );
				if ( ! array_key_exists( $operator, self::$op_precedence ) ) {
					throw new Exception( sprintf( 'Unknown operator "%s"', $operator ) );
				}

				// Pop operators that should be applied before the new one.
				// NOTE(review): the top of the stack may be "(", so this lookup assumes
				// self::$op_precedence also has an entry for it — confirm against the class.
				while ( ! empty( $stack ) ) {
					$o2 = $stack[ count( $stack ) - 1 ];

					// Ternary is right-associative in C.
					if ( '?:' === $operator || '?' === $operator ) {
						if ( self::$op_precedence[ $operator ] >= self::$op_precedence[ $o2 ] ) {
							break;
						}
					} elseif ( self::$op_precedence[ $operator ] > self::$op_precedence[ $o2 ] ) {
						break;
					}

					$output[] = array( 'op', array_pop( $stack ) );
				}
				$stack[] = $operator;

				$pos += $end_operator;
				break;

			// Ternary "else".
			case ':':
				$found = false;
				$s_pos = count( $stack ) - 1;
				while ( $s_pos >= 0 ) {
					$o2 = $stack[ $s_pos ];

					/*
					 * The matching "?" has to be in the same parenthesised group as
					 * the ":". Stop at an open paren instead of popping it into the
					 * output as if it were an operator; the missing "?" is reported below.
					 */
					if ( '(' === $o2 ) {
						break;
					}

					if ( '?' !== $o2 ) {
						$output[] = array( 'op', array_pop( $stack ) );
						--$s_pos;
						continue;
					}

					// Replace the pending "?" with the complete ternary operator.
					$stack[ $s_pos ] = '?:';
					$found           = true;
					break;
				}

				if ( ! $found ) {
					throw new Exception( 'Missing starting "?" ternary operator' );
				}
				++$pos;
				break;

			// Default - number or invalid.
			default:
				if ( $next >= '0' && $next <= '9' ) {
					// Read the full run of number characters as a single integer value.
					$span     = strspn( $str, self::NUM_CHARS, $pos );
					$output[] = array( 'value', intval( substr( $str, $pos, $span ) ) );
					$pos     += $span;
					break;
				}

				throw new Exception( sprintf( 'Unknown symbol "%s"', $next ) );
		}
	}

	// Flush the remaining operators; a leftover paren means the input was unbalanced.
	while ( ! empty( $stack ) ) {
		$o2 = array_pop( $stack );
		if ( '(' === $o2 || ')' === $o2 ) {
			throw new Exception( 'Mismatched parentheses' );
		}

		$output[] = array( 'op', $o2 );
	}

	$this->tokens = $output;
}

Changelog

Version | Description
4.9.0 | Introduced.

User Contributed Notes

You must log in before being able to contribute a note or feedback.