includes/clientside/tinymce/plugins/spellchecker/classes/utils/JSON.php
author Dan
Tue, 05 Jan 2010 09:53:26 -0500
changeset 1201 9593e62929d1
parent 784 72df14a56a03
permissions -rw-r--r--
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation

<?php
/**
 * $Id: JSON.php 40 2007-06-18 11:43:15Z spocke $
 *
 * @package MCManager.utils
 * @author Moxiecode
 * @copyright Copyright © 2007, Moxiecode Systems AB, All rights reserved.
 */

// Token types reported by Moxiecode_JSONReader::getToken().
// Scalar value tokens:
define('JSON_BOOL', 1);
define('JSON_INT', 2);
define('JSON_STR', 3);
define('JSON_FLOAT', 4);
define('JSON_NULL', 5);
// Structural tokens for the opening/closing brace or bracket of a container:
define('JSON_START_OBJ', 6);
define('JSON_END_OBJ', 7);
define('JSON_START_ARRAY', 8);
define('JSON_END_ARRAY', 9);
// A string that was read while the reader expected an object property name.
define('JSON_KEY', 10);
// NOTE(review): JSON_SKIP appears intended for input the consumer should ignore — confirm against the reader.
define('JSON_SKIP', 11);

// Reader locations reported by Moxiecode_JSONReader::getLocation():
// inside an array, inside an object, or outside any container.
define('JSON_IN_ARRAY', 30);
define('JSON_IN_OBJECT', 40);
define('JSON_IN_BETWEEN', 50);

/**
 * Forward-only tokenizer for JSON text.
 *
 * Call readToken() repeatedly; after each call that returns true, getToken()
 * yields one of the JSON_* token constants, getValue() the decoded scalar (or
 * null for structural tokens) and getLocation() the kind of container the
 * reader is currently inside (JSON_IN_ARRAY, JSON_IN_OBJECT, JSON_IN_BETWEEN).
 *
 * The reader is lenient: it accepts single quoted strings, does not validate
 * the spelling of true/false/null, and reports any character it does not
 * understand as a JSON_SKIP token instead of failing.
 */
class Moxiecode_JSONReader {
	var $_data, $_len, $_pos;
	var $_value, $_token;
	var $_location, $_lastLocations;
	var $_needProp;

	/**
	 * Legacy (PHP 4 style) constructor, kept so that existing subclasses calling
	 * parent::Moxiecode_JSONReader() keep working. PHP 8 no longer treats a
	 * method named after the class as a constructor, hence __construct() below.
	 *
	 * @param string $data JSON text to tokenize.
	 */
	function Moxiecode_JSONReader($data) {
		$this->__construct($data);
	}

	/**
	 * Initializes the reader on the given JSON text.
	 *
	 * @param string $data JSON text to tokenize.
	 */
	function __construct($data) {
		$this->_data = (string) $data;
		$this->_len = strlen($this->_data);
		$this->_pos = -1; // Index of the last consumed byte
		$this->_value = null;
		$this->_token = null;
		$this->_location = JSON_IN_BETWEEN;
		$this->_lastLocations = array();
		$this->_needProp = false;
	}

	/**
	 * @return int|null Token type of the last successful readToken() call.
	 */
	function getToken() {
		return $this->_token;
	}

	/**
	 * @return int Current location, one of JSON_IN_ARRAY, JSON_IN_OBJECT or JSON_IN_BETWEEN.
	 */
	function getLocation() {
		return $this->_location;
	}

	/**
	 * @return string Name of the current token constant, or 'UNKNOWN'.
	 */
	function getTokenName() {
		switch ($this->_token) {
			case JSON_BOOL:
				return 'JSON_BOOL';

			case JSON_INT:
				return 'JSON_INT';

			case JSON_STR:
				return 'JSON_STR';

			case JSON_FLOAT:
				return 'JSON_FLOAT';

			case JSON_NULL:
				return 'JSON_NULL';

			case JSON_START_OBJ:
				return 'JSON_START_OBJ';

			case JSON_END_OBJ:
				return 'JSON_END_OBJ';

			case JSON_START_ARRAY:
				return 'JSON_START_ARRAY';

			case JSON_END_ARRAY:
				return 'JSON_END_ARRAY';

			case JSON_KEY:
				return 'JSON_KEY';

			case JSON_SKIP:
				return 'JSON_SKIP';
		}

		return 'UNKNOWN';
	}

	/**
	 * @return mixed Decoded value of the current token, null for structural tokens.
	 */
	function getValue() {
		return $this->_value;
	}

	/**
	 * Reads the next token from the input.
	 *
	 * @return bool True when a token was read, false when the input is exhausted.
	 */
	function readToken() {
		$chr = $this->read();

		if ($chr === null)
			return false;

		switch ($chr) {
			case '[':
				$this->_lastLocations[] = $this->_location;
				$this->_location = JSON_IN_ARRAY;
				// Array items are never property names
				$this->_needProp = false;
				$this->_token = JSON_START_ARRAY;
				$this->_value = null;
				$this->readAway();
				return true;

			case '{':
				$this->_lastLocations[] = $this->_location;
				$this->_location = JSON_IN_OBJECT;
				$this->_needProp = true;
				$this->_token = JSON_START_OBJ;
				$this->_value = null;
				$this->readAway();
				return true;

			case ']':
			case '}':
				$this->_location = $this->_popLocation();
				$this->_token = $chr == ']' ? JSON_END_ARRAY : JSON_END_OBJ;
				$this->_value = null;
				$this->readAway();

				// Re-derive the flag from the enclosing container. Leaving it
				// untouched made a string that follows "}" inside an array be
				// reported as JSON_KEY.
				$this->_needProp = ($this->_location == JSON_IN_OBJECT);

				return true;

			// String
			case '"':
			case '\'':
				return $this->_readString($chr);

			// Null
			case 'n':
				return $this->_readNull();

			// Bool
			case 't':
			case 'f':
				return $this->_readBool($chr);
		}

		// Is number
		if (is_numeric($chr) || $chr == '-' || $chr == '.')
			return $this->_readNumber($chr);

		// Anything else (line breaks, tabs, junk) is reported as a skip token
		// so that the previous token is not delivered to the consumer twice.
		$this->_token = JSON_SKIP;
		$this->_value = null;

		return true;
	}

	/**
	 * Restores the location that was active before the container being closed
	 * was opened. Falls back to JSON_IN_BETWEEN on unbalanced input.
	 *
	 * @return int Location constant.
	 */
	function _popLocation() {
		if (count($this->_lastLocations) > 0)
			return array_pop($this->_lastLocations);

		return JSON_IN_BETWEEN;
	}

	/**
	 * Book-keeping after a scalar value: inside an object the next string has
	 * to be a property name.
	 */
	function _afterValue() {
		if ($this->_location == JSON_IN_OBJECT)
			$this->_needProp = true;
	}

	/**
	 * Reads a boolean literal. Only the first character is inspected, the rest
	 * of the word is skipped without validation.
	 *
	 * @param string $chr First character of the literal, 't' or 'f'.
	 * @return bool Always true.
	 */
	function _readBool($chr) {
		$isTrue = ($chr == 't');

		$this->_token = JSON_BOOL;
		$this->_value = $isTrue;

		$this->skip($isTrue ? 3 : 4); // "rue" or "alse"
		$this->readAway();
		$this->_afterValue();

		return true;
	}

	/**
	 * Reads a null literal; the remaining characters are skipped unvalidated.
	 *
	 * @return bool Always true.
	 */
	function _readNull() {
		$this->_token = JSON_NULL;
		$this->_value = null;

		$this->skip(3); // ull
		$this->readAway();
		$this->_afterValue();

		return true;
	}

	/**
	 * Reads a quoted string whose opening quote has already been consumed.
	 * An unterminated string ends at the end of the input. The token becomes
	 * JSON_KEY instead of JSON_STR when a property name is expected.
	 *
	 * @param string $quote Quote character that opened the string.
	 * @return bool Always true.
	 */
	function _readString($quote) {
		$output = "";
		$this->_token = JSON_STR;

		while (($chr = $this->read()) !== null) {
			// Closing quote, the other quote type is ordinary content
			if ($chr === $quote)
				break;

			if ($chr !== '\\') {
				$output .= $chr;
				continue;
			}

			// Read escape code
			$chr = $this->read();
			if ($chr === null)
				break;

			switch ($chr) {
				case 't':
					$output .= "\t";
					break;

				case 'b':
					// PHP has no "\b" escape sequence, so spell out the backspace byte
					$output .= chr(8);
					break;

				case 'f':
					$output .= chr(12);
					break;

				case 'r':
					$output .= "\r";
					break;

				case 'n':
					$output .= "\n";
					break;

				case 'u':
					$output .= $this->_readUnicodeEscape();
					break;

				default:
					// \" \' \\ \/ and unknown escapes yield the character itself
					$output .= $chr;
					break;
			}
		}

		$this->readAway();
		$this->_value = $output;

		// Needed a property
		if ($this->_needProp) {
			$this->_token = JSON_KEY;
			$this->_needProp = false;
			return true;
		}

		$this->_afterValue();

		return true;
	}

	/**
	 * Decodes the four hex digits following "\u". When they form a high
	 * surrogate that is directly followed by a "\uXXXX" low surrogate, both
	 * escapes are consumed and combined into one code point.
	 *
	 * @return string UTF-8 encoded character.
	 */
	function _readUnicodeEscape() {
		// Strip non-hex characters up front; hexdec() ignores them anyway
		$code = hexdec(preg_replace('/[^0-9a-fA-F]/', '', (string) $this->read(4)));

		if ($code >= 0xD800 && $code <= 0xDBFF) {
			$next = (string) substr($this->_data, $this->_pos + 1, 6);

			if (preg_match('/^\\\\u(d[c-f][0-9a-f]{2})$/i', $next, $match)) {
				$this->skip(6);
				$code = 0x10000 + (($code - 0xD800) << 10) + (hexdec($match[1]) - 0xDC00);
			}
		}

		return $this->_int2utf8($code);
	}

	/**
	 * Encodes a Unicode code point as UTF-8.
	 *
	 * @param int $int Code point.
	 * @return string UTF-8 byte sequence, empty string when out of range.
	 */
	function _int2utf8($int) {
		$int = intval($int);

		if ($int < 0 || $int > 0x1FFFFF)
			return '';

		if ($int <= 0x7F)
			return chr($int);

		if ($int <= 0x7FF)
			return chr(0xC0 | (($int >> 6) & 0x1F)) . chr(0x80 | ($int & 0x3F));

		if ($int <= 0xFFFF)
			return chr(0xE0 | (($int >> 12) & 0x0F)) . chr(0x80 | (($int >> 6) & 0x3F)) . chr(0x80 | ($int & 0x3F));

		return chr(0xF0 | ($int >> 18)) . chr(0x80 | (($int >> 12) & 0x3F)) . chr(0x80 | (($int >> 6) & 0x3F)) . chr(0x80 | ($int & 0x3F));
	}

	/**
	 * Reads a number whose first character has already been consumed.
	 * A fraction or an exponent makes the token JSON_FLOAT, otherwise JSON_INT.
	 *
	 * @param string $start First character of the number.
	 * @return bool Always true.
	 */
	function _readNumber($start) {
		$value = $start;
		$isFloat = ($start == '.');

		while (($chr = $this->peek()) !== null) {
			if (!is_numeric($chr) && strpos('-+.eE', $chr) === false)
				break;

			if ($chr != '-' && $chr != '+' && !is_numeric($chr))
				$isFloat = true; // ".", "e" or "E"

			$value .= $this->read();
		}

		$this->readAway();

		if ($isFloat) {
			$this->_token = JSON_FLOAT;
			$this->_value = floatval($value);
		} else {
			$this->_token = JSON_INT;
			$this->_value = intval($value);
		}

		$this->_afterValue();

		return true;
	}

	/**
	 * Consumes separators (":" and ",") and whitespace following a token.
	 */
	function readAway() {
		while (($chr = $this->peek()) !== null) {
			if (strpos(":, \t\r\n", $chr) === false)
				return;

			$this->read();
		}
	}

	/**
	 * Consumes input.
	 *
	 * @param int $len Number of bytes to consume.
	 * @return string|null The consumed bytes (possibly fewer than requested
	 *                     near the end of the input), null when exhausted.
	 */
	function read($len = 1) {
		// _pos is the last consumed index, so the next byte is at _pos + 1
		if ($this->_pos + 1 >= $this->_len)
			return null;

		if ($len > 1) {
			$str = substr($this->_data, $this->_pos + 1, $len);
			$this->_pos += $len;

			return $str;
		}

		return $this->_data[++$this->_pos];
	}

	/**
	 * Advances the read position without returning anything.
	 *
	 * @param int $len Number of bytes to skip.
	 */
	function skip($len) {
		$this->_pos += $len;
	}

	/**
	 * @return string|null Next byte without consuming it, null when exhausted.
	 */
	function peek() {
		if ($this->_pos + 1 < $this->_len)
			return $this->_data[$this->_pos + 1];

		return null;
	}
}

/**
 * Encodes PHP values as JSON text and decodes JSON text into PHP values.
 *
 * Decoding is driven by Moxiecode_JSONReader; both JSON objects and JSON arrays
 * become PHP arrays. Encoding emits pure ASCII: every non-ASCII character is
 * written as one or two \uXXXX escapes.
 *
 * @package MCManager.utils
 */
class Moxiecode_JSON {
	// Decoder state: $data is the result root, $parents the stack of enclosing
	// containers and $cur a reference to the container currently being filled.
	var $data, $parents, $cur;

	/**
	 * Legacy (PHP 4 style) constructor, kept for subclasses that call it.
	 */
	function Moxiecode_JSON() {
	}

	/**
	 * Constructor; there is nothing to set up.
	 */
	function __construct() {
	}

	/**
	 * Decodes JSON text.
	 *
	 * @param string $input JSON text.
	 * @return mixed Decoded value, null when the input holds no value.
	 */
	function decode($input) {
		$reader = new Moxiecode_JSONReader($input);

		return $this->readValue($reader);
	}

	/**
	 * Builds a PHP value from the tokens delivered by the reader.
	 *
	 * @param Moxiecode_JSONReader $reader Reader positioned at the start of the input.
	 * @return mixed Decoded value, null when no value token was found.
	 */
	function readValue(&$reader) {
		$this->data = array();
		$this->parents = array();
		$this->cur =& $this->data;
		$key = null;
		// Location that was active before the most recent structural token.
		// The root container is appended to $this->data like an array item.
		$loc = JSON_IN_ARRAY;

		while ($reader->readToken()) {
			switch ($reader->getToken()) {
				case JSON_STR:
				case JSON_INT:
				case JSON_BOOL:
				case JSON_FLOAT:
				case JSON_NULL:
					switch ($reader->getLocation()) {
						case JSON_IN_OBJECT:
							$this->cur[$key] = $reader->getValue();
							break;

						case JSON_IN_ARRAY:
							$this->cur[] = $reader->getValue();
							break;

						default:
							// A bare scalar outside any container is the whole document
							return $reader->getValue();
					}
					break;

				case JSON_KEY:
					$key = $reader->getValue();
					break;

				case JSON_START_OBJ:
				case JSON_START_ARRAY:
					// $loc still describes the container that receives the new one
					if ($loc == JSON_IN_OBJECT)
						$this->addArray($key);
					else
						$this->addArray(null);

					$loc = $reader->getLocation();
					break;

				case JSON_END_OBJ:
				case JSON_END_ARRAY:
					$loc = $reader->getLocation();

					if (count($this->parents) > 0) {
						$this->cur =& $this->parents[count($this->parents) - 1];
						array_pop($this->parents);
					}
					break;
			}
		}

		return isset($this->data[0]) ? $this->data[0] : null;
	}

	/**
	 * Opens a new nested container inside the current one and makes it the
	 * current container. Lives in its own method because the reference
	 * juggling needs fresh local variables on every call.
	 *
	 * @param string|null $key Property name to store the container under,
	 *                         null to append it as an array item.
	 */
	function addArray($key) {
		$this->parents[] =& $this->cur;
		$ar = array();

		// Strict check: "0" and "" are valid property names
		if ($key !== null)
			$this->cur[$key] =& $ar;
		else
			$this->cur[] =& $ar;

		$this->cur =& $ar;
	}

	/**
	 * Returns the delimiter to place before the item with the given index.
	 *
	 * @param int $index Zero based index of the item.
	 * @param Moxiecode_JSONReader $reader Reader supplying the current location.
	 * @return string "," for non-first items inside a container, "" otherwise.
	 */
	function getDelim($index, &$reader) {
		switch ($reader->getLocation()) {
			case JSON_IN_ARRAY:
			case JSON_IN_OBJECT:
				if ($index > 0)
					return ",";
				break;
		}

		return "";
	}

	/**
	 * Encodes a PHP value as JSON.
	 *
	 * @param mixed $input Value to encode; objects are encoded from their
	 *                     accessible properties.
	 * @return mixed JSON text; integers and finite floats are returned as
	 *               numbers, unsupported types as an empty string.
	 */
	function encode($input) {
		switch (gettype($input)) {
			case 'boolean':
				return $input ? 'true' : 'false';

			case 'integer':
				return (int) $input;

			case 'float':
			case 'double':
				// JSON has no representation for INF/NAN
				return is_finite($input) ? (float) $input : 'null';

			case 'NULL':
				return 'null';

			case 'string':
				return $this->encodeString($input);

			case 'array':
				return $this->_encodeArray($input);

			case 'object':
				return $this->_encodeArray(get_object_vars($input));
		}

		return '';
	}

	/**
	 * Encodes a UTF-8 string as a double quoted, ASCII-only JSON string.
	 *
	 * Bytes that cannot start a UTF-8 character of up to four bytes, and
	 * sequences cut off by the end of the string, are dropped.
	 *
	 * @param string $input UTF-8 text.
	 * @return string JSON string literal including the surrounding quotes.
	 */
	function encodeString($input) {
		$input = (string) $input;

		// Pure alphanumeric text needs no escaping
		if (!preg_match('/[^a-zA-Z0-9]/', $input))
			return '"' . $input . '"';

		$escapes = array(
			chr(8) => '\\b', // PHP has no "\b" escape sequence
			"\t" => '\\t',
			chr(12) => '\\f',
			"\r" => '\\r',
			"\n" => '\\n',
			'\\' => '\\\\',
			'"' => '\\"',
			// \' is not a legal JSON escape; \u0027 is accepted by strict parsers
			// and still keeps the output free of raw single quotes
			'\'' => '\\u0027'
		);

		$output = '';
		$len = strlen($input);

		for ($i = 0; $i < $len; $i++) {
			$chr = $input[$i];

			if (isset($escapes[$chr])) {
				$output .= $escapes[$chr];
				continue;
			}

			$byte = ord($chr);

			// Remaining control characters must not appear raw in JSON
			if ($byte < 0x20) {
				$output .= sprintf('\u%04x', $byte);
				continue;
			}

			if ($byte < 0x80) {
				$output .= $chr;
				continue;
			}

			// Number of continuation bytes announced by the lead byte
			if (($byte & 0xE0) == 0xC0)
				$extra = 1;
			else if (($byte & 0xF0) == 0xE0)
				$extra = 2;
			else if (($byte & 0xF8) == 0xF0)
				$extra = 3;
			else
				continue; // Stray continuation byte or obsolete 5/6 byte form

			if ($i + $extra >= $len)
				continue; // Truncated sequence

			$hex = bin2hex($this->_utf82utf16(substr($input, $i, $extra + 1)));
			$i += $extra;

			// One \uXXXX escape per UTF-16 code unit (two for a surrogate pair)
			for ($j = 0; $j + 4 <= strlen($hex); $j += 4)
				$output .= '\u' . substr($hex, $j, 4);
		}

		return '"' . $output . '"';
	}

	/**
	 * Converts a single UTF-8 encoded character to big endian UTF-16.
	 *
	 * @param string $utf8 One character, 1-4 bytes.
	 * @return string Two bytes, four for characters outside the BMP, or an
	 *                empty string when the input cannot be converted.
	 */
	function _utf82utf16($utf8) {
		if (function_exists('mb_convert_encoding'))
			return mb_convert_encoding($utf8, 'UTF-16BE', 'UTF-8');

		switch (strlen($utf8)) {
			case 1:
				$code = ord($utf8[0]);
				break;

			case 2:
				$code = ((ord($utf8[0]) & 0x1F) << 6) | (ord($utf8[1]) & 0x3F);
				break;

			case 3:
				$code = ((ord($utf8[0]) & 0x0F) << 12) | ((ord($utf8[1]) & 0x3F) << 6) | (ord($utf8[2]) & 0x3F);
				break;

			case 4:
				$code = ((ord($utf8[0]) & 0x07) << 18) | ((ord($utf8[1]) & 0x3F) << 12) | ((ord($utf8[2]) & 0x3F) << 6) | (ord($utf8[3]) & 0x3F);
				break;

			default:
				return '';
		}

		if ($code > 0x10FFFF)
			return '';

		if ($code < 0x10000)
			return chr($code >> 8) . chr($code & 0xFF);

		// Outside the BMP: split into a surrogate pair
		$code -= 0x10000;
		$high = 0xD800 | ($code >> 10);
		$low = 0xDC00 | ($code & 0x3FF);

		return chr($high >> 8) . chr($high & 0xFF) . chr($low >> 8) . chr($low & 0xFF);
	}

	/**
	 * Encodes a PHP array as a JSON array or object.
	 *
	 * An array whose keys are all integers (including an empty array) becomes
	 * a JSON array of its values in iteration order. As soon as one key is a
	 * string the whole array becomes a JSON object and every key, integer ones
	 * included, is written as a property name.
	 *
	 * @param array $input Array to encode.
	 * @return string JSON text.
	 */
	function _encodeArray($input) {
		$isIndexed = true;

		foreach (array_keys($input) as $key) {
			if (!is_int($key)) {
				$isIndexed = false;
				break;
			}
		}

		$items = array();

		foreach ($input as $key => $value) {
			$encoded = $this->encode($value);

			if ($isIndexed)
				$items[] = $encoded;
			else
				$items[] = $this->encodeString((string) $key) . ':' . $encoded;
		}

		$output = implode(',', $items);

		return $isIndexed ? '[' . $output . ']' : '{' . $output . '}';
	}
}

?>