<?php
/**
 * HUBzero CMS
 *
 * Copyright 2009-2014 Purdue University. All rights reserved.
 *
 * This file is part of: The HUBzero(R) Platform for Scientific Collaboration
 *
 * The HUBzero(R) Platform for Scientific Collaboration (HUBzero) is free
 * software: you can redistribute it and/or modify it under the terms of
 * the GNU Lesser General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any
 * later version.
 *
 * HUBzero is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * HUBzero is a registered trademark of Purdue University.
 *
 * @package   hubzero-cms
 * @author    Shawn Rice <zooley@purdue.edu>
 * @copyright Copyright 2009-2014 Purdue University. All rights reserved.
 * @license   http://www.gnu.org/licenses/lgpl-3.0.html LGPLv3
 */

namespace Hubzero\Utility;

/**
 * String handling methods.
 * 
 * Largely inspired by CakePHP (http://cakephp.org) and Zend (http://framework.zend.com)
 */
class String 
{
	/**
	 * Replaces variable placeholders inside $str with the matching values from $data. Each key
	 * in the $data array corresponds to a variable placeholder name in $str.
	 * Example: `String::insert(':name is :age years old.', array('name' => 'Bob', 'age' => 65));`
	 * Returns: Bob is 65 years old.
	 *
	 * If $str contains `?` and the first key of $data is numeric, every `?` is instead replaced
	 * positionally with successive values from $data (`?` marks left over once $data is
	 * exhausted are removed).
	 *
	 * Available $options are:
	 *
	 * - before: The character or string in front of the name of the variable placeholder (Defaults to `:`)
	 * - after: The character or string after the name of the variable placeholder (Defaults to null)
	 * - escape: The character or string used to escape the before character / string (Defaults to `\`)
	 * - format: A regex to use for matching variable placeholders. Default is: `/(?<!\\)\:%s/`
	 *   (Overwrites before, after, breaks escape / clean)
	 * - clean: A boolean or array with instructions for String::cleanInsert
	 *
	 * Array values in $data are replaced with an empty string.
	 *
	 * @param  string $str     A string containing variable placeholders
	 * @param  array  $data    A key => val array where each key stands for a placeholder to be replaced with val
	 * @param  array  $options An array of options, see description above
	 * @return string
	 */
	public static function insert($str, $data, $options = array())
	{
		$defaults = array(
			'before' => ':',
			'after'  => null,
			'escape' => '\\',
			'format' => null,
			'clean'  => false
		);
		$options += $defaults;
		$format = $options['format'];
		$data   = (array) $data;
		if (empty($data))
		{
			return ($options['clean']) ? self::cleanInsert($str, $options) : $str;
		}

		if (!isset($format))
		{
			// The result of this sprintf() is itself used as a sprintf() format below, so any
			// literal '%' coming from the options must be doubled. The (string) casts keep
			// null options (e.g. the default 'after') away from preg_quote().
			$format = sprintf(
				'/(?<!%s)%s%%s%s/',
				str_replace('%', '%%', preg_quote((string) $options['escape'], '/')),
				str_replace('%', '%%', preg_quote((string) $options['before'], '/')),
				str_replace('%', '%%', preg_quote((string) $options['after'], '/'))
			);
		}

		// Positional mode: replace each '?' with the next value of a numerically indexed $data.
		if (strpos($str, '?') !== false && is_numeric(key($data)))
		{
			$offset = 0;
			while (($pos = strpos($str, '?', $offset)) !== false)
			{
				$val = array_shift($data);
				// Same rule as the named mode below: arrays become '', everything else a string
				// (null, i.e. exhausted data, becomes '').
				$val = (is_array($val)) ? '' : (string) $val;
				// Continue searching after the inserted value so a '?' inside it is not replaced.
				$offset = $pos + strlen($val);
				$str = substr_replace($str, $val, $pos, 1);
			}
			return ($options['clean']) ? self::cleanInsert($str, $options) : $str;
		}

		asort($data);

		// Two-pass replacement: placeholders are first swapped for a crc32 hash of their key and
		// the hashes are then swapped for the values, so a value that happens to contain
		// placeholder syntax is not expanded again.
		$dataKeys = array_keys($data);
		$hashKeys = array_map('crc32', $dataKeys);
		$tempData = array_combine($dataKeys, $hashKeys);
		// Reverse key order so that a longer key is handled before a key that is its prefix.
		krsort($tempData);

		foreach ($tempData as $key => $hashVal)
		{
			$key = sprintf($format, preg_quote((string) $key, '/'));
			$str = preg_replace($key, (string) $hashVal, $str);
		}
		$dataReplacements = array_combine($hashKeys, array_values($data));
		foreach ($dataReplacements as $tmpHash => $tmpValue)
		{
			$tmpValue = (is_array($tmpValue)) ? '' : (string) $tmpValue;
			$str = str_replace((string) $tmpHash, $tmpValue, $str);
		}

		// Un-escape escaped placeholders (e.g. '\:' becomes ':') when the default format is in use.
		if (!isset($options['format']) && isset($options['before']))
		{
			$str = str_replace($options['escape'] . $options['before'], $options['before'], $str);
		}
		return ($options['clean']) ? self::cleanInsert($str, $options) : $str;
	}

	/**
	 * Cleans up a String::insert() formatted string with given $options depending on the 'clean' key in
	 * $options. The default method used is text but html is also available. The goal of this function
	 * is to replace all whitespace and unneeded markup around placeholders that did not get replaced
	 * by String::insert().
	 *
	 * The 'clean' option may be:
	 *
	 * - false (or missing): $str is returned untouched
	 * - true: same as array('method' => 'text')
	 * - a string: used as the method name ('text' or 'html')
	 * - an array: 'method' plus optional overrides 'word', 'gap' (text only), 'andText' (html only)
	 *   and 'replacement'
	 *
	 * The 'before' and 'after' options describe the placeholder delimiters and default to `:` and
	 * nothing, matching the defaults of String::insert().
	 *
	 * @param  string $str     String possibly containing left-over placeholders
	 * @param  array  $options Options, see description above
	 * @return string
	 * @see String::insert()
	 */
	public static function cleanInsert($str, $options)
	{
		// Fill in the delimiters when called directly; without them the patterns below would
		// degrade to "any word" and strip regular text.
		$options += array(
			'before' => ':',
			'after'  => null,
			'clean'  => false
		);

		$clean = $options['clean'];
		if (!$clean)
		{
			return $str;
		}
		if ($clean === true)
		{
			$clean = array('method' => 'text');
		}
		if (!is_array($clean))
		{
			$clean = array('method' => $options['clean']);
		}

		$before = preg_quote((string) $options['before'], '/');
		$after  = preg_quote((string) $options['after'], '/');
		$method = isset($clean['method']) ? $clean['method'] : null;

		switch ($method)
		{
			case 'html':
				$clean = array_merge(array(
					'word' => '[\w,.]+',
					'andText' => true,
					'replacement' => '',
				), $clean);
				// Matches a whole double-quoted attribute whose value consists only of placeholders.
				$kleenex = sprintf(
					'/[\s]*[a-z]+=(")(%s%s%s[\s]*)+\\1/i',
					$before,
					$clean['word'],
					$after
				);
				$str = preg_replace($kleenex, $clean['replacement'], $str);
				if ($clean['andText'])
				{
					// Second pass with the text method to remove placeholders outside attributes.
					$options['clean'] = array('method' => 'text');
					$str = self::cleanInsert($str, $options);
				}
			break;

			case 'text':
				$clean = array_merge(array(
					'word' => '[\w,.]+',
					'gap' => '[\s]*(?:(?:and|or)[\s]*)?',
					'replacement' => '',
				), $clean);

				// Matches a placeholder together with the gap (whitespace and an optional
				// "and"/"or") that follows or precedes it.
				$kleenex = sprintf(
					'/(%s%s%s%s|%s%s%s%s)/',
					$before,
					$clean['word'],
					$after,
					$clean['gap'],
					$clean['gap'],
					$before,
					$clean['word'],
					$after
				);
				$str = preg_replace($kleenex, $clean['replacement'], $str);
			break;
		}
		return $str;
	}

	/**
	 * Highlights a given phrase in a text. You can specify any expression in highlighter that
	 * may include the \1 expression to include the $phrase found.
	 *
	 * $phrase may be a single string or an array of strings; in the latter case `format` may
	 * also be an array using the same keys. Empty phrases are ignored.
	 *
	 * ### Options:
	 *
	 * - `format` The piece of html with that the phrase will be highlighted
	 * - `html` If true, will ignore any HTML tags, ensuring that only the correct text is highlighted
	 * - `regex` a custom regex rule that is used to match words, default is '|%s|iu'
	 *
	 * @param  string $text    Text to search the phrase in
	 * @param  mixed  $phrase  The phrase (string) or phrases (array) that will be searched
	 * @param  array  $options An array of html attributes and options.
	 * @return string The highlighted text
	 */
	public static function highlight($text, $phrase, $options = array())
	{
		// Not empty(): the string '0' is a perfectly valid phrase to highlight.
		if ($phrase === null || $phrase === false || $phrase === '' || $phrase === array())
		{
			return $text;
		}

		$default = array(
			'format' => '<span class="highlight">\1</span>',
			'html'   => false,
			'regex'  => "|%s|iu"
		);
		$options = array_merge($default, $options);

		// Read the options explicitly instead of extract()ing them, so that caller supplied keys
		// such as 'text' or 'phrase' cannot overwrite the local variables.
		$format = $options['format'];
		$html   = $options['html'];
		$regex  = $options['regex'];

		if (is_array($phrase))
		{
			$replace = array();
			$with    = array();

			foreach ($phrase as $key => $segment)
			{
				$segment = (string) $segment;
				if ($segment === '')
				{
					// An empty pattern would match at every position of the text.
					continue;
				}

				$segment = '(' . preg_quote($segment, '|') . ')';
				if ($html)
				{
					// Do not match inside of a tag.
					$segment = "(?![^<]+>)$segment(?![^<]+>)";
				}

				$with[]    = (is_array($format)) ? $format[$key] : $format;
				$replace[] = sprintf($regex, $segment);
			}

			$result = preg_replace($replace, $with, $text);

			// preg_replace() yields null on failure (e.g. invalid UTF-8 with the 'u' modifier).
			return ($result === null) ? $text : $result;
		}

		$phrase = '(' . preg_quote((string) $phrase, '|') . ')';
		if ($html)
		{
			// Do not match inside of a tag.
			$phrase = "(?![^<]+>)$phrase(?![^<]+>)";
		}

		$result = preg_replace(sprintf($regex, $phrase), $format, $text);

		return ($result === null) ? $text : $result;
	}

	/**
	 * Truncates text starting from the end.
	 *
	 * Cuts a string to the length of $length and replaces the first characters
	 * with the ellipsis if the text is longer than length.
	 *
	 * ### Options:
	 *
	 * - `ellipsis` Will be used as Beginning and prepended to the trimmed string (Defaults to `...`)
	 * - `exact` If false, $text will not be cut mid-word (Defaults to true)
	 *
	 * @param  string  $text    String to truncate.
	 * @param  integer $length  Length of returned string, including ellipsis.
	 * @param  array   $options An array of options.
	 * @return string  Trimmed string.
	 */
	public static function tail($text, $length = 100, $options = array())
	{
		$default = array(
			'ellipsis' => '...',
			'exact'    => true
		);
		$options = array_merge($default, $options);

		// Read the options explicitly instead of extract()ing them, so that caller supplied keys
		// such as 'text' or 'length' cannot overwrite the arguments.
		$ellipsis = $options['ellipsis'];
		$exact    = $options['exact'];

		if (!function_exists('mb_strlen'))
		{
			// NOTE(review): presumably meant to autoload a 'Multibyte' polyfill class — confirm one exists.
			class_exists('Multibyte');
		}

		if (mb_strlen($text) <= $length)
		{
			return $text;
		}

		// Keep the last ($length - ellipsis length) characters.
		$truncate = mb_substr($text, mb_strlen($text) - $length + mb_strlen($ellipsis));
		if (!$exact)
		{
			// Drop the (possibly partial) first word; nothing is kept if there is no space at all.
			$spacepos = mb_strpos($truncate, ' ');
			$truncate = $spacepos === false ? '' : trim(mb_substr($truncate, $spacepos));
		}

		return $ellipsis . $truncate;
	}

	/**
	 * Truncates text.
	 *
	 * Cuts a string to the length of $length and replaces the last characters
	 * with the ellipsis if the text is longer than length.
	 *
	 * ### Options:
	 *
	 * - `ellipsis` Will be used as Ending and appended to the trimmed string (`ending` is deprecated).
	 *   Defaults to `...`, or to the single UTF-8 ellipsis character when `html` is set.
	 * - `exact` If false, $text will not be cut mid-word (Defaults to false)
	 * - `html` If true, HTML tags would be handled correctly: tags do not count towards the
	 *   length and tags left open by the cut are closed again
	 *
	 * @param  string  $text    String to truncate.
	 * @param  integer $length  Length of returned string, including ellipsis.
	 * @param  array   $options An array of html attributes and options.
	 * @return string  Trimmed string.
	 */
	public static function truncate($text, $length = 100, $options = array())
	{
		$default = array(
			'ellipsis' => '...',
			'exact'    => false,
			'html'     => false
		);
		if (isset($options['ending']))
		{
			$default['ellipsis'] = $options['ending'];
		}
		elseif (!empty($options['html']))
		{
			// UTF-8 encoded horizontal ellipsis character.
			$default['ellipsis'] = "\xe2\x80\xa6";
		}
		$options = array_merge($default, $options);

		// Read the options explicitly instead of extract()ing them, so that caller supplied keys
		// such as 'text' or 'length' cannot overwrite the arguments.
		$ellipsis = $options['ellipsis'];
		$exact    = $options['exact'];
		$html     = $options['html'];

		if (!function_exists('mb_strlen'))
		{
			// NOTE(review): presumably meant to autoload a 'Multibyte' polyfill class — confirm one exists.
			class_exists('Multibyte');
		}

		if ($html)
		{
			// Only the visible text counts towards the length.
			if (mb_strlen(preg_replace('/<.*?>/', '', $text)) <= $length)
			{
				return $text;
			}
			$totalLength = mb_strlen(strip_tags($ellipsis));
			$openTags = array();
			$truncate = '';

			// Each match is an optional tag ($tag[1], name in $tag[2]) followed by text ($tag[3]).
			preg_match_all('/(<\/?([\w+]+)[^>]*>)?([^<>]*)/', $text, $tags, PREG_SET_ORDER);
			foreach ($tags as $tag)
			{
				// Void elements never need a closing tag. The pattern is anchored so that names
				// merely containing one of these words (abbr, iframe, colgroup, ...) are still tracked.
				if (!preg_match('/^(?:img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)$/i', $tag[2]))
				{
					if (preg_match('/<[\w]+[^>]*>/s', $tag[0]))
					{
						// Opening tag: remember it, most recent first.
						array_unshift($openTags, $tag[2]);
					}
					elseif (preg_match('/<\/([\w]+)[^>]*>/s', $tag[0], $closeTag))
					{
						// Closing tag: forget the matching opening tag.
						$pos = array_search($closeTag[1], $openTags);
						if ($pos !== false)
						{
							array_splice($openTags, $pos, 1);
						}
					}
				}
				$truncate .= $tag[1];

				// An HTML entity counts as a single character.
				$contentLength = mb_strlen(preg_replace('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i', ' ', $tag[3]));
				if ($contentLength + $totalLength > $length)
				{
					// This text chunk exceeds the limit: take only what still fits, without
					// cutting an entity in half.
					$left = $length - $totalLength;
					$entitiesLength = 0;
					if (preg_match_all('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i', $tag[3], $entities, PREG_OFFSET_CAPTURE))
					{
						foreach ($entities[0] as $entity)
						{
							if ($entity[1] + 1 - $entitiesLength <= $left)
							{
								$left--;
								$entitiesLength += mb_strlen($entity[0]);
							}
							else
							{
								break;
							}
						}
					}

					$truncate .= mb_substr($tag[3], 0, $left + $entitiesLength);
					break;
				}
				else
				{
					$truncate .= $tag[3];
					$totalLength += $contentLength;
				}
				if ($totalLength >= $length)
				{
					break;
				}
			}
		}
		else
		{
			if (mb_strlen($text) <= $length)
			{
				return $text;
			}
			$truncate = mb_substr($text, 0, $length - mb_strlen($ellipsis));
		}
		if (!$exact)
		{
			// Cut back to the last space so that no word is split. If there is no space at
			// all, nothing of the text is kept.
			$spacepos = mb_strrpos($truncate, ' ');
			if ($html)
			{
				$truncateCheck = mb_substr($truncate, 0, $spacepos);
				$lastOpenTag   = mb_strrpos($truncateCheck, '<');
				$lastCloseTag  = mb_strrpos($truncateCheck, '>');
				if ($lastOpenTag > $lastCloseTag)
				{
					// The last space lies inside a tag: cut right after that tag instead.
					preg_match_all('/<[\w]+[^>]*>/s', $truncate, $lastTagMatches);
					$lastTag  = array_pop($lastTagMatches[0]);
					$spacepos = mb_strrpos($truncate, $lastTag) + mb_strlen($lastTag);
				}
				// Closing tags that get cut off have to be appended again at the end.
				$bits = mb_substr($truncate, $spacepos);
				preg_match_all('/<\/([a-z]+)>/', $bits, $droppedTags, PREG_SET_ORDER);
				if (!empty($droppedTags))
				{
					if (!empty($openTags))
					{
						foreach ($droppedTags as $closingTag)
						{
							if (!in_array($closingTag[1], $openTags))
							{
								array_unshift($openTags, $closingTag[1]);
							}
						}
					}
					else
					{
						foreach ($droppedTags as $closingTag)
						{
							$openTags[] = $closingTag[1];
						}
					}
				}
			}
			$truncate = mb_substr($truncate, 0, $spacepos);
		}
		$truncate .= $ellipsis;

		if ($html)
		{
			// Close everything that is still open.
			foreach ($openTags as $tag)
			{
				$truncate .= '</' . $tag . '>';
			}
		}

		return $truncate;
	}

	/**
	 * Returns the part of $text that surrounds the first (case-insensitive) occurrence of
	 * $phrase, keeping up to $radius characters on either side of it.
	 *
	 * The ellipsis is added on each side where text was cut off. If $phrase does not occur in
	 * $text, the first $radius characters followed by the ellipsis are returned. If $text or
	 * $phrase is empty, the result of String::truncate() with a length of twice the radius is
	 * returned instead.
	 *
	 * @param  string  $text     String to search the phrase in
	 * @param  string  $phrase   Phrase that will be searched for
	 * @param  integer $radius   Number of characters to keep on each side of the phrase
	 * @param  string  $ellipsis Marker used where text has been cut off
	 * @return string  Modified string
	 */
	public static function excerpt($text, $phrase, $radius = 100, $ellipsis = '...')
	{
		if (empty($text) || empty($phrase))
		{
			return self::truncate($text, $radius * 2, array('ellipsis' => $ellipsis));
		}

		// Locate the phrase regardless of case.
		$found = mb_strpos(mb_strtolower($text), mb_strtolower($phrase));
		if ($found === false)
		{
			return mb_substr($text, 0, $radius) . $ellipsis;
		}

		$total = mb_strlen($text);
		$from  = $found - $radius;
		$to    = $found + mb_strlen($phrase) + $radius;

		// An ellipsis is only shown on a side where something actually gets cut off.
		$lead  = ($from > 0) ? $ellipsis : '';
		$trail = ($to < $total) ? $ellipsis : '';

		// Clamp the window to the bounds of the text.
		$from = ($from > 0) ? $from : 0;
		$to   = ($to < $total) ? $to : $total;

		return $lead . mb_substr($text, $from, $to - $from) . $trail;
	}

	/**
	 * Obfuscate a string to prevent spam-bots from sniffing it.
	 *
	 * Every character is randomly emitted as a decimal entity, a hexadecimal entity or as-is,
	 * so the output differs between calls but always decodes back to $value. The value is
	 * treated as UTF-8 so that multibyte characters become one entity per character; a value
	 * that is not valid UTF-8 is processed byte by byte.
	 *
	 * @param  string  $value String to obfuscate
	 * @return string  HTML entity obfuscated string
	 */
	public static function obfuscate($value)
	{
		$value = (string) $value;
		if ($value === '')
		{
			// str_split('') yields array('') before PHP 8.2, which could be emitted as '&#0;'.
			return '';
		}

		// Split into whole characters, not bytes; preg_split() fails on invalid UTF-8.
		$letters = preg_split('//u', $value, -1, PREG_SPLIT_NO_EMPTY);
		if ($letters === false)
		{
			$letters = str_split($value);
		}

		$safe = '';

		foreach ($letters as $letter)
		{
			if (strlen($letter) === 1)
			{
				$code = ord($letter);
			}
			else
			{
				// Multibyte character: its code point is the big-endian 32-bit UCS-4 value.
				$ucs  = unpack('N', mb_convert_encoding($letter, 'UCS-4BE', 'UTF-8'));
				$code = $ucs[1];
			}

			// To properly obfuscate the value, we will randomly convert each letter to
			// its entity or hexadecimal representation, keeping a bot from sniffing
			// the randomly obfuscated letters out of the string on the responses.
			switch (rand(1, 3))
			{
				case 1:
					$safe .= '&#' . $code . ';';
				break;

				case 2:
					$safe .= '&#x' . dechex($code) . ';';
				break;

				case 3:
					$safe .= $letter;
				break;
			}
		}

		return $safe;
	}

	/**
	 * Format a number by prefixing a character to a specified length.
	 *
	 * Negative numbers are converted to their absolute value and marked with a leading 'n',
	 * which is added on top of the padded length, e.g. pad(-5) gives 'n00005'.
	 * A value that is already $length characters or longer is returned unchanged.
	 *
	 * @param      integer $value  Number to format
	 * @param      integer $length Minimum length of the padded number
	 * @param      mixed   $prfx   Character to prepend (Defaults to 0)
	 * @return     string
	 */
	public static function pad($value, $length = 5, $prfx = 0)
	{
		$pre = '';

		if (is_numeric($value) && $value < 0)
		{
			$pre = 'n';
			$value = abs($value);
		}

		$value = (string) $value;
		$prfx  = (string) $prfx;

		// An empty prefix can never lengthen the value; skip padding instead of looping forever.
		if ($prfx !== '')
		{
			while (strlen($value) < $length)
			{
				$value = $prfx . $value;
			}
		}

		return $pre . $value;
	}
}
