// 
// Copyright (c) 2006-2008 Ben Motmans
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// Author(s):
//    Tomas Petricek (http://www.codeproject.com/useritems/ahocorasick.asp)
//    Ben Motmans <ben.motmans@gmail.com>
//

using System;
using System.Collections.Generic;

namespace Anculus.Core
{
	// Aho-Corasick exact set matching algorithm
	// Search time: O(n + m + z), where n is the total length of all keywords, m is the length of the searched text and z is the number of pattern occurrences
	//
	// This implementation is loosely based on http://www.codeproject.com/useritems/ahocorasick.asp
	// 
	// String Matching: An Aid to Bibliographic Search - Alfred V. Aho and Margaret J. Corasick (Bell Laboratories) (http://hidden.dankirsh.com/CS549/paper.pdf)
	// 
	// Set Matching and Aho-Corasick Algorithm - Pekka Kilpeläinen (University of Kuopio) (www.cs.uku.fi/~kilpelai/BSA05/lectures/slides04.pdf)
	// http://www-sr.informatik.uni-tuebingen.de/~buehler/AC/AC.html
	public class AhoCorasickSearchAlgorithm : AbstractSetSearchAlgorithm
	{
		/// <summary>
		/// Searches <paramref name="text"/> for occurrences of any of the given keywords and
		/// returns them as an array.
		/// </summary>
		/// <param name="text">The text to search.</param>
		/// <param name="start">Index in <paramref name="text"/> at which the scan begins.</param>
		/// <param name="count">
		/// Maximum number of results to collect; the scan stops as soon as this many matches
		/// have been gathered. <c>int.MaxValue</c> is effectively "no limit".
		/// </param>
		/// <param name="keywords">The keywords to look for.</param>
		/// <returns>The matches in the order the scan produced them; an empty array when nothing matched.</returns>
		public override SearchResult[] SearchAll (string text, int start, int count, params string[] keywords)
		{
			// Argument validation is delegated to the base class helpers.
			CheckArguments (text, start, count);
			CheckKeywords (keywords);

			// count is only an upper bound on the number of matches, so it must not be used
			// directly as the list capacity: a large limit would allocate a huge backing
			// array up front even when the text contains only a handful of matches.
			const int MaxInitialCapacity = 64;
			List<SearchResult> results = new List<SearchResult> (Math.Min (count, MaxInitialCapacity));

			foreach (SearchResult result in SearchIterator (text, start, keywords)) {
				results.Add (result);

				// Stop scanning as soon as the requested number of matches is reached.
				if (results.Count == count)
					break;
			}

			return results.ToArray ();
		}

		/// <summary>
		/// Returns the first keyword occurrence the scan finds in <paramref name="text"/>.
		/// </summary>
		/// <param name="text">The text to search.</param>
		/// <param name="start">Index in <paramref name="text"/> at which the scan begins.</param>
		/// <param name="keywords">The keywords to look for.</param>
		/// <returns>
		/// The first result produced by the scan, or <c>SearchResult.Empty</c> when none of
		/// the keywords occurs.
		/// </returns>
		public override SearchResult SearchFirst (string text, int start, params string[] keywords)
		{
			// Argument validation is delegated to the base class helpers.
			CheckArguments (text, start, int.MaxValue);
			CheckKeywords (keywords);

			// foreach (rather than a hand-driven enumerator) guarantees the enumerator is
			// disposed when we return on the first hit.
			foreach (SearchResult result in SearchIterator (text, start, keywords))
				return result;

			return SearchResult.Empty;
		}

		/// <summary>
		/// Lazily scans <paramref name="text"/> with an Aho-Corasick automaton built from
		/// <paramref name="keywords"/> and yields one result per keyword occurrence.
		/// </summary>
		/// <remarks>
		/// This is an iterator: the automaton is built when enumeration starts, and it is
		/// rebuilt for every new enumeration of the returned sequence.
		/// </remarks>
		/// <param name="text">The text to search.</param>
		/// <param name="start">
		/// Index of the first character to examine; values of zero or less scan the whole text.
		/// </param>
		/// <param name="keywords">The keywords to look for.</param>
		/// <returns>
		/// Results carrying the position of the match's first character, expressed as an
		/// index into <paramref name="text"/>, and the keyword that matched.
		/// </returns>
		protected static IEnumerable<SearchResult> SearchIterator (string text, int start, params string[] keywords)
		{
			Node root = BuildTree (keywords);
			Node state = root;

			// Scan the original string in place instead of copying the tail with Substring:
			// this avoids an O(n) allocation and, more importantly, keeps the reported
			// positions relative to the text the caller passed in rather than to a
			// truncated copy the caller never sees.
			for (int index = (start > 0) ? start : 0; index < text.Length; index++) {
				char c = text[index];

				// Follow failure links until some state accepts c or we are back at the
				// root, which simply stays put when it has no transition for c.
				Node next = state.GetTransition (c);
				while (next == null && state != root) {
					state = state.Failure;
					next = state.GetTransition (c);
				}

				if (next != null)
					state = next;

				// Every keyword recorded on this state ends at the current character, so
				// its first character sits (length - 1) positions earlier.
				foreach (string found in state.Results)
					yield return new SearchResult (index - found.Length + 1, found);
			}
		}

		/// <summary>
		/// Builds the Aho-Corasick automaton for <paramref name="keywords"/>: a trie of the
		/// keywords whose nodes carry failure links and the keywords recognised at each node.
		/// </summary>
		/// <param name="keywords">The keywords the automaton should recognise.</param>
		/// <returns>The root node of the automaton; its failure link points to itself.</returns>
		protected static Node BuildTree (string[] keywords)
		{
			Node root = new Node (null, ' ');

			// Phase 1: insert every keyword into the trie, reusing existing prefixes.
			foreach (string keyword in keywords) {
				Node node = root;
				foreach (char c in keyword) {
					// Hash lookup instead of a linear scan over the child array.
					Node next = node.GetTransition (c);
					if (next == null) {
						next = new Node (node, c);
						node.AddTransition (next);
					}
					node = next;
				}
				node.AddResult (keyword);
			}

			// Phase 2: compute failure links breadth-first. The list doubles as the BFS
			// queue: nodes are appended while an index cursor walks over it, so a node is
			// always handled after every node closer to the root.
			List<Node> queue = new List<Node> ();

			// Depth-1 nodes always fail to the root; the walk starts at depth 2.
			foreach (Node first in root.Transitions) {
				first.Failure = root;
				queue.AddRange (first.Transitions);
			}

			for (int i = 0; i < queue.Count; i++) {
				Node node = queue[i];
				char c = node.Char;

				// Walk the parent's failure chain until a state with a transition on c is
				// found. root.Failure is still null at this point, which is what ends the
				// walk once the root itself has been tried.
				Node target = null;
				for (Node fallback = node.Parent.Failure; fallback != null; fallback = fallback.Failure) {
					target = fallback.GetTransition (c);
					if (target != null)
						break;
				}

				if (target == null) {
					node.Failure = root;
				} else {
					node.Failure = target;

					// Keywords recognised at the failure target also end at this node.
					foreach (string result in target.Results)
						node.AddResult (result);
				}

				queue.AddRange (node.Transitions);
			}

			// Only now may the root point at itself; doing it earlier would turn the
			// failure-chain walk above into an endless loop.
			root.Failure = root;
			return root;
		}
		
		// A single state of the keyword automaton.
		protected class Node
		{
			// Character on the edge leading from Parent to this node.
			internal char Char;
			// Node this one was created under (null is passed for the root).
			internal Node Parent;
			// Failure link; not set by the constructor, assigned by whoever builds the automaton.
			internal Node Failure;

			// Keywords recorded on this node, without duplicates.
			internal IList<string> Results;

			// Child nodes as an array snapshot, refreshed by AddTransition.
			internal Node[] Transitions;
			// Child nodes keyed by their edge character.
			internal Dictionary<char, Node> TransitionHash;

			// Creates a node with no children and no results.
			public Node (Node parent, char c)
			{
				Parent = parent;
				Char = c;

				TransitionHash = new Dictionary<char, Node> ();
				Transitions = new Node[0];
				Results = new List<string> ();
			}

			// Records a keyword on this node; a keyword that is already present is ignored.
			public void AddResult (string result)
			{
				if (!Results.Contains (result))
					Results.Add (result);
			}

			// Registers a child under its own character and refreshes the array snapshot.
			// Dictionary.Add throws if a child for that character already exists.
			public void AddTransition (Node node)
			{
				TransitionHash.Add (node.Char, node);

				Node[] snapshot = new Node[TransitionHash.Values.Count];
				TransitionHash.Values.CopyTo (snapshot, 0);
				Transitions = snapshot;
			}

			// Returns the child reached via c, or null when there is none.
			public Node GetTransition (char c)
			{
				Node child;
				return TransitionHash.TryGetValue (c, out child) ? child : null;
			}
		}
	}
}
