package de.uni_koeln.spinfo.tesla.component.patterndetector;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.log4j.Logger;

import de.uni_koeln.spinfo.tesla.annotation.adapter.IOutputAdapter;
import de.uni_koeln.spinfo.tesla.annotation.adapter.InputIterator;
import de.uni_koeln.spinfo.tesla.annotation.adapter.TypeMapping;
import de.uni_koeln.spinfo.tesla.annotation.adapter.tunguska.DefaultTunguskaOutputAdapter;
import de.uni_koeln.spinfo.tesla.component.ngramtree.access.INgramTreeAccessAdapter;
import de.uni_koeln.spinfo.tesla.component.ngramtree.data.INgramTree;
import de.uni_koeln.spinfo.tesla.component.ngramtree.data.INode;
import de.uni_koeln.spinfo.tesla.component.ngramtree.data.IPosition;
import de.uni_koeln.spinfo.tesla.component.util.MatrixLoader;
import de.uni_koeln.spinfo.tesla.roles.core.access.IAnchoredElementAccessAdapter;
import de.uni_koeln.spinfo.tesla.roles.core.data.IAnchoredElement;
import de.uni_koeln.spinfo.tesla.roles.parser.data.IConstituent;
import de.uni_koeln.spinfo.tesla.roles.parser.impl.hibernate.data.Constituent;
import de.uni_koeln.spinfo.tesla.roles.parser.impl.tunguska.access.TunguskaConstituentAccessAdapter.DefaultTunguskaConstituentAccessAdapter;
import de.uni_koeln.spinfo.tesla.runtime.Result;
import de.uni_koeln.spinfo.tesla.runtime.TeslaComponent;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.AccessAdapter;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Author;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Component;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Configuration;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Description;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.OutputAdapter;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.RoleDescription;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Run;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.ThreadMode;
import de.uni_koeln.spinfo.tesla.runtime.persistence.Annotation;

/**
 * Tesla component that looks up recurring patterns in a suffix tree and stores
 * every occurrence of each pattern as a {@link Constituent} annotation.
 * <p>
 * Depending on the configured minimum length, either only the longest patterns
 * ({@code minLength < 2}) or all patterns matching the minimum frequency and
 * length ({@code minLength >= 2}) are requested from the {@link PatternDetector}.
 * The label of each generated constituent is the depth of the deepest node of
 * the pattern's node chain.
 */
@Component(threadMode=ThreadMode.CUSTOM, 
		author=@Author(	author="jhermes", 
						email="jhermes@spinfo.uni-koeln.de", 
						web="http://spinfo.uni-koeln.de", 
						organization="Sprachliche Informationsverarbeitung"),
		description=@Description(name="Pattern Detector",
								 summary="Searches longest or most frequent patterns in suffix trees",
								 bigO="",
								 version="1.0",
								 reusableResults=true))
public class PatternDetectorComponent extends TeslaComponent {
	
	private static final long serialVersionUID = 1214407936563L;

	/** Shared logger; static so that it is not part of the per-instance state. */
	private static final Logger logger = Logger.getLogger(PatternDetectorComponent.class);

	/** Minimum frequency passed to the pattern detector. */
	@Configuration(name = "Minimum occurrence of patterns", defaultValue = "5")
	private int minFreq = 5;

	/** Values below 2 select the "longest patterns" mode, values of 2 or more the "all patterns" mode. */
	@Configuration(name = "Minimum length of patterns", defaultValue = "1", description="Choose a number <2 to recieve only the longest patterns or a number >=2 to recieve all patterns that are larger than the number.")
	private int minLength = 1;
	
	@AccessAdapter(role = "de.uni_koeln.spinfo.tesla.roles.core.AnchoredElementGenerator", name = "Sequences", description = "The sequences to align (for instance, sentences)")
	private IAnchoredElementAccessAdapter<IAnchoredElement> sequences;

	@AccessAdapter(role = "de.uni_koeln.spinfo.tesla.roles.core.AnchoredElementGenerator", name = "Sequence Items", description = "The items to align (for instance, words)")
	private IAnchoredElementAccessAdapter<IAnchoredElement> words;

	@AccessAdapter(role = "de.uni_koeln.spinfo.tesla.roles.structure.ngramTreeGenerator", name = "Suffix Tree")
	private INgramTreeAccessAdapter<INgramTree> suffixTree;

	@OutputAdapter(dataObject=Constituent.class, type=DefaultTunguskaOutputAdapter.ProtoStuff.class, name="Patterns", 
			accessAdapterImpl=DefaultTunguskaConstituentAccessAdapter.class,
			description="All patterns detected in the suffix tree")
	@RoleDescription("de.uni_koeln.spinfo.tesla.roles.parser.ConstituentTagger")
	private IOutputAdapter<IConstituent> constituentOut;

	/** Result of {@code MatrixLoader.loadData}; assigned in {@link #run()} but not read in this class. */
	private ArrayList<int[]> sequenceMatrix;
	
	/** Annotations indexed by sequence index, then element index; used to resolve anchors and signal ids. */
	private ArrayList<List<Annotation<IAnchoredElement>>> dataMatrix;

	/**
	 * Loads the input data, detects the patterns in the suffix tree and stores
	 * one constituent annotation per pattern occurrence.
	 * <p>
	 * Cancellation is polled between patterns and between store operations. The
	 * output adapter is closed on every exit path, including failures.
	 *
	 * @return {@link Result#CANCELLED} if the component was cancelled, otherwise {@link Result#OK}
	 * @throws Exception if loading the data, creating or storing annotations, or closing the adapter fails
	 */
	@Run
	public Result run() throws Exception {
		try {
			logger.info("Loading data...");
			MatrixLoader loader = new MatrixLoader();
			InputIterator<String> signals = words.getAllSignalIds();
			ArrayList<String> signalIds = new ArrayList<String>();
			while (signals.hasNext()) {
				signalIds.add(signals.next());
			}

			sequenceMatrix = loader.loadData(signalIds, this, sequences, words, 20D);
			dataMatrix = loader.getDataMatrix();

			PatternDetector detector = new PatternDetector(suffixTree.getTree());
			Set<INodeChain> patterns;
			if (minLength < 2) {
				patterns = detector.getLongestPatterns(minFreq);
			} else {
				patterns = detector.getAllPatterns(minFreq, minLength);
			}

			List<Annotation<Constituent>> patternsToStore = new ArrayList<Annotation<Constituent>>();
			for (INodeChain chain : patterns) {
				if (cancelled()) {
					return Result.CANCELLED;
				}
				collectOccurrences(chain, patternsToStore);
			}

			DefaultTunguskaOutputAdapter.sort(patternsToStore);
			for (Annotation<Constituent> annotation : patternsToStore) {
				if (cancelled()) {
					return Result.CANCELLED;
				}
				constituentOut.store(annotation);
			}

			return cancelled() ? Result.CANCELLED : Result.OK;
		} finally {
			// Close the adapter even when loading, building or storing fails or the run is cancelled.
			constituentOut.close();
		}
	}

	/**
	 * Creates one constituent annotation for every position referenced by the
	 * deepest node of the given chain and appends it to {@code target}.
	 * <p>
	 * A referenced position marks the last element of an occurrence; the first
	 * element is found by stepping back {@code chain.size() - 1} elements within
	 * the same sequence. Occurrences whose computed start index is negative are
	 * logged and skipped.
	 *
	 * @param chain the node chain describing one detected pattern
	 * @param target list receiving the newly created annotations
	 * @throws Exception if creating an annotation or reading its signal content fails
	 */
	private void collectOccurrences(INodeChain chain, List<Annotation<Constituent>> target) throws Exception {
		INode deepestNode = chain.getDeepestNode();
		// Both values are identical for every occurrence of this chain, so compute them once.
		int patternLength = chain.size();
		String label = String.valueOf(deepestNode.getDepth());

		for (IPosition position : deepestNode.getReferencedPositions()) {
			int lastElementIndex = position.getElementIndex();
			int firstElementIndex = lastElementIndex - (patternLength - 1);
			if (firstElementIndex < 0) {
				logger.warn("Too short: " + chain);
				continue;
			}

			List<Annotation<IAnchoredElement>> sequence = dataMatrix.get(position.getSequenceIndex());
			Annotation<IAnchoredElement> firstElement = sequence.get(firstElementIndex);
			Annotation<IAnchoredElement> lastElement = sequence.get(lastElementIndex);
			int beginIndex = firstElement.getLeftAnchor();
			int endIndex = lastElement.getRightAnchor();
			String signalId = lastElement.getSignalId();

			Constituent constituent = new Constituent(label);
			Annotation<Constituent> anno = constituentOut.getAnnotationFactory().newAnnotation(beginIndex, endIndex, signalId, constituent, TypeMapping.SELF_GENERATED);
			target.add(anno);
			// Only build the message (and fetch the signal content) when it will actually be logged.
			if (logger.isInfoEnabled()) {
				logger.info("Stored an annotation: " + anno.getSignalContent());
			}
		}
	}
	
	/**
	 * Reports whether this component has been cancelled; delegates unchanged to
	 * the superclass implementation.
	 *
	 * @return the value returned by {@code super.cancelled()}
	 */
	@Override
	public boolean cancelled() {
		return super.cancelled();
	}

}
