package de.uni_koeln.spinfo.tesla.component.ngramtree;

import java.text.NumberFormat;
import java.util.ArrayList;

import de.uni_koeln.spinfo.tesla.annotation.adapter.IOutputAdapter;
import de.uni_koeln.spinfo.tesla.annotation.adapter.InputIterator;
import de.uni_koeln.spinfo.tesla.annotation.adapter.TypeMapping;
import de.uni_koeln.spinfo.tesla.annotation.adapter.tunguska.DefaultTunguskaOutputAdapter;
import de.uni_koeln.spinfo.tesla.component.ngramtree.access.impl.NGramTreeAccessAdapter;
import de.uni_koeln.spinfo.tesla.component.ngramtree.data.impl.NGramTree;
import de.uni_koeln.spinfo.tesla.component.util.MatrixLoader;
import de.uni_koeln.spinfo.tesla.roles.core.access.IAnchoredElementAccessAdapter;
import de.uni_koeln.spinfo.tesla.roles.core.data.IAnchoredElement;
import de.uni_koeln.spinfo.tesla.runtime.Result;
import de.uni_koeln.spinfo.tesla.runtime.TeslaComponent;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.AccessAdapter;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Author;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Component;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Configuration;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Description;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.OutputAdapter;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.RoleDescription;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Run;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.ThreadMode;
import de.uni_koeln.spinfo.tesla.runtime.persistence.Annotation;
import de.uni_koeln.spinfo.tesla.runtime.persistence.ExecutionReport.TYPE;

/**
 * Tesla component that builds one {@link NGramTree} from the integer sequences
 * delivered by a {@link MatrixLoader} and stores it through the "Tree" output adapter.
 *
 * <p>Two inputs are consumed: the "Sequences" adapter and the "Symbols" adapter.
 * Both are handed to the {@link MatrixLoader}, which returns one {@code int[]} per
 * sequence. Every array is inserted into the tree together with its running index.
 * The "Reverse (Prefix Tree)" option is forwarded to {@link NGramTree} and selects
 * which post-processing step runs after all insertions.</p>
 */
@Component(threadMode=ThreadMode.CUSTOM, 
		author=@Author(	author="Stephan Schwiebert", 
				email="sschwieb@spinfo.uni-koeln.de", 
				web="http://www.spinfo.uni-koeln.de/space/sschwieb", 
				organization="Sprachliche Informationsverarbeitung"),
		description=@Description(name="N-Gram Tree Generator",
				summary="This component generates NGram Trees from any items of any sequences. " +
						"The tree can either represent suffix- or prefix- N-Grams.",
				bigO="",
				version="1.0",
				reusableResults=true))
public class NGramTreeGenerator extends TeslaComponent {

	/** Maximum N-Gram length; passed to the {@link NGramTree} constructor. */
	@Configuration(name="Max N-Gram length", defaultValue="5", description="The maximum N-Gram length to analyze. Increasing this value will also increase required CPU and RAM (lots!)")
	protected int maxDepth = 5;


	/** Input adapter for the annotations that delimit a sequence. */
	@AccessAdapter(role="de.uni_koeln.spinfo.tesla.roles.core.AnchoredElementGenerator", name="Sequences", 
			description="The annotations which define the boundaries of a sequence")
	private IAnchoredElementAccessAdapter<IAnchoredElement> sequences;

	/** Input adapter for the symbols that are inserted into the tree. */
	@AccessAdapter(role="de.uni_koeln.spinfo.tesla.roles.core.AnchoredElementGenerator", name="Symbols",
			description="The symbols which are inserted into the tree")
	private IAnchoredElementAccessAdapter<IAnchoredElement> words;
	
	
	/** Output adapter that receives the single annotation carrying the generated tree. */
	@OutputAdapter(dataObject=NGramTree.class, type=DefaultTunguskaOutputAdapter.class, name="Tree", 
			accessAdapterImpl=NGramTreeAccessAdapter.class,
			description="The generated N-Gram Tree")
	@RoleDescription("de.uni_koeln.spinfo.tesla.roles.structure.ngramTreeGenerator")
	private IOutputAdapter<NGramTree> out;
	
	/** When {@code true} a prefix tree is requested; otherwise a suffix tree is built. */
	@Configuration(defaultValue="false", name="Reverse (Prefix Tree)")
	private boolean reverse;
	

	/**
	 * Entry point of the component: collects all signal ids of the symbol adapter,
	 * loads the sequence matrix and builds the tree from it.
	 *
	 * @return {@link Result#CANCELLED} if cancellation was detected after loading or
	 *         after tree construction, {@link Result#OK} otherwise
	 * @throws Exception if reading the inputs or loading the matrix fails
	 */
	@Run
	public Result run() throws Exception {
		InputIterator<String> signals = words.getAllSignalIds();
		ArrayList<String> signalIds = new ArrayList<String>();
		setProgressName("Loading sequences...");
		while(signals.hasNext()) {
			signalIds.add(signals.next());
		}
		logger.info("Building matrix of " + signals.getSize() + " signals...");
		MatrixLoader loader = new MatrixLoader();
		ArrayList<int[]> data;
		try {
			// NOTE(review): 40D is presumably the progress share reserved for loading - confirm in MatrixLoader.
			data = loader.loadData(signalIds, this, sequences, words, 40D);
		} finally {
			// Release the loader even when loadData fails, so a failed run does not keep it alive.
			loader.release();
		}
		if(cancelled()) {
			return Result.CANCELLED;
		}
		setProgressName("Building tree...");
		buildTree(data);
		if(cancelled()) {
			return Result.CANCELLED;
		}
		return Result.OK;
	}

	/**
	 * Inserts every sequence into a new {@link NGramTree}, post-processes the tree,
	 * reports the node count and stores the tree as a single annotation.
	 *
	 * <p>Returns early, without storing anything, as soon as cancellation is detected
	 * inside the insertion loop.</p>
	 *
	 * @param sequenceMatrix the sequences to insert, one {@code int[]} per sequence
	 */
	private  void buildTree(ArrayList<int[]> sequenceMatrix) {
		int counter = 0;
		NGramTree tree = new NGramTree(maxDepth);
		// Insertion advances the progress from 50 by a total of 40, spread evenly over all sequences.
		// For an empty matrix the division yields Infinity, which is never used as the loop does not run.
		double step = 40D/sequenceMatrix.size();
		double progress = 50;
		if(reverse) {
			setProgressName("Building n-gram prefix tree...");
		} else {
			setProgressName("Building n-gram suffix tree...");
		}
		for (int[] sentence : sequenceMatrix) {
			// The running index is passed along with each sequence.
			tree.insert(sentence, counter, reverse);
			counter++;
			progress += step;
			super.setProgress((int) progress);
			if(cancelled()) return;
			if(counter % 1000 == 0) {
				logger.info("Inserted " + counter + " sequences.");
			}
		}
		// NOTE(review): the node count is read before reversePositions()/trim(); confirm
		// that neither call changes the number of nodes, otherwise the report is stale.
		int nodes = tree.getNumberOfNodes();
		if(reverse) {
			tree.reversePositions(sequenceMatrix);
		} else {
			tree.trim();
		}
		// A tree with at most one node is reported as an error.
		report("Nodes", nodes, nodes > 1 ? TYPE.DEFAULT : TYPE.ERROR);
		Annotation<NGramTree> anno = out.getAnnotationFactory().newAnnotation(-1, -1, null, tree, TypeMapping.NONE);
		logger.info("A tree containing " + NumberFormat.getIntegerInstance().format(nodes) + " nodes has been created.");
		out.store(anno);
		setProgress(100);
	}

}
