package de.uni_koeln.spinfo.tesla.component.misc;

import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;

import de.uni_koeln.spinfo.tesla.annotation.adapter.IOutputAdapter;
import de.uni_koeln.spinfo.tesla.annotation.adapter.InputIterator;
import de.uni_koeln.spinfo.tesla.annotation.adapter.queryconstraints.Range;
import de.uni_koeln.spinfo.tesla.annotation.adapter.tunguska.DefaultTunguskaOutputAdapter;
import de.uni_koeln.spinfo.tesla.component.paradigms.data.IGeneralizedHeuristicConstituent;
import de.uni_koeln.spinfo.tesla.component.paradigms.data.impl.GeneralizedConstituent;
import de.uni_koeln.spinfo.tesla.component.paradigms.data.impl.Reason;
import de.uni_koeln.spinfo.tesla.roles.core.data.IAnchoredElement;
import de.uni_koeln.spinfo.tesla.roles.filter.access.IFilterAccessAdapter;
import de.uni_koeln.spinfo.tesla.roles.filter.data.IFilter;
import de.uni_koeln.spinfo.tesla.roles.parser.access.IConstituentAccessAdapter;
import de.uni_koeln.spinfo.tesla.roles.parser.data.IMultiLabelConstituent;
import de.uni_koeln.spinfo.tesla.roles.parser.impl.hibernate.data.MultiLabelConstituent;
import de.uni_koeln.spinfo.tesla.roles.parser.impl.tunguska.access.TunguskaConstituentAccessAdapter.DefaultTunguskaConstituentAccessAdapter;
import de.uni_koeln.spinfo.tesla.runtime.Result;
import de.uni_koeln.spinfo.tesla.runtime.TeslaComponent;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.AccessAdapter;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Author;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Component;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Description;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.OutputAdapter;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.RoleDescription;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Run;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.SignalAdapter;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.ThreadMode;
import de.uni_koeln.spinfo.tesla.runtime.persistence.Annotation;
import de.uni_koeln.spinfo.tesla.runtime.persistence.TabularReport;
import de.uni_koeln.spinfo.tesla.runtime.persistence.TeslaDocument;
import de.uni_koeln.spinfo.tesla.runtime.signal.SignalAccessor;
import de.uni_koeln.spinfo.tesla.runtime.signal.SignalIterator;

@Component(threadMode=ThreadMode.NOT_SUPPORTED, 
		author=@Author(	author="Stephan Schwiebert", 
						email="sschwieb@spinfo.uni-koeln.de", 
						web="http://www.spinfo.phil-fak.uni-koeln.de/sschwieb.html", 
						organization="Sprachliche Informationsverarbeitung"),
		description=@Description(name="SOG Validator",
								 summary="Recombines the annotations of all consumed adapters.",
								 bigO="unknown",
								 version="1.0",
								 reusableResults=true))
/**
 * Collects statistics about the generalized constituents delivered by the
 * "Validatable" access adapter and publishes them as tabular reports.
 * <p>
 * For every constituent that is accepted by all connected filters the
 * following values are recorded (see {@link #analyze(GeneralizedConstituent)}):
 * <ul>
 * <li>the number of justifications ("reasons") of the hypothesis,</li>
 * <li>the length of the constituent (end - start),</li>
 * <li>a bucketed, weighted average of the left, right and combined context
 * widths of its reasons.</li>
 * </ul>
 * Each of these histograms maps a value to the number of constituents for
 * which that value was observed.
 */
public class Validator extends TeslaComponent {

	private static final long serialVersionUID = 879812013823937578L;

	/** Bucket width used when aggregating the weighted context-width averages. */
	private static final int BUCKET_SIZE = 50;

	/** Exclusive upper key bound of the left/right context-width reports. */
	private static final int SINGLE_SIDE_REPORT_LIMIT = 2500;

	/** Exclusive upper key bound of the combined context-width report. */
	private static final int COMBINED_REPORT_LIMIT = 10000;
	
	@AccessAdapter(role="de.uni_koeln.spinfo.tesla.roles.structure.bootstrapping.GeneralizedHeuristicConstituentDetector", name="Validatable")
	private IConstituentAccessAdapter<IGeneralizedHeuristicConstituent> sequences;

	@SignalAdapter(signalType=String.class, name="texts")
	private SignalAccessor<String> sa;
		
	 @OutputAdapter(dataObject=MultiLabelConstituent.class, type=DefaultTunguskaOutputAdapter.class, name="Constituents", 
				accessAdapterImpl=DefaultTunguskaConstituentAccessAdapter.class,
				description="All detected constituent hypotheses",
				bufferSize=20)
	@RoleDescription("de.uni_koeln.spinfo.tesla.roles.parser.MultiValueConstituentTagger")
	private IOutputAdapter<IMultiLabelConstituent> constituentOut;

	@AccessAdapter(role="de.uni_koeln.spinfo.tesla.roles.filter.Filter", name="Filters", min=0, max=-1)
	private List<IFilterAccessAdapter<IAnchoredElement, IFilter<IAnchoredElement>>> filters;

	/** Key: constituent length (end - start), value: number of constituents. */
	private SortedMap<Integer, Integer> structureLength = new TreeMap<Integer, Integer>();
	/** Key: number of reasons of a hypothesis, value: number of constituents. */
	private SortedMap<Integer, Integer> reasonCounter = new TreeMap<Integer, Integer>();
	/** Key: bucketed weighted right context width, value: number of constituents. */
	SortedMap<Integer, Integer> aggregatedRightWidths = new TreeMap<Integer, Integer>();
	/** Key: bucketed weighted combined context width, value: number of constituents. */
	SortedMap<Integer, Integer> aggregatedWidthSum = new TreeMap<Integer, Integer>();	
	/** Key: bucketed weighted left context width, value: number of constituents. */
	SortedMap<Integer, Integer> aggregatedLeftWidths = new TreeMap<Integer, Integer>();
	
	/**
	 * Iterates over all signals, analyzes every constituent that passes all
	 * filters and finally writes one report per collected histogram.
	 * 
	 * @return {@link Result#CANCELLED} if {@code cancelled()} reports true
	 *         after processing, {@link Result#OK} otherwise
	 * @throws Exception whatever the adapters or the report API throw
	 */
	@Run
	public Result run() throws Exception {
		SignalIterator signals = sa.getSignalIterator();
		signals.registerAsProgressBar(this);
		// The per-reason validation that used to update these three counters
		// is disabled, so they are currently always reported as 0. They are
		// kept so that the console output keeps its format.
		int missed = 0;
		int toomany = 0;
		int success = 0;
		int sequenceCounter = 0;
		while(signals.hasNext()) {
			TeslaDocument doc = signals.next();
			String signalId = doc.getId();
			// NOTE(review): nothing is ever added to this list, so the
			// sort/store below is effectively a no-op - confirm whether
			// output annotations are still supposed to be produced.
			List<Annotation<MultiLabelConstituent>> toStore = new ArrayList<Annotation<MultiLabelConstituent>>();
			InputIterator<Annotation<IGeneralizedHeuristicConstituent>> allSequences = sequences.getAnchoredElements(Range.forSignal(signalId), null, IGeneralizedHeuristicConstituent.class);
			while(allSequences.hasNext()) {
				Annotation<IGeneralizedHeuristicConstituent> anno = allSequences.next();
				if(!passesAllFilters(anno)) {
					continue;
				}
				analyze((GeneralizedConstituent) anno.getDataObject());
				sequenceCounter++;
			}
			System.out.println(summary(sequenceCounter, success, missed, toomany));
			DefaultTunguskaOutputAdapter.sort(toStore);
			for (Annotation<MultiLabelConstituent> anno : toStore) {
				constituentOut.store(anno);
			}
		}
		System.out.println(summary(sequenceCounter, success, missed, toomany));
		
		NumberFormat percentFormat = NumberFormat.getNumberInstance();
		// Number of justifications for a hypothesis:
		writeReport("Number of justifications", "The number of justifications found for a hypothesis",
				"Justifications", reasonCounter, percentFormat);
		writeReport("Structure Length", "Length of the constituent",
				"Length", structureLength, percentFormat);
		// The context-width reports are cut off at a fixed key. The headMap
		// views are only used for reporting; the fields keep referring to the
		// unrestricted maps, because inserting a key outside the range of a
		// headMap view throws an IllegalArgumentException.
		writeReport("Average left ctx width", "Average left ctx width, weighted, multiplied by 100",
				"Length", aggregatedLeftWidths.headMap(SINGLE_SIDE_REPORT_LIMIT), percentFormat);
		writeReport("Average right ctx width", "Average right ctx width, weighted, multiplied by 100",
				"Length", aggregatedRightWidths.headMap(SINGLE_SIDE_REPORT_LIMIT), percentFormat);
		writeReport("Average shared ctx width", "Average shared ctx width, weighted, multiplied by 100",
				"Length", aggregatedWidthSum.headMap(COMBINED_REPORT_LIMIT), percentFormat);
		
		return cancelled() ? Result.CANCELLED : Result.OK;
	}

	/**
	 * Builds the progress line that is printed to the console.
	 */
	private static String summary(int sequenceCounter, int success, int missed, int toomany) {
		return sequenceCounter + " sequences, Success: " + success + ", missed: " + missed + ", toomany " + toomany;
	}

	/**
	 * Checks an annotation against all connected filters.
	 * 
	 * @param anno the annotation to test
	 * @return true if every filter matches the annotation (or no filter is
	 *         connected), false as soon as one filter rejects it
	 */
	private boolean passesAllFilters(Annotation<IGeneralizedHeuristicConstituent> anno) {
		// The filter adapter is declared with min=0; presumably the framework
		// injects an empty list in that case, but guard against null anyway.
		if(filters == null) {
			return true;
		}
		// The filters are typed on IAnchoredElement, so the annotation is
		// passed through a raw reference to satisfy the compiler.
		Annotation hack = anno;
		for (IFilterAccessAdapter<IAnchoredElement, IFilter<IAnchoredElement>> filter : filters) {
			if(!filter.matches(hack)) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Creates a three-column report (key, count, percent) for a histogram and
	 * fills it via {@link #fillReport(TabularReport, Set)}.
	 * 
	 * @param title         title of the report
	 * @param description   description of the report
	 * @param keyLabel      label of the first column
	 * @param histogram     the values to report; one row per entry
	 * @param percentFormat number format used for the "Percent" column
	 */
	private void writeReport(String title, String description, String keyLabel,
			SortedMap<Integer, Integer> histogram, NumberFormat percentFormat) {
		TabularReport report = super.newReport(title, description, histogram.size(), 3);
		report.setColumnLabel(0, keyLabel);
		report.setColumnLabel(1, "Count");
		report.setColumnLabel(2, "Percent", percentFormat);
		fillReport(report, histogram.entrySet());
	}

	/**
	 * Writes one row per entry into the report: the key, the value, and the
	 * value's share of the sum of all values in percent.
	 * 
	 * @param report  the report to fill; rows are written starting at index 0
	 * @param entries the entries to write, in iteration order
	 */
	protected void fillReport(TabularReport report,
			Set<Entry<Integer, Integer>> entries) {
		double overall = 0;
		for (Entry<Integer, Integer> entry : entries) {
			overall += entry.getValue();
		}
		int row = 0;
		for (Entry<Integer, Integer> entry : entries) {
			report.setValue(row, 0, entry.getKey());
			report.setValue(row, 1, entry.getValue());
			// Multiply in double arithmetic so that large counts cannot
			// overflow the int range before the division.
			report.setValue(row, 2, (entry.getValue() * 100.0) / overall);
			row++;
		}
	}

	/**
	 * Increments the counter stored under the given key, starting at 1 if the
	 * key is not present yet.
	 */
	private static void increment(SortedMap<Integer, Integer> histogram, int key) {
		Integer current = histogram.get(key);
		histogram.put(key, current == null ? 1 : current + 1);
	}

	/**
	 * Computes the sum of (width squared * occurrences) over all entries,
	 * multiplies it by 100, divides it by the number of reasons and rounds
	 * the result down to a multiple of {@link #BUCKET_SIZE}.
	 * 
	 * @param widths      key: width, value: number of occurrences
	 * @param reasonCount number of reasons of the hypothesis; if it is 0 the
	 *                    division yields NaN, which the int cast turns into 0
	 * @return the bucket key for the aggregated histograms
	 */
	private static int weightedBucket(SortedMap<Integer, Integer> widths, int reasonCount) {
		double weighted = 0;
		for (Entry<Integer, Integer> entry : widths.entrySet()) {
			weighted += Math.pow(entry.getKey(), 2) * entry.getValue();
		}
		weighted = weighted * 100 / reasonCount;
		return ((int) weighted / BUCKET_SIZE) * BUCKET_SIZE;
	}

	/**
	 * Records the statistics of a single hypothesis in the histograms of this
	 * component.
	 * <p>
	 * Values of interest:
	 * <ul>
	 * <li>Length of a constituent, aggregated in an Integer/Integer map.
	 * Earlier finding noted by the author: no significant difference.</li>
	 * <li>Number of reasons, aggregated in an Integer/Integer map.
	 * Earlier finding noted by the author: no significant difference.</li>
	 * <li>Reason-specific: context width of the reasons - left, right and
	 * combined.</li>
	 * </ul>
	 * The reason-specific values are first aggregated per constituent in an
	 * Integer/Integer map (width to number of occurrences), for example for
	 * the left context:
	 * <pre>
	 * 1:1000
	 * 2:200
	 * 3:10
	 * 4:1
	 * </pre>
	 * Each of these per-constituent maps is then condensed into a single
	 * bucket key: the number of occurrences multiplied by the square of the
	 * width, summed up, scaled by 100 and divided by the number of reasons.
	 * 
	 * @param hypothesis the constituent hypothesis to analyze
	 */
	private void analyze(GeneralizedConstituent hypothesis) {
		List<Reason> reasons = hypothesis.getMatchDetails();
		int reasonCount = reasons.size();
		increment(reasonCounter, reasonCount);
		increment(structureLength, hypothesis.getEnd() - hypothesis.getStart());

		// Per-hypothesis histograms. Key: width, value: number of occurrences.
		SortedMap<Integer, Integer> leftWidths = new TreeMap<Integer, Integer>();
		SortedMap<Integer, Integer> rightWidths = new TreeMap<Integer, Integer>();
		SortedMap<Integer, Integer> widthSum = new TreeMap<Integer, Integer>();
		for (Reason reason : reasons) {
			int left = reason.getLeftContextWidth() + reason.getLeftBonus();
			int right = reason.getRightContextWidth() + reason.getRightBonus();
			int combined = reason.getLeftContextWidth() + reason.getRightContextWidth() + reason.getLeftBonus() + reason.getRightBonus();
			increment(leftWidths, left);
			increment(rightWidths, right);
			increment(widthSum, combined);
		}

		increment(aggregatedRightWidths, weightedBucket(rightWidths, reasonCount));
		// The left aggregate must be derived from the left widths; it was
		// previously computed from the right widths by mistake.
		increment(aggregatedLeftWidths, weightedBucket(leftWidths, reasonCount));
		increment(aggregatedWidthSum, weightedBucket(widthSum, reasonCount));
	}

}
