/*
 * Decompiled with CFR 0.152.
 */
package de.uni_koeln.spinfo.tesla.component.ngramtree;

import de.uni_koeln.spinfo.tesla.annotation.adapter.IOutputAdapter;
import de.uni_koeln.spinfo.tesla.annotation.adapter.InputIterator;
import de.uni_koeln.spinfo.tesla.annotation.adapter.TypeMapping;
import de.uni_koeln.spinfo.tesla.annotation.adapter.tunguska.DefaultTunguskaOutputAdapter;
import de.uni_koeln.spinfo.tesla.component.ngramtree.access.impl.NGramTreeAccessAdapter;
import de.uni_koeln.spinfo.tesla.component.ngramtree.data.impl.NGramTree;
import de.uni_koeln.spinfo.tesla.component.util.MatrixLoader;
import de.uni_koeln.spinfo.tesla.roles.core.access.IAnchoredElementAccessAdapter;
import de.uni_koeln.spinfo.tesla.roles.core.data.IAnchoredElement;
import de.uni_koeln.spinfo.tesla.runtime.Result;
import de.uni_koeln.spinfo.tesla.runtime.TeslaComponent;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.AccessAdapter;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Author;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Component;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Configuration;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Description;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.OutputAdapter;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.RoleDescription;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Run;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.ThreadMode;
import de.uni_koeln.spinfo.tesla.runtime.persistence.Annotation;
import de.uni_koeln.spinfo.tesla.runtime.persistence.DataObject;
import de.uni_koeln.spinfo.tesla.runtime.persistence.ExecutionReport;
import java.text.NumberFormat;
import java.util.ArrayList;

/**
 * Tesla component that inserts integer-encoded sequences into an {@link NGramTree} and stores
 * the resulting tree through its output adapter.
 *
 * <p>The work is done in two phases: {@link #run()} collects all signal ids and lets a
 * {@link MatrixLoader} turn them into a list of {@code int[]} sequences; {@code buildTree}
 * then inserts every sequence into the tree, reports the node count and stores the tree as a
 * single annotation.
 */
@Component(threadMode=ThreadMode.CUSTOM, author={@Author(author="Stephan Schwiebert", email="sschwieb@spinfo.uni-koeln.de", web="http://www.spinfo.uni-koeln.de/space/sschwieb", organization="Sprachliche Informationsverarbeitung")}, description=@Description(name="N-Gram Tree Generator", summary="This component generates NGram Trees from any items of any sequences.The tree can either represent suffix- or prefix- N-Grams.", bigO="", version="1.0", reusableResults=true))
public class NGramTreeGenerator
extends TeslaComponent {
    /** Progress value at which the tree-building phase starts. */
    private static final double TREE_PROGRESS_START = 50.0;
    /** Total progress distributed evenly over all sequences while building the tree. */
    private static final double TREE_PROGRESS_SPAN = 40.0;
    /** Number of inserted sequences between two "Inserted ..." log messages. */
    private static final int LOG_INTERVAL = 1000;

    @Configuration(name="Max N-Gram length", defaultValue="5", description="The maximum N-Gram length to analyze. Increasing this value will also increase required CPU and RAM (lots!)")
    protected int maxDepth = 5;
    @AccessAdapter(role="de.uni_koeln.spinfo.tesla.roles.core.AnchoredElementGenerator", name="Sequences", description="The annotations which define the boundaries of a sequence")
    private IAnchoredElementAccessAdapter<IAnchoredElement> sequences;
    @AccessAdapter(role="de.uni_koeln.spinfo.tesla.roles.core.AnchoredElementGenerator", name="Symbols", description="The symbols which are inserted into the tree")
    private IAnchoredElementAccessAdapter<IAnchoredElement> words;
    @OutputAdapter(dataObject=NGramTree.class, type=DefaultTunguskaOutputAdapter.class, name="Tree", accessAdapterImpl=NGramTreeAccessAdapter.class, description="The generated N-Gram Tree")
    @RoleDescription(value="de.uni_koeln.spinfo.tesla.roles.structure.ngramTreeGenerator")
    private IOutputAdapter<NGramTree> out;
    @Configuration(defaultValue="false", name="Reverse (Prefix Tree)")
    private boolean reverse;

    /**
     * Loads all sequences and builds the n-gram tree from them.
     *
     * @return {@link Result#CANCELLED} if cancellation is detected after loading or after
     *         building the tree, otherwise {@link Result#OK}
     * @throws Exception if reading the input or loading the sequence data fails
     */
    @Run
    public Result run() throws Exception {
        InputIterator signals = this.words.getAllSignalIds();
        ArrayList<String> signalIds = new ArrayList<String>();
        this.setProgressName("Loading sequences...");
        while (signals.hasNext()) {
            signalIds.add((String)signals.next());
        }
        this.logger.info("Building matrix of " + signals.getSize() + " signals...");
        MatrixLoader loader = new MatrixLoader();
        ArrayList<int[]> data;
        try {
            // NOTE(review): the trailing 40.0 is presumably the share of overall progress
            // the loader may consume - confirm against MatrixLoader.loadData.
            data = loader.loadData(signalIds, this, this.sequences, this.words, 40.0);
        } finally {
            // Release the loader even when loading fails, so it is never left unreleased.
            loader.release();
        }
        if (this.cancelled()) {
            return Result.CANCELLED;
        }
        this.setProgressName("Building tree...");
        this.buildTree(data);
        if (this.cancelled()) {
            return Result.CANCELLED;
        }
        return Result.OK;
    }

    /**
     * Inserts every sequence into a new {@link NGramTree}, then reports the node count and
     * stores the tree. Returns early, without storing anything, as soon as cancellation is
     * detected.
     *
     * @param sequenceMatrix the sequences to insert, one {@code int[]} per sequence
     */
    private void buildTree(ArrayList<int[]> sequenceMatrix) {
        NGramTree tree = new NGramTree(this.maxDepth);
        // Guard the division: an empty matrix has no per-sequence progress step.
        double step = sequenceMatrix.isEmpty() ? 0.0 : TREE_PROGRESS_SPAN / (double)sequenceMatrix.size();
        double progress = TREE_PROGRESS_START;
        this.setProgressName(this.reverse ? "Building n-gram prefix tree..." : "Building n-gram suffix tree...");
        int counter = 0;
        for (int[] sentence : sequenceMatrix) {
            // The running index of the sequence is passed along with its symbols.
            tree.insert(sentence, counter, this.reverse);
            ++counter;
            progress += step;
            super.setProgress((int)progress);
            if (this.cancelled()) {
                return;
            }
            if (counter % LOG_INTERVAL == 0) {
                this.logger.info("Inserted " + counter + " sequences.");
            }
        }
        // The node count is taken before the post-processing step below.
        int nodes = tree.getNumberOfNodes();
        if (this.reverse) {
            tree.reversePositions(sequenceMatrix);
        } else {
            tree.trim();
        }
        // A tree with at most one node is reported as an error.
        this.report("Nodes", nodes, nodes > 1 ? ExecutionReport.TYPE.DEFAULT : ExecutionReport.TYPE.ERROR);
        Annotation anno = this.out.getAnnotationFactory().newAnnotation(-1, -1, null, (DataObject)tree, TypeMapping.NONE);
        this.logger.info("A tree containing " + NumberFormat.getIntegerInstance().format(nodes) + " nodes has been created.");
        this.out.store(anno);
        this.setProgress(100);
    }
}

