/*
 * Decompiled with CFR 0.152.
 */
package de.uni_koeln.spinfo.tesla.component.patterndetector;

import de.uni_koeln.spinfo.tesla.annotation.adapter.IOutputAdapter;
import de.uni_koeln.spinfo.tesla.annotation.adapter.InputIterator;
import de.uni_koeln.spinfo.tesla.annotation.adapter.TypeMapping;
import de.uni_koeln.spinfo.tesla.annotation.adapter.tunguska.DefaultTunguskaOutputAdapter;
import de.uni_koeln.spinfo.tesla.component.ngramtree.access.INgramTreeAccessAdapter;
import de.uni_koeln.spinfo.tesla.component.ngramtree.data.INgramTree;
import de.uni_koeln.spinfo.tesla.component.ngramtree.data.INode;
import de.uni_koeln.spinfo.tesla.component.ngramtree.data.IPosition;
import de.uni_koeln.spinfo.tesla.component.patterndetector.INodeChain;
import de.uni_koeln.spinfo.tesla.component.patterndetector.PatternDetector;
import de.uni_koeln.spinfo.tesla.component.util.MatrixLoader;
import de.uni_koeln.spinfo.tesla.roles.core.access.IAnchoredElementAccessAdapter;
import de.uni_koeln.spinfo.tesla.roles.core.data.IAnchoredElement;
import de.uni_koeln.spinfo.tesla.roles.parser.data.IConstituent;
import de.uni_koeln.spinfo.tesla.roles.parser.impl.hibernate.data.Constituent;
import de.uni_koeln.spinfo.tesla.roles.parser.impl.tunguska.access.TunguskaConstituentAccessAdapter;
import de.uni_koeln.spinfo.tesla.runtime.Result;
import de.uni_koeln.spinfo.tesla.runtime.TeslaComponent;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.AccessAdapter;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Author;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Component;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Configuration;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Description;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.OutputAdapter;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.RoleDescription;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.Run;
import de.uni_koeln.spinfo.tesla.runtime.component.annotations.ThreadMode;
import de.uni_koeln.spinfo.tesla.runtime.persistence.Annotation;
import de.uni_koeln.spinfo.tesla.runtime.persistence.DataObject;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.log4j.Logger;

/**
 * Tesla component that queries a suffix tree for patterns and emits one
 * constituent annotation per occurrence of each detected pattern.
 *
 * <p>Depending on {@code minLength}, either only the longest patterns
 * ({@code minLength < 2}) or all patterns of at least the configured length
 * are requested from {@link PatternDetector}; {@code minFreq} is passed
 * through as the occurrence threshold in both cases.
 */
@Component(threadMode=ThreadMode.CUSTOM, author={@Author(author="jhermes", email="jhermes@spinfo.uni-koeln.de", web="http://spinfo.uni-koeln.de", organization="Sprachliche Informationsverarbeitung")}, description=@Description(name="Pattern Detector", summary="Searches longest or most frequent patterns in suffix trees", bigO="", version="1.0", reusableResults=true))
public class PatternDetectorComponent
extends TeslaComponent {
    private static final long serialVersionUID = 1214407936563L;

    /** Occurrence threshold handed to the pattern detector. */
    @Configuration(name="Minimum occurrence of patterns", defaultValue="5")
    private int minFreq = 5;

    /** Values below 2 select "longest patterns only"; otherwise used as the length threshold. */
    @Configuration(name="Minimum length of patterns", defaultValue="1", description="Choose a number <2 to recieve only the longest patterns or a number >=2 to recieve all patterns that are larger than the number.")
    private int minLength = 1;

    @AccessAdapter(role="de.uni_koeln.spinfo.tesla.roles.core.AnchoredElementGenerator", name="Sequences", description="The sequences to align (for instance, sentences)")
    private IAnchoredElementAccessAdapter<IAnchoredElement> sequences;

    @AccessAdapter(role="de.uni_koeln.spinfo.tesla.roles.core.AnchoredElementGenerator", name="Sequence Items", description="The items to align (for instance, words)")
    private IAnchoredElementAccessAdapter<IAnchoredElement> words;

    @AccessAdapter(role="de.uni_koeln.spinfo.tesla.roles.structure.ngramTreeGenerator", name="Suffix Tree")
    private INgramTreeAccessAdapter<INgramTree> suffixTree;

    @OutputAdapter(dataObject=Constituent.class, type=DefaultTunguskaOutputAdapter.ProtoStuff.class, name="Patterns", accessAdapterImpl=TunguskaConstituentAccessAdapter.DefaultTunguskaConstituentAccessAdapter.class, description="All patterns detected in the suffix tree")
    @RoleDescription(value="de.uni_koeln.spinfo.tesla.roles.parser.ConstituentTagger")
    private IOutputAdapter<IConstituent> constituentOut;

    /** Result of {@code MatrixLoader.loadData}; kept on the instance, not read in this class. */
    private ArrayList<int[]> sequenceMatrix;

    /** Per-sequence lists of item annotations, indexed by sequence index and element index. */
    private ArrayList<List<Annotation<IAnchoredElement>>> dataMatrix;

    private Logger logger = Logger.getLogger(getClass());

    /**
     * Loads the sequence/item data, detects patterns in the suffix tree and
     * stores one annotation per pattern occurrence.
     *
     * <p>Each annotation spans from the left anchor of the occurrence's first
     * element to the right anchor of its last element and carries a
     * {@link Constituent} labelled with the depth of the pattern's deepest node.
     * Occurrences whose computed first element index is negative are skipped.
     *
     * @return {@link Result#CANCELLED} if the component reports cancellation
     *         after storing, otherwise {@link Result#OK}
     * @throws Exception propagated from the adapters, loader or detector
     */
    @Run
    public Result run() throws Exception {
        this.logger.info("Loading data...");
        MatrixLoader loader = new MatrixLoader();

        // Drain the signal id iterator once; it cannot be re-read afterwards.
        InputIterator signals = this.words.getAllSignalIds();
        ArrayList<String> signalIds = new ArrayList<String>();
        while (signals.hasNext()) {
            signalIds.add((String)signals.next());
        }

        // NOTE(review): the meaning of the literal 20.0 is defined by MatrixLoader - confirm.
        this.sequenceMatrix = loader.loadData(signalIds, (TeslaComponent)this, this.sequences, this.words, 20.0);
        this.dataMatrix = loader.getDataMatrix();

        PatternDetector pd = new PatternDetector(this.suffixTree.getTree());
        Set<? extends INodeChain> patterns = this.minLength < 2
                ? pd.getLongestPatterns(this.minFreq)
                : pd.getAllPatterns(this.minFreq, this.minLength);

        ArrayList<Annotation> patternsToStore = new ArrayList<Annotation>();
        for (INodeChain pattern : patterns) {
            INode lastNode = pattern.getDeepestNode();
            // Label and length are the same for every occurrence of this pattern.
            String label = String.valueOf(lastNode.getDepth());
            int patternLength = pattern.size();
            Collection<? extends IPosition> referencedPositions = lastNode.getReferencedPositions();
            for (IPosition position : referencedPositions) {
                // The position refers to the pattern's last element; step back to its first.
                int lastElementIndex = position.getElementIndex();
                int firstElementIndex = lastElementIndex - (patternLength - 1);
                if (firstElementIndex < 0) {
                    this.logger.warn("Too short: " + pattern);
                    continue;
                }
                List<Annotation<IAnchoredElement>> sequence = this.dataMatrix.get(position.getSequenceIndex());
                Annotation<IAnchoredElement> lastElement = sequence.get(lastElementIndex);
                int endIndex = lastElement.getRightAnchor();
                int beginIndex = sequence.get(firstElementIndex).getLeftAnchor();
                String signalId = lastElement.getSignalId();

                Constituent c = new Constituent(label);
                Annotation anno = this.constituentOut.getAnnotationFactory().newAnnotation(beginIndex, endIndex, signalId, (DataObject)c, TypeMapping.SELF_GENERATED);
                patternsToStore.add(anno);
                this.logger.info("Stored an annotation: " + anno.getSignalContent());
            }
        }

        // Annotations are sorted before being handed to the output adapter.
        DefaultTunguskaOutputAdapter.sort(patternsToStore);
        for (Annotation annotation : patternsToStore) {
            this.constituentOut.store(annotation);
        }
        // NOTE(review): close() is not reached if store() throws - confirm whether
        // the adapter should be closed on failure before moving this into a finally block.
        this.constituentOut.close();

        if (this.cancelled()) {
            return Result.CANCELLED;
        }
        return Result.OK;
    }

    /**
     * Delegates to the superclass cancellation check.
     *
     * @return whatever {@code TeslaComponent.cancelled()} reports
     */
    @Override
    public boolean cancelled() {
        return super.cancelled();
    }
}

