/*
 * Decompiled with CFR 0.152.
 */
package org.tribuo.data.text.impl;

import com.oracle.labs.mlrg.olcut.config.Config;
import com.oracle.labs.mlrg.olcut.config.Configurable;
import com.oracle.labs.mlrg.olcut.config.PropertyException;
import com.oracle.labs.mlrg.olcut.provenance.ConfiguredObjectProvenance;
import com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.tribuo.Feature;
import org.tribuo.data.text.FeatureAggregator;
import org.tribuo.data.text.TextPipeline;
import org.tribuo.data.text.TextProcessingException;
import org.tribuo.data.text.TextProcessor;
import org.tribuo.data.text.impl.NgramProcessor;
import org.tribuo.data.text.impl.UniqueAggregator;
import org.tribuo.util.tokens.Tokenizer;

/**
 * A {@link TextPipeline} that runs one {@link NgramProcessor} for every size
 * from 1 up to and including the configured {@code ngram}, all sharing the
 * same {@link Tokenizer}, and then passes the collected features through a
 * {@link UniqueAggregator}.
 */
public class BasicPipeline implements TextPipeline {
    private static final Logger logger = Logger.getLogger(BasicPipeline.class.getName());

    /** The processors applied in order by {@link #process}; (re)built by {@link #postConfig()}. */
    private final List<TextProcessor> processors = new ArrayList<>();

    /** Applied to the combined output of all processors before it is returned. */
    private final FeatureAggregator aggregator = new UniqueAggregator();

    // The two @Config fields below are deliberately non-final so that they
    // can be assigned after the private no-arg constructor has run.
    @Config(mandatory=true, description="Tokenizer to use.")
    private Tokenizer tokenizer;

    @Config(description="n in the n-gram to emit.")
    private int ngram = 2;

    /**
     * Creates a pipeline emitting features for every n-gram size in {@code 1..ngram}.
     *
     * @param tokenizer the tokenizer handed to each {@link NgramProcessor}.
     * @param ngram     the largest n-gram size to emit; must be at least 1.
     * @throws PropertyException if {@code ngram} is less than 1.
     */
    public BasicPipeline(Tokenizer tokenizer, int ngram) {
        this.tokenizer = tokenizer;
        this.ngram = ngram;
        this.postConfig();
    }

    /**
     * No-arg constructor; leaves the pipeline without processors until
     * {@link #postConfig()} is invoked.
     */
    private BasicPipeline() {
    }

    /**
     * Validates {@code ngram} and (re)builds the processor list, one
     * {@link NgramProcessor} per size from 1 to {@code ngram}.
     * <p>
     * The list is cleared first, so calling this method more than once (the
     * public constructor already calls it) does not accumulate duplicate
     * processors.
     *
     * @throws PropertyException if {@code ngram} is less than 1.
     */
    @Override
    public void postConfig() {
        if (this.ngram < 1) {
            throw new PropertyException("", "ngram", "ngram must be positive, found " + this.ngram);
        }
        // Reset before repopulating so repeated invocations stay idempotent.
        this.processors.clear();
        for (int size = 1; size <= this.ngram; ++size) {
            this.processors.add(new NgramProcessor(this.tokenizer, size, 1.0));
        }
    }

    /**
     * Returns a short description containing the configured n-gram range.
     */
    @Override
    public String toString() {
        return this.ngram + "gramPipeline({1.." + this.ngram + "}-grams)";
    }

    /**
     * Runs every processor over {@code data} and aggregates the resulting features.
     * <p>
     * A {@link TextProcessingException} thrown by one processor is logged at
     * INFO level and that processor's output is skipped; the remaining
     * processors still run.
     *
     * @param tag  the tag passed through to each processor.
     * @param data the text to process.
     * @return the aggregated features from all processors that succeeded.
     */
    @Override
    public List<Feature> process(String tag, String data) {
        List<Feature> features = new ArrayList<>();
        for (TextProcessor processor : this.processors) {
            try {
                features.addAll(processor.process(tag, data));
            }
            catch (TextProcessingException e) {
                // Deliberately best-effort: a failure in one processor must not discard the others' output.
                logger.log(Level.INFO, String.format("TextProcessingException thrown by processor %s with text %s", processor, data), e);
            }
        }
        return this.aggregator.aggregate(features);
    }

    /**
     * Builds the provenance for this pipeline from its configured fields,
     * using the host type short name {@code "TextPipeline"}.
     */
    @Override
    public ConfiguredObjectProvenance getProvenance() {
        return new ConfiguredObjectProvenanceImpl((Configurable)this, "TextPipeline");
    }
}

