/*
 * Decompiled with CFR 0.152.
 */
package org.tribuo.data.text;

import com.oracle.labs.mlrg.olcut.config.Config;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.tribuo.ConfigurableDataSource;
import org.tribuo.Example;
import org.tribuo.Output;
import org.tribuo.OutputFactory;
import org.tribuo.data.text.DocumentPreprocessor;
import org.tribuo.data.text.TextFeatureExtractor;

/**
 * Base class for data sources which hold {@link Example}s built from text found at a path.
 * <p>
 * This class stores the configuration (path, output factory, feature extractor and
 * document preprocessors) together with the list of examples exposed by
 * {@link #iterator()}. Nothing in this class adds to that list; the iterator's error
 * message indicates that {@link #read()} is expected to have been called first.
 *
 * @param <T> The output type of the examples.
 */
public abstract class TextDataSource<T extends Output<T>>
implements ConfigurableDataSource<T> {
    /**
     * The document preprocessors, applied in list order by {@link #handleDoc(String)}.
     */
    @Config(description="The document preprocessors to run on each document in the data source.")
    protected List<DocumentPreprocessor> preprocessors = new ArrayList<DocumentPreprocessor>();

    /**
     * The path to read the data from.
     */
    @Config(mandatory=true, description="The path to read the data from.")
    protected Path path;

    /**
     * The factory that converts a String into an Output instance.
     */
    @Config(mandatory=true, description="The factory that converts a String into an Output instance.")
    protected OutputFactory<T> outputFactory;

    /**
     * The feature extractor that generates Features from text.
     */
    @Config(mandatory=true, description="The feature extractor that generates Features from text.")
    protected TextFeatureExtractor<T> extractor;

    /**
     * The examples served by {@link #iterator()}.
     */
    protected final List<Example<T>> data = new ArrayList<Example<T>>();

    /**
     * No-argument constructor. Leaves {@link #path}, {@link #outputFactory} and
     * {@link #extractor} unset (null); presumably they are populated through the
     * {@code @Config} annotations -- confirm against the configuration system.
     */
    protected TextDataSource() {
    }

    /**
     * Creates a text data source from the supplied path, factory, extractor and preprocessors.
     * <p>
     * This constructor only stores its arguments; it does not call {@link #read()}.
     *
     * @param path The path to read the data from.
     * @param outputFactory The factory that converts a String into an Output instance.
     * @param extractor The feature extractor that generates Features from text.
     * @param preprocessors The document preprocessors, stored in the order given.
     */
    public TextDataSource(Path path, OutputFactory<T> outputFactory, TextFeatureExtractor<T> extractor, DocumentPreprocessor ... preprocessors) {
        this.path = path;
        this.outputFactory = outputFactory;
        this.extractor = extractor;
        this.preprocessors.addAll(Arrays.asList(preprocessors));
    }

    /**
     * Creates a text data source from a {@link File}, delegating to the {@link Path}
     * based constructor via {@link File#toPath()}.
     *
     * @param file The file to read the data from.
     * @param outputFactory The factory that converts a String into an Output instance.
     * @param extractor The feature extractor that generates Features from text.
     * @param preprocessors The document preprocessors, stored in the order given.
     */
    public TextDataSource(File file, OutputFactory<T> outputFactory, TextFeatureExtractor<T> extractor, DocumentPreprocessor ... preprocessors) {
        this(file.toPath(), outputFactory, extractor, preprocessors);
    }

    /**
     * Returns an iterator over the stored examples.
     * <p>
     * The check is on emptiness of {@link #data}, so the exception is also thrown
     * whenever the list is empty for any other reason.
     *
     * @return An iterator over {@link #data}.
     * @throws IllegalStateException If {@link #data} is empty.
     */
    public Iterator<Example<T>> iterator() {
        if (this.data.isEmpty()) {
            throw new IllegalStateException("read was not called in " + this.getClass().getName());
        }
        return this.data.iterator();
    }

    /**
     * Describes this data source as {@code SimpleClassName(path=...,extractor=...,preprocessors=...)}.
     * <p>
     * Fields which are still null are rendered as the text {@code null}.
     *
     * @return A description of this data source.
     */
    @Override
    public String toString() {
        StringBuilder buffer = new StringBuilder();
        buffer.append(this.getClass().getSimpleName());
        // append(Object) goes through String.valueOf, so an unset field prints as
        // "null" rather than throwing a NullPointerException.
        buffer.append("(path=").append(this.path);
        buffer.append(",extractor=").append(this.extractor);
        buffer.append(",preprocessors=").append(this.preprocessors);
        buffer.append(")");
        return buffer.toString();
    }

    /**
     * Runs the document through every preprocessor in list order, feeding the result
     * of each one into the next.
     *
     * @param doc The document text.
     * @return The text returned by the last preprocessor, or {@code doc} itself when
     *         there are no preprocessors.
     */
    protected String handleDoc(String doc) {
        String processed = doc;
        for (DocumentPreprocessor preprocessor : this.preprocessors) {
            processed = preprocessor.processDoc(processed);
        }
        return processed;
    }

    /**
     * Implemented by subclasses. Presumably loads the examples from {@link #path}
     * into {@link #data} (the message thrown by {@link #iterator()} refers to this
     * method) -- confirm against the implementations.
     *
     * @throws IOException Declared for implementations to signal a read failure.
     */
    protected abstract void read() throws IOException;

    /**
     * Returns the output factory held by this data source.
     *
     * @return The output factory.
     */
    public OutputFactory<T> getOutputFactory() {
        return this.outputFactory;
    }
}

