/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.classifier.df.data;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Scanner;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.classifier.df.data.Data;
import org.apache.mahout.classifier.df.data.DataConverter;
import org.apache.mahout.classifier.df.data.Dataset;
import org.apache.mahout.classifier.df.data.DescriptorException;
import org.apache.mahout.classifier.df.data.DescriptorUtils;
import org.apache.mahout.classifier.df.data.Instance;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static helpers that turn delimited text (a file on a Hadoop {@link FileSystem} or an
 * in-memory array of lines) into {@link Data} and {@link Dataset} objects.
 *
 * <p>Every line is expected to hold one token per attribute, the tokens being separated by a
 * single comma or a single space. The token {@code "?"} marks a missing value.
 */
public final class DataLoader {
    private static final Logger log = LoggerFactory.getLogger(DataLoader.class);

    /** A single comma or a single space separates two attribute tokens. */
    private static final Pattern SEPARATORS = Pattern.compile("[, ]");

    private DataLoader() {
    }

    /**
     * Validates one line against the attribute descriptors and records the values seen for
     * categorical attributes (and for the label when not in regression mode).
     *
     * <p>Note that {@code values} may already have been updated for the leading attributes of a
     * line that is ultimately rejected because a later numerical token does not parse.
     *
     * @param attrs      attribute descriptors, one per token
     * @param values     per-attribute value sets, created lazily and updated in place
     * @param string     the line to parse
     * @param regression when {@code true} the label is treated as numerical, otherwise its
     *                   values are collected like those of a categorical attribute
     * @return {@code false} if a non-ignored attribute is {@code "?"} or a numerical token is
     *         not a valid double; {@code true} otherwise
     * @throws IllegalArgumentException if the number of tokens differs from {@code attrs.length}
     */
    private static boolean parseString(Dataset.Attribute[] attrs, Set<String>[] values, CharSequence string, boolean regression) {
        String[] tokens = SEPARATORS.split(string);
        Preconditions.checkArgument(tokens.length == attrs.length, (Object)("Wrong number of attributes in the string: " + tokens.length + ". Must be: " + attrs.length));

        // First pass: reject the whole line before touching 'values' if anything is missing.
        for (int attr = 0; attr < attrs.length; ++attr) {
            if (!attrs[attr].isIgnored() && "?".equals(tokens[attr])) {
                return false;
            }
        }

        // Second pass: collect categorical values and check that numerical tokens parse.
        for (int attr = 0; attr < attrs.length; ++attr) {
            if (attrs[attr].isIgnored()) {
                continue;
            }
            String token = tokens[attr];
            if (attrs[attr].isCategorical() || (!regression && attrs[attr].isLabel())) {
                if (values[attr] == null) {
                    values[attr] = new HashSet<String>();
                }
                values[attr].add(token);
            } else {
                try {
                    Double.parseDouble(token);
                } catch (NumberFormatException e) {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Converts one line and appends the resulting instance to {@code instances}. Empty lines
     * and lines the converter rejects (it returned {@code null}) are skipped with a warning
     * that carries the number of instances accepted so far.
     */
    private static void addInstance(DataConverter converter, String line, List<Instance> instances) {
        if (line.isEmpty()) {
            log.warn("{}: empty string", instances.size());
            return;
        }
        Instance instance = converter.convert(line);
        if (instance == null) {
            log.warn("{}: missing values", instances.size());
            return;
        }
        instances.add(instance);
    }

    /**
     * Rethrows the I/O error, if any, that made the scanner stop early. {@link Scanner}
     * swallows exceptions from its source and simply reports that no more input is available,
     * which would otherwise yield a silently truncated result.
     */
    private static void rethrowReadError(Scanner scanner) throws IOException {
        IOException failure = scanner.ioException();
        if (failure != null) {
            throw failure;
        }
    }

    /**
     * Copies every non-null value set into a list at the same index; attributes for which no
     * value was collected keep a {@code null} entry.
     */
    private static List<String>[] toValueLists(Set<String>[] valsets) {
        @SuppressWarnings("unchecked") // generic array creation; only List<String> is ever stored
        List<String>[] values = new List[valsets.length];
        for (int i = 0; i < valsets.length; ++i) {
            if (valsets[i] != null) {
                values[i] = Lists.newArrayList(valsets[i]);
            }
        }
        return values;
    }

    /**
     * Loads the instances stored in a single file, decoding it as UTF-8.
     *
     * @param dataset description of the attributes
     * @param fs      file system holding the file
     * @param fpath   file to read
     * @return the instances that could be converted; empty and rejected lines are skipped
     * @throws IOException if the file cannot be opened or read
     */
    public static Data loadData(Dataset dataset, FileSystem fs, Path fpath) throws IOException {
        FSDataInputStream input = fs.open(fpath);
        Scanner scanner = new Scanner((InputStream)input, "UTF-8");
        ArrayList<Instance> instances = new ArrayList<Instance>();
        try {
            DataConverter converter = new DataConverter(dataset);
            while (scanner.hasNextLine()) {
                addInstance(converter, scanner.nextLine(), instances);
            }
            rethrowReadError(scanner);
        } finally {
            // Closing the scanner also closes the underlying stream, even when conversion fails.
            scanner.close();
        }
        return new Data(dataset, instances);
    }

    /**
     * Loads the instances stored in several files and concatenates them in the order of
     * {@code pathes}.
     *
     * @param dataset description of the attributes
     * @param fs      file system holding the files
     * @param pathes  files to read
     * @return all converted instances from all files
     * @throws IOException if a file cannot be opened or read
     */
    public static Data loadData(Dataset dataset, FileSystem fs, Path[] pathes) throws IOException {
        ArrayList<Instance> instances = new ArrayList<Instance>();
        for (Path path : pathes) {
            Data loadedData = DataLoader.loadData(dataset, fs, path);
            // Strictly less than size(): the previous '<=' bound asked for one element past the end.
            for (int index = 0; index < loadedData.size(); ++index) {
                instances.add(loadedData.get(index));
            }
        }
        return new Data(dataset, instances);
    }

    /**
     * Loads instances from an in-memory array of lines.
     *
     * @param dataset description of the attributes
     * @param data    one line per instance
     * @return the instances that could be converted; empty and rejected lines are skipped
     */
    public static Data loadData(Dataset dataset, String[] data) {
        ArrayList<Instance> instances = new ArrayList<Instance>();
        DataConverter converter = new DataConverter(dataset);
        for (String line : data) {
            addInstance(converter, line, instances);
        }
        return new Data(dataset, instances);
    }

    /**
     * Builds a {@link Dataset} by scanning a file (decoded as UTF-8): collects the distinct
     * values of the categorical attributes and counts the lines that parse successfully.
     *
     * @param descriptor attribute description understood by {@link DescriptorUtils#parseDescriptor}
     * @param regression {@code true} if the label is numerical
     * @param fs         file system holding the file
     * @param path       file to scan
     * @return the generated dataset
     * @throws DescriptorException      as thrown by {@code DescriptorUtils.parseDescriptor}
     * @throws IOException              if the file cannot be opened or read
     * @throws IllegalArgumentException if a non-empty line has the wrong number of tokens
     */
    public static Dataset generateDataset(CharSequence descriptor, boolean regression, FileSystem fs, Path path) throws DescriptorException, IOException {
        Dataset.Attribute[] attrs = DescriptorUtils.parseDescriptor(descriptor);
        @SuppressWarnings("unchecked") // generic array creation; only Set<String> is ever stored
        Set<String>[] valsets = new Set[attrs.length];
        int size = 0;

        FSDataInputStream input = fs.open(path);
        Scanner scanner = new Scanner((InputStream)input, "UTF-8");
        try {
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                if (!line.isEmpty() && DataLoader.parseString(attrs, valsets, line, regression)) {
                    ++size;
                }
            }
            rethrowReadError(scanner);
        } finally {
            // Closing the scanner also closes the underlying stream, even when parsing fails.
            scanner.close();
        }
        return new Dataset(attrs, toValueLists(valsets), size, regression);
    }

    /**
     * Builds a {@link Dataset} from an in-memory array of lines: collects the distinct values
     * of the categorical attributes and counts the lines that parse successfully.
     *
     * @param descriptor attribute description understood by {@link DescriptorUtils#parseDescriptor}
     * @param regression {@code true} if the label is numerical
     * @param data       one line per instance
     * @return the generated dataset
     * @throws DescriptorException      as thrown by {@code DescriptorUtils.parseDescriptor}
     * @throws IllegalArgumentException if a non-empty line has the wrong number of tokens
     */
    public static Dataset generateDataset(CharSequence descriptor, boolean regression, String[] data) throws DescriptorException {
        Dataset.Attribute[] attrs = DescriptorUtils.parseDescriptor(descriptor);
        @SuppressWarnings("unchecked") // generic array creation; only Set<String> is ever stored
        Set<String>[] valsets = new Set[attrs.length];
        int size = 0;
        for (String aData : data) {
            if (!aData.isEmpty() && DataLoader.parseString(attrs, valsets, aData, regression)) {
                ++size;
            }
        }
        return new Dataset(attrs, toValueLists(valsets), size, regression);
    }
}

