package com.aliasi.spell;

import com.aliasi.corpus.TextHandler;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.Counter;
import com.aliasi.util.ObjectToCounterMap;
import com.aliasi.util.Strings;
import java.util.Collections;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

/* loaded from: input_file:com/aliasi/spell/TfIdfDistance.class */
/**
 * String distance computed as one minus the cosine between TF-IDF weighted
 * term vectors of the two inputs.
 *
 * <p>Document-frequency statistics are accumulated by the training methods
 * ({@link #handle(CharSequence)} and its deprecated siblings); each call
 * counts as one document. Term weights are
 * {@code sqrt(termCount * idf(term))}, where
 * {@code idf(term) = ln(numDocuments / docFrequency(term))} and is defined
 * as {@code 0} for terms never seen in training.
 *
 * <p>Tokenization and term-frequency extraction are inherited
 * ({@code tokenSet} and {@code termFrequencyVector}); their exact behavior is
 * not visible in this file.
 */
public class TfIdfDistance extends TokenizedDistance implements TextHandler {
    /** Number of training documents handled so far. */
    private int mDocCount;

    /** For each term, how many times it was counted across training documents' token sets. */
    private final ObjectToCounterMap<String> mDocFrequency;

    /**
     * Creates an untrained distance: zero documents and no term statistics.
     *
     * @param tokenizerFactory tokenizer factory handed to the superclass
     */
    public TfIdfDistance(TokenizerFactory tokenizerFactory) {
        super(tokenizerFactory);
        this.mDocCount = 0;
        this.mDocFrequency = new ObjectToCounterMap<>();
    }

    /**
     * Trains document frequencies on the given text by converting it to a
     * character array and delegating to {@link #handle(char[], int, int)}.
     *
     * @param charSequence text of one training document
     * @deprecated prefer {@link #handle(CharSequence)}, the non-deprecated
     *     training entry point of this class
     */
    @Deprecated
    public void trainIdf(CharSequence charSequence) {
        char[] chars = Strings.toCharArray(charSequence);
        handle(chars, 0, chars.length);
    }

    /**
     * Trains document frequencies on one document given as a character slice.
     *
     * @param cArr underlying characters
     * @param i first slice argument, forwarded unchanged to {@code tokenSet}
     * @param i2 second slice argument, forwarded unchanged to {@code tokenSet};
     *     callers in this class pass {@code 0} and the array length
     *     (NOTE(review): presumably start offset and length - confirm against
     *     {@code TokenizedDistance.tokenSet})
     * @deprecated prefer {@link #handle(CharSequence)}, the non-deprecated
     *     training entry point of this class
     */
    @Override // com.aliasi.corpus.TextHandler
    @Deprecated
    public void handle(char[] cArr, int i, int i2) {
        countDocument(cArr, i, i2);
    }

    /**
     * Trains document frequencies on one document.
     *
     * @param charSequence text of one training document
     */
    public void handle(CharSequence charSequence) {
        char[] chars = Strings.toCharArray(charSequence);
        // Deliberately does not route through the overridable, deprecated
        // handle(char[], int, int) so subclass overrides of that method do not
        // affect this entry point.
        countDocument(chars, 0, chars.length);
    }

    /**
     * Returns the distance between the two sequences, defined as
     * {@code 1 - proximity(charSequence, charSequence2)}.
     *
     * @param charSequence first sequence
     * @param charSequence2 second sequence
     * @return one minus the proximity of the two sequences
     */
    @Override // com.aliasi.util.Distance
    public double distance(CharSequence charSequence, CharSequence charSequence2) {
        return 1.0d - proximity(charSequence, charSequence2);
    }

    /**
     * Returns the cosine between the TF-IDF weighted term vectors of the two
     * sequences, clamped to the range {@code [0, 1]}.
     *
     * <p>If both vectors have zero length the result is {@code 1}; if exactly
     * one has zero length the result is {@code 0}.
     *
     * @param charSequence first sequence
     * @param charSequence2 second sequence
     * @return the clamped cosine of the two weighted term vectors
     */
    @Override // com.aliasi.util.Proximity
    public double proximity(CharSequence charSequence, CharSequence charSequence2) {
        ObjectToCounterMap<String> firstTerms = termFrequencyVector(charSequence);
        ObjectToCounterMap<String> secondTerms = termFrequencyVector(charSequence2);
        double firstSquaredLength = 0.0d;
        double secondSquaredLength = 0.0d;
        double dotProduct = 0.0d;

        for (Map.Entry<String, Counter> entry : firstTerms.entrySet()) {
            String term = entry.getKey();
            double firstWeight = tfIdf(term, entry.getValue());
            firstSquaredLength += firstWeight * firstWeight;
            // Removing shared terms from the second vector leaves only the
            // terms unique to it for the follow-up loop below.
            Counter secondCount = secondTerms.remove(term);
            if (secondCount != null) {
                double secondWeight = tfIdf(term, secondCount);
                secondSquaredLength += secondWeight * secondWeight;
                dotProduct += firstWeight * secondWeight;
            }
        }

        // Terms present only in the second sequence contribute to its length
        // but not to the dot product.
        for (Map.Entry<String, Counter> entry : secondTerms.entrySet()) {
            double weight = tfIdf(entry.getKey(), entry.getValue());
            secondSquaredLength += weight * weight;
        }

        if (firstSquaredLength == 0.0d) {
            return secondSquaredLength == 0.0d ? 1.0d : 0.0d;
        }
        if (secondSquaredLength == 0.0d) {
            return 0.0d;
        }

        double cosine = dotProduct / Math.sqrt(firstSquaredLength * secondSquaredLength);
        // Guard against floating-point drift pushing the value out of range.
        if (cosine < 0.0d) {
            return 0.0d;
        }
        if (cosine > 1.0d) {
            return 1.0d;
        }
        return cosine;
    }

    /**
     * Returns the recorded document frequency of the given term.
     *
     * @param str term to look up
     * @return the count stored for the term in the document-frequency map
     */
    public int docFrequency(String str) {
        return this.mDocFrequency.getCount(str);
    }

    /**
     * Returns the inverse document frequency of the given term:
     * {@code ln(numDocuments() / docFrequency(str))}, or {@code 0} when the
     * term's document frequency is zero.
     *
     * @param str term to look up
     * @return the natural-log inverse document frequency, or {@code 0.0} for
     *     a term with zero document frequency
     */
    public double idf(String str) {
        int documentFrequency = this.mDocFrequency.getCount(str);
        if (documentFrequency == 0) {
            return 0.0d;
        }
        // Promote to double before dividing: int / int would truncate the
        // ratio (e.g. 3 / 2 == 1) and collapse the IDF of common terms to 0.
        return Math.log((double) this.mDocCount / documentFrequency);
    }

    /**
     * Returns the number of training documents handled so far.
     *
     * @return the document count
     */
    public int numDocuments() {
        return this.mDocCount;
    }

    /**
     * Returns the number of entries in the document-frequency map.
     *
     * @return the size of the document-frequency map
     */
    public int numTerms() {
        return this.mDocFrequency.size();
    }

    /**
     * Returns an unmodifiable view of the keys of the document-frequency map.
     *
     * @return unmodifiable set of terms
     */
    public Set<String> termSet() {
        return Collections.unmodifiableSet(this.mDocFrequency.keySet());
    }

    /**
     * Returns the TF-IDF weight {@code sqrt(count * idf(str))} for a term.
     *
     * @param str the term
     * @param counter the term's count in the sequence being weighted
     * @return the square root of the count times the term's IDF
     */
    double tfIdf(String str, Counter counter) {
        return Math.sqrt(counter.doubleValue() * idf(str));
    }

    /**
     * Increments the document frequency of every element of the slice's token
     * set and then bumps the document count by one.
     *
     * @param cs underlying characters
     * @param start first slice argument, forwarded unchanged to {@code tokenSet}
     * @param length second slice argument, forwarded unchanged to {@code tokenSet}
     *     (NOTE(review): presumably a length - confirm against the superclass)
     */
    private void countDocument(char[] cs, int start, int length) {
        for (String token : tokenSet(cs, start, length)) {
            this.mDocFrequency.increment(token);
        }
        this.mDocCount++;
    }
}
