/*
 * Decompiled with CFR 0.152.
 */
package com.aliasi.suffixarray;

import com.aliasi.suffixarray.TokenSuffixArray;
import com.aliasi.tokenizer.Tokenization;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.Strings;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

/**
 * A token suffix array built over a collection of identified documents.
 *
 * <p>The documents are laid out in one character buffer in sorted order of
 * their identifiers. Each document's text is followed by a single space, the
 * document boundary token and another single space. The buffer is tokenized
 * once and handed to a {@link TokenSuffixArray}. This class keeps, per
 * document, both the token offset and the character offset at which the
 * document starts, so that callers can map between document identifiers,
 * token positions and text positions.
 */
public class DocumentTokenSuffixArray {
    private final TokenSuffixArray mTsa;
    /** Token offset at which each document starts; parallel to {@code mDocIds}. */
    private final int[] mDocStarts;
    /** Character offset in the concatenated text at which each document starts; parallel to {@code mDocIds}. */
    private final int[] mDocCharStarts;
    /** Document identifiers in sorted order. */
    private final String[] mDocIds;
    /** Maps a document identifier to its index in {@code mDocIds}. */
    private final Map<String, Integer> mDocIdToIndex;

    /**
     * Builds the suffix array over all documents in the specified map.
     *
     * @param idToDocMap mapping from document identifier to document text
     * @param tf tokenizer factory used to tokenize the documents
     * @param maxSuffixLength maximum suffix length, passed through to the
     *     underlying {@link TokenSuffixArray}
     * @param documentBoundaryToken token inserted after every document
     * @throws IllegalArgumentException if the tokenizer factory does not
     *     tokenize the space-padded boundary token to exactly that one token
     */
    public DocumentTokenSuffixArray(Map<String, String> idToDocMap, TokenizerFactory tf, int maxSuffixLength, String documentBoundaryToken) {
        // The boundary token must survive tokenization unchanged, otherwise the
        // per-document token counts computed below would not line up.
        String probe = " " + documentBoundaryToken + " ";
        String[] probeTokens = tf.tokenizer(probe.toCharArray(), 0, probe.length()).tokenize();
        if (probeTokens.length != 1 || !probeTokens[0].equals(documentBoundaryToken)) {
            String msg = "Tokenizer factory must convert boundary token to self. Found documentBoundaryToken=|" + documentBoundaryToken + "|" + " tokenizerFactory=" + tf + " result of tokenizing boundary token=|" + Arrays.asList(probeTokens) + "|";
            throw new IllegalArgumentException(msg);
        }
        this.mDocIds = idToDocMap.keySet().toArray(Strings.EMPTY_STRING_ARRAY);
        Arrays.sort(this.mDocIds);
        int numDocs = this.mDocIds.length;
        this.mDocStarts = new int[numDocs];
        this.mDocCharStarts = new int[numDocs];
        this.mDocIdToIndex = new HashMap<String, Integer>(idToDocMap.size());

        // First pass: record where every document starts, in tokens and in chars.
        int boundaryLength = documentBoundaryToken.length();
        int tokenPos = 0;
        int charPos = 0;
        for (int i = 0; i < numDocs; ++i) {
            String id = this.mDocIds[i];
            String text = idToDocMap.get(id);
            this.mDocIdToIndex.put(id, i);
            this.mDocStarts[i] = tokenPos;
            this.mDocCharStarts[i] = charPos;
            // +1 token for the boundary token that follows the document.
            tokenPos += DocumentTokenSuffixArray.tokenCount(tf, text) + 1;
            // +2 chars for the spaces on either side of the boundary token.
            charPos += text.length() + boundaryLength + 2;
        }

        // Second pass: copy "<text> <boundary> " for every document into one buffer.
        char[] cs = new char[charPos];
        for (int i = 0; i < numDocs; ++i) {
            String text = idToDocMap.get(this.mDocIds[i]);
            int pos = this.mDocCharStarts[i];
            text.getChars(0, text.length(), cs, pos);
            pos += text.length();
            cs[pos++] = ' ';
            documentBoundaryToken.getChars(0, boundaryLength, cs, pos);
            pos += boundaryLength;
            cs[pos] = ' ';
        }
        Tokenization tokenization = new Tokenization(cs, 0, cs.length, tf);
        this.mTsa = new TokenSuffixArray(tokenization, maxSuffixLength, documentBoundaryToken);
    }

    /**
     * Returns the underlying token suffix array.
     *
     * @return the token suffix array built by the constructor
     */
    public TokenSuffixArray suffixArray() {
        return this.mTsa;
    }

    /**
     * Returns the identifier of the document whose character span (including
     * its trailing boundary token and spaces) contains the specified position
     * in the concatenated text.
     *
     * @param textPosition character position in the concatenated text
     * @return identifier of the document starting at or most closely before
     *     the position
     * @throws IndexOutOfBoundsException if the position is negative or greater
     *     than the length of the concatenated text
     */
    public String textPositionToDocId(int textPosition) {
        int textLength = this.mTsa.tokenization().text().length();
        if (textPosition < 0 || textPosition > textLength) {
            String msg = "Position must be >= 0 and <= text.length=" + textLength + " Found textPosition=" + textPosition;
            throw new IndexOutOfBoundsException(msg);
        }
        // Character starts, not token starts: the argument is a text position.
        return this.mDocIds[DocumentTokenSuffixArray.largestWithoutGoingOver(this.mDocCharStarts, textPosition)];
    }

    /**
     * Returns the text of the named document, cut out of the concatenated
     * text without the trailing space, boundary token and space.
     *
     * @param docName identifier of the document
     * @return the document's text
     * @throws NullPointerException if no document with that identifier exists
     */
    public String documentText(String docName) {
        String boundaryToken = this.mTsa.documentBoundaryToken();
        String text = this.mTsa.tokenization().text();
        Integer boxedIndex = this.mDocIdToIndex.get(docName);
        if (boxedIndex == null) {
            // Kept as NullPointerException (what the implicit unboxing raised
            // before) so callers see the same exception type, now with a message.
            throw new NullPointerException("Unknown document name=" + docName);
        }
        int idx = boxedIndex;
        int start = this.mDocCharStarts[idx];
        int nextStart = idx + 1 == this.mDocCharStarts.length ? text.length() : this.mDocCharStarts[idx + 1];
        // Strip " <boundary> " that was appended after the document.
        int end = nextStart - boundaryToken.length() - 2;
        return text.substring(start, end);
    }

    /**
     * Returns the number of documents.
     *
     * @return number of documents indexed
     */
    public int numDocuments() {
        return this.mDocStarts.length;
    }

    /**
     * Returns an unmodifiable view of the document identifiers.
     *
     * @return the set of document identifiers
     */
    public Set<String> documentNames() {
        return Collections.unmodifiableSet(this.mDocIdToIndex.keySet());
    }

    /**
     * Returns the token offset at which the specified document starts.
     *
     * @param docId identifier of the document
     * @return token offset of the document's first token, or {@code -1} if
     *     the identifier is unknown
     */
    public int docStartToken(String docId) {
        int idx = Arrays.binarySearch(this.mDocIds, docId);
        if (idx < 0) {
            return -1;
        }
        return this.mDocStarts[idx];
    }

    /**
     * Returns the end token offset of the specified document. For every
     * document but the last this is the next document's start token minus
     * one, i.e. the offset of this document's trailing boundary token. For
     * the last document it is the suffix array length minus one. In both
     * cases the result is never less than 1.
     *
     * @param docId identifier of the document
     * @return the end token offset, or {@code -1} if the identifier is unknown
     */
    public int docEndToken(String docId) {
        int idx = Arrays.binarySearch(this.mDocIds, docId);
        if (idx < 0) {
            return -1;
        }
        int nextIdx = idx + 1;
        int nextStart = nextIdx == this.mDocIds.length ? this.mTsa.suffixArrayLength() : this.mDocStarts[nextIdx];
        return Math.max(1, nextStart - 1);
    }

    /**
     * Returns the index of the largest value in the specified ascending array
     * that is less than or equal to the specified value.
     *
     * @param vals values in ascending order
     * @param val value to locate
     * @return index of the largest element not exceeding {@code val}, or
     *     {@code -1} if the array is empty or every element exceeds {@code val}
     */
    public static int largestWithoutGoingOver(int[] vals, int val) {
        if (vals.length == 0 || val < vals[0]) {
            return -1;
        }
        int last = vals.length - 1;
        if (val >= vals[last]) {
            return last;
        }
        // Invariant: vals[start] <= val < vals[end] (end may be one past a known index).
        int start = 0;
        int end = vals.length;
        while (start + 1 < end) {
            // Unsigned shift avoids int overflow of start + end on huge arrays.
            int mid = (start + end) >>> 1;
            if (val < vals[mid]) {
                end = mid;
            } else if (val > vals[mid]) {
                start = mid;
            } else {
                return mid;
            }
        }
        return start;
    }

    /**
     * Counts the tokens the specified tokenizer factory produces for the text.
     *
     * @param tf tokenizer factory to apply
     * @param text text to tokenize
     * @return number of tokens produced
     */
    static int tokenCount(TokenizerFactory tf, String text) {
        int count = 0;
        for (String token : tf.tokenizer(text.toCharArray(), 0, text.length())) {
            ++count;
        }
        return count;
    }
}

