/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.parser.txt;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.tika.parser.txt.CharsetMatch;
import org.apache.tika.parser.txt.CharsetRecog_2022;
import org.apache.tika.parser.txt.CharsetRecog_UTF8;
import org.apache.tika.parser.txt.CharsetRecog_Unicode;
import org.apache.tika.parser.txt.CharsetRecog_mbcs;
import org.apache.tika.parser.txt.CharsetRecog_sbcs;
import org.apache.tika.parser.txt.CharsetRecognizer;

/**
 * Guesses the character encoding of a run of bytes by asking every registered
 * {@link CharsetRecognizer} how well the input matches its charset.
 *
 * <p>Typical use: call one of the {@code setText} overloads, optionally
 * {@link #setDeclaredEncoding(String)}, then {@link #detect()} or
 * {@link #detectAll()}. Only the first {@code kBufSize} bytes of the input are
 * examined.
 *
 * <p>Instances hold mutable per-input state in plain fields with no
 * synchronization, so an instance should not be shared between threads
 * without external locking.
 */
public class CharsetDetector {
    /** Maximum number of input bytes that are buffered and examined. */
    private static final int kBufSize = 12000;
    /** Upper bound applied to every confidence value reported by {@link #detectAll()}. */
    private static final int MAX_CONFIDENCE = 100;
    /** Every known recognizer, in registration order; populated by the static initializer. */
    private static final List<CSRecognizerInfo> ALL_CS_RECOGNIZERS;

    // The fields below are package-private (not private) in the original class;
    // their visibility is kept so that other classes in this package can still
    // reach them.

    /** Working copy of the input (possibly with markup stripped) that detection runs on. */
    byte[] fInputBytes = new byte[kBufSize];
    /** Number of valid bytes in {@link #fInputBytes}. */
    int fInputLen;
    /** Occurrence count of each byte value (0-255) within the valid part of {@link #fInputBytes}. */
    short[] fByteStats = new short[256];
    /** True when at least one byte in the range 0x80-0x9F occurs in the working copy. */
    boolean fC1Bytes = false;
    /** Encoding hint supplied by the caller, or null when none was given. */
    String fDeclaredEncoding;
    /** The unmodified input; for {@link #setText(byte[])} this is the caller's own array. */
    byte[] fRawInput;
    /** Number of valid bytes in {@link #fRawInput}. */
    int fRawLength;
    /** The stream passed to {@link #setText(InputStream)}, if any. */
    InputStream fInputStream;
    /** Whether {@link #MungeInput()} should try to drop {@code <...>} markup before detection. */
    private boolean fStripTags = false;
    /** Per-recognizer enable flags, index-aligned with ALL_CS_RECOGNIZERS; null means "use defaults". */
    private boolean[] fEnabledRecognizers;

    /**
     * Returns the names of all charsets this class has a recognizer for,
     * regardless of whether individual recognizers are enabled.
     *
     * @return a fresh array with one name per registered recognizer, in registration order
     */
    public static String[] getAllDetectableCharsets() {
        String[] names = new String[ALL_CS_RECOGNIZERS.size()];
        for (int i = 0; i < names.length; ++i) {
            names[i] = ALL_CS_RECOGNIZERS.get(i).recognizer.getName();
        }
        return names;
    }

    /**
     * Supplies a hint about the expected encoding. In {@link #detectAll()} a
     * recognizer whose name equals the (canonicalized) hint, ignoring case,
     * gets its confidence boosted.
     *
     * <p>A null or empty {@code encoding} is ignored and leaves any previous hint in place.
     *
     * @param encoding charset name or alias
     * @return this detector, for call chaining
     * @throws java.nio.charset.IllegalCharsetNameException if the name is syntactically illegal
     * @throws java.nio.charset.UnsupportedCharsetException if the JVM has no such charset
     */
    public CharsetDetector setDeclaredEncoding(String encoding) {
        this.setCanonicalDeclaredEncoding(encoding);
        return this;
    }

    /**
     * Sets the bytes to analyse. The array is referenced, not copied, as the raw
     * input; at most the first {@code kBufSize} bytes are copied into the
     * working buffer.
     *
     * @param in the input bytes; must not be null
     * @return this detector, for call chaining
     */
    public CharsetDetector setText(byte[] in) {
        this.fRawInput = in;
        this.fRawLength = in.length;
        // Fix: rebuild the working buffer and byte statistics for the new input.
        // Without this call detection would run on whatever a previous setText
        // left behind (or on an empty buffer), as the InputStream overload
        // already does this.
        this.MungeInput();
        return this;
    }

    /**
     * Sets the input to the first {@code kBufSize} bytes of a stream. The
     * stream is marked before reading and reset afterwards, so it must support
     * {@code mark}/{@code reset}.
     *
     * @param in the stream to sample
     * @return this detector, for call chaining
     * @throws IOException if reading or resetting the stream fails
     */
    public CharsetDetector setText(InputStream in) throws IOException {
        this.fInputStream = in;
        this.fInputStream.mark(kBufSize);
        // Always a fresh buffer: the previous fRawInput may be an array owned by a caller.
        this.fRawInput = new byte[kBufSize];
        this.fRawLength = 0;
        int remaining = kBufSize;
        // read() may return fewer bytes than requested, so keep reading until
        // the buffer is full or the stream reports no more data.
        while (remaining > 0) {
            int bytesRead = this.fInputStream.read(this.fRawInput, this.fRawLength, remaining);
            if (bytesRead <= 0) {
                break;
            }
            this.fRawLength += bytesRead;
            remaining -= bytesRead;
        }
        this.fInputStream.reset();
        this.MungeInput();
        return this;
    }

    /**
     * Returns the highest-ranked match for the current input.
     *
     * @return the first element of {@link #detectAll()}, or null when nothing matched
     */
    public CharsetMatch detect() {
        CharsetMatch[] matches = this.detectAll();
        if (matches == null || matches.length == 0) {
            return null;
        }
        return matches[0];
    }

    /**
     * Runs every enabled recognizer against the current input and returns all
     * matches with a confidence above zero, best first.
     *
     * <p>Each confidence is capped at {@code MAX_CONFIDENCE}. When a declared
     * encoding is set and equals the recognizer's name (ignoring case), half of
     * the remaining distance to {@code MAX_CONFIDENCE} is added.
     *
     * @return the matches in descending order; empty (never null) when nothing matched
     */
    public CharsetMatch[] detectAll() {
        List<CharsetMatch> matches = new ArrayList<CharsetMatch>();
        for (int i = 0; i < ALL_CS_RECOGNIZERS.size(); ++i) {
            // Fix: honour setDetectableCharset(); previously the enable flags
            // were reported by getDetectableCharsets() but ignored here.
            if (!this.isRecognizerEnabled(i)) {
                continue;
            }
            CharsetRecognizer csr = ALL_CS_RECOGNIZERS.get(i).recognizer;
            CharsetMatch candidate = csr.match(this);
            if (candidate == null) {
                continue;
            }
            int confidence = candidate.getConfidence() & 0xFF;
            if (confidence <= 0) {
                continue;
            }
            confidence = Math.min(confidence, MAX_CONFIDENCE);
            if (this.fDeclaredEncoding != null && this.fDeclaredEncoding.equalsIgnoreCase(csr.getName())) {
                // Halve the gap between the current confidence and certainty.
                confidence += (MAX_CONFIDENCE - confidence) / 2;
            }
            matches.add(new CharsetMatch(this, csr, confidence, candidate.getName(), candidate.getLanguage()));
        }
        // Ascending sort followed by reverse (rather than a reversed comparator)
        // is kept on purpose: it preserves the original ordering of ties.
        Collections.sort(matches);
        Collections.reverse(matches);
        return matches.toArray(new CharsetMatch[matches.size()]);
    }

    /**
     * Convenience method: samples the stream, detects its charset and returns
     * the reader offered by the best match.
     *
     * <p>{@code declaredEncoding} is stored as given (it is not canonicalized
     * and a null value clears any earlier hint).
     *
     * @param in               stream to read; must support mark/reset
     * @param declaredEncoding encoding hint, may be null
     * @return the best match's reader, or null when nothing matched or an
     *         {@link IOException} occurred
     */
    public Reader getReader(InputStream in, String declaredEncoding) {
        this.fDeclaredEncoding = declaredEncoding;
        try {
            this.setText(in);
            CharsetMatch match = this.detect();
            if (match == null) {
                return null;
            }
            return match.getReader();
        }
        catch (IOException e) {
            // Best-effort API: failure is reported as "no result".
            return null;
        }
    }

    /**
     * Convenience method: detects the charset of the bytes and returns the
     * string offered by the best match.
     *
     * <p>{@code declaredEncoding} is stored as given (it is not canonicalized
     * and a null value clears any earlier hint).
     *
     * @param in               bytes to decode; must not be null
     * @param declaredEncoding encoding hint, may be null
     * @return the best match's string, or null when nothing matched or an
     *         {@link IOException} occurred
     */
    public String getString(byte[] in, String declaredEncoding) {
        this.fDeclaredEncoding = declaredEncoding;
        try {
            this.setText(in);
            CharsetMatch match = this.detect();
            if (match == null) {
                return null;
            }
            // NOTE(review): -1 presumably means "no length limit" - confirm against CharsetMatch.getString.
            return match.getString(-1);
        }
        catch (IOException e) {
            // Best-effort API: failure is reported as "no result".
            return null;
        }
    }

    /**
     * Reports whether markup stripping is switched on.
     *
     * @return the current input-filter setting
     */
    public boolean inputFilterEnabled() {
        return this.fStripTags;
    }

    /**
     * Switches markup stripping on or off. The setting is read when the input
     * is prepared, i.e. during a subsequent {@code setText} call.
     *
     * @param filter true to strip {@code <...>} markup before detection
     * @return the previous setting
     */
    public boolean enableInputFilter(boolean filter) {
        boolean previous = this.fStripTags;
        this.fStripTags = filter;
        return previous;
    }

    /**
     * Stores the canonical JVM name of {@code encoding} as the declared
     * encoding. Null and empty names are ignored. Unknown or illegal names
     * make {@link Charset#forName(String)} throw, and that exception propagates.
     */
    private void setCanonicalDeclaredEncoding(String encoding) {
        if (encoding == null || encoding.isEmpty()) {
            return;
        }
        // Charset.forName never returns null (it throws instead), so no null check is needed.
        this.fDeclaredEncoding = Charset.forName(encoding).name();
    }

    /**
     * Prepares the working buffer from the raw input: optionally strips markup,
     * then tallies byte frequencies and the C1-range flag.
     */
    private void MungeInput() {
        int openTags = 0;
        int badTags = 0;
        if (this.fStripTags) {
            // Quick-and-dirty markup removal: drop everything between '<' and '>'.
            // Count '<' overall and '<' seen while already inside a tag, so we
            // can judge afterwards whether the input was really marked up.
            boolean inMarkup = false;
            int dst = 0;
            for (int src = 0; src < this.fRawLength && dst < this.fInputBytes.length; ++src) {
                byte b = this.fRawInput[src];
                if (b == '<') {
                    if (inMarkup) {
                        ++badTags;
                    }
                    inMarkup = true;
                    ++openTags;
                }
                if (!inMarkup) {
                    this.fInputBytes[dst++] = b;
                }
                if (b == '>') {
                    inMarkup = false;
                }
            }
            this.fInputLen = dst;
        }
        // Fall back to the unstripped input when stripping was off (openTags is
        // then 0), when there were few tags, when many tags were malformed, or
        // when stripping left almost nothing of a sizeable input.
        if (openTags < 5 || openTags / 5 < badTags || this.fInputLen < 100 && this.fRawLength > 600) {
            int limit = Math.min(this.fRawLength, kBufSize);
            System.arraycopy(this.fRawInput, 0, this.fInputBytes, 0, limit);
            this.fInputLen = limit;
        }
        // Byte-frequency table over the working copy.
        Arrays.fill(this.fByteStats, (short)0);
        for (int i = 0; i < this.fInputLen; ++i) {
            int value = this.fInputBytes[i] & 0xFF;
            this.fByteStats[value]++;
        }
        // Flag the presence of any byte in 0x80-0x9F.
        this.fC1Bytes = false;
        for (int i = 0x80; i <= 0x9F; ++i) {
            if (this.fByteStats[i] != 0) {
                this.fC1Bytes = true;
                break;
            }
        }
    }

    /**
     * Returns the names of the charsets whose recognizers are currently
     * enabled on this instance.
     *
     * @return a fresh array of enabled charset names, in registration order
     */
    @Deprecated
    public String[] getDetectableCharsets() {
        List<String> names = new ArrayList<String>(ALL_CS_RECOGNIZERS.size());
        for (int i = 0; i < ALL_CS_RECOGNIZERS.size(); ++i) {
            if (this.isRecognizerEnabled(i)) {
                names.add(ALL_CS_RECOGNIZERS.get(i).recognizer.getName());
            }
        }
        return names.toArray(new String[names.size()]);
    }

    /**
     * Enables or disables the first recognizer whose name equals {@code encoding}.
     *
     * @param encoding recognizer name, compared case-sensitively
     * @param enabled  true to enable, false to disable
     * @return this detector, for call chaining
     * @throws IllegalArgumentException if no recognizer has that name
     */
    @Deprecated
    public CharsetDetector setDetectableCharset(String encoding, boolean enabled) {
        int modIdx = -1;
        boolean isDefaultVal = false;
        for (int i = 0; i < ALL_CS_RECOGNIZERS.size(); ++i) {
            CSRecognizerInfo info = ALL_CS_RECOGNIZERS.get(i);
            if (info.recognizer.getName().equals(encoding)) {
                modIdx = i;
                isDefaultVal = info.isDefaultEnabled == enabled;
                break;
            }
        }
        if (modIdx < 0) {
            throw new IllegalArgumentException("Invalid encoding: \"" + encoding + "\"");
        }
        // Allocate the override table lazily, and only when the request
        // actually deviates from the recognizer's default.
        if (this.fEnabledRecognizers == null && !isDefaultVal) {
            this.fEnabledRecognizers = new boolean[ALL_CS_RECOGNIZERS.size()];
            for (int i = 0; i < ALL_CS_RECOGNIZERS.size(); ++i) {
                this.fEnabledRecognizers[i] = ALL_CS_RECOGNIZERS.get(i).isDefaultEnabled;
            }
        }
        if (this.fEnabledRecognizers != null) {
            this.fEnabledRecognizers[modIdx] = enabled;
        }
        return this;
    }

    /** Tells whether the recognizer at {@code index} is enabled: the per-instance override if present, else its default. */
    private boolean isRecognizerEnabled(int index) {
        if (this.fEnabledRecognizers != null) {
            return this.fEnabledRecognizers[index];
        }
        return ALL_CS_RECOGNIZERS.get(index).isDefaultEnabled;
    }

    static {
        // Registration order matters: it is the order of getAllDetectableCharsets()
        // and it influences how ties are ordered in detectAll().
        List<CSRecognizerInfo> list = new ArrayList<CSRecognizerInfo>();
        list.add(new CSRecognizerInfo(new CharsetRecog_UTF8(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_Unicode.CharsetRecog_UTF_16_BE(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_Unicode.CharsetRecog_UTF_16_LE(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_Unicode.CharsetRecog_UTF_32_BE(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_Unicode.CharsetRecog_UTF_32_LE(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_mbcs.CharsetRecog_sjis(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_2022.CharsetRecog_2022JP(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_2022.CharsetRecog_2022CN(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_2022.CharsetRecog_2022KR(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_mbcs.CharsetRecog_gb_18030(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_mbcs.CharsetRecog_euc.CharsetRecog_euc_jp(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_mbcs.CharsetRecog_euc.CharsetRecog_euc_kr(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_mbcs.CharsetRecog_big5(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_1(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_2(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_5_ru(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_6_ar(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_7_el(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_8_I_he(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_8_he(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_windows_1251(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_windows_1256(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_KOI8_R(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_9_tr(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_EBCDIC_500_de(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_EBCDIC_500_en(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_EBCDIC_500_es(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_EBCDIC_500_fr(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_EBCDIC_500_it(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_EBCDIC_500_nl(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_IBM424_he_rtl(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_IBM424_he_ltr(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_IBM420_ar_rtl(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_IBM420_ar_ltr(), true));
        list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_IBM866_ru(), true));
        ALL_CS_RECOGNIZERS = Collections.unmodifiableList(list);
    }

    /** Pairs a recognizer with the flag saying whether it is enabled by default. */
    private static class CSRecognizerInfo {
        final CharsetRecognizer recognizer;
        final boolean isDefaultEnabled;

        CSRecognizerInfo(CharsetRecognizer recognizer, boolean isDefaultEnabled) {
            this.recognizer = recognizer;
            this.isDefaultEnabled = isDefaultEnabled;
        }
    }
}

