/*
 * Decompiled with CFR 0.152.
 */
package com.aliasi.chunk;

import com.aliasi.chunk.AbstractCharLmRescoringChunker;
import com.aliasi.chunk.CharLmHmmChunker;
import com.aliasi.chunk.Chunk;
import com.aliasi.chunk.Chunking;
import com.aliasi.chunk.NBestChunker;
import com.aliasi.corpus.ObjectHandler;
import com.aliasi.hmm.HmmCharLmEstimator;
import com.aliasi.lm.LanguageModel;
import com.aliasi.lm.NGramBoundaryLM;
import com.aliasi.lm.NGramProcessLM;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.AbstractExternalizable;
import com.aliasi.util.Compilable;
import com.aliasi.util.Strings;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.HashMap;

/**
 * A trainable chunker that combines a {@link CharLmHmmChunker} base chunker
 * with character language models: a single {@link NGramProcessLM} trained on
 * the text lying outside of chunks, and one {@link NGramBoundaryLM} per chunk
 * type, created lazily the first time the type is seen.
 *
 * <p>Training data is supplied through {@link #handle(Chunking)},
 * {@link #trainDictionary(CharSequence, String)} and
 * {@link #trainOut(CharSequence)}. {@link #compileTo(ObjectOutput)} writes the
 * trained models through the nested {@link Externalizer}.
 *
 * <p>Every language model created by this class uses the same n-gram length,
 * character count and interpolation ratio, as given to the constructor.
 */
public class CharLmRescoringChunker
extends AbstractCharLmRescoringChunker<CharLmHmmChunker, NGramProcessLM, NGramBoundaryLM>
implements ObjectHandler<Chunking>,
Compilable {
    /** Tag character placed before the text preceding the first chunk (U+FFFE). */
    private static final char START_TAG_CHAR = '\ufffe';

    /** Tag character placed after the text following the last chunk (U+FFFD). */
    private static final char END_TAG_CHAR = '\ufffd';

    /**
     * Fourth constructor argument of every per-type {@link NGramBoundaryLM} (U+FFFF).
     * NOTE(review): presumably the model's boundary character; confirm against
     * the NGramBoundaryLM constructor documentation.
     */
    private static final char TYPE_LM_BOUNDARY_CHAR = '\uffff';

    /** First code character handed out to a chunk type (U+FFFC, i.e. 65532). */
    private static final char FIRST_TYPE_CODE_CHAR = '\ufffc';

    /** N-gram length passed to every language model created by this class. */
    final int mNGram;

    /** Character count passed to every language model created by this class. */
    final int mNumChars;

    /** Interpolation ratio passed to every language model created by this class. */
    final double mInterpolationRatio;

    /**
     * Code character that will be assigned to the next previously unseen chunk
     * type; it is decremented by one each time a new type is registered.
     */
    char mNextCodeChar = FIRST_TYPE_CODE_CHAR;

    /**
     * Constructs a chunker whose base chunker is a {@link CharLmHmmChunker}
     * built from the given tokenizer factory and a fresh
     * {@link HmmCharLmEstimator}.
     *
     * @param tokenizerFactory tokenizer factory handed to the base chunker
     * @param numChunkingsRescored value forwarded to the superclass constructor
     * @param nGram n-gram length for all language models
     * @param numChars character count for all language models
     * @param interpolationRatio interpolation ratio for all language models
     */
    public CharLmRescoringChunker(TokenizerFactory tokenizerFactory, int numChunkingsRescored, int nGram, int numChars, double interpolationRatio) {
        super(new CharLmHmmChunker(tokenizerFactory,
                                   new HmmCharLmEstimator(nGram, numChars, interpolationRatio)),
              numChunkingsRescored,
              new NGramProcessLM(nGram, numChars, interpolationRatio),
              new HashMap<String, Character>(),
              new HashMap<String, NGramBoundaryLM>());
        this.mNGram = nGram;
        this.mNumChars = numChars;
        this.mInterpolationRatio = interpolationRatio;
    }

    /**
     * Constructs a chunker exactly as the five-argument constructor does,
     * additionally forwarding {@code smoothTags} to the
     * {@link CharLmHmmChunker} constructor.
     *
     * @param tokenizerFactory tokenizer factory handed to the base chunker
     * @param numChunkingsRescored value forwarded to the superclass constructor
     * @param nGram n-gram length for all language models
     * @param numChars character count for all language models
     * @param interpolationRatio interpolation ratio for all language models
     * @param smoothTags flag forwarded to the base chunker's constructor
     */
    public CharLmRescoringChunker(TokenizerFactory tokenizerFactory, int numChunkingsRescored, int nGram, int numChars, double interpolationRatio, boolean smoothTags) {
        super(new CharLmHmmChunker(tokenizerFactory,
                                   new HmmCharLmEstimator(nGram, numChars, interpolationRatio),
                                   smoothTags),
              numChunkingsRescored,
              new NGramProcessLM(nGram, numChars, interpolationRatio),
              new HashMap<String, Character>(),
              new HashMap<String, NGramBoundaryLM>());
        this.mNGram = nGram;
        this.mNumChars = numChars;
        this.mInterpolationRatio = interpolationRatio;
    }

    /**
     * Trains this chunker on one chunking.
     *
     * <p>The chunking is first forwarded to the base chunker. The chunks are
     * then visited in the order produced by {@code orderedSet(chunking)}: the
     * text between two consecutive chunks trains the out model, bracketed by
     * the code characters of the neighbouring chunk types, and the text of
     * each chunk trains the model for that chunk's type.
     *
     * @param chunking the chunking to train on
     * @throws IllegalArgumentException if a chunk starts before the end of the
     *     preceding chunk; the base chunker and all chunks before the
     *     offending one have already been trained when this is thrown
     */
    @Override
    public void handle(Chunking chunking) {
        // Train the underlying chunker before the language models.
        ObjectHandler<Chunking> baseTrainer = this.baseChunker();
        baseTrainer.handle(chunking);

        String text = chunking.charSequence().toString();
        char previousTag = START_TAG_CHAR;
        int cursor = 0;
        for (Chunk chunk : orderedSet(chunking)) {
            int start = chunk.start();
            int end = chunk.end();
            if (cursor > start) {
                String msg = "Chunk overlap for chunk=" + chunk + " in chunking=" + chunking;
                throw new IllegalArgumentException(msg);
            }
            String chunkType = chunk.type();
            // The type must be registered before its code character can be looked up.
            this.createTypeIfNecessary(chunkType);
            char currentTag = this.typeToChar(chunkType);
            this.trainOutLM(text.substring(cursor, start), previousTag, currentTag);
            this.trainTypeLM(chunkType, text.substring(start, end));
            cursor = end;
            previousTag = currentTag;
        }
        // Trailing text after the last chunk (or the whole text if there were no chunks).
        this.trainOutLM(text.substring(cursor), previousTag, END_TAG_CHAR);
    }

    /**
     * Writes a compiled form of this chunker by serializing an
     * {@link Externalizer} wrapping this instance.
     *
     * @param objOut the output to write to
     * @throws IOException if writing fails
     */
    @Override
    public void compileTo(ObjectOutput objOut) throws IOException {
        objOut.writeObject(new Externalizer(this));
    }

    /**
     * Trains on a dictionary entry: the entry is passed to the base chunker's
     * {@code trainDictionary} and also used to train the language model for
     * the given type, which is created if it does not exist yet.
     *
     * @param cSeq the character sequence of the dictionary entry
     * @param type the chunk type of the entry
     */
    public void trainDictionary(CharSequence cSeq, String type) {
        this.baseChunker().trainDictionary(cSeq, type);
        this.trainTypeLM(type, cSeq);
    }

    /**
     * Trains the out model directly on the given character sequence, without
     * adding any tag characters around it.
     *
     * @param cSeq the character sequence to train on
     */
    public void trainOut(CharSequence cSeq) {
        this.outLM().train(cSeq);
    }

    /**
     * Registers the chunk type if it has no code character yet: assigns it the
     * current value of {@link #mNextCodeChar} (which is then decremented) and
     * creates a fresh {@link NGramBoundaryLM} for it. Does nothing for a type
     * that is already registered.
     *
     * @param chunkType the chunk type to register
     */
    void createTypeIfNecessary(String chunkType) {
        if (this.mTypeToChar.containsKey(chunkType)) {
            return;
        }
        char code = this.mNextCodeChar;
        this.mNextCodeChar = (char)(code - 1);
        this.mTypeToChar.put(chunkType, Character.valueOf(code));
        NGramBoundaryLM typeLm = new NGramBoundaryLM(this.mNGram, this.mNumChars, this.mInterpolationRatio, TYPE_LM_BOUNDARY_CHAR);
        this.mTypeToLM.put(chunkType, typeLm);
    }

    /**
     * Trains the language model of the given type on the text, registering the
     * type first if necessary.
     *
     * @param type the chunk type whose model is trained
     * @param text the text to train on
     */
    void trainTypeLM(String type, CharSequence text) {
        this.createTypeIfNecessary(type);
        this.mTypeToLM.get(type).train(text);
    }

    /**
     * Trains the out model on {@code prevTagChar + text + nextTagChar} and then
     * decrements the unigram count of {@code prevTagChar} in the model's
     * substring counter.
     * NOTE(review): presumably the decrement keeps the leading tag character
     * as context only, so it is not counted as an emitted character; confirm.
     *
     * @param text the text lying between two tags
     * @param prevTagChar the tag character preceding the text
     * @param nextTagChar the tag character following the text
     */
    void trainOutLM(String text, char prevTagChar, char nextTagChar) {
        String trainSeq = prevTagChar + text + nextTagChar;
        NGramProcessLM outModel = this.outLM();
        outModel.train(trainSeq);
        outModel.substringCounter().decrementUnigram(prevTagChar);
    }

    /**
     * Serialization proxy for {@link CharLmRescoringChunker}. Writing emits the
     * compiled base chunker, the number of chunkings rescored, the chunk types
     * with their code characters and compiled models, and the compiled out
     * model. Reading consumes the same items in the same order.
     */
    static class Externalizer
    extends AbstractExternalizable {
        private static final long serialVersionUID = 3555143657918695241L;

        /** The chunker to write; {@code null} when created by the no-argument constructor. */
        final CharLmRescoringChunker mChunker;

        /** Creates an externalizer with no chunker attached. */
        public Externalizer() {
            this(null);
        }

        /**
         * Creates an externalizer that writes the given chunker.
         *
         * @param chunker the chunker to write
         */
        public Externalizer(CharLmRescoringChunker chunker) {
            this.mChunker = chunker;
        }

        /**
         * Writes the wrapped chunker's components to the output.
         *
         * @param objOut the output to write to
         * @throws IOException if writing fails
         */
        @Override
        public void writeExternal(ObjectOutput objOut) throws IOException {
            CharLmRescoringChunker chunker = this.mChunker;
            chunker.baseChunker().compileTo(objOut);
            objOut.writeInt(chunker.numChunkingsRescored());
            // Snapshot the types so the written count matches the entries that follow.
            String[] types = chunker.mTypeToLM.keySet().toArray(Strings.EMPTY_STRING_ARRAY);
            objOut.writeInt(types.length);
            for (String type : types) {
                objOut.writeUTF(type);
                objOut.writeChar(chunker.typeToChar(type));
                chunker.mTypeToLM.get(type).compileTo(objOut);
            }
            chunker.outLM().compileTo(objOut);
        }

        /**
         * Reads the components in the order {@link #writeExternal(ObjectOutput)}
         * emits them and assembles them into a chunker.
         *
         * @param in the input to read from
         * @return an {@link AbstractCharLmRescoringChunker} built from the read
         *     components (not a {@code CharLmRescoringChunker})
         * @throws ClassNotFoundException if a serialized class cannot be found
         * @throws IOException if reading fails
         */
        @Override
        public Object read(ObjectInput in) throws ClassNotFoundException, IOException {
            NBestChunker baseChunker = (NBestChunker)in.readObject();
            int numChunkingsRescored = in.readInt();
            int numTypes = in.readInt();
            HashMap<String, Character> typeToChar = new HashMap<String, Character>();
            HashMap<String, LanguageModel.Sequence> typeToLM = new HashMap<String, LanguageModel.Sequence>();
            for (int i = 0; i < numTypes; ++i) {
                String type = in.readUTF();
                char code = in.readChar();
                LanguageModel.Sequence typeLm = (LanguageModel.Sequence)in.readObject();
                typeToChar.put(type, Character.valueOf(code));
                typeToLM.put(type, typeLm);
            }
            LanguageModel.Process outLM = (LanguageModel.Process)in.readObject();
            return new AbstractCharLmRescoringChunker<NBestChunker, LanguageModel.Process, LanguageModel.Sequence>(
                    baseChunker, numChunkingsRescored, outLM, typeToChar, typeToLM);
        }
    }
}

