package com.aliasi.chunk;

import com.aliasi.corpus.ObjectHandler;
import com.aliasi.corpus.TagHandler;
import com.aliasi.tokenizer.TokenCategorizer;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.AbstractExternalizable;
import com.aliasi.util.Compilable;
import com.aliasi.util.ObjectToCounterMap;
import com.aliasi.util.Strings;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.ArrayList;
import java.util.List;

/* loaded from: input_file:com/aliasi/chunk/TrainTokenShapeChunker.class */
/**
 * Trainer for a token-shape chunker.
 *
 * <p>Training data arrives as parallel token/tag sequences (or as
 * {@link Chunking} instances, which are routed through a
 * {@code ChunkHandlerAdapter2}). The sequences are only buffered while
 * training; the underlying {@code TrainableEstimator} is fed when the model
 * is compiled with {@link #compileTo(ObjectOutput)}. Reading the compiled
 * form back produces a {@code TokenShapeChunker}.
 */
public class TrainTokenShapeChunker implements TagHandler, ObjectHandler<Chunking>, Compilable {
    /** Tokens seen fewer times than this are replaced by their category before the second training pass. */
    private final int mKnownMinTokenCount;
    /** Token-count argument handed to {@code TrainableEstimator.prune} at compile time. */
    private final int mMinTokenCount;
    /** Tag-count argument handed to {@code TrainableEstimator.prune} at compile time. */
    private final int mMinTagCount;
    private final TokenCategorizer mTokenCategorizer;
    private final TokenizerFactory mTokenizerFactory;
    private final TrainableEstimator mTrainableEstimator;
    /** All training tokens received so far, in arrival order; parallel to {@link #mTagList}. */
    private final List<String> mTokenList;
    /** All training tags received so far, in arrival order; parallel to {@link #mTokenList}. */
    private final List<String> mTagList;

    /**
     * Serialization proxy for the trainer.
     *
     * <p>The write side trains and emits the model; the read side rebuilds a
     * {@code TokenShapeChunker} from it. The stream layout is, in order:
     * tokenizer factory, token categorizer, compiled estimator.
     */
    static class Externalizer extends AbstractExternalizable {
        private static final long serialVersionUID = 142720610674437597L;
        final TrainTokenShapeChunker mChunker;

        /** No-argument constructor used on the deserialization path; leaves the chunker reference {@code null}. */
        public Externalizer() {
            this(null);
        }

        /**
         * Creates a proxy that will write out the given trainer.
         *
         * @param chunker trainer whose model is to be written
         */
        public Externalizer(TrainTokenShapeChunker chunker) {
            this.mChunker = chunker;
        }

        /**
         * Rebuilds a {@code TokenShapeChunker} from the stream.
         *
         * <p>The three objects must be read in exactly the order in which
         * {@link #writeExternal(ObjectOutput)} emits them. Reading them inside a
         * nested constructor expression would consume them in argument order
         * instead, and the categorizer would be cast to {@code CompiledEstimator}.
         *
         * @param objectInput stream positioned at the start of the written model
         * @return the reconstructed chunker
         * @throws ClassNotFoundException if a class in the stream cannot be resolved
         * @throws IOException if reading from the stream fails
         */
        @Override
        public Object read(ObjectInput objectInput) throws ClassNotFoundException, IOException {
            TokenizerFactory tokenizerFactory = (TokenizerFactory) objectInput.readObject();
            TokenCategorizer tokenCategorizer = (TokenCategorizer) objectInput.readObject();
            CompiledEstimator estimator = (CompiledEstimator) objectInput.readObject();
            // NOTE(review): 1000.0 is passed straight through to TokenShapeDecoder; presumably a
            // pruning/beam threshold -- confirm against TokenShapeDecoder.
            TokenShapeDecoder decoder = new TokenShapeDecoder(estimator, tokenCategorizer, 1000.0d);
            return new TokenShapeChunker(tokenizerFactory, decoder);
        }

        /**
         * Trains the estimator from the buffered data and writes the model.
         *
         * <p>The buffered tokens are handed to the estimator twice: once verbatim,
         * and once with infrequent tokens replaced by their category. The
         * estimator is then pruned and smoothed before the tokenizer factory,
         * token categorizer and estimator are written, in that order.
         *
         * <p>NOTE(review): each invocation feeds the complete buffered data into
         * the same estimator again; whether compiling more than once is safe
         * depends on {@code TrainableEstimator} -- confirm before relying on it.
         *
         * @param objectOutput stream to which the model is written
         * @throws IOException if writing to the stream fails
         */
        @Override
        public void writeExternal(ObjectOutput objectOutput) throws IOException {
            TrainTokenShapeChunker chunker = this.mChunker;
            // toArray yields fresh arrays, so the in-place replacement below
            // does not alter the trainer's buffered token list.
            String[] tokens = chunker.mTokenList.toArray(Strings.EMPTY_STRING_ARRAY);
            String[] tags = chunker.mTagList.toArray(Strings.EMPTY_STRING_ARRAY);

            // Pass 1: tokens exactly as observed.
            chunker.mTrainableEstimator.handle(tokens, tags);
            // Pass 2: infrequent tokens replaced by their category.
            chunker.replaceUnknownsWithCategories(tokens);
            chunker.mTrainableEstimator.handle(tokens, tags);

            chunker.mTrainableEstimator.prune(chunker.mMinTagCount, chunker.mMinTokenCount);
            chunker.mTrainableEstimator.smoothTags(1);

            // Stream layout -- read(ObjectInput) depends on this order.
            AbstractExternalizable.compileOrSerialize(chunker.mTokenizerFactory, objectOutput);
            AbstractExternalizable.compileOrSerialize(chunker.mTokenCategorizer, objectOutput);
            chunker.mTrainableEstimator.compileTo(objectOutput);
        }
    }

    /**
     * Creates a trainer with a known-token threshold of 8 and minimum token
     * and tag counts of 1.
     *
     * @param tokenCategorizer categorizer used for infrequent tokens
     * @param tokenizerFactory tokenizer factory stored with the compiled model
     */
    public TrainTokenShapeChunker(TokenCategorizer tokenCategorizer, TokenizerFactory tokenizerFactory) {
        this(tokenCategorizer, tokenizerFactory, 8, 1, 1);
    }

    /**
     * Creates a trainer with explicit thresholds.
     *
     * @param tokenCategorizer categorizer used for infrequent tokens
     * @param tokenizerFactory tokenizer factory stored with the compiled model
     * @param knownMinTokenCount tokens occurring fewer times than this in the
     *     training data are replaced by their category for the second training pass
     * @param minTokenCount token-count argument passed to the estimator's prune step
     * @param minTagCount tag-count argument passed to the estimator's prune step
     */
    public TrainTokenShapeChunker(TokenCategorizer tokenCategorizer, TokenizerFactory tokenizerFactory,
            int knownMinTokenCount, int minTokenCount, int minTagCount) {
        this.mTokenList = new ArrayList<String>();
        this.mTagList = new ArrayList<String>();
        this.mTokenCategorizer = tokenCategorizer;
        this.mTokenizerFactory = tokenizerFactory;
        this.mKnownMinTokenCount = knownMinTokenCount;
        this.mMinTokenCount = minTokenCount;
        this.mMinTagCount = minTagCount;
        this.mTrainableEstimator = new TrainableEstimator(tokenCategorizer);
    }

    /**
     * Buffers one tagged token sequence for later training.
     *
     * <p>The whole sequence is validated before anything is stored, so a
     * rejected call leaves the buffered training data untouched.
     *
     * @param tokens tokens of the sequence; no element may be {@code null}
     * @param whitespaces ignored by this implementation
     * @param tags tags parallel to {@code tokens}; no element may be {@code null}
     * @throws IllegalArgumentException if {@code tokens} and {@code tags} differ in length
     * @throws NullPointerException if any token or tag is {@code null}
     */
    @Override
    @Deprecated
    public void handle(String[] tokens, String[] whitespaces, String[] tags) {
        if (tokens.length != tags.length) {
            throw new IllegalArgumentException("Tokens and tags must be same length. Found tokens.length=" + tokens.length + " tags.length=" + tags.length);
        }
        for (int i = 0; i < tokens.length; i++) {
            if (tokens[i] == null || tags[i] == null) {
                throw new NullPointerException("Tags and tokens must not be null. Found tokens[" + i + "]=" + tokens[i] + " tags[" + i + "]=" + tags[i]);
            }
        }
        for (int i = 0; i < tokens.length; i++) {
            this.mTokenList.add(tokens[i]);
            this.mTagList.add(tags[i]);
        }
    }

    /**
     * Buffers the training data contained in a chunking.
     *
     * <p>Delegates to a {@code ChunkHandlerAdapter2} built around this trainer
     * and its tokenizer factory; presumably the adapter converts the chunking
     * to tokens and tags and calls back into this trainer -- confirm against
     * {@code ChunkHandlerAdapter2}.
     *
     * @param chunking chunking to train on
     */
    @Override
    public void handle(Chunking chunking) {
        ChunkHandlerAdapter2 adapter = new ChunkHandlerAdapter2(this, this.mTokenizerFactory, false);
        adapter.handle(chunking);
    }

    /**
     * Trains the model from the buffered data and writes its compiled form.
     *
     * @param objectOutput stream to which the compiled model is written
     * @throws IOException if writing to the stream fails
     */
    @Override
    public void compileTo(ObjectOutput objectOutput) throws IOException {
        objectOutput.writeObject(new Externalizer(this));
    }

    /**
     * Replaces, in place, every token that occurs fewer than
     * {@code mKnownMinTokenCount} times in the array with the category the
     * token categorizer assigns to it.
     *
     * <p>All counts are taken before any replacement is made, so they reflect
     * the original tokens only.
     *
     * @param tokens tokens to rewrite; modified in place
     */
    void replaceUnknownsWithCategories(String[] tokens) {
        ObjectToCounterMap<String> tokenCounts = new ObjectToCounterMap<String>();
        for (String token : tokens) {
            tokenCounts.increment(token);
        }
        for (int i = 0; i < tokens.length; i++) {
            if (tokenCounts.getCount(tokens[i]) < this.mKnownMinTokenCount) {
                tokens[i] = this.mTokenCategorizer.categorize(tokens[i]);
            }
        }
    }
}
