package com.hankcs.hanlp.mining.word2vec;

import com.hankcs.hanlp.utility.Predefine;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Comparator;
import java.util.Locale;
import okhttp3.internal.ws.WebSocketProtocol;

/* loaded from: classes2.dex */
class Word2VecTraining {
    /**
     * Number of sigmoid entries precomputed by the static initialiser (it fills indices
     * 0..999 of {@link #expTable}).
     */
    static final int EXP_TABLE_SIZE = 1000;
    /**
     * NOTE(review): presumably the bound of the sigmoid input range; the static initialiser
     * scales its argument by the literal 6.0, which matches this value.
     */
    static final int MAX_EXP = 6;
    /** NOTE(review): not referenced by any code visible in this file; presumably used by the worker thread. */
    static final int MAX_SENTENCE_LENGTH = 1000;
    /** Length of the {@link #table} array allocated by {@code initUnigramTable}. */
    static final int TABLE_SIZE = 100000000;
    /**
     * Flat weight array allocated and randomly initialised by {@code initNet}; row {@code w}
     * starts at {@code w * layer1Size}. {@code trainModel} writes these values to the output
     * file. Static, so shared by every instance of this class.
     */
    static double[] syn0;
    /**
     * Allocated by {@code initNet} only when {@code config.useHierarchicalSoftmax()} is true;
     * reset to {@code null} by {@code trainModel} before saving. Static/shared.
     */
    static double[] syn1;
    /** Allocated by {@code initNet} only when {@code config.getNegative() > 0}. Static/shared. */
    static double[] syn1neg;
    /** Training configuration supplied to the constructor. */
    private final Config config;
    /** Lookup table built by {@code initUnigramTable}; each slot holds a vocabulary index. */
    int[] table;
    /**
     * Set to {@code config.getNumThreads()} by {@code trainModel}, which then waits until it
     * drops to zero. NOTE(review): nothing visible here decrements it; presumably the
     * worker threads do.
     */
    int threadCount;
    /** {@code System.currentTimeMillis()} taken just before the worker threads are started. */
    long timeStart;
    /** Character encoding used when writing the output file. */
    static final Charset ENCODING = Charset.forName("UTF-8");
    /**
     * Precomputed sigmoid values; has 1001 slots of which the static initialiser fills the
     * first 1000 (the last one stays 0.0).
     */
    static final double[] expTable = new double[1001];

    /* loaded from: classes2.dex */
    /**
     * Worker thread that trains on one {@link Corpus} view.
     *
     * <p>All state is captured from the owning {@link Word2VecTraining}, the corpus and the
     * configuration at construction time.
     *
     * <p>NOTE(review): the body of {@link #run()} was not recovered by the decompiler, so
     * the actual training logic is not visible in this file.
     */
    static class TrainModelThread extends Thread {
        /** Static, therefore shared by all worker threads. NOTE(review): presumably a global progress counter. */
        static int wordCountActual;
        /** Current learning rate; starts at {@code config.getAlpha()}. */
        float alpha;
        /** Training configuration passed to the constructor. */
        final Config config;
        /** Corpus view this worker reads from. */
        final Corpus corpus;
        /** Index of this worker as passed by the creator. */
        final int id;
        /** Copy of the initial value of {@link #alpha}. */
        final float startingAlpha;
        /** Reference to the owner's {@code table} at construction time (may be {@code null} if it was never built). */
        final int[] table;
        /** Copy of the owner's {@code timeStart}. */
        final long timeStart;
        /** {@code corpus.getTrainWords()} converted to {@code float}. */
        final float trainWords;
        /** The owning trainer. */
        final Word2VecTraining vec;
        /** {@code corpus.getVocab()} at construction time. */
        final VocabWord[] vocab;
        /** {@code corpus.getVocabSize()} at construction time. */
        final int vocabSize;

        /**
         * Captures everything the worker needs from its owner, corpus and configuration.
         *
         * @param word2VecTraining owning trainer; its {@code table} and {@code timeStart} are read here
         * @param corpus           corpus view for this worker
         * @param config           training configuration (provides the starting alpha)
         * @param i                worker index, stored as {@link #id}
         */
        public TrainModelThread(Word2VecTraining word2VecTraining, Corpus corpus, Config config, int i) {
            this.vec = word2VecTraining;
            this.corpus = corpus;
            this.config = config;
            float alpha = config.getAlpha();
            this.alpha = alpha;
            this.startingAlpha = alpha;
            this.id = i;
            this.table = word2VecTraining.table;
            this.trainWords = corpus.getTrainWords();
            this.timeStart = word2VecTraining.timeStart;
            this.vocabSize = corpus.getVocabSize();
            this.vocab = corpus.getVocab();
        }

        /**
         * Thread entry point.
         *
         * <p>NOTE(review): JADX failed to decompile this method (1550 instructions; it
         * reported several "Removed duplicated region" warnings for loops inside a
         * try/catch of {@code IOException}). The body below is only the decompiler's
         * placeholder: as written, a started thread terminates immediately with
         * {@link UnsupportedOperationException}. Recover the real body from the bytecode
         * (e.g. JADX with {@code --show-bad-code}) or from the upstream sources.
         */
        @Override // java.lang.Thread, java.lang.Runnable
        public void run() {
            // Decompiler placeholder; the original instructions are not reproduced here.
            throw new UnsupportedOperationException("Method not decompiled: com.hankcs.hanlp.mining.word2vec.Word2VecTraining.TrainModelThread.run():void");
        }
    }

    /* loaded from: classes2.dex */
    /**
     * Orders {@link VocabWord}s by descending {@code cn}: the word with the larger
     * {@code cn} sorts first, equal values compare as 0.
     */
    static class VocabWordComparator implements Comparator<VocabWord> {
        VocabWordComparator() {
        }

        /**
         * Compares two words by {@code cn}, largest first.
         *
         * <p>Uses explicit comparisons rather than {@code b - a}: a subtraction can overflow
         * {@code int} for operands of opposite sign and would then report the wrong order.
         *
         * @return a negative value if {@code vocabWord} has the larger {@code cn}, a positive
         *         value if {@code vocabWord2} has, and 0 when they are equal
         */
        @Override // java.util.Comparator
        public int compare(VocabWord vocabWord, VocabWord vocabWord2) {
            int first = vocabWord.cn;
            int second = vocabWord2.cn;
            if (first > second) {
                return -1;
            }
            if (first < second) {
                return 1;
            }
            return 0;
        }
    }

    // Precompute the logistic function e^x / (1 + e^x) for 1000 evenly spaced x in [-6, 6).
    static {
        final double[] sigmoid = expTable;
        int slot = 0;
        while (slot < 1000) {
            // Map the slot index onto the input range before exponentiating.
            final double x = (((slot / 1000.0d) * 2.0d) - 1.0d) * 6.0d;
            final double e = Math.exp(x);
            sigmoid[slot] = e / (1.0d + e);
            slot++;
        }
    }

    /**
     * Creates a trainer bound to the given configuration.
     *
     * @param config training configuration; stored as-is (no copy, no null check)
     */
    public Word2VecTraining(Config config) {
        this.config = config;
    }

    /**
     * Advances a linear congruential generator by one step:
     * {@code next = j * 25214903917 + 11}, with ordinary wrapping {@code long} arithmetic.
     *
     * @param j current generator state
     * @return the next generator state
     */
    static long nextRandom(long j) {
        final long multiplier = 25214903917L;
        final long increment = 11L;
        long next = j * multiplier;
        next += increment;
        return next;
    }

    /**
     * Allocates a zero-filled {@code double} array whose length is {@code i} rounded up to
     * the next multiple of 128.
     *
     * <p>A length that is already a multiple of 128 (including 0) is used unchanged.
     *
     * @param i minimum number of elements required
     * @return a new array of at least {@code i} elements
     */
    static double[] posixMemAlign128(int i) {
        final int remainder = i % 128;
        if (remainder <= 0) {
            // Already aligned (or non-positive): allocate exactly what was asked for.
            return new double[i];
        }
        final int blocks = (i / 128) + 1;
        return new double[blocks * 128];
    }

    /**
     * Returns the configuration this trainer was constructed with.
     *
     * @return the same {@link Config} instance passed to the constructor
     */
    public Config getConfig() {
        return this.config;
    }

    /**
     * Allocates the weight arrays for the given corpus and initialises them.
     *
     * <ul>
     *   <li>{@code syn0} is always allocated ({@code vocabSize * layer1Size} values, padded
     *       by {@link #posixMemAlign128(int)}) and filled with pseudo-random values in
     *       {@code [-0.5 / layer1Size, 0.5 / layer1Size)} from a fixed seed of 1.</li>
     *   <li>{@code syn1} is allocated (all zeros) only when hierarchical softmax is enabled.</li>
     *   <li>{@code syn1neg} is allocated (all zeros) only when {@code getNegative() > 0}.</li>
     * </ul>
     * Finally {@code corpus.createBinaryTree()} is invoked.
     *
     * <p>The arrays are static fields, so calling this replaces the weights seen by every
     * instance of this class.
     *
     * @param corpus corpus providing the vocabulary size
     * @throws IllegalArgumentException if {@code vocabSize * layer1Size} (plus alignment
     *         padding) does not fit in an {@code int} array length
     */
    void initNet(Corpus corpus) {
        int layer1Size = this.config.getLayer1Size();
        int vocabSize = corpus.getVocabSize();

        // Compute the element count in 64 bits so an oversized model is reported instead of
        // silently wrapping around to a too-small (or negative) array length.
        long weightCount = (long) vocabSize * layer1Size;
        if (weightCount > Integer.MAX_VALUE - 127) {
            throw new IllegalArgumentException("network too large: vocabSize=" + vocabSize
                    + ", layer1Size=" + layer1Size);
        }
        int size = (int) weightCount;

        syn0 = posixMemAlign128(size);
        // Freshly allocated Java arrays are already zero-filled, so syn1/syn1neg need no
        // explicit clearing.
        if (this.config.useHierarchicalSoftmax()) {
            syn1 = posixMemAlign128(size);
        }
        if (this.config.getNegative() > 0) {
            syn1neg = posixMemAlign128(size);
        }

        // Rows are laid out contiguously, so one flat pass visits the weights in the same
        // order as iterating word by word and column by column.
        long seed = 1;
        for (int k = 0; k < size; k++) {
            seed = nextRandom(seed);
            // Take the low 16 bits of the generator state, scale to [0, 1), centre on zero
            // and shrink by the layer size.
            syn0[k] = (((seed & 0xFFFFL) / 65536.0d) - 0.5d) / layer1Size;
        }
        corpus.createBinaryTree();
    }

    /**
     * Builds {@link #table}, a lookup array of {@link #TABLE_SIZE} slots in which each
     * vocabulary index occupies a share of slots proportional to {@code cn^0.75}.
     *
     * <p>Slots are assigned in vocabulary order: the table is walked once while a running
     * cumulative share decides when to move on to the next word. The last word absorbs any
     * remaining slots.
     *
     * <p>If the vocabulary is empty the table is still allocated but left zero-filled.
     *
     * @param corpus corpus providing the vocabulary and its size
     */
    void initUnigramTable(Corpus corpus) {
        final double power = 0.75d;
        int vocabSize = corpus.getVocabSize();
        VocabWord[] vocab = corpus.getVocab();
        this.table = new int[TABLE_SIZE];
        if (vocabSize <= 0) {
            // Nothing to distribute; avoid dereferencing vocab[0].
            return;
        }

        // Accumulate the normaliser in floating point. Truncating to an integer after every
        // addition would discard the fractional part of each term and bias the shares.
        double total = 0.0d;
        for (int i = 0; i < vocabSize; i++) {
            total += Math.pow(vocab[i].cn, power);
        }

        final int lastIndex = vocabSize - 1;
        int wordIndex = 0;
        double cumulative = Math.pow(vocab[0].cn, power) / total;
        for (int slot = 0; slot < TABLE_SIZE; slot++) {
            this.table[slot] = wordIndex;
            // Only advance while another word exists, so vocab is never indexed at or beyond
            // vocabSize.
            if (wordIndex < lastIndex && slot / (double) TABLE_SIZE > cumulative) {
                wordIndex++;
                cumulative += Math.pow(vocab[wordIndex].cn, power) / total;
            }
        }
    }

    /**
     * Runs the complete training pipeline and writes the resulting vectors to the
     * configured output file.
     *
     * <p>Steps: learn and sort the vocabulary, and return early if no output file is
     * configured. Otherwise initialise the network (and the unigram table when negative
     * sampling is enabled), start {@code config.getNumThreads()} worker threads, wait until
     * {@link #threadCount} reaches zero, then write the vectors as text: a header line
     * {@code "<vocabSize> <layer1Size>"} followed by one line per word.
     *
     * <p>NOTE(review): nothing visible in this file decrements {@code threadCount} or calls
     * {@code notify}; presumably the worker threads do so when they finish.
     *
     * <p>Numbers are formatted with {@link Locale#ROOT} so the file always uses ASCII
     * digits and a '.' decimal separator regardless of the JVM's default locale.
     *
     * @throws IOException if the output file cannot be opened or written, or if an
     *         operation on the corpus fails
     */
    public void trainModel() throws IOException {
        final int layer1Size = this.config.getLayer1Size();
        final TextFileCorpus corpus = new TextFileCorpus(this.config);
        Predefine.logger.info("learning vocabulary");
        corpus.learnVocab();
        Predefine.logger.info("sorting vocabulary");
        corpus.sortVocab();
        final int vocabSize = corpus.getVocabSize();
        final VocabWord[] vocab = corpus.getVocab();
        Predefine.logger.info("Vocab size: " + vocabSize);
        Predefine.logger.info("Words in train file: " + corpus.getTrainWords());
        if (this.config.getOutputFile() == null) {
            return;
        }

        initNet(corpus);
        if (this.config.getNegative() > 0) {
            initUnigramTable(corpus);
        }

        this.timeStart = System.currentTimeMillis();
        final int numThreads = this.config.getNumThreads();
        this.threadCount = numThreads;
        for (int id = 0; id < numThreads; id++) {
            new TrainModelThread(this, new CacheCorpus(corpus), this.config, id).start();
        }
        corpus.shutdown();

        // Keep waiting for the workers even if interrupted, but remember the interrupt so it
        // can be re-asserted for the caller once this method is done.
        boolean interrupted = false;
        synchronized (this) {
            while (this.threadCount > 0) {
                try {
                    wait();
                } catch (InterruptedException e) {
                    interrupted = true;
                }
            }
        }

        System.err.println();
        Predefine.logger.info(String.format("finished training in %s", Utility.humanTime(System.currentTimeMillis() - this.timeStart)));

        // These are no longer needed once training has finished; drop them before saving.
        syn1 = null;
        this.table = null;

        OutputStream out = null;
        Writer encoder = null;
        PrintWriter printer = null;
        try {
            out = new FileOutputStream(this.config.getOutputFile());
            encoder = new OutputStreamWriter(out, ENCODING);
            printer = new PrintWriter(encoder);
            Predefine.logger.info("now saving the word vectors to the file " + this.config.getOutputFile());
            printer.printf(Locale.ROOT, "%d %d\n", Integer.valueOf(vocabSize), Integer.valueOf(layer1Size));
            for (int w = 0; w < vocabSize; w++) {
                printer.print(vocab[w].word);
                final int rowStart = w * layer1Size;
                for (int c = 0; c < layer1Size; c++) {
                    printer.printf(Locale.ROOT, " %f", Double.valueOf(syn0[rowStart + c]));
                }
                printer.println();
            }
            // PrintWriter never throws on write failure; surface any error explicitly so a
            // truncated model file is not mistaken for a successful save.
            if (printer.checkError()) {
                throw new IOException("failed to write word vectors to " + this.config.getOutputFile());
            }
        } finally {
            try {
                corpus.close();
            } finally {
                // Close outermost-first; each close is best-effort.
                if (printer != null) {
                    Utility.closeQuietly((Writer) printer);
                }
                if (encoder != null) {
                    Utility.closeQuietly(encoder);
                }
                if (out != null) {
                    Utility.closeQuietly(out);
                }
                if (interrupted) {
                    Thread.currentThread().interrupt();
                }
            }
        }
    }
}
