package com.aliyun.midware.nui;

import android.util.Log;
import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.idst.nui.CommonUtils;
import com.alibaba.idst.nui.Constants;
import com.alibaba.idst.nui.INativeTtsCallback;
import com.alibaba.idst.nui.NativeNui;
import com.aliyun.tongyi.kit.utils.DeviceUtils;
import com.aliyun.tongyi.kit.utils.SystemUtils;

/* loaded from: classes2.dex */
/**
 * Process-wide helper that drives the NUI text-to-speech engine ({@link NativeNui} created in
 * {@code MODE_TTS}) and forwards the synthesized audio to an {@link AudioPlayer}.
 *
 * <p>Obtain the shared instance through {@link #getInstance()}, call {@link #initialize()} once,
 * then use {@link #startPlayTTS(String)}, {@link #pauseTTS()}, {@link #resumeTTS()},
 * {@link #stopPlayTTS()} and finally {@link #releaseTTS()}.
 */
public class TTSManager {
    /** Sample Chinese passage (about non-autoregressive speech recognition); not used in this class. */
    static final String CN_PREVIEW = "近年来，随着端到端语音识别的流行，基于Transformer结构的语音识别系统逐渐成为了主流。然而，由于Transformer是一种自回归模型，需要逐个生成目标文字，计算复杂度随着目标文字数量线性增加，限制了其在工业生产中的应用。针对Transoformer模型自回归生成文字的低计算效率缺陷，学术界提出了非自回归模型来并行的输出目标文字。根据生成目标文字时，迭代轮数，非自回归模型分为：多轮迭代式与单轮迭代非自回归模型。其中实用的是基于单轮迭代的非自回归模型。对于单轮非自回归模型，现有工作往往聚焦于如何更加准确的预测目标文字个数，如CTC-enhanced采用CTC预测输出文字个数，尽管如此，考虑到现实应用中，语速、口音、静音以及噪声等因素的影响，如何准确的预测目标文字个数以及抽取目标文字对应的声学隐变量仍然是一个比较大的挑战；另外一方面，我们通过对比自回归模型与单轮非自回归模型在工业大数据上的错误类型（如下图所示，AR与vanilla NAR），发现，相比于自回归模型，非自回归模型，在预测目标文字个数方面差距较小，但是替换错误显著的增加，我们认为这是由于单轮非自回归模型中条件独立假设导致的语义信息丢失。于此同时，目前非自回归模型主要停留在学术验证阶段，还没有工业大数据上的相关实验与结论。";
    private static final String TAG = "TTSManager";

    /** Sample rate applied to both the engine ("sample_rate" parameter) and the audio player. */
    private static final int SAMPLE_RATE = 24000;

    /** Texts longer than this many UTF-8 characters are started with "tts_version" = "1". */
    private static final int LONG_TEXT_CHAR_LIMIT = 300;

    /**
     * Lazily created shared instance. Must be {@code volatile}: {@link #getInstance()} reads it
     * outside the lock, and without {@code volatile} another thread could observe a reference to
     * a partially constructed object.
     */
    private static volatile TTSManager mInstance;

    private final NativeNui nui_tts_instance = new NativeNui(Constants.ModeType.MODE_TTS);

    /** Player receiving the synthesized audio; both playback callbacks are intentionally no-ops. */
    private final AudioPlayer mAudioTrack = new AudioPlayer(new AudioPlayerCallback() {
        @Override // com.aliyun.midware.nui.AudioPlayerCallback
        public void playOver() {
            // Nothing to do when playback finishes.
        }

        @Override // com.aliyun.midware.nui.AudioPlayerCallback
        public void playStart() {
            // Nothing to do when playback starts.
        }
    });

    // NOTE(review): this flag is only ever reset in releaseTTS() and is never read or set to
    // true anywhere in this class - confirm whether initialize() was meant to maintain it.
    private boolean initialized = false;

    /**
     * Builds the JSON ticket string handed to {@code tts_initialize}.
     *
     * @param workspace value stored under the "workspace" key
     * @return the serialized JSON object, or an empty string if building it failed
     */
    private String genTicket(String workspace) {
        try {
            JSONObject ticket = new JSONObject();
            // NOTE(review): app_key and token are hard-coded credentials shipped inside the
            // binary. Left unchanged to preserve behavior; they should be provisioned at runtime.
            ticket.put("app_key", "c1TqCuHTtAvZzv43");
            ticket.put("token", "08e7f8c4e2b745a1bb49cd9b939a8cde");
            ticket.put("device_id", DeviceUtils.getDeviceId());
            ticket.put("url", "wss://nls-gateway.cn-shanghai.aliyuncs.com:443/ws/v1");
            ticket.put("workspace", workspace);
            ticket.put("mode_type", "2");
            return ticket.toString();
        } catch (JSONException e) {
            // Keep the original contract (empty ticket on failure) but report through the
            // Android logger instead of printStackTrace().
            Log.e(TAG, "failed to build TTS ticket", e);
            return "";
        }
    }

    /**
     * Returns the shared {@code TTSManager}, creating it on first use.
     *
     * <p>Uses double-checked locking on the volatile {@link #mInstance} field so that the lock is
     * only taken while the instance has not been created yet.
     *
     * @return the shared instance, never {@code null}
     */
    public static TTSManager getInstance() {
        TTSManager instance = mInstance;
        if (instance == null) {
            synchronized (TTSManager.class) {
                instance = mInstance;
                if (instance == null) {
                    instance = new TTSManager();
                    mInstance = instance;
                }
            }
        }
        return instance;
    }

    /**
     * Initializes the TTS engine with a freshly generated ticket and registers the callback that
     * feeds audio data and playback events to the audio player. Afterwards the voice
     * ("font_name" = "siqi"), the sample rate and "enable_subtitle" = "1" are applied; these
     * parameters are set regardless of the initialization result.
     *
     * @return the result code returned by {@code NativeNui.tts_initialize}
     */
    public int initialize() {
        INativeTtsCallback callback = new INativeTtsCallback() {
            @Override // com.alibaba.idst.nui.INativeTtsCallback
            public void onTtsDataCallback(String info, int infoLen, byte[] data) {
                // The info string was only ever inspected, never used, so it is ignored here;
                // this also avoids a NullPointerException when the engine passes no info.
                // Guard against a null buffer before handing audio to the player.
                if (data != null && data.length > 0) {
                    mAudioTrack.setAudioData(data);
                }
            }

            @Override // com.alibaba.idst.nui.INativeTtsCallback
            public void onTtsEventCallback(INativeTtsCallback.TtsEvent event, String taskId, int retCode) {
                if (event == INativeTtsCallback.TtsEvent.TTS_EVENT_START
                        || event == INativeTtsCallback.TtsEvent.TTS_EVENT_RESUME) {
                    mAudioTrack.play();
                } else if (event == INativeTtsCallback.TtsEvent.TTS_EVENT_END) {
                    mAudioTrack.isFinishSend(true);
                } else if (event == INativeTtsCallback.TtsEvent.TTS_EVENT_PAUSE) {
                    mAudioTrack.pause();
                } else if (event == INativeTtsCallback.TtsEvent.TTS_EVENT_ERROR) {
                    // Tell the player no more data is coming, then report the engine's message.
                    mAudioTrack.isFinishSend(true);
                    String errorMsg = nui_tts_instance.getparamTts("error_msg");
                    Log.e(TAG, "TTS_EVENT_ERROR error_code:" + retCode + " errmsg:" + errorMsg);
                }
            }

            @Override // com.alibaba.idst.nui.INativeTtsCallback
            public void onTtsVolCallback(int volume) {
                // Volume updates are not used.
            }
        };

        String ticket = genTicket(CommonUtils.getModelPath(SystemUtils.sApplication));
        // NOTE(review): the engine is started with verbose logging and a trailing `true` flag
        // whose meaning is not visible from this file - confirm both are intended for release.
        int result = nui_tts_instance.tts_initialize(
                callback, ticket, Constants.LogLevel.LOG_LEVEL_VERBOSE, true);

        nui_tts_instance.setparamTts("font_name", "siqi");
        nui_tts_instance.setparamTts("sample_rate", String.valueOf(SAMPLE_RATE));
        mAudioTrack.setSampleRate(SAMPLE_RATE);
        nui_tts_instance.setparamTts("enable_subtitle", "1");
        return result;
    }

    /**
     * Pauses synthesis in the engine and then pauses the audio player.
     *
     * @return the result code returned by {@code NativeNui.pauseTts}
     */
    public int pauseTTS() {
        int result = nui_tts_instance.pauseTts();
        mAudioTrack.pause();
        return result;
    }

    /**
     * Stops the audio player, releases the engine and clears the {@code initialized} flag.
     *
     * @return the result code returned by {@code NativeNui.tts_release}
     */
    public int releaseTTS() {
        mAudioTrack.stop();
        int result = nui_tts_instance.tts_release();
        initialized = false;
        return result;
    }

    /**
     * Resumes synthesis in the engine and then resumes playback.
     *
     * @return the result code returned by {@code NativeNui.resumeTts}
     */
    public int resumeTTS() {
        int result = nui_tts_instance.resumeTts();
        mAudioTrack.play();
        return result;
    }

    /**
     * Starts synthesizing {@code text}. The "tts_version" parameter is set to "1" when the text
     * has more than {@value #LONG_TEXT_CHAR_LIMIT} UTF-8 characters and to "0" otherwise.
     *
     * <p>Note that the result of {@code NativeNui.startTts} is not reported to the caller.
     *
     * @param text the text to speak
     * @return the number of UTF-8 characters in {@code text} as counted by the engine
     */
    public int startPlayTTS(String text) {
        int charCount = nui_tts_instance.getUtf8CharsNum(text);
        boolean longText = charCount > LONG_TEXT_CHAR_LIMIT;
        if (longText) {
            Log.w(TAG, "text exceed 300 chars.");
        }
        nui_tts_instance.setparamTts("tts_version", longText ? "1" : "0");
        nui_tts_instance.startTts("1", "", text);
        return charCount;
    }

    /**
     * Cancels the current synthesis task and stops the audio player.
     *
     * @return the result code returned by {@code NativeNui.cancelTts}
     */
    public int stopPlayTTS() {
        int result = nui_tts_instance.cancelTts("");
        mAudioTrack.stop();
        return result;
    }
}
