package com.atistudios.app.data.utils.language;

import com.atistudios.app.data.model.quiz.TokenModel;
import com.atistudios.app.data.model.word.WordTokenWithRangeModel;
import com.atistudios.app.data.model.word.WordWithRangeModel;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import kotlin.collections.t;
import op.i;
import op.u;
import vm.o;

/* loaded from: classes2.dex */
public final class WordPhraseTokenizer {
    /** Five-underscore placeholder text; the Companion helpers compare tokens against this same literal. */
    public static final String COMPLETABLE_TOKEN = "_____";
    /** Singleton holder for the tokenizer helper methods. */
    public static final Companion Companion = new Companion(null);
    // NOTE(review): `i` is the obfuscated type op.i, presumably kotlin.text.Regex — TODO confirm.
    // All of the patterns below are assigned once in the static initializer of this class.
    /** Pattern {@code (?u)[\s]+$}: a run of whitespace anchored at the end of the input. */
    private static final i anySpaceRegex;
    /** Pattern {@code (?u)^[\p{Pd}]+$}: input made up only of dash-punctuation characters. */
    private static final i dashRegex;
    /** Pattern {@code (?u)^_{2,}+$}: input made up only of two or more underscores. */
    private static final i placeholderTokenRegex;
    /** Pattern {@code (?u)^[\p{P}]+$}: input made up only of punctuation characters. */
    private static final i punctuationRegex;
    /** Pattern {@code (?u)^[\p{P}\p{Z}\p{M}\p{C}\s]+$}: input made up only of punctuation, separator, mark, "other" or whitespace characters. */
    private static final i specialCharsRegex;
    /** Pattern with two optional groups: (1) text that contains no run of two or more underscores, (2) a run of two or more underscores. */
    private static final Pattern splitPlaceholderTokenRegex;
    /** Fixed list of five single-character strings populated in the static initializer; not read anywhere in the visible code. */
    private static final List<String> unmatchedChars;
    /** Pattern {@code (?u)^[\s]+$}: input made up only of whitespace. */
    private static final i whitespaceRegex;

    /* loaded from: classes2.dex */
    public static final class Companion {
        /** Private no-op constructor; instances are only created through the synthetic constructor below. */
        private Companion() {
        }

        /**
         * Compiler-generated constructor that simply delegates to the private no-arg constructor.
         *
         * @param iVar ignored marker argument (presumably Kotlin's DefaultConstructorMarker — TODO confirm)
         */
        public /* synthetic */ Companion(vm.i iVar) {
            this();
        }

        /**
         * Checks a string for the five-underscore placeholder marker.
         *
         * @param str text to inspect
         * @return {@code true} when {@code u.M} reports the marker for {@code str}
         *         (presumably a Kotlin {@code contains} check — TODO confirm), or when
         *         {@code str} equals the marker exactly
         */
        private final boolean detectPlaceholders(String str) {
            if (u.M(str, "_____", false, 2, null)) {
                return true;
            }
            return o.b(str, "_____");
        }

        /**
         * Tests a string against the underscore-only placeholder pattern.
         *
         * @param str text to test
         * @return the result of applying {@code placeholderTokenRegex} to {@code str}
         */
        private final boolean isTextPlaceholder(String str) {
            final i placeholderPattern = WordPhraseTokenizer.placeholderTokenRegex;
            return placeholderPattern.b(str);
        }

        /**
         * Splits a token that may contain underscore placeholders into consecutive pieces,
         * assigning each piece a running offset.
         *
         * <p>The input is first passed through {@code u.R0} (presumably a trim — TODO confirm).
         * If the result is exactly {@code "_____"} it becomes the single piece. Otherwise the
         * text is scanned with {@code splitPlaceholderTokenRegex}; for every match, capture
         * group 1 (text without an underscore run) and capture group 2 (an underscore run) are
         * emitted in order when non-empty. When group 2 is exactly {@code "_____"}, a
         * one-character {@code " "} piece is emitted immediately before it.
         *
         * @param str token text to split
         * @param i10 offset assigned to the first emitted piece
         * @return a pair of (emitted pieces, offset following the last emitted piece)
         */
        private final km.o<List<WordWithRangeModel>, Integer> splitTokenWithPlaceholder(String str, int i10) {
            final List<WordWithRangeModel> pieces = new ArrayList<>();
            final String trimmed = u.R0(str).toString();
            int offset = i10;

            if (o.b(trimmed, "_____")) {
                pieces.add(new WordWithRangeModel(trimmed, offset, trimmed.length()));
                offset += trimmed.length();
                return new km.o<>(pieces, Integer.valueOf(offset));
            }

            final Matcher matcher = WordPhraseTokenizer.splitPlaceholderTokenRegex.matcher(trimmed);
            while (matcher.find()) {
                final int groupCount = matcher.groupCount();
                // Bounded iteration over the capture groups. The previous `while (true)` form
                // never terminated once the index reached groupCount.
                for (int groupIndex = 1; groupIndex <= groupCount; groupIndex++) {
                    final String group = matcher.group(groupIndex);
                    if (groupIndex == 2 && o.b(group, "_____")) {
                        // Separator piece inserted ahead of an exact five-underscore placeholder.
                        pieces.add(new WordWithRangeModel(" ", offset, 1));
                        offset++;
                    }
                    // An optional group that did not participate in the match is null.
                    if (group != null && !group.isEmpty()) {
                        pieces.add(new WordWithRangeModel(group, offset, group.length()));
                        offset += group.length();
                    }
                }
            }
            return new km.o<>(pieces, Integer.valueOf(offset));
        }

        /**
         * Flattens word tokens into a list of {@code TokenModel}s, one per non-empty segment.
         *
         * <p>For each input token the previous-token linker, raw prefix, raw text and raw suffix
         * are taken in that order, every {@code \s} match is replaced with the empty string, and
         * each segment that is still non-empty becomes a {@code TokenModel} whose first
         * constructor argument is {@code "txt"} followed by a counter that starts at 0 and
         * increases by one per emitted token.
         *
         * @param list word tokens to flatten; must not be null
         * @return the emitted token models in encounter order
         */
        public final List<TokenModel> mapWordTokenWithRangeModelListToSeparatePunctuationTokenModelList(List<WordTokenWithRangeModel> list) {
            o.f(list, "wordTokenWithRangeModelList");
            final List<TokenModel> tokens = new ArrayList<>();
            final i whitespace = new i("\\s");
            int nextId = 0;
            for (WordTokenWithRangeModel source : list) {
                // All four segments are cleaned up front, in the same order as they are emitted.
                final String[] segments = {
                    whitespace.c(source.getPreviousTokenLinker().getText(), ""),
                    whitespace.c(source.getRawPrefix().getText(), ""),
                    whitespace.c(source.getRaw().getText(), ""),
                    whitespace.c(source.getRawSuffix().getText(), "")
                };
                for (String segment : segments) {
                    if (segment.length() > 0) {
                        tokens.add(new TokenModel("txt" + nextId, segment));
                        nextId++;
                    }
                }
            }
            return tokens;
        }

        /**
         * Splits text into the character-boundary segments reported by
         * {@link BreakIterator#getCharacterInstance(Locale)}, dropping any segment that equals
         * one of a fixed list of excluded strings.
         *
         * @param str    text to split; must not be null
         * @param locale locale used to obtain the character break iterator; must not be null
         * @return the retained segments in their original order
         */
        public final ArrayList<String> tokenizeTextResourceInChars(String str, Locale locale) {
            o.f(str, "inputTextResource");
            o.f(locale, "languageLocale");
            // Segments equal to any of these strings are omitted from the result.
            final List<String> excluded = t.n("\u202c", "\u202b", "\ud83d", "�", "‼", "【", "】", "《", "᙭", "\u200c", "\u202a", "⁉", "⃣", "》", "「", "〰", "ٟ", "༺", "༻", "\uf610", "￼");
            final ArrayList<String> segments = new ArrayList<>();
            final BreakIterator boundaries = BreakIterator.getCharacterInstance(locale);
            boundaries.setText(str);
            int start = boundaries.first();
            while (start != BreakIterator.DONE) {
                final int end = boundaries.next();
                if (end == BreakIterator.DONE) {
                    break;
                }
                final String segment = str.substring(start, end);
                o.e(segment, "this as java.lang.String…ing(startIndex, endIndex)");
                if (!excluded.contains(segment)) {
                    segments.add(segment);
                }
                start = end;
            }
            return segments;
        }

        /* JADX WARN: Removed duplicated region for block: B:100:0x010f  */
        /* JADX WARN: Removed duplicated region for block: B:39:0x010d  */
        /* JADX WARN: Removed duplicated region for block: B:41:0x0112  */
        /* JADX WARN: Removed duplicated region for block: B:81:0x01bf  */
        /* JADX WARN: Removed duplicated region for block: B:83:0x01c4  */
        /* JADX WARN: Removed duplicated region for block: B:94:0x01f0  */
        /* JADX WARN: Removed duplicated region for block: B:96:0x01f5  */
        /* JADX WARN: Removed duplicated region for block: B:98:0x01f2  */
        /* JADX WARN: Removed duplicated region for block: B:99:0x01c1  */
        /*
            Code decompiled incorrectly, please refer to instructions dump.
            To view partially-correct add '--show-bad-code' argument
        */
        /**
         * Placeholder for a method whose body the decompiler could not recover.
         *
         * <p>As written here, every call throws {@link UnsupportedOperationException}; the real
         * implementation (845 bytecode instructions per the decompiler note) is not present in
         * this file. NOTE(review): judging by the name and signature it presumably splits the
         * text into word tokens for the given locale — confirm against the original bytecode.
         *
         * @param r24 first argument (a string; original parameter name not recovered)
         * @param r25 second argument (a locale; original parameter name not recovered)
         * @return never returns normally in this decompiled form
         * @throws UnsupportedOperationException always
         */
        public final java.util.List<com.atistudios.app.data.model.word.WordTokenWithRangeModel> tokenizeTextResourceInWordsByLanguage(java.lang.String r24, java.util.Locale r25) {
            /*
                Method dump skipped, instructions count: 845
                To view this dump add '--comments-level debug' option
            */
            throw new UnsupportedOperationException("Method not decompiled: com.atistudios.app.data.utils.language.WordPhraseTokenizer.Companion.tokenizeTextResourceInWordsByLanguage(java.lang.String, java.util.Locale):java.util.List");
        }
    }

    // Builds the fixed character list and compiles every pattern once, at class-load time.
    // Each pattern is compiled with Pattern.UNICODE_CASE in addition to its inline (?u) flag.
    static {
        unmatchedChars = t.n("`", "´", "΄", "°", "～");

        // Only punctuation, separators, marks, "other" characters or whitespace.
        Pattern specialChars = Pattern.compile("(?u)^[\\p{P}\\p{Z}\\p{M}\\p{C}\\s]+$", Pattern.UNICODE_CASE);
        o.e(specialChars, "compile(\"(?u)^[\\\\p{P}\\\\p…$\", Pattern.UNICODE_CASE)");
        specialCharsRegex = new i(specialChars);

        // Only punctuation.
        Pattern punctuation = Pattern.compile("(?u)^[\\p{P}]+$", Pattern.UNICODE_CASE);
        o.e(punctuation, "compile(\"(?u)^[\\\\p{P}]+\\$\", Pattern.UNICODE_CASE)");
        punctuationRegex = new i(punctuation);

        // Only whitespace.
        Pattern whitespace = Pattern.compile("(?u)^[\\s]+$", Pattern.UNICODE_CASE);
        o.e(whitespace, "compile(\"(?u)^[\\\\s]+\\$\", Pattern.UNICODE_CASE)");
        whitespaceRegex = new i(whitespace);

        // Only underscores, at least two of them.
        Pattern placeholder = Pattern.compile("(?u)^_{2,}+$", Pattern.UNICODE_CASE);
        o.e(placeholder, "compile(\"(?u)^_{2,}+\\$\", Pattern.UNICODE_CASE)");
        placeholderTokenRegex = new i(placeholder);

        // Only dash punctuation.
        Pattern dash = Pattern.compile("(?u)^[\\p{Pd}]+$", Pattern.UNICODE_CASE);
        o.e(dash, "compile(\"(?u)^[\\\\p{Pd}]+\\$\", Pattern.UNICODE_CASE)");
        dashRegex = new i(dash);

        // Whitespace run at the end of the input.
        Pattern trailingSpace = Pattern.compile("(?u)[\\s]+$", Pattern.UNICODE_CASE);
        o.e(trailingSpace, "compile(\"(?u)[\\\\s]+\\$\", Pattern.UNICODE_CASE)");
        anySpaceRegex = new i(trailingSpace);

        // Group 1: text with no run of two or more underscores; group 2: such a run. Both optional.
        splitPlaceholderTokenRegex = Pattern.compile("(?u)((?:(?!_{2,}).)+)?(_{2,})?", Pattern.UNICODE_CASE);
    }
}
