/*
 * Decompiled with CFR 0.152.
 */
package org.opensearch.ml.engine.algorithms.question_answering.sentence;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.Generated;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.opensearch.ml.engine.algorithms.question_answering.sentence.Sentence;
import org.opensearch.ml.engine.algorithms.question_answering.sentence.SentenceSegmentationConfig;
import org.opensearch.ml.engine.algorithms.question_answering.sentence.SentenceSegmenter;

/**
 * Regex-based {@link SentenceSegmenter}.
 *
 * <p>The input is optionally pre-processed (abbreviation periods are masked, closing quotes are
 * moved in front of terminal punctuation), split with {@link #SENTENCE_PATTERN} (optionally
 * extended by custom delimiters), and every fragment is then located in the original text to
 * produce a {@link Sentence} with offsets, ordinal position and type.
 */
public class DefaultSentenceSegmenter implements SentenceSegmenter {
    @Generated
    private static final Logger log = LogManager.getLogger(DefaultSentenceSegmenter.class);

    /**
     * Matches a whitespace run that directly follows '.', '!' or '?', unless one of the negative
     * lookbehinds rejects the position (dotted tokens such as "a.b." or an upper-case letter
     * followed by a period).
     */
    private static final Pattern SENTENCE_PATTERN = Pattern.compile("(?<!\\w\\.\\w.)(?<!\\w[A-Z]\\.)(?<=[.!?])(?<!\\w[A-Z]\\.[A-Z]\\.)\\s+");

    /**
     * Stand-in for '.' inside a recognised abbreviation while the text is being split.
     * It contains none of the characters the split pattern looks behind for.
     */
    private static final String PERIOD_PLACEHOLDER = "@@@";

    /** Pre-compiled matchers for the abbreviations of the default configuration. */
    private static final Map<String, Pattern> ABBREVIATION_PATTERNS = new HashMap<String, Pattern>();

    static {
        SentenceSegmentationConfig defaultConfig = SentenceSegmentationConfig.getDefault();
        for (String abbr : defaultConfig.getCommonAbbreviations()) {
            ABBREVIATION_PATTERNS.put(abbr, compileAbbreviationPattern(abbr));
        }
    }

    /** Configuration used by {@link #segment(String)}; never {@code null}. */
    private final SentenceSegmentationConfig defaultConfig;

    /** Creates a segmenter that uses {@link SentenceSegmentationConfig#getDefault()}. */
    public DefaultSentenceSegmenter() {
        this.defaultConfig = SentenceSegmentationConfig.getDefault();
    }

    /**
     * Creates a segmenter with the given default configuration.
     *
     * @param config configuration for {@link #segment(String)}; when {@code null} the result of
     *               {@link SentenceSegmentationConfig#getDefault()} is used instead
     */
    public DefaultSentenceSegmenter(SentenceSegmentationConfig config) {
        this.defaultConfig = config != null ? config : SentenceSegmentationConfig.getDefault();
    }

    /**
     * Segments {@code text} using this instance's default configuration.
     *
     * @param text the text to split
     * @return the sentences in order of appearance
     */
    @Override
    public List<Sentence> segment(String text) {
        return this.segment(text, this.defaultConfig);
    }

    /**
     * Segments {@code text} using the supplied configuration.
     *
     * @param text   the text to split
     * @param config the segmentation options to apply
     * @return the sentences in order of appearance
     * @throws IllegalArgumentException if splitting yields no sentences
     */
    @Override
    public List<Sentence> segment(String text, SentenceSegmentationConfig config) {
        return this.segmentSentences(text, config);
    }

    /**
     * Splits the text and turns every fragment into a {@link Sentence}.
     *
     * <p>{@code position} is the zero-based ordinal of the sentence. {@code startIndex} is where
     * the fragment is found in {@code text} at or after the end of the previous sentence, falling
     * back to {@link #findApproximatePosition} when there is no exact match; {@code endIndex} is
     * {@code startIndex} plus the fragment length.
     */
    private List<Sentence> segmentSentences(String text, SentenceSegmentationConfig config) {
        List<Sentence> sentences = new ArrayList<>();
        String[] rawSentences = this.splitIntoSentences(text, config);
        log.debug("Split into {} raw sentences", rawSentences.length);
        if (rawSentences.length == 0) {
            throw new IllegalArgumentException("Failed to split text into sentences");
        }
        int currentPosition = 0;
        int currentIndex = 0;
        for (String rawSentence : rawSentences) {
            String sentenceText = config.isPreserveWhitespace() ? rawSentence : rawSentence.trim();
            // Search from the end of the previous sentence so repeated sentences map to
            // successive occurrences rather than all to the first one.
            int startIndex = text.indexOf(rawSentence, currentIndex);
            if (startIndex == -1) {
                // Pre-processing (e.g. quote handling) changed the fragment; fall back to a prefix search.
                startIndex = this.findApproximatePosition(text, rawSentence, currentIndex);
            }
            int endIndex = startIndex + rawSentence.length();
            Sentence.SentenceType type = this.determineSentenceType(sentenceText);
            sentences.add(Sentence.builder()
                .text(sentenceText)
                .startIndex(startIndex)
                .endIndex(endIndex)
                .position(currentPosition)
                .type(type)
                .build());
            ++currentPosition;
            currentIndex = endIndex;
        }
        if (sentences.isEmpty()) {
            throw new IllegalArgumentException(String.format(Locale.ROOT, "Failed to segment text into sentences: %s", text));
        }
        return sentences;
    }

    /**
     * Applies the configured pre-processing, splits the text, and undoes the abbreviation masking
     * so the returned fragments contain real periods again.
     */
    private String[] splitIntoSentences(String text, SentenceSegmentationConfig config) {
        String processedText = text;
        boolean periodsMasked = false;
        if (config.isHandleAbbreviations()) {
            processedText = this.handleAbbreviations(processedText, config.getCommonAbbreviations());
            // Only restore when masking actually changed something, so input that is untouched
            // by abbreviation handling is never rewritten.
            periodsMasked = text != null && !text.equals(processedText);
        }
        if (config.isHandleQuotes()) {
            log.debug("Handling quotes");
            processedText = this.handleQuotes(processedText);
        }
        Pattern splitPattern = SENTENCE_PATTERN;
        if (!config.getCustomSentenceDelimiters().isEmpty()) {
            String pattern = SENTENCE_PATTERN.pattern() + "|" + config.getCustomSentenceDelimiters();
            splitPattern = Pattern.compile(pattern);
            log.debug("Using custom delimiters: {}", (Object)config.getCustomSentenceDelimiters());
        }
        String[] result = splitPattern.split(processedText);
        if (periodsMasked) {
            // The placeholder exists only to hide abbreviation periods from the split pattern.
            // Known limitation: a literal "@@@" already present in the input is restored as well.
            for (int i = 0; i < result.length; i++) {
                result[i] = result[i].replace(PERIOD_PLACEHOLDER, ".");
            }
        }
        log.debug("Split result: {} sentences", result.length);
        return result;
    }

    /**
     * Masks the periods of every listed abbreviation that is followed by whitespace, so the split
     * pattern does not treat them as sentence ends. The whitespace after the abbreviation is kept
     * unchanged, which lets the restored fragment match the original text exactly.
     *
     * @param text          text to process; returned unchanged when {@code null} or empty
     * @param abbreviations abbreviations to protect; when {@code null} or empty the text is returned unchanged
     * @return the text with abbreviation periods replaced by {@link #PERIOD_PLACEHOLDER}
     */
    private String handleAbbreviations(String text, String[] abbreviations) {
        if (text == null || text.isEmpty() || abbreviations == null || abbreviations.length == 0) {
            return text;
        }
        String processedText = text;
        for (String abbr : abbreviations) {
            Pattern pattern = ABBREVIATION_PATTERNS.get(abbr);
            if (pattern == null) {
                pattern = compileAbbreviationPattern(abbr);
            }
            Matcher matcher = pattern.matcher(processedText);
            if (!matcher.find()) {
                continue;
            }
            String maskedAbbr = abbr.replace(".", PERIOD_PLACEHOLDER);
            StringBuffer masked = new StringBuffer(processedText.length());
            do {
                // The match is the literal abbreviation followed by its whitespace run.
                String trailingWhitespace = matcher.group().substring(abbr.length());
                // quoteReplacement keeps '$' and '\' in an abbreviation from being read as group references.
                matcher.appendReplacement(masked, Matcher.quoteReplacement(maskedAbbr + trailingWhitespace));
            } while (matcher.find());
            matcher.appendTail(masked);
            processedText = masked.toString();
            log.debug("Handled abbreviation: {}", abbr);
        }
        return processedText;
    }

    /**
     * Moves a double quote that directly follows '.', '!' or '?' in front of that punctuation
     * mark, so the punctuation ends up immediately before any following whitespace.
     */
    private String handleQuotes(String text) {
        String after = text.replaceAll("([.!?])\"", "\"$1");
        if (!text.equals(after)) {
            log.debug("Handled quotes in text");
        }
        return after;
    }

    /**
     * Classifies a sentence by its last character: '?' is a question, '!' an exclamation,
     * '.' a statement, anything else is incomplete.
     */
    private Sentence.SentenceType determineSentenceType(String sentence) {
        if (sentence.endsWith("?")) {
            return Sentence.SentenceType.QUESTION;
        }
        if (sentence.endsWith("!")) {
            return Sentence.SentenceType.EXCLAMATION;
        }
        if (sentence.endsWith(".")) {
            return Sentence.SentenceType.STATEMENT;
        }
        return Sentence.SentenceType.INCOMPLETE;
    }

    /**
     * Best-effort position lookup used when the exact fragment is not found in the text: searches
     * for the fragment's first (at most 10) characters at or after {@code startFrom}.
     *
     * @return the index of that prefix, or {@code startFrom} when the fragment is empty or the
     *         prefix is not found
     */
    private int findApproximatePosition(String text, String sentence, int startFrom) {
        if (sentence.isEmpty()) {
            return startFrom;
        }
        String prefix = sentence.substring(0, Math.min(10, sentence.length()));
        int prefixPos = text.indexOf(prefix, startFrom);
        return prefixPos >= 0 ? prefixPos : startFrom;
    }

    /** Builds the matcher for one abbreviation: word boundary, the literal abbreviation, then whitespace. */
    private static Pattern compileAbbreviationPattern(String abbr) {
        return Pattern.compile("\\b" + Pattern.quote(abbr) + "\\s+");
    }
}

