package io.scigraph.lexical;

import com.google.common.annotations.Beta;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import io.scigraph.annotation.Token;
import io.scigraph.lexical.chunk.NounChunk;
import io.scigraph.lexical.chunk.VerbChunk;
import io.scigraph.lexical.pos.PosToken;
import io.scigraph.opennlp.OpenNlpModule;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import javax.inject.Inject;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.util.Span;

/* loaded from: input_file:io/scigraph/lexical/LexicalLibOpenNlpImpl.class */
/**
 * {@link LexicalLib} implementation backed by OpenNLP components: a tokenizer, a sentence
 * detector, a part-of-speech tagger and a chunker, all obtained from {@link OpenNlpModule}
 * providers at construction time.
 *
 * <p>NOTE(review): OpenNLP's {@code *ME} components are generally documented as not being
 * thread-safe; this class adds no synchronization. Confirm that instances are not shared
 * between threads.
 */
public class LexicalLibOpenNlpImpl implements LexicalLib {
    // Package-private and non-final exactly as before, so any same-package code that reads or
    // swaps these components keeps working.
    Tokenizer tokenizer;
    SentenceDetectorME sentenceDetector;
    POSTaggerME tagger;
    ChunkerME chunker;

    /**
     * Creates the library, pulling one instance of each OpenNLP component from its provider.
     *
     * @param tokenizerProvider source of the {@link Tokenizer}
     * @param sentenceDetectorProvider source of the {@link SentenceDetectorME}
     * @param posTaggerProvider source of the {@link POSTaggerME}
     * @param chunkerProvider source of the {@link ChunkerME}
     * @throws IOException declared by the original signature; presumably raised while a
     *     provider loads its model - TODO confirm against OpenNlpModule
     */
    @Inject
    protected LexicalLibOpenNlpImpl(OpenNlpModule.TokenizerProvider tokenizerProvider, OpenNlpModule.SentenceDetectorProvider sentenceDetectorProvider, OpenNlpModule.PosTaggerProvider posTaggerProvider, OpenNlpModule.ChunkerProvider chunkerProvider) throws IOException {
        // NOTE(review): the mNNget() names look like decompiler-renamed accessors (probably the
        // providers' get() methods). They are kept verbatim because OpenNlpModule is not visible
        // from this file.
        this.tokenizer = tokenizerProvider.m12get();
        this.sentenceDetector = sentenceDetectorProvider.m11get();
        this.tagger = posTaggerProvider.m10get();
        this.chunker = chunkerProvider.m9get();
    }

    /**
     * Splits the text into sentences with the configured sentence detector.
     *
     * @param str the text to split
     * @return a new mutable list holding the detected sentences in order
     */
    @Override
    public List<String> extractSentences(String str) {
        return Lists.newArrayList(this.sentenceDetector.sentDetect(str));
    }

    /**
     * Tokenizes the text and tags every token with its part of speech.
     *
     * @param str the text to tag
     * @return one {@link PosToken} per token, carrying the token text, its tag and the start/end
     *     positions reported by the tokenizer for {@code str}
     */
    @Override
    public List<PosToken> tagPOS(String str) {
        String[] tokens = this.tokenizer.tokenize(str);
        Span[] spans = this.tokenizer.tokenizePos(str);
        String[] tags = this.tagger.tag(tokens);
        List<PosToken> tagged = new ArrayList<PosToken>(tokens.length);
        for (int i = 0; i < tokens.length; i++) {
            tagged.add(new PosToken(tokens[i], tags[i], spans[i].getStart(), spans[i].getEnd()));
        }
        return tagged;
    }

    /**
     * Extracts noun and verb chunks using the OpenNLP chunker.
     *
     * <p>A chunk starts at a token labelled {@code B-NP} (or {@code B-VP}) and extends over the
     * directly following {@code I-NP} (or {@code I-VP}) tokens. Tokens with any other label are
     * skipped.
     *
     * @param str the text to chunk
     * @return {@link NounChunk}s and {@link VerbChunk}s in text order; start/end positions are
     *     the tokenizer's positions within the sentence shifted by the sentence's start in
     *     {@code str}
     */
    @Override
    public List<Token<String>> getChunks(String str) {
        List<Token<String>> chunks = new ArrayList<Token<String>>();
        // Sentence spans (rather than sentence strings) give each sentence's true start in str.
        // The previous "last token end + 2" bookkeeping drifted whenever sentences were not
        // separated by exactly two characters, and failed on a sentence without tokens.
        for (Span sentenceSpan : this.sentenceDetector.sentPosDetect(str)) {
            String sentence = sentenceSpan.getCoveredText(str).toString();
            int offset = sentenceSpan.getStart();
            String[] tokens = this.tokenizer.tokenize(sentence);
            Span[] spans = this.tokenizer.tokenizePos(sentence);
            String[] labels = this.chunker.chunk(tokens, this.tagger.tag(tokens));
            int idx = 0;
            while (idx < labels.length) {
                int first = idx;
                if ("B-NP".equals(labels[idx])) {
                    while (idx + 1 < labels.length && "I-NP".equals(labels[idx + 1])) {
                        idx++;
                    }
                    chunks.add(new NounChunk(joinTokens(tokens, first, idx), offset + spans[first].getStart(), offset + spans[idx].getEnd()));
                } else if ("B-VP".equals(labels[idx])) {
                    while (idx + 1 < labels.length && "I-VP".equals(labels[idx + 1])) {
                        idx++;
                    }
                    chunks.add(new VerbChunk(joinTokens(tokens, first, idx), offset + spans[first].getStart(), offset + spans[idx].getEnd()));
                }
                idx++;
            }
        }
        return chunks;
    }

    /**
     * Extracts noun and verb phrases directly from part-of-speech tags, without the chunker.
     *
     * <p>A phrase starts at a token whose tag is in {@code PhraseChunker.START_NOUN_TAGS} (or
     * {@code START_VERB_TAGS}) and extends over the directly following tokens whose tags are in
     * {@code PhraseChunker.CONTINUE_NOUN_TAGS} (or {@code CONTINUE_VERB_TAGS}).
     *
     * @param str the text to analyse
     * @return {@link NounChunk}s and {@link VerbChunk}s in text order; start/end positions are
     *     the tokenizer's positions within the sentence shifted by the sentence's start in
     *     {@code str}
     */
    @Override
    @Beta
    public List<Token<String>> getEntities(String str) {
        List<Token<String>> entities = new ArrayList<Token<String>>();
        // Same offset handling as getChunks: the sentence span supplies the base offset.
        for (Span sentenceSpan : this.sentenceDetector.sentPosDetect(str)) {
            String sentence = sentenceSpan.getCoveredText(str).toString();
            int offset = sentenceSpan.getStart();
            String[] tokens = this.tokenizer.tokenize(sentence);
            Span[] spans = this.tokenizer.tokenizePos(sentence);
            String[] tags = this.tagger.tag(tokens);
            int idx = 0;
            while (idx < tags.length) {
                int first = idx;
                if (PhraseChunker.START_NOUN_TAGS.contains(tags[idx])) {
                    while (idx + 1 < tags.length && PhraseChunker.CONTINUE_NOUN_TAGS.contains(tags[idx + 1])) {
                        idx++;
                    }
                    entities.add(new NounChunk(joinTokens(tokens, first, idx), offset + spans[first].getStart(), offset + spans[idx].getEnd()));
                } else if (PhraseChunker.START_VERB_TAGS.contains(tags[idx])) {
                    while (idx + 1 < tags.length && PhraseChunker.CONTINUE_VERB_TAGS.contains(tags[idx + 1])) {
                        idx++;
                    }
                    entities.add(new VerbChunk(joinTokens(tokens, first, idx), offset + spans[first].getStart(), offset + spans[idx].getEnd()));
                }
                idx++;
            }
        }
        return entities;
    }

    /**
     * Joins {@code tokens[first..last]} (both inclusive) with single spaces and removes the
     * space that the join puts in front of a comma.
     */
    private static String joinTokens(String[] tokens, int first, int last) {
        List<String> parts = new ArrayList<String>(last - first + 1);
        for (int i = first; i <= last; i++) {
            parts.add(tokens[i]);
        }
        return Joiner.on(' ').join(parts).replace(" ,", ",");
    }
}
