package io.scigraph.annotation;

import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.ForwardingMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.PeekingIterator;
import com.google.common.collect.Sets;
import io.scigraph.lucene.LuceneUtils;
import java.io.IOException;
import java.io.StringReader;
import java.io.Writer;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.logging.Logger;
import javax.inject.Inject;
import net.htmlparser.jericho.Attribute;
import net.htmlparser.jericho.EndTag;
import net.htmlparser.jericho.Segment;
import net.htmlparser.jericho.StartTag;
import net.htmlparser.jericho.StartTagType;
import net.htmlparser.jericho.StreamedSource;
import org.apache.lucene.analysis.Analyzer;

/* loaded from: input_file:io/scigraph/annotation/EntityProcessorImpl.class */
class EntityProcessorImpl implements EntityProcessor {
    /** Class-wide logger; used to report I/O failures raised while writing annotated text. */
    private static final Logger logger = Logger.getLogger(EntityProcessorImpl.class.getName());
    /** Analyzer handed to the {@code ShingleProducer} that is started for each piece of text. */
    private final Analyzer analyzer = new EntityAnalyzer();
    /** Looks up entities for candidate phrases and supplies the CSS class used on generated spans. */
    private final EntityRecognizer recognizer;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:io/scigraph/annotation/EntityProcessorImpl$Element.class */
    public static class Element extends ForwardingMap<String, String> {
        String name;
        Map<String, String> attributes = Maps.newHashMap();

        Element(String str, Iterable<Attribute> iterable) {
            this.name = str;
            for (Attribute attribute : iterable) {
                put(attribute.getName(), attribute.getValue());
            }
        }

        String getName() {
            return this.name;
        }

        public String toString() {
            return this.name + " {" + this.attributes + "}";
        }

        /* JADX INFO: Access modifiers changed from: protected */
        /* renamed from: delegate, reason: merged with bridge method [inline-methods] */
        public Map<String, String> m6delegate() {
            return this.attributes;
        }
    }

    /**
     * Creates a processor that performs all entity lookups through the given recognizer.
     *
     * @param entityRecognizer the recognizer to store for later lookups
     */
    @Inject
    protected EntityProcessorImpl(EntityRecognizer entityRecognizer) {
        recognizer = entityRecognizer;
    }

    /**
     * Starts a new thread named "Shingle Producer Thread" running a {@code ShingleProducer} over
     * the given text, and returns the queue that producer was constructed with.
     *
     * @param str the text to read
     * @return the queue handed to the producer
     */
    BlockingQueue<List<Token<String>>> startShingleProducer(String str) {
        BlockingQueue<List<Token<String>>> shingles = new LinkedBlockingQueue<>();
        ShingleProducer producer = new ShingleProducer(analyzer, new StringReader(str), shingles);
        Thread worker = new Thread(producer, "Shingle Producer Thread");
        worker.start();
        return shingles;
    }

    /**
     * Joins the text of the given tokens into one phrase, separated by single spaces.
     *
     * @param list the tokens, in the order they should appear
     * @return the space-separated token texts
     */
    String combineTokens(List<Token<String>> list) {
        List<String> words = new ArrayList<>();
        for (Token<String> token : list) {
            words.add(token.getToken());
        }
        return Joiner.on(' ').join(words);
    }

    /**
     * Finds entity annotations in a piece of plain text.
     *
     * <p>Shingles are taken from the producer queue until {@code ShingleProducer.END_TOKEN}
     * arrives. A shingle is skipped when its first or last token is a stopword, or when its joined
     * text is shorter than the configured minimum length. Every entity the recognizer returns for
     * a remaining shingle becomes an annotation spanning from the first token's start to the last
     * token's end. The annotations are then grouped, flattened and sorted in natural order.
     *
     * @param str                       the text to annotate; must not be null
     * @param entityFormatConfiguration supplies the minimum length, the longest-only flag and the
     *                                  recognizer settings
     * @return the annotations that survive grouping, sorted
     * @throws InterruptedException if interrupted while waiting for the next shingle
     */
    protected List<EntityAnnotation> getAnnotations(String str, EntityFormatConfiguration entityFormatConfiguration) throws InterruptedException {
        Preconditions.checkNotNull(str);
        BlockingQueue<List<Token<String>>> shingles = startShingleProducer(str);
        List<EntityAnnotation> candidates = new ArrayList<>();
        for (List<Token<String>> shingle = shingles.take();
                !shingle.equals(ShingleProducer.END_TOKEN);
                shingle = shingles.take()) {
            Token<String> first = Iterables.getFirst(shingle, null);
            if (LuceneUtils.isStopword(first.getToken())) {
                continue;
            }
            Token<String> last = Iterables.getLast(shingle);
            if (LuceneUtils.isStopword(last.getToken())) {
                continue;
            }
            String phrase = combineTokens(shingle);
            if (phrase.length() < entityFormatConfiguration.getMinLength()) {
                continue;
            }
            int start = shingle.get(0).getStart();
            int end = shingle.get(shingle.size() - 1).getEnd();
            for (Entity entity : recognizer.getEntities(phrase, entityFormatConfiguration)) {
                candidates.add(new EntityAnnotation(entity, start, end));
            }
        }

        List<EntityAnnotation> result = Lists.newArrayList();
        for (EntityAnnotationGroup group : getAnnotationGroups(candidates, entityFormatConfiguration.isLongestOnly())) {
            result.addAll(group);
        }
        Collections.sort(result);
        return result;
    }

    /**
     * Partitions annotations into groups of intersecting annotations.
     *
     * <p>The input list is sorted in place in reverse natural order. Each group is seeded with the
     * next unconsumed annotation; following annotations are consumed for as long as they intersect
     * the group. A consumed annotation is added only if its token was not already seen among the
     * annotations consumed after the seed; otherwise it is discarded.
     *
     * @param list the annotations to group; reordered by this call
     * @param z    when true, each group keeps only annotations at least as long as the one
     *             returned by the group's {@code peek()}
     * @return the groups, in the order they were built
     */
    static List<EntityAnnotationGroup> getAnnotationGroups(List<EntityAnnotation> list, boolean z) {
        List<EntityAnnotationGroup> groups = new ArrayList<>();
        Collections.sort(list, Collections.reverseOrder());
        PeekingIterator<EntityAnnotation> remaining = Iterators.peekingIterator(list.iterator());
        while (remaining.hasNext()) {
            EntityAnnotationGroup group = new EntityAnnotationGroup();
            group.add(remaining.next());
            HashSet<Object> seenTokens = new HashSet<>();
            while (remaining.hasNext() && group.intersects(remaining.peek())) {
                // The annotation is consumed either way; it joins the group only on first sighting.
                EntityAnnotation overlapping = remaining.next();
                if (seenTokens.add(overlapping.getToken())) {
                    group.add(overlapping);
                }
            }
            if (z) {
                Iterator<EntityAnnotation> members = group.iterator();
                int reference = group.peek().length();
                while (members.hasNext()) {
                    EntityAnnotation member = members.next();
                    if (member.length() < reference) {
                        members.remove();
                    }
                }
            }
            groups.add(group);
        }
        return groups;
    }

    /**
     * Wraps each annotation group found in the text in a {@code <span>} element.
     *
     * <p>The span's {@code class} attribute lists the recognizer's CSS class. If any entity in the
     * group serializes to a non-empty string, the span also carries the configured data attribute
     * holding those strings joined with {@code |}. For each group the closing tag is inserted
     * before the opening tag.
     *
     * @param list                      the annotations to mark up; passed to
     *                                  {@code getAnnotationGroups}, which reorders it
     * @param str                       the text the annotation offsets refer to
     * @param entityFormatConfiguration supplies the longest-only flag and the data attribute name
     * @return the text with span markup inserted
     * @throws IOException declared by the signature
     */
    protected final String insertSpans(List<EntityAnnotation> list, String str, EntityFormatConfiguration entityFormatConfiguration) throws IOException {
        StringBuilder annotated = new StringBuilder(str);
        // The sets are reused and cleared per group rather than reallocated.
        HashSet<String> ids = new HashSet<>();
        HashSet<String> cssClasses = new HashSet<>();
        for (EntityAnnotationGroup group : getAnnotationGroups(list, entityFormatConfiguration.isLongestOnly())) {
            cssClasses.clear();
            ids.clear();
            for (Entity entity : group.getAnnotations()) {
                String id = entity.serialize();
                if (!Strings.isNullOrEmpty(id)) {
                    ids.add(id);
                }
                cssClasses.add(recognizer.getCssClass());
            }
            String dataAttribute = "";
            if (!ids.isEmpty()) {
                dataAttribute = String.format("%s=\"%s\"", entityFormatConfiguration.getDataAttrName(), Joiner.on("|").join(ids));
            }
            annotated.insert(group.getEnd(), "</span>");
            String openingTag = String.format("<span class=\"%s\" %s>", Joiner.on(" ").join(cssClasses), dataAttribute);
            annotated.insert(group.getStart(), openingTag);
        }
        return annotated.toString();
    }

    /**
     * Builds the base URL of a document: protocol, host, optional port and the path up to its last
     * slash, always ending in {@code /}.
     *
     * <p>The port is written only when it is explicit and differs from the protocol's default
     * port, so {@code http://host:80/a} and {@code https://host:443/a} lose the port while
     * {@code https://host:80/a} keeps it.
     *
     * @param url the document URL
     * @return the base URL, e.g. {@code http://example.com/a/} for
     *         {@code http://example.com/a/b.html}
     */
    static String getBase(URL url) {
        StringBuilder base = new StringBuilder();
        base.append(url.getProtocol()).append("://").append(url.getHost());
        int port = url.getPort();
        // Compare against the protocol's own default rather than assuming 80 for every scheme.
        if (port > 0 && port != url.getDefaultPort()) {
            base.append(':').append(port);
        }
        String path = url.getPath();
        if (path != null) {
            int lastSlash = path.lastIndexOf('/');
            if (lastSlash >= 0) {
                base.append(path, 0, lastSlash);
            }
        }
        base.append('/');
        return base.toString();
    }

    /**
     * Writes one stylesheet {@code <link>} element per entry.
     *
     * @param writer destination for the generated markup
     * @param list   stylesheet locations, written as the {@code href} values in iteration order
     * @throws IOException if the writer fails
     */
    private static void injectStyles(Writer writer, List<String> list) throws IOException {
        for (String href : list) {
            // The MIME type belongs in "type"; "style" is the inline-CSS attribute.
            writer.write(String.format("<link rel=\"stylesheet\" type=\"text/css\" href=\"%s\"></link>", href));
        }
    }

    /**
     * Writes one external {@code <script>} element per entry.
     *
     * @param writer destination for the generated markup
     * @param list   script locations, written as the {@code src} values in iteration order
     * @throws IOException if the writer fails
     */
    private static void injectScripts(Writer writer, List<String> list) throws IOException {
        for (String src : list) {
            String tag = String.format("<script type=\"text/javascript\" language=\"javascript\" src=\"%s\"></script>", src);
            writer.write(tag);
        }
    }

    /**
     * Decides whether text found inside the given open elements should be annotated.
     *
     * <p>The answer is {@code false} as soon as any open element's lower-cased name is one of the
     * configured ignore tags. Otherwise it is {@code true} when no target ids and no target
     * classes are configured, or when some open element has a targeted {@code id} or carries at
     * least one targeted class in its {@code class} attribute.
     *
     * <p>Attributes written without a value (for example {@code <div class>}) are stored with a
     * {@code null} value and are treated as not matching.
     *
     * @param linkedList                the open elements, iterated from the head of the list
     * @param entityFormatConfiguration supplies the ignore tags, target ids and target classes
     * @return whether the enclosed text should be annotated
     */
    boolean shouldAnnotate(LinkedList<Element> linkedList, EntityFormatConfiguration entityFormatConfiguration) {
        boolean annotate = entityFormatConfiguration.getTargetClasses().isEmpty()
                && entityFormatConfiguration.getTargetIds().isEmpty();
        for (Element element : linkedList) {
            if (entityFormatConfiguration.getIgnoreTags().contains(element.getName().toLowerCase())) {
                return false;
            }
            String id = element.get("id");
            if (id != null && entityFormatConfiguration.getTargetIds().contains(id)) {
                annotate = true;
            }
            String classValue = element.get("class");
            if (classValue == null) {
                // Absent or valueless class attribute: nothing to match, and nothing to split.
                continue;
            }
            for (String cssClass : classValue.split("\\s+")) {
                if (!cssClass.isEmpty() && entityFormatConfiguration.getTargetClasses().contains(cssClass)) {
                    annotate = true;
                }
            }
        }
        return annotate;
    }

    /**
     * Copies the configured HTML input to the configured writer, wrapping recognized entities in
     * span markup, and returns the annotations that were found.
     *
     * <ul>
     *   <li>Start tags are written unchanged. A normal start tag that does not end in {@code />}
     *       or {@code / >} is pushed on the open-element stack. If a URL is configured, a
     *       {@code <base>} element plus the configured stylesheets and scripts are written right
     *       after the {@code head} start tag.</li>
     *   <li>End tags are written unchanged and pop the stack; an end tag arriving while the stack
     *       is empty is tolerated.</li>
     *   <li>Plain text segments are annotated when {@code shouldAnnotate} allows it and are
     *       written unchanged otherwise. If annotating fails with an {@link IOException}, the
     *       original text is written and a warning is logged. If the thread is interrupted, the
     *       original text is written and the interrupt flag is restored.</li>
     * </ul>
     *
     * @param entityFormatConfiguration supplies the reader, writer, URL, stylesheets, scripts and
     *                                  annotation settings; must not be null
     * @return the annotations collected from all text segments, grouped, flattened and sorted
     * @throws IOException if reading the input or writing the output fails
     */
    @Override // io.scigraph.annotation.EntityProcessor
    public List<EntityAnnotation> annotateEntities(EntityFormatConfiguration entityFormatConfiguration) throws IOException {
        Preconditions.checkNotNull(entityFormatConfiguration);
        try (StreamedSource source = new StreamedSource(entityFormatConfiguration.getReader())) {
            LinkedList<Element> openElements = new LinkedList<>();
            List<EntityAnnotation> found = Lists.newArrayList();
            for (Segment segment : source) {
                if (segment instanceof StartTag) {
                    StartTag startTag = (StartTag) segment;
                    String markup = startTag.toString();
                    entityFormatConfiguration.getWriter().write(markup);
                    boolean selfClosing = markup.endsWith("/>") || markup.endsWith("/ >");
                    if (startTag.getTagType() == StartTagType.NORMAL && !selfClosing) {
                        openElements.push(new Element(startTag.getName(), startTag.getAttributes()));
                    }
                    if (entityFormatConfiguration.getUrl() != null && "head".equals(startTag.getName())) {
                        entityFormatConfiguration.getWriter().write("<base href=\"" + getBase(entityFormatConfiguration.getUrl()) + "\"></base>");
                        injectStyles(entityFormatConfiguration.getWriter(), entityFormatConfiguration.getStylesheets());
                        injectScripts(entityFormatConfiguration.getWriter(), entityFormatConfiguration.getScripts());
                    }
                } else if (segment instanceof EndTag) {
                    entityFormatConfiguration.getWriter().write(segment.toString());
                    // A stray end tag must not abort the whole document.
                    if (!openElements.isEmpty()) {
                        openElements.pop();
                    }
                } else if (segment.getClass().equals(Segment.class)) {
                    String text = segment.toString();
                    if (!shouldAnnotate(openElements, entityFormatConfiguration)) {
                        entityFormatConfiguration.getWriter().write(text);
                        continue;
                    }
                    try {
                        List<EntityAnnotation> annotations = getAnnotations(text, entityFormatConfiguration);
                        found.addAll(annotations);
                        entityFormatConfiguration.getWriter().write(insertSpans(annotations, text, entityFormatConfiguration));
                    } catch (IOException e) {
                        entityFormatConfiguration.getWriter().write(text);
                        logger.warning(e.getMessage());
                    } catch (InterruptedException e) {
                        // Keep the text in the output even though it could not be annotated.
                        entityFormatConfiguration.getWriter().write(text);
                        Thread.currentThread().interrupt();
                    }
                }
            }

            List<EntityAnnotation> result = Lists.newArrayList();
            for (EntityAnnotationGroup group : getAnnotationGroups(found, entityFormatConfiguration.isLongestOnly())) {
                result.addAll(group);
            }
            Collections.sort(result);
            return result;
        }
    }
}
