package opennlp.tools.formats.ad;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import opennlp.tools.util.FilterObjectStream;
import opennlp.tools.util.ObjectStream;
import org.slf4j.Logger;

/* loaded from: input_file:WEB-INF/lib/opennlp-tools-1.5.3.jar:opennlp/tools/formats/ad/ADSentenceStream.class */
public class ADSentenceStream extends FilterObjectStream<String, Sentence> {
    // Markers of the corpus markup. Each pattern is applied with Matcher.matches() in read(),
    // so it has to cover the complete input line.

    /** Line that opens a sentence block (a tag beginning with {@code <s}). */
    private static final Pattern sentStart = Pattern.compile("<s[^>]*>");
    /** Line that closes a sentence block. */
    private static final Pattern sentEnd = Pattern.compile("</s>");
    /** Line that closes an {@code ext} element; also ends an open sentence block. */
    private static final Pattern extEnd = Pattern.compile("</ext>");
    /** Line that opens a title (a tag beginning with {@code <t}). */
    private static final Pattern titleStart = Pattern.compile("<t[^>]*>");
    /** Line that closes a title. */
    private static final Pattern titleEnd = Pattern.compile("</t>");
    /** Line that opens a box (a tag beginning with {@code <caixa}). */
    private static final Pattern boxStart = Pattern.compile("<caixa[^>]*>");
    /** Line that closes a box. */
    private static final Pattern boxEnd = Pattern.compile("</caixa>");
    /** Line that opens a paragraph (a tag beginning with {@code <p}). */
    private static final Pattern paraStart = Pattern.compile("<p[^>]*>");
    /** Line that opens an {@code ext} element; resets the paragraph counter. */
    private static final Pattern textStart = Pattern.compile("<ext[^>]*>");
    /** Parser that turns the collected lines of one sentence block into a Sentence. */
    private SentenceParser parser;
    /** Paragraph counter: incremented on every paragraph start, reset to 0 on every ext start. */
    private int paraID;
    /** True between a title start line and the matching title end line. */
    private boolean isTitle;
    /** True between a box start line and the matching box end line. */
    private boolean isBox;

    /* loaded from: input_file:WEB-INF/lib/opennlp-tools-1.5.3.jar:opennlp/tools/formats/ad/ADSentenceStream$Sentence.class */
    /**
     * Value holder for one parsed sentence: its text, its metadata string and the root
     * node of its tree. All properties are plain read/write bean properties and start
     * out as {@code null}.
     */
    public static class Sentence {
        /** Metadata label constant with the value {@code "final"}. */
        public static final String META_LABEL_FINAL = "final";

        private String text;
        private String metadata;
        private SentenceParser.Node root;

        /** @return the sentence text, or {@code null} if none was set */
        public String getText() {
            return text;
        }

        /** @param sentenceText the sentence text to store */
        public void setText(String sentenceText) {
            text = sentenceText;
        }

        /** @return the metadata string, or {@code null} if none was set */
        public String getMetadata() {
            return metadata;
        }

        /** @param sentenceMetadata the metadata string to store */
        public void setMetadata(String sentenceMetadata) {
            metadata = sentenceMetadata;
        }

        /** @return the root node of the sentence tree, or {@code null} if none was set */
        public SentenceParser.Node getRoot() {
            return root;
        }

        /** @param rootNode the root node of the sentence tree */
        public void setRoot(SentenceParser.Node rootNode) {
            root = rootNode;
        }
    }

    /* loaded from: input_file:WEB-INF/lib/opennlp-tools-1.5.3.jar:opennlp/tools/formats/ad/ADSentenceStream$SentenceParser.class */
    public static class SentenceParser {
        // Line patterns tried by getElement(). In every pattern group 1 is the leading run of
        // '=' (or '-') characters; its length plus one becomes the element's level.

        // Non-terminal line. Group 2 (a "x:y" tag) is stored as the node's syntactic tag.
        private Pattern nodePattern = Pattern.compile("([=-]*)([^:=]+:[^\\(\\s]+)(\\(([^\\)]+)\\))?\\s*(?:(\\((<.+>)\\))*)\\s*$");
        // Regular leaf line. Groups: 2 syntactic tag, 3 functional tag, 4 quoted lemma,
        // 5 secondary tag(s) in angle brackets, 6 morphological tag, 7 lexeme.
        private Pattern leafPattern = Pattern.compile("^([=-]*)([^:=]+):([^\\(\\s]+)\\([\"'](.+)[\"']\\s*((?:<.+>)*)\\s*([^\\)]+)?\\)\\s+(.+)");
        // Leaf line whose tag is written with '=' instead of ':'. Groups: 2 syntactic tag,
        // 3 optional lemma including its quotes, 4 morphological tag, 5 lexeme.
        private Pattern bizarreLeafPattern = Pattern.compile("^([=-]*)([^:=]+=[^\\(\\s]+)\\(([\"'].+[\"'])?\\s*([^\\)]+)?\\)\\s+(.+)");
        // Line made only of an optional '=' run followed by non-word characters (group 2),
        // which getElement() turns into a leaf with that text as lexeme.
        private Pattern punctuationPattern = Pattern.compile("^(=*)(\\W+)$");
        // Text and metadata of the most recently parsed sentence. They are kept between
        // parse() calls so that a block starting with "&&" can reuse them.
        private String text;
        private String meta;

        /* loaded from: input_file:WEB-INF/lib/opennlp-tools-1.5.3.jar:opennlp/tools/formats/ad/ADSentenceStream$SentenceParser$Leaf.class */
        /**
         * Terminal element of the sentence tree. Besides the level and tags inherited from
         * {@link TreeElement} it carries a lexeme, a lemma, a secondary tag and a
         * functional tag; all of them are plain read/write properties.
         */
        public class Leaf extends TreeElement {
            private String word;
            private String lemma;
            private String secondaryTag;
            private String functionalTag;

            public Leaf() {
                super();
            }

            /** @return always {@code true} */
            @Override // opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.TreeElement
            public boolean isLeaf() {
                return true;
            }

            public String getLexeme() {
                return word;
            }

            public void setLexeme(String lexeme) {
                word = lexeme;
            }

            public String getLemma() {
                return lemma;
            }

            public void setLemma(String newLemma) {
                lemma = newLemma;
            }

            public String getSecondaryTag() {
                return secondaryTag;
            }

            public void setSecondaryTag(String tag) {
                secondaryTag = tag;
            }

            public String getFunctionalTag() {
                return functionalTag;
            }

            public void setFunctionalTag(String tag) {
                functionalTag = tag;
            }

            /**
             * Renders the leaf on one line: one {@code =} per level, then, only when a
             * syntactic tag is set, {@code tag:functional(['lemma' ][secondary ]morph) },
             * and finally the lexeme followed by a line break.
             */
            @Override
            public String toString() {
                StringBuilder out = new StringBuilder();
                int remaining = getLevel();
                while (remaining > 0) {
                    out.append("=");
                    remaining--;
                }

                String syntacticTag = getSyntacticTag();
                if (syntacticTag != null) {
                    out.append(syntacticTag);
                    out.append(":");
                    out.append(getFunctionalTag());
                    out.append("(");
                    out.append(emptyOrString(getLemma(), "'", "' "));
                    out.append(emptyOrString(getSecondaryTag(), "", " "));
                    out.append(getMorphologicalTag());
                    out.append(") ");
                }

                out.append(word);
                out.append("\n");
                return out.toString();
            }

            /** Wraps {@code value} in prefix/suffix, or yields the empty string for {@code null}. */
            private String emptyOrString(String value, String prefix, String suffix) {
                if (value == null) {
                    return "";
                }
                return prefix + value + suffix;
            }
        }

        /* loaded from: input_file:WEB-INF/lib/opennlp-tools-1.5.3.jar:opennlp/tools/formats/ad/ADSentenceStream$SentenceParser$Node.class */
        public class Node extends TreeElement {
            private List<TreeElement> elems;

            public Node() {
                super();
                this.elems = new ArrayList();
            }

            public void addElement(TreeElement treeElement) {
                this.elems.add(treeElement);
            }

            public TreeElement[] getElements() {
                return (TreeElement[]) this.elems.toArray(new TreeElement[this.elems.size()]);
            }

            public String toString() {
                StringBuilder sb = new StringBuilder();
                for (int i = 0; i < getLevel(); i++) {
                    sb.append("=");
                }
                sb.append(getSyntacticTag());
                if (getMorphologicalTag() != null) {
                    sb.append(getMorphologicalTag());
                }
                sb.append("\n");
                Iterator<TreeElement> it = this.elems.iterator();
                while (it.hasNext()) {
                    sb.append(it.next().toString());
                }
                return sb.toString();
            }
        }

        /* loaded from: input_file:WEB-INF/lib/opennlp-tools-1.5.3.jar:opennlp/tools/formats/ad/ADSentenceStream$SentenceParser$TreeElement.class */
        /**
         * Common base of the tree elements: holds a level, a syntactic tag and a
         * morphological tag, each exposed as a plain read/write property.
         */
        public abstract class TreeElement {
            private int level;
            private String syntacticTag;
            private String morphologicalTag;

            public TreeElement() {
            }

            /** @return {@code false} unless overridden by a subclass */
            public boolean isLeaf() {
                return false;
            }

            public int getLevel() {
                return level;
            }

            public void setLevel(int newLevel) {
                level = newLevel;
            }

            public String getSyntacticTag() {
                return syntacticTag;
            }

            public void setSyntacticTag(String tag) {
                syntacticTag = tag;
            }

            public String getMorphologicalTag() {
                return morphologicalTag;
            }

            public void setMorphologicalTag(String tag) {
                morphologicalTag = tag;
            }
        }

        /**
         * Parses the lines of one sentence block into a {@link Sentence}.
         *
         * <p>The input is scanned for a line starting with {@code SOURCE}. The line after
         * it supplies the sentence id (up to the first space) and the sentence text (the
         * rest). If a {@code &&} line is met before any {@code SOURCE} line, the text and
         * metadata of the previously parsed sentence are reused instead. Lines starting
         * with {@code ###} are then skipped, and every following line is converted with
         * {@link #getElement(String)} and attached to the tree according to its level,
         * until an empty line, a line starting with {@code </s>}, a {@code &&} line or the
         * end of the input is reached.
         *
         * @param sentenceString the lines of the sentence block, separated by line breaks
         * @param para paragraph number, recorded in the metadata as {@code p=<para>}
         * @param isTitle if {@code true}, {@code " title"} is added to the metadata
         * @param isBox if {@code true}, {@code " box"} is added to the metadata
         * @return the parsed sentence. If the input has no {@code SOURCE} or {@code &&}
         *         line, or an exception occurs while parsing, the problem is reported on
         *         {@code System.err} and the sentence is returned as far as it was filled
         *         in (its root may be {@code null}).
         */
        public Sentence parse(String sentenceString, int para, boolean isTitle, boolean isBox) {
            BufferedReader reader = new BufferedReader(new StringReader(sentenceString));
            Sentence sentence = new Sentence();
            Node root = new Node();
            try {
                // Look for the SOURCE header; "&&" means "another parse of the same
                // sentence", in which case the previous text and metadata are kept.
                String line = reader.readLine();
                boolean useSameTextAndMeta = false;
                while (line != null && !line.startsWith("SOURCE")) {
                    if (line.equals("&&")) {
                        useSameTextAndMeta = true;
                        break;
                    }
                    line = reader.readLine();
                }
                if (line == null) {
                    // Same result callers got before (an unfilled sentence), but without
                    // relying on a NullPointerException to get there.
                    System.err.println("No SOURCE line found in sentence:");
                    System.err.println(sentenceString);
                    return sentence;
                }

                if (!useSameTextAndMeta) {
                    // Everything after the 7-character "SOURCE" prefix is appended to the metadata.
                    String metaFromSource = line.substring(7);
                    line = reader.readLine();
                    int firstSpace = line.indexOf(" ");
                    this.text = fixPunctuation(line.substring(firstSpace + 1).trim());
                    if (firstSpace > 0) {
                        String titleTag = isTitle ? " title" : "";
                        String boxTag = isBox ? " box" : "";
                        this.meta = line.substring(0, firstSpace) + " p=" + para + titleTag + boxTag + metaFromSource;
                    }
                    // Otherwise there is no id/text separator: the previous metadata is kept.
                }
                sentence.setText(this.text);
                sentence.setMetadata(this.meta);

                line = reader.readLine();
                while (line != null && line.startsWith("###")) {
                    line = reader.readLine();
                }

                // Stack of nodes that may still become the parent of upcoming elements.
                Stack<Node> nodeStack = new Stack<Node>();
                root.setSyntacticTag("ROOT");
                root.setLevel(0);
                nodeStack.add(root);

                while (line != null && line.length() != 0 && !line.startsWith("</s>") && !line.equals("&&")) {
                    TreeElement element = getElement(line);
                    if (element != null) {
                        // Drop candidates at the same or a deeper level: they are siblings
                        // or nephews of the new element and cannot be its parent.
                        while (!nodeStack.isEmpty() && element.getLevel() > 0
                                && element.getLevel() <= nodeStack.peek().getLevel()) {
                            nodeStack.pop();
                        }
                        if (element.isLeaf()) {
                            attachLeaf(nodeStack, root, element);
                        } else {
                            if (!nodeStack.isEmpty() && nodeStack.peek().getLevel() < element.getLevel()) {
                                nodeStack.peek().addElement(element);
                            } else {
                                System.err.println("should not happen!");
                            }
                            // A node is itself a parent candidate for what follows.
                            nodeStack.push((Node) element);
                        }
                    }
                    line = reader.readLine();
                }
                sentence.setRoot(root);
                return sentence;
            } catch (Exception e) {
                // Best effort: report the offending block and hand back what was parsed so far.
                System.err.println(sentenceString);
                e.printStackTrace();
                return sentence;
            }
        }

        /**
         * Adds a leaf to the innermost parent candidate whose level is lower than the
         * leaf's, falling back to the bottom of the stack (or to {@code root} when the
         * stack is empty).
         */
        private void attachLeaf(Stack<Node> nodeStack, Node root, TreeElement leaf) {
            if (nodeStack.isEmpty()) {
                root.addElement(leaf);
                return;
            }
            if (leaf.getLevel() == 0) {
                nodeStack.firstElement().addElement(leaf);
                return;
            }
            for (int index = nodeStack.size() - 1; index >= 0; index--) {
                Node candidate = nodeStack.get(index);
                if (candidate.getLevel() < leaf.getLevel()) {
                    candidate.addElement(leaf);
                    return;
                }
            }
            nodeStack.firstElement().addElement(leaf);
        }

        /**
         * Removes the whitespace between a closing guillemet and a directly following
         * period or comma.
         */
        private String fixPunctuation(String sentenceText) {
            String periodFixed = sentenceText.replaceAll("\\»\\s+\\.", "».");
            String commaFixed = periodFixed.replaceAll("\\»\\s+\\,", "»,");
            return commaFixed;
        }

        /**
         * Converts one line of the tree section into a tree element.
         *
         * <p>The line is tried, in this order, as a node, a regular leaf and a
         * punctuation leaf. The lines {@code _} and those starting with {@code <lixo} or
         * {@code pause} are ignored. A remaining line that does not start with {@code =}
         * is reported on {@code System.err} and returned as a level-1 leaf holding the
         * whole line. A remaining line that starts with {@code =} is tried as a leaf
         * whose tag uses {@code =} instead of {@code :}; failing that, the text found one
         * character after the last {@code =} becomes the lexeme of a leaf with empty tags.
         *
         * @param line one line of the tree section
         * @return a {@link Node} or {@link Leaf}, or {@code null} if the line is ignored
         */
        public TreeElement getElement(String line) {
            Matcher nodeMatcher = this.nodePattern.matcher(line);
            if (nodeMatcher.matches()) {
                Node node = new Node();
                node.setLevel(nodeMatcher.group(1).length() + 1);
                node.setSyntacticTag(nodeMatcher.group(2));
                return node;
            }

            Matcher leafMatcher = this.leafPattern.matcher(line);
            if (leafMatcher.matches()) {
                Leaf leaf = new Leaf();
                leaf.setLevel(leafMatcher.group(1).length() + 1);
                leaf.setSyntacticTag(leafMatcher.group(2));
                leaf.setFunctionalTag(leafMatcher.group(3));
                leaf.setSecondaryTag(leafMatcher.group(5));
                leaf.setMorphologicalTag(leafMatcher.group(6));
                leaf.setLexeme(leafMatcher.group(7));
                leaf.setLemma(leafMatcher.group(4));
                return leaf;
            }

            Matcher punctuationMatcher = this.punctuationPattern.matcher(line);
            if (punctuationMatcher.matches()) {
                Leaf leaf = new Leaf();
                leaf.setLevel(punctuationMatcher.group(1).length() + 1);
                leaf.setLexeme(punctuationMatcher.group(2));
                return leaf;
            }

            if (line.equals("_") || line.startsWith("<lixo") || line.startsWith("pause")) {
                return null;
            }

            if (!line.startsWith("=")) {
                System.err.println("Couldn't parse leaf: " + line);
                Leaf leaf = new Leaf();
                leaf.setLevel(1);
                leaf.setSyntacticTag("");
                leaf.setMorphologicalTag("");
                leaf.setLexeme(line);
                return leaf;
            }

            Matcher bizarreLeafMatcher = this.bizarreLeafPattern.matcher(line);
            if (bizarreLeafMatcher.matches()) {
                Leaf leaf = new Leaf();
                leaf.setLevel(bizarreLeafMatcher.group(1).length() + 1);
                leaf.setSyntacticTag(bizarreLeafMatcher.group(2));
                leaf.setMorphologicalTag(bizarreLeafMatcher.group(4));
                leaf.setLexeme(bizarreLeafMatcher.group(5));
                String lemma = bizarreLeafMatcher.group(3);
                if (lemma != null) {
                    if (lemma.length() > 2) {
                        // Strip the surrounding quote characters captured by the pattern.
                        lemma = lemma.substring(1, lemma.length() - 1);
                    }
                    leaf.setLemma(lemma);
                }
                return leaf;
            }

            // Last resort: the lexeme starts one character after the last '='.
            int level = line.lastIndexOf("=") + 1;
            // Clamp the start index: when the line ends with '=' (e.g. "=a="), level + 1
            // lies past the end of the line and substring() would throw.
            String lexeme = line.substring(Math.min(level + 1, line.length()));
            if (lexeme.matches("\\w.*?[\\.<>].*")) {
                return null;
            }
            Leaf leaf = new Leaf();
            leaf.setLevel(level + 1);
            leaf.setSyntacticTag("");
            leaf.setMorphologicalTag("");
            leaf.setLexeme(lexeme);
            return leaf;
        }
    }

    /**
     * Creates a sentence stream on top of the given stream. Starts with paragraph
     * counter 0, outside any title or box, and with a fresh {@link SentenceParser}.
     *
     * @param lineStream the underlying stream; each element it yields is handled as one line
     */
    public ADSentenceStream(ObjectStream<String> lineStream) {
        super(lineStream);
        this.parser = new SentenceParser();
        this.isBox = false;
        this.isTitle = false;
        this.paraID = 0;
    }

    /**
     * Reads lines from the underlying stream until one sentence block is complete and
     * returns the result of parsing it.
     *
     * <p>A block starts after a sentence start line and ends at a sentence end line or
     * an {@code ext} end line; lines starting with {@code A1} inside a block are
     * dropped. Outside a block, paragraph, title, box and {@code ext} lines update the
     * paragraph counter and the title/box flags passed to the parser. Blocks without
     * any collected line are skipped.
     *
     * @return the next parsed sentence, or {@code null} once the underlying stream is
     *         exhausted and no collected lines are left to parse
     * @throws IOException if reading from the underlying stream fails
     */
    @Override // opennlp.tools.util.ObjectStream
    public Sentence read() throws IOException {
        StringBuilder sentence = new StringBuilder();
        boolean sentenceStarted = false;
        while (true) {
            String line = this.samples.read();
            if (line == null) {
                // End of input. Parse an unterminated block if it has content; otherwise
                // stop. Previously an open but empty block made this loop spin forever.
                if (sentence.length() > 0) {
                    return this.parser.parse(sentence.toString(), this.paraID, this.isTitle, this.isBox);
                }
                return null;
            }

            if (sentenceStarted) {
                if (sentEnd.matcher(line).matches() || extEnd.matcher(line).matches()) {
                    sentenceStarted = false;
                } else if (!line.startsWith("A1")) {
                    sentence.append(line).append('\n');
                }
            } else if (sentStart.matcher(line).matches()) {
                sentenceStarted = true;
            } else {
                updateDocumentState(line);
            }

            if (!sentenceStarted && sentence.length() > 0) {
                return this.parser.parse(sentence.toString(), this.paraID, this.isTitle, this.isBox);
            }
        }
    }

    /** Updates the paragraph counter and the title/box flags from a line outside a sentence block. */
    private void updateDocumentState(String line) {
        if (paraStart.matcher(line).matches()) {
            this.paraID++;
        } else if (titleStart.matcher(line).matches()) {
            this.isTitle = true;
        } else if (titleEnd.matcher(line).matches()) {
            this.isTitle = false;
        } else if (textStart.matcher(line).matches()) {
            this.paraID = 0;
        } else if (boxStart.matcher(line).matches()) {
            this.isBox = true;
        } else if (boxEnd.matcher(line).matches()) {
            this.isBox = false;
        }
    }
}
