package org.mitre.medfacts.zoner;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.bootstrap.DOMImplementationRegistry;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSInput;
import org.w3c.dom.ls.LSParser;

/* loaded from: input_file:org/mitre/medfacts/zoner/ZonerCli.class */
public class ZonerCli {
    protected String inputFilename;
    protected List<SectionRegexDefinition> sectionRegexDefinitionList;
    protected Map<String, Node> fragmentMap;
    protected List<Range> fullRangeList;
    protected List<Range> rangeList;
    protected List<Range> fullRangeListAdjusted;
    protected List<HeadingRange> headings;
    protected String entireContents;
    public static final int expansionThreshold = 5;
    private CharacterOffsetToLineTokenConverter converter;
    private static final Logger logger = Logger.getLogger(ZonerCli.class.getName());
    public static final String EOL = System.getProperty("line.separator");
    public static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
    private static String defaultRegexFilename = "org/mitre/medfacts/zoner/section_regex.xml";

    /* loaded from: input_file:org/mitre/medfacts/zoner/ZonerCli$HeadingRange.class */
    /**
     * Describes one matched section heading: where the heading text sits in the
     * document, the section label it maps to, and the matched text itself.
     */
    public class HeadingRange {
        /** Character offset at which the heading match starts. */
        protected int headingBegin;
        /** Character offset at which the heading match ends. */
        protected int headingEnd;
        /** Section label associated with the heading. */
        protected String label;
        /** The heading text as it was matched in the document. */
        protected String headingText;

        /** Creates an empty heading range; populate it through the setters. */
        public HeadingRange() {
        }

        /**
         * Renders the heading for log output.
         *
         * @return a string of the form {@code HEADING "<heading text>" (<label>)}
         */
        @Override
        public String toString() {
            return String.format("HEADING \"%s\" (%s)", headingText, label);
        }

        /** @return the offset at which the heading match starts */
        public int getHeadingBegin() {
            return headingBegin;
        }

        /** @param headingBegin the offset at which the heading match starts */
        public void setHeadingBegin(int headingBegin) {
            this.headingBegin = headingBegin;
        }

        /** @return the offset at which the heading match ends */
        public int getHeadingEnd() {
            return headingEnd;
        }

        /** @param headingEnd the offset at which the heading match ends */
        public void setHeadingEnd(int headingEnd) {
            this.headingEnd = headingEnd;
        }

        /** @param label the section label associated with the heading */
        public void setLabel(String label) {
            this.label = label;
        }

        /** @return the section label associated with the heading */
        public String getLabel() {
            return label;
        }

        /** @param headingText the heading text as matched in the document */
        public void setHeadingText(String headingText) {
            this.headingText = headingText;
        }

        /** @return the heading text as matched in the document */
        public String getHeadingText() {
            return headingText;
        }
    }

    /* loaded from: input_file:org/mitre/medfacts/zoner/ZonerCli$Range.class */
    /**
     * A labelled span of the document, ordered by begin offset and then by end
     * offset. Besides the raw character offsets it can carry the corresponding
     * line/token positions and two bookkeeping flags ({@code ignore} and
     * {@code truncated}).
     */
    public class Range implements Comparable<Range> {
        /** Character offset at which the range starts. */
        protected int begin;
        /** Character offset at which the range ends. */
        protected int end;
        /** Line/token position corresponding to the start of the range. */
        protected LineAndTokenPosition beginLineAndToken;
        /** Line/token position corresponding to the end of the range. */
        protected LineAndTokenPosition endLineAndToken;
        /** Section label of the range. */
        protected String label;
        /** Marks a range that should be skipped. */
        protected boolean ignore;
        /** Marks a range that was cut short. */
        protected boolean truncated;

        /** Creates an empty range; populate it through the setters. */
        public Range() {
        }

        /**
         * Renders the range for log output.
         *
         * @return a string of the form {@code RANGE "<label>" [<begin>-<end>]}
         */
        @Override
        public String toString() {
            return String.format("RANGE \"%s\" [%d-%d]", label, begin, end);
        }

        /**
         * Orders ranges by {@code begin}; ranges with the same begin are ordered
         * by {@code end}.
         *
         * @param range the range to compare against
         * @return -1, 0 or 1 when this range sorts before, equal to or after {@code range}
         */
        @Override // java.lang.Comparable
        public int compareTo(Range range) {
            int byBegin = Integer.compare(begin, range.begin);
            if (byBegin != 0) {
                return byBegin;
            }
            return Integer.compare(end, range.end);
        }

        /** @return the offset at which the range starts */
        public int getBegin() {
            return begin;
        }

        /** @param begin the offset at which the range starts */
        public void setBegin(int begin) {
            this.begin = begin;
        }

        /** @return the offset at which the range ends */
        public int getEnd() {
            return end;
        }

        /** @param end the offset at which the range ends */
        public void setEnd(int end) {
            this.end = end;
        }

        /** @return the section label of the range */
        public String getLabel() {
            return label;
        }

        /** @param label the section label of the range */
        public void setLabel(String label) {
            this.label = label;
        }

        /** @return {@code true} when the range is marked to be skipped */
        public boolean isIgnore() {
            return ignore;
        }

        /** @param ignore {@code true} to mark the range as skipped */
        public void setIgnore(boolean ignore) {
            this.ignore = ignore;
        }

        /** @return {@code true} when the range is marked as cut short */
        public boolean isTruncated() {
            return truncated;
        }

        /** @param truncated {@code true} to mark the range as cut short */
        public void setTruncated(boolean truncated) {
            this.truncated = truncated;
        }

        /** @return the line/token position of the start of the range */
        public LineAndTokenPosition getBeginLineAndToken() {
            return beginLineAndToken;
        }

        /** @param position the line/token position of the start of the range */
        public void setBeginLineAndToken(LineAndTokenPosition position) {
            this.beginLineAndToken = position;
        }

        /** @return the line/token position of the end of the range */
        public LineAndTokenPosition getEndLineAndToken() {
            return endLineAndToken;
        }

        /** @param position the line/token position of the end of the range */
        public void setEndLineAndToken(LineAndTokenPosition position) {
            this.endLineAndToken = position;
        }
    }

    /* loaded from: input_file:org/mitre/medfacts/zoner/ZonerCli$SectionRegexDefinition.class */
    /**
     * One section definition: the compiled heading pattern, the label assigned
     * to its matches, and whether every match or only the first one is used.
     */
    public class SectionRegexDefinition {
        /** Compiled pattern that recognises the section heading. */
        protected Pattern regex;
        /** Label assigned to ranges produced by this definition. */
        protected String label;
        /** When {@code false}, only the first match of the pattern is used. */
        protected boolean findAll;

        /** Creates an empty definition; populate it through the setters. */
        public SectionRegexDefinition() {
        }

        /** @return the compiled heading pattern */
        public Pattern getRegex() {
            return regex;
        }

        /** @param regex the compiled heading pattern */
        public void setRegex(Pattern regex) {
            this.regex = regex;
        }

        /** @return the label assigned to matches of this definition */
        public String getLabel() {
            return label;
        }

        /** @param label the label assigned to matches of this definition */
        public void setLabel(String label) {
            this.label = label;
        }

        /** @return {@code true} when every match of the pattern is used */
        public boolean isFindAll() {
            return findAll;
        }

        /** @param findAll {@code true} to use every match, {@code false} for the first only */
        public void setFindAll(boolean findAll) {
            this.findAll = findAll;
        }
    }

    /**
     * Creates a zoner that loads its section definitions from the default
     * regex file; equivalent to passing a {@code null} URI to
     * {@link #ZonerCli(URI)}.
     */
    public ZonerCli() {
        this((URI) null);
    }

    /**
     * Creates a zoner whose section definitions are read from an XML file.
     *
     * <p>The document is expected to contain {@code /root/fragments/fragment}
     * elements (each with a {@code name} and an {@code expansion} child) and
     * {@code /root/sections/section} elements (each with a {@code regex} and a
     * {@code label} child). The {@code ignore-case} and {@code find-all}
     * attributes of {@code regex} both default to {@code "true"} when absent
     * or empty. Every pattern is compiled with {@link Pattern#MULTILINE}.
     * Sections whose regex cannot be expanded (expansion returns {@code null})
     * are skipped.
     *
     * @param uri location of the regex definition file, or {@code null} to load
     *            the default definition file from the classpath
     * @throws IllegalStateException if {@code uri} is {@code null} and the
     *         default definition file is not on the classpath
     * @throws RuntimeException if the URI is malformed or an XPath expression
     *         cannot be compiled or evaluated
     */
    public ZonerCli(URI uri) {
        this.fullRangeList = new ArrayList<Range>();
        this.rangeList = new ArrayList<Range>();
        this.fullRangeListAdjusted = new ArrayList<Range>();
        this.headings = new ArrayList<HeadingRange>();
        this.fragmentMap = new LinkedHashMap<String, Node>();
        this.sectionRegexDefinitionList = new ArrayList<SectionRegexDefinition>();
        try {
            URI regexFileUri = uri;
            if (regexFileUri == null) {
                java.net.URL defaultResource = getClass().getClassLoader().getResource(defaultRegexFilename);
                if (defaultResource == null) {
                    // Fail with a message that names the missing file instead of a bare NPE.
                    throw new IllegalStateException("default section regex file not found on classpath: " + defaultRegexFilename);
                }
                regexFileUri = defaultResource.toURI();
            }

            Document document = parseDocument(regexFileUri.toString());
            XPath xpath = XPathFactory.newInstance().newXPath();
            XPathExpression sectionExpr = xpath.compile("/root/sections/section");
            XPathExpression regexExpr = xpath.compile("./regex");
            XPathExpression ignoreCaseExpr = xpath.compile("./regex/@ignore-case");
            XPathExpression findAllExpr = xpath.compile("./regex/@find-all");
            XPathExpression labelExpr = xpath.compile("./label/text()");
            XPathExpression fragmentExpr = xpath.compile("/root/fragments/fragment");
            XPathExpression fragmentNameExpr = xpath.compile("./name/text()");
            XPathExpression expansionExpr = xpath.compile("./expansion");
            XPathExpression fragmentRefExpr = xpath.compile("./fragment-ref");
            XPathExpression refNameExpr = xpath.compile("./@name");

            // Serialising nodes is only worth the cost when FINEST output is actually enabled.
            boolean traceEnabled = logger.isLoggable(Level.FINEST);

            // Pass 1: collect the named fragments so section regexes can reference them.
            NodeList fragmentNodes = (NodeList) fragmentExpr.evaluate(document, XPathConstants.NODESET);
            for (int i = 0; i < fragmentNodes.getLength(); i++) {
                Element fragmentElement = (Element) fragmentNodes.item(i);
                String fragmentName = fragmentNameExpr.evaluate(fragmentElement);
                Node expansionNode = (Node) expansionExpr.evaluate(fragmentElement, XPathConstants.NODE);
                this.fragmentMap.put(fragmentName, expansionNode);
                if (traceEnabled) {
                    logger.log(Level.FINEST, "found fragment: {0} -> {1}", new Object[]{fragmentName, nodeToString(expansionNode)});
                }
            }

            // Pass 2: expand fragment references and compile one pattern per section.
            NodeList sectionNodes = (NodeList) sectionExpr.evaluate(document, XPathConstants.NODESET);
            for (int i = 0; i < sectionNodes.getLength(); i++) {
                Element sectionElement = (Element) sectionNodes.item(i);
                Node regexNode = (Node) regexExpr.evaluate(sectionElement, XPathConstants.NODE);
                String regexText = expandFragments(regexNode, fragmentRefExpr, refNameExpr);
                if (regexText == null) {
                    // Expansion gave up on this section; skip it.
                    continue;
                }

                String ignoreCaseText = ignoreCaseExpr.evaluate(sectionElement);
                if (ignoreCaseText == null || ignoreCaseText.isEmpty()) {
                    ignoreCaseText = "true";
                }
                boolean ignoreCase = ignoreCaseText.equalsIgnoreCase("true");

                String findAllText = findAllExpr.evaluate(sectionElement);
                if (findAllText == null || findAllText.isEmpty()) {
                    findAllText = "true";
                }
                boolean findAll = findAllText.equalsIgnoreCase("true");

                String label = labelExpr.evaluate(sectionElement);
                logger.finest(String.format(" - section -- label: \"%s\"; regex: \"%s\"; ignore case: \"%s\"; match all: \"%s\"", label, regexText, ignoreCaseText, findAllText));

                int flags = Pattern.MULTILINE;
                if (ignoreCase) {
                    flags |= Pattern.CASE_INSENSITIVE;
                }

                SectionRegexDefinition definition = new SectionRegexDefinition();
                definition.setLabel(label);
                definition.setRegex(Pattern.compile(regexText, flags));
                definition.setFindAll(findAll);
                this.sectionRegexDefinitionList.add(definition);
            }
        } catch (URISyntaxException e) {
            logger.log(Level.SEVERE, "problem (URISyntaxException) reading regex from xml file", (Throwable) e);
            throw new RuntimeException("problem (URISyntaxException) reading regex from xml file", e);
        } catch (XPathExpressionException e) {
            logger.log(Level.SEVERE, "problem (XPathExpressionException) reading regex from xml file", (Throwable) e);
            throw new RuntimeException("problem (XPathExpressionException) reading regex from xml file", e);
        }
    }

    /**
     * Parses an XML document with the DOM Load-and-Save API.
     *
     * @param str system identifier (URI string) of the XML document to parse
     * @return the parsed document
     * @throws RuntimeException if no Load-and-Save capable DOM implementation
     *         can be obtained from the {@link DOMImplementationRegistry}
     */
    public static Document parseDocument(String str) {
        try {
            DOMImplementationLS loadSave = (DOMImplementationLS) DOMImplementationRegistry.newInstance().getDOMImplementation("LS");
            if (loadSave == null) {
                // The registry returns null when no implementation supports the requested feature.
                throw new RuntimeException("problem before attempting to parse xml (registry problem): no DOM implementation supports the \"LS\" feature");
            }
            LSParser parser = loadSave.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null);
            LSInput input = loadSave.createLSInput();
            input.setSystemId(str);
            return parser.parse(input);
        } catch (ClassCastException | ClassNotFoundException | IllegalAccessException | InstantiationException e) {
            Logger.getLogger(ZonerCli.class.getName()).log(Level.SEVERE, "problem before attempting to parse xml (registry problem)", (Throwable) e);
            throw new RuntimeException("problem before attempting to parse xml (registry problem)", e);
        }
    }

    /**
     * Replaces every {@code fragment-ref} element beneath a regex node with the
     * expansion registered for it in {@code fragmentMap}, level by level, and
     * returns the resulting text content.
     *
     * <p>Level 0 handles the references selected by {@code xPathExpression};
     * level <i>n</i> handles references found at
     * {@code ./expansion/(...n times...)/fragment-ref}. Each reference receives
     * its own deep copy of the expansion: inserting the shared node itself
     * would move it out of any place it had been inserted before, so a
     * fragment referenced twice would lose its first expansion.
     *
     * @param node             the regex element to expand (modified in place)
     * @param xPathExpression  selects the top-level fragment references of {@code node}
     * @param xPathExpression2 extracts the fragment name from a fragment reference
     * @return the expanded regex text, or {@code null} when the nesting limit
     *         of 5 levels was reached
     * @throws IllegalArgumentException if {@code node} is {@code null}
     * @throws IllegalStateException if a reference names a fragment without a registered expansion
     * @throws RuntimeException if an XPath expression cannot be compiled or evaluated
     */
    private String expandFragments(Node node, XPathExpression xPathExpression, XPathExpression xPathExpression2) {
        if (node == null) {
            throw new IllegalArgumentException("cannot expand fragments: the regex node is missing");
        }
        // Same value as the class constant expansionThreshold (5).
        final int maxDepth = 5;
        Element regexElement = (Element) node;
        // Serialising nodes is only worth the cost when FINEST output is actually enabled.
        boolean traceEnabled = logger.isLoggable(Level.FINEST);
        if (traceEnabled) {
            logger.log(Level.FINEST, "expandFragments on Node: {0}", nodeToString(node));
        }
        try {
            XPath xpath = XPathFactory.newInstance().newXPath();
            NodeList fragmentRefs = (NodeList) xPathExpression.evaluate(node, XPathConstants.NODESET);
            int depth = 0;
            while (depth < maxDepth && fragmentRefs.getLength() > 0) {
                for (int refIndex = 0; refIndex < fragmentRefs.getLength(); refIndex++) {
                    Element fragmentRef = (Element) fragmentRefs.item(refIndex);
                    String fragmentName = xPathExpression2.evaluate(fragmentRef);
                    Node expansion = this.fragmentMap.get(fragmentName);
                    if (expansion == null) {
                        throw new IllegalStateException("fragment-ref names a fragment with no expansion: \"" + fragmentName + "\"");
                    }
                    // Deep-copy so the registered expansion stays available for further references.
                    fragmentRef.getParentNode().replaceChild(expansion.cloneNode(true), fragmentRef);
                    if (traceEnabled) {
                        logger.log(Level.FINEST, "Level {0} fragment {1} expansion: {2}", new Object[]{Integer.valueOf(depth), Integer.valueOf(refIndex), nodeToString(regexElement)});
                    }
                }
                depth++;
                if (traceEnabled) {
                    logger.log(Level.FINEST, "checking for any level {0} embedded fragments in {1}", new Object[]{Integer.valueOf(depth), nodeToString(regexElement)});
                }
                // References introduced by the expansions just inserted sit one <expansion> level deeper.
                StringBuilder embeddedPath = new StringBuilder("./");
                for (int level = 0; level < depth; level++) {
                    embeddedPath.append("expansion/");
                }
                embeddedPath.append("fragment-ref");
                fragmentRefs = (NodeList) xpath.compile(embeddedPath.toString()).evaluate(regexElement, XPathConstants.NODESET);
                logger.log(Level.FINEST, "found {0} embedded fragments", Integer.valueOf(fragmentRefs.getLength()));
            }
            if (depth == maxDepth) {
                // Give up on this regex; the caller skips sections that return null.
                logger.log(Level.WARNING, "fragment expansion stopped at nesting level {0}; regex not expanded", Integer.valueOf(maxDepth));
                return null;
            }
            String expanded = regexElement.getTextContent();
            logger.log(Level.FINEST, "\texpanded to {0}", expanded);
            return expanded;
        } catch (XPathExpressionException e) {
            logger.log(Level.SEVERE, "problem (XPathExpressionException) expanding regex fragment", (Throwable) e);
            throw new RuntimeException("problem (XPathExpressionException) expanding regex fragment", e);
        }
    }

    /**
     * Serialises a DOM node to XML text without an XML declaration.
     *
     * @param node the node to serialise
     * @return the XML text, or {@code null} when the transformation fails (the
     *         failure is logged at SEVERE level)
     */
    private static String nodeToString(Node node) {
        try {
            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            StringWriter xmlText = new StringWriter();
            serializer.setOutputProperty("omit-xml-declaration", "yes");
            serializer.transform(new DOMSource(node), new StreamResult(xmlText));
            return xmlText.toString();
        } catch (TransformerException e) {
            // Also covers TransformerConfigurationException, which is a subclass.
            Logger.getLogger(ZonerCli.class.getName()).log(Level.SEVERE, (String) null, (Throwable) e);
            return null;
        }
    }

    /**
     * Command-line entry point: reads the given file, finds its section
     * headings and logs the resulting ranges and headings.
     *
     * @param strArr command-line arguments; exactly one is expected, the input file name
     * @throws IOException if the input file cannot be read
     */
    public static void main(String[] strArr) throws IOException {
        if (strArr.length != 1) {
            logger.severe("Usage:  " + ZonerCli.class.getName() + " <input file name>");
            return;
        }
        logger.finest("finest logging");
        logger.severe("severe logging");
        System.out.println("runnning stdout...");
        String inputFile = strArr[0];
        System.out.println("inputFile: " + inputFile);
        ZonerCli zonerCli = new ZonerCli();
        zonerCli.setInputFilename(inputFile);
        zonerCli.readFile(inputFile);
        // findHeadings() dereferences the converter for every heading it finds, and
        // the converter is only created here (or injected through setConverter).
        // It must be created after readFile() because it is built from the contents.
        zonerCli.initialize();
        zonerCli.execute();
        zonerCli.logRangesAndHeadings();
    }

    /**
     * Runs one zoning pass over the current contents: discards the results of
     * any earlier pass, finds the headings, then collects the non-ignored
     * ranges.
     *
     * @throws IOException declared for callers; none of the steps invoked here declares it
     */
    public void execute() throws IOException {
        // Start from a clean slate so repeated runs do not accumulate results.
        this.clearRangeLists();
        this.clearHeadings();

        this.findHeadings();
        this.pruneRanges();
    }

    /**
     * Creates the default offset-to-line/token converter from the current
     * contents, unless a converter has already been set.
     */
    public void initialize() {
        if (this.converter != null) {
            return;
        }
        this.converter = new CharacterOffsetToLineTokenConverterDefaultImpl(getEntireContents());
    }

    /** @return the input file name previously stored with {@link #setInputFilename(String)} */
    public String getInputFilename() {
        return inputFilename;
    }

    /**
     * Reads a text file line by line and stores it as the entire contents.
     * Every line, including the last, is terminated with the platform line
     * separator, so the stored text may differ from the file in its line
     * endings.
     *
     * <p>The file is decoded with the platform default charset.
     *
     * @param str path of the file to read
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading fails
     */
    public void readFile(String str) throws IOException, FileNotFoundException {
        logger.finest(String.format("input: %s", str));
        StringBuilder contents = new StringBuilder();
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader reader = new BufferedReader(new FileReader(new File(str)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                contents.append(line).append(System.lineSeparator());
            }
        }
        setEntireContents(contents.toString());
    }

    /** @param inputFilename the input file name to remember */
    public void setInputFilename(String inputFilename) {
        this.inputFilename = inputFilename;
    }

    /**
     * Joins the given lines with {@link #EOL} between consecutive entries; no
     * separator is added after the last one.
     *
     * @param strArr the lines to join
     * @return the joined text, or an empty string for an empty array
     */
    public String buildString(String[] strArr) {
        StringBuilder joined = new StringBuilder();
        for (int index = 0; index < strArr.length; index++) {
            if (index > 0) {
                joined.append(EOL);
            }
            joined.append(strArr[index]);
        }
        return joined.toString();
    }

    /**
     * Locates section headings in the entire contents and derives the extent
     * of each section.
     *
     * <p>Phase 1 runs every section regex over the contents and records each
     * match (only the first match when the definition is not "find all") as a
     * {@link Range} in the full range list, which is then sorted by begin and
     * end offset.
     *
     * <p>Phase 2 walks the sorted list. For every range not marked as ignored
     * it records a {@link HeadingRange}, resolves overlaps with the following
     * range(s), and computes the section end: the last character of the
     * contents for the final section, otherwise the last non-blank character
     * before the next kept heading. The range's {@code end} is then overwritten
     * with that section end, its line/token positions are filled in, and it is
     * added to the adjusted range list.
     *
     * <p>NOTE(review): {@code this.converter} is dereferenced for every kept
     * range but is only assigned by {@code initialize()} / {@code setConverter()};
     * calling this method without either appears to cause a
     * NullPointerException as soon as one heading is found -- confirm callers.
     */
    public void findHeadings() {
        // Phase 1: collect every heading match as a Range.
        for (SectionRegexDefinition sectionRegexDefinition : this.sectionRegexDefinitionList) {
            Matcher matcher = sectionRegexDefinition.regex.matcher(getEntireContents());
            // z == true means "stop after the first match".
            boolean z = !sectionRegexDefinition.isFindAll();
            logger.finest(String.format(" trying %s ...", sectionRegexDefinition.getLabel()));
            while (matcher.find()) {
                int start = matcher.start();
                int end = matcher.end();
                logger.finest(String.format(" ** " + sectionRegexDefinition.getLabel() + " match found: %d-%d", Integer.valueOf(start), Integer.valueOf(end)));
                Range range = new Range();
                range.setLabel(sectionRegexDefinition.getLabel());
                range.setBegin(start);
                range.setEnd(end);
                range.setIgnore(false);
                getFullRangeList().add(range);
                if (z) {
                    break;
                }
            }
        }
        // Order by begin offset, then end offset (Range.compareTo).
        Collections.sort(getFullRangeList());
        Iterator<Range> it = getFullRangeList().iterator();
        while (it.hasNext()) {
            logger.finest(String.format(" - %s", it.next()));
        }
        logger.finest("===");
        // Phase 2: resolve overlaps and compute where each section ends.
        ArrayList arrayList = new ArrayList();
        int size = getFullRangeList().size();
        int i = 0;
        while (i < size) {
            // z2: the current range is (or is treated as) the last section of the document.
            boolean z2 = i == size - 1;
            Range range2 = getFullRangeList().get(i);
            if (!range2.isIgnore()) {
                int begin = range2.getBegin();
                int end2 = range2.getEnd();
                // i2 receives the offset of the last character of the section.
                int i2 = 0;
                // The heading is recorded before overlap handling, using the match's own offsets.
                HeadingRange headingRange = new HeadingRange();
                this.headings.add(headingRange);
                headingRange.setHeadingEnd(end2);
                headingRange.setHeadingBegin(begin);
                headingRange.setLabel(range2.getLabel());
                headingRange.setHeadingText(this.entireContents.substring(begin, end2));
                if (z2) {
                    // Last heading: the section runs to the end of the contents.
                    i2 = getEntireContents().length() - 1;
                } else {
                    int i3 = i + 1;
                    Range range3 = getFullRangeList().get(i3);
                    int begin2 = range3.getBegin();
                    if (begin2 < end2) {
                        // The next match starts inside the current heading match.
                        logger.finest("*** overlap found: \"" + headingRange.getHeadingText() + "\" " + range2 + " *** \"" + this.entireContents.substring(begin2, range3.getEnd()) + "\" " + range3);
                        if (end2 - begin < range3.getEnd() - begin2) {
                            // The current match is the shorter one: cut it off before the next match.
                            // (The end set here is overwritten by setEnd(i2) further down.)
                            logger.finest("\ttruncating current: " + range2);
                            range2.setTruncated(true);
                            range2.setEnd(begin2 - 1);
                        } else {
                            // The current match is at least as long: ignore the following
                            // matches for as long as they start inside it.
                            while (true) {
                                i3++;
                                if (i3 >= size || begin2 >= end2) {
                                    break;
                                }
                                logger.finest("\tignoring next: " + range3);
                                range3.setIgnore(true);
                                range3 = getFullRangeList().get(i3);
                                begin2 = range3.getBegin();
                            }
                            if (i3 == size && begin2 < end2) {
                                // Ran off the end of the list while still overlapping: the
                                // final candidate is ignored too and the current section
                                // becomes the last one.
                                range3.setIgnore(true);
                                i2 = getEntireContents().length() - 1;
                                z2 = true;
                            }
                        }
                    }
                    if (!range2.isIgnore() && !z2) {
                        // Section ends at the last character that is not a space, CR or LF
                        // before the next kept heading.
                        i2 = findLastCharOffsetOfPreviousWord(this.entireContents, begin2 - 1);
                    }
                }
                if (!range2.isIgnore()) {
                    int i4 = i2;
                    logger.fine("ZonerCli: calling converter on 'begin': " + begin);
                    LineAndTokenPosition convert = this.converter.convert(begin);
                    logger.fine("ZonerCli: calling converter on 'realSectionEnd': " + i4);
                    LineAndTokenPosition convert2 = this.converter.convert(i4);
                    logger.finest(String.format(" - %s: %s (%d-%d) (section end: %d) %s to %s ", range2, getEntireContents().substring(begin, end2), Integer.valueOf(begin), Integer.valueOf(end2), Integer.valueOf(i4), convert.toString(), convert2.toString()));
                    // From here on the range spans the whole section, not just the heading.
                    range2.setEnd(i2);
                    range2.setBeginLineAndToken(convert);
                    range2.setEndLineAndToken(convert2);
                    arrayList.add(range2);
                }
            }
            i++;
        }
        this.fullRangeListAdjusted = arrayList;
    }

    /** @return the complete document text currently held by this zoner */
    public String getEntireContents() {
        return entireContents;
    }

    /** @param entireContents the complete document text to analyse */
    public void setEntireContents(String entireContents) {
        this.entireContents = entireContents;
    }

    /**
     * Appends every range of the full range list that is not marked as
     * ignored to the range list, preserving order.
     */
    public void pruneRanges() {
        Iterator<Range> candidates = getFullRangeList().iterator();
        while (candidates.hasNext()) {
            Range candidate = candidates.next();
            if (candidate.isIgnore()) {
                continue;
            }
            getRangeList().add(candidate);
        }
    }

    /** Empties both the range list and the full range list. */
    public void clearRangeLists() {
        List<Range> pruned = getRangeList();
        pruned.clear();
        List<Range> all = getFullRangeList();
        all.clear();
    }

    /** Empties the list of headings. */
    public void clearHeadings() {
        List<HeadingRange> found = getHeadings();
        found.clear();
    }

    /** @return the live list of all matched ranges, including ignored ones */
    public List<Range> getFullRangeList() {
        return fullRangeList;
    }

    /** @return the live list of ranges collected by {@link #pruneRanges()} */
    public List<Range> getRangeList() {
        return rangeList;
    }

    /**
     * Replaces the range list returned by {@link #getRangeList()}.
     *
     * <p>This setter used to assign the full range list instead, so a value
     * stored here was never visible through {@link #getRangeList()} and
     * silently replaced the list behind {@link #getFullRangeList()}.
     *
     * @param list the new range list
     */
    public void setRangeList(List<Range> list) {
        this.rangeList = list;
    }

    /** @return the live list of headings recorded by {@link #findHeadings()} */
    public List<HeadingRange> getHeadings() {
        return headings;
    }

    /** @param headings the list that replaces the current headings list */
    public void setHeadings(List<HeadingRange> headings) {
        this.headings = headings;
    }

    /**
     * Writes the range list, the full range list and the headings to the
     * logger at FINEST level, each preceded by a banner line.
     */
    public void logRangesAndHeadings() {
        logger.finest("================== RangeList ======================");
        for (Range kept : getRangeList()) {
            logger.finest(kept.toString());
        }

        logger.finest("================== FullRangeList ======================");
        for (Range any : getFullRangeList()) {
            logger.finest(any.toString());
        }

        logger.finest("================== Headings ======================");
        for (HeadingRange heading : getHeadings()) {
            logger.finest(heading.toString());
        }
    }

    /**
     * Scans backwards from {@code i} over spaces, carriage returns and line
     * feeds and returns the offset of the first other character found.
     *
     * @param str the text to scan
     * @param i   the offset at which the backwards scan starts
     * @return the offset of the nearest character at or before {@code i} that
     *         is not a space, CR or LF; {@code 0} when the scan reaches the
     *         start of the text or {@code i} is negative
     */
    private int findLastCharOffsetOfPreviousWord(String str, int i) {
        int position = i;
        while (position >= 0) {
            char current = str.charAt(position);
            boolean blank = current == ' ' || current == '\r' || current == '\n';
            if (!blank) {
                break;
            }
            position--;
        }
        return Math.max(position, 0);
    }

    /**
     * Reads a text file and parses it into its full text and per-line tokens,
     * printing progress messages to standard output.
     *
     * <p>The file is decoded with the platform default charset.
     *
     * @param file the text file to process
     * @return the parsed representation of the file
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading fails
     */
    public static ParsedTextFile processTextFile(File file) throws FileNotFoundException, IOException {
        System.out.format("processing text file \"%s\"...%n", file.getAbsolutePath());
        ParsedTextFile parsed;
        // try-with-resources closes the reader even when parsing throws.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            parsed = processTextBufferedReader(reader);
        }
        System.out.println("=====");
        System.out.format("done processing text file \"%s\".%n", file.getAbsolutePath());
        return parsed;
    }

    /**
     * Reads all lines from the reader and builds a {@code ParsedTextFile}
     * holding the full text (every line terminated with the platform line
     * separator) and, per line, the tokens obtained by splitting on
     * whitespace.
     *
     * <p>The reader is not closed; that remains the caller's responsibility.
     *
     * @param bufferedReader the source of the text
     * @return the parsed text; its token array is empty when the reader yields no lines
     * @throws FileNotFoundException declared for callers; not thrown by this method itself
     * @throws IOException if reading fails
     */
    public static ParsedTextFile processTextBufferedReader(BufferedReader bufferedReader) throws FileNotFoundException, IOException {
        StringBuilder everything = new StringBuilder();
        List<String[]> tokenLines = new ArrayList<String[]>();
        String line;
        while ((line = bufferedReader.readLine()) != null) {
            everything.append(line).append(System.lineSeparator());
            tokenLines.add(WHITESPACE_PATTERN.split(line));
        }
        ParsedTextFile parsed = new ParsedTextFile();
        parsed.setEverything(everything.toString());
        // The target array must be a String[][]: a String[] cannot hold String[] elements
        // and cannot be cast to String[][].
        parsed.setTokens(tokenLines.toArray(new String[0][]));
        return parsed;
    }

    /**
     * Formats one line of tokens as {@code [0:"tok0", 1:"tok1", ...]}.
     *
     * @param strArr the tokens of a single line
     * @return the bracketed, comma-separated list of {@code index:"token"}
     *         entries; {@code []} for an empty array
     */
    public static String printOutLineOfTokens(String[] strArr) {
        StringBuilder rendered = new StringBuilder("[");
        for (int index = 0; index < strArr.length; index++) {
            if (index > 0) {
                rendered.append(", ");
            }
            rendered.append(index).append(":").append('\"').append(strArr[index]).append('\"');
        }
        return rendered.append("]").toString();
    }

    /**
     * Formats all lines of tokens, one line per row, as
     * {@code [line_0:::[...], \nline_1:::[...]\n]}. Each row is rendered by
     * {@link #printOutLineOfTokens(String[])}; every row but the last is
     * followed by {@code ", "} before its newline.
     *
     * @param strArr the tokens of every line, indexed by line and then by token
     * @return the formatted text; {@code []} for an empty array
     */
    public static String printOutFileOfLinesOfTokens(String[][] strArr) {
        StringBuilder rendered = new StringBuilder("[");
        int lastLine = strArr.length - 1;
        for (int lineNumber = 0; lineNumber <= lastLine; lineNumber++) {
            rendered.append("line_").append(lineNumber).append(":::");
            rendered.append(printOutLineOfTokens(strArr[lineNumber]));
            rendered.append(lineNumber < lastLine ? ", \n" : "\n");
        }
        return rendered.append("]").toString();
    }

    /** @return the list of kept ranges produced by the latest {@link #findHeadings()} run */
    public List<Range> getFullRangeListAdjusted() {
        return fullRangeListAdjusted;
    }

    /** @param adjustedRanges the list that replaces the adjusted range list */
    public void setFullRangeListAdjusted(List<Range> adjustedRanges) {
        this.fullRangeListAdjusted = adjustedRanges;
    }

    /** @return the offset-to-line/token converter, or {@code null} if none has been set or created */
    public CharacterOffsetToLineTokenConverter getConverter() {
        return converter;
    }

    /** @param converter the offset-to-line/token converter to use in {@link #findHeadings()} */
    public void setConverter(CharacterOffsetToLineTokenConverter converter) {
        this.converter = converter;
    }
}
