package org.apache.ctakes.ytex.tools;

import gov.nih.nlm.nls.lvg.Api.LvgCmdApi;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.ctakes.core.nlp.tokenizer.Token;
import org.apache.ctakes.core.nlp.tokenizer.TokenizerPTB;
import org.apache.ctakes.ytex.kernel.KernelContextHolder;
import org.apache.ctakes.ytex.umls.dao.UMLSDao;
import org.apache.ctakes.ytex.umls.model.UmlsAuiFirstWord;
import org.springframework.transaction.PlatformTransactionManager;
import org.springframework.transaction.support.TransactionTemplate;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/apache/ctakes/ytex/tools/SetupAuiFirstWord.class */
/**
 * Command-line tool that reads (aui, str) pairs from {@link UMLSDao}, tokenizes
 * each string and stores the resulting {@link UmlsAuiFirstWord} rows (first
 * word, tokenized string and, when LVG is available, first stem and stemmed
 * string) back through the DAO.
 *
 * <p>LVG is optional: if it cannot be initialized, a warning is logged and only
 * the unstemmed fields are populated.
 */
public class SetupAuiFirstWord {
    private static final Log log = LogFactory.getLog(SetupAuiFirstWord.class);

    /** Classpath location of the LVG configuration file. */
    private static final String LVG_PROPERTIES_RESOURCE = "org/apache/ctakes/lvg/data/config/lvg.properties";
    /** Location (classpath, or relative to {@code ../} on disk) of the LVG annotator descriptor. */
    private static final String LVG_ANNOTATOR_DESCRIPTOR = "ctakes-lvg/desc/analysis_engine/LvgAnnotator.xml";
    /** Strings of this length or longer are skipped without being tokenized. */
    private static final int MAX_SOURCE_STR_LENGTH = 200;
    /** First words longer than this are not stored. */
    private static final int MAX_FWORD_LENGTH = 70;
    /** Tokenized strings longer than this are not stored. */
    private static final int MAX_TOKENIZED_STR_LENGTH = 250;
    /** Initial capacity of the per-batch result list. */
    private static final int BATCH_CAPACITY = 1000;
    // Token type codes for which stemming is attempted.
    // NOTE(review): presumably Token.TYPE_UNKNOWN / Token.TYPE_WORD - confirm against Token.
    private static final int TOKEN_TYPE_UNKNOWN = 0;
    private static final int TOKEN_TYPE_WORD = 1;
    /** Marker found in the second field of the LVG output when no canonical form was produced. */
    private static final String LVG_NO_OUTPUT = "No Output";

    private TokenizerPTB tokenizer;
    /** LVG command API; stays {@code null} when LVG could not be initialized. */
    private LvgCmdApi lvgCmd;
    /** Words that are never passed to LVG. */
    private Set<String> exclusionSet = null;

    /**
     * Creates the tool and initializes the tokenizer, the exclusion set and
     * (best effort) LVG.
     *
     * @throws Exception if the tokenizer or the exclusion set cannot be initialized
     */
    public SetupAuiFirstWord() throws Exception {
        initTokenizer();
        initExclusionSet();
        initLvg();
    }

    /**
     * Initializes {@link #lvgCmd} from the {@code lvg.properties} classpath
     * resource. Any failure is logged as a warning and leaves {@code lvgCmd}
     * {@code null}, which disables stemming.
     */
    private void initLvg() {
        try {
            URL resource = getClass().getClassLoader().getResource(LVG_PROPERTIES_RESOURCE);
            if (resource == null) {
                log.warn("could not initialize lvg - will not create a stemmed dictionary. "
                        + LVG_PROPERTIES_RESOURCE + " not found on classpath");
                return;
            }
            if (log.isInfoEnabled()) {
                log.info("loading lvg.properties from:" + resource.getPath());
            }
            File propertiesFile = toFile(resource);
            String configDir = propertiesFile.getParentFile().getAbsolutePath();
            // Strip the trailing "data/config" so user.dir points at the directory that
            // contains data/ (the trailing separator is kept).
            // NOTE(review): presumably LVG resolves its data files relative to user.dir - confirm.
            System.setProperty("user.dir", configDir.substring(0, configDir.length() - "data/config".length()));
            this.lvgCmd = new LvgCmdApi("-f:l:b", propertiesFile.getAbsolutePath());
        } catch (Exception e) {
            log.warn("could not initialize lvg - will not create a stemmed dictionary.", e);
        }
    }

    /**
     * Converts a resource URL to a {@link File}. {@link URL#getPath()} is
     * URL-encoded (e.g. {@code %20} for a space), so the URI form is tried
     * first; the raw path is used only if the URL cannot be turned into a file
     * URI.
     */
    private static File toFile(URL resource) {
        try {
            return new File(resource.toURI());
        } catch (java.net.URISyntaxException | IllegalArgumentException e) {
            return new File(resource.getPath());
        }
    }

    /**
     * Loads the exclusion set from the {@code ExclusionSet} name/value pair of
     * the LVG annotator descriptor. If the descriptor cannot be found, the
     * exclusion set is left empty.
     *
     * @throws ParserConfigurationException if no XML parser can be created
     * @throws SAXException if the descriptor is not well-formed XML
     * @throws IOException if the descriptor cannot be read or closed
     */
    private void initExclusionSet() throws ParserConfigurationException, SAXException, IOException {
        this.exclusionSet = new HashSet<>();
        // try-with-resources accepts a null resource and closes the stream on every path,
        // including when parsing throws.
        try (InputStream descriptor = openLvgAnnotatorDescriptor()) {
            if (descriptor == null) {
                log.warn(LVG_ANNOTATOR_DESCRIPTOR + " not available, using empty exclusion set");
                return;
            }
            NodeList pairs = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(descriptor)
                    .getElementsByTagName("nameValuePair");
            for (int i = 0; i < pairs.getLength(); i++) {
                Element pair = (Element) pairs.item(i);
                if (!"ExclusionSet".equals(firstChildValue(pair.getElementsByTagName("name"), 0))) {
                    continue;
                }
                NodeList words = pair.getElementsByTagName("string");
                for (int j = 0; j < words.getLength(); j++) {
                    String word = firstChildValue(words, j);
                    if (word != null) {
                        this.exclusionSet.add(word);
                    }
                }
            }
        }
    }

    /**
     * Opens the LVG annotator descriptor from the classpath, falling back to
     * {@code ../ctakes-lvg/...} on the file system.
     *
     * @return an open stream, or {@code null} if the descriptor is in neither place
     * @throws FileNotFoundException if the fallback file exists but cannot be opened
     */
    private InputStream openLvgAnnotatorDescriptor() throws FileNotFoundException {
        InputStream stream = getClass().getClassLoader().getResourceAsStream(LVG_ANNOTATOR_DESCRIPTOR);
        if (stream != null) {
            return stream;
        }
        log.warn("classpath:" + LVG_ANNOTATOR_DESCRIPTOR + " not available, attempting to load from file system");
        File file = new File("../" + LVG_ANNOTATOR_DESCRIPTOR);
        return file.exists() ? new BufferedInputStream(new FileInputStream(file)) : null;
    }

    /**
     * Returns the node value of the first child of {@code nodes.item(index)},
     * or {@code null} if that node does not exist or has no children.
     */
    private static String firstChildValue(NodeList nodes, int index) {
        if (index >= nodes.getLength()) {
            return null;
        }
        NodeList children = nodes.item(index).getChildNodes();
        return children.getLength() == 0 ? null : children.item(0).getNodeValue();
    }

    /** Creates the tokenizer. */
    private void initTokenizer() throws FileNotFoundException, IOException {
        this.tokenizer = new TokenizerPTB();
    }

    /**
     * Entry point: builds the tool and runs {@link #setupAuiFirstWord()}.
     *
     * @param strArr command-line arguments (unused)
     * @throws Exception if initialization fails
     */
    public static void main(String[] strArr) throws Exception {
        new SetupAuiFirstWord().setupAuiFirstWord();
    }

    /**
     * Processes batches returned by {@link UMLSDao#getAllAuiStr} until an empty
     * batch comes back, starting from {@link UMLSDao#getLastAui()}. Each row is
     * tokenized; rows that are too long or fail to tokenize are logged and
     * skipped, the rest are inserted batch by batch.
     */
    public void setupAuiFirstWord() {
        UMLSDao umlsDao = (UMLSDao) KernelContextHolder.getApplicationContext().getBean(UMLSDao.class);
        // NOTE(review): this template is configured but never used to execute anything in
        // this method. It is kept so the transaction-manager bean lookup still happens;
        // confirm whether it can be dropped.
        new TransactionTemplate((PlatformTransactionManager) KernelContextHolder.getApplicationContext()
                .getBean(PlatformTransactionManager.class))
                .setPropagationBehavior(TransactionTemplate.PROPAGATION_REQUIRES_NEW);
        String lastAui = umlsDao.getLastAui();
        List<Object[]> batch;
        do {
            batch = umlsDao.getAllAuiStr(lastAui);
            List<UmlsAuiFirstWord> firstWords = new ArrayList<>(BATCH_CAPACITY);
            for (Object[] row : batch) {
                String aui = (String) row[0];
                String str = (String) row[1];
                // Remember the last aui seen so the next query continues from it.
                lastAui = aui;
                UmlsAuiFirstWord firstWord = buildFirstWord(aui, str);
                if (firstWord != null) {
                    firstWords.add(firstWord);
                }
            }
            if (!firstWords.isEmpty()) {
                umlsDao.insertAuiFirstWord(firstWords);
                log.info("inserted " + firstWords.size() + " rows");
            }
        } while (!batch.isEmpty());
    }

    /**
     * Tokenizes one row and applies the length limits.
     *
     * @return the entry to insert, or {@code null} if the row must be skipped
     *         (the reason is logged)
     */
    private UmlsAuiFirstWord buildFirstWord(String aui, String str) {
        if (str == null) {
            // A null string would otherwise abort the whole run with a NullPointerException.
            log.error("Error tokenizing aui=" + aui + ", str=" + str);
            return null;
        }
        if (str.length() >= MAX_SOURCE_STR_LENGTH) {
            if (log.isDebugEnabled()) {
                log.debug("Skipping aui because str to long: aui=" + aui + ", str=" + str);
            }
            return null;
        }
        try {
            UmlsAuiFirstWord firstWord = tokenizeStr(aui, str);
            if (firstWord.getFword().length() > MAX_FWORD_LENGTH) {
                if (log.isDebugEnabled()) {
                    log.debug("fword too long: aui=" + aui + ", str=" + firstWord.getFword());
                }
                return null;
            }
            if (firstWord.getTokenizedStr().length() > MAX_TOKENIZED_STR_LENGTH) {
                if (log.isDebugEnabled()) {
                    log.debug("string too long: aui=" + aui + ", str=" + str);
                }
                return null;
            }
            if (log.isDebugEnabled()) {
                log.debug("aui=" + aui + ", fw=" + firstWord);
            }
            return firstWord;
        } catch (Exception e) {
            log.error("Error tokenizing aui=" + aui + ", str=" + str, e);
            return null;
        }
    }

    /**
     * Tokenizes {@code str2} and builds the first-word entry for it.
     *
     * <p>The tokenized string is the token texts joined by single spaces; the
     * first word is the lower-cased text of the first token (empty if there are
     * no tokens). When LVG is available the stemmed equivalents are set as
     * well; otherwise the stem fields are left untouched.
     *
     * @param str the aui stored on the returned entry
     * @param str2 the string to tokenize
     * @return a new, never {@code null}, entry
     * @throws Exception if tokenizing or stemming fails
     */
    public UmlsAuiFirstWord tokenizeStr(String str, String str2) throws Exception {
        final boolean stemming = this.lvgCmd != null;
        String firstWord = "";
        String firstStem = "";
        StringBuilder tokenized = new StringBuilder();
        StringBuilder stemmed = new StringBuilder();
        boolean first = true;
        for (Token token : this.tokenizer.tokenize(str2)) {
            String text = token.getText();
            String stem = stemming ? stemToken(token) : null;
            if (first) {
                first = false;
                firstWord = text;
                if (stemming) {
                    firstStem = stem;
                }
            } else {
                tokenized.append(" ");
                if (stemming) {
                    stemmed.append(" ");
                }
            }
            tokenized.append(text);
            if (stemming) {
                stemmed.append(stem);
            }
        }
        UmlsAuiFirstWord result = new UmlsAuiFirstWord();
        result.setAui(str);
        result.setFword(firstWord.toLowerCase(Locale.ENGLISH));
        result.setTokenizedStr(tokenized.toString());
        if (stemming) {
            result.setFstem(firstStem.toLowerCase(Locale.ENGLISH));
            result.setStemmedStr(stemmed.toString());
        }
        return result;
    }

    /**
     * Returns the canonical form of the token's text for token types 0 and 1,
     * falling back to the text itself when no canonical form is available.
     * Other token types are returned unchanged.
     */
    private String stemToken(Token token) throws Exception {
        String text = token.getText();
        int type = token.getType();
        if (type != TOKEN_TYPE_WORD && type != TOKEN_TYPE_UNKNOWN) {
            return text;
        }
        String canonical = getCanonicalForm(text);
        return (canonical == null || canonical.length() == 0) ? text : canonical;
    }

    /**
     * Asks LVG for the canonical form of {@code str}.
     *
     * @return the second {@code |}-separated field of the LVG output, or
     *         {@code null} if LVG is unavailable, the word is in the exclusion
     *         set, LVG returned nothing, or the field is {@code "No Output"}
     * @throws Exception if the LVG call fails
     */
    private String getCanonicalForm(String str) throws Exception {
        if (this.lvgCmd == null || this.exclusionSet.contains(str)) {
            return null;
        }
        String lvgOutput = this.lvgCmd.MutateToString(str);
        if (lvgOutput == null) {
            log.warn("mutateToString returned null for: " + str);
            return null;
        }
        String[] fields = lvgOutput.split("\\|");
        // Plain equality: the marker is a literal, so no regex is needed per token.
        if (fields.length >= 2 && !LVG_NO_OUTPUT.equals(fields[1])) {
            return fields[1];
        }
        return null;
    }
}
