package org.phenotips.vocabulary.internal.solr;

import java.io.File;
import java.io.PrintWriter;
import java.net.URLEncoder;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/* loaded from: input_file:org/phenotips/vocabulary/internal/solr/EthnicityParser.class */
/**
 * Command-line tool that converts the table rows of {@code ListEthnicGroups.html} (read from the working directory)
 * into two files written to the same directory:
 * <ul>
 * <li>{@code ListEthnicGroups.csv}: one {@code name<TAB>popsize} line per row;</li>
 * <li>{@code ListEthnicGroups.xml}: an {@code <add>} document with one {@code <doc>} per row, holding an
 * {@code ETHNO:}-prefixed id, the name and the population size, boosted by the natural logarithm of the
 * population size.</li>
 * </ul>
 * The name of a row is the text of its first link; the population size is read from its fourth cell.
 */
public final class EthnicityParser {
    /**
     * Matches a population figure at the start of a cell: an optional {@code <}, a number that may contain commas
     * (group 1) and, optionally after some non-digit text, a second such number (group 2).
     */
    private static final Pattern POPSIZE_PATTERN = Pattern.compile("^<?\\s*([0-9,]+)(?:[^0-9]+)?([0-9,]+)?");

    /** Population size used when a row provides no usable figure. */
    private static final int DEFAULT_POPSIZE = 1000;

    /** Zero-based index of the table cell holding the population figure. */
    private static final int POPSIZE_COLUMN = 3;

    /** Charset name used for reading the input page and for writing both output files. */
    private static final String ENCODING = "UTF-8";

    private EthnicityParser() {
    }

    /**
     * Runs the conversion. Any failure is reported as a stack trace on the standard error stream.
     *
     * @param strArr command-line arguments; ignored
     */
    public static void main(String[] strArr) {
        try {
            // Parse the input before opening the writers, so a missing input does not leave empty output files.
            Elements rows = Jsoup.parse(new File("ListEthnicGroups.html"), ENCODING).select("tr");
            // try-with-resources guarantees both files are flushed and closed even if a row fails;
            // the explicit charset keeps non-ASCII names intact regardless of the platform default.
            try (PrintWriter csv = new PrintWriter("ListEthnicGroups.csv", ENCODING);
                PrintWriter xml = new PrintWriter("ListEthnicGroups.xml", ENCODING)) {
                xml.println("<add>");
                for (Element row : rows) {
                    Elements links = row.select("a");
                    if (links.isEmpty()) {
                        // A row without a link has no name to index; skip it instead of aborting the run.
                        continue;
                    }
                    String name = links.get(0).text();
                    Elements cells = row.select("td");
                    int popsize = cells.size() > POPSIZE_COLUMN
                        ? parsePopulationSize(cells.get(POPSIZE_COLUMN).ownText())
                        : DEFAULT_POPSIZE;
                    // Only the letters a-z survive in the id; Locale.ROOT keeps the lower-casing independent
                    // of the JVM's default locale.
                    String id = URLEncoder.encode(name.toLowerCase(Locale.ROOT), ENCODING).replaceAll("[^a-z]", "");
                    xml.println("<doc boost=\"" + Math.log(popsize) + "\"><field name=\"id\">ETHNO:" + id
                        + "</field><field name=\"name\">" + escapeXml(name)
                        + "</field><field name=\"popsize\">" + popsize + "</field></doc>");
                    csv.println(name + "\t" + popsize);
                }
                xml.println("</add>");
                // PrintWriter never throws on write failures; checkError() flushes and reports them.
                if (csv.checkError() || xml.checkError()) {
                    System.err.println("Failed to write ListEthnicGroups.csv or ListEthnicGroups.xml");
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts a population size from the text of a table cell.
     * <p>
     * A single number is returned as is; when two numbers are found, their average is returned. If the second
     * number cannot be parsed, the first one is used alone.
     *
     * @param cellText the text of the population cell
     * @return the parsed size, or {@link #DEFAULT_POPSIZE} when the text holds no parsable, positive figure
     */
    private static int parsePopulationSize(String cellText) {
        Matcher matcher = POPSIZE_PATTERN.matcher(cellText);
        if (!matcher.find()) {
            return DEFAULT_POPSIZE;
        }
        int size;
        try {
            size = parseCount(matcher.group(1));
        } catch (NumberFormatException ignored) {
            // Only commas, or a value too large for an int: treat as unknown.
            return DEFAULT_POPSIZE;
        }
        if (matcher.group(2) != null) {
            try {
                // Sum in long arithmetic: two large bounds can exceed Integer.MAX_VALUE.
                size = (int) (((long) size + parseCount(matcher.group(2))) / 2);
            } catch (NumberFormatException ignored) {
                // Unparsable second bound: keep the first one.
            }
        }
        // A size of 0 would make Math.log(size) -Infinity, which is not a usable boost.
        return size > 0 ? size : DEFAULT_POPSIZE;
    }

    /**
     * Parses a number that may contain commas, e.g. {@code 1,234,567}.
     *
     * @param digits digits and commas, as captured by {@link #POPSIZE_PATTERN}
     * @return the numeric value
     * @throws NumberFormatException if no digits remain after removing commas or the value does not fit an int
     */
    private static int parseCount(String digits) {
        return Integer.parseInt(digits.replace(",", ""));
    }

    /**
     * Escapes the characters that are not allowed verbatim in XML element content.
     *
     * @param text the raw text
     * @return the text with {@code &}, {@code <} and {@code >} replaced by their XML entities
     */
    private static String escapeXml(String text) {
        // '&' must be replaced first so the entities produced below are not escaped again.
        return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
    }
}
