package de.charite.compbio.jannovar.impl.parse;

import com.google.common.collect.ImmutableList;
import de.charite.compbio.jannovar.data.ReferenceDictionary;
import de.charite.compbio.jannovar.impl.util.PathUtil;
import de.charite.compbio.jannovar.reference.GenomeInterval;
import de.charite.compbio.jannovar.reference.PositionType;
import de.charite.compbio.jannovar.reference.Strand;
import de.charite.compbio.jannovar.reference.TranscriptModel;
import de.charite.compbio.jannovar.reference.TranscriptModelBuilder;
import htsjdk.variant.vcf.VCFConstants;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.zip.GZIPInputStream;
import org.ini4j.Profile;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/charite/compbio/jannovar/impl/parse/UCSCParser.class */
/**
 * Parser for the UCSC "knownGene" family of transcript annotation files.
 *
 * <p>{@link #run()} resolves the file names configured under the INI keys {@code knownGene},
 * {@code knownGeneMrna}, {@code kgXref}, {@code knownToLocusLink} and (optionally)
 * {@code knownCanonical} against the base path given at construction, reads them in that order and
 * combines the result into a list of {@link TranscriptModel}s.
 */
public class UCSCParser implements TranscriptParser {
    private static final Logger LOGGER = LoggerFactory.getLogger(UCSCParser.class);

    /** Number of tab-separated fields every knownGene line must have. */
    private static final int NFIELDS = 12;

    /** Wrapper format applied to errors found in the exon start/end lists. */
    private static final String CORRUPTED_FILE_FORMAT =
        "%s. This should never happen, the knownGene.txt file may be corrupted";

    /** Separator used to split the exonStarts and exonEnds list fields (presumably a comma). */
    private static final String EXON_LIST_SEPARATOR = VCFConstants.INFO_FIELD_ARRAY_SEPARATOR;

    /**
     * Support level assigned to transcripts listed in the knownCanonical file.
     * NOTE(review): presumably mirrors a named constant defined elsewhere in the project.
     */
    private static final int SUPPORT_LEVEL_IN_KNOWN_CANONICAL = 6;

    /** Support level assigned to every transcript before the knownCanonical file is applied. */
    private static final int SUPPORT_LEVEL_NOT_IN_KNOWN_CANONICAL = 8;

    private final ReferenceDictionary refDict;
    private final String basePath;
    private final Profile.Section iniSection;

    /** Transcript builders collected so far, keyed by the accession read from knownGene. */
    private final Map<String, TranscriptModelBuilder> knownGeneMap = new HashMap<>();

    /**
     * @param referenceDictionary dictionary used to translate contig names into numeric ids
     * @param str base directory that contains the downloaded UCSC files
     * @param section INI section whose values name the individual UCSC files
     */
    public UCSCParser(ReferenceDictionary referenceDictionary, String str, Profile.Section section) {
        this.refDict = referenceDictionary;
        this.basePath = str;
        this.iniSection = section;
    }

    /**
     * Parses all configured UCSC files and builds the transcript models.
     *
     * <p>When a {@code knownCanonical} file is configured it determines the transcript support
     * levels; otherwise they are derived by {@code TranscriptSupportLevelsSetterFromLengths}.
     * Transcripts whose annotated length exceeds the length of their sequence are dropped.
     *
     * @return the transcript models that passed the length check
     * @throws TranscriptParseException if a required INI entry is missing or a file cannot be
     *     read or has an unexpected format
     */
    @Override // de.charite.compbio.jannovar.impl.parse.TranscriptParser
    public ImmutableList<TranscriptModel> run() throws TranscriptParseException {
        String knownGenePath = requiredPath("knownGene");
        String mrnaPath = requiredPath("knownGeneMrna");
        String xrefPath = requiredPath("kgXref");
        String locusLinkPath = requiredPath("knownToLocusLink");

        // knownCanonical is optional: an absent or empty entry selects the length-based fallback.
        String canonicalName = getINIFileName("knownCanonical");
        String canonicalPath = null;
        if (canonicalName != null && !canonicalName.isEmpty()) {
            canonicalPath = PathUtil.join(this.basePath, canonicalName);
        }

        parseKnownGeneFile(knownGenePath);
        parseKnownGeneMrna(mrnaPath);
        parseKnownGeneXref(xrefPath);
        parseKnown2LocusLink(locusLinkPath);
        if (canonicalPath != null) {
            parseKnownCanonical(canonicalPath);
        } else {
            TranscriptSupportLevelsSetterFromLengths.run(this.knownGeneMap.values());
        }

        ImmutableList.Builder<TranscriptModel> result = new ImmutableList.Builder<>();
        for (TranscriptModelBuilder transcriptBuilder : this.knownGeneMap.values()) {
            TranscriptModel model = transcriptBuilder.build();
            if (checkTranscriptInfo(model)) {
                result.add(model);
            }
        }
        return result.build();
    }

    /**
     * Resolves the file configured under a mandatory INI key against the base path.
     *
     * @throws TranscriptParseException if the INI section has no value for {@code key}
     */
    private String requiredPath(String key) throws TranscriptParseException {
        String fileName = getINIFileName(key);
        if (fileName == null) {
            throw new TranscriptParseException("Missing INI entry for required UCSC file: " + key);
        }
        return PathUtil.join(this.basePath, fileName);
    }

    /**
     * Checks that the transcript is not annotated as longer than its sequence.
     *
     * @return {@code true} if the transcript length fits into the sequence, {@code false}
     *     (after a debug log entry) otherwise
     */
    private boolean checkTranscriptInfo(TranscriptModel transcriptModel) {
        boolean fitsSequence = transcriptModel.transcriptLength() <= transcriptModel.getSequence().length();
        if (!fitsSequence) {
            LOGGER.debug("Transcript {} is indicated to be longer than its sequence. Ignoring.", transcriptModel.getAccession());
        }
        return fitsSequence;
    }

    /**
     * Builds a transcript model builder from one line of the knownGene file.
     *
     * <p>The line must consist of {@value #NFIELDS} tab-separated fields, used as follows:
     * 0 accession, 1 contig name, 2 strand, 3/4 transcript start/end, 5/6 CDS start/end,
     * 7 exon count, 8 exon starts, 9 exon ends. Start coordinates are read as zero-based and
     * shifted by one so that all intervals are created as one-based on the forward strand.
     * The gene symbol is initialised with the accession.
     *
     * @param str one line of the knownGene file
     * @return the populated builder
     * @throws TranscriptParseException if the line has the wrong number of fields, names an
     *     unknown contig, or contains a malformed strand, coordinate, exon count or exon list
     */
    public TranscriptModelBuilder parseTranscriptModelFromLine(String str) throws TranscriptParseException {
        String[] fields = str.split("\t");
        if (fields.length != NFIELDS) {
            throw new TranscriptParseException(String.format(
                "Malformed line in UCSC knownGene.txt file:\n%s\nExpected %d fields but there were %d",
                str, NFIELDS, fields.length));
        }

        TranscriptModelBuilder builder = new TranscriptModelBuilder();
        builder.setAccession(fields[0]);
        builder.setGeneSymbol(builder.getAccession());

        Integer contigId = this.refDict.getContigNameToID().get(fields[1]);
        if (contigId == null) {
            throw new TranscriptParseException("Could not parse chromosome field: " + fields[1]);
        }
        int chr = contigId.intValue();

        Strand strand = parseStrand(fields[2]);
        builder.setStrand(strand);

        // Each coordinate is parsed on its own so that the error names the field that is broken.
        int txStart = parseIntField(fields[3], "txStart");
        int txEnd = parseIntField(fields[4], "txEnd");
        builder.setTXRegion(forwardInterval(chr, txStart + 1, txEnd).withStrand(strand));

        int cdsStart = parseIntField(fields[5], "cdsStart");
        int cdsEnd = parseIntField(fields[6], "cdsEnd");
        builder.setCDSRegion(forwardInterval(chr, cdsStart + 1, cdsEnd).withStrand(strand));

        int exonCount = parseExonCount(fields[7]);

        String[] startTokens = fields[8].split(EXON_LIST_SEPARATOR);
        if (startTokens.length != exonCount) {
            throw corruptedFile(String.format(
                "[UCSCKGParser] Malformed exonStarts list: found %d but I expected %d exons",
                startTokens.length, exonCount));
        }
        int[] exonStarts = parseExonBoundaries(startTokens, exonCount, 1, "start", fields[8]);

        String[] endTokens = fields[9].split(EXON_LIST_SEPARATOR);
        // Surplus end entries are tolerated (only the first exonCount are used); too few are not.
        if (endTokens.length < exonCount) {
            throw corruptedFile(String.format(
                "[UCSCKGParser] Malformed exonEnds list: found %d but I expected %d exons",
                endTokens.length, exonCount));
        }
        int[] exonEnds = parseExonBoundaries(endTokens, exonCount, 0, "end", fields[9]);

        for (int i = 0; i < exonCount; i++) {
            builder.addExonRegion(forwardInterval(chr, exonStarts[i], exonEnds[i]));
        }
        return builder;
    }

    /** Maps a strand field starting with '+' or '-' to the corresponding {@link Strand}. */
    private static Strand parseStrand(String field) throws TranscriptParseException {
        char symbol = field.isEmpty() ? '\0' : field.charAt(0);
        if (symbol == '+') {
            return Strand.FWD;
        }
        if (symbol == '-') {
            return Strand.REV;
        }
        throw new TranscriptParseException("Malformed strand: " + field);
    }

    /** Parses an integer field, reporting {@code fieldName} together with the bad value on failure. */
    private static int parseIntField(String value, String fieldName) throws TranscriptParseException {
        try {
            return Integer.parseInt(value);
        } catch (NumberFormatException e) {
            throw new TranscriptParseException("Could not parse " + fieldName + ":" + value);
        }
    }

    /** Parses the exon count (limited to the {@code short} range) and rejects negative values. */
    private static int parseExonCount(String value) throws TranscriptParseException {
        int count;
        try {
            count = Short.parseShort(value);
        } catch (NumberFormatException e) {
            throw new TranscriptParseException("Could not parse exonCount:" + value);
        }
        if (count < 0) {
            throw new TranscriptParseException("Invalid (negative) exonCount:" + value);
        }
        return count;
    }

    /**
     * Parses the first {@code count} entries of an exon start or end list.
     *
     * @param tokens the split list entries; must contain at least {@code count} elements
     * @param count number of exons
     * @param shift value added to every parsed entry (1 for starts, 0 for ends)
     * @param kind "start" or "end"; only used in the error message
     * @param rawList the unsplit list field; only used in the error message
     */
    private static int[] parseExonBoundaries(String[] tokens, int count, int shift, String kind, String rawList)
            throws TranscriptParseException {
        int[] positions = new int[count];
        for (int i = 0; i < count; i++) {
            try {
                positions[i] = Integer.parseInt(tokens[i]) + shift;
            } catch (NumberFormatException e) {
                throw corruptedFile(String.format(
                    "[UCSCKGParser] Malformed exon " + kind + " at position %d of line %s", i, rawList));
            }
        }
        return positions;
    }

    /** Creates (without throwing) the exception used for inconsistencies in the exon lists. */
    private static TranscriptParseException corruptedFile(String detail) {
        return new TranscriptParseException(String.format(CORRUPTED_FILE_FORMAT, detail));
    }

    /** Creates a one-based interval on the forward strand of the given contig. */
    private GenomeInterval forwardInterval(int chr, int beginPos, int endPos) {
        return new GenomeInterval(this.refDict, Strand.FWD, chr, beginPos, endPos, PositionType.ONE_BASED);
    }

    /**
     * Reads the knownGene file and registers one builder per parseable line in the transcript map.
     *
     * <p>Lines that cannot be parsed are skipped on purpose; the reason is logged at debug level.
     *
     * @throws TranscriptParseException if the file is missing or cannot be read
     */
    private void parseKnownGeneFile(String path) throws TranscriptParseException {
        try (BufferedReader reader = getBufferedReaderFromFilePath(path, path.endsWith(".gz"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                try {
                    TranscriptModelBuilder transcriptBuilder = parseTranscriptModelFromLine(line);
                    this.knownGeneMap.put(transcriptBuilder.getAccession(), transcriptBuilder);
                } catch (TranscriptParseException e) {
                    // Best effort: one bad line must not abort the whole import.
                    LOGGER.debug("Skipping knownGene line that could not be parsed: {}", e.getMessage());
                }
            }
        } catch (FileNotFoundException e) {
            throw new TranscriptParseException(String.format(
                "[Jannovar/USCSKGParser] Could not find KnownGene.txt file: %s\n%s", path, e.toString()));
        } catch (IOException e) {
            throw new TranscriptParseException(String.format(
                "[Jannovar/USCSKGParser] Exception while parsing UCSC KnownGene file at \"%s\"\n%s", path, e.toString()));
        }
    }

    /**
     * Reads the knownToLocusLink file (two tab-separated columns: transcript id and a numeric id)
     * and sets the gene id of every known transcript to {@code "ENTREZ"} followed by that number.
     *
     * @throws TranscriptParseException if the file cannot be read, a line does not have exactly
     *     two fields, or the second field is not an integer
     */
    private void parseKnown2LocusLink(String path) throws TranscriptParseException {
        int matched = 0;
        int unmatched = 0;
        try (BufferedReader reader = getBufferedReaderFromFilePath(path, path.endsWith(".gz"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\t");
                if (fields.length != 2) {
                    throw new TranscriptParseException(String.format(
                        "Bad format for UCSC KnownToLocusLink.txt file: %s. Got %d fields instead of the expected 2.",
                        line, fields.length));
                }
                int numericId;
                try {
                    numericId = Integer.parseInt(fields[1]);
                } catch (NumberFormatException e) {
                    throw new TranscriptParseException(String.format(
                        "Bad format for UCSC KnownToLocusLink.txt file: %s. Could not parse id \"%s\".",
                        line, fields[1]));
                }
                TranscriptModelBuilder transcriptBuilder = this.knownGeneMap.get(fields[0]);
                if (transcriptBuilder == null) {
                    unmatched++;
                } else {
                    matched++;
                    transcriptBuilder.setGeneID("ENTREZ" + numericId);
                }
            }
        } catch (FileNotFoundException e) {
            throw new TranscriptParseException(String.format(
                "Exception while parsing UCSC  knownToLocusLink file at \"%s\"\n%s", path, e.toString()));
        } catch (IOException e) {
            throw new TranscriptParseException(String.format(
                "Exception while parsing UCSC KnownToLocusfile at \"%s\"\n%s", path, e.toString()));
        }
        LOGGER.info("knownToLocusLink contained ids for {} knownGenes (no ids available for {})", matched, unmatched);
    }

    /**
     * Applies the knownCanonical file: every transcript first receives support level
     * {@value #SUPPORT_LEVEL_NOT_IN_KNOWN_CANONICAL}, then each transcript named in the file is
     * set to {@value #SUPPORT_LEVEL_IN_KNOWN_CANONICAL}.
     *
     * @throws TranscriptParseException if the file cannot be read or a line does not have
     *     exactly six tab-separated fields
     */
    private void parseKnownCanonical(String path) throws TranscriptParseException {
        for (TranscriptModelBuilder transcriptBuilder : this.knownGeneMap.values()) {
            transcriptBuilder.setTranscriptSupportLevel(SUPPORT_LEVEL_NOT_IN_KNOWN_CANONICAL);
        }

        int matched = 0;
        int unmatched = 0;
        try (BufferedReader reader = getBufferedReaderFromFilePath(path, path.endsWith(".gz"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\t");
                if (fields.length != 6) {
                    throw new TranscriptParseException(String.format(
                        "Bad format for UCSC knownCanonicalPath.txt file: %s. Got %d fields instead of the expected 6.",
                        line, fields.length));
                }
                // NOTE(review): the transcript id is taken from the last (sixth) column; confirm
                // this against the knownCanonical table schema.
                TranscriptModelBuilder transcriptBuilder = this.knownGeneMap.get(fields[5]);
                if (transcriptBuilder == null) {
                    unmatched++;
                } else {
                    matched++;
                    transcriptBuilder.setTranscriptSupportLevel(SUPPORT_LEVEL_IN_KNOWN_CANONICAL);
                }
            }
        } catch (FileNotFoundException e) {
            throw new TranscriptParseException(String.format(
                "Exception while parsing UCSC knownCanonicalPath file at \"%s\"\n%s", path, e.toString()));
        } catch (IOException e) {
            throw new TranscriptParseException(String.format(
                "Exception while parsing UCSC knownCanonicalPath at \"%s\"\n%s", path, e.toString()));
        }
        LOGGER.info("knownCanonicalPath contained ids for {} knownGenes (no ids available for {})", matched, unmatched);
    }

    /**
     * Reads the knownGeneMrna file (two tab-separated columns: transcript id and sequence) and
     * stores the upper-cased sequence on every known transcript.
     *
     * @throws TranscriptParseException if the file cannot be read or a line does not have
     *     exactly two fields
     */
    private void parseKnownGeneMrna(String path) throws TranscriptParseException {
        int withSequence = 0;
        try (BufferedReader reader = getBufferedReaderFromFilePath(path, path.endsWith(".gz"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\t");
                if (fields.length != 2) {
                    throw new TranscriptParseException(String.format(
                        "Bad format for UCSC knownGeneMrna.txt file: %s. Got %d fields instead of the expected 2.",
                        line, fields.length));
                }
                String sequence = fields[1].toUpperCase();
                TranscriptModelBuilder transcriptBuilder = this.knownGeneMap.get(fields[0]);
                if (transcriptBuilder != null) {
                    withSequence++;
                    transcriptBuilder.setSequence(sequence);
                }
            }
        } catch (FileNotFoundException e) {
            throw new TranscriptParseException(String.format("Could not find file: %s\n%s", path, e.toString()));
        } catch (IOException e) {
            throw new TranscriptParseException(String.format(
                "Exception while parsing UCSC KnownGene FASTA file at \"%s\"\n%s", path, e.toString()));
        }
        LOGGER.info("Found {} transcript models from UCSC KnownGenes resource, {} of which had sequences",
            this.knownGeneMap.size(), withSequence);
    }

    /**
     * Placeholder for the kgXref parser.
     *
     * <p>NOTE(review): the decompiler could not reconstruct this method (it reported 260 skipped
     * instructions), so the body below throws unconditionally and {@link #run()} cannot complete
     * until the real implementation is restored from the original sources. The only detail the
     * decompiler output preserves is that a malformed line is rejected with a
     * {@code TranscriptParseException} whose message is formatted from
     * {@code "Error, malformed ucsc xref line: %s\nExpected 8 fields but got %d"}.
     */
    private void parseKnownGeneXref(java.lang.String r7) throws de.charite.compbio.jannovar.impl.parse.TranscriptParseException {
        throw new UnsupportedOperationException("Method not decompiled: de.charite.compbio.jannovar.impl.parse.UCSCParser.parseKnownGeneXref(java.lang.String):void");
    }

    /**
     * Returns the bare file name (last path component) of the value stored under {@code key} in
     * the INI section, or {@code null} when the section has no value for that key.
     */
    private String getINIFileName(String key) {
        String configured = (String) this.iniSection.get(key);
        return configured == null ? null : new File(configured).getName();
    }

    /**
     * Opens a text file for line-wise reading, transparently decompressing gzip input.
     *
     * <p>The platform default charset is used for decoding. If wrapping the file stream fails
     * (for example because the gzip header is invalid) the underlying file stream is closed
     * before the exception is propagated.
     *
     * @param path file to open
     * @param gzipped whether the file content is gzip-compressed
     * @throws IOException if the file cannot be opened or the gzip header cannot be read
     */
    private static BufferedReader getBufferedReaderFromFilePath(String path, boolean gzipped) throws IOException {
        FileInputStream fileStream = new FileInputStream(path);
        try {
            InputStreamReader decoder = gzipped
                ? new InputStreamReader(new GZIPInputStream(fileStream))
                : new InputStreamReader(fileStream);
            return new BufferedReader(decoder);
        } catch (IOException | RuntimeException e) {
            try {
                fileStream.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }
}
