package ontologizer.association;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PushbackInputStream;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;
import ontologizer.go.PrefixPool;
import ontologizer.go.Term;
import ontologizer.go.TermContainer;
import ontologizer.go.TermID;
import ontologizer.types.ByteString;

/* loaded from: input_file:ontologizer/association/AssociationParser.class */
public class AssociationParser {
    /** Associations accepted by whichever importer ran, in the order they were added. */
    private ArrayList<Association> associations;
    /** Maps a synonym (a GAF synonym entry or an Affymetrix id) to the item name it was recorded under. */
    private HashMap<ByteString, ByteString> synonym2gene;
    /** Maps a GAF DB object id to the object symbol of the last accepted line that used it. */
    private HashMap<ByteString, ByteString> dbObjectID2gene;
    /** Prefix pool handed to {@code Association.createFromGAFLine} for every GAF line. */
    private PrefixPool prefixPool;
    /** Format detected by the constructor; stays {@code UNKNOWN} when no importer ran. */
    private Type fileType;
    /** Number of GAF lines whose object symbol was already mapped to a different DB object. */
    private int symbolWarnings;
    /** Number of GAF lines whose DB object was already mapped to a different object symbol. */
    private int dbObjectWarnings;
    /** Logger shared by all parser instances. */
    private static Logger logger = Logger.getLogger(AssociationParser.class.getCanonicalName());
    /** Separator searched for when truncating the Affymetrix gene symbol column. */
    private static ByteString THREE_SLASHES = new ByteString("///");

    /* JADX INFO: Access modifiers changed from: package-private */
    /* renamed from: ontologizer.association.AssociationParser$1GAFByteLineScanner, reason: invalid class name */
    /* loaded from: input_file:ontologizer/association/AssociationParser$1GAFByteLineScanner.class */
    /**
     * Line scanner that turns each line of a GAF association file into an
     * {@link Association} and files it into the enclosing parser.
     *
     * <p>A line is dropped (and counted) when it carries a NOT qualifier, when its
     * evidence code is not among the requested ones, when its term is unknown or
     * obsolete, or when the item is not among the requested names. The counter
     * fields are read by the enclosing parser after {@code scan()} to log a summary.
     */
    public class C1GAFByteLineScanner extends AbstractByteLineScanner {
        /** Number of lines handed to {@link #newLine} so far (1-based for the current line). */
        int lineno;
        /** Time stamp (ms) of the last progress notification. */
        long millis;
        /** Lines that yielded an association with a readable term id. */
        int good;
        /** Lines that could not be processed because an exception was raised. */
        int bad;
        /** Associations dropped for any reason. */
        int skipped;
        /** Associations dropped because of a NOT qualifier. */
        int nots;
        /** Associations dropped because their evidence code was not requested. */
        int evidenceMismatch;
        /** Associations that passed the name filter. */
        int kept;
        /** Associations dropped because their term is obsolete. */
        int obsolete;
        /** Lazily built index from alternative term ids to their terms. */
        HashMap<TermID, Term> altTermID2Term;
        final /* synthetic */ IAssociationParserProgress val$progress;
        final /* synthetic */ FileChannel val$fc;
        final /* synthetic */ HashSet val$myEvidences;
        final /* synthetic */ TermContainer val$terms;
        final /* synthetic */ HashSet val$usedGoTerms;
        final /* synthetic */ HashSet val$names;
        final /* synthetic */ HashMap val$objectSymbol2dbObject;
        final /* synthetic */ HashMap val$dbObject2ObjectSymbol;
        final /* synthetic */ HashMap val$gene2Associations;

        /**
         * @param inputStream stream the lines are read from
         * @param iAssociationParserProgress progress listener, may be {@code null}
         * @param fileChannel channel whose position is reported as progress
         * @param hashSet accepted evidence codes, or {@code null} to accept all
         * @param termContainer known terms
         * @param hashSet2 receives the id of every term that was resolved
         * @param hashSet3 item names to keep, or {@code null} to keep all
         * @param hashMap object symbol to DB object map, filled while scanning
         * @param hashMap2 DB object to object symbol map, filled while scanning
         * @param hashMap3 object symbol to association list map, filled while scanning
         */
        /* JADX WARN: 'super' call moved to the top of the method (can break code semantics) */
        public C1GAFByteLineScanner(InputStream inputStream, IAssociationParserProgress iAssociationParserProgress, FileChannel fileChannel, HashSet hashSet, TermContainer termContainer, HashSet hashSet2, HashSet hashSet3, HashMap hashMap, HashMap hashMap2, HashMap hashMap3) {
            super(inputStream);
            this.val$progress = iAssociationParserProgress;
            this.val$fc = fileChannel;
            this.val$myEvidences = hashSet;
            this.val$terms = termContainer;
            this.val$usedGoTerms = hashSet2;
            this.val$names = hashSet3;
            this.val$objectSymbol2dbObject = hashMap;
            this.val$dbObject2ObjectSymbol = hashMap2;
            this.val$gene2Associations = hashMap3;
            // All statistics start from zero; the alternative-id index is built on demand.
            this.lineno = 0;
            this.millis = 0L;
            this.good = 0;
            this.bad = 0;
            this.skipped = 0;
            this.nots = 0;
            this.evidenceMismatch = 0;
            this.kept = 0;
            this.obsolete = 0;
            this.altTermID2Term = null;
        }

        /**
         * Processes one line of the association file.
         *
         * @param bArr buffer holding the line
         * @param i offset of the first byte of the line
         * @param i2 length of the line in bytes
         * @return always {@code true}; this scanner never requests an early stop
         */
        @Override // ontologizer.association.AbstractByteLineScanner
        public boolean newLine(byte[] bArr, int i, int i2) {
            reportProgress();
            this.lineno++;
            // Empty lines and '!' comment lines carry no association.
            if (i2 < 1 || bArr[i] == '!') {
                return true;
            }
            try {
                // Parsing happens inside the try so that a malformed line is counted
                // as bad instead of aborting the whole scan.
                Association assoc = Association.createFromGAFLine(bArr, i, i2, AssociationParser.this.prefixPool);
                TermID termID = assoc.getTermID();
                this.good++;
                if (assoc.hasNotQualifier()) {
                    this.skipped++;
                    this.nots++;
                    return true;
                }
                if (this.val$myEvidences != null && !this.val$myEvidences.contains(assoc.getEvidence())) {
                    this.skipped++;
                    this.evidenceMismatch++;
                    return true;
                }
                Term term = resolveTerm(termID);
                if (term == null) {
                    System.err.println("Skipping association of item \"" + assoc.getObjectSymbol() + "\" to " + termID + " because the term was not found!");
                    System.err.println("(Are the obo file and the association file both up-to-date?)");
                    this.skipped++;
                    return true;
                }
                // Normalise to the primary id (the line may have used an alternative id).
                TermID id = term.getID();
                assoc.setTermID(id);
                this.val$usedGoTerms.add(id);
                if (term.isObsolete()) {
                    System.err.println("Skipping association of item \"" + assoc.getObjectSymbol() + "\" to " + id + " because term is obsolete!");
                    System.err.println("(Are the obo file and the association file in sync?)");
                    this.skipped++;
                    this.obsolete++;
                    return true;
                }
                ByteString[] synonyms = null;
                if (assoc.getSynonym() != null && assoc.getSynonym().length() > 2) {
                    synonyms = assoc.getSynonym().splitBySingleChar('|');
                }
                if (this.val$names != null && !isRequested(assoc, synonyms)) {
                    this.skipped++;
                    return true;
                }
                this.kept++;
                register(assoc, synonyms);
                return true;
            } catch (Exception e2) {
                this.bad++;
                // Show the offending text; concatenating the byte[] itself would only
                // print the array's identity string.
                System.err.println("Nonfatal error: malformed line in association file \n\nCould not parse line " + this.lineno + "\n" + e2.getMessage() + "\n\"" + new String(bArr, i, i2) + "\"\n");
                return true;
            }
        }

        /** Notifies the progress listener, at most once every 250 ms. */
        private void reportProgress() {
            if (this.val$progress == null) {
                return;
            }
            long now = System.currentTimeMillis();
            if (now - this.millis <= 250) {
                return;
            }
            try {
                // Clamp instead of a plain cast: positions beyond 2 GiB would wrap around.
                this.val$progress.update((int) Math.min(this.val$fc.position(), (long) Integer.MAX_VALUE));
            } catch (IOException ignored) {
                // Progress reporting is informational only; parsing continues.
            }
            this.millis = now;
        }

        /**
         * Looks a term up by primary id and, failing that, by alternative id.
         *
         * @return the term, or {@code null} if the id is unknown
         */
        private Term resolveTerm(TermID termID) {
            Term term = this.val$terms.get(termID);
            if (term != null) {
                return term;
            }
            if (this.altTermID2Term == null) {
                // Built only when the first unknown primary id is encountered.
                this.altTermID2Term = new HashMap<>();
                Iterator<Term> terms = this.val$terms.iterator();
                while (terms.hasNext()) {
                    Term candidate = terms.next();
                    Iterator<TermID> alternatives = candidate.getAlternatives().iterator();
                    while (alternatives.hasNext()) {
                        this.altTermID2Term.put(alternatives.next(), candidate);
                    }
                }
            }
            return this.altTermID2Term.get(termID);
        }

        /** Tells whether a synonym, the object symbol or the DB object is among the requested names. */
        private boolean isRequested(Association assoc, ByteString[] synonyms) {
            if (synonyms != null) {
                for (ByteString synonym : synonyms) {
                    if (this.val$names.contains(synonym)) {
                        return true;
                    }
                }
            }
            return this.val$names.contains(assoc.getObjectSymbol()) || this.val$names.contains(assoc.getDB_Object());
        }

        /** Stores an accepted association and updates all lookup maps, warning about ambiguous mappings. */
        private void register(Association assoc, ByteString[] synonyms) {
            if (synonyms != null) {
                for (ByteString synonym : synonyms) {
                    AssociationParser.this.synonym2gene.put(synonym, assoc.getObjectSymbol());
                }
            }
            ByteString knownDbObject = (ByteString) this.val$objectSymbol2dbObject.get(assoc.getObjectSymbol());
            if (knownDbObject == null) {
                this.val$objectSymbol2dbObject.put(assoc.getObjectSymbol(), assoc.getDB_Object());
            } else if (!knownDbObject.equals(assoc.getDB_Object())) {
                AssociationParser.this.symbolWarnings++;
                // Only the first warnings are logged individually to keep the log readable.
                if (AssociationParser.this.symbolWarnings < 1000) {
                    AssociationParser.logger.warning("Line " + this.lineno + ": Expected that symbol \"" + assoc.getObjectSymbol() + "\" maps to \"" + knownDbObject + "\" but it maps to \"" + assoc.getDB_Object() + "\"");
                }
            }
            ByteString knownSymbol = (ByteString) this.val$dbObject2ObjectSymbol.get(assoc.getDB_Object());
            if (knownSymbol == null) {
                this.val$dbObject2ObjectSymbol.put(assoc.getDB_Object(), assoc.getObjectSymbol());
            } else if (!knownSymbol.equals(assoc.getObjectSymbol())) {
                AssociationParser.this.dbObjectWarnings++;
                if (AssociationParser.this.dbObjectWarnings < 1000) {
                    AssociationParser.logger.warning("Line " + this.lineno + ": Expected that dbObject \"" + assoc.getDB_Object() + "\" maps to symbol \"" + knownSymbol + "\" but it maps to \"" + assoc.getObjectSymbol() + "\"");
                }
            }
            AssociationParser.this.associations.add(assoc);
            ArrayList geneAssociations = (ArrayList) this.val$gene2Associations.get(assoc.getObjectSymbol());
            if (geneAssociations == null) {
                geneAssociations = new ArrayList();
                this.val$gene2Associations.put(assoc.getObjectSymbol(), geneAssociations);
            }
            geneAssociations.add(assoc);
            AssociationParser.this.dbObjectID2gene.put(assoc.getDB_Object(), assoc.getObjectSymbol());
        }
    }

    /* loaded from: input_file:ontologizer/association/AssociationParser$Type.class */
    /** File formats the parser can tell apart; the constructor records which one it read. */
    enum Type {
        /** No importer has run (yet), or the file had no content line. */
        UNKNOWN,
        /** Association file read by the GAF line scanner. */
        GAF,
        /** File whose name ends in {@code .ids}. */
        IDS,
        /** File whose first content line starts with the Affymetrix column headers. */
        AFFYMETRIX;
    }

    /**
     * Parses an association file without restricting the item names.
     *
     * @param str path of the association file
     * @param termContainer known terms
     * @throws IOException if the file cannot be read
     */
    public AssociationParser(String str, TermContainer termContainer) throws IOException {
        this(str, termContainer, (HashSet<ByteString>) null);
    }

    /**
     * Parses an association file without progress reporting.
     *
     * @param str path of the association file
     * @param termContainer known terms
     * @param hashSet item names to keep, or {@code null} to keep all
     * @throws IOException if the file cannot be read
     */
    public AssociationParser(String str, TermContainer termContainer, HashSet<ByteString> hashSet) throws IOException {
        this(str, termContainer, hashSet, (IAssociationParserProgress) null);
    }

    /**
     * Parses an association file without restricting the evidence codes.
     *
     * @param str path of the association file
     * @param termContainer known terms
     * @param hashSet item names to keep, or {@code null} to keep all
     * @param iAssociationParserProgress progress listener, may be {@code null}
     * @throws IOException if the file cannot be read
     */
    public AssociationParser(String str, TermContainer termContainer, HashSet<ByteString> hashSet, IAssociationParserProgress iAssociationParserProgress) throws IOException {
        this(str, termContainer, hashSet, (Collection<String>) null, iAssociationParserProgress);
    }

    /**
     * Parses an association file, detecting its format.
     *
     * <p>A name ending in {@code .ids} is read as an IDS file. Otherwise the file is
     * opened (transparently gunzipped when possible), the first non-empty line not
     * starting with {@code #} is inspected, and the content is handed to the
     * Affymetrix importer if that line starts with the Affymetrix column headers, or
     * to the GAF importer if not. If no such line exists nothing is imported and the
     * file type stays {@code UNKNOWN}.
     *
     * @param str path of the association file
     * @param termContainer known terms
     * @param hashSet item names to keep, or {@code null} to keep all
     * @param collection evidence codes to accept, or {@code null} to accept all
     * @param iAssociationParserProgress progress listener, may be {@code null}
     * @throws IOException if the file cannot be opened or read
     */
    public AssociationParser(String str, TermContainer termContainer, HashSet<ByteString> hashSet, Collection<String> collection, IAssociationParserProgress iAssociationParserProgress) throws IOException {
        this.prefixPool = new PrefixPool();
        this.fileType = Type.UNKNOWN;
        this.associations = new ArrayList<>();
        this.synonym2gene = new HashMap<>();
        this.dbObjectID2gene = new HashMap<>();
        if (str.endsWith(".ids")) {
            importIDSAssociation(str, termContainer, iAssociationParserProgress);
            this.fileType = Type.IDS;
            return;
        }
        FileInputStream fileInputStream = new FileInputStream(str);
        InputStream inputStream;
        try {
            inputStream = new GZIPInputStream(fileInputStream);
        } catch (IOException notGzip) {
            // The gzip probe already consumed bytes, so start over on a fresh stream.
            fileInputStream.close();
            fileInputStream = new FileInputStream(str);
            inputStream = fileInputStream;
        }
        PushbackInputStream pushbackInputStream = new PushbackInputStream(inputStream, 66560);
        try {
            final StringBuilder sb = new StringBuilder();
            // Scanner that stops at the first line that is neither empty nor a '#' comment.
            AbstractByteLineScanner abstractByteLineScanner = new AbstractByteLineScanner(pushbackInputStream) { // from class: ontologizer.association.AssociationParser.1
                @Override // ontologizer.association.AbstractByteLineScanner
                public boolean newLine(byte[] bArr, int i, int i2) {
                    if (i2 <= 0 || bArr[i] == '#') {
                        return true;
                    }
                    // ISO-8859-1 maps every byte to exactly one char, so the line can be
                    // turned back into the very same bytes when it is pushed back below.
                    sb.append(new String(bArr, i, i2, StandardCharsets.ISO_8859_1));
                    return false;
                }
            };
            abstractByteLineScanner.scan();
            if (sb.length() == 0) {
                return;
            }
            String firstLine = sb.toString();
            byte[] lineBytes = firstLine.getBytes(StandardCharsets.ISO_8859_1);
            int buffered = abstractByteLineScanner.available();
            // Give the inspected line and whatever the probe scanner had already buffered
            // back to the stream so that the real importer sees them again.
            byte[] unread = new byte[lineBytes.length + buffered];
            System.arraycopy(lineBytes, 0, unread, 0, lineBytes.length);
            System.arraycopy(abstractByteLineScanner.availableBuffer(), 0, unread, lineBytes.length, buffered);
            pushbackInputStream.unread(unread);
            if (firstLine.startsWith("\"Probe Set ID\",\"GeneChip Array\"")) {
                importAffyFile(new BufferedReader(new InputStreamReader(pushbackInputStream)), fileInputStream, hashSet, termContainer, iAssociationParserProgress);
                this.fileType = Type.AFFYMETRIX;
            } else {
                importAssociationFile(pushbackInputStream, fileInputStream, hashSet, termContainer, collection, iAssociationParserProgress);
                this.fileType = Type.GAF;
            }
        } finally {
            // Releases the file on every path (empty file, Affymetrix import, failure).
            // Closing is idempotent, so an importer having closed the stream is harmless.
            pushbackInputStream.close();
        }
    }

    /**
     * Reads an IDS file: each line holds an item name, a tab, and a comma-separated
     * list of term ids. The format banner line is ignored, as are lines without a tab.
     * Ids that are unknown to {@code termContainer} are reported and skipped.
     *
     * <p>I/O problems do not abort the import; they are logged and whatever was read
     * up to that point is kept.
     *
     * @param str path of the IDS file
     * @param termContainer known terms
     * @param iAssociationParserProgress currently unused
     */
    private void importIDSAssociation(String str, TermContainer termContainer, IAssociationParserProgress iAssociationParserProgress) {
        // try-with-resources: the reader is released even when reading fails.
        try (BufferedReader bufferedReader = new BufferedReader(new FileReader(str))) {
            String readLine;
            while ((readLine = bufferedReader.readLine()) != null) {
                if (readLine.equalsIgnoreCase("GoStat IDs Format Version 1.0")) {
                    continue;
                }
                String[] split = readLine.split("\t", 2);
                if (split.length != 2) {
                    continue;
                }
                for (String str2 : split[1].split(",")) {
                    TermID termID;
                    try {
                        termID = new TermID(str2);
                    } catch (IllegalArgumentException e) {
                        // Not a full term id; fall back to a bare number with the default prefix.
                        try {
                            termID = new TermID(TermID.DEFAULT_PREFIX, Integer.parseInt(str2));
                        } catch (NumberFormatException notANumber) {
                            logger.warning("Ignoring unparsable term id \"" + str2 + "\" which annotates " + split[0]);
                            continue;
                        }
                    }
                    if (termContainer.get(termID) != null) {
                        this.associations.add(new Association(new ByteString(split[0]), termID.toString()));
                    } else {
                        logger.warning(termID.toString() + " which annotates " + split[0] + " not found");
                    }
                }
            }
        } catch (IOException e) {
            // Covers FileNotFoundException as well. Kept non-fatal, but no longer silent.
            logger.warning("Could not read association file \"" + str + "\": " + e);
        }
    }

    /**
     * Imports a GAF association file by running a {@link C1GAFByteLineScanner} over
     * it, then logs a summary of what was parsed, kept and skipped.
     *
     * @param inputStream stream to scan; closed by this method on every path
     * @param fileInputStream underlying file stream, used only for progress reporting
     * @param hashSet item names to keep, or {@code null} to keep all
     * @param termContainer known terms
     * @param collection evidence codes to accept, or {@code null} to accept all
     * @param iAssociationParserProgress progress listener, may be {@code null}
     * @throws IOException if reading fails
     */
    private void importAssociationFile(InputStream inputStream, FileInputStream fileInputStream, HashSet<ByteString> hashSet, TermContainer termContainer, Collection<String> collection, IAssociationParserProgress iAssociationParserProgress) throws IOException {
        try {
            HashSet<ByteString> evidences = null;
            if (collection != null) {
                evidences = new HashSet<>();
                for (String evidenceCode : collection) {
                    evidences.add(new ByteString(evidenceCode));
                }
            }
            HashSet<TermID> usedGoTerms = new HashSet<>();
            HashMap<ByteString, ArrayList<Association>> gene2Associations = new HashMap<>();
            HashMap<ByteString, ByteString> dbObject2ObjectSymbol = new HashMap<>();
            HashMap<ByteString, ByteString> objectSymbol2dbObject = new HashMap<>();
            FileChannel channel = fileInputStream.getChannel();
            if (iAssociationParserProgress != null) {
                // Clamp instead of a plain cast: sizes beyond 2 GiB would wrap around.
                iAssociationParserProgress.init((int) Math.min(channel.size(), (long) Integer.MAX_VALUE));
            }
            C1GAFByteLineScanner scanner = new C1GAFByteLineScanner(inputStream, iAssociationParserProgress, channel, evidences, termContainer, usedGoTerms, hashSet, objectSymbol2dbObject, dbObject2ObjectSymbol, gene2Associations);
            scanner.scan();
            if (iAssociationParserProgress != null) {
                iAssociationParserProgress.update((int) Math.min(channel.size(), (long) Integer.MAX_VALUE));
            }
            logger.info(scanner.good + " associations parsed, " + scanner.kept + " of which were kept while " + scanner.bad + " malformed lines had to be ignored.");
            logger.info("A further " + scanner.skipped + " associations were skipped due to various reasons whereas " + scanner.nots + " of those where explicitly qualified with NOT, " + scanner.obsolete + " referred to obsolete terms and " + scanner.evidenceMismatch + " didn't match the requested evidence codes");
            logger.info("A total of " + usedGoTerms.size() + " terms are directly associated to " + this.dbObjectID2gene.size() + " items.");
            // Individual warnings are capped by the scanner; report the totals here.
            if (this.symbolWarnings >= 1000) {
                logger.warning("The symbols of a total of " + this.symbolWarnings + " entries mapped ambiguously");
            }
            if (this.dbObjectWarnings >= 1000) {
                logger.warning("The objects of a  total of " + this.dbObjectWarnings + " entries mapped ambiguously");
            }
        } finally {
            // Release the stream even when scanning fails.
            inputStream.close();
        }
    }

    /**
     * Imports an Affymetrix annotation CSV file.
     *
     * <p>Leading {@code #} lines are skipped, then the column header line is checked
     * against the expected names. For every following row the double-quoted cells are
     * read: cell 0 (probe set id), cell 14 (gene symbol) and cells 30 to 32 (the three
     * Gene Ontology columns). Rows are collected per gene symbol, or per probe set id
     * when no symbol is given, and finally turned into associations and synonyms.
     *
     * <p>A missing or unexpected header is logged and nothing is imported.
     *
     * @param bufferedReader reader positioned at the start of the file content
     * @param fileInputStream underlying file stream, used only for progress reporting
     * @param hashSet currently unused
     * @param termContainer known terms; ids not contained in it are counted as skipped
     * @param iAssociationParserProgress progress listener, may be {@code null}
     * @throws IOException if reading fails
     */
    private void importAffyFile(BufferedReader bufferedReader, FileInputStream fileInputStream, HashSet<ByteString> hashSet, TermContainer termContainer, IAssociationParserProgress iAssociationParserProgress) throws IOException {
        final String[] expectedHeaders = {"Probe Set ID", "GeneChip Array", "Species Scientific Name", "Annotation Date", "Sequence Type", "Sequence Source", "Transcript ID(Array Design)", "Target Description", "Representative Public ID", "Archival UniGene Cluster", "UniGene ID", "Genome Version", "Alignments", "Gene Title", "Gene Symbol", "Chromosomal Location", "Unigene Cluster Type", "Ensembl", "Entrez Gene", "SwissProt", "EC", "OMIM", "RefSeq Protein ID", "RefSeq Transcript ID", "FlyBase", "AGI", "WormBase", "MGI Name", "RGD Name", "SGD accession number", "Gene Ontology Biological Process", "Gene Ontology Cellular Component", "Gene Ontology Molecular Function", "Pathway", "Protein Families", "Protein Domains", "InterPro", "Trans Membrane", "QTL", "Annotation Description", "Annotation Transcript Cluster", "Transcript Assignments", "Annotation Notes"};
        // Only the columns up to and including the three GO columns are verified.
        final int checkedColumns = 33;
        final int probeSetColumn = 0;
        final int geneSymbolColumn = 14;
        final int firstGoColumn = 30;
        final int lastGoColumn = 32;

        FileChannel channel = fileInputStream.getChannel();
        if (iAssociationParserProgress != null) {
            // Clamp instead of a plain cast: sizes beyond 2 GiB would wrap around.
            iAssociationParserProgress.init((int) Math.min(channel.size(), (long) Integer.MAX_VALUE));
        }
        int skippedAnnotations = 0;
        long lastProgressMillis = 0;

        // Skip leading comment lines; stop at end of input instead of dereferencing null.
        String headerLine;
        do {
            headerLine = bufferedReader.readLine();
        } while (headerLine != null && headerLine.startsWith("#"));

        boolean headerValid = false;
        if (headerLine == null) {
            logger.severe("Found no column header line");
        } else {
            headerValid = hasExpectedAffyHeader(headerLine, expectedHeaders, checkedColumns);
        }

        if (headerValid) {
            SwissProtAffyAnnotaionSet annotationSet = new SwissProtAffyAnnotaionSet();
            String row;
            while ((row = bufferedReader.readLine()) != null) {
                if (iAssociationParserProgress != null) {
                    long now = System.currentTimeMillis();
                    if (now - lastProgressMillis > 250) {
                        iAssociationParserProgress.update((int) Math.min(channel.position(), (long) Integer.MAX_VALUE));
                        lastProgressMillis = now;
                    }
                }
                ByteString probeSetId = null;
                ByteString geneSymbol = null;
                LinkedList<TermID> goIds = new LinkedList<>();
                int openQuote = -1;
                int column = 0;
                // Cells are delimited by pairs of double quotes; text between cells is ignored.
                for (int pos = 0; pos < row.length(); pos++) {
                    if (row.charAt(pos) != '"') {
                        continue;
                    }
                    if (openQuote == -1) {
                        openQuote = pos;
                        continue;
                    }
                    if (column == probeSetColumn) {
                        probeSetId = new ByteString(row.substring(openQuote + 1, pos));
                    } else if (column == geneSymbolColumn) {
                        String symbolText = row.substring(openQuote + 1, pos);
                        if (symbolText.startsWith("---")) {
                            geneSymbol = null;
                        } else {
                            geneSymbol = new ByteString(symbolText);
                            // Keep only the part before the first "///" separator.
                            int separator = geneSymbol.indexOf(THREE_SLASHES);
                            if (separator != -1) {
                                geneSymbol = geneSymbol.trimmedSubstring(0, separator);
                            }
                        }
                    } else if (column >= firstGoColumn && column <= lastGoColumn) {
                        skippedAnnotations += collectAffyGoIds(row.substring(openQuote + 1, pos), termContainer, goIds);
                    }
                    column++;
                    openQuote = -1;
                }
                if (geneSymbol != null && geneSymbol.length() > 0) {
                    annotationSet.add(geneSymbol, probeSetId, goIds);
                } else if (goIds.size() > 0) {
                    // No usable symbol: file the annotations under the probe set id itself.
                    annotationSet.add(probeSetId, probeSetId, goIds);
                }
            }
            Iterator<SwissProtAffyAnnotation> annotations = annotationSet.iterator();
            while (annotations.hasNext()) {
                SwissProtAffyAnnotation annotation = annotations.next();
                ByteString swissProtID = annotation.getSwissProtID();
                Iterator<TermID> ids = annotation.getGOIDs().iterator();
                while (ids.hasNext()) {
                    this.associations.add(new Association(swissProtID, ids.next()));
                }
                Iterator<ByteString> affyIds = annotation.getAffyIDs().iterator();
                while (affyIds.hasNext()) {
                    this.synonym2gene.put(affyIds.next(), swissProtID);
                }
            }
        }
        System.err.println("Skipped " + skippedAnnotations + " annotations");
    }

    /**
     * Checks that the first {@code checkedColumns} header cells carry the expected
     * names, logging the first problem found.
     */
    private static boolean hasExpectedAffyHeader(String headerLine, String[] expectedHeaders, int checkedColumns) {
        String[] cells = headerLine.split(",");
        if (cells.length < checkedColumns) {
            logger.severe("Found only " + cells.length + " column headers but expected at least " + checkedColumns);
            return false;
        }
        for (int c = 0; c < checkedColumns; c++) {
            String cell = cells[c];
            int begin = cell.indexOf('"') + 1;
            int end = cell.lastIndexOf('"');
            // A cell without a proper pair of quotes is compared as it stands.
            String name = end >= begin ? cell.substring(begin, end) : cell;
            if (!name.equals(expectedHeaders[c])) {
                logger.severe("Found column header \"" + name + "\" but expected \"" + expectedHeaders[c] + "\"");
                return false;
            }
        }
        return true;
    }

    /**
     * Extracts the term ids from one Gene Ontology cell (entries separated by
     * {@code ///}, the numeric id being the text before the first {@code /} of an
     * entry) and appends the ids known to {@code termContainer} to {@code sink}.
     *
     * @return the number of ids that were not found in {@code termContainer}
     */
    private static int collectAffyGoIds(String cell, TermContainer termContainer, List<TermID> sink) {
        int unknown = 0;
        for (String entry : cell.split("///")) {
            String number = entry.contains("/") ? entry.substring(0, entry.indexOf('/')).trim() : entry.trim();
            try {
                TermID termID = new TermID(Integer.parseInt(number));
                if (termContainer.get(termID) != null) {
                    sink.add(termID);
                } else {
                    unknown++;
                }
            } catch (NumberFormatException ignored) {
                // Entries without a numeric id (e.g. placeholder text) carry no annotation.
            }
        }
        return unknown;
    }

    /**
     * @return the parser's own list of imported associations (not a copy)
     */
    public ArrayList<Association> getAssociations() {
        return associations;
    }

    /**
     * @return the parser's own synonym-to-item-name map (not a copy)
     */
    public HashMap<ByteString, ByteString> getSynonym2gene() {
        return synonym2gene;
    }

    /**
     * @return the parser's own DB-object-to-object-symbol map (not a copy)
     */
    public HashMap<ByteString, ByteString> getDbObject2gene() {
        return dbObjectID2gene;
    }

    /**
     * Collects the object symbol of every imported association, in import order.
     * A symbol appears once per association, so the result may contain duplicates.
     *
     * @return a new list of object symbols
     */
    public List<ByteString> getListOfObjectSymbols() {
        List<ByteString> symbols = new ArrayList<>();
        for (Association association : this.associations) {
            symbols.add(association.getObjectSymbol());
        }
        return symbols;
    }

    /**
     * @return the file format recorded by the constructor
     */
    public Type getFileType() {
        return fileType;
    }

    /** Compiler-generated accessor: post-increments {@code symbolWarnings} and returns the previous value. */
    static /* synthetic */ int access$208(AssociationParser associationParser) {
        return associationParser.symbolWarnings++;
    }

    /** Compiler-generated accessor: post-increments {@code dbObjectWarnings} and returns the previous value. */
    static /* synthetic */ int access$408(AssociationParser associationParser) {
        return associationParser.dbObjectWarnings++;
    }
}
