package org.apache.drill.exec.store.parquet;

import com.codahale.metrics.Histogram;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.Timer;
import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeName;
import com.google.common.base.Preconditions;
import com.google.common.base.Stopwatch;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.exceptions.PhysicalOperatorSetupException;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.logical.FormatPluginConfig;
import org.apache.drill.common.logical.StoragePluginConfig;
import org.apache.drill.exec.metrics.DrillMetrics;
import org.apache.drill.exec.physical.EndpointAffinity;
import org.apache.drill.exec.physical.base.AbstractGroupScan;
import org.apache.drill.exec.physical.base.GroupScan;
import org.apache.drill.exec.physical.base.PhysicalOperator;
import org.apache.drill.exec.physical.base.ScanStats;
import org.apache.drill.exec.proto.CoordinationProtos;
import org.apache.drill.exec.store.StoragePluginRegistry;
import org.apache.drill.exec.store.dfs.ReadEntryFromHDFS;
import org.apache.drill.exec.store.dfs.ReadEntryWithPath;
import org.apache.drill.exec.store.dfs.easy.FileWork;
import org.apache.drill.exec.store.schedule.AffinityCreator;
import org.apache.drill.exec.store.schedule.AssignmentCreator;
import org.apache.drill.exec.store.schedule.BlockMapBuilder;
import org.apache.drill.exec.store.schedule.CompleteWork;
import org.apache.drill.exec.store.schedule.EndpointByteMap;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import parquet.hadoop.Footer;
import parquet.hadoop.ParquetFileReader;
import parquet.hadoop.metadata.BlockMetaData;
import parquet.hadoop.metadata.ColumnChunkMetaData;
import parquet.org.codehaus.jackson.annotate.JsonCreator;

@JsonTypeName("parquet-scan")
/* loaded from: input_file:org/apache/drill/exec/store/parquet/ParquetGroupScan.class */
public class ParquetGroupScan extends AbstractGroupScan {
    // All static finals below are assigned in the static initializer at the bottom of the class.

    /** SLF4J logger for this class. */
    static final Logger logger;
    /** Metric registry obtained from {@code DrillMetrics.getInstance()}. */
    static final MetricRegistry metrics;
    /** Name of the timer that wraps {@code readFooter}. */
    static final String READ_FOOTER_TIMER;
    /** Metric name; not referenced by any code visible in this class. */
    static final String ENDPOINT_BYTES_TIMER;
    /** Metric name; not referenced by any code visible in this class. */
    static final String ASSIGNMENT_TIMER;
    /** Name under which {@link #assignmentAffinityStats} is registered. */
    static final String ASSIGNMENT_AFFINITY_HIST;
    /** Histogram looked up in every constructor; no updates to it are visible in this class. */
    final Histogram assignmentAffinityStats;
    /** Row groups per minor fragment id; populated by {@code applyAssignments}, null before that. */
    private ListMultimap<Integer, RowGroupInfo> mappings;
    /** One entry per Parquet row group found by {@code readFooter}. */
    private List<RowGroupInfo> rowGroupInfos;
    /** Paths of the files this scan covers. */
    private final List<ReadEntryWithPath> entries;
    /** Measures the elapsed time of {@code readFooter} for its debug log line. */
    private final Stopwatch watch;
    private final ParquetFormatPlugin formatPlugin;
    private final ParquetFormatConfig formatConfig;
    /** Underlying Hadoop file system taken from the format plugin. */
    private final FileSystem fs;
    /** Cache filled on the first successful {@code getOperatorAffinity} call; null until then. */
    private List<EndpointAffinity> endpointAffinities;
    private String selectionRoot;
    /** Columns to read; may be null (see the null check in {@code getScanStats}). */
    private List<SchemaPath> columns;
    /** Sum over all row groups of the largest per-column value count in that row group. */
    private long rowCount;
    /** Total value count per (lower-cased) column path, accumulated across all row groups. */
    private Map<SchemaPath, Long> columnValueCounts;
    /** True when assertions are disabled for this class; guards the check in {@code getSpecificScan}. */
    static final /* synthetic */ boolean $assertionsDisabled;

    /* loaded from: input_file:org/apache/drill/exec/store/parquet/ParquetGroupScan$RowGroupInfo.class */
    /**
     * Describes a single Parquet row group as a unit of scan work: the file path, start offset
     * and length handed to {@link ReadEntryFromHDFS}, plus the index of the row group.
     *
     * <p>NOTE(review): the {@code JsonCreator} in scope for this file is imported from
     * {@code parquet.org.codehaus.jackson.annotate}, while {@code JsonProperty} comes from
     * {@code com.fasterxml.jackson.annotation} — confirm that this mix is intended.
     */
    public static class RowGroupInfo extends ReadEntryFromHDFS implements CompleteWork, FileWork {
        /** Per-endpoint byte distribution; null until {@link #setEndpointByteMap} is called. */
        private EndpointByteMap byteMap;
        /** Row group index supplied at construction; never reassigned. */
        private final int rowGroupIndex;

        /**
         * @param str  path of the file containing the row group
         * @param j    start offset of the row group
         * @param j2   length of the row group in bytes
         * @param i    index of the row group
         */
        @JsonCreator
        public RowGroupInfo(@JsonProperty("path") String str, @JsonProperty("start") long j, @JsonProperty("length") long j2, @JsonProperty("rowGroupIndex") int i) {
            super(str, j, j2);
            this.rowGroupIndex = i;
        }

        /** Builds a new {@link RowGroupReadEntry} carrying this row group's path, start, length and index. */
        public RowGroupReadEntry getRowGroupReadEntry() {
            return new RowGroupReadEntry(getPath(), getStart(), getLength(), this.rowGroupIndex);
        }

        /** @return the row group index given to the constructor */
        public int getRowGroupIndex() {
            return this.rowGroupIndex;
        }

        /** Orders work units by ascending {@link #getTotalBytes()}. */
        @Override // java.lang.Comparable
        public int compareTo(CompleteWork completeWork) {
            return Long.compare(getTotalBytes(), completeWork.getTotalBytes());
        }

        /** @return the row group length, as reported by {@code getLength()} */
        @Override // org.apache.drill.exec.store.schedule.CompleteWork
        public long getTotalBytes() {
            return getLength();
        }

        /** @return the byte map set via {@link #setEndpointByteMap}, or null if none was set */
        @Override // org.apache.drill.exec.store.schedule.CompleteWork
        public EndpointByteMap getByteMap() {
            return this.byteMap;
        }

        /** Stores the per-endpoint byte distribution for this row group. */
        public void setEndpointByteMap(EndpointByteMap endpointByteMap) {
            this.byteMap = endpointByteMap;
        }
    }

    /**
     * Returns the file entries covered by this scan.
     *
     * @return the internal entry list itself (not a copy)
     */
    public List<ReadEntryWithPath> getEntries() {
        return entries;
    }

    /**
     * Returns the Parquet format configuration, exposed under the JSON property {@code format}.
     */
    @JsonProperty("format")
    public ParquetFormatConfig getFormatConfig() {
        return formatConfig;
    }

    /**
     * Returns the storage configuration reported by the format plugin, exposed under the JSON
     * property {@code storage}.
     */
    @JsonProperty("storage")
    public StoragePluginConfig getEngineConfig() {
        final StoragePluginConfig storageConfig = formatPlugin.getStorageConfig();
        return storageConfig;
    }

    @JsonCreator
    public ParquetGroupScan(@JsonProperty("entries") List<ReadEntryWithPath> list, @JsonProperty("storage") StoragePluginConfig storagePluginConfig, @JsonProperty("format") FormatPluginConfig formatPluginConfig, @JacksonInject StoragePluginRegistry storagePluginRegistry, @JsonProperty("columns") List<SchemaPath> list2, @JsonProperty("selectionRoot") String str) throws IOException, ExecutionSetupException {
        this.assignmentAffinityStats = metrics.histogram(ASSIGNMENT_AFFINITY_HIST);
        this.watch = new Stopwatch();
        this.columns = list2;
        formatPluginConfig = formatPluginConfig == null ? new ParquetFormatConfig() : formatPluginConfig;
        Preconditions.checkNotNull(storagePluginConfig);
        Preconditions.checkNotNull(formatPluginConfig);
        this.formatPlugin = (ParquetFormatPlugin) storagePluginRegistry.getFormatPlugin(storagePluginConfig, formatPluginConfig);
        Preconditions.checkNotNull(this.formatPlugin);
        this.fs = this.formatPlugin.getFileSystem().getUnderlying();
        this.formatConfig = this.formatPlugin.getConfig();
        this.entries = list;
        this.selectionRoot = str;
        readFooterFromEntries();
    }

    /**
     * Returns the selection root given at construction (or copied from the source scan).
     */
    public String getSelectionRoot() {
        return selectionRoot;
    }

    /**
     * Creates a group scan over already-resolved file statuses and reads their footers.
     *
     * @param list                files to scan; each path becomes a {@link ReadEntryWithPath}
     * @param parquetFormatPlugin plugin supplying the format config and the file system
     * @param str                 selection root
     * @param list2               columns to read
     * @throws IOException if footer reading fails
     */
    public ParquetGroupScan(List<FileStatus> list, ParquetFormatPlugin parquetFormatPlugin, String str, List<SchemaPath> list2) throws IOException {
        this.assignmentAffinityStats = metrics.histogram(ASSIGNMENT_AFFINITY_HIST);
        this.watch = new Stopwatch();

        this.formatPlugin = parquetFormatPlugin;
        this.formatConfig = parquetFormatPlugin.getConfig();
        this.fs = parquetFormatPlugin.getFileSystem().getUnderlying();
        this.columns = list2;
        this.selectionRoot = str;

        // Record every file by its path string.
        this.entries = Lists.newArrayList();
        for (FileStatus status : list) {
            final String filePath = status.getPath().toString();
            this.entries.add(new ReadEntryWithPath(filePath));
        }

        readFooter(list);
    }

    /**
     * Copy constructor. Every field is copied by reference, so the new scan shares its lists,
     * maps and plugin objects with the source; only the stopwatch is created fresh, and the
     * histogram is looked up again under the same name.
     *
     * @param parquetGroupScan the scan to copy from
     */
    private ParquetGroupScan(ParquetGroupScan parquetGroupScan) {
        this.assignmentAffinityStats = metrics.histogram(ASSIGNMENT_AFFINITY_HIST);
        this.watch = new Stopwatch();

        // Plugin / file system wiring.
        this.formatPlugin = parquetGroupScan.formatPlugin;
        this.formatConfig = parquetGroupScan.formatConfig;
        this.fs = parquetGroupScan.fs;

        // What is being scanned.
        this.entries = parquetGroupScan.entries;
        this.selectionRoot = parquetGroupScan.selectionRoot;
        this.columns = parquetGroupScan.columns;

        // State derived from the footers and from planning.
        this.rowGroupInfos = parquetGroupScan.rowGroupInfos;
        this.rowCount = parquetGroupScan.rowCount;
        this.columnValueCounts = parquetGroupScan.columnValueCounts;
        this.endpointAffinities = parquetGroupScan.endpointAffinities;
        this.mappings = parquetGroupScan.mappings;
    }

    /**
     * Resolves the {@link FileStatus} of every entry path through the file system and passes
     * the resulting list to {@code readFooter}.
     *
     * @throws IOException if a file status lookup or the footer read fails
     */
    private void readFooterFromEntries() throws IOException {
        final List<FileStatus> statuses = Lists.newArrayList();
        for (ReadEntryWithPath entry : this.entries) {
            final Path entryPath = new Path(entry.getPath());
            statuses.add(this.fs.getFileStatus(entryPath));
        }
        readFooter(statuses);
    }

    /**
     * Reads the Parquet footers of the given files and rebuilds {@code rowGroupInfos},
     * {@code rowCount} and {@code columnValueCounts} from scratch.
     *
     * <p>For each row group: the start offset is the first data page offset of its first column,
     * the length is the sum of the total sizes of all its column chunks, and its contribution to
     * {@code rowCount} is the largest value count among its columns.
     *
     * <p>The metrics timer and the stopwatch are stopped in a {@code finally} block so that they
     * are released even when footer reading fails.
     *
     * @param list files whose footers should be read
     * @throws IOException           if reading fails or a file yields no footer
     * @throws IllegalStateException if no row group was found in any file
     */
    private void readFooter(List<FileStatus> list) throws IOException {
        this.watch.reset();
        this.watch.start();
        final Timer.Context time = metrics.timer(READ_FOOTER_TIMER).time();
        try {
            this.rowGroupInfos = Lists.newArrayList();
            this.rowCount = 0L;
            this.columnValueCounts = new HashMap<SchemaPath, Long>();
            for (FileStatus fileStatus : list) {
                List<Footer> readFooters = ParquetFileReader.readFooters(this.formatPlugin.getHadoopConfig(), fileStatus);
                if (readFooters.size() == 0) {
                    throw new IOException(String.format("Unable to find footer for file %s", fileStatus.getPath().getName()));
                }
                for (Footer footer : readFooters) {
                    // Row group indexes restart at zero for every footer.
                    int rowGroupIndex = 0;
                    for (BlockMetaData blockMetaData : footer.getParquetMetadata().getBlocks()) {
                        long maxValueCount = 0;
                        long firstDataPageOffset = ((ColumnChunkMetaData) blockMetaData.getColumns().iterator().next()).getFirstDataPageOffset();
                        long totalSize = 0;
                        for (ColumnChunkMetaData columnChunkMetaData : blockMetaData.getColumns()) {
                            final long valueCount = columnChunkMetaData.getValueCount();
                            totalSize += columnChunkMetaData.getTotalSize();
                            maxValueCount = Math.max(valueCount, maxValueCount);
                            // Column path rendered as text with the square brackets stripped, lower-cased.
                            // NOTE(review): toLowerCase() uses the JVM default locale — confirm that is acceptable.
                            SchemaPath simplePath = SchemaPath.getSimplePath(columnChunkMetaData.getPath().toString().replace("[", "").replace("]", "").toLowerCase());
                            final Long previous = this.columnValueCounts.get(simplePath);
                            final long runningTotal = previous == null ? 0L : previous.longValue();
                            this.columnValueCounts.put(simplePath, Long.valueOf(runningTotal + valueCount));
                        }
                        String path = footer.getFile().toUri().getPath();
                        this.rowGroupInfos.add(new RowGroupInfo(path, firstDataPageOffset, totalSize, rowGroupIndex));
                        logger.debug("rowGroupInfo path: {} start: {} length {}", new Object[]{path, Long.valueOf(firstDataPageOffset), Long.valueOf(totalSize)});
                        rowGroupIndex++;
                        this.rowCount += maxValueCount;
                    }
                }
            }
            Preconditions.checkState(!this.rowGroupInfos.isEmpty(), "No row groups found");
        } finally {
            time.stop();
            this.watch.stop();
        }
        logger.debug("Took {} ms to get row group infos", Long.valueOf(this.watch.elapsed(TimeUnit.MILLISECONDS)));
    }

    /**
     * Returns the file system obtained from the format plugin; excluded from JSON output.
     */
    @JsonIgnore
    public FileSystem getFileSystem() {
        return fs;
    }

    /**
     * Returns the endpoint affinities for this scan, computing and caching them on first use.
     *
     * <p>Computation assigns an endpoint byte map to every row group and then derives the
     * affinity list from all row groups. If that fails with an {@link IOException}, a warning is
     * logged, an empty list is returned, and nothing is cached, so a later call tries again.
     *
     * @return the cached affinities, or an empty list when they could not be determined
     */
    @Override // org.apache.drill.exec.physical.base.HasAffinity
    public List<EndpointAffinity> getOperatorAffinity() {
        if (this.endpointAffinities != null) {
            return this.endpointAffinities;
        }

        final BlockMapBuilder byteMapBuilder = new BlockMapBuilder(this.fs, this.formatPlugin.getContext().getBits());
        try {
            for (RowGroupInfo info : this.rowGroupInfos) {
                final EndpointByteMap endpointBytes = byteMapBuilder.getEndpointByteMap(info);
                info.setEndpointByteMap(endpointBytes);
            }
            this.endpointAffinities = AffinityCreator.getAffinityMap(this.rowGroupInfos);
        } catch (IOException e) {
            logger.warn("Failure while determining operator affinity.", e);
            return Collections.emptyList();
        }
        return this.endpointAffinities;
    }

    /**
     * Computes the row-group-to-fragment mappings for the given endpoints and stores them,
     * replacing any previous mappings.
     *
     * @param list endpoints to assign work to
     * @throws PhysicalOperatorSetupException declared by the overridden method
     */
    @Override // org.apache.drill.exec.physical.base.GroupScan
    public void applyAssignments(List<CoordinationProtos.DrillbitEndpoint> list) throws PhysicalOperatorSetupException {
        mappings = AssignmentCreator.getMappings(list, rowGroupInfos);
    }

    /**
     * Builds the row-group scan for one minor fragment from the mappings stored by
     * {@code applyAssignments}.
     *
     * @param i minor fragment id
     * @return a scan over the row groups mapped to that fragment
     * @throws IllegalArgumentException if no row groups are mapped to the fragment
     */
    @Override // org.apache.drill.exec.physical.base.GroupScan
    public ParquetRowGroupScan getSpecificScan(int i) {
        // This check only runs when assertions are enabled for this class.
        if (!$assertionsDisabled) {
            if (i >= mappings.size()) {
                throw new AssertionError(String.format("Mappings length [%d] should be longer than minor fragment id [%d] but it isn't.", Integer.valueOf(mappings.size()), Integer.valueOf(i)));
            }
        }

        final List<RowGroupInfo> assigned = mappings.get(Integer.valueOf(i));
        final boolean hasWork = !assigned.isEmpty();
        final String noWorkMessage = String.format("MinorFragmentId %d has no read entries assigned", Integer.valueOf(i));
        Preconditions.checkArgument(hasWork, noWorkMessage);

        final List<RowGroupReadEntry> readEntries = convertToReadEntries(assigned);
        return new ParquetRowGroupScan(formatPlugin, readEntries, columns, selectionRoot);
    }

    /**
     * Converts row group descriptors into read entries, preserving order.
     *
     * <p>Delegates to {@link RowGroupInfo#getRowGroupReadEntry()} instead of repeating the same
     * path/start/length/index construction here.
     *
     * @param list row groups to convert
     * @return a new list with one read entry per row group
     */
    private List<RowGroupReadEntry> convertToReadEntries(List<RowGroupInfo> list) {
        final List<RowGroupReadEntry> readEntries = Lists.newArrayList();
        for (RowGroupInfo rowGroupInfo : list) {
            readEntries.add(rowGroupInfo.getRowGroupReadEntry());
        }
        return readEntries;
    }

    /**
     * Returns the number of row groups found by the footer read.
     */
    @Override // org.apache.drill.exec.physical.base.GroupScan
    public int getMaxParallelizationWidth() {
        final int rowGroupCount = rowGroupInfos.size();
        return rowGroupCount;
    }

    /**
     * Returns the columns this scan reads; may be null.
     *
     * @return the internal column list itself (not a copy)
     */
    public List<SchemaPath> getColumns() {
        return columns;
    }

    /**
     * Reports scan statistics with an exact row count. The last argument is the row count
     * multiplied by the number of selected columns, or by 20 when no column list is set.
     */
    @Override // org.apache.drill.exec.physical.base.GroupScan
    public ScanStats getScanStats() {
        final int columnFactor;
        if (columns == null) {
            columnFactor = 20;
        } else {
            columnFactor = columns.size();
        }
        // Multiply in long arithmetic first, then narrow to float.
        final float weightedRows = (float) (rowCount * columnFactor);
        return new ScanStats(ScanStats.GroupScanProperty.EXACT_ROW_COUNT, rowCount, 1.0f, weightedRows);
    }

    /**
     * Returns a copy of this scan made with the copy constructor.
     *
     * @param list child operators; must be empty
     * @throws IllegalArgumentException if {@code list} is not empty
     */
    @Override // org.apache.drill.exec.physical.base.PhysicalOperator
    @JsonIgnore
    public PhysicalOperator getNewWithChildren(List<PhysicalOperator> list) {
        final boolean noChildren = list.isEmpty();
        Preconditions.checkArgument(noChildren);
        return new ParquetGroupScan(this);
    }

    /**
     * Returns the digest of this scan, which is its {@link #toString()} text.
     */
    @Override // org.apache.drill.exec.physical.base.GroupScan
    public String getDigest() {
        final String digest = toString();
        return digest;
    }

    /**
     * Renders the entries, selection root and columns of this scan as text.
     */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder("ParquetGroupScan [entries=");
        text.append(entries);
        text.append(", selectionRoot=").append(selectionRoot);
        text.append(", columns=").append(columns);
        text.append("]");
        return text.toString();
    }

    /**
     * Returns a copy of this scan (via the copy constructor) whose column list is replaced by
     * the given one.
     *
     * @param list columns for the copy
     */
    @Override // org.apache.drill.exec.physical.base.AbstractGroupScan, org.apache.drill.exec.physical.base.GroupScan
    public GroupScan clone(List<SchemaPath> list) {
        final ParquetGroupScan copy = new ParquetGroupScan(this);
        copy.columns = list;
        return copy;
    }

    /**
     * Always returns {@code true}, regardless of the columns passed in; excluded from JSON output.
     *
     * @param list columns proposed for push-down (not inspected)
     */
    @Override // org.apache.drill.exec.physical.base.AbstractGroupScan, org.apache.drill.exec.physical.base.GroupScan
    @JsonIgnore
    public boolean canPushdownProjects(List<SchemaPath> list) {
        return true;
    }

    /**
     * Returns the total value count accumulated for a column during the footer read.
     *
     * @param schemaPath column to look up
     * @return the accumulated count, or 0 when the column was not seen
     */
    @Override // org.apache.drill.exec.physical.base.AbstractGroupScan, org.apache.drill.exec.physical.base.GroupScan
    public long getColumnValueCount(SchemaPath schemaPath) {
        final Long count = columnValueCounts.get(schemaPath);
        return count == null ? 0L : count.longValue();
    }

    static {
        $assertionsDisabled = !ParquetGroupScan.class.desiredAssertionStatus();
        logger = LoggerFactory.getLogger(ParquetGroupScan.class);
        metrics = DrillMetrics.getInstance();
        READ_FOOTER_TIMER = MetricRegistry.name(ParquetGroupScan.class, new String[]{"readFooter"});
        ENDPOINT_BYTES_TIMER = MetricRegistry.name(ParquetGroupScan.class, new String[]{"endpointBytes"});
        ASSIGNMENT_TIMER = MetricRegistry.name(ParquetGroupScan.class, new String[]{"applyAssignments"});
        ASSIGNMENT_AFFINITY_HIST = MetricRegistry.name(ParquetGroupScan.class, new String[]{"assignmentAffinity"});
    }
}
