/*
 * Decompiled with CFR 0.152.
 */
package water.rapids;

import java.util.Arrays;
import water.DKV;
import water.DTask;
import water.Futures;
import water.H2O;
import water.H2ONode;
import water.Key;
import water.MemoryManager;
import water.RPC;
import water.Value;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.NewChunk;
import water.fvec.Vec;
import water.rapids.SingleThreadRadixOrder;
import water.rapids.SplitByMSBLocal;
import water.util.ArrayUtils;

public class BinaryMerge
extends DTask<BinaryMerge> {
    // Number of result rows produced by this task; accumulated in bmerge_r as
    // max(1, matchingRightRows) * leftRowsInGroup for every group that is emitted.
    long _numRowsInResult = 0L;
    // Row count of each result batch; filled in by createChunksInDKV.
    int[] _chunkSizes;
    // Accumulated elapsed times in seconds, one slot per phase (20 slots allocated in compute2).
    double[] _timings;
    // Per left row in the searched range: 1-based position (in sorted right order) of the first
    // matching right row; 0 means no match. Split into batches of _retBatchSize entries.
    transient long[][] _retFirst;
    // Per left row in the searched range: number of matching right rows (same batching as _retFirst).
    transient long[][] _retLen;
    // Packed key bytes of the sorted left/right side, one array per batch (OXbatch._x).
    transient byte[][] _leftKey;
    transient byte[][] _rightKey;
    // Row numbers of the sorted left/right side, one array per batch (OXbatch._o); used as
    // global row numbers into _leftVec/_rightVec.
    transient long[][] _leftOrder;
    transient long[][] _rightOrder;
    // Set to true by bmerge_r when some left key group matches more than one right row.
    transient boolean _oneToManyMatch = false;
    // Width in bytes of each key column inside a packed key (used when decoding in keycmp/bsearchLeft).
    int[] _leftFieldSizes;
    int[] _rightFieldSizes;
    // Per key column offset added back when decoding a stored value (value - 1 + base; stored 0 decodes to Long.MIN_VALUE).
    long[] _leftBase;
    long[] _rightBase;
    // Number of key columns on each side (_leftFieldSizes.length / _rightFieldSizes.length).
    transient int _leftKeyNCol;
    transient int _rightKeyNCol;
    // Total bytes per packed key on each side (sum of the field sizes).
    transient int _leftKeySize;
    transient int _rightKeySize;
    // Number of key columns actually compared: min(_leftKeyNCol, _rightKeyNCol).
    transient int _numJoinCols;
    // Row counts taken from the sorted headers.
    transient long _leftN;
    transient long _rightN;
    // Exclusive lower bound (index just before the first row) of the left range that is merged.
    transient long _leftFrom;
    // Number of entries per batch of _retFirst/_retLen (set to 0x10000000 in compute2).
    transient int _retBatchSize;
    // Rows per batch of the sorted left/right key and order arrays (from the sorted headers).
    transient long _leftBatchSize;
    transient long _rightBatchSize;
    // The two frames being joined.
    Frame _leftFrame;
    Frame _rightFrame;
    // Per cloud node (indexed by node index): number of right/left rows whose chunk is homed on that node
    // and that must be fetched to build the result.
    transient long[] _perNodeNumRightRowsToFetch;
    transient long[] _perNodeNumLeftRowsToFetch;
    // MSB bucket handled by this task on each side; compute2 asserts 0..255 for left and -1..255 for right.
    int _leftMSB;
    int _rightMSB;
    // Shift applied to the MSB value when computing the key range covered by a bucket.
    int _leftShift;
    int _rightShift;
    // When true, left rows without a matching right row are still emitted.
    boolean _allLeft;
    // Always set to false by the constructor; compute2 throws H2O.unimpl() if it is set and the left side is empty.
    boolean _allRight;
    // Cached _leftFrame.anyVec() / _rightFrame.anyVec(), used to map row numbers to chunk indices.
    Vec _leftVec;
    Vec _rightVec;
    // For each chunk index of the left/right frame, the index of the chunk key's home node.
    transient int[] _leftChunkNode;
    transient int[] _rightChunkNode;

    /**
     * Creates a merge task for one (left MSB, right MSB) bucket pair.
     *
     * @param leftFrame       left frame of the join
     * @param rightFrame      right frame of the join
     * @param leftMSB         MSB bucket of the left side handled by this task
     * @param rightMSB        MSB bucket of the right side handled by this task
     * @param leftShift       shift used to derive the left bucket's key range
     * @param rightShift      shift used to derive the right bucket's key range
     * @param leftFieldSizes  byte width of each left key column
     * @param rightFieldSizes byte width of each right key column
     * @param leftBase        per-column base of the left key encoding
     * @param rightBase       per-column base of the right key encoding
     * @param allLeft         whether unmatched left rows are kept in the result
     */
    BinaryMerge(Frame leftFrame, Frame rightFrame, int leftMSB, int rightMSB, int leftShift, int rightShift, int[] leftFieldSizes, int[] rightFieldSizes, long[] leftBase, long[] rightBase, boolean allLeft) {
        // Left-hand side description.
        _leftFrame = leftFrame;
        _leftMSB = leftMSB;
        _leftShift = leftShift;
        _leftFieldSizes = leftFieldSizes;
        _leftBase = leftBase;

        // Right-hand side description.
        _rightFrame = rightFrame;
        _rightMSB = rightMSB;
        _rightShift = rightShift;
        _rightFieldSizes = rightFieldSizes;
        _rightBase = rightBase;

        // Join flavour: keeping all right rows is never enabled here.
        _allLeft = allLeft;
        _allRight = false;
    }

    /**
     * Runs the merge for this (left MSB, right MSB) bucket pair.
     *
     * <p>Steps: load the sorted key/order batches of both sides from the DKV, cache the home
     * node of every chunk, narrow the left range to the part that can overlap the right bucket,
     * allocate the per-left-row match arrays ({@code _retFirst}/{@code _retLen}), run the
     * recursive merge and, if any result rows were produced, materialise them via
     * {@link #createChunksInDKV()}.
     *
     * <p>Completes early (without producing anything) when the left side is missing, when the
     * right side is missing and {@code _allLeft} is false, or when the narrowed left range is empty.
     */
    @Override
    public void compute2() {
        this._timings = new double[20];
        long t0 = System.nanoTime();
        SingleThreadRadixOrder.OXHeader leftSortedOXHeader = (SingleThreadRadixOrder.OXHeader)DKV.getGet(SingleThreadRadixOrder.getSortedOXHeaderKey(true, this._leftMSB));
        if (leftSortedOXHeader == null) {
            if (this._allRight) {
                throw H2O.unimpl();
            }
            this.tryComplete();
            return;
        }
        SingleThreadRadixOrder.OXHeader rightSortedOXHeader = (SingleThreadRadixOrder.OXHeader)DKV.getGet(SingleThreadRadixOrder.getSortedOXHeaderKey(false, this._rightMSB));
        if (rightSortedOXHeader == null) {
            if (!this._allLeft) {
                this.tryComplete();
                return;
            }
            // Placeholder header for an absent right side so the left rows can still be emitted.
            rightSortedOXHeader = new SingleThreadRadixOrder.OXHeader(0, 0L, 0);
        }
        this._leftBatchSize = leftSortedOXHeader._batchSize;
        this._rightBatchSize = rightSortedOXHeader._batchSize;
        this._perNodeNumRightRowsToFetch = new long[H2O.CLOUD.size()];
        this._perNodeNumLeftRowsToFetch = new long[H2O.CLOUD.size()];

        // Load the sorted left key bytes and row order, batch by batch.
        this._leftKey = new byte[leftSortedOXHeader._nBatch][];
        this._leftOrder = new long[leftSortedOXHeader._nBatch][];
        for (int b = 0; b < leftSortedOXHeader._nBatch; ++b) {
            Value v = DKV.get(SplitByMSBLocal.getSortedOXbatchKey(true, this._leftMSB, b));
            SplitByMSBLocal.OXbatch oxLeft = (SplitByMSBLocal.OXbatch)v.get();
            v.freeMem();
            this._leftKey[b] = oxLeft._x;
            this._leftOrder[b] = oxLeft._o;
        }
        this._leftN = leftSortedOXHeader._numRows;
        assert (this._leftN >= 1L);

        // Same for the right side (zero batches when the placeholder header is in use).
        this._rightKey = new byte[rightSortedOXHeader._nBatch][];
        this._rightOrder = new long[rightSortedOXHeader._nBatch][];
        for (int b = 0; b < rightSortedOXHeader._nBatch; ++b) {
            Value v = DKV.get(SplitByMSBLocal.getSortedOXbatchKey(false, this._rightMSB, b));
            SplitByMSBLocal.OXbatch oxRight = (SplitByMSBLocal.OXbatch)v.get();
            v.freeMem();
            this._rightKey[b] = oxRight._x;
            this._rightOrder[b] = oxRight._o;
        }
        this._rightN = rightSortedOXHeader._numRows;

        this._leftKeyNCol = this._leftFieldSizes.length;
        this._rightKeyNCol = this._rightFieldSizes.length;
        this._leftKeySize = ArrayUtils.sum(this._leftFieldSizes);
        this._rightKeySize = ArrayUtils.sum(this._rightFieldSizes);
        this._numJoinCols = Math.min(this._leftKeyNCol, this._rightKeyNCol);

        // Cache the home node index of every chunk so row -> node lookups are array reads.
        this._leftChunkNode = new int[this._leftFrame.anyVec().nChunks()];
        this._rightChunkNode = new int[this._rightFrame.anyVec().nChunks()];
        for (int i = 0; i < this._leftFrame.anyVec().nChunks(); ++i) {
            this._leftChunkNode[i] = this._leftFrame.anyVec().chunkKey(i).home_node().index();
        }
        for (int i = 0; i < this._rightFrame.anyVec().nChunks(); ++i) {
            this._rightChunkNode[i] = this._rightFrame.anyVec().chunkKey(i).home_node().index();
        }
        this._leftVec = this._leftFrame.anyVec();
        this._rightVec = this._rightFrame.anyVec();
        this._timings[0] = this._timings[0] + (double)(System.nanoTime() - t0) / 1.0E9;

        assert (0 <= this._leftMSB && this._leftMSB <= 255 && -1 <= this._rightMSB && this._rightMSB <= 255);
        assert (this._rightMSB != -1 || this._allLeft);

        // Range of first-key-column values covered by each bucket.
        long leftMin = ((long)this._leftMSB << this._leftShift) + this._leftBase[0] - 1L;
        long leftMax = (((long)this._leftMSB + 1L) << this._leftShift) + this._leftBase[0] - 2L;
        long rightMin = ((long)this._rightMSB << this._rightShift) + this._rightBase[0] - 1L;
        long rightMax = (((long)this._rightMSB + 1L) << this._rightShift) + this._rightBase[0] - 2L;

        // Narrow the left rows to those that can overlap the right bucket, unless the whole
        // left bucket has to be considered anyway.
        boolean wholeLow = this._rightMSB == -1 || leftMin >= rightMin || (this._allLeft && this._rightMSB == 0);
        boolean wholeUpp = this._rightMSB == -1 || leftMax <= rightMax || (this._allLeft && this._rightMSB == 255);
        this._leftFrom = wholeLow ? -1L : this.bsearchLeft(rightMin, true);
        long leftTo = wholeUpp ? this._leftN : this.bsearchLeft(rightMax, false);
        long retSize = leftTo - this._leftFrom - 1L;
        assert (retSize >= 0L);
        if (retSize == 0L) {
            this.tryComplete();
            return;
        }

        // Allocate one (first, len) slot per left row in range, split into fixed-size batches.
        this._retBatchSize = 0x10000000;
        int retNBatch = (int)((retSize - 1L) / (long)this._retBatchSize + 1L);
        int lastBatch = retNBatch - 1;
        // Multiply in long: (retNBatch - 1) * _retBatchSize overflows int once retSize exceeds 2^31.
        int retLastSize = (int)(retSize - (long)lastBatch * (long)this._retBatchSize);
        this._retFirst = new long[retNBatch][];
        this._retLen = new long[retNBatch][];
        for (int b = 0; b < lastBatch; ++b) {
            this._retFirst[b] = MemoryManager.malloc8(this._retBatchSize);
            this._retLen[b] = MemoryManager.malloc8(this._retBatchSize);
        }
        this._retFirst[lastBatch] = MemoryManager.malloc8(retLastSize);
        this._retLen[lastBatch] = MemoryManager.malloc8(retLastSize);

        t0 = System.nanoTime();
        this.bmerge_r(this._leftFrom, leftTo, -1L, this._rightN);
        this._timings[1] = this._timings[1] + (double)(System.nanoTime() - t0) / 1.0E9;

        // Sanity checks (only evaluated with assertions enabled).
        if (this._allLeft) {
            assert (ArrayUtils.sum(this._perNodeNumLeftRowsToFetch) == retSize);
        } else {
            long tt = 0L;
            for (int i = 0; i < this._retFirst.length; ++i) {
                for (int j = 0; j < this._retFirst[i].length; ++j) {
                    tt += this._retFirst[i][j] > 0L ? 1L : 0L;
                }
            }
            assert (tt <= retSize);
            assert (ArrayUtils.sum(this._perNodeNumLeftRowsToFetch) == tt);
        }
        if (this._numRowsInResult > 0L) {
            this.createChunksInDKV();
        }
        this.tryComplete();
    }

    /**
     * Compares the left key at sorted position {@code xi} with the right key at sorted
     * position {@code yi}, column by column over the first {@code _numJoinCols} key columns.
     *
     * <p>Each column is decoded big-endian from its packed bytes; a stored 0 decodes to
     * {@code Long.MIN_VALUE}, anything else to {@code stored - 1 + base}. Comparison stops at
     * the first column that differs.
     *
     * @return a negative, zero or positive value; the difference of the deciding column,
     *         saturated to [-Integer.MAX_VALUE, Integer.MAX_VALUE]
     */
    private int keycmp(byte[][] x, long xi, byte[][] y, long yi) {
        final byte[] leftBytes = x[(int)(xi / this._leftBatchSize)];
        final byte[] rightBytes = y[(int)(yi / this._rightBatchSize)];
        int leftPos = (int)(xi % this._leftBatchSize) * this._leftKeySize;
        int rightPos = (int)(yi % this._rightBatchSize) * this._rightKeySize;
        long leftVal = 0L;
        long rightVal = 0L;
        int col = 0;
        while (col < this._numJoinCols && leftVal == rightVal) {
            final int rightWidth = this._rightFieldSizes[col];

            // Decode the left column: first byte, then the remaining (width - 1) bytes.
            long leftRaw = leftBytes[leftPos++] & 0xFFL;
            for (int k = this._leftFieldSizes[col]; k > 1; --k) {
                leftRaw = (leftRaw << 8) | (leftBytes[leftPos++] & 0xFFL);
            }

            // Decode the right column the same way.
            long rightRaw = rightBytes[rightPos++] & 0xFFL;
            for (int k = rightWidth; k > 1; --k) {
                rightRaw = (rightRaw << 8) | (rightBytes[rightPos++] & 0xFFL);
            }

            if (leftRaw == 0L) {
                leftVal = Long.MIN_VALUE;
            } else {
                leftVal = leftRaw - 1L + this._leftBase[col];
            }
            if (rightRaw == 0L) {
                rightVal = Long.MIN_VALUE;
            } else {
                rightVal = rightRaw - 1L + this._rightBase[col];
            }
            ++col;
        }

        // The subtraction may wrap for widely separated values; saturate in that case.
        final long delta = leftVal - rightVal;
        if (leftVal > rightVal) {
            if (delta < 0L || delta > Integer.MAX_VALUE) {
                return Integer.MAX_VALUE;
            }
            return (int)delta;
        }
        if (delta > 0L || delta < -2147483647L) {
            return -2147483647;
        }
        return (int)delta;
    }

    /**
     * Binary search over the first key column of the sorted left keys.
     *
     * @param x         value to locate (in decoded key space)
     * @param returnLow if true, returns the last position whose value is strictly less than
     *                  {@code x} (or -1); otherwise returns the first position whose value is
     *                  strictly greater than {@code x} (or {@code _leftN})
     * @return the exclusive bound described above
     */
    private long bsearchLeft(long x, boolean returnLow) {
        long lo = -1L;
        long hi = this._leftN;
        while (lo < hi - 1L) {
            final long mid = lo + (hi - lo) / 2L;
            final byte[] batch = this._leftKey[(int)(mid / this._leftBatchSize)];
            int pos = (int)(mid % this._leftBatchSize) * this._leftKeySize;

            // Decode the first key column (big-endian) at position mid.
            long raw = batch[pos] & 0xFFL;
            for (int remaining = this._leftFieldSizes[0]; remaining > 1; --remaining) {
                raw = (raw << 8) | (batch[++pos] & 0xFFL);
            }
            final long key;
            if (raw == 0L) {
                key = Long.MIN_VALUE;
            } else {
                key = raw - 1L + this._leftBase[0];
            }

            // Equal keys move the upper bound when looking for the low side, the lower bound otherwise.
            final boolean shrinkFromAbove = x < key || (x == key && returnLow);
            if (shrinkFromAbove) {
                hi = mid;
            } else {
                lo = mid;
            }
        }
        if (returnLow) {
            return lo;
        }
        return hi;
    }

    /**
     * Tells whether the packed keys at sorted positions {@code xi} and {@code yi} of the same
     * batched key array are byte-for-byte identical over the full left key width.
     */
    private boolean leftKeyEqual(byte[][] x, long xi, long yi) {
        final byte[] first = x[(int)(xi / this._leftBatchSize)];
        final byte[] second = x[(int)(yi / this._leftBatchSize)];
        final int firstStart = (int)(xi % this._leftBatchSize) * this._leftKeySize;
        final int secondStart = (int)(yi % this._leftBatchSize) * this._leftKeySize;
        int matched = 0;
        while (matched < this._leftKeySize) {
            if (first[firstStart + matched] != second[secondStart + matched]) {
                break;
            }
            ++matched;
        }
        return matched == this._leftKeySize;
    }

    /**
     * Recursive merge step. All four bounds are exclusive.
     *
     * <p>Takes the middle left row of ({@code lLowIn}, {@code lUppIn}), binary-searches the
     * right range ({@code rLowIn}, {@code rUppIn}) for the run of right rows with an equal key,
     * widens the left row to the whole run of left rows sharing its key, records the match for
     * each of those left rows, counts the rows to be fetched per node, and then recurses into
     * the left/right sub-ranges below and above the processed group.
     *
     * <p>For each left row that has matches, {@code _retFirst} receives the 1-based sorted
     * position of the first matching right row (so that 0 means "no match") and
     * {@code _retLen} the number of matching right rows.
     */
    private void bmerge_r(long lLowIn, long lUppIn, long rLowIn, long rUppIn) {
        long tmpUpp;
        long tmpLow;
        long lLow = lLowIn;
        long lUpp = lUppIn;
        long rLow = rLowIn;
        long rUpp = rUppIn;
        long lr = lLow + (lUpp - lLow) / 2L;

        // Binary search the right side for the key of left row lr.
        while (rLow < rUpp - 1L) {
            long mid = rLow + (rUpp - rLow) / 2L;
            int cmp = this.keycmp(this._leftKey, lr, this._rightKey, mid);
            if (cmp < 0) {
                rUpp = mid;
                continue;
            }
            if (cmp > 0) {
                rLow = mid;
                continue;
            }
            // Found one match at mid: locate the full extent of equal right keys.
            tmpLow = mid;
            tmpUpp = mid;
            while (tmpLow < rUpp - 1L) {
                // Pull rUpp down to just above the last equal key.
                mid = tmpLow + (rUpp - tmpLow) / 2L;
                if (this.keycmp(this._leftKey, lr, this._rightKey, mid) == 0) {
                    tmpLow = mid;
                    continue;
                }
                rUpp = mid;
            }
            while (rLow < tmpUpp - 1L) {
                // Push rLow up to just below the first equal key.
                mid = rLow + (tmpUpp - rLow) / 2L;
                if (this.keycmp(this._leftKey, lr, this._rightKey, mid) == 0) {
                    tmpUpp = mid;
                    continue;
                }
                rLow = mid;
            }
            // (rLow, rUpp) now brackets exactly the matching right rows; leave the search loop.
            break;
        }

        // Extend lr to the whole run of left rows with the same key, within the incoming bounds.
        for (tmpLow = lr + 1L; tmpLow < lUpp && this.leftKeyEqual(this._leftKey, tmpLow, lr); ++tmpLow) {
        }
        lUpp = tmpLow;
        for (tmpUpp = lr - 1L; tmpUpp > lLow && this.leftKeyEqual(this._leftKey, tmpUpp, lr); --tmpUpp) {
        }
        lLow = tmpUpp;
        assert (lUpp - lLow >= 2L);

        long len = rUpp - rLow - 1L;
        if (len > 0L || this._allLeft) {
            long t0 = System.nanoTime();
            if (len > 1L) {
                this._oneToManyMatch = true;
            }
            // Unmatched left rows (len == 0, only reachable with _allLeft) still produce one row each.
            this._numRowsInResult += Math.max(1L, len) * (lUpp - lLow - 1L);
            for (long j = lLow + 1L; j < lUpp; ++j) {
                long t00 = System.nanoTime();
                int jb = (int)(j / this._leftBatchSize);
                int jo = (int)(j % this._leftBatchSize);
                long globalRowNumber = this._leftOrder[jb][jo];
                this._timings[17] = this._timings[17] + (double)(System.nanoTime() - t00) / 1.0E9;
                t00 = System.nanoTime();
                int chkIdx = this._leftVec.elem2ChunkIdx(globalRowNumber);
                this._timings[15] = this._timings[15] + (double)(System.nanoTime() - t00) / 1.0E9;
                int node = this._leftChunkNode[chkIdx];
                this._perNodeNumLeftRowsToFetch[node] = this._perNodeNumLeftRowsToFetch[node] + 1L;
                if (len == 0L) continue;
                // Slot of this left row relative to the start of the searched left range.
                long outLoc = j - (this._leftFrom + 1L);
                jb = (int)(outLoc / (long)this._retBatchSize);
                jo = (int)(outLoc % (long)this._retBatchSize);
                // rLow + 1 is the first match (0-based); + 1 again makes it 1-based so 0 means no match.
                this._retFirst[jb][jo] = rLow + 2L;
                this._retLen[jb][jo] = len;
            }
            for (long i = 0L; i < len; ++i) {
                long loc = rLow + 1L + i;
                long t00 = System.nanoTime();
                long globalRowNumber = this._rightOrder[(int)(loc / this._rightBatchSize)][(int)(loc % this._rightBatchSize)];
                this._timings[18] = this._timings[18] + (double)(System.nanoTime() - t00) / 1.0E9;
                t00 = System.nanoTime();
                int chkIdx = this._rightVec.elem2ChunkIdx(globalRowNumber);
                this._timings[16] = this._timings[16] + (double)(System.nanoTime() - t00) / 1.0E9;
                int node = this._rightChunkNode[chkIdx];
                this._perNodeNumRightRowsToFetch[node] = this._perNodeNumRightRowsToFetch[node] + 1L;
            }
            this._timings[14] = this._timings[14] + (double)(System.nanoTime() - t0) / 1.0E9;
        }

        // Recurse into what is left below and above the processed group. Without _allLeft there
        // is nothing to do once the corresponding right sub-range is empty.
        if (lLow > lLowIn && (rLow > rLowIn || this._allLeft)) {
            this.bmerge_r(lLowIn, lLow + 1L, rLowIn, rLow + 1L);
        }
        if (lUpp < lUppIn && (rUpp < rUppIn || this._allLeft)) {
            this.bmerge_r(lUpp - 1L, lUppIn, rUpp - 1L, rUppIn);
        }
    }

    /**
     * Materialises the join result for this bucket pair as compressed chunks in the DKV.
     *
     * <p>Phases (each timed into {@code _timings}):
     * <ol>
     *   <li>allocate, per node, batches that will hold the row numbers to fetch from that node;</li>
     *   <li>walk {@code _retFirst}/{@code _retLen} and fill those batches with left and right
     *       row numbers (a run of left rows sharing the same match fetches the right rows once);</li>
     *   <li>allocate the NaN-filled result columns, split into batches of result rows;</li>
     *   <li>issue one {@code GetRawRemoteRows} RPC per batch and node and wait for all of them;</li>
     *   <li>walk the matches again in the same order and copy the fetched values into the result;</li>
     *   <li>compress every result batch into a chunk and put it into the DKV.</li>
     * </ol>
     *
     * <p>Fetch batches and result batches use different sizes; the position of a fetched row is
     * always resolved with the fetch batch size it was stored with.
     */
    private void createChunksInDKV() {
        long t0 = System.nanoTime();
        final int numNodes = H2O.CLOUD.size();
        // Row numbers per fetch batch sent to a node.
        final int fetchBatchSize = 0x2000000;
        // Rows per result batch (one chunk per column and batch).
        final int resultBatchSize = 0x1000000;

        long[][][] perNodeRightRows = new long[numNodes][][];
        long[] perNodeRightLoc = new long[numNodes];
        long[][][] perNodeLeftRows = new long[numNodes][][];
        long[] perNodeLeftLoc = new long[numNodes];
        for (int i = 0; i < numNodes; ++i) {
            if (this._perNodeNumRightRowsToFetch[i] > 0L) {
                perNodeRightRows[i] = BinaryMerge.allocRowBatches(this._perNodeNumRightRowsToFetch[i], fetchBatchSize);
            }
            if (this._perNodeNumLeftRowsToFetch[i] > 0L) {
                perNodeLeftRows[i] = BinaryMerge.allocRowBatches(this._perNodeNumLeftRowsToFetch[i], fetchBatchSize);
            }
        }
        this._timings[2] = this._timings[2] + (double)(System.nanoTime() - t0) / 1.0E9;

        // Pass 1: distribute the row numbers to fetch over the per-node batches.
        t0 = System.nanoTime();
        long prevf = -1L;
        long prevl = -1L;
        long leftLoc = this._leftFrom;
        for (int jb = 0; jb < this._retFirst.length; ++jb) {
            for (int jo = 0; jo < this._retFirst[jb].length; ++jo) {
                ++leftLoc;
                long f = this._retFirst[jb][jo];
                long l = this._retLen[jb][jo];
                if (f == 0L) {
                    assert (l == 0L);
                    if (!this._allLeft) continue;
                }
                long row = this._leftOrder[(int)(leftLoc / this._leftBatchSize)][(int)(leftLoc % this._leftBatchSize)];
                int ni = this._leftChunkNode[this._leftVec.elem2ChunkIdx(row)];
                // Next free slot for this node; pass 2 must consume slots in the same order.
                long pnl = perNodeLeftLoc[ni]++;
                perNodeLeftRows[ni][(int)(pnl / (long)fetchBatchSize)][(int)(pnl % (long)fetchBatchSize)] = row;
                if (f == 0L) continue;
                assert (l > 0L);
                // Same right rows as the previous left row: they are fetched only once.
                if (prevf == f && prevl == l) continue;
                prevf = f;
                prevl = l;
                for (long r = 0L; r < l; ++r) {
                    long loc = f + r - 1L;
                    long rightRow = this._rightOrder[(int)(loc / this._rightBatchSize)][(int)(loc % this._rightBatchSize)];
                    int rni = this._rightChunkNode[this._rightVec.elem2ChunkIdx(rightRow)];
                    long rpnl = perNodeRightLoc[rni]++;
                    perNodeRightRows[rni][(int)(rpnl / (long)fetchBatchSize)][(int)(rpnl % (long)fetchBatchSize)] = rightRow;
                }
            }
        }
        // Rewind the per-node cursors for pass 2.
        Arrays.fill(perNodeLeftLoc, 0L);
        Arrays.fill(perNodeRightLoc, 0L);
        this._timings[3] = this._timings[3] + (double)(System.nanoTime() - t0) / 1.0E9;

        // Allocate the result columns, NaN-filled so that unmatched right columns read as missing.
        t0 = System.nanoTime();
        int nbatch = (int)((this._numRowsInResult - 1L) / (long)resultBatchSize + 1L);
        // Multiply in long to avoid int overflow for very large results.
        int lastSize = (int)(this._numRowsInResult - (long)(nbatch - 1) * (long)resultBatchSize);
        assert (nbatch >= 1);
        assert (lastSize > 0);
        this._chunkSizes = new int[nbatch];
        Arrays.fill(this._chunkSizes, resultBatchSize);
        this._chunkSizes[nbatch - 1] = lastSize;
        int numLeftCols = this._leftFrame.numCols();
        int numColsInResult = this._leftFrame.numCols() + this._rightFrame.numCols() - this._numJoinCols;
        double[][][] frameLikeChunks = new double[numColsInResult][nbatch][];
        for (int col = 0; col < numColsInResult; ++col) {
            for (int b = 0; b < nbatch; ++b) {
                frameLikeChunks[col][b] = MemoryManager.malloc8d(this._chunkSizes[b]);
                Arrays.fill(frameLikeChunks[col][b], Double.NaN);
            }
        }
        this._timings[4] = this._timings[4] + (double)(System.nanoTime() - t0) / 1.0E9;

        // Fire all fetches first, then collect, so the nodes work in parallel.
        t0 = System.nanoTime();
        RPC[][] grrrsRiteRPC = new RPC[numNodes][];
        RPC[][] grrrsLeftRPC = new RPC[numNodes][];
        GetRawRemoteRows[][] grrrsLeft = new GetRawRemoteRows[numNodes][];
        GetRawRemoteRows[][] grrrsRite = new GetRawRemoteRows[numNodes][];
        for (H2ONode node : H2O.CLOUD._memary) {
            int ni = node.index();
            int bUppRite = perNodeRightRows[ni] == null ? 0 : perNodeRightRows[ni].length;
            int bUppLeft = perNodeLeftRows[ni] == null ? 0 : perNodeLeftRows[ni].length;
            grrrsRiteRPC[ni] = new RPC[bUppRite];
            grrrsLeftRPC[ni] = new RPC[bUppLeft];
            grrrsRite[ni] = new GetRawRemoteRows[bUppRite];
            grrrsLeft[ni] = new GetRawRemoteRows[bUppLeft];
            for (int b = 0; b < bUppRite; ++b) {
                grrrsRiteRPC[ni][b] = new RPC<GetRawRemoteRows>(node, new GetRawRemoteRows(this._rightFrame, perNodeRightRows[ni][b])).call();
            }
            for (int b = 0; b < bUppLeft; ++b) {
                grrrsLeftRPC[ni][b] = new RPC<GetRawRemoteRows>(node, new GetRawRemoteRows(this._leftFrame, perNodeLeftRows[ni][b])).call();
            }
        }
        for (H2ONode node : H2O.CLOUD._memary) {
            int ni = node.index();
            int bUppRite = perNodeRightRows[ni] == null ? 0 : perNodeRightRows[ni].length;
            for (int b = 0; b < bUppRite; ++b) {
                GetRawRemoteRows fetched = (GetRawRemoteRows)grrrsRiteRPC[ni][b].get();
                grrrsRite[ni][b] = fetched;
                this._timings[5] = this._timings[5] + fetched.timeTaken;
            }
            int bUppLeft = perNodeLeftRows[ni] == null ? 0 : perNodeLeftRows[ni].length;
            for (int b = 0; b < bUppLeft; ++b) {
                GetRawRemoteRows fetched = (GetRawRemoteRows)grrrsLeftRPC[ni][b].get();
                grrrsLeft[ni][b] = fetched;
                this._timings[5] = this._timings[5] + fetched.timeTaken;
            }
        }
        this._timings[6] = this._timings[6] + (double)(System.nanoTime() - t0) / 1.0E9;

        // Pass 2: replay the matches in the same order and copy fetched values into the result.
        t0 = System.nanoTime();
        grrrsRiteRPC = null;
        grrrsLeftRPC = null;
        long resultLoc = 0L;
        leftLoc = this._leftFrom;
        prevf = -1L;
        prevl = -1L;
        for (int jb = 0; jb < this._retFirst.length; ++jb) {
            for (int jo = 0; jo < this._retFirst[jb].length; ++jo) {
                ++leftLoc;
                long f = this._retFirst[jb][jo];
                long l = this._retLen[jb][jo];
                if (f == 0L && !this._allLeft) continue;
                long row = this._leftOrder[(int)(leftLoc / this._leftBatchSize)][(int)(leftLoc % this._leftBatchSize)];
                int ni = this._leftChunkNode[this._leftVec.elem2ChunkIdx(row)];
                long pnl = perNodeLeftLoc[ni]++;
                // Fetched rows are addressed with the fetch batch size they were stored with.
                double[][] leftChks = grrrsLeft[ni][(int)(pnl / (long)fetchBatchSize)]._chk;
                int leftOff = (int)(pnl % (long)fetchBatchSize);
                // The left row is repeated once per matching right row (once if unmatched).
                long reps = Math.max(l, 1L);
                for (long rep = 0L; rep < reps; ++rep) {
                    long a = resultLoc + rep;
                    int whichChunk = (int)(a / (long)resultBatchSize);
                    int offset = (int)(a % (long)resultBatchSize);
                    for (int col = 0; col < leftChks.length; ++col) {
                        frameLikeChunks[col][whichChunk][offset] = leftChks[col][leftOff];
                    }
                }
                if (f == 0L) {
                    // Unmatched left row: right columns stay NaN.
                    ++resultLoc;
                    continue;
                }
                assert (l > 0L);
                if (prevf == f && prevl == l) {
                    // Same right rows as the previous left row: copy them from the l result rows just written.
                    for (long r = 0L; r < l; ++r) {
                        int toChunk = (int)(resultLoc / (long)resultBatchSize);
                        int toOffset = (int)(resultLoc % (long)resultBatchSize);
                        int fromChunk = (int)((resultLoc - l) / (long)resultBatchSize);
                        int fromOffset = (int)((resultLoc - l) % (long)resultBatchSize);
                        for (int col = 0; col < numColsInResult - numLeftCols; ++col) {
                            frameLikeChunks[numLeftCols + col][toChunk][toOffset] = frameLikeChunks[numLeftCols + col][fromChunk][fromOffset];
                        }
                        ++resultLoc;
                    }
                    continue;
                }
                prevf = f;
                prevl = l;
                for (long r = 0L; r < l; ++r) {
                    int whichChunk = (int)(resultLoc / (long)resultBatchSize);
                    int offset = (int)(resultLoc % (long)resultBatchSize);
                    long loc = f + r - 1L;
                    long rightRow = this._rightOrder[(int)(loc / this._rightBatchSize)][(int)(loc % this._rightBatchSize)];
                    int rni = this._rightChunkNode[this._rightVec.elem2ChunkIdx(rightRow)];
                    long rpnl = perNodeRightLoc[rni]++;
                    double[][] rightChks = grrrsRite[rni][(int)(rpnl / (long)fetchBatchSize)]._chk;
                    int rightOff = (int)(rpnl % (long)fetchBatchSize);
                    // The leading join columns of the right frame are not repeated in the result.
                    for (int col = 0; col < numColsInResult - numLeftCols; ++col) {
                        frameLikeChunks[numLeftCols + col][whichChunk][offset] = rightChks[this._numJoinCols + col][rightOff];
                    }
                    ++resultLoc;
                }
            }
        }
        this._timings[10] = this._timings[10] + (double)(System.nanoTime() - t0) / 1.0E9;

        // Compress and publish every result batch, releasing the raw arrays as we go.
        t0 = System.nanoTime();
        grrrsLeft = null;
        grrrsRite = null;
        Futures fs = new Futures();
        for (int col = 0; col < numColsInResult; ++col) {
            for (int b = 0; b < nbatch; ++b) {
                Chunk ck = new NewChunk(frameLikeChunks[col][b]).compress();
                DKV.put(BinaryMerge.getKeyForMSBComboPerCol(this._leftMSB, this._rightMSB, col, b), ck, fs, true);
                frameLikeChunks[col][b] = null;
            }
        }
        fs.blockForPending();
        this._timings[11] = this._timings[11] + (double)(System.nanoTime() - t0) / 1.0E9;
    }

    /**
     * Allocates storage for {@code numRows} row numbers split into batches of {@code batchSize}
     * entries; the last batch holds the remainder. Requires {@code numRows > 0}.
     */
    private static long[][] allocRowBatches(long numRows, int batchSize) {
        int nbatch = (int)((numRows - 1L) / (long)batchSize + 1L);
        // Multiply in long: (nbatch - 1) * batchSize can exceed the int range.
        int lastSize = (int)(numRows - (long)(nbatch - 1) * (long)batchSize);
        assert (nbatch >= 1);
        assert (lastSize > 0);
        long[][] batches = new long[nbatch][];
        for (int b = 0; b < nbatch - 1; ++b) {
            batches[b] = MemoryManager.malloc8(batchSize);
        }
        batches[nbatch - 1] = MemoryManager.malloc8(lastSize);
        return batches;
    }

    /**
     * Builds the DKV key under which one result batch of one result column is stored for a
     * given (left MSB, right MSB) pair. The key is made with the node returned by
     * {@code SplitByMSBLocal.ownerOfMSB(rightMSB)}.
     *
     * @param leftMSB  left MSB bucket
     * @param rightMSB right MSB bucket
     * @param col      column index in the result
     * @param batch    batch index within the column
     */
    static Key getKeyForMSBComboPerCol(int leftMSB, int rightMSB, int col, int batch) {
        final StringBuilder keyName = new StringBuilder("__binary_merge__Chunk_for_col");
        keyName.append(col);
        keyName.append("_batch").append(batch);
        keyName.append("_leftMSB").append(leftMSB);
        keyName.append("_rightMSB").append(rightMSB);
        // NOTE(review): the two byte arguments are passed through unchanged; their meaning is defined by Key.make.
        return Key.make(keyName.toString(), (byte)1, (byte)31, false, SplitByMSBLocal.ownerOfMSB(rightMSB));
    }

    /**
     * Task that reads the given rows of a frame as doubles, column by column, from the chunks
     * whose keys report {@code home()} where the task executes. On completion {@code _chk[col][i]}
     * holds the value of column {@code col} at {@code rows[i]}, and {@code _rows} is cleared.
     *
     * NOTE(review): a requested row living in a non-home chunk would hit a null chunk entry;
     * the caller is presumably expected to send each node only its own rows - confirm.
     */
    class GetRawRemoteRows
    extends DTask<GetRawRemoteRows> {
        // Output: one array of values per frame column, indexed like the requested rows.
        double[][] _chk;
        // Input: global row numbers to read; set to null once the values have been extracted.
        long[] _rows;
        // Elapsed time of compute2 in seconds.
        double timeTaken;
        // Frame to read from.
        Frame _fr;

        GetRawRemoteRows(Frame fr, long[] rows) {
            _fr = fr;
            _rows = rows;
        }

        @Override
        public void compute2() {
            assert (this._rows != null);
            assert (this._chk == null);
            final long start = System.nanoTime();
            final int numRows = this._rows.length;
            this._chk = MemoryManager.malloc8d(this._fr.numCols(), numRows);

            // Resolve every requested row to (chunk index, offset within chunk) once.
            final int[] chunkOfRow = MemoryManager.malloc4(numRows);
            final int[] offsetInChunk = MemoryManager.malloc4(numRows);
            final Vec layoutVec = this._fr.anyVec();
            for (int r = 0; r < numRows; ++r) {
                chunkOfRow[r] = layoutVec.elem2ChunkIdx(this._rows[r]);
                offsetInChunk[r] = (int)(this._rows[r] - layoutVec.espc()[chunkOfRow[r]]);
            }

            // Per column: look up the home chunks, then read the requested cells.
            final Chunk[] homeChunks = new Chunk[layoutVec.nChunks()];
            for (int col = 0; col < this._fr.numCols(); ++col) {
                final Vec columnVec = this._fr.vec(col);
                for (int ci = 0; ci < homeChunks.length; ++ci) {
                    if (columnVec.chunkKey(ci).home()) {
                        homeChunks[ci] = columnVec.chunkForChunkIdx(ci);
                    } else {
                        homeChunks[ci] = null;
                    }
                }
                for (int r = 0; r < this._rows.length; ++r) {
                    this._chk[col][r] = homeChunks[chunkOfRow[r]].atd(offsetInChunk[r]);
                }
            }

            // The row numbers are cleared once the values are extracted.
            this._rows = null;
            assert (this._chk != null);
            this.timeTaken = (double)(System.nanoTime() - start) / 1.0E9;
            this.tryComplete();
        }
    }
}

