/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysml.runtime.matrix.data;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.stream.Collectors;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.functionobjects.DiagIndex;
import org.apache.sysml.runtime.functionobjects.RevIndex;
import org.apache.sysml.runtime.functionobjects.SortIndex;
import org.apache.sysml.runtime.functionobjects.SwapIndex;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.DenseBlock;
import org.apache.sysml.runtime.matrix.data.DenseBlockFactory;
import org.apache.sysml.runtime.matrix.data.IJV;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.MatrixValue;
import org.apache.sysml.runtime.matrix.data.SparseBlock;
import org.apache.sysml.runtime.matrix.data.SparseBlockCSR;
import org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue;
import org.apache.sysml.runtime.matrix.operators.ReorgOperator;
import org.apache.sysml.runtime.util.CommonThreadPool;
import org.apache.sysml.runtime.util.DataConverter;
import org.apache.sysml.runtime.util.SortUtils;
import org.apache.sysml.runtime.util.UtilFunctions;

public class LibMatrixReorg {
    /** Cell-count threshold (2^20 = 1,048,576); presumably the minimum input size for multi-threaded reorg - confirm against callers. */
    public static final long PAR_NUMCELL_THRESHOLD = 1024L * 1024L;
    /** Compile-time switch; judging by the name it enables shallow-copy reorg shortcuts - confirm against usage. */
    public static final boolean SHALLOW_COPY_REORG = true;
    /** Compile-time switch; judging by the name it selects CSR for sparse outputs - confirm against usage. */
    public static final boolean SPARSE_OUTPUTS_IN_CSR = true;

    /** Private constructor: the class cannot be instantiated from outside. */
    private LibMatrixReorg() {
    }

    /**
     * Tells whether {@link #reorg} can handle the given operator.
     *
     * @param op reorg operator to classify
     * @return {@code true} unless the operator maps to {@code ReorgType.INVALID}
     */
    public static boolean isSupportedReorgOperator(ReorgOperator op) {
        ReorgType detected = LibMatrixReorg.getReorgType(op);
        return detected != ReorgType.INVALID;
    }

    /**
     * Dispatches a reorg operator to the matching implementation
     * (transpose, rev, diag, or sort).
     *
     * @param in  input block
     * @param out output block, handed through to the selected implementation
     * @param op  reorg operator; its function object selects the operation
     * @return the block returned by the selected implementation
     * @throws DMLRuntimeException if the operator is not one of the supported kinds
     */
    public static MatrixBlock reorg(MatrixBlock in, MatrixBlock out, ReorgOperator op) {
        final ReorgType kind = LibMatrixReorg.getReorgType(op);
        if (kind == ReorgType.TRANSPOSE) {
            // multi-threaded variant only when more than one thread is requested
            return op.getNumThreads() > 1
                ? LibMatrixReorg.transpose(in, out, op.getNumThreads())
                : LibMatrixReorg.transpose(in, out);
        }
        if (kind == ReorgType.REV) {
            return LibMatrixReorg.rev(in, out);
        }
        if (kind == ReorgType.DIAG) {
            return LibMatrixReorg.diag(in, out);
        }
        if (kind == ReorgType.SORT) {
            SortIndex sortFn = (SortIndex)op.fn;
            return LibMatrixReorg.sort(in, out, sortFn.getCols(), sortFn.getDecreasing(), sortFn.getIndexReturn());
        }
        throw new DMLRuntimeException("Unsupported reorg operator: " + op.fn);
    }

    /**
     * Single-threaded transpose of {@code in} into {@code out}.
     *
     * @param in  input block
     * @param out output block; its {@code sparse} flag selects the output format
     * @return {@code out}
     */
    public static MatrixBlock transpose(MatrixBlock in, MatrixBlock out) {
        if (in.isEmptyBlock(false)) {
            return out;
        }
        out.nonZeros = in.nonZeros;

        // dense vector: the value array is wrapped with swapped dimensions instead of being copied
        boolean vector = in.rlen == 1 || in.clen == 1;
        if (!in.sparse && !out.sparse && vector) {
            out.denseBlock = DenseBlockFactory.createDenseBlock(in.getDenseBlockValues(), in.clen, in.rlen);
            return out;
        }

        // allocate the output in its target format
        if (!out.sparse) {
            out.allocateDenseBlock(false);
        } else {
            out.allocateSparseRowsBlock(false);
        }

        // pick the kernel for the input/output format combination
        if (in.sparse) {
            if (out.sparse) {
                int[] colCounts = LibMatrixReorg.countNnzPerColumn(in, 0, in.rlen);
                LibMatrixReorg.transposeSparseToSparse(in, out, 0, in.rlen, 0, in.clen, colCounts);
            } else {
                LibMatrixReorg.transposeSparseToDense(in, out, 0, in.rlen, 0, in.clen);
            }
        } else if (out.sparse) {
            LibMatrixReorg.transposeDenseToSparse(in, out);
        } else {
            LibMatrixReorg.transposeDenseToDense(in, out, 0, in.rlen, 0, in.clen);
        }
        return out;
    }

    /**
     * Multi-threaded transpose of {@code in} into {@code out} using up to {@code k} tasks.
     *
     * <p>Falls back to the single-threaded {@link #transpose(MatrixBlock, MatrixBlock)} when
     * the input is empty or has fewer than {@link #PAR_NUMCELL_THRESHOLD} cells, when
     * {@code k <= 1}, for dense vectors, for a single sparse row written to dense output,
     * for dense-to-sparse, or when {@code out} is not thread-safe.
     *
     * @param in  input block
     * @param out output block; its {@code sparse} flag selects the output format
     * @param k   requested degree of parallelism
     * @return {@code out}
     * @throws DMLRuntimeException wrapping any failure of the thread pool or of a task
     */
    public static MatrixBlock transpose(MatrixBlock in, MatrixBlock out, int k) {
        final boolean denseVector = !in.sparse && !out.sparse && (in.rlen == 1 || in.clen == 1);
        final boolean sparseRowToDense = in.sparse && !out.sparse && in.rlen == 1;
        final boolean denseToSparse = !in.sparse && out.sparse;
        // The cell count is computed in long: an int product overflows for large matrices
        // and would wrongly classify them as "small". k <= 1 (instead of k == 1) also sends
        // non-positive k to the sequential path instead of scheduling zero tasks.
        if (in.isEmptyBlock(false)
                || (long)in.rlen * (long)in.clen < LibMatrixReorg.PAR_NUMCELL_THRESHOLD
                || k <= 1
                || denseVector
                || sparseRowToDense
                || denseToSparse
                || !out.isThreadSafe()) {
            return LibMatrixReorg.transpose(in, out);
        }
        out.nonZeros = in.nonZeros;
        if (out.sparse) {
            out.allocateSparseRowsBlock(false);
        } else {
            out.allocateDenseBlock(false);
        }
        try {
            ExecutorService pool = CommonThreadPool.get(k);
            try {
                // sparse-sparse: pre-compute merged per-column non-zero counts over row partitions
                int[] cnt = null;
                if (in.sparse && out.sparse) {
                    List<Callable<int[]>> countTasks = new ArrayList<>();
                    int cblklen = (int)Math.ceil((double)in.rlen / (double)k);
                    for (int i = 0; i < k && i * cblklen < in.rlen; ++i) {
                        countTasks.add(new CountNnzTask(in, i * cblklen, Math.min((i + 1) * cblklen, in.rlen)));
                    }
                    for (Future<int[]> partial : pool.invokeAll(countTasks)) {
                        cnt = LibMatrixReorg.mergeNnzCounts(cnt, partial.get());
                    }
                }
                // partition over rows or columns; the partition length is rounded up to a multiple of 8
                boolean row = (in.sparse || in.rlen >= in.clen) && !out.sparse;
                int len = row ? in.rlen : in.clen;
                int blklen = (int)Math.ceil((double)len / (double)k);
                blklen += blklen % 8 != 0 ? 8 - blklen % 8 : 0;
                List<TransposeTask> tasks = new ArrayList<>();
                for (int i = 0; i < k && i * blklen < len; ++i) {
                    tasks.add(new TransposeTask(in, out, row, i * blklen, Math.min((i + 1) * blklen, len), cnt));
                }
                // get() surfaces any exception thrown inside a task
                for (Future<?> done : pool.invokeAll(tasks)) {
                    done.get();
                }
            }
            finally {
                // release the pool on failure paths as well
                pool.shutdown();
            }
        }
        catch (InterruptedException ex) {
            // keep the interrupt visible to callers further up the stack
            Thread.currentThread().interrupt();
            throw new DMLRuntimeException(ex);
        }
        catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        }
        return out;
    }

    /**
     * Reverses the row order of {@code in} into {@code out}.
     *
     * @param in  input block
     * @param out output block
     * @return {@code out}; left untouched for empty input, a copy of {@code in} for a single row
     */
    public static MatrixBlock rev(MatrixBlock in, MatrixBlock out) {
        if (!in.isEmptyBlock(false)) {
            if (in.rlen == 1) {
                // a single row is its own reverse
                out.copy(in);
            } else if (in.sparse) {
                LibMatrixReorg.reverseSparse(in, out);
            } else {
                LibMatrixReorg.reverseDense(in, out);
            }
        }
        return out;
    }

    /**
     * Reverses the rows of one indexed block of a blocked matrix and appends the
     * resulting output block(s) to {@code out}.
     *
     * @param in    indexed input block (indexes plus a {@code MatrixBlock} value)
     * @param rlen  row count used for the target position computation (presumably the
     *              number of rows of the overall matrix - confirm against callers)
     * @param brlen rows per block
     * @param out   list receiving one or two output blocks
     */
    public static void rev(IndexedMatrixValue in, long rlen, int brlen, ArrayList<IndexedMatrixValue> out) {
        MatrixIndexes inix = in.getIndexes();
        MatrixBlock inblk = (MatrixBlock)in.getValue();
        // reverse the rows inside the block first
        MatrixBlock tmpblk = LibMatrixReorg.rev(inblk, new MatrixBlock(inblk.getNumRows(), inblk.getNumColumns(), inblk.isInSparseFormat()));
        if (rlen % (long)brlen == 0L) {
            // rlen is a multiple of brlen: the reversed block is emitted whole at the
            // mirrored row-block index, column-block index unchanged
            int nrblks = (int)Math.ceil((double)rlen / (double)brlen);
            out.add(new IndexedMatrixValue(new MatrixIndexes((long)nrblks - inix.getRowIndex() + 1L, inix.getColumnIndex()), tmpblk));
        } else {
            // otherwise the reversed rows may straddle two output blocks
            // pos1/pos2: first and last target row of the reversed block (presumably 1-based
            // global positions - confirm against UtilFunctions.computeCellIndex)
            long pos1 = rlen - UtilFunctions.computeCellIndex(inix.getRowIndex(), brlen, tmpblk.getNumRows() - 1) + 1L;
            long pos2 = pos1 + (long)tmpblk.getNumRows() - 1L;
            // offset of pos1 inside its output block, and the last row of tmpblk that goes into the first output block
            int ipos1 = UtilFunctions.computeCellInBlock(pos1, brlen);
            int iposCut = tmpblk.getNumRows() - ipos1 - 1;
            int blkix1 = (int)UtilFunctions.computeBlockIndex(pos1, brlen);
            int blkix2 = (int)UtilFunctions.computeBlockIndex(pos2, brlen);
            int blklen1 = UtilFunctions.computeBlockSize(rlen, blkix1, brlen);
            int blklen2 = UtilFunctions.computeBlockSize(rlen, blkix2, brlen);
            // first output block: rows 0..iposCut of the reversed block, placed starting at row ipos1
            MatrixIndexes outix1 = new MatrixIndexes(blkix1, inix.getColumnIndex());
            MatrixBlock outblk1 = new MatrixBlock(blklen1, inblk.getNumColumns(), inblk.isInSparseFormat());
            MatrixBlock tmp1 = tmpblk.slice(0, iposCut);
            outblk1.leftIndexingOperations(tmp1, ipos1, ipos1 + tmp1.getNumRows() - 1, 0, tmpblk.getNumColumns() - 1, outblk1, MatrixObject.UpdateType.INPLACE_PINNED);
            out.add(new IndexedMatrixValue(outix1, outblk1));
            if (blkix1 != blkix2) {
                // second output block: the remaining rows, placed starting at row 0
                MatrixIndexes outix2 = new MatrixIndexes(blkix2, inix.getColumnIndex());
                MatrixBlock outblk2 = new MatrixBlock(blklen2, inblk.getNumColumns(), inblk.isInSparseFormat());
                MatrixBlock tmp2 = tmpblk.slice(iposCut + 1, tmpblk.getNumRows() - 1);
                outblk2.leftIndexingOperations(tmp2, 0, tmp2.getNumRows() - 1, 0, tmpblk.getNumColumns() - 1, outblk2, MatrixObject.UpdateType.INPLACE_PINNED);
                out.add(new IndexedMatrixValue(outix2, outblk2));
            }
        }
    }

    /**
     * Diag operation: a column vector is expanded to a diagonal matrix, a square
     * matrix is reduced to its diagonal as a column vector.
     *
     * @param in  input block (single column, or square)
     * @param out output block
     * @return {@code out} (untouched for empty input)
     * @throws DMLRuntimeException if the input is neither a column vector nor square
     */
    public static MatrixBlock diag(MatrixBlock in, MatrixBlock out) {
        if (in.isEmptyBlock(false)) {
            return out;
        }
        final int numRows = in.rlen;
        final int numCols = in.clen;
        if (numCols == 1) {
            LibMatrixReorg.diagV2M(in, out);
            return out;
        }
        if (numRows != numCols) {
            throw new DMLRuntimeException("Reorg diagM2V requires squared block input. (" + numRows + ", " + numCols + ")");
        }
        LibMatrixReorg.diagM2V(in, out);
        return out;
    }

    /**
     * Sorts the rows of {@code in} by the given order-by columns.
     *
     * @param in    input block
     * @param out   output block; receives either the reordered rows or the 1-based row permutation
     * @param by    order-by column positions (1-based, as {@code by[0] - 1} is used as the column index)
     * @param desc  {@code true} for descending order
     * @param ixret {@code true} to return the sort permutation (one column of 1-based row
     *              indexes) instead of the reordered data
     * @return {@code out}
     * @throws DMLRuntimeException if {@code by} is rejected by {@code isValidSortByList}
     */
    public static MatrixBlock sort(MatrixBlock in, MatrixBlock out, int[] by, boolean desc, boolean ixret) {
        final boolean sparseInput = in.isInSparseFormat();
        final int numRows = in.rlen;
        final int numCols = in.clen;

        // output meta data: an index result is dense with one value per row
        out.sparse = in.sparse && !ixret;
        out.nonZeros = ixret ? (long)numRows : in.nonZeros;

        if (!LibMatrixReorg.isValidSortByList(by, numCols)) {
            throw new DMLRuntimeException("Sort configuration issue: invalid orderby columns: " + Arrays.toString(by) + " (" + numRows + "x" + numCols + " input).");
        }

        // empty input: nothing to reorder; the permutation is 1..numRows
        if (in.isEmptyBlock(false)) {
            if (ixret) {
                out.allocateDenseBlock(false);
                double[] identity = out.getDenseBlockValues();
                for (int r = 0; r < numRows; ++r) {
                    identity[r] = r + 1;
                }
            }
            return out;
        }

        // dense column vector, data output: sort the values directly
        if (!ixret && !sparseInput && numCols == 1) {
            out.copy(in);
            Arrays.sort(out.getDenseBlockValues());
            if (desc) {
                LibMatrixReorg.sortReverseDense(out);
            }
            return out;
        }

        // general case: sort a row-index permutation by the first order-by column
        int[] order = new int[numRows];
        double[] keys = new double[numRows];
        for (int r = 0; r < numRows; ++r) {
            order[r] = r;
            keys[r] = in.quickGetValue(r, by[0] - 1);
        }
        SortUtils.sortByValue(0, numRows, keys, order);
        if (by.length > 1) {
            LibMatrixReorg.sortBySecondary(0, numRows, keys, order, in, by, 1);
        }
        if (desc) {
            LibMatrixReorg.sortReverseDense(order);
            LibMatrixReorg.sortReverseDense(keys);
        }
        LibMatrixReorg.sortIndexesStable(0, numRows, keys, order, in, by, 1);

        // index output: write the 1-based permutation
        if (ixret) {
            out.allocateDenseBlock(false);
            DenseBlock perm = out.getDenseBlock();
            for (int r = 0; r < numRows; ++r) {
                perm.set(r, 0, order[r] + 1);
            }
            return out;
        }

        // data output: copy rows in permutation order
        if (sparseInput) {
            out.allocateSparseRowsBlock(false);
            for (int r = 0; r < numRows; ++r) {
                if (!in.sparseBlock.isEmpty(order[r])) {
                    out.sparseBlock.set(r, in.sparseBlock.get(order[r]), false);
                }
            }
        } else {
            out.allocateDenseBlock(false);
            DenseBlock src = in.getDenseBlock();
            DenseBlock dst = out.getDenseBlock();
            for (int r = 0; r < numRows; ++r) {
                System.arraycopy(src.values(order[r]), src.pos(order[r]), dst.values(r), dst.pos(r), numCols);
            }
        }
        return out;
    }

    /**
     * Reshapes {@code in} into a {@code rows x cols} block.
     *
     * @param in      input block
     * @param out     output block; dimensions, format flag and non-zero count are set here
     * @param rows    number of output rows
     * @param cols    number of output columns
     * @param rowwise passed through to the format-specific kernels (presumably row-major vs
     *                column-major cell order - confirm against the kernels)
     * @return {@code out}; a shallow copy of {@code in} if the dimensions are unchanged
     * @throws DMLRuntimeException if input and output cell counts differ
     */
    public static MatrixBlock reshape(MatrixBlock in, MatrixBlock out, int rows, int cols, boolean rowwise) {
        final int inRows = in.rlen;
        final int inCols = in.clen;
        final long inCells = (long)inRows * (long)inCols;
        final long outCells = (long)rows * (long)cols;
        if (inCells != outCells) {
            throw new DMLRuntimeException("Reshape matrix requires consistent numbers of input/output cells (" + inRows + ":" + inCols + ", " + rows + ":" + cols + ").");
        }
        if (inRows == rows && inCols == cols) {
            // identical shape: nothing to rearrange
            out.copyShallow(in);
            return out;
        }
        out.sparse = MatrixBlock.evalSparseFormatInMemory(rows, cols, in.nonZeros);
        out.rlen = rows;
        out.clen = cols;
        out.nonZeros = in.nonZeros;
        if (in.sparse) {
            if (out.sparse) {
                LibMatrixReorg.reshapeSparse(in, out, rows, cols, rowwise);
            } else {
                LibMatrixReorg.reshapeSparseToDense(in, out, rows, cols, rowwise);
            }
        } else if (out.sparse) {
            LibMatrixReorg.reshapeDenseToSparse(in, out, rows, cols, rowwise);
        } else {
            LibMatrixReorg.reshapeDense(in, out, rows, cols, rowwise);
        }
        return out;
    }

    /**
     * Reshapes one indexed block of a blocked matrix into the output blocks it contributes to.
     *
     * @param in                indexed input block
     * @param mcIn              characteristics of the input matrix (block sizes are read from it)
     * @param mcOut             characteristics of the output matrix
     * @param rowwise           passed through to the reshape kernels
     * @param outputEmptyBlocks if {@code false}, empty result blocks are dropped
     * @return the result blocks with their indexes; sparsity is re-examined on each returned block
     */
    public static List<IndexedMatrixValue> reshape(IndexedMatrixValue in, MatrixCharacteristics mcIn, MatrixCharacteristics mcOut, boolean rowwise, boolean outputEmptyBlocks) {
        final MatrixIndexes srcIx = in.getIndexes();
        final MatrixBlock srcBlk = (MatrixBlock)in.getValue();

        // determine and pre-create all output blocks touched by this input block
        Collection<MatrixIndexes> targetIxs = LibMatrixReorg.computeAllResultBlockIndexes(srcIx, mcIn, mcOut, srcBlk, rowwise, outputEmptyBlocks);
        Map<MatrixIndexes, MatrixBlock> targets = LibMatrixReorg.createAllResultBlocks(targetIxs, srcBlk.nonZeros, mcOut);

        // cell offsets of this block from the 1-based block indexes
        long rowOffset = (srcIx.getRowIndex() - 1L) * (long)mcIn.getRowsPerBlock();
        long colOffset = (srcIx.getColumnIndex() - 1L) * (long)mcIn.getColsPerBlock();
        if (srcBlk.sparse) {
            LibMatrixReorg.reshapeSparse(srcBlk, rowOffset, colOffset, targets, mcIn, mcOut, rowwise);
        } else {
            LibMatrixReorg.reshapeDense(srcBlk, rowOffset, colOffset, targets, mcIn, mcOut, rowwise);
        }

        List<IndexedMatrixValue> result = new ArrayList<>();
        for (Map.Entry<MatrixIndexes, MatrixBlock> entry : targets.entrySet()) {
            MatrixBlock blk = entry.getValue();
            if (!outputEmptyBlocks && blk.isEmptyBlock(false)) {
                continue;
            }
            blk.examSparsity();
            result.add(new IndexedMatrixValue(entry.getKey(), blk));
        }
        return result;
    }

    /**
     * Removes empty rows or columns from {@code in}.
     *
     * @param in          input block
     * @param ret         result block
     * @param rows        {@code true} to remove rows, {@code false} to remove columns
     * @param emptyReturn for an empty input without selection vector: keep one row/column
     *                    ({@code true}) or zero ({@code false}); otherwise passed on to the helpers
     * @param select      optional selection vector, may be {@code null}
     * @return the result block
     */
    public static MatrixBlock rmempty(MatrixBlock in, MatrixBlock ret, boolean rows, boolean emptyReturn, MatrixBlock select) {
        if (in.isEmptyBlock(false) && select == null) {
            // nothing to keep: the removed dimension shrinks to 0 or 1
            final int keptDim = emptyReturn ? 1 : 0;
            if (rows) {
                ret.reset(keptDim, in.clen, in.sparse);
            } else {
                ret.reset(in.rlen, keptDim, in.sparse);
            }
            return ret;
        }
        return rows
            ? LibMatrixReorg.removeEmptyRows(in, ret, select, emptyReturn)
            : LibMatrixReorg.removeEmptyColumns(in, ret, select, emptyReturn);
    }

    /**
     * Distributes the rows (or columns) of one indexed block to their target output blocks,
     * driven by an offset vector: entry {@code i} of the offset block holds the 1-based target
     * position of row/column {@code i}; entries {@code <= 0} are skipped.
     *
     * @param data    indexed data block (value must be a {@code MatrixBlock})
     * @param offset  indexed offset block (value must be a {@code MatrixBlock})
     * @param rmRows  {@code true} to process rows, {@code false} to process columns
     * @param len     length of the output along the processed dimension
     * @param brlen   rows per block
     * @param bclen   columns per block
     * @param outList list receiving the produced output blocks
     * @throws DMLRuntimeException if a value is not a {@code MatrixBlock} or data and offsets
     *                             disagree in the processed dimension
     */
    public static void rmempty(IndexedMatrixValue data, IndexedMatrixValue offset, boolean rmRows, long len, long brlen, long bclen, ArrayList<IndexedMatrixValue> outList) {
        IndexedMatrixValue tmpIMV;
        MatrixBlock src;
        int i;
        long clen;
        long rlen;
        if (!(data.getValue() instanceof MatrixBlock) || !(offset.getValue() instanceof MatrixBlock)) {
            throw new DMLRuntimeException("Unsupported input data: expected " + MatrixBlock.class.getName() + " but got " + data.getValue().getClass().getName() + " and " + offset.getValue().getClass().getName());
        }
        if (rmRows && data.getValue().getNumRows() != offset.getValue().getNumRows() || !rmRows && data.getValue().getNumColumns() != offset.getValue().getNumColumns()) {
            // the closing bracket was missing from this message
            throw new DMLRuntimeException("Dimension mismatch between input data and offsets: [" + data.getValue().getNumRows() + "x" + data.getValue().getNumColumns() + " vs " + offset.getValue().getNumRows() + "x" + offset.getValue().getNumColumns() + "]");
        }
        HashMap<MatrixIndexes, IndexedMatrixValue> out = new HashMap<MatrixIndexes, IndexedMatrixValue>();
        MatrixBlock linData = (MatrixBlock)data.getValue();
        MatrixBlock linOffset = (MatrixBlock)offset.getValue();
        // reusable lookup key; every stored key is a separate MatrixIndexes copy
        MatrixIndexes tmpIx = new MatrixIndexes(-1L, -1L);
        if (rmRows) {
            rlen = len;
            clen = linData.getNumColumns();
            for (i = 0; i < linOffset.getNumRows(); ++i) {
                long rix = (long)linOffset.quickGetValue(i, 0);
                if (rix <= 0L) continue;
                src = linData.slice(i, i, 0, (int)(clen - 1L), new MatrixBlock());
                // 1-based target block index and 0-based row inside that block
                long brix = (rix - 1L) / brlen + 1L;
                long lbrix = (rix - 1L) % brlen;
                tmpIx.setIndexes(brix, data.getIndexes().getColumnIndex());
                if (!out.containsKey(tmpIx)) {
                    tmpIMV = new IndexedMatrixValue(new MatrixIndexes(), new MatrixBlock());
                    tmpIMV.getIndexes().setIndexes(tmpIx);
                    // the last block may hold fewer than brlen rows
                    ((MatrixBlock)tmpIMV.getValue()).reset((int)Math.min(brlen, rlen - (brix - 1L) * brlen), (int)clen);
                    out.put(tmpIMV.getIndexes(), tmpIMV);
                }
                ((MatrixBlock)((IndexedMatrixValue)out.get(tmpIx)).getValue()).copy((int)lbrix, (int)lbrix, 0, (int)clen - 1, src, false);
            }
        } else {
            rlen = linData.getNumRows();
            clen = len;
            for (i = 0; i < linOffset.getNumColumns(); ++i) {
                long cix = (long)linOffset.quickGetValue(0, i);
                if (cix <= 0L) continue;
                src = linData.slice(0, (int)(rlen - 1L), i, i, new MatrixBlock());
                // 1-based target block index and 0-based column inside that block
                long bcix = (cix - 1L) / bclen + 1L;
                long lbcix = (cix - 1L) % bclen;
                tmpIx.setIndexes(data.getIndexes().getRowIndex(), bcix);
                if (!out.containsKey(tmpIx)) {
                    tmpIMV = new IndexedMatrixValue(new MatrixIndexes(), new MatrixBlock());
                    tmpIMV.getIndexes().setIndexes(tmpIx);
                    // the last block may hold fewer than bclen columns
                    ((MatrixBlock)tmpIMV.getValue()).reset((int)rlen, (int)Math.min(bclen, clen - (bcix - 1L) * bclen));
                    out.put(tmpIMV.getIndexes(), tmpIMV);
                }
                ((MatrixBlock)((IndexedMatrixValue)out.get(tmpIx)).getValue()).copy(0, (int)rlen - 1, (int)lbcix, (int)lbcix, src, false);
            }
        }
        // the copies above pass false as last argument, so non-zero counts are recomputed once per block here
        for (IndexedMatrixValue imv : out.values()) {
            ((MatrixBlock)imv.getValue()).recomputeNonZeros();
            outList.add(imv);
        }
    }

    /**
     * Expands {@code in} via {@code rexpandRows} or {@code rexpandColumns}.
     *
     * @param in     input block
     * @param ret    result block
     * @param max    maximum value; converted to the size of the expanded dimension
     * @param rows   {@code true} to expand into rows, {@code false} into columns
     * @param cast   passed through to the expand helpers
     * @param ignore if {@code false}, an input with fewer non-zeros than rows is rejected
     * @param k      degree of parallelism, used for the column expansion only
     * @return the result block
     * @throws DMLRuntimeException if {@code ignore} is {@code false} and the input contains zeros
     */
    public static MatrixBlock rexpand(MatrixBlock in, MatrixBlock ret, double max, boolean rows, boolean cast, boolean ignore, int k) {
        final int expandedDim = (int)UtilFunctions.toLong(max);
        if (!ignore && in.getNonZeros() < (long)in.getNumRows()) {
            throw new DMLRuntimeException("Invalid input w/ zeros for rexpand ignore=false (rlen=" + in.getNumRows() + ", nnz=" + in.getNonZeros() + ").");
        }
        if (!in.isEmptyBlock(false)) {
            return rows
                ? LibMatrixReorg.rexpandRows(in, ret, expandedDim, cast, ignore)
                : LibMatrixReorg.rexpandColumns(in, ret, expandedDim, cast, ignore, k);
        }
        // empty input: empty sparse result of the expanded shape
        if (rows) {
            ret.reset(expandedDim, in.rlen, true);
        } else {
            ret.reset(in.rlen, expandedDim, true);
        }
        return ret;
    }

    /**
     * Expands one indexed block single-threaded and slices the result into output
     * blocks of at most {@code brlen} rows or {@code bclen} columns.
     *
     * @param data    indexed input block
     * @param max     maximum value, see {@link #rexpand(MatrixBlock, MatrixBlock, double, boolean, boolean, boolean, int)}
     * @param rows    {@code true} to expand into rows, {@code false} into columns
     * @param cast    passed through to the block-level rexpand
     * @param ignore  passed through to the block-level rexpand
     * @param brlen   rows per block
     * @param bclen   columns per block
     * @param outList list receiving the sliced output blocks
     */
    public static void rexpand(IndexedMatrixValue data, double max, boolean rows, boolean cast, boolean ignore, long brlen, long bclen, ArrayList<IndexedMatrixValue> outList) {
        final MatrixIndexes srcIx = data.getIndexes();
        final MatrixBlock srcBlk = (MatrixBlock)data.getValue();
        final MatrixBlock expanded = LibMatrixReorg.rexpand(srcBlk, new MatrixBlock(), max, rows, cast, ignore, 1);
        if (rows) {
            // the input row-block index becomes the output column-block index
            for (int rl = 0; rl < expanded.getNumRows(); rl = (int)((long)rl + brlen)) {
                int ru = (int)(Math.min((long)rl + brlen, (long)expanded.getNumRows()) - 1L);
                MatrixBlock part = expanded.slice(rl, ru);
                outList.add(new IndexedMatrixValue(new MatrixIndexes((long)rl / brlen + 1L, srcIx.getRowIndex()), part));
            }
            return;
        }
        for (int cl = 0; cl < expanded.getNumColumns(); cl = (int)((long)cl + bclen)) {
            int cu = (int)(Math.min((long)cl + bclen, (long)expanded.getNumColumns()) - 1L);
            MatrixBlock part = expanded.slice(0, expanded.getNumRows() - 1, cl, cu, new MatrixBlock());
            outList.add(new IndexedMatrixValue(new MatrixIndexes(srcIx.getRowIndex(), (long)cl / bclen + 1L), part));
        }
    }

    /**
     * Maps the operator's function object to a reorg type.
     *
     * @param op reorg operator
     * @return the matching type, or {@code ReorgType.INVALID} for any other function object
     */
    private static ReorgType getReorgType(ReorgOperator op) {
        ReorgType result = ReorgType.INVALID;
        if (op.fn instanceof SwapIndex) {
            result = ReorgType.TRANSPOSE;
        } else if (op.fn instanceof RevIndex) {
            result = ReorgType.REV;
        } else if (op.fn instanceof DiagIndex) {
            result = ReorgType.DIAG;
        } else if (op.fn instanceof SortIndex) {
            result = ReorgType.SORT;
        }
        return result;
    }

    /**
     * Dense-to-dense transpose of the input sub-range rows {@code [rl, ru)} and
     * columns {@code [cl, cu)}, processed in 128x128 tiles.
     *
     * @param in  dense input block
     * @param out dense, pre-allocated output block
     * @param rl  first input row (inclusive)
     * @param ru  last input row (exclusive)
     * @param cl  first input column (inclusive)
     * @param cu  last input column (exclusive)
     */
    private static void transposeDenseToDense(MatrixBlock in, MatrixBlock out, int rl, int ru, int cl, int cu) {
        final int numRows = in.rlen;
        final int numCols = in.clen;
        final int outCols = out.clen;
        final DenseBlock src = in.getDenseBlock();
        final DenseBlock dst = out.getDenseBlock();

        // vectors: the transposed layout equals the input layout, so copy the range
        if (numRows == 1 || numCols == 1) {
            final int start = rl + cl;
            final int count = ru + cu - start - 1;
            System.arraycopy(src.valuesAt(0), start, dst.valuesAt(0), start, count);
            return;
        }

        final int tile = 128;
        if (src.numBlocks() == 1 && dst.numBlocks() == 1) {
            // both sides are backed by a single array: use direct index arithmetic
            final double[] srcVals = src.valuesAt(0);
            final double[] dstVals = dst.valuesAt(0);
            for (int rowStart = rl; rowStart < ru; rowStart += tile) {
                final int rowEnd = Math.min(rowStart + tile, ru);
                for (int colStart = cl; colStart < cu; colStart += tile) {
                    final int colEnd = Math.min(colStart + tile, cu);
                    for (int r = rowStart; r < rowEnd; ++r) {
                        final int srcPos = r * numCols + colStart;
                        final int dstPos = colStart * outCols + r;
                        LibMatrixReorg.transposeRow(srcVals, dstVals, srcPos, dstPos, outCols, colEnd - colStart);
                    }
                }
            }
            return;
        }

        // multi-block layout: go through the DenseBlock accessors
        for (int rowStart = rl; rowStart < ru; rowStart += tile) {
            final int rowEnd = Math.min(rowStart + tile, ru);
            for (int colStart = cl; colStart < cu; colStart += tile) {
                final int colEnd = Math.min(colStart + tile, cu);
                for (int r = rowStart; r < rowEnd; ++r) {
                    final double[] rowVals = src.values(r);
                    final int rowPos = src.pos(r);
                    for (int col = colStart; col < colEnd; ++col) {
                        dst.set(col, r, rowVals[rowPos + col]);
                    }
                }
            }
        }
    }

    /**
     * Dense-to-sparse transpose of the full input, processed in 128x128 tiles.
     *
     * @param in  dense input block
     * @param out sparse, pre-allocated output block
     */
    private static void transposeDenseToSparse(MatrixBlock in, MatrixBlock out) {
        final int inRows = in.rlen;
        final int inCols = in.clen;
        final int outRows = out.rlen;
        final int outCols = out.clen;
        // estimated non-zeros per output row, used as allocation hint
        final int estRowNnz = (int)(in.nonZeros / (long)outRows);
        final DenseBlock src = in.getDenseBlock();
        final SparseBlock dst = out.getSparseBlock();

        // single output row: hand the whole value range over in one call
        if (out.rlen == 1) {
            dst.allocate(0, (int)in.nonZeros);
            dst.setIndexRange(0, 0, inRows, src.valuesAt(0), 0, inRows);
            return;
        }

        final int tile = 128;
        for (int rowStart = 0; rowStart < inRows; rowStart += tile) {
            final int rowEnd = Math.min(rowStart + tile, inRows);
            for (int colStart = 0; colStart < inCols; colStart += tile) {
                final int colEnd = Math.min(colStart + tile, inCols);
                for (int r = rowStart; r < rowEnd; ++r) {
                    final double[] rowVals = src.values(r);
                    final int rowPos = src.pos(r);
                    for (int col = colStart; col < colEnd; ++col) {
                        dst.allocate(col, estRowNnz, outCols);
                        dst.append(col, r, rowVals[rowPos + col]);
                    }
                }
            }
        }
    }

    /**
     * Sparse-to-sparse transpose of input columns {@code [cl, cu)} over all input rows.
     * Only column partitioning is supported: the row range must cover the whole input.
     *
     * @param in  sparse input block
     * @param out sparse, pre-allocated output block
     * @param rl  first input row (inclusive); must be 0
     * @param ru  last input row (exclusive); must not be smaller than {@code in.rlen}
     * @param cl  first input column (inclusive)
     * @param cu  last input column (exclusive)
     * @param cnt optional per-column non-zero counts used to pre-size output rows; may be {@code null}
     * @throws RuntimeException if a proper row sub-range is requested
     */
    private static void transposeSparseToSparse(MatrixBlock in, MatrixBlock out, int rl, int ru, int cl, int cu, int[] cnt) {
        if (rl > 0 || ru < in.rlen) {
            throw new RuntimeException("Unsupported row-parallel transposeSparseToSparse: " + rl + ", " + ru);
        }
        int m2 = out.rlen;
        int n2 = out.clen;
        // estimated non-zeros per output row, used as allocation hint below
        int ennz2 = (int)(in.nonZeros / (long)m2);
        SparseBlock a = in.getSparseBlock();
        SparseBlock c = out.getSparseBlock();
        // with known counts, allocate every non-empty output row at its exact size up front
        if (cnt != null) {
            for (int i = cl; i < cu; ++i) {
                if (cnt[i] <= 0) continue;
                c.allocate(i, cnt[i]);
            }
        }
        // xsp = cells per non-zero; tile sizes grow with it but are at least 128
        long xsp = (long)in.rlen * (long)in.clen / in.nonZeros;
        int blocksizeI = Math.max(128, (int)(8L * xsp));
        int blocksizeJ = Math.max(128, (int)(8L * xsp));
        // ix[i - bi]: scan position (relative to the row start) of input row i within the current row tile
        int[] ix = new int[Math.min(blocksizeI, ru - rl)];
        for (int bi = rl; bi < ru; bi += blocksizeI) {
            Arrays.fill(ix, 0);
            int bimin = Math.min(bi + blocksizeI, ru);
            // column range does not start at 0: start each row at its first index >= cl,
            // or at the row end when posFIndexGTE reports no such index (negative result)
            if (cl > 0) {
                for (int i = bi; i < bimin; ++i) {
                    if (a.isEmpty(i)) continue;
                    int j = a.posFIndexGTE(i, cl);
                    ix[i - bi] = j >= 0 ? j : a.size(i);
                }
            }
            for (int bj = cl; bj < cu; bj += blocksizeJ) {
                int bjmin = Math.min(bj + blocksizeJ, cu);
                for (int i = bi; i < bimin; ++i) {
                    int j;
                    if (a.isEmpty(i)) continue;
                    int apos = a.pos(i);
                    int alen = a.size(i);
                    int[] aix = a.indexes(i);
                    double[] avals = a.values(i);
                    // resume at the saved position and emit all entries with column index < bjmin
                    for (j = ix[i - bi] + apos; j < apos + alen && aix[j] < bjmin; ++j) {
                        c.allocate(aix[j], ennz2, n2);
                        c.append(aix[j], i, avals[j]);
                    }
                    // remember where the next column tile continues in this row
                    ix[i - bi] = j - apos;
                }
            }
        }
    }

    /**
     * Sparse-to-dense transpose of input rows {@code [rl, ru)}, processed in 128x128 tiles.
     * All input columns are processed; {@code cl} and {@code cu} are not read.
     *
     * @param in  sparse input block
     * @param out dense, pre-allocated output block
     * @param rl  first input row (inclusive)
     * @param ru  last input row (exclusive)
     * @param cl  unused
     * @param cu  unused
     */
    private static void transposeSparseToDense(MatrixBlock in, MatrixBlock out, int rl, int ru, int cl, int cu) {
        final int numRows = in.rlen;
        final int numCols = in.clen;
        final SparseBlock src = in.getSparseBlock();
        final DenseBlock dst = out.getDenseBlock();

        // single input row: scatter its entries into the output value array
        if (numRows == 1) {
            final int rowLen = src.size(0);
            final int[] colIdx = src.indexes(0);
            final double[] rowVals = src.values(0);
            final double[] dstVals = dst.valuesAt(0);
            for (int p = 0; p < rowLen; ++p) {
                dstVals[colIdx[p]] = rowVals[p];
            }
            return;
        }

        final int tile = 128;
        // cursor[slot]: scan position (relative to the row start) of each row of the current row tile
        final int[] cursor = new int[tile];
        for (int rowStart = rl; rowStart < ru; rowStart += tile) {
            Arrays.fill(cursor, 0);
            final int rowEnd = Math.min(rowStart + tile, ru);
            for (int colStart = 0; colStart < numCols; colStart += tile) {
                final int colEnd = Math.min(colStart + tile, numCols);
                for (int r = rowStart, slot = 0; r < rowEnd; ++r, ++slot) {
                    if (src.isEmpty(r)) {
                        continue;
                    }
                    final int rowPos = src.pos(r);
                    final int rowLen = src.size(r);
                    final int[] colIdx = src.indexes(r);
                    final double[] rowVals = src.values(r);
                    // resume where the previous column tile stopped
                    int p = cursor[slot];
                    while (p < rowLen && colIdx[rowPos + p] < colEnd) {
                        dst.set(colIdx[rowPos + p], r, rowVals[rowPos + p]);
                        ++p;
                    }
                    cursor[slot] = p;
                }
            }
        }
    }

    /**
     * Copies {@code len} consecutive values of {@code a} (starting at {@code aix}) into
     * {@code c} with a stride of {@code n2} (starting at {@code cix}), i.e. writes a row
     * segment as a column segment. The main loop is unrolled 8-fold.
     *
     * @param a   source array
     * @param c   target array
     * @param aix start position in {@code a}
     * @param cix start position in {@code c}
     * @param n2  stride between consecutive writes in {@code c}
     * @param len number of values to copy
     */
    static void transposeRow(double[] a, double[] c, int aix, int cix, int n2, int len) {
        final int remainder = len % 8;
        int src = aix;
        int dst = cix;

        // leading remainder so that the rest is a multiple of 8
        for (int done = 0; done < remainder; ++done, ++src, dst += n2) {
            c[dst] = a[src];
        }

        // unrolled body, eight values per iteration
        final int stride8 = 8 * n2;
        for (int done = remainder; done < len; done += 8, src += 8, dst += stride8) {
            c[dst] = a[src];
            c[dst + n2] = a[src + 1];
            c[dst + 2 * n2] = a[src + 2];
            c[dst + 3 * n2] = a[src + 3];
            c[dst + 4 * n2] = a[src + 4];
            c[dst + 5 * n2] = a[src + 5];
            c[dst + 6 * n2] = a[src + 6];
            c[dst + 7 * n2] = a[src + 7];
        }
    }

    /**
     * Counts non-zeros per column over the rows {@code [rl, ru)} of a sparse block.
     *
     * @param in input block
     * @param rl first row (inclusive)
     * @param ru last row (exclusive)
     * @return per-column counts, or {@code null} if the block is not sparse or has more than 4096 columns
     */
    private static int[] countNnzPerColumn(MatrixBlock in, int rl, int ru) {
        if (!in.sparse || in.clen > 4096) {
            return null;
        }
        final SparseBlock rowsBlk = in.sparseBlock;
        final int[] counts = new int[in.clen];
        for (int r = rl; r < ru; ++r) {
            if (!rowsBlk.isEmpty(r)) {
                LibMatrixReorg.countAgg(counts, rowsBlk.indexes(r), rowsBlk.pos(r), rowsBlk.size(r));
            }
        }
        return counts;
    }

    /**
     * Adds the counts of {@code cnt2} element-wise onto {@code cnt}.
     *
     * @param cnt  accumulator, modified in place; may be {@code null}
     * @param cnt2 counts to add
     * @return {@code cnt2} itself if {@code cnt} is {@code null}, otherwise the updated {@code cnt}
     */
    private static int[] mergeNnzCounts(int[] cnt, int[] cnt2) {
        if (cnt != null) {
            final int n = cnt.length;
            for (int col = 0; col < n; ++col) {
                cnt[col] += cnt2[col];
            }
            return cnt;
        }
        return cnt2;
    }

    /**
     * Reverses the row order of a dense block into a newly allocated dense output.
     *
     * @param in  dense input block
     * @param out output block; set to dense with the input's non-zero count
     */
    private static void reverseDense(MatrixBlock in, MatrixBlock out) {
        final int numRows = in.rlen;
        final int numCols = in.clen;
        out.sparse = false;
        out.nonZeros = in.nonZeros;
        out.allocateDenseBlock(false);

        if (numCols == 1) {
            // column vector: reverse the value array directly
            final double[] srcVals = in.getDenseBlockValues();
            final double[] dstVals = out.getDenseBlockValues();
            for (int from = 0, to = numRows - 1; from < numRows; ++from, --to) {
                dstVals[to] = srcVals[from];
            }
            return;
        }

        // matrix: copy whole rows to their mirrored position
        final DenseBlock src = in.getDenseBlock();
        final DenseBlock dst = out.getDenseBlock();
        for (int from = 0; from < numRows; ++from) {
            final int to = numRows - 1 - from;
            System.arraycopy(src.values(from), src.pos(from), dst.values(to), dst.pos(to), numCols);
        }
    }

    /**
     * Reverses the row order of a sparse block into a newly allocated sparse output.
     *
     * @param in  sparse input block
     * @param out output block; set to sparse with the input's non-zero count
     */
    private static void reverseSparse(MatrixBlock in, MatrixBlock out) {
        final int numRows = in.rlen;
        out.sparse = true;
        out.nonZeros = in.nonZeros;
        out.allocateSparseRowsBlock(false);

        final SparseBlock src = in.getSparseBlock();
        final SparseBlock dst = out.getSparseBlock();
        for (int from = 0, to = numRows - 1; from < numRows; ++from, --to) {
            if (!src.isEmpty(from)) {
                dst.set(to, src.get(from), true);
            }
        }
    }

    /**
     * Expands a column vector into a diagonal matrix (vector-to-matrix diag).
     *
     * <p>For sparse output a CSR block is built directly: row {@code i} holds at most one
     * entry, at column {@code i}. For dense output the non-zero values are appended at
     * {@code (i, i)}.
     *
     * @param in  input column vector
     * @param out output block; its {@code sparse} flag selects the output format
     */
    private static void diagV2M(MatrixBlock in, MatrixBlock out) {
        int rlen = in.rlen;
        if (out.sparse) {
            int nnz = (int)in.nonZeros;
            int[] rptr = new int[rlen + 1];
            int[] cix = new int[nnz];
            double[] vals = new double[nnz];
            int pos = 0;
            for (int i = 0; i < rlen; ++i) {
                double val = in.quickGetValue(i, 0);
                if (val != 0.0) {
                    cix[pos] = i;
                    vals[pos] = val;
                    // advance only for stored entries; advancing on every row produced wrong
                    // row pointers and ran past the nnz-sized arrays once a zero was present
                    ++pos;
                }
                // row pointer = number of entries stored up to and including row i
                rptr[i + 1] = pos;
            }
            out.sparseBlock = new SparseBlockCSR(rptr, cix, vals, nnz);
        } else {
            for (int i = 0; i < rlen; ++i) {
                double val = in.quickGetValue(i, 0);
                if (val == 0.0) continue;
                out.appendValue(i, i, val);
            }
        }
        out.setNonZeros(in.nonZeros);
    }

    /**
     * Extracts the diagonal of {@code in} into column 0 of {@code out}
     * (matrix-to-vector diag) and sets the output's non-zero count.
     *
     * @param in  input block; cells {@code (i, i)} for {@code i < in.rlen} are read
     * @param out output block; allocated here and written through its dense block
     */
    private static void diagM2V(MatrixBlock in, MatrixBlock out) {
        final DenseBlock target = out.allocateBlock().getDenseBlock();
        final int dim = in.rlen;
        int nonZeroCount = 0;
        for (int d = 0; d < dim; ++d) {
            final double v = in.quickGetValue(d, d);
            if (v != 0.0) {
                target.set(d, 0, v);
                ++nonZeroCount;
            }
        }
        out.setNonZeros(nonZeroCount);
    }

    /**
     * Reshapes a dense input block into a dense {@code rows x cols} output block.
     *
     * <p>Row-wise reshapes keep the linearized cell order, so the input values are
     * either wrapped directly (single underlying block) or copied as a whole.
     * Column-wise reshapes remap cells via the column-major linear index.
     *
     * @param in      dense input block; a {@code null} dense block leaves {@code out} untouched
     * @param out     output block
     * @param rows    number of output rows
     * @param cols    number of output columns
     * @param rowwise {@code true} for row-major, {@code false} for column-major linearization
     */
    private static void reshapeDense(MatrixBlock in, MatrixBlock out, int rows, int cols, boolean rowwise) {
        final int inRows = in.rlen;
        final int inCols = in.clen;
        if (in.denseBlock == null) {
            return;
        }

        // row-wise with a single underlying block: wrap the input value array without copying
        if (rowwise && in.denseBlock.numBlocks() == 1) {
            out.denseBlock = DenseBlockFactory.createDenseBlock(in.getDenseBlockValues(), rows, cols);
            return;
        }

        out.allocateDenseBlock(false);
        DenseBlock src = in.getDenseBlock();
        DenseBlock dst = out.getDenseBlock();

        if (rowwise) {
            dst.set(src);
            return;
        }

        if (inRows == 1 || inCols == 1) {
            // vector -> matrix: read the input sequentially, write down each output column
            double[] srcVals = src.valuesAt(0);
            double[] dstVals = dst.valuesAt(0);
            int srcPos = 0;
            for (int j = 0; j < cols; ++j) {
                for (int i = 0, dstRowStart = 0; i < rows; ++i, dstRowStart += cols) {
                    dstVals[dstRowStart + j] = srcVals[srcPos++];
                }
            }
            return;
        }

        if (rows == 1 || cols == 1) {
            // matrix -> vector: write the output sequentially, read down each input column
            double[] srcVals = src.valuesAt(0);
            double[] dstVals = dst.valuesAt(0);
            int dstPos = 0;
            for (int j = 0; j < inCols; ++j) {
                for (int i = 0, srcRowStart = 0; i < inRows; ++i, srcRowStart += inCols) {
                    dstVals[dstPos++] = srcVals[srcRowStart + j];
                }
            }
            return;
        }

        // matrix -> matrix: output cell (i, j) has column-major linear index rows*j + i
        for (int i = 0; i < rows; ++i) {
            double[] dstVals = dst.values(i);
            int dstRowStart = dst.pos(i);
            for (int j = 0, linear = i; j < cols; ++j, linear += rows) {
                int srcRow = linear % inRows;
                int srcCol = linear / inRows;
                dstVals[dstRowStart + j] = src.get(srcRow, srcCol);
            }
        }
    }

    /**
     * Reshapes a sparse input block into a sparse {@code rows x cols} output block.
     *
     * <p>With {@code rowwise == true} cells keep their row-major linear index
     * ({@code i * clen + j}); otherwise the column-major linear index
     * ({@code j * rlen + i}) is kept. Several special cases avoid per-cell
     * index arithmetic.
     *
     * @param in      sparse input block; an empty block leaves {@code out} untouched
     * @param out     output block whose sparse rows are allocated here
     * @param rows    number of output rows
     * @param cols    number of output columns
     * @param rowwise {@code true} for row-major, {@code false} for column-major linearization
     */
    private static void reshapeSparse(MatrixBlock in, MatrixBlock out, int rows, int cols, boolean rowwise) {
        int rlen = in.rlen;
        int clen = in.clen;
        if (in.isEmptyBlock(false)) {
            return;
        }
        out.allocateSparseRowsBlock(false);
        // average number of non-zeros per output row; passed to c.allocate(...) below
        // (presumably as an initial row capacity hint -- TODO confirm against SparseBlock)
        int estnnz = (int)(in.nonZeros / (long)rows);
        SparseBlock a = in.sparseBlock;
        SparseBlock c = out.sparseBlock;
        if (rowwise) {
            if (rows == 1) {
                // matrix -> row vector: all input rows are appended to output row 0,
                // shifted by a column offset that grows by clen per input row
                c.allocate(0, estnnz, cols);
                int i = 0;
                int cix = 0;
                while (i < rlen) {
                    if (!a.isEmpty(i)) {
                        int apos = a.pos(i);
                        int alen = a.size(i);
                        int[] aix = a.indexes(i);
                        double[] avals = a.values(i);
                        for (int j = apos; j < apos + alen; ++j) {
                            c.append(0, cix + aix[j], avals[j]);
                        }
                    }
                    ++i;
                    cix += clen;
                }
            } else if (cols % clen == 0 && a instanceof SparseBlockCSR) {
                // CSR input where each output row is exactly n = cols/clen consecutive input rows:
                // build new row pointers and column indexes, and hand the input's value array
                // to the new CSR block as-is (the array is shared, not copied)
                int[] aix = ((SparseBlockCSR)a).indexes();
                int n = cols / clen;
                int pos = 0;
                int[] rptr = new int[rows + 1];
                int[] indexes = new int[(int)a.size()];
                rptr[0] = 0;
                int bi = 0;
                int ci = 0;
                // bi: first input row of the current group, ci: output row being assembled
                while (bi < rlen) {
                    int i = bi;
                    int cix = 0;
                    while (i < bi + n) {
                        if (!a.isEmpty(i)) {
                            int apos = a.pos(i);
                            int alen = a.size(i);
                            for (int j = apos; j < apos + alen; ++j) {
                                indexes[pos++] = cix + aix[j];
                            }
                        }
                        ++i;
                        cix += clen;
                    }
                    rptr[ci + 1] = pos;
                    bi += n;
                    ++ci;
                }
                out.sparseBlock = new SparseBlockCSR(rptr, indexes, ((SparseBlockCSR)a).values(), pos);
            } else if (cols % clen == 0) {
                // same N:1 row grouping as above for non-CSR inputs: allocate each output row
                // with the summed size of its n input rows, then append the shifted entries
                int n = cols / clen;
                int bi = 0;
                int ci = 0;
                while (bi < rlen) {
                    long lnnz = a.size(bi, bi + n);
                    c.allocate(ci, (int)lnnz);
                    int i = bi;
                    int cix = 0;
                    while (i < bi + n) {
                        if (!a.isEmpty(i)) {
                            int apos = a.pos(i);
                            int alen = a.size(i);
                            int[] aix = a.indexes(i);
                            double[] avals = a.values(i);
                            for (int j = apos; j < apos + alen; ++j) {
                                c.append(ci, cix + aix[j], avals[j]);
                            }
                        }
                        ++i;
                        cix += clen;
                    }
                    bi += n;
                    ++ci;
                }
            } else {
                // general row-wise case: the row-major linear index of each cell is kept in a
                // long (cix is the offset of the current input row) and split by the output width
                long cix = 0L;
                for (int i = 0; i < rlen; ++i) {
                    if (!a.isEmpty(i)) {
                        int apos = a.pos(i);
                        int alen = a.size(i);
                        int[] aix = a.indexes(i);
                        double[] avals = a.values(i);
                        for (int j = apos; j < apos + alen; ++j) {
                            int ci = (int)((cix + (long)aix[j]) / (long)cols);
                            int cj = (int)((cix + (long)aix[j]) % (long)cols);
                            c.allocate(ci, estnnz, cols);
                            c.append(ci, cj, avals[j]);
                        }
                    }
                    cix += (long)clen;
                }
            }
        } else if (rlen == 1) {
            // column-wise, row vector -> matrix: the input column index is the linear index
            if (!a.isEmpty(0)) {
                int alen = a.size(0);
                int[] aix = a.indexes(0);
                double[] avals = a.values(0);
                // NOTE(review): iterates from 0 rather than a.pos(0); assumes row 0 starts at
                // offset 0 in the index/value arrays -- TODO confirm for all SparseBlock variants
                for (int j = 0; j < alen; ++j) {
                    int ci = aix[j] % rows;
                    int cj = aix[j] / rows;
                    c.allocate(ci, estnnz, cols);
                    c.append(ci, cj, avals[j]);
                }
            }
        } else {
            // general column-wise case: column-major linear index = column * rlen + row (as long)
            for (int i = 0; i < rlen; ++i) {
                if (a.isEmpty(i)) continue;
                int apos = a.pos(i);
                int alen = a.size(i);
                int[] aix = a.indexes(i);
                double[] avals = a.values(i);
                for (int j = apos; j < apos + alen; ++j) {
                    long tmpix = (long)aix[j] * (long)rlen + (long)i;
                    int ci = (int)(tmpix % (long)rows);
                    int cj = (int)(tmpix / (long)rows);
                    c.allocate(ci, estnnz, cols);
                    c.append(ci, cj, avals[j]);
                }
            }
            // entries are appended in input-row order, which need not be ascending in the
            // output column index, so the output rows are sorted afterwards
            out.sortSparseRows();
        }
    }

    /**
     * Reshapes a dense input block into a sparse {@code rows x cols} output block,
     * appending only the non-zero input values.
     *
     * @param in      dense input block; a {@code null} dense block leaves {@code out} untouched
     * @param out     output block whose sparse rows are allocated here
     * @param rows    number of output rows
     * @param cols    number of output columns
     * @param rowwise {@code true} for row-major, {@code false} for column-major linearization
     */
    private static void reshapeDenseToSparse(MatrixBlock in, MatrixBlock out, int rows, int cols, boolean rowwise) {
        final int inRows = in.rlen;
        final int inCols = in.clen;
        if (in.denseBlock == null) {
            return;
        }
        out.allocateSparseRowsBlock(false);
        // average non-zeros per output row, handed to every row allocation below
        final int rowEstimate = (int)(in.nonZeros / (long)rows);
        DenseBlock src = in.getDenseBlock();
        SparseBlock dst = out.sparseBlock;

        if (rowwise) {
            // row-major linear index i*clen + j, split by the output width
            for (int i = 0; i < inRows; ++i) {
                double[] rowVals = src.values(i);
                int rowStart = src.pos(i);
                for (int j = 0; j < inCols; ++j) {
                    double v = rowVals[rowStart + j];
                    if (v != 0.0) {
                        long linear = (long)i * (long)inCols + (long)j;
                        int outRow = (int)(linear / (long)cols);
                        int outCol = (int)(linear % (long)cols);
                        dst.allocate(outRow, rowEstimate, cols);
                        dst.append(outRow, outCol, v);
                    }
                }
            }
            return;
        }

        if (inRows == 1) {
            // column-wise, row vector -> matrix: consume the vector sequentially, column by column
            double[] vec = src.valuesAt(0);
            int next = 0;
            for (int j = 0; j < cols; ++j) {
                for (int i = 0; i < rows; ++i) {
                    double v = vec[next++];
                    if (v != 0.0) {
                        dst.allocate(i, rowEstimate, cols);
                        dst.append(i, j, v);
                    }
                }
            }
            return;
        }

        // column-wise, matrix -> matrix: output cell (i, j) has linear index rows*j + i
        for (int i = 0; i < rows; ++i) {
            for (int j = 0, linear = i; j < cols; ++j, linear += rows) {
                int srcRow = linear % inRows;
                int srcCol = linear / inRows;
                double v = src.get(srcRow, srcCol);
                if (v != 0.0) {
                    dst.allocate(i, rowEstimate, cols);
                    dst.append(i, j, v);
                }
            }
        }
    }

    /**
     * Reshapes a sparse input block into a dense {@code rows x cols} output block
     * by scattering every stored input value to its reshaped position.
     *
     * @param in      sparse input block; a {@code null} sparse block leaves {@code out} untouched
     * @param out     output block whose dense block is allocated here
     * @param rows    number of output rows
     * @param cols    number of output columns
     * @param rowwise {@code true} for row-major, {@code false} for column-major linearization
     */
    private static void reshapeSparseToDense(MatrixBlock in, MatrixBlock out, int rows, int cols, boolean rowwise) {
        final int inRows = in.rlen;
        final int inCols = in.clen;
        if (in.sparseBlock == null) {
            return;
        }
        out.allocateDenseBlock(false);
        SparseBlock src = in.sparseBlock;
        DenseBlock dst = out.getDenseBlock();

        if (rowwise) {
            // row-major: rowStart is the linear offset of the current input row
            for (int i = 0, rowStart = 0; i < inRows; ++i, rowStart += inCols) {
                if (src.isEmpty(i)) {
                    continue;
                }
                int begin = src.pos(i);
                int len = src.size(i);
                int[] colIdx = src.indexes(i);
                double[] vals = src.values(i);
                for (int k = begin; k < begin + len; ++k) {
                    int outRow = (rowStart + colIdx[k]) / cols;
                    int outCol = (rowStart + colIdx[k]) % cols;
                    dst.set(outRow, outCol, vals[k]);
                }
            }
        } else if (inRows == 1) {
            // column-wise, row vector -> matrix: the input column index is the linear index
            double[] dstVals = dst.valuesAt(0);
            if (!src.isEmpty(0)) {
                int begin = src.pos(0);
                int len = src.size(0);
                int[] colIdx = src.indexes(0);
                double[] vals = src.values(0);
                for (int k = begin; k < begin + len; ++k) {
                    int outRow = colIdx[k] % rows;
                    int outCol = colIdx[k] / rows;
                    dstVals[outRow * cols + outCol] = vals[k];
                }
            }
        } else {
            // column-wise, matrix -> matrix: linear index = column * rlen + row
            for (int i = 0; i < inRows; ++i) {
                if (src.isEmpty(i)) {
                    continue;
                }
                int begin = src.pos(i);
                int len = src.size(i);
                int[] colIdx = src.indexes(i);
                double[] vals = src.values(i);
                for (int k = begin; k < begin + len; ++k) {
                    int linear = colIdx[k] * inRows + i;
                    int outRow = linear % rows;
                    int outCol = linear / rows;
                    dst.set(outRow, outCol, vals[k]);
                }
            }
        }
    }

    /**
     * Collects the indexes of all output blocks that cells of one input block can
     * map to under a reshape.
     *
     * <p>Depending on the linearization, the input block is walked row by row
     * (row-wise) or column by column (column-wise); for each such line the first
     * and last target block are computed and the range between them is added.
     * Single-column (row-wise) or single-row (column-wise) inputs are handled
     * with one range. When the block holds fewer non-zeros than lines and empty
     * outputs are not requested, only the targets of the non-zero cells are added.
     *
     * @param ixin        index of the input block (1-based, as the offsets subtract one)
     * @param mcIn        characteristics of the input matrix
     * @param mcOut       characteristics of the output matrix
     * @param in          the input block
     * @param rowwise     {@code true} for row-major, {@code false} for column-major linearization
     * @param outputEmpty if {@code true}, the non-zero-only shortcut is never taken
     * @return set of target block indexes
     */
    private static Collection<MatrixIndexes> computeAllResultBlockIndexes(MatrixIndexes ixin, MatrixCharacteristics mcIn, MatrixCharacteristics mcOut, MatrixBlock in, boolean rowwise, boolean outputEmpty) {
        HashSet<MatrixIndexes> targets = new HashSet<MatrixIndexes>();

        // NOTE(review): the lower bounds use the OUTPUT block sizes while the upper bounds use
        // the INPUT block sizes; presumably both matrices share one block size -- confirm.
        long rowLo = (ixin.getRowIndex() - 1L) * (long)mcOut.getRowsPerBlock();
        long colLo = (ixin.getColumnIndex() - 1L) * (long)mcOut.getColsPerBlock();
        long rowHi = Math.min(mcIn.getRows(), rowLo + (long)mcIn.getRowsPerBlock()) - 1L;
        long colHi = Math.min(mcIn.getCols(), colLo + (long)mcIn.getColsPerBlock()) - 1L;

        if (rowwise) {
            if (mcIn.getCols() == 1L) {
                // column vector: one contiguous range from the first to the last row
                MatrixIndexes head = LibMatrixReorg.computeResultBlockIndex(new MatrixIndexes(), rowLo, 0L, mcIn, mcOut, rowwise);
                MatrixIndexes tail = LibMatrixReorg.computeResultBlockIndex(new MatrixIndexes(), rowHi, 0L, mcIn, mcOut, rowwise);
                LibMatrixReorg.createRowwiseIndexes(head, tail, mcOut.getNumColBlocks(), targets);
            } else if (in.getNonZeros() < (long)in.getNumRows() && !outputEmpty) {
                LibMatrixReorg.createNonZeroIndexes(mcIn, mcOut, in, rowLo, colLo, rowwise, targets);
            } else {
                // one range per input row
                for (long r = rowLo; r <= rowHi; ++r) {
                    MatrixIndexes head = LibMatrixReorg.computeResultBlockIndex(new MatrixIndexes(), r, colLo, mcIn, mcOut, rowwise);
                    MatrixIndexes tail = LibMatrixReorg.computeResultBlockIndex(new MatrixIndexes(), r, colHi, mcIn, mcOut, rowwise);
                    LibMatrixReorg.createRowwiseIndexes(head, tail, mcOut.getNumColBlocks(), targets);
                }
            }
            return targets;
        }

        if (mcIn.getRows() == 1L) {
            // row vector: one contiguous range from the first to the last column
            MatrixIndexes head = LibMatrixReorg.computeResultBlockIndex(new MatrixIndexes(), 0L, colLo, mcIn, mcOut, rowwise);
            MatrixIndexes tail = LibMatrixReorg.computeResultBlockIndex(new MatrixIndexes(), 0L, colHi, mcIn, mcOut, rowwise);
            LibMatrixReorg.createColwiseIndexes(head, tail, mcOut.getNumRowBlocks(), targets);
        } else if (in.getNonZeros() < (long)in.getNumColumns() && !outputEmpty) {
            LibMatrixReorg.createNonZeroIndexes(mcIn, mcOut, in, rowLo, colLo, rowwise, targets);
        } else {
            // one range per input column
            for (long cIdx = colLo; cIdx <= colHi; ++cIdx) {
                MatrixIndexes head = LibMatrixReorg.computeResultBlockIndex(new MatrixIndexes(), rowLo, cIdx, mcIn, mcOut, rowwise);
                MatrixIndexes tail = LibMatrixReorg.computeResultBlockIndex(new MatrixIndexes(), rowHi, cIdx, mcIn, mcOut, rowwise);
                LibMatrixReorg.createColwiseIndexes(head, tail, mcOut.getNumRowBlocks(), targets);
            }
        }
        return targets;
    }

    /**
     * Adds {@code first}, {@code last} and every block index between them, in
     * row-major block order, to {@code ret}.
     *
     * @param first  first block index of the range (row and column must be positive)
     * @param last   last block index of the range (row and column must be positive)
     * @param ncblks number of column blocks per block row
     * @param ret    set receiving the block indexes
     * @throws RuntimeException if either index has a non-positive component
     */
    private static void createRowwiseIndexes(MatrixIndexes first, MatrixIndexes last, long ncblks, HashSet<MatrixIndexes> ret) {
        if (first.getRowIndex() <= 0L || first.getColumnIndex() <= 0L) {
            throw new RuntimeException("Invalid computed first index: " + first.toString());
        }
        if (last.getRowIndex() <= 0L || last.getColumnIndex() <= 0L) {
            throw new RuntimeException("Invalid computed last index: " + last.toString());
        }

        ret.add(first);
        if (first.equals(last)) {
            return;
        }

        final long firstRow = first.getRowIndex();
        final long firstCol = first.getColumnIndex();
        final long lastRow = last.getRowIndex();
        final long lastCol = last.getColumnIndex();

        // same block row but last lies left of first: run to the end of the block row instead
        final boolean fillToEnd = firstRow == lastRow && firstCol > lastCol;

        for (long bi = firstRow; bi <= lastRow; ++bi) {
            long from = (bi == firstRow) ? firstCol + 1L : 1L;
            long to = (bi == lastRow && !fillToEnd) ? lastCol - 1L : ncblks;
            for (long bj = from; bj <= to; ++bj) {
                ret.add(new MatrixIndexes(bi, bj));
            }
        }
        ret.add(last);
    }

    /**
     * Adds {@code first}, {@code last} and every block index between them, in
     * column-major block order, to {@code ret}.
     *
     * @param first  first block index of the range (row and column must be positive)
     * @param last   last block index of the range (row and column must be positive)
     * @param nrblks number of row blocks per block column
     * @param ret    set receiving the block indexes
     * @throws RuntimeException if either index has a non-positive component
     */
    private static void createColwiseIndexes(MatrixIndexes first, MatrixIndexes last, long nrblks, HashSet<MatrixIndexes> ret) {
        if (first.getRowIndex() <= 0L || first.getColumnIndex() <= 0L) {
            throw new RuntimeException("Invalid computed first index: " + first.toString());
        }
        if (last.getRowIndex() <= 0L || last.getColumnIndex() <= 0L) {
            throw new RuntimeException("Invalid computed last index: " + last.toString());
        }

        ret.add(first);
        if (first.equals(last)) {
            return;
        }

        final long firstRow = first.getRowIndex();
        final long firstCol = first.getColumnIndex();
        final long lastRow = last.getRowIndex();
        final long lastCol = last.getColumnIndex();

        // same block column but last lies above first: run to the end of the block column instead
        final boolean fillToEnd = firstCol == lastCol && firstRow > lastRow;

        for (long bj = firstCol; bj <= lastCol; ++bj) {
            long from = (bj == firstCol) ? firstRow + 1L : 1L;
            long to = (bj == lastCol && !fillToEnd) ? lastRow - 1L : nrblks;
            for (long bi = from; bi <= to; ++bi) {
                ret.add(new MatrixIndexes(bi, bj));
            }
        }
        ret.add(last);
    }

    /**
     * Adds the target block index of every cell returned by the input block's
     * sparse iterator to {@code ret}.
     *
     * @param mcIn       characteristics of the input matrix
     * @param mcOut      characteristics of the output matrix
     * @param in         input block, traversed through {@code getSparseBlockIterator()}
     * @param row_offset global row index of the block's first row
     * @param col_offset global column index of the block's first column
     * @param rowwise    {@code true} for row-major, {@code false} for column-major linearization
     * @param ret        set receiving the block indexes
     */
    private static void createNonZeroIndexes(MatrixCharacteristics mcIn, MatrixCharacteristics mcOut, MatrixBlock in, long row_offset, long col_offset, boolean rowwise, HashSet<MatrixIndexes> ret) {
        for (Iterator<IJV> cells = in.getSparseBlockIterator(); cells.hasNext(); ) {
            IJV nz = cells.next();
            long globalRow = row_offset + (long)nz.getI();
            long globalCol = col_offset + (long)nz.getJ();
            MatrixIndexes target = LibMatrixReorg.computeResultBlockIndex(new MatrixIndexes(), globalRow, globalCol, mcIn, mcOut, rowwise);
            ret.add(target);
        }
    }

    /**
     * Creates one result block per given block index.
     *
     * @param rix   indexes of the blocks to create
     * @param nnz   total number of non-zeros, spread evenly over the blocks for sizing
     * @param mcOut characteristics of the output matrix
     * @return map from block index to its newly created block
     */
    private static Map<MatrixIndexes, MatrixBlock> createAllResultBlocks(Collection<MatrixIndexes> rix, long nnz, MatrixCharacteristics mcOut) {
        final int numBlocks = rix.size();
        return rix.stream().collect(
            Collectors.toMap(
                blockIx -> blockIx,
                blockIx -> LibMatrixReorg.createResultBlock(blockIx, nnz, numBlocks, mcOut)));
    }

    /**
     * Creates an output block for the given block index, sized from the output
     * matrix characteristics and with a format chosen from the estimated
     * non-zeros per block ({@code nnz / nBlocks}).
     *
     * @param ix      block index of the block to create
     * @param nnz     total number of non-zeros over all blocks
     * @param nBlocks number of blocks the non-zeros are spread over
     * @param mcOut   characteristics of the output matrix
     * @return the new block
     * @throws DMLRuntimeException if a computed block dimension is smaller than one
     */
    private static MatrixBlock createResultBlock(MatrixIndexes ix, long nnz, int nBlocks, MatrixCharacteristics mcOut) {
        final long blockRow = ix.getRowIndex();
        final long blockCol = ix.getColumnIndex();
        final int blockRows = UtilFunctions.computeBlockSize(mcOut.getRows(), blockRow, mcOut.getRowsPerBlock());
        final int blockCols = UtilFunctions.computeBlockSize(mcOut.getCols(), blockCol, mcOut.getColsPerBlock());
        if (blockRows < 1 || blockCols < 1) {
            throw new DMLRuntimeException("Computed block dimensions (" + blockRow + "," + blockCol + " -> " + blockRows + "," + blockCols + ") are invalid!");
        }

        final int nnzPerBlock = (int)(nnz / (long)nBlocks);
        final boolean useSparse = MatrixBlock.evalSparseFormatInMemory(blockRows, blockCols, nnzPerBlock);
        return new MatrixBlock(blockRows, blockCols, useSparse, nnzPerBlock);
    }

    /**
     * Distributes the non-zero values of a dense input block over the
     * pre-created output blocks of a blocked reshape.
     *
     * @param in         dense input block; empty blocks are ignored
     * @param row_offset global row index of the block's first row
     * @param col_offset global column index of the block's first column
     * @param rix        output blocks keyed by block index
     * @param mcIn       characteristics of the input matrix
     * @param mcOut      characteristics of the output matrix
     * @param rowwise    {@code true} for row-major, {@code false} for column-major linearization
     * @throws DMLRuntimeException if a value maps to a block index missing from {@code rix}
     */
    private static void reshapeDense(MatrixBlock in, long row_offset, long col_offset, Map<MatrixIndexes, MatrixBlock> rix, MatrixCharacteristics mcIn, MatrixCharacteristics mcOut, boolean rowwise) {
        if (in.isEmptyBlock(false)) {
            return;
        }
        final int numRows = in.rlen;
        final int numCols = in.clen;
        double[] values = in.getDenseBlockValues();

        // scratch index, reused for both the target block index and the in-block position
        MatrixIndexes scratch = new MatrixIndexes();
        for (int i = 0, rowStart = 0; i < numRows; ++i, rowStart += numCols) {
            long globalRow = row_offset + (long)i;
            for (int j = 0; j < numCols; ++j) {
                double v = values[rowStart + j];
                if (v != 0.0) {
                    long globalCol = col_offset + (long)j;
                    scratch = LibMatrixReorg.computeResultBlockIndex(scratch, globalRow, globalCol, mcIn, mcOut, rowwise);
                    MatrixBlock target = LibMatrixReorg.getAllocatedBlock(rix, scratch);
                    scratch = LibMatrixReorg.computeInBlockIndex(scratch, globalRow, globalCol, mcIn, mcOut, rowwise);
                    target.appendValue((int)scratch.getRowIndex(), (int)scratch.getColumnIndex(), v);
                }
            }
        }

        // column-wise reshapes of multi-row inputs: sort the rows of all sparse outputs
        if (!rowwise && mcIn.getRows() > 1L) {
            for (MatrixBlock blk : rix.values()) {
                if (blk.sparse) {
                    blk.sortSparseRows();
                }
            }
        }
    }

    /**
     * Distributes the stored values of a sparse input block over the
     * pre-created output blocks of a blocked reshape.
     *
     * @param in         sparse input block; empty blocks are ignored
     * @param row_offset global row index of the block's first row
     * @param col_offset global column index of the block's first column
     * @param rix        output blocks keyed by block index
     * @param mcIn       characteristics of the input matrix
     * @param mcOut      characteristics of the output matrix
     * @param rowwise    {@code true} for row-major, {@code false} for column-major linearization
     * @throws DMLRuntimeException if a value maps to a block index missing from {@code rix}
     */
    private static void reshapeSparse(MatrixBlock in, long row_offset, long col_offset, Map<MatrixIndexes, MatrixBlock> rix, MatrixCharacteristics mcIn, MatrixCharacteristics mcOut, boolean rowwise) {
        if (in.isEmptyBlock(false)) {
            return;
        }
        final int numRows = in.rlen;
        SparseBlock src = in.sparseBlock;

        // scratch index, reused for both the target block index and the in-block position
        MatrixIndexes scratch = new MatrixIndexes();
        for (int i = 0; i < numRows; ++i) {
            if (!src.isEmpty(i)) {
                long globalRow = row_offset + (long)i;
                int begin = src.pos(i);
                int len = src.size(i);
                int[] colIdx = src.indexes(i);
                double[] vals = src.values(i);
                for (int k = begin; k < begin + len; ++k) {
                    long globalCol = col_offset + (long)colIdx[k];
                    scratch = LibMatrixReorg.computeResultBlockIndex(scratch, globalRow, globalCol, mcIn, mcOut, rowwise);
                    MatrixBlock target = LibMatrixReorg.getAllocatedBlock(rix, scratch);
                    scratch = LibMatrixReorg.computeInBlockIndex(scratch, globalRow, globalCol, mcIn, mcOut, rowwise);
                    target.appendValue((int)scratch.getRowIndex(), (int)scratch.getColumnIndex(), vals[k]);
                }
            }
        }

        // column-wise reshapes of multi-row inputs: sort the rows of all sparse outputs
        if (!rowwise && mcIn.getRows() > 1L) {
            for (MatrixBlock blk : rix.values()) {
                if (blk.sparse) {
                    blk.sortSparseRows();
                }
            }
        }
    }

    /**
     * Looks up the output block for a block index and fails if none is registered.
     *
     * @param rix output blocks keyed by block index
     * @param ix  block index to look up
     * @return the block stored under {@code ix}
     * @throws DMLRuntimeException if {@code rix} has no block for {@code ix}
     */
    private static MatrixBlock getAllocatedBlock(Map<MatrixIndexes, MatrixBlock> rix, MatrixIndexes ix) {
        MatrixBlock block = rix.get(ix);
        if (block != null) {
            return block;
        }
        throw new DMLRuntimeException("Missing result block: " + ix);
    }

    /**
     * Computes the 1-based index of the output block that the input cell
     * {@code (ai, aj)} lands in after the reshape.
     *
     * @param ixout   index object that is overwritten and returned
     * @param ai      global (0-based) row index of the input cell
     * @param aj      global (0-based) column index of the input cell
     * @param mcIn    characteristics of the input matrix
     * @param mcOut   characteristics of the output matrix
     * @param rowwise {@code true} for row-major, {@code false} for column-major linearization
     * @return result of {@code ixout.setIndexes(...)} with the target block index
     */
    private static MatrixIndexes computeResultBlockIndex(MatrixIndexes ixout, long ai, long aj, MatrixCharacteristics mcIn, MatrixCharacteristics mcOut, boolean rowwise) {
        final long linear = LibMatrixReorg.computeGlobalCellIndex(mcIn, ai, aj, rowwise);

        // split the linear index into the global output cell position
        final long outRow;
        final long outCol;
        if (rowwise) {
            outRow = linear / mcOut.getCols();
            outCol = linear % mcOut.getCols();
        } else {
            outRow = linear % mcOut.getRows();
            outCol = linear / mcOut.getRows();
        }

        // convert the cell position into a 1-based block index
        final long blockRow = outRow / (long)mcOut.getRowsPerBlock() + 1L;
        final long blockCol = outCol / (long)mcOut.getColsPerBlock() + 1L;
        return ixout.setIndexes(blockRow, blockCol);
    }

    /**
     * Computes the 0-based position inside its output block that the input cell
     * {@code (ai, aj)} lands on after the reshape.
     *
     * @param ixout   index object that is overwritten and returned
     * @param ai      global (0-based) row index of the input cell
     * @param aj      global (0-based) column index of the input cell
     * @param mcIn    characteristics of the input matrix
     * @param mcOut   characteristics of the output matrix
     * @param rowwise {@code true} for row-major, {@code false} for column-major linearization
     * @return result of {@code ixout.setIndexes(...)} with the in-block row and column
     */
    private static MatrixIndexes computeInBlockIndex(MatrixIndexes ixout, long ai, long aj, MatrixCharacteristics mcIn, MatrixCharacteristics mcOut, boolean rowwise) {
        final long linear = LibMatrixReorg.computeGlobalCellIndex(mcIn, ai, aj, rowwise);

        // global output cell position, reduced modulo the block size
        final long inBlockRow;
        final long inBlockCol;
        if (rowwise) {
            inBlockRow = linear / mcOut.getCols() % (long)mcOut.getRowsPerBlock();
            inBlockCol = linear % mcOut.getCols() % (long)mcOut.getColsPerBlock();
        } else {
            inBlockRow = linear % mcOut.getRows() % (long)mcOut.getRowsPerBlock();
            inBlockCol = linear / mcOut.getRows() % (long)mcOut.getColsPerBlock();
        }
        return ixout.setIndexes(inBlockRow, inBlockCol);
    }

    /**
     * Linearizes a global cell position of the input matrix.
     *
     * @param mcIn    characteristics of the input matrix
     * @param ai      global (0-based) row index
     * @param aj      global (0-based) column index
     * @param rowwise {@code true} for the row-major index {@code ai * cols + aj},
     *                {@code false} for the column-major index {@code ai + rows * aj}
     * @return the linear cell index
     */
    private static long computeGlobalCellIndex(MatrixCharacteristics mcIn, long ai, long aj, boolean rowwise) {
        if (rowwise) {
            return ai * mcIn.getCols() + aj;
        }
        return ai + mcIn.getRows() * aj;
    }

    /**
     * Removes rows from a block: either all empty rows ({@code select == null})
     * or all rows not flagged in the given selection vector.
     *
     * <p>Steps: (0) a CSR shortcut that only rebuilds the row pointers and reuses
     * the input's index and value arrays, (1) determine the rows to keep,
     * (2) reset the result and copy the kept rows, (3) maintain the non-zero
     * count and re-examine the sparsity of the result.
     *
     * @param in          input block
     * @param ret         result block, reset and filled by this method
     * @param select      optional selection vector; {@code null} means "keep non-empty rows"
     * @param emptyReturn if {@code true}, the result has at least one row
     * @return {@code ret}
     */
    private static MatrixBlock removeEmptyRows(MatrixBlock in, MatrixBlock ret, MatrixBlock select, boolean emptyReturn) {
        final int m = in.rlen;
        final int n = in.clen;

        // Step 0: CSR input without selection vector -> new row pointers over the shared arrays
        if (in.sparse && !in.isEmptyBlock(false) && select == null && in.sparseBlock instanceof SparseBlockCSR && in.nonZeros < Integer.MAX_VALUE) {
            SparseBlockCSR csr = (SparseBlockCSR)in.sparseBlock;
            int keptRows = 0;
            for (int r = 0; r < m; ++r) {
                if (!csr.isEmpty(r)) {
                    ++keptRows;
                }
            }
            if (MatrixBlock.evalSparseFormatInMemory(keptRows, n, in.nonZeros)) {
                int[] rowPtr = new int[keptRows + 1];
                int outRow = 0;
                int offset = 0;
                for (int r = 0; r < m; ++r) {
                    if (!csr.isEmpty(r)) {
                        offset += csr.size(r);
                        rowPtr[++outRow] = offset;
                    }
                }
                ret.reset(keptRows, in.clen, true);
                ret.sparseBlock = new SparseBlockCSR(rowPtr, csr.indexes(), csr.values(), (int)in.nonZeros);
                ret.nonZeros = in.nonZeros;
                return ret;
            }
        }

        // Step 1: determine which rows to keep and how many there are
        boolean[] keep;
        int rlen2 = 0;
        if (select != null) {
            keep = DataConverter.convertToBooleanVector(select);
            rlen2 = (int)select.getNonZeros();
        } else {
            keep = new boolean[m];
            if (in.sparse) {
                SparseBlock a = in.sparseBlock;
                for (int r = 0; r < m; ++r) {
                    keep[r] = !a.isEmpty(r);
                    if (keep[r]) {
                        ++rlen2;
                    }
                }
            } else {
                DenseBlock a = in.getDenseBlock();
                for (int r = 0; r < m; ++r) {
                    double[] avals = a.values(r);
                    int aix = a.pos(r);
                    for (int j = 0; j < n; ++j) {
                        if (avals[aix + j] != 0.0) {
                            keep[r] = true;
                            ++rlen2;
                            // first non-zero decides; skip the rest of this row
                            break;
                        }
                    }
                }
            }
        }

        // Step 2: reset the result and copy the kept rows
        rlen2 = Math.max(rlen2, emptyReturn ? 1 : 0);
        boolean sp = MatrixBlock.evalSparseFormatInMemory(rlen2, n, in.nonZeros);
        ret.reset(rlen2, n, sp);
        if (in.isEmptyBlock(false)) {
            return ret;
        }

        if (m == rlen2) {
            // nothing removed: share the input's data block with the result
            ret.sparse = in.sparse;
            if (ret.sparse) {
                ret.sparseBlock = in.sparseBlock;
            } else {
                ret.denseBlock = in.denseBlock;
            }
        } else if (in.sparse) {
            // sparse input: append the kept rows one by one
            int target = 0;
            for (int r = 0; r < m; ++r) {
                if (keep[r]) {
                    ret.appendRow(target++, in.sparseBlock.get(r), false);
                }
            }
        } else if (!ret.sparse) {
            // dense input, dense output: copy whole rows
            ret.allocateDenseBlock();
            DenseBlock a = in.getDenseBlock();
            DenseBlock c = ret.getDenseBlock();
            int target = 0;
            for (int r = 0; r < m; ++r) {
                if (keep[r]) {
                    System.arraycopy(a.values(r), a.pos(r), c.values(target), c.pos(target), n);
                    ++target;
                }
            }
        } else {
            // dense input, sparse output: append every cell of the kept rows
            ret.allocateSparseRowsBlock();
            DenseBlock a = in.getDenseBlock();
            int target = 0;
            for (int r = 0; r < m; ++r) {
                if (keep[r]) {
                    double[] avals = a.values(r);
                    int aix = a.pos(r);
                    for (int j = 0; j < n; ++j) {
                        ret.appendValue(target, j, avals[aix + j]);
                    }
                    ++target;
                }
            }
        }

        // Step 3: without a selection vector no non-zero is dropped; otherwise recount
        ret.nonZeros = (select == null) ? in.nonZeros : ret.recomputeNonZeros();
        ret.examSparsity();
        return ret;
    }

    /**
     * Removes columns from a block: either all empty columns
     * ({@code select == null}) or all columns not flagged in the given selection
     * vector.
     *
     * <p>Steps: (1) determine the columns to keep, (2) count them, (3) reset the
     * result and copy the kept columns to their compacted positions, (4) maintain
     * the non-zero count and re-examine the sparsity of the result.
     *
     * @param in          input block
     * @param ret         result block, reset and filled by this method
     * @param select      optional selection vector; {@code null} means "keep non-empty columns"
     * @param emptyReturn if {@code true}, the result has at least one column
     * @return {@code ret}
     */
    private static MatrixBlock removeEmptyColumns(MatrixBlock in, MatrixBlock ret, MatrixBlock select, boolean emptyReturn) {
        final int m = in.rlen;
        final int n = in.clen;

        // Step 1: determine which columns to keep (full scan, no early exit)
        boolean[] keep;
        if (select != null) {
            keep = DataConverter.convertToBooleanVector(select);
        } else {
            keep = new boolean[n];
            if (in.sparse) {
                SparseBlock a = in.sparseBlock;
                for (int r = 0; r < m; ++r) {
                    if (!a.isEmpty(r)) {
                        int apos = a.pos(r);
                        int alen = a.size(r);
                        int[] aix = a.indexes(r);
                        for (int k = apos; k < apos + alen; ++k) {
                            keep[aix[k]] = true;
                        }
                    }
                }
            } else {
                DenseBlock a = in.getDenseBlock();
                for (int r = 0; r < m; ++r) {
                    double[] avals = a.values(r);
                    int aix = a.pos(r);
                    for (int j = 0; j < n; ++j) {
                        keep[j] |= avals[aix + j] != 0.0;
                    }
                }
            }
        }

        // Step 2: count the kept columns
        int clen2 = 0;
        for (int j = 0; j < n; ++j) {
            if (keep[j]) {
                ++clen2;
            }
        }

        // Step 3: reset the result and copy the kept columns
        clen2 = Math.max(clen2, emptyReturn ? 1 : 0);
        boolean sp = MatrixBlock.evalSparseFormatInMemory(m, clen2, in.nonZeros);
        ret.reset(m, clen2, sp);
        if (in.isEmptyBlock(false)) {
            return ret;
        }

        if (n == clen2) {
            // nothing removed: share the input's data block with the result
            ret.sparse = in.sparse;
            if (ret.sparse) {
                ret.sparseBlock = in.sparseBlock;
            } else {
                ret.denseBlock = in.denseBlock;
            }
        } else {
            // map every kept source column to its compacted target column
            int[] targetCol = new int[n];
            int nextCol = 0;
            for (int j = 0; j < n; ++j) {
                if (keep[j]) {
                    targetCol[j] = nextCol++;
                }
            }

            if (in.sparse) {
                // sparse input: append the stored values of kept columns
                SparseBlock a = in.sparseBlock;
                for (int r = 0; r < m; ++r) {
                    if (!a.isEmpty(r)) {
                        int apos = a.pos(r);
                        int alen = a.size(r);
                        int[] aix = a.indexes(r);
                        double[] avals = a.values(r);
                        for (int k = apos; k < apos + alen; ++k) {
                            if (keep[aix[k]]) {
                                ret.appendValue(r, targetCol[aix[k]], avals[k]);
                            }
                        }
                    }
                }
            } else if (!ret.sparse) {
                // dense input, dense output: copy cell by cell into the compacted row
                ret.allocateDenseBlock();
                DenseBlock a = in.getDenseBlock();
                DenseBlock c = ret.getDenseBlock();
                for (int r = 0; r < m; ++r) {
                    double[] avals = a.values(r);
                    double[] cvals = c.values(r);
                    int aix = a.pos(r);
                    int cpos = c.pos(r);
                    for (int j = 0; j < n; ++j) {
                        if (keep[j]) {
                            cvals[cpos + targetCol[j]] = avals[aix + j];
                        }
                    }
                }
            } else {
                // dense input, sparse output: append only non-zero values of kept columns
                ret.allocateSparseRowsBlock();
                DenseBlock a = in.getDenseBlock();
                for (int r = 0; r < m; ++r) {
                    double[] avals = a.values(r);
                    int aix = a.pos(r);
                    for (int j = 0; j < n; ++j) {
                        double aval = avals[aix + j];
                        if (keep[j] && aval != 0.0) {
                            ret.appendValue(r, targetCol[j], aval);
                        }
                    }
                }
            }
        }

        // Step 4: without a selection vector no non-zero is dropped; otherwise recount
        ret.nonZeros = (select == null) ? in.nonZeros : ret.recomputeNonZeros();
        ret.examSparsity();
        return ret;
    }

    /**
     * Expands a vector of positive integer values into an indicator matrix with
     * {@code max} rows: for input position {@code p} holding value {@code v},
     * output cell {@code (v - 1, p)} is set to 1. Values that are not integral or
     * lie outside {@code [1, max]} produce no entry.
     *
     * <p>The input is processed in chunks; each chunk is copied into a buffer and
     * sorted by value before appending.
     *
     * @param in     input vector
     * @param ret    result block, reset to {@code max x in.rlen}
     * @param max    number of output rows and largest accepted value
     * @param cast   if {@code true}, values are passed through {@code UtilFunctions.toLong} first
     * @param ignore if {@code false}, values {@code <= 0} raise an exception
     * @return {@code ret}
     * @throws DMLRuntimeException for a value {@code <= 0} when {@code ignore} is {@code false}
     */
    private static MatrixBlock rexpandRows(MatrixBlock in, MatrixBlock ret, int max, boolean cast, boolean ignore) {
        final int outRows = max;
        final int outCols = in.rlen;
        final long nnz = in.nonZeros;
        boolean sp = MatrixBlock.evalSparseFormatInMemory(outRows, outCols, nnz);
        ret.reset(outRows, outCols, sp);

        // chunk buffers: values and their positions relative to the chunk start
        final int blksize = 0x100000;
        final int bufLen = Math.min(blksize, outCols);
        int[] bufPos = new int[bufLen];
        double[] bufVals = new double[bufLen];

        for (int start = 0; start < outCols; start += blksize) {
            int len = Math.min(blksize, outCols - start);
            LibMatrixReorg.copyColVector(in, start, bufVals, bufPos, len);
            SortUtils.sortByValue(0, len, bufVals, bufPos);

            for (int k = 0; k < len; ++k) {
                double val = bufVals[k];
                if (cast) {
                    val = UtilFunctions.toLong(val);
                }
                if (!ignore && val <= 0.0) {
                    throw new DMLRuntimeException("Invalid input value <= 0 for ignore=false: " + val);
                }
                // only integral values within [1, max] select an output row (NaN fails all tests)
                boolean integral = val == Math.floor(val);
                if (integral && val >= 1.0 && val <= (double)max) {
                    ret.appendValue((int)(val - 1.0), start + bufPos[k], 1.0);
                }
            }
        }

        // chunk-wise appends are not globally ordered, so sparse rows are sorted at the end
        if (ret.isInSparseFormat()) {
            ret.sortSparseRows();
        }
        return ret;
    }

    /**
     * Expands the first column of {@code in} into indicator columns: the output has
     * {@code in.rlen} rows and {@code max} columns. The per-row work is delegated to the
     * range-based {@code rexpandColumns(in, ret, max, cast, ignore, rl, ru)} overload.
     *
     * <p>The work runs in the calling thread when {@code k <= 1}, when the input has at most
     * 0x100000 rows, or when the output was chosen to be sparse. Otherwise the row range is
     * split into up to {@code 8*k} contiguous chunks that are executed on a pool obtained
     * from {@code CommonThreadPool.get(k)}.
     *
     * @param in     input block; only column 0 is read by the delegate
     * @param ret    output block, reset to {@code in.rlen x max} and allocated by this method
     * @param max    number of output columns
     * @param cast   forwarded to the range-based overload
     * @param ignore forwarded to the range-based overload
     * @param k      requested degree of parallelism
     * @return {@code ret}, with its non-zero count set to the sum over all processed ranges
     * @throws DMLRuntimeException wrapping any exception raised while running the parallel tasks
     */
    private static MatrixBlock rexpandColumns(MatrixBlock in, MatrixBlock ret, int max, boolean cast, boolean ignore, int k) {
        final int rlen = in.rlen;
        final int clen = max;
        final long nnz = in.nonZeros;
        boolean sp = MatrixBlock.evalSparseFormatInMemory(rlen, clen, nnz);
        ret.reset(rlen, clen, sp);
        ret.allocateBlock();

        long rnnz = 0L;
        if (k <= 1 || (long)in.getNumRows() <= 0x100000L || sp) {
            rnnz = LibMatrixReorg.rexpandColumns(in, ret, max, cast, ignore, 0, rlen);
        } else {
            ExecutorService pool = null;
            try {
                pool = CommonThreadPool.get(k);
                List<RExpandColsTask> tasks = new ArrayList<RExpandColsTask>();
                int blklen = (int)Math.ceil((double)rlen / (double)k / 8.0);
                for (int i = 0; i < 8 * k && i * blklen < rlen; ++i) {
                    tasks.add(new RExpandColsTask(in, ret, max, cast, ignore, i * blklen, Math.min((i + 1) * blklen, rlen)));
                }
                // invokeAll returns only after every task has completed
                List<Future<Long>> taskret = pool.invokeAll(tasks);
                for (Future<Long> task : taskret) {
                    rnnz += task.get();
                }
            }
            catch (Exception ex) {
                throw new DMLRuntimeException(ex);
            }
            finally {
                // release the pool on failure paths too (e.g. invokeAll interrupted or rejected)
                if (pool != null) {
                    pool.shutdown();
                }
            }
        }
        ret.setNonZeros(rnnz);
        return ret;
    }

    /**
     * Range-based worker for the column expansion: for every row {@code i} in
     * {@code [rl, ru)} whose column-0 value {@code v} is an integer with
     * {@code 1 <= v <= max}, marks output cell {@code (i, v-1)} with 1.
     *
     * <p>If {@code ret.sparse} is set on entry, target column indexes are first collected in
     * an array of length {@code in.rlen} (initialised to -1) and handed to a single
     * {@code SparseBlockCSR} constructor call at the end; otherwise cells are written
     * directly into the dense block.
     *
     * @param in     input block; only column 0 is read
     * @param ret    output block, expected to be reset/allocated by the caller
     * @param max    largest accepted value
     * @param cast   if true, every value is passed through {@code UtilFunctions.toLong} first
     * @param ignore if false, a value {@code <= 0} raises an exception instead of being skipped
     * @param rl     first row to process (inclusive)
     * @param ru     last row to process (exclusive)
     * @return the result of {@code ret.setNonZeros} called with the number of cells marked in this range
     * @throws DMLRuntimeException if {@code ignore} is false and a value {@code <= 0} is found
     */
    private static long rexpandColumns(MatrixBlock in, MatrixBlock ret, int max, boolean cast, boolean ignore, int rl, int ru) {
        int marked = 0;
        int[] targetCols = null;
        if (ret.sparse) {
            targetCols = new int[in.rlen];
            Arrays.fill(targetCols, -1);
        }
        final DenseBlock denseOut = ret.getDenseBlock();
        final SparseBlock sparseOut = ret.getSparseBlock();

        for (int row = rl; row < ru; row++) {
            double v = in.quickGetValue(row, 0);
            if (cast) {
                v = UtilFunctions.toLong(v);
            }
            if (!ignore && v <= 0.0) {
                throw new DMLRuntimeException("Invalid input value <= 0 for ignore=false: " + v);
            }

            final boolean inRange = v == Math.floor(v) && v >= 1.0 && v <= (double)max;
            if (inRange) {
                final int col = (int)(v - 1.0);
                if (targetCols != null) {
                    targetCols[row] = col;
                } else if (ret.sparse) {
                    // only taken if ret.sparse was false on entry but is true now
                    sparseOut.allocate(row, 1);
                    sparseOut.append(row, col, 1.0);
                } else {
                    denseOut.set(row, col, 1.0);
                }
                marked++;
            }
        }

        // build the CSR block in one go from the collected column indexes
        if (targetCols != null) {
            ret.sparseBlock = new SparseBlockCSR(in.rlen, marked, targetCols);
        }
        return ret.setNonZeros(marked);
    }

    /**
     * Fills {@code tmp[0..len)} with the values of {@code in} for rows
     * {@code ixin .. ixin+len-1} and {@code tmpi[0..len)} with the matching absolute row
     * indexes {@code ixin .. ixin+len-1}.
     *
     * <p>Empty inputs yield zeros; sparse inputs are read cell by cell from column 0; dense
     * inputs are copied straight from the dense value array starting at offset {@code ixin}
     * (NOTE(review): this presumes a single-column dense input; confirm with callers).
     *
     * @param in   input block
     * @param ixin index of the first row to copy
     * @param tmp  destination for the values
     * @param tmpi destination for the absolute row indexes
     * @param len  number of rows to copy
     */
    private static void copyColVector(MatrixBlock in, int ixin, double[] tmp, int[] tmpi, int len) {
        if (in.isEmptyBlock(false)) {
            Arrays.fill(tmp, 0, len, 0.0);
        } else if (!in.sparse) {
            System.arraycopy(in.getDenseBlockValues(), ixin, tmp, 0, len);
        } else {
            for (int k = 0; k < len; k++) {
                tmp[k] = in.quickGetValue(ixin + k, 0);
            }
        }

        // absolute row index of each copied value
        for (int k = 0, row = ixin; k < len; k++, row++) {
            tmpi[k] = row;
        }
    }

    /**
     * Reverses, in place, the first {@code m1.rlen} entries of the dense value array of
     * {@code m1}.
     *
     * @param m1 block whose dense values are reversed
     */
    private static void sortReverseDense(MatrixBlock m1) {
        final int n = m1.rlen;
        final double[] vals = m1.getDenseBlockValues();
        // walk inwards from both ends, swapping as we go
        for (int lo = 0, hi = n - 1; lo < hi; lo++, hi--) {
            final double t = vals[hi];
            vals[hi] = vals[lo];
            vals[lo] = t;
        }
    }

    /**
     * Reverses the given int array in place.
     *
     * @param a array to reverse
     */
    private static void sortReverseDense(int[] a) {
        // walk inwards from both ends, swapping as we go
        for (int lo = 0, hi = a.length - 1; lo < hi; lo++, hi--) {
            final int t = a[hi];
            a[hi] = a[lo];
            a[lo] = t;
        }
    }

    /**
     * Reverses the given double array in place.
     *
     * @param a array to reverse
     */
    private static void sortReverseDense(double[] a) {
        // walk inwards from both ends, swapping as we go
        for (int lo = 0, hi = a.length - 1; lo < hi; lo++, hi--) {
            final double t = a[hi];
            a[hi] = a[lo];
            a[lo] = t;
        }
    }

    /**
     * Refines the order inside runs of equal entries of {@code values[rl, ru)}.
     *
     * <p>For every run of two or more equal values, the run is temporarily overwritten with
     * the values of column {@code by[off]-1} of {@code in} (looked up through {@code vix}),
     * passed to {@code SortUtils.sortByValue} together with {@code vix}, refined recursively
     * with the next entry of {@code by} if there is one, and finally restored to the
     * original run value.
     *
     * @param rl     start of the range (inclusive)
     * @param ru     end of the range (exclusive)
     * @param values current sort keys; unchanged on return
     * @param vix    row indexes aligned with {@code values}; reordered within runs
     * @param in     block providing the secondary key columns
     * @param by     1-based column ids of the sort keys
     * @param off    position in {@code by} of the key column used at this level
     */
    private static void sortBySecondary(int rl, int ru, double[] values, int[] vix, MatrixBlock in, int[] by, int off) {
        int start = rl;
        while (start < ru - 1) {
            // find the end (exclusive) of the run of values equal to values[start]
            final double key = values[start];
            int end = start + 1;
            while (end < ru && key == values[end]) {
                end++;
            }

            if (end - start > 1) {
                // swap in the secondary keys for this run and order by them
                final int col = by[off] - 1;
                for (int pos = start; pos < end; pos++) {
                    values[pos] = in.quickGetValue(vix[pos], col);
                }
                SortUtils.sortByValue(start, end, values, vix);
                if (off + 1 < by.length) {
                    LibMatrixReorg.sortBySecondary(start, end, values, vix, in, by, off + 1);
                }
                // put the original key back so the caller's view of values is unchanged
                Arrays.fill(values, start, end, key);
            }
            start = end;
        }
    }

    /**
     * Orders the row indexes inside runs of equal entries of {@code values[rl, ru)}.
     *
     * <p>For every run of two or more equal values: while there is a key column left
     * ({@code off < by.length}), the run is overwritten with the values of column
     * {@code by[off]-1} of {@code in} and processed recursively with {@code off+1}; once
     * the key columns are exhausted, the indexes of the run in {@code vix} are sorted
     * ascending. Overwritten entries of {@code values} are not restored.
     *
     * @param rl     start of the range (inclusive)
     * @param ru     end of the range (exclusive)
     * @param values current sort keys; may be overwritten with later key columns
     * @param vix    row indexes aligned with {@code values}
     * @param in     block providing the key columns
     * @param by     1-based column ids of the sort keys
     * @param off    position in {@code by} of the next key column to compare
     */
    private static void sortIndexesStable(int rl, int ru, double[] values, int[] vix, MatrixBlock in, int[] by, int off) {
        int start = rl;
        while (start < ru - 1) {
            // find the end (exclusive) of the run of values equal to values[start]
            final double key = values[start];
            int end = start + 1;
            while (end < ru && key == values[end]) {
                end++;
            }

            if (end - start > 1) {
                if (off >= by.length) {
                    // no further key columns: order the tied rows by their index
                    Arrays.sort(vix, start, end);
                } else {
                    final int col = by[off] - 1;
                    for (int pos = start; pos < end; pos++) {
                        values[pos] = in.quickGetValue(vix[pos], col);
                    }
                    LibMatrixReorg.sortIndexesStable(start, end, values, vix, in, by, off + 1);
                }
            }
            start = end;
        }
    }

    /**
     * Checks a list of 1-based sort-by column ids against a column count.
     *
     * @param by   column ids to check
     * @param clen number of columns
     * @return true iff {@code by} is non-null, non-empty, not longer than {@code clen},
     *         and every entry lies in {@code [1, clen]}
     */
    private static boolean isValidSortByList(int[] by, int clen) {
        if (by == null) {
            return false;
        }
        final int count = by.length;
        if (count == 0 || count > clen) {
            return false;
        }
        for (int col : by) {
            if (col <= 0 || col > clen) {
                return false;
            }
        }
        return true;
    }

    /**
     * Increments {@code c[ai[i]]} once for every {@code i} in {@code [0, len)}.
     *
     * <p>The first {@code len % 8} entries are handled one by one; the remainder is
     * processed in a loop unrolled eight times.
     *
     * @param c   counters, indexed by the values found in {@code ai}
     * @param ai  indexes to count
     * @param len number of leading entries of {@code ai} to process
     */
    private static void countAgg(int[] c, int[] ai, int len) {
        final int rest = len % 8;
        int pos = 0;
        // leftover entries that do not fill a full group of eight
        while (pos < rest) {
            c[ai[pos]]++;
            pos++;
        }
        // full groups of eight
        while (pos < len) {
            c[ai[pos]]++;
            c[ai[pos + 1]]++;
            c[ai[pos + 2]]++;
            c[ai[pos + 3]]++;
            c[ai[pos + 4]]++;
            c[ai[pos + 5]]++;
            c[ai[pos + 6]]++;
            c[ai[pos + 7]]++;
            pos += 8;
        }
    }

    /**
     * Increments {@code c[aix[i]]} once for every {@code i} in {@code [ai, ai+len)}.
     *
     * <p>The first {@code len % 8} entries are handled one by one; the remainder is
     * processed in a loop unrolled eight times.
     *
     * @param c   counters, indexed by the values found in {@code aix}
     * @param aix indexes to count
     * @param ai  position of the first entry of {@code aix} to process
     * @param len number of entries to process
     */
    private static void countAgg(int[] c, int[] aix, int ai, int len) {
        final int headEnd = ai + len % 8;
        final int end = ai + len;
        int pos = ai;
        // leftover entries that do not fill a full group of eight
        while (pos < headEnd) {
            c[aix[pos]]++;
            pos++;
        }
        // full groups of eight
        while (pos < end) {
            c[aix[pos]]++;
            c[aix[pos + 1]]++;
            c[aix[pos + 2]]++;
            c[aix[pos + 3]]++;
            c[aix[pos + 4]]++;
            c[aix[pos + 5]]++;
            c[aix[pos + 6]]++;
            c[aix[pos + 7]]++;
            pos += 8;
        }
    }

    /**
     * Callable that runs the range-based {@code LibMatrixReorg.rexpandColumns} for the row
     * range {@code [rl, ru)} and returns its result.
     */
    private static class RExpandColsTask implements Callable<Long> {
        private final MatrixBlock _in;
        private final MatrixBlock _out;
        private final int _max;
        private final boolean _cast;
        private final boolean _ignore;
        private final int _rl;
        private final int _ru;

        /**
         * @param in     input block
         * @param out    output block written by the task
         * @param max    forwarded as {@code max}
         * @param cast   forwarded as {@code cast}
         * @param ignore forwarded as {@code ignore}
         * @param rl     first row of the range (inclusive)
         * @param ru     last row of the range (exclusive)
         */
        protected RExpandColsTask(MatrixBlock in, MatrixBlock out, int max, boolean cast, boolean ignore, int rl, int ru) {
            _in = in;
            _out = out;
            _max = max;
            _cast = cast;
            _ignore = ignore;
            _rl = rl;
            _ru = ru;
        }

        /** Delegates to the range-based worker with the stored arguments. */
        @Override
        public Long call() {
            return LibMatrixReorg.rexpandColumns(_in, _out, _max, _cast, _ignore, _rl, _ru);
        }
    }

    /**
     * Callable that runs {@code LibMatrixReorg.countNnzPerColumn} on a block for the range
     * {@code [rl, ru)} and returns the resulting array.
     */
    private static class CountNnzTask implements Callable<int[]> {
        private final MatrixBlock _in;
        private final int _rl;
        private final int _ru;

        /**
         * @param in block to inspect
         * @param rl lower bound of the range, forwarded unchanged
         * @param ru upper bound of the range, forwarded unchanged
         */
        protected CountNnzTask(MatrixBlock in, int rl, int ru) {
            _in = in;
            _rl = rl;
            _ru = ru;
        }

        /** Delegates to {@code countNnzPerColumn} with the stored arguments. */
        @Override
        public int[] call() {
            return LibMatrixReorg.countNnzPerColumn(_in, _rl, _ru);
        }
    }

    /**
     * Callable that transposes one slice of a block. The stored range {@code [rl, ru)} is
     * interpreted as a row range when {@code row} is true (all columns are covered) and as
     * a column range otherwise (all rows are covered).
     */
    private static class TransposeTask implements Callable<Object> {
        private final MatrixBlock _in;
        private final MatrixBlock _out;
        private final boolean _row;
        private final int _rl;
        private final int _ru;
        private final int[] _cnt;

        /**
         * @param in  input block
         * @param out output block written by the task
         * @param row true if {@code [rl, ru)} is a row range, false if it is a column range
         * @param rl  lower bound of the range (inclusive)
         * @param ru  upper bound of the range (exclusive)
         * @param cnt passed through to the sparse-to-sparse transpose only
         */
        protected TransposeTask(MatrixBlock in, MatrixBlock out, boolean row, int rl, int ru, int[] cnt) {
            _in = in;
            _out = out;
            _row = row;
            _rl = rl;
            _ru = ru;
            _cnt = cnt;
        }

        /**
         * Dispatches to the transpose kernel matching the input/output formats.
         *
         * @return always null
         * @throws DMLRuntimeException for a dense input with a sparse output
         */
        @Override
        public Object call() {
            // resolve the row and column bounds of this task's slice
            final int rowLo;
            final int rowHi;
            final int colLo;
            final int colHi;
            if (_row) {
                rowLo = _rl;
                rowHi = _ru;
                colLo = 0;
                colHi = _in.clen;
            } else {
                rowLo = 0;
                rowHi = _in.rlen;
                colLo = _rl;
                colHi = _ru;
            }

            if (_in.sparse) {
                if (_out.sparse) {
                    LibMatrixReorg.transposeSparseToSparse(_in, _out, rowLo, rowHi, colLo, colHi, _cnt);
                } else {
                    LibMatrixReorg.transposeSparseToDense(_in, _out, rowLo, rowHi, colLo, colHi);
                }
            } else if (!_out.sparse) {
                LibMatrixReorg.transposeDenseToDense(_in, _out, rowLo, rowHi, colLo, colHi);
            } else {
                throw new DMLRuntimeException("Unsupported multi-threaded dense-sparse transpose.");
            }
            return null;
        }
    }

    /**
     * Orders row indexes by the value of one column of a block, largest value first.
     *
     * <p>Rows whose value is NaN are ordered before all other rows and are equal to each
     * other. Comparing with plain {@code >}/{@code ==} alone would report "greater" in both
     * directions as soon as one value is NaN, which breaks the {@link Comparator} contract.
     * Results for non-NaN values are as with plain comparisons; in particular
     * {@code -0.0} and {@code 0.0} compare as equal.
     */
    private static class DescRowComparator implements Comparator<Integer> {
        private final MatrixBlock _mb;
        private final int _col;

        /**
         * @param mb  block holding the values to compare
         * @param col index of the column that provides the sort key
         */
        public DescRowComparator(MatrixBlock mb, int col) {
            this._mb = mb;
            this._col = col;
        }

        /**
         * @param arg0 first row index
         * @param arg1 second row index
         * @return negative, zero or positive if row {@code arg0} sorts before, equal to or
         *         after row {@code arg1} in descending value order
         */
        @Override
        public int compare(Integer arg0, Integer arg1) {
            double val0 = this._mb.quickGetValue(arg0, this._col);
            double val1 = this._mb.quickGetValue(arg1, this._col);
            if (val0 > val1) {
                return -1;
            }
            if (val0 < val1) {
                return 1;
            }
            if (val0 == val1) {
                return 0;
            }
            // at least one value is NaN: fall back to a total order (reversed, so NaN is first)
            return Double.compare(val1, val0);
        }
    }

    /**
     * Orders row indexes by the value of one column of a block, smallest value first.
     *
     * <p>Rows whose value is NaN are ordered after all other rows and are equal to each
     * other. Comparing with plain {@code <}/{@code ==} alone would report "greater" in both
     * directions as soon as one value is NaN, which breaks the {@link Comparator} contract.
     * Results for non-NaN values are as with plain comparisons; in particular
     * {@code -0.0} and {@code 0.0} compare as equal.
     */
    private static class AscRowComparator implements Comparator<Integer> {
        private final MatrixBlock _mb;
        private final int _col;

        /**
         * @param mb  block holding the values to compare
         * @param col index of the column that provides the sort key
         */
        public AscRowComparator(MatrixBlock mb, int col) {
            this._mb = mb;
            this._col = col;
        }

        /**
         * @param arg0 first row index
         * @param arg1 second row index
         * @return negative, zero or positive if row {@code arg0} sorts before, equal to or
         *         after row {@code arg1} in ascending value order
         */
        @Override
        public int compare(Integer arg0, Integer arg1) {
            double val0 = this._mb.quickGetValue(arg0, this._col);
            double val1 = this._mb.quickGetValue(arg1, this._col);
            if (val0 < val1) {
                return -1;
            }
            if (val0 > val1) {
                return 1;
            }
            if (val0 == val1) {
                return 0;
            }
            // at least one value is NaN: fall back to a total order (NaN is last)
            return Double.compare(val0, val1);
        }
    }

    /**
     * Internal tag for the kind of reorg operation being handled.
     * NOTE(review): the mapping from operators to these constants lives outside this
     * part of the file; the per-constant meanings are inferred from the names only
     * and should be confirmed there.
     */
    private static enum ReorgType {
        TRANSPOSE,
        REV,
        DIAG,
        RESHAPE,
        SORT,
        // NOTE(review): presumably the tag for operators that match none of the kinds above; confirm
        INVALID;

    }
}

