/* ======================== piro_band_blocksize.c =========================== */

/* -----------------------------------------------------------------------------
 * PIRO_BAND.  Version 0.9.
 * Copyright (C) 2009, Sivasankaran Rajamanickam, Timothy A. Davis.
 * PIRO_BAND is licensed under Version 2.0 of the GNU
 * General Public License.  See gpl.txt for a text of the license.
 * PIRO_BAND is also available under other licenses; contact authors for 
 * details. http://www.cise.ufl.edu/research/sparse
 * -------------------------------------------------------------------------- */

/* Function to detemine the recommended blocksize for a given banded matrix
 * of size mxn, a lower bandwidth of bl and upper bandwidth of bu.
 * blk should be atleast of size 4.
 * Output is in blk where * blk[0] and blk[1]  is the #columns and #rows in the 
 * block for the reducing the upper bandwidth. blk[2] and blk[2] is the #rows 
 * and #columns in the block for reducing the lower bandwidth.
 * */

#include "piro_band_util.h"
/*#include "piro_band_internal.h"*/
#include "piro_band_blocksize.h"

int PIRO_BAND_LONG_NAME(get_blocksize)
(
    Int m,              /* #rows in the input matrix */
    Int n,              /* #columns in the input matrix */
    Int bl,             /* lower bandwidth of the input matrix */
    Int bu,             /* upper bandwidth of the input matrix */
    Int wantuv,         /* flag : 0/1 whether U or V is needed */
    Int *blk            /* Output, recommended block size. Shoule be atleast of
                         * size 4. */
)
{
    int info ;
    double minmn, d_bw, fl ;

    info = 0 ;
    if (m < 0)
        info = -1 ;
    else if (n < 0)
        info = -2 ;
    else if (bl < 0)
        info = -3 ;
    else if (bu < 1)
        info = -4 ;
    else if (wantuv < 0)
        info = -5 ;
    else if (blk == NULL)
        info = -6 ;

    if (info < 0) return info ;

    blk[0] = 0 ;
    blk[1] = 0 ;
    blk[2] = 0 ;
    blk[3] = 0 ;

    d_bw = bl + bu ;
    minmn = (m <= n ? m : n) ;
    /* Rough estimate of the flops cutoff point that is to choose larger 
     * block size. */
    fl = 6 * d_bw * minmn * minmn ;


    /* Find the block size for the upper band */
    if (bu > 1)
    {
        if (wantuv || bu < 16)
        {
            /* Entire row is the block */
            blk[0] = bu-1 ;
            blk[1] = 1 ;
        }
        else
        {
            if (fl > 1e+10 && bu >= 64)
            {
                /* flops high. Use 32x32. We can use 64x64 in very high cases. 
                 * But 32x32 is not very bad in those cases too.
                 * */
                blk[0] = 32 ;
                blk[1] = 32 ;
            }
            else
            {
                /* If flops is less than cutoff or bu is small to use 32, use
                 * 8x8. We can use 16x16 in some cases. But 8x8 is not very bad.
                 * We can use higher cutoff for unsymmetric but the current 
                 * cutoff is not bad in those cases too.
                 * */
                blk[0] = 8 ;
                blk[1] = 8 ;
            }
        }
    }
    else
    {
        blk[0] = 0 ;
        blk[1] = 0 ;
    }

    /* Find the block size for the lower band */
    if (bl > 1)
    {
        if (wantuv || bl < 16)
        {
            /* Entire row is the block */
            blk[2] = bl ;
            blk[3] = 1 ;
        }
        else
        {
            if (fl > 1e+10 && bl >= 64)
            {
                /* flops high. Use 32x32. We can use 64x64 in very high cases. 
                 * But 32x32 is not very bad in those cases too.
                 * */
                blk[2] = 32 ;
                blk[3] = 32 ;
            }
            else
            {
                /* If flops is less than cutoff or bl is small to use 32, use
                 * 8x8. We can use 16x16 in some cases. But 8x8 is not very bad.
                 * We can use higher cutoff for unsymmetric but the current 
                 * cutoff is not bad in those cases too.
                 * */
                blk[2] = 8 ;
                blk[3] = 8 ;
            }
        }
    }
    else
    {
        blk[2] = bl ;
        blk[3] = bl ;
    }

    return info ;
}

