/* --------------------------- piro_band_lapack.c --------------------------- */

/* -----------------------------------------------------------------------------
 * PIRO_BAND.  Version 0.9.
 * Copyright (C) 2009, Sivasankaran Rajamanickam, Timothy A. Davis.
 * PIRO_BAND is licensed under Version 2.0 of the GNU
 * General Public License.  See gpl.txt for a text of the license.
 * PIRO_BAND is also available under other licenses; contact authors for 
 * details. http://www.cise.ufl.edu/research/sparse
 * -------------------------------------------------------------------------- */

/* 
 * Replacement fucntions for LAPACK's band reduction. Prefix all LAPACK names 
 * with "piro_band_" to get the piro_band equivalent of it. 
 *
 * Eg : dgbbrd will piro_band_dgbbrd(). The arguments and the return values are
 * the same as in LAPACK. The long versions of LAPACK functions will have a 
 * suffix _l like piro_band_dgbbrd_l().
 *
 *
 * */

#include "piro_band_memory.h"
#include "piro_band_lapack.h"
#include "piro_band_lapack_internal.h"
#include "piro_band_util.h"            
#include "piro_band_blocksize.h"            

void PIRO_BAND_LONG_NAME(dgbbrd)
(
  char *vect,       /* vect[0] = 'B' | 'Q' | 'P' for both Q and PT | Q | PT  */
  Int m,            /* #rows in AB */
  Int n,            /* #columns in AB */
  Int ncc,          /* #columns in C */
  Int kl,           /* lower bandwidth */
  Int ku,           /* upper bandwidth */
  double *AB,       /* input matrix in packed band format */
  Int ldab,         /* leading dimension of AB */
  double *D,        /* output : main diagonal of the bidiagonal matrix */
  double *E,        /* output: super diagonal of the bidiagonal matrix */
  double *Q,        /* Accumulated left rotations */
  Int ldq,          /* leading dimension of Q */
  double *PT,       /* Accumulated right rotations */
  Int ldpt,         /* leading dimension of PT */
  double *C,        /* Accumulated left rotations */
  Int ldc,          /* leading dimension of C */
  double *work,     /* workspace : ignored now */
  Int *info         /* info = 0 for success and negative for failure */
)
{
    Int wantq, wantpt, wantb ;
    Int blk[4] ;
    double *dwork ;
    double *A, *newA, *newC ;

    newC = NULL ;
    wantb = (vect[0] == 'B') ;                  /* both Q and PT */
    wantq = ((vect[0] == 'Q') || wantb) ;       /* just Q */
    wantpt = ((vect[0] == 'P') || wantb) ;      /* just PT */

    if (!wantq && !wantpt && vect[0] != 'N')
    {
        *info = PIRO_BAND_VECT_INVALID ;
        return ;
    }

    if (ncc > 0)
    {
        /* Transpose C, and transpose back later. */
        newC = (double *) MALLOC(ncc * m * sizeof(double)) ;
        if (newC == NULL)
        {
            *info = PIRO_BAND_OUT_OF_MEMORY ;
            return ;
        }
        PIRO_BAND_LAPACK_NAME(general_transpose_dr)(m, ncc, C, ldc, newC, ncc ) ;
    }

    if (ku > 0)
    {
        A = AB ;
        newA = NULL ;
    }
    else if (ku == 0 && kl >= 0)
    {
        /* Add a upper bidiagonal with zeros to AB */
        newA = (double *) MALLOC(n * (kl + 2) * sizeof(double)) ;
        if (newA == NULL)
        {
            if (newC != NULL)
            {   
                FREE(newC) ;
            }
            *info = PIRO_BAND_OUT_OF_MEMORY ;
            return ;
        }
        PIRO_BAND_LAPACK_NAME(add_bidiag_dr) (n, AB, ldab, kl, newA) ;
        A = newA ;
        ku = 1 ;
        ldab = kl + 2 ;
    }
    /* Find block size */
    PIRO_BAND_LONG_NAME(get_blocksize)(m, n, kl, ku, 
                        (wantq || wantpt || ncc > 0), blk) ;

    /* Allocate work for the band reduction*/
    dwork = (double *) MALLOC(2 * (MAX( blk[0]*blk[1], blk[2]*blk[3] )) * 
                                    sizeof(double)) ;
    if (dwork == NULL && (kl != 0 || ku > 1))
    {
        *info = PIRO_BAND_OUT_OF_MEMORY ;
    }
    else
    {
        /* Reduce the matrix to bidiagonal */
        *info = PIRO_BAND_LAPACK_NAME(reduce_dr)(blk, m, n, ncc, kl, ku, A, ldab,
                    D, E, Q, ldq, PT, ldpt, newC, ncc, dwork, 0) ;

        if (wantpt)
        {
            /* Transpose PT as reduce finds P */
            PIRO_BAND_LAPACK_NAME(inplace_conjugate_transpose_dr)(n, PT, ldpt) ;
        }

        if (ncc > 0)
        {
            /* Transpose C back */
            PIRO_BAND_LAPACK_NAME(general_transpose_dr)(ncc, m, newC, ncc, C,
                    ldc) ;
        }
        FREE(dwork) ;
    }

    /* free work */
    if (newC != NULL)
    {   
        FREE(newC) ;
    }
    if (newA != NULL)
    {
        FREE(newA) ;
    }
    return ;
}

void PIRO_BAND_LONG_NAME(sgbbrd)
(
  char *vect,       /* vect[0] = 'B' | 'Q' | 'P' for both Q and PT | Q | PT  */
  Int m,            /* #rows in AB */
  Int n,            /* #columns in AB */
  Int ncc,          /* #columns in C */
  Int kl,           /* lower bandwidth */
  Int ku,           /* upper bandwidth */
  float *AB,        /* input matrix in packed band format */
  Int ldab,         /* leading dimension of AB */
  float *D,         /* output : main diagonal of the bidiagonal matrix */
  float *E,         /* output: super diagonal of the bidiagonal matrix */
  float *Q,         /* Accumulated left rotations */
  Int ldq,          /* leading dimension of Q */
  float *PT,        /* Accumulated right rotations */
  Int ldpt,         /* leading dimension of PT */
  float *C,         /* Accumulated left rotations */
  Int ldc,          /* leading dimension of C */
  float *work,      /* workspace : ignored now */
  Int *info         /* info = 0 for success and negative for failure */
)
{
    Int wantq, wantpt, wantb ;
    Int blk[4] ;
    float *dwork ;
    float *A, *newA, *newC ;

    newC = NULL ;
    wantb = (vect[0] == 'B') ;
    wantq = ((vect[0] == 'Q') || wantb) ;
    wantpt = ((vect[0] == 'P') || wantb) ;

    if (!wantq && !wantpt && vect[0] != 'N')
    {
        *info = PIRO_BAND_VECT_INVALID ;
        return ;
    }

    if (ncc > 0)
    {
        /* Transpose C, and transpose back later. */
        newC = (float *) MALLOC(ncc * m * sizeof(float)) ;
        if (newC == NULL)
        {
            *info = PIRO_BAND_OUT_OF_MEMORY ;
            return ;
        }
        PIRO_BAND_LAPACK_NAME(general_transpose_sr)(m, ncc, C, ldc, newC, ncc ) ;
    }

    if (ku > 0)
    {
        A = AB ;
        newA = NULL ;
    }
    else if (ku == 0 && kl >= 0)
    {
        /* Add a upper bidiagonal with zeros to AB */
        newA = (float *) MALLOC(n * (kl + 2) * sizeof(float)) ;
        if (newA == NULL)
        {
            *info = PIRO_BAND_OUT_OF_MEMORY ;
            if (newC != NULL)
            {   
                FREE(newC) ;
            }
            return ;
        }
        PIRO_BAND_LAPACK_NAME(add_bidiag_sr) (n, AB, ldab, kl, newA) ;
        A = newA ;
        ku = 1 ;
        ldab = kl + 2 ;
    }
    /* Find block size */
    PIRO_BAND_LONG_NAME(get_blocksize)(m, n, kl, ku, 
                        (wantq || wantpt || ncc > 0), blk) ;

    /* Allocate work for the band reduction*/
    dwork = (float *) MALLOC(2 * (MAX( blk[0]*blk[1], blk[2]*blk[3] )) * 
                                    sizeof(float)) ;
    if (dwork == NULL && (kl != 0 || ku > 1))
    {
        *info = PIRO_BAND_OUT_OF_MEMORY ;
    }
    else
    {

        /* Reduce the matrix to bidiagonal */
        *info = PIRO_BAND_LAPACK_NAME(reduce_sr)(blk, m, n, ncc, kl, ku, A, ldab,
                D, E, Q, ldq, PT, ldpt, newC, ncc, dwork, 0) ;

        if (wantpt)
        {
            /* Transpose PT as reduce finds P */
            PIRO_BAND_LAPACK_NAME(inplace_conjugate_transpose_sr)(n, PT, ldpt) ;
        }

        if (ncc > 0)
        {
            /* Transpose C back */
            PIRO_BAND_LAPACK_NAME(general_transpose_sr)(ncc, m, newC, ncc, C, 
                    ldc ) ;
        }
        FREE(dwork) ;
    }

    /* free work */
    if (newC != NULL)
    {   
        FREE(newC) ;
    }
    if (newA != NULL)
    {
        FREE(newA) ;
    }

    return ;
}

void PIRO_BAND_LONG_NAME(zgbbrd)
(
  char *vect,       /* vect[0] = 'B' | 'Q' | 'P' for both Q and PT | Q | PT  */
  Int m,            /* #rows in AB */
  Int n,            /* #columns in AB */
  Int ncc,          /* #columns in C */
  Int kl,           /* lower bandwidth */
  Int ku,           /* upper bandwidth */
  double *AB,       /* input matrix in packed band format */
  Int ldab,         /* leading dimension of AB */
  double *D,        /* output : main diagonal of the bidiagonal matrix */
  double *E,        /* output: super diagonal of the bidiagonal matrix */
  double *Q,        /* Accumulated left rotations */
  Int ldq,          /* leading dimension of Q */
  double *PT,       /* Accumulated right rotations */
  Int ldpt,         /* leading dimension of PT */
  double *C,        /* Accumulated left rotations */
  Int ldc,          /* leading dimension of C */
  double *work,     /* workspace : ignored now */
  Int *info         /* vect[0] = 'B' | 'Q' | 'P' for both Q and PT | Q | PT  */
)
{
    Int wantq, wantpt, wantb ;
    Int blk[4] ;
    double *dwork ;
    double *A, *newA, *newC ;

    newC = NULL ;
    wantb = (vect[0] == 'B') ;
    wantq = ((vect[0] == 'Q') || wantb) ;
    wantpt = ((vect[0] == 'P') || wantb) ;

    if (!wantq && !wantpt && vect[0] != 'N')
    {
        *info = PIRO_BAND_VECT_INVALID ;
        return ;
    }

    if (ncc > 0)
    {
        /* Transpose C, and transpose back later. */
        newC = (double *) MALLOC(2 * ncc * m * sizeof(double)) ;
        if (newC == NULL)
        {
            *info = PIRO_BAND_OUT_OF_MEMORY ;
            return ;
        }
        PIRO_BAND_LAPACK_NAME(general_transpose_dc)(m, ncc, C, ldc, newC, ncc ) ;
    }

    if (ku > 0)
    {
        A = AB ;
        newA = NULL ;
    }
    else if (ku == 0 && kl >= 0)
    {
        /* Add a upper bidiagonal with zeros to AB */
        newA = (double *) MALLOC(2 * n * (kl + 2) * 
                                            sizeof(double)) ;
        if (newA == NULL)
        {
            *info = PIRO_BAND_OUT_OF_MEMORY ;
            if (newC != NULL)
            {   
                FREE(newC) ;
            }
            return ;
        }
        PIRO_BAND_LAPACK_NAME(add_bidiag_dc) (n, AB, ldab, kl, newA) ;
        A = newA ;
        ku = 1 ;
        ldab = kl + 2 ;
    }
    /* Find block size */
    PIRO_BAND_LONG_NAME(get_blocksize)(m, n, kl, ku,
                        (wantq || wantpt || ncc > 0), blk) ;

    /* Allocate work for the band reduction*/
    dwork = (double *) MALLOC(2 * 2 * 
            (MAX( blk[0]*blk[1], blk[2]*blk[3] )) * sizeof(double)) ;
    if (dwork == NULL && (kl != 0 || ku > 1))
    {
        *info = PIRO_BAND_OUT_OF_MEMORY ;
    }
    else
    {

        /* Reduce the matrix to bidiagonal */
        *info = PIRO_BAND_LAPACK_NAME(reduce_dc)(blk, m, n, ncc, kl, ku, A, ldab, 
                D, E, Q, ldq, PT, ldpt, newC, ncc, dwork, 0) ;

        if (wantpt)
        {
            /* Transpose PT as reduce finds P */
            PIRO_BAND_LAPACK_NAME(inplace_conjugate_transpose_dc)(n, PT, ldpt) ;
        }

        if (ncc > 0)
        {
            /* Transpose C back */
            PIRO_BAND_LAPACK_NAME(general_transpose_dc)(ncc, m, newC, ncc, C, 
                    ldc ) ;
        }
        FREE(dwork) ;
    }

    /* free work */
    if (newC != NULL)
    {
        FREE(newC) ;
    }
    if (newA != NULL)
    {
        FREE(newA) ;
    }

    return ;
}

void PIRO_BAND_LONG_NAME(cgbbrd)
(
  char *vect,       /* vect[0] = 'B' | 'Q' | 'P' for both Q and PT | Q | PT  */
  Int m,            /* #rows in AB */
  Int n,            /* #columns in AB */
  Int ncc,          /* #columns in C */
  Int kl,           /* lower bandwidth */
  Int ku,           /* upper bandwidth */
  float *AB,        /* input matrix in packed band format */
  Int ldab,         /* leading dimension of AB */
  float *D,         /* output : main diagonal of the bidiagonal matrix */
  float *E,         /* output: super diagonal of the bidiagonal matrix */
  float *Q,         /* Accumulated left rotations */
  Int ldq,          /* leading dimension of Q */
  float *PT,        /* Accumulated right rotations */
  Int ldpt,         /* leading dimension of PT */
  float *C,         /* Accumulated left rotations */
  Int ldc,          /* leading dimension of C */
  float *work,      /* workspace : ignored now */
  Int *info         /* vect[0] = 'B' | 'Q' | 'P' for both Q and PT | Q | PT  */
)
{
    Int wantq, wantpt, wantb ;
    Int blk[4] ;
    float *dwork ;
    float *A, *newA, *newC ;

    newC = NULL ;
    wantb = (vect[0] == 'B') ;
    wantq = ((vect[0] == 'Q') || wantb) ;
    wantpt = ((vect[0] == 'P') || wantb) ;

    if (!wantq && !wantpt && vect[0] != 'N')
    {
        *info = PIRO_BAND_VECT_INVALID ;
        return ;
    }

    if (ncc > 0)
    {
        /* Transpose C, and transpose back later. */
        newC = (float *) MALLOC(2 * ncc * m * sizeof(float)) ;
        if (newC == NULL)
        {
            *info = PIRO_BAND_OUT_OF_MEMORY ;
            return ;
        }
        PIRO_BAND_LAPACK_NAME(general_transpose_sc)(m, ncc, C, ldc, newC, ncc ) ;
    }

    if (ku > 0)
    {
        A = AB ;
        newA = NULL ;
    }
    else if (ku == 0 && kl >= 0)
    {
        /* Add a upper bidiagonal with zeros to AB */
        newA = (float *) MALLOC(2 * n * (kl + 2) * 
                                            sizeof(float)) ;
        if (newA == NULL)
        {
            *info = PIRO_BAND_OUT_OF_MEMORY ;
            if (newC != NULL) 
            {
                FREE(newC) ;
            }
            return ;
        }
        PIRO_BAND_LAPACK_NAME(add_bidiag_sc) (n, AB, ldab, kl, newA) ;
        A = newA ;
        ku = 1 ;
        ldab = kl + 2 ;
    }
    /* Find block size */
    PIRO_BAND_LONG_NAME(get_blocksize)(m, n, kl, ku, 
                        (wantq || wantpt || ncc > 0), blk) ;

    /* Allocate work for the band reduction*/
    dwork = (float *) MALLOC(2 * 2 * 
            (MAX( blk[0]*blk[1], blk[2]*blk[3] )) * sizeof(float)) ;
    if (dwork == NULL && (kl != 0 || ku > 1))
    {
        *info = PIRO_BAND_OUT_OF_MEMORY ;
    }
    else
    {

        /* Reduce the matrix to bidiagonal */
        *info = PIRO_BAND_LAPACK_NAME(reduce_sc)(blk, m, n, ncc, kl, ku, A, ldab, 
                D, E, Q, ldq, PT, ldpt, newC, ncc, dwork, 0) ;

        if (wantpt)
        {
            /* Transpose PT as reduce finds P */
            PIRO_BAND_LAPACK_NAME(inplace_conjugate_transpose_sc)(n, PT, ldpt) ;
        }

        if (ncc > 0)
        {
            /* Transpose C back */
            PIRO_BAND_LAPACK_NAME(general_transpose_sc)(ncc, m, newC, ncc, C, 
                    ldc ) ;
        }
        FREE(dwork) ;
    }

    /* free work */
    if (newC != NULL) 
    {
        FREE(newC) ;
    }
    if (newA != NULL)
    {
        FREE(newA) ;
    }

    return ;
}

void PIRO_BAND_LONG_NAME(dsbtrd)
(
  char *vect,       /* vect[0] = 'V' | 'U' for initialize Q | accumulate Q */
  char *uplo,       /* uplo[0] = 'U' | 'L' for upper | lower triangular stored*/
  Int n,            /* #columns in AB */
  Int ku,           /* upper bandwidth */
  double *AB,       /* input matrix in packed band format */
  Int ldab,         /* leading dimension of AB */
  double *D,        /* output : main diagonal of the bidiagonal matrix */
  double *E,        /* output : super diagonal of the bidiagonal matrix */
  double *Q,        /* Accumulated rotations */
  Int ldq,          /* leading dimension of Q */
  double *work,     /* workspace : ignored now */
  Int *info         /* vect[0] = 'B' | 'Q' | 'P' for both Q and PT | Q | PT  */
)
{
    Int wantq, initq ;
    Int blk[4] ;
    double *dwork, *upper ;
    Int ncc ;
    double *A, *newA ;

    initq = (vect[0] == 'V') ;
    wantq = ((vect[0] == 'U') || initq) ;

    if (!wantq && vect[0] != 'N')
    {
        *info = PIRO_BAND_VECT_INVALID ;
        return ;
    }
    if (uplo[0] != 'U' && uplo[0] != 'L')
    {
        *info = PIRO_BAND_UPLO_INVALID ;
        return ;
    }
    if (ku < 0)
    {
        *info = PIRO_BAND_BU_INVALID ;
        return ;
    }

    ncc = wantq ? n : 0 ;

    /* Initialize Q to identity */
    if (initq) PIRO_BAND_LAPACK_NAME(set_to_eye_dr)(ldq, n, Q) ;

    if (ku > 0)
    {
        A = AB ;
        newA = NULL ;
    }
    else if (ku == 0)
    {
        /* Add a upper bidiagonal with zeros to AB */
        /* ku is zero, so there can be just two diagonals */
        newA = (double *) MALLOC(n * 2 * sizeof(double)) ;
        if (newA == NULL)
        {
            *info = PIRO_BAND_OUT_OF_MEMORY ;
            return ;
        }
        PIRO_BAND_LAPACK_NAME(add_bidiag_dr) (n, AB, ldab, ku, newA) ;
        A = newA ;
        ku = 1 ;
        ldab = 2 ;
        uplo[0] = 'U' ; /* We stored the upper diagonal */
    }

    /* Find block size */
    PIRO_BAND_LONG_NAME(get_blocksize)(n, n, 0, ku, wantq, blk) ;

    /* Allocate work for the band reduction*/
    dwork = (double *) MALLOC(2 * blk[0] * blk[1] * sizeof(double)) ;
    if (dwork == NULL && ku > 1)
    {
        *info = PIRO_BAND_OUT_OF_MEMORY ;
    }
    else
    {
        if (uplo[0] == 'L')
        {
            /* If lower triangular part of the band is stores transpose it */
            upper = (double *) MALLOC(n * (ku+1) * sizeof(double)) ;
            if (upper == NULL)
            {
                *info = PIRO_BAND_OUT_OF_MEMORY ;
                FREE (dwork) ;
                ASSERT (newA == NULL) ;
                return ;
            }
            PIRO_BAND_LAPACK_NAME(lowerband_transpose_dr)(n, ku, A, ldab, upper) ;

            /* Reduce the matrix to tridiagonal */
            *info = PIRO_BAND_LAPACK_NAME(reduce_dr)(blk, n, n, ncc, 0, ku, upper,
                    ku+1, D, E, NULL, 0, NULL, 0, Q, ldq, dwork, 1);

            FREE(upper) ;
        }
        else
        {
            /* Reduce the matrix to tridiagonal */
            *info = PIRO_BAND_LAPACK_NAME(reduce_dr)(blk, n, n, ncc, 0, ku, A, 
                    ldab, D, E, NULL, 0, NULL, 0, Q, ldq, dwork, 1) ;
        }
        FREE(dwork) ;
    }

    /* free work */
    if (newA != NULL)
    {
        FREE(newA) ;
    }

    return ;
}

void PIRO_BAND_LONG_NAME(ssbtrd)
(
  char *vect,       /* vect[0] = 'V' | 'U' for initialize Q | accumulate Q */
  char *uplo,       /* uplo[0] = 'U' | 'L' for upper | lower triangular stored*/
  Int n,            /* #columns in AB */
  Int ku,           /* upper bandwidth */
  float *AB,        /* input matrix in packed band format */
  Int ldab,         /* leading dimension of AB */
  float *D,         /* output : main diagonal of the bidiagonal matrix */
  float *E,         /* output: super diagonal of the bidiagonal matrix */
  float *Q,         /* Accumulated rotations */
  Int ldq,          /* leading dimension of Q */
  float *work,      /* workspace : ignored now */
  Int *info         /* vect[0] = 'B' | 'Q' | 'P' for both Q and PT | Q | PT  */
)
{
    Int initq, wantq ;
    Int blk[4] ;
    float *dwork, *upper ;
    Int ncc ;
    float *A, *newA ;


    initq = (vect[0] == 'V') ;
    wantq = ((vect[0] == 'U') || initq) ;

    if (!wantq && vect[0] != 'N')
    {
        *info = PIRO_BAND_VECT_INVALID ;
        return ;
    }
    if (uplo[0] != 'U' && uplo[0] != 'L')
    {
        *info = PIRO_BAND_UPLO_INVALID ;
        return ;
    }
    if (ku < 0)
    {
        *info = PIRO_BAND_BU_INVALID ;
        return ;
    }

    ncc = wantq ? n : 0 ;

    /* Initialize Q to identity */
    if (initq) PIRO_BAND_LAPACK_NAME(set_to_eye_sr)(ldq, n, Q) ;

    if (ku > 0)
    {
        A = AB ;
        newA = NULL ;
    }
    else if (ku == 0)
    {
        /* Add a upper bidiagonal with zeros to AB */
        /* ku is zero, so there can be just two diagonals */
        newA = (float *) MALLOC(n * 2 * sizeof(float)) ;
        if (newA == NULL)
        {
            *info = PIRO_BAND_OUT_OF_MEMORY ;
            return ;
        }
        PIRO_BAND_LAPACK_NAME(add_bidiag_sr) (n, AB, ldab, ku, newA) ;
        A = newA ;
        ku = 1 ;
        ldab = 2 ;
        uplo[0] = 'U' ; /* We stored the upper diagonal */
    }

    /* Find block size */
    PIRO_BAND_LONG_NAME(get_blocksize)(n, n, 0, ku, wantq, blk) ;

    /* Allocate work for the band reduction*/
    dwork = (float *) MALLOC(2 * blk[0] * blk[1] * sizeof(float)) ;
    if (dwork == NULL && ku > 1)
    {
        *info = PIRO_BAND_OUT_OF_MEMORY ;
    }
    else
    {
        if (uplo[0] == 'L')
        {
            /* If lower triangular part of the band is stores transpose it */
            upper = (float *) MALLOC(n * (ku+1) * sizeof(float)) ;
            if (upper == NULL)
            {
                *info = PIRO_BAND_OUT_OF_MEMORY ;
                FREE (dwork) ;
                ASSERT (newA == NULL) ;
                return ;
            }
            PIRO_BAND_LAPACK_NAME(lowerband_transpose_sr)(n, ku, A, ldab, upper) ;

            /* Reduce the matrix to tridiagonal */
            *info = PIRO_BAND_LAPACK_NAME(reduce_sr)(blk, n, n, ncc, 0, ku, upper,
                    ku+1, D, E, NULL, 0, NULL, 0, Q, ldq, dwork, 1) ;

            FREE(upper) ;
        }
        else
        {
            /* Reduce the matrix to tridiagonal */
            *info = PIRO_BAND_LAPACK_NAME(reduce_sr)(blk, n, n, ncc, 0, ku, A, 
                    ldab, D, E, NULL, 0, NULL, 0, Q, ldq, dwork, 1) ;
        }
        FREE(dwork) ;
    }

    /* free work */
    if (newA != NULL)
    {
        FREE(newA) ;
    }

    return ;
}

void PIRO_BAND_LONG_NAME(zhbtrd)
(
  char *vect,       /* vect[0] = 'B' | 'Q' | 'P' for both Q and PT | Q | PT  */
  char *uplo,       /* uplo[0] = 'U' | 'L' for upper | lower triangular stored*/
  Int n,            /* #columns in AB */
  Int ku,           /* upper bandwidth */
  double *AB,       /* input matrix in packed band format */
  Int ldab,         /* leading dimension of AB */
  double *D,        /* output : main diagonal of the bidiagonal matrix */
  double *E,        /* output: super diagonal of the bidiagonal matrix */
  double *Q,        /* Accumulated rotations */
  Int ldq,          /* leading dimension of Q */
  double *work,     /* workspace :  ignored now */
  Int *info         /* vect[0] = 'B' | 'Q' | 'P' for both Q and PT | Q | PT  */
)
{
    Int initq, wantq ;
    Int blk[4] ;
    double *dwork, *upper ;
    Int ncc ;
    double *A, *newA ;


    initq = (vect[0] == 'V') ;
    wantq = ((vect[0] == 'U') || initq) ;

    if (!wantq && vect[0] != 'N')
    {
        *info = PIRO_BAND_VECT_INVALID ;
        return ;
    }
    if (uplo[0] != 'U' && uplo[0] != 'L')
    {
        *info = PIRO_BAND_UPLO_INVALID ;
        return ;
    }
    if (ku < 0)
    {
        *info = PIRO_BAND_BU_INVALID ;
        return ;
    }

    ncc = wantq ? n : 0 ;

    /* Initialize Q to identity */
    if (initq) PIRO_BAND_LAPACK_NAME(set_to_eye_dc)(ldq, n, Q) ;

    if (ku > 0)
    {
        A = AB ;
        newA = NULL ;
    }
    else if (ku == 0)
    {
        /* Add a upper bidiagonal with zeros to AB */
        /* ku is zero, so there can be just two diagonals */
        newA = (double *) MALLOC(2 * n * 2 * sizeof(double)) ;
        if (newA == NULL)
        {
            *info = PIRO_BAND_OUT_OF_MEMORY ;
            return ;
        }
        PIRO_BAND_LAPACK_NAME(add_bidiag_dc) (n, AB, ldab, ku, newA) ;
        A = newA ;
        ku = 1 ;
        ldab = 2 ;
        uplo[0] = 'U' ; /* We stored the upper diagonal */
    }

    /* Find block size */
    PIRO_BAND_LONG_NAME(get_blocksize)(n, n, 0, ku, wantq, blk) ;

    /* Allocate work for the band reduction*/
    dwork = (double *) MALLOC(2 * 2 * blk[0] * blk[1] * 
                    sizeof(double)) ;
    if (dwork == NULL && ku > 1)
    {
        *info = PIRO_BAND_OUT_OF_MEMORY ;
    }
    else
    {
        if (uplo[0] == 'L')
        {
            /* If lower triangular part of the band is stores transpose it */
            upper = (double *)MALLOC(2 * n * (ku+1) *
                            sizeof(double));
            if (upper == NULL)
            {
                *info = PIRO_BAND_OUT_OF_MEMORY ;
                FREE (dwork) ;
                ASSERT (newA == NULL) ;
                return ;
            }
            PIRO_BAND_LAPACK_NAME(lowerband_transpose_dc)(n, ku, A, ldab, upper) ;

            /* Reduce the matrix to tridiagonal */
            *info = PIRO_BAND_LAPACK_NAME(reduce_dc)(blk, n, n, ncc, 0, ku, upper,
                    ku+1, D, E, NULL, 0, NULL, 0, Q, ldq, dwork, 1) ;

            FREE(upper) ;
        }
        else
        {
            /* Reduce the matrix to tridiagonal */
            *info = PIRO_BAND_LAPACK_NAME(reduce_dc)(blk, n, n, ncc, 0, ku, A, 
                    ldab, D, E, NULL, 0, NULL, 0, Q, ldq, dwork, 1) ;
        }
        FREE(dwork) ;
    }

    /* free work */
    if (newA != NULL)
    {
        FREE(newA) ;
    }

    return ;
}

void PIRO_BAND_LONG_NAME(chbtrd)
(
  char *vect,       /* vect[0] = 'B' | 'Q' | 'P' for both Q and PT | Q | PT  */
  char *uplo,       /* uplo[0] = 'U' | 'L' for upper | lower triangular stored*/
  Int n,            /* #columns in AB */
  Int ku,           /* upper bandwidth */
  float *AB,        /* input matrix in packed band format */
  Int ldab,         /* leading dimension of AB */
  float *D,         /* output : main diagonal of the bidiagonal matrix */
  float *E,         /* output: super diagonal of the bidiagonal matrix */
  float *Q,         /* Accumulated rotations */
  Int ldq,          /* leading dimension of Q */
  float *work,      /* workspace : ignored now */
  Int *info         /* vect[0] = 'B' | 'Q' | 'P' for both Q and PT | Q | PT  */
)
{
    Int initq, wantq ;
    Int blk[4] ;
    float *dwork, *upper ;
    Int ncc ;
    float *A, *newA ;


    initq = (vect[0] == 'V') ;
    wantq = ((vect[0] == 'U') || initq) ;

    if (!wantq && vect[0] != 'N')
    {
        *info = PIRO_BAND_VECT_INVALID ;
        return ;
    }
    if (uplo[0] != 'U' && uplo[0] != 'L')
    {
        *info = PIRO_BAND_UPLO_INVALID ;
        return ;
    }
    if (ku < 0)
    {
        *info = PIRO_BAND_BU_INVALID ;
        return ;
    }

    ncc = wantq ? n : 0 ;

    /* Initialize Q to identity */
    if (initq) PIRO_BAND_LAPACK_NAME(set_to_eye_sc)(ldq, n, Q) ;

    if (ku > 0)
    {
        A = AB ;
        newA = NULL ;
    }
    else if (ku == 0)
    {
        /* Add a upper bidiagonal with zeros to AB */
        /* ku is zero, so there can be just two diagonals */
        newA = (float *) MALLOC(2 * n * 2 * sizeof(float)) ;
        if (newA == NULL)
        {
            *info = PIRO_BAND_OUT_OF_MEMORY ;
            return ;
        }
        PIRO_BAND_LAPACK_NAME(add_bidiag_sc) (n, AB, ldab, ku, newA) ;
        A = newA ;
        ku = 1 ;
        ldab = 2 ;
        uplo[0] = 'U' ; /* We stored the upper diagonal */
    }

    /* Find block size */
    PIRO_BAND_LONG_NAME(get_blocksize)(n, n, 0, ku, wantq, blk) ;

    /* Allocate work for the band reduction*/
    dwork = (float *) MALLOC(2 * 2 * blk[0] * blk[1] * 
                                    sizeof(float)) ;
    if (dwork == NULL && ku > 1)
    {
        *info = PIRO_BAND_OUT_OF_MEMORY ;
    }
    else
    {
        if (uplo[0] == 'L')
        {
            /* If lower triangular part of the band is stores transpose it */
            upper = (float *) MALLOC(2 * n * (ku+1) * 
                                sizeof(float)) ;
            if (upper == NULL)
            {
                *info = PIRO_BAND_OUT_OF_MEMORY ;
                FREE (dwork) ;
                return ;
            }
            PIRO_BAND_LAPACK_NAME(lowerband_transpose_sc)(n, ku, A, ldab, upper) ;

            /* Reduce the matrix to tridiagonal */
            *info = PIRO_BAND_LAPACK_NAME(reduce_sc)(blk, n, n, ncc, 0, ku, upper,
                    ku+1, D, E, NULL, 0, NULL, 0, Q, ldq, dwork, 1) ;

            FREE(upper) ;
        }
        else
        {
            /* Reduce the matrix to tridiagonal */
            *info = PIRO_BAND_LAPACK_NAME(reduce_sc)(blk, n, n, ncc, 0, ku, A, 
                    ldab, D, E, NULL, 0, NULL, 0, Q, ldq, dwork, 1) ;
        }
        FREE(dwork) ;
    }

    /* free work */
    if (newA != NULL)
    {
        FREE(newA) ;
    }

    return ;
}

