|
libflame
revision_anchor
|
Go to the source code of this file.
| FLA_Error FLA_Bidiag_UT | ( | FLA_Obj | A, |
| FLA_Obj | TU, | ||
| FLA_Obj | TV | ||
| ) |
References FLA_Bidiag_UT_check(), FLA_Bidiag_UT_internal(), FLA_Check_error_level(), FLA_Obj_is_double_precision(), and FLA_Obj_row_stride().
Referenced by FLA_Svd_ext_u_unb_var1(), FLA_Svd_uv_unb_var1(), and FLA_Svd_uv_unb_var2().
{
FLA_Error r_val;
// Check parameters.
if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
FLA_Bidiag_UT_check( A, TU, TV );
if ( FLA_Obj_row_stride( A ) == 1 &&
FLA_Obj_row_stride( TU ) == 1 &&
FLA_Obj_row_stride( TV ) == 1 &&
FLA_Obj_is_double_precision( A ) )
// Temporary modification to "nofus";
// fused operations are not working for row-major, ex) bl1_ddotsv2
r_val = FLA_Bidiag_UT_internal( A, TU, TV, fla_bidiagut_cntl_plain );
else
r_val = FLA_Bidiag_UT_internal( A, TU, TV, fla_bidiagut_cntl_plain );
return r_val;
}
| FLA_Error FLA_Bidiag_UT_create_T | ( | FLA_Obj | A, |
| FLA_Obj * | TU, | ||
| FLA_Obj * | TV | ||
| ) |
References FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_min_dim(), FLA_Obj_row_stride(), and FLA_Query_blocksize().
Referenced by FLA_Svd_ext_u_unb_var1(), FLA_Svd_uv_unb_var1(), and FLA_Svd_uv_unb_var2().
{
FLA_Datatype datatype;
dim_t b_alg, k;
dim_t rs_T, cs_T;
// Query the datatype of A.
datatype = FLA_Obj_datatype( A );
// Query the blocksize from the library.
b_alg = FLA_Query_blocksize( datatype, FLA_DIMENSION_MIN );
// Scale the blocksize by a pre-set global constant.
b_alg = ( dim_t )( ( ( double ) b_alg ) * FLA_BIDIAG_INNER_TO_OUTER_B_RATIO );
// Query the minimum dimension of A.
k = FLA_Obj_min_dim( A );
b_alg = 5;
// Adjust the blocksize with respect to the min-dim of A.
b_alg = min( b_alg, k );
// Figure out whether TU and TV should be row-major or column-major.
if ( FLA_Obj_row_stride( A ) == 1 )
{
rs_T = 1;
cs_T = b_alg;
}
else // if ( FLA_Obj_col_stride( A ) == 1 )
{
rs_T = k;
cs_T = 1;
}
// Create two b_alg x k matrices to hold the block Householder transforms
// that will be accumulated within the bidiagonal reduction algorithm.
// If the matrix dimension has a zero dimension, apply_q complains it.
if ( TU != NULL ) FLA_Obj_create( datatype, b_alg, k, rs_T, cs_T, TU );
if ( TV != NULL ) FLA_Obj_create( datatype, b_alg, k, rs_T, cs_T, TV );
return FLA_SUCCESS;
}
| FLA_Error FLA_Bidiag_UT_extract_diagonals | ( | FLA_Obj | A, |
| FLA_Obj | d, | ||
| FLA_Obj | e | ||
| ) |
References FLA_Bidiag_UT_extract_diagonals_check(), FLA_Bidiag_UT_l_extract_diagonals(), FLA_Bidiag_UT_u_extract_diagonals(), FLA_Check_error_level(), FLA_Obj_length(), and FLA_Obj_width().
{
FLA_Error r_val = FLA_SUCCESS;
if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
FLA_Bidiag_UT_extract_diagonals_check( A, d, e );
if ( FLA_Obj_length( A ) >= FLA_Obj_width( A ) )
r_val = FLA_Bidiag_UT_u_extract_diagonals( A, d, e );
else
r_val = FLA_Bidiag_UT_l_extract_diagonals( A, d, e );
return r_val;
}
References FLA_Bidiag_UT_extract_real_diagonals_check(), FLA_Bidiag_UT_l_extract_real_diagonals(), FLA_Bidiag_UT_u_extract_real_diagonals(), FLA_Check_error_level(), FLA_Obj_length(), and FLA_Obj_width().
Referenced by FLA_Svd_ext_u_unb_var1(), FLA_Svd_uv_unb_var1(), and FLA_Svd_uv_unb_var2().
{
FLA_Error r_val = FLA_SUCCESS;
if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
FLA_Bidiag_UT_extract_real_diagonals_check( A, d, e );
if ( FLA_Obj_length( A ) >= FLA_Obj_width( A ) )
r_val = FLA_Bidiag_UT_u_extract_real_diagonals( A, d, e );
else
r_val = FLA_Bidiag_UT_l_extract_real_diagonals( A, d, e );
return r_val;
}
| FLA_Error FLA_Bidiag_UT_form_U | ( | FLA_Obj | A, |
| FLA_Obj | T, | ||
| FLA_Obj | U | ||
| ) |
References FLA_Bidiag_UT_form_U_check(), FLA_Bidiag_UT_form_U_ext(), FLA_Check_error_level(), FLA_Obj_length(), and FLA_Obj_width().
Referenced by FLA_Svd_uv_unb_var1(), and FLA_Svd_uv_unb_var2().
{
FLA_Uplo uplo;
if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
FLA_Bidiag_UT_form_U_check( A, T, U );
uplo = ( FLA_Obj_length( A ) >= FLA_Obj_width( A ) ?
FLA_UPPER_TRIANGULAR : FLA_LOWER_TRIANGULAR );
FLA_Bidiag_UT_form_U_ext( uplo, A, T,
FLA_NO_TRANSPOSE, U );
return FLA_SUCCESS;
}
| FLA_Error FLA_Bidiag_UT_form_U_ext | ( | FLA_Uplo | uplo, |
| FLA_Obj | A, | ||
| FLA_Obj | T, | ||
| FLA_Trans | transu, | ||
| FLA_Obj | U | ||
| ) |
References FLA_Apply_pivots(), FLA_Bidiag_UT_form_V_ext(), FLA_Obj_create(), FLA_Obj_flip_base(), FLA_Obj_flip_view(), FLA_Obj_free(), FLA_Obj_is(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_QR_UT_form_Q(), FLA_Set(), and FLA_ZERO.
Referenced by FLA_Bidiag_UT_form_U(), FLA_Bidiag_UT_form_V_ext(), and FLA_Svd_ext_u_unb_var1().
{
//if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
// FLA_Bidiag_UT_form_U_ext_check( uplo, A, T, transu, U );
if ( transu == FLA_NO_TRANSPOSE ||
transu == FLA_CONJ_NO_TRANSPOSE )
{
if ( uplo == FLA_UPPER_TRIANGULAR )
{
FLA_QR_UT_form_Q( A, T, U );
}
else // if ( uplo == FLA_LOWER_TRIANGULAR )
{
FLA_Obj ATL, ATR,
ABL, ABR;
FLA_Obj UTL, UTR,
UBL, UBR;
FLA_Obj TL, TR;
dim_t b = ( FLA_Obj_length( A ) - 1 );
FLA_Part_1x2( T, &TL, &TR, 1, FLA_RIGHT );
FLA_Part_2x2( U, &UTL, &UTR,
&UBL, &UBR, 1, 1, FLA_TL );
if ( FLA_Obj_is( A, U ) == FALSE )
{
FLA_Set( FLA_ONE, UTL ); FLA_Set( FLA_ZERO, UTR );
FLA_Set( FLA_ZERO, UBL );
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 1, b, FLA_TL );
FLA_QR_UT_form_Q( ABL, TL, UBR );
}
else
{
FLA_Obj p, pt, pb;
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 1, b+1, FLA_TL );
FLA_Obj_create( FLA_INT, b+1,1, 0, 0, &p );
FLA_Part_2x1( p, &pt,
&pb, 1, FLA_BOTTOM );
FLA_Set( FLA_ONE, pt );
FLA_Set( FLA_ZERO, pb );
FLA_Apply_pivots ( FLA_RIGHT, FLA_NO_TRANSPOSE, p, ABL );
FLA_Obj_free(&p );
FLA_Set( FLA_ONE, UTL );
FLA_Set( FLA_ZERO, UBL );
FLA_Set( FLA_ZERO, UTR );
FLA_Part_1x2( ABL, &ABL,
&ABR, 1, FLA_LEFT );
FLA_QR_UT_form_Q( ABR, TL, UBR );
}
}
}
else
{
FLA_Uplo uplo_flip = ( uplo == FLA_UPPER_TRIANGULAR ?
FLA_LOWER_TRIANGULAR : FLA_UPPER_TRIANGULAR );
FLA_Obj_flip_base( &A );
FLA_Obj_flip_view( &A );
// A and U should have different base objects
FLA_Bidiag_UT_form_V_ext( uplo_flip, A, T,
FLA_CONJ_TRANSPOSE, U );
FLA_Obj_flip_base( &A );
// As we use QR and LQ for constructing U and V,
// conjugation naturally fits there.
// Never apply conjugation separately here even if flipping trick is applied.
//FLA_Conjugate( U );
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Bidiag_UT_form_V | ( | FLA_Obj | A, |
| FLA_Obj | S, | ||
| FLA_Obj | V | ||
| ) |
References FLA_Bidiag_UT_form_V_check(), FLA_Bidiag_UT_form_V_ext(), FLA_Check_error_level(), FLA_Obj_length(), and FLA_Obj_width().
Referenced by FLA_Svd_uv_unb_var1(), and FLA_Svd_uv_unb_var2().
{
FLA_Uplo uplo;
if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
FLA_Bidiag_UT_form_V_check( A, S, V );
uplo = ( FLA_Obj_length( A ) >= FLA_Obj_width( A ) ?
FLA_UPPER_TRIANGULAR : FLA_LOWER_TRIANGULAR );
FLA_Bidiag_UT_form_V_ext( uplo, A, S,
FLA_NO_TRANSPOSE, V );
return FLA_SUCCESS;
}
| FLA_Error FLA_Bidiag_UT_form_V_ext | ( | FLA_Uplo | uplo, |
| FLA_Obj | A, | ||
| FLA_Obj | S, | ||
| FLA_Trans | transv, | ||
| FLA_Obj | V | ||
| ) |
References FLA_Apply_pivots(), FLA_Bidiag_UT_form_U_ext(), FLA_LQ_UT_form_Q(), FLA_Obj_create(), FLA_Obj_flip_base(), FLA_Obj_flip_view(), FLA_Obj_free(), FLA_Obj_is(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Set(), and FLA_ZERO.
Referenced by FLA_Bidiag_UT_form_U_ext(), FLA_Bidiag_UT_form_V(), and FLA_Svd_ext_u_unb_var1().
{
//if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
// FLA_Bidiag_UT_form_V_ext_check( uplo, A, S, transv, V );
if ( transv == FLA_TRANSPOSE ||
transv == FLA_CONJ_TRANSPOSE )
{
if ( uplo == FLA_UPPER_TRIANGULAR )
{
FLA_Obj ATL, ATR,
ABL, ABR;
FLA_Obj VTL, VTR,
VBL, VBR;
FLA_Obj SL, SR;
dim_t b = ( FLA_Obj_width( A ) - 1 );
FLA_Part_1x2( S, &SL, &SR, 1, FLA_RIGHT );
FLA_Part_2x2( V, &VTL, &VTR,
&VBL, &VBR, 1, 1, FLA_TL );
if ( FLA_Obj_is( A, V ) == FALSE )
{
FLA_Set( FLA_ONE, VTL ); FLA_Set( FLA_ZERO, VTR );
FLA_Set( FLA_ZERO, VBL );
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, b, b, FLA_TR );
FLA_LQ_UT_form_Q( ATR, SL, VBR );
}
else
{
FLA_Obj p, pt, pb;
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, b+1, b, FLA_TR );
FLA_Obj_create( FLA_INT, b+1, 1, 0, 0, &p );
FLA_Part_2x1( p, &pt,
&pb, 1, FLA_BOTTOM );
FLA_Set( FLA_ONE, pt );
FLA_Set( FLA_ZERO, pb );
FLA_Apply_pivots ( FLA_LEFT, FLA_TRANSPOSE, p, ATR );
FLA_Obj_free( &p );
FLA_Set( FLA_ONE, VTL );
FLA_Set( FLA_ZERO, VBL );
FLA_Set( FLA_ZERO, VTR );
FLA_Part_2x1( ATR, &ATR,
&ABR, 1, FLA_TOP );
FLA_LQ_UT_form_Q( ABR, SL, VBR );
}
}
else // if ( uplo == FLA_LOWER_TRIANGULAR )
{
FLA_LQ_UT_form_Q( A, S, V );
}
}
else
{
FLA_Uplo uplo_flip = ( uplo == FLA_UPPER_TRIANGULAR ?
FLA_LOWER_TRIANGULAR : FLA_UPPER_TRIANGULAR );
FLA_Obj_flip_base( &A );
FLA_Obj_flip_view( &A );
// A and U should have different base objects.
FLA_Bidiag_UT_form_U_ext( uplo_flip, A, S,
FLA_NO_TRANSPOSE, V );
FLA_Obj_flip_base( &A );
// As we use QR and LQ for constructing U and V,
// conjugation naturally fits there.
// Never apply conjugation separately here even if flipping trick is applied.
// FLA_Conjugate( V );
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Bidiag_UT_internal | ( | FLA_Obj | A, |
| FLA_Obj | TU, | ||
| FLA_Obj | TV, | ||
| fla_bidiagut_t * | cntl | ||
| ) |
References FLA_Bidiag_UT_internal_check(), FLA_Bidiag_UT_u(), FLA_Check_error_level(), FLA_Conjugate(), FLA_Conjugate_r(), FLA_Obj_flip_base(), FLA_Obj_flip_view(), FLA_Obj_is_complex(), FLA_Obj_length(), FLA_Obj_width(), FLA_Part_1x2(), and FLA_Part_2x2().
Referenced by FLA_Bidiag_UT().
{
FLA_Error r_val = FLA_SUCCESS;
if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
FLA_Bidiag_UT_internal_check( A, TU, TV, cntl );
if ( FLA_Obj_length( A ) >= FLA_Obj_width( A ) )
{
r_val = FLA_Bidiag_UT_u( A, TU, TV, cntl );
}
else // if ( FLA_Obj_length( A ) < FLA_Obj_width( A ) )
{
// Flip A; swap(rs, cs), swap(m, n)
FLA_Obj_flip_base( &A );
FLA_Obj_flip_view( &A );
r_val = FLA_Bidiag_UT_u( A, TV, TU, cntl );
// Recover A; swap(rs, cs), swap(m, n)
FLA_Obj_flip_base( &A );
FLA_Obj_flip_view( &A );
// According to the UT transform, the house-holder vectors are conjugated
// when they are applied from the right.
if ( FLA_Obj_is_complex( A ) )
{
FLA_Obj ATL, ATR,
ABL, ABR;
dim_t b;
FLA_Conjugate( TU );
FLA_Conjugate( TV );
// U
b = ( FLA_Obj_length( A ) - 1 );
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 2, b, FLA_TL );
FLA_Conjugate_r( FLA_LOWER_TRIANGULAR, ABL );
// V
b = ( FLA_Obj_width( A ) - 1 );
FLA_Part_1x2( A, &ATL, &ATR, b, FLA_RIGHT );
FLA_Conjugate_r( FLA_UPPER_TRIANGULAR, ATR );
}
}
return r_val;
}
| FLA_Error FLA_Bidiag_UT_l | ( | FLA_Obj | A, |
| FLA_Obj | TU, | ||
| FLA_Obj | TV, | ||
| fla_bidiagut_t * | cntl | ||
| ) |
| FLA_Error FLA_Bidiag_UT_l_extract_diagonals | ( | FLA_Obj | A, |
| FLA_Obj | d, | ||
| FLA_Obj | e | ||
| ) |
References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_vector_inc().
Referenced by FLA_Bidiag_UT_extract_diagonals(), and FLA_Tridiag_UT_extract_diagonals().
{
FLA_Datatype datatype;
int m_A;
int rs_A, cs_A;
int inc_d;
int inc_e;
int i;
datatype = FLA_Obj_datatype( A );
m_A = FLA_Obj_length( A );
rs_A = FLA_Obj_row_stride( A );
cs_A = FLA_Obj_col_stride( A );
inc_d = FLA_Obj_vector_inc( d );
if ( m_A != 1 )
inc_e = FLA_Obj_vector_inc( e );
else
inc_e = 0;
switch ( datatype )
{
case FLA_FLOAT:
{
float* buff_A = FLA_FLOAT_PTR( A );
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_e = ( m_A != 1 ? FLA_FLOAT_PTR( e ) : NULL );
for ( i = 0; i < m_A; ++i )
{
float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
float* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
float* delta1 = buff_d + (i )*inc_d;
float* epsilon1 = buff_e + (i )*inc_e;
int m_ahead = m_A - i - 1;
// delta1 = alpha11;
*delta1 = *alpha11;
// epsilon1 = a21_t;
if ( m_ahead > 0 )
*epsilon1 = *a21_t;
}
break;
}
case FLA_DOUBLE:
{
double* buff_A = FLA_DOUBLE_PTR( A );
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_e = ( m_A != 1 ? FLA_DOUBLE_PTR( e ) : NULL );
for ( i = 0; i < m_A; ++i )
{
double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
double* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
double* delta1 = buff_d + (i )*inc_d;
double* epsilon1 = buff_e + (i )*inc_e;
int m_ahead = m_A - i - 1;
// delta1 = alpha11;
*delta1 = *alpha11;
// epsilon1 = a21_t;
if ( m_ahead > 0 )
*epsilon1 = *a21_t;
}
break;
}
case FLA_COMPLEX:
{
scomplex* buff_A = FLA_COMPLEX_PTR( A );
scomplex* buff_d = FLA_COMPLEX_PTR( d );
scomplex* buff_e = ( m_A != 1 ? FLA_COMPLEX_PTR( e ) : NULL );
for ( i = 0; i < m_A; ++i )
{
scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
scomplex* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
scomplex* delta1 = buff_d + (i )*inc_d;
scomplex* epsilon1 = buff_e + (i )*inc_e;
int m_ahead = m_A - i - 1;
// delta1 = alpha11;
*delta1 = *alpha11;
// epsilon1 = a21_t;
if ( m_ahead > 0 )
*epsilon1 = *a21_t;
}
break;
}
case FLA_DOUBLE_COMPLEX:
{
dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
dcomplex* buff_d = FLA_DOUBLE_COMPLEX_PTR( d );
dcomplex* buff_e = ( m_A != 1 ? FLA_DOUBLE_COMPLEX_PTR( e ) : NULL );
for ( i = 0; i < m_A; ++i )
{
dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
dcomplex* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
dcomplex* delta1 = buff_d + (i )*inc_d;
dcomplex* epsilon1 = buff_e + (i )*inc_e;
int m_ahead = m_A - i - 1;
// delta1 = alpha11;
*delta1 = *alpha11;
// epsilon1 = a21_t;
if ( m_ahead > 0 )
*epsilon1 = *a21_t;
}
break;
}
}
return FLA_SUCCESS;
}
References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), scomplex::real, and dcomplex::real.
Referenced by FLA_Bidiag_UT_extract_real_diagonals(), and FLA_Tridiag_UT_extract_real_diagonals().
{
FLA_Datatype datatype;
int m_A;
int rs_A, cs_A;
int inc_d;
int inc_e;
int i;
datatype = FLA_Obj_datatype( A );
m_A = FLA_Obj_length( A );
rs_A = FLA_Obj_row_stride( A );
cs_A = FLA_Obj_col_stride( A );
inc_d = FLA_Obj_vector_inc( d );
if ( m_A != 1 )
inc_e = FLA_Obj_vector_inc( e );
else
inc_e = 0;
switch ( datatype )
{
case FLA_FLOAT:
{
float* buff_A = FLA_FLOAT_PTR( A );
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_e = ( m_A != 1 ? FLA_FLOAT_PTR( e ) : NULL );
for ( i = 0; i < m_A; ++i )
{
float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
float* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
float* delta1 = buff_d + (i )*inc_d;
float* epsilon1 = buff_e + (i )*inc_e;
int m_ahead = m_A - i - 1;
// delta1 = alpha11;
*delta1 = *alpha11;
// epsilon1 = a21_t;
if ( m_ahead > 0 )
*epsilon1 = *a21_t;
}
break;
}
case FLA_DOUBLE:
{
double* buff_A = FLA_DOUBLE_PTR( A );
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_e = ( m_A != 1 ? FLA_DOUBLE_PTR( e ) : NULL );
for ( i = 0; i < m_A; ++i )
{
double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
double* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
double* delta1 = buff_d + (i )*inc_d;
double* epsilon1 = buff_e + (i )*inc_e;
int m_ahead = m_A - i - 1;
// delta1 = alpha11;
*delta1 = *alpha11;
// epsilon1 = a21_t;
if ( m_ahead > 0 )
*epsilon1 = *a21_t;
}
break;
}
case FLA_COMPLEX:
{
scomplex* buff_A = FLA_COMPLEX_PTR( A );
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_e = ( m_A != 1 ? FLA_FLOAT_PTR( e ) : NULL );
for ( i = 0; i < m_A; ++i )
{
scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
scomplex* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
float* delta1 = buff_d + (i )*inc_d;
float* epsilon1 = buff_e + (i )*inc_e;
int m_ahead = m_A - i - 1;
// delta1 = alpha11;
*delta1 = alpha11->real;
// epsilon1 = a21_t;
if ( m_ahead > 0 )
*epsilon1 = a21_t->real;
}
break;
}
case FLA_DOUBLE_COMPLEX:
{
dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_e = ( m_A != 1 ? FLA_DOUBLE_PTR( e ) : NULL );
for ( i = 0; i < m_A; ++i )
{
dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
dcomplex* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
double* delta1 = buff_d + (i )*inc_d;
double* epsilon1 = buff_e + (i )*inc_e;
int m_ahead = m_A - i - 1;
// delta1 = alpha11;
*delta1 = alpha11->real;
// epsilon1 = a21_t;
if ( m_ahead > 0 )
*epsilon1 = a21_t->real;
}
break;
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Bidiag_UT_l_realify_opt | ( | FLA_Obj | A, |
| FLA_Obj | d, | ||
| FLA_Obj | e | ||
| ) |
References bl1_dsetv(), bl1_ssetv(), BLIS1_CONJUGATE, FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_ONE, FLA_ZERO, scomplex::imag, and dcomplex::imag.
Referenced by FLA_Bidiag_UT_realify().
{
FLA_Datatype datatype;
int m_A, n_A;
int min_m_n;
int rs_A, cs_A;
int inc_d;
int inc_e;
int i;
datatype = FLA_Obj_datatype( A );
m_A = FLA_Obj_length( A );
n_A = FLA_Obj_width( A );
min_m_n = FLA_Obj_min_dim( A );
rs_A = FLA_Obj_row_stride( A );
cs_A = FLA_Obj_col_stride( A );
inc_d = FLA_Obj_vector_inc( d );
inc_e = FLA_Obj_vector_inc( e );
switch ( datatype )
{
case FLA_FLOAT:
{
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_e = FLA_FLOAT_PTR( e );
float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
bl1_ssetv( min_m_n,
buff_1,
buff_d, inc_d );
bl1_ssetv( min_m_n,
buff_1,
buff_e, inc_e );
break;
}
case FLA_DOUBLE:
{
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_e = FLA_DOUBLE_PTR( e );
double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
bl1_dsetv( min_m_n,
buff_1,
buff_d, inc_d );
bl1_dsetv( min_m_n,
buff_1,
buff_e, inc_e );
break;
}
case FLA_COMPLEX:
{
scomplex* buff_A = FLA_COMPLEX_PTR( A );
scomplex* buff_d = FLA_COMPLEX_PTR( d );
scomplex* buff_e = FLA_COMPLEX_PTR( e );
scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
for ( i = 0; i < min_m_n; ++i )
{
scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
scomplex* delta1 = buff_d + (i )*inc_d;
scomplex* epsilon1 = buff_e + (i )*inc_e;
scomplex absv;
int m_ahead = m_A - i - 1;
int m_behind = i;
if ( m_behind == 0 )
{
// FLA_Set( FLA_ONE, delta1 );
*delta1 = *buff_1;
}
else
{
scomplex* a10t_r = buff_A + (i-1)*cs_A + (i )*rs_A;
// FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a10t_r, delta1 );
// FLA_Copyt( FLA_NO_TRANSPOSE, a10t_r, absv );
// FLA_Absolute_value( absv );
// FLA_Inv_scal( absv, delta1 );
bl1_ccopys( BLIS1_CONJUGATE, a10t_r, delta1 );
bl1_cabsval2( a10t_r, &absv );
bl1_cinvscals( &absv, delta1 );
// FLA_Scalc( FLA_NO_CONJUGATE, delta1, a10t_r );
// FLA_Obj_set_imag_part( FLA_ZERO, a10t_r );
bl1_cscals( delta1, a10t_r );
a10t_r->imag = *buff_0;
// FLA_Scalc( FLA_NO_CONJUGATE, delta1, alpha11 );
bl1_cscals( delta1, alpha11 );
}
// FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, epsilon1 );
// FLA_Copyt( FLA_NO_TRANSPOSE, alpha11, absv );
// FLA_Absolute_value( absv );
// FLA_Inv_scal( absv, epsilon1 );
bl1_ccopys( BLIS1_CONJUGATE, alpha11, epsilon1 );
bl1_cabsval2( alpha11, &absv );
bl1_cinvscals( &absv, epsilon1 );
// FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, alpha11 );
// FLA_Obj_set_imag_part( FLA_ZERO, alpha11 );
bl1_cscals( epsilon1, alpha11 );
alpha11->imag = *buff_0;
if ( m_ahead > 0 )
{
scomplex* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
// FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, a21_t );
bl1_cscals( epsilon1, a21_t );
}
}
break;
}
case FLA_DOUBLE_COMPLEX:
{
dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
dcomplex* buff_d = FLA_DOUBLE_COMPLEX_PTR( d );
dcomplex* buff_e = FLA_DOUBLE_COMPLEX_PTR( e );
dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
for ( i = 0; i < min_m_n; ++i )
{
dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
dcomplex* delta1 = buff_d + (i )*inc_d;
dcomplex* epsilon1 = buff_e + (i )*inc_e;
dcomplex absv;
int m_ahead = m_A - i - 1;
int m_behind = i;
if ( m_behind == 0 )
{
// FLA_Set( FLA_ONE, delta1 );
*delta1 = *buff_1;
}
else
{
dcomplex* a10t_r = buff_A + (i-1)*cs_A + (i )*rs_A;
// FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a10t_r, delta1 );
// FLA_Copyt( FLA_NO_TRANSPOSE, a10t_r, absv );
// FLA_Absolute_value( absv );
// FLA_Inv_scal( absv, delta1 );
bl1_zcopys( BLIS1_CONJUGATE, a10t_r, delta1 );
bl1_zabsval2( a10t_r, &absv );
bl1_zinvscals( &absv, delta1 );
// FLA_Scalc( FLA_NO_CONJUGATE, delta1, a10t_r );
// FLA_Obj_set_imag_part( FLA_ZERO, a10t_r );
bl1_zscals( delta1, a10t_r );
a10t_r->imag = *buff_0;
// FLA_Scalc( FLA_NO_CONJUGATE, delta1, alpha11 );
bl1_zscals( delta1, alpha11 );
}
// FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, epsilon1 );
// FLA_Copyt( FLA_NO_TRANSPOSE, alpha11, absv );
// FLA_Absolute_value( absv );
// FLA_Inv_scal( absv, epsilon1 );
bl1_zcopys( BLIS1_CONJUGATE, alpha11, epsilon1 );
bl1_zabsval2( alpha11, &absv );
bl1_zinvscals( &absv, epsilon1 );
// FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, alpha11 );
// FLA_Obj_set_imag_part( FLA_ZERO, alpha11 );
bl1_zscals( epsilon1, alpha11 );
alpha11->imag = *buff_0;
if ( m_ahead > 0 )
{
dcomplex* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
// FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, a21_t );
bl1_zscals( epsilon1, a21_t );
}
}
break;
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Bidiag_UT_l_realify_unb | ( | FLA_Obj | A, |
| FLA_Obj | d, | ||
| FLA_Obj | e | ||
| ) |
References FLA_Absolute_value(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt(), FLA_Inv_scal(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_set_imag_part(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scalc(), FLA_Set(), and FLA_ZERO.
{
FLA_Obj ATL, ATR, A00, a01, A02,
ABL, ABR, a10t, alpha11, a12t,
A20, a21, A22;
FLA_Obj dT, d0,
dB, delta1,
d2;
FLA_Obj eT, e0,
eB, epsilon1,
e2;
FLA_Obj a10t_l, a10t_r;
FLA_Obj a21_t,
a21_b;
FLA_Obj absv;
FLA_Obj_create( FLA_Obj_datatype( A ), 1, 1, 0, 0, &absv );
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_2x1( d, &dT,
&dB, 0, FLA_TOP );
FLA_Part_2x1( e, &eT,
&eB, 0, FLA_TOP );
while ( FLA_Obj_min_dim( ABR ) > 0 )
{
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
/* ************* */ /* ************************** */
&a10t, /**/ &alpha11, &a12t,
ABL, /**/ ABR, &A20, /**/ &a21, &A22,
1, 1, FLA_BR );
FLA_Repart_2x1_to_3x1( dT, &d0,
/* ** */ /* ****** */
&delta1,
dB, &d2, 1, FLA_BOTTOM );
FLA_Repart_2x1_to_3x1( eT, &e0,
/* ** */ /* ******** */
&epsilon1,
eB, &e2, 1, FLA_BOTTOM );
/*------------------------------------------------------------*/
if ( FLA_Obj_width( a10t ) == 0 )
{
// delta1 = 1;
FLA_Set( FLA_ONE, delta1 );
}
else
{
FLA_Part_1x2( a10t, &a10t_l, &a10t_r, 1, FLA_RIGHT );
// delta1 = conj(a10t_r) / abs(a10t_r);
FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a10t_r, delta1 );
FLA_Copyt( FLA_NO_TRANSPOSE, a10t_r, absv );
FLA_Absolute_value( absv );
FLA_Inv_scal( absv, delta1 );
// a10t_r = delta1 * a10t_r;
// a10t_r.imag = 0;
FLA_Scalc( FLA_NO_CONJUGATE, delta1, a10t_r );
FLA_Obj_set_imag_part( FLA_ZERO, a10t_r );
// alpha11 = delta1 * alpha11;
FLA_Scalc( FLA_NO_CONJUGATE, delta1, alpha11 );
}
// epsilon1 = conj(alpha11) / abs(alpha11);
FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, epsilon1 );
FLA_Copyt( FLA_NO_TRANSPOSE, alpha11, absv );
FLA_Absolute_value( absv );
FLA_Inv_scal( absv, epsilon1 );
// alpha11 = epsilon1 * alpha11;
// alpha11.imag = 0;
FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, alpha11 );
FLA_Obj_set_imag_part( FLA_ZERO, alpha11 );
if ( FLA_Obj_length( a21 ) > 0 )
{
FLA_Part_2x1( a21, &a21_t,
&a21_b, 1, FLA_TOP );
// a21_t = epsilon1 * a21_t;
FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, a21_t );
}
/*------------------------------------------------------------*/
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
a10t, alpha11, /**/ a12t,
/* ************** */ /* ************************ */
&ABL, /**/ &ABR, A20, a21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x1_to_2x1( &dT, d0,
delta1,
/* ** */ /* ****** */
&dB, d2, FLA_TOP );
FLA_Cont_with_3x1_to_2x1( &eT, e0,
epsilon1,
/* ** */ /* ******** */
&eB, e2, FLA_TOP );
}
FLA_Obj_free( &absv );
return FLA_SUCCESS;
}
| FLA_Error FLA_Bidiag_UT_l_scale_diagonals | ( | FLA_Obj | alpha, |
| FLA_Obj | A | ||
| ) |
References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), and FLA_Obj_row_stride().
Referenced by FLA_Bidiag_UT_scale_diagonals(), and FLA_Tridiag_UT_scale_diagonals().
{
FLA_Datatype datatype;
int m_A;
int rs_A, cs_A;
int i;
datatype = FLA_Obj_datatype( A );
m_A = FLA_Obj_length( A );
rs_A = FLA_Obj_row_stride( A );
cs_A = FLA_Obj_col_stride( A );
switch ( datatype )
{
case FLA_FLOAT:
{
float* buff_A = FLA_FLOAT_PTR( A );
float* buff_alpha = FLA_FLOAT_PTR( alpha );
for ( i = 0; i < m_A; ++i )
{
float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
float* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
int m_ahead = m_A - i - 1;
bl1_sscals( buff_alpha, alpha11 );
if ( m_ahead > 0 )
bl1_sscals( buff_alpha, a21_t );
}
break;
}
case FLA_DOUBLE:
{
double* buff_A = FLA_DOUBLE_PTR( A );
double* buff_alpha = FLA_DOUBLE_PTR( alpha );
for ( i = 0; i < m_A; ++i )
{
double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
double* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
int m_ahead = m_A - i - 1;
bl1_dscals( buff_alpha, alpha11 );
if ( m_ahead > 0 )
bl1_dscals( buff_alpha, a21_t );
}
break;
}
case FLA_COMPLEX:
{
scomplex* buff_A = FLA_COMPLEX_PTR( A );
float* buff_alpha = FLA_FLOAT_PTR( alpha );
for ( i = 0; i < m_A; ++i )
{
scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
scomplex* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
int m_ahead = m_A - i - 1;
bl1_csscals( buff_alpha, alpha11 );
if ( m_ahead > 0 )
bl1_csscals( buff_alpha, a21_t );
}
break;
}
case FLA_DOUBLE_COMPLEX:
{
dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
double* buff_alpha = FLA_DOUBLE_PTR( alpha );
for ( i = 0; i < m_A; ++i )
{
dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
dcomplex* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
int m_ahead = m_A - i - 1;
bl1_zdscals( buff_alpha, alpha11 );
if ( m_ahead > 0 )
bl1_zdscals( buff_alpha, a21_t );
}
break;
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Bidiag_UT_realify | ( | FLA_Obj | A, |
| FLA_Obj | d, | ||
| FLA_Obj | e | ||
| ) |
References FLA_Bidiag_UT_l_realify_opt(), FLA_Bidiag_UT_realify_check(), FLA_Bidiag_UT_u_realify_opt(), FLA_Check_error_level(), FLA_Obj_is_real(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, and FLA_Set().
Referenced by FLA_Svd_ext_u_unb_var1(), FLA_Svd_uv_unb_var1(), and FLA_Svd_uv_unb_var2().
{
FLA_Error r_val = FLA_SUCCESS;
if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
FLA_Bidiag_UT_realify_check( A, d, e );
if ( FLA_Obj_is_real( A ) )
{
FLA_Set( FLA_ONE, d );
FLA_Set( FLA_ONE, e );
return FLA_SUCCESS;
}
if ( FLA_Obj_length( A ) < FLA_Obj_width( A ) )
//r_val = FLA_Bidiag_UT_l_realify_unb( A, d, e );
r_val = FLA_Bidiag_UT_l_realify_opt( A, d, e );
else
//r_val = FLA_Bidiag_UT_u_realify_unb( A, d, e );
r_val = FLA_Bidiag_UT_u_realify_opt( A, d, e );
return r_val;
}
| FLA_Error FLA_Bidiag_UT_realify_diagonals | ( | FLA_Uplo | uplo, |
| FLA_Obj | a, | ||
| FLA_Obj | b, | ||
| FLA_Obj | d, | ||
| FLA_Obj | e | ||
| ) |
References FLA_Bidiag_UT_realify_diagonals_check(), FLA_Bidiag_UT_realify_diagonals_opt(), and FLA_Check_error_level().
{
FLA_Error r_val = FLA_SUCCESS;
if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
FLA_Bidiag_UT_realify_diagonals_check( uplo, a, b, d, e );
if ( uplo == FLA_LOWER_TRIANGULAR )
r_val = FLA_Bidiag_UT_realify_diagonals_opt( a, b, d, e );
else
r_val = FLA_Bidiag_UT_realify_diagonals_opt( a, b, e, d );
return r_val;
}
References bl1_dsetv(), bl1_ssetv(), BLIS1_CONJUGATE, FLA_Obj_datatype(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), FLA_ONE, FLA_ZERO, scomplex::imag, and dcomplex::imag.
Referenced by FLA_Bidiag_UT_realify_diagonals().
{
FLA_Datatype datatype;
int i, m, inc_a, inc_b, inc_d, inc_e;
datatype = FLA_Obj_datatype( a );
m = FLA_Obj_vector_dim( a );
inc_a = FLA_Obj_vector_inc( a );
inc_b = ( m > 1 ? FLA_Obj_vector_inc( b ) : 0 );
inc_d = FLA_Obj_vector_inc( d );
inc_e = FLA_Obj_vector_inc( e );
switch ( datatype )
{
case FLA_FLOAT:
{
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_e = FLA_FLOAT_PTR( e );
float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
bl1_ssetv( m,
buff_1,
buff_d, inc_d );
bl1_ssetv( m,
buff_1,
buff_e, inc_e );
break;
}
case FLA_DOUBLE:
{
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_e = FLA_DOUBLE_PTR( e );
double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
bl1_dsetv( m,
buff_1,
buff_d, inc_d );
bl1_dsetv( m,
buff_1,
buff_e, inc_e );
break;
}
case FLA_COMPLEX:
{
scomplex* buff_a = FLA_COMPLEX_PTR( a );
scomplex* buff_b = ( m > 1 ? FLA_COMPLEX_PTR( b ) : NULL );
scomplex* buff_d = FLA_COMPLEX_PTR( d );
scomplex* buff_e = FLA_COMPLEX_PTR( e );
scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
for ( i = 0; i < m; ++i )
{
scomplex* alpha1 = buff_a + (i )*inc_a;
scomplex* delta1 = buff_d + (i )*inc_d;
scomplex* epsilon1 = buff_e + (i )*inc_e;
scomplex absv;
if ( i == 0 )
{
*delta1 = *buff_1;
}
else
{
scomplex* beta1 = buff_b + (i-1)*inc_b;
if ( beta1->imag == 0.0F )
*delta1 = *buff_1;
else
{
bl1_ccopys( BLIS1_CONJUGATE, beta1, delta1 );
bl1_cabsval2( beta1, &absv );
bl1_cinvscals( &absv, delta1 );
bl1_cscals( delta1, beta1 );
beta1->imag = *buff_0;
bl1_cscals( delta1, alpha1 );
}
}
if ( alpha1->imag == 0.0F )
*epsilon1 = *buff_1;
else
{
bl1_ccopys( BLIS1_CONJUGATE, alpha1, epsilon1 );
bl1_cabsval2( alpha1, &absv );
bl1_cinvscals( &absv, epsilon1 );
bl1_cscals( epsilon1, alpha1 );
alpha1->imag = *buff_0;
}
if ( i < ( m - 1 ) )
{
scomplex* beta2 = buff_b + (i )*inc_b;
bl1_cscals( epsilon1, beta2 );
}
}
break;
}
case FLA_DOUBLE_COMPLEX:
{
dcomplex* buff_a = FLA_DOUBLE_COMPLEX_PTR( a );
dcomplex* buff_b = ( m > 1 ? FLA_DOUBLE_COMPLEX_PTR( b ) : NULL );
dcomplex* buff_d = FLA_DOUBLE_COMPLEX_PTR( d );
dcomplex* buff_e = FLA_DOUBLE_COMPLEX_PTR( e );
dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
for ( i = 0; i < m; ++i )
{
dcomplex* alpha1 = buff_a + (i )*inc_a;
dcomplex* delta1 = buff_d + (i )*inc_d;
dcomplex* epsilon1 = buff_e + (i )*inc_e;
dcomplex absv;
if ( i == 0 )
{
*delta1 = *buff_1;
}
else
{
dcomplex* beta1 = buff_b + (i-1)*inc_b;
bl1_zcopys( BLIS1_CONJUGATE, beta1, delta1 );
bl1_zabsval2( beta1, &absv );
bl1_zinvscals( &absv, delta1 );
bl1_zscals( delta1, beta1 );
beta1->imag = *buff_0;
bl1_zscals( delta1, alpha1 );
}
bl1_zcopys( BLIS1_CONJUGATE, alpha1, epsilon1 );
bl1_zabsval2( alpha1, &absv );
bl1_zinvscals( &absv, epsilon1 );
bl1_zscals( epsilon1, alpha1 );
alpha1->imag = *buff_0;
if ( i < ( m - 1 ) )
{
dcomplex* beta2 = buff_b + (i )*inc_b;
bl1_zscals( epsilon1, beta2 );
}
}
break;
}
}
return FLA_SUCCESS;
}
References FLA_Bidiag_UT_recover_tau_check(), FLA_Bidiag_UT_recover_tau_panel(), and FLA_Check_error_level().
{
if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
FLA_Bidiag_UT_recover_tau_check( TU, TV, tu, tv );
FLA_Bidiag_UT_recover_tau_panel( TU, tu );
FLA_Bidiag_UT_recover_tau_panel( TV, tv );
return FLA_SUCCESS;
}
| FLA_Error FLA_Bidiag_UT_scale_diagonals | ( | FLA_Obj | alpha, |
| FLA_Obj | A | ||
| ) |
References FLA_Bidiag_UT_l_scale_diagonals(), FLA_Bidiag_UT_scale_diagonals_check(), FLA_Bidiag_UT_u_scale_diagonals(), FLA_Check_error_level(), FLA_Obj_length(), and FLA_Obj_width().
{
FLA_Error r_val = FLA_SUCCESS;
if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
FLA_Bidiag_UT_scale_diagonals_check( alpha, A );
if ( FLA_Obj_length( A ) >= FLA_Obj_width( A ) )
r_val = FLA_Bidiag_UT_u_scale_diagonals( alpha, A );
else
r_val = FLA_Bidiag_UT_l_scale_diagonals( alpha, A );
return r_val;
}
| FLA_Error FLA_Bidiag_UT_u | ( | FLA_Obj | A, |
| FLA_Obj | TU, | ||
| FLA_Obj | TV, | ||
| fla_bidiagut_t * | cntl | ||
| ) |
References FLA_Bidiag_UT_u_blf_var2(), FLA_Bidiag_UT_u_blf_var3(), FLA_Bidiag_UT_u_blf_var4(), FLA_Bidiag_UT_u_blk_var1(), FLA_Bidiag_UT_u_blk_var2(), FLA_Bidiag_UT_u_blk_var3(), FLA_Bidiag_UT_u_blk_var4(), FLA_Bidiag_UT_u_blk_var5(), FLA_Bidiag_UT_u_opt_var1(), FLA_Bidiag_UT_u_opt_var2(), FLA_Bidiag_UT_u_opt_var3(), FLA_Bidiag_UT_u_opt_var4(), FLA_Bidiag_UT_u_opt_var5(), FLA_Bidiag_UT_u_unb_var1(), FLA_Bidiag_UT_u_unb_var2(), FLA_Bidiag_UT_u_unb_var3(), FLA_Bidiag_UT_u_unb_var4(), and FLA_Bidiag_UT_u_unb_var5().
Referenced by FLA_Bidiag_UT_internal().
{
FLA_Error r_val = FLA_SUCCESS;
if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT1 )
{
r_val = FLA_Bidiag_UT_u_unb_var1( A, TU, TV );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT2 )
{
r_val = FLA_Bidiag_UT_u_unb_var2( A, TU, TV );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT3 )
{
r_val = FLA_Bidiag_UT_u_unb_var3( A, TU, TV );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT4 )
{
r_val = FLA_Bidiag_UT_u_unb_var4( A, TU, TV );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT5 )
{
r_val = FLA_Bidiag_UT_u_unb_var5( A, TU, TV );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT1 )
{
r_val = FLA_Bidiag_UT_u_opt_var1( A, TU, TV );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT2 )
{
r_val = FLA_Bidiag_UT_u_opt_var2( A, TU, TV );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT3 )
{
r_val = FLA_Bidiag_UT_u_opt_var3( A, TU, TV );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT4 )
{
r_val = FLA_Bidiag_UT_u_opt_var4( A, TU, TV );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT5 )
{
r_val = FLA_Bidiag_UT_u_opt_var5( A, TU, TV );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT1 )
{
r_val = FLA_Bidiag_UT_u_blk_var1( A, TU, TV );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT2 )
{
r_val = FLA_Bidiag_UT_u_blk_var2( A, TU, TV );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT3 )
{
r_val = FLA_Bidiag_UT_u_blk_var3( A, TU, TV );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT4 )
{
r_val = FLA_Bidiag_UT_u_blk_var4( A, TU, TV );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT5 )
{
r_val = FLA_Bidiag_UT_u_blk_var5( A, TU, TV );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_BLK_FUS_VARIANT2 )
{
r_val = FLA_Bidiag_UT_u_blf_var2( A, TU, TV );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_BLK_FUS_VARIANT3 )
{
r_val = FLA_Bidiag_UT_u_blf_var3( A, TU, TV );
}
else if ( FLA_Cntl_variant( cntl ) == FLA_BLK_FUS_VARIANT4 )
{
r_val = FLA_Bidiag_UT_u_blf_var4( A, TU, TV );
}
else
{
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
}
return r_val;
}
| FLA_Error FLA_Bidiag_UT_u_extract_diagonals | ( | FLA_Obj | A, |
| FLA_Obj | d, | ||
| FLA_Obj | e | ||
| ) |
References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().
Referenced by FLA_Bidiag_UT_extract_diagonals(), and FLA_Tridiag_UT_extract_diagonals().
{
FLA_Datatype datatype;
int n_A;
int rs_A, cs_A;
int inc_d;
int inc_e;
int i;
datatype = FLA_Obj_datatype( A );
n_A = FLA_Obj_width( A );
rs_A = FLA_Obj_row_stride( A );
cs_A = FLA_Obj_col_stride( A );
inc_d = FLA_Obj_vector_inc( d );
if ( n_A != 1 )
inc_e = FLA_Obj_vector_inc( e );
else
inc_e = 0;
switch ( datatype )
{
case FLA_FLOAT:
{
float* buff_A = FLA_FLOAT_PTR( A );
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_e = ( n_A != 1 ? FLA_FLOAT_PTR( e ) : NULL );
for ( i = 0; i < n_A; ++i )
{
float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
float* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A;
float* delta1 = buff_d + (i )*inc_d;
float* epsilon1 = buff_e + (i )*inc_e;
int n_ahead = n_A - i - 1;
// delta1 = alpha11;
*delta1 = *alpha11;
// epsilon1 = a12t_l;
if ( n_ahead > 0 )
*epsilon1 = *a12t_l;
}
break;
}
case FLA_DOUBLE:
{
double* buff_A = FLA_DOUBLE_PTR( A );
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_e = ( n_A != 1 ? FLA_DOUBLE_PTR( e ) : NULL );
for ( i = 0; i < n_A; ++i )
{
double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
double* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A;
double* delta1 = buff_d + (i )*inc_d;
double* epsilon1 = buff_e + (i )*inc_e;
int n_ahead = n_A - i - 1;
// delta1 = alpha11;
*delta1 = *alpha11;
// epsilon1 = a12t_l;
if ( n_ahead > 0 )
*epsilon1 = *a12t_l;
}
break;
}
case FLA_COMPLEX:
{
scomplex* buff_A = FLA_COMPLEX_PTR( A );
scomplex* buff_d = FLA_COMPLEX_PTR( d );
scomplex* buff_e = ( n_A != 1 ? FLA_COMPLEX_PTR( e ) : NULL );
for ( i = 0; i < n_A; ++i )
{
scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
scomplex* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A;
scomplex* delta1 = buff_d + (i )*inc_d;
scomplex* epsilon1 = buff_e + (i )*inc_e;
int n_ahead = n_A - i - 1;
// delta1 = alpha11;
*delta1 = *alpha11;
// epsilon1 = a12t_l;
if ( n_ahead > 0 )
*epsilon1 = *a12t_l;
}
break;
}
case FLA_DOUBLE_COMPLEX:
{
dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
dcomplex* buff_d = FLA_DOUBLE_COMPLEX_PTR( d );
dcomplex* buff_e = ( n_A != 1 ? FLA_DOUBLE_COMPLEX_PTR( e ) : NULL );
for ( i = 0; i < n_A; ++i )
{
dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
dcomplex* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A;
dcomplex* delta1 = buff_d + (i )*inc_d;
dcomplex* epsilon1 = buff_e + (i )*inc_e;
int n_ahead = n_A - i - 1;
// delta1 = alpha11;
*delta1 = *alpha11;
// epsilon1 = a12t_l;
if ( n_ahead > 0 )
*epsilon1 = *a12t_l;
}
break;
}
}
return FLA_SUCCESS;
}
References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), scomplex::real, and dcomplex::real.
Referenced by FLA_Bidiag_UT_extract_real_diagonals(), and FLA_Tridiag_UT_extract_real_diagonals().
{
FLA_Datatype datatype;
int n_A;
int rs_A, cs_A;
int inc_d;
int inc_e;
int i;
datatype = FLA_Obj_datatype( A );
n_A = FLA_Obj_width( A );
rs_A = FLA_Obj_row_stride( A );
cs_A = FLA_Obj_col_stride( A );
inc_d = FLA_Obj_vector_inc( d );
if ( n_A != 1 )
inc_e = FLA_Obj_vector_inc( e );
else
inc_e = 0;
switch ( datatype )
{
case FLA_FLOAT:
{
float* buff_A = FLA_FLOAT_PTR( A );
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_e = ( n_A != 1 ? FLA_FLOAT_PTR( e ) : NULL );
for ( i = 0; i < n_A; ++i )
{
float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
float* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A;
float* delta1 = buff_d + (i )*inc_d;
float* epsilon1 = buff_e + (i )*inc_e;
int n_ahead = n_A - i - 1;
// delta1 = alpha11;
*delta1 = *alpha11;
// epsilon1 = a12t_l;
if ( n_ahead > 0 )
*epsilon1 = *a12t_l;
}
break;
}
case FLA_DOUBLE:
{
double* buff_A = FLA_DOUBLE_PTR( A );
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_e = ( n_A != 1 ? FLA_DOUBLE_PTR( e ) : NULL );
for ( i = 0; i < n_A; ++i )
{
double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
double* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A;
double* delta1 = buff_d + (i )*inc_d;
double* epsilon1 = buff_e + (i )*inc_e;
int n_ahead = n_A - i - 1;
// delta1 = alpha11;
*delta1 = *alpha11;
// epsilon1 = a12t_l;
if ( n_ahead > 0 )
*epsilon1 = *a12t_l;
}
break;
}
case FLA_COMPLEX:
{
scomplex* buff_A = FLA_COMPLEX_PTR( A );
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_e = ( n_A != 1 ? FLA_FLOAT_PTR( e ) : NULL );
for ( i = 0; i < n_A; ++i )
{
scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
scomplex* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A;
float* delta1 = buff_d + (i )*inc_d;
float* epsilon1 = buff_e + (i )*inc_e;
int n_ahead = n_A - i - 1;
// delta1 = alpha11;
*delta1 = alpha11->real;
// epsilon1 = a12t_l;
if ( n_ahead > 0 )
*epsilon1 = a12t_l->real;
}
break;
}
case FLA_DOUBLE_COMPLEX:
{
dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_e = ( n_A != 1 ? FLA_DOUBLE_PTR( e ) : NULL );
for ( i = 0; i < n_A; ++i )
{
dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
dcomplex* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A;
double* delta1 = buff_d + (i )*inc_d;
double* epsilon1 = buff_e + (i )*inc_e;
int n_ahead = n_A - i - 1;
// delta1 = alpha11;
*delta1 = alpha11->real;
// epsilon1 = a12t_l;
if ( n_ahead > 0 )
*epsilon1 = a12t_l->real;
}
break;
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Bidiag_UT_u_realify_opt | ( | FLA_Obj | A, |
| FLA_Obj | d, | ||
| FLA_Obj | e | ||
| ) |
References bl1_dsetv(), bl1_ssetv(), BLIS1_CONJUGATE, FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_ONE, FLA_ZERO, scomplex::imag, and dcomplex::imag.
Referenced by FLA_Bidiag_UT_realify().
{
FLA_Datatype datatype;
int m_A, n_A;
int min_m_n;
int rs_A, cs_A;
int inc_d;
int inc_e;
int i;
datatype = FLA_Obj_datatype( A );
m_A = FLA_Obj_length( A );
n_A = FLA_Obj_width( A );
min_m_n = FLA_Obj_min_dim( A );
rs_A = FLA_Obj_row_stride( A );
cs_A = FLA_Obj_col_stride( A );
inc_d = FLA_Obj_vector_inc( d );
inc_e = FLA_Obj_vector_inc( e );
switch ( datatype )
{
case FLA_FLOAT:
{
float* buff_d = FLA_FLOAT_PTR( d );
float* buff_e = FLA_FLOAT_PTR( e );
float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
bl1_ssetv( min_m_n,
buff_1,
buff_d, inc_d );
bl1_ssetv( min_m_n,
buff_1,
buff_e, inc_e );
break;
}
case FLA_DOUBLE:
{
double* buff_d = FLA_DOUBLE_PTR( d );
double* buff_e = FLA_DOUBLE_PTR( e );
double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
bl1_dsetv( min_m_n,
buff_1,
buff_d, inc_d );
bl1_dsetv( min_m_n,
buff_1,
buff_e, inc_e );
break;
}
case FLA_COMPLEX:
{
scomplex* buff_A = FLA_COMPLEX_PTR( A );
scomplex* buff_d = FLA_COMPLEX_PTR( d );
scomplex* buff_e = FLA_COMPLEX_PTR( e );
scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
for ( i = 0; i < min_m_n; ++i )
{
scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
scomplex* delta1 = buff_d + (i )*inc_d;
scomplex* epsilon1 = buff_e + (i )*inc_e;
scomplex absv;
int n_ahead = n_A - i - 1;
int n_behind = i;
if ( n_behind == 0 )
{
// FLA_Set( FLA_ONE, epsilon1 );
*epsilon1 = *buff_1;
}
else
{
scomplex* a01_b = buff_A + (i )*cs_A + (i-1)*rs_A;
// FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a01_b, epsilon1 );
// FLA_Copyt( FLA_NO_TRANSPOSE, a01_b, absv );
// FLA_Absolute_value( absv );
// FLA_Inv_scal( absv, epsilon1 );
bl1_ccopys( BLIS1_CONJUGATE, a01_b, epsilon1 );
bl1_cabsval2( a01_b, &absv );
bl1_cinvscals( &absv, epsilon1 );
// FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, a01_b );
// FLA_Obj_set_imag_part( FLA_ZERO, a01_b );
bl1_cscals( epsilon1, a01_b );
a01_b->imag = *buff_0;
// FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, alpha11 );
bl1_cscals( epsilon1, alpha11 );
}
// FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, delta1 );
// FLA_Copyt( FLA_NO_TRANSPOSE, alpha11, absv );
// FLA_Absolute_value( absv );
// FLA_Inv_scal( absv, delta1 );
bl1_ccopys( BLIS1_CONJUGATE, alpha11, delta1 );
bl1_cabsval2( alpha11, &absv );
bl1_cinvscals( &absv, delta1 );
// FLA_Scalc( FLA_NO_CONJUGATE, delta1, alpha11 );
// FLA_Obj_set_imag_part( FLA_ZERO, alpha11 );
bl1_cscals( delta1, alpha11 );
alpha11->imag = *buff_0;
if ( n_ahead > 0 )
{
scomplex* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A;
// FLA_Scalc( FLA_NO_CONJUGATE, delta1, a12t_l );
bl1_cscals( delta1, a12t_l );
}
}
break;
}
case FLA_DOUBLE_COMPLEX:
{
dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
dcomplex* buff_d = FLA_DOUBLE_COMPLEX_PTR( d );
dcomplex* buff_e = FLA_DOUBLE_COMPLEX_PTR( e );
dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
for ( i = 0; i < min_m_n; ++i )
{
dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
dcomplex* delta1 = buff_d + (i )*inc_d;
dcomplex* epsilon1 = buff_e + (i )*inc_e;
dcomplex absv;
int n_ahead = n_A - i - 1;
int n_behind = i;
if ( n_behind == 0 )
{
// FLA_Set( FLA_ONE, epsilon1 );
*epsilon1 = *buff_1;
}
else
{
dcomplex* a01_b = buff_A + (i )*cs_A + (i-1)*rs_A;
// FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a01_b, epsilon1 );
// FLA_Copyt( FLA_NO_TRANSPOSE, a01_b, absv );
// FLA_Absolute_value( absv );
// FLA_Inv_scal( absv, epsilon1 );
bl1_zcopys( BLIS1_CONJUGATE, a01_b, epsilon1 );
bl1_zabsval2( a01_b, &absv );
bl1_zinvscals( &absv, epsilon1 );
// FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, a01_b );
// FLA_Obj_set_imag_part( FLA_ZERO, a01_b );
bl1_zscals( epsilon1, a01_b );
a01_b->imag = *buff_0;
// FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, alpha11 );
bl1_zscals( epsilon1, alpha11 );
}
// FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, delta1 );
// FLA_Copyt( FLA_NO_TRANSPOSE, alpha11, absv );
// FLA_Absolute_value( absv );
// FLA_Inv_scal( absv, delta1 );
bl1_zcopys( BLIS1_CONJUGATE, alpha11, delta1 );
bl1_zabsval2( alpha11, &absv );
bl1_zinvscals( &absv, delta1 );
// FLA_Scalc( FLA_NO_CONJUGATE, delta1, alpha11 );
// FLA_Obj_set_imag_part( FLA_ZERO, alpha11 );
bl1_zscals( delta1, alpha11 );
alpha11->imag = *buff_0;
if ( n_ahead > 0 )
{
dcomplex* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A;
// FLA_Scalc( FLA_NO_CONJUGATE, delta1, a12t_l );
bl1_zscals( delta1, a12t_l );
}
}
break;
}
}
return FLA_SUCCESS;
}
| FLA_Error FLA_Bidiag_UT_u_realify_unb | ( | FLA_Obj | A, |
| FLA_Obj | d, | ||
| FLA_Obj | e | ||
| ) |
References FLA_Absolute_value(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt(), FLA_Inv_scal(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_set_imag_part(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scalc(), FLA_Set(), and FLA_ZERO.
{
FLA_Obj ATL, ATR, A00, a01, A02,
ABL, ABR, a10t, alpha11, a12t,
A20, a21, A22;
FLA_Obj dT, d0,
dB, delta1,
d2;
FLA_Obj eT, e0,
eB, epsilon1,
e2;
FLA_Obj a01_t,
a01_b;
FLA_Obj a12t_l, a12t_r;
FLA_Obj absv;
FLA_Obj_create( FLA_Obj_datatype( A ), 1, 1, 0, 0, &absv );
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_2x1( d, &dT,
&dB, 0, FLA_TOP );
FLA_Part_2x1( e, &eT,
&eB, 0, FLA_TOP );
while ( FLA_Obj_min_dim( ABR ) > 0 )
{
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
/* ************* */ /* ************************** */
&a10t, /**/ &alpha11, &a12t,
ABL, /**/ ABR, &A20, /**/ &a21, &A22,
1, 1, FLA_BR );
FLA_Repart_2x1_to_3x1( dT, &d0,
/* ** */ /* ****** */
&delta1,
dB, &d2, 1, FLA_BOTTOM );
FLA_Repart_2x1_to_3x1( eT, &e0,
/* ** */ /* ******** */
&epsilon1,
eB, &e2, 1, FLA_BOTTOM );
/*------------------------------------------------------------*/
if ( FLA_Obj_length( a01 ) == 0 )
{
// epsilon1 = 1;
FLA_Set( FLA_ONE, epsilon1 );
}
else
{
FLA_Part_2x1( a01, &a01_t,
&a01_b, 1, FLA_BOTTOM );
// epsilon1 = conj(a01_b) / abs(a01_b);
FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a01_b, epsilon1 );
FLA_Copyt( FLA_NO_TRANSPOSE, a01_b, absv );
FLA_Absolute_value( absv );
FLA_Inv_scal( absv, epsilon1 );
// a01_b = epsilon1 * a01_b;
// a01_b.imag = 0;
FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, a01_b );
FLA_Obj_set_imag_part( FLA_ZERO, a01_b );
// alpha11 = epsilon1 * alpha11;
FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, alpha11 );
}
// delta1 = conj(alpha11) / abs(alpha11);
FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, delta1 );
FLA_Copyt( FLA_NO_TRANSPOSE, alpha11, absv );
FLA_Absolute_value( absv );
FLA_Inv_scal( absv, delta1 );
// alpha11 = delta1 * alpha11;
// alpha11.imag = 0;
FLA_Scalc( FLA_NO_CONJUGATE, delta1, alpha11 );
FLA_Obj_set_imag_part( FLA_ZERO, alpha11 );
if ( FLA_Obj_width( a12t ) > 0 )
{
FLA_Part_1x2( a12t, &a12t_l, &a12t_r, 1, FLA_LEFT );
// a12t_l = delta1 * a12t_l;
FLA_Scalc( FLA_NO_CONJUGATE, delta1, a12t_l );
}
/*------------------------------------------------------------*/
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
a10t, alpha11, /**/ a12t,
/* ************** */ /* ************************ */
&ABL, /**/ &ABR, A20, a21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x1_to_2x1( &dT, d0,
delta1,
/* ** */ /* ****** */
&dB, d2, FLA_TOP );
FLA_Cont_with_3x1_to_2x1( &eT, e0,
epsilon1,
/* ** */ /* ******** */
&eB, e2, FLA_TOP );
}
FLA_Obj_free( &absv );
return FLA_SUCCESS;
}
| FLA_Error FLA_Bidiag_UT_u_scale_diagonals | ( | FLA_Obj | alpha, |
| FLA_Obj | A | ||
| ) |
References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_row_stride(), and FLA_Obj_width().
Referenced by FLA_Bidiag_UT_scale_diagonals(), and FLA_Tridiag_UT_scale_diagonals().
{
FLA_Datatype datatype;
int n_A;
int rs_A, cs_A;
int i;
datatype = FLA_Obj_datatype( A );
n_A = FLA_Obj_width( A );
rs_A = FLA_Obj_row_stride( A );
cs_A = FLA_Obj_col_stride( A );
switch ( datatype )
{
case FLA_FLOAT:
{
float* buff_A = FLA_FLOAT_PTR( A );
float* buff_alpha = FLA_FLOAT_PTR( alpha );
for ( i = 0; i < n_A; ++i )
{
float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
float* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A;
int n_ahead = n_A - i - 1;
bl1_sscals( buff_alpha, alpha11 );
if ( n_ahead > 0 )
bl1_sscals( buff_alpha, a12t_l );
}
break;
}
case FLA_DOUBLE:
{
double* buff_A = FLA_DOUBLE_PTR( A );
double* buff_alpha = FLA_DOUBLE_PTR( alpha );
for ( i = 0; i < n_A; ++i )
{
double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
double* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A;
int n_ahead = n_A - i - 1;
bl1_dscals( buff_alpha, alpha11 );
if ( n_ahead > 0 )
bl1_dscals( buff_alpha, a12t_l );
}
break;
}
case FLA_COMPLEX:
{
scomplex* buff_A = FLA_COMPLEX_PTR( A );
float* buff_alpha = FLA_FLOAT_PTR( alpha );
for ( i = 0; i < n_A; ++i )
{
scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
scomplex* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A;
int n_ahead = n_A - i - 1;
bl1_csscals( buff_alpha, alpha11 );
if ( n_ahead > 0 )
bl1_csscals( buff_alpha, a12t_l );
}
break;
}
case FLA_DOUBLE_COMPLEX:
{
dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
double* buff_alpha = FLA_DOUBLE_PTR( alpha );
for ( i = 0; i < n_A; ++i )
{
dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
dcomplex* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A;
int n_ahead = n_A - i - 1;
bl1_zdscals( buff_alpha, alpha11 );
if ( n_ahead > 0 )
bl1_zdscals( buff_alpha, a12t_l );
}
break;
}
}
return FLA_SUCCESS;
}
1.7.6.1