|
libflame
revision_anchor
|
Go to the source code of this file.
Functions | |
| FLA_Error | FLA_QR_UT_piv_unb_var1 (FLA_Obj A, FLA_Obj T, FLA_Obj w, FLA_Obj p) |
| FLA_Error | FLA_QR_UT_piv_blk_var1 (FLA_Obj A, FLA_Obj T, FLA_Obj w, FLA_Obj p, fla_qrut_t *cntl) |
| FLA_Error | FLA_QR_UT_piv_unb_var2 (FLA_Obj A, FLA_Obj T, FLA_Obj w, FLA_Obj p) |
| FLA_Error | FLA_QR_UT_piv_blk_var2 (FLA_Obj A, FLA_Obj T, FLA_Obj w, FLA_Obj p, fla_qrut_t *cntl) |
| FLA_Error | FLA_Apply_H2_UT_piv_row (FLA_Obj tau, FLA_Obj a1t, FLA_Obj u1t, FLA_Obj W, FLA_Obj u2, FLA_Obj A2, FLA_Obj U2, FLA_Obj w1t, FLA_Obj vt) |
| FLA_Error FLA_Apply_H2_UT_piv_row | ( | FLA_Obj | tau, |
| FLA_Obj | a1t, | ||
| FLA_Obj | u1t, | ||
| FLA_Obj | W, | ||
| FLA_Obj | u2, | ||
| FLA_Obj | A2, | ||
| FLA_Obj | U2, | ||
| FLA_Obj | w1t, | ||
| FLA_Obj | vt | ||
| ) |
References FLA_Axpy_external(), FLA_Copy_external(), FLA_Gemvc_external(), FLA_Inv_scalc_external(), FLA_MINUS_ONE, FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), and FLA_ZERO.
Referenced by FLA_QR_UT_piv_unb_var2().
{
  /*
   * Updates the pivot row a1t in place and leaves the scaled update vector
   * in w1t. vt is caller-provided scratch space; only its leading
   * FLA_Obj_width( U2 ) columns are touched, and only when U2 is non-empty.
   * Always returns FLA_SUCCESS; results of the external calls are not checked.
   */
  FLA_Obj vtL, vtR;

  // Step 1: a1t := a1t - W^T u1t
  FLA_Gemvc_external( FLA_TRANSPOSE, FLA_NO_CONJUGATE,
                      FLA_MINUS_ONE, W, u1t,
                      FLA_ONE, a1t );

  // Step 2: w1t := a1t + A2^T conj(u2)   (copy first, then accumulate)
  FLA_Copy_external( a1t, w1t );
  FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE,
                      FLA_ONE, A2, u2,
                      FLA_ONE, w1t );

  // Step 3: correction term, needed only when U2 has at least one
  // row and one column.
  if ( FLA_Obj_min_dim( U2 ) > 0 )
  {
    // Use the leading width(U2) entries of the scratch row; vtR is the
    // unused remainder.
    FLA_Part_1x2( vt, &vtL, &vtR, FLA_Obj_width( U2 ), FLA_LEFT );

    // vtL := U2^T conj(u2)   (beta is zero, so old contents are discarded)
    FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE,
                        FLA_ONE, U2, u2,
                        FLA_ZERO, vtL );

    // w1t := w1t - W^T vtL
    FLA_Gemvc_external( FLA_TRANSPOSE, FLA_NO_CONJUGATE,
                        FLA_MINUS_ONE, W, vtL,
                        FLA_ONE, w1t );
  }

  // Step 4: w1t := w1t / tau
  FLA_Inv_scalc_external( FLA_NO_CONJUGATE, tau, w1t );

  // Step 5: a1t := a1t - w1t
  FLA_Axpy_external( FLA_MINUS_ONE, w1t, a1t );

  return FLA_SUCCESS;
}
| FLA_Error FLA_QR_UT_piv_blk_var1 | ( | FLA_Obj | A, |
| FLA_Obj | T, | ||
| FLA_Obj | w, | ||
| FLA_Obj | p, | ||
| fla_qrut_t * | cntl | ||
| ) |
References FLA_Apply_pivots(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_QR_UT_piv_internal(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), and FLA_Repart_2x2_to_3x3().
Referenced by FLA_QR_UT_piv_internal().
{
// Blocked driver (variant 1) for the pivoted QR factorization via the UT
// transform.
//
// Walks down the diagonal of A in steps of b, where b is at most the length
// (row count) of T. At every step the whole trailing submatrix ABR is handed
// to FLA_QR_UT_piv_internal() together with the sub-control-tree of cntl, and
// the pivots it produced (p1) are then applied to ATR.
//
// A    - matrix being factored; overwritten in place.
// T    - storage for the triangular factors; its length sets the blocksize.
// w    - vector that is advanced in lock-step with the diagonal; the
//        not-yet-processed part (wB) is passed to the inner factorization.
// p    - pivot vector, filled b entries at a time.
// cntl - control tree; only FLA_Cntl_sub_qrut( cntl ) is used here.
//
// Always returns FLA_SUCCESS; return values of the called routines are
// not inspected.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj TL, TR, T0, T1, W12;
FLA_Obj T1T, T2B;
FLA_Obj pT, p0,
pB, p1,
p2;
FLA_Obj wT, w0,
wB, w1,
w2;
dim_t b_alg, b;
// Query the algorithmic blocksize by inspecting the length of T.
b_alg = FLA_Obj_length( T );
// Start with empty "done" partitions: ATL is 0x0, TL/pT/wT are empty.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
FLA_Part_2x1( p, &pT,
&pB, 0, FLA_TOP );
FLA_Part_2x1( w, &wT,
&wB, 0, FLA_TOP );
// Loop over A; T is properly truncated.
while ( FLA_Obj_min_dim( ABR ) > 0 ){
// The last block may be smaller than the algorithmic blocksize.
b = min( b_alg, FLA_Obj_min_dim( ABR ) );
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
/* ************* */ /* ******************** */
&A10, /**/ &A11, &A12,
ABL, /**/ ABR, &A20, /**/ &A21, &A22,
b, b, FLA_BR );
FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &W12,
b, FLA_RIGHT );
FLA_Repart_2x1_to_3x1( pT, &p0,
/* ** */ /* ** */
&p1,
pB, &p2, b, FLA_BOTTOM );
FLA_Repart_2x1_to_3x1( wT, &w0,
/* ** */ /* ** */
&w1,
wB, &w2, b, FLA_BOTTOM );
/*------------------------------------------------------------*/
// Keep only the top b rows of T1; T2B (the remainder) is not used.
FLA_Part_2x1( T1, &T1T,
&T2B, b, FLA_TOP );
// Perform an unblocked (BLAS2-oriented) QR factorization
// with pivoting via the UT transform on ABR:
//
// ABR -> QB1 R11
//
// where:
// - QB1 is formed from UB1 (which is stored column-wise below the
// diagonal of ( A11 A21 )^T) and T1T (which is stored to the upper
// triangle of T11).
// - R11 is stored to ( A11 A12 ).
//
// Note that the full remaining vector wB (not just w1) is passed down.
FLA_QR_UT_piv_internal( ABR, T1T, wB, p1,
FLA_Cntl_sub_qrut( cntl ) );
// Apply the pivots found in this step to ATR, i.e. to the block of rows
// above the current diagonal block.
FLA_Apply_pivots( FLA_RIGHT, FLA_TRANSPOSE, p1, ATR );
/*------------------------------------------------------------*/
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
A10, A11, /**/ A12,
/* ************** */ /* ****************** */
&ABL, /**/ &ABR, A20, A21, /**/ A22,
FLA_TL );
FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ W12,
FLA_LEFT );
FLA_Cont_with_3x1_to_2x1( &pT, p0,
p1,
/* ** */ /* ** */
&pB, p2, FLA_TOP );
FLA_Cont_with_3x1_to_2x1( &wT, w0,
w1,
/* ** */ /* ** */
&wB, w2, FLA_TOP );
}
return FLA_SUCCESS;
}
| FLA_Error FLA_QR_UT_piv_blk_var2 | ( | FLA_Obj | A, |
| FLA_Obj | T, | ||
| FLA_Obj | w, | ||
| FLA_Obj | p, | ||
| fla_qrut_t * | cntl | ||
| ) |
References FLA_Apply_pivots(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Gemm_external(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_QR_UT_piv_internal(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), and FLA_Repart_2x2_to_3x3().
Referenced by FLA_QR_UT_piv_internal().
{
// Blocked driver (variant 2) for the pivoted QR factorization via the UT
// transform.
//
// Same traversal as variant 1 (steps of b along the diagonal of A, with b
// bounded by the length of T), but the inner factorization receives the top
// b rows of the whole remaining part of T (TR = T1 and W12 together), and
// after it returns the trailing block A22 is updated with a single GEMM
// using the top b rows of W12.
//
// A    - matrix being factored; overwritten in place.
// T    - storage for the triangular factors and the partial updates;
//        its length sets the blocksize.
// w    - vector advanced in lock-step with the diagonal; the unprocessed
//        part (wB) is passed to the inner factorization.
// p    - pivot vector, filled b entries at a time.
// cntl - control tree; only FLA_Cntl_sub_qrut( cntl ) is used here.
//
// Always returns FLA_SUCCESS; return values of the called routines are
// not inspected.
FLA_Obj ATL, ATR, A00, A01, A02,
ABL, ABR, A10, A11, A12,
A20, A21, A22;
FLA_Obj TL, TR, T0, T1, W12;
// TT/TB are scratch views that are re-partitioned twice per iteration.
FLA_Obj TT, TB;
FLA_Obj pT, p0,
pB, p1,
p2;
FLA_Obj wT, w0,
wB, w1,
w2;
dim_t b_alg, b;
// Query the algorithmic blocksize by inspecting the length of T.
b_alg = FLA_Obj_length( T );
// Start with empty "done" partitions: ATL is 0x0, TL/pT/wT are empty.
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
FLA_Part_2x1( p, &pT,
&pB, 0, FLA_TOP );
FLA_Part_2x1( w, &wT,
&wB, 0, FLA_TOP );
while ( FLA_Obj_min_dim( ABR ) > 0 ){
// The last block may be smaller than the algorithmic blocksize.
b = min( b_alg, FLA_Obj_min_dim( ABR ) );
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
/* ************* */ /* ******************** */
&A10, /**/ &A11, &A12,
ABL, /**/ ABR, &A20, /**/ &A21, &A22,
b, b, FLA_BR );
FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &W12,
b, FLA_RIGHT );
FLA_Repart_2x1_to_3x1( pT, &p0,
/* ** */ /* ** */
&p1,
pB, &p2, b, FLA_BOTTOM );
FLA_Repart_2x1_to_3x1( wT, &w0,
/* ** */ /* ** */
&w1,
wB, &w2, b, FLA_BOTTOM );
/*------------------------------------------------------------*/
// ** Reshape T matrices to match the blocksize b.
// TT is the top b rows of TR (which still spans T1 and W12 here).
FLA_Part_2x1( TR, &TT,
&TB, b, FLA_TOP );
// ** Perform an unblocked (BLAS2-oriented) QR factorization
// with pivoting via the UT transform on ABR:
//
// ABR -> QB1 R11
//
// where:
// - QB1 is formed from UB1 (which is stored column-wise below the
// diagonal of ( A11 A21 )^T) and the upper-triangle of T1.
// - R11 is stored to ( A11 A12 ).
// - W12 stores T and partial updates for FLA_Apply_Q_UT_piv_var.
FLA_QR_UT_piv_internal( ABR, TT, wB, p1,
FLA_Cntl_sub_qrut( cntl ) );
// Skip the trailing update when there are no columns to the right.
if ( FLA_Obj_width( A12 ) > 0 )
{
// ** Block update: A22 := A22 - A21 * TT, where TT is now the
// top b rows of W12 (TT/TB are deliberately overwritten here).
FLA_Part_2x1( W12, &TT,
&TB, b, FLA_TOP );
FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
FLA_MINUS_ONE, A21, TT, FLA_ONE, A22 );
}
// ** Apply the pivots found in this step to ATR, i.e. to the block of
// rows above the current diagonal block.
FLA_Apply_pivots( FLA_RIGHT, FLA_TRANSPOSE, p1, ATR );
/*------------------------------------------------------------*/
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
A10, A11, /**/ A12,
/* ************** */ /* ****************** */
&ABL, /**/ &ABR, A20, A21, /**/ A22,
FLA_TL );
FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ W12,
FLA_LEFT );
FLA_Cont_with_3x1_to_2x1( &pT, p0,
p1,
/* ** */ /* ** */
&pB, p2, FLA_TOP );
FLA_Cont_with_3x1_to_2x1( &wT, w0,
w1,
/* ** */ /* ** */
&wB, w2, FLA_TOP );
}
return FLA_SUCCESS;
}
| FLA_Error FLA_QR_UT_piv_unb_var1 | ( | FLA_Obj | A, |
| FLA_Obj | T, | ||
| FLA_Obj | w, | ||
| FLA_Obj | p | ||
| ) |
References FLA_Amax_external(), FLA_Apply_H2_UT(), FLA_Apply_pivots(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt_external(), FLA_Gemv_external(), FLA_Househ2_UT(), FLA_Merge_1x2(), FLA_MINUS_ONE, FLA_Obj_le(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_QR_UT_piv_colnorm(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), and FLA_ZERO.
Referenced by FLA_QR_UT_piv_internal().
{
// Unblocked (one column per iteration) pivoted QR factorization via the UT
// transform, variant 1.
//
// A is split column-wise into AL | AR, where AR receives
// nb = width(A) - width(T) columns and the loop runs along the diagonal
// of AL. Every pivot swap and every Householder application is carried
// out on AL and AR together by temporarily merging the matching pieces.
//
// A - matrix being factored; overwritten in place.
// T - receives tau11 on the diagonal and t01 above it, one column per step.
// w - vector searched by FLA_Amax_external() for the pivot and
//     downdated by FLA_QR_UT_piv_colnorm() after each step.
// p - pivot vector; one entry (pi1) is written per iteration.
//
// Always returns FLA_SUCCESS; return values of the called routines are
// not inspected.
FLA_Obj AL, AR;
FLA_Obj ATL, ATR, A00, a01, A02,
ABL, ABR, a10t, alpha11, a12t,
A20, a21, A22;
FLA_Obj AT, A0,
AB, a1t,
A2;
// Merged views spanning the AL part and the AR part of the same rows.
FLA_Obj AB1, AT1, at1;
FLA_Obj TTL, TTR, T00, t01, T02,
TBL, TBR, t10t, tau11, t12t,
T20, t21, T22;
FLA_Obj pT, p0,
pB, pi1,
p2;
FLA_Obj wT, w0,
wB, omega1,
w2;
// Number of columns of A that lie beyond the width of T.
dim_t nb = FLA_Obj_width ( A ) - FLA_Obj_width( T );
//dim_t mb = FLA_Obj_length( A ) - FLA_Obj_width( T );
FLA_Part_1x2( A, &AL, &AR, nb, FLA_RIGHT );
FLA_Part_2x2( AL, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_2x1( AR, &AT,
&AB, 0, FLA_TOP );
FLA_Part_2x2( T, &TTL, &TTR,
&TBL, &TBR, 0, 0, FLA_TL );
FLA_Part_2x1( p, &pT,
&pB, 0, FLA_TOP );
FLA_Part_2x1( w, &wT,
&wB, 0, FLA_TOP );
while ( FLA_Obj_min_dim( ABR ) > 0 ){
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
/* ************* */ /* ************************** */
&a10t, /**/ &alpha11, &a12t,
ABL, /**/ ABR, &A20, /**/ &a21, &A22,
1, 1, FLA_BR );
FLA_Repart_2x1_to_3x1( AT, &A0,
/* ** */ /* *** */
&a1t,
AB, &A2, 1, FLA_BOTTOM );
FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
/* ************* */ /* ************************ */
&t10t, /**/ &tau11, &t12t,
TBL, /**/ TBR, &T20, /**/ &t21, &T22,
1, 1, FLA_BR );
FLA_Repart_2x1_to_3x1( pT, &p0,
/* ** */ /* *** */
&pi1,
pB, &p2, 1, FLA_BOTTOM );
FLA_Repart_2x1_to_3x1( wT, &w0,
/* ** */ /* *** */
&omega1,
wB, &w2, 1, FLA_BOTTOM );
/*------------------------------------------------------------*/
// Ignore minus inputs for LAPACK compatibility.
// NOTE(review): the test below uses FLA_Obj_le, which presumably means
// "<= 0", so a zero entry in p would also take the no-pivot branch even
// though the comment above only mentions negative inputs (the var2
// routine uses FLA_Obj_lt here) - confirm which is intended.
if ( FLA_Obj_le( pi1, FLA_ZERO ) == FALSE )
{
// Determine pivot index
FLA_Amax_external( wB, pi1 );
FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, wB );
// Apply pivots to the remaining rows of both AL and AR at once.
FLA_Merge_1x2( ABR, AB, &AB1 );
FLA_Apply_pivots( FLA_RIGHT, FLA_TRANSPOSE, pi1, AB1 );
}
else
{
// Do not pivot.
FLA_Set( FLA_ZERO, pi1 );
}
// Compute tau11 and u21 from alpha11 and a21 such that tau11 and u21
// determine a Householder transform H such that applying H from the
// left to the column vector consisting of alpha11 and a21 annihilates
// the entries in a21 (and updates alpha11).
FLA_Househ2_UT( FLA_LEFT,
alpha11,
a21, tau11 );
// Apply H to (a12t A22)^T
// / a12t \ = H / a12t \
// \ A22 / \ A22 /
//
// where H is formed from tau11 and u21.
// The AR pieces (a1t, A2) are merged in so that they are updated too;
// AB1 is reused here for a different view than in the pivot step above.
FLA_Merge_1x2( A22, A2, &AB1 );
FLA_Merge_1x2( a12t, a1t, &at1 );
FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, at1,
AB1 );
// t01 = a10t' + A20' * u21;   (u21 is held in a21 after FLA_Househ2_UT)
FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
// Apply pivots to previous rows
// (pi1 was reset to zero in the no-pivot branch, so this is skipped then;
// it is also skipped when the search picked index zero.)
if ( FLA_Obj_le( pi1, FLA_ZERO ) == FALSE )
{
FLA_Merge_1x2( ATR, AT, &AT1 );
FLA_Apply_pivots( FLA_RIGHT, FLA_TRANSPOSE, pi1, AT1 );
}
// Norm downdate of w2 using the merged, freshly updated row at1
// (a12t and a1t), with coefficient FLA_MINUS_ONE.
FLA_QR_UT_piv_colnorm( FLA_MINUS_ONE, at1, w2 );
/*------------------------------------------------------------*/
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
a10t, alpha11, /**/ a12t,
/* ************** */ /* ************************ */
&ABL, /**/ &ABR, A20, a21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x1_to_2x1( &AT, A0,
a1t,
/* ** */ /* *** */
&AB, A2, FLA_TOP );
FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
t10t, tau11, /**/ t12t,
/* ************** */ /* ********************** */
&TBL, /**/ &TBR, T20, t21, /**/ T22,
FLA_TL );
FLA_Cont_with_3x1_to_2x1( &pT, p0,
pi1,
/* ** */ /* *** */
&pB, p2, FLA_TOP );
FLA_Cont_with_3x1_to_2x1( &wT, w0,
omega1,
/* ** */ /* *** */
&wB, w2, FLA_TOP );
}
return FLA_SUCCESS;
}
| FLA_Error FLA_QR_UT_piv_unb_var2 | ( | FLA_Obj | A, |
| FLA_Obj | T, | ||
| FLA_Obj | w, | ||
| FLA_Obj | p | ||
| ) |
References FLA_Amax_external(), FLA_Apply_H2_UT_piv_row(), FLA_Apply_pivots(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt_external(), FLA_Gemv_external(), FLA_Househ2_UT(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_lt(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_QR_UT_piv_colnorm(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), and FLA_ZERO.
Referenced by FLA_QR_UT_piv_internal().
{
// Unblocked (one column per iteration) pivoted QR factorization via the UT
// transform, variant 2.
//
// Unlike variant 1, the trailing matrix A22 is not updated here. Each step
// only brings the pivot column up to date (using ABL and t01), builds the
// Householder reflector for it, and updates the pivot row through
// FLA_Apply_H2_UT_piv_row(), which also fills t12t.
//
// A - matrix being factored; overwritten in place.
// T - receives tau11 on the diagonal, t01 above it and t12t to the right.
// w - vector searched by FLA_Amax_external() for the pivot and
//     downdated by FLA_QR_UT_piv_colnorm() after each step.
// p - pivot vector; its length determines the number of iterations.
//
// Always returns FLA_SUCCESS; return values of the called routines
// (including the workspace allocation) are not inspected.
FLA_Obj ATL, ATR, A00, a01, A02,
ABL, ABR, a10t, alpha11, a12t,
A20, a21, A22;
FLA_Obj TTL, TTR, T00, t01, T02,
TBL, TBR, t10t, tau11, t12t,
T20, t21, T22;
FLA_Obj pT, p0,
pB, pi1,
p2;
FLA_Obj wT, w0,
wB, omega1,
w2;
FLA_Obj ab1, v;
// Create workspace: a 1 x width(T) row with T's datatype, used as scratch
// by FLA_Apply_H2_UT_piv_row() and released before returning.
FLA_Obj_create( FLA_Obj_datatype( T ), 1, FLA_Obj_width( T ), 0, 0, &v );
FLA_Part_2x2( A, &ATL, &ATR,
&ABL, &ABR, 0, 0, FLA_TL );
FLA_Part_2x2( T, &TTL, &TTR,
&TBL, &TBR, 0, 0, FLA_TL );
FLA_Part_2x1( p, &pT,
&pB, 0, FLA_TOP );
FLA_Part_2x1( w, &wT,
&wB, 0, FLA_TOP );
// The loop is driven by the remaining pivot entries (pB), not by ABR, so
// the number of columns factored is set by the size of p.
while ( FLA_Obj_min_dim( pB ) > 0 ) {
FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
/* ************* */ /* ************************** */
&a10t, /**/ &alpha11, &a12t,
ABL, /**/ ABR, &A20, /**/ &a21, &A22,
1, 1, FLA_BR );
FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
/* ************* */ /* ************************ */
&t10t, /**/ &tau11, &t12t,
TBL, /**/ TBR, &T20, /**/ &t21, &T22,
1, 1, FLA_BR );
FLA_Repart_2x1_to_3x1( pT, &p0,
/* ** */ /* *** */
&pi1,
pB, &p2, 1, FLA_BOTTOM );
FLA_Repart_2x1_to_3x1( wT, &w0,
/* ** */ /* *** */
&omega1,
wB, &w2, 1, FLA_BOTTOM );
/*------------------------------------------------------------*/
// ** Ignore minus inputs for LAPACK compatibility.
if ( FLA_Obj_lt( pi1, FLA_ZERO ) == FALSE )
{
// ** Determine pivot index
FLA_Amax_external( wB, pi1 );
// ** BLIS returns -1 if it fails to search the maximum value
if ( FLA_Obj_lt( pi1, FLA_ZERO ) == TRUE )
FLA_Set( FLA_ZERO, pi1 );
// ** Apply a pivot on column norms
FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, wB );
// ** Apply a pivot on ABR
FLA_Apply_pivots( FLA_RIGHT, FLA_TRANSPOSE, pi1, ABR );
// ** Apply a pivot on TTR
FLA_Apply_pivots( FLA_RIGHT, FLA_TRANSPOSE, pi1, TTR );
}
else
{
// ** Do not pivot.
FLA_Set( FLA_ZERO, pi1 );
}
// ** Update the pivot column
// ab1 is alpha11 stacked on top of a21, i.e. the current column
// from the diagonal downwards.
FLA_Merge_2x1( alpha11,
a21, &ab1 );
// ab1 = ab1 - ABL t01
FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, ABL, t01, FLA_ONE, ab1 );
// ** Find the householder reflector on that column
FLA_Househ2_UT( FLA_LEFT, alpha11,
a21, tau11 );
// ** Update the pivot row (a12t); t12t and v serve as work vectors.
FLA_Apply_H2_UT_piv_row( tau11, a12t, a10t, T02,
a21, A22, A20, t12t,
v );
// ** Apply pivots on ATR
// (unconditional: pi1 has been set to zero when no pivoting was done)
FLA_Apply_pivots( FLA_RIGHT, FLA_TRANSPOSE, pi1, ATR );
// ** Norm downdate w2 = w2 - columnwisenorm2(a12t)
FLA_QR_UT_piv_colnorm( FLA_MINUS_ONE, a12t, w2 );
// ** Update T matrix
// t01 = a10t' + A20' * u21;   (u21 is held in a21 after FLA_Househ2_UT)
// This overwrites the t01 consumed by the column update above, so it
// has to stay after that update.
FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
/*------------------------------------------------------------*/
FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
a10t, alpha11, /**/ a12t,
/* ************** */ /* ************************ */
&ABL, /**/ &ABR, A20, a21, /**/ A22,
FLA_TL );
FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
t10t, tau11, /**/ t12t,
/* ************** */ /* ********************** */
&TBL, /**/ &TBR, T20, t21, /**/ T22,
FLA_TL );
FLA_Cont_with_3x1_to_2x1( &pT, p0,
pi1,
/* ** */ /* *** */
&pB, p2, FLA_TOP );
FLA_Cont_with_3x1_to_2x1( &wT, w0,
omega1,
/* ** */ /* *** */
&wB, w2, FLA_TOP );
}
// Free the workspace
FLA_Obj_free( &v);
return FLA_SUCCESS;
}
1.7.6.1