BLAS-Prime: Basic Linear Algebra Subprograms - StephenG1CMZ - 09-08-2019 11:11 PM
I have been comparing the functionality of the common Basic Linear Algebra Subprograms and those operations built into the Prime.
In doing so, I have implemented some BLAS functions in PPL, based on the Fortran reference code that is unoptimised. Or using a built-in if obvious.
RE: BLAS-Prime: Basic Linear Algebra Subprograms - StephenG1CMZ - 09-08-2019 11:13 PM
V 0.1 implements most of BLAS Level 1 functions in single precision.
Main code:
Code:
//CUSTOMISE
LOCAL RABL:=1; //RETURN AS BLAS OR LIST
//TBD:BETTER EXAMPLE
//SET RAL TO 1: ENABLES THE BLAS-LIKE SYNTAX
//sto(SSWAP(XX,YY),{'XX','YY'}); //XX AND YY UPDATE
//a list of parameters can be updated
//SET RAL:=0: USE THE PPL SYNTAX
//RESULTS:=SSWAP(XX,YY);
//XX:=RESULTS(1);YY:=RESULTS(2);...ETC
LOCAL RAN:=1; //RESULT APPROXIMATE NUMERIC
//1-(RESULT APPROX NUMERIC: DEFAULT)
//0-(RESULT PPL EXACT WHEN AVAILABLE FROM IMPLEMENT)
//END CUSTOM
EXPORT ABOUT()
BEGIN
PRINT();
PRINT("BLAS-Prime V0.1 © StephenG1CMZ");
PRINT("Based on BLAS Reference Fortran code (not optimised)");
PRINT("http://www.netlib.org/blas/#_reference_blas_version_3_8_0");
PRINT("This PPL implementation is not verified ");
PRINT("V0.1: mimics BLAS Level 1");
PRINT("No N or stride");
PRINT("Some Givens rotations TBD");
END;
//PPL SUPPORT ROUTINES
RET(PARAM)
BEGIN
RETURN IFTE(RABL,{PARAM},PARAM);
END;
EXPORT SFPINFO () //ENVIRONMENT
BEGIN
END;
EXPORT MYCONJ(VEC)
BEGIN
LOCAL CNJVEC;
//CNJVC:=list2mat(conj(mat2list(VEC)));
LOCAL LVEC:=mat2list(VEC);
LOCAL LCNJVEC:=(RE(LVEC)-IM(LVEC)*);//CONJ NEGATES IM
CNJVEC:=list2mat(LCNJVEC,SIZE(LCNJVEC));
//PRINT("CNJ: "+CNJVEC);
RETURN CNJVEC(1);//AN ARTIFACT OF SYNTAX ON VECTORS
END;
//BLAS
EXPORT BLAS_L1_vector() //HEADING
BEGIN
END;
//BLAS LEVEL 1 VECTOR OPERATIONS
//Complex Single
//CROTG - setup Givens rotation
EXPORT CSROTGTBD()
BEGIN
END;
//CSROT - apply Givens rotation
EXPORT CSROTTBD()
BEGIN
END;
//CSWAP - swap x and y
EXPORT CSWAP(VECX,VECY)
//call: sto(SSWAP(XX,YY),{'XX','YY'});
BEGIN
RETURN {VECY,VECX};//IDENTICAL
END;
//CSCAL - x = a*x
EXPORT CSCAL(SCL,VECX) // SUBROUTINE CSCAL(N,CA,CX,INCX)
//*> CSCAL scales a vector by a constant.
//*> \param[in] N
//*> \param[in] CA CA is COMPLEX
//*> \param[in,out] CX CX Is COMPLEX array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
BEGIN
RET (SCL*VECX);//IDENTICAL
END;
//CSSCAL - x = a*x
EXPORT CSSCAL(SCL,VECX) // SUBROUTINE CSSCAL(N,SA,CX,INCX)
//*> CSSCAL scales a complex vector by a real constant.
//*> \param[in] N
//*> \param[in] SA SA is REAL
//*> \param[in,out] CX CX is COMPLEX array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
BEGIN
RET (SCL*VECX);//IDENTICAL
END;
//CCOPY - copy x into y
EXPORT CCOPY(VECX,VECY) // SUBROUTINE CCOPY(N,CX,INCX,CY,INCY)
//*> CCOPY copies a vector x to a vector y.
//*> \param[in] N
//*> \param[in] CX CX is COMPLEX array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
//*> \param[out] CY CY is COMPLEX array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
//NOTE:MY IMPLEMENT CURRENTLY REPLACES Y BY X (LEAVING IT SHORTER WHEN SIZE(X)<SIZE(Y))
BEGIN
RET (VECX);
END;
//CAXPY - y = a*x + y
EXPORT CAXPY(SCL,VECX,VECY) // SUBROUTINE CAXPY(N,CA,CX,INCX,CY,INCY)
//*> CAXPY constant times a vector plus a vector.
//*> \param[in] N
//*> \param[in] CA CA is COMPLEX
//*> On entry, CA specifies the scalar alpha.
//*> \param[in] CX CX is COMPLEX array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
//*> \param[in,out] CY CY is COMPLEX array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
BEGIN
RETURN {(SCL*VECX+VECY)};//IDENTICAL
END;
//CDOTU - dot product
EXPORT CDOTU(VECX,VECY) // COMPLEX FUNCTION CDOTU(N,CX,INCX,CY,INCY)
// COMPLEX CX(*),CY(*)
//*> CDOTU forms the dot product of two complex vectors
//*> CDOTU = X^T * Y
//*> \param[in] CX CX is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
//*> \param[in] CY CY is REAL array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
BEGIN
//LOCAL II;
//LOCAL CTMP:=0;
//LOCAL DIMX:=DIM(VECX);
//LOCAL DIMY:=DIM(VECY);
//LOCAL NNN:=MIN({DIMX(1),DIMX(1)});
//MSGBOX(DIM(VECX));
//FOR II:=1 TO NNN DO
// CTMP:=CTMP+VECX(II)*VECY(II);
//END;
//RETURN CTMP;
RETURN dot(VECX,VECY);//IDENTICAL
END;
//CDOTC - dot product, conjugating the first vector
EXPORT CDOTC(VECX,VECY) // COMPLEX FUNCTION CDOTC(N,CX,INCX,CY,INCY)
//* COMPLEX CX(*),CY(*)
//*> CDOTC forms the dot product of two complex vectors
//*> CDOTC = X^H * Y
//*> \param[in] CX CX is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
//*> \param[in] CY CY is REAL array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
BEGIN
//PRINT({VECX,VECY});
//PRINT({MYCONJ(VECX),VECY});
RETURN CDOTU(MYCONJ(VECX),VECY);
END;
//SCASUM - sum of absolute values
EXPORT SCASUM(VEC) // REAL FUNCTION SCASUM(N,CX,INCX)
//*> SCASUM takes the sum of the (|Re(.)| + |Im(.)|)'s of a complex vector and
//*> returns a single precision result.
//*> \param[in,out] CX CX is COMPLEX array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
//Query: CX is inout but unchanged
//sum(abs) re and im separately
BEGIN
//RETURN SASUM(RE(mat2list(VEC)))+SASUM(IM(mat2list(VEC)))*;
RETURN CAS.l1norm(VEC);
//RETURN SASUM(VEC);//IDENTICAL
END;
//ICAMAX - index of max abs value
EXPORT ICAMAX(VC)
BEGIN
LOCAL STRIDE:=1;
//ICAMAX and IZAMAX find the first element xk, where k is defined as the smallest index k, such that:
//|ak|+|bk| = max{|aj|+|bj| for j = 1, n}
//where xk = (ak, bk)
//By specifying a positive or negative stride for vector x, the first or last occurrence, respectively, is found in the array. The position i, returned as the value of the function, is always figured relative to the location specified in the calling sequence for vector x (in argument x). Therefore, depending on the stride specified for incx, i has the following values:
//For incx ≥ 0, i = k
//For incx < 0, i = n-k+1
LOCAL LVC:=mat2list(VC);
LOCAL LBOTH:=ABS(RE(LVC))+ABS(IM(LVC));
//IF STRIDE<0 THEN NONSTANDARD
// RETURN ICAMAX(list2mat(REVERSE(LVC)),−STRIDE);//TBD
//END;
IF STRIDE>1 THEN
//TBD;
END;
RETURN POS(LBOTH,MAX(LBOTH));
END;
//Real Single
//SROTG - setup Givens rotation
EXPORT SROTGTBD(SA,SB) // SUBROUTINE SROTG(SA,SB,C,S)* .. Scalar Arguments ..
//REAL C,S,SA,SB
BEGIN
LOCAL CC,SS;
MSGBOX("TBD");
RETURN {CC,SS};
END;
//SROTMG - setup modified Givens rotation
EXPORT SROTMGTBD(SD1,SD2,SX1,SY1) // SUBROUTINE SROTMG(SD1,SD2,SX1,SY1,SPARAM)
//*> \param[in,out] SD1 SD1 is REAL
//*> \param[in,out] SD2 SD2 is REAL
//*> \param[in,out] SX1 SX1 is REAL
//*> \param[in] SY1 SY1 is REAL
//*> \param[out] SPARAM SPARAM is REAL array, dimension (5)
//*> SPARAM(1)=SFLAG
//*> SPARAM(2)=SH11
//*> SPARAM(3)=SH21
//*> SPARAM(4)=SH12
//*> SPARAM(5)=SH22
BEGIN
LOCAL SD1OUT,SD2OUT,SX1OUT;
LOCAL PARAM:={};
MSGBOX("TBD");
RETURN {SD1OUT,SD2OUT,SX1OUT,PARAM};
END;
//SROT - apply Givens rotation
EXPORT SROTTBD(VECSX,VECSY,CC,SS) //SUBROUTINE SROT(N,SX,INCX,SY,INCY,C,S)
//*> \param[in,out] SX SX is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX )
//*> \param[in] INCX INCX is INTEGER
//*> \param[in,out] SY SY is REAL array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
//*> \param[in] INCY INCY is INTEGER
//*> \param[in] C C is REAL
//*> \param[in] S S is REAL
BEGIN
LOCAL SX,SY;
MSGBOX("TBD");
RETURN {SX,SY};
END;
//SROTM - apply modified Givens rotation
EXPORT SROTMTBD(VECSX,VECSY,SPARAM) //SUBROUTINE SROTM(N,SX,INCX,SY,INCY,SPARAM)
//*> \param[in] N N is INTEGER
//*> number of elements in input vector
//*> \param[in,out] SX SX is REAL array, dimension ( 1 + ( N - 1 )*abs( INCX )
//*> \param[in,out] SY SY is REAL array, dimension ( 1 + ( N - 1 )*abs( INCY ) )
//*> \param[in] SPARAM SPARAM is REAL array, dimension (5)
//*> SPARAM(1)=SFLAG
//*> SPARAM(2)=SH11
//*> SPARAM(3)=SH21
//*> SPARAM(4)=SH12
//*> SPARAM(5)=SH22
BEGIN
LOCAL SX,SY;
RETURN {SX,SY};
END;
//SSWAP - swap x and y
EXPORT SSWAP(VECX,VECY) // SUBROUTINE SSWAP(N,SX,INCX,SY,INCY)
//call: sto(SSWAP(XX,YY),{'XX','YY'});
BEGIN
RETURN {VECY,VECX};//IDENTICAL
END;
//SSCAL - x = a*x
EXPORT SSCAL(SCL,VEC) //SUBROUTINE SSCAL(N,SA,SX,INCX)
//call: sto(SSCAL(SCL,VEC),{'VEC'});
BEGIN
RET (SCL*VEC);//IDENTICAL
END;
//SCOPY - copy x into y
EXPORT SCOPY(VECX,VECY) // SUBROUTINE SCOPY(N,SX,INCX,SY,INCY)
//NOTE:MY IMPLEMENT CURRENTLY REPLACES Y BY X (LEAVING IT SHORTER WHEN SIZE(X)<SIZE(Y))
BEGIN
RET (VECX);//IDENTICAL
END;
//SAXPY - y = a*x + y //const*VEC+VEC
EXPORT SAXPY(SCL,VECX,VECY) // SUBROUTINE SAXPY(N,SA,SX,INCX,SY,INCY)
BEGIN
RETURN CAXPY(SCL,VECX,VECY);//IDENTICAL (ASSUMING REAL VECS)
//RETURN {(SCL*VECX+VECY)};//IDENTICAL
END;
//SDOT - dot product //of 2 vectors
EXPORT SDOT(VECX,VECY) // REAL SDOT(N,SX,INCX,SY,INCY)
BEGIN
//PRINT({VECX,VECY});
RETURN CDOTU(VECX,VECY);//IDENTICAL (ASSUMING VECS ARE REAL)
//RETURN dot(VECX,VECY);//IDENTICAL
END;
//SDSDOT - dot product with extended precision accumulation
EXPORT SDSDOT(SB,VECX,VECY) //REAL FUNCTION SDSDOT(N,SB,SX,INCX,SY,INCY)
//THE ORIGINAL FORTRAN IMPLEMENTS DOT USING DOUBLES TO IMPROVE PRECISION
//PPL: NO EXTENDED PRECISION (iust making parameters of dot exact doesnt help)
BEGIN
RETURN SB+SDOT(VECX,VECY);
END;
//SNRM2 - Euclidean norm
EXPORT SNRM2(VECX) //REAL FUNCTION SNRM2(N,X,INCX)
BEGIN
LOCAL EUC:=CAS.l2norm(VECX);
RETURN IFTE(RAN,approx(EUC),EUC);
END;
//These currently return exact if RAN=0
//SCNRM2- Euclidean norm of complex
EXPORT SCNRM2(CVECX)
BEGIN
LOCAL EUC:=CAS.l2norm(CVECX);
RETURN IFTE(RAN,approx(EUC),EUC);
END;
//SASUM - sum of absolute values
EXPORT SASUM(VEC) // REAL FUNCTION SASUM(N,SX,INCX)
BEGIN
//RETURN ΣLIST(ABS(mat2list(VEC)));
RETURN CAS.l1norm(VEC);
//RETURN CSASUM(VEC);//IDENTICAL
END;
//ISAMAX - index of max abs value
EXPORT ISAMAX(VEC) //INTEGER FUNCTION ISAMAX(N,SX,INCX)
//INDEX OF MAX ABS REAL
//INDEX OF MAX ABS (REALPART+IMPART) Z
//NB THE CODE IS OPTIMISED ASSUMING REAL.
//IT WILL GIVE DIFFERENT RESULTS THAN ICAMAX IF GIVEN COMPLEX.
//IF YOU WANT IDENTICAL RESULTS CALL ICAMAX
BEGIN
LOCAL STRIDE:=1;
LOCAL LVC:=mat2list(VEC);
LOCAL LBOTH:=ABS(LVC);//OPTIMISED FOR REAL.
//ABS(RE(LVC))+ABS(IM(LVC));//COMPLEX
//IF STRIDE<0 THEN NONSTANDARD
// RETURN ICAMAX(list2mat(REVERSE(LVC)),−STRIDE);//TBD
//END;
IF STRIDE>1 THEN
//TBD;
END;
RETURN POS(LBOTH,MAX(LBOTH));
//RETURN ICAMAX(VEC);//UNOPTIMISED
END;
//EXTRAS: NOT BLAS STANDARD
//REAL SINGLE
EXPORT ISMAX (VC)
//INDEX MAXIMUM (CF ISAMAX)
//OK REAL
BEGIN
LOCAL LVC:=mat2list(VC);
LOCAL MX:=MAX(LVC);
RETURN POS(LVC,MX);
END;
EXPORT ISMIN (VC)
//INDEX MINIMUM (CF ISMAX ISAMAX)
//OK REAL
BEGIN
LOCAL LVC:=mat2list(VC);
LOCAL MN:=MIN(LVC);
RETURN POS(LVC,MN);
END;
EXPORT BLAS()
BEGIN
//PPL TYPE DOES NOT DISTINGUISH VECTORS AND MATRICES
//PRINT(TYPE([1]));
//PRINT(TYPE([[1],[2]]));
END;
Some Givens rotations:
Code:
EXPORT CABS(NUM)
//FTN COMPLEX ABS
BEGIN
RETURN ABS(NUM);
//RETURN SQRT(RE(NUM)^2+IM(NUM)^2);
END;
EXPORT SIGNFTN(AA,BB)
//FTN COPYSIGN RETURNS SGN(BB)*AA
//IF B ZERO ABS(A)
//RETURN {1,0,−1} (SOME IMPLEMENTS ALLOW −0 RETURN)
BEGIN
RETURN IFTE(BB<0,-ABS(AA),ABS(AA));
END;
EXPORT COPYSIGN(AVALUE,ASIGN)
BEGIN
RETURN IFTE(ASIGN<0,-ABS(AVALUE),ABS(AVALUE));
//RETURN SIGNFTN(AVALUE,ASIGN);
END;
EXPORT CONJG(ZZ)
//CONJUGATE OF A COMPLEX NUMBER
//FTN
BEGIN
RETURN RE(ZZ)-IM(ZZ)*;
END;
EXPORT CROTG(CXA,CXB) // * SUBROUTINE CROTG(CA,CB,C,S)
//* COMPLEX CA,CB,S
//* REAL C
//CROTG determines a complex Givens rotation.
//*> \param[in] CA CA is COMPLEX
//*> \param[in] CB CB is COMPLEX
//*> \param[out] C C is REAL
//*> \param[out] S S is COMPLEX
//call: sto(CROTG(AA,BB),{'AA','BB','CC','SS'})
BEGIN
LOCAL CA:=CXA; //LOCAL CB:=CXB;//INOUTS
LOCAL CC,SS;
// COMPLEX SS
// REAL CC
//* .. Local Scalars ..
LOCAL ALPHA; //COMPLEX
LOCAL NORM,SCALE; //REAL
//* .. Intrinsic Functions ..
// INTRINSIC CABS,CONJG,SQRT
//PRINT({"CROTG IN:",CA,CXB});
IF (CABS(CA)==0.) THEN
CC:= 0.;
SS:= 1.0; //(1.,0.);
CA:= CXB;
ELSE
SCALE:= CABS(CA) + CABS(CXB);
//SCALE:=1;//SIMPLER
NORM:= SCALE*SQRT((CABS(CA/SCALE))^2+ (CABS(CXB/SCALE))^2);
ALPHA:= CA/CABS(CA);
CC:= CABS(CA)/NORM;
SS:= ALPHA*CONJG(CXB)/NORM;
CA:= ALPHA*NORM;
END;//IF;
//RETURN ROUND({CA,CC,SS},2);//SIMPLER VALUES IN TESTS
RETURN {CA,CC,SS}; //RETURN 2 COMPLEXES AND CC REAL? (CB UNCHANGED)
END;
EXPORT CSROT(VECX,VECY,CS,SN) // SUBROUTINE CSROT( N, CX, INCX, CY, INCY, CS, SN)
//* .. Scalar Arguments ..
//* INTEGER INCX, INCY, N
//* REAL CS, SN
//* .. Array Arguments ..
//* COMPLEX CX( * ), CY( * )
//*> CSROT applies a plane rotation, where the cos and sin (c and s) are real
//*> and the vectors cx and cy are complex.
//*> \param[in,out] CX CX is COMPLEX array,
//*> \param[in,out] CY CY is COMPLEX array,
//*> \param[in] CS CS is REAL
//*> On entry, C specifies the cosine, cos.
//*> \param[in] SN SN is REAL
BEGIN
LOCAL VECXOUT,VECYOUT;
VECXOUT:=CS*VECX+SN*VECY;
VECYOUT:=CS*VECY-SN*VECX;
RETURN ROUND({VECXOUT,VECYOUT},3);//TESTS
RETURN {VECXOUT,VECYOUT};
END;
EXPORT SROTG(SA,SB) // SUBROUTINE SROTG(SA,SB,C,S)
// REAL C,S,SA,SB
//*> SA is REAL
//*> SB is REAL
//*> \param[out] C C is REAL
//*> \param[out] S S is REAL
//call: sto(SROTG(AA,BB),{'AA','BB','CC','SS'})
//*> SROTG construct givens plane rotation.
//A real Givens plane rotation is constructed for values a and b by computing values for r, c, s, and z, where:
//Real Givens Plane Rotation Math Graphic
BEGIN
LOCAL RR,ROE,SCALE,ZZ;
LOCAL CC;
LOCAL SS;
ROE:=IFTE(ABS(SA)>ABS(SB),SA,SB);
SCALE:= ABS(SA) + ABS(SB);//AVOID LOP
IF (SCALE==0.0) THEN
CC:= 1.0;
SS:= 0.0;
RR:= 0.0;
ZZ:= 0.0;
ELSE
RR:=SCALE*SQRT((SA/SCALE)^2+ (SB/SCALE)^2); //AVOID LOP
//FTN:SCALING TO AVOID LOP
//PPL:EXAMPLES BETTER WITHOUT (ALT. ROUND OUTPUTS)
//RR:=SQRT((SA)^2+ (SB)^2);
RR:=COPYSIGN(1.0,ROE)*RR; //COPYSIGN
//RR:=SIGN(ROE)*RR;
CC:= SA/RR;
SS:= SB/RR;
ZZ:= 1.0;
IF (ABS(SA)>ABS(SB)) THEN ZZ:= SS; END;
IF (ABS(SB)≥ABS(SA) AND CC≠0.0) THEN ZZ:= 1.0/CC; END;
END;// IF
//SA := RR;//RR AND ZZ EXPECTED TO REPLACE SA SB
//SB := ZZ;
//RETURN ROUND({RR,ZZ,CC,SS},3);//SIMPLER VALUES IN TESTS
RETURN {RR,ZZ,CC,SS}; //RETURN 4 REALS
END;
EXPORT EXGIVENS()
BEGIN
LOCAL CS:=0.5;
LOCAL SN:=(√(3.0))/2.0;
PRINT();
PRINT({SROTG(0.0,0.0),"=",{0,0,1,0}});
PRINT({SROTG(0.0,2.0),"=",{2,1,0,1}});
PRINT({SROTG(6.0,−8.0),"=",{−10,−1.667,−0.6,0.8}});
PRINT({SROTG(8.0,6.0),"=",{10,0.6,0.8,0.6}});
PRINT({CROTG(0.0,1.0),"=", "A=(1.0, 0.0) C = 0.0 S = (1.0, 0.0)"});
PRINT({CROTG((3.0+ 4.0*),(4.0+6.0*)),"=", " A = (5.26, 7.02) C = 0.57 S = (0.82, -0.05) "});
PRINT({CSROT([(1.0+ 2.0*),(2.0+ 3.0*), (3.0+ 4.0*)],[(-1.0+ 5.0*),(-2.0+ 4.0*),(-3.0+ 3.0*)],CS,SN)});
PRINT({"=","X=((-0.366, 5.330), (-0.732, 4.964), (-1.098, 4.598))
Y=((-1.366, 0.768),(-2.732, -0.598),(-4.098, -1.964))"})
END;
EXPORT GIVENS()
BEGIN
//A VECTOR MUST BE <20000
//BUT SOME ROUTINES USE MATLIST SO 10000
LOCAL CS:=0.5;
LOCAL SN:=(√3)/2;
LOCAL VECX:=MAKEMAT(10*J/1000,1000);
LOCAL VECY:=MAKEMAT(10*J/1000,1000);
PRINT();
PRINT("GO "+DIM(VECX));
PRINT({TEVAL(CROTG((3.0+ 4.0*),(4.0+6.0*)))});
PRINT({TEVAL(CSROT(VECX,VECY,CS,SN))});
END;
Vectors in PPL can have 20 000-1 elements, but some functions are implemented using mat2list, and lists only have 10 000 elements. There is currently no error checking.
Note: Some functions may round results. This is simply for clarity in displaying test results.
|