1       SUB RANK0C &
        ! 500.22 - STAT11 - RANK0C &
        ! &
        ! RANK CORRELATION MODULE &
        ! &
        ! RELEASED FOR SUBMISSION TO THE DECUS LIBRARY BY THE DEC &
        ! ENGINEERING SYSTEMS GROUP AND THE EDUCATION PRODUCTS GROUP &
        ! SEPTEMBER, 1977 &
        !
2       ! COPYRIGHT (C) 1973, DIGITAL EQUIPMENT CORPORATION, &
        ! &
        ! MAYNARD, MASSACHUSETTS
3       ! THIS SOFTWARE IS FURNISHED TO PURCHASER UNDER A LICENSE FOR USE &
        ! ON A SINGLE COMPUTER SYSTEM AND CAN BE COPIED (WITH INCLUSION OF &
        ! DEC'S COPYRIGHT NOTICE) ONLY FOR USE IN SUCH SYSTEM, EXCEPT AS &
        ! MAY OTHERWISE BE PROVIDED IN WRITING BY DEC.
4       ! THE INFORMATION IN THIS DOCUMENT IS SUBJECT TO CHANGE WITHOUT &
        ! NOTICE AND SHOULD NOT BE CONSTRUED AS A COMMITMENT BY DIGITAL &
        ! EQUIPMENT CORPORATION.
5       ! DEC ASSUMES NO RESPONSIBILITY FOR USE OR RELIABILITY OF ITS &
        ! SOFTWARE ON EQUIPMENT WHICH IS NOT SUPPLIED BY DEC. &
        ! &
        !
6       ! THIS MODULE PERFORMS RANK CORRELATION, COMPUTING THE KENDALL RANK &
        ! CORRELATION COEFFICIENT, STANDARD DEVIATION, AND SIGNIFICANCE FOR ANY &
        ! TWO VARIABLES IN THE DATA MATRIX.
7       ! AUTHOR: MICHAEL D. KNAUER &
        ! &
        ! VERSION NUMBER: 001 &
        ! &
        ! DATE: OCTOBER, 1973 &
        ! &
        !
8       ! MODIFICATIONS: MAY, 1975 &
        ! &
        ! MODIFIED TO ACCEPT MISSING DATA POINTS &
        ! &
        ! BY ARDOTH HASSLER WILSON &
        ! CENTRAL STATE UNIVERSITY &
        ! EDMOND, OKLAHOMA &
        !
00009   ! MODIFICATIONS: JUNE, 1976 &
        ! &
        ! CTRL/C TRAP ADDED
10      ! &
        ! CALLING ARGUMENTS
11      ! 1) VARIABLE NAME: F3$ &
        ! RANGE OF VALUES: S0000.RWM - S99999.RWM &
        ! USE: NAME OF THE 250 ROW BY 15 COLUMN &
        ! VIRTUAL DATA MATRIX.
13      ! 2) VARIABLE NAME: R% &
        ! RANGE OF VALUES: 1 - 250 &
        ! USE: NUMBER OF ROWS IN DATA MATRIX
15      ! 3) VARIABLE NAME: C% &
        ! RANGE OF VALUES: 1 - 15 &
        ! USE: NUMBER OF COLUMNS IN DATA MATRIX &
        ! &
        !
20      ! &
        ! RETURNING ARGUMENTS &
        ! &
        ! NONE -- THIS MODULE DOES NOT CHANGE OR ADD TO ANY OF THE &
        ! FILES OR VARIABLES PASSED TO IT. &
        ! &
        !
30      ! DESCRIPTION OF FUNCTION
31      ! THIS MODULE PERFORMS RANK CORRELATION, COMPUTING THE KENDALL &
        ! RANK CORRELATION COEFFICIENT, STANDARD DEVIATION, AND &
        ! SIGNIFICANCE FOR ANY TWO VARIABLES IN THE DATA MATRIX.
32      ! IT BEGINS BY RANKING THE OBSERVATIONS IN EACH OF THE TWO &
        ! VARIABLES, AND THEN COUNTING THE TIES FOR RANKS.
33      ! THEN IT WILL PRINT THE DATA AND RANKS, IF THE USER DESIRES.
34      ! NEXT IT SORTS THE RANKS FOR VARIABLE A IN ASCENDING &
        ! ORDER, KEEPING THE VARIABLE B RANKS MATCHING.
35      ! THEN IT SCORES THE VARIABLE B RANKS, ESSENTIALLY SEEING HOW &
        ! CLOSE TO PERFECT ASCENDING RANKING ORDER THEY ARE.
36      ! WITH THIS SCORE AND WITH THE COUNTS OF TIES FOR RANKS, IT &
        ! FINALLY COMPUTES AND PRINTS THE KENDALL RANK CORRELATION &
        ! COEFFICIENT, STANDARD DEVIATION, AND SIGNIFICANCE (Z).
37      ! THE USER CAN HAVE MORE RANK CORRELATIONS DONE. OTHERWISE, &
        ! CONTROL IS RETURNED TO STATCM.
40      ! &
        ! WORKING VARIABLES (AS USED BY THE CODE BELOW) &
        ! &
        ! F$      COPY OF SAV.F$, THE ARGUMENT STRING FROM COMMON &
        ! H9      VALUE THAT MARKS A MISSING DATA POINT IN Z &
        ! V1%,V2% COLUMN NUMBERS OF THE TWO VARIABLES CHOSEN BY THE USER &
        ! N5      NUMBER OF ROWS IN WHICH BOTH VARIABLES ARE PRESENT &
        ! A(),B() DATA POINTS OF THE TWO VARIABLES (SWAPPED AT LINE 1930) &
        ! R(),T() RANKS. AFTER LINE 1990, T HOLDS THE RANKS OF THE FIRST &
        !         VARIABLE AND R HOLDS THE RANKS OF THE SECOND VARIABLE &
        ! M9      N5*(N5-1) &
        ! C9,C8   TIE CORRECTIONS FOR THE FIRST AND SECOND VARIABLE &
        ! S1      SCORE, T9 = TAU, S8 = SD, Z1 = TAU/SD &
        ! D9      PRODUCT UNDER THE SQUARE ROOT IN THE TAU DENOMINATOR &
        !
90      ! &
        ! Modifications: August,1981 &
        ! &
        !       COMMON (STAT) MODULE$ = 6%, LINE%, SAV.F$ = 127% &
        ! added to replace core common and also to let STTMGR know &
        ! who to chain to (MODULE$). Once in MODULE$, LINE% indicates &
        ! line to begin execution. &
        ! &
        \ COMMON (STAT) MODULE$ = 6%, LINE%, SAV.F$ = 127%
94      ! &
        ! THE WORKING ARRAYS ARE DIMENSIONED HERE, AHEAD OF THEIR FIRST &
        ! REFERENCE AT LINE 240. THEY WERE FORMERLY DIMENSIONED AT LINE 1760. &
        !
95      DIM A(250%),R(250%),B(250%),T(250%)
99      ! &
        ! &
        ! &
        ! FIRST THE CALLING ARGUMENTS ARE RETRIEVED FROM COMMON, THIS DATA &
        ! MATRIX FILE IS OPENED, AND THE USER IS ASKED TO INPUT TWO &
        ! VARIABLES FOR RANK CORRELATION. &
        ! &
        !
100     ON ERROR GOTO 5000 &
        \ GOSUB 10000 &
        \ !F$=SYS(CHR$(7%)) &
        \ F$ = SAV.F$ &
        \ R%=VAL(MID(F$,46%,5%)) &
        \ C%=VAL(MID(F$,51%,5%)) &
        \ F3$=MID(F$,31%,15%)
110     A9$=MID(F$,62%,9%) &
        \ A9$=LEFT(A9$,INSTR(5%,A9$,"]")) &
        \ H9=.9E-38
140     OPEN F3$ FOR INPUT AS FILE 3, VIRTUAL &
        \ DIM #3%,Z(250%,15%)
160     PRINT "ENTER THE COLUMN NUMBERS OF TWO VARIABLES"
165     INPUT "FOR RANK CORRELATION. SEPARATE THEM WITH A COMMA";V1%,V2%
170     PRINT
180     IF V1%>=1% AND V1%<=C% AND V2%>=1% AND V2%<=C% GOTO 230
190     PRINT "YOUR VARIABLE NUMBERS MUST BE BETWEEN 1 AND"; C%
200     PRINT "PLEASE TRY AGAIN"
205     GOTO 160
229     ! &
        ! &
        ! &
        ! THIS SECTION MOVES THE DATA POINTS FOR THE TWO VARIABLES FROM THE &
        ! DATA MATRIX (Z) TO WORKING STORAGE ARRAYS (A AND B). &
        ! A ROW IS SKIPPED WHEN EITHER VALUE EQUALS H9 (MISSING DATA). &
        ! &
        !
230     N5=0 &
        \ FOR I%=1% TO R% &
        \ IF Z(I%,V1%)=H9 OR Z(I%,V2%)=H9 GOTO 250
240     N5=N5+1 &
        \ A(N5)=Z(I%,V1%) &
        \ B(N5)=Z(I%,V2%)
250     NEXT I%
260     IF N5=0 THEN PRINT "NO DATA EXISTS TO PERFORM THIS ANALYSIS" &
        \ GOTO 2500
264     ! &
        ! WITH ONLY ONE OBSERVATION, M9 AND THE SD DENOMINATOR ARE BOTH &
        ! ZERO, SO THE DIVISIONS AT LINES 2370 AND 2430 WOULD FAIL. &
        !
265     IF N5<2 THEN PRINT "AT LEAST TWO OBSERVATIONS ARE NEEDED FOR THIS ANALYSIS" &
        \ GOTO 2500
1760    M9=N5*(N5-1%) &
        \ GOSUB 4000
1900    ! &
        ! &
        ! &
        ! THE PREVIOUS GOSUB 4000 RANKS THE DATA POINTS IN ARRAY A, COMPLETE &
        ! WITH TIED RANKS. THE NEXT GOSUB 4280 COUNTS THE NUMBER OF OBSERVATIONS &
        ! TIED FOR RANKS. &
        ! &
        !
1910    K1=1% &
        \ GOSUB 4280
1929    ! &
        ! &
        ! &
        ! AFTER RANKING VARIABLE A, IT SWITCHES THE INFORMATION OF VARIABLES &
        ! (ARRAYS) A AND B SO IT CAN USE THE SAME ROUTINE (BEGINNING AT &
        ! LINE 4000) TO RANK BOTH VARIABLES. THE RANKS JUST COMPUTED ARE &
        ! SAVED IN T. &
        ! &
        !
1930    FOR L=1% TO N5 &
        \ R2=A(L) &
        \ A(L)=B(L) &
        \ B(L)=R2 &
        \ T(L)=R(L)
1980    NEXT L
1988    ! &
        ! &
        ! &
        ! THE NEXT GOSUB 4000 RANKS THE 2ND COLUMN. C8 WAS COMPUTED IN THE &
        ! PREVIOUS GOSUB 4280 AND IS USED TO COMPUTE THE KENDALL RANK &
        ! CORRELATION COEFFICIENT. IT IS SAVED IN C9.
1989    ! THE NEXT GOSUB 4280 COUNTS HOW MANY TIES FOR RANKS THERE ARE FOR &
        ! THE 2ND VARIABLE. &
        ! &
        !
1990    GOSUB 4000 &
        \ C9=C8 &
        \ GOSUB 4280
2019    ! &
        ! &
        ! &
        ! THIS SECTION PRINTS THE DATA AND RANK, IF THE USER DESIRES. &
        ! BECAUSE OF THE SWAP AT LINE 1930, B AND T HOLD VARIABLE A AND ITS &
        ! RANKS, WHILE A AND R HOLD VARIABLE B AND ITS RANKS. &
        ! &
        !
2020    INPUT "WOULD YOU LIKE TO SEE DATA AND RANK";Q$ &
        \ PRINT
2025    IF Q$="YES" OR LEFT(Q$,1%)="Y" GOTO 2050
2030    IF Q$="NO" OR LEFT(Q$,1%)="N" OR Q$="" GOTO 2150
2035    PRINT "YOU MUST ANSWER EITHER YES OR NO"
2040    GOTO 2020
2050    PRINT &
        \ PRINT "OBS. VAR. A RANK A VAR. B RANK B" &
        \ PRINT "................................................."
2079    ! &
        ! R(L%) IS THE RANK B COLUMN. IT USED TO SIT ON A CONTINUATION LINE &
        ! THAT BEGAN WITH A COMMENT MARK, SO IT WAS NEVER PRINTED AND THE &
        ! TRAILING SEMICOLON RAN ALL THE ROWS TOGETHER. &
        !
2080    FOR L%=1% TO N5 &
        \ PRINT L%;TAB(7%);B(L%);TAB(17%);T(L%);TAB(31%);A(L%);TAB(41%);R(L%)
2100    NEXT L% &
        \ PRINT ".................................................." &
        \ PRINT &
        \ PRINT
2149    ! &
        ! &
        ! &
        ! THIS SORTS THE VARIABLE A RANKS (I.E. THE RANKS THAT WERE PRINTED &
        ! OUT UNDER RANK A), KEEPING THE VARIABLE B RANKS MATCHING. &
        ! I1 IS SET WHEN A PASS EXCHANGES ANYTHING. PASSES ARE REPEATED &
        ! UNTIL ONE COMPLETES WITH NO EXCHANGE. &
        ! &
        !
2150    I1=0% &
        \ FOR I=2% TO N5 &
        \ IF T(I)>=T(I-1%) THEN 2250
2180    I1=1% &
        \ R2=R(I) &
        \ R(I)=R(I-1%) &
        \ R(I-1%)=R2 &
        \ R2=T(I) &
        \ T(I)=T(I-1%) &
        \ T(I-1%)=R2
2250    NEXT I &
        \ IF I1>0% THEN 2150
2260    ! &
        ! &
        ! &
        ! THIS SECTION SCORES THE VARIABLE B RANKS. S1 IS THE TOTAL SCORE FOR &
        ! THE VARIABLE RANKS. WHAT THE SCORE MEASURES IS HOW CLOSE TO BEING &
        ! IN PERFECT ASCENDING ORDER THE RANKS FOR VARIABLE B ARE.
2261    ! FOR EACH R(I), IT ADDS ONE TO S1 FOR EACH LARGER RANK ON DOWN THE &
        ! COLUMN, AND SUBTRACTS ONE FROM S1 FOR EACH SMALLER RANK ON DOWN &
        ! THE COLUMN.
2262    ! A PAIR TIED ON VARIABLE A (EQUAL T) OR ON VARIABLE B (EQUAL R) &
        ! SCORES ZERO. WITHOUT THE TEST ON T, A PAIR TIED ON VARIABLE A WAS &
        ! SCORED ACCORDING TO THE ORDER IN WHICH THE SORT HAPPENED TO LEAVE &
        ! IT, SO TAU DEPENDED ON THE ORDER OF THE ROWS. &
        ! &
        !
2270    S1=0% &
        \ FOR I=1% TO N5 &
        \ FOR J=I TO N5 &
        \ IF T(J)=T(I) THEN 2350
2300    IF R(J)>R(I) THEN 2340
2310    IF R(I) = R(J) THEN 2350
2320    S1 = S1 - 1% &
        \ GOTO 2350
2340    S1=S1+1%
2350    NEXT J &
        \ NEXT I &
        \ D9=(.5*M9-C8)*(.5*M9-C9) &
        \ PRINT
2359    ! &
        ! D9 IS ZERO WHEN EVERY OBSERVATION OF ONE VARIABLE HAS THE SAME &
        ! RANK. TAU IS THEN UNDEFINED, SO THE DIVISION IS NOT ATTEMPTED. &
        !
2360    IF D9<=0 THEN PRINT "TAU CANNOT BE COMPUTED - ALL OBSERVATIONS OF A VARIABLE ARE TIED" &
        \ GOTO 2500
2370    T9=S1/SQR(D9)
2389    ! &
        ! &
        ! TAU IS COMPUTED ON THE PRECEDING LINE, AND IS PRINTED BY THE NEXT &
        ! LINE. SD AND Z (S8 AND Z1) ARE ALSO COMPUTED AND PRINTED HERE. &
        ! &
        !
2390    PRINT "KENDALL RANK CORRELATION COEFFICIENT (TAU)...."; &
        \ PRINT USING "####.####",T9
2430    S8=SQR((2%*(2%*N5+5%))/(9%*N5*(N5-1%))) &
        \ PRINT "STANDARD DEVIATION (SD)......................."; &
        \ PRINT USING "####.####",S8 &
        \ Z1=T9/S8
2455    PRINT "Z-VALUE TO TEST SIGNIFICANCE (TAU/SD)........."; &
        \ PRINT USING "####.####",Z1 &
        \ PRINT &
        \ PRINT
2499    ! &
        ! &
        ! &
        ! HERE IS THE MORE RANK CORRELATION QUESTION. IF NO, THEN CONTROL IS &
        ! RETURNED TO STATCM. &
        ! &
        !
2500    INPUT "DO YOU WISH TO PERFORM MORE RANK CORRELATION"; Q$
2510    IF LEFT(Q$,1%)="Y" GOTO 160
2520    IF Q$="NO" OR LEFT(Q$,1%)="N" OR Q$="" GOTO 2550
2530    PRINT "YOU MUST ANSWER EITHER YES OR NO"
2540    GOTO 2500
2550    !R$=SYS(CHR$(8%)+F$) &
        \ SAV.F$ = F$
2559    ! &
        ! THE DATA MATRIX FILE OPENED AT LINE 140 IS CLOSED BEFORE LEAVING. &
        !
2560    !CHAIN "STATCM"+A9$ &
        \ CLOSE #3% &
        \ MODULE$= "STATCM" &
        \ LINE% = 0% &
        \ SUBEXIT
3999    ! &
        ! &
        ! &
        ! THIS SECTION RANKS THE OBSERVATIONS FOR A VARIABLE. &
        ! INPUT IS A(1) THROUGH A(N5). OUTPUT IS R(1) THROUGH R(N5). &
        ! &
        !
4000    REM
4009    ! &
        ! FIRST, THE R (RANK) ARRAY IS ZEROED OUT. &
        !
4010    N=N5 &
        \ FOR I=1% TO N &
        \ R(I)=0%
4040    NEXT I &
        \ FOR I=1% TO N &
        \ IF R(I)>0% THEN 4260
4041    ! &
        ! THE PREVIOUS IF STATEMENT MEANS THAT TIED RANKS NEED NOT BE RECOMPUTED &
        !
4068    ! &
        ! THE J LOOP COUNTS HOW MANY OBSERVATIONS ARE SMALLER THAN AND EQUAL TO &
        ! A GIVEN DATA POINT. &
        !
4070    S=0% &
        \ E=0% &
        \ FOR J=1% TO N
4100    IF A(J)>A(I) THEN 4160
4110    IF A(J)=A(I) THEN 4140
4119    ! &
        ! S = THE NUMBER OF SMALLER DATA POINTS &
        !
4120    S=S+1% &
        \ GOTO 4160
4139    ! &
        ! E = NUMBER OF DATA POINTS EQUAL TO A GIVEN POINT &
        ! &
        ! EQUAL DATA POINTS ARE GIVEN A TEMPORARY RANK OF -1 &
        !
4140    E=E+1% &
        \ R(J)=-1%
4160    NEXT J &
        \ IF E>1% THEN 4200
4179    ! &
        ! A DATA POINT'S RANK IS SET HERE UNLESS IT WAS TIED WITH OTHER POINTS &
        !
4180    R(I)=S+1% &
        \ GOTO 4260
4199    ! &
        ! HERE THE JOINT RANK FOR A GROUP OF TIED DATA POINTS IS COMPUTED &
        ! THEN, IN THE N4 LOOP, THE TIED RANKS (REMEMBER THEY WERE SET EQUAL &
        ! TO -1) ARE SET TO P1, THEIR JOINT RANK. &
        !
4200    P1=S+E/2+.5000 &
        \ FOR N4=1% TO N &
        \ IF R(N4)>=0% THEN 4250
4230    R(N4) = P1
4250    NEXT N4
4260    NEXT I &
        \ RETURN
4278    ! &
        ! &
        ! &
        ! THIS SECTION COMPUTES C8, WHICH IS A FUNCTION OF THE COUNTS OF TIED &
        ! RANKS, AND WHICH IS USED IN COMPUTING TAU, THE RANK CORRELATION &
        ! COEFFICIENT. IT USES N AS SET AT LINE 4010 AND THE RANKS IN R. &
        ! &
        !
4280    C8 = 0% &
        \ Y = 0%
4289    ! &
        ! THIS I LOOP FINDS, FOR EACH STEP, THE NEXT LARGER RANK AND SETS X &
        ! EQUAL TO THAT RANK. SO IT GOES THROUGH FINDING THE RANKS IN ORDER FROM &
        ! LOWEST TO HIGHEST. &
        !
4290    I1 = 0% &
        \ X = 999999 &
        \ FOR I = 1% TO N &
        \ IF R(I) <= Y THEN 4360
4330    IF R(I)>=X THEN 4360
4340    X=R(I) &
        \ I1=I1+1%
4360    NEXT I
4369    ! &
        ! I1 IS A SWITCH. YOU SEE, WHEN THE HIGHEST RANK HAS BEEN FOUND, THEN &
        ! ON THE NEXT STEP NO HIGHER RANK WILL BE FOUND, THUS I1 WON'T BE &
        ! INCREMENTED AND WILL BE 0 AND SO CONTROL WILL PASS TO 4500. &
        !
4370    IF I1<1% THEN 4500
4379    ! &
        ! Y IS USED AS THE LOWER BOUND FOR THE NEXT STEP, SO RANKS LOWER THAN &
        ! Y WILL BE SKIPPED OVER. &
        !
4380    Y=X
4389    ! &
        ! THIS I LOOP COUNTS (USING C1) HOW MANY RANKS ARE EQUAL TO X, &
        ! THE RANK FOUND ABOVE. &
        !
4390    C1=0 &
        \ FOR I=1% TO N &
        \ IF R(I)<>X THEN 4430
4420    C1=C1+1%
4430    NEXT I &
        \ IF C1=0% THEN 4290
4448    ! &
        ! IF THERE WERE ANY RANKS TIED WITH THAT RANK, THEN C8 (THE CORRECTION &
        ! FACTOR FOR TIES) IS AUGMENTED. NOTE THAT K1, WHICH IS SET IN LINE &
        ! 1910, IS ALWAYS 1. SO THE ON GOTO ALWAYS GOES TO 4460.
4449    ! IN FACT, I DON'T KNOW WHAT THE OTHER EQUATION WOULD EVER BE USED FOR. &
        !
4450    ON K1 GOTO 4460,4480%
4460    C8=C8+C1*(C1-1%)/2 &
        \ GOTO 4290
4480    C8=C8+(C1^3-C1)/12 &
        \ GOTO 4290
4500    RETURN
4999    ! &
        ! &
        ! &
        ! THIS IS THE ERROR ROUTINE FOR THE USER WHO TRIES TO BE CUTE. IT IS &
        ! USED BY THE VARIABLE NUMBER INPUT ROUTINE. &
        ! ERROR 28 IS HANDLED FIRST: THE CTRL/C TRAP IS RE-ARMED, MODULE$ IS &
        ! SET TO "FINISH" AND THE SUBPROGRAM ENDS. &
        ! &
        !
5000    IF ERR=28% THEN GOSUB 10000 &
        !\ RESUME 2500 &
        \ MODULE$ = "FINISH" &
        \ GOTO 32767
5001    ! &
        ! ONLY AN ERROR RAISED BY THE INPUT AT LINE 165 IS A TYPING MISTAKE. &
        ! ANY OTHER ERROR IS HANDED TO THE DEFAULT ERROR HANDLER SO THAT ITS &
        ! REAL MESSAGE IS REPORTED. FORMERLY EVERY ERROR PRINTED THE &
        ! NON-NUMERIC MESSAGE AND RESUMED AT THE VARIABLE NUMBER PROMPT. &
        !
5002    IF ERL<>165% THEN ON ERROR GOTO 0
5005    PRINT "YOU TYPED NON-NUMERIC CHARACTERS IN THE VARIABLE NUMBERS"
5010    PRINT "PLEASE TYPE ONLY NUMBERS WHEN NUMBERS ARE REQUESTED"
5020    RESUME 190
10000   V0%=CTRLC ! SYS(CHR$(6%)+CHR$(-7%)) &
        \ RETURN ! CTRL/C TRAP
32767   SUBEND