1 !
	500.17 - STAT11 - REGRES

		LINEAR REGRESSION MODULE

	RELEASED FOR SUBMISSION TO THE DECUS LIBRARY BY THE DEC
	ENGINEERING SYSTEMS GROUP AND THE EDUCATION PRODUCTS GROUP
		SEPTEMBER, 1977


2 !	COPYRIGHT (C) 1973, DIGITAL EQUIPMENT CORPORATION,

			MAYNARD, MASSACHUSETTS

3 !	THIS SOFTWARE IS FURNISHED TO PURCHASER UNDER A LICENSE FOR USE
	ON A SINGLE COMPUTER SYSTEM AND CAN BE COPIED (WITH INCLUSION OF
	DEC'S COPYRIGHT NOTICE) ONLY FOR USE IN SUCH SYSTEM, EXCEPT AS
	MAY OTHERWISE BE PROVIDED IN WRITING BY DEC.

4 !	THE INFORMATION IN THIS DOCUMENT IS SUBJECT TO CHANGE WITHOUT
	NOTICE AND SHOULD NOT BE CONSTRUED AS A COMMITMENT BY DIGITAL
	EQUIPMENT CORPORATION.

5 !	DEC ASSUMES NO RESPONSIBILITY FOR USE OR RELIABILITY OF ITS
	SOFTWARE ON EQUIPMENT WHICH IS NOT SUPPLIED BY DEC.



6 !	THIS MODULE PERFORMS LINEAR REGRESSION USING THE LEAST SQUARES METHOD.



7 ! 	AUTHOR:		MICHAEL D. KNAUER

	VERSION NUMBER:	001

	DATE:		OCTOBER, 1973



8  !	MODIFICATIONS:  MAY, 1975

	MODIFIED TO ACCEPT MISSING DATA POINTS

		BY ARDOTH HASSLER WILSON
		   CENTRAL STATE UNIVERSITY
		   EDMOND, OKLAHOMA


00009!	MODIFICATIONS:  JUNE, 1976

	CTRL/C TRAP ADDED

10 !
	CALLING ARGUMENTS

11 !		1) VARIABLE NAME:	F3$
		   RANGE OF VALUES:	S00000.RWM - S99999.RWM
		   USE:			NAME OF THE 250 ROW BY 15 COLUMN
					VIRTUAL DATA MATRIX.

15 !		3) VARIABLE NAME:	R%
		   RANGE OF VALUES:	1 - 250
		   USE:			NUMBER OF ROWS IN DATA MATRIX

17 !		4) VARIABLE NAME:	C%
		   RANGE OF VALUES:	1 - 15
		   USE:			NUMBER OF COLUMNS IN DATA MATRIX


20 !
	RETURNING ARGUMENTS

21 !		1) VARIABLE NAME:	F5$
		   RANGE OF VALUES:	S00000.PLT - S99999.PLT
		   USE:			NAME OF FILE CONTAINING DATA POINTS
					TO BE PLOTTED

22 !		2) VARIABLE NAME:	R%
		   RANGE OF VALUES:	1 - 250
		   USE:			CONTAINS NUMBER OF ROWS IN THE F5$ FILE

23 !		3) VARIABLE NAMES:	O$,E$
		   RANGE OF VALUES:	ANY PRINTABLE CHARACTER
		   USE:			CONTAIN PLOT CHARACTERS TO BE USED
					IN PLOTTING OBSERVED AND ESTIMATED
					VALUES

24 !		4) CONSTANT VALUE:	2
		   USE:			TELLS PLOTER THAT THERE ARE TWO
					DEPENDENT VARIABLES (OBSERVED AND
					ESTIMATED VALUES)

25 !		5) CONSTANT VALUE:	1990
		   USE:			LINE NUMBER OF ENTRY POINT IN REGRES
					TO WHICH PLOTER RETURNS CONTROL

30 !	DESCRIPTION OF FUNCTION

		THIS MODULE PERFORMS LINEAR REGRESSION USING THE LEAST
		SQUARES METHOD.

31 !		THE USER MUST ENTER THE INDEPENDENT AND DEPENDENT VARIABLE
		(COLUMN) NUMBERS AND CAN SPECIFY TO HAVE THE TABLE OF
		RESIDUALS PRINTED AND/OR TO HAVE THE Y ESTIMATES SAVED IN
		A NEW COLUMN OF THE DATA MATRIX.

32 !		THE USER CAN ALSO GET A PLOT OF THE OBSERVED AND ESTIMATED
		VALUES.

33 !		SPECIAL SECTIONS OF CODE ARE EXECUTED WHEN THERE IS NEAR-
		PERFECT CORRELATION, AND FOR THE EXTREME RESIDUALS TEST
		FOR SMALL MATRICES.

34 !		WHEN THE USER WANTS NO MORE REGRESSION, CONTROL IS RETURNED
		TO STATCM UNLESS THE Y ESTIMATES WERE ADDED TO THE DATA
		MATRIX, IN WHICH CASE CONTROL IS PASSED TO ESCRWM IN ORDER TO
		RECOMPUTE THE ELEMENTARY STATISTICS.
99	!



	THIS CODE OPENS THE DATA MATRIX



100	ON ERROR GOTO 4000	! SEND RUN-TIME ERRORS TO THE HANDLER AT 4000
101	GOSUB 10000	! ENABLE THE CTRL/C TRAP
102	F$=SYS(CHR$(7%)): GOSUB 6000	! FETCH PASSED ARGUMENTS; 6000 SETS A9$ AND H9
105	Z0%=0%	! NONZERO Z0% LATER SELECTS THE CHAIN TO ESCRWM
110	R%=VAL(MID(F$,46%,5%)): C%=VAL(MID(F$,51%,5%))	! ROW AND COLUMN COUNTS
130	F3$=MID(F$,31%,15%): F4$=LEFT(F3$,6%)+".ESF"	! LINE 130 IS RE-ENTERED FROM 2010
150	OPEN F3$ AS FILE 3
160	DIM #3,A(250%,15%)
181	IF C%=15% THEN PRINT "NOTE: YOU WILL NOT BE ABLE TO SAVE YOUR ESTIMATES":
	PRINT "YOUR DATA MATRIX ALREADY CONTAINS THE MAXIMUM (15)":
	PRINT "NUMBER OF VARIABLES"
187 !


	HERE THE USER IS ASKED TO ENTER AN INDEPENDENT VARIABLE AND A
	DEPENDENT VARIABLE FOR THE REGRESSION.



188	!
	ASK FOR THE DEPENDENT VARIABLE (COLUMN) NUMBER UNTIL IT LIES IN
	THE RANGE 1 TO C%.  LINE 200 IS ALSO THE RESUME POINT USED BY THE
	ERROR HANDLER AFTER A NON-NUMERIC REPLY.
190	INPUT "ENTER THE DEPENDENT VARIABLE ";Y%
195	IF Y%>0% AND Y%<=C% THEN 215
200	PRINT "YOUR DEPENDENT VARIABLE NUMBER MUST BE BETWEEN 1 AND";C%:
	PRINT "PLEASE TRY AGAIN":
	GOTO 190
215	!
	SAME DIALOGUE FOR THE INDEPENDENT VARIABLE.  LINE 230 IS THE
	RESUME POINT USED BY THE ERROR HANDLER.
220	INPUT "ENTER THE INDEPENDENT VARIABLE ";X%
225	IF X%>0% AND X%<=C% THEN 250
230	PRINT "YOUR INDEPENDENT VARIABLE NUMBER MUST BE BETWEEN 1 AND";C%:
	PRINT "PLEASE TRY AGAIN":
	GOTO 220     !


	THIS SECTION COMPUTES AND PRINTS THE REGRESSION COEFFICIENT AND
	Y INTERCEPT.



250	X1=0: X2=0: Y1=0: Y2=0: C2=0: N%=0%
260	!
	ACCUMULATE THE COUNT (N%), SUM X (X1), SUM X^2 (X2), SUM Y (Y1),
	SUM Y^2 (Y2) AND SUM X*Y (C2) OVER EVERY ROW IN WHICH NEITHER
	VALUE EQUALS THE MISSING-DATA MARKER H9.
270	FOR J%=1% TO R%
275	IF A(J%,X%)=H9 OR A(J%,Y%)=H9 GOTO 295
280	N%=N%+1%: X1=X1+A(J%,X%): X2=X2+A(J%,X%)^2
285	Y1=Y1+A(J%,Y%): Y2=Y2+A(J%,Y%)^2
290	C2=C2+A(J%,X%)*A(J%,Y%)
295	NEXT J%: R=N%
296	!
	AT LEAST TWO COMPLETE ROWS ARE REQUIRED.  THE EXIT GOES THROUGH
	LINE 2000 (CLOSE 3) RATHER THAN 2005, BECAUSE A "YES" ANSWER TO
	THE MORE-REGRESSION QUESTION JUMPS TO LINE 130 AND LINE 150 OPENS
	CHANNEL 3 AGAIN; LEAVING THE CHANNEL OPEN MAKES THAT OPEN FAIL.
297	IF N%<=1% THEN PRINT "NOT ENOUGH DATA EXISTS TO PERFORM";
	" REGRESSION": PRINT: GOTO 2000
299	!
	D1 = CORRECTED SUM OF CROSS PRODUCTS, D2 AND D3 = CORRECTED SUMS
	OF SQUARES OF X AND Y, B = SLOPE, I = INTERCEPT.  A ZERO D2 RAISES
	AN ERROR THAT THE HANDLER AT 4000 REPORTS AS A COMPUTATION ERROR.
300	D1=C2-X1*Y1/R:  D2=X2-X1*X1/R:  D3=Y2-Y1*Y1/R
310	B=D1/D2
340	I=Y1/R-B*X1/R
345  S9=0
350  PRINT "INTERCEPT.....................";
360  PRINT USING " #######.#####",I
370  PRINT "REGRESSION COEFFICIENT........";
380  PRINT USING " #######.#####",B
390  PRINT :     !


	THIS SECTION COMPUTES AND PRINTS SEVERAL OTHER REGRESSION STATISTICS



400	!
	P IS THE CORRELATION COEFFICIENT.  WHEN ITS MAGNITUDE IS .9995 OR
	MORE THE NEAR-PERFECT CORRELATION ROUTINE AT 3300 TAKES OVER AND
	RE-ENTERS THIS SECTION AT LINE 530.
410	P=D1/(SQR(D2)*SQR(D3))
415	IF ABS(P)>=.9995 GOTO 3300
420	!
	S1 = SUM OF SQUARES ATTRIBUTABLE TO REGRESSION
	S2 = SUM OF SQUARES OF DEVIATIONS FROM REGRESSION
	F  = F-VALUE FOR THE ANALYSIS OF VARIANCE
	S3 = STANDARD ERROR OF ESTIMATE
	S4 = STANDARD ERROR OF REGRESSION COEFFICIENT
	T  = COMPUTED T-VALUE
430	S1=P^2*D3: S2=D3-S1
470	F=S1/(S2/(R-2)): S3=SQR(S2/(R-2))
510	S4=SQR((S2/(R-2))/D2): T=B/S4
530	PRINT "STD. ERROR OF REG. COEF. .....";
540	PRINT USING " #######.###",S4
550	PRINT "COMPUTED T-VALUE..............";
560	PRINT USING " #######.###",T
570	PRINT
580	PRINT "CORRELATION COEFFICIENT.......";
590	PRINT USING " #######.###",P
600	PRINT "STANDARD ERROR OF ESTIMATE....";
610	PRINT USING " #######.###",S3
620  PRINT     !


	THIS SECTION COMPUTES AND PRINTS STATISTICS RELEVANT TO THE
	ANALYSIS OF VARIANCE UNLESS S9=1, WHICH HAPPENS IF A NEAR-
	PERFECT CORRELATION COEFFICIENT (ABS(P)>=.9995) HAS BEEN COMPUTED.



625	IF S9=1 GOTO 2000	! NEAR-PERFECT CORRELATION: NO TABLE IS PRINTED
630	F1=1: F2=R-2: F3=R-1	! DEGREES OF FREEDOM: REGRESSION, DEVIATION, TOTAL
640	M2=S2/F2: M9=S1+S2	! DEVIATION MEAN SQUARE AND TOTAL SUM OF SQUARES
700	M$="#####  #######.###  #######.### ######.###"
710	PRINT TAB(18%);"ANALYSIS OF VARIANCE FOR THE REGRESSION": PRINT
720	PRINT "   SOURCE OF VARIATION        D.F.   SUM OF SQ.";
730	PRINT "    MEAN SQ.    F VALUE"
740	PRINT "ATTRIBUTABLE TO REGRESSION  ";
750	PRINT USING M$,F1,S1,S1,F
755	!
	THE FORMAT STRING IS SHORTENED FOR EACH FOLLOWING ROW BECAUSE EACH
	ROW PRINTS ONE FIELD FEWER THAN THE ROW ABOVE IT.
760	M$=LEFT(M$,31%): PRINT "DEVIATION FROM REGRESSION   ";
780	PRINT USING M$,F2,S2,M2
790	M$=LEFT(M$,18%): PRINT "     TOTAL                  ";
810	PRINT USING M$,F3,M9
820  PRINT     !


	THIS SECTION COMPUTES AND PRINTS THE TABLE OF RESIDUALS



830  INPUT "DO YOU WISH TO PRINT THE TABLE OF RESIDUALS ";W$
831  PRINT
835  IF LEFT(W$,1%)="Y" GOTO 860
840	IF LEFT(W$,1%)="N" OR W$="" GOTO 1700
845  PRINT "PLEASE ANSWER EITHER YES OR NO"
850  GOTO 830
860  PRINT "CASE NO   Y OBSERVED   Y ESTIMATED";
870  PRINT "     RESIDUAL      STD.RESID."
880  M$=" #####    #######.###   #######.###"
890  M$=M$+"   #######.###   #######.###"
900  ! SET UP CRITICAL VALUE TABLE
910  DIM U(30%) : RESTORE
920  READ U(I%) FOR I%=3% TO 30%
930  DIM Z(250%)
940	I1%=0%
1000	FOR I%=1% TO R%
1001	!
	A ROW WITH A MISSING X OR Y VALUE GETS NO TABLE ENTRY.  ITS CELL
	IN THE ESTIMATE COLUMN (C%+1%) IS MARKED MISSING, AS THE LOOP AT
	1700 DOES, SO THAT A VALUE LEFT THERE EARLIER IS NOT KEPT IF THE
	USER LATER SAVES THE COLUMN.
1005	IF A(I%,X%)<>H9 AND A(I%,Y%)<>H9 GOTO 1010
1006	IF C%<15% THEN A(I%,C%+1%)=H9
1007	GOTO 1250
1010 !CALCULATE RESIDUALS
1020 Y=I+B*A(I%,X%)
1030 D=A(I%,Y%)-Y
1035	I1%=I1%+1%: Z(I1%)=D	! Z HOLDS THE RESIDUALS FOR THE SORT AT 3100
1040 E=D/S3	! RESIDUAL DIVIDED BY THE STANDARD ERROR OF ESTIMATE
1050	IF C%<15% THEN A(I%,C%+1%)=Y
1240	PRINT USING M$,I1%,A(I%,Y%),Y,D,E
1250	NEXT I%: PRINT: IF C%<15% THEN A(0%,C%+1%)=I1%
1267	!
	THE FOLLOWING GOSUB CAUSES THE RESIDUAL ARRAY (Z) TO BE SORTED
	IN ASCENDING ORDER
	
1270 GOSUB 3100     !


	THIS CODE PERFORMS A TEST OF EXTREME RESIDUALS FOR VARIABLES FROM
	SMALL (30 OR FEWER DATA POINTS) MATRICES.



1300 PRINT "TEST OF EXTREME RESIDUALS"
1301 ON ERROR GOTO 1303
1302 GOTO 1305
1303 PRINT "TEST OF EXTREME RESIDUALS DOES NOT APPLY"
1304 ON ERROR GOTO 4000: RESUME 1515
1305 IF R>30 THEN ON ERROR GOTO 4000: GOTO 1800
1306	!
	THE LOCAL HANDLER AT 1303 IS IN FORCE ONLY FOR LINES 1310-1500.
	EVERY WAY OUT OF THIS SECTION (1304, 1305, 1510) PUTS THE MAIN
	HANDLER AT 4000 BACK, SO A LATER ERROR IS NOT REPORTED AS
	"TEST OF EXTREME RESIDUALS DOES NOT APPLY".

	THE FORM OF THE RATIO AND THE CRITICAL VALUE ARE SELECTED BY N%,
	THE NUMBER OF ROWS ACTUALLY USED (R=N%), NOT BY R%, THE NUMBER OF
	ROWS IN THE MATRIX; THE TWO DIFFER WHEN DATA POINTS ARE MISSING.
	R1,R2,R3 ARE THE THREE SMALLEST RESIDUALS AND R7,R8,R9 THE THREE
	LARGEST, AS SET BY THE SORT ROUTINE AT 3100.
1310 IF N%>=14% GOTO 1430
1320 IF N%>=11% GOTO 1400
1330 IF N%>=8% GOTO 1370
1340 T1=(R2-R1)/(R9-R1)
1350 T2=(R9-R8)/(R9-R1)
1360 GOTO 1450
1370 T1=(R2-R1)/(R8-R1)
1380 T2=(R9-R8)/(R9-R2)
1390 GOTO 1450
1400 T1=(R3-R1)/(R8-R1)
1410 T2=(R9-R7)/(R9-R2)
1420 GOTO 1450
1430 T1=(R3-R1)/(R7-R1)
1440 T2=(R9-R7)/(R9-R3)
1450 PRINT "   RATIO OF RANGES FOR THE SMALLEST RESIDUAL......";
1460 PRINT USING " #######.###",T1
1470 PRINT "   RATIO OF RANGES FOR THE LARGEST RESIDUAL.......";
1480 PRINT USING " #######.###",T2
1490 PRINT "CRITICAL VALUE OF THE RESIDUAL AT ALPHA = .10 ....";
1500 PRINT USING " #######.###",U(N%)
1510 ON ERROR GOTO 4000: PRINT     !



	IF THE DATA MATRIX HAS ROOM (C%<15%), THE USER CAN HAVE THE Y
	ESTIMATES SAVED IN A NEW COLUMN. NOTE THAT THE ESTIMATES HAVE
	ALREADY BEEN STORED THERE AND ARE "SAVED" BY INCREMENTING THE
	COLUMN COUNTER.



1515	IF C%>=15% GOTO 1610	! NO FREE COLUMN: SKIP THE SAVE QUESTION
1520	INPUT "DO YOU WISH TO SAVE YOUR ESTIMATES ";W$
1525	PRINT
1530	IF LEFT(W$,1%)="N" OR W$="" GOTO 1610
1535	IF LEFT(W$,1%)<>"Y" THEN PRINT "PLEASE ANSWER EITHER YES OR NO": GOTO 1520
1550	C%=C%+1%: Z0%=1%	! COUNT THE NEW COLUMN AND FLAG THE MATRIX AS CHANGED
1560	PRINT "YOUR ESTIMATES OF VARIABLE ";Y%;" ARE NOW VARIABLE #";C%
1580	PRINT "YOUR DATA MATRIX NOW HAS ";R%;" ROWS ";"AND ";C%;" COLUMNS."
1600 PRINT     !



	THIS CODE PREPARES PART OF THE LIST OF CALLING ARGUMENTS TO BE
	PASSED TO PLOTER VIA COMMON.



1610	INPUT "DO YOU WISH TO PLOT Y OBSERVED AND Y ESTIMATED ";W$
1611	PRINT
1615	IF LEFT(W$,1%)="N" OR W$="" GOTO 2000
1620	IF LEFT(W$,1%)<>"Y" THEN PRINT "PLEASE ANSWER EITHER YES OR NO": GOTO 1610
1624	!
	REBUILD THE 70-CHARACTER ARGUMENT STRING: THE FIRST 45 CHARACTERS
	ARE KEPT, THEN COME THE ROW AND COLUMN COUNTS AS FIVE-DIGIT
	ZERO-FILLED FIELDS, THEN "STATCM" AND THE ACCOUNT STRING A9$.
1625	R$="00000"+RIGHT(NUM$(R%),2%): C$="00000"+RIGHT(NUM$(C%),2%)
1630	F$=LEFT(F$,45%)+MID(R$,LEN(R$)-5%,5%)+MID(C$,LEN(C$)-5%,5%)
1640	F$=LEFT(F$+"STATCM"+A9$+"   "+SPACE$(10%),70%)
1644 !


	THIS OPENS AND BUILDS A FILE OF POINTS TO BE PLOTTED:  THE INDEPENDENT
	VARIABLE, THE DEPENDENT VARIABLE, AND THE Y ESTIMATES.



1645	F5$=LEFT(F3$,6%)+".PLT"
1650	OPEN F5$ FOR OUTPUT AS FILE 5
1655	DIM #5,P(250%,3%)
1657	I%=0%
1658	!
	ONE PLOT ROW IS WRITTEN FOR EACH DATA ROW HAVING BOTH VALUES:
	COLUMN 0 = X, COLUMN 1 = Y OBSERVED, COLUMN 2 = Y ESTIMATED.
	ROW 0 RECEIVES THE POINT COUNT N% IN COLUMNS 0 TO 2.
1660	FOR J%=1% TO R%
1662	IF A(J%,X%)<>H9 AND A(J%,Y%)<>H9 THEN I%=I%+1%:
	P(I%,0%)=A(J%,X%): P(I%,1%)=A(J%,Y%): P(I%,2%)=A(J%,X%)*B+I
1670	NEXT J%
1671	P(0%,J%)=N% FOR J%=0% TO 2%
1672 CLOSE 5     !



	THIS CODE ADDS THE PLOT CHARACTERS, RETURN MODULE, RETURN LINE
	NUMBER, NUMBER OF ROWS IN PLOT FILE, AND NUMBER OF DEPENDENT
	VARIABLES TO THE ARGUMENT LIST TO BE PASSED TO PLOTER.



1675	PRINT "TYPE PRINT CHARACTERS FOR Y OBSERVED AND Y ESTIMATED."
1677	INPUT "(FORM : X,X )"O$,E$
1679	!
	G$ LAYOUT: 15-CHARACTER PLOT FILE NAME, "REGRES" PLUS ACCOUNT
	PADDED TO COLUMN 30, THREE 2-BYTE INTEGERS (RETURN LINE 1990,
	POINT COUNT N%, 2 DEPENDENT VARIABLES), THE TWO PLOT CHARACTERS
	(DEFAULTS "O" AND "E") AND ONE "*", FOLLOWED BY F$.
1680	G$=LEFT(LEFT(F5$+SPACE$(15%),15%)+"REGRES"+A9$+"   "+SPACE$(10%),30%)
1682	G$=G$+CVT%$(1990%)+CVT%$(N%)+CVT%$(2%)
1684	G$=G$+LEFT(O$+"O",1%)+LEFT(E$+"E",1%)+"*****"
1686	G$=LEFT(G$+SPACE$(10),39%)+F$
1690	R$=SYS(CHR$(8)+G$)
1695	CHAIN "PLOTER"+A9$
1699	!

	THESE LINES STORE THE ESTIMATES IN THE FIRST AVAILABLE COLUMN
	IN THE DATA MATRIX.  NOTE THAT THESE ESTIMATES WILL NOT BE
	"SAVED" UNLESS THE COLUMN COUNTER IS INCREMENTED.

	WHEN THE MATRIX ALREADY HAS 15 COLUMNS THERE IS NO COLUMN C%+1%
	TO WRITE TO, SO THE LOOP IS SKIPPED.  WITHOUT THIS CHECK THE
	REFERENCE A(I%,16%) IS OUTSIDE THE DIMENSIONED ARRAY, AND AN ERROR
	AT THESE LINE NUMBERS MAKES THE HANDLER AT 4000 EXECUTE
	ON ERROR GOTO 0.

	ROWS WITH A MISSING X OR Y VALUE ARE MARKED MISSING (H9) IN THE
	ESTIMATE COLUMN.  I9% COUNTS THE ESTIMATES STORED AND IS WRITTEN
	TO ROW 0 OF THAT COLUMN.

1700	IF C%>=15% GOTO 1515
1702	I9%=0%: FOR I%=1% TO R%
1705	IF A(I%,X%)=H9 OR A(I%,Y%)=H9 THEN A(I%,C%+1%)=H9: GOTO 1720
1710	A(I%,C%+1%)=I+B*A(I%,X%)
1715	I9%=I9%+1%
1720	NEXT I%: A(0%,C%+1%)=I9%: GOTO 1515
1799	!

	THIS CODE PRINTS THE RANGE OF THE RESIDUALS (LARGEST MINUS
	SMALLEST) AND THE RANGE DIVIDED BY THE STANDARD ERROR OF ESTIMATE
	FOR MATRICES WITH MORE THAN 30 DATA POINTS PER VARIABLE.


1800	T1=R9-R1: T2=T1/S3
1820	PRINT "RANGE...................................";
1830	PRINT USING " #######.###",T1
1840	PRINT "RANGE OVER STANDARD ERROR OF ESTIMATE...";
1850	PRINT USING " #######.###",T2
1860	PRINT
1870	GOTO 1515     !




	THIS IS WHERE CONTROL IS RETURNED TO FROM PLOTER. NOTE, Z0 IS A
	SWITCH TO CONTROL CHAINING TO ESCRWM.



1989	!
	ENTRY POINT USED WHEN PLOTER CHAINS BACK TO THIS MODULE.  LINE 100
	IS NOT EXECUTED ON THIS PATH, SO THE ERROR HANDLER AND THE CTRL/C
	TRAP ARE SET UP HERE IN THE SAME WAY AS AT LINE 100; OTHERWISE A
	FURTHER REGRESSION STARTED FROM LINE 2010 RUNS WITHOUT THEM.
1990 ON ERROR GOTO 4000: GOSUB 10000: F$=SYS(CHR$(7))
1991 Z0%=1%	! ALWAYS LEAVE THROUGH ESCRWM AFTER A PLOT
1992 F$=LEFT(F$,70%)
1995 R%=VAL(MID(F$,46%,5%))
1996 C%=VAL(MID(F$,51%,5%))
1997	GOSUB 6000	! RELOAD A9$ AND H9
1999 !


	HERE IS WHERE THE MORE REGRESSION QUESTION IS ASKED, AND PASSED
	VARIABLES ARE RELOADED INTO COMMON.



2000	CLOSE 3
2005	INPUT "DO YOU WISH TO COMPUTE MORE REGRESSION";W$
2006	PRINT
2010	IF LEFT(W$,1%)="Y" GOTO 130
2015	IF LEFT(W$,1%)<>"N" AND LEFT(W$,1%)<>"" THEN
	PRINT "PLEASE ANSWER EITHER YES OR NO": GOTO 2000
2039	!
	REBUILD THE ARGUMENT STRING FOR THE NEXT MODULE: THE FIRST 45
	CHARACTERS ARE KEPT, THEN COME THE ROW AND COLUMN COUNTS AS
	FIVE-DIGIT ZERO-FILLED FIELDS, THEN "STATCM" AND THE ACCOUNT A9$.
2040	R$="00000"+RIGHT(NUM$(R%),2%): C$="00000"+RIGHT(NUM$(C%),2%)
2050	F$=LEFT(F$,45%)+MID(R$,LEN(R$)-5%,5%)+MID(C$,LEN(C$)-5%,5%)
2090	F$=F$+"STATCM"+A9$+"   "
2091 !


	IF THE ESTIMATES HAVE BEEN ADDED TO THE DATA MATRIX, THEN THE
	ELEMENTARY STATISTICS MUST BE RECOMPUTED, AND CONTROL IS TRANSFERRED
	TO ESCRWM. OTHERWISE, CONTROL IS RETURNED TO STATCM.



2092	IF Z0%=0% GOTO 2100	! MATRIX UNCHANGED: RETURN TO STATCM
2095	R$=SYS(CHR$(8)+F$): CHAIN "ESCRWM"+A9$
2100	R$=SYS(CHR$(8)+F$): CHAIN "STATCM"+A9$
2999 !


	THIS DATA IS USED BY THE TEST OF EXTREME RESIDUALS ROUTINE.

	THE 28 VALUES ARE READ BY LINE 920 INTO U(3) THROUGH U(30) AND
	PRINTED BY LINE 1500 AS THE CRITICAL VALUE AT ALPHA = .10.  THE
	FIVE DATA LINES HOLD 5, 3, 3, 8 AND 9 VALUES, WHICH CORRESPONDS
	TO SUBSCRIPTS 3-7, 8-10, 11-13, 14-21 AND 22-30.
	NOTE(REVIEW): THE VALUES LOOK LIKE A PUBLISHED TABLE FOR A
	RATIO-OF-RANGES OUTLIER TEST; THE SOURCE OF THE TABLE IS NOT
	STATED IN THIS FILE - CONFIRM BEFORE CHANGING ANY VALUE.



3000 DATA .886,.679,.557,.482,.434
3010 DATA .479,.441,.409
3020 DATA .517,.490,.467
3030 DATA .492,.472,.454,.438,.424,.412,.401,.391
3040 DATA .382,.374,.367,.360,.354,.348,.342,.337,.332
3099 !


	THIS ROUTINE SORTS THE RESIDUALS IN ASCENDING ORDER FOR USE IN THE
	TEST OF EXTREME RESIDUALS.



3100	T9%=N%-1%	! LAST PAIR TO COMPARE ON THE CURRENT PASS
3110	FOR I%=1% TO N%
3120	S9%=0%	! BECOMES 1 WHEN THE PASS EXCHANGES ANYTHING
3130	FOR J%=1% TO T9%
3140	IF Z(J%)>Z(J%+1%) THEN Q=Z(J%): Z(J%)=Z(J%+1%): Z(J%+1%)=Q: S9%=1%
3190	NEXT J%
3200	IF S9%=0% GOTO 3230	! A PASS WITHOUT EXCHANGES MEANS Z IS SORTED
3210	T9%=T9%-1%
3220	NEXT I%
3229	!
	R1,R2,R3 = THREE SMALLEST RESIDUALS; R9,R8,R7 = THREE LARGEST.
3230	R1=Z(1%): R2=Z(2%): R3=Z(3%)
3240	R9=Z(N%): R8=Z(N%-1%): R7=Z(N%-2%)
3250 RETURN     !



	THIS ROUTINE SETS THE STANDARD ERROR OF REGRESSION COEFFICIENT,
	STANDARD ERROR OF ESTIMATE, AND COMPUTED T FOR NEAR-PERFECT
	CORRELATIONS (I.E., P==1).



3300	S9=1	! FLAG TESTED AT LINE 625 TO SKIP THE REMAINING OUTPUT
3310	S3=0: S4=0
3320	T=9999999.00000
3330	GOTO 530     !



	THESE ARE ERROR ROUTINES IN CASE THE USER TYPES IN GARBAGE.



4000	IF ERR=28% THEN GOSUB 10000: RESUME 2000
4001	!
	MAIN ERROR HANDLER, SELECTED BY ON ERROR GOTO 4000.

	LINE 4000: FOR ERROR 28 THE CTRL/C TRAP IS ENABLED AGAIN (GOSUB
	10000) AND EXECUTION RESUMES AT LINE 2000, WHICH CLOSES CHANNEL 3
	AND ASKS THE MORE-REGRESSION QUESTION.
	NOTE(REVIEW): ERROR 28 IS PRESUMABLY THE CTRL/C TRAP ERROR ADDED
	IN THE JUNE 1976 MODIFICATION - CONFIRM AGAINST THE SYSTEM MANUAL.

	LINE 4002 DISPATCHES ON THE LINE NUMBER WHERE THE ERROR OCCURRED:
	   BELOW 210    MESSAGE AT 4007, RESUME AT 200 (DEPENDENT VARIABLE)
	   210 TO 239   MESSAGE AT 5000, RESUME AT 230 (INDEPENDENT VARIABLE)
	   240 TO 1514  MESSAGE AT 5100, THEN Z0%=0% AND RESUME AT 2100,
	                WHICH CHAINS TO STATCM
	   1515 AND UP  ON ERROR GOTO 0
	NOTE(REVIEW): THE FIRST RANGE ALSO COVERS THE START-UP LINES
	100-188, SO AN ERROR THERE (FOR EXAMPLE IN THE OPEN AT LINE 150)
	IS REPORTED AS A NON-NUMERIC DEPENDENT VARIABLE.
4002	IF ERL<210 GOTO 4007 ELSE IF ERL<240 GOTO 5000
	ELSE IF ERL<1515 GOTO 5100 ELSE ON ERROR GOTO 0
4007	PRINT "YOU TYPED NON-NUMERIC CHARACTER(S)"
4010 PRINT "FOR THE DEPENDENT VARIABLE NUMBER"
4020 PRINT "PLEASE TYPE ONLY NUMBERS WHEN NUMBERS ARE REQUESTED"
4030 RESUME 200
5000 PRINT "YOU TYPED NON-NUMERIC CHARACTER(S)"
5010 PRINT "FOR THE INDEPENDENT VARIABLE NUMBER"
5020 PRINT "PLEASE TYPE ONLY NUMBERS WHEN NUMBERS ARE REQUESTED"
5030 RESUME 230
5100	PRINT "A COMPUTATION ERROR HAS OCCURED.  PLEASE CHECK YOUR"
5110	PRINT "INPUT DATA TO BE SURE THAT IT IS VALID."
5120	Z0%=0%: RESUME 2100
6000	A9$=MID(F$,62%,9%)	! NINE CHARACTERS OF F$ STARTING AT COLUMN 62
6010	A9$=LEFT(A9$,INSTR(5%,A9$,"]"))	! KEEP THROUGH THE FIRST "]" AT OR AFTER POSITION 5
6020	H9=.9E-38	! VALUE THAT MARKS A MISSING DATA POINT
6030	RETURN
10000	V0$=SYS(CHR$(6%)+CHR$(-7%))	! CTRL/C TRAP
10010	RETURN
32767 END
