1 !
	500.19 - STAT11 - CHI0SQ

		CHI SQUARE MODULE

	RELEASED FOR SUBMISSION TO THE DECUS LIBRARY BY
	DEC ENGINEERING SYSTEMS GROUP AND EDUCATION PRODUCTS GROUP
		SEPTEMBER, 1977
2 !	COPYRIGHT (C) 1973, DIGITAL EQUIPMENT CORPORATION,

			MAYNARD, MASSACHUSETTS

3 !	THIS SOFTWARE IS FURNISHED TO PURCHASER UNDER A LICENSE FOR USE
	ON A SINGLE COMPUTER SYSTEM AND CAN BE COPIED (WITH INCLUSION OF
	DEC'S COPYRIGHT NOTICE) ONLY FOR USE IN SUCH SYSTEM, EXCEPT AS
	MAY OTHERWISE BE PROVIDED IN WRITING BY DEC.

4 !	THE INFORMATION IN THIS DOCUMENT IS SUBJECT TO CHANGE WITHOUT
	NOTICE AND SHOULD NOT BE CONSTRUED AS A COMMITMENT BY DIGITAL
	EQUIPMENT CORPORATION.

5 !	DEC ASSUMES NO RESPONSIBILITY FOR USE OR RELIABILITY OF ITS
	SOFTWARE ON EQUIPMENT WHICH IS NOT SUPPLIED BY DEC.



6 !	THIS MODULE COMPUTES AND PRINTS THE CHI-SQUARED VALUE FOR THE
	USER'S DATA MATRIX.


7 ! 	AUTHOR:		MICHAEL D. KNAUER

	VERSION NUMBER:	001

	DATE:		OCTOBER, 1973



8  !	MODIFICATIONS:  MAY, 1975

	MODIFIED TO ACCEPT MISSING DATA POINTS

		BY ARDOTH HASSLER WILSON
		   CENTRAL STATE UNIVERSITY
		   EDMOND, OKLAHOMA


00009!	MODIFICATIONS:  JUNE, 1976

	CTRL/C TRAP ADDED

10 !

	CALLING ARGUMENTS

11 !		1) VARIABLE NAME:	F3$
		   RANGE OF VALUES:	S00000.RWM - S99999.RWM
		   USE:			NAME OF THE 250 ROW BY 15 COLUMN
					DATA MATRIX.

13 !		2) VARIABLE NAME:	F4$
		   RANGE OF VALUES:	S00000.ESF - S99999.ESF
		   USE:			NAME OF ELEMENTARY STATISTICS FILE

15 !		3) VARIABLE NAME:	R%
		   RANGE OF VALUES:	1 - 250
		   USE:			NUMBER OF ROWS IN DATA MATRIX

17 !		4) VARIABLE NAME:	C%
		   RANGE OF VALUES:	1 - 15
		   USE:			NUMBER OF COLUMNS IN DATA MATRIX



20 !
	RETURNING ARGUMENTS

22 !		NONE -- THIS MODULE DOES NOT ALTER OR ADD TO ANY OF THE
			FILES OR VARIABLES PASSED TO IT.



30 !	DESCRIPTION OF FUNCTION

31 !		THIS MODULE COMPUTES AND PRINTS THE CHI-SQUARED VALUE FOR
		THE USER'S DATA MATRIX

32!		IF THE DATA MATRIX IS 2 BY 2, A SPECIAL FORMULA IS USED AND
		THE FISHER EXACT PROBABILITY CAN BE COMPUTED.

33 !		FOR ALL OTHER MATRICES, THE CHI SQUARE AND DEGREES OF FREEDOM
		ARE COMPUTED AND PRINTED, AND THE NUMBER OF ROWS AND COLUMNS
		WITH ALL ZERO ENTRIES AND THE NUMBER OF CELLS WITH EXPECTED
		FREQUENCIES LESS THAN 5 ARE COUNTED AND PRINTED.

34 !		THE TABLE OF EXPECTED FREQUENCIES WILL BE PRINTED IF THE
		USER WANTS.

35 !		CONTROL IS ALWAYS SURRENDERED TO STATCM AT THE END OF THE
		CHI SQUARED ANALYSIS.

100 !	
	THIS NEXT LINE RETRIEVES THE CALLING ARGUMENTS FROM COMMON

110	ON ERROR GOTO 9000	! ALL ERRORS, INCLUDING THE CTRL/C TRAP, GO TO LINE 9000
111	GOSUB 10000	! ARM THE CTRL/C TRAP (SUBROUTINE AT LINE 10000)
112	F$=SYS(CHR$(7%))	! F$ RECEIVES THE CALLING ARGUMENTS FROM COMMON
113	A9$=MID(F$,62%,9%): A9$=LEFT(A9$,INSTR(5%,A9$,"]"))	! KEEP THE TEXT UP TO AND INCLUDING THE RIGHT BRACKET (APPENDED TO THE CHAIN NAME AT LINE 2040)
114	H9=.9E-38	! CELLS OF G EQUAL TO H9 ARE SKIPPED LATER (PRESUMABLY THE MISSING DATA MARKER)
115 !

	FOLLOWING IS A FACTORIAL FUNCTION USED TO HELP COMPUTE THE FISHER
	EXACT PROBABILITY FOR A 2 BY 2 ARRAY.



118 !	FNF(M) RETURNS THE FACTORIAL OF THE WHOLE NUMBER M, COMPUTED RECURSIVELY.
119 !	ANY M OF 1 OR LESS (INCLUDING 0) RETURNS 1 SO THE RECURSION ALWAYS STOPS.
120	DEF FNF(M)
130	IF M<=1 THEN FNF=1 ELSE FNF=M*FNF(M-1)
140	FNEND
144 !


	THIS CODE OPENS THE DATA MATRIX, ELEMENTARY STATISTICS, AND TEMPORARY
	WORK FILES. NOTE THAT THE EXPECTED FREQUENCY FILE (E(250%,15%)) IS
	CREATED AS A TEMPORARY VIRTUAL MATRIX AND IS DELETED UPON EXIT FROM
	THIS MODULE.



145	L5%=0%	! L5% COUNTS CELLS WITH AN EXPECTED FREQUENCY BELOW 5 (SEE LINE 1065)
150	R%=VAL(MID(F$,46%,5%))	! NUMBER OF ROWS - CHARACTERS 46 TO 50 OF F$
160	C%=VAL(MID(F$,51%,5%))	! NUMBER OF COLUMNS - CHARACTERS 51 TO 55 OF F$
170	F3$=MID(F$,31%,15%)	! DATA MATRIX FILE NAME - CHARACTERS 31 TO 45 OF F$
180	F4$=LEFT(F3$,6%)+".ESF"	! ELEMENTARY STATISTICS FILE - FIRST 6 CHARACTERS OF F3$ PLUS .ESF
190	F5$=LEFT(F3$,6%)+".TMP"	! TEMPORARY WORK FILE - FIRST 6 CHARACTERS OF F3$ PLUS .TMP
200	OPEN F3$ FOR INPUT AS FILE 3
210	DIM #3,G(250%,15%)	! G IS THE DATA MATRIX, HELD ON CHANNEL 3
220	OPEN F4$ FOR INPUT AS FILE 4
230	DIM #4,S(10%,15%)	! S IS THE STATISTICS MATRIX ON CHANNEL 4 - ROW 3 IS USED AS THE COLUMN TOTALS AT LINES 1010 AND 1060
240	OPEN F5$ AS FILE 5	! NO MODE IS GIVEN HERE, UNLIKE CHANNELS 3 AND 4
250	DIM #5,E(250%,15%)	! E HOLDS THE EXPECTED FREQUENCIES - ITS FILE IS KILLED AT LINE 2020
299 !


	THIS SECTION COMPUTES AND PRINTS THE CHI SQUARE FOR A 2 BY 2 ARRAY



300	IF R%<>2% OR C%<>2% THEN GOTO 1000
305 !	A, B, C AND D ARE THE FOUR CELLS OF THE 2 BY 2 MATRIX, READ ROW BY ROW.
310	A=G(1%,1%): B=G(1%,2%): C=G(2%,1%): D=G(2%,2%)
315 !	THE INTEGER COPIES DETECT CELLS THAT ARE NOT WHOLE NUMBERS OF AT LEAST 1.
320	A9%=A: B9%=B: C9%=C: D9%=D
325 !	ANY SUCH CELL SENDS THE MATRIX TO THE GENERAL COMPUTATION AT LINE 1000.
330	IF A9%<>A OR B9%<>B OR C9%<>C OR D9%<>D OR A9%<1% OR B9%<1% OR C9%<1% OR D9%<1% THEN GOTO 1000
340	N=A+B+C+D
345 !	T1 IS THE ABSOLUTE CROSS PRODUCT DIFFERENCE, T2 THE PRODUCT OF THE FOUR MARGINAL TOTALS.
350	T1=ABS(A*D-B*C): T2=(A+B)*(C+D)*(A+C)*(B+D)
360	C1=N*T1^2: C1=C1/T2
370	PRINT: PRINT "CHI-SQUARE..........";: PRINT USING " ###.#####",C1
380	PRINT "DEGREES OF FREEDOM..         1": PRINT
385 !	C2 IS THE SAME STATISTIC WITH N/2 SUBTRACTED FROM T1 BEFORE SQUARING.
390	C2=N*(T1-N/2)^2: C2=C2/T2
395	PRINT "CHI-SQUARE (CORRECTED)....";: PRINT USING " ###.#####",C2
397	PRINT "DEGREES OF FREEDOM........         1": PRINT
399 !


	IF THE USER WANTS, THIS SECTION COMPUTES AND PRINTS (FOR 2 BY 2
	ARRAYS ONLY) THE FISHER EXACT PROBABILITY. IT IS COMPUTED ON LINES
	450 AND 460 AS THE PRODUCT OF THE FACTORIALS OF THE FOUR MARGINAL
	TOTALS DIVIDED BY THE FACTORIALS OF N AND OF THE FOUR CELLS.



400	IF N>33.0 THEN GOTO 2000	! FLOATING POINT ERROR OCCURS IN THE FACTORIALS FOR N>33
410	IF A>5 AND B>5 AND C>5 AND D>5 THEN GOTO 2000	! NOT OFFERED WHEN EVERY CELL EXCEEDS 5
420	INPUT "DO YOU WISH TO COMPUTE THE FISHER EXACT PROBABILITY ";A$: PRINT
425 !	ONLY THE FIRST LETTER OF THE REPLY MATTERS.  AN EMPTY REPLY MEANS NO.
430	IF LEFT(A$,1%)="Y" THEN GOTO 450
435	IF LEFT(A$,1%)="N" OR A$="" THEN GOTO 2000
440	PRINT "YOU MUST ANSWER EITHER YES OR NO": GOTO 420
445 !	P1 IS BUILT IN TWO STEPS - ROW TOTAL FACTORIALS OVER THE FACTORIAL OF N FIRST.
450	P1=FNF(A+B)*FNF(C+D)/FNF(N)
460	P1=P1*FNF(A+C)*FNF(B+D)/(FNF(A)*FNF(B)*FNF(C)*FNF(D))
470	PRINT "THE FISHER EXACT PROBABILITY...";: PRINT USING " ###.#####",P1
480	PRINT: GOTO 2000
999 !


	THIS SECTION COMPUTES THE CHI SQUARE AND RELATED STATISTICS FOR OTHER
	THAN 2 BY 2 ARRAYS.  



1000	X2=0.0 : N=0.0
1005 !		N IS THE GRAND SUM OF ALL OBSERVATIONS.  S(3%,I%) IS USED AS THE
1006 !		TOTAL OF COLUMN I%.
1010	N=N+S(3%,I%) FOR I%=1% TO C%
1012 !		WITH A GRAND SUM OF 0 EVERY EXPECTED FREQUENCY WOULD BE A DIVISION
1013 !		BY 0 AT LINE 1060, SO REPORT IT AND RETURN TO STATCM INSTEAD.
1015	IF N=0 THEN PRINT "ALL ENTRIES ARE 0 - CHI-SQUARE CANNOT BE COMPUTED": GOTO 2000
1020	FOR J%=1% TO R%
1030	T=0.0
1035 !		T IS THE SUM OF THE OBSERVATIONS IN EACH ROW.  CELLS EQUAL TO H9
1036 !		ARE LEFT OUT OF THE SUM.
1038	FOR I%=1% TO C%: IF G(J%,I%)<>H9 THEN T=T+G(J%,I%)
1040	NEXT I%
1041 !		L7% COUNTS THE ROWS WHOSE SUM IS 0.
1042	IF T<>0 GOTO 1050
1044	L7%=L7%+1%
1050	FOR K%=1% TO C%
1055 !		L6% IS BUMPED FOR EVERY CELL THAT LIES IN A COLUMN WHOSE TOTAL IS 0,
1056 !		THAT IS R% TIMES FOR EACH SUCH COLUMN.
1057	IF S(3%,K%)<>0 GOTO 1060
1058	L6%=L6%+1%
1059 !		THE E(J%,K%)'S ARE THE EXPECTED FREQUENCIES FOR EACH CELL.
1060	E(J%,K%)=(T*S(3%,K%))/N
1063 !		CELLS WITH AN EXPECTED FREQUENCY OF 0 OR AN OBSERVATION EQUAL TO H9
1064	IF E(J%,K%)=0 OR G(J%,K%)=H9 GOTO 1080
1065	IF E(J%,K%)<5 THEN L5%=L5%+1%
1069 !		X2 IS THE CHI SQUARE
1070	X2=X2+(G(J%,K%)-E(J%,K%))^2/E(J%,K%)
1080	NEXT K%
1090	NEXT J%
1092 !		L8% IS THE NUMBER OF COLUMNS THAT HAVE ALL ZERO ENTRIES (L6% WAS
1093 !		COUNTED ONCE PER ROW FOR EACH OF THEM).
1094 !		L6% BECOMES THE SUM OF THE NUMBER OF ROWS AND THE NUMBER OF
1095	L8%=L6%/R%: L6%=L8%+L7%
1096 !		COLUMNS THAT HAVE ALL ZERO ENTRIES.
1097 !		D% IS THE DEGREES OF FREEDOM.  THE ALL ZERO ROWS (L7%) AND COLUMNS
1098 !		(L8%) CONTRIBUTE NOTHING TO X2 AND ARE REPORTED AS DISREGARDED, SO
1099 !		THEY ARE ALSO REMOVED FROM THE TABLE SIZE USED HERE.
1100	D%=(R%-L7%-1%)*(C%-L8%-1%)
1104 !


	HERE THE TABLE OF EXPECTED FREQUENCIES IS PRINTED IF THE USER WANTS.
	NOTE THAT THE TABLE IS PRINTED IN SECTIONS OF AT MOST SIX COLUMNS,
	SO A DATA MATRIX WITH MORE THAN SIX COLUMNS TAKES TWO OR THREE SECTIONS.



1105	INPUT "DO YOU WISH TO PRINT EXPECTED FREQUENCIES";P$: PRINT
1106 !	ONLY THE FIRST LETTER OF THE REPLY MATTERS.  AN EMPTY REPLY MEANS NO.
1107	IF LEFT(P$,1%)="N" OR P$="" THEN GOTO 1500
1109	IF LEFT(P$,1%)<>"Y" THEN PRINT "YOU MUST ANSWER EITHER YES OR NO": GOTO 1105
1115	PRINT "  EXPECTED FREQUENCY IN EACH CELL": PRINT
1118 !	C1% IS THE FIRST COLUMN OF THE CURRENT SECTION, C2% THE COLUMNS STILL TO PRINT.
1120	C1%=1%: C2%=C%
1123 !	K% IS THE NUMBER OF COLUMNS IN THIS SECTION - AT MOST 6.
1125	K%=C%-C1%+1%: IF K%>6% THEN K%=6%
1130	PRINT "ROW/COL";
1135	FOR K1%=1% TO K%: PRINT TAB(1%+K1%*11%);C1%+K1%-1%;: NEXT K1%
1140	PRINT
1145 !	K9% IS THE LAST COLUMN OF THIS SECTION.
1150	K9%=C1%+5%: IF K9%>C% THEN K9%=C%
1155	FOR J%=1% TO R%
1160	PRINT J%;TAB(5%);
1170	FOR K%=C1% TO K9%
1180	PRINT USING " #####.####",E(J%,K%);
1190	NEXT K%
1200	PRINT
1210	NEXT J%
1220	PRINT
1225 !	MORE THAN 6 COLUMNS LEFT MEANS ANOTHER SECTION, OTHERWISE FALL THROUGH TO 1500.
1230	IF C2%>6% THEN C2%=C2%-6%: C1%=C1%+6%: GOTO 1125
1499 !


	THIS SECTION PRINTS THE PREVIOUSLY COMPUTED CHI SQUARE AND RELATED
	STATISTICS.


1500	PRINT
1501	IF L5%>0% THEN PRINT L5%;" CELLS HAVE AN EXPECTED FREQUENCY": PRINT "GREATER THAN 0 AND LESS THAN 5.": PRINT "THE COMPUTED CHI-SQUARE THEREFORE MAY NOT BE MEANINGFUL"
1505	IF L6%<>0% THEN PRINT "(";L6%;" ROWS AND/OR COLUMNS HAVE ALL 0 ENTRIES": PRINT "AND HAVE BEEN DISREGARDED IN THIS CALCULATION)"
1509	PRINT
1510	PRINT "CHI-SQUARE..........";: PRINT USING " ###.#####",X2
1520	PRINT "DEGREES OF FREEDOM..";: PRINT USING "     #####",D%
1530	PRINT
1533 !	THE CORRECTED VALUE IS OFFERED ONLY WHEN ONE OF THE WARNINGS ABOVE APPLIED.
1535	IF L5%<=0% AND L6%<=0% THEN GOTO 1999
1537	PRINT "WOULD YOU LIKE TO COMPUTE CHI-SQUARE USING YATES";" CORRECTION";
1538	INPUT Y9$
1539	IF LEFT(Y9$,1%)<>"Y" THEN GOTO 1999
1540	X2=0
1541	FOR J%=1% TO R%
1542	FOR K%=1% TO C%
1544 !	SAME CELLS AS THE FIRST PASS, WITH 0.5 TAKEN OFF EACH ABSOLUTE DIFFERENCE.
1545	IF E(J%,K%)<>0 AND G(J%,K%)<>H9 THEN X2=X2+(ABS(G(J%,K%)-E(J%,K%))-0.5)^2/E(J%,K%)
1550	NEXT K%
1555	NEXT J%
1560	PRINT: PRINT "CHI-SQUARE (CORRECTED)....";: PRINT USING " ###.#####",X2
1565	PRINT "DEGREES OF FREEDOM........";: PRINT USING "    ######",D%
1570	PRINT
1999 !


	HERE CONTROL IS RETURNED TO STATCM. NOTE THAT IT DOESN'T ASK IF
	YOU WANT TO COMPUTE MORE CHI SQUARE, SINCE THE CHI SQUARE IS COMPUTED
	FOR THE WHOLE DATA MATRIX.



2000	PRINT 
2010	CLOSE 3,4,5
2015 !	REMOVE THE TEMPORARY WORK FILE.  IF THIS FAILS (FOR EXAMPLE WHEN CTRL/C
2016 !	ARRIVED BEFORE THE FILE WAS OPENED) LINE 9005 CONTINUES AT LINE 2030.
2020	KILL F5$
2025 !	HAND F$ BACK THROUGH SYS CALL 8 (THE COUNTERPART OF THE SYS CALL 7 READ
2026 !	AT STARTUP) AND RETURN TO STATCM, WITH A9$ APPENDED TO THE PROGRAM NAME.
2030	R$=SYS(CHR$(8)+F$)
2040	CHAIN "STATCM"+A9$
08990 !	ERROR HANDLER.  ERROR 28 IS THE CTRL/C TRAP - REARM IT AND LEAVE THROUGH
08991 !	THE NORMAL EXIT AT LINE 2000.
09000	IF ERR=28% THEN GOSUB 10000: RESUME 2000
09003 !	A FAILED KILL OF THE WORK FILE MUST NOT PREVENT THE RETURN TO STATCM.
09005	IF ERL=2020% THEN RESUME 2030
09008 !	ANY OTHER ERROR IS HANDED BACK TO THE SYSTEM.
09010	ON ERROR GOTO 0
10000	V0$=SYS(CHR$(6%)+CHR$(-7%)): RETURN	! CTRL/C TRAP
32750	END
