1 !
	500.23 - STAT11 - CROSS0

		CROSS-TABULATION MODULE

	RELEASED FOR SUBMISSION TO THE DECUS LIBRARY BY THE
	DEC ENGINEERING SYSTEMS GROUP AND THE EDUCATION PRODUCTS GROUP
		SEPTEMBER, 1977


2 !	COPYRIGHT (C) 1973, DIGITAL EQUIPMENT CORPORATION,

			MAYNARD, MASSACHUSETTS

3 !	THIS SOFTWARE IS FURNISHED TO PURCHASER UNDER A LICENSE FOR USE
	ON A SINGLE COMPUTER SYSTEM AND CAN BE COPIED (WITH INCLUSION OF
	DEC'S COPYRIGHT NOTICE) ONLY FOR USE IN SUCH SYSTEM, EXCEPT AS
	MAY OTHERWISE BE PROVIDED IN WRITING BY DEC.

4 !	THE INFORMATION IN THIS DOCUMENT IS SUBJECT TO CHANGE WITHOUT
	NOTICE AND SHOULD NOT BE CONSTRUED AS A COMMITMENT BY DIGITAL
	EQUIPMENT CORPORATION.

5 !	DEC ASSUMES NO RESPONSIBILITY FOR USE OR RELIABILITY OF ITS
	SOFTWARE ON EQUIPMENT WHICH IS NOT SUPPLIED BY DEC.



6 !	THIS MODULE PERFORMS CROSS-TABULATION FOR ANY TWO VARIABLES IN THE
	DATA MATRIX.



7 ! 	AUTHOR:		MICHAEL D. KNAUER

	VERSION NUMBER:	001

	DATE:		OCTOBER, 1973



8  !	MODIFICATIONS:  MAY, 1975

	MODIFIED TO ACCEPT MISSING DATA POINTS

		BY ARDOTH HASSLER WILSON
		   CENTRAL STATE UNIVERSITY
		   EDMOND, OKLAHOMA


00009!	MODIFICATIONS:  JUNE, 1976

	CTRL/C TRAP ADDED

10 !
	CALLING ARGUMENTS

11 !		1) VARIABLE NAME:	F3$
		   RANGE OF VALUES:	S00000.RWM - S99999.RWM
		   USE:			NAME OF THE 250 ROW BY 15 COLUMN
					VIRTUAL DATA MATRIX.

13 !		2) VARIABLE NAME:	F4$
		   RANGE OF VALUES:	S00000.ESF - S99999.ESF
		   USE:			NAME OF ELEMENTARY STATISTICS FILE

15 !		3) VARIABLE NAME:	R%
		   RANGE OF VALUES:	1 - 250
		   USE:			NUMBER OF ROWS IN DATA MATRIX

17 !		4) VARIABLE NAME:	C%
		   RANGE OF VALUES:	1 - 15
		   USE:			NUMBER OF COLUMNS IN DATA MATRIX


20 !

	RETURNING ARGUMENTS

		NONE -- THIS MODULE DOES NOT ADD TO OR CHANGE ANY OF THE
			FILES OR VARIABLES PASSED TO IT.


30 !
	DESCRIPTION OF FUNCTION

31 !		THIS MODULE PERFORMS CROSS-TABULATION FOR ANY TWO VARIABLES
		IN THE DATA MATRIX. THE USER CAN ALSO HAVE PRINTED THE
		CHI-SQUARE OF THE CROSS-TABLE AND A TABLE OF EXPECTED
		FREQUENCIES.

32 !		AFTER OBTAINING VARIABLE NUMBERS AND NUMBERS OF INTERVALS
		FROM THE USER, IT READS AND SORTS THE DATA POINTS FROM THE
		DATA MATRIX, THEN COMPUTES AND PRINTS THE CROSS TABLE, ROW
		BY ROW.

33 !		IF THE USER WANTS, IT THEN COMPUTES AND PRINTS A TABLE OF
		EXPECTED FREQUENCIES AND/OR THE CHI-SQUARE OF THE CROSS TABLE.

34 !		UNLESS THE USER WANTS ANOTHER CROSS-TABULATION, CROSS0 THEN
		RETURNS CONTROL TO STATCM.


99 !


	THIS SECTION OPENS THE DATA FILE AND ASKS THE USER TO ENTER THE
	NUMBERS OF TWO VARIABLES TO BE USED IN THE CROSS-TABULATION.



100	ON ERROR GOTO 9000	! ALL TRAPPED ERRORS ARE ROUTED TO THE HANDLER AT 9000
102	GOSUB 10000	! ARM THE CTRL/C TRAP
104	F$=SYS(CHR$(7%))	! THE CALLING ARGUMENTS ARE UNPACKED FROM THIS STRING
110	R%=VAL(MID(F$,46%,5%)): C%=VAL(MID(F$,51%,5%))	! ROW COUNT, COLUMN COUNT
130	F3$=MID(F$,31%,15%)	! NAME OF THE DATA MATRIX FILE
133	A9$=MID(F$,62%,9%)
135	A9$=LEFT(A9$,INSTR(5%,A9$,"]"))	! KEEP TEXT THROUGH THE "]"; APPENDED TO THE CHAIN NAME AT 2510
137	H9=.9E-38	! VALUE THAT MARKS A MISSING DATA POINT (TESTED AT 310)
140	OPEN F3$ FOR INPUT AS FILE 3
150	DIM #3,A(250%,15%)
160	DIM X(250%),Y(250%),A%(10%)
170	L6%=0%: L7%=0%	! RE-ENTRY POINT FOR ANOTHER TABLE: CLEAR THE EMPTY COLUMN/ROW COUNTERS
200	PRINT "ENTER TWO VARIABLES FOR CROSS TABULATION"
205	INPUT "(X) HORIZONTAL AND (Y) VERTICAL";X%,Y%
207	PRINT
210	IF X%<1% OR Y%<1% OR X%>C% OR Y%>C% THEN
	PRINT "YOUR VARIABLE NUMBERS MUST BE BETWEEN 1 AND";C%:
	PRINT "PLEASE TRY AGAIN":
	GOTO 200
225 !


	HERE THE USER IS ASKED TO ENTER THE NUMBER OF INTERVALS HE WANTS IN
	THE CROSS-TABLE FOR EACH VARIABLE.



230	PRINT "ENTER THE NUMBER OF INTERVALS FOR VARIABLE";X%
235	INPUT "(MAXIMUM IS 10)";I1%
240	GOTO 260 UNLESS I1%<1% OR I1%>10%
245	PRINT "YOU MUST SPECIFY FROM 1 TO 10 INTERVALS": GOTO 230	! ALSO THE RESUME POINT FOR BAD INPUT AT 235
260	PRINT "ENTER THE NUMBER OF INTERVALS FOR VARIABLE";Y%
265	INPUT "(MAXIMUM IS 10)";I2%
266	PRINT
270	GOTO 300 UNLESS I2%<1% OR I2%>10%
280	PRINT "YOU MUST SPECIFY FROM 1 TO 10 INTERVALS": GOTO 260	! ALSO THE RESUME POINT FOR BAD INPUT AT 265
299 !


	THIS CODE TRANSFERS THE DATA FOR THE TWO VARIABLES FROM THE DATA MATRIX
	TO THE WORKING STORAGE ARRAYS X AND Y.



300	I8%=0%	! I8% COUNTS DATA ROWS SKIPPED BECAUSE X OR Y IS MISSING
302	I9%=0%	! I9% COUNTS DATA ROWS KEPT; IT IS ALSO THE FILL INDEX FOR X() AND Y()
310	FOR I%=1% TO R%
312	IF A(I%,X%)=H9 THEN 322
314	IF A(I%,Y%)=H9 THEN 322
320	I9%=I9%+1%: X(I9%)=A(I%,X%): Y(I9%)=A(I%,Y%): GOTO 325
322	I8%=I8%+1%
325	NEXT I%
329 !


	USING THE MAXIMUM AND MINIMUM FROM THE ELEMENTARY STATISTICS FILE,
	THIS SECTION COMPUTES THE X AND Y INTERVAL VALUES.



330	F4$=LEFT(F3$,6%)+".ESF"	! SAME 6-CHARACTER NAME AS THE DATA MATRIX, EXTENSION .ESF
340	OPEN F4$ FOR INPUT AS FILE 4
350	DIM #4,S(10%,15%)	! S(1%,V) AND S(2%,V) ARE USED BELOW AS THE MAXIMUM AND MINIMUM OF VARIABLE V
360	X5=(S(1%,X%)-S(2%,X%))/I1%	! WIDTH OF ONE X INTERVAL
370	Y5=(S(1%,Y%)-S(2%,Y%))/I2%	! WIDTH OF ONE Y INTERVAL
399 !


	HERE THE Y'S ARE SORTED IN DECREASING ORDER OF MAGNITUDE, KEEPING
	THE X'S MATCHING.



400	T9%=I9%-1%	! LAST PAIR INDEX THE NEXT PASS HAS TO COMPARE
410	FOR K%=1% TO R%
420	S9%=0%	! NO EXCHANGE SEEN YET ON THIS PASS
430	FOR J%=1% TO T9%
440	IF Y(J%)<Y(J%+1%) THEN
	Q1=X(J%): X(J%)=X(J%+1%): X(J%+1%)=Q1:
	Q2=Y(J%): Y(J%)=Y(J%+1%): Y(J%+1%)=Q2:
	S9%=1%
490	NEXT J%
500	GOTO 600 IF S9%=0%	! A PASS WITH NO EXCHANGE MEANS THE LIST IS IN ORDER
510	T9%=T9%-1%
520	NEXT K%
589 !


	THIS CODE MAKES FREQUENCY COUNTS FOR EACH CELL AND PRINTS THE CROSS-
	TABLE, ROW BY ROW.



590	DIM G(10%,10%)	! G(ROW,COLUMN) KEEPS THE CELL COUNTS FOR THE CHI-SQUARE CODE
600	V=Y(1%)	! V IS THE UPPER BOUND OF THE CURRENT Y INTERVAL; Y(1%) IS THE LARGEST Y AFTER THE SORT
610	N%=1%	! N% IS THE INDEX OF THE NEXT SORTED POINT NOT YET TALLIED
650	FOR J%=0% TO I2%*2%-1%	! EVEN J% PRINTS A BOUND LABEL LINE, ODD J% PRINTS A ROW OF COUNTS
655	IF J%=I2%*2%-1% THEN V=Y(I9%)	! LAST ROW: USE THE SMALLEST Y SO THE TEST AT 730 ADMITS ALL REMAINING POINTS (ASSUMES Y5>=0)
660	IF J%-(J%/2%)*2%<>0% GOTO 700	! ODD J% GOES TO THE ROW CODE
670	PRINT USING "########.###",V;
680	PRINT " + "
690	GOTO 850
700	PRINT SPACE$(12)+" . ";
710	A%(I%)=0% FOR I%=0% TO 10%	! A%() HOLDS THE COUNTS FOR THE ROW BEING PRINTED
730	IF Y(N%)<(V-Y5) GOTO 800	! POINT BELONGS TO A LOWER ROW: PRINT THIS ROW
740	I5%=(X(N%)-S(2%,X%))/X5	! COLUMN SUBSCRIPT: INTERVAL WIDTHS ABOVE THE X MINIMUM
750	A%(I5%)=A%(I5%)+1%
751 !


	HERE, IF THE COMPUTED SUBSCRIPT I5% IS GREATER THAN I1%-1% (ROUND-OFF
	ERROR, OR A POINT LYING EXACTLY ON THE X MAXIMUM, WHICH GIVES I5%=I1%),
	THE LAST CELL, I1%-1%, IS INCREMENTED AS IT SHOULD BE. NOTE THAT
	LINES 750 AND 755 STILL INCREMENT THE ENTRY AT SUBSCRIPT I5% AS WELL;
	THE PRINT LOOP AT 800 AND THE TOTALS AT 1040, 1055 AND 3050 ALL STOP AT
	COLUMN I1%-1%, SO THAT EXTRA TALLY IS NEVER PRINTED OR SUMMED.



752	IF I5%>I1%-1% THEN A%(I1%-1%)=A%(I1%-1%)+1%
753	IF I5%>I1%-1% THEN G((J%-1%)/2%,I1%-1%)=G((J%-1%)/2%,I1%-1%)+1
755	G((J%-1%)/2%,I5%)=G((J%-1%)/2%,I5%)+1
759 !


	THIS CODE ADVANCES TO THE NEXT POINT AND, WHEN THE ROW IS COMPLETE,
	PRINTS THE ROW OF THE CROSS TABLE. NOTE THAT V IS THEN DECREMENTED
	TO THE NEXT LOWER INTERVAL BOUND. ONCE EVERY POINT HAS BEEN TALLIED
	(N%>I9%) THE REMAINING ROWS ARE SKIPPED AND CONTROL GOES TO 900.



760	N%=N%+1%
770	IF N%>I9% GOTO 800
780	GOTO 730
800	FOR I%=0% TO I1%-1%
810	PRINT USING "###  ",A%(I%);
820	NEXT I%
830	PRINT
835	V=V-Y5
840	IF N%>I9% GOTO 900
850	NEXT J%
899 !


	THIS SECTION PRINTS LABELS ALONG THE BOTTOM OF THE CROSS TABLE.



900	PRINT USING "########.###",Y(I9%);
910	PRINT "   +";
920	FOR I%=1% TO I1%+1%
922	PRINT "....+";
924	NEXT I%
930	PRINT
935	PRINT SPACE$(10);
940	H=S(2%,X%)	! FIRST LABEL LINE: X MINIMUM, THEN EVERY SECOND INTERVAL BOUND
950	FOR I%=0% TO I1% STEP 2%
960	PRINT USING "#####.### ",H;
970	H=H+2*X5
980	NEXT I%
985	PRINT
987	PRINT SPACE$(15);
990	H=S(2%,X%)+X5	! SECOND LABEL LINE: THE BOUNDS IN BETWEEN, OFFSET BY ONE INTERVAL
1000	FOR I%=1% TO I1% STEP 2%
1005	PRINT USING "#####.### ",H;
1010	H=H+2*X5
1015	NEXT I%
1016 !

	THE SMALL-EXPECTED-FREQUENCY COUNT L5% AND THE CHI-SQUARE SUM X2
	ARE CLEARED HERE. CONTROL THEN GOES DOWN TO THE CHI SQUARE
	QUESTION AT 3000, AND COMES BACK UP TO 1020 IF THE ANSWER IS YES.


1017	X2=0.0
1018	L5%=0%: GOTO 3000
1019 !


	T IS THE ROW TOTAL. IT IS COMPUTED HERE AND IS THE SUM OF THE
	FREQUENCY COUNTS FOR A GIVEN ROW IN THE CROSS TABLE. IT IS USED
	IN CALCULATING THE EXPECTED FREQUENCIES AND THE CHI SQUARE.



1020	FOR J%=0% TO I2%-1%	! ONE PASS PER ROW OF THE CROSS TABLE
1030	T=0.0
1040	T=T+G(J%,I%) FOR I%=0% TO I1%-1%
1042	IF T<>0 GOTO 1050
1044	L7%=L7%+1%	! ROW J% HAS ALL ZERO ENTRIES
1049 !


	T2 IS THE COLUMN TOTAL. IT IS COMPUTED HERE AND IS THE SUM OF
	THE FREQUENCY COUNTS FOR A GIVEN COLUMN IN THE CROSS TABLE. IT
	IS USED IN CALCULATING THE EXPECTED FREQUENCIES AND THE CHI SQ.
	BECAUSE THIS COLUMN LOOP RUNS INSIDE THE ROW LOOP, EVERY COLUMN
	TOTAL IS RE-SUMMED FOR EACH ROW, AND AN ALL-ZERO COLUMN IS ADDED
	TO L6% ONCE PER ROW (I2% TIMES IN ALL).



1050	FOR K%=0% TO I1%-1%
1054	T2=0.0
1055	T2=T2+G(I%,K%) FOR I%=0% TO I2%-1%
1057	IF T2<>0 GOTO 1060
1058	L6%=L6%+1%
1059 !


	THE E(J%,K%)'S, EXPECTED FREQUENCIES FOR EACH CELL, AND X2,
	THE CUMULATIVE CHI SQUARE, ARE COMPUTED HERE. A CELL WHOSE
	EXPECTED FREQUENCY IS ZERO ADDS NOTHING TO X2. L5% COUNTS THE
	CELLS WHOSE EXPECTED FREQUENCY IS NONZERO BUT LESS THAN 5.
	N IS THE GRAND TOTAL SUMMED AT 3045-3070.



1060	E(J%,K%)=(T*T2)/N
1064	IF E(J%,K%)=0 GOTO 1080
1065	IF E(J%,K%)<5 THEN L5%=L5%+1%
1070	X2=X2+(G(J%,K%)-E(J%,K%))^2/E(J%,K%)
1080	NEXT K%
1090	NEXT J%
1094 !
	L7% IS THE NUMBER OF ROWS AND L6% (ONCE DIVIDED BY I2%, SINCE EACH
	EMPTY COLUMN WAS COUNTED ONCE PER ROW) THE NUMBER OF COLUMNS IN THE
	CROSS TABLE WITH ALL ZERO ENTRIES. THESE COUNTS ARE COMBINED INTO
	L6% AND THE TOTAL IS PRINTED ALONG WITH THE CHI SQUARE.



1095	L6%=L6%/I2%+L7%
1099 !


	THIS CODE PRINTS THE TABLE OF EXPECTED FREQUENCIES. THE ALGORITHM
	IS COMPLICATED BY THE FACT THAT ONLY SIX COLUMNS CAN BE PRINTED
	ACROSS THE PAGE. HENCE THE TABLE MAY HAVE TO BE PRINTED IN TWO
	PARTS.



1100	D%=(I2%-1%)*(I1%-1%)	! DEGREES OF FREEDOM. NOTE(REVIEW): NOT REDUCED FOR THE ALL-ZERO ROWS/COLUMNS COUNTED IN L6% - CONFIRM INTENDED
1105	INPUT "DO YOU WISH TO PRINT EXPECTED FREQUENCIES";P$ :PRINT
1107	IF LEFT(P$,1%)="Y" GOTO 1115
1109	IF LEFT(P$,1%)="N" OR P$="" GOTO 1500
1111	PRINT "YOU MUST ANSWER EITHER YES OR NO"	! ANY OTHER ANSWER IS REJECTED AND ASKED AGAIN, AS AT 2200 AND 3080
1113	GOTO 1105
1115	PRINT "  EXPECTED FREQUENCY IN EACH CELL" : PRINT
1120	C1%=1% : C2%=I1%	! C1% = FIRST COLUMN OF THIS PART, C2% = COLUMNS STILL TO BE PRINTED
1125	K%=C1%+5%: IF K%>I1% THEN K%=I1%-C1%+1% ELSE K%=6%
1130	PRINT "ROW/COL";
1135	PRINT TAB(1%+K1%*11%);C1%+K1%-1%; FOR K1%=1% TO K%: PRINT
1150	FOR J%=0% TO I2%-1%
1160	PRINT J%+1%;TAB(5%);
1170	FOR K%=C1%-1% TO C1%+4%
1175	IF K%>I1%-1% GOTO 1190
1180	PRINT USING " #####.####",E(J%,K%);
1190	NEXT K%
1200	PRINT
1210	NEXT J%
1220	PRINT
1230	IF C2%<=6% GOTO 1500
1240	C2%=C2%-6%
1250	C1%=C1%+6%
1260	GOTO 1125	! PRINT THE NEXT PART OF UP TO SIX COLUMNS
1500	PRINT	!


	THIS SECTION REPORTS THE CHI SQUARE COMPUTED ABOVE AND ITS DEGREES
	OF FREEDOM, PRECEDED BY WARNINGS ABOUT SMALL EXPECTED FREQUENCIES
	(L5%) AND ABOUT ALL-ZERO ROWS OR COLUMNS (L6%) WHEN THERE ARE ANY.



1501	IF L5%>0% THEN
	PRINT L5%;" CELLS HAVE AN EXPECTED FREQUENCY":
	PRINT "GREATER THAN 0 AND LESS THAN 5.":
	PRINT "THE COMPUTED CHI-SQUARE THEREFORE MAY NOT BE MEANINGFUL"
1505	IF L6%<>0% THEN
	PRINT "(";L6%;" ROWS AND/OR COLUMNS HAVE ALL 0 ENTRIES":
	PRINT "AND HAVE BEEN DISREGARDED IN THIS CALCULATION)"
1509	PRINT
1510	PRINT "CHI-SQUARE..........";
1515	PRINT USING " ###.#####",X2
1520	PRINT "DEGREES OF FREEDOM..";
1525	PRINT USING "     #####",D%
1530	PRINT : PRINT
2099 !


	HERE THE G ARRAY, WHICH HOLDS ALL THE FREQUENCY COUNTS FOR
	THE CROSS TABLE, IS INITIALIZED IN CASE THE USER WANTS TO
	COMPUTE MORE CROSS-TABULATION.



2100	FOR I%=0% TO 10%
2105	FOR J%=0% TO 10%
2110	G(I%,J%)=0	! EVERY CELL COUNT BACK TO ZERO FOR THE NEXT TABLE
2115	NEXT J%
2120	NEXT I%
2199 !


	THIS SECTION EITHER RETURNS CONTROL TO STATCM OR TRANSFERS
	BACK UP FOR MORE CROSS-TABULATION.



2200	INPUT "DO YOU WISH TO COMPUTE MORE CROSS-TABULATION ";W$: PRINT
2210	GOTO 170 IF LEFT(W$,1%)="Y"
2220	IF LEFT(W$,1%)<>"N" AND W$<>"" THEN
	PRINT "YOU MUST ANSWER EITHER YES OR NO":
	GOTO 2200
2400	CLOSE 3,4
2500	R$=SYS(CHR$(8)+F$)	! HAND THE STRING READ AT LINE 100 BACK BEFORE CHAINING
2510	CHAIN "STATCM"+A9$
2999 !


	N IS THE GRAND TOTAL COUNT. IT IS THE NUMBER OF DATA POINTS
	COUNTED IN THE CROSS TABLE. SINCE THE MISSING-DATA MODIFICATION
	IT CAN BE LESS THAN R% (ROWS WITH A MISSING X OR Y VALUE ARE
	SKIPPED), SO IT IS SUMMED UP HERE FROM THE CELL COUNTS IN G.



3000	PRINT
3010	PRINT
3020	DIM E(10%,10%)
3045	N=0
3050	FOR I%=0% TO I1%-1%
3055	FOR J%=0% TO I2%-1%
3060	N=N+G(J%,I%)	! ADD UP EVERY CELL OF THE I2% BY I1% TABLE
3065	NEXT J%
3070	NEXT I%
3079 !


	IF THE USER ANSWERS NO TO THE CHI-SQUARE QUESTION, THEN HE IS
	NOT ASKED IF HE WANTS TO SEE THE EXPECTED FREQUENCIES.



3080	INPUT "DO YOU WISH TO COMPUTE CHI-SQUARE FOR THE ABOVE TABLE ";W$
     : PRINT
3085	IF LEFT(W$,1%)="Y" GOTO 1020
3090	IF LEFT(W$,1%)="N" OR W$="" GOTO 2100	! LEAVE THROUGH 2100 SO G IS CLEARED; GOING STRAIGHT TO 2200 LEFT THIS TABLE'S COUNTS IN G FOR THE NEXT ONE
3095	PRINT "YOU MUST ANSWER EITHER YES OR NO"
3097	GOTO 3080
3099 !


	THESE ERROR ROUTINES ARE FOR THE TERMINAL QUESTION AND ANSWER
	ROUTINES.



09000	IF ERR=28% THEN GOSUB 10000: RESUME 2100	! CTRL/C: RE-ARM THE TRAP, THEN CLEAR G AT 2100 (A TABLE MAY HAVE BEEN PARTLY TALLIED) BEFORE THE QUESTION AT 2200
09010	IF ERR>49% AND ERR<53% THEN 
	PRINT "YOU TYPED NON-NUMERIC CHARACTERS.  PLEASE TRY AGAIN."
	: IF ERL=235 THEN RESUME 245 ELSE IF ERL=265 THEN RESUME 280
	  ELSE RESUME 200	! ERR 50-52 ARE TREATED AS BAD NUMERIC INPUT; RESUME AT THE RETRY CODE FOR THE PROMPT THAT FAILED
09020	ON ERROR GOTO 0	! ANY OTHER ERROR IS NOT HANDLED HERE
10000	V0$=SYS(CHR$(6%)+CHR$(-7%))	! CTRL/C TRAP
10010	RETURN
32750	END
