/*****************************************************************************

       Copyright  1993, 1994 Digital Equipment Corporation,
                       Maynard, Massachusetts.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its 
documentation for any purpose and without fee is hereby granted, provided  
that the copyright notice and this permission notice appear in all copies  
of software and supporting documentation, and that the name of Digital not  
be used in advertising or publicity pertaining to distribution of the software 
without specific, written prior permission. Digital grants this permission 
provided that you prominently mark, as not part of the original, any 
modifications made to this software or documentation.

Digital Equipment Corporation disclaims all warranties and/or guarantees  
with regard to this software, including all implied warranties of fitness for 
a particular purpose and merchantability, and makes no representations 
regarding the use of, or the results of the use of, the software and 
documentation in terms of correctness, accuracy, reliability, currentness or
otherwise; and you rely on the software, documentation and results solely at 
your own risk. 

******************************************************************************/
/*---------------------------------------------------------------------
 *        [ Copyright (c) 1999 Alpha Processor Inc.] - Unpublished Work
 *          All rights reserved
 * 
 *    This file contains source code written by Alpha Processor, Inc.
 *    It may not be used without express written permission. The
 *    expression of the information contained herein is protected under
 *    federal copyright laws as an unpublished work and all copying
 *    without permission is prohibited and may be subject to criminal
 *    and civil penalties. Alpha Processor, Inc.  assumes no
 *    responsibility for errors, omissions, or damages caused by the use
 *    of these programs or from use of the information contained herein.
 *  
 *-------------------------------------------------------------------*/



#include "lib.h"
#include "northbridge.h"
#include "mcheck.h"
#include "osf.h"
#include "impure.h"		/* For logout area */
#include "impure_struct.h"	/* For impure area */
#include "cserve.h"
#include "beepcodes.h"

#include "uilib.h"
#include "platform.h"
#include "cpu.h"
#include "cmos_rtc.h"

#ifdef CONFIG_IRONGATE		/* chipset fixup, see below */
#include "northbridge/irongate.h"
#endif				/* CONFIG_IRONGATE */

/* Non-zero while a machine check is anticipated.  MchkHandler tests this
 * flag first; when it is set the handler records the event in
 * ExpectedMachineCheckTaken, clears this flag, clears the error state and
 * returns without raising an alert.
 * (Presumably raised by probing code elsewhere - not visible in this file.) */
unsigned char MachineCheckExpected;
/* Set to TRUE by MchkHandler when an anticipated machine check was taken. */
unsigned char ExpectedMachineCheckTaken;

/* Dedicated machine check handler, defined further down in this file; called
 * from UnexpectedInterruptHandler for INT_K_MCHK. */
static void MchkHandler( unsigned int arg0, ui vector, LogoutFrame_t *logout );


/* links into mfrg diagnostics, put here until a unified model is built */
/* IRQdivert: when TRUE, UnexpectedInterruptHandler hands I/O device
 * interrupts to IRQTestHandler rather than reporting them as unexpected. */
volatile uint8 IRQdivert = FALSE;
/* irq_include_timer: when TRUE together with IRQdivert, clock interrupts are
 * also forwarded to IRQTestHandler, as IRQ 8. */
volatile uint8 irq_include_timer = FALSE;
/* Test hook receiving the IRQ number of a diverted interrupt.  It has no
 * initialiser here, so it is a null pointer until some other code sets it. */
void (*IRQTestHandler) ( int IRQ );


/* Title used for every alert raised by UnexpectedInterruptHandler */
static const String intr_title = "INTERRUPT";

/* NOTE(review): pause_on_error is not referenced in this part of the file */
#define pause_on_error
/* Processor correctable errors are reported individually until proc_crd
 * reaches this count; after that MchkHandler handles them silently. */
#define MAX_CRD_REPORTS 8

/* Running totals maintained by MchkHandler, keyed on the SCB vector:
 *   sys_crd   - SCB_Q_SYSERR
 *   sys_mchk  - SCB_Q_SYSMCHK
 *   proc_crd  - SCB_Q_PROCERR
 *   proc_mchk - SCB_Q_PROCMCHK (also bumped when a reported SCB_Q_PROCERR
 *               falls through into the SCB_Q_PROCMCHK case)
 */
unsigned sys_crd  = 0;
unsigned sys_mchk = 0;
unsigned proc_crd = 0;
unsigned proc_mchk= 0;

/*
 * UnexpectedInterruptHandler - dispatch an interrupt by its class.
 *
 *   arg0   - interrupt class, one of the INT_K_* codes switched on below
 *   vector - second entry argument; logged as the vector for performance
 *            counter interrupts and as the received character for SROM
 *            UART interrupts, and passed on to MchkHandler
 *   logout - third entry argument; passed to MchkHandler as the logout
 *            frame for machine checks
 *
 * Clock ticks are counted, interprocessor interrupts and machine checks go
 * to their own handlers, and everything else is logged and raised as an
 * alert.  When IRQdivert is set and a test handler is installed, device
 * interrupts (and, if irq_include_timer is set, clock ticks) are forwarded
 * to IRQTestHandler instead.
 */
void UnexpectedInterruptHandler(unsigned int arg0, ui vector,
				LogoutFrame_t *logout )
{
    int IRQTaken;
    unsigned mask0, mask1;		/* IRQ debug */


    switch (arg0) {

    case INT_K_CLK:
	/* acknowledge is done in the PALcode now for conformance with the 
	 * Linux kernel's needs */

	jiffies++;		/* no ack required here now, just count... */

	/* IRQTestHandler is a null pointer until somebody installs a
	 * handler, so only divert when there is something to call */
	if ( IRQdivert && irq_include_timer && IRQTestHandler )
		IRQTestHandler( 8 );	/* IRQ 8 = RTC */
	break;


    case INT_K_IP:	/* Interprocessor interrupt */
	smp_ipi_handler( );
	break;


    case INT_K_MCHK:	/* Machine check, pass on to dedicated handler */
	MchkHandler( arg0, vector, logout );
	break;


    case INT_K_DEV:		/* I/O device interrupt */

	IRQTaken = inIack();        /* set up so that Vector == IRQ */

	if ( IRQdivert && IRQTestHandler ) {
	    IRQTestHandler( IRQTaken );
	} else {
	    /* not diverted (or diverted with no handler installed):
	     * report it and dismiss it ourselves */
	    mobo_logf( LOG_CRIT "INTERRUPT: Unexpected I/O interrupt on IRQ %d [jiffies=%d]\n",
                IRQTaken, jiffies );
	    mask0 = inportb( 0x21 );
	    mask1 = inportb( 0xA1 );
	    mobo_alertf(intr_title,
		"Unexpected I/O interrupt on IRQ %d [jiffies=%d], "
		"masks=0x%02X,0x%02X",
		IRQTaken, jiffies, mask0, mask1 );

	    /* attempt to reset the interrupt in case it goes away */
	    if ( IRQTaken >= 8 )	outportb( 0xA0, 0x20 );

	    /* done for both PICs: I think the cascade needs clearing */
	    outportb( 0x20, 0x20 );

	}
	break;


    case INT_K_PERF:		/* Performance counter */
	mobo_logf( LOG_WARN "INTERRUPT: Unexpected performance counter interrupt\n");
	mobo_alertf(intr_title,
		    "Performance counter interrupt, vector=%x", vector);
	break;


    case INT_K_SL:		/* SROM UART serial line */
	mobo_logf( LOG_WARN "INTERRUPT: SROM UART interrupt on CPU %d\n",
		smp_phys_id() );
	mobo_alertf( intr_title, "SROM UART on CPU %d got key '%c' (0x%02X)",
		smp_phys_id(), vector, vector );
	break;


    default:
	/* logout is a pointer: print it at full width rather than through
	 * %x, which would drop the upper half on a 64-bit target */
	mobo_logf( LOG_WARN "INTERRUPT: Unexpected unknown interrupt\n"
		   LOG_WARN "INTERRUPT: Parameters - a0=%x a1=%x a2=%lx\n",
		   arg0, vector, (unsigned long) logout );
	mobo_alertf(intr_title,
		  "Unknown interrupt type; Parameters a0=%x, a1=%x, a2=%lx\n",
		    arg0, vector, (unsigned long) logout);
	break;
    }
}


/*
 * MchkHandler - analyse, report and dismiss a machine check.
 *
 *   arg0   - interrupt class as received by UnexpectedInterruptHandler
 *            (not used here)
 *   vector - SCB vector identifying the kind of machine check (SCB_Q_*)
 *   logout - logout frame describing the error
 *
 * An anticipated machine check (MachineCheckExpected set) is simply noted
 * in ExpectedMachineCheckTaken and dismissed.  Anything else is logged,
 * signalled with a beep code, interpreted according to its vector and,
 * unless correctable-error reporting has been throttled, shown to the user
 * as an alert.  The machine check state is cleared with wrmces() before
 * returning on every path.
 */
static void MchkHandler( unsigned int arg0, ui vector, LogoutFrame_t * logout )
{
    int be_silent = 0;
    char mchk_title[80];	/* title for mchk */
    char mchk_interp[128];	/* interpretation of mchk */
    char statbuf[256];		/* for reading system status on mchecks */

#ifdef CONFIG_IRONGATE		/* chipset fixup, see below... */
    ECC_t ECC;
    reg_handle r;
#endif				/* CONFIG_IRONGATE */

    if (MachineCheckExpected) {

	ExpectedMachineCheckTaken = TRUE;
	MachineCheckExpected = FALSE;

	/* Clear the machine check before returning */
	IOPCIClearNODEV();
	wrmces(MCES_M_SCE | MCES_M_PCE | MCES_M_MIP);
	mobo_logf( LOG_INFO "MACHINE CHECK: Expected Machine Check taken.\n");
	return;
    }


    /* if unexpected, we're in need of some analysis */

    mobo_logf( LOG_CRIT "MACHINE CHECK: unexpected machine check interrupt!\n");
    BeepCode( beep_k_mchk );

    /* start from an empty interpretation so the report below never prints
     * stack garbage if an interpreter leaves the buffer untouched */
    mchk_interp[0] = '\0';

    osf_mchkinterp( vector, logout, mchk_title );	/* may be overridden */

    switch( vector ) {

    case SCB_Q_PROCERR:

	++proc_crd;

	if ( proc_crd > MAX_CRD_REPORTS ) {
	    be_silent=1;
	    break;
	}

	if (proc_crd == MAX_CRD_REPORTS) {
	    sprintf_dbm( mchk_title, "Further Correctable Errors" );
	    sprintf_dbm( mchk_interp, "More follow and are not reported");
	    mobo_logf( LOG_INFO "Further Correctable Errors are handled silently\n");
	    break;
	}

	/* falls through if the system isn't swamped with CRD mchecks */
	/* NOTE(review): the fall-through also bumps proc_mchk for a
	 * correctable error - confirm that this is intended */

    case SCB_Q_PROCMCHK:
	
	++proc_mchk;

	plat_sysstat( statbuf );
	mobo_logf( LOG_CRIT "Jiffy count is %d\n"
		   LOG_CRIT "%s\n", jiffies, statbuf );
	cpu_mchkinterp( mchk_interp, vector, logout);
	DumpLogout( (LogoutFrame_t *) logout );               /* log stream */
	break;


    case SCB_Q_SYSERR:			/* correctable system error */
    case SCB_Q_SYSMCHK:			/* system hard error */
    case SCB_Q_SYS_EVENT:		/* A power management issue */

	plat_mchkinterp( mchk_interp, vector, logout );

#ifdef CONFIG_IRONGATE

/* Chipset fixup: Irongate raises NMI on NXM accesses, we don't want it
 * to do this.  Also, there is some speculation that it doesn't always do the
 * right thing by flagging spurious ECC errors.  Irongate doesn't correct ECC
 * but ECC is enabled in Irongate so it generates ECC checksums for data
 * passing from the PCI bus.  We try our best to ignore any NMIs from Irongate
 *
 * The first job is to filter out NMIs raised by Irongate.  We do this by
 * looking at the ECC status register.
 */

	/* mobo_logf( LOG_DBG "NMI: Irongate playing up\n" ); */

	r.p16 = &ECC.i;
	ig_rdcsr( IG_ECC, r );

	if( ECC.r.chipsel == 0 && ECC.r.status != 0 )	/* IG's acting up */
	{
	    plat_nmiclr();
	    wrmces(MCES_M_MIP | MCES_M_SCE | MCES_M_PCE);
	    return;				/* quit now, don't report */
	}

#endif	/* CONFIG_IRONGATE */

	if ( vector == SCB_Q_SYSMCHK )	++sys_mchk;
	if ( vector == SCB_Q_SYSERR  )	++sys_crd;

	plat_nmiclr();			/* deassert the NMI */
	break;


    default:
	sprintf_dbm( mchk_title, "Unknown Machine Check type" );
	sprintf_dbm( mchk_interp, "Vector 0x%X not recognised", vector );
	DumpLogout( (LogoutFrame_t *) logout );               /* log stream */
	break;
    }


    /* output of the analysis, with consideration of volume reporting */

    if ( !be_silent )
    {
	mobo_logf( LOG_CRIT "%s\n" LOG_CRIT "%s\n", mchk_title, mchk_interp );

	/* swap carriage returns, newlines for printing on interactive screen */
	ntor( mchk_interp );

	/* the interpretation is generated text, so it is passed as data:
	 * used as the format string, any '%' in it would be taken as a
	 * conversion and read arguments that were never supplied */
	mobo_alertf( mchk_title, "%s", mchk_interp );
    }

    wrmces(MCES_M_MIP | MCES_M_SCE | MCES_M_PCE);
}



/*----------------------------------------------------------------------*/
/* This code was originally in file dbm/dp264mck.c, but has been put here
 * with the other handler code */

/* STIG - these are not configured correctly yet to be meaningful */
/* Originals were in memtest.c */

/* Error tallies incremented by print_dimm_number():
 *   byte_number_error_count - indexed by the byte number passed in
 *   bank_number_error_count - indexed by the bank the address was found in
 *   dimm_number_error_count - indexed by (bank << 2) + dimm
 */
int byte_number_error_count[8];
int bank_number_error_count[4];
int dimm_number_error_count[16];


/* STIG - int types added here - was untyped before */
/* Lookup used by get_byte_number_from_ecc() when its argument is not a
 * single set bit among bits 0-7: the table is indexed by the argument
 * shifted right by five (i.e. bits 7:5 of a byte-wide value) and yields the
 * byte number. */
int syndrome_75_to_byte[8] =
{
    2, 1, 4, 7, 6, 5, 0, 3};

/* Decoder for memory errors */
/* Tables:
   Syndrome Hi or Lo determines low or high 127 bits
   - sets a Low or High Flag
   Decoded syndrome to bit in error, bit 3 determines even or odd byte
   - sets the Odd or Even Flag

   Decoder for DIMM Slot Number

 */






/* Most recent snapshot of the four array address registers; filled in by
 * get_array_address_register() and masked in place by print_dimm_number() */
long array_address_register[4];

/*
 * get_array_address_register - read the four array address registers.
 *
 * Loads one long from each of four fixed addresses (0x801a0000100 upwards,
 * 0x40 apart) into array_address_register[0..3].  The locations are
 * presumably chipset registers rather than ordinary memory, so they are
 * read through a volatile pointer: the compiler must then perform exactly
 * one load per register, here, and may not cache or drop it.
 */
void get_array_address_register(void)
{
    static const unsigned long aar_address[4] = {
	0x801a0000100ul,
	0x801a0000140ul,
	0x801a0000180ul,
	0x801a00001c0ul
    };
    int i;

    for (i = 0; i < 4; i++)
	array_address_register[i] = *(volatile long *) aar_address[i];
}

void print_dimm_number(long C_ADDR, int byte_number)
{
    int dimm, i, j, bank = -1, index[4] =
    {0, 1, 2, 3};
    get_array_address_register();
    for (i = 0; i < 4; i++)
	array_address_register[i] &= ~(0xfffffful);
    for (i = 1; i < 4; i++)
	if (array_address_register[index[i]] < array_address_register[index[0]]) {
	    j = index[i];
	    index[i] = index[0];
	    index[0] = j;
	}
    for (i = 2; i < 4; i++)
	if (array_address_register[index[i]] < array_address_register[index[1]]) {
	    j = index[i];
	    index[i] = index[1];
	    index[1] = j;
	}
    if (array_address_register[index[3]] < array_address_register[index[2]]) {
	j = index[3];
	index[3] = index[2];
	index[2] = j;
    }
    dimm = ((byte_number & 1) << 1) + ((C_ADDR >> 4) & 1);
    for (i = 0; i < 4; i++) {
	if ((C_ADDR >= array_address_register[index[i]])
	    & (array_address_register[index[i + 1]] > C_ADDR))
	    bank = index[i];
    }
    printf_dbm("C_ADDR %x\nBank %d Dimm %d byte number %d in error\n",
	       C_ADDR, bank, dimm, byte_number);
    byte_number_error_count[byte_number]++;
    bank_number_error_count[bank]++;
    dimm_number_error_count[(bank << 2) + dimm]++;
}


/*
 * get_byte_number_from_ecc - translate a syndrome value into a byte number.
 *
 *   low - syndrome value; only bits 0-7 are significant
 *
 * If exactly one of bits 0-7 is set and nothing else, the byte number is
 * the position of that bit.  Otherwise it is looked up in
 * syndrome_75_to_byte[] using bits 7:5 of the value.
 *
 * Returns a byte number in the range 0-7.  The table index is masked to
 * three bits so that a value outside 0-255 can never index past the end of
 * the eight-entry table; values inside that range are unaffected.
 */
int get_byte_number_from_ecc(int low)
{
    int bit;

    for (bit = 0; bit < 8; bit++) {
	if (low == (1 << bit))
	    return bit;
    }

    return syndrome_75_to_byte[((unsigned int) low >> 5) & 7u];
}


/*
 * DumpLogout - write the contents of a machine check logout frame to the
 * log at critical level.
 *
 *   Frame - logout frame to dump
 *
 * The frame's address is logged first, followed by one line per field.
 */
void DumpLogout(LogoutFrame_t * Frame)
{
    const LogoutFrame_t *lf = Frame;

    /* where the frame lives, ahead of what is in it */
    mobo_logf( LOG_CRIT "Logout Frame is at 0x%016lx:\n", Frame );

    /* arguments below are listed in the same order as the format lines */
    mobo_logf( LOG_CRIT "MCHK_Code       %d\n"
	       LOG_CRIT "MCHK_Frame_Rev  %d\n"
	       LOG_CRIT "I_STAT          %016lx\n"
	       LOG_CRIT "DC_STAT         %016lx\n"
	       LOG_CRIT "C_ADDR          %016lx\n"
	       LOG_CRIT "DC1_SYNDROME    %016lx\n"
	       LOG_CRIT "DC0_SYNDROME    %016lx\n"
	       LOG_CRIT "C_STAT          %016lx\n"
	       LOG_CRIT "C_STS           %016lx\n"
	       LOG_CRIT "RESERVED0       %016lx\n"
	       LOG_CRIT "EXC_ADDR        %016lx\n"
	       LOG_CRIT "IER_CM          %016lx\n"
	       LOG_CRIT "ISUM            %016lx\n"
	       LOG_CRIT "MM_STAT         %016lx\n"
	       LOG_CRIT "PAL_BASE        %016lx\n"
	       LOG_CRIT "I_CTL           %016lx\n"
	       LOG_CRIT "PCTX            %016lx\n",
	       lf->MCHK_Code,
	       lf->MCHK_Frame_Rev,
	       lf->I_STAT,
	       lf->DC_STAT,
	       lf->C_ADDR,
	       lf->DC1_SYNDROME,
	       lf->DC0_SYNDROME,
	       lf->C_STAT,
	       lf->C_STS,
	       lf->RESERVED0,
	       lf->EXC_ADDR,
	       lf->IER_CM,
	       lf->ISUM,
	       lf->MM_STAT,
	       lf->PAL_BASE,
	       lf->I_CTL,
	       lf->PCTX );
}

/*
 * Register state handed to UnexpectedFault() when an unexpected CPU
 * exception is taken.  Every member is one 64-bit quantity.
 *
 * The members are NOT in register-number order: r16-r18 and r29 appear in
 * the final group rather than in sequence with the others, and osf_kgp
 * sits between r28 and r30.
 * NOTE(review): the layout presumably has to match the code that builds
 * the frame (not visible in this file) - do not reorder without checking.
 */
typedef struct {
	/* State saved by unexpectedException */
	uint64	r0;
	uint64	r1;
	uint64	r2;
	uint64	r3;
	uint64	r4;
	uint64	r5;
	uint64	r6;
	uint64	r7;
	uint64	r8;
	uint64	r9;
	uint64	r10;
	uint64	r11;
	uint64	r12;
	uint64	r13;
	uint64	r14;
	uint64	r15;
	uint64	r19;
	uint64	r20;
	uint64	r21;
	uint64	r22;
	uint64	r23;
	uint64	r24;
	uint64	r25;
	uint64	r26;
	uint64	r27;
	uint64	r28;
	uint64	osf_kgp;
	uint64	r30;

	/* Fault code saved by PALcode's handler entry point */
	uint64	fcode;			/* indexes UnexpectedFault's name table */
	uint64	osf_a0;			/* logged as the exception address */
	uint64	osf_a1;
	uint64	osf_a2;

	/* Stuff left by OSF PALcode */
	uint64	ps;
	uint64	pc;			/* logged as the failing PC */
	uint64	r29;			/* GP */
	uint64	r16;			/* A0 */
	uint64	r17;			/* A1 */
	uint64	r18;			/* A2 */
	uint64	framesize;
} OSF_exception_state;


/*
 * UnexpectedFault - report an unexpected CPU exception.
 *
 *   F - register state captured when the exception was taken
 *
 * Sounds the diagnostics-error beep code, logs the fault type, the CPU it
 * happened on and every saved integer register, then (if the PC lies
 * within the uncompressed image's text) the instruction at the failing PC
 * and the three words before it.  Finally an alert is raised telling the
 * user that the system will reset.
 *
 * A fault code with no entry in the name table is rewritten to 0
 * ("Unrecognised system fault!") in *F before anything is printed.
 */
void UnexpectedFault( OSF_exception_state *F )
{
    static String fault[] = {
        "Unrecognised system fault!",
        "Arithmetic exception",
        "Memory management fault",
        "Invalid instruction fault",
        "Unaligned trap",
        "System trap"
    };
#define NFAULTS ( sizeof( fault ) / sizeof( String ) )

    uint32 *failingpc;
    int phys_id = smp_phys_id();

    BeepCode( beep_k_diags_err );

    /* valid indices are 0 .. NFAULTS-1, so NFAULTS itself must be
     * rejected as well, or fault[] is read one past its end */
    if ( F->fcode >= NFAULTS )      F->fcode = 0;

    mobo_logf(  LOG_CRIT "Unexpected CPU exception (%s) on CPU %d:\n"
		LOG_CRIT "Exception addr=0x%lx, PC=0x%lx\n"
		LOG_CRIT "(r0)  v0:  0x%016lx\n"		/* r0 */
		LOG_CRIT "(r1)  t0:  0x%016lx\n"		/* r1 */
		LOG_CRIT "(r2)  t1:  0x%016lx\n"		/* r2 */
		LOG_CRIT "(r3)  t2:  0x%016lx\n"		/* r3 */
		LOG_CRIT "(r4)  t3:  0x%016lx\n"		/* r4 */
		LOG_CRIT "(r5)  t4:  0x%016lx\n"		/* r5 */
		LOG_CRIT "(r6)  t5:  0x%016lx\n"		/* r6 */
		LOG_CRIT "(r7)  t6:  0x%016lx\n"		/* r7 */
		LOG_CRIT "(r8)  t7:  0x%016lx\n"		/* r8 */
		LOG_CRIT "(r9)  s0:  0x%016lx\n"		/* r9 */
		LOG_CRIT "(r10) s1:  0x%016lx\n"		/* r10 */
		LOG_CRIT "(r11) s2:  0x%016lx\n"		/* r11 */
		LOG_CRIT "(r12) s3:  0x%016lx\n"		/* r12 */
		LOG_CRIT "(r13) s4:  0x%016lx\n"		/* r13 */
		LOG_CRIT "(r14) s5:  0x%016lx\n"		/* r14 */
		LOG_CRIT "(r15) fp:  0x%016lx\n"		/* r15 */
		LOG_CRIT "(r16) a0:  0x%016lx\n"		/* r16 */
		LOG_CRIT "(r17) a1:  0x%016lx\n"		/* r17 */
		LOG_CRIT "(r18) a2:  0x%016lx\n"		/* r18 */
		LOG_CRIT "(r19) a3:  0x%016lx\n"		/* r19 */
		LOG_CRIT "(r20) a4:  0x%016lx\n"		/* r20 */
		LOG_CRIT "(r21) a5:  0x%016lx\n"		/* r21 */
		LOG_CRIT "(r22) t8:  0x%016lx\n"		/* r22 */
		LOG_CRIT "(r23) t9:  0x%016lx\n"		/* r23 */
		LOG_CRIT "(r24) t10: 0x%016lx\n"		/* r24 */
		LOG_CRIT "(r25) t11: 0x%016lx\n"		/* r25 */
		LOG_CRIT "(r26) ra:  0x%016lx\n"		/* r26 */
		LOG_CRIT "(r27) pv:  0x%016lx\n"		/* r27 */
		LOG_CRIT "(r28) AT:  0x%016lx\n"		/* r28 */
		LOG_CRIT "(r29) gp:  0x%016lx\n"		/* r29 */
		LOG_CRIT "(r30) sp:  0x%016lx\n",		/* r30 */
                fault[F->fcode], phys_id,
		F->osf_a0, F->pc,
		F->r0, F->r1, F->r2, F->r3, F->r4,
		F->r5, F->r6, F->r7, F->r8, F->r9,
		F->r10, F->r11, F->r12, F->r13, F->r14,
		F->r15, F->r16, F->r17, F->r18, F->r19,
		F->r20, F->r21, F->r22, F->r23, F->r24,
		F->r25, F->r26, F->r27, F->r28, F->r29,
		F->r30 );

    failingpc = (uint32 *)F->pc;

    /* only dereference the PC when it points into our own image */
    /* NOTE(review): a PC within the first three words of the image makes
     * failingpc[-3] read just below _uncompressed_start */
    if ( (failingpc >= (uint32 *)&_uncompressed_start) &&
	 (failingpc <= (uint32 *)&_uncompressed_text_end) )
    {
	mobo_logf( LOG_CRIT "Code at that address is:\n"
		   LOG_CRIT "\t   0x%08x\n"
		   LOG_CRIT "\t   0x%08x\n"
		   LOG_CRIT "\t   0x%08x\n"
		   LOG_CRIT "\t=> 0x%08x\n",
		   failingpc[-3], failingpc[-2], failingpc[-1], failingpc[0] );
    } else {
	mobo_logf( LOG_CRIT "PC doesn't point to valid diags code!\n" );
    }

    mobo_alertf( "Unexpected CPU exception!",
                 "Fault code was: %s at 0x%lx\r"
                 "System will reset to clear this error...",
		 fault[F->fcode], F->pc ); 

    return;     /* return to execute halt instruction */
}


/*
 * halt_code_string - describe a halt code in words.
 *
 *   halt_code - one of the HLT_K_* codes
 *
 * Returns a constant string naming the halt reason.  A code that is not
 * recognised is logged as a warning and described as unknown.
 */
String halt_code_string(int halt_code)
{
  switch(halt_code) {
  case HLT_K_RESET:		return "Reset...";
  case HLT_K_HW_HALT:		return "Hardware halt";
  case HLT_K_KSP_INVAL:		return "Invalid Kernel Stack Pointer";
  case HLT_K_SCBB_INVAL:	return "Invalid System Control Block Base";
  case HLT_K_PTBR_INVAL:	return "Invalid Page Table Base Register";
  case HLT_K_SW_HALT:		return "Software Halt";
  case HLT_K_DBL_MCHK:		return "Double Machine Check";
  case HLT_K_MCHK_FROM_PAL:	return "Machine Check from PALcode";
  default:
    break;
  }

  /* nothing matched: note the odd value in the log before answering */
  mobo_logf( LOG_WARN "halt code: don't know what 0x%X is\n", halt_code );
  return "Unknown (PALcode problem?)";
}



/*
 * osf_mchkinterp - build a one-line description of a machine check.
 *
 *   vector - SCB vector the machine check arrived on (SCB_Q_*)
 *   Frame  - logout frame; only its MCHK_Code field is examined
 *   result - caller's buffer, receives "<vector class>: <code meaning>"
 *
 * The first half names the class of machine check implied by the vector,
 * the second half the meaning of the PALcode machine check code.  A code
 * that is not recognised is rendered in hex.
 */
void osf_mchkinterp( ui vector, LogoutFrame_t *Frame, String result )
{
    const char *class_text;	/* wording for the SCB vector */
    const char *code_text;	/* wording for the MCHK code */
    char unknown_code[64];	/* only filled in for unrecognised codes */

    switch( vector ) {
	case SCB_Q_PROCERR:	class_text = "Processor correctable";	break;
	case SCB_Q_PROCMCHK:	class_text = "Processor uncorrectable";	break;
	case SCB_Q_SYSERR:	class_text = "System correctable";	break;
	case SCB_Q_SYSMCHK:	class_text = "System uncorrectable";	break;
	case SCB_Q_SYS_EVENT:	class_text = "System Power Event";	break;
	default:		class_text = "Unknown machine check";	break;
    }

    switch( Frame->MCHK_Code ) {
	case MCHK_K_TPERR:
	    code_text = "TPERR machine check";
	    break;
	case MCHK_K_TCPERR:
	    code_text = "TCPERR machine check";
	    break;
	case MCHK_K_HERR:
	    code_text = "Hard error";
	    break;
	case MCHK_K_ECC_C:
	    code_text = "Correctable ECC error";
	    break;
	case MCHK_K_ECC_NC:
	    code_text = "Non-correctable ECC error";
	    break;
	case MCHK_K_UNKNOWN:
	    code_text = "Unknown code";
	    break;
	case MCHK_K_CACKSOFT:
	    code_text = "CACKSOFT machine check";
	    break;
	case MCHK_K_BUGCHECK:
	    code_text = "Bugcheck in PALmode";
	    break;
	case MCHK_K_OS_BUGCHECK:
	    code_text = "Bugcheck from OS";
	    break;
	case MCHK_K_DCPERR:
	    code_text = "Dcache parity error";
	    break;
	case MCHK_K_ICPERR:
	    code_text = "Icache parity error";
	    break;
	case MCHK_K_RETRY_IRD:
	    code_text = "Mysterious RETRY_IRD";
	    break;
	case MCHK_K_PROC_HERR:
	    code_text = "CPU Detected hard error";
	    break;
	case MCHK_K_DC_TAG_PERR:
	    code_text = "Dcache tag parity error";
	    break;
	case MCHK_K_ISTREAM_CMOV:
	    code_text = "Istream CMOV error";
	    break;
	case MCHK_K_ISTREAM_CMOV_FAULT:
	    code_text = "Istream CMOV fault error";
	    break;

	case MCHK_K_READ_NXM:
	    code_text = "Non-existent memory error";
	    break;
	case MCHK_K_SYS_HERR:
	    code_text = "Hard error detected";
	    break;
	case MCHK_K_SYS_EVENT:
	    code_text = "System event error code";
	    break;

	default:
	    /* no fixed wording for this one: show the raw code */
	    sprintf_dbm( unknown_code, "Unknown error code 0x%X",
			 Frame->MCHK_Code );
	    code_text = unknown_code;
	    break;
    }

    sprintf_dbm( result, "%s: %s", class_text, code_text );
}

