#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <stdint.h>
#include <time.h>
#include <sys/mman.h>

#define nil NULL
typedef struct Node Node;
typedef struct Ins Ins;

struct Node {
	int op;
	float val;
	Node *n1, *n2;
	int r;
};

enum {
	CONST = 256,
	LE,
	GE,
	MOV,
	UMINUS,
	RAND,
	FLOAT,
};

struct Ins {
	short op;
	char cons;
	char r1, r2;
};

enum {
	INSALL = 16,
	CONSALL = 16,
	CBUFALL = 128,
};

char *str;
int peeked;
float cval;
uint8_t regs;
Ins *ins;
int nins;
float cmem[64];
int ncmem;
int cnt, usex, usey;
int nbytes;
char *cbuf;
int ncbuf;

void
error(const char *fmt, ...)
{
	va_list va;

	va_start(va, fmt);
	vfprintf(stderr, fmt, va);
	fprintf(stderr, "\n");
	exit(1);
}

void *
emalloc(int n)
{
	void *v;

	v = malloc(n);
	if(v == nil)
		error("malloc");
	memset(v, 0, n);
	return v;
}

void *
xmem(int n)
{
	return mmap(nil, n, PROT_EXEC|PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
}

Node *
node(int op, ...)
{
	Node *n;
	va_list va;

	va_start(va, op);
	n = (Node *) emalloc(sizeof(*n));
	n->op = op;
	if(n->op == CONST)
		n->val = va_arg(va, double);
	else{
		n->n1 = va_arg(va, Node *);
		n->n2 = va_arg(va, Node *);
	}
	return n;
}

int
cons(void)
{
	int c;

	if(peeked != 0){
		c = peeked;
		peeked = 0;
		return c;
	}
	while(*str == ' ' || *str == '\t' || *str == '\r' || *str == '\n')
		str++;
	if(*str >= '0' && *str <= '9'){
		cval = strtof(str, &str);
		return CONST;
	}
	if(*str == '<' && str[1] == '='){
		str += 2;
		return LE;
	}
	if(*str == '>' && str[1] == '='){
		str += 2;
		return GE;
	}
	c = *str;
	if(c)
		str++;
	return c;
}

int
peek(void)
{
	if(peeked != 0)
		return peeked;
	return peeked = cons();
}

int
next(int c)
{
	if(peek() == c){
		cons();
		return 1;
	}
	return 0;
}

void
expect(char c)
{
	if(!next(c))
		error("syntax error, expected %c", c);
}

Node *
cvar(void)
{
	extern Node *expr(void);
	Node *n;
	int c;

	switch(c = cons()){
	case CONST:
		return node(CONST, cval);
	case '(':
		n = expr();
		expect(')');
		return n;
	case 'x': case 'y':
		return node(c, nil, nil);
	}
	error("syntax error, unexpected %d(%c)", c, c);
	return nil;
}

Node *
factor(void)
{
	int c;
	Node *n;

	n = cvar();
	while((c = peek()) == '*' || peek() == '/'){
		cons();
		n = node(c, n, cvar());
	}
	return n;
}

Node *
sum(void)
{
	int inv, c;
	Node *n, *m;

	for(inv = 0;;){
		if(next('-'))
			inv = !inv;
		else if(!next('+'))
			break;
	}
	n = factor();
	if(inv)
		n = node(UMINUS, n, nil);
	while((c = peek()) == '+' || peek() == '-'){
		cons();
		for(inv = 0;;){
			if(next('-'))
				inv = !inv;
			else if(!next('+'))
				break;
		}
		m = factor();
		if(inv)
			m = node(UMINUS, m, nil);
		n = node(c, n, m);
	}
	return n;
}

Node *
expr(void)
{
	Node *n;
	int c;

	n = sum();
	while((c = peek()) == '<' || c == LE || c == '>' || c == GE){
		cons();
		n = node(c, n, sum());
	}
	return n;
}

Node *
fold(Node *n)
{
	float r, a, b;

	switch(n->op){
	case '+': case '-': case '*': case '/':
		n->n1 = fold(n->n1);
		n->n2 = fold(n->n2);
		if(n->n1->op == CONST && n->n2->op == CONST){
			a = n->n1->val;
			b = n->n2->val;
			switch(n->op){
			case '+': r = a + b; break;
			case '-': r = a - b; break;
			case '*': r = a * b; break;
			case '/': r = a / b; break;
			default: error("unknown op %d", n->op); r = 0;
			}
			return node(CONST, r);
		}
		return n;
	case UMINUS:
		n->n1 = fold(n->n1);
		if(n->n1->op == CONST)
			return node(CONST, -n->n1->val);
		return node('-', node(CONST, 0), n->n1);
	case '>': case '<': case LE: case GE:
		n->n1 = fold(n->n1);
		n->n2 = fold(n->n2);
		break;
	case 'x':
		usex++;
		break;
	case 'y':
		usey++;
		break;
	case CONST:
		break;
	default:
		error("fold: unknown type %d(%c)", n->op, n->op);
	}
	return n;
}

int
getreg(void)
{
	uint8_t c;
	static char cs[] = {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3};

	c = regs ^ (regs | regs+1);
	regs |= c;
	if(c == 0) error("out of registers");
	if(c >= 0x10) return cs[c >> 4] + 4;
	return cs[c];
}

void
putreg(int n)
{
	regs |= 1<<n;
}

void
putins(int op, int r1, int cons, ...)
{
	va_list va;
	float v;
	int i;

	if((nins & INSALL - 1) == 0)
		ins = (Ins *) realloc(ins, sizeof(Ins) * (nins + INSALL));
	ins[nins].op = op;
	ins[nins].r1 = r1;
	ins[nins].cons = cons;
	va_start(va, cons);
	switch(cons){
	case 1:
		v = va_arg(va, double);
		for(i = 0; i < ncmem; i += 4){
			if(cmem[i] == v){
				ins[nins].r2 = i;
				break;
			}
		}
		if(i == ncmem){
			if(ncmem == 64)
				error("out of constants");
			ins[nins].r2 = ncmem;
			cmem[ncmem++] = v;
			cmem[ncmem++] = v;
			cmem[ncmem++] = v;
			cmem[ncmem++] = v;
		}
		break;
	case 0:
		ins[nins].r2 = va_arg(va, int);
	}
	va_end(va);
	nins++;
}

void
compile(Node *n)
{
	switch(n->op){
	case '+': case '-': case '*': case '/': case '<': case '>': case LE: case GE:
		compile(n->n1);
		if(n->n2->op != CONST){
			compile(n->n2);
			putreg(n->n2->r);
			putins(n->op, n->n1->r, 0, n->n2->r);
		}else
			putins(n->op, n->n1->r, 1, n->n2->val);
		n->r = n->n1->r;
		break;
	case CONST:
		putins(MOV, n->r = getreg(), 1, n->val);
		break;
	case 'x':
		putins(MOV, n->r = getreg(), 0, usex);
		putins(FLOAT, n->r, 0, 0);
		break;
	case 'y':
		putins(MOV, n->r = getreg(), 0, usey);
		putins(FLOAT, n->r, 0, 0);
		break;
	default:
		error("compile: unknown op %d(%c)", n->op, n->op);
	}
}

void
bytes(int c, ...)
{
	va_list va;

	va_start(va, c);
	while(c >= 0){
		cbuf[ncbuf++] = c;
		nbytes++;
		c = va_arg(va, int);
	}
}

void
assem(int r)
{
	Ins *i;
	int o;

	bytes(0xff, 0xf5, -1);
	bytes(0x89, 0xe5, -1); /* MOV EBP, ESP */
	bytes(0x60, -1); /* PUSHA */
	bytes(0x8b, 0x75, 0x08, -1); /* MOV ESI, [EBP+8] */
	bytes(0x8b, 0x7d, 0x0c, -1); /* MOV EDI, [EBP+12] */
	bytes(0x8b, 0x4d, 0x10, -1); /* MOV ECX, [EBP+16] */
	bytes(0x33, 0xc0, -1); /* XOR EAX, EAX */
	bytes(0x0f, 0x28, 0x47 | cnt << 3, 0x0, -1);
	if(usex >= 0)
		bytes(0x0f, 0x28, 0x47 | usex << 3, 0x10, -1);
	if(usey >= 0)
		bytes(0x0f, 0x28, 0x47 | usey << 3, 0x20, -1);
	nbytes = 0;
	for(i = ins; i < ins + nins; i++)
		switch(i->op){
		case RAND:
			bytes(0x66, 0x0f, 0xd5, 0x46 | i->r1 << 3, 0x00, -1);
			bytes(0x66, 0x0f, 0xfe, 0x46 | i->r1 << 3, 0x10, -1);
			break;
		case FLOAT:
			bytes(0x66, 0x0f, 0xdb, 0x46 | i->r1 << 3, 0x20, -1);
			bytes(0x66, 0x0f, 0xeb, 0x46 | i->r1 << 3, 0x30, -1);
			bytes(0x0f, 0x5c, 0x46 | i->r1 << 3, 0x40, -1);
			break;
		case MOV: o = 0x28; goto bin;
		case '+': o = 0x58; goto bin;
		case '-': o = 0x5c; goto bin;
		case '*': o = 0x59; goto bin;
		case '/': o = 0x5e; goto bin;
		bin:
			if(i->cons)
				bytes(0x0f, o, 0x46 | i->r1 << 3, i->r2 << 2, -1);
			else
				bytes(0x0f, o, 0xc0 | i->r1 << 3 | i->r2, -1);
			break;
		case '<': o = 1; goto cmp;
		case LE: o = 2; goto cmp;
		case '>': o = 6; goto cmp;
		case GE: o = 5; goto cmp;
		cmp:
			if(i->cons)
				bytes(0x0f, 0xc2, 0x46 | i->r1 << 3, i->r2 << 2, o, -1);
			else
				bytes(0x0f, 0xc2, 0xc0 | i->r1 << 3 | i->r2, o, -1);
			break;
		default:
			error("assem: unknown op %d(%c)", i->op, i->op);
		}
	bytes(0x66, 0x0f, 0xfe, 0xc0 | cnt << 3 | r, -1); /* PADDD XMMcnt, XMMn */
	bytes(0x0f, 0x29, 0x47 | cnt << 3, 0x0, -1);
	bytes(0xe2, 0xfe - nbytes, -1); /* LOOP */
	bytes(0x61, -1); /* POPA */
	bytes(0x5d, 0xc3, -1); /* POP BP; RET */
}

int
main(int argc, char **argv)
{
	Node *n;
	void (*f)(float*, int*, int);
	static int seed[12];
	int i, num;

	if(argc < 3 || (num = atoi(argv[2])) < 0)
		error("usage: %s expr num", argv[0]);
	num = (num + 3) / 4;
	str = argv[1];
	n = fold(expr());
	*(int*)&cmem[0] = 1664525;
	*(int*)&cmem[1] = 1664525;
	*(int*)&cmem[2] = 1664525;
	*(int*)&cmem[3] = 1664525;
	*(int*)&cmem[4] = 1013904223;
	*(int*)&cmem[5] = 1013904223;
	*(int*)&cmem[6] = 1013904223;
	*(int*)&cmem[7] = 1013904223;
	*(int*)&cmem[8] = 0x007fffff;
	*(int*)&cmem[9] = 0x007fffff;
	*(int*)&cmem[10] = 0x007fffff;
	*(int*)&cmem[11] = 0x007fffff;
	*(int*)&cmem[12] = 0x3f800000;
	*(int*)&cmem[13] = 0x3f800000;
	*(int*)&cmem[14] = 0x3f800000;
	*(int*)&cmem[15] = 0x3f800000;
	cmem[16] = 1.0;
	cmem[17] = 1.0;
	cmem[18] = 1.0;
	cmem[19] = 1.0;
	ncmem = 20;
	cnt = getreg();
	if(usex){
		usex = getreg();
		putins(RAND, usex, 2);
	}else
		usex = -1;
	if(usey){
		usey = getreg();
		putins(RAND, usey, 2);
	}else
		usey = -1;
	compile(n);
	cbuf = (char *) xmem(4096); 
	assem(n->r);
	f = (void(*)(float*,int*,int)) cbuf;
	srand(time(0));
	for(i = 0; i < 4; i++)
		seed[i] = 0;
	for(; i < 12; i++)
		seed[i] = rand();
	f(cmem, seed, num);
	printf("%g\n", (double)(-seed[0]-seed[1]-seed[2]-seed[3]) / (4 * num));
	return 0;
}
