diff -urN x86/CHANGES-VMW x86-modified/CHANGES-VMW
--- x86/CHANGES-VMW	1969-12-31 19:00:00.000000000 -0500
+++ x86-modified/CHANGES-VMW	2006-03-27 13:28:17.000000000 -0500
@@ -0,0 +1,9 @@
++ Fix x86_read_effective_addr to read/return in a way that lets
+  valgrind generate the trace.  The old code was always one line
+  ahead of where it needed to be and stored a lot of state in static
+  vars.  This is improved a bit, but still not ideal.  *FIXME* there
+  is a possible memory leak there.
+  
++ TODO: Move the x86_read_inst and x86_read_effective_addr parts
+  to a separate file, and have everything that uses them use
+  this separate file.
diff -urN x86/config/p3.config x86-modified/config/p3.config
--- x86/config/p3.config	1969-12-31 19:00:00.000000000 -0500
+++ x86-modified/config/p3.config	2006-06-01 12:02:52.000000000 -0400
@@ -0,0 +1,103 @@
+#
+# default sim-outorder configuration
+#
+
+# random number generator seed (0 for timer seed)
+-seed                             1 
+
+# instruction fetch queue size (in insts)
+-fetch:ifqsize                    4 
+
+# extra branch mis-prediction latency
+-fetch:mplat                      3 
+
+# branch predictor type {nottaken|taken|perfect|bimod|2lev}
+-bpred                        bimod 
+
+# bimodal predictor BTB size
+-bpred:bimod                   2048 
+
+# 2-level predictor config (<l1size> <l2size> <hist_size>)
+-bpred:2lev            1 512 4 
+
+# instruction decode B/W (insts/cycle)
+-decode:width                     4
+
+# instruction issue B/W (insts/cycle)
+-issue:width                      4
+
+# run pipeline with in-order issue
+-issue:inorder                false 
+
+# issue instructions down wrong execution paths
+-issue:wrongpath               true 
+
+# register update unit (RUU) size
+-ruu:size                        16 
+
+# load/store queue (LSQ) size
+-lsq:size                         8 
+
+# l1 data cache config, i.e., {<config>|none}
+-cache:dl1             dl1:16:32:4:l 
+
+# l1 data cache hit latency (in cycles)
+-cache:dl1lat                     1 
+
+# l2 data cache config, i.e., {<config>|none}
+-cache:dl2             ul2:512:32:4:l 
+
+# l2 data cache hit latency (in cycles)
+-cache:dl2lat                     6 
+
+# l1 inst cache config, i.e., {<config>|dl1|dl2|none}
+-cache:il1             il1:16:32:4:l 
+
+# l1 instruction cache hit latency (in cycles)
+-cache:il1lat                     1 
+
+# l2 instruction cache config, i.e., {<config>|dl2|none}
+-cache:il2                      dl2 
+
+# l2 instruction cache hit latency (in cycles)
+-cache:il2lat                     6 
+
+# flush caches on system calls
+-cache:flush                  false 
+
+# convert 64-bit inst addresses to 32-bit inst equivalents
+-cache:icompress              false 
+
+# memory access latency (<first_chunk> <inter_chunk>)
+-mem:lat               18 2 
+
+# memory access bus width (in bytes)
+-mem:width                        8 
+
+# instruction TLB config, i.e., {<config>|none}
+-tlb:itlb              itlb:32:4096:4:l 
+
+# data TLB config, i.e., {<config>|none}
+-tlb:dtlb              dtlb:64:4096:4:l 
+
+# inst/data TLB miss latency (in cycles)
+-tlb:lat                         30 
+
+# total number of integer ALU's available
+-res:ialu                         4 
+
+# total number of integer multiplier/dividers available
+-res:imult                        1 
+
+# total number of memory system ports available (to CPU)
+-res:memport                      2 
+
+# total number of floating point ALU's available
+-res:fpalu                        4 
+
+# total number of floating point multiplier/dividers available
+-res:fpmult                       1 
+
+# operate in backward-compatible bugs mode (for testing only)
+-bugcompat                    false 
+
diff -urN x86/disasm.c x86-modified/disasm.c
--- x86/disasm.c	2001-07-26 18:56:00.000000000 -0400
+++ x86-modified/disasm.c	2006-06-02 12:21:30.000000000 -0400
@@ -4,6 +4,9 @@
 #include "disasm.h"
 #include "x86_regs.h"
 
+int fld1_print=0,fucompp_print=0,sti_print=0,fucom_print=0,fsqrt_print=0;
+int fsin_count=0,fcos_count=0,fucomp_count=0;
+
 inline  Bit8u  fetch_byte(void) {
     return(*instruction++);
     };
@@ -191,7 +194,7 @@
 
 
 /* need to return the number of uops */
-unsigned int dis_asm(unsigned int is_32, Bit8u *instr,
+unsigned int dis_asm(unsigned int eip, unsigned int is_32, Bit8u *instr,
 		     int num_effective_addresses, 
 		     struct ea_addr_rec * addr_list, 
 		     Bit32u eflags,
@@ -675,8 +678,11 @@
           case 0x2E:
           case 0x2F: invalid_opcode(); goto done;
 
-          case 0x30:
+	  case 0x30: invalid_opcode(); goto done;
 	  case 0x31:
+	   	     fprintf(stderr, " opcode 0F 31, "
+			     "rdtsc not implemented\n");
+	             goto done;
 	  case 0x32:
 	  case 0x33:
 	  case 0x34:
@@ -693,65 +699,629 @@
 	  case 0x3F: invalid_opcode(); goto done;
 
           case 0x40: invalid_opcode(); goto done;
-	    /* cmovno */
+
 	  case 0x41:
-	    fprintf(stderr, "cmovno not implemented\n");
+	    /* cmovno */
+	    /* Move if no overflow */
+	       if (!OF(eflags)){
+		  uop->IR.a = 0x41;
+	          put_offset(0, &(uop->IR.b));
+	          GvEv(&load_uop, &source_reg, &dest_reg);   
+	          if (load_uop) {         /* xxx is a memory location */
+	             *uop_list = load_uop;
+	             if (load_uop->next) {  /* need to find the actual load */
+	                free_tmp_reg(extract_RT(load_uop->next->IR.b));
+	                put_RT(dest_reg, &(load_uop->next->IR.b));
+	                free(load_uop->last_use_tmp->next);
+	                load_uop->last_use_tmp->next = NULL;
+	                load_uop->next->last_use_tmp = load_uop->last_use_tmp;
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             else {
+	                free_tmp_reg(extract_RT(load_uop->IR.b));
+	                put_RT(dest_reg, &(load_uop->IR.b));
+	                free(load_uop->last_use_tmp);
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             free(uop);
+	             free_tmp_reg(source_reg);
+	             uop_count--;
+		  }
+	          else {
+	             put_RT(dest_reg, &(uop->IR.b));
+	             put_RS(source_reg, &(uop->IR.b));
+		  }
+	       }
+	   
+	       else { 
+	         uop->IR.a = 0x00; /* nop */
+	       }
+	   
+//	    fprintf(stderr, "cmovno not implemented\n");
 	    goto done;
-	    /* cmovc */
+
 	  case 0x42:
-	    fprintf(stderr, "cmovc not implemented\n");
+	   	    /* cmovc */
+	            /* Move if carry */
+	       if (CF(eflags)){
+		  uop->IR.a = 0x41;
+	          put_offset(0, &(uop->IR.b));
+	          GvEv(&load_uop, &source_reg, &dest_reg);   
+	          if (load_uop) {         /* xxx is a memory location */
+	             *uop_list = load_uop;
+	             if (load_uop->next) {  /* need to find the actual load */
+	                free_tmp_reg(extract_RT(load_uop->next->IR.b));
+	                put_RT(dest_reg, &(load_uop->next->IR.b));
+	                free(load_uop->last_use_tmp->next);
+	                load_uop->last_use_tmp->next = NULL;
+	                load_uop->next->last_use_tmp = load_uop->last_use_tmp;
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             else {
+	                free_tmp_reg(extract_RT(load_uop->IR.b));
+	                put_RT(dest_reg, &(load_uop->IR.b));
+	                free(load_uop->last_use_tmp);
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             free(uop);
+	             free_tmp_reg(source_reg);
+	             uop_count--;
+		  }
+	          else {
+	             put_RT(dest_reg, &(uop->IR.b));
+	             put_RS(source_reg, &(uop->IR.b));
+		  }
+	       }
+	   
+	       else { 
+	         uop->IR.a = 0x00; /* nop */
+	       }
+//	    fprintf(stderr, "cmovc not implemented\n");
 	    goto done;
-	    /* cmovae */
+
 	  case 0x43:
-	    fprintf(stderr, "cmovae not implemented\n");
+	   	    /* cmovae */
+	            /* Move if not carry */
+	       if (!CF(eflags)){
+		  uop->IR.a = 0x41;
+	          put_offset(0, &(uop->IR.b));
+	          GvEv(&load_uop, &source_reg, &dest_reg);   
+	          if (load_uop) {         /* xxx is a memory location */
+	             *uop_list = load_uop;
+	             if (load_uop->next) {  /* need to find the actual load */
+	                free_tmp_reg(extract_RT(load_uop->next->IR.b));
+	                put_RT(dest_reg, &(load_uop->next->IR.b));
+	                free(load_uop->last_use_tmp->next);
+	                load_uop->last_use_tmp->next = NULL;
+	                load_uop->next->last_use_tmp = load_uop->last_use_tmp;
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             else {
+	                free_tmp_reg(extract_RT(load_uop->IR.b));
+	                put_RT(dest_reg, &(load_uop->IR.b));
+	                free(load_uop->last_use_tmp);
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             free(uop);
+	             free_tmp_reg(source_reg);
+	             uop_count--;
+		  }
+	          else {
+	             put_RT(dest_reg, &(uop->IR.b));
+	             put_RS(source_reg, &(uop->IR.b));
+		  }
+	       }
+	   
+	       else { 
+	         uop->IR.a = 0x00; /* nop */
+	       }
+	   // fprintf(stderr, "cmovae not implemented\n");
 	    goto done;
-	    /* cmove */
+
 	  case 0x44:
-	    fprintf(stderr, "cmove not implemented\n");
+	    /* cmove */
+	    /* Move if equal (zero flag set) */
+	       if (ZF(eflags)){
+		  uop->IR.a = 0x41;
+	          put_offset(0, &(uop->IR.b));
+	          GvEv(&load_uop, &source_reg, &dest_reg);   
+	          if (load_uop) {         /* xxx is a memory location */
+	             *uop_list = load_uop;
+	             if (load_uop->next) {  /* need to find the actual load */
+	                free_tmp_reg(extract_RT(load_uop->next->IR.b));
+	                put_RT(dest_reg, &(load_uop->next->IR.b));
+	                free(load_uop->last_use_tmp->next);
+	                load_uop->last_use_tmp->next = NULL;
+	                load_uop->next->last_use_tmp = load_uop->last_use_tmp;
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             else {
+	                free_tmp_reg(extract_RT(load_uop->IR.b));
+	                put_RT(dest_reg, &(load_uop->IR.b));
+	                free(load_uop->last_use_tmp);
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             free(uop);
+	             free_tmp_reg(source_reg);
+	             uop_count--;
+		  }
+	          else {
+	             put_RT(dest_reg, &(uop->IR.b));
+	             put_RS(source_reg, &(uop->IR.b));
+		  }
+	       }
+	   
+	       else { 
+	         uop->IR.a = 0x00; /* nop */
+	       }
+	   
+	   
+//	    fprintf(stderr, "cmove not implemented\n");
 	    goto done;
-	    /* cmovne */
+
 	  case 0x45:
-	    fprintf(stderr, "cmovne not implemented\n");
+	   	    /* cmovne */
+	            /* move if not equal (zero flag not set) */
+	   
+	      if (!ZF(eflags)){
+		  uop->IR.a = 0x41;
+	          put_offset(0, &(uop->IR.b));
+	          GvEv(&load_uop, &source_reg, &dest_reg);   
+	          if (load_uop) {         /* xxx is a memory location */
+	             *uop_list = load_uop;
+	             if (load_uop->next) {  /* need to find the actual load */
+	                free_tmp_reg(extract_RT(load_uop->next->IR.b));
+	                put_RT(dest_reg, &(load_uop->next->IR.b));
+	                free(load_uop->last_use_tmp->next);
+	                load_uop->last_use_tmp->next = NULL;
+	                load_uop->next->last_use_tmp = load_uop->last_use_tmp;
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             else {
+	                free_tmp_reg(extract_RT(load_uop->IR.b));
+	                put_RT(dest_reg, &(load_uop->IR.b));
+	                free(load_uop->last_use_tmp);
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             free(uop);
+	             free_tmp_reg(source_reg);
+	             uop_count--;
+		  }
+	          else {
+	             put_RT(dest_reg, &(uop->IR.b));
+	             put_RS(source_reg, &(uop->IR.b));
+		  }
+	       }
+	   
+	       else { 
+	         uop->IR.a = 0x00; /* nop */
+	       }
+	   
+//	    fprintf(stderr, "cmovne not implemented\n");
+	   
 	    goto done;
-	    /* cmovbe */
+
 	  case 0x46:
-	    fprintf(stderr, "cmovbe not implemented\n");
+	   	    /* cmovbe */
+	            /* move if carry or zero flag set */
+	   
+	       if (CF(eflags) || ZF(eflags)) {
+		  uop->IR.a = 0x41;
+	          put_offset(0, &(uop->IR.b));
+	          GvEv(&load_uop, &source_reg, &dest_reg);   
+	          if (load_uop) {         /* xxx is a memory location */
+	             *uop_list = load_uop;
+	             if (load_uop->next) {  /* need to find the actual load */
+	                free_tmp_reg(extract_RT(load_uop->next->IR.b));
+	                put_RT(dest_reg, &(load_uop->next->IR.b));
+	                free(load_uop->last_use_tmp->next);
+	                load_uop->last_use_tmp->next = NULL;
+	                load_uop->next->last_use_tmp = load_uop->last_use_tmp;
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             else {
+	                free_tmp_reg(extract_RT(load_uop->IR.b));
+	                put_RT(dest_reg, &(load_uop->IR.b));
+	                free(load_uop->last_use_tmp);
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             free(uop);
+	             free_tmp_reg(source_reg);
+	             uop_count--;
+		  }
+	          else {
+	             put_RT(dest_reg, &(uop->IR.b));
+	             put_RS(source_reg, &(uop->IR.b));
+		  }
+	       }
+	   
+	       else { 
+	         uop->IR.a = 0x00; /* nop */
+	       }
+	   
+	   
+//	    fprintf(stderr, "cmovbe not implemented\n");
 	    goto done;
-	    /* cmova */
+
 	  case 0x47:
-	    fprintf(stderr, "cmova not implemented\n");
+	   	    /* cmova (0F 47): move if above, i.e. unsigned greater-than */
+	            /* condition is CF == 0 and ZF == 0 (the negation of cmovbe); */
+	            /* the adjust flag (AF) is not part of any cmovcc condition */
+	       if (!CF(eflags) && !ZF(eflags)) {
+		  uop->IR.a = 0x41;
+	          put_offset(0, &(uop->IR.b));
+	          GvEv(&load_uop, &source_reg, &dest_reg);   
+	          if (load_uop) {         /* xxx is a memory location */
+	             *uop_list = load_uop;
+	             if (load_uop->next) {  /* need to find the actual load */
+	                free_tmp_reg(extract_RT(load_uop->next->IR.b));
+	                put_RT(dest_reg, &(load_uop->next->IR.b));
+	                free(load_uop->last_use_tmp->next);
+	                load_uop->last_use_tmp->next = NULL;
+	                load_uop->next->last_use_tmp = load_uop->last_use_tmp;
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             else {
+	                free_tmp_reg(extract_RT(load_uop->IR.b));
+	                put_RT(dest_reg, &(load_uop->IR.b));
+	                free(load_uop->last_use_tmp);
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             free(uop);
+	             free_tmp_reg(source_reg);
+	             uop_count--;
+		  }
+	          else {
+	             put_RT(dest_reg, &(uop->IR.b));
+	             put_RS(source_reg, &(uop->IR.b));
+		  }
+	       }
+	   
+	       else { 
+	         uop->IR.a = 0x00; /* nop */
+	       }
+	   
+//	    fprintf(stderr, "cmova not implemented\n");
 	    goto done;
-	    /* cmovs */
+	   
 	  case 0x48:
-	    fprintf(stderr, "cmovs not implemented\n");
+	   
+	    /* cmovs */
+	    /* move if sign bit set */
+	   	   
+	       if (SF(eflags)) {
+		  uop->IR.a = 0x41;
+	          put_offset(0, &(uop->IR.b));
+	          GvEv(&load_uop, &source_reg, &dest_reg);   
+	          if (load_uop) {         /* xxx is a memory location */
+	             *uop_list = load_uop;
+	             if (load_uop->next) {  /* need to find the actual load */
+	                free_tmp_reg(extract_RT(load_uop->next->IR.b));
+	                put_RT(dest_reg, &(load_uop->next->IR.b));
+	                free(load_uop->last_use_tmp->next);
+	                load_uop->last_use_tmp->next = NULL;
+	                load_uop->next->last_use_tmp = load_uop->last_use_tmp;
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             else {
+	                free_tmp_reg(extract_RT(load_uop->IR.b));
+	                put_RT(dest_reg, &(load_uop->IR.b));
+	                free(load_uop->last_use_tmp);
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             free(uop);
+	             free_tmp_reg(source_reg);
+	             uop_count--;
+		  }
+	          else {
+	             put_RT(dest_reg, &(uop->IR.b));
+	             put_RS(source_reg, &(uop->IR.b));
+		  }
+	       }
+	   
+	       else { 
+	         uop->IR.a = 0x00; /* nop */
+	       }
+	   
+//	    fprintf(stderr, "cmovs not implemented\n");
 	    goto done;
-	    /* cmovns */
+	   
 	  case 0x49:
-	    fprintf(stderr, "cmovns not implemented\n");
+	   
+	    /* cmovns */
+	    /* move if sign bit not set */
+
+	       if (!SF(eflags)) {
+		  uop->IR.a = 0x41;
+	          put_offset(0, &(uop->IR.b));
+	          GvEv(&load_uop, &source_reg, &dest_reg);   
+	          if (load_uop) {         /* xxx is a memory location */
+	             *uop_list = load_uop;
+	             if (load_uop->next) {  /* need to find the actual load */
+	                free_tmp_reg(extract_RT(load_uop->next->IR.b));
+	                put_RT(dest_reg, &(load_uop->next->IR.b));
+	                free(load_uop->last_use_tmp->next);
+	                load_uop->last_use_tmp->next = NULL;
+	                load_uop->next->last_use_tmp = load_uop->last_use_tmp;
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             else {
+	                free_tmp_reg(extract_RT(load_uop->IR.b));
+	                put_RT(dest_reg, &(load_uop->IR.b));
+	                free(load_uop->last_use_tmp);
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             free(uop);
+	             free_tmp_reg(source_reg);
+	             uop_count--;
+		  }
+	          else {
+	             put_RT(dest_reg, &(uop->IR.b));
+	             put_RS(source_reg, &(uop->IR.b));
+		  }
+	       }
+	   
+	       else { 
+	         uop->IR.a = 0x00; /* nop */
+	       }
+
+	   
+//	    fprintf(stderr, "cmovns not implemented\n");
 	    goto done;
-	    /* cmovp */
+
 	  case 0x4A:
-	    fprintf(stderr, "cmovp not implemented\n");
+	   
+	   	    /* cmovp */
+	            /* move if parity bit set */
+	   
+	       if (PF(eflags)) {
+		  uop->IR.a = 0x41;
+	          put_offset(0, &(uop->IR.b));
+	          GvEv(&load_uop, &source_reg, &dest_reg);   
+	          if (load_uop) {         /* xxx is a memory location */
+	             *uop_list = load_uop;
+	             if (load_uop->next) {  /* need to find the actual load */
+	                free_tmp_reg(extract_RT(load_uop->next->IR.b));
+	                put_RT(dest_reg, &(load_uop->next->IR.b));
+	                free(load_uop->last_use_tmp->next);
+	                load_uop->last_use_tmp->next = NULL;
+	                load_uop->next->last_use_tmp = load_uop->last_use_tmp;
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             else {
+	                free_tmp_reg(extract_RT(load_uop->IR.b));
+	                put_RT(dest_reg, &(load_uop->IR.b));
+	                free(load_uop->last_use_tmp);
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             free(uop);
+	             free_tmp_reg(source_reg);
+	             uop_count--;
+		  }
+	          else {
+	             put_RT(dest_reg, &(uop->IR.b));
+	             put_RS(source_reg, &(uop->IR.b));
+		  }
+	       }
+	   
+	       else { 
+	         uop->IR.a = 0x00; /* nop */
+	       }
+	   
+//	    fprintf(stderr, "cmovp not implemented\n");
 	    goto done;
-	    /* cmovnp */
+	   
 	  case 0x4B:
-	    fprintf(stderr, "cmovnp not implemented\n");
+	    /* cmovnp */
+	    /* move if parity bit not set */
+
+	       if (!PF(eflags)) {
+		  uop->IR.a = 0x41;
+	          put_offset(0, &(uop->IR.b));
+	          GvEv(&load_uop, &source_reg, &dest_reg);   
+	          if (load_uop) {         /* xxx is a memory location */
+	             *uop_list = load_uop;
+	             if (load_uop->next) {  /* need to find the actual load */
+	                free_tmp_reg(extract_RT(load_uop->next->IR.b));
+	                put_RT(dest_reg, &(load_uop->next->IR.b));
+	                free(load_uop->last_use_tmp->next);
+	                load_uop->last_use_tmp->next = NULL;
+	                load_uop->next->last_use_tmp = load_uop->last_use_tmp;
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             else {
+	                free_tmp_reg(extract_RT(load_uop->IR.b));
+	                put_RT(dest_reg, &(load_uop->IR.b));
+	                free(load_uop->last_use_tmp);
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             free(uop);
+	             free_tmp_reg(source_reg);
+	             uop_count--;
+		  }
+	          else {
+	             put_RT(dest_reg, &(uop->IR.b));
+	             put_RS(source_reg, &(uop->IR.b));
+		  }
+	       }
+	   
+	       else { 
+	         uop->IR.a = 0x00; /* nop */
+	       }	   
+	   
+	   
+//	    fprintf(stderr, "cmovnp not implemented\n");
 	    goto done;
-	    /* cmovl */
+
 	  case 0x4C:
-	    fprintf(stderr, "cmovl not implemented\n");
+	   	    /* cmovl */
+	            /* less than - move if sign or overflow, but not both */
+	   
+	       if ( (SF(eflags) || OF(eflags)) && 
+		     !((SF(eflags) && OF(eflags))) )   {
+		  uop->IR.a = 0x41;
+	          put_offset(0, &(uop->IR.b));
+	          GvEv(&load_uop, &source_reg, &dest_reg);   
+	          if (load_uop) {         /* xxx is a memory location */
+	             *uop_list = load_uop;
+	             if (load_uop->next) {  /* need to find the actual load */
+	                free_tmp_reg(extract_RT(load_uop->next->IR.b));
+	                put_RT(dest_reg, &(load_uop->next->IR.b));
+	                free(load_uop->last_use_tmp->next);
+	                load_uop->last_use_tmp->next = NULL;
+	                load_uop->next->last_use_tmp = load_uop->last_use_tmp;
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             else {
+	                free_tmp_reg(extract_RT(load_uop->IR.b));
+	                put_RT(dest_reg, &(load_uop->IR.b));
+	                free(load_uop->last_use_tmp);
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             free(uop);
+	             free_tmp_reg(source_reg);
+	             uop_count--;
+		  }
+	          else {
+	             put_RT(dest_reg, &(uop->IR.b));
+	             put_RS(source_reg, &(uop->IR.b));
+		  }
+	       }
+	   
+	       else { 
+	         uop->IR.a = 0x00; /* nop */
+	       }
+	   
+	   
+//	    fprintf(stderr, "cmovl not implemented\n");
 	    goto done;
-	    /* cmovge */
+
 	  case 0x4D:
-	    fprintf(stderr, "cmovge not implemented\n");
+	   	    /* cmovge */
+	            /* Move if not less than */
+	   
+	   	  if (!( (SF(eflags) || OF(eflags)) && 
+		     !((SF(eflags) && OF(eflags)))) )   {
+		  uop->IR.a = 0x41;
+	          put_offset(0, &(uop->IR.b));
+	          GvEv(&load_uop, &source_reg, &dest_reg);   
+	          if (load_uop) {         /* xxx is a memory location */
+	             *uop_list = load_uop;
+	             if (load_uop->next) {  /* need to find the actual load */
+	                free_tmp_reg(extract_RT(load_uop->next->IR.b));
+	                put_RT(dest_reg, &(load_uop->next->IR.b));
+	                free(load_uop->last_use_tmp->next);
+	                load_uop->last_use_tmp->next = NULL;
+	                load_uop->next->last_use_tmp = load_uop->last_use_tmp;
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             else {
+	                free_tmp_reg(extract_RT(load_uop->IR.b));
+	                put_RT(dest_reg, &(load_uop->IR.b));
+	                free(load_uop->last_use_tmp);
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             free(uop);
+	             free_tmp_reg(source_reg);
+	             uop_count--;
+		  }
+	          else {
+	             put_RT(dest_reg, &(uop->IR.b));
+	             put_RS(source_reg, &(uop->IR.b));
+		  }
+	       }
+	   
+	       else { 
+	         uop->IR.a = 0x00; /* nop */
+	       }
+
+	   
+//	    fprintf(stderr, "cmovge not implemented\n");
 	    goto done;
-	    /* cmovle */
+
 	  case 0x4E:
-	    fprintf(stderr, "cmovle not implemented\n");
+	   	    /* cmovle */
+	            /* less than or equal... zero or (sign or ovflow not both) */
+	   
+	       if ( ZF(eflags) || ( ( (SF(eflags) || OF(eflags)) && 
+		     !((SF(eflags) && OF(eflags)))) ) )   {
+		  uop->IR.a = 0x41;
+	          put_offset(0, &(uop->IR.b));
+	          GvEv(&load_uop, &source_reg, &dest_reg);   
+	          if (load_uop) {         /* xxx is a memory location */
+	             *uop_list = load_uop;
+	             if (load_uop->next) {  /* need to find the actual load */
+	                free_tmp_reg(extract_RT(load_uop->next->IR.b));
+	                put_RT(dest_reg, &(load_uop->next->IR.b));
+	                free(load_uop->last_use_tmp->next);
+	                load_uop->last_use_tmp->next = NULL;
+	                load_uop->next->last_use_tmp = load_uop->last_use_tmp;
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             else {
+	                free_tmp_reg(extract_RT(load_uop->IR.b));
+	                put_RT(dest_reg, &(load_uop->IR.b));
+	                free(load_uop->last_use_tmp);
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             free(uop);
+	             free_tmp_reg(source_reg);
+	             uop_count--;
+		  }
+	          else {
+	             put_RT(dest_reg, &(uop->IR.b));
+	             put_RS(source_reg, &(uop->IR.b));
+		  }
+	       }
+	   
+	       else { 
+	         uop->IR.a = 0x00; /* nop */
+	       }
+	   
+	   
+//	    fprintf(stderr, "cmovle not implemented\n");
 	    goto done;
+	   
+	  case 0x4F:
 	    /* cmovg */
-	  case 0x4F: 
-	    fprintf(stderr, "cmovg not implemented\n");
+	    /* move on greater than, not le */
+
+	      if (!( ZF(eflags) || ( ( (SF(eflags) || OF(eflags)) && 
+		     !((SF(eflags) && OF(eflags)))) )) )   {
+		  uop->IR.a = 0x41;
+	          put_offset(0, &(uop->IR.b));
+	          GvEv(&load_uop, &source_reg, &dest_reg);   
+	          if (load_uop) {         /* xxx is a memory location */
+	             *uop_list = load_uop;
+	             if (load_uop->next) {  /* need to find the actual load */
+	                free_tmp_reg(extract_RT(load_uop->next->IR.b));
+	                put_RT(dest_reg, &(load_uop->next->IR.b));
+	                free(load_uop->last_use_tmp->next);
+	                load_uop->last_use_tmp->next = NULL;
+	                load_uop->next->last_use_tmp = load_uop->last_use_tmp;
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             else {
+	                free_tmp_reg(extract_RT(load_uop->IR.b));
+	                put_RT(dest_reg, &(load_uop->IR.b));
+	                free(load_uop->last_use_tmp);
+	                load_uop->last_use_tmp = NULL;
+		     }
+	             free(uop);
+	             free_tmp_reg(source_reg);
+	             uop_count--;
+		  }
+	          else {
+	             put_RT(dest_reg, &(uop->IR.b));
+	             put_RS(source_reg, &(uop->IR.b));
+		  }
+	       }
+	   
+	       else { 
+	         uop->IR.a = 0x00; /* nop */
+	       }	   
+	   
+//	    fprintf(stderr, "cmovg not implemented\n");
 	    goto done;
 
           case 0x50:
@@ -1715,7 +2285,9 @@
 	    }
 	  goto done;
 	  }
-	  case 0xAE: invalid_opcode(); goto done;
+	  case 0xAE: 
+	   fprintf(stderr, " opcode 0F AE, stmxcr/ldmxcsr not implemented\n");
+	   goto done;
 	    /* IMUL */
 	  case 0xAF: 
 	    uop->IR.a = 0x46; /* signed multiply */
@@ -2531,9 +3103,10 @@
 	  case 0xD5:
 	  case 0xD6:
 	  case 0xD7:
-	  case 0xD8:
-	  case 0xD9:
+	  case 0xD8: 
+	  case 0xD9: invalid_opcode(); goto done;
 	  case 0xDA:
+	   
 	  case 0xDB:
 	  case 0xDC:
 	  case 0xDD:
@@ -3310,7 +3883,7 @@
         break;
 	/* aas */
       case 0x3F: 
-	fprintf(stderr, "aas not implemented\n");
+	fprintf(stderr, "aas not implemented at %x\n",eip);
 	goto done;
 
 	/* inc eax */
@@ -7182,8 +7755,12 @@
             case 0x05:
               switch (rm) {
 		/* fld1 */
-                case 0x00: 
-		  fprintf(stderr, "fld1 not implemented\n");
+                case 0x00:
+		  uop->IR.a = 0x00; /* nop */
+		  if (!fld1_print) {
+		     fprintf(stderr, "FIXME: fld1 not implemented\n");
+		     fld1_print=1;
+		  }
 		  goto done;
 		  /* fldl2t */
                 case 0x01: 
@@ -7262,7 +7839,11 @@
 		  goto done;
 		  /* fsqrt */
                 case 0x02: 
-		  fprintf(stderr, "fsqrt not implemented\n");
+		  uop->IR.a = 0x00; /* NOP */
+		  if (!fsqrt_print) {
+		     fprintf(stderr, "fsqrt not implemented\n");
+		     fsqrt_print=1;
+		  }
 		  goto done;
 		  /* fsincos */
                 case 0x03: 
@@ -7278,12 +7859,20 @@
 		  goto done;
 		  /* fsin */
                 case 0x06: 
-		  fprintf(stderr, "fsin not implemented\n");
+		  fsin_count++;
+		  if (fsin_count%10000==1) {
+		     fprintf(stderr, "fsin not implemented (%i times)\n",
+			     fsin_count);
+		  }
 		  goto done;
 		  /* fcos */
                 case 0x07: 
-		  fprintf(stderr, "fcos not implemented\n");
-		  goto done;
+		   fcos_count++;
+		   if (fcos_count%10000==1) {
+		      fprintf(stderr, "fcos not implemented (%i times)\n",
+			      fcos_count);
+		   }
+		      goto done;
                 }
             default: bx_panic("debugger: invalid opcode\n"); goto done;
             }
@@ -7293,6 +7882,8 @@
       case 0xDA: /* ESC2 */
         mod_rm_byte = peek_byte();
         BX_DECODE_MODRM(mod_rm_byte, mod, opcode, rm);
+
+           
         if (mod != 3) {
           switch (opcode) {
 	    /* fiadd */
@@ -7370,13 +7961,25 @@
           }
         else { /* mod == 3 */
           switch (opcode) {
+	   case 0x01:
+	     /* FIXME */
+	      uop->IR.a = 0x70; /* take from fst */ /* mov.s */
+	      put_RD(STi(0xD0), &(uop->IR.b));
+	      put_RS(ST_0(), &(uop->IR.b));
+//	     fprintf(stderr, "fcmove %%st(1),%%st not implemented\n");
+	     goto done;
             case 0x05:
               if (rm == 1) {
 		/* fucompp */
-		fprintf(stderr, "fucompp not implemented\n");
+		uop->IR.a = 0x00; /* NOP */
+		if (!fucompp_print) {
+		   fprintf(stderr, "FIXME: fucompp not implemented\n");
+		   fucompp_print=1;
+		}
 		goto done;
 	        }
               else {
+		printf("VMW da opcode:  %i %i %i!\n",mod,opcode,rm);
                 invalid_opcode(); goto done;
                 }
             default: invalid_opcode(); goto done;
@@ -7930,11 +8533,19 @@
 	      goto done;
 	      /* fucom */
 	    case 0x04: /* STi_ST(); */
-	      fprintf(stderr, "fucom not implemented\n");
+	      uop->IR.a = 0x00; /* NOP */
+	      if (!fucom_print) {
+		 fprintf(stderr, "FIXME: fucom not implemented\n");
+		 fucom_print=1;
+	      }
 	      goto done;
 	      /* fucomp */
   	    case 0x05: /* STi(); */
-	      fprintf(stderr, "fucomp not implemented\n");
+	      fucomp_count++;
+	      if (fucomp_count%10000==1) {
+	         fprintf(stderr, "fucomp not implemented (%i times)\n",
+			 fucomp_count);
+	      }
 	      goto done;
             case 0x06: invalid_opcode(); goto done;
             case 0x07: invalid_opcode(); goto done;
@@ -8303,6 +8914,7 @@
 	}
 	/* jcxz */
       case 0xE3: 
+        printf("ERROR!  Checking IS_ECX_ZERO\n");
 	if (is_ecx_zero) {
 	   int reg;
 
@@ -10247,7 +10859,10 @@
   opc = fetch_byte();
   i = opc - byte_sub;  
   if (i > 7) {
-    fprintf(stderr, "in ST_STi, i = %d, byte_sub = 0x%x\n", i, byte_sub);
+    if (!sti_print) {
+       fprintf(stderr, "FIXME: in ST_STi, i = %d, byte_sub = 0x%x\n", i, byte_sub);
+       sti_print=1;
+    }
   }
   return ST(i);
 }
@@ -11505,6 +12120,15 @@
 
   return tmp;
 }
+/* AF: test the adjust flag in eflags; returns nonzero when it is set, 0 otherwise */
+int AF(unsigned int eflags) {
+  unsigned int tmp;
+
+  /* the flag is bit 4, so mask with 0x0010; the masked value itself is returned */
+  tmp = eflags & 0x0010;
+
+  return tmp;
+}
 
 struct uop_rec * 
 make_uop(void) {
diff -urN x86/disasm.h x86-modified/disasm.h
--- x86/disasm.h	2001-03-05 12:09:41.000000000 -0500
+++ x86-modified/disasm.h	2006-03-30 17:28:32.000000000 -0500
@@ -41,7 +41,7 @@
 
 
 
-unsigned int dis_asm(unsigned int is_32, Bit8u *instr, 
+unsigned int dis_asm(unsigned int eip, unsigned int is_32, Bit8u *instr, 
 		     int num_effective_addresses, 
 		     struct ea_addr_rec * addr_list,
 		     unsigned int eflags,
@@ -76,6 +76,7 @@
 int ZF(unsigned int eflags);
 int SF(unsigned int eflags);
 int PF(unsigned int eflags);
+int AF(unsigned int eflags);
 
 
 void fill_in_load_addr(struct uop_rec * uop);
diff -urN x86/eval.c x86-modified/eval.c
--- x86/eval.c	2000-10-03 15:03:31.000000000 -0400
+++ x86-modified/eval.c	2006-03-27 13:18:39.000000000 -0500
@@ -69,9 +69,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <ctype.h>
-#if defined(__CYGWIN32__)
 #include <errno.h>
-#endif
 
 #include "host.h"
 #include "misc.h"
diff -urN x86/host.h x86-modified/host.h
--- x86/host.h	2000-10-03 15:03:34.000000000 -0400
+++ x86-modified/host.h	2006-03-24 22:33:21.000000000 -0500
@@ -104,7 +104,7 @@
 #if defined(__GNUC__) || defined(__SUNPRO_C) || defined(__CC_C89) || defined(__CC_XLC)
 #define HOST_HAS_QUAD
 #if !defined(__FreeBSD__)
-typedef unsigned long long quad_t;	/* quad - 64 bits */
+//typedef unsigned long long quad_t;	/* quad - 64 bits */
 typedef signed long long squad_t;
 #else /* __FreeBSD__ */
 #define quad_t		unsigned long long
diff -urN x86/main.c x86-modified/main.c
--- x86/main.c	2001-01-10 21:42:57.000000000 -0500
+++ x86-modified/main.c	2006-03-25 15:32:21.000000000 -0500
@@ -250,7 +250,7 @@
 }
 
 /* print stats, uninitialize simulator components, and exit w/ exitcode */
-static void
+void
 exit_now(int exit_code)
 {
   /* print simulation stats */
diff -urN x86/Makefile x86-modified/Makefile
--- x86/Makefile	2002-08-08 21:17:25.000000000 -0400
+++ x86-modified/Makefile	2006-03-24 22:34:44.000000000 -0500
@@ -112,11 +112,12 @@
 ##	RS/6000 AIX Unix version 4, GNU GCC version cygnus-2.7-96q4
 ##	Windows NT version 4.0, Cygnus CygWin/32 beta 19
 ##
-##CC = gcc
+CC = gcc
 ##Change the compiler
-CC = i486-linuxlibc1-gcc
-OFLAGS = -g -Wall
+#CC = i486-linuxlibc1-gcc
+#OFLAGS = -g -Wall
 #OFLAGS = -O3 -funroll-loops -finline-functions
+OFLAGS = -O2 -Wall
 MFLAGS = `./sysprobe -flags`
 MLIBS  = `./sysprobe -libs` -lm
 ENDIAN = `./sysprobe -s`
diff -urN x86/p3 x86-modified/p3
--- x86/p3	1969-12-31 19:00:00.000000000 -0500
+++ x86-modified/p3	2006-03-25 14:41:51.000000000 -0500
@@ -0,0 +1,2 @@
+pentium3.c cache.c bpred.c resource.c ptrace.c main.c syscall.c memory.c regs.c loader.c endian.c dlite.c symbol.c eval.c options.c stats.c eio.c range.c misc.c machine.c timing.c init.c config.c mlfunctions.c state.c disasm.c reg_mapping.c output.c temporal.c
+
diff -urN x86/pentium3.c x86-modified/pentium3.c
--- x86/pentium3.c	2002-08-08 21:09:55.000000000 -0400
+++ x86-modified/pentium3.c	2006-03-27 13:19:31.000000000 -0500
@@ -5018,7 +5018,8 @@
       eip = x86_streaming_buffer[x86_fetch_head].eip;
       tag = x86_streaming_buffer[x86_fetch_head].tag;
       if (!complex_decode_overflow) {
-	num_uops =  dis_asm(x86_streaming_buffer[x86_fetch_head].is_32bit_code, 
+	num_uops =  dis_asm(eip,
+			    x86_streaming_buffer[x86_fetch_head].is_32bit_code, 
 			    x86_streaming_buffer[x86_fetch_head].assy, 
 			    x86_streaming_buffer[x86_fetch_head].num_ea_addrs, 
 			    x86_streaming_buffer[x86_fetch_head].addr_list,
@@ -5679,6 +5680,9 @@
   int i;
   struct ea_addr_rec * temp;
 
+
+   printf("KRW-1 -> tag = %i\n",tag);
+   
   if (first) {
     strcpy(tr_file, trace_file);
     strcat(tr_file, "/mem_state.txt");
@@ -5696,14 +5700,20 @@
   if (tag == 256)
     tag = 0;
 
+  printf("KRW1=>%d\n",last_read_tag);
   read_tag = last_read_tag;
   if (read_tag == tag) {
+    printf("KRW2\n");
     fgets(buffer, 256, trace);
     if (feof(trace)) {
       fclose(trace);
       longjmp(sim_exit_buf, /* exitcode */1);
     }
     read_in = sscanf(buffer, "%x %x %c", &ea_addrs[count], &value, &l_or_s[count]);
+     
+    printf("VMW%i: addr=%x value=%x l=%i\n",count,ea_addrs[count],value,
+	   l_or_s[count]);
+     
     count++;
     while (read_in == 3) {
       fgets(buffer, 256, trace);
@@ -5726,6 +5736,7 @@
   for (i = 0; i < count; i++) {
     temp[i].addr = ea_addrs[i];
     temp[i].L_or_S = l_or_s[i];
+  //  printf("VMW%i: addr=%x l=%i\n",i,ea_addrs[i],l_or_s[i]);
   }
   *list_of_ea_addr = temp;
   return count;
@@ -5801,6 +5812,14 @@
 	sscanf(&buffer[15+strlen(temp)], "%lx: %d %d", (long unsigned int *)eflags, 
 	       tag, is_32bit_code);
       old_ecx_value = *is_ecx_zero;
+      {
+	/* debug dump; bytes as two hex digits each so boundaries are unambiguous */
+	int i;
+	printf("VMW: is32=%i cs=%x eip=%x eflags=%x ecx=%i tag=%i len=%i ",
+	       *is_32bit_code, *cs, *eip, *eflags, *is_ecx_zero, *tag, inst_len);
+	for (i = 0; i < inst_len; i++) printf("%02x", assy[i]);
+	printf("\n");
+      }
       return inst_len;
     }
     else {
@@ -6060,7 +6079,8 @@
   num_x86_insts++;
 
   num_effective_addresses = x86_read_effective_addr(tag+1, &list_of_ea_addr);
-  num_uops = dis_asm(is_32bit_code, assy, num_effective_addresses, list_of_ea_addr, 
+  num_uops = dis_asm(eip,
+		     is_32bit_code, assy, num_effective_addresses, list_of_ea_addr, 
 		     eflags, &uops,
 		     is_ecx_zero);
   fprintf(stderr, "\neip: %lx\n", (long unsigned int)eip);
diff -urN x86/pentium4.c x86-modified/pentium4.c
--- x86/pentium4.c	2002-08-08 21:04:28.000000000 -0400
+++ x86-modified/pentium4.c	2006-03-27 13:17:49.000000000 -0500
@@ -5345,7 +5345,8 @@
       eip = x86_streaming_buffer[x86_fetch_head].eip;
       tag = x86_streaming_buffer[x86_fetch_head].tag;
       if (!complex_decode_overflow) {
-	num_uops =  dis_asm(x86_streaming_buffer[x86_fetch_head].is_32bit_code, 
+	num_uops =  dis_asm(eip,
+			    x86_streaming_buffer[x86_fetch_head].is_32bit_code, 
 			    x86_streaming_buffer[x86_fetch_head].assy, 
 			    x86_streaming_buffer[x86_fetch_head].num_ea_addrs, 
 			    x86_streaming_buffer[x86_fetch_head].addr_list,
@@ -6176,7 +6177,8 @@
       if (SB_num) {
 	if (eip != x86_streaming_buffer[x86_fetch_head].eip)
 	  fprintf(stderr, "eip doesn't match!\n");
-	num_uops =  dis_asm(x86_streaming_buffer[x86_fetch_head].is_32bit_code, 
+	num_uops =  dis_asm(eip,
+			    x86_streaming_buffer[x86_fetch_head].is_32bit_code, 
 			    x86_streaming_buffer[x86_fetch_head].assy, 
 			    x86_streaming_buffer[x86_fetch_head].num_ea_addrs, 
 			    x86_streaming_buffer[x86_fetch_head].addr_list,
@@ -6216,7 +6218,8 @@
 	get_next_insn(&eip, &tag, &ilen, &is_32bit_code, &is_ecx_zero, 
 		  &eflags, &list_of_ea_addr, &num_effective_addresses,
 		  assy, last_assy, &last_eip, &last_ilen, 0);
-	num_uops = dis_asm(is_32bit_code, assy, 
+	num_uops = dis_asm(eip,
+			   is_32bit_code, assy, 
 			   num_effective_addresses, list_of_ea_addr, 
 			   eflags, &uop_list,
 			   is_ecx_zero);
@@ -6850,7 +6853,8 @@
   num_x86_insts++;
 
   num_effective_addresses = x86_read_effective_addr(tag+1, &list_of_ea_addr);
-  num_uops = dis_asm(is_32bit_code, assy, num_effective_addresses, list_of_ea_addr, 
+  num_uops = dis_asm(eip,
+		     is_32bit_code, assy, num_effective_addresses, list_of_ea_addr, 
 		     eflags, &uops,
 		     is_ecx_zero);
   fprintf(stderr, "\neip: %lx\n", (long unsigned int)eip);
diff -urN x86/range.c x86-modified/range.c
--- x86/range.c	2000-10-03 15:04:01.000000000 -0400
+++ x86-modified/range.c	2006-03-27 13:19:00.000000000 -0500
@@ -67,9 +67,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#if defined(__CYGWIN32__)
 #include <errno.h>
-#endif
 
 #include "host.h"
 #include "misc.h"
diff -urN x86/syscall.c x86-modified/syscall.c
--- x86/syscall.c	2000-10-03 15:04:39.000000000 -0400
+++ x86-modified/syscall.c	2006-03-24 22:34:21.000000000 -0500
@@ -124,7 +124,7 @@
 #endif
 #ifdef linux
 #include <utime.h>
-#include <bsd/sgtty.h>
+//#include <bsd/sgtty.h>
 #endif /* linux */
 
 #if defined(hpux) || defined(__hpux)
diff -urN x86/sysprobe.c x86-modified/sysprobe.c
--- x86/sysprobe.c	2000-10-03 15:04:38.000000000 -0400
+++ x86-modified/sysprobe.c	2006-03-24 22:51:47.000000000 -0500
@@ -188,7 +188,7 @@
 #endif /* BFD_LOADER */
 
 #ifdef linux
-      fprintf(stdout, "-lbsd ");
+
 #elif defined(__USLC__) || (defined(__svr4__) && defined(__i386__) && defined(__unix__))
       fprintf(stdout, "-L/usr/ucblib -lucb ");
 #else
diff -urN x86/target-pisa/syscall.c x86-modified/target-pisa/syscall.c
--- x86/target-pisa/syscall.c	2000-10-03 15:04:39.000000000 -0400
+++ x86-modified/target-pisa/syscall.c	2006-03-24 22:34:21.000000000 -0500
@@ -124,7 +124,7 @@
 #endif
 #ifdef linux
 #include <utime.h>
-#include <bsd/sgtty.h>
+//#include <bsd/sgtty.h>
 #endif /* linux */
 
 #if defined(hpux) || defined(__hpux)
diff -urN x86/trace_bpred.c x86-modified/trace_bpred.c
--- x86/trace_bpred.c	2001-07-10 16:29:33.000000000 -0400
+++ x86-modified/trace_bpred.c	2006-03-24 23:09:14.000000000 -0500
@@ -9,7 +9,7 @@
 #include "options.h"
 #include "stats.h"
 #include "x86_bpred.h"
-
+#include <ctype.h>
 
 static struct opt_odb_t *sim_odb;
 
@@ -60,10 +60,12 @@
 static counter_t sim_num_branches = 0;
 static counter_t sim_num_insn = 0;
 
-banner(FILE *fd, int argc, char **argv)
+int banner(FILE *fd, int argc, char **argv)
 {
  
   fprintf(fd,"Steve's hijacked branch predictor\n");
+   
+   return 0;
 }
 
 
@@ -89,7 +91,7 @@
   /* nada */
 }
 
-#ifdef 0
+#if 0
 
 int check_cfg_file(char * cfg_file){
   char * temp;
@@ -706,7 +708,7 @@
 
 
 
-#ifdef 0
+#if 0
 int qs(const void * s1, const void * s2) {
   const func_node * fn1 = *((func_node **) s1);
   const func_node * fn2 = *((func_node **) s2);
@@ -885,13 +887,14 @@
 }
 
 
-sim_print_stats(FILE *fd) {
+int sim_print_stats(FILE *fd) {
 
 /* print simulation stats */
   fprintf(fd, "\nsim: ** simulation statistics **\n");
   stat_print_stats(sim_sdb, fd);
   sim_aux_stats(fd);
   fprintf(fd, "\n");
+   return 0;
 }
 
 /* register branch predictor stats */
@@ -1020,7 +1023,7 @@
   
 
   static int first = 1;
-  char zipcommand[80];
+//  char zipcommand[80];
   int read_in = 0;
   char tr_file[256];
   static FILE * trace;
@@ -1028,7 +1031,7 @@
   char temp[33];
   char * curr;
   int j, k;
-  Bit8u new_curr;
+  Bit8u new_curr=0;
   int hold_int;
   int inst_len;
   unsigned int value;
@@ -1095,6 +1098,7 @@
 
     }
   }
+  return 0;
 }
 
  
@@ -1128,7 +1132,7 @@
     first = 0;
   }
 
-  sim_odb = opt_new((int)orphan_fn);
+  sim_odb = opt_new( ((int)orphan_fn) );
   opt_reg_flag(sim_odb, "-h", "print help message",
                &help_me, /* default */FALSE, /* !print */FALSE, NULL);
 
@@ -1151,7 +1155,7 @@
       usage(stderr, argc, argv);
       exit(1);
     }
-#ifdef 0
+#if 0
   /* exec_index is set in orphan_fn() */
   if (exec_index == -1)
     {
diff -urN x86/traces.script x86-modified/traces.script
--- x86/traces.script	1969-12-31 19:00:00.000000000 -0500
+++ x86-modified/traces.script	2006-03-25 22:32:44.000000000 -0500
@@ -0,0 +1,5 @@
+rm *.bz2
+cp ~/cpu_trace ./trace_state.txt
+cp ~/mem_trace ./mem_state.txt
+bzip2 ./trace_state.txt
+bzip2 ./mem_state.txt
diff -urN x86/x86-outorder.c x86-modified/x86-outorder.c
--- x86/x86-outorder.c	2001-05-13 20:36:52.000000000 -0400
+++ x86-modified/x86-outorder.c	2006-04-01 17:39:51.000000000 -0500
@@ -118,6 +118,7 @@
 #include <math.h>
 #include <assert.h>
 #include <signal.h>
+#include <ctype.h>
 
 #include "host.h"
 #include "misc.h"
@@ -311,7 +312,9 @@
 static int res_fpmult;
 
 /* x86 trace directory */
-static char *trace_file;
+static char *mem_trace_file,*cpu_trace_file;
+
+int trace_pid;
 
 /* x86 cfg file (virtual mem mapping) */
 static char *cfg_file;
@@ -719,11 +722,17 @@
 
   /* trace options */
 
-  opt_reg_string(odb, "-trace",
-                 "bzipped trace directory to read in (mem_state, trace_state)",
-                 &trace_file, /* default */"",
+  opt_reg_string(odb, "-tracemem",
+                 "name of mem trace file",
+                 &mem_trace_file, /* default */"mem_state.txt",
+                 /* print */TRUE, /* format */NULL);
+
+     opt_reg_string(odb, "-tracecpu",
+                 "name of cpu trace file",
+                 &cpu_trace_file, /* default */"trace_state.txt",
                  /* print */TRUE, /* format */NULL);
 
+
   opt_reg_string(odb, "-mem_cfg",
                  "config (virtual memory mapping) file to read in",
                  &cfg_file, /* default */"",
@@ -4993,7 +5002,8 @@
       eip = x86_streaming_buffer[x86_fetch_head].eip;
       tag = x86_streaming_buffer[x86_fetch_head].tag;
       if (!complex_decode_overflow) {
-	num_uops =  dis_asm(x86_streaming_buffer[x86_fetch_head].is_32bit_code, 
+	num_uops =  dis_asm(eip,
+			    x86_streaming_buffer[x86_fetch_head].is_32bit_code, 
 			    x86_streaming_buffer[x86_fetch_head].assy, 
 			    x86_streaming_buffer[x86_fetch_head].num_ea_addrs, 
 			    x86_streaming_buffer[x86_fetch_head].addr_list,
@@ -5249,7 +5259,7 @@
 			    &re_eip, &re_eflags, &re_is_ecx_zero, &re_tag, re_assy);
     x86_num_insn++;
     
-    re_num_effective_addresses = x86_read_effective_addr(re_tag+1, &re_list_of_ea_addr);
+    re_num_effective_addresses = x86_read_effective_addr(re_tag, &re_list_of_ea_addr);
   }
   
   if ((num_cache_lines_in_SB() + !is_cache_line_in_SB((re_eip & 0xFFFFFFE0), &i)) >
@@ -5340,12 +5350,12 @@
       ilen = x86_read_inst(&is_32bit_code, &cs, &eip, &eflags, &is_ecx_zero, &tag, assy);
       x86_num_insn++;
 #if 1
-      if (!(x86_num_insn%10000)) fprintf(stderr, "#");
-      if (!(x86_num_insn%600000)) fprintf(stderr, "\n");
+      if (!(x86_num_insn%100000)) fprintf(stderr, "#");
+      if (!(x86_num_insn%6000000)) fprintf(stderr, "%lld\n",x86_num_insn);
       if (x86_num_insn > 644095)
 	fart++;
 #endif
-      num_effective_addresses = x86_read_effective_addr(tag+1, &list_of_ea_addr);
+      num_effective_addresses = x86_read_effective_addr(tag, &list_of_ea_addr);
       /* fill the b/w.  What about insts that span cache lines?? */
       fetched_bytes += ilen;  
       
@@ -5598,75 +5608,95 @@
   return NULL;
 }
 
-/* read in effective address from Load/Store File */
+
+FILE *cpu_trace,*mem_trace;
+
 int x86_read_effective_addr(int tag, struct ea_addr_rec ** list_of_ea_addr) {
   int count=0;
-  char l_or_s[128];
-  md_addr_t ea_addrs[128];
+  static char l_or_s[128];
+  static md_addr_t ea_addrs[128];
   char buffer[256];
-  char tr_file[256];
-  static FILE * trace;
   Bit32u value;
   static int read_tag;
-  static int last_read_tag;
+  static int current_tag = -1,next_tag=0;
   static int last_count = 0;
   static int first = 1;
   int read_in;
-  int i;
-  struct ea_addr_rec * temp;
+  int i; //,new_values=0;
+  struct ea_addr_rec *temp=NULL;
 
-  if (first) {
-    strcpy(tr_file, trace_file);
-    strcat(tr_file, "/mem_state.txt");
-    /* strcpy(trace_file, "/nfs/germany/z/vlaovic/config-bochs/bochs-991107c/mem_state.txt"); */
-    if ((trace = zfopen(tr_file, "r")) == NULL) {
-      fprintf(stderr, "Can't open %s\n", tr_file);
-      exit(-1);
-    }
-    fgets(buffer, 256, trace);
-    sscanf(buffer, "%d:", &read_tag);
-    last_read_tag = read_tag;
-    first = 0;
-  }
-  if (tag == 256)
-    tag = 0;
+//  fprintf(stderr,"vmw: Looking for tag %i\n",tag);
 
-  read_tag = last_read_tag;
-  if (read_tag == tag) {
-    fgets(buffer, 256, trace);
-    if (feof(trace)) {
-      fclose(trace);
-      longjmp(sim_exit_buf, /* exitcode */1);
+     /* First call: read the first line of the mem trace to prime next_tag */
+  if (first) {
+    /* read in a line */
+    fgets(buffer, 256, mem_trace);
+    if (feof(mem_trace)) {
+       fclose(mem_trace);
+       longjmp(sim_exit_buf, /* exitcode */1);
     }
-    read_in = sscanf(buffer, "%x %x %c", &ea_addrs[count], &value, &l_or_s[count]);
-    count++;
-    while (read_in == 3) {
-      fgets(buffer, 256, trace);
-      if (feof(trace)) {
-	fclose(trace);
-	longjmp(sim_exit_buf, /* exitcode */1);
-      }
-      if (count < 0 || count > 128)
-	fprintf(stderr, "need to make ea_addrs[] larger, count = %d\n",count); 
-      read_in = sscanf(buffer, "%x %x %c", &ea_addrs[count], &value, &l_or_s[count]);
-      count++;
-    }
-    count--;
-    read_in = sscanf(buffer, "%d:", &read_tag);
-    last_read_tag = read_tag;
+    /* read in the tag */
+    sscanf(buffer, "%X:", &read_tag); 
+   
+    next_tag=read_tag;
+     
+    first=0;
   }
-  last_count = count;
-  if (count) 
-    temp = (struct ea_addr_rec *)malloc(count*sizeof(struct ea_addr_rec));
-  for (i = 0; i < count; i++) {
-    temp[i].addr = ea_addrs[i];
-    temp[i].L_or_S = l_or_s[i];
+   
+  if (tag!=current_tag) {
+
+     fgets(buffer, 256, mem_trace);
+
+     read_in = sscanf(buffer, "%X %X %c", &ea_addrs[count], &value, 
+		     &l_or_s[count]);
+     count++;
+     while (read_in == 3) {
+//            fprintf(stderr,"\nVMW READ IN: %x %x %c\n",
+//		    ea_addrs[count-1],value,l_or_s[count-1]);
+       fgets(buffer, 256, mem_trace);
+       if (feof(mem_trace)) {
+	  fclose(mem_trace);
+	  longjmp(sim_exit_buf, /* exitcode */1);
+       }
+       if (count < 0 || count >= 128) { /* next sscanf writes slot [count] */
+	  fprintf(stderr, "need to make ea_addrs[] larger, count = %d\n", count);
+	  exit(-1); /* abort rather than overrun the 128-entry arrays */
+       }
+       read_in = sscanf(buffer, "%X %X %c", &ea_addrs[count], 
+		       &value, &l_or_s[count]);
+
+       count++;
+     }
+     count--;
+     current_tag=next_tag;
+     next_tag=ea_addrs[count];
+//     fprintf(stderr,"vmw: Current tag=%i, next_tag=%i\n",current_tag,next_tag);
+     last_count=count;
+//     new_values=1;
   }
-  *list_of_ea_addr = temp;
+
+  /* Copy the cached records out.  temp stays NULL for count == 0: the visible
+     caller frees the list only for a non-zero count, so malloc(0) could leak. */
+  count=last_count;
+  if (count > 0) {
+     temp = (struct ea_addr_rec *)malloc(count*sizeof(struct ea_addr_rec));
+     if (temp == NULL) {
+        fprintf(stderr, "out of memory for %d effective addresses\n", count);
+        exit(-1);
+     }
+     for (i = 0; i < count; i++) {
+        temp[i].addr = ea_addrs[i];
+        temp[i].L_or_S = l_or_s[i];
+     }
+  }
+  *list_of_ea_addr = temp;
+    
   return count;
 
 }
 
+extern void exit_now(int exit_code); /* defined in main.c, where it returns void */
+
 /* read in instructyion from trace */
 int x86_read_inst(unsigned int * is_32bit_code,
 		  Bit32u * cs,
@@ -5679,10 +5709,8 @@
   
 
   static int first = 1;
-  char zipcommand[80];
+//  char zipcommand[80];
   int read_in = 0;
-  char tr_file[256];
-  static FILE * trace;
   char buffer[256];
   char temp[33];
   char * curr;
@@ -5694,23 +5722,19 @@
   static int old_ecx_value = 0;
 
   if (first) {
-    strcpy(tr_file, trace_file);
-    strcat(tr_file, "/trace_state.txt");
-    /* strcpy(trace_file, "/nfs/germany/z/vlaovic/config-bochs/bochs-991107c/trace_state.txt"); */
-    if ((trace = zfopen(tr_file, "r")) == NULL) {
-      fprintf(stderr, "Can't open %s\n", tr_file);
-      exit(-1);
-    }
+
     first = 0;
   }
   /*scan in ibuf */
   *is_ecx_zero = old_ecx_value;
-  while (fgets(buffer, 256, trace)) {
-    if (feof(trace)) {
+
+  while (fgets(buffer, 256, cpu_trace)) {
+     
+    if (feof(cpu_trace)) {
       longjmp(sim_exit_buf, /* exitcode */1);
-      fclose(trace);
+      fclose(cpu_trace);
     }
-    read_in = sscanf(buffer, "%x:%lx: %s: %lx: %d %d", 
+    read_in = sscanf(buffer, "%X:%lX: %s: %lX: %d %d", 
 		     cs, (long unsigned int *)eip, temp, (long unsigned int *)eflags, 
 		     tag, is_32bit_code);
     if (read_in > 2) {
@@ -5721,7 +5745,7 @@
 	if (isdigit(*curr)) 
 	  hold_int = *curr - 48;
 	else
-	  hold_int = *curr - 87;
+	  hold_int = toupper((unsigned char)*curr) - 'A' + 10; /* hex letter, either case */
 	if (!(j%2))
 	  new_curr = 16*hold_int;
 	else{
@@ -5733,18 +5757,18 @@
 	j++;
       }
       if (read_in == 3) 
-	sscanf(&buffer[15+strlen(temp)], "%lx: %d %d", (long unsigned int *)eflags, 
+	sscanf(&buffer[15+strlen(temp)], "%lX: %d %d", (long unsigned int *)eflags, 
 	       tag, is_32bit_code);
       old_ecx_value = *is_ecx_zero;
       return inst_len;
     }
     else {
-      read_in = sscanf(buffer, "%s:%lx", temp, (long unsigned int *)&value);
+      read_in = sscanf(buffer, "%s:%lX", temp, (long unsigned int *)&value);
       if (!strncmp(temp,"ECX:", 4) || !strncmp(temp,"CX:", 3)) {
 	if (temp[0] == 'E')
-	  sscanf(&buffer[4], "%lx", (long unsigned int *)&value);
+	  sscanf(&buffer[4], "%lX", (long unsigned int *)&value);
 	else 
-	  sscanf(&buffer[3], "%lx", (long unsigned int *)&value);
+	  sscanf(&buffer[3], "%lX", (long unsigned int *)&value);
 	if (value)
 	  *is_ecx_zero = 1;
 	else 
@@ -5753,6 +5777,9 @@
 
     }
   }
+  printf ("VMW: Ran out of instructions...!\n");
+  exit_now(0);
+  return 0;
 }
 
 /* start simulation, program loaded, processor precise state initialized */
@@ -5760,11 +5787,22 @@
 sim_main(void)
 {
   int six_stage_delay = 3;
-
+   
   /* ignore any floating point exceptions, they may occur on mis-speculated
      execution paths */
   signal(SIGFPE, SIG_IGN);
 
+  if ((cpu_trace = fopen(cpu_trace_file, "r")) == NULL) {
+     fprintf(stderr, "Can't open %s\n", cpu_trace_file);
+     exit(-1);
+  }
+   
+  if ((mem_trace = fopen(mem_trace_file, "r")) == NULL) {
+     fprintf(stderr, "Can't open %s\n", mem_trace_file);
+     exit(-1);
+  }
+
+   
   /* set up program entry state */
   /* regs.regs_PC = ld_prog_entry;
      regs.regs_NPC = regs.regs_PC + sizeof(md_inst_t); */
@@ -5867,7 +5905,7 @@
 
   /* main simulator loop, NOTE: the pipe stages are traverse in reverse order
      to eliminate this/next state synchronization and relaxation problems */
-
+#if 1
   for (;;)
     {
       /* RUU/LSQ sanity checks */
@@ -5963,10 +6001,12 @@
       sim_cycle++;
 
       /* finish early? */
-      if (max_insts && sim_total_insn >= max_insts)
-	return;
+      /* -- vmw, max:insts # of x86 instructions, not uops */
+//      if (max_insts && sim_total_insn >= max_insts)
+        if (max_insts && x86_num_insn >=max_insts)
+	   return;
     }
-#if 0
+#else
   while(1) {
     test_uop_gen();
   }
@@ -5993,16 +6033,22 @@
   int num_addrs_used =0;
   static int num_x86_insts = 0;
 
+  fprintf(stderr,"\n\nvmw: ---------------------------------\n"); 
   ilen = x86_read_inst(&is_32bit_code, &cs, &eip, &eflags, &is_ecx_zero, &tag, assy);
+  fprintf(stderr,"vmw: after x86_read_inst\n"); 
   if (ilen == 0)
     exit(0);
   num_x86_insts++;
 
-  num_effective_addresses = x86_read_effective_addr(tag+1, &list_of_ea_addr);
-  num_uops = dis_asm(is_32bit_code, assy, num_effective_addresses, list_of_ea_addr, 
+  num_effective_addresses = x86_read_effective_addr(tag, &list_of_ea_addr);
+  fprintf(stderr,"vmw: after x86_read_effective_addr\n");  
+  num_uops = dis_asm(eip,
+		     is_32bit_code, assy, num_effective_addresses, 
+		     list_of_ea_addr, 
 		     eflags, &uops,
 		     is_ecx_zero);
-  fprintf(stderr, "\neip: %lx\n", (long unsigned int)eip);
+  fprintf(stderr,"vmw: after dis_asm\n");  
+  fprintf(stderr, "\n\neip: %lx\n", (long unsigned int)eip);
   fprintf(stderr, "eflags: %lx\n", (long unsigned int)eflags);
   fprintf(stderr, "ilen: %d\n", ilen);
   fprintf(stderr, "num_uops: %d\n", num_uops);
@@ -6010,14 +6056,16 @@
   fprintf(stderr, "num_86_insts: %d\n", num_x86_insts);
   for(i = 0; i < num_uops; i++) {
     fprintf(stderr, "\n\tuop #%d\n", i); 
-    uops->IR.a = (uops->IR.a & ~0xff) | (word_t)MD_OP_ENUM(MD_OPFIELD(uops->IR));
+    uops->IR.a = (uops->IR.a & ~0xff) | 
+                 (word_t)MD_OP_ENUM(MD_OPFIELD(uops->IR));
     md_print_insn(uops->IR, (md_addr_t)eip, stderr); 
     if ((uops->EA_addr).L_or_S) {
       fprintf(stderr,"\n0x%lx\t%c\n", (long unsigned int)(uops->EA_addr).addr, 
 	      (uops->EA_addr).L_or_S); 
       /*       if ((uops->EA_addr).addr == 0x4452c8)
 	       check++; */
-      num_addrs_used++;} 
+      num_addrs_used++;
+    } 
     if (extract_RS(uops->IR.b) >= START_INT_TMP_REG &&
 	extract_RS(uops->IR.b) < MD_NUM_IREGS)
       free_tmp_reg(extract_RS(uops->IR.b));
@@ -6037,7 +6085,6 @@
   fprintf(stderr, "didn't use the same number of addys as we read in!\n"); */
   if (num_effective_addresses)
     free(list_of_ea_addr);
- 
 
 }
 

