/* pcem/includes/private/codegen/codegen.h */
#ifndef _CODEGEN_H_
#define _CODEGEN_H_
#include "mem.h"
#include "x86_ops.h"
/*Handling self-modifying code (of which there is a lot on x86):

  PCem tracks a 'dirty mask' for each physical page, in which each bit
  represents 64 bytes. This is only tracked for pages that contain code - when
  a page first has a codeblock generated, it is evicted from the writelookup
  and added to the page_lookup for this purpose. While in the page_lookup,
  each write goes through the mem_write_ram*_page() functions and sets the
  dirty mask appropriately.

  Each codeblock also contains a code mask (actually two masks, one for each
  page the block may span), again with each bit representing 64 bytes.

  Each page has a list of the codeblocks present in it. As each codeblock can
  span up to two pages, two lists are kept.

  When a codeblock is about to be executed, its code masks are compared with
  the dirty masks of the relevant pages. If either pair intersects,
  codegen_check_flush() is called on the affected page(s), and all affected
  blocks are evicted.

  The 64-byte granularity appears to work reasonably well for most cases,
  avoiding most unnecessary evictions (e.g. when code and data are stored in
  the same page).
*/
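/*As a rough illustration of the check described above (a hedged sketch, not
  the actual dispatch code, which lives in the execution core; the helper name
  block_is_dirty() is made up for this example):

    static inline int block_is_dirty(codeblock_t *block)
    {
        if (*block->dirty_mask & block->page_mask)
            return 1;
        if ((block->flags & CODEBLOCK_HAS_PAGE2) &&
            (*block->dirty_mask2 & block->page_mask2))
            return 1;
        return 0;
    }

  If either intersection is non-zero, codegen_check_flush() is expected to be
  called on the affected page(s) and the stale blocks evicted before anything
  in those pages is executed again.*/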
typedef struct codeblock_t {
    uint32_t pc;
    uint32_t _cs;
    uint32_t phys, phys_2;
    uint16_t status;
    uint16_t flags;
    uint8_t ins;
    uint8_t TOP;

    /*Links for the codeblock tree (indices into codeblock[]), used to search
      for blocks when the hash lookup fails.*/
    uint16_t parent, left, right;

    uint8_t *data;

    uint64_t page_mask, page_mask2;
    uint64_t *dirty_mask, *dirty_mask2;

    /*Previous and next links for the codeblock list associated with each
      physical page. Two sets of links are kept, as a codeblock can be present
      in two pages.*/
    uint16_t prev, next;
    uint16_t prev_2, next_2;

    /*First mem_block_t used by this block. Any subsequent mem_block_ts will
      be in the list starting at head_mem_block->next.*/
    struct mem_block_t *head_mem_block;
} codeblock_t;
extern codeblock_t *codeblock;
extern uint16_t *codeblock_hash;
extern uint8_t *block_write_data;
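/*The tree and list links in codeblock_t are 16-bit indices into the global
  codeblock[] array rather than raw pointers, with BLOCK_INVALID (0) acting as
  the null link. Walking a per-page block list therefore looks something like
  the sketch below, following next (or next_2 when the page in question is the
  block's second page). count_blocks_in_list() and start_nr are made up for
  this example:

    static inline int count_blocks_in_list(uint16_t start_nr)
    {
        int n = 0;
        uint16_t nr = start_nr;

        while (nr != BLOCK_INVALID) {
            n++;
            nr = codeblock[nr].next;
        }
        return n;
    }
*/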
/*Code block uses FPU*/
#define CODEBLOCK_HAS_FPU 1
/*Code block is always entered with the same FPU top-of-stack*/
#define CODEBLOCK_STATIC_TOP 2
/*Code block has been compiled*/
#define CODEBLOCK_WAS_RECOMPILED 4
/*Code block is in free list and is not valid*/
#define CODEBLOCK_IN_FREE_LIST 8
/*Code block spans two pages, page_mask2 and dirty_mask2 are valid*/
#define CODEBLOCK_HAS_PAGE2 0x10
/*Code block is using a byte mask for code present and dirty*/
#define CODEBLOCK_BYTE_MASK 0x20
/*Code block is in dirty list*/
#define CODEBLOCK_IN_DIRTY_LIST 0x40
/*Code block is not inlining immediate parameters, parameters must be fetched from memory*/
#define CODEBLOCK_NO_IMMEDIATES 0x80
#define BLOCK_PC_INVALID 0xffffffff
#define BLOCK_INVALID 0
static inline int get_block_nr(codeblock_t *block) {
    return ((uintptr_t)block - (uintptr_t)codeblock) / sizeof(codeblock_t);
}
static inline codeblock_t *codeblock_tree_find(uint32_t phys, uint32_t _cs) {
    codeblock_t *block;
    uint64_t a = _cs | ((uint64_t)phys << 32);

    if (!pages[phys >> 12].head)
        return NULL;

    block = &codeblock[pages[phys >> 12].head];
    while (block) {
        uint64_t block_cmp = block->_cs | ((uint64_t)block->phys << 32);
        if (a == block_cmp) {
            if (!((block->status ^ cpu_cur_status) & CPU_STATUS_FLAGS) &&
                ((block->status & cpu_cur_status & CPU_STATUS_MASK) == (cpu_cur_status & CPU_STATUS_MASK)))
                break;
        }
        if (a < block_cmp)
            block = block->left ? &codeblock[block->left] : NULL;
        else
            block = block->right ? &codeblock[block->right] : NULL;
    }
    return block;
}
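/*codeblock_hash provides the fast path for block lookup; the per-page tree is
  only searched when the hashed entry does not match. A minimal sketch of that
  pattern - hash_page(), phys_addr and cs_base are placeholders for this
  example (the real hash is not defined in this header), and the status checks
  that the real lookup also performs are omitted:

    uint16_t nr = codeblock_hash[hash_page(phys_addr)];
    codeblock_t *block = (nr != BLOCK_INVALID) ? &codeblock[nr] : NULL;

    if (!block || block->phys != phys_addr || block->_cs != cs_base)
        block = codeblock_tree_find(phys_addr, cs_base);
*/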
static inline void codeblock_tree_add(codeblock_t *new_block) {
    codeblock_t *block = &codeblock[pages[new_block->phys >> 12].head];
    uint64_t a = new_block->_cs | ((uint64_t)new_block->phys << 32);

    if (!pages[new_block->phys >> 12].head) {
        pages[new_block->phys >> 12].head = get_block_nr(new_block);
        new_block->parent = new_block->left = new_block->right = BLOCK_INVALID;
    } else {
        codeblock_t *old_block = NULL;
        uint64_t old_block_cmp = 0;

        while (block) {
            old_block = block;
            old_block_cmp = old_block->_cs | ((uint64_t)old_block->phys << 32);

            if (a < old_block_cmp)
                block = block->left ? &codeblock[block->left] : NULL;
            else
                block = block->right ? &codeblock[block->right] : NULL;
        }

        if (a < old_block_cmp)
            old_block->left = get_block_nr(new_block);
        else
            old_block->right = get_block_nr(new_block);
        new_block->parent = get_block_nr(old_block);
        new_block->left = new_block->right = BLOCK_INVALID;
    }
}
static inline void codeblock_tree_delete(codeblock_t *block) {
    uint16_t parent_nr = block->parent;
    codeblock_t *parent;

    if (block->parent)
        parent = &codeblock[block->parent];
    else
        parent = NULL;

    if (!block->left && !block->right) {
        /*Easy case - remove from parent*/
        if (!parent)
            pages[block->phys >> 12].head = BLOCK_INVALID;
        else {
            uint16_t block_nr = get_block_nr(block);

            if (parent->left == block_nr)
                parent->left = BLOCK_INVALID;
            if (parent->right == block_nr)
                parent->right = BLOCK_INVALID;
        }
        return;
    } else if (!block->left) {
        /*Only right node*/
        if (!parent_nr) {
            pages[block->phys >> 12].head = block->right;
            codeblock[pages[block->phys >> 12].head].parent = BLOCK_INVALID;
        } else {
            uint16_t block_nr = get_block_nr(block);

            if (parent->left == block_nr) {
                parent->left = block->right;
                codeblock[parent->left].parent = parent_nr;
            }
            if (parent->right == block_nr) {
                parent->right = block->right;
                codeblock[parent->right].parent = parent_nr;
            }
        }
        return;
    } else if (!block->right) {
        /*Only left node*/
        if (!parent_nr) {
            pages[block->phys >> 12].head = block->left;
            codeblock[pages[block->phys >> 12].head].parent = BLOCK_INVALID;
        } else {
            uint16_t block_nr = get_block_nr(block);

            if (parent->left == block_nr) {
                parent->left = block->left;
                codeblock[parent->left].parent = parent_nr;
            }
            if (parent->right == block_nr) {
                parent->right = block->left;
                codeblock[parent->right].parent = parent_nr;
            }
        }
        return;
    } else {
        /*Difficult case - node has two children. Walk right child to find lowest node*/
        codeblock_t *lowest = &codeblock[block->right], *highest;
        codeblock_t *old_parent;
        uint16_t lowest_nr;

        while (lowest->left)
            lowest = &codeblock[lowest->left];
        lowest_nr = get_block_nr(lowest);

        old_parent = &codeblock[lowest->parent];

        /*Replace deleted node with lowest node*/
        if (!parent_nr)
            pages[block->phys >> 12].head = lowest_nr;
        else {
            uint16_t block_nr = get_block_nr(block);

            if (parent->left == block_nr)
                parent->left = lowest_nr;
            if (parent->right == block_nr)
                parent->right = lowest_nr;
        }

        lowest->parent = parent_nr;
        lowest->left = block->left;
        if (lowest->left)
            codeblock[lowest->left].parent = lowest_nr;

        old_parent->left = BLOCK_INVALID;

        highest = &codeblock[lowest->right];
        if (!lowest->right) {
            if (lowest_nr != block->right) {
                lowest->right = block->right;
                codeblock[block->right].parent = lowest_nr;
            }
            return;
        }

        while (highest->right)
            highest = &codeblock[highest->right];

        if (block->right && block->right != lowest_nr) {
            highest->right = block->right;
            codeblock[block->right].parent = get_block_nr(highest);
        }
    }
}
#define PAGE_MASK_MASK 63
#define PAGE_MASK_SHIFT 6
void codegen_mark_code_present_multibyte(codeblock_t *block, uint32_t start_pc, int len);
static inline void codegen_mark_code_present(codeblock_t *block, uint32_t start_pc, int len) {
    if (len == 1) {
        if (block->flags & CODEBLOCK_BYTE_MASK) {
            if (!((start_pc ^ block->pc) & ~0x3f)) /*Same 64-byte region as the block start*/
                block->page_mask |= ((uint64_t)1 << (start_pc & PAGE_MASK_MASK));
            else /*Second region*/
                block->page_mask2 |= ((uint64_t)1 << (start_pc & PAGE_MASK_MASK));
        } else {
            if (!((start_pc ^ block->pc) & ~0xfff)) /*Same page as the block start*/
                block->page_mask |= ((uint64_t)1 << ((start_pc >> PAGE_MASK_SHIFT) & PAGE_MASK_MASK));
            else /*Second page*/
                block->page_mask2 |= ((uint64_t)1 << ((start_pc >> PAGE_MASK_SHIFT) & PAGE_MASK_MASK));
        }
    } else
        codegen_mark_code_present_multibyte(block, start_pc, len);
}
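/*The decoder is expected to mark every byte it consumes while building a
  block, so that the code masks cover the whole instruction stream. For
  example, after fetching a 32-bit immediate at cs + op_pc during
  recompilation, the call would look roughly like this (a sketch of the
  expected usage, not a quote from the decoder):

    codegen_mark_code_present(block, cs + op_pc, 4);
*/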
void codegen_init();
void codegen_close();
void codegen_reset();
void codegen_block_init(uint32_t phys_addr);
void codegen_block_remove();
void codegen_block_start_recompile(codeblock_t *block);
void codegen_block_end_recompile(codeblock_t *block);
void codegen_block_end();
void codegen_delete_block(codeblock_t *block);
void codegen_generate_call(uint8_t opcode, OpFn op, uint32_t fetchdat, uint32_t new_pc, uint32_t old_pc);
void codegen_generate_seg_restore();
void codegen_set_op32();
void codegen_flush();
void codegen_check_flush(struct page_t *page, uint64_t mask, uint32_t phys_addr);
struct ir_data_t;
x86seg *codegen_generate_ea(struct ir_data_t *ir, x86seg *op_ea_seg, uint32_t fetchdat, int op_ssegs, uint32_t *op_pc, uint32_t op_32, int stack_offset);
void codegen_check_seg_read(codeblock_t *block, struct ir_data_t *ir, x86seg *seg);
void codegen_check_seg_write(codeblock_t *block, struct ir_data_t *ir, x86seg *seg);
int codegen_purge_purgable_list();
/*Delete a random code block to free memory. This is obviously quite expensive, and
will only be called when the allocator is out of memory*/
void codegen_delete_random_block(int required_mem_block);
extern int cpu_block_end;
extern uint32_t codegen_endpc;
extern int cpu_recomp_blocks, cpu_recomp_full_ins, cpu_new_blocks;
extern int cpu_recomp_blocks_latched, cpu_recomp_ins_latched, cpu_recomp_full_ins_latched, cpu_new_blocks_latched;
extern int cpu_recomp_flushes, cpu_recomp_flushes_latched;
extern int cpu_recomp_evicted, cpu_recomp_evicted_latched;
extern int cpu_recomp_reuse, cpu_recomp_reuse_latched;
extern int cpu_recomp_removed, cpu_recomp_removed_latched;
extern int cpu_reps, cpu_reps_latched;
extern int cpu_notreps, cpu_notreps_latched;
extern int codegen_block_cycles;
extern void (*codegen_timing_start)();
extern void (*codegen_timing_prefix)(uint8_t prefix, uint32_t fetchdat);
extern void (*codegen_timing_opcode)(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc);
extern void (*codegen_timing_block_start)();
extern void (*codegen_timing_block_end)();
extern int (*codegen_timing_jump_cycles)();
typedef struct codegen_timing_t {
    void (*start)();
    void (*prefix)(uint8_t prefix, uint32_t fetchdat);
    void (*opcode)(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc);
    void (*block_start)();
    void (*block_end)();
    int (*jump_cycles)();
} codegen_timing_t;
extern codegen_timing_t codegen_timing_pentium;
extern codegen_timing_t codegen_timing_686;
extern codegen_timing_t codegen_timing_486;
extern codegen_timing_t codegen_timing_winchip;
extern codegen_timing_t codegen_timing_winchip2;
extern codegen_timing_t codegen_timing_cyrixiii;
extern codegen_timing_t codegen_timing_k6;
extern codegen_timing_t codegen_timing_p6;
void codegen_timing_set(codegen_timing_t *timing);
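/*The CPU setup code picks one of the timing models above at initialisation
  time, e.g. (a minimal usage sketch; which model belongs to which CPU is
  decided elsewhere):

    codegen_timing_set(&codegen_timing_486);
*/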
extern int block_current;
extern int block_pos;
#define CPU_BLOCK_END() cpu_block_end = 1
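/*An instruction that changes control flow (or otherwise cannot be followed
  within the same block) would typically end the block being built with this
  macro - a sketch of the expected usage, not a quote from the recompiler:

    CPU_BLOCK_END();
*/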
/*Current physical page of the block being recompiled; -1 if no recompilation is taking place*/
extern uint32_t recomp_page;
extern x86seg *op_ea_seg;
extern int op_ssegs;
extern uint32_t op_old_pc;
/*Set to 1 if flags have been changed in the block being recompiled, and hence
flags_op is known and can be relied on */
extern int codegen_flags_changed;
extern int codegen_fpu_entered;
extern int codegen_mmx_entered;
extern int codegen_fpu_loaded_iq[8];
extern int codegen_reg_loaded[8];
extern int codegen_in_recompile;
void codegen_generate_reset();
int codegen_get_instruction_uop(codeblock_t *block, uint32_t pc, int *first_instruction, int *TOP);
void codegen_set_loop_start(struct ir_data_t *ir, int first_instruction);
#ifdef DEBUG_EXTRA
extern uint32_t instr_counts[256 * 256];
#endif
#endif