/*
* Copyright (C)2015-2016 Haxe Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifdef _MSC_VER
#pragma warning(disable:4820)
#endif
#include <math.h>
#include <hlmodule.h>
#ifdef __arm__
# error "JIT does not support ARM processors, only x86 and x86-64 are supported, please use HashLink/C native compilation instead"
#endif
#ifdef HL_DEBUG
# define JIT_DEBUG
#endif
typedef enum {
Eax = 0,
Ecx = 1,
Edx = 2,
Ebx = 3,
Esp = 4,
Ebp = 5,
Esi = 6,
Edi = 7,
#ifdef HL_64
R8 = 8,
R9 = 9,
R10 = 10,
R11 = 11,
R12 = 12,
R13 = 13,
R14 = 14,
R15 = 15,
#endif
_LAST = 0xFF
} CpuReg;
typedef enum {
MOV,
LEA,
PUSH,
ADD,
SUB,
IMUL, // only overflow flag changes compared to MUL
DIV,
IDIV,
CDQ,
CDQE,
POP,
RET,
CALL,
AND,
OR,
XOR,
CMP,
TEST,
NOP,
SHL,
SHR,
SAR,
INC,
DEC,
JMP,
// FPU
FSTP,
FSTP32,
FLD,
FLD32,
FLDCW,
// SSE
MOVSD,
MOVSS,
COMISD,
COMISS,
ADDSD,
SUBSD,
MULSD,
DIVSD,
ADDSS,
SUBSS,
MULSS,
DIVSS,
XORPD,
CVTSI2SD,
CVTSI2SS,
CVTSD2SI,
CVTSD2SS,
CVTSS2SD,
CVTSS2SI,
STMXCSR,
LDMXCSR,
// 8-16 bits
MOV8,
CMP8,
TEST8,
PUSH8,
MOV16,
CMP16,
TEST16,
// --
_CPU_LAST
} CpuOp;
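// condition codes for jumps: AddJump emits the 0x0F-prefixed long form,
// AddJump_small the short 8-bit form (code - 0x10); JAlways uses E9/EB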
#define JAlways 0
#define JOverflow 0x80
#define JULt 0x82
#define JUGte 0x83
#define JEq 0x84
#define JNeq 0x85
#define JULte 0x86
#define JUGt 0x87
#define JParity 0x8A
#define JNParity 0x8B
#define JSLt 0x8C
#define JSGte 0x8D
#define JSLte 0x8E
#define JSGt 0x8F
#define JCarry JULt
#define JZero JEq
#define JNotZero JNeq
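// raw emission into the code buffer: single byte, 32-bit word, 64-bit word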
#define B(bv) *ctx->buf.b++ = (unsigned char)(bv)
#define W(wv) *ctx->buf.w++ = wv
#ifdef HL_64
# define W64(wv) *ctx->buf.w64++ = wv
#else
# define W64(wv) W(wv)
#endif
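// ModRM / SIB encoding helpers; SIB_MULT maps a scale factor (1,2,4,8) to its 2-bit SIB encoding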
static const int SIB_MULT[] = {-1, 0, 1, -1, 2, -1, -1, -1, 3};
#define MOD_RM(mod,reg,rm) B(((mod) << 6) | (((reg)&7) << 3) | ((rm)&7))
#define SIB(mult,rmult,rbase) B((SIB_MULT[mult]<<6) | (((rmult)&7)<<3) | ((rbase)&7))
#define IS_SBYTE(c) ( (c) >= -128 && (c) < 128 )
#define AddJump(how,local) { if( (how) == JAlways ) { B(0xE9); } else { B(0x0F); B(how); }; local = BUF_POS(); W(0); }
#define AddJump_small(how,local) { if( (how) == JAlways ) { B(0xEB); } else B(how - 0x10); local = BUF_POS() | 0x40000000; B(0); }
#define XJump(how,local) AddJump(how,local)
#define XJump_small(how,local) AddJump_small(how,local)
#define MAX_OP_SIZE 256
#define BUF_POS() ((int)(ctx->buf.b - ctx->startBuf))
#define RTYPE(r) r->t->kind
#ifdef HL_64
# define RESERVE_ADDRESS 0x8000000000000000
#else
# define RESERVE_ADDRESS 0x80000000
#endif
#if defined(HL_WIN_CALL) && defined(HL_64)
# define IS_WINCALL64 1
#else
# define IS_WINCALL64 0
#endif
typedef struct jlist jlist;
struct jlist {
int pos;
int target;
jlist *next;
};
typedef struct vreg vreg;
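// physical register classes: RCPU = general purpose, RFPU = SSE (xmm), RSTACK = value spilled
// in the stack frame, RCONST = immediate, RADDR = absolute address, RMEM = memory operand,
// RUNUSED = no operand; RCPU_CALL / RCPU_8BITS constrain the allocator to registers that are
// not call parameters / that are 8-bit addressable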
typedef enum {
RCPU = 0,
RFPU = 1,
RSTACK = 2,
RCONST = 3,
RADDR = 4,
RMEM = 5,
RUNUSED = 6,
RCPU_CALL = 1 | 8,
RCPU_8BITS = 1 | 16
} preg_kind;
typedef struct {
preg_kind kind;
int id;
int lock;
vreg *holds;
} preg;
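// virtual (HL) register: has a fixed stack slot (stackPos, stack) and may be cached
// in a physical register (current, or NULL when only the stack copy is valid)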
struct vreg {
int stackPos;
int size;
hl_type *t;
preg *current;
preg stack;
};
#define REG_AT(i) (ctx->pregs + (i))
#ifdef HL_64
# define RCPU_COUNT 16
# define RFPU_COUNT 16
# ifdef HL_WIN_CALL
# define CALL_NREGS 4
# define RCPU_SCRATCH_COUNT 7
# define RFPU_SCRATCH_COUNT 6
static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx, R8, R9, R10, R11 };
static const CpuReg CALL_REGS[] = { Ecx, Edx, R8, R9 };
# else
# define CALL_NREGS 6 // TODO : XMM6+XMM7 are FPU reg parameters
# define RCPU_SCRATCH_COUNT 9
# define RFPU_SCRATCH_COUNT 16
static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx, Esi, Edi, R8, R9, R10, R11 };
static const CpuReg CALL_REGS[] = { Edi, Esi, Edx, Ecx, R8, R9 };
# endif
#else
# define CALL_NREGS 0
# define RCPU_COUNT 8
# define RFPU_COUNT 8
# define RCPU_SCRATCH_COUNT 3
# define RFPU_SCRATCH_COUNT 8
static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx };
#endif
#define XMM(i) ((i) + RCPU_COUNT)
#define PXMM(i) REG_AT(XMM(i))
#define REG_IS_FPU(i) ((i) >= RCPU_COUNT)
#define PEAX REG_AT(Eax)
#define PESP REG_AT(Esp)
#define PEBP REG_AT(Ebp)
#define REG_COUNT (RCPU_COUNT + RFPU_COUNT)
#define ID2(a,b) ((a) | ((b)<<8))
#define R(id) (ctx->vregs + (id))
#define ASSERT(i) { printf("JIT ERROR %d (jit.c line %d)\n",i,(int)__LINE__); jit_exit(); }
#define IS_FLOAT(r) ((r)->t->kind == HF64 || (r)->t->kind == HF32)
#define RLOCK(r) if( (r)->lock < ctx->currentPos ) (r)->lock = ctx->currentPos
#define RUNLOCK(r) if( (r)->lock == ctx->currentPos ) (r)->lock = 0
#define BREAK() B(0xCC)
#if defined(HL_64) && defined(HL_VCC)
# define JIT_CUSTOM_LONGJUMP
#endif
static preg _unused = { RUNUSED, 0, 0, NULL };
static preg *UNUSED = &_unused;
struct jit_ctx {
union {
unsigned char *b;
unsigned int *w;
unsigned long long *w64;
int *i;
double *d;
} buf;
vreg *vregs;
preg pregs[REG_COUNT];
vreg *savedRegs[REG_COUNT];
int savedLocks[REG_COUNT];
int *opsPos;
int maxRegs;
int maxOps;
int bufSize;
int totalRegsSize;
int functionPos;
int allocOffset;
int currentPos;
int nativeArgsCount;
unsigned char *startBuf;
hl_module *m;
hl_function *f;
jlist *jumps;
jlist *calls;
jlist *switchs;
hl_alloc falloc; // cleared per-function
hl_alloc galloc;
vclosure *closure_list;
hl_debug_infos *debug;
int c2hl;
int hl2c;
int longjump;
void *static_functions[8];
};
#define jit_exit() { hl_debug_break(); exit(-1); }
#define jit_error(msg) _jit_error(ctx,msg,__LINE__)
#ifndef HL_64
# ifdef HL_DEBUG
# define error_i64() jit_error("i64-32")
# else
void error_i64() {
printf("The module you are loading is using 64 bit ints that are not supported by the HL32.\nPlease run using HL64 or compile with -D hl-legacy32");
jit_exit();
}
# endif
#endif
static void _jit_error( jit_ctx *ctx, const char *msg, int line );
static void on_jit_error( const char *msg, int_val line );
static preg *pmem( preg *r, CpuReg reg, int offset ) {
r->kind = RMEM;
r->id = 0 | (reg << 4) | (offset << 8);
return r;
}
static preg *pmem2( preg *r, CpuReg reg, CpuReg reg2, int mult, int offset ) {
r->kind = RMEM;
r->id = mult | (reg << 4) | (reg2 << 8);
r->holds = (void*)(int_val)offset;
return r;
}
#ifdef HL_64
static preg *pcodeaddr( preg *r, int offset ) {
r->kind = RMEM;
r->id = 15 | (offset << 4);
return r;
}
#endif
static preg *pconst( preg *r, int c ) {
r->kind = RCONST;
r->holds = NULL;
r->id = c;
return r;
}
static preg *pconst64( preg *r, int_val c ) {
#ifdef HL_64
if( ((int)c) == c )
return pconst(r,(int)c);
r->kind = RCONST;
r->id = 0xC064C064;
r->holds = (vreg*)c;
return r;
#else
return pconst(r,(int)c);
#endif
}
#ifndef HL_64
// direct 64-bit addresses cannot be encoded on x86-64, so absolute-address operands are only used in 32-bit mode
static preg *paddr( preg *r, void *p ) {
r->kind = RADDR;
r->holds = (vreg*)p;
return r;
}
#endif
static void save_regs( jit_ctx *ctx ) {
int i;
for(i=0;i<REG_COUNT;i++) {
ctx->savedRegs[i] = ctx->pregs[i].holds;
ctx->savedLocks[i] = ctx->pregs[i].lock;
}
}
static void restore_regs( jit_ctx *ctx ) {
int i;
for(i=0;i<ctx->maxRegs;i++)
ctx->vregs[i].current = NULL;
for(i=0;i<REG_COUNT;i++) {
vreg *r = ctx->savedRegs[i];
preg *p = ctx->pregs + i;
p->holds = r;
p->lock = ctx->savedLocks[i];
if( r ) r->current = p;
}
}
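// ensure at least MAX_OP_SIZE bytes remain in the output buffer, reallocating (and copying) it when needed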
static void jit_buf( jit_ctx *ctx ) {
if( BUF_POS() > ctx->bufSize - MAX_OP_SIZE ) {
int nsize = ctx->bufSize * 4 / 3;
unsigned char *nbuf;
int curpos;
if( nsize == 0 ) {
int i;
for(i=0;i<ctx->m->code->nfunctions;i++)
nsize += ctx->m->code->functions[i].nops;
nsize *= 4;
}
if( nsize < ctx->bufSize + MAX_OP_SIZE * 4 ) nsize = ctx->bufSize + MAX_OP_SIZE * 4;
curpos = BUF_POS();
nbuf = (unsigned char*)malloc(nsize);
if( nbuf == NULL ) ASSERT(nsize);
if( ctx->startBuf ) {
memcpy(nbuf,ctx->startBuf,curpos);
free(ctx->startBuf);
}
ctx->startBuf = nbuf;
ctx->buf.b = nbuf + curpos;
ctx->bufSize = nsize;
}
}
static const char *KNAMES[] = { "cpu","fpu","stack","const","addr","mem","unused" };
#define ERRIF(c) if( c ) { printf("%s(%s,%s)\n",f?f->name:"???",KNAMES[a->kind], KNAMES[b->kind]); ASSERT(0); }
typedef struct {
const char *name; // single operand
int r_mem; // r32 / r/m32 r32
int mem_r; // r/m32 / r32 r/m32
int r_const; // r32 / imm32 imm32
int r_i8; // r32 / imm8 imm8
int mem_const; // r/m32 / imm32 N/A
} opform;
#define FLAG_LONGOP 0x80000000
#define FLAG_16B 0x40000000
#define FLAG_8B 0x20000000
#define RM(op,id) ((op) | (((id)+1)<<8))
#define GET_RM(op) (((op) >> ((op) < 0 ? 24 : 8)) & 15)
#define SBYTE(op) ((op) << 16)
#define LONG_OP(op) ((op) | FLAG_LONGOP)
#define OP16(op) LONG_OP((op) | FLAG_16B)
#define LONG_RM(op,id) LONG_OP(op | (((id) + 1) << 24))
static opform OP_FORMS[_CPU_LAST] = {
{ "MOV", 0x8B, 0x89, 0xB8, 0, RM(0xC7,0) },
{ "LEA", 0x8D },
{ "PUSH", 0x50, RM(0xFF,6), 0x68, 0x6A },
{ "ADD", 0x03, 0x01, RM(0x81,0), RM(0x83,0) },
{ "SUB", 0x2B, 0x29, RM(0x81,5), RM(0x83,5) },
{ "IMUL", LONG_OP(0x0FAF) },
{ "DIV", RM(0xF7,6), RM(0xF7,6) },
{ "IDIV", RM(0xF7,7), RM(0xF7,7) },
{ "CDQ", 0x99 },
{ "CDQE", 0x98 },
{ "POP", 0x58, RM(0x8F,0) },
{ "RET", 0xC3 },
{ "CALL", RM(0xFF,2), RM(0xFF,2), 0xE8 },
{ "AND", 0x23, 0x21, RM(0x81,4), RM(0x83,4) },
{ "OR", 0x0B, 0x09, RM(0x81,1), RM(0x83,1) },
{ "XOR", 0x33, 0x31, RM(0x81,6), RM(0x83,6) },
{ "CMP", 0x3B, 0x39, RM(0x81,7), RM(0x83,7) },
{ "TEST", 0x85, 0x85/*SWP?*/, RM(0xF7,0) },
{ "NOP", 0x90 },
{ "SHL", RM(0xD3,4), 0, 0, RM(0xC1,4) },
{ "SHR", RM(0xD3,5), 0, 0, RM(0xC1,5) },
{ "SAR", RM(0xD3,7), 0, 0, RM(0xC1,7) },
{ "INC", IS_64 ? RM(0xFF,0) : 0x40, RM(0xFF,0) },
{ "DEC", IS_64 ? RM(0xFF,1) : 0x48, RM(0xFF,1) },
{ "JMP", RM(0xFF,4) },
// FPU
{ "FSTP", 0, RM(0xDD,3) },
{ "FSTP32", 0, RM(0xD9,3) },
{ "FLD", 0, RM(0xDD,0) },
{ "FLD32", 0, RM(0xD9,0) },
{ "FLDCW", 0, RM(0xD9, 5) },
// SSE
{ "MOVSD", 0xF20F10, 0xF20F11 },
{ "MOVSS", 0xF30F10, 0xF30F11 },
{ "COMISD", 0x660F2F },
{ "COMISS", LONG_OP(0x0F2F) },
{ "ADDSD", 0xF20F58 },
{ "SUBSD", 0xF20F5C },
{ "MULSD", 0xF20F59 },
{ "DIVSD", 0xF20F5E },
{ "ADDSS", 0xF30F58 },
{ "SUBSS", 0xF30F5C },
{ "MULSS", 0xF30F59 },
{ "DIVSS", 0xF30F5E },
{ "XORPD", 0x660F57 },
{ "CVTSI2SD", 0xF20F2A },
{ "CVTSI2SS", 0xF30F2A },
{ "CVTSD2SI", 0xF20F2D },
{ "CVTSD2SS", 0xF20F5A },
{ "CVTSS2SD", 0xF30F5A },
{ "CVTSS2SI", 0xF30F2D },
{ "STMXCSR", 0, LONG_RM(0x0FAE,3) },
{ "LDMXCSR", 0, LONG_RM(0x0FAE,2) },
// 8 bits,
{ "MOV8", 0x8A, 0x88, 0, 0xB0, RM(0xC6,0) },
{ "CMP8", 0x3A, 0x38, 0, RM(0x80,7) },
{ "TEST8", 0x84, 0x84, RM(0xF6,0) },
{ "PUSH8", 0, 0, 0x6A | FLAG_8B },
{ "MOV16", OP16(0x8B), OP16(0x89), OP16(0xB8) },
{ "CMP16", OP16(0x3B), OP16(0x39) },
{ "TEST16", OP16(0x85) },
};
#ifdef HL_64
# define REX() if( r64 ) B(r64 | 0x40)
#else
# define REX()
#endif
#define OP(b) \
if( (b) & 0xFF0000 ) { \
B((b)>>16); \
if( r64 ) B(r64 | 0x40); /* also in 32 bits mode */ \
B((b)>>8); \
B(b); \
} else { \
if( (b) & FLAG_16B ) { \
B(0x66); \
REX(); \
} else {\
REX(); \
if( (b) & FLAG_LONGOP ) B((b)>>8); \
}\
B(b); \
}
static bool is_reg8( preg *a ) {
return a->kind == RSTACK || a->kind == RMEM || a->kind == RCONST || (a->kind == RCPU && a->id != Esi && a->id != Edi);
}
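// core instruction encoder: emits opcode o for operands a/b using OP_FORMS;
// mode64 selects the REX.W (64-bit operand size) form on HL_64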
static void op( jit_ctx *ctx, CpuOp o, preg *a, preg *b, bool mode64 ) {
opform *f = &OP_FORMS[o];
int r64 = mode64 && (o != PUSH && o != POP && o != CALL && o != PUSH8) ? 8 : 0;
switch( o ) {
case CMP8:
case TEST8:
case MOV8:
if( !is_reg8(a) || !is_reg8(b) )
ASSERT(0);
break;
default:
break;
}
switch( ID2(a->kind,b->kind) ) {
case ID2(RUNUSED,RUNUSED):
ERRIF(f->r_mem == 0);
OP(f->r_mem);
break;
case ID2(RCPU,RCPU):
case ID2(RFPU,RFPU):
ERRIF( f->r_mem == 0 );
if( a->id > 7 ) r64 |= 4;
if( b->id > 7 ) r64 |= 1;
OP(f->r_mem);
MOD_RM(3,a->id,b->id);
break;
case ID2(RCPU,RFPU):
case ID2(RFPU,RCPU):
ERRIF( (f->r_mem>>16) == 0 );
if( a->id > 7 ) r64 |= 4;
if( b->id > 7 ) r64 |= 1;
OP(f->r_mem);
MOD_RM(3,a->id,b->id);
break;
case ID2(RCPU,RUNUSED):
ERRIF( f->r_mem == 0 );
if( a->id > 7 ) r64 |= 1;
if( GET_RM(f->r_mem) > 0 ) {
OP(f->r_mem);
MOD_RM(3, GET_RM(f->r_mem)-1, a->id);
} else
OP(f->r_mem + (a->id&7));
break;
case ID2(RSTACK,RUNUSED):
ERRIF( f->mem_r == 0 || GET_RM(f->mem_r) == 0 );
{
int stackPos = R(a->id)->stackPos;
OP(f->mem_r);
if( IS_SBYTE(stackPos) ) {
MOD_RM(1,GET_RM(f->mem_r)-1,Ebp);
B(stackPos);
} else {
MOD_RM(2,GET_RM(f->mem_r)-1,Ebp);
W(stackPos);
}
}
break;
case ID2(RCPU,RCONST):
ERRIF( f->r_const == 0 && f->r_i8 == 0 );
if( a->id > 7 ) r64 |= 1;
{
int_val cval = b->holds ? (int_val)b->holds : b->id;
// short byte form
if( f->r_i8 && IS_SBYTE(cval) ) {
OP(f->r_i8);
MOD_RM(3,GET_RM(f->r_i8)-1,a->id);
B((int)cval);
} else if( GET_RM(f->r_const) > 0 ) {
OP(f->r_const&0xFF);
MOD_RM(3,GET_RM(f->r_const)-1,a->id);
if( mode64 && IS_64 && o == MOV ) W64(cval); else W((int)cval);
} else {
ERRIF( f->r_const == 0);
OP((f->r_const&0xFF) + (a->id&7));
if( mode64 && IS_64 && o == MOV ) W64(cval); else W((int)cval);
}
}
break;
case ID2(RSTACK,RCPU):
case ID2(RSTACK,RFPU):
ERRIF( f->mem_r == 0 );
if( b->id > 7 ) r64 |= 4;
{
int stackPos = R(a->id)->stackPos;
OP(f->mem_r);
if( IS_SBYTE(stackPos) ) {
MOD_RM(1,b->id,Ebp);
B(stackPos);
} else {
MOD_RM(2,b->id,Ebp);
W(stackPos);
}
}
break;
case ID2(RCPU,RSTACK):
case ID2(RFPU,RSTACK):
ERRIF( f->r_mem == 0 );
if( a->id > 7 ) r64 |= 4;
{
int stackPos = R(b->id)->stackPos;
OP(f->r_mem);
if( IS_SBYTE(stackPos) ) {
MOD_RM(1,a->id,Ebp);
B(stackPos);
} else {
MOD_RM(2,a->id,Ebp);
W(stackPos);
}
}
break;
case ID2(RCONST,RUNUSED):
ERRIF( f->r_const == 0 );
{
int_val cval = a->holds ? (int_val)a->holds : a->id;
OP(f->r_const);
if( f->r_const & FLAG_8B ) B((int)cval); else W((int)cval);
}
break;
case ID2(RMEM,RUNUSED):
ERRIF( f->mem_r == 0 );
{
int mult = a->id & 0xF;
int regOrOffs = mult == 15 ? a->id >> 4 : a->id >> 8;
CpuReg reg = (a->id >> 4) & 0xF;
if( mult == 15 ) {
ERRIF(1);
} else if( mult == 0 ) {
if( reg > 7 ) r64 |= 1;
OP(f->mem_r);
if( regOrOffs == 0 && (reg&7) != Ebp ) {
MOD_RM(0,GET_RM(f->mem_r)-1,reg);
if( (reg&7) == Esp ) B(0x24);
} else if( IS_SBYTE(regOrOffs) ) {
MOD_RM(1,GET_RM(f->mem_r)-1,reg);
if( (reg&7) == Esp ) B(0x24);
B(regOrOffs);
} else {
MOD_RM(2,GET_RM(f->mem_r)-1,reg);
if( (reg&7) == Esp ) B(0x24);
W(regOrOffs);
}
} else {
// [eax + ebx * M]
ERRIF(1);
}
}
break;
case ID2(RCPU, RMEM):
case ID2(RFPU, RMEM):
ERRIF( f->r_mem == 0 );
{
int mult = b->id & 0xF;
int regOrOffs = mult == 15 ? b->id >> 4 : b->id >> 8;
CpuReg reg = (b->id >> 4) & 0xF;
if( mult == 15 ) {
int pos;
if( a->id > 7 ) r64 |= 4;
OP(f->r_mem);
MOD_RM(0,a->id,5);
if( IS_64 ) {
// offset wrt current code
pos = BUF_POS() + 4;
W(regOrOffs - pos);
} else {
ERRIF(1);
}
} else if( mult == 0 ) {
if( a->id > 7 ) r64 |= 4;
if( reg > 7 ) r64 |= 1;
OP(f->r_mem);
if( regOrOffs == 0 && (reg&7) != Ebp ) {
MOD_RM(0,a->id,reg);
if( (reg&7) == Esp ) B(0x24);
} else if( IS_SBYTE(regOrOffs) ) {
MOD_RM(1,a->id,reg);
if( (reg&7) == Esp ) B(0x24);
B(regOrOffs);
} else {
MOD_RM(2,a->id,reg);
if( (reg&7) == Esp ) B(0x24);
W(regOrOffs);
}
} else {
int offset = (int)(int_val)b->holds;
if( a->id > 7 ) r64 |= 4;
if( reg > 7 ) r64 |= 1;
if( regOrOffs > 7 ) r64 |= 2;
OP(f->r_mem);
MOD_RM(offset == 0 ? 0 : IS_SBYTE(offset) ? 1 : 2,a->id,4);
SIB(mult,regOrOffs,reg);
if( offset ) {
if( IS_SBYTE(offset) ) B(offset); else W(offset);
}
}
}
break;
# ifndef HL_64
case ID2(RFPU,RADDR):
# endif
case ID2(RCPU,RADDR):
ERRIF( f->r_mem == 0 );
if( a->id > 7 ) r64 |= 4;
OP(f->r_mem);
MOD_RM(0,a->id,5);
if( IS_64 )
W64((int_val)b->holds);
else
W((int)(int_val)b->holds);
break;
# ifndef HL_64
case ID2(RADDR,RFPU):
# endif
case ID2(RADDR,RCPU):
ERRIF( f->mem_r == 0 );
if( b->id > 7 ) r64 |= 4;
OP(f->mem_r);
MOD_RM(0,b->id,5);
if( IS_64 )
W64((int_val)a->holds);
else
W((int)(int_val)a->holds);
break;
case ID2(RMEM, RCPU):
case ID2(RMEM, RFPU):
ERRIF( f->mem_r == 0 );
{
int mult = a->id & 0xF;
int regOrOffs = mult == 15 ? a->id >> 4 : a->id >> 8;
CpuReg reg = (a->id >> 4) & 0xF;
if( mult == 15 ) {
int pos;
if( b->id > 7 ) r64 |= 4;
OP(f->mem_r);
MOD_RM(0,b->id,5);
if( IS_64 ) {
// offset wrt current code
pos = BUF_POS() + 4;
W(regOrOffs - pos);
} else {
ERRIF(1);
}
} else if( mult == 0 ) {
if( b->id > 7 ) r64 |= 4;
if( reg > 7 ) r64 |= 1;
OP(f->mem_r);
if( regOrOffs == 0 && (reg&7) != Ebp ) {
MOD_RM(0,b->id,reg);
if( (reg&7) == Esp ) B(0x24);
} else if( IS_SBYTE(regOrOffs) ) {
MOD_RM(1,b->id,reg);
if( (reg&7) == Esp ) B(0x24);
B(regOrOffs);
} else {
MOD_RM(2,b->id,reg);
if( (reg&7) == Esp ) B(0x24);
W(regOrOffs);
}
} else {
int offset = (int)(int_val)a->holds;
if( b->id > 7 ) r64 |= 4;
if( reg > 7 ) r64 |= 1;
if( regOrOffs > 7 ) r64 |= 2;
OP(f->mem_r);
MOD_RM(offset == 0 ? 0 : IS_SBYTE(offset) ? 1 : 2,b->id,4);
SIB(mult,regOrOffs,reg);
if( offset ) {
if( IS_SBYTE(offset) ) B(offset); else W(offset);
}
}
}
break;
default:
ERRIF(1);
}
if( ctx->debug && ctx->f && o == CALL ) {
preg p;
op(ctx,MOV,pmem(&p,Esp,-HL_WSIZE),PEBP,true); // erase EIP (clean stack report)
}
}
static void op32( jit_ctx *ctx, CpuOp o, preg *a, preg *b ) {
op(ctx,o,a,b,false);
}
static void op64( jit_ctx *ctx, CpuOp o, preg *a, preg *b ) {
#ifndef HL_64
op(ctx,o,a,b,false);
#else
op(ctx,o,a,b,true);
#endif
}
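// back-patch a jump emitted with XJump/XJump_small so it targets the current buffer position
// (patch_jump_to takes an explicit target); the 0x40000000 flag marks a short 8-bit jump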
static void patch_jump( jit_ctx *ctx, int p ) {
if( p == 0 ) return;
if( p & 0x40000000 ) {
int d;
p &= 0x3FFFFFFF;
d = BUF_POS() - (p + 1);
if( d < -128 || d >= 128 ) ASSERT(d);
*(char*)(ctx->startBuf + p) = (char)d;
} else {
*(int*)(ctx->startBuf + p) = BUF_POS() - (p + 4);
}
}
static void patch_jump_to( jit_ctx *ctx, int p, int target ) {
if( p == 0 ) return;
if( p & 0x40000000 ) {
int d;
p &= 0x3FFFFFFF;
d = target - (p + 1);
if( d < -128 || d >= 128 ) ASSERT(d);
*(char*)(ctx->startBuf + p) = (char)d;
} else {
*(int*)(ctx->startBuf + p) = target - (p + 4);
}
}
static int stack_size( hl_type *t ) {
switch( t->kind ) {
case HUI8:
case HUI16:
case HBOOL:
# ifdef HL_64
case HI32:
case HF32:
# endif
return sizeof(int_val);
case HI64:
default:
return hl_type_size(t);
}
}
static int call_reg_index( int reg ) {
# ifdef HL_64
int i;
for(i=0;i<CALL_NREGS;i++)
if( CALL_REGS[i] == reg )
return i;
# endif
return -1;
}
static bool is_call_reg( preg *p ) {
# ifdef HL_64
int i;
if( p->kind == RFPU )
return p->id < CALL_NREGS;
for(i=0;i<CALL_NREGS;i++)
if( p->kind == RCPU && p->id == CALL_REGS[i] )
return true;
return false;
# else
return false;
# endif
}
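// pick a scratch physical register of the requested kind (round-robin via allocOffset),
// preferring a free one and evicting a held vreg otherwise; locked registers are skipped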
static preg *alloc_reg( jit_ctx *ctx, preg_kind k ) {
int i;
preg *p;
switch( k ) {
case RCPU:
case RCPU_CALL:
case RCPU_8BITS:
{
int off = ctx->allocOffset++;
const int count = RCPU_SCRATCH_COUNT;
for(i=0;i<count;i++) {
int r = RCPU_SCRATCH_REGS[(i + off)%count];
p = ctx->pregs + r;
if( p->lock >= ctx->currentPos ) continue;
if( k == RCPU_CALL && is_call_reg(p) ) continue;
if( k == RCPU_8BITS && !is_reg8(p) ) continue;
if( p->holds == NULL ) {
RLOCK(p);
return p;
}
}
for(i=0;i<count;i++) {
preg *p = ctx->pregs + RCPU_SCRATCH_REGS[(i + off)%count];
if( p->lock >= ctx->currentPos ) continue;
if( k == RCPU_CALL && is_call_reg(p) ) continue;
if( k == RCPU_8BITS && !is_reg8(p) ) continue;
if( p->holds ) {
RLOCK(p);
p->holds->current = NULL;
p->holds = NULL;
return p;
}
}
}
break;
case RFPU:
{
int off = ctx->allocOffset++;
const int count = RFPU_SCRATCH_COUNT;
for(i=0;i<count;i++) {
preg *p = PXMM((i + off)%count);
if( p->lock >= ctx->currentPos ) continue;
if( p->holds == NULL ) {
RLOCK(p);
return p;
}
}
for(i=0;i<count;i++) {
preg *p = PXMM((i + off)%count);
if( p->lock >= ctx->currentPos ) continue;
if( p->holds ) {
RLOCK(p);
p->holds->current = NULL;
p->holds = NULL;
return p;
}
}
}
break;
default:
ASSERT(k);
}
ASSERT(0); // out of registers !
return NULL;
}
static preg *fetch( vreg *r ) {
if( r->current )
return r->current;
return &r->stack;
}
static void scratch( preg *r ) {
if( r && r->holds ) {
r->holds->current = NULL;
r->holds = NULL;
r->lock = 0;
}
}
static preg *copy( jit_ctx *ctx, preg *to, preg *from, int size );
static void load( jit_ctx *ctx, preg *r, vreg *v ) {
preg *from = fetch(v);
if( from == r || v->size == 0 ) return;
if( r->holds ) r->holds->current = NULL;
if( v->current ) {
v->current->holds = NULL;
from = r;
}
r->holds = v;
v->current = r;
copy(ctx,r,from,v->size);
}
static preg *alloc_fpu( jit_ctx *ctx, vreg *r, bool andLoad ) {
preg *p = fetch(r);
if( p->kind != RFPU ) {
if( !IS_FLOAT(r) && (IS_64 || r->t->kind != HI64) ) ASSERT(r->t->kind);
p = alloc_reg(ctx, RFPU);
if( andLoad )
load(ctx,p,r);
else {
if( r->current )
r->current->holds = NULL;
r->current = p;
p->holds = r;
}
} else
RLOCK(p);
return p;
}
static void reg_bind( vreg *r, preg *p ) {
if( r->current )
r->current->holds = NULL;
r->current = p;
p->holds = r;
}
static preg *alloc_cpu( jit_ctx *ctx, vreg *r, bool andLoad ) {
preg *p = fetch(r);
if( p->kind != RCPU ) {
# ifndef HL_64
if( r->t->kind == HI64 ) return alloc_fpu(ctx,r,andLoad);
if( r->size > 4 ) ASSERT(r->size);
# endif
p = alloc_reg(ctx, RCPU);
if( andLoad )
load(ctx,p,r);
else
reg_bind(r,p);
} else
RLOCK(p);
return p;
}
// allocate a register that is not a call parameter
static preg *alloc_cpu_call( jit_ctx *ctx, vreg *r ) {
preg *p = fetch(r);
if( p->kind != RCPU ) {
# ifndef HL_64
if( r->t->kind == HI64 ) return alloc_fpu(ctx,r,true);
if( r->size > 4 ) ASSERT(r->size);
# endif
p = alloc_reg(ctx, RCPU_CALL);
load(ctx,p,r);
} else if( is_call_reg(p) ) {
preg *p2 = alloc_reg(ctx, RCPU_CALL);
op64(ctx,MOV,p2,p);
scratch(p);
reg_bind(r,p2);
return p2;
} else
RLOCK(p);
return p;
}
static preg *fetch32( jit_ctx *ctx, vreg *r ) {
if( r->current )
return r->current;
// make sure that the register is correctly erased
if( r->size < 4 ) {
preg *p = alloc_cpu(ctx, r, true);
RUNLOCK(p);
return p;
}
return fetch(r);
}
// make sure higher bits are zeroes
static preg *alloc_cpu64( jit_ctx *ctx, vreg *r, bool andLoad ) {
# ifndef HL_64
return alloc_cpu(ctx,r,andLoad);
# else
preg *p = fetch(r);
if( !andLoad ) ASSERT(0);
if( p->kind != RCPU ) {
p = alloc_reg(ctx, RCPU);
op64(ctx,XOR,p,p);
load(ctx,p,r);
} else {
// remove higher bits
preg tmp;
op64(ctx,SHL,p,pconst(&tmp,32));
op64(ctx,SHR,p,pconst(&tmp,32));
RLOCK(p);
}
return p;
# endif
}
// make sure the register can be used with 8 bits access
static preg *alloc_cpu8( jit_ctx *ctx, vreg *r, bool andLoad ) {
preg *p = fetch(r);
if( p->kind != RCPU ) {
p = alloc_reg(ctx, RCPU_8BITS);
load(ctx,p,r);
} else if( !is_reg8(p) ) {
preg *p2 = alloc_reg(ctx, RCPU_8BITS);
op64(ctx,MOV,p2,p);
scratch(p);
reg_bind(r,p2);
return p2;
} else
RLOCK(p);
return p;
}
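// copy `size` bytes between two operand locations, routing through a temporary
// register when no direct encoding exists for the source/destination pair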
static preg *copy( jit_ctx *ctx, preg *to, preg *from, int size ) {
if( size == 0 || to == from ) return to;
switch( ID2(to->kind,from->kind) ) {
case ID2(RMEM,RCPU):
case ID2(RSTACK,RCPU):
case ID2(RCPU,RSTACK):
case ID2(RCPU,RMEM):
case ID2(RCPU,RCPU):
# ifndef HL_64
case ID2(RCPU,RADDR):
case ID2(RADDR,RCPU):
# endif
switch( size ) {
case 1:
if( to->kind == RCPU ) {
op64(ctx,XOR,to,to);
if( !is_reg8(to) ) {
preg p;
op32(ctx,MOV16,to,from);
op32(ctx,SHL,to,pconst(&p,24));
op32(ctx,SHR,to,pconst(&p,24));
break;
}
} else if( !is_reg8(from) ) {
preg *r = alloc_reg(ctx, RCPU_CALL);
op32(ctx, MOV, r, from);
RUNLOCK(r);
op32(ctx,MOV8,to,r);
return from;
}
op32(ctx,MOV8,to,from);
break;
case 2:
if( to->kind == RCPU )
op64(ctx,XOR,to,to);
op32(ctx,MOV16,to,from);
break;
case 4:
op32(ctx,MOV,to,from);
break;
case 8:
if( IS_64 ) {
op64(ctx,MOV,to,from);
break;
}
default:
ASSERT(size);
}
return to->kind == RCPU ? to : from;
case ID2(RFPU,RFPU):
case ID2(RMEM,RFPU):
case ID2(RSTACK,RFPU):
case ID2(RFPU,RMEM):
case ID2(RFPU,RSTACK):
switch( size ) {
case 8:
op64(ctx,MOVSD,to,from);
break;
case 4:
op32(ctx,MOVSS,to,from);
break;
default:
ASSERT(size);
}
return to->kind == RFPU ? to : from;
case ID2(RMEM,RSTACK):
{
vreg *rfrom = R(from->id);
if( IS_FLOAT(rfrom) )
return copy(ctx,to,alloc_fpu(ctx,rfrom,true),size);
return copy(ctx,to,alloc_cpu(ctx,rfrom,true),size);
}
case ID2(RMEM,RMEM):
case ID2(RSTACK,RMEM):
case ID2(RSTACK,RSTACK):
# ifndef HL_64
case ID2(RMEM,RADDR):
case ID2(RSTACK,RADDR):
case ID2(RADDR,RSTACK):
# endif
{
preg *tmp;
if( (!IS_64 && size == 8) || (to->kind == RSTACK && IS_FLOAT(R(to->id))) || (from->kind == RSTACK && IS_FLOAT(R(from->id))) ) {
tmp = alloc_reg(ctx, RFPU);
op64(ctx,size == 8 ? MOVSD : MOVSS,tmp,from);
} else {
tmp = alloc_reg(ctx, RCPU);
copy(ctx,tmp,from,size);
}
return copy(ctx,to,tmp,size);
}
# ifdef HL_64
case ID2(RCPU,RADDR):
case ID2(RMEM,RADDR):
case ID2(RSTACK,RADDR):
{
preg p;
preg *tmp = alloc_reg(ctx, RCPU);
op64(ctx,MOV,tmp,pconst64(&p,(int_val)from->holds));
return copy(ctx,to,pmem(&p,tmp->id,0),size);
}
case ID2(RADDR,RCPU):
case ID2(RADDR,RMEM):
case ID2(RADDR,RSTACK):
{
preg p;
preg *tmp = alloc_reg(ctx, RCPU);
op64(ctx,MOV,tmp,pconst64(&p,(int_val)to->holds));
return copy(ctx,pmem(&p,tmp->id,0),from,size);
}
# endif
default:
break;
}
printf("copy(%s,%s)\n",KNAMES[to->kind], KNAMES[from->kind]);
ASSERT(0);
return NULL;
}
static void store( jit_ctx *ctx, vreg *r, preg *v, bool bind ) {
if( r->current && r->current != v ) {
r->current->holds = NULL;
r->current = NULL;
}
v = copy(ctx,&r->stack,v,r->size);
if( IS_FLOAT(r) != (v->kind == RFPU) )
ASSERT(0);
if( bind && r->current != v && (v->kind == RCPU || v->kind == RFPU) ) {
scratch(v);
r->current = v;
v->holds = r;
}
}
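// store a call result into r (EAX or XMM0; x87 st0 for float results on 32-bit)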
static void store_result( jit_ctx *ctx, vreg *r ) {
# ifndef HL_64
switch( r->t->kind ) {
case HF64:
scratch(r->current);
op64(ctx,FSTP,&r->stack,UNUSED);
break;
case HF32:
scratch(r->current);
op64(ctx,FSTP32,&r->stack,UNUSED);
break;
case HI64:
scratch(r->current);
error_i64();
break;
default:
# endif
store(ctx,r,IS_FLOAT(r) ? REG_AT(XMM(0)) : PEAX,true);
# ifndef HL_64
break;
}
# endif
}
static void op_mov( jit_ctx *ctx, vreg *to, vreg *from ) {
preg *r = fetch(from);
# ifndef HL_64
if( to->t->kind == HI64 ) {
error_i64();
return;
}
# endif
if( from->t->kind == HF32 && r->kind != RFPU )
r = alloc_fpu(ctx,from,true);
store(ctx, to, r, true);
}
static void copy_to( jit_ctx *ctx, vreg *to, preg *from ) {
store(ctx,to,from,true);
}
static void copy_from( jit_ctx *ctx, preg *to, vreg *from ) {
copy(ctx,to,fetch(from),from->size);
}
static void store_const( jit_ctx *ctx, vreg *r, int c ) {
preg p;
if( c == 0 )
op(ctx,XOR,alloc_cpu(ctx,r,false),alloc_cpu(ctx,r,false),r->size == 8);
else if( r->size == 8 )
op64(ctx,MOV,alloc_cpu(ctx,r,false),pconst64(&p,c));
else
op32(ctx,MOV,alloc_cpu(ctx,r,false),pconst(&p,c));
store(ctx,r,r->current,false);
}
static void discard_regs( jit_ctx *ctx, bool native_call ) {
int i;
for(i=0;i<RCPU_SCRATCH_COUNT;i++) {
preg *r = ctx->pregs + RCPU_SCRATCH_REGS[i];
if( r->holds ) {
r->holds->current = NULL;
r->holds = NULL;
}
}
for(i=0;i<RFPU_COUNT;i++) {
preg *r = ctx->pregs + XMM(i);
if( r->holds ) {
r->holds->current = NULL;
r->holds = NULL;
}
}
}
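// insert padding so ESP is 16-byte aligned at the call site; returns the argument size including padding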
static int pad_before_call( jit_ctx *ctx, int size ) {
int total = size + ctx->totalRegsSize + HL_WSIZE * 2; // EIP+EBP
if( total & 15 ) {
int pad = 16 - (total & 15);
preg p;
if( pad ) op64(ctx,SUB,PESP,pconst(&p,pad));
size += pad;
}
return size;
}
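// push vreg r onto the native stack as a call argument, using the slot size from stack_size()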
static void push_reg( jit_ctx *ctx, vreg *r ) {
preg p;
switch( stack_size(r->t) ) {
case 1:
op64(ctx,SUB,PESP,pconst(&p,1));
op32(ctx,MOV8,pmem(&p,Esp,0),alloc_cpu8(ctx,r,true));
break;
case 2:
op64(ctx,SUB,PESP,pconst(&p,2));
op32(ctx,MOV16,pmem(&p,Esp,0),alloc_cpu(ctx,r,true));
break;
case 4:
if( r->size < 4 )
alloc_cpu(ctx,r,true); // force fetch (higher bits set to 0)
if( !IS_64 ) {
if( r->current != NULL && r->current->kind == RFPU ) scratch(r->current);
op32(ctx,PUSH,fetch(r),UNUSED);
} else {
// pseudo push32 (not available)
op64(ctx,SUB,PESP,pconst(&p,4));
op32(ctx,MOV,pmem(&p,Esp,0),alloc_cpu(ctx,r,true));
}
break;
case 8:
if( fetch(r)->kind == RFPU ) {
op64(ctx,SUB,PESP,pconst(&p,8));
op64(ctx,MOVSD,pmem(&p,Esp,0),fetch(r));
} else if( IS_64 )
op64(ctx,PUSH,fetch(r),UNUSED);
else if( r->stack.kind == RSTACK ) {
scratch(r->current);
r->stackPos += 4;
op32(ctx,PUSH,&r->stack,UNUSED);
r->stackPos -= 4;
op32(ctx,PUSH,&r->stack,UNUSED);
} else
ASSERT(0);
break;
default:
ASSERT(r->size);
}
}
static int begin_native_call( jit_ctx *ctx, int nargs ) {
ctx->nativeArgsCount = nargs;
return pad_before_call(ctx, nargs > CALL_NREGS ? (nargs - CALL_NREGS) * HL_WSIZE : 0);
}
static preg *alloc_native_arg( jit_ctx *ctx ) {
# ifdef HL_64
int rid = ctx->nativeArgsCount - 1;
preg *r = rid < CALL_NREGS ? REG_AT(CALL_REGS[rid]) : alloc_reg(ctx,RCPU_CALL);
scratch(r);
return r;
# else
return alloc_reg(ctx, RCPU);
# endif
}
static void set_native_arg( jit_ctx *ctx, preg *r ) {
if( r->kind == RSTACK ) {
vreg *v = ctx->vregs + r->id;
if( v->size < 4 )
r = fetch32(ctx, v);
}
# ifdef HL_64
if( r->kind == RFPU ) ASSERT(0);
int rid = --ctx->nativeArgsCount;
preg *target;
if( rid >= CALL_NREGS ) {
op64(ctx,PUSH,r,UNUSED);
return;
}
target = REG_AT(CALL_REGS[rid]);
if( target != r ) {
op64(ctx, MOV, target, r);
scratch(target);
}
# else
op32(ctx,PUSH,r,UNUSED);
# endif
}
static void set_native_arg_fpu( jit_ctx *ctx, preg *r, bool isf32 ) {
# ifdef HL_64
if( r->kind == RCPU ) ASSERT(0);
// can only be used if last argument !!
ctx->nativeArgsCount--;
preg *target = REG_AT(XMM(IS_WINCALL64 ? ctx->nativeArgsCount : 0));
if( target != r ) {
op64(ctx, isf32 ? MOVSS : MOVSD, target, r);
scratch(target);
}
# else
op32(ctx,PUSH,r,UNUSED);
# endif
}
typedef struct {
int nextCpu;
int nextFpu;
int mapped[REG_COUNT];
} call_regs;
static int select_call_reg( call_regs *regs, hl_type *t, int id ) {
# ifndef HL_64
return -1;
#else
bool isFloat = t->kind == HF32 || t->kind == HF64;
# ifdef HL_WIN_CALL
int index = regs->nextCpu++;
# else
int index = isFloat ? regs->nextFpu++ : regs->nextCpu++;
# endif
if( index >= CALL_NREGS )
return -1;
int reg = isFloat ? XMM(index) : CALL_REGS[index];
regs->mapped[reg] = id + 1;
return reg;
#endif
}
static int mapped_reg( call_regs *regs, int id ) {
# ifndef HL_64
return -1;
#else
int i;
for(i=0;i<CALL_NREGS;i++) {
int r = CALL_REGS[i];
if( regs->mapped[r] == id + 1 ) return r;
r = XMM(i);
if( regs->mapped[r] == id + 1 ) return r;
}
return -1;
#endif
}
static int prepare_call_args( jit_ctx *ctx, int count, int *args, vreg *vregs, int extraSize ) {
int i;
int size = extraSize, paddedSize;
call_regs ctmp = {0};
for(i=0;i<count;i++) {
vreg *r = vregs + args[i];
int cr = select_call_reg(&ctmp, r->t, i);
if( cr >= 0 ) {
preg *c = REG_AT(cr);
preg *cur = fetch(r);
if( cur != c ) {
copy(ctx,c,cur,r->size);
scratch(c);
}
RLOCK(c);
continue;
}
size += stack_size(r->t);
}
paddedSize = pad_before_call(ctx,size);
for(i=0;i<count;i++) {
// RTL
int j = count - (i + 1);
vreg *r = vregs + args[j];
if( (i & 7) == 0 ) jit_buf(ctx);
if( mapped_reg(&ctmp,j) >= 0 ) continue;
push_reg(ctx,r);
if( r->current ) RUNLOCK(r->current);
}
return paddedSize;
}
static void op_call( jit_ctx *ctx, preg *r, int size ) {
preg p;
# ifdef JIT_DEBUG
if( IS_64 && size >= 0 ) {
int jchk;
op32(ctx,TEST,PESP,pconst(&p,15));
XJump(JZero,jchk);
BREAK(); // unaligned ESP
patch_jump(ctx, jchk);
}
# endif
if( IS_WINCALL64 ) {
// the Win64 ABI (MSVC) requires 32 bytes of shadow space here
op64(ctx,SUB,PESP,pconst(&p,32));
if( size >= 0 ) size += 32;
}
op32(ctx, CALL, r, UNUSED);
if( size > 0 ) op64(ctx,ADD,PESP,pconst(&p,size));
}
static void call_native( jit_ctx *ctx, void *nativeFun, int size ) {
bool isExc = nativeFun == hl_assert || nativeFun == hl_throw || nativeFun == on_jit_error;
preg p;
// native function, already resolved
op64(ctx,MOV,PEAX,pconst64(&p,(int_val)nativeFun));
op_call(ctx,PEAX, isExc ? -1 : size);
if( isExc )
return;
discard_regs(ctx, true);
}
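// call function findex: native functions go through call_native; HL functions get a
// direct relative call, recorded in ctx->calls for later patching when not compiled yet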
static void op_call_fun( jit_ctx *ctx, vreg *dst, int findex, int count, int *args ) {
int fid = findex < 0 ? -1 : ctx->m->functions_indexes[findex];
bool isNative = fid >= ctx->m->code->nfunctions;
int size = prepare_call_args(ctx,count,args,ctx->vregs,0);
preg p;
if( fid < 0 ) {
ASSERT(fid);
} else if( isNative ) {
call_native(ctx,ctx->m->functions_ptrs[findex],size);
} else {
int cpos = BUF_POS() + (IS_WINCALL64 ? 4 : 0);
# ifdef JIT_DEBUG
if( IS_64 ) cpos += 13; // ESP CHECK
# endif
if( ctx->m->functions_ptrs[findex] ) {
// already compiled
op_call(ctx,pconst(&p,(int)(int_val)ctx->m->functions_ptrs[findex] - (cpos + 5)), size);
} else if( ctx->m->code->functions + fid == ctx->f ) {
// our current function
op_call(ctx,pconst(&p, ctx->functionPos - (cpos + 5)), size);
} else {
// stage for later
jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist));
j->pos = cpos;
j->target = findex;
j->next = ctx->calls;
ctx->calls = j;
op_call(ctx,pconst(&p,0), size);
}
discard_regs(ctx, false);
}
if( dst )
store_result(ctx,dst);
}
static void op_enter( jit_ctx *ctx ) {
preg p;
op64(ctx, PUSH, PEBP, UNUSED);
op64(ctx, MOV, PEBP, PESP);
if( ctx->totalRegsSize ) op64(ctx, SUB, PESP, pconst(&p,ctx->totalRegsSize));
}
static void op_ret( jit_ctx *ctx, vreg *r ) {
preg p;
switch( r->t->kind ) {
case HF32:
# ifdef HL_64
op64(ctx, MOVSS, PXMM(0), fetch(r));
# else
op64(ctx,FLD32,&r->stack,UNUSED);
# endif
break;
case HF64:
# ifdef HL_64
op64(ctx, MOVSD, PXMM(0), fetch(r));
# else
op64(ctx,FLD,&r->stack,UNUSED);
# endif
break;
default:
if( r->size < 4 && !r->current )
fetch32(ctx, r);
if( r->current != PEAX )
op64(ctx,MOV,PEAX,fetch(r));
break;
}
if( ctx->totalRegsSize ) op64(ctx, ADD, PESP, pconst(&p, ctx->totalRegsSize));
# ifdef JIT_DEBUG
{
int jeq;
op64(ctx, CMP, PESP, PEBP);
XJump_small(JEq,jeq);
jit_error("invalid ESP");
patch_jump(ctx,jeq);
}
# endif
op64(ctx, POP, PEBP, UNUSED);
op64(ctx, RET, UNUSED, UNUSED);
}
static void call_native_consts( jit_ctx *ctx, void *nativeFun, int_val *args, int nargs ) {
int size = pad_before_call(ctx, IS_64 ? 0 : HL_WSIZE*nargs);
preg p;
int i;
# ifdef HL_64
for(i=0;i<nargs;i++)
op64(ctx, MOV, REG_AT(CALL_REGS[i]), pconst64(&p, args[i]));
# else
for(i=nargs-1;i>=0;i--)
op32(ctx, PUSH, pconst64(&p, args[i]), UNUSED);
# endif
call_native(ctx, nativeFun, size);
}
static void on_jit_error( const char *msg, int_val line ) {
char buf[256];
int iline = (int)line;
sprintf(buf,"%s (line %d)",msg,iline);
#ifdef HL_WIN
MessageBoxA(NULL,buf,"JIT ERROR",MB_OK);
#else
printf("JIT ERROR : %s\n",buf);
#endif
hl_debug_break();
hl_throw(NULL);
}
static void _jit_error( jit_ctx *ctx, const char *msg, int line ) {
int_val args[2] = { (int_val)msg, (int_val)line };
call_native_consts(ctx,on_jit_error,args,2);
}
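// emit a binary operation (arithmetic, shift, div/mod or comparison) on two vregs,
// optionally storing the result into dst; returns the preg holding the result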
static preg *op_binop( jit_ctx *ctx, vreg *dst, vreg *a, vreg *b, hl_op bop ) {
preg *pa = fetch(a), *pb = fetch(b), *out = NULL;
CpuOp o;
if( IS_FLOAT(a) ) {
bool isf32 = a->t->kind == HF32;
switch( bop ) {
case OAdd: o = isf32 ? ADDSS : ADDSD; break;
case OSub: o = isf32 ? SUBSS : SUBSD; break;
case OMul: o = isf32 ? MULSS : MULSD; break;
case OSDiv: o = isf32 ? DIVSS : DIVSD; break;
case OJSLt:
case OJSGte:
case OJSLte:
case OJSGt:
case OJEq:
case OJNotEq:
case OJNotLt:
case OJNotGte:
o = isf32 ? COMISS : COMISD;
break;
case OSMod:
{
int args[] = { a->stack.id, b->stack.id };
int size = prepare_call_args(ctx,2,args,ctx->vregs,0);
void *mod_fun;
if( isf32 ) mod_fun = fmodf; else mod_fun = fmod;
call_native(ctx,mod_fun,size);
store_result(ctx,dst);
return fetch(dst);
}
default:
printf("%s\n", hl_op_name(bop));
ASSERT(bop);
}
} else {
bool is64 = a->t->kind == HI64;
# ifndef HL_64
if( is64 ) {
error_i64();
return fetch(a);
}
# endif
switch( bop ) {
case OAdd: o = ADD; break;
case OSub: o = SUB; break;
case OMul: o = IMUL; break;
case OAnd: o = AND; break;
case OOr: o = OR; break;
case OXor: o = XOR; break;
case OShl:
case OUShr:
case OSShr:
if( !b->current || b->current->kind != RCPU || b->current->id != Ecx ) {
scratch(REG_AT(Ecx));
op(ctx,MOV,REG_AT(Ecx),pb,is64);
RLOCK(REG_AT(Ecx));
pa = fetch(a);
} else
RLOCK(b->current);
if( pa->kind != RCPU ) {
pa = alloc_reg(ctx, RCPU);
op(ctx,MOV,pa,fetch(a), is64);
}
op(ctx,bop == OShl ? SHL : (bop == OUShr ? SHR : SAR), pa, UNUSED,is64);
if( dst ) store(ctx, dst, pa, true);
return pa;
case OSDiv:
case OUDiv:
case OSMod:
case OUMod:
{
preg *out = bop == OSMod || bop == OUMod ? REG_AT(Edx) : PEAX;
preg *r;
int jz, jend;
if( pa->kind == RCPU && pa->id == Eax ) RLOCK(pa);
r = alloc_cpu(ctx,b,true);
// integer div 0 => 0
op(ctx,TEST,r,r,is64);
XJump_small(JNotZero,jz);
op(ctx,XOR,out,out,is64);
XJump_small(JAlways,jend);
patch_jump(ctx,jz);
pa = fetch(a);
if( pa->kind != RCPU || pa->id != Eax ) {
scratch(PEAX);
scratch(pa);
load(ctx,PEAX,a);
}
scratch(REG_AT(Edx));
scratch(REG_AT(Eax));
if( bop == OUDiv || bop == OUMod )
op(ctx, XOR, REG_AT(Edx), REG_AT(Edx), is64);
else
op(ctx, CDQ, UNUSED, UNUSED, is64); // sign-extend Eax into Edx:Eax
op(ctx, bop == OUDiv || bop == OUMod ? DIV : IDIV, fetch(b), UNUSED, is64);
patch_jump(ctx, jend);
if( dst ) store(ctx, dst, out, true);
return out;
}
case OJSLt:
case OJSGte:
case OJSLte:
case OJSGt:
case OJULt:
case OJUGte:
case OJEq:
case OJNotEq:
switch( a->t->kind ) {
case HUI8:
case HBOOL:
o = CMP8;
break;
case HUI16:
o = CMP16;
break;
default:
o = CMP;
break;
}
break;
default:
printf("%s\n", hl_op_name(bop));
ASSERT(bop);
}
}
switch( RTYPE(a) ) {
case HI32:
case HUI8:
case HUI16:
case HBOOL:
# ifndef HL_64
case HDYNOBJ:
case HVIRTUAL:
case HOBJ:
case HSTRUCT:
case HFUN:
case HMETHOD:
case HBYTES:
case HNULL:
case HENUM:
case HDYN:
case HTYPE:
case HABSTRACT:
case HARRAY:
# endif
switch( ID2(pa->kind, pb->kind) ) {
case ID2(RCPU,RCPU):
case ID2(RCPU,RSTACK):
op32(ctx, o, pa, pb);
scratch(pa);
out = pa;
break;
case ID2(RSTACK,RCPU):
if( dst == a && o != IMUL ) {
op32(ctx, o, pa, pb);
dst = NULL;
out = pa;
} else {
alloc_cpu(ctx,a, true);
return op_binop(ctx,dst,a,b,bop);
}
break;
case ID2(RSTACK,RSTACK):
alloc_cpu(ctx, a, true);
return op_binop(ctx, dst, a, b, bop);
default:
printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind);
ASSERT(ID2(pa->kind, pb->kind));
}
if( dst ) store(ctx, dst, out, true);
return out;
# ifdef HL_64
case HOBJ:
case HSTRUCT:
case HDYNOBJ:
case HVIRTUAL:
case HFUN:
case HMETHOD:
case HBYTES:
case HNULL:
case HENUM:
case HDYN:
case HTYPE:
case HABSTRACT:
case HARRAY:
case HI64:
switch( ID2(pa->kind, pb->kind) ) {
case ID2(RCPU,RCPU):
case ID2(RCPU,RSTACK):
op64(ctx, o, pa, pb);
scratch(pa);
out = pa;
break;
case ID2(RSTACK,RCPU):
if( dst == a && OP_FORMS[o].mem_r ) {
op64(ctx, o, pa, pb);
dst = NULL;
out = pa;
} else {
alloc_cpu(ctx,a, true);
return op_binop(ctx,dst,a,b,bop);
}
break;
case ID2(RSTACK,RSTACK):
alloc_cpu(ctx, a, true);
return op_binop(ctx, dst, a, b, bop);
default:
printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind);
ASSERT(ID2(pa->kind, pb->kind));
}
if( dst ) store(ctx, dst, out, true);
return out;
# endif
case HF64:
case HF32:
pa = alloc_fpu(ctx, a, true);
pb = alloc_fpu(ctx, b, true);
switch( ID2(pa->kind, pb->kind) ) {
case ID2(RFPU,RFPU):
op64(ctx,o,pa,pb);
if( o == COMISD && bop != OJSGt ) {
int jnotnan;
XJump_small(JNParity,jnotnan);
switch( bop ) {
case OJSLt:
case OJNotLt:
{
preg *r = alloc_reg(ctx,RCPU);
// set CF=0, ZF=1
op64(ctx,XOR,r,r);
RUNLOCK(r);
break;
}
case OJSGte:
case OJNotGte:
{
preg *r = alloc_reg(ctx,RCPU);
// set ZF=0, CF=1
op64(ctx,XOR,r,r);
op64(ctx,CMP,r,PESP);
RUNLOCK(r);
break;
}
break;
case OJNotEq:
case OJEq:
// set ZF=0, CF=?
case OJSLte:
// set ZF=0, CF=0
op64(ctx,TEST,PESP,PESP);
break;
default:
ASSERT(bop);
}
patch_jump(ctx,jnotnan);
}
scratch(pa);
out = pa;
break;
default:
printf("%s(%d,%d)\n", hl_op_name(bop), pa->kind, pb->kind);
ASSERT(ID2(pa->kind, pb->kind));
}
if( dst ) store(ctx, dst, out, true);
return out;
default:
ASSERT(RTYPE(a));
}
return NULL;
}
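// emit the conditional jump matching an HL jump opcode; float compares use the
// unsigned condition codes since COMISD/COMISS only set CF/ZF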
static int do_jump( jit_ctx *ctx, hl_op op, bool isFloat ) {
int j;
switch( op ) {
case OJAlways:
XJump(JAlways,j);
break;
case OJSGte:
XJump(isFloat ? JUGte : JSGte,j);
break;
case OJSGt:
XJump(isFloat ? JUGt : JSGt,j);
break;
case OJUGte:
XJump(JUGte,j);
break;
case OJSLt:
XJump(isFloat ? JULt : JSLt,j);
break;
case OJSLte:
XJump(isFloat ? JULte : JSLte,j);
break;
case OJULt:
XJump(JULt,j);
break;
case OJEq:
XJump(JEq,j);
break;
case OJNotEq:
XJump(JNeq,j);
break;
case OJNotLt:
XJump(JUGte,j);
break;
case OJNotGte:
XJump(JULt,j);
break;
default:
j = 0;
printf("Unknown JUMP %d\n",op);
break;
}
return j;
}
static void register_jump( jit_ctx *ctx, int pos, int target ) {
jlist *j = (jlist*)hl_malloc(&ctx->falloc, sizeof(jlist));
j->pos = pos;
j->target = target;
j->next = ctx->jumps;
ctx->jumps = j;
if( target != 0 && ctx->opsPos[target] == 0 )
ctx->opsPos[target] = -1;
}
#define HDYN_VALUE 8
static void dyn_value_compare( jit_ctx *ctx, preg *a, preg *b, hl_type *t ) {
preg p;
switch( t->kind ) {
case HUI8:
case HBOOL:
op32(ctx,MOV8,a,pmem(&p,a->id,HDYN_VALUE));
op32(ctx,MOV8,b,pmem(&p,b->id,HDYN_VALUE));
op64(ctx,CMP8,a,b);
break;
case HUI16:
op32(ctx,MOV16,a,pmem(&p,a->id,HDYN_VALUE));
op32(ctx,MOV16,b,pmem(&p,b->id,HDYN_VALUE));
op64(ctx,CMP16,a,b);
break;
case HI32:
op32(ctx,MOV,a,pmem(&p,a->id,HDYN_VALUE));
op32(ctx,MOV,b,pmem(&p,b->id,HDYN_VALUE));
op64(ctx,CMP,a,b);
break;
case HF32:
{
preg *fa = alloc_reg(ctx, RFPU);
preg *fb = alloc_reg(ctx, RFPU);
op64(ctx,MOVSS,fa,pmem(&p,a->id,HDYN_VALUE));
op64(ctx,MOVSS,fb,pmem(&p,b->id,HDYN_VALUE));
op64(ctx,COMISD,fa,fb);
}
break;
case HF64:
{
preg *fa = alloc_reg(ctx, RFPU);
preg *fb = alloc_reg(ctx, RFPU);
op64(ctx,MOVSD,fa,pmem(&p,a->id,HDYN_VALUE));
op64(ctx,MOVSD,fb,pmem(&p,b->id,HDYN_VALUE));
op64(ctx,COMISD,fa,fb);
}
break;
case HI64:
default:
// ptr comparison
op64(ctx,MOV,a,pmem(&p,a->id,HDYN_VALUE));
op64(ctx,MOV,b,pmem(&p,b->id,HDYN_VALUE));
op64(ctx,CMP,a,b);
break;
}
}
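// compare a and b then jump to targetPos, with special handling for dynamics,
// nullables, virtuals and objects that provide a compare function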
static void op_jump( jit_ctx *ctx, vreg *a, vreg *b, hl_opcode *op, int targetPos ) {
if( a->t->kind == HDYN || b->t->kind == HDYN || a->t->kind == HFUN || b->t->kind == HFUN ) {
int args[] = { a->stack.id, b->stack.id };
int size = prepare_call_args(ctx,2,args,ctx->vregs,0);
call_native(ctx,hl_dyn_compare,size);
if( op->op == OJSGt || op->op == OJSGte ) {
preg p;
int jinvalid;
op32(ctx,CMP,PEAX,pconst(&p,hl_invalid_comparison));
XJump_small(JEq,jinvalid);
op32(ctx,TEST,PEAX,PEAX);
register_jump(ctx,do_jump(ctx,op->op, IS_FLOAT(a)),targetPos);
patch_jump(ctx,jinvalid);
return;
}
op32(ctx,TEST,PEAX,PEAX);
} else switch( a->t->kind ) {
case HTYPE:
{
int args[] = { a->stack.id, b->stack.id };
int size = prepare_call_args(ctx,2,args,ctx->vregs,0);
preg p;
call_native(ctx,hl_same_type,size);
op64(ctx,CMP8,PEAX,pconst(&p,1));
}
break;
case HNULL:
{
preg *pa = hl_type_size(a->t->tparam) == 1 ? alloc_cpu8(ctx,a,true) : alloc_cpu(ctx,a,true);
preg *pb = hl_type_size(b->t->tparam) == 1 ? alloc_cpu8(ctx,b,true) : alloc_cpu(ctx,b,true);
if( op->op == OJEq ) {
// if( a == b || (a && b && a->v == b->v) ) goto
int ja, jb;
op64(ctx,CMP,pa,pb);
register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
op64(ctx,TEST,pa,pa);
XJump_small(JZero,ja);
op64(ctx,TEST,pb,pb);
XJump_small(JZero,jb);
dyn_value_compare(ctx,pa,pb,a->t->tparam);
register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
scratch(pa);
scratch(pb);
patch_jump(ctx,ja);
patch_jump(ctx,jb);
} else if( op->op == OJNotEq ) {
int jeq, jcmp;
// if( a != b && (!a || !b || a->v != b->v) ) goto
op64(ctx,CMP,pa,pb);
XJump_small(JEq,jeq);
op64(ctx,TEST,pa,pa);
register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
op64(ctx,TEST,pb,pb);
register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
dyn_value_compare(ctx,pa,pb,a->t->tparam);
XJump_small(JZero,jcmp);
scratch(pa);
scratch(pb);
register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos);
patch_jump(ctx,jcmp);
patch_jump(ctx,jeq);
} else
ASSERT(op->op);
return;
}
case HVIRTUAL:
{
preg p;
preg *pa = alloc_cpu(ctx,a,true);
preg *pb = alloc_cpu(ctx,b,true);
int ja,jb,jav,jbv,jvalue;
if( b->t->kind == HOBJ ) {
if( op->op == OJEq ) {
// if( a ? (b && a->value == b) : (b == NULL) ) goto
op64(ctx,TEST,pa,pa);
XJump_small(JZero,ja);
op64(ctx,TEST,pb,pb);
XJump_small(JZero,jb);
op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE));
op64(ctx,CMP,pa,pb);
XJump_small(JAlways,jvalue);
patch_jump(ctx,ja);
op64(ctx,TEST,pb,pb);
patch_jump(ctx,jvalue);
register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
patch_jump(ctx,jb);
} else if( op->op == OJNotEq ) {
// if( a ? (b == NULL || a->value != b) : (b != NULL) ) goto
op64(ctx,TEST,pa,pa);
XJump_small(JZero,ja);
op64(ctx,TEST,pb,pb);
register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE));
op64(ctx,CMP,pa,pb);
XJump_small(JAlways,jvalue);
patch_jump(ctx,ja);
op64(ctx,TEST,pb,pb);
patch_jump(ctx,jvalue);
register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos);
} else
ASSERT(op->op);
scratch(pa);
return;
}
op64(ctx,CMP,pa,pb);
if( op->op == OJEq ) {
// if( a == b || (a && b && a->value && b->value && a->value == b->value) ) goto
register_jump(ctx,do_jump(ctx,OJEq, false),targetPos);
op64(ctx,TEST,pa,pa);
XJump_small(JZero,ja);
op64(ctx,TEST,pb,pb);
XJump_small(JZero,jb);
op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE));
op64(ctx,TEST,pa,pa);
XJump_small(JZero,jav);
op64(ctx,MOV,pb,pmem(&p,pb->id,HL_WSIZE));
op64(ctx,TEST,pb,pb);
XJump_small(JZero,jbv);
op64(ctx,CMP,pa,pb);
XJump_small(JNeq,jvalue);
register_jump(ctx,do_jump(ctx,OJEq, false),targetPos);
patch_jump(ctx,ja);
patch_jump(ctx,jb);
patch_jump(ctx,jav);
patch_jump(ctx,jbv);
patch_jump(ctx,jvalue);
} else if( op->op == OJNotEq ) {
int jnext;
// if( a != b && (!a || !b || !a->value || !b->value || a->value != b->value) ) goto
XJump_small(JEq,jnext);
op64(ctx,TEST,pa,pa);
XJump_small(JZero,ja);
op64(ctx,TEST,pb,pb);
XJump_small(JZero,jb);
op64(ctx,MOV,pa,pmem(&p,pa->id,HL_WSIZE));
op64(ctx,TEST,pa,pa);
XJump_small(JZero,jav);
op64(ctx,MOV,pb,pmem(&p,pb->id,HL_WSIZE));
op64(ctx,TEST,pb,pb);
XJump_small(JZero,jbv);
op64(ctx,CMP,pa,pb);
XJump_small(JEq,jvalue);
patch_jump(ctx,ja);
patch_jump(ctx,jb);
patch_jump(ctx,jav);
patch_jump(ctx,jbv);
register_jump(ctx,do_jump(ctx,OJAlways, false),targetPos);
patch_jump(ctx,jnext);
patch_jump(ctx,jvalue);
} else
ASSERT(op->op);
scratch(pa);
scratch(pb);
return;
}
break;
case HOBJ:
case HSTRUCT:
if( b->t->kind == HVIRTUAL ) {
op_jump(ctx,b,a,op,targetPos); // inverse
return;
}
if( hl_get_obj_rt(a->t)->compareFun ) {
preg *pa = alloc_cpu(ctx,a,true);
preg *pb = alloc_cpu(ctx,b,true);
preg p;
int jeq, ja, jb, jcmp;
int args[] = { a->stack.id, b->stack.id };
switch( op->op ) {
case OJEq:
// if( a == b || (a && b && cmp(a,b) == 0) ) goto
op64(ctx,CMP,pa,pb);
XJump_small(JEq,jeq);
op64(ctx,TEST,pa,pa);
XJump_small(JZero,ja);
op64(ctx,TEST,pb,pb);
XJump_small(JZero,jb);
op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args);
op32(ctx,TEST,PEAX,PEAX);
XJump_small(JNotZero,jcmp);
patch_jump(ctx,jeq);
register_jump(ctx,do_jump(ctx,OJAlways,false),targetPos);
patch_jump(ctx,ja);
patch_jump(ctx,jb);
patch_jump(ctx,jcmp);
break;
case OJNotEq:
// if( a != b && (!a || !b || cmp(a,b) != 0) ) goto
op64(ctx,CMP,pa,pb);
XJump_small(JEq,jeq);
op64(ctx,TEST,pa,pa);
register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
op64(ctx,TEST,pb,pb);
register_jump(ctx,do_jump(ctx,OJEq,false),targetPos);
op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args);
op32(ctx,TEST,PEAX,PEAX);
XJump_small(JZero,jcmp);
register_jump(ctx,do_jump(ctx,OJNotEq,false),targetPos);
patch_jump(ctx,jcmp);
patch_jump(ctx,jeq);
break;
default:
// if( a && b && cmp(a,b) ?? 0 ) goto
op64(ctx,TEST,pa,pa);
XJump_small(JZero,ja);
op64(ctx,TEST,pb,pb);
XJump_small(JZero,jb);
op_call_fun(ctx,NULL,(int)(int_val)a->t->obj->rt->compareFun,2,args);
op32(ctx,CMP,PEAX,pconst(&p,0));
register_jump(ctx,do_jump(ctx,op->op,false),targetPos);
patch_jump(ctx,ja);
patch_jump(ctx,jb);
break;
}
return;
}
// fallthrough
default:
// make sure we have valid 8 bits registers
if( a->size == 1 ) alloc_cpu8(ctx,a,true);
if( b->size == 1 ) alloc_cpu8(ctx,b,true);
op_binop(ctx,NULL,a,b,op->op);
break;
}
register_jump(ctx,do_jump(ctx,op->op, IS_FLOAT(a)),targetPos);
}
jit_ctx *hl_jit_alloc() {
int i;
jit_ctx *ctx = (jit_ctx*)malloc(sizeof(jit_ctx));
if( ctx == NULL ) return NULL;
memset(ctx,0,sizeof(jit_ctx));
hl_alloc_init(&ctx->falloc);
hl_alloc_init(&ctx->galloc);
for(i=0;i<RCPU_COUNT;i++) {
preg *r = REG_AT(i);
r->id = i;
r->kind = RCPU;
}
for(i=0;i<RFPU_COUNT;i++) {
preg *r = REG_AT(XMM(i));
r->id = i;
r->kind = RFPU;
}
return ctx;
}
void hl_jit_free( jit_ctx *ctx, h_bool can_reset ) {
free(ctx->vregs);
free(ctx->opsPos);
free(ctx->startBuf);
ctx->maxRegs = 0;
ctx->vregs = NULL;
ctx->maxOps = 0;
ctx->opsPos = NULL;
ctx->startBuf = NULL;
ctx->bufSize = 0;
ctx->buf.b = NULL;
ctx->calls = NULL;
ctx->switchs = NULL;
ctx->closure_list = NULL;
hl_free(&ctx->falloc);
hl_free(&ctx->galloc);
if( !can_reset ) free(ctx);
}
static void jit_nops( jit_ctx *ctx ) {
while( BUF_POS() & 15 )
op32(ctx, NOP, UNUSED, UNUSED);
}
#define MAX_ARGS 16
static void *call_jit_c2hl = NULL;
static void *call_jit_hl2c = NULL;
static void *callback_c2hl( void **f, hl_type *t, void **args, vdynamic *ret ) {
/*
prepare stack and regs according to prepare_call_args, but by reading runtime type information
from the function type. The stack and regs will be setup by the trampoline function.
*/
unsigned char stack[MAX_ARGS * 8];
call_regs cregs = {0};
if( t->fun->nargs > MAX_ARGS )
hl_error("Too many arguments for dynamic call");
int i, size = 0, pad = 0, pos = 0;
for(i=0;i<t->fun->nargs;i++) {
hl_type *at = t->fun->args[i];
int creg = select_call_reg(&cregs,at,i);
if( creg >= 0 )
continue;
size += stack_size(at);
}
pad = (-size) & 15;
size += pad;
pos = 0;
for(i=0;i<t->fun->nargs;i++) {
// RTL
hl_type *at = t->fun->args[i];
void *v = args[i];
int creg = mapped_reg(&cregs,i);
void *store;
if( creg >= 0 ) {
if( REG_IS_FPU(creg) ) {
store = stack + size + CALL_NREGS * HL_WSIZE + (creg - XMM(0)) * sizeof(double);
} else {
store = stack + size + call_reg_index(creg) * HL_WSIZE;
}
switch( at->kind ) {
case HBOOL:
case HUI8:
*(int_val*)store = *(unsigned char*)v;
break;
case HUI16:
*(int_val*)store = *(unsigned short*)v;
break;
case HI32:
*(int_val*)store = *(int*)v;
break;
case HF32:
{
double d = (double)*(float*)v;
*(double*)store = d;
}
break;
case HF64:
*(double*)store = *(double*)v;
break;
case HI64:
*(int64*)store = *(int64*)v;
break;
default:
*(void**)store = v;
break;
}
} else {
int tsize = stack_size(at);
store = stack + pos;
pos += tsize;
switch( at->kind ) {
case HBOOL:
case HUI8:
*(int*)store = *(unsigned char*)v;
break;
case HUI16:
*(int*)store = *(unsigned short*)v;
break;
case HI32:
case HF32:
*(int*)store = *(int*)v;
break;
case HF64:
*(double*)store = *(double*)v;
break;
case HI64:
*(int64*)store = *(int64*)v;
break;
default:
*(void**)store = v;
break;
}
}
}
pos += pad;
pos >>= IS_64 ? 3 : 2;
switch( t->fun->ret->kind ) {
case HUI8:
case HUI16:
case HI32:
case HBOOL:
ret->v.i = ((int (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
return &ret->v.i;
case HI64:
ret->v.i64 = ((int64 (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
return &ret->v.i64;
case HF32:
ret->v.f = ((float (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
return &ret->v.f;
case HF64:
ret->v.d = ((double (*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
return &ret->v.d;
default:
return ((void *(*)(void *, void *, void *))call_jit_c2hl)(*f, (void**)&stack + pos, &stack);
}
}
static void jit_c2hl( jit_ctx *ctx ) {
// create the function that will be called by callback_c2hl
// it will make sure to prepare the stack/regs according to native calling conventions
int jeq, jloop, jstart;
preg *fptr, *stack, *stend;
preg p;
op64(ctx,PUSH,PEBP,UNUSED);
op64(ctx,MOV,PEBP,PESP);
# ifdef HL_64
fptr = REG_AT(R10);
stack = PEAX;
stend = REG_AT(R11);
op64(ctx, MOV, fptr, REG_AT(CALL_REGS[0]));
op64(ctx, MOV, stack, REG_AT(CALL_REGS[1]));
op64(ctx, MOV, stend, REG_AT(CALL_REGS[2]));
// set native call regs
int i;
for(i=0;i<CALL_NREGS;i++)
op64(ctx,MOV,REG_AT(CALL_REGS[i]),pmem(&p,stack->id,i*HL_WSIZE));
for(i=0;i<CALL_NREGS;i++)
op64(ctx,MOVSD,REG_AT(XMM(i)),pmem(&p,stack->id,(i+CALL_NREGS)*HL_WSIZE));
# else
// make sure the stack is aligned on 16 bytes
// the pushes we do afterwards are guaranteed by hl_callback to total a multiple of 16 bytes
# ifdef HL_VCC
// VCC does not guarantee us an aligned stack...
op64(ctx,MOV,PEAX,PESP);
op64(ctx,AND,PEAX,pconst(&p,15));
op64(ctx,SUB,PESP,PEAX);
# else
op64(ctx,SUB,PESP,pconst(&p,8));
# endif
// mov arguments to regs
fptr = REG_AT(Eax);
stack = REG_AT(Edx);
stend = REG_AT(Ecx);
op64(ctx,MOV,fptr,pmem(&p,Ebp,HL_WSIZE*2));
op64(ctx,MOV,stack,pmem(&p,Ebp,HL_WSIZE*3));
op64(ctx,MOV,stend,pmem(&p,Ebp,HL_WSIZE*4));
# endif
// push stack args
jstart = BUF_POS();
op64(ctx,CMP,stack,stend);
XJump(JEq,jeq);
op64(ctx,SUB,stack,pconst(&p,HL_WSIZE));
op64(ctx,PUSH,pmem(&p,stack->id,0),UNUSED);
XJump(JAlways,jloop);
patch_jump(ctx,jeq);
patch_jump_to(ctx, jloop, jstart);
op_call(ctx,fptr,0);
// cleanup and ret
op64(ctx,MOV,PESP,PEBP);
op64(ctx,POP,PEBP, UNUSED);
op64(ctx,RET,UNUSED,UNUSED);
}
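// unpack the native stack/register arguments into vdynamics according to the wrapped
// closure's function type, then perform the dynamic call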
static vdynamic *jit_wrapper_call( vclosure_wrapper *c, char *stack_args, void **regs ) {
vdynamic *args[MAX_ARGS];
int i;
int nargs = c->cl.t->fun->nargs;
call_regs cregs = {0};
if( nargs > MAX_ARGS )
hl_error("Too many arguments for wrapped call");
cregs.nextCpu++; // skip fptr in HL64 - was passed as arg0
for(i=0;i<nargs;i++) {
hl_type *t = c->cl.t->fun->args[i];
int creg = select_call_reg(&cregs,t,i);
if( creg < 0 ) {
args[i] = hl_is_dynamic(t) ? *(vdynamic**)stack_args : hl_make_dyn(stack_args,t);
stack_args += stack_size(t);
} else if( hl_is_dynamic(t) ) {
args[i] = *(vdynamic**)(regs + call_reg_index(creg));
} else if( t->kind == HF32 || t->kind == HF64 ) {
args[i] = hl_make_dyn(regs + CALL_NREGS + creg - XMM(0),&hlt_f64);
} else {
args[i] = hl_make_dyn(regs + call_reg_index(creg),t);
}
}
return hl_dyn_call(c->wrappedFun,args,nargs);
}
static void *jit_wrapper_ptr( vclosure_wrapper *c, char *stack_args, void **regs ) {
vdynamic *ret = jit_wrapper_call(c, stack_args, regs);
hl_type *tret = c->cl.t->fun->ret;
switch( tret->kind ) {
case HVOID:
return NULL;
case HUI8:
case HUI16:
case HI32:
case HBOOL:
return (void*)(int_val)hl_dyn_casti(&ret,&hlt_dyn,tret);
case HI64:
return (void*)(int_val)hl_dyn_casti64(&ret,&hlt_dyn);
default:
return hl_dyn_castp(&ret,&hlt_dyn,tret);
}
}
static double jit_wrapper_d( vclosure_wrapper *c, char *stack_args, void **regs ) {
vdynamic *ret = jit_wrapper_call(c, stack_args, regs);
return hl_dyn_castd(&ret,&hlt_dyn);
}
static void jit_hl2c( jit_ctx *ctx ) {
// create a function that is called with a vclosure_wrapper* and native args
// and pack and pass the args to callback_hl2c
preg p;
int jfloat1, jfloat2, jexit;
hl_type_fun *ft = NULL;
int size;
# ifdef HL_64
preg *cl = REG_AT(CALL_REGS[0]);
preg *tmp = REG_AT(CALL_REGS[1]);
# else
preg *cl = REG_AT(Ecx);
preg *tmp = REG_AT(Edx);
# endif
op64(ctx,PUSH,PEBP,UNUSED);
op64(ctx,MOV,PEBP,PESP);
# ifdef HL_64
// push registers
int i;
op64(ctx,SUB,PESP,pconst(&p,CALL_NREGS*8));
for(i=0;i<CALL_NREGS;i++)
op64(ctx,MOVSD,pmem(&p,Esp,i*8),REG_AT(XMM(i)));
for(i=0;i<CALL_NREGS;i++)
op64(ctx,PUSH,REG_AT(CALL_REGS[CALL_NREGS - 1 - i]),UNUSED);
# endif
// opcodes for:
// switch( arg0->t->fun->ret->kind ) {
// case HF32: case HF64: return jit_wrapper_d(arg0,&args);
// default: return jit_wrapper_ptr(arg0,&args);
// }
if( !IS_64 )
op64(ctx,MOV,cl,pmem(&p,Ebp,HL_WSIZE*2)); // load arg0
op64(ctx,MOV,tmp,pmem(&p,cl->id,0)); // ->t
op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE)); // ->fun
op64(ctx,MOV,tmp,pmem(&p,tmp->id,(int)(int_val)&ft->ret)); // ->ret
op32(ctx,MOV,tmp,pmem(&p,tmp->id,0)); // -> kind
op32(ctx,CMP,tmp,pconst(&p,HF64));
XJump_small(JEq,jfloat1);
op32(ctx,CMP,tmp,pconst(&p,HF32));
XJump_small(JEq,jfloat2);
// 64 bits : ESP + EIP (+WIN64PAD)
// 32 bits : ESP + EIP + PARAM0
int args_pos = IS_64 ? ((IS_WINCALL64 ? 32 : 0) + HL_WSIZE * 2) : (HL_WSIZE*3);
size = begin_native_call(ctx,3);
op64(ctx, LEA, tmp, pmem(&p,Ebp,-HL_WSIZE*CALL_NREGS*2));
set_native_arg(ctx, tmp);
op64(ctx, LEA, tmp, pmem(&p,Ebp,args_pos));
set_native_arg(ctx, tmp);
set_native_arg(ctx, cl);
call_native(ctx, jit_wrapper_ptr, size);
XJump_small(JAlways, jexit);
patch_jump(ctx,jfloat1);
patch_jump(ctx,jfloat2);
size = begin_native_call(ctx,3);
op64(ctx, LEA, tmp, pmem(&p,Ebp,-HL_WSIZE*CALL_NREGS*2));
set_native_arg(ctx, tmp);
op64(ctx, LEA, tmp, pmem(&p,Ebp,args_pos));
set_native_arg(ctx, tmp);
set_native_arg(ctx, cl);
call_native(ctx, jit_wrapper_d, size);
patch_jump(ctx,jexit);
op64(ctx,MOV,PESP,PEBP);
op64(ctx,POP,PEBP, UNUSED);
op64(ctx,RET,UNUSED,UNUSED);
}
#ifdef JIT_CUSTOM_LONGJUMP
// Win64 debug CRT performs an RTL stack check in debug mode, preventing us from
// using longjmp. This is an alternate implementation that follows the native
// setjmp storage.
//
// Another, more reliable way of handling this would be to use RtlAddFunctionTable,
// but that would require generating the full unwind info.
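//
// The hard-coded offsets below are assumed to follow the MSVC x64 _JUMP_BUFFER layout :
// Rbx at 0x08, Rsp 0x10, Rbp 0x18, Rsi 0x20, Rdi 0x28, R12-R15 at 0x30-0x48, Rip 0x50,
// MxCsr 0x58, FpCsr 0x5C and Xmm6-Xmm15 stored as 16-byte entries from 0x60.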
static void jit_longjump( jit_ctx *ctx ) {
preg *buf = REG_AT(CALL_REGS[0]);
preg *ret = REG_AT(CALL_REGS[1]);
preg p;
int i;
op64(ctx,MOV,PEAX,ret); // return value
op64(ctx,MOV,REG_AT(Edx),pmem(&p,buf->id,0x0));
op64(ctx,MOV,REG_AT(Ebx),pmem(&p,buf->id,0x8));
op64(ctx,MOV,REG_AT(Esp),pmem(&p,buf->id,0x10));
op64(ctx,MOV,REG_AT(Ebp),pmem(&p,buf->id,0x18));
op64(ctx,MOV,REG_AT(Esi),pmem(&p,buf->id,0x20));
op64(ctx,MOV,REG_AT(Edi),pmem(&p,buf->id,0x28));
op64(ctx,MOV,REG_AT(R12),pmem(&p,buf->id,0x30));
op64(ctx,MOV,REG_AT(R13),pmem(&p,buf->id,0x38));
op64(ctx,MOV,REG_AT(R14),pmem(&p,buf->id,0x40));
op64(ctx,MOV,REG_AT(R15),pmem(&p,buf->id,0x48));
op64(ctx,LDMXCSR,pmem(&p,buf->id,0x58), UNUSED);
op64(ctx,FLDCW,pmem(&p,buf->id,0x5C), UNUSED);
for(i=0;i<10;i++)
op64(ctx,MOVSD,REG_AT(XMM(i+6)),pmem(&p,buf->id,0x60 + i * 16));
op64(ctx,PUSH,pmem(&p,buf->id,0x50),UNUSED);
op64(ctx,RET,UNUSED,UNUSED);
}
#endif
static void jit_fail( uchar *msg ) {
if( msg == NULL ) {
hl_debug_break();
msg = USTR("assert");
}
vdynamic *d = hl_alloc_dynamic(&hlt_bytes);
d->v.ptr = msg;
hl_throw(d);
}
static void jit_null_access( jit_ctx *ctx ) {
op64(ctx,PUSH,PEBP,UNUSED);
op64(ctx,MOV,PEBP,PESP);
int_val arg = (int_val)USTR("Null access");
call_native_consts(ctx, jit_fail, &arg, 1);
}
static void jit_null_fail( int fhash ) {
vbyte *field = hl_field_name(fhash);
hl_buffer *b = hl_alloc_buffer();
hl_buffer_str(b, USTR("Null access ."));
hl_buffer_str(b, (uchar*)field);
vdynamic *d = hl_alloc_dynamic(&hlt_bytes);
d->v.ptr = hl_buffer_content(b,NULL);
hl_throw(d);
}
static void jit_null_field_access( jit_ctx *ctx ) {
preg p;
op64(ctx,PUSH,PEBP,UNUSED);
op64(ctx,MOV,PEBP,PESP);
int size = begin_native_call(ctx, 1);
int args_pos = (IS_WINCALL64 ? 32 : 0) + HL_WSIZE*2;
set_native_arg(ctx, pmem(&p,Ebp,args_pos));
call_native(ctx,jit_null_fail,size);
}
static void jit_assert( jit_ctx *ctx ) {
op64(ctx,PUSH,PEBP,UNUSED);
op64(ctx,MOV,PEBP,PESP);
int_val arg = 0;
call_native_consts(ctx, jit_fail, &arg, 1);
}
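// The three stubs above (null access, assert, null field access) are built once in hl_jit_init
// and stored in ctx->static_functions[0..2] ; generated code reaches them through negative
// jlist targets that hl_jit_code resolves as static_functions[-target-1].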
static int jit_build( jit_ctx *ctx, void (*fbuild)( jit_ctx *) ) {
int pos;
jit_buf(ctx);
jit_nops(ctx);
pos = BUF_POS();
fbuild(ctx);
jit_nops(ctx);
return pos;
}
static void hl_jit_init_module( jit_ctx *ctx, hl_module *m ) {
int i;
ctx->m = m;
if( m->code->hasdebug ) {
ctx->debug = (hl_debug_infos*)malloc(sizeof(hl_debug_infos) * m->code->nfunctions);
memset(ctx->debug, -1, sizeof(hl_debug_infos) * m->code->nfunctions);
}
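	// emit the module's float constants directly into the code buffer ; this is presumably
	// what lets OFloat reference them by code offset (pcodeaddr) on HL_64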
for(i=0;i<m->code->nfloats;i++) {
jit_buf(ctx);
*ctx->buf.d++ = m->code->floats[i];
}
}
void hl_jit_init( jit_ctx *ctx, hl_module *m ) {
hl_jit_init_module(ctx,m);
ctx->c2hl = jit_build(ctx, jit_c2hl);
ctx->hl2c = jit_build(ctx, jit_hl2c);
# ifdef JIT_CUSTOM_LONGJUMP
ctx->longjump = jit_build(ctx, jit_longjump);
# endif
ctx->static_functions[0] = (void*)(int_val)jit_build(ctx,jit_null_access);
ctx->static_functions[1] = (void*)(int_val)jit_build(ctx,jit_assert);
ctx->static_functions[2] = (void*)(int_val)jit_build(ctx,jit_null_field_access);
}
void hl_jit_reset( jit_ctx *ctx, hl_module *m ) {
ctx->debug = NULL;
hl_jit_init_module(ctx,m);
}
static void *get_dyncast( hl_type *t ) {
switch( t->kind ) {
case HF32:
return hl_dyn_castf;
case HF64:
return hl_dyn_castd;
case HI64:
return hl_dyn_casti64;
case HI32:
case HUI16:
case HUI8:
case HBOOL:
return hl_dyn_casti;
default:
return hl_dyn_castp;
}
}
static void *get_dynset( hl_type *t ) {
switch( t->kind ) {
case HF32:
return hl_dyn_setf;
case HF64:
return hl_dyn_setd;
case HI64:
return hl_dyn_seti64;
case HI32:
case HUI16:
case HUI8:
case HBOOL:
return hl_dyn_seti;
default:
return hl_dyn_setp;
}
}
static void *get_dynget( hl_type *t ) {
switch( t->kind ) {
case HF32:
return hl_dyn_getf;
case HF64:
return hl_dyn_getd;
case HI64:
return hl_dyn_geti64;
case HI32:
case HUI16:
case HUI8:
case HBOOL:
return hl_dyn_geti;
default:
return hl_dyn_getp;
}
}
static double uint_to_double( unsigned int v ) {
return v;
}
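/*
	alloc_static_closure : for natives the closure can be fully resolved here ; for HL
	functions the final code address is not known yet, so c->fun temporarily stores the
	function index and c->value chains the closure into ctx->closure_list, both fields
	being patched in hl_jit_code once the executable buffer is allocated.
*/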
static vclosure *alloc_static_closure( jit_ctx *ctx, int fid ) {
hl_module *m = ctx->m;
vclosure *c = hl_malloc(&m->ctx.alloc,sizeof(vclosure));
int fidx = m->functions_indexes[fid];
c->hasValue = 0;
if( fidx >= m->code->nfunctions ) {
// native
c->t = m->code->natives[fidx - m->code->nfunctions].t;
c->fun = m->functions_ptrs[fid];
c->value = NULL;
} else {
c->t = m->code->functions[fidx].type;
c->fun = (void*)(int_val)fid;
c->value = ctx->closure_list;
ctx->closure_list = c;
}
return c;
}
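/*
	make_dyn_cast : when casting a Null<T> to the same underlying T we inline the unboxing
	(NULL becomes 0 / 0.0), otherwise we call the hl_dyn_cast* helper selected by
	get_dyncast with the address of the source vreg's stack slot.
*/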
static void make_dyn_cast( jit_ctx *ctx, vreg *dst, vreg *v ) {
int size;
preg p;
preg *tmp;
if( v->t->kind == HNULL && v->t->tparam->kind == dst->t->kind ) {
int jnull, jend;
preg *out;
switch( dst->t->kind ) {
case HUI8:
case HUI16:
case HI32:
case HBOOL:
case HI64:
tmp = alloc_cpu(ctx, v, true);
op64(ctx, TEST, tmp, tmp);
XJump_small(JZero, jnull);
op64(ctx, MOV, tmp, pmem(&p,tmp->id,8));
XJump_small(JAlways, jend);
patch_jump(ctx, jnull);
op64(ctx, XOR, tmp, tmp);
patch_jump(ctx, jend);
store(ctx, dst, tmp, true);
return;
case HF32:
case HF64:
tmp = alloc_cpu(ctx, v, true);
out = alloc_fpu(ctx, dst, false);
op64(ctx, TEST, tmp, tmp);
XJump_small(JZero, jnull);
op64(ctx, dst->t->kind == HF32 ? MOVSS : MOVSD, out, pmem(&p,tmp->id,8));
XJump_small(JAlways, jend);
patch_jump(ctx, jnull);
op64(ctx, XORPD, out, out);
patch_jump(ctx, jend);
store(ctx, dst, out, true);
return;
default:
break;
}
}
switch( dst->t->kind ) {
case HF32:
case HF64:
case HI64:
size = begin_native_call(ctx, 2);
set_native_arg(ctx, pconst64(&p,(int_val)v->t));
break;
default:
size = begin_native_call(ctx, 3);
set_native_arg(ctx, pconst64(&p,(int_val)dst->t));
set_native_arg(ctx, pconst64(&p,(int_val)v->t));
break;
}
tmp = alloc_native_arg(ctx);
op64(ctx,MOV,tmp,REG_AT(Ebp));
if( v->stackPos >= 0 )
op64(ctx,ADD,tmp,pconst(&p,v->stackPos));
else
op64(ctx,SUB,tmp,pconst(&p,-v->stackPos));
set_native_arg(ctx,tmp);
call_native(ctx,get_dyncast(dst->t),size);
store_result(ctx, dst);
}
int hl_jit_function( jit_ctx *ctx, hl_module *m, hl_function *f ) {
int i, size = 0, opCount;
int codePos = BUF_POS();
int nargs = f->type->fun->nargs;
unsigned short *debug16 = NULL;
int *debug32 = NULL;
call_regs cregs = {0};
hl_thread_info *tinf = NULL;
preg p;
ctx->f = f;
ctx->allocOffset = 0;
if( f->nregs > ctx->maxRegs ) {
free(ctx->vregs);
ctx->vregs = (vreg*)malloc(sizeof(vreg) * (f->nregs + 1));
if( ctx->vregs == NULL ) {
ctx->maxRegs = 0;
return -1;
}
ctx->maxRegs = f->nregs;
}
if( f->nops > ctx->maxOps ) {
free(ctx->opsPos);
ctx->opsPos = (int*)malloc(sizeof(int) * (f->nops + 1));
if( ctx->opsPos == NULL ) {
ctx->maxOps = 0;
return -1;
}
ctx->maxOps = f->nops;
}
memset(ctx->opsPos,0,(f->nops+1)*sizeof(int));
for(i=0;i<f->nregs;i++) {
vreg *r = R(i);
r->t = f->regs[i];
r->size = hl_type_size(r->t);
r->current = NULL;
r->stack.holds = NULL;
r->stack.id = i;
r->stack.kind = RSTACK;
}
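	// compute the stack layout : arguments left in the caller frame get a positive stackPos
	// (above saved EBP + return address, reusing the Win64 shadow space when applicable),
	// while register-passed arguments and locals get a negative stackPos below EBP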
size = 0;
int argsSize = 0;
for(i=0;i<nargs;i++) {
vreg *r = R(i);
int creg = select_call_reg(&cregs,r->t,i);
if( creg < 0 || IS_WINCALL64 ) {
// use existing stack storage
r->stackPos = argsSize + HL_WSIZE * 2;
argsSize += stack_size(r->t);
} else {
// make room in local vars
size += r->size;
size += hl_pad_size(size,r->t);
r->stackPos = -size;
}
}
for(i=nargs;i<f->nregs;i++) {
vreg *r = R(i);
size += r->size;
size += hl_pad_size(size,r->t); // align local vars
r->stackPos = -size;
}
# ifdef HL_64
size += (-size) & 15; // align on 16 bytes
# else
size += hl_pad_size(size,&hlt_dyn); // align on word size
# endif
ctx->totalRegsSize = size;
jit_buf(ctx);
ctx->functionPos = BUF_POS();
op_enter(ctx);
# ifdef HL_64
{
// store in local var
for(i=0;i<nargs;i++) {
vreg *r = R(i);
preg *p;
int reg = mapped_reg(&cregs, i);
if( reg < 0 ) continue;
p = REG_AT(reg);
copy(ctx,fetch(r),p,r->size);
p->holds = r;
r->current = p;
}
}
# endif
if( ctx->m->code->hasdebug ) {
debug16 = (unsigned short*)malloc(sizeof(unsigned short) * (f->nops + 1));
debug16[0] = (unsigned short)(BUF_POS() - codePos);
}
ctx->opsPos[0] = BUF_POS();
for(opCount=0;opCount<f->nops;opCount++) {
int jump;
hl_opcode *o = f->ops + opCount;
vreg *dst = R(o->p1);
vreg *ra = R(o->p2);
vreg *rb = R(o->p3);
ctx->currentPos = opCount + 1;
jit_buf(ctx);
# ifdef JIT_DEBUG
{
int uid = opCount + (f->findex<<16);
op32(ctx, PUSH, pconst(&p,uid), UNUSED);
op64(ctx, ADD, PESP, pconst(&p,HL_WSIZE));
}
# endif
// emit code
switch( o->op ) {
case OMov:
case OUnsafeCast:
op_mov(ctx, dst, ra);
break;
case OInt:
store_const(ctx, dst, m->code->ints[o->p2]);
break;
case OBool:
store_const(ctx, dst, o->p2);
break;
case OGetGlobal:
{
void *addr = m->globals_data + m->globals_indexes[o->p2];
# ifdef HL_64
preg *tmp = alloc_reg(ctx, RCPU);
op64(ctx, MOV, tmp, pconst64(&p,(int_val)addr));
copy_to(ctx, dst, pmem(&p,tmp->id,0));
# else
copy_to(ctx, dst, paddr(&p,addr));
# endif
}
break;
case OSetGlobal:
{
void *addr = m->globals_data + m->globals_indexes[o->p1];
# ifdef HL_64
preg *tmp = alloc_reg(ctx, RCPU);
op64(ctx, MOV, tmp, pconst64(&p,(int_val)addr));
copy_from(ctx, pmem(&p,tmp->id,0), ra);
# else
copy_from(ctx, paddr(&p,addr), ra);
# endif
}
break;
case OCall3:
{
int args[3] = { o->p3, o->extra[0], o->extra[1] };
op_call_fun(ctx, dst, o->p2, 3, args);
}
break;
case OCall4:
{
int args[4] = { o->p3, o->extra[0], o->extra[1], o->extra[2] };
op_call_fun(ctx, dst, o->p2, 4, args);
}
break;
case OCallN:
op_call_fun(ctx, dst, o->p2, o->p3, o->extra);
break;
case OCall0:
op_call_fun(ctx, dst, o->p2, 0, NULL);
break;
case OCall1:
op_call_fun(ctx, dst, o->p2, 1, &o->p3);
break;
case OCall2:
{
int args[2] = { o->p3, (int)(int_val)o->extra };
op_call_fun(ctx, dst, o->p2, 2, args);
}
break;
case OSub:
case OAdd:
case OMul:
case OSDiv:
case OUDiv:
case OShl:
case OSShr:
case OUShr:
case OAnd:
case OOr:
case OXor:
case OSMod:
case OUMod:
op_binop(ctx, dst, ra, rb, o->op);
break;
case ONeg:
{
if( IS_FLOAT(ra) ) {
preg *pa = alloc_reg(ctx,RFPU);
preg *pb = alloc_fpu(ctx,ra,true);
op64(ctx,XORPD,pa,pa);
op64(ctx,ra->t->kind == HF32 ? SUBSS : SUBSD,pa,pb);
store(ctx,dst,pa,true);
} else if( ra->t->kind == HI64 ) {
# ifdef HL_64
preg *pa = alloc_reg(ctx,RCPU);
preg *pb = alloc_cpu(ctx,ra,true);
op64(ctx,XOR,pa,pa);
op64(ctx,SUB,pa,pb);
store(ctx,dst,pa,true);
# else
error_i64();
# endif
} else {
preg *pa = alloc_reg(ctx,RCPU);
preg *pb = alloc_cpu(ctx,ra,true);
op32(ctx,XOR,pa,pa);
op32(ctx,SUB,pa,pb);
store(ctx,dst,pa,true);
}
}
break;
case ONot:
{
preg *v = alloc_cpu(ctx,ra,true);
op32(ctx,XOR,v,pconst(&p,1));
store(ctx,dst,v,true);
}
break;
case OJFalse:
case OJTrue:
case OJNotNull:
case OJNull:
{
preg *r = dst->t->kind == HBOOL ? alloc_cpu8(ctx, dst, true) : alloc_cpu(ctx, dst, true);
op64(ctx, dst->t->kind == HBOOL ? TEST8 : TEST, r, r);
XJump( o->op == OJFalse || o->op == OJNull ? JZero : JNotZero,jump);
register_jump(ctx,jump,(opCount + 1) + o->p2);
}
break;
case OJEq:
case OJNotEq:
case OJSLt:
case OJSGte:
case OJSLte:
case OJSGt:
case OJULt:
case OJUGte:
case OJNotLt:
case OJNotGte:
op_jump(ctx,dst,ra,o,(opCount + 1) + o->p3);
break;
case OJAlways:
jump = do_jump(ctx,o->op,false);
register_jump(ctx,jump,(opCount + 1) + o->p1);
break;
case OToDyn:
if( ra->t->kind == HBOOL ) {
int size = begin_native_call(ctx, 1);
set_native_arg(ctx, fetch(ra));
call_native(ctx, hl_alloc_dynbool, size);
store(ctx, dst, PEAX, true);
} else {
int_val rt = (int_val)ra->t;
int jskip = 0;
if( hl_is_ptr(ra->t) ) {
int jnz;
preg *a = alloc_cpu(ctx,ra,true);
op64(ctx,TEST,a,a);
XJump_small(JNotZero,jnz);
					op64(ctx,XOR,PEAX,PEAX); // NULL stands in for the hl_alloc_dynamic result at the jskip landing point
XJump_small(JAlways,jskip);
patch_jump(ctx,jnz);
}
call_native_consts(ctx, hl_alloc_dynamic, &rt, 1);
// copy value to dynamic
if( (IS_FLOAT(ra) || ra->size == 8) && !IS_64 ) {
preg *tmp = REG_AT(RCPU_SCRATCH_REGS[1]);
op64(ctx,MOV,tmp,&ra->stack);
op32(ctx,MOV,pmem(&p,Eax,HDYN_VALUE),tmp);
if( ra->t->kind == HF64 ) {
ra->stackPos += 4;
op64(ctx,MOV,tmp,&ra->stack);
op32(ctx,MOV,pmem(&p,Eax,HDYN_VALUE+4),tmp);
ra->stackPos -= 4;
}
} else {
preg *tmp = REG_AT(RCPU_SCRATCH_REGS[1]);
copy_from(ctx,tmp,ra);
op64(ctx,MOV,pmem(&p,Eax,HDYN_VALUE),tmp);
}
if( hl_is_ptr(ra->t) ) patch_jump(ctx,jskip);
store(ctx, dst, PEAX, true);
}
break;
case OToSFloat:
if( ra == dst ) break;
if( ra->t->kind == HI32 || ra->t->kind == HUI16 || ra->t->kind == HUI8 ) {
preg *r = alloc_cpu(ctx,ra,true);
preg *w = alloc_fpu(ctx,dst,false);
op32(ctx,dst->t->kind == HF64 ? CVTSI2SD : CVTSI2SS,w,r);
store(ctx, dst, w, true);
} else if( ra->t->kind == HF64 && dst->t->kind == HF32 ) {
preg *r = alloc_fpu(ctx,ra,true);
preg *w = alloc_fpu(ctx,dst,false);
op32(ctx,CVTSD2SS,w,r);
store(ctx, dst, w, true);
} else if( ra->t->kind == HF32 && dst->t->kind == HF64 ) {
preg *r = alloc_fpu(ctx,ra,true);
preg *w = alloc_fpu(ctx,dst,false);
op32(ctx,CVTSS2SD,w,r);
store(ctx, dst, w, true);
} else
ASSERT(0);
break;
case OToUFloat:
{
int size;
size = prepare_call_args(ctx,1,&o->p2,ctx->vregs,0);
call_native(ctx,uint_to_double,size);
store_result(ctx,dst);
}
break;
case OToInt:
if( ra == dst ) break;
if( ra->t->kind == HF64 ) {
preg *r = alloc_fpu(ctx,ra,true);
preg *w = alloc_cpu(ctx,dst,false);
preg *tmp = alloc_reg(ctx,RCPU);
op32(ctx,STMXCSR,pmem(&p,Esp,-4),UNUSED);
op32(ctx,MOV,tmp,&p);
op32(ctx,OR,tmp,pconst(&p,0x6000)); // set round towards 0
op32(ctx,MOV,pmem(&p,Esp,-8),tmp);
op32(ctx,LDMXCSR,&p,UNUSED);
op32(ctx,CVTSD2SI,w,r);
op32(ctx,LDMXCSR,pmem(&p,Esp,-4),UNUSED);
store(ctx, dst, w, true);
} else if (ra->t->kind == HF32) {
preg *r = alloc_fpu(ctx, ra, true);
preg *w = alloc_cpu(ctx, dst, false);
preg *tmp = alloc_reg(ctx, RCPU);
op32(ctx, STMXCSR, pmem(&p, Esp, -4), UNUSED);
op32(ctx, MOV, tmp, &p);
op32(ctx, OR, tmp, pconst(&p, 0x6000)); // set round towards 0
op32(ctx, MOV, pmem(&p, Esp, -8), tmp);
op32(ctx, LDMXCSR, &p, UNUSED);
op32(ctx, CVTSS2SI, w, r);
op32(ctx, LDMXCSR, pmem(&p, Esp, -4), UNUSED);
store(ctx, dst, w, true);
} else if( dst->t->kind == HI64 && ra->t->kind == HI32 ) {
if( ra->current != PEAX ) {
op32(ctx, MOV, PEAX, fetch(ra));
scratch(PEAX);
}
# ifdef HL_64
op64(ctx, CDQE, UNUSED, UNUSED); // sign-extend Eax into Rax
store(ctx, dst, PEAX, true);
# else
				op32(ctx, CDQ, UNUSED, UNUSED); // sign-extend Eax into Edx:Eax
scratch(REG_AT(Edx));
op32(ctx, MOV, fetch(dst), PEAX);
dst->stackPos += 4;
op32(ctx, MOV, fetch(dst), REG_AT(Edx));
dst->stackPos -= 4;
} else if( dst->t->kind == HI32 && ra->t->kind == HI64 ) {
error_i64();
# endif
} else {
preg *r = alloc_cpu(ctx,dst,false);
copy_from(ctx, r, ra);
store(ctx, dst, r, true);
}
break;
case ORet:
op_ret(ctx, dst);
break;
case OIncr:
{
if( IS_FLOAT(dst) ) {
ASSERT(0);
} else {
preg *v = fetch32(ctx,dst);
op32(ctx,INC,v,UNUSED);
if( v->kind != RSTACK ) store(ctx, dst, v, false);
}
}
break;
case ODecr:
{
if( IS_FLOAT(dst) ) {
ASSERT(0);
} else {
preg *v = fetch32(ctx,dst);
op32(ctx,DEC,v,UNUSED);
if( v->kind != RSTACK ) store(ctx, dst, v, false);
}
}
break;
case OFloat:
{
if( m->code->floats[o->p2] == 0 ) {
preg *f = alloc_fpu(ctx,dst,false);
op64(ctx,XORPD,f,f);
} else switch( dst->t->kind ) {
case HF64:
case HF32:
# ifdef HL_64
op64(ctx,dst->t->kind == HF32 ? MOVSS : MOVSD,alloc_fpu(ctx,dst,false),pcodeaddr(&p,o->p2 * 8));
# else
op64(ctx,dst->t->kind == HF32 ? MOVSS : MOVSD,alloc_fpu(ctx,dst,false),paddr(&p,m->code->floats + o->p2));
# endif
break;
default:
ASSERT(dst->t->kind);
}
store(ctx,dst,dst->current,false);
}
break;
case OString:
op64(ctx,MOV,alloc_cpu(ctx, dst, false),pconst64(&p,(int_val)hl_get_ustring(m->code,o->p2)));
store(ctx,dst,dst->current,false);
break;
case OBytes:
{
char *b = m->code->version >= 5 ? m->code->bytes + m->code->bytes_pos[o->p2] : m->code->strings[o->p2];
op64(ctx,MOV,alloc_cpu(ctx,dst,false),pconst64(&p,(int_val)b));
store(ctx,dst,dst->current,false);
}
break;
case ONull:
{
op64(ctx,XOR,alloc_cpu(ctx, dst, false),alloc_cpu(ctx, dst, false));
store(ctx,dst,dst->current,false);
}
break;
case ONew:
{
int_val args[] = { (int_val)dst->t };
void *allocFun;
int nargs = 1;
switch( dst->t->kind ) {
case HOBJ:
case HSTRUCT:
allocFun = hl_alloc_obj;
break;
case HDYNOBJ:
allocFun = hl_alloc_dynobj;
nargs = 0;
break;
case HVIRTUAL:
allocFun = hl_alloc_virtual;
break;
default:
ASSERT(dst->t->kind);
}
call_native_consts(ctx, allocFun, args, nargs);
store(ctx, dst, PEAX, true);
}
break;
case OInstanceClosure:
{
preg *r = alloc_cpu(ctx, rb, true);
jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist));
int size = begin_native_call(ctx,3);
set_native_arg(ctx,r);
j->pos = BUF_POS();
j->target = o->p2;
j->next = ctx->calls;
ctx->calls = j;
set_native_arg(ctx,pconst64(&p,RESERVE_ADDRESS));
set_native_arg(ctx,pconst64(&p,(int_val)m->code->functions[m->functions_indexes[o->p2]].type));
call_native(ctx,hl_alloc_closure_ptr,size);
store(ctx,dst,PEAX,true);
}
break;
case OVirtualClosure:
{
int size, i;
preg *r = alloc_cpu_call(ctx, ra);
hl_type *t = NULL;
hl_type *ot = ra->t;
while( t == NULL ) {
for(i=0;i<ot->obj->nproto;i++) {
hl_obj_proto *pp = ot->obj->proto + i;
if( pp->pindex == o->p3 ) {
t = m->code->functions[m->functions_indexes[pp->findex]].type;
break;
}
}
ot = ot->obj->super;
}
size = begin_native_call(ctx,3);
set_native_arg(ctx,r);
// read r->type->vobj_proto[i] for function address
op64(ctx,MOV,r,pmem(&p,r->id,0));
op64(ctx,MOV,r,pmem(&p,r->id,HL_WSIZE*2));
op64(ctx,MOV,r,pmem(&p,r->id,HL_WSIZE*o->p3));
set_native_arg(ctx,r);
op64(ctx,MOV,r,pconst64(&p,(int_val)t));
set_native_arg(ctx,r);
call_native(ctx,hl_alloc_closure_ptr,size);
store(ctx,dst,PEAX,true);
}
break;
case OCallClosure:
if( ra->t->kind == HDYN ) {
// ASM for {
// vdynamic *args[] = {args};
// vdynamic *ret = hl_dyn_call(closure,args,nargs);
// dst = hl_dyncast(ret,t_dynamic,t_dst);
// }
int offset = o->p3 * HL_WSIZE;
preg *r = alloc_reg(ctx, RCPU_CALL);
if( offset & 15 ) offset += 16 - (offset & 15);
op64(ctx,SUB,PESP,pconst(&p,offset));
op64(ctx,MOV,r,PESP);
for(i=0;i<o->p3;i++) {
vreg *a = R(o->extra[i]);
if( !hl_is_dynamic(a->t) ) ASSERT(0);
preg *v = alloc_cpu(ctx,a,true);
op64(ctx,MOV,pmem(&p,r->id,i * HL_WSIZE),v);
RUNLOCK(v);
}
# ifdef HL_64
int size = begin_native_call(ctx, 3) + offset;
set_native_arg(ctx, pconst(&p,o->p3));
set_native_arg(ctx, r);
set_native_arg(ctx, fetch(ra));
# else
int size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(int) + offset);
op64(ctx,PUSH,pconst(&p,o->p3),UNUSED);
op64(ctx,PUSH,r,UNUSED);
op64(ctx,PUSH,alloc_cpu(ctx,ra,true),UNUSED);
# endif
call_native(ctx,hl_dyn_call,size);
if( dst->t->kind != HVOID ) {
store(ctx,dst,PEAX,true);
make_dyn_cast(ctx,dst,dst);
}
} else {
int jhasvalue, jend, size;
// ASM for if( c->hasValue ) c->fun(value,args) else c->fun(args)
preg *r = alloc_cpu(ctx,ra,true);
preg *tmp = alloc_reg(ctx, RCPU);
op32(ctx,MOV,tmp,pmem(&p,r->id,HL_WSIZE*2));
op32(ctx,TEST,tmp,tmp);
scratch(tmp);
XJump_small(JNotZero,jhasvalue);
save_regs(ctx);
size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0);
preg *rr = r;
if( rr->holds != ra ) rr = alloc_cpu(ctx, ra, true);
op_call(ctx, pmem(&p,rr->id,HL_WSIZE), size);
XJump_small(JAlways,jend);
patch_jump(ctx,jhasvalue);
restore_regs(ctx);
# ifdef HL_64
{
int regids[64];
preg *pc = REG_AT(CALL_REGS[0]);
vreg *sc = R(f->nregs); // scratch register that we temporary rebind
if( o->p3 >= 63 ) jit_error("assert");
memcpy(regids + 1, o->extra, o->p3 * sizeof(int));
regids[0] = f->nregs;
sc->size = HL_WSIZE;
sc->t = &hlt_dyn;
op64(ctx, MOV, pc, pmem(&p,r->id,HL_WSIZE*3));
scratch(pc);
sc->current = pc;
pc->holds = sc;
size = prepare_call_args(ctx,o->p3 + 1,regids,ctx->vregs,0);
if( r->holds != ra ) r = alloc_cpu(ctx, ra, true);
}
# else
size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,HL_WSIZE);
if( r->holds != ra ) r = alloc_cpu(ctx, ra, true);
op64(ctx, PUSH,pmem(&p,r->id,HL_WSIZE*3),UNUSED); // push closure value
# endif
op_call(ctx, pmem(&p,r->id,HL_WSIZE), size);
discard_regs(ctx,false);
patch_jump(ctx,jend);
store_result(ctx, dst);
}
break;
case OStaticClosure:
{
vclosure *c = alloc_static_closure(ctx,o->p2);
preg *r = alloc_reg(ctx, RCPU);
op64(ctx, MOV, r, pconst64(&p,(int_val)c));
store(ctx,dst,r,true);
}
break;
case OField:
{
# ifndef HL_64
if( dst->t->kind == HI64 ) {
error_i64();
break;
}
# endif
switch( ra->t->kind ) {
case HOBJ:
case HSTRUCT:
{
hl_runtime_obj *rt = hl_get_obj_rt(ra->t);
preg *rr = alloc_cpu(ctx,ra, true);
if( dst->t->kind == HSTRUCT ) {
hl_type *ft = hl_obj_field_fetch(ra->t,o->p3)->t;
if( ft->kind == HPACKED ) {
preg *r = alloc_reg(ctx,RCPU);
op64(ctx,LEA,r,pmem(&p,(CpuReg)rr->id,rt->fields_indexes[o->p3]));
store(ctx,dst,r,true);
break;
}
}
copy_to(ctx,dst,pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p3]));
}
break;
case HVIRTUAL:
// ASM for --> if( hl_vfields(o)[f] ) r = *hl_vfields(o)[f]; else r = hl_dyn_get(o,hash(field),vt)
{
int jhasfield, jend, size;
bool need_type = !(IS_FLOAT(dst) || dst->t->kind == HI64);
preg *v = alloc_cpu_call(ctx,ra);
preg *r = alloc_reg(ctx,RCPU);
op64(ctx,MOV,r,pmem(&p,v->id,sizeof(vvirtual)+HL_WSIZE*o->p3));
op64(ctx,TEST,r,r);
XJump_small(JNotZero,jhasfield);
size = begin_native_call(ctx, need_type ? 3 : 2);
if( need_type ) set_native_arg(ctx,pconst64(&p,(int_val)dst->t));
set_native_arg(ctx,pconst64(&p,(int_val)ra->t->virt->fields[o->p3].hashed_name));
set_native_arg(ctx,v);
call_native(ctx,get_dynget(dst->t),size);
store_result(ctx,dst);
XJump_small(JAlways,jend);
patch_jump(ctx,jhasfield);
copy_to(ctx, dst, pmem(&p,(CpuReg)r->id,0));
patch_jump(ctx,jend);
scratch(dst->current);
}
break;
default:
ASSERT(ra->t->kind);
break;
}
}
break;
case OSetField:
{
switch( dst->t->kind ) {
case HOBJ:
case HSTRUCT:
{
hl_runtime_obj *rt = hl_get_obj_rt(dst->t);
preg *rr = alloc_cpu(ctx, dst, true);
copy_from(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p2]), rb);
}
break;
case HVIRTUAL:
// ASM for --> if( hl_vfields(o)[f] ) *hl_vfields(o)[f] = v; else hl_dyn_set(o,hash(field),vt,v)
{
int jhasfield, jend;
preg *obj = alloc_cpu_call(ctx,dst);
preg *r = alloc_reg(ctx,RCPU);
op64(ctx,MOV,r,pmem(&p,obj->id,sizeof(vvirtual)+HL_WSIZE*o->p2));
op64(ctx,TEST,r,r);
XJump_small(JNotZero,jhasfield);
# ifdef HL_64
switch( rb->t->kind ) {
case HF64:
case HF32:
size = begin_native_call(ctx,3);
set_native_arg_fpu(ctx, fetch(rb), rb->t->kind == HF32);
break;
case HI64:
size = begin_native_call(ctx,3);
set_native_arg(ctx, fetch(rb));
break;
default:
size = begin_native_call(ctx, 4);
set_native_arg(ctx, fetch(rb));
set_native_arg(ctx, pconst64(&p,(int_val)rb->t));
break;
}
set_native_arg(ctx,pconst(&p,dst->t->virt->fields[o->p2].hashed_name));
set_native_arg(ctx,obj);
# else
switch( rb->t->kind ) {
case HF64:
case HI64:
size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(double));
push_reg(ctx,rb);
break;
case HF32:
size = pad_before_call(ctx,HL_WSIZE*2 + sizeof(float));
push_reg(ctx,rb);
break;
default:
size = pad_before_call(ctx,HL_WSIZE*4);
op64(ctx,PUSH,fetch32(ctx,rb),UNUSED);
op64(ctx,MOV,r,pconst64(&p,(int_val)rb->t));
op64(ctx,PUSH,r,UNUSED);
break;
}
op32(ctx,MOV,r,pconst(&p,dst->t->virt->fields[o->p2].hashed_name));
op64(ctx,PUSH,r,UNUSED);
op64(ctx,PUSH,obj,UNUSED);
# endif
call_native(ctx,get_dynset(rb->t),size);
XJump_small(JAlways,jend);
patch_jump(ctx,jhasfield);
copy_from(ctx, pmem(&p,(CpuReg)r->id,0), rb);
patch_jump(ctx,jend);
scratch(rb->current);
}
break;
default:
ASSERT(dst->t->kind);
break;
}
}
break;
case OGetThis:
{
vreg *r = R(0);
hl_runtime_obj *rt = hl_get_obj_rt(r->t);
preg *rr = alloc_cpu(ctx,r, true);
if( dst->t->kind == HSTRUCT ) {
hl_type *ft = hl_obj_field_fetch(r->t,o->p2)->t;
if( ft->kind == HPACKED ) {
preg *r = alloc_reg(ctx,RCPU);
op64(ctx,LEA,r,pmem(&p,(CpuReg)rr->id,rt->fields_indexes[o->p2]));
store(ctx,dst,r,true);
break;
}
}
copy_to(ctx,dst,pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p2]));
}
break;
case OSetThis:
{
vreg *r = R(0);
hl_runtime_obj *rt = hl_get_obj_rt(r->t);
preg *rr = alloc_cpu(ctx, r, true);
copy_from(ctx, pmem(&p, (CpuReg)rr->id, rt->fields_indexes[o->p1]), ra);
}
break;
case OCallThis:
{
int nargs = o->p3 + 1;
int *args = (int*)hl_malloc(&ctx->falloc,sizeof(int) * nargs);
int size;
preg *r = alloc_cpu(ctx, R(0), true);
preg *tmp;
tmp = alloc_reg(ctx, RCPU_CALL);
op64(ctx,MOV,tmp,pmem(&p,r->id,0)); // read type
op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE*2)); // read proto
args[0] = 0;
for(i=1;i<nargs;i++)
args[i] = o->extra[i-1];
size = prepare_call_args(ctx,nargs,args,ctx->vregs,0);
op_call(ctx,pmem(&p,tmp->id,o->p2*HL_WSIZE),size);
discard_regs(ctx, false);
store_result(ctx, dst);
}
break;
case OCallMethod:
switch( R(o->extra[0])->t->kind ) {
case HOBJ: {
int size;
preg *r = alloc_cpu(ctx, R(o->extra[0]), true);
preg *tmp;
tmp = alloc_reg(ctx, RCPU_CALL);
op64(ctx,MOV,tmp,pmem(&p,r->id,0)); // read type
op64(ctx,MOV,tmp,pmem(&p,tmp->id,HL_WSIZE*2)); // read proto
size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0);
op_call(ctx,pmem(&p,tmp->id,o->p2*HL_WSIZE),size);
discard_regs(ctx, false);
store_result(ctx, dst);
break;
}
case HVIRTUAL:
// ASM for --> if( hl_vfields(o)[f] ) dst = *hl_vfields(o)[f](o->value,args...); else dst = hl_dyn_call_obj(o->value,field,args,&ret)
{
int size;
int paramsSize;
int jhasfield, jend;
bool need_dyn;
vreg *obj = R(o->extra[0]);
preg *v = alloc_cpu_call(ctx,obj);
preg *r = alloc_reg(ctx,RCPU_CALL);
op64(ctx,MOV,r,pmem(&p,v->id,sizeof(vvirtual)+HL_WSIZE*o->p2));
op64(ctx,TEST,r,r);
save_regs(ctx);
if( o->p3 < 6 ) {
XJump_small(JNotZero,jhasfield);
} else {
XJump(JNotZero,jhasfield);
}
need_dyn = !hl_is_ptr(dst->t) && dst->t->kind != HVOID;
paramsSize = (o->p3 - 1) * HL_WSIZE;
if( need_dyn ) paramsSize += sizeof(vdynamic);
if( paramsSize & 15 ) paramsSize += 16 - (paramsSize&15);
op64(ctx,SUB,PESP,pconst(&p,paramsSize));
op64(ctx,MOV,r,PESP);
for(i=0;i<o->p3-1;i++) {
vreg *a = R(o->extra[i+1]);
if( hl_is_ptr(a->t) ) {
op64(ctx,MOV,pmem(&p,r->id,i*HL_WSIZE),alloc_cpu(ctx,a,true));
if( a->current != v ) RUNLOCK(a->current);
} else {
preg *r2 = alloc_reg(ctx,RCPU);
op64(ctx,LEA,r2,&a->stack);
op64(ctx,MOV,pmem(&p,r->id,i*HL_WSIZE),r2);
if( r2 != v ) RUNLOCK(r2);
}
}
jit_buf(ctx);
if( !need_dyn ) {
size = begin_native_call(ctx, 5);
set_native_arg(ctx, pconst(&p,0));
} else {
preg *rtmp = alloc_reg(ctx,RCPU);
op64(ctx,LEA,rtmp,pmem(&p,Esp,paramsSize - sizeof(vdynamic)));
size = begin_native_call(ctx, 5);
set_native_arg(ctx,rtmp);
if( !IS_64 ) RUNLOCK(rtmp);
}
set_native_arg(ctx,r);
set_native_arg(ctx,pconst(&p,obj->t->virt->fields[o->p2].hashed_name)); // fid
set_native_arg(ctx,pconst64(&p,(int_val)obj->t->virt->fields[o->p2].t)); // ftype
set_native_arg(ctx,pmem(&p,v->id,HL_WSIZE)); // o->value
call_native(ctx,hl_dyn_call_obj,size + paramsSize);
if( need_dyn ) {
preg *r = IS_FLOAT(dst) ? REG_AT(XMM(0)) : PEAX;
copy(ctx,r,pmem(&p,Esp,HDYN_VALUE - (int)sizeof(vdynamic)),dst->size);
store(ctx, dst, r, false);
} else
store(ctx, dst, PEAX, false);
XJump_small(JAlways,jend);
patch_jump(ctx,jhasfield);
restore_regs(ctx);
				/*
					o = o->value hack : rebind the virtual's register to hold its wrapped value
					so that prepare_call_args passes it as the receiver of the direct proto call
				*/
if( v->holds ) v->holds->current = NULL;
obj->current = v;
v->holds = obj;
op64(ctx,MOV,v,pmem(&p,v->id,HL_WSIZE));
size = prepare_call_args(ctx,o->p3,o->extra,ctx->vregs,0);
op_call(ctx,r,size);
discard_regs(ctx, false);
store_result(ctx, dst);
patch_jump(ctx,jend);
}
break;
default:
ASSERT(0);
break;
}
break;
case ORethrow:
{
int size = prepare_call_args(ctx,1,&o->p1,ctx->vregs,0);
call_native(ctx,hl_rethrow,size);
}
break;
case OThrow:
{
int size = prepare_call_args(ctx,1,&o->p1,ctx->vregs,0);
call_native(ctx,hl_throw,size);
}
break;
case OLabel:
// NOP for now
discard_regs(ctx,false);
break;
case OGetI8:
case OGetI16:
{
preg *base = alloc_cpu(ctx, ra, true);
preg *offset = alloc_cpu64(ctx, rb, true);
preg *r = alloc_reg(ctx,o->op == OGetI8 ? RCPU_8BITS : RCPU);
op64(ctx,XOR,r,r);
op32(ctx, o->op == OGetI8 ? MOV8 : MOV16,r,pmem2(&p,base->id,offset->id,1,0));
store(ctx, dst, r, true);
}
break;
case OGetMem:
{
preg *base = alloc_cpu(ctx, ra, true);
preg *offset = alloc_cpu64(ctx, rb, true);
store(ctx, dst, pmem2(&p,base->id,offset->id,1,0), false);
}
break;
case OSetI8:
{
preg *base = alloc_cpu(ctx, dst, true);
preg *offset = alloc_cpu64(ctx, ra, true);
preg *value = alloc_cpu8(ctx, rb, true);
op32(ctx,MOV8,pmem2(&p,base->id,offset->id,1,0),value);
}
break;
case OSetI16:
{
preg *base = alloc_cpu(ctx, dst, true);
preg *offset = alloc_cpu64(ctx, ra, true);
preg *value = alloc_cpu(ctx, rb, true);
op32(ctx,MOV16,pmem2(&p,base->id,offset->id,1,0),value);
}
break;
case OSetMem:
{
preg *base = alloc_cpu(ctx, dst, true);
preg *offset = alloc_cpu64(ctx, ra, true);
preg *value;
switch( rb->t->kind ) {
case HI32:
value = alloc_cpu(ctx, rb, true);
op32(ctx,MOV,pmem2(&p,base->id,offset->id,1,0),value);
break;
case HF32:
value = alloc_fpu(ctx, rb, true);
op32(ctx,MOVSS,pmem2(&p,base->id,offset->id,1,0),value);
break;
case HF64:
value = alloc_fpu(ctx, rb, true);
op32(ctx,MOVSD,pmem2(&p,base->id,offset->id,1,0),value);
break;
case HI64:
value = alloc_cpu(ctx, rb, true);
op64(ctx,MOV,pmem2(&p,base->id,offset->id,1,0),value);
break;
default:
ASSERT(rb->t->kind);
break;
}
}
break;
case OType:
{
op64(ctx,MOV,alloc_cpu(ctx, dst, false),pconst64(&p,(int_val)(m->code->types + o->p2)));
store(ctx,dst,dst->current,false);
}
break;
case OGetType:
{
int jnext, jend;
preg *r = alloc_cpu(ctx, ra, true);
preg *tmp = alloc_reg(ctx, RCPU);
op64(ctx,TEST,r,r);
XJump_small(JNotZero,jnext);
op64(ctx,MOV, tmp, pconst64(&p,(int_val)&hlt_void));
XJump_small(JAlways,jend);
patch_jump(ctx,jnext);
op64(ctx, MOV, tmp, pmem(&p,r->id,0));
patch_jump(ctx,jend);
store(ctx,dst,tmp,true);
}
break;
case OGetArray:
{
preg *rdst = IS_FLOAT(dst) ? alloc_fpu(ctx,dst,false) : alloc_cpu(ctx,dst,false);
copy(ctx, rdst, pmem2(&p,alloc_cpu(ctx,ra,true)->id,alloc_cpu64(ctx,rb,true)->id,hl_type_size(dst->t),sizeof(varray)), dst->size);
store(ctx,dst,dst->current,false);
}
break;
case OSetArray:
{
preg *rrb = IS_FLOAT(rb) ? alloc_fpu(ctx,rb,true) : alloc_cpu(ctx,rb,true);
copy(ctx, pmem2(&p,alloc_cpu(ctx,dst,true)->id,alloc_cpu64(ctx,ra,true)->id,hl_type_size(rb->t),sizeof(varray)), rrb, rb->size);
}
break;
case OArraySize:
{
op32(ctx,MOV,alloc_cpu(ctx,dst,false),pmem(&p,alloc_cpu(ctx,ra,true)->id,HL_WSIZE*2));
store(ctx,dst,dst->current,false);
}
break;
case ORef:
{
scratch(ra->current);
op64(ctx,MOV,alloc_cpu(ctx,dst,false),REG_AT(Ebp));
if( ra->stackPos < 0 )
op64(ctx,SUB,dst->current,pconst(&p,-ra->stackPos));
else
op64(ctx,ADD,dst->current,pconst(&p,ra->stackPos));
store(ctx,dst,dst->current,false);
}
break;
case OUnref:
copy_to(ctx,dst,pmem(&p,alloc_cpu(ctx,ra,true)->id,0));
break;
case OSetref:
copy_from(ctx,pmem(&p,alloc_cpu(ctx,dst,true)->id,0),ra);
break;
case ORefData:
switch( ra->t->kind ) {
case HARRAY:
{
preg *r = fetch(ra);
preg *d = alloc_cpu(ctx,dst,false);
op64(ctx,MOV,d,r);
op64(ctx,ADD,d,pconst(&p,sizeof(varray)));
store(ctx,dst,dst->current,false);
}
break;
default:
ASSERT(ra->t->kind);
}
break;
case ORefOffset:
{
preg *d = alloc_cpu(ctx,rb,true);
preg *r2 = alloc_cpu(ctx,dst,false);
preg *r = fetch(ra);
int size = hl_type_size(dst->t->tparam);
op64(ctx,MOV,r2,r);
switch( size ) {
case 1:
break;
case 2:
op64(ctx,SHL,d,pconst(&p,1));
break;
case 4:
op64(ctx,SHL,d,pconst(&p,2));
break;
case 8:
op64(ctx,SHL,d,pconst(&p,3));
break;
default:
op64(ctx,IMUL,d,pconst(&p,size));
break;
}
op64(ctx,ADD,r2,d);
scratch(d);
store(ctx,dst,dst->current,false);
}
break;
case OToVirtual:
{
# ifdef HL_64
int size = pad_before_call(ctx, 0);
op64(ctx,MOV,REG_AT(CALL_REGS[1]),fetch(ra));
op64(ctx,MOV,REG_AT(CALL_REGS[0]),pconst64(&p,(int_val)dst->t));
# else
int size = pad_before_call(ctx, HL_WSIZE*2);
op32(ctx,PUSH,fetch(ra),UNUSED);
op32(ctx,PUSH,pconst(&p,(int)(int_val)dst->t),UNUSED);
# endif
if( ra->t->kind == HOBJ ) hl_get_obj_rt(ra->t); // ensure it's initialized
call_native(ctx,hl_to_virtual,size);
store(ctx,dst,PEAX,true);
}
break;
case OMakeEnum:
{
hl_enum_construct *c = &dst->t->tenum->constructs[o->p2];
int_val args[] = { (int_val)dst->t, o->p2 };
int i;
call_native_consts(ctx, hl_alloc_enum, args, 2);
RLOCK(PEAX);
for(i=0;i<c->nparams;i++) {
preg *r = fetch(R(o->extra[i]));
copy(ctx, pmem(&p,Eax,c->offsets[i]),r, R(o->extra[i])->size);
RUNLOCK(fetch(R(o->extra[i])));
if ((i & 15) == 0) jit_buf(ctx);
}
store(ctx, dst, PEAX, true);
}
break;
case OEnumAlloc:
{
int_val args[] = { (int_val)dst->t, o->p2 };
call_native_consts(ctx, hl_alloc_enum, args, 2);
store(ctx, dst, PEAX, true);
}
break;
case OEnumField:
{
hl_enum_construct *c = &ra->t->tenum->constructs[o->p3];
preg *r = alloc_cpu(ctx,ra,true);
copy_to(ctx,dst,pmem(&p,r->id,c->offsets[(int)(int_val)o->extra]));
}
break;
case OSetEnumField:
{
hl_enum_construct *c = &dst->t->tenum->constructs[0];
preg *r = alloc_cpu(ctx,dst,true);
switch( rb->t->kind ) {
case HF64:
{
preg *d = alloc_fpu(ctx,rb,true);
copy(ctx,pmem(&p,r->id,c->offsets[o->p2]),d,8);
break;
}
default:
copy(ctx,pmem(&p,r->id,c->offsets[o->p2]),alloc_cpu(ctx,rb,true),hl_type_size(c->params[o->p2]));
break;
}
}
break;
case ONullCheck:
{
int jz;
preg *r = alloc_cpu(ctx,dst,true);
op64(ctx,TEST,r,r);
XJump_small(JNotZero,jz);
hl_opcode *next = f->ops + opCount + 1;
bool null_field_access = false;
if( next->op == OField && next->p2 == o->p1 ) {
hl_obj_field *f = NULL;
if( dst->t->kind == HOBJ || dst->t->kind == HSTRUCT )
f = hl_obj_field_fetch(dst->t, next->p3);
else if( dst->t->kind == HVIRTUAL )
f = dst->t->virt->fields + next->p3;
if( f == NULL ) ASSERT(dst->t->kind);
null_field_access = true;
pad_before_call(ctx, HL_WSIZE);
if( f->hashed_name >= 0 && f->hashed_name < 256 )
op64(ctx,PUSH8,pconst(&p,f->hashed_name),UNUSED);
else
op32(ctx,PUSH,pconst(&p,f->hashed_name),UNUSED);
} else {
pad_before_call(ctx, 0);
}
jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist));
j->pos = BUF_POS();
			j->target = null_field_access ? -3 : -1; // negative target = index into ctx->static_functions (-target-1), resolved in hl_jit_code
j->next = ctx->calls;
ctx->calls = j;
op64(ctx,MOV,PEAX,pconst64(&p,RESERVE_ADDRESS));
op_call(ctx,PEAX,-1);
patch_jump(ctx,jz);
}
break;
case OSafeCast:
make_dyn_cast(ctx, dst, ra);
break;
case ODynGet:
{
int size;
# ifdef HL_64
if( IS_FLOAT(dst) || dst->t->kind == HI64 ) {
size = begin_native_call(ctx,2);
} else {
size = begin_native_call(ctx,3);
set_native_arg(ctx,pconst64(&p,(int_val)dst->t));
}
set_native_arg(ctx,pconst64(&p,(int_val)hl_hash_utf8(m->code->strings[o->p3])));
set_native_arg(ctx,fetch(ra));
# else
preg *r;
r = alloc_reg(ctx,RCPU);
if( IS_FLOAT(dst) || dst->t->kind == HI64 ) {
size = pad_before_call(ctx,HL_WSIZE*2);
} else {
size = pad_before_call(ctx,HL_WSIZE*3);
op64(ctx,MOV,r,pconst64(&p,(int_val)dst->t));
op64(ctx,PUSH,r,UNUSED);
}
op64(ctx,MOV,r,pconst64(&p,(int_val)hl_hash_utf8(m->code->strings[o->p3])));
op64(ctx,PUSH,r,UNUSED);
op64(ctx,PUSH,fetch(ra),UNUSED);
# endif
call_native(ctx,get_dynget(dst->t),size);
store_result(ctx,dst);
}
break;
case ODynSet:
{
int size;
# ifdef HL_64
switch( rb->t->kind ) {
case HF32:
case HF64:
size = begin_native_call(ctx, 3);
set_native_arg_fpu(ctx,fetch(rb),rb->t->kind == HF32);
set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)));
set_native_arg(ctx,fetch(dst));
call_native(ctx,get_dynset(rb->t),size);
break;
case HI64:
size = begin_native_call(ctx, 3);
set_native_arg(ctx,fetch(rb));
set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)));
set_native_arg(ctx,fetch(dst));
call_native(ctx,get_dynset(rb->t),size);
break;
default:
size = begin_native_call(ctx,4);
set_native_arg(ctx,fetch(rb));
set_native_arg(ctx,pconst64(&p,(int_val)rb->t));
set_native_arg(ctx,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)));
set_native_arg(ctx,fetch(dst));
call_native(ctx,get_dynset(rb->t),size);
break;
}
# else
switch( rb->t->kind ) {
case HF32:
size = pad_before_call(ctx, HL_WSIZE*2 + sizeof(float));
push_reg(ctx,rb);
op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED);
op32(ctx,PUSH,fetch(dst),UNUSED);
call_native(ctx,get_dynset(rb->t),size);
break;
case HF64:
case HI64:
size = pad_before_call(ctx, HL_WSIZE*2 + sizeof(double));
push_reg(ctx,rb);
op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED);
op32(ctx,PUSH,fetch(dst),UNUSED);
call_native(ctx,get_dynset(rb->t),size);
break;
default:
size = pad_before_call(ctx, HL_WSIZE*4);
op32(ctx,PUSH,fetch32(ctx,rb),UNUSED);
op32(ctx,PUSH,pconst64(&p,(int_val)rb->t),UNUSED);
op32(ctx,PUSH,pconst64(&p,hl_hash_gen(hl_get_ustring(m->code,o->p2),true)),UNUSED);
op32(ctx,PUSH,fetch(dst),UNUSED);
call_native(ctx,get_dynset(rb->t),size);
break;
}
# endif
}
break;
case OTrap:
{
int size, jenter, jtrap;
int offset = 0;
int trap_size = (sizeof(hl_trap_ctx) + 15) & 0xFFF0;
hl_trap_ctx *t = NULL;
# ifndef HL_THREADS
if( tinf == NULL ) tinf = hl_get_thread(); // single thread
# endif
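			// reserve an hl_trap_ctx on the machine stack, link it at the head of the thread's
			// trap_current chain, then setjmp into it : when an exception is thrown, the longjmp
			// makes setjmp return non-zero and we fall through to load exc_value and jump to the
			// catch target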
# ifdef HL_64
preg *trap = REG_AT(CALL_REGS[0]);
# else
preg *trap = PEAX;
# endif
RLOCK(trap);
preg *treg = alloc_reg(ctx, RCPU);
if( !tinf ) {
call_native(ctx, hl_get_thread, 0);
op64(ctx,MOV,treg,PEAX);
offset = (int)(int_val)&tinf->trap_current;
} else {
offset = 0;
op64(ctx,MOV,treg,pconst64(&p,(int_val)&tinf->trap_current));
}
op64(ctx,MOV,trap,pmem(&p,treg->id,offset));
op64(ctx,SUB,PESP,pconst(&p,trap_size));
op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&t->prev),trap);
op64(ctx,MOV,trap,PESP);
op64(ctx,MOV,pmem(&p,treg->id,offset),trap);
			/*
				This is a bit hackish : we want to detect the type of exception filtered by the catch, so we check for the following
				sequence of HL opcodes:
					trap E, @catch
					...
				@catch:
					global R, _
					call _, ???(R,E)
				where ??? is expected to be hl.BaseType.check
			*/
hl_opcode *next = f->ops + opCount + 1 + o->p2;
hl_opcode *next2 = f->ops + opCount + 2 + o->p2;
if( next->op == OGetGlobal && next2->op == OCall2 && next2->p3 == next->p1 && dst->stack.id == (int)(int_val)next2->extra ) {
hl_type *gt = m->code->globals[next->p2];
while( gt->kind == HOBJ && gt->obj->super ) gt = gt->obj->super;
if( gt->kind == HOBJ && gt->obj->nfields && gt->obj->fields[0].t->kind == HTYPE ) {
void *addr = m->globals_data + m->globals_indexes[next->p2];
# ifdef HL_64
op64(ctx,MOV,treg,pconst64(&p,(int_val)addr));
op64(ctx,MOV,treg,pmem(&p,treg->id,0));
# else
op64(ctx,MOV,treg,paddr(&p,addr));
# endif
} else
op64(ctx,MOV,treg,pconst(&p,0));
} else {
op64(ctx,MOV,treg,pconst(&p,0));
}
op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&t->tcheck),treg);
size = begin_native_call(ctx, 1);
set_native_arg(ctx,trap);
call_native(ctx,setjmp,size);
op64(ctx,TEST,PEAX,PEAX);
XJump_small(JZero,jenter);
op64(ctx,ADD,PESP,pconst(&p,trap_size));
if( !tinf ) {
call_native(ctx, hl_get_thread, 0);
op64(ctx,MOV,PEAX,pmem(&p, Eax, (int)(int_val)&tinf->exc_value));
} else {
op64(ctx,MOV,PEAX,pconst64(&p,(int_val)&tinf->exc_value));
op64(ctx,MOV,PEAX,pmem(&p, Eax, 0));
}
store(ctx,dst,PEAX,false);
jtrap = do_jump(ctx,OJAlways,false);
register_jump(ctx,jtrap,(opCount + 1) + o->p2);
patch_jump(ctx,jenter);
}
break;
case OEndTrap:
{
int trap_size = (sizeof(hl_trap_ctx) + 15) & 0xFFF0;
hl_trap_ctx *tmp = NULL;
preg *addr,*r;
int offset;
if (!tinf) {
call_native(ctx, hl_get_thread, 0);
addr = PEAX;
RLOCK(addr);
offset = (int)(int_val)&tinf->trap_current;
} else {
offset = 0;
addr = alloc_reg(ctx, RCPU);
op64(ctx, MOV, addr, pconst64(&p, (int_val)&tinf->trap_current));
}
r = alloc_reg(ctx, RCPU);
op64(ctx, MOV, r, pmem(&p,addr->id,offset));
op64(ctx, MOV, r, pmem(&p,r->id,(int)(int_val)&tmp->prev));
op64(ctx, MOV, pmem(&p,addr->id, offset), r);
# ifdef HL_WIN
// erase eip (prevent false positive)
{
_JUMP_BUFFER *b = NULL;
# ifdef HL_64
op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&(b->Rip)),PEAX);
# else
op64(ctx,MOV,pmem(&p,Esp,(int)&(b->Eip)),PEAX);
# endif
}
# endif
op64(ctx,ADD,PESP,pconst(&p,trap_size));
}
break;
case OEnumIndex:
{
preg *r = alloc_reg(ctx,RCPU);
op64(ctx,MOV,r,pmem(&p,alloc_cpu(ctx,ra,true)->id,HL_WSIZE));
store(ctx,dst,r,true);
break;
}
break;
case OSwitch:
{
int jdefault;
int i;
preg *r = alloc_cpu(ctx, dst, true);
preg *r2 = alloc_reg(ctx, RCPU);
op32(ctx, CMP, r, pconst(&p,o->p2));
XJump(JUGte,jdefault);
			// r2 = r * 5 + jump table base (each case below is a 5-byte JMP rel32)
# ifdef HL_64
op64(ctx, XOR, r2, r2);
# endif
op32(ctx, MOV, r2, r);
op32(ctx, SHL, r2, pconst(&p,2));
op32(ctx, ADD, r2, r);
# ifdef HL_64
preg *tmp = alloc_reg(ctx, RCPU);
op64(ctx, MOV, tmp, pconst64(&p,RESERVE_ADDRESS));
# else
op64(ctx, ADD, r2, pconst64(&p,RESERVE_ADDRESS));
# endif
{
jlist *s = (jlist*)hl_malloc(&ctx->galloc, sizeof(jlist));
s->pos = BUF_POS() - sizeof(void*);
s->next = ctx->switchs;
ctx->switchs = s;
}
# ifdef HL_64
op64(ctx, ADD, r2, tmp);
# endif
op64(ctx, JMP, r2, UNUSED);
for(i=0;i<o->p2;i++) {
int j = do_jump(ctx,OJAlways,false);
register_jump(ctx,j,(opCount + 1) + o->extra[i]);
if( (i & 15) == 0 ) jit_buf(ctx);
}
patch_jump(ctx, jdefault);
}
break;
case OGetTID:
op32(ctx, MOV, alloc_cpu(ctx,dst,false), pmem(&p,alloc_cpu(ctx,ra,true)->id,0));
store(ctx,dst,dst->current,false);
break;
case OAssert:
{
jlist *j = (jlist*)hl_malloc(&ctx->galloc,sizeof(jlist));
j->pos = BUF_POS();
			j->target = -2; // static_functions[1] : the jit_assert stub
j->next = ctx->calls;
ctx->calls = j;
op64(ctx,MOV,PEAX,pconst64(&p,RESERVE_ADDRESS));
op_call(ctx,PEAX,-1);
}
break;
case ONop:
break;
default:
jit_error(hl_op_name(o->op));
break;
}
		// some jump lands at the next opcode : assume all registers have been lost
if( ctx->opsPos[opCount+1] == -1 )
discard_regs(ctx,true);
ctx->opsPos[opCount+1] = BUF_POS();
		// write debug infos : cumulative code offset from the function start per opcode,
		// upgraded to 32-bit storage if the code grows past 0xFF00 bytes
size = BUF_POS() - codePos;
if( debug16 && size > 0xFF00 ) {
debug32 = malloc(sizeof(int) * (f->nops + 1));
for(i=0;i<ctx->currentPos;i++)
debug32[i] = debug16[i];
free(debug16);
debug16 = NULL;
}
if( debug16 ) debug16[ctx->currentPos] = (unsigned short)size; else if( debug32 ) debug32[ctx->currentPos] = size;
}
// patch jumps
{
jlist *j = ctx->jumps;
while( j ) {
*(int*)(ctx->startBuf + j->pos) = ctx->opsPos[j->target] - (j->pos + 4);
j = j->next;
}
ctx->jumps = NULL;
}
// add nops padding
jit_nops(ctx);
// clear regs
for(i=0;i<REG_COUNT;i++) {
preg *r = REG_AT(i);
r->holds = NULL;
r->lock = 0;
}
// save debug infos
{
int fid = (int)(f - m->code->functions);
ctx->debug[fid].start = codePos;
ctx->debug[fid].offsets = debug32 ? (void*)debug32 : (void*)debug16;
ctx->debug[fid].large = debug32 != NULL;
}
// reset tmp allocator
hl_free(&ctx->falloc);
return codePos;
}
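// get_wrapper always returns the single hl2c bridge : the generated stub inspects the
// closure's return type at runtime (see jit_hl2c), so no per-type wrapper is needed.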
static void *get_wrapper( hl_type *t ) {
return call_jit_hl2c;
}
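// hl_jit_patch_method overwrites the start of old_fun with MOV EAX/RAX, new_fun_table followed
// by JMP [EAX/RAX] : an indirect jump through the table slot, so the redirection target can be
// swapped later without repatching the code (presumably used when functions are replaced).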
void hl_jit_patch_method( void *old_fun, void **new_fun_table ) {
	// mov eax, addr (rex.w mov rax, imm64 on 64-bit)
	// jmp [eax]
unsigned char *b = (unsigned char*)old_fun;
unsigned long long addr = (unsigned long long)(int_val)new_fun_table;
# ifdef HL_64
*b++ = 0x48;
*b++ = 0xB8;
*b++ = (unsigned char)addr;
*b++ = (unsigned char)(addr>>8);
*b++ = (unsigned char)(addr>>16);
*b++ = (unsigned char)(addr>>24);
*b++ = (unsigned char)(addr>>32);
*b++ = (unsigned char)(addr>>40);
*b++ = (unsigned char)(addr>>48);
*b++ = (unsigned char)(addr>>56);
# else
*b++ = 0xB8;
*b++ = (unsigned char)addr;
*b++ = (unsigned char)(addr>>8);
*b++ = (unsigned char)(addr>>16);
*b++ = (unsigned char)(addr>>24);
# endif
*b++ = 0xFF;
*b++ = 0x20;
}
static void missing_closure() {
hl_error("Missing static closure");
}
void *hl_jit_code( jit_ctx *ctx, hl_module *m, int *codesize, hl_debug_infos **debug, hl_module *previous ) {
jlist *c;
int size = BUF_POS();
unsigned char *code;
if( size & 4095 ) size += 4096 - (size&4095);
code = (unsigned char*)hl_alloc_executable_memory(size);
if( code == NULL ) return NULL;
memcpy(code,ctx->startBuf,BUF_POS());
*codesize = size;
*debug = ctx->debug;
if( !call_jit_c2hl ) {
call_jit_c2hl = code + ctx->c2hl;
call_jit_hl2c = code + ctx->hl2c;
hl_setup_callbacks2(callback_c2hl, get_wrapper, 1);
# ifdef JIT_CUSTOM_LONGJUMP
hl_setup_longjump(code + ctx->longjump);
# endif
int i;
for(i=0;i<sizeof(ctx->static_functions)/sizeof(void*);i++)
ctx->static_functions[i] = (void*)(code + (int)(int_val)ctx->static_functions[i]);
}
// patch calls
c = ctx->calls;
while( c ) {
void *fabs;
if( c->target < 0 )
fabs = ctx->static_functions[-c->target-1];
else {
fabs = m->functions_ptrs[c->target];
if( fabs == NULL ) {
// read absolute address from previous module
int old_idx = m->hash->functions_hashes[m->functions_indexes[c->target]];
if( old_idx < 0 )
return NULL;
fabs = previous->functions_ptrs[(previous->code->functions + old_idx)->findex];
} else {
// relative
fabs = (unsigned char*)code + (int)(int_val)fabs;
}
}
if( (code[c->pos]&~3) == (IS_64?0x48:0xB8) || code[c->pos] == 0x68 ) // MOV : absolute | PUSH
*(void**)(code + c->pos + (IS_64?2:1)) = fabs;
	else {
			// relative call : patch the rel32 displacement (call opcode + 4-byte offset = 5 bytes)
			int_val delta = (int_val)fabs - (int_val)code - (c->pos + 5);
int rpos = (int)delta;
if( (int_val)rpos != delta ) {
printf("Target code too far too rebase\n");
return NULL;
}
*(int*)(code + c->pos + 1) = rpos;
}
c = c->next;
}
// patch switchs
c = ctx->switchs;
while( c ) {
*(void**)(code + c->pos) = code + c->pos + (IS_64 ? 14 : 6);
c = c->next;
}
// patch closures
{
vclosure *c = ctx->closure_list;
while( c ) {
vclosure *next;
int fidx = (int)(int_val)c->fun;
void *fabs = m->functions_ptrs[fidx];
if( fabs == NULL ) {
// read absolute address from previous module
int old_idx = m->hash->functions_hashes[m->functions_indexes[fidx]];
if( old_idx < 0 )
fabs = missing_closure;
else
fabs = previous->functions_ptrs[(previous->code->functions + old_idx)->findex];
} else {
// relative
fabs = (unsigned char*)code + (int)(int_val)fabs;
}
c->fun = fabs;
next = (vclosure*)c->value;
c->value = NULL;
c = next;
}
}
return code;
}