Everywhere: Live patch MAlloc/Free to use RMAlloc/RFree
To make MAlloc/Free operations consistent across multiple processors, we use a dedicated task on core 5 to service the requests.
This commit is contained in:
parent
04a602bb3b
commit
fc2b4ba4e5
11 changed files with 668 additions and 362 deletions
|
@ -1,159 +1,4 @@
|
|||
AutoComplete(0);
|
||||
|
||||
#define include_noreindex #include
|
||||
|
||||
I64 tos_nist_offset = 5603; // UTC -4
|
||||
|
||||
#define NIST_TIME_OFFSET (tos_nist_offset - local_time_offset / CDATE_FREQ)
|
||||
|
||||
public
|
||||
I64 CDate2Unix(CDate dt)
|
||||
{ // TempleOS datetime to Unix timestamp.
|
||||
return ToI64((dt - Str2Date("1/1/1970")) / CDATE_FREQ + NIST_TIME_OFFSET);
|
||||
}
|
||||
|
||||
public
|
||||
CDate Unix2CDate(I64 timestamp)
|
||||
{ // Unix timestamp to TempleOS datetime.
|
||||
return (timestamp - NIST_TIME_OFFSET) * CDATE_FREQ + Str2Date("1/1/1970");
|
||||
}
|
||||
|
||||
// FIXME: Put these in a "Builtin" library?
|
||||
U0 FifoU8Cpy(CFifoU8* f, U8* s)
|
||||
{
|
||||
if (!f || !s)
|
||||
return;
|
||||
while (*s)
|
||||
FifoU8Ins(f, *s++);
|
||||
}
|
||||
Bool KeyDown(I64 sc) return Bt(kbd.down_bitmap, sc);
|
||||
I64 T(Bool _condition, I64 _true, I64 _false)
|
||||
{
|
||||
if (_condition)
|
||||
return _true;
|
||||
return _false;
|
||||
}
|
||||
|
||||
asm
|
||||
{
|
||||
_MEMCPY_U16::
|
||||
PUSH RBP
|
||||
MOV RBP,RSP
|
||||
PUSH RSI
|
||||
PUSH RDI
|
||||
CLD
|
||||
MOV RDI,U64 SF_ARG1[RBP]
|
||||
MOV RSI,U64 SF_ARG2[RBP]
|
||||
MOV RCX,U64 SF_ARG3[RBP]
|
||||
REP_MOVSW
|
||||
MOV RAX,RDI
|
||||
POP RDI
|
||||
POP RSI
|
||||
POP RBP
|
||||
RET1 24
|
||||
_MEMCPY_U32::
|
||||
PUSH RBP
|
||||
MOV RBP,RSP
|
||||
PUSH RSI
|
||||
PUSH RDI
|
||||
CLD
|
||||
MOV RDI,U64 SF_ARG1[RBP]
|
||||
MOV RSI,U64 SF_ARG2[RBP]
|
||||
MOV RCX,U64 SF_ARG3[RBP]
|
||||
REP_MOVSD
|
||||
MOV RAX,RDI
|
||||
POP RDI
|
||||
POP RSI
|
||||
POP RBP
|
||||
RET1 24
|
||||
_MEMCPY_U64::
|
||||
PUSH RBP
|
||||
MOV RBP,RSP
|
||||
PUSH RSI
|
||||
PUSH RDI
|
||||
CLD
|
||||
MOV RDI,U64 SF_ARG1[RBP]
|
||||
MOV RSI,U64 SF_ARG2[RBP]
|
||||
MOV RCX,U64 SF_ARG3[RBP]
|
||||
REP_MOVSQ
|
||||
MOV RAX,RDI
|
||||
POP RDI
|
||||
POP RSI
|
||||
POP RBP
|
||||
RET1 24
|
||||
}
|
||||
|
||||
public _extern _MEMCPY_U16 U16* MemCpyU16(U16* dst, U16* src, I64 cnt);
|
||||
public
|
||||
_extern _MEMCPY_U32 U32* MemCpyU32(U32* dst, U32* src, I64 cnt);
|
||||
public
|
||||
_extern _MEMCPY_U64 U64* MemCpyU64(U64* dst, U64* src, I64 cnt);
|
||||
|
||||
I64 @lerp(U32 val, U32 mx1, U32 mx2)
|
||||
{
|
||||
F64 r = (val & mx1) / ToF64(mx1);
|
||||
return ToI64(r * mx2);
|
||||
}
|
||||
|
||||
U0 @patch_call_rel32(U32 from, U32 to)
|
||||
{
|
||||
*(from(U8*)) = 0xE8;
|
||||
*((from + 1)(I32*)) = to - from - 5;
|
||||
}
|
||||
|
||||
U0 @patch_jmp_rel32(U32 from, U32 to)
|
||||
{
|
||||
*(from(U8*)) = 0xE9;
|
||||
*((from + 1)(I32*)) = to - from - 5;
|
||||
}
|
||||
|
||||
CMemBlk* ShrinkMemBlkByPags(CMemBlk* from, I64 count)
|
||||
{
|
||||
from->pags -= count;
|
||||
U64 to = from;
|
||||
to += count * MEM_PAG_SIZE;
|
||||
MemCpy(to, from, MEM_PAG_SIZE);
|
||||
return to;
|
||||
}
|
||||
|
||||
U0 @sse_enable()
|
||||
{
|
||||
/* clang-format off */
|
||||
asm
|
||||
{
|
||||
MOV_EAX_CR0
|
||||
AND AX, 0xFFFB // clear coprocessor emulation CR0.EM
|
||||
OR AX, 0x2 // set coprocessor monitoring CR0.MP
|
||||
MOV_CR0_EAX
|
||||
MOV_EAX_CR4
|
||||
OR AX, 3 << 9 // set CR4.OSFXSR and CR4.OSXMMEXCPT at the same time
|
||||
MOV_CR4_EAX
|
||||
}
|
||||
/* clang-format on */
|
||||
}
|
||||
|
||||
U0 @sse_enable_on_all_cores()
|
||||
{
|
||||
I64 i;
|
||||
for (i = 1; i < mp_cnt; i++)
|
||||
Spawn(&@sse_enable, , , i);
|
||||
}
|
||||
|
||||
I64 @t(Bool _condition, I64 _true, I64 _false)
|
||||
{
|
||||
if (_condition)
|
||||
return _true;
|
||||
return _false;
|
||||
}
|
||||
|
||||
U0 @erythros_mem_task_loop()
|
||||
{
|
||||
while (1) {
|
||||
Sleep(1);
|
||||
};
|
||||
}
|
||||
|
||||
// Before doing anything else, we:
|
||||
// Before continuing, we:
|
||||
|
||||
// 1. Mark memory in code heap below 0x1000000 as used.
|
||||
sys_code_bp->mem_free_lst->next->pags = 0;
|
||||
|
@ -161,50 +6,13 @@ sys_code_bp->mem_free_lst->next->pags = 0;
|
|||
// 2. Free up 64MB at bottom of code heap for non-HolyC programs
|
||||
sys_code_bp->mem_free_lst = ShrinkMemBlkByPags(sys_code_bp->mem_free_lst, 131072);
|
||||
|
||||
// 3. Enable SSE
|
||||
@sse_enable;
|
||||
@sse_enable_on_all_cores;
|
||||
// 3. Set mem_task
|
||||
|
||||
// 4. Init mem_tasks
|
||||
CTask* erythros_mem_task = sys_malloc_task;
|
||||
|
||||
CTask* erythros_mem_task = Spawn(&@erythros_mem_task_loop, , "ErythrosMemTask");
|
||||
|
||||
#define MALLOC_MEM_TASK_COUNT 16
|
||||
CTask** malloc_mem_task = CAlloc(sizeof(CTask*) * MALLOC_MEM_TASK_COUNT, adam_task);
|
||||
I64 malloc_current_mem_task = 0;
|
||||
|
||||
U0 @malloc_mem_tasks_init()
|
||||
{
|
||||
U8* scratch_buffer[64];
|
||||
I64 i;
|
||||
for (i = 0; i < MALLOC_MEM_TASK_COUNT; i++) {
|
||||
StrPrint(scratch_buffer, "ErythrosMallocTask%d", i);
|
||||
malloc_mem_task[i] = Spawn(&@erythros_mem_task_loop, , scratch_buffer);
|
||||
}
|
||||
}
|
||||
|
||||
@malloc_mem_tasks_init;
|
||||
|
||||
#define CALLOC_MEM_TASK_COUNT 16
|
||||
CTask** calloc_mem_task = CAlloc(sizeof(CTask*) * CALLOC_MEM_TASK_COUNT, adam_task);
|
||||
I64 calloc_current_mem_task = 0;
|
||||
|
||||
U0 @calloc_mem_tasks_init()
|
||||
{
|
||||
U8* scratch_buffer[64];
|
||||
I64 i;
|
||||
for (i = 0; i < CALLOC_MEM_TASK_COUNT; i++) {
|
||||
StrPrint(scratch_buffer, "ErythrosCallocTask%d", i);
|
||||
calloc_mem_task[i] = Spawn(&@erythros_mem_task_loop, , scratch_buffer);
|
||||
}
|
||||
}
|
||||
|
||||
@calloc_mem_tasks_init;
|
||||
|
||||
U0 dd() { DocDump(adam_task->put_doc); }
|
||||
//@patch_jmp_rel32(&Fault2, &Reboot); // Reboot instead of crashing to the debugger
|
||||
U0 NoBeep(I8, Bool) {};
|
||||
@patch_jmp_rel32(&Beep, &NoBeep); // Don't delay on beep when entering debugger
|
||||
//@patch_jmp_rel32(&Fault2, &Reboot); // Reboot instead of crashing to the debugger
|
||||
|
||||
Bool BlkDevLock2(CBlkDev* bd)
|
||||
{
|
||||
|
@ -233,99 +41,5 @@ Bool DrvLock2(CDrv* dv)
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
CTask* SpawnQue2(U0 (*fp_addr)(U8* data), U8* data = NULL, U8* task_name = NULL,
|
||||
I64 target_cpu, CTask* parent = NULL, // NULL means adam
|
||||
I64 stk_size = 0, I64 flags = 1 << JOBf_ADD_TO_QUE)
|
||||
{
|
||||
CTask* res;
|
||||
CJob* tmpc = JobQue(fp_addr, data, target_cpu,
|
||||
flags, JOBT_SPAWN_TASK, task_name, parent, stk_size);
|
||||
CJobCtrl* ctrl;
|
||||
|
||||
while (!Bt(&tmpc->flags, JOBf_DONE)) {
|
||||
LBts(&Fs->task_flags, TASKf_IDLE);
|
||||
Sleep(1);
|
||||
}
|
||||
LBtr(&Fs->task_flags, TASKf_IDLE);
|
||||
|
||||
res = tmpc->spawned_task;
|
||||
ctrl = tmpc->ctrl;
|
||||
PUSHFD
|
||||
CLI while (LBts(&ctrl->flags, JOBCf_LOCKED))
|
||||
Sleep(1);
|
||||
QueRem(tmpc);
|
||||
LBtr(&ctrl->flags, JOBCf_LOCKED);
|
||||
POPFD
|
||||
JobDel(tmpc);
|
||||
return res;
|
||||
}
|
||||
|
||||
@patch_jmp_rel32(&BlkDevLock, &BlkDevLock2); // Patch BlkDevLock so we don't deadlock on multiple tasks reading from virtio disk
|
||||
@patch_jmp_rel32(&DrvLock, &DrvLock2); // Patch DrvLock so we don't deadlock on multiple tasks reading from virtio disk
|
||||
@patch_jmp_rel32(&SpawnQue, &SpawnQue2); // Patch SpawnQue so we don't deadlock on spawning multicore tasks simultaneously
|
||||
|
||||
#define MALLOC2_REQUEST_PTR 0x280000
|
||||
#define MALLOC2_REQUEST_TIMEOUT 4096
|
||||
|
||||
class @malloc2_request
|
||||
{
|
||||
U64 res_addr;
|
||||
CTask* task;
|
||||
U64 bytes;
|
||||
Bool zero;
|
||||
Bool kick;
|
||||
};
|
||||
|
||||
U0 @malloc2_task()
|
||||
{
|
||||
Sleep(50);
|
||||
U64* request_ptr = MALLOC2_REQUEST_PTR;
|
||||
@malloc2_request* request = NULL;
|
||||
while (1) {
|
||||
request = *request_ptr;
|
||||
if (request) {
|
||||
if (request->zero) {
|
||||
LXchgI64(request->res_addr, CAlloc(request->bytes, request->task));
|
||||
} else {
|
||||
LXchgI64(request->res_addr, MAlloc(request->bytes, request->task));
|
||||
}
|
||||
LXchgU8(&request->kick, 1);
|
||||
*request_ptr = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Spawn(&@malloc2_task, , "MAlloc2 Task", 5);
|
||||
|
||||
U64 MAlloc2(I64 size, CTask* task = NULL, Bool zero = FALSE)
|
||||
{
|
||||
I64 count = 0;
|
||||
U64 res = NULL;
|
||||
U64* request_ptr = MALLOC2_REQUEST_PTR;
|
||||
@malloc2_request request;
|
||||
|
||||
request.res_addr = &res;
|
||||
request.bytes = size;
|
||||
request.task = task;
|
||||
request.zero = zero;
|
||||
request.kick = FALSE;
|
||||
|
||||
retry_malloc2:
|
||||
|
||||
count = 0;
|
||||
while (*request_ptr)
|
||||
Busy(5);
|
||||
LXchgI64(request_ptr, &request);
|
||||
while (!request.kick) {
|
||||
++count;
|
||||
if (count > MALLOC2_REQUEST_TIMEOUT)
|
||||
goto retry_malloc2;
|
||||
Busy(5);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
U64 CAlloc2(I64 size, CTask* task = NULL)
|
||||
{
|
||||
return MAlloc2(size, task, TRUE);
|
||||
}
|
||||
|
|
446
System/Setup/MAlloc.HC
Normal file
446
System/Setup/MAlloc.HC
Normal file
|
@ -0,0 +1,446 @@
|
|||
#define RMALLOC_REQUEST_PTR 0x280000
|
||||
|
||||
#define RMALLOC_BUSY_WAIT 5
|
||||
#define RMALLOC_TASK_CPU 5
|
||||
#define RMALLOC_TIMEOUT 1024
|
||||
#define RMALLOC_TYPE_MALLOC 0
|
||||
#define RMALLOC_TYPE_CALLOC 1
|
||||
#define RMALLOC_TYPE_FREE 2
|
||||
|
||||
asm {
|
||||
//************************************
|
||||
// See ::/Doc/Credits.DD.
|
||||
// Heap allocator entry bound to RMAlloc(size, mem_task) by the _extern
// declaration that follows this asm block.
// SF_ARG1 = requested byte count, SF_ARG2 = mem_task (0 selects the current
// task read through FS). The result is left in RAX; RET1 16 drops both args.
_RMALLOC::
|
||||
// Throws 'OutMem'
|
||||
PUSH RBP
|
||||
MOV RBP,RSP
|
||||
PUSH RSI
|
||||
PUSH RDI
|
||||
|
||||
// RDX = mem_task argument, or the current task when the argument is zero.
XOR RBX,RBX
|
||||
MOV RDX,U64 SF_ARG2[RBP]
|
||||
TEST RDX,RDX
|
||||
JNZ @@05
|
||||
MOV RDX,U64 FS:CTask.addr[RBX]
|
||||
// If RDX carries a task signature, swap it for that task's data_heap.
@@05: CMP U32 CTask.task_signature[RDX],TASK_SIGNATURE_VAL
|
||||
|
||||
#assert CTask.task_signature == CHeapCtrl.hc_signature // location signature same
|
||||
|
||||
JNE @@10
|
||||
MOV RDX,U64 CTask.data_heap[RDX]
|
||||
// RDX must now carry the heap-control signature; otherwise report it via
// SysBadMAlloc and jump to _SYS_HLT.
@@10: CMP U32 CHeapCtrl.hc_signature[RDX],HEAP_CTRL_SIGNATURE_VAL
|
||||
JE @@15
|
||||
PUSH RDX
|
||||
CALL &SysBadMAlloc
|
||||
JMP I32 _SYS_HLT
|
||||
|
||||
// RAX = size + CMemUsed.start, rounded up to a multiple of 8 and clamped to
// at least CMemUsed.start. Flags are saved here and restored after unlock.
@@15: MOV RAX,U64 SF_ARG1[RBP]
|
||||
PUSHFD
|
||||
ADD RAX,CMemUsed.start+7 // round-up to I64
|
||||
AND AL,0xF8
|
||||
#assert CMemUsed.start >= sizeof(CMemUnused)
|
||||
CMP RAX,CMemUsed.start
|
||||
JAE @@20
|
||||
MOV RAX,CMemUsed.start
|
||||
@@20:
|
||||
|
||||
// Interrupts off, then spin until the HClf_LOCKED bit of this heap is acquired.
CLI
|
||||
@@25: LOCK
|
||||
BTS U32 CHeapCtrl.locked_flags[RDX],HClf_LOCKED
|
||||
PAUSE // don't know if this inst helps
|
||||
JC @@25
|
||||
|
||||
// Sizes below MEM_HEAP_HASH_SIZE first try the heap_hash slot indexed by the
// rounded size; a non-empty slot is popped and returned directly.
CMP RAX,MEM_HEAP_HASH_SIZE
|
||||
JAE @@30
|
||||
MOV RSI,U64 CHeapCtrl.heap_hash[RAX+RDX]
|
||||
TEST RSI,RSI
|
||||
JZ @@35
|
||||
MOV RCX,U64 CMemUnused.next[RSI]
|
||||
MOV U64 CHeapCtrl.heap_hash[RAX+RDX],RCX
|
||||
JMP I32 RMALLOC_ALMOST_DONE
|
||||
|
||||
// Big allocation
// Page count = (RAX + sizeof(CMemBlk) + MEM_PAG_SIZE-1) >> MEM_PAG_BITS; the
// heap-control pointer and the page count are pushed for MemPagTaskAlloc.
|
||||
@@30: ADD RAX,sizeof(CMemBlk)+MEM_PAG_SIZE-1
|
||||
SHR RAX,MEM_PAG_BITS
|
||||
|
||||
PUSH RDX // preserve HeapCtrl
|
||||
PUSH RDX
|
||||
PUSH RAX
|
||||
CALL &MemPagTaskAlloc
|
||||
POP RDX
|
||||
TEST RAX,RAX
|
||||
JZ @@45 // Out of memory
|
||||
MOV RSI,RAX
|
||||
MOV EAX,U32 CMemBlk.pags[RSI]
|
||||
|
||||
// RAX = usable bytes of the returned block, RSI = first byte past its CMemBlk header.
SHL RAX,MEM_PAG_BITS
|
||||
SUB RAX,sizeof(CMemBlk)
|
||||
ADD RSI,sizeof(CMemBlk)
|
||||
JMP I32 RMALLOC_ALMOST_DONE
|
||||
|
||||
// Little allocation, chunk-off piece from free lst chunks
|
||||
@@35: LEA RSI,U64 CHeapCtrl.malloc_free_lst-CMemUnused.next[RDX]
|
||||
|
||||
// Walk malloc_free_lst: RBX = previous entry, RSI = current entry.
// When the list is exhausted, fetch more pages (size + 16 pages - 1, in pages).
@@40: MOV RBX,RSI
|
||||
MOV RSI,U64 CMemUnused.next[RBX]
|
||||
TEST RSI,RSI
|
||||
JNZ I32 @@60
|
||||
PUSH RAX //-**** save byte size
|
||||
ADD RAX,16*MEM_PAG_SIZE-1
|
||||
SHR RAX,MEM_PAG_BITS
|
||||
|
||||
PUSH RDX // preserve HeapCtrl
|
||||
PUSH RDX
|
||||
PUSH RAX
|
||||
CALL &MemPagTaskAlloc
|
||||
POP RDX
|
||||
TEST RAX,RAX
|
||||
JNZ @@50
|
||||
|
||||
// Out of memory
// Releases the heap lock, restores flags, then calls throw with 'OutMem'.
|
||||
@@45: LOCK
|
||||
BTR U32 CHeapCtrl.locked_flags[RDX],HClf_LOCKED
|
||||
POPFD
|
||||
PUSH TRUE
|
||||
MOV RAX,'OutMem'
|
||||
PUSH RAX
|
||||
CALL I32 &throw
|
||||
JMP I32 RMALLOC_FINAL_EXIT // Never gets here, hopefully.
|
||||
|
||||
// Fresh pages: RSI = new CMemBlk, RAX = its size in bytes.
@@50: MOV RSI,RAX
|
||||
MOV EAX,U32 CMemBlk.pags[RSI]
|
||||
SHL RAX,MEM_PAG_BITS
|
||||
|
||||
// Can it be combined with last chunk? (Never Free these chunks.)
// Merges only when last_mergable starts exactly where the new block ends.
|
||||
MOV RDI,U64 CHeapCtrl.last_mergable[RDX]
|
||||
LEA RBX,U64 [RSI+RAX]
|
||||
CMP RDI,RBX
|
||||
JNE @@55
|
||||
|
||||
PUSH RAX
|
||||
MOV EAX,U32 CMemBlk.pags[RDI]
|
||||
ADD U32 CMemBlk.pags[RSI],EAX
|
||||
// QueRem
|
||||
MOV RAX,U64 CMemBlk.next[RDI]
|
||||
MOV RBX,U64 CMemBlk.last[RDI]
|
||||
MOV U64 CMemBlk.last[RAX],RBX
|
||||
MOV U64 CMemBlk.next[RBX],RAX
|
||||
POP RAX
|
||||
|
||||
// Record the new block as last_mergable and push its payload onto the head
// of malloc_free_lst as one CMemUnused entry; then carve the request from it.
@@55: MOV U64 CHeapCtrl.last_mergable[RDX],RSI
|
||||
LEA RSI,U64 sizeof(CMemBlk)[RSI]
|
||||
SUB RAX,sizeof(CMemBlk)
|
||||
LEA RBX,U64 CHeapCtrl.malloc_free_lst-CMemUnused.next[RDX]
|
||||
MOV RDI,U64 CMemUnused.next[RBX]
|
||||
MOV U64 CMemUnused.next[RSI],RDI
|
||||
MOV U64 CMemUnused.size[RSI],RAX
|
||||
MOV U64 CMemUnused.next[RBX],RSI
|
||||
POP RAX //+****
|
||||
JMP @@70
|
||||
// Entry too small: keep walking. Exact fit: unlink it (@@65). Larger: split (@@70).
@@60: CMP U64 CMemUnused.size[RSI],RAX
|
||||
JB I32 @@40
|
||||
JNE @@70
|
||||
|
||||
@@65: MOV RDI,U64 CMemUnused.next[RSI]
|
||||
MOV U64 CMemUnused.next[RBX],RDI
|
||||
JMP RMALLOC_ALMOST_DONE
|
||||
|
||||
// Split: shrink the free entry by RAX; if what remains could not hold a
// CMemUnused header, undo the subtraction and resume the walk at @@40.
@@70: SUB U64 CMemUnused.size[RSI],RAX // UPDATE FREE ENTRY
|
||||
CMP U64 CMemUnused.size[RSI],sizeof(CMemUnused)
|
||||
JAE @@75 // take from top of block
|
||||
ADD U64 CMemUnused.size[RSI],RAX // doesn't fit, undo
|
||||
JMP I32 @@40
|
||||
|
||||
@@75: ADD RSI,U64 CMemUnused.size[RSI]
|
||||
|
||||
RMALLOC_ALMOST_DONE:
|
||||
// RSI=res-CMemUsed.size
|
||||
// RAX=size+CMemUsed.size
|
||||
// RDX=HeapCtrl
|
||||
ADD U64 CHeapCtrl.used_u8s[RDX],RAX
|
||||
|
||||
#if _CFG_HEAP_DBG
|
||||
// QueIns
|
||||
MOV RDI,U64 CHeapCtrl.last_um[RDX]
|
||||
MOV U64 CMemUsed.next[RDI],RSI
|
||||
MOV U64 CHeapCtrl.last_um[RDX],RSI
|
||||
MOV U64 CMemUsed.last[RSI],RDI
|
||||
LEA RDI,U64 CHeapCtrl.next_um-CMemUsed.next[RDX]
|
||||
MOV U64 CMemUsed.next[RSI],RDI
|
||||
|
||||
// Caller1/Caller2
// Return addresses are recorded only when below [MEM_HEAP_LIMIT]; otherwise 0.
|
||||
PUSH RDX
|
||||
MOV RDX,U64 [MEM_HEAP_LIMIT]
|
||||
MOV RDI,U64 SF_RIP[RBP]
|
||||
CMP RDI,RDX
|
||||
JB @@80
|
||||
XOR RDI,RDI
|
||||
MOV U64 CMemUsed.caller1[RSI],RDI
|
||||
JMP @@90
|
||||
@@80: MOV U64 CMemUsed.caller1[RSI],RDI
|
||||
MOV RDI,U64 SF_RBP[RBP]
|
||||
CMP RDI,RDX
|
||||
JB @@85
|
||||
XOR RDI,RDI
|
||||
JMP @@90
|
||||
@@85: MOV RDI,U64 SF_RIP[RDI]
|
||||
CMP RDI,RDX
|
||||
JB @@90
|
||||
XOR RDI,RDI
|
||||
@@90: MOV U64 CMemUsed.caller2[RSI],RDI
|
||||
POP RDX
|
||||
|
||||
#endif
|
||||
// Drop the heap lock and restore the flags saved at @@15.
LOCK
|
||||
BTR U32 CHeapCtrl.locked_flags[RDX],HClf_LOCKED
|
||||
POPFD
|
||||
|
||||
// Fill in the CMemUsed header; RAX becomes the address handed to the caller.
MOV U64 CMemUsed.size[RSI],RAX
|
||||
MOV U64 CMemUsed.hc[RSI],RDX
|
||||
LEA RAX,U64 CMemUsed.start[RSI]
|
||||
|
||||
// When the heaplog semaphore bit is set, pass the result to the
// EXT_HEAPLOG_MALLOC entry of the extern table if that entry is non-zero.
TEST U8 [SYS_SEMAS+SEMA_HEAPLOG_ACTIVE*DFT_CACHE_LINE_WIDTH],1
|
||||
JZ @@105
|
||||
PUSH RAX
|
||||
PUSH RAX
|
||||
MOV RAX,U64 [SYS_EXTERN_TABLE]
|
||||
MOV RAX,U64 EXT_HEAPLOG_MALLOC*8[RAX]
|
||||
TEST RAX,RAX
|
||||
JZ @@95
|
||||
CALL RAX
|
||||
JMP @@100
|
||||
@@95: ADD RSP,8
|
||||
@@100: POP RAX
|
||||
|
||||
// When bit 0 of [SYS_HEAP_INIT_FLAG] is set, fill the payload
// (size - CMemUsed.start bytes) with the byte at [SYS_HEAP_INIT_VAL].
@@105: TEST U8 [SYS_HEAP_INIT_FLAG],1
|
||||
JZ RMALLOC_FINAL_EXIT
|
||||
|
||||
PUSH RAX
|
||||
MOV RCX,U64 CMemUsed.size-CMemUsed.start[RAX]
|
||||
SUB RCX,CMemUsed.start
|
||||
MOV RDI,RAX
|
||||
MOV AL,U8 [SYS_HEAP_INIT_VAL]
|
||||
REP_STOSB
|
||||
POP RAX
|
||||
|
||||
RMALLOC_FINAL_EXIT:
|
||||
POP RDI
|
||||
|
||||
POP RSI
|
||||
POP RBP
|
||||
RET1 16
|
||||
//************************************
|
||||
// Heap release entry bound to RFree(addr) by the _extern declaration that
// follows this asm block. SF_ARG1 = address to free; a zero address skips
// straight to RFREE_DONE. RET1 8 drops the single argument.
_RFREE::
|
||||
// Be aware of heap_hash in MemPagTaskAlloc().
|
||||
PUSH RBP
|
||||
MOV RBP,RSP
|
||||
PUSH RSI
|
||||
PUSH RDI
|
||||
|
||||
// Heaplog hook: when the semaphore bit is set, pass the (alignment-adjusted)
// address to the EXT_HEAPLOG_FREE entry of the extern table if it is non-zero.
TEST U8 [SYS_SEMAS+SEMA_HEAPLOG_ACTIVE*DFT_CACHE_LINE_WIDTH],1
|
||||
JZ @@15
|
||||
MOV RBX,U64 SF_ARG1[RBP]
|
||||
TEST RBX,RBX
|
||||
JZ @@05
|
||||
MOV RAX,U64 CMemUsed.size-CMemUsed.start[RBX]
|
||||
TEST RAX,RAX
|
||||
JGE @@05 // Aligned alloced chunks have neg size
|
||||
ADD RBX,RAX
|
||||
@@05: PUSH RBX
|
||||
MOV RAX,U64 [SYS_EXTERN_TABLE]
|
||||
MOV RAX,U64 EXT_HEAPLOG_FREE*8[RAX]
|
||||
TEST RAX,RAX
|
||||
JZ @@10
|
||||
CALL RAX
|
||||
JMP @@15
|
||||
@@10: ADD RSP,8
|
||||
|
||||
// Real work starts here: RSI = address argument; zero means nothing to do.
@@15: MOV RSI,U64 SF_ARG1[RBP]
|
||||
TEST RSI,RSI
|
||||
|
||||
#if _CFG_HEAP_DBG
|
||||
JZ I32 RFREE_DONE
|
||||
#else
|
||||
JZ RFREE_DONE
|
||||
#endif
|
||||
|
||||
MOV RAX,U64 CMemUsed.size-CMemUsed.start[RSI]
|
||||
TEST RAX,RAX
|
||||
JGE @@20 // Aligned alloced chunks have neg size.
|
||||
// The neg size is offset to start of CMemUsed struct.
|
||||
ADD RSI,RAX
|
||||
|
||||
// RSI = CMemUsed header, RDX = owning heap control taken from that header.
// A header whose hc lacks the heap-control signature goes to SysBadFree.
@@20: PUSHFD
|
||||
SUB RSI,CMemUsed.start
|
||||
MOV RDX,U64 CMemUsed.hc[RSI]
|
||||
CMP U32 CHeapCtrl.hc_signature[RDX],HEAP_CTRL_SIGNATURE_VAL
|
||||
JE @@25
|
||||
ADD RSI,CMemUsed.start
|
||||
PUSH RSI
|
||||
CALL &SysBadFree
|
||||
JMP I32 _SYS_HLT
|
||||
|
||||
// NOTE(review): used_u8s is decremented before the lock loop below acquires
// HClf_LOCKED, whereas _RMALLOC adds to it while holding the lock - confirm
// this ordering is intended.
@@25: MOV RAX,U64 CMemUsed.size[RSI]
|
||||
SUB U64 CHeapCtrl.used_u8s[RDX],RAX
|
||||
CLI
|
||||
@@30: LOCK
|
||||
BTS U32 CHeapCtrl.locked_flags[RDX],HClf_LOCKED
|
||||
PAUSE
|
||||
JC @@30
|
||||
#if _CFG_HEAP_DBG
|
||||
// QueRem
|
||||
MOV RDX,U64 CMemUsed.next[RSI]
|
||||
MOV RDI,U64 CMemUsed.last[RSI]
|
||||
MOV U64 CMemUsed.last[RDX],RDI
|
||||
MOV U64 CMemUsed.next[RDI],RDX
|
||||
|
||||
// Caller1/Caller2
// Return addresses are recorded only when below [MEM_HEAP_LIMIT]; otherwise 0.
|
||||
MOV RDX,U64 [MEM_HEAP_LIMIT]
|
||||
MOV RDI,U64 SF_RIP[RBP]
|
||||
CMP RDI,RDX
|
||||
JB @@35
|
||||
XOR RDI,RDI
|
||||
MOV U64 CMemUnused.caller1[RSI],RDI
|
||||
JMP @@45
|
||||
@@35: MOV U64 CMemUnused.caller1[RSI],RDI
|
||||
MOV RDI,U64 SF_RBP[RBP]
|
||||
CMP RDI,RDX
|
||||
JB @@40
|
||||
XOR RDI,RDI
|
||||
JMP @@45
|
||||
@@40: MOV RDI,U64 SF_RIP[RDI]
|
||||
CMP RDI,RDX
|
||||
JB @@45
|
||||
XOR RDI,RDI
|
||||
@@45: MOV U64 CMemUnused.caller2[RSI],RDI
|
||||
|
||||
// RDX was reused as scratch above; reload the heap control pointer.
MOV RDX,U64 CMemUsed.hc[RSI]
|
||||
#endif
|
||||
// Chunks below MEM_HEAP_HASH_SIZE are pushed onto the heap_hash slot indexed
// by their size; larger ones are handed back through MemPagTaskFree (@@50).
CMP RAX,MEM_HEAP_HASH_SIZE
|
||||
JAE @@50
|
||||
|
||||
#assert CMemUnused.size == CMemUsed.size
|
||||
// MOV U64 CMemUnused.size[RSI],RAX
|
||||
|
||||
MOV RBX,U64 CHeapCtrl.heap_hash[RAX+RDX]
|
||||
MOV U64 CMemUnused.next[RSI],RBX
|
||||
MOV U64 CHeapCtrl.heap_hash[RAX+RDX],RSI
|
||||
JMP @@55
|
||||
|
||||
// RSI steps back over the CMemBlk header; the first PUSH RDX preserves the
// heap control pointer across the call, the other two pushes are the arguments.
@@50: SUB RSI,sizeof(CMemBlk)
|
||||
PUSH RDX
|
||||
PUSH RDX
|
||||
PUSH RSI
|
||||
CALL &MemPagTaskFree
|
||||
POP RDX
|
||||
|
||||
// Release the heap lock and restore the flags saved at @@20.
@@55: LOCK
|
||||
BTR U32 CHeapCtrl.locked_flags[RDX],HClf_LOCKED
|
||||
POPFD
|
||||
RFREE_DONE:
|
||||
POP RDI
|
||||
POP RSI
|
||||
POP RBP
|
||||
RET1 8
|
||||
//************************************
|
||||
}
|
||||
|
||||
_extern _RMALLOC U8* RMAlloc(I64 size, CTask* mem_task = NULL);
|
||||
_extern _RFREE U0 RFree(U8* addr);
|
||||
|
||||
// Zero-filled variant of RMAlloc().
// size:     number of bytes to allocate and clear.
// mem_task: forwarded unchanged to RMAlloc().
// Returns whatever RMAlloc() returned; the block is cleared only when that
// result is non-zero.
U8* RCAlloc(I64 size, CTask* mem_task = NULL)
{
    U8* block = RMAlloc(size, mem_task);
    if (!block)
        return block;
    MemSet(block, NULL, size);
    return block;
}
|
||||
|
||||
// One allocator request exchanged between RMAllocOper() (the requester) and
// @rmalloc_task() (the servicing task). The requester fills every field and
// publishes the record's address at RMALLOC_REQUEST_PTR.
class @rmalloc_request
|
||||
{
|
||||
// Address the servicing task stores the RMAlloc/RCAlloc result to.
U64 res_addr;
|
||||
// Passed as the mem_task argument of RMAlloc/RCAlloc.
CTask* task;
|
||||
// Byte count passed to RMAlloc/RCAlloc.
U64 bytes;
|
||||
// Address passed to RFree for RMALLOC_TYPE_FREE requests.
U64 ptr;
|
||||
// One of the RMALLOC_TYPE_* values; selects the operation.
Bool type;
|
||||
// Set to 1 by the servicing task once the request has been handled;
// the requester polls it.
Bool kick;
|
||||
};
|
||||
|
||||
// Service loop for allocator requests.
// Polls the single mailbox word at RMALLOC_REQUEST_PTR. When it holds a
// request pointer, the operation selected by the request's type is performed,
// the result (for MALLOC/CALLOC) is stored through res_addr, the request's
// kick byte is set, and the mailbox is cleared. Unknown types are acknowledged
// without doing any work. Yields after every poll. Never returns.
U0 @rmalloc_task()
{
    U64* slot = RMALLOC_REQUEST_PTR;
    @rmalloc_request* req = NULL;
    while (TRUE) {
        req = *slot;
        if (req) {
            if (req->type == RMALLOC_TYPE_MALLOC)
                LXchgI64(req->res_addr, RMAlloc(req->bytes, req->task));
            else if (req->type == RMALLOC_TYPE_CALLOC)
                LXchgI64(req->res_addr, RCAlloc(req->bytes, req->task));
            else if (req->type == RMALLOC_TYPE_FREE)
                RFree(req->ptr);
            // Acknowledge first, then free the mailbox for the next requester.
            LXchgU8(&req->kick, 1);
            *slot = NULL;
        }
        Yield;
    }
}
|
||||
|
||||
// Submit one allocator request to the servicing task and wait for completion.
//
// size: byte count for MALLOC/CALLOC requests.
// task: forwarded to the allocator as its mem_task argument.
// type: RMALLOC_TYPE_MALLOC, RMALLOC_TYPE_CALLOC or RMALLOC_TYPE_FREE.
// addr: block to release for RMALLOC_TYPE_FREE requests.
// Returns the value the servicing task stored through res_addr; it stays NULL
// for FREE requests, which store nothing.
//
// The request record lives on this function's stack and its address is
// published in the mailbox word at RMALLOC_REQUEST_PTR. If no acknowledgement
// arrives within RMALLOC_TIMEOUT polls the request is published again.
U64 RMAllocOper(I64 size, CTask* task = NULL, Bool type = RMALLOC_TYPE_MALLOC, U64 addr = NULL)
{
    I64 count = 0;
    U64 res = NULL;
    U64* ptr = RMALLOC_REQUEST_PTR;
    @rmalloc_request r;

    r.res_addr = &res;
    r.bytes = size;
    r.task = task;
    r.ptr = addr;
    r.type = type;
    r.kick = FALSE;

retry_rmalloc:
    count = 0;
    // Wait until the mailbox is empty.
    while (*ptr)
        Busy(RMALLOC_BUSY_WAIT);

    // A request that timed out may still have been in flight and completed
    // while we waited above. Publishing it again would run the operation a
    // second time (double free / leaked block) and leave a pointer to this
    // dead stack frame in the mailbox, so stop as soon as it is acknowledged.
    if (r.kick)
        return res;

    // NOTE(review): the empty-check above and this store are not one atomic
    // step, so two requesters can still overwrite each other; the timeout and
    // retry below is what recovers a request lost that way.
    LXchgI64(ptr, &r);
    while (!r.kick) {
        ++count;
        if (count > RMALLOC_TIMEOUT)
            goto retry_rmalloc;
        Busy(RMALLOC_BUSY_WAIT);
    }

    return res;
}
|
||||
|
||||
// MAlloc replacement: routes the allocation through RMAllocOper() as an
// RMALLOC_TYPE_MALLOC request and returns its result.
U64 MAlloc2(I64 size, CTask* task = NULL)
{
    U64 block = RMAllocOper(size, task, RMALLOC_TYPE_MALLOC);
    return block;
}
|
||||
|
||||
// CAlloc replacement: routes the allocation through RMAllocOper() as an
// RMALLOC_TYPE_CALLOC request and returns its result.
U64 CAlloc2(I64 size, CTask* task = NULL)
{
    U64 block = RMAllocOper(size, task, RMALLOC_TYPE_CALLOC);
    return block;
}
|
||||
|
||||
// Free replacement: submits addr to RMAllocOper() as an RMALLOC_TYPE_FREE
// request. Size and task are unused for this request type and passed as zero.
U0 Free2(U8* addr)
{
    RMAllocOper(0, NULL, RMALLOC_TYPE_FREE, addr);
}
|
||||
|
||||
// Start the request-servicing task on CPU RMALLOC_TASK_CPU and keep its handle.
CTask* sys_malloc_task = Spawn(&@rmalloc_task, , "RMAlloc Task", RMALLOC_TASK_CPU);
|
||||
|
||||
// Live-patch the kernel allocator: the entry of MAlloc, CAlloc and Free is
// overwritten with a jump to the request-based replacements.
// The window manager task is suspended for the duration of the patching and
// resumed afterwards - presumably so it cannot enter the allocator while the
// entry points are being rewritten; TODO confirm.
Suspend(sys_winmgr_task);
|
||||
@patch_jmp_rel32(&MAlloc, &MAlloc2);
|
||||
@patch_jmp_rel32(&CAlloc, &CAlloc2);
|
||||
@patch_jmp_rel32(&Free, &Free2);
|
||||
Suspend(sys_winmgr_task, FALSE);
|
150
System/Setup/Util.HC
Normal file
150
System/Setup/Util.HC
Normal file
|
@ -0,0 +1,150 @@
|
|||
AutoComplete(0);
|
||||
|
||||
// Enable SSE on the CPU this runs on: a read-modify-write of CR0 (clear EM,
// set MP) followed by a read-modify-write of CR4 (set OSFXSR and OSXMMEXCPT).
// Only the low 16 bits (AX) of each register value are modified.
U0 @sse_enable()
|
||||
{
|
||||
/* clang-format off */
|
||||
asm
|
||||
{
|
||||
MOV_EAX_CR0
|
||||
AND AX, 0xFFFB // clear coprocessor emulation CR0.EM
|
||||
OR AX, 0x2 // set coprocessor monitoring CR0.MP
|
||||
MOV_CR0_EAX
|
||||
MOV_EAX_CR4
|
||||
OR AX, 3 << 9 // set CR4.OSFXSR and CR4.OSXMMEXCPT at the same time
|
||||
MOV_CR4_EAX
|
||||
}
|
||||
/* clang-format on */
|
||||
}
|
||||
|
||||
// Spawn @sse_enable once on every CPU from 1 up to mp_cnt - 1.
// CPU 0 is not covered here; the file calls @sse_enable directly for it.
U0 @sse_enable_on_all_cores()
{
    I64 cpu = 1;
    while (cpu < mp_cnt) {
        Spawn(&@sse_enable, , , cpu);
        cpu++;
    }
}
|
||||
|
||||
// Enable SSE
|
||||
@sse_enable;
|
||||
@sse_enable_on_all_cores;
|
||||
|
||||
// Overwrite the 5 bytes at `from` with opcode 0xE8 followed by a 32-bit
// displacement of to - from - 5 (target relative to the end of the
// 5-byte instruction).
// from: address of the bytes to overwrite.
// to:   address the patched instruction should transfer to.
U0 @patch_call_rel32(U32 from, U32 to)
{
    U8* opcode = from;
    I32* disp = from + 1;
    *opcode = 0xE8;
    *disp = to - from - 5;
}
|
||||
|
||||
// Overwrite the 5 bytes at `from` with opcode 0xE9 followed by a 32-bit
// displacement of to - from - 5 (target relative to the end of the
// 5-byte instruction).
// from: address of the bytes to overwrite.
// to:   address the patched instruction should transfer to.
U0 @patch_jmp_rel32(U32 from, U32 to)
{
    U8* opcode = from;
    I32* disp = from + 1;
    *opcode = 0xE9;
    *disp = to - from - 5;
}
|
||||
|
||||
I64 tos_nist_offset = 5603; // UTC -4
|
||||
#define NIST_TIME_OFFSET (tos_nist_offset - local_time_offset / CDATE_FREQ)
|
||||
|
||||
// TempleOS datetime to Unix timestamp.
// Takes the distance from Str2Date("1/1/1970"), divides it by CDATE_FREQ and
// adds NIST_TIME_OFFSET.
public I64 CDate2Unix(CDate dt)
{
    CDate epoch = Str2Date("1/1/1970");
    return ToI64((dt - epoch) / CDATE_FREQ + NIST_TIME_OFFSET);
}
|
||||
|
||||
// Unix timestamp to TempleOS datetime.
// Subtracts NIST_TIME_OFFSET, multiplies by CDATE_FREQ and adds
// Str2Date("1/1/1970").
public CDate Unix2CDate(I64 timestamp)
{
    CDate epoch = Str2Date("1/1/1970");
    return (timestamp - NIST_TIME_OFFSET) * CDATE_FREQ + epoch;
}
|
||||
|
||||
// FIXME: Put these in a "Builtin" library?
|
||||
// Insert every byte of the zero-terminated string s into fifo f, in order.
// The terminator itself is not inserted. Does nothing when f or s is zero.
// The return value of FifoU8Ins is not examined.
U0 FifoU8Cpy(CFifoU8* f, U8* s)
{
    if (f && s) {
        for (; *s; s++)
            FifoU8Ins(f, *s);
    }
}
|
||||
// Returns the state of bit `sc` in kbd.down_bitmap, as reported by Bt().
Bool KeyDown(I64 sc) return Bt(kbd.down_bitmap, sc);
|
||||
// Function-call form of a conditional expression: yields _true when
// _condition is non-zero, _false otherwise. Both value arguments are
// evaluated by the caller before the call.
I64 T(Bool _condition, I64 _true, I64 _false)
{
    I64 picked = _false;
    if (_condition)
        picked = _true;
    return picked;
}
|
||||
|
||||
// Three element-wise forward copy routines differing only in element width.
// Each reads SF_ARG1 into RDI, SF_ARG2 into RSI and SF_ARG3 into RCX, clears
// the direction flag, and runs one REP_MOVSx over RCX elements.
// RAX is loaded from RDI *after* the copy, i.e. the destination pointer as
// advanced by the string instruction. RET1 24 drops the three arguments.
asm
|
||||
{
|
||||
// 16-bit elements (REP_MOVSW).
_MEMCPY_U16::
|
||||
PUSH RBP
|
||||
MOV RBP,RSP
|
||||
PUSH RSI
|
||||
PUSH RDI
|
||||
CLD
|
||||
MOV RDI,U64 SF_ARG1[RBP]
|
||||
MOV RSI,U64 SF_ARG2[RBP]
|
||||
MOV RCX,U64 SF_ARG3[RBP]
|
||||
REP_MOVSW
|
||||
MOV RAX,RDI
|
||||
POP RDI
|
||||
POP RSI
|
||||
POP RBP
|
||||
RET1 24
|
||||
// 32-bit elements (REP_MOVSD).
_MEMCPY_U32::
|
||||
PUSH RBP
|
||||
MOV RBP,RSP
|
||||
PUSH RSI
|
||||
PUSH RDI
|
||||
CLD
|
||||
MOV RDI,U64 SF_ARG1[RBP]
|
||||
MOV RSI,U64 SF_ARG2[RBP]
|
||||
MOV RCX,U64 SF_ARG3[RBP]
|
||||
REP_MOVSD
|
||||
MOV RAX,RDI
|
||||
POP RDI
|
||||
POP RSI
|
||||
POP RBP
|
||||
RET1 24
|
||||
// 64-bit elements (REP_MOVSQ).
_MEMCPY_U64::
|
||||
PUSH RBP
|
||||
MOV RBP,RSP
|
||||
PUSH RSI
|
||||
PUSH RDI
|
||||
CLD
|
||||
MOV RDI,U64 SF_ARG1[RBP]
|
||||
MOV RSI,U64 SF_ARG2[RBP]
|
||||
MOV RCX,U64 SF_ARG3[RBP]
|
||||
REP_MOVSQ
|
||||
MOV RAX,RDI
|
||||
POP RDI
|
||||
POP RSI
|
||||
POP RBP
|
||||
RET1 24
|
||||
}
|
||||
|
||||
public _extern _MEMCPY_U16 U16* MemCpyU16(U16* dst, U16* src, I64 cnt);
|
||||
public
|
||||
_extern _MEMCPY_U32 U32* MemCpyU32(U32* dst, U32* src, I64 cnt);
|
||||
public
|
||||
_extern _MEMCPY_U64 U64* MemCpyU64(U64* dst, U64* src, I64 cnt);
|
||||
|
||||
// Rescale a masked value from one range to another.
// Computes (val & mx1) / mx1 in F64, multiplies by mx2 and converts the
// product to I64 with ToI64().
I64 @lerp(U32 val, U32 mx1, U32 mx2)
{
    F64 ratio = (val & mx1) / ToF64(mx1);
    F64 scaled = ratio * mx2;
    return ToI64(scaled);
}
|
||||
|
||||
// Move the start of a memory block upward by `count` pages.
// First reduces from->pags by count, then copies MEM_PAG_SIZE bytes starting
// at `from` (so the copy carries the reduced page count) to the address
// count * MEM_PAG_SIZE bytes higher, and returns that higher address.
CMemBlk* ShrinkMemBlkByPags(CMemBlk* from, I64 count)
{
    U8* moved = from;

    from->pags -= count;
    moved += count * MEM_PAG_SIZE;
    MemCpy(moved, from, MEM_PAG_SIZE);
    return moved;
}
|
||||
|
||||
// Select between two values: returns _true when _condition is non-zero,
// otherwise _false.
I64 @t(Bool _condition, I64 _true, I64 _false)
{
    if (!_condition)
        return _false;
    return _true;
}
|
||||
|
||||
// Shorthand: passes adam_task->put_doc to DocDump().
U0 dd() { DocDump(adam_task->put_doc); }
|
Loading…
Add table
Add a link
Reference in a new issue