asm {
/***********************************/
_MEMSET::
// Byte fill.
//   SF_ARG1 = destination address
//   SF_ARG2 = fill value (only AL is stored)
//   SF_ARG3 = number of bytes to write
// Returns RAX = address just past the last byte written.
        PUSH    RBP
        MOV     RBP, RSP
        PUSH    RDI                     // RDI is restored before returning
        MOV     RCX, U64 SF_ARG3[RBP]   // RCX = repeat count
        MOV     RAX, U64 SF_ARG2[RBP]   // AL  = value to store
        MOV     RDI, U64 SF_ARG1[RBP]   // RDI = write cursor
        CLD                             // make the string op walk upward
        REP_STOSB
        MOV     RAX, RDI                // cursor now points past the filled area
        POP     RDI
        POP     RBP
        RET1    24                      // return, dropping the 3 stack args

/***********************************/
_MEMSET_U16::
// 16-bit fill.
//   SF_ARG1 = destination address
//   SF_ARG2 = fill value (only AX is stored)
//   SF_ARG3 = number of 16-bit elements to write
// Returns RAX = address just past the last element written.
        PUSH    RBP
        MOV     RBP, RSP
        PUSH    RDI                     // RDI is restored before returning
        MOV     RCX, U64 SF_ARG3[RBP]   // RCX = repeat count
        MOV     RAX, U64 SF_ARG2[RBP]   // AX  = value to store
        MOV     RDI, U64 SF_ARG1[RBP]   // RDI = write cursor
        CLD                             // make the string op walk upward
        REP_STOSW
        MOV     RAX, RDI                // cursor now points past the filled area
        POP     RDI
        POP     RBP
        RET1    24                      // return, dropping the 3 stack args

/***********************************/
_MEMSET_U32::
// 32-bit fill.
//   SF_ARG1 = destination address
//   SF_ARG2 = fill value (only EAX is stored)
//   SF_ARG3 = number of 32-bit elements to write
// Returns RAX = address just past the last element written.
        PUSH    RBP
        MOV     RBP, RSP
        PUSH    RDI                     // RDI is restored before returning
        MOV     RCX, U64 SF_ARG3[RBP]   // RCX = repeat count
        MOV     RAX, U64 SF_ARG2[RBP]   // EAX = value to store
        MOV     RDI, U64 SF_ARG1[RBP]   // RDI = write cursor
        CLD                             // make the string op walk upward
        REP_STOSD
        MOV     RAX, RDI                // cursor now points past the filled area
        POP     RDI
        POP     RBP
        RET1    24                      // return, dropping the 3 stack args

/***********************************/
_MEMSET_I64::
// 64-bit fill.
//   SF_ARG1 = destination address
//   SF_ARG2 = fill value (all of RAX is stored)
//   SF_ARG3 = number of 64-bit elements to write
// Returns RAX = address just past the last element written.
        PUSH    RBP
        MOV     RBP, RSP
        PUSH    RDI                     // RDI is restored before returning
        MOV     RCX, U64 SF_ARG3[RBP]   // RCX = repeat count
        MOV     RAX, U64 SF_ARG2[RBP]   // RAX = value to store
        MOV     RDI, U64 SF_ARG1[RBP]   // RDI = write cursor
        CLD                             // make the string op walk upward
        REP_STOSQ
        MOV     RAX, RDI                // cursor now points past the filled area
        POP     RDI
        POP     RBP
        RET1    24                      // return, dropping the 3 stack args

/***********************************/
_MEMCOPY::
// Forward memory copy.
//   SF_ARG1 = destination, SF_ARG2 = source, SF_ARG3 = byte count
// Whole 16-byte chunks go through XMM15 (which is overwritten); the
// remaining 0..15 bytes are moved with REP_MOVSB.
// Returns RAX = destination address just past the last byte copied.
        PUSH    RBP
        MOV     RBP, RSP
        PUSH    RSI
        PUSH    RDI
        MOV     RCX, U64 SF_ARG3[RBP]   // RCX = bytes still to copy
        MOV     RSI, U64 SF_ARG2[RBP]   // RSI = read cursor
        MOV     RDI, U64 SF_ARG1[RBP]   // RDI = write cursor
        CMP     RCX, 16                 // signed compare, as in the loop test below
        JL      @@30                    // fewer than 16 bytes: tail copy only
@@20:   // 16-byte chunk loop; RCX >= 16 whenever control is here
        MOVUPS  XMM15, [RSI]            // unaligned 128-bit load
        MOVUPS  [RDI], XMM15            // unaligned 128-bit store
        ADD     RSI, 16
        ADD     RDI, 16
        ADD     RCX, -16
        CMP     RCX, 16
        JGE     @@20                    // another full chunk remains
@@30:   // tail: RCX bytes left (less than 16 after the chunk loop)
        CLD
        REP_MOVSB
        MOV     RAX, RDI                // write cursor is the return value
        POP     RDI
        POP     RSI
        POP     RBP
        RET1    24                      // return, dropping the 3 stack args

/***********************************/
_MEMCOMPARE::
// Compare two byte ranges.
//   SF_ARG1 = first buffer, SF_ARG2 = second buffer, SF_ARG3 = byte count
// Returns RAX =  0 if all compared bytes match (or the count is zero),
//                1 if the first differing byte of SF_ARG1 is above
//                  (unsigned) the corresponding byte of SF_ARG2,
//               -1 otherwise.
        PUSH    RBP
        MOV     RBP, RSP
        PUSH    RSI
        PUSH    RDI
        CLD                             // compare upward
        MOV     RSI, U64 SF_ARG1[RBP]
        MOV     RDI, U64 SF_ARG2[RBP]
        MOV     RCX, U64 SF_ARG3[RBP]
        XOR     RAX, RAX                // result = 0; also sets ZF for the count == 0 case
        REPE_CMPSB                      // stops at the first mismatch or when RCX hits 0
        JZ      @@05                    // ZF set: no mismatch, return 0
        MOV     AL,  1                  // MOV leaves the compare flags intact
        JA      @@05                    // [RSI] byte above [RDI] byte: return 1
        MOV     RAX, -1
@@05:   POP     RDI
        POP     RSI
        POP     RBP
        RET1    24                      // return, dropping the 3 stack args

/***********************************/
_BEQUAL::
// Assign one bit in a bit field and report its previous value.
//   SF_ARG1 = bit field base address
//   SF_ARG2 = bit index
//   SF_ARG3 = new value: non-zero sets the bit, zero clears it
// Returns RAX = previous value of the bit (0 or 1).
// RBX, RCX and RDX are overwritten.
        PUSH    RBP
        MOV     RBP, RSP
        XOR     RAX, RAX                // result starts at 0
        MOV     RDX, U64 SF_ARG3[RBP]
        MOV     RBX, U64 SF_ARG1[RBP]
        MOV     RCX, U64 SF_ARG2[RBP]
        TEST    RDX, RDX
        JZ      @@05                    // new value is zero: clear the bit
        BTS     U64 [RBX], RCX          // set bit; CF = old bit
        JMP     @@10
@@05:   BTR     U64 [RBX], RCX          // clear bit; CF = old bit
@@10:   ADC     AL,  0                  // AL = 0 + CF, i.e. the old bit
        POP     RBP
        RET1    24                      // return, dropping the 3 stack args

/***********************************/
_LBEQUAL::
// Same operation as a bit assign with old-value return, but the
// read-modify-write instruction carries a LOCK prefix.
//   SF_ARG1 = bit field base address
//   SF_ARG2 = bit index
//   SF_ARG3 = new value: non-zero sets the bit, zero clears it
// Returns RAX = previous value of the bit (0 or 1).
// RBX, RCX and RDX are overwritten.
        PUSH    RBP
        MOV     RBP, RSP
        XOR     RAX, RAX                // result starts at 0
        MOV     RDX, U64 SF_ARG3[RBP]
        MOV     RBX, U64 SF_ARG1[RBP]
        MOV     RCX, U64 SF_ARG2[RBP]
        TEST    RDX, RDX
        JZ      @@05                    // new value is zero: clear the bit
        LOCK                            // prefix applies to the BTS that follows
        BTS     U64 [RBX], RCX          // set bit; CF = old bit
        JMP     @@10
@@05:   LOCK                            // prefix applies to the BTR that follows
        BTR     U64 [RBX], RCX          // clear bit; CF = old bit
@@10:   ADC     AL,  0                  // AL = 0 + CF, i.e. the old bit
        POP     RBP
        RET1    24                      // return, dropping the 3 stack args

/***********************************/
_CLAMP_I64::
// Signed clamp.
//   SF_ARG1 = value, SF_ARG2 = lower bound, SF_ARG3 = upper bound
// Returns RAX = value raised to the lower bound if below it, then
// lowered to the upper bound if above it (signed comparisons).
        PUSH    RBP
        MOV     RBP, RSP
        MOV     RDX, U64 SF_ARG3[RBP]   // RDX = upper bound
        MOV     RCX, U64 SF_ARG2[RBP]   // RCX = lower bound
        MOV     RAX, U64 SF_ARG1[RBP]   // RAX = candidate result
        CMP     RAX, RCX
        CMOVL   RAX, RCX                // below the lower bound: take the bound
        CMP     RAX, RDX
        CMOVG   RAX, RDX                // above the upper bound: take the bound
        POP     RBP
        RET1    24                      // return, dropping the 3 stack args

/***********************************/
_CLAMP_U64::
// Unsigned clamp.
//   SF_ARG1 = value, SF_ARG2 = lower bound, SF_ARG3 = upper bound
// Returns RAX = value raised to the lower bound if below it, then
// lowered to the upper bound if above it (unsigned comparisons).
        PUSH    RBP
        MOV     RBP, RSP
        MOV     RDX, U64 SF_ARG3[RBP]   // RDX = upper bound
        MOV     RCX, U64 SF_ARG2[RBP]   // RCX = lower bound
        MOV     RAX, U64 SF_ARG1[RBP]   // RAX = candidate result
        CMP     RAX, RCX
        CMOVB   RAX, RCX                // below the lower bound: take the bound
        CMP     RAX, RDX
        CMOVA   RAX, RDX                // above the upper bound: take the bound
        POP     RBP
        RET1    24                      // return, dropping the 3 stack args

/***********************************/
_CALL::
// Call the code address passed in SF_ARG1, unless it is zero.
// RAX on return is whatever the callee left there, or 0 when the
// address was zero and no call was made.
        PUSH    RBP
        MOV     RBP, RSP
        MOV     RAX, U64 SF_ARG1[RBP]   // RAX = target address
        TEST    RAX, RAX
        JZ      @@10                    // zero address: skip the call
        CALL    RAX
@@10:   POP     RBP
        RET1    8                       // return, dropping the 1 stack arg

/***********************************/
_BIT_FIELD_OR_U32::
// OR a value into a bit field at an arbitrary bit offset.
//   SF_ARG1 = bit field base address
//   SF_ARG2 = bit offset from the base
//   SF_ARG3 = value to OR in
// The value is shifted left by (offset & 7) and ORed into the 64-bit
// word at base + (offset >> 3). RAX, RBX and RCX are overwritten.
        PUSH    RBP
        MOV     RBP, RSP
        MOV     RCX, U64 SF_ARG2[RBP]   // RCX = bit offset
        MOV     RBX, RCX
        SHR     RBX, 3                  // RBX = byte part of the offset
        ADD     RBX, U64 SF_ARG1[RBP]   // RBX = address of the target word
        MOV     RAX, U64 SF_ARG3[RBP]   // RAX = value
        AND     CL,  7                  // CL = bit position inside the byte
        SHL     RAX, CL
        OR      U64 [RBX], RAX
        POP     RBP
        RET1    24                      // return, dropping the 3 stack args

/***********************************/
_BIT_FIELD_EXT_U32::
// Extract a run of bits from a bit field.
//   SF_ARG1 = bit field base address
//   SF_ARG2 = bit offset of the first bit
//   SF_ARG3 = number of bits to extract
// Loads the 64-bit word at base + (offset >> 3), shifts it right by
// (offset & 7) and masks it with (1 << size) - 1.
// Returns the extracted bits in RAX. RBX, RCX and RDX are overwritten.
        PUSH    RBP
        MOV     RBP, RSP
        MOV     RCX, U64 SF_ARG2[RBP]   // RCX = bit offset
        MOV     RBX, RCX
        SHR     RBX, 3                  // RBX = byte part of the offset
        ADD     RBX, U64 SF_ARG1[RBP]   // RBX = address of the source word
        AND     CL,  7                  // CL = bit position inside the byte
        MOV     RDX, U64 [RBX]
        SHR     RDX, CL                 // wanted bits now start at bit 0
        MOV     RAX, 1
        MOV     RCX, U64 SF_ARG3[RBP]   // CL = field width
        SHL     RAX, CL
        DEC     RAX                     // RAX = mask of 'width' low bits
        AND     RAX, RDX
        POP     RBP
        RET1    24                      // return, dropping the 3 stack args

/***********************************/
_XCHG_I64::
// Swap a 64-bit memory cell with a new value.
//   SF_ARG1 = address of the cell, SF_ARG2 = value to store
// Returns RAX = the value the cell held before the swap.
        PUSH    RBP
        MOV     RBP, RSP
        MOV     RAX, U64 SF_ARG2[RBP]   // RAX = incoming value
        MOV     RDX, U64 SF_ARG1[RBP]   // RDX = cell address
        XCHG    U64 [RDX], RAX          // RAX receives the old contents
        POP     RBP
        RET1    16                      // return, dropping the 2 stack args

/***********************************/
_XCHG_U32::
// Swap a 32-bit memory cell with a new value.
//   SF_ARG1 = address of the cell, SF_ARG2 = value to store (low 32 bits)
// Returns RAX = the previous 32-bit contents, zero-extended.
        PUSH    RBP
        MOV     RBP, RSP
        MOV     EAX, U32 SF_ARG2[RBP]   // EAX = incoming value; upper RAX becomes 0
        MOV     RDX, U64 SF_ARG1[RBP]   // RDX = cell address
        XCHG    U32 [RDX], EAX          // EAX receives the old contents
        POP     RBP
        RET1    16                      // return, dropping the 2 stack args

/***********************************/
_XCHG_U16::
// Swap a 16-bit memory cell with a new value.
//   SF_ARG1 = address of the cell, SF_ARG2 = value to store (low 16 bits)
// Returns RAX = the previous 16-bit contents, zero-extended.
        PUSH    RBP
        MOV     RBP, RSP
        MOVZX   RAX, U16 SF_ARG2[RBP]   // AX = incoming value; rest of RAX is 0
        MOV     RDX, U64 SF_ARG1[RBP]   // RDX = cell address
        XCHG    U16 [RDX], AX           // AX receives the old contents
        POP     RBP
        RET1    16                      // return, dropping the 2 stack args

/***********************************/
_XCHG_U8::
// Swap an 8-bit memory cell with a new value.
//   SF_ARG1 = address of the cell, SF_ARG2 = value to store (low 8 bits)
// Returns RAX = the previous byte, zero-extended.
        PUSH    RBP
        MOV     RBP, RSP
        MOVZX   RAX, U8 SF_ARG2[RBP]    // AL = incoming value; rest of RAX is 0
        MOV     RDX, U64 SF_ARG1[RBP]   // RDX = cell address
        XCHG    U8 [RDX], AL            // AL receives the old contents
        POP     RBP
        RET1    16                      // return, dropping the 2 stack args

/***********************************/
_LXCHG_I64::
// Swap a 64-bit memory cell with a new value, with an explicit LOCK prefix.
//   SF_ARG1 = address of the cell, SF_ARG2 = value to store
// Returns RAX = the value the cell held before the swap.
        PUSH    RBP
        MOV     RBP, RSP
        MOV     RAX, U64 SF_ARG2[RBP]   // RAX = incoming value
        MOV     RDX, U64 SF_ARG1[RBP]   // RDX = cell address
        LOCK                            // prefix for the XCHG below
        XCHG    U64 [RDX], RAX          // RAX receives the old contents
        POP     RBP
        RET1    16                      // return, dropping the 2 stack args

/***********************************/
_LXCHG_U32::
// Swap a 32-bit memory cell with a new value, with an explicit LOCK prefix.
//   SF_ARG1 = address of the cell, SF_ARG2 = value to store (low 32 bits)
// Returns RAX = the previous 32-bit contents, zero-extended.
        PUSH    RBP
        MOV     RBP, RSP
        MOV     EAX, U32 SF_ARG2[RBP]   // EAX = incoming value; upper RAX becomes 0
        MOV     RDX, U64 SF_ARG1[RBP]   // RDX = cell address
        LOCK                            // prefix for the XCHG below
        XCHG    U32 [RDX], EAX          // EAX receives the old contents
        POP     RBP
        RET1    16                      // return, dropping the 2 stack args

/***********************************/
_LXCHG_U16::
// Swap a 16-bit memory cell with a new value, with an explicit LOCK prefix.
//   SF_ARG1 = address of the cell, SF_ARG2 = value to store (low 16 bits)
// Returns RAX = the previous 16-bit contents, zero-extended.
        PUSH    RBP
        MOV     RBP, RSP
        MOVZX   RAX, U16 SF_ARG2[RBP]   // AX = incoming value; rest of RAX is 0
        MOV     RDX, U64 SF_ARG1[RBP]   // RDX = cell address
        LOCK                            // prefix for the XCHG below
        XCHG    U16 [RDX], AX           // AX receives the old contents
        POP     RBP
        RET1    16                      // return, dropping the 2 stack args

/***********************************/
_LXCHG_U8::
// Swap an 8-bit memory cell with a new value, with an explicit LOCK prefix.
//   SF_ARG1 = address of the cell, SF_ARG2 = value to store (low 8 bits)
// Returns RAX = the previous byte, zero-extended.
        PUSH    RBP
        MOV     RBP, RSP
        MOVZX   RAX, U8 SF_ARG2[RBP]    // AL = incoming value; rest of RAX is 0
        MOV     RDX, U64 SF_ARG1[RBP]   // RDX = cell address
        LOCK                            // prefix for the XCHG below
        XCHG    U8 [RDX], AL            // AL receives the old contents
        POP     RBP
        RET1    16                      // return, dropping the 2 stack args

/***********************************/
_ENDIAN_U16::
// Byte-swap the low 16 bits of SF_ARG1.
// Returns the swapped value in RAX, zero-extended.
        PUSH    RBP
        MOV     RBP, RSP
        MOVZX   RAX, U16 SF_ARG1[RBP]   // RAX = 16-bit input, upper bits 0
        XCHG    AH, AL                  // trade the two bytes of AX
        POP     RBP
        RET1    8                       // return, dropping the 1 stack arg

/***********************************/
_ENDIAN_U32::
// Byte-swap the low 32 bits of SF_ARG1.
// Returns the swapped value in RAX; the 32-bit BSWAP leaves the upper
// half of RAX zero.
        PUSH    RBP
        MOV     RBP, RSP
        MOV     RAX, U64 SF_ARG1[RBP]   // full argument slot; only EAX is used
        BSWAP   EAX                     // reverse the 4 bytes of EAX
        POP     RBP
        RET1    8                       // return, dropping the 1 stack arg

/***********************************/
_ENDIAN_U64::
// Byte-swap all 64 bits of SF_ARG1.
// Returns the swapped value in RAX.
        PUSH    RBP
        MOV     RBP, RSP
        MOV     RAX, U64 SF_ARG1[RBP]   // RAX = input
        BSWAP   RAX                     // reverse the 8 bytes of RAX
        POP     RBP
        RET1    8                       // return, dropping the 1 stack arg

/***********************************/
_REP_IN_U32::
// Read a run of 32-bit values from an I/O port into memory.
//   SF_ARG1 = destination buffer
//   SF_ARG2 = number of 32-bit reads
//   SF_ARG3 = port number (DX is what the instruction uses)
// The direction flag is used as found; this routine does not change it.
// RAX is not set here.
        PUSH    RBP
        MOV     RBP, RSP
        PUSH    RDI                     // RDI is restored before returning
        MOV     RCX, U64 SF_ARG2[RBP]   // RCX = repeat count
        MOV     RDX, U64 SF_ARG3[RBP]   // DX  = port
        MOV     RDI, U64 SF_ARG1[RBP]   // RDI = write cursor
        REP_INSD
        POP     RDI
        POP     RBP
        RET1    24                      // return, dropping the 3 stack args

/***********************************/
_REP_IN_U16::
// Read a run of 16-bit values from an I/O port into memory.
//   SF_ARG1 = destination buffer
//   SF_ARG2 = number of 16-bit reads
//   SF_ARG3 = port number (DX is what the instruction uses)
// The direction flag is used as found; this routine does not change it.
// RAX is not set here.
        PUSH    RBP
        MOV     RBP, RSP
        PUSH    RDI                     // RDI is restored before returning
        MOV     RCX, U64 SF_ARG2[RBP]   // RCX = repeat count
        MOV     RDX, U64 SF_ARG3[RBP]   // DX  = port
        MOV     RDI, U64 SF_ARG1[RBP]   // RDI = write cursor
        REP_INSW
        POP     RDI
        POP     RBP
        RET1    24                      // return, dropping the 3 stack args

/***********************************/
_REP_IN_U8::
// Read a run of bytes from an I/O port into memory.
//   SF_ARG1 = destination buffer
//   SF_ARG2 = number of byte reads
//   SF_ARG3 = port number (DX is what the instruction uses)
// The direction flag is used as found; this routine does not change it.
// RAX is not set here.
        PUSH    RBP
        MOV     RBP, RSP
        PUSH    RDI                     // RDI is restored before returning
        MOV     RCX, U64 SF_ARG2[RBP]   // RCX = repeat count
        MOV     RDX, U64 SF_ARG3[RBP]   // DX  = port
        MOV     RDI, U64 SF_ARG1[RBP]   // RDI = write cursor
        REP_INSB
        POP     RDI
        POP     RBP
        RET1    24                      // return, dropping the 3 stack args

/***********************************/
_REP_OUT_U32::
// Write a run of 32-bit values from memory to an I/O port.
//   SF_ARG1 = source buffer
//   SF_ARG2 = number of 32-bit writes
//   SF_ARG3 = port number (DX is what the instruction uses)
// The direction flag is used as found; this routine does not change it.
// RAX is not set here.
        PUSH    RBP
        MOV     RBP, RSP
        PUSH    RSI                     // RSI is restored before returning
        MOV     RCX, U64 SF_ARG2[RBP]   // RCX = repeat count
        MOV     RDX, U64 SF_ARG3[RBP]   // DX  = port
        MOV     RSI, U64 SF_ARG1[RBP]   // RSI = read cursor
        REP_OUTSD
        POP     RSI
        POP     RBP
        RET1    24                      // return, dropping the 3 stack args

/***********************************/
_REP_OUT_U16::
// Write a run of 16-bit values from memory to an I/O port.
//   SF_ARG1 = source buffer
//   SF_ARG2 = number of 16-bit writes
//   SF_ARG3 = port number (DX is what the instruction uses)
// The direction flag is used as found; this routine does not change it.
// RAX is not set here.
        PUSH    RBP
        MOV     RBP, RSP
        PUSH    RSI                     // RSI is restored before returning
        MOV     RCX, U64 SF_ARG2[RBP]   // RCX = repeat count
        MOV     RDX, U64 SF_ARG3[RBP]   // DX  = port
        MOV     RSI, U64 SF_ARG1[RBP]   // RSI = read cursor
        REP_OUTSW
        POP     RSI
        POP     RBP
        RET1    24                      // return, dropping the 3 stack args

/***********************************/
_REP_OUT_U8::
// Write a run of bytes from memory to an I/O port.
//   SF_ARG1 = source buffer
//   SF_ARG2 = number of byte writes
//   SF_ARG3 = port number (DX is what the instruction uses)
// The direction flag is used as found; this routine does not change it.
// RAX is not set here.
        PUSH    RBP
        MOV     RBP, RSP
        PUSH    RSI                     // RSI is restored before returning
        MOV     RCX, U64 SF_ARG2[RBP]   // RCX = repeat count
        MOV     RDX, U64 SF_ARG3[RBP]   // DX  = port
        MOV     RSI, U64 SF_ARG1[RBP]   // RSI = read cursor
        REP_OUTSB
        POP     RSI
        POP     RBP
        RET1    24                      // return, dropping the 3 stack args

/***********************************/
_CPUID::
// Execute CPUID for the leaf in SF_ARG1 and store the four result
// registers through the pointer in SF_ARG2.
//   SF_ARG1 = leaf number (goes in EAX)
//   SF_ARG2 = address of a 32-byte result area, written as four 64-bit
//             slots: +0 RAX, +8 RBX, +16 RCX, +24 RDX
// The sub-leaf selector (ECX) is always 0. Leaves that take a sub-leaf
// would otherwise see whatever happened to be in RCX and return
// unpredictable data.
// RAX, RBX, RCX and RDX are overwritten by CPUID.
        PUSH    RBP
        MOV     RBP, RSP
        PUSH    RSI                     // RSI is restored before returning
        MOV     RAX, U64 SF_ARG1[RBP]   // EAX = leaf
        XOR     RCX, RCX                // ECX = sub-leaf 0 (deterministic input)
        CPUID
        MOV     RSI, U64 SF_ARG2[RBP]   // RSI = result area
        MOV     U64 [RSI],   RAX
        MOV     U64 8[RSI],  RBX
        MOV     U64 16[RSI], RCX
        MOV     U64 24[RSI], RDX
        POP     RSI
        POP     RBP
        RET1    16                      // return, dropping the 2 stack args

/***********************************/
_CALL_IND::         //See _LAST_FUN
// Call a code address with a variable number of 8-byte arguments.
//   SF_ARG1 = address to call (may be zero)
//   SF_ARG2 = argc, number of 8-byte arguments
//   SF_ARG3.. = the arguments themselves (argv starts at SF_ARG3)
// The arguments are copied to freshly reserved stack space below the
// saved registers and the target is called. If the address is zero
// nothing is called and RAX is returned as 0.
// Returns with a plain RET: this routine's own arguments are left on
// the stack for its caller.
        PUSH    RBP
        MOV     RBP, RSP
        PUSH    RSI
        PUSH    RDI
        MOV     RDX, U64 SF_ARG1[RBP]   // RDX = target address
        MOV     RCX, U64 SF_ARG2[RBP] //argc
        LEA     RSI, U64 SF_ARG3[RBP] //argv
        SHL     RCX, 3                  // RCX = argc * 8 bytes
        SUB     RSP, RCX                // reserve room for the copy
        MOV     RDI, RSP
        REP_MOVSB                       // copy the arguments; direction flag is used as found
        TEST    RDX, RDX
        JZ      @@05                    // zero address: undo the reservation instead

        CALL    RDX
        // The POPs below assume the callee has already released the
        // copied argument bytes, so RSP is back at the saved RDI.
        POP     RDI
        POP     RSI
        POP     RBP
        RET

@@05:   MOV     RCX, U64 SF_ARG2[RBP] //argc
        SHL     RCX, 3
        ADD     RSP, RCX                // discard the copied arguments ourselves
        XOR     RAX, RAX                // no call made: return 0
        POP     RDI
        POP     RSI
        POP     RBP
        RET

/***********************************/
_CALLEXTNUM::
// Call an entry of the table pointed to by SYS_EXTERN_TABLE with a
// variable number of 8-byte arguments.
//   SF_ARG1 = index into the table (8-byte entries)
//   SF_ARG2 = argc, number of 8-byte arguments
//   SF_ARG3.. = the arguments themselves (argv starts at SF_ARG3)
// If the selected entry is zero nothing is called; RAX is then 0,
// since it holds the zero entry that was just tested.
// Returns with a plain RET: this routine's own arguments are left on
// the stack for its caller.
        PUSH    RBP
        MOV     RBP, RSP
        PUSH    RSI
        PUSH    RDI
        MOV     RDX, U64 SF_ARG1[RBP]   // RDX = table index
        MOV     RCX, U64 SF_ARG2[RBP] //argc
        LEA     RSI, U64 SF_ARG3[RBP] //argv
        SHL     RCX, 3                  // RCX = argc * 8 bytes
        SUB     RSP, RCX                // reserve room for the copy
        MOV     RDI, RSP
        REP_MOVSB                       // copy the arguments; direction flag is used as found
        MOV     RAX, U64 [SYS_EXTERN_TABLE] // RAX = table base
        MOV     RAX, U64 [RAX + RDX * 8]    // RAX = selected entry
        TEST    RAX, RAX
        JZ      @@05                    // empty entry: undo the reservation instead

        CALL    RAX
        // The POPs below assume the callee has already released the
        // copied argument bytes, so RSP is back at the saved RDI.
        POP     RDI
        POP     RSI
        POP     RBP
        RET

@@05:   MOV     RCX, U64 SF_ARG2[RBP] //argc
        SHL     RCX, 3
        ADD     RSP, RCX                // discard the copied arguments ourselves
        POP     RDI
        POP     RSI
        POP     RBP
        RET

/***********************************/
_CALLEXTSTR::
// Look up a symbol through SYS_HASH_FIND and call it with a variable
// number of 8-byte arguments.
//   SF_ARG1 = lookup key handed to SYS_HASH_FIND in RSI
//             (presumably the symbol name string -- confirm against SYS_HASH_FIND)
//   SF_ARG2 = argc, number of 8-byte arguments
//   SF_ARG3.. = the arguments themselves (argv starts at SF_ARG3)
// If the lookup reports failure (ZF set) or the resolved address is
// zero, nothing is called and the copied arguments are discarded.
// Returns with a plain RET: this routine's own arguments are left on
// the stack for its caller.
        PUSH    RBP
        MOV     RBP, RSP
        PUSH    RSI
        PUSH    RDI
        MOV     RCX, U64 SF_ARG2[RBP] //argc
        LEA     RSI, U64 SF_ARG3[RBP] //argv
        SHL     RCX, 3                  // RCX = argc * 8 bytes
        SUB     RSP, RCX                // reserve room for the copy
        MOV     RDI, RSP
        REP_MOVSB                       // copy the arguments; direction flag is used as found
        MOV     RSI, U64 SF_ARG1[RBP]   // RSI = lookup key

        // Register inputs for SYS_HASH_FIND; their exact meaning is
        // defined by that routine, which is outside this block.
        MOV     RCX, 1
        XOR     RAX, RAX
        MOV     RDI, U64 FS:CTask.hash_table[RAX] // hash table of the task addressed via FS
        MOV     RBX, HTT_FUN | HTT_EXPORT_SYS_SYM // accepted entry types
        CALL    SYS_HASH_FIND
        JZ      @@15                    // ZF set is treated as "not found"

        TEST    U32 CHash.type[RAX], HTT_FUN
        JZ      @@05                    // not a function entry: use the export value
        MOV     RAX, U64 CHashFun.exe_addr[RAX]
        JMP     @@10
@@05:   MOV     RAX, U64 CHashExport.val[RAX]
@@10:   TEST    RAX, RAX
        JZ      @@15                    // resolved address is zero: do not call

        CALL    RAX
        // The POPs below assume the callee has already released the
        // copied argument bytes, so RSP is back at the saved RDI.
        POP     RDI
        POP     RSI
        POP     RBP
        RET

@@15:   MOV     RCX, U64 SF_ARG2[RBP] //argc
        SHL     RCX, 3
        ADD     RSP, RCX                // discard the copied arguments ourselves
        POP     RDI
        POP     RSI
        POP     RBP
        RET

/***********************************/
SET_GS_BASE::
// Write RAX to the IA32_GS_BASE MSR.
// Register-called: input is RAX. RAX, RCX and RDX are preserved.
        PUSH    RAX
        PUSH    RCX
        PUSH    RDX
        MOV     ECX, IA32_GS_BASE       // ECX selects the MSR
        MOV     RDX, RAX
        SHR     RDX, 32                 // EDX = high dword, EAX = low dword
        WRMSR
        POP     RDX
        POP     RCX
        POP     RAX
        RET

/***********************************/
SET_FS_BASE::
// Write RAX to the IA32_FS_BASE MSR.
// Register-called: input is RAX. RAX, RCX and RDX are preserved.
        PUSH    RAX
        PUSH    RCX
        PUSH    RDX
        MOV     RDX, RAX
        SHR     RDX, 32                 // EDX = high dword, EAX = low dword
        MOV     ECX, IA32_FS_BASE       // ECX selects the MSR
        WRMSR
        POP     RDX
        POP     RCX
        POP     RAX
        RET                             // explicit return: do not depend on whatever code follows

/***********************************/
_RET::
// Does nothing: returns immediately without touching registers or
// removing any stack arguments.
        RET

/***********************************/
_MSR_READ::
// Read the model-specific register selected by SF_ARG1.
// Returns the full 64-bit MSR value in RAX. RCX and RDX are overwritten.
        PUSH    RBP
        MOV     RBP, RSP
        MOV     RCX, SF_ARG1[RBP]       // ECX selects the MSR
        RDMSR                           // EDX:EAX = MSR; upper halves of RAX and RDX are cleared
        SHL     RDX, 32                 // high dword into bits 63..32
        // Merge with OR. A 32-bit MOV into EDX would zero-extend and
        // wipe the high dword that was just shifted into place.
        OR      RAX, RDX
        POP     RBP
        RET1    8                       // return, dropping the 1 stack arg

/***********************************/
_MSR_WRITE::
// Write a 64-bit value to a model-specific register.
//   SF_ARG1 = MSR number, SF_ARG2 = value to write
// RAX, RCX and RDX are overwritten.
        PUSH    RBP
        MOV     RBP, RSP
        MOV     RCX, U64 SF_ARG1[RBP]   // ECX selects the MSR
        MOV     RAX, U64 SF_ARG2[RBP]   // EAX = low dword of the value
        MOV     RDX, RAX
        SHR     RDX, 32                 // EDX = high dword of the value
        WRMSR
        POP     RBP
        RET1    16                      // return, dropping the 2 stack args

/***********************************/
_SYS_HLT::
// Clear the interrupt flag and halt in an endless loop; never returns.
        CLI
@@10:   HLT
        JMP     @@10                    // if HLT ever resumes, halt again
};