// This code takes advantage of how the data is used by the calling algorithm (the "grahe" method in the original note; handling the general case would be more complicated): len is always even there, so the later check for an odd len is not necessary. It is not much faster than the general method - the gain is minimal - and is offered only as one alternative idea. It is my own original work; if you know a better way, please let me know.
// Implements DEST = A - B. DEST, A and B are all stored with the high part first and the low part last, i.e. DEST[0] is the most significant element; A and B are laid out the same way.
Const unsigned int base = 1000000000; // 10 ^ 9
const unsigned __INT64 SIGN64 = 0x000000000000000000; Const unsigned __INT64 BORROWFIRST = 0x0000000100000000; Const unsigned __int64 Borrownext = 0x0000000000000001;
/ * SUB_MMX () MMX command version * / __ declspec (naked) long subs_mmx (unsigned long * dec, unsigned long * a, unsigned long * b, size_t len) {__ASM {MOV ECX, DWORD PTR [ESP 0x10] // Len Xor Eax, EAX Test ECX, ECX JZ SUB_EXIT
Push EBP MOV EBP, ECX
PUSH EBX MOV EBX, DWORD PTR [ESP 0x14] // EBX = B Push ESI MOV ESI, DWORD PTR [ESP 0x14] // ESI = a Push EDI MOV EDI, DWORD PTR [ESP 0x14] // EDI = DEST SUB ESI, EBX / / ESI = A - B Lea EDX, DWORD PTR [EBX 4 * ECX-8] // & b [i] Sub EDI, EBX // EDI = DEST - B
SHR ECX, 1 // Len = LEN / 2
MOVQ MM7, Base64 // 0x3b9aca003b9aca00 MOVQ MM5, BorrowFirst // 0x0000000100000000 MOVQ MM6, Borrownext // 0x000000000000000000 PXOR MM2, MM2 // Borrow Clear
SUB_LOOP: MOVQ MM0, DWORD PTR [ESI EDX] // A [i] MOVQ MM1, DWORD PTR [EDX] // B [i]
PSUBD MM0, MM2 // DIF = A [I] -Borrow PXOR MM3, MM3 // mm3 = SIGN64 (0x0000000000000000) PSUBD MM0, MM1 // DIF - = B [i] PCMPGTD MM3, MM0 // DIF <0? Here More complicated, pay more, must compare 2 PAND MM3, MM5 // mm5 = BorrowFirst PSRLQ MM3, 32 // Get borrowFirst, MM3 >> 32 PXOR MM4, MM4 // mm4 = SIGN64 (0x0000000000000000) PSUBD MM0, MM3 / / Reducing
PCMPGTD MM4, MM0 // DIF <0? After the carry, the second comparison
MOVQ MM2, MM4 // Backup Comparison Results PAND MM4, MM7 // Received EF = Base Pand MM2, MM6 // Get Next Return, Borrownext
Paddd MM0, MM4 / / Equivalent to DIF = Base PSLLQ MM2, 32 // Borrow = Borrownext << 32
MOVQ DWORD PTR [EDI EDX], MM0 // DEST [I] = DIF SUB EDX, 8 // EDX = & b [i] - 8, equivalent to I- = 2 dec ECX // LEN - JNE SUB_LOOP ///// / * TEST EBP, 1 // If len is an odd number, then the last number of jz sub_fast_ret
MOV ECX, DWORD PTR [ESI EDX] // ESI = a [i] MOV EBX, DWORD PTR [EDX] // EBX = B [i] Sub ECX, EBX // DIF = A [I] - B [i ] MOV EBX, BASE // ESI = Base Add ECX, EAX // Dif - = Borrow XOR EAX, EAX // Borrow = 0 CMP ECX, EBX // DIF <0? JB SUB_DIF // POP EDI POP ESI POP EBX POP EBP EMMSSUB_EXIT: RET SUB_FAST_RET: / / * / PSRLQ MM2, 32 // Borrow >> 32 MOVD EAX, MM2 // Return Borrow POP EDI POP ESI POP EBX POP EBP Emms // sub_exit: Ret}}