// Note: the data this routine was written for is always processed in pairs, so len is normally even and the odd-len handling at the end is not strictly needed by the original caller. The gain over the plain (non-MMX) method is small; this is just one alternative approach. It is the author's own work - if you know a better way, please let me know.
// Computes dest = a + b. dest, a and b are stored most-significant element first, i.e. dest[0] is the most significant element; a and b use the same layout.
Const unsigned int base = 1000000000; // 10 ^ 9
Const unsigned __int64 base64 = 0x3b9aca003b9aca00; const unsigned __int64 carryfirst = 0x0000000100000000; Const unsigned __INT64 carrynext = 0x0000000000000001;
/ * add_mmx () MMX command version * / __ decspec (naked) long add_mmx (unsigned long * dec, unsigned long * a, unsigned long * b, size_t len) {__ASM {MOV ECX, DWORD PTR [ESP 0x10] // Len Xor Eax, EAX Test ECX, ECX JZ Add_exit
Push EBP MOV EBP, ECX
PUSH EBX MOV EBX, DWORD PTR [ESP 0x14] // EBX = B Push ESI MOV ESI, DWORD PTR [ESP 0x14] // ESI = a Push EDI MOV EDI, DWORD PTR [ESP 0x14] // EDI = DEST SUB ESI, EBX / / ESI = A - B Lea EDX, DWORD PTR [EBX 4 * ECX-8] // & b [i] Sub EDI, EBX // EDI = DEST - B
SHR ECX, 1 // Len = LEN / 2
MOVQ MM7, Base64 // 0x3b9aca003b9aca00 MOVQ MM5, Carryfirst // 0x00000001000000000 MOVQ MM6, Carrynext // 0x000000000000000000 PXOR MM2, MM2 // Carry Clear
Add_LOOP: MOVQ MM0, DWORD PTR [ESI EDX] // a [i] MOVQ MM1, DWORD PTR [EDX] // B [i]
Paddd MM0, MM2 // Sum = a [i] carry MOVQ MM3, MM7 / / MM7 = Base64 PADDD MM0, MM1 // SUM = B [i] PCMPGTD MM3, MM0 // Sum> = Base (10 ^ 9 )? Here is more complicated, there must be, must compare 2 Pandn mm3, mm5 // mm5 = carryfirst PSRLQ MM3, 32 // Get carry carryfirst, mm3 >> 32 MOVQ MM4, MM7 // mm7 = Base64 PADDD MM0, MM3 / / Accumulate
PCMPGTD MM4, MM0 // SUM> = BASE (10 ^ 9) after the carry, the second comparison
MOVQ MM2, MM4 // Backup comparison results PANDN MM4, MM7 // Get subtractive variable, used for SUM - = Base Pandn MM2, MM6 // Get the next carry, CarryNext
PSUBD MM0, MM4 / / Equivalent to Sum - = Base PSLLQ MM2, 32 // Carry = CarryNext << 32
MOVQ DWORD PTR [EDI EDX], MM0 // DEST [I] = SUM SUB EDX, 8 // EDX = & B [I] - 8, equivalent to I- = 2 DEC ECX // LEN - JNE ADD_LOOP
Test EBP, 1 // If len is an odd number, then accumulate the last number jz add_fast_ret
MOV ECX, DWORD PTR [ESI EDX] // ESI = a [i] MOV EBX, DWORD PTR [EDX] // EDX = B [i] Add ECX, EBX // SUM = A [i] b [i ] MOV EBX, BASE // ESI = Base Add ECX, EAX // SUM = Carry XOR EAX, EBX // Carry = 0 CMP ECX, EBX // SUM> = Base? JB Add_Sum // POP EDI POP ESI POP EBX POP EBP EmmsAdd_exit: Ret Add_fast_ret: PSRLQ MM2, 32 // Carry >> 32 MOVD EAX, MM2 // Returns Carry POP EDI POP ESI POP EBX POP EBP EMMS RET}}