Waiting for the two days waiting for the approved, boring, recalling more than 1 year, MPEG4 standard RTP stream, after receiving the local playback, DDRAW is displayed, 1 start to display something first Debug, and focus on synchronization and accepted stream, so I used YUV2BMP this time; convert YV12 into BMP, then 1 桢 1 桢 to the window DC. Oh, the effect can be imagined;) After a matrix conversion is the Draw-> DC, then the project enters the optimization phase, and decides to display YV12 videos with DDRAW overlay. The specific method is to refer to the "mosquito" program in DXSDK7. Put YV12 directly to Overlay display. version) implemented yuv2bmp of: void yuv2rgb_32 (uint8_t * puc_y, int stride_y, uint8_t * puc_u, uint8_t * puc_v, int stride_uv, uint8_t * puc_out, int width_y, int height_y, unsigned int _stride_out) {/ * int x, y; INT STRIDE_DIFF = 4 * (_Stride_out - width_y);
IF (Height_Y <0) {// We Are Flipping Our Output Upside-Down Height_Y = -Height_Y; PUC_Y = (Height_Y - 1) * Stride_Y; PUC_U = (HEIGHT_Y / 2 - 1) * Stride_uV; PUC_V = HEIGHT_Y / 2 - 1) * Stride_uv; stride_y = -stride_y; stride_uv = -stride_uv;}
For (y = 0; y Y = puc_y [x] 10; // - 16; u = puc_u [x >> 1] -128; v = puc_v [x >> 1] -128; _r = _r (y, u, v); _G = _G (y, u, v); _b = _b (y, u, v); R = _S (_r); g = _s (_g); b = _s (_b); PUC_OUT [0] = R; PUC_OUT [1] = G; PUC_OUT [2] = B; PUC_OUT [3] = 0; PUC_OUT = 4;} PUC_Y = Stride_y; if (Y% 2) {pUC_U = Stride_UV; PUC_V = Stride_uv;} PUC_OUT = Stride_Diff;} * / / Intel MMX /// INT Y, Horiz_count; int stride_out = width_y << 2; IF (Height_Y <0) {// We Are Flipping Our Output Upside-down Height_Y = -Height_Y; PUC_Y = (Height_Y - 1) * Stride_y; PUC_U = ((Height_Y >> 1) - 1) * Stride_uv; PUC_V = ((Height_Y >> 1) - 1) * stride_uv; stride_y = -stride_y; stride_uv = -stride_uv;} horiz_count = - (width_y >> 3); For (y = 0; y MOV EAX, PUC_OUT MOV EBX, PUC_Y MOV ECX, PUC_U MOV EDX, PUC_V MOV EDI, HORZ_COUNT HORZ_LOOP: MOVD MM2, [ECX] PXOR MM7, MM7 MOVD MM3, [EDX] PUNPCKLBW MM2, MM7; MM2 = __U3__U2__U1__U0 MOVQ MM0, [EBX]; MM0 = Y7Y6Y5Y4Y3Y2Y1Y0 PUNPCKLBW MM3, MM7; MM3 = __v3__v2__v1__v0 MOVQ MM1, MMW_0x00FF; MM1 = 00ff00FF00FF00FF PSUBUSB MM0, MMB_0x10; MM0 - = 16 PSUBW MM2, MMW_0x0080; MM2 - = 128 PAND MM1, MM0; MM1 = __Y6__Y4__Y2__Y0 PSUBW MM3, MMW_0x0080; MM3 - = 128 PSLLW MM1, 3; MM1 * = 8 PSRLW MM0, 8; MM0 = __Y7__Y5__Y3__Y1 PSLLW MM2, 3; MM2 * = 8 Pmulhw mm1, mmw_mult_y; mm1 * = luma coeff PSLLW MM0, 3; MM0 * = 8 PSLLW MM3, 3; MM3 * = 8 MOVQ MM5, MM3; MM5 = mm3 = V Pmulhw MM5, MMW_MULT_V_R; MM5 = Red Chroma MOVQ MM4, MM2; MM4 = MM2 = U Pmulhw MM0, MMW_MULT_Y; MM0 * = LUMA COEFF MOVQ MM7, MM1; Even LuMa Part Pmulhw mm2, mmw_mult_u_g; mm2 * = u Green Coeff Paddsw mm7, mm5; mm7 = luma chroma __r6__r4__r2__r0 pmulhw mm3, mmw_mult_V_G; mm3 * = v green coeff packuswb mm7, mm7; mm7 = r6r4r2r0r6r4r2r0pmulhw mm4, mmw_mult_U_B; mm4 = blue chroma paddsw mm5, mm0; mm5 = luma chroma __r7__r5__r3__r1 PackusWB MM5, MM5; MM6 = R7R5R3R1R7R5R3R1 PADDSW MM2, MM3; MM2 = Green Chroma MOVQ MM3, MM1; MM3 = __Y6__Y4__Y2__y0 MOVQ MM6, MM1; MM6 = __Y6__Y4__Y2__Y0 paddsw mm3, mm4; mm3 = luma chroma __b6__b4__b2__b0 paddsw mm6, mm2; mm6 = luma chroma __g6__g4__g2__g0 punpcklbw mm7, mm5; mm7 = r7r6r5r4r3r2r1r0 paddsw mm2, mm0; odd luma part plus chroma part __g7__g5__g3__g1 Packuswb mm6, mm6; mm2 = G6G4G2G0G6G4G2G0 Packuswb mm2, mm2; mm2 = G7G5G3G1G7G5G3G1 Packuswb mm3, mm3; mm3 = b6b4b2b0b6b4b2b0 Paddsw mm4, mm0; ODD LUMA Part PLUS chroma part __b7__b5__b3__b1 PackUSWB MM4, MM4; MM4 = B7B5B3B1B7B5B3B1 PUNPCKLBW MM6, MM2; MM6 = G7G6G5G4G3G2G1G0 PUNPCKLBW MM3, MM4; MM3 = B7B6B5B4B3B2B1B0 // 32-bit shuffle .... PXOR MM0, MM0; IS this NEEDED? MOVQ MM1, MM6; MM1 = G7G6G5G4G3G2G1G0 PUNPCKLBW MM1, MM0; MM1 = __G3__G2__G1__g0 MOVQ MM0, MM3; MM0 = B7B6B5B4B3B2B1B0 PUNPCKLBW MM0, MM7; MM0 = R3B3R2B2R1B1R0B0 MOVQ MM2, MM0; MM2 = R3B3R2B2R1B1R0B0 PUNPCKLBW MM0, MM1; MM0 = __r1G1B1__R0G0B0 PUNPCKHBW MM2, MM1; MM2 = __R3G3B3__R2G2B2 // 32-bit save ... MOVQ [EAX], MM0; EAX [0] = __R1G1B1__R0G0B0 MOVQ MM1, MM6; MM1 = G7G6G5G4G3G2G1G0 MOVQ 8 [EAX], MM2; EAX [8] = __R3G3B3_R2G2B2 // 32-bit shuffle .... PXOR MM0, MM0; Is this NEEDED? PUNPCKHBW MM1, MM0; MM1 = __G7__G6__g5__g4 MOVQ MM0, MM3; MM0 = B7B6B5B4B3B2B1B0 PUNPCKHBW MM0, MM7; MM0 = R7B7R6B6R5B5R4B4 MOVQ MM2, MM0; MM2 = R7B7R6B6R5B5R4B4 PUNPCKLBW MM0, MM1; MM0 = __R5G5B5_R4G4B4 PUNPCKHBW MM2, MM1; MM2 = __R7G7B7__R6G6B6 // 32-bit save ... add EBX, 8; PUC_Y = 8; Add ECX, 4; PUC_U = 4; MOVQ 16 [EAX], MM0; EAX [16] = __R5G5B5_R4G4B4 Add EDX, 4; PUC_V = 4; MOVQ 24 [EAX], MM2; EAX [24] = __R7G7B7__R6G6B6 / / 0 1 2 3 4 5 6 7 RGB Save Order Add Eax, 32; PUC_OUT = 32 INC EDI JNE HORZ_LOOP POP EDI POP EDX POP ECX POP EBX POP EAX EMMS} PUC_Y = Stride_y; if (y & 0x01) {//% 2) {pUC_U = Stride_UV; PUC_V = Stride_UV;} PUC_OUT = Stride_out;}}