Use MMX and SSE instructions to improve performance in video format conversion

xiaoxiao2021-04-11  575

As is well known, displaying video through an overlay surface makes full use of the display hardware's acceleration: the picture can be scaled freely while CPU usage drops significantly. However, the H.264 boards currently on the market output decoded frames in YV12 format, and many graphics cards cannot display that format properly on an overlay, so the frames generally need to be converted to YUYV (YUY2) first.

1. Direct conversion in C code

Void cdispoverlay :: bufferframe

UINT NID, // Channel Number

PBYTE PBUF, // Source Data

UINT NLEN, / / ​​Buffer Length

DWORD NFMT, // Original format, can be yuyv, RGB24 or YV12

UINT NWIDTH, / / ​​Width (By default: 352)

UINT NHEIGHT / / High (By default: 288)

)

{

/ *

Conversion of other formats

* /

IF (Makefourcc ('Y', 'u', 'Y', '2') == m_pdisp-> getfmt () -> Bicompression

&& makefourcc ('Y', 'V', '1', '2') == NFMT)

{

// PBUF points data from YV12 format

UINT X = 0, y = 0;

Byte * pdest = (byte *) m_pdispdata [NID] .Getdata ();

BYTE * Y_SRC = (Byte *) PBUF;

BYTE * V_SRC = (Byte *) (PBUF (NWIDTH * NHEIGHT));

BYTE * u_src = (byte *) (PBUF (NWIDTH * NHEIGHT * 5/4));

Uint width = nwidth >> 1;

FOR (y = 0; y

{

For (x = 0; x

{

PDEST [0] = y_src [x << 1];

PDEST [1] = u_src [x];

PDEST [2] = Y_SRC [(x << 1) 1];

PDEST [3] = v_src [x];

PDEST = 4;

}

Y_SRC = NWIDTH;

IF (Y & 1)

{

u_src = width;

v_src = width;

}

}

}

}

2. Use the MMX instruction set

INT Height = NHEIGHT >> 1;

For (int y = 0; y

{// About 10% Faster Than Plain C

__ASM {

Mov EDI, [PDEST]

MOV EBX, [NWIDTH]

SHR EBX, 3

MOV ESI, [Y_SRC]

MOV ECX, [U_SRC]

MOV EDX, [V_SRC]

XOR EAX, EAX; x = 0

ALIGN 8

XXLOOP1:

MOVD MM1, [EDX 4 * EAX]

MOVD MM0, [ECX 4 * EAX]

PUNPCKLBW MM0, MM1; [VUVU | VUVU]

MOVQ MM2, [ESI 8 * EAX]

MOVQ MM3, MM2

PUNPCKLBW MM2, MM0; [Vyuy | Vyuy]

Movntq [EDI], MM2

PUNPCKHBW MM3, MM0; [Vyuy | Vyuy]

Movntq [EDI 8], MM3

Add EDI, 16

INC EAX

CMP EAX, EBX

JB XXLOOP1

Add ESI, [Width]

XOR EAX, EAX

XXLOOP2: MOVD MM1, [EDX 4 * EAX]

MOVD MM0, [ECX 4 * EAX]

PUNPCKLBW MM0, MM1; [VUVU | VUVU]

MOVQ MM2, [ESI 8 * EAX]

MOVQ MM3, MM2

PUNPCKLBW MM2, MM0; [Vyuy | Vyuy]

Movntq [EDI], MM2

PUNPCKHBW MM3, MM0; [Vyuy | Vyuy]

Movntq [EDI 8], MM3

Add EDI, 16

INC EAX

CMP EAX, EBX

JB xxloop2

Add ESI, [Width]

MOV [PDEST], EDI

MOV [Y_SRC], ESI

SHL EBX, 2

Add ECX, EBX

Add Edx, EBX

MOV [u_src], ECX

MOV [V_SRC], EDX

}

}

__ASM EMMS;

3. Use SSE instructions

INT Height = NHEIGHT >> 1;

For (int y = 0; y

{// About 20% Faster Than Plain C

__ASM {

Mov EDI, [PDEST]

MOV EBX, [NWIDTH]

SHR EBX, 4

MOV ESI, [Y_SRC]

MOV ECX, [U_SRC]

MOV EDX, [V_SRC]

XOR EAX, EAX; x = 0

ALIGN 16

XLOOP1:

MOVQ XMM1, MMWORD PTR [EDX 8 * EAX]

MOVQ XMM0, MMWORD PTR [ECX 8 * EAX]

MOVDQA XMM2, XMMWORD PTR [ESI]

INC EAX

MOVQ XMM5, MMWORD PTR [EDX 8 * EAX]

PUNPCKLBW XMM0, XMM1; [VUVU | VUVU]

MOVQ XMM4, MMWORD PTR [ECX 8 * EAX]

MOVDQA XMM3, XMM2

MOVDQA XMM7, XMMWORD PTR [ESI 16]

PUNPCKLBW XMM4, XMM5

MOVDQA XMM6, XMM7

PUNPCKLBW XMM2, XMM0; [Vyuy | Vyuy]

Movntdq [EDI], XMM2

Punpckhbw xmm3, xmm0; [Vyuy | Vyuy]

PUNPCKLBW XMM6, XMM4

Movntdq XMMWORD PTR [EDI 16], XMM3

PUNPCKHBW XMM7, XMM4

Movntdq [EDI 32], XMM6

Add ESI, 32

Movntdq [EDI 48], XMM7

Add EDI, 64

INC EAX

CMP EAX, EBX

JB xloop1

XOR EAX, EAX

XLOOP2:

MOVQ XMM1, MMWORD PTR [EDX 8 * EAX]

MOVQ XMM0, MMWORD PTR [ECX 8 * EAX]

MOVDQA XMM2, XMMWORD PTR [ESI]

INC EAX

MOVQ XMM5, MMWORD PTR [EDX 8 * EAX]

PUNPCKLBW XMM0, XMM1; [VUVU | VUVU]

MOVQ XMM4, MMWORD PTR [ECX 8 * EAX]

MOVDQA XMM3, XMM2

MOVDQA XMM7, XMMWORD PTR [ESI 16]

PUNPCKLBW XMM4, XMM5

MOVDQA XMM6, XMM7

PUNPCKLBW XMM2, XMM0; [Vyuy | Vyuy]

Movntdq [EDI], XMM2

Punpckhbw xmm3, xmm0; [Vyuy | Vyuy] PUNPCKLBW XMM6, XMM4

Movntdq XMMWORD PTR [EDI 16], XMM3

PUNPCKHBW XMM7, XMM4

Movntdq [EDI 32], XMM6

Add ESI, 32

Movntdq [EDI 48], XMM7

Add EDI, 64

INC EAX

CMP EAX, EBX

JB xloop2

MOV [PDEST], EDI

MOV [Y_SRC], ESI

SHL EBX, 3

Add ECX, EBX

Add Edx, EBX

MOV [u_src], ECX

MOV [V_SRC], EDX

}

}

__ASM EMMS;

4. How to determine whether the CPU supports MMX and SSE instructions

#include

Static Bool_isfeature (DWORD DWREQUESTFEATURE)

{

_P_INFO CPUInfo;

_CPUID (& CPUInfo);

Return (CPUINFO.FEATURE & DWREQUESTFEATURE)! = 0;

}

// Reports whether the CPU advertises MMX. The function-local static is
// initialized on the first call, so the CPU is queried only once.
BOOL IsMMX()
{
    static BOOL bMMX = _isFeature(_CPU_FEATURE_MMX);
    return (bMMX);
}

// Reports whether the CPU advertises SSE2. The function-local static is
// initialized on the first call, so the CPU is queried only once.
BOOL IsSSE2()
{
    /**/
    static BOOL bSSE2 = _isFeature(_CPU_FEATURE_SSE2);
    return (bSSE2);
    /**/
}

Bool issse ()

{

/ ** /

Static Bool Bsse = _isFeature (_CPU_FEATURE_SSE);

Return (BSSE);

/ ** /

}

Bool is3dnow ()

{

/ ** /

Static Bool B3DNOW = _iFeature (_CPU_FEATURE_3DNOW);

Return (B3DNOW);

/ ** /

}

转载请注明原文地址:https://www.9cbs.com/read-133510.html

New Post(0)