http://blog.csdn.net/housisong/ HouSisong@GMail.com 2007.10.30
RGB颜色空间到YUV颜色空间的转换公式:
    Y = 0.299*R + 0.587*G + 0.114*B;
YUV颜色空间到RGB颜色空间的转换公式:
    R = Y + 1.14*V;
// Alias the `asm` spelling to `__asm` for inline-assembly blocks.
#define asm __asm
typedef unsigned char TUInt8; // value range [0..255]

// One 32-bit color value; the channels are declared in b, g, r, a order.
struct TARGB32
{
    TUInt8 b, g, r, a; // a is alpha
};

// Describes a block of color data so it can be passed around as one argument.
struct TPicRegion
{
    TARGB32* pdata;      // address of the first pixel of the data
    long     byte_width; // physical width of one row, in bytes;
                         // abs(byte_width) may be greater than or equal to
                         // width*sizeof(TARGB32)
    long     width;      // width in pixels
    long     height;     // height in pixels
};

// Accessor for a single pixel: returns a reference to the pixel in column x
// of row y. The row address is computed in bytes (pdata + byte_width*y), so
// rows may be padded, and a negative byte_width walks backwards in memory.
// No bounds checking is performed on x or y.
__forceinline TARGB32& Pixels(const TPicRegion& pic,const long x,const long y)
{
    TUInt8* const row = (TUInt8*)pic.pdata + pic.byte_width*y;
    return ((TARGB32*)row)[x];
}
C.YUYV(也可以叫做YUY2)视频格式到RGB32的转化(本文先集中优化YUYV视频格式到RGB32的转化,然后再扩展到其他视频格式) YUYV视频格式的内存数据布局图示:
//颜色饱和函数
__forceinline long border_color(long color) { if (color>255) return 255; else if (color<0) return 0; else return color; } __forceinline TARGB32 YUVToRGB32_float(const TUInt8 Y,const TUInt8 U,const TUInt8 V) { TARGB32 result; result.b= border_color( 1.164 * (Y - 16) + 2.018 * (U - 128) ); result.g= border_color( 1.164 * (Y - 16) - 0.380 * (U - 128) - 0.813 * (V - 128) ); result.r= border_color( 1.164 * (Y - 16) + 1.159 * (V - 128) ); result.a= 255; return result; } void DECODE_YUYV_Float(const TUInt8* pYUYV,const TPicRegion& DstPic) { assert((DstPic.width & 1)==0); TARGB32* pDstLine=DstPic.pdata; for (long y=0;y<DstPic.height;++y) { for (long x=0;x<DstPic.width;x+=2) { pDstLine[x+0]=YUVToRGB32_float(pYUYV[0],pYUYV[1],pYUYV[3]); pDstLine[x+1]=YUVToRGB32_float(pYUYV[2],pYUYV[1],pYUYV[3]); pYUYV+=4; } ((TUInt8*&)pDstLine)+=DstPic.byte_width; } } 速度测试: const int csY_coeff_16 = 1.164*(1<<16);
const int csU_blue_16 = 2.018*(1<<16); const int csU_green_16 = (-0.380)*(1<<16); const int csV_green_16 = (-0.813)*(1<<16); const int csV_red_16 = 1.159*(1<<16); __forceinline TARGB32 YUVToRGB32_Int(const TUInt8 Y,const TUInt8 U,const TUInt8 V) { TARGB32 result; int Ye=csY_coeff_16 * (Y - 16); int Ue=U-128; int Ve=V-128; result.b= border_color( ( Ye + csU_blue_16 * Ue )>>16 ); result.g= border_color( ( Ye + csU_green_16 * Ue + csV_green_16 * Ve )>>16 ); result.r= border_color( ( Ye + csV_red_16 * Ve )>>16 ); result.a= 255; return result; } void DECODE_YUYV_Int(const TUInt8* pYUYV,const TPicRegion& DstPic) { assert((DstPic.width & 1)==0); TARGB32* pDstLine=DstPic.pdata; for (long y=0;y<DstPic.height;++y) { for (long x=0;x<DstPic.width;x+=2) { pDstLine[x+0]=YUVToRGB32_Int(pYUYV[0],pYUYV[1],pYUYV[3]); pDstLine[x+1]=YUVToRGB32_Int(pYUYV[2],pYUYV[1],pYUYV[3]); pYUYV+=4; } ((TUInt8*&)pDstLine)+=DstPic.byte_width; } }
//颜色查表
static TUInt8 _color_table[256*3]; static const TUInt8* color_table=&_color_table[256]; class _CAuto_inti_color_table { public: _CAuto_inti_color_table() { for (int i=0;i<256*3;++i) _color_table[i]=border_color(i-256); } }; static _CAuto_inti_color_table _Auto_inti_color_table; __forceinline TARGB32 YUVToRGB32_RGBTable(const TUInt8 Y,const TUInt8 U,const TUInt8 V) { TARGB32 result; int Ye=csY_coeff_16 * (Y - 16); int Ue=U-128; int Ve=V-128; result.b= color_table[ ( Ye + csU_blue_16 * Ue )>>16 ]; result.g= color_table[ ( Ye + csU_green_16 * Ue + csV_green_16 * Ve )>>16 ]; result.r= color_table[ ( Ye + csV_red_16 * Ve )>>16 ]; result.a= 255; return result; } void DECODE_YUYV_RGBTable(const TUInt8* pYUYV,const TPicRegion& DstPic) { assert((DstPic.width & 1)==0); TARGB32* pDstLine=DstPic.pdata; for (long y=0;y<DstPic.height;++y) { for (long x=0;x<DstPic.width;x+=2) { pDstLine[x+0]=YUVToRGB32_RGBTable(pYUYV[0],pYUYV[1],pYUYV[3]); pDstLine[x+1]=YUVToRGB32_RGBTable(pYUYV[2],pYUYV[1],pYUYV[3]); pYUYV+=4; } ((TUInt8*&)pDstLine)+=DstPic.byte_width; } }
static int Ym_table[256];
static int Um_blue_table[256]; static int Um_green_table[256]; static int Vm_green_table[256]; static int Vm_red_table[256]; class _CAuto_inti_yuv_table { public: _CAuto_inti_yuv_table() { for (int i=0;i<256;++i) { Ym_table[i]=csY_coeff_16 * (i - 16); Um_blue_table[i]=csU_blue_16 * (i - 128); Um_green_table[i]=csU_green_16 * (i - 128); Vm_green_table[i]=csV_green_16 * (i - 128); Vm_red_table[i]=csV_red_16 * (i - 128); } } }; static _CAuto_inti_yuv_table _Auto_inti_yuv_table; __forceinline TARGB32 YUVToRGB32_Table(const TUInt8 Y,const TUInt8 U,const TUInt8 V) { TARGB32 result; int Ye=Ym_table[Y]; result.b= color_table[ ( Ye + Um_blue_table[U] )>>16 ]; result.g= color_table[ ( Ye + Um_green_table[U] + Vm_green_table[V] )>>16 ]; result.r= color_table[ ( Ye + Vm_red_table[V] )>>16 ]; result.a= 255; return result; } void DECODE_YUYV_Table(const TUInt8* pYUYV,const TPicRegion& DstPic) { assert((DstPic.width & 1)==0); TARGB32* pDstLine=DstPic.pdata; for (long y=0;y<DstPic.height;++y) { for (long x=0;x<DstPic.width;x+=2) { pDstLine[x+0]=YUVToRGB32_Table(pYUYV[0],pYUYV[1],pYUYV[3]); pDstLine[x+1]=YUVToRGB32_Table(pYUYV[2],pYUYV[1],pYUYV[3]); pYUYV+=4; } ((TUInt8*&)pDstLine)+=DstPic.byte_width; } }
(提示:在没有“带符号右移”的CPU体系下或者能够忍受一点点小的误差,可以在生成YUV的查找表的时候不扩大2^16倍,从而在计算出结果的时候也就不需要右移16位的修正了,这样改进后函数速度还会提高一些) __forceinline void YUVToRGB32_Two(TARGB32* pDst,
const TUInt8 Y0,const TUInt8 Y1,const TUInt8 U,const TUInt8 V)
{ int Ye0=csY_coeff_16 * (Y0 - 16); int Ye1=csY_coeff_16 * (Y1 - 16); int Ue=(U-128); int Ue_blue=csU_blue_16 *Ue; int Ue_green=csU_green_16 *Ue; int Ve=(V-128); int Ve_green=csV_green_16 *Ve; int Ve_red=csV_red_16 *Ve; pDst[0].argb=color_table[ ( Ye0 + Ue_blue )>>16 ] | ( color_table[ ( Ye0 + Ue_green + Ve_green )>>16]<<8 ) | ( color_table[ ( Ye0 + Ve_red )>>16]<<16 ) | ( 255<<24); pDst[1].argb=color_table[ ( Ye1 + Ue_blue )>>16 ] | ( color_table[ ( Ye1 + Ue_green + Ve_green )>>16]<<8 ) | ( color_table[ ( Ye1 + Ve_red )>>16]<<16 ) | ( 255<<24); } void DECODE_YUYV_Common(const TUInt8* pYUYV,const TPicRegion& DstPic) { assert((DstPic.width & 1)==0); TARGB32* pDstLine=DstPic.pdata; for (long y=0;y<DstPic.height;++y) { for (long x=0;x<DstPic.width;x+=2) { YUVToRGB32_Two(&pDstLine[x],pYUYV[0],pYUYV[2],pYUYV[1],pYUYV[3]); pYUYV+=4; } ((TUInt8*&)pDstLine)+=DstPic.byte_width; } }
///////////////////////////////////////////// (文章写得比较慢,基本优化部分本章写完了;继续成倍地提高速度还是有希望的,后面两篇还没有动手,文章内容也可能变动; 欢迎提出不足和改进意见) (housisong)