/*
 * yuv420-rgb555.c
 *
 * Copyright (C) Erik Walthinsen - April 2000
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your
 * option) any later version.
 *
 * This software is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU Make; see the file COPYING. If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

/* This is the first pass of a routine that does the conversion of a 16x16
 * region of luma, and two 8x8 regions of chroma, into a 16x16 region of
 * RGB 5:5:5 pixels. It still needs some work.
 */

#include /* NOTE(review): the header name is missing in this copy of the file.
          * The guint8/guint32/guint64 types and the MMX instruction macros
          * (movq_m2r, pmaddwd_m2r, ...) used below must come from it;
          * restore the include before building. */

/*
 * Constant table for the conversion.  Every constant is a 64-bit value that
 * the code uses as a memory operand of an MMX instruction, i.e. as four
 * 16-bit lanes.  The decimal value of each lane is given next to it.
 */

/* Luma gain, applied with pmulhw to luma * 128: 19071 / 16384 = 1.164. */
static guint64 luma_impact = 0x4a7f4a7f4a7f4a7fL; // 19071
/* Subtracted from the scaled luma; 596 = 16 * 1.164 * 32 (rounded). */
static guint64 luma_impact_16 = 0x0254025402540254L; // 596
/* Alternating lanes 6661 (low) and 3203 (high), applied with pmaddwd to
 * interleaved chroma pairs: 6661 / 8192 = 0.813, 3203 / 8192 = 0.391. */
static guint64 green_impact = 0x0c831a050c831a05L; // 3203, 6661
/* 16351 / 8192 = 1.996.
 * NOTE(review): the usual blue coefficient 2.018 * 8192 is 16531 (0x4093);
 * 16351 may be a digit transposition -- confirm the intended value. */
static guint64 blue_impact = 0x3fdf3fdf3fdf3fdfL; // 16351
/* 13074 / 8192 = 1.596. */
static guint64 red_impact = 0x3312331233123312L; // 13074
/* Mask keeping the high byte of every 16-bit lane. */
static guint64 upperbyte = 0xff00ff00ff00ff00L;
/* Mask keeping the low byte of every 16-bit lane. */
static guint64 lowerbyte = 0x00ff00ff00ff00ffL;
/* 128 in every lane: subtracted from chroma, and used as a pmullw factor
 * (multiply by 128) on luma. */
static guint64 val128w = 0x0080008000800080L;
/* 256 in every lane: pmullw factor equivalent to a left shift by 8. */
static guint64 shift_left_8 = 0x0100010001000100L;
/* Keeps the top five bits of an 8-bit component held in a 16-bit lane. */
static guint64 mask_byte_5_bits = 0x00f800f800f800f8L;
/* pmaddwd factors (1, 0x0400) per lane pair: low lane * 1 + high lane * 1024. */
static guint64 red_blue_pack = 0x0400000104000001L;
/* pmaddwd factors (0x0400, 1) per lane pair: low lane * 1024 + high lane * 1. */
static guint64 blue_red_pack = 0x0001040000010400L;

/* Converts an 8-pixel-wide, two-row strip; defined after the block routine. */
static inline void calculate_pixels(guint8 *y_plane,guint32 y_stride, guint8 *rgb_plane,guint32 rgb_stride);

/* YUV 4:2:0 conversion to RGB 05:5:5, 16x16 block
 *
 * This routine will take three planes (y_plane, u_plane, v_plane) containing
 * YUV pixel data, and convert into the output plane.
 * A total of 256 pixels are converted, in a 16x16 block. 8x8 chroma samples
 * are used.
 */
/*
 * MMX implementation of the 16x16 YUV 4:2:0 -> RGB 5:5:5 block converter.
 *
 * The functions below pass data to each other in MMX registers rather than
 * in C variables, so the order of the instruction macros is significant.
 * Register contract between the chroma setup and the pixel routines:
 *   mm0 - per-pixel-pair chroma term for the low five bits (blue), from U
 *   mm1 - per-pixel-pair chroma term for bits 10-14 (red), from V
 *   mm2 - per-pixel-pair chroma term subtracted for bits 5-9 (green)
 *   mm3..mm7 - scratch
 * Each register holds four 16-bit lanes; luma and chroma terms carry a
 * common scale factor of 32 that is removed with an arithmetic shift by 5.
 *
 * NOTE(review): no emms is issued anywhere in this file; presumably the
 * caller clears the MMX state before using floating point -- confirm.
 */

/*
 * Load four U and four V samples and turn them into the mm0/mm1/mm2 chroma
 * terms described above.
 *
 * u_row, v_row: start of the current 8-sample chroma row (8 bytes are read
 *               from each).
 * right_half:   0 selects samples 0-3 (bytes 0-3, unpacked from the low
 *               half of the register), non-zero selects samples 4-7.
 */
static inline void rgb555_load_chroma(guint8 *u_row, guint8 *v_row, int right_half)
{
  movq_m2r(*v_row, mm1);
  movq_m2r(*u_row, mm0);
  pandn_r2r(mm7, mm7);              /* mm7 = ~mm7 & mm7 = 0 */

  /* widen the selected four bytes of each plane to 16-bit lanes */
  if (right_half) {
    punpckhbw_r2r(mm7, mm1);
    punpckhbw_r2r(mm7, mm0);
  } else {
    punpcklbw_r2r(mm7, mm1);
    punpcklbw_r2r(mm7, mm0);
  }

  /* remove the 128 bias */
  psubw_m2r(val128w, mm1);
  psubw_m2r(val128w, mm0);

  /* green: interleave as (V, U) pairs and multiply-accumulate with the
   * (6661, 3203) lane pairs of green_impact, then drop 8 fraction bits */
  movq_r2r(mm1, mm3);
  movq_r2r(mm1, mm2);
  punpckhwd_r2r(mm0, mm3);          /* V2 U2 V3 U3 */
  punpcklwd_r2r(mm0, mm2);          /* V0 U0 V1 U1 */
  pmaddwd_m2r(green_impact, mm3);
  pmaddwd_m2r(green_impact, mm2);
  psrad_i2r(8, mm3);
  psrad_i2r(8, mm2);
  packssdw_r2r(mm3, mm2);           /* mm2 = G0 G1 G2 G3 */

  /* blue from U, red from V: (x << 8) * k >> 16.  mm0 ends up in the low
   * five bits of the pixel and mm1 in bits 10-14, so mm0 takes the blue
   * coefficient and mm1 the red one. */
  pmullw_m2r(shift_left_8, mm0);
  pmullw_m2r(shift_left_8, mm1);
  pmulhw_m2r(blue_impact, mm0);
  pmulhw_m2r(red_impact, mm1);
}

/*
 * Convert one row of eight luma samples into eight 16-bit pixels.
 *
 * Reads 8 bytes at y_row, writes 16 bytes at rgb_row.  Expects mm0, mm1 and
 * mm2 to hold the chroma terms for these eight pixels (one lane per pair of
 * horizontally adjacent pixels) and leaves them untouched.
 */
static inline void rgb555_convert_row(guint8 *y_row, guint8 *rgb_row)
{
  /* mm7 is used as the zero source for pack/unpack below */
  pandn_r2r(mm7, mm7);

  movq_m2r(*y_row, mm3);

  /***** pixels 1,3,5,7: the high byte of every 16-bit lane *****/

  /* luma term: (Y << 7) * luma_impact >> 16, minus the black-level offset */
  movq_r2r(mm3, mm4);
  pand_m2r(upperbyte, mm4);
  psrld_i2r(1, mm4);
  pmulhw_m2r(luma_impact, mm4);
  psubw_m2r(luma_impact_16, mm4);

  /* add the chroma terms, drop the scale, clamp each lane to 0..255 */
  movq_r2r(mm0, mm5);
  movq_r2r(mm1, mm6);
  paddw_r2r(mm4, mm5);
  paddw_r2r(mm4, mm6);
  psraw_i2r(5, mm5);
  psraw_i2r(5, mm6);
  packuswb_r2r(mm7, mm5);
  punpcklbw_r2r(mm7, mm5);
  packuswb_r2r(mm7, mm6);
  punpcklbw_r2r(mm7, mm6);

  /* pair up the mm5/mm6 components per pixel and pack both 5-bit fields
   * into one dword per pixel; mm7 is clobbered as a temporary here */
  movq_r2r(mm6, mm7);
  punpcklwd_r2r(mm5, mm6);          /* (mm6, mm5) pairs for lanes 0,1 */
  punpckhwd_r2r(mm7, mm5);          /* (mm5, mm6) pairs for lanes 2,3 */
  pand_m2r(mask_byte_5_bits, mm5);
  pand_m2r(mask_byte_5_bits, mm6);
  pmaddwd_m2r(red_blue_pack, mm5);
  pmaddwd_m2r(blue_red_pack, mm6);
  psrld_i2r(3, mm5);
  psrld_i2r(3, mm6);
  packssdw_r2r(mm5, mm6);           /* mm6 = four pixels, green still 0 */

  /* green: luma minus the green chroma term, into bits 5-9 */
  pandn_r2r(mm7, mm7);
  psubw_r2r(mm2, mm4);
  psraw_i2r(5, mm4);
  packuswb_r2r(mm7, mm4);
  punpcklbw_r2r(mm7, mm4);
  pand_m2r(mask_byte_5_bits, mm4);
  psllw_i2r(2, mm4);
  por_r2r(mm4, mm6);

  /***** pixels 0,2,4,6: the low byte of every 16-bit lane *****/
  /* mm6 must stay intact from here on: it holds the finished pixels 1,3,5,7 */

  /* luma term: (Y * 128) * luma_impact >> 16, minus the offset */
  pand_m2r(lowerbyte, mm3);
  pmullw_m2r(val128w, mm3);
  pmulhw_m2r(luma_impact, mm3);
  psubw_m2r(luma_impact_16, mm3);

  movq_r2r(mm0, mm4);
  movq_r2r(mm1, mm5);
  paddw_r2r(mm3, mm4);
  paddw_r2r(mm3, mm5);
  psraw_i2r(5, mm4);
  psraw_i2r(5, mm5);
  packuswb_r2r(mm7, mm4);
  punpcklbw_r2r(mm7, mm4);
  packuswb_r2r(mm7, mm5);
  punpcklbw_r2r(mm7, mm5);

  movq_r2r(mm5, mm7);
  punpcklwd_r2r(mm4, mm5);
  punpckhwd_r2r(mm7, mm4);
  pand_m2r(mask_byte_5_bits, mm4);
  pand_m2r(mask_byte_5_bits, mm5);
  pmaddwd_m2r(red_blue_pack, mm4);
  pmaddwd_m2r(blue_red_pack, mm5);
  psrld_i2r(3, mm4);
  psrld_i2r(3, mm5);
  packssdw_r2r(mm4, mm5);           /* mm5 = four pixels, green still 0 */

  pandn_r2r(mm7, mm7);
  psubw_r2r(mm2, mm3);
  psraw_i2r(5, mm3);
  packuswb_r2r(mm7, mm3);
  punpcklbw_r2r(mm7, mm3);
  pand_m2r(mask_byte_5_bits, mm3);
  psllw_i2r(2, mm3);
  por_r2r(mm3, mm5);

  /* interleave the two sets back into memory order 0,1,2,...,7 */
  movq_r2r(mm5, mm7);
  punpcklwd_r2r(mm6, mm7);          /* pixels 0,1,2,3 */
  punpckhwd_r2r(mm6, mm5);          /* pixels 4,5,6,7 */

  /* 8 pixels * 2 bytes: two consecutive 8-byte stores */
  movq_r2m(mm7, *rgb_row);
  movq_r2m(mm5, *(rgb_row + 8));
}

/*
 * Convert an 8-pixel-wide, two-row-high strip of luma into RGB 5:5:5.
 *
 * y_plane:    first of the two luma rows (8 bytes read per row).
 * y_stride:   byte distance between the two luma rows.
 * rgb_plane:  first of the two output rows (16 bytes written per row).
 * rgb_stride: byte distance between the two output rows.
 *
 * Both rows share the chroma terms that the caller left in mm0, mm1 and mm2.
 */
static inline void calculate_pixels(guint8 *y_plane,guint32 y_stride, guint8 *rgb_plane,guint32 rgb_stride)
{
  rgb555_convert_row(y_plane, rgb_plane);
  rgb555_convert_row(y_plane + y_stride, rgb_plane + rgb_stride);
}

/*
 * Convert one 16x16 block of planar YUV 4:2:0 into 16-bit RGB 5:5:5.
 *
 * y_plane / y_stride:     top-left luma sample and byte distance between
 *                         luma rows; 16 rows of 16 bytes are read.
 * u_plane / u_stride:     top-left U sample and byte distance between U
 *                         rows; 8 rows of 8 bytes are read.
 * v_plane / v_stride:     same for V.
 * rgb_plane / rgb_stride: top-left output pixel and byte distance between
 *                         output rows; 16 rows of 32 bytes are written.
 *
 * Each loop iteration consumes one chroma row and produces two output rows,
 * as a left and a right 8-pixel strip.
 */
void yuv420_to_rgb555_16x16(guint8 *y_plane,guint32 y_stride,guint8 *u_plane,guint32 u_stride,guint8 *v_plane,guint32 v_stride,guint8 *rgb_plane,guint32 rgb_stride)
{
  int row;

  for (row = 0; row < 8; row++) {
    /* left strip: chroma samples 0-3, luma 0-7, output bytes 0-15 */
    rgb555_load_chroma(u_plane, v_plane, 0);
    calculate_pixels(y_plane, y_stride, rgb_plane, rgb_stride);

    /* right strip: chroma samples 4-7, luma 8-15, output bytes 16-31 */
    rgb555_load_chroma(u_plane, v_plane, 1);
    calculate_pixels(y_plane + 8, y_stride, rgb_plane + 16, rgb_stride);

    /* one chroma row down, two luma/output rows down */
    u_plane += u_stride;
    v_plane += v_stride;
    y_plane += 2 * y_stride;
    rgb_plane += 2 * rgb_stride;
  }
}