/* store.c, picture output routines                                         */

/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */

/*
 * Disclaimer of Warranty
 *
 * These software programs are available to the user without any license fee or
 * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
 * any and all warranties, whether express, implied, or statuary, including any
 * implied warranties or merchantability or of fitness for a particular
 * purpose.  In no event shall the copyright-holder be liable for any
 * incidental, punitive, or consequential damages of any kind whatsoever
 * arising from the use of these programs.
 *
 * This disclaimer of warranty extends to the user of these programs and user's
 * customers, employees, agents, transferees, successors, and assigns.
 *
 * The MPEG Software Simulation Group does not represent or warrant that the
 * programs furnished hereunder are free of infringement of any third-party
 * patents.
 *
 * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
 * are subject to royalty fees to patent holders.  Many of these patents are
 * general enough such that they are unavoidable regardless of implementation
 * design.
 *
 */

#include "config.h"

#include <stdio.h>
#include <stdlib.h>

#include "mpeg-config.h"
#include "global.h"

/* Profile:
 *
 * 26.70      9.54     9.54     1914  4984.33 11776.38  _mpeg2Write_Frame
 * 21.30     17.15     7.61     3828  1987.98  1987.98  conv422to444
 * 15.09     22.54     5.39     3828  1408.05  1408.05  conv420to422
 *
 * A lot of work could be done here to fold these three functions in
 * together. However, maintaining correctness is tricky.
 */

/* private prototypes */

/* horizontal 1:2 interpolation of one chroma plane, src -> dst
 * (used for the 4:2:2 -> 4:4:4 step) */
static void conv422to444 _ANSI_ARGS_((struct mpeg2obj *m,
				      unsigned char *src, unsigned char *dst));
/* vertical 1:2 interpolation of one chroma plane, src -> dst
 * (used for the 4:2:0 -> 4:2:2 step) */
static void conv420to422 _ANSI_ARGS_((struct mpeg2obj *m,
				      unsigned char *src, unsigned char *dst));

/*
 * Convert one decoded picture to packed 8-bit R,G,B triples and store it,
 * rescaled, in m->output_buffer.
 *
 *   m      decoder state.  Read here: Coded_Picture_Width/Height,
 *          horizontal_size, vertical_size, chroma_format,
 *          matrix_coefficients, Clip, output_buffer and the four scale
 *          factors scale_{width,height}_{up,down}.
 *   src    src[0] is the luminance plane, src[1] and src[2] are the two
 *          chrominance planes (U and V below).  Luminance rows are
 *          Coded_Picture_Width bytes apart.
 *   frame  not referenced by this routine.
 *
 * Chrominance that is not already 4:4:4 is first upsampled into scratch
 * planes (4:2:0 -> 4:2:2 -> 4:4:4).  The scratch planes are function-static:
 * they are allocated on first use, never freed, and shared by every caller.
 * NOTE(review): they are sized from the first picture that needs them, so a
 * later, larger picture would overflow them -- confirm that callers never
 * change picture size within one process.
 *
 * Scaling: a non-zero scale_*_up replicates pixels/rows by that factor; a
 * non-zero scale_*_down averages that many source pixels/rows into one.
 * This assumes exactly one of the up/down factors is non-zero per axis
 * (TODO confirm against the code that sets them); when neither width factor
 * is set the final branch divides by zero.  When averaging, every sample is
 * divided before it is summed, so the result can be slightly below the true
 * mean because of integer truncation.
 */
void _mpeg2Write_Frame(m, src, frame)
struct mpeg2obj *m;
unsigned char *src[];
int frame;
{
  int incr = m->Coded_Picture_Width;
  int height = m->vertical_size;
  int i, j, k, n;
  int y, u, v, r, g, b;
  int crv, cbu, cgu, cgv;
  /* Scratch planes owned by this function (see header comment). */
  static unsigned char *u422, *v422, *u444_buf, *v444_buf;
  /* Full-resolution chroma planes actually read by the U()/V() macros. */
  unsigned char *u444, *v444;
  unsigned char *p;

  if (m->chroma_format==CHROMA444)
  {
    /* Chroma is already full resolution: read the caller's planes directly.
     * The static scratch pointers are deliberately left untouched so that a
     * later subsampled picture still gets buffers of its own. */
    u444 = src[1];
    v444 = src[2];
  }
  else
  {
    /* Each scratch plane is checked on its own, so a buffer that was not
     * needed (or could not be allocated) on an earlier call is still
     * obtained now. */
    if (m->chroma_format==CHROMA420)
    {
      if (!u422
          && !(u422 = (unsigned char *)malloc((m->Coded_Picture_Width>>1)
                                              *m->Coded_Picture_Height)))
        _mpeg2Error(m,"malloc failed");
      if (!v422
          && !(v422 = (unsigned char *)malloc((m->Coded_Picture_Width>>1)
                                              *m->Coded_Picture_Height)))
        _mpeg2Error(m,"malloc failed");
    }

    if (!u444_buf
        && !(u444_buf = (unsigned char *)malloc(m->Coded_Picture_Width
                                                *m->Coded_Picture_Height)))
      _mpeg2Error(m,"malloc failed");

    if (!v444_buf
        && !(v444_buf = (unsigned char *)malloc(m->Coded_Picture_Width
                                                *m->Coded_Picture_Height)))
      _mpeg2Error(m,"malloc failed");

    if (m->chroma_format==CHROMA420)
    {
      /* 4:2:0 -> 4:2:2 (vertical), then 4:2:2 -> 4:4:4 (horizontal) */
      conv420to422(m,src[1],u422);
      conv420to422(m,src[2],v422);
      conv422to444(m,u422,u444_buf);
      conv422to444(m,v422,v444_buf);
    }
    else
    {
      conv422to444(m,src[1],u444_buf);
      conv422to444(m,src[2],v444_buf);
    }

    u444 = u444_buf;
    v444 = v444_buf;
  }

  /* matrix coefficients */
  crv = _mpeg2Inverse_Table_6_9[m->matrix_coefficients][0];
  cbu = _mpeg2Inverse_Table_6_9[m->matrix_coefficients][1];
  cgu = _mpeg2Inverse_Table_6_9[m->matrix_coefficients][2];
  cgv = _mpeg2Inverse_Table_6_9[m->matrix_coefficients][3];

  p = (unsigned char *) m->output_buffer;

  /* We rely on standard compiler optimizations to optimize these
   * loops fully. The parameters for the U, V, Y macros range over
   * [0..horizontal_size-1] and [0..height].
   *
   * U/V fetch a chroma sample and remove its 128 offset.  Y fetches a luma
   * sample, removes its 16 offset and scales it by 76309 (about
   * 65536*255/219), giving a 16.16 fixed-point value.
   */
#define U(x,y) (*(u444 + incr*(y) + (x)) - 128)
#define V(x,y) (*(v444 + incr*(y) + (x)) - 128)
#define Y(x,y) (76309 * (*(src[0] + incr*(y) + (x)) - 16))

  /* 16.16 fixed point -> 8 bit: add 32768 (one half) to round, shift down,
   * then map through m->Clip.  All three channels must use the same
   * rounding constant. */
#define R(u,v,y) (m->Clip[((y) + crv*(v) + 32768)>>16])
#define G(u,v,y) (m->Clip[((y) - cgu*(u) - cgv*(v) + 32768)>>16])
#define B(u,v,y) (m->Clip[((y) + cbu*(u) + 32768)>>16])

  if (m->scale_width_up && m->scale_height_up)
    {
      /* All pixels get bigger. */
      int row_size = m->horizontal_size * m->scale_width_up * 3;

      for (i = 0; i < height; ++i)
        {
          unsigned char *row_start = p;

          for (j = 0; j < m->horizontal_size; ++j)
            {
              u = U(j,i);
              v = V(j,i);
              y = Y(j,i);
              r = R(u,v,y);
              g = G(u,v,y);
              b = B(u,v,y);

              for (k = 0; k < m->scale_width_up; ++k)
                {
                  *p++ = r; *p++ = g; *p++ = b;
                }
            }

          /* Duplicate the row just written for the remaining output rows. */
          for (k = 1; k < m->scale_height_up; ++k)
            {
              memcpy (p, row_start, row_size);
              p += row_size;
            }
        }
    }
  else if (m->scale_width_up && m->scale_height_down)
    {
      /* Pixels are stretched widthways but rows are accumulated together. */
      int row_size = m->horizontal_size * m->scale_width_up * 3;

      for (i = 0; i < height; i += m->scale_height_down)
        {
          unsigned char *row_start = p;

          /* The output row is an accumulator: start it at zero. */
          memset (p, 0, row_size);

          for (n = 0; n < m->scale_height_down; ++n)
            {
              /* Every source row adds its share into the same output row. */
              p = row_start;

              for (j = 0; j < m->horizontal_size; ++j)
                {
                  u = U(j,i+n);
                  v = V(j,i+n);
                  y = Y(j,i+n);
                  r = R(u,v,y);
                  g = G(u,v,y);
                  b = B(u,v,y);

                  for (k = 0; k < m->scale_width_up; ++k)
                    {
                      *p++ += r / m->scale_height_down;
                      *p++ += g / m->scale_height_down;
                      *p++ += b / m->scale_height_down;
                    }
                }
            }
        }
    }
  else if (m->scale_width_down && m->scale_height_up)
    {
      /* Pixels are squashed widthways but stretched the other way. */
      int row_size = m->horizontal_size / m->scale_width_down * 3;

      for (i = 0; i < height; ++i)
        {
          unsigned char *row_start = p;

          for (j = 0; j < m->horizontal_size; j += m->scale_width_down)
            {
              p[0] = 0;
              p[1] = 0;
              p[2] = 0;

              for (n = 0; n < m->scale_width_down; ++n)
                {
                  u = U(j+n,i);
                  v = V(j+n,i);
                  y = Y(j+n,i);
                  r = R(u,v,y);
                  g = G(u,v,y);
                  b = B(u,v,y);

                  p[0] += r / m->scale_width_down;
                  p[1] += g / m->scale_width_down;
                  p[2] += b / m->scale_width_down;
                }

              p += 3;
            }

          /* Duplicate the row just written for the remaining output rows. */
          for (k = 1; k < m->scale_height_up; ++k)
            {
              memcpy (p, row_start, row_size);
              p += row_size;
            }
        }
    }
  else /* if (m->scale_width_down && m->scale_height_down) */
    {
      /* Pixels are squashed both ways: each output pixel is the average of
       * a scale_width_down x scale_height_down block of source pixels. */
      int area = m->scale_width_down * m->scale_height_down;

      for (i = 0; i < height; i += m->scale_height_down)
        {
          unsigned char *row_start = p;

          for (n = 0; n < m->scale_height_down; ++n)
            {
              p = row_start;

              for (j = 0; j < m->horizontal_size; j += m->scale_width_down)
                {
                  /* Clear the accumulator only for the first source row of
                   * the block.  Clearing it for every row would throw away
                   * the contribution of all rows but the last while still
                   * dividing by the full block area. */
                  if (n == 0)
                    {
                      p[0] = 0;
                      p[1] = 0;
                      p[2] = 0;
                    }

                  for (k = 0; k < m->scale_width_down; ++k)
                    {
                      u = U(j+k,i+n);
                      v = V(j+k,i+n);
                      y = Y(j+k,i+n);
                      r = R(u,v,y);
                      g = G(u,v,y);
                      b = B(u,v,y);

                      p[0] += r / area;
                      p[1] += g / area;
                      p[2] += b / area;
                    }

                  p += 3;
                }
            }
        }
    }

  /* The helper macros are private to this function. */
#undef U
#undef V
#undef Y
#undef R
#undef G
#undef B
}

/*
 * Horizontal 1:2 interpolation filter.
 *
 * Reads Coded_Picture_Height rows of Coded_Picture_Width/2 samples from src
 * and writes the same number of rows, Coded_Picture_Width samples each, to
 * dst.  Filter taps that would fall outside a row use the nearest sample of
 * that row instead.  Filtered values are rounded (+128, >>8) and mapped
 * through m->Clip.  Which filter is applied depends on m->base.MPEG2_Flag.
 */
static void conv422to444(m,src,dst)
  struct mpeg2obj *m;
unsigned char *src,*dst;
{
  int out_w, in_w, last, rows, row, x, acc;
  int l3, l2, l1, r1, r2, r3;   /* clamped tap positions left/right of x */
  unsigned char *out;

  out_w = m->Coded_Picture_Width;
  in_w = out_w>>1;
  last = in_w-1;
  rows = m->Coded_Picture_Height;

  if (!m->base.MPEG2_Flag)
  {
    /* Flag clear: both output samples are interpolated with the
     * asymmetric taps 5 -21 70 228 -37 11 (*256), mirrored for the
     * second sample. */
    for (row=0; row<rows; row++)
    {
      out = dst;

      for (x=0; x<in_w; x++)
      {
        l3 = (x>=3) ? x-3 : 0;
        l2 = (x>=2) ? x-2 : 0;
        l1 = (x>=1) ? x-1 : 0;
        r1 = (x+1<=last) ? x+1 : last;
        r2 = (x+2<=last) ? x+2 : last;
        r3 = (x+3<=last) ? x+3 : last;

        acc =   5*src[l3]
              -21*src[l2]
              +70*src[l1]
             +228*src[x]
              -37*src[r1]
              +11*src[r2]+128;
        out[0] = m->Clip[acc>>8];

        acc =   5*src[r3]
              -21*src[r2]
              +70*src[r1]
             +228*src[x]
              -37*src[l1]
              +11*src[l2]+128;
        out[1] = m->Clip[acc>>8];

        out += 2;
      }

      src += in_w;
      dst += out_w;
    }
    return;
  }

  /* Flag set: FIR coefficients (*256) 21 0 -52 0 159 256 159 0 -52 0 21.
   * Even output samples see only the centre tap (a plain copy); odd output
   * samples use 21 -52 159 159 -52 21. */
  for (row=0; row<rows; row++)
  {
    out = dst;

    for (x=0; x<in_w; x++)
    {
      l2 = (x>=2) ? x-2 : 0;
      l1 = (x>=1) ? x-1 : 0;
      r1 = (x+1<=last) ? x+1 : last;
      r2 = (x+2<=last) ? x+2 : last;
      r3 = (x+3<=last) ? x+3 : last;

      out[0] = src[x];

      acc =  21*(src[l2]+src[r3])
            -52*(src[l1]+src[r2])
           +159*(src[x]+src[r1])+128;
      out[1] = m->Clip[acc>>8];

      out += 2;
    }

    src += in_w;
    dst += out_w;
  }
}

/*
 * Vertical 1:2 interpolation filter.
 *
 * src holds Coded_Picture_Height/2 rows and dst receives twice as many; both
 * use rows of Coded_Picture_Width/2 samples.  The plane is processed one
 * column at a time.  Filtered values are rounded (+128, >>8) and mapped
 * through m->Clip.
 *
 * If m->progressive_frame is set, every source row yields two output rows
 * filtered from its neighbouring rows.  Otherwise even and odd source rows
 * are filtered separately: even rows produce the even output rows, odd rows
 * produce the odd output rows, and taps are clamped to the first/last row of
 * the same parity.
 */
static void conv420to422(m,src,dst)
  struct mpeg2obj *m;
unsigned char *src,*dst;
{
  int stride, in_h, col, row, out_row;
  int a1, a2, a3, a4, a5, a6;       /* clamped source rows above `row` */
  int b1, b2, b3, b4, b5, b6, b7;   /* clamped source rows below `row` */
  unsigned char *in, *out;

  stride = m->Coded_Picture_Width>>1;
  in_h = m->Coded_Picture_Height>>1;

  if (!m->progressive_frame)
  {
    /* field-based picture: two source rows (one per field) per step */
    for (col=0; col<stride; col++)
    {
      in = src+col;
      out = dst+col;

      for (row=0; row<in_h; row+=2)
      {
        out_row = row<<1;

        /* top field: even source rows, clamped to [0, in_h-2] */
        a6 = (row>=6) ? row-6 : 0;
        a4 = (row>=4) ? row-4 : 0;
        a2 = (row>=2) ? row-2 : 0;
        b2 = (row>=in_h-2) ? in_h-2 : row+2;
        b4 = (row>=in_h-4) ? in_h-2 : row+4;
        b6 = (row>=in_h-6) ? in_h-2 : row+6;

        /* polyphase FIR coefficients (*256): 1 -7 30 248 -21 5 */
        out[stride*out_row] = m->Clip[(int)(  1*in[stride*a6]
                                             -7*in[stride*a4]
                                            +30*in[stride*a2]
                                           +248*in[stride*row]
                                            -21*in[stride*b2]
                                             +5*in[stride*b4]+128)>>8];

        /* polyphase FIR coefficients (*256): 7 -35 194 110 -24 4 */
        out[stride*(out_row+2)] = m->Clip[(int)(  7*in[stride*a4]
                                                -35*in[stride*a2]
                                               +194*in[stride*row]
                                               +110*in[stride*b2]
                                                -24*in[stride*b4]
                                                 +4*in[stride*b6]+128)>>8];

        /* bottom field: odd source rows, clamped to [1, in_h-1] */
        a5 = (row>=5) ? row-5 : 1;
        a3 = (row>=3) ? row-3 : 1;
        a1 = (row>=1) ? row-1 : 1;
        b1 = (row>=in_h-1) ? in_h-1 : row+1;
        b3 = (row>=in_h-3) ? in_h-1 : row+3;
        b5 = (row>=in_h-5) ? in_h-1 : row+5;
        b7 = (row>=in_h-7) ? in_h-1 : row+7;

        /* same two filters as above, applied in mirrored order */
        out[stride*(out_row+1)] = m->Clip[(int)(  7*in[stride*b5]
                                                -35*in[stride*b3]
                                               +194*in[stride*b1]
                                               +110*in[stride*a1]
                                                -24*in[stride*a3]
                                                 +4*in[stride*a5]+128)>>8];

        out[stride*(out_row+3)] = m->Clip[(int)(  1*in[stride*b7]
                                                 -7*in[stride*b5]
                                                +30*in[stride*b3]
                                               +248*in[stride*b1]
                                                -21*in[stride*a1]
                                                 +5*in[stride*a3]+128)>>8];
      }
    }
    return;
  }

  /* progressive frame: every source row yields two output rows */
  for (col=0; col<stride; col++)
  {
    in = src+col;
    out = dst+col;

    for (row=0; row<in_h; row++)
    {
      out_row = row<<1;

      a3 = (row>=3) ? row-3 : 0;
      a2 = (row>=2) ? row-2 : 0;
      a1 = (row>=1) ? row-1 : 0;
      b1 = (row>=in_h-1) ? in_h-1 : row+1;
      b2 = (row>=in_h-2) ? in_h-1 : row+2;
      b3 = (row>=in_h-3) ? in_h-1 : row+3;

      /* FIR filter coefficients (*256): 3 -16 67 227 -32 7 */
      out[stride*out_row] = m->Clip[(int)(  3*in[stride*a3]
                                          -16*in[stride*a2]
                                          +67*in[stride*a1]
                                         +227*in[stride*row]
                                          -32*in[stride*b1]
                                           +7*in[stride*b2]+128)>>8];

      /* mirrored taps for the second output row */
      out[stride*(out_row+1)] = m->Clip[(int)(  3*in[stride*b3]
                                              -16*in[stride*b2]
                                              +67*in[stride*b1]
                                             +227*in[stride*row]
                                              -32*in[stride*a1]
                                               +7*in[stride*a2]+128)>>8];
    }
  }
}
