/************************************************************************/
/*                                                     			*/
/* This software module was originally developed by              	*/
/*                                                               	*/
/* Stefan Rauthenberg (HHI / ACTS-MoMuSys).     	              	*/
/*                                                               	*/
/* and edited by                                                	*/
/*                                                               	*/
/* Jan De Lameillieure (HHI / ACTS-MoMuSys).     	              	*/
/* Klaas Schueuer (HHI / ACTS-MoMuSys). 	    	              	*/
/*                                                               	*/
/* in the course of development of the MPEG-4 Video (ISO/IEC 14496-2).	*/
/* This software module is an implementation of a part of one or 	*/
/* more MPEG-4 Video (ISO/IEC 14496-2) tools as specified by the        */
/* MPEG-4 Video (ISO/IEC 14496-2). ISO/IEC gives users of the MPEG-4    */
/* Video free license to this software module or modifications thereof 	*/
/* for use in hardware or software products claiming conformance to the */
/* MPEG-4 Video (ISO/IEC 14496-2). Those intending to use this software */
/* module in hardware or software products are advised that its use may */
/* infringe existing patents. The original developer of this software  	*/
/* module and his/her company, the subsequent editors and their     	*/
/* companies, and ISO/IEC have no liability for use of this software    */
/* module or modifications thereof in an implementation. Copyright is   */
/* not released for non MPEG-4 Video (ISO/IEC 14496-2) conforming 	*/
/* products. ACTS-MoMuSys partners retain full right to use  the code   */
/* for their own purposes, assign or donate the code to a third party   */
/* and to inhibit third parties from using the code for non MPEG-4    	*/
/* Video (ISO/IEC 14496-2) conforming products. This copyright notice 	*/
/* must be included in all copies or derivative works.                  */
/* Copyright (c)1997                                            	*/
/*                                                               	*/
/************************************************************************/
/***********************************************************HeaderBegin*******
 *                                                                         
 * File: sadct_blk_kaup.c 
 * 
 * Author: Stefan Rauthenberg (HHI)
 *
 *	Heinrich-Hertz-Institut fuer Nachrichtentechnik GmbH
 *	Image Processing Department
 *	Einsteinufer 37
 *	D-10587 Berlin 
 *	Federal Republic of Germany
 *	Phone: 	+49-30-31002-615
 *	Fax:	+49-30-3927200
 *	email:	rauthenberg@HHI.DE
 *
 * Created:  21/02/95
 *                                                                         
 * Description: 
 *	SADCT (shape adaptive DCT) transformation  of a single block.
 *
 *	User callable functions:
 *
 *		sadct_init_kaup()
 *		sadct_free_kaup()
 *		
 *		sadct_blk_kaup()
 *		saidct_blk_kaup()
 *
 *		fsadct_blk_kaup()
 *		fsaidct_blk_kaup()
 *
 * Notes:  
 *
 * Modified: 
 *      20-MAR-97 Jan De Lameillieure (HHI) : adaptation to MoMuSys common
 *			rules
 *      24-MAR-97 Klaas Schueuer (HHI) : Kaup'sche SA-DCT
 *      16-JUN-97 Jan De Lameillieure : renaming some include files to sadct_*
 *      16-JUN-97 Jan De Lameillieure (HHI) : free_dmatrix() is replaced by
 *                      free_dmatrix_sadct() and dmatrix() by dmatrix_sadct()
 *                      to prevent confusion with similar functions defined
 *                      in the sprite software
 *	14-APR-98 Klaas Schueuer (HHI) : a fast implementation of SADCT and
 *			SAIDCT is added 
 *
 ***********************************************************HeaderEnd*********/

/************************    INCLUDE FILES    ********************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

#include "sadct_bsnrmem.h"
#include "sadct.h"
#include "sadct_kaup.h"
/* inserted for fsadct */
#include "sadct_vec.h"


/* Indexing:	dct_matrix[n] is a pointer to a transformation matrix of
 *              blocksize `n'.  The sensible range of the first index is 
 *		1..N AND not 0 .. N-1 as one might presume.
 */

#ifndef _FAST_SADCT_ 
static Double ***dct_matrix_kaup;
static Double ***idct_matrix_kaup;
#else
/* inserted for fsadct */
static Double *c_buf;
static Double **tmp_out;
#endif

#ifdef _GET_CPU_TIME_
extern long cpu_time_kaup_sadct;
extern long cpu_time_kaup_saidct;
extern Int kaup_sadct_calls;
extern Int kaup_saidct_calls;
extern long cpu_time_start_sec;
extern long cpu_time_start_usec;
extern long cpu_time_end_sec;
extern long cpu_time_end_usec;
  
extern struct timeval *cpu_time;
extern int *dummy_p;
#endif


/* N = number of (1D) transformation matrices of different block size.
   For a given N the matrix consists of N x N elements. */
static Int N = 0;

/* buffer to hold temporary results */
static Double ***reorder_h;
static Double ***reorder_v;
static Double **mat_tmp1;
static Double *row_buf;
static Int *l_y, *l_x;

/*
 *	Prepares the Kaup SA-DCT module for blocks of at most
 *	`bksize_max' by `bksize_max' elements: the work buffers are
 *	(re)allocated through `sadct_malloc_kaup()' and, unless
 *	_FAST_SADCT_ is defined, the forward and inverse transformation
 *	matrices are filled in.  The memory is released again by
 *	`sadct_free_kaup()'.
 *
 *	Client code has to call this function before transforming its
 *	very first block; the transform routines use the buffers
 *	without checking them.
 *
 *	Parameters:
 *		`bksize_max'	maximum blocksize
 *		`scale_dct'	handed on to `sadct_trfmat_init_kaup()'
 *		`scale_idct'	handed on to `saidct_trfmat_init_kaup()'
 *				(neither is touched in a _FAST_SADCT_ build)
 *
 */
void sadct_init_kaup(Int bksize_max, Double scale_dct, Double scale_idct)
{
  /* Asking for the size that is already set up is a no-op, so the
     buffers are not torn down only to be rebuilt identically. */
  if ( bksize_max != N ) {
    sadct_malloc_kaup(bksize_max);

#ifndef _FAST_SADCT_
    sadct_trfmat_init_kaup(dct_matrix_kaup, bksize_max, scale_dct);
    saidct_trfmat_init_kaup(idct_matrix_kaup, bksize_max, scale_idct);
#endif
  }
}

/*
 *	Releases every work buffer owned by this module (the ones set
 *	up by `sadct_malloc_kaup()'), clears the corresponding pointers
 *	and resets N to 0.  Calling it while nothing is allocated
 *	(N == 0) does nothing.
 */
void sadct_free_kaup(void)
{
  if ( N == 0 )
    return;	/* nothing to free */

  /* Buffers that exist in only one of the two build flavours. */
#ifndef _FAST_SADCT_
  sadct_free_internal(dct_matrix_kaup, N);
  dct_matrix_kaup = 0;
  sadct_free_internal(idct_matrix_kaup, N);
  idct_matrix_kaup = 0;
#else
  free_dvector(c_buf, 0, N-1);
  c_buf = 0;
  free_dmatrix_sadct(tmp_out, 0, N-1, 0, N-1);
  tmp_out = 0;
#endif

  /* Reorder tables used by the inverse transform. */
  sadct_free_reorder(reorder_v, N);
  reorder_v = 0;
  sadct_free_reorder(reorder_h, N);
  reorder_h = 0;

  /* Scratch matrix and scratch row. */
  free_dmatrix_sadct(mat_tmp1, 0, N-1, 0, N-1);
  mat_tmp1 = 0;
  free_dvector(row_buf, 0, N-1);
  row_buf = 0;

  /* Per-row / per-column length vectors. */
  free_ivector( (int *)l_x, 0, N-1);
  l_x = 0;
  free_ivector( (int *)l_y, 0, N-1);
  l_y = 0;

  /* Mark the module as uninitialised. */
  N = 0;
}

/*
 *	transforms the set of pels of a `bkx' * `bky' block `in' which 
 *	are marked by 1 in the `mask'. 
 *	The function returns in lx[0] .. lx[bky-1] the number of
 *	DCT coefficients per line; the coefficients themselves are
 *	saved in the upper left corner of `out'; e.g.
 *	lx[0] = 3, lx[1] = 2, lx[2] = 1, lx[3] = 0 ...
 *	
 *		       
 *	X  X  X  - - - - - - - - -	    `out'
 *	X  X  -  - - - - - - - - -
 *      X  -  -  - - - - - - - - -
 *      -  -  -  - ...
 */

#ifndef _FAST_SADCT_
/*
 *	Matrix based forward SA-DCT of one block (build without
 *	_FAST_SADCT_).
 *
 *	`out'	receives the coefficients, packed into its upper left
 *		corner
 *	`lx'	receives, for each of the `bky' lines of `out', the
 *		number of coefficients stored in that line
 *	`in'	block to transform
 *	`mask'	marks the pels of `in' that take part
 *	`bky', `bkx'	block height and width
 *
 *	Uses the module buffers mat_tmp1, l_y and row_buf, so
 *	`sadct_init_kaup()' must have been called beforehand.
 */
void
sadct_blk_kaup(Double **out, Int *lx, Double **in, UChar **mask, Int bky, Int bkx)
{
  Int col, line, u, x, len;
  Double **basis;
  Double *samples;
  Double acc;

#ifdef _GET_CPU_TIME_
  kaup_sadct_calls++;
  if ( gettimeofday(cpu_time, dummy_p) == -1 ) {
    printf ("Could not get cpu_time \n");
    exit (-1);
  }

  cpu_time_start_sec = cpu_time->tv_sec;
  cpu_time_start_usec = cpu_time->tv_usec;
#endif

  /* Fills mat_tmp1 and l_y; judging by its name it packs the masked
     pels of every column and stores them transposed, l_y[] holding
     the column lengths (helper is defined elsewhere). */
  sadct_shiftup_transpose(mat_tmp1, l_y, in, mask, bky, bkx);

#if SADCT_DEBUG > 1
  fprintf_smat(stdout, "sadct_blk:in", in, 0, 0, bky-1, bkx-1, "%4d");
  fprintf_dmat(stdout, "mat transposed", mat_tmp1, 0, 0, bkx-1, bky-1,
	       "%6.3f ");
#endif

  memset(lx, 0, sizeof(Int)*bky);

  /* Vertical transformation: every non-empty column is multiplied by
     the matrix matching its length; coefficient u is appended to
     line u of `out'.  The first empty column ends the pass. */
  for (col = 0; col < bkx; col++) {
    len = l_y[col];
    if ( len == 0 )
      break;

    basis = dct_matrix_kaup[len];
    samples = mat_tmp1[col];
    for (u = 0; u < len; u++) {
      acc = 0;
      for (x = 0; x < len; x++)
	acc += basis[u][x] * samples[x];
      out[u][lx[u]++] = acc;
    }
  }

#if SADCT_DEBUG > 1
  fprintf_dmat(stdout, "v transformed matrix", out, 0, 0, bky-1, bkx-1,
	       "%6.3f ");
#endif

  /* Horizontal transformation, in place: the line is copied to
     row_buf first because its coefficients overwrite the input. */
  for (line = 0; line < bky; line++) {
    len = lx[line];
    if ( len == 0 )
      break;

    basis = dct_matrix_kaup[len];
    samples = out[line];
    memcpy(row_buf, samples, len*sizeof(Double));
    for (u = 0; u < len; u++) {
      acc = 0;
      for (x = 0; x < len; x++)
	acc += basis[u][x] * row_buf[x];
      samples[u] = acc;
    }
  }

#ifdef _GET_CPU_TIME_
  if ( gettimeofday(cpu_time, dummy_p) == -1 ) {
    printf ("Could not get cpu_time \n");
    exit (-1);
  }

  cpu_time_end_sec = cpu_time->tv_sec;
  cpu_time_end_usec = cpu_time->tv_usec;

  cpu_time_kaup_sadct += 1000000 * (cpu_time_end_sec - cpu_time_start_sec) + (cpu_time_end_usec - cpu_time_start_usec);
#endif

}

#else /* matches: ifndef _FAST_SADCT_ */

/*
 *	Forward 1-D transform of `len' values from `src' into `dst',
 *	dispatched to the fixed-size kernels dct_vec2() .. dct_vec8()
 *	(defined elsewhere, presumably declared in sadct_vec.h).
 *	A single value is copied unchanged.  Lengths outside 1..8 are
 *	not supported by the fast kernels and leave `dst' untouched.
 */
static void
kaup_dct_vec_dispatch(Int len, Double *src, Double *dst)
{
  switch (len) {
  case 1:
    dst[0] = src[0];
    break;
  case 2:
    dct_vec2 (src, dst);
    break;
  case 3:
    dct_vec3 (src, dst);
    break;
  case 4:
    dct_vec4 (src, dst);
    break;
  case 5:
    dct_vec5 (src, dst);
    break;
  case 6:
    dct_vec6 (src, dst);
    break;
  case 7:
    dct_vec7 (src, dst);
    break;
  case 8:
    dct_vec8 (src, dst);
    break;
  default:
    /* no kernel for this length */
    break;
  }
}

/*
 *	Fast forward SA-DCT of one block (build with _FAST_SADCT_).
 *
 *	`out'	receives the coefficients, packed into its upper left
 *		corner
 *	`lx'	receives, for each of the `bky' lines of `out', the
 *		number of coefficients stored in that line
 *	`in'	block to transform
 *	`mask'	marks the pels of `in' that take part
 *	`bky', `bkx'	block height and width
 *
 *	Uses the module buffers mat_tmp1, l_y, c_buf and tmp_out, so
 *	`sadct_init_kaup()' must have been called beforehand.
 */
void
sadct_blk_kaup(Double **out, Int *lx, Double **in, UChar **mask, Int bky, Int bkx)
{
  Int i, jmax, k;

#ifdef _GET_CPU_TIME_
  kaup_sadct_calls++;
  if ( (gettimeofday (cpu_time, dummy_p)) == (-1) ) {
    printf ("Could not get cpu_time \n");
    exit (-1);
  }

  cpu_time_start_sec = cpu_time->tv_sec;
  cpu_time_start_usec = cpu_time->tv_usec;
#endif

  /* Fills mat_tmp1 and l_y; judging by its name it packs the masked
     pels of every column and stores them transposed, l_y[] holding
     the column lengths (helper is defined elsewhere). */
  sadct_shiftup_transpose(mat_tmp1, l_y, in, mask, bky, bkx);

#if SADCT_DEBUG > 1
  fprintf_smat(stdout, "sadct_blk:in", in, 0, 0, bky-1, bkx-1, "%4d");
  fprintf_dmat(stdout, "mat transposed", mat_tmp1, 0, 0, bkx-1, bky-1,
	       "%6.3f ");
#endif
  memset(lx, 0, sizeof(Int)*bky);

  /* Vertical transformation: each non-empty column is transformed
     into c_buf and coefficient k is appended to line k of tmp_out.
     The first empty column ends the pass. */
  for (i=0; i<bkx && l_y[i]; i++) {
    jmax = l_y[i];
    kaup_dct_vec_dispatch(jmax, mat_tmp1[i], c_buf);
    for (k=0; k<jmax; k++) {
      tmp_out[k][lx[k]] = c_buf[k];
      lx[k]++;
    }
  }
#if SADCT_DEBUG > 1
  /* The vertical results live in tmp_out in this variant; `out' has
     not been written at this point. */
  fprintf_dmat(stdout, "v transformed matrix", tmp_out, 0, 0, bky-1, bkx-1,
	       "%6.3f ");
#endif

  /* Horizontal transformation: line i of tmp_out is transformed
     straight into line i of `out', so no row copy is needed. */
  for (i=0; i<bky && lx[i]; i++)
    kaup_dct_vec_dispatch(lx[i], tmp_out[i], out[i]);

#ifdef _GET_CPU_TIME_
  if ( (gettimeofday (cpu_time, dummy_p)) == (-1) ) {
    printf ("Could not get cpu_time \n");
    exit (-1);
  }

  cpu_time_end_sec = cpu_time->tv_sec;
  cpu_time_end_usec = cpu_time->tv_usec;

  cpu_time_kaup_sadct += 1000000 * (cpu_time_end_sec - cpu_time_start_sec) + (cpu_time_end_usec - cpu_time_start_usec);
#endif

}
#endif /* matches: ifndef _FAST_SADCT_ */

/*
 *	inverse sadct transformation of block `in'.  The spatial positions
 *	of valid pels are marked in `mask' by 1.  Please note that the
 *	dct coefficients encoding those pels are expected to be found in 
 *	the upper left corner of block `in'.
 *	  
 *
 *	The following drawing explains the relation between `in', `out'
 *	and `mask':
 *
 *	in ->     I I I - - - - - 
 *	 	  I I - - - - - -
 *		  I - - - - - - -
 *		  - - ...
 *				        out ->    - - - - O - - -         
 *	mask ->   - - - - 1 - - -         	  - - O O - - - -
 *		  - - 1 1 - - - -		  - - O O - - - -
 *		  - - 1 1 - - - -		  - - - O - - - -
 *		  - - - 1 - - - -		  - - - - - - - -
 *		  - - - - - - - -		  - - ...
 *		  - - ...
 *
 */

#ifndef _FAST_SADCT_

/*
 *	One pass of the matrix based inverse transform.  For every
 *	line i < `count' with len[i] != 0 (the first zero length ends
 *	the pass) the len[i] values in coeff[i] are multiplied by
 *	idct_matrix_kaup[len[i]] and result k is stored through the
 *	pointer scatter[i][k].
 */
static void
kaup_saidct_pass(Double ***scatter, Double **coeff, Int *len, Int count)
{
  Int line, u, x, n;
  Double **basis;
  Double **dst;
  Double *src;
  Double acc;

  for (line = 0; line < count; line++) {
    n = len[line];
    if ( n == 0 )
      break;

    basis = idct_matrix_kaup[n];
    src = coeff[line];
    dst = scatter[line];
    for (u = 0; u < n; u++) {
      acc = 0;
      for (x = 0; x < n; x++)
	acc += basis[u][x] * src[x];
      *dst[u] = acc;
    }
  }
}

/*
 *	Matrix based inverse SA-DCT of one block (build without
 *	_FAST_SADCT_).
 *
 *	`out'	receives the reconstructed pels
 *	`in'	coefficients, packed into the upper left corner
 *	`mask'	marks the pel positions that are valid
 *	`bky', `bkx'	block height and width
 *
 *	Uses the module buffers reorder_v, reorder_h, l_x, l_y and
 *	mat_tmp1, so `sadct_init_kaup()' must have been called
 *	beforehand.
 */
void
saidct_blk_kaup(Double **out, Double **in, UChar **mask, Int bky, Int bkx)
{
#ifdef _GET_CPU_TIME_
  kaup_saidct_calls++;
  if ( gettimeofday(cpu_time, dummy_p) == -1 ) {
    printf ("Could not get cpu_time \n");
    exit (-1);
  }

  cpu_time_start_sec = cpu_time->tv_sec;
  cpu_time_start_usec = cpu_time->tv_usec;
#endif

  /* Set up reorder_v / l_y from `out' and `mask'; presumably
     pointers to the masked positions of `out' plus the column
     lengths (helper is defined elsewhere). */
  build_v_reorder_tbl(reorder_v, l_y, out, mask, bky, bkx);
#if 0
  fprintf_dmataddr(stdout, "reorder_v (init)", reorder_v, 0, 0, bky-1, bkx-1,
		   "%6d ", out[0]);
  fprintf_ivec(stdout, "l_y", l_y, 0, bkx-1, "%4d\n");
#endif

  /* Set up reorder_h / l_x from l_y, targeting mat_tmp1. */
  build_h_reorder_tbl(reorder_h, l_x, l_y, mat_tmp1, bky, bkx);
#if 0
  fprintf_dmataddr(stdout, "reorder_h", reorder_h, 0, 0, bky-1, bkx-1,
		   "%6d ", mat_tmp1[0]);
  fprintf_ivec(stdout, "l_x", l_x, 0, bky-1, "%4d\n");
#endif

  /* inverse horizontal transformation: lines of `in', results are
     written through reorder_h */
  kaup_saidct_pass(reorder_h, in, l_x, bky);

  /* inverse vertical transformation: lines of mat_tmp1, results are
     written through reorder_v */
  kaup_saidct_pass(reorder_v, mat_tmp1, l_y, bkx);

#ifdef _GET_CPU_TIME_
  if ( gettimeofday(cpu_time, dummy_p) == -1 ) {
    printf ("Could not get cpu_time \n");
    exit (-1);
  }

  cpu_time_end_sec = cpu_time->tv_sec;
  cpu_time_end_usec = cpu_time->tv_usec;

  cpu_time_kaup_saidct += 1000000 * (cpu_time_end_sec - cpu_time_start_sec) + (cpu_time_end_usec - cpu_time_start_usec);
#endif

}

#else /* matches: ifndef _FAST_SADCT */

/*
 *	One pass of the fast inverse transform.  For every line
 *	i < `count' with len[i] != 0 (the first zero length ends the
 *	pass) the len[i] values in coeff[i] are run through the
 *	matching fixed-size kernel idct_vec2() .. idct_vec8() into
 *	c_buf (a single value is copied unchanged) and c_buf[k] is then
 *	stored through the pointer scatter[i][k].  For a length outside
 *	1..8 no kernel runs and c_buf keeps its previous contents.
 */
static void
kaup_fsaidct_pass(Double ***scatter, Double **coeff, Int *len, Int count)
{
  Int line, u, n;
  Double **dst;
  Double *src;

  for (line = 0; line < count; line++) {
    n = len[line];
    if ( n == 0 )
      break;

    src = coeff[line];
    dst = scatter[line];

    switch (n) {
    case 1:
      c_buf[0] = src[0];
      break;
    case 2:
      idct_vec2 (src, c_buf);
      break;
    case 3:
      idct_vec3 (src, c_buf);
      break;
    case 4:
      idct_vec4 (src, c_buf);
      break;
    case 5:
      idct_vec5 (src, c_buf);
      break;
    case 6:
      idct_vec6 (src, c_buf);
      break;
    case 7:
      idct_vec7 (src, c_buf);
      break;
    case 8:
      idct_vec8 (src, c_buf);
      break;
    default:
      break;
    }

    for (u = 0; u < n; u++)
      *dst[u] = c_buf[u];
  }
}

/*
 *	Fast inverse SA-DCT of one block (build with _FAST_SADCT_).
 *
 *	`out'	receives the reconstructed pels
 *	`in'	coefficients, packed into the upper left corner
 *	`mask'	marks the pel positions that are valid
 *	`bky', `bkx'	block height and width
 *
 *	Uses the module buffers reorder_v, reorder_h, l_x, l_y,
 *	mat_tmp1 and c_buf, so `sadct_init_kaup()' must have been
 *	called beforehand.
 */
void
saidct_blk_kaup(Double **out, Double **in, UChar **mask, Int bky, Int bkx)
{
#ifdef _GET_CPU_TIME_
  kaup_saidct_calls++;
  if ( gettimeofday(cpu_time, dummy_p) == -1 ) {
    printf ("Could not get cpu_time \n");
    exit (-1);
  }

  cpu_time_start_sec = cpu_time->tv_sec;
  cpu_time_start_usec = cpu_time->tv_usec;
#endif

  /* Set up reorder_v / l_y from `out' and `mask'; presumably
     pointers to the masked positions of `out' plus the column
     lengths (helper is defined elsewhere). */
  build_v_reorder_tbl(reorder_v, l_y, out, mask, bky, bkx);
#if 0
  fprintf_dmataddr(stdout, "reorder_v (init)", reorder_v, 0, 0, bky-1, bkx-1,
		   "%6d ", out[0]);
  fprintf_ivec(stdout, "l_y", l_y, 0, bkx-1, "%4d\n");
#endif

  /* Set up reorder_h / l_x from l_y, targeting mat_tmp1. */
  build_h_reorder_tbl(reorder_h, l_x, l_y, mat_tmp1, bky, bkx);
#if 0
  fprintf_dmataddr(stdout, "reorder_h", reorder_h, 0, 0, bky-1, bkx-1,
		   "%6d ", mat_tmp1[0]);
  fprintf_ivec(stdout, "l_x", l_x, 0, bky-1, "%4d\n");
#endif

  /* inverse horizontal transformation: lines of `in', results are
     written through reorder_h */
  kaup_fsaidct_pass(reorder_h, in, l_x, bky);

  /* inverse vertical transformation: lines of mat_tmp1, results are
     written through reorder_v */
  kaup_fsaidct_pass(reorder_v, mat_tmp1, l_y, bkx);

#ifdef _GET_CPU_TIME_
  if ( gettimeofday(cpu_time, dummy_p) == -1 ) {
    printf ("Could not get cpu_time \n");
    exit (-1);
  }

  cpu_time_end_sec = cpu_time->tv_sec;
  cpu_time_end_usec = cpu_time->tv_usec;

  cpu_time_kaup_saidct += 1000000 * (cpu_time_end_sec - cpu_time_start_sec) + (cpu_time_end_usec - cpu_time_start_usec);
#endif

}
#endif /* matches: ifndef _FAST_SADCT_ */

/*
 *	(Re)allocates every work buffer of this module for blocks of
 *	at most `bksize_max' by `bksize_max' elements and records the
 *	size in N.  Buffers from an earlier call are released first.
 *
 *	A non-positive `bksize_max' only releases the old buffers and
 *	leaves the module uninitialised (N == 0).  Allocating for such
 *	a size would leave N at a value for which `sadct_free_kaup()'
 *	either skips the release (N == 0) or passes negative ranges to
 *	the deallocators.
 *
 *	Parameter:
 *		`bksize_max'	maximum blocksize
 */
void sadct_malloc_kaup(Int bksize_max)
{

  /* drop whatever an earlier initialisation left behind */
  sadct_free_kaup();

  if ( bksize_max <= 0 )
    return;	/* nothing usable to allocate, N stays 0 */

#ifndef _FAST_SADCT_
  /* transformation matrices for the matrix based transform */
  dct_matrix_kaup = sadct_malloc_internal(bksize_max);
  idct_matrix_kaup = sadct_malloc_internal(bksize_max);
#else
  /* scratch vector and scratch matrix for the fast transform */
  c_buf = dvector(0, bksize_max-1);
  tmp_out = dmatrix_sadct(0, bksize_max-1, 0, bksize_max-1);
#endif

  /* reorder tables used by the inverse transform */
  reorder_h = sadct_malloc_reorder(bksize_max);
  reorder_v = sadct_malloc_reorder(bksize_max);

  /* scratch matrix shared by the forward and inverse transform */
  mat_tmp1 = dmatrix_sadct(0, bksize_max-1, 0, bksize_max-1);

  /* per-column / per-row lengths and a scratch row */
  l_y = (Int *) ivector(0, bksize_max-1);
  l_x = (Int *) ivector(0, bksize_max-1);
  row_buf = dvector(0, bksize_max-1);

  /* from here on sadct_free_kaup() knows what to release */
  N = bksize_max;
}


/*
 *	Fills the forward transformation matrices matrices[1] ..
 *	matrices[bksize].  For blocksize n the element in row u
 *	(coefficient index) and column x (sample index) is
 *
 *		sqrt(2/n) * cos(pi * u * (2x+1) / (2n))
 *
 *	with row u = 0 additionally divided by sqrt(2).
 *
 *	Parameters:
 *		`matrices'	array indexed by blocksize; matrices[n]
 *				must hold n x n elements
 *		`bksize'	largest blocksize to fill in
 *		`scale'		not used by this implementation
 */
void sadct_trfmat_init_kaup(Double ***matrices, Int bksize, Double scale)
{
  Int size, freq, pos;

  for (size = 1; size <= bksize; size++) {
    Double **basis = matrices[size];
    Double step = M_PI/(2*size);
    Double norm = sqrt(2.0 / size);

    for (freq = 0; freq < size; freq++) {
      Double *coeffs = basis[freq];

      for (pos = 0; pos < size; pos++) {
	Double value = norm * cos(step*freq*(2*pos+1));

	/* the DC row carries the extra 1/sqrt(2) */
	if ( freq == 0 )
	  value /= M_SQRT2;
	coeffs[pos] = value;
      }
    }
  }
}

/*
 *	Fills the inverse transformation matrices matrices[1] ..
 *	matrices[bksize].  For blocksize n the element in row x
 *	(sample index) and column u (coefficient index) is
 *
 *		sqrt(2/n) * cos(pi * u * (2x+1) / (2n))
 *
 *	with column u = 0 additionally divided by sqrt(2), i.e. the
 *	same values as written by sadct_trfmat_init_kaup() but with
 *	row and column index swapped.
 *
 *	Parameters:
 *		`matrices'	array indexed by blocksize; matrices[n]
 *				must hold n x n elements
 *		`bksize'	largest blocksize to fill in
 *		`scale'		not used by this implementation
 */
void saidct_trfmat_init_kaup(Double ***matrices, Int bksize, Double scale)
{
  Int size, freq, pos;

  for (size = 1; size <= bksize; size++) {
    Double **basis = matrices[size];
    Double step = M_PI/(2*size);
    Double norm = sqrt(2.0 / size);

    for (pos = 0; pos < size; pos++) {
      Double *weights = basis[pos];

      for (freq = 0; freq < size; freq++) {
	Double value = norm * cos(step*freq*(2*pos+1));

	/* the DC column carries the extra 1/sqrt(2) */
	if ( freq == 0 )
	  value /= M_SQRT2;
	weights[freq] = value;
      }
    }
  }
}
