/************************************************************************/
/*                                                     			*/
/* This software module was originally developed by              	*/
/*                                                               	*/
/* Stefan Rauthenberg (HHI / ACTS-MoMuSys).     	              	*/
/*                                                               	*/
/* and edited by                                                	*/
/*                                                               	*/
/* Jan De Lameillieure (HHI / ACTS-MoMuSys).     	              	*/
/* Klaas Schueuer (HHI / ACTS-MoMuSys). 	    	              	*/
/*                                                               	*/
/* in the course of development of the MPEG-4 Video (ISO/IEC 14496-2).	*/
/* This software module is an implementation of a part of one or 	*/
/* more MPEG-4 Video (ISO/IEC 14496-2) tools as specified by the        */
/* MPEG-4 Video (ISO/IEC 14496-2). ISO/IEC gives users of the MPEG-4    */
/* Video free license to this software module or modifications thereof 	*/
/* for use in hardware or software products claiming conformance to the */
/* MPEG-4 Video (ISO/IEC 14496-2). Those intending to use this software */
/* module in hardware or software products are advised that its use may */
/* infringe existing patents. The original developer of this software  	*/
/* module and his/her company, the subsequent editors and their     	*/
/* companies, and ISO/IEC have no liability for use of this software    */
/* module or modifications thereof in an implementation. Copyright is   */
/* not released for non MPEG-4 Video (ISO/IEC 14496-2) conforming 	*/
/* products. ACTS-MoMuSys partners retain full right to use  the code   */
/* for their own purposes, assign or donate the code to a third party   */
/* and to inhibit third parties from using the code for non MPEG-4    	*/
/* Video (ISO/IEC 14496-2) conforming products. This copyright notice 	*/
/* must be included in all copies or derivative works.                  */
/* Copyright (c)1997                                            	*/
/*                                                               	*/
/************************************************************************/
/***********************************************************HeaderBegin*******
 *                                                                         
 * File: sadct_blk_s_k.c 
 * 
 * Author: Stefan Rauthenberg (HHI)
 *
 *	Heinrich-Hertz-Institut fuer Nachrichtentechnik GmbH
 *	Image Processing Department
 *	Einsteinufer 37
 *	D-10587 Berlin 
 *	Federal Republic of Germany
 *	Phone: 	+49-30-31002-615
 *	Fax:	+49-30-3927200
 *	email:	rauthenberg@HHI.DE
 *
 * Created:  21/02/95
 *                                                                         
 * Description: 
 *	SADCT (shape adaptive DCT) transformation  of a single block.
 *
 *	User callable functions:
 *
 *		sadct_init_s_k()
 *		sadct_free_s_k()
 *		
 *		sadct_blk_s_k()
 *		saidct_blk_s_k()
 *
 * Notes:  
 *
 * Modified: 
 *      20-MAR-97 Jan De Lameillieure (HHI) : adaptation to MoMuSys common
 *			rules
 *      16-JUN-97 Jan De Lameillieure : renaming some include files to sadct_*
 *      16-JUN-97 Jan De Lameillieure (HHI) : free_dmatrix() is replaced by
 *                      free_dmatrix_sadct() and dmatrix() by dmatrix_sadct()
 *                      to prevent confusion with similar functions defined
 *                      in the sprite software
 *	10-Feb-98 Klaas Schueuer: SA-DCT changed to dDC-SA-DCT
 *
 ***********************************************************HeaderEnd*********/

/************************    INCLUDE FILES    ********************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

#include "sadct_bsnrmem.h"
#include "sadct.h"
#include "sadct_s_k.h"
/* inserted for fsadct */
#include "sadct_vec.h"

#ifdef _GET_CPU_TIME_
/* Optional timing instrumentation.  All objects are defined in another
 * translation unit; this file only updates them. */

/* accumulated time spent in sadct_blk_s_k() / saidct_blk_s_k(), in
 * microseconds (seconds difference * 1000000 + microseconds difference) */
extern long cpu_time_s_k_sadct;
extern long cpu_time_s_k_saidct;
/* incremented once per call of sadct_blk_s_k() / saidct_blk_s_k() */
extern Int s_k_sadct_calls;
extern Int s_k_saidct_calls;
/* tv_sec / tv_usec snapshots taken at entry and exit of a transform call */
extern long cpu_time_start_sec;
extern long cpu_time_start_usec;
extern long cpu_time_end_sec;
extern long cpu_time_end_usec;
  
/* the two arguments handed to gettimeofday() */
extern struct timeval *cpu_time;
extern int *dummy_p;
#endif

/* Indexing:	dct_matrix_s_k[n] (and idct_matrix_s_k[n]) is a pointer to
 *		the transformation matrix for blocksize `n'.  The sensible
 *		range of the first index is 1 .. N and NOT 0 .. N-1 as one
 *		might presume.
 */
#ifndef _FAST_SADCT_
static Double ***dct_matrix_s_k;
static Double ***idct_matrix_s_k;
#else
/* 17-04-98 Klaas Schueuer (HHI) : inserted for fast ddc-SA-DCT */
/* mean of the active pels of the current block; filled in by
   sadct_shiftup_transpose_s_k() and rounded by sadct_blk_s_k() */
static Double *mean;
/* number of active (masked) pels of the current block */
static Int *active_pels;
/* result buffer of a single 1-D (i)dct_vecN() call */
static Double *c_buf;
/* coefficients after the vertical pass of the forward transform */
static Double **tmp_out;
/* sq[n] holds the square root of n for n = 0 .. 8; used as weights in the
   fast inverse transform */
static Double sq[9] = { 0.00000000000,
			1.00000000000,
			1.41421356237,
			1.73205080757,
			2.00000000000,
			2.23606797750,
			2.44948974278,
			2.64575131106,
			2.82842712475};
static Void 
sadct_shiftup_transpose_s_k(Double **out, Int *l_y, Double **in, UChar **mask, 
			Double *mean, Int *active_pels, Int bky, Int bkx);
#endif

/* N = maximum blocksize the buffers below are currently allocated for
   (0 = nothing allocated).  It is also the number of (1D) transformation
   matrices of different block size; the matrix for blocksize n consists
   of n x n elements. */
static Int N = 0;

/* buffers to hold temporary results */
/* tables of pointers used by the inverse transform: results of the
   horizontal / vertical pass are stored through these pointers */
static Double ***reorder_h;
static Double ***reorder_v;
/* scratch matrix: packed columns (forward transform) or the result of the
   inverse horizontal pass (inverse transform) */
static Double **mat_tmp1;
/* copy of one row during the in-place horizontal pass (matrix variant) */
static Double *row_buf;
/* l_y[i]: number of active pels of the i-th packed column,
   l_x[i]: number of coefficients in row i */
static Int *l_y, *l_x;


/*
 *	Allocates memory and sets up the internal tables needed to transform
 *	blocks of at most `bksize_max' by `bksize_max' elements.
 *	The allocated memory may be freed by calling `sadct_free_s_k()'.
 *	The client code is expected to call `sadct_init_s_k()' before
 *	trying to transform a block for the very first time.
 *	Failure to do so is likely to cause a coredump.
 *
 *	If the module is already set up for exactly `bksize_max' the call
 *	returns immediately.  Otherwise `sadct_malloc_s_k()' discards any
 *	previous set-up and allocates the buffers anew.
 *
 *	Parameters:
 *		`bksize_max'	maximum blocksize
 *		`scale_dct'	passed on to `sadct_trfmat_init_s_k()'
 *		`scale_idct'	passed on to `saidct_trfmat_init_s_k()'
 *
 *	With _FAST_SADCT_ defined no transformation matrices are built and
 *	both scale arguments are ignored; the cells holding the block mean
 *	and the active pel count are allocated instead.  If that allocation
 *	fails a message is written to stderr and the program exits.
 */
void sadct_init_s_k(Int bksize_max, Double scale_dct, Double scale_idct)
{

  if ( bksize_max == N )
    return;	/* already initialized, check prevents a client to
		   free something which is going to be initialized
		   exactly the same way. */

  sadct_malloc_s_k(bksize_max);

#ifndef _FAST_SADCT_
  sadct_trfmat_init_s_k(dct_matrix_s_k, bksize_max, scale_dct);
  saidct_trfmat_init_s_k(idct_matrix_s_k, bksize_max, scale_idct);
#else
  mean = malloc(sizeof *mean);
  active_pels = malloc(sizeof *active_pels);
  /* both cells are dereferenced on every transform call, so a failed
     allocation must not go unnoticed */
  if ( mean == NULL || active_pels == NULL ) {
    fprintf(stderr, "sadct_init_s_k: out of memory\n");
    exit(-1);
  }
#endif

}

/*
 *	Frees any memory allocated by `sadct_init_s_k()' (or
 *	`sadct_malloc_s_k()') and marks the module as uninitialized by
 *	resetting N to 0.  Calling it while nothing is allocated (N == 0)
 *	is a no-op.  Every freed pointer is reset to 0.
 */
void sadct_free_s_k(void)
{

  if ( ! N )
    return;	/* nothing to free */

#ifndef _FAST_SADCT_
  sadct_free_internal(dct_matrix_s_k, N);
  dct_matrix_s_k = 0;
  sadct_free_internal(idct_matrix_s_k, N);
  idct_matrix_s_k = 0;
#endif

  sadct_free_reorder(reorder_v, N);
  reorder_v = 0;

  sadct_free_reorder(reorder_h, N);
  reorder_h = 0;

  free_dmatrix_sadct(mat_tmp1, 0, N-1, 0, N-1);
  mat_tmp1 = 0;

  free_dvector(row_buf, 0, N-1);
  row_buf = 0;
  free_ivector( (int *)l_x, 0, N-1);
  l_x = 0;
  free_ivector( (int *)l_y, 0, N-1);
  l_y = 0;

#ifdef _FAST_SADCT_
  /* cleared after free() so that a later call cannot free them twice */
  free (mean);
  mean = 0;
  free (active_pels);
  active_pels = 0;
  /* inserted for fsadct */
  free_dmatrix_sadct(tmp_out, 0, N-1, 0, N-1);
  tmp_out = 0;
  free_dvector(c_buf, 0, N-1);
  c_buf = 0;
#endif

  /* N supplies the index bounds for the free_* calls above, therefore it
     must not be reset before the last of them has been made */
  N = 0;

  return;
}

/*
 *	Transforms the set of pels of the block `in' (`bky' lines by `bkx'
 *	columns) which are marked by 1 in the `mask'.
 *	The function returns in lx[0] .. lx[bky-1] the number of dct
 *	coefficients per line, and the coefficients are saved in the upper
 *	left corner of `out'; e.g.
 *	lx[0] = 3, lx[1] = 2, lx[2] = 1, lx[3] = 0 ...
 *
 *
 *	X  X  X  - - - - - - - - -	    `out'
 *	X  X  -  - - - - - - - - -
 *	X  -  -  - - - - - - - - -
 *	-  -  -  - ...
 *
 *	out[0][0] does not hold a transform coefficient: it is set to
 *	8 times the rounded mean of the marked pels.
 */
#ifndef _FAST_SADCT_
/*
 *	Matrix-multiplication variant of the forward transform, compiled
 *	when _FAST_SADCT_ is not defined.
 *
 *	Processing steps:
 *	  1. the mean of the pels marked in `mask' is computed, rounded to
 *	     an integer and subtracted from EVERY element of `in';
 *	  2. sadct_shiftup_transpose() (defined elsewhere) fills mat_tmp1
 *	     and l_y; l_y[i] is used below as the length of row i of
 *	     mat_tmp1;
 *	  3. vertical pass: row i of mat_tmp1 is multiplied by the
 *	     transformation matrix of its length, and coefficient k is
 *	     appended to line k of `out' (lx[k] counts the entries);
 *	  4. horizontal pass: each line of `out' is transformed in place
 *	     with the matrix matching its length lx[i];
 *	  5. out[0][0] is overwritten with 8 * rounded mean.
 *
 *	Parameters:
 *		`out'	receives the coefficients
 *		`lx'	receives the number of coefficients per line
 *			(`bky' entries are written)
 *		`in'	block to transform, indexed in[line][column] with
 *			line < `bky' and column < `bkx'
 *		`mask'	marks the active pels, same indexing as `in'
 *
 *	Side effects: `in' is modified (step 1); the file-scope buffers
 *	mat_tmp1, l_y and row_buf are overwritten.
 *
 *	NOTE: the mean is rounded by adding 0.5 and truncating towards
 *	zero, i.e. it is rounded to nearest only for non-negative means.
 */
void
sadct_blk_s_k(Double **out, Int *lx, Double **in, UChar **mask, Int bky, Int bkx)
{
  Int i, j, jmax, k;
  Double **trf_mat, *row;
  Double c;

  Double mean_value;
  Int x1, y1, active_pels;

#ifdef _GET_CPU_TIME_
  s_k_sadct_calls++;
  if ( (gettimeofday (cpu_time, dummy_p)) == (-1) ) {
    printf ("Could not get cpu_time \n");
    exit (-1);
  }

  cpu_time_start_sec = cpu_time->tv_sec;
  cpu_time_start_usec = cpu_time->tv_usec;
#endif

  /* new for ddc_sa_dct */
  out[0][0] = 0.0;

  /* compute the mean value of the active pels.  The first index of
     `in' and `mask' is the line (< bky), the second the column (< bkx);
     indexing the other way round would leave the block whenever
     bkx != bky. */
  mean_value = 0.0;
  active_pels  = 0;
  for (y1 = 0; y1 < bky; y1++) {
    for (x1 = 0; x1 < bkx; x1++) {
      active_pels += mask[y1][x1];
      mean_value += in[y1][x1] * mask[y1][x1];
    }
  }

  if (active_pels)
    mean_value = mean_value / (Double)active_pels;
  mean_value = mean_value + 0.5;
  mean_value = (int)mean_value;

  /* remove the mean from the whole block, inactive pels included */
  for (y1 = 0; y1 < bky; y1++) {
    for (x1 = 0; x1 < bkx; x1++) {
      in[y1][x1] -= mean_value;
    }
  }

  sadct_shiftup_transpose(mat_tmp1, l_y, in, mask, bky, bkx);

#if SADCT_DEBUG > 1
  fprintf_smat(stdout, "sadct_blk:in", in, 0, 0, bky-1, bkx-1, "%4d");
  fprintf_dmat(stdout, "mat transposed", mat_tmp1, 0, 0, bkx-1, bky-1,
	       "%6.3f ");
#endif
  memset(lx, 0, sizeof(Int)*bky);

  /* vertical transformation; stops at the first empty packed column */
  for (i=0; i<bkx && l_y[i]; i++) {
    jmax = l_y[i];
    trf_mat = dct_matrix_s_k[jmax];
    row = mat_tmp1[i];
    for (k=0; k<jmax; k++) {
      for (c=0,j=0; j<jmax; j++)
	c += trf_mat[k][j] * row[j];
      out[k][lx[k]] = c;
      lx[k]++;
    }
  }
#if SADCT_DEBUG > 1
  fprintf_dmat(stdout, "v transformed matrix", out, 0, 0, bky-1, bkx-1,
	       "%6.3f ");
#endif

  /* and finally the horizontal transformation; the line is copied to
     row_buf first because the result overwrites it */
  for (i=0; i<bky && lx[i]; i++) {
    jmax = lx[i];
    trf_mat = dct_matrix_s_k[jmax];
    memcpy(row_buf, out[i], jmax*sizeof(Double));
    row = out[i];
    for (k=0; k<jmax; k++) {
      for (c=0,j=0; j<jmax; j++)
	c += trf_mat[k][j] * row_buf[j];
      *row++ = c;
    }
  }

  /* copy meanvalue to DC-coefficient */
  out[0][0] = mean_value * 8.0;

#ifdef _GET_CPU_TIME_
  if ( (gettimeofday (cpu_time, dummy_p)) == (-1) ) {
    printf ("Could not get cpu_time \n");
    exit (-1);
  }

  cpu_time_end_sec = cpu_time->tv_sec;
  cpu_time_end_usec = cpu_time->tv_usec;

  cpu_time_s_k_sadct += 1000000 * (cpu_time_end_sec - cpu_time_start_sec) + (cpu_time_end_usec - cpu_time_start_usec);
#endif

}
#else /* matches `ifndef _FAST_SADCT_ */
void
sadct_blk_s_k(Double **out, Int *lx, Double **in, UChar **mask, Int bky, Int bkx)
{
  Int i, j, jmax, k;
  Double *row, *tmp_coeff;

#ifdef _GET_CPU_TIME_
  s_k_sadct_calls++;
  if ( (gettimeofday (cpu_time, dummy_p)) == (-1) ) {
    printf ("Could not get cpu_time \n");
    exit (-1);
  }	
    
  cpu_time_start_sec = cpu_time->tv_sec;
  cpu_time_start_usec = cpu_time->tv_usec;
#endif
  
  /* new for ddc_sa_dct */
  out[0][0] = 0.0; 
  /* end new */

  sadct_shiftup_transpose_s_k(mat_tmp1, l_y, in, mask, mean, active_pels, bky, bkx);

  if (*active_pels) {
    *mean /= (double) *active_pels;
    if (*mean > 0)
      *mean = (int) (*mean + 0.5);
    else
      *mean = (int) (*mean - 0.5);
  }
  
#if SADCT_DEBUG > 1
  fprintf_smat(stdout, "sadct_blk:in", in, 0, 0, bky-1, bkx-1, "%4d");
  fprintf_dmat(stdout, "mat transposed", mat_tmp1, 0, 0, bkx-1, bky-1, 
	       "%6.3f ");  
#endif
  memset(lx, 0, sizeof(Int)*bky);

  for (i=0; i<bkx && l_y[i]; i++) {
    jmax = l_y[i];
    row = mat_tmp1[i];
    for (j = 0; j < jmax; j++)
      row[j] -= *mean;
    switch (jmax) {
    case 1:
      c_buf[0] = row[0];
      break;      
    case 2:
      dct_vec2 (row, c_buf);
      break;
    case 3:
      dct_vec3 (row, c_buf);
      break;
    case 4:
      dct_vec4 (row, c_buf);
      break;      
    case 5:
      dct_vec5 (row, c_buf);
      break;
    case 6:
      dct_vec6 (row, c_buf);
      break;
    case 7:
      dct_vec7 (row, c_buf);
      break;
    case 8:
      dct_vec8 (row, c_buf);
      break;
    }      
    for (k=0; k<jmax; k++) {
      tmp_out[k][lx[k]] = c_buf[k];
      lx[k]++;
    }
  }
#if SADCT_DEBUG > 1
  fprintf_dmat(stdout, "v transformed matrix", out, 0, 0, bky-1, bkx-1, 
	       "%6.3f ");  
#endif

  /* and finally the horizontal transformation */
  for (i=0; i<bky && lx[i]; i++) {
    jmax = lx[i];
    tmp_coeff = tmp_out[i];
    row = out[i];
    switch (jmax) {
    case 1:
      *row = tmp_coeff[0];
      break;      
    case 2:
      dct_vec2 (tmp_coeff, row);
      break;
    case 3:
      dct_vec3 (tmp_coeff, row);
      break;
    case 4:
      dct_vec4 (tmp_coeff, row);
      break;      
    case 5:
      dct_vec5 (tmp_coeff, row);
      break;
    case 6:
      dct_vec6 (tmp_coeff, row);
      break;
    case 7:
      dct_vec7 (tmp_coeff, row);
      break;
    case 8:
      dct_vec8 (tmp_coeff, row);
      break;
    }
  }

  /* new */
  /* copy meanvalue to DC-coefficient */
  out[0][0] = *mean * 8.0;
  /* end new */

#ifdef _GET_CPU_TIME_
  if ( (gettimeofday (cpu_time, dummy_p)) == (-1) ) {
    printf ("Could not get cpu_time \n");
    exit (-1);
  }
  
  cpu_time_end_sec = cpu_time->tv_sec;
  cpu_time_end_usec = cpu_time->tv_usec;
  
  cpu_time_s_k_sadct += 1000000 * (cpu_time_end_sec - cpu_time_start_sec) + (cpu_time_end_usec - cpu_time_start_usec);
#endif  
  
}
#endif /* matches `ifndef _FAST_SADCT */

/*
 *	inverse sadct transformation of block `in'.  The spatial positions
 *	of valid pels are marked in `mask' by 1.  Please note that the
 *	dct coefficients encoding those pels are expected to be found in 
 *	the upper left corner of block `in'.
 *	  
 *
 *	The following drawing explains the relation between `in', `out'
 *	and `mask':
 *
 *	in ->     I I I - - - - - 
 *	 	  I I - - - - - -
 *		  I - - - - - - -
 *		  - - ...
 *				        out ->    - - - - O - - -         
 *	mask ->   - - - - 1 - - -         	  - - O O - - - -
 *		  - - 1 1 - - - -		  - - O O - - - -
 *		  - - 1 1 - - - -		  - - - O - - - -
 *		  - - - 1 - - - -		  - - - - - - - -
 *		  - - - - - - - -		  - - ...
 *		  - - ...
 *
 */

#ifndef _FAST_SADCT_
/*
 *	Matrix-multiplication variant of the inverse transform, compiled
 *	when _FAST_SADCT_ is not defined.
 *
 *	Processing steps:
 *	  1. the mean is recovered from in[0][0] (divided by 8, plus 0.5,
 *	     truncated) and in[0][0] is set to zero;
 *	  2. build_v_reorder_tbl() / build_h_reorder_tbl() (defined
 *	     elsewhere) set up reorder_v, reorder_h, l_y and l_x; the
 *	     reorder tables hold the addresses the results are stored to
 *	     (presumably inside mat_tmp1 for the horizontal pass and
 *	     inside `out' for the vertical pass);
 *	  3. inverse horizontal pass over the lines of `in', then inverse
 *	     vertical pass over the rows of mat_tmp1;
 *	  4. the sum of all reconstructed active pels (check_sum) is
 *	     distributed over the columns: each active pel of a column
 *	     gets `mean - ddx', where ddx is the rounded share
 *	     check_sum / (sqrt(column length) * sum of sqrt(lengths)).
 *
 *	Parameters:
 *		`out'	receives the reconstructed pels at the active
 *			positions, indexed out[line][column] with
 *			line < `bky' and column < `bkx'
 *		`in'	coefficients in the upper left corner
 *		`mask'	marks the active pels, same indexing as `out'
 *
 *	Side effects: in[0][0] is set to 0.0; the file-scope buffers
 *	reorder_v, reorder_h, l_x, l_y and mat_tmp1 are overwritten.
 */
void
saidct_blk_s_k(Double **out, Double **in, UChar **mask, Int bky, Int bkx)
{
  Int i, j, k, jmax, l;
  Double **trf_mat, **dest;
  Double c;
  Double *in_ptr;

  Double mean_value, check_sum = 0.0;
  Double ly_sum = 0.0, ddx = 0.0;
  Int x1,y1;

#ifdef _GET_CPU_TIME_
  s_k_saidct_calls++;
  if ( (gettimeofday (cpu_time, dummy_p)) == (-1) ) {
    printf ("Could not get cpu_time \n");
    exit (-1);
  }

  cpu_time_start_sec = cpu_time->tv_sec;
  cpu_time_start_usec = cpu_time->tv_usec;
#endif

  /* reconstruction of meanvalue and zero setting of ddc */
  mean_value = (int) ((in[0][0] / 8.0) + 0.5);
  in[0][0] = 0.0;


  build_v_reorder_tbl(reorder_v, l_y, out, mask, bky, bkx);
#if 0
  fprintf_dmataddr(stdout, "reorder_v (init)", reorder_v, 0, 0, bky-1, bkx-1,
		   "%6d ", out[0]);
  fprintf_ivec(stdout, "l_y", l_y, 0, bkx-1, "%4d\n");
#endif
  build_h_reorder_tbl(reorder_h, l_x, l_y, mat_tmp1, bky, bkx);

#if 0
  fprintf_dmataddr(stdout, "reorder_h", reorder_h, 0, 0, bky-1, bkx-1,
		   "%6d ", mat_tmp1[0]);
  fprintf_ivec(stdout, "l_x", l_x, 0, bky-1, "%4d\n");
#endif

  /* inverse horizontal transformation; results are stored through the
     pointers of reorder_h */
  for (i=0; i<bky && l_x[i]; i++) {
    jmax = l_x[i];
    trf_mat = idct_matrix_s_k[jmax];
    in_ptr = in[i];

    dest = reorder_h[i];
    for (k=0; k<jmax; k++) {
      for (c=0,j=0; j<jmax; j++)
	c += trf_mat[k][j] * in_ptr[j];
      *dest[k] = c;
    }
  }

  /* inverse vertical transformation; results are stored through the
     pointers of reorder_v */
  for (i=0; i<bkx && l_y[i]; i++) {
    jmax = l_y[i];
    trf_mat = idct_matrix_s_k[jmax];
    in_ptr = mat_tmp1[i];

    dest = reorder_v[i];
    for (k=0; k<jmax; k++) {
      for (c=0,j=0; j<jmax; j++)
	c += trf_mat[k][j] * in_ptr[j];
      *dest[k] = c;
    }
  }

  /* computing of checksum and ddc correction.  The first index of `out'
     and `mask' is the line (< bky), the second the column (< bkx);
     indexing the other way round would leave the block whenever
     bkx != bky. */
  for (y1 = 0; y1 < bky; y1++)
    for (x1 = 0; x1 < bkx; x1++)
      if (mask[y1][x1])
	check_sum += out[y1][x1];

  for (i=0; i<bkx; i++)
    if (l_y[i])
      ly_sum += sqrt ((Double)l_y[i]);

  /* k counts the non-empty columns met so far, so l_y[k-1] is the length
     of the current column; ddx is computed at the first active pel of a
     column (l == 0) and reused for the remaining pels of that column */
  k = 0;
  for (x1 = 0; x1 < bkx; x1++) {
    l = 0;
    for (y1 = 0; y1 < bky; y1++) {
      if (mask[y1][x1]) {
	if (l==0) {
	  k++;
	  l++;
	  if (check_sum>0)
	    ddx = (int) ((1.0 / (sqrt ((Double) l_y[k-1]) * ly_sum) * check_sum ) + 0.5);
	  else
	    ddx = (int) ((1.0 / (sqrt ((Double) l_y[k-1]) * ly_sum) * check_sum ) - 0.5);
	}
	out[y1][x1] += mean_value - ddx;
      }
    }
  }

#ifdef _GET_CPU_TIME_
  if ( (gettimeofday (cpu_time, dummy_p)) == (-1) ) {
    printf ("Could not get cpu_time \n");
    exit (-1);
  }

  cpu_time_end_sec = cpu_time->tv_sec;
  cpu_time_end_usec = cpu_time->tv_usec;

  cpu_time_s_k_saidct += 1000000 * (cpu_time_end_sec - cpu_time_start_sec) + (cpu_time_end_usec - cpu_time_start_usec);
#endif

}
#else /* matches `ifndef _FAST_SADCT_ */
void
saidct_blk_s_k(Double **out, Double **in, UChar **mask, Int bky, Int bkx)
{
  Int i, k, jmax;
  Double **dest;
  Double *in_ptr;

  Double mean_value;
  Double e1 = 0.0, e2 = 0.0, e_dc = 0.0;

#ifdef _GET_CPU_TIME_
  s_k_saidct_calls++;
  if ( (gettimeofday (cpu_time, dummy_p)) == (-1) ) {
    printf ("Could not get cpu_time \n");
    exit (-1);
  }	
    
  cpu_time_start_sec = cpu_time->tv_sec;
  cpu_time_start_usec = cpu_time->tv_usec;
#endif  

  /* reconstruction of meanvalue and setting ddc to zero */
  mean_value = in[0][0] / 8.0;
  in[0][0] = 0.0;

  
  build_v_reorder_tbl(reorder_v, l_y, out, mask, bky, bkx);
#if 0
  fprintf_dmataddr(stdout, "reorder_v (init)", reorder_v, 0, 0, bky-1, bkx-1, 
		   "%6d ", out[0]);
  fprintf_ivec(stdout, "l_y", l_y, 0, bkx-1, "%4d\n");
#endif
  build_h_reorder_tbl(reorder_h, l_x, l_y, mat_tmp1, bky, bkx);

#if 0
  fprintf_dmataddr(stdout, "reorder_h", reorder_h, 0, 0, bky-1, bkx-1, 
		   "%6d ", mat_tmp1[0]);
  fprintf_ivec(stdout, "l_x", l_x, 0, bky-1, "%4d\n");
#endif

  /* inverse horizontal transformation */
  for (i=0; i<bky && l_x[i]; i++) {
    jmax = l_x[i];
    in_ptr = in[i];

    dest = reorder_h[i];
    switch (jmax) {
    case 1:
      c_buf[0] = in_ptr[0];
      break;
    case 2:
      idct_vec2 (in_ptr, c_buf);
      break;
    case 3:
      idct_vec3 (in_ptr, c_buf);
      break;      
    case 4:
      idct_vec4 (in_ptr, c_buf);
      break;
    case 5:
      idct_vec5 (in_ptr, c_buf);
      break;
    case 6:
      idct_vec6 (in_ptr, c_buf);
      break;      
    case 7:
      idct_vec7 (in_ptr, c_buf);
      break;
    case 8:
      idct_vec8 (in_ptr, c_buf);
      break;
    }
    if ( i == 0) {
      for (k = 0; k < jmax; k++) {
	e1 += sq[l_y[k]] * c_buf[k];
	e2 += sq[l_y[k]];
      }
      e_dc = e1 / e2;
      for (k=0; k<jmax; k++)
      *dest[k] = c_buf[k] - e_dc;
    }
    else {
    for (k=0; k<jmax; k++)
      *dest[k] = c_buf[k];
    }
    
  }
 
  /* inverse vertical transformation */
  for (i=0; i<bkx && l_y[i]; i++) {
    jmax = l_y[i];
    in_ptr = mat_tmp1[i];

    dest = reorder_v[i];
    switch (jmax) {
    case 1:
      c_buf[0] = in_ptr[0];
      break;
    case 2:
      idct_vec2 (in_ptr, c_buf);
      break;
    case 3:
      idct_vec3 (in_ptr, c_buf);
      break;      
    case 4:
      idct_vec4 (in_ptr, c_buf);
      break;
    case 5:
      idct_vec5 (in_ptr, c_buf);
      break;
    case 6:
      idct_vec6 (in_ptr, c_buf);
      break;      
    case 7:
      idct_vec7 (in_ptr, c_buf);
      break;
    case 8:
      idct_vec8 (in_ptr, c_buf);
      break;
    }
    for (k=0; k<jmax; k++)
      *dest[k] = c_buf[k] + mean_value;        
  }
 

#ifdef _GET_CPU_TIME_
  if ( (gettimeofday (cpu_time, dummy_p)) == (-1) ) {
    printf ("Could not get cpu_time \n");
    exit (-1);
  }

  cpu_time_end_sec = cpu_time->tv_sec;
  cpu_time_end_usec = cpu_time->tv_usec;

  cpu_time_s_k_saidct += 1000000 * (cpu_time_end_sec - cpu_time_start_sec) + (cpu_time_end_usec - cpu_time_start_usec);
#endif
  
}
#endif /* matches `ifndef _FAST_SADCT` */


/*
 *	(Re)allocates all working buffers for blocks of at most
 *	`bksize_max' by `bksize_max' elements.  Anything allocated earlier
 *	is released first via `sadct_free_s_k()'; afterwards N is set to
 *	`bksize_max'.  The transformation matrices are allocated here but
 *	their contents are not initialized.
 */
void sadct_malloc_s_k(Int bksize_max)
{
  /* highest valid index of every vector / matrix dimension */
  const Int last = bksize_max - 1;

  sadct_free_s_k();

#ifndef _FAST_SADCT_
  /* one set of matrices for each transform direction */
  dct_matrix_s_k = sadct_malloc_internal(bksize_max);
  idct_matrix_s_k = sadct_malloc_internal(bksize_max);
#else
  /* buffers needed by the fast transform only */
  tmp_out = dmatrix_sadct(0, last, 0, last);
  c_buf = dvector(0, last);
#endif

  /* pointer tables of the inverse transform */
  reorder_h = sadct_malloc_reorder(bksize_max);
  reorder_v = sadct_malloc_reorder(bksize_max);

  /* scratch matrix shared by both transform directions */
  mat_tmp1 = dmatrix_sadct(0, last, 0, last);

  /* column / line lengths and the row copy buffer */
  l_y = (Int *) ivector(0, last);
  l_x = (Int *) ivector(0, last);
  row_buf = dvector(0, last);

  N = bksize_max;
}


/*
 *	Fills the forward transformation matrices for all blocksizes
 *	1 .. `bksize'.  For blocksize n the element [u][x] of matrices[n]
 *	is set to
 *
 *		sqrt(2/n) * cos(pi/(2n) * u * (2x+1)),
 *
 *	and the elements of row u == 0 are additionally divided by
 *	sqrt(2).
 *
 *	Parameters:
 *		`matrices'	matrices[n] must provide n x n elements
 *		`bksize'	largest blocksize to set up
 *		`scale'		not used
 */
void sadct_trfmat_init_s_k(Double ***matrices, Int bksize, Double scale)
{
  Int size, freq, pos;

  for (size = 1; size <= bksize; size++) {
    Double **basis = matrices[size];
    const Double step = M_PI/(2*size);
    const Double norm = sqrt(2.0 / size);

    for (freq = 0; freq < size; freq++) {
      Double *basis_row = basis[freq];

      for (pos = 0; pos < size; pos++) {
	Double coef = norm * cos(step*freq*(2*pos+1));

	/* the constant (u == 0) basis function gets the extra 1/sqrt(2) */
	if ( freq == 0 )
	  coef /= M_SQRT2;
	basis_row[pos] = coef;
      }
    }
  }
}

/*
 *	Fills the inverse transformation matrices for all blocksizes
 *	1 .. `bksize'.  For blocksize n the element [x][u] of matrices[n]
 *	is set to
 *
 *		sqrt(2/n) * cos(pi/(2n) * u * (2x+1)),
 *
 *	and the elements of column u == 0 are additionally divided by
 *	sqrt(2).
 *
 *	Parameters:
 *		`matrices'	matrices[n] must provide n x n elements
 *		`bksize'	largest blocksize to set up
 *		`scale'		not used
 */
void saidct_trfmat_init_s_k(Double ***matrices, Int bksize, Double scale)
{
  Int size, freq, pos;

  for (size = 1; size <= bksize; size++) {
    Double **basis = matrices[size];
    const Double step = M_PI/(2*size);
    const Double norm = sqrt(2.0 / size);

    for (pos = 0; pos < size; pos++) {
      Double *pel_row = basis[pos];

      for (freq = 0; freq < size; freq++) {
	Double coef = norm * cos(step*freq*(2*pos+1));

	/* the constant (u == 0) basis function gets the extra 1/sqrt(2) */
	if ( freq == 0 )
	  coef /= M_SQRT2;
	pel_row[freq] = coef;
      }
    }
  }
}


/*   modified shiftup_transpose for a fast ddc-SA-DCT:	*/
/*   the calculation of the mean value and of the	*/
/*   number of active pels is added to the original	*/
/*   shiftup_transpose function				*/
#ifdef _FAST_SADCT_
/*
 *	Packs the active pels of `in' column by column into the rows of
 *	`out': the pels of the i-th non-empty column are stored, top to
 *	bottom, at the start of out[i], and l_y[i] receives their number.
 *	The remaining entries of l_y up to index bkx-1 are set to zero.
 *
 *	In addition *mean receives the SUM of all active pels (the caller
 *	divides by the count) and *active_pels their number.
 *
 *	`in' and `mask' are indexed [line][column] with line < `bky' and
 *	column < `bkx'.
 */
static Void 
sadct_shiftup_transpose_s_k(Double **out, Int *l_y, Double **in, UChar **mask, 
			Double *mean, Int *active_pels, Int bky, Int bkx)
{
  Int used_cols = 0;	/* number of non-empty columns found so far */
  Int col, line, count;

  *mean = 0.0;
  *active_pels = 0;

  for (col = 0; col < bkx; col++) {
    count = 0;
    for (line = 0; line < bky; line++) {
      if ( !mask[line][col] )
	continue;
      out[used_cols][count] = in[line][col];
      *mean += in[line][col];
      count++;
    }

    /* empty columns do not occupy a row of `out' */
    if ( count == 0 )
      continue;

    l_y[used_cols] = count;
    *active_pels += count;
    used_cols++;
  }

  /* initialize the length of the unoccupied columns to zero. The term
     column refers to the pel positions in `in'.  In `out' columns are
     saved as rows (transposition) to speed up calculation. */
  for (col = used_cols; col < bkx; col++)
    l_y[col] = 0;

}
#endif
