
/**************************************************************************
 *                                                                        *
 * Copyright (c) 1996 Michael Richmond and Richard Treffers               *
 *                                                                        *
 *                    This software may be copied and distributed for     *
 *                    educational, research and not for profit services   *
 *                    provided that this copyright and statement are      *
 *                    included in all such copies.                        *
 *                                                                        *
 **************************************************************************/


/*************************************************************************
 * 
 * Create a 1-D vector from the given 2-D image, finding the median
 * value of the pixels in each row (or col).
 * Place the resulting 1-D vector into the "output" file.
 *
 *    MAKEVEC file1 outfile= row|col 
 *
 *
 * 10/6/95 MWR
 **************************************************************************/

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "pcvista.h"
#include "fits.h"

#undef DEBUG        /* define for lots of diagnostic messages */


	/* used by the "rand()" function in "scramble()" */
#ifndef RAND_MAX
#define RAND_MAX 2147483647
#endif

	/* 
	 * If we have to find the median along columns, try using this
	 * as the size of the histogram array created for each column.
	 * We may need to increase or decrease the size, depending
	 * on the size of the image the amount of memory available.
	 * The user can modify this "minimal" size with the command-line 
	 * option "arraysize=".
	 */
#define COLARRAY_SIZE     1000      
#define MAX_LOOP             5   /* max number of times we adjust hist size */
static int colarray_size = COLARRAY_SIZE;

	/*
	 * When trying to find the range for histograms, start with
	 * the fraction of data falling below and above these fractions.
	 * If we fail, push the two fractions closer together, until
	 * we succeed in making a small enough range that we can fit
	 * all structures into memory.
	 */
#define BOTTOM 0.20          /* bottom of range is here in distribution */
#define TOP    0.80          /* top of range is here */

	/* there are this many possible values of a 16-bit signed int */
#define MEDARRAY_SIZE    65535

	/* this array has one element per value of a 16-bit unsigned int */
static int median_array[MEDARRAY_SIZE];

	/* and this array is used to keep track of which elements of the
	   median_array have been touched */
static uint16 pointer_array[MEDARRAY_SIZE];


	/* used for holding error messages. */
static char err_msg[NBUF];

	/* 
     * this structure is used to keep track of data values in each
     * column (used only if user asks for median along cols).
     * We use unsigned 16-bit ints because we are confident that
     * each column must contain fewer than 65,536 pixels.
     */
struct hist {
    int16 bottom;               /* bottom of range of the histogram */
    int16 top;                  /* top of range of histogram */
    uint16 underflow;           /* number of pixels with values < bottom */
    uint16 overflow;            /* number of pixels with values > top */
    uint16 ndat;                /* # elems in data array = (top-bottom) + 1 */
    uint16 *data;               /* data[i] = # occurences of i-32768 */
};
	  
#ifdef PROTO
int main(int, char **);
static int16 find_median(int ndat, int16 *array);
static void make_row_vector(FITS_HANDLE fh, int nr, int nc, char *outfile);
static void make_col_vector(FITS_HANDLE fh, int nr, int nc, char *outfile);
static struct hist *make_hist(int16 bottom, int16 top);
static void del_hist(struct hist *hist);
static int find_range(FITS_HANDLE fh, int nr, int nc, 
                      float bot_frac, float top_frac, int16 *bottom, int16 *top);
static void sort(int16 *data);
static int compar(int16 *a, int16 *b);
static double scramble(void);
#else
static int16 find_median();
static void make_row_vector();
static void make_col_vector();
static struct hist *make_hist();
static void del_hist();
static int find_range();
static double scramble();
static void sort();
static int compar();
static double scramble();
#endif


static char usage[] = "usage: makevec file outfile= row|col [arraysize=] [help]";


int 
main(argc, argv)
int argc;
char *argv[];
{
    static int i;
    int nrow, ncol;
    int do_row, do_col;
    char fname[NBUF], outfile[NBUF], *gotit;
    FITS_HANDLE fh;

    if (argc < 4)
        error(-1, usage);

    strcpy(outfile, "");
    do_row = 0;
    do_col = 0;

    /* first argument must be input file name */
    strcpy(fname, argv[1]);

    for (i = 2; i < argc; i++) {
        if ((gotit = find("outfile", argv[i])) != NULL) {
            strcpy(outfile, gotit);
            continue;
        }
        if (keyword("row", argv[i])) {
            do_row = 1;
        }
        if (keyword("col", argv[i])) {
            do_col = 1;
        }
        if ((gotit = find("arraysize", argv[i])) != NULL) {
			colarray_size = (int) floor(evaluate(gotit) + 0.5);
            continue;
        }
    }
    if (strcmp(outfile, "") == 0) {
        error(1, "makevec: not output file supplied");
    }
    if (do_row + do_col != 1) {
        error(1, "makevec: you must supply one of 'row' or 'col'");
    }

    fh = fits_open(fname, "r", &nrow, &ncol);
    
    if (do_row) {
        make_row_vector(fh, nrow, ncol, outfile);
    }
    else {
        make_col_vector(fh, nrow, ncol, outfile);
    }

    fits_close(fh);

    exit(0);
}



/**********************************************************************
 * Given an N-row by M-col image, create a new image, N-row by 1-col,
 * in which each row contains the median value of the pixels 
 * in the corresponding row of the original image.
 */

static void
make_row_vector(fh, nr, nc, outfile)
FITS_HANDLE fh;
int nr, nc;
char *outfile;
{
    int row;
    int nrow, ncol;                      /* size of output image */
    FITS_HANDLE outfh;
    static int16 in_data[NMAX];
    static int16 out_data[1];


    /* create the output file */
    nrow = nr;
    ncol = 1;
    outfh = fits_open(outfile, "w", &nrow, &ncol);
    
    for (row = 0; row < nr; row++) {
        fits_get_data(fh, row, 0, in_data, nc);
        out_data[0] = find_median(nc, in_data);
        fits_put_data(outfh, row, 0, out_data, ncol);
    }

    fits_close(outfh);
}




/**********************************************************************
 * Given an N-row by M-col image, create a new image, 1-row by M-col,
 * in which each col contains the median value of the pixels 
 * in the corresponding col of the original image.
 *
 * The idea here is to 
 *
 *     - create a histogram-array for each column of the image
 *     - read the image, one row at a time
 *          for each row, increment the bin for each column
 *     - go through the set of histogram-arrays, and use each
 *          one to find median of each column in turn
 *
 * The tricky part here is to figure out an appropriate range for
 * the histograms.  We can't just use all possible 65535 values
 * of a 16-bit int, because we'd run out of memory for all but tiny
 * images.  So, we have guess an appropriate "min" and "size"
 * for the histogram-arrays.  We would like to pick "min" slightly
 * less than the smallest value, and "size" large enough to include
 * the median value.
 *
 * We allow at most MAX_LOOP iterations through a cycle in which
 * we attempt to find a range for the histograms.  If we still fail
 * at the end of MAX_LOOP tries, use the value 
 * 'colarray_size' and hope that it works.
 *
 * 
 */

static void
make_col_vector(fh, nr, nc, outfile)
FITS_HANDLE fh;
int nr, nc;
char *outfile;
{
    int i, j, row, col, index, done;
	int iter;
    int nrow, ncol;                     /* size of output image */
	int halfway, sum;
    float bot_frac, top_frac;           /* fractions used in finding range */
	float delta_frac;                   /* amount by which to adjust fraction */
    int16 bottom, top;
    struct hist **hists;                /* array of hist ptrs, one per column */
	struct hist *hist_p;
    FITS_HANDLE outfh;
    static int16 in_data[NMAX], *indata_p;
	static int16 out_data[NMAX], *outdata_p;
	static uint16 *data_p;

	bot_frac = BOTTOM;
	top_frac = TOP;

	/* first, we try to allocate a histogram-array for each col */
	if ((hists = (struct hist **) malloc(nc*sizeof(struct hist *))) == NULL) {
		error(1, "make_col_vector: can't malloc for hist pointers");
	}

	/* 
	 * now, this is the tricky part.  If we can't create a hist for each
	 * col, then we decrease the range and try again.  Keep trying until
	 * we manage to cut down on the size enough to allocate a histogram
 	 * for each column.
	 */
	done = 0;
	for (iter = 0; iter < MAX_LOOP; iter++) {

#ifdef DEBUG
		printf("make_col_vector: in loop, iter %d, frac %5.2f %5.2f \n", 
				iter, bot_frac, top_frac);
#endif

		/* first, figure out an appropriate range for histogram arrays */
		if (find_range(fh, nr, nc, bot_frac, top_frac, &bottom, &top) != 0) {
			error(1, "make_col_vector: find_range fails, so giving up");
		}
#ifdef DEBUG
		printf("find_range returns [%5d, %5d]\n", bottom, top);
#endif

		/* 
		 * If this range is LESS than our "minimal" size of 'colarray_size',
		 * then increase it here.  Since we are confident that we can
		 * fit 'colarray_size' units into memory, we might as well play
		 * it safe.
		 * 
		 * Do check that we don't exceed the boundaries of "int16" when
		 * modifying the range, though!
		 */
		if ((1 + top - bottom) < colarray_size) {
			if ((bottom - colarray_size/2) < bottom) {
				bottom -= colarray_size/2;
			}
			if (bottom + colarray_size > top) {
				top = bottom + colarray_size;
			}
#ifdef DEBUG
			printf("tried to increase range: now [%5d, %5d]\n", bottom, top);
#endif
		}
    
		done = 1;
		for (i = 0; i < nc; i++) {
			if ((hists[i] = make_hist(bottom, top)) == NULL) {
				sprintf(err_msg, "make_col_vector: range %d - %d too big",
						bottom, top);
				error(0, err_msg);
				for (j = 0; j < i; j++) {
					del_hist(hists[j]);
				}
				done = 0;
				break;
			}
		}
		if (done == 0) {
			delta_frac = (top_frac - bot_frac)/10.0;
			bot_frac += delta_frac;
			top_frac -= delta_frac;
#ifdef DEBUG
			printf("range too big, make fracs %5.2f %5.2f\n", 
					bot_frac, top_frac);
#endif
		}
		else {
			break;
		}
	}
	if (done == 0) {
		top = bottom + colarray_size;
#ifdef DEBUG
		printf("range still too big, use fixed size [%5d, %5d]\n",
				bottom, top);
#endif
		for (i = 0; i < nc; i++) {
			if ((hists[i] = make_hist(bottom, top)) == NULL) {
				sprintf(err_msg, "make_col_vector: fixed range %d - %d too big",
						bottom, top);
				error(0, err_msg);
				for (j = 0; j < i; j++) {
					del_hist(hists[j]);
				}
				error(1, "make_vec: run out of memory, so give up");
			}
		}
	}

	/* 
	 * finally, we've created a set of histogram-arrays.  Now we can
	 * go through the input image and fill each one.
	 */
    for (row = 0; row < nr; row++) {
#ifdef DEBUG
		printf("row %5d ", row);
#endif
		indata_p = &(in_data[0]);
        fits_get_data(fh, row, 0, in_data, nc);
		for (col = 0; col < nc; col++, indata_p++) {
			hist_p = hists[col];
			if (*indata_p < hist_p->bottom) {
				hist_p->underflow++;
#ifdef DEBUG 
				putchar('u');
#endif
			}
			else if (*indata_p > hist_p->top) {
				hist_p->overflow++;
#ifdef DEBUG 
				putchar('o');
#endif
			}
			else {
				index = (*indata_p - hist_p->bottom);
				hist_p->data[index]++;
#ifdef DEBUG 
				putchar('.');
#endif
			}
		}
#ifdef DEBUG
		putchar('\n');
#endif
    }

	/*
	 * having filled all the histograms, we can now find the
	 * median of each one by walking up through the histogram
	 * until we have accumulated >= half the data points. 
	 *
	 * If more than half fall into underflow, return bottom of range.
	 * If more than half fall into overflow, return top of range.
	 * In either case, print warning messages.
	 */
	halfway = nr/2;
	outdata_p = &(out_data[0]);
	for (col = 0; col < nc; col++, outdata_p++) {
		hist_p = hists[col];
		if (hist_p->underflow >= halfway) {
			*outdata_p = hist_p->bottom;
			sprintf(err_msg, "make_col_vec: warning: col %d has underflow",
					col);
			error(0, err_msg);
		}
		else if (hist_p->overflow >= halfway) {
			*outdata_p = hist_p->top;
			sprintf(err_msg, "make_col_vec: warning: col %d has overflow",
					col);
			error(0, err_msg);
		}
		else {
			data_p = hist_p->data;
			sum = 0;
			for (i = 0; i < hist_p->ndat; i++) {
				sum += *data_p++;
				if (sum >= halfway) {
					*outdata_p = hist_p->bottom + i;
					break;
				}
			}
			if (i == hist_p->ndat) {
				*outdata_p = hist_p->top;
			}
		}
	}

	/* Phew.  Now we can write the output into a file */
    nrow = 1;
    ncol = nc;
    outfh = fits_open(outfile, "w", &nrow, &ncol);
    fits_put_data(outfh, 0, 0, out_data, ncol);
	fits_close(outfh);

	/* all done. */
}


/*********************************************************************
 * Create a "hist" struct with the given "bottom" and "top" for its
 * range.  
 * 
 * return a pointer to the new structure,
 *        or NULL if there's an error.
 */

static struct hist *
make_hist(bottom, top)
int16 bottom, top;
{
   uint16 ndat;
   struct hist *new;

   if ((new = (struct hist *) malloc(sizeof(struct hist))) == NULL) {
      return(NULL);
   }
   ndat = (top - bottom) + 1;

   /* use "calloc" to make sure values are initialized to zero */
   if ((new->data = (uint16 *) calloc((size_t) ndat, sizeof(uint16))) == NULL) {
      free(new);
      return(NULL);
   }
   new->bottom = bottom;
   new->top = top;
   new->ndat = ndat;
   new->underflow = 0;
   new->overflow = 0;

   return(new);
}
   

/*********************************************************************
 * Free a "hist" structure and its array.
 */

static void
del_hist(hist)
struct hist *hist;
{
   free(hist->data);
   free(hist);
}
  




/*********************************************************************
 * Given an array of "ndat" values, find the median and return it.
 * We assume that the data is distributed in a "bottom-heavy"
 * fashion, which is typical for astronomical images.
 *
 * Return 0 if we can't figure out a median value.
 */

static int16
find_median(ndat, array)
int ndat;
int16 *array;
{
   int16 median;
   uint16 *p, min, val;
   static int first = 0;
   int i, sum, half, flag;

#ifdef DEBUG
   printf("entering find_median, n = %d", ndat);
   for (i = 0; i < ndat; i++) {
      if (i % 12 == 0)
         printf("\n");
      printf(" %5u", array[i]);
   }
   printf("\n");
#endif

   /* initialize the median_array if this is the first time through */
   if (first == 0) {
      first = 1;
      for (i = 0; i < MEDARRAY_SIZE; i++) {
         median_array[i] = 0;
      }
   }

   /* if we need to, allocate space for the 'pointer' array */
   flag = 0;
   if (ndat >= MEDARRAY_SIZE) {
      flag = 1;
      if ((p = (uint16 *) malloc(ndat*sizeof(uint16))) == NULL) {
         error(0, "find_median: can't allocate for array");
         return(0);
      }
   }
   else {
      p = pointer_array;
   }

   /* first, we set 'median_array[j]' equal to the number of values
      equal to 'j + 32768' in the given array */
   min = 65535;
   for (i = 0; i < ndat; i++) {
      if ((val = array[i] + 32768) < min) {
         min = val;
      }
      median_array[val]++;
      p[i] = val;
   }

   /* now, starting at the minimum value, walk up through the
      'median_array', summing all elements, until we get up to
      half of 'ndat'.  That value will be the median! */
   sum = 0;

   half = ndat/2+1;
   for (i = min; sum < half; i++) {
      sum += median_array[i];
   }
   median = (i-1) - 32768;

   /* now, set all the elements of the 'median_array' which we've
      touched back to zero, so we can use them mmmediately in the
      next call to this function. */
   for (i = 0; i < ndat; i++) {
      median_array[p[i]] = 0;
   }

   if (flag == 1) {
      free(p);
   }

#ifdef DEBUG
   printf("   median value is %5u\n", median);
#endif

   return(median);
}



/**********************************************************************
 * Given an image, try to choose appropriate values for
 * histogram-arrays by picking a number of points at random
 * and examining their distribution.
 * 
 * Sort the chosen values, and choose those which appear at 
 * the bot_frac (bottom) and top_frac (top) percentiles.
 *
 * Return 0 if all goes well
 *        1 if there's an error.
 */

#define NPTS    100          /* pick this many points in image at random */

static int
find_range(fh, nr, nc, bot_frac, top_frac, bottom, top)
FITS_HANDLE fh;
int nr, nc;
float bot_frac, top_frac;
int16 *bottom, *top;
{
   int i, row, col;
   int16 data[NPTS];

   data[0] = 0;
   i = data[NPTS-1];
   srand(i);
   for (i = 0; i < NPTS; i++) {
      row = (int)(nr*scramble());
      col = (int)(nc*scramble());	
      fits_get_data(fh, row, col, &(data[i]), 1);
   }

   sort(data);

   *bottom = data[(int) (NPTS*bot_frac)];
   *top = data[(int) (NPTS*top_frac)];
   return(0);
}


	/* sort the data values */

static void
sort(data)
int16 data[];
{
	typedef int (*PFI)();

	qsort((char *) &(data[0]), NPTS, sizeof(int16), (PFI) compar);
}

	/* return 1 if the first value is greater, -1 if the second, 0 if
	   they are equal */

static int
compar(a, b)
int16 *a, *b;
{
	if (*a > *b) 
		return(1);
	else if (*a < *b) 
		return(-1);
	else
		return(0);
}


static double 
scramble()		/* returns 0 to 1.0 */
{
	double val;

	val= (double) rand();
	return(val / (double)RAND_MAX);
}
