
/**************************************************************************
 *                                                                        *
 * Copyright (c) 2001 Michael Richmond and Richard Treffers               *
 *                                                                        *
 *                    This software may be copied and distributed for     *
 *                    educational, research and not for profit services   *
 *                    provided that this copyright and statement are      *
 *                    included in all such copies.                        *
 *                                                                        *
 **************************************************************************/


/**************************************************************************
	Looks for "bad" regions in an image -- areas in which sets of
	connected pixels have values very different from the mean.
	The output is a simple ASCII list of bad regions.

		FINDBAD source  [gridsize=] [minsig=] [minpix=] [outfile=] [help]

	BOX       operates over box if specified
	GRIDSIZE  size of grid squares into which we divide image
	MINSIG    bad pixels must be this many (local) stdev from the 
	                             (local) clipped mean
	MINPIX    bad regions must contain at least this many connected pixels
	OUTFILE   send output to the given file, rather than stdout
	HELP      print command-line options


*/

#include "pcvista.h"
#include "fits.h"
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <unistd.h>
#include <stdlib.h>


#undef DEBUG


/*
 * Ordinary pixels are "GOOD".
 * A pixel which deviates from the local mean is "BAD".
 */
#define GOOD   0
#define BAD    1

/* 
 * by default, we divide image into squares of this size in order
 * to calculate local means and stdev
 */
#define GRIDSIZE     300

/*
 * by default, we mark as "bad" pixels which are this many stdev
 * away from the local mean
 */
#define MINSIG        10.0

/*
 * by default, a "bad" region must contain at least this many connected
 * pixels marked as "bad"
 */
#define MINPIX        30


/*
 * When we calculate the mean and stdev in each grid square,
 * we use this many iterations of sigma-clipping.
 */
#define CLIP_ITER      2
#define CLIP_SIGMA     3


/*
 * We use this structure in a linked list to create connected regions
 * of bad pixels.
 */
typedef struct pixel_struct {
	int row;
	int col;
	int region_id;
	struct pixel_struct *next;
} PIXEL_STRUCT;

/*
 * We use this structure to contain all the information about one particular
 * bad region.
 */
typedef struct region_struct {
	int region_id;
	int is_valid;
	int num_pixels;
	int min_row;
	int max_row;
	int min_col;
	int max_col;
	PIXEL_STRUCT *pixels;
	struct region_struct *next;
} REGION_STRUCT;





int main(int, char **);
static char **create_map(int nrow, int ncol);
static void free_map(char **map, int nrow, int ncol);
static void print_map(char **map, int nrow, int ncol);
static int divide_and_mark(FITS_HANDLE fh, int nr, int nc,
	                        int gridsize, double minsig, char **pixel_map);
static int find_connected_regions(char **pixel_map, int nr, int nc,
		                            REGION_STRUCT **out_list);
static void grow_region(char **pixel_map, int nrow, int ncol, 
		                  REGION_STRUCT *reg, int row, int col);
static REGION_STRUCT * new_region(void);
static PIXEL_STRUCT * new_pixel(int region_id, int row, int col);
static void free_region_list(REGION_STRUCT *head);
static void free_pixel_list(PIXEL_STRUCT *head);
static void add_pix_to_reg(REGION_STRUCT *reg, PIXEL_STRUCT *pix);
static void print_regions(FILE *fp, REGION_STRUCT *reg_list);
static void check_regions(REGION_STRUCT *head, int minpix);


char *usage = "usage: mask file [gridsize=] [minsig=] [minpix=] [outfile=] [help]";

int 
main(argc, argv)
int argc;
char *argv[];
{
	FITS_HANDLE fh;
	int i, nrow, ncol, nr, nc;
	int gridsize = GRIDSIZE;
	int minpix = MINPIX;
	double minsig = MINSIG;
	char **pixel_map = NULL;
	char *gotit, fname[NBUF];
	REGION_STRUCT *region_head = NULL;
	FILE *outfp = stdout;

	if (argc == 1) {
		error(1, usage);
	}
	if (strcmp(argv[1], "help") == 0) {
		printf("%s\n", usage);
		exit(0);
	}

	strcpy(fname, argv[1]);
	fh = fits_open(fname, "r", &nrow, &ncol);
	nr = nrow;
	nc = ncol;

	for (i = 2; i < argc; i++) {
		if (keyword("help", argv[i])) {
			printf("%s\n", usage);
			exit(0);
		}
		if ((gotit = find("gridsize", argv[i])) != NULL) {
			gridsize = (int) (evaluate(gotit) + 0.5);
			continue;
		}
		if ((gotit = find("minsig", argv[i])) != NULL) {
			minsig = evaluate(gotit);
			continue;
		}
		if ((gotit = find("minpix", argv[i])) != NULL) {
			minpix = (int) (evaluate(gotit) + 0.5);
			continue;
		}
		if ((gotit = find("outfile", argv[i])) != NULL) {
			if ((outfp = fopen(gotit, "w")) == NULL) {
				error(1, "mask: can't open output file");
			}
			continue;
		}
		fprintf(stderr, "mask: unknown argument '%s'\n", argv[i]);
		exit(1);
	}

	/* 
	 * Step 1: create a 2-D array, a "map"  one element per pixel,
	 *         which we'll use to mark pixels as "good" or "bad".
	 */
	pixel_map = create_map(nr, nc);
	print_map(pixel_map, nr, nc);


	/*
	 * Step 2: divide entire image into grid squares; within
	 *         each grid square, calculate local mean and stdev.
	 *         Mark any pixel more than MINSIG deviations from the 
	 *         local mean as "bad".
	 */
	divide_and_mark(fh, nr, nc, gridsize, minsig, pixel_map);
	print_map(pixel_map, nr, nc);


	/* 
	 * Step 3: once all the pixels in the entire image have been
	 *         marked as "good" or "bad", we look for regions
	 *         of connected pixels.  The results go into a linked
	 *         list of REGION_STRUCTs to which 'region_head' points.
	 */
	if (find_connected_regions(pixel_map, nr, nc, &region_head) != 0) {
		error(1, "mask: find_connected_regions fails ");
	}

	/* 
	 * Step 4: Scrutinize each set of connected pixels, and mark as invalid
	 *         those which don't qualify.  Find the bounding box
	 *         for those which do qualify.
	 */
	check_regions(region_head, minpix);

	/*
	 * Step 5: print the output array of bad regions.
	 */
	print_regions(outfp, region_head);


	free_region_list(region_head);
	free_map(pixel_map, nr, nc);
	fits_close(fh);
	if (outfp != stdout) {
		fclose(outfp);
	}

	exit(0);
}


/****************************************************************************
 * PROCEDURE: create_map
 *
 * DESCRIPTION: create a 2-D "map", an array with one element per pixel.
 *              There is one element per pixel.  All elements start out
 *              with value "GOOD".  For each pixel which deviates from
 *              the local mean, we'll mark the corresponding element of
 *              the map to "BAD".
 *
 * RETURNS:
 *       pointer to the allocated map
 */

static char **
create_map
	(
	int nrow,            /* I: number of rows in map */
	int ncol             /* I: number of columns in map */
	)
{
	int i, j;
	char **new;

	if ((new = (char **) malloc(nrow*sizeof(char *))) == NULL) {
		error(1, "create_map: can't malloc for pointers to rows");
	}
	for (i = 0; i < nrow; i++) {
		if ((new[i] = (char *) malloc(ncol*sizeof(char))) == NULL) {
			error(1, "create_map: can't malloc for a row");
		}
	}

	/* set all elements to "GOOD" */
	for (i = 0; i < nrow; i++) {
		for (j = 0; j < ncol; j++) {
			new[i][j] = GOOD;
		}
	}

	return(new);
}


/****************************************************************************
 * PROCEDURE: free_map
 *
 * DESCRIPTION: de-allocate the memory used to create the "map"
 *
 * RETURNS:
 *    nothing
 */

static void
free_map
	(
	char **map,                 /* I/O: the map array */
	int nrow,                   /* I: number of rows in map */
	int ncol                    /* I: number of cols in map */
	)
{
	int i;

	for (i = 0; i < nrow; i++) {
		free(map[i]);
	}
	free(map);
}


/****************************************************************************
 * PROCEDURE: print_map
 *
 * DESCRIPTION: print the map, using '.' for GOOD and 'x' for BAD.
 *              Used only in debugging.
 *
 * RETURNS:
 *    nothing
 */


static void
print_map
	(
	char **map,                 /* I/O: the map array */
	int nrow,                   /* I: number of rows in map */
	int ncol                    /* I: number of cols in map */
	)
{
#ifdef DEBUG2
	int i, j;


	for (i = 0; i < nrow; i++) {
		for (j = 0; j < nrow; j++) {
			if (map[i][j] == GOOD) {
				putchar('.');
			} else {
				putchar('x');
			}
		}
		putchar('\n');
	}

#endif
}



/****************************************************************************
 * PROCEDURE: divide_and_mark
 *
 * DESCRIPTION: divide the entire image into a grid of squares with the 
 *              given size.  Within each square, 
 *
 *                 a) calculate a clipped mean and stdev
 *                 b) mark as BAD in the map all pixels which fall more than 
 *                            'minsig' stdevs from the mean
 *
 * RETURNS:
 *    0          if all goes well
 *    1          if there's a problem
 */

static int
divide_and_mark
	(
	FITS_HANDLE fh,           /* I: file containing image data */
	int nr,                   /* I: image has this many rows */
	int nc,                   /* I: image has this many cols */
	int gridsize,             /* I: size of squares into which we */
	                          /*      break up the entire image */
	double minsig,            /* I: mark as bad pixels which deviate */
	                          /*      from local mean by this many stdev */
	char **pixel_map          /* O: mark pixels in this array */
	)
{
	int row_sq, col_sq;
	int row, col;
	int iter;
	int num_row_squares, num_col_squares;
	int row_start, row_end, col_start, col_end;
	int16 data[NMAX];
	int16 clip_min, clip_max;
	double val, sum, pixnum, mean, sumsq, diff, stdev;

	num_row_squares = floor(nr/gridsize) + 1;
	num_col_squares = floor(nc/gridsize) + 1;
	for (row_sq = 0; row_sq < num_row_squares; row_sq++) {

		/* 
		 * row_start and row_end are row coordinates within the 
		 * entire image; they run from 0 to the number of rows
		 * in the entire images.
		 */
		row_start = row_sq*gridsize;
		if ((row_end = (row_sq + 1)*gridsize) > nr) {
			row_end = nr;
		}

		for (col_sq = 0; col_sq < num_col_squares; col_sq++) {

			/* 
			 * col_start, col_end are col coordinates with
			 * the entire image, too.
			 */
			col_start = col_sq*gridsize;
			if ((col_end = (col_sq + 1)*gridsize) > nc) {
				col_end = nc;
			}

#ifdef DEBUG
			printf("  grid %4d %4d   at rows %5d %5d  cols %5d %5d \n",
								 	row_sq, col_sq, row_start, row_end,
									col_start, col_end);
#endif
#ifdef DEBUG
			print_map(pixel_map, nr, nc);
#endif

			/* calculate clipped mean, stdev in this square */
			clip_min = (int16) MIN_DATA_VAL;
			clip_max = (int16) MAX_DATA_VAL;
			for (iter = 0; iter < CLIP_ITER; iter++) {
#ifdef DEBUG
				printf("   iter %3d  clip %6d %6d \n", 
									 	iter, clip_min, clip_max);
#endif
				sum = 0.0;
				sumsq = 0.0;
				pixnum = 0;
				for (row = row_start; row < row_end; row++) {
					fits_get_data(fh, row, col_start, data, col_end - col_start);
					for (col = 0; col < col_end - col_start; col++) {
						val = (double) data[col];
						if ((val > clip_min) && (val < clip_max)) {
							sum += val;		
							sumsq += (val*val);
							pixnum += 1.0;
						}
					}
				}

				/* find the mean and stdev for this iteration ... */
				if (pixnum == 0.0) {
					error(1, "mask: no pixels in grid square?");
				}
				mean = sum/pixnum;
				diff = sumsq/pixnum - mean*mean;
				if (diff >= 0.0)
					stdev = sqrt(diff);
				else {
					error(0, "mask: error in std. dev, using 1.0");	
					stdev = 1.0;
				}
#ifdef DEBUG
				printf("    mean %10.2f  stdev %10.2f \n", mean, stdev);
#endif

				/* and set the min, max clip values for next iteration */
				if (mean - CLIP_SIGMA*stdev > MIN_DATA_VAL) {
					clip_min = (int16) (mean - CLIP_SIGMA*stdev);
				}
				if (mean + CLIP_SIGMA*stdev < MAX_DATA_VAL) {
					clip_max = (int16) (mean + CLIP_SIGMA*stdev);
				}
			}

			/* now mark bad pixels in this square on the map */
			for (row = row_start; row < row_end; row++) {
				fits_get_data(fh, row, col_start, data, col_end - col_start);
				for (col = 0; col < col_end - col_start; col++) {
					if ((data[col] < clip_min) || (data[col] > clip_max)) {
							  pixel_map[row][col_start + col] = BAD;
					}
				}
			}

		}
	}

	return(0);
}



/****************************************************************************
 * PROCEDURE: find_connected_regions
 *
 * DESCRIPTION: Given a map of pixels, all marked as GOOD or BAD,
 *              we look for regions of connected BAD pixels.
 *              'Connected' means touching any adjacent side
 *              (left, right, up, down), or corner (up/left, up/right,
 *              down/left, down/right).
 *
 *              As soon as we find one BAD pixels, we use a recursive 
 *              subroutine to fill out the region around it.
 *
 *              The result of this routine is a linked list of
 *              REGION_STRUCTs, to which 'region_head' will point.
 *
 * RETURNS:
 *     0           if all goes well
 *     1           if there's an error
 */

static int
find_connected_regions
	(
	char **pixel_map,          /* I/O: look for bad pixels in here, */
	                           /*         we change all to GOOD by the end */
	int nr,                    /* I: number of rows in map */
	int nc,                    /* I: number of cols in map */
	REGION_STRUCT **out_list   /* O: we will make this point to a linked */
	                           /*         list of regions on output */
	)
{
	int row, col;
	PIXEL_STRUCT *pix;
	REGION_STRUCT *head = NULL;
	REGION_STRUCT *tail = NULL;
	REGION_STRUCT *reg;

	for (row = 0; row < nr; row++) {
		for (col = 0; col < nc; col++) {
			if (pixel_map[row][col] == BAD) {

#ifdef DEBUG2
				printf(" new region starts at %5d %5d \n", row, col);
#endif
				/* 
				 * first, we change this pixel to GOOD so that we won't 
				 * count it twice in a region.
				 */
				pixel_map[row][col] = GOOD;

				/*
				 * now, create a new REGION, place it at the tail of the 
				 * list of REGIONs, and add this pixel as the head
				 * of its linked list of pixels 
				 */
				reg = new_region();
				if (head == NULL) {
					head = reg;
				}
				if (tail == NULL) {
					tail = reg;
				} else {
					tail->next = reg;
					tail = reg;
				}
				pix = new_pixel(reg->region_id, row, col);
				add_pix_to_reg(reg, pix);

				/*
				 * now, we are ready to call a recursive routine which will
				 * fill out all the pixels which form a connected region
				 * around this first one.  We don't need to check previous
				 * rows, or previous cols on this row, so instead of calling 
				 * it 8 times (for the 8 adjacent pixels), we call it 
				 * only 4 times.
				 */
				grow_region(pixel_map, nr, nc, reg, row, col + 1);
				grow_region(pixel_map, nr, nc, reg, row + 1, col - 1);
				grow_region(pixel_map, nr, nc, reg, row + 1, col);
				grow_region(pixel_map, nr, nc, reg, row + 1, col + 1);

			}

		}
	}

	*out_list = head;

	return(0);
}



/***************************************************************************
 * PROCEDURE: grow_region
 *
 * DESCRIPTION: given a region which already exists, and a (row, col)
 *              position, we
 *
 *                  a) check to see if this position is GOOD. 
 *                     If so, we quit and return immediately.
 *
 *              OK, this position is BAD.  Now we
 *
 *                  b) mark this as GOOD, so we won't count it again
 *                  c) add this position to the region
 *                  d) call this function recursively on neighboring pixels
 *
 * RETURNS
 *    nothing
 */

static void
grow_region
	(
	char **pixel_map,               /* I: map of pixel values, GOOD or BAD */
	int nrow,                       /* I: number of rows in the pixel map */
	int ncol,                       /* I: number of columns in the pixel map */
	REGION_STRUCT *region,          /* I/O: try to grow this region */
	int row,                        /* I: starting at this row ... */
	int col                         /* I: ... and this column */
	)
{
	PIXEL_STRUCT *pix;


	/* if we're out of bounds, return immediately */
	if ((row < 0) || (row >= nrow)) {
		return;
	}
	if ((col < 0) || (col >= ncol)) {
		return;
	}

	/* if the pixel isn't BAD, return immediately */
	if (pixel_map[row][col] == GOOD) {
		return;
	}

	/* make sure we don't count this pixel again */
	pixel_map[row][col] = GOOD;

	/* add this pixel to the region */
	pix = new_pixel(region->region_id, row, col);
	add_pix_to_reg(region, pix);

	/* and now try to grow out in all 8 adjacent directions */
	grow_region(pixel_map, nrow, ncol, region, row - 1, col - 1);
	grow_region(pixel_map, nrow, ncol, region, row - 1, col);
	grow_region(pixel_map, nrow, ncol, region, row - 1, col + 1);
	grow_region(pixel_map, nrow, ncol, region, row, col - 1);
	grow_region(pixel_map, nrow, ncol, region, row, col + 1);
	grow_region(pixel_map, nrow, ncol, region, row + 1, col - 1);
	grow_region(pixel_map, nrow, ncol, region, row + 1, col);
	grow_region(pixel_map, nrow, ncol, region, row + 1, col + 1);

}


/****************************************************************************
 * PROCEDURE: new_region
 *
 * DESCRIPTION: create a new REGION_STRUCT, and fill it with 
 *              reasonable initial values.
 *
 * RETURNS:
 *    pointer to new REGION struct
 *    (terminates with error message if it fails to malloc)
 */

static REGION_STRUCT *
new_region
	(
	 void
	)
{
	static int id;
	REGION_STRUCT *new_region;

	if ((new_region = (REGION_STRUCT *) malloc(sizeof(REGION_STRUCT))) == NULL) {
		error(1, "mask: malloc fails in new_region");
	}
	new_region->region_id = id++;
	new_region->is_valid = 1;
	new_region->num_pixels = 0;
	new_region->min_row = -1;
	new_region->max_row = -1;
	new_region->min_col = -1;
	new_region->max_col = -1;
	new_region->pixels = NULL;
	new_region->next = NULL;
	
	return(new_region);
}


/****************************************************************************
 * PROCEDURE: new_pixel
 *
 * DESCRIPTION: create a new PIXEL_STRUCT, and fill it with 
 *              reasonable initial values.
 *
 * RETURNS:
 *    pointer to new PIXEL struct
 *    (terminates with error message if it fails to malloc)
 */

static PIXEL_STRUCT *
new_pixel
	(
	int region_id,        /* I: region to which this pixel belongs */
	int row,              /* I: row of this pixel */
	int col               /* I: column for this pixel */
	)
{
	PIXEL_STRUCT *new_pixel;

	if ((new_pixel = (PIXEL_STRUCT *) malloc(sizeof(PIXEL_STRUCT))) == NULL) {
		error(1, "mask: malloc fails in new_pixel");
	}
	new_pixel->region_id = region_id;
	new_pixel->row = row;
	new_pixel->col = col;
	new_pixel->next = NULL;
	
	return(new_pixel);
}



/****************************************************************************
 * PROCEDURE: free_region_list
 *
 * DESCRIPTION: de-allocate the memory for a linked list of REGION_STRUCTs.
 *
 * RETURNS:
 *    nothing
 */

static void
free_region_list
	(
	REGION_STRUCT *head            /* I: head of the linked list */
	)
{
	REGION_STRUCT *reg, *next;

	reg = head;
	while (reg != NULL) {
		next = reg->next;
		free_pixel_list(reg->pixels);
		free(reg);
		reg = next;
	}
}




/****************************************************************************
 * PROCEDURE: free_pixel_list
 *
 * DESCRIPTION: de-allocate the memory for a linked list of PIXEL_STRUCTs.
 *
 * RETURNS:
 *    nothing
 */

static void
free_pixel_list
	(
	PIXEL_STRUCT *head    /* I: head of the linked list */
	)
{
	PIXEL_STRUCT *pix, *next;

	pix = head;
	while (pix != NULL) {
		next = pix->next;
		free(pix);
		pix = next;
	}
}



/****************************************************************************
 * PROCEDURE: add_pix_to_reg
 *
 * DESCRIPTION: add the given pixel to the tail end of the linked list
 *              of pixels in the given region
 *
 * RETURNS:
 *    nothing
 */

static void
add_pix_to_reg
	(
	REGION_STRUCT *reg,        /* I: region to which we add pixel */
	PIXEL_STRUCT *pix          /* I: place this at end of region's list */
	)
{
	PIXEL_STRUCT *tail;

	if (reg->pixels == NULL) {
		reg->pixels = pix;
	}
	else {
		tail = reg->pixels;
		while (tail->next != NULL) {
			tail = tail->next;
		}
		tail->next = pix;
	}
	reg->num_pixels++;
}


/***************************************************************************
 * PROCEDURE: check_regions
 *
 * DESCRIPTION: Look at each REGION_STRUCT in the given list.  Check to see
 *              that the number of pixels in the linked list matches the
 *              count in 'num_pixels.'  If the number of pixels is too 
 *              small, set the 'is_valid' flag to zero.
 *              Set the bounding-box values.
 *
 * RETURNS:
 *   nothing
 */

static void
check_regions
	(
	REGION_STRUCT *head,        /* I/O: head of list of REGIONs to check */
	int minpix                  /* I: mark as invalid regions with fewer */
	                            /*        than this many pixels */
	)
{
	int pixel_count;
	PIXEL_STRUCT *pix;
   REGION_STRUCT *reg;

	for (reg = head; reg != NULL; reg = reg->next) {

		/* sanity check */
		pixel_count = 0;
		for (pix = reg->pixels; pix != NULL; pix = pix->next) {
			pixel_count++;
		}
		if (pixel_count != reg->num_pixels) {
			error(0, "mask: check_regions fails pixel count");
		}

		/* does this region have enough pixels to keep? */
		if (reg->num_pixels < minpix) {
			/* nope */
			reg->is_valid = 0;
		}

		/* set the bounding box */
		if (reg->num_pixels > 0) {
			pix = reg->pixels;
			reg->min_row = pix->row;
			reg->max_row = pix->row;
			reg->min_col = pix->col;
			reg->max_col = pix->col;
			while (pix->next != NULL) {
				pix = pix->next;
				if (pix->row < reg->min_row) {
					reg->min_row = pix->row;
				}
				if (pix->row > reg->max_row) {
					reg->max_row = pix->row;
				}
				if (pix->col < reg->min_col) {
					reg->min_col = pix->col;
				}
				if (pix->col > reg->max_col) {
					reg->max_col = pix->col;
				}
			}
		}

	}
}


/***************************************************************************
 * PROCEDURE: print_regions
 *
 * DESCRIPTION: print out information on each REGION_STRUCT in the given
 *              list.  Used only in debugging, probably.
 *
 * RETURNS:
 *    nothing
 */

static void
print_regions
	(
	FILE *fp,                      /* I: send output here */
	REGION_STRUCT *reg_list        /* I: linked list of regions to print */
	)
{
	PIXEL_STRUCT *pix;
	REGION_STRUCT *reg;

	reg = reg_list;
	while (reg != NULL) {

		if (reg->is_valid == 1) {
			fprintf(fp, " %6d %6d   %5d %5d  %5d %5d  \n", 
					reg->region_id, reg->num_pixels, 
					reg->min_row, reg->max_row, reg->min_col, reg->max_col);
			pix = reg->pixels;
#if 0
			while (pix != NULL) {
				fprintf(fp, "  %4d,%4d  ", pix->row, pix->col);
				pix = pix->next;
			}
			fprintf (fp, "\n");
#endif
		}

		reg = reg->next;
	}
}
 

