#!/usr/bin/perl
# Compute in a very simple manner the bias incurred by
#    errors in a parallax measurement.
#
# MWR 5/9/2017
#
# This version simulates stars in a large volume and 
#    iterates to find those which have measurements
#    within a certain range of parallaxes.  Compares their
#    true parallax (and dist) with measured parallax (and dist)
#
# MWR 5/9/2017

use POSIX;

$debug = 0;
my $mypi = 3.14159265;

srand();


# usage: calc_bias.pl      apparent_pi  sigma_pi  num_star
$USAGE = "calc_bias.pl  measured_pi  sigma_pi  num_star ";

if ($#ARGV != 2) {
  printf "usage: %s \n", $USAGE;
  exit(1);
}

$measured_pi = $ARGV[0];
$sigma_pi = $ARGV[1];
$num_star = $ARGV[2];

if ($debug > 0) {
  printf "  measured_pi    %10.2f \n", $measured_pi;
  printf "  sigma_pi       %10.2f \n", $sigma_pi   ;
  printf "  num_star       %10d \n", $num_star;
}


# now comes the more complicated part.  Here, we do a detailed
#    statistical modeling
#
#      - given that we want to DETECT num_star stars,
#           each of which has a parallax value between
#
#                 (measured_pi - sigma_pi)  to  (measured_pi + sigma_pi)
#
#      - enter loop 
#           - generate random uniform values (x, y, z) for location
#           - compute true distance from origin
#           - compute true parallax true_p
#           - generate random uncertainty sigma_p
#           - compute apparent angle apparent_p = (true_p + sigma_p)
#           - if apparent_p lies in the desired range
#                   add this star to our array of "observed" star
#
#      - in the end, we can print/plot distribution of true distances
# 
my $p = $measured_pi;
my $sigma = $sigma_pi;
my $min_p = $p - $sigma;
my $max_p = $p + $sigma;

# set reasonable limits for a box inside which we generate stars
#    box size in pc
my $dist = 1000.0 / $measured_pi;
my $boxsize = 8.0 * 2.0 * $dist;
my $halfsize = $boxsize / 2.0;

my $num_found = 0;
my $max_iter = 30000000;


  for (my $i = 0; $i < $max_iter; $i++) {

    # generate location of star
    $x = rand($boxsize) - $halfsize;
    $y = rand($boxsize) - $halfsize;
    $z = rand($boxsize) - $halfsize;
    if ($debug > 0) {
      printf " iter %8d  %6.0f %6.0f %6.0f \n", $i, $x, $y, $z;
    }
    
    my $true_d = sqrt($x*$x + $y*$y + $z*$z);
    # this is true parallax in mas
    my $true_p = 1000.0 / $true_d;

    # now generate a random error in the measured parallax
    my $error_p = gaussian_rand()*$sigma;

    # this is the measured parallax
    my $p = $true_p + $error_p;

    # if this doesn't fall into the range, then skip to next trial
    if (($p < $min_p) || ($p > $max_p)) {

      # skip it
      if ($debug > 0) {
        printf "i %9d  true_p %6.1f  p %6.1f  fails %6.1f - %6.1f \n",
                   $i, $true_p, $p, $min_p, $max_p;
      }
      next;
    }
    else {

      my $d = 1000.0 / $p;

      # now accumulate statistics
      $true_p_array[$num_found] = $true_p;
      $true_d_array[$num_found] = $true_d;
      $meas_p_array[$num_found] = $p;
      $meas_d_array[$num_found] = $d;

      $num_found++;

      if ($debug > 0) {
        printf "i %9d  true_p %6.1f  p %6.1f  okay  %6.1f - %6.1f  found %5d \n",
                   $i, $true_p, $p, $min_p, $max_p, $num_found;
      }

      # can we stop now?
      if ($num_found >= $num_star) {
        last;
      }
    }


  }


# print out the table 
if ($debug >= 0) {
  for (my $i = 0; ($i < $num_star) && ($i < $num_found); $i++) {
    printf "  star %5d  true_p %8.2f  true_d %9.3f  meas_p %8.2f  meas_d %9.3f \n", 
             $i, $true_p_array[$i], $true_d_array[$i],
             $meas_p_array[$i], $meas_d_array[$i]; 
  }
}


# compute interquartile mean of all true distances
(my $mean_true_d, my $sigma_true_d) = 
       compute_interquartiles($num_star, @true_d_array);
printf "#  true dist: iq mean %8.2f  iq range %8.2f \n",
          $mean_true_d, $sigma_true_d;



exit 0;


######################################################################
# PROCEDURE: gaussian_rand
#
# DESCRIPTION: Returns a random number drawn from a gaussian (normal)
#              distribution with mean 0.0 and standard deviation 1.0.
#
#              Stolen from 
#
#                 http://www.unix.com.ua/orelly/perl/cookbook/ch02_11.htm
#
#
sub gaussian_rand {
    my ($u1, $u2);  # uniformly distributed random numbers
    my $w;          # variance, then a weight
    my ($g1, $g2);  # gaussian-distributed numbers

    do {
        $u1 = 2 * rand() - 1;
        $u2 = 2 * rand() - 1;
        $w = $u1*$u1 + $u2*$u2;
    } while ( $w >= 1 );

    $w = sqrt( (-2 * log($w))  / $w );
    $g2 = $u1 * $w;
    $g1 = $u2 * $w;
    # could return both if wanted
    #   return wantarray ? ($g1, $g2) : $g1;
    # but I need just one
    return($g1);
}




##################################################################
# PROCEDURE: compute_interquartiles
#
# DESCRIPTION: Given an array of numbers, find the interquartile
#              mean and HALF the interquartile range.  
#
# RETURN:      (inter_quartile_mean, half_inter_quartile_range)
#
#
sub compute_interquartiles {

  my $i, $num, $index25, $index50, $index75;
  my $sum, $sum_terms;
  my $iqm, $iqrange;
  my @array, @sorted_array;

  $iqm = 0.0;
  $iqrange = -1.0;

  $num = $_[0];
  for (my $i = 0; $i < $num; $i++) {
    $array[$i] = $_[1 + $i];
  }

  @sorted_array = sort { $a <=> $b } @array;

  if ($num < 4) {
    printf "compute_interquartiles: warning: num %d is small! \n", $num;
  }
  $index25 = floor($num/4.0);
  $index50 = floor($num/2.0);
  $index75 = floor(3.0*$num/4.0);
  if ($debug > 1) {
    printf "   num %4d  index25 %3d  index50 %3d  index75 %3d \n",
          $num, $index25, $index50, $index75;
  }

  # compute interquartile mean
  $sum_terms = 0;
  $sum = 0;
  for (my $i = $index25; $i <= $index75; $i++) {
    if ($debug > 1) {
      printf "  i %5d  sorted_array %7.3f  \n", $i, $sorted_array[$i];
    }
    $sum += $sorted_array[$i];
    $sum_terms++;
  }
  if ($sum_terms > 0) {
    $iqm = $sum / $sum_terms;
    $iqrange = ($sorted_array[$index75] - $sorted_array[$index25]) / 2.0;
  }
  else {
    printf "compute_interquartiles: sum_terms == 0 ?? bogus results \n";
  }

  if ($debug > 1) {
    printf "  iqm %7.3f  iqrange %7.3f \n", $iqm, $iqrange;
  }
  
  return($iqm, $iqrange);
}
  

