#!/usr/bin/perl -w

# correlator.pl
# 4 March 2005
#
# Copyright 2005 by John R. Ackermann  N8UR (jra@febo.com)
# Licensed under the GPL version 2 or later; see the file COPYING
# included with this distribution.  I request, but do not require, that
# any modifications that correct bugs or errors, or increase the program's
# functionality, be sent via email to the author at the address above.

use strict;
use POSIX qw(tmpnam);
use IO::File;
use Getopt::Std;
use Statistics::OLS;
use n8ur qw(round lower_case);

###############################################################################
# local paths
###############################################################################

my $grace = "/usr/bin/grace";
my $work_dir = "./";
my $phase_template = "./phase.template";
my $sigma_template = "./sigma.template";

###############################################################################

my $opt;
my $data_file;
my $out_file;
my $df;
my $do;
my $html_file;
my $phase_file;
my $sigma_file;
my $i;
my $j;
my $reading;
my $x;
my $y1;
my $y2;
my $y3;
my @xarray;
my @y1array;
my @y2array;
my @y3array;
my @diff1array;
my @diff2array;
my @diff3array;
my @column;
my $y_axis_label;
my $days;
my $seconds;
my $total_offset;
my $linear_offset;
my $sigma_1;
my $sigma_2;
my $sigma_3;
my $sample_size;
my $ls;
my $intercept;
my $slope;
my $R_squared;
my $xmin;
my $xmax;
my $ymin;
my $ymax;
my $range;
my $avY;
my $avX;
my $scale;
my @xsigma;
my @ysigma;
my $avar;

#----------
# usage and option initialization
my $opt_string = 'hn:o:f:';
sub usage() {
print STDERR << "EOF";

usage: $0 [-h] [-n top|center|bottom] -o output filename -f filename

-h	: this (help) message

-n	: normalize phase data, setting top|center|bottom as zero

-o	: output file name

-f	: data file name; if "-" read from STDIN

EOF
}

#----------------------
getopts( "$opt_string", \my %opt ) or usage() and exit;
usage() and exit if $opt{h};
usage() and exit if !$opt{f};
usage() and exit if !$opt{o};

my $normalize = 0;
if ($opt{n}) {
	$normalize = lower_case($opt{n});
	}

if ($opt{f}) {
	$data_file = $opt{f};
	}
else {
	die "No data file specified!\n";
	}

if ($opt{o}) {
	$out_file = $opt{o};
	}
else {
	die "No output file specified!\n";
	}

###############################################################################
# main program
###############################################################################

# Add data points
$df = new IO::File $data_file, "r";
$do = new IO::File $out_file, "w";
if (defined $df) {
	while ($reading = <$df>) {
		if (substr($reading,0,1) =~ "[0-9]") {
			($x,$y1,$y2,$y3) = split(/\s/,$reading);
			push(@xarray,$x);
			push(@y1array,$y1);
			push(@y2array,$y2);
			push(@y3array,$y3);
		}
	}
} else {
	exit;
	}

for ($i=0;$i<3;$i++) {
	if ($i == 0) {@column = @y1array};
	if ($i == 1) {@column = @y2array};
	if ($i == 2) {@column = @y3array};
	# set up linear regression	
	$ls = Statistics::OLS->new(); 
	$ls->setData (\@xarray, \@column);
	$ls->regress();
	($intercept, $slope) = $ls->coefficients();
	$sample_size = $ls->size();
	($xmin, $xmax, $ymin, $ymax) = $ls->minMax();
	($avX, $avY) = $ls->av();

	# For some reason, the returned slope is per day, not per sample.
	# This could potentially break with differently formatted data.
	$linear_offset = $slope/86400;

	# normalize y values
	if ($normalize eq "top") {
		for ($j=0; $j<$sample_size; $j++) {
			$column[$j] -= $ymax;
		}
	}	
	if ($normalize eq "center") {
		for ($j=0; $j<$sample_size; $j++) {
			$column[$j] -= $ymin + ($range/2);
		}
	}
	if ($normalize eq "bottom") {
		for ($j=0; $j<$sample_size; $j++) {
			$column[$j] -= $ymin;
		}
	}
	if ($i == 0) {@y1array = @column};
	if ($i == 1) {@y2array = @column};
	if ($i == 2) {@y3array = @column};
}

for ($j=0; $j<$sample_size; $j++) {
	$diff1array[$j] = $y1array[$j] - $y2array[$j];
	$diff2array[$j] = $y1array[$j] - $y3array[$j];
	$diff3array[$j] = $y2array[$j] - $y3array[$j];
}
# set up linear regression
$ls = Statistics::OLS->new();
$ls->setData (\@xarray, \@diff1array);
$ls->regress();
printf "Sigma for A-B: %3.3e\n",$ls->sigma(),;

$ls = Statistics::OLS->new();
$ls->setData (\@xarray, \@diff2array);
$ls->regress();
printf "Sigma for A-C: %3.3e\n",$ls->sigma(),;

$ls = Statistics::OLS->new();
$ls->setData (\@xarray, \@diff3array);
$ls->regress();
printf "Sigma for B-C: %3.3e\n",$ls->sigma(),;



for ($j=0; $j<$sample_size; $j++) {
	printf $do "%s %+6.5e %+6.5e %+6.5e\n",
		$xarray[$j],$diff1array[$j],$diff2array[$j],$diff3array[$j];
}
exit 0;

