//
// Built by: Peter A Noble PhD Emails: panoble2017@gmail.com pnoble@upcontracter.up.com
// October, 2018

#include <fstream>
#include <string>
#include <iostream>
#include <math.h>
#include <cstdlib>
#include <float.h>
#include <complex>
#include <iomanip>

// g++ determine_regress_pytorch.cpp -o determine_regress
// ./determine_regress train_model_y.out.txt train_actual_y.out.txt regress_out.txt results.txt
// ./determine_regress valid_model_y.out.txt valid_actual_y.out.txt regress_out.txt  results2.txt
// ./determine_regress actual_model_y.out.txt actual_actual_y.out.txt regress_out.txt  results3.txt

// ./determine_regress test_model_y.out.txt test_actual_y.out.txt regress_out.txt  results4.txt

// determines the slope, R2 and 95% confidence and prediction intervals
// based on predict_actual data.  Note columns have to be predict_Y and actual_Y

using namespace std;

//int rows=1000000000;
int columns=2;
double double_array[10][1000000];
/*
double** double_array = new double*[columns];
for (int s = 0; s < columns; s++)		
	{
	double_array[s] = new double[rows];
	for (int t = 0; t < rows; t++)
		{
		double_array[s][t] = 0.0;
		}		
	}
*/
/// ******************************************************************///
/// ******************************************************************///

double correl(int r);						// DETERMINES THE CORREL OF ARRAY
double slope(int r, double means1, double means2);
double b(int r, double m);
double means2(int r);
double means1(int r);
//double RMSE(int r);
//double SE(int r);
void y_resid(int r, double m, double b1);
void CI(int r, double SSresd, double SSx, double means1);
void PI(int r, double SSresd, double SSx, double means1);

/// ******************************************************************///
/// ******************************************************************///
void PI(int r,double SSresd, double SSx, double means1)
{
//
// This subroutine calculates the residuals for y
double sum=0.0;
double answer=0.0;
int y=0;
double t=1.96;
double temp=0;

for (int y=0;y<(r);y++) 	
	{
	//double_array[5][y]=double_array[2][y]-(t*SSresd(r)*sqrt((double(double(1)/15))+double((double_array[1][y]-means1(r)*double_array[1][y]-means1(r)))/SSx(r)));
	double_array[7][y]=double_array[2][y]-((t*SSresd*sqrt(1+(double(double(1)/15))+double((double_array[1][y]-means1)*(double_array[1][y]-means1))/SSx)));
	double_array[8][y]=double_array[2][y]+((t*SSresd*sqrt(1+(double(double(1)/15))+double((double_array[1][y]-means1)*(double_array[1][y]-means1))/SSx)));
	}
cout << "PI's calculated\n";

return;
}
/// ******************************************************************///
/// ******************************************************************///

/// ******************************************************************///
/// ******************************************************************///
void CI(int r,double SSresd, double SSx, double means1)
{
//
// This subroutine calculates the residuals for y
double sum=0.0;
double answer=0.0;
int y=0;
double t=1.96;
double temp=0;

for (int y=0;y<(r);y++) 	
	{
	//double_array[5][y]=double_array[2][y]-(t*SSresd(r)*sqrt((double(double(1)/15))+double((double_array[1][y]-means1(r)*double_array[1][y]-means1(r)))/SSx(r)));
	double_array[5][y]=double_array[2][y]-((t*SSresd*sqrt((double(double(1)/15))+double((double_array[1][y]-means1)*(double_array[1][y]-means1))/SSx)));
	double_array[6][y]=double_array[2][y]+((t*SSresd*sqrt((double(double(1)/15))+double((double_array[1][y]-means1)*(double_array[1][y]-means1))/SSx)));
	}
cout << "CI's calculated\n";
return;
}
/// ******************************************************************///
/// ******************************************************************///

/// ******************************************************************///
/// ******************************************************************///
void y_resid(int r, double m, double b1)
{
//
// This subroutine calculates the residuals for y

int y=0;

for (int y=0;y<(r);y++) 	
	{
	double_array[2][y]=((double_array[1][y]*m)+b1);
	//cout << double_array[2][y] << "\n";
	}
for (int y=0;y<(r);y++) 	
	{
	double_array[3][y]=((double_array[0][y]-double_array[2][y]))*((double_array[0][y]-double_array[2][y]));
	//cout << double_array[3][y] << "\n";
	}
cout << "y_resid's calculated\n";
return ;
}
/// ******************************************************************///
/// ******************************************************************///

double b(int r, double m)
{
//
// This subroutine calculates the intercept b
double meanx=0.0,meanx1=0,xhat=0.0,yhat=0.0;
double intercept=0.0;

int y=0;

for (int y=0;y<(r);y++) 	
	{
	yhat=double_array[0][y]+yhat;
	xhat=double_array[1][y]+xhat;
	}
	yhat=double(double(yhat)/(r));
	xhat=double(double(xhat)/(r));
	
intercept=yhat-((m*xhat));

return intercept;
}
/// ******************************************************************///
/// ******************************************************************///
double RMSE(int r)
{
//
// This subroutine calculates Root Mean Squared Error
double diff=0.0;
double sqrt_diff=0.0;
int y=0;
for (y=0;y<(r);y++) 	
	{
	diff=sqrt((double_array[0][y]-double_array[1][y])*(double_array[0][y]-double_array[1][y]))+diff;
	}
//sqrt_diff=sqrt();	
return double(double(diff)/r);
}
/// ******************************************************************///
/// ******************************************************************///
double correl(int r)
{
//
// This subroutine calculates the Pearson correlation between two
// columns of numbers

double y1=0.0,y2=0.0,sumsqy1=0.0,sumsqy2=0.0,Sumxy=0.0,SSY1=0.0,
	SSY2=0.0,SP=0.0;int y=0,i=r;

for (y=0;y<(r);y++) 	
	{
	y1=double_array[0][y]+y1;
	sumsqy1=(double_array[0][y]*double_array[0][y])+sumsqy1;
	y2=double_array[1][y]+y2;
	sumsqy2=(double_array[1][y]*double_array[1][y])+sumsqy2;
	Sumxy=Sumxy+(double_array[0][y]*double_array[1][y]);
//	cout << B[y] << "\t" << A[c][y] << "\n";
	}
SSY1=sumsqy1-((y1*y1)/double(i));
SSY2=sumsqy2-((y2*y2)/double(i));
SP=Sumxy-((y1*y2)/double(i));
//cout << "\t" << SP/sqrt(SSY1*SSY2) <<"\n" << flush; 
//exit(1);
return SP/sqrt(SSY1*SSY2);
}
/// ******************************************************************///
double slope(int r, double means1, double means2)
{
//
// This slope subroutine is used to calculate the slope of the 
// normalized data
//
double meanx=0.0,meanx1=0,xhat=0.0,xy=0.0,sumxy=0.0,xsquare=0.0,sumxsquare=0.0;
double sumx=0.0,sumy=0.0;
meanx=means2;
meanx1=means1;
//cout << "c= " << c << " r=" << r <<  "\t" << "meanx1= "<< meanx1 <<  "\n" << flush;
//exit(1);
for (int y=0;y<(r);y++) 	
	{
	xhat=double(double_array[0][y])-meanx;
	xhat=double(double_array[1][y])-meanx1;
	sumy=double_array[0][y]+sumy; 
	sumx=double_array[1][y]+sumx;
	xy=(double_array[1][y]*double_array[0][y])+xy;
	xsquare=(xhat)*(xhat);
	sumxsquare=sumxsquare+xsquare; 
	}
sumxy=(xy-double((sumx*sumy)/double(r)));
return double(sumxy)/double(sumxsquare);;
}
//// ******************************************************************///
/// ******************************************************************///

int main (int argc, char * const argv[]) {

ifstream in(argv[1]); 
ifstream in2(argv[2]); 
ofstream out(argv[3]); 
ofstream out2(argv[4]); 

int number=0;
int count=0;
int s=0,i=0;
double r2=0.0;
double actual=0.0;
double SSresd=0.0;
int y=0;
double sum=0.0;
double SSx=0.0;
double means2=0.0;
double means1=0.0;
double m=0.0;
double b1=0.0;
double double_num=0.0;
double double_num2=0.0;
double max=21080;
double min=-21080;

// read infile
while(!in.eof())
	{
	in >> double_num; double_array[0][s]=double_num * (max-min) + min;
	in2 >> double_num2; double_array[1][s]=double_num2 * (max-min) + min;
	s=s+1;
	}
//cout << s << "\n" << flush; exit(1);
// calculated meanY
sum=0;
i=0;
for (int y=0;y<(s);y++) 	
	{
	sum=double_array[0][y]+sum;i=i+1;
	}
means2=double(double(sum))/double(i);

// calculated meanX
sum=0;
i=0;
for (int y=0;y<(s);y++) 	
	{
	sum=double_array[1][y]+sum;i=i+1;
	}
means1=double(double(sum))/double(i);

// determine basic components
m=slope(s,means1,means2);
b1=b(s,m);
r2=correl(s)*correl(s);
y_resid(s,m,b1); //load array[2][y] and load array[3][y]

// determine SSr -residuals
sum=0.0;
for (int y=0;y<(s);y++) 	
	{
	sum=double_array[3][y]+sum;
	}
	SSresd=sqrt(double(double(sum)/(s-2)));	

// determine x-meanx
sum=0.0;
for (int y=0;y<(s);y++) 	
	{
	double_array[4][y]=(double_array[1][y]-means1)*(double_array[1][y]-means1);
	}
	
// determine SSx 
for (int y=0;y<(s);y++) 	
	{
	SSx=(double_array[4][y])+SSx;
	}

// determine confidence intervals
CI(s, SSresd, SSx, means1);

// determine prediction intervals
PI(s, SSresd, SSx, means1);

// print to the screen
cout << "R2\t" << "Slope\t" << "Intercept\t"<< "MAE\t"<< "Count\n";
cout << r2 << "\t" << m << "\t" << b1 << "\t" << RMSE(s)<<"\t" << s << "\n";
out << "R2\t" << "Slope\t" << "Intercept\t"<< "MAE\t"<< "Count\n";
out << r2 << "\t" << m << "\t" << b1 << "\t" << RMSE(s)<<"\t" << s << "\n";

// print to the outfile

out2 << "Y\t" << "X\t" << "LCI\t" << "UCI\t" << "LPI\t" << "UPI\n";
for (int y=0;y<(s);y++) 	
	{
	out2 << double_array[0][y] << "\t" << double_array[1][y]<< "\t";	
	out2 << double_array[5][y] << "\t" << double_array[6][y]<< "\t";	
	out2 << double_array[7][y] << "\t" << double_array[8][y]<< "\n";	
	}
	
return 0;
}
