LinearRegression.java
/**
* The {@code LinearRegression} class performs a simple linear regression
* on an set of <em>n</em> data points (<em>y<sub>i</sub></em>, <em>x<sub>i</sub></em>).
* That is, it fits a straight line <em>y</em> = α + β <em>x</em>,
* (where <em>y</em> is the response variable, <em>x</em> is the predictor variable,
* α is the <em>y-intercept</em>, and β is the <em>slope</em>)
* that minimizes the sum of squared residuals of the linear regression model.
* It also computes associated statistics, including the coefficient of
* determination <em>R</em><sup>2</sup> and the standard deviation of the
* estimates for the slope and <em>y</em>-intercept.
*
* @author Robert Sedgewick
* @author Kevin Wayne
*/
public class LinearRegression {
private final double intercept, slope;
private final double r2;
private final double svar0, svar1;
/**
* Performs a linear regression on the data points {@code (y[i], x[i])}.
*
* @param x the values of the predictor variable
* @param y the corresponding values of the response variable
* @throws IllegalArgumentException if the lengths of the two arrays are not equal
*/
public LinearRegression(double[] x, double[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException("array lengths are not equal");
}
int n = x.length;
// first pass
double sumx = 0.0, sumy = 0.0, sumx2 = 0.0;
for (int i = 0; i < n; i++) {
sumx += x[i];
sumx2 += x[i]*x[i];
sumy += y[i];
}
double xbar = sumx / n;
double ybar = sumy / n;
// second pass: compute summary statistics
double xxbar = 0.0, yybar = 0.0, xybar = 0.0;
for (int i = 0; i < n; i++) {
xxbar += (x[i] - xbar) * (x[i] - xbar);
yybar += (y[i] - ybar) * (y[i] - ybar);
xybar += (x[i] - xbar) * (y[i] - ybar);
}
slope = xybar / xxbar;
intercept = ybar - slope * xbar;
// more statistical analysis
double rss = 0.0; // residual sum of squares
double ssr = 0.0; // regression sum of squares
for (int i = 0; i < n; i++) {
double fit = slope*x[i] + intercept;
rss += (fit - y[i]) * (fit - y[i]);
ssr += (fit - ybar) * (fit - ybar);
}
int degreesOfFreedom = n-2;
r2 = ssr / yybar;
double svar = rss / degreesOfFreedom;
svar1 = svar / xxbar;
svar0 = svar/n + xbar*xbar*svar1;
}
/**
* Returns the <em>y</em>-intercept α of the best of the best-fit line <em>y</em> = α + β <em>x</em>.
*
* @return the <em>y</em>-intercept α of the best-fit line <em>y = α + β x</em>
*/
public double intercept() {
return intercept;
}
/**
* Returns the slope β of the best of the best-fit line <em>y</em> = α + β <em>x</em>.
*
* @return the slope β of the best-fit line <em>y</em> = α + β <em>x</em>
*/
public double slope() {
return slope;
}
/**
* Returns the coefficient of determination <em>R</em><sup>2</sup>.
*
* @return the coefficient of determination <em>R</em><sup>2</sup>,
* which is a real number between 0 and 1
*/
public double R2() {
return r2;
}
/**
* Returns the standard error of the estimate for the intercept.
*
* @return the standard error of the estimate for the intercept
*/
public double interceptStdErr() {
return Math.sqrt(svar0);
}
/**
* Returns the standard error of the estimate for the slope.
*
* @return the standard error of the estimate for the slope
*/
public double slopeStdErr() {
return Math.sqrt(svar1);
}
/**
* Returns the expected response {@code y} given the value of the predictor
* variable {@code x}.
*
* @param x the value of the predictor variable
* @return the expected response {@code y} given the value of the predictor
* variable {@code x}
*/
public double predict(double x) {
return slope*x + intercept;
}
/**
* Returns a string representation of the simple linear regression model.
*
* @return a string representation of the simple linear regression model,
* including the best-fit line and the coefficient of determination
* <em>R</em><sup>2</sup>
*/
public String toString() {
StringBuilder s = new StringBuilder();
s.append(String.format("%.2f n + %.2f", slope(), intercept()));
s.append(" (R^2 = " + String.format("%.3f", R2()) + ")");
return s.toString();
}
}
Main.java
public class Main {
public static void main(String[] args) {
double[] xValues = {1572004656619D, 1572004677789D, 1572004686013D,
1572004693209D, 1572004698606D, 1572004707601D,
1572004726618D};
double[] yValues = {51, 51, 52,
53, 54, 55,
55};
LinearRegression linearRegression = new LinearRegression(xValues, yValues);
System.out.println("Slope: " + linearRegression.slope());
System.out.println("Intercept: " + linearRegression.intercept());
System.out.println("R2: " + linearRegression.R2());
for(int i = 0; i < yValues.length; i++){
System.out.println("time: " + xValues[i] +
" value: " + yValues[i] +
" trend: " + linearRegression.predict(yValues[i]));
}
}
}
Slope: 7.131721781891837E-5
Intercept: -1.1211094805673766E8
R2: 0.8408101880333925
time: 1.572004656619E12 value: 51.0 trend: -1.1211094805310048E8
time: 1.572004677789E12 value: 51.0 trend: -1.1211094805310048E8
time: 1.572004686013E12 value: 52.0 trend: -1.1211094805302916E8
time: 1.572004693209E12 value: 53.0 trend: -1.1211094805295785E8
time: 1.572004698606E12 value: 54.0 trend: -1.1211094805288653E8
time: 1.572004707601E12 value: 55.0 trend: -1.1211094805281521E8
time: 1.572004726618E12 value: 55.0 trend: -1.1211094805281521E8