Computing linear regression in one pass
The RunningRegression
class is the analog of the RunningStats
class described here and uses that class. You add pairs of (x, y) values by using the Push
. At any point along the way you can call the Slope
, Intercept
, or Correlation
functions to see the current value of these statistics.
You can also combine two RunningRegression
objects by using the +
and +=
operators. For example, you might accrue data on several different threads in parallel then add their RunningRegression
objects together.
Here is the header file RunningRegression.h
:
#ifndef RUNNINGREGRESSION #define RUNNINGREGRESSION #include "RunningStats.h" class RunningRegression { public: RunningRegression(); void Clear(); void Push(double x, double y); long long NumDataValues() const; double Slope() const; double Intercept() const; double Correlation() const; friend RunningRegression operator+( const RunningRegression a, const RunningRegression b); RunningRegression& operator+=(const RunningRegression &rhs); private: RunningStats x_stats; RunningStats y_stats; double S_xy; long long n; }; #endif
Here is the implementation file RunningRegression.cpp
.
#include "RunningRegression.h" RunningRegression::RunningRegression() { Clear(); } void RunningRegression::Clear() { x_stats.Clear(); y_stats.Clear(); S_xy = 0.0; n = 0; } void RunningRegression::Push(double x, double y) { S_xy += (x_stats.Mean() -x)*(y_stats.Mean() - y)*double(n)/double(n+1); x_stats.Push(x); y_stats.Push(y); n++; } long long RunningRegression::NumDataValues() const { return n; } double RunningRegression::Slope() const { double S_xx = x_stats.Variance()*(n - 1.0); return S_xy / S_xx; } double RunningRegression::Intercept() const { return y_stats.Mean() - Slope()*x_stats.Mean(); } double RunningRegression::Correlation() const { double t = x_stats.StandardDeviation() * y_stats.StandardDeviation(); return S_xy / ( (n-1) * t ); } RunningRegression operator+(const RunningRegression a, const RunningRegression b) { RunningRegression combined; combined.x_stats = a.x_stats + b.x_stats; combined.y_stats = a.y_stats + b.y_stats; combined.n = a.n + b.n; double delta_x = b.x_stats.Mean() - a.x_stats.Mean(); double delta_y = b.y_stats.Mean() - a.y_stats.Mean(); combined.S_xy = a.S_xy + b.S_xy + double(a.n*b.n)*delta_x*delta_y/double(combined.n); return combined; } RunningRegression& RunningRegression::operator+=(const RunningRegression &rhs) { RunningRegression combined = *this + rhs; *this = combined; return *this; }
This code depends on RunningStats.h
and RunningStats.cpp
which are available here.