2.1.0
User Documentation for Apache MADlib
linear.sql_in File Reference

SQL functions for linear regression. More...

Functions

void linregr_train (varchar source_table, varchar out_table, varchar dependent_varname, varchar independent_varname, varchar grouping_cols, boolean heteroskedasticity_option)
 Linear regression training function with grouping support. More...
 
void linregr_train (varchar source_table, varchar out_table, varchar dependent_varname, varchar independent_varname, varchar grouping_cols)
 
void linregr_train (varchar source_table, varchar out_table, varchar dependent_varname, varchar independent_varname)
 
varchar linregr_train ()
 
varchar linregr_train (varchar message)
 
bytea8 linregr_transition (bytea8 state, float8 y, float8[] x)
 
bytea8 linregr_merge_states (bytea8 state1, bytea8 state2)
 
linregr_result linregr_final (bytea8 state)
 
bytea8 hetero_linregr_transition (bytea8 state, float8 y, float8[] x, float8[] coef)
 
bytea8 hetero_linregr_merge_states (bytea8 state1, bytea8 state2)
 
heteroskedasticity_test_result hetero_linregr_final (bytea8 state)
 
aggregate bytea8 linregr (float8 dependentVariable, float8[] independentVariables)
 Compute linear regression coefficients and diagnostic statistics. More...
 
aggregate bytea8 heteroskedasticity_test_linregr (float8 dependentVariable, float8[] independentVariables, float8[] olsCoefficients)
 Compute studentized Breusch-Pagan heteroskedasticity test for linear regression. More...
 
float8 linregr_predict (float8[] coef, float8[] col_ind_var)
 Predict the value of the dependent variable for a given array of independent variable values in a linear regression model. More...
 
text linregr_predict (text message)
 
text linregr_predict ()
 

Detailed Description

Date
January 2011
See also
For a brief introduction to linear regression, see the module description Linear Regression.

Function Documentation

◆ hetero_linregr_final()

heteroskedasticity_test_result hetero_linregr_final ( bytea8  state)

◆ hetero_linregr_merge_states()

bytea8 hetero_linregr_merge_states ( bytea8  state1,
bytea8  state2 
)

◆ hetero_linregr_transition()

bytea8 hetero_linregr_transition ( bytea8  state,
float8  y,
float8 []  x,
float8 []  coef 
)

◆ heteroskedasticity_test_linregr()

aggregate bytea8 heteroskedasticity_test_linregr ( float8  dependentVariable,
float8 []  independentVariables,
float8 []  olsCoefficients 
)
Parameters
dependentVariable - Column containing the dependent variable
independentVariables - Column containing the array of independent variables
olsCoefficients - Column containing the array of the OLS coefficients (as obtained by linregr)
To include an intercept in the model, set one coordinate in the independentVariables array to 1.
Returns
A composite value:
  • test_statistic FLOAT8[] - Breusch-Pagan test statistic
  • p_value FLOAT8[] - Prob > test_statistic
Usage
 SELECT (heteroskedasticity_test_linregr(dependentVariable,
 independentVariables, coef)).*
 FROM (
   SELECT (linregr(dependentVariable, independentVariables)).coef
   FROM sourceName
 ) AS ols_coef, sourceName AS src;

◆ linregr()

aggregate bytea8 linregr ( float8  dependentVariable,
float8 []  independentVariables 
)
Parameters
dependentVariable - Column containing the dependent variable
independentVariables - Column containing the array of independent variables
To include an intercept in the model, set one coordinate in the independentVariables array to 1.
Returns
A composite value:
  • coef FLOAT8[] - Array of coefficients, \( \boldsymbol c \)
  • r2 FLOAT8 - Coefficient of determination, \( R^2 \)
  • std_err FLOAT8[] - Array of standard errors, \( \mathit{se}(c_1), \dots, \mathit{se}(c_k) \)
  • t_stats FLOAT8[] - Array of t-statistics, \( \boldsymbol t \)
  • p_values FLOAT8[] - Array of p-values, \( \boldsymbol p \)
  • condition_no FLOAT8 - The condition number of matrix \( X^T X \).
Usage
  • Get vector of coefficients \( \boldsymbol c \) and all diagnostic statistics:
    SELECT (linregr(dependentVariable,
            independentVariables)).*
    FROM sourceName;
  • Get vector of coefficients \( \boldsymbol c \):
    SELECT (linregr(dependentVariable,
            independentVariables)).coef
    FROM sourceName;
  • Get a subset of the output columns, e.g., only the array of coefficients \( \boldsymbol c \), the coefficient of determination \( R^2 \), and the array of p-values \( \boldsymbol p \):
    SELECT (lr).coef, (lr).r2, (lr).p_values
    FROM (
        SELECT linregr( dependentVariable,
                        independentVariables) AS lr
        FROM sourceName
    ) AS subq;

◆ linregr_final()

linregr_result linregr_final ( bytea8  state)

◆ linregr_merge_states()

bytea8 linregr_merge_states ( bytea8  state1,
bytea8  state2 
)

◆ linregr_predict() [1/3]

float8 linregr_predict ( float8 []  coef,
float8 []  col_ind_var 
)
Parameters
coef - Coefficients obtained by running linear regression.
col_ind_var - Independent variable array
Returns
DOUBLE PRECISION Predicted value

This function computes the dot product of the independent variables and the coefficients. This requires the length of the two vectors to be the same.

◆ linregr_predict() [2/3]

text linregr_predict ( text  message)

◆ linregr_predict() [3/3]

text linregr_predict ( )

◆ linregr_train() [1/5]

void linregr_train ( varchar  source_table,
varchar  out_table,
varchar  dependent_varname,
varchar  independent_varname,
varchar  grouping_cols,
boolean  heteroskedasticity_option 
)

◆ linregr_train() [2/5]

void linregr_train ( varchar  source_table,
varchar  out_table,
varchar  dependent_varname,
varchar  independent_varname,
varchar  grouping_cols 
)

◆ linregr_train() [3/5]

void linregr_train ( varchar  source_table,
varchar  out_table,
varchar  dependent_varname,
varchar  independent_varname 
)

◆ linregr_train() [4/5]

varchar linregr_train ( )

◆ linregr_train() [5/5]

varchar linregr_train ( varchar  message)

◆ linregr_transition()

bytea8 linregr_transition ( bytea8  state,
float8  y,
float8 []  x 
)