SQL functions for logistic regression. More...
Functions | |
float8 [] | __logregr_cg_step_transition (float8[], boolean, float8[], float8[]) |
float8 [] | __logregr_irls_step_transition (float8[], boolean, float8[], float8[]) |
float8 [] | __logregr_igd_step_transition (float8[], boolean, float8[], float8[]) |
float8 [] | __logregr_cg_step_merge_states (float8[] state1, float8[] state2) |
float8 [] | __logregr_irls_step_merge_states (float8[] state1, float8[] state2) |
float8 [] | __logregr_igd_step_merge_states (float8[] state1, float8[] state2) |
float8 [] | __logregr_cg_step_final (float8[] state) |
float8 [] | __logregr_irls_step_final (float8[] state) |
float8 [] | __logregr_igd_step_final (float8[] state) |
aggregate float8 [] | __logregr_cg_step (boolean y, float8[] x, float8[] previous_state) |
aggregate float8 [] | __logregr_irls_step (boolean y, float8[] x, float8[] previous_state) |
aggregate float8 [] | __logregr_igd_step (boolean y, float8[] x, float8[] previous_state) |
float8 | __logregr_cg_step_distance (float8[] state1, float8[] state2) |
__logregr_result | __logregr_cg_result (float8[] state) |
float8 | __logregr_irls_step_distance (float8[] state1, float8[] state2) |
__logregr_result | __logregr_irls_result (float8[] state) |
float8 | __logregr_igd_step_distance (float8[] state1, float8[] state2) |
__logregr_result | __logregr_igd_result (float8[] state) |
void | logregr_train (varchar source_table, varchar out_table, varchar dependent_varname, varchar independent_varname, varchar grouping_cols, integer max_iter, varchar optimizer, float8 tolerance, boolean verbose) |
Compute logistic-regression coefficients and diagnostic statistics. More... | |
void | logregr_train (varchar source_table, varchar out_table, varchar dependent_varname, varchar independent_varname) |
void | logregr_train (varchar source_table, varchar out_table, varchar dependent_varname, varchar independent_varname, varchar grouping_cols) |
void | logregr_train (varchar source_table, varchar out_table, varchar dependent_varname, varchar independent_varname, varchar grouping_cols, integer max_iter) |
void | logregr_train (varchar source_table, varchar out_table, varchar dependent_varname, varchar independent_varname, varchar grouping_cols, integer max_iter, varchar optimizer) |
void | logregr_train (varchar source_table, varchar out_table, varchar dependent_varname, varchar independent_varname, varchar grouping_cols, integer max_iter, varchar optimizer, float8 tolerance) |
text | logregr_train (text message) |
text | logregr_train () |
float8 | logistic (float8 x) |
Evaluate the usual logistic function in an under-/overflow-safe way. More... | |
boolean | logregr_predict (float8[] coef, float8[] col_ind_var) |
Predict the boolean value of a dependent variable for a specific independent variable value in a logistic regression model. More... | |
text | logregr_predict (text message) |
text | logregr_predict () |
float8 | logregr_predict_prob (float8[] coef, float8[] col_ind_var) |
Compute the probability of the boolean dependent variable being True for a specific independent variable in a logistic regression model. More... | |
text | logregr_predict_prob (text message) |
text | logregr_predict_prob () |
__logregr_result __logregr_cg_result | ( | float8 [] | state | ) |
aggregate float8 [] __logregr_cg_step | ( | boolean | y, |
float8 [] | x, | ||
float8 [] | previous_state | ||
) |
float8 __logregr_cg_step_distance | ( | float8 [] | state1, |
float8 [] | state2 | ||
) |
float8 [] __logregr_cg_step_final | ( | float8 [] | state | ) |
float8 [] __logregr_cg_step_merge_states | ( | float8 [] | state1, |
float8 [] | state2 | ||
) |
float8 [] __logregr_cg_step_transition | ( | float8 | [], |
boolean | , | ||
float8 | [], | ||
float8 | [] | ||
) |
__logregr_result __logregr_igd_result | ( | float8 [] | state | ) |
aggregate float8 [] __logregr_igd_step | ( | boolean | y, |
float8 [] | x, | ||
float8 [] | previous_state | ||
) |
float8 __logregr_igd_step_distance | ( | float8 [] | state1, |
float8 [] | state2 | ||
) |
float8 [] __logregr_igd_step_final | ( | float8 [] | state | ) |
float8 [] __logregr_igd_step_merge_states | ( | float8 [] | state1, |
float8 [] | state2 | ||
) |
float8 [] __logregr_igd_step_transition | ( | float8 | [], |
boolean | , | ||
float8 | [], | ||
float8 | [] | ||
) |
__logregr_result __logregr_irls_result | ( | float8 [] | state | ) |
aggregate float8 [] __logregr_irls_step | ( | boolean | y, |
float8 [] | x, | ||
float8 [] | previous_state | ||
) |
float8 __logregr_irls_step_distance | ( | float8 [] | state1, |
float8 [] | state2 | ||
) |
float8 [] __logregr_irls_step_final | ( | float8 [] | state | ) |
float8 [] __logregr_irls_step_merge_states | ( | float8 [] | state1, |
float8 [] | state2 | ||
) |
float8 [] __logregr_irls_step_transition | ( | float8 | [], |
boolean | , | ||
float8 | [], | ||
float8 | [] | ||
) |
float8 logistic | ( | float8 | x | ) |
x |
Evaluating the logistic function \( \frac{1}{1 + \exp(-x)} \) directly can lead to under- or overflows. This function performs the evaluation in a safe manner, making use of the following observations:
In order for the outcome of \( \exp(x) \) to be within the range of the minimum positive double-precision number (i.e., \( 2^{-1074} \)) and the maximum positive double-precision number (i.e., \( (1 + (1 - 2^{-52})) \cdot 2^{1023} \)), \( x \) has to be within the natural logarithm of these numbers, so roughly in between -744 and 709. However, \( 1 + \exp(x) \) will just evaluate to 1 if \( \exp(x) \) is less than the machine epsilon (i.e., \( 2^{-52} \)) or, equivalently, if \( x \) is less than the natural logarithm of that; i.e., in any case if \( x \) is less than -37. Note that taking the reciprocal of the largest double-precision number will not cause an underflow. Hence, no further checks are necessary.
boolean logregr_predict | ( | float8 [] | coef, |
float8 [] | col_ind_var | ||
) |
coef | Coefficients obtained by running logistic regression. |
col_ind_var | Independent variable array |
This function computes the dot product of the independent variables and the coefficients. This requires the length of the two vectors to be the same.
text logregr_predict | ( | text | message | ) |
text logregr_predict | ( | ) |
float8 logregr_predict_prob | ( | float8 [] | coef, |
float8 [] | col_ind_var | ||
) |
coef | Coefficients obtained by running logistic regression. |
col_ind_var | Independent variable array |
This function computes the dot product of the independent variables and the coefficients, hence requires the length of the two vectors to be the same.
text logregr_predict_prob | ( | text | message | ) |
text logregr_predict_prob | ( | ) |
void logregr_train | ( | varchar | source_table, |
varchar | out_table, | ||
varchar | dependent_varname, | ||
varchar | independent_varname, | ||
varchar | grouping_cols, | ||
integer | max_iter, | ||
varchar | optimizer, | ||
float8 | tolerance, | ||
boolean | verbose | ||
) |
To include an intercept in the model, set one coordinate in the independentVariables
array to 1.
source_table | Name of the source relation containing the training data |
out_table | Name of the output relation to store the model results. Columns of the output relation are as follows: - <tt>coef FLOAT8[]</tt> - Array of coefficients, \( \boldsymbol c \) - <tt>log_likelihood FLOAT8</tt> - Log-likelihood \( l(\boldsymbol c) \) - <tt>std_err FLOAT8[]</tt> - Array of standard errors, \( \mathit{se}(c_1), \dots, \mathit{se}(c_k) \)
|
dependent_varname | Name of the dependent column (of type BOOLEAN) |
independent_varname | Name of the independent column (of type DOUBLE PRECISION[]) |
grouping_cols | Comma-delimited list of column names to group by |
max_iter | The maximum number of iterations |
optimizer | The optimizer to use (either 'irls'/'newton' for iteratively reweighted least squares or 'cg' for conjugate gradient) |
tolerance | The difference between log-likelihood values in successive iterations that should indicate convergence. This value must be non-negative. A value of zero disables the convergence criterion, so execution stops only after max_iter iterations. |
verbose | If true, any error or warning message will be printed to the console (irrespective of the 'client_min_messages' set by server). If false, no error/warning message is printed to console. |
SELECT logregr_train('sourceName', 'outName', 'dependentVariable', 'independentVariables'); SELECT * from outName;
SELECT coef from outName;
SELECT coef, log_likelihood, p_values FROM outName;
void logregr_train | ( | varchar | source_table, |
varchar | out_table, | ||
varchar | dependent_varname, | ||
varchar | independent_varname | ||
) |
void logregr_train | ( | varchar | source_table, |
varchar | out_table, | ||
varchar | dependent_varname, | ||
varchar | independent_varname, | ||
varchar | grouping_cols | ||
) |
void logregr_train | ( | varchar | source_table, |
varchar | out_table, | ||
varchar | dependent_varname, | ||
varchar | independent_varname, | ||
varchar | grouping_cols, | ||
integer | max_iter | ||
) |
void logregr_train | ( | varchar | source_table, |
varchar | out_table, | ||
varchar | dependent_varname, | ||
varchar | independent_varname, | ||
varchar | grouping_cols, | ||
integer | max_iter, | ||
varchar | optimizer | ||
) |
void logregr_train | ( | varchar | source_table, |
varchar | out_table, | ||
varchar | dependent_varname, | ||
varchar | independent_varname, | ||
varchar | grouping_cols, | ||
integer | max_iter, | ||
varchar | optimizer, | ||
float8 | tolerance | ||
) |
text logregr_train | ( | text | message | ) |
text logregr_train | ( | ) |