SQL functions for multinomial logistic regression. More...

Functions
float8 []	__mlogregr_irls_step_transition (float8[] state, integer y, integer num_categories, integer ref_category, float8[] x, float8[] prev_state)

float8 []	__mlogregr_irls_step_merge_states (float8[] state1, float8[] state2)

float8 []	__mlogregr_irls_step_final (float8[] state)

aggregate float8 []	__mlogregr_irls_step (integer y, integer numcategories, integer ref_category, float8[] x, float8[] previous_state)

float8	__internal_mlogregr_irls_step_distance (float8[] state1, float8[] state2)

mlogregr_result	__internal_mlogregr_irls_result (float8[] state)

mlogregr_summary_result	__internal_mlogregr_summary_results (float8[] state)

void	mlogregr_train (varchar source_table, varchar output_table, varchar dependent_varname, varchar independent_varname, integer ref_category, varchar optimizer_params)
	Compute multinomial logistic regression coefficients. More...

void	mlogregr_train (varchar source_table, varchar output_table, varchar dependent_varname, varchar independent_varname, integer ref_category)

void	mlogregr_train (varchar source_table, varchar output_table, varchar dependent_varname, varchar independent_varname)

varchar	mlogregr_train (varchar message)

varchar	mlogregr_train ()

integer	__compute_mlogregr (varchar source_table, varchar dependent_varname, varchar independent_varname, integer num_categories, integer max_iter, varchar optimizer, float8 precision, integer ref_category)

mlogregr_result	mlogregr (varchar source, varchar depvar, varchar indepvar, integer max_num_iterations=20, varchar optimizer="irls", float8 precision=0.0001, integer ref_category)
	Compute multinomial logistic-regression coefficients and diagnostic statistics. More...

mlogregr_result	mlogregr (varchar source, varchar depvar, varchar indepvar)

mlogregr_result	mlogregr (varchar source, varchar depvar, varchar indepvar, integer max_num_iterations)

mlogregr_result	mlogregr (varchar source, varchar depvar, varchar indepvar, integer max_num_iterations, varchar optimizer)

set< __mlogregr_cat_coef >	__mlogregr_format (float8[] coef, integer num_feature, integer num_category, integer ref_category)

float8 []	__mlogregr_predict_prob (float8[] coef, integer ref_category, float8[] col_ind_var)

integer	__mlogregr_predict_response (float8[] coef, integer ref_category, float8[] col_ind_var)

void	mlogregr_predict (text model, text source, text id_col_name, text output, text pred_type)

void	mlogregr_predict (text model, text source, text id_col_name, text output)

text	mlogregr_predict (text message)

Detailed Description

Date: July 2012

See also: For a brief introduction to multinomial logistic regression, see the module description Multinomial Logistic Regression.

Function Documentation

◆ __compute_mlogregr()

integer __compute_mlogregr	(	varchar	source_table,
		varchar	dependent_varname,
		varchar	independent_varname,
		integer	num_categories,
		integer	max_iter,
		varchar	optimizer,
		float8	precision,
		integer	ref_category
	)

◆ __internal_mlogregr_irls_result()

mlogregr_result __internal_mlogregr_irls_result ( float8 [] state )

◆ __internal_mlogregr_irls_step_distance()

float8 __internal_mlogregr_irls_step_distance	(	float8 []	state1,
		float8 []	state2
	)

◆ __internal_mlogregr_summary_results()

mlogregr_summary_result __internal_mlogregr_summary_results ( float8 [] state )

◆ __mlogregr_format()

set<__mlogregr_cat_coef> __mlogregr_format	(	float8 []	coef,
		integer	num_feature,
		integer	num_category,
		integer	ref_category
	)

◆ __mlogregr_irls_step()

aggregate float8 [] __mlogregr_irls_step	(	integer	y,
		integer	numcategories,
		integer	ref_category,
		float8 []	x,
		float8 []	previous_state
	)

◆ __mlogregr_irls_step_final()

float8 [] __mlogregr_irls_step_final ( float8 [] state )

◆ __mlogregr_irls_step_merge_states()

float8 [] __mlogregr_irls_step_merge_states	(	float8 []	state1,
		float8 []	state2
	)

◆ __mlogregr_irls_step_transition()

float8 [] __mlogregr_irls_step_transition	(	float8 []	state,
		integer	y,
		integer	num_categories,
		integer	ref_category,
		float8 []	x,
		float8 []	prev_state
	)

◆ __mlogregr_predict_prob()

float8 [] __mlogregr_predict_prob	(	float8 []	coef,
		integer	ref_category,
		float8 []	col_ind_var
	)

◆ __mlogregr_predict_response()

integer __mlogregr_predict_response	(	float8 []	coef,
		integer	ref_category,
		float8 []	col_ind_var
	)

◆ mlogregr() [1/4]

mlogregr_result mlogregr	(	varchar	source,
		varchar	depvar,
		varchar	indepvar,
		integer	max_num_iterations = `20`,
		varchar	optimizer = `"irls"`,
		float8	precision = `0.0001`,
		integer	ref_category
	)

To include an intercept in the model, set one coordinate in the independentVariables array to 1.

Parameters

source	Name of the source relation containing the training data
depvar	Name of the dependent column (of type INTEGER, with values less than the number of categories)
indepvar	Name of the independent column (of type DOUBLE PRECISION[])
max_num_iterations	The maximum number of iterations
optimizer	The optimizer to use (`'irls'` or `'newton'` for iteratively reweighted least squares)
precision	The difference between log-likelihood values in successive iterations that should indicate convergence. Note that a non-positive value here disables the convergence criterion, and execution will only stop after max_num_iterations iterations.
ref_category	The reference category specified by the user

Returns

A composite value:

ref_category INTEGER - Reference category
coef FLOAT8[] - Array of coefficients, \( \boldsymbol c \)
log_likelihood FLOAT8 - Log-likelihood \( l(\boldsymbol c) \)
std_err FLOAT8[] - Array of standard errors, \( \mathit{se}(c_1), \dots, \mathit{se}(c_k) \)
z_stats FLOAT8[] - Array of Wald z-statistics, \( \boldsymbol z \)
p_values FLOAT8[] - Array of Wald p-values, \( \boldsymbol p \)
odds_ratios FLOAT8[] - Array of odds ratios, \( \mathit{odds}(c_1), \dots, \mathit{odds}(c_k) \)
condition_no FLOAT8 - The condition number of matrix \( X^T A X \) during the iteration immediately preceding convergence (i.e., \( A \) is computed using the coefficients of the previous iteration)
num_iterations INTEGER - The number of iterations before the algorithm terminated

Usage

Get vector of coefficients \( \boldsymbol c \) and all diagnostic statistics:

SELECT * FROM mlogregr('sourceName', 'dependentVariable',
   'independentVariables');

Get vector of coefficients \( \boldsymbol c \):

SELECT (mlogregr('sourceName', 'dependentVariable',
   'independentVariables')).coef;

Get a subset of the output columns, e.g., only the array of coefficients \( \boldsymbol c \), the log-likelihood \( l(\boldsymbol c) \), and the array of p-values \( \boldsymbol p \):
```
SELECT coef, log_likelihood, p_values
   FROM mlogregr('sourceName', 'dependentVariable',
   'independentVariables');
```

Note: This function starts an iterative algorithm. It is not an aggregate function. Source and column names have to be passed as strings (due to limitations of the SQL syntax).

◆ mlogregr() [2/4]

mlogregr_result mlogregr	(	varchar	source,
		varchar	depvar,
		varchar	indepvar
	)

◆ mlogregr() [3/4]

mlogregr_result mlogregr	(	varchar	source,
		varchar	depvar,
		varchar	indepvar,
		integer	max_num_iterations
	)

◆ mlogregr() [4/4]

mlogregr_result mlogregr	(	varchar	source,
		varchar	depvar,
		varchar	indepvar,
		integer	max_num_iterations,
		varchar	optimizer
	)

◆ mlogregr_predict() [1/3]

void mlogregr_predict	(	text	model,
		text	source,
		text	id_col_name,
		text	output,
		text	pred_type
	)

◆ mlogregr_predict() [2/3]

void mlogregr_predict	(	text	model,
		text	source,
		text	id_col_name,
		text	output
	)

◆ mlogregr_predict() [3/3]

text mlogregr_predict ( text message )

◆ mlogregr_train() [1/5]

void mlogregr_train	(	varchar	source_table,
		varchar	output_table,
		varchar	dependent_varname,
		varchar	independent_varname,
		integer	ref_category,
		varchar	optimizer_params
	)