2.1.0
User Documentation for Apache MADlib
encode_categorical.sql_in File Reference

SQL functions for encoding categorical variables to numerical values.

Functions

void encode_categorical_variables (varchar source_table, varchar output_table, varchar categorical_cols, varchar categorical_cols_to_exclude, varchar row_id, varchar top, varchar value_to_drop, boolean encode_null, varchar output_type, boolean output_dictionary, varchar distributed_by)
 Encode categorical columns using either one-hot encoding or dummy coding.
 
void encode_categorical_variables (varchar source_table, varchar output_table, varchar categorical_cols, varchar categorical_cols_to_exclude, varchar row_id, varchar top, varchar value_to_drop, boolean encode_null, varchar output_type, boolean output_dictionary)
 
void encode_categorical_variables (varchar source_table, varchar output_table, varchar categorical_cols, varchar categorical_cols_to_exclude, varchar row_id, varchar top, varchar value_to_drop, boolean encode_null, varchar output_type)
 
void encode_categorical_variables (varchar source_table, varchar output_table, varchar categorical_cols, varchar categorical_cols_to_exclude, varchar row_id, varchar top, varchar value_to_drop, boolean encode_null)
 
void encode_categorical_variables (varchar source_table, varchar output_table, varchar categorical_cols, varchar categorical_cols_to_exclude, varchar row_id, varchar top, varchar value_to_drop)
 
void encode_categorical_variables (varchar source_table, varchar output_table, varchar categorical_cols, varchar categorical_cols_to_exclude, varchar row_id, varchar top)
 
void encode_categorical_variables (varchar source_table, varchar output_table, varchar categorical_cols, varchar categorical_cols_to_exclude, varchar row_id)
 
void encode_categorical_variables (varchar source_table, varchar output_table, varchar categorical_cols, varchar categorical_cols_to_exclude)
 
void encode_categorical_variables (varchar source_table, varchar output_table, varchar categorical_cols)
 
varchar encode_categorical_variables (varchar message)
 
varchar encode_categorical_variables ()
 

Detailed Description

Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

Date
Dec 2016
See also
Encodes categorical variables to numerical values

Function Documentation

◆ encode_categorical_variables() [1/11]

void encode_categorical_variables ( varchar  source_table,
varchar  output_table,
varchar  categorical_cols,
varchar  categorical_cols_to_exclude,
varchar  row_id,
varchar  top,
varchar  value_to_drop,
boolean  encode_null,
varchar  output_type,
boolean  output_dictionary,
varchar  distributed_by 
)
Parameters
source_table: Name of table containing categorical variable
output_table: Name of table to output dummy variables
categorical_cols: Comma-separated list of column names to dummy code (can be '*')
categorical_cols_to_exclude: Comma-separated list of column names to exclude (if categorical_cols = '*')
row_id: Columns from source table to index output table
top: Parameter to include only top values of a categorical variable
value_to_drop: Parameter to set reference column in dummy coding
encode_null: Boolean to determine the behavior for rows with NULL value
output_type: Parameter to set output data type: 'column', 'array' or 'svec'
output_dictionary: Boolean to simplify column naming, with a separate mapping table to the actual values
distributed_by: Comma-separated list of column names to use for distribution of output
Returns
Void

◆ encode_categorical_variables() [2/11]

void encode_categorical_variables ( varchar  source_table,
varchar  output_table,
varchar  categorical_cols,
varchar  categorical_cols_to_exclude,
varchar  row_id,
varchar  top,
varchar  value_to_drop,
boolean  encode_null,
varchar  output_type,
boolean  output_dictionary 
)

◆ encode_categorical_variables() [3/11]

void encode_categorical_variables ( varchar  source_table,
varchar  output_table,
varchar  categorical_cols,
varchar  categorical_cols_to_exclude,
varchar  row_id,
varchar  top,
varchar  value_to_drop,
boolean  encode_null,
varchar  output_type 
)

◆ encode_categorical_variables() [4/11]

void encode_categorical_variables ( varchar  source_table,
varchar  output_table,
varchar  categorical_cols,
varchar  categorical_cols_to_exclude,
varchar  row_id,
varchar  top,
varchar  value_to_drop,
boolean  encode_null 
)

◆ encode_categorical_variables() [5/11]

void encode_categorical_variables ( varchar  source_table,
varchar  output_table,
varchar  categorical_cols,
varchar  categorical_cols_to_exclude,
varchar  row_id,
varchar  top,
varchar  value_to_drop 
)

◆ encode_categorical_variables() [6/11]

void encode_categorical_variables ( varchar  source_table,
varchar  output_table,
varchar  categorical_cols,
varchar  categorical_cols_to_exclude,
varchar  row_id,
varchar  top 
)

◆ encode_categorical_variables() [7/11]

void encode_categorical_variables ( varchar  source_table,
varchar  output_table,
varchar  categorical_cols,
varchar  categorical_cols_to_exclude,
varchar  row_id 
)

◆ encode_categorical_variables() [8/11]

void encode_categorical_variables ( varchar  source_table,
varchar  output_table,
varchar  categorical_cols,
varchar  categorical_cols_to_exclude 
)

◆ encode_categorical_variables() [9/11]

void encode_categorical_variables ( varchar  source_table,
varchar  output_table,
varchar  categorical_cols 
)

◆ encode_categorical_variables() [10/11]

varchar encode_categorical_variables ( varchar  message)

◆ encode_categorical_variables() [11/11]

varchar encode_categorical_variables ( )