2.1.0
User Documentation for Apache MADlib
svec_util.sql_in File Reference

SQL utility functions for sparse vector data type svec More...

Functions

svec __gen_svec (bigint[], float8[], bigint)
 
text gen_doc_svecs (text output_tbl, text dictionary_tbl, text dict_id_col, text dict_term_col, text documents_tbl, text doc_id_col, text doc_term_col, text doc_term_info_col)
 
text gen_doc_svecs ()
 
float8 svec_dmin (float8, float8)
 
float8 svec_dmax (float8, float8)
 
svec svec_count (svec, svec)
 
svec svec_log (svec)
 Computes the logarithm of each element of the input SVEC. More...
 
boolean svec_eq_non_zero (svec, svec)
 
boolean svec_contains (svec, svec)
 
float8 svec_l2norm (svec)
 
float8 svec_l2norm (float8[])
 
float8 l2norm (svec, svec)
 
float8 l1norm (svec, svec)
 
float8 svec_l1norm (svec)
 
float8 svec_l1norm (float8[])
 
float8 angle (svec, svec)
 
float8 tanimoto_distance (svec, svec)
 
set< float8 > svec_unnest (svec)
 
svec svec_pivot (svec, float8)
 
float8 svec_elsum (svec)
 
float8 svec_elsum (float8[])
 
float8 svec_median (float8[])
 
float8 svec_median (svec)
 
int8 [] svec_nonbase_positions (svec, float8)
 
float8 [] svec_nonbase_values (svec, float8)
 
integer svec_dimension (svec)
 
svec svec_lapply (text, svec)
 
svec svec_append (svec, float8, int8)
 
float8 svec_proj (svec, int4)
 
svec svec_subvec (svec, int4, int4)
 
svec svec_reverse (svec)
 
svec svec_change (svec, int4, svec)
 
int4 svec_hash (svec)
 
svec svec_sfv (text[], text[])
 
text [] svec_sort (text[])
 
text svec_to_string (svec)
 
svec svec_from_string (text)
 
float [] svec_mean_transition (float[], svec)
 
float [] svec_mean_prefunc (float[], float[])
 
svec svec_mean_final (float[])
 
aggregate float [] mean (svec)
 
aggregate svec svec_sum (svec)
 
aggregate svec svec_count_nonzero (svec)
 
aggregate svec svec_median_inmemory (float8)
 
svec normalize (svec)
 

Detailed Description

See also
For an introduction to the sparse vector implementation, see the module description Sparse Vectors.

Function Documentation

◆ __gen_svec()

svec __gen_svec ( bigint  [],
float8  [],
bigint   
)

Creates a sparse vector representation given an array of indexes, the respective values, and the size of the required vector. The function simply fills a vector of the given length with the given values at their respective indexes.

◆ angle()

float8 angle ( svec  ,
svec   
)

Computes the angle between two SVECs in radians.

◆ gen_doc_svecs() [1/2]

text gen_doc_svecs ( text  output_tbl,
text  dictionary_tbl,
text  dict_id_col,
text  dict_term_col,
text  documents_tbl,
text  doc_id_col,
text  doc_term_col,
text  doc_term_info_col 
)

Creates the output table containing the sparse vector representation for the documents, given the dictionary table, the documents table, and the names of the respective columns.

◆ gen_doc_svecs() [2/2]

text gen_doc_svecs ( )

Helper function for the madlib.gen_doc_svecs UDF.

◆ l1norm()

float8 l1norm ( svec  ,
svec   
)

Computes the l1norm distance between two SVECs.

◆ l2norm()

float8 l2norm ( svec  ,
svec   
)

Computes the l2norm distance between two SVECs.

◆ mean()

aggregate float [] mean ( svec  )

Aggregate that computes the element-wise mean of a list of vectors.

◆ normalize()

svec normalize ( svec  )

Normalizes an SVEC; that is, divides all elements by its norm/magnitude.

◆ svec_append()

svec svec_append ( svec  ,
float8  ,
int8   
)

Appends a run-length block to the back of an SVEC.

◆ svec_change()

svec svec_change ( svec  ,
int4  ,
svec   
)

Replaces the subvector of a given SVEC at a given start index with another SVEC. Note that element index should start at 1.

◆ svec_contains()

boolean svec_contains ( svec  ,
svec   
)

Returns true if the left SVEC contains the right one, meaning that every non-zero value in the right SVEC equals the corresponding value in the left one.

◆ svec_count()

svec svec_count ( svec  ,
svec   
)

Counts the number of non-zero entries in the input vector; the second argument is capped at 1, then added to the first; used as the sfunc in the svec_count_nonzero() aggregate below.

◆ svec_count_nonzero()

aggregate svec svec_count_nonzero ( svec  )

Aggregate that provides a tally of nonzero entries in a list of vectors.

◆ svec_dimension()

integer svec_dimension ( svec  )

Returns the dimension of an SVEC.

◆ svec_dmax()

float8 svec_dmax ( float8  ,
float8   
)

Basic floating point scalar operator: MAX.

◆ svec_dmin()

float8 svec_dmin ( float8  ,
float8   
)

Basic floating point scalar operator: MIN.

◆ svec_elsum() [1/2]

float8 svec_elsum ( svec  )

Sums the elements of an SVEC.

◆ svec_elsum() [2/2]

float8 svec_elsum ( float8  [])

Sums the elements of a float8 array.

◆ svec_eq_non_zero()

boolean svec_eq_non_zero ( svec  ,
svec   
)

Returns true if two SVECs are equal, not counting zeros (zero equals anything). If the two SVECs are of different size, then the function essentially zero-pads the shorter one and performs the comparison.

◆ svec_from_string()

svec svec_from_string ( text  )

Converts a text string to an svec

◆ svec_hash()

int4 svec_hash ( svec  )

Computes the hash of an SVEC.

◆ svec_l1norm() [1/2]

float8 svec_l1norm ( svec  )

Computes the l1norm of an SVEC.

◆ svec_l1norm() [2/2]

float8 svec_l1norm ( float8  [])

Computes the l1norm of a float8 array.

◆ svec_l2norm() [1/2]

float8 svec_l2norm ( svec  )

Computes the l2norm of an SVEC.

◆ svec_l2norm() [2/2]

float8 svec_l2norm ( float8  [])

Computes the l2norm of a float8 array.

◆ svec_lapply()

svec svec_lapply ( text  ,
svec   
)

Applies a given function to each element of an SVEC.

◆ svec_log()

svec svec_log ( svec  )

◆ svec_mean_final()

svec svec_mean_final ( float  [])

Final function for mean(svec) aggregate

◆ svec_mean_prefunc()

float [] svec_mean_prefunc ( float  [],
float  [] 
)

Preliminary merge function for mean(svec) aggregate

◆ svec_mean_transition()

float [] svec_mean_transition ( float  [],
svec   
)

Transition function for mean(svec) aggregate

◆ svec_median() [1/2]

float8 svec_median ( float8  [])

Computes the median element of a float8 array.

◆ svec_median() [2/2]

float8 svec_median ( svec  )

Computes the median element of an SVEC.

◆ svec_median_inmemory()

aggregate svec svec_median_inmemory ( float8  )

Aggregate that turns a list of float8 values into an SVEC. Aggregate that computes the median element of a list of float8 values.

◆ svec_nonbase_positions()

int8 [] svec_nonbase_positions ( svec  ,
float8   
)

Compares an SVEC to a float8, and returns positions of all elements not equal to the float as an array. Element index here starts at 0.

◆ svec_nonbase_values()

float8 [] svec_nonbase_values ( svec  ,
float8   
)

Compares an SVEC to a float8, and returns values of all elements not equal to the float as an array.

◆ svec_pivot()

svec svec_pivot ( svec  ,
float8   
)

Appends an element to the back of an SVEC.

◆ svec_proj()

float8 svec_proj ( svec  ,
int4   
)

Projects onto an element of an SVEC.

◆ svec_reverse()

svec svec_reverse ( svec  )

Reverses the elements of an SVEC.

◆ svec_sfv()

svec svec_sfv ( text  [],
text  [] 
)

Computes the word-occurrence vector of a document.

◆ svec_sort()

text [] svec_sort ( text  [])

Sorts an array of texts. This function should be in MADlib common.

◆ svec_subvec()

svec svec_subvec ( svec  ,
int4  ,
int4   
)

Extracts a subvector of an SVEC given the subvector's start and end indices.

◆ svec_sum()

aggregate svec svec_sum ( svec  )

Aggregate that provides the element-wise sum of a list of vectors.

◆ svec_to_string()

text svec_to_string ( svec  )

Converts an svec to a text string

◆ svec_unnest()

set<float8> svec_unnest ( svec  )

Unnests an SVEC into a table of uncompressed values

◆ tanimoto_distance()

float8 tanimoto_distance ( svec  ,
svec   
)

Computes the Tanimoto distance between two SVECs.