public abstract class MLUtils
extends java.lang.Object
Constructor and Description |
---|
MLUtils() |
Modifier and Type | Method and Description |
---|---|
static java.lang.Object |
argmax(java.util.HashMap<?,java.lang.Integer> map)
argmax - argmax function for a HashMap
NOTE: same as maxItem(HashMap), but for Integer values
(TODO: do something more clever than this)
|
static int |
bitCount(java.lang.String s) |
static int |
bitDifference(int[] y1,
int[] y2) |
static int |
bitDifference(java.lang.String[] y1,
java.lang.String[] y2) |
static int |
bitDifference(java.lang.String s1,
java.lang.String s2) |
static int |
char2int(char c) |
static java.util.HashMap<java.lang.String,java.lang.Integer> |
classCombinationCounts(weka.core.Instances D)
ClassCombinationCounts - multi-target version of countCombinations(...).
|
static void |
clearLabels(weka.core.Instance x)
Clear Labels -- set the value of all label attributes to 0.0
|
static weka.core.Instances |
combineInstances(weka.core.Instances D1,
weka.core.Instances D2)
Stack two Instances together row-wise.
|
static weka.core.Instance |
copyValues(weka.core.Instance x_dest,
weka.core.Instance x_src,
int[] indices)
CopyValues - Set x_dest[i++] = x_src[j] for all j in indices[].
|
static weka.core.Instance |
copyValues(weka.core.Instance x_dest,
weka.core.Instance x_src,
int from,
int offset)
CopyValues - Set x_dest[j+offset] = x_src[i+from].
|
static java.util.HashMap<java.lang.String,java.lang.Integer> |
countCombinations(weka.core.Instances D,
int L)
CountCombinations - return a mapping of each distinct label combination and its count.
|
static java.util.HashMap<LabelSet,java.lang.Integer> |
countCombinationsSparse(weka.core.Instances D,
int L)
CountCombinations in a sparse way.
|
static int[] |
decodeValue(java.lang.String a)
Deprecated.
|
static weka.core.Instance |
deleteAttributesAt(weka.core.Instance x,
int[] indicesToRemove) |
static weka.core.Instances |
deleteAttributesAt(weka.core.Instances D,
int[] indicesToRemove) |
static double |
emptyVectors(int[][] Y)
EmptyVectors - percentage of empty vectors sum(y[i])==0 in Y.
|
static java.lang.String |
encodeValue(int[] s)
Deprecated.
|
static void |
fixRelationName(weka.core.Instances data)
Fixes the relation name by adding the "-C" attribute to it if necessary.
|
static void |
fixRelationName(weka.core.Instances data,
int numClassAtts)
Fixes the relation name by adding the "-C" attribute to it if necessary.
|
static double[] |
fromBitString(java.lang.String s)
FromBitString - returns a double[] representation of s.
|
static int[] |
fromSparseString(java.lang.String s)
From Sparse String - From a sparse String representation, e.g., [1,34,73], to a binary int[] where those indices are set to 1.
|
static int[] |
gen_indices(int L)
Deprecated.
|
static java.lang.String |
getDatasetName(weka.core.Instances instances)
GetDataSetName - Look for name in the 'relationName' in format 'dataset-name: options'
|
static java.lang.String[] |
getDatasetOptions(weka.core.Instances instances)
GetDataSetOptions - Look for options in the 'relationName' in format 'dataset-name: options'
|
static int |
getIntegerOption(java.lang.String op,
int def)
GetIntegerOption - parse 'op' to an integer if we can, else use default 'def'.
|
int[] |
getK(weka.core.Instances D)
Get K - get the number of values associated with each label L.
|
static double[][] |
getXfromD(weka.core.Instances D)
GetXfromD - Extract attributes as a double X[][] from Instances D.
|
static double[] |
getxfromInstance(weka.core.Instance xy)
GetxfromInstance - Extract attributes as a double x[] from an Instance.
|
static double[][] |
getYfromD(weka.core.Instances D)
GetYfromD - Extract labels as a double Y[][] from Instances D.
|
static java.lang.String |
hashMapToString(java.util.HashMap<?,?> map) |
static java.lang.String |
hashMapToString(java.util.HashMap<?,?> map,
int dp)
HashMapToString - print out a HashMap nicely.
|
static weka.core.Instance |
keepAttributesAt(weka.core.Instance x,
int[] indicesToRemove,
int lim) |
static weka.core.Instances |
keepAttributesAt(weka.core.Instances D,
int[] indicesToRemove,
int lim) |
static double[] |
labelCardinalities(java.util.ArrayList<int[]> Y)
LabelCardinalities - return the frequency of each label in label data Y.
|
static double[] |
labelCardinalities(weka.core.Instances D)
LabelCardinalities - return the frequency of each label of dataset D.
|
static double |
labelCardinality(weka.core.Instances D)
LabelCardinality - return the label cardinality of dataset D.
|
static double |
labelCardinality(weka.core.Instances D,
int L)
LabelCardinality - return the label cardinality of dataset D of L labels.
|
static double |
labelCardinality(int[][] Y)
LabelCardinality - return the label cardinality of label data Y.
|
static double |
labelCardinality(int[][] Y,
int j)
LabelCardinality - return the average number of times the j-th label is relevant in label data Y.
|
static java.lang.Object |
loadObject(java.lang.String filename)
Load Object - load the Object stored in 'filename'.
|
static void |
main(java.lang.String[] args)
For retrieving some dataset statistics on the command line.
|
static java.lang.Object |
maxItem(java.util.HashMap<?,java.lang.Double> map)
maxItem - argmax function for a HashMap
|
static java.lang.String |
mostCommonCombination(weka.core.Instances D)
MostCommonCombination - Most common label combination in D.
|
static java.lang.String |
mostCommonCombination(weka.core.Instances D,
int L)
MostCommonCombination - Most common label combination in D (of L labels).
|
static int |
numberOfUniqueCombinations(weka.core.Instances D)
Get the number of unique label combinations in a dataset
|
static int |
peekClassIndex(java.io.File file)
Attempts to determine the number of classes/class index from the
specified file.
|
static java.lang.String[] |
permute(java.lang.String s)
Permute -- e.g., permute("AB") returns ["AB","BA"]
|
static void |
prepareData(weka.core.Instances data)
Prepares the class index of the data.
|
static java.lang.String |
printAsTextMatrix(double[][] M) |
static void |
pruneCountHashMap(java.util.HashMap<?,java.lang.Integer> hm,
int p)
PruneCountHashMap - remove entries in hm = {(label,count)} where 'count' is no more than 'p'.
|
static java.util.HashMap<?,java.lang.Integer> |
pruneCountHashMapBasedAsAFractionOf(java.util.HashMap<?,java.lang.Integer> hm,
double p,
int N) |
static void |
randomize(int[] array,
java.util.Random r)
Deprecated.
|
static weka.core.Instances |
replaceZasAttributes(weka.core.Instances D,
double[][] Z,
int L)
ReplaceZasAttributes - data Z[][] will be the new attributes in D.
|
static weka.core.Instances |
replaceZasClasses(weka.core.Instances D,
double[][] Z,
int L)
ReplaceZasClasses - data Z[][] will be the new class labels in D.
|
static void |
saveObject(java.lang.Object object,
java.lang.String filename)
Save Object - save 'object' into file 'filename'.
|
static weka.core.Instance |
setLabelsMissing(weka.core.Instance x)
SetLabelsMissing - Set all labels in x to missing.
|
static weka.core.Instance |
setLabelsMissing(weka.core.Instance x,
int L)
SetLabelsMissing - Set all (L) labels in x to missing.
|
static weka.core.Instances |
setLabelsMissing(weka.core.Instances D)
SetLabelsMissing - Set all labels in D to missing.
|
static weka.core.Instance |
setTemplate(weka.core.Instance x,
weka.core.Instance x_template,
weka.core.Instances D_template)
SetTemplate - returns a copy of x_template, set with x's attributes, and set to dataset D_template (of which x_template is a template).
|
static weka.core.Instance |
setTemplate(weka.core.Instance x,
weka.core.Instances instancesTemplate) |
static weka.core.Instance |
setValues(weka.core.Instance x,
double[] z,
int L)
SetValues - set the attribute values in Instance x (having L labels) to z[].
|
static java.lang.String |
toBinaryString(int l,
int L)
ToBinaryString - use to go through all 'L' binary combinations.
|
static java.lang.String |
toBitString(double[] d)
ToBitString - returns a String representation of d[].
|
static java.lang.String |
toBitString(weka.core.Instance x,
int L)
ToBitString - returns a String representation of x, e.g., x = [0,0,1,0,1,0,0,0] gives "00101000".
|
static java.lang.String |
toBitString(int[] i)
ToBitString - returns a String representation of i[].
|
static java.lang.String |
toDebugString(weka.core.Instance x) |
static java.lang.String |
toDebugString(weka.core.Instances D) |
static double[] |
toDoubleArray(weka.core.Instance x)
Instance with L labels to double[] of length L, where L = x.classIndex().
|
static double[] |
toDoubleArray(weka.core.Instance x,
int L)
Instance with L labels to double[] of length L.
|
static double[] |
toDoubleArray(java.lang.String s)
To Double Array - Convert something like "[1.0,2.0]" to [1.0,2.0]
|
static double[] |
toDoubleArray(java.lang.String[] s)
To Double Array - Convert something like ["1.0","2.0"] to [1.0,2.0]
|
static java.util.List |
toIndicesSet(double[] x,
double t)
To Indices Set - return the indices in x[], whose values are greater than t, e.g., [0.3,0.0,0.5,0.8],0.4 to {2,3}.
|
static java.util.List<java.lang.Integer> |
toIndicesSet(weka.core.Instance x,
int L)
To Indices Set - return the indices in x, whose values are greater than 1.
|
static java.util.List |
toIndicesSet(int[] x)
To Indices Set - return the indices in x[], whose values are greater than 0, e.g., [0,0,1,1] to {2,3}.
|
static int[] |
toIntArray(double[] z,
double t)
Deprecated.
|
static int[] |
toIntArray(weka.core.Instance x,
int L)
ToIntArray - raw instance to int[] representation
|
static int[] |
toIntArray(java.lang.String s)
ToIntArray - Return an int[] from a String, e.g., "[0,1,2,3]" to [0,1,2,3].
|
static int[] |
toIntArray(java.lang.String[] s)
ToIntArray - Return an int[] from a String[], e.g., ["0","1","2","3"] to [0,1,2,3].
|
static int[] |
toPrimitive(java.lang.Integer[] a) |
static int[] |
toSparseIntArray(weka.core.Instance x,
int L)
To Sparse Int Array - A sparse String representation, e.g., [1,34,73].
|
static java.util.List |
toSubIndicesSet(weka.core.Instance x,
int[] sub_indices)
To Sub Indices Set - return the indices out of 'sub_indices', in x, whose values are greater than 1.
|
public static final java.lang.String[] getDatasetOptions(weka.core.Instances instances)
public static final java.lang.String getDatasetName(weka.core.Instances instances)
@Deprecated public static final int[] gen_indices(int L)
@Deprecated public static final void randomize(int[] array, java.util.Random r)
public static final double[] toDoubleArray(weka.core.Instance x, int L)
public static final double[] toDoubleArray(weka.core.Instance x)
public static final java.lang.String toBitString(weka.core.Instance x, int L)
public static final java.lang.String toBitString(int[] i)
public static final java.lang.String toBitString(double[] d)
public static final double[] fromBitString(java.lang.String s)
public static final int[] toIntArray(java.lang.String s)
public static final int[] toIntArray(java.lang.String[] s)
public static final java.util.List toSubIndicesSet(weka.core.Instance x, int[] sub_indices)
public static final java.util.List toIndicesSet(double[] x, double t)
public static final java.util.List toIndicesSet(int[] x)
public static final java.util.List<java.lang.Integer> toIndicesSet(weka.core.Instance x, int L)
public static final int[] toSparseIntArray(weka.core.Instance x, int L)
public static final int[] fromSparseString(java.lang.String s)
public static final int[] toIntArray(weka.core.Instance x, int L)
@Deprecated public static final int[] toIntArray(double[] z, double t)
public static final double[] toDoubleArray(java.lang.String s)
public static final double[] toDoubleArray(java.lang.String[] s)
public static final double labelCardinality(weka.core.Instances D)
public static final double labelCardinality(weka.core.Instances D, int L)
public static final double labelCardinality(int[][] Y, int j)
public static final double labelCardinality(int[][] Y)
public static final double[] labelCardinalities(weka.core.Instances D)
public static final double[] labelCardinalities(java.util.ArrayList<int[]> Y)
public static final double emptyVectors(int[][] Y)
public static final java.lang.String mostCommonCombination(weka.core.Instances D)
public static final java.lang.String mostCommonCombination(weka.core.Instances D, int L)
public static final int bitDifference(java.lang.String s1, java.lang.String s2)
public static final int bitDifference(java.lang.String[] y1, java.lang.String[] y2)
public static final int bitDifference(int[] y1, int[] y2)
public static final int bitCount(java.lang.String s)
public static final int char2int(char c)
public static final java.util.HashMap<java.lang.String,java.lang.Integer> countCombinations(weka.core.Instances D, int L)
Parameters:
D - dataset
L - number of labels
public static final java.util.HashMap<LabelSet,java.lang.Integer> countCombinationsSparse(weka.core.Instances D, int L)
Parameters:
D - dataset
L - number of labels
See Also: countCombinations(Instances,int)
public static final java.util.HashMap<java.lang.String,java.lang.Integer> classCombinationCounts(weka.core.Instances D)
Parameters:
D - dataset
@Deprecated public static java.lang.String encodeValue(int[] s)
@Deprecated public static int[] decodeValue(java.lang.String a)
public static final java.lang.Object maxItem(java.util.HashMap<?,java.lang.Double> map)
public static final java.lang.Object argmax(java.util.HashMap<?,java.lang.Integer> map)
public static final int numberOfUniqueCombinations(weka.core.Instances D)
public static final weka.core.Instance deleteAttributesAt(weka.core.Instance x, int[] indicesToRemove)
public static final weka.core.Instance keepAttributesAt(weka.core.Instance x, int[] indicesToRemove, int lim)
public static final weka.core.Instances deleteAttributesAt(weka.core.Instances D, int[] indicesToRemove)
public static final weka.core.Instances keepAttributesAt(weka.core.Instances D, int[] indicesToRemove, int lim)
public static final weka.core.Instance setTemplate(weka.core.Instance x, weka.core.Instances instancesTemplate)
public static final weka.core.Instance setTemplate(weka.core.Instance x, weka.core.Instance x_template, weka.core.Instances D_template)
public static final weka.core.Instance copyValues(weka.core.Instance x_dest, weka.core.Instance x_src, int from, int offset)
public static final weka.core.Instance copyValues(weka.core.Instance x_dest, weka.core.Instance x_src, int[] indices)
public static final weka.core.Instance setValues(weka.core.Instance x, double[] z, int L)
public static java.lang.String printAsTextMatrix(double[][] M)
public static void pruneCountHashMap(java.util.HashMap<?,java.lang.Integer> hm, int p)
public static java.util.HashMap<?,java.lang.Integer> pruneCountHashMapBasedAsAFractionOf(java.util.HashMap<?,java.lang.Integer> hm, double p, int N)
public static weka.core.Instances setLabelsMissing(weka.core.Instances D)
public static weka.core.Instance setLabelsMissing(weka.core.Instance x)
public static weka.core.Instance setLabelsMissing(weka.core.Instance x, int L)
public static final weka.core.Instances combineInstances(weka.core.Instances D1, weka.core.Instances D2)
public static final java.lang.String toDebugString(weka.core.Instances D)
public static final java.lang.String toDebugString(weka.core.Instance x)
public static int[] toPrimitive(java.lang.Integer[] a)
public static final java.lang.String toBinaryString(int l, int L)
Parameters:
l - the number to permute
L - number of labels
See Also: A.toDoubleArray(int, int)
public static java.lang.String[] permute(java.lang.String s)
public static java.lang.String hashMapToString(java.util.HashMap<?,?> map, int dp)
Parameters:
map - HashMap
dp - decimal point precision (-1 for no limitation)
public static java.lang.String hashMapToString(java.util.HashMap<?,?> map)
public static int getIntegerOption(java.lang.String op, int def)
public static void clearLabels(weka.core.Instance x)
public static double[][] getXfromD(weka.core.Instances D)
public static double[][] getYfromD(weka.core.Instances D)
public static double[] getxfromInstance(weka.core.Instance xy)
public static weka.core.Instances replaceZasAttributes(weka.core.Instances D, double[][] Z, int L)
Parameters:
D - dataset (of N instances)
Z - attribute space (of N rows, H columns)
L - number of classes / labels.
public static weka.core.Instances replaceZasClasses(weka.core.Instances D, double[][] Z, int L)
Parameters:
D - dataset (of N instances)
Z - attribute space (of N rows, H columns)
L - column to add Z from in D
public int[] getK(weka.core.Instances D)
Parameters:
D - a dataset
public static final java.lang.Object loadObject(java.lang.String filename) throws java.lang.Exception
Throws: java.lang.Exception
public static final void saveObject(java.lang.Object object, java.lang.String filename) throws java.lang.Exception
Throws: java.lang.Exception
public static void fixRelationName(weka.core.Instances data)
Parameters:
data - the dataset to fix
public static void fixRelationName(weka.core.Instances data, int numClassAtts)
Parameters:
data - the dataset to fix
numClassAtts - the number of class attributes (0 for none, >0 for attributes at start, <0 for attributes at end)
public static void prepareData(weka.core.Instances data) throws java.lang.Exception
Parameters:
data - the data to prepare
Throws: java.lang.Exception - if preparation fails
public static int peekClassIndex(java.io.File file)
Parameters:
file - the file to inspect
public static final void main(java.lang.String[] args) throws java.lang.Exception
Throws: java.lang.Exception