Constructor and Description |
---|
MLUtils() |
Modifier and Type | Method and Description |
---|---|
static Object |
argmax(HashMap<?,Integer> map)
argmax - argmax function for a HashMap.
NOTE: same as maxItem(HashMap<?,Double>), but for Integer values
(TODO: do something more clever than this)
|
static int |
bitCount(String s) |
static int |
bitDifference(int[] y1,
int[] y2) |
static int |
bitDifference(String[] y1,
String[] y2) |
static int |
bitDifference(String s1,
String s2) |
static int |
char2int(char c) |
static HashMap<String,Integer> |
classCombinationCounts(weka.core.Instances D)
ClassCombinationCounts - multi-target version of countCombinations(...).
|
static void |
clearLabels(weka.core.Instance x)
Clear Labels -- set the value of all label attributes to 0.0
|
static weka.core.Instances |
combineInstances(weka.core.Instances D1,
weka.core.Instances D2)
Stack two Instances together row-wise.
|
static weka.core.Instance |
copyValues(weka.core.Instance x_dest,
weka.core.Instance x_src,
int[] indices)
CopyValues - Set x_dest[i++] = x_src[j] for all j in indices[].
|
static weka.core.Instance |
copyValues(weka.core.Instance x_dest,
weka.core.Instance x_src,
int from,
int offset)
CopyValues - Set x_dest[j+offset] = x_src[i+from].
|
static HashMap<String,Integer> |
countCombinations(weka.core.Instances D,
int L)
CountCombinations - return a mapping of each distinct label combination and its count.
|
static HashMap<LabelSet,Integer> |
countCombinationsSparse(weka.core.Instances D,
int L)
CountCombinations in a sparse way.
|
static int[] |
decodeValue(String a)
Deprecated.
|
static weka.core.Instance |
deleteAttributesAt(weka.core.Instance x,
int[] indicesToRemove)
Delete attributes from an instance 'x' indexed by 'indicesToRemove[]'.
|
static weka.core.Instances |
deleteAttributesAt(weka.core.Instances D,
int[] indicesToRemove)
Delete attributes from a dataset 'D' indexed by 'indicesToRemove[]'.
|
static double |
emptyVectors(int[][] Y)
EmptyVectors - percentage of empty vectors sum(y[i])==0 in Y.
|
static String |
encodeValue(int[] s)
Deprecated.
|
static void |
fixRelationName(weka.core.Instances data)
Fixes the relation name by adding the "-C" attribute to it if necessary.
|
static void |
fixRelationName(weka.core.Instances data,
int numClassAtts)
Fixes the relation name by adding the "-C" attribute to it if necessary.
|
static double[] |
fromBitString(String s)
FromBitString - returns a double[] representation of s.
|
static int[] |
fromSparseString(String s)
From Sparse String - From a sparse String representation, e.g., [1,34,73], to a binary int[] where those indices are set to 1.
|
static int[] |
gen_indices(int L)
Deprecated.
|
static String |
getDatasetName(weka.core.Instances instances)
GetDataSetName - Look for name in the 'relationName' in format 'dataset-name: options'
|
static String[] |
getDatasetOptions(weka.core.Instances instances)
GetDataSetOptions - Look for options in the 'relationName' in format 'dataset-name: options'
|
static int |
getIntegerOption(String op,
int def)
GetIntegerOption - parse 'op' to an integer if we can, else use the default 'def'.
|
int[] |
getK(weka.core.Instances D)
Get K - get the number of values associated with each label L.
|
static String |
getRelationName(String name)
GetRelationName - get, e.g., 'Music' from 'Music: -C 6'
|
static String |
getShortMethodName(String method)
GetShortMethodName - get, e.g., 'BR' from 'meka.classifiers.multilabel.BR'.
|
static double[][] |
getXfromD(weka.core.Instances D)
GetXfromD - Extract attributes as a double X[][] from Instances D.
|
static double[] |
getxfromInstance(weka.core.Instance xy)
GetxfromInstance - Extract attributes as a double x[] from an Instance.
|
static double[][] |
getYfromD(weka.core.Instances D)
GetYfromD - Extract labels as a double Y[][] from Instances D.
|
static String |
hashMapToString(HashMap<?,?> map) |
static String |
hashMapToString(HashMap<?,?> map,
int dp)
Print out a HashMap nicely.
|
static weka.core.Instance |
keepAttributesAt(weka.core.Instance x,
int[] indicesToRemove,
int lim)
Delete all attributes from an instance 'x' except those indexed by 'indicesToRemove[]', up to the 'lim'-th attribute.
|
static weka.core.Instances |
keepAttributesAt(weka.core.Instances D,
int[] indicesToRemove,
int lim)
Delete all attributes from a dataset 'D' except those indexed by 'indicesToRemove[]', up to the 'lim'-th attribute.
|
static double[] |
labelCardinalities(ArrayList<int[]> Y)
LabelCardinalities - return the frequency of each label in label data Y.
|
static double[] |
labelCardinalities(weka.core.Instances D)
LabelCardinalities - return the frequency of each label of dataset D.
|
static double |
labelCardinality(weka.core.Instances D)
LabelCardinality - return the label cardinality of dataset D.
|
static double |
labelCardinality(weka.core.Instances D,
int L)
LabelCardinality - return the label cardinality of dataset D of L labels.
|
static double |
labelCardinality(int[][] Y)
LabelCardinality - return the label cardinality of label data Y.
|
static double |
labelCardinality(int[][] Y,
int j)
LabelCardinality - return the average number of times the j-th label is relevant in label data Y.
|
static Object |
loadObject(String filename)
Load Object - load the Object stored in 'filename'.
|
static void |
main(String[] args)
For retrieving some dataset statistics on the command line.
|
static Object |
maxItem(HashMap<?,Double> map)
maxItem - argmax function for a HashMap
|
static String |
mostCommonCombination(weka.core.Instances D)
MostCommonCombination - Most common label combination in D.
|
static String |
mostCommonCombination(weka.core.Instances D,
int L)
MostCommonCombination - Most common label combination in D (of L labels).
|
static int |
numberOfUniqueCombinations(weka.core.Instances D)
Get the number of unique label combinations in a dataset
|
static int |
peekClassIndex(File file)
Attempts to determine the number of classes/class index from the
specified file.
|
static String[] |
permute(String s)
Permute -- e.g., permute("AB") returns ["AB","BA"]
|
static int[] |
predictionsToRanking(double[] predictions)
Transforms the predictions into a ranking array.
|
static void |
prepareData(weka.core.Instances data)
Prepares the class index of the data.
|
static String |
printAsTextMatrix(double[][] M) |
static void |
pruneCountHashMap(HashMap<?,Integer> hm,
int p)
PruneCountHashMap - remove entries in hm = {(label,count)} where 'count' is no more than 'p'.
|
static HashMap<?,Integer> |
pruneCountHashMapBasedAsAFractionOf(HashMap<?,Integer> hm,
double p,
int N) |
static void |
randomize(int[] array,
Random r)
Deprecated.
|
static weka.core.Instances |
replaceZasAttributes(weka.core.Instances D,
double[][] Z,
int L)
ReplaceZasAttributes - data Z[][] will be the new attributes in D.
|
static weka.core.Instances |
replaceZasClasses(weka.core.Instances D,
double[][] Z,
int L)
ReplaceZasClasses - data Z[][] will be the new class labels in D.
|
static void |
saveObject(Object object,
String filename)
Save Object - save 'object' into file 'filename'.
|
static weka.core.Instance |
setLabelsMissing(weka.core.Instance x)
SetLabelsMissing - Set all labels in x to missing.
|
static weka.core.Instance |
setLabelsMissing(weka.core.Instance x,
int L)
SetLabelsMissing - Set all (L) labels in x to missing.
|
static weka.core.Instances |
setLabelsMissing(weka.core.Instances D)
SetLabelsMissing - Set all labels in D to missing.
|
static weka.core.Instance |
setTemplate(weka.core.Instance x,
weka.core.Instance x_template,
weka.core.Instances D_template)
SetTemplate - returns a copy of x_template, set with x's attributes, and set to dataset D_template (of which x_template is a template).
|
static weka.core.Instance |
setTemplate(weka.core.Instance x,
weka.core.Instances instancesTemplate) |
static weka.core.Instance |
setValues(weka.core.Instance x,
double[] z,
int L)
SetValues - set the attribute values in Instance x (having L labels) to z[].
|
static String |
toBinaryString(int l,
int L)
ToBinaryString - use to go through all 'L' binary combinations.
|
static String |
toBitString(double[] d)
ToBitString - returns a String representation of d[].
|
static String |
toBitString(weka.core.Instance x,
int L)
ToBitString - returns a String representation of x, e.g., x = [0,0,1,0,1,0,0,0] gives "00101000".
|
static String |
toBitString(int[] i)
ToBitString - returns a String representation of i[].
|
static String |
toDebugString(weka.core.Instance x) |
static String |
toDebugString(weka.core.Instances D) |
static double[] |
toDoubleArray(weka.core.Instance x)
Instance with L labels to double[] of length L, where L = x.classIndex().
|
static double[] |
toDoubleArray(weka.core.Instance x,
int L)
Instance with L labels to double[] of length L.
|
static double[] |
toDoubleArray(String s)
To Double Array - Convert something like "[1.0,2.0]" to [1.0,2.0]
|
static double[] |
toDoubleArray(String[] s)
To Double Array - Convert something like ["1.0","2.0"] to [1.0,2.0]
|
static List |
toIndicesSet(double[] x,
double t)
To Indices Set - return the indices in x[], whose values are greater than t, e.g., [0.3,0.0,0.5,0.8],0.4 to {2,3}.
|
static List<Integer> |
toIndicesSet(weka.core.Instance x,
int L)
To Indices Set - return the indices in x, whose values are greater than 1.
|
static List |
toIndicesSet(int[] x)
To Indices Set - return the indices in x[], whose values are greater than 0, e.g., [0,0,1,1] to {2,3}.
|
static int[] |
toIntArray(double[] z,
double t)
Deprecated.
|
static int[] |
toIntArray(weka.core.Instance x,
int L)
ToIntArray - raw instance to int[] representation
|
static int[] |
toIntArray(String s)
ToIntArray - Return an int[] from a String, e.g., "[0,1,2,3]" to [0,1,2,3].
|
static int[] |
toIntArray(String[] s)
ToIntArray - Return an int[] from a String[], e.g., ["0","1","2","3"] to [0,1,2,3].
|
static int[] |
toPrimitive(Integer[] a) |
static int[] |
toSparseIntArray(weka.core.Instance x,
int L)
To Sparse Int Array - A sparse String representation, e.g., [1,34,73].
|
static List |
toSubIndicesSet(weka.core.Instance x,
int[] sub_indices)
To Sub Indices Set - return the indices out of 'sub_indices', in x, whose values are greater than 1.
|
static ArrayList<weka.classifiers.evaluation.Prediction> |
toWekaPredictions(int[] y,
double[] p)
Convert to Weka (multi-target) Predictions.
|
public static final String[] getDatasetOptions(weka.core.Instances instances)
public static final String getDatasetName(weka.core.Instances instances)
public static final String getRelationName(String name)
name - dataset name
public static final String getShortMethodName(String method)
method - long method name
@Deprecated public static final int[] gen_indices(int L)
@Deprecated public static final void randomize(int[] array, Random r)
public static final double[] toDoubleArray(weka.core.Instance x, int L)
public static final double[] toDoubleArray(weka.core.Instance x)
public static final String toBitString(weka.core.Instance x, int L)
public static final String toBitString(int[] i)
public static final String toBitString(double[] d)
public static final double[] fromBitString(String s)
public static final int[] toIntArray(String s)
public static final int[] toIntArray(String[] s)
public static ArrayList<weka.classifiers.evaluation.Prediction> toWekaPredictions(int[] y, double[] p)
public static final List toSubIndicesSet(weka.core.Instance x, int[] sub_indices)
public static final List toIndicesSet(double[] x, double t)
public static final List toIndicesSet(int[] x)
public static final List<Integer> toIndicesSet(weka.core.Instance x, int L)
public static final int[] toSparseIntArray(weka.core.Instance x, int L)
public static final int[] fromSparseString(String s)
public static final int[] toIntArray(weka.core.Instance x, int L)
@Deprecated public static final int[] toIntArray(double[] z, double t)
public static final double[] toDoubleArray(String s)
public static final double[] toDoubleArray(String[] s)
public static final double labelCardinality(weka.core.Instances D)
public static final double labelCardinality(weka.core.Instances D, int L)
public static final double labelCardinality(int[][] Y, int j)
public static final double labelCardinality(int[][] Y)
public static final double[] labelCardinalities(weka.core.Instances D)
public static final double[] labelCardinalities(ArrayList<int[]> Y)
public static final double emptyVectors(int[][] Y)
public static final String mostCommonCombination(weka.core.Instances D)
public static final String mostCommonCombination(weka.core.Instances D, int L)
public static final int bitDifference(int[] y1, int[] y2)
public static final int bitCount(String s)
public static final int char2int(char c)
public static final HashMap<String,Integer> countCombinations(weka.core.Instances D, int L)
D - dataset
L - number of labels
public static final HashMap<LabelSet,Integer> countCombinationsSparse(weka.core.Instances D, int L)
D - dataset
L - number of labels
See also: countCombinations(Instances,int)
public static final HashMap<String,Integer> classCombinationCounts(weka.core.Instances D)
D - dataset
@Deprecated public static String encodeValue(int[] s)
@Deprecated public static int[] decodeValue(String a)
public static final Object maxItem(HashMap<?,Double> map)
public static final Object argmax(HashMap<?,Integer> map)
public static final int numberOfUniqueCombinations(weka.core.Instances D)
public static final weka.core.Instance deleteAttributesAt(weka.core.Instance x, int[] indicesToRemove)
x - instance
indicesToRemove - array of attribute indices
public static final weka.core.Instance keepAttributesAt(weka.core.Instance x, int[] indicesToRemove, int lim)
x - instance
indicesToRemove - array of attribute indices
lim - excluding
public static final weka.core.Instances deleteAttributesAt(weka.core.Instances D, int[] indicesToRemove)
D - dataset
indicesToRemove - array of attribute indices
public static final weka.core.Instances keepAttributesAt(weka.core.Instances D, int[] indicesToRemove, int lim)
D - dataset
indicesToRemove - array of attribute indices
lim - excluding
public static final weka.core.Instance setTemplate(weka.core.Instance x, weka.core.Instances instancesTemplate)
public static final weka.core.Instance setTemplate(weka.core.Instance x, weka.core.Instance x_template, weka.core.Instances D_template)
public static final weka.core.Instance copyValues(weka.core.Instance x_dest, weka.core.Instance x_src, int from, int offset)
public static final weka.core.Instance copyValues(weka.core.Instance x_dest, weka.core.Instance x_src, int[] indices)
public static final weka.core.Instance setValues(weka.core.Instance x, double[] z, int L)
public static String printAsTextMatrix(double[][] M)
public static void pruneCountHashMap(HashMap<?,Integer> hm, int p)
public static HashMap<?,Integer> pruneCountHashMapBasedAsAFractionOf(HashMap<?,Integer> hm, double p, int N)
public static weka.core.Instances setLabelsMissing(weka.core.Instances D)
public static weka.core.Instance setLabelsMissing(weka.core.Instance x)
public static weka.core.Instance setLabelsMissing(weka.core.Instance x, int L)
public static final weka.core.Instances combineInstances(weka.core.Instances D1, weka.core.Instances D2)
public static final String toDebugString(weka.core.Instances D)
public static final String toDebugString(weka.core.Instance x)
public static int[] toPrimitive(Integer[] a)
public static final String toBinaryString(int l, int L)
l - the number to permute
L - number of labels
See also: A.toDoubleArray(int, int)
public static String hashMapToString(HashMap<?,?> map, int dp)
map - HashMap
dp - decimal point precision (-1 for no limitation)
public static int getIntegerOption(String op, int def)
public static void clearLabels(weka.core.Instance x)
public static double[][] getXfromD(weka.core.Instances D)
public static double[][] getYfromD(weka.core.Instances D)
public static double[] getxfromInstance(weka.core.Instance xy)
public static weka.core.Instances replaceZasAttributes(weka.core.Instances D, double[][] Z, int L)
D - dataset (of N instances)
Z - attribute space (of N rows, H columns)
L - number of classes / labels.
public static weka.core.Instances replaceZasClasses(weka.core.Instances D, double[][] Z, int L)
D - dataset (of N instances)
Z - attribute space (of N rows, H columns)
L - column to add Z from in D
public int[] getK(weka.core.Instances D)
D - a dataset
public static final Object loadObject(String filename) throws Exception
Throws: Exception
public static final void saveObject(Object object, String filename) throws Exception
Throws: Exception
public static void fixRelationName(weka.core.Instances data)
data - the dataset to fix
public static void fixRelationName(weka.core.Instances data, int numClassAtts)
data - the dataset to fix
numClassAtts - the number of class attributes (0 for none, >0 for attributes at start, <0 for attributes at end)
public static void prepareData(weka.core.Instances data) throws Exception
data - the data to prepare
Throws: Exception - if preparation fails
public static int peekClassIndex(File file)
file - the file to inspect
public static final void main(String[] args) throws Exception
Throws: Exception
public static final int[] predictionsToRanking(double[] predictions)
predictions - The predictions array.
Copyright © 2017. All Rights Reserved.