FeatureSelectionCatalog.SelectFeaturesBasedOnMutualInformation Method
Definition
Important
Some information relates to prerelease product that may be substantially modified before it’s released. Microsoft makes no warranties, express or implied, with respect to the information provided here.
Overloads
SelectFeaturesBasedOnMutualInformation(TransformsCatalog+FeatureSelectionTransforms, InputOutputColumnPair[], String, Int32, Int32)
Create a MutualInformationFeatureSelectingEstimator, which selects the top k slots across all specified columns ordered by their mutual information with the label column.
SelectFeaturesBasedOnMutualInformation(TransformsCatalog+FeatureSelectionTransforms, String, String, String, Int32, Int32)
Create a MutualInformationFeatureSelectingEstimator, which selects the top k slots across all specified columns ordered by their mutual information with the label column.
SelectFeaturesBasedOnMutualInformation(TransformsCatalog+FeatureSelectionTransforms, InputOutputColumnPair[], String, Int32, Int32)
Create a MutualInformationFeatureSelectingEstimator, which selects the top k slots across all specified columns ordered by their mutual information with the label column.
public static Microsoft.ML.Transforms.MutualInformationFeatureSelectingEstimator SelectFeaturesBasedOnMutualInformation (this Microsoft.ML.TransformsCatalog.FeatureSelectionTransforms catalog, Microsoft.ML.InputOutputColumnPair[] columns, string labelColumnName = "Label", int slotsInOutput = 1000, int numberOfBins = 256);
static member SelectFeaturesBasedOnMutualInformation : Microsoft.ML.TransformsCatalog.FeatureSelectionTransforms * Microsoft.ML.InputOutputColumnPair[] * string * int * int -> Microsoft.ML.Transforms.MutualInformationFeatureSelectingEstimator
<Extension()>
Public Function SelectFeaturesBasedOnMutualInformation (catalog As TransformsCatalog.FeatureSelectionTransforms, columns As InputOutputColumnPair(), Optional labelColumnName As String = "Label", Optional slotsInOutput As Integer = 1000, Optional numberOfBins As Integer = 256) As MutualInformationFeatureSelectingEstimator
Parameters
- catalog
- TransformsCatalog.FeatureSelectionTransforms
The transform's catalog.
- columns
- InputOutputColumnPair[]
Specifies the names of the input columns for the transformation, and their respective output column names.
- labelColumnName
- String
The name of the label column.
- slotsInOutput
- Int32
The maximum number of slots to preserve in the output. The number of slots to preserve is taken across all input columns.
- numberOfBins
- Int32
Max number of bins used to approximate mutual information between each input column and the label column. Power of 2 recommended.
Returns
Examples
using System;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;
namespace Samples.Dynamic
{
    /// <summary>
    /// Sample that applies mutual-information based feature selection to two
    /// vector columns in a single estimator.
    /// </summary>
    public static class SelectFeaturesBasedOnMutualInformationMultiColumn
    {
        /// <summary>
        /// Prints the sample rows, runs the multi-column feature selection
        /// pipeline over them and prints the surviving slots of each column.
        /// </summary>
        public static void Example()
        {
            // The MLContext is the starting point for every ML.NET operation;
            // it also provides exception tracking, logging and the source of
            // randomness.
            var context = new MLContext();

            // Fetch the in-memory sample rows.
            var inputRows = GetData();

            // Show the input columns before anything is transformed.
            Console.WriteLine("NumericVectorA NumericVectorB");
            foreach (var row in inputRows)
            {
                string joinedA = string.Join(",", row.NumericVectorA);
                string joinedB = string.Join(",", row.NumericVectorB);
                Console.WriteLine("{0,-25} {1,-25}", joinedA, joinedB);
            }

            // NumericVectorA NumericVectorB
            // 4,0,6 7,8,9
            // 0,5,7 7,9,0
            // 4,0,6 7,8,9
            // 0,5,7 7,8,0

            // Wrap the rows in an IDataView so the pipeline can consume them.
            var dataView = context.Data.LoadFromEnumerable(inputRows);

            // Both vector columns are transformed in place: each pair names
            // only one column, which serves as input and as output.
            var columnPairs = new InputOutputColumnPair[]
            {
                new InputOutputColumnPair("NumericVectorA"),
                new InputOutputColumnPair("NumericVectorB")
            };

            // The MutualInformationFeatureSelectingEstimator keeps the top k
            // slots, ranked by the mutual information between each slot and
            // the label. In this multi-column form the budget of four slots
            // is shared by both columns.
            var estimator = context.Transforms.FeatureSelection
                .SelectFeaturesBasedOnMutualInformation(
                    columnPairs,
                    labelColumnName: "Label",
                    slotsInOutput: 4);

            // Train the transformer on the data, then apply it to that data.
            var transformer = estimator.Fit(dataView);
            var selectedView = transformer.Transform(dataView);

            var selectedRows = context.Data.CreateEnumerable<TransformedData>(
                selectedView, reuseRowObject: true);

            // Show the same two columns after feature selection.
            Console.WriteLine("NumericVectorA NumericVectorB");
            foreach (var row in selectedRows)
            {
                string joinedA = string.Join(",", row.NumericVectorA);
                string joinedB = string.Join(",", row.NumericVectorB);
                Console.WriteLine("{0,-25} {1,-25}", joinedA, joinedB);
            }

            // NumericVectorA NumericVectorB
            // 4,0,6 9
            // 0,5,7 0
            // 4,0,6 9
            // 0,5,7 0
        }

        /// <summary>
        /// Row shape used to read the transformed columns back out.
        /// </summary>
        private class TransformedData
        {
            public float[] NumericVectorA { get; set; }

            public float[] NumericVectorB { get; set; }
        }

        /// <summary>
        /// Input row: a boolean label plus two three-slot numeric vectors.
        /// </summary>
        public class NumericData
        {
            public bool Label;

            [VectorType(3)]
            public float[] NumericVectorA { get; set; }

            [VectorType(3)]
            public float[] NumericVectorB { get; set; }
        }

        /// <summary>
        /// Builds the four sample rows used by <see cref="Example"/>.
        /// </summary>
        /// <returns>A list holding the sample rows.</returns>
        public static IEnumerable<NumericData> GetData()
        {
            var rows = new List<NumericData>();

            rows.Add(CreateRow(true,
                new float[] { 4, 0, 6 }, new float[] { 7, 8, 9 }));
            rows.Add(CreateRow(false,
                new float[] { 0, 5, 7 }, new float[] { 7, 9, 0 }));
            rows.Add(CreateRow(true,
                new float[] { 4, 0, 6 }, new float[] { 7, 8, 9 }));
            rows.Add(CreateRow(false,
                new float[] { 0, 5, 7 }, new float[] { 7, 8, 0 }));

            return rows;
        }

        // Assembles one input row from its label and its two vectors.
        private static NumericData CreateRow(bool label, float[] vectorA,
            float[] vectorB)
        {
            return new NumericData
            {
                Label = label,
                NumericVectorA = vectorA,
                NumericVectorB = vectorB,
            };
        }
    }
}
Applies to
SelectFeaturesBasedOnMutualInformation(TransformsCatalog+FeatureSelectionTransforms, String, String, String, Int32, Int32)
Create a MutualInformationFeatureSelectingEstimator, which selects the top k slots across all specified columns ordered by their mutual information with the label column.
public static Microsoft.ML.Transforms.MutualInformationFeatureSelectingEstimator SelectFeaturesBasedOnMutualInformation (this Microsoft.ML.TransformsCatalog.FeatureSelectionTransforms catalog, string outputColumnName, string inputColumnName = default, string labelColumnName = "Label", int slotsInOutput = 1000, int numberOfBins = 256);
static member SelectFeaturesBasedOnMutualInformation : Microsoft.ML.TransformsCatalog.FeatureSelectionTransforms * string * string * string * int * int -> Microsoft.ML.Transforms.MutualInformationFeatureSelectingEstimator
<Extension()>
Public Function SelectFeaturesBasedOnMutualInformation (catalog As TransformsCatalog.FeatureSelectionTransforms, outputColumnName As String, Optional inputColumnName As String = Nothing, Optional labelColumnName As String = "Label", Optional slotsInOutput As Integer = 1000, Optional numberOfBins As Integer = 256) As MutualInformationFeatureSelectingEstimator
Parameters
- catalog
- TransformsCatalog.FeatureSelectionTransforms
The transform's catalog.
- outputColumnName
- String
Name of the column resulting from the transformation of inputColumnName.
- inputColumnName
- String
Name of the column to transform. If set to null, the value of outputColumnName will be used as the source.
- labelColumnName
- String
The name of the label column.
- slotsInOutput
- Int32
The maximum number of slots to preserve in the output. The number of slots to preserve is taken across all input columns.
- numberOfBins
- Int32
Max number of bins used to approximate mutual information between each input column and the label column. Power of 2 recommended.
Returns
Examples
using System;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;
namespace Samples.Dynamic
{
    /// <summary>
    /// Sample that applies mutual-information based feature selection to a
    /// single vector column.
    /// </summary>
    public static class SelectFeaturesBasedOnMutualInformation
    {
        /// <summary>
        /// Prints the sample rows, runs the feature selection pipeline over
        /// them and prints the slots that remain in the vector column.
        /// </summary>
        public static void Example()
        {
            // The MLContext is the starting point for every ML.NET operation;
            // it also provides exception tracking, logging and the source of
            // randomness.
            var context = new MLContext();

            // Fetch the in-memory sample rows.
            var inputRows = GetData();

            // Show the label and the vector before anything is transformed.
            Console.WriteLine("Label NumericVector");
            foreach (var row in inputRows)
            {
                string joinedVector = string.Join(",", row.NumericVector);
                Console.WriteLine("{0,-25} {1,-25}", row.Label, joinedVector);
            }

            // Label NumericVector
            // True 4,0,6
            // False 0,5,7
            // True 4,0,6
            // False 0,5,7

            // Wrap the rows in an IDataView so the pipeline can consume them.
            var dataView = context.Data.LoadFromEnumerable(inputRows);

            // The MutualInformationFeatureSelectingEstimator keeps the top k
            // slots of the feature vector, ranked by the mutual information
            // between each slot and the label. Here k is two.
            var estimator = context.Transforms.FeatureSelection
                .SelectFeaturesBasedOnMutualInformation(
                    outputColumnName: "NumericVector",
                    labelColumnName: "Label",
                    slotsInOutput: 2);

            // Train the transformer on the data, then apply it to that data.
            var transformer = estimator.Fit(dataView);
            var selectedView = transformer.Transform(dataView);

            var selectedRows = context.Data.CreateEnumerable<TransformedData>(
                selectedView, reuseRowObject: true);

            // Show the vector column after feature selection.
            Console.WriteLine("NumericVector");
            foreach (var row in selectedRows)
            {
                string joinedVector = string.Join(",", row.NumericVector);
                Console.WriteLine("{0,-25}", joinedVector);
            }

            // NumericVector
            // 4,0
            // 0,5
            // 4,0
            // 0,5
        }

        /// <summary>
        /// Row shape used to read the transformed column back out.
        /// </summary>
        public class TransformedData
        {
            public float[] NumericVector { get; set; }
        }

        /// <summary>
        /// Input row: a boolean label plus one three-slot numeric vector.
        /// </summary>
        public class NumericData
        {
            public bool Label;

            [VectorType(3)]
            public float[] NumericVector { get; set; }
        }

        /// <summary>
        /// Builds the four sample rows used by <see cref="Example"/>.
        /// </summary>
        /// <returns>A list holding the sample rows.</returns>
        public static IEnumerable<NumericData> GetData()
        {
            var rows = new List<NumericData>();

            rows.Add(CreateRow(true, new float[] { 4, 0, 6 }));
            rows.Add(CreateRow(false, new float[] { 0, 5, 7 }));
            rows.Add(CreateRow(true, new float[] { 4, 0, 6 }));
            rows.Add(CreateRow(false, new float[] { 0, 5, 7 }));

            return rows;
        }

        // Assembles one input row from its label and its vector.
        private static NumericData CreateRow(bool label, float[] vector)
        {
            return new NumericData
            {
                Label = label,
                NumericVector = vector,
            };
        }
    }
}