ML.net error: Could not find input column 'MonthNr,DayNr,roc_1,roc_2
Hello,
This is my first time I am trying to put down code for ML.net and train a model and have encountered a problem that I don't understand.
I have created a SQL database table like below:
CREATE TABLE AAPL1day
(
[INDEX] INT IDENTITY(1,1) PRIMARY KEY,
[DateTime] SMALLDATETIME NOT NULL,
[MonthNr] FLOAT NOT NULL,
[DayNr] FLOAT NOT NULL
[roc_1] FLOAT
[roc_2] FLOAT
[hypo_upordown7days] FLOAT
);
This is the first rows in the database:
- The Feature columns that I am trying to use for training are:
MonthNr, DayNr, roc_1, roc_2
- The Hypothes column or Target column is the below for which I am trying to make predictions:, hypo_upordown7days
I have now for my first time trying to put down the code in order to train and predict with below code.
But I do get this error on the line: var model = pipeline.Fit(split.TrainSet);
"System.ArgumentOutOfRangeException: 'Could not find input column 'MonthNr,DayNr,roc_1,roc_2'
Parameter name: inputSchema'"
Below is my complete code for what I don't understand where the problem is?
[code]
void trainModel()
{
// Set up MLContext
var mlContext = new MLContext();
// Define database connection parameters
string databaseName = "AAPL";
string connectionString = "Server=localhost;Database=" + databaseName + ";Trusted_Connection=True;";
var tableName = "AAPL_1day";
// Get the column names from the database table
var columnNames = GetColumnNamesFromDatabaseTable(connectionString, tableName);
// Generate the list of column conditions for IS NOT NULL
var columnConditions = columnNames.Select(column => $"{column} IS NOT NULL");
// Create the SELECT query with the column conditions
var query = $"SELECT * FROM {tableName} WHERE {string.Join(" AND ", columnConditions)}";
// Create the database source with the dynamically generated query
var databaseSource = new DatabaseSource(SqlClientFactory.Instance, connectionString, query);
// Load the data from the database
var loader = mlContext.Data.CreateDatabaseLoader<AAPLData>();
var dataView = loader.Load(databaseSource);
// Split the data into train and test sets
var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.35);
// Exclude columns with specific names and include columns with "hypo" substring
var includedColumns = columnNames
.Where(columnName => (!columnName.Equals("INDEX", StringComparison.OrdinalIgnoreCase) &&
!columnName.Equals("DateTime", StringComparison.OrdinalIgnoreCase)) ||
columnName == "hypo_upordown7days");
// Define the data pipeline
var pipeline = mlContext.Transforms.Conversion.MapValueToKey("hypo_upordown7days")
.Append(mlContext.Transforms.Concatenate("Features", string.Join(",", includedColumns.Where(columnName => columnName != "hypo_upordown7days"))))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.Append(mlContext.Transforms.DropColumns("Features"))
.Append(mlContext.Transforms.Conversion.MapKeyToValue("hypo_upordown7days"));
// Train the model
var model = pipeline.Fit(split.TrainSet);
// Make predictions
var predictions = model.Transform(split.TestSet);
// Evaluate the model
var metrics = mlContext.BinaryClassification.Evaluate(predictions, labelColumnName: "hypo_upordown7days");
// Retrieve the evaluation metrics
var accuracy = metrics.Accuracy;
var auc = metrics.AreaUnderRocCurve;
MessageBox.Show($"Accuracy: {accuracy}\n" + $"AUC: {auc}");
}
// Define a class to hold your data
public class AAPLData
{
[LoadColumn(2)] public float MonthNr { get; set; }
[LoadColumn(3)] public float DayNr { get; set; }
[LoadColumn(4)] public float roc_1 { get; set; }
[LoadColumn(5)] public float roc_2 { get; set; }
[LoadColumn(6)] public float hypo_upordown7days { get; set; }
}
public class AAPLPrediction
{
[ColumnName("PredictedLabel")]
public byte Prediction { get; set; }
}
public static List<string> GetColumnNamesFromDatabaseTable(string connectionString, string tableName)
{
var columnNames = new List<string>();
using (var connection = new SqlConnection(connectionString))
{
connection.Open();
// Get the schema information for the table
var schemaTable = connection.GetSchema("Columns", new[] { null, null, tableName });
foreach (DataRow row in schemaTable.Rows)
{
// Extract the column name from the schema table
var columnName = row["COLUMN_NAME"].ToString();
columnNames.Add(columnName);
}
}
return columnNames;
}
[/code]