Skip to content
This repository was archived by the owner on Nov 19, 2020. It is now read-only.
This repository was archived by the owner on Nov 19, 2020. It is now read-only.

Potential bug in RandomForest or C45Learning #249

@larsbeck

Description

@larsbeck

Hi,

I am getting an IndexOutOfRangeException at line 640 of C45Learning in the current 3.1.0-alpha release when executing the following code. (I am aware of the "insert code" feature, but it doesn't seem to work with this code, so sorry for the bad formatting.)

using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Accord.MachineLearning;
using Accord.MachineLearning.DecisionTrees;
using Accord.MachineLearning.DecisionTrees.Learning;
using Accord.MachineLearning.VectorMachines;
using Accord.MachineLearning.VectorMachines.Learning;
using Accord.Statistics.Kernels;
using CsvHelper;
using Microsoft.VisualBasic.FileIO;

namespace YuriMLClass
{
/// <summary>
/// Repro program: for every fold of a fixed-size split of the data and for every
/// solver column in "ALL.times", trains a 10-tree random forest on the rows outside
/// the fold and predicts, for the rows inside the fold, whether the solver's
/// recorded time is below <see cref="SolvedTimeLimit"/>.
/// </summary>
class Program
{
    /// <summary>Number of folds the data set is split into.</summary>
    private const int FoldCount = 10;

    /// <summary>A recorded time strictly below this value is labelled as "solved".</summary>
    private const double SolvedTimeLimit = 5000;

    /// <summary>
    /// Loads "ALL.times" and "ALL.features" from the working directory and runs the
    /// per-fold, per-solver training and prediction loop.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    /// <exception cref="InvalidDataException">
    /// The two files contain no data rows, or a different number of data rows.
    /// </exception>
    static void Main(string[] args)
    {
        CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
        var times = ReadCSV("ALL.times");
        var features = ReadCSV("ALL.features");

        // Labels and features are paired by row position, so fail early and clearly
        // instead of with "Sequence contains no elements" or an error inside the library.
        if (times.Count == 0 || times.Count != features.Count)
        {
            throw new InvalidDataException(
                "ALL.times and ALL.features must contain the same non-zero number of data rows (got "
                + times.Count + " and " + features.Count + ").");
        }

        var didSolve = times.Select(row => row.Select(t => t < SolvedTimeLimit).ToList()).ToList();

        // Integer division: when the row count is not a multiple of FoldCount, the
        // trailing remainder rows are never part of a test fold (they are always trained on).
        var elementsPerFold = didSolve.Count / FoldCount;

        for (int i = 0; i < FoldCount; i++)
        {
            int testStart = i * elementsPerFold;
            int testEnd = testStart + elementsPerFold;

            // Split by row position and materialise once per fold, so the split is not
            // re-evaluated for every solver and rows of x_train / y_train stay aligned.
            var x_test = features.Skip(testStart).Take(elementsPerFold).ToList();
            var x_train = features.Where((row, index) => index < testStart || index >= testEnd).ToList();
            var y_train = didSolve.Where((row, index) => index < testStart || index >= testEnd).ToList();

            var allSolverPredictions = new List<bool[]>();
            for (int j = 0; j < y_train[0].Count; j++)
            {
                // One binary target per solver: 1 = solved, 0 = not solved.
                var y_train_current_solver = y_train.Select(row => row[j] ? 1 : 0).ToArray();

                // Fresh input arrays for every Learn call, exactly as the original code built them.
                var inputs = x_train.Select(row => row.ToArray()).ToArray();

                var randomForestLearning = new RandomForestLearning() { Trees = 10 };
                var randomForest = randomForestLearning.Learn(inputs, y_train_current_solver);

                var currentSolverPredictions = new List<bool>();
                foreach (var test_instance in x_test)
                {
                    var compute = randomForest.Compute(test_instance.ToArray());
                    currentSolverPredictions.Add(compute != 0);
                }
                allSolverPredictions.Add(currentSolverPredictions.ToArray());
            }
        }
    }

    /// <summary>
    /// Reads a comma-delimited file, skipping the first (header) row and the first
    /// field of every remaining row, and parses the other fields as doubles using
    /// the invariant culture.
    /// </summary>
    /// <param name="filename">Path of the file to read.</param>
    /// <returns>One list of parsed values per data row, in file order.</returns>
    private static List<List<double>> ReadCSV(string filename)
    {
        var returnList = new List<List<double>>();
        using (var parser = new TextFieldParser(filename))
        {
            parser.TextFieldType = FieldType.Delimited;
            parser.SetDelimiters(",");

            // The first row is a header: consume and discard it.
            if (!parser.EndOfData)
            {
                parser.ReadFields();
            }

            while (!parser.EndOfData)
            {
                var fields = parser.ReadFields();
                if (fields == null)
                {
                    break;
                }

                // Parse with an explicit culture so the result does not depend on the
                // machine's regional settings or on a caller having changed the default.
                var values = fields
                    .Skip(1)
                    .Select(field => double.Parse(field, CultureInfo.InvariantCulture))
                    .ToList();
                returnList.Add(values);
            }
        }
        return returnList;
    }
}

}

ALL.zip

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions