方法 : CSV テキスト ファイルの列値の計算 (LINQ)

更新 : 2007 年 11 月

この例では、.csv ファイルの列に対して、合計、平均、最小値、および最大値などの集計を実行する方法を示します。この例で示される原則は、別の型の構造化テキストにも適用できます。

ソース ファイルを作成するには

  • 以下の行を scores.csv という名前のファイルにコピーし、ソリューション フォルダに保存します。最初の列が学生 ID、残りの列が 4 つの試験の点数を表すとします。

    111, 97, 92, 81, 60
    112, 75, 84, 91, 39
    113, 88, 94, 65, 91
    114, 97, 89, 85, 82
    115, 35, 72, 91, 70
    116, 99, 86, 90, 94
    117, 93, 92, 80, 87
    118, 92, 90, 83, 78
    119, 68, 79, 88, 92
    120, 99, 82, 81, 79
    121, 96, 85, 91, 60
    122, 94, 92, 91, 91
    

使用例

Class SumColumns

    Public Shared Sub Main()

        Dim lines As String() = System.IO.File.ReadAllLines("../../../scores.csv")

        ' Specifies the column to compute
        ' This value could be passed in at runtime.
        Dim exam = 3

        ' Spreadsheet format:
        ' Student ID    Exam#1  Exam#2  Exam#3  Exam#4
        ' 111,          97,     92,     81,     60
        ' one is added to skip over the first column
        ' which holds the student ID.
        SumColumn(lines, exam + 1)
        Console.WriteLine()
        MultiColumns(lines)

        ' Keep the console window open in debug mode.
        Console.WriteLine("Press any key to exit...")
        Console.ReadKey()

    End Sub

    Shared Sub SumColumn(ByVal lines As IEnumerable(Of String), ByVal col As Integer)

        ' This query performs two steps:
        ' split the string into a string array
        ' convert the specified element to
        ' integer and select it.
        Dim columnQuery = From line In lines _
                           Let x = line.Split(",") _
                           Select Convert.ToInt32(x(col))

        ' Execute and cache the results for performance.
        ' Only needed with very large files.
        Dim results = columnQuery.ToList()

        ' Perform aggregate calculations 
        ' on the column specified by col.
        Dim avgScore = Aggregate score In results Into Average(score)
        Dim minScore = Aggregate score In results Into Min(score)
        Dim maxScore = Aggregate score In results Into Max(score)

        Console.WriteLine("Single Column Query:")
        Console.WriteLine("Exam #{0}: Average:{1:##.##} High Score:{2} Low Score:{3}", _
                     col, avgScore, maxScore, minScore)


    End Sub

    Shared Sub MultiColumns(ByVal lines As IEnumerable(Of String))

        Console.WriteLine("Multi Column Query:")

        ' Create the query. It will produce nested sequences. 
        ' multiColQuery performs these steps:
        ' 1) convert the string to a string array
        ' 2) skip over the "Student ID" column and take the rest
        ' 3) convert each field to an int and select that 
        '    entire sequence as one row in the results.
        Dim multiColQuery = From line In lines _
                            Let fields = line.Split(",") _
                            Select From str In fields Skip 1 _
                                        Select Convert.ToInt32(str)

        Dim results = multiColQuery.ToList()

        ' Find out how many columns we have.
        Dim columnCount = results(0).Count()

        ' Perform aggregate calculations on each column.            
        ' One loop for each score column in scores.
        ' We can use a for loop because we have already
        ' executed the multiColQuery in the call to ToList.

        For j As Integer = 0 To columnCount - 1
            Dim column = j
            Dim res2 = From row In results _
                       Select row.ElementAt(column)

            ' Perform aggregate calculations 
            ' on the column specified by col.
            Dim avgScore = Aggregate score In res2 Into Average(score)
            Dim minScore = Aggregate score In res2 Into Min(score)
            Dim maxScore = Aggregate score In res2 Into Max(score)

            ' Add 1 to column numbers because exams in this course start with #1
            Console.WriteLine("Exam #{0} Average: {1:##.##} High Score: {2} Low Score: {3}", _
                              column + 1, avgScore, maxScore, minScore)

        Next
    End Sub

End Class
' Output:
' Single Column Query:
' Exam #4: Average:76.92 High Score:94 Low Score:39

' Multi Column Query:
' Exam #1 Average: 86.08 High Score: 99 Low Score: 35
' Exam #2 Average: 86.42 High Score: 94 Low Score: 72
' Exam #3 Average: 84.75 High Score: 91 Low Score: 65
' Exam #4 Average: 76.92 High Score: 94 Low Score: 39

class SumColumns
{
    static void Main(string[] args)
    {
        string[] lines = System.IO.File.ReadAllLines(@"../../../scores.csv");

        // Specifies the column to compute
        int exam = 3;

        // Spreadsheet format:
        // Student ID    Exam#1  Exam#2  Exam#3  Exam#4
        // 111,          97,     92,     81,     60
        // one is added to skip over the first column
        // which holds the student ID.
        SingleColumn(lines, exam + 1);
        Console.WriteLine();
        MultiColumns(lines);

        Console.WriteLine("Press any key to exit");
        Console.ReadKey();
    }

    static void SingleColumn(IEnumerable<string> strs, int examNum)
    {
        Console.WriteLine("Single Column Query:");

        // examNum specifies the column to run the 
        // calculations on. This could also be
        // passed in dynamically at runtime.             

        // columnQuery is a IEnumerable<int>
        // This query performs two steps:
        // 1) split the string into a string[]
        // 2) convert the specified element to
        //    int and select it.
        var columnQuery =
            from line in strs
            let x = line.Split(',')
            select Convert.ToInt32(x[examNum]);

        // Execute and cache the results for performance.
        // Only needed with very large files.
        var results = columnQuery.ToList();

        // Perform aggregate calculations 
        // on the column specified by examNum.
        double average = results.Average();
        int max = results.Max();
        int min = results.Min();

        Console.WriteLine("Exam #{0}: Average:{1:##.##} High Score:{2} Low Score:{3}",
                 examNum, average, max, min);
    }
    static void MultiColumns(IEnumerable<string> strs)
    {
        Console.WriteLine("Multi Column Query:");

        // Create the columnQuery. Explicit typing is used
        // to make clear that the columnQuery will produce 
        // nested sequences. You can also just use 'var'.
        // The columnQuery performs these steps:
        // 1) convert the string to a string[]
        // 2) skip over the "Student ID" column and take the rest
        // 3) convert each string to an int and select that 
        //    entire sequence as one row in the results.
        IEnumerable<IEnumerable<int>> query =
            from line in strs
            let x = line.Split(',')
            let y = x.Skip(1)
            select (from str in y
                    select Convert.ToInt32(str));

        // Execute and cache the results for performance.
        // ToArray could also be used here.
        var results = query.ToList();

        // Find out how many columns we have.
        int columnCount = results[0].Count();

        // Perform aggregate calculations on each column.            
        // One loop for each score column in scores.
        // We can use a for loop because we have already
        // executed the columnQuery in the call to ToList.
        for (int column = 0; column < columnCount; column++)
        {
            var res2 = from row in results
                       select row.ElementAt(column);
            double average = res2.Average();
            int max = res2.Max();
            int min = res2.Min();

            // 1 is added to column because Exam numbers
            // begin with 1
            Console.WriteLine("Exam #{0} Average: {1:##.##} High Score: {2} Low Score: {3}",
                          column + 1, average, max, min);
        }
    }
}
/* Output:
    Single Column Query:
    Exam #4: Average:76.92 High Score:94 Low Score:39

    Multi Column Query:
    Exam #1 Average: 86.08 High Score: 99 Low Score: 35
    Exam #2 Average: 86.42 High Score: 94 Low Score: 72
    Exam #3 Average: 84.75 High Score: 91 Low Score: 65
    Exam #4 Average: 76.92 High Score: 94 Low Score: 39
 */

ファイルがタブ区切りファイルの場合は、Split メソッドの引数を \t に変更するだけです。

コードのコンパイル方法

  • .NET Framework Version 3.5 を対象とする Visual Studio プロジェクトを作成します。プロジェクトには、System.Core.dll への参照と、System.Linq 名前空間に対する using ディレクティブ (C#) または Imports ステートメント (Visual Basic) が既定で含まれます。

  • このコードをプロジェクト内にコピーします。

  • F5 キーを押して、プログラムをコンパイルおよび実行します。

  • 任意のキーを押して、コンソール ウィンドウを終了します。

参照

概念

LINQ と文字列

LINQ とファイル ディレクトリ