방법: LINQ 쿼리와 정규식 결합

업데이트: 2007년 11월

이 예제에서는 Regex 클래스를 사용하여 텍스트 문자열에서 좀 더 복잡한 일치 검색을 수행하는 정규식을 만드는 방법을 보여 줍니다. LINQ 쿼리를 사용하면 정규식으로 검색하려는 파일만 정확하게 필터링하고 결과의 형태를 구성하는 작업이 쉬워집니다.


''' <summary>
''' Console sample that combines a LINQ query with a regular expression:
''' it scans the .htm files under a folder and prints every regex match
''' found in each file.
''' </summary>
Class LinqRegExVB

    ''' <summary>
    ''' Builds the file list, runs the regex-based query, and writes the
    ''' matching file names and matched strings to the console.
    ''' </summary>
    Shared Sub Main()

        ' Root folder to query, along with all subfolders.
        ' Modify this path as necessary.
        Dim startFolder As String = "C:\program files\Microsoft Visual Studio 9.0\"

        ' Take a snapshot of the file system.
        Dim fileList As IEnumerable(Of System.IO.FileInfo) = GetFiles(startFolder)

        ' Create a regular expression to find all things "Visual".
        Dim searchTerm As System.Text.RegularExpressions.Regex = _
            New System.Text.RegularExpressions.Regex("Visual (Basic|C#|C\+\+|J#|SourceSafe|Studio)")

        ' Search the contents of each .htm file.
        ' Remove the extension filter (first Where clause) to search more files.
        ' The query produces, for every file that contains at least one match,
        ' the file name and the list of matched strings.
        ' The regex is evaluated once per file; the resulting MatchCollection
        ' is reused for both the Count test and the projection.
        ' Note: the range variable in the nested From clause is explicitly
        ' typed as Match because MatchCollection is not a generic
        ' IEnumerable collection.
        Dim queryMatchingFiles = From afile In fileList _
                                 Where afile.Extension = ".htm" _
                                 Let fileText = System.IO.File.ReadAllText(afile.FullName) _
                                 Let found = searchTerm.Matches(fileText) _
                                 Where found.Count > 0 _
                                 Select Name = afile.FullName, _
                                        Matches = From match As System.Text.RegularExpressions.Match In found _
                                                  Select match.Value

        ' Execute the query (enumeration below triggers the deferred work).
        Console.WriteLine("The term " & searchTerm.ToString() & " was found in:")

        For Each fileMatches In queryMatchingFiles
            ' Trim the path a bit, then write 
            ' the file name in which a match was found.
            ' Length - 1 keeps the separator that ends startFolder.
            Dim s = fileMatches.Name.Substring(startFolder.Length - 1)
            Console.WriteLine(s)

            ' For this file, write out all the matching strings
            For Each match In fileMatches.Matches
                Console.WriteLine("  " & match)
            Next
        Next

        ' Keep the console window open in debug mode
        Console.WriteLine("Press any key to exit")
        Console.ReadKey()
    End Sub

    ''' <summary>
    ''' Retrieves a FileInfo for every file under <paramref name="root"/>,
    ''' including all subdirectories. Note that this is a copy of the file
    ''' information.
    ''' </summary>
    ''' <param name="root">Folder whose files are listed.</param>
    ''' <returns>A sequence of FileInfo objects, one per file found.</returns>
    Shared Function GetFiles(ByVal root As String) As IEnumerable(Of System.IO.FileInfo)
        Return From file In My.Computer.FileSystem.GetFiles _
                  (root, FileIO.SearchOption.SearchAllSubDirectories, "*.*") _
               Select New System.IO.FileInfo(file)
    End Function

End Class
/// <summary>
/// Console sample that combines a LINQ query with a regular expression:
/// it scans the .htm files under a folder and prints every regex match
/// found in each file.
/// </summary>
class QueryWithRegEx
{
    /// <summary>
    /// Builds the file list, runs the regex-based query, and writes the
    /// matching file names and matched strings to the console.
    /// </summary>
    public static void Main()
    {
        // Modify this path as necessary.
        string startFolder = @"c:\program files\Microsoft Visual Studio 9.0\";

        // Take a snapshot of the file system.
        IEnumerable<System.IO.FileInfo> fileList = GetFiles(startFolder);

        // Create the regular expression to find all things "Visual".
        System.Text.RegularExpressions.Regex searchTerm =
            new System.Text.RegularExpressions.Regex(@"Visual (Basic|C#|C\+\+|J#|SourceSafe|Studio)");

        // Search the contents of each .htm file.
        // Remove the extension filter (first where clause) to search more files.
        // The query produces, for every file that contains at least one match,
        // the file name and the list of matched strings.
        // Note: the range variable in the nested from clause is explicitly
        // typed as Match because MatchCollection is not a generic
        // IEnumerable collection.
        var queryMatchingFiles =
            from file in fileList
            where file.Extension == ".htm"
            let fileText = System.IO.File.ReadAllText(file.FullName)
            let matches = searchTerm.Matches(fileText)
            // Reuse the MatchCollection instead of running the regex a second time.
            where matches.Count > 0
            select new
            {
                name = file.FullName,
                matches = from System.Text.RegularExpressions.Match match in matches
                          select match.Value
            };

        // Execute the query (enumeration below triggers the deferred work).
        Console.WriteLine("The term \"{0}\" was found in:", searchTerm.ToString());

        foreach (var v in queryMatchingFiles)
        {
            // Trim the path a bit, then write 
            // the file name in which a match was found.
            // Length - 1 keeps the separator that ends startFolder.
            string s = v.name.Substring(startFolder.Length - 1);
            Console.WriteLine(s);

            // For this file, write out all the matching strings
            foreach (var v2 in v.matches)
            {
                Console.WriteLine("  " + v2);
            }
        }

        // Keep the console window open in debug mode
        Console.WriteLine("Press any key to exit");
        Console.ReadKey();
    }

    /// <summary>
    /// Returns a FileInfo for every file under <paramref name="path"/>,
    /// including all subdirectories.
    /// </summary>
    /// <remarks>
    /// This method assumes that the application has discovery
    /// permissions for all folders under the specified path.
    /// </remarks>
    /// <param name="path">Folder whose files are listed.</param>
    /// <returns>A list of FileInfo objects, one per file found.</returns>
    /// <exception cref="System.IO.DirectoryNotFoundException">
    /// <paramref name="path"/> does not exist.
    /// </exception>
    static IEnumerable<System.IO.FileInfo> GetFiles(string path)
    {
        if (!System.IO.Directory.Exists(path))
        {
            throw new System.IO.DirectoryNotFoundException();
        }

        string[] fileNames = System.IO.Directory.GetFiles(path, "*.*", System.IO.SearchOption.AllDirectories);
        List<System.IO.FileInfo> files = new List<System.IO.FileInfo>(fileNames.Length);

        foreach (string name in fileNames)
        {
            files.Add(new System.IO.FileInfo(name));
        }
        return files;
    }
}

RegEx 검색에서 반환하는 MatchCollection 개체도 쿼리할 수 있습니다. 이 예제에서는 일치하는 각 항목의 값만 결과로 생성됩니다. 하지만 LINQ를 사용하여 해당 컬렉션에서 모든 종류의 필터링, 정렬 및 그룹화를 수행할 수도 있습니다. MatchCollection은 제네릭이 아닌 IEnumerable 컬렉션이므로 쿼리에서 범위 변수의 형식을 명시적으로 지정해야 합니다.

코드 컴파일

  • .NET Framework 버전 3.5를 대상으로 하는 Visual Studio 프로젝트를 만듭니다. 기본적으로 프로젝트에는 System.Core.dll에 대한 참조 및 System.Linq 네임스페이스에 대한 using 지시문(C#) 또는 Imports 문(Visual Basic)이 있습니다. C# 프로젝트에서는 System.IO 네임스페이스에 대한 using 지시문을 추가합니다.

  • 프로젝트에 이 코드를 복사합니다.

  • F5 키를 눌러 프로그램을 컴파일하고 실행합니다.

  • 아무 키나 눌러 콘솔 창을 닫습니다.

