How to: Combine and Compare String Collections (LINQ)
This example shows how to merge files that contain lines of text and then sort the results. Specifically, it shows how to perform a simple concatenation, a union, and an intersection on the two sets of text lines.
To set up the project and the text files
Copy these names into a text file that is named names1.txt and save it in your solution folder:
Bankov, Peter
Holm, Michael
Garcia, Hugo
Potra, Cristina
Noriega, Fabricio
Aw, Kam Foo
Beebe, Ann
Toyoshima, Tim
Guy, Wey Yuan
Garcia, Debra
Copy these names into a text file that is named names2.txt and save it in your solution folder. Note that the two files have some names in common.
Liu, Jinghao
Bankov, Peter
Holm, Michael
Garcia, Hugo
Beebe, Ann
Gilchrist, Beth
Myrcha, Jacek
Giakoumakis, Leo
McLin, Nkenge
El Yassir, Mehdi
Example
''' <summary>
''' Demonstrates combining two collections of text lines with LINQ:
''' plain concatenation, union, intersection, and a merge of two
''' filtered queries. Each result is printed with a trailing item count.
''' </summary>
Class ConcatenateStrings

    ''' <summary>
    ''' Reads names1.txt and names2.txt (one name per line), builds the
    ''' four queries, and prints each of them through OutputQueryResults.
    ''' </summary>
    Shared Sub Main()

        ' Create the IEnumerable data sources.
        Dim fileA As String() = System.IO.File.ReadAllLines("../../../names1.txt")
        Dim fileB As String() = System.IO.File.ReadAllLines("../../../names2.txt")

        ' Simple concatenation and sort. Duplicates are preserved.
        Dim concatQuery = fileA.Concat(fileB).OrderBy(Function(name) name)

        ' Pass the query variable to another function for execution.
        OutputQueryResults(concatQuery, "Simple concatenation and sort. Duplicates are preserved:")

        ' New query. Concatenate files and remove duplicates.
        Dim uniqueNamesQuery = fileA.Union(fileB).OrderBy(Function(name) name)
        OutputQueryResults(uniqueNamesQuery, "Union removes duplicate names:")

        ' New query. Find the names that occur in both files.
        ' No OrderBy here, so the result is printed unsorted.
        Dim commonNamesQuery = fileA.Intersect(fileB)
        OutputQueryResults(commonNamesQuery, "Merge based on intersect: ")

        ' New query in three steps for better readability.
        ' First filter each list separately: keep the lines whose text
        ' before the first comma equals nameToSearch.
        Dim nameToSearch As String = "Garcia"
        Dim mergeQueryA As IEnumerable(Of String) = From name In fileA _
                                                    Let n = name.Split(New Char() {","c}) _
                                                    Where n(0) = nameToSearch _
                                                    Select name

        Dim mergeQueryB = From name In fileB _
                          Let n = name.Split(New Char() {","c}) _
                          Where n(0) = nameToSearch _
                          Select name

        ' Create a new query to merge and sort the results. Union is used here,
        ' so a name present in both lists appears only once even though the
        ' heading printed below says "Concat".
        ' Note that none of the queries actually executed until the call to OutputQueryResults.
        Dim mergeSortQuery = mergeQueryA.Union(mergeQueryB).OrderBy(Function(str) str)

        ' Now execute mergeSortQuery.
        OutputQueryResults(mergeSortQuery, "Concat based on partial name match """ & nameToSearch & """ from each list:")

        ' Keep console window open in debug mode.
        Console.WriteLine("Press any key to exit.")
        Console.ReadKey()
    End Sub

    ''' <summary>
    ''' Writes a blank line and the heading, then every item of the query on
    ''' its own line, then a "N total names in list" summary line.
    ''' </summary>
    ''' <param name="query">The sequence to print. It is enumerated exactly once.</param>
    ''' <param name="message">Heading written before the items.</param>
    Shared Sub OutputQueryResults(ByVal query As IEnumerable(Of String), ByVal message As String)
        Console.WriteLine(System.Environment.NewLine & message)

        ' Count while printing. Calling query.Count afterwards would run the
        ' deferred query (including its sort) a second time.
        Dim total As Integer = 0
        For Each item As String In query
            Console.WriteLine(item)
            total += 1
        Next
        Console.WriteLine(total & " total names in list")
    End Sub
End Class
' Output:
' Simple concatenation and sort. Duplicates are preserved:
' Aw, Kam Foo
' Bankov, Peter
' Bankov, Peter
' Beebe, Ann
' Beebe, Ann
' El Yassir, Mehdi
' Garcia, Debra
' Garcia, Hugo
' Garcia, Hugo
' Giakoumakis, Leo
' Gilchrist, Beth
' Guy, Wey Yuan
' Holm, Michael
' Holm, Michael
' Liu, Jinghao
' McLin, Nkenge
' Myrcha, Jacek
' Noriega, Fabricio
' Potra, Cristina
' Toyoshima, Tim
' 20 total names in list
' Union removes duplicate names:
' Aw, Kam Foo
' Bankov, Peter
' Beebe, Ann
' El Yassir, Mehdi
' Garcia, Debra
' Garcia, Hugo
' Giakoumakis, Leo
' Gilchrist, Beth
' Guy, Wey Yuan
' Holm, Michael
' Liu, Jinghao
' McLin, Nkenge
' Myrcha, Jacek
' Noriega, Fabricio
' Potra, Cristina
' Toyoshima, Tim
' 16 total names in list
' Merge based on intersect:
' Bankov, Peter
' Holm, Michael
' Garcia, Hugo
' Beebe, Ann
' 4 total names in list
' Concat based on partial name match "Garcia" from each list:
' Garcia, Debra
' Garcia, Hugo
' 2 total names in list
/// <summary>
/// Demonstrates combining two collections of text lines with LINQ:
/// plain concatenation, union, intersection, and a concatenation of two
/// filtered queries. Each result is printed with a trailing item count.
/// </summary>
class MergeStrings
{
    /// <summary>
    /// Reads names1.txt and names2.txt (one name per line), builds the
    /// four queries, and prints each of them through OutputQueryResults.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        //Put text files in your solution folder
        string[] fileA = System.IO.File.ReadAllLines(@"../../../names1.txt");
        string[] fileB = System.IO.File.ReadAllLines(@"../../../names2.txt");

        //Simple concatenation and sort. Duplicates are preserved.
        IEnumerable<string> concatQuery =
            fileA.Concat(fileB).OrderBy(s => s);

        // Pass the query variable to another function for execution.
        OutputQueryResults(concatQuery, "Simple concatenate and sort. Duplicates are preserved:");

        // Concatenate and remove duplicate names based on
        // default string comparer.
        IEnumerable<string> uniqueNamesQuery =
            fileA.Union(fileB).OrderBy(s => s);
        OutputQueryResults(uniqueNamesQuery, "Union removes duplicate names:");

        // Find the names that occur in both files (based on
        // default string comparer). No OrderBy here, so the
        // result is printed unsorted.
        IEnumerable<string> commonNamesQuery =
            fileA.Intersect(fileB);
        OutputQueryResults(commonNamesQuery, "Merge based on intersect:");

        // Find the matching fields in each list: keep the lines whose
        // text before the first comma equals nameMatch. Merge the two
        // results by using Concat (duplicates are kept), and then
        // sort using the default string comparer.
        string nameMatch = "Garcia";

        IEnumerable<string> tempQuery1 =
            from name in fileA
            let n = name.Split(',')
            where n[0] == nameMatch
            select name;

        IEnumerable<string> tempQuery2 =
            from name2 in fileB
            let n2 = name2.Split(',')
            where n2[0] == nameMatch
            select name2;

        IEnumerable<string> nameMatchQuery =
            tempQuery1.Concat(tempQuery2).OrderBy(s => s);
        OutputQueryResults(nameMatchQuery, String.Format("Concat based on partial name match \"{0}\":", nameMatch));

        // Keep the console window open in debug mode.
        Console.WriteLine("Press any key to exit");
        Console.ReadKey();
    }

    /// <summary>
    /// Writes a blank line and the heading, then every item of the query on
    /// its own line, then a "N total names in list" summary line.
    /// </summary>
    /// <param name="query">The sequence to print. It is enumerated exactly once.</param>
    /// <param name="message">Heading written before the items.</param>
    static void OutputQueryResults(IEnumerable<string> query, string message)
    {
        Console.WriteLine(System.Environment.NewLine + message);

        // Count while printing. Calling query.Count() afterwards would run
        // the deferred query (including its sort) a second time.
        int total = 0;
        foreach (string item in query)
        {
            Console.WriteLine(item);
            total++;
        }
        Console.WriteLine("{0} total names in list", total);
    }
}
/* Output:
Simple concatenate and sort. Duplicates are preserved:
Aw, Kam Foo
Bankov, Peter
Bankov, Peter
Beebe, Ann
Beebe, Ann
El Yassir, Mehdi
Garcia, Debra
Garcia, Hugo
Garcia, Hugo
Giakoumakis, Leo
Gilchrist, Beth
Guy, Wey Yuan
Holm, Michael
Holm, Michael
Liu, Jinghao
McLin, Nkenge
Myrcha, Jacek
Noriega, Fabricio
Potra, Cristina
Toyoshima, Tim
20 total names in list
Union removes duplicate names:
Aw, Kam Foo
Bankov, Peter
Beebe, Ann
El Yassir, Mehdi
Garcia, Debra
Garcia, Hugo
Giakoumakis, Leo
Gilchrist, Beth
Guy, Wey Yuan
Holm, Michael
Liu, Jinghao
McLin, Nkenge
Myrcha, Jacek
Noriega, Fabricio
Potra, Cristina
Toyoshima, Tim
16 total names in list
Merge based on intersect:
Bankov, Peter
Holm, Michael
Garcia, Hugo
Beebe, Ann
4 total names in list
Concat based on partial name match "Garcia":
Garcia, Debra
Garcia, Hugo
Garcia, Hugo
3 total names in list
*/
Compiling the Code
Create a Visual Studio project that targets the .NET Framework version 3.5. By default, the project has a reference to System.Core.dll and a using directive (C#) or Imports statement (Visual Basic) for the System.Linq namespace. In C# projects, add a using directive for the System.IO namespace.
Copy this code into your project.
Press F5 to compile and run the program.
Press any key to exit the console window.