From 64a2437bd9a4d2387d476b780d64d0a83245e3c3 Mon Sep 17 00:00:00 2001 From: "Andy De George (from Dev Box)" Date: Mon, 9 Feb 2026 12:23:22 -0800 Subject: [PATCH 1/2] Initial --- .../5.0/icu-globalization-api.md | 1 + .../7.0/icu-globalization-api.md | 1 + docs/core/extensions/globalization-icu.md | 2 +- docs/fundamentals/toc.yml | 2 - .../base-types/best-practices-strings.md | 103 ++++++ .../csharp/collation-elements/Program.cs | 24 ++ .../collation-elements.csproj | 9 + .../csharp/icu-demo/Program.cs | 9 + .../csharp/icu-demo/icu-demo.csproj | 9 + .../csharp/security-filtering/Program.cs | 27 ++ .../security-filtering.csproj | 9 + .../vb/collation-elements/Program.vb | 28 ++ .../collation-elements.vbproj | 9 + .../vb/icu-demo/Program.vb | 13 + .../vb/icu-demo/icu-demo.vbproj | 9 + .../vb/security-filtering/Program.vb | 29 ++ .../security-filtering.vbproj | 9 + .../string-comparison-net-5-plus.md | 334 +----------------- 18 files changed, 292 insertions(+), 335 deletions(-) create mode 100644 docs/standard/base-types/snippets/best-practices-strings/csharp/collation-elements/Program.cs create mode 100644 docs/standard/base-types/snippets/best-practices-strings/csharp/collation-elements/collation-elements.csproj create mode 100644 docs/standard/base-types/snippets/best-practices-strings/csharp/icu-demo/Program.cs create mode 100644 docs/standard/base-types/snippets/best-practices-strings/csharp/icu-demo/icu-demo.csproj create mode 100644 docs/standard/base-types/snippets/best-practices-strings/csharp/security-filtering/Program.cs create mode 100644 docs/standard/base-types/snippets/best-practices-strings/csharp/security-filtering/security-filtering.csproj create mode 100644 docs/standard/base-types/snippets/best-practices-strings/vb/collation-elements/Program.vb create mode 100644 docs/standard/base-types/snippets/best-practices-strings/vb/collation-elements/collation-elements.vbproj create mode 100644 
docs/standard/base-types/snippets/best-practices-strings/vb/icu-demo/Program.vb create mode 100644 docs/standard/base-types/snippets/best-practices-strings/vb/icu-demo/icu-demo.vbproj create mode 100644 docs/standard/base-types/snippets/best-practices-strings/vb/security-filtering/Program.vb create mode 100644 docs/standard/base-types/snippets/best-practices-strings/vb/security-filtering/security-filtering.vbproj diff --git a/docs/core/compatibility/globalization/5.0/icu-globalization-api.md b/docs/core/compatibility/globalization/5.0/icu-globalization-api.md index 69480cd3612e2..de315df9393e1 100644 --- a/docs/core/compatibility/globalization/5.0/icu-globalization-api.md +++ b/docs/core/compatibility/globalization/5.0/icu-globalization-api.md @@ -84,4 +84,5 @@ No action is required on the part of the developer. However, if you wish to cont ## See also +- [Best practices for comparing strings in .NET](../../../../standard/base-types/best-practices-strings.md) - [Globalization APIs use ICU libraries on Windows Server](../7.0/icu-globalization-api.md) diff --git a/docs/core/compatibility/globalization/7.0/icu-globalization-api.md b/docs/core/compatibility/globalization/7.0/icu-globalization-api.md index 629b7fd7bc36c..a3770c15024f2 100644 --- a/docs/core/compatibility/globalization/7.0/icu-globalization-api.md +++ b/docs/core/compatibility/globalization/7.0/icu-globalization-api.md @@ -60,4 +60,5 @@ If you wish to continue using NLS globalization APIs, you can set a [runtime swi ## See also +- [Best practices for comparing strings in .NET](../../../../standard/base-types/best-practices-strings.md) - [Globalization APIs use ICU libraries on Windows 10](../5.0/icu-globalization-api.md) diff --git a/docs/core/extensions/globalization-icu.md b/docs/core/extensions/globalization-icu.md index c90341bdf32d5..87852023dc6c7 100644 --- a/docs/core/extensions/globalization-icu.md +++ b/docs/core/extensions/globalization-icu.md @@ -79,7 +79,7 @@ By default, . 
- Use or for comparisons as your safe default for culture-agnostic string matching. - Use comparisons with or for better performance. +- Enable [code analyzers](../../fundamentals/code-analysis/overview.md) such as [CA1307](../../fundamentals/code-analysis/quality-rules/ca1307.md), [CA1309](../../fundamentals/code-analysis/quality-rules/ca1309.md), and [CA1310](../../fundamentals/code-analysis/quality-rules/ca1310.md) to detect potentially incorrect string comparisons in your code. - Use string operations that are based on when you display output to the user. - Use the non-linguistic or values instead of string operations based on when the comparison is linguistically irrelevant (symbolic, for example). - Use the method instead of the method when you normalize strings for comparison. @@ -90,6 +91,28 @@ However, evaluating two strings for equality or sort order doesn't yield a singl In addition, string comparisons using different versions of .NET or using .NET on different operating systems or operating system versions may return different results. For more information, see [Strings and the Unicode Standard](xref:System.String#Unicode). +### Globalization libraries: .NET vs .NET Framework + +.NET and .NET Framework use different globalization libraries, which can affect string comparison behavior: + +- **.NET** uses the [International Components for Unicode (ICU)](https://icu.unicode.org/) libraries for globalization functionality across all platforms (Windows, Linux, macOS). ICU is an industry-standard Unicode implementation that provides consistent behavior across operating systems. +- **.NET Framework** uses [National Language Support (NLS)](/windows/win32/intl/national-language-support) APIs on Windows, which is a Windows-specific globalization system. + +Because ICU and NLS implement different logic in their linguistic comparers, the same string comparison code can produce different results depending on which runtime you're using. 
These differences aren't limited to string comparison; they also affect other culture-sensitive operations, such as formatting. Consider the following example, which formats a number as currency using a German culture: + +:::code language="csharp" source="./snippets/best-practices-strings/csharp/icu-demo/Program.cs"::: +:::code language="vb" source="./snippets/best-practices-strings/vb/icu-demo/Program.vb"::: + +When running on .NET Framework, the formatted value is `"100,00 €"` (using the euro symbol). On .NET, the formatted value is `"100,00 ¤"` (using the international currency symbol). This difference occurs because ICU treats currency as a property of a country or region, not just a language, whereas the language-only German culture (`"de"`) doesn't specify a country. + +If your application requires the older NLS behavior when running on .NET on Windows, you can enable it through [runtime configuration](../../core/runtime-config/globalization.md#nls). However, for new applications, we recommend using explicit `StringComparison` parameters to make string comparison behavior clear and consistent. + +For detailed information about behavior changes and migration guidance, see: + +- [Globalization APIs use ICU libraries on Windows 10](../../core/compatibility/globalization/5.0/icu-globalization-api.md) +- [Globalization APIs use ICU libraries on Windows Server](../../core/compatibility/globalization/7.0/icu-globalization-api.md) +- [.NET globalization and ICU](../../core/extensions/globalization-icu.md) + ### String comparisons that use the current culture One criterion involves using the conventions of the current culture when comparing strings. Comparisons that are based on the current culture use the thread's current culture or locale. If the culture isn't set by the user, it defaults to the operating system's setting. You should always use comparisons that are based on the current culture when data is linguistically relevant, and when it reflects culture-sensitive user interaction. 
@@ -131,6 +154,13 @@ In this case, because "file:" is meant to be interpreted as a non-linguistic, cu :::code language="csharp" source="./snippets/best-practices-strings/csharp/turkish/Program.cs" id="ordinal"::: :::code language="vb" source="./snippets/best-practices-strings/vb/turkish/Program.vb" id="ordinal"::: +Another security-sensitive scenario involves filtering or validation code. Consider the following example that attempts to detect HTML-sensitive characters: + +:::code language="csharp" source="./snippets/best-practices-strings/csharp/security-filtering/Program.cs"::: +:::code language="vb" source="./snippets/best-practices-strings/vb/security-filtering/Program.vb"::: + +The incorrect version uses the default linguistic search, which might not find literal `'<'` or `'&'` characters in all cultures. The corrected version explicitly uses `StringComparison.Ordinal` to ensure the literal characters are matched. For filtering, validation, and security-sensitive comparisons, always use ordinal comparison. + ### Ordinal string operations Specifying the or value in a method call signifies a non-linguistic comparison in which the features of natural languages are ignored. Methods that are invoked with these values base string operation decisions on simple byte comparisons instead of casing or equivalence tables that are parameterized by culture. In most cases, this approach best fits the intended interpretation of strings while making code faster and more reliable. @@ -168,6 +198,30 @@ Both and overloads that don't include a argument (including the equality operator). In any case, we recommend that you call an overload that has a parameter. +#### Linguistic comparison and collation elements + +Unlike ordinal comparisons, *linguistic* comparisons decompose strings into *collation elements* rather than individual characters. A collation element is a linguistic unit that may consist of one or more characters. 
For example, the accented character "é" can be represented as: + +- A single character: `'\u00E9'` +- A base character plus combining accent: `'e'` + `'\u0301'` + +When performing linguistic comparisons, these different representations are treated as semantically equivalent. This behavior is important for Unicode normalization but can produce unexpected results if you're expecting exact character-by-character matching. + +The following example demonstrates how Unicode normalization affects string searching and comparison: + +:::code language="csharp" source="./snippets/best-practices-strings/csharp/collation-elements/Program.cs"::: +:::code language="vb" source="./snippets/best-practices-strings/vb/collation-elements/Program.vb"::: + +As the example shows, ordinal comparison requires an exact byte-for-byte match, while linguistic comparison understands that `"\u00E9"` and `"e\u0301"` represent the same accented character. + +#### Culture-aware linguistic comparison + +Culture-aware comparisons extend linguistic comparison with culture-specific rules. For example, in the Hungarian alphabet, when "dz" appears as consecutive characters, it's treated as a single letter distinct from "d" or "z". This means that a Hungarian culture-aware comparer treats "dz" as a single collation element. + +The example in the preceding code snippet also demonstrates this behavior: when using the Hungarian culture (`"hu-HU"`), the string `"endz"` doesn't end with `"z"` because "dz" is considered a single letter. When using the invariant culture, `"endz"` does end with `"z"` because the characters are treated independently. + +Be aware that linguistic and culture-aware comparers can undergo behavioral adjustments over time as languages evolve and Unicode standards are updated. Ordinal comparisons never change because they perform exact binary matching. 
+ ### String operations that use the invariant culture Comparisons with the invariant culture use the property returned by the static property. This behavior is the same on all systems; it translates any characters outside its range into what it believes are equivalent invariant characters. This policy can be useful for maintaining one set of string behavior across cultures, but it often provides unexpected results. @@ -278,6 +332,55 @@ if (Commands.Contains(command)) In .NET 9, `SearchValues` was extended to support searching for substrings within a larger string. For an example, see [`SearchValues` expansion](../../core/whats-new/dotnet-9/libraries.md#searchvalues-expansion). +## Default search and comparison types + +The following tables list the default search and comparison types for various string and string-like APIs. If the caller provides an explicit `CultureInfo` or `StringComparison` parameter, that parameter overrides the default behavior shown here. + +### String methods + +| API | Default behavior | Remarks | +|---------------------------|------------------|------------------------------------------| +| `string.Compare` | CurrentCulture | | +| `string.CompareTo` | CurrentCulture | | +| `string.Contains` | Ordinal | | +| `string.EndsWith` | Ordinal | (when the first parameter is a `char`) | +| `string.EndsWith` | CurrentCulture | (when the first parameter is a `string`) | +| `string.Equals` | Ordinal | | +| `string.GetHashCode` | Ordinal | | +| `string.IndexOf` | Ordinal | (when the first parameter is a `char`) | +| `string.IndexOf` | CurrentCulture | (when the first parameter is a `string`) | +| `string.IndexOfAny` | Ordinal | | +| `string.LastIndexOf` | Ordinal | (when the first parameter is a `char`) | +| `string.LastIndexOf` | CurrentCulture | (when the first parameter is a `string`) | +| `string.LastIndexOfAny` | Ordinal | | +| `string.Replace` | Ordinal | | +| `string.Split` | Ordinal | | +| `string.StartsWith` | Ordinal | (when the first 
parameter is a `char`) | +| `string.StartsWith` | CurrentCulture | (when the first parameter is a `string`) | +| `string.ToLower` | CurrentCulture | | +| `string.ToLowerInvariant` | InvariantCulture | | +| `string.ToUpper` | CurrentCulture | | +| `string.ToUpperInvariant` | InvariantCulture | | +| `string.Trim` | Ordinal | | +| `string.TrimEnd` | Ordinal | | +| `string.TrimStart` | Ordinal | | +| `string == string` | Ordinal | | +| `string != string` | Ordinal | | + +### MemoryExtensions methods + +Unlike `string` APIs, all `MemoryExtensions` APIs perform *Ordinal* searches and comparisons by default, with the following exceptions: + +| API | Default behavior | Remarks | +|-------------------------------------|------------------|---------------------------------------------| +| `MemoryExtensions.ToLower` | CurrentCulture | (when passed a null `CultureInfo` argument) | +| `MemoryExtensions.ToLowerInvariant` | InvariantCulture | | +| `MemoryExtensions.ToUpper` | CurrentCulture | (when passed a null `CultureInfo` argument) | +| `MemoryExtensions.ToUpperInvariant` | InvariantCulture | | + +> [!IMPORTANT] +> When you convert code from consuming `string` to consuming `ReadOnlySpan<char>`, you might introduce behavioral changes because `MemoryExtensions` methods default to ordinal comparison while some `string` methods default to culture-sensitive comparison. Always specify an explicit `StringComparison` parameter to avoid unexpected behavior changes. + ## Methods that perform string comparison indirectly Some non-string methods that have string comparison as a central operation use the type. 
The class includes six static properties that return instances whose methods perform the following types of string comparisons: diff --git a/docs/standard/base-types/snippets/best-practices-strings/csharp/collation-elements/Program.cs b/docs/standard/base-types/snippets/best-practices-strings/csharp/collation-elements/Program.cs new file mode 100644 index 0000000000000..beeb2ca9b1f54 --- /dev/null +++ b/docs/standard/base-types/snippets/best-practices-strings/csharp/collation-elements/Program.cs @@ -0,0 +1,24 @@ +using System; +using System.Globalization; + +// Ordinal search for "e" in "resume" and in four different encodings of "résumé" +Console.WriteLine("=== Unicode Normalization Example ==="); +Console.WriteLine("resume".IndexOf("e", StringComparison.Ordinal)); // prints '1' +Console.WriteLine("r\u00E9sum\u00E9".IndexOf("e", StringComparison.Ordinal)); // prints '-1' +Console.WriteLine("r\u00E9sume\u0301".IndexOf("e", StringComparison.Ordinal)); // prints '5' +Console.WriteLine("re\u0301sum\u00E9".IndexOf("e", StringComparison.Ordinal)); // prints '1' +Console.WriteLine("re\u0301sume\u0301".IndexOf("e", StringComparison.Ordinal)); // prints '1' + +// Linguistic search: IndexOf(string) without a StringComparison argument uses the current culture +Console.WriteLine("\n=== Linguistic Comparison Example ==="); +Console.WriteLine("r\u00E9sum\u00E9".IndexOf("e")); // prints '-1' (not found) +Console.WriteLine("r\u00E9sum\u00E9".IndexOf("\u00E9")); // prints '1' +Console.WriteLine("\u00E9".IndexOf("e\u0301")); // prints '0' + +// Hungarian culture-aware comparison: "dz" is a single collation element in hu-HU +Console.WriteLine("\n=== Hungarian Culture-Aware Example ==="); +CultureInfo.CurrentCulture = CultureInfo.GetCultureInfo("hu-HU"); +Console.WriteLine("endz".EndsWith("z")); // prints 'False' + +CultureInfo.CurrentCulture = CultureInfo.InvariantCulture; +Console.WriteLine("endz".EndsWith("z")); // prints 'True' diff --git a/docs/standard/base-types/snippets/best-practices-strings/csharp/collation-elements/collation-elements.csproj 
b/docs/standard/base-types/snippets/best-practices-strings/csharp/collation-elements/collation-elements.csproj new file mode 100644 index 0000000000000..66de39dddc4d9 --- /dev/null +++ b/docs/standard/base-types/snippets/best-practices-strings/csharp/collation-elements/collation-elements.csproj @@ -0,0 +1,9 @@ +<Project Sdk="Microsoft.NET.Sdk"> + + <PropertyGroup> + <OutputType>Exe</OutputType> + <TargetFramework>net9.0</TargetFramework> + <RootNamespace>collation_elements</RootNamespace> + </PropertyGroup> + +</Project> diff --git a/docs/standard/base-types/snippets/best-practices-strings/csharp/icu-demo/Program.cs b/docs/standard/base-types/snippets/best-practices-strings/csharp/icu-demo/Program.cs new file mode 100644 index 0000000000000..5a6ea4df97603 --- /dev/null +++ b/docs/standard/base-types/snippets/best-practices-strings/csharp/icu-demo/Program.cs @@ -0,0 +1,9 @@ +using System; +using System.Globalization; + +System.Threading.Thread.CurrentThread.CurrentCulture = new CultureInfo("de"); // "de" is a language-only culture with no country or region +string text = string.Format("{0:C}", 100); +Console.WriteLine($"Currency formatted: {text}"); + +// Formatted value on .NET Framework (NLS): "100,00 €" +// Formatted value on .NET (ICU): "100,00 ¤" diff --git a/docs/standard/base-types/snippets/best-practices-strings/csharp/icu-demo/icu-demo.csproj b/docs/standard/base-types/snippets/best-practices-strings/csharp/icu-demo/icu-demo.csproj new file mode 100644 index 0000000000000..1581e844c02c3 --- /dev/null +++ b/docs/standard/base-types/snippets/best-practices-strings/csharp/icu-demo/icu-demo.csproj @@ -0,0 +1,9 @@ +<Project Sdk="Microsoft.NET.Sdk"> + + <PropertyGroup> + <OutputType>Exe</OutputType> + <TargetFramework>net9.0</TargetFramework> + <RootNamespace>icu_demo</RootNamespace> + </PropertyGroup> + +</Project> diff --git a/docs/standard/base-types/snippets/best-practices-strings/csharp/security-filtering/Program.cs b/docs/standard/base-types/snippets/best-practices-strings/csharp/security-filtering/Program.cs new file mode 100644 index 0000000000000..a96cdc436cd74 --- /dev/null +++ b/docs/standard/base-types/snippets/best-practices-strings/csharp/security-filtering/Program.cs @@ -0,0 +1,27 @@ +using System; + +// +// THIS SAMPLE CODE IS INCORRECT. +// DO NOT USE IT IN PRODUCTION. 
+// +static bool ContainsHtmlSensitiveCharacters(string input) +{ + if (input.IndexOf("<") >= 0) { return true; } + if (input.IndexOf("&") >= 0) { return true; } + return false; +} + +// +// Corrected version using ordinal comparison +// +static bool ContainsHtmlSensitiveCharactersCorrect(string input) +{ + if (input.IndexOf("<", StringComparison.Ordinal) >= 0) { return true; } + if (input.IndexOf("&", StringComparison.Ordinal) >= 0) { return true; } + return false; +} + +// Test the functions with input that contains HTML-sensitive characters +string testInput = "Hello <b>World</b>"; +Console.WriteLine($"Incorrect version: {ContainsHtmlSensitiveCharacters(testInput)}"); +Console.WriteLine($"Correct version: {ContainsHtmlSensitiveCharactersCorrect(testInput)}"); diff --git a/docs/standard/base-types/snippets/best-practices-strings/csharp/security-filtering/security-filtering.csproj b/docs/standard/base-types/snippets/best-practices-strings/csharp/security-filtering/security-filtering.csproj new file mode 100644 index 0000000000000..d6f9d802595b3 --- /dev/null +++ b/docs/standard/base-types/snippets/best-practices-strings/csharp/security-filtering/security-filtering.csproj @@ -0,0 +1,9 @@ +<Project Sdk="Microsoft.NET.Sdk"> + + <PropertyGroup> + <OutputType>Exe</OutputType> + <TargetFramework>net9.0</TargetFramework> + <RootNamespace>security_filtering</RootNamespace> + </PropertyGroup> + +</Project> diff --git a/docs/standard/base-types/snippets/best-practices-strings/vb/collation-elements/Program.vb b/docs/standard/base-types/snippets/best-practices-strings/vb/collation-elements/Program.vb new file mode 100644 index 0000000000000..1393fd4422d9e --- /dev/null +++ b/docs/standard/base-types/snippets/best-practices-strings/vb/collation-elements/Program.vb @@ -0,0 +1,28 @@ +Imports System +Imports System.Globalization + +Module Program + Sub Main() + ' Ordinal search for "e" in "resume" and in four different encodings of "résumé" + Console.WriteLine("=== Unicode Normalization Example ===") + Console.WriteLine("resume".IndexOf("e", StringComparison.Ordinal)) ' prints '1' + Console.WriteLine(("r" & ChrW(&HE9) & "sum" & ChrW(&HE9)).IndexOf("e", StringComparison.Ordinal)) ' prints '-1' + Console.WriteLine(("r" & ChrW(&HE9) & "sume" 
& ChrW(&H301)).IndexOf("e", StringComparison.Ordinal)) ' prints '5' + Console.WriteLine(("re" & ChrW(&H301) & "sum" & ChrW(&HE9)).IndexOf("e", StringComparison.Ordinal)) ' prints '1' + Console.WriteLine(("re" & ChrW(&H301) & "sume" & ChrW(&H301)).IndexOf("e", StringComparison.Ordinal)) ' prints '1' + + ' Linguistic search: IndexOf(String) without a StringComparison argument uses the current culture + Console.WriteLine(vbCrLf & "=== Linguistic Comparison Example ===") + Console.WriteLine(("r" & ChrW(&HE9) & "sum" & ChrW(&HE9)).IndexOf("e")) ' prints '-1' (not found) + Console.WriteLine(("r" & ChrW(&HE9) & "sum" & ChrW(&HE9)).IndexOf(ChrW(&HE9).ToString())) ' prints '1' + Console.WriteLine(ChrW(&HE9).ToString().IndexOf("e" & ChrW(&H301))) ' prints '0' + + ' Hungarian culture-aware comparison: "dz" is a single collation element in hu-HU + Console.WriteLine(vbCrLf & "=== Hungarian Culture-Aware Example ===") + CultureInfo.CurrentCulture = CultureInfo.GetCultureInfo("hu-HU") + Console.WriteLine("endz".EndsWith("z")) ' prints 'False' + + CultureInfo.CurrentCulture = CultureInfo.InvariantCulture + Console.WriteLine("endz".EndsWith("z")) ' prints 'True' + End Sub +End Module diff --git a/docs/standard/base-types/snippets/best-practices-strings/vb/collation-elements/collation-elements.vbproj b/docs/standard/base-types/snippets/best-practices-strings/vb/collation-elements/collation-elements.vbproj new file mode 100644 index 0000000000000..66de39dddc4d9 --- /dev/null +++ b/docs/standard/base-types/snippets/best-practices-strings/vb/collation-elements/collation-elements.vbproj @@ -0,0 +1,9 @@ +<Project Sdk="Microsoft.NET.Sdk"> + + <PropertyGroup> + <OutputType>Exe</OutputType> + <TargetFramework>net9.0</TargetFramework> + <RootNamespace>collation_elements</RootNamespace> + </PropertyGroup> + +</Project> diff --git a/docs/standard/base-types/snippets/best-practices-strings/vb/icu-demo/Program.vb b/docs/standard/base-types/snippets/best-practices-strings/vb/icu-demo/Program.vb new file mode 100644 index 0000000000000..1d07f5f6abde6 --- /dev/null +++ b/docs/standard/base-types/snippets/best-practices-strings/vb/icu-demo/Program.vb @@ -0,0 +1,13 @@ +Imports System +Imports System.Globalization + +Module Program + Sub Main() + System.Threading.Thread.CurrentThread.CurrentCulture = 
New CultureInfo("de") ' "de" is a language-only culture with no country or region + Dim text As String = String.Format("{0:C}", 100) + Console.WriteLine($"Currency formatted: {text}") + + ' Formatted value on .NET Framework (NLS): "100,00 €" + ' Formatted value on .NET (ICU): "100,00 ¤" + End Sub +End Module diff --git a/docs/standard/base-types/snippets/best-practices-strings/vb/icu-demo/icu-demo.vbproj b/docs/standard/base-types/snippets/best-practices-strings/vb/icu-demo/icu-demo.vbproj new file mode 100644 index 0000000000000..1581e844c02c3 --- /dev/null +++ b/docs/standard/base-types/snippets/best-practices-strings/vb/icu-demo/icu-demo.vbproj @@ -0,0 +1,9 @@ +<Project Sdk="Microsoft.NET.Sdk"> + + <PropertyGroup> + <OutputType>Exe</OutputType> + <TargetFramework>net9.0</TargetFramework> + <RootNamespace>icu_demo</RootNamespace> + </PropertyGroup> + +</Project> diff --git a/docs/standard/base-types/snippets/best-practices-strings/vb/security-filtering/Program.vb b/docs/standard/base-types/snippets/best-practices-strings/vb/security-filtering/Program.vb new file mode 100644 index 0000000000000..fdfae4ec18567 --- /dev/null +++ b/docs/standard/base-types/snippets/best-practices-strings/vb/security-filtering/Program.vb @@ -0,0 +1,29 @@ +Imports System + +Module Program + ' + ' THIS SAMPLE CODE IS INCORRECT. + ' DO NOT USE IT IN PRODUCTION. 
+ ' + Function ContainsHtmlSensitiveCharacters(input As String) As Boolean + If input.IndexOf("<") >= 0 Then Return True + If input.IndexOf("&") >= 0 Then Return True + Return False + End Function + + ' + ' Corrected version using ordinal comparison + ' + Function ContainsHtmlSensitiveCharactersCorrect(input As String) As Boolean + If input.IndexOf("<", StringComparison.Ordinal) >= 0 Then Return True + If input.IndexOf("&", StringComparison.Ordinal) >= 0 Then Return True + Return False + End Function + + Sub Main() + ' Test the functions with input that contains HTML-sensitive characters + Dim testInput As String = "Hello <b>World</b>" + Console.WriteLine($"Incorrect version: {ContainsHtmlSensitiveCharacters(testInput)}") + Console.WriteLine($"Correct version: {ContainsHtmlSensitiveCharactersCorrect(testInput)}") + End Sub +End Module diff --git a/docs/standard/base-types/snippets/best-practices-strings/vb/security-filtering/security-filtering.vbproj b/docs/standard/base-types/snippets/best-practices-strings/vb/security-filtering/security-filtering.vbproj new file mode 100644 index 0000000000000..d6f9d802595b3 --- /dev/null +++ b/docs/standard/base-types/snippets/best-practices-strings/vb/security-filtering/security-filtering.vbproj @@ -0,0 +1,9 @@ +<Project Sdk="Microsoft.NET.Sdk"> + + <PropertyGroup> + <OutputType>Exe</OutputType> + <TargetFramework>net9.0</TargetFramework> + <RootNamespace>security_filtering</RootNamespace> + </PropertyGroup> + +</Project> diff --git a/docs/standard/base-types/string-comparison-net-5-plus.md b/docs/standard/base-types/string-comparison-net-5-plus.md index c6ddd04d137f0..84ac7c8816d3c 100644 --- a/docs/standard/base-types/string-comparison-net-5-plus.md +++ b/docs/standard/base-types/string-comparison-net-5-plus.md @@ -3,339 +3,9 @@ title: Behavior changes when comparing strings on .NET 5+ description: Learn about string-comparison behavior changes in .NET 5 and later versions on Windows. 
ms.topic: concept-article ms.date: 02/15/2022 +redirect_url: best-practices-strings --- # Behavior changes when comparing strings on .NET 5+ -.NET 5 introduces a runtime behavioral change where globalization APIs [use ICU by default](../../core/compatibility/globalization/5.0/icu-globalization-api.md) across all supported platforms. This is a departure from earlier versions of .NET Core and from .NET Framework, which utilize the operating system's national language support (NLS) functionality when running on Windows. For more information on these changes, including compatibility switches that can revert the behavior change, see [.NET globalization and ICU](../../core/extensions/globalization-icu.md). - -## Reason for change - -This change was introduced to unify .NET's globalization behavior across all supported operating systems. It also provides the ability for applications to bundle their own globalization libraries rather than depend on the OS's built-in libraries. For more information, see [the breaking change notification](../../core/compatibility/globalization/5.0/icu-globalization-api.md). - -## Behavioral differences - -If you use functions like `string.IndexOf(string)` without calling the overload that takes a argument, you might intend to perform an *ordinal* search, but instead you inadvertently take a dependency on culture-specific behavior. Since NLS and ICU implement different logic in their linguistic comparers, the results of methods like `string.IndexOf(string)` can return unexpected values. - -This can manifest itself even in places where you aren't always expecting globalization facilities to be active. For example, the following code can produce a different answer depending on the current runtime. 
- -```csharp -const string greeting = "Hel\0lo"; -Console.WriteLine($"{greeting.IndexOf("\0")}"); - -// The snippet prints: -// -// '3' when running on .NET Core 2.x - 3.x (Windows) -// '0' when running on .NET 5 or later (Windows) -// '0' when running on .NET Core 2.x - 3.x or .NET 5 (non-Windows) -// '3' when running on .NET Core 2.x or .NET 5+ (in invariant mode) - -string s = "Hello\r\nworld!"; -int idx = s.IndexOf("\n"); -Console.WriteLine(idx); - -// The snippet prints: -// -// '6' when running on .NET Core 3.1 -// '-1' when running on .NET 5 or .NET Core 3.1 (non-Windows OS) -// '-1' when running on .NET 5 (Windows 10 May 2019 Update or later) -// '6' when running on .NET 6+ (all Windows and non-Windows OSs) -``` - -For more information, see [Globalization APIs use ICU libraries on Windows](../../core/compatibility/globalization/5.0/icu-globalization-api.md). - -## Guard against unexpected behavior - -This section provides two options for dealing with unexpected behavior changes in .NET 5. - -### Enable code analyzers - -[Code analyzers](../../fundamentals/code-analysis/overview.md) can detect possibly buggy call sites. To help guard against any surprising behaviors, we recommend enabling .NET compiler platform (Roslyn) analyzers in your project. The analyzers help flag code that might inadvertently be using a linguistic comparer when an ordinal comparer was likely intended. The following rules should help flag these issues: - -- [CA1307: Specify StringComparison for clarity](../../fundamentals/code-analysis/quality-rules/ca1307.md) -- [CA1309: Use ordinal StringComparison](../../fundamentals/code-analysis/quality-rules/ca1309.md) -- [CA1310: Specify StringComparison for correctness](../../fundamentals/code-analysis/quality-rules/ca1310.md) - -These specific rules aren't enabled by default. 
To enable them and show any violations as build errors, set the following properties in your project file: - -```xml - - All - $(WarningsAsErrors);CA1307;CA1309;CA1310 - -``` - -The following snippet shows examples of code that produces the relevant code analyzer warnings or errors. - -```csharp -// -// Potentially incorrect code - answer might vary based on locale. -// -string s = GetString(); -// Produces analyzer warning CA1310 for string; CA1307 matches on char ',' -int idx = s.IndexOf(","); -Console.WriteLine(idx); - -// -// Corrected code - matches the literal substring ",". -// -string s = GetString(); -int idx = s.IndexOf(",", StringComparison.Ordinal); -Console.WriteLine(idx); - -// -// Corrected code (alternative) - searches for the literal ',' character. -// -string s = GetString(); -int idx = s.IndexOf(','); -Console.WriteLine(idx); -``` - -Similarly, when instantiating a sorted collection of strings or sorting an existing string-based collection, specify an explicit comparer. - -```csharp -// -// Potentially incorrect code - behavior might vary based on locale. -// -SortedSet mySet = new SortedSet(); -List list = GetListOfStrings(); -list.Sort(); - -// -// Corrected code - uses ordinal sorting; doesn't vary by locale. -// -SortedSet mySet = new SortedSet(StringComparer.Ordinal); -List list = GetListOfStrings(); -list.Sort(StringComparer.Ordinal); -``` - -### Revert back to NLS behaviors - -To revert .NET 5+ applications back to older NLS behaviors when running on Windows, follow the steps in [.NET Globalization and ICU](../../core/extensions/globalization-icu.md). This application-wide compatibility switch must be set at the application level. Individual libraries cannot opt-in or opt-out of this behavior. 
- -> [!TIP] -> We strongly recommend you enable the [CA1307](../../fundamentals/code-analysis/quality-rules/ca1307.md), [CA1309](../../fundamentals/code-analysis/quality-rules/ca1309.md), and [CA1310](../../fundamentals/code-analysis/quality-rules/ca1310.md) code analysis rules to help improve code hygiene and discover any existing latent bugs. For more information, see [Enable code analyzers](#enable-code-analyzers). - -## Affected APIs - -Most .NET applications won't encounter any unexpected behaviors due to the changes in .NET 5. However, due to the number of affected APIs and how foundational these APIs are to the wider .NET ecosystem, you should be aware of the potential for .NET 5 to introduce unwanted behaviors or to expose latent bugs that already exist in your application. - -The affected APIs include: - -- -- -- -- -- -- -- -- -- (most members) -- (most members) -- (when sorting arrays of strings) -- (when the list elements are strings) -- (when the keys are strings) -- (when the keys are strings) -- (when the set contains strings) - -> [!NOTE] -> This is not an exhaustive list of affected APIs. - -All of the above APIs use *linguistic* string searching and comparison using the thread's [current culture](xref:System.Threading.Thread.CurrentCulture), by default. The differences between *linguistic* and *ordinal* search and comparison are called out in the [Ordinal vs. linguistic search and comparison](#ordinal-vs-linguistic-search-and-comparison). - -Because ICU implements linguistic string comparisons differently from NLS, Windows-based applications that upgrade to .NET 5 from an earlier version of .NET Core or .NET Framework and that call one of the affected APIs may notice that the APIs begin exhibiting different behaviors. - -### Exceptions - -* If an API accepts an explicit `StringComparison` or `CultureInfo` parameter, that parameter overrides the API's default behavior. 
-* `System.String` members where the first parameter is of type `char` (for example, <xref:System.String.IndexOf(System.Char)?displayProperty=nameWithType>) use ordinal searching, unless the caller passes an explicit `StringComparison` argument that specifies `CurrentCulture[IgnoreCase]` or `InvariantCulture[IgnoreCase]`. - -For a more detailed analysis of the default behavior of each API, see the [Default search and comparison types](#default-search-and-comparison-types) section. - -## Ordinal vs. linguistic search and comparison - -*Ordinal* (also known as *non-linguistic*) search and comparison decomposes a string into its individual `char` elements and performs a char-by-char search or comparison. For example, the strings `"dog"` and `"dog"` compare as *equal* under an `Ordinal` comparer, since the two strings consist of the exact same sequence of chars. However, `"dog"` and `"Dog"` compare as *not equal* under an `Ordinal` comparer, because they don't consist of the exact same sequence of chars. That is, uppercase `'D'`'s code point `U+0044` occurs before lowercase `'d'`'s code point `U+0064`, resulting in `"Dog"` sorting before `"dog"`. - -An `OrdinalIgnoreCase` comparer still operates on a char-by-char basis, but it eliminates case differences while performing the operation. Under an `OrdinalIgnoreCase` comparer, the char pairs `'d'` and `'D'` compare as *equal*, as do the char pairs `'á'` and `'Á'`. But the unaccented char `'a'` compares as *not equal* to the accented char `'á'`. - -Some examples of this are provided in the following table: - -| String 1 | String 2 | `Ordinal` comparison | `OrdinalIgnoreCase` comparison | -|------------|------------|----------------------|--------------------------------| -| `"dog"` | `"dog"` | equal | equal | -| `"dog"` | `"Dog"` | not equal | equal | -| `"resume"` | `"résumé"` | not equal | not equal | - -Unicode also allows strings to have several different in-memory representations. 
For example, an e-acute (é) can be represented in two possible ways: - -* A single literal `'é'` character (also written as `'\u00E9'`). -* A literal unaccented `'e'` character followed by a combining accent modifier character `'\u0301'`. - -This means that the following _four_ strings all display as `"résumé"`, even though their constituent pieces are different. The strings use a combination of literal `'é'` characters or literal unaccented `'e'` characters plus the combining accent modifier `'\u0301'`. - -* `"r\u00E9sum\u00E9"` -* `"r\u00E9sume\u0301"` -* `"re\u0301sum\u00E9"` -* `"re\u0301sume\u0301"` - -Under an ordinal comparer, none of these strings compare as equal to each other. This is because they all contain different underlying char sequences, even though when they're rendered to the screen, they all look the same. - -When performing a `string.IndexOf(..., StringComparison.Ordinal)` operation, the runtime looks for an exact substring match. The results are as follows. - -```csharp -Console.WriteLine("resume".IndexOf("e", StringComparison.Ordinal)); // prints '1' -Console.WriteLine("r\u00E9sum\u00E9".IndexOf("e", StringComparison.Ordinal)); // prints '-1' -Console.WriteLine("r\u00E9sume\u0301".IndexOf("e", StringComparison.Ordinal)); // prints '5' -Console.WriteLine("re\u0301sum\u00E9".IndexOf("e", StringComparison.Ordinal)); // prints '1' -Console.WriteLine("re\u0301sume\u0301".IndexOf("e", StringComparison.Ordinal)); // prints '1' -Console.WriteLine("resume".IndexOf("E", StringComparison.OrdinalIgnoreCase)); // prints '1' -Console.WriteLine("r\u00E9sum\u00E9".IndexOf("E", StringComparison.OrdinalIgnoreCase)); // prints '-1' -Console.WriteLine("r\u00E9sume\u0301".IndexOf("E", StringComparison.OrdinalIgnoreCase)); // prints '5' -Console.WriteLine("re\u0301sum\u00E9".IndexOf("E", StringComparison.OrdinalIgnoreCase)); // prints '1' -Console.WriteLine("re\u0301sume\u0301".IndexOf("E", StringComparison.OrdinalIgnoreCase)); // prints '1' -``` - -Ordinal 
search and comparison routines are never affected by the current thread's culture setting. - -*Linguistic* search and comparison routines decompose a string into *collation elements* and perform searches or comparisons on these elements. There's not necessarily a 1:1 mapping between a string's characters and its constituent collation elements. For example, a string of length 2 may consist of only a single collation element. When two strings are compared in a linguistic-aware fashion, the comparer checks whether the two strings' collation elements have the same semantic meaning, even if the string's literal characters are different. - -Consider again the string `"résumé"` and its four different representations. The following table shows each representation broken down into its collation elements. - -| String | As collation elements | -|------------------------|-------------------------------------------------| -| `"r\u00E9sum\u00E9"` | `"r" + "\u00E9" + "s" + "u" + "m" + "\u00E9"` | -| `"r\u00E9sume\u0301"` | `"r" + "\u00E9" + "s" + "u" + "m" + "e\u0301"` | -| `"re\u0301sum\u00E9"` | `"r" + "e\u0301" + "s" + "u" + "m" + "\u00E9"` | -| `"re\u0301sume\u0301"` | `"r" + "e\u0301" + "s" + "u" + "m" + "e\u0301"` | - -A collation element corresponds loosely to what readers would think of as a single character or cluster of characters. It's conceptually similar to a [grapheme cluster](character-encoding-introduction.md#grapheme-clusters) but encompasses a somewhat larger umbrella. - -Under a linguistic comparer, exact matches aren't necessary. Collation elements are instead compared based on their semantic meaning. For example, a linguistic comparer treats the substrings `"\u00E9"` and `"e\u0301"` as equal since they both semantically mean "a lowercase e with an acute accent modifier." 
This allows the `IndexOf` method to match the substring `"e\u0301"` within a larger string that contains the semantically equivalent substring `"\u00E9"`, as shown in the following code sample. - -```csharp -Console.WriteLine("r\u00E9sum\u00E9".IndexOf("e")); // prints '-1' (not found) -Console.WriteLine("r\u00E9sum\u00E9".IndexOf("\u00E9")); // prints '1' -Console.WriteLine("\u00E9".IndexOf("e\u0301")); // prints '0' -``` - -As a consequence of this, two strings of different lengths may compare as equal if a linguistic comparison is used. Callers should take care not to special-case logic that deals with string length in such scenarios. - -*Culture-aware* search and comparison routines are a special form of linguistic search and comparison routines. Under a culture-aware comparer, the concept of a collation element is extended to include information specific to the specified culture. - -For example, [in the Hungarian alphabet](https://en.wikipedia.org/wiki/Hungarian_alphabet), when the two characters \<dz\> appear back-to-back, they are considered their own unique letter distinct from either \<d\> or \<z\>. This means that when \<dz\> is seen in a string, a Hungarian culture-aware comparer treats it as a single collation element. - -| String | As collation elements | Remarks | -|----------|-------------------------|--------------------------------------------| -| `"endz"` | `"e" + "n" + "d" + "z"` | (using a standard linguistic comparer) | -| `"endz"` | `"e" + "n" + "dz"` | (using a Hungarian culture-aware comparer) | - -When using a Hungarian culture-aware comparer, this means that the string `"endz"` *does not* end with the substring `"z"`, as \<dz\> and \<z\> are considered collation elements with different semantic meaning. 
- -```csharp -// Set thread culture to Hungarian -CultureInfo.CurrentCulture = CultureInfo.GetCultureInfo("hu-HU"); -Console.WriteLine("endz".EndsWith("z")); // Prints 'False' - -// Set thread culture to invariant culture -CultureInfo.CurrentCulture = CultureInfo.InvariantCulture; -Console.WriteLine("endz".EndsWith("z")); // Prints 'True' -``` - -> [!NOTE] -> -> - Behavior: Linguistic and culture-aware comparers can undergo behavioral adjustments from time to time. Both ICU and the older Windows NLS facility are updated to account for how world languages change. For more information, see the blog post [Locale (culture) data churn](/archive/blogs/shawnste/locale-culture-data-churn). The *Ordinal* comparer's behavior will never change since it performs exact bitwise searching and comparison. However, the *OrdinalIgnoreCase* comparer's behavior may change as Unicode grows to encompass more character sets and corrects omissions in existing casing data. -> - Usage: The comparers `StringComparison.InvariantCulture` and `StringComparison.InvariantCultureIgnoreCase` are linguistic comparers that are not culture-aware. That is, these comparers understand concepts such as the accented character é having multiple possible underlying representations, and that all such representations should be treated as equal. But non-culture-aware linguistic comparers won't contain special handling for \<dz\> as distinct from \<d\> or \<z\>, as shown above. They also won't special-case characters like the German Eszett (ß). - -.NET also offers the *invariant globalization mode*. This opt-in mode disables code paths that deal with linguistic search and comparison routines. In this mode, all operations use *Ordinal* or *OrdinalIgnoreCase* behaviors, regardless of what `CultureInfo` or `StringComparison` argument the caller provides. 
For more information, see [Runtime configuration options for globalization](../../core/runtime-config/globalization.md) and [.NET Core Globalization Invariant Mode](https://github.com/dotnet/runtime/blob/main/docs/design/features/globalization-invariant-mode.md). - -For more information, see [Best practices for comparing strings in .NET](best-practices-strings.md). - -## Security implications - -If your app uses an affected API for filtering, we recommend enabling the CA1307 and CA1309 code analysis rules to help locate places where a linguistic search may have inadvertently been used instead of an ordinal search. Code patterns like the following may be susceptible to security exploits. - -```csharp -// -// THIS SAMPLE CODE IS INCORRECT. -// DO NOT USE IT IN PRODUCTION. -// -public bool ContainsHtmlSensitiveCharacters(string input) -{ - if (input.IndexOf("<") >= 0) { return true; } - if (input.IndexOf("&") >= 0) { return true; } - return false; -} -``` - -Because the `string.IndexOf(string)` method uses a linguistic search by default, it's possible for a string to contain a literal `'<'` or `'&'` character and for the `string.IndexOf(string)` routine to return `-1`, indicating that the search substring was not found. Code analysis rules CA1307 and CA1309 flag such call sites and alert the developer that there's a potential problem. - -## Default search and comparison types - -The following table lists the default search and comparison types for various string and string-like APIs. If the caller provides an explicit `CultureInfo` or `StringComparison` parameter, that parameter will be honored over any default. 
- -| API | Default behavior | Remarks | -|---------------------------|------------------|------------------------------------------| -| `string.Compare` | CurrentCulture | | -| `string.CompareTo` | CurrentCulture | | -| `string.Contains` | Ordinal | | -| `string.EndsWith` | Ordinal | (when the first parameter is a `char`) | -| `string.EndsWith` | CurrentCulture | (when the first parameter is a `string`) | -| `string.Equals` | Ordinal | | -| `string.GetHashCode` | Ordinal | | -| `string.IndexOf` | Ordinal | (when the first parameter is a `char`) | -| `string.IndexOf` | CurrentCulture | (when the first parameter is a `string`) | -| `string.IndexOfAny` | Ordinal | | -| `string.LastIndexOf` | Ordinal | (when the first parameter is a `char`) | -| `string.LastIndexOf` | CurrentCulture | (when the first parameter is a `string`) | -| `string.LastIndexOfAny` | Ordinal | | -| `string.Replace` | Ordinal | | -| `string.Split` | Ordinal | | -| `string.StartsWith` | Ordinal | (when the first parameter is a `char`) | -| `string.StartsWith` | CurrentCulture | (when the first parameter is a `string`) | -| `string.ToLower` | CurrentCulture | | -| `string.ToLowerInvariant` | InvariantCulture | | -| `string.ToUpper` | CurrentCulture | | -| `string.ToUpperInvariant` | InvariantCulture | | -| `string.Trim` | Ordinal | | -| `string.TrimEnd` | Ordinal | | -| `string.TrimStart` | Ordinal | | -| `string == string` | Ordinal | | -| `string != string` | Ordinal | | - -Unlike `string` APIs, all `MemoryExtensions` APIs perform *Ordinal* searches and comparisons by default, with the following exceptions. 
- -| API | Default behavior | Remarks | -|-------------------------------------|------------------|---------------------------------------------| -| `MemoryExtensions.ToLower` | CurrentCulture | (when passed a null `CultureInfo` argument) | -| `MemoryExtensions.ToLowerInvariant` | InvariantCulture | | -| `MemoryExtensions.ToUpper` | CurrentCulture | (when passed a null `CultureInfo` argument) | -| `MemoryExtensions.ToUpperInvariant` | InvariantCulture | | - -A consequence is that when converting code from consuming `string` to consuming `ReadOnlySpan<char>`, behavioral changes may be introduced inadvertently. An example of this follows. - -```csharp -string str = GetString(); -if (str.StartsWith("Hello")) { /* do something */ } // this is a CULTURE-AWARE (linguistic) comparison - -ReadOnlySpan<char> span = str.AsSpan(); -if (span.StartsWith("Hello")) { /* do something */ } // this is an ORDINAL (non-linguistic) comparison -``` - -The recommended way to address this is to pass an explicit `StringComparison` parameter to these APIs. The code analysis rules CA1307 and CA1309 can assist with this. - -```csharp -string str = GetString(); -if (str.StartsWith("Hello", StringComparison.Ordinal)) { /* do something */ } // ordinal comparison - -ReadOnlySpan<char> span = str.AsSpan(); -if (span.StartsWith("Hello", StringComparison.Ordinal)) { /* do something */ } // ordinal comparison -``` - -## See also - -- [Globalization breaking changes in .NET 5](../../core/compatibility/5.0.md#globalization) -- [Best practices for comparing strings in .NET](best-practices-strings.md) -- [How to compare strings in C#](../../csharp/how-to/compare-strings.md) -- [.NET globalization and ICU](../../core/extensions/globalization-icu.md) -- [Ordinal vs. 
culture-sensitive string operations](/dotnet/api/system.string#ordinal-vs-culture-sensitive-operations) -- [Overview of .NET source code analysis](../../fundamentals/code-analysis/overview.md) +This content has been consolidated into [Best practices for comparing strings in .NET](best-practices-strings.md). From ca722a63289d1db0d27d3c97e87d249a4aedc5f3 Mon Sep 17 00:00:00 2001 From: "Andy De George (from Dev Box)" Date: Thu, 19 Feb 2026 17:06:36 -0800 Subject: [PATCH 2/2] Delete the old string file --- .openpublishing.redirection.standard.json | 4 ++++ .../globalization/5.0/icu-globalization-api.md | 2 +- .../base-types/string-comparison-net-5-plus.md | 11 ----------- 3 files changed, 5 insertions(+), 12 deletions(-) delete mode 100644 docs/standard/base-types/string-comparison-net-5-plus.md diff --git a/.openpublishing.redirection.standard.json b/.openpublishing.redirection.standard.json index 6bbdea81b7725..6569a50aa8409 100644 --- a/.openpublishing.redirection.standard.json +++ b/.openpublishing.redirection.standard.json @@ -207,6 +207,10 @@ "source_path_from_root": "/docs/standard/base-types/standard-timespan.md", "redirect_url": "/dotnet/standard/base-types/standard-timespan-format-strings" }, + { + "source_path_from_root": "/docs/standard/base-types/string-comparison-net-5-plus.md", + "redirect_url": "/dotnet/standard/base-types/best-practices-strings" + }, { "source_path_from_root": "/docs/standard/base-types/strip-characters.md", "redirect_url": "/dotnet/standard/base-types/how-to-strip-invalid-characters-from-a-string" diff --git a/docs/core/compatibility/globalization/5.0/icu-globalization-api.md b/docs/core/compatibility/globalization/5.0/icu-globalization-api.md index de315df9393e1..6e673b1204b0c 100644 --- a/docs/core/compatibility/globalization/5.0/icu-globalization-api.md +++ b/docs/core/compatibility/globalization/5.0/icu-globalization-api.md @@ -38,7 +38,7 @@ To fix this code by conducting an ordinal search instead of a culture-sensitive You can 
run code analysis rules [CA1307: Specify StringComparison for clarity](../../../../fundamentals/code-analysis/quality-rules/ca1307.md) and [CA1309: Use ordinal StringComparison](../../../../fundamentals/code-analysis/quality-rules/ca1309.md) to find these call sites in your code. -For more information, see [Behavior changes when comparing strings on .NET 5+](../../../../standard/base-types/string-comparison-net-5-plus.md). +For more information, see [Best practices for comparing strings in .NET](../../../../standard/base-types/best-practices-strings.md). ### Currency symbol diff --git a/docs/standard/base-types/string-comparison-net-5-plus.md b/docs/standard/base-types/string-comparison-net-5-plus.md deleted file mode 100644 index 84ac7c8816d3c..0000000000000 --- a/docs/standard/base-types/string-comparison-net-5-plus.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Behavior changes when comparing strings on .NET 5+ -description: Learn about string-comparison behavior changes in .NET 5 and later versions on Windows. -ms.topic: concept-article -ms.date: 02/15/2022 -redirect_url: best-practices-strings ---- - -# Behavior changes when comparing strings on .NET 5+ - -This content has been consolidated into [Best practices for comparing strings in .NET](best-practices-strings.md).