ripgrep: make --no-pcre2-unicode the canonical flag

Previously, we used --pcre2-unicode as the canonical flag even though PCRE2's Unicode mode is enabled by default, which is inconsistent with how we handle other similar flags. The reason --pcre2-unicode was made the canonical flag was discoverability: it is sorted next to the --pcre2 flag. To solve that problem instead, we start a convention of listing related flags in each flag's docs. Fixes #1022
2025-08-04 21:52:54 +02:00 · 2018-08-21 18:35:19 -04:00
parent 7ac9782970
commit edd6eb4e06
2 changed files with 47 additions and 43 deletions
--- a/complete/_rg
+++ b/complete/_rg
@ -166,8 +166,8 @@ _rg() {
    $no'(pcre2-unicode)--no-pcre2[disable matching with PCRE2]'

    + '(pcre2-unicode)' # PCRE2 Unicode options
-    $no'(--no-pcre2-unicode)--pcre2-unicode[enable PCRE2 Unicode mode (with -P)]'
-    '(--no-pcre2-unicode)--no-pcre2-unicode[disable PCRE2 Unicode mode (with -P)]'
+    $no'(--no-pcre2 --no-pcre2-unicode)--pcre2-unicode[enable PCRE2 Unicode mode (with -P)]'
+    '(--no-pcre2 --pcre2-unicode)--no-pcre2-unicode[disable PCRE2 Unicode mode (with -P)]'

    + '(pre)' # Preprocessing options
    '(-z --search-zip)--pre=[specify preprocessor utility]:preprocessor utility:_command_names -e'
--- a/src/app.rs
+++ b/src/app.rs
@ -582,13 +582,13 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
    flag_no_ignore_parent(&mut args);
    flag_no_ignore_vcs(&mut args);
    flag_no_messages(&mut args);
+    flag_no_pcre2_unicode(&mut args);
    flag_null(&mut args);
    flag_null_data(&mut args);
    flag_only_matching(&mut args);
    flag_path_separator(&mut args);
    flag_passthru(&mut args);
    flag_pcre2(&mut args);
-    flag_pcre2_unicode(&mut args);
    flag_pre(&mut args);
    flag_pretty(&mut args);
    flag_quiet(&mut args);
@ -1568,6 +1568,48 @@ This flag can be disabled with the --messages flag.
    args.push(arg);
 }

+fn flag_no_pcre2_unicode(args: &mut Vec<RGArg>) {
+    const SHORT: &str = "Disable Unicode mode for PCRE2 matching.";
+    const LONG: &str = long!("\
+When PCRE2 matching is enabled, this flag will disable Unicode mode, which is
+otherwise enabled by default. If PCRE2 matching is not enabled, then this flag
+has no effect.
+
+When PCRE2's Unicode mode is enabled, several different types of patterns
+become Unicode aware. This includes '\\b', '\\B', '\\w', '\\W', '\\d', '\\D',
+'\\s' and '\\S'. Similarly, the '.' meta character will match any Unicode
+codepoint instead of any byte. Caseless matching will also use Unicode simple
+case folding instead of ASCII-only case insensitivity.
+
+Unicode mode in PCRE2 represents a critical trade off in the user experience
+of ripgrep. In particular, unlike the default regex engine, PCRE2 does not
+support the ability to search possibly invalid UTF-8 with Unicode features
+enabled. Instead, PCRE2 *requires* that everything it searches when Unicode
+mode is enabled is valid UTF-8. (Or valid UTF-16/UTF-32, but for the purposes
+of ripgrep, we only discuss UTF-8.) This means that if you have PCRE2's Unicode
+mode enabled and you attempt to search invalid UTF-8, then the search for that
+file will halt and print an error. For this reason, when PCRE2's Unicode mode
+is enabled, ripgrep will automatically \"fix\" invalid UTF-8 sequences by
+replacing them with the Unicode replacement codepoint.
+
+If you would rather see the encoding errors surfaced by PCRE2 when Unicode mode
+is enabled, then pass the --no-encoding flag to disable all transcoding.
+
+Related flags: --pcre2
+
+This flag can be disabled with --pcre2-unicode.
+");
+    let arg = RGArg::switch("no-pcre2-unicode")
+        .help(SHORT).long_help(LONG)
+        .overrides("pcre2-unicode");
+    args.push(arg);
+
+    let arg = RGArg::switch("pcre2-unicode")
+        .hidden()
+        .overrides("no-pcre2-unicode");
+    args.push(arg);
+}
+
 fn flag_null(args: &mut Vec<RGArg>) {
    const SHORT: &str = "Print a NUL byte after file paths.";
    const LONG: &str = long!("\
@ -1658,6 +1700,8 @@ Note that PCRE2 is an optional ripgrep feature. If PCRE2 wasn't included in
 your build of ripgrep, then using this flag will result in ripgrep printing
 an error message and exiting.

+Related flags: --no-pcre2-unicode
+
 This flag can be disabled with --no-pcre2.
 ");
    let arg = RGArg::switch("pcre2").short("P")
@ -1671,46 +1715,6 @@ This flag can be disabled with --no-pcre2.
    args.push(arg);
 }

-fn flag_pcre2_unicode(args: &mut Vec<RGArg>) {
-    const SHORT: &str = "Enable Unicode mode for PCRE2 matching.";
-    const LONG: &str = long!("\
-When PCRE2 matching is enabled, this flag will enable Unicode mode. If PCRE2
-matching is not enabled, then this flag has no effect.
-
-This flag is enabled by default when PCRE2 matching is enabled.
-
-When PCRE2's Unicode mode is enabled several different types of patterns become
-Unicode aware. This includes '\\b', '\\B', '\\w', '\\W', '\\d', '\\D', '\\s'
-and '\\S'. Similarly, the '.' meta character will match any Unicode codepoint
-instead of any byte. Caseless matching will also use Unicode simple case
-folding instead of ASCII-only case insensitivity.
-
-Unicode mode in PCRE2 represents a critical trade off in the user experience
-of ripgrep. In particular, unlike the default regex engine, PCRE2 does not
-support the ability to search possibly invalid UTF-8 with Unicode features
-enabled. Instead, PCRE2 *requires* that everything it searches when Unicode
-mode is enabled is valid UTF-8. (Or valid UTF-16/UTF-32, but for the purposes
-of ripgrep, we only discuss UTF-8.) This means that if you have PCRE2's Unicode
-mode enabled and you attempt to search invalid UTF-8, then the search for that
-file will halt and print an error. For this reason, when PCRE2's Unicode mode
-is enabled, ripgrep will automatically \"fix\" invalid UTF-8 sequences by
-replacing them with the Unicode replacement codepoint.
-
-If you would rather see the encoding errors surfaced by PCRE2 when Unicode mode
-is enabled, then pass the --no-encoding flag to disable all transcoding.
-
-This flag can be disabled with --no-pcre2-unicode.
-");
-    let arg = RGArg::switch("pcre2-unicode")
-        .help(SHORT).long_help(LONG);
-    args.push(arg);
-
-    let arg = RGArg::switch("no-pcre2-unicode")
-        .hidden()
-        .overrides("pcre2-unicode");
-    args.push(arg);
-}
-
 fn flag_pretty(args: &mut Vec<RGArg>) {
    const SHORT: &str = "Alias for --color always --heading --line-number.";
    const LONG: &str = long!("\