mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-01-19 05:49:14 +02:00
ripgrep: make --no-pcre2-unicode the canonical flag
Previously, we used --pcre2-unicode as the canonical flag despite the fact that it is enabled by default, which is inconsistent with how we handle other similar flags. The reason why --pcre2-unicode was made the canonical flag was to make it easier to discover since it would be sorted near the --pcre2 flag. To solve that problem, we simply start a convention that lists related flags in the docs. Fixes #1022
This commit is contained in:
parent
7ac9782970
commit
edd6eb4e06
@@ -166,8 +166,8 @@ _rg() {
|
||||
$no'(pcre2-unicode)--no-pcre2[disable matching with PCRE2]'
|
||||
|
||||
+ '(pcre2-unicode)' # PCRE2 Unicode options
|
||||
$no'(--no-pcre2-unicode)--pcre2-unicode[enable PCRE2 Unicode mode (with -P)]'
|
||||
'(--no-pcre2-unicode)--no-pcre2-unicode[disable PCRE2 Unicode mode (with -P)]'
|
||||
$no'(--no-pcre2 --no-pcre2-unicode)--pcre2-unicode[enable PCRE2 Unicode mode (with -P)]'
|
||||
'(--no-pcre2 --pcre2-unicode)--no-pcre2-unicode[disable PCRE2 Unicode mode (with -P)]'
|
||||
|
||||
+ '(pre)' # Preprocessing options
|
||||
'(-z --search-zip)--pre=[specify preprocessor utility]:preprocessor utility:_command_names -e'
|
||||
|
86
src/app.rs
86
src/app.rs
@@ -582,13 +582,13 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
|
||||
flag_no_ignore_parent(&mut args);
|
||||
flag_no_ignore_vcs(&mut args);
|
||||
flag_no_messages(&mut args);
|
||||
flag_no_pcre2_unicode(&mut args);
|
||||
flag_null(&mut args);
|
||||
flag_null_data(&mut args);
|
||||
flag_only_matching(&mut args);
|
||||
flag_path_separator(&mut args);
|
||||
flag_passthru(&mut args);
|
||||
flag_pcre2(&mut args);
|
||||
flag_pcre2_unicode(&mut args);
|
||||
flag_pre(&mut args);
|
||||
flag_pretty(&mut args);
|
||||
flag_quiet(&mut args);
|
||||
@@ -1568,6 +1568,48 @@ This flag can be disabled with the --messages flag.
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_no_pcre2_unicode(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Disable Unicode mode for PCRE2 matching.";
|
||||
const LONG: &str = long!("\
|
||||
When PCRE2 matching is enabled, this flag will disable Unicode mode, which is
|
||||
otherwise enabled by default. If PCRE2 matching is not enabled, then this flag
|
||||
has no effect.
|
||||
|
||||
When PCRE2's Unicode mode is enabled, several different types of patterns
|
||||
become Unicode aware. This includes '\\b', '\\B', '\\w', '\\W', '\\d', '\\D',
|
||||
'\\s' and '\\S'. Similarly, the '.' meta character will match any Unicode
|
||||
codepoint instead of any byte. Caseless matching will also use Unicode simple
|
||||
case folding instead of ASCII-only case insensitivity.
|
||||
|
||||
Unicode mode in PCRE2 represents a critical trade off in the user experience
|
||||
of ripgrep. In particular, unlike the default regex engine, PCRE2 does not
|
||||
support the ability to search possibly invalid UTF-8 with Unicode features
|
||||
enabled. Instead, PCRE2 *requires* that everything it searches when Unicode
|
||||
mode is enabled is valid UTF-8. (Or valid UTF-16/UTF-32, but for the purposes
|
||||
of ripgrep, we only discuss UTF-8.) This means that if you have PCRE2's Unicode
|
||||
mode enabled and you attempt to search invalid UTF-8, then the search for that
|
||||
file will halt and print an error. For this reason, when PCRE2's Unicode mode
|
||||
is enabled, ripgrep will automatically \"fix\" invalid UTF-8 sequences by
|
||||
replacing them with the Unicode replacement codepoint.
|
||||
|
||||
If you would rather see the encoding errors surfaced by PCRE2 when Unicode mode
|
||||
is enabled, then pass the --no-encoding flag to disable all transcoding.
|
||||
|
||||
Related flags: --pcre2
|
||||
|
||||
This flag can be disabled with --pcre2-unicode.
|
||||
");
|
||||
let arg = RGArg::switch("no-pcre2-unicode")
|
||||
.help(SHORT).long_help(LONG)
|
||||
.overrides("pcre2-unicode");
|
||||
args.push(arg);
|
||||
|
||||
let arg = RGArg::switch("pcre2-unicode")
|
||||
.hidden()
|
||||
.overrides("no-pcre2-unicode");
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_null(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Print a NUL byte after file paths.";
|
||||
const LONG: &str = long!("\
|
||||
@@ -1658,6 +1700,8 @@ Note that PCRE2 is an optional ripgrep feature. If PCRE2 wasn't included in
|
||||
your build of ripgrep, then using this flag will result in ripgrep printing
|
||||
an error message and exiting.
|
||||
|
||||
Related flags: --no-pcre2-unicode
|
||||
|
||||
This flag can be disabled with --no-pcre2.
|
||||
");
|
||||
let arg = RGArg::switch("pcre2").short("P")
|
||||
@@ -1671,46 +1715,6 @@ This flag can be disabled with --no-pcre2.
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_pcre2_unicode(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Enable Unicode mode for PCRE2 matching.";
|
||||
const LONG: &str = long!("\
|
||||
When PCRE2 matching is enabled, this flag will enable Unicode mode. If PCRE2
|
||||
matching is not enabled, then this flag has no effect.
|
||||
|
||||
This flag is enabled by default when PCRE2 matching is enabled.
|
||||
|
||||
When PCRE2's Unicode mode is enabled several different types of patterns become
|
||||
Unicode aware. This includes '\\b', '\\B', '\\w', '\\W', '\\d', '\\D', '\\s'
|
||||
and '\\S'. Similarly, the '.' meta character will match any Unicode codepoint
|
||||
instead of any byte. Caseless matching will also use Unicode simple case
|
||||
folding instead of ASCII-only case insensitivity.
|
||||
|
||||
Unicode mode in PCRE2 represents a critical trade off in the user experience
|
||||
of ripgrep. In particular, unlike the default regex engine, PCRE2 does not
|
||||
support the ability to search possibly invalid UTF-8 with Unicode features
|
||||
enabled. Instead, PCRE2 *requires* that everything it searches when Unicode
|
||||
mode is enabled is valid UTF-8. (Or valid UTF-16/UTF-32, but for the purposes
|
||||
of ripgrep, we only discuss UTF-8.) This means that if you have PCRE2's Unicode
|
||||
mode enabled and you attempt to search invalid UTF-8, then the search for that
|
||||
file will halt and print an error. For this reason, when PCRE2's Unicode mode
|
||||
is enabled, ripgrep will automatically \"fix\" invalid UTF-8 sequences by
|
||||
replacing them with the Unicode replacement codepoint.
|
||||
|
||||
If you would rather see the encoding errors surfaced by PCRE2 when Unicode mode
|
||||
is enabled, then pass the --no-encoding flag to disable all transcoding.
|
||||
|
||||
This flag can be disabled with --no-pcre2-unicode.
|
||||
");
|
||||
let arg = RGArg::switch("pcre2-unicode")
|
||||
.help(SHORT).long_help(LONG);
|
||||
args.push(arg);
|
||||
|
||||
let arg = RGArg::switch("no-pcre2-unicode")
|
||||
.hidden()
|
||||
.overrides("pcre2-unicode");
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_pretty(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Alias for --color always --heading --line-number.";
|
||||
const LONG: &str = long!("\
|
||||
|
Loading…
x
Reference in New Issue
Block a user