mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-01-19 05:49:14 +02:00
ripgrep: make --no-pcre2-unicode the canonical flag
Previously, we used --pcre2-unicode as the canonical flag even though PCRE2's Unicode mode is enabled by default, which is inconsistent with how we handle other similar flags. The reason --pcre2-unicode was made the canonical flag was to make it easier to discover, since it would be sorted near the --pcre2 flag. To keep the flag discoverable now that --no-pcre2-unicode is canonical, we start a convention of listing related flags in each flag's documentation. Fixes #1022.
This commit is contained in:
parent
7ac9782970
commit
edd6eb4e06
@ -166,8 +166,8 @@ _rg() {
|
|||||||
$no'(pcre2-unicode)--no-pcre2[disable matching with PCRE2]'
|
$no'(pcre2-unicode)--no-pcre2[disable matching with PCRE2]'
|
||||||
|
|
||||||
+ '(pcre2-unicode)' # PCRE2 Unicode options
|
+ '(pcre2-unicode)' # PCRE2 Unicode options
|
||||||
$no'(--no-pcre2-unicode)--pcre2-unicode[enable PCRE2 Unicode mode (with -P)]'
|
$no'(--no-pcre2 --no-pcre2-unicode)--pcre2-unicode[enable PCRE2 Unicode mode (with -P)]'
|
||||||
'(--no-pcre2-unicode)--no-pcre2-unicode[disable PCRE2 Unicode mode (with -P)]'
|
'(--no-pcre2 --pcre2-unicode)--no-pcre2-unicode[disable PCRE2 Unicode mode (with -P)]'
|
||||||
|
|
||||||
+ '(pre)' # Preprocessing options
|
+ '(pre)' # Preprocessing options
|
||||||
'(-z --search-zip)--pre=[specify preprocessor utility]:preprocessor utility:_command_names -e'
|
'(-z --search-zip)--pre=[specify preprocessor utility]:preprocessor utility:_command_names -e'
|
||||||
|
86
src/app.rs
86
src/app.rs
@ -582,13 +582,13 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
|
|||||||
flag_no_ignore_parent(&mut args);
|
flag_no_ignore_parent(&mut args);
|
||||||
flag_no_ignore_vcs(&mut args);
|
flag_no_ignore_vcs(&mut args);
|
||||||
flag_no_messages(&mut args);
|
flag_no_messages(&mut args);
|
||||||
|
flag_no_pcre2_unicode(&mut args);
|
||||||
flag_null(&mut args);
|
flag_null(&mut args);
|
||||||
flag_null_data(&mut args);
|
flag_null_data(&mut args);
|
||||||
flag_only_matching(&mut args);
|
flag_only_matching(&mut args);
|
||||||
flag_path_separator(&mut args);
|
flag_path_separator(&mut args);
|
||||||
flag_passthru(&mut args);
|
flag_passthru(&mut args);
|
||||||
flag_pcre2(&mut args);
|
flag_pcre2(&mut args);
|
||||||
flag_pcre2_unicode(&mut args);
|
|
||||||
flag_pre(&mut args);
|
flag_pre(&mut args);
|
||||||
flag_pretty(&mut args);
|
flag_pretty(&mut args);
|
||||||
flag_quiet(&mut args);
|
flag_quiet(&mut args);
|
||||||
@ -1568,6 +1568,48 @@ This flag can be disabled with the --messages flag.
|
|||||||
args.push(arg);
|
args.push(arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn flag_no_pcre2_unicode(args: &mut Vec<RGArg>) {
|
||||||
|
const SHORT: &str = "Disable Unicode mode for PCRE2 matching.";
|
||||||
|
const LONG: &str = long!("\
|
||||||
|
When PCRE2 matching is enabled, this flag will disable Unicode mode, which is
|
||||||
|
otherwise enabled by default. If PCRE2 matching is not enabled, then this flag
|
||||||
|
has no effect.
|
||||||
|
|
||||||
|
When PCRE2's Unicode mode is enabled, several different types of patterns
|
||||||
|
become Unicode aware. This includes '\\b', '\\B', '\\w', '\\W', '\\d', '\\D',
|
||||||
|
'\\s' and '\\S'. Similarly, the '.' meta character will match any Unicode
|
||||||
|
codepoint instead of any byte. Caseless matching will also use Unicode simple
|
||||||
|
case folding instead of ASCII-only case insensitivity.
|
||||||
|
|
||||||
|
Unicode mode in PCRE2 represents a critical trade off in the user experience
|
||||||
|
of ripgrep. In particular, unlike the default regex engine, PCRE2 does not
|
||||||
|
support the ability to search possibly invalid UTF-8 with Unicode features
|
||||||
|
enabled. Instead, PCRE2 *requires* that everything it searches when Unicode
|
||||||
|
mode is enabled is valid UTF-8. (Or valid UTF-16/UTF-32, but for the purposes
|
||||||
|
of ripgrep, we only discuss UTF-8.) This means that if you have PCRE2's Unicode
|
||||||
|
mode enabled and you attempt to search invalid UTF-8, then the search for that
|
||||||
|
file will halt and print an error. For this reason, when PCRE2's Unicode mode
|
||||||
|
is enabled, ripgrep will automatically \"fix\" invalid UTF-8 sequences by
|
||||||
|
replacing them with the Unicode replacement codepoint.
|
||||||
|
|
||||||
|
If you would rather see the encoding errors surfaced by PCRE2 when Unicode mode
|
||||||
|
is enabled, then pass the --no-encoding flag to disable all transcoding.
|
||||||
|
|
||||||
|
Related flags: --pcre2
|
||||||
|
|
||||||
|
This flag can be disabled with --pcre2-unicode.
|
||||||
|
");
|
||||||
|
let arg = RGArg::switch("no-pcre2-unicode")
|
||||||
|
.help(SHORT).long_help(LONG)
|
||||||
|
.overrides("pcre2-unicode");
|
||||||
|
args.push(arg);
|
||||||
|
|
||||||
|
let arg = RGArg::switch("pcre2-unicode")
|
||||||
|
.hidden()
|
||||||
|
.overrides("no-pcre2-unicode");
|
||||||
|
args.push(arg);
|
||||||
|
}
|
||||||
|
|
||||||
fn flag_null(args: &mut Vec<RGArg>) {
|
fn flag_null(args: &mut Vec<RGArg>) {
|
||||||
const SHORT: &str = "Print a NUL byte after file paths.";
|
const SHORT: &str = "Print a NUL byte after file paths.";
|
||||||
const LONG: &str = long!("\
|
const LONG: &str = long!("\
|
||||||
@ -1658,6 +1700,8 @@ Note that PCRE2 is an optional ripgrep feature. If PCRE2 wasn't included in
|
|||||||
your build of ripgrep, then using this flag will result in ripgrep printing
|
your build of ripgrep, then using this flag will result in ripgrep printing
|
||||||
an error message and exiting.
|
an error message and exiting.
|
||||||
|
|
||||||
|
Related flags: --no-pcre2-unicode
|
||||||
|
|
||||||
This flag can be disabled with --no-pcre2.
|
This flag can be disabled with --no-pcre2.
|
||||||
");
|
");
|
||||||
let arg = RGArg::switch("pcre2").short("P")
|
let arg = RGArg::switch("pcre2").short("P")
|
||||||
@ -1671,46 +1715,6 @@ This flag can be disabled with --no-pcre2.
|
|||||||
args.push(arg);
|
args.push(arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn flag_pcre2_unicode(args: &mut Vec<RGArg>) {
|
|
||||||
const SHORT: &str = "Enable Unicode mode for PCRE2 matching.";
|
|
||||||
const LONG: &str = long!("\
|
|
||||||
When PCRE2 matching is enabled, this flag will enable Unicode mode. If PCRE2
|
|
||||||
matching is not enabled, then this flag has no effect.
|
|
||||||
|
|
||||||
This flag is enabled by default when PCRE2 matching is enabled.
|
|
||||||
|
|
||||||
When PCRE2's Unicode mode is enabled several different types of patterns become
|
|
||||||
Unicode aware. This includes '\\b', '\\B', '\\w', '\\W', '\\d', '\\D', '\\s'
|
|
||||||
and '\\S'. Similarly, the '.' meta character will match any Unicode codepoint
|
|
||||||
instead of any byte. Caseless matching will also use Unicode simple case
|
|
||||||
folding instead of ASCII-only case insensitivity.
|
|
||||||
|
|
||||||
Unicode mode in PCRE2 represents a critical trade off in the user experience
|
|
||||||
of ripgrep. In particular, unlike the default regex engine, PCRE2 does not
|
|
||||||
support the ability to search possibly invalid UTF-8 with Unicode features
|
|
||||||
enabled. Instead, PCRE2 *requires* that everything it searches when Unicode
|
|
||||||
mode is enabled is valid UTF-8. (Or valid UTF-16/UTF-32, but for the purposes
|
|
||||||
of ripgrep, we only discuss UTF-8.) This means that if you have PCRE2's Unicode
|
|
||||||
mode enabled and you attempt to search invalid UTF-8, then the search for that
|
|
||||||
file will halt and print an error. For this reason, when PCRE2's Unicode mode
|
|
||||||
is enabled, ripgrep will automatically \"fix\" invalid UTF-8 sequences by
|
|
||||||
replacing them with the Unicode replacement codepoint.
|
|
||||||
|
|
||||||
If you would rather see the encoding errors surfaced by PCRE2 when Unicode mode
|
|
||||||
is enabled, then pass the --no-encoding flag to disable all transcoding.
|
|
||||||
|
|
||||||
This flag can be disabled with --no-pcre2-unicode.
|
|
||||||
");
|
|
||||||
let arg = RGArg::switch("pcre2-unicode")
|
|
||||||
.help(SHORT).long_help(LONG);
|
|
||||||
args.push(arg);
|
|
||||||
|
|
||||||
let arg = RGArg::switch("no-pcre2-unicode")
|
|
||||||
.hidden()
|
|
||||||
.overrides("pcre2-unicode");
|
|
||||||
args.push(arg);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn flag_pretty(args: &mut Vec<RGArg>) {
|
fn flag_pretty(args: &mut Vec<RGArg>) {
|
||||||
const SHORT: &str = "Alias for --color always --heading --line-number.";
|
const SHORT: &str = "Alias for --color always --heading --line-number.";
|
||||||
const LONG: &str = long!("\
|
const LONG: &str = long!("\
|
||||||
|
Loading…
x
Reference in New Issue
Block a user