From 79f5a5a66e68aa50ef9f63ded777e640d345df5e Mon Sep 17 00:00:00 2001 From: Alexander Weiss Date: Tue, 24 Dec 2024 21:04:55 +0100 Subject: [PATCH] globset: add `Candidate::from_bytes` constructor This is already technically possible to do on Unix by going through `OsStr` and `&[u8]` conversions. This just makes it easier to do in all circumstances and is reasonable to intentionally support. Closes #2954, Closes #2955 --- crates/globset/src/lib.rs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/crates/globset/src/lib.rs b/crates/globset/src/lib.rs index c3f53001..b9604272 100644 --- a/crates/globset/src/lib.rs +++ b/crates/globset/src/lib.rs @@ -598,7 +598,23 @@ impl<'a> std::fmt::Debug for Candidate<'a> { impl<'a> Candidate<'a> { /// Create a new candidate for matching from the given path. pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> { - let path = normalize_path(Vec::from_path_lossy(path.as_ref())); + Self::from_cow(Vec::from_path_lossy(path.as_ref())) + } + + /// Create a new candidate for matching from the given path as a sequence + /// of bytes. + /// + /// Generally speaking, this routine expects the bytes to be + /// _conventionally_ UTF-8. It is legal for the byte sequence to contain + /// invalid UTF-8. However, if the bytes are in some other encoding that + /// isn't ASCII compatible (for example, UTF-16), then the results of + /// matching are unspecified. + pub fn from_bytes<P: AsRef<[u8]> + ?Sized>(path: &'a P) -> Candidate<'a> { + Self::from_cow(Cow::Borrowed(path.as_ref())) + } + + fn from_cow(path: Cow<'a, [u8]>) -> Candidate<'a> { + let path = normalize_path(path); let basename = file_name(&path).unwrap_or(Cow::Borrowed(B(""))); let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B(""))); Candidate { path, basename, ext }