From 79f5a5a66e68aa50ef9f63ded777e640d345df5e Mon Sep 17 00:00:00 2001
From: Alexander Weiss <alex@weissfam.de>
Date: Tue, 24 Dec 2024 21:04:55 +0100
Subject: [PATCH] globset: add `Candidate::from_bytes` constructor

Building a `Candidate` from raw bytes is already technically possible on
Unix by going through `OsStr` and `&[u8]` conversions. This constructor
just makes it easier to do in all circumstances, and it is a reasonable
thing to intentionally support.

Closes #2954, Closes #2955
---
 crates/globset/src/lib.rs | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)
diff --git a/crates/globset/src/lib.rs b/crates/globset/src/lib.rs
index c3f53001..b9604272 100644
--- a/crates/globset/src/lib.rs
+++ b/crates/globset/src/lib.rs
@@ -598,7 +598,23 @@ impl<'a> std::fmt::Debug for Candidate<'a> {
 impl<'a> Candidate<'a> {
     /// Create a new candidate for matching from the given path.
     pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
-        let path = normalize_path(Vec::from_path_lossy(path.as_ref()));
+        Self::from_cow(Vec::from_path_lossy(path.as_ref()))
+    }
+
+    /// Create a new candidate for matching from the given path as a sequence
+    /// of bytes.
+    ///
+    /// Generally speaking, this routine expects the bytes to be
+    /// _conventionally_ UTF-8. It is legal for the byte sequence to contain
+    /// invalid UTF-8. However, if the bytes are in some other encoding that
+    /// isn't ASCII compatible (for example, UTF-16), then the results of
+    /// matching are unspecified.
+    pub fn from_bytes<P: AsRef<[u8]> + ?Sized>(path: &'a P) -> Candidate<'a> {
+        Self::from_cow(Cow::Borrowed(path.as_ref()))
+    }
+
+    fn from_cow(path: Cow<'a, [u8]>) -> Candidate<'a> {
+        let path = normalize_path(path);
         let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
         let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
         Candidate { path, basename, ext }