2016-10-05 02:22:13 +02:00
|
|
|
use std::borrow::Cow;
|
2016-10-01 01:42:41 +02:00
|
|
|
use std::ffi::OsStr;
|
|
|
|
use std::path::Path;
|
|
|
|
|
|
|
|
/// The final component of the path, if it is a normal file.
|
|
|
|
///
|
|
|
|
/// If the path terminates in ., .., or consists solely of a root of prefix,
|
|
|
|
/// file_name will return None.
|
|
|
|
#[cfg(unix)]
|
|
|
|
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
|
|
|
path: &'a P,
|
|
|
|
) -> Option<&'a OsStr> {
|
|
|
|
use std::os::unix::ffi::OsStrExt;
|
|
|
|
use memchr::memrchr;
|
|
|
|
|
|
|
|
let path = path.as_ref().as_os_str().as_bytes();
|
|
|
|
if path.is_empty() {
|
|
|
|
return None;
|
|
|
|
} else if path.len() == 1 && path[0] == b'.' {
|
|
|
|
return None;
|
|
|
|
} else if path.last() == Some(&b'.') {
|
|
|
|
return None;
|
|
|
|
} else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] {
|
|
|
|
return None;
|
|
|
|
}
|
|
|
|
let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0);
|
|
|
|
Some(OsStr::from_bytes(&path[last_slash..]))
|
|
|
|
}
|
|
|
|
|
|
|
|
/// The final component of the path, if it is a normal file.
|
|
|
|
///
|
|
|
|
/// If the path terminates in ., .., or consists solely of a root of prefix,
|
|
|
|
/// file_name will return None.
|
|
|
|
#[cfg(not(unix))]
|
|
|
|
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
|
|
|
path: &'a P,
|
|
|
|
) -> Option<&'a OsStr> {
|
|
|
|
path.as_ref().file_name()
|
|
|
|
}
|
2016-10-05 02:22:13 +02:00
|
|
|
|
|
|
|
/// Return a file extension given a path's file name.
|
|
|
|
///
|
|
|
|
/// Note that this does NOT match the semantics of std::path::Path::extension.
|
|
|
|
/// Namely, the extension includes the `.` and matching is otherwise more
|
|
|
|
/// liberal. Specifically, the extenion is:
|
|
|
|
///
|
|
|
|
/// * None, if the file name given is empty;
|
|
|
|
/// * None, if there is no embedded `.`;
|
|
|
|
/// * Otherwise, the portion of the file name starting with the final `.`.
|
|
|
|
///
|
|
|
|
/// e.g., A file name of `.rs` has an extension `.rs`.
|
|
|
|
///
|
|
|
|
/// N.B. This is done to make certain glob match optimizations easier. Namely,
|
|
|
|
/// a pattern like `*.rs` is obviously trying to match files with a `rs`
|
|
|
|
/// extension, but it also matches files like `.rs`, which doesn't have an
|
|
|
|
/// extension according to std::path::Path::extension.
|
|
|
|
pub fn file_name_ext(name: &OsStr) -> Option<&OsStr> {
|
|
|
|
// Yes, these functions are awful, and yes, we are completely violating
|
|
|
|
// the abstraction barrier of std::ffi. The barrier we're violating is
|
|
|
|
// that an OsStr's encoding is *ASCII compatible*. While this is obviously
|
|
|
|
// true on Unix systems, it's also true on Windows because an OsStr uses
|
|
|
|
// WTF-8 internally: https://simonsapin.github.io/wtf-8/
|
|
|
|
//
|
|
|
|
// We should consider doing the same for the other path utility functions.
|
|
|
|
// Right now, we don't break any barriers, but Windows users are paying
|
|
|
|
// for it.
|
|
|
|
//
|
|
|
|
// Got any better ideas that don't cost anything? Hit me up. ---AG
|
|
|
|
unsafe fn os_str_as_u8_slice(s: &OsStr) -> &[u8] {
|
|
|
|
::std::mem::transmute(s)
|
|
|
|
}
|
|
|
|
unsafe fn u8_slice_as_os_str(s: &[u8]) -> &OsStr {
|
|
|
|
::std::mem::transmute(s)
|
|
|
|
}
|
|
|
|
if name.is_empty() {
|
|
|
|
return None;
|
|
|
|
}
|
|
|
|
let name = unsafe { os_str_as_u8_slice(name) };
|
|
|
|
for (i, &b) in name.iter().enumerate().rev() {
|
|
|
|
if b == b'.' {
|
|
|
|
return Some(unsafe { u8_slice_as_os_str(&name[i..]) });
|
|
|
|
}
|
|
|
|
}
|
|
|
|
None
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Return raw bytes of a path, transcoded to UTF-8 if necessary.
|
|
|
|
pub fn path_bytes(path: &Path) -> Cow<[u8]> {
|
|
|
|
os_str_bytes(path.as_os_str())
|
|
|
|
}
|
|
|
|
|
2016-10-12 01:57:09 +02:00
|
|
|
/// Return the raw bytes of the given OS string, possibly transcoded to UTF-8.
|
2016-10-05 02:22:13 +02:00
|
|
|
#[cfg(unix)]
|
|
|
|
pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
|
|
|
|
use std::os::unix::ffi::OsStrExt;
|
|
|
|
Cow::Borrowed(s.as_bytes())
|
|
|
|
}
|
|
|
|
|
2016-10-12 01:57:09 +02:00
|
|
|
/// Return the raw bytes of the given OS string, possibly transcoded to UTF-8.
|
2016-10-05 02:22:13 +02:00
|
|
|
#[cfg(not(unix))]
|
|
|
|
pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
|
2016-10-11 01:16:52 +02:00
|
|
|
// TODO(burntsushi): On Windows, OS strings are WTF-8, which is a superset
|
|
|
|
// of UTF-8, so even if we could get at the raw bytes, they wouldn't
|
|
|
|
// be useful. We *must* convert to UTF-8 before doing path matching.
|
|
|
|
// Unfortunate, but necessary.
|
2016-10-05 02:22:13 +02:00
|
|
|
match s.to_string_lossy() {
|
|
|
|
Cow::Owned(s) => Cow::Owned(s.into_bytes()),
|
|
|
|
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-10-11 01:16:52 +02:00
|
|
|
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
|
|
|
|
/// that recognize other characters as separators.
|
|
|
|
#[cfg(unix)]
|
|
|
|
pub fn normalize_path(path: Cow<[u8]>) -> Cow<[u8]> {
|
|
|
|
// UNIX only uses /, so we're good.
|
|
|
|
path
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
|
|
|
|
/// that recognize other characters as separators.
|
|
|
|
#[cfg(not(unix))]
|
|
|
|
pub fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> {
|
|
|
|
use std::path::is_separator;
|
|
|
|
|
|
|
|
for i in 0..path.len() {
|
|
|
|
if path[i] == b'/' || !is_separator(path[i] as char) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
path.to_mut()[i] = b'/';
|
|
|
|
}
|
|
|
|
path
|
|
|
|
}
|
|
|
|
|
2016-10-05 02:22:13 +02:00
|
|
|
#[cfg(test)]
|
|
|
|
mod tests {
|
2016-10-11 01:16:52 +02:00
|
|
|
use std::borrow::Cow;
|
2016-10-05 02:22:13 +02:00
|
|
|
use std::ffi::OsStr;
|
|
|
|
|
2016-10-11 01:16:52 +02:00
|
|
|
use super::{file_name_ext, normalize_path};
|
2016-10-05 02:22:13 +02:00
|
|
|
|
|
|
|
macro_rules! ext {
|
|
|
|
($name:ident, $file_name:expr, $ext:expr) => {
|
|
|
|
#[test]
|
|
|
|
fn $name() {
|
|
|
|
let got = file_name_ext(OsStr::new($file_name));
|
|
|
|
assert_eq!($ext.map(OsStr::new), got);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
ext!(ext1, "foo.rs", Some(".rs"));
|
|
|
|
ext!(ext2, ".rs", Some(".rs"));
|
|
|
|
ext!(ext3, "..rs", Some(".rs"));
|
|
|
|
ext!(ext4, "", None::<&str>);
|
|
|
|
ext!(ext5, "foo", None::<&str>);
|
2016-10-11 01:16:52 +02:00
|
|
|
|
|
|
|
macro_rules! normalize {
|
|
|
|
($name:ident, $path:expr, $expected:expr) => {
|
|
|
|
#[test]
|
|
|
|
fn $name() {
|
|
|
|
let got = normalize_path(Cow::Owned($path.to_vec()));
|
|
|
|
assert_eq!($expected.to_vec(), got.into_owned());
|
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
normalize!(normal1, b"foo", b"foo");
|
|
|
|
normalize!(normal2, b"foo/bar", b"foo/bar");
|
|
|
|
#[cfg(unix)]
|
|
|
|
normalize!(normal3, b"foo\\bar", b"foo\\bar");
|
|
|
|
#[cfg(not(unix))]
|
|
|
|
normalize!(normal3, b"foo\\bar", b"foo/bar");
|
|
|
|
#[cfg(unix)]
|
|
|
|
normalize!(normal4, b"foo\\bar/baz", b"foo\\bar/baz");
|
|
|
|
#[cfg(not(unix))]
|
|
|
|
normalize!(normal4, b"foo\\bar/baz", b"foo/bar/baz");
|
2016-10-05 02:22:13 +02:00
|
|
|
}
|