mirror of
https://github.com/j178/prek.git
synced 2026-04-25 02:11:36 +02:00
Simplify archive extraction implementation (#193)
This commit is contained in:
+47
-151
@@ -23,10 +23,12 @@ use std::collections::HashSet;
|
||||
use std::ffi::OsString;
|
||||
use std::fmt::{Display, Formatter};
|
||||
use std::path::{Component, Path, PathBuf};
|
||||
use std::pin::Pin;
|
||||
|
||||
use futures::StreamExt;
|
||||
use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
|
||||
use async_compression::tokio::bufread::{GzipDecoder, XzDecoder};
|
||||
use async_zip::base::read::stream::ZipFileReader;
|
||||
use tokio::io::{AsyncRead, BufReader};
|
||||
use tokio_tar::ArchiveBuilder;
|
||||
use tokio_util::compat::FuturesAsyncReadCompatExt;
|
||||
use tracing::warn;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
@@ -128,10 +130,7 @@ pub fn strip_component(source: impl AsRef<Path>) -> Result<PathBuf, Error> {
|
||||
/// This is useful for unzipping files as they're being downloaded. If the archive
|
||||
/// is already fully on disk, consider using `unzip_archive`, which can use multiple
|
||||
/// threads to work faster in that case.
|
||||
pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
|
||||
reader: R,
|
||||
target: impl AsRef<Path>,
|
||||
) -> Result<(), Error> {
|
||||
pub async fn unzip<R: AsyncRead + Unpin>(reader: R, target: impl AsRef<Path>) -> Result<(), Error> {
|
||||
/// Ensure the file path is safe to use as a [`Path`].
|
||||
///
|
||||
/// See: <https://docs.rs/zip/latest/zip/read/struct.ZipFile.html#method.enclosed_name>
|
||||
@@ -153,8 +152,8 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
|
||||
}
|
||||
|
||||
let target = target.as_ref();
|
||||
let mut reader = futures::io::BufReader::with_capacity(DEFAULT_BUF_SIZE, reader.compat());
|
||||
let mut zip = async_zip::base::read::stream::ZipFileReader::new(&mut reader);
|
||||
let mut reader = BufReader::with_capacity(DEFAULT_BUF_SIZE, reader);
|
||||
let mut zip = ZipFileReader::with_tokio(&mut reader);
|
||||
|
||||
let mut directories = HashSet::new();
|
||||
|
||||
@@ -208,10 +207,13 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
|
||||
// which indicates the first entry in the central directory. So we continue reading from there.
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use async_zip::base::read::cd::CentralDirectoryReader;
|
||||
use std::fs::Permissions;
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
use tokio_util::compat::TokioAsyncReadCompatExt;
|
||||
|
||||
let mut directory = async_zip::base::read::cd::CentralDirectoryReader::new(&mut reader);
|
||||
let mut reader = reader.compat();
|
||||
let mut directory = CentralDirectoryReader::new(&mut reader);
|
||||
while let Some(entry) = directory.next().await? {
|
||||
if entry.dir()? {
|
||||
continue;
|
||||
@@ -221,181 +223,75 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
|
||||
continue;
|
||||
};
|
||||
|
||||
// The executable bit is the only permission we preserve, otherwise we use the OS defaults.
|
||||
// https://github.com/pypa/pip/blob/3898741e29b7279e7bffe044ecfbe20f6a438b1e/src/pip/_internal/utils/unpacking.py#L88-L100
|
||||
let has_any_executable_bit = mode & 0o111;
|
||||
if has_any_executable_bit != 0 {
|
||||
// Construct the (expected) path to the file on-disk.
|
||||
let path = entry.filename().as_str()?;
|
||||
let Some(path) = enclosed_name(path) else {
|
||||
continue;
|
||||
};
|
||||
let path = target.join(path);
|
||||
|
||||
let permissions = fs_err::tokio::metadata(&path).await?.permissions();
|
||||
if permissions.mode() & 0o111 != 0o111 {
|
||||
fs_err::tokio::set_permissions(
|
||||
&path,
|
||||
Permissions::from_mode(permissions.mode() | 0o111),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
// Construct the (expected) path to the file on-disk.
|
||||
let path = entry.filename().as_str()?;
|
||||
let Some(path) = enclosed_name(path) else {
|
||||
continue;
|
||||
};
|
||||
let path = target.join(path);
|
||||
fs_err::tokio::set_permissions(&path, Permissions::from_mode(mode)).await?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Determine the path at which the given tar entry will be unpacked, when unpacking into `dst`.
|
||||
///
|
||||
/// See: <https://github.com/vorot93/tokio-tar/blob/87338a76092330bc6fe60de95d83eae5597332e1/src/entry.rs#L418>
|
||||
#[cfg_attr(not(unix), allow(dead_code))]
|
||||
fn unpacked_at(dst: &Path, entry: &Path) -> Option<PathBuf> {
|
||||
let mut file_dst = dst.to_path_buf();
|
||||
{
|
||||
for part in entry.components() {
|
||||
match part {
|
||||
// Leading '/' characters, root paths, and '.'
|
||||
// components are just ignored and treated as "empty
|
||||
// components"
|
||||
Component::Prefix(..) | Component::RootDir | Component::CurDir => {
|
||||
continue;
|
||||
}
|
||||
|
||||
// If any part of the filename is '..', then skip over
|
||||
// unpacking the file to prevent directory traversal
|
||||
// security issues. See, e.g.: CVE-2001-1267,
|
||||
// CVE-2002-0399, CVE-2005-1918, CVE-2007-4131
|
||||
Component::ParentDir => return None,
|
||||
|
||||
Component::Normal(part) => file_dst.push(part),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Skip cases where only slashes or '.' parts were seen, because
|
||||
// this is effectively an empty filename.
|
||||
if *dst == *file_dst {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Skip entries without a parent (i.e. outside of FS root)
|
||||
file_dst.parent()?;
|
||||
|
||||
Some(file_dst)
|
||||
}
|
||||
|
||||
/// Unpack the given tar archive into the destination directory.
|
||||
///
|
||||
/// This is equivalent to `archive.unpack_in(dst)`, but it also preserves the executable bit.
|
||||
async fn untar_in(
|
||||
mut archive: tokio_tar::Archive<&mut (dyn tokio::io::AsyncRead + Unpin)>,
|
||||
dst: &Path,
|
||||
) -> std::io::Result<()> {
|
||||
let mut entries = archive.entries()?;
|
||||
let mut pinned = Pin::new(&mut entries);
|
||||
while let Some(entry) = pinned.next().await {
|
||||
// Unpack the file into the destination directory.
|
||||
let mut file = entry?;
|
||||
|
||||
// On Windows, skip symlink entries, as they're not supported. pip recursively copies the
|
||||
// symlink target instead.
|
||||
if cfg!(windows) && file.header().entry_type().is_symlink() {
|
||||
warn!(
|
||||
"Skipping symlink in tar archive: {}",
|
||||
file.path()?.display()
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
file.unpack_in(dst).await?;
|
||||
|
||||
// Preserve the executable bit.
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::fs::Permissions;
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
|
||||
let entry_type = file.header().entry_type();
|
||||
if entry_type.is_file() || entry_type.is_hard_link() {
|
||||
let mode = file.header().mode()?;
|
||||
let has_any_executable_bit = mode & 0o111;
|
||||
if has_any_executable_bit != 0 {
|
||||
if let Some(path) = unpacked_at(dst, &file.path()?) {
|
||||
let permissions = fs_err::tokio::metadata(&path).await?.permissions();
|
||||
if permissions.mode() & 0o111 != 0o111 {
|
||||
fs_err::tokio::set_permissions(
|
||||
&path,
|
||||
Permissions::from_mode(permissions.mode() | 0o111),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Unpack a `.tar.gz` archive into the target directory, without requiring `Seek`.
|
||||
///
|
||||
/// This is useful for unpacking files as they're being downloaded.
|
||||
pub async fn untar_gz<R: tokio::io::AsyncRead + Unpin>(
|
||||
pub async fn untar_gz<R: AsyncRead + Unpin>(
|
||||
reader: R,
|
||||
target: impl AsRef<Path>,
|
||||
) -> Result<(), Error> {
|
||||
let reader = tokio::io::BufReader::with_capacity(DEFAULT_BUF_SIZE, reader);
|
||||
let mut decompressed_bytes = async_compression::tokio::bufread::GzipDecoder::new(reader);
|
||||
let reader = BufReader::with_capacity(DEFAULT_BUF_SIZE, reader);
|
||||
let reader = GzipDecoder::new(reader);
|
||||
|
||||
let archive = tokio_tar::ArchiveBuilder::new(
|
||||
&mut decompressed_bytes as &mut (dyn tokio::io::AsyncRead + Unpin),
|
||||
)
|
||||
.set_preserve_mtime(false)
|
||||
.build();
|
||||
Ok(untar_in(archive, target.as_ref()).await?)
|
||||
let mut archive = ArchiveBuilder::new(reader)
|
||||
.set_preserve_mtime(true)
|
||||
.set_preserve_permissions(true)
|
||||
.build();
|
||||
|
||||
archive.unpack(target.as_ref()).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Unpack a `.tar.xz` archive into the target directory, without requiring `Seek`.
|
||||
///
|
||||
/// This is useful for unpacking files as they're being downloaded.
|
||||
pub async fn untar_xz<R: tokio::io::AsyncRead + Unpin>(
|
||||
pub async fn untar_xz<R: AsyncRead + Unpin>(
|
||||
reader: R,
|
||||
target: impl AsRef<Path>,
|
||||
) -> Result<(), Error> {
|
||||
let reader = tokio::io::BufReader::with_capacity(DEFAULT_BUF_SIZE, reader);
|
||||
let mut decompressed_bytes = async_compression::tokio::bufread::XzDecoder::new(reader);
|
||||
let reader = BufReader::with_capacity(DEFAULT_BUF_SIZE, reader);
|
||||
let reader = XzDecoder::new(reader);
|
||||
|
||||
let archive = tokio_tar::ArchiveBuilder::new(
|
||||
&mut decompressed_bytes as &mut (dyn tokio::io::AsyncRead + Unpin),
|
||||
)
|
||||
.set_preserve_mtime(false)
|
||||
.build();
|
||||
untar_in(archive, target.as_ref()).await?;
|
||||
let mut archive = ArchiveBuilder::new(reader)
|
||||
.set_preserve_mtime(true)
|
||||
.set_preserve_permissions(true)
|
||||
.build();
|
||||
|
||||
archive.unpack(target.as_ref()).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Unpack a `.tar` archive into the target directory, without requiring `Seek`.
|
||||
///
|
||||
/// This is useful for unpacking files as they're being downloaded.
|
||||
pub async fn untar<R: tokio::io::AsyncRead + Unpin>(
|
||||
reader: R,
|
||||
target: impl AsRef<Path>,
|
||||
) -> Result<(), Error> {
|
||||
let mut reader = tokio::io::BufReader::with_capacity(DEFAULT_BUF_SIZE, reader);
|
||||
pub async fn untar<R: AsyncRead + Unpin>(reader: R, target: impl AsRef<Path>) -> Result<(), Error> {
|
||||
let reader = BufReader::with_capacity(DEFAULT_BUF_SIZE, reader);
|
||||
|
||||
let archive =
|
||||
tokio_tar::ArchiveBuilder::new(&mut reader as &mut (dyn tokio::io::AsyncRead + Unpin))
|
||||
.set_preserve_mtime(false)
|
||||
.build();
|
||||
untar_in(archive, target.as_ref()).await?;
|
||||
let mut archive = ArchiveBuilder::new(reader)
|
||||
.set_preserve_mtime(true)
|
||||
.set_preserve_permissions(true)
|
||||
.build();
|
||||
|
||||
archive.unpack(target.as_ref()).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Unpack a `.zip`, `.tar.gz`, `.tar.bz2`, `.tar.zst`, or `.tar.xz` archive into the target directory,
|
||||
/// without requiring `Seek`.
|
||||
pub async fn unpack<R: tokio::io::AsyncRead + Unpin>(
|
||||
pub async fn unpack<R: AsyncRead + Unpin>(
|
||||
reader: R,
|
||||
ext: ArchiveExtension,
|
||||
target: impl AsRef<Path>,
|
||||
|
||||
Reference in New Issue
Block a user