mirror of
				https://github.com/facebook/zstd.git
				synced 2025-10-31 16:47:48 +02:00 
			
		
		
		
	UTIL_getFileSize() returns UTIL_FILESIZE_UNKNOWN on failure
UTIL_getFileSize() used to return zero on failure. This made it impossible to distinguish a failure from a genuinely empty file. Both cases were coalesced. Adding the UTIL_FILESIZE_UNKNOWN constant has many consequences on user code, since in many places `0` was assumed to mean "error". This is no longer the case, and the error code must be actively checked.
This commit is contained in:
		| @@ -551,6 +551,11 @@ static void BMK_loadFiles(void* buffer, size_t bufferSize, | ||||
|             fileSizes[n] = 0; | ||||
|             continue; | ||||
|         } | ||||
|         if (fileSize == UTIL_FILESIZE_UNKNOWN) { | ||||
|             DISPLAYLEVEL(2, "Cannot evaluate size of %s, ignoring ... \n", fileNamesTable[n]); | ||||
|             fileSizes[n] = 0; | ||||
|             continue; | ||||
|         } | ||||
|         f = fopen(fileNamesTable[n], "rb"); | ||||
|         if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]); | ||||
|         DISPLAYUPDATE(2, "Loading %s...       \r", fileNamesTable[n]); | ||||
| @@ -581,11 +586,14 @@ static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles, co | ||||
|  | ||||
|     /* Load dictionary */ | ||||
|     if (dictFileName != NULL) { | ||||
|         U64 dictFileSize = UTIL_getFileSize(dictFileName); | ||||
|         if (dictFileSize > 64 MB) EXM_THROW(10, "dictionary file %s too large", dictFileName); | ||||
|         U64 const dictFileSize = UTIL_getFileSize(dictFileName); | ||||
|         if (dictFileSize > 64 MB) | ||||
|             EXM_THROW(10, "dictionary file %s too large", dictFileName); | ||||
|         dictBufferSize = (size_t)dictFileSize; | ||||
|         dictBuffer = malloc(dictBufferSize); | ||||
|         if (dictBuffer==NULL) EXM_THROW(11, "not enough memory for dictionary (%u bytes)", (U32)dictBufferSize); | ||||
|         if (dictBuffer==NULL) | ||||
|             EXM_THROW(11, "not enough memory for dictionary (%u bytes)", | ||||
|                             (U32)dictBufferSize); | ||||
|         BMK_loadFiles(dictBuffer, dictBufferSize, fileSizes, &dictFileName, 1); | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -117,7 +117,7 @@ static unsigned DiB_loadFiles(void* buffer, size_t* bufferSizePtr, | ||||
|     for (fileIndex=0; fileIndex<nbFiles; fileIndex++) { | ||||
|         const char* const fileName = fileNamesTable[fileIndex]; | ||||
|         unsigned long long const fs64 = UTIL_getFileSize(fileName); | ||||
|         unsigned long long remainingToLoad = fs64; | ||||
|         unsigned long long remainingToLoad = (fs64 == UTIL_FILESIZE_UNKNOWN) ? 0 : fs64; | ||||
|         U32 const nbChunks = targetChunkSize ? (U32)((fs64 + (targetChunkSize-1)) / targetChunkSize) : 1; | ||||
|         U64 const chunkSize = targetChunkSize ? MIN(targetChunkSize, fs64) : fs64; | ||||
|         size_t const maxChunkSize = (size_t)MIN(chunkSize, SAMPLESIZE_MAX); | ||||
| @@ -245,8 +245,9 @@ static fileStats DiB_fileStats(const char** fileNamesTable, unsigned nbFiles, si | ||||
|     memset(&fs, 0, sizeof(fs)); | ||||
|     for (n=0; n<nbFiles; n++) { | ||||
|         U64 const fileSize = UTIL_getFileSize(fileNamesTable[n]); | ||||
|         U32 const nbSamples = (U32)(chunkSize ? (fileSize + (chunkSize-1)) / chunkSize : 1); | ||||
|         U64 const chunkToLoad = chunkSize ? MIN(chunkSize, fileSize) : fileSize; | ||||
|         U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? 0 : fileSize; | ||||
|         U32 const nbSamples = (U32)(chunkSize ? (srcSize + (chunkSize-1)) / chunkSize : 1); | ||||
|         U64 const chunkToLoad = chunkSize ? MIN(chunkSize, srcSize) : srcSize; | ||||
|         size_t const cappedChunkSize = (size_t)MIN(chunkToLoad, SAMPLESIZE_MAX); | ||||
|         fs.totalSizeToLoad += cappedChunkSize * nbSamples; | ||||
|         fs.oneSampleTooLarge |= (chunkSize > 2*SAMPLESIZE_MAX); | ||||
|   | ||||
| @@ -25,7 +25,7 @@ | ||||
| *  Includes | ||||
| ***************************************/ | ||||
| #include "platform.h"   /* Large Files support, SET_BINARY_MODE */ | ||||
| #include "util.h"       /* UTIL_getFileSize */ | ||||
| #include "util.h"       /* UTIL_getFileSize, UTIL_isRegularFile */ | ||||
| #include <stdio.h>      /* fprintf, fopen, fread, _fileno, stdin, stdout */ | ||||
| #include <stdlib.h>     /* malloc, free */ | ||||
| #include <string.h>     /* strcmp, strlen */ | ||||
| @@ -564,7 +564,7 @@ static unsigned long long FIO_compressGzFrame(cRess_t* ress, | ||||
|                 strm.avail_out = (uInt)ress->dstBufferSize; | ||||
|             } | ||||
|         } | ||||
|         if (!srcFileSize) | ||||
|         if (srcFileSize != UTIL_FILESIZE_UNKNOWN) | ||||
|             DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", | ||||
|                             (U32)(inFileSize>>20), | ||||
|                             (double)outFileSize/inFileSize*100) | ||||
| @@ -651,7 +651,7 @@ static unsigned long long FIO_compressLzmaFrame(cRess_t* ress, | ||||
|                 strm.next_out = (BYTE*)ress->dstBuffer; | ||||
|                 strm.avail_out = ress->dstBufferSize; | ||||
|         }   } | ||||
|         if (!srcFileSize) | ||||
|         if (srcFileSize != UTIL_FILESIZE_UNKNOWN) | ||||
|             DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", | ||||
|                             (U32)(inFileSize>>20), | ||||
|                             (double)outFileSize/inFileSize*100) | ||||
| @@ -697,18 +697,17 @@ static unsigned long long FIO_compressLz4Frame(cRess_t* ress, | ||||
|     prefs.frameInfo.blockSizeID = LZ4F_max4MB; | ||||
|     prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)g_checksumFlag; | ||||
| #if LZ4_VERSION_NUMBER >= 10600 | ||||
|     prefs.frameInfo.contentSize = srcFileSize; | ||||
|     prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize; | ||||
| #endif | ||||
|  | ||||
|     { | ||||
|         size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max4MB); | ||||
|     {   size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max4MB); | ||||
|         size_t readSize; | ||||
|         size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs); | ||||
|         if (LZ4F_isError(headerSize)) | ||||
|             EXM_THROW(33, "File header generation failed : %s", | ||||
|                             LZ4F_getErrorName(headerSize)); | ||||
|         { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile); | ||||
|           if (sizeCheck!=headerSize) EXM_THROW(34, "Write error : cannot write header"); } | ||||
|         if (fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile) != headerSize) | ||||
|             EXM_THROW(34, "Write error : cannot write header"); | ||||
|         outFileSize += headerSize; | ||||
|  | ||||
|         /* Read first block */ | ||||
| @@ -725,7 +724,7 @@ static unsigned long long FIO_compressLz4Frame(cRess_t* ress, | ||||
|                 EXM_THROW(35, "zstd: %s: lz4 compression failed : %s", | ||||
|                             srcFileName, LZ4F_getErrorName(outSize)); | ||||
|             outFileSize += outSize; | ||||
|             if (!srcFileSize) | ||||
|             if (srcFileSize != UTIL_FILESIZE_UNKNOWN) | ||||
|                 DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", | ||||
|                                 (U32)(inFileSize>>20), | ||||
|                                 (double)outFileSize/inFileSize*100) | ||||
| @@ -816,12 +815,12 @@ static int FIO_compressFilename_internal(cRess_t ress, | ||||
|  | ||||
|     /* init */ | ||||
| #ifdef ZSTD_NEWAPI | ||||
|     if (fileSize!=0)  /* when src is stdin, fileSize==0, but is effectively unknown */ | ||||
|     if (fileSize!=UTIL_FILESIZE_UNKNOWN)  /* when src is stdin, fileSize==0, but is effectively unknown */ | ||||
|         ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize); | ||||
| #elif defined(ZSTD_MULTITHREAD) | ||||
|     CHECK( ZSTDMT_resetCStream(ress.cctx, fileSize ? fileSize : ZSTD_CONTENTSIZE_UNKNOWN) ); | ||||
|     CHECK( ZSTDMT_resetCStream(ress.cctx, (fileSize==UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize) ); | ||||
| #else | ||||
|     CHECK( ZSTD_resetCStream(ress.cctx, fileSize ? fileSize : ZSTD_CONTENTSIZE_UNKNOWN) ); | ||||
|     CHECK( ZSTD_resetCStream(ress.cctx, (fileSize==UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize) ); | ||||
| #endif | ||||
|  | ||||
|     /* Main compression loop */ | ||||
| @@ -851,13 +850,13 @@ static int FIO_compressFilename_internal(cRess_t ress, | ||||
|                 compressedfilesize += outBuff.pos; | ||||
|         }   } | ||||
|         if (g_nbThreads > 1) { | ||||
|             if (!fileSize) | ||||
|             if (fileSize != UTIL_FILESIZE_UNKNOWN) | ||||
|                 DISPLAYUPDATE(2, "\rRead : %u MB", (U32)(readsize>>20)) | ||||
|             else | ||||
|                 DISPLAYUPDATE(2, "\rRead : %u / %u MB", | ||||
|                                     (U32)(readsize>>20), (U32)(fileSize>>20)); | ||||
|         } else { | ||||
|             if (!fileSize) | ||||
|             if (fileSize != UTIL_FILESIZE_UNKNOWN) | ||||
|                 DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", | ||||
|                                 (U32)(readsize>>20), | ||||
|                                 (double)compressedfilesize/readsize*100) | ||||
| @@ -985,7 +984,8 @@ int FIO_compressFilename(const char* dstFileName, const char* srcFileName, | ||||
|                          const char* dictFileName, int compressionLevel, ZSTD_compressionParameters* comprParams) | ||||
| { | ||||
|     clock_t const start = clock(); | ||||
|     U64 const srcSize = UTIL_getFileSize(srcFileName); | ||||
|     U64 const fileSize = UTIL_getFileSize(srcFileName); | ||||
|     U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize; | ||||
|  | ||||
|     cRess_t const ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, comprParams); | ||||
|     int const result = FIO_compressFilename_dstFile(ress, dstFileName, srcFileName, compressionLevel); | ||||
| @@ -1007,7 +1007,9 @@ int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFile | ||||
|     size_t dfnSize = FNSPACE; | ||||
|     char*  dstFileName = (char*)malloc(FNSPACE); | ||||
|     size_t const suffixSize = suffix ? strlen(suffix) : 0; | ||||
|     U64 const srcSize = (nbFiles != 1) ? ZSTD_CONTENTSIZE_UNKNOWN : UTIL_getFileSize(inFileNamesTable[0]) ; | ||||
|     U64 const firstFileSize = UTIL_getFileSize(inFileNamesTable[0]); | ||||
|     U64 const firstSrcSize = (firstFileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : firstFileSize; | ||||
|     U64 const srcSize = (nbFiles != 1) ? ZSTD_CONTENTSIZE_UNKNOWN : firstSrcSize ; | ||||
|     cRess_t ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, comprParams); | ||||
|  | ||||
|     /* init */ | ||||
| @@ -1799,7 +1801,7 @@ typedef struct { | ||||
|  *           2 for file not compressed with zstd | ||||
|  *           3 for cases in which file could not be opened. | ||||
|  */ | ||||
| static int getFileInfo(fileInfo_t* info, const char* inFileName){ | ||||
| static int getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName){ | ||||
|     int detectError = 0; | ||||
|     FILE* const srcFile = FIO_openSrcFile(inFileName); | ||||
|     if (srcFile == NULL) { | ||||
| @@ -1815,7 +1817,8 @@ static int getFileInfo(fileInfo_t* info, const char* inFileName){ | ||||
|         if (numBytesRead < ZSTD_frameHeaderSize_min) { | ||||
|             if ( feof(srcFile) | ||||
|               && (numBytesRead == 0) | ||||
|               && (info->compressedSize > 0) ) { | ||||
|               && (info->compressedSize > 0) | ||||
|               && (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) { | ||||
|                 break; | ||||
|             } | ||||
|             else if (feof(srcFile)) { | ||||
| @@ -1928,6 +1931,17 @@ static int getFileInfo(fileInfo_t* info, const char* inFileName){ | ||||
|     return detectError; | ||||
| } | ||||
|  | ||||
| static int getFileInfo(fileInfo_t* info, const char* srcFileName) | ||||
| { | ||||
|     int const isAFile = UTIL_isRegularFile(srcFileName); | ||||
|     if (!isAFile) { | ||||
|         DISPLAY("Error : %s is not a file", srcFileName); | ||||
|         return 3; | ||||
|     } | ||||
|     return getFileInfo_fileConfirmed(info, srcFileName); | ||||
| } | ||||
|  | ||||
|  | ||||
| static void displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel){ | ||||
|     unsigned const unit = info->compressedSize < (1 MB) ? (1 KB) : (1 MB); | ||||
|     const char* const unitStr = info->compressedSize < (1 MB) ? "KB" : "MB"; | ||||
|   | ||||
| @@ -313,33 +313,40 @@ UTIL_STATIC U32 UTIL_isLink(const char* infilename) | ||||
| } | ||||
|  | ||||
|  | ||||
| #define UTIL_FILESIZE_UNKNOWN  ((U64)(-1)) | ||||
| UTIL_STATIC U64 UTIL_getFileSize(const char* infilename) | ||||
| { | ||||
|     int r; | ||||
|     if (!UTIL_isRegularFile(infilename)) return UTIL_FILESIZE_UNKNOWN; | ||||
|     {   int r; | ||||
| #if defined(_MSC_VER) | ||||
|     struct __stat64 statbuf; | ||||
|     r = _stat64(infilename, &statbuf); | ||||
|     if (r || !(statbuf.st_mode & S_IFREG)) return 0;   /* No good... */ | ||||
|         struct __stat64 statbuf; | ||||
|         r = _stat64(infilename, &statbuf); | ||||
|         if (r || !(statbuf.st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN; | ||||
| #elif defined(__MINGW32__) && defined (__MSVCRT__) | ||||
|     struct _stati64 statbuf; | ||||
|     r = _stati64(infilename, &statbuf); | ||||
|     if (r || !(statbuf.st_mode & S_IFREG)) return 0;   /* No good... */ | ||||
|         struct _stati64 statbuf; | ||||
|         r = _stati64(infilename, &statbuf); | ||||
|         if (r || !(statbuf.st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN; | ||||
| #else | ||||
|     struct stat statbuf; | ||||
|     r = stat(infilename, &statbuf); | ||||
|     if (r || !S_ISREG(statbuf.st_mode)) return 0;   /* No good... */ | ||||
|         struct stat statbuf; | ||||
|         r = stat(infilename, &statbuf); | ||||
|         if (r || !S_ISREG(statbuf.st_mode)) return UTIL_FILESIZE_UNKNOWN; | ||||
| #endif | ||||
|     return (U64)statbuf.st_size; | ||||
|         return (U64)statbuf.st_size; | ||||
|     } | ||||
| } | ||||
|  | ||||
|  | ||||
| UTIL_STATIC U64 UTIL_getTotalFileSize(const char** fileNamesTable, unsigned nbFiles) | ||||
| { | ||||
|     U64 total = 0; | ||||
|     int error = 0; | ||||
|     unsigned n; | ||||
|     for (n=0; n<nbFiles; n++) | ||||
|         total += UTIL_getFileSize(fileNamesTable[n]); | ||||
|     return total; | ||||
|     for (n=0; n<nbFiles; n++) { | ||||
|         U64 const size = UTIL_getFileSize(fileNamesTable[n]); | ||||
|         error |= (size == UTIL_FILESIZE_UNKNOWN); | ||||
|         total += size; | ||||
|     } | ||||
|     return error ? UTIL_FILESIZE_UNKNOWN : total; | ||||
| } | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -484,8 +484,8 @@ static int benchFiles(const char** fileNamesTable, const int nbFiles, U32 benchN | ||||
|     /* Loop for each file */ | ||||
|     int fileIdx; | ||||
|     for (fileIdx=0; fileIdx<nbFiles; fileIdx++) { | ||||
|         const char* inFileName = fileNamesTable[fileIdx]; | ||||
|         FILE* inFile = fopen( inFileName, "rb" ); | ||||
|         const char* const inFileName = fileNamesTable[fileIdx]; | ||||
|         FILE* const inFile = fopen( inFileName, "rb" ); | ||||
|         U64   inFileSize; | ||||
|         size_t benchedSize; | ||||
|         void* origBuff; | ||||
| @@ -495,6 +495,11 @@ static int benchFiles(const char** fileNamesTable, const int nbFiles, U32 benchN | ||||
|  | ||||
|         /* Memory allocation & restrictions */ | ||||
|         inFileSize = UTIL_getFileSize(inFileName); | ||||
|         if (inFileSize == UTIL_FILESIZE_UNKNOWN) { | ||||
|             DISPLAY( "Cannot measure size of %s\n", inFileName); | ||||
|             fclose(inFile); | ||||
|             return 11; | ||||
|         } | ||||
|         benchedSize = BMK_findMaxMem(inFileSize*3) / 3; | ||||
|         if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize; | ||||
|         if (benchedSize < inFileSize) | ||||
|   | ||||
| @@ -687,6 +687,11 @@ int benchFiles(const char** fileNamesTable, int nbFiles) | ||||
|             DISPLAY( "Pb opening %s\n", inFileName); | ||||
|             return 11; | ||||
|         } | ||||
|         if (inFileSize == UTIL_FILESIZE_UNKNOWN) { | ||||
|             DISPLAY("Pb evaluatin size of %s \n", inFileName); | ||||
|             fclose(inFile); | ||||
|             return 11; | ||||
|         } | ||||
|  | ||||
|         /* Memory allocation */ | ||||
|         benchedSize = BMK_findMaxMem(inFileSize*3) / 3; | ||||
| @@ -740,6 +745,11 @@ int optimizeForSize(const char* inFileName, U32 targetSpeed) | ||||
|  | ||||
|     /* Init */ | ||||
|     if (inFile==NULL) { DISPLAY( "Pb opening %s\n", inFileName); return 11; } | ||||
|     if (inFileSize == UTIL_FILESIZE_UNKNOWN) { | ||||
|         DISPLAY("Pb evaluatin size of %s \n", inFileName); | ||||
|         fclose(inFile); | ||||
|         return 11; | ||||
|     } | ||||
|  | ||||
|     /* Memory allocation & restrictions */ | ||||
|     if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize; | ||||
|   | ||||
| @@ -684,6 +684,11 @@ static void BMK_loadFiles(void* buffer, size_t bufferSize, | ||||
|             fileSizes[n] = 0; | ||||
|             continue; | ||||
|         } | ||||
|         if (fileSize == UTIL_FILESIZE_UNKNOWN) { | ||||
|             DISPLAYLEVEL(2, "Cannot determine size of %s ...    \n", fileNamesTable[n]); | ||||
|             fileSizes[n] = 0; | ||||
|             continue; | ||||
|         } | ||||
|         f = fopen(fileNamesTable[n], "rb"); | ||||
|         if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]); | ||||
|         DISPLAYUPDATE(2, "Loading %s...       \r", fileNamesTable[n]); | ||||
| @@ -714,11 +719,13 @@ static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles, | ||||
|  | ||||
|     /* Load dictionary */ | ||||
|     if (dictFileName != NULL) { | ||||
|         U64 dictFileSize = UTIL_getFileSize(dictFileName); | ||||
|         if (dictFileSize > 64 MB) EXM_THROW(10, "dictionary file %s too large", dictFileName); | ||||
|         U64 const dictFileSize = UTIL_getFileSize(dictFileName); | ||||
|         if (dictFileSize > 64 MB) | ||||
|             EXM_THROW(10, "dictionary file %s too large", dictFileName); | ||||
|         dictBufferSize = (size_t)dictFileSize; | ||||
|         dictBuffer = malloc(dictBufferSize); | ||||
|         if (dictBuffer==NULL) EXM_THROW(11, "not enough memory for dictionary (%u bytes)", (U32)dictBufferSize); | ||||
|         if (dictBuffer==NULL) | ||||
|             EXM_THROW(11, "not enough memory for dictionary (%u bytes)", (U32)dictBufferSize); | ||||
|         BMK_loadFiles(dictBuffer, dictBufferSize, fileSizes, &dictFileName, 1); | ||||
|     } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user