diff --git a/.github/workflows/publish-code-coverage.yml b/.github/workflows/publish-code-coverage.yml deleted file mode 100644 index 4f3b4d18..00000000 --- a/.github/workflows/publish-code-coverage.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: Publish Code Coverage - -on: - push: - branches: [ main ] -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - name: Setup .NET Core - uses: actions/setup-dotnet@v1 - with: - dotnet-version: 3.1.301 - - name: Install dependencies - run: dotnet restore - - name: Test - run: dotnet test /p:CollectCoverage=true /p:CoverletOutputFormat=lcov /p:CoverletOutput=./coverage/lcov.info - - name: Coveralls - uses: coverallsapp/github-action@master - with: - github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/publish-wasm-to-gh-pages.yml b/.github/workflows/publish-wasm-to-gh-pages.yml deleted file mode 100644 index 8a91180d..00000000 --- a/.github/workflows/publish-wasm-to-gh-pages.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: Publish Blazor - -on: - push: - branches: [ main ] -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - name: Setup .NET Core - uses: actions/setup-dotnet@v1 - with: - dotnet-version: 8.0.x - # Include wasm-tools to optimize build - - name: wasm-tools - run: dotner workload install wasm-tools - - name: Install dependencies - run: dotnet restore - - name: Build - run: dotnet publish RecursiveExtractor.Blazor --configuration Release --no-restore -o blazorOut - - name: GitHub Pages action - uses: peaceiris/actions-gh-pages@v3 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - publish_branch: gh-pages - publish_dir: blazorOut/wwwroot diff --git a/Pipelines/recursive-extractor-pr.yml b/Pipelines/recursive-extractor-pr.yml index 5e9f334f..45a6becf 100644 --- a/Pipelines/recursive-extractor-pr.yml +++ b/Pipelines/recursive-extractor-pr.yml @@ -13,7 +13,7 @@ resources: - repository: 
templates type: git name: SecurityEngineering/OSS-Tools-Pipeline-Templates - ref: refs/tags/v1.1.0 + ref: refs/tags/v1.1.1 variables: BuildConfiguration: 'Release' @@ -27,13 +27,13 @@ stages: parameters: jobName: 'lib_dotnet_test_windows' dotnetVersions: ['6.0.x','7.0.x','8.0.x'] - vmImage: 'oss-tools-win2022_1es-managed' + vmImage: 'win2022-image-base' projectPath: 'RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj' - template: dotnet-test-job.yml@templates parameters: jobName: 'cli_dotnet_test_windows' dotnetVersions: ['6.0.x','7.0.x','8.0.x'] - vmImage: 'oss-tools-win2022_1es-managed' + vmImage: 'win2022-image-base' projectPath: 'RecursiveExtractor.Cli.Tests/RecursiveExtractor.Cli.Tests.csproj' - stage: SDL diff --git a/Pipelines/recursive-extractor-release.yml b/Pipelines/recursive-extractor-release.yml index 65bf8aa1..f6cefe92 100644 --- a/Pipelines/recursive-extractor-release.yml +++ b/Pipelines/recursive-extractor-release.yml @@ -18,7 +18,7 @@ resources: - repository: templates type: git name: SecurityEngineering/OSS-Tools-Pipeline-Templates - ref: refs/tags/v1.1.0 + ref: refs/tags/v1.1.1 variables: BuildConfiguration: 'Release' @@ -32,13 +32,13 @@ stages: parameters: jobName: 'lib_dotnet_test_windows' dotnetVersions: ['6.0.x','7.0.x','8.0.x'] - vmImage: 'oss-tools-win2022_1es-managed' + vmImage: 'win2022-image-base' projectPath: 'RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj' - template: dotnet-test-job.yml@templates parameters: jobName: 'cli_dotnet_test_windows' dotnetVersions: ['6.0.x','7.0.x','8.0.x'] - vmImage: 'oss-tools-win2022_1es-managed' + vmImage: 'win2022-image-base' projectPath: 'RecursiveExtractor.Cli.Tests/RecursiveExtractor.Cli.Tests.csproj' - stage: SDL @@ -87,7 +87,7 @@ stages: displayName: Code Sign, Generate Hashes, Publish Public Releases pool: name: 'OSS-Tools-1ESPool' - vmImage: 'oss-tools-win2022_1es-managed' + vmImage: 'win2022-image-base' steps: - task: UseDotNet@2 # For ESRP. Do not use variable. 
inputs: diff --git a/README.md b/README.md index bea11e15..a23ced16 100644 --- a/README.md +++ b/README.md @@ -7,17 +7,18 @@ Recursive Extractor is a Cross-Platform [.NET Standard 2.0 Library](#library) an | | | | |-|-|-| | 7zip+ | ar | bzip2 | -| deb | gzip | iso | -| rar^ | tar | vhd | -| vhdx | vmdk | wim* | -| xzip | zip+ | | +| deb | dmg** | gzip | +| iso | rar^ | tar | +| vhd | vhdx | vmdk | +| wim* | xzip | zip+ |
Details
* Windows only
+ Encryption Supported
-^ Rar version 4 Encryption supported
+^ Encryption supported for Rar version 4 only
+** Limited support: only unencrypted HFS+ volumes that use certain compression schemes can be extracted.
# Variants @@ -25,9 +26,9 @@ Recursive Extractor is a Cross-Platform [.NET Standard 2.0 Library](#library) an ## Command Line ### Installing 1. Ensure you have the latest [.NET SDK](https://dotnet.microsoft.com/download). -2. run `dotnet tool install -g Microsoft.CST.RecursiveExtractor.Cli` +2. Run `dotnet tool install -g Microsoft.CST.RecursiveExtractor.Cli` -This adds `RecursiveExtractor` to your path so you can run it directly from the shell. +This adds `RecursiveExtractor` to your path so you can run it directly from your shell. ### Running Basic usage is: `RecursiveExtractor --input archive.ext --output outputDirectory` @@ -57,7 +58,7 @@ Run `RecursiveExtractor --help` for more details. ## .NET Standard Library -Recursive Extractor is available on NuGet as [Microsoft.CST.RecursiveExtractor](https://www.nuget.org/packages/Microsoft.CST.RecursiveExtractor/). Recursive Extractor targets netstandard2.0+ and the latest .NET, currently .NET 6.0 and .NET 7.0. +Recursive Extractor is available on NuGet as [Microsoft.CST.RecursiveExtractor](https://www.nuget.org/packages/Microsoft.CST.RecursiveExtractor/). Recursive Extractor targets netstandard2.0+ and the latest .NET, currently .NET 6.0, .NET 7.0 and .NET 8.0. ### Usage @@ -77,8 +78,7 @@ foreach(var file in extractor.Extract(path))
Extracting to Disk
-This code adapted from the Cli extracts the contents of given archive located at `options.Input` -to a directory located at `options.Output`, including extracting failed archives as themselves. +This code, adapted from the Cli, extracts the contents of a given archive located at `options.Input` to a directory located at `options.Output`, including extracting failed archives as themselves. ```csharp using Microsoft.CST.RecursiveExtractor; @@ -166,12 +166,12 @@ catch(OverflowException) RecursiveExtractor protects against [ZipSlip](https://snyk.io/research/zip-slip-vulnerability), [Quines, and Zip Bombs](https://en.wikipedia.org/wiki/Zip_bomb). Calls to Extract will throw an `OverflowException` when a Quine or Zip bomb is detected and a `TimeOutException` if `EnableTiming` is set and the specified time period has elapsed before completion. -Otherwise, invalid files found while crawling will emit a logger message and be skipped. RecursiveExtractor uses NLog for logging. +Otherwise, invalid files found while crawling will emit a logger message and be skipped. You can also enable `ExtractSelfOnFail` to return the original archive file on an extraction failure. ## Notes on Enumeration ### Multiple Enumeration -You should not iterate the Enumeration returned from the `Extract` and `ExtractAsync` interfaces multiple times, if you need to do so, convert the Enumeration to the collection of your choice first. +You should not iterate the Enumeration returned from the `Extract` and `ExtractAsync` interfaces multiple times. If you need to do so, convert the Enumeration to an in-memory collection first. 
### Parallel Enumeration If you want to enumerate the output with parallelization you should use a batching mechanism, for example: @@ -208,7 +208,7 @@ while (moreAvailable) ``` ### Disposing During Enumeration -If you are working with a very large archive or in particularly constrained environment you can reduce memory/file handle usage for the Content streams in each FileEntry by disposing as you iterate. +If you are working with a very large archive or in a particularly constrained environment, you can reduce memory and file handle usage for the Content streams in each FileEntry by disposing as you iterate. ```csharp var results = extractor.Extract(path); @@ -217,7 +217,7 @@ foreach(var file in results) using var theStream = file.Content; // Do something with the stream. _ = theStream.ReadByte(); - // The stream is disposed here from the using statement + // The stream is disposed here by the using statement } ``` @@ -229,11 +229,11 @@ If you are having trouble parsing a specific archive of one of the supported for # Dependencies -Recursive Extractor uses a number of libraries to parse archives. +Recursive Extractor aims to provide a unified interface to extract arbitrary archives and relies on a number of libraries to parse the archives. 
* [SharpZipLib](https://github.com/icsharpcode/SharpZipLib) * [SharpCompress](https://github.com/adamhathcock/sharpcompress) -* [DiscUtils](https://github.com/discutils/discutils) +* [LTRData/DiscUtils](https://github.com/LTRData/discutils) # Contributing diff --git a/RecursiveExtractor.Blazor/RecursiveExtractor.Blazor.csproj b/RecursiveExtractor.Blazor/RecursiveExtractor.Blazor.csproj index 6dc8a452..76442f27 100644 --- a/RecursiveExtractor.Blazor/RecursiveExtractor.Blazor.csproj +++ b/RecursiveExtractor.Blazor/RecursiveExtractor.Blazor.csproj @@ -7,8 +7,8 @@ - - + + diff --git a/RecursiveExtractor.Cli.Tests/RecursiveExtractor.Cli.Tests.csproj b/RecursiveExtractor.Cli.Tests/RecursiveExtractor.Cli.Tests.csproj index a3b22db1..732733ff 100644 --- a/RecursiveExtractor.Cli.Tests/RecursiveExtractor.Cli.Tests.csproj +++ b/RecursiveExtractor.Cli.Tests/RecursiveExtractor.Cli.Tests.csproj @@ -11,8 +11,8 @@ - - + + all runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/RecursiveExtractor.Tests/ExtractorTests/ExpectedNumFilesTests.cs b/RecursiveExtractor.Tests/ExtractorTests/ExpectedNumFilesTests.cs index f8763dbb..0736deca 100644 --- a/RecursiveExtractor.Tests/ExtractorTests/ExpectedNumFilesTests.cs +++ b/RecursiveExtractor.Tests/ExtractorTests/ExpectedNumFilesTests.cs @@ -42,7 +42,8 @@ public static IEnumerable ArchiveData new object[] { "EmptyFile.txt", 1 }, new object[] { "TestDataArchivesNested.Zip", 54 }, new object[] { "UdfTest.iso", 3 }, - new object[] { "UdfTestWithMultiSystem.iso", 3 } + new object[] { "UdfTestWithMultiSystem.iso", 3 }, + new object[] { "HfsSampleUDCO.dmg", 2 } }; } } @@ -73,7 +74,8 @@ public static IEnumerable NoRecursionData new object[] { "TestData.wim", 3 }, new object[] { "EmptyFile.txt", 1 }, new object[] { "TestDataArchivesNested.Zip", 14 }, - new object[] { "UdfTestWithMultiSystem.iso", 3 } + new object[] { "UdfTestWithMultiSystem.iso", 3 }, + new object[] { "HfsSampleUDCO.dmg", 2 } }; } } diff --git 
a/RecursiveExtractor.Tests/ExtractorTests/MiniMagicTests.cs b/RecursiveExtractor.Tests/ExtractorTests/MiniMagicTests.cs index 361ae189..016ef133 100644 --- a/RecursiveExtractor.Tests/ExtractorTests/MiniMagicTests.cs +++ b/RecursiveExtractor.Tests/ExtractorTests/MiniMagicTests.cs @@ -23,6 +23,7 @@ public class MiniMagicTests : BaseExtractorTestClass [DataRow("TestData.vhdx", ArchiveFileType.VHDX)] [DataRow("TestData.wim", ArchiveFileType.WIM)] [DataRow("Empty.vmdk", ArchiveFileType.VMDK)] + [DataRow("HfsSampleUDCO.dmg", ArchiveFileType.DMG)] [DataRow("EmptyFile.txt", ArchiveFileType.UNKNOWN)] public void TestMiniMagic(string fileName, ArchiveFileType expectedArchiveFileType) { diff --git a/RecursiveExtractor.Tests/ExtractorTests/MiscTests.cs b/RecursiveExtractor.Tests/ExtractorTests/MiscTests.cs index b1d558e7..e375904a 100644 --- a/RecursiveExtractor.Tests/ExtractorTests/MiscTests.cs +++ b/RecursiveExtractor.Tests/ExtractorTests/MiscTests.cs @@ -49,6 +49,7 @@ public async Task ExtractFlatFileAsync(string fileName, bool requireTopLevelToBe [DataRow("TestDataCorrupt.tar", true, 1, 1)] [DataRow("TestDataCorrupt.tar.zip", false, 0, 2)] [DataRow("TestDataCorrupt.tar.zip", true, 0, 2)] + [DataRow("TestDataCorruptWim.zip", true, 0, 0)] public void ExtractCorruptArchive(string fileName, bool requireTopLevelToBeArchive, int expectedNumFailures, int expectedNumFiles) { var extractor = new Extractor(); diff --git a/RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj b/RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj index 8b24932b..1ff5e559 100644 --- a/RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj +++ b/RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj @@ -9,12 +9,12 @@ - - - - - - + + + + + + @@ -110,6 +110,12 @@ PreserveNewest + + PreserveNewest + + + PreserveNewest + PreserveNewest @@ -122,9 +128,24 @@ PreserveNewest + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + PreserveNewest + + PreserveNewest + + + PreserveNewest + 
PreserveNewest @@ -176,6 +197,9 @@ PreserveNewest + + PreserveNewest + PreserveNewest diff --git a/RecursiveExtractor.Tests/TestData/TestDataArchives/HfsSampleUDCO.dmg b/RecursiveExtractor.Tests/TestData/TestDataArchives/HfsSampleUDCO.dmg new file mode 100644 index 00000000..c80f517f Binary files /dev/null and b/RecursiveExtractor.Tests/TestData/TestDataArchives/HfsSampleUDCO.dmg differ diff --git a/RecursiveExtractor.Tests/TestData/TestDataArchives/TestDataCorruptWim.zip b/RecursiveExtractor.Tests/TestData/TestDataArchives/TestDataCorruptWim.zip new file mode 100644 index 00000000..89235f2a Binary files /dev/null and b/RecursiveExtractor.Tests/TestData/TestDataArchives/TestDataCorruptWim.zip differ diff --git a/RecursiveExtractor.sln b/RecursiveExtractor.sln index 222c4cd9..8cd99673 100644 --- a/RecursiveExtractor.sln +++ b/RecursiveExtractor.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 16 -VisualStudioVersion = 16.0.30309.148 +# Visual Studio Version 17 +VisualStudioVersion = 17.8.34408.163 MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "RecursiveExtractor", "RecursiveExtractor\RecursiveExtractor.csproj", "{A7F7492B-60E0-468C-B267-BA60EC131E86}" EndProject @@ -15,9 +15,9 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "RecursiveExtractor.Blazor", "RecursiveExtractor.Blazor\RecursiveExtractor.Blazor.csproj", "{18D0803C-052E-4338-9162-F2DB8F8E51E2}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "RecursiveExtractor.Cli", "RecursiveExtractor.Cli\RecursiveExtractor.Cli.csproj", "{443B4E50-9AAF-436E-B3DF-644F782AF9B6}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "RecursiveExtractor.Cli", "RecursiveExtractor.Cli\RecursiveExtractor.Cli.csproj", "{443B4E50-9AAF-436E-B3DF-644F782AF9B6}" EndProject 
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "RecursiveExtractor.Cli.Tests", "RecursiveExtractor.Cli.Tests\RecursiveExtractor.Cli.Tests.csproj", "{F37B314B-F641-4336-BCD6-BC5B85BEC5DB}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "RecursiveExtractor.Cli.Tests", "RecursiveExtractor.Cli.Tests\RecursiveExtractor.Cli.Tests.csproj", "{F37B314B-F641-4336-BCD6-BC5B85BEC5DB}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution diff --git a/RecursiveExtractor/Extractor.cs b/RecursiveExtractor/Extractor.cs index 5373ff8c..290b554d 100644 --- a/RecursiveExtractor/Extractor.cs +++ b/RecursiveExtractor/Extractor.cs @@ -61,6 +61,7 @@ public void SetDefaultExtractors() SetExtractor(ArchiveFileType.VMDK, new VmdkExtractor(this)); SetExtractor(ArchiveFileType.XZ, new XzExtractor(this)); SetExtractor(ArchiveFileType.ZIP, new ZipExtractor(this)); + SetExtractor(ArchiveFileType.DMG, new DmgExtractor(this)); if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { SetExtractor(ArchiveFileType.WIM, new WimExtractor(this)); diff --git a/RecursiveExtractor/Extractors/DiscCommon.cs b/RecursiveExtractor/Extractors/DiscCommon.cs index 6f1fe4a8..d15c8fcf 100644 --- a/RecursiveExtractor/Extractors/DiscCommon.cs +++ b/RecursiveExtractor/Extractors/DiscCommon.cs @@ -1,10 +1,9 @@ using DiscUtils; using System; -using System.Collections.Concurrent; using System.Collections.Generic; +using System.Collections.ObjectModel; using System.IO; using System.Linq; -using System.Threading.Tasks; namespace Microsoft.CST.RecursiveExtractor.Extractors { @@ -28,7 +27,7 @@ public static class DiscCommon /// public static async IAsyncEnumerable DumpLogicalVolumeAsync(LogicalVolumeInfo volume, string parentPath, ExtractorOptions options, ResourceGovernor governor, Extractor Context, FileEntry? parent = null, bool topLevel = true) { - DiscUtils.FileSystemInfo[]? fsInfos = null; + ReadOnlyCollection? 
using DiscUtils;
using DiscUtils.Dmg;
using DiscUtils.Streams;
using System;
using System.Collections.Generic;

namespace Microsoft.CST.RecursiveExtractor.Extractors
{
    /// <summary>
    /// The DMG image extractor implementation.
    /// </summary>
    public class DmgExtractor : AsyncExtractorInterface
    {
        /// <summary>
        /// The constructor takes the Extractor context for recursion.
        /// </summary>
        /// <param name="context">The Extractor context.</param>
        public DmgExtractor(Extractor context)
        {
            Context = context;
        }

        private readonly NLog.Logger Logger = NLog.LogManager.GetCurrentClassLogger();

        internal Extractor Context { get; }

        /// <summary>
        /// Asynchronously extracts the files contained in a DMG image.
        /// </summary>
        /// <param name="fileEntry">The entry whose <c>Content</c> stream holds the DMG image.</param>
        /// <param name="options">Extraction options; <c>ExtractSelfOnFail</c> controls whether the entry itself is returned when the image cannot be read.</param>
        /// <param name="governor">Passed through to <see cref="DiscCommon.DumpLogicalVolumeAsync"/>.</param>
        /// <param name="topLevel">Passed through to <see cref="DiscCommon.DumpLogicalVolumeAsync"/>.</param>
        /// <returns>
        /// The entries found in each logical volume, or <paramref name="fileEntry"/> marked as
        /// <see cref="FileEntryStatus.FailedArchive"/> when the image cannot be read and
        /// <c>ExtractSelfOnFail</c> is set.
        /// </returns>
        public async IAsyncEnumerable<FileEntry> ExtractAsync(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor, bool topLevel = true)
        {
            Disk? disk = null;
            try
            {
                var logicalVolumes = TryGetLogicalVolumes(fileEntry, out disk);
                if (logicalVolumes is null)
                {
                    if (options.ExtractSelfOnFail)
                    {
                        fileEntry.EntryStatus = FileEntryStatus.FailedArchive;
                        yield return fileEntry;
                    }
                    yield break;
                }

                foreach (var volume in logicalVolumes)
                {
                    await foreach (var entry in DiscCommon.DumpLogicalVolumeAsync(volume, fileEntry.FullPath, options, governor, Context, fileEntry, topLevel))
                    {
                        yield return entry;
                    }
                }
            }
            finally
            {
                // An iterator's finally block also runs when the consumer disposes the
                // enumerator early (break, First(), exception in the loop body), so the
                // disk is released even if enumeration never reaches the end.
                disk?.Dispose();
            }
        }

        /// <summary>
        /// Extracts the files contained in a DMG image.
        /// </summary>
        /// <param name="fileEntry">The entry whose <c>Content</c> stream holds the DMG image.</param>
        /// <param name="options">Extraction options; <c>ExtractSelfOnFail</c> controls whether the entry itself is returned when the image cannot be read.</param>
        /// <param name="governor">Passed through to <see cref="DiscCommon.DumpLogicalVolume"/>.</param>
        /// <param name="topLevel">Passed through to <see cref="DiscCommon.DumpLogicalVolume"/>.</param>
        /// <returns>
        /// The entries found in each logical volume, or <paramref name="fileEntry"/> marked as
        /// <see cref="FileEntryStatus.FailedArchive"/> when the image cannot be read and
        /// <c>ExtractSelfOnFail</c> is set.
        /// </returns>
        public IEnumerable<FileEntry> Extract(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor, bool topLevel = true)
        {
            Disk? disk = null;
            try
            {
                var logicalVolumes = TryGetLogicalVolumes(fileEntry, out disk);
                if (logicalVolumes is null)
                {
                    if (options.ExtractSelfOnFail)
                    {
                        fileEntry.EntryStatus = FileEntryStatus.FailedArchive;
                        yield return fileEntry;
                    }
                    yield break;
                }

                foreach (var volume in logicalVolumes)
                {
                    foreach (var entry in DiscCommon.DumpLogicalVolume(volume, fileEntry.FullPath, options, governor, Context, fileEntry, topLevel))
                    {
                        yield return entry;
                    }
                }
            }
            finally
            {
                // Runs on normal completion, on early disposal of the enumerator and on
                // exceptions thrown while dumping a volume.
                disk?.Dispose();
            }
        }

        /// <summary>
        /// Opens the DMG image in <paramref name="fileEntry"/> and enumerates its logical volumes.
        /// Any failure is logged at debug level and reported as a <c>null</c> return value.
        /// </summary>
        /// <param name="fileEntry">The entry whose <c>Content</c> stream holds the DMG image.</param>
        /// <param name="disk">
        /// The opened disk, or <c>null</c> if it could not be constructed. It may be non-null even
        /// when the method returns <c>null</c> (volume enumeration failed after the disk was opened);
        /// the caller is responsible for disposing it in every case.
        /// </param>
        /// <returns>The logical volumes of the image, or <c>null</c> when the image could not be read.</returns>
        private LogicalVolumeInfo[]? TryGetLogicalVolumes(FileEntry fileEntry, out Disk? disk)
        {
            disk = null;
            try
            {
                // Ownership.None is passed so the disk does not take ownership of
                // fileEntry.Content; the entry's stream stays under the caller's control.
                disk = new Disk(fileEntry.Content, Ownership.None);
                var manager = new VolumeManager(disk);
                return manager.GetLogicalVolumes();
            }
            catch (Exception e)
            {
                Logger.Debug("Error reading {0} disk at {1} ({2}:{3})", fileEntry.ArchiveType, fileEntry.FullPath, e.GetType(), e.Message);
                return null;
            }
        }
    }
}
stream = null; try @@ -119,7 +121,7 @@ public IEnumerable Extract(FileEntry fileEntry, ExtractorOptions opti } catch (Exception e) { - Logger.Debug("Error reading {0} from WIM {1} ({2}:{3})", file, image.FriendlyName, e.GetType(), e.Message); + Logger.Debug("Error reading {0} from WIM image {1} in {2} ({3}:{4})", file, i, fileEntry.FullPath, e.GetType(), e.Message); } if (stream != null) { @@ -144,12 +146,29 @@ public IEnumerable Extract(FileEntry fileEntry, ExtractorOptions opti } else { + fileEntry.EntryStatus = FileEntryStatus.FailedArchive; if (options.ExtractSelfOnFail) { - fileEntry.EntryStatus = FileEntryStatus.FailedArchive; yield return fileEntry; } } } + + private bool TryGetImage(DiscUtils.Wim.WimFile wimFile, int index, out DiscUtils.Wim.WimFileSystem? image) + { + image = null; + + try + { + image = wimFile.GetImage(index); + } + catch (Exception e) + { + // Image may be corrupt or invalid + Logger.Debug(e, "Failed to retrieve WIM image with index {index}.", index); + } + + return image is not null; + } } } \ No newline at end of file diff --git a/RecursiveExtractor/MiniMagic.cs b/RecursiveExtractor/MiniMagic.cs index 0c1544b3..932ac5db 100644 --- a/RecursiveExtractor/MiniMagic.cs +++ b/RecursiveExtractor/MiniMagic.cs @@ -81,6 +81,10 @@ public enum ArchiveFileType /// VMDK, /// + /// A DMG disc image. + /// + DMG, + /// /// Unused. 
/// INVALID @@ -117,6 +121,22 @@ public static ArchiveFileType DetectFileType(Stream fileStream) } var initialPosition = fileStream.Position; var buffer = new byte[9]; + // DMG format uses the magic value 'koly' at the start of the 512 byte footer at the end of the file + // Due to compression used, needs to be first or can be misidentified as other formats + // https://newosxbook.com/DMG.html + if (fileStream.Length > 512) + { + var dmgFooterMagic = new byte[] { 0x6b, 0x6f, 0x6c, 0x79 }; + fileStream.Position = fileStream.Length - 0x200; // Footer position + fileStream.Read(buffer, 0, 4); + fileStream.Position = initialPosition; + + if (dmgFooterMagic.SequenceEqual(buffer[0..4])) + { + return ArchiveFileType.DMG; + } + } + if (fileStream.Length >= 9) { fileStream.Position = 0; diff --git a/RecursiveExtractor/RecursiveExtractor.csproj b/RecursiveExtractor/RecursiveExtractor.csproj index a1015211..dc710e4d 100644 --- a/RecursiveExtractor/RecursiveExtractor.csproj +++ b/RecursiveExtractor/RecursiveExtractor.csproj @@ -25,24 +25,25 @@ - - - - - - - - - - - - - + + + + + + + + + + + + + + - +