Complete implementation of the Pdftract NuGet package as a subprocess-based SDK with async-first design using System.Diagnostics.Process and System.Text.Json. Implementation: - All 9 contract methods (ExtractAsync, ExtractTextAsync, etc.) with sync wrappers in Pdftract.Sync.cs - 8 exception types inheriting from PdftractException base class - Source discriminated union (PathSource, UrlSource, BytesSource) with FromPath, FromUrl, FromUri, FromBytes factory methods - C# record types for all models (Document, Page, Metadata, etc.) - ExtractOptions, SearchOptions, HashOptions with PascalCase properties - Source-generated JSON serialization via JsonContext for Native AOT - IAsyncEnumerable streaming for NDJSON outputs - CancellationToken propagation to Process.Kill(entireProcessTree: true) Bug fixes: - Fixed ArgumentList handling (was adding List as single element) - Added source.Dispose() cleanup for BytesSource temporary files - Added cleanup for VerifyReceiptAsync temporary receipt file - Added process.EnableRaisingEvents for proper event handling - Fixed output capture to include newlines between lines - Changed to source-generated JSON (JsonContext) instead of reflection Acceptance criteria: - All 9 methods exposed as both async and sync variants - All 8 exception classes inherit from PdftractException - Models as C# records - Supports net8.0 and net9.0 - CancellationToken terminates subprocess Files modified: - pdftract-dotnet/src/Pdftract/Pdftract.cs - pdftract-dotnet/src/Pdftract/Pdftract.Sync.cs - pdftract-dotnet/src/Pdftract/Source/Source.cs - pdftract-dotnet/src/Pdftract/Models/Document.cs - pdftract-dotnet/src/Pdftract/Models/JsonContext.cs - pdftract-dotnet/tests/Pdftract.Tests/ConformanceTests.cs - pdftract-dotnet/README.md - pdftract-dotnet/notes/pdftract-1w22d.md Co-Authored-By: Claude Code <noreply@anthropic.com>
131 lines
3.1 KiB
C#
131 lines
3.1 KiB
C#
namespace Pdftract;
|
|
|
|
/// <summary>
/// Represents a PDF source (file path, URL, or raw bytes).
/// </summary>
/// <remarks>
/// Instances are obtained through the static factory methods on this class,
/// each of which wraps one of the concrete source types.
/// </remarks>
public abstract class Source
{
    /// <summary>
    /// Returns command-line arguments for the source.
    /// </summary>
    /// <returns>The argument tokens that identify this source, in order.</returns>
    internal abstract List<string> ToArgs();

    /// <summary>
    /// Performs cleanup (e.g., deletes temporary files).
    /// The base implementation does nothing.
    /// </summary>
    internal virtual void Dispose() { }

    /// <summary>
    /// Creates a Source from a local file path.
    /// </summary>
    /// <param name="path">Path of the PDF file.</param>
    /// <returns>A <see cref="PathSource"/> for <paramref name="path"/>.</returns>
    public static Source FromPath(string path) => new PathSource(path);

    /// <summary>
    /// Creates a Source from a URL string.
    /// </summary>
    /// <param name="url">URL of the PDF; must start with <c>http://</c> or <c>https://</c>.</param>
    /// <returns>A <see cref="UrlSource"/> for <paramref name="url"/>.</returns>
    public static Source FromUrl(string url) => new UrlSource(url);

    /// <summary>
    /// Creates a Source from a URI.
    /// </summary>
    /// <param name="uri">Absolute URI of the PDF.</param>
    /// <returns>A <see cref="UrlSource"/> for the URI's escaped absolute form.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="uri"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="uri"/> is relative, or its scheme is rejected by <see cref="UrlSource"/>.
    /// </exception>
    public static Source FromUri(Uri uri)
    {
        ArgumentNullException.ThrowIfNull(uri);

        // AbsoluteUri is only defined for absolute URIs; reject relative ones
        // with the same exception type the URL validation would produce.
        if (!uri.IsAbsoluteUri)
        {
            throw new ArgumentException("URI must be an absolute http:// or https:// URI", nameof(uri));
        }

        // Uri.ToString() returns an unescaped display form (e.g. "%20" becomes
        // a literal space); AbsoluteUri keeps the percent-encoding intact.
        return new UrlSource(uri.AbsoluteUri);
    }

    /// <summary>
    /// Creates a Source from a byte array.
    /// </summary>
    /// <param name="data">The raw PDF bytes.</param>
    /// <returns>A <see cref="BytesSource"/> wrapping <paramref name="data"/>.</returns>
    public static Source FromBytes(byte[] data) => new BytesSource(data);

    /// <summary>
    /// Creates a Source from a file by reading it into memory.
    /// </summary>
    /// <param name="path">Path of the file whose entire contents are read.</param>
    /// <returns>A <see cref="BytesSource"/> holding the file's bytes.</returns>
    public static Source FromFileBytes(string path)
    {
        var data = File.ReadAllBytes(path);
        return new BytesSource(data);
    }
}
|
|
|
|
/// <summary>
/// A source that refers to a PDF stored on the local filesystem.
/// </summary>
public sealed class PathSource : Source
{
    // Absolute form of the path supplied to the constructor.
    private readonly string _absolutePath;

    /// <summary>
    /// Creates a source for the given file path, converting it to an
    /// absolute path via <see cref="Path.GetFullPath(string)"/>.
    /// </summary>
    /// <param name="path">Absolute or relative path of the PDF file.</param>
    public PathSource(string path) => _absolutePath = Path.GetFullPath(path);

    /// <summary>
    /// Returns the absolute path as the only command-line argument.
    /// </summary>
    internal override List<string> ToArgs() => new List<string> { _absolutePath };
}
|
|
|
|
/// <summary>
/// A remote URL source.
/// </summary>
public sealed class UrlSource : Source
{
    private readonly string _url;

    /// <summary>
    /// Creates a source for a PDF located at a remote URL.
    /// </summary>
    /// <param name="url">
    /// The URL; must begin with <c>http://</c> or <c>https://</c> (compared case-insensitively).
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="url"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="url"/> does not start with <c>http://</c> or <c>https://</c>.
    /// </exception>
    public UrlSource(string url)
    {
        // Fail with a descriptive argument exception rather than a
        // NullReferenceException from the StartsWith calls below.
        ArgumentNullException.ThrowIfNull(url);

        if (!url.StartsWith("http://", StringComparison.OrdinalIgnoreCase) &&
            !url.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
        {
            throw new ArgumentException("URL must start with http:// or https://", nameof(url));
        }

        _url = url;
    }

    /// <summary>
    /// Returns the <c>--url</c> flag followed by the URL.
    /// </summary>
    internal override List<string> ToArgs()
    {
        return new() { "--url", _url };
    }
}
|
|
|
|
/// <summary>
/// An in-memory byte array source.
/// Creates a temporary file that is cleaned up after use.
/// </summary>
/// <remarks>
/// The temporary file is created on the first call to <see cref="ToArgs"/> and
/// removed by <see cref="Dispose"/>. The supplied array is held by reference,
/// not copied.
/// </remarks>
public sealed class BytesSource : Source
{
    private readonly byte[] _data;

    // Path of the temporary file holding _data; null until ToArgs creates it
    // and again after Dispose removes it.
    private string? _tmpPath;

    /// <summary>
    /// Creates a source from raw PDF bytes.
    /// </summary>
    /// <param name="data">The PDF content.</param>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
    public BytesSource(byte[] data)
    {
        _data = data ?? throw new ArgumentNullException(nameof(data));
    }

    /// <summary>
    /// Returns the path of a temporary file containing the bytes, writing the
    /// file first if it does not exist yet. Repeated calls reuse the same file
    /// until <see cref="Dispose"/> is called.
    /// </summary>
    internal override List<string> ToArgs()
    {
        _tmpPath ??= WriteTempFile(_data);
        return new() { _tmpPath };
    }

    /// <summary>
    /// Deletes the temporary file, if one was created. Deletion failures are ignored.
    /// </summary>
    internal override void Dispose()
    {
        var path = _tmpPath;
        _tmpPath = null;

        if (path is not null)
        {
            DeleteQuietly(path);
        }
    }

    // Creates a temp file containing the given bytes and returns its path.
    // If the write fails, the already-created file is removed before the
    // exception propagates, so a failed write does not leak a temp file.
    private static string WriteTempFile(byte[] data)
    {
        var tmpFile = Path.GetTempFileName();
        try
        {
            File.WriteAllBytes(tmpFile, data);
        }
        catch
        {
            DeleteQuietly(tmpFile);
            throw;
        }

        return tmpFile;
    }

    // Best-effort delete: File.Delete is already a no-op for a missing file,
    // and I/O or permission failures during cleanup are deliberately ignored.
    private static void DeleteQuietly(string path)
    {
        try
        {
            File.Delete(path);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Ignore cleanup errors
        }
    }
}
|