Skip to content

Commit 2f669e6

Browse files
committed
Refactor PdfWriter to improve binary data handling by replacing placeholders with binary data during stream writing, enhancing object offset tracking, and restructuring catalog and pages writing methods for better clarity and performance.
1 parent d22f422 commit 2f669e6

File tree

1 file changed

+171
-75
lines changed

1 file changed

+171
-75
lines changed

Cspdf/Internal/PdfWriter.cs

Lines changed: 171 additions & 75 deletions
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,12 @@ internal class PdfWriter
1010
{
1111
private readonly PdfDocument _document;
1212
private readonly StringBuilder _content = new();
13-
private readonly List<(int position, byte[] data)> _binaryData = new();
13+
private readonly List<(string placeholder, byte[] data)> _binaryData = new();
1414
private int _objectNumber = 1;
1515
private readonly Dictionary<string, int> _objectMap = new();
16+
private readonly Dictionary<int, long> _objectOffsets = new();
17+
private readonly List<int> _imageObjectNumbers = new();
18+
private int _pagesObjectNumber = 0;
1619

1720
public PdfWriter(PdfDocument document)
1821
{
@@ -24,12 +27,18 @@ public void Write(Stream stream)
2427
_content.Clear();
2528
_objectMap.Clear();
2629
_binaryData.Clear();
30+
_objectOffsets.Clear();
31+
_imageObjectNumbers.Clear();
2732
_objectNumber = 1;
2833

2934
// PDF Header
3035
WriteLine("%PDF-1.7");
3136
WriteLine("%\xE2\xE3\xCF\xD3");
3237

38+
// Reserve object numbers for catalog and pages tree (we'll write them later)
39+
var catalogObjNum = GetNextObjectNumber();
40+
_pagesObjectNumber = GetNextObjectNumber();
41+
3342
// Write pages
3443
var pageRefs = new List<int>();
3544
foreach (var page in _document.Pages)
@@ -38,61 +47,106 @@ public void Write(Stream stream)
3847
pageRefs.Add(pageObjNum);
3948
}
4049

41-
// Write catalog
42-
var catalogObjNum = WriteCatalog(pageRefs);
50+
// Write catalog and pages tree with reserved numbers
51+
WriteCatalogWithNumber(catalogObjNum, pageRefs);
4352

4453
// Write document info
4554
var infoObjNum = WriteDocumentInfo();
4655

47-
// Write xref table
48-
var xrefOffset = WriteXRefTable();
49-
50-
// Write trailer
51-
WriteTrailer(catalogObjNum, infoObjNum, xrefOffset);
52-
5356
// Write to stream - replace placeholders with binary data
5457
var textContent = _content.ToString();
55-
var textBytes = Encoding.UTF8.GetBytes(textContent);
5658

57-
// Sort binary data by position
58-
var sortedBinaryData = _binaryData.OrderBy(b => b.position).ToList();
59-
60-
if (sortedBinaryData.Count == 0)
59+
if (_binaryData.Count == 0)
6160
{
6261
// No binary data, just write text
62+
var textBytes = Encoding.UTF8.GetBytes(textContent);
6363
stream.Write(textBytes, 0, textBytes.Length);
6464
return;
6565
}
6666

67-
// Write text content and binary data at correct positions
68-
int currentPos = 0;
67+
// Build final content by replacing placeholders with binary data
68+
using var ms = new MemoryStream();
69+
var writer = new StreamWriter(ms, Encoding.UTF8, leaveOpen: true);
70+
71+
int lastIndex = 0;
72+
long currentPosition = 0;
73+
74+
// Track object positions and replace placeholders
75+
var sortedData = _binaryData.OrderBy(b => textContent.IndexOf(b.placeholder)).ToList();
6976

70-
foreach (var (position, data) in sortedBinaryData)
77+
foreach (var (placeholder, data) in sortedData)
7178
{
72-
// Find placeholder in text
73-
var placeholder = $"<BINARY_DATA_{_binaryData.IndexOf((position, data))}>";
74-
var placeholderBytes = Encoding.UTF8.GetBytes(placeholder);
75-
var placeholderIndex = FindBytes(textBytes, placeholderBytes, currentPos);
76-
79+
var placeholderIndex = textContent.IndexOf(placeholder, lastIndex);
7780
if (placeholderIndex >= 0)
7881
{
79-
// Write text up to placeholder
80-
var textToWrite = new byte[placeholderIndex - currentPos];
81-
Array.Copy(textBytes, currentPos, textToWrite, 0, textToWrite.Length);
82-
stream.Write(textToWrite, 0, textToWrite.Length);
82+
// Write text before placeholder
83+
if (placeholderIndex > lastIndex)
84+
{
85+
var textPart = textContent.Substring(lastIndex, placeholderIndex - lastIndex);
86+
87+
// Track object offsets in this text part
88+
TrackObjectOffsets(textPart, currentPosition);
89+
90+
writer.Write(textPart);
91+
writer.Flush();
92+
currentPosition = ms.Position;
93+
}
8394

84-
// Write binary data
85-
stream.Write(data, 0, data.Length);
86-
currentPos = placeholderIndex + placeholderBytes.Length;
95+
// Write binary data directly to stream
96+
ms.Write(data, 0, data.Length);
97+
currentPosition = ms.Position;
98+
lastIndex = placeholderIndex + placeholder.Length;
8799
}
88100
}
89101

90-
// Write remaining text
91-
if (currentPos < textBytes.Length)
102+
// Write remaining text and track offsets
103+
if (lastIndex < textContent.Length)
92104
{
93-
var remainingText = new byte[textBytes.Length - currentPos];
94-
Array.Copy(textBytes, currentPos, remainingText, 0, remainingText.Length);
95-
stream.Write(remainingText, 0, remainingText.Length);
105+
var remainingText = textContent.Substring(lastIndex);
106+
TrackObjectOffsets(remainingText, currentPosition);
107+
writer.Write(remainingText);
108+
writer.Flush();
109+
}
110+
111+
// Write xref table
112+
var xrefOffset = WriteXRefTable(ms);
113+
114+
// Write trailer
115+
WriteTrailer(ms, catalogObjNum, infoObjNum, xrefOffset);
116+
117+
// Copy to output stream
118+
ms.Position = 0;
119+
ms.CopyTo(stream);
120+
}
121+
122+
/// <summary>
/// Scans a chunk of the textual PDF body for object headers of the form
/// "&lt;number&gt; 0 obj" and records the output byte offset of each header
/// in <c>_objectOffsets</c>, keyed by object number.
/// </summary>
/// <param name="text">The text chunk that is about to be written to the output.</param>
/// <param name="startPosition">Byte position in the output at which <paramref name="text"/> begins.</param>
/// <remarks>
/// NOTE(review): every occurrence of " 0 obj" preceded by digits is treated as an
/// object header, including one that happens to appear inside page content text.
/// A later match for the same number overwrites an earlier one.
/// </remarks>
private void TrackObjectOffsets(string text, long startPosition)
{
    const string ObjectMarker = " 0 obj";

    // Running UTF-8 size of text[0..measuredChars). Each header only measures the
    // text since the previous header instead of re-encoding the whole prefix.
    int measuredChars = 0;
    long measuredBytes = 0;

    int searchIndex = 0;
    while (searchIndex < text.Length)
    {
        // Ordinal search: PDF keywords are plain ASCII tokens, so culture-aware
        // matching is both slower and free to apply linguistic equivalences.
        int markerIndex = text.IndexOf(ObjectMarker, searchIndex, StringComparison.Ordinal);
        if (markerIndex < 0)
        {
            break;
        }

        // Walk left over the ASCII digits that form the object number.
        // (char.IsDigit would also accept non-ASCII digits that int.TryParse rejects.)
        int numberStart = markerIndex;
        while (numberStart > 0 && text[numberStart - 1] >= '0' && text[numberStart - 1] <= '9')
        {
            numberStart--;
        }

        if (numberStart < markerIndex
            && int.TryParse(text.Substring(numberStart, markerIndex - numberStart), out int objNum))
        {
            // numberStart always points at an ASCII digit, so this segment boundary
            // can never fall between the two halves of a surrogate pair.
            measuredBytes += Encoding.UTF8.GetByteCount(text, measuredChars, numberStart - measuredChars);
            measuredChars = numberStart;

            _objectOffsets[objNum] = startPosition + measuredBytes;
        }

        searchIndex = markerIndex + ObjectMarker.Length;
    }
}
98152

@@ -119,9 +173,27 @@ private int WritePage(IPage page)
119173
var objNum = GetNextObjectNumber();
120174
var contentObjNum = GetNextObjectNumber();
121175

122-
// Write page content
176+
// Write page content stream
123177
var content = WritePageContent(page);
124-
WriteObject(contentObjNum, content);
178+
var contentBytes = Encoding.UTF8.GetBytes(content);
179+
180+
// Write content stream object
181+
WriteLine($"{contentObjNum} 0 obj");
182+
WriteLine($@"<<
183+
/Length {contentBytes.Length}
184+
>>");
185+
WriteLine("stream");
186+
_content.Append(content);
187+
WriteLine("endstream");
188+
WriteLine("endobj");
189+
WriteLine("");
190+
191+
// Build XObject resources for images
192+
var xobjects = new StringBuilder();
193+
foreach (var imgNum in _imageObjectNumbers)
194+
{
195+
xobjects.Append($" /Im{imgNum} {imgNum} 0 R\n");
196+
}
125197

126198
// Write page object
127199
var pageContent = $@"<<
@@ -131,7 +203,7 @@ private int WritePage(IPage page)
131203
/Contents {contentObjNum} 0 R
132204
/Resources <<
133205
/XObject <<
134-
>>
206+
{xobjects} >>
135207
/Font <<
136208
>>
137209
>>
@@ -169,7 +241,11 @@ private string WritePageContent(IPage page)
169241
private int WriteImage(byte[] imageData, int width, int height)
170242
{
171243
var objNum = GetNextObjectNumber();
172-
var imageContent = $@"<<
244+
_imageObjectNumbers.Add(objNum);
245+
246+
// Write image XObject with stream
247+
WriteLine($"{objNum} 0 obj");
248+
WriteLine($@"<<
173249
/Type /XObject
174250
/Subtype /Image
175251
/Width {width}
@@ -178,50 +254,46 @@ private int WriteImage(byte[] imageData, int width, int height)
178254
/BitsPerComponent 8
179255
/Filter /DCTDecode
180256
/Length {imageData.Length}
181-
>>";
182-
WriteObject(objNum, imageContent);
183-
var streamStartPos = _content.Length;
257+
>>");
184258
WriteLine("stream");
185-
// Store binary data position and data
186-
var streamMarker = "stream\r\n";
187-
var streamMarkerBytes = Encoding.UTF8.GetBytes(streamMarker);
188-
var position = streamStartPos + streamMarkerBytes.Length;
189-
_binaryData.Add((position, imageData));
190-
// Write placeholder for binary data (will be replaced)
191-
WriteLine($"<BINARY_DATA_{_binaryData.Count - 1}>");
259+
var placeholder = $"<BINARY_DATA_{objNum}>";
260+
_binaryData.Add((placeholder, imageData));
261+
_content.Append(placeholder);
262+
WriteLine("");
192263
WriteLine("endstream");
264+
WriteLine("endobj");
265+
WriteLine("");
193266
return objNum;
194267
}
195268

196-
private int WriteCatalog(List<int> pageRefs)
197-
{
198-
var objNum = GetNextObjectNumber();
199-
var pagesObjNum = WritePages(pageRefs);
200-
var catalogContent = $@"<<
201-
/Type /Catalog
202-
/Pages {pagesObjNum} 0 R
203-
>>";
204-
WriteObject(objNum, catalogContent);
205-
return objNum;
206-
}
207-
208-
private int WritePages(List<int> pageRefs)
269+
/// <summary>
/// Emits the page-tree (/Pages) object followed by the document catalog,
/// using object numbers that were reserved before the pages were written.
/// </summary>
/// <param name="catalogObjNum">Reserved object number for the /Catalog object.</param>
/// <param name="pageRefs">Object numbers of the page objects, in document order.</param>
private void WriteCatalogWithNumber(int catalogObjNum, List<int> pageRefs)
{
    // Page tree: one indirect reference per page, space separated.
    var kidReferences = pageRefs.ConvertAll(pageNumber => $"{pageNumber} 0 R");
    var kidsArray = string.Join(" ", kidReferences);
    var pageTreeDictionary = $@"<<
/Type /Pages
/Kids [{kidsArray}]
/Count {pageRefs.Count}
>>";
    WriteObject(_pagesObjectNumber, pageTreeDictionary);

    // Catalog: points at the page tree written just above.
    var catalogDictionary = $@"<<
/Type /Catalog
/Pages {_pagesObjectNumber} 0 R
>>";
    WriteObject(catalogObjNum, catalogDictionary);
}
220293

221294
/// <summary>
/// Returns the object number stored in <c>_pagesObjectNumber</c>, i.e. the number
/// reserved for the /Pages object during the current write.
/// </summary>
private int GetPageTreeRef() => _pagesObjectNumber;
226298

227299
private int WriteDocumentInfo()
@@ -242,30 +314,53 @@ private int WriteDocumentInfo()
242314
return objNum;
243315
}
244316

245-
private int WriteXRefTable()
317+
/// <summary>
/// Appends a classic cross-reference table covering objects
/// 0 .. <c>_objectNumber</c> - 1 to <paramref name="ms"/>.
/// </summary>
/// <param name="ms">Stream that already contains the PDF body; the table is written at its current position.</param>
/// <returns>The byte offset at which the "xref" keyword starts (the value for "startxref").</returns>
private long WriteXRefTable(MemoryStream ms)
{
    var xrefOffset = ms.Position;

    // Two deliberate choices here:
    //  * BOM-less UTF-8, so the writer can never inject a preamble into the file.
    //  * A fixed "\n" line ending. Every xref entry must be exactly 20 bytes long
    //    including its end-of-line; the 19-character entries below (note the
    //    trailing space) plus "\n" give 20, whereas Environment.NewLine = "\r\n"
    //    would give 21 and corrupt the table.
    using var writer = new StreamWriter(ms, new UTF8Encoding(false), leaveOpen: true)
    {
        NewLine = "\n",
    };

    writer.WriteLine("xref");
    writer.WriteLine($"0 {_objectNumber}");
    writer.WriteLine("0000000000 65535 f ");

    for (int i = 1; i < _objectNumber; i++)
    {
        if (_objectOffsets.TryGetValue(i, out var offset))
        {
            writer.WriteLine($"{offset:D10} 00000 n ");
        }
        else
        {
            // No offset was recorded for this number, so the object was never
            // written. Mark it free rather than claiming an in-use object at
            // byte 0 (which is the "%PDF" header, not an object).
            writer.WriteLine("0000000000 65535 f ");
        }
    }

    // Flush explicitly so the table is in the stream before the offset is returned;
    // disposing the writer leaves the stream open (leaveOpen: true).
    writer.Flush();
    return xrefOffset;
}
250341

251-
private void WriteTrailer(int catalogObjNum, int infoObjNum, int xrefOffset)
342+
/// <summary>
/// Appends the trailer dictionary, the "startxref" pointer and the end-of-file
/// marker to <paramref name="ms"/>.
/// </summary>
/// <param name="ms">Stream receiving the trailer; written at its current position.</param>
/// <param name="catalogObjNum">Object number referenced by /Root.</param>
/// <param name="infoObjNum">Object number referenced by /Info.</param>
/// <param name="xrefOffset">Byte offset of the cross-reference table, written after "startxref".</param>
private void WriteTrailer(MemoryStream ms, int catalogObjNum, int infoObjNum, long xrefOffset)
{
    var trailerDictionary = $@"<<
/Size {_objectNumber}
/Root {catalogObjNum} 0 R
/Info {infoObjNum} 0 R
>>";

    // The trailer is just these five lines, emitted in order.
    string[] trailerLines =
    {
        "trailer",
        trailerDictionary,
        "startxref",
        xrefOffset.ToString(),
        "%%EOF",
    };

    var output = new StreamWriter(ms, Encoding.UTF8, leaveOpen: true);
    foreach (var line in trailerLines)
    {
        output.WriteLine(line);
    }

    output.Flush();
}
263357

264358
/// <summary>
/// Emits one indirect object: the "N 0 obj" header, the supplied body,
/// the "endobj" keyword and a trailing blank line.
/// </summary>
/// <param name="objNum">Object number to use in the header (generation is always 0).</param>
/// <param name="content">Already-serialized object body.</param>
private void WriteObject(int objNum, string content)
{
    string[] objectLines = { $"{objNum} 0 obj", content, "endobj", "" };
    foreach (var line in objectLines)
    {
        WriteLine(line);
    }
}
270365

271366
private int GetNextObjectNumber()
@@ -288,3 +383,4 @@ private string EscapeString(string str)
288383
}
289384
}
290385

386+

0 commit comments

Comments
 (0)