@@ -10,9 +10,12 @@ internal class PdfWriter
1010{
1111 private readonly PdfDocument _document ;
1212 private readonly StringBuilder _content = new ( ) ;
13- private readonly List < ( int position , byte [ ] data ) > _binaryData = new ( ) ;
13+ private readonly List < ( string placeholder , byte [ ] data ) > _binaryData = new ( ) ;
1414 private int _objectNumber = 1 ;
1515 private readonly Dictionary < string , int > _objectMap = new ( ) ;
16+ private readonly Dictionary < int , long > _objectOffsets = new ( ) ;
17+ private readonly List < int > _imageObjectNumbers = new ( ) ;
18+ private int _pagesObjectNumber = 0 ;
1619
1720 public PdfWriter ( PdfDocument document )
1821 {
@@ -24,12 +27,18 @@ public void Write(Stream stream)
2427 _content . Clear ( ) ;
2528 _objectMap . Clear ( ) ;
2629 _binaryData . Clear ( ) ;
30+ _objectOffsets . Clear ( ) ;
31+ _imageObjectNumbers . Clear ( ) ;
2732 _objectNumber = 1 ;
2833
2934 // PDF Header
3035 WriteLine ( "%PDF-1.7" ) ;
3136 WriteLine ( "%\xE2 \xE3 \xCF \xD3 " ) ;
3237
38+ // Reserve object numbers for catalog and pages tree (we'll write them later)
39+ var catalogObjNum = GetNextObjectNumber ( ) ;
40+ _pagesObjectNumber = GetNextObjectNumber ( ) ;
41+
3342 // Write pages
3443 var pageRefs = new List < int > ( ) ;
3544 foreach ( var page in _document . Pages )
@@ -38,61 +47,106 @@ public void Write(Stream stream)
3847 pageRefs . Add ( pageObjNum ) ;
3948 }
4049
41- // Write catalog
42- var catalogObjNum = WriteCatalog ( pageRefs ) ;
50+ // Write catalog and pages tree with reserved numbers
51+ WriteCatalogWithNumber ( catalogObjNum , pageRefs ) ;
4352
4453 // Write document info
4554 var infoObjNum = WriteDocumentInfo ( ) ;
4655
47- // Write xref table
48- var xrefOffset = WriteXRefTable ( ) ;
49-
50- // Write trailer
51- WriteTrailer ( catalogObjNum , infoObjNum , xrefOffset ) ;
52-
5356 // Write to stream - replace placeholders with binary data
5457 var textContent = _content . ToString ( ) ;
55- var textBytes = Encoding . UTF8 . GetBytes ( textContent ) ;
5658
57- // Sort binary data by position
58- var sortedBinaryData = _binaryData . OrderBy ( b => b . position ) . ToList ( ) ;
59-
60- if ( sortedBinaryData . Count == 0 )
59+ if ( _binaryData . Count == 0 )
6160 {
6261 // No binary data, just write text
62+ var textBytes = Encoding . UTF8 . GetBytes ( textContent ) ;
6363 stream . Write ( textBytes , 0 , textBytes . Length ) ;
6464 return ;
6565 }
6666
67- // Write text content and binary data at correct positions
68- int currentPos = 0 ;
67+ // Build final content by replacing placeholders with binary data
68+ using var ms = new MemoryStream ( ) ;
69+ var writer = new StreamWriter ( ms , Encoding . UTF8 , leaveOpen : true ) ;
70+
71+ int lastIndex = 0 ;
72+ long currentPosition = 0 ;
73+
74+ // Track object positions and replace placeholders
75+ var sortedData = _binaryData . OrderBy ( b => textContent . IndexOf ( b . placeholder ) ) . ToList ( ) ;
6976
70- foreach ( var ( position , data ) in sortedBinaryData )
77+ foreach ( var ( placeholder , data ) in sortedData )
7178 {
72- // Find placeholder in text
73- var placeholder = $ "<BINARY_DATA_{ _binaryData . IndexOf ( ( position , data ) ) } >";
74- var placeholderBytes = Encoding . UTF8 . GetBytes ( placeholder ) ;
75- var placeholderIndex = FindBytes ( textBytes , placeholderBytes , currentPos ) ;
76-
79+ var placeholderIndex = textContent . IndexOf ( placeholder , lastIndex ) ;
7780 if ( placeholderIndex >= 0 )
7881 {
79- // Write text up to placeholder
80- var textToWrite = new byte [ placeholderIndex - currentPos ] ;
81- Array . Copy ( textBytes , currentPos , textToWrite , 0 , textToWrite . Length ) ;
82- stream . Write ( textToWrite , 0 , textToWrite . Length ) ;
82+ // Write text before placeholder
83+ if ( placeholderIndex > lastIndex )
84+ {
85+ var textPart = textContent . Substring ( lastIndex , placeholderIndex - lastIndex ) ;
86+
87+ // Track object offsets in this text part
88+ TrackObjectOffsets ( textPart , currentPosition ) ;
89+
90+ writer . Write ( textPart ) ;
91+ writer . Flush ( ) ;
92+ currentPosition = ms . Position ;
93+ }
8394
84- // Write binary data
85- stream . Write ( data , 0 , data . Length ) ;
86- currentPos = placeholderIndex + placeholderBytes . Length ;
95+ // Write binary data directly to stream
96+ ms . Write ( data , 0 , data . Length ) ;
97+ currentPosition = ms . Position ;
98+ lastIndex = placeholderIndex + placeholder . Length ;
8799 }
88100 }
89101
90- // Write remaining text
91- if ( currentPos < textBytes . Length )
102+ // Write remaining text and track offsets
103+ if ( lastIndex < textContent . Length )
92104 {
93- var remainingText = new byte [ textBytes . Length - currentPos ] ;
94- Array . Copy ( textBytes , currentPos , remainingText , 0 , remainingText . Length ) ;
95- stream . Write ( remainingText , 0 , remainingText . Length ) ;
105+ var remainingText = textContent . Substring ( lastIndex ) ;
106+ TrackObjectOffsets ( remainingText , currentPosition ) ;
107+ writer . Write ( remainingText ) ;
108+ writer . Flush ( ) ;
109+ }
110+
111+ // Write xref table
112+ var xrefOffset = WriteXRefTable ( ms ) ;
113+
114+ // Write trailer
115+ WriteTrailer ( ms , catalogObjNum , infoObjNum , xrefOffset ) ;
116+
117+ // Copy to output stream
118+ ms . Position = 0 ;
119+ ms . CopyTo ( stream ) ;
120+ }
121+
/// <summary>
/// Scans a run of PDF text for object headers of the form "N 0 obj" and records the
/// output byte offset of each header in <c>_objectOffsets</c>, keyed by object number N.
/// </summary>
/// <param name="text">The text that is about to be written to the output.</param>
/// <param name="startPosition">Byte position in the output at which <paramref name="text"/> begins.</param>
/// <remarks>
/// A header is only accepted when it occupies a whole line, so that text such as
/// "5 0 obj" appearing in the middle of a line (for example inside page content)
/// cannot overwrite the offset of the real object 5.
/// NOTE(review): offsets are only correct if <paramref name="startPosition"/> already
/// includes any bytes the writer emits before the text (e.g. an encoding preamble) —
/// confirm against the caller.
/// </remarks>
private void TrackObjectOffsets(string text, long startPosition)
{
    const string headerSuffix = " 0 obj";

    // Running UTF-8 byte count of text[0..countedChars), so each header costs only the
    // bytes since the previous one instead of re-encoding the whole prefix every time.
    int countedChars = 0;
    long countedBytes = 0;
    int searchIndex = 0;

    while (searchIndex < text.Length)
    {
        // Ordinal search: this is a file-format token, not linguistic text.
        var objIndex = text.IndexOf(headerSuffix, searchIndex, System.StringComparison.Ordinal);
        if (objIndex < 0)
        {
            break;
        }

        int afterHeader = objIndex + headerSuffix.Length;
        searchIndex = afterHeader;

        // Walk back over the ASCII digits that form the object number.
        int numberStart = objIndex;
        while (numberStart > 0 && text[numberStart - 1] is >= '0' and <= '9')
        {
            numberStart--;
        }

        if (numberStart == objIndex)
        {
            continue; // no digits in front of " 0 obj"
        }

        bool startsLine = numberStart == 0 || (text[numberStart - 1] is '\r' or '\n');
        bool endsLine = afterHeader == text.Length || (text[afterHeader] is '\r' or '\n');
        if (!startsLine || !endsLine)
        {
            continue; // looks like a header but is embedded in other content
        }

        var objNumStr = text.Substring(numberStart, objIndex - numberStart);
        if (!int.TryParse(
                objNumStr,
                System.Globalization.NumberStyles.None,
                System.Globalization.CultureInfo.InvariantCulture,
                out var objNum))
        {
            continue; // e.g. too many digits for an int
        }

        // Both segment boundaries sit on ASCII digits, so no surrogate pair is ever split
        // and the per-segment byte counts add up to the byte count of the full prefix.
        countedBytes += Encoding.UTF8.GetByteCount(text, countedChars, numberStart - countedChars);
        countedChars = numberStart;

        _objectOffsets[objNum] = startPosition + countedBytes;
    }
}
98152
@@ -119,9 +173,27 @@ private int WritePage(IPage page)
119173 var objNum = GetNextObjectNumber ( ) ;
120174 var contentObjNum = GetNextObjectNumber ( ) ;
121175
122- // Write page content
176+ // Write page content stream
123177 var content = WritePageContent ( page ) ;
124- WriteObject ( contentObjNum , content ) ;
178+ var contentBytes = Encoding . UTF8 . GetBytes ( content ) ;
179+
180+ // Write content stream object
181+ WriteLine ( $ "{ contentObjNum } 0 obj") ;
182+ WriteLine ( $@ "<<
183+ /Length { contentBytes . Length }
184+ >>" ) ;
185+ WriteLine ( "stream" ) ;
186+ _content . Append ( content ) ;
187+ WriteLine ( "endstream" ) ;
188+ WriteLine ( "endobj" ) ;
189+ WriteLine ( "" ) ;
190+
191+ // Build XObject resources for images
192+ var xobjects = new StringBuilder ( ) ;
193+ foreach ( var imgNum in _imageObjectNumbers )
194+ {
195+ xobjects . Append ( $ " /Im{ imgNum } { imgNum } 0 R\n ") ;
196+ }
125197
126198 // Write page object
127199 var pageContent = $@ "<<
@@ -131,7 +203,7 @@ private int WritePage(IPage page)
131203/Contents { contentObjNum } 0 R
132204/Resources <<
133205 /XObject <<
134- >>
206+ { xobjects } >>
135207 /Font <<
136208 >>
137209>>
@@ -169,7 +241,11 @@ private string WritePageContent(IPage page)
169241 private int WriteImage ( byte [ ] imageData , int width , int height )
170242 {
171243 var objNum = GetNextObjectNumber ( ) ;
172- var imageContent = $@ "<<
244+ _imageObjectNumbers . Add ( objNum ) ;
245+
246+ // Write image XObject with stream
247+ WriteLine ( $ "{ objNum } 0 obj") ;
248+ WriteLine ( $@ "<<
173249/Type /XObject
174250/Subtype /Image
175251/Width { width }
@@ -178,50 +254,46 @@ private int WriteImage(byte[] imageData, int width, int height)
178254/BitsPerComponent 8
179255/Filter /DCTDecode
180256/Length { imageData . Length }
181- >>" ;
182- WriteObject ( objNum , imageContent ) ;
183- var streamStartPos = _content . Length ;
257+ >>" ) ;
184258 WriteLine ( "stream" ) ;
185- // Store binary data position and data
186- var streamMarker = "stream\r \n " ;
187- var streamMarkerBytes = Encoding . UTF8 . GetBytes ( streamMarker ) ;
188- var position = streamStartPos + streamMarkerBytes . Length ;
189- _binaryData . Add ( ( position , imageData ) ) ;
190- // Write placeholder for binary data (will be replaced)
191- WriteLine ( $ "<BINARY_DATA_{ _binaryData . Count - 1 } >") ;
259+ var placeholder = $ "<BINARY_DATA_{ objNum } >";
260+ _binaryData . Add ( ( placeholder , imageData ) ) ;
261+ _content . Append ( placeholder ) ;
262+ WriteLine ( "" ) ;
192263 WriteLine ( "endstream" ) ;
264+ WriteLine ( "endobj" ) ;
265+ WriteLine ( "" ) ;
193266 return objNum ;
194267 }
195268
196- private int WriteCatalog ( List < int > pageRefs )
197- {
198- var objNum = GetNextObjectNumber ( ) ;
199- var pagesObjNum = WritePages ( pageRefs ) ;
200- var catalogContent = $@ "<<
201- /Type /Catalog
202- /Pages { pagesObjNum } 0 R
203- >>" ;
204- WriteObject ( objNum , catalogContent ) ;
205- return objNum ;
206- }
207-
/// <summary>
/// Emits the page tree object (under the reserved <c>_pagesObjectNumber</c>) followed by
/// the document catalog object (under <paramref name="catalogObjNum"/>).
/// </summary>
/// <param name="catalogObjNum">Object number reserved for the catalog.</param>
/// <param name="pageRefs">Object numbers of the pages listed as kids of the page tree.</param>
private void WriteCatalogWithNumber(int catalogObjNum, List<int> pageRefs)
{
    // Page tree: one indirect reference per page, space separated.
    var kidReferences = string.Join(
        " ",
        from pageRef in pageRefs
        select $"{pageRef} 0 R");

    var pageTreeDictionary = $@"<<
/Type /Pages
/Kids [{kidReferences}]
/Count {pageRefs.Count}
>>";
    WriteObject(_pagesObjectNumber, pageTreeDictionary);

    // Catalog: points at the page tree written just above.
    var catalogDictionary = $@"<<
/Type /Catalog
/Pages {_pagesObjectNumber} 0 R
>>";
    WriteObject(catalogObjNum, catalogDictionary);
}
220293
/// <summary>
/// Returns the object number currently stored for the page tree (Pages) object.
/// </summary>
private int GetPageTreeRef() => _pagesObjectNumber;
226298
227299 private int WriteDocumentInfo ( )
@@ -242,30 +314,53 @@ private int WriteDocumentInfo()
242314 return objNum ;
243315 }
244316
/// <summary>
/// Appends the cross-reference table to <paramref name="ms"/> at its current position,
/// using the byte offsets collected in <c>_objectOffsets</c>.
/// </summary>
/// <param name="ms">Stream that already holds every object body; the table is written at its current position.</param>
/// <returns>The byte offset at which the table starts (the value needed for <c>startxref</c>).</returns>
private long WriteXRefTable(MemoryStream ms)
{
    var xrefOffset = ms.Position;
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Every xref entry must be exactly 20 bytes including its end-of-line. Each entry
    // literal below ends in a space, so a fixed one-byte "\n" terminator gives 20 bytes
    // on every platform (Environment.NewLine would make it 21 bytes on Windows).
    using var writer = new StreamWriter(ms, new UTF8Encoding(false), leaveOpen: true)
    {
        NewLine = "\n",
    };

    writer.WriteLine("xref");
    writer.WriteLine("0 " + _objectNumber.ToString(invariant));
    writer.WriteLine("0000000000 65535 f "); // object 0: head of the free list

    for (int i = 1; i < _objectNumber; i++)
    {
        if (_objectOffsets.TryGetValue(i, out var offset))
        {
            writer.WriteLine(offset.ToString("D10", invariant) + " 00000 n ");
        }
        else
        {
            // No offset was recorded for this number: mark it free instead of
            // claiming an in-use object lives at byte 0 (the file header).
            writer.WriteLine("0000000000 65535 f ");
        }
    }

    writer.Flush();
    return xrefOffset;
}
250341
/// <summary>
/// Appends the trailer dictionary, the <c>startxref</c> pointer and the end-of-file
/// marker to <paramref name="ms"/>.
/// </summary>
/// <param name="ms">Stream the trailer is appended to.</param>
/// <param name="catalogObjNum">Object number written as <c>/Root</c>.</param>
/// <param name="infoObjNum">Object number written as <c>/Info</c>.</param>
/// <param name="xrefOffset">Byte offset of the cross-reference table.</param>
private void WriteTrailer(MemoryStream ms, int catalogObjNum, int infoObjNum, long xrefOffset)
{
    var trailerDictionary = $@"<<
/Size {_objectNumber}
/Root {catalogObjNum} 0 R
/Info {infoObjNum} 0 R
>>";

    var trailerLines = new[]
    {
        "trailer",
        trailerDictionary,
        "startxref",
        xrefOffset.ToString(),
        "%%EOF",
    };

    var output = new StreamWriter(ms, Encoding.UTF8, leaveOpen: true);
    foreach (var line in trailerLines)
    {
        output.WriteLine(line);
    }

    // Push everything through to the underlying stream before returning.
    output.Flush();
}
263357
/// <summary>
/// Writes one indirect object: the "N 0 obj" header, the supplied body, the
/// "endobj" keyword and a trailing blank line.
/// </summary>
/// <param name="objNum">Object number used in the header.</param>
/// <param name="content">Body text placed between the header and "endobj".</param>
private void WriteObject(int objNum, string content)
{
    var header = $"{objNum} 0 obj";

    foreach (var line in new[] { header, content, "endobj", "" })
    {
        WriteLine(line);
    }
}
270365
271366 private int GetNextObjectNumber ( )
@@ -288,3 +383,4 @@ private string EscapeString(string str)
288383 }
289384}
290385
386+
0 commit comments