| | 215 | if indirectReference.generation == 0 and \ |
|---|
| | 216 | self.xref_objStm.has_key(indirectReference.idnum): |
|---|
| | 217 | # indirect reference to object in object stream |
|---|
| | 218 | # read the entire object stream into memory |
|---|
| | 219 | stmnum,idx = self.xref_objStm[indirectReference.idnum] |
|---|
| | 220 | objStm = self.getObject(IndirectObject(stmnum, 0, self)) |
|---|
| | 221 | assert objStm['/Type'] == '/ObjStm' |
|---|
| | 222 | assert idx < objStm['/N'] |
|---|
| | 223 | streamData = StringIO(decodeStreamData(objStm)) |
|---|
| | 224 | for i in range(objStm['/N']): |
|---|
| | 225 | objnum = NumberObject.readFromStream(streamData) |
|---|
| | 226 | readNonWhitespace(streamData) |
|---|
| | 227 | streamData.seek(-1, 1) |
|---|
| | 228 | offset = NumberObject.readFromStream(streamData) |
|---|
| | 229 | readNonWhitespace(streamData) |
|---|
| | 230 | streamData.seek(-1, 1) |
|---|
| | 231 | t = streamData.tell() |
|---|
| | 232 | streamData.seek(objStm['/First']+offset, 0) |
|---|
| | 233 | obj = readObject(streamData, self) |
|---|
| | 234 | self.resolvedObjects[0][objnum] = obj |
|---|
| | 235 | streamData.seek(t, 0) |
|---|
| | 236 | return self.resolvedObjects[0][indirectReference.idnum] |
|---|
| | 245 | |
|---|
def readObjectHeader(self, stream):
    """Parse the header that precedes an indirect object's body.

    Reads two whitespace-delimited tokens from ``stream`` (the object id
    and the generation number), consumes the next three bytes, and then
    positions the stream on the first non-whitespace character that
    follows.

    Returns an ``(idnum, generation)`` tuple of ints.
    """
    id_token = readUntilWhitespace(stream)
    generation_token = readUntilWhitespace(stream)
    # Three bytes are read and discarded here -- presumably the "obj"
    # keyword; the value itself is never inspected.
    stream.read(3)
    # Find the next non-whitespace character, then step back one byte so
    # the caller reads it again as the start of the object body.
    readNonWhitespace(stream)
    stream.seek(-1, 1)
    header = (int(id_token), int(generation_token))
    return header
|---|
| | 253 | |
|---|
def cacheIndirectObject(self, generation, idnum, obj):
    """Record ``obj`` in the ``self.resolvedObjects`` cache.

    The cache is a two-level mapping: generation number -> object id ->
    object.  The inner mapping for a generation is created the first time
    that generation is seen; an existing entry for the same
    (generation, idnum) pair is overwritten.
    """
    # setdefault() replaces the deprecated has_key() check-then-create
    # sequence with a single lookup.
    self.resolvedObjects.setdefault(generation, {})[idnum] = obj
|---|
| 240 | | line = stream.read(5) ; assert line[:4] == "xref" |
|---|
| 241 | | num = readObject(stream, self) |
|---|
| 242 | | readNonWhitespace(stream) |
|---|
| 243 | | stream.seek(-1, 1) |
|---|
| 244 | | size = readObject(stream, self) |
|---|
| 245 | | readNonWhitespace(stream) |
|---|
| 246 | | stream.seek(-1, 1) |
|---|
| 247 | | cnt = 0 |
|---|
| 248 | | while cnt < size: |
|---|
| 249 | | line = stream.readline() |
|---|
| 250 | | offset, generation = line[:16].split(" ") |
|---|
| 251 | | offset, generation = int(offset), int(generation) |
|---|
| 252 | | if not self.xref.has_key(generation): |
|---|
| 253 | | self.xref[generation] = {} |
|---|
| 254 | | self.xref[generation][num] = offset |
|---|
| 255 | | cnt += 1 |
|---|
| 256 | | num += 1 |
|---|
| 257 | | assert stream.read(7) == "trailer" |
|---|
| 258 | | readNonWhitespace(stream) |
|---|
| 259 | | stream.seek(-1, 1) |
|---|
| 260 | | newTrailer = readObject(stream, self) |
|---|
| 261 | | for key, value in newTrailer.items(): |
|---|
| 262 | | if not self.trailer.has_key(key): |
|---|
| 263 | | self.trailer[key] = value |
|---|
| 264 | | if newTrailer.has_key(NameObject("/Prev")): |
|---|
| 265 | | startxref = newTrailer[NameObject("/Prev")] |
|---|
| | 278 | x = stream.read(1) |
|---|
| | 279 | if x == "x": |
|---|
| | 280 | # standard cross-reference table |
|---|
| | 281 | ref = stream.read(4) |
|---|
| | 282 | assert ref[:3] == "ref" |
|---|
| | 283 | num = readObject(stream, self) |
|---|
| | 284 | readNonWhitespace(stream) |
|---|
| | 285 | stream.seek(-1, 1) |
|---|
| | 286 | size = readObject(stream, self) |
|---|
| | 287 | readNonWhitespace(stream) |
|---|
| | 288 | stream.seek(-1, 1) |
|---|
| | 289 | cnt = 0 |
|---|
| | 290 | while cnt < size: |
|---|
| | 291 | line = stream.readline() |
|---|
| | 292 | offset, generation = line[:16].split(" ") |
|---|
| | 293 | offset, generation = int(offset), int(generation) |
|---|
| | 294 | if not self.xref.has_key(generation): |
|---|
| | 295 | self.xref[generation] = {} |
|---|
| | 296 | self.xref[generation][num] = offset |
|---|
| | 297 | cnt += 1 |
|---|
| | 298 | num += 1 |
|---|
| | 299 | assert stream.read(7) == "trailer" |
|---|
| | 300 | readNonWhitespace(stream) |
|---|
| | 301 | stream.seek(-1, 1) |
|---|
| | 302 | newTrailer = readObject(stream, self) |
|---|
| | 303 | for key, value in newTrailer.items(): |
|---|
| | 304 | if not self.trailer.has_key(key): |
|---|
| | 305 | self.trailer[key] = value |
|---|
| | 306 | if newTrailer.has_key(NameObject("/Prev")): |
|---|
| | 307 | startxref = newTrailer[NameObject("/Prev")] |
|---|
| | 308 | else: |
|---|
| | 309 | break |
|---|
| 267 | | break |
|---|
| 268 | | |
|---|
| 269 | | ## read trailer dictionary |
|---|
| 270 | | #while line != "trailer": |
|---|
| 271 | | # line = self.readNextEndLine(stream) |
|---|
| 272 | | #stream.seek(10, 1) # read past "trailer" line |
|---|
| 273 | | #self.trailer = readObject(stream, self) |
|---|
| 274 | | |
|---|
| | 311 | # PDF 1.5+ Cross-Reference Stream |
|---|
| | 312 | stream.seek(-1, 1) |
|---|
| | 313 | idnum, generation = self.readObjectHeader(stream) |
|---|
| | 314 | xrefstream = readObject(stream, self) |
|---|
| | 315 | assert xrefstream["/Type"] == "/XRef" |
|---|
| | 316 | self.cacheIndirectObject(generation, idnum, xrefstream) |
|---|
| | 317 | streamData = StringIO(decodeStreamData(xrefstream)) |
|---|
| | 318 | num, size = xrefstream.get("/Index", [0, xrefstream.get("/Size")]) |
|---|
| | 319 | entrySizes = xrefstream.get("/W") |
|---|
| | 320 | cnt = 0 |
|---|
| | 321 | while cnt < size: |
|---|
| | 322 | for i in range(len(entrySizes)): |
|---|
| | 323 | d = streamData.read(entrySizes[i]) |
|---|
| | 324 | di = convertToInt(d, entrySizes[i]) |
|---|
| | 325 | if i == 0: |
|---|
| | 326 | xref_type = di |
|---|
| | 327 | elif i == 1: |
|---|
| | 328 | if xref_type == 0: |
|---|
| | 329 | next_free_object = di |
|---|
| | 330 | elif xref_type == 1: |
|---|
| | 331 | byte_offset = di |
|---|
| | 332 | elif xref_type == 2: |
|---|
| | 333 | objstr_num = di |
|---|
| | 334 | elif i == 2: |
|---|
| | 335 | if xref_type == 0: |
|---|
| | 336 | next_generation = di |
|---|
| | 337 | elif xref_type == 1: |
|---|
| | 338 | generation = di |
|---|
| | 339 | elif xref_type == 2: |
|---|
| | 340 | obstr_idx = di |
|---|
| | 341 | if xref_type == 0: |
|---|
| | 342 | pass |
|---|
| | 343 | elif xref_type == 1: |
|---|
| | 344 | if not self.xref.has_key(generation): |
|---|
| | 345 | self.xref[generation] = {} |
|---|
| | 346 | self.xref[generation][num] = byte_offset |
|---|
| | 347 | elif xref_type == 2: |
|---|
| | 348 | self.xref_objStm[num] = [objstr_num, obstr_idx] |
|---|
| | 349 | cnt += 1 |
|---|
| | 350 | num += 1 |
|---|
| | 351 | trailerKeys = "/Root", "/Encrypt", "/Info", "/ID" |
|---|
| | 352 | for key in trailerKeys: |
|---|
| | 353 | if xrefstream.has_key(key) and not self.trailer.has_key(key): |
|---|
| | 354 | self.trailer[NameObject(key)] = xrefstream[key] |
|---|
| | 355 | if xrefstream.has_key("/Prev"): |
|---|
| | 356 | startxref = xrefstream["/Prev"] |
|---|
| | 357 | else: |
|---|
| | 358 | break |
|---|
def decodeStreamData(stream):
    """Return the decoded payload of a stream object.

    ``stream`` is a dictionary-like object holding the raw stream bytes
    under the ``"__streamdata__"`` key.  The following entries are honoured:

    * ``/Filter`` -- absent (data is returned as stored) or
      ``/FlateDecode`` (data is inflated with zlib).
    * ``/DecodeParms`` -- optional; ``/Predictor`` defaults to 1 (none).
      Predictor values >= 10 select PNG row prediction and require
      ``/Columns``.  PNG row filters 0 (None), 1 (Sub) and 2 (Up) are
      implemented.

    Raises AssertionError for an unsupported filter, predictor or PNG row
    filter, and when the data length is not a whole number of rows.  The
    errors are raised explicitly (not via ``assert`` statements) so they
    still fire when Python runs with -O.
    """
    filterName = stream.get("/Filter", None)
    if filterName is None:
        # No filter: the stored bytes are already the decoded data.
        data = stream["__streamdata__"]
    elif filterName == "/FlateDecode":
        data = zlib.decompress(stream["__streamdata__"])
    else:
        raise AssertionError("unsupported stream filter: %r" % (filterName,))

    predictor = stream.get("/DecodeParms", {}).get("/Predictor", 1)
    if predictor == 1:
        return data

    columns = stream["/DecodeParms"]["/Columns"]
    if predictor < 10:
        raise AssertionError("unsupported predictor: %r" % (predictor,))

    # PNG prediction: every row is prefixed by one byte naming the filter
    # used for that row, and the filter can vary from row to row.
    rowlength = columns + 1
    if len(data) % rowlength != 0:
        raise AssertionError(
            "stream length %d is not a multiple of the row length %d"
            % (len(data), rowlength))

    # bytearray gives integer byte values on both Python 2.6+ and 3, and
    # accumulating into one buffer avoids quadratic string concatenation.
    decoded = bytearray()
    prev_row = bytearray(rowlength)  # row above the first row is all zeros
    for start in range(0, len(data), rowlength):
        row = bytearray(data[start:start + rowlength])
        filterByte = row[0]
        if filterByte == 0:
            # None: bytes are stored verbatim.
            pass
        elif filterByte == 1:
            # Sub: each byte is a delta from the byte to its left.  Index 1
            # is the first data byte and has no left neighbour.
            for i in range(2, rowlength):
                row[i] = (row[i] + row[i - 1]) % 256
        elif filterByte == 2:
            # Up: each byte is a delta from the byte directly above it.
            for i in range(1, rowlength):
                row[i] = (row[i] + prev_row[i]) % 256
        else:
            raise AssertionError(
                "unsupported PNG filter: %r" % (filterByte,))
        prev_row = row
        decoded.extend(row[1:])  # drop the leading filter byte
    return bytes(decoded)
|---|
| | 738 | |
|---|
def convertToInt(d, size):
    """Decode ``d`` as a big-endian unsigned integer.

    ``d`` is a byte string holding a field of ``size`` bytes.  Fields of up
    to 4 bytes are decoded as 32-bit values and fields of 5 to 8 bytes as
    64-bit values; shorter input is left-padded with zero bytes, so an
    empty field decodes to 0.

    Raises AssertionError when ``size`` is larger than 8.  The error is
    raised explicitly so it still fires when Python runs with -O.
    """
    if size <= 4:
        width, fmt = 4, ">L"
    elif size <= 8:
        width, fmt = 8, ">Q"
    else:
        raise AssertionError("field size too big: %r" % (size,))
    # Unsigned formats are used so that a field with its top bit set (for
    # example a 4-byte value of 0x80000000 or more) does not come back as
    # a negative number.
    padded = (b"\x00" * width + d)[-width:]
    return struct.unpack(fmt, padded)[0]
|---|
| | 751 | |
|---|
| 613 | | output.addPage(input.getPage(0).rotateClockwise(90)) |
|---|
| 614 | | output.write(file("cc-cc-test.pdf", "wb")) |
|---|
| | 755 | |
|---|
| | 756 | #input1 = PdfFileReader(file("cc-cc.pdf", "rb")) |
|---|
| | 757 | #output.addPage(input1.getPage(0)) |
|---|
| | 758 | |
|---|
| | 759 | input2 = PdfFileReader(file("PDFReference16.pdf", "rb")) |
|---|
| | 760 | for i in range(input2.getNumPages()): |
|---|
| | 761 | output.addPage(input2.getPage(i)) |
|---|
| | 762 | |
|---|
| | 763 | output.write(file("test.pdf", "wb")) |
|---|