Changeset 731
- Timestamp: 05/02/06 10:57:55 (2 years ago)
- Files:
  - pypdf/trunk/pyPdf/pdf.py (modified) (8 diffs)
  - pypdf/trunk/pyPdf/utils.py (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
pypdf/trunk/pyPdf/pdf.py
r729 r731 347 347 cnt = 0 348 348 while cnt < size: 349 line = stream.read line()349 line = stream.read(20) 350 350 offset, generation = line[:16].split(" ") 351 351 offset, generation = int(offset), int(generation) … … 525 525 _mergeResources = staticmethod(_mergeResources) 526 526 527 def _contentStreamRename(stream, rename ):527 def _contentStreamRename(stream, rename, pdf): 528 528 if not rename: 529 529 return stream 530 stream = ContentStream(stream )530 stream = ContentStream(stream, pdf) 531 531 for operands,operator in stream.operations: 532 532 for i in range(len(operands)): … … 537 537 _contentStreamRename = staticmethod(_contentStreamRename) 538 538 539 def _pushPopGS(contents ):539 def _pushPopGS(contents, pdf): 540 540 # adds a graphics state "push" and "pop" to the beginning and end 541 541 # of a content stream. This isolates it from changes such as 542 542 # transformation matricies. 543 stream = ContentStream(contents )543 stream = ContentStream(contents, pdf) 544 544 stream.operations.insert(0, [[], "q"]) 545 545 stream.operations.append([[], "Q"]) … … 581 581 582 582 originalContent = self["/Contents"].getObject() 583 newContentArray.append(PageObject._pushPopGS(originalContent ))583 newContentArray.append(PageObject._pushPopGS(originalContent, self.pdf)) 584 584 585 585 page2Content = page2['/Contents'].getObject() 586 page2Content = PageObject._contentStreamRename(page2Content, rename )587 page2Content = PageObject._pushPopGS(page2Content )586 page2Content = PageObject._contentStreamRename(page2Content, rename, self.pdf) 587 page2Content = PageObject._pushPopGS(page2Content, self.pdf) 588 588 newContentArray.append(page2Content) 589 589 590 self[NameObject('/Contents')] = ContentStream(newContentArray )590 self[NameObject('/Contents')] = ContentStream(newContentArray, self.pdf) 591 591 self[NameObject('/Resources')] = newResources 592 592 … … 602 602 content = self["/Contents"].getObject() 603 603 if not isinstance(content, ContentStream): 604 
content = ContentStream(content )604 content = ContentStream(content, self.pdf) 605 605 self[NameObject("/Contents")] = content.flateEncode() 606 606 … … 645 645 646 646 class ContentStream(DecodedStreamObject): 647 def __init__(self, stream): 647 def __init__(self, stream, pdf): 648 self.pdf = pdf 648 649 self.operations = [] 649 650 # stream may be a StreamObject or an ArrayObject containing … … 660 661 661 662 def __parseContentStream(self, stream): 663 # file("f:\\tmp.txt", "w").write(stream.read()) 664 stream.seek(0, 0) 662 665 operands = [] 663 666 while True: … … 666 669 break 667 670 stream.seek(-1, 1) 668 if peek.isalpha(): 669 operator = readUntilWhitespace(stream) 670 self.operations.append((operands, operator)) 671 operands = [] 671 if peek.isalpha() or peek == "'" or peek == "\"": 672 operator = readUntilWhitespace(stream, maxchars=2) 673 if operator == "BI": 674 # begin inline image - a completely different parsing 675 # mechanism is required, of course... thanks buddy... 676 assert operands == [] 677 ii = self._readInlineImage(stream) 678 self.operations.append((ii, "INLINE IMAGE")) 679 else: 680 self.operations.append((operands, operator)) 681 operands = [] 672 682 else: 673 683 operands.append(readObject(stream, None)) 684 685 def _readInlineImage(self, stream): 686 # begin reading just after the "BI" - begin image 687 # first read the dictionary of settings. 
688 settings = DictionaryObject() 689 while True: 690 tok = readNonWhitespace(stream) 691 stream.seek(-1, 1) 692 if tok == "I": 693 # "ID" - begin of image data 694 break 695 key = readObject(stream, self.pdf) 696 tok = readNonWhitespace(stream) 697 stream.seek(-1, 1) 698 value = readObject(stream, self.pdf) 699 settings[key] = value 700 # left at beginning of ID 701 tmp = stream.read(3) 702 assert tmp[:2] == "ID" 703 data = "" 704 while True: 705 tok = stream.read(1) 706 if tok == "E": 707 next = stream.read(1) 708 if next == "I": 709 break 710 else: 711 stream.seek(-1, 1) 712 data += tok 713 else: 714 data += tok 715 x = readNonWhitespace(stream) 716 stream.seek(-1, 1) 717 return {"settings": settings, "data": data} 674 718 675 719 def _getData(self): 676 720 newdata = StringIO() 677 721 for operands,operator in self.operations: 678 for op in operands: 679 op.writeToStream(newdata) 680 newdata.write(" ") 681 newdata.write(operator) 722 if operator == "INLINE IMAGE": 723 newdata.write("BI") 724 dicttext = StringIO() 725 operands["settings"].writeToStream(dicttext) 726 newdata.write(dicttext.getvalue()[2:-2]) 727 newdata.write("ID ") 728 newdata.write(operands["data"]) 729 newdata.write("EI") 730 else: 731 for op in operands: 732 op.writeToStream(newdata) 733 newdata.write(" ") 734 newdata.write(operator) 682 735 newdata.write("\n") 683 736 return newdata.getvalue() pypdf/trunk/pyPdf/utils.py
r710 r731 35 35 __author_email__ = "mfenniak@pobox.com" 36 36 37 def readUntilWhitespace(stream ):37 def readUntilWhitespace(stream, maxchars=None): 38 38 txt = "" 39 39 while True: … … 42 42 break 43 43 txt += tok 44 if len(txt) == maxchars: 45 break 44 46 return txt 45 47
