Changeset 765

Show
Ignore:
Timestamp:
12/12/06 13:32:13 (2 years ago)
Author:
mfenniak
Message:

Add pythondoc documentation.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • pypdf/trunk/pyPdf/pdf.py

    r759 r765  
    4848from sets import ImmutableSet 
    4949 
     50## 
     51# This class supports writing PDF files out, given pages produced by another 
     52# class (typically {@link #PdfFileReader PdfFileReader}). 
    5053class PdfFileWriter(object): 
    5154    def __init__(self): 
     
    8588        return self._objects[ido.idnum - 1] 
    8689 
     90    ## 
     91    # Adds a page to this PDF file.  The page is usually acquired from a 
     92    # {@link #PdfFileReader PdfFileReader} instance. 
     93    # <p> 
     94    # Stability: Added in v1.0, will exist for all v1.x releases. 
     95    # 
     96    # @param page The page to add to the document.  This argument should be 
     97    #             an instance of {@link #PageObject PageObject}. 
    8798    def addPage(self, page): 
    88         """ 
    89         Adds a page to this PDF file.  A dictionary of /Type = /Page. 
    90         Currently usually acquired from PdfFileReader.getPage(). 
    91  
    92         Stability: Added in v1.0, will exist for all v1.x releases. 
    93         """ 
    9499        assert page["/Type"] == "/Page" 
    95100        page[NameObject("/Parent")] = self._pages 
     
    99104        pages["/Count"] = NumberObject(pages["/Count"] + 1) 
    100105 
     106    ## 
     107    # Writes the collection of pages added to this object out as a PDF file. 
     108    # <p> 
     109    # Stability: Added in v1.0, will exist for all v1.x releases. 
     110    # @param stream An object to write the file to.  The object must support 
     111    # the write method, and the tell method, similar to a file object. 
    101112    def write(self, stream): 
    102         """ 
    103         Writes this PDF file to an output stream.  Writes the file as a 
    104         PDF-1.3 format file. 
    105  
    106         Stability: Added in v1.0, will exist for all v1.x releases. 
    107         """ 
    108  
    109113        externalReferenceMap = {} 
    110114        self.stack = [] 
     
    196200 
    197201 
     202## 
     203# Initializes a PdfFileReader object.  This operation can take some time, as 
     204# the PDF stream's cross-reference tables are read into memory. 
     205# <p> 
     206# Stability: Added in v1.0, will exist for all v1.x releases. 
     207# 
     208# @param stream An object that supports the standard read and seek methods 
     209#               similar to a file object. 
    198210class PdfFileReader(object): 
    199211    def __init__(self, stream): 
    200         """ 
    201         Initializes a PdfFileReader object.  This operation can take some time, 
    202         as the PDF file cross-reference tables are read.  "stream" parameter 
    203         must be a data stream, not a string or a path name. 
    204  
    205         Stability: Added in v1.0, will exist for all v1.x releases. 
    206         """ 
    207212        self.flattenedPages = None 
    208213        self.resolvedObjects = {} 
     
    210215        self.stream = stream 
    211216 
     217    ## 
     218    # Retrieves the PDF file's document information dictionary, if it exists. 
     219    # Note that some PDF files use metadata streams instead of docinfo 
     220    # dictionaries, and these metadata streams will not be accessed by this 
     221    # function. 
     222    # <p> 
     223    # Stability: Added in v1.6, will exist for all future v1.x releases. 
     224    # @return Returns a {@link #DocumentInformation DocumentInformation} 
     225    #         instance, or None if none exists. 
    212226    def getDocumentInfo(self): 
    213         """ 
    214         Retrieves the PDF file's document information dictionary, if it 
    215         exists.  Returns a DocumentInformation instance, or None. 
    216         Note that some PDF files use metadata streams instead of docinfo 
    217         dictionaries, and these metadata streams will not be accessed by this 
    218         function. 
    219  
    220         Stability: Added in v1.6, will exist for all v1.x releases. 
    221         """ 
    222227        if not self.trailer.has_key("/Info"): 
    223228            return None 
     
    227232        return retval 
    228233 
    229     documentInfo = property(lambda self: self.getDocumentInfo(), None, None, 
    230             """See PdfFileReader.getDocumentInfo().  This property was added  
    231             in pyPdf v1.7, and will exist for all future v1.x releases.""") 
    232  
     234    ## 
     235    # Read-only property that accesses the {@link 
     236    # #PdfFileReader.getDocumentInfo getDocumentInfo} function. 
     237    # <p> 
     238    # Stability: Added in v1.7, will exist for all future v1.x releases. 
     239    documentInfo = property(lambda self: self.getDocumentInfo(), None, None) 
     240 
     241    ## 
     242    # Calculates the number of pages in this PDF file. 
     243    # <p> 
     244    # Stability: Added in v1.0, will exist for all v1.x releases. 
     245    # @return Returns an integer. 
    233246    def getNumPages(self): 
    234         """ 
    235         Returns the number of pages in this PDF file. 
    236  
    237         Stability: Added in v1.0, will exist for all v1.x releases. 
    238         """ 
    239247        if self.flattenedPages == None: 
    240248            self._flatten() 
    241249        return len(self.flattenedPages) 
    242250 
    243     numPages = property(lambda self: self.getNumPages(), None, None, 
    244             """See PdfFileReader.getNamePages().  This property was added in 
    245             v1.7, and will exist for all future v1.x releases.""") 
    246  
     251    ## 
     252    # Read-only property that accesses the {@link #PdfFileReader.getNumPages 
     253    # getNumPages} function. 
     254    # <p> 
     255    # Stability: Added in v1.7, will exist for all future v1.x releases. 
     256    numPages = property(lambda self: self.getNumPages(), None, None) 
     257 
     258    ## 
     259    # Retrieves a page by number from this PDF file. 
     260    # <p> 
     261    # Stability: Added in v1.0, will exist for all v1.x releases. 
     262    # @return Returns a {@link #PageObject PageObject} instance. 
    247263    def getPage(self, pageNumber): 
    248         """ 
    249         Retrieves a page by number from this PDF file.  Returns a PageObject 
    250         instance. 
    251  
    252         Stability: Added in v1.0, will exist for all v1.x releases. 
    253         """ 
    254264        # ensure that we're not trying to access an encrypted PDF 
    255265        assert not self.trailer.has_key("/Encrypt") 
     
    258268        return self.flattenedPages[pageNumber] 
    259269 
     270    ## 
     271    # Read-only property that emulates a list based upon the {@link 
     272    # #PdfFileReader.getNumPages getNumPages} and {@link #PdfFileReader.getPage 
     273    # getPage} functions. 
     274    # <p> 
     275    # Stability: Added in v1.7, and will exist for all future v1.x releases. 
    260276    pages = property(lambda self: ConvertFunctionsToVirtualList(self.getNumPages, self.getPage), 
    261             None, None, """Returns a sequence of pages.  This property was 
    262             added in v1.7 and will exist for all future v1.x releases.""") 
     277            None, None) 
    263278 
    264279    def _flatten(self, pages = None, inherit = None): 
     
    513528    del self[name] 
    514529 
    515 def addRectangleAccessor(klass, propname, name, fallback, docs): 
    516     setattr(klass, propname, 
     530def createRectangleAccessor(name, fallback): 
     531    return \ 
    517532        property( 
    518533            lambda self: getRectangle(self, name, fallback), 
    519534            lambda self, value: setRectangle(self, name, value), 
    520             lambda self: deleteRectangle(self, name), 
    521             docs 
     535            lambda self: deleteRectangle(self, name) 
    522536            ) 
    523         ) 
    524  
     537 
     538## 
     539# This class represents a single page within a PDF file.  Typically this object 
     540# will be created by accessing the {@link #PdfFileReader.getPage getPage} 
     541# function of the {@link #PdfFileReader PdfFileReader} class. 
    525542class PageObject(DictionaryObject): 
    526543    def __init__(self, pdf): 
     
    528545        self.pdf = pdf 
    529546 
     547    ## 
     548    # Rotates a page clockwise by increments of 90 degrees. 
     549    # <p> 
     550    # Stability: Added in v1.1, will exist for all future v1.x releases. 
     551    # @param angle Angle to rotate the page.  Must be an increment of 90 deg. 
    530552    def rotateClockwise(self, angle): 
    531         """ 
    532         Rotates a page clockwise by increments of 90 degrees. 
    533  
    534         Stability: Added in v1.1, will exist for all v1.x releases thereafter. 
    535         """ 
    536553        assert angle % 90 == 0 
    537554        self._rotate(angle) 
    538555        return self 
    539556 
     557    ## 
     558    # Rotates a page counter-clockwise by increments of 90 degrees. 
     559    # <p> 
     560    # Stability: Added in v1.1, will exist for all future v1.x releases. 
     561    # @param angle Angle to rotate the page.  Must be an increment of 90 deg. 
    540562    def rotateCounterClockwise(self, angle): 
    541         """ 
    542         Rotates a page counter-clockwise by increments of 90 degrees.  Note 
    543         that this is equivalent to calling rotateClockwise(-angle). 
    544  
    545         Stability: Added in v1.1, will exist for all v1.x releases thereafter. 
    546         """ 
    547563        assert angle % 90 == 0 
    548564        self._rotate(-angle) 
     
    590606    _pushPopGS = staticmethod(_pushPopGS) 
    591607 
     608    ## 
     609    # Merges the content streams of two pages into one.  Resource references 
     610    # (i.e. fonts) are maintained from both pages.  The mediabox/cropbox/etc 
     611    # of this page are not altered.  The parameter page's content stream will 
     612    # be added to the end of this page's content stream, meaning that it will 
     613    # be drawn after, or "on top" of this page. 
     614    # <p> 
     615    # Stability: Added in v1.4, will exist for all future 1.x releases. 
     616    # @param page2 An instance of {@link #PageObject PageObject} to be merged 
     617    #              into this one. 
    592618    def mergePage(self, page2): 
    593         """ 
    594         Merges the content streams of two pages into one.  Resource 
    595         references (i.e. fonts) are maintained from both pages.  The 
    596         mediabox/cropbox/etc on "self" are not altered. 
    597  
    598         Stability: Added in v1.4, will exist for all v1.x releases thereafter. 
    599         """ 
    600619 
    601620        # First we work on merging the resource dictionaries.  This allows us 
     
    634653        self[NameObject('/Resources')] = newResources 
    635654 
     655    ## 
     656    # Compresses the size of this page by joining all content streams and 
     657    # applying a FlateDecode filter. 
     658    # <p> 
     659    # Stability: Added in v1.6, will exist for all future v1.x releases. 
     660    # However, it is possible that this function will perform no action if 
     661    # content stream compression becomes "automatic" for some reason. 
    636662    def compressContentStreams(self): 
    637         """ 
    638         Join all content streams and apply a FlateDecode filter to decrease 
    639         the stream's size. 
    640  
    641         Stability: Added in v1.6, will exist for all v1.x releases thereafter. 
    642         However, if content stream compression is ever handled in a different 
    643         and/or more transparent way, this function may not do anything. 
    644         """ 
    645663        content = self["/Contents"].getObject() 
    646664        if not isinstance(content, ContentStream): 
     
    648666        self[NameObject("/Contents")] = content.flateEncode() 
    649667 
     668    ## 
     669    # Locate all text drawing commands, in the order they are provided in the 
     670    # content stream, and extract the text.  This works well for some PDF 
     671    # files, but poorly for others, depending on the generator used.  This will 
     672    # be refined in the future.  Do not rely on the order of text coming out of 
     673    # this function, as it will change if this function is made more 
     674    # sophisticated. 
     675    # <p> 
     676    # Stability: Added in v1.7, will exist for all future v1.x releases.  May 
     677    # be overhauled to provide more ordered text in the future. 
     678    # @return a string object 
    650679    def extractText(self): 
    651         """ 
    652         Locate all text drawing commands, in the order they are provided in 
    653         the content stream, and extract the text.  This works well for some 
    654         PDF files, but poorly for others, depending on the generator used. 
    655         This will be refined in the future.  Do not rely on the order of text 
    656         coming out of this function, as it will change if this function is  
    657         made more sophisticated. 
    658  
    659         Stability: Added in v1.7, will exist for all v1.x releases thereafter. 
    660         May be overhauled to provide more ordered text in the future. 
    661         """ 
    662680        text = "" 
    663681        content = self["/Contents"].getObject() 
     
    681699        return text 
    682700 
    683 addRectangleAccessor(PageObject, "mediaBox", "/MediaBox", (), 
    684         """A rectangle, expressed in default user space units, defining the 
    685         boundaries of the physical medium on which the page is intended to be 
    686         displayed or printed. 
    687  
    688         Stability: Added in v1.4, will exist for all v1.x releases 
    689         thereafter.""") 
    690 addRectangleAccessor(PageObject, "cropBox", "/CropBox", ("/MediaBox",), 
    691         """A rectangle, expressed in default user space units, defining the 
    692         visible region of default user space.  When the page is displayed or 
    693         printed, its contents are to be clipped (cropped) to this rectangle and 
    694         then imposed on the output medium in some implementation-defined 
    695         manner.  Default value: same as MediaBox. 
    696  
    697         Stability: Added in v1.4, will exist for all v1.x releases 
    698         thereafter.""") 
    699 addRectangleAccessor(PageObject, "bleedBox", "/BleedBox", ("/CropBox", 
    700         "/MediaBox"), """A rectangle, expressed in default user space units, 
    701         defining the region to which the contents of the page should be clipped 
    702         when output in a production environment. 
    703          
    704         Stability: Added in v1.4, will exist for all v1.x releases 
    705         thereafter.""") 
    706 addRectangleAccessor(PageObject, "trimBox", "/TrimBox", ("/CropBox", 
    707         "/MediaBox"), """A rectangle, expressed in default user space units, 
    708         defining the intended dimensions of the finished page after trimming. 
    709          
    710         Stability: Added in v1.4, will exist for all v1.x releases 
    711         thereafter.""") 
    712 addRectangleAccessor(PageObject, "artBox", "/ArtBox", ("/CropBox", 
    713         "/MediaBox"), """A rectangle, expressed in default user space units, 
    714         defining the extent of the page's meaningful content as intended by the 
    715         page's creator. 
    716          
    717         Stability: Added in v1.4, will exist for all v1.x releases 
    718         thereafter.""") 
     701    ## 
     702    # A rectangle (RectangleObject), expressed in default user space units, 
     703    # defining the boundaries of the physical medium on which the page is 
     704    # intended to be displayed or printed. 
     705    # <p> 
     706    # Stability: Added in v1.4, will exist for all future v1.x releases. 
     707    mediaBox = createRectangleAccessor("/MediaBox", ()) 
     708 
     709    ## 
     710    # A rectangle (RectangleObject), expressed in default user space units, 
     711    # defining the visible region of default user space.  When the page is 
     712    # displayed or printed, its contents are to be clipped (cropped) to this 
     713    # rectangle and then imposed on the output medium in some 
     714    # implementation-defined manner.  Default value: same as MediaBox. 
     715    # <p> 
     716    # Stability: Added in v1.4, will exist for all future v1.x releases. 
     717    cropBox = createRectangleAccessor("/CropBox", ("/CropBox",)) 
     718 
     719    ## 
     720    # A rectangle (RectangleObject), expressed in default user space units, 
     721    # defining the region to which the contents of the page should be clipped 
     722    # when output in a production environment. 
     723    # <p> 
     724    # Stability: Added in v1.4, will exist for all future v1.x releases. 
     725    bleedBox = createRectangleAccessor("/BleedBox", ("/CropBox", "/MediaBox")) 
     726 
     727    ## 
     728    # A rectangle (RectangleObject), expressed in default user space units, 
     729    # defining the intended dimensions of the finished page after trimming. 
     730    # <p> 
     731    # Stability: Added in v1.4, will exist for all future v1.x releases. 
     732    trimBox = createRectangleAccessor("/TrimBox", ("/CropBox", "/MediaBox")) 
     733 
     734    ## 
     735    # A rectangle (RectangleObject), expressed in default user space units, 
     736    # defining the extent of the page's meaningful content as intended by the 
     737    # page's creator. 
     738    # <p> 
     739    # Stability: Added in v1.4, will exist for all future v1.x releases. 
     740    artBox = createRectangleAccessor("/ArtBox", ("/CropBox", "/MediaBox")) 
    719741 
    720742 
     
    817839 
    818840 
     841## 
     842# A class representing the basic document metadata provided in a PDF File. 
    819843class DocumentInformation(DictionaryObject): 
    820844    def __init__(self): 
    821845        DictionaryObject.__init__(self) 
    822846 
    823     title = property( 
    824             lambda self: self.get("/Title", None), 
    825             None, None, 
    826             """The document's title, or None if not specified.  Added to pyPdf 
    827             in v1.6, will exist for all v1.x.""") 
    828  
    829     author = property( 
    830             lambda self: self.get("/Author", None), 
    831             None, None, 
    832             """The name of the person who created the document, or None if not 
    833             specified.  Added to pyPdf in v1.6, will exist for all v1.x.""") 
    834  
    835     subject = property( 
    836             lambda self: self.get("/Subject", None), 
    837             None, None, 
    838             """The subject of the document, or None if not specified.  Added to 
    839             pyPdf in v1.6, will exist for all v1.x.""") 
    840  
    841     creator = property( 
    842             lambda self: self.get("/Creator", None), 
    843             None, None, 
    844             """If the document was converted to PDF from another format, the 
    845             name of the application (for example, OpenOffice) that created the 
    846             original document from which it was converted, or None if not 
    847             specified.  Added to pyPdf in v1.6, will exist for all v1.x.""") 
    848  
    849     producer = property( 
    850             lambda self: self.get("/Producer", None), 
    851             None, None, 
    852             """If the document was converted to PDF from another format, the 
    853             name of the application (for example, OSX Quartz) that converted it 
    854             to PDF.  Added to pyPdf in v1.6, will exist for all v1.x.""") 
     847    ## 
     848    # Read-only property accessing the document's title.  Added in v1.6, will 
     849    # exist for all future v1.x releases. 
     850    # @return A string, or None if the title is not provided. 
     851    title = property(lambda self: self.get("/Title", None), None, None) 
     852 
     853    ## 
     854    # Read-only property accessing the document's author.  Added in v1.6, will 
     855    # exist for all future v1.x releases. 
     856    # @return A string, or None if the author is not provided. 
     857    author = property(lambda self: self.get("/Author", None), None, None) 
     858 
     859    ## 
     860    # Read-only property accessing the subject of the document.  Added in v1.6, 
     861    # will exist for all future v1.x releases. 
     862    # @return A string, or None if the subject is not provided. 
     863    subject = property(lambda self: self.get("/Subject", None), None, None) 
     864 
     865    ## 
     866    # Read-only property accessing the document's creator.  If the document was 
     867    # converted to PDF from another format, the name of the application (for 
     868    # example, OpenOffice) that created the original document from which it was 
     869    # converted.  Added in v1.6, will exist for all future v1.x releases. 
     870    # @return A string, or None if the creator is not provided. 
     871    creator = property(lambda self: self.get("/Creator", None), None, None) 
     872 
     873    ## 
     874    # Read-only property accessing the document's producer.  If the document 
     875    # was converted to PDF from another format, the name of the application 
     876    # (for example, OSX Quartz) that converted it to PDF.  Added in v1.6, will 
     877    # exist for all future v1.x releases. 
     878    # @return A string, or None if the producer is not provided. 
     879    producer = property(lambda self: self.get("/Producer", None), None, None) 
    855880 
    856881 
     
    869894 
    870895 
    871 if __name__ == "__main__": 
    872     output = PdfFileWriter() 
    873  
    874     input1 = PdfFileReader(file("test\\5000-s1-05e.pdf", "rb")) 
    875     page1 = input1.getPage(0) 
    876  
    877     input2 = PdfFileReader(file("test\\PDFReference16.pdf", "rb")) 
    878     page2 = input2.getPage(0) 
    879     page3 = input2.getPage(1) 
    880     page1.mergePage(page2) 
    881     page1.mergePage(page3) 
    882  
    883     input3 = PdfFileReader(file("test\\cc-cc.pdf", "rb")) 
    884     page1.mergePage(input3.getPage(0)) 
    885  
    886     page1.compressContentStreams() 
    887  
    888     output.addPage(page1) 
    889     output.write(file("test\\merge-test.pdf", "wb")) 
    890  
    891  
     896#if __name__ == "__main__": 
     897#    output = PdfFileWriter() 
     898
     899#    input1 = PdfFileReader(file("test\\5000-s1-05e.pdf", "rb")) 
     900#    page1 = input1.getPage(0) 
     901
     902#    input2 = PdfFileReader(file("test\\PDFReference16.pdf", "rb")) 
     903#    page2 = input2.getPage(0) 
     904#    page3 = input2.getPage(1) 
     905#    page1.mergePage(page2) 
     906#    page1.mergePage(page3) 
     907
     908#    input3 = PdfFileReader(file("test\\cc-cc.pdf", "rb")) 
     909#    page1.mergePage(input3.getPage(0)) 
     910
     911#    page1.compressContentStreams() 
     912
     913#    output.addPage(page1) 
     914#    output.write(file("test\\merge-test.pdf", "wb")) 
     915 
     916