Changeset 770

Show
Ignore:
Timestamp:
12/13/06 10:13:06 (2 years ago)
Author:
mfenniak
Message:

Working, but barely tested, document decryption!

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • pypdf/trunk/pyPdf/pdf.py

    r769 r770  
    263263    # @return Returns a {@link #PageObject PageObject} instance. 
    264264    def getPage(self, pageNumber): 
    265         # ensure that we're not trying to access an encrypted PDF 
    266         assert not self.trailer.has_key("/Encrypt") 
     265        ## ensure that we're not trying to access an encrypted PDF 
     266        #assert not self.trailer.has_key("/Encrypt") 
    267267        if self.flattenedPages == None: 
    268268            self._flatten() 
     
    345345        assert generation == indirectReference.generation 
    346346        retval = readObject(self.stream, self) 
     347 
     348        # if retval is a stream or string, it might be encrypted: 
     349        if isinstance(retval, StringObject) or isinstance(retval, StreamObject): 
     350            # if we don't have the encryption key: 
     351            if self.isEncrypted and not hasattr(self, '_decryption_key'): 
     352                raise Exception, "file has not been decrypted" 
     353            # otherwise, decrypt here... 
     354            import struct, md5 
     355            pack1 = struct.pack("<i", indirectReference.idnum)[:3] 
     356            pack2 = struct.pack("<i", indirectReference.generation)[:2] 
     357            key = self._decryption_key + pack1 + pack2 
     358            assert len(key) == (len(self._decryption_key) + 5) 
     359            md5_hash = md5.new(key).digest() 
     360            key = md5_hash[:min(16, len(self._decryption_key) + 5)] 
     361            if isinstance(retval, StringObject): 
     362                retval = StringObject(utils.RC4_encrypt(key, retval)) 
     363            elif isinstance(retval, StreamObject): 
     364                retval._data = utils.RC4_encrypt(key, retval._data) 
     365 
    347366        self.cacheIndirectObject(generation, idnum, retval) 
    348367        return retval 
     
    540559        key = self._alg32(password, 2, 5) 
    541560        U = utils.RC4_encrypt(key, self._encryption_padding) 
    542         return U 
     561        return U, key 
    543562 
    544563    def _alg33_1(self, password, rev, keylen): 
     
    569588                new_key += chr(ord(key[l]) ^ i) 
    570589            val = utils.RC4_encrypt(new_key, val) 
    571         return val + ('\x00' * 16) 
    572  
    573     ## 
    574     # Decrypt file. 
     590        return val + ('\x00' * 16), key 
     591 
     592    def _authenticateUserPassword(self, password): 
     593        encrypt = self.safeGetObject(self.trailer['/Encrypt']) 
     594        rev = self.safeGetObject(encrypt['/R']) 
     595        if rev == 2: 
     596            U, key = self._alg34(password) 
     597        elif rev >= 3: 
     598            U, key = self._alg35(password, rev, self.safeGetObject(encrypt["/Length"]) / 8, 
     599                    self.safeGetObject(encrypt.get("/EncryptMetadata", False))) 
     600        real_U = self.safeGetObject(encrypt['/U']) 
     601        return U == real_U, key 
     602 
     603    ## 
     604    # When using an encrypted / secured PDF file with the PDF Standard 
     605    # encryption handler, this function will allow the file to be decrypted. 
     606    # It checks the given password against the document's user password and 
     607    # owner password, and then stores the resulting decryption key if either 
     608    # password is correct. 
     609    # <p> 
     610    # It does not matter which password was matched.  Both passwords provide 
     611    # the correct decryption key that will allow the document to be used with 
     612    # this library. 
     613    # 
     614    # @return 0 if the password failed, 1 if the password matched the user 
     615    # password, and 2 if the password matched the owner password. 
     616    # 
     617    # @exception NotImplementedError Document uses an unsupported encryption 
     618    # method. 
    575619    def decrypt(self, password): 
    576         user_password = self._authenticateUserPassword(password) 
     620        encrypt = self.safeGetObject(self.trailer['/Encrypt']) 
     621        if encrypt['/Filter'] != '/Standard': 
     622            raise NotImplementedError, "only Standard PDF encryption handler is available" 
     623        if not (encrypt['/V'] in (1, 2)): 
     624            raise NotImplementedError, "only algorithm code 1 and 2 are supported" 
     625        user_password, key = self._authenticateUserPassword(password) 
    577626        if user_password: 
    578             print "User password accepted" 
     627            self._decryption_key = key 
     628            return 1 
    579629        else: 
    580             encrypt = self.safeGetObject(self.trailer['/Encrypt']) 
    581630            rev = self.safeGetObject(encrypt['/R']) 
    582631            if rev == 2: 
     
    596645                    val = utils.RC4_encrypt(new_key, val) 
    597646                userpass = val 
    598             owner_password = self._authenticateUserPassword(userpass) 
     647            owner_password, key = self._authenticateUserPassword(userpass) 
    599648            if owner_password: 
    600                 print "Owner password accepted" 
    601             else: 
    602                 print "Password auth failed." 
    603  
    604     def _authenticateUserPassword(self, password): 
    605         encrypt = self.safeGetObject(self.trailer['/Encrypt']) 
    606         rev = self.safeGetObject(encrypt['/R']) 
    607         if rev == 2: 
    608             U = self._alg34(password) 
    609         elif rev >= 3: 
    610             U = self._alg35(password, rev, self.safeGetObject(encrypt["/Length"]) / 8, 
    611                     self.safeGetObject(encrypt.get("/EncryptMetadata", False))) 
    612         real_U = self.safeGetObject(encrypt['/U']) 
    613         return U == real_U 
     649                self._decryption_key = key 
     650                return 2 
     651        return 0 
     652 
     653    def getIsEncrypted(self): 
     654        return self.trailer.has_key("/Encrypt") 
     655 
     656    ## 
     657    # Read-only boolean property showing whether this PDF file is encrypted. 
     658    # Note that this property, if true, will remain true even after the {@link 
     659    # #PdfFileReader.decrypt decrypt} function is called. 
     660    isEncrypted = property(lambda self: self.getIsEncrypted(), None, None) 
    614661 
    615662