Changeset 784

Show
Ignore:
Timestamp:
12/15/06 20:53:19 (2 years ago)
Author:
mfenniak
Message:

add documentation for encryption from PDF ref.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • pypdf/trunk/pyPdf/pdf.py

    r783 r784  
    10791079        '\xa9\xfe\x64\x53\x69\x7a' 
    10801080 
     1081# Implementation of algorithm 3.2 of the PDF standard security handler, 
     1082# section 3.5.2 of the PDF 1.6 reference. 
    10811083def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt=True): 
     1084    # 1. Pad or truncate the password string to exactly 32 bytes.  If the 
     1085    # password string is more than 32 bytes long, use only its first 32 bytes; 
     1086    # if it is less than 32 bytes long, pad it by appending the required number 
     1087    # of additional bytes from the beginning of the padding string 
     1088    # (_encryption_padding). 
     1089    password = (password + _encryption_padding)[:32] 
     1090    # 2. Initialize the MD5 hash function and pass the result of step 1 as 
     1091    # input to this function. 
    10821092    import md5, struct 
    1083     m = md5.new() 
    1084     password = (password + _encryption_padding)[:32] 
    1085     m.update(password) 
     1093    m = md5.new(password) 
     1094    # 3. Pass the value of the encryption dictionary's /O entry to the MD5 hash 
     1095    # function. 
    10861096    m.update(owner_entry) 
     1097    # 4. Treat the value of the /P entry as an unsigned 4-byte integer and pass 
     1098    # these bytes to the MD5 hash function, low-order byte first. 
    10871099    p_entry = struct.pack('<i', p_entry) 
    10881100    m.update(p_entry) 
     1101    # 5. Pass the first element of the file's file identifier array to the MD5 
     1102    # hash function. 
    10891103    m.update(id1_entry) 
     1104    # 6. (Revision 3 or greater) If document metadata is not being encrypted, 
     1105    # pass 4 bytes with the value 0xFFFFFFFF to the MD5 hash function. 
    10901106    if rev >= 3 and not metadata_encrypt: 
    10911107        m.update("\xff\xff\xff\xff") 
     1108    # 7. Finish the hash. 
    10921109    md5_hash = m.digest() 
     1110    # 8. (Revision 3 or greater) Do the following 50 times: Take the output 
     1111    # from the previous MD5 hash and pass the first n bytes of the output as 
     1112    # input into a new MD5 hash, where n is the number of bytes of the 
     1113    # encryption key as defined by the value of the encryption dictionary's 
     1114    # /Length entry. 
    10931115    if rev >= 3: 
    10941116        for i in range(50): 
    10951117            md5_hash = md5.new(md5_hash[:keylen]).digest() 
     1118    # 9. Set the encryption key to the first n bytes of the output from the 
     1119    # final MD5 hash, where n is always 5 for revision 2 but, for revision 3 or 
     1120    # greater, depends on the value of the encryption dictionary's /Length 
     1121    # entry. 
    10961122    return md5_hash[:keylen] 
    10971123 
     1124# Implementation of algorithm 3.3 of the PDF standard security handler, 
     1125# section 3.5.2 of the PDF 1.6 reference. 
    10981126def _alg33(owner_pwd, user_pwd, rev, keylen): 
     1127    # steps 1 - 4 
    10991128    key = _alg33_1(owner_pwd, rev, keylen) 
     1129    # 5. Pad or truncate the user password string as described in step 1 of 
     1130    # algorithm 3.2. 
    11001131    user_pwd = (user_pwd + _encryption_padding)[:32] 
     1132    # 6. Encrypt the result of step 5, using an RC4 encryption function with 
     1133    # the encryption key obtained in step 4. 
    11011134    val = utils.RC4_encrypt(key, user_pwd) 
     1135    # 7. (Revision 3 or greater) Do the following 19 times: Take the output 
     1136    # from the previous invocation of the RC4 function and pass it as input to 
     1137    # a new invocation of the function; use an encryption key generated by 
     1138    # taking each byte of the encryption key obtained in step 4 and performing 
     1139    # an XOR operation between that byte and the single-byte value of the 
     1140    # iteration counter (from 1 to 19). 
    11021141    if rev >= 3: 
    11031142        for i in range(1, 20): 
     
    11061145                new_key += chr(ord(key[l]) ^ i) 
    11071146            val = utils.RC4_encrypt(new_key, val) 
     1147    # 8. Store the output from the final invocation of the RC4 as the value of 
     1148    # the /O entry in the encryption dictionary. 
    11081149    return val 
    11091150 
     1151# Steps 1-4 of algorithm 3.3 
    11101152def _alg33_1(password, rev, keylen): 
     1153    # 1. Pad or truncate the owner password string as described in step 1 of 
     1154    # algorithm 3.2.  If there is no owner password, use the user password 
     1155    # instead. 
     1156    password = (password + _encryption_padding)[:32] 
     1157    # 2. Initialize the MD5 hash function and pass the result of step 1 as 
     1158    # input to this function. 
    11111159    import md5 
    1112     m = md5.new() 
    1113     password = (password + _encryption_padding)[:32] 
    1114     m.update(password) 
     1160    m = md5.new(password) 
     1161    # 3. (Revision 3 or greater) Do the following 50 times: Take the output 
     1162    # from the previous MD5 hash and pass it as input into a new MD5 hash. 
    11151163    md5_hash = m.digest() 
    11161164    if rev >= 3: 
    11171165        for i in range(50): 
    11181166            md5_hash = md5.new(md5_hash).digest() 
     1167    # 4. Create an RC4 encryption key using the first n bytes of the output 
     1168    # from the final MD5 hash, where n is always 5 for revision 2 but, for 
     1169    # revision 3 or greater, depends on the value of the encryption 
     1170    # dictionary's /Length entry. 
    11191171    key = md5_hash[:keylen] 
    11201172    return key 
    11211173 
     1174# Implementation of algorithm 3.4 of the PDF standard security handler, 
     1175# section 3.5.2 of the PDF 1.6 reference. 
    11221176def _alg34(password, owner_entry, p_entry, id1_entry): 
     1177    # 1. Create an encryption key based on the user password string, as 
     1178    # described in algorithm 3.2. 
    11231179    key = _alg32(password, 2, 5, owner_entry, p_entry, id1_entry) 
     1180    # 2. Encrypt the 32-byte padding string shown in step 1 of algorithm 3.2, 
     1181    # using an RC4 encryption function with the encryption key from the 
     1182    # preceding step. 
    11241183    U = utils.RC4_encrypt(key, _encryption_padding) 
     1184    # 3. Store the result of step 2 as the value of the /U entry in the 
     1185    # encryption dictionary. 
    11251186    return U, key 
    11261187 
     1188# Implementation of algorithm 3.5 of the PDF standard security handler, 
     1189# section 3.5.2 of the PDF 1.6 reference. 
    11271190def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt): 
     1191    # 1. Create an encryption key based on the user password string, as 
     1192    # described in Algorithm 3.2. 
     1193    key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry) 
     1194    # 2. Initialize the MD5 hash function and pass the 32-byte padding string 
     1195    # shown in step 1 of Algorithm 3.2 as input to this function.  
    11281196    import md5 
    11291197    m = md5.new() 
    11301198    m.update(_encryption_padding) 
     1199    # 3. Pass the first element of the file's file identifier array (the value 
     1200    # of the ID entry in the document's trailer dictionary; see Table 3.13 on 
     1201    # page 73) to the hash function and finish the hash.  (See implementation 
     1202    # note 25 in Appendix H.)  
    11311203    m.update(id1_entry) 
    11321204    md5_hash = m.digest() 
    1133     key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry) 
     1205    # 4. Encrypt the 16-byte result of the hash, using an RC4 encryption 
     1206    # function with the encryption key from step 1.  
    11341207    val = utils.RC4_encrypt(key, md5_hash) 
     1208    # 5. Do the following 19 times: Take the output from the previous 
     1209    # invocation of the RC4 function and pass it as input to a new invocation 
     1210    # of the function; use an encryption key generated by taking each byte of 
     1211    # the original encryption key (obtained in step 1) and performing an XOR 
     1212    # operation between that byte and the single-byte value of the iteration 
     1213    # counter (from 1 to 19).  
    11351214    for i in range(1, 20): 
    11361215        new_key = '' 
     
    11381217            new_key += chr(ord(key[l]) ^ i) 
    11391218        val = utils.RC4_encrypt(new_key, val) 
     1219    # 6. Append 16 bytes of arbitrary padding to the output from the final 
     1220    # invocation of the RC4 function and store the 32-byte result as the value 
     1221    # of the U entry in the encryption dictionary.  
     1222    # (implementer's note: I don't know what "arbitrary padding" is supposed to 
     1223    # mean, so I have used null bytes.  This seems to match a few other 
     1224    # people's implementations) 
    11401225    return val + ('\x00' * 16), key 
    11411226