Changeset 784
- Timestamp:
- 12/15/06 20:53:19 (2 years ago)
- Files:
-
- pypdf/trunk/pyPdf/pdf.py (modified) (3 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
pypdf/trunk/pyPdf/pdf.py
r783 r784 1079 1079 '\xa9\xfe\x64\x53\x69\x7a' 1080 1080 1081 # Implementation of algorithm 3.2 of the PDF standard security handler, 1082 # section 3.5.2 of the PDF 1.6 reference. 1081 1083 def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt=True): 1084 # 1. Pad or truncate the password string to exactly 32 bytes. If the 1085 # password string is more than 32 bytes long, use only its first 32 bytes; 1086 # if it is less than 32 bytes long, pad it by appending the required number 1087 # of additional bytes from the beginning of the padding string 1088 # (_encryption_padding). 1089 password = (password + _encryption_padding)[:32] 1090 # 2. Initialize the MD5 hash function and pass the result of step 1 as 1091 # input to this function. 1082 1092 import md5, struct 1083 m = md5.new( )1084 password = (password + _encryption_padding)[:32]1085 m.update(password)1093 m = md5.new(password) 1094 # 3. Pass the value of the encryption dictionary's /O entry to the MD5 hash 1095 # function. 1086 1096 m.update(owner_entry) 1097 # 4. Treat the value of the /P entry as an unsigned 4-byte integer and pass 1098 # these bytes to the MD5 hash function, low-order byte first. 1087 1099 p_entry = struct.pack('<i', p_entry) 1088 1100 m.update(p_entry) 1101 # 5. Pass the first element of the file's file identifier array to the MD5 1102 # hash function. 1089 1103 m.update(id1_entry) 1104 # 6. (Revision 3 or greater) If document metadata is not being encrypted, 1105 # pass 4 bytes with the value 0xFFFFFFFF to the MD5 hash function. 1090 1106 if rev >= 3 and not metadata_encrypt: 1091 1107 m.update("\xff\xff\xff\xff") 1108 # 7. Finish the hash. 1092 1109 md5_hash = m.digest() 1110 # 8. 
(Revision 3 or greater) Do the following 50 times: Take the output 1111 # from the previous MD5 hash and pass the first n bytes of the output as 1112 # input into a new MD5 hash, where n is the number of bytes of the 1113 # encryption key as defined by the value of the encryption dictionary's 1114 # /Length entry. 1093 1115 if rev >= 3: 1094 1116 for i in range(50): 1095 1117 md5_hash = md5.new(md5_hash[:keylen]).digest() 1118 # 9. Set the encryption key to the first n bytes of the output from the 1119 # final MD5 hash, where n is always 5 for revision 2 but, for revision 3 or 1120 # greater, depends on the value of the encryption dictionary's /Length 1121 # entry. 1096 1122 return md5_hash[:keylen] 1097 1123 1124 # Implementation of algorithm 3.3 of the PDF standard security handler, 1125 # section 3.5.2 of the PDF 1.6 reference. 1098 1126 def _alg33(owner_pwd, user_pwd, rev, keylen): 1127 # steps 1 - 4 1099 1128 key = _alg33_1(owner_pwd, rev, keylen) 1129 # 5. Pad or truncate the user password string as described in step 1 of 1130 # algorithm 3.2. 1100 1131 user_pwd = (user_pwd + _encryption_padding)[:32] 1132 # 6. Encrypt the result of step 5, using an RC4 encryption function with 1133 # the encryption key obtained in step 4. 1101 1134 val = utils.RC4_encrypt(key, user_pwd) 1135 # 7. (Revision 3 or greater) Do the following 19 times: Take the output 1136 # from the previous invocation of the RC4 function and pass it as input to 1137 # a new invocation of the function; use an encryption key generated by 1138 # taking each byte of the encryption key obtained in step 4 and performing 1139 # an XOR operation between that byte and the single-byte value of the 1140 # iteration counter (from 1 to 19). 1102 1141 if rev >= 3: 1103 1142 for i in range(1, 20): … … 1106 1145 new_key += chr(ord(key[l]) ^ i) 1107 1146 val = utils.RC4_encrypt(new_key, val) 1147 # 8. 
Store the output from the final invocation of the RC4 as the value of 1148 # the /O entry in the encryption dictionary. 1108 1149 return val 1109 1150 1151 # Steps 1-4 of algorithm 3.3 1110 1152 def _alg33_1(password, rev, keylen): 1153 # 1. Pad or truncate the owner password string as described in step 1 of 1154 # algorithm 3.2. If there is no owner password, use the user password 1155 # instead. 1156 password = (password + _encryption_padding)[:32] 1157 # 2. Initialize the MD5 hash function and pass the result of step 1 as 1158 # input to this function. 1111 1159 import md5 1112 m = md5.new( )1113 password = (password + _encryption_padding)[:32]1114 m.update(password)1160 m = md5.new(password) 1161 # 3. (Revision 3 or greater) Do the following 50 times: Take the output 1162 # from the previous MD5 hash and pass it as input into a new MD5 hash. 1115 1163 md5_hash = m.digest() 1116 1164 if rev >= 3: 1117 1165 for i in range(50): 1118 1166 md5_hash = md5.new(md5_hash).digest() 1167 # 4. Create an RC4 encryption key using the first n bytes of the output 1168 # from the final MD5 hash, where n is always 5 for revision 2 but, for 1169 # revision 3 or greater, depends on the value of the encryption 1170 # dictionary's /Length entry. 1119 1171 key = md5_hash[:keylen] 1120 1172 return key 1121 1173 1174 # Implementation of algorithm 3.4 of the PDF standard security handler, 1175 # section 3.5.2 of the PDF 1.6 reference. 1122 1176 def _alg34(password, owner_entry, p_entry, id1_entry): 1177 # 1. Create an encryption key based on the user password string, as 1178 # described in algorithm 3.2. 1123 1179 key = _alg32(password, 2, 5, owner_entry, p_entry, id1_entry) 1180 # 2. Encrypt the 32-byte padding string shown in step 1 of algorithm 3.2, 1181 # using an RC4 encryption function with the encryption key from the 1182 # preceding step. 1124 1183 U = utils.RC4_encrypt(key, _encryption_padding) 1184 # 3. 
Store the result of step 2 as the value of the /U entry in the 1185 # encryption dictionary. 1125 1186 return U, key 1126 1187 1188 # Implementation of algorithm 3.5 of the PDF standard security handler, 1189 # section 3.5.2 of the PDF 1.6 reference. 1127 1190 def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt): 1191 # 1. Create an encryption key based on the user password string, as 1192 # described in Algorithm 3.2. 1193 key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry) 1194 # 2. Initialize the MD5 hash function and pass the 32-byte padding string 1195 # shown in step 1 of Algorithm 3.2 as input to this function. 1128 1196 import md5 1129 1197 m = md5.new() 1130 1198 m.update(_encryption_padding) 1199 # 3. Pass the first element of the file's file identifier array (the value 1200 # of the ID entry in the document's trailer dictionary; see Table 3.13 on 1201 # page 73) to the hash function and finish the hash. (See implementation 1202 # note 25 in Appendix H.) 1131 1203 m.update(id1_entry) 1132 1204 md5_hash = m.digest() 1133 key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry) 1205 # 4. Encrypt the 16-byte result of the hash, using an RC4 encryption 1206 # function with the encryption key from step 1. 1134 1207 val = utils.RC4_encrypt(key, md5_hash) 1208 # 5. Do the following 19 times: Take the output from the previous 1209 # invocation of the RC4 function and pass it as input to a new invocation 1210 # of the function; use an encryption key generated by taking each byte of 1211 # the original encryption key (obtained in step 1) and performing an XOR 1212 # operation between that byte and the single-byte value of the iteration 1213 # counter (from 1 to 19). 1135 1214 for i in range(1, 20): 1136 1215 new_key = '' … … 1138 1217 new_key += chr(ord(key[l]) ^ i) 1139 1218 val = utils.RC4_encrypt(new_key, val) 1219 # 6. 
Append 16 bytes of arbitrary padding to the output from the final 1220 # invocation of the RC4 function and store the 32-byte result as the value 1221 # of the U entry in the encryption dictionary. 1222 # (implementer note: I don't know what "arbitrary padding" is supposed to 1223 # mean, so I have used null bytes. This seems to match a few other 1224 # people's implementations.) 1140 1225 return val + ('\x00' * 16), key 1141 1226
