def readObject(stream, pdf):
    """Read the next PDF object from ``stream`` and return it.

    The leading character (and for some types a few more) is peeked at
    and then un-read, so whichever type-specific reader gets selected
    starts at the first character of the object.

    ``pdf`` is passed on to the readers for arrays, dictionaries and
    indirect references.
    """
    lead = stream.read(1)
    stream.seek(-1, 1)  # un-read the peeked character

    if lead in ('t', 'f'):
        # true / false
        return BooleanObject.readFromStream(stream)
    if lead == '(':
        # literal string
        return StringObject.readFromStream(stream)
    if lead == '/':
        # name
        return NameObject.readFromStream(stream)
    if lead == '[':
        # array
        return ArrayObject.readFromStream(stream, pdf)
    if lead == 'n':
        # null
        return NullObject.readFromStream(stream)
    if lead == '<':
        # "<<" opens a dictionary, a single "<" opens a hex string
        opener = stream.read(2)
        stream.seek(-2, 1)  # un-read both characters
        if opener == '<<':
            return DictionaryObject.readFromStream(stream, pdf)
        return StringObject.readHexStringFromStream(stream)
    if lead in ('+', '-'):
        # a sign can only start a number, never an indirect reference
        return NumberObject.readFromStream(stream)

    # Either a plain number or an indirect reference ("<id> <gen> R");
    # look ahead far enough to tell the two apart, then rewind.
    lookahead = stream.read(20)
    stream.seek(-len(lookahead), 1)
    if re.match(r"(\d+)\s(\d+)\sR", lookahead) is not None:
        return IndirectObject.readFromStream(stream, pdf)
    return NumberObject.readFromStream(stream)
|---|
| 454 | | |
|---|
| 455 | | |
|---|
class BooleanObject(object):
    """PDF boolean object; the truth value is kept in ``value``."""

    def __init__(self, value):
        self.value = value

    def writeToStream(self, stream):
        """Write the keyword ``true`` or ``false`` to ``stream``."""
        if self.value:
            stream.write("true")
        else:
            stream.write("false")

    def readFromStream(stream):
        """Read ``true`` or ``false`` from ``stream``.

        Returns a new BooleanObject.  Raises AssertionError when the
        stream does not hold one of the two keywords.
        """
        word = stream.read(4)
        if word == "true":
            return BooleanObject(True)
        # "false" is one character longer; check that character instead of
        # discarding it blindly, so that e.g. "falsX" is rejected.
        if word == "fals" and stream.read(1) == "e":
            return BooleanObject(False)
        # Raised explicitly (not via ``assert``) so the check is still
        # performed when Python runs with -O.
        raise AssertionError("expected 'true' or 'false', got %r" % (word,))
    readFromStream = staticmethod(readFromStream)
|---|
| 475 | | |
|---|
| 476 | | |
|---|
class ArrayObject(list):
    """PDF array object: a list whose items know how to write themselves."""

    def writeToStream(self, stream):
        """Write the array as ``[ item item ... ]`` to ``stream``.

        Every item must provide ``writeToStream(stream)``.
        """
        stream.write("[")
        for data in self:
            stream.write(" ")
            data.writeToStream(stream)
        stream.write(" ]")

    def readFromStream(stream, pdf):
        """Read an array from ``stream`` and return it as an ArrayObject.

        The stream must be positioned on the opening ``[``.  Items are
        read with ``readObject(stream, pdf)`` until the closing ``]``,
        which is consumed.  Raises AssertionError if the opening bracket
        is missing or the stream ends before the array is closed.
        """
        # The read is done outside of any ``assert`` so that the bracket is
        # still consumed when Python runs with -O.
        opener = stream.read(1)
        if opener != "[":
            raise AssertionError("array must start with '[', got %r" % (opener,))
        arr = ArrayObject()
        while True:
            # skip leading whitespace
            tok = stream.read(1)
            while tok.isspace():
                tok = stream.read(1)
            if not tok:
                # An empty read means end of stream; seeking back from here
                # would re-read the last character forever.
                raise AssertionError("unexpected end of stream inside array")
            # check for array ending
            if tok == "]":
                break
            # not the end: un-read the character, then read and append obj
            stream.seek(-1, 1)
            arr.append(readObject(stream, pdf))
        return arr
    readFromStream = staticmethod(readFromStream)
|---|
| 503 | | |
|---|
| 504 | | |
|---|
class IndirectObject(object):
    """PDF indirect reference (``<idnum> <generation> R``).

    ``pdf`` is stored on the instance unchanged; this class itself never
    calls into it.
    """

    def __init__(self, idnum, generation, pdf):
        self.idnum = idnum
        self.generation = generation
        self.pdf = pdf

    def __repr__(self):
        return "IndirectObject(%r, %r)" % (self.idnum, self.generation)

    def writeToStream(self, stream):
        """Write the reference as ``<idnum> <generation> R``."""
        stream.write("%s %s R" % (self.idnum, self.generation))

    def _readToken(stream):
        """Return the characters up to the next whitespace character.

        The terminating whitespace character is consumed.  Raises
        AssertionError if the stream ends first (an empty read is not
        whitespace, so without this check the loop would never finish).
        """
        chars = []
        while True:
            tok = stream.read(1)
            if not tok:
                raise AssertionError(
                    "unexpected end of stream in indirect reference")
            if tok.isspace():
                return "".join(chars)
            chars.append(tok)
    _readToken = staticmethod(_readToken)

    def readFromStream(stream, pdf):
        """Read ``<idnum> <generation> R`` and return an IndirectObject.

        Exactly one whitespace character is expected after each of the
        two numbers.  Raises AssertionError when the trailing ``R`` is
        missing or the stream ends early, and ValueError (from ``int``)
        when either number is not a valid integer.
        """
        idnum = IndirectObject._readToken(stream)
        generation = IndirectObject._readToken(stream)
        r = stream.read(1)
        # Raised explicitly so the check also runs under -O.
        if r != "R":
            raise AssertionError(
                "expected 'R' after %r %r, got %r" % (idnum, generation, r))
        return IndirectObject(int(idnum), int(generation), pdf)
    readFromStream = staticmethod(readFromStream)
|---|
| 538 | | |
|---|
| 539 | | |
|---|
class FloatObject(float):
    """PDF real number object."""

    def writeToStream(self, stream):
        """Write the number to ``stream`` in plain decimal notation.

        ``repr`` switches to exponent notation (e.g. ``1e-05``) for very
        small or very large values; PDF real numbers have no exponent
        form, so such values are expanded to plain digits first.  All
        other values are written exactly as ``repr`` gives them.
        """
        text = repr(self)
        if "e" in text:
            text = FloatObject._withoutExponent(text)
        stream.write(text)

    def _withoutExponent(text):
        """Rewrite ``<mantissa>e<exponent>`` text as plain decimal digits."""
        mantissa, exponent = text.split("e")
        sign = ""
        if mantissa.startswith("-"):
            sign = "-"
            mantissa = mantissa[1:]
        if "." in mantissa:
            whole, fraction = mantissa.split(".")
        else:
            whole, fraction = mantissa, ""
        digits = whole + fraction
        # index within ``digits`` at which the decimal point belongs
        point = len(whole) + int(exponent)
        if point <= 0:
            return sign + "0." + "0" * (-point) + digits
        if point >= len(digits):
            return sign + digits + "0" * (point - len(digits)) + ".0"
        return sign + digits[:point] + "." + digits[point:]
    _withoutExponent = staticmethod(_withoutExponent)
|---|
| 543 | | |
|---|
| 544 | | |
|---|
class NumberObject(int):
    """PDF integer object."""

    # No __init__ is defined: an int's value is fixed by int.__new__, and
    # forwarding the argument to int.__init__ is rejected by newer Pythons.

    def writeToStream(self, stream):
        """Write the integer's ``repr`` to ``stream``."""
        stream.write(repr(self))

    def readFromStream(stream):
        """Read a number from ``stream``.

        Consumes the longest run of digits, ``+``, ``-`` and ``.``.
        Returns a FloatObject if the run contains a ``.``, otherwise a
        NumberObject.  The character that ended the run is un-read.
        Raises ValueError (from the numeric conversion) if the run is
        empty or not a valid number.
        """
        chars = []
        while True:
            tok = stream.read(1)
            if tok not in ("+", "-", ".") and not tok.isdigit():
                # Only step back over a real character: at end of stream
                # nothing was read, and seeking back would un-read the
                # last digit of the number.
                if tok:
                    stream.seek(-1, 1)
                break
            chars.append(tok)
        name = "".join(chars)
        if "." in name:
            return FloatObject(name)
        return NumberObject(name)
    readFromStream = staticmethod(readFromStream)
|---|
| 565 | | |
|---|
| 566 | | |
|---|
class StringObject(str):
    """PDF string object, readable from literal ``(...)`` or hex ``<...>`` form."""

    # Escape letters that stand for a control character.  Any other
    # character following a backslash stands for itself.
    _escapeCharacters = {"n": "\n", "r": "\r", "t": "\t", "b": "\b", "f": "\f"}

    def writeToStream(self, stream):
        """Write the string to ``stream`` as a literal ``(...)`` string.

        Alphanumeric and whitespace characters are written as they are;
        every other character is written as a backslash followed by its
        octal code, zero-padded to at least three digits.
        """
        stream.write("(")
        for c in self:
            if not c.isalnum() and not c.isspace():
                stream.write("\\%03o" % ord(c))
            else:
                stream.write(c)
        stream.write(")")

    def readHexStringFromStream(stream):
        """Read a hex string (``<48656C6C6F>``) and return a StringObject.

        The stream must be positioned on the opening ``<``.  Characters
        are fetched with ``readNonWhitespace``; a trailing unpaired digit
        is padded with ``0``.  Raises AssertionError if an empty read
        (taken to mean end of stream) occurs before the closing ``>``,
        and ValueError (from ``int``) on a non-hex digit.
        """
        stream.read(1)  # opening "<"
        chars = []
        pair = ""
        while True:
            tok = readNonWhitespace(stream)
            if not tok:
                # an empty token never completes a pair, so the loop
                # could not make progress
                raise AssertionError(
                    "unexpected end of stream inside hex string")
            if tok == ">":
                break
            pair += tok
            if len(pair) == 2:
                chars.append(chr(int(pair, base=16)))
                pair = ""
        if len(pair) == 1:
            # odd number of digits: the missing last digit counts as 0
            chars.append(chr(int(pair + "0", base=16)))
        return StringObject("".join(chars))
    readHexStringFromStream = staticmethod(readHexStringFromStream)

    def readFromStream(stream):
        """Read a literal string (``(...)``) and return a StringObject.

        The stream must be positioned on the opening parenthesis.
        Balanced, unescaped parentheses are kept as part of the text.
        After a backslash: ``n r t b f`` give the matching control
        character, one to three octal digits give the character with
        that code (overflow above 8 bits is dropped), an end-of-line is
        a line continuation and contributes nothing, and any other
        character stands for itself.  Raises AssertionError if the
        stream ends before the string is closed.
        """
        stream.read(1)  # opening "("
        depth = 1
        chars = []
        while True:
            tok = stream.read(1)
            if not tok:
                raise AssertionError(
                    "unexpected end of stream inside literal string")
            if tok == "(":
                depth += 1
            elif tok == ")":
                depth -= 1
                if depth == 0:
                    break
            elif tok == "\\":
                tok = stream.read(1)
                if not tok:
                    raise AssertionError(
                        "unexpected end of stream inside literal string")
                if tok in StringObject._escapeCharacters:
                    tok = StringObject._escapeCharacters[tok]
                elif tok in "01234567":
                    # up to two further octal digits may follow
                    for _ in range(2):
                        digit = stream.read(1)
                        if digit and digit in "01234567":
                            tok += digit
                        else:
                            if digit:
                                stream.seek(-1, 1)  # not ours; un-read it
                            break
                    tok = chr(int(tok, base=8) & 0xFF)
                elif tok in ("\r", "\n"):
                    # line continuation: drop the end-of-line, treating
                    # CR LF as a single end-of-line
                    if tok == "\r":
                        following = stream.read(1)
                        if following and following != "\n":
                            stream.seek(-1, 1)
                    continue
                # anything else (including "(", ")" and "\\") stands for
                # itself with the backslash dropped
            chars.append(tok)
        return StringObject("".join(chars))
    readFromStream = staticmethod(readFromStream)
|---|
| 632 | | |
|---|
| 633 | | |
|---|
class NameObject(str):
    """PDF name object; the text includes the leading ``/``."""

    # characters that end a name (in addition to whitespace)
    delimiterCharacters = "(", ")", "<", ">", "[", "]", "{", "}", "/", "%"

    # No __init__ is defined: a str's value is fixed by str.__new__, and
    # forwarding the argument to str.__init__ is rejected by newer Pythons.

    def writeToStream(self, stream):
        """Write the name to ``stream`` exactly as stored."""
        stream.write(self)

    def readFromStream(stream):
        """Read a name from ``stream`` and return it as a NameObject.

        The stream must be positioned on the leading ``/``.  The name
        runs up to the next whitespace or delimiter character, which is
        un-read, or to the end of the stream.  Raises AssertionError if
        the leading ``/`` is missing.
        """
        name = stream.read(1)
        # Raised explicitly so the check also runs under -O.
        if name != "/":
            raise AssertionError("name must start with '/', got %r" % (name,))
        chars = [name]
        while True:
            tok = stream.read(1)
            if not tok:
                # End of stream ends the name too.  An empty read is
                # neither whitespace nor a delimiter, so without this
                # check the loop would never finish.
                break
            if tok.isspace() or tok in NameObject.delimiterCharacters:
                stream.seek(-1, 1)
                break
            chars.append(tok)
        return NameObject("".join(chars))
    readFromStream = staticmethod(readFromStream)
|---|
| 654 | | |
|---|
| 655 | | |
|---|
class DictionaryObject(dict):
    """PDF dictionary object, optionally carrying stream data.

    When the dictionary belongs to a stream object, the raw stream
    content is stored under the special key ``"__streamdata__"``.
    """

    def __init__(self):
        # Takes no arguments; entries are added after construction.
        pass

    def writeToStream(self, stream):
        """Write the dictionary (and stream data, if any) to ``stream``.

        Every key and value other than the ``"__streamdata__"`` entry
        must provide ``writeToStream(stream)``.
        """
        stream.write("<<\n")
        for key, value in self.items():
            if key == "__streamdata__":
                # written separately, after the closing ">>"
                continue
            key.writeToStream(stream)
            stream.write(" ")
            value.writeToStream(stream)
            stream.write("\n")
        stream.write(">>")
        if "__streamdata__" in self:
            stream.write("\nstream\n")
            stream.write(self["__streamdata__"])
            stream.write("\nendstream")

    def readFromStream(stream, pdf):
        """Read a dictionary, plus any stream data following it.

        The stream must be positioned on the opening ``<<``.  Keys and
        values are read with ``readObject(stream, pdf)``.  If the
        ``stream`` keyword follows the dictionary, ``/Length`` bytes are
        read and stored under ``"__streamdata__"``; an indirect
        ``/Length`` is resolved with ``pdf.getObject``.

        Raises AssertionError on a missing ``<<``, an early end of
        stream, a repeated key, or a malformed stream section.
        """
        # The read is done outside of any ``assert`` so that the marker is
        # still consumed when Python runs with -O.
        marker = stream.read(2)
        if marker != "<<":
            raise AssertionError(
                "dictionary must start with '<<', got %r" % (marker,))
        retval = DictionaryObject()
        while True:
            tok = readNonWhitespace(stream)
            if not tok:
                # an empty token is taken to mean end of stream
                raise AssertionError(
                    "unexpected end of stream inside dictionary")
            if tok == ">":
                stream.read(1)  # second ">" of the closing ">>"
                break
            stream.seek(-1, 1)
            key = readObject(stream, pdf)
            # skip whitespace between key and value, then un-read the
            # first character of the value
            tok = readNonWhitespace(stream)
            stream.seek(-1, 1)
            value = readObject(stream, pdf)
            if key in retval:
                # multiple definitions of key not handled yet
                raise AssertionError(
                    "multiple definitions of key %r not handled" % (key,))
            retval[key] = value

        # Look ahead for the "stream" keyword; if it is not there, rewind
        # to just after the dictionary.
        pos = stream.tell()
        s = readNonWhitespace(stream)
        if s == 's' and stream.read(5) == 'tream':
            eol = stream.read(1)
            if eol not in ("\n", "\r"):
                raise AssertionError(
                    "expected end-of-line after 'stream', got %r" % (eol,))
            if eol == "\r":
                # Normally "\n" follows; if something else does, it is
                # the first data byte and must not be swallowed.
                following = stream.read(1)
                if following and following != "\n":
                    stream.seek(-1, 1)
            # this is a stream object, not a dictionary
            if "/Length" not in retval:
                raise AssertionError("stream dictionary has no /Length")
            length = retval["/Length"]
            if isinstance(length, IndirectObject):
                # Position is saved and restored around the lookup --
                # presumably because getObject moves the stream.
                t = stream.tell()
                length = pdf.getObject(length)
                stream.seek(t, 0)
            retval["__streamdata__"] = stream.read(length)
            e = readNonWhitespace(stream)
            ndstream = stream.read(8)
            if not (e == "e" and ndstream == "ndstream"):
                raise AssertionError(
                    "expected 'endstream', got %r" % (e + ndstream,))
        else:
            stream.seek(pos, 0)
        return retval
    readFromStream = staticmethod(readFromStream)
|---|
| 714 | | |
|---|
| 715 | | |
|---|