// Number PDF bookmarks Version 1.0 // by Rob Elliott (Rob_Elliott@compuserve.com) // Copyright 2001 Compaq Computer Corporation. All rights reserved. // Also developed by Nathan Dansfiell and Joe Foster // // Released as freeware. // // Adds section numbers to PDF files generated by Microsoft Word. // // Revision history: // November 2001 - 1.0beta - first release // 22 April 2002 - 1.0 // removes debug output messages // forces all bookmarks closed // supports file names with spaces // detects Unicode strings and prints an error message // #include "stdafx.h" #include #include #include #include #define BUFF_LEN 256 // usually used for input line buffer mallocs //this is the object with all the variables from pdf format included. //it matches the id numbers found in pdf file with a corresponding //struct pointer to ref it later. struct pdf_node { int id; // PDF object ID int first_child_id; // PDF child object ID int next_peer_id; // PDF next peer object ID int last_id; // PDF last child object ID int parent_id; // PDF parent object ID int prev_id; // PDF previous peer object ID int count; // PDF children count struct pdf_node *next_peer; struct pdf_node *first_child; struct pdf_node *last; struct pdf_node *parent; struct pdf_node *prev; struct pdf_node *next_pdf_object; // linked list through all the objects char *title; // PDF bookmark title char *dest; }; // struct pdf_node int skip_initial_bookmarks = 0; // how many top level bookmarks to leave unnumbered at the start int initial_bookmarks = 0; // count how many bookmarks have been parsed int start_annex_numbering = 9999; // how many top level decimals to use before switching to annex mode int first_bookmark_id = 0; //this is the id number found in outlines in the begining int first_bookmark_generation = 0; char *bookmark_buffer = 0; // holds new bookmarks char *bookmark_output_ptr = 0; // current output location in bookmark buffer int bookmark_size; // size of new bookmark section char *xref_begin[9]; // HACKHACK array of pointers to xref sections char *xref_end[9]; int number_xref_sections = 0; // offset into the bookmark section of each bookmark // this is inserted by sprintf_pdf_index // and is used to update the xref tables in the PDF file int bookmark_offsets[9999]; // HACKHACK // several variables that have to be updated when the file changes int linearized_l = 0; // total file length int linearized_t = 0; // offset of 1st entry in xref table int linearized_e = 0; // offset to end of 1st page int trailer_prev[9]; // offset of the previous "xref" in linearized files int startxref_number[9]; // offset of an xref table char *linearized_l_offset = 0; // pointer to location of each variable char *linearized_t_offset = 0; char *linearized_e_offset = 0; char *trailer_prev_offset[9]; char *startxref_number_offset[9]; // function prototypes char *SkipToNull (char *); void print_pdf_node(struct pdf_node *); struct pdf_node *find_id (struct pdf_node *); void index_builder (struct pdf_node *, char *); void sprintf_pdf_root_node (struct pdf_node *); void sprintf_pdf_node (struct pdf_node *); // This function assumes that any charctor array pointed to // by "buffer" is terminated with a \0, other wise we could // go off into space... char *SkipToNull (char *buffer) { while (buffer && *buffer++) ; return buffer; } // SkipToNull // print single bookmark tree entry void print_pdf_node (struct pdf_node *ptr) { if (ptr != NULL) { printf ("\n"); printf ("ptr=%x\n", ptr); printf ("id=%d\n", ptr->id); printf ("first_child_id=%d\n", ptr->first_child_id); printf ("next_peer_id=%d\n", ptr->next_peer_id); printf ("last_id=%d\n", ptr->last_id); printf ("parent_id=%d\n", ptr->parent_id); printf ("prev_id=%d\n", ptr->prev_id); printf ("count=%d\n", ptr->count); printf ("next peer=%x\n", ptr->next_peer); printf ("first_child=%x\n", ptr->first_child); printf ("last=%x\n", ptr->last); printf ("parent=%x\n", ptr->parent); printf ("prev=%x\n", ptr->prev); printf ("title=%x=>%s<\n", ptr->title, ptr->title); printf ("next pdf object=%x\n", ptr->next_pdf_object); printf ("\n"); } } // print_pdf_node // return if the pdf_node tree is finished or not // call from the object being parsed int tree_finished (struct pdf_node *ptr, int current_id) { if (ptr) { //printf ("tree_finished %d %d\n", ptr->id, current_id); if ((ptr->first_child_id > current_id) || (ptr->next_peer_id > current_id)) return 0; else return tree_finished (ptr->next_pdf_object, current_id); } else return 1; } // print entire bookmark tree void print_pdf_nodes (struct pdf_node *ptr) { while (ptr) { print_pdf_node (ptr); ptr = ptr->next_pdf_object; } } // print_pdf_nodes //parsing of the data struct to include the index numbers now void index_builder (struct pdf_node *ptr, char *prefix) { char *src; char *desty; char *strtmp; int newprefix = 0; char *newstring; char *newprefixstring; char *dotinoldstring; if (!ptr || !prefix) { printf ("index builder called with null pointer! ptr=%d prefix=%s\n", ptr, prefix); exit (1); } newstring = (char *) malloc(BUFF_LEN); newprefixstring = (char *) malloc(BUFF_LEN); strtmp = (char *) malloc(BUFF_LEN); dotinoldstring = (char *) malloc(BUFF_LEN); newstring[0] = NULL; newprefixstring[0] = NULL; strtmp[0] = NULL; dotinoldstring[0] = NULL; //printf ("index_builder ptr id=%d, prefix=:%s:\n", ptr->id, prefix); if ((initial_bookmarks >= skip_initial_bookmarks) && (*prefix != 'Z')) { // FIXFIX unicode more complicated than this // the SkipToNul routine doesn't work since Unicode adds lots of nulls in the string // also need to call wsprintf to print the wide string to the output file // Unicode strings in PDF start with 254 then 255, then UTF-16 data // UTF-16 data usually has NULL, ASCII format if (((unsigned char) ptr->title[0] == 0xFE) && ((unsigned char) ptr->title[1] == 0xFF)) { printf ("WARNING: Unicode string found in bookmark - string will be lost\n"); swprintf ((wchar_t *) strtmp, L"%s %s", prefix, ptr->title); wcscpy ((wchar_t *) ptr->title, (wchar_t *) strtmp); } else { sprintf (strtmp, "%s %s", prefix, ptr->title); strcpy (ptr->title, strtmp); } } //printf("Title is %s\n", ptr->title); initial_bookmarks++; // recurse through children first // add .1 to the prefix if (ptr->first_child) { //printf ("index_builder id=%d first_child prefix=:%s:\n", ptr->id, prefix); sprintf (strtmp, "%s.1", prefix); //gives it a new .1 starting place each time it finds a new child node index_builder (ptr->first_child, strtmp); } //else //printf ("No more children of this node\n"); // recurse through peers next // increment the last digit of the prefix if (ptr->next_peer) { //printf("index_builder next_peer id=%d prefix=:%s:\n", ptr->id, prefix); newstring[0] = NULL; dotinoldstring = strrchr (prefix, '.'); // last dot if (dotinoldstring) { newprefix = atoi (dotinoldstring + 1) + 1; //finds and adds to the number behind the found "." src = prefix; desty = newstring; while (src <= dotinoldstring) { *desty++ = *src++; } *(desty)= NULL; sprintf (newprefixstring, "%d", newprefix); strcat (newstring, newprefixstring); index_builder (ptr->next_peer, newstring); } else { if (initial_bookmarks > skip_initial_bookmarks) { if (atoi (prefix) == start_annex_numbering) { strtmp[0] = 'A'; strtmp[1] = NULL; } else if (isalpha (*prefix)) { //printf ("isalpha true %s\n", prefix); if (*prefix == 'Z') { // in case there are too many annexes strtmp[0] = 'Z'; // Z means stop numbering strtmp[1] = NULL; } else strtmp[0] = *prefix + 1; // +1 works for letters too strtmp[1] = NULL; } else { sprintf (strtmp, "%d", atoi(prefix) + 1); } index_builder (ptr->next_peer, strtmp); } else { index_builder (ptr->next_peer, prefix); } } } //else // printf ("No more peers of this node\n"); } // index_builder // this prints the new output straight to a buffer void sprintf_pdf_root_node (struct pdf_node *ptr) { if (!ptr) { printf ("scan_pdf_root_node called with null pointer: ptr=%d\n", ptr); return; } if (ptr != NULL) { int i; for (i = 0; i < 10000; i++) bookmark_offsets[i] = -1; bookmark_offsets[ptr->id] = bookmark_output_ptr - bookmark_buffer; //printf ("Bookmark_offsets[%d] = %d\n", ptr->id, bookmark_offsets[ptr->id]); bookmark_output_ptr += sprintf (bookmark_output_ptr, "%d 0 obj\r", ptr->id); bookmark_output_ptr += sprintf (bookmark_output_ptr, "<< \r"); if (ptr->count != NULL) bookmark_output_ptr += sprintf (bookmark_output_ptr, "/Count %d \r", ptr->count); if (ptr->first_child_id != NULL) bookmark_output_ptr += sprintf (bookmark_output_ptr, "/First %d 0 R \r", ptr->first_child_id); if (ptr->last_id != NULL) bookmark_output_ptr += sprintf (bookmark_output_ptr, "/Last %d 0 R \r", ptr->last_id); bookmark_output_ptr += sprintf (bookmark_output_ptr, ">> \r"); bookmark_output_ptr += sprintf (bookmark_output_ptr, "endobj\r"); sprintf_pdf_node (ptr->next_pdf_object); } } // sprintf_pdf_root_node // this prints the new output straight to a buffer void sprintf_pdf_node (struct pdf_node *ptr) { if (!ptr) { printf ("scan_pdf_node called with null pointer: ptr=%d\n", ptr); return; } while (ptr != NULL) { bookmark_offsets[ptr->id] = bookmark_output_ptr - bookmark_buffer; //printf ("bookmark_offsets[%d] = %d\n", ptr->id, bookmark_offsets[ptr->id]); bookmark_output_ptr += sprintf (bookmark_output_ptr, "%d 0 obj\r", ptr->id); bookmark_output_ptr += sprintf (bookmark_output_ptr, "<< \r"); // FIXFIX bookmark_output_ptr += sprintf (bookmark_output_ptr, "/Title (%.9972s", ptr->title); // wrapping shouldn't be necessary, but Acrobat Distiller seems to do it if (strlen (ptr->title) > 9971) { // FIXFIX took out cannot wrap in between \ and a special character bookmark_output_ptr += sprintf (bookmark_output_ptr, "\\\r%s)\r", ptr->title + 72); } else { bookmark_output_ptr += sprintf (bookmark_output_ptr, ")\r"); } //fprintf (bookmark_output_ptr, "/Title (%s)\r", ptr->title); if (ptr->dest != NULL) bookmark_output_ptr += sprintf (bookmark_output_ptr, "/Dest %s\r", ptr->dest); // saved string includes trailing space if (ptr->parent_id != NULL) bookmark_output_ptr += sprintf (bookmark_output_ptr, "/Parent %d 0 R \r", ptr->parent_id); if (ptr->prev_id != NULL) bookmark_output_ptr += sprintf (bookmark_output_ptr, "/Prev %d 0 R \r", ptr->prev_id); if (ptr->next_peer_id != NULL) bookmark_output_ptr += sprintf (bookmark_output_ptr, "/Next %d 0 R \r", ptr->next_peer_id); if (ptr->first_child_id != NULL) bookmark_output_ptr += sprintf (bookmark_output_ptr, "/First %d 0 R \r", ptr->first_child_id); if (ptr->last_id != NULL) bookmark_output_ptr += sprintf (bookmark_output_ptr, "/Last %d 0 R \r", ptr->last_id); if (ptr->count != NULL) bookmark_output_ptr += sprintf (bookmark_output_ptr, "/Count %d \r", ptr->count); bookmark_output_ptr += sprintf (bookmark_output_ptr, ">> \r"); bookmark_output_ptr += sprintf (bookmark_output_ptr, "endobj\r"); ptr = ptr->next_pdf_object; } } // sprintf_pdf_node // ============ // Main program // ============ int main(int argc, char *argv[]) { long input_pdf_length = 0; long read_length = 0; FILE *input_pdf; int count = 0; int parent_id = 0; int id = 0; int exit_flag = 0; int id2= 0; int first_child_id = 0; // these are the ids found in each id#s obj int next_peer_id = 0; int prev_id = 0; int last_id = 0; int bookmark_count = 0; int bookmark_size_increase = 0; FILE *output_stuff; //the output file which allows use of fprintf in this format char *input_pdf_buffer; // original input PDF file char *input_pdf_buffer_end; char *nulled_pdf_buffer; // input PDF file with \r changed to \0 for easy string parsing char *nulled_pdf_buffer_end; char *begin_bookmarks; // pointer in nulled_pdf_buffer to the start of bookmarks char *after_bookmarks; // poitner in nulled_pdf_buffer to the byte after the bookmarks char *buffer; //current position of the pointer in the file char *buffer_image; //creates a copy of the pointer in the file char *close_paren; // for Title parsing char *title = (char *) malloc(BUFF_LEN); char *input_pdf_name = (char *) malloc(BUFF_LEN); char *output_pdf_name = (char *) malloc(BUFF_LEN); char *number = (char *) malloc(BUFF_LEN); char *dest = (char *) malloc(BUFF_LEN); //strcpy(dest, ""); struct pdf_node *UpdatingObject; // Temp. pointer to pdf_object that we are updating struct pdf_node *CheckingObject; // Temp. pointer to pdf_object that we are checking struct pdf_node *pdf_objects; // first pdf object struct pdf_node *pdf_object; // current object being filled in int parsing_xref = 0; // parsing an xref section int parsing_bookmarks = 0; // parsing bookmark objects int parsing_linearized = 0; // parsing /Linearized object int parsing_xref_trailer = 0; // parsing an xref table trailer int parsing_startxref_number = 0; // parsing startxref number after a trailer char tmp[256]; // temporary output lines char close_bookmarks_str[256]; // input question int close_bookmarks; // make all bookmarks closed printf ("Number PDF bookmarks -- Version 1.0\n"); printf ("by Rob Elliott (Rob_Elliott@compuserve.com)\n"); printf ("With contributions from Nathan Dansfiell and Joe Foster\n"); printf ("25 April 2002\n"); printf ("Released as freeware.\n\n"); printf("Name of .pdf file: "); //this section opens and checks to see if file is there and not corrupted if (scanf("%[^\n]", input_pdf_name) < 1) { printf ("File name too short\n"); return (0); } strcpy (output_pdf_name,input_pdf_name); if (strstr (output_pdf_name, ".pdf")) { strcpy (strstr (output_pdf_name, ".pdf"), ".num.pdf"); } else { strcat (output_pdf_name, ".num.pdf"); } printf ("Output file name is %s\n", output_pdf_name); if (!(input_pdf = fopen (input_pdf_name, "rb"))) { printf ("Error opening file %s errno=%d", input_pdf_name, errno); return -1; } if (fseek (input_pdf, 0, SEEK_END)) { printf ("Error seeking to end of file\n"); return -1; } input_pdf_length = ftell (input_pdf); if (fseek (input_pdf, 0, SEEK_SET)) { printf ("Error seeking to start of file\n"); return -1; } if (!(input_pdf_buffer = (char *) malloc (input_pdf_length + 1))) { printf ("Error allocation input_pdf_buffer\n"); exit (1); } input_pdf_buffer_end = input_pdf_buffer + (long) (input_pdf_length + 1); read_length = fread (input_pdf_buffer, 1, input_pdf_length, input_pdf); if (read_length != input_pdf_length) { printf ("Error reading entire PDF file: read %d wanted %d", read_length, input_pdf_length); return -1; } if (fclose (input_pdf)) { printf ("Error closing input PDF file\n"); return -1; } if (!(nulled_pdf_buffer = (char *) malloc (input_pdf_length + 1))) { printf ("Error allocation nulled_pdf_buffer\n"); exit (1); } nulled_pdf_buffer_end = nulled_pdf_buffer + (long) (input_pdf_length + 1); // Replace \r with \0 so we can safely use string functions buffer = nulled_pdf_buffer; buffer_image = input_pdf_buffer; while (buffer_image < input_pdf_buffer_end) { if (*buffer_image == '\r') { *buffer++ = '\0'; buffer_image++; } else { *buffer++ = *buffer_image++; } } *buffer = '\0'; // Terminate buffer with end of string. // Note that this is one character after the last // character from the file read. // Point back to the beginning of the buffers buffer = nulled_pdf_buffer; buffer_image = input_pdf_buffer; pdf_objects = NULL; printf("\nClose all bookmarks (Y/n)? "); scanf("%s", &close_bookmarks_str); if ((close_bookmarks_str[0] == 'n') || (close_bookmarks_str[0] == 'N')) close_bookmarks = 0; else close_bookmarks = 1; printf("\nSome files have bookmarks before section 1, e.g. for the table\n"); printf ("of contents. These can be skipped over.\n"); printf("How many bookmarks to skip at the front: "); scanf("%d", &skip_initial_bookmarks); printf("\nSome files have annexes that should be numbered A, A.1, B, B.1...\n"); printf("(use 0 for no annexes)\n"); printf("Last decimal top-level number before numbering as annexes: "); scanf("%d", &start_annex_numbering); if (start_annex_numbering == 0) start_annex_numbering = 9999; printf("Parsing input file\n"); // scan the whole file, not just the bookmarks while ((buffer < nulled_pdf_buffer_end)) { if (*buffer == '\0') // remove the \0\0... case buffer++; if (strstr(buffer, "/Type /Page")) { //printf ("Page found at %d\n", buffer - nulled_pdf_buffer); buffer = SkipToNull(buffer); } else if (sscanf(buffer, "/Outlines %d %d ", &first_bookmark_id, &first_bookmark_generation) == 2) { buffer = SkipToNull(buffer); } //else if (sscanf (buffer, "/PageMode %s", &page_mode)) //{ // change // UseNone to UseOutlines //} else if (!parsing_xref && (strstr(buffer, "xref")) && !strstr (buffer, "startxref")) { //printf ("found xref table at %d\n", buffer); parsing_xref = 1; buffer = SkipToNull(buffer); xref_begin[number_xref_sections] = buffer; // point after the xref } else if (parsing_xref && strstr(buffer, "trailer")) { parsing_xref = 0; xref_end[number_xref_sections] = buffer; // point before the trailer number_xref_sections++; parsing_xref_trailer = 1; buffer = SkipToNull(buffer); } else if (parsing_xref_trailer && (sscanf (buffer, "/Prev %d", &trailer_prev[number_xref_sections]) == 1)) { //printf ("found prev=%d\n", trailer_prev[number_xref_sections]); trailer_prev_offset[number_xref_sections] = buffer; parsing_xref_trailer = 0; buffer = SkipToNull(buffer); } else if (parsing_xref_trailer && strstr (buffer, ">>")) { parsing_xref_trailer = 0; buffer = SkipToNull(buffer); } // right before %%EOF is a startxref pointer else if (strstr (buffer, "startxref")) { parsing_startxref_number = 1; // next line has a number to fix buffer = SkipToNull(buffer); } else if (parsing_startxref_number && (sscanf (buffer, "%d", &startxref_number[number_xref_sections]) == 1)) { startxref_number_offset[number_xref_sections] = buffer; parsing_startxref_number = 0; buffer = SkipToNull(buffer); } // Linearized section at top of file else if (strstr(buffer, "/Linearized")) { parsing_linearized = 1; buffer = SkipToNull(buffer); } else if (parsing_linearized && sscanf (buffer, "/L %d", &linearized_l) == 1) { //printf ("found L=%d\n", linearized_l); linearized_l_offset = buffer; buffer = SkipToNull(buffer); } else if (parsing_linearized && sscanf (buffer, "/T %d", &linearized_t) == 1) { //printf ("found T=%d\n", linearized_t); linearized_t_offset = buffer; buffer = SkipToNull(buffer); } else if (parsing_linearized && sscanf (buffer, "/E %d", &linearized_e) == 1) { //printf ("found E=%d\n", linearized_e); linearized_e_offset = buffer; buffer = SkipToNull(buffer); } else if (parsing_linearized && strstr (buffer, ">>")) { parsing_linearized = 0; buffer = SkipToNull(buffer); } else if (sscanf(buffer, "%d %d obj", &id, &id2) == 2) { if ((id == first_bookmark_id) && (id2 == first_bookmark_generation)) { #ifdef DEBUG printf ("Parsing bookmarks at id %d at %d\n", first_bookmark_id, buffer); #endif begin_bookmarks = buffer; parsing_bookmarks = 1; } if (parsing_bookmarks) { // after special obj id has been found and parsing turned on it allows for the link list to be built if (pdf_objects == NULL) { if (!(pdf_objects = (struct pdf_node *) malloc(sizeof pdf_node))) { printf ("Error allocating the root pdf_object\n"); exit (1); } pdf_object = pdf_objects; } else { if (!(pdf_object->next_pdf_object = (struct pdf_node *) malloc(sizeof pdf_node))) { printf ("Error allocating a pdf_object\n"); exit (1); } pdf_object = pdf_object->next_pdf_object; // set pointer to next node down the list } pdf_object->title = (char *) malloc(BUFF_LEN); pdf_object->id = 0; pdf_object->count= 0; pdf_object->first_child_id = 0; //this cycles throu each time to build a new node on the list pdf_object->next_peer_id = 0; pdf_object->last_id = 0; pdf_object->parent_id = 0; pdf_object->prev_id = 0; pdf_object->next_peer = NULL; pdf_object->first_child = NULL; pdf_object->last = NULL; pdf_object->parent = NULL; pdf_object->prev = NULL; pdf_object->dest = (char *) malloc(BUFF_LEN); pdf_object->next_pdf_object = NULL; if (!pdf_object->title || !pdf_object->dest) { printf ("Error allocating pdf_object title or dest\n"); exit (1); } strcpy(pdf_object->title, ""); strcpy(pdf_object->dest, ""); pdf_object->id = id; } buffer = SkipToNull(buffer); } // end of the sscanf buffer id check else if (parsing_bookmarks && sscanf(buffer, "/Count %d", &count) == 1) { // Count means the number of open bookmarks, not the total number of bookmarks // the only way to declare the end is to determine that the tree has // no more empty first_child or next_peer links pdf_object->count = count; // force each bookmark to a negative value, which means the bookmark is closed if (close_bookmarks && (pdf_object->count > 0)) pdf_object->count = -pdf_object->count; buffer = SkipToNull(buffer); } // Note - this size in the scanf cant be a define due to some funky rules with searching blank spaces // don't include () in the title string extracted else if (parsing_bookmarks && sscanf(buffer, "/Title (%[^\0]", title) == 1) { // if Unicode found, skip until ) appears in the most significant byte of // one of the UTF-16 characters // copy the ) too to match non-Unicode results #ifdef DEBUG printf ("%x%x %s\n", title[0], title[1], title); #endif if (((unsigned char) title[0] == 0xFE) && ((unsigned char) title[1] == 0xFF)) { printf ("WARNING: found a Unicode string\n"); buffer += 10; // past "/Title (%\245\255" pointing at the \000 int ti = 2; // leave the \254\255 in the title while (1) { if ((title[ti++] = *buffer++) == ')') // copy the MSB - warning - not very resilient { printf ("Found closing paren\n"); title[ti] = 0; // add a final null and quit break; } else { title[ti++] = *buffer++; // copy the LSB } } memcpy (pdf_object->title, title, ti); wprintf (L"Unicode string is :%s: \n", title[2]); } else { // Title string has \ inserted after PDF line is 80 chars for some reason // no need to preserve that...just pull in the next line and combine them // FIXFIX only merges one additional line if (title[strlen (title) - 1] == '\\') { title[strlen (title) - 1] = 0; buffer = SkipToNull (buffer); while (*buffer == '\0') buffer++; strcat (title, buffer); //printf ("Long title=:%s:\n", title); } // null out the last ) close_paren = strrchr (title, ')'); if (close_paren) { *close_paren = 0; } strcpy (pdf_object->title, title); buffer = SkipToNull(buffer); } } else if (parsing_bookmarks && sscanf(buffer, "/First %d", &first_child_id) == 1) { #ifdef DEBUG if (pdf_object == pdf_objects) printf ("Found first child %d\n", first_child_id); #endif pdf_object->first_child_id = first_child_id; buffer = SkipToNull(buffer); } else if (parsing_bookmarks && sscanf(buffer, "/Next %d", &next_peer_id) == 1) { pdf_object->next_peer_id = next_peer_id; buffer = SkipToNull(buffer); } else if (parsing_bookmarks && sscanf(buffer, "/Parent %d", &parent_id) == 1) { pdf_object->parent_id = parent_id; buffer = SkipToNull(buffer); } else if (parsing_bookmarks && sscanf(buffer, "/Last %d", &last_id) == 1) { pdf_object->last_id = last_id; buffer = SkipToNull(buffer); } else if (parsing_bookmarks && sscanf(buffer, "/Prev %d", &prev_id) == 1) { pdf_object->prev_id = prev_id; buffer = SkipToNull(buffer); } else if (parsing_bookmarks && sscanf(buffer, "/Dest %255[^\0]", dest) == 1) { memcpy (pdf_object->dest, dest, BUFF_LEN); buffer = SkipToNull(buffer); } else if (parsing_bookmarks && (strncmp(buffer, "endobj", 6) == 0)) { buffer = SkipToNull(buffer); after_bookmarks = buffer; // Update in case this was the final object if (tree_finished (pdf_objects, pdf_object->id)) parsing_bookmarks = 0; } else { buffer = SkipToNull(buffer); } } // while buffer <= nulled_pdf_buffer_end if (!pdf_objects) { printf ("No bookmarks found!\n"); return -2; } // now add the section numbers printf ("Updating bookmark tree\n"); // fill in first_child and next_peer pointers through simple linked list traversal // ridiculously inefficient, but works UpdatingObject = pdf_objects; //resets the pointer to base object while (UpdatingObject != NULL) { CheckingObject = pdf_objects; //resets this pointer to base object each time throu the loop while (CheckingObject != NULL) //so it matches the pointers with the proper parent { //if (UpdatingObject->id = 3606) //printf ("Up=%d Ch=%d testing first child %d and next_peer %d\n", // UpdatingObject->id, // CheckingObject->id, // UpdatingObject->first_child_id, // UpdatingObject->next_peer); if ((UpdatingObject->first_child_id != 0) && (UpdatingObject->first_child_id == CheckingObject->id)) { UpdatingObject->first_child = CheckingObject; } if ((UpdatingObject->next_peer_id != 0) && (UpdatingObject->next_peer_id == CheckingObject->id)) { UpdatingObject->next_peer = CheckingObject; } CheckingObject = CheckingObject->next_pdf_object; } UpdatingObject = UpdatingObject->next_pdf_object; } //this is the function call that builds the tree into its repective levels index_builder(pdf_objects->first_child, "1"); printf("New bookmark tree built succesfully\n"); //print_pdf_nodes (pdf_objects); if (!(output_stuff = fopen(output_pdf_name, "wb"))) { printf("Error opening output file %s\n", output_pdf_name); return -1; } #ifdef DEBUG printf (" input_pdf_buffer=%d\n", input_pdf_buffer); printf (" input_pdf_buffer_end=%d diff=%d\n", input_pdf_buffer_end, input_pdf_buffer_end - input_pdf_buffer); printf (" nulled_pdf_buffer=%d\n", nulled_pdf_buffer); printf (" xref_begin[0]=%d diff=%d\n", xref_begin[0], xref_begin[0] - nulled_pdf_buffer); printf (" xref_end[0]=%d diff=%d diff=%d\n", xref_end[0], xref_end[0] - nulled_pdf_buffer, xref_end[0] - xref_begin[0]); printf (" xref_begin[1]=%d diff=%d\n", xref_begin[1], xref_begin[1] - nulled_pdf_buffer); printf (" xref_end[1]=%d diff=%d diff=%d\n", xref_end[1], xref_end[1] - nulled_pdf_buffer, xref_end[1] - xref_begin[1]); printf (" begin_bookmarks=%d diff=%d\n", begin_bookmarks, begin_bookmarks - nulled_pdf_buffer); printf (" after_bookmarks=%d diff=%d diff=%d\n", after_bookmarks, after_bookmarks - nulled_pdf_buffer, after_bookmarks - begin_bookmarks); printf ("nulled_pdf_buffer_end=%d diff=%d\n", nulled_pdf_buffer_end, nulled_pdf_buffer_end - nulled_pdf_buffer); printf (" buffer=%d\n", buffer); printf (" buffer_image=%d\n", buffer_image); #endif // create a buffer in memory to hold the rewritten bookmark section printf ("Updating bookmark image\n"); if (!(bookmark_buffer = (char *) malloc ((after_bookmarks - begin_bookmarks) * 2))) { // HACKHACK * 2 should do printf ("Error allocating bookmark_buffer\n"); exit (1); } bookmark_output_ptr = bookmark_buffer; sprintf_pdf_root_node (pdf_objects); //printf ("bookmark_output_ptr=%d bookmark_buffer=%d, diff=%d\n", bookmark_output_ptr, bookmark_buffer, bookmark_output_ptr - bookmark_buffer); bookmark_size = bookmark_output_ptr - bookmark_buffer; bookmark_size_increase = bookmark_size - (after_bookmarks - begin_bookmarks); printf ("New bookmark section is %d bytes larger\n", bookmark_size_increase); // update the file length and other irritants sprintf (tmp, "/L %d", linearized_l + bookmark_size_increase); memcpy (linearized_l_offset - nulled_pdf_buffer + input_pdf_buffer, tmp, strlen (tmp)); // update the first xref 1st entry pointer if (linearized_t > begin_bookmarks - nulled_pdf_buffer) { sprintf (tmp, "/T %d", linearized_t + bookmark_size_increase); memcpy (linearized_t_offset - nulled_pdf_buffer + input_pdf_buffer, tmp, strlen (tmp)); } // update the end of first page pointer if (linearized_e > begin_bookmarks - nulled_pdf_buffer) { sprintf (tmp, "/E %d", linearized_e + bookmark_size_increase); memcpy (linearized_e_offset - nulled_pdf_buffer + input_pdf_buffer, tmp, strlen (tmp)); } // modify all the cross references // this table has byte offsets to all the bookmark objects, and objects after the bookmarks // scan through nulled_ptr_buffer but write changes in input_pdf_buffer printf ("Updating cross references\n"); char *xref_ptr; int xref_section = 0; while (xref_section < number_xref_sections) { xref_ptr = xref_begin[xref_section]; // update the previous cross reference table if (trailer_prev[xref_section] > begin_bookmarks - nulled_pdf_buffer) { sprintf (tmp, "/Prev %d", trailer_prev[xref_section] + bookmark_size_increase); memcpy (trailer_prev_offset[xref_section] - nulled_pdf_buffer + input_pdf_buffer, tmp, strlen (tmp)); } #ifdef DEBUG printf ("Parsing xref section %d at %d\n", xref_section, xref_ptr); #endif // read in the starting object number of the xref section int xref_object; int num_objects; while ((*xref_ptr == '\0') || (*xref_ptr == '\n') || (*xref_ptr == '\r')) // remove the \0\0... case xref_ptr++; if (sscanf(xref_ptr, "%d %d", &xref_object, &num_objects) == 2) { #ifdef DEBUG printf ("xref starts at %d, with %d objects\n", xref_object, num_objects); #endif xref_ptr = SkipToNull(xref_ptr); } else { printf ("did not load xref base and number\n"); } while (num_objects--) { int byteoffset; int generation_id; //printf ("xref_ptr=%d\n", xref_ptr); while ((*xref_ptr == '\0') || (*xref_ptr == '\n') || (*xref_ptr == '\r')) // remove the \0\0... case xref_ptr++; if ((sscanf(xref_ptr, "%d %d n", &byteoffset, &generation_id) == 2) && strstr (xref_ptr, " n")) { //printf ("matched :%s:\n", xref_ptr); // adjust objects after the bookmarks if (byteoffset >= (after_bookmarks - nulled_pdf_buffer)) { byteoffset += bookmark_size_increase; char templine[255]; sprintf (templine, "%010d %05d", byteoffset, generation_id); memcpy (xref_ptr - nulled_pdf_buffer + input_pdf_buffer, templine, 16); //printf ("new post bookmark xref %d: %010d %05d\n", xref_object, byteoffset, generation_id); } // adjust the bookmarks themselves else if (byteoffset > (begin_bookmarks - nulled_pdf_buffer)) { //printf ("old bookmark xref %d: %010d %05d\n", xref_object, byteoffset, generation_id); if (xref_object < first_bookmark_id) { printf ("error object range %d vs %d\n", xref_object, first_bookmark_id); } if (bookmark_offsets[xref_object] == -1) printf ("Error - xref_object=%d does not have a bookmark_offsets entry\n", xref_object); char templine[255]; sprintf (templine, "%010d %05d", bookmark_offsets[xref_object] + (begin_bookmarks - nulled_pdf_buffer), generation_id); memcpy (xref_ptr - nulled_pdf_buffer + input_pdf_buffer, templine, 16); //printf ("new bookmark xref %d: %010d %05d\n", xref_object, bookmark_offsets[xref_object] + (begin_bookmarks - nulled_pdf_buffer), generation_id); } else { //printf ("ignoring pre-bookmark xref\n"); } } //else //{ // printf ("skipping unknown or f line:%s:\n", xref_ptr); //} xref_ptr = SkipToNull(xref_ptr); xref_object++; } xref_section++; } // copy original image up to the bookmarks fwrite (input_pdf_buffer, 1, begin_bookmarks - nulled_pdf_buffer, output_stuff); // write modified bookmarks section fwrite (bookmark_buffer, 1, bookmark_size, output_stuff); // copy original image after the bookmarks fwrite (input_pdf_buffer + (after_bookmarks - nulled_pdf_buffer), 1, input_pdf_buffer_end - 1 - (after_bookmarks - nulled_pdf_buffer) - input_pdf_buffer, output_stuff); fclose(output_stuff); //these innocious frees cause crashes. //free (bookmark_buffer); //free (input_pdf_buffer); //free (nulled_pdf_buffer); return 0; } // main