From d4dd71de8adea7ae6907fb51711b04b84d931d94 Mon Sep 17 00:00:00 2001
From: Adrian Johnson
Date: Wed, 24 Feb 2016 20:57:37 +1030
Subject: [PATCH 4/4] pdfinfo: add -isodates for printing dates in ISO-8601 format

---
Review notes (this region is not part of the commit message):
 * Line structure and indentation were reconstructed from a copy in which
   the whole patch had been collapsed onto two lines. If the context lines
   do not match the target tree byte-for-byte, apply with
   --ignore-whitespace.
 * printISODate() is now defined static, matching its forward declaration
   and the neighbouring definitions, and carries a short description. The
   line count of the last hunk and the diffstat were adjusted for the six
   added comment lines. The blob ids on the pdfinfo.cc index line predate
   this adjustment and may no longer match.

 utils/pdfinfo.1  |  3 +++
 utils/pdfinfo.cc | 42 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/utils/pdfinfo.1 b/utils/pdfinfo.1
index 35bf6d5..f3a60d7 100644
--- a/utils/pdfinfo.1
+++ b/utils/pdfinfo.1
@@ -105,6 +105,9 @@ file. Note that extracting text this way might be slow for big PDF files.
 (Implies
 .BR \-struct .)
 .TP
+.B \-isodates
+Prints dates in ISO-8601 format (including the time zone).
+.TP
 .B \-rawdates
 Prints the raw (undecoded) date strings, directly from the PDF file.
 .TP
diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
index 6194530..1c79f0b 100644
--- a/utils/pdfinfo.cc
+++ b/utils/pdfinfo.cc
@@ -63,6 +63,7 @@ static void printInfoString(Dict *infoDict, const char *key, const char *text,
                             UnicodeMap *uMap);
 static void printInfoDate(Dict *infoDict, const char *key,
                           const char *text);
+static void printISODate(Dict *infoDict, const char *key, const char *text);
 static void printBox(const char *text, PDFRectangle *box);
 static void printStruct(const StructElement *element, unsigned indent = 0);
 static void printIndent(unsigned level);
@@ -72,6 +73,7 @@ static int lastPage = 0;
 static GBool printBoxes = gFalse;
 static GBool printMetadata = gFalse;
 static GBool printJS = gFalse;
+static GBool isoDates = gFalse;
 static GBool rawDates = gFalse;
 static char textEncName[128] = "";
 static char ownerPassword[33] = "\001";
@@ -97,6 +99,8 @@ static const ArgDesc argDesc[] = {
    "print the logical document structure (for tagged files)"},
   {"-struct-text", argFlag, &printStructureText, 0,
    "print text contents along with document structure (for tagged files)"},
+  {"-isodates", argFlag, &isoDates, 0,
+   "print the dates in ISO-8601 format"},
   {"-rawdates", argFlag, &rawDates, 0,
    "print the undecoded date strings directly from the PDF file"},
   {"-enc", argString, textEncName, sizeof(textEncName),
@@ -238,7 +242,10 @@ int main(int argc, char *argv[]) {
     printInfoString(info.getDict(), "Author", "Author: ", uMap);
     printInfoString(info.getDict(), "Creator", "Creator: ", uMap);
     printInfoString(info.getDict(), "Producer", "Producer: ", uMap);
-    if (rawDates) {
+    if (isoDates) {
+      printISODate(info.getDict(), "CreationDate", "CreationDate: ");
+      printISODate(info.getDict(), "ModDate", "ModDate: ");
+    } else if (rawDates) {
       printInfoString(info.getDict(), "CreationDate", "CreationDate: ",
                       uMap);
       printInfoString(info.getDict(), "ModDate", "ModDate: ",
@@ -507,6 +514,39 @@ static void printInfoDate(Dict *infoDict, const char *key, const char *text) {
   obj.free();
 }
 
+// Print the date stored under key in infoDict in ISO-8601 form,
+// prefixed with the label text.  Nothing is printed when the entry
+// is missing or is not a string.  If parseDateString() rejects the
+// value, the string is printed as stored.
+static void printISODate(Dict *infoDict, const char *key, const char *text)
+{
+  Object obj;
+  char *s;
+  int year, mon, day, hour, min, sec, tz_hour, tz_minute;
+  char tz;
+
+  if (infoDict->lookup(key, &obj)->isString()) {
+    fputs(text, stdout);
+    s = obj.getString()->getCString();
+    if ( parseDateString( s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute ) ) {
+      fprintf(stdout, "%04d-%02d-%02dT%02d:%02d:%02d", year, mon, day, hour, min, sec);
+      // A zero offset is written as "Z"; otherwise the sign and hours
+      // are written, with ":mm" appended only for non-zero minutes.
+      if (tz_hour == 0 && tz_minute == 0) {
+        fprintf(stdout, "Z");
+      } else {
+        fprintf(stdout, "%c%02d", tz, tz_hour);
+        if (tz_minute)
+          fprintf(stdout, ":%02d", tz_minute);
+      }
+    } else {
+      fputs(s, stdout);
+    }
+    fputc('\n', stdout);
+  }
+  obj.free();
+}
+
 static void printBox(const char *text, PDFRectangle *box) {
   printf("%s%8.2f %8.2f %8.2f %8.2f\n",
          text, box->x1, box->y1, box->x2, box->y2);
-- 
2.1.4