depot/third_party/nixpkgs/pkgs/applications/misc/k2pdfopt/mupdf.patch

1061 lines
31 KiB
Diff
Raw Normal View History

From d8927c969e3387ca2669a616c0ba53bce918a031 Mon Sep 17 00:00:00 2001
From: Daniel Fullmer <danielrf12@gmail.com>
Date: Fri, 13 Sep 2019 15:11:45 -0400
Subject: [PATCH] Willus mod for k2pdfopt
---
source/fitz/filter-basic.c | 3 +
source/fitz/font-win32.c | 866 +++++++++++++++++++++++++++++++++++++
source/fitz/font.c | 3 +
source/fitz/stext-device.c | 5 +
source/fitz/string.c | 5 +
source/pdf/pdf-annot.c | 14 +-
source/pdf/pdf-link.c | 3 +
source/pdf/pdf-parse.c | 5 +
source/pdf/pdf-xref.c | 9 +
9 files changed, 912 insertions(+), 1 deletion(-)
create mode 100644 source/fitz/font-win32.c
diff --git a/source/fitz/filter-basic.c b/source/fitz/filter-basic.c
index 0713a62e7..b8ef4d292 100644
--- a/source/fitz/filter-basic.c
+++ b/source/fitz/filter-basic.c
@@ -259,7 +259,10 @@ look_for_endstream:
if (!state->warned)
{
state->warned = 1;
+/* willus mod -- no warning */
+/*
fz_warn(ctx, "PDF stream Length incorrect");
+*/
}
return *stm->rp++;
}
diff --git a/source/fitz/font-win32.c b/source/fitz/font-win32.c
new file mode 100644
index 000000000..45de8cfd3
--- /dev/null
+++ b/source/fitz/font-win32.c
@@ -0,0 +1,866 @@
+/*
+** Routines to access MS Windows system fonts.
+** From sumatra PDF distro.
+** Modified for MuPDF v1.9a by willus.com
+*/
+#include "mupdf/pdf.h"
+
+/*
+ Which fonts are embedded is based on a few preprocessor definitions.
+
+ The base 14 fonts are always embedded.
+ For CJK font substitution we embed DroidSansFallback.
+
+ Set NOCJK to skip all CJK support (this also omits embedding the CJK CMaps)
+ Set NOCJKFONT to skip the embedded CJK font.
+ Set NOCJKFULL to embed a smaller CJK font without CJK Extension A support.
+*/
+
+#ifdef NOCJK
+#define NOCJKFONT
+#endif
+
+/* SumatraPDF: also load fonts included with Windows */
+#ifdef _WIN32
+
+#ifndef UNICODE
+#define UNICODE
+#endif
+#ifndef _UNICODE
+#define _UNICODE
+#endif
+
+#include <windows.h>
+
+// TODO: Use more of FreeType for TTF parsing (for performance reasons,
+// the fonts can't be parsed completely, though)
+#include <ft2build.h>
+#include FT_TRUETYPE_IDS_H
+#include FT_TRUETYPE_TAGS_H
+
+#define TTC_VERSION1 0x00010000
+#define TTC_VERSION2 0x00020000
+
+#define MAX_FACENAME 128
+
+// Note: the font face must be the first field so that the structure
+// can be treated like a simple string for searching
+typedef struct pdf_fontmapMS_s
+{
+ char fontface[MAX_FACENAME]; /* face name; doubles as the sort/search key of the font list */
+ char fontpath[MAX_PATH]; /* UTF-8 path of the font file that provides this face */
+ int index; /* face index inside the file: position within a .ttc, 0 for .ttf/.otf */
+} pdf_fontmapMS;
+
+typedef struct pdf_fontlistMS_s
+{
+ pdf_fontmapMS *fontmap; /* array of entries, managed with realloc()/free() */
+ int len; /* number of entries in use */
+ int cap; /* number of entries allocated */
+} pdf_fontlistMS;
+
+typedef struct _tagTT_OFFSET_TABLE /* read verbatim from the font file by parseTTF; fields stay big-endian */
+{
+ ULONG uVersion; /* compared against TTC_VERSION1 and TTAG_OTTO */
+ USHORT uNumOfTables; /* number of TT_TABLE_DIRECTORY entries that follow */
+ USHORT uSearchRange; /* not read by the code in this file */
+ USHORT uEntrySelector; /* not read by the code in this file */
+ USHORT uRangeShift; /* not read by the code in this file */
+} TT_OFFSET_TABLE;
+
+typedef struct _tagTT_TABLE_DIRECTORY
+{
+ ULONG uTag; // table name as a four-character tag (parseTTF looks for TTAG_name)
+ ULONG uCheckSum; // checksum (not verified by the code in this file)
+ ULONG uOffset; // offset of the table from the beginning of the file
+ ULONG uLength; // length of the table in bytes
+} TT_TABLE_DIRECTORY;
+
+typedef struct _tagTT_NAME_TABLE_HEADER
+{
+ USHORT uFSelector; // format selector (not inspected by the code in this file)
+ USHORT uNRCount; // number of name records that follow the header
+ USHORT uStorageOffset; // offset of the string storage, from the start of the table
+} TT_NAME_TABLE_HEADER;
+
+typedef struct _tagTT_NAME_RECORD
+{
+ USHORT uPlatformID; /* together with uEncodingID, selects the decoder in decode_platform_string */
+ USHORT uEncodingID;
+ USHORT uLanguageID; /* parseTTF uses it to filter names by language */
+ USHORT uNameID; /* kind of name; parseTTF keeps family, subfamily and PostScript names */
+ USHORT uStringLength; /* length of the string in bytes */
+ USHORT uStringOffset; // offset of the string, from the start of the storage area
+} TT_NAME_RECORD;
+
+typedef struct _tagFONT_COLLECTION /* header of a .ttc file, read verbatim by parseTTCs; fields stay big-endian */
+{
+ ULONG Tag; /* must equal TTAG_ttcf */
+ ULONG Version; /* must be TTC_VERSION1 or TTC_VERSION2 */
+ ULONG NumFonts; /* number of 32-bit face offsets stored right after this header */
+} FONT_COLLECTION;
+
+static const struct {
+	const char *name;	/* base font name as requested by a document */
+	const char *pattern;	/* PostScript name of the Windows font looked up in its place */
+} baseSubstitutes[] = {
+	{ "Courier", "CourierNewPSMT" },
+	{ "Courier-Bold", "CourierNewPS-BoldMT" },
+	{ "Courier-Oblique", "CourierNewPS-ItalicMT" },
+	{ "Courier-BoldOblique", "CourierNewPS-BoldItalicMT" },
+	{ "Helvetica", "ArialMT" },
+	{ "Helvetica-Bold", "Arial-BoldMT" },
+	{ "Helvetica-Oblique", "Arial-ItalicMT" },
+	{ "Helvetica-BoldOblique", "Arial-BoldItalicMT" },
+	{ "Times-Roman", "TimesNewRomanPSMT" },
+	{ "Times-Bold", "TimesNewRomanPS-BoldMT" },
+	{ "Times-Italic", "TimesNewRomanPS-ItalicMT" },
+	{ "Times-BoldItalic", "TimesNewRomanPS-BoldItalicMT" },
+	{ "Symbol", "SymbolMT" },	/* ZapfDingbats has no entry in this table */
+};
+static const char *base_font_names[][10] = /* each row: canonical base font name first, then accepted aliases, NULL-terminated */
+{
+ { "Courier", "CourierNew", "CourierNewPSMT", NULL },
+ { "Courier-Bold", "CourierNew,Bold", "Courier,Bold",
+ "CourierNewPS-BoldMT", "CourierNew-Bold", NULL },
+ { "Courier-Oblique", "CourierNew,Italic", "Courier,Italic",
+ "CourierNewPS-ItalicMT", "CourierNew-Italic", NULL },
+ { "Courier-BoldOblique", "CourierNew,BoldItalic", "Courier,BoldItalic",
+ "CourierNewPS-BoldItalicMT", "CourierNew-BoldItalic", NULL },
+ { "Helvetica", "ArialMT", "Arial", NULL },
+ { "Helvetica-Bold", "Arial-BoldMT", "Arial,Bold", "Arial-Bold",
+ "Helvetica,Bold", NULL },
+ { "Helvetica-Oblique", "Arial-ItalicMT", "Arial,Italic", "Arial-Italic",
+ "Helvetica,Italic", "Helvetica-Italic", NULL },
+ { "Helvetica-BoldOblique", "Arial-BoldItalicMT",
+ "Arial,BoldItalic", "Arial-BoldItalic",
+ "Helvetica,BoldItalic", "Helvetica-BoldItalic", NULL },
+ { "Times-Roman", "TimesNewRomanPSMT", "TimesNewRoman",
+ "TimesNewRomanPS", NULL },
+ { "Times-Bold", "TimesNewRomanPS-BoldMT", "TimesNewRoman,Bold",
+ "TimesNewRomanPS-Bold", "TimesNewRoman-Bold", NULL },
+ { "Times-Italic", "TimesNewRomanPS-ItalicMT", "TimesNewRoman,Italic",
+ "TimesNewRomanPS-Italic", "TimesNewRoman-Italic", NULL },
+ { "Times-BoldItalic", "TimesNewRomanPS-BoldItalicMT",
+ "TimesNewRoman,BoldItalic", "TimesNewRomanPS-BoldItalic",
+ "TimesNewRoman-BoldItalic", NULL },
+ { "Symbol", "Symbol,Italic", "Symbol,Bold", "Symbol,BoldItalic",
+ "SymbolMT", "SymbolMT,Italic", "SymbolMT,Bold", "SymbolMT,BoldItalic", NULL }, /* longest row: 8 names + NULL */
+ { "ZapfDingbats", NULL } /* canonical name only, no aliases */
+};
+
+static pdf_fontlistMS fontlistMS = /* file-scope list of system fonts, filled by create_system_font_list */
+{
+ NULL, /* fontmap */
+ 0, /* len */
+ 0, /* cap */
+};
+static int strcmp_ignore_space(const char *a, const char *b);
+static const char *clean_font_name(const char *fontname);
+static const char *pdf_clean_base14_name(const char *fontname);
+
+static inline USHORT BEtoHs(USHORT x) /* 16-bit value in big-endian (font file) byte order -> host order */
+{
+	const BYTE *bytes = (const BYTE *)&x;
+	return (USHORT)((bytes[0] << 8) | bytes[1]);
+}
+
+static inline ULONG BEtoHl(ULONG x) /* 32-bit value in big-endian (font file) byte order -> host order */
+{
+	const BYTE *data = (const BYTE *)&x; /* widened below: a BYTE promotes to int, and int << 24 overflows for bytes >= 0x80 */
+	return ((ULONG)data[0] << 24) | ((ULONG)data[1] << 16) | ((ULONG)data[2] << 8) | (ULONG)data[3];
+}
+
+static int strcmp_ignore_space(const char *a, const char *b)
+{
+	/* Returns 0 when a and b are equal once every ' ' is ignored, 1 otherwise. */
+	for (;; a++, b++)
+	{
+		/* blanks never take part in the comparison, on either side */
+		while (*a == ' ')
+			a++;
+		while (*b == ' ')
+			b++;
+		/* a mismatch also covers one string ending before the other */
+		if (*a != *b)
+			return 1;
+		/* both characters are equal here, so one terminator means both ended */
+		if (*a == '\0')
+			return 0;
+	}
+}
+
+/* A little bit more sophisticated name matching so that e.g. "EurostileExtended"
+ matches "EurostileExtended-Roman" or "Tahoma-Bold,Bold" matches "Tahoma-Bold" */
+static int
+lookup_compare(const void *elem1, const void *elem2) /* comparator used with bsearch: both arguments are read as C strings */
+{
+ const char *val1 = elem1;
+ const char *val2 = elem2;
+ int len1 = strlen(val1);
+ int len2 = strlen(val2);
+
+ if (len1 != len2)
+ {
+ const char *rest = len1 > len2 ? val1 + len2 : val2 + len1; /* tail of the longer name, past the shorter one's length */
+ if (',' == *rest || !_stricmp(rest, "-roman")) /* tail is ",<style>" or "-Roman": only the common prefix decides */
+ return _strnicmp(val1, val2, fz_mini(len1, len2));
+ }
+
+ return _stricmp(val1, val2); /* otherwise a plain case-insensitive comparison */
+}
+
+static void
+remove_spaces(char *srcDest)
+{
+	/* Compacts the string in place; the write cursor never overtakes the read cursor. */
+	char *in, *out = srcDest;
+	for (in = srcDest; *in != '\0'; in++)
+		if (*in != ' ')
+			*out++ = *in;
+	*out = '\0';
+}
+
+static int
+str_ends_with(const char *str, const char *end)
+{
+	size_t str_len = strlen(str), end_len = strlen(end);
+	if (end_len > str_len) /* a suffix longer than the string can never match */
+		return 0;
+	return strcmp(str + (str_len - end_len), end) == 0; /* 1 when str ends with end, else 0 */
+}
+
+static pdf_fontmapMS *
+pdf_find_windows_font_path(const char *fontname) /* binary search of fontlistMS by face name; NULL when nothing matches */
+{
+ return bsearch(fontname, fontlistMS.fontmap, fontlistMS.len, sizeof(pdf_fontmapMS), lookup_compare);
+}
+
+/* source and dest can be same */
+static void
+decode_unicode_BE(fz_context *ctx, char *source, int sourcelen, char *dest, int destlen) /* big-endian 16-bit units in source -> NUL-terminated UTF-8 in dest */
+{
+ WCHAR *tmp;
+ int converted, i;
+
+ if (sourcelen % 2 != 0) /* 16-bit units need an even byte count */
+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : invalid unicode string");
+
+ tmp = fz_malloc_array(ctx, sourcelen / 2 + 1, sizeof(WCHAR)); /* +1 for the terminator written below */
+ for (i = 0; i < sourcelen / 2; i++)
+ tmp[i] = BEtoHs(((WCHAR *)source)[i]);
+ tmp[sourcelen / 2] = '\0';
+
+ converted = WideCharToMultiByte(CP_UTF8, 0, tmp, -1, dest, destlen, NULL, NULL); /* input is copied to tmp first, so dest may alias source */
+ fz_free(ctx, tmp); /* freed before the check so the throw below does not leak it */
+ if (!converted)
+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : invalid unicode string");
+}
+
+static void
+decode_platform_string(fz_context *ctx, int platform, int enctype, char *source, int sourcelen, char *dest, int destlen) /* decodes a raw name string into dest by platform/encoding ID; throws for unsupported pairs */
+{
+ switch (platform)
+ {
+ case TT_PLATFORM_APPLE_UNICODE:
+ switch (enctype)
+ {
+ case TT_APPLE_ID_DEFAULT:
+ case TT_APPLE_ID_UNICODE_2_0:
+ decode_unicode_BE(ctx, source, sourcelen, dest, destlen);
+ return;
+ }
+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : unsupported encoding (%d/%d)", platform, enctype); /* reached for any other encoding; no break follows, fz_throw is expected not to return */
+ case TT_PLATFORM_MACINTOSH:
+ switch (enctype)
+ {
+ case TT_MAC_ID_ROMAN:
+ if (sourcelen + 1 > destlen) /* string plus terminator must fit in dest */
+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : overlong fontname: %s", source); /* NOTE(review): %s needs source to be NUL-terminated; sourcelen is not applied here */
+ // TODO: Convert to UTF-8 from what encoding?
+ memcpy(dest, source, sourcelen); /* bytes are copied unconverted */
+ dest[sourcelen] = 0;
+ return;
+ }
+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : unsupported encoding (%d/%d)", platform, enctype);
+ case TT_PLATFORM_MICROSOFT:
+ switch (enctype)
+ {
+ case TT_MS_ID_SYMBOL_CS:
+ case TT_MS_ID_UNICODE_CS:
+ case TT_MS_ID_UCS_4:
+ decode_unicode_BE(ctx, source, sourcelen, dest, destlen); /* all three encodings are decoded as big-endian 16-bit units */
+ return;
+ }
+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : unsupported encoding (%d/%d)", platform, enctype);
+ default:
+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : unsupported encoding (%d/%d)", platform, enctype);
+ }
+}
+
+static void
+grow_system_font_list(fz_context *ctx, pdf_fontlistMS *fl) /* enlarges fl->fontmap: 1024 entries at first, doubling afterwards */
+{
+ int newcap;
+ pdf_fontmapMS *newitems;
+
+ if (fl->cap == 0)
+ newcap = 1024;
+ else
+ newcap = fl->cap * 2;
+
+ // use realloc/free for the fontmap, since the list can
+ // remain in memory even with all fz_contexts destroyed
+ newitems = realloc(fl->fontmap, newcap * sizeof(pdf_fontmapMS));
+ if (!newitems) /* fl->fontmap has not been overwritten, so the old array stays reachable */
+ fz_throw(ctx, FZ_ERROR_GENERIC, "OOM in grow_system_font_list");
+ memset(newitems + fl->cap, 0, sizeof(pdf_fontmapMS) * (newcap - fl->cap)); /* zero only the newly added entries */
+
+ fl->fontmap = newitems;
+ fl->cap = newcap;
+}
+
+static void
+append_mapping(fz_context *ctx, pdf_fontlistMS *fl, const char *facename, const char *path, int index) /* appends one (face name, file path, face index) entry to fl */
+{
+ if (fl->len == fl->cap) /* list is full: grow before writing */
+ grow_system_font_list(ctx, fl);
+
+ if (fl->len >= fl->cap) /* defensive check in case growing did not make room */
+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : fontlist overflow");
+
+ fz_strlcpy(fl->fontmap[fl->len].fontface, facename, sizeof(fl->fontmap[0].fontface)); /* copies are bounded by the field sizes */
+ fz_strlcpy(fl->fontmap[fl->len].fontpath, path, sizeof(fl->fontmap[0].fontpath));
+ fl->fontmap[fl->len].index = index;
+
+ ++fl->len;
+}
+
+static void
+safe_read(fz_context *ctx, fz_stream *file, int offset, char *buf, int size) /* reads exactly size bytes at offset into buf; throws on a short read */
+{
+ int n;
+ fz_seek(ctx, file, offset, 0); /* whence 0: presumably SEEK_SET, i.e. offset counts from the start of the stream */
+ n = fz_read(ctx, file, (unsigned char *)buf, size);
+ if (n != size)
+ fz_throw(ctx, FZ_ERROR_GENERIC, "safe_read: read %d, expected %d", n, size);
+}
+
+static void
+read_ttf_string(fz_context *ctx, fz_stream *file, int offset, TT_NAME_RECORD *ttRecordBE, char *buf, int size) /* reads the string of one name record (storage starts at offset) and decodes it into buf */
+{
+	/* zero-filled so the raw bytes stay NUL-terminated when decode_platform_string prints them with %s */
+	char szTemp[MAX_FACENAME * 2] = { 0 };
+	// ignore empty and overlong strings (buf is left untouched in that case)
+	int stringLength = BEtoHs(ttRecordBE->uStringLength);
+	if (stringLength == 0 || stringLength >= (int)sizeof(szTemp))
+		return;
+	safe_read(ctx, file, offset + BEtoHs(ttRecordBE->uStringOffset), szTemp, stringLength);
+	decode_platform_string(ctx, BEtoHs(ttRecordBE->uPlatformID),
+		BEtoHs(ttRecordBE->uEncodingID), szTemp, stringLength, buf, size);
+}
+
+static void
+makeFakePSName(char szName[MAX_FACENAME], const char *szStyle) /* rewrites szName in place as "<name>-<style>" with all spaces removed */
+{
+ // append the font's subfamily, unless it's a Regular font
+ if (*szStyle && _stricmp(szStyle, "Regular") != 0)
+ {
+ fz_strlcat(szName, "-", MAX_FACENAME); /* appends are bounded by MAX_FACENAME */
+ fz_strlcat(szName, szStyle, MAX_FACENAME);
+ }
+ remove_spaces(szName); /* spaces are removed even when no style was appended */
+}
+
+static void
+parseTTF(fz_context *ctx, fz_stream *file, int offset, int index, const char *path) /* reads the names of the face at offset and registers them in fontlistMS under path/index */
+{
+ TT_OFFSET_TABLE ttOffsetTableBE;
+ TT_TABLE_DIRECTORY tblDirBE;
+ TT_NAME_TABLE_HEADER ttNTHeaderBE;
+ TT_NAME_RECORD ttRecordBE;
+
+ char szPSName[MAX_FACENAME] = { 0 };
+ char szTTName[MAX_FACENAME] = { 0 };
+ char szStyle[MAX_FACENAME] = { 0 };
+ char szCJKName[MAX_FACENAME] = { 0 };
+ int i, count, tblOffset;
+
+ safe_read(ctx, file, offset, (char *)&ttOffsetTableBE, sizeof(TT_OFFSET_TABLE));
+
+ // check if this is a TrueType font of version 1.0 or an OpenType font
+ if (BEtoHl(ttOffsetTableBE.uVersion) != TTC_VERSION1 &&
+ BEtoHl(ttOffsetTableBE.uVersion) != TTAG_OTTO)
+ {
+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : invalid font version %x", (unsigned int)BEtoHl(ttOffsetTableBE.uVersion));
+ }
+
+ // determine the name table's offset by iterating through the offset table
+ count = BEtoHs(ttOffsetTableBE.uNumOfTables);
+ for (i = 0; i < count; i++)
+ {
+ int entryOffset = offset + sizeof(TT_OFFSET_TABLE) + i * sizeof(TT_TABLE_DIRECTORY);
+ safe_read(ctx, file, entryOffset, (char *)&tblDirBE, sizeof(TT_TABLE_DIRECTORY));
+ if (!BEtoHl(tblDirBE.uTag) || BEtoHl(tblDirBE.uTag) == TTAG_name) /* stop at a zero tag or at the 'name' entry */
+ break;
+ }
+ if (count == i || !BEtoHl(tblDirBE.uTag)) /* count == i is tested first, so tblDirBE is not read when no entry was loaded */
+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : nameless font");
+ tblOffset = BEtoHl(tblDirBE.uOffset);
+
+ // read the 'name' table for record count and offsets
+ safe_read(ctx, file, tblOffset, (char *)&ttNTHeaderBE, sizeof(TT_NAME_TABLE_HEADER));
+ offset = tblOffset + sizeof(TT_NAME_TABLE_HEADER); /* from here on, offset addresses the first name record */
+ tblOffset += BEtoHs(ttNTHeaderBE.uStorageOffset); /* ... and tblOffset the start of the string storage */
+
+ // read through the strings for PostScript name and font family
+ count = BEtoHs(ttNTHeaderBE.uNRCount);
+ for (i = 0; i < count; i++)
+ {
+ short langId, nameId;
+ BOOL isCJKName;
+
+ safe_read(ctx, file, offset + i * sizeof(TT_NAME_RECORD), (char *)&ttRecordBE, sizeof(TT_NAME_RECORD));
+
+ langId = BEtoHs(ttRecordBE.uLanguageID);
+ nameId = BEtoHs(ttRecordBE.uNameID);
+ isCJKName = TT_NAME_ID_FONT_FAMILY == nameId && LANG_CHINESE == PRIMARYLANGID(langId);
+
+ // ignore non-English strings (except for Chinese font names)
+ if (langId && langId != TT_MS_LANGID_ENGLISH_UNITED_STATES && !isCJKName) /* language ID 0 is accepted as well */
+ continue;
+ // ignore names other than font (sub)family and PostScript name
+ fz_try(ctx)
+ {
+ if (isCJKName)
+ read_ttf_string(ctx, file, tblOffset, &ttRecordBE, szCJKName, sizeof(szCJKName));
+ else if (TT_NAME_ID_FONT_FAMILY == nameId)
+ read_ttf_string(ctx, file, tblOffset, &ttRecordBE, szTTName, sizeof(szTTName));
+ else if (TT_NAME_ID_FONT_SUBFAMILY == nameId)
+ read_ttf_string(ctx, file, tblOffset, &ttRecordBE, szStyle, sizeof(szStyle));
+ else if (TT_NAME_ID_PS_NAME == nameId)
+ read_ttf_string(ctx, file, tblOffset, &ttRecordBE, szPSName, sizeof(szPSName));
+ }
+ fz_catch(ctx)
+ {
+ fz_warn(ctx, "ignoring face name decoding fonterror"); /* one undecodable name does not abort the whole face */
+ }
+ }
+
+ // try to prevent non-Arial fonts from accidentally substituting Arial
+ if (!strcmp(szPSName, "ArialMT"))
+ {
+ // cf. https://code.google.com/p/sumatrapdf/issues/detail?id=2471
+ if (strcmp(szTTName, "Arial") != 0)
+ szPSName[0] = '\0'; /* drop the PostScript name; the family-derived name below may still be added */
+ // TODO: is there a better way to distinguish Arial Caps from Arial proper?
+ // cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1290
+ else if (strstr(path, "caps") || strstr(path, "Caps"))
+ fz_throw(ctx, FZ_ERROR_GENERIC, "ignore %s, as it can't be distinguished from Arial,Regular", path);
+ }
+
+ if (szPSName[0])
+ append_mapping(ctx, &fontlistMS, szPSName, path, index);
+ if (szTTName[0])
+ {
+ // derive a PostScript-like name and add it, if it's different from the font's
+ // included PostScript name; cf. http://code.google.com/p/sumatrapdf/issues/detail?id=376
+ makeFakePSName(szTTName, szStyle); /* szTTName is modified in place */
+ // compare the two names before adding this one
+ if (lookup_compare(szTTName, szPSName))
+ append_mapping(ctx, &fontlistMS, szTTName, path, index);
+ }
+ if (szCJKName[0])
+ {
+ makeFakePSName(szCJKName, szStyle);
+ if (lookup_compare(szCJKName, szPSName) && lookup_compare(szCJKName, szTTName)) /* add only if it differs from both names above */
+ append_mapping(ctx, &fontlistMS, szCJKName, path, index);
+ }
+}
+
+static void
+parseTTFs(fz_context *ctx, const char *path) /* registers the single face of a .ttf/.otf file (offset 0, index 0) */
+{
+ fz_stream *file = fz_open_file(ctx, path); /* opened before fz_try: if this throws there is no stream to drop */
+ /* "fonterror : %s not found", path */
+ fz_try(ctx)
+ {
+ parseTTF(ctx, file, 0, 0, path);
+ }
+ fz_always(ctx)
+ {
+ fz_drop_stream(ctx,file); /* dropped whether or not parsing threw */
+ }
+ fz_catch(ctx)
+ {
+ fz_rethrow(ctx); /* errors are passed on to the caller */
+ }
+}
+
+static void
+parseTTCs(fz_context *ctx, const char *path) /* registers every face of a .ttc collection; the face index is its position in the collection */
+{
+ FONT_COLLECTION fontcollectionBE;
+ ULONG i, numFonts, *offsettableBE = NULL;
+
+ fz_stream *file = fz_open_file(ctx, path); /* opened before fz_try: if this throws there is nothing to clean up */
+ /* "fonterror : %s not found", path */
+
+ fz_var(offsettableBE); /* assigned inside fz_try and read in fz_always */
+
+ fz_try(ctx)
+ {
+ safe_read(ctx, file, 0, (char *)&fontcollectionBE, sizeof(FONT_COLLECTION));
+ if (BEtoHl(fontcollectionBE.Tag) != TTAG_ttcf)
+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : wrong format %x", (unsigned int)BEtoHl(fontcollectionBE.Tag));
+ if (BEtoHl(fontcollectionBE.Version) != TTC_VERSION1 &&
+ BEtoHl(fontcollectionBE.Version) != TTC_VERSION2)
+ {
+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror : invalid version %x", (unsigned int)BEtoHl(fontcollectionBE.Version));
+ }
+
+ numFonts = BEtoHl(fontcollectionBE.NumFonts);
+ offsettableBE = fz_malloc_array(ctx, numFonts, sizeof(ULONG));
+
+ safe_read(ctx, file, sizeof(FONT_COLLECTION), (char *)offsettableBE, numFonts * sizeof(ULONG)); /* the offset table directly follows the header */
+ for (i = 0; i < numFonts; i++)
+ parseTTF(ctx, file, BEtoHl(offsettableBE[i]), i, path); /* a throw from any face ends the loop and skips the remaining faces */
+ }
+ fz_always(ctx)
+ {
+ fz_free(ctx, offsettableBE); /* still NULL here if the failure came before the allocation */
+ fz_drop_stream(ctx,file);
+ }
+ fz_catch(ctx)
+ {
+ fz_rethrow(ctx);
+ }
+}
+
+static void
+extend_system_font_list(fz_context *ctx, const WCHAR *path) /* registers every .ttf/.otf/.ttc file matched by the file name or wildcard pattern in path */
+{
+	WCHAR szPath[MAX_PATH], *lpFileName;
+	WIN32_FIND_DATA FileData;
+	HANDLE hList;
+
+	GetFullPathName(path, nelem(szPath), szPath, &lpFileName); /* lpFileName points at the file-name part inside szPath */
+
+	hList = FindFirstFile(szPath, &FileData);
+	if (hList == INVALID_HANDLE_VALUE)
+	{
+		// Don't complain about missing directories (PATH_NOT_FOUND) or patterns matching nothing (FILE_NOT_FOUND)
+		if (GetLastError() == ERROR_FILE_NOT_FOUND || GetLastError() == ERROR_PATH_NOT_FOUND)
+			return;
+		fz_throw(ctx, FZ_ERROR_GENERIC, "extend_system_font_list: unknown error %d", (int)GetLastError());
+	}
+	do
+	{
+		if (!(FileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY))
+		{
+			char szPathUtf8[MAX_PATH], *fileExt;
+			int res;
+			lstrcpyn(lpFileName, FileData.cFileName, szPath + MAX_PATH - lpFileName); /* overwrite the pattern with the found name */
+			res = WideCharToMultiByte(CP_UTF8, 0, szPath, -1, szPathUtf8, sizeof(szPathUtf8), NULL, NULL);
+			if (!res)
+			{
+				fz_warn(ctx, "WideCharToMultiByte failed for %S", szPath);
+				continue; /* jumps to the FindNextFile test below */
+			}
+			fileExt = szPathUtf8 + strlen(szPathUtf8) - 4; /* last four characters, e.g. ".ttf" */
+			fz_try(ctx)
+			{
+				if (!_stricmp(fileExt, ".ttc"))
+					parseTTCs(ctx, szPathUtf8);
+				else if (!_stricmp(fileExt, ".ttf") || !_stricmp(fileExt, ".otf"))
+					parseTTFs(ctx, szPathUtf8);
+			}
+			fz_catch(ctx)
+			{
+				// ignore errors occurring while parsing a given font file
+			}
+		}
+	} while (FindNextFile(hList, &FileData));
+	FindClose(hList); /* always reached once the search was opened: parse errors are caught inside the loop */
+}
+
+static void
+destroy_system_font_list(void) /* releases the font list; registered with atexit by create_system_font_list */
+{
+ free(fontlistMS.fontmap);
+ memset(&fontlistMS, 0, sizeof(fontlistMS)); /* reset pointer, len and cap so a repeated call frees NULL */
+}
+
+static void
+create_system_font_list(fz_context *ctx) /* fills fontlistMS from <Windows directory>\Fonts and sorts it for bsearch */
+{
+ WCHAR szFontDir[MAX_PATH];
+ UINT cch;
+
+ cch = GetWindowsDirectory(szFontDir, nelem(szFontDir) - 12); /* keep room for the 12-character pattern appended below */
+ if (0 < cch && cch < nelem(szFontDir) - 12)
+ {
+ /* willus.com edit--Win XP default MSVCRT.DLL doesn't have wcscat_s */
+#ifdef _WIN64
+ wcscat_s(szFontDir, MAX_PATH, L"\\Fonts\\*.?t?");
+#else
+ wcscat(szFontDir,L"\\Fonts\\*.?t?");
+#endif
+ extend_system_font_list(ctx, szFontDir); /* "*.?t?" is narrowed to .ttf/.otf/.ttc by extend_system_font_list */
+ }
+
+ if (fontlistMS.len == 0)
+ fz_warn(ctx, "couldn't find any usable system fonts");
+
+#ifdef NOCJKFONT
+ {
+ // If no CJK fallback font is builtin but one has been shipped separately (in the same
+ // directory as the main executable), add it to the list of loadable system fonts
+ WCHAR szFile[MAX_PATH], *lpFileName;
+ GetModuleFileName(0, szFontDir, MAX_PATH);
+ GetFullPathName(szFontDir, MAX_PATH, szFile, &lpFileName);
+ lstrcpyn(lpFileName, L"DroidSansFallback.ttf", szFile + MAX_PATH - lpFileName); /* replace the executable's file name */
+ extend_system_font_list(ctx, szFile);
+ }
+#endif
+
+ // sort the font list, so that it can be searched binarily
+ qsort(fontlistMS.fontmap, fontlistMS.len, sizeof(pdf_fontmapMS), _stricmp); /* NOTE(review): _stricmp is passed as the comparator; relies on fontface being the first field of each entry */
+
+#ifdef DEBUG
+ // allow to overwrite system fonts for debugging purposes
+ // (either pass a full path or a search pattern such as "fonts\*.ttf")
+ cch = GetEnvironmentVariable(L"MUPDF_FONTS_PATTERN", szFontDir, nelem(szFontDir));
+ if (0 < cch && cch < nelem(szFontDir))
+ {
+ int i, prev_len = fontlistMS.len;
+ extend_system_font_list(ctx, szFontDir);
+ for (i = prev_len; i < fontlistMS.len; i++) /* new entries sit unsorted after the first prev_len sorted ones */
+ {
+ pdf_fontmapMS *entry = bsearch(fontlistMS.fontmap[i].fontface, fontlistMS.fontmap, prev_len, sizeof(pdf_fontmapMS), lookup_compare);
+ if (entry)
+ *entry = fontlistMS.fontmap[i]; /* replace the matching system entry */
+ }
+ qsort(fontlistMS.fontmap, fontlistMS.len, sizeof(pdf_fontmapMS), _stricmp);
+ }
+#endif
+
+ // make sure to clean up after ourselves
+ atexit(destroy_system_font_list); /* not reached if one of the calls above throws */
+}
+
+static fz_font *
+pdf_load_windows_font_by_name(fz_context *ctx, const char *orig_name) /* loads the system font best matching orig_name; throws when none is found */
+{
+ pdf_fontmapMS *found = NULL;
+ char *comma, *fontname;
+ fz_font *font;
+
+ /* WILLUS MOD--not multi-threaded for k2pdfopt */
+ /* fz_synchronize_begin(); */
+ if (fontlistMS.len == 0) /* the list is built on first use, and again on later calls while it is still empty */
+ {
+ fz_try(ctx)
+ {
+ create_system_font_list(ctx);
+ }
+ fz_catch(ctx) { } /* errors are swallowed; a still-empty list is reported just below */
+ }
+ /* WILLUS MOD--not multi-threaded for k2pdfopt */
+ /* fz_synchronize_end(); */
+ if (fontlistMS.len == 0)
+ fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror: couldn't find any fonts");
+
+ // work on a normalized copy of the font name
+ fontname = fz_strdup(ctx, orig_name);
+ remove_spaces(fontname);
+
+ // first, try to find the exact font name (including appended style information)
+ comma = strchr(fontname, ',');
+ if (comma)
+ {
+ *comma = '-'; /* look up "Name-Style" ... */
+ found = pdf_find_windows_font_path(fontname);
+ *comma = ','; /* ... then restore "Name,Style" for the later steps */
+ }
+ // second, substitute the font name with a known PostScript name
+ else
+ {
+ int i;
+ for (i = 0; i < nelem(baseSubstitutes) && !found; i++)
+ if (!strcmp(fontname, baseSubstitutes[i].name))
+ found = pdf_find_windows_font_path(baseSubstitutes[i].pattern);
+ }
+ // third, search for the font name without additional style information
+ if (!found)
+ found = pdf_find_windows_font_path(fontname);
+ // fourth, try to separate style from basename for prestyled fonts (e.g. "ArialBold")
+ if (!found && !comma && (str_ends_with(fontname, "Bold") || str_ends_with(fontname, "Italic")))
+ {
+ int styleLen = str_ends_with(fontname, "Bold") ? 4 : str_ends_with(fontname, "BoldItalic") ? 10 : 6;
+ fontname = fz_resize_array(ctx, fontname, strlen(fontname) + 2, sizeof(char)); /* one extra byte for the separator inserted below */
+ comma = fontname + strlen(fontname) - styleLen;
+ memmove(comma + 1, comma, styleLen + 1); /* shift the style and its terminator right by one */
+ *comma = '-';
+ found = pdf_find_windows_font_path(fontname);
+ *comma = ',';
+ if (!found)
+ found = pdf_find_windows_font_path(fontname);
+ }
+ // fifth, try to convert the font name from the common Chinese codepage 936
+ if (!found && fontname[0] < 0) /* NOTE(review): relies on char being signed to detect a non-ASCII first byte */
+ {
+ WCHAR cjkNameW[MAX_FACENAME];
+ char cjkName[MAX_FACENAME];
+ if (MultiByteToWideChar(936, MB_ERR_INVALID_CHARS, fontname, -1, cjkNameW, nelem(cjkNameW)) &&
+ WideCharToMultiByte(CP_UTF8, 0, cjkNameW, -1, cjkName, nelem(cjkName), NULL, NULL))
+ {
+ comma = strchr(cjkName, ',');
+ if (comma)
+ {
+ *comma = '-';
+ found = pdf_find_windows_font_path(cjkName);
+ *comma = ',';
+ }
+ if (!found)
+ found = pdf_find_windows_font_path(cjkName);
+ }
+ }
+
+ fz_free(ctx, fontname); /* found points into fontlistMS, not into fontname, so it stays valid */
+ if (!found)
+ fz_throw(ctx, FZ_ERROR_GENERIC, "couldn't find system font '%s'", orig_name);
+
+ /*
+ fz_warn(ctx, "loading non-embedded font '%s' from '%s'", orig_name, found->fontpath);
+ */
+
+ font = fz_new_font_from_file(ctx, orig_name, found->fontpath, found->index,
+ strcmp(found->fontface, "DroidSansFallback") != 0); /* last argument is 0 only for the DroidSansFallback face */
+ /* willus mod for MuPDF v1.10, 10-21-2016 */
+ {
+ fz_font_flags_t *flags;
+ flags=fz_font_flags(font);
+ if (flags!=NULL)
+ flags->ft_substitute = 1; /* every font loaded through this path is flagged as a substitute */
+ }
+ return font;
+}
+
+static fz_font *
+pdf_load_windows_font(fz_context *ctx, const char *fontname, int bold, int italic, int needs_exact_metrics) /* system font loader callback; returns NULL to decline a request */
+{
+ if (needs_exact_metrics)
+ {
+ const char *clean_name;
+ /* WILLUS: Declare pdf_clean_base14_name() */
+ extern const char *pdf_clean_base14_name(const char *fontname);
+
+ /* TODO: the metrics for Times-Roman and Courier don't match
+ those of Windows' Times New Roman and Courier New; for
+ some reason, Poppler doesn't seem to have this problem */
+ int len;
+ if (fz_lookup_builtin_font(ctx,fontname, bold, italic, &len)) /* a builtin font exists: decline so it is used instead */
+ return NULL;
+
+ /* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=2173 */
+ clean_name = pdf_clean_base14_name(fontname);
+ if (clean_name != fontname && !strncmp(clean_name, "Times-", 6)) /* name is an alias of a Times base font: decline */
+ return NULL;
+ }
+
+ // TODO: unset font->ft_substitute for base14/needs_exact_metrics?
+ return pdf_load_windows_font_by_name(ctx, fontname); /* bold and italic are not passed on to the name lookup */
+}
+
+static const char *clean_font_name(const char *fontname) /* maps a base font alias to its canonical name; returns fontname itself when no row matches */
+{
+ int i, k;
+ for (i = 0; i < nelem(base_font_names); i++)
+ for (k = 0; base_font_names[i][k]; k++) /* each row ends with NULL */
+ if (!strcmp_ignore_space(base_font_names[i][k], fontname)) /* spaces are ignored, case is not */
+ return base_font_names[i][0];
+ return fontname;
+}
+
+
+/* SumatraPDF: expose clean_font_name */
+static const char * pdf_clean_base14_name(const char *fontname) /* thin wrapper around clean_font_name */
+{
+ return clean_font_name(fontname);
+}
+
+static fz_font *
+pdf_load_windows_cjk_font(fz_context *ctx, const char *fontname, int ros, int serif) /* CJK loader callback: tries fontname, then a per-ros default system font */
+{
+ fz_font *font;
+
+ font=NULL; /* WILLUS: Avoid compiler warning */
+ /* try to find a matching system font before falling back to an approximate one */
+ fz_try(ctx)
+ {
+ font = pdf_load_windows_font_by_name(ctx, fontname);
+ }
+ fz_catch(ctx)
+ {
+ font = NULL; /* a failed lookup is not an error here; the fallbacks below are tried */
+ }
+ if (font)
+ return font;
+
+ /* try to fall back to a reasonable system font */
+ fz_try(ctx)
+ {
+ if (serif)
+ {
+ switch (ros)
+ {
+ case FZ_ADOBE_CNS: font = pdf_load_windows_font_by_name(ctx, "MingLiU"); break;
+ case FZ_ADOBE_GB: font = pdf_load_windows_font_by_name(ctx, "SimSun"); break;
+ case FZ_ADOBE_JAPAN: font = pdf_load_windows_font_by_name(ctx, "MS-Mincho"); break;
+ case FZ_ADOBE_KOREA: font = pdf_load_windows_font_by_name(ctx, "Batang"); break;
+ default: fz_throw(ctx, FZ_ERROR_GENERIC, "invalid serif ros");
+ }
+ }
+ else
+ {
+ switch (ros)
+ {
+ case FZ_ADOBE_CNS: font = pdf_load_windows_font_by_name(ctx, "DFKaiShu-SB-Estd-BF"); break;
+ case FZ_ADOBE_GB:
+ fz_try(ctx)
+ {
+ font = pdf_load_windows_font_by_name(ctx, "KaiTi");
+ }
+ fz_catch(ctx)
+ {
+ font = pdf_load_windows_font_by_name(ctx, "KaiTi_GB2312"); /* second choice; a failure here reaches the outer fz_catch */
+ }
+ break;
+ case FZ_ADOBE_JAPAN: font = pdf_load_windows_font_by_name(ctx, "MS-Gothic"); break;
+ case FZ_ADOBE_KOREA: font = pdf_load_windows_font_by_name(ctx, "Gulim"); break;
+ default: fz_throw(ctx, FZ_ERROR_GENERIC, "invalid sans-serif ros");
+ }
+ }
+ }
+ fz_catch(ctx)
+ {
+#ifdef NOCJKFONT
+ /* If no CJK fallback font is builtin, maybe one has been shipped separately */
+ font = pdf_load_windows_font_by_name(ctx, "DroidSansFallback"); /* may itself throw to the caller */
+#else
+ fz_rethrow(ctx);
+#endif
+ }
+
+ return font;
+}
+
+#endif
+
+void pdf_install_load_system_font_funcs(fz_context *ctx) /* installs the Windows font loaders on ctx; does nothing when _WIN32 is not defined */
+{
+#ifdef _WIN32
+ fz_install_load_system_font_funcs(ctx, pdf_load_windows_font, pdf_load_windows_cjk_font, NULL); /* third loader slot is left unset */
+#endif
+}
diff --git a/source/fitz/font.c b/source/fitz/font.c
index 00c6e8f99..1448b4a56 100644
--- a/source/fitz/font.c
+++ b/source/fitz/font.c
@@ -4,8 +4,11 @@
#include "draw-imp.h"
#include <ft2build.h>
+/* willus mod -- remove hb includes */
+/*
#include "hb.h"
#include "hb-ft.h"
+*/
#include <assert.h>
diff --git a/source/fitz/stext-device.c b/source/fitz/stext-device.c
index 2df90305e..b1f99e056 100644
--- a/source/fitz/stext-device.c
+++ b/source/fitz/stext-device.c
@@ -825,6 +825,11 @@ fz_new_stext_device(fz_context *ctx, fz_stext_page *page, const fz_stext_options
dev->lastchar = ' ';
dev->curdir = 1;
dev->lasttext = NULL;
+ /* willus mod -- seems like this should be here, but not sure. */
+ if (opts)
+ dev->flags = opts->flags;
+ else
+ dev->flags = 0;
return (fz_device*)dev;
}
diff --git a/source/fitz/string.c b/source/fitz/string.c
index f8eedb682..7a767983d 100644
--- a/source/fitz/string.c
+++ b/source/fitz/string.c
@@ -560,6 +560,10 @@ fz_utflen(const char *s)
*/
float fz_atof(const char *s)
{
+/* willus mod: atof(s), #if-#else-#endif */
+#if (!defined(__SSE__))
+ return(atof(s));
+#else
float result;
if (s == NULL)
@@ -572,6 +576,7 @@ float fz_atof(const char *s)
return 1;
result = fz_clamp(result, -FLT_MAX, FLT_MAX);
return result;
+#endif
}
/*
diff --git a/source/pdf/pdf-annot.c b/source/pdf/pdf-annot.c
index 4dfdf36fe..acff7d12a 100644
--- a/source/pdf/pdf-annot.c
+++ b/source/pdf/pdf-annot.c
@@ -5,8 +5,20 @@
#include <string.h>
#include <time.h>
+/* willus mod--don't use _mkgmtime--not available in Win XP */
#ifdef _WIN32
-#define timegm _mkgmtime
+static time_t timegm(struct tm *date);
+/* UTC broken-down time -> time_t, built from mktime()/gmtime() only (no _mkgmtime). */
+static time_t timegm(struct tm *date)
+	{
+	time_t t,z;
+	struct tm gmz;
+	/* probe the UTC offset one day after the epoch: at 0, local 1970-01-01 00:00 precedes the epoch east of Greenwich and mktime() can fail with -1 */
+	z=(time_t)86400;
+	gmz=(*gmtime(&z));
+	t=mktime(date)-(mktime(&gmz)-z);
+	return(t);
+	}
#endif
#define isdigit(c) (c >= '0' && c <= '9')
diff --git a/source/pdf/pdf-link.c b/source/pdf/pdf-link.c
index 37444b471..613cc05b9 100644
--- a/source/pdf/pdf-link.c
+++ b/source/pdf/pdf-link.c
@@ -345,6 +345,9 @@ pdf_resolve_link(fz_context *ctx, pdf_document *doc, const char *uri, float *xp,
}
return page;
}
+/* willus mod -- be quiet */
+/*
fz_warn(ctx, "unknown link uri '%s'", uri);
+*/
return -1;
}
diff --git a/source/pdf/pdf-parse.c b/source/pdf/pdf-parse.c
index 04a772204..9dd0cd898 100644
--- a/source/pdf/pdf-parse.c
+++ b/source/pdf/pdf-parse.c
@@ -663,9 +663,14 @@ pdf_parse_ind_obj(fz_context *ctx, pdf_document *doc,
if (c == '\r')
{
c = fz_peek_byte(ctx, file);
+/* willus mod -- no warning */
+/*
if (c != '\n')
fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen);
else
+*/
+if (c=='\n')
+/* willus mod -- end */
fz_read_byte(ctx, file);
}
stm_ofs = fz_tell(ctx, file);
diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c
index 8f888059b..08de7bfba 100644
--- a/source/pdf/pdf-xref.c
+++ b/source/pdf/pdf-xref.c
@@ -710,8 +710,11 @@ pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc, pdf_lexbuf *b
if (!s)
fz_throw(ctx, FZ_ERROR_GENERIC, "xref subsection length missing");
len = fz_atoi(fz_strsep(&s, " "));
+/* willus mod -- accept a negative subsection length instead of throwing */
+/*
if (len < 0)
fz_throw(ctx, FZ_ERROR_GENERIC, "xref subsection length must be positive");
+*/
/* broken pdfs where the section is not on a separate line */
if (s && *s != '\0')
@@ -1378,7 +1381,10 @@ pdf_init_document(fz_context *ctx, pdf_document *doc)
{
pdf_drop_xref_sections(ctx, doc);
fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
+/* willus mod -- be quiet */
+/*
fz_warn(ctx, "trying to repair broken xref");
+*/
repaired = 1;
}
@@ -1506,7 +1512,10 @@ pdf_drop_document_imp(fz_context *ctx, pdf_document *doc)
/* Swallow error, but continue dropping */
}
+/* willus mod -- no pdf_drop_js */
+/*
pdf_drop_js(ctx, doc->js);
+*/
pdf_drop_xref_sections(ctx, doc);
fz_free(ctx, doc->xref_index);
--
2.22.0