Evince
Evince is a document viewer capable of displaying single-page and multi-page document formats such as PDF and PostScript.
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
tfmfile.c
Go to the documentation of this file.
1 /* tfmfile.c -- readers for TFM, AFM, OTFM-0 and OTFM-1 files */
2 /*
3  * Copyright (C) 2000, Matias Atria
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18  */
19 
20 #include <config.h>
21 #include <stdio.h> /* tex-file.h needs this */
22 #include <stdlib.h>
23 #include <stdarg.h>
24 #include <string.h>
25 #include <sys/stat.h>
26 #include <unistd.h>
27 
28 #include "mdvi.h"
29 #include "private.h"
30 
31 #ifdef WITH_AFM_FILES
32 #undef TRUE
33 #undef FALSE
34 #include "afmparse.h"
35 #endif
36 
37 typedef struct tfmpool {
38  struct tfmpool *next;
39  struct tfmpool *prev;
40  char *short_name;
41  int links;
43 } TFMPool;
44 
45 static ListHead tfmpool = {NULL, NULL, 0};
47 
48 #define TFM_HASH_SIZE 31
49 
50 #ifdef WORD_LITTLE_ENDIAN
51 static inline void swap_array(Uint32 *ptr, int n)
52 {
53  Uint32 i;
54 
55  while(n-- > 0) {
56  i = *ptr;
57  *ptr++ = ((i & 0xff000000) >> 24)
58  | ((i & 0x00ff0000) >> 8)
59  | ((i & 0x0000ff00) << 8)
60  | ((i & 0x000000ff) << 24);
61  }
62 }
63 #endif
64 
65 #ifdef WITH_AFM_FILES
66 
67 static int __PROTO(ofm_load_file(const char *filename, TFMInfo *info));
68 
69 /* reading of AFM files */
70 /* macro to convert between AFM and TFM units */
71 #define AFM2TFM(x) FROUND((double)(x) * 0x100000 / 1000)
72 int afm_load_file(const char *filename, TFMInfo *info)
73 {
74  /* the information we want is:
75  * - tfmwidth
76  * - width and heights
77  * - character origins
78  */
79  FontInfo *fi = NULL;
80  int status;
81  CharMetricInfo *cm;
82  FILE *in;
83 
84  in = fopen(filename, "rb");
85  if(in == NULL)
86  return -1;
87  status = afm_parse_file(in, &fi, P_GM);
88  fclose(in);
89 
90  if(status != ok) {
91  mdvi_error(_("%s: Error reading AFM data\n"), filename);
92  return -1;
93  }
94 
95  /* aim high */
96  info->chars = xnalloc(TFMChar, 256);
97  info->loc = 256;
98  info->hic = 0;
99  info->design = 0xa00000; /* fake -- 10pt */
100  info->checksum = 0; /* no checksum */
101  info->type = DviFontAFM;
102  mdvi_strncpy(info->coding, fi->gfi->encodingScheme, 63);
103  mdvi_strncpy(info->family, fi->gfi->familyName, 63);
104 
105  /* now get the data */
106  for(cm = fi->cmi; cm < fi->cmi + fi->numOfChars; cm++) {
107  int code;
108  TFMChar *ch;
109 
110  code = cm->code;
111  if(code < 0 || code > 255)
112  continue; /* ignore it */
113  ch = &info->chars[code];
114  ch->present = 1;
115  if(code < info->loc)
116  info->loc = code;
117  if(code > info->hic)
118  info->hic = code;
119  ch->advance = AFM2TFM(cm->wx);
120  /* this is the `leftSideBearing' */
121  ch->left = AFM2TFM(cm->charBBox.llx);
122  /* this is the height (ascent - descent) -- the sign is to follow
123  * TeX conventions, as opposed to Adobe's ones */
124  ch->depth = -AFM2TFM(cm->charBBox.lly);
125  /* this is the width (rightSideBearing - leftSideBearing) */
126  ch->right = AFM2TFM(cm->charBBox.urx);
127  /* this is the `ascent' */
128  ch->height = AFM2TFM(cm->charBBox.ury);
129  }
130 
131  /* we don't need this anymore */
132  afm_free_fontinfo(fi);
133 
134  /* optimize storage */
135  if(info->loc > 0 || info->hic < 256) {
136  memmove(&info->chars[0],
137  &info->chars[info->loc],
138  (info->hic - info->loc + 1) * sizeof(TFMChar));
139  info->chars = mdvi_realloc(info->chars,
140  (info->hic - info->loc + 1) * sizeof(TFMChar));
141  }
142 
143  /* we're done */
144  return 0;
145 }
146 
147 #endif /* WITH_AFM_FILES */
148 
149 int tfm_load_file(const char *filename, TFMInfo *info)
150 {
151  int lf, lh, bc, ec, nw, nh, nd, ne;
152  int i, n;
153  Uchar *tfm;
154  Uchar *ptr;
155  struct stat st;
156  int size;
157  FILE *in;
158  Int32 *cb;
159  Int32 *charinfo;
160  Int32 *widths;
161  Int32 *heights;
162  Int32 *depths;
163  Uint32 checksum;
164 
165  in = fopen(filename, "rb");
166  if(in == NULL)
167  return -1;
168  tfm = NULL;
169 
170  DEBUG((DBG_FONTS, "(mt) reading TFM file `%s'\n",
171  filename));
172  /* We read the entire TFM file into core */
173  if(fstat(fileno(in), &st) < 0)
174  return -1;
175  /* according to the spec, TFM files are smaller than 16K */
176  if(st.st_size == 0 || st.st_size >= 16384)
177  goto bad_tfm;
178 
179  /* allocate a word-aligned buffer to hold the file */
180  size = 4 * ROUND(st.st_size, 4);
181  if(size != st.st_size)
182  mdvi_warning(_("Warning: TFM file `%s' has suspicious size\n"),
183  filename);
184  tfm = (Uchar *)mdvi_malloc(size);
185  if(fread(tfm, st.st_size, 1, in) != 1)
186  goto error;
187  /* we don't need this anymore */
188  fclose(in);
189  in = NULL;
190 
191  /* not a checksum, but serves a similar purpose */
192  checksum = 0;
193 
194  ptr = tfm;
195  /* get the counters */
196  lf = muget2(ptr);
197  lh = muget2(ptr); checksum += 6 + lh;
198  bc = muget2(ptr);
199  ec = muget2(ptr); checksum += ec - bc + 1;
200  nw = muget2(ptr); checksum += nw;
201  nh = muget2(ptr); checksum += nh;
202  nd = muget2(ptr); checksum += nd;
203  checksum += muget2(ptr); /* skip italics correction count */
204  checksum += muget2(ptr); /* skip lig/kern table size */
205  checksum += muget2(ptr); /* skip kern table size */
206  ne = muget2(ptr); checksum += ne;
207  checksum += muget2(ptr); /* skip # of font parameters */
208 
209  size = ec - bc + 1;
210  cb = (Int32 *)tfm; cb += 6 + lh;
211  charinfo = cb; cb += size;
212  widths = cb; cb += nw;
213  heights = cb; cb += nh;
214  depths = cb;
215 
216  if(widths[0] || heights[0] || depths[0] ||
217  checksum != lf || bc - 1 > ec || ec > 255 || ne > 256)
218  goto bad_tfm;
219 
220  /* from this point on, no error checking is done */
221 
222  /* now we're at the header */
223  /* get the checksum */
224  info->checksum = muget4(ptr);
225  /* get the design size */
226  info->design = muget4(ptr);
227  /* get the coding scheme */
228  if(lh > 2) {
229  /* get the coding scheme */
230  i = n = msget1(ptr);
231  if(n < 0 || n > 39) {
232  mdvi_warning(_("%s: font coding scheme truncated to 40 bytes\n"),
233  filename);
234  n = 39;
235  }
236  memcpy(info->coding, ptr, n);
237  info->coding[n] = 0;
238  ptr += i;
239  } else
240  strcpy(info->coding, "FontSpecific");
241  /* get the font family */
242  if(lh > 12) {
243  n = msget1(ptr);
244  if(n > 0) {
245  i = Max(n, 63);
246  memcpy(info->family, ptr, i);
247  info->family[i] = 0;
248  } else
249  strcpy(info->family, "unspecified");
250  ptr += n;
251  }
252  /* now we don't read from `ptr' anymore */
253 
254  info->loc = bc;
255  info->hic = ec;
256  info->type = DviFontTFM;
257 
258  /* allocate characters */
259  info->chars = xnalloc(TFMChar, size);
260 
261 
262 #ifdef WORD_LITTLE_ENDIAN
263  /* byte-swap the three arrays at once (they are consecutive in memory) */
264  swap_array((Uint32 *)widths, nw + nh + nd);
265 #endif
266 
267  /* get the relevant data */
268  ptr = (Uchar *)charinfo;
269  for(i = bc; i <= ec; ptr += 3, i++) {
270  int ndx;
271 
272  ndx = (int)*ptr; ptr++;
273  info->chars[i-bc].advance = widths[ndx];
274  /* TFM files lack this information */
275  info->chars[i-bc].left = 0;
276  info->chars[i-bc].right = widths[ndx];
277  info->chars[i-bc].present = (ndx != 0);
278  if(ndx) {
279  ndx = ((*ptr >> 4) & 0xf);
280  info->chars[i-bc].height = heights[ndx];
281  ndx = (*ptr & 0xf);
282  info->chars[i-bc].depth = depths[ndx];
283  }
284  }
285 
286  /* free everything */
287  mdvi_free(tfm);
288 
289  return 0;
290 
291 bad_tfm:
292  mdvi_error(_("%s: File corrupted, or not a TFM file\n"), filename);
293 error:
294  if(tfm) mdvi_free(tfm);
295  if(in) fclose(in);
296  return -1;
297 }
298 
299 static int ofm1_load_file(FILE *in, TFMInfo *info)
300 {
301  int lh, bc, ec, nw, nh, nd;
302  int nco, ncw, npc;
303  int i;
304  int n;
305  int size;
306  Int32 *tfm;
307  Int32 *widths;
308  Int32 *heights;
309  Int32 *depths;
310  TFMChar *tch;
311  TFMChar *end;
312 
313  lh = fuget4(in);
314  bc = fuget4(in);
315  ec = fuget4(in);
316  nw = fuget4(in);
317  nh = fuget4(in);
318  nd = fuget4(in);
319  fuget4(in); /* italics */
320  fuget4(in); /* lig-kern */
321  fuget4(in); /* kern */
322  fuget4(in); /* extensible recipe */
323  fuget4(in); /* parameters */
324  fuget4(in); /* direction */
325  nco = fuget4(in);
326  ncw = fuget4(in);
327  npc = fuget4(in);
328 
329  /* get the checksum */
330  info->checksum = fuget4(in);
331  /* the design size */
332  info->design = fuget4(in);
333  /* get the coding scheme */
334  if(lh > 2) {
335  /* get the coding scheme */
336  i = n = fsget1(in);
337  if(n < 0 || n > 39)
338  n = 39;
339  fread(info->coding, 39, 1, in);
340  info->coding[n] = 0;
341  } else
342  strcpy(info->coding, "FontSpecific");
343  /* get the font family */
344  if(lh > 12) {
345  n = fsget1(in);
346  if(n > 0) {
347  i = Max(n, 63);
348  fread(info->family, i, 1, in);
349  info->family[i] = 0;
350  } else
351  strcpy(info->family, "unspecified");
352  }
353  tfm = NULL;
354 
355  /* jump to the beginning of the char-info table */
356  fseek(in, 4L*nco, SEEK_SET);
357 
358  size = ec - bc + 1;
359  info->loc = bc;
360  info->hic = ec;
361  info->chars = xnalloc(TFMChar, size);
362  end = info->chars + size;
363 
364  for(tch = info->chars, i = 0; i < ncw; i++) {
365  TFMChar ch;
366  int nr;
367 
368  /* in the characters we store the actual indices */
369  ch.advance = fuget2(in);
370  ch.height = fuget1(in);
371  ch.depth = fuget1(in);
372  /* skip 2nd word */
373  fuget4(in);
374  /* get # of repeats */
375  nr = fuget2(in);
376  /* skip parameters */
377  fseek(in, (long)npc * 2, SEEK_CUR);
378  /* if npc is odd, skip padding */
379  if(npc & 1) fuget2(in);
380 
381  /* now repeat the character */
382  while(nr-- >= 0 && tch < end)
383  memcpy(tch++, &ch, sizeof(TFMChar));
384  if(tch == end)
385  goto bad_tfm;
386  }
387 
388  /* I wish we were done, but we aren't */
389 
390  /* get the widths, heights and depths */
391  size = nw + nh + nd;
392  tfm = xnalloc(Int32, size);
393  /* read them in one sweep */
394  if(fread(tfm, 4, size, in) != size) {
395  mdvi_free(tfm);
396  goto bad_tfm;
397  }
398 
399  /* byte-swap things if necessary */
400 #ifdef WORD_LITTLE_ENDIAN
401  swap_array((Uint32 *)tfm, size);
402 #endif
403  widths = tfm;
404  heights = widths + nw;
405  depths = heights + nh;
406 
407  if(widths[0] || heights[0] || depths[0])
408  goto bad_tfm;
409 
410  /* now fix the characters */
411  size = ec - bc + 1;
412  for(tch = info->chars; tch < end; tch++) {
413  tch->present = (tch->advance != 0);
414  tch->advance = widths[tch->advance];
415  tch->height = heights[tch->height];
416  tch->depth = depths[tch->depth];
417  tch->left = 0;
418  tch->right = tch->advance;
419  }
420 
421  /* NOW we're done */
422  mdvi_free(tfm);
423  return 0;
424 
425 bad_tfm:
426  if(tfm) mdvi_free(tfm);
427  return -1;
428 }
429 
430 /* we don't read OFM files into memory, because they can potentially be large */
431 static int ofm_load_file(const char *filename, TFMInfo *info)
432 {
433  int lf, lh, bc, ec, nw, nh, nd;
434  int i, n;
435  Int32 *tfm;
436  Uchar *ptr;
437  int size;
438  FILE *in;
439  Int32 *cb;
440  Int32 *charinfo;
441  Int32 *widths;
442  Int32 *heights;
443  Int32 *depths;
444  Uint32 checksum;
445  int olevel;
446  int nwords;
447 
448  in = fopen(filename, "rb");
449  if(in == NULL)
450  return -1;
451 
452  /* not a checksum, but serves a similar purpose */
453  checksum = 0;
454 
455  /* get the counters */
456  /* get file level */
457  olevel = fsget2(in);
458  if(olevel != 0)
459  goto bad_tfm;
460  olevel = fsget2(in);
461  if(olevel != 0) {
462  DEBUG((DBG_FONTS, "(mt) reading Level-1 OFM file `%s'\n",
463  filename));
464  /* we handle level-1 files separately */
465  if(ofm1_load_file(in, info) < 0)
466  goto bad_tfm;
467  return 0;
468  }
469 
470  DEBUG((DBG_FONTS, "(mt) reading Level-0 OFM file `%s'\n", filename));
471  nwords = 14;
472  lf = fuget4(in); checksum = nwords;
473  lh = fuget4(in); checksum += lh;
474  bc = fuget4(in);
475  ec = fuget4(in); checksum += 2 * (ec - bc + 1);
476  nw = fuget4(in); checksum += nw;
477  nh = fuget4(in); checksum += nh;
478  nd = fuget4(in); checksum += nd;
479  checksum += fuget4(in); /* skip italics correction count */
480  checksum += 2*fuget4(in); /* skip lig/kern table size */
481  checksum += fuget4(in); /* skip kern table size */
482  checksum += 2*fuget4(in); /* skip extensible recipe count */
483  checksum += fuget4(in); /* skip # of font parameters */
484 
485  /* I have found several .ofm files that seem to have the
486  * font-direction word missing, so we try to detect that here */
487  if(checksum == lf + 1) {
488  DEBUG((DBG_FONTS, "(mt) font direction missing in `%s'\n",
489  filename));
490  checksum--;
491  nwords--;
492  } else {
493  /* skip font direction */
494  fuget4(in);
495  }
496 
497  if(checksum != lf || bc > ec + 1 || ec > 65535)
498  goto bad_tfm;
499 
500  /* now we're at the header */
501 
502  /* get the checksum */
503  info->checksum = fuget4(in);
504  /* get the design size */
505  info->design = fuget4(in);
506 
507  /* get the coding scheme */
508  if(lh > 2) {
509  /* get the coding scheme */
510  i = n = fsget1(in);
511  if(n < 0 || n > 39) {
512  mdvi_warning(_("%s: font coding scheme truncated to 40 bytes\n"),
513  filename);
514  n = 39;
515  }
516  fread(info->coding, 39, 1, in);
517  info->coding[n] = 0;
518  } else
519  strcpy(info->coding, "FontSpecific");
520  /* get the font family */
521  if(lh > 12) {
522  n = fsget1(in);
523  if(n > 0) {
524  i = Max(n, 63);
525  fread(info->family, i, 1, in);
526  info->family[i] = 0;
527  } else
528  strcpy(info->family, "unspecified");
529  }
530 
531  /* now skip anything else in the header */
532  fseek(in, 4L*(nwords + lh), SEEK_SET);
533  /* and read everything at once */
534  size = 2*(ec - bc + 1) + nw + nh + nd;
535  tfm = xnalloc(Int32, size * sizeof(Int32));
536  if(fread(tfm, 4, size, in) != size) {
537  mdvi_free(tfm);
538  goto bad_tfm;
539  }
540  /* byte-swap all the tables at once */
541 #ifdef WORD_LITTLE_ENDIAN
542  swap_array((Uint32 *)tfm, size);
543 #endif
544  cb = tfm;
545  charinfo = cb; cb += 2*(ec - bc + 1);
546  widths = cb; cb += nw;
547  heights = cb; cb += nh;
548  depths = cb;
549 
550  if(widths[0] || heights[0] || depths[0]) {
551  mdvi_free(tfm);
552  goto bad_tfm;
553  }
554 
555  /* from this point on, no error checking is done */
556 
557  /* we don't need this anymore */
558  fclose(in);
559 
560  /* now we don't read from `ptr' anymore */
561 
562  info->loc = bc;
563  info->hic = ec;
564  info->type = DviFontTFM;
565 
566  /* allocate characters */
567  info->chars = xnalloc(TFMChar, size);
568 
569  /* get the relevant data */
570  ptr = (Uchar *)charinfo;
571  for(i = bc; i <= ec; ptr += 4, i++) {
572  int ndx;
573 
574  ndx = muget2(ptr);
575  info->chars[i-bc].advance = widths[ndx];
576  /* TFM files lack this information */
577  info->chars[i-bc].left = 0;
578  info->chars[i-bc].right = widths[ndx];
579  info->chars[i-bc].present = (ndx != 0);
580  ndx = muget1(ptr);
581  info->chars[i-bc].height = heights[ndx];
582  ndx = muget1(ptr);
583  info->chars[i-bc].depth = depths[ndx];
584  }
585 
586  mdvi_free(tfm);
587  return 0;
588 
589 bad_tfm:
590  mdvi_error(_("%s: File corrupted, or not a TFM file\n"), filename);
591  fclose(in);
592  return -1;
593 }
594 
595 char *lookup_font_metrics(const char *name, int *type)
596 {
597  char *file;
598 
599  switch(*type) {
600 #ifndef WITH_AFM_FILES
601  case DviFontAny:
602 #endif
603  case DviFontTFM:
604  file = kpse_find_tfm(name);
605  *type = DviFontTFM;
606  break;
607  case DviFontOFM: {
608  file = kpse_find_ofm(name);
609  /* we may have gotten a TFM back */
610  if(file != NULL) {
611  const char *ext = file_extension(file);
612  if(ext && STREQ(ext, "tfm"))
613  *type = DviFontTFM;
614  }
615  break;
616  }
617 #ifdef WITH_AFM_FILES
618  case DviFontAFM:
619  file = kpse_find_file(name, kpse_afm_format, 0);
620  break;
621  case DviFontAny:
622  file = kpse_find_file(name, kpse_afm_format, 0);
623  *type = DviFontAFM;
624  if(file == NULL) {
625  file = kpse_find_tfm(name);
626  *type = DviFontTFM;
627  }
628  break;
629 #endif
630  default:
631  return NULL;
632  }
633 
634  return file;
635 }
636 
637 /*
638  * The next two functions are just wrappers for the font metric loaders,
639  * and use the pool of TFM data
640  */
641 
642 /* this is how we interpret arguments:
643  * - if filename is NULL, we look for files of the given type,
644  * unless type is DviFontAny, in which case we try all the
645  * types we know of.
646  * - if filename is not NULL, we look at `type' to decide
647  * how to read the file. If type is DviFontAny, we just
648  * return an error.
649  */
650 TFMInfo *get_font_metrics(const char *short_name, int type, const char *filename)
651 {
652  TFMPool *tfm = NULL;
653  int status;
654  char *file;
655 
656  if(tfmpool.count) {
657  tfm = (TFMPool *)mdvi_hash_lookup(&tfmhash,
658  MDVI_KEY(short_name));
659  if(tfm != NULL) {
660  DEBUG((DBG_FONTS, "(mt) reusing metric file `%s' (%d links)\n",
661  short_name, tfm->links));
662  tfm->links++;
663  return &tfm->tfminfo;
664  }
665  }
666 
667  file = filename ? (char *)filename : lookup_font_metrics(short_name, &type);
668  if(file == NULL)
669  return NULL;
670 
671  tfm = xalloc(TFMPool);
672  DEBUG((DBG_FONTS, "(mt) loading font metric data from `%s'\n", file, file));
673  switch(type) {
674  case DviFontTFM:
675  status = tfm_load_file(file, &tfm->tfminfo);
676  break;
677  case DviFontOFM:
678  status = ofm_load_file(file, &tfm->tfminfo);
679  break;
680 #ifdef WITH_AFM_FILES
681  case DviFontAFM:
682  status = afm_load_file(file, &tfm->tfminfo);
683  break;
684 #endif
685  default:
686  status = -1;
687  break;
688  }
689  if(file != filename)
690  mdvi_free(file);
691  if(status < 0) {
692  mdvi_free(tfm);
693  return NULL;
694  }
695  tfm->short_name = mdvi_strdup(short_name);
696 
697  /* add it to the pool */
698  if(tfmpool.count == 0)
699  mdvi_hash_create(&tfmhash, TFM_HASH_SIZE);
700  mdvi_hash_add(&tfmhash, MDVI_KEY(tfm->short_name),
701  tfm, MDVI_HASH_UNCHECKED);
702  listh_prepend(&tfmpool, LIST(tfm));
703  tfm->links = 1;
704 
705  return &tfm->tfminfo;
706 }
707 
709 {
710  TFMPool *tfm;
711 
712  if(tfmpool.count == 0)
713  return;
714  /* get the entry -- can't use the hash table for this, because
715  * we don't have the short name */
716  for(tfm = (TFMPool *)tfmpool.head; tfm; tfm = tfm->next)
717  if(info == &tfm->tfminfo)
718  break;
719  if(tfm == NULL)
720  return;
721  if(--tfm->links > 0) {
722  DEBUG((DBG_FONTS, "(mt) %s not removed, still in use\n",
723  tfm->short_name));
724  return;
725  }
726  mdvi_hash_remove_ptr(&tfmhash, MDVI_KEY(tfm->short_name));
727 
728  DEBUG((DBG_FONTS, "(mt) removing unused TFM data for `%s'\n", tfm->short_name));
729  listh_remove(&tfmpool, LIST(tfm));
730  mdvi_free(tfm->short_name);
731  mdvi_free(tfm->tfminfo.chars);
732  mdvi_free(tfm);
733 }
734 
736 {
737  TFMPool *ptr;
738 
739  for(; (ptr = (TFMPool *)tfmpool.head); ) {
740  tfmpool.head = LIST(ptr->next);
741 
742  mdvi_free(ptr->short_name);
743  mdvi_free(ptr->tfminfo.chars);
744  mdvi_free(ptr);
745  }
746  mdvi_hash_reset(&tfmhash, 0);
747 }