Evince
Evince is a document viewer capable of displaying single-page and multi-page document formats such as PDF and PostScript.
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
djvu-text-page.c
Go to the documentation of this file.
1 /*
2  * Implements search and copy functionality for Djvu files.
3  * Copyright (C) 2006 Michael Hofmann <mh21@piware.de>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2, or (at your option)
8  * any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18  */
19 
20 #include <config.h>
21 #include <string.h>
22 #include <glib.h>
23 #include <libdjvu/miniexp.h>
24 #include "djvu-text-page.h"
25 
26 
/*
 * Grows *target in place so that it also covers *source: x1/y1 are lowered
 * and x2/y2 are raised whenever source extends further in that direction.
 * source is only read.
 *
 * NOTE(review): the declarator line (function name and first parameter) is
 * absent from this listing; the call site in this file spells it
 * djvu_text_page_union (union_box, &box) -- confirm against the real file.
 */
35 static void
37  EvRectangle *source)
38 {
39  if (source->x1 < target->x1)
40  target->x1 = source->x1;
41  if (source->x2 > target->x2)
42  target->x2 = source->x2;
43  if (source->y1 < target->y1)
44  target->y1 = source->y1;
45  if (source->y2 > target->y2)
46  target->y2 = source->y2;
47 }
48 
/*
 * Per-element callback of the selection walk when boxes are collected.
 *
 * Nothing happens until either page->results is already non-empty or p is
 * the element stored in page->start. From then on the integers at positions
 * 1..4 of p are read as x1, y1, x2, y2 and either merged into the rectangle
 * at the head of page->results, or prepended as a fresh copy.
 *
 * Returns FALSE once p is page->end (tells the caller to stop walking),
 * TRUE otherwise.
 *
 * NOTE(review): the declarator line is absent from this listing; the caller
 * invokes this as djvu_text_page_selection_process_box (page, p, delimit).
 * NOTE(review): because a non-empty page->results also enables collection,
 * results left over from an earlier operation would start it before
 * page->start is reached -- confirm callers clear page->results first.
 */
59 static gboolean
61  miniexp_t p,
62  int delimit)
63 {
64  if (page->results || p == page->start) {
65  EvRectangle box;
66 
67  box.x1 = miniexp_to_int (miniexp_nth (1, p));
68  box.y1 = miniexp_to_int (miniexp_nth (2, p));
69  box.x2 = miniexp_to_int (miniexp_nth (3, p));
70  box.y2 = miniexp_to_int (miniexp_nth (4, p));
71 
  /* Bit value 2 of delimit marks a line break; without it the element
   * extends the most recently added (head) rectangle. */
72  if (!(delimit & 2) && page->results != NULL) {
73  EvRectangle *union_box = (EvRectangle *)page->results->data;
74 
75  /* If still on the same line, add box to union */
76  djvu_text_page_union (union_box, &box);
77  } else {
78  /* A new line, a new box */
79  page->results = g_list_prepend (page->results, ev_rectangle_copy (&box));
80  }
81 
82  if (p == page->end)
83  return FALSE;
84  }
85  return TRUE;
86 }
87 
/*
 * Per-element callback of the selection walk when text is collected.
 *
 * Nothing happens until either page->text is already set or p is the
 * element stored in page->start. From then on the string at position 5 of p
 * is appended to page->text. The separator placed before it is "\n" when
 * delimit has bit value 2 set, " " when it has bit value 1 set, and a NULL
 * separator otherwise. The first collected token is simply duplicated.
 *
 * Returns FALSE once p is page->end (tells the caller to stop walking),
 * TRUE otherwise.
 *
 * NOTE(review): the declarator line is absent from this listing; the caller
 * invokes this as djvu_text_page_selection_process_text (page, p, delimit).
 */
98 static gboolean
100  miniexp_t p,
101  int delimit)
102 {
103  if (page->text || p == page->start) {
104  char *token_text = (char *) miniexp_to_str (miniexp_nth (5, p));
105  if (page->text) {
  /* Build a new joined string, then release the old buffer. */
106  char *new_text =
107  g_strjoin (delimit & 2 ? "\n" :
108  delimit & 1 ? " " : NULL,
109  page->text, token_text,
110  NULL);
111  g_free (page->text);
112  page->text = new_text;
113  } else
114  page->text = g_strdup (token_text);
115  if (p == page->end)
116  return FALSE;
117  }
118  return TRUE;
119 }
120 
/*
 * Recursively walks the s-expression p and hands every element that
 * directly contains a string to the text or box callback, depending on
 * whether type equals DJVU_SELECTION_TEXT.
 *
 * p must be a cons whose car is a symbol; otherwise the guard returns FALSE.
 * delimit is a bit set passed down to the callbacks: when the symbol of p is
 * not page->char_symbol, value 1 is OR-ed in for page->word_symbol and
 * value 2 for any other symbol. It is cleared after the first child has been
 * handled, so only the first token of an element carries the separator.
 *
 * Returns FALSE as soon as a callback or a nested walk returns FALSE,
 * TRUE when the whole subtree was visited.
 *
 * NOTE(review): the declarator line (function name and the declaration of
 * the first parameter, used below as "type") is absent from this listing;
 * the recursive call spells the name djvu_text_page_selection.
 */
133 static gboolean
135  DjvuTextPage *page,
136  miniexp_t p,
137  int delimit)
138 {
139  miniexp_t deeper;
140 
141  g_return_val_if_fail (miniexp_consp (p) && miniexp_symbolp
142  (miniexp_car (p)), FALSE);
143 
144  if (miniexp_car (p) != page->char_symbol)
145  delimit |= miniexp_car (p) == page->word_symbol ? 1 : 2;
146 
  /* Five cdr steps: skip the leading symbol and the four integers the
   * callbacks read as coordinates, leaving the list of children. */
147  deeper = miniexp_cddr (miniexp_cdddr (p));
148  while (deeper != miniexp_nil) {
149  miniexp_t str = miniexp_car (deeper);
150  if (miniexp_stringp (str)) {
  /* String child: the callbacks receive the enclosing element p. */
151  if (type == DJVU_SELECTION_TEXT) {
152  if (!djvu_text_page_selection_process_text (page, p, delimit))
153  return FALSE;
154  } else {
155  if (!djvu_text_page_selection_process_box (page, p, delimit))
156  return FALSE;
157  }
158  } else {
159  if (!djvu_text_page_selection (type, page, str, delimit))
160  return FALSE;
161  }
162  delimit = 0;
163  deeper = miniexp_cdr (deeper);
164  }
165  return TRUE;
166 }
167 
/*
 * Tests whether the box of element p (integers at positions 1..4, read as
 * x1, y1, x2, y2) overlaps rect, touching edges included. On overlap
 * page->start is set to p if it is still miniexp_nil, and page->end is
 * always set to p; over a full walk start therefore ends up as the first
 * overlapping element visited and end as the last one.
 *
 * NOTE(review): the declarator line is absent from this listing; the caller
 * invokes this as djvu_text_page_limits_process (page, p, rect).
 */
168 static void
170  miniexp_t p,
171  EvRectangle *rect)
172 {
173  EvRectangle current;
174 
175  current.x1 = miniexp_to_int (miniexp_nth (1, p));
176  current.y1 = miniexp_to_int (miniexp_nth (2, p));
177  current.x2 = miniexp_to_int (miniexp_nth (3, p));
178  current.y2 = miniexp_to_int (miniexp_nth (4, p));
179  if (current.x2 >= rect->x1 && current.y1 <= rect->y2 &&
180  current.x1 <= rect->x2 && current.y2 >= rect->y1) {
181  if (page->start == miniexp_nil)
182  page->start = p;
183  page->end = p;
184  }
185 }
186 
187 
/*
 * Recursively walks the s-expression p and runs the overlap test against
 * rect for every element that directly contains a string; non-string
 * children are descended into. The results are recorded in page->start and
 * page->end by djvu_text_page_limits_process.
 *
 * p must be a cons whose car is a symbol; otherwise the guard returns
 * without doing anything.
 *
 * NOTE(review): the declarator line is absent from this listing; the
 * recursive call spells the name djvu_text_page_limits (page, str, rect).
 */
188 static void
190  miniexp_t p,
191  EvRectangle *rect)
192 {
193  miniexp_t deeper;
194 
195  g_return_if_fail (miniexp_consp (p) &&
196  miniexp_symbolp (miniexp_car (p)));
197 
  /* Five cdr steps: skip the leading symbol and the four coordinates. */
198  deeper = miniexp_cddr (miniexp_cdddr (p));
199  while (deeper != miniexp_nil) {
200  miniexp_t str = miniexp_car (deeper);
201  if (miniexp_stringp (str))
202  djvu_text_page_limits_process (page, p, rect);
203  else
204  djvu_text_page_limits (page, str, rect);
205 
206  deeper = miniexp_cdr (deeper);
207  }
208 }
209 
/*
 * Public entry point returning a GList built from page->results for the
 * text elements that overlap rectangle.
 *
 * page->start and page->end are reset and recomputed from rectangle, a
 * selection walk over page->text_structure is run with delimit 0, and
 * page->results is returned after being reversed.
 *
 * NOTE(review): the declarator line (function name, first parameter) and the
 * first line of the walk call are absent from this listing; presumably the
 * call is djvu_text_page_selection with a box selection type -- confirm.
 * NOTE(review): page->results is not cleared here and is left pointing at
 * the pre-reversal head node after g_list_reverse; verify against callers
 * who owns the list and whether results is reset elsewhere.
 */
217 GList *
219  EvRectangle *rectangle)
220 {
221  page->start = miniexp_nil;
222  page->end = miniexp_nil;
223 
224  /* Get page->start and page->end filled from selection rectangle */
225  djvu_text_page_limits (page, page->text_structure, rectangle);
226  /* Fills page->results with the bounding boxes */
228  page, page->text_structure, 0);
229 
230  return g_list_reverse (page->results);
231 }
232 
/*
 * Public entry point returning the text of the elements that overlap
 * rectangle.
 *
 * page->start and page->end are reset and recomputed from rectangle, then a
 * selection walk over page->text_structure is run with delimit 0. The
 * accumulated page->text is handed to the caller and the field is reset to
 * NULL, so the page no longer references the returned string. The result is
 * whatever page->text holds after the walk (NULL if nothing was collected
 * and it was NULL before).
 *
 * NOTE(review): the declarator line (function name, first parameter) and the
 * first line of the walk call are absent from this listing; presumably the
 * call is djvu_text_page_selection with DJVU_SELECTION_TEXT -- confirm.
 */
233 char *
235  EvRectangle *rectangle)
236 {
237  char* text;
238 
239  page->start = miniexp_nil;
240  page->end = miniexp_nil;
241  djvu_text_page_limits (page, page->text_structure, rectangle);
243  page->text_structure, 0);
244 
245  /* Do not free the string here: it is returned to the caller */
246  text = page->text;
247  page->text = NULL;
248 
249  return text;
250 }
251 
/*
 * Maps a byte offset into page->text to the s-expression element recorded
 * for it in page->links.
 *
 * A binary search over page->links is done on DjvuTextLink.position. On an
 * exact hit that entry's pair is returned; otherwise the loop leaves mid on
 * the entry just below the probe point and that entry's pair is returned.
 * The search relies on the links being in ascending position order.
 *
 * Returns miniexp_nil (via the guard) when page->links is empty.
 *
 * NOTE(review): the declarator line is absent from this listing; callers
 * invoke this as djvu_text_page_position (page, offset).
 * NOTE(review): if position is smaller than the position of the first link,
 * "hi = --mid" drives mid to -1 and the final g_array_index reads before the
 * array. Confirm callers never pass such an offset (e.g. a negative one).
 */
262 static miniexp_t
264  int position)
265 {
266  GArray *links = page->links;
267  int low = 0;
268  int hi = links->len - 1;
269  int mid = 0;
270 
271  g_return_val_if_fail (hi >= 0, miniexp_nil);
272 
273  /* Shamelessly copied from GNU classpath */
274  while (low <= hi) {
275  DjvuTextLink *link;
276 
277  mid = (low + hi) >> 1;
278  link = &g_array_index (links, DjvuTextLink, mid);
279  if (link->position == position)
280  break;
281  else if (link->position > position)
  /* Also steps mid down, so it names the predecessor on exit. */
282  hi = --mid;
283  else
284  low = mid + 1;
285  }
286 
287  return g_array_index (page->links, DjvuTextLink, mid).pair;
288 }
289 
/*
 * Per-element callback of the bounding-box walk.
 *
 * Nothing happens until either page->bounding_box is already set or p is
 * start. From then on a rectangle is allocated and filled from the integers
 * at positions 1..4 of p (x1, y1, x2, y2). If there is no bounding box yet
 * the new rectangle becomes page->bounding_box; otherwise it is released
 * again after the (partly missing) statement below has used it.
 *
 * Returns FALSE once p is end (tells the caller to stop walking),
 * TRUE otherwise.
 *
 * NOTE(review): the declarator line and the first line of the statement
 * inside "if (page->bounding_box)" are absent from this listing; presumably
 * that statement merges new_rectangle into page->bounding_box -- confirm.
 * NOTE(review): the rectangle comes from ev_rectangle_new () but is released
 * with g_free (); verify this matches the allocator ev_rectangle_new uses.
 */
301 static gboolean
303  miniexp_t p,
304  miniexp_t start,
305  miniexp_t end)
306 {
307  if (page->bounding_box || p == start) {
308  EvRectangle *new_rectangle = ev_rectangle_new ();
309  new_rectangle->x1 = miniexp_to_int (miniexp_nth (1, p));
310  new_rectangle->y1 = miniexp_to_int (miniexp_nth (2, p));
311  new_rectangle->x2 = miniexp_to_int (miniexp_nth (3, p));
312  new_rectangle->y2 = miniexp_to_int (miniexp_nth (4, p));
313  if (page->bounding_box) {
315  new_rectangle);
316  g_free (new_rectangle);
317  } else
318  page->bounding_box = new_rectangle;
319  if (p == end)
320  return FALSE;
321  }
322  return TRUE;
323 }
324 
/*
 * Recursively walks the s-expression p between the elements start and end.
 * Elements that directly contain a string are passed to a per-element
 * callback together with start and end; non-string children are descended
 * into.
 *
 * p must be a cons whose car is a symbol; otherwise the guard returns FALSE.
 * Returns FALSE as soon as a callee returns FALSE, TRUE when the whole
 * subtree was visited.
 *
 * NOTE(review): the declarator line and the first line of both "if (!...)"
 * conditions are absent from this listing, so the callee names are not
 * visible here. Another function in this file calls this one as
 * djvu_text_page_sexpr (page, page->text_structure, start, end); the second
 * condition, which receives the child "str", is presumably the recursion.
 */
337 static gboolean
339  miniexp_t p,
340  miniexp_t start,
341  miniexp_t end)
342 {
343  miniexp_t deeper;
344 
345  g_return_val_if_fail (miniexp_consp (p) && miniexp_symbolp
346  (miniexp_car (p)), FALSE);
347 
  /* Five cdr steps: skip the leading symbol and the four coordinates. */
348  deeper = miniexp_cddr (miniexp_cdddr (p));
349  while (deeper != miniexp_nil) {
350  miniexp_t str = miniexp_car (deeper);
351  if (miniexp_stringp (str)) {
353  (page, p, start, end))
354  return FALSE;
355  } else {
357  (page, str, start, end))
358  return FALSE;
359  }
360  deeper = miniexp_cdr (deeper);
361  }
362  return TRUE;
363 }
364 
/*
 * Computes the bounding box for the elements from start to end.
 *
 * page->bounding_box is reset to NULL, the bounding-box walk is run over
 * page->text_structure, and whatever the walk stored in page->bounding_box
 * is returned. The result stays NULL if the walk never stores a rectangle.
 *
 * NOTE(review): the declarator line is absent from this listing; the caller
 * invokes this as djvu_text_page_box (page, start, end).
 * NOTE(review): page->bounding_box keeps pointing at the returned rectangle
 * until the next call overwrites it; confirm the caller owns and frees it.
 */
373 static EvRectangle *
375  miniexp_t start,
376  miniexp_t end)
377 {
378  page->bounding_box = NULL;
379  djvu_text_page_sexpr (page, page->text_structure, start, end);
380  return page->bounding_box;
381 }
382 
/*
 * Recursively flattens the s-expression p into page->text and records, for
 * every string token, where it landed.
 *
 * For each string child a DjvuTextLink is appended to page->links whose
 * position is the length of page->text before this token is added (0 for
 * the first token) and whose pair is the enclosing element p. The token is
 * then appended to page->text, preceded by " " when delimit is set and by a
 * NULL separator otherwise. When case_sensitive is FALSE the token is
 * case-folded first and the folded copy is freed after use.
 *
 * delimit is forced on when the symbol of p is not page->char_symbol and is
 * cleared after the first child, so only the first token of an element is
 * separated from the preceding text.
 *
 * p must be a cons whose car is a symbol; otherwise the guard returns.
 *
 * NOTE(review): the declarator line is absent from this listing; the
 * recursive call spells the name djvu_text_page_append_text.
 * NOTE(review): strlen and g_strjoin over the whole accumulated text run
 * once per token, so cost grows with text length times token count.
 */
392 static void
394  miniexp_t p,
395  gboolean case_sensitive,
396  gboolean delimit)
397 {
398  char *token_text;
399  miniexp_t deeper;
400 
401  g_return_if_fail (miniexp_consp (p) &&
402  miniexp_symbolp (miniexp_car (p)));
403 
404  delimit |= page->char_symbol != miniexp_car (p);
405 
  /* Five cdr steps: skip the leading symbol and the four coordinates. */
406  deeper = miniexp_cddr (miniexp_cdddr (p));
407  while (deeper != miniexp_nil) {
408  miniexp_t data = miniexp_car (deeper);
409  if (miniexp_stringp (data)) {
410  DjvuTextLink link;
  /* Offset is taken before the separator and token are appended. */
411  link.position = page->text == NULL ? 0 :
412  strlen (page->text);
413  link.pair = p;
414  g_array_append_val (page->links, link);
415 
416  token_text = (char *) miniexp_to_str (data);
417  if (!case_sensitive)
418  token_text = g_utf8_casefold (token_text, -1);
419  if (page->text == NULL)
420  page->text = g_strdup (token_text);
421  else {
422  char *new_text =
423  g_strjoin (delimit ? " " : NULL,
424  page->text, token_text,
425  NULL);
426  g_free (page->text);
427  page->text = new_text;
428  }
  /* Only the case-folded copy is owned by this function. */
429  if (!case_sensitive)
430  g_free (token_text);
431  } else
432  djvu_text_page_append_text (page, data,
433  case_sensitive, delimit);
434  delimit = FALSE;
435  deeper = miniexp_cdr (deeper);
436  }
437 }
438 
/*
 * Public entry point that searches the flattened page text for text and
 * stores one bounding rectangle per match in page->results.
 *
 * Returns immediately when page->links is empty. Otherwise every
 * non-overlapping occurrence found by strstr is converted to a pair of byte
 * offsets (first and last byte of the match), each offset is mapped to its
 * element with djvu_text_page_position, and the bounding box of that element
 * range is prepended to page->results. The list is reversed at the end so
 * that matches appear in text order.
 *
 * NOTE(review): the declarator line (function name, first parameter) is
 * absent from this listing.
 * NOTE(review): with an empty text, search_len is 0, strstr keeps returning
 * the same position and haystack never advances, so the loop does not
 * terminate; end_p also becomes start_p - 1. Confirm callers reject empty
 * search strings or add a guard.
 * NOTE(review): the comparison is a plain strstr; if page->text was built
 * case-folded, text presumably has to be folded by the caller -- confirm.
 */
447 void
449  const char *text)
450 {
451  char *haystack = page->text;
452  int search_len;
453  EvRectangle *result;
454  if (page->links->len == 0)
455  return;
456 
457  search_len = strlen (text);
458  while ((haystack = strstr (haystack, text)) != NULL) {
459  int start_p = haystack - page->text;
460  miniexp_t start = djvu_text_page_position (page, start_p);
461  int end_p = start_p + search_len - 1;
462  miniexp_t end = djvu_text_page_position (page, end_p);
463  result = djvu_text_page_box (page, start, end);
464  g_assert (result);
465  page->results = g_list_prepend (page->results, result);
  /* Resume after the match: overlapping matches are not reported. */
466  haystack = haystack + search_len;
467  }
468  page->results = g_list_reverse (page->results);
469 }
470 
471 
/*
 * Public entry point taking a case_sensitive flag; the visible part of its
 * single statement forwards case_sensitive followed by FALSE.
 *
 * NOTE(review): the declarator line (function name, first parameter) and the
 * first line of the call are absent from this listing. Presumably this calls
 * djvu_text_page_append_text (page, page->text_structure, case_sensitive,
 * FALSE) to build page->text and page->links -- confirm against the real
 * file.
 */
479 void
481  gboolean case_sensitive)
482 {
484  case_sensitive, FALSE);
485 }
486 
495 DjvuTextPage *
496 djvu_text_page_new (miniexp_t text)
497 {
498  DjvuTextPage *page;
499 
500  page = g_new0 (DjvuTextPage, 1);
501  page->links = g_array_new (FALSE, FALSE, sizeof (DjvuTextLink));
502  page->char_symbol = miniexp_symbol ("char");
503  page->word_symbol = miniexp_symbol ("word");
504  page->text_structure = text;
505  return page;
506 }
507 
/*
 * Public entry point that releases a text page: frees page->text, frees the
 * page->links array together with its element storage, then frees the page
 * structure itself.
 *
 * NOTE(review): the declarator line (function name and parameter) is absent
 * from this listing.
 * NOTE(review): page->results and page->bounding_box are not released here;
 * presumably their ownership has passed to callers -- confirm.
 */
514 void
516 {
517  g_free (page->text);
518  g_array_free (page->links, TRUE);
519  g_free (page);
520 }