Evince
Evince is a document viewer capable of displaying multi-page and single-page document formats such as PDF and PostScript.
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
parse-rar.c
Go to the documentation of this file.
1 /* Copyright 2015 the unarr project authors (see AUTHORS file).
2  License: LGPLv3 */
3 
4 /* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/XADRARParser.m */
5 
6 #include "rar.h"
7 
/* Returns the single byte at data (provided for symmetry with the wider readers). */
static inline uint8_t uint8le(unsigned char *data)
{
    return *data;
}
/* Assembles a 16-bit value from the two little-endian bytes at data. */
static inline uint16_t uint16le(unsigned char *data)
{
    return (uint16_t)((data[1] << 8) | data[0]);
}
/*
 * Assembles a 32-bit value from the four little-endian bytes at data.
 * Each byte is widened to uint32_t before shifting: shifting the promoted
 * (signed) int left by 24 is undefined when the top byte is >= 0x80.
 */
static inline uint32_t uint32le(unsigned char *data)
{
    return (uint32_t)data[0]
         | (uint32_t)data[1] << 8
         | (uint32_t)data[2] << 16
         | (uint32_t)data[3] << 24;
}
11 
12 bool rar_parse_header(ar_archive *ar, struct rar_header *header)
13 {
14  unsigned char header_data[7];
15  size_t read = ar_read(ar->stream, header_data, sizeof(header_data));
16  if (read == 0) {
17  ar->at_eof = true;
18  return false;
19  }
20  if (read < sizeof(header_data))
21  return false;
22 
23  header->crc = uint16le(header_data + 0);
24  header->type = uint8le(header_data + 2);
25  header->flags = uint16le(header_data + 3);
26  header->size = uint16le(header_data + 5);
27 
28  header->datasize = 0;
29  if ((header->flags & LHD_LONG_BLOCK) || header->type == 0x74) {
30  unsigned char size_data[4];
31  if (!(header->flags & LHD_LONG_BLOCK))
32  log("File header without LHD_LONG_BLOCK set");
33  read += ar_read(ar->stream, size_data, sizeof(size_data));
34  if (read < sizeof(header_data) + sizeof(size_data))
35  return false;
36  header->datasize = uint32le(size_data);
37  }
38 
39  if (header->size < read) {
40  warn("Invalid header size %d", header->size);
41  return false;
42  }
43 
44  return true;
45 }
46 
48 {
49  unsigned char buffer[256];
50  uint16_t crc16, size;
51  uint32_t crc32;
52 
53  if (!ar_seek(ar->stream, ar->entry_offset, SEEK_SET))
54  return false;
55  if (ar_read(ar->stream, buffer, 7) != 7)
56  return false;
57 
58  crc16 = uint16le(buffer + 0);
59  size = uint16le(buffer + 5);
60  if (size < 7)
61  return false;
62  size -= 7;
63 
64  crc32 = ar_crc32(0, buffer + 2, 5);
65  while (size > 0) {
66  if (ar_read(ar->stream, buffer, smin(size, sizeof(buffer))) != smin(size, sizeof(buffer)))
67  return false;
68  crc32 = ar_crc32(crc32, buffer, smin(size, sizeof(buffer)));
69  size -= (uint16_t)smin(size, sizeof(buffer));
70  }
71  return (crc32 & 0xFFFF) == crc16;
72 }
73 
74 bool rar_parse_header_entry(ar_archive_rar *rar, struct rar_header *header, struct rar_entry *entry)
75 {
76  unsigned char data[21];
77  if (ar_read(rar->super.stream, data, sizeof(data)) != sizeof(data))
78  return false;
79 
80  entry->size = uint32le(data + 0);
81  entry->os = uint8le(data + 4);
82  entry->crc = uint32le(data + 5);
83  entry->dosdate = uint32le(data + 9);
84  entry->version = uint8le(data + 13);
85  entry->method = uint8le(data + 14);
86  entry->namelen = uint16le(data + 15);
87  entry->attrs = uint32le(data + 17);
88  if ((header->flags & LHD_LARGE)) {
89  unsigned char more_data[8];
90  if (ar_read(rar->super.stream, more_data, sizeof(more_data)) != sizeof(more_data))
91  return false;
92  header->datasize += (uint64_t)uint32le(more_data + 0);
93  entry->size += (uint64_t)uint32le(more_data + 4);
94  }
95  if (!ar_skip(rar->super.stream, entry->namelen))
96  return false;
97  if ((header->flags & LHD_SALT)) {
98  log("Skipping LHD_SALT");
99  ar_skip(rar->super.stream, 8);
100  }
101 
102  rar->entry.version = entry->version;
103  rar->entry.method = entry->method;
104  rar->entry.crc = entry->crc;
105  rar->entry.header_size = header->size;
106  rar->entry.solid = entry->version < 20 ? (rar->archive_flags & MHD_SOLID) : (header->flags & LHD_SOLID);
107  free(rar->entry.name);
108  rar->entry.name = NULL;
109 
110  return true;
111 }
112 
113 /* this seems to be what RAR considers "Unicode" */
/*
 * Decodes a RAR "Unicode" file name field into a newly allocated UTF-8
 * string.
 *
 * data holds len bytes: a NUL-terminated 8-bit name followed by the encoded
 * form (one high byte, then flag bytes carrying 2-bit opcodes that select how
 * the next character is produced). If no usable encoded part follows the
 * NUL, the raw bytes are copied unchanged.
 *
 * Returns a calloc'ed string the caller must free, or NULL on allocation
 * failure or when the encoded data is truncated/inconsistent.
 */
static char *rar_conv_unicode_to_utf8(const char *data, uint16_t len)
{
#define Check(cond) if (!(cond)) { free(str); return NULL; } else ((void)0)

    uint8_t highbyte, flagbyte, flagbits, length, i;
    /* number of characters produced so far, also the index into the 8-bit
     * name; must span the whole uint16_t range of len (a uint8_t would wrap
     * for names longer than 255 characters) */
    uint16_t size;
    const uint8_t *in = (const uint8_t *)data + strlen(data) + 1;
    const uint8_t *end_in = (const uint8_t *)data + len;
    char *str = calloc(len + 1, 3);
    char *out;
    char *end_out;

    if (!str)
        return NULL;
    /* only derive pointers from str once it is known to be non-NULL */
    out = str;
    end_out = str + len * 3;

    if (end_in - in <= 1) {
        memcpy(str, data, len);
        return str;
    }

    highbyte = *in++;
    flagbyte = 0;
    flagbits = 0;
    size = 0;

    while (in < end_in && out < end_out) {
        if (flagbits == 0) {
            flagbyte = *in++;
            flagbits = 8;
        }
        flagbits -= 2;
        switch ((flagbyte >> flagbits) & 3) {
        case 0:
            /* one byte, used as-is */
            Check(in + 1 <= end_in);
            out += ar_conv_rune_to_utf8(*in++, out, end_out - out);
            size++;
            break;
        case 1:
            /* one low byte combined with the shared high byte */
            Check(in + 1 <= end_in);
            out += ar_conv_rune_to_utf8(((uint16_t)highbyte << 8) | *in++, out, end_out - out);
            size++;
            break;
        case 2:
            /* full 16-bit little-endian code unit */
            Check(in + 2 <= end_in);
            out += ar_conv_rune_to_utf8(((uint16_t)*(in + 1) << 8) | *in, out, end_out - out);
            in += 2;
            size++;
            break;
        case 3:
            /* run of characters taken from the 8-bit name */
            Check(in + 1 <= end_in);
            length = *in++;
            if ((length & 0x80)) {
                uint8_t correction;
                /* the correction byte must itself lie inside the input */
                Check(in + 1 <= end_in);
                correction = *in++;
                for (i = 0; i < (length & 0x7F) + 2; i++) {
                    Check(size < len);
                    /* the corrected low byte wraps at 8 bits; treat the
                     * name byte as unsigned so it cannot sign-extend */
                    out += ar_conv_rune_to_utf8(((uint16_t)highbyte << 8) | (uint8_t)((uint8_t)data[size] + correction), out, end_out - out);
                    size++;
                }
            }
            else {
                for (i = 0; i < (length & 0x7F) + 2; i++) {
                    Check(size < len);
                    out += ar_conv_rune_to_utf8((uint8_t)data[size], out, end_out - out);
                    size++;
                }
            }
            break;
        }
    }

    return str;

#undef Check
}
186 
187 const char *rar_get_name(ar_archive *ar)
188 {
189  ar_archive_rar *rar = (ar_archive_rar *)ar;
190  if (!rar->entry.name) {
191  unsigned char data[21];
192  uint16_t namelen;
193  char *name;
194 
195  struct rar_header header;
196  if (!ar_seek(ar->stream, ar->entry_offset, SEEK_SET))
197  return NULL;
198  if (!rar_parse_header(ar, &header))
199  return NULL;
200  if (ar_read(ar->stream, data, sizeof(data)) != sizeof(data))
201  return NULL;
202  if ((header.flags & LHD_LARGE) && !ar_skip(ar->stream, 8))
203  return NULL;
204 
205  namelen = uint16le(data + 15);
206  name = malloc(namelen + 1);
207  if (!name || ar_read(ar->stream, name, namelen) != namelen) {
208  free(name);
209  return NULL;
210  }
211  name[namelen] = '\0';
212 
213  if (!(header.flags & LHD_UNICODE)) {
214  rar->entry.name = ar_conv_dos_to_utf8(name);
215  free(name);
216  }
217  else if (namelen == strlen(name)) {
218  rar->entry.name = name;
219  }
220  else {
221  rar->entry.name = rar_conv_unicode_to_utf8(name, namelen);
222  free(name);
223  }
224  /* normalize path separators */
225  if (rar->entry.name) {
226  char *p = rar->entry.name;
227  while ((p = strchr(p, '\\')) != NULL) {
228  *p = '/';
229  }
230  }
231 
232  if (!ar_seek(ar->stream, ar->entry_offset + rar->entry.header_size, SEEK_SET))
233  warn("Couldn't seek back to the end of the entry header");
234  }
235  return rar->entry.name;
236 }