* src/vm/jit/inline/Makefile.am (AM_CPPFLAGS): Added OS include dir.
[cacao.git] / src / vm / zip.c
1 /* src/vm/zip.c - ZIP file handling for bootstrap classloader
2
3    Copyright (C) 1996-2005, 2006 R. Grafl, A. Krall, C. Kruegel,
4    C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
5    E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
6    J. Wenninger, Institut f. Computersprachen - TU Wien
7
8    This file is part of CACAO.
9
10    This program is free software; you can redistribute it and/or
11    modify it under the terms of the GNU General Public License as
12    published by the Free Software Foundation; either version 2, or (at
13    your option) any later version.
14
15    This program is distributed in the hope that it will be useful, but
16    WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18    General Public License for more details.
19
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23    02110-1301, USA.
24
25    Contact: cacao@cacaojvm.org
26
27    Authors: Christian Thalinger
28
29    Changes:
30
31    $Id: zip.c 4489 2006-02-12 13:11:06Z twisti $
32
33 */
34
35
36 #include "config.h"
37
38 #include <assert.h>
39 #include <fcntl.h>
40 #include <unistd.h>
41 #include <zlib.h>
42 #include <sys/mman.h>
43
44 #include "vm/types.h"
45
46 #include "mm/memory.h"
47 #include "vm/global.h"
48 #include "vm/hashtable.h"
49 #include "vm/suck.h"
50 #include "vm/utf8.h"
51 #include "vm/zip.h"
52
53
54 /* start size for classes hashtable *******************************************/
55
56 #define HASHTABLE_CLASSES_SIZE    (1 << 10)
57
58
59 /* info taken from:
60    http://www.pkware.com/business_and_developers/developer/popups/appnote.txt
61 */
62
63 /* all signatures in the ZIP file have a length of 4 bytes ********************/
64
65 #define SIGNATURE_LENGTH    4
66
67
68 /* Local file header ***********************************************************
69
70    local file header signature     4 bytes  (0x04034b50)
71    version needed to extract       2 bytes
72    general purpose bit flag        2 bytes
73    compression method              2 bytes
74    last mod file time              2 bytes
75    last mod file date              2 bytes
76    crc-32                          4 bytes
77    compressed size                 4 bytes
78    uncompressed size               4 bytes
79    file name length                2 bytes
80    extra field length              2 bytes
81
82    file name (variable size)
83    extra field (variable size)
84
85 *******************************************************************************/
86
87 #define LFH_HEADER_SIZE              30
88
89 #define LFH_SIGNATURE                0x04034b50
90 #define LFH_FILE_NAME_LENGTH         26
91 #define LFH_EXTRA_FIELD_LENGTH       28
92
93 typedef struct lfh lfh;
94
95 struct lfh {
96         u2 compressionmethod;
97         u4 compressedsize;
98         u4 uncompressedsize;
99         u2 filenamelength;
100         u2 extrafieldlength;
101 };
102
103
104 /* Central directory structure *************************************************
105
106    [file header 1]
107    .
108    .
109    . 
110    [file header n]
111    [digital signature] 
112    
113    File header:
114    
115      central file header signature   4 bytes  (0x02014b50)
116      version made by                 2 bytes
117      version needed to extract       2 bytes
118      general purpose bit flag        2 bytes
119      compression method              2 bytes
120      last mod file time              2 bytes
121      last mod file date              2 bytes
122      crc-32                          4 bytes
123      compressed size                 4 bytes
124      uncompressed size               4 bytes
125      file name length                2 bytes
126      extra field length              2 bytes
127      file comment length             2 bytes
128      disk number start               2 bytes
129      internal file attributes        2 bytes
130      external file attributes        4 bytes
131      relative offset of local header 4 bytes
132    
133      file name (variable size)
134      extra field (variable size)
135      file comment (variable size)
136
137    Digital signature:
138    
139      header signature                4 bytes  (0x05054b50)
140      size of data                    2 bytes
141      signature data (variable size)
142
143 *******************************************************************************/
144
145 #define CDSFH_HEADER_SIZE            46
146
147 #define CDSFH_SIGNATURE              0x02014b50
148 #define CDSFH_COMPRESSION_METHOD     10
149 #define CDSFH_COMPRESSED_SIZE        20
150 #define CDSFH_UNCOMPRESSED_SIZE      24
151 #define CDSFH_FILE_NAME_LENGTH       28
152 #define CDSFH_EXTRA_FIELD_LENGTH     30
153 #define CDSFH_FILE_COMMENT_LENGTH    32
154 #define CDSFH_RELATIVE_OFFSET        42
155 #define CDSFH_FILENAME               46
156
157 typedef struct cdsfh cdsfh;
158
159 struct cdsfh {
160         u2 compressionmethod;
161         u4 compressedsize;
162         u4 uncompressedsize;
163         u2 filenamelength;
164         u2 extrafieldlength;
165         u2 filecommentlength;
166         u4 relativeoffset;
167 };
168
169
170 /* End of central directory record *********************************************
171
172    end of central dir signature    4 bytes  (0x06054b50)
173    number of this disk             2 bytes
174    number of the disk with the
175    start of the central directory  2 bytes
176    total number of entries in the
177    central directory on this disk  2 bytes
178    total number of entries in
179    the central directory           2 bytes
180    size of the central directory   4 bytes
181    offset of start of central
182    directory with respect to
183    the starting disk number        4 bytes
184    .ZIP file comment length        2 bytes
185    .ZIP file comment       (variable size)
186
187 *******************************************************************************/
188
189 #define EOCDR_SIGNATURE              0x06054b50
190 #define EOCDR_ENTRIES                10
191 #define EOCDR_OFFSET                 16
192
193 typedef struct eocdr eocdr;
194
195 struct eocdr {
196         u2 entries;
197         u4 offset;
198 };
199
200
201 /* zip_open ********************************************************************
202
203    XXX
204
205 *******************************************************************************/
206
207 hashtable *zip_open(char *path)
208 {
209         hashtable               *ht;
210         hashtable_zipfile_entry *htzfe;
211         int                      fd;
212         u1                       lfh_signature[SIGNATURE_LENGTH];
213         off_t                    len;
214         u1                      *filep;
215         s4                       i;
216         u1                      *p;
217         eocdr                    eocdr;
218         cdsfh                    cdsfh;
219         const char              *filename;
220         const char              *classext;
221         utf                     *u;
222         u4                       key;       /* hashkey computed from utf-text     */
223         u4                       slot;      /* slot in hashtable                  */
224
225         /* first of all, open the file */
226
227         if ((fd = open(path, O_RDONLY)) == -1)
228                 return NULL;
229
230         /* check for signature in first local file header */
231
232         if (read(fd, lfh_signature, SIGNATURE_LENGTH) != SIGNATURE_LENGTH)
233                 return NULL;
234
235         if (SUCK_LE_U4(lfh_signature) != LFH_SIGNATURE)
236                 return NULL;
237
238         /* get the file length */
239
240         if ((len = lseek(fd, 0, SEEK_END)) == -1)
241                 return NULL;
242
243         /* we better mmap the file */
244
245         filep = mmap(0, len, PROT_READ, MAP_PRIVATE, fd, 0);
246
247         /* some older compilers, like DEC OSF cc, don't like comparisons
248        on void* types */
249
250         if ((ptrint) filep == (ptrint) MAP_FAILED)
251                 return NULL;
252
253         /* find end of central directory record */
254
255         for (p = filep + len; p >= filep; p--)
256                 if (SUCK_LE_U4(p) == EOCDR_SIGNATURE)
257                         break;
258
259         /* get number of entries in central directory */
260
261         eocdr.entries = SUCK_LE_U2(p + EOCDR_ENTRIES);
262         eocdr.offset  = SUCK_LE_U4(p + EOCDR_OFFSET);
263
264         /* create hashtable for filenames */
265
266         ht = NEW(hashtable);
267
268         hashtable_create(ht, HASHTABLE_CLASSES_SIZE);
269
270         /* add all file entries into the hashtable */
271
272         for (i = 0, p = filep + eocdr.offset; i < eocdr.entries; i++) {
273                 /* check file header signature */
274
275                 if (SUCK_LE_U4(p) != CDSFH_SIGNATURE)
276                         return NULL;
277
278                 /* we found an entry */
279
280                 cdsfh.compressionmethod = SUCK_LE_U2(p + CDSFH_COMPRESSION_METHOD);
281                 cdsfh.compressedsize    = SUCK_LE_U4(p + CDSFH_COMPRESSED_SIZE);
282                 cdsfh.uncompressedsize  = SUCK_LE_U4(p + CDSFH_UNCOMPRESSED_SIZE);
283                 cdsfh.filenamelength    = SUCK_LE_U2(p + CDSFH_FILE_NAME_LENGTH);
284                 cdsfh.extrafieldlength  = SUCK_LE_U2(p + CDSFH_EXTRA_FIELD_LENGTH);
285                 cdsfh.filecommentlength = SUCK_LE_U2(p + CDSFH_FILE_COMMENT_LENGTH);
286                 cdsfh.relativeoffset    = SUCK_LE_U4(p + CDSFH_RELATIVE_OFFSET);
287
288                 /* create utf8 string of filename, strip .class from classes */
289
290                 filename = (const char *) (p + CDSFH_FILENAME);
291                 classext = filename + cdsfh.filenamelength - strlen(".class");
292
293                 /* skip directory entries */
294
295                 if (filename[cdsfh.filenamelength - 1] != '/') {
296                         if (strncmp(classext, ".class", strlen(".class")) == 0)
297                                 u = utf_new(filename, cdsfh.filenamelength - strlen(".class"));
298                         else
299                                 u = utf_new(filename, cdsfh.filenamelength);
300
301                         /* insert class into hashtable */
302
303                         htzfe = NEW(hashtable_zipfile_entry);
304
305                         htzfe->filename          = u;
306                         htzfe->compressionmethod = cdsfh.compressionmethod;
307                         htzfe->compressedsize    = cdsfh.compressedsize;
308                         htzfe->uncompressedsize  = cdsfh.uncompressedsize;
309                         htzfe->data              = filep + cdsfh.relativeoffset;
310
311                         /* get hashtable slot */
312
313                         key  = utf_hashkey(u->text, u->blength);
314                         slot = key & (ht->size - 1);
315
316                         /* insert into external chain */
317
318                         htzfe->hashlink = ht->ptr[slot];
319
320                         /* insert hashtable zipfile entry */
321
322                         ht->ptr[slot] = htzfe;
323                         ht->entries++;
324                 }
325
326                 /* move to next central directory structure file header */
327
328                 p = p +
329                         CDSFH_HEADER_SIZE +
330                         cdsfh.filenamelength +
331                         cdsfh.extrafieldlength +
332                         cdsfh.filecommentlength;
333         }
334
335         /* return pointer to hashtable */
336
337         return ht;
338 }
339
340
341 /* zip_find ********************************************************************
342
343    XXX
344
345 *******************************************************************************/
346
347 hashtable_zipfile_entry *zip_find(list_classpath_entry *lce, utf *u)
348 {
349         hashtable               *ht;
350         u4                       key;       /* hashkey computed from utf-text     */
351         u4                       slot;      /* slot in hashtable                  */
352         hashtable_zipfile_entry *htzfe;     /* hashtable element                  */
353
354         /* get classes hashtable from the classpath entry */
355
356         ht = lce->htclasses;
357
358         /* get the hashtable slot of the name searched */
359
360         key   = utf_hashkey(u->text, u->blength);
361         slot  = key & (ht->size - 1);
362         htzfe = ht->ptr[slot];
363
364         /* search external hash chain for utf-symbol */
365
366         while (htzfe) {
367                 if (htzfe->filename == u)
368                         return htzfe;
369
370                 /* next element in external chain */
371
372                 htzfe = htzfe->hashlink;
373         }
374
375         /* file not found in this archive */
376
377         return NULL;
378 }
379
380
381 /* zip_get ********************************************************************
382
383    XXX
384
385 *******************************************************************************/
386
387 classbuffer *zip_get(list_classpath_entry *lce, classinfo *c)
388 {
389         hashtable_zipfile_entry *htzfe;
390         lfh                      lfh;
391         u1                      *indata;
392         u1                      *outdata;
393         z_stream                 zs;
394         int                      err;
395         classbuffer             *cb;
396
397         /* try to find the class in the current archive */
398
399         if ((htzfe = zip_find(lce, c->name)) == NULL)
400                 return NULL;
401
402         /* read stuff from local file header */
403
404         lfh.filenamelength   = SUCK_LE_U2(htzfe->data + LFH_FILE_NAME_LENGTH);
405         lfh.extrafieldlength = SUCK_LE_U2(htzfe->data + LFH_EXTRA_FIELD_LENGTH);
406
407         indata = htzfe->data +
408                 LFH_HEADER_SIZE +
409                 lfh.filenamelength +
410                 lfh.extrafieldlength;
411
412         /* allocate buffer for uncompressed data */
413
414         outdata = MNEW(u1, htzfe->uncompressedsize);
415
416         /* how is the file stored? */
417
418         switch (htzfe->compressionmethod) {
419         case Z_DEFLATED:
420                 /* fill z_stream structure */
421
422                 zs.next_in   = indata;
423                 zs.avail_in  = htzfe->compressedsize;
424                 zs.next_out  = outdata;
425                 zs.avail_out = htzfe->uncompressedsize;
426
427                 zs.zalloc = Z_NULL;
428                 zs.zfree  = Z_NULL;
429                 zs.opaque = Z_NULL;
430
431                 /* initialize this inflate run */
432
433                 if (inflateInit2(&zs, -MAX_WBITS) != Z_OK)
434                         assert(0);
435
436                 /* decompress the file into buffer */
437
438                 err = inflate(&zs, Z_SYNC_FLUSH);
439
440                 if ((err != Z_STREAM_END) && (err != Z_OK))
441                         assert(0);
442
443                 /* finish this inflate run */
444
445                 if (inflateEnd(&zs) != Z_OK)
446                         assert(0);
447                 break;
448
449         case 0:
450                 /* uncompressed file, just copy the data */
451                 MCOPY(outdata, indata, u1, htzfe->compressedsize);
452                 break;
453
454         default:
455                 assert(0);
456         }
457         
458         /* allocate classbuffer */
459
460         cb = NEW(classbuffer);
461
462         cb->class = c;
463         cb->size  = htzfe->uncompressedsize;
464         cb->data  = outdata;
465         cb->pos   = outdata;
466         cb->path  = lce->path;
467
468         /* return the filled classbuffer structure */
469
470         return cb;
471 }
472
473
474 /*
475  * These are local overrides for various environment variables in Emacs.
476  * Please do not remove this and leave it at the end of the file, where
477  * Emacs will automagically detect them.
478  * ---------------------------------------------------------------------
479  * Local variables:
480  * mode: c
481  * indent-tabs-mode: t
482  * c-basic-offset: 4
483  * tab-width: 4
484  * End:
485  */