root/lang/perl/File-MMagic-XS/trunk/my_mod_mime_magic.c @ 10246

Revision 10246, 56.9 kB (checked in by daisuke, 5 years ago)

まだコンパイルできないぞ

Line 
1#ifndef __MY_MOD_MIME_MAGIC_C__
2#define __MY_MOD_MIME_MAGIC_C__
3
4/*
5 * NOTES:
6 *
7 *   check_function(SV *content_type, args);
8 *
9 *   struct magic_result {
10 *     SV *content_type;
11 *     SV *content_encoding;
12 *   }
13 */
14
15/* Licensed to the Apache Software Foundation (ASF) under one or more
16 * contributor license agreements.  See the NOTICE file distributed with
17 * this work for additional information regarding copyright ownership.
18 * The ASF licenses this file to You under the Apache License, Version 2.0
19 * (the "License"); you may not use this file except in compliance with
20 * the License.  You may obtain a copy of the License at
21 *
22 *     http://www.apache.org/licenses/LICENSE-2.0
23 *
24 * Unless required by applicable law or agreed to in writing, software
25 * distributed under the License is distributed on an "AS IS" BASIS,
26 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
27 * See the License for the specific language governing permissions and
28 * limitations under the License.
29 */
30
31/*
32 * mod_mime_magic: MIME type lookup via file magic numbers
33 * Copyright (c) 1996-1997 Cisco Systems, Inc.
34 *
35 * This software was submitted by Cisco Systems to the Apache Software Foundation in July
36 * 1997.  Future revisions and derivatives of this source code must
37 * acknowledge Cisco Systems as the original contributor of this module.
38 * All other licensing and usage conditions are those of the Apache Software Foundation.
39 *
40 * Some of this code is derived from the free version of the file command
41 * originally posted to comp.sources.unix.  Copyright info for that program
42 * is included below as required.
43 * ---------------------------------------------------------------------------
44 * - Copyright (c) Ian F. Darwin, 1987. Written by Ian F. Darwin.
45 *
46 * This software is not subject to any license of the American Telephone and
47 * Telegraph Company or of the Regents of the University of California.
48 *
49 * Permission is granted to anyone to use this software for any purpose on any
50 * computer system, and to alter it and redistribute it freely, subject to
51 * the following restrictions:
52 *
53 * 1. The author is not responsible for the consequences of use of this
54 * software, no matter how awful, even if they arise from flaws in it.
55 *
56 * 2. The origin of this software must not be misrepresented, either by
57 * explicit claim or by omission.  Since few users ever read sources, credits
58 * must appear in the documentation.
59 *
60 * 3. Altered versions must be plainly marked as such, and must not be
61 * misrepresented as being the original software.  Since few users ever read
62 * sources, credits must appear in the documentation.
63 *
64 * 4. This notice may not be removed or altered.
65 * -------------------------------------------------------------------------
66 *
67 * For compliance with Mr Darwin's terms: this has been very significantly
68 * modified from the free "file" command.
69 * - all-in-one file for compilation convenience when moving from one
70 *   version of Apache to the next.
71 * - Memory allocation is done through the Apache API's apr_pool_t structure.
72 * - All functions have had necessary Apache API request or server
73 *   structures passed to them where necessary to call other Apache API
74 *   routines.  (i.e. usually for logging, files, or memory allocation in
75 *   itself or a called function.)
76 * - struct magic has been converted from an array to a single-ended linked
77 *   list because it only grows one record at a time, it's only accessed
78 *   sequentially, and the Apache API has no equivalent of realloc().
79 * - Functions have been changed to get their parameters from the server
80 *   configuration instead of globals.  (It should be reentrant now but has
81 *   not been tested in a threaded environment.)
82 * - Places where it used to print results to stdout now saves them in a
83 *   list where they're used to set the MIME type in the Apache request
84 *   record.
85 * - Command-line flags have been removed since they will never be used here.
86 *
87 * Ian Kluft <ikluft@cisco.com>
88 * Engineering Information Framework
89 * Central Engineering
90 * Cisco Systems, Inc.
91 * San Jose, CA, USA
92 *
93 * Initial installation          July/August 1996
94 * Misc bug fixes                May 1997
95 * Submission to Apache Software Foundation    July 1997
96 *
97 */
98
99#include "file-mmagic.h"
100
/*
 * XXX - temporary fix. should look at it later.
 *
 * Status codes returned by the detection routines in this file.  The
 * negative value is parenthesized so the macro expands to a single
 * primary expression wherever it is used.
 */
#define OK 1
#define DECLINED (-1)
104
105/*
106 * file-function prototypes
107 */
108static int ascmagic(magic_result *, unsigned char *, STRLEN);
109static int is_tar(unsigned char *, STRLEN);
110static int softmagic(magic_result *, unsigned char *, STRLEN);
111static int tryit(magic_result *, unsigned char *, STRLEN, int);
112static int zmagic(request_rec *, unsigned char *, STRLEN);
113
114static int getvalue(server_rec *, struct magic *, char **);
115static int hextoint(int);
116static char *getstr(server_rec *, char *, char *, int, int *);
117static int parse(server_rec *, apr_pool_t *p, char *, int);
118
119static int match(request_rec *, unsigned char *, STRLEN);
120static int mget(request_rec *, union VALUETYPE *, unsigned char *,
121                struct magic *, STRLEN);
122static int mcheck(request_rec *, union VALUETYPE *, struct magic *);
123static void mprint(request_rec *, union VALUETYPE *, struct magic *);
124
125static int uncompress(request_rec *, int,
126                      unsigned char **, STRLEN);
127static long from_oct(int, char *);
128static int fsmagic(magic_result *, const char *fn);
129
130static magic_result *
131magic_result_create()
132{
133    magic_result *rv;
134    Newxz(rv, 1, magic_result);
135    return rv;
136}
137
138/*
139 * includes for ASCII substring recognition formerly "names.h" in file
140 * command
141 *
142 * Original notes: names and types used by ascmagic in file(1). These tokens are
143 * here because they can appear anywhere in the first HOWMANY bytes, while
144 * tokens in /etc/magic must appear at fixed offsets into the file. Don't
145 * make HOWMANY too high unless you have a very fast CPU.
146 */
147
/*
 * Category codes.  Each code is the index of its MIME type in the 'types'
 * array below, so the two lists must be kept in the same order.
 * HTML is listed first because this code began life as a web server module.
 */
#define L_HTML    0   /* HTML */
#define L_C       1   /* first and foremost on UNIX */
#define L_FORT    2   /* the oldest one */
#define L_MAKE    3   /* Makefiles */
#define L_PLI     4   /* PL/1 */
#define L_MACH    5   /* some kinda assembler */
#define L_ENG     6   /* English */
#define L_PAS     7   /* Pascal */
#define L_MAIL    8   /* Electronic mail */
#define L_NEWS    9   /* Usenet Netnews */

/* MIME type for every category code above, followed by a 0 terminator */
static char *types[] =
{
    "text/html",             /* L_HTML: HTML */
    "text/plain",            /* L_C:    c program text */
    "text/plain",            /* L_FORT: fortran program text */
    "text/plain",            /* L_MAKE: make commands text */
    "text/plain",            /* L_PLI:  pl/1 program text */
    "text/plain",            /* L_MACH: assembler program text */
    "text/plain",            /* L_ENG:  English text */
    "text/plain",            /* L_PAS:  pascal program text */
    "message/rfc822",        /* L_MAIL: mail text */
    "message/news",          /* L_NEWS: news text */
    "application/binary",    /* "can't happen" error on names.h/types */
    0
};
176
177static struct names {
178    char *name;
179    short type;
180} names[] = {
181
182    /* These must be sorted by eye for optimal hit rate */
183    /* Add to this list only after substantial meditation */
184    {
185        "<html>", L_HTML
186    },
187    {
188        "<HTML>", L_HTML
189    },
190    {
191        "<head>", L_HTML
192    },
193    {
194        "<HEAD>", L_HTML
195    },
196    {
197        "<title>", L_HTML
198    },
199    {
200        "<TITLE>", L_HTML
201    },
202    {
203        "<h1>", L_HTML
204    },
205    {
206        "<H1>", L_HTML
207    },
208    {
209        "<!--", L_HTML
210    },
211    {
212        "<!DOCTYPE HTML", L_HTML
213    },
214    {
215        "/*", L_C
216    },               /* must precede "The", "the", etc. */
217    {
218        "#include", L_C
219    },
220    {
221        "char", L_C
222    },
223    {
224        "The", L_ENG
225    },
226    {
227        "the", L_ENG
228    },
229    {
230        "double", L_C
231    },
232    {
233        "extern", L_C
234    },
235    {
236        "float", L_C
237    },
238    {
239        "real", L_C
240    },
241    {
242        "struct", L_C
243    },
244    {
245        "union", L_C
246    },
247    {
248        "CFLAGS", L_MAKE
249    },
250    {
251        "LDFLAGS", L_MAKE
252    },
253    {
254        "all:", L_MAKE
255    },
256    {
257        ".PRECIOUS", L_MAKE
258    },
259    /*
260     * Too many files of text have these words in them.  Find another way to
261     * recognize Fortrash.
262     */
263#ifdef    NOTDEF
264    {
265        "subroutine", L_FORT
266    },
267    {
268        "function", L_FORT
269    },
270    {
271        "block", L_FORT
272    },
273    {
274        "common", L_FORT
275    },
276    {
277        "dimension", L_FORT
278    },
279    {
280        "integer", L_FORT
281    },
282    {
283        "data", L_FORT
284    },
285#endif /* NOTDEF */
286    {
287        ".ascii", L_MACH
288    },
289    {
290        ".asciiz", L_MACH
291    },
292    {
293        ".byte", L_MACH
294    },
295    {
296        ".even", L_MACH
297    },
298    {
299        ".globl", L_MACH
300    },
301    {
302        "clr", L_MACH
303    },
304    {
305        "(input,", L_PAS
306    },
307    {
308        "dcl", L_PLI
309    },
310    {
311        "Received:", L_MAIL
312    },
313    {
314        ">From", L_MAIL
315    },
316    {
317        "Return-Path:", L_MAIL
318    },
319    {
320        "Cc:", L_MAIL
321    },
322    {
323        "Newsgroups:", L_NEWS
324    },
325    {
326        "Path:", L_NEWS
327    },
328    {
329        "Organization:", L_NEWS
330    },
331    {
332        NULL, 0
333    }
334};
335
336#define NNAMES ((sizeof(names)/sizeof(struct names)) - 1)
337
338/*
339 * Result String List (RSL)
340 *
341 * The file(1) command prints its output.  Instead, we store the various
342 * "printed" strings in a list (allocating memory as we go) and concatenate
343 * them at the end when we finally know how much space they'll need.
344 */
345
typedef struct magic_rsl_s {
    /* text of this entry; may be only a fragment of the final string */
    char *str;
    /* entry that follows this one in the list */
    struct magic_rsl_s *next;
} magic_rsl;
350
351/*
352 * Apache module configuration structures
353 */
354
/* per-server info: the magic file and the rule list built from it */
typedef struct {
    /* where magic be found */
    const char *magicfile;
    /* head of magic config list */
    struct magic *magic;
    /* last entry of that list, used when appending */
    struct magic *last;
} magic_server_config_rec;
361
362/* per-request info */
363typedef struct {
364    magic_rsl *head;          /* result string list */
365    magic_rsl *tail;
366    unsigned suf_recursion;   /* recursion depth in suffix check */
367} magic_req_rec;
368
369/*
370 * RSL (result string list) processing routines
371 *
372 * These collect strings that would have been printed in fragments by file(1)
373 * into a list of magic_rsl structures with the strings. When complete,
374 * they're concatenated together to become the MIME content and encoding
375 * types.
376 *
 * Return value conventions for these functions:
 *   - functions which return int: failure = -1, other = result
 *   - functions which return pointers: failure = 0, other = result
380 */
381
382/* allocate a per-request structure and put it in the request record */
383static magic_req_rec *magic_set_config(request_rec *r)
384{
385    magic_req_rec *req_dat = (magic_req_rec *) apr_palloc(r->pool,
386                                                      sizeof(magic_req_rec));
387
388    req_dat->head = req_dat->tail = (magic_rsl *) NULL;
389    ap_set_module_config(r->request_config, &mime_magic_module, req_dat);
390    return req_dat;
391}
392
393/* add a string to the result string list for this request */
394/* it is the responsibility of the caller to allocate "str" */
395static int magic_rsl_add(request_rec *r, char *str)
396{
397    magic_req_rec *req_dat = (magic_req_rec *)
398                    ap_get_module_config(r->request_config, &mime_magic_module);
399    magic_rsl *rsl;
400
401    /* make sure we have a list to put it in */
402    if (!req_dat) {
403        ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_EINVAL, r,
404                    MODNAME ": request config should not be NULL");
405        if (!(req_dat = magic_set_config(r))) {
406            /* failure */
407            return -1;
408        }
409    }
410
411    /* allocate the list entry */
412    rsl = (magic_rsl *) apr_palloc(r->pool, sizeof(magic_rsl));
413
414    /* fill it */
415    rsl->str = str;
416    rsl->next = (magic_rsl *) NULL;
417
418    /* append to the list */
419    if (req_dat->head && req_dat->tail) {
420        req_dat->tail->next = rsl;
421        req_dat->tail = rsl;
422    }
423    else {
424        req_dat->head = req_dat->tail = rsl;
425    }
426
427    /* success */
428    return 0;
429}
430
431/* RSL hook for puts-type functions */
432static int magic_rsl_puts(request_rec *r, char *str)
433{
434    return magic_rsl_add(r, str);
435}
436
437/* RSL hook for printf-type functions */
438static int magic_rsl_printf(request_rec *r, char *str,...)
439{
440    va_list ap;
441
442    char buf[MAXMIMESTRING];
443
444    /* assemble the string into the buffer */
445    va_start(ap, str);
446    apr_vsnprintf(buf, sizeof(buf), str, ap);
447    va_end(ap);
448
449    /* add the buffer to the list */
450    return magic_rsl_add(r, apr_pstrdup(r->pool, buf));
451}
452
453/* RSL hook for putchar-type functions */
454static int magic_rsl_putchar(request_rec *r, char c)
455{
456    char str[2];
457
458    /* high overhead for 1 char - just hope they don't do this much */
459    str[0] = c;
460    str[1] = '\0';
461    return magic_rsl_add(r, str);
462}
463
464/* allocate and copy a contiguous string from a result string list */
465static char *rsl_strdup(request_rec *r, int start_frag, int start_pos, int len)
466{
467    char *result;       /* return value */
468    int cur_frag,       /* current fragment number/counter */
469        cur_pos,        /* current position within fragment */
470        res_pos;        /* position in result string */
471    magic_rsl *frag;    /* list-traversal pointer */
472    magic_req_rec *req_dat = (magic_req_rec *)
473                    ap_get_module_config(r->request_config, &mime_magic_module);
474
475    /* allocate the result string */
476    result = (char *) apr_palloc(r->pool, len + 1);
477
478    /* loop through and collect the string */
479    res_pos = 0;
480    for (frag = req_dat->head, cur_frag = 0;
481         frag->next;
482         frag = frag->next, cur_frag++) {
483        /* loop to the first fragment */
484        if (cur_frag < start_frag)
485            continue;
486
487        /* loop through and collect chars */
488        for (cur_pos = (cur_frag == start_frag) ? start_pos : 0;
489             frag->str[cur_pos];
490             cur_pos++) {
491            if (cur_frag >= start_frag
492                && cur_pos >= start_pos
493                && res_pos <= len) {
494                result[res_pos++] = frag->str[cur_pos];
495                if (res_pos > len) {
496                    break;
497                }
498            }
499        }
500    }
501
502    /* clean up and return */
503    result[res_pos] = 0;
504    return result;
505}
506
/*
 * States for the state-machine algorithm in magic_rsl_to_request().
 * The parser advances with state++, so the order of the enumerators
 * matters.
 */
typedef enum {
    rsl_leading_space,
    rsl_type,
    rsl_subtype,
    rsl_separator,
    rsl_encoding
} rsl_states;
511
512/* process the RSL and set the MIME info in the request record */
513static int magic_rsl_to_request(request_rec *r)
514{
515    int cur_frag,         /* current fragment number/counter */
516        cur_pos,          /* current position within fragment */
517        type_frag,        /* content type starting point: fragment */
518        type_pos,         /* content type starting point: position */
519        type_len,         /* content type length */
520        encoding_frag,    /* content encoding starting point: fragment */
521        encoding_pos,     /* content encoding starting point: position */
522        encoding_len;     /* content encoding length */
523
524    magic_rsl *frag;      /* list-traversal pointer */
525    rsl_states state;
526
527    magic_req_rec *req_dat = (magic_req_rec *)
528                    ap_get_module_config(r->request_config, &mime_magic_module);
529
530    /* check if we have a result */
531    if (!req_dat || !req_dat->head) {
532        /* empty - no match, we defer to other Apache modules */
533        return DECLINED;
534    }
535
536    /* start searching for the type and encoding */
537    state = rsl_leading_space;
538    type_frag = type_pos = type_len = 0;
539    encoding_frag = encoding_pos = encoding_len = 0;
540    for (frag = req_dat->head, cur_frag = 0;
541         frag && frag->next;
542         frag = frag->next, cur_frag++) {
543        /* loop through the characters in the fragment */
544        for (cur_pos = 0; frag->str[cur_pos]; cur_pos++) {
545            if (isSPACE(frag->str[cur_pos])) {
546                /* process whitespace actions for each state */
547                if (state == rsl_leading_space) {
548                    /* eat whitespace in this state */
549                    continue;
550                }
551                else if (state == rsl_type) {
552                    /* whitespace: type has no slash! */
553                    return DECLINED;
554                }
555                else if (state == rsl_subtype) {
556                    /* whitespace: end of MIME type */
557                    state++;
558                    continue;
559                }
560                else if (state == rsl_separator) {
561                    /* eat whitespace in this state */
562                    continue;
563                }
564                else if (state == rsl_encoding) {
565                    /* whitespace: end of MIME encoding */
566                    /* we're done */
567                    frag = req_dat->tail;
568                    break;
569                }
570                else {
571                    /* should not be possible */
572                    /* abandon malfunctioning module */
573                    ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
574                                MODNAME ": bad state %d (ws)", state);
575                    return DECLINED;
576                }
577                /* NOTREACHED */
578            }
579            else if (state == rsl_type &&
580                     frag->str[cur_pos] == '/') {
581                /* copy the char and go to rsl_subtype state */
582                type_len++;
583                state++;
584            }
585            else {
586                /* process non-space actions for each state */
587                if (state == rsl_leading_space) {
588                    /* non-space: begin MIME type */
589                    state++;
590                    type_frag = cur_frag;
591                    type_pos = cur_pos;
592                    type_len = 1;
593                    continue;
594                }
595                else if (state == rsl_type ||
596                         state == rsl_subtype) {
597                    /* non-space: adds to type */
598                    type_len++;
599                    continue;
600                }
601                else if (state == rsl_separator) {
602                    /* non-space: begin MIME encoding */
603                    state++;
604                    encoding_frag = cur_frag;
605                    encoding_pos = cur_pos;
606                    encoding_len = 1;
607                    continue;
608                }
609                else if (state == rsl_encoding) {
610                    /* non-space: adds to encoding */
611                    encoding_len++;
612                    continue;
613                }
614                else {
615                    /* should not be possible */
616                    /* abandon malfunctioning module */
617                    ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
618                                MODNAME ": bad state %d (ns)", state);
619                    return DECLINED;
620                }
621                /* NOTREACHED */
622            }
623            /* NOTREACHED */
624        }
625    }
626
627    /* if we ended prior to state rsl_subtype, we had incomplete info */
628    if (state != rsl_subtype && state != rsl_separator &&
629        state != rsl_encoding) {
630        /* defer to other modules */
631        return DECLINED;
632    }
633
634    /* save the info in the request record */
635    if (state == rsl_subtype || state == rsl_encoding ||
636        state == rsl_encoding) {
637        char *tmp;
638        tmp = rsl_strdup(r, type_frag, type_pos, type_len);
639        /* XXX: this could be done at config time I'm sure... but I'm
640         * confused by all this magic_rsl stuff. -djg */
641        ap_content_type_tolower(tmp);
642        ap_set_content_type(r, tmp);
643    }
644    if (state == rsl_encoding) {
645        char *tmp;
646        tmp = rsl_strdup(r, encoding_frag,
647                                         encoding_pos, encoding_len);
648        /* XXX: this could be done at config time I'm sure... but I'm
649         * confused by all this magic_rsl stuff. -djg */
650        toLOWER(tmp);
651        r->content_encoding = tmp;
652    }
653
654    /* detect memory allocation or other errors */
655    if (!r->content_type ||
656        (state == rsl_encoding && !r->content_encoding)) {
657        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
658                      MODNAME ": unexpected state %d; could be caused by bad "
659                      "data in magic file",
660                      state);
661        return HTTP_INTERNAL_SERVER_ERROR;
662    }
663
664    /* success! */
665    return OK;
666}
667
668/*
669 * magic_process - process input file r        Apache API request record
670 * (formerly called "process" in file command, prefix added for clarity) Opens
671 * the file and reads a fixed-size buffer to begin processing the contents.
672 */
673static int magic_process(magic_result *rv)
674{
675    apr_file_t *fd = NULL;
676    unsigned char buf[HOWMANY + 1];  /* one extra for terminating '\0' */
677    STRLEN nbytes = 0;           /* number of bytes read from a datafile */
678    int result;
679
680    /*
681     * first try judging the file based on its filesystem status
682     */
683    switch ((result = fsmagic(ct, r->filename))) {
684    case DONE:
685        magic_rsl_putchar(r, '\n');
686        return OK;
687    case OK:
688        break;
689    default:
690        /* fatal error, bail out */
691        return result;
692    }
693
694    if (apr_file_open(&fd, r->filename, APR_READ, APR_OS_DEFAULT, r->pool) != APR_SUCCESS) {
695        /* We can't open it, but we were able to stat it. */
696        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
697                    MODNAME ": can't read `%s'", r->filename);
698        /* let some other handler decide what the problem is */
699        return DECLINED;
700    }
701
702    /*
703     * try looking at the first HOWMANY bytes
704     */
705    nbytes = sizeof(buf) - 1;
706    if ((result = apr_file_read(fd, (char *) buf, &nbytes)) != APR_SUCCESS) {
707        ap_log_rerror(APLOG_MARK, APLOG_ERR, result, r,
708                    MODNAME ": read failed: %s", r->filename);
709        return HTTP_INTERNAL_SERVER_ERROR;
710    }
711
712    if (nbytes == 0) {
713        return DECLINED;
714    }
715    else {
716        buf[nbytes++] = '\0';  /* null-terminate it */
717        result = tryit(r, buf, nbytes, 1);
718        if (result != OK) {
719            return result;
720        }
721    }
722
723    (void) apr_file_close(fd);
724    (void) magic_rsl_putchar(r, '\n');
725
726    return OK;
727}
728
729
730static int
731tryit(unsigned char *buf, STRLEN nb, int checkzmagic, SV *ct)
732{
733    /*
734     * Try compression stuff
735     */
736    if (checkzmagic == 1) {
737        if (zmagic(r, buf, nb) == 1)
738            return OK;
739    }
740
741    /*
742     * try tests in /etc/magic (or surrogate magic file)
743     */
744    if (softmagic(buf, nb, ct) == 1)
745        return OK;
746
747    /*
748     * try known keywords, check for ascii-ness too.
749     */
750    if (ascmagic(buf, nb, ct) == 1)
751        return OK;
752
753    /*
754     * abandon hope, all ye who remain here
755     */
756    return DECLINED;
757}
758
759#define    EATAB {while (isSPACE(*l))  ++l;}
760
761/*
762 * apprentice
763 *    Given a filename (the magic file), and uses it to learn rules
764 */
765static int apprentice(const char *filename)
766{
767    apr_file_t *f = NULL;
768    apr_status_t result;
769    char line[BUFSIZ + 1];
770    int errs = 0;
771    int lineno;
772    magic_server_config_rec *conf = (magic_server_config_rec *)
773                    ap_get_module_config(s->module_config, &mime_magic_module);
774    const char *fname = ap_server_root_relative(p, conf->magicfile);
775
776    if (!fname) {
777        ap_log_error(APLOG_MARK, APLOG_ERR, APR_EBADPATH, s,
778                     MODNAME ": Invalid magic file path %s", conf->magicfile);
779        return -1;
780    }
781    if ((result = apr_file_open(&f, fname, APR_READ | APR_BUFFERED,
782                                APR_OS_DEFAULT, p)) != APR_SUCCESS) {
783        ap_log_error(APLOG_MARK, APLOG_ERR, result, s,
784                     MODNAME ": can't read magic file %s", fname);
785        return -1;
786    }
787
788    /* set up the magic list (empty) */
789    conf->magic = conf->last = NULL;
790
791    /* parse it */
792    for (lineno = 1; apr_file_gets(line, BUFSIZ, f) == APR_SUCCESS; lineno++) {
793        int ws_offset;
794        char *last = line + strlen(line) - 1; /* guaranteed that len >= 1 since an
795                                               * "empty" line contains a '\n'
796                                               */
797
798        /* delete newline and any other trailing whitespace */
799        while (last >= line
800               && isSPACE(*last)) {
801            *last = '\0';
802            --last;
803        }
804
805        /* skip leading whitespace */
806        ws_offset = 0;
807        while (line[ws_offset] && isSPACE(line[ws_offset])) {
808            ws_offset++;
809        }
810
811        /* skip blank lines */
812        if (line[ws_offset] == 0) {
813            continue;
814        }
815
816        /* comment, do not parse */
817        if (line[ws_offset] == '#')
818            continue;
819
820        /* parse it */
821        if (parse(s, p, line + ws_offset, lineno) != 0)
822            ++errs;
823    }
824
825    (void) apr_file_close(f);
826
827    return (errs ? -1 : 0);
828}
829
830/*
831 * extend the sign bit if the comparison is to be signed
832 */
833static unsigned long signextend(server_rec *s, struct magic *m, unsigned long v)
834{
835    if (!(m->flag & UNSIGNED))
836        switch (m->type) {
837            /*
838             * Do not remove the casts below.  They are vital. When later
839             * compared with the data, the sign extension must have happened.
840             */
841        case BYTE:
842            v = (char) v;
843            break;
844        case SHORT:
845        case BESHORT:
846        case LESHORT:
847            v = (short) v;
848            break;
849        case DATE:
850        case BEDATE:
851        case LEDATE:
852        case LONG:
853        case BELONG:
854        case LELONG:
855            v = (long) v;
856            break;
857        case STRING:
858            break;
859        default:
860            ap_log_error(APLOG_MARK, APLOG_ERR, 0, s,
861                        MODNAME ": can't happen: m->type=%d", m->type);
862            return -1;
863        }
864    return v;
865}
866
867/*
868 * parse one line from magic file, put into magic[index++] if valid
869 */
870static int parse(char *l, int lineno)
871{
872    magic_entry *m;
873    char *t, *s;
874
875    /* allocate magic structure entry */
876    Newxz(m, 1, magic_entry);
877
878    /* append to linked list */
879    m->next = NULL;
880    if (!conf->magic || !conf->last) {
881        conf->magic = conf->last = m;
882    }
883    else {
884        conf->last->next = m;
885        conf->last = m;
886    }
887
888    /* set values in magic structure */
889    m->flag = 0;
890    m->cont_level = 0;
891    m->lineno = lineno;
892
893    while (*l == '>') {
894        ++l;  /* step over */
895        m->cont_level++;
896    }
897
898    if (m->cont_level != 0 && *l == '(') {
899        ++l;  /* step over */
900        m->flag |= INDIR;
901    }
902
903    /* get offset, then skip over it */
904    m->offset = (int) strtol(l, &t, 0);
905    if (l == t) {
906        ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv,
907                    MODNAME ": offset %s invalid", l);
908    }
909    l = t;
910
911    if (m->flag & INDIR) {
912        m->in.type = LONG;
913        m->in.offset = 0;
914        /*
915         * read [.lbs][+-]nnnnn)
916         */
917        if (*l == '.') {
918            switch (*++l) {
919            case 'l':
920                m->in.type = LONG;
921                break;
922            case 's':
923                m->in.type = SHORT;
924                break;
925            case 'b':
926                m->in.type = BYTE;
927                break;
928            default:
929                ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv,
930                        MODNAME ": indirect offset type %c invalid", *l);
931                break;
932            }
933            l++;
934        }
935        s = l;
936        if (*l == '+' || *l == '-')
937            l++;
938        if (isDIGIT((unsigned char) *l)) {
939            m->in.offset = strtol(l, &t, 0);
940            if (*s == '-')
941                m->in.offset = -m->in.offset;
942        }
943        else
944            t = l;
945        if (*t++ != ')') {
946            ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv,
947                        MODNAME ": missing ')' in indirect offset");
948        }
949        l = t;
950    }
951
952
953    while (isDIGIT((unsigned char) *l))
954        ++l;
955    EATAB;
956
957#define NBYTE           4
958#define NSHORT          5
959#define NLONG           4
960#define NSTRING         6
961#define NDATE           4
962#define NBESHORT        7
963#define NBELONG         6
964#define NBEDATE         6
965#define NLESHORT        7
966#define NLELONG         6
967#define NLEDATE         6
968
969    if (*l == 'u') {
970        ++l;
971        m->flag |= UNSIGNED;
972    }
973
974    /* get type, skip it */
975    if (strnNE(l, "byte", NBYTE) == 0) {
976        m->type = BYTE;
977        l += NBYTE;
978    }
979    else if (strnNE(l, "short", NSHORT) == 0) {
980        m->type = SHORT;
981        l += NSHORT;
982    }
983    else if (strnNE(l, "long", NLONG) == 0) {
984        m->type = LONG;
985        l += NLONG;
986    }
987    else if (strnNE(l, "string", NSTRING) == 0) {
988        m->type = STRING;
989        l += NSTRING;
990    }
991    else if (strnNE(l, "date", NDATE) == 0) {
992        m->type = DATE;
993        l += NDATE;
994    }
995    else if (strnNE(l, "beshort", NBESHORT) == 0) {
996        m->type = BESHORT;
997        l += NBESHORT;
998    }
999    else if (strnNE(l, "belong", NBELONG) == 0) {
1000        m->type = BELONG;
1001        l += NBELONG;
1002    }
1003    else if (strnNE(l, "bedate", NBEDATE) == 0) {
1004        m->type = BEDATE;
1005        l += NBEDATE;
1006    }
1007    else if (strnNE(l, "leshort", NLESHORT) == 0) {
1008        m->type = LESHORT;
1009        l += NLESHORT;
1010    }
1011    else if (strnNE(l, "lelong", NLELONG) == 0) {
1012        m->type = LELONG;
1013        l += NLELONG;
1014    }
1015    else if (strnNE(l, "ledate", NLEDATE) == 0) {
1016        m->type = LEDATE;
1017        l += NLEDATE;
1018    }
1019    else {
1020        ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv,
1021                    MODNAME ": type %s invalid", l);
1022        return -1;
1023    }
1024    /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
1025    if (*l == '&') {
1026        ++l;
1027        m->mask = signextend(serv, m, strtol(l, &l, 0));
1028    }
1029    else
1030        m->mask = ~0L;
1031    EATAB;
1032
1033    switch (*l) {
1034    case '>':
1035    case '<':
1036        /* Old-style anding: "0 byte &0x80 dynamically linked" */
1037    case '&':
1038    case '^':
1039    case '=':
1040        m->reln = *l;
1041        ++l;
1042        break;
1043    case '!':
1044        if (m->type != STRING) {
1045            m->reln = *l;
1046            ++l;
1047            break;
1048        }
1049        /* FALL THROUGH */
1050    default:
1051        if (*l == 'x' && isSPACE(l[1])) {
1052            m->reln = *l;
1053            ++l;
1054            goto GetDesc;  /* Bill The Cat */
1055        }
1056        m->reln = '=';
1057        break;
1058    }
1059    EATAB;
1060
1061    if (getvalue(serv, m, &l))
1062        return -1;
1063    /*
1064     * now get last part - the description
1065     */
1066  GetDesc:
1067    EATAB;
1068    if (l[0] == '\b') {
1069        ++l;
1070        m->nospflag = 1;
1071    }
1072    else if ((l[0] == '\\') && (l[1] == 'b')) {
1073        ++l;
1074        ++l;
1075        m->nospflag = 1;
1076    }
1077    else
1078        m->nospflag = 0;
1079    Memcpy(m->desc, l, sizeof(m->desc) - 1);
1080    m->desc[sizeof(m->desc) - 1] = '\0';
1081
1082    return 0;
1083}
1084
1085/*
1086 * Read a numeric value from a pointer, into the value union of a magic
1087 * pointer, according to the magic type.  Update the string pointer to point
1088 * just after the number read.  Return 0 for success, non-zero for failure.
1089 */
1090static int getvalue(server_rec *s, struct magic *m, char **p)
1091{
1092    int slen;
1093
1094    if (m->type == STRING) {
1095        *p = getstr(s, *p, m->value.s, sizeof(m->value.s), &slen);
1096        m->vallen = slen;
1097    }
1098    else if (m->reln != 'x')
1099        m->value.l = signextend(s, m, strtol(*p, p, 0));
1100    return 0;
1101}
1102
1103/*
1104 * Convert a string containing C character escapes.  Stop at an unescaped
1105 * space or tab. Copy the converted version to "p", returning its length in
1106 * *slen. Return updated scan pointer as function result.
1107 */
1108static char *getstr(server_rec *serv, register char *s, register char *p,
1109                    int plen, int *slen)
1110{
1111    char *origs = s, *origp = p;
1112    char *pmax = p + plen - 1;
1113    register int c;
1114    register int val;
1115
1116    while ((c = *s++) != '\0') {
1117        if (isSPACE(c))
1118            break;
1119        if (p >= pmax) {
1120            ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv,
1121                        MODNAME ": string too long: %s", origs);
1122            break;
1123        }
1124        if (c == '\\') {
1125            switch (c = *s++) {
1126
1127            case '\0':
1128                goto out;
1129
1130            default:
1131                *p++ = (char) c;
1132                break;
1133
1134            case 'n':
1135                *p++ = '\n';
1136                break;
1137
1138            case 'r':
1139                *p++ = '\r';
1140                break;
1141
1142            case 'b':
1143                *p++ = '\b';
1144                break;
1145
1146            case 't':
1147                *p++ = '\t';
1148                break;
1149
1150            case 'f':
1151                *p++ = '\f';
1152                break;
1153
1154            case 'v':
1155                *p++ = '\v';
1156                break;
1157
1158                /* \ and up to 3 octal digits */
1159            case '0':
1160            case '1':
1161            case '2':
1162            case '3':
1163            case '4':
1164            case '5':
1165            case '6':
1166            case '7':
1167                val = c - '0';
1168                c = *s++;  /* try for 2 */
1169                if (c >= '0' && c <= '7') {
1170                    val = (val << 3) | (c - '0');
1171                    c = *s++;  /* try for 3 */
1172                    if (c >= '0' && c <= '7')
1173                        val = (val << 3) | (c - '0');
1174                    else
1175                        --s;
1176                }
1177                else
1178                    --s;
1179                *p++ = (char) val;
1180                break;
1181
1182                /* \x and up to 3 hex digits */
1183            case 'x':
1184                val = 'x';            /* Default if no digits */
1185                c = hextoint(*s++);   /* Get next char */
1186                if (c >= 0) {
1187                    val = c;
1188                    c = hextoint(*s++);
1189                    if (c >= 0) {
1190                        val = (val << 4) + c;
1191                        c = hextoint(*s++);
1192                        if (c >= 0) {
1193                            val = (val << 4) + c;
1194                        }
1195                        else
1196                            --s;
1197                    }
1198                    else
1199                        --s;
1200                }
1201                else
1202                    --s;
1203                *p++ = (char) val;
1204                break;
1205            }
1206        }
1207        else
1208            *p++ = (char) c;
1209    }
1210  out:
1211    *p = '\0';
1212    *slen = p - origp;
1213    return s;
1214}
1215
1216
/*
 * Map one hexadecimal digit character to its numeric value (0..15).
 * Any other input yields -1.
 */
static int hextoint(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return (c - 'a') + 10;
    if (c >= 'A' && c <= 'F')
        return (c - 'A') + 10;

    return -1;
}
1228
1229
1230/*
1231 * return DONE to indicate it's been handled
1232 * return OK to indicate it's a regular file still needing handling
1233 * other returns indicate a failure of some sort
1234 */
1235static int fsmagic(SV *ct, const char *fn)
1236{
1237    Stat_t st;
1238    Stat(fn, st);
1239
1240    if (S_ISREG(st.st_mode)) {
1241        if (st.st_size == 0) {
1242            sv_setpv(ct, MIME_TEXT_UNKNOWN);
1243            return DONE;
1244        }
1245        return -1;
1246    }
1247
1248    if (S_ISDIR(st.st_mode)) {
1249        sv_setpv(ct, DIR_MAGIC_TYPE);
1250        return DONE;
1251    } else if (S_ISCHR(st.st_mode)) {
1252        sv_setpv(ct, MIME_BINARY_UNKNOWN);
1253        return DONE;
1254    } else if (S_ISBLK(st.st_mode)) {
1255        sv_setpv(ct, MIME_BINARY_UNKNOWN);
1256        return DONE;
1257/*    } else if (S_ISPIPE) {
1258        sv_setpv(ct, MIME_BINARY_UNKNOWN);
1259        return DONE;
1260*/
1261    } else if (S_ISLNK(st.st_mode)) {
1262        /* croak ? */
1263        warn("broken symlink (%s)", fn);
1264        return 0;
1265    } else if (S_ISSOCK(st.st_mode)) {
1266        sv_setpv(ct, MIME_BINARY_UNKNOWN);
1267        return DONE;
1268    } else if
1269    case APR_REG:
1270        break;
1271    default:
1272        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
1273                      MODNAME ": invalid file type %d.", r->finfo.filetype);
1274        return HTTP_INTERNAL_SERVER_ERROR;
1275    }
1276}
1277
1278/*
1279 * softmagic - lookup one file in database (already read from /etc/magic by
1280 * apprentice.c). Passed the name and FILE * of one file to be typed.
1281 */
1282                /* ARGSUSED1 *//* nbytes passed for regularity, maybe need later */
1283static int softmagic(SV *ct, unsigned char *buf, STRLEN nbytes)
1284{
1285    if (match(r, buf, nbytes))
1286        return 1;
1287
1288    return 0;
1289}
1290
1291/*
1292 * Go through the whole list, stopping if you find a match.  Process all the
1293 * continuations of that match before returning.
1294 *
1295 * We support multi-level continuations:
1296 *
1297 * At any time when processing a successful top-level match, there is a current
1298 * continuation level; it represents the level of the last successfully
1299 * matched continuation.
1300 *
1301 * Continuations above that level are skipped as, if we see one, it means that
1302 * the continuation that controls them - i.e, the lower-level continuation
1303 * preceding them - failed to match.
1304 *
1305 * Continuations below that level are processed as, if we see one, it means
1306 * we've finished processing or skipping higher-level continuations under the
1307 * control of a successful or unsuccessful lower-level continuation, and are
1308 * now seeing the next lower-level continuation and should process it.  The
1309 * current continuation level reverts to the level of the one we're seeing.
1310 *
1311 * Continuations at the current level are processed as, if we see one, there's
1312 * no lower-level continuation that may have failed.
1313 *
1314 * If a continuation matches, we bump the current continuation level so that
1315 * higher-level continuations are processed.
1316 */
1317static int match(request_rec *r, unsigned char *s, STRLEN nbytes)
1318{
1319    int cont_level = 0;
1320    int need_separator = 0;
1321    union VALUETYPE p;
1322    magic_server_config_rec *conf = (magic_server_config_rec *)
1323                ap_get_module_config(r->server->module_config, &mime_magic_module);
1324    struct magic *m;
1325
1326    for (m = conf->magic; m; m = m->next) {
1327        /* check if main entry matches */
1328        if (!mget(r, &p, s, m, nbytes) ||
1329            !mcheck(r, &p, m)) {
1330            struct magic *m_cont;
1331
1332            /*
1333             * main entry didn't match, flush its continuations
1334             */
1335            if (!m->next || (m->next->cont_level == 0)) {
1336                continue;
1337            }
1338
1339            m_cont = m->next;
1340            while (m_cont && (m_cont->cont_level != 0)) {
1341                /*
1342                 * this trick allows us to keep *m in sync when the continue
1343                 * advances the pointer
1344                 */
1345                m = m_cont;
1346                m_cont = m_cont->next;
1347            }
1348            continue;
1349        }
1350
1351        /* if we get here, the main entry rule was a match */
1352        /* this will be the last run through the loop */
1353
1354        /* print the match */
1355        mprint(r, &p, m);
1356
1357        /*
1358         * If we printed something, we'll need to print a blank before we
1359         * print something else.
1360         */
1361        if (m->desc[0])
1362            need_separator = 1;
1363        /* and any continuations that match */
1364        cont_level++;
1365        /*
1366         * while (m && m->next && m->next->cont_level != 0 && ( m = m->next
1367         * ))
1368         */
1369        m = m->next;
1370        while (m && (m->cont_level != 0)) {
1371            if (cont_level >= m->cont_level) {
1372                if (cont_level > m->cont_level) {
1373                    /*
1374                     * We're at the end of the level "cont_level"
1375                     * continuations.
1376                     */
1377                    cont_level = m->cont_level;
1378                }
1379                if (mget(r, &p, s, m, nbytes) &&
1380                    mcheck(r, &p, m)) {
1381                    /*
1382                     * This continuation matched. Print its message, with a
1383                     * blank before it if the previous item printed and this
1384                     * item isn't empty.
1385                     */
1386                    /* space if previous printed */
1387                    if (need_separator
1388                        && (m->nospflag == 0)
1389                        && (m->desc[0] != '\0')
1390                        ) {
1391                        (void) magic_rsl_putchar(r, ' ');
1392                        need_separator = 0;
1393                    }
1394                    mprint(r, &p, m);
1395                    if (m->desc[0])
1396                        need_separator = 1;
1397
1398                    /*
1399                     * If we see any continuations at a higher level, process
1400                     * them.
1401                     */
1402                    cont_level++;
1403                }
1404            }
1405
1406            /* move to next continuation record */
1407            m = m->next;
1408        }
1409        return 1;  /* all through */
1410    }
1411    return 0;  /* no match at all */
1412}
1413
1414static void mprint(request_rec *r, union VALUETYPE *p, struct magic *m)
1415{
1416    char *pp;
1417    unsigned long v;
1418    char time_str[APR_CTIME_LEN];
1419
1420    switch (m->type) {
1421    case BYTE:
1422        v = p->b;
1423        break;
1424
1425    case SHORT:
1426    case BESHORT:
1427    case LESHORT:
1428        v = p->h;
1429        break;
1430
1431    case LONG:
1432    case BELONG:
1433    case LELONG:
1434        v = p->l;
1435        break;
1436
1437    case STRING:
1438        if (m->reln == '=') {
1439            (void) magic_rsl_printf(r, m->desc, m->value.s);
1440        }
1441        else {
1442            (void) magic_rsl_printf(r, m->desc, p->s);
1443        }
1444        return;
1445
1446    case DATE:
1447    case BEDATE:
1448    case LEDATE:
1449        apr_ctime(time_str, apr_time_from_sec(*(time_t *)&p->l));
1450        pp = time_str;
1451        (void) magic_rsl_printf(r, m->desc, pp);
1452        return;
1453    default:
1454        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
1455                    MODNAME ": invalid m->type (%d) in mprint().",
1456                    m->type);
1457        return;
1458    }
1459
1460    v = signextend(r->server, m, v) & m->mask;
1461    (void) magic_rsl_printf(r, m->desc, (unsigned long) v);
1462}
1463
1464/*
1465 * Convert the byte order of the data we are looking at
1466 */
1467static int mconvert(request_rec *r, union VALUETYPE *p, struct magic *m)
1468{
1469    char *rt;
1470
1471    switch (m->type) {
1472    case BYTE:
1473    case SHORT:
1474    case LONG:
1475    case DATE:
1476        return 1;
1477    case STRING:
1478        /* Null terminate and eat the return */
1479        p->s[sizeof(p->s) - 1] = '\0';
1480        if ((rt = strchr(p->s, '\n')) != NULL)
1481            *rt = '\0';
1482        return 1;
1483    case BESHORT:
1484        p->h = (short) ((p->hs[0] << 8) | (p->hs[1]));
1485        return 1;
1486    case BELONG:
1487    case BEDATE:
1488        p->l = (long)
1489            ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
1490        return 1;
1491    case LESHORT:
1492        p->h = (short) ((p->hs[1] << 8) | (p->hs[0]));
1493        return 1;
1494    case LELONG:
1495    case LEDATE:
1496        p->l = (long)
1497            ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
1498        return 1;
1499    default:
1500        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
1501                    MODNAME ": invalid type %d in mconvert().", m->type);
1502        return 0;
1503    }
1504}
1505
1506
1507static int mget(request_rec *r, union VALUETYPE *p, unsigned char *s,
1508                struct magic *m, STRLEN nbytes)
1509{
1510    long offset = m->offset;
1511
1512    if (offset + sizeof(union VALUETYPE) > nbytes)
1513                  return 0;
1514
1515    memcpy(p, s + offset, sizeof(union VALUETYPE));
1516
1517    if (!mconvert(r, p, m))
1518        return 0;
1519
1520    if (m->flag & INDIR) {
1521
1522        switch (m->in.type) {
1523        case BYTE:
1524            offset = p->b + m->in.offset;
1525            break;
1526        case SHORT:
1527            offset = p->h + m->in.offset;
1528            break;
1529        case LONG:
1530            offset = p->l + m->in.offset;
1531            break;
1532        }
1533
1534        if (offset + sizeof(union VALUETYPE) > nbytes)
1535                      return 0;
1536
1537        memcpy(p, s + offset, sizeof(union VALUETYPE));
1538
1539        if (!mconvert(r, p, m))
1540            return 0;
1541    }
1542    return 1;
1543}
1544
1545static int mcheck(request_rec *r, union VALUETYPE *p, struct magic *m)
1546{
1547    register unsigned long l = m->value.l;
1548    register unsigned long v;
1549    int matched;
1550
1551    if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) {
1552        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
1553                    MODNAME ": BOINK");
1554        return 1;
1555    }
1556
1557    switch (m->type) {
1558    case BYTE:
1559        v = p->b;
1560        break;
1561
1562    case SHORT:
1563    case BESHORT:
1564    case LESHORT:
1565        v = p->h;
1566        break;
1567
1568    case LONG:
1569    case BELONG:
1570    case LELONG:
1571    case DATE:
1572    case BEDATE:
1573    case LEDATE:
1574        v = p->l;
1575        break;
1576
1577    case STRING:
1578        l = 0;
1579        /*
1580         * What we want here is: v = strnNE(m->value.s, p->s, m->vallen);
1581         * but ignoring any nulls.  bcmp doesn't give -/+/0 and isn't
1582         * universally available anyway.
1583         */
1584        v = 0;
1585        {
1586            register unsigned char *a = (unsigned char *) m->value.s;
1587            register unsigned char *b = (unsigned char *) p->s;
1588            register int len = m->vallen;
1589
1590            while (--len >= 0)
1591                if ((v = *b++ - *a++) != 0)
1592                    break;
1593        }
1594        break;
1595    default:
1596        /*  bogosity, pretend that it just wasn't a match */
1597        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
1598                    MODNAME ": invalid type %d in mcheck().", m->type);
1599        return 0;
1600    }
1601
1602    v = signextend(r->server, m, v) & m->mask;
1603
1604    switch (m->reln) {
1605    case 'x':
1606        matched = 1;
1607        break;
1608
1609    case '!':
1610        matched = v != l;
1611        break;
1612
1613    case '=':
1614        matched = v == l;
1615        break;
1616
1617    case '>':
1618        if (m->flag & UNSIGNED) {
1619            matched = v > l;
1620        }
1621        else {
1622            matched = (long) v > (long) l;
1623        }
1624        break;
1625
1626    case '<':
1627        if (m->flag & UNSIGNED) {
1628            matched = v < l;
1629        }
1630        else {
1631            matched = (long) v < (long) l;
1632        }
1633        break;
1634
1635    case '&':
1636        matched = (v & l) == l;
1637        break;
1638
1639    case '^':
1640        matched = (v & l) != l;
1641        break;
1642
1643    default:
1644        /* bogosity, pretend it didn't match */
1645        matched = 0;
1646        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
1647                    MODNAME ": mcheck: can't happen: invalid relation %d.",
1648                    m->reln);
1649        break;
1650    }
1651
1652    return matched;
1653}
1654
1655static int
1656ascmagic(SV *ct, unsigned char *buf, STRLEN nbytes)
1657{
1658    int has_escapes = 0;
1659    unsigned char *s;
1660    char nbuf[HOWMANY + 1];  /* one extra for terminating '\0' */
1661    char *token;
1662    register struct names *p;
1663    int small_nbytes;
1664    char *strtok_state;
1665
1666    /* these are easy, do them first */
1667
1668    /*
1669     * for troff, look for . + letter + letter or .\"; this must be done to
1670     * disambiguate tar archives' ./file and other trash from real troff
1671     * input.
1672     */
1673    if (*buf == '.') {
1674        unsigned char *tp = buf + 1;
1675
1676        while (isSPACE(*tp))
1677            ++tp;  /* skip leading whitespace */
1678        if ((isALPHA(*tp) || *tp == '\\') &&
1679             (isALPHA(*(tp + 1)) || *tp == '"')) {
1680            sv_setpv(ct, "application/x-troff");
1681            return 1;
1682        }
1683    }
1684    if ((*buf == 'c' || *buf == 'C') && isSPACE(*(buf + 1))) {
1685        /* Fortran */
1686        sv_setpv(ct, "text/plain");
1687        return 1;
1688    }
1689
1690    /* look for tokens from names.h - this is expensive!, so we'll limit
1691     * ourselves to only SMALL_HOWMANY bytes */
1692    small_nbytes = (nbytes > SMALL_HOWMANY) ? SMALL_HOWMANY : nbytes;
1693    /* make a copy of the buffer here because apr_strtok() will destroy it */
1694    s = (unsigned char *) memcpy(nbuf, buf, small_nbytes);
1695    s[small_nbytes] = '\0';
1696    has_escapes = (memchr(s, '\033', small_nbytes) != NULL);
1697    while ((token = apr_strtok((char *) s, " \t\n\r\f", &strtok_state)) != NULL) {
1698        s = NULL;  /* make apr_strtok() keep on tokin' */
1699        for (p = names; p < names + NNAMES; p++) {
1700            if (strEQ(p->name, token)) {
1701                sv_setpv(ct, types[p->type]);
1702                if (has_escapes)
1703                    sv_catpv(ct, " (with escape sequences)");
1704                return 1;
1705            }
1706        }
1707    }
1708
1709    switch (is_tar(buf, nbytes)) {
1710    case 1:
1711        /* V7 tar archive */
1712        sv_setpv(ct, "application/x-tar");
1713        return 1;
1714    case 2:
1715        /* POSIX tar archive */
1716        sv_setpv(ct, "application/x-tar");
1717        return 1;
1718    }
1719
1720    /* all else fails, but it is ascii... */
1721    return 0;
1722}
1723
1724
1725/*
1726 * compress routines: zmagic() - returns 0 if not recognized, uncompresses
1727 * and prints information if recognized uncompress(s, method, old, n, newch)
1728 * - uncompress old into new, using method, return sizeof new
1729 */
1730
1731static struct {
1732    char *magic;
1733    STRLEN maglen;
1734    char *argv[3];
1735    int silent;
1736    char *encoding;  /* MUST be lowercase */
1737} compr[] = {
1738
1739    /* we use gzip here rather than uncompress because we have to pass
1740     * it a full filename -- and uncompress only considers filenames
1741     * ending with .Z
1742     */
1743    {
1744        "\037\235", 2, {
1745            "gzip", "-dcq", NULL
1746        }, 0, "x-compress"
1747    },
1748    {
1749        "\037\213", 2, {
1750            "gzip", "-dcq", NULL
1751        }, 1, "x-gzip"
1752    },
1753    /*
1754     * XXX pcat does not work, cause I don't know how to make it read stdin,
1755     * so we use gzip
1756     */
1757    {
1758        "\037\036", 2, {
1759            "gzip", "-dcq", NULL
1760        }, 0, "x-gzip"
1761    },
1762};
1763
1764static int ncompr = sizeof(compr) / sizeof(compr[0]);
1765
1766static int zmagic(request_rec *r, unsigned char *buf, STRLEN nbytes)
1767{
1768    unsigned char *newbuf;
1769    int newsize;
1770    int i;
1771
1772    for (i = 0; i < ncompr; i++) {
1773        if (nbytes < compr[i].maglen)
1774            continue;
1775        if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0)
1776            break;
1777    }
1778
1779    if (i == ncompr)
1780        return 0;
1781
1782    if ((newsize = uncompress(r, i, &newbuf, nbytes)) > 0) {
1783        if (tryit(r, newbuf, newsize, 0) != OK) {
1784            return 0;
1785        }
1786
1787        /* set encoding type in the request record */
1788        r->content_encoding = compr[i].encoding;
1789    }
1790    return 1;
1791}
1792
1793
1794struct uncompress_parms {
1795    request_rec *r;
1796    int method;
1797};
1798
1799static int create_uncompress_child(struct uncompress_parms *parm, apr_pool_t *cntxt,
1800                                   apr_file_t **pipe_in)
1801{
1802    int rc = 1;
1803    const char *new_argv[4];
1804    const char *const *env;
1805    request_rec *r = parm->r;
1806    apr_pool_t *child_context = cntxt;
1807    apr_procattr_t *procattr;
1808    apr_proc_t *procnew;
1809
1810    /* XXX missing 1.3 logic:
1811     *
1812     * what happens when !compr[parm->method].silent?
1813     * Should we create the err pipe, read it, and copy to the log?
1814     */
1815
1816    env = (const char *const *)ap_create_environment(child_context, r->subprocess_env);
1817
1818    if ((apr_procattr_create(&procattr, child_context) != APR_SUCCESS) ||
1819        (apr_procattr_io_set(procattr, APR_FULL_BLOCK,
1820                           APR_FULL_BLOCK, APR_NO_PIPE)   != APR_SUCCESS) ||
1821        (apr_procattr_dir_set(procattr, r->filename)        != APR_SUCCESS) ||
1822        (apr_procattr_cmdtype_set(procattr, APR_PROGRAM)    != APR_SUCCESS)) {
1823        /* Something bad happened, tell the world. */
1824        ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_ENOPROC, r,
1825               "couldn't setup child process: %s", r->filename);
1826    }
1827    else {
1828        new_argv[0] = compr[parm->method].argv[0];
1829        new_argv[1] = compr[parm->method].argv[1];
1830        new_argv[2] = r->filename;
1831        new_argv[3] = NULL;
1832
1833        procnew = apr_pcalloc(child_context, sizeof(*procnew));
1834        rc = apr_proc_create(procnew, compr[parm->method].argv[0],
1835                               new_argv, env, procattr, child_context);
1836
1837        if (rc != APR_SUCCESS) {
1838            /* Bad things happened. Everyone should have cleaned up. */
1839            ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_ENOPROC, r,
1840                          MODNAME ": could not execute `%s'.",
1841                          compr[parm->method].argv[0]);
1842        }
1843        else {
1844            apr_pool_note_subprocess(child_context, procnew, APR_KILL_AFTER_TIMEOUT);
1845            *pipe_in = procnew->out;
1846        }
1847    }
1848
1849    return (rc);
1850}
1851
1852static int uncompress(request_rec *r, int method,
1853                      unsigned char **newch, STRLEN n)
1854{
1855    struct uncompress_parms parm;
1856    apr_file_t *pipe_out = NULL;
1857    apr_pool_t *sub_context;
1858    apr_status_t rv;
1859
1860    parm.r = r;
1861    parm.method = method;
1862
1863    /* We make a sub_pool so that we can collect our child early, otherwise
1864     * there are cases (i.e. generating directory indicies with mod_autoindex)
1865     * where we would end up with LOTS of zombies.
1866     */
1867    if (apr_pool_create(&sub_context, r->pool) != APR_SUCCESS)
1868        return -1;
1869
1870    if ((rv = create_uncompress_child(&parm, sub_context, &pipe_out)) != APR_SUCCESS) {
1871        ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
1872                    MODNAME ": couldn't spawn uncompress process: %s", r->uri);
1873        return -1;
1874    }
1875
1876    *newch = (unsigned char *) apr_palloc(r->pool, n);
1877    rv = apr_file_read(pipe_out, *newch, &n);
1878    if (n == 0) {
1879        apr_pool_destroy(sub_context);
1880        ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
1881            MODNAME ": read failed from uncompress of %s", r->filename);
1882        return -1;
1883    }
1884    apr_pool_destroy(sub_context);
1885    return n;
1886}
1887
1888/*
1889 * is_tar() -- figure out whether file is a tar archive.
1890 *
1891 * Stolen (by author of file utility) from the public domain tar program: Public
1892 * Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu).
1893 *
1894 * @(#)list.c 1.18 9/23/86 Public Domain - gnu $Id: mod_mime_magic.c,v 1.7
1895 * 1997/06/24 00:41:02 ikluft Exp ikluft $
1896 *
1897 * Comments changed and some code/comments reformatted for file command by Ian
1898 * Darwin.
1899 */
1900
1901#define isodigit(c) (((unsigned char)(c) >= '0') && ((unsigned char)(c) <= '7'))
1902
1903/*
1904 * Return 0 if the checksum is bad (i.e., probably not a tar archive), 1 for
1905 * old UNIX tar file, 2 for Unix Std (POSIX) tar file.
1906 */
1907
1908static int is_tar(unsigned char *buf, STRLEN nbytes)
1909{
1910    register union record *header = (union record *) buf;
1911    register int i;
1912    register long sum, recsum;
1913    register char *p;
1914
1915    if (nbytes < sizeof(union record))
1916               return 0;
1917
1918    recsum = from_oct(8, header->header.chksum);
1919
1920    sum = 0;
1921    p = header->charptr;
1922    for (i = sizeof(union record); --i >= 0;) {
1923        /*
1924         * We can't use unsigned char here because of old compilers, e.g. V7.
1925         */
1926        sum += 0xFF & *p++;
1927    }
1928
1929    /* Adjust checksum to count the "chksum" field as blanks. */
1930    for (i = sizeof(header->header.chksum); --i >= 0;)
1931        sum -= 0xFF & header->header.chksum[i];
1932    sum += ' ' * sizeof header->header.chksum;
1933
1934    if (sum != recsum)
1935        return 0;   /* Not a tar archive */
1936
1937    if (strEQ(header->header.magic, TMAGIC))
1938        return 2;   /* Unix Standard tar archive */
1939
1940    return 1;       /* Old fashioned tar archive */
1941}
1942
1943
/*
 * Quick and dirty octal conversion of a field of at most `digs` characters.
 *
 * Leading whitespace is skipped, then octal digits are accumulated.  The
 * result is -1 if the field is invalid (all blank, or ending on something
 * other than whitespace or NUL before the field is exhausted).
 */
static long from_oct(int digs, char *where)
{
    long value = 0;

    /* Skip spaces */
    while (isSPACE(*where)) {
        ++where;
        --digs;
        if (digs <= 0)
            return -1;  /* All blank field */
    }

    /* Scan til nonoctal */
    for (; digs > 0 && isodigit(*where); --digs, ++where)
        value = (value << 3) | (*where - '0');

    if (digs > 0 && *where != '\0' && !isSPACE(*where))
        return -1;  /* Ended on non-space/nul */

    return value;
}
1969
1970/*
1971 * Check for file-revision suffix
1972 *
1973 * This is for an obscure document control system used on an intranet.
1974 * The web representation of each file's revision has an @1, @2, etc
1975 * appended with the revision number.  This needs to be stripped off to
1976 * find the file suffix, which can be recognized by sending the name back
1977 * through a sub-request.  The base file name (without the @num suffix)
1978 * must exist because its type will be used as the result.
1979 */
1980static int revision_suffix(request_rec *r)
1981{
1982    int suffix_pos, result;
1983    char *sub_filename;
1984    request_rec *sub;
1985
1986    /* check for recognized revision suffix */
1987    suffix_pos = strlen(r->filename) - 1;
1988    if (!isDIGIT(r->filename[suffix_pos])) {
1989        return 0;
1990    }
1991    while (suffix_pos >= 0 && isDIGIT(r->filename[suffix_pos]))
1992        suffix_pos--;
1993    if (suffix_pos < 0 || r->filename[suffix_pos] != '@') {
1994        return 0;
1995    }
1996
1997    /* perform sub-request for the file name without the suffix */
1998    result = 0;
1999    sub_filename = apr_pstrndup(r->pool, r->filename, suffix_pos);
2000    sub = ap_sub_req_lookup_file(sub_filename, r, NULL);
2001
2002    /* extract content type/encoding/language from sub-request */
2003    if (sub->content_type) {
2004        ap_set_content_type(r, apr_pstrdup(r->pool, sub->content_type));
2005        if (sub->content_encoding)
2006            r->content_encoding =
2007                apr_pstrdup(r->pool, sub->content_encoding);
2008        if (sub->content_languages) {
2009            int n;
2010            r->content_languages = apr_array_copy(r->pool,
2011                                                  sub->content_languages);
2012            for (n = 0; n < r->content_languages->nelts; ++n) {
2013                char **lang = ((char **)r->content_languages->elts) + n;
2014                *lang = apr_pstrdup(r->pool, *lang);
2015            }
2016        }
2017        result = 1;
2018    }
2019
2020    /* clean up */
2021    ap_destroy_sub_req(sub);
2022
2023    return result;
2024}
2025
2026/*
2027 * Find the Content-Type from any resource this module has available
2028 */
2029
2030static int magic_find_ct(char *filename)
2031{
2032    int result;
2033
2034    /* the file has to exist */
2035    if (r->finfo.filetype == 0 || !r->filename) {
2036        return DECLINED;
2037    }
2038
2039    /* was someone else already here? */
2040    if (r->content_type) {
2041        return DECLINED;
2042    }
2043
2044    conf = ap_get_module_config(r->server->module_config, &mime_magic_module);
2045    if (!conf || !conf->magic) {
2046        return DECLINED;
2047    }
2048
2049    /* initialize per-request info */
2050    if (!magic_set_config(r)) {
2051        return HTTP_INTERNAL_SERVER_ERROR;
2052    }
2053
2054    /* try excluding file-revision suffixes */
2055    if (revision_suffix(r) != 1) {
2056        /* process it based on the file contents */
2057        if ((result = magic_process(filename)) != OK) {
2058            return result;
2059        }
2060    }
2061
2062    /* if we have any results, put them in the request structure */
2063    return magic_rsl_to_request(r);
2064}
2065
2066
2067#endif
Note: See TracBrowser for help on using the browser.