00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "kmimemagic.h"
00019 #include <kdebug.h>
00020 #include <kapplication.h>
00021 #include <qfile.h>
00022 #include <ksimpleconfig.h>
00023 #include <kstandarddirs.h>
00024 #include <kstaticdeleter.h>
00025 #include <assert.h>
00026
// Shared KMimeMagic instance; stays null until initStatic() assigns it.
KMimeMagic* KMimeMagic::s_pSelf = 0L;
// Holder that initStatic() hands the instance to via setObject().
KStaticDeleter<KMimeMagic> kmimemagicsd;
00029
00030 KMimeMagic* KMimeMagic::self()
00031 {
00032 if( !s_pSelf )
00033 initStatic();
00034 return s_pSelf;
00035 }
00036
00037 void KMimeMagic::initStatic()
00038 {
00039 s_pSelf = kmimemagicsd.setObject( new KMimeMagic() );
00040 s_pSelf->setFollowLinks( TRUE );
00041 }
00042
00043 #include <stdio.h>
00044 #include <unistd.h>
00045 #include <stdlib.h>
00046 #include <sys/wait.h>
00047 #include <sys/types.h>
00048 #include <sys/stat.h>
00049 #include <fcntl.h>
00050 #include <errno.h>
00051 #include <ctype.h>
00052 #include <time.h>
00053 #include <utime.h>
00054 #include <stdarg.h>
00055 #include <qregexp.h>
00056 #include <qstring.h>
00057
00058
00059
00060
00061
00062
00063
00064
// Either debug switch also enables DEBUG_LINENUMBERS, which adds a `lineno`
// member to struct magic so debug output can name the rule's source line.
#if (defined DEBUG_MIMEMAGIC || defined DEBUG_APPRENTICE)
#define DEBUG_LINENUMBERS
#endif

// Status codes. ERROR is what signextend() returns for an unknown rule type.
#define DECLINED 999
#define ERROR 998
#define OK 0

// MIME type strings assigned to resultBuf by the detection code.
#define MIME_BINARY_UNKNOWN "application/octet-stream"
#define MIME_BINARY_UNREADABLE "application/x-unreadable"
#define MIME_BINARY_ZEROSIZE "application/x-zerosize"
#define MIME_TEXT_UNKNOWN "text/plain"
#define MIME_TEXT_PLAIN "text/plain"
#define MIME_INODE_DIR "inode/directory"
#define MIME_INODE_CDEV "inode/chardevice"
#define MIME_INODE_BDEV "inode/blockdevice"
#define MIME_INODE_FIFO "inode/fifo"
#define MIME_INODE_LINK "inode/link"
#define MIME_INODE_SOCK "inode/socket"

#define MIME_APPL_TROFF "application/x-troff"
#define MIME_APPL_TAR "application/x-tar"
#define MIME_TEXT_FORTRAN "text/x-fortran"

#define MAXMIMESTRING 256

// HOWMANY:   number of bytes process() reads from the start of a file.
// MAXDESC:   size of the description buffer in struct magic.
// MAXstring: size of the string member of VALUETYPE.
#define HOWMANY 1024
#define MAXDESC 50
#define MAXstring 64
00100
// One value as seen by a magic rule: either the constant parsed from the
// rule or the bytes copied out of the file buffer. All members overlay the
// same storage; which one is meaningful depends on the rule's type.
typedef union VALUETYPE {
    unsigned char b;        // BYTE rules
    unsigned short h;       // SHORT / BESHORT / LESHORT rules
    unsigned long l;        // LONG / DATE rules and their BE/LE variants
    char s[MAXstring];      // STRING rules
    unsigned char hs[2];    // raw bytes mconvert() assembles into h
    unsigned char hl[4];    // raw bytes mconvert() assembles into l
} VALUETYPE;
00109
// One parsed rule of the magic file. Rules form a singly linked list in the
// order they were parsed (see KMimeMagic::parse()).
struct magic {
    struct magic *next;     // next rule in the list, NULL for the last one
#ifdef DEBUG_LINENUMBERS
    int lineno;             // source line of the rule (debug builds only)
#endif

    short flag;             // bit set of INDIR / UNSIGNED
#define INDIR 1
#define UNSIGNED 2
    short cont_level;       // number of leading '>' characters on the rule line
    struct {
        char type;          // BYTE, SHORT or LONG: how the indirect value is read
        long offset;        // constant added to the indirect value
    } in;                   // only meaningful when flag has INDIR set
    long offset;            // offset into the file buffer
    unsigned char reln;     // relation character: one of = ! < > & ^ x
    char type;              // one of the type codes defined below
    char vallen;            // length of value.s for STRING rules
#define BYTE 1
#define SHORT 2
#define LONG 4
#define STRING 5
#define DATE 6
#define BESHORT 7
#define BELONG 8
#define BEDATE 9
#define LESHORT 10
#define LELONG 11
#define LEDATE 12
    VALUETYPE value;        // constant the file data is compared against
    unsigned long mask;     // AND-ed onto the file value before comparing
    char nospflag;          // set when the description starts with \b


    char desc[MAXDESC];     // description text; match() copies it to resultBuf
};
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
// Layout of one 512-byte archive header record.
// NOTE(review): presumably consumed by is_tar(), whose body is not part of
// this section of the file - confirm there.
#define RECORDSIZE 512
#define NAMSIZ 100
#define TUNMLEN 32
#define TGNMLEN 32

union record {
    char charptr[RECORDSIZE];   // raw view of the whole record
    struct header {
        char name[NAMSIZ];
        char mode[8];
        char uid[8];
        char gid[8];
        char size[12];
        char mtime[12];
        char chksum[8];
        char linkflag;
        char linkname[NAMSIZ];
        char magic[8];
        char uname[TUNMLEN];
        char gname[TGNMLEN];
        char devmajor[8];
        char devminor[8];
    } header;                   // structured view of the same bytes
};


// Magic string (note the trailing blank) associated with the header above.
#define TMAGIC "ustar "
00188
00189
00190
00191
// Forward declarations of the file-local helpers used by the KMimeMagic
// member functions below.
static int is_tar(unsigned char *, int);
// Sign-extends a value according to the rule's type unless it is UNSIGNED.
static unsigned long signextend(struct magic *, unsigned long);
// Parses the comparison value of a rule and advances the line pointer.
static int getvalue(struct magic *, char **);
// Value of one hex digit, or -1.
static int hextoint(int);
// Copies one escaped, whitespace-delimited string out of a rule line.
static char *getstr(char *, char *, int, int *);
// Fetches the value a rule refers to from the file buffer.
static int mget(union VALUETYPE *, unsigned char *, struct magic *, int);
// Compares a fetched value against the rule's constant.
static int mcheck(union VALUETYPE *, struct magic *);
// Converts a fetched value according to the rule's type and byte order.
static int mconvert(union VALUETYPE *, struct magic *);
static long from_oct(int, char *);
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
// Bit flags, one per recognised text language. The keyword table `names`
// tags every keyword with the OR of the languages it can belong to.
#define L_HTML 0x001
#define L_C 0x002
#define L_MAKE 0x004
#define L_PLI 0x008
#define L_MACH 0x010
#define L_PAS 0x020
#define L_JAVA 0x040
#define L_CPP 0x080
#define L_MAIL 0x100
#define L_NEWS 0x200
#define L_DIFF 0x400

// Index of each language in the `types` table below; P_x is the bit number
// of the corresponding L_x flag.
#define P_HTML 0
#define P_C 1
#define P_MAKE 2
#define P_PLI 3
#define P_MACH 4
#define P_PAS 5
#define P_JAVA 6
#define P_CPP 7
#define P_MAIL 8
#define P_NEWS 9
#define P_DIFF 10

// Per-language record: the MIME type to report plus two numbers.
// NOTE(review): `kwords` and `weight` are consumed by code outside this
// section; presumably a keyword count and a scoring factor - confirm there.
typedef struct asc_type {
    const char *type;
    int kwords;
    double weight;
} asc_type;

// Must stay in P_* order: entry i describes the language whose index is i.
static const asc_type types[] = {
    { "text/html", 19, 2 },
    { "text/x-c", 9, 1.3 },
    { "text/x-makefile", 4, 1.9 },
    { "text/x-pli", 1, 3 },
    { "text/x-assembler", 6, 2.1 },
    { "text/x-pascal", 1, 1 },
    { "text/x-java", 14, 1 },
    { "text/x-c++", 14, 1 },
    { "message/rfc822", 4, 1.9 },
    { "message/news", 3, 2 },
    { "text/x-diff", 4, 2 }
};

// Number of entries in `types`.
#define NTYPES (sizeof(types)/sizeof(asc_type))
00261
00262 static struct names {
00263 const char *name;
00264 short type;
00265 } const names[] = {
00266 {
00267 "<html", L_HTML
00268 },
00269 {
00270 "<HTML", L_HTML
00271 },
00272 {
00273 "<head", L_HTML
00274 },
00275 {
00276 "<HEAD", L_HTML
00277 },
00278 {
00279 "<body", L_HTML
00280 },
00281 {
00282 "<BODY", L_HTML
00283 },
00284 {
00285 "<title", L_HTML
00286 },
00287 {
00288 "<TITLE", L_HTML
00289 },
00290 {
00291 "<h1", L_HTML
00292 },
00293 {
00294 "<H1", L_HTML
00295 },
00296 {
00297 "<a", L_HTML
00298 },
00299 {
00300 "<A", L_HTML
00301 },
00302 {
00303 "<img", L_HTML
00304 },
00305 {
00306 "<IMG", L_HTML
00307 },
00308 {
00309 "<!--", L_HTML
00310 },
00311 {
00312 "<!doctype", L_HTML
00313 },
00314 {
00315 "<!DOCTYPE", L_HTML
00316 },
00317 {
00318 "<div", L_HTML
00319 },
00320 {
00321 "<DIV", L_HTML
00322 },
00323 {
00324 "<frame", L_HTML
00325 },
00326 {
00327 "<FRAME", L_HTML
00328 },
00329 {
00330 "<frameset", L_HTML
00331 },
00332 {
00333 "<FRAMESET", L_HTML
00334 },
00335 {
00336 "<script", L_HTML
00337 },
00338 {
00339 "<SCRIPT", L_HTML
00340 },
00341 {
00342 "/*", L_C|L_CPP|L_JAVA
00343 },
00344 {
00345 "//", L_CPP|L_JAVA
00346 },
00347 {
00348 "#include", L_C|L_CPP
00349 },
00350 {
00351 "char", L_C|L_CPP|L_JAVA
00352 },
00353 {
00354 "double", L_C|L_CPP|L_JAVA
00355 },
00356 {
00357 "extern", L_C|L_CPP
00358 },
00359 {
00360 "float", L_C|L_CPP|L_JAVA
00361 },
00362 {
00363 "real", L_C|L_CPP|L_JAVA
00364 },
00365 {
00366 "struct", L_C|L_CPP
00367 },
00368 {
00369 "union", L_C|L_CPP
00370 },
00371 {
00372 "implements", L_JAVA
00373 },
00374 {
00375 "super", L_JAVA
00376 },
00377 {
00378 "import", L_JAVA
00379 },
00380 {
00381 "class", L_CPP|L_JAVA
00382 },
00383 {
00384 "public", L_CPP|L_JAVA
00385 },
00386 {
00387 "private", L_CPP|L_JAVA
00388 },
00389 {
00390 "CFLAGS", L_MAKE
00391 },
00392 {
00393 "LDFLAGS", L_MAKE
00394 },
00395 {
00396 "all:", L_MAKE
00397 },
00398 {
00399 ".PRECIOUS", L_MAKE
00400 },
00401
00402
00403
00404
00405 {
00406 ".ascii", L_MACH
00407 },
00408 {
00409 ".asciiz", L_MACH
00410 },
00411 {
00412 ".byte", L_MACH
00413 },
00414 {
00415 ".even", L_MACH
00416 },
00417 {
00418 ".globl", L_MACH
00419 },
00420 {
00421 "clr", L_MACH
00422 },
00423 {
00424 "(input", L_PAS
00425 },
00426 {
00427 "dcl", L_PLI
00428 },
00429 {
00430 "Received:", L_MAIL
00431 },
00432
00433
00434
00435 {
00436 "Return-Path:", L_MAIL
00437 },
00438 {
00439 "Cc:", L_MAIL
00440 },
00441 {
00442 "Newsgroups:", L_NEWS
00443 },
00444 {
00445 "Path:", L_NEWS
00446 },
00447 {
00448 "Organization:", L_NEWS
00449 },
00450 {
00451 "---", L_DIFF
00452 },
00453 {
00454 "+++", L_DIFF
00455 },
00456 {
00457 "***", L_DIFF
00458 },
00459 {
00460 "@@", L_DIFF
00461 },
00462 {
00463 NULL, 0
00464 }
00465 };
00466
00477 class KMimeMagicUtimeConf
00478 {
00479 public:
00480 KMimeMagicUtimeConf()
00481 {
00482 tmpDirs << QString::fromLatin1("/tmp");
00483
00484
00485
00486 QStringList confDirs = KGlobal::dirs()->resourceDirs( "config" );
00487 if ( !confDirs.isEmpty() )
00488 {
00489 QString globalConf = confDirs.last() + "kmimemagicrc";
00490 if ( QFile::exists( globalConf ) )
00491 {
00492 KSimpleConfig cfg( globalConf );
00493 cfg.setGroup( "Settings" );
00494 tmpDirs = cfg.readListEntry( "atimeDirs" );
00495 }
00496 if ( confDirs.count() > 1 )
00497 {
00498 QString localConf = confDirs.first() + "kmimemagicrc";
00499 if ( QFile::exists( localConf ) )
00500 {
00501 KSimpleConfig cfg( localConf );
00502 cfg.setGroup( "Settings" );
00503 tmpDirs += cfg.readListEntry( "atimeDirs" );
00504 }
00505 }
00506 for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00507 {
00508 QString dir = *it;
00509 if ( !dir.isEmpty() && dir[ dir.length()-1 ] != '/' )
00510 (*it) += '/';
00511 }
00512 }
00513 #if 0
00514
00515 for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00516 kdDebug(7018) << " atimeDir: " << *it << endl;
00517 #endif
00518 }
00519
00520 bool restoreAccessTime( const QString & file ) const
00521 {
00522 QString dir = file.left( file.findRev( '/' ) );
00523 bool res = tmpDirs.contains( dir );
00524
00525 return res;
00526 }
00527 QStringList tmpDirs;
00528 };
00529
00530
00531 struct config_rec {
00532 struct magic *magic,
00533 *last;
00534 KMimeMagicUtimeConf * utimeConf;
00535 };
00536
#ifdef MIME_MAGIC_DEBUG_TABLE
// Debug-only sanity check over the rule list. It looks for a node address
// whose four low-order bytes are all printable characters and stops at the
// first such node.
//
// NOTE(review): in this copy the statement that should open the argument
// list directly after the isprint() test is missing, so the comma-separated
// expressions followed by ");" do not form a valid statement. The function
// also reads `conf`, and no file-scope `conf` is visible in this section.
// The block would need repair before MIME_MAGIC_DEBUG_TABLE can be defined.
static void
test_table()
{
    struct magic *m;
    struct magic *prevm = NULL;     // node visited just before m

    kdDebug(7018) << "test_table : started" << endl;
    for (m = conf->magic; m; m = m->next) {
        if (isprint((((unsigned long) m) >> 24) & 255) &&
            isprint((((unsigned long) m) >> 16) & 255) &&
            isprint((((unsigned long) m) >> 8) & 255) &&
            isprint(((unsigned long) m) & 255)) {


            (((unsigned long) m) >> 24) & 255,
            (((unsigned long) m) >> 16) & 255,
            (((unsigned long) m) >> 8) & 255,
            ((unsigned long) m) & 255,
            prevm ? prevm->lineno : -1);
            break;
        }
        prevm = m;
    }
}
#endif
00563
// Advances the local pointer `l` past any run of ASCII whitespace.
// Expects a `char *l` to be in scope at the point of use.
#define EATAB {while (isascii((unsigned char) *l) && \
    isspace((unsigned char) *l)) ++l;}
00566
00567 int KMimeMagic::parse_line(char *line, int *rule, int lineno)
00568 {
00569 int ws_offset;
00570
00571
00572 if (line[0]) {
00573 line[strlen(line) - 1] = '\0';
00574 }
00575
00576 ws_offset = 0;
00577 while (line[ws_offset] && isspace(line[ws_offset])) {
00578 ws_offset++;
00579 }
00580
00581
00582 if (line[ws_offset] == 0) {
00583 return 0;
00584 }
00585
00586 if (line[ws_offset] == '#')
00587 return 0;
00588
00589
00590 (*rule)++;
00591
00592
00593 return (parse(line + ws_offset, lineno) != 0);
00594 }
00595
00596
00597
00598
00599 int KMimeMagic::apprentice( const QString& magicfile )
00600 {
00601 FILE *f;
00602 char line[BUFSIZ + 1];
00603 int errs = 0;
00604 int lineno;
00605 int rule = 0;
00606 QCString fname;
00607
00608 if (magicfile.isEmpty())
00609 return -1;
00610 fname = QFile::encodeName(magicfile);
00611 f = fopen(fname, "r");
00612 if (f == NULL) {
00613 kdError(7018) << "can't read magic file " << fname.data() << ": " << strerror(errno) << endl;
00614 return -1;
00615 }
00616
00617
00618 for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++)
00619 if (parse_line(line, &rule, lineno))
00620 errs++;
00621
00622 fclose(f);
00623
00624 #ifdef DEBUG_APPRENTICE
00625 kdDebug(7018) << "apprentice: conf=" << conf << " file=" << magicfile << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00626 kdDebug(7018) << "apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00627 #endif
00628
00629 #ifdef MIME_MAGIC_DEBUG_TABLE
00630 test_table();
00631 #endif
00632
00633 return (errs ? -1 : 0);
00634 }
00635
00636 int KMimeMagic::buff_apprentice(char *buff)
00637 {
00638 char line[BUFSIZ + 2];
00639 int errs = 0;
00640 int lineno = 1;
00641 char *start = buff;
00642 char *end;
00643 int count = 0;
00644 int rule = 0;
00645 int len = strlen(buff) + 1;
00646
00647
00648 do {
00649 count = (len > BUFSIZ-1)?BUFSIZ-1:len;
00650 strncpy(line, start, count);
00651 line[count] = '\0';
00652 if ((end = strchr(line, '\n'))) {
00653 *(++end) = '\0';
00654 count = strlen(line);
00655 } else
00656 strcat(line, "\n");
00657 start += count;
00658 len -= count;
00659 if (parse_line(line, &rule, lineno))
00660 errs++;
00661 lineno++;
00662 } while (len > 0);
00663
00664 #ifdef DEBUG_APPRENTICE
00665 kdDebug(7018) << "buff_apprentice: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00666 kdDebug(7018) << "buff_apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00667 #endif
00668
00669 #ifdef MIME_MAGIC_DEBUG_TABLE
00670 test_table();
00671 #endif
00672
00673 return (errs ? -1 : 0);
00674 }
00675
00676
00677
00678
00679 static unsigned long
00680 signextend(struct magic *m, unsigned long v)
00681 {
00682 if (!(m->flag & UNSIGNED))
00683 switch (m->type) {
00684
00685
00686
00687
00688
00689 case BYTE:
00690 v = (char) v;
00691 break;
00692 case SHORT:
00693 case BESHORT:
00694 case LESHORT:
00695 v = (short) v;
00696 break;
00697 case DATE:
00698 case BEDATE:
00699 case LEDATE:
00700 case LONG:
00701 case BELONG:
00702 case LELONG:
00703 v = (long) v;
00704 break;
00705 case STRING:
00706 break;
00707 default:
00708 kdError(7018) << "" << "signextend" << ": can't happen: m->type=" << m->type << endl;
00709 return ERROR;
00710 }
00711 return v;
00712 }
00713
00714
00715
00716
// Parses one rule line of a magic file into a newly allocated struct magic
// and appends it to the list kept in `conf`.
//
// l      - rule text with leading whitespace already removed.
// lineno - source line; the parameter only has a name when
//          DEBUG_LINENUMBERS is defined.
//
// Returns 0 on success and -1 when allocation fails or the type keyword is
// not recognised. An invalid offset or a malformed indirect offset is only
// logged.
//
// NOTE(review): the node is linked into the list before the line has been
// validated, so the "type ... invalid" return leaves a partly filled node
// (type 0 from calloc) in the list.
int KMimeMagic::parse(char *l, int
#ifdef DEBUG_LINENUMBERS
    lineno
#endif
    )
{
    int i = 0;
    struct magic *m;
    char *t,
    *s;

    if ((m = (struct magic *) calloc(1, sizeof(struct magic))) == NULL) {
        kdError(7018) << "parse: Out of memory." << endl;
        return -1;
    }

    // Append the new node to the rule list.
    m->next = NULL;
    if (!conf->magic || !conf->last) {
        conf->magic = conf->last = m;
    } else {
        conf->last->next = m;
        conf->last = m;
    }


    m->flag = 0;
    m->cont_level = 0;
#ifdef DEBUG_LINENUMBERS
    m->lineno = lineno;
#endif

    // Each leading '>' raises the continuation level by one.
    while (*l == '>') {
        ++l;
        m->cont_level++;
    }

    // "(" directly after the '>' prefix marks an indirect offset.
    if (m->cont_level != 0 && *l == '(') {
        ++l;
        m->flag |= INDIR;
    }

    // Offset; base 0 lets strtol accept decimal, octal and hex.
    m->offset = (int) strtol(l, &t, 0);
    if (l == t) {
        kdError(7018) << "parse: offset " << l << " invalid" << endl;
    }
    l = t;

    if (m->flag & INDIR) {
        // Defaults: the indirect value is read as a LONG, nothing is added.
        m->in.type = LONG;
        m->in.offset = 0;



        // Optional ".l", ".s" or ".b" selects the width of the indirect value.
        if (*l == '.') {
            switch (*++l) {
            case 'l':
                m->in.type = LONG;
                break;
            case 's':
                m->in.type = SHORT;
                break;
            case 'b':
                m->in.type = BYTE;
                break;
            default:
                kdError(7018) << "parse: indirect offset type " << *l << " invalid" << endl;
                break;
            }
            l++;
        }
        // Optional signed constant; `s` remembers where the sign would be.
        s = l;
        if (*l == '+' || *l == '-')
            l++;
        if (isdigit((unsigned char) *l)) {
            m->in.offset = strtol(l, &t, 0);
            if (*s == '-')
                m->in.offset = -m->in.offset;
        } else
            t = l;
        if (*t++ != ')') {
            kdError(7018) << "parse: missing ')' in indirect offset" << endl;
        }
        l = t;
    }
    // Skip any digits left over after the offset, then whitespace.
    while (isascii((unsigned char) *l) && isdigit((unsigned char) *l))
        ++l;
    EATAB;

// Lengths of the type keywords compared below.
#define NBYTE 4
#define NSHORT 5
#define NLONG 4
#define NSTRING 6
#define NDATE 4
#define NBESHORT 7
#define NBELONG 6
#define NBEDATE 6
#define NLESHORT 7
#define NLELONG 6
#define NLEDATE 6

    // A leading 'u' marks the type as unsigned.
    if (*l == 'u') {
        ++l;
        m->flag |= UNSIGNED;
    }

    // Type keyword.
    if (strncmp(l, "byte", NBYTE) == 0) {
        m->type = BYTE;
        l += NBYTE;
    } else if (strncmp(l, "short", NSHORT) == 0) {
        m->type = SHORT;
        l += NSHORT;
    } else if (strncmp(l, "long", NLONG) == 0) {
        m->type = LONG;
        l += NLONG;
    } else if (strncmp(l, "string", NSTRING) == 0) {
        m->type = STRING;
        l += NSTRING;
    } else if (strncmp(l, "date", NDATE) == 0) {
        m->type = DATE;
        l += NDATE;
    } else if (strncmp(l, "beshort", NBESHORT) == 0) {
        m->type = BESHORT;
        l += NBESHORT;
    } else if (strncmp(l, "belong", NBELONG) == 0) {
        m->type = BELONG;
        l += NBELONG;
    } else if (strncmp(l, "bedate", NBEDATE) == 0) {
        m->type = BEDATE;
        l += NBEDATE;
    } else if (strncmp(l, "leshort", NLESHORT) == 0) {
        m->type = LESHORT;
        l += NLESHORT;
    } else if (strncmp(l, "lelong", NLELONG) == 0) {
        m->type = LELONG;
        l += NLELONG;
    } else if (strncmp(l, "ledate", NLEDATE) == 0) {
        m->type = LEDATE;
        l += NLEDATE;
    } else {
        kdError(7018) << "parse: type " << l << " invalid" << endl;
        return -1;
    }

    // Optional "&mask" directly after the type; otherwise all bits are kept.
    if (*l == '&') {
        ++l;
        m->mask = signextend(m, strtol(l, &l, 0));
    } else
        m->mask = (unsigned long) ~0L;
    EATAB;

    // Relation character.
    switch (*l) {
    case '>':
    case '<':

    case '&':
    case '^':
    case '=':
        m->reln = *l;
        ++l;
        break;
    case '!':
        if (m->type != STRING) {
            m->reln = *l;
            ++l;
            break;
        }
        // For STRING rules '!' is not a relation: falls through to default.

    default:
        // A lone "x" followed by whitespace means "match anything"; such a
        // rule has no value to parse.
        if (*l == 'x' && isascii((unsigned char) l[1]) &&
            isspace((unsigned char) l[1])) {
            m->reln = *l;
            ++l;
            goto GetDesc;
        }
        // No relation character given: default to equality.
        m->reln = '=';
        break;
    }
    EATAB;

    if (getvalue(m, &l))
        return -1;



GetDesc:
    EATAB;
    // A description starting with a backspace character, or with the two
    // characters '\' 'b', sets nospflag.
    if (l[0] == '\b') {
        ++l;
        m->nospflag = 1;
    } else if ((l[0] == '\\') && (l[1] == 'b')) {
        ++l;
        ++l;
        m->nospflag = 1;
    } else
        m->nospflag = 0;

    // Copy the description up to end of line, a '#' or the buffer limit.
    while (*l != '\0' && *l != '#' && i < MAXDESC-1)
        m->desc[i++] = *l++;
    m->desc[i] = '\0';

    // Trim trailing whitespace; the character at index 0 is never examined.
    // NOTE(review): desc[i] is a plain char passed to isspace() uncast.
    while (--i>0 && isspace( m->desc[i] ))
        m->desc[i] = '\0';




#ifdef DEBUG_APPRENTICE
    kdDebug(7018) << "parse: line=" << lineno << " m=" << m << " next=" << m->next << " cont=" << m->cont_level << " desc=" << (m->desc ? m->desc : "NULL") << endl;
#endif
    return 0;
}
00928
00929
00930
00931
00932
00933
00934 static int
00935 getvalue(struct magic *m, char **p)
00936 {
00937 int slen;
00938
00939 if (m->type == STRING) {
00940 *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
00941 m->vallen = slen;
00942 } else if (m->reln != 'x')
00943 m->value.l = signextend(m, strtol(*p, p, 0));
00944 return 0;
00945 }
00946
00947
00948
00949
00950
00951
00952 static char *
00953 getstr(register char *s, register char *p, int plen, int *slen)
00954 {
00955 char *origs = s,
00956 *origp = p;
00957 char *pmax = p + plen - 1;
00958 register int c;
00959 register int val;
00960
00961 while ((c = *s++) != '\0') {
00962 if (isspace((unsigned char) c))
00963 break;
00964 if (p >= pmax) {
00965 kdError(7018) << "String too long: " << origs << endl;
00966 break;
00967 }
00968 if (c == '\\') {
00969 switch (c = *s++) {
00970
00971 case '\0':
00972 goto out;
00973
00974 default:
00975 *p++ = (char) c;
00976 break;
00977
00978 case 'n':
00979 *p++ = '\n';
00980 break;
00981
00982 case 'r':
00983 *p++ = '\r';
00984 break;
00985
00986 case 'b':
00987 *p++ = '\b';
00988 break;
00989
00990 case 't':
00991 *p++ = '\t';
00992 break;
00993
00994 case 'f':
00995 *p++ = '\f';
00996 break;
00997
00998 case 'v':
00999 *p++ = '\v';
01000 break;
01001
01002
01003 case '0':
01004 case '1':
01005 case '2':
01006 case '3':
01007 case '4':
01008 case '5':
01009 case '6':
01010 case '7':
01011 val = c - '0';
01012 c = *s++;
01013 if (c >= '0' && c <= '7') {
01014 val = (val << 3) | (c - '0');
01015 c = *s++;
01016 if (c >= '0' && c <= '7')
01017 val = (val << 3) | (c - '0');
01018 else
01019 --s;
01020 } else
01021 --s;
01022 *p++ = (char) val;
01023 break;
01024
01025
01026 case 'x':
01027 val = 'x';
01028 c = hextoint(*s++);
01029 if (c >= 0) {
01030 val = c;
01031 c = hextoint(*s++);
01032 if (c >= 0) {
01033 val = (val << 4) + c;
01034 c = hextoint(*s++);
01035 if (c >= 0) {
01036 val = (val << 4) + c;
01037 } else
01038 --s;
01039 } else
01040 --s;
01041 } else
01042 --s;
01043 *p++ = (char) val;
01044 break;
01045 }
01046 } else
01047 *p++ = (char) c;
01048 }
01049 out:
01050 *p = '\0';
01051 *slen = p - origp;
01052 return s;
01053 }
01054
01055
01056
// Converts one hexadecimal digit character to its numeric value.
// Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything else.
static int
hextoint(int c)
{
    const unsigned char byte = (unsigned char) c;

    if (isascii(byte)) {
        if (isdigit(byte))
            return c - '0';
        if (c >= 'a' && c <= 'f')
            return c - 'a' + 10;
        if (c >= 'A' && c <= 'F')
            return c - 'A' + 10;
    }
    return -1;
}
01070
01071
01072
01073
// Converts the raw bytes in `p`, as copied from the file buffer, into the
// representation the rule's type calls for.
//
// p - value to convert in place.
// m - rule supplying the type.
//
// Returns 1 on success and 0 (after logging) for an unknown type.
//
// NOTE(review): the WORDS_BIGENDIAN conditionals route the un-prefixed
// SHORT/LONG/DATE types to the big-endian conversion when WORDS_BIGENDIAN
// is NOT defined and to the little-endian one when it is. Confirm that this
// direction is intended.
static int
mconvert(union VALUETYPE *p, struct magic *m)
{
    char *rt;

    switch (m->type) {
    case BYTE:
        // A single byte needs no conversion.
        return 1;
    case STRING:

        // Force termination, then cut the string at the first newline.
        p->s[sizeof(p->s) - 1] = '\0';
        if ((rt = strchr(p->s, '\n')) != NULL)
            *rt = '\0';
        return 1;
#ifndef WORDS_BIGENDIAN
    case SHORT:
#endif
    case BESHORT:
        // First byte is the most significant one.
        p->h = (short) ((p->hs[0] << 8) | (p->hs[1]));
        return 1;
#ifndef WORDS_BIGENDIAN
    case LONG:
    case DATE:
#endif
    case BELONG:
    case BEDATE:
        // The four bytes are combined as int before the cast to long.
        // NOTE(review): where long is wider than int, a set top bit would
        // presumably sign-extend into the upper half of p->l - confirm.
        p->l = (long)
            ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
        return 1;
#ifdef WORDS_BIGENDIAN
    case SHORT:
#endif
    case LESHORT:
        // First byte is the least significant one.
        p->h = (short) ((p->hs[1] << 8) | (p->hs[0]));
        return 1;
#ifdef WORDS_BIGENDIAN
    case LONG:
    case DATE:
#endif
    case LELONG:
    case LEDATE:
        p->l = (long)
            ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
        return 1;
    default:
        kdError(7018) << "mconvert: invalid type " << m->type << endl;
        return 0;
    }
}
01123
01124
01125 static int
01126 mget(union VALUETYPE *p, unsigned char *s, struct magic *m,
01127 int nbytes)
01128 {
01129 long offset = m->offset;
01130
01131
01132
01133 if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01134 {
01135 int have = nbytes - offset;
01136 memset(p, 0, sizeof(union VALUETYPE));
01137 if (have > 0)
01138 memcpy(p, s + offset, have);
01139 } else
01140 memcpy(p, s + offset, sizeof(union VALUETYPE));
01141
01142 if (!mconvert(p, m))
01143 return 0;
01144
01145 if (m->flag & INDIR) {
01146
01147 switch (m->in.type) {
01148 case BYTE:
01149 offset = p->b + m->in.offset;
01150 break;
01151 case SHORT:
01152 offset = p->h + m->in.offset;
01153 break;
01154 case LONG:
01155 offset = p->l + m->in.offset;
01156 break;
01157 }
01158
01159 if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01160 return 0;
01161
01162 memcpy(p, s + offset, sizeof(union VALUETYPE));
01163
01164 if (!mconvert(p, m))
01165 return 0;
01166 }
01167 return 1;
01168 }
01169
01170 static int
01171 mcheck(union VALUETYPE *p, struct magic *m)
01172 {
01173 register unsigned long l = m->value.l;
01174 register unsigned long v;
01175 int matched;
01176
01177 if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) {
01178 kdError(7018) << "BOINK" << endl;
01179 return 1;
01180 }
01181 switch (m->type) {
01182 case BYTE:
01183 v = p->b;
01184 break;
01185
01186 case SHORT:
01187 case BESHORT:
01188 case LESHORT:
01189 v = p->h;
01190 break;
01191
01192 case LONG:
01193 case BELONG:
01194 case LELONG:
01195 case DATE:
01196 case BEDATE:
01197 case LEDATE:
01198 v = p->l;
01199 break;
01200
01201 case STRING:
01202 l = 0;
01203
01204
01205
01206
01207
01208 v = 0;
01209 {
01210 register unsigned char *a = (unsigned char *) m->value.s;
01211 register unsigned char *b = (unsigned char *) p->s;
01212 register int len = m->vallen;
01213 Q_ASSERT(len);
01214
01215 while (--len >= 0)
01216 if ((v = *b++ - *a++) != 0)
01217 break;
01218 }
01219 break;
01220 default:
01221 kdError(7018) << "mcheck: invalid type " << m->type << endl;
01222 return 0;
01223 }
01224 #if 0
01225 debug("Before signextend %08x", v);
01226 #endif
01227 v = signextend(m, v) & m->mask;
01228 #if 0
01229 debug("After signextend %08x", v);
01230 #endif
01231
01232 switch (m->reln) {
01233 case 'x':
01234 matched = 1;
01235 break;
01236
01237 case '!':
01238 matched = v != l;
01239 break;
01240
01241 case '=':
01242 matched = v == l;
01243 break;
01244
01245 case '>':
01246 if (m->flag & UNSIGNED)
01247 matched = v > l;
01248 else
01249 matched = (long) v > (long) l;
01250 break;
01251
01252 case '<':
01253 if (m->flag & UNSIGNED)
01254 matched = v < l;
01255 else
01256 matched = (long) v < (long) l;
01257 break;
01258
01259 case '&':
01260 matched = (v & l) == l;
01261 break;
01262
01263 case '^':
01264 matched = (v & l) != l;
01265 break;
01266
01267 default:
01268 matched = 0;
01269 kdError(7018) << "mcheck: can't happen: invalid relation " << m->reln << "." << endl;
01270 break;
01271 }
01272
01273 return matched;
01274 }
01275
// Disabled code (compiled out with #if 0).
//
// finishResult() scans resultBuf with a small state machine, expecting
// "type/subtype", optionally followed by whitespace and an encoding, and
// stores the two parts in magicResult.
#if 0


// Scanner states; the code advances through them in this order with state++.
typedef enum {
    rsl_leading_space, rsl_type, rsl_subtype, rsl_separator, rsl_encoding
} rsl_states;


int
KMimeMagic::finishResult()
{
    int cur_pos,
    type_pos,
    type_len,
    encoding_pos,
    encoding_len;

    int state;

    state = rsl_leading_space;
    type_pos = type_len = 0;
    encoding_pos = encoding_len = 0;


    for (cur_pos = 0; cur_pos < (int)resultBuf.length(); cur_pos++) {
        if (resultBuf[cur_pos].isSpace()) {

            // Whitespace
            if (state == rsl_leading_space) {

                // still before the type
                continue;
            } else if (state == rsl_type) {

                // whitespace before the '/' was seen: give up
                return DECLINED;
            } else if (state == rsl_subtype) {

                // end of the subtype
                state++;
                continue;
            } else if (state == rsl_separator) {

                // more whitespace between subtype and encoding
                continue;
            } else if (state == rsl_encoding) {


                // end of the encoding: done
                break;
            } else {


                kdError(7018) << "KMimeMagic::finishResult: bad state " << state << " (ws)" << endl;
                return DECLINED;
            }

        } else if (state == rsl_type &&
                   resultBuf.at(cur_pos) == '/') {

            // the '/' is counted as part of the type span
            type_len++;
            state++;
        } else {

            // Any other character
            if (state == rsl_leading_space) {

                // first character of the type
                state++;
                type_pos = cur_pos;
                type_len = 1;
                continue;
            } else if (state == rsl_type ||
                       state == rsl_subtype) {

                type_len++;
                continue;
            } else if (state == rsl_separator) {

                // first character of the encoding
                state++;
                encoding_pos = cur_pos;
                encoding_len = 1;
                continue;
            } else if (state == rsl_encoding) {

                encoding_len++;
                continue;
            } else {


                kdError(7018) << " KMimeMagic::finishResult: bad state " << state << " (ns)" << endl;
                return DECLINED;
            }

        }

    }


    // Anything that stopped before a subtype was seen is rejected.
    if (state != rsl_subtype && state != rsl_separator &&
        state != rsl_encoding) {

        return DECLINED;
    }

    // NOTE(review): rsl_encoding is tested twice in this condition.
    if (state == rsl_subtype || state == rsl_encoding ||
        state == rsl_encoding || state == rsl_separator) {
        magicResult->setMimeType(resultBuf.mid(type_pos, type_len).ascii());
    }
    if (state == rsl_encoding)
        magicResult->setEncoding(resultBuf.mid(encoding_pos,
                                               encoding_len).ascii());

    if (!magicResult->mimeType() ||
        (state == rsl_encoding && !magicResult->encoding())) {
        return -1;
    }

    return OK;
}
#endif
01389
01390
01391
01392
01393
01394 void
01395 KMimeMagic::process(const QString & fn)
01396 {
01397 int fd = 0;
01398 unsigned char buf[HOWMANY + 1];
01399 struct stat sb;
01400 int nbytes = 0;
01401 QCString fileName = QFile::encodeName( fn );
01402
01403
01404
01405
01406 if (fsmagic(fileName, &sb) != 0) {
01407
01408 return;
01409 }
01410 if ((fd = open(fileName, O_RDONLY)) < 0) {
01411
01412
01413
01414
01415
01416
01417 resultBuf = MIME_BINARY_UNREADABLE;
01418 return;
01419 }
01420
01421
01422
01423 if ((nbytes = read(fd, (char *) buf, HOWMANY)) == -1) {
01424 kdError(7018) << "" << fn << " read failed (" << strerror(errno) << ")." << endl;
01425 resultBuf = MIME_BINARY_UNREADABLE;
01426 return;
01427
01428 }
01429 if (nbytes == 0) {
01430 resultBuf = MIME_BINARY_ZEROSIZE;
01431 } else {
01432 buf[nbytes++] = '\0';
01433 tryit(buf, nbytes);
01434 }
01435
01436 if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) )
01437 {
01438
01439
01440
01441
01442
01443 struct utimbuf utbuf;
01444 utbuf.actime = sb.st_atime;
01445 utbuf.modtime = sb.st_mtime;
01446 (void) utime(fileName, &utbuf);
01447 }
01448 (void) close(fd);
01449 }
01450
01451
01452 void
01453 KMimeMagic::tryit(unsigned char *buf, int nb)
01454 {
01455
01456 if (match(buf, nb))
01457 return;
01458
01459
01460 if (ascmagic(buf, nb) == 1)
01461 return;
01462
01463
01464 if (textmagic(buf, nb))
01465 return;
01466
01467
01468 resultBuf = MIME_BINARY_UNKNOWN;
01469 accuracy = 0;
01470 }
01471
01472 int
01473 KMimeMagic::fsmagic(const char *fn, struct stat *sb)
01474 {
01475 int ret = 0;
01476
01477
01478
01479
01480
01481 ret = lstat(fn, sb);
01482
01483 if (ret) {
01484 return 1;
01485 }
01486
01487
01488
01489
01490
01491
01492 switch (sb->st_mode & S_IFMT) {
01493 case S_IFDIR:
01494 resultBuf = MIME_INODE_DIR;
01495 return 1;
01496 case S_IFCHR:
01497 resultBuf = MIME_INODE_CDEV;
01498 return 1;
01499 case S_IFBLK:
01500 resultBuf = MIME_INODE_BDEV;
01501 return 1;
01502
01503 #ifdef S_IFIFO
01504 case S_IFIFO:
01505 resultBuf = MIME_INODE_FIFO;;
01506 return 1;
01507 #endif
01508 #ifdef S_IFLNK
01509 case S_IFLNK:
01510 {
01511 char buf[BUFSIZ + BUFSIZ + 4];
01512 register int nch;
01513 struct stat tstatbuf;
01514
01515 if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) {
01516 resultBuf = MIME_INODE_LINK;
01517
01518 return 1;
01519 }
01520 buf[nch] = '\0';
01521
01522 if (*buf == '/') {
01523 if (stat(buf, &tstatbuf) < 0) {
01524 resultBuf = MIME_INODE_LINK;
01525
01526 return 1;
01527 }
01528 } else {
01529 char *tmp;
01530 char buf2[BUFSIZ + BUFSIZ + 4];
01531
01532 strncpy(buf2, fn, BUFSIZ);
01533 buf2[BUFSIZ] = 0;
01534
01535 if ((tmp = strrchr(buf2, '/')) == NULL) {
01536 tmp = buf;
01537 } else {
01538
01539 *++tmp = '\0';
01540 strcat(buf2, buf);
01541 tmp = buf2;
01542 }
01543 if (stat(tmp, &tstatbuf) < 0) {
01544 resultBuf = MIME_INODE_LINK;
01545
01546 return 1;
01547 } else
01548 strcpy(buf, tmp);
01549 }
01550 if (followLinks)
01551 process( QFile::decodeName( buf ) );
01552 else
01553 resultBuf = MIME_INODE_LINK;
01554 return 1;
01555 }
01556 return 1;
01557 #endif
01558 #ifdef S_IFSOCK
01559 #ifndef __COHERENT__
01560 case S_IFSOCK:
01561 resultBuf = MIME_INODE_SOCK;
01562 return 1;
01563 #endif
01564 #endif
01565 case S_IFREG:
01566 break;
01567 default:
01568 kdError(7018) << "KMimeMagic::fsmagic: invalid mode 0" << sb->st_mode << "." << endl;
01569
01570 }
01571
01572
01573
01574
01575 if (sb->st_size == 0) {
01576 resultBuf = MIME_BINARY_ZEROSIZE;
01577 return 1;
01578 }
01579 return 0;
01580 }
01581
01582
01583
01584
01585
01586
01587
01588
01589
01590
01591
01592
01593
01594
01595
01596
01597
01598
01599
01600
01601
01602
01603
01604
01605
01606
01607
01608 int
01609 KMimeMagic::match(unsigned char *s, int nbytes)
01610 {
01611 int cont_level = 0;
01612 union VALUETYPE p;
01613 struct magic *m;
01614
01615 #ifdef DEBUG_MIMEMAGIC
01616 kdDebug(7018) << "match: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
01617 for (m = conf->magic; m; m = m->next) {
01618 if (isprint((((unsigned long) m) >> 24) & 255) &&
01619 isprint((((unsigned long) m) >> 16) & 255) &&
01620 isprint((((unsigned long) m) >> 8) & 255) &&
01621 isprint(((unsigned long) m) & 255)) {
01622 kdDebug(7018) << "match: POINTER CLOBBERED! " << endl;
01623 break;
01624 }
01625 }
01626 #endif
01627
01628 for (m = conf->magic; m; m = m->next) {
01629 #ifdef DEBUG_MIMEMAGIC
01630 kdDebug(7018) << "match: line=" << m->lineno << " desc=" << m->desc << endl;
01631 #endif
01632 memset(&p, 0, sizeof(union VALUETYPE));
01633
01634
01635 if (!mget(&p, s, m, nbytes) ||
01636 !mcheck(&p, m)) {
01637 struct magic *m_cont;
01638
01639
01640
01641
01642 if (!m->next || (m->next->cont_level == 0)) {
01643 continue;
01644 }
01645 m_cont = m->next;
01646 while (m_cont && (m_cont->cont_level != 0)) {
01647 #ifdef DEBUG_MIMEMAGIC
01648 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m_cont->cont_level << " mc=" << m_cont->lineno << " mc->next=" << m_cont << " " << endl;
01649 #endif
01650
01651
01652
01653
01654 m = m_cont;
01655 m_cont = m_cont->next;
01656 }
01657 continue;
01658 }
01659
01660
01661 #ifdef DEBUG_MIMEMAGIC
01662 kdDebug(7018) << "match: rule matched, line=" << m->lineno << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01663 #endif
01664
01665
01666 resultBuf = m->desc;
01667
01668 cont_level++;
01669
01670
01671
01672
01673 m = m->next;
01674 while (m && (m->cont_level != 0)) {
01675 #ifdef DEBUG_MIMEMAGIC
01676 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m->cont_level << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01677 #endif
01678 if (cont_level >= m->cont_level) {
01679 if (cont_level > m->cont_level) {
01680
01681
01682
01683
01684 cont_level = m->cont_level;
01685 }
01686 if (mget(&p, s, m, nbytes) &&
01687 mcheck(&p, m)) {
01688
01689
01690
01691
01692
01693
01694 #ifdef DEBUG_MIMEMAGIC
01695 kdDebug(7018) << "continuation matched" << endl;
01696 #endif
01697 resultBuf = m->desc;
01698 cont_level++;
01699 }
01700 }
01701
01702 m = m->next;
01703 }
01704
01705
01706 if ( !resultBuf.isEmpty() )
01707 {
01708 #ifdef DEBUG_MIMEMAGIC
01709 kdDebug(7018) << "match: matched" << endl;
01710 #endif
01711 return 1;
01712 }
01713 }
01714 #ifdef DEBUG_MIMEMAGIC
01715 kdDebug(7018) << "match: failed" << endl;
01716 #endif
01717 return 0;
01718 }
01719
01720
01721 #define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0)
01722
01723 int
01724 KMimeMagic::ascmagic(unsigned char *buf, int nbytes)
01725 {
01726 int i;
01727 double pct, maxpct, pctsum;
01728 double pcts[NTYPES];
01729 int mostaccurate, tokencount;
01730 int typeset, jonly, conly, jconly, cppcomm, ccomm;
01731 int has_escapes = 0;
01732 unsigned char *s;
01733 char nbuf[HOWMANY + 1];
01734 char *token;
01735 register const struct names *p;
01736 int typecount[NTYPES];
01737
01738
01739 accuracy = 70;
01740
01741
01742
01743
01744
01745
01746 if (*buf == '.') {
01747 unsigned char *tp = buf + 1;
01748
01749 while (isascii(*tp) && isspace(*tp))
01750 ++tp;
01751 if ((isascii(*tp) && (isalnum(*tp) || *tp == '\\') &&
01752 isascii(*(tp + 1)) && (isalnum(*(tp + 1)) || *tp == '"'))) {
01753 resultBuf = MIME_APPL_TROFF;
01754 return 1;
01755 }
01756 }
01757 if ((*buf == 'c' || *buf == 'C') &&
01758 isascii(*(buf + 1)) && isspace(*(buf + 1))) {
01759
01760 resultBuf = MIME_TEXT_FORTRAN;
01761 return 1;
01762 }
01763 assert(nbytes-1 < HOWMANY + 1);
01764
01765
01766 s = (unsigned char *) memcpy(nbuf, buf, nbytes);
01767 s[nbytes-1] = '\0';
01768 has_escapes = (memchr(s, '\033', nbytes) != NULL);
01769
01770
01771
01772
01773 memset(&typecount, 0, sizeof(typecount));
01774 typeset = 0;
01775 jonly = 0;
01776 conly = 0;
01777 jconly = 0;
01778 cppcomm = 0;
01779 ccomm = 0;
01780 tokencount = 0;
01781 bool foundClass = false;
01782
01783
01784 while ((token = strtok((char *) s, " \t\n\r\f,;>")) != NULL) {
01785 s = NULL;
01786 #ifdef DEBUG_MIMEMAGIC
01787 kdDebug(7018) << "KMimeMagic::ascmagic token=" << token << endl;
01788 #endif
01789 for (p = names; p->name ; p++) {
01790 if (STREQ(p->name, token)) {
01791 #ifdef DEBUG_MIMEMAGIC
01792 kdDebug(7018) << "KMimeMagic::ascmagic token matches ! name=" << p->name << " type=" << p->type << endl;
01793 #endif
01794 tokencount++;
01795 typeset |= p->type;
01796 if (p->type == L_JAVA)
01797 jonly++;
01798 if ((p->type & (L_C|L_CPP|L_JAVA))
01799 == (L_CPP|L_JAVA)) {
01800 jconly++;
01801 if ( !foundClass && STREQ("class", token) )
01802 foundClass = true;
01803 }
01804 if ((p->type & (L_C|L_CPP|L_JAVA))
01805 == (L_C|L_CPP))
01806 conly++;
01807 if (STREQ(token, "//"))
01808 cppcomm++;
01809 if (STREQ(token, "/*"))
01810 ccomm++;
01811 for (i = 0; i < (int)NTYPES; i++)
01812 if ((1 << i) & p->type)
01813 typecount[i]++;
01814 }
01815 }
01816 }
01817
01818 if (typeset & (L_C|L_CPP|L_JAVA)) {
01819 accuracy = 40;
01820 if (!(typeset & ~(L_C|L_CPP|L_JAVA))) {
01821 #ifdef DEBUG_MIMEMAGIC
01822 kdDebug(7018) << "C/C++/Java: jonly=" << jonly << " conly=" << conly << " jconly=" << jconly << " ccomm=" << ccomm << endl;
01823 #endif
01824 if (jonly && conly)
01825
01826 if ( jonly > conly )
01827 conly = 0;
01828 else
01829 jonly = 0;
01830 if (jonly > 1 && foundClass) {
01831
01832 resultBuf = QString(types[P_JAVA].type);
01833 return 1;
01834 }
01835 if (jconly > 1) {
01836
01837 if (typecount[P_JAVA] > typecount[P_CPP])
01838 resultBuf = QString(types[P_JAVA].type);
01839 else
01840 resultBuf = QString(types[P_CPP].type);
01841 return 1;
01842 }
01843 if (conly) {
01844
01845 if (cppcomm)
01846 resultBuf = QString(types[P_CPP].type);
01847 else
01848 resultBuf = QString(types[P_C].type);
01849 return 1;
01850 }
01851 if (ccomm) {
01852 resultBuf = QString(types[P_C].type);
01853 return 1;
01854 }
01855 }
01856 }
01857
01858
01859
01860
01861
01862 mostaccurate = -1;
01863 maxpct = pctsum = 0.0;
01864 for (i = 0; i < (int)NTYPES; i++) {
01865 if (typecount[i] > 1) {
01866 pct = (double)typecount[i] / (double)types[i].kwords *
01867 (double)types[i].weight;
01868 pcts[i] = pct;
01869 pctsum += pct;
01870 if (pct > maxpct) {
01871 maxpct = pct;
01872 mostaccurate = i;
01873 }
01874 #ifdef DEBUG_MIMEMAGIC
01875 kdDebug(7018) << "" << types[i].type << " has " << typecount[i] << " hits, " << types[i].kwords << " kw, weight " << types[i].weight << ", " << pct << " -> max = " << maxpct << "\n" << endl;
01876 #endif
01877 }
01878 }
01879 if (mostaccurate >= 0) {
01880 if ( mostaccurate != P_JAVA || foundClass )
01881 {
01882 accuracy = (int)(pcts[mostaccurate] / pctsum * 60);
01883 #ifdef DEBUG_MIMEMAGIC
01884 kdDebug(7018) << "mostaccurate=" << mostaccurate << " pcts=" << pcts[mostaccurate] << " pctsum=" << pctsum << " accuracy=" << accuracy << endl;
01885 #endif
01886 resultBuf = QString(types[mostaccurate].type);
01887 return 1;
01888 }
01889 }
01890
01891 switch (is_tar(buf, nbytes)) {
01892 case 1:
01893
01894 resultBuf = MIME_APPL_TAR;
01895 accuracy = 90;
01896 return 1;
01897 case 2:
01898
01899 resultBuf = MIME_APPL_TAR;
01900 accuracy = 90;
01901 return 1;
01902 }
01903
01904 for (i = 0; i < nbytes; i++) {
01905 if (!isascii(*(buf + i)))
01906 return 0;
01907 }
01908
01909
01910 accuracy = 90;
01911 if (has_escapes) {
01912
01913
01914 resultBuf = MIME_TEXT_UNKNOWN;
01915 } else {
01916
01917 resultBuf = MIME_TEXT_PLAIN;
01918 }
01919 return 1;
01920 }
01921
01922
01923 #define TEXT_MAXLINELEN 300
01924
01925
01926
01927
01928
01929 int KMimeMagic::textmagic(unsigned char * buf, int nbytes)
01930 {
01931 int i;
01932 unsigned char *cp;
01933
01934 nbytes--;
01935
01936
01937 for (i = 0, cp = buf; i < nbytes; i++, cp++)
01938 if ((*cp < 8) || (*cp>13 && *cp<32 && *cp!=27 ) || (*cp==0x7F))
01939 return 0;
01940
01941
01942
01943
01944 for (i = 0; i < nbytes;) {
01945 cp = (unsigned char *) memchr(buf, '\n', nbytes - i);
01946 if (cp == NULL) {
01947
01948 if (i + TEXT_MAXLINELEN >= nbytes)
01949 break;
01950 else
01951 return 0;
01952 }
01953 if (cp - buf > TEXT_MAXLINELEN)
01954 return 0;
01955 i += (cp - buf + 1);
01956 buf = cp + 1;
01957 }
01958 resultBuf = MIME_TEXT_PLAIN;
01959 return 1;
01960 }
01961
01962
01963
01964
01965
01966
01967
01968
01969
01970
01971
01972
01973
01974
01975
01976 #define isodigit(c) ( ((c) >= '0') && ((c) <= '7') )
01977
01978
01979
01980
01981
01982
01983 static int
01984 is_tar(unsigned char *buf, int nbytes)
01985 {
01986 register union record *header = (union record *) buf;
01987 register int i;
01988 register long sum,
01989 recsum;
01990 register char *p;
01991
01992 if (nbytes < (int)sizeof(union record))
01993 return 0;
01994
01995 recsum = from_oct(8, header->header.chksum);
01996
01997 sum = 0;
01998 p = header->charptr;
01999 for (i = sizeof(union record); --i >= 0;) {
02000
02001
02002
02003
02004 sum += 0xFF & *p++;
02005 }
02006
02007
02008 for (i = sizeof(header->header.chksum); --i >= 0;)
02009 sum -= 0xFF & header->header.chksum[i];
02010 sum += ' ' * sizeof header->header.chksum;
02011
02012 if (sum != recsum)
02013 return 0;
02014
02015 if (0 == strcmp(header->header.magic, TMAGIC))
02016 return 2;
02017
02018 return 1;
02019 }
02020
02021
02022
02023
02024
02025
02026
/*
 * from_oct - decode an octal number from a fixed-width text field.
 *
 * digs   width of the field in characters
 * where  start of the field; need not be NUL terminated within digs
 *
 * Leading whitespace is skipped, then up to the remaining width of
 * octal digits is read.  The digits must be followed by whitespace, a
 * NUL, or the end of the field.
 *
 * Returns the decoded value, or -1 if the field consists only of
 * whitespace or the digits are followed by some other character.
 */
static long
from_oct(int digs, char *where)
{
	long value = 0;

	/*
	 * The <ctype.h> functions are only defined for values representable
	 * as unsigned char (or EOF), so plain char must be converted first;
	 * header bytes >= 0x80 would otherwise be passed as negative ints.
	 */
	while (isspace((unsigned char) *where)) {	/* skip spaces */
		where++;
		if (--digs <= 0)
			return -1;	/* all-blank field */
	}

	while (digs > 0 && *where >= '0' && *where <= '7') {	/* scan till non-octal */
		value = (value << 3) | (*where++ - '0');
		--digs;
	}

	if (digs > 0 && *where && !isspace((unsigned char) *where))
		return -1;	/* ended on a junk character */

	return value;
}
02048
02049 KMimeMagic::KMimeMagic()
02050 {
02051
02052 QString mimefile = locate( "mime", "magic" );
02053 init( mimefile );
02054
02055 QStringList snippets = KGlobal::dirs()->findAllResources( "config", "magic/*.magic", true );
02056 for ( QStringList::Iterator it = snippets.begin() ; it != snippets.end() ; ++it )
02057 if ( !mergeConfig( *it ) )
02058 kdWarning() << k_funcinfo << "Failed to parse " << *it << endl;
02059 }
02060
02061 KMimeMagic::KMimeMagic(const QString & _configfile)
02062 {
02063 init( _configfile );
02064 }
02065
02066 void KMimeMagic::init( const QString& _configfile )
02067 {
02068 int result;
02069 conf = new config_rec;
02070
02071
02072 conf->magic = conf->last = NULL;
02073 magicResult = NULL;
02074 followLinks = FALSE;
02075
02076 conf->utimeConf = 0L;
02077
02078 result = apprentice(_configfile);
02079 if (result == -1)
02080 return;
02081 #ifdef MIME_MAGIC_DEBUG_TABLE
02082 test_table();
02083 #endif
02084 }
02085
02086
02087
02088
02089
02090 KMimeMagic::~KMimeMagic()
02091 {
02092 if (conf) {
02093 struct magic *p = conf->magic;
02094 struct magic *q;
02095 while (p) {
02096 q = p;
02097 p = p->next;
02098 free(q);
02099 }
02100 delete conf->utimeConf;
02101 delete conf;
02102 }
02103 delete magicResult;
02104 }
02105
02106 bool
02107 KMimeMagic::mergeConfig(const QString & _configfile)
02108 {
02109 kdDebug(7018) << k_funcinfo << _configfile << endl;
02110 int result;
02111
02112 if (_configfile.isEmpty())
02113 return false;
02114 result = apprentice(_configfile);
02115 if (result == -1) {
02116 return false;
02117 }
02118 #ifdef MIME_MAGIC_DEBUG_TABLE
02119 test_table();
02120 #endif
02121 return true;
02122 }
02123
02124 bool
02125 KMimeMagic::mergeBufConfig(char * _configbuf)
02126 {
02127 int result;
02128
02129 if (conf) {
02130 result = buff_apprentice(_configbuf);
02131 if (result == -1)
02132 return false;
02133 #ifdef MIME_MAGIC_DEBUG_TABLE
02134 test_table();
02135 #endif
02136 return true;
02137 }
02138 return false;
02139 }
02140
02141 void
02142 KMimeMagic::setFollowLinks( bool _enable )
02143 {
02144 followLinks = _enable;
02145 }
02146
02147 KMimeMagicResult *
02148 KMimeMagic::findBufferType(const QByteArray &array)
02149 {
02150 unsigned char buf[HOWMANY + 1];
02151
02152 resultBuf = QString::null;
02153 if ( !magicResult )
02154 magicResult = new KMimeMagicResult();
02155 magicResult->setInvalid();
02156 accuracy = 100;
02157
02158 int nbytes = array.size();
02159
02160 if (nbytes > HOWMANY)
02161 nbytes = HOWMANY;
02162 memcpy(buf, array.data(), nbytes);
02163 if (nbytes == 0) {
02164 resultBuf = MIME_BINARY_ZEROSIZE;
02165 } else {
02166 buf[nbytes++] = '\0';
02167 tryit(buf, nbytes);
02168 }
02169
02170
02171 magicResult->setMimeType(resultBuf.stripWhiteSpace());
02172 magicResult->setAccuracy(accuracy);
02173 return magicResult;
02174 }
02175
02176 static void
02177 refineResult(KMimeMagicResult *r, const QString & _filename)
02178 {
02179 QString tmp = r->mimeType();
02180 if (tmp.isEmpty())
02181 return;
02182 if ( tmp == "text/x-c" ||
02183 tmp == "text/x-c++" )
02184 {
02185 if ( _filename.right(2) == ".h" )
02186 tmp += "hdr";
02187 else
02188 tmp += "src";
02189 r->setMimeType(tmp);
02190 }
02191 }
02192
02193 KMimeMagicResult *
02194 KMimeMagic::findBufferFileType( const QByteArray &data,
02195 const QString &fn)
02196 {
02197 KMimeMagicResult * r = findBufferType( data );
02198 refineResult(r, fn);
02199 return r;
02200 }
02201
02202
02203
02204
02205 KMimeMagicResult* KMimeMagic::findFileType(const QString & fn)
02206 {
02207 #ifdef DEBUG_MIMEMAGIC
02208 kdDebug(7018) << "KMimeMagic::findFileType " << fn << endl;
02209 #endif
02210 resultBuf = QString::null;
02211
02212 if ( !magicResult )
02213 magicResult = new KMimeMagicResult();
02214 magicResult->setInvalid();
02215 accuracy = 100;
02216
02217 if ( !conf->utimeConf )
02218 conf->utimeConf = new KMimeMagicUtimeConf();
02219
02220
02221 process( fn );
02222
02223
02224
02225 magicResult->setMimeType(resultBuf.stripWhiteSpace());
02226 magicResult->setAccuracy(accuracy);
02227 refineResult(magicResult, fn);
02228 return magicResult;
02229 }