The following patch does three things:

1. Returns the correct MIME type for mbox files, application/mbox
   (see RFC 4155).
2. Removes some magic strings ("<!--" and "<?xml"), as these are also
   found in XML files, not just HTML.
3. Recognizes interpreted executables (ones that start with #!) in a
   much better way. This change also makes file capable of detecting
   awk, sed, bash and perl scripts.

diff -r cc8420fa5fce sys/src/cmd/file.c
--- a/sys/src/cmd/file.c	Fri Apr 09 16:48:07 2021 +0200
+++ b/sys/src/cmd/file.c	Sun Apr 11 19:06:18 2021 +0300
@@ -169,6 +169,7 @@
 int	isface(void);
 int	isexec(void);
 int	isudiff(void);
+int	isintrexec(void);
 int	p9bitnum(char*, int*);
 int	p9subfont(uchar*);
 void	print_utf(void);
@@ -182,6 +183,7 @@
 	istring,	/* recognizable by first string */
 	iself,		/* ELF (foreign) executable */
 	isexec,		/* native executables */
+	isintrexec,	/* interpreted executables */
 	iff,		/* interchange file format (strings) */
 	longoff,	/* recognizable by 4 bytes at some offset */
 	isoffstr,	/* recognizable by string at some offset */
@@ -198,7 +200,7 @@
 	isp9bit,	/* plan 9 image (as from /dev/window) */
 	isrtf,		/* rich text format */
 	ismsdos,	/* msdos exe (virus file attachement) */
-	isicocur,		/* windows icon or cursor file */
+	isicocur,	/* windows icon or cursor file */
 	isface,		/* ascii face file */
 	istga,
 	ismp4,
@@ -722,6 +724,45 @@
 	return 0;
 }
 
+/*
+ * interpreted executables: a file that starts with "#!" followed by
+ * an interpreter in /bin, /usr/bin or /usr/local/bin.  the interpreter
+ * name is matched by prefix only.  prints the file type and returns 1
+ * on a match; returns 0 without printing otherwise.
+ */
+int
+isintrexec(void)
+{
+	char *p;
+
+	if (memcmp("#!", buf, 2) != 0)
+		return 0;
+	p = (char*)buf+2;	/* skip the "#!" */
+	if (strncmp("/bin/", p, 5) == 0)
+		p += 5;
+	else if (strncmp("/usr/bin/", p, 9) == 0)
+		p += 9;
+	else if (strncmp("/usr/local/bin/", p, 15) == 0)
+		p += 15;
+	else
+		return 0;
+
+	if (strncmp("rc", p, 2) == 0)
+		print("%s\n", mime ? PLAIN : "rc executable file");
+	else if (strncmp("sh", p, 2) == 0)
+		print("%s\n", mime ? "application/x-sh" : "sh executable file");
+	else if (strncmp("bash", p, 4) == 0)
+		print("%s\n", mime ? "application/x-sh" : "bash executable file");
+	else if (strncmp("awk", p, 3) == 0)
+		print("%s\n", mime ? PLAIN : "awk script");
+	else if (strncmp("sed", p, 3) == 0)
+		print("%s\n", mime ? PLAIN : "sed script");
+	else if (strncmp("perl", p, 4) == 0)
+		print("%s\n", mime ? PLAIN : "perl script");
+	else
+		print("%s\n", mime ? PLAIN : "interpreted executable file");
+	return 1;
+}
 /* from tar.c */
 enum { NAMSIZ = 100, TBLOCK = 512 };
 
@@ -805,8 +846,6 @@
 	"!<arch>\n__.SYMDEF",	"archive random library",	16,	OCTET,
 	"!<arch>\n",	"archive",	8,	OCTET,
 	"070707",	"cpio archive - ascii header",	6,	OCTET,
-	"#!/bin/rc",	"rc executable file",	9,	PLAIN,
-	"#!/bin/sh",	"sh executable file",	9,	PLAIN,
 	"%!",	"postscript",	2,	"application/postscript",
 	"\004%!",	"postscript",	3,	"application/postscript",
 	"x T post",	"troff output for post",	8,	"application/troff",
@@ -820,10 +859,8 @@
 	"%PDF",	"PDF",	4,	"application/pdf",
 	"<!DOCTYPE",	"HTML file",	9,	"text/html",
 	"<!doctype",	"HTML file",	9,	"text/html",
-	"<!--",	"HTML file",	4,	"text/html",
 	"<html>",	"HTML file",	6,	"text/html",
 	"<HTML>",	"HTML file",	6,	"text/html",
-	"<?xml",	"HTML file",	5,	"text/html",
 	"\111\111\052\000",	"tiff",	4,	"image/tiff",
 	"\115\115\000\052",	"tiff",	4,	"image/tiff",
 	"\377\330\377\340",	"jpeg",	4,	"image/jpeg",
@@ -1108,7 +1145,7 @@
 		return 0;
 	*q = 0;
 	if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
-		print("%s\n", mime ? PLAIN : "mail box");
+		print("%s\n", mime ? "application/mbox" : "mail box");
 		return 1;
 	}
 	*q = '\n';