* [9front] file: return the correct mime type for mbox, recognize interpeted executables in a better way, fix for some false positives
@ 2021-04-11 16:07 kemal
0 siblings, 0 replies; only message in thread
From: kemal @ 2021-04-11 16:07 UTC (permalink / raw)
To: 9front
the following patch does these:
1. return the correct mime type for mbox, application/mbox.
(see rfc 4155)
2. remove some magic strings as these are also found in xml
files, not just html.
3. recognize interpreted executables (ones that start with #!)
in a much better way. this change also makes file capable
of detecting awk, sed, bash and perl scripts.
diff -r cc8420fa5fce sys/src/cmd/file.c
--- a/sys/src/cmd/file.c Fri Apr 09 16:48:07 2021 +0200
+++ b/sys/src/cmd/file.c Sun Apr 11 19:06:18 2021 +0300
@@ -169,6 +169,7 @@
int isface(void);
int isexec(void);
int isudiff(void);
+int isintrexec(void);
int p9bitnum(char*, int*);
int p9subfont(uchar*);
void print_utf(void);
@@ -182,6 +183,7 @@
istring, /* recognizable by first string */
iself, /* ELF (foreign) executable */
isexec, /* native executables */
+ isintrexec, /* interpeted executables */
iff, /* interchange file format (strings) */
longoff, /* recognizable by 4 bytes at some offset */
isoffstr, /* recognizable by string at some offset */
@@ -198,7 +200,7 @@
isp9bit, /* plan 9 image (as from /dev/window) */
isrtf, /* rich text format */
ismsdos, /* msdos exe (virus file attachement) */
- isicocur, /* windows icon or cursor file */
+ isicocur, /* windows icon or cursor file */
isface, /* ascii face file */
istga,
ismp4,
@@ -722,6 +724,40 @@
return 0;
}
+/* interpreted executables: print the type of a "#!" script and return 1; return 0 if buf has no recognized "#!" line */
+int
+isintrexec(void)
+{
+ char *p;
+
+ if (memcmp("#!", buf, 2) != 0) /* buf must begin with "#!" (buf is declared elsewhere; presumably the file's leading bytes) */
+ return 0;
+ p = (char*)buf+2; /* p points just past the "#!" */
+ if (strncmp("/bin/", p, 5) == 0) /* only these three interpreter directories are accepted */
+ p += 5;
+ else if (strncmp("/usr/bin/", p, 9) == 0)
+ p += 9;
+ else if (strncmp("/usr/local/bin/", p, 15) == 0)
+ p += 15;
+ else
+ return 0; /* any other path (including a space after "#!") is not claimed here */
+
+ if (strncmp("rc", p, 2) == 0) /* p is now at the interpreter name; NOTE(review): these are prefix compares, so any name starting with "rc" matches */
+ print("%s\n", mime ? PLAIN : "rc executable file"); /* mime nonzero selects the MIME type, otherwise the description */
+ else if (strncmp("sh", p, 2) == 0) /* NOTE(review): likewise matches any name starting with "sh" */
+ print("%s\n", mime ? "application/x-sh" : "sh executable file");
+ else if (strncmp("bash", p, 4) == 0) /* bash shares the MIME type printed for sh */
+ print("%s\n", mime ? "application/x-sh" : "bash executable file");
+ else if (strncmp("awk", p, 3) == 0)
+ print("%s\n", mime ? PLAIN : "awk script");
+ else if (strncmp("sed", p, 3) == 0)
+ print("%s\n", mime ? PLAIN : "sed script");
+ else if (strncmp("perl", p, 4) == 0)
+ print("%s\n", mime ? PLAIN : "perl script");
+ else /* accepted directory but unlisted interpreter, e.g. "#!/usr/bin/env ..." */
+ print("%s\n", mime ? PLAIN : "interpeted executable file");
+ return 1; /* exactly one line has been printed */
+}
/* from tar.c */
enum { NAMSIZ = 100, TBLOCK = 512 };
@@ -805,8 +841,6 @@
"!<arch>\n__.SYMDEF", "archive random library", 16, OCTET,
"!<arch>\n", "archive", 8, OCTET,
"070707", "cpio archive - ascii header", 6, OCTET,
- "#!/bin/rc", "rc executable file", 9, PLAIN,
- "#!/bin/sh", "sh executable file", 9, PLAIN,
"%!", "postscript", 2, "application/postscript",
"\004%!", "postscript", 3, "application/postscript",
"x T post", "troff output for post", 8, "application/troff",
@@ -820,10 +854,8 @@
"%PDF", "PDF", 4, "application/pdf",
"<!DOCTYPE", "HTML file", 9, "text/html",
"<!doctype", "HTML file", 9, "text/html",
- "<!--", "HTML file", 4, "text/html",
"<html>", "HTML file", 6, "text/html",
"<HTML>", "HTML file", 6, "text/html",
- "<?xml", "HTML file", 5, "text/html",
"\111\111\052\000", "tiff", 4, "image/tiff",
"\115\115\000\052", "tiff", 4, "image/tiff",
"\377\330\377\340", "jpeg", 4, "image/jpeg",
@@ -1108,7 +1140,7 @@
return 0;
*q = 0;
if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
- print("%s\n", mime ? PLAIN : "mail box");
+ print("%s\n", mime ? "application/mbox" : "mail box");
return 1;
}
*q = '\n';
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2021-04-12 7:10 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-11 16:07 [9front] file: return the correct mime type for mbox, recognize interpeted executables in a better way, fix for some false positives kemal
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).