9front - general discussion about 9front
 help / color / mirror / Atom feed
From: kemal <kemali13@protonmail.com>
To: "9front@9front.org" <9front@9front.org>
Subject: [9front] file: return the correct mime type for mbox, recognize interpreted executables in a better way, fix for some false positives
Date: Sun, 11 Apr 2021 16:07:39 +0000	[thread overview]
Message-ID: <caa8AdhU8wCcq2qM1c5GDNL-uaY1_m8pslAoCGWhJeyIevYw_gCAbR1F2U83pEWASYZcmqU2jfPW1Vcto6AKW8ZC_0tkGEw3SHefNVc4m6Y=@protonmail.com> (raw)

the following patch makes these changes:

1. return the correct mime type for mbox, application/mbox.
(see rfc 4155)
2. remove some magic strings, as these are also found in xml
files, not just html.
3. recognize interpreted executables (ones that start with #!)
in a much better way. this change also makes file capable
of detecting awk, sed, bash and perl scripts.

diff -r cc8420fa5fce sys/src/cmd/file.c
--- a/sys/src/cmd/file.c	Fri Apr 09 16:48:07 2021 +0200
+++ b/sys/src/cmd/file.c	Sun Apr 11 19:06:18 2021 +0300
@@ -169,6 +169,7 @@
 int	isface(void);
 int	isexec(void);
 int	isudiff(void);
+int	isintrexec(void);
 int	p9bitnum(char*, int*);
 int	p9subfont(uchar*);
 void	print_utf(void);
@@ -182,6 +183,7 @@
 	istring,	/* recognizable by first string */
 	iself,		/* ELF (foreign) executable */
 	isexec,		/* native executables */
+	isintrexec,	/* interpeted executables */
 	iff,		/* interchange file format (strings) */
 	longoff,	/* recognizable by 4 bytes at some offset */
 	isoffstr,	/* recognizable by string at some offset */
@@ -198,7 +200,7 @@
 	isp9bit,	/* plan 9 image (as from /dev/window) */
 	isrtf,		/* rich text format */
 	ismsdos,	/* msdos exe (virus file attachement) */
-	isicocur,		/* windows icon or cursor file */
+	isicocur,	/* windows icon or cursor file */
 	isface,		/* ascii face file */
 	istga,
 	ismp4,
@@ -722,6 +724,40 @@
 	return 0;
 }

+/* interpeted executables */
+int
+isintrexec(void)
+{
+	char *p;
+
+	if (memcmp("#!", buf, 2) != 0)
+		return 0;
+	p = (char*)buf+2;
+	if (strncmp("/bin/", p, 5) == 0)
+		p += 5;
+	else if (strncmp("/usr/bin/", p, 9) == 0)
+		p += 9;
+	else if (strncmp("/usr/local/bin/", p, 15) == 0)
+		p += 15;
+	else
+		return 0;
+
+	if (strncmp("rc", p, 2) == 0)
+		print("%s\n", mime ? PLAIN : "rc executable file");
+	else if (strncmp("sh", p, 2) == 0)
+		print("%s\n", mime ? "application/x-sh" : "sh executable file");
+	else if (strncmp("bash", p, 4) == 0)
+		print("%s\n", mime ? "application/x-sh" : "bash executable file");
+	else if (strncmp("awk", p, 3) == 0)
+		print("%s\n", mime ? PLAIN : "awk script");
+	else if (strncmp("sed", p, 3) == 0)
+		print("%s\n", mime ? PLAIN : "sed script");
+	else if (strncmp("perl", p, 4) == 0)
+		print("%s\n", mime ? PLAIN : "perl script");
+	else
+		print("%s\n", mime ? PLAIN : "interpeted executable file");
+	return 1;
+}

 /* from tar.c */
 enum { NAMSIZ = 100, TBLOCK = 512 };
@@ -805,8 +841,6 @@
 	"!<arch>\n__.SYMDEF",	"archive random library",	16,	OCTET,
 	"!<arch>\n",		"archive",			8,	OCTET,
 	"070707",		"cpio archive - ascii header",	6,	OCTET,
-	"#!/bin/rc",		"rc executable file",		9,	PLAIN,
-	"#!/bin/sh",		"sh executable file",		9,	PLAIN,
 	"%!",			"postscript",			2,	"application/postscript",
 	"\004%!",		"postscript",			3,	"application/postscript",
 	"x T post",		"troff output for post",	8,	"application/troff",
@@ -820,10 +854,8 @@
 	"%PDF",			"PDF",				4,	"application/pdf",
 	"<!DOCTYPE",		"HTML file",			9,	"text/html",
 	"<!doctype",		"HTML file",			9,	"text/html",
-	"<!--",			"HTML file",			4,	"text/html",
 	"<html>",		"HTML file",			6,	"text/html",
 	"<HTML>",		"HTML file",			6,	"text/html",
-	"<?xml",		"HTML file",			5,	"text/html",
 	"\111\111\052\000",	"tiff",				4,	"image/tiff",
 	"\115\115\000\052",	"tiff",				4,	"image/tiff",
 	"\377\330\377\340",	"jpeg",				4,	"image/jpeg",
@@ -1108,7 +1140,7 @@
 		return 0;
 	*q = 0;
 	if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
-		print("%s\n", mime ? PLAIN : "mail box");
+		print("%s\n", mime ? "application/mbox" : "mail box");
 		return 1;
 	}
 	*q = '\n';

                 reply	other threads:[~2021-04-12  7:10 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='caa8AdhU8wCcq2qM1c5GDNL-uaY1_m8pslAoCGWhJeyIevYw_gCAbR1F2U83pEWASYZcmqU2jfPW1Vcto6AKW8ZC_0tkGEw3SHefNVc4m6Y=@protonmail.com' \
    --to=kemali13@protonmail.com \
    --cc=9front@9front.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).