9front - general discussion about 9front
 help / color / mirror / Atom feed
* [9front] file: return the correct mime type for mbox, recognize interpeted executables in a better way, fix for some false positives
@ 2021-04-11 16:07 kemal
  0 siblings, 0 replies; only message in thread
From: kemal @ 2021-04-11 16:07 UTC (permalink / raw)
  To: 9front

the following patch makes these changes:

1. return the correct mime type for mbox, application/mbox.
(see rfc 4155)
2. remove the html magic strings for comment openers and xml
declarations, as these are also found in xml files, not just html.
3. recognize interpreted executables (ones that start with #!)
in a much better way. this change also makes file capable
of detecting awk, sed, bash and perl scripts.

diff -r cc8420fa5fce sys/src/cmd/file.c
--- a/sys/src/cmd/file.c	Fri Apr 09 16:48:07 2021 +0200
+++ b/sys/src/cmd/file.c	Sun Apr 11 19:06:18 2021 +0300
@@ -169,6 +169,7 @@
 int	isface(void);
 int	isexec(void);
 int	isudiff(void);
+int	isintrexec(void);
 int	p9bitnum(char*, int*);
 int	p9subfont(uchar*);
 void	print_utf(void);
@@ -182,6 +183,7 @@
 	istring,	/* recognizable by first string */
 	iself,		/* ELF (foreign) executable */
 	isexec,		/* native executables */
+	isintrexec,	/* interpeted executables */
 	iff,		/* interchange file format (strings) */
 	longoff,	/* recognizable by 4 bytes at some offset */
 	isoffstr,	/* recognizable by string at some offset */
@@ -198,7 +200,7 @@
 	isp9bit,	/* plan 9 image (as from /dev/window) */
 	isrtf,		/* rich text format */
 	ismsdos,	/* msdos exe (virus file attachement) */
-	isicocur,		/* windows icon or cursor file */
+	isicocur,	/* windows icon or cursor file */
 	isface,		/* ascii face file */
 	istga,
 	ismp4,
@@ -722,6 +724,40 @@
 	return 0;
 }

+/* interpeted executables */
+int
+isintrexec(void)
+{
+	char *p;
+
+	if (memcmp("#!", buf, 2) != 0)
+		return 0;
+	p = (char*)buf+2;
+	if (strncmp("/bin/", p, 5) == 0)
+		p += 5;
+	else if (strncmp("/usr/bin/", p, 9) == 0)
+		p += 9;
+	else if (strncmp("/usr/local/bin/", p, 15) == 0)
+		p += 15;
+	else
+		return 0;
+
+	if (strncmp("rc", p, 2) == 0)
+		print("%s\n", mime ? PLAIN : "rc executable file");
+	else if (strncmp("sh", p, 2) == 0)
+		print("%s\n", mime ? "application/x-sh" : "sh executable file");
+	else if (strncmp("bash", p, 4) == 0)
+		print("%s\n", mime ? "application/x-sh" : "bash executable file");
+	else if (strncmp("awk", p, 3) == 0)
+		print("%s\n", mime ? PLAIN : "awk script");
+	else if (strncmp("sed", p, 3) == 0)
+		print("%s\n", mime ? PLAIN : "sed script");
+	else if (strncmp("perl", p, 4) == 0)
+		print("%s\n", mime ? PLAIN : "perl script");
+	else
+		print("%s\n", mime ? PLAIN : "interpeted executable file");
+	return 1;
+}

 /* from tar.c */
 enum { NAMSIZ = 100, TBLOCK = 512 };
@@ -805,8 +841,6 @@
 	"!<arch>\n__.SYMDEF",	"archive random library",	16,	OCTET,
 	"!<arch>\n",		"archive",			8,	OCTET,
 	"070707",		"cpio archive - ascii header",	6,	OCTET,
-	"#!/bin/rc",		"rc executable file",		9,	PLAIN,
-	"#!/bin/sh",		"sh executable file",		9,	PLAIN,
 	"%!",			"postscript",			2,	"application/postscript",
 	"\004%!",		"postscript",			3,	"application/postscript",
 	"x T post",		"troff output for post",	8,	"application/troff",
@@ -820,10 +854,8 @@
 	"%PDF",			"PDF",				4,	"application/pdf",
 	"<!DOCTYPE",		"HTML file",			9,	"text/html",
 	"<!doctype",		"HTML file",			9,	"text/html",
-	"<!--",			"HTML file",			4,	"text/html",
 	"<html>",		"HTML file",			6,	"text/html",
 	"<HTML>",		"HTML file",			6,	"text/html",
-	"<?xml",		"HTML file",			5,	"text/html",
 	"\111\111\052\000",	"tiff",				4,	"image/tiff",
 	"\115\115\000\052",	"tiff",				4,	"image/tiff",
 	"\377\330\377\340",	"jpeg",				4,	"image/jpeg",
@@ -1108,7 +1140,7 @@
 		return 0;
 	*q = 0;
 	if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
-		print("%s\n", mime ? PLAIN : "mail box");
+		print("%s\n", mime ? "application/mbox" : "mail box");
 		return 1;
 	}
 	*q = '\n';

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-04-12  7:10 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-11 16:07 [9front] file: return the correct mime type for mbox, recognize interpeted executables in a better way, fix for some false positives kemal

9front - general discussion about 9front

This inbox may be cloned and mirrored by anyone:

	git clone --mirror http://inbox.vuxu.org/9front

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 9front 9front/ http://inbox.vuxu.org/9front \
		9front@9front.org
	public-inbox-index 9front

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.vuxu.org/vuxu.archive.9front


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git