9front - general discussion about 9front
 help / color / mirror / Atom feed
* [Bug] [PATCH] Mail cannot cope with multi-line header fields
@ 2020-09-09 19:33 theinicke
  2020-09-09 19:50 ` [9front] " ori
  0 siblings, 1 reply; 10+ messages in thread
From: theinicke @ 2020-09-09 19:33 UTC (permalink / raw)
  To: 9front

Having received a mail with line breaks in the subject I have noticed that Mail cannot deal with this. That is the current implementation assumes that each header field resides at a particular line inside the upasfs(4) info file.

Fixing this bug without touching the info file is probably error-prone, hence I'd like to propose two ways to correct this:

1. Replace newline characters with some other character - resulting in a minimal change and not breaking any scripts relying on the interface as it is currently provided

2. Add a field name to the entries in info file - then there is no need to make up some arbitrary character for encoding newlines; also this is probably more correct

Note that plan9port does not suffer from this; this is because each entry in the info file is prefixed with a field name. Given the fact that we are already incompatible to plan9ports upasfs maybe breaking the interface is neglectable.

Inlined is a patch which prefixes each entry in the info file with a field name and fixes Mail to work with it accordingly. In this first implementation Mail ignores the presence of any additional lines. Thoughts?

--
Tobias Heinicke

diff -r d8b6a8706f51 sys/src/cmd/upas/Mail/mesg.c
--- a/sys/src/cmd/upas/Mail/mesg.c	Mon Sep 07 19:32:50 2020 -0700
+++ b/sys/src/cmd/upas/Mail/mesg.c	Wed Sep 09 21:24:10 2020 +0200
@@ -75,18 +75,16 @@
 };
 
 char*
-line(char *data, char **pp)
+linep(char *data, char **pp)
 {
 	char *p, *q;
 
 	for(p=data; *p!='\0' && *p!='\n'; p++)
 		;
 	if(*p == '\n')
-		*pp = p+1;
-	else
-		*pp = p;
-	q = emalloc(p-data + 1);
-	memmove(q, data, p-data);
+		*p++ = '\0';
+	*pp = p;
+	q = data;
 	return q;
 }
 
@@ -110,31 +108,44 @@
 loadinfo(Message *m, char *dir)
 {
 	int n;
-	char *data, *p;
+	char *data, *p, *l, *t;
 
 	data = readfile(dir, "info", &n);
 	if(data == nil)
 		return 0;
-	m->from = line(data, &p);
-	m->to = line(p, &p);
-	m->cc = line(p, &p);
-	m->replyto = line(p, &p);
-	m->date = line(p, &p);
-	m->subject = line(p, &p);
-	m->type = line(p, &p);
-	m->disposition = line(p, &p);
-	m->filename = line(p, &p);
-	m->digest = line(p, &p);
-	/* m->bcc = */ free(line(p, &p));
-	/* m->inreplyto = */ free(line(p, &p));
-	/* m->date = */ free(line(p, &p));
-	/* m->sender = */ free(line(p, &p));
-	/* m->messageid = */ free(line(p, &p));
-	/* m->lines = */ free(line(p, &p));
-	/* m->size = */ free(line(p, &p));
-	m->flags = line(p, &p);
-	/* m->fileid = */ free(line(p, &p));
-	m->fromcolon = fc(m, line(p, &p));
+
+	l = linep(data, &p);
+	while(*l != '\0') {
+		t = strchr(l, ' ');
+		if(t != nil) {
+			*t++ = '\0';
+			if(strcmp(l, "from") == 0)
+				m->from = estrdup(t);
+			else if(strcmp(l, "to") == 0)
+				m->to = estrdup(t);
+			else if(strcmp(l, "cc") == 0)
+				m->cc = estrdup(t);
+			else if(strcmp(l, "replyto") == 0)
+				m->replyto = estrdup(t);
+			else if(strcmp(l, "unixdate") == 0)
+				m->date = estrdup(t);
+			else if(strcmp(l, "subject") == 0)
+				m->subject = estrdup(t);
+			else if(strcmp(l, "type") == 0)
+				m->type = estrdup(t);
+			else if(strcmp(l, "disposition") == 0)
+				m->disposition = estrdup(t);
+			else if(strcmp(l, "filename") == 0)
+				m->filename = estrdup(t);
+			else if(strcmp(l, "digest") == 0)
+				m->digest = estrdup(t);
+			else if(strcmp(l, "flags") == 0)
+				m->flags = estrdup(t);
+			else if(strcmp(l, "ffrom") == 0)
+				m->fromcolon = fc(m, estrdup(t));
+		}
+		l = linep(p, &p);
+	}
 
 	free(data);
 	return 1;
diff -r d8b6a8706f51 sys/src/cmd/upas/fs/fs.c
--- a/sys/src/cmd/upas/fs/fs.c	Mon Sep 07 19:32:50 2020 -0700
+++ b/sys/src/cmd/upas/fs/fs.c	Wed Sep 09 21:24:10 2020 +0200
@@ -568,8 +568,8 @@
 int
 readinfo(Mailbox *mb, Message *m, char *buf, long off, int count)
 {
-	char *s, *p, *e;
-	int i, n;
+	char *s, *p, *e, *fieldname;
+	int i, n, u;
 	long off0;
 
 	if(m->infolen > 0 && off >= m->infolen)
@@ -584,11 +584,24 @@
 		}
 		if((n = fileinfo(mb, m, infofields[i], &p)) < 0)
 			return -1;
-		if(off > n){
-			off -= n + 1;
+		fieldname = dirtab[infofields[i]];
+		u = strlen(fieldname) + 1;
+		if(off > n+u){
+			off -= n + u + 1;
 			continue;
 		}
-		if(off){
+		if(off <= u){
+			u -= off;
+			fieldname += off;
+			off = 0;
+
+			if(s + u > e)
+				u = e - s;
+			memcpy(s, fieldname, u);
+			s += u;
+			*(s-1) = ' ';	
+		}
+		if(off) {
 			n -= off;
 			p += off;
 			off = 0;



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [9front] [Bug] [PATCH] Mail cannot cope with multi-line header fields
  2020-09-09 19:33 [Bug] [PATCH] Mail cannot cope with multi-line header fields theinicke
@ 2020-09-09 19:50 ` ori
  2020-09-10  6:27   ` theinicke
  0 siblings, 1 reply; 10+ messages in thread
From: ori @ 2020-09-09 19:50 UTC (permalink / raw)
  To: theinicke, 9front

> Having received a mail with line breaks in the subject I have noticed that Mail cannot deal with this. That is the current implementation assumes that each header field resides at a particular line inside the upasfs(4) info file.
> 
> Fixing this bug without touching the info file is probably error-prone, hence I'd like to propose two ways to correct this:
> 
> 1. Replace newline characters with some other character - resulting in a minimal change and not breaking any scripts relying on the interface as it is currently provided
> 
> 2. Add a field name to the entries in info file - then there is no need to make up some arbitrary character for encoding newlines; also this is probably more correct
> 
> Note that plan9port does not suffer from this; this is because each entry in the info file is prefixed with a field name. Given the fact that we are already incompatible to plan9ports upasfs maybe breaking the interface is neglectable.
> 
> Inlined is a patch which prefixes each entry in the info file with a field name and fixes Mail to work with it accordingly. In this first implementation Mail ignores the presence of any additional lines. Thoughts?


Will look at the code in more detail soon, but my first question is what happens when the
subject of a message is something like: 'the subject is\nsubject: blah'? If we're going to
make a breaking change, let's make sure that there's no ambiguity at all -- possibly
prefixing line splits with a space, or quoting the fields.



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [9front] [Bug] [PATCH] Mail cannot cope with multi-line header fields
  2020-09-09 19:50 ` [9front] " ori
@ 2020-09-10  6:27   ` theinicke
  2020-09-10 19:58     ` theinicke
  0 siblings, 1 reply; 10+ messages in thread
From: theinicke @ 2020-09-10  6:27 UTC (permalink / raw)
  To: 9front

Hi,

thanks for your reply. Concerning your question: This should not be a problem, at least if I have read the rfc correctly. We are getting white space in front of split lines for free, because in rfc2822 it says that the line breaks may only be inserted in front of white space. The relevant part is in section 2.2.3.

--
Tobias Heinicke



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [9front] [Bug] [PATCH] Mail cannot cope with multi-line header fields
  2020-09-10  6:27   ` theinicke
@ 2020-09-10 19:58     ` theinicke
  2020-09-10 21:12       ` ori
  0 siblings, 1 reply; 10+ messages in thread
From: theinicke @ 2020-09-10 19:58 UTC (permalink / raw)
  To: 9front

Maybe we should just get rid of the line breaks altogether (either at info file generation or possibly even before), because this would probably honor the following part of rfc2822 (this is identical in the newest rfc5322):

> Each header field should be treated in its unfolded form for further syntactic and semantic evaluation.

--
Tobias Heinicke



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [9front] [Bug] [PATCH] Mail cannot cope with multi-line header fields
  2020-09-10 19:58     ` theinicke
@ 2020-09-10 21:12       ` ori
  2020-09-11 21:08         ` theinicke
  0 siblings, 1 reply; 10+ messages in thread
From: ori @ 2020-09-10 21:12 UTC (permalink / raw)
  To: theinicke, 9front

> Maybe we should just get rid of the line breaks altogether (either at info file generation or possibly even before), because this would probably honor the following part of rfc2822 (this is identical in the newest rfc5322):
> 
>> Each header field should be treated in its unfolded form for further syntactic and semantic evaluation.
> 
> --
> Tobias Heinicke

In that case, yes. If the newlines are not intended to be semantically meaningful,
let's strip them. It simplifies consumers.



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [9front] [Bug] [PATCH] Mail cannot cope with multi-line header fields
  2020-09-10 21:12       ` ori
@ 2020-09-11 21:08         ` theinicke
  2020-09-12 21:19           ` theinicke
  2020-09-12 22:58           ` ori
  0 siblings, 2 replies; 10+ messages in thread
From: theinicke @ 2020-09-11 21:08 UTC (permalink / raw)
  To: 9front

Then the simplest way to do that might be something like the following (remove all line breaks from info fields, preserving it everywhere else).
But maybe we would like to take into account the fact that some info fields never contain line breaks or remove them not just in info file but earlier by stripping them in copy822,..

diff -r d8b6a8706f51 sys/src/cmd/upas/fs/fs.c
--- a/sys/src/cmd/upas/fs/fs.c	Mon Sep 07 19:32:50 2020 -0700
+++ b/sys/src/cmd/upas/fs/fs.c	Fri Sep 11 23:03:08 2020 +0200
@@ -387,6 +387,33 @@
 	return s;
 }
 
+int
+unfold(char *s, int len, char **pp)
+{
+	int i, u;
+	char c;
+	char *p;
+
+	if(s == nil) {
+		return 0;
+	}
+
+	p = malloc(len);
+	*pp = p;
+	u = 0;
+	for(i = 0; i < len; i++) {
+		c = s[i];
+		if(c != '\n') {
+			p[u++] = c;
+		}
+	}
+	if(u < len) {
+		p[u] = '\0';
+	}
+
+	return u;
+}
+
 static int
 fileinfo(Mailbox *mb, Message *m, int t, char **pp)
 {
@@ -568,7 +595,7 @@
 int
 readinfo(Mailbox *mb, Message *m, char *buf, long off, int count)
 {
-	char *s, *p, *e;
+	char *s, *p, *e, *q;
 	int i, n;
 	long off0;
 
@@ -584,18 +611,22 @@
 		}
 		if((n = fileinfo(mb, m, infofields[i], &p)) < 0)
 			return -1;
+		/* remove line breaks in fields s.t. consumers of info
+		can rely on finding particular field at certain line */
+		n = unfold(p, n, &q);
 		if(off > n){
 			off -= n + 1;
 			continue;
 		}
 		if(off){
 			n -= off;
-			p += off;
+			q += off;
 			off = 0;
 		}
 		if(s + n > e)
 			n = e - s;
-		memcpy(s, p, n);
+		memcpy(s, q, n);
+		free(q);
 		s += n;
 		if(s < e)
 			*s++ = '\n';



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [9front] [Bug] [PATCH] Mail cannot cope with multi-line header fields
  2020-09-11 21:08         ` theinicke
@ 2020-09-12 21:19           ` theinicke
  2020-09-12 22:58           ` ori
  1 sibling, 0 replies; 10+ messages in thread
From: theinicke @ 2020-09-12 21:19 UTC (permalink / raw)
  To: 9front

Sorry for the noise.
But if the solution of removing line breaks from all fields in just the info file (and not taking into account that by design some info fields never contain line breaks) is considered, please regard the following patch instead; as the former contained useless code.

--
Tobias Heinicke

diff -r 9f6e77258f69 sys/src/cmd/upas/fs/fs.c
--- a/sys/src/cmd/upas/fs/fs.c	Thu Sep 10 16:26:42 2020 -0700
+++ b/sys/src/cmd/upas/fs/fs.c	Sat Sep 12 23:09:31 2020 +0200
@@ -387,6 +387,26 @@
 	return s;
 }
 
+int
+unfold(char *s, int len, char **pp)
+{
+	int i, u;
+	char c;
+	char *p;
+
+	p = malloc(len);
+	u = 0;
+	for(i = 0; i < len; i++) {
+		c = s[i];
+		if(c != '\n') {
+			p[u++] = c;
+		}
+	}
+
+	*pp = p;
+	return u;
+}
+
 static int
 fileinfo(Mailbox *mb, Message *m, int t, char **pp)
 {
@@ -568,7 +588,7 @@
 int
 readinfo(Mailbox *mb, Message *m, char *buf, long off, int count)
 {
-	char *s, *p, *e;
+	char *s, *p, *e, *q;
 	int i, n;
 	long off0;
 
@@ -584,18 +604,24 @@
 		}
 		if((n = fileinfo(mb, m, infofields[i], &p)) < 0)
 			return -1;
+
+		/* remove line breaks in fields s.t. consumers of info
+		can rely on finding particular field on certain line */
+		n = unfold(p, n, &q);
+
 		if(off > n){
 			off -= n + 1;
 			continue;
 		}
 		if(off){
 			n -= off;
-			p += off;
+			q += off;
 			off = 0;
 		}
 		if(s + n > e)
 			n = e - s;
-		memcpy(s, p, n);
+		memcpy(s, q, n);
+		free(q);
 		s += n;
 		if(s < e)
 			*s++ = '\n';





^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [9front] [Bug] [PATCH] Mail cannot cope with multi-line header fields
  2020-09-11 21:08         ` theinicke
  2020-09-12 21:19           ` theinicke
@ 2020-09-12 22:58           ` ori
  2020-09-14 21:49             ` theinicke
  1 sibling, 1 reply; 10+ messages in thread
From: ori @ 2020-09-12 22:58 UTC (permalink / raw)
  To: theinicke, 9front

> Then the simplest way to do that might be something like the following (remove all line breaks from info fields, preserving it everywhere else).
> But maybe we would like to take into account the fact that some info fields never contain line breaks or remove them not just in info file but earlier by stripping them in copy822,..

I think I like your idea of doing it in copy822. How's this?

diff -r 9f6e77258f69 sys/src/cmd/upas/fs/mbox.c
--- a/sys/src/cmd/upas/fs/mbox.c	Thu Sep 10 16:26:42 2020 -0700
+++ b/sys/src/cmd/upas/fs/mbox.c	Sat Sep 12 15:57:45 2020 -0700
@@ -733,6 +733,19 @@
 }
 
 static char*
+unfold(char *s)
+{
+	char *p, *q;
+
+	q = s;
+	for(p = q; *p != '\0'; p++)
+		if(*p != '\r' && *p != '\n')
+			*q++ = *p;
+	*q = '\0';
+	return s;
+}
+
+static char*
 addr822(char *p, char **ac)
 {
 	int n, c, space, incomment, addrdone, inanticomment, quoted;
@@ -883,7 +896,7 @@
 static char*
 copy822(Message*, Header *h, char*, char *p)
 {
-	return rtrim(strdup(skipwhite(p + h->len)));
+	return rtrim(unfold(strdup(skipwhite(p + h->len))));
 }
 
 static char*



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [9front] [Bug] [PATCH] Mail cannot cope with multi-line header fields
  2020-09-12 22:58           ` ori
@ 2020-09-14 21:49             ` theinicke
  2020-09-17 23:01               ` ori
  0 siblings, 1 reply; 10+ messages in thread
From: theinicke @ 2020-09-14 21:49 UTC (permalink / raw)
  To: 9front

> I think I like your idea of doing it in copy822. How's this?

This is fine except for the potential corner case of having CRLF inside a quoted string in one of the header fields treated by addr822.

That is in rfc2855 it says in section 3.2.5 "Since a quoted-string is allowed to contain FWS, folding is permitted.". If we want to follow this path of fixing it here, then I think we also would have to at least amend the condition in /sys/src/cmd/upas/fs/mbox.c:763 (776 after applying your patch).

--
Tobias Heinicke

diff -r 9f6e77258f69 sys/src/cmd/upas/fs/mbox.c
--- a/sys/src/cmd/upas/fs/mbox.c	Thu Sep 10 16:26:42 2020 -0700
+++ b/sys/src/cmd/upas/fs/mbox.c	Mon Sep 14 23:40:27 2020 +0200
@@ -733,6 +733,19 @@
 }
 
 static char*
+unfold(char *s)
+{
+	char *p, *q;
+
+	q = s;
+	for(p = q; *p != '\0'; p++)
+		if(*p != '\r' && *p != '\n')
+			*q++ = *p;
+	*q = '\0';
+	return s;
+}
+
+static char*
 addr822(char *p, char **ac)
 {
 	int n, c, space, incomment, addrdone, inanticomment, quoted;
@@ -760,7 +773,7 @@
 			for(p++; c = *p; p++){
 				if(ac && c == '"')
 					break;
-				if(!addrdone && !incomment)
+				if(!addrdone && !incomment && c != '\r' && c != '\n')
 					ps = sputc(ps, e, c);
 				if(!quoted && *p == '"')
 					break;
@@ -883,7 +896,7 @@
 static char*
 copy822(Message*, Header *h, char*, char *p)
 {
-	return rtrim(strdup(skipwhite(p + h->len)));
+	return rtrim(unfold(strdup(skipwhite(p + h->len))));
 }
 
 static char*



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [9front] [Bug] [PATCH] Mail cannot cope with multi-line header fields
  2020-09-14 21:49             ` theinicke
@ 2020-09-17 23:01               ` ori
  0 siblings, 0 replies; 10+ messages in thread
From: ori @ 2020-09-17 23:01 UTC (permalink / raw)
  To: theinicke, 9front

>> I think I like your idea of doing it in copy822. How's this?
> 
> This is fine except for the potential corner case of having CRLF inside a quoted string in one of the header fields treated by addr822.
> 
> That is in rfc2855 it says in section 3.2.5 "Since a quoted-string is allowed to contain FWS, folding is permitted.". If we want to follow this path of fixing it here, then I think we also would have to at least amend the condition in /sys/src/cmd/upas/fs/mbox.c:763 (776 after applying your patch).

Looks good -- committing.

Thanks!



^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-09-17 23:01 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-09 19:33 [Bug] [PATCH] Mail cannot cope with multi-line header fields theinicke
2020-09-09 19:50 ` [9front] " ori
2020-09-10  6:27   ` theinicke
2020-09-10 19:58     ` theinicke
2020-09-10 21:12       ` ori
2020-09-11 21:08         ` theinicke
2020-09-12 21:19           ` theinicke
2020-09-12 22:58           ` ori
2020-09-14 21:49             ` theinicke
2020-09-17 23:01               ` ori

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).