List for cgit developers and users
 help / color / mirror / Atom feed
From: plenz at cis.fu-berlin.de (Julius Plenz)
Subject: [PATCH 4/4] Introduce a fallback encoding (eg. for blobs)
Date: Tue, 30 Oct 2012 14:07:18 +0100	[thread overview]
Message-ID: <1351602438-955878-5-git-send-email-plenz@cis.fu-berlin.de> (raw)
In-Reply-To: <1351602438-955878-1-git-send-email-plenz@cis.fu-berlin.de>

Usually you'll want to deliver the web pages using UTF-8. Converting
Git's commit information to the PAGE_ENCODING is no problem, since a
commit that is not UTF-8 specifies the encoding that was used.

In the case of blobs, Git by design doesn't want to know anything about
the encoding. But to make the file appear "normal" in the browser, the
FALLBACK_ENCODING (default: latin1) serves as a hint as to which
encoding the string originates from. In case the plain file is delivered, the
encoding will be set to the fallback specified if the blob's contents
are not valid UTF-8.

The same applies to the "Tagger" information, since tag objects don't
have an "encoding" field. (See:
http://git.661346.n2.nabble.com/PATCH-RFC-Document-format-of-basic-Git-objects-tp7287428p7288762.html )

Signed-off-by: Julius Plenz <plenz at cis.fu-berlin.de>
---
 cgit.h      |  4 +++-
 parsing.c   | 11 +++++++++++
 ui-diff.c   |  2 +-
 ui-plain.c  |  2 ++
 ui-refs.c   |  2 +-
 ui-ssdiff.c | 10 +++++-----
 ui-tag.c    |  4 ++--
 ui-tree.c   |  2 +-
 8 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/cgit.h b/cgit.h
index 4a3f528..fb7b9db 100644
--- a/cgit.h
+++ b/cgit.h
@@ -43,9 +43,11 @@
 
 
 /*
- * Default encoding
+ * Default encoding and fallback encoding in case blobs are not valid UTF-8
  */
 #define PAGE_ENCODING "UTF-8"
+#define FALLBACK_ENCODING "latin1"
+extern const char *to_pageencoding(const char *txt);
 
 typedef void (*configfn)(const char *name, const char *value);
 typedef void (*filepair_fn)(struct diff_filepair *pair);
diff --git a/parsing.c b/parsing.c
index 602e3de..2a03b11 100644
--- a/parsing.c
+++ b/parsing.c
@@ -98,6 +98,7 @@ char *parse_user(char *t, char **name, char **email, unsigned long *date)
 
 #ifdef NO_ICONV
 #define reencode(a, b, c)
+#define to_pageencoding(a)
 #else
 const char *reencode(char **txt, const char *src_enc, const char *dst_enc)
 {
@@ -120,6 +121,16 @@ const char *reencode(char **txt, const char *src_enc, const char *dst_enc)
 	}
 	return *txt;
 }
+const char *to_pageencoding(const char *txt)
+{
+	if(is_encoding_utf8(PAGE_ENCODING) && !is_utf8(txt)) {
+		char *tmp = xstrdup(txt);
+		reencode(&tmp, FALLBACK_ENCODING, PAGE_ENCODING);
+		return tmp;
+	}
+	return txt;
+}
+
 #endif
 
 struct commitinfo *cgit_parse_commit(struct commit *commit)
diff --git a/ui-diff.c b/ui-diff.c
index c6bad63..2d90a46 100644
--- a/ui-diff.c
+++ b/ui-diff.c
@@ -211,7 +211,7 @@ static void print_line(char *line, int len)
 
 	htmlf("<div class='%s'>", class);
 	line[len-1] = '\0';
-	html_txt(line);
+	html_txt(to_pageencoding(line));
 	html("</div>");
 	line[len-1] = c;
 }
diff --git a/ui-plain.c b/ui-plain.c
index 85877d7..baa5a2f 100644
--- a/ui-plain.c
+++ b/ui-plain.c
@@ -95,6 +95,8 @@ static void print_object(const unsigned char *sha1, const char *path)
 	ctx.page.filename = fmt("%s", path);
 	ctx.page.size = size;
 	ctx.page.etag = sha1_to_hex(sha1);
+	if(is_encoding_utf8(PAGE_ENCODING) && !is_utf8(buf)) /* best guess */
+		ctx.page.charset = FALLBACK_ENCODING;
 	cgit_print_http_headers(&ctx);
 	html_raw(buf, size);
 	match = 1;
diff --git a/ui-refs.c b/ui-refs.c
index caddfbc..15cfe0b 100644
--- a/ui-refs.c
+++ b/ui-refs.c
@@ -143,7 +143,7 @@ static int print_tag(struct refinfo *ref)
 			cgit_object_link(tag->tagged);
 		html("</td><td>");
 		if (info->tagger)
-			html(info->tagger);
+			html(to_pageencoding(info->tagger));
 		html("</td><td colspan='2'>");
 		if (info->tagger_date > 0)
 			cgit_print_age(info->tagger_date, -1, NULL);
diff --git a/ui-ssdiff.c b/ui-ssdiff.c
index fbb46cf..a60112e 100644
--- a/ui-ssdiff.c
+++ b/ui-ssdiff.c
@@ -208,7 +208,7 @@ static void print_part_with_lcs(char *class, char *line, char *lcs)
 			htmlf("</span>");
 			j += 1;
 		}
-		html_txt(c);
+		html_txt(to_pageencoding(c));
 	}
 }
 
@@ -244,7 +244,7 @@ static void print_ssdiff_line(char *class,
 		if (lcs)
 			print_part_with_lcs("del", old_line, lcs);
 		else
-			html_txt(old_line);
+			html_txt(to_pageencoding(old_line));
 	}
 
 	html("</td>\n");
@@ -265,7 +265,7 @@ static void print_ssdiff_line(char *class,
 		if (lcs)
 			print_part_with_lcs("add", new_line, lcs);
 		else
-			html_txt(new_line);
+			html_txt(to_pageencoding(new_line));
 	}
 
 	html("</td></tr>");
@@ -379,11 +379,11 @@ void cgit_ssdiff_line_cb(char *line, int len)
 		current_old_line += 1;
 	} else if (line[0] == '@') {
 		html("<tr><td colspan='4' class='hunk'>");
-		html_txt(line);
+		html_txt(to_pageencoding(line));
 		html("</td></tr>");
 	} else {
 		html("<tr><td colspan='4' class='ctx'>");
-		html_txt(line);
+		html_txt(to_pageencoding(line));
 		html("</td></tr>");
 	}
 	line[len - 1] = c;
diff --git a/ui-tag.c b/ui-tag.c
index 39e4cb8..de88880 100644
--- a/ui-tag.c
+++ b/ui-tag.c
@@ -21,7 +21,7 @@ static void print_tag_content(char *buf)
 	p = strchr(buf, '\n');
 	if (p)
 		*p = '\0';
-	html_txt(buf);
+	html_txt(to_pageencoding(buf));
 	html("</div>");
 	if (p) {
 		html("<div class='commit-msg'>");
@@ -74,7 +74,7 @@ void cgit_print_tag(char *revname)
 		}
 		if (info->tagger) {
 			html("<tr><td>tagged by</td><td>");
-			html_txt(info->tagger);
+			html_txt(to_pageencoding(info->tagger));
 			if (info->tagger_email && !ctx.cfg.noplainemail) {
 				html(" ");
 				html_txt(info->tagger_email);
diff --git a/ui-tree.c b/ui-tree.c
index b1adcc7..35f1ad5 100644
--- a/ui-tree.c
+++ b/ui-tree.c
@@ -55,7 +55,7 @@ static void print_text_buffer(const char *name, char *buf, unsigned long size)
 	}
 
 	html("<td class='lines'><pre><code>");
-	html_txt(buf);
+	html_txt(to_pageencoding(buf));
 	html("</code></pre></td></tr></table>\n");
 }
 
-- 
1.7.12.3-zedat





      parent reply	other threads:[~2012-10-30 13:07 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-10-30 13:07 [PATCH 0/4] some miscellaneous features plenz
2012-10-30 13:07 ` [PATCH 1/4] tree view: imitate proper permissions plenz
2012-10-30 13:07 ` [PATCH 2/4] Introduce "dont-display-suffix" option plenz
2012-11-01  4:03   ` mathstuf
2012-11-01  4:22   ` Jason
2012-11-01 10:52     ` plenz
2012-10-30 13:07 ` [PATCH 3/4] Make number of columns in stat overview configurable plenz
2012-10-30 13:07 ` plenz [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1351602438-955878-5-git-send-email-plenz@cis.fu-berlin.de \
    --to=cgit@lists.zx2c4.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).