From mboxrd@z Thu Jan 1 00:00:00 1970 From: plenz at cis.fu-berlin.de (Julius Plenz) Date: Tue, 30 Oct 2012 14:07:18 +0100 Subject: [PATCH 4/4] Introduce a fallback encoding (eg. for blobs) In-Reply-To: <1351602438-955878-1-git-send-email-plenz@cis.fu-berlin.de> References: <1351602438-955878-1-git-send-email-plenz@cis.fu-berlin.de> Message-ID: <1351602438-955878-5-git-send-email-plenz@cis.fu-berlin.de> Usually you'll want to deliver the web pages using UTF-8. Converting Git's commit information to the PAGE_ENCODING is no problem: if a commit is not encoded in UTF-8, the encoding that was used is specified in the commit itself. In the case of blobs, Git by design doesn't want to know anything about the encoding. To make such a file still appear "normal" in the browser, the FALLBACK_ENCODING (default: latin1) serves as a hint as to which encoding the string originates from. When the plain file is delivered, the charset of the response is set to the fallback encoding if the blob's contents are not valid UTF-8. The same applies to the "Tagger" information, since tag objects don't have an "encoding" field. 
(See: http://git.661346.n2.nabble.com/PATCH-RFC-Document-format-of-basic-Git-objects-tp7287428p7288762.html ) Signed-off-by: Julius Plenz --- cgit.h | 4 +++- parsing.c | 11 +++++++++++ ui-diff.c | 2 +- ui-plain.c | 2 ++ ui-refs.c | 2 +- ui-ssdiff.c | 10 +++++----- ui-tag.c | 4 ++-- ui-tree.c | 2 +- 8 files changed, 26 insertions(+), 11 deletions(-) diff --git a/cgit.h b/cgit.h index 4a3f528..fb7b9db 100644 --- a/cgit.h +++ b/cgit.h @@ -43,9 +43,11 @@ /* - * Default encoding + * Default encoding and fallback encoding in case blobs are not valid UTF-8 */ #define PAGE_ENCODING "UTF-8" +#define FALLBACK_ENCODING "latin1" +extern const char *to_pageencoding(const char *txt); typedef void (*configfn)(const char *name, const char *value); typedef void (*filepair_fn)(struct diff_filepair *pair); diff --git a/parsing.c b/parsing.c index 602e3de..2a03b11 100644 --- a/parsing.c +++ b/parsing.c @@ -98,6 +98,7 @@ char *parse_user(char *t, char **name, char **email, unsigned long *date) #ifdef NO_ICONV #define reencode(a, b, c) +#define to_pageencoding(a) #else const char *reencode(char **txt, const char *src_enc, const char *dst_enc) { @@ -120,6 +121,16 @@ const char *reencode(char **txt, const char *src_enc, const char *dst_enc) } return *txt; } +const char *to_pageencoding(const char *txt) +{ + if(is_encoding_utf8(PAGE_ENCODING) && !is_utf8(txt)) { + char *tmp = xstrdup(txt); + reencode(&tmp, FALLBACK_ENCODING, PAGE_ENCODING); + return tmp; + } + return txt; +} + #endif struct commitinfo *cgit_parse_commit(struct commit *commit) diff --git a/ui-diff.c b/ui-diff.c index c6bad63..2d90a46 100644 --- a/ui-diff.c +++ b/ui-diff.c @@ -211,7 +211,7 @@ static void print_line(char *line, int len) htmlf("
", class); line[len-1] = '\0'; - html_txt(line); + html_txt(to_pageencoding(line)); html("
"); line[len-1] = c; } diff --git a/ui-plain.c b/ui-plain.c index 85877d7..baa5a2f 100644 --- a/ui-plain.c +++ b/ui-plain.c @@ -95,6 +95,8 @@ static void print_object(const unsigned char *sha1, const char *path) ctx.page.filename = fmt("%s", path); ctx.page.size = size; ctx.page.etag = sha1_to_hex(sha1); + if(is_encoding_utf8(PAGE_ENCODING) && !is_utf8(buf)) /* best guess */ + ctx.page.charset = FALLBACK_ENCODING; cgit_print_http_headers(&ctx); html_raw(buf, size); match = 1; diff --git a/ui-refs.c b/ui-refs.c index caddfbc..15cfe0b 100644 --- a/ui-refs.c +++ b/ui-refs.c @@ -143,7 +143,7 @@ static int print_tag(struct refinfo *ref) cgit_object_link(tag->tagged); html(""); if (info->tagger) - html(info->tagger); + html(to_pageencoding(info->tagger)); html(""); if (info->tagger_date > 0) cgit_print_age(info->tagger_date, -1, NULL); diff --git a/ui-ssdiff.c b/ui-ssdiff.c index fbb46cf..a60112e 100644 --- a/ui-ssdiff.c +++ b/ui-ssdiff.c @@ -208,7 +208,7 @@ static void print_part_with_lcs(char *class, char *line, char *lcs) htmlf(""); j += 1; } - html_txt(c); + html_txt(to_pageencoding(c)); } } @@ -244,7 +244,7 @@ static void print_ssdiff_line(char *class, if (lcs) print_part_with_lcs("del", old_line, lcs); else - html_txt(old_line); + html_txt(to_pageencoding(old_line)); } html("\n"); @@ -265,7 +265,7 @@ static void print_ssdiff_line(char *class, if (lcs) print_part_with_lcs("add", new_line, lcs); else - html_txt(new_line); + html_txt(to_pageencoding(new_line)); } html(""); @@ -379,11 +379,11 @@ void cgit_ssdiff_line_cb(char *line, int len) current_old_line += 1; } else if (line[0] == '@') { html(""); - html_txt(line); + html_txt(to_pageencoding(line)); html(""); } else { html(""); - html_txt(line); + html_txt(to_pageencoding(line)); html(""); } line[len - 1] = c; diff --git a/ui-tag.c b/ui-tag.c index 39e4cb8..de88880 100644 --- a/ui-tag.c +++ b/ui-tag.c @@ -21,7 +21,7 @@ static void print_tag_content(char *buf) p = strchr(buf, '\n'); if (p) *p = '\0'; - 
html_txt(buf); + html_txt(to_pageencoding(buf)); html(""); if (p) { html("
"); @@ -74,7 +74,7 @@ void cgit_print_tag(char *revname) } if (info->tagger) { html("tagged by"); - html_txt(info->tagger); + html_txt(to_pageencoding(info->tagger)); if (info->tagger_email && !ctx.cfg.noplainemail) { html(" "); html_txt(info->tagger_email); diff --git a/ui-tree.c b/ui-tree.c index b1adcc7..35f1ad5 100644 --- a/ui-tree.c +++ b/ui-tree.c @@ -55,7 +55,7 @@ static void print_text_buffer(const char *name, char *buf, unsigned long size) } html("
");
-	html_txt(buf);
+	html_txt(to_pageencoding(buf));
 	html("
\n"); } -- 1.7.12.3-zedat