List for cgit developers and users
 help / color / mirror / Atom feed
From: john at keeping.me.uk (John Keeping)
Subject: RFE: have a way to request compressed patches and rawdiffs
Date: Mon, 1 May 2017 12:00:31 +0100	[thread overview]
Message-ID: <20170501110031.GE1788@john.keeping.me.uk> (raw)
In-Reply-To: <CAMwyc-RDJnbfroPCVZ+na2Obhe+6ELA6sHC_6854C8sY53_8iA@mail.gmail.com>

On Fri, Apr 28, 2017 at 04:46:34PM +0000, Konstantin Ryabitsev wrote:
> Since I'm on an RFE binge, here's another one. :)
> 
> Some of the rawdiffs we have been generating end up pretty large (e.g.
> linux-next to mainline rawdiff is around 80MB). We compress them using http
> protocol compression, but the reality is that most people would be getting
> these using wget or curl, which do not add the "accept-encoding: gzip"
> header and therefore get the uncompressed version.
> 
> It would be great to be able to tell cgit to generate and send compressed
> versions of raw output like patches or rawdiffs.

Here's an initial attempt at this. It needs splitting up into a series
of a few patches (I think there are about four hiding in here), but I'm
sending it now to see if this is roughly what's needed.

The idea is to support extensions on page names (for pages that opt in).
It's implemented for patch and rawdiff at the moment, with support for
gzip-compressing the response body.

For example, given the URL:

	https://git.zx2c4.com/cgit/patch/?id=8e9ddd21a50beb9fd660cf6cd6a583234924b932

with this change you can add ".gz" to the end of "patch":

	https://git.zx2c4.com/cgit/patch.gz/?id=8e9ddd21a50beb9fd660cf6cd6a583234924b932

and the content will be returned compressed with gzip.

I've only added gzip for now, but the setup is table driven so adding
more compression filters should be straightforward.

Signed-off-by: John Keeping <john at keeping.me.uk>
---
 cgit.c     | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 cgit.h     |  2 ++
 cmd.c      | 44 +++++++++++++++++++-------------------
 cmd.h      |  3 ++-
 filter.c   |  2 ++
 ui-diff.c  |  6 +++++-
 ui-patch.c | 10 +++++++--
 7 files changed, 113 insertions(+), 26 deletions(-)

diff --git a/cgit.c b/cgit.c
index 1075753..8d25680 100644
--- a/cgit.c
+++ b/cgit.c
@@ -695,6 +695,46 @@ static inline void authenticate_cookie(void)
 	ctx.env.authenticated = cgit_close_filter(ctx.cfg.auth_filter);
 }
 
+static struct cgit_filter *create_gzip_filter(void)
+{
+	char **argv = xcalloc(3, sizeof(char *));
+	struct cgit_exec_filter *f = xmalloc(sizeof(*f));
+
+	argv[0] = xstrdup("gzip");
+	argv[1] = xstrdup("-n");
+	argv[2] = NULL;
+
+	cgit_exec_filter_init(f, argv[0], argv);
+	return &f->base;
+}
+
+struct cgit_page_compression {
+	const char *const ext;
+	const char *const mimetype;
+	struct cgit_filter *(*create)(void);
+};
+
+static const struct cgit_page_compression cgit_page_compressions[] = {
+	{ ".gz", "application/x-gzip", create_gzip_filter, },
+};
+
+static struct cgit_filter *get_page_compression(void)
+{
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(cgit_page_compressions); i++) {
+		const struct cgit_page_compression *c;
+
+		c = &cgit_page_compressions[i];
+		if (!strcmp(c->ext, ctx.qry.pageext)) {
+			ctx.page.mimetype = xstrdup(c->mimetype);
+			return c->create();
+		}
+	}
+
+	return NULL;
+}
+
 static void process_request(void)
 {
 	struct cgit_cmd *cmd;
@@ -738,6 +778,21 @@ static void process_request(void)
 		return;
 	}
 
+	if (ctx.qry.pageext) {
+		if (!cmd->want_compression) {
+			cgit_print_error_page(404, "Not found",
+					"Invalid request");
+			return;
+		}
+
+		ctx.page.body_filter = get_page_compression();
+		if (!ctx.page.body_filter) {
+			cgit_print_error_page(404, "Not found",
+					"Invalid request");
+			return;
+		}
+	}
+
 	if (ctx.repo && prepare_repo_cmd())
 		return;
 
@@ -1008,6 +1063,21 @@ static void cgit_parse_args(int argc, const char **argv)
 	}
 }
 
+static void split_page_ext(void)
+{
+	char *dot;
+
+	if (!ctx.qry.page)
+		return;
+
+	dot = strchr(ctx.qry.page, '.');
+	if (!dot)
+		return;
+
+	ctx.qry.pageext = xstrdup(dot);
+	*dot = '\0';
+}
+
 static int calc_ttl(void)
 {
 	if (!ctx.repo)
@@ -1075,6 +1145,8 @@ int cmd_main(int argc, const char **argv)
 		cgit_parse_url(ctx.qry.url);
 	}
 
+	split_page_ext();
+
 	/* Before we go any further, we set ctx.env.authenticated by checking to see
 	 * if the supplied cookie is valid. All cookies are valid if there is no
 	 * auth_filter. If there is an auth_filter, the filter decides. */
diff --git a/cgit.h b/cgit.h
index fbc6c6a..a04b03b 100644
--- a/cgit.h
+++ b/cgit.h
@@ -164,6 +164,7 @@ struct cgit_query {
 	char *raw;
 	char *repo;
 	char *page;
+	char *pageext;
 	char *search;
 	char *grep;
 	char *head;
@@ -280,6 +281,7 @@ struct cgit_page {
 	const char *title;
 	int status;
 	const char *statusmsg;
+	struct cgit_filter *body_filter;
 };
 
 struct cgit_environment {
diff --git a/cmd.c b/cmd.c
index d280e95..9cc5797 100644
--- a/cmd.c
+++ b/cmd.c
@@ -155,32 +155,32 @@ static void tree_fn(void)
 	cgit_print_tree(ctx.qry.sha1, ctx.qry.path);
 }
 
-#define def_cmd(name, want_repo, want_vpath, is_clone) \
-	{#name, name##_fn, want_repo, want_vpath, is_clone}
+#define def_cmd(name, want_repo, want_vpath, is_clone, want_compression) \
+	{#name, name##_fn, want_repo, want_vpath, is_clone, want_compression}
 
 struct cgit_cmd *cgit_get_cmd(void)
 {
 	static struct cgit_cmd cmds[] = {
-		def_cmd(HEAD, 1, 0, 1),
-		def_cmd(atom, 1, 0, 0),
-		def_cmd(about, 0, 0, 0),
-		def_cmd(blob, 1, 0, 0),
-		def_cmd(commit, 1, 1, 0),
-		def_cmd(diff, 1, 1, 0),
-		def_cmd(info, 1, 0, 1),
-		def_cmd(log, 1, 1, 0),
-		def_cmd(ls_cache, 0, 0, 0),
-		def_cmd(objects, 1, 0, 1),
-		def_cmd(patch, 1, 1, 0),
-		def_cmd(plain, 1, 0, 0),
-		def_cmd(rawdiff, 1, 1, 0),
-		def_cmd(refs, 1, 0, 0),
-		def_cmd(repolist, 0, 0, 0),
-		def_cmd(snapshot, 1, 0, 0),
-		def_cmd(stats, 1, 1, 0),
-		def_cmd(summary, 1, 0, 0),
-		def_cmd(tag, 1, 0, 0),
-		def_cmd(tree, 1, 1, 0),
+		def_cmd(HEAD, 1, 0, 1, 0),
+		def_cmd(atom, 1, 0, 0, 0),
+		def_cmd(about, 0, 0, 0, 0),
+		def_cmd(blob, 1, 0, 0, 0),
+		def_cmd(commit, 1, 1, 0, 0),
+		def_cmd(diff, 1, 1, 0, 0),
+		def_cmd(info, 1, 0, 1, 0),
+		def_cmd(log, 1, 1, 0, 0),
+		def_cmd(ls_cache, 0, 0, 0, 0),
+		def_cmd(objects, 1, 0, 1, 0),
+		def_cmd(patch, 1, 1, 0, 1),
+		def_cmd(plain, 1, 0, 0, 0),
+		def_cmd(rawdiff, 1, 1, 0, 1),
+		def_cmd(refs, 1, 0, 0, 0),
+		def_cmd(repolist, 0, 0, 0, 0),
+		def_cmd(snapshot, 1, 0, 0, 0),
+		def_cmd(stats, 1, 1, 0, 0),
+		def_cmd(summary, 1, 0, 0, 0),
+		def_cmd(tag, 1, 0, 0, 0),
+		def_cmd(tree, 1, 1, 0, 0),
 	};
 	int i;
 
diff --git a/cmd.h b/cmd.h
index 6249b1d..4d46ef9 100644
--- a/cmd.h
+++ b/cmd.h
@@ -8,7 +8,8 @@ struct cgit_cmd {
 	cgit_cmd_fn fn;
 	unsigned int want_repo:1,
 		want_vpath:1,
-		is_clone:1;
+		is_clone:1,
+		want_compression:1;
 };
 
 extern struct cgit_cmd *cgit_get_cmd(void);
diff --git a/filter.c b/filter.c
index 949c931..517add5 100644
--- a/filter.c
+++ b/filter.c
@@ -379,6 +379,8 @@ int cgit_close_filter(struct cgit_filter *filter)
 {
 	if (!filter)
 		return 0;
+
+	fflush(stdout);
 	return filter->close(filter);
 }
 
diff --git a/ui-diff.c b/ui-diff.c
index 173d351..1949fa1 100644
--- a/ui-diff.c
+++ b/ui-diff.c
@@ -447,8 +447,11 @@ void cgit_print_diff(const char *new_rev, const char *old_rev,
 		DIFF_OPT_SET(&diffopt, RECURSIVE);
 		diff_setup_done(&diffopt);
 
-		ctx.page.mimetype = "text/plain";
+		if (!ctx.qry.pageext)
+			ctx.page.mimetype = "text/plain";
 		cgit_print_http_headers();
+		cgit_open_filter(ctx.page.body_filter);
+
 		if (old_tree_sha1) {
 			diff_tree_sha1(old_tree_sha1, new_tree_sha1, "",
 				       &diffopt);
@@ -458,6 +461,7 @@ void cgit_print_diff(const char *new_rev, const char *old_rev,
 		diffcore_std(&diffopt);
 		diff_flush(&diffopt);
 
+		cgit_close_filter(ctx.page.body_filter);
 		return;
 	}
 
diff --git a/ui-patch.c b/ui-patch.c
index 6745b69..e24d2fa 100644
--- a/ui-patch.c
+++ b/ui-patch.c
@@ -64,8 +64,10 @@ void cgit_print_patch(const char *new_rev, const char *old_rev,
 			oid_to_hex(&new_rev_oid));
 	}
 
-	patchname = fmt("%s.patch", rev_range);
-	ctx.page.mimetype = "text/plain";
+	patchname = fmt("%s.patch%s", rev_range,
+			ctx.qry.pageext ? ctx.qry.pageext : "");
+	if (!ctx.qry.pageext)
+		ctx.page.mimetype = "text/plain";
 	ctx.page.filename = patchname;
 	cgit_print_http_headers();
 
@@ -75,6 +77,8 @@ void cgit_print_patch(const char *new_rev, const char *old_rev,
 			      "%s%n%n%w(0)%b";
 	}
 
+	cgit_open_filter(ctx.page.body_filter);
+
 	init_revisions(&rev, NULL);
 	rev.abbrev = DEFAULT_ABBREV;
 	rev.verbose_header = 1;
@@ -92,4 +96,6 @@ void cgit_print_patch(const char *new_rev, const char *old_rev,
 		log_tree_commit(&rev, commit);
 		printf("-- \ncgit %s\n\n", cgit_version);
 	}
+
+	cgit_close_filter(ctx.page.body_filter);
 }
-- 
2.12.2.648.g6730d8bc62.dirty



      reply	other threads:[~2017-05-01 11:00 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-28 16:46 mricon
2017-05-01 11:00 ` john [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170501110031.GE1788@john.keeping.me.uk \
    --to=cgit@lists.zx2c4.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).