* [PATCH] Simplify commit and tag parsing
@ 2015-03-03 12:00 cgit
2015-03-03 15:46 ` Jason
2015-03-05 10:46 ` Jason
0 siblings, 2 replies; 4+ messages in thread
From: cgit @ 2015-03-03 12:00 UTC (permalink / raw)
* Use skip_prefix to avoid magic numbers in the code.
* Use xcalloc() instead of xmalloc(), followed by manual initialization.
* Split out line splitting.
Signed-off-by: Lukas Fleischer <cgit at cryptocrack.de>
---
parsing.c | 114 +++++++++++++++++++++++---------------------------------------
1 file changed, 42 insertions(+), 72 deletions(-)
diff --git a/parsing.c b/parsing.c
index 53c29bb..0db181b 100644
--- a/parsing.c
+++ b/parsing.c
@@ -118,45 +118,50 @@ static const char *reencode(char **txt, const char *src_enc, const char *dst_enc
}
#endif
+static const char *next_header_line(const char *p)
+{
+ p = strchr(p, '\n');
+ if (!p)
+ return NULL;
+ return p + 1;
+}
+
+static int end_of_header(const char *p)
+{
+ return !p || (*p == '\n');
+}
+
struct commitinfo *cgit_parse_commit(struct commit *commit)
{
+ const int sha1hex_len = 40;
struct commitinfo *ret;
const char *p = get_cached_commit_buffer(commit, NULL);
const char *t;
- ret = xmalloc(sizeof(*ret));
+ ret = xcalloc(1, sizeof(struct commitinfo));
ret->commit = commit;
- ret->author = NULL;
- ret->author_email = NULL;
- ret->committer = NULL;
- ret->committer_email = NULL;
- ret->subject = NULL;
- ret->msg = NULL;
- ret->msg_encoding = NULL;
-
- if (p == NULL)
+
+ if (!p)
return ret;
- if (!starts_with(p, "tree "))
+ if (!skip_prefix(p, "tree ", &p))
die("Bad commit: %s", sha1_to_hex(commit->object.sha1));
- else
- p += 46; // "tree " + hex[40] + "\n"
+ p += sha1hex_len + 1;
- while (starts_with(p, "parent "))
- p += 48; // "parent " + hex[40] + "\n"
+ while (skip_prefix(p, "parent ", &p))
+ p += sha1hex_len + 1;
- if (p && starts_with(p, "author ")) {
- p = parse_user(p + 7, &ret->author, &ret->author_email,
+ if (p && skip_prefix(p, "author ", &p)) {
+ p = parse_user(p, &ret->author, &ret->author_email,
&ret->author_date);
}
- if (p && starts_with(p, "committer ")) {
- p = parse_user(p + 10, &ret->committer, &ret->committer_email,
+ if (p && skip_prefix(p, "committer ", &p)) {
+ p = parse_user(p, &ret->committer, &ret->committer_email,
&ret->committer_date);
}
- if (p && starts_with(p, "encoding ")) {
- p += 9;
+ if (p && skip_prefix(p, "encoding ", &p)) {
t = strchr(p, '\n');
if (t) {
ret->msg_encoding = substr(p, t + 1);
@@ -164,38 +169,21 @@ struct commitinfo *cgit_parse_commit(struct commit *commit)
}
}
- /* if no special encoding is found, assume UTF-8 */
if (!ret->msg_encoding)
ret->msg_encoding = xstrdup("UTF-8");
- // skip unknown header fields
- while (p && *p && (*p != '\n')) {
- p = strchr(p, '\n');
- if (p)
- p++;
- }
-
- // skip empty lines between headers and message
+ while (!end_of_header(p))
+ p = next_header_line(p);
while (p && *p == '\n')
p++;
-
if (!p)
return ret;
- t = strchr(p, '\n');
- if (t) {
- ret->subject = substr(p, t);
- p = t + 1;
-
- while (p && *p == '\n') {
- p = strchr(p, '\n');
- if (p)
- p++;
- }
- if (p)
- ret->msg = xstrdup(p);
- } else
- ret->subject = xstrdup(p);
+ t = strchrnul(p, '\n');
+ ret->subject = substr(p, t);
+ while (*t == '\n')
+ t++;
+ ret->msg = xstrdup(t);
reencode(&ret->author, ret->msg_encoding, PAGE_ENCODING);
reencode(&ret->author_email, ret->msg_encoding, PAGE_ENCODING);
@@ -207,49 +195,31 @@ struct commitinfo *cgit_parse_commit(struct commit *commit)
return ret;
}
-
struct taginfo *cgit_parse_tag(struct tag *tag)
{
void *data;
enum object_type type;
unsigned long size;
const char *p;
- struct taginfo *ret;
+ struct taginfo *ret = NULL;
data = read_sha1_file(tag->object.sha1, &type, &size);
- if (!data || type != OBJ_TAG) {
- free(data);
- return 0;
- }
+ if (!data || type != OBJ_TAG)
+ goto cleanup;
- ret = xmalloc(sizeof(*ret));
- ret->tagger = NULL;
- ret->tagger_email = NULL;
- ret->tagger_date = 0;
- ret->msg = NULL;
+ ret = xcalloc(1, sizeof(struct taginfo));
- p = data;
-
- while (p && *p) {
- if (*p == '\n')
- break;
-
- if (starts_with(p, "tagger ")) {
- p = parse_user(p + 7, &ret->tagger, &ret->tagger_email,
+ for (p = data; !end_of_header(p); p = next_header_line(p)) {
+ if (skip_prefix(p, "tagger ", &p)) {
+ p = parse_user(p, &ret->tagger, &ret->tagger_email,
&ret->tagger_date);
- } else {
- p = strchr(p, '\n');
- if (p)
- p++;
}
}
- // skip empty lines between headers and message
- while (p && *p == '\n')
- p++;
-
if (p && *p)
ret->msg = xstrdup(p);
+
+cleanup:
free(data);
return ret;
}
--
2.3.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH] Simplify commit and tag parsing
2015-03-03 12:00 [PATCH] Simplify commit and tag parsing cgit
@ 2015-03-03 15:46 ` Jason
2015-03-05 10:46 ` Jason
1 sibling, 0 replies; 4+ messages in thread
From: Jason @ 2015-03-03 15:46 UTC (permalink / raw)
That's a pretty dense commit, but it looks good to me after a read-through.
I'm going to merge this, but if somebody else wants to give it another pair
of eyes, that'd be welcome.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.zx2c4.com/pipermail/cgit/attachments/20150303/623be448/attachment.html>
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH] Simplify commit and tag parsing
2015-03-03 12:00 [PATCH] Simplify commit and tag parsing cgit
2015-03-03 15:46 ` Jason
@ 2015-03-05 10:46 ` Jason
2015-03-05 10:52 ` john
1 sibling, 1 reply; 4+ messages in thread
From: Jason @ 2015-03-05 10:46 UTC (permalink / raw)
This commit breaks ui-tag. The first few lines of tag messages are cut off.
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH] Simplify commit and tag parsing
2015-03-05 10:46 ` Jason
@ 2015-03-05 10:52 ` john
0 siblings, 0 replies; 4+ messages in thread
From: john @ 2015-03-05 10:52 UTC (permalink / raw)
On Thu, Mar 05, 2015 at 11:46:55AM +0100, Jason A. Donenfeld wrote:
> This commit breaks ui-tag. The first few lines of tag messages are cut off.
It looks like parse_user() consumes the trailing LF on the user line, so
next_header_line() ends up skipping the blank line at the end of the
header.
I suspect the right answer is to stop parse_user() eating the LF since
all of the call sites go through next_header_line() after this patch.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2015-03-05 10:52 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-03 12:00 [PATCH] Simplify commit and tag parsing cgit
2015-03-03 15:46 ` Jason
2015-03-05 10:46 ` Jason
2015-03-05 10:52 ` john
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).