From mboxrd@z Thu Jan 1 00:00:00 1970 From: john at keeping.me.uk (John Keeping) Date: Sat, 4 Mar 2017 12:35:21 +0000 Subject: [PATCH] filter: set environment variable PYTHONIOENCODING to utf-8 In-Reply-To: <20170223154823.18206-1-roy@marples.name> References: <20170223154823.18206-1-roy@marples.name> Message-ID: <20170304123521.GC2102@john.keeping.me.uk> On Thu, Feb 23, 2017 at 03:48:23PM +0000, Roy Marples wrote: > This allows different versions of Python to be used rather than > forcing version specific encoding in each script. > > Signed-off-by: Roy Marples > --- > filter.c | 8 ++++++++ > filters/email-gravatar.py | 3 --- > filters/syntax-highlighting.py | 5 +---- > 3 files changed, 9 insertions(+), 7 deletions(-) Neat! This definitely makes it easier to get things right when writing Python filters, but having filter_env_set seems unnecessary. Is there a reason not to either: 1) set PYTHONIOENCODING unconditionally early in startup or 2) set the environment in the child after forking? > diff --git a/filter.c b/filter.c > index 949c931..3c0f978 100644 > --- a/filter.c > +++ b/filter.c > @@ -15,6 +15,8 @@ > #include > #endif > > +static bool filter_env_set; > + > static inline void reap_filter(struct cgit_filter *filter) > { > if (filter && filter->cleanup) > @@ -44,6 +46,12 @@ static int open_exec_filter(struct cgit_filter *base, va_list ap) > struct cgit_exec_filter *filter = (struct cgit_exec_filter *)base; > int i; > > + if (!filter_env_set) { > + /* Always input/output utf-8 for a Python filter. 
*/ > + setenv("PYTHONIOENCODING", "utf-8", 1); > + filter_env_set = true; > + } > + > for (i = 0; i < filter->base.argument_count; i++) > filter->argv[i + 1] = va_arg(ap, char *); > > diff --git a/filters/email-gravatar.py b/filters/email-gravatar.py > index d70440e..8b98471 100755 > --- a/filters/email-gravatar.py > +++ b/filters/email-gravatar.py > @@ -30,9 +30,6 @@ if email[-1] == '>': > > page = sys.argv[2] > > -sys.stdin = codecs.getreader("utf-8")(sys.stdin.detach()) > -sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach()) > - > md5 = hashlib.md5(email.encode()).hexdigest() > text = sys.stdin.read().strip() > > diff --git a/filters/syntax-highlighting.py b/filters/syntax-highlighting.py > index 5888b50..936fdb7 100755 > --- a/filters/syntax-highlighting.py > +++ b/filters/syntax-highlighting.py > @@ -29,9 +29,6 @@ from pygments.lexers import guess_lexer > from pygments.lexers import guess_lexer_for_filename > from pygments.formatters import HtmlFormatter > > - > -sys.stdin = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8', errors='replace') > -sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace') > data = sys.stdin.read() > filename = sys.argv[1] > formatter = HtmlFormatter(style='pastie') > @@ -52,4 +49,4 @@ except TypeError: > sys.stdout.write('') > -sys.stdout.write(highlight(data, lexer, formatter, outfile=None)) > +highlight(data, lexer, formatter, outfile=sys.stdout) > -- > 2.11.1 > > _______________________________________________ > CGit mailing list > CGit at lists.zx2c4.com > https://lists.zx2c4.com/mailman/listinfo/cgit