From 8aba84c1f122f950e2568171fdf3b1d2575ba3b7 Mon Sep 17 00:00:00 2001 From: bra1nwave Date: Thu, 9 Apr 2020 16:57:53 +0200 Subject: [PATCH] WIP --- ...ib-ported-to-python-3-for-feedparser.patch | 590 ++++++++++++++++++ .../calibre/patches/disable-unrar-test.patch | 27 - srcpkgs/calibre/template | 39 +- 3 files changed, 610 insertions(+), 46 deletions(-) create mode 100644 srcpkgs/calibre/patches/add-sgmllib-ported-to-python-3-for-feedparser.patch delete mode 100644 srcpkgs/calibre/patches/disable-unrar-test.patch diff --git a/srcpkgs/calibre/patches/add-sgmllib-ported-to-python-3-for-feedparser.patch b/srcpkgs/calibre/patches/add-sgmllib-ported-to-python-3-for-feedparser.patch new file mode 100644 index 00000000000..a22a5816922 --- /dev/null +++ b/srcpkgs/calibre/patches/add-sgmllib-ported-to-python-3-for-feedparser.patch @@ -0,0 +1,590 @@ +From 2a09a839eb3c0b84d4f5f0e194bd3cb01cb50057 Mon Sep 17 00:00:00 2001 +From: Kovid Goyal +Date: Mon, 18 Nov 2019 18:38:06 +0530 +Subject: [PATCH] Add sgmllib ported to python 3 for feedparser + +--- + src/sgmllib.py | 574 +++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 574 insertions(+) + create mode 100644 src/sgmllib.py + +diff --git a/src/sgmllib.py b/src/sgmllib.py +new file mode 100644 +index 0000000000..6422b7603c +--- /dev/null ++++ src/sgmllib.py +@@ -0,0 +1,574 @@ ++"""A parser for SGML, using the derived class as a static DTD.""" ++# Needed for feedparser under python 3 where this module has been removed ++ ++# XXX This only supports those SGML features used by HTML. ++ ++# XXX There should be a way to distinguish between PCDATA (parsed ++# character data -- the normal case), RCDATA (replaceable character ++# data -- only char and entity references and end tags are special) ++# and CDATA (character data -- only end tags are special). RCDATA is ++# not supported at all. 
++ ++import _markupbase ++import re ++ ++__all__ = ["SGMLParser", "SGMLParseError"] ++ ++# Regular expressions used for parsing ++ ++interesting = re.compile('[&<]') ++incomplete = re.compile( ++ '&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' ++ '<([a-zA-Z][^<>]*|' ++ '/([a-zA-Z][^<>]*)?|' ++ '![^<>]*)?' ++) ++ ++entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') ++charref = re.compile('&#([0-9]+)[^0-9]') ++ ++starttagopen = re.compile('<[>a-zA-Z]') ++shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/') ++shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/') ++piclose = re.compile('>') ++endbracket = re.compile('[<>]') ++tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*') ++attrfind = re.compile( ++ r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*' ++ r'(\'[^\']*\'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?' ++) ++ ++ ++class SGMLParseError(RuntimeError): ++ """Exception raised for all parse errors.""" ++ pass ++ ++ ++# SGML parser base class -- find tags and call handler functions. ++# Usage: p = SGMLParser(); p.feed(data); ...; p.close(). ++# The dtd is defined by deriving a class which defines methods ++# with special names to handle tags: start_foo and end_foo to handle ++# <foo> and </foo>, respectively, or do_foo to handle <foo> by itself. ++# (Tags are converted to lower case for this purpose.) The data ++# between tags is passed to the parser by calling self.handle_data() ++# with some data as argument (the data may be split up in arbitrary ++# chunks). Entity references are passed by calling ++# self.handle_entityref() with the entity reference as argument. ++ ++ ++class SGMLParser(_markupbase.ParserBase): ++ # Definition of entities -- derived classes may override ++ entity_or_charref = re.compile( ++ '&(?:' ++ '([a-zA-Z][-.a-zA-Z0-9]*)|#([0-9]+)' ++ ')(;?)' ++ ) ++ ++ def __init__(self, verbose=0): ++ """Initialize and reset this instance.""" ++ self.verbose = verbose ++ self.reset() ++ ++ def reset(self): ++ """Reset this instance. 
Loses all unprocessed data.""" ++ self.__starttag_text = None ++ self.rawdata = '' ++ self.stack = [] ++ self.lasttag = '???' ++ self.nomoretags = 0 ++ self.literal = 0 ++ _markupbase.ParserBase.reset(self) ++ ++ def setnomoretags(self): ++ """Enter literal mode (CDATA) till EOF. ++ ++ Intended for derived classes only. ++ """ ++ self.nomoretags = self.literal = 1 ++ ++ def setliteral(self, *args): ++ """Enter literal mode (CDATA). ++ ++ Intended for derived classes only. ++ """ ++ self.literal = 1 ++ ++ def feed(self, data): ++ """Feed some data to the parser. ++ ++ Call this as often as you want, with as little or as much text ++ as you want (may include '\n'). (This just saves the text, ++ all the processing is done by goahead().) ++ """ ++ ++ self.rawdata = self.rawdata + data ++ self.goahead(0) ++ ++ def close(self): ++ """Handle the remaining data.""" ++ self.goahead(1) ++ ++ def error(self, message): ++ raise SGMLParseError(message) ++ ++ # Internal -- handle data as far as reasonable. May leave state ++ # and data to be processed by a subsequent call. If 'end' is ++ # true, force handling all data as if followed by EOF marker. ++ def goahead(self, end): ++ rawdata = self.rawdata ++ i = 0 ++ n = len(rawdata) ++ while i < n: ++ if self.nomoretags: ++ self.handle_data(rawdata[i:n]) ++ i = n ++ break ++ match = interesting.search(rawdata, i) ++ if match: ++ j = match.start() ++ else: ++ j = n ++ if i < j: ++ self.handle_data(rawdata[i:j]) ++ i = j ++ if i == n: ++ break ++ if rawdata[i] == '<': ++ if starttagopen.match(rawdata, i): ++ if self.literal: ++ self.handle_data(rawdata[i]) ++ i = i + 1 ++ continue ++ k = self.parse_starttag(i) ++ if k < 0: ++ break ++ i = k ++ continue ++ if rawdata.startswith(" (i + 1): ++ self.handle_data("<") ++ i = i + 1 ++ else: ++ # incomplete ++ break ++ continue ++ if rawdata.startswith("