From 8aba84c1f122f950e2568171fdf3b1d2575ba3b7 Mon Sep 17 00:00:00 2001
From: bra1nwave <bra1nwave@protonmail.com>
Date: Thu, 9 Apr 2020 16:57:53 +0200
Subject: [PATCH] [WIP] calibre: switch to python3

---
 ...ib-ported-to-python-3-for-feedparser.patch | 590 ++++++++++++++++++
 .../calibre/patches/disable-unrar-test.patch  |  27 -
 srcpkgs/calibre/template                      |  39 +-
 3 files changed, 610 insertions(+), 46 deletions(-)
 create mode 100644 srcpkgs/calibre/patches/add-sgmllib-ported-to-python-3-for-feedparser.patch
 delete mode 100644 srcpkgs/calibre/patches/disable-unrar-test.patch
diff --git a/srcpkgs/calibre/patches/add-sgmllib-ported-to-python-3-for-feedparser.patch b/srcpkgs/calibre/patches/add-sgmllib-ported-to-python-3-for-feedparser.patch
new file mode 100644
index 00000000000..a22a5816922
--- /dev/null
+++ b/srcpkgs/calibre/patches/add-sgmllib-ported-to-python-3-for-feedparser.patch
@@ -0,0 +1,590 @@
+From 2a09a839eb3c0b84d4f5f0e194bd3cb01cb50057 Mon Sep 17 00:00:00 2001
+From: Kovid Goyal <kovid@kovidgoyal.net>
+Date: Mon, 18 Nov 2019 18:38:06 +0530
+Subject: [PATCH] Add sgmllib ported to python 3 for feedparser
+
+---
+ src/sgmllib.py | 574 +++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 574 insertions(+)
+ create mode 100644 src/sgmllib.py
+
+diff --git a/src/sgmllib.py b/src/sgmllib.py
+new file mode 100644
+index 0000000000..6422b7603c
+--- /dev/null
++++ src/sgmllib.py
+@@ -0,0 +1,574 @@
++"""A parser for SGML, using the derived class as a static DTD."""
++# Needed for feedparser under python 3 where this module has been removed
++
++# XXX This only supports those SGML features used by HTML.
++
++# XXX There should be a way to distinguish between PCDATA (parsed
++# character data -- the normal case), RCDATA (replaceable character
++# data -- only char and entity references and end tags are special)
++# and CDATA (character data -- only end tags are special).  RCDATA is
++# not supported at all.
++
++import _markupbase
++import re
++
++__all__ = ["SGMLParser", "SGMLParseError"]
++
++# Regular expressions used for parsing
++
++interesting = re.compile('[&<]')
++incomplete = re.compile(
++    '&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|'
++    '<([a-zA-Z][^<>]*|'
++    '/([a-zA-Z][^<>]*)?|'
++    '![^<>]*)?'
++)
++
++entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
++charref = re.compile('&#([0-9]+)[^0-9]')
++
++starttagopen = re.compile('<[>a-zA-Z]')
++shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/')
++shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/')
++piclose = re.compile('>')
++endbracket = re.compile('[<>]')
++tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*')
++attrfind = re.compile(
++    r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'
++    r'(\'[^\']*\'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?'
++)
++
++
++class SGMLParseError(RuntimeError):
++    """Exception raised for all parse errors."""
++    pass
++
++
++# SGML parser base class -- find tags and call handler functions.
++# Usage: p = SGMLParser(); p.feed(data); ...; p.close().
++# The dtd is defined by deriving a class which defines methods
++# with special names to handle tags: start_foo and end_foo to handle
++# <foo> and </foo>, respectively, or do_foo to handle <foo> by itself.
++# (Tags are converted to lower case for this purpose.)  The data
++# between tags is passed to the parser by calling self.handle_data()
++# with some data as argument (the data may be split up in arbitrary
++# chunks).  Entity references are passed by calling
++# self.handle_entityref() with the entity reference as argument.
++
++
++class SGMLParser(_markupbase.ParserBase):
++    # Regex matching an entity or character reference -- derived classes may override
++    entity_or_charref = re.compile(
++        '&(?:'
++        '([a-zA-Z][-.a-zA-Z0-9]*)|#([0-9]+)'
++        ')(;?)'
++    )
++
++    def __init__(self, verbose=0):
++        """Initialize and reset this instance."""
++        self.verbose = verbose
++        self.reset()
++
++    def reset(self):
++        """Reset this instance. Loses all unprocessed data."""
++        self.__starttag_text = None
++        self.rawdata = ''
++        self.stack = []
++        self.lasttag = '???'
++        self.nomoretags = 0
++        self.literal = 0
++        _markupbase.ParserBase.reset(self)
++
++    def setnomoretags(self):
++        """Enter literal mode (CDATA) till EOF.
++
++        Intended for derived classes only.
++        """
++        self.nomoretags = self.literal = 1
++
++    def setliteral(self, *args):
++        """Enter literal mode (CDATA).
++
++        Intended for derived classes only.
++        """
++        self.literal = 1
++
++    def feed(self, data):
++        """Feed some data to the parser.
++
++        Call this as often as you want, with as little or as much text
++        as you want (may include '\n').  (This just saves the text,
++        all the processing is done by goahead().)
++        """
++
++        self.rawdata = self.rawdata + data
++        self.goahead(0)
++
++    def close(self):
++        """Handle the remaining data."""
++        self.goahead(1)
++
++    def error(self, message):
++        raise SGMLParseError(message)
++
++    # Internal -- handle data as far as reasonable.  May leave state
++    # and data to be processed by a subsequent call.  If 'end' is
++    # true, force handling all data as if followed by EOF marker.
++    def goahead(self, end):
++        rawdata = self.rawdata
++        i = 0
++        n = len(rawdata)
++        while i < n:
++            if self.nomoretags:
++                self.handle_data(rawdata[i:n])
++                i = n
++                break
++            match = interesting.search(rawdata, i)
++            if match:
++                j = match.start()
++            else:
++                j = n
++            if i < j:
++                self.handle_data(rawdata[i:j])
++            i = j
++            if i == n:
++                break
++            if rawdata[i] == '<':
++                if starttagopen.match(rawdata, i):
++                    if self.literal:
++                        self.handle_data(rawdata[i])
++                        i = i + 1
++                        continue
++                    k = self.parse_starttag(i)
++                    if k < 0:
++                        break
++                    i = k
++                    continue
++                if rawdata.startswith("</", i):
++                    k = self.parse_endtag(i)
++                    if k < 0:
++                        break
++                    i = k
++                    self.literal = 0
++                    continue
++                if self.literal:
++                    if n > (i + 1):
++                        self.handle_data("<")
++                        i = i + 1
++                    else:
++                        # incomplete
++                        break
++                    continue
++                if rawdata.startswith("<!--", i):
++                    # Strictly speaking, a comment is --.*--
++                    # within a declaration tag <!...>.
++                    # This should be removed,
++                    # and comments handled only in parse_declaration.
++                    k = self.parse_comment(i)
++                    if k < 0:
++                        break
++                    i = k
++                    continue
++                if rawdata.startswith("<?", i):
++                    k = self.parse_pi(i)
++                    if k < 0:
++                        break
++                    i = i + k
++                    continue
++                if rawdata.startswith("<!", i):
++                    # This is some sort of declaration; in "HTML as
++                    # deployed," this should only be the document type
++                    # declaration ("<!DOCTYPE html...>").
++                    k = self.parse_declaration(i)
++                    if k < 0:
++                        break
++                    i = k
++                    continue
++            elif rawdata[i] == '&':
++                if self.literal:
++                    self.handle_data(rawdata[i])
++                    i = i + 1
++                    continue
++                match = charref.match(rawdata, i)
++                if match:
++                    name = match.group(1)
++                    self.handle_charref(name)
++                    i = match.end(0)
++                    if rawdata[i - 1] != ';':
++                        i = i - 1
++                    continue
++                match = entityref.match(rawdata, i)
++                if match:
++                    name = match.group(1)
++                    self.handle_entityref(name)
++                    i = match.end(0)
++                    if rawdata[i - 1] != ';':
++                        i = i - 1
++                    continue
++            else:
++                self.error('neither < nor & ??')
++            # We get here only if incomplete matches but
++            # nothing else
++            match = incomplete.match(rawdata, i)
++            if not match:
++                self.handle_data(rawdata[i])
++                i = i + 1
++                continue
++            j = match.end(0)
++            if j == n:
++                break  # Really incomplete
++            self.handle_data(rawdata[i:j])
++            i = j
++        # end while
++        if end and i < n:
++            self.handle_data(rawdata[i:n])
++            i = n
++        self.rawdata = rawdata[i:]
++        # XXX if end: check for empty stack
++
++    # Extensions for the DOCTYPE scanner:
++    _decl_otherchars = '='
++
++    # Internal -- parse processing instr, return length or -1 if not terminated
++    def parse_pi(self, i):
++        rawdata = self.rawdata
++        if rawdata[i:i + 2] != '<?':
++            self.error('unexpected call to parse_pi()')
++        match = piclose.search(rawdata, i + 2)
++        if not match:
++            return -1
++        j = match.start(0)
++        self.handle_pi(rawdata[i + 2:j])
++        j = match.end(0)
++        return j - i
++
++    def get_starttag_text(self):
++        return self.__starttag_text
++
++    # Internal -- handle starttag, return end index or -1 if not terminated
++    def parse_starttag(self, i):
++        self.__starttag_text = None
++        start_pos = i
++        rawdata = self.rawdata
++        if shorttagopen.match(rawdata, i):
++            # SGML shorthand: <tag/data/ == <tag>data</tag>
++            # XXX Can data contain &... (entity or char refs)?
++            # XXX Can data contain < or > (tag characters)?
++            # XXX Can there be whitespace before the first /?
++            match = shorttag.match(rawdata, i)
++            if not match:
++                return -1
++            tag, data = match.group(1, 2)
++            self.__starttag_text = '<%s/' % tag
++            tag = tag.lower()
++            k = match.end(0)
++            self.finish_shorttag(tag, data)
++            self.__starttag_text = rawdata[start_pos:match.end(1) + 1]
++            return k
++        # XXX The following should skip matching quotes (' or ")
++        # As a shortcut way to exit, this isn't so bad, but shouldn't
++        # be used to locate the actual end of the start tag since the
++        # < or > characters may be embedded in an attribute value.
++        match = endbracket.search(rawdata, i + 1)
++        if not match:
++            return -1
++        j = match.start(0)
++        # Now parse the data between i+1 and j into a tag and attrs
++        attrs = []
++        if rawdata[i:i + 2] == '<>':
++            # SGML shorthand: <> == <last open tag seen>
++            k = j
++            tag = self.lasttag
++        else:
++            match = tagfind.match(rawdata, i + 1)
++            if not match:
++                self.error('unexpected call to parse_starttag')
++            k = match.end(0)
++            tag = rawdata[i + 1:k].lower()
++            self.lasttag = tag
++        while k < j:
++            match = attrfind.match(rawdata, k)
++            if not match:
++                break
++            attrname, rest, attrvalue = match.group(1, 2, 3)
++            if not rest:
++                attrvalue = attrname
++            else:
++                if (
++                    attrvalue[:1] == "'" == attrvalue[-1:] or
++                    attrvalue[:1] == '"' == attrvalue[-1:]
++                ):
++                    # strip quotes
++                    attrvalue = attrvalue[1:-1]
++                attrvalue = self.entity_or_charref.sub(self._convert_ref, attrvalue)
++            attrs.append((attrname.lower(), attrvalue))
++            k = match.end(0)
++        if rawdata[j] == '>':
++            j = j + 1
++        self.__starttag_text = rawdata[start_pos:j]
++        self.finish_starttag(tag, attrs)
++        return j
++
++    # Internal -- convert entity or character reference
++    def _convert_ref(self, match):
++        if match.group(2):
++            return self.convert_charref(match.group(2)) or \
++                '&#%s%s' % match.groups()[1:]
++        elif match.group(3):
++            return self.convert_entityref(match.group(1)) or \
++                '&%s;' % match.group(1)
++        else:
++            return '&%s' % match.group(1)
++
++    # Internal -- parse endtag, return end index or -1 if not terminated
++    def parse_endtag(self, i):
++        rawdata = self.rawdata
++        match = endbracket.search(rawdata, i + 1)
++        if not match:
++            return -1
++        j = match.start(0)
++        tag = rawdata[i + 2:j].strip().lower()
++        if rawdata[j] == '>':
++            j = j + 1
++        self.finish_endtag(tag)
++        return j
++
++    # Internal -- finish parsing of <tag/data/ (same as <tag>data</tag>)
++    def finish_shorttag(self, tag, data):
++        self.finish_starttag(tag, [])
++        self.handle_data(data)
++        self.finish_endtag(tag)
++
++    # Internal -- finish processing of start tag
++    # Return -1 for unknown tag, 0 for open-only tag, 1 for balanced tag
++    def finish_starttag(self, tag, attrs):
++        try:
++            method = getattr(self, 'start_' + tag)
++        except AttributeError:
++            try:
++                method = getattr(self, 'do_' + tag)
++            except AttributeError:
++                self.unknown_starttag(tag, attrs)
++                return -1
++            else:
++                self.handle_starttag(tag, method, attrs)
++                return 0
++        else:
++            self.stack.append(tag)
++            self.handle_starttag(tag, method, attrs)
++            return 1
++
++    # Internal -- finish processing of end tag
++    def finish_endtag(self, tag):
++        if not tag:
++            found = len(self.stack) - 1
++            if found < 0:
++                self.unknown_endtag(tag)
++                return
++        else:
++            if tag not in self.stack:
++                try:
++                    method = getattr(self, 'end_' + tag)
++                except AttributeError:
++                    self.unknown_endtag(tag)
++                else:
++                    self.report_unbalanced(tag)
++                return
++            found = len(self.stack)
++            for i in range(found):
++                if self.stack[i] == tag:
++                    found = i
++        while len(self.stack) > found:
++            tag = self.stack[-1]
++            try:
++                method = getattr(self, 'end_' + tag)
++            except AttributeError:
++                method = None
++            if method:
++                self.handle_endtag(tag, method)
++            else:
++                self.unknown_endtag(tag)
++            del self.stack[-1]
++
++    # Overridable -- handle start tag
++    def handle_starttag(self, tag, method, attrs):
++        method(attrs)
++
++    # Overridable -- handle end tag
++    def handle_endtag(self, tag, method):
++        method()
++
++    # Example -- report an unbalanced </...> tag.
++    def report_unbalanced(self, tag):
++        if self.verbose:
++            print('*** Unbalanced </' + tag + '>')
++            print('*** Stack:', self.stack)
++
++    def convert_charref(self, name):
++        """Convert character reference, may be overridden."""
++        try:
++            n = int(name)
++        except ValueError:
++            return
++        if not 0 <= n <= 127:
++            return
++        return self.convert_codepoint(n)
++
++    def convert_codepoint(self, codepoint):
++        return chr(codepoint)
++
++    def handle_charref(self, name):
++        """Handle character reference, no need to override."""
++        replacement = self.convert_charref(name)
++        if replacement is None:
++            self.unknown_charref(name)
++        else:
++            self.handle_data(replacement)
++
++    # Definition of entities -- derived classes may override
++    entitydefs = \
++            {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}
++
++    def convert_entityref(self, name):
++        """Convert entity references.
++
++        As an alternative to overriding this method; one can tailor the
++        results by setting up the self.entitydefs mapping appropriately.
++        """
++        table = self.entitydefs
++        if name in table:
++            return table[name]
++        else:
++            return
++
++    def handle_entityref(self, name):
++        """Handle entity references, no need to override."""
++        replacement = self.convert_entityref(name)
++        if replacement is None:
++            self.unknown_entityref(name)
++        else:
++            self.handle_data(replacement)
++
++    # Example -- handle data, should be overridden
++    def handle_data(self, data):
++        pass
++
++    # Example -- handle comment, could be overridden
++    def handle_comment(self, data):
++        pass
++
++    # Example -- handle declaration, could be overridden
++    def handle_decl(self, decl):
++        pass
++
++    # Example -- handle processing instruction, could be overridden
++    def handle_pi(self, data):
++        pass
++
++    # To be overridden -- handlers for unknown objects
++    def unknown_starttag(self, tag, attrs):
++        pass
++
++    def unknown_endtag(self, tag):
++        pass
++
++    def unknown_charref(self, ref):
++        pass
++
++    def unknown_entityref(self, ref):
++        pass
++
++
++class TestSGMLParser(SGMLParser):
++
++    def __init__(self, verbose=0):
++        self.testdata = ""
++        SGMLParser.__init__(self, verbose)
++
++    def handle_data(self, data):
++        self.testdata = self.testdata + data
++        if len(repr(self.testdata)) >= 70:
++            self.flush()
++
++    def flush(self):
++        data = self.testdata
++        if data:
++            self.testdata = ""
++            print('data:', repr(data))
++
++    def handle_comment(self, data):
++        self.flush()
++        r = repr(data)
++        if len(r) > 68:
++            r = r[:32] + '...' + r[-32:]
++        print('comment:', r)
++
++    def unknown_starttag(self, tag, attrs):
++        self.flush()
++        if not attrs:
++            print('start tag: <' + tag + '>')
++        else:
++            print('start tag: <' + tag, end=' ')
++            for name, value in attrs:
++                print(name + '=' + '"' + value + '"', end=' ')
++            print('>')
++
++    def unknown_endtag(self, tag):
++        self.flush()
++        print('end tag: </' + tag + '>')
++
++    def unknown_entityref(self, ref):
++        self.flush()
++        print('*** unknown entity ref: &' + ref + ';')
++
++    def unknown_charref(self, ref):
++        self.flush()
++        print('*** unknown char ref: &#' + ref + ';')
++
++    def unknown_decl(self, data):
++        self.flush()
++        print('*** unknown decl: [' + data + ']')
++
++    def close(self):
++        SGMLParser.close(self)
++        self.flush()
++
++
++def test(args=None):
++    import sys
++
++    if args is None:
++        args = sys.argv[1:]
++
++    if args and args[0] == '-s':
++        args = args[1:]
++        klass = SGMLParser
++    else:
++        klass = TestSGMLParser
++
++    if args:
++        file = args[0]
++    else:
++        file = 'test.html'
++
++    if file == '-':
++        f = sys.stdin
++    else:
++        try:
++            f = open(file, 'r')
++        except IOError as msg:
++            print(file, ":", msg)
++            sys.exit(1)
++
++    data = f.read()
++    if f is not sys.stdin:
++        f.close()
++
++    x = klass()
++    for c in data:
++        x.feed(c)
++    x.close()
++
++
++if __name__ == '__main__':
++    test()
diff --git a/srcpkgs/calibre/patches/disable-unrar-test.patch b/srcpkgs/calibre/patches/disable-unrar-test.patch
deleted file mode 100644
index bcbe25335a3..00000000000
--- a/srcpkgs/calibre/patches/disable-unrar-test.patch
+++ /dev/null
@@ -1,27 +0,0 @@
-Source: Debian
-
----
- src/calibre/test_build.py |    5 +++++
- 1 file changed, 5 insertions(+)
-
---- src/calibre/test_build.py
-+++ src/calibre/test_build.py
-@@ -17,6 +17,10 @@ from polyglot.builtins import iteritems,
- 
- is_ci = os.environ.get('CI', '').lower() == 'true'
- 
-+try:
-+    import unrardll
-+except:
-+    unrardll = None
- 
- class BuildTest(unittest.TestCase):
- 
-@@ -265,6 +269,7 @@ class BuildTest(unittest.TestCase):
-         from calibre.gui2.win_file_dialogs import test
-         test()
- 
-+    @unittest.skipUnless(unrardll, 'Module unrardll is missing')
-     def test_unrar(self):
-         from calibre.utils.unrar import test_basic
-         test_basic()
diff --git a/srcpkgs/calibre/template b/srcpkgs/calibre/template
index f9ae0a5923b..b02fac7df75 100644
--- a/srcpkgs/calibre/template
+++ b/srcpkgs/calibre/template
@@ -1,25 +1,26 @@
 # Template file for 'calibre'
 pkgname=calibre
 version=4.13.0
-revision=3
-build_style=python2-module
+revision=4
+build_style=python3-module
 pycompile_dirs="/usr/lib/calibre/"
-hostmakedepends="pkg-config python-BeautifulSoup4 python-Pillow python-PyQt5-svg
- python-PyQt5-webengine python-apsw python-css-parser python-dateutil
- python-devel python-html5-parser python-msgpack python-regex qt5-qmake
+hostmakedepends="pkg-config python3-BeautifulSoup4 python3-Pillow python3-PyQt5-svg
+ python3-PyQt5-webengine python3-apsw python3-css-parser python3-dateutil
+ python3-devel python3-html5-parser python3-msgpack python3-regex qt5-qmake
  xdg-utils rapydscript-ng"
 makedepends="glib-devel libchmlib-devel libinput-devel libmagick-devel libmtp-devel
- libpodofo-devel libwmf-devel libxkbcommon-devel python-PyQt5-devel
+ libpodofo-devel libwmf-devel libxkbcommon-devel python3-PyQt5-devel
  qt5-devel sqlite-devel tslib-devel hunspell-devel hyphen-devel"
-depends="desktop-file-utils optipng poppler-utils python-BeautifulSoup4
- python-Markdown python-Pillow python-PyQt5-svg
- python-PyQt5-webengine python-PyQt5-webchannel python-Pygments python-apsw
- python-css-parser python-cssselect python-dateutil python-dbus
- python-dnspython python-feedparser python-html5-parser python-mechanize
- python-msgpack python-netifaces python-psutil python-regex python-html2text
- qt5-webengine"
+depends="desktop-file-utils optipng poppler-utils python3-BeautifulSoup4
+ python3-Markdown python3-Pillow python3-PyQt5-svg
+ python3-PyQt5-webengine python3-PyQt5-webchannel python3-Pygments python3-apsw
+ python3-css-parser python3-cssselect python3-dateutil python3-dbus
+ python3-dnspython python3-feedparser python3-html5-parser python3-mechanize
+ python3-msgpack python3-netifaces python3-psutil python3-regex python3-html2text
+ qt5-webengine python3-zeroconf"
+checkdepends="$depends libjpeg-turbo-tools"
 short_desc="Ebook management application"
-maintainer="bra1nwave <bra1nwave@protonmail.com>"
+maintainer="Orphaned <orphan@voidlinux.org>"
 license="GPL-3.0-only"
 homepage="https://calibre-ebook.com"
 changelog="https://raw.githubusercontent.com/kovidgoyal/calibre/master/Changelog.yaml"
@@ -39,20 +40,20 @@ do_configure() {
 }
 
 do_build() {
-	python2 setup.py build
-	python2 setup.py gui
-	python2 setup.py rapydscript
+	CALIBRE_PY3_PORT=1 python3 setup.py build
+	CALIBRE_PY3_PORT=1 python3 setup.py gui
+	CALIBRE_PY3_PORT=1 python3 setup.py rapydscript
 }
 
-# requires an X11 environment...
 do_check() {
-	:
+	CALIBRE_PY3_PORT=1 python3 setup.py test \
+		--exclude-test-name unrar
 }
 
 do_install() {
 	vmkdir usr/share/zsh/site-functions
 
-	python2 setup.py \
+	CALIBRE_PY3_PORT=1 python3 setup.py \
 		install --prefix=/usr --staging-root=${DESTDIR}/usr
 
 	for m in man-pages/man1/*.1; do