From 54918e601b60e8a00e4f2fb27c23381609f718b1 Mon Sep 17 00:00:00 2001 From: chrysos349 Date: Tue, 19 Sep 2023 04:03:59 +0300 Subject: [PATCH 1/4] leptonica: update to 1.83.1 --- common/shlibs | 2 +- .../patches/fix-flaky-test-on-i686.patch | 70 ------------------- srcpkgs/leptonica/template | 23 ++++-- 3 files changed, 19 insertions(+), 76 deletions(-) delete mode 100644 srcpkgs/leptonica/patches/fix-flaky-test-on-i686.patch diff --git a/common/shlibs b/common/shlibs index c9d59ef3b97ca..16ce591aa3592 100644 --- a/common/shlibs +++ b/common/shlibs @@ -2294,7 +2294,7 @@ libOkteta3Gui.so.0 okteta-0.26.0_1 libhttp_parser.so.2.9 http-parser-2.9.0_1 libmaa.so.4 libmaa-1.4.2_1 libcodeblocks.so.0 codeblocks-13.12_1 -liblept.so.5 leptonica-1.73_1 +libleptonica.so.6 leptonica-1.83.1_1 libtesseract.so.4 tesseract-ocr-4.0.0_1 libffmpegthumbnailer.so.4 ffmpegthumbnailer-2.0.10_1 libopenraw.so.7 libopenraw-0.1.0_1 diff --git a/srcpkgs/leptonica/patches/fix-flaky-test-on-i686.patch b/srcpkgs/leptonica/patches/fix-flaky-test-on-i686.patch deleted file mode 100644 index bec1a2482f414..0000000000000 --- a/srcpkgs/leptonica/patches/fix-flaky-test-on-i686.patch +++ /dev/null @@ -1,70 +0,0 @@ -From ea2bb8c9cf61d3eba2589cfaac05f59a33b4110d Mon Sep 17 00:00:00 2001 -From: danblooomberg -Date: Sun, 14 Nov 2021 14:52:24 -0800 -Subject: [PATCH] Fix flaky hash_reg test on i686 * The sets that are generated - from *SelectRange() functions can depend on the platform, resulting in - intersection sizes that differ by 1. * So, loosen the comparison to allow a - difference of 1. 
- ---- - prog/hash_reg.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - -diff --git a/prog/hash_reg.c b/prog/hash_reg.c -index 8b408d6d..3414ba90 100644 ---- a/prog/hash_reg.c -+++ b/prog/hash_reg.c -@@ -100,7 +100,7 @@ L_REGPARAMS *rp; - sarrayIntersectionByAset(sa1, sa2, &sa3); - c1 = sarrayGetCount(sa3); - sarrayDestroy(&sa3); -- regTestCompareValues(rp, string_intersection, c1, 0); /* 2 */ -+ regTestCompareValues(rp, string_intersection, c1, 1); /* 2 */ - if (rp->display) lept_stderr(" aset: intersection size = %d\n", c1); - sarrayUnionByAset(sa1, sa2, &sa3); - c1 = sarrayGetCount(sa3); -@@ -123,7 +123,7 @@ L_REGPARAMS *rp; - sarrayIntersectionByHmap(sa1, sa2, &sa3); - c1 = sarrayGetCount(sa3); - sarrayDestroy(&sa3); -- regTestCompareValues(rp, string_intersection, c1, 0); /* 6 */ -+ regTestCompareValues(rp, string_intersection, c1, 1); /* 6 */ - if (rp->display) lept_stderr(" hmap: intersection size = %d\n", c1); - sarrayUnionByHmap(sa1, sa2, &sa3); - c1 = sarrayGetCount(sa3); -@@ -160,7 +160,7 @@ L_REGPARAMS *rp; - ptaIntersectionByAset(pta1, pta2, &pta3); - c1 = ptaGetCount(pta3); - ptaDestroy(&pta3); -- regTestCompareValues(rp, pta_intersection, c1, 0); /* 10 */ -+ regTestCompareValues(rp, pta_intersection, c1, 1); /* 10 */ - if (rp->display) lept_stderr(" aset: intersection size = %d\n", c1); - ptaUnionByAset(pta1, pta2, &pta3); - c1 = ptaGetCount(pta3); -@@ -182,7 +182,7 @@ L_REGPARAMS *rp; - ptaIntersectionByHmap(pta1, pta2, &pta3); - c1 = ptaGetCount(pta3); - ptaDestroy(&pta3); -- regTestCompareValues(rp, pta_intersection, c1, 0); /* 14 */ -+ regTestCompareValues(rp, pta_intersection, c1, 1); /* 14 */ - if (rp->display) lept_stderr(" hmap: intersection size = %d\n", c1); - ptaUnionByHmap(pta1, pta2, &pta3); - c1 = ptaGetCount(pta3); -@@ -220,7 +220,7 @@ L_REGPARAMS *rp; - l_dnaIntersectionByAset(da1, da2, &da3); - c1 = l_dnaGetCount(da3); - l_dnaDestroy(&da3); -- regTestCompareValues(rp, da_intersection, c1, 0); /* 18 */ -+ 
regTestCompareValues(rp, da_intersection, c1, 1); /* 18 */ - if (rp->display) lept_stderr(" aset: intersection size = %d\n", c1); - l_dnaUnionByAset(da1, da2, &da3); - c1 = l_dnaGetCount(da3); -@@ -242,7 +242,7 @@ L_REGPARAMS *rp; - l_dnaIntersectionByHmap(da1, da2, &da3); - c1 = l_dnaGetCount(da3); - l_dnaDestroy(&da3); -- regTestCompareValues(rp, da_intersection, c1, 0); /* 22 */ -+ regTestCompareValues(rp, da_intersection, c1, 1); /* 22 */ - if (rp->display) lept_stderr(" hmap: intersection size = %d\n", c1); - l_dnaUnionByHmap(da1, da2, &da3); - c1 = l_dnaGetCount(da3); diff --git a/srcpkgs/leptonica/template b/srcpkgs/leptonica/template index 17256b7b157b4..04e8c9997a2f1 100644 --- a/srcpkgs/leptonica/template +++ b/srcpkgs/leptonica/template @@ -1,9 +1,9 @@ # Template file for 'leptonica' pkgname=leptonica -version=1.82.0 -revision=2 +version=1.83.1 +revision=1 build_style=gnu-configure -hostmakedepends="pkg-config" +hostmakedepends="pkg-config automake libtool" makedepends="libopenjpeg2-devel libwebp-devel" checkdepends="which gnuplot" short_desc="Image processing and analysis library" @@ -11,8 +11,17 @@ maintainer="Orphaned " license="BSD-2-Clause" homepage="http://leptonica.org/" changelog="http://leptonica.org/source/version-notes.html" -distfiles="http://leptonica.org/source/${pkgname}-${version}.tar.gz" -checksum=155302ee914668c27b6fe3ca9ff2da63b245f6d62f3061c8f27563774b8ae2d6 +distfiles="https://github.com/DanBloomberg/leptonica/archive/${version}.tar.gz" +checksum=4289d0a4224b614010072253531c0455a33a4d7c7a0017fe7825ed382290c0da + +pre_check() { + # boxa3_reg test fails for x86_64{,-musl} in CI build + vsed -i prog/Makefile.am -e "s/boxa3_reg//" +} + +pre_configure() { + ./autogen.sh +} post_install() { vdoc moller52.jpg @@ -28,6 +37,7 @@ leptonica-devel_package() { vmove usr/lib/cmake vmove usr/lib/pkgconfig vmove "usr/lib/*.so" + vmove "usr/lib/*.a" vdoc style-guide.txt } } @@ -41,3 +51,6 @@ leptonica-examples_package() { vcopy prog 
usr/share/leptonica } } + +## add to common/shlibs +## libleptonica.so.6 leptonica-1.83.1_1 From dd157f4bf2484fce6af6381be5247f977334f379 Mon Sep 17 00:00:00 2001 From: chrysos349 Date: Tue, 19 Sep 2023 04:07:50 +0300 Subject: [PATCH 2/4] tesseract-ocr: update to 5.3.2 --- common/shlibs | 2 +- .../{tesseract-ocr-kur => tesseract-ocr-kmr} | 0 srcpkgs/tesseract-ocr-kur_ara | 1 - srcpkgs/tesseract-ocr/files/COPYING | 14 ------ .../tesseract-ocr/patches/disable-neon.patch | 14 ++++++ .../tesseract-ocr/patches/musl-sys-time.patch | 17 +++---- srcpkgs/tesseract-ocr/template | 48 ++++++++----------- 7 files changed, 43 insertions(+), 53 deletions(-) rename srcpkgs/{tesseract-ocr-kur => tesseract-ocr-kmr} (100%) delete mode 120000 srcpkgs/tesseract-ocr-kur_ara delete mode 100644 srcpkgs/tesseract-ocr/files/COPYING create mode 100644 srcpkgs/tesseract-ocr/patches/disable-neon.patch diff --git a/common/shlibs b/common/shlibs index 16ce591aa3592..ea2873e6cd085 100644 --- a/common/shlibs +++ b/common/shlibs @@ -2295,7 +2295,7 @@ libhttp_parser.so.2.9 http-parser-2.9.0_1 libmaa.so.4 libmaa-1.4.2_1 libcodeblocks.so.0 codeblocks-13.12_1 libleptonica.so.6 leptonica-1.83.1_1 -libtesseract.so.4 tesseract-ocr-4.0.0_1 +libtesseract.so.5 tesseract-ocr-5.3.2_1 libffmpegthumbnailer.so.4 ffmpegthumbnailer-2.0.10_1 libopenraw.so.7 libopenraw-0.1.0_1 libopenrawgnome.so.7 libopenraw-0.1.0_1 diff --git a/srcpkgs/tesseract-ocr-kur b/srcpkgs/tesseract-ocr-kmr similarity index 100% rename from srcpkgs/tesseract-ocr-kur rename to srcpkgs/tesseract-ocr-kmr diff --git a/srcpkgs/tesseract-ocr-kur_ara b/srcpkgs/tesseract-ocr-kur_ara deleted file mode 120000 index 79bcf15f05ba5..0000000000000 --- a/srcpkgs/tesseract-ocr-kur_ara +++ /dev/null @@ -1 +0,0 @@ -tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr/files/COPYING b/srcpkgs/tesseract-ocr/files/COPYING deleted file mode 100644 index 11e05af425fc8..0000000000000 --- a/srcpkgs/tesseract-ocr/files/COPYING +++ /dev/null @@ 
-1,14 +0,0 @@ -This repository contains language data for Tesseract Open Source -OCR Engine. All data in the repository are licensed under the Apache -License: - -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** http://www.apache.org/licenses/LICENSE-2.0 -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. - diff --git a/srcpkgs/tesseract-ocr/patches/disable-neon.patch b/srcpkgs/tesseract-ocr/patches/disable-neon.patch new file mode 100644 index 0000000000000..d491ef1e47b81 --- /dev/null +++ b/srcpkgs/tesseract-ocr/patches/disable-neon.patch @@ -0,0 +1,14 @@ +--- a/configure.ac ++++ b/configure.ac +@@ -177,6 +177,11 @@ + AC_DEFINE([HAVE_NEON], [1], [Enable NEON instructions]) + ;; + ++ arm|armv7l) ++ ++ AC_MSG_WARN([No compiler options for $host_cpu]) ++ ;; ++ + arm*) + + AX_CHECK_COMPILE_FLAG([-mfpu=neon], [neon=true], [neon=false], [$WERROR]) diff --git a/srcpkgs/tesseract-ocr/patches/musl-sys-time.patch b/srcpkgs/tesseract-ocr/patches/musl-sys-time.patch index 9c6337d188639..5c75864248fe8 100644 --- a/srcpkgs/tesseract-ocr/patches/musl-sys-time.patch +++ b/srcpkgs/tesseract-ocr/patches/musl-sys-time.patch @@ -1,12 +1,13 @@ ---- a/src/ccutil/ocrclass.h 2019-07-07 14:34:08.000000000 +0200 -+++ b/src/ccutil/ocrclass.h 2019-07-08 10:47:15.347415888 +0200 -@@ -31,6 +31,9 @@ - #ifdef _WIN32 - #include // for timeval - #endif +--- a/include/tesseract/ocrclass.h ++++ b/include/tesseract/ocrclass.h +@@ -29,6 +29,10 @@ + + #include + #include +#ifndef __GLIBC__ +#include +#endif ++ + + namespace tesseract { - 
/********************************************************************** - * EANYCODE_CHAR diff --git a/srcpkgs/tesseract-ocr/template b/srcpkgs/tesseract-ocr/template index de6df3a768d31..10e80e21f3d27 100644 --- a/srcpkgs/tesseract-ocr/template +++ b/srcpkgs/tesseract-ocr/template @@ -1,14 +1,13 @@ # Template file for 'tesseract-ocr' pkgname=tesseract-ocr -version=4.1.1 -revision=9 -_tessdataver=4.0.0 +version=5.3.2 +revision=1 +_tessdataver=4.1.0 create_wrksrc=yes build_style=gnu-configure configure_args="LIBLEPT_HEADERSDIR=${XBPS_CROSS_BASE}/usr/include $(vopt_enable openmp)" -make_build_args="all training" hostmakedepends="automake libtool pkg-config leptonica libxslt asciidoc" -makedepends="cairo-devel pango-devel leptonica-devel $(vopt_if openmp libgomp-devel) icu-devel" +makedepends="cairo-devel pango-devel leptonica-devel $(vopt_if openmp libgomp-devel) icu-devel libarchive-devel libcurl-devel" short_desc="Tesseract Open Source OCR engine" maintainer="Orphaned " license="Apache-2.0" @@ -16,8 +15,8 @@ homepage="https://github.com/tesseract-ocr/tesseract" distfiles=" https://github.com/tesseract-ocr/tesseract/archive/${version}.tar.gz>${pkgname}-${version}.tar.gz https://github.com/tesseract-ocr/tessdata/archive/${_tessdataver}.tar.gz>tessdata-${_tessdataver}.tar.gz" -checksum="2a66ff0d8595bff8f04032165e6c936389b1e5727c3ce5a27b3e059d218db1cb - 38c637d3a1763f6c3d32e8f1d979f045668676ec5feb8ee1869ee77cedd31b08" +checksum="b99d30fed47360d7168c3e25d194a7416ceb1d9e4b232c7f121cc5f77084d3e7 + 990fffb9b7a9b52dc9a2d053a9ef6852ca2b72bd8dfb22988b0b990a700fd3c7" build_options="openmp" build_options_default="openmp" @@ -46,8 +45,8 @@ pkg_lang() { post_extract() { mv tesseract-${version}/* . 
+ rm -rf tessdata-${_tessdataver}/{tessconfigs,configs,pdf.ttf} mv tessdata-${_tessdataver}/* ${wrksrc}/tessdata - rmdir tessdata-${_tessdataver} } pre_configure() { NOCONFIGURE=1 ./autogen.sh @@ -55,6 +54,11 @@ pre_configure() { do_check() { : # submodule not in tarball } +do_build() { + # fails to build with make_build_args="all training" + make ${makejobs} all + make ${makejobs} training +} post_install() { local lang # Rename binary to avoid conflict with tesseract package @@ -62,7 +66,6 @@ post_install() { mv ${DESTDIR}/usr/share/man/man1/tesseract{,-ocr}.1 vdoc ChangeLog vdoc README.md - vlicense ${FILESDIR}/COPYING LICENSE-tessdata # Move the pseudo languges "equ" (math / equation detection) and # "osd" (orientation and script detection) to the main package for lang in equ osd; do @@ -79,13 +82,6 @@ tesseract-ocr-tools_package() { vmkdir usr/share/tesseract vmkdir usr/share/man/man1 vmkdir usr/share/man/man5 - # Copy shell scripts - for f in language-specific.sh tesstrain.sh tesstrain_utils.sh; do - if [ -e ${wrksrc}/training/${f} ]; then - cp -a ${wrksrc}/training/${f} \ - ${PKGDESTDIR}/usr/share/tesseract - fi - done # Move tool manual pages for f in ambiguous_words cntraining combine_tessdata \ dawg2wordlist mftraining shapeclustering unicharambigs \ @@ -99,7 +95,8 @@ tesseract-ocr-tools_package() { } } tesseract-ocr-devel_package() { - depends="${sourcepkg}>=${version}_${revision}" + depends="${sourcepkg}>=${version}_${revision} leptonica-devel + libarchive-devel libcurl-devel" short_desc+=" - development files" pkg_install() { vmove usr/include/tesseract @@ -129,7 +126,7 @@ tesseract-ocr-all_package() { for lang in afr amh ara asm aze aze_cyrl bel ben bod bos bre bul cat ceb \ ces chi_sim chi_tra chr cos cym dan deu div dzo ell eng enm epo est eus fao \ fas fil fin fra frk frm fry gla gle glg grc guj hat heb hin hrv hun hye iku ind isl ita \ - ita_old jav jpn kan kat kat_old kaz khm kir kor kur kur_ara lao lat lav lit ltz mal mar \ + ita_old jav jpn kan 
kat kat_old kaz khm kir kmr kor lao lat lav lit ltz mal mar \ mkd mlt mon mri msa mya nep nld nor oci ori pan pol por que pus ron rus san sin slk slv \ snd spa spa_old sqi srp srp_latn sun swa swe syr tam tat tel tgk tgl tha tir ton tur \ uig ukr urd uzb uzb_cyrl vie yid yor \ @@ -576,23 +573,16 @@ tesseract-ocr-kir_package() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } -tesseract-ocr-kor_package() { - depends="${sourcepkg}>=${version}_${revision}" - short_desc+=" - Korean language data" - pkg_install() { - $(pkg_lang ${pkgname#tesseract-ocr-}) - } -} -tesseract-ocr-kur_package() { +tesseract-ocr-kmr_package() { depends="${sourcepkg}>=${version}_${revision}" - short_desc+=" - Kurdish language data" + short_desc+=" - Kurmanji (Kurdish - Latin Script) language data" pkg_install() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } -tesseract-ocr-kur_ara_package() { +tesseract-ocr-kor_package() { depends="${sourcepkg}>=${version}_${revision}" - short_desc+=" - Kurdish (Arabic) language data" + short_desc+=" - Korean language data" pkg_install() { $(pkg_lang ${pkgname#tesseract-ocr-}) } From 38d40c69873f9b31eb65938c9961f71f63687cf4 Mon Sep 17 00:00:00 2001 From: chrysos349 Date: Tue, 19 Sep 2023 04:09:24 +0300 Subject: [PATCH 3/4] arcan: revbump for tesseract-5.3.2 --- srcpkgs/arcan/template | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/srcpkgs/arcan/template b/srcpkgs/arcan/template index efd0afe10576d..8d1c60bf0ef54 100644 --- a/srcpkgs/arcan/template +++ b/srcpkgs/arcan/template @@ -2,7 +2,7 @@ # !! 
keep synced with: acfgfs aclip aloadimage pkgname=arcan version=0.6.2.1 -revision=1 +revision=2 create_wrksrc=yes build_wrksrc=arcan/src build_style=cmake @@ -17,7 +17,7 @@ makedepends="MesaLib-devel ffmpeg-devel file-devel freetype-devel liblzma-devel vlc-devel SDL2-devel xcb-util-devel xcb-util-wm-devel $(vopt_if tts 'libespeak-ng-devel') $(vopt_if luajit 'LuaJIT-devel' 'lua51-devel') - $(vopt_if tesseract 'tesseract-ocr-devel leptonica-devel') + $(vopt_if tesseract 'tesseract-ocr-devel') $(vopt_if wayland 'wayland-devel wayland-protocols libxcb-devel xcb-util-wm-devel') " short_desc="Combined display server, multimedia framework and game engine" @@ -32,6 +32,12 @@ checksum="7bf083412bc61555472877313c13116431a0a36fccbf142f97559db43b4a1475 export CMAKE_GENERATOR="Unix Makefiles" +case "$XBPS_TARGET_MACHINE" in + i686*) + configure_args+=" -DSSE_42_DETECTED_EXITCODE=0" + ;; +esac + replaces="arcan-wayland>=0" build_options="luajit tesseract tts wayland" From b4bf71c6317302ee232d2d359272135acb56e66b Mon Sep 17 00:00:00 2001 From: chrysos349 Date: Tue, 19 Sep 2023 04:11:27 +0300 Subject: [PATCH 4/4] ccextractor: update to 0.94, build for tesseract-5.3.2 --- .../fix-autoconf-tesseract-detection.patch | 12 ++ srcpkgs/ccextractor/patches/fix-ocr-c.patch | 157 ++++++++++++++++++ srcpkgs/ccextractor/template | 21 ++- 3 files changed, 184 insertions(+), 6 deletions(-) create mode 100644 srcpkgs/ccextractor/patches/fix-autoconf-tesseract-detection.patch create mode 100644 srcpkgs/ccextractor/patches/fix-ocr-c.patch diff --git a/srcpkgs/ccextractor/patches/fix-autoconf-tesseract-detection.patch b/srcpkgs/ccextractor/patches/fix-autoconf-tesseract-detection.patch new file mode 100644 index 0000000000000..ef8c01eb4bb04 --- /dev/null +++ b/srcpkgs/ccextractor/patches/fix-autoconf-tesseract-detection.patch @@ -0,0 +1,12 @@ +diff -ru a/linux/configure.ac b/linux/configure.ac +--- a/linux/configure.ac 2021-12-15 20:05:37.000000000 +0300 ++++ b/linux/configure.ac 2023-09-14 
05:40:30.267563620 +0300 +@@ -154,7 +154,7 @@ + AM_CONDITIONAL(HARDSUBX_IS_ENABLED, [ test x$hardsubx = xtrue ]) + AM_CONDITIONAL(OCR_IS_ENABLED, [ test x$ocr = xtrue || test x$hardsubx = xtrue ]) + AM_CONDITIONAL(FFMPEG_IS_ENABLED, [ test x$ffmpeg = xtrue ]) +-AM_CONDITIONAL(TESSERACT_PRESENT, [ test ! -z `pkg-config --libs-only-l --silence-errors tesseract` ]) ++AM_CONDITIONAL(TESSERACT_PRESENT, [ test -n "$(pkg-config --libs-only-l --silence-errors tesseract)" ]) + AM_CONDITIONAL(TESSERACT_PRESENT_RPI, [ test -d "/usr/include/tesseract" && test `ls -A /usr/include/tesseract | wc -l` -gt 0 ]) + AM_CONDITIONAL(SYS_IS_LINUX, [ test `uname -s` = "Linux"]) + AM_CONDITIONAL(SYS_IS_MAC, [ test `uname -s` = "Darwin"]) diff --git a/srcpkgs/ccextractor/patches/fix-ocr-c.patch b/srcpkgs/ccextractor/patches/fix-ocr-c.patch new file mode 100644 index 0000000000000..ca33872470971 --- /dev/null +++ b/srcpkgs/ccextractor/patches/fix-ocr-c.patch @@ -0,0 +1,157 @@ +diff -ru a/src/lib_ccx/ocr.c b/src/lib_ccx/ocr.c +--- a/src/lib_ccx/ocr.c 2021-12-15 20:03:45.000000000 +0300 ++++ b/src/lib_ccx/ocr.c 2023-09-13 23:06:42.538986623 +0300 +@@ -1,10 +1,10 @@ + #include +-#include "png.h" ++#include + #include "lib_ccx.h" + #ifdef ENABLE_OCR + #include +-#include "ccx_common_constants.h" + #include ++#include "ccx_common_constants.h" + #include + #include "ccx_encoders_helpers.h" + #include "ocr.h" +@@ -48,7 +48,7 @@ + if (!dir_name) + return -1; + +- //Search for a tessdata folder in the specified directory ++ // Search for a tessdata folder in the specified directory + char *dirname = strdup(dir_name); + dirname = realloc(dirname, strlen(dirname) + strlen("tessdata/") + 1); + strcat(dirname, "tessdata/"); +@@ -97,36 +97,22 @@ + char *probe_tessdata_location(const char *lang) + { + int ret = 0; +- char *tessdata_dir_path = getenv("TESSDATA_PREFIX"); + +- ret = search_language_pack(tessdata_dir_path, lang); +- if (!ret) +- return tessdata_dir_path; +- +- tessdata_dir_path = "./"; +- ret 
= search_language_pack(tessdata_dir_path, lang); +- if (!ret) +- return tessdata_dir_path; +- +- tessdata_dir_path = "/usr/share/"; +- ret = search_language_pack(tessdata_dir_path, lang); +- if (!ret) +- return tessdata_dir_path; +- +- tessdata_dir_path = "/usr/local/share/"; +- ret = search_language_pack(tessdata_dir_path, lang); +- if (!ret) +- return tessdata_dir_path; +- +- tessdata_dir_path = "/usr/share/tesseract-ocr/"; +- ret = search_language_pack(tessdata_dir_path, lang); +- if (!ret) +- return tessdata_dir_path; +- +- tessdata_dir_path = "/usr/share/tesseract-ocr/4.00/"; +- ret = search_language_pack(tessdata_dir_path, lang); +- if (!ret) +- return tessdata_dir_path; ++ const char *paths[] = { ++ getenv("TESSDATA_PREFIX"), ++ "./", ++ "/usr/share/", ++ "/usr/local/share/", ++ "/usr/share/tesseract-ocr/", ++ "/usr/share/tesseract-ocr/4.00/", ++ "/usr/share/tesseract-ocr/5/", ++ "/usr/share/tesseract/"}; ++ ++ for (int i = 0; i < sizeof(paths) / sizeof(paths[0]); i++) ++ { ++ if (!search_language_pack(paths[i], lang)) ++ return (char *)paths[i]; ++ } + + return NULL; + } +@@ -174,7 +160,7 @@ + char *pars_values = strdup("tess.log"); + + ctx->api = TessBaseAPICreate(); +- if (!strncmp("4.", TessVersion(), 2)) ++ if (!strncmp("4.", TessVersion(), 2) || !strncmp("5.", TessVersion(), 2)) + { + char tess_path[1024]; + snprintf(tess_path, 1024, "%s%s%s", tessdata_path, "/", "tessdata"); +@@ -331,6 +317,11 @@ + } + + BOX *crop_points = ignore_alpha_at_edge(copy->alpha, copy->data, w, h, color_pix, &color_pix_out); ++ ++ l_int32 x, y, _w, _h; ++ ++ boxGetGeometry(crop_points, &x, &y, &_w, &_h); ++ + // Converting image to grayscale for OCR to avoid issues with transparency + cpix_gs = pixConvertRGBToGray(cpix, 0.0, 0.0, 0.0); + +@@ -421,13 +412,13 @@ + memset(mcit, 0, copy->nb_colors * sizeof(uint32_t)); + + /* calculate histogram of image */ +- int firstpixel = copy->data[0]; //TODO: Verify this border pixel assumption holds ++ int firstpixel = copy->data[0]; // 
TODO: Verify this border pixel assumption holds + for (int i = y1; i <= y2; i++) + { + for (int j = x1; j <= x2; j++) + { +- if (copy->data[(crop_points->y + i) * w + (crop_points->x + j)] != firstpixel) +- histogram[copy->data[(crop_points->y + i) * w + (crop_points->x + j)]]++; ++ if (copy->data[(y + i) * w + (x + j)] != firstpixel) ++ histogram[copy->data[(y + i) * w + (x + j)]]++; + } + } + /* sorted in increasing order of intensity */ +@@ -956,18 +947,18 @@ + dest++; + while (*src != '\0') + { +- //checks if a line has actual content in it before adding it ++ // checks if a line has actual content in it before adding it + if (*src == '\n') + { + char_found = 0; + line_scan = src + 1; +- //multiple blocks of newlines ++ // multiple blocks of newlines + while (*(line_scan) == '\n') + { + line_scan++; + src++; + } +- //empty lines ++ // empty lines + while (*line_scan != '\n' && *line_scan != '\0') + { + if (*line_scan > 32) +@@ -991,8 +982,8 @@ + memcpy(dest, crlf, crlf_length); + dest[crlf_length] = 0; + /* +- *dest++ = '\n'; +- *dest = '\0'; */ ++ *dest++ = '\n'; ++ *dest = '\0'; */ + } + + /** +@@ -1017,7 +1008,7 @@ + return NULL; + else + { +- str = malloc(len + 1 + 10); //Extra space for possible trailing '/n's at the end of tesseract UTF8 text ++ str = malloc(len + 1 + 10); // Extra space for possible trailing '/n's at the end of tesseract UTF8 text + if (!str) + return NULL; + *str = '\0'; diff --git a/srcpkgs/ccextractor/template b/srcpkgs/ccextractor/template index 9abcd82852b27..a57b2fa05f1a6 100644 --- a/srcpkgs/ccextractor/template +++ b/srcpkgs/ccextractor/template @@ -1,23 +1,32 @@ # Template file for 'ccextractor' pkgname=ccextractor -version=0.93 +version=0.94 revision=1 build_wrksrc="linux" build_style=gnu-configure +build_helper=rust configure_args="--enable-ocr --enable-hardsubx" -hostmakedepends="automake pkg-config" -makedepends="leptonica-devel tesseract-ocr-devel ffmpeg-devel" +hostmakedepends="automake pkg-config cargo clang" 
+makedepends="tesseract-ocr-devel ffmpeg-devel rust-std" short_desc="Extract subtitles from video streams" maintainer="newbluemoon " license="GPL-2.0-or-later" homepage="https://www.ccextractor.org/" changelog="https://raw.githubusercontent.com/CCExtractor/ccextractor/master/docs/CHANGES.TXT" -distfiles="https://github.com/CCExtractor/${pkgname}/archive/v${version}.tar.gz" -checksum=0e66d3e360db1b02a88271af11313ca4c9bbda1b03728e264a44c4c9f77192e3 -CFLAGS="-I${XBPS_CROSS_BASE}/usr/include/tesseract -DPNG_POWERPC_VSX_OPT=0 -fcommon" +distfiles="https://github.com/CCExtractor/ccextractor/releases/download/v${version}/ccextractor_minimal.tar.gz" +checksum=1fe020bf5b45fcfa564958381a7fce5f09d6f3a888de7a80a6745c2f3bfdb324 +CFLAGS="-DPNG_POWERPC_VSX_OPT=0 -fcommon" + +if [ "$CROSS_BUILD" ]; then + hostmakedepends+=" tesseract-ocr" +fi pre_configure() { sed -i -e "s/tesseract --version/tesseract-ocr --version/g" configure.ac + ln -sf libleptonica.so ${XBPS_CROSS_BASE}/usr/lib/liblept.so + if [ "$CROSS_BUILD" ]; then + sed -i configure.ac -e "s/=release/=${RUST_TARGET}\/release/" + fi ./autogen.sh }