From 5b3b31cfcd9b713fc333119d268dbe8832591990 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joshua=20Kr=C3=A4mer?= Date: Wed, 9 Jun 2021 00:44:37 +0200 Subject: [PATCH 1/3] New package: highway-0.16.0 --- srcpkgs/highway/template | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 srcpkgs/highway/template diff --git a/srcpkgs/highway/template b/srcpkgs/highway/template new file mode 100644 index 000000000000..a5c1bf284c68 --- /dev/null +++ b/srcpkgs/highway/template @@ -0,0 +1,17 @@ +# Template file for 'highway' +pkgname=highway +version=0.16.0 +revision=1 +build_style=cmake +configure_args="-DHWY_SYSTEM_GTEST=ON" +checkdepends="gtest-devel" +short_desc="C++ library providing portable SIMD/vector intrinsics" +maintainer="Joshua Krämer " +license="Apache-2.0" +homepage="https://github.com/google/highway" +distfiles="https://github.com/google/highway/archive/${version}.tar.gz" +checksum=746c9578446be6c5286e8846c5f0d4118c0c1f04219c401abadcb8a5f2051893 + +if [ "$CROSS_BUILD" ]; then + configure_args+=" -DBUILD_TESTING=OFF" +fi From 61e02f367acb97761abf753a980423116a944763 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joshua=20Kr=C3=A4mer?= Date: Wed, 9 Jun 2021 01:52:29 +0200 Subject: [PATCH 2/3] New package: libjxl-0.6.1 --- common/shlibs | 2 + srcpkgs/libjxl-devel | 1 + srcpkgs/libjxl-plugins | 1 + srcpkgs/libjxl-tools | 1 + .../deprecated_highway_functions.patch | 459 ++++++++++++++++++ srcpkgs/libjxl/template | 77 +++ 6 files changed, 541 insertions(+) create mode 120000 srcpkgs/libjxl-devel create mode 120000 srcpkgs/libjxl-plugins create mode 120000 srcpkgs/libjxl-tools create mode 100644 srcpkgs/libjxl/patches/deprecated_highway_functions.patch create mode 100644 srcpkgs/libjxl/template diff --git a/common/shlibs b/common/shlibs index ccc55a1b3772..1446c848ff9f 100644 --- a/common/shlibs +++ b/common/shlibs @@ -4145,3 +4145,5 @@ librz_lang.so.0.3.4 rizin-0.3.4_1 librz_search.so.0.3.4 rizin-0.3.4_1 librz_bin.so.0.3.4 rizin-0.3.4_1 libaravis-0.8.so.0 
libaravis-0.8.21_1 +libjxl.so.0.6 libjxl-0.6.1_1 +libjxl_threads.so.0.6 libjxl-0.6.1_1 diff --git a/srcpkgs/libjxl-devel b/srcpkgs/libjxl-devel new file mode 120000 index 000000000000..ddc1abf827d9 --- /dev/null +++ b/srcpkgs/libjxl-devel @@ -0,0 +1 @@ +libjxl \ No newline at end of file diff --git a/srcpkgs/libjxl-plugins b/srcpkgs/libjxl-plugins new file mode 120000 index 000000000000..ddc1abf827d9 --- /dev/null +++ b/srcpkgs/libjxl-plugins @@ -0,0 +1 @@ +libjxl \ No newline at end of file diff --git a/srcpkgs/libjxl-tools b/srcpkgs/libjxl-tools new file mode 120000 index 000000000000..ddc1abf827d9 --- /dev/null +++ b/srcpkgs/libjxl-tools @@ -0,0 +1 @@ +libjxl \ No newline at end of file diff --git a/srcpkgs/libjxl/patches/deprecated_highway_functions.patch b/srcpkgs/libjxl/patches/deprecated_highway_functions.patch new file mode 100644 index 000000000000..a775eb5a7c56 --- /dev/null +++ b/srcpkgs/libjxl/patches/deprecated_highway_functions.patch @@ -0,0 +1,459 @@ +diff --git a/lib/jxl/dec_reconstruct.cc b/lib/jxl/dec_reconstruct.cc +index a1baef4..08279f8 100644 +--- a/lib/jxl/dec_reconstruct.cc ++++ b/lib/jxl/dec_reconstruct.cc +@@ -357,8 +357,8 @@ void DoYCbCrUpsampling(size_t hs, size_t vs, ImageF* plane_in, const Rect& rect, + Store(left, d, out + x); + Store(right, d, out + x + 1); + #else +- Store(InterleaveLower(left, right), d, out + x); +- Store(InterleaveUpper(left, right), d, out + x + Lanes(d)); ++ Store(InterleaveLower(d, left, right), d, out + x); ++ Store(InterleaveUpper(d, left, right), d, out + x + Lanes(d)); + #endif + } + } +diff --git a/lib/jxl/dec_upsample.cc b/lib/jxl/dec_upsample.cc +index 7277e4f..3cb3f36 100644 +--- a/lib/jxl/dec_upsample.cc ++++ b/lib/jxl/dec_upsample.cc +@@ -176,8 +176,8 @@ void Upsample(const ImageF& src, const Rect& src_rect, ImageF* dst, + min = Min(LoadU(df, raw_min_row + sx + fx), min); + max = Max(LoadU(df, raw_max_row + sx + fx), max); + } +- min = MinOfLanes(min); +- max = MaxOfLanes(max); ++ min = 
MinOfLanes(df, min); ++ max = MaxOfLanes(df, max); + for (size_t lx = 0; lx < N; lx += V) { + StoreU(min, df, min_row + N * sx + lx); + StoreU(max, df, max_row + N * sx + lx); +diff --git a/lib/jxl/enc_ac_strategy.cc b/lib/jxl/enc_ac_strategy.cc +index bc50465..c0ed68f 100644 +--- a/lib/jxl/enc_ac_strategy.cc ++++ b/lib/jxl/enc_ac_strategy.cc +@@ -429,8 +429,8 @@ float EstimateEntropy(const AcStrategy& acs, size_t x, size_t y, + } + entropy_v += nzeros_v * cost1; + +- entropy += GetLane(SumOfLanes(entropy_v)); +- size_t num_nzeros = GetLane(SumOfLanes(nzeros_v)); ++ entropy += GetLane(SumOfLanes(df, entropy_v)); ++ size_t num_nzeros = GetLane(SumOfLanes(df, nzeros_v)); + // Add #bit of num_nonzeros, as an estimate of the cost for encoding the + // number of non-zeros of the block. + size_t nbits = CeilLog2Nonzero(num_nzeros + 1) + 1; +@@ -441,9 +441,9 @@ float EstimateEntropy(const AcStrategy& acs, size_t x, size_t y, + float ret = + entropy + + masking * +- ((config.info_loss_multiplier * GetLane(SumOfLanes(info_loss))) + ++ ((config.info_loss_multiplier * GetLane(SumOfLanes(df, info_loss))) + + (config.info_loss_multiplier2 * +- sqrt(num_blocks * GetLane(SumOfLanes(info_loss2))))); ++ sqrt(num_blocks * GetLane(SumOfLanes(df, info_loss2))))); + return ret; + } + +diff --git a/lib/jxl/enc_adaptive_quantization.cc b/lib/jxl/enc_adaptive_quantization.cc +index f53393f..24f3d53 100644 +--- a/lib/jxl/enc_adaptive_quantization.cc ++++ b/lib/jxl/enc_adaptive_quantization.cc +@@ -189,7 +189,7 @@ V GammaModulation(const D d, const size_t x, const size_t y, + overall_ratio += avg_ratio; + } + } +- overall_ratio = SumOfLanes(overall_ratio); ++ overall_ratio = SumOfLanes(d, overall_ratio); + overall_ratio *= Set(d, 1.0f / 64); + // ideally -1.0, but likely optimal correction adds some entropy, so slightly + // less than that. +@@ -246,12 +246,12 @@ V ColorModulation(const D d, const size_t x, const size_t y, + // blue we consider as if it was fully red or blue. 
+ static const float ratio = 30.610615782142737f; // out of 64 pixels. + +- auto overall_red_coverage = SumOfLanes(red_coverage); ++ auto overall_red_coverage = SumOfLanes(d, red_coverage); + overall_red_coverage = + Min(overall_red_coverage, Set(d, ratio * kRedRampLength)); + overall_red_coverage *= Set(d, red_strength / ratio); + +- auto overall_blue_coverage = SumOfLanes(blue_coverage); ++ auto overall_blue_coverage = SumOfLanes(d, blue_coverage); + overall_blue_coverage = + Min(overall_blue_coverage, Set(d, ratio * kBlueRampLength)); + overall_blue_coverage *= Set(d, blue_strength / ratio); +@@ -295,7 +295,7 @@ V HfModulation(const D d, const size_t x, const size_t y, const ImageF& xyb, + } + } + +- sum = SumOfLanes(sum); ++ sum = SumOfLanes(d, sum); + return MulAdd(sum, Set(d, -2.0052193233688884f / 112), out_val); + } + +diff --git a/lib/jxl/enc_ar_control_field.cc b/lib/jxl/enc_ar_control_field.cc +index f43340e..f8025ac 100644 +--- a/lib/jxl/enc_ar_control_field.cc ++++ b/lib/jxl/enc_ar_control_field.cc +@@ -157,7 +157,7 @@ void ProcessTile(const Image3F& opsin, PassesEncoderState* enc_state, + sum += LoadU(df4, rows_in[iy] + x * 4 + ix + 2); + } + } +- row_out[x] = GetLane(Sqrt(SumOfLanes(sum))) * (1.0f / 4.0f); ++ row_out[x] = GetLane(Sqrt(SumOfLanes(df4, sum))) * (1.0f / 4.0f); + } + } + // Indexing iy and ix is a bit tricky as we include a 2 pixel border +@@ -193,7 +193,7 @@ void ProcessTile(const Image3F& opsin, PassesEncoderState* enc_state, + sum += Load(df4, rows_in[iy] + sx + ix); + } + } +- row_out[x] = GetLane(Sqrt(SumOfLanes(sum))) * (1.0f / 4.0f); ++ row_out[x] = GetLane(Sqrt(SumOfLanes(df4, sum))) * (1.0f / 4.0f); + } else { + float sum = 0; + for (size_t iy = sy; iy < ey; iy++) { +diff --git a/lib/jxl/enc_butteraugli_pnorm.cc b/lib/jxl/enc_butteraugli_pnorm.cc +index 7c3fb9c..90b0440 100644 +--- a/lib/jxl/enc_butteraugli_pnorm.cc ++++ b/lib/jxl/enc_butteraugli_pnorm.cc +@@ -95,13 +95,13 @@ double ComputeDistanceP(const ImageF& distmap, const 
ButteraugliParams& params, + } + double v = 0; + v += pow( +- onePerPixels * (sum1[0] + GetLane(SumOfLanes(Load(d, sum_totals0)))), ++ onePerPixels * (sum1[0] + GetLane(SumOfLanes(d, Load(d, sum_totals0)))), + 1.0 / (p * 1.0)); + v += pow( +- onePerPixels * (sum1[1] + GetLane(SumOfLanes(Load(d, sum_totals1)))), ++ onePerPixels * (sum1[1] + GetLane(SumOfLanes(d, Load(d, sum_totals1)))), + 1.0 / (p * 2.0)); + v += pow( +- onePerPixels * (sum1[2] + GetLane(SumOfLanes(Load(d, sum_totals2)))), ++ onePerPixels * (sum1[2] + GetLane(SumOfLanes(d, Load(d, sum_totals2)))), + 1.0 / (p * 4.0)); + v /= 3.0; + return v; +diff --git a/lib/jxl/enc_chroma_from_luma.cc b/lib/jxl/enc_chroma_from_luma.cc +index e5c3f38..370595c 100644 +--- a/lib/jxl/enc_chroma_from_luma.cc ++++ b/lib/jxl/enc_chroma_from_luma.cc +@@ -91,9 +91,9 @@ struct CFLFunction { + fdme_v += IfThenElse(av >= thres, zero, dme); + } + +- *fpeps = first_derivative_peps + GetLane(SumOfLanes(fdpe_v)); +- *fmeps = first_derivative_meps + GetLane(SumOfLanes(fdme_v)); +- return first_derivative + GetLane(SumOfLanes(fd_v)); ++ *fpeps = first_derivative_peps + GetLane(SumOfLanes(df, fdpe_v)); ++ *fmeps = first_derivative_meps + GetLane(SumOfLanes(df, fdme_v)); ++ return first_derivative + GetLane(SumOfLanes(df, fd_v)); + } + + const float* JXL_RESTRICT values_m; +@@ -124,8 +124,8 @@ int32_t FindBestMultiplier(const float* values_m, const float* values_s, + cb = MulAdd(a, b, cb); + } + // + distance_mul * x^2 * num +- x = -GetLane(SumOfLanes(cb)) / +- (GetLane(SumOfLanes(ca)) + num * distance_mul * 0.5f); ++ x = -GetLane(SumOfLanes(df, cb)) / ++ (GetLane(SumOfLanes(df, ca)) + num * distance_mul * 0.5f); + } else { + constexpr float eps = 1; + constexpr float kClamp = 20.0f; +diff --git a/lib/jxl/enc_cluster.cc b/lib/jxl/enc_cluster.cc +index 1f12a29..8ae863c 100644 +--- a/lib/jxl/enc_cluster.cc ++++ b/lib/jxl/enc_cluster.cc +@@ -49,7 +49,7 @@ void HistogramEntropy(const Histogram& a) { + const auto counts = LoadU(di, 
&a.data_[i]); + entropy_lanes += Entropy(ConvertTo(df, counts), inv_tot, total); + } +- a.entropy_ += GetLane(SumOfLanes(entropy_lanes)); ++ a.entropy_ += GetLane(SumOfLanes(df, entropy_lanes)); + } + + float HistogramDistance(const Histogram& a, const Histogram& b) { +@@ -71,7 +71,7 @@ float HistogramDistance(const Histogram& a, const Histogram& b) { + const auto counts = ConvertTo(df, a_counts + b_counts); + distance_lanes += Entropy(counts, inv_tot, total); + } +- const float total_distance = GetLane(SumOfLanes(distance_lanes)); ++ const float total_distance = GetLane(SumOfLanes(df, distance_lanes)); + return total_distance - a.entropy_ - b.entropy_; + } + +diff --git a/lib/jxl/enc_entropy_coder.cc b/lib/jxl/enc_entropy_coder.cc +index 0946300..07fe5a0 100644 +--- a/lib/jxl/enc_entropy_coder.cc ++++ b/lib/jxl/enc_entropy_coder.cc +@@ -86,7 +86,7 @@ int32_t NumNonZeroExceptLLF(const size_t cx, const size_t cy, + + // We want area - sum_zero, add because neg_sum_zero is already negated. + const int32_t nzeros = +- int32_t(cx * cy * kDCTBlockSize) + GetLane(SumOfLanes(neg_sum_zero)); ++ int32_t(cx * cy * kDCTBlockSize) + GetLane(SumOfLanes(di, neg_sum_zero)); + + const int32_t shifted_nzeros = static_cast<int32_t>( + (nzeros + covered_blocks - 1) >> log2_covered_blocks); +@@ -135,7 +135,7 @@ int32_t NumNonZero8x8ExceptDC(const int32_t* JXL_RESTRICT block, + + // We want 64 - sum_zero, add because neg_sum_zero is already negated. 
+ const int32_t nzeros = +- int32_t(kDCTBlockSize) + GetLane(SumOfLanes(neg_sum_zero)); ++ int32_t(kDCTBlockSize) + GetLane(SumOfLanes(di, neg_sum_zero)); + + *nzeros_pos = nzeros; + +diff --git a/lib/jxl/enc_fast_heuristics.cc b/lib/jxl/enc_fast_heuristics.cc +index 16f7670..0551782 100644 +--- a/lib/jxl/enc_fast_heuristics.cc ++++ b/lib/jxl/enc_fast_heuristics.cc +@@ -94,8 +94,8 @@ Status Heuristics(PassesEncoderState* enc_state, + cb = MulAdd(a, b, cb); + } + } +- float best = +- -GetLane(SumOfLanes(cb)) / (GetLane(SumOfLanes(ca)) + 1e-9f); ++ float best = -GetLane(SumOfLanes(df, cb)) / ++ (GetLane(SumOfLanes(df, ca)) + 1e-9f); + int8_t& res = (c == 0 ? shared.cmap.ytox_map : shared.cmap.ytob_map) + .Row(ty)[tx]; + res = std::max(-128.0f, std::min(127.0f, roundf(best))); +@@ -124,8 +124,8 @@ Status Heuristics(PassesEncoderState* enc_state, + max = IfThenElse(max > nn, max, nn); + } + } +- row_out_avg[x] = GetLane(SumOfLanes(sum)); +- row_out[x] = GetLane(MaxOfLanes(max)); ++ row_out_avg[x] = GetLane(SumOfLanes(df4, sum)); ++ row_out[x] = GetLane(MaxOfLanes(df4, max)); + } + } + }, +diff --git a/lib/jxl/gauss_blur.cc b/lib/jxl/gauss_blur.cc +index f9babe7..f24a74c 100644 +--- a/lib/jxl/gauss_blur.cc ++++ b/lib/jxl/gauss_blur.cc +@@ -421,7 +421,7 @@ ImageF ConvolveXSampleAndTranspose(const ImageF& in, + for (int i = -r; i <= r; i += Lanes(df)) { + sum = MulAdd(LoadU(df, rowp + x + i), LoadU(df, kernelp + i), sum); + } +- out.Row(ox)[y] = GetLane(SumOfLanes(sum)); ++ out.Row(ox)[y] = GetLane(SumOfLanes(df, sum)); + } + for (; x < in.xsize(); x += res, ++ox) { + float sum = 0.0f; +diff --git a/lib/jxl/modular/encoding/enc_ma.cc b/lib/jxl/modular/encoding/enc_ma.cc +index 0e2eaac..d485129 100644 +--- a/lib/jxl/modular/encoding/enc_ma.cc ++++ b/lib/jxl/modular/encoding/enc_ma.cc +@@ -84,7 +84,7 @@ float EstimateBits(const int32_t *counts, int32_t *rounded_counts, + bits_lanes -= + IfThenElse(counts_v == zero, zero, counts_v * BitCast(df, nbps)); + } +- return 
GetLane(SumOfLanes(bits_lanes)); ++ return GetLane(SumOfLanes(df, bits_lanes)); + } + + void MakeSplitNode(size_t pos, int property, int splitval, Predictor lpred, +diff --git a/lib/jxl/rational_polynomial_test.cc b/lib/jxl/rational_polynomial_test.cc +index 699afd0..f985eb0 100644 +--- a/lib/jxl/rational_polynomial_test.cc ++++ b/lib/jxl/rational_polynomial_test.cc +@@ -51,7 +51,7 @@ struct EvalLog2 { + const HWY_FULL(int32_t) di; + const auto x_bits = BitCast(di, vx); + // Cannot handle negative numbers / NaN. +- JXL_DASSERT(AllTrue(Abs(x_bits) == x_bits)); ++ JXL_DASSERT(AllTrue(di, Abs(x_bits) == x_bits)); + + // Range reduction to [-1/3, 1/3] - 3 integer, 2 float ops + const auto exp_bits = x_bits - Set(di, 0x3f2aaaab); // = 2/3 +diff --git a/lib/jxl/splines.cc b/lib/jxl/splines.cc +index 8653445..5dc2404 100644 +--- a/lib/jxl/splines.cc ++++ b/lib/jxl/splines.cc +@@ -52,7 +52,7 @@ float ContinuousIDCT(const float dct[32], float t) { + auto local_res = LoadU(df, dct + i) * cos; + result = MulAdd(Set(df, square_root<2>::value), local_res, result); + } +- return GetLane(SumOfLanes(result)); ++ return GetLane(SumOfLanes(df, result)); + } + + template +diff --git a/lib/jxl/transpose-inl.h b/lib/jxl/transpose-inl.h +index d12b129..4674420 100644 +--- a/lib/jxl/transpose-inl.h ++++ b/lib/jxl/transpose-inl.h +@@ -74,50 +74,51 @@ JXL_INLINE_TRANSPOSE void GenericTransposeBlock(TransposeSimdTag, + static_assert(COLS_or_0 % 8 == 0, "Invalid number of columns"); + for (size_t n = 0; n < ROWS; n += 8) { + for (size_t m = 0; m < COLS; m += 8) { +- auto i0 = from.LoadPart(BlockDesc<8>(), n + 0, m + 0); +- auto i1 = from.LoadPart(BlockDesc<8>(), n + 1, m + 0); +- auto i2 = from.LoadPart(BlockDesc<8>(), n + 2, m + 0); +- auto i3 = from.LoadPart(BlockDesc<8>(), n + 3, m + 0); +- auto i4 = from.LoadPart(BlockDesc<8>(), n + 4, m + 0); +- auto i5 = from.LoadPart(BlockDesc<8>(), n + 5, m + 0); +- auto i6 = from.LoadPart(BlockDesc<8>(), n + 6, m + 0); +- auto i7 = 
from.LoadPart(BlockDesc<8>(), n + 7, m + 0); ++ const BlockDesc<8> d; ++ auto i0 = from.LoadPart(d, n + 0, m + 0); ++ auto i1 = from.LoadPart(d, n + 1, m + 0); ++ auto i2 = from.LoadPart(d, n + 2, m + 0); ++ auto i3 = from.LoadPart(d, n + 3, m + 0); ++ auto i4 = from.LoadPart(d, n + 4, m + 0); ++ auto i5 = from.LoadPart(d, n + 5, m + 0); ++ auto i6 = from.LoadPart(d, n + 6, m + 0); ++ auto i7 = from.LoadPart(d, n + 7, m + 0); + // Surprisingly, this straightforward implementation (24 cycles on port5) + // is faster than load128+insert and LoadDup128+ConcatUpperLower+blend. +- const auto q0 = InterleaveLower(i0, i2); +- const auto q1 = InterleaveLower(i1, i3); +- const auto q2 = InterleaveUpper(i0, i2); +- const auto q3 = InterleaveUpper(i1, i3); +- const auto q4 = InterleaveLower(i4, i6); +- const auto q5 = InterleaveLower(i5, i7); +- const auto q6 = InterleaveUpper(i4, i6); +- const auto q7 = InterleaveUpper(i5, i7); +- +- const auto r0 = InterleaveLower(q0, q1); +- const auto r1 = InterleaveUpper(q0, q1); +- const auto r2 = InterleaveLower(q2, q3); +- const auto r3 = InterleaveUpper(q2, q3); +- const auto r4 = InterleaveLower(q4, q5); +- const auto r5 = InterleaveUpper(q4, q5); +- const auto r6 = InterleaveLower(q6, q7); +- const auto r7 = InterleaveUpper(q6, q7); +- +- i0 = ConcatLowerLower(r4, r0); +- i1 = ConcatLowerLower(r5, r1); +- i2 = ConcatLowerLower(r6, r2); +- i3 = ConcatLowerLower(r7, r3); +- i4 = ConcatUpperUpper(r4, r0); +- i5 = ConcatUpperUpper(r5, r1); +- i6 = ConcatUpperUpper(r6, r2); +- i7 = ConcatUpperUpper(r7, r3); +- to.StorePart(BlockDesc<8>(), i0, m + 0, n + 0); +- to.StorePart(BlockDesc<8>(), i1, m + 1, n + 0); +- to.StorePart(BlockDesc<8>(), i2, m + 2, n + 0); +- to.StorePart(BlockDesc<8>(), i3, m + 3, n + 0); +- to.StorePart(BlockDesc<8>(), i4, m + 4, n + 0); +- to.StorePart(BlockDesc<8>(), i5, m + 5, n + 0); +- to.StorePart(BlockDesc<8>(), i6, m + 6, n + 0); +- to.StorePart(BlockDesc<8>(), i7, m + 7, n + 0); ++ const auto q0 = 
InterleaveLower(d, i0, i2); ++ const auto q1 = InterleaveLower(d, i1, i3); ++ const auto q2 = InterleaveUpper(d, i0, i2); ++ const auto q3 = InterleaveUpper(d, i1, i3); ++ const auto q4 = InterleaveLower(d, i4, i6); ++ const auto q5 = InterleaveLower(d, i5, i7); ++ const auto q6 = InterleaveUpper(d, i4, i6); ++ const auto q7 = InterleaveUpper(d, i5, i7); ++ ++ const auto r0 = InterleaveLower(d, q0, q1); ++ const auto r1 = InterleaveUpper(d, q0, q1); ++ const auto r2 = InterleaveLower(d, q2, q3); ++ const auto r3 = InterleaveUpper(d, q2, q3); ++ const auto r4 = InterleaveLower(d, q4, q5); ++ const auto r5 = InterleaveUpper(d, q4, q5); ++ const auto r6 = InterleaveLower(d, q6, q7); ++ const auto r7 = InterleaveUpper(d, q6, q7); ++ ++ i0 = ConcatLowerLower(d, r4, r0); ++ i1 = ConcatLowerLower(d, r5, r1); ++ i2 = ConcatLowerLower(d, r6, r2); ++ i3 = ConcatLowerLower(d, r7, r3); ++ i4 = ConcatUpperUpper(d, r4, r0); ++ i5 = ConcatUpperUpper(d, r5, r1); ++ i6 = ConcatUpperUpper(d, r6, r2); ++ i7 = ConcatUpperUpper(d, r7, r3); ++ to.StorePart(d, i0, m + 0, n + 0); ++ to.StorePart(d, i1, m + 1, n + 0); ++ to.StorePart(d, i2, m + 2, n + 0); ++ to.StorePart(d, i3, m + 3, n + 0); ++ to.StorePart(d, i4, m + 4, n + 0); ++ to.StorePart(d, i5, m + 5, n + 0); ++ to.StorePart(d, i6, m + 6, n + 0); ++ to.StorePart(d, i7, m + 7, n + 0); + } + } + } +@@ -137,25 +138,26 @@ JXL_INLINE_TRANSPOSE void GenericTransposeBlock(TransposeSimdTag, + static_assert(COLS_or_0 % 4 == 0, "Invalid number of columns"); + for (size_t n = 0; n < ROWS; n += 4) { + for (size_t m = 0; m < COLS; m += 4) { +- const auto p0 = from.LoadPart(BlockDesc<4>(), n + 0, m + 0); +- const auto p1 = from.LoadPart(BlockDesc<4>(), n + 1, m + 0); +- const auto p2 = from.LoadPart(BlockDesc<4>(), n + 2, m + 0); +- const auto p3 = from.LoadPart(BlockDesc<4>(), n + 3, m + 0); +- +- const auto q0 = InterleaveLower(p0, p2); +- const auto q1 = InterleaveLower(p1, p3); +- const auto q2 = InterleaveUpper(p0, p2); +- const auto q3 = 
InterleaveUpper(p1, p3); +- +- const auto r0 = InterleaveLower(q0, q1); +- const auto r1 = InterleaveUpper(q0, q1); +- const auto r2 = InterleaveLower(q2, q3); +- const auto r3 = InterleaveUpper(q2, q3); +- +- to.StorePart(BlockDesc<4>(), r0, m + 0, n + 0); +- to.StorePart(BlockDesc<4>(), r1, m + 1, n + 0); +- to.StorePart(BlockDesc<4>(), r2, m + 2, n + 0); +- to.StorePart(BlockDesc<4>(), r3, m + 3, n + 0); ++ const BlockDesc<4> d; ++ const auto p0 = from.LoadPart(d, n + 0, m + 0); ++ const auto p1 = from.LoadPart(d, n + 1, m + 0); ++ const auto p2 = from.LoadPart(d, n + 2, m + 0); ++ const auto p3 = from.LoadPart(d, n + 3, m + 0); ++ ++ const auto q0 = InterleaveLower(d, p0, p2); ++ const auto q1 = InterleaveLower(d, p1, p3); ++ const auto q2 = InterleaveUpper(d, p0, p2); ++ const auto q3 = InterleaveUpper(d, p1, p3); ++ ++ const auto r0 = InterleaveLower(d, q0, q1); ++ const auto r1 = InterleaveUpper(d, q0, q1); ++ const auto r2 = InterleaveLower(d, q2, q3); ++ const auto r3 = InterleaveUpper(d, q2, q3); ++ ++ to.StorePart(d, r0, m + 0, n + 0); ++ to.StorePart(d, r1, m + 1, n + 0); ++ to.StorePart(d, r2, m + 2, n + 0); ++ to.StorePart(d, r3, m + 3, n + 0); + } + } + } +diff --git a/lib/profiler/profiler.cc b/lib/profiler/profiler.cc +index d21ee09..186ff06 100644 +--- a/lib/profiler/profiler.cc ++++ b/lib/profiler/profiler.cc +@@ -139,7 +139,7 @@ class Results { + void AnalyzePackets(const Packet* HWY_RESTRICT packets, + const size_t num_packets) { + // Ensures prior weakly-ordered streaming stores are globally visible. +- hwy::StoreFence(); ++ hwy::FlushStream(); + + const uint64_t t0 = TicksBefore(); + +@@ -372,12 +372,12 @@ void ThreadSpecific::ComputeOverhead() { + const size_t kReps = 10000; + // Analysis time should not be included => must fit within buffer. 
+ HWY_ASSERT(kReps * 2 < max_packets_); +- hwy::StoreFence(); ++ hwy::FlushStream(); + const uint64_t t0 = TicksBefore(); + for (size_t i = 0; i < kReps; ++i) { + PROFILER_ZONE("Dummy"); + } +- hwy::StoreFence(); ++ hwy::FlushStream(); + const uint64_t t1 = TicksAfter(); + HWY_ASSERT(num_packets_ + buffer_size_ == kReps * 2); + buffer_size_ = 0; diff --git a/srcpkgs/libjxl/template b/srcpkgs/libjxl/template new file mode 100644 index 000000000000..18ba5824449d --- /dev/null +++ b/srcpkgs/libjxl/template @@ -0,0 +1,77 @@ +# Template file for 'libjxl' +pkgname=libjxl +version=0.6.1 +revision=1 +#_short_version=0.6 +#wrksrc=libjxl-${_short_version} +build_style=cmake +configure_args="-DJPEGXL_ENABLE_BENCHMARK=OFF -DJPEGXL_ENABLE_EXAMPLES=OFF + -DJPEGXL_ENABLE_SJPEG=OFF -DJPEGXL_ENABLE_PLUGINS=ON" +hostmakedepends="tar pkg-config asciidoc" +makedepends="brotli-devel highway libpng-devel giflib-devel libjpeg-turbo-devel + libopenexr-devel gdk-pixbuf-devel gimp-devel" +checkdepends="gtest-devel xdg-utils" +short_desc="JPEG XL image format reference implementation" +maintainer="Joshua Krämer " +license="BSD-3-Clause, custom:Patent grant" +homepage="https://jpeg.org/jpegxl/" +_lodepng_hash=48e5364ef48ec2408f44c727657ac1b6703185f8 +_skcms_hash=64374756e03700d649f897dbd98c95e78c30c7da +distfiles="https://github.com/libjxl/libjxl/archive/v${version}.tar.gz + https://github.com/lvandeve/lodepng/archive/${_lodepng_hash}.tar.gz>lodepng-${_lodepng_hash}.tar.gz + https://skia.googlesource.com/skcms/+archive/${_skcms_hash}.tar.gz>skcms-${_skcms_hash}.tar.gz" +checksum="ccbd5a729d730152303be399f033b905e608309d5802d77a61a95faa092592c5 + c47c48c77a205f1af484b7b5a847290af65de3ea6f15817aa27c5ec7cc5208fd + @a69230c7b6f03a178c93abc6edc832c040d6e198340193b27c3d04afecf8f617" +skip_extraction="lodepng-${_lodepng_hash}.tar.gz + skcms-${_skcms_hash}.tar.gz" + +if [ "$XBPS_TARGET_NO_ATOMIC8" ]; then + makedepends+=" libatomic-devel" + LIBS="-latomic" +fi + +if [ "$CROSS_BUILD" ]; then + 
configure_args+=" -DBUILD_TESTING=OFF" +fi + +post_extract() { + cd ${XBPS_SRCDISTDIR}/libjxl-${version} + tar -xf lodepng-${_lodepng_hash}.tar.gz --strip-components=1 -C ${wrksrc}/third_party/lodepng + tar -xf skcms-${_skcms_hash}.tar.gz -C ${wrksrc}/third_party/skcms +} + +post_install() { + vlicense LICENSE + vlicense PATENTS +} + +libjxl-devel_package() { + short_desc+=" - development files" + depends="libjxl>=${version}_${revision} highway brotli-devel" + pkg_install() { + vmove usr/include + vmove "usr/lib/*.a" + vmove "usr/lib/*.so" + vmove usr/lib/pkgconfig + } +} + +libjxl-tools_package() { + short_desc+=" - tools" + pkg_install() { + vmove usr/bin + vmove usr/share/man + } +} + +libjxl-plugins_package() { + short_desc+=" - plugins" + depends="desktop-file-utils" + pkg_install() { + vmove usr/lib/gdk-pixbuf-2.0 + vmove usr/lib/gimp + vmove usr/share/mime + vmove usr/share/thumbnailers + } +} From 3278f787c546e6a9b0db287f89c1c8e0866bfa25 Mon Sep 17 00:00:00 2001 From: Joshua Date: Wed, 30 Mar 2022 21:23:16 +0200 Subject: [PATCH 3/3] kimageformats: enable JPEG XL support --- srcpkgs/kimageformats/template | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/srcpkgs/kimageformats/template b/srcpkgs/kimageformats/template index ebd6e81db298..6965e041027c 100644 --- a/srcpkgs/kimageformats/template +++ b/srcpkgs/kimageformats/template @@ -1,12 +1,12 @@ # Template file for 'kimageformats' pkgname=kimageformats version=5.92.0 -revision=1 +revision=2 build_style=cmake configure_args="-DKIMAGEFORMATS_HEIF=ON" hostmakedepends="kcoreaddons extra-cmake-modules qt5-qmake qt5-host-tools pkg-config" -makedepends="karchive-devel libopenexr-devel libheif-devel" +makedepends="karchive-devel libopenexr-devel libheif-devel libjxl-devel" short_desc="KDE Plugins to allow QImage to support extra file formats" maintainer="John " license="LGPL-2.0-or-later"