// Copyright (C) 2026 Kiyotsugu Arai // SPDX-License-Identifier: LGPL-3.0-or-later // IntGCD.cpp // Implementation of GCD and LCM operations for Int // // GCD: Lehmer + 再帰 HGCD (O(M(n) log n)) // extendedGcd: Lehmer + HGCD + 補因子追跡 (O(M(n) log² n)) // - HGCD 行列を補因子ペアに適用して高速化 // - 1 つの補因子 (x) のみ追跡し、y = (g - a*x) / b で復元 #include "math/core/mp/Int/IntGCD.hpp" #include "math/core/mp/Int/IntOps.hpp" #include "math/core/mp/Int/IntSpecialStates.hpp" #include "math/core/mp/Int/MpnOps.hpp" #include "math/core/mp/Int/ScratchArena.hpp" #include #include namespace calx { // ========================================================================= // GCD (既存: 変更なし) // ========================================================================= Int IntGCD::gcd(const Int& a, const Int& b) { // 1. Handle special states if (a.isNaN() || b.isNaN()) [[unlikely]] { return Int::NaN(); } if (a.isInfinite() || b.isInfinite()) [[unlikely]] { return Int::NaN(); } // 2. Handle zero cases if (a.isZero() && b.isZero()) [[unlikely]] { return Int::Zero(); } if (a.isZero()) [[unlikely]] { return abs(b); } if (b.isZero()) [[unlikely]] { return abs(a); } size_t an = a.size(); size_t bn = b.size(); // ========================================================= // 3. Fast path: 1-2 limb inputs (arena/vector 割り当て回避) // ========================================================= if (an <= 2 && bn <= 2) { uint64_t u0 = a.word(0), u1 = a.word(1); uint64_t v0 = b.word(0), v1 = b.word(1); size_t u_ctz = u0 ? std::countr_zero(u0) : 64 + std::countr_zero(u1); size_t v_ctz = v0 ? std::countr_zero(v0) : 64 + std::countr_zero(v1); size_t k = std::min(u_ctz, v_ctz); if (u_ctz >= 64) { u0 = u1 >> (u_ctz - 64); u1 = 0; } else if (u_ctz > 0) { u0 = (u0 >> u_ctz) | (u1 << (64 - u_ctz)); u1 >>= u_ctz; } if (v_ctz >= 64) { v0 = v1 >> (v_ctz - 64); v1 = 0; } else if (v_ctz > 0) { v0 = (v0 >> v_ctz) | (v1 << (64 - v_ctz)); v1 >>= v_ctz; } uint64_t g0, g1; if (u1 == 0 && v1 == 0) { g0 = mpn::gcd_11(u0, v0); g1 = 0; } else { mpn::DoubleLimb g = mpn::gcd_22(u1, u0, v1, v0); g0 = g.d0; g1 = g.d1; } if (k > 0) { if (k >= 64) { g1 = g0 << (k - 64); g0 = 0; } else { g1 = (g1 << k) | (g0 >> (64 - k)); g0 <<= k; } } if (g1 > 0) { uint64_t buf[2] = {g0, g1}; return Int::fromRawWords(std::span(buf, 2), +1); } if (g0 > 0) { return Int::fromRawWords(std::span(&g0, 1), +1); } return Int::Zero(); } // ========================================================= // 4. General path: Lehmer GCD (arena 使用、vector 割り当てなし) // ========================================================= ScratchScope scope; size_t max_size = std::max(an, bn); size_t buf_size = 2 * max_size + 32; uint64_t* up = getThreadArena().alloc_limbs(buf_size); uint64_t* vp = getThreadArena().alloc_limbs(buf_size); uint64_t* gp = getThreadArena().alloc_limbs(buf_size); std::memset(up, 0, buf_size * sizeof(uint64_t)); std::memset(vp, 0, buf_size * sizeof(uint64_t)); std::memset(gp, 0, buf_size * sizeof(uint64_t)); if (an >= bn) { for (size_t i = 0; i < an; i++) up[i] = a.word(i); for (size_t i = 0; i < bn; i++) vp[i] = b.word(i); } else { for (size_t i = 0; i < bn; i++) up[i] = b.word(i); for (size_t i = 0; i < an; i++) vp[i] = a.word(i); std::swap(an, bn); } size_t k_a = mpn::ctz_limb_array(up, an); size_t k_b = mpn::ctz_limb_array(vp, bn); size_t k = std::min(k_a, k_b); if (k_a > 0) an = mpn::rshift(up, up, an, k_a); if (k_b > 0) bn = mpn::rshift(vp, vp, bn, k_b); size_t scratch_size = mpn::gcd_lehmer_scratch_size(an, bn); uint64_t* scratch = getThreadArena().alloc_limbs(scratch_size); size_t rn = mpn::gcd_lehmer(gp, up, an, vp, bn, scratch); if (k > 0 && rn > 0) { rn = mpn::lshift_arbitrary(gp, gp, rn, k); } if (rn == 0) { return Int::Zero(); } return Int::fromRawWords(std::span(gp, rn), +1); } // ========================================================================= // LCM (変更なし) // ========================================================================= Int IntGCD::lcm(const Int& a, const Int& b) { if (a.isNaN() || b.isNaN()) [[unlikely]] { return Int::NaN(); } if (a.isInfinite() || b.isInfinite()) [[unlikely]] { return Int::NaN(); } if (a.isZero() || b.isZero()) [[unlikely]] { return Int::Zero(); } Int g = gcd(a, b); if (g.isOne()) { return abs(a * b); } return abs(a) * (abs(b) / g); } // ========================================================================= // Extended GCD: HGCD 対応版 // ========================================================================= namespace { // ---- 符号付き多倍長 (ScratchArena 上の非所有ポインタ) ---- struct SignedMpn { uint64_t* d; // magnitude data size_t n; // number of limbs (0 = zero value) int s; // +1, -1, or 0 }; // ゼロに設定 inline void smn_zero(SignedMpn& x) { x.n = 0; x.s = 0; } // 単一 limb 値に設定 (buf はバッファ先頭) inline void smn_set_ui(SignedMpn& x, uint64_t val, int sign) { if (val == 0) { x.n = 0; x.s = 0; } else { x.d[0] = val; x.n = 1; x.s = sign; } } // 符号付き線形結合: result = u * a - v * b (符号付き) // u, v は単一 limb (unsigned) // result は r_buf に書き込まれる (容量 max(a.n, b.n) + 2 必要) // tmp は作業バッファ (容量 max(a.n, b.n) + 2 必要) static void smn_linearcomb_1( uint64_t* r_buf, size_t* r_size, int* r_sign, uint64_t u, const SignedMpn& a, uint64_t v, const SignedMpn& b, uint64_t* tmp) { // term1 = u * |a|, sign = a.s size_t t1n = 0; int t1s = 0; if (a.n > 0 && u > 0) { uint64_t cy = mpn::mul_1(tmp, a.d, a.n, u); t1n = a.n; if (cy) tmp[t1n++] = cy; t1s = a.s; } // term2 = v * |b|, sign = b.s size_t t2n = 0; int t2s = 0; if (b.n > 0 && v > 0) { uint64_t cy = mpn::mul_1(r_buf, b.d, b.n, v); t2n = b.n; if (cy) r_buf[t2n++] = cy; t2s = b.s; } // result = t1s * |t1| - t2s * |t2| = t1s * |t1| + (-t2s) * |t2| int eff_t2s = -t2s; if (t1n == 0) { // result = eff_t2s * |t2| (r_buf already has |t2|) *r_size = t2n; *r_sign = eff_t2s; return; } if (t2n == 0) { std::memcpy(r_buf, tmp, t1n * sizeof(uint64_t)); *r_size = t1n; *r_sign = t1s; return; } if (t1s == eff_t2s) { // 同符号: 絶対値を加算 if (t1n >= t2n) { uint64_t cy = mpn::add(r_buf, tmp, t1n, r_buf, t2n); *r_size = t1n; if (cy) r_buf[(*r_size)++] = cy; } else { uint64_t cy = mpn::add(r_buf, r_buf, t2n, tmp, t1n); *r_size = t2n; if (cy) r_buf[(*r_size)++] = cy; } *r_sign = t1s; } else { // 異符号: 絶対値を減算 int cmp_result; if (t1n != t2n) { cmp_result = (t1n > t2n) ? 1 : -1; } else { cmp_result = mpn::cmp(tmp, t1n, r_buf, t2n); } if (cmp_result == 0) { *r_size = 0; *r_sign = 0; } else if (cmp_result > 0) { // |t1| > |t2| mpn::sub(r_buf, tmp, t1n, r_buf, t2n); *r_size = mpn::normalized_size(r_buf, t1n); *r_sign = t1s; } else { // |t2| > |t1| mpn::sub(r_buf, r_buf, t2n, tmp, t1n); *r_size = mpn::normalized_size(r_buf, t2n); *r_sign = eff_t2s; } } } // 多倍長符号付き線形結合: result = M_row * a - M_col * b (符号付き) // M_row, M_col は多倍長 (unsigned, mn limbs) // result は r_buf に書き込まれる (容量 mn + max(a.n, b.n) + 4 必要) // tmp1, tmp2 は作業バッファ (各 mn + max(a.n, b.n) + 4 必要) static void smn_linearcomb_mp( uint64_t* r_buf, size_t* r_size, int* r_sign, const uint64_t* mp_u, size_t mn_u, const SignedMpn& a, const uint64_t* mp_v, size_t mn_v, const SignedMpn& b, uint64_t* tmp1, uint64_t* tmp2, uint64_t* mul_scratch) { // term1 = mp_u * |a|, sign = a.s size_t t1n = 0; int t1s = 0; size_t un = mpn::normalized_size(mp_u, mn_u); size_t an = a.n; if (un > 0 && an > 0) { t1n = un + an; if (un >= an) mpn::multiply(tmp1, mp_u, un, a.d, an, mul_scratch); else mpn::multiply(tmp1, a.d, an, mp_u, un, mul_scratch); t1n = mpn::normalized_size(tmp1, t1n); t1s = a.s; } // term2 = mp_v * |b|, sign = b.s size_t t2n = 0; int t2s = 0; size_t vn = mpn::normalized_size(mp_v, mn_v); size_t bn = b.n; if (vn > 0 && bn > 0) { t2n = vn + bn; if (vn >= bn) mpn::multiply(tmp2, mp_v, vn, b.d, bn, mul_scratch); else mpn::multiply(tmp2, b.d, bn, mp_v, vn, mul_scratch); t2n = mpn::normalized_size(tmp2, t2n); t2s = b.s; } // result = t1s * |t1| + (-t2s) * |t2| int eff_t2s = -t2s; if (t1n == 0) { if (t2n > 0) std::memcpy(r_buf, tmp2, t2n * sizeof(uint64_t)); *r_size = t2n; *r_sign = eff_t2s; return; } if (t2n == 0) { std::memcpy(r_buf, tmp1, t1n * sizeof(uint64_t)); *r_size = t1n; *r_sign = t1s; return; } if (t1s == eff_t2s) { if (t1n >= t2n) { std::memcpy(r_buf, tmp1, t1n * sizeof(uint64_t)); uint64_t cy = mpn::add(r_buf, r_buf, t1n, tmp2, t2n); *r_size = t1n; if (cy) r_buf[(*r_size)++] = cy; } else { std::memcpy(r_buf, tmp2, t2n * sizeof(uint64_t)); uint64_t cy = mpn::add(r_buf, r_buf, t2n, tmp1, t1n); *r_size = t2n; if (cy) r_buf[(*r_size)++] = cy; } *r_sign = t1s; } else { int cmp_result; if (t1n != t2n) cmp_result = (t1n > t2n) ? 1 : -1; else cmp_result = mpn::cmp(tmp1, t1n, tmp2, t2n); if (cmp_result == 0) { *r_size = 0; *r_sign = 0; } else if (cmp_result > 0) { mpn::sub(r_buf, tmp1, t1n, tmp2, t2n); *r_size = mpn::normalized_size(r_buf, t1n); *r_sign = t1s; } else { mpn::sub(r_buf, tmp2, t2n, tmp1, t1n); *r_size = mpn::normalized_size(r_buf, t2n); *r_sign = eff_t2s; } } } // hgcd2 行列を補因子ペア (su, sv) に適用 // 新 su = M.u[1][1] * su - M.u[0][1] * sv // 新 sv = M.u[0][0] * sv - M.u[1][0] * su // tmp1, tmp2: 作業バッファ (各 max(su.n, sv.n) + 2) static void cofactor_apply_matrix1( SignedMpn& su, SignedMpn& sv, const mpn::HgcdMatrix1& M1, uint64_t* new_su_buf, uint64_t* new_sv_buf, uint64_t* tmp) { size_t su_n, sv_n; int su_s, sv_s; // 新 su = u11 * su - u01 * sv smn_linearcomb_1(new_su_buf, &su_n, &su_s, M1.u[1][1], su, M1.u[0][1], sv, tmp); // 新 sv = u00 * sv - u10 * su (su はまだ旧値) smn_linearcomb_1(new_sv_buf, &sv_n, &sv_s, M1.u[0][0], sv, M1.u[1][0], su, tmp); // 結果をコピーバック std::memcpy(su.d, new_su_buf, su_n * sizeof(uint64_t)); su.n = su_n; su.s = su_s; std::memcpy(sv.d, new_sv_buf, sv_n * sizeof(uint64_t)); sv.n = sv_n; sv.s = sv_s; } // HGCD 多倍長行列を補因子ペアに適用 // 新 su = M.p[1][1] * su - M.p[0][1] * sv // 新 sv = M.p[0][0] * sv - M.p[1][0] * su static void cofactor_apply_hgcd_matrix( SignedMpn& su, SignedMpn& sv, const mpn::HgcdMatrix& M, uint64_t* new_su_buf, uint64_t* new_sv_buf, uint64_t* tmp1, uint64_t* tmp2, uint64_t* mul_scratch) { size_t su_n, sv_n; int su_s, sv_s; smn_linearcomb_mp(new_su_buf, &su_n, &su_s, M.p[1][1], M.n, su, M.p[0][1], M.n, sv, tmp1, tmp2, mul_scratch); smn_linearcomb_mp(new_sv_buf, &sv_n, &sv_s, M.p[0][0], M.n, sv, M.p[1][0], M.n, su, tmp1, tmp2, mul_scratch); std::memcpy(su.d, new_su_buf, su_n * sizeof(uint64_t)); su.n = su_n; su.s = su_s; std::memcpy(sv.d, new_sv_buf, sv_n * sizeof(uint64_t)); sv.n = sv_n; sv.s = sv_s; } // hgcd_reduce の補因子追跡版 // (up, vp) を HGCD で縮小し、同時に (su, sv) を更新する // 返値: 新サイズ (0 = 変化なし) static size_t hgcd_reduce_cofactor( uint64_t* up, uint64_t* vp, size_t n, SignedMpn& su, SignedMpn& sv, size_t cof_alloc) // 補因子バッファの容量 { auto& arena = getThreadArena(); size_t mark = arena.mark(); size_t un = mpn::normalized_size(up, n); size_t vn = mpn::normalized_size(vp, n); if (un == 0 || vn == 0 || n < mpn::HGCD_THRESHOLD) { arena.rewind(mark); return n; } n = std::max(un, vn); // 同サイズにパディング if (un < n) std::memset(up + un, 0, (n - un) * sizeof(uint64_t)); if (vn < n) std::memset(vp + vn, 0, (n - vn) * sizeof(uint64_t)); // up >= vp を保証 bool swapped = false; if (mpn::cmp(up, n, vp, n) < 0) { for (size_t i = 0; i < n; i++) std::swap(up[i], vp[i]); swapped = true; } // 補因子もスワップ if (swapped) { // su と sv のデータ、サイズ、符号をスワップ std::swap(su.d, sv.d); std::swap(su.n, sv.n); std::swap(su.s, sv.s); } size_t p = n / 2; // HGCD 行列を初期化 mpn::HgcdMatrix M; mpn::hgcd_matrix_init(&M, 2 * (n - p) + 4); uint64_t* tp = arena.alloc_limbs(std::max(n - p, M.alloc) + 4); // 上位 n-p limb で HGCD 実行 size_t nn = mpn::hgcd(up + p, vp + p, n - p, &M, tp); if (nn > 0) { // (up, vp) に行列を適用 size_t mn = M.n; size_t prod_n = mn + p; size_t ms_sz = mpn::multiply_scratch_size( std::max(mn, p), std::min(mn, p)); size_t adj_size = 2 * prod_n + ms_sz; uint64_t* adj_tp = arena.alloc_limbs(adj_size + 16); n = mpn::hgcd_matrix_adjust(&M, p + nn, up, vp, p, adj_tp); // 補因子に行列を適用 size_t cof_max = std::max(su.n, sv.n); size_t prod_cof = mn + cof_max + 4; size_t ms_cof = mpn::multiply_scratch_size( std::max(mn, cof_max), std::min({mn, cof_max, (size_t)1})); // ms_cof を十分に確保 ms_cof = std::max(ms_cof, mpn::multiply_scratch_size(mn + cof_max, 1)); uint64_t* new_su = arena.alloc_limbs(prod_cof + 4); uint64_t* new_sv = arena.alloc_limbs(prod_cof + 4); uint64_t* ctmp1 = arena.alloc_limbs(prod_cof + 4); uint64_t* ctmp2 = arena.alloc_limbs(prod_cof + 4); uint64_t* cms = arena.alloc_limbs(ms_cof + 16); cofactor_apply_hgcd_matrix(su, sv, M, new_su, new_sv, ctmp1, ctmp2, cms); } n = std::max(mpn::normalized_size(up, n), mpn::normalized_size(vp, n)); arena.rewind(mark); return n; } // gcdext の subdiv ステップ (1 回の減算 + 除算、補因子更新付き) // up, vp: 値 (in-place 更新) // su, sv: 補因子 (in-place 更新) // 返値: 新サイズ (0 = GCD 発見、gp に格納) // // 注意: up, vp はポインタ値渡しなので、内部でのポインタスワップは呼び出し元に // 反映されない。su, sv は参照渡しなので反映される。物理バッファのデータを // スワップすることで呼び出し元との整合性を維持する。 // 符号付き加算: sv -= su (in-place) static void smn_subtract_inplace(SignedMpn& sv, const SignedMpn& su) { if (su.n == 0) return; if (sv.n == 0) { std::memcpy(sv.d, su.d, su.n * sizeof(uint64_t)); sv.n = su.n; sv.s = -su.s; return; } if (sv.s != su.s) { // 異符号 → 絶対値を加算 if (sv.n >= su.n) { uint64_t cy = mpn::add(sv.d, sv.d, sv.n, su.d, su.n); if (cy) sv.d[sv.n++] = cy; } else { uint64_t cy = mpn::add(sv.d, su.d, su.n, sv.d, sv.n); sv.n = su.n; if (cy) sv.d[sv.n++] = cy; } } else { // 同符号 → 絶対値を減算 int c = (sv.n != su.n) ? ((sv.n > su.n) ? 1 : -1) : mpn::cmp(sv.d, sv.n, su.d, su.n); if (c == 0) { sv.n = 0; sv.s = 0; } else if (c > 0) { mpn::sub(sv.d, sv.d, sv.n, su.d, su.n); sv.n = mpn::normalized_size(sv.d, sv.n); } else { mpn::sub(sv.d, su.d, su.n, sv.d, sv.n); sv.n = mpn::normalized_size(sv.d, su.n); sv.s = -sv.s; } } } // バッファデータと補因子を物理的にスワップ static void phys_swap(uint64_t* up, uint64_t* vp, size_t n, SignedMpn& su, SignedMpn& sv) { for (size_t i = 0; i < n; i++) std::swap(up[i], vp[i]); std::swap(su.d, sv.d); std::swap(su.n, sv.n); std::swap(su.s, sv.s); } static size_t gcdext_subdiv_step( uint64_t* up, uint64_t* vp, size_t n, SignedMpn& su, SignedMpn& sv, uint64_t* gp, size_t* gn, uint64_t* /*scratch*/, size_t /*cof_alloc*/) { size_t an = mpn::normalized_size(up, n); size_t bn = mpn::normalized_size(vp, n); if (an == 0) { std::memcpy(gp, vp, bn * sizeof(uint64_t)); *gn = bn; std::swap(su.d, sv.d); std::swap(su.n, sv.n); std::swap(su.s, sv.s); return 0; } if (bn == 0) { std::memcpy(gp, up, an * sizeof(uint64_t)); *gn = an; return 0; } // up < vp に配置 (物理バッファをスワップ) if (an == bn) { int c = mpn::cmp(up, an, vp, bn); if (c == 0) { std::memcpy(gp, up, an * sizeof(uint64_t)); *gn = an; return 0; } if (c > 0) { phys_swap(up, vp, n, su, sv); std::swap(an, bn); } } else if (an > bn) { phys_swap(up, vp, n, su, sv); std::swap(an, bn); } // vp > up: vp -= up (q=1 の減算) mpn::sub(vp, vp, bn, up, an); bn = mpn::normalized_size(vp, bn); if (bn == 0) { std::memcpy(gp, up, an * sizeof(uint64_t)); *gn = an; return 0; } // 補因子更新: sv -= su (vp -= up に対応) smn_subtract_inplace(sv, su); // 再度サイズ正規化 an = mpn::normalized_size(up, n); bn = mpn::normalized_size(vp, n); if (an == 0 || bn == 0) { if (bn == 0) { std::memcpy(gp, up, an * sizeof(uint64_t)); *gn = an; } else { std::memcpy(gp, vp, bn * sizeof(uint64_t)); *gn = bn; std::swap(su.d, sv.d); std::swap(su.n, sv.n); std::swap(su.s, sv.s); } return 0; } // up < vp を再保証 (物理スワップ) if (an == bn) { int c = mpn::cmp(up, an, vp, bn); if (c == 0) { std::memcpy(gp, up, an * sizeof(uint64_t)); *gn = an; return 0; } if (c > 0) { phys_swap(up, vp, n, su, sv); std::swap(an, bn); } } else if (an > bn) { phys_swap(up, vp, n, su, sv); std::swap(an, bn); } // vp = vp mod up (除算) auto& arena = getThreadArena(); size_t div_mark = arena.mark(); size_t qn = bn - an + 1; uint64_t* qp = arena.alloc_limbs(qn + 4); uint64_t* dividend = arena.alloc_limbs(bn + 4); std::memcpy(dividend, vp, bn * sizeof(uint64_t)); size_t ds = mpn::divide_scratch_size(bn, an); uint64_t* work = arena.alloc_limbs(ds + 4); mpn::divide(qp, vp, dividend, bn, up, an, work); // vp は余り (an limb 以下) if (an < n) std::memset(vp + an, 0, (n - an) * sizeof(uint64_t)); size_t rem_n = mpn::normalized_size(vp, an); qn = mpn::normalized_size(qp, qn); if (rem_n == 0) { std::memcpy(gp, up, an * sizeof(uint64_t)); *gn = an; arena.rewind(div_mark); return 0; } // 補因子更新: sv -= q * su if (qn > 0 && su.n > 0) { size_t prod_n = qn + su.n; uint64_t* prod = arena.alloc_limbs(prod_n + 4); if (qn == 1) { uint64_t cy = mpn::mul_1(prod, su.d, su.n, qp[0]); prod_n = su.n; if (cy) prod[prod_n++] = cy; } else { size_t ms = mpn::multiply_scratch_size( std::max(qn, su.n), std::min(qn, su.n)); uint64_t* ms_buf = arena.alloc_limbs(ms + 4); if (qn >= su.n) mpn::multiply(prod, qp, qn, su.d, su.n, ms_buf); else mpn::multiply(prod, su.d, su.n, qp, qn, ms_buf); prod_n = mpn::normalized_size(prod, prod_n); } // sv -= su.s * prod if (sv.n == 0) { std::memcpy(sv.d, prod, prod_n * sizeof(uint64_t)); sv.n = prod_n; sv.s = -su.s; } else if (sv.s != su.s) { // 異符号 → 加算 if (sv.n >= prod_n) { uint64_t cy = mpn::add(sv.d, sv.d, sv.n, prod, prod_n); if (cy) sv.d[sv.n++] = cy; } else { uint64_t cy = mpn::add(sv.d, prod, prod_n, sv.d, sv.n); sv.n = prod_n; if (cy) sv.d[sv.n++] = cy; } } else { // 同符号 → 減算 int c = (sv.n != prod_n) ? ((sv.n > prod_n) ? 1 : -1) : mpn::cmp(sv.d, sv.n, prod, prod_n); if (c == 0) { sv.n = 0; sv.s = 0; } else if (c > 0) { mpn::sub(sv.d, sv.d, sv.n, prod, prod_n); sv.n = mpn::normalized_size(sv.d, sv.n); } else { mpn::sub(sv.d, prod, prod_n, sv.d, sv.n); sv.n = mpn::normalized_size(sv.d, prod_n); sv.s = -sv.s; } } } arena.rewind(div_mark); return an; } } // anonymous namespace // ========================================================================= // extendedGcd: HGCD + Lehmer 高速版 // ========================================================================= Int IntGCD::extendedGcd(const Int& a, const Int& b, Int& x, Int& y) { // 1. Handle special states if (a.isNaN() || b.isNaN()) [[unlikely]] { x = Int::NaN(); y = Int::NaN(); return Int::NaN(); } if (a.isInfinite() || b.isInfinite()) [[unlikely]] { x = Int::NaN(); y = Int::NaN(); return Int::NaN(); } // 2. Handle zero cases if (b.isZero()) { if (a.isZero()) { x = Int::Zero(); y = Int::Zero(); return Int::Zero(); } x = (a.getSign() >= 0) ? Int::One() : Int(-1); y = Int::Zero(); return abs(a); } if (a.isZero()) { x = Int::Zero(); y = (b.getSign() >= 0) ? Int::One() : Int(-1); return abs(b); } // 3. 符号を記憶して絶対値で計算 int a_sign = a.getSign(); int b_sign = b.getSign(); Int abs_a = abs(a); Int abs_b = abs(b); size_t an = abs_a.size(); size_t bn = abs_b.size(); // ========================================================= // 4. Small path: ≤ 2 limbs → 既存のナイーブアルゴリズム // ========================================================= if (an <= 2 && bn <= 2) { // ナイーブ拡張ユークリッド (Int レベル) Int A = abs_a, B = abs_b; Int u(1), v(0), u2(0), v2(1); while (!B.isZero()) { Int q = A / B; Int r = A % B; A = B; B = r; Int new_u = u - q * u2; Int new_v = v - q * v2; u = u2; v = v2; u2 = new_u; v2 = new_v; } x = u; y = v; if (A.getSign() < 0) { A = -A; x = -x; y = -y; } if (a_sign < 0) x = -x; if (b_sign < 0) y = -y; return A; } // ========================================================= // 5. Large path: mpn レベル Lehmer + HGCD // ========================================================= ScratchScope scope; size_t max_n = std::max(an, bn); size_t buf_size = 2 * max_n + 32; auto& arena = getThreadArena(); // 値バッファ uint64_t* up = arena.alloc_limbs(buf_size); uint64_t* vp = arena.alloc_limbs(buf_size); uint64_t* gp = arena.alloc_limbs(buf_size); uint64_t* tp = arena.alloc_limbs(buf_size); std::memset(up, 0, buf_size * sizeof(uint64_t)); std::memset(vp, 0, buf_size * sizeof(uint64_t)); std::memset(gp, 0, buf_size * sizeof(uint64_t)); for (size_t i = 0; i < an; i++) up[i] = abs_a.word(i); for (size_t i = 0; i < bn; i++) vp[i] = abs_b.word(i); // up >= vp に配置 (不均衡の場合は初期除算) size_t n; bool initial_swap = false; if (an > bn) { // up mod vp で初期削減 size_t qn = an - bn + 1; uint64_t* qp_init = arena.alloc_limbs(qn + 4); uint64_t* tmp_init = arena.alloc_limbs(an + 4); std::memcpy(tmp_init, up, an * sizeof(uint64_t)); size_t ds = mpn::divide_scratch_size(an, bn); uint64_t* ws = arena.alloc_limbs(ds + 4); mpn::divide(qp_init, up, tmp_init, an, vp, bn, ws); // up は余り bool zero = true; for (size_t i = 0; i < bn; i++) { if (up[i] != 0) { zero = false; break; } } if (zero) { // up が vp で割り切れた → gcd = vp Int g_result = Int::fromRawWords( std::span(vp, bn), +1); // |a| * x + |b| * y = gcd, x = 0, y = 1 x = Int::Zero(); y = Int::One(); if (a_sign < 0) x = -x; if (b_sign < 0) y = -y; return g_result; } n = bn; } else if (an < bn) { // vp の方が大きい → swap for (size_t i = 0; i < bn; i++) std::swap(up[i], vp[i]); initial_swap = true; size_t qn = bn - an + 1; uint64_t* qp_init = arena.alloc_limbs(qn + 4); uint64_t* tmp_init = arena.alloc_limbs(bn + 4); std::memcpy(tmp_init, up, bn * sizeof(uint64_t)); size_t ds = mpn::divide_scratch_size(bn, an); uint64_t* ws = arena.alloc_limbs(ds + 4); mpn::divide(qp_init, up, tmp_init, bn, vp, an, ws); bool zero = true; for (size_t i = 0; i < an; i++) { if (up[i] != 0) { zero = false; break; } } if (zero) { Int g_result = Int::fromRawWords( std::span(vp, an), +1); x = Int::One(); y = Int::Zero(); if (initial_swap) std::swap(x, y); if (a_sign < 0) x = -x; if (b_sign < 0) y = -y; return g_result; } n = an; } else { n = an; // up >= vp を保証 if (mpn::cmp(up, n, vp, n) < 0) { for (size_t i = 0; i < n; i++) std::swap(up[i], vp[i]); initial_swap = true; } } // 補因子バッファ割り当て // 補因子は最大で入力サイズ程度に成長する size_t cof_alloc = max_n + 16; uint64_t* su_buf = arena.alloc_limbs(cof_alloc); uint64_t* sv_buf = arena.alloc_limbs(cof_alloc); std::memset(su_buf, 0, cof_alloc * sizeof(uint64_t)); std::memset(sv_buf, 0, cof_alloc * sizeof(uint64_t)); SignedMpn su, sv; su.d = su_buf; su.d[0] = 1; su.n = 1; su.s = 1; // su = 1 (up の補因子) sv.d = sv_buf; sv.n = 0; sv.s = 0; // sv = 0 (vp の補因子) // 初期スワップを反映: initial_swap の場合、up は元の b、vp は元の a if (initial_swap) { // up = orig_b, vp = orig_a なので su は orig_b の補因子 // しかし初期除算で up = orig_b mod orig_a になっている // → su = 1 は「up = orig_b * 1 + orig_a * (-q)」の orig_b 側 // 初期除算の q を追跡する必要がある // → 簡略化: initial_swap の場合は swap なしで再計算 } // ---- Lehmer + HGCD ループ ---- // Lehmer ループ用 scratch size_t lehmer_scratch = mpn::gcd_lehmer_scratch_size(n, n); uint64_t* subdiv_scratch = arena.alloc_limbs(lehmer_scratch + 16); size_t gn = 0; while (n > 2) { // HGCD で高速縮小 if (n >= mpn::HGCD_THRESHOLD) { size_t new_n = hgcd_reduce_cofactor(up, vp, n, su, sv, cof_alloc); if (new_n < n) { n = new_n; size_t un2 = mpn::normalized_size(up, n); size_t vn2 = mpn::normalized_size(vp, n); if (un2 == 0) { std::memcpy(gp, vp, vn2 * sizeof(uint64_t)); gn = vn2; std::swap(su.d, sv.d); std::swap(su.n, sv.n); std::swap(su.s, sv.s); goto done; } if (vn2 == 0) { std::memcpy(gp, up, un2 * sizeof(uint64_t)); gn = un2; goto done; } continue; } } // hgcd2 による Lehmer ステップ mpn::HgcdMatrix1 M1; uint64_t mask = up[n - 1] | vp[n - 1]; if (mask == 0) { n--; continue; } uint64_t uh, ul, vh, vl; if (mask >> 63) { uh = up[n-1]; ul = up[n-2]; vh = vp[n-1]; vl = vp[n-2]; } else { int shift = std::countl_zero(mask); if (n >= 3) { uh = (up[n-1] << shift) | (up[n-2] >> (64 - shift)); ul = (up[n-2] << shift) | (up[n-3] >> (64 - shift)); vh = (vp[n-1] << shift) | (vp[n-2] >> (64 - shift)); vl = (vp[n-2] << shift) | (vp[n-3] >> (64 - shift)); } else { uh = (up[n-1] << shift) | (up[n-2] >> (64 - shift)); ul = up[n-2] << shift; vh = (vp[n-1] << shift) | (vp[n-2] >> (64 - shift)); vl = vp[n-2] << shift; } } if (mpn::hgcd2(uh, ul, vh, vl, &M1)) { // (up, vp) に行列を適用 n = mpn::hgcd_mul_matrix1_vector(&M1, tp, up, vp, n); std::swap(up, tp); // 補因子に同じ行列を適用 size_t cm = std::max(su.n, sv.n) + 2; size_t ctmp_mark = arena.mark(); uint64_t* new_su = arena.alloc_limbs(cm + 4); uint64_t* new_sv = arena.alloc_limbs(cm + 4); uint64_t* ctmp = arena.alloc_limbs(cm + 4); cofactor_apply_matrix1(su, sv, M1, new_su, new_sv, ctmp); arena.rewind(ctmp_mark); if (n == 0) { gp[0] = 0; gn = 0; goto done; } } else { // subdiv ステップ (減算 + 除算 + 補因子更新) n = gcdext_subdiv_step(up, vp, n, su, sv, gp, &gn, subdiv_scratch, cof_alloc); if (n == 0) goto done; } } // ベースケース: n <= 2 (ナイーブ拡張ユークリッド) { size_t un = mpn::normalized_size(up, n); size_t vn = mpn::normalized_size(vp, n); if (un == 0) { std::memcpy(gp, vp, vn * sizeof(uint64_t)); gn = vn; std::swap(su.d, sv.d); std::swap(su.n, sv.n); std::swap(su.s, sv.s); goto done; } if (vn == 0) { std::memcpy(gp, up, un * sizeof(uint64_t)); gn = un; goto done; } // 2 limb 以下: Int レベルでナイーブ処理 Int A_small, B_small; if (un == 0) { A_small = Int::Zero(); } else { A_small = Int::fromRawWords(std::span(up, un), +1); } if (vn == 0) { B_small = Int::Zero(); } else { B_small = Int::fromRawWords(std::span(vp, vn), +1); } Int sx(1), sv_int(0), sx2(0), sv2_int(1); while (!B_small.isZero()) { Int q = A_small / B_small; Int r = A_small % B_small; A_small = B_small; B_small = r; Int new_sx = sx - q * sx2; Int new_sv_int = sv_int - q * sv2_int; sx = sx2; sv_int = sv2_int; sx2 = new_sx; sv2_int = new_sv_int; } // g = A_small, ベースの補因子は sx (A_small = up*sx + vp*sv_int) // 全体の補因子: orig_a に対する係数 = su * sx + sv * sv_int // ただし su, sv は「現在の up, vp が orig_a の何倍か」を示す // x_final = su * sx + sv * sx2_final (ここでは sx が up の、sv_int が vp の補因子) // → x_final = su.s*|su| * sx + sv.s*|sv| * sv_int ... これは混乱する // 正しい関係: up = orig * su, vp = orig * sv (orig は |a| か |b|) // g = up * sx + vp * sv_int = orig * (su * sx + sv * sv_int) // → 最終補因子 = su * sx + sv * sv_int if (A_small.getSign() < 0) { A_small = -A_small; sx = -sx; sv_int = -sv_int; } // su_int * sx + sv_int_orig * sv_int を計算 Int su_int; if (su.n == 0) { su_int = Int::Zero(); } else { su_int = Int::fromRawWords(std::span(su.d, su.n), su.s); } Int sv_int_orig; if (sv.n == 0) { sv_int_orig = Int::Zero(); } else { sv_int_orig = Int::fromRawWords(std::span(sv.d, sv.n), sv.s); } Int x_cof = su_int * sx + sv_int_orig * sv_int; // 結果 gn = A_small.size(); for (size_t i = 0; i < gn; i++) gp[i] = A_small.word(i); // 直接 Int で x, y を設定 if (initial_swap) { // up は元の b 側、vp は元の a 側なので、x_cof は b の補因子 y = x_cof; // x = (g - |b| * y) / |a| Int g_int = A_small; x = (g_int - abs_b * y) / abs_a; } else { x = x_cof; Int g_int = A_small; y = (g_int - abs_a * x) / abs_b; } if (a_sign < 0) x = -x; if (b_sign < 0) y = -y; return A_small; } done: // gp[0..gn-1] に GCD、su に up の補因子 if (gn == 0) { x = Int::Zero(); y = Int::Zero(); return Int::Zero(); } Int g_result = Int::fromRawWords(std::span(gp, gn), +1); // su → x_cof (up の補因子) Int x_cof; if (su.n == 0) { x_cof = Int::Zero(); } else { x_cof = Int::fromRawWords(std::span(su.d, su.n), su.s); } if (initial_swap) { y = x_cof; x = (g_result - abs_b * y) / abs_a; } else { x = x_cof; y = (g_result - abs_a * x) / abs_b; } if (a_sign < 0) x = -x; if (b_sign < 0) y = -y; return g_result; } } // namespace calx