// Copyright (C) 2026 Kiyotsugu Arai // SPDX-License-Identifier: LGPL-3.0-or-later // IntPrimeMPQS.cpp // MPQS (Multiple Polynomial Quadratic Sieve) 実装 // 二次篩法による素因数分解 #include "math/core/mp/Int/IntPrime.hpp" #include "math/core/mp/Int/IntOps.hpp" #include "math/core/mp/Int/IntModular.hpp" #include "math/core/mp/Int/IntGCD.hpp" #include "math/core/mp/Int/IntSqrt.hpp" #include "math/core/mp/Int/IntNumberTheory.hpp" #include "math/core/mp/Int/BlockLanczos.hpp" #include #include #include #include #include #include #include #include #include #include #include #include "math/core/mp/ThreadPool.hpp" #ifdef _MSC_VER #include static inline int ctz64(uint64_t x) { unsigned long idx; _BitScanForward64(&idx, x); return static_cast(idx); } #else static inline int ctz64(uint64_t x) { return __builtin_ctzll(x); } #endif namespace calx { // ============================================================================ // 共通ユーティリティ // ============================================================================ namespace { // エラトステネスの篩で上限 limit 以下の素数リストを生成 std::vector sievePrimes(uint64_t limit) { std::vector is_prime(limit + 1, true); is_prime[0] = is_prime[1] = false; for (uint64_t i = 2; i * i <= limit; i++) { if (!is_prime[i]) continue; for (uint64_t j = i * i; j <= limit; j += i) is_prime[j] = false; } std::vector primes; for (uint64_t i = 2; i <= limit; i++) if (is_prime[i]) primes.push_back(i); return primes; } } // anonymous namespace // ============================================================================ // MPQS 内部実装 // ============================================================================ namespace { // uint64 冪剰余: b^e mod m (p < 2^32 なら乗算はオーバーフローしない) uint64_t powmod64(uint64_t b, uint64_t e, uint64_t m) { uint64_t r = 1; b %= m; while (e) { if (e & 1) r = r * b % m; b = b * b % m; e >>= 1; } return r; } // Tonelli-Shanks アルゴリズム (uint64 版): x² ≡ n (mod p) の解を求める // p は奇素数 (< 2^32)、(n/p) = 1 が前提 uint64_t sqrtmod64(uint64_t n, uint64_t p) { n %= p; if (n == 0) return 0; if (p == 2) return n & 1; if (p % 4 == 3) return powmod64(n, (p + 1) / 4, p); uint64_t Q = p - 1; int S = 0; while (Q % 2 == 0) { Q /= 2; S++; } uint64_t z = 2; while (powmod64(z, (p - 1) / 2, p) != p - 1) z++; uint64_t M = S, c = powmod64(z, Q, p); uint64_t t = powmod64(n, Q, p), R = powmod64(n, (Q + 1) / 2, p); while (true) { if (t == 1) return R; uint64_t tmp = t; int i = 0; while (tmp != 1) { tmp = tmp * tmp % p; i++; } uint64_t b = c; for (int j = 0; j < (int)M - i - 1; j++) b = b * b % p; M = i; c = b * b % p; t = t * c % p; R = R * b % p; } } // GF(2) Gauss 消去は BlockLanczos.hpp の共通実装を使用 // (gf2_linalg::findNullSpaceGF2) } // anonymous namespace // ============================================================================ // findFactor_MPQS — 多重多項式二次篩 // ============================================================================ Int IntPrime::findFactor_MPQS(const Int& n, const std::atomic* cancel) { if (n.isEven()) return Int(2); if (n <= Int(1)) return n; if (isProbablePrime(n)) return n; // 完全平方数チェック Int sqrtN = IntSqrt::sqrt(n); if (sqrtN * sqrtN == n) return sqrtN; // パラメータ設定（n のビット数に基づく） int bits = static_cast(n.bitLength()); // Factor base のサイズと篩の範囲 int fb_size; int sieve_size; if (bits <= 32) { fb_size = 25; sieve_size = 5000; } else if (bits <= 48) { fb_size = 60; sieve_size = 15000; } else if (bits <= 64) { fb_size = 120; sieve_size = 40000; } else if (bits <= 80) { fb_size = 250; sieve_size = 80000; } else if (bits <= 100) { fb_size = 500; sieve_size = 150000; } else if (bits <= 120) { fb_size = 1000; sieve_size = 300000; } else { fb_size = 2000; sieve_size = 500000; } // Factor base の構築: エラトステネス篩で素数を生成し Jacobi symbol でフィルタ // fb_size 個の二次剰余素数を集めるには約 2·fb_size 個の素数が必要 (確率 ~1/2) // 上限を大きめに取って篩を実行 uint64_t sieve_limit = static_cast(fb_size) * 30 + 1000; auto all_primes = sievePrimes(sieve_limit); std::vector factor_base; factor_base.push_back(2); for (size_t pi = 1; pi < all_primes.size() && static_cast(factor_base.size()) < fb_size; pi++) { uint64_t p = all_primes[pi]; Int n_mod_p = n % Int(p); if (n_mod_p.isNegative()) n_mod_p += Int(p); if (n_mod_p.isZero() || IntNumberTheory::jacobiSymbol(n_mod_p, Int(p)) == 1) { factor_base.push_back(p); } } int fb_count = static_cast(factor_base.size()); // 各素数 p について sqrt(n) mod p を計算 std::vector sqrt_n_mod_p(fb_count); for (int i = 0; i < fb_count; ++i) { uint64_t p = factor_base[i]; if (p == 2) { sqrt_n_mod_p[i] = (n % Int(2)).toUInt64(); continue; } uint64_t n_mod_p = (n % Int(p)).toUInt64(); sqrt_n_mod_p[i] = sqrtmod64(n_mod_p, p); } // 関係の収集 // Q(x) = (base + x)^2 - n を factor base で割り切れるか確認 struct Relation { Int x_plus_base; // x + base std::vector exponents; // factor base 上の指数 }; std::vector relations; int target_relations = fb_count + 10; // 篩の log テーブル + Barrett 逆数テーブル std::vector fb_logp(fb_count); std::vector fb_inv_mpqs(fb_count); for (int i = 0; i < fb_count; i++) { fb_logp[i] = static_cast(std::log2(static_cast(factor_base[i]))); fb_inv_mpqs[i] = UINT64_MAX / factor_base[i] + 1; } // 篩バッファ (ブロック篩) int sieve_len = 2 * sieve_size + 1; constexpr int MPQS_BLOCK = 8192; std::vector sieve_buf(MPQS_BLOCK); // 閾値 double log2_Qmax = std::log2(static_cast(sieve_size)) + n.bitLength() * 0.5 + 1.0; double log2_pmax = fb_logp[fb_count - 1]; float threshold = static_cast(log2_Qmax - 2.0 * log2_pmax); if (threshold < 10.0f) threshold = 10.0f; // 複数多項式: base を変えて篩を繰り返す int max_polys = 50; for (int poly_idx = 0; poly_idx < max_polys && static_cast(relations.size()) < target_relations; ++poly_idx) { if (cancel && cancel->load(std::memory_order_relaxed)) return n; Int base = sqrtN + Int(1 + poly_idx * sieve_size * 2); // 篩開始位置を事前計算 std::vector start1_arr(fb_count), start2_arr(fb_count); for (int i = 1; i < fb_count; i++) { uint64_t p = factor_base[i]; if (p == 2) continue; uint64_t base_mod = (base % Int(p)).toUInt64(); uint64_t r = sqrt_n_mod_p[i]; uint64_t x1 = (r + p - base_mod % p) % p; uint64_t x2 = (p - r + p - base_mod % p) % p; uint64_t ss_mod = static_cast(sieve_size) % p; start1_arr[i] = (x1 + ss_mod) % p; start2_arr[i] = (x2 + ss_mod) % p; } // ブロック篩 for (int blk_start = 0; blk_start < sieve_len && static_cast(relations.size()) < target_relations; blk_start += MPQS_BLOCK) { int blk_end = std::min(blk_start + MPQS_BLOCK, sieve_len); int blk_len = blk_end - blk_start; std::fill(sieve_buf.begin(), sieve_buf.begin() + blk_len, 0.0f); for (int i = 1; i < fb_count; i++) { uint64_t p = factor_base[i]; if (p == 2) continue; float lp = fb_logp[i]; uint64_t s1 = start1_arr[i]; if (s1 < static_cast(blk_start)) { uint64_t skip = (static_cast(blk_start) - s1 + p - 1) / p; s1 += skip * p; } for (uint64_t j = s1; j < static_cast(blk_end); j += p) sieve_buf[j - blk_start] += lp; if (start1_arr[i] != start2_arr[i]) { uint64_t s2 = start2_arr[i]; if (s2 < static_cast(blk_start)) { uint64_t skip = (static_cast(blk_start) - s2 + p - 1) / p; s2 += skip * p; } for (uint64_t j = s2; j < static_cast(blk_end); j += p) sieve_buf[j - blk_start] += lp; } } // smooth 候補の検出と試し割り (ブロック内) for (int idx_local = 0; idx_local < blk_len && static_cast(relations.size()) < target_relations; ++idx_local) { if (sieve_buf[idx_local] < threshold) continue; int idx = blk_start + idx_local; int x = idx - sieve_size; Int xb = base + Int(x); if (xb <= Int(0)) continue; Int Q = xb * xb - n; if (Q.isZero()) { return IntGCD::gcd(xb - sqrtN, n); } bool negative = Q.isNegative(); Int absQ = negative ? -Q : Q; // 試し割り (uint64_t ファストパス) std::vector exps(fb_count, 0); bool smooth = false; if (absQ.bitLength() <= 64) { uint64_t rem64 = absQ.toUInt64(); for (int i = 0; i < fb_count && rem64 > 1; i++) { uint64_t p = factor_base[i]; #ifdef _MSC_VER uint64_t q = __umulh(rem64, fb_inv_mpqs[i]); #else uint64_t q = static_cast( (static_cast(rem64) * fb_inv_mpqs[i]) >> 64); #endif uint64_t r = rem64 - q * p; while (r == 0) { exps[i]++; rem64 = q; #ifdef _MSC_VER q = __umulh(rem64, fb_inv_mpqs[i]); #else q = static_cast( (static_cast(rem64) * fb_inv_mpqs[i]) >> 64); #endif r = rem64 - q * p; } } smooth = (rem64 == 1); } else { Int rem = absQ; for (int i = 0; i < fb_count; i++) { Int p(factor_base[i]); while ((rem % p).isZero()) { rem /= p; exps[i]++; } } smooth = (rem == Int(1)); } if (smooth) { relations.push_back({xb, exps}); } } // smooth 候補ループ end } // ブロックループ end } if (static_cast(relations.size()) < fb_count + 1) { return n; // 十分な関係が見つからなかった } // GF(2) 上の行列を構築 int nrels = static_cast(relations.size()); std::vector> matrix(nrels, std::vector(fb_count, 0)); for (int i = 0; i < nrels; ++i) { for (int j = 0; j < fb_count; ++j) { matrix[i][j] = static_cast(relations[i].exponents[j] & 1); } } // ガウス消去で null space を見つける auto null_vectors = gf2_linalg::findNullSpaceGF2(matrix, nrels, fb_count); // 各 null vector から因数を導出 for (auto& indices : null_vectors) { // x = Π x_i, y² = Π Q(x_i) Int x_prod(1); std::vector total_exps(fb_count, 0); for (int idx : indices) { x_prod = (x_prod * relations[idx].x_plus_base) % n; for (int j = 0; j < fb_count; ++j) total_exps[j] += relations[idx].exponents[j]; } // y = Π p_j^{e_j/2} Int y(1); for (int j = 0; j < fb_count; ++j) { int half_exp = total_exps[j] / 2; if (half_exp > 0) { y = (y * IntModular::powerMod(Int(factor_base[j]), Int(half_exp), n)) % n; } } // gcd(x - y, n) または gcd(x + y, n) が非自明な因数 Int diff = x_prod - y; if (diff.isNegative()) diff += n; Int g = IntGCD::gcd(diff, n); if (g > Int(1) && g < n) return g; Int sum = (x_prod + y) % n; g = IntGCD::gcd(sum, n); if (g > Int(1) && g < n) return g; } return n; } // ============================================================================ // SIQS (Self-Initializing Quadratic Sieve) 実装 // MPQS を篩ベースに強化した高速版 // ============================================================================ namespace { // uint64_t モジュラー逆元 (拡張ユークリッド互除法) uint64_t modInverse64(uint64_t a, uint64_t p) { if (a == 0) return 0; int64_t old_r = static_cast(a % p); int64_t r = static_cast(p); int64_t old_s = 1, s = 0; while (r != 0) { int64_t q = old_r / r; int64_t tmp; tmp = r; r = old_r - q * r; old_r = tmp; tmp = s; s = old_s - q * s; old_s = tmp; } return old_s < 0 ? static_cast(old_s + static_cast(p)) : static_cast(old_s); } // Knuth-Schroeppel 乗数選択 // kn の factor base が良質になる乗数 k を選ぶ int knuthSchroeppel(const Int& n) { static const int candidates[] = { 1, 2, 3, 5, 6, 7, 10, 11, 13, 14, 15, 17, 19, 21, 23, 26, 29, 30, 31, 33, 34, 35, 37, 38, 39, 41, 42, 43 }; static const uint64_t small_p[] = { 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97 }; double best_score = -1e100; int best_k = 1; for (int k : candidates) { double score = 0.0; Int kn = Int(k) * n; uint64_t kn8 = (kn % Int(8)).toUInt64(); // 2 の寄与 (kn mod 8 による) if (kn8 == 1) score += 2.0 * std::log(2.0); else if (kn8 == 5) score += std::log(2.0); else if (kn8 == 3 || kn8 == 7) score += 0.5 * std::log(2.0); // 小素数の寄与 for (uint64_t p : small_p) { Int kn_mod = kn % Int(p); if (kn_mod.isNegative()) kn_mod += Int(p); if (kn_mod.isZero()) { score += std::log(static_cast(p)) / p; } else if (IntNumberTheory::jacobiSymbol(kn_mod, Int(p)) == 1) { score += 2.0 * std::log(static_cast(p)) / (p - 1); } } // 大きい乗数へのペナルティ score -= 0.5 * std::log(static_cast(k)); if (score > best_score) { best_score = score; best_k = k; } } return best_k; } // SIQS パラメータテーブル (n の桁数から決定) struct SIQSParams { int fb_size; // factor base サイズ int M; // 半篩範囲 int num_a_factors; // A の素因数の個数 s int threshold_adj; // 篩閾値の調整量 (ビット) }; SIQSParams getSIQSParams(int digits) { // fb_size: factor base のサイズ // M: 篩の半径 (sieve_len = 2M+1) // num_a_factors: A の素因数の個数 s // threshold_adj: (未使用 — threshold は MPQS 同等の方式で計算) // s (num_a_factors) は A_optimal ≈ √(2N)/M に対して // per_prime ≈ A_optimal^(1/s) が factor base の素数範囲に収まるよう選ぶ if (digits <= 25) return {150, 20000, 3, 0}; if (digits <= 30) return {300, 40000, 3, 0}; if (digits <= 35) return {500, 150000, 3, 0}; if (digits <= 40) return {700, 80000, 5, 0}; if (digits <= 45) return {1000, 120000, 5, 0}; if (digits <= 50) return {1500, 180000, 6, 0}; if (digits <= 55) return {2000, 250000, 6, 0}; if (digits <= 60) return {2500, 350000, 7, 0}; if (digits <= 65) return {3500, 500000, 7, 0}; return {5000, 700000, 8, 0}; } } // anonymous namespace Int IntPrime::findFactor_SIQS(const Int& n, const std::atomic* cancel) { // --- 前処理 --- if (n.isEven()) return Int(2); if (n <= Int(1)) return n; if (isProbablePrime(n)) return n; Int sqrtN = IntSqrt::sqrt(n); if (sqrtN * sqrtN == n) return sqrtN; // 小さい数 (<30桁) は MPQS のほうが高速 (SIQS の初期化オーバーヘッドが相対的に大きい) std::string ns = n.toString(); int digits = static_cast(ns.size()); if (digits < 30) return findFactor_MPQS(n, cancel); // --- 1. Knuth-Schroeppel 乗数選択 --- int kk = knuthSchroeppel(n); Int kn = Int(kk) * n; auto params = getSIQSParams(digits); int fb_target = params.fb_size; int M = params.M; int s = params.num_a_factors; // --- 3. Factor base 構築 --- std::vector fb; fb.push_back(2); { uint64_t sieve_lim = static_cast(fb_target) * 30 + 1000; auto primes_list = sievePrimes(sieve_lim); for (size_t pi = 1; pi < primes_list.size() && static_cast(fb.size()) < fb_target; pi++) { uint64_t p = primes_list[pi]; Int kn_mod = kn % Int(p); if (kn_mod.isNegative()) kn_mod += Int(p); if (kn_mod.isZero() || IntNumberTheory::jacobiSymbol(kn_mod, Int(p)) == 1) fb.push_back(p); } } int fb_count = static_cast(fb.size()); // --- 4. Tonelli-Shanks 平方根と log 値 --- std::vector fb_roots(fb_count); std::vector fb_logp(fb_count); for (int i = 0; i < fb_count; i++) { fb_logp[i] = static_cast(std::log2(static_cast(fb[i]))); if (fb[i] == 2) { fb_roots[i] = (kn % Int(2)).toUInt64(); continue; } uint64_t kn_mod_p = (kn % Int(fb[i])).toUInt64(); fb_roots[i] = sqrtmod64(kn_mod_p, fb[i]); } // --- 4b. Barrett 逆数テーブル (試行除算の高速化) --- // fb_inv[i] = ceil(2^64 / fb[i]) — __umulh で除算を乗算に置換 std::vector fb_inv(fb_count); for (int i = 0; i < fb_count; i++) { uint64_t p = fb[i]; // ceil(2^64 / p) = (2^64 - 1) / p + 1 (p >= 2 なのでオーバーフローなし) fb_inv[i] = UINT64_MAX / p + 1; } // --- 5. 関係の収集 --- struct Relation { Int g_val; // g(x) = Ax + B std::vector exps; // factor base 上の指数 bool neg_sign; // Q(x) < 0 }; // Large prime variation: 試し割りの残余が単一の大素数なら partial relation struct PartialRelation { Int g_val; std::vector exps; bool neg_sign; uint64_t large_prime; }; std::vector rels; std::vector partials; int target = fb_count + 10; // Large prime 上限: FB 最大素数の 2 乗 (残余がこれ以下なら素数が保証される) uint64_t lp_bound = static_cast(fb[fb_count - 1]) * fb[fb_count - 1]; // A 素因数の選択: A ≈ √(2kn)/M となるよう最適な素数を選ぶ // 各素因数のサイズ ≈ A_target^(1/s) double A_target_d = std::sqrt(2.0 * kn.toDouble()) / M; double per_prime_target = std::pow(A_target_d, 1.0 / s); // 目標素数サイズに最も近い factor base 素数のインデックス範囲を決定 int a_center = 2; for (int i = 2; i < fb_count; i++) { if (fb[i] >= static_cast(per_prime_target)) { a_center = i; break; } a_center = i; } int a_half_range = std::max(s * 2, fb_count / 6); int a_start = std::max(2, a_center - a_half_range); int a_end = std::min(fb_count - 1, a_center + a_half_range); if (a_end - a_start + 1 < s) { a_start = 2; a_end = std::min(fb_count - 1, a_start + s * 3); } // A の集合を生成するための乱数 std::mt19937 rng(42); int max_A_tries = 500; // 篩バッファ (ブロック篩: L1 キャッシュに収まるサイズ) int sieve_len = 2 * M + 1; constexpr int SIEVE_BLOCK_SIZE = 8192; // 32KB L1 / 4B float std::vector sieve_buf(SIEVE_BLOCK_SIZE); for (int a_try = 0; a_try < max_A_tries && static_cast(rels.size()) < target; ++a_try) { if (cancel && cancel->load(std::memory_order_relaxed)) return n; // --- 5a. A の素因数を選択 --- std::vector a_indices; { std::vector pool; for (int i = a_start; i <= a_end; i++) pool.push_back(i); std::shuffle(pool.begin(), pool.end(), rng); for (int i = 0; i < s && i < static_cast(pool.size()); i++) a_indices.push_back(pool[i]); } if (static_cast(a_indices.size()) < s) continue; // A を計算 Int A(1); for (int idx : a_indices) A *= Int(fb[idx]); // --- 5b. CRT で B 成分を計算 --- // Bⱼ = (rⱼ · γⱼ mod qⱼ) · (A/qⱼ) // ここで γⱼ = (A/qⱼ)^{-1} mod qⱼ std::vector B_comp(s); bool crt_ok = true; for (int j = 0; j < s; j++) { uint64_t qj = fb[a_indices[j]]; Int A_over_qj = A / Int(qj); uint64_t A_qj_mod = (A_over_qj % Int(qj)).toUInt64(); if (A_qj_mod == 0) { crt_ok = false; break; } uint64_t gamma = modInverse64(A_qj_mod, qj); uint64_t rj = fb_roots[a_indices[j]]; uint64_t bj = (rj * gamma) % qj; B_comp[j] = Int(bj) * A_over_qj; } if (!crt_ok) continue; // --- 5c. A ごとの事前計算（全 B 値で共有） --- bool A_fits_64 = (A.bitLength() <= 63); uint64_t A64 = A_fits_64 ? A.toUInt64() : 0; // A に含まれる素数の log 合計 float a_log_sum = 0.0f; for (int idx : a_indices) a_log_sum += fb_logp[idx]; // A^{-1} mod p と A を割る素数フラグを事前計算 std::vector Ainv_cache(fb_count, 0); std::vector in_A_flag(fb_count, false); for (int i = 1; i < fb_count; i++) { uint64_t p = fb[i]; if (p == 2) continue; for (int idx : a_indices) if (fb[idx] == p) { in_A_flag[i] = true; break; } if (in_A_flag[i]) continue; uint64_t A_mod_p = A_fits_64 ? (A64 % p) : (A % Int(p)).toUInt64(); if (A_mod_p == 0) continue; Ainv_cache[i] = modInverse64(A_mod_p, p); } // B_comp[j] mod p を事前計算 (s × fb_count) // Gray code 差分更新で B mod p を毎回再計算せずに済む std::vector> Bcomp_mod_p(s, std::vector(fb_count, 0)); for (int j = 0; j < s; j++) { bool bc_fits_64 = (B_comp[j].bitLength() <= 63); uint64_t bc64 = bc_fits_64 ? B_comp[j].toUInt64() : 0; for (int i = 1; i < fb_count; i++) { if (in_A_flag[i]) continue; uint64_t p = fb[i]; Bcomp_mod_p[j][i] = bc_fits_64 ? (bc64 % p) : (B_comp[j] % Int(p)).toUInt64(); } } // --- 5d. 2^(s-1) 個の B 値を列挙（篩 + smooth 判定）--- int num_B = 1 << (s - 1); Int half_A = A >> 1; double log2_M = std::log2(static_cast(M)); float threshold = static_cast(2.0 * log2_M); if (threshold < 15.0f) threshold = 15.0f; // --- B 値の篩処理をラムダに抽出 (逐次/並列で共用) --- // B を独立に計算し、篩 + smooth 候補検出を実行 auto sieveOneB = [&](int bi, std::vector& out_rels, std::vector& out_partials) { // B を計算 (Gray code ビットパターンから符号を決定) uint64_t gray = static_cast(bi) ^ (static_cast(bi) >> 1); Int B(0); for (int j = 0; j < s; j++) { bool neg_sign = (j > 0) && ((gray >> (j - 1)) & 1); if (neg_sign) B -= B_comp[j]; else B += B_comp[j]; } B = ((B % A) + A) % A; // B² ≡ kn (mod A) を検証 { Int check = (B * B - kn) % A; if (check.isNegative()) check += A; if (!check.isZero()) { B = A - B; check = (B * B - kn) % A; if (check.isNegative()) check += A; if (!check.isZero()) return; } } if (B > half_A) B = B - A; // 篩開始位置を計算 std::vector sp1(fb_count, 0), sp2(fb_count, 0); { bool B_fits_64 = (B.isNegative() ? (-B).bitLength() : B.bitLength()) <= 63; for (int i = 1; i < fb_count; i++) { uint64_t p = fb[i]; if (p == 2 || in_A_flag[i] || Ainv_cache[i] == 0) continue; uint64_t Bmod; if (B_fits_64) { int64_t B64 = B.isNegative() ? -static_cast((-B).toUInt64()) : static_cast(B.toUInt64()); Bmod = static_cast(((B64 % static_cast(p)) + static_cast(p)) % static_cast(p)); } else { Int Bm = B % Int(p); if (Bm.isNegative()) Bm += Int(p); Bmod = Bm.toUInt64(); } uint64_t Ainv = Ainv_cache[i]; uint64_t r = fb_roots[i]; uint64_t diff1 = (r + p - Bmod) % p; uint64_t diff2 = (p - r + p - Bmod) % p; uint64_t pos1 = (Ainv * diff1) % p; uint64_t pos2 = (Ainv * diff2) % p; uint64_t Mmod = static_cast(M) % p; sp1[i] = static_cast((pos1 + Mmod) % p); sp2[i] = static_cast((pos2 + Mmod) % p); } } // ブロック篩: L1 キャッシュに収まるブロック単位で処理 constexpr int BLOCK_SIZE = 8192; // 32KB L1 / 4B float std::vector local_sieve(BLOCK_SIZE); for (int blk_start = 0; blk_start < sieve_len; blk_start += BLOCK_SIZE) { int blk_end = std::min(blk_start + BLOCK_SIZE, sieve_len); int blk_len = blk_end - blk_start; std::fill(local_sieve.begin(), local_sieve.begin() + blk_len, 0.0f); for (int i = 1; i < fb_count; i++) { uint64_t p = fb[i]; if (p == 2 || in_A_flag[i] || Ainv_cache[i] == 0) continue; float lp = fb_logp[i]; // sp1/sp2 はグローバルオフセット — ブロック内の最初のヒットを計算 int64_t j1 = sp1[i]; if (j1 < blk_start) { int64_t skip = (blk_start - j1 + static_cast(p) - 1) / static_cast(p); j1 += skip * static_cast(p); } for (int64_t j = j1; j < blk_end; j += static_cast(p)) local_sieve[j - blk_start] += lp; if (sp1[i] != sp2[i]) { int64_t j2 = sp2[i]; if (j2 < blk_start) { int64_t skip = (blk_start - j2 + static_cast(p) - 1) / static_cast(p); j2 += skip * static_cast(p); } for (int64_t j = j2; j < blk_end; j += static_cast(p)) local_sieve[j - blk_start] += lp; } } // smooth 候補の検出 (ブロック内) for (int idx_local = 0; idx_local < blk_len; ++idx_local) { if (local_sieve[idx_local] < threshold) continue; int idx = blk_start + idx_local; int x = idx - M; Int g = A * Int(x) + B; Int Q_val = g * g - kn; if (Q_val.isZero()) continue; // 完全平方は外で処理 bool neg = Q_val.isNegative(); Int absQ = neg ? -Q_val : Q_val; Int QdivA = absQ / A; std::vector exps(fb_count, 0); bool smooth = false; // A の素因数の指数を先に設定 for (int ai : a_indices) { for (int i = 0; i < fb_count; i++) { if (fb[i] == fb[ai]) { exps[i]++; break; } } } uint64_t rem64 = 0; if (QdivA.bitLength() <= 64) { rem64 = QdivA.toUInt64(); for (int i = 0; i < fb_count && rem64 > 1; i++) { if (in_A_flag[i]) continue; uint64_t p = fb[i]; // rem64 < p² なら残余は素数 (LP) または 1 // Barrett: q = mulhi(rem64, inv), r = rem64 - q*p uint64_t inv = fb_inv[i]; #ifdef _MSC_VER uint64_t q = __umulh(rem64, inv); #else uint64_t q = static_cast( (static_cast(rem64) * inv) >> 64); #endif uint64_t r = rem64 - q * p; while (r == 0) { exps[i]++; rem64 = q; #ifdef _MSC_VER q = __umulh(rem64, inv); #else q = static_cast( (static_cast(rem64) * inv) >> 64); #endif r = rem64 - q * p; } } smooth = (rem64 == 1); } else { Int rem = QdivA; for (int i = 0; i < fb_count; i++) { if (in_A_flag[i]) continue; Int p(fb[i]); while ((rem % p).isZero()) { rem /= p; exps[i]++; } } smooth = (rem == Int(1)); if (!smooth && rem.bitLength() <= 64) rem64 = rem.toUInt64(); } if (smooth) { out_rels.push_back({g, exps, neg}); } else if (rem64 > 1 && rem64 <= lp_bound) { // Large prime variation: 残余が単一素数 → partial relation out_partials.push_back({g, exps, neg, rem64}); } } // smooth 候補ループ end } // ブロックループ end }; // --- 並列 / 逐次の選択 --- if (num_B >= 4 && digits >= 30) { // 並列: 各 B 値を独立タスクとして実行 std::vector> per_b_rels(num_B); std::vector> per_b_partials(num_B); std::vector> futures; futures.reserve(num_B); for (int bi = 0; bi < num_B; bi++) { futures.push_back(calx::threadPool().submit([&, bi]() { sieveOneB(bi, per_b_rels[bi], per_b_partials[bi]); })); } for (auto& f : futures) f.get(); // 結果をマージ for (auto& br : per_b_rels) { for (auto& r : br) { rels.push_back(std::move(r)); if (static_cast(rels.size()) >= target) break; } if (static_cast(rels.size()) >= target) break; } for (auto& bp : per_b_partials) { for (auto& p : bp) partials.push_back(std::move(p)); } } else { // 逐次: Gray code 差分更新で効率的に処理 std::vector sieve_pos1(fb_count, 0); std::vector sieve_pos2(fb_count, 0); std::vector> delta_cache(s, std::vector(fb_count, 0)); for (int j = 0; j < s; j++) { for (int i = 1; i < fb_count; i++) { if (in_A_flag[i] || Ainv_cache[i] == 0) continue; uint64_t p = fb[i]; uint64_t bc2 = 2 * Bcomp_mod_p[j][i] % p; delta_cache[j][i] = bc2 * Ainv_cache[i] % p; } } Int B(0); for (int bi = 0; bi < num_B && static_cast(rels.size()) < target; ++bi) { if (bi == 0) { for (int j = 0; j < s; j++) B += B_comp[j]; B = ((B % A) + A) % A; { Int check = (B * B - kn) % A; if (check.isNegative()) check += A; if (!check.isZero()) { B = A - B; check = (B * B - kn) % A; if (check.isNegative()) check += A; if (!check.isZero()) continue; } } if (B > half_A) B = B - A; bool B_fits_64 = (B.isNegative() ? (-B).bitLength() : B.bitLength()) <= 63; for (int i = 1; i < fb_count; i++) { uint64_t p = fb[i]; if (p == 2 || in_A_flag[i] || Ainv_cache[i] == 0) continue; uint64_t Bmod; if (B_fits_64) { int64_t B64 = B.isNegative() ? -static_cast((-B).toUInt64()) : static_cast(B.toUInt64()); Bmod = static_cast(((B64 % static_cast(p)) + static_cast(p)) % static_cast(p)); } else { Int Bm = B % Int(p); if (Bm.isNegative()) Bm += Int(p); Bmod = Bm.toUInt64(); } uint64_t r = fb_roots[i]; uint64_t diff1 = (r + p - Bmod) % p; uint64_t diff2 = (p - r + p - Bmod) % p; uint64_t pos1 = (Ainv_cache[i] * diff1) % p; uint64_t pos2 = (Ainv_cache[i] * diff2) % p; uint64_t Mmod = static_cast(M) % p; sieve_pos1[i] = static_cast((pos1 + Mmod) % p); sieve_pos2[i] = static_cast((pos2 + Mmod) % p); } } else { int changed = ctz64(static_cast(bi)); int comp_idx = changed + 1; uint64_t gray = static_cast(bi) ^ (static_cast(bi) >> 1); bool now_negative = (gray >> changed) & 1; if (now_negative) B -= B_comp[comp_idx] * Int(2); else B += B_comp[comp_idx] * Int(2); for (int i = 1; i < fb_count; i++) { uint64_t p = fb[i]; if (p == 2 || in_A_flag[i] || Ainv_cache[i] == 0) continue; uint64_t d = delta_cache[comp_idx][i]; if (now_negative) { sieve_pos1[i] = static_cast((static_cast(sieve_pos1[i]) + d) % p); sieve_pos2[i] = static_cast((static_cast(sieve_pos2[i]) + d) % p); } else { sieve_pos1[i] = static_cast((static_cast(sieve_pos1[i]) + p - d) % p); sieve_pos2[i] = static_cast((static_cast(sieve_pos2[i]) + p - d) % p); } } } // ブロック篩の実行 constexpr int SEQ_BLOCK = 8192; for (int blk_start = 0; blk_start < sieve_len && static_cast(rels.size()) < target; blk_start += SEQ_BLOCK) { int blk_end = std::min(blk_start + SEQ_BLOCK, sieve_len); int blk_len = blk_end - blk_start; std::fill(sieve_buf.begin(), sieve_buf.begin() + blk_len, 0.0f); for (int i = 1; i < fb_count; i++) { uint64_t p = fb[i]; if (p == 2 || in_A_flag[i] || Ainv_cache[i] == 0) continue; float lp = fb_logp[i]; int64_t j1 = sieve_pos1[i]; if (j1 < blk_start) { int64_t skip = (blk_start - j1 + static_cast(p) - 1) / static_cast(p); j1 += skip * static_cast(p); } for (int64_t j = j1; j < blk_end; j += static_cast(p)) sieve_buf[j - blk_start] += lp; if (sieve_pos1[i] != sieve_pos2[i]) { int64_t j2 = sieve_pos2[i]; if (j2 < blk_start) { int64_t skip = (blk_start - j2 + static_cast(p) - 1) / static_cast(p); j2 += skip * static_cast(p); } for (int64_t j = j2; j < blk_end; j += static_cast(p)) sieve_buf[j - blk_start] += lp; } } // smooth 候補の検出 (ブロック内) for (int idx_local = 0; idx_local < blk_len && static_cast(rels.size()) < target; ++idx_local) { if (sieve_buf[idx_local] < threshold) continue; int idx = blk_start + idx_local; int x = idx - M; Int g = A * Int(x) + B; Int Q_val = g * g - kn; if (Q_val.isZero()) { Int factor = IntGCD::gcd(g, n); if (factor > Int(1) && factor < n) return factor; continue; } bool neg = Q_val.isNegative(); Int absQ = neg ? -Q_val : Q_val; Int QdivA = absQ / A; std::vector exps(fb_count, 0); for (int ai : a_indices) { for (int i = 0; i < fb_count; i++) { if (fb[i] == fb[ai]) { exps[i]++; break; } } } bool smooth = false; uint64_t rem64_seq = 0; if (QdivA.bitLength() <= 64) { rem64_seq = QdivA.toUInt64(); for (int i = 0; i < fb_count && rem64_seq > 1; i++) { if (in_A_flag[i]) continue; uint64_t p = fb[i]; if (rem64_seq < p * p) break; #ifdef _MSC_VER uint64_t q = __umulh(rem64_seq, fb_inv[i]); #else uint64_t q = static_cast( (static_cast(rem64_seq) * fb_inv[i]) >> 64); #endif uint64_t r = rem64_seq - q * p; while (r == 0) { exps[i]++; rem64_seq = q; #ifdef _MSC_VER q = __umulh(rem64_seq, fb_inv[i]); #else q = static_cast( (static_cast(rem64_seq) * fb_inv[i]) >> 64); #endif r = rem64_seq - q * p; } } smooth = (rem64_seq == 1); } else { Int rem = QdivA; for (int i = 0; i < fb_count; i++) { if (in_A_flag[i]) continue; Int p(fb[i]); while ((rem % p).isZero()) { rem /= p; exps[i]++; } } smooth = (rem == Int(1)); if (!smooth && rem.bitLength() <= 64) rem64_seq = rem.toUInt64(); } if (smooth) { rels.push_back({g, exps, neg}); } else if (rem64_seq > 1 && rem64_seq <= lp_bound) { partials.push_back({g, exps, neg, rem64_seq}); } } // smooth 候補ループ end } // ブロックループ end } } } // --- 5b. Large prime variation: partial relation の合成 --- // 同じ large prime を持つ 2 つの partial を合成して full relation に変換 // Q1/A = (FB部分) * L, Q2/A = (FB部分) * L → Q1*Q2/A² = (FB部分)² * L² // L² は完全平方なので GF(2) 上の指数は 0 { std::unordered_map lp_map; for (size_t i = 0; i < partials.size(); i++) { auto it = lp_map.find(partials[i].large_prime); if (it != lp_map.end()) { auto& p1 = partials[it->second]; auto& p2 = partials[i]; Relation combined; combined.g_val = (p1.g_val * p2.g_val) % kn; combined.exps.resize(fb_count, 0); for (int j = 0; j < fb_count; j++) combined.exps[j] = p1.exps[j] + p2.exps[j]; combined.neg_sign = p1.neg_sign ^ p2.neg_sign; rels.push_back(std::move(combined)); // 使用済みの partial を削除 (同じ LP で 3 つ以上ある場合に備える) lp_map.erase(it); } else { lp_map[partials[i].large_prime] = i; } } } if (static_cast(rels.size()) < fb_count + 1) { // 十分な関係が集まらなかった → MPQS にフォールバック return findFactor_MPQS(n); } // --- 6. GF(2) ガウス消去 --- int nrels = static_cast(rels.size()); int ncols = fb_count + 1; // +1 は符号列 (-1) std::vector> matrix(nrels, std::vector(ncols, 0)); for (int i = 0; i < nrels; i++) { for (int j = 0; j < fb_count; j++) matrix[i][j] = static_cast(rels[i].exps[j] & 1); matrix[i][fb_count] = rels[i].neg_sign ? 1 : 0; } auto null_vecs = gf2_linalg::findNullSpaceGF2(matrix, nrels, ncols); // --- 7. 因数の導出 --- for (auto& indices : null_vecs) { Int x_prod(1); std::vector total_exps(fb_count, 0); for (int idx : indices) { x_prod = (x_prod * rels[idx].g_val) % kn; for (int j = 0; j < fb_count; j++) total_exps[j] += rels[idx].exps[j]; } // y = Π p_j^{e_j/2} mod kn Int y(1); for (int j = 0; j < fb_count; j++) { int half = total_exps[j] / 2; if (half > 0) y = (y * IntModular::powerMod(Int(fb[j]), Int(half), kn)) % kn; } // gcd(x ± y, n) で非自明な因数を検出 Int diff = x_prod - y; if (diff.isNegative()) diff += kn; Int g = IntGCD::gcd(diff, n); if (g > Int(1) && g < n) return g; Int sum = (x_prod + y) % kn; g = IntGCD::gcd(sum, n); if (g > Int(1) && g < n) return g; } return n; } // ============================================================================ // SIQS 関係のファイル入出力 (大規模計算のチェックポイント用) // ============================================================================ namespace siqs_io { /// SIQS の full relation をテキスト形式で保存 /// フォーマット: SIQS_RELATIONS_V1 /// n /// fb_count /// fb ... /// nrels /// ... bool saveRelations(const std::string& path, const Int& n, const std::vector& fb, const std::vector& g_vals, const std::vector>& exps_list, const std::vector& neg_signs) { std::ofstream ofs(path); if (!ofs) return false; int fb_count = static_cast(fb.size()); int nrels = static_cast(g_vals.size()); ofs << "SIQS_RELATIONS_V1\n"; ofs << "n " << n.toString() << "\n"; ofs << "fb_count " << fb_count << "\n"; ofs << "fb"; for (auto p : fb) ofs << " " << p; ofs << "\n"; ofs << "nrels " << nrels << "\n"; for (int i = 0; i < nrels; i++) { ofs << g_vals[i].toString() << " " << (neg_signs[i] ? 1 : 0); for (int j = 0; j < fb_count; j++) ofs << " " << exps_list[i][j]; ofs << "\n"; } return ofs.good(); } /// SIQS の関係をテキスト形式から読み込み bool loadRelations(const std::string& path, Int& n_out, std::vector& fb_out, std::vector& g_vals, std::vector>& exps_list, std::vector& neg_signs) { std::ifstream ifs(path); if (!ifs) return false; std::string line; // ヘッダ検証 if (!std::getline(ifs, line) || line != "SIQS_RELATIONS_V1") return false; int fb_count = 0, nrels = 0; while (std::getline(ifs, line)) { if (line.substr(0, 2) == "n ") { n_out = Int(line.substr(2)); } else if (line.substr(0, 9) == "fb_count ") { fb_count = std::stoi(line.substr(9)); } else if (line.substr(0, 3) == "fb ") { std::istringstream iss(line.substr(3)); fb_out.clear(); fb_out.reserve(fb_count); uint64_t p; while (iss >> p) fb_out.push_back(p); } else if (line.substr(0, 6) == "nrels ") { nrels = std::stoi(line.substr(6)); break; } } g_vals.reserve(nrels); exps_list.reserve(nrels); neg_signs.reserve(nrels); for (int i = 0; i < nrels && std::getline(ifs, line); i++) { std::istringstream iss(line); std::string g_str; int neg; iss >> g_str >> neg; std::vector exps(fb_count); for (int j = 0; j < fb_count; j++) iss >> exps[j]; if (iss) { g_vals.push_back(Int(g_str)); exps_list.push_back(std::move(exps)); neg_signs.push_back(neg != 0); } } return true; } } // namespace siqs_io } // namespace calx