// Copyright (C) 2026 Kiyotsugu Arai // SPDX-License-Identifier: LGPL-3.0-or-later // blas.hpp // BLAS レベル 1/2/3 インターフェース // // BLAS (Basic Linear Algebra Subprograms) 風の API を提供する。 // 将来の MKL/OpenBLAS バックエンド切り替えの基盤。 // 現時点ではネイティブ実装 (calx の Vector/Matrix を直接操作)。 #ifndef CALX_BLAS_HPP #define CALX_BLAS_HPP #include

#include #include #include namespace calx {
namespace blas {

    // ====================================================================
    // Level 1: ベクトル-ベクトル演算
    // ====================================================================

    /**
     * @brief 内積: result = x^T * y
     */
    template T dot(const Vector & x, const Vector & y) {
        if (x.size() != y.size()) {
            throw DimensionError("blas::dot: size mismatch");
        }
        if constexpr (std::is_same_v || std::is_same_v) {
            return computation::simd::dot_product_simd (x.data(), y.data(), x.size());
        } else {
            T result = T{0};
            for (std::size_t i = 0; i < x.size(); ++i)
                result += x[i] * y[i];
            return result;
        }
    }

    /**
     * @brief 2-ノルム: result = ‖x‖₂
     */
    template T nrm2(const Vector & x) {
        if constexpr (std::is_same_v || std::is_same_v) {
            T sum = computation::simd::dot_product_simd (x.data(), x.data(), x.size());
            return std::sqrt(sum);
        } else {
            T sum = T{0};
            for (std::size_t i = 0; i < x.size(); ++i)
                sum += x[i] * x[i];
            return static_cast (std::sqrt(static_cast (sum)));
        }
    }

    /**
     * @brief 1-ノルム (絶対値の和): result = ‖x‖₁ = Σ|x_i|
     *
     * BLAS の dasum に相当。
     */
    template T asum(const Vector & x) {
        T sum = T{0};
        for (std::size_t i = 0; i < x.size(); ++i)
            sum += static_cast (std::abs(static_cast (x[i])));
        return sum;
    }

    /**
     * @brief 絶対値最大要素のインデックス: result = argmax_i |x_i|
     *
     * BLAS の idamax に相当。空ベクトルの場合は 0 を返す。
     */
    template std::size_t iamax(const Vector & x) {
        if (x.size() == 0) return 0;
        std::size_t idx = 0;
        double max_val = std::abs(static_cast (x[0]));
        for (std::size_t i = 1; i < x.size(); ++i) {
            double ai = std::abs(static_cast (x[i]));
            if (ai > max_val) { max_val = ai; idx = i; }
        }
        return idx;
    }

    /**
     * @brief スケーリング: x ← alpha * x
     */
    template void scal(T alpha, Vector & x) {
        if constexpr (std::is_same_v || std::is_same_v) {
            computation::simd::scale_simd (x.data(), alpha, x.size());
        } else {
            for (std::size_t i = 0; i < x.size(); ++i)
                x[i] *= alpha;
        }
    }

    /**
     * @brief AXPY: y ← alpha * x + y
     */
    template void axpy(T alpha, const Vector & x, Vector & y) {
        if (x.size() != y.size()) {
            throw DimensionError("blas::axpy: size mismatch");
        }
        if constexpr (std::is_same_v || std::is_same_v) {
            computation::simd::axpy_simd (y.data(), alpha, x.data(), x.size());
        } else {
            for (std::size_t i = 0; i < x.size(); ++i)
                y[i] += alpha * x[i];
        }
    }

    /**
     * @brief コピー: y ← x
     */
    template void copy(const Vector & x, Vector & y) {
        if (x.size() != y.size()) {
            throw DimensionError("blas::copy: size mismatch");
        }
        for (std::size_t i = 0; i < x.size(); ++i)
            y[i] = x[i];
    }

    /**
     * @brief スワップ: x ↔ y
     */
    template void swap(Vector & x, Vector & y) {
        if (x.size() != y.size()) {
            throw DimensionError("blas::swap: size mismatch");
        }
        for (std::size_t i = 0; i < x.size(); ++i)
            std::swap(x[i], y[i]);
    }

    // ====================================================================
    // Level 2: 行列-ベクトル演算
    // ====================================================================

    /**
     * @brief 一般行列-ベクトル積: y ← alpha * op(A) * x + beta * y
     *
     * @param trans false: op(A) = A, true: op(A) = A^T
     * @param alpha スカラー係数
     * @param A m×n 行列
     * @param x 入力ベクトル
     * @param beta スカラー係数
     * @param y 出力ベクトル (in-place 更新)
     */
    template void gemv(bool trans, T alpha, const Matrix & A,
              const Vector & x, T beta, Vector & y)
    {
        const auto m = A.rows();
        const auto n = A.cols();

        if (!trans) {
            // y ← alpha * A * x + beta * y
            if (x.size() != n || y.size() != m) {
                throw DimensionError("blas::gemv: dimension mismatch");
            }
            if constexpr (std::is_same_v || std::is_same_v) {
                for (std::size_t i = 0; i < m; ++i) {
                    T sum = computation::simd::dot_product_simd (&A(i, 0), x.data(), n);
                    y[i] = alpha * sum + beta * y[i];
                }
            } else {
                for (std::size_t i = 0; i < m; ++i) {
                    T sum = T{0};
                    for (std::size_t j = 0; j < n; ++j)
                        sum += A(i, j) * x[j];
                    y[i] = alpha * sum + beta * y[i];
                }
            }
        } else {
            // y ← alpha * A^T * x + beta * y
            if (x.size() != m || y.size() != n) {
                throw DimensionError("blas::gemv: dimension mismatch (transpose)");
            }
            if constexpr (std::is_same_v || std::is_same_v) {
                computation::simd::scale_simd