This documentation is automatically generated by online-judge-tools/verification-helper
#define PROBLEM "https://judge.yosupo.jp/problem/sqrt_mod"
#include "Misc/marco.hpp"
#include "Misc/debug.hpp"
// Large int constant (10^9), named as an "infinity" sentinel; not referenced by the code visible here.
const int INF=1e9;
// Large ll constant (10^15), the 64-bit counterpart of INF; not referenced by the code visible here.
const ll INFI=1e15;
//----------Author: Nguyen Ho Nam,UIT, Saigon-----------------
#include "Mod/mod_sqrt.hpp"
int main() {
int t; cin>>t;
while(t--){
ll a,b; cin>>a>>b;
cout<<mod_sqrt(a,b)<<'\n';
}
return 0;
}
#line 1 "test/yosupo/Math/mod_sqrt_yosupo.test.cpp"
#define PROBLEM "https://judge.yosupo.jp/problem/sqrt_mod"
#line 2 "Misc/marco.hpp"
// Judges with GCC >= 12 only need Ofast
// #pragma GCC optimize("O3,no-stack-protector,fast-math,unroll-loops,tree-vectorize")
// MLE optimization
// #pragma GCC optimize("conserve-stack")
// Old judges
// #pragma GCC target("sse4.2,popcnt,lzcnt,abm,mmx,fma,bmi,bmi2")
// New judges. Test with assert(__builtin_cpu_supports("avx2"));
// #pragma GCC target("avx2,popcnt,lzcnt,abm,bmi,bmi2,fma,tune=native")
// Atcoder
// #pragma GCC target("avx2,popcnt,lzcnt,abm,bmi,bmi2,fma")
/*
#include <ext/pb_ds/assoc_container.hpp>
#include <ext/pb_ds/tree_policy.hpp>
using namespace __gnu_pbds;
typedef tree<int,null_type,less<int>,rb_tree_tag,tree_order_statistics_node_update> ods;
- insert(x),erase(x)
- find_by_order(k): return iterator to the k-th smallest element
- order_of_key(x): the number of elements that are strictly smaller
*/
#include<bits/stdc++.h>
using namespace std;
// Short names for the arithmetic types used throughout the library.
using ld = long double;
using ll = long long;
using u32 = unsigned int;
using u64 = unsigned long long;
// 128-bit integer and float types are compiler extensions, not standard C++.
using i128 = __int128;
using u128 = unsigned __int128;
using f128 = __float128;
// Pair shorthands.
#define pii pair<int,int>
#define pll pair<ll,ll>
// all(x) expands to the begin/end iterator pair of x, rall(x) to the reverse pair.
#define all(x) (x).begin(),(x).end()
#define rall(x) (x).rbegin(),(x).rend()
// Expands to x, x+n; relies on a variable named n being in scope at the use site.
#define ars(x) (x),(x+n)
// clock() ticks converted to seconds.
#define TIME (1.0 * clock() / CLOCKS_PER_SEC)
// Loop macros; the loop variable is always declared as int.
// For(i,a,b): i = a, a+1, ..., b-1.   rep(i,a): i = 0, ..., a-1.
#define For(i,a,b) for (int i=(a); i<(b); i++)
#define rep(i,a) For(i,0,a)
// rev(i,a,b): i = a, a-1, ..., b+1 (b itself is excluded).
#define rev(i,a,b) for (int i=(a); i>(b); i--)
// FOR(i,a,b): i = a, ..., b inclusive.   REP(i,a): i = 1, ..., a.
#define FOR(i,a,b) for (int i=(a); i<=(b); i++)
#define REP(i,a) FOR(i,1,a)
// REV(i,a,b): i = a, a-1, ..., b inclusive.
#define REV(i,a,b) for (int i=(a); i>=(b); i--)
// Member-name abbreviations.
#define pb push_back
#define eb emplace_back
#define mp make_pair
#define fi first
#define se second
// Fast-I/O setup. Expands to two statements, so it must not be used as the
// body of an unbraced if/else.
#define FT ios_base::sync_with_stdio(false); cin.tie(nullptr);
// Global Mersenne Twister seeded from the steady clock's current tick count.
mt19937 rng(chrono::steady_clock::now().time_since_epoch().count());
// Vector shorthands for the two most common element types.
using vi=vector<int>;
using vll = vector<ll>;
// vc<T> is vector<T>; each extra leading 'v' adds one level of nesting.
template <class T>
using vc = vector<T>;
template <class T>
using vvc = vector<vc<T>>;
template <class T>
using vvvc = vector<vvc<T>>;
template <class T>
using vvvvc = vector<vvvc<T>>;
template <class T>
using vvvvvc = vector<vvvvc<T>>;
// pq: priority_queue with the default comparator.
template <class T>
using pq = priority_queue<T>;
// pqg: priority_queue ordered with greater<T>.
template <class T>
using pqg = priority_queue<T, vector<T>, greater<T>>;
// vv(type, name, h, ...): declares `name` as a vector of h inner vectors;
// the variadic arguments are forwarded to the inner vector's constructor.
#define vv(type, name, h, ...) \
vector<vector<type>> name(h, vector<type>(__VA_ARGS__))
// vvv(type, name, h, w, ...): three-level version with outer sizes h and w.
#define vvv(type, name, h, w, ...) \
vector<vector<vector<type>>> name( \
h, vector<vector<type>>(w, vector<type>(__VA_ARGS__)))
// vvvv(type, name, a, b, c, ...): four-level version with outer sizes a, b and c.
#define vvvv(type, name, a, b, c, ...) \
vector<vector<vector<vector<type>>>> name( \
a, vector<vector<vector<type>>>( \
b, vector<vector<type>>(c, vector<type>(__VA_ARGS__))))
// Disabled order-statistics tree alias; it refers to pb_ds names whose
// includes are also commented out earlier in this header.
//template <class T>
//using ods =
// tree<T, null_type, less<T>, rb_tree_tag, tree_order_statistics_node_update>;
// Lowers x to y when y is strictly smaller; returns whether x was changed.
template <typename T>
bool chkmin(T &x, T y) {
  if (x > y) {
    x = y;
    return true;
  }
  return false;
}
// Raises x to y when y is strictly larger; returns whether x was changed.
template <typename T>
bool chkmax(T &x, T y) {
  if (x < y) {
    x = y;
    return true;
  }
  return false;
}
// These two lines repeat the pq / pqg aliases declared earlier in this header
// with identical definitions.
template<class T> using pq = priority_queue<T>;
template<class T> using pqg = priority_queue<T, vector<T>, greater<T>>;
#line 1 "Misc/debug.hpp"
// Debug printers: each __print overload writes one value to cerr.
// Scalars are printed as-is, characters in single quotes, strings in double
// quotes, bools as true/false, pairs as {first, second} and any iterable as
// {e1, e2, ...}.
void __print(int x) { std::cerr << x; }
void __print(long x) { std::cerr << x; }
void __print(long long x) { std::cerr << x; }
void __print(unsigned x) { std::cerr << x; }
void __print(unsigned long x) { std::cerr << x; }
void __print(unsigned long long x) { std::cerr << x; }
void __print(float x) { std::cerr << x; }
void __print(double x) { std::cerr << x; }
void __print(long double x) { std::cerr << x; }
void __print(char x) { std::cerr << '\'' << x << '\''; }
void __print(const char *x) { std::cerr << '\"' << x << '\"'; }
void __print(const std::string &x) { std::cerr << '\"' << x << '\"'; }
void __print(bool x) { std::cerr << (x ? "true" : "false"); }

// vector<bool> hands out proxy objects instead of bool references, so it is
// printed by index through the bool overload.
void __print(const std::vector<bool> &x) {
  std::cerr << '{';
  for (std::size_t i = 0; i < x.size(); ++i) {
    if (i != 0) std::cerr << ", ";
    __print(x[i]);
  }
  std::cerr << "}";
}

// Declared ahead of the container printer so that containers of pairs and
// pairs of containers can refer to each other. Without this, a pair holding a
// container does not compile: ordinary lookup from inside the pair printer
// cannot see a template declared after it, and argument-dependent lookup for
// std:: types does not search the global namespace.
template <typename T, typename V>
void __print(const std::pair<T, V> &x);

// Generic printer for anything iterable. The trailing return type removes this
// overload for non-iterable types, so values such as short or raw pointers
// keep falling back to the scalar overloads above through the usual
// conversions instead of failing inside the loop.
template <typename T>
auto __print(const T &x) -> decltype(void(std::begin(x)), void(std::end(x))) {
  bool first = true;
  std::cerr << '{';
  for (const auto &item : x) {
    if (!first) std::cerr << ", ";
    first = false;
    __print(item);
  }
  std::cerr << "}";
}

template <typename T, typename V>
void __print(const std::pair<T, V> &x) {
  std::cerr << '{';
  __print(x.first);
  std::cerr << ", ";
  __print(x.second);
  std::cerr << '}';
}
// End of a _print(...) argument list: closes the bracket and ends the line.
void _print() { std::cerr << "]\n"; }

// Prints the first argument with __print, a ", " separator when more
// arguments follow, then recurses on the remaining arguments.
template <typename T, typename... V>
void _print(T t, V... v) {
  __print(t);
  if (sizeof...(v) != 0) {
    std::cerr << ", ";
  }
  _print(v...);
}
// End of a dbg_out(...) argument list: ends the line and flushes cerr.
void dbg_out() { std::cerr << std::endl; }

// Prints the first argument with __print, a ", " separator when more
// arguments follow, then recurses on the remaining arguments.
template <typename Head, typename... Tail>
void dbg_out(Head H, Tail... T) {
  __print(H);
  if (sizeof...(T) != 0) {
    std::cerr << ", ";
  }
  dbg_out(T...);
}
// dbg(a, b, ...) writes "[a, b, ...]:" followed by the argument values to cerr.
// The label and the values are joined into a single expression so that an
// unbraced `if (cond) dbg(x);` guards the whole output; as two separate
// statements only the label was conditional. The trailing ';' is kept so
// existing call sites written without a semicolon still compile.
#define dbg(...) (std::cerr << "[" << #__VA_ARGS__ << "]:", dbg_out(__VA_ARGS__));
#line 4 "test/yosupo/Math/mod_sqrt_yosupo.test.cpp"
// Large int constant (10^9), named as an "infinity" sentinel; not referenced by the code visible here.
const int INF=1e9;
// Large ll constant (10^15), the 64-bit counterpart of INF; not referenced by the code visible here.
const ll INFI=1e15;
//----------Author: Nguyen Ho Nam,UIT, Saigon-----------------
#line 2 "Modint/Barrett_reduction.hpp"
/*
Barrett reduction for a modulus m that is fixed at run time: division by m is
replaced by a multiplication with a precomputed reciprocal and a shift.
@see https://nyaannyaan.github.io/library/modint/barrett-reduction.hpp
@see https://en.wikipedia.org/wiki/Barrett_reduction
*/
struct Barrett {
  using u32 = unsigned int;
  using i64 = long long;
  using u64 = unsigned long long;

  u32 m;   // modulus
  u64 im;  // ceil(2^64 / m) reduced modulo 2^64; this wraps to 0 when m == 1

  // The default-constructed object has m == 0 and must be assigned a real
  // modulus before any of the arithmetic members is used.
  constexpr Barrett() : m(), im() {}
  constexpr Barrett(int n) : m(n), im(u64(-1) / m + 1) {}

  // Returns floor(n / m).
  constexpr inline i64 quo(u64 n) {
    // The reciprocal for m == 1 is 2^64, which does not fit in im, so the
    // estimate below would be wrong; answer directly instead.
    if (m == 1) return n;
    u64 x = u64((__uint128_t(n) * im) >> 64);
    u32 r = n - x * m;
    // x is the quotient or one more; in the latter case r has wrapped around
    // and compares >= m.
    return m <= r ? x - 1 : x;
  }

  // Returns n mod m.
  constexpr inline i64 rem(u64 n) {
    if (m == 1) return 0;
    u64 x = u64((__uint128_t(n) * im) >> 64);
    u32 r = n - x * m;
    // Adding m undoes the 32-bit wrap-around caused by an overestimated x.
    return m <= r ? r + m : r;
  }

  // Returns {floor(n / m), n mod m}.
  constexpr inline std::pair<i64, int> quorem(u64 n) {
    if (m == 1) return {n, 0};
    u64 x = u64((__uint128_t(n) * im) >> 64);
    u32 r = n - x * m;
    if (m <= r) return {x - 1, r + m};
    return {x, r};
  }

  // Returns n^p mod m by binary exponentiation; p is expected to be >= 0.
  constexpr inline i64 pow(u64 n, i64 p) {
    u32 a = rem(n), r = m == 1 ? 0 : 1;
    while (p) {
      if (p & 1) r = rem(u64(r) * a);
      a = rem(u64(a) * a);
      p >>= 1;
    }
    return r;
  }

  // Returns a * b mod m.
  constexpr inline u32 mul(u32 a, u32 b) {
    return rem(u64(a) * b);
  }
};
//u64 version:
// Barrett reduction for a 64-bit modulus. The precomputed reciprocal is kept
// as two 64-bit halves (mh = high, ml = low) so that the 128x128-bit product
// can be assembled from 64x64-bit partial products.
struct Barrett_64 {
  u128 mod, mh, ml;

  explicit Barrett_64(u64 mod = 1) : mod(mod) {
    u128 recip = u128(-1) / mod;
    // Bump the reciprocal when mod divides 2^128 exactly.
    if (recip * mod + mod == u128(0)) ++recip;
    mh = recip >> 64;
    ml = recip & u64(-1);
  }

  u64 umod() const { return mod; }

  // Reduces x using the precomputed reciprocal; the estimated quotient is
  // followed by a single conditional subtraction of mod.
  u64 rem(u128 x) {
    const u128 lo = x & u64(-1);
    const u128 hi = x >> 64;
    u128 carry = (lo * ml) >> 64;
    carry = (lo * mh + hi * ml + carry) >> 64;
    const u128 q = hi * mh + carry;
    x -= q * mod;
    return x < mod ? x : x - mod;
  }

  // Returns a * b reduced by rem().
  u64 mul(u64 a, u64 b) { return rem(u128(a) * b); }
};
#line 3 "Modint/dynamic_modint.hpp"
// Integer modulo a modulus chosen at run time with set_mod(). The modulus and
// its Barrett context are function-local statics of the class template, so
// each distinct `id` has its own independent modulus.
template <int id>
struct dynamic_modint {
  int x;  // stored residue

  dynamic_modint() : x(0) {}

  // Reduces y modulo the current modulus; a negative remainder is shifted up
  // by one modulus.
  dynamic_modint(int64_t y) {
    int reduced = y % get_mod();
    x = reduced < 0 ? reduced + get_mod() : reduced;
  }

  dynamic_modint &operator+=(const dynamic_modint &p) {
    x += p.x;
    if (x >= get_mod()) x -= get_mod();
    return *this;
  }

  dynamic_modint &operator-=(const dynamic_modint &p) {
    // Adds the complement of p.x so the intermediate value never goes negative.
    x += get_mod() - p.x;
    if (x >= get_mod()) x -= get_mod();
    return *this;
  }

  dynamic_modint &operator*=(const dynamic_modint &p) {
    x = rem(static_cast<unsigned long long>(x) * p.x);
    return *this;
  }

  dynamic_modint &operator/=(const dynamic_modint &p) {
    return *this *= p.inv();
  }

  dynamic_modint operator-() const { return dynamic_modint(-x); }
  dynamic_modint operator+() const { return *this; }

  dynamic_modint operator+(const dynamic_modint &p) const {
    dynamic_modint res(*this);
    res += p;
    return res;
  }

  dynamic_modint operator-(const dynamic_modint &p) const {
    dynamic_modint res(*this);
    res -= p;
    return res;
  }

  dynamic_modint operator*(const dynamic_modint &p) const {
    dynamic_modint res(*this);
    res *= p;
    return res;
  }

  dynamic_modint operator/(const dynamic_modint &p) const {
    dynamic_modint res(*this);
    res /= p;
    return res;
  }

  bool operator==(const dynamic_modint &p) const { return x == p.x; }
  bool operator!=(const dynamic_modint &p) const { return !(x == p.x); }

  // Extended Euclid on (x, mod), returning the Bezout coefficient of x
  // reduced modulo the modulus.
  dynamic_modint inv() const {
    int r0 = x, r1 = get_mod();
    int c0 = 1, c1 = 0;
    while (r1 > 0) {
      const int q = r0 / r1;
      const int r2 = r0 - q * r1;
      r0 = r1;
      r1 = r2;
      const int c2 = c0 - q * c1;
      c0 = c1;
      c1 = c2;
    }
    return dynamic_modint(c0);
  }

  // Binary exponentiation; a non-positive exponent yields dynamic_modint(1).
  dynamic_modint pow(int64_t n) const {
    dynamic_modint acc(1), base(x);
    for (; n > 0; n >>= 1) {
      if (n & 1) acc *= base;
      base *= base;
    }
    return acc;
  }

  friend std::ostream &operator<<(std::ostream &os, const dynamic_modint &p) {
    return os << p.x;
  }

  friend std::istream &operator>>(std::istream &is, dynamic_modint &a) {
    int64_t raw;
    is >> raw;
    a = dynamic_modint(raw);
    return is;
  }

  int get() const { return x; }

  // Reduces p with the shared Barrett context.
  inline unsigned int rem(unsigned long long p) { return barrett().rem(p); }

  // Barrett context shared by all instances with this id.
  static inline Barrett &barrett() {
    static Barrett ctx;
    return ctx;
  }

  // Modulus shared by all instances with this id; 0 until set_mod() is called.
  static inline int &get_mod() {
    static int modulus = 0;
    return modulus;
  }

  // Installs a new modulus, which must lie in [1, 2^30 - 1].
  static void set_mod(int md) {
    assert(0 < md && md <= (1LL << 30) - 1);
    get_mod() = md;
    barrett() = Barrett(md);
  }
};
using modint = dynamic_modint<-1>;
#line 3 "Mod/mod_pow.hpp"
// Returns a^n mod `mod` for a 32-bit modulus; n must be non-negative.
u32 mod_pow(int a, ll n, int mod) {
  assert(n >= 0);
  // Bring the base into [0, mod).
  a %= mod;
  if (a < 0) a += mod;
  // Odd moduli below 2^30 go through dynamic_modint.
  const bool use_modint = (mod & 1) && (mod < (1 << 30));
  if (use_modint) {
    using mint = dynamic_modint<202311021>;
    mint::set_mod(mod);
    return mint(a).pow(n).get();
  }
  // Everything else uses plain Barrett multiplication.
  Barrett bt(mod);
  int result = 1;
  for (; n; n >>= 1) {
    if (n & 1) result = bt.mul(result, a);
    a = bt.mul(a, a);
  }
  return result;
}
// Returns a^n mod `mod` for a 64-bit modulus.
//   a   : base, any signed 64-bit value (negative values are reduced correctly)
//   n   : exponent, must be non-negative
//   mod : modulus, must be non-zero
u64 mod_pow_64(ll a, ll n, u64 mod) {
  assert(n >= 0);
  assert(mod != 0);
  // Reduce the base into [0, mod) using its magnitude. Writing `a % mod`
  // directly would convert a negative `a` to 2^64 + a before the division,
  // which has a different residue unless mod divides 2^64.
  u64 base = a < 0 ? (u64(0) - u64(a)) % mod : u64(a) % mod;
  if (a < 0 && base != 0) base = mod - base;
  // dynamic_modint stores residues in an int and set_mod() only accepts
  // moduli up to 2^30 - 1, so the fast path is restricted to that range;
  // larger odd moduli previously tripped that assertion.
  if ((mod & 1) && (mod < (u64(1) << 30))) {
    using mint = dynamic_modint<202311021>;
    mint::set_mod(int(mod));
    return mint(base).pow(n).get();
  }
  // General case: 64-bit Barrett multiplication.
  Barrett_64 bt(mod);
  u64 r = 1;
  for (; n; n >>= 1) {
    if (n & 1) r = bt.mul(r, base);
    base = bt.mul(base, base);
  }
  return r;
}
#line 3 "Mod/mod_sqrt.hpp"
// Square root modulo a prime p (Tonelli-Shanks).
//   a : value whose root is wanted; it is reduced modulo p first, so
//       unreduced and negative inputs are accepted
//   p : modulus, expected to be prime
// Returns some x with x * x == a (mod p), 0 when a == 0 (mod p), and -1 when
// a is not a quadratic residue.
template <typename T>
T mod_sqrt(const T &a, const T &p) {
  using wide = unsigned __int128;
  // Products are formed in 128 bits; multiplying in T overflows as soon as
  // p * p no longer fits (for example T = int with p > 46341).
  const auto mul = [&p](const T &u, const T &v) -> T {
    return T(static_cast<wide>(u) * static_cast<wide>(v) % static_cast<wide>(p));
  };

  T r = a % p;
  if (r < 0) r += p;
  if (r == 0) return 0;
  if (p == 2) return r;
  // Euler's criterion: r is a residue iff r^((p-1)/2) == 1.
  if (mod_pow_64(r, (p - 1) >> 1, p) != 1) return -1;

  // Smallest b that is a quadratic non-residue.
  T b = 1;
  while (mod_pow_64(b, (p - 1) >> 1, p) == 1) ++b;

  // Write p - 1 = 2^e * m with m odd.
  T e = 0, m = p - 1;
  while (m % 2 == 0) m >>= 1, ++e;

  // x = r^((m+1)/2) and y = r^m, so x * x == r * y throughout the loop.
  T x = mod_pow_64(r, (m - 1) >> 1, p);
  T y = mul(r, mul(x, x));
  x = mul(x, r);
  T z = mod_pow_64(b, m, p);

  while (y != 1) {
    // j = least exponent with y^(2^j) == 1.
    T j = 0, t = y;
    while (t != 1) {
      ++j;
      t = mul(t, t);
    }
    z = mod_pow_64(z, T(1) << (e - j - 1), p);
    x = mul(x, z);
    z = mul(z, z);
    y = mul(y, z);
    e = j;
  }
  return x;
}
#line 8 "test/yosupo/Math/mod_sqrt_yosupo.test.cpp"
int main() {
int t; cin>>t;
while(t--){
ll a,b; cin>>a>>b;
cout<<mod_sqrt(a,b)<<'\n';
}
return 0;
}