Osmosmjerka (找字游戏)

Osmosmjerka (找字游戏)

此题比较综合,有一定难度。

从任一点出发,最多LCM(M,N)\texttt{LCM}(M,N)个字符之后,就会出现重复的pattern,因此我们只要比较LCM长度的串, 就可以找到所有唯一字符串了。对于任何一个串来说,被连续挑中两次概率是 “此串出现次数/总次数”的平方。所以对所有串来说,出现重复发生的概率就是所有unique串的“出现次数/总次数”的平方和。

要快速比较两个LCM长的字符串是否相等,需要使用string hashing。更进一步,如果每个hash用线性的方式来算,那也是有不少case TLE的, 因为LCM可能较大(25万),这时可以利用出发点个数有限的特点,按binary jumping方式来算hash,复用已经算过的值,这样就快了。

因为数据较多(5005008=2106500 \cdot 500 \cdot 8=2\cdot 10^6),所以32位的hash是不够的,会有birthday paradox collision。 办法是用两个32位hash,这个就相当于64位hash,就可以了。

最后的复杂度是O(n2logn)\mathcal{O}(n^2 \log n)

#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef pair<int,int> pi;
 
int m,n,K;  // m,n <= 500, K <= 1e9
char s[505][505];   // M rows, N cols
int dir[8][2] = {{0,1},{0,-1},{1,0},{-1,0},{1,1},{-1,-1},{1,-1},{-1,1}};
int hsh[2][23][505][505];  // hash value patterns for 23 log levels
int p[2][23];
map<pi,int> cnts;    // hash -> count
const int A[2] = {911382323, 69420}, M[2] = {972663749, (int)1e9+9};
 
ll gcd(ll a, ll b) {
    return b == 0 ? a : gcd(b, a%b);
}
 
int main() {
    scanf("%d %d %d", &m, &n, &K);
    for (int i = 0; i < m; i++)
        scanf("%s", s[i]);
    
    int lcm = m*n/gcd(m,n);
    if (K > lcm) K = lcm;
    for (int j = 0; j < 2; j++) {
        p[j][0] = A[j];
        for (int i = 1; i < 23; i++)
            p[j][i] = (ll)p[j][i-1] * p[j][i-1] % M[j];
    }
        
    for (int di = 0; di < 8; di++)  {   // 8 directions
        // calc hash values for 2^lg strings from each pos
        for (int lg = 0; lg < 23; lg++) {
            for (int r = 0; r < m; r++)
                for (int c = 0; c < n; c++) {
                    if (lg == 0)
                        hsh[0][di][r][c] = hsh[1][di][r][c] = s[r][c];
                    else {
                        int R = (r + (1 << (lg-1)) * (m + dir[di][0])) % m;
                        int C = (c + (1 << (lg-1)) * (n + dir[di][1])) % n;
                        for (int j = 0; j < 2; j++)
                            hsh[j][lg][r][c] = ((ll)hsh[j][lg-1][r][c] * p[j][lg-1] + hsh[j][lg-1][R][C]) % M[j];
                    }
                }
        }
        // calc hashes for K-len strings from each pos
        for (int r = 0; r < m; r++)
            for (int c = 0; c < n; c++) {
                int hs[2] = {0};
                int R = r, C = c;
                for (int j = 0; j < 23; j++) {      // binary jumping
                    if ((K & (1 << j)) == 0) continue;
                    for (int k = 0; k < 2; k++)
                        hs[k] = ((ll)hs[k] * p[k][j] + hsh[k][j][R][C]) % M[k];
                    R = (R + (ll)(1<<j)*(m+dir[di][0])) % m;
                    C = (C + (ll)(1<<j)*(n+dir[di][1])) % n;
                }
                cnts[{hs[0], hs[1]}]++;
            }
    }
    ll total = m*n*8;
    total = total*total;
    ll squared = 0;
    for (auto e: cnts) {
        int v = e.second;
        squared += (ll)v*v;
    }
    ll g = gcd(total, squared);
    printf("%lld/%lld", squared/g, total/g);
    return 0;
}