Censoring

Censoring

给定两个字符串SSTT,从SS从反复删除TT子串(删除后可能有形成新的TT子串),求最后的字符串。

办法就是从前往后扫描并复制SS中的字符到结果字符串,动态维护结果字符串的前缀hash数组,然后碰到刚复制完成的len(T)\texttt{len}(T)个字符串是TT (用哈希方法O(1)\mathcal{O}(1)检测),就删除这个子串。这个可以完成,依赖的就是删除后缀子串后,前缀哈希的数组h[]\texttt{h}[]依然是合法的。

复杂度为O(n)\mathcal{O}(n)

// http://www.usaco.org/index.php?page=viewproblem2&cpid=529
 
#include <cstdio>
#include <algorithm>
#include <functional>
#include <vector>
#include <cstring>
using namespace std;
typedef long long ll;
typedef pair<int,int> pi;
 
int n, m;
const int MAXN = 1000005;
char T[MAXN], S[MAXN], ans[MAXN];
const int A = 96420, M=1e9+9;
int h[MAXN], p[MAXN];
 
int main() {
    freopen("censor.in", "r", stdin);
    freopen("censor.out", "w", stdout);
    scanf("%s %s", S, T);
    n = strlen(S);
    m = strlen(T);
 
    p[0] = 1;
    for (int i = 1; i < n; i++)
        p[i] = (ll)p[i-1] * A % M;
    
    int H = T[0];
    for (int i = 1; i < m; i++)
        H = ((ll)H*A + T[i]) % M;
 
    int j = 0;
    for (int i = 0; i < n; i++) {
        ans[j] = S[i];
        if (j == 0) h[j] = S[i];
        else h[j] = ((ll)h[j-1]*A + S[i]) % M;
        int a = j-m+1;
        if (a >= 0) {   // hash of ans[a, j]
            int hsh = a == 0 ? h[j] : (h[j] - (ll)h[a-1] * p[j-a+1] + (ll)M * p[j-a+1]) % M;
            if (hsh == H) {     // found a match
                j = a;
                continue;
            }
        }
        j++;
    }
    ans[j] = 0;
    printf("%s", ans);
    return 0;
}