antaの競技プログラミング練習日記

SRM 283 DIV1 Hard SuspiciousStrings

SRM 行列文字列

問題 Editorial

問題

文字列の集合dictionaryが与えられる。
長さnの文字列のうち、dictionaryに含まれる単語を部分文字列として含むものの数をmod 10000で求めよ

1 <= |dictionary| <= 10
1 <= |dictionary[i]| <= 10
1 <= n <= 2^31-1

解答

行列累乗。
単語へのマッチング状態を状態として持って、次の文字で行く所に+1する。
「既に単語が含まれた」という状態を別個に持って、そこは常に*26するようにする。
その行列を^nしてやれば「まだ何にもマッチしていない状態」から「既に単語が含まれた」へのものが答えになる。
この単語のマッチング状態は(i番目の単語の * j文字目までマッチしている)というふうにすればO(Σ|dictionary[i]|)の状態数でできる。
この状態圧縮テクニックは他に東京大学プログラミングコンテスト2010 I 盗まれた宝石解説にも出てくるものだ

コメント

状態とその遷移の生成は手抜きしたのだけれど、これはパターンマッチングオートマトンをAho-Corasickっていうので作ってその状態を状態にしたらいいのかな？文字数多い時とかに使えるのかな？

コード

色々手抜きしてるのでわりとギリギリ。
行列累乗はMOD削減しないと通らなそう。

#include <string>
#include <vector>
#include <algorithm>
#include <cstring>
#include <cassert>
#define rep(i,n) for(int (i)=0;(i)<(int)(n);++(i))
#define rer(i,l,u) for(int (i)=(int)(l);(i)<=(int)(u);++(i))
using namespace std;
typedef long long ll;
typedef ll Num;

struct Matrix {
	vector<vector<Num> > v, w;
	Matrix() {}
	Matrix(int n, int m): v(n, vector<Num>(m)) { assert(n > 0 && m > 0); }
	inline int height() const { return (int)v.size(); }
	inline int width() const { return (int)v[0].size(); }
	inline Num& at(int i, int j) { assert(0 <= i && i < height() && 0 <= j && j < width()); return v[i][j]; }
	inline const Num& at(int i, int j) const { assert(0 <= i && i < height() && 0 <= j && j < width()); return v[i][j]; }
	static Matrix identity(int n) {
		Matrix A(n, n);
		rep(i, n) A.at(i, i) = 1;
		return A;
	}
	inline static Matrix identity(const Matrix& A) {
		assert(A.height() == A.width()); return identity(A.height()); }
	Matrix& operator*=(const Matrix& B) {
		int n = height(), m = B.width(), p = B.height();
		assert(p == width());
		w.resize(n, vector<Num>(m, 0));
		rep(i, n) rep(j, m) {
			Num x = 0;
			rep(k, p)
				x += at(i, k) * B.at(k, j);
			x %= 10000;
			w[i][j] = x;
		}
		v.swap(w);
		return *this;
	}
};

template<typename Mat>
Mat operator^(const Mat& t, ll k) {
	Mat A = t, B = Mat::identity(t);
	while(k) {
		if(k & 1) {
			B *= A;
		}
		A *= A;
		k >>= 1;
	}
	return B;
}

struct SuspiciousStrings {
	int getAmount(vector <string> dictionary, int n) {
		for(int i = 0; i < dictionary.size(); i ++) {
			if(dictionary[i].size() > n) dictionary.erase(dictionary.begin()+i), i --;
		}
		int m = dictionary.size();
		if(m == 0) return 0;
		Matrix A(m * 10 + 1, m * 10 + 1);
		rep(i, m) rep(j, dictionary[i].size()) {
			string prefix = dictionary[i].substr(0, j);
			int next[26] = {0};	//0は空の代表.
			rep(k, m) if(dictionary[k].size() > j && dictionary[k].substr(0, j) == prefix) {
				int c = dictionary[k][j] - 'a';

				if(j + 1 == dictionary[k].size())	//パターン完成は何を置いても優先させる
					next[c] = m * 10;
				else if(next[c] == 0)	//それ以外は同じなので先着順
					next[c] = k * 10 + (j + 1);
			}
			//失敗したところに途中から入るやつ
			//なるべく多くの文字を使うパターンに入るためにlは0からループさせる
			rep(c, 26) rer(l, 0, prefix.size()) rep(k, m)
			if(prefix.substr(l) + (char)(c+'a') == dictionary[k].substr(0, prefix.size() - l + 1)) {
				if(dictionary[k].size() == prefix.size() - l + 1)
					next[c] = m * 10;
				else if(next[c] == 0)
					next[c] = k * 10 + (prefix.size() - l + 1);
			}

			rep(c, 26)
				A.at(i * 10 + j, next[c]) ++;
		}
		A.at(m * 10, m * 10) = 26;
		A = A ^ n;
		return A.at(0, m * 10);
	}
};