The official solution divides the query to blocks on size sqrt, but there is an intuitive online solution. Consider an array `A[]`, where initially A[v] = `-T[v]`. When `v` goes on a vacation, subtract `n` from `A[v]` and add `1` to `A[u]` for all ancestor `u` of `v`. When `v` comes back from a vacation, add `n` to `A[v]` and subtract `1` from `A[u]` for all ancestor `u` of `v`. Now, each query is just asking for count of `v` such that `A[v] >= 0`. This can be maintained easily by using heavy-light-decomposition to transform "ancestor add" to "range add" queries, and the range add queries can be done with sqrt-decomposition.
Time complexity: O(n^1.5 lgn)
#pragma GCC optimize("O3,unroll-loops")
#include <stdio.h>
#define N 100000
#define B 254
#define NB (N / B + 2)
#define A 300000
int n, m, fa[N], ii, hd[N], e[N], Ln[N], sz[N], hld[N], dfn[N], dfc
, bel[N], tag[NB], val[N], ans, ll[NB], rr[NB];
unsigned char f[NB][A];
void dfs1(int u) {
sz[u] = 1;
for (int j = hd[u]; j; j = Ln[j]) {
dfs1(e[j]);
sz[u] += sz[e[j]];
if (sz[e[j]] > sz[e[hd[u]]])
e[j] ^= e[hd[u]], e[hd[u]] ^= e[j], e[j] ^= e[hd[u]];
}
}
void dfs2(int u) {
dfn[u] = dfc++;
for (int j = hd[u]; j; j = Ln[j]) {
hld[e[j]] = j == hd[u] ? hld[u] : e[j];
dfs2(e[j]);
}
}
void range_addone(int l, int r) {
register int bl = bel[l], br = bel[r];
for (int j = l; j <= r && j <= rr[bl]; ++j) {
--f[bl][val[j]];
ans += val[j] + tag[bl] == 2 * n - 1;
++val[j];
++f[bl][val[j]];
}
if ( bl ^ br ) {
for (int j = ll[br]; j <= r; ++j) {
--f[br][val[j]];
ans += val[j] + tag[br] == 2 * n - 1;
++val[j];
++f[br][val[j]];
}
for (int b = bl + 1; b ^ br; ++b) {
++tag[b];
ans += f[b][2 * n - tag[b]];
}
}
}
void range_subone(int l, int r) {
register int bl = bel[l], br = bel[r];
for (int j = l; j <= r && j <= rr[bl]; ++j) {
ans -= val[j] + tag[bl] == 2 * n;
--f[bl][val[j]];
--val[j];
++f[bl][val[j]];
}
if ( bl ^ br ) {
for (int j = ll[br]; j <= r; ++j) {
ans -= val[j] + tag[br] == 2 * n;
--f[br][val[j]];
--val[j];
++f[br][val[j]];
}
for (int b = bl + 1; b ^ br; ++b) {
ans -= f[b][2 * n - tag[b]];
--tag[b];
}
}
}
void point_add(int l, int k) {
ans -= val[l] + tag[bel[l]] >= 2 * n;
--f[bel[l]][val[l]];
val[l] += k;
++f[bel[l]][val[l]];
ans += val[l] + tag[bel[l]] >= 2 * n;
}
void Vacation(int v) {
point_add(dfn[v], - n);
for (; hld[v]; v = fa[hld[v]])
range_addone(dfn[hld[v]], dfn[v]);
range_addone(0, dfn[v]);
}
void Comeback(int v) {
point_add(dfn[v], n);
for (; hld[v]; v = fa[hld[v]])
range_subone(dfn[hld[v]], dfn[v]);
range_subone(0, dfn[v]);
}
int main() {
scanf("%d%d", &n, &m);
for (int i = 1; i < n; ++i) {
scanf("%d", &fa[i]);
--fa[i];
int j = ++ii;
Ln[j] = hd[fa[i]];
e[j] = i;
hd[fa[i]] = j;
}
dfs1(0);
hld[0] = 0;
dfs2(0);
for (int i = 0; i < n; ++i)
bel[i] = i / B;
for (int i = 0; i < n; ++i)
rr[bel[i]] = i;
for (int i = n - 1; i >= 0; --i)
ll[bel[i]] = i;
for (int i = 0; i < n; ++i) {
scanf("%d", &val[dfn[i]]);
val[dfn[i]] = 2 * n - val[dfn[i]] - 1;
++f[bel[dfn[i]]][val[dfn[i]]];
}
for (int i = 0, x; i < m; ++i) {
scanf("%d", &x);
if (x > 0)
Vacation(x - 1);
else
Comeback(-x - 1);
printf("%d ", ans);
}
return 0;
}