π§© AtCoder Beginner Contest 396
π§© AtCoder Beginner Contest 396
# Info & Summary
- Date:
2025-03-08
- Completion Status: A β / B β / C β / D β / E β / F β / G β
- Problem Type:
Problem | Type(s) | Data Structure / Algo | π₯ Key Insight / Mark |
---|---|---|---|
D | XOR operation | DFS | β |
E | XOR operation | BFS/DFS | π₯π§ |
F | Inversion number | Fenwick Tree | π₯π οΈπ |
G | Meet-in-the-Middle | Meet-in-the-Middle/Bitmask | π₯π οΈπ |
Details
Emoji | Label | Meaning |
---|---|---|
β | Important | Problem is strategically useful or has reusable patterns |
π₯ | Must master | Classic problem or technique that appears often and is worth mastering |
β οΈ | Reattempt | You attempted but failed; worth reviewing and solving again |
π§ | New to me | Introduced a new idea, technique, or pattern you hadnβt seen before |
π οΈ | Template | You built or improved a code template from solving this problem |
π | Too hard now | Beyond current ability to understand/solve; revisit after more practice |
π A - Triple Four

Two pointer
version
// Two pointer version
#include <bits/stdc++.h>
using namespace std;
int main()
{
ios::sync_with_stdio(false);
cin.tie(0);
int n;
cin >> n;
vector<int> a(n);
for (int i = 0; i < n; i++)
{
cin >> a[i];
}
for (int i = 0, j = 0; i < n; i = j)
{
while (j < n && a[i] == a[j])
{
j++;
}
if (j - i >= 3)
{
cout << "Yes" << endl;
return 0;
}
}
cout << "No" << endl;
return 0;
}
// Better version
#include <bits/stdc++.h>
using namespace std;
int main()
{
ios::sync_with_stdio(false);
cin.tie(0);
int n;
cin >> n;
vector<int> A(n);
for (int i = 0; i < n; i++)
{
cin >> A[i];
}
for (int i = 0; i < n - 2; i++)
{
if (A[i] == A[i + 1] && A[i + 1] == A[i + 2])
{
cout << "Yes" << endl;
return 0;
}
}
cout << "No" << endl;
return 0;
}
π B - Card Pile

#include <bits/stdc++.h>
using namespace std;
int main()
{
ios::sync_with_stdio(false);
cin.tie(0);
int q;
cin >> q;
stack<int> s;
for (int i = 0; i < 100; i++)
{
s.push(0);
}
while (q--)
{
int c;
cin >> c;
if (c == 1)
{
int num;
cin >> num;
s.push(num);
}
else
{
cout << s.top() << endl;
s.pop();
}
}
return 0;
}
π C - Buy Balls

Main Idea:
- Sort each of
A
andB
in descending order - Then, take some leading elements of
A
and some leading elements ofB
Tips
Since can take any value within , it is sufficient to use the maximum value among , which can be done in time by precalculating the cumulative max of in time
#include <bits/stdc++.h>
using namespace std;
int main()
{
ios::sync_with_stdio(false);
cin.tie(0);
int n, m;
cin >> n >> m;
vector<long long> A(n), B(m);
for (int i = 0; i < n; i++)
{
cin >> A[i];
}
for (int i = 0; i < m; i++)
{
cin >> B[i];
}
sort(A.rbegin(), A.rend());
sort(B.rbegin(), B.rend());
vector<long long> prefix_A(n + 1, 0), prefix_B(m + 1, 0);
vector<long long> maxT(m + 1, 0);
for (int i = 0; i < n; i++)
{
prefix_A[i + 1] = prefix_A[i] + A[i];
}
for (int i = 0; i < m; i++)
{
prefix_B[i + 1] = prefix_B[i] + B[i];
maxT[i + 1] = max(maxT[i], prefix_B[i + 1]);
}
long long ans = 0;
for (int i = 0; i <= n; i++)
{
ans = max(ans, prefix_A[i] + maxT[min(i, m)]);
}
cout << ans << endl;
return 0;
}
Better version
// Better
#include <bits/stdc++.h>
using namespace std;
int main()
{
ios::sync_with_stdio(false);
cin.tie(0);
int n, m;
cin >> n >> m;
vector<long long> A(n), B(m);
for (int i = 0; i < n; i++)
{
cin >> A[i];
}
for (int i = 0; i < m; i++)
{
cin >> B[i];
}
sort(A.rbegin(), A.rend());
sort(B.rbegin(), B.rend());
vector<long long> prefix_A(n + 1, 0), prefix_B(m + 1, 0);
for (int i = 0; i < n; i++)
{
prefix_A[i + 1] = prefix_A[i] + A[i];
}
for (int i = 0; i < m; i++)
{
prefix_B[i + 1] = prefix_B[i] + B[i];
}
long long ans = 0;
long long res = -1e18;
for (int i = 0; i <= n; i++)
{
if (i <= m)
{
res = max(res, prefix_B[i]);
}
ans = max(ans, res + prefix_A[i]);
}
cout << ans << endl;
return 0;
}
Why better:
- This optimization avoids storing an extra
maxT[]
array by maintaining arunning maximum (res)
of the prefix sums ofB
during the loop. This reduces memory usage and simplifies the logic, while also being more cache-friendly and efficient, especially when working with large inputs.
π D - Minimum XOR Path

A simple path:
- Start from node
1
- Visits any number of other nodes without repeating
- Ends at any node(e.g., in this case, at node
N
)
The complete graph:
- In a complete graph(with ), every onde is connected to every other
- From node
1
, you can go to any of the otherN - 1
nodes, then any of the remainingN - 2
nodes, and so on - So, you are free too choose any permutation of the other nodes as a path, stopping at any node, so the number of possible simple paths from node
1
is:
This sum is , which is brute-force-able within time limit, especially for DFS
or backtracking
code
DFS for simple paths:
- Traverse all possible paths without revisiting nodes(i.e., maintaining a visited array)
- Keep track of the current
XOR
sum as you explore - At each node, if you reach
N
, record the currentXOR
sum as a candidate answer - Backtrack and try other branches
#include <bits/stdc++.h>
using namespace std;
int main()
{
ios::sync_with_stdio(false);
cin.tie(0);
int n, m;
cin >> n >> m;
// Builds the adjacency list
vector<vector<pair<int, long long>>> adj(n);
for (int i = 0; i < m; i++)
{
int u, v;
long long w;
cin >> u >> v >> w;
u--;
v--;
adj[u].push_back({v, w});
adj[v].push_back({u, w});
}
// vis[] tracks visited nodes to avoid cycles (ensures simple path)
// ans stores the minimum XOR found so far
vector<bool> vis(n, false);
long long ans = 1ll << 60;
// recursive lambda (C++20-compatible), allowing DFS to call itself
auto dfs = [&](auto &&self, int u, long long s) -> void
{
if (u == n - 1)
{
ans = min(ans, s);
return;
}
// Backtrack after exploring to allow other paths to reuse the node
vis[u] = true;
for (auto [v, w] : adj[u])
{
if (!vis[v])
{
self(self, v, s ^ w);
}
}
vis[u] = false;
};
dfs(dfs, 0, 0ll);
cout << ans << endl;
return 0;
}
π E - Min of Restricted Sum

First, construct the following graph from the integer sequences , , and :
- an undirected graph with vertices and edges, where there is an edge between vertex and labeled
- Then we assume that the graph is connected, and we may consider each connected component independently
Tips
Fix the value to be . For any edge, once the value for the vertex on one end is set, the other is uniquely determined. Thus, picking determines all the value of . If any inconsistency occurs at this point, the answer is -1
Tips
Now, notice that the condition is independent for each place in 's binary representations
For all , the -th bit of is 0
or 1
, and fixing to one of them determines the -th bit of the others. Since we want to minimize the sum of the elements, we may try both 0
and 1
and pick whichever that makes fewer standing -th bit(if the number of standing bits is the same, picking anything is fine)
This can be done with BFS
or DFS
, the complexity is
DFS
version
#include <bits/stdc++.h>
using namespace std;
int main()
{
ios::sync_with_stdio(false);
cin.tie(0);
int n, m;
cin >> n >> m;
vector<vector<array<int, 2>>> adj(n);
for (int i = 0; i < m; i++)
{
int x, y, z;
cin >> x >> y >> z;
x--;
y--;
adj[x].push_back({y, z});
adj[y].push_back({x, z});
}
vector<bool> vis(n, false);
vector<int> ans(n, 0);
vector<int> connected_component;
auto dfs = [&](auto &&self, int x, int p) -> void
{
vis[x] = true;
connected_component.push_back(x);
ans[x] = p;
for (auto [y, z] : adj[x])
{
if (!vis[y])
{
self(self, y, p^z);
}
else if (ans[y] != (p ^ z))
{
cout << -1 << endl;
exit(0);
}
}
};
for (int x = 0; x < n; x++)
{
if (vis[x])
{
continue;
}
connected_component.clear();
dfs(dfs, x, 0);
// the counts of set bits for each bit
vector<int> cnt(30, 0);
for (auto x : connected_component)
{
for (int k = 0; k < 30; k++)
{
if ((ans[x] >> k) & 1)
{
cnt[k]++;
}
}
}
// adjust the values in A to ensure that the number of 1 bits in A is as balanced as possible
// minimize the sum of the elements
int off = 0;
for (int j = 0; j < 30; j++)
{
if (cnt[j] > connected_component.size() - cnt[j])
{
off ^= 1 << j;
}
}
// assign the final values
for (auto x : connected_component)
{
ans[x] ^= off;
}
}
for (int i = 0; i < n; i++)
{
cout << ans[i] << " \n"[i == n - 1];
}
return 0;
}
Tips
- Traditional
BFS
with Queue:- In a traditional
BFS
, aqueue
is used to store the nodes to be processed. As we traverse the graph level by level, we visit each node once and process its neighbors - The standard purpose of the queue is to maintain the order of nodes to visit and ensure that we process each node in the correct BFS sequence
- In a traditional
BFS
with Additional Computation (UsingVector
):- In this problem, while performing
BFS
, we are not only visiting nodes but also performing additional calculations, specifically related toXOR
values - The
vector
q (instead of aqueue
) is used for two purposes:- To store the nodes that are currently being processed in the
BFS
- To update the
XOR
values for each node in the graph, which requires checking conditions and performing additional computations during the traversal
- To store the nodes that are currently being processed in the
- In this problem, while performing
BFS
version
#include <bits/stdc++.h>
using namespace std;
int main()
{
ios::sync_with_stdio(false);
cin.tie(0);
int n, m;
cin >> n >> m;
vector<vector<array<int, 2>>> adj(n);
for (int i = 0; i < m; i++)
{
int x, y, z;
cin >> x >> y >> z;
x--;
y--;
adj[x].push_back({y, z});
adj[y].push_back({x, z});
}
// `-1` means un-visited
vector<int> A(n, -1);
for (int x = 0; x < n; x++)
{
if (A[x] != -1)
{
continue;
}
// BFS
vector<int> q = {x};
// Cur node's binary representation
A[x] = 0;
// the counts of set bits for each bit
int cnt[30] = {};
for (int i = 0; i < q.size(); i++)
{
int u = q[i];
for (int j = 0; j < 30; j++)
{
cnt[j] += (A[u] >> j) & 1;
}
for (auto [v, z] : adj[u])
{
// if un-visited
if (A[v] == -1)
{
A[v] = A[u] ^ z;
q.push_back(v);
}
// If any inconsistency occurs
else if (A[v] != (A[u] ^ z))
{
cout << -1 << endl;
return 0;
}
}
}
// adjust the values in A to ensure that the number of 1 bits in A is as balanced as possible
// minimize the sum of the elements
int off = 0;
for (int j = 0; j < 30; j++)
{
// If more than half the nodes have 1 at a particular bit position
// we flip that bit
if (cnt[j] > q.size() - cnt[j])
{
off ^= 1 << j;
}
}
// assign the final values
for (auto x : q)
{
A[x] ^= off;
}
}
for (int i = 0; i < n; i++)
{
cout << A[i] << " \n"[i == n - 1];
}
return 0;
}
π F - Rotated Inversions

Warning
Hard Problems to Tackle Later
Tips
For , coincides with : in this case, the inversion number can be found in time, for example by using Fenwick Tree
Now, we will consider how to find the delta of the inversion numbers for and for each :
- Let denote the sequence for , and denote that for
Then, the ordering of a pair of elements in and changes only if the pair contains with , which holds if and only if , which means belongs to
Details
Let be the set of with , then 's inversion number can be represented as:
where be 1
if is true
and 0
if is false
Thus, the delta of inversion numbers of and can be written as , where is the inversion number of an integer sequence defined as , and that for
where , where
#include <bits/stdc++.h>
using namespace std;
template <typename T>
struct Fenwick
{
int n;
vector<T> a;
Fenwick(int n_ = 0)
{
init(n_);
}
void init(int n_)
{
n = n_;
a.assign(n, T{});
}
void add(int x, const T &v)
{
for (int i = x + 1; i <= n; i += i & -i)
{
a[i - 1] = a[i - 1] + v;
}
}
T sum(int x)
{
T ans{};
for (int i = x; i > 0; i -= i & -i)
{
ans = ans + a[i - 1];
}
return ans;
}
T rangeSum(int l, int r)
{
return sum(r) - sum(l);
}
int select(const T &k)
{
int x = 0;
T cur{};
for (int i = 1 << __lg(n); i; i /= 2)
{
if (x + i <= n && cur + a[x + i - 1] <= k)
{
x += i;
cur = cur + a[x - 1];
}
}
return x;
}
};
int main()
{
ios::sync_with_stdio(false);
cin.tie(nullptr);
int n, m;
cin >> n >> m;
vector<int> A(n);
for (int i = 0; i < n; i++)
{
cin >> A[i];
}
long long ans = 0;
Fenwick<int> fen(m);
for (int i = n - 1; i >= 0; i--)
{
ans += fen.sum(A[i]);
fen.add(A[i], 1);
}
vector<long long> sum(m);
for (int i = 0; i < n; i++)
{
sum[A[i]] += i - (n - 1 - i);
}
for (int i = m - 1; i >= 0; i--)
{
cout << ans << endl;
ans += sum[i];
}
return 0;
}
π G - Filp Row or Col

Warning
Hard Problems to Tackle Later
Tips
When columns to perform operations are fixed, we can independently minimize the sum of the numbers written on each row
If a row contains zeros and ones,
- it is optimal to do nothing on it if
- perform an operation on it if
This way, the row will have ones
Regard the binary string in each row as a binary integer
Representing by the fixed set of operations against columns, we find that:
Details
popcount: the number of ones in the binary representation, and denotes bitwise XOR
Since can take between and , the sought value is:
Consider evaluating for each . This can be done by counting the number of indices with for each
We use bit DP
to find it
Define dp[mask][j][c]
(, ):
- the number of indices such that is one of , and and differ exactly at bits
The transitions are as follows:
- dp[mask][j + 1][c] += dp[mask][j][c]
- dp[mask][j + 1][c + 1] += dp[mask \oplus 2^j][j][c]
Then:
The time complexity is
Details
#include <bits/stdc++.h>
using namespace std;
int main()
{
ios::sync_with_stdio(false);
cin.tie(0);
int H, W;
cin >> H >> W;
// dp[j][mask] will store the count of configurations where exactly j bits are flipped
// and the bit pattern is represented by 'mask'
vector<vector<long long>> dp(W + 1, vector<long long>(1 << W, 0));
// Read the rows and initialize the dp for the first row
for (int i = 0; i < H; i++)
{
string s;
cin >> s;
// convert `string` to `int`
int bit = 0;
for (int j = 0; j < W; j++)
{
bit |= (s[j] - '0') << j;
}
dp[0][bit]++;
}
// Fill dp using dynamic programming
for (int i = 0; i < W; i++)
{
for (int j = i; j >= 0; j--)
{
for (int mask = 0; mask < (1 << W); mask++)
{
dp[j + 1][mask ^ (1 << i)] += dp[j][mask];
}
}
}
// Find the minimum possible result
long long ans = H * W;
for (int mask = 0; mask < (1 << W); mask++)
{
long long sum = 0;
for (int i = 0; i <= W; i++)
{
sum += min(i, W - i) * dp[i][mask];
}
ans = min(ans, sum);
}
cout << ans << endl;
return 0;
}
// Better version
#include <bits/stdc++.h>
using namespace std;
int main()
{
ios::sync_with_stdio(false);
cin.tie(0);
int H, W;
cin >> H >> W;
// dp[mask][j] will store the count of configurations where exactly j bits are flipped
// and the bit pattern is represented by 'mask'
vector<vector<int>> dp(1 << W, vector<int>(19, 0));
for (int i = 0; i < H; i++)
{
string s;
cin >> s;
// convert `string` to `int`
int mask = 0;
for (int j = 0; j < W; j++)
{
mask |= (s[j] - '0') << j;
}
dp[mask][0]++;
}
for (int k = 0; k < W; k++)
{
for (int mask = 0; mask < (1 << W); mask++)
{
if ((mask >> k) & 1)
{
int j = mask ^ (1 << k);
for (int v = W; v >= 0; v--)
{
dp[mask][v + 1] += dp[j][v];
dp[j][v + 1] += dp[mask][v];
}
}
}
}
int ans = H * W;
for (int mask = 0; mask < (1 << W); mask++)
{
int sum = 0;
for (int c = 0; c <= W; c++)
{
sum += dp[mask][c] * min(c, W - c);
}
ans = min(ans, sum);
}
cout << ans << endl;
return 0;
}
I don't fully understand the DP transition
code below
for (int i = 1; i < (1 << W); i *= 2)
{
// computes the integer logarithm (base 2) of an integer i
// It returns the index of the highest set bit (most significant bit) of i
int p = __lg(i);
for (int j = 0; j < (1 << W); j += 2 * i)
{
for (int k = 0; k < i; k++)
{
int u = j + k, v = i + j + k;
for (int l = p; l >= 0; l--)
{
dp[v][l + 1] += dp[u][l];
dp[u][l + 1] += dp[v][l];
}
}
}
}