🧩 AtCoder Beginner Contest 396

TayLockOriginalMarch 8, 2025About 11 min

🧩 AtCoder Beginner Contest 396

# Info & Summary

Date: 2025-03-08
Completion Status: A ✅ / B ✅ / C ✅ / D ✅ / E ❌ / F ❌ / G ❌
Problem Type:

Problem	Type(s)	Data Structure / Algo	🔥 Key Insight / Mark
D	XOR operation	DFS	⭐
E	XOR operation	BFS/DFS	🔥🧠
F	Inversion number	Fenwick Tree	🔥🛠️🌀
G	Meet-in-the-Middle	Meet-in-the-Middle/Bitmask	🔥🛠️🌀

Details

`Emoji`	`Label`	`Meaning`
⭐	Important	Problem is strategically useful or has reusable patterns
🔥	Must master	Classic problem or technique that appears often and is worth mastering
⚠️	Reattempt	You attempted but failed; worth reviewing and solving again
🧠	New to me	Introduced a new idea, technique, or pattern you hadn’t seen before
🛠️	Template	You built or improved a code template from solving this problem
🌀	Too hard now	Beyond current ability to understand/solve; revisit after more practice

📌 A - Triple Four

Two pointer version

// Two pointer version
#include <bits/stdc++.h>

using namespace std;

int main()
{
    ios::sync_with_stdio(false);
    cin.tie(0);

    int n;
    cin >> n;

    vector<int> a(n);
    for (int i = 0; i < n; i++)
    {
        cin >> a[i];
    }

    for (int i = 0, j = 0; i < n; i = j)
    {
        while (j < n && a[i] == a[j])
        {
            j++;
        }

        if (j - i >= 3)
        {
            cout << "Yes" << endl;
            return 0;
        }
    }

    cout << "No" << endl;

    return 0;
}

// Better version

#include <bits/stdc++.h>

using namespace std;

int main()
{
    ios::sync_with_stdio(false);
    cin.tie(0);

    int n;
    cin >> n;
    vector<int> A(n);
    for (int i = 0; i < n; i++)
    {
        cin >> A[i];
    }
    for (int i = 0; i < n - 2; i++)
    {
        if (A[i] == A[i + 1] && A[i + 1] == A[i + 2])
        {
            cout << "Yes" << endl;
            return 0;
        }
    }

    cout << "No" << endl;

    return 0;
}

📌 B - Card Pile

#include <bits/stdc++.h>

using namespace std;

int main()
{
    ios::sync_with_stdio(false);
    cin.tie(0);

    int q;
    cin >> q;

    stack<int> s;
    for (int i = 0; i < 100; i++)
    {
        s.push(0);
    }

    while (q--)
    {
        int c;
        cin >> c;

        if (c == 1)
        {
            int num;
            cin >> num;

            s.push(num);
        }
        else
        {
            cout << s.top() << endl;
            s.pop();
        }
    }

    return 0;
}

Since $j$ can take any value within $0 \le j \le \min(i, M)$ , it is sufficient to use the maximum value among $T_0, T_1, ..., T_{\min(i, M)}$ , which can be done in $O(1)$ time by precalculating the cumulative max of $T$ in $O(M)$ time

#include <bits/stdc++.h>

using namespace std;

int main()
{
    ios::sync_with_stdio(false);
    cin.tie(0);

    int n, m;
    cin >> n >> m;

    vector<long long> A(n), B(m);

    for (int i = 0; i < n; i++)
    {
        cin >> A[i];
    }
    for (int i = 0; i < m; i++)
    {
        cin >> B[i];
    }

    sort(A.rbegin(), A.rend());
    sort(B.rbegin(), B.rend());

    vector<long long> prefix_A(n + 1, 0), prefix_B(m + 1, 0);
    vector<long long> maxT(m + 1, 0);
    for (int i = 0; i < n; i++)
    {
        prefix_A[i + 1] = prefix_A[i] + A[i];
    }
    for (int i = 0; i < m; i++)
    {
        prefix_B[i + 1] = prefix_B[i] + B[i];
        maxT[i + 1] = max(maxT[i], prefix_B[i + 1]);
    }

    long long ans = 0;
    for (int i = 0; i <= n; i++)
    {
        ans = max(ans, prefix_A[i] + maxT[min(i, m)]);
    }

    cout << ans << endl;

    return 0;
}

Better version

// Better

#include <bits/stdc++.h>

using namespace std;

int main()
{
    ios::sync_with_stdio(false);
    cin.tie(0);

    int n, m;
    cin >> n >> m;

    vector<long long> A(n), B(m);

    for (int i = 0; i < n; i++)
    {
        cin >> A[i];
    }
    for (int i = 0; i < m; i++)
    {
        cin >> B[i];
    }

    sort(A.rbegin(), A.rend());
    sort(B.rbegin(), B.rend());

    vector<long long> prefix_A(n + 1, 0), prefix_B(m + 1, 0);
    for (int i = 0; i < n; i++)
    {
        prefix_A[i + 1] = prefix_A[i] + A[i];
    }
    for (int i = 0; i < m; i++)
    {
        prefix_B[i + 1] = prefix_B[i] + B[i];
    }

    long long ans = 0;
    long long res = -1e18;
    for (int i = 0; i <= n; i++)
    {
        if (i <= m)
        {
            res = max(res, prefix_B[i]);
        }
        ans = max(ans, res + prefix_A[i]);
    }

    cout << ans << endl;

    return 0;
}

Why better:

This optimization avoids storing an extra maxT[] array by maintaining a running maximum (res) of the prefix sums of B during the loop. This reduces memory usage and simplifies the logic, while also being more cache-friendly and efficient, especially when working with large inputs.

📌 D - Minimum XOR Path

A simple path:

Start from node 1
Visits any number of other nodes without repeating
Ends at any node(e.g., in this case, at node N)

The complete graph:

In a complete graph(with $M = N (N - 1) / 2$ ), every onde is connected to every other
From node 1, you can go to any of the other N - 1 nodes, then any of the remaining N - 2 nodes, and so on
So, you are free too choose any permutation of the other nodes as a path, stopping at any node, so the number of possible simple paths from node 1 is: $\sum_{k = 1}^{N - 1}{P(N - 1, k)} = \sum_{k = 1}^{N - 1}{\frac{(N - 1)!}{N - 1 - k}!}$

This sum is $\Theta((N - 1)!)$ , which is brute-force-able within time limit, especially for DFS or backtracking code

DFS for simple paths:

Traverse all possible paths without revisiting nodes(i.e., maintaining a visited array)
Keep track of the current XOR sum as you explore
At each node, if you reach N, record the current XOR sum as a candidate answer
Backtrack and try other branches

#include <bits/stdc++.h>

using namespace std;

int main()
{
    ios::sync_with_stdio(false);
    cin.tie(0);

    int n, m;
    cin >> n >> m;

    // Builds the adjacency list
    vector<vector<pair<int, long long>>> adj(n);
    for (int i = 0; i < m; i++)
    {
        int u, v;
        long long w;
        cin >> u >> v >> w;
        u--;
        v--;

        adj[u].push_back({v, w});
        adj[v].push_back({u, w});
    }

    // vis[] tracks visited nodes to avoid cycles (ensures simple path)
    // ans stores the minimum XOR found so far
    vector<bool> vis(n, false);
    long long ans = 1ll << 60;

    // recursive lambda (C++20-compatible), allowing DFS to call itself
    auto dfs = [&](auto &&self, int u, long long s) -> void
    {
        if (u == n - 1)
        {
            ans = min(ans, s);
            return;
        }

        // Backtrack after exploring to allow other paths to reuse the node
        vis[u] = true;
        for (auto [v, w] : adj[u])
        {
            if (!vis[v])
            {
                self(self, v, s ^ w);
            }
        }
        vis[u] = false;
    };

    dfs(dfs, 0, 0ll);

    cout << ans << endl;

    return 0;
}

📌 E - Min of Restricted Sum

First, construct the following graph from the integer sequences $X$ , $Y$ , and $Z$ :

an undirected graph with $N$ vertices and $M$ edges, where there is an edge between vertex $X_i$ and $Y_i$ labeled $Z_i$
Then we assume that the graph is connected, and we may consider each connected component independently

Tips

Fix the value $A_1$ to be $x$ . For any edge, once the value for the vertex on one end is set, the other is uniquely determined. Thus, picking $A_1$ determines all the value of $A$ . If any inconsistency occurs at this point, the answer is -1

Tips

Now, notice that the condition is independent for each place in $A$ 's binary representations

For all $k$ , the $k$ -th bit of $A_1$ is 0 or 1, and fixing to one of them determines the $k$ -th bit of the others. Since we want to minimize the sum of the elements, we may try both 0 and 1 and pick whichever that makes fewer standing $k$ -th bit(if the number of standing bits is the same, picking anything is fine)

This can be done with BFS or DFS, the complexity is $O((N + M)\log(\max(A)))$

DFS version

#include <bits/stdc++.h>

using namespace std;

int main()
{
    ios::sync_with_stdio(false);
    cin.tie(0);

    int n, m;
    cin >> n >> m;

    vector<vector<array<int, 2>>> adj(n);
    for (int i = 0; i < m; i++)
    {
        int x, y, z;
        cin >> x >> y >> z;
        x--;
        y--;

        adj[x].push_back({y, z});
        adj[y].push_back({x, z});
    }

    vector<bool> vis(n, false);
    vector<int> ans(n, 0);
    vector<int> connected_component;

    auto dfs = [&](auto &&self, int x, int p) -> void
    {
        vis[x] = true;
        connected_component.push_back(x);
        ans[x] = p;

        for (auto [y, z] : adj[x])
        {
            if (!vis[y])
            {
                self(self, y, p^z);
            }
            else if (ans[y] != (p ^ z))
            {
                cout << -1 << endl;
                exit(0);
            }
        }
    };

    for (int x = 0; x < n; x++)
    {
        if (vis[x])
        {
            continue;
        }

        connected_component.clear();
        dfs(dfs, x, 0);

        // the counts of set bits for each bit
        vector<int> cnt(30, 0);
        for (auto x : connected_component)
        {
            for (int k = 0; k < 30; k++)
            {
                if ((ans[x] >> k) & 1)
                {
                    cnt[k]++;
                }
            }
        }

        // adjust the values in A to ensure that the number of 1 bits in A is as balanced as possible
        // minimize the sum of the elements
        int off = 0;
        for (int j = 0; j < 30; j++)
        {
            if (cnt[j] > connected_component.size() - cnt[j])
            {
                off ^= 1 << j;
            }
        }
        // assign the final values
        for (auto x : connected_component)
        {
            ans[x] ^= off;
        }
    }

    for (int i = 0; i < n; i++)
    {
        cout << ans[i] << " \n"[i == n - 1];
    }

    return 0;
}

Tips

Traditional BFS with Queue:
- In a traditional BFS, a queue is used to store the nodes to be processed. As we traverse the graph level by level, we visit each node once and process its neighbors
- The standard purpose of the queue is to maintain the order of nodes to visit and ensure that we process each node in the correct BFS sequence
BFS with Additional Computation (Using Vector):
- In this problem, while performing BFS, we are not only visiting nodes but also performing additional calculations, specifically related to XOR values
- The vector q (instead of a queue) is used for two purposes:
  - To store the nodes that are currently being processed in the BFS
  - To update the XOR values for each node in the graph, which requires checking conditions and performing additional computations during the traversal

BFS version

#include <bits/stdc++.h>

using namespace std;

int main()
{
    ios::sync_with_stdio(false);
    cin.tie(0);

    int n, m;
    cin >> n >> m;

    vector<vector<array<int, 2>>> adj(n);
    for (int i = 0; i < m; i++)
    {
        int x, y, z;
        cin >> x >> y >> z;
        x--;
        y--;

        adj[x].push_back({y, z});
        adj[y].push_back({x, z});
    }

    // `-1` means un-visited
    vector<int> A(n, -1);

    for (int x = 0; x < n; x++)
    {
        if (A[x] != -1)
        {
            continue;
        }

        // BFS
        vector<int> q = {x};

        // Cur node's binary representation
        A[x] = 0;
        // the counts of set bits for each bit
        int cnt[30] = {};

        for (int i = 0; i < q.size(); i++)
        {
            int u = q[i];
            for (int j = 0; j < 30; j++)
            {
                cnt[j] += (A[u] >> j) & 1;
            }

            for (auto [v, z] : adj[u])
            {
                // if un-visited
                if (A[v] == -1)
                {
                    A[v] = A[u] ^ z;
                    q.push_back(v);
                }
                // If any inconsistency occurs
                else if (A[v] != (A[u] ^ z))
                {
                    cout << -1 << endl;
                    return 0;
                }
            }
        }

        // adjust the values in A to ensure that the number of 1 bits in A is as balanced as possible
        // minimize the sum of the elements
        int off = 0;
        for (int j = 0; j < 30; j++)
        {
            // If more than half the nodes have 1 at a particular bit position
            // we flip that bit
            if (cnt[j] > q.size() - cnt[j])
            {
                off ^= 1 << j;
            }
        }

        // assign the final values
        for (auto x : q)
        {
            A[x] ^= off;
        }
    }

    for (int i = 0; i < n; i++)
    {
        cout << A[i] << " \n"[i == n - 1];
    }

    return 0;
}

📌 F - Rotated Inversions

Warning

Hard Problems to Tackle Later

Tips

For $k = 0$ , $B$ coincides with $A$ : in this case, the inversion number can be found in $O(N\log(N))$ time, for example by using Fenwick Tree

Now, we will consider how to find the delta of the inversion numbers for $k = c - 1$ and $k = c$ for each $c = 1, 2, ..., M - 1$ :

Let $B^{c - 1}$ denote the sequence $B$ for $k = c - 1$ , and $B^c$ denote that for $k = c$

Then, the ordering of a pair of elements in $B^{c - 11}$ and $B^c$ changes only if the pair contains $i$ with $B_i^c = 0$ , which holds if and only if $A_i = M - c$ , which means $i$ belongs to $G_{M - c}$

Details

Let $G_i$ be the set of $j$ with $B_j = i$ , then $B$ 's inversion number can be represented as:

\sum_{1 \le i < j \le M} \sum_{x \in G_i} \sum_{y \in G_j} [x > y]

where $[F]$ be 1 if $F$ is true and 0 if $F$ is false

Thus, the delta of inversion numbers of $B^{c - 1}$ and $B^c$ can be written as $C_1 - C_2$ , where $C_1$ is the inversion number of an integer sequence $X$ defined as $X_i = [i \in G_{M - c}]$ , and $C_2$ that for $X_i = [i \notin G_{M - c}]$

C_1 = \sum_{i = 1}^{|G_{M - i}|}(Y_i - i)

C_2 = \sum_{i = 1}^{|G_{M - i}|}(N - 1 - Y_i - (|G_{M - i}| - 1 - i))

where $G_{M - i} = {Y_1, Y_2, ..., Y_{|G_{M - c}|}}$ , where $(0 \le Y_1 < Y_2 < ... < Y_{|G_{M - c}|} \le M - 1)$

#include <bits/stdc++.h>

using namespace std;

template <typename T>
struct Fenwick
{
    int n;
    vector<T> a;

    Fenwick(int n_ = 0)
    {
        init(n_);
    }

    void init(int n_)
    {
        n = n_;
        a.assign(n, T{});
    }

    void add(int x, const T &v)
    {
        for (int i = x + 1; i <= n; i += i & -i)
        {
            a[i - 1] = a[i - 1] + v;
        }
    }

    T sum(int x)
    {
        T ans{};
        for (int i = x; i > 0; i -= i & -i)
        {
            ans = ans + a[i - 1];
        }
        return ans;
    }

    T rangeSum(int l, int r)
    {
        return sum(r) - sum(l);
    }

    int select(const T &k)
    {
        int x = 0;
        T cur{};

        for (int i = 1 << __lg(n); i; i /= 2)
        {
            if (x + i <= n && cur + a[x + i - 1] <= k)
            {
                x += i;
                cur = cur + a[x - 1];
            }
        }

        return x;
    }
};

int main()
{
    ios::sync_with_stdio(false);
    cin.tie(nullptr);

    int n, m;
    cin >> n >> m;

    vector<int> A(n);
    for (int i = 0; i < n; i++)
    {
        cin >> A[i];
    }

    long long ans = 0;
    Fenwick<int> fen(m);
    for (int i = n - 1; i >= 0; i--)
    {
        ans += fen.sum(A[i]);
        fen.add(A[i], 1);
    }

    vector<long long> sum(m);
    for (int i = 0; i < n; i++)
    {
        sum[A[i]] += i - (n - 1 - i);
    }

    for (int i = m - 1; i >= 0; i--)
    {
        cout << ans << endl;
        ans += sum[i];
    }

    return 0;
}

📌 G - Filp Row or Col

Warning

Hard Problems to Tackle Later

Tips

When columns to perform operations are fixed, we can independently minimize the sum of the numbers written on each row

If a row contains $c_0$ zeros and $c_1$ ones,

it is optimal to do nothing on it if $c_0 \ge c_1$
perform an operation on it if $c_0 < c_1$

This way, the row will have $\min(c_0, c_1) = \min(c_1, W - c_1)$ ones

Regard the binary string in each row as a binary integer

B_i = \sum_{j = 0}^{W - 1}A_{i, j}2^j

Representing by $mask$ the fixed set of operations against columns, we find that:

\min(\sum A_{x, y}) = \sum_{i = 0}^{H - 1} \min(popcount(B_i \oplus mask), W - popcount(B_i \oplus mask))

Details

popcount: the number of ones in the binary representation, and $\oplus$ denotes bitwise XOR

Since $mask$ can take between $0$ and $2^W - 1$ , the sought value is:

\min_{0 \le mask \le 2^W - 1} \sum_{i = 0}^{H - 1} \min(popcount(B_i \oplus mask), W - popcount(B_i \oplus mask))

Consider evaluating $\sum_{i = 0}^{H - 1} \min(popcount(B_i \oplus mask), W - popcount(B_i \oplus mask))$ for each $mask = 0, 1, ..., 2^W - 1$ . This can be done by counting the number of indices $i$ with $B_i \oplus mask - j$ for each $j = 0, 1, ..., W$

We use bit DP to find it

Define dp[mask][j][c]( $0 \le mask < 2^W$ , $0 \le j, c \le W$ ):

the number of indices $i$ such that $B_i$ is one of $mask \oplus 0, mask \oplus 1, ..., mask \oplus (2^j - 1)$ , and $mask$ and $B_i$ differ exactly at $c$ bits

The transitions are as follows:

dp[mask][j + 1][c] += dp[mask][j][c]
dp[mask][j + 1][c + 1] += dp[mask \oplus 2^j][j][c]

Then:

\sum_{i = 0}^{H - 1} \min(popcount(B_i \oplus mask), W - popcount(B_i \oplus mask)) = \sum_{c = 0}^{W}dp[mask][W][c] \times \min(c, W - c)

The time complexity is $O(HW + 2^WW^2)$

Details

#include <bits/stdc++.h>

using namespace std;

int main()
{
    ios::sync_with_stdio(false);
    cin.tie(0);

    int H, W;
    cin >> H >> W;

    // dp[j][mask] will store the count of configurations where exactly j bits are flipped
    // and the bit pattern is represented by 'mask'
    vector<vector<long long>> dp(W + 1, vector<long long>(1 << W, 0));

    // Read the rows and initialize the dp for the first row
    for (int i = 0; i < H; i++)
    {
        string s;
        cin >> s;

        // convert `string` to `int`
        int bit = 0;
        for (int j = 0; j < W; j++)
        {
            bit |= (s[j] - '0') << j;
        }
        dp[0][bit]++;
    }

    // Fill dp using dynamic programming
    for (int i = 0; i < W; i++)
    {
        for (int j = i; j >= 0; j--)
        {
            for (int mask = 0; mask < (1 << W); mask++)
            {
                dp[j + 1][mask ^ (1 << i)] += dp[j][mask];
            }
        }
    }

    // Find the minimum possible result
    long long ans = H * W;
    for (int mask = 0; mask < (1 << W); mask++)
    {
        long long sum = 0;
        for (int i = 0; i <= W; i++)
        {
            sum += min(i, W - i) * dp[i][mask];
        }

        ans = min(ans, sum);
    }

    cout << ans << endl;

    return 0;
}

// Better version
#include <bits/stdc++.h>

using namespace std;

int main()
{
    ios::sync_with_stdio(false);
    cin.tie(0);

    int H, W;
    cin >> H >> W;

    // dp[mask][j] will store the count of configurations where exactly j bits are flipped
    // and the bit pattern is represented by 'mask'
    vector<vector<int>> dp(1 << W, vector<int>(19, 0));
    for (int i = 0; i < H; i++)
    {
        string s;
        cin >> s;

        // convert `string` to `int`
        int mask = 0;
        for (int j = 0; j < W; j++)
        {
            mask |= (s[j] - '0') << j;
        }

        dp[mask][0]++;
    }

    for (int k = 0; k < W; k++)
    {
        for (int mask = 0; mask < (1 << W); mask++)
        {
            if ((mask >> k) & 1)
            {
                int j = mask ^ (1 << k);
                for (int v = W; v >= 0; v--)
                {
                    dp[mask][v + 1] += dp[j][v];
                    dp[j][v + 1] += dp[mask][v];
                }
            }
        }
    }

    int ans = H * W;
    for (int mask = 0; mask < (1 << W); mask++)
    {
        int sum = 0;
        for (int c = 0; c <= W; c++)
        {
            sum += dp[mask][c] * min(c, W - c);
        }

        ans = min(ans, sum);
    }

    cout << ans << endl;

    return 0;
}

I don't fully understand the DP transition code below

for (int i = 1; i < (1 << W); i *= 2)
{
    // computes the integer logarithm (base 2) of an integer i
    // It returns the index of the highest set bit (most significant bit) of i
    int p = __lg(i);
    for (int j = 0; j < (1 << W); j += 2 * i)
    {
        for (int k = 0; k < i; k++)
        {
            int u = j + k, v = i + j + k;
            for (int l = p; l >= 0; l--)
            {
                dp[v][l + 1] += dp[u][l];
                dp[u][l + 1] += dp[v][l];
            }
        }
    }
}