Dividing data into optimal segments is a classic challenge in computer science, often requiring a balance between specific constraints and efficiency. This problem asks us to find the cheapest way to split an array while ensuring our split points stay within a certain “distance” of each other. It is a fantastic exercise in combining sliding window techniques with advanced data structures to maintain a sorted view of moving data.
Problem Summary
You’re given:
- An array of integers `nums`.
- An integer `k`, representing the number of subarrays you must create.
- An integer `dist`, which limits how far apart the starting indices of the second and last subarrays can be.
Your goal:
- Minimize the total cost, where the cost of a subarray is its first element. Since the first subarray always starts at index 0, you need to pick additional starting indices from the rest of the array that minimize the sum and satisfy the `dist` constraint.
Intuition
The first element nums[0] is always part of our cost because the first subarray always starts there. Our task is to pick other indices to be the “starts” of the remaining subarrays.
Let the starting indices of the subarrays be i_1 < i_2 < … < i_k.
- i_1 is always 0.
- We need to choose the remaining k - 1 indices from the range [1, n - 1].
- The constraint i_k - i_2 <= dist means all our chosen indices (from the second to the k-th) must fit within a window of size dist + 1.
As we slide this window of size dist + 1 across the array, we need to quickly find the sum of the k - 1 smallest elements within that window. To do this efficiently, we use a Binary Indexed Tree (BIT) or a Fenwick Tree combined with Coordinate Compression. This allows us to “rank” the numbers and use binary lifting to find the smallest values in logarithmic time.
Walkthrough: Understanding the Examples
Example 1: `nums = [1,3,2,6,4,2], k = 3, dist = 3`
- We must pick k - 1 = 2 indices from the window.
- The window size is dist + 1 = 4. If our first chosen index is i_2 = 1, the last index can be at most 1 + 3 = 4.
- Looking at indices 1 through 4: [3, 2, 6, 4]. The two smallest are 2 and 3. Cost: 1 + 2 + 3 = 6.
- If i_2 = 2, i_3 can be up to index 5. Indices 2 through 5: [2, 6, 4, 2]. The two smallest are 2 and 2. Cost: 1 + 2 + 2 = 5.
- The minimum cost is 5.
Code Blocks
C++
#include <vector>
#include <algorithm>
using namespace std;
class Solution {
    // Dual Fenwick (BIT) over coordinate-compressed values:
    // bit_count[r] = how many window elements have rank r,
    // bit_sum[r]   = total value of those elements.
    int max_rank;               // number of distinct values (ranks are 1..max_rank)
    int highest_bit;            // largest power of two <= max_rank, for binary lifting
    vector<int> bit_count;
    vector<long long> bit_sum;
    vector<int> sorted_values;  // sorted distinct values; rank r maps to sorted_values[r - 1]

    // Point-update both trees at rank idx: add count_delta elements totalling val_delta.
    void update(int idx, int count_delta, long long val_delta) {
        for (; idx <= max_rank; idx += idx & -idx) {
            bit_count[idx] += count_delta;
            bit_sum[idx] += val_delta;
        }
    }

    // Sum of the k smallest elements currently stored in the trees.
    // Binary lifting walks to the largest rank whose prefix count is still < k;
    // the k-th smallest value is then rank idx + 1 (i.e. sorted_values[idx]),
    // and the remaining (k - current_k) copies of it are charged explicitly,
    // which handles duplicate values correctly.
    long long query_k_smallest(int k) {
        int idx = 0;
        int current_k = 0;
        long long current_val_sum = 0;
        // Start from the largest power of two <= max_rank (computed in
        // minimumCost) rather than a hard-coded 1 << 17, so the lifting
        // remains correct for arrays of any size.
        for (int step = highest_bit; step > 0; step >>= 1) {
            int next_idx = idx + step;
            if (next_idx <= max_rank && current_k + bit_count[next_idx] < k) {
                idx = next_idx;
                current_k += bit_count[idx];
                current_val_sum += bit_sum[idx];
            }
        }
        return current_val_sum + (long long)(k - current_k) * sorted_values[idx];
    }

public:
    // Minimum total cost of splitting nums into k subarrays, where a
    // subarray's cost is its first element and the starting indices of the
    // 2nd..k-th subarrays must satisfy i_k - i_2 <= dist.
    long long minimumCost(vector<int>& nums, int k, int dist) {
        int n = nums.size();

        // Coordinate compression: map each value to a 1-based rank.
        sorted_values = nums;
        sort(sorted_values.begin(), sorted_values.end());
        sorted_values.erase(unique(sorted_values.begin(), sorted_values.end()),
                            sorted_values.end());
        max_rank = sorted_values.size();
        bit_count.assign(max_rank + 1, 0);
        bit_sum.assign(max_rank + 1, 0);

        // Largest power of two <= max_rank, used as the binary-lifting start.
        highest_bit = 1;
        while ((highest_bit << 1) <= max_rank) highest_bit <<= 1;

        auto get_rank = [&](int val) {
            return (int)(lower_bound(sorted_values.begin(), sorted_values.end(), val)
                         - sorted_values.begin()) + 1;
        };

        // nums[0] is always paid; we must pick target_k = k - 1 more starts.
        int target_k = k - 1;

        // Seed the first window [1, 1 + dist], clamped to the array end.
        for (int i = 1; i <= min(1 + dist, n - 1); ++i) {
            update(get_rank(nums[i]), 1, nums[i]);
        }
        long long min_cost = nums[0] + query_k_smallest(target_k);

        // Slide: at step i the window is [i, i + dist]. The bound
        // i <= n - target_k keeps at least target_k candidates in the window.
        for (int i = 2; i <= n - target_k; ++i) {
            update(get_rank(nums[i - 1]), -1, -nums[i - 1]);           // evict left edge
            if (i + dist < n) {
                update(get_rank(nums[i + dist]), 1, nums[i + dist]);   // admit right edge
            }
            min_cost = min(min_cost, nums[0] + query_k_smallest(target_k));
        }
        return min_cost;
    }
};
Python
import bisect
class Solution:
    def minimumCost(self, nums: list[int], k: int, dist: int) -> int:
        """Minimum cost of splitting nums into k subarrays.

        A subarray's cost is its first element; the starting indices of the
        2nd..k-th subarrays must span at most ``dist``. nums[0] is always
        paid, so we slide a window of width dist + 1 and use a dual Fenwick
        tree (counts + sums) over compressed values to fetch the sum of the
        k - 1 smallest window elements at each position.
        """
        n = len(nums)
        values = sorted(set(nums))                       # distinct values, ascending
        rank_of = {v: r for r, v in enumerate(values, start=1)}
        size = len(values)
        tree_cnt = [0] * (size + 1)                      # Fenwick tree of counts
        tree_sum = [0] * (size + 1)                      # Fenwick tree of sums

        def add(r: int, dc: int, dv: int) -> None:
            # Point-update both trees at rank r.
            while r <= size:
                tree_cnt[r] += dc
                tree_sum[r] += dv
                r += r & -r

        def smallest_sum(need: int) -> int:
            # Sum of the `need` smallest stored elements via binary lifting:
            # walk to the largest prefix with fewer than `need` elements,
            # then charge the remainder at the next rank's value (this
            # correctly accounts for duplicates).
            pos, have, total = 0, 0, 0
            step = 1 << size.bit_length()
            while step:
                nxt = pos + step
                if nxt <= size and have + tree_cnt[nxt] < need:
                    pos = nxt
                    have += tree_cnt[pos]
                    total += tree_sum[pos]
                step >>= 1
            return total + (need - have) * values[pos]

        need = k - 1
        # Seed the first window [1, 1 + dist], clamped to the array end.
        for j in range(1, min(dist + 2, n)):
            add(rank_of[nums[j]], 1, nums[j])
        best = nums[0] + smallest_sum(need)

        # Slide the window to [i, i + dist]; the upper bound keeps at least
        # `need` candidates inside the window.
        for i in range(2, n - need + 1):
            add(rank_of[nums[i - 1]], -1, -nums[i - 1])          # evict left edge
            if i + dist < n:
                add(rank_of[nums[i + dist]], 1, nums[i + dist])  # admit right edge
            best = min(best, nums[0] + smallest_sum(need))
        return best
JavaScript
class Solution {
  /**
   * Minimum cost of splitting nums into k subarrays, where a subarray's
   * cost is its first element and the starting indices of the 2nd..k-th
   * subarrays must span at most dist. Uses a dual Fenwick tree (counts +
   * BigInt sums) over coordinate-compressed values, slid across the array.
   */
  minimumCost(nums, k, dist) {
    const n = nums.length;
    const values = [...new Set(nums)].sort((a, b) => a - b);
    const rankOf = new Map(values.map((v, i) => [v, i + 1]));
    const size = values.length;
    const cnt = new Array(size + 1).fill(0);   // Fenwick tree of counts
    const sum = new Array(size + 1).fill(0n);  // Fenwick tree of BigInt sums

    // Point-update both trees at the given rank.
    const add = (rank, dc, dv) => {
      while (rank <= size) {
        cnt[rank] += dc;
        sum[rank] += BigInt(dv);
        rank += rank & -rank;
      }
    };

    // Largest power of two <= size, used as the binary-lifting start.
    let topBit = 1;
    while (topBit * 2 <= size) topBit *= 2;

    // Sum of the `need` smallest stored elements: lift to the largest
    // prefix with fewer than `need` elements, then charge the remainder
    // at the next rank's value (handles duplicates).
    const smallestSum = (need) => {
      let pos = 0;
      let have = 0;
      let total = 0n;
      for (let step = topBit; step >= 1; step >>= 1) {
        const nxt = pos + step;
        if (nxt <= size && have + cnt[nxt] < need) {
          pos = nxt;
          have += cnt[pos];
          total += sum[pos];
        }
      }
      return total + BigInt(need - have) * BigInt(values[pos]);
    };

    const need = k - 1;
    // Seed the first window [1, 1 + dist], clamped to the array end.
    for (let j = 1; j <= Math.min(dist + 1, n - 1); j++) {
      add(rankOf.get(nums[j]), 1, nums[j]);
    }
    let best = BigInt(nums[0]) + smallestSum(need);

    // Slide the window to [i, i + dist]; the bound keeps at least `need`
    // candidates inside the window.
    for (let i = 2; i <= n - need; i++) {
      add(rankOf.get(nums[i - 1]), -1, -nums[i - 1]);             // evict left edge
      if (i + dist < n) add(rankOf.get(nums[i + dist]), 1, nums[i + dist]); // admit right edge
      const cost = BigInt(nums[0]) + smallestSum(need);
      if (cost < best) best = cost;
    }
    return Number(best);
  }
}
Key Takeaways
- Coordinate Compression: When the range of values (up to 10^9) is much larger than the number of elements (n up to 10^5), mapping values to their relative ranks allows us to use them as array indices.
- Dual Fenwick Tree: Using one BIT for counts and another for sums allows us to answer “sum of the K smallest” queries efficiently.
- Binary Lifting on BIT: This technique turns a prefix sum search into an O(log n) operation, making it much faster than a standard O(log^2 n) binary search over the BIT.
Final Thoughts
This problem is a masterclass in handling streaming data constraints. In real-world systems, like financial trading platforms, you often need to calculate “the K best prices in the last N minutes.” The combination of a sliding window and an efficient order-statistic data structure is exactly how you’d handle such high-frequency data. While it looks intimidating as a “Hard” problem, breaking it down into a moving window and a sorted frequency map makes the logic much more manageable.
