How can one write a helper method that for a given string like foo[0][1][2][3] splits it into the array's name and collection (e.g. vector) of indices? In the example above it should produce foo and 0, 1, 2, 3 respectively.
The format of a string is always like name[index_0][index_1]....[index_n].
The number of indices (n) is not known in advance. All should be numbers. For simplicity, no spaces are allowed in a string. The name of an array (name) can be arbitrary. The helper function throws in case the string is not according to the specified format.
Performance is not an issue here. I'm looking for the most elegant/short solution.
UPDATE
Well, regular expressions were suggested in the very first comment. I was new to the area and went through the hassle of getting it done in C++. Please, feel free to simplify it. In the meantime, two non-regular expressions based solutions were proposed by @MartinYork and @Frodyne. At a first glance, regular expressions didn't bring anything fascinating here. The solution does not seem to be much shorter or much more elegant in my mind.
#include <stdexcept>
#include <iostream>
#include <string>
#include <regex>
#include <tuple>
std::tuple<std::string, std::vector<int>> helper(std::string str) {
// used to validate that the incoming string is in format
// array[0][1][2]
const std::regex rx_validate{
"([[:alnum:]]+)((?:\\[[[:digit:]]+\\])+)$"};
std::match_results<std::string::const_iterator> match_results;
std::regex_search(str, match_results, rx_validate);
// regex_search array[0][1][2] gives
// match_results[0]: array[0][1][2]
// match_results[1]: array
// match_results[2]: [0][1][2]
if (match_results.size() == 3) {
std::vector<int> indices;
// used to extract indices, it is guaranteed that
// numbers are between brackets, no extra checks
// needed
const std::regex rx_index{"[0-9]+"};
const std::string match{match_results[2]};
auto it = std::sregex_iterator(match.begin(), match.end(), rx_index);
for (; it != std::sregex_iterator(); ++it)
indices.push_back(std::stoi((*it).str()));
return std::make_tuple(match_results[1], indices);
} else {
throw std::invalid_argument("Invalid format (" + str + ")");
}
}
int main() {
const std::string str{"a[0][1][2][3][4][5]"};
const auto tuple = helper(str);
std::cout << "Name: " << std::get<0>(tuple) << std::endl;
for (int index: std::get<1>(tuple))
std::cout << index << std::endl;
}
UPDATE2
@rici suggested a modification of an algorithm that used regular expressions. It's a bit shorter and more concise.
I was really interested in comparing those algorithms in terms of performance.
Not going to advocate the numbers :-) Everybody should decide on their own.
The below program compiled as g++ -std=c++11 -Ofast and running on i7-8550U gives:
Regex measurements...
min/max/avg 955/154859/1072.88
Stream measurements...
min/max/avg 722/41252/800.402
#include <iostream>
#include <cstdlib>
#include <cstdint>
#include <limits>
#include <string>
#include <vector>
#include <regex>
#include <tuple>
#include <time.h>
inline uint64_t Timestamp() {
timespec time_now;
clock_gettime(CLOCK_REALTIME, &time_now);
return static_cast<uint64_t>(time_now.tv_sec) * 1000000000ULL + time_now.tv_nsec;
}
std::tuple<std::string, std::vector<int>> helper_stream(std::string const& info)
{
std::stringstream is(info);
std::string name;
std::vector<int> index;
if (std::getline(is, name, '[')) {
is.putback('[');
name.erase(std::remove(std::begin(name), std::end(name), ' '), std::end(name));
int value;
char b1;
char b2;
while(is >> b1 >> value >> b2 && b1 == '[' && b2 == ']') {
index.push_back(value);
}
}
return std::make_tuple(name, index);
}
std::tuple<std::string, std::vector<int>> helper_regex(std::string str) {
static const std::regex strip_prefix{"^[[:alpha:]][[:alnum:]]*"};
static const std::regex index{"\\[([[:digit:]]+)\\]|."};
std::match_results<std::string::const_iterator> match;
if (std::regex_search(str, match, strip_prefix)) {
auto e = match[0].second;
std::vector<int> indices;
for (auto it = std::sregex_iterator(e, str.end(), index), lim = std::sregex_iterator(); it != lim; ++it) {
if ((*it)[1].matched)
indices.push_back(std::stoi((*it)[1]));
else throw std::invalid_argument("Invalid format");
}
return std::make_tuple(std::string(str.cbegin(), e), indices);
}
else
throw std::invalid_argument("Invalid format (" + str + ")");
}
std::string make_str(int n) {
std::string str{"array"};
for (int i = 0; i < n; ++i) {
str += "[";
str += std::to_string(std::rand());
str += "]";
}
return str;
}
template <typename F>
void measurements(F f) {
constexpr int kNumRounds = 1000000;
constexpr int kLength = 3;
std::vector<uint64_t> time_diffs(kNumRounds);
for (int i = 0; i < kNumRounds; ++i) {
const std::string str{make_str(kLength)};
const auto before = Timestamp();
f(str);
const auto after = Timestamp();
time_diffs[i] = after - before;
}
uint64_t min{std::numeric_limits<uint64_t>::max()};
uint64_t max{std::numeric_limits<uint64_t>::min()};
uint64_t sum{0};
for (int i = 0; i < kNumRounds; ++i) {
const auto time_diff = time_diffs[i];
if (time_diff < min)
min = time_diff;
if (time_diff > max)
max = time_diff;
sum += time_diff;
}
std::cout << "min/max/avg " << min << "/" << max << "/" << static_cast<double>(sum) / kNumRounds << std::endl;
}
int main() {
std::cout << "Regex measurements..." << std::endl;
measurements(helper_regex);
std::cout << "Stream measurements..." << std::endl;
measurements(helper_stream);
return 0;
}