Performance improvement for transliteration

CPU profiling showed that ICU transliteration is expensive: a static dictionary now caches already-transliterated strings to improve performance.
This commit is contained in:
Bruno
2021-08-01 16:44:46 +02:00
parent 8002f3164c
commit eaadc210ae

View File

@@ -7,6 +7,7 @@
#include <iomanip> #include <iomanip>
#include <algorithm> #include <algorithm>
#include <climits> #include <climits>
#include <map>
#include <locale> #include <locale>
#include <unicode/ustream.h> #include <unicode/ustream.h>
@@ -52,6 +53,13 @@ std::string BaseToolkit::uniqueName(const std::string &basename, std::list<std::
std::string BaseToolkit::transliterate(const std::string &input) std::string BaseToolkit::transliterate(const std::string &input)
{ {
// because icu::Transliterator is slow, we keep a dictionary of already
// transliterated texts to be faster during repeated calls (update of user interface)
static std::map<std::string, std::string> dictionnary_;
std::map<std::string, std::string>::const_iterator existingentry = dictionnary_.find(input);
if (existingentry == dictionnary_.cend()) {
auto ucs = icu::UnicodeString::fromUTF8(input); auto ucs = icu::UnicodeString::fromUTF8(input);
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
@@ -68,7 +76,12 @@ std::string BaseToolkit::transliterate(const std::string &input)
std::ostringstream output; std::ostringstream output;
output << ucs; output << ucs;
return output.str(); // remember for future
dictionnary_[input] = output.str();
}
// return remembered transliterated text
return dictionnary_[input];
} }