1
2
3
4
5
6 #include <soulng/util/TextUtils.hpp>
7 #include <soulng/util/Util.hpp>
8 #include <soulng/util/Unicode.hpp>
9 #include <cctype>
10 #include <sstream>
11 #include <fstream>
12 #include <iterator>
13 #include <algorithm>
14 #include <stdexcept>
15 #include <thread>
16 #ifdef _WIN32
17 #include <Windows.h>
18 #endif
19
20 #undef min
21 #undef max
22
23 namespace soulng { namespace util {
24
25 using namespace soulng::unicode;
26
// Returns a copy of 's' with leading and trailing whitespace removed.
// Whitespace is whatever std::isspace reports for the current C locale.
// An empty or all-whitespace input yields an empty string.
std::string Trim(const std::string& s)
{
    // std::isspace is only defined for values representable as unsigned char (or EOF);
    // plain char may be signed, so every character is converted before classification.
    const auto isSpace = [](char ch) { return std::isspace(static_cast<unsigned char>(ch)) != 0; };
    const std::string::size_type n = s.length();
    std::string::size_type b = 0;
    while (b < n && isSpace(s[b]))
    {
        ++b;
    }
    // 'e' is one past the last character that is kept.
    std::string::size_type e = n;
    while (e > b && isSpace(s[e - 1]))
    {
        --e;
    }
    return s.substr(b, e - b);
}
35
// Returns a copy of 's' in which leading and trailing whitespace is dropped and every
// internal run of whitespace is collapsed into a single ' ' character.
// Whitespace is whatever std::isspace reports for the current C locale.
std::string TrimAll(const std::string& s)
{
    std::string result;
    result.reserve(s.length());
    // True when whitespace has been seen after some text; the separating blank is
    // only written once the next non-whitespace character arrives, so trailing
    // whitespace never produces output.
    bool pendingSpace = false;
    for (const char c : s)
    {
        // Convert through unsigned char: std::isspace is undefined for negative values.
        if (std::isspace(static_cast<unsigned char>(c)))
        {
            pendingSpace = !result.empty();
        }
        else
        {
            if (pendingSpace)
            {
                result.push_back(' ');
                pendingSpace = false;
            }
            result.push_back(c);
        }
    }
    return result;
}
82
83 std::u32string Trim(const std::u32string& s)
84 {
85 int b = 0;
86 while (b < int(s.length()) && IsWhiteSpace(s[b])) ++b;
87 int e = int(s.length()) - 1;
88 while (e >= b && IsWhiteSpace(s[e])) --e;
89 return s.substr(b, e - b + 1);
90 }
91
92 std::u32string TrimAll(const std::u32string& s)
93 {
94 std::u32string result;
95 result.reserve(s.length());
96 int state = 0;
97 std::u32string::const_iterator e = s.cend();
98 for (std::u32string::const_iterator i = s.cbegin(); i != e; ++i)
99 {
100 char32_t c = *i;
101 switch (state)
102 {
103 case 0:
104 {
105 if (!IsWhiteSpace(c))
106 {
107 result.append(1, c);
108 state = 1;
109 }
110 break;
111 }
112 case 1:
113 {
114 if (IsWhiteSpace(c))
115 {
116 state = 2;
117 }
118 else
119 {
120 result.append(1, c);
121 }
122 break;
123 }
124 case 2:
125 {
126 if (!IsWhiteSpace(c))
127 {
128 result.append(1, ' ');
129 result.append(1, c);
130 state = 1;
131 }
132 break;
133 }
134 }
135 }
136 return result;
137 }
138
// Splits 's' at every occurrence of the separator character 'c'.
// Empty components between adjacent separators (and before a leading separator) are
// kept; an empty component after a trailing separator is NOT produced, so "a," gives
// {"a"} and an empty input gives an empty vector.
std::vector<std::string> Split(const std::string& s, char c)
{
    std::vector<std::string> v;
    std::string::size_type start = 0;
    for (std::string::size_type pos = s.find(c, start); pos != std::string::npos; pos = s.find(c, start))
    {
        v.push_back(s.substr(start, pos - start));
        start = pos + 1;
    }
    // Remaining tail after the last separator; skipped when it would be empty.
    if (start < s.length())
    {
        v.push_back(s.substr(start));
    }
    return v;
}
158
// Returns a copy of 's' in which every occurrence of 'oldChar' is replaced by 'newChar'.
std::string Replace(const std::string& s, char oldChar, char newChar)
{
    std::string replaced(s);
    std::replace(replaced.begin(), replaced.end(), oldChar, newChar);
    return replaced;
}
172
// Returns a copy of 's' in which every non-overlapping occurrence of 'oldString'
// (scanning left to right) is replaced by 'newString'.
// An empty 'oldString' matches nothing and 's' is returned unchanged.
std::string Replace(const std::string& s, const std::string& oldString, const std::string& newString)
{
    // An empty pattern is found at every position without advancing the scan,
    // which would otherwise loop forever.
    if (oldString.empty())
    {
        return s;
    }
    std::string r;
    r.reserve(s.length());
    std::string::size_type start = 0;
    // Search with the std::string itself (not c_str()) so patterns containing
    // embedded NUL characters are matched in full.
    std::string::size_type pos = s.find(oldString, start);
    while (pos != std::string::npos)
    {
        r.append(s, start, pos - start);
        r.append(newString);
        start = pos + oldString.length();
        pos = s.find(oldString, start);
    }
    // Copy whatever follows the last match.
    r.append(s, start, std::string::npos);
    return r;
}
188
// Formats the byte value of 'c' as "\x" followed by its lowercase hexadecimal
// digits without zero padding (e.g. '\n' becomes "\xa").
std::string HexEscape(char c)
{
    // Go through unsigned char so bytes above 0x7F are not printed as negative numbers.
    const int byteValue = static_cast<unsigned char>(c);
    std::ostringstream hexDigits;
    hexDigits << std::hex << byteValue;
    return "\\x" + hexDigits.str();
}
195
196 std::u32string HexEscape(uint32_t c)
197 {
198 std::stringstream s;
199 s << "\\x" << std::hex << c;
200 return ToUtf32(s.str());
201 }
202
203 std::u32string CharHexEscape(char32_t c)
204 {
205 std::stringstream s;
206 s << "\\U" << ToHexString(static_cast<uint32_t>(c));
207 return ToUtf32(s.str());
208 }
209
210 std::string CharStr(char c)
211 {
212 switch (c)
213 {
214 case '\'': return "\\'";
215 case '\"': return "\\\"";
216 case '\\': return "\\\\";
217 case '\a': return "\\a";
218 case '\b': return "\\b";
219 case '\f': return "\\f";
220 case '\n': return "\\n";
221 case '\r': return "\\r";
222 case '\t': return "\\t";
223 case '\v': return "\\v";
224 case '\0': return "\\0";
225 default:
226 {
227 if (c >= 32 && c <= 126)
228 {
229 return std::string(1, c);
230 }
231 else
232 {
233 return ToUtf8(CharHexEscape(c));
234 }
235 }
236 }
237 }
238
239 std::u32string CharStr(char32_t c)
240 {
241 switch (c)
242 {
243 case '\'': return U"\\'";
244 case '\"': return U"\\\"";
245 case '\\': return U"\\\\";
246 case '\a': return U"\\a";
247 case '\b': return U"\\b";
248 case '\f': return U"\\f";
249 case '\n': return U"\\n";
250 case '\r': return U"\\r";
251 case '\t': return U"\\t";
252 case '\v': return U"\\v";
253 case '\0': return U"\\0";
254 default:
255 {
256 if (c >= 32 && c <= 126)
257 {
258 return std::u32string(1, c);
259 }
260 else
261 {
262 return CharHexEscape(c);
263 }
264 }
265 }
266 }
267
268 std::string StringStr(const std::string& s)
269 {
270 std::string r;
271 int n = int(s.length());
272 for (int i = 0; i < n; ++i)
273 {
274 r.append(CharStr(s[i]));
275 }
276 return r;
277 }
278
279 std::u32string StringStr(const std::u32string& s)
280 {
281 std::u32string r;
282 int n = int(s.length());
283 for (int i = 0; i < n; ++i)
284 {
285 r.append(CharStr(s[i]));
286 }
287 return r;
288 }
289
290 std::string MakeStringLiteral(const std::string& s)
291 {
292 std::string result = "\"";
293 result.append(StringStr(s));
294 result.append(1, '"');
295 return result;
296 }
297
298 std::u32string MakeStringLiteral(const std::u32string& s)
299 {
300 std::u32string result = U"\"";
301 result.append(StringStr(s));
302 result.append(1, '"');
303 return result;
304 }
305
// Returns 'path' surrounded by double quotes when it contains at least one space
// character; otherwise returns it unchanged.
std::string QuotedPath(const std::string& path)
{
    const bool containsSpace = path.find(' ') != std::string::npos;
    if (!containsSpace)
    {
        return path;
    }
    std::string quoted("\"");
    quoted += path;
    quoted += "\"";
    return quoted;
}
314
315 bool LastComponentsEqual(const std::string& s0, const std::string& s1, char componentSeparator)
316 {
317 std::vector<std::string> c0 = Split(s0, componentSeparator);
318 std::vector<std::string> c1 = Split(s1, componentSeparator);
319 int n0 = int(c0.size());
320 int n1 = int(c1.size());
321 int n = std::min(n0, n1);
322 for (int i = 0; i < n; ++i)
323 {
324 if (c0[n0 - i - 1] != c1[n1 - i - 1]) return false;
325 }
326 return true;
327 }
328
// True when 's' begins with 'prefix'. An empty prefix matches every string.
bool StartsWith(const std::string& s, const std::string& prefix)
{
    if (prefix.length() > s.length())
    {
        return false;
    }
    return s.compare(0, prefix.length(), prefix) == 0;
}
334
// UTF-32 variant: true when 's' begins with 'prefix'. An empty prefix always matches.
bool StartsWith(const std::u32string& s, const std::u32string& prefix)
{
    // The size check guarantees std::equal never reads past the end of 's'.
    return prefix.size() <= s.size() && std::equal(prefix.begin(), prefix.end(), s.begin());
}
340
// True when 's' ends with 'suffix'. An empty suffix matches every string.
bool EndsWith(const std::string& s, const std::string& suffix)
{
    const std::string::size_type suffixLength = suffix.length();
    const std::string::size_type length = s.length();
    if (suffixLength > length)
    {
        return false;
    }
    return s.compare(length - suffixLength, suffixLength, suffix) == 0;
}
347
// UTF-32 variant: true when 's' ends with 'suffix'. An empty suffix always matches.
bool EndsWith(const std::u32string& s, const std::u32string& suffix)
{
    // Compare back to front; the size check keeps the reverse walk inside 's'.
    return suffix.size() <= s.size() && std::equal(suffix.rbegin(), suffix.rend(), s.rbegin());
}
354
// Builds a std::string from the first 'length' bytes starting at 'str'.
// A null pointer or a non-positive length yields an empty string.
// The bytes are copied verbatim (embedded NUL characters included).
//
// NOTE(review): every platform branch of the previous body was empty, so the function
// fell off its end without returning a value (undefined behaviour). No
// platform-specific processing was visible; if the Windows build needs any
// (e.g. line-ending handling), it has to be added to the _WIN32 branch - confirm.
std::string NarrowString(const char* str, int length)
{
    if (str == nullptr || length <= 0)
    {
        return std::string();
    }
#if defined(__linux) || defined(__posix) || defined(__unix)
    return std::string(str, static_cast<std::string::size_type>(length));
#elif defined(_WIN32)
    return std::string(str, static_cast<std::string::size_type>(length));
#else
    #error unknown platform
#endif
}
386
// Returns a copy of 's' with every character mapped through std::toupper
// (current C locale). Characters without an uppercase form are copied unchanged.
std::string ToUpper(const std::string& s)
{
    std::string result;
    result.reserve(s.size());
    for (const char ch : s)
    {
        // std::toupper is undefined for negative arguments other than EOF; plain char
        // may be signed, so convert through unsigned char first.
        result.push_back(static_cast<char>(std::toupper(static_cast<unsigned char>(ch))));
    }
    return result;
}
398
// Returns a copy of 's' with every character mapped through std::tolower
// (current C locale). Characters without a lowercase form are copied unchanged.
std::string ToLower(const std::string& s)
{
    std::string result;
    result.reserve(s.size());
    for (const char ch : s)
    {
        // std::tolower is undefined for negative arguments other than EOF; plain char
        // may be signed, so convert through unsigned char first.
        result.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(ch))));
    }
    return result;
}
410
411 std::string ToString(double x)
412 {
413 return ToString(x, 15);
414 }
415
416 std::string ToString(double x, int maxNumDecimals)
417 {
418 return ToString(x, 0, maxNumDecimals);
419 }
420
// Converts 'x' to decimal text.
//  - A leading '-' is written for negative values.
//  - The integer part is followed by '.' and decimals when the value has a fractional
//    part or when 'minNumDecimals' > 0.
//  - Decimals are produced digit by digit (truncating, no rounding) until the fraction
//    is exhausted and at least 'minNumDecimals' digits were written, but never more
//    than 'maxNumDecimals' digits.
//  - Magnitudes of 2^63 and above, infinities and NaN cannot be split into an integer
//    part; they are rendered with std::to_string instead.
std::string ToString(double x, int minNumDecimals, int maxNumDecimals)
{
    std::string result;
    if (x < 0)
    {
        x = -x;
        result.append(1, '-');
    }
    // Converting an out-of-range (or NaN) double to an integer type is undefined, so the
    // range is checked first. The negated comparison also catches NaN.
    constexpr double integerLimit = 9223372036854775808.0; // 2^63
    if (!(x < integerLimit))
    {
        result.append(std::to_string(x));
        return result;
    }
    // 64-bit integer part: a plain int would overflow from 2^31 upwards.
    const long long whole = static_cast<long long>(x);
    result.append(std::to_string(whole));
    double d = x - static_cast<double>(whole);
    if (d > 0 || minNumDecimals > 0)
    {
        result.append(1, '.');
        for (int i = 0; (d > 0 || i < minNumDecimals) && i < maxNumDecimals; ++i)
        {
            // Shift the next decimal into the units position; d stays within [0, 10).
            d = 10 * d;
            const int digit = static_cast<int>(d) % 10;
            result.append(1, static_cast<char>(static_cast<int>('0') + digit));
            d = d - static_cast<int>(d);
        }
    }
    return result;
}
444
// Maps a nibble value (expected range 0..15) to its uppercase hexadecimal digit.
// The value is used directly as a table index; callers are responsible for the range.
inline char HexNibble(uint8_t n)
{
    static const char hexDigits[] = "0123456789ABCDEF";
    return hexDigits[n];
}
450
451 std::string ToHexString(uint8_t x)
452 {
453 std::string s;
454 s.append(1, HexNibble(x >> 4)).append(1, HexNibble(x & 0x0F));
455 return s;
456 }
457
// Renders a 16-bit value as four hexadecimal digits, most significant byte first,
// by formatting each byte with the uint8_t overload.
std::string ToHexString(uint16_t x)
{
    std::string s;
    for (int shift = 8; shift >= 0; shift -= 8)
    {
        s.append(ToHexString(uint8_t((x >> shift) & 0xFF)));
    }
    return s;
}
465
// Renders a 32-bit value as eight hexadecimal digits, most significant byte first,
// by formatting each byte with the uint8_t overload.
std::string ToHexString(uint32_t x)
{
    std::string s;
    for (int shift = 24; shift >= 0; shift -= 8)
    {
        s.append(ToHexString(uint8_t((x >> shift) & 0xFF)));
    }
    return s;
}
475
// Renders a 64-bit value as sixteen hexadecimal digits, most significant byte first,
// by formatting each byte with the uint8_t overload.
std::string ToHexString(uint64_t x)
{
    std::string s;
    for (int shift = 56; shift >= 0; shift -= 8)
    {
        s.append(ToHexString(uint8_t((x >> shift) & 0xFF)));
    }
    return s;
}
489
490 uint8_t ParseHexByte(const std::string& hexByteStr)
491 {
492 std::string hex;
493 if (StartsWith(hexByteStr, "0x") || StartsWith(hexByteStr, "0X"))
494 {
495 hex = hexByteStr;
496 }
497 else
498 {
499 hex = "0x" + hexByteStr;
500 }
501 std::stringstream s;
502 s.str(hex);
503 uint64_t value = 0;
504 s >> std::hex >> value;
505 return static_cast<uint8_t>(value);
506 }
507
// Returns the number of decimal digits of 'n' for n >= 0 (1 for 0..9, 2 for 10..99, ...).
// For negative 'n' the loop never runs and the result is 1.
int Log10(int n)
{
    int digits = 1;
    for (int rest = n / 10; rest > 0; rest /= 10)
    {
        ++digits;
    }
    return digits;
}
519
// Writes the decimal digits of 'n' right-aligned into a field of 'numDigits' spaces.
// Throws std::runtime_error("invalid number of digits") when the digits do not fit.
// Only positive values produce digits: for n <= 0 the field is returned blank.
std::u32string FormatNumber(int n, int numDigits)
{
    std::u32string field(numDigits, ' ');
    // Fill from the rightmost position towards the left, one digit per step.
    for (int pos = numDigits - 1; n > 0; --pos, n /= 10)
    {
        if (pos < 0)
        {
            throw std::runtime_error("invalid number of digits");
        }
        field[pos] = (n % 10) + '0';
    }
    return field;
}
537
// Returns the textual form of the calling thread's std::thread::id, as produced by
// streaming it into a string stream.
std::string CurrentThreadIdStr()
{
    const std::thread::id self = std::this_thread::get_id();
    std::stringstream text;
    text << self;
    return text.str();
}
544
545 std::string Format(const std::string& s, int width)
546 {
547 return Format(s, width, FormatWidth::exact, FormatJustify::left);
548 }
549
550 std::string Format(const std::string& s, int width, FormatJustify justify)
551 {
552 return Format(s, width, FormatWidth::exact, justify);
553 }
554
555 std::string Format(const std::string& s, int width, FormatWidth fw)
556 {
557 return Format(s, width, fw, FormatJustify::left);
558 }
559
560 std::string Format(const std::string& s, int width, FormatWidth fw, FormatJustify justify)
561 {
562 return Format(s, width, fw, justify, ' ');
563 }
564
565 std::string Format(const std::string& s, int width, FormatWidth fw, FormatJustify justify, char fillChar)
566 {
567 std::string result;
568 int m = static_cast<int>(s.length());
569 if (fw == FormatWidth::min)
570 {
571 width = std::max(width, m);
572 }
573 else if (fw == FormatWidth::exact)
574 {
575 m = std::min(m, width);
576 }
577 int n = std::max(0, width - m);
578 if (justify == FormatJustify::right)
579 {
580 for (int i = 0; i < n; ++i)
581 {
582 result.append(1, fillChar);
583 }
584 }
585 for (int i = 0; i < m; ++i)
586 {
587 result.append(1, s[i]);
588 }
589 if (justify == FormatJustify::left)
590 {
591 for (int i = 0; i < n; ++i)
592 {
593 result.append(1, fillChar);
594 }
595 }
596 return result;
597 }
598
599 #if defined(_WIN32)
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614 #else // on Linux it will probably be UTF-8 already...
615
// Non-Windows build: the platform string is handed back as an unchanged copy.
std::string PlatformStringToUtf8(const std::string& platformString)
{
    std::string utf8(platformString);
    return utf8;
}
620
621 #endif
622
623 } }