Skip to content

Commit 8a5f928

Browse files
authored
Merge pull request hsutter#69 from filipsajdak/fsajdak-add-handling-of-cpp1-rawstringliterals
[SUGGESTION][FIX] Add support for raw string literals in mixed mode
2 parents a748d11 + 88c5ac9 commit 8a5f928

2 files changed

Lines changed: 46 additions & 15 deletions

File tree

source/common.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ struct source_line
4646
{
4747
std::string text;
4848

49-
enum class category { empty, preprocessor, comment, import, cpp1, cpp2 }
49+
enum class category { empty, preprocessor, comment, import, cpp1, cpp2, rawstring }
5050
cat = category::empty;
5151

5252
auto prefix() const -> std::string
@@ -58,6 +58,7 @@ struct source_line
5858
break;case category::import: return "/* i */ ";
5959
break;case category::cpp1: return "/* 1 */ ";
6060
break;case category::cpp2: return "/* 2 */ ";
61+
break;case category::rawstring: return "/* R */ ";
6162
break;default: assert(!"illegal category"); abort();
6263
}
6364
}

source/load.h

Lines changed: 44 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -196,27 +196,30 @@ auto starts_with_identifier_colon(std::string const& line) -> bool
196196
struct process_line_ret {
197197
bool all_comment_line;
198198
bool empty_line;
199+
bool all_rawstring_line;
199200
};
200201
auto process_cpp_line(
201202
std::string const& line,
202203
bool& in_comment,
203204
bool& in_string_literal,
205+
bool& in_raw_string_literal,
206+
std::string& raw_string_closing_seq,
204207
std::vector<int>& brace_depth,
205208
lineno_t lineno,
206209
std::vector<error>& errors
207210
)
208211
-> process_line_ret
209212
{
210-
if (!in_comment && !in_string_literal && starts_with_whitespace_slash_slash(line)) {
211-
return { true, false };
213+
if (!in_comment && !in_string_literal && !in_raw_string_literal && starts_with_whitespace_slash_slash(line)) {
214+
return { true, false, false };
212215
}
213216

214-
if (!in_comment && !in_string_literal && starts_with_whitespace_slash_star_and_no_star_slash(line)) {
217+
if (!in_comment && !in_string_literal && !in_raw_string_literal && starts_with_whitespace_slash_star_and_no_star_slash(line)) {
215218
in_comment = true;
216-
return { true, false };
219+
return { true, false, false };
217220
}
218221

219-
struct process_line_ret r { in_comment, true };
222+
struct process_line_ret r { in_comment, true , in_raw_string_literal};
220223

221224
auto prev = ' ';
222225
for (auto i = colno_t{0}; i < ssize(line); ++i)
@@ -225,27 +228,46 @@ auto process_cpp_line(
225228
// Note: in_literal is for { and } and so doesn't have to work for escaped ' characters
226229
//
227230
auto peek = [&](int num) { return (i+num < std::ssize(line)) ? line[i+num] : '\0'; };
228-
auto in_literal = [&]() { return in_string_literal || (prev == '\'' && peek(1) == '\''); };
231+
auto in_literal = [&]() { return in_string_literal || in_raw_string_literal || (prev == '\'' && peek(1) == '\''); };
229232

230233
// Process this source character
231234
//
232235
if (!std::isspace(line[i])) {
233236
r.empty_line = false;
234237
}
235238

236-
if (in_comment && !in_string_literal) {
239+
if (in_comment && !in_string_literal && !in_raw_string_literal) {
237240
switch (line[i]) {
238241
break;case '/': if (prev == '*') { in_comment = false; }
239242
break;default: ;
240243
}
241244
}
242-
245+
else if (in_raw_string_literal) {
246+
auto end_pos = line.find(raw_string_closing_seq, i);
247+
if (end_pos == std::string::npos) {
248+
return r;
249+
}
250+
in_raw_string_literal = false;
251+
i = end_pos+raw_string_closing_seq.size()-1;
252+
}
243253
else {
244254
r.all_comment_line = false;
255+
r.all_rawstring_line = false;
245256
switch (line[i]) {
257+
break;case 'R':
258+
if (!in_comment && !in_string_literal && !in_raw_string_literal && peek(1) == '"') {
259+
i+=2;
260+
if (i < ssize(line) - 1) {
261+
if (auto paren_pos = line.find("(", i); paren_pos != std::string::npos) {
262+
raw_string_closing_seq = ")"+line.substr(i, paren_pos-i)+"\"";
263+
in_raw_string_literal = true;
264+
}
265+
}
266+
}
267+
246268
break;case '\"':
247269
// If this isn't an escaped quote, toggle string literal state
248-
if (!in_comment && prev != '\\' && (in_string_literal || prev != '\'')) {
270+
if (!in_comment && prev != '\\' && (in_string_literal || prev != '\'') && !in_raw_string_literal) {
249271
in_string_literal = !in_string_literal;
250272
}
251273

@@ -270,10 +292,10 @@ auto process_cpp_line(
270292
}
271293

272294
break;case '*':
273-
if (!in_string_literal && prev == '/') { in_comment = true; }
295+
if (!in_string_literal && !in_raw_string_literal && prev == '/') { in_comment = true; }
274296

275297
break;case '/':
276-
if (!in_string_literal && prev == '/') { in_comment = false; return r; }
298+
if (!in_string_literal && !in_raw_string_literal && prev == '/') { in_comment = false; return r; }
277299

278300
break;default: ;
279301
}
@@ -436,8 +458,11 @@ class source
436458
return false;
437459
}
438460

439-
auto in_comment = false;
440-
auto in_string_literal = false;
461+
auto in_comment = false;
462+
auto in_string_literal = false;
463+
auto in_raw_string_literal = false;
464+
std::string raw_string_closing_seq;
465+
441466
auto brace_depth = std::vector<int>();
442467

443468
while (in.getline(&buf[0], max_line_len)) {
@@ -462,7 +487,7 @@ class source
462487
// Switch to cpp2 mode if we're not in a comment, not inside nested { },
463488
// and the line starts with "nonwhitespace :" but not "::"
464489
//
465-
if (!in_comment && std::ssize(brace_depth) == 0 && starts_with_identifier_colon(lines.back().text))
490+
if (!in_comment && !in_raw_string_literal && std::ssize(brace_depth) == 0 && starts_with_identifier_colon(lines.back().text))
466491
{
467492
cpp2_found= true;
468493

@@ -506,13 +531,18 @@ class source
506531
lines.back().text,
507532
in_comment,
508533
in_string_literal,
534+
in_raw_string_literal,
535+
raw_string_closing_seq,
509536
brace_depth,
510537
std::ssize(lines) - 1,
511538
errors
512539
);
513540
if (stats.all_comment_line) {
514541
lines.back().cat = source_line::category::comment;
515542
}
543+
else if (stats.all_rawstring_line) {
544+
lines.back().cat = source_line::category::rawstring;
545+
}
516546
else if (stats.empty_line) {
517547
lines.back().cat = source_line::category::empty;
518548
}

0 commit comments

Comments
 (0)