From a1d7e1f9a5a17d202e259a2fce9d0ca8a03a7a8e Mon Sep 17 00:00:00 2001 From: not-jakob Date: Tue, 19 May 2026 18:40:40 +0200 Subject: [PATCH 1/4] Fixed links not beeing highlightet correctly. Added escape character . Added alerts. Added [x] with a small x for task higlighting. Improved square bracket highlighting when using repeated square brackets. --- src/langs/markdown.jai | 156 ++++++++++++++++++++++++++++++----------- 1 file changed, 114 insertions(+), 42 deletions(-) diff --git a/src/langs/markdown.jai b/src/langs/markdown.jai index 33bd72bb0..e05cfb2dd 100644 --- a/src/langs/markdown.jai +++ b/src/langs/markdown.jai @@ -54,6 +54,7 @@ get_next_token :: (using tokenizer: *Markdown_Tokenizer) -> Token { } else if is_digit(char) { parse_number(tokenizer, *token); } else { + did_not_found_char := false; if char == { case #char "`"; parse_backtick(tokenizer, *token); case #char "~"; parse_tilde(tokenizer, *token); @@ -61,27 +62,39 @@ get_next_token :: (using tokenizer: *Markdown_Tokenizer) -> Token { case #char "["; parse_square_bracket(tokenizer, *token); case #char "!"; parse_exclamation_mark(tokenizer, *token); - + case #char "<"; parse_less_than(tokenizer, *token); + case #char "\\"; token.type = .default; t += 2; case; + did_not_found_char = true; + if !found_whitespace { + token.type = .default; + t += 1; + } + } + + // Only after any whitespace. + if did_not_found_char && found_whitespace { + if char == { + case #char "h"; parse_link(tokenizer, *token); + case #char "@"; parse_at(tokenizer, *token); + case #char "_"; parse_char_for_style(tokenizer, *token, char); + case; + if !found_new_line { + token.type = .default; + t += 1; + } + + // Only after a new line. 
if found_new_line { if char == { case #char "#"; parse_hash(tokenizer, *token); case #char ">"; parse_greater_than(tokenizer, *token); case #char "-"; parse_bullet_point(tokenizer, *token); case #char "+"; parse_bullet_point(tokenizer, *token); - case; token.type = .default; t += 1; + case; token.type = .default; t += 1; } - } else if found_whitespace { - if char == { - case #char "@"; parse_at(tokenizer, *token); - case #char "h"; parse_link(tokenizer, *token); - case #char "<"; parse_less_than(tokenizer, *token); - case #char "_"; parse_char_for_style(tokenizer, *token, char); - case; token.type = .default; t += 1; - } - } else { - token.type = .default; t += 1; } + } } } @@ -132,8 +145,36 @@ parse_hash :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { } parse_greater_than :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { - token.type = .operation; - t += 1; + identifier_str := read_identifier_string_tmp(tokenizer, stop_at_char = #char "\n", stop_at_white_space = false); + + found_allert := false; + end_of_next_line := false; + for alerts { + if begins_with(identifier_str, it.opening_tag) { + token.type = it.type; + found_allert = true; + + while t < max_t && is_white_space(t.*) { + if t.* == #char "\n" end_of_next_line = true; + t += 1; + } + + if t.* == #char ">" && !end_of_next_line { + while t < max_t { + if t.* == #char "\n" { + break; + } + + t += 1; + } + } + } + } + + if !found_allert { + token.type = .operation; + t = start_t + 1; + } } parse_bullet_point :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { @@ -206,7 +247,7 @@ parse_task_list :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { previous_t := t; identifier_str := read_identifier_string_tmp(tokenizer, stop_at_char = #char "\n", stop_at_white_space = false); - if begins_with(identifier_str, "[X]") token.type = .type; + if begins_with(identifier_str, "[X]") || begins_with(identifier_str, "[x]") token.type = .type; else if begins_with(identifier_str, "[ ]") 
token.type = .keyword; else { t = start_t; return; } @@ -215,27 +256,40 @@ parse_task_list :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { parse_square_bracket :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { token.type = .number; - add_link := false; - number_of_open_brackets := 1; t += 1; + number_of_opening_brackets_to_stop := 1; while t < max_t { - if number_of_open_brackets == 0 && t.* == #char "(" { - while t < max_t { - if t.* == #char "\n" return; - if t.* == #char ")" { t += 1; return; } - t +=1; - } + if t.* == #char "\n" { + t += 1; + break; + } else if t.* == #char "]" { + number_of_opening_brackets_to_stop -= 1; + } else if t.* == #char "[" { + number_of_opening_brackets_to_stop += 1; } - if t.* == #char "\n" return; - - else if t.* == #char "]" number_of_open_brackets -= 1; - else if t.* == #char "[" number_of_open_brackets += 1; + if number_of_opening_brackets_to_stop == 0 { + t += 1; + break; + } t += 1; } + eat_white_space(tokenizer, only_until_newline = true); + + if t.* == #char "(" { + t += 1; + while t < max_t { + if t.* == #char ")" || t.* == "\n" { + t += 1; + break; + } + + t += 1; + } + } } parse_exclamation_mark :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { @@ -258,12 +312,9 @@ parse_link :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { if begins_with(identifier_str, "https://") || begins_with(identifier_str, "http://") { token.type = .number; - while t < max_t { - if is_white_space(t.*) return; - t += 1; - } - } else token.type = .default; - + } else { + token.type = .default; + } } parse_less_than :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { @@ -307,16 +358,26 @@ highlight_for_repeated_chars :: (char: u8, repeats: s64, using tokenizer: *Markd t += repeats; if t >= max_t return true; - while true { + while t < max_t { + if t.* == #char "\\" { + t += 2; + continue; + } + if check_repeated_chars(char, repeats, tokenizer) { t += repeats; break; } - if stop_at_new_paragraph && 
skip_white_space_and_look_for_next_paragraph(tokenizer) break; + if stop_at_new_paragraph { + is_new_paragraph := skip_white_space_and_look_for_next_paragraph(tokenizer); + + if is_new_paragraph { + break; + } + } t += 1; - if t >= max_t break; } return true; @@ -340,6 +401,7 @@ read_identifier_string_tmp :: (using tokenizer: *Markdown_Tokenizer, stop_at_cha } skip_white_space_and_look_for_next_paragraph :: (using tokenizer: *Markdown_Tokenizer) -> found_new_para: bool { + original_t := t; number_of_new_lines := 0; while t < max_t { if !is_white_space(t.*) break; @@ -347,6 +409,8 @@ skip_white_space_and_look_for_next_paragraph :: (using tokenizer: *Markdown_Toke if number_of_new_lines >= 2 return true; t += 1; } + + t = original_t; return false; } @@ -367,10 +431,18 @@ Tag :: struct { type: Token_Type; } -tags :: #run,stallable -> [] Tag { +tags, alerts :: #run,stallable -> [] Tag, [] Tag { tags: [..] Tag; - array_add(*tags, .{"", "", .number} ); - array_add(*tags, .{"", "", .number} ); - array_add(*tags, .{"", .comment} ); - return tags; -} + array_add(*tags, .{"", "", .number}); + array_add(*tags, .{"", "", .number}); + array_add(*tags, .{"", "", .number}); + array_add(*tags, .{"", .comment}); + + alerts: [..] Tag; + array_add(*alerts, .{"> [!NOTE]", "\n", .highlight}); + array_add(*alerts, .{"> [!TIP]", "\n", .type}); + array_add(*alerts, .{"> [!IMPORTANT]", "\n", .number}); + array_add(*alerts, .{"> [!WARNING]", "\n", .operation}); + array_add(*alerts, .{"> [!CAUTION]", "\n", .modifier}); + return tags, alerts; +}; \ No newline at end of file From bc88af424d033530b56e7cd149382e755a708665 Mon Sep 17 00:00:00 2001 From: not-jakob Date: Thu, 21 May 2026 19:46:57 +0200 Subject: [PATCH 2/4] Added alerts and escape character examples in color_preview_samples.jai for Markdown. 
--- src/widgets/color_preview_samples.jai | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/widgets/color_preview_samples.jai b/src/widgets/color_preview_samples.jai index e891fa2b2..c3a5930a6 100644 --- a/src/widgets/color_preview_samples.jai +++ b/src/widgets/color_preview_samples.jai @@ -1195,6 +1195,31 @@ This is a footnote reference[^1]. [^1]: This is the footnote text. +### Alerts + +> [!NOTE] +> Useful information that users should know, even when skimming content. + +> [!TIP] +> Helpful advice for doing things better or more easily. + +> [!IMPORTANT] +> Key information users need to know to achieve their goal. + +> [!WARNING] +> Urgent info that needs immediate user attention to avoid problems. + +> [!CAUTION] +> Advises about risks or negative outcomes of certain actions. + +### Escape character + +\# Escape heading + +\*This is also escaped\* + +`Escape character \` inside backtick highlighting` + MARKDOWN From 1a4e324cbcb5ebaa8fbfa2d5dda1e8043de7a748 Mon Sep 17 00:00:00 2001 From: not-jakob Date: Fri, 29 May 2026 18:57:48 +0200 Subject: [PATCH 3/4] Changed numbered list item to be highlighted only after a new line and if there is no number after the period. --- src/langs/markdown.jai | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/langs/markdown.jai b/src/langs/markdown.jai index e05cfb2dd..5c579733a 100644 --- a/src/langs/markdown.jai +++ b/src/langs/markdown.jai @@ -120,14 +120,14 @@ parse_number :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { if t >= max_t return; } - if t.* == #char "." { + next_t := t + 1; + if t.* == #char "."
&& next_t < max_t && is_white_space(next_t.*) { token.type = .keyword; t += 1; maybe_do_task_list = true; } else { token.type = .default; } - } parse_hash :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { From 58ccf2a6bcf9596ba8fc15c05387e7d6658ea0c4 Mon Sep 17 00:00:00 2001 From: not-jakob Date: Sun, 14 Jun 2026 17:59:48 +0200 Subject: [PATCH 4/4] Added horizontal rule and alternative heading highlighting. --- src/langs/markdown.jai | 188 +++++++++++++++++++------- src/widgets/color_preview_samples.jai | 5 +- 2 files changed, 146 insertions(+), 47 deletions(-) diff --git a/src/langs/markdown.jai b/src/langs/markdown.jai index 5c579733a..ac1973091 100644 --- a/src/langs/markdown.jai +++ b/src/langs/markdown.jai @@ -49,23 +49,24 @@ get_next_token :: (using tokenizer: *Markdown_Tokenizer) -> Token { start_t = t; char := t.*; - if maybe_do_task_list && !found_new_line { + + is_alternate_heading := parse_for_alternate_heading(tokenizer, *token); + + if !is_alternate_heading && !found_new_line && maybe_do_task_list { parse_task_list(tokenizer, *token); - } else if is_digit(char) { - parse_number(tokenizer, *token); - } else { - did_not_found_char := false; + } else if !is_alternate_heading { + did_not_find_char := false; if char == { case #char "`"; parse_backtick(tokenizer, *token); case #char "~"; parse_tilde(tokenizer, *token); - case #char "*"; parse_char_for_style(tokenizer, *token, char); + case #char "*"; parse_for_horizontal_rule_and_more(tokenizer, *token, char); case #char "["; parse_square_bracket(tokenizer, *token); case #char "!"; parse_exclamation_mark(tokenizer, *token); case #char "<"; parse_less_than(tokenizer, *token); case #char "\\"; token.type = .default; t += 2; case; - did_not_found_char = true; + did_not_find_char = true; if !found_whitespace { token.type = .default; t += 1; @@ -73,12 +74,12 @@ get_next_token :: (using tokenizer: *Markdown_Tokenizer) -> Token { } // Only after any whitespace. 
- if did_not_found_char && found_whitespace { + if did_not_find_char && found_whitespace { if char == { case #char "h"; parse_link(tokenizer, *token); case #char "@"; parse_at(tokenizer, *token); - case #char "_"; parse_char_for_style(tokenizer, *token, char); - case; + case #char "_"; parse_for_horizontal_rule_and_more(tokenizer, *token, char); + case; if !found_new_line { token.type = .default; t += 1; @@ -86,10 +87,12 @@ get_next_token :: (using tokenizer: *Markdown_Tokenizer) -> Token { // Only after a new line. if found_new_line { - if char == { + if is_digit(char) { + parse_number(tokenizer, *token); + } else if char == { case #char "#"; parse_hash(tokenizer, *token); case #char ">"; parse_greater_than(tokenizer, *token); - case #char "-"; parse_bullet_point(tokenizer, *token); + case #char "-"; parse_for_horizontal_rule_and_more(tokenizer, *token, char); case #char "+"; parse_bullet_point(tokenizer, *token); case; token.type = .default; t += 1; } @@ -106,6 +109,38 @@ get_next_token :: (using tokenizer: *Markdown_Tokenizer) -> Token { return token; } +parse_for_alternate_heading :: (using tokenizer: *Markdown_Tokenizer, token: *Token) -> bool { + old_t := t; + + while t < max_t { + if t.* == #char "\n" { + t += 1; + break; + } + t += 1; + } + + eat_white_space(tokenizer, only_until_newline = true); + + is_double := check_repeated_chars(#char "=" , 2, tokenizer) || check_repeated_chars(#char "-" , 2, tokenizer); + + if is_double { + token.type = .type; + while t < max_t { + if t.* == #char "\n" { + break; + } + + t += 1; + } + + return true; + } + + t = old_t; + return false; +} + parse_at :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { token.type = .string_literal; while t < max_t && !is_white_space(t.*) { @@ -147,12 +182,12 @@ parse_hash :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { parse_greater_than :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { identifier_str := read_identifier_string_tmp(tokenizer, stop_at_char = #char 
"\n", stop_at_white_space = false); - found_allert := false; + found_alert := false; end_of_next_line := false; for alerts { if begins_with(identifier_str, it.opening_tag) { token.type = it.type; - found_allert = true; + found_alert = true; while t < max_t && is_white_space(t.*) { if t.* == #char "\n" end_of_next_line = true; @@ -171,49 +206,78 @@ parse_greater_than :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { } } - if !found_allert { + if !found_alert { token.type = .operation; t = start_t + 1; } } -parse_bullet_point :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { - next_t := t + 1; - if next_t < max_t && is_white_space(next_t.*) { - token.type = .keyword; - - if next_t < max_t && next_t.* == #char " " { - maybe_do_task_list = true; +parse_for_horizontal_rule_and_more :: (using tokenizer: *Markdown_Tokenizer, token: *Token, char: u8) { + is_triple := check_repeated_chars(char, 3, tokenizer); + + parsed_horizontal_rule := false; + if found_new_line && is_triple { + + next_t := t + 3; + while next_t < max_t { + is_white_space_except_newline := (is_white_space(next_t.*) && next_t.* != #char "\n"); + if is_white_space_except_newline || next_t.* == char { + next_t += 1; + continue; + } else if next_t.* == #char "\n" { + t = next_t; + parsed_horizontal_rule = true; + break; + } else { + break; + } } + } + + if parsed_horizontal_rule { + token.type = .modifier; + } else if char == #char "*" { + is_bullet_point := parse_bullet_point(tokenizer, token); + + if !is_bullet_point { + t -= 1; + parse_for_style(tokenizer, token, char); + } + } else if char == #char "_" { + parse_for_style(tokenizer, token, char); + } else if char == #char "-" { + parse_bullet_point(tokenizer, token); } - - else token.type = .default; - t += 1; } -parse_char_for_style :: (using tokenizer: *Markdown_Tokenizer, token: *Token, char: u8) { - // Bullet point. Same as minus. 
+parse_bullet_point :: (using tokenizer: *Markdown_Tokenizer, token: *Token) -> bool { next_t := t + 1; - is_bullet_point := next_t < max_t && is_white_space(next_t.*); - - if found_new_line && is_bullet_point { + if next_t < max_t && is_white_space(next_t.*) { token.type = .keyword; + + maybe_do_task_list = true; t += 1; - return; - } + return true; + } + + token.type = .default; + t += 1; + return false; +} +parse_for_style :: (using tokenizer: *Markdown_Tokenizer, token: *Token, char: u8) { // Bold, italic and bold+italic. token.type = .operation; - is_triple := highlight_for_repeated_chars(char, 3, tokenizer); + is_triple := highlight_for_repeated_chars(char, 3, tokenizer, token); is_double := true; if !is_triple { - is_double = highlight_for_repeated_chars(char, 2, tokenizer); + is_double = highlight_for_repeated_chars(char, 2, tokenizer, token); } if !is_double { - highlight_for_repeated_chars(char, 1, tokenizer); + highlight_for_repeated_chars(char, 1, tokenizer, token); } } @@ -221,7 +285,7 @@ parse_tilde :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { next_t := t + 1; if next_t < max_t && next_t.* == #char "~" { token.type = .operation; - highlight_for_repeated_chars(#char "~", 2, tokenizer); + highlight_for_repeated_chars(#char "~", 2, tokenizer, token); } else { token.type = .default; t += 1; @@ -232,10 +296,10 @@ parse_backtick :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { token.type = .function; // ```Code Block``` - is_triple := highlight_for_repeated_chars(#char "`", 3, tokenizer, stop_at_new_paragraph = false); + is_triple := highlight_for_repeated_chars(#char "`", 3, tokenizer, token, stop_at_new_paragraph = false); // `Code line` - if !is_triple highlight_for_repeated_chars(#char "`", 1, tokenizer); + if !is_triple highlight_for_repeated_chars(#char "`", 1, tokenizer, token); } parse_task_list :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { @@ -256,13 +320,15 @@ parse_task_list :: (using tokenizer: 
*Markdown_Tokenizer, token: *Token) { parse_square_bracket :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { token.type = .number; + old_t := t; t += 1; number_of_opening_brackets_to_stop := 1; while t < max_t { if t.* == #char "\n" { - t += 1; - break; + t = old_t + 1; + token.type = .default; + return; } else if t.* == #char "]" { number_of_opening_brackets_to_stop -= 1; } else if t.* == #char "[" { @@ -282,9 +348,11 @@ parse_square_bracket :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { if t.* == #char "(" { t += 1; while t < max_t { - if t.* == #char ")" || t.* == "\n" { + if t.* == #char ")" { t += 1; break; + } else if t.* == "\n" { + break; } t += 1; @@ -344,19 +412,44 @@ parse_less_than :: (using tokenizer: *Markdown_Tokenizer, token: *Token) { } check_repeated_chars :: (char: u8, repeats: s64, using tokenizer: *Markdown_Tokenizer) -> bool { + if t + repeats > max_t return false; + + next_t: *u8; for i:0..repeats-1 { - next_t := t+i; + next_t = t+i; if next_t < max_t && next_t.* != char return false; } + return true; } -highlight_for_repeated_chars :: (char: u8, repeats: s64, using tokenizer: *Markdown_Tokenizer, stop_at_new_paragraph := true) -> bool { +parse_for_repeated_chars_on_new_line :: (char: u8, repeats: s64, using tokenizer: *Markdown_Tokenizer) -> bool { + found_chars := check_repeated_chars(char, repeats, tokenizer); + + next_t := t + repeats; + if found_chars { + while next_t < max_t { + if is_white_space(next_t.*) && next_t.* != #char "\n" { + next_t += 1; + continue; + } else if next_t.* == #char "\n" { + t = next_t; + return true; + } else { + return false; + } + } + } + + return false; +} + +highlight_for_repeated_chars :: (char: u8, repeats: s64, using tokenizer: *Markdown_Tokenizer, token: *Token, stop_at_new_paragraph := true) -> bool { is_repeated := check_repeated_chars(char, repeats, tokenizer); if !is_repeated return false; t += repeats; - if t >= max_t return true; + old_t := t; while t < max_t { if t.* == #char 
"\\" { @@ -366,7 +459,7 @@ highlight_for_repeated_chars :: (char: u8, repeats: s64, using tokenizer: *Markd if check_repeated_chars(char, repeats, tokenizer) { t += repeats; - break; + return true; } if stop_at_new_paragraph { @@ -379,8 +472,11 @@ highlight_for_repeated_chars :: (char: u8, repeats: s64, using tokenizer: *Markd t += 1; } + + t = old_t; + token.type = .default; - return true; + return false; } read_identifier_string_tmp :: (using tokenizer: *Markdown_Tokenizer, stop_at_char := #char " ", stop_at_white_space := true) -> string /* temp */ { diff --git a/src/widgets/color_preview_samples.jai b/src/widgets/color_preview_samples.jai index c3a5930a6..7e04fe43c 100644 --- a/src/widgets/color_preview_samples.jai +++ b/src/widgets/color_preview_samples.jai @@ -1147,6 +1147,9 @@ SAMPLE_Markdown :: #string MARKDOWN ## Heading Level 2 +Alternate Heading +================= + This is a paragraph with **bold text**, *italic text*, and `inline code`. > This is a blockquote. @@ -1214,7 +1217,7 @@ This is a footnote reference[^1]. ### Escape character -\# Escape heading +\# Escape heading \*This is also escaped\*