diff --git a/Applications/Internet/Cyberia.app/Cyberia.HC b/Applications/Internet/Cyberia.app/Cyberia.HC new file mode 100644 index 0000000..3502623 --- /dev/null +++ b/Applications/Internet/Cyberia.app/Cyberia.HC @@ -0,0 +1,410 @@ +JsonObject* cyberia = Json.CreateObject(Fs); + +Window* win = NULL; +Context2DWidget* controlsbackdrop1 = NULL; +TextLabelWidget* status1 = NULL; +TextLabelWidget* status2 = NULL; +Context2DWidget* statusbackdrop1 = NULL; +VerticalScrollBarWidget* vscroll1 = NULL; + +U8* previous_hovered_href = NULL; + +ButtonWidget* backbtn1 = NULL; +ButtonWidget* fwdbtn1 = NULL; +ButtonWidget* refreshbtn1 = NULL; +Context2DWidget* background1 = NULL; +TextInputWidget* addressbar1 = NULL; + +@window_widgets_list* widgets_base = NULL; +@html_dom_node* node_list = NULL; + +I64 old_window_width = -1; +I64 old_window_height = -1; + +class @browser +{ + HtmlRenderer* renderer; + JsonArray* bookmarks; + JsonArray* history; + JsonObject* javascript_link_handlers; + CTask* task; + U8* fetch_buffer; + U8* lazyload_buffer; + U8* lazyload_timeout_buffer; + U8* go_to_url_string; + U8* search_query; +}; + +@browser* browser = CAlloc(sizeof(@browser)); +browser->renderer = CAlloc(sizeof(HtmlRenderer)); +browser->task = Fs; +browser->history = Json.CreateArray(Fs); +browser->fetch_buffer = CAlloc(HTTP_FETCH_BUFFER_SIZE); +browser->go_to_url_string = NULL; +browser->javascript_link_handlers = Json.CreateObject(Fs); +browser->lazyload_buffer = CAlloc(HTTP_FETCH_BUFFER_SIZE); +browser->lazyload_timeout_buffer = CAlloc(HTTP_FETCH_BUFFER_SIZE); + +U0 @cyberia_win_close(Window* win) +{ + // Free everything + if (win == Compositor.active_win) { + Gui.Window.SetFocus(Compositor.GetWindowByTitle("Wallpaper")); + } + Compositor.UnregisterForGlobalInputEvents(win); + Compositor.DestroyWindow(win); +} + +U8* @browser_tls_connection_state(@http_response* resp) +{ + if (!resp || !resp->s || !resp->s->ctx) + return NULL; + + I64 connect_state = @tls_connection_status(resp->s->ctx); 
+    switch (connect_state) {
+    case 0:
+        return "TLS: Sent Client Hello";
+    case 1:
+        return "TLS: Parsed Server Hello";
+    case 2:
+        return "TLS: Key Share";
+    case 0xff:
+        return "TLS: Finished";
+    default:
+        return "";
+    }
+}
+extern U0 @cyberia_navigate();
+
+// Link callback installed on the renderer: looks up an "a" node for the
+// clicked widget, resolves its href, copies the result into the address bar
+// and spawns a navigation task.
+U0 @cyberia_link_clicked(Widget* widget)
+{
+    @html_dom_node* node = @self_or_ancestor_matches_tag_name(widget->data, "a");
+    if (!node)
+        return;
+    U8* unresolved_href = node->attributes->@("href");
+    if (!unresolved_href)
+        return;
+    U8* resolved_href = @resolve_href(browser->renderer, unresolved_href);
+    if (!resolved_href)
+        return;
+    StrCpy(&addressbar1->text, resolved_href);
+    // Only free buffers that @resolve_href allocated. If it handed back the
+    // attribute string itself, that memory still belongs to the node.
+    if (resolved_href != unresolved_href)
+        Free(resolved_href);
+    Spawn(&@cyberia_navigate);
+}
+
+// "clicked" callback of the refresh button: reloads whatever URL is in the
+// address bar.
+U0 @cyberia_refresh_clicked()
+{
+    Spawn(&@cyberia_navigate);
+}
+
+// Appends `widget` behind `tail` in a window widget list and returns the new
+// tail node. The node is allocated on `mem_task`.
+@window_widgets_list* @cyberia_append_widget(@window_widgets_list* tail, Widget* widget, CTask* mem_task)
+{
+    tail->next = CAlloc(sizeof(@window_widgets_list), mem_task);
+    tail->next->widget = widget;
+    return tail->next;
+}
+
+// Loads the URL currently in the address bar.
+//
+// Prefixes "https://" when no http(s) scheme is present, resets the renderer,
+// fetches the document while reporting progress in status1, follows 301/302
+// redirects, then tokenizes, styles, renders and reflows the page and
+// re-attaches the browser chrome widgets behind the page widgets.
+// Runs in its own task (see the Spawn() call sites).
+U0 @cyberia_navigate()
+{
+    win->focused_widget = NULL;
+
+    if (!StrLen(&addressbar1->text)) {
+        return;
+    }
+    if (!browser || !browser->task || !browser->renderer)
+        return;
+
+    U8 err_msg_buffer[128];
+    U8 status_text_buffer[1024];
+
+    if (MemCmp(&addressbar1->text, "http://", 7) && MemCmp(&addressbar1->text, "https://", 8)) {
+        U8 prepend_buf[512];
+        // "https://" (8 bytes) + text + NUL has to fit into prepend_buf.
+        if (StrLen(&addressbar1->text) > 512 - 9) {
+            StrCpy(err_msg_buffer, "ERROR: URL is too long");
+            MessageBox.Error(err_msg_buffer);
+            return;
+        }
+        StrPrint(prepend_buf, "https://%s", &addressbar1->text);
+        StrCpy(&addressbar1->text, prepend_buf);
+    }
+
+    U8* url_string = StrNew(&addressbar1->text);
+    if (!url_string)
+        return;
+
+    HtmlRenderer* renderer = browser->renderer;
+    MemSet(renderer, 0, sizeof(HtmlRenderer));
+    widgets_base->next = NULL;
+    renderer->images = NULL;
+    renderer->link_pointer = Compositor.theme.pointer.link;
+    renderer->link_callback = &@cyberia_link_clicked;
+    renderer->widgets_base = widgets_base;
+    renderer->status_widget = status1;
+    renderer->background_widget = background1;
+    renderer->vertical_scroll_widget = vscroll1;
+    renderer->win = win;
+
+    renderer->current_url_string = StrNew(url_string, browser->task);
+    renderer->current_url = @http_parse_url(url_string);
+    renderer->cache_directory = HTTP_CACHE_DIRECTORY;
+    renderer->task = browser->task;
+
+    // The temporary copy is not referenced past this point; release it once
+    // here instead of on every exit path.
+    Free(url_string);
+
+    if (!renderer->current_url) {
+        StrCpy(err_msg_buffer, "ERROR: Could not parse URL");
+        MessageBox.Error(err_msg_buffer);
+        return;
+    }
+
+    if (!@is_supported_url_scheme(renderer->current_url)) {
+        StrPrint(err_msg_buffer, "ERROR: Unsupported URL scheme: %s", renderer->current_url->scheme);
+        MessageBox.Error(err_msg_buffer);
+        return;
+    }
+
+    HttpUrl* url = renderer->current_url;
+
+    Bool is_alternate_port = FALSE;
+    if (!StrICmp(url->scheme, "http://") && url->port != 80)
+        is_alternate_port = TRUE;
+    if (!StrICmp(url->scheme, "https://") && url->port != 443)
+        is_alternate_port = TRUE;
+    StrCpy(status_text_buffer, "Fetching ");
+    if (is_alternate_port)
+        String.Append(status_text_buffer, "%s%s:%d%s%s", url->scheme, url->host, url->port, url->path, url->query);
+    else
+        String.Append(status_text_buffer, "%s%s%s%s", url->scheme, url->host, url->path, url->query);
+    String.Append(status_text_buffer, "...");
+    status1->SetText(status_text_buffer);
+
+    U8* buffer = browser->fetch_buffer;
+    MemSet(buffer, 0, HTTP_FETCH_BUFFER_SIZE);
+
+    @http_response* resp = Http.Get(renderer->current_url, buffer);
+    if (!resp) {
+        StrCpy(err_msg_buffer, "ERROR: Could not start request");
+        MessageBox.Error(err_msg_buffer);
+        return;
+    }
+    // Poll until the transfer completes, mirroring progress into status1.
+    while (resp->state != HTTP_STATE_DONE) {
+        if (resp->state >= HTTP_STATE_HEADERS_RECEIVED) {
+            StrPrint(status_text_buffer, "Received %d bytes", resp->body.length);
+            status1->SetText(status_text_buffer);
+        } else {
+            if (@http_scheme_is_https(renderer->current_url)) {
+                if (@browser_tls_connection_state(resp)) {
+                    StrPrint(status_text_buffer, "%s", @browser_tls_connection_state(resp));
+                    status1->SetText(status_text_buffer);
+                }
+            }
+        }
+        Sleep(1);
+    }
+
+    if (resp->status.code == 301 || resp->status.code == 302) {
+        U8* location = resp->headers->@("Location");
+        if (!location) {
+            StrCpy(err_msg_buffer, "ERROR: Redirect without Location header");
+            MessageBox.Error(err_msg_buffer);
+            return;
+        }
+        // NOTE(review): redirects recurse without a depth limit.
+        StrCpy(&addressbar1->text, location);
+        @cyberia_navigate;
+        return;
+    }
+
+    // Create node tree
+    I64 images_count = 0;
+    node_list = @html_tokenize_and_create_node_list(resp->body.data, resp->body.length, renderer->task, &images_count);
+
+    // Create empty CSS rules array, traverse node tree and populate CSS rules array
+    renderer->css_rules = Json.CreateArray(renderer->task);
+    renderer->forms = Json.CreateArray(renderer->task);
+    @process_css_rules_from_node_list(node_list, renderer);
+
+    // // Add custom CSS rules
+    // @process_custom_css_rules(renderer);
+
+    // background1->ctx->fill(Color(255, 255, 255));
+
+    status1->SetText("Rendering page...");
+    @render_node_list(node_list, renderer);
+
+    // Re-attach the browser chrome behind the widgets the renderer created.
+    // The list nodes stay linked into the window after this function returns,
+    // so they are allocated on the long-lived browser task rather than on the
+    // task running this navigation (assumes a finished task's heap is
+    // reclaimed - TODO confirm).
+    @window_widgets_list* append = renderer->widgets_base;
+    while (append->next) {
+        append = append->next;
+    }
+    append = @cyberia_append_widget(append, controlsbackdrop1, browser->task);
+    append = @cyberia_append_widget(append, backbtn1, browser->task);
+    append = @cyberia_append_widget(append, fwdbtn1, browser->task);
+    append = @cyberia_append_widget(append, refreshbtn1, browser->task);
+    append = @cyberia_append_widget(append, addressbar1, browser->task);
+    append = @cyberia_append_widget(append, statusbackdrop1, browser->task);
+    append = @cyberia_append_widget(append, status1, browser->task);
+    append = @cyberia_append_widget(append, status2, browser->task);
+    append = @cyberia_append_widget(append, vscroll1, browser->task);
+    vscroll1->scroll = 0;
+
+    @reflow_node_list(renderer);
+    @fetch_images_for_page(renderer);
+
+    status1->SetText("Done");
+}
+
+// "keypress" window callback: ENTER while the address bar is focused starts a
+// navigation task.
+U0 @cyberia_win_keypress(Window* w, I64)
+{
+    if (w->focused_widget == addressbar1 && KeyDown(SC_ENTER)) {
+        Spawn(&@cyberia_navigate);
+    }
+}
+
+// Placeholder; tab support is not implemented yet.
+U0 @cyberia_new_tab()
+{
+    // JsonObject* new_tab = Json.CreateObject(Fs);
+    // cyberia->a("tabs")->append(new_tab);
+}
+
+U0 @cyberia_win_repaint(Window*)
+{
+    if (!win || !addressbar1 || !background1 || !vscroll1 ||
!status1 || !statusbackdrop1) + return; + + addressbar1->width = win->width - 110; + background1->width = 0; + background1->height = 0; + background1->ctx->width = win->width; + background1->ctx->height = win->height - 84; + + vscroll1->x = win->width; + vscroll1->y = background1->y; + vscroll1->width = 16; + vscroll1->height = background1->ctx->height; + + if (StrLen(&status2->text)) { + status1->y = win->height; + status2->y = win->height - 40; + } else { + status1->y = win->height - 40; + status2->y = win->height; + } + + statusbackdrop1->y = background1->y + background1->ctx->height; + + if (!browser || !browser->renderer || !widgets_base) + return; + + if (widgets_base->next && (old_window_width != win->width || old_window_height != win->height)) { + @reflow_node_list(browser->renderer); + old_window_width = win->width; + old_window_height = win->height; + } + + // vscroll1->length = ToI64(browser->renderer->render_y / vscroll1->height); + // "render_y: %d\n", browser->renderer->render_y; + if (browser->renderer->render_y > win->height) { + vscroll1->x = win->width - 25; + vscroll1->length = ToI64((vscroll1->height - 32) / (browser->renderer->render_y / (vscroll1->height - 32))); + // vscroll1->length = vscroll1->height / (browser->renderer->render_y / vscroll1->height); + } +} + +U0 @cyberia_unset_status_text() +{ + StrCpy(&status2->text, ""); + previous_hovered_href = NULL; +} + +U0 @cyberia_win_mouseat(Window*) +{ + + if (!win->hovered_widget || !win->hovered_widget->pointer) { + @cyberia_unset_status_text; + return; + } + @html_dom_node* node = @self_or_ancestor_matches_tag_name(win->hovered_widget->data, "a"); + if (!node) + return; + U8* unresolved_href = node->attributes->@("href"); + if (!unresolved_href || previous_hovered_href == unresolved_href) + return; + previous_hovered_href = unresolved_href; + U8* resolved_href = @resolve_href(browser->renderer, unresolved_href); + if (!resolved_href) + return; + + StrCpy(&status2->text, resolved_href); + 
Free(resolved_href); +} + +U0 @cyberia_vscroll_change(Widget*) +{ + if (!browser || !browser->renderer) + return; + + @reflow_node_list(browser->renderer); +} + +U0 @cyberia_init() +{ + win = Compositor.CreateWindow(24, 24, 992, 768, WIN_FLAGS_DEFAULT); + // win->explicit_repaint = TRUE; + Gui.Window.SetCallback(win, "close", &@cyberia_win_close); + Gui.Window.SetCallback(win, "repaint", &@cyberia_win_repaint); + Gui.Window.SetCallback(win, "mouseat", &@cyberia_win_mouseat); + Gui.Window.SetIcon(win, Image.FileToContext2D("M:/Applications/Internet/Cyberia.app/lain.png")); + Gui.Window.Center(win); + Gui.Window.SetFocus(win); + + controlsbackdrop1 = Gui.CreateWidget(win, WIDGET_TYPE_CONTEXT2D, -14, 0, 0, 0); + controlsbackdrop1->y = -14; + controlsbackdrop1->ctx = NewContext2D(Display.Width(), 36 + 14)->fill(Color(204, 204, 204)); + + statusbackdrop1 = Gui.CreateWidget(win, WIDGET_TYPE_CONTEXT2D, 0, 0, Display.Width(), 48); + statusbackdrop1->ctx = NewContext2D(Display.Width(), 48)->fill(Color(204, 204, 204)); + + status1 = Gui.CreateWidget(win, WIDGET_TYPE_LABEL, 0, 0, 320, 16); + Gui.Widget.SetFont(status1, "Eight Bit Dragon"); + Gui.Widget.SetText(status1, "Idle"); + + status2 = Gui.CreateWidget(win, WIDGET_TYPE_LABEL, 0, 0, 320, 16); + Gui.Widget.SetFont(status2, "Eight Bit Dragon"); + Gui.Widget.SetText(status2, "Idle"); + + backbtn1 = Gui.CreateWidget(win, WIDGET_TYPE_BUTTON, 0, 0, 24, 24); + Gui.Widget.SetText(backbtn1, ""); + backbtn1->image = @image_file_to_context2d("M:/Media/Themes/Umami/Icon/actions/back.png"); + backbtn1->width = backbtn1->image->width + 8; + + fwdbtn1 = Gui.CreateWidget(win, WIDGET_TYPE_BUTTON, 33, 0, 24, 24); + Gui.Widget.SetText(fwdbtn1, ""); + fwdbtn1->image = @image_file_to_context2d("M:/Media/Themes/Umami/Icon/actions/forward.png"); + fwdbtn1->width = fwdbtn1->image->width + 8; + + refreshbtn1 = Gui.CreateWidget(win, WIDGET_TYPE_BUTTON, 66, 0, 24, 24); + Gui.Widget.SetText(refreshbtn1, ""); + Gui.Widget.SetCallback(refreshbtn1, 
"clicked", &@cyberia_refresh_clicked); + refreshbtn1->image = @image_file_to_context2d("M:/Media/Themes/Umami/Icon/actions/reload.png"); + refreshbtn1->width = refreshbtn1->image->width + 8; + + background1 = Gui.CreateWidget(win, WIDGET_TYPE_CONTEXT2D, 0, 36, 0, 0); + background1->ctx = NewContext2D(Display.Width(), Display.Height()); + background1->ctx->fill(Color(255, 255, 255)); + + vscroll1 = Gui.CreateWidget(win, WIDGET_TYPE_VERT_SCROLLBAR, -99999, -99999, 0, 0); + Gui.Widget.SetCallback(vscroll1, "change", &@cyberia_vscroll_change); + + addressbar1 = Gui.CreateWidget(win, WIDGET_TYPE_INPUT, 99, 6, 320, 16); + Gui.Widget.SetFont(addressbar1, "Eight Bit Dragon"); + Gui.Window.SetCallback(win, "keypress", &@cyberia_win_keypress); + widgets_base = win->widget; + + while (widgets_base->next) { + widgets_base = widgets_base->next; + } + + win->focused_widget = addressbar1; + @cyberia_win_repaint(win); +} + +@cyberia_init; + +U0 Main() +{ + while (1) + Sleep(1); +} + +Main; diff --git a/Applications/Internet/Cyberia.app/Run.HC b/Applications/Internet/Cyberia.app/Run.HC new file mode 100644 index 0000000..2afba1c --- /dev/null +++ b/Applications/Internet/Cyberia.app/Run.HC @@ -0,0 +1,3 @@ +Gui.App(); + +#include "Cyberia"; diff --git a/Applications/Internet/Cyberia.app/lain.png b/Applications/Internet/Cyberia.app/lain.png new file mode 100644 index 0000000..7c816e0 Binary files /dev/null and b/Applications/Internet/Cyberia.app/lain.png differ diff --git a/Applications/Internet/Icon.png b/Applications/Internet/Icon.png new file mode 100644 index 0000000..636d20f Binary files /dev/null and b/Applications/Internet/Icon.png differ diff --git a/Settings/SystemMenu.json b/Settings/SystemMenu.json index 0c42efb..e78501a 100644 --- a/Settings/SystemMenu.json +++ b/Settings/SystemMenu.json @@ -20,6 +20,17 @@ ], "icon": "M:/Applications/Accessories/Icon.png" }, + { + "name": "Internet", + "items": [ + { + "name": "Cyberia", + "path": "M:/Applications/Internet/Cyberia.app", + 
"icon": "M:/Applications/Internet/Cyberia.app/lain.png" + } + ], + "icon": "M:/Applications/Internet/Icon.png" + }, { "name": "System", "items": [ diff --git a/System/Libraries/Css/CustomRules.json b/System/Libraries/Css/CustomRules.json new file mode 100644 index 0000000..0967ef4 --- /dev/null +++ b/System/Libraries/Css/CustomRules.json @@ -0,0 +1 @@ +{} diff --git a/System/Libraries/Css/NamedColors.json b/System/Libraries/Css/NamedColors.json new file mode 100644 index 0000000..a27f0ee --- /dev/null +++ b/System/Libraries/Css/NamedColors.json @@ -0,0 +1 @@ +{"aliceblue":"#f0f8ff","antiquewhite":"#faebd7","aqua":"#00ffff","aquamarine":"#7fffd4","azure":"#f0ffff","beige":"#f5f5dc","bisque":"#ffe4c4","black":"#000000","blanchedalmond":"#ffebcd","blue":"#0000ff","blueviolet":"#8a2be2","brown":"#a52a2a","burlywood":"#deb887","cadetblue":"#5f9ea0","chartreuse":"#7fff00","chocolate":"#d2691e","coral":"#ff7f50","cornflowerblue":"#6495ed","cornsilk":"#fff8dc","crimson":"#dc143c","cyan":"#00ffff","darkblue":"#00008b","darkcyan":"#008b8b","darkgoldenrod":"#b8860b","darkgray":"#a9a9a9","darkgreen":"#006400","darkgrey":"#a9a9a9","darkkhaki":"#bdb76b","darkmagenta":"#8b008b","darkolivegreen":"#556b2f","darkorange":"#ff8c00","darkorchid":"#9932cc","darkred":"#8b0000","darksalmon":"#e9967a","darkseagreen":"#8fbc8f","darkslateblue":"#483d8b","darkslategray":"#2f4f4f","darkslategrey":"#2f4f4f","darkturquoise":"#00ced1","darkviolet":"#9400d3","deeppink":"#ff1493","deepskyblue":"#00bfff","dimgray":"#696969","dimgrey":"#696969","dodgerblue":"#1e90ff","firebrick":"#b22222","floralwhite":"#fffaf0","forestgreen":"#228b22","fuchsia":"#ff00ff","gainsboro":"#dcdcdc","ghostwhite":"#f8f8ff","gold":"#ffd700","goldenrod":"#daa520","gray":"#808080","green":"#008000","greenyellow":"#adff2f","grey":"#808080","honeydew":"#f0fff0","hotpink":"#ff69b4","indianred":"#cd5c5c","indigo":"#4b0082","ivory":"#fffff0","khaki":"#f0e68c","lavender":"#e6e6fa","lavenderblush":"#fff0f5","lawngreen":"#7cfc00","lemo
nchiffon":"#fffacd","lightblue":"#add8e6","lightcoral":"#f08080","lightcyan":"#e0ffff","lightgoldenrodyellow":"#fafad2","lightgray":"#d3d3d3","lightgreen":"#90ee90","lightgrey":"#d3d3d3","lightpink":"#ffb6c1","lightsalmon":"#ffa07a","lightseagreen":"#20b2aa","lightskyblue":"#87cefa","lightslategray":"#778899","lightslategrey":"#778899","lightsteelblue":"#b0c4de","lightyellow":"#ffffe0","lime":"#00ff00","limegreen":"#32cd32","linen":"#faf0e6","magenta":"#ff00ff","maroon":"#800000","mediumaquamarine":"#66cdaa","mediumblue":"#0000cd","mediumorchid":"#ba55d3","mediumpurple":"#9370db","mediumseagreen":"#3cb371","mediumslateblue":"#7b68ee","mediumspringgreen":"#00fa9a","mediumturquoise":"#48d1cc","mediumvioletred":"#c71585","midnightblue":"#191970","mintcream":"#f5fffa","mistyrose":"#ffe4e1","moccasin":"#ffe4b5","navajowhite":"#ffdead","navy":"#000080","oldlace":"#fdf5e6","olive":"#808000","olivedrab":"#6b8e23","orange":"#ffa500","orangered":"#ff4500","orchid":"#da70d6","palegoldenrod":"#eee8aa","palegreen":"#98fb98","paleturquoise":"#afeeee","palevioletred":"#db7093","papayawhip":"#ffefd5","peachpuff":"#ffdab9","peru":"#cd853f","pink":"#ffc0cb","plum":"#dda0dd","powderblue":"#b0e0e6","purple":"#800080","rebeccapurple":"#663399","red":"#ff0000","rosybrown":"#bc8f8f","royalblue":"#4169e1","saddlebrown":"#8b4513","salmon":"#fa8072","sandybrown":"#f4a460","seagreen":"#2e8b57","seashell":"#fff5ee","sienna":"#a0522d","silver":"#c0c0c0","skyblue":"#87ceeb","slateblue":"#6a5acd","slategray":"#708090","slategrey":"#708090","snow":"#fffafa","springgreen":"#00ff7f","steelblue":"#4682b4","tan":"#d2b48c","teal":"#008080","thistle":"#d8bfd8","tomato":"#ff6347","turquoise":"#40e0d0","violet":"#ee82ee","wheat":"#f5deb3","white":"#ffffff","whitesmoke":"#f5f5f5","yellow":"#ffff00","yellowgreen":"#9acd32"} \ No newline at end of file diff --git a/System/Libraries/Css/Tokenizer.HC b/System/Libraries/Css/Tokenizer.HC new file mode 100644 index 0000000..9c5aa90 --- /dev/null +++ 
b/System/Libraries/Css/Tokenizer.HC
@@ -0,0 +1,239 @@
+// text-align values assigned to a node's textAlign field by the renderer.
+#define CSS_TEXT_ALIGN_CENTER 1
+#define CSS_TEXT_ALIGN_RIGHT 2
+
+// Tokenizer states. The three CONSUME states collect bytes into the matching
+// FIFO; the two SKIP states discard input.
+#define CSS_TOKENIZER_STATE_CONSUME_MATCH 0
+#define CSS_TOKENIZER_STATE_CONSUME_PROPERTY 1
+#define CSS_TOKENIZER_STATE_CONSUME_VALUE 2
+#define CSS_TOKENIZER_SKIP_AT_RULE 3
+#define CSS_TOKENIZER_SKIP_COMMENT 4
+
+// Named-color table (color name -> "#rrggbb"), parsed once at load time.
+// The raw file buffer is released as soon as it has been parsed.
+U8* @css_named_colors_buffer = FileRead("M:/System/Libraries/Css/NamedColors.json");
+JsonObject* @css_named_colors = Json.Parse(@css_named_colors_buffer, erythros_mem_task);
+Free(@css_named_colors_buffer);
+
+// Working state of one tokenizer run.
+class @css_tokenizer
+{
+    U8* buffer;               // input text
+    I64 pos;                  // index of the byte being examined
+    I64 size;                 // number of bytes in buffer
+    I64 state;                // one of the CSS_TOKENIZER_* states
+    I64 previous_state;       // state to resume after a comment
+    CFifoU8* match_fifo;      // bytes of the selector being read
+    CFifoU8* property_fifo;   // bytes of the property name being read
+    CFifoU8* value_fifo;      // bytes of the value being read
+    JsonObject* current_rule; // { "matches": [...], "properties": {...} }
+    JsonArray* current_values; // values of the property being read
+    CTask* mem_task;          // owner task for property-name strings
+};
+
+// U8* @custom_css_rules_buffer = FileRead("M:/System/Libraries/Css/CustomRules.json");
+JsonObject* @custom_css_rules = Json.Parse("{}", erythros_mem_task);
+// Free(@custom_css_rules_buffer);
+
+// Starts a fresh, empty value list for the property about to be read.
+U0 @css_init_current_values(@css_tokenizer* t)
+{
+    t->current_values = Json.CreateArray(erythros_mem_task);
+}
+
+// Starts a fresh rule object with an empty "matches" array and an empty
+// "properties" object.
+U0 @css_init_current_rule(@css_tokenizer* t)
+{
+    t->current_rule = Json.CreateObject(erythros_mem_task);
+    t->current_rule->set("matches", Json.CreateArray(erythros_mem_task), JSON_ARRAY);
+    t->current_rule->set("properties", Json.CreateObject(erythros_mem_task), JSON_OBJECT);
+}
+
+// Prepares `t` for tokenizing `size` bytes of `buffer`.
+//
+// Every field is given a defined value: the tokenizer struct is declared on
+// the caller's stack without an initializer, so previous_state and
+// current_values would otherwise hold stack garbage until first assigned.
+// The three FIFOs created here must be released with FifoU8Del by the caller.
+U0 @css_init_tokenizer(@css_tokenizer* t, U8* buffer, I64 size, CTask* mem_task = NULL)
+{
+    t->buffer = buffer;
+    t->pos = 0;
+    t->size = size;
+    t->state = CSS_TOKENIZER_STATE_CONSUME_MATCH;
+    t->previous_state = CSS_TOKENIZER_STATE_CONSUME_MATCH;
+    t->match_fifo = FifoU8New(1024);
+    t->mem_task = mem_task;
+    t->property_fifo = FifoU8New(1024);
+    t->value_fifo = FifoU8New(1024);
+    t->current_values = NULL;
+    @css_init_current_rule(t);
+}
+
+// If selector bytes are pending, turns them into a string and appends it to
+// the current rule's "matches" array. Returns TRUE when something was added.
+Bool @css_try_append_match(@css_tokenizer* t)
+{
+    U8* match;
+    if (FifoU8Cnt(t->match_fifo)) {
+        match = @json_string_from_fifo(t->match_fifo, Fs);
+        t->current_rule->a("matches")->append(match);
+        return TRUE;
+    }
+    return FALSE;
+}
+
+// If value bytes are pending, turns them into a string and appends it to the
+// current value list. Returns TRUE when something was added.
+Bool @css_try_append_value(@css_tokenizer* t)
+{
+    U8* value;
+    if (FifoU8Cnt(t->value_fifo)) {
+        value = @json_string_from_fifo(t->value_fifo, Fs);
+        t->current_values->append(value);
+        return TRUE;
+    }
+    return FALSE;
+}
+
+// If a property name is pending, drains it into a new string and stores the
+// current value list under that name in the current rule's "properties".
+// Returns TRUE when a property was stored.
+Bool @css_try_set_property(@css_tokenizer* t)
+{
+    U8* property;
+    if (FifoU8Cnt(t->property_fifo)) {
+        // CAlloc zero-fills, so the extra byte is the terminator and
+        // StrLen(property) is always the next write position.
+        property = CAlloc(FifoU8Cnt(t->property_fifo) + 1, t->mem_task);
+        while (FifoU8Cnt(t->property_fifo))
+            FifoU8Rem(t->property_fifo, property + StrLen(property));
+        t->current_rule->o("properties")->set(property, t->current_values, JSON_ARRAY);
+        return TRUE;
+    }
+    return FALSE;
+}
+
+// Tokenizes `size` bytes of CSS text in `buffer` and appends one rule object
+// ({ "matches": [...], "properties": {...} }) to `rules` per rule set that
+// has at least one selector.
+//
+// Comments are skipped in every state. At-rules are skipped entirely: a block
+// at-rule up to its closing brace, a statement at-rule (@import, @charset,
+// ...) up to its ';'. On a syntax error a message is printed and tokenizing
+// stops; rules appended before the error stay in `rules`.
+U0 @css_tokenize_and_create_rules_from_buffer(JsonArray* rules, U8* buffer, I64 size, CTask* mem_task = NULL)
+{
+    @css_tokenizer t;
+    @css_init_tokenizer(&t, buffer, size, mem_task);
+    I64 brace_depth = 0;
+    while (t.pos < t.size) {
+        I64 token = t.buffer[t.pos];
+        switch (t.state) {
+        case CSS_TOKENIZER_SKIP_COMMENT:
+            // The lookahead is bounds-checked so the last byte never peeks
+            // past the end of the buffer.
+            if (token == '*' && t.pos + 1 < t.size && t.buffer[t.pos + 1] == '/') {
+                ++t.pos;
+                t.state = t.previous_state;
+                goto @css_tokenizer_continue;
+            }
+            break;
+        case CSS_TOKENIZER_SKIP_AT_RULE:
+            switch (token) {
+            case '{':
+                brace_depth++;
+                break;
+            case '}':
+                brace_depth--;
+                if (brace_depth <= 0) {
+                    t.state = CSS_TOKENIZER_STATE_CONSUME_MATCH;
+                    goto @css_tokenizer_continue;
+                }
+                break;
+            case ';':
+                // A statement at-rule has no block and ends at the first ';'
+                // outside of braces.
+                if (brace_depth <= 0) {
+                    t.state = CSS_TOKENIZER_STATE_CONSUME_MATCH;
+                    goto @css_tokenizer_continue;
+                }
+                break;
+            default:
+                break;
+            }
+            break;
+        case CSS_TOKENIZER_STATE_CONSUME_VALUE:
+            switch (token) {
+            case '/':
+                if (t.pos + 1 < t.size && t.buffer[t.pos + 1] == '*') {
+                    ++t.pos;
+                    t.previous_state = t.state;
+                    t.state = CSS_TOKENIZER_SKIP_COMMENT;
+                    goto @css_tokenizer_continue;
+                }
+                // A '/' that does not open a comment falls through and
+                // separates values like whitespace does.
+            case ' ':
+            case '\t':
+            case '\r':
+            case '\n':
+                if (FifoU8Cnt(t.value_fifo))
+                    @css_try_append_value(&t);
+                break;
+            case '}':
+                @css_try_append_value(&t);
+                if (FifoU8Cnt(t.property_fifo))
+                    @css_try_set_property(&t);
+                if (t.current_rule->a("matches")->length) {
+                    rules->append(t.current_rule);
+                    @css_init_current_rule(&t);
+                }
+                t.state = CSS_TOKENIZER_STATE_CONSUME_MATCH;
+                goto @css_tokenizer_continue;
+            case ';':
+                @css_try_append_value(&t);
+                if (FifoU8Cnt(t.property_fifo))
+                    @css_try_set_property(&t);
+                t.state = CSS_TOKENIZER_STATE_CONSUME_PROPERTY;
+                goto @css_tokenizer_continue;
+            default:
+                FifoU8Ins(t.value_fifo, token);
+                break;
+            }
+            break;
+        case CSS_TOKENIZER_STATE_CONSUME_PROPERTY:
+            switch (token) {
+            case '/':
+                if (t.pos + 1 < t.size && t.buffer[t.pos + 1] == '*') {
+                    ++t.pos;
+                    t.previous_state = t.state;
+                    t.state = CSS_TOKENIZER_SKIP_COMMENT;
+                    goto @css_tokenizer_continue;
+                }
+                // Falls through: a stray '/' is handled like whitespace.
+            case ' ':
+            case '\t':
+            case '\r':
+            case '\n':
+                // Whitespace is only legal before the property name starts.
+                if (FifoU8Cnt(t.property_fifo)) {
+                    PrintErr("Invalid token in CSS property at pos %d, token: '%c'\n", t.pos, token);
+                    goto @css_tokenizer_done;
+                }
+                break;
+            case '}':
+                if (t.current_rule->a("matches")->length) {
+                    rules->append(t.current_rule);
+                    @css_init_current_rule(&t);
+                }
+                t.state = CSS_TOKENIZER_STATE_CONSUME_MATCH;
+                goto @css_tokenizer_continue;
+            case ':':
+                if (!FifoU8Cnt(t.property_fifo)) {
+                    PrintErr("CSS property is not defined at pos %d, token: '%c'\n", t.pos, token);
+                    goto @css_tokenizer_done;
+                }
+                @css_init_current_values(&t);
+                t.state = CSS_TOKENIZER_STATE_CONSUME_VALUE;
+                goto @css_tokenizer_continue;
+            default:
+                FifoU8Ins(t.property_fifo, token);
+                break;
+            }
+            break;
+        case CSS_TOKENIZER_STATE_CONSUME_MATCH:
+            switch (token) {
+            case '/':
+                if (t.pos + 1 < t.size && t.buffer[t.pos + 1] == '*') {
+                    ++t.pos;
+                    t.previous_state = t.state;
+                    t.state = CSS_TOKENIZER_SKIP_COMMENT;
+                    goto @css_tokenizer_continue;
+                }
+                // NOTE(review): a '/' that does not open a comment falls
+                // through into the '@' case and is skipped like an at-rule -
+                // confirm this is intended.
+            case '@':
+                // Every at-rule starts counting braces from zero.
+                brace_depth = 0;
+                t.state = CSS_TOKENIZER_SKIP_AT_RULE;
+                goto @css_tokenizer_continue;
+            case ' ':
+            case '\t':
+            case '\r':
+            case '\n':
+            case ',':
+                if (FifoU8Cnt(t.match_fifo))
+                    @css_try_append_match(&t);
+                break;
+            case '{':
+                @css_try_append_match(&t);
+                if (!t.current_rule->a("matches")->length) {
+                    PrintErr("CSS match string is not defined at pos %d, token: '%c'\n", t.pos, token);
+                    goto @css_tokenizer_done;
+                }
+                t.state = CSS_TOKENIZER_STATE_CONSUME_PROPERTY;
+                goto @css_tokenizer_continue;
+            default:
+                FifoU8Ins(t.match_fifo, token);
+                break;
+            }
+            break;
+        }
+        @css_tokenizer_continue : ++t.pos;
+    }
+    // Single exit: error paths jump here too, so the FIFOs are always freed.
+    @css_tokenizer_done : FifoU8Del(t.match_fifo);
+    FifoU8Del(t.property_fifo);
+    FifoU8Del(t.value_fifo);
+}
diff --git a/System/Libraries/Html/Renderer.HC b/System/Libraries/Html/Renderer.HC
new file mode 100644
index 0000000..14afbc0
--- /dev/null
+++ b/System/Libraries/Html/Renderer.HC
@@ -0,0 +1,890 @@
+#define RENDERER_DEFAULT_MAX_LINE_HEIGHT 8
+
+// Linked-list entry describing one image that is fetched lazily.
+class @html_lazyload_image
+{
+    HttpUrl* url;
+    @http_request* req;
+    @http_response* resp;
+    I64 index;
+    I64 jiffies;
+    @html_lazyload_image* next;
+};
+
+// State of one page renderer: the current document URL, parsed CSS rules and
+// forms, the widgets it draws into, and the running layout position.
+class @html_renderer
+{
+    CTask* task;
+    HttpUrl* current_url;
+    JsonArray* css_rules;
+    JsonArray* forms;
+    U8* cache_directory;
+    U8* current_title;
+    U8* current_url_string;
+    I64 forms_index;
+    @image_collection* img_coll;
+    I64 img_count;
+    Bool last_char_was_whitespace;
+    Bool enable_animations;
+    Bool enable_lazy_loading;
+    U8 status_text[128];
+    VerticalScrollBarWidget* vertical_scroll_widget;
+    TextLabelWidget* status_widget;
+    Context2DWidget* background_widget;
+    Window* win;
+    @window_widgets_list* widgets_base;
+    @window_widgets_list* images;
+    I64 render_x;
+    I64 render_y;
+    I64 max_line_height;
+    Context2D* link_pointer;
+    U64 link_callback;
+};
+
+#define HtmlRenderer @html_renderer
+
+#define HTML_WORK_BUFFER_SIZE 2048
+
+// Shows `text` in the renderer's status widget, truncated to 127 bytes.
+U0 @html_renderer_update_status_text(HtmlRenderer* renderer, U8* text)
+{
+    U8 buf[128];
+    if (!renderer || !text)
+        return;
+    if (StrLen(text) < 128)
+        Gui.Widget.SetText(renderer->status_widget, text);
+    else {
+        MemSet(buf, NULL, 128);
+        MemCpy(buf, text, 127);
+        Gui.Widget.SetText(renderer->status_widget, buf);
+    }
+}
+
+U0(*@html_follow_link_fp)
+(HtmlRenderer* renderer, U8* url_string) = NULL;
+
+U8* @sanitize_node_text(HtmlRenderer* renderer, U8* text)
+{
+    if (!renderer || !text || !StrLen(text))
+        return "";
+    U8* original_text = text;
+    U8* ch = text;
+    Bool needs_sanitization = FALSE;
+    while (*ch && !needs_sanitization) {
+        switch (*ch) {
+        case 0x11:
+        case 0x12:
+        case 0x24:
+
needs_sanitization = TRUE; + break; + default: + break; + } + *ch++; + } + if (!needs_sanitization) + return text; + while (*text == ' ') + text++; + while (text[StrLen(text) - 1] == ' ') + text[StrLen(text) - 1] = NULL; + while (StrFind(" ", text)) + StrCpy(StrFind(" ", text), StrFind(" ", text) + 1); + U8* new_text = CAlloc(StrLen(text) * 2, renderer->task); + I64 i = 0; + while (i < StrLen(text)) { + switch (text[i]) { + case 0x11: + StrCpy(new_text + StrLen(new_text), "&"); + i++; + break; + case 0x12: + StrCpy(new_text + StrLen(new_text), "<"); + i++; + break; + case 0x24: + StrCpy(new_text + StrLen(new_text), "\d"); + i++; + break; + default: + StrPrint(new_text + StrLen(new_text), "%c", text[i]); + i++; + break; + } + } + Free(original_text); + return new_text; +} + +Bool @is_supported_url_scheme(@http_url* url) +{ + return @t(!StrICmp(url->scheme, "http://") || !StrICmp(url->scheme, "https://"), TRUE, FALSE); +} + +HttpUrl* @expand_url_from_string(CTask* task, HttpUrl* current_url, U8* str) +{ + U8 buf[HTML_WORK_BUFFER_SIZE]; + HttpUrl* url = @http_parse_url(str); + + // First, check if the parsed URL is a supported scheme. 
+    if (@is_supported_url_scheme(url))
+        return url;
+    else {
+        if (url->scheme[0] == '/' && url->scheme[1] == '/') {
+            // This is most likely a protocol agnostic URL, let's try to parse it:
+            StrPrint(buf, "%s%s", current_url->scheme, str + 2);
+            @http_free_url(url);
+            return @http_parse_url(buf);
+        }
+
+        Bool is_alternate_port = FALSE;
+        if (!StrICmp(current_url->scheme, "http://") && current_url->port != 80)
+            is_alternate_port = TRUE;
+        if (!StrICmp(current_url->scheme, "https://") && current_url->port != 443)
+            is_alternate_port = TRUE;
+
+        if (str[0] == '/' && str[1] != '/' && str[1]) {
+            // This is most likely a relative URL, let's try to parse it:
+            if (is_alternate_port)
+                StrPrint(buf, "%s%s:%d%s", current_url->scheme,
+                    current_url->host, current_url->port, str);
+            else
+                StrPrint(buf, "%s%s%s", current_url->scheme,
+                    current_url->host, str);
+            @http_free_url(url);
+            return @http_parse_url(buf);
+        }
+
+        U8 resolved_relative_path[HTML_WORK_BUFFER_SIZE];
+        StrCpy(resolved_relative_path, current_url->path);
+        MemSet(StrLastOcc(resolved_relative_path, "/") + 1, NULL, 1);
+
+        // This could still be a relative URL, let's try to parse it:
+        if (is_alternate_port)
+            StrPrint(buf, "%s%s:%d%s%s", current_url->scheme,
+                current_url->host, current_url->port, resolved_relative_path, str);
+        else
+            StrPrint(buf, "%s%s%s%s", current_url->scheme,
+                current_url->host, resolved_relative_path, str);
+        @http_free_url(url);
+        return @http_parse_url(buf);
+    }
+}
+
+// Resolves `href` against the renderer's current URL.
+//
+// Returns a newly allocated string owned by the caller (allocated on
+// renderer->task; release with Free), or NULL when an argument is NULL or the
+// href cannot be expanded. "javascript:" hrefs are not expanded; a copy of
+// the href is returned, so every non-NULL result can be freed the same way.
+// The port is included only when it is not the scheme's default (80/443).
+U8* @resolve_href(HtmlRenderer* renderer, U8* href)
+{
+    if (!renderer || !href)
+        return NULL;
+    // Length check first: MemCmp always compares 11 bytes and would read past
+    // the end of a shorter href.
+    if (StrLen(href) >= 11 && !MemCmp(href, "javascript:", 11))
+        return StrNew(href, renderer->task);
+    HttpUrl* url = @expand_url_from_string(renderer->task, renderer->current_url, href);
+    if (!url)
+        return NULL;
+    U8* resolved_href = CAlloc(HTML_WORK_BUFFER_SIZE, renderer->task);
+    Bool is_alternate_port = FALSE;
+    if (!StrICmp(url->scheme, "http://") && url->port != 80)
+        is_alternate_port = TRUE;
+    if (!StrICmp(url->scheme, "https://") && url->port != 443)
+        is_alternate_port = TRUE;
+    if (is_alternate_port)
+        StrPrint(resolved_href, "%s%s:%d%s%s", url->scheme, url->host, url->port, url->path, url->query);
+    else
+        StrPrint(resolved_href, "%s%s%s%s", url->scheme, url->host, url->path, url->query);
+    @http_free_url(url);
+    return resolved_href;
+}
+
+// Converts hex digits at `ch` into a byte value (0-255).
+//
+// By default two digits are read (high nibble, then low nibble). With
+// skip_increment set, the single digit at `ch` is used for both nibbles,
+// which expands shorthand colors ("f" -> 0xff). Input is not validated:
+// anything below 'A' is treated as a decimal digit, everything else as a
+// letter digit (case-insensitive).
+I64 @css_resolve_byte_from_hex(U8* ch, Bool skip_increment = FALSE)
+{
+    I64 res = 0;
+    I64 b = ToUpper(*ch);
+    if (b < 'A') {
+        res += (b - '0') << 4;
+    } else {
+        res += (10 + (b - 'A')) << 4;
+    }
+    if (!skip_increment) {
+        ++ch;
+        b = ToUpper(*ch);
+    }
+    if (b < 'A') {
+        res += (b - '0');
+    } else {
+        res += (10 + (b - 'A'));
+    }
+    return res;
+}
+
+// Converts a "#rrggbb" or "#rgb" string into a color value.
+// Returns 0 when the part after the first character is not 3 or 6 long.
+U32 @css_resolve_color_from_rrggbb(U8* str)
+{
+    // Step over the leading '#'.
+    *str++;
+    switch (StrLen(str)) {
+    case 6:
+        return Color(@css_resolve_byte_from_hex(str), @css_resolve_byte_from_hex(str + 2), @css_resolve_byte_from_hex(str + 4));
+    case 3:
+        return Color(@css_resolve_byte_from_hex(str, 1), @css_resolve_byte_from_hex(str + 1, 1), @css_resolve_byte_from_hex(str + 2, 1));
+    default:
+        return 0;
+    }
+}
+
+Bool @render_css_for_node(@html_dom_node* node, HtmlRenderer* renderer)
+{
+    I64 i, j, k;
+    JsonObject* rule = NULL;
+    JsonArray* matches = NULL;
+    JsonObject* properties = NULL;
+    JsonKey* key = NULL;
+    JsonArray* values = NULL;
+    U8* selector = NULL;
+    Bool matched = FALSE;
+    Bool should_display = TRUE;
+    U8 node_classes_buffer[HTML_WORK_BUFFER_SIZE];
+    U8 prepend_buffer[64];
+    U8 append_buffer[64];
+    MemSet(prepend_buffer, 0, 64);
+    MemSet(append_buffer, 0, 64);
+    U8 node_tmpnum_buf[16];
+    U8** node_classes;
+    I64 node_classes_count = 0;
+    I64 color = TRANSPARENT;
+    U8 node_ptr_string[32];
+    U8* tmpmd5;
+
+    for (i = 0; i < renderer->css_rules->length; i++) {
+        rule = renderer->css_rules->@(i);
+        matched = FALSE;
+        if (rule->@("matches")) {
+
+            matches = rule->@("matches");
+            properties = rule->@("properties");
+
+            // check if node md5 hash matches
+            if (*(matches->@(0)(U8*)) == 0xFE) {
+                StrPrint(node_ptr_string, "0x%08x", node);
+                tmpmd5 =
md5_string(node_ptr_string, StrLen(node_ptr_string)); + if (!StrCmp(matches->@(0) + 1, tmpmd5)) { + matched = TRUE; + Free(tmpmd5); + goto @css_rule_check_if_matched; + } + Free(tmpmd5); + } + + // try to match tagName + if (!StrICmp(matches->@(0), node->tagName)) { + matched = TRUE; + goto @css_rule_check_if_matched; + } + + // try to match id + if (*(matches->@(0)(U8*)) == '#' && node->attributes->@("id")) { + if (!StrCmp(matches->@(0) + 1, node->attributes->@("id"))) { + matched = TRUE; + goto @css_rule_check_if_matched; + } + } + + // try to match selectors + for (j = 0; j < matches->length; j++) { + selector = matches->@(j); + + if (node->attributes->@("class") && StrFirstOcc(selector, ".")) { + // node has class attribute and current selector has .class + + if (!StrFirstOcc(node->attributes->@("class"), " ")) { + if (!StrCmp(node->attributes->@("class"), StrFirstOcc(selector, ".") + 1)) { + matched = TRUE; + goto @css_rule_check_if_matched; + } + } else { + MemSet(node_classes_buffer, 0, HTML_WORK_BUFFER_SIZE); + StrCpy(node_classes_buffer, node->attributes->@("class")); + node_classes = String.Split(node_classes_buffer, ' ', &node_classes_count); + + for (k = 0; k < node_classes_count; k++) { + if (!StrCmp(node_classes[k], StrFirstOcc(selector, ".") + 1)) { + matched = TRUE; + Free(node_classes); + goto @css_rule_check_if_matched; + } + } + } + } + } + + @css_rule_check_if_matched : if (matched) + { + key = properties->keys; + for (j = 0; j < properties->length; j++) { + values = properties->@(key->name); + + if (!StrICmp(key->name, "display") && !StrICmp(values->@(0), "none")) + return FALSE; + + if (!StrICmp(key->name, "background") || !StrICmp(key->name, "background-color")) { + if (@css_named_colors->@(values->@(0))) { + node->backgroundColor = @css_resolve_color_from_rrggbb(@css_named_colors->@(values->@(0))); + } else if (values->@(0)(U8*)[0] == '#') { + node->backgroundColor = @css_resolve_color_from_rrggbb(values->@(0)); + } else { + // unsupported + 
}
+                    }
+
+                    // Foreground color: a named color is looked up in @css_named_colors,
+                    // a value starting with '#' is parsed directly, anything else is ignored.
+                    if (!StrICmp(key->name, "color")) {
+                        if (@css_named_colors->@(values->@(0))) {
+                            node->color = @css_resolve_color_from_rrggbb(@css_named_colors->@(values->@(0)));
+                        } else if (values->@(0)(U8*)[0] == '#') {
+                            node->color = @css_resolve_color_from_rrggbb(values->@(0));
+                        } else {
+                            // unsupported
+                        }
+                    }
+
+                    // Lengths are only honored when the value ends in "px"; the suffix is
+                    // cut off in node_tmpnum_buf before the number is parsed.
+                    // NOTE(review): for a value shorter than two characters the suffix
+                    // pointer starts before the string - confirm values are never that short.
+                    if (!StrICmp(key->name, "width") && !StrICmp(values->@(0) + StrLen(values->@(0)) - 2, "px")) {
+                        StrCpy(node_tmpnum_buf, values->@(0));
+                        node_tmpnum_buf[StrLen(node_tmpnum_buf) - 2] = NULL;
+                        node->width = Str2I64(node_tmpnum_buf);
+                    }
+
+                    if (!StrICmp(key->name, "height") && !StrICmp(values->@(0) + StrLen(values->@(0)) - 2, "px")) {
+                        StrCpy(node_tmpnum_buf, values->@(0));
+                        node_tmpnum_buf[StrLen(node_tmpnum_buf) - 2] = NULL;
+                        node->height = Str2I64(node_tmpnum_buf);
+                    }
+
+                    // Only "center" and "right" change the alignment.
+                    if (!StrICmp(key->name, "text-align") && !StrICmp(values->@(0), "center"))
+                        node->textAlign = CSS_TEXT_ALIGN_CENTER;
+                    if (!StrICmp(key->name, "text-align") && !StrICmp(values->@(0), "right"))
+                        node->textAlign = CSS_TEXT_ALIGN_RIGHT;
+
+                    // line-height is mapped onto font_size as two thirds of the pixel value
+                    // (integer arithmetic); a later font-size property overwrites it.
+                    if (!StrICmp(key->name, "line-height") && !StrICmp(values->@(0) + StrLen(values->@(0)) - 2, "px")) {
+                        StrCpy(node_tmpnum_buf, values->@(0));
+                        node_tmpnum_buf[StrLen(node_tmpnum_buf) - 2] = NULL;
+                        node->font_size = ToI64((Str2I64(node_tmpnum_buf) / 3) * 2);
+                    }
+
+                    if (!StrICmp(key->name, "font-size") && !StrICmp(values->@(0) + StrLen(values->@(0)) - 2, "px")) {
+                        StrCpy(node_tmpnum_buf, values->@(0));
+                        node_tmpnum_buf[StrLen(node_tmpnum_buf) - 2] = NULL;
+                        node->font_size = Str2I64(node_tmpnum_buf);
+                    }
+
+                    key = key->next;
+                }
+            }
+        }
+    }
+
+    return should_display;
+}
+
+// Returns TRUE when every byte of str lies in the range ' ' (0x20) .. 0x7f.
+// Any control character (including tab and newline) or byte above 0x7f makes
+// the whole string count as non-printable. An empty string returns TRUE.
+Bool @html_text_is_printable_ascii(U8* str)
+{
+    while (*str) {
+        if (*str > 0x7f || *str < ' ')
+            return FALSE;
+        ++str;
+    }
+    return TRUE;
+}
+
+// Builds a C string from the "TX" entries found in ptbuf, stopping at the
+// first "ER" entry. The result is allocated on renderer->task with the same
+// size as the source allocation (MSize2(ptbuf)).
+U8* @doldoc_pt_to_cstring(U8* ptbuf, HtmlRenderer* renderer)
+{
+    U8* str = CAlloc(MSize2(ptbuf), renderer->task);
+
+    while (*ptbuf) {
+        if (!MemCmp(ptbuf, "ER", 2))
+            goto pt_to_cstring_done;
+        if (!MemCmp(ptbuf, "TX", 2))
{
+            // Skip the two-byte "TX" tag and the two bytes after it, drop the
+            // entry's last character, then append the text to the result.
+            ptbuf += 4;
+            ptbuf[StrLen(ptbuf) - 1] = NULL;
+            StrCpy(str + StrLen(str), ptbuf);
+            // Step over the shortened text, the NUL written above and the
+            // entry's own terminator. This used to assign the length to ptbuf
+            // instead of adding it, which turned ptbuf into a bogus low address.
+            ptbuf += StrLen(ptbuf) + 2;
+            goto pt_to_cstring_next;
+        }
+        // Any other entry: skip to the byte after its terminator.
+        ptbuf += StrLen(ptbuf) + 1;
+    pt_to_cstring_next:
+    }
+
+pt_to_cstring_done:
+    return str;
+}
+
+// Registers a new form for a <form> node: copies the node's attributes into a
+// fresh JSON object, pairs it with an empty "elements" array, appends the form
+// to renderer->forms and makes it the current form (renderer->forms_index).
+U0 @create_form_from_node(HtmlRenderer* renderer, @html_dom_node* node)
+{
+    // renderer->forms is checked as well; @html_submit_form treats it as optional.
+    if (!node || !node->attributes || !renderer || !renderer->forms)
+        return;
+
+    JsonObject* form = Json.CreateObject(renderer->task);
+    JsonObject* attributes = Json.CreateObject(renderer->task);
+
+    // Copy attributes
+    JsonKey* key = node->attributes->keys;
+    while (key) {
+        attributes->set(key->name, key->value, JSON_STRING);
+        key = key->next;
+    }
+
+    form->set("attributes", attributes, JSON_OBJECT);
+    form->set("elements", Json.CreateArray(renderer->task), JSON_ARRAY);
+    renderer->forms->append(form);
+    renderer->forms_index = renderer->forms->length - 1;
+}
+
+// Placeholder click handler for form buttons; intentionally does nothing yet.
+U0 @html_button_clicked(HtmlRenderer* renderer, I64 index, U8* name)
+{
+    no_warn renderer, index, name;
+}
+
+// Serializes a form's elements as "name=value&name=value".
+// For GET forms (also the default when no method is given) the result is
+// prefixed with "<action>?", where action falls back to the current page URL.
+// Returns "" (a literal, not heap memory) when the form, its attributes or
+// its element list is missing; otherwise a string allocated on renderer->task.
+// NOTE(review): names and values are not URL-encoded.
+U8* @form_elements_to_string(HtmlRenderer* renderer, JsonObject* form)
+{
+    if (!form)
+        return "";
+
+    JsonObject* attributes = form->@("attributes");
+    if (!attributes)
+        return "";
+
+    JsonArray* elements = form->@("elements");
+    if (!elements)
+        return "";
+
+    U8* action = attributes->@("action");
+    U8* method = attributes->@("method");
+
+    // The fallback is only read, so no copy is made (the old StrNew copy was
+    // never freed and, worse, never used: the prefix printed the raw attribute).
+    if (!action)
+        action = renderer->current_url_string;
+    if (!action)
+        action = "";
+    if (!method)
+        method = "GET";
+
+    I64 i;
+    I64 needed = StrLen(action) + 2; // action, '?' and the terminating NUL
+    JsonObject* element = NULL;
+    U8* name;
+    U8* value;
+
+    // Size the buffer from the actual content instead of trusting a fixed 2048 bytes.
+    for (i = 0; i < elements->length; i++) {
+        element = elements->@(i);
+        name = element->@("name");
+        value = element->@("value");
+        if (name)
+            needed += StrLen(name);
+        if (value)
+            needed += StrLen(value);
+        needed += 2; // '=' and '&'
+    }
+    if (needed < 2048)
+        needed = 2048;
+
+    U8* str = CAlloc(needed, renderer->task);
+
+    if (!StrICmp(method, "GET"))
+        StrPrint(str, "%s?", action);
+
+    for (i = 0; i < elements->length; i++) {
+        element = elements->@(i);
+        name = element->@("name");
+        value = element->@("value");
+        if (!name)
+            name = "";
+        if (!value)
+            value = "";
+        StrPrint(str + StrLen(str), "%s=%s", name, value);
+        if (i < elements->length - 1)
+            StrCpy(str + StrLen(str), "&");
+    }
+
+    return str;
+}
+
+// Submits the form stored at renderer->forms[index].
+U0 @html_submit_form(HtmlRenderer* renderer, I64 index)
+{
+    if (index < 0 || !renderer || !renderer->forms || index >= renderer->forms->length)
+        return;
+
+    JsonObject* form =
renderer->forms->@(index);
+    if (!form)
+        return;
+
+    JsonObject* attributes = form->@("attributes");
+    if (!attributes)
+        return;
+
+    U8* method = attributes->@("method");
+    // A form without a method attribute is submitted with GET, the same
+    // default @form_elements_to_string applies. Without this the comparisons
+    // below were handed a NULL string.
+    if (!method)
+        method = "GET";
+
+    if (!StrICmp(method, "GET")) {
+        // Navigate to "<action>?<query>" resolved against the current page.
+        @html_follow_link_fp(renderer, @resolve_href(renderer, @form_elements_to_string(renderer, form)));
+        return;
+    }
+
+    if (!StrICmp(method, "POST")) {
+        // FIXME: Implement POST method
+        return;
+    }
+}
+
+// Creates the widget for an <input> node. Only type="button" and
+// type="submit" are handled; both become a 64x16 button whose ->data points
+// at the node and whose label is the node's value attribute.
+U0 @render_form_element(@html_dom_node* node, HtmlRenderer* renderer)
+{
+    if (!node || !renderer || !node->attributes)
+        return;
+
+    U8* type = node->attributes->@("type");
+    U8* value = node->attributes->@("value");
+
+    if (!type)
+        return;
+
+    ButtonWidget* btn = NULL;
+
+    // NOTE(review): StrCpy into btn->text is unbounded - confirm the field is
+    // large enough for arbitrary page-supplied value attributes.
+    if (!StrICmp(type, "button")) {
+        btn = Gui.CreateWidget(renderer->win, WIDGET_TYPE_BUTTON, U64_MAX, U64_MAX, 64, 16); // FIXME: Derive width/height
+        btn->data = node;
+        StrCpy(&btn->text, @t(value, value, ""));
+        return;
+    }
+
+    if (!StrICmp(type, "submit")) {
+        btn = Gui.CreateWidget(renderer->win, WIDGET_TYPE_BUTTON, U64_MAX, U64_MAX, 64, 16); // FIXME: Derive width/height
+        btn->data = node;
+        // NOTE(review): the click handler is &Reboot, not a form-submit routine.
+        // Presumably a placeholder - confirm before shipping, since any page
+        // with a submit button can trigger it.
+        Gui.Widget.SetCallback(btn, "clicked", &Reboot);
+        StrCpy(&btn->text, @t(value, value, "Submit"));
+        return;
+    }
+}
+
+// Text nodes whose parent is one of these tags are never drawn.
+JsonArray* parent_nodes_excluded_from_text_rendering = Json.Parse("[\"option\",\"script\",\"style\",\"title\"]", erythros_mem_task);
+// Tags that @render_node_list marks as display_block.
+JsonArray* block_level_element_tag_names = Json.Parse("[\"address\",\"article\",\"aside\",\"blockquote\",\"br\",\"canvas\",\"dd\",\"div\",\"dl\",\"dt\",\"fieldset\",\"figcaption\",\"figure\",\"footer\",\"form\",\"h1\",\"h2\",\"h3\",\"h4\",\"h5\",\"h6\",\"header\",\"hr\",\"li\",\"main\",\"nav\",\"noscript\",\"ol\",\"p\",\"pre\",\"section\",\"table\",\"tfoot\",\"ul\",\"video\"]", erythros_mem_task);
+
+// Renders a text node as one widget per space-separated word.
+U0 @render_node_text(@html_dom_node* node, HtmlRenderer* renderer)
+{
+    if (!@html_text_is_printable_ascii(node->text)) {
+        // FIXME: Wire up UTF-8 handling for non-ASCII characters
+        return;
+    }
+    I64 background_color = Color(255, 255, 255); //
FIXME: Alpha blend into rect beneath fragment in z-index
+    I64 default_font_size = 16; // FIXME: Derive this
+    U8* font_name = "Free Serif"; // FIXME: Derive this
+    // The parent's font size wins when it is set (non-zero).
+    I64 font_size = @t(node->parentNode->font_size, node->parentNode->font_size, default_font_size);
+    I64 text_width;
+
+    // Split a private copy of the text on spaces; each non-empty word
+    // becomes its own widget so the reflow pass can wrap between words.
+    U8* fragments = StrNew(node->text);
+    I64 fragment_count = 0;
+    U8** fragment = String.Split(fragments, ' ', &fragment_count);
+    I64 i;
+
+    Context2DWidget* fragment_widget;
+
+    for (i = 0; i < fragment_count; i++) {
+        if (fragment[i] && *fragment[i]) {
+            text_width = @get_truetype_text_width(font_name, font_size, fragment[i]);
+            if (text_width) {
+                text_width += 4; // horizontal gap between words
+                fragment_widget = Gui.CreateWidget(renderer->win, WIDGET_TYPE_CONTEXT2D,
+                    U64_MAX, U64_MAX, 0, 0);
+                fragment_widget->data = node;
+                // The word is drawn onto a white canvas 1.5x the font size tall.
+                fragment_widget->ctx = NewContext2D(text_width, ToI64(font_size * 1.5))->fill(Color(255, 255, 255))->text(font_name, 0, 0, font_size, node->parentNode->color, fragment[i]);
+                fragment_widget->width = fragment_widget->ctx->width;
+                fragment_widget->height = fragment_widget->ctx->height;
+            }
+        }
+    }
+    // String.Split returns a separately allocated pointer array (the CSS
+    // matcher frees its split result the same way); it used to leak here.
+    Free(fragment);
+    Free(fragments);
+}
+
+// Appends widget to the end of the renderer's singly linked image list
+// (renderer->images), creating the list when it is empty.
+U0 @renderer_append_image(HtmlRenderer* renderer, Context2DWidget* widget)
+{
+    @window_widgets_list* widget_list_item = CAlloc(sizeof(@window_widgets_list));
+    @window_widgets_list* list = renderer->images;
+    widget_list_item->widget = widget;
+    if (!list) {
+        renderer->images = widget_list_item;
+    } else {
+        while (list->next) {
+            list = list->next;
+        }
+        list->next = widget_list_item;
+    }
+}
+
+// Recursively creates the widgets for node and its children.
+// Note: the unused HTML_WORK_BUFFER_SIZE scratch array that used to live here
+// was removed; this function recurses once per DOM level, so the array was
+// multiplied by the tree depth on the stack.
+U0 @render_node_list(@html_dom_node* node, HtmlRenderer* renderer)
+{
+    if (!node || !renderer)
+        return;
+
+    I64 i;
+
+    I64 margin_top = 32; // FIXME: Derive these
+    I64 margin_bottom = 32;
+
+    // Apply CSS to real elements only; a FALSE result means the node is hidden.
+    if (StrICmp(node->tagName, "InternalTextNode") && StrICmp(node->tagName, "option") && StrICmp(node->tagName, "script") && StrICmp(node->tagName, "style") && StrICmp(node->tagName, "title"))
+        if
(!@render_css_for_node(node, renderer))
+            return;
+
+    Context2DWidget* block_widget;
+
+    // FIXME: Resolve if display: block is set
+    if (block_level_element_tag_names->contains(node->tagName)) {
+        node->display_block = TRUE;
+    }
+
+    // A block element is bracketed by two zero-sized marker widgets (this one
+    // and another after the children). @reflow_node_list starts a new line
+    // when it meets a widget whose node has display_block set.
+    if (node->display_block) {
+        block_widget = Gui.CreateWidget(renderer->win, WIDGET_TYPE_CONTEXT2D,
+            U64_MAX, U64_MAX, 0, 0);
+        block_widget->data = node;
+    }
+
+    // <body>: stretch the background canvas to the display size and fill it
+    // with the body's background color.
+    if (!StrICmp(node->tagName, "body")) {
+        renderer->background_widget->ctx->width = Display.Width();
+        renderer->background_widget->ctx->height = Display.Height();
+        renderer->background_widget->ctx->fill(@image_pixel_flip_rgb_bgr(node->backgroundColor));
+    }
+
+    Context2DWidget* img_widget;
+
+    if (!StrICmp(node->tagName, "form"))
+        @create_form_from_node(renderer, node);
+
+    if (!StrICmp(node->tagName, "input"))
+        @render_form_element(node, renderer);
+
+    // Text nodes: draw the text unless the parent is in the exclusion list;
+    // text inside <title> becomes the window and task title instead.
+    if (!StrICmp(node->tagName, "InternalTextNode")) {
+        node->text = @sanitize_node_text(renderer, node->text);
+        if (!parent_nodes_excluded_from_text_rendering->contains(node->parentNode->tagName)) {
+            @render_node_text(node, renderer);
+        }
+        if (!StrICmp(node->parentNode->tagName, "title")) {
+            String.Trim(node->text);
+            Gui.Window.SetTitle(renderer->win, node->text);
+            MemSet(renderer->task->task_title, NULL, STR_LEN);
+            // NOTE(review): copies STR_LEN - 1 bytes regardless of how long
+            // node->text is - confirm the text allocation is always that large.
+            MemCpy(renderer->task->task_title, node->text, STR_LEN - 1);
+            renderer->current_title = StrNew(node->text, renderer->task);
+        }
+    }
+
+    // <img>: create a placeholder widget (32x32 unless both dimensions are
+    // known) and queue it on the renderer's image list.
+    if (!StrICmp(node->tagName, "img")) {
+        if (!node->width || !node->height) {
+            node->width = 32;
+            node->height = 32;
+        }
+        img_widget = Gui.CreateWidget(renderer->win, WIDGET_TYPE_CONTEXT2D,
+            U64_MAX, U64_MAX, node->width, node->height);
+        img_widget->data = node;
+        @renderer_append_image(renderer, img_widget);
+    }
+
+    // Depth-first recursion into the children.
+    if (node->children->length) {
+        for (i = 0; i < node->children->length; i++)
+            @render_node_list(node->children->@(i), renderer);
+    }
+
+    // Closing marker widget for block elements.
+    if (node->display_block) {
+        block_widget = Gui.CreateWidget(renderer->win, WIDGET_TYPE_CONTEXT2D,
+            U64_MAX, U64_MAX,
0, 0);
+        block_widget->data = node;
+    }
+}
+
+// Walks from node up through its parents and returns the first node whose
+// tag name equals tagName (case-insensitive), or NULL when none matches.
+@html_dom_node* @self_or_ancestor_matches_tag_name(@html_dom_node* node, U8* tagName)
+{
+    while (node) {
+        if (!StrICmp(node->tagName, tagName))
+            return node;
+        node = node->parentNode;
+    }
+    return NULL;
+}
+
+// Lays out every widget that carries a DOM node: widgets are placed left to
+// right, wrapping to a new line when the window width is exceeded or when a
+// block-level node is met. Widgets inside an <a> element also receive the
+// link pointer and click callback.
+U0 @reflow_node_list(HtmlRenderer* renderer)
+{
+    I64 prev_render_y = renderer->render_y;
+    renderer->render_x = 0;
+    renderer->render_y = renderer->background_widget->y;
+    // Shift the content up by the scroll offset, scaled by the ratio of the
+    // previous content height to the scroll bar height. The height is checked
+    // so a zero-height scroll bar cannot cause a division by zero.
+    if (renderer->vertical_scroll_widget && prev_render_y && renderer->vertical_scroll_widget->scroll && renderer->vertical_scroll_widget->height) {
+        renderer->render_y -= (renderer->vertical_scroll_widget->scroll * (prev_render_y / renderer->vertical_scroll_widget->height));
+    }
+    renderer->max_line_height = RENDERER_DEFAULT_MAX_LINE_HEIGHT;
+
+    @window_widgets_list* widget_list_item = renderer->widgets_base->next;
+    Widget* widget;
+    @html_dom_node* node;
+
+    while (widget_list_item) {
+        widget = widget_list_item->widget;
+        node = widget->data;
+
+        // Widgets without a node are not part of the page and are left alone.
+        if (node) {
+            // Block-level node in the middle of a line: break the line first.
+            if (node->display_block && renderer->render_x) {
+                renderer->render_x = 0;
+                renderer->render_y += renderer->max_line_height;
+                renderer->max_line_height = RENDERER_DEFAULT_MAX_LINE_HEIGHT;
+            }
+
+            widget->x = renderer->render_x;
+            widget->y = renderer->render_y;
+            if (@self_or_ancestor_matches_tag_name(node, "a")) {
+                widget->pointer = renderer->link_pointer;
+                Gui.Widget.SetCallback(widget, "clicked", renderer->link_callback);
+            }
+
+            renderer->render_x += widget->width;
+            renderer->max_line_height = Max(renderer->max_line_height, widget->height);
+
+            // Wrap once another widget of this width would no longer fit.
+            if (renderer->render_x > renderer->win->width - widget->width) {
+                renderer->render_x = 0;
+                renderer->render_y += renderer->max_line_height;
+                renderer->max_line_height = RENDERER_DEFAULT_MAX_LINE_HEIGHT;
+            }
+        }
+
+        widget_list_item = widget_list_item->next;
+    }
+}
+
+// Loads the stylesheet referenced by str (relative to the current page),
+// from the cache when possible, and adds its rules to renderer->css_rules.
+U0 @process_css_rules_from_external_stylesheet(HtmlRenderer* renderer, U8* str)
+{
+    // download (or load from cache) and process stylesheet
+    if (!renderer || !str)
+        return;
+    // The status line holds a short prefix plus the complete URL, so it must
+    // be at least as large as the URL buffer; 128 bytes overflowed on long URLs.
+    U8 status_text_buffer[HTML_WORK_BUFFER_SIZE + 64];
+    U8
buf[HTML_WORK_BUFFER_SIZE];
+    HttpUrl* url = @expand_url_from_string(renderer->task, renderer->current_url, str);
+    if (!url)
+        return;
+    // buf is both the cache key and the URL shown in the status bar. A
+    // non-default port is part of the key (as in @fetch_images_for_page), so
+    // the same path on two ports of one host no longer shares a cache entry.
+    Bool is_alternate_port = FALSE;
+    if (!StrICmp(url->scheme, "http://") && url->port != 80)
+        is_alternate_port = TRUE;
+    if (!StrICmp(url->scheme, "https://") && url->port != 443)
+        is_alternate_port = TRUE;
+    if (is_alternate_port)
+        StrPrint(buf, "%s%s:%d%s", url->scheme, url->host, url->port, url->path);
+    else
+        StrPrint(buf, "%s%s%s", url->scheme, url->host, url->path);
+    U8* buffer = NULL;
+    @http_response* resp = NULL;
+    @http_response* cached_resp = NULL; // response stub owned by this function
+    I64 content_length = 0;
+    if (@http_is_resource_cached(buf, renderer->cache_directory)) {
+        StrPrint(status_text_buffer, "Loading CSS file from cache: %s", buf);
+        @html_renderer_update_status_text(renderer, status_text_buffer);
+        cached_resp = CAlloc(sizeof(@http_response), renderer->task);
+        cached_resp->body.data = FileRead(@http_get_cached_resource_filename(buf, renderer->cache_directory), &content_length);
+        resp = cached_resp;
+        // An unreadable or empty cache file yields no rules.
+        if (!resp->body.data || !content_length)
+            goto @css_content_length_is_zero;
+    } else {
+        StrPrint(status_text_buffer, "Fetching %s...", buf);
+        @html_renderer_update_status_text(renderer, status_text_buffer);
+        buffer = CAlloc(HTTP_FETCH_BUFFER_SIZE, renderer->task);
+        resp = Http.Get(url, buffer);
+        if (!resp)
+            goto @css_content_length_is_zero;
+        // Poll until the transfer completes, reporting progress once the
+        // headers are in. NOTE(review): there is no timeout on this wait.
+        while (resp->state != HTTP_STATE_DONE) {
+            if (resp->state >= HTTP_STATE_HEADERS_RECEIVED) {
+                StrPrint(status_text_buffer, "Received %d bytes", resp->body.length);
+                @html_renderer_update_status_text(renderer, status_text_buffer);
+            }
+            Sleep(1);
+        }
+        if (!resp->body.data)
+            goto @css_content_length_is_zero;
+        content_length = StrLen(resp->body.data);
+        if (!content_length)
+            goto @css_content_length_is_zero;
+        @http_cache_resource(buf, resp->body.data, content_length, renderer->cache_directory);
+    }
+
+    @css_tokenize_and_create_rules_from_buffer(renderer->css_rules, resp->body.data, content_length, renderer->task);
+
+    @css_content_length_is_zero : if (buffer) Free(buffer);
+    // The stub allocated for the cache path was never released.
+    // NOTE(review): the FileRead data itself is left alone because it is not
+    // visible here whether the CSS tokenizer keeps pointers into it.
+    if (cached_resp)
+        Free(cached_resp);
+}
+
+// Recursively collects CSS rules from node and its children: linked
+// stylesheets, <style> element text and inline style attributes.
+U0 @process_css_rules_from_node_list(@html_dom_node* node, HtmlRenderer* renderer)
+{
+    if (!node)
+        return;
+    I64 i;
+    U8 node_ptr_string[32];
+    U8 tmpbuf[HTML_WORK_BUFFER_SIZE];
+    U8* tmpmd5;
+
+    // Process rules from LINK rel="stylesheet" elements
+    if (!StrICmp(node->tagName, "link")) {
+        // rel must be present and equal "stylesheet" (StrICmp already compares
+        // the whole string, so the former length check was redundant); a <link>
+        // without rel used to pass NULL to StrICmp and StrLen.
+        if (node->attributes->@("rel") && !StrICmp(node->attributes->@("rel"), "stylesheet") && node->attributes->@("href")) {
+            @process_css_rules_from_external_stylesheet(renderer,
node->attributes->@("href"));
+        }
+    }
+
+    // Process rules from STYLE elements
+    if (!StrICmp(node->tagName, "InternalTextNode"))
+        if (!StrICmp(node->parentNode->tagName, "style"))
+            @css_tokenize_and_create_rules_from_buffer(renderer->css_rules, node->text, StrLen(node->text), renderer->task);
+
+    // Process rules from style attributes on individual elements
+    // The inline declarations are wrapped in a synthetic rule whose selector is
+    // "\xFE" followed by the MD5 of the node's address, so it matches only this node.
+    if (StrICmp(node->tagName, "link") && node->attributes->@("style")) {
+        StrPrint(node_ptr_string, "0x%08x", node);
+        tmpmd5 = md5_string(node_ptr_string, StrLen(node_ptr_string));
+        // Skip style attributes that would not fit into tmpbuf: marker byte,
+        // hash, '{', declarations, '}' and the terminating NUL.
+        if (StrLen(tmpmd5) + StrLen(node->attributes->@("style")) + 4 <= HTML_WORK_BUFFER_SIZE) {
+            StrPrint(tmpbuf, "\xFE%s{%s}", tmpmd5, node->attributes->@("style"));
+            @css_tokenize_and_create_rules_from_buffer(renderer->css_rules, tmpbuf, StrLen(tmpbuf), renderer->task);
+        }
+        Free(tmpmd5);
+    }
+
+    if (node->children->length) {
+        for (i = 0; i < node->children->length; i++)
+            @process_css_rules_from_node_list(node->children->@(i), renderer);
+    }
+}
+
+// Appends the built-in per-host rules from @custom_css_rules (keyed by the
+// current page's host) to renderer->css_rules. Does nothing when the host has
+// no entry or when no page URL is set.
+U0 @process_custom_css_rules(HtmlRenderer* renderer)
+{
+    JsonArray* rules = NULL;
+    I64 i;
+    if (!renderer || !renderer->current_url || !renderer->current_url->host)
+        return;
+    rules = @custom_css_rules->@(renderer->current_url->host);
+    if (rules) {
+        for (i = 0; i < rules->length; i++) {
+            renderer->css_rules->append(rules->@(i));
+        }
+    }
+}
+
+// Fetches (or loads from cache) the image for every widget queued on
+// renderer->images, replaces the widget's canvas with the decoded image and
+// reflows the page after each one.
+U0 @fetch_images_for_page(HtmlRenderer* renderer)
+{
+    if (!renderer) {
+        return;
+    }
+
+    // The status line holds a short prefix plus the complete URL, so it must
+    // be at least as large as the URL buffer; 128 bytes overflowed on long URLs.
+    U8 status_text_buffer[HTML_WORK_BUFFER_SIZE + 64];
+    U8 buf[HTML_WORK_BUFFER_SIZE];
+    HttpUrl* url;
+    Context2DWidget* widget;
+    @html_dom_node* node;
+    U8* src;
+    Bool is_alternate_port;
+    U8* buffer = CAlloc(HTTP_FETCH_BUFFER_SIZE, renderer->task);
+    @http_response* resp = NULL;
+
+    @window_widgets_list* image_list_item = renderer->images;
+    while (image_list_item) {
+
+        // Skip entries that cannot be resolved to a URL.
+        widget = image_list_item->widget;
+        if (!widget)
+            goto @fetch_next_image;
+        node = widget->data;
+        if (!node)
+            goto @fetch_next_image;
+        src = node->attributes->@("src");
+        if (!src)
+            goto @fetch_next_image;
+        url = @expand_url_from_string(renderer->task, renderer->current_url, src);
+        if (!url)
+            goto @fetch_next_image;
+
+        is_alternate_port = FALSE;
+
if (!StrICmp(url->scheme, "http://") && url->port != 80)
+            is_alternate_port = TRUE;
+        if (!StrICmp(url->scheme, "https://") && url->port != 443)
+            is_alternate_port = TRUE;
+        // buf is the cache key and status-bar URL; the port is only spelled
+        // out when it differs from the scheme's default.
+        if (is_alternate_port)
+            StrPrint(buf, "%s%s:%d%s", url->scheme, url->host, url->port, url->path);
+        else
+            StrPrint(buf, "%s%s%s", url->scheme, url->host, url->path);
+
+        // Raw image bytes for this iteration, from the cache or the network.
+        U8* image_data = NULL;
+        I64 image_length = 0;
+
+        if (@http_is_resource_cached(buf, renderer->cache_directory)) {
+            StrPrint(status_text_buffer, "Loading image from cache: %s", buf);
+            @html_renderer_update_status_text(renderer, status_text_buffer);
+            // Read straight into locals; the response stub that used to be
+            // allocated here on every cached image was never freed.
+            image_data = FileRead(@http_get_cached_resource_filename(buf, renderer->cache_directory), &image_length);
+        } else {
+            StrPrint(status_text_buffer, "Fetching %s...", buf);
+            @html_renderer_update_status_text(renderer, status_text_buffer);
+            // Release the previous fetch buffer before taking a fresh one;
+            // previously every downloaded image leaked HTTP_FETCH_BUFFER_SIZE bytes.
+            Free(buffer);
+            buffer = CAlloc(HTTP_FETCH_BUFFER_SIZE, renderer->task);
+            resp = Http.Get(url, buffer);
+            if (!resp)
+                goto @fetch_next_image;
+            // Poll until the transfer completes, reporting progress once the
+            // headers are in. NOTE(review): there is no timeout on this wait.
+            while (resp->state != HTTP_STATE_DONE) {
+                if (resp->state >= HTTP_STATE_HEADERS_RECEIVED) {
+                    StrPrint(status_text_buffer, "Received %d bytes", resp->body.length);
+                    @html_renderer_update_status_text(renderer, status_text_buffer);
+                }
+                Sleep(1);
+            }
+            if (!resp->body.length)
+                goto @fetch_next_image;
+            @http_cache_resource(buf, resp->body.data, resp->body.length, renderer->cache_directory);
+            image_data = resp->body.data;
+            image_length = resp->body.length;
+        }
+
+        // Nothing to decode (unreadable cache file or empty body).
+        if (!image_data || !image_length)
+            goto @fetch_next_image;
+
+        // FIXME: Wire up animated GIF handling
+        widget->ctx = @image_buffer_to_context2d(image_data, image_length);
+        if (widget->ctx) {
+            widget->width = widget->ctx->width;
+            widget->height = widget->ctx->height;
+        }
+
+        // The image may have changed the widget's size, so lay the page out again.
+        @reflow_node_list(renderer);
+        @fetch_next_image : image_list_item = image_list_item->next;
+    }
+
+    Free(buffer);
+}
\ No newline at end of file
diff --git a/System/Libraries/Html/Tokenizer.HC b/System/Libraries/Html/Tokenizer.HC
new file mode 100644
index 0000000..56e4f2f
--- /dev/null
+++ b/System/Libraries/Html/Tokenizer.HC
@@ -0,0 +1,1281 @@
+#define GROWABLE_STRING_INCREMENT_SIZE 16
+
+#define
HTML_STATE_INVALID 0 +#define HTML_STATE_DATA 1 +#define HTML_STATE_RCDATA 2 +#define HTML_STATE_RAWTEXT 3 +#define HTML_STATE_SCRIPT_DATA 4 +#define HTML_STATE_PLAINTEXT 5 +#define HTML_STATE_TAG_OPEN 6 +#define HTML_STATE_END_TAG_OPEN 7 +#define HTML_STATE_TAG_NAME 8 +#define HTML_STATE_RCDATA_LESS_THAN_SIGN 9 +#define HTML_STATE_RCDATA_END_TAG_OPEN 10 +#define HTML_STATE_RCDATA_END_TAG_NAME 11 +#define HTML_STATE_RAWTEXT_LESS_THAN_SIGN 12 +#define HTML_STATE_RAWTEXT_END_TAG_OPEN 13 +#define HTML_STATE_RAWTEXT_END_TAG_NAME 14 +#define HTML_STATE_SCRIPT_DATA_LESS_THAN_SIGN 15 +#define HTML_STATE_SCRIPT_DATA_END_TAG_OPEN 16 +#define HTML_STATE_SCRIPT_DATA_END_TAG_NAME 17 +#define HTML_STATE_SCRIPT_DATA_ESCAPE_START 18 +#define HTML_STATE_SCRIPT_DATA_ESCAPE_START_DASH 19 +#define HTML_STATE_SCRIPT_DATA_ESCAPED 20 +#define HTML_STATE_SCRIPT_DATA_ESCAPED_DASH 21 +#define HTML_STATE_SCRIPT_DATA_ESCAPED_DASH_DASH 22 +#define HTML_STATE_SCRIPT_DATA_ESCAPED_LESS_THAN 23 +#define HTML_STATE_SCRIPT_DATA_ESCAPED_END_TAG_OPEN 24 +#define HTML_STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME 25 +#define HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPE_START 26 +#define HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED 27 +#define HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH 28 +#define HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH 29 +#define HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN 30 +#define HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END 31 +#define HTML_STATE_BEFORE_ATTRIBUTE_NAME 32 +#define HTML_STATE_ATTRIBUTE_NAME 33 +#define HTML_STATE_AFTER_ATTRIBUTE_NAME 34 +#define HTML_STATE_BEFORE_ATTRIBUTE_VALUE 35 +#define HTML_STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED 36 +#define HTML_STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED 37 +#define HTML_STATE_ATTRIBUTE_VALUE_UNQUOTED 38 +#define HTML_STATE_AFTER_ATTRIBUTE_VALUE_QUOTED 39 +#define HTML_STATE_SELF_CLOSING_START_TAG 40 +#define HTML_STATE_BOGUS_COMMENT 41 +#define HTML_STATE_MARKUP_DECLARATION_OPEN 42 +#define HTML_STATE_COMMENT_START 43 +#define 
HTML_STATE_COMMENT_START_DASH 44 +#define HTML_STATE_COMMENT 45 +#define HTML_STATE_COMMENT_LESS_THAN_SIGN 46 +#define HTML_STATE_COMMENT_LESS_THAN_SIGN_BANG 47 +#define HTML_STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH 48 +#define HTML_STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH 49 +#define HTML_STATE_COMMENT_END_DASH 50 +#define HTML_STATE_COMMENT_END 51 +#define HTML_STATE_COMMENT_END_BANG 52 +#define HTML_STATE_DOCTYPE 53 +#define HTML_STATE_BEFORE_DOCTYPE_NAME 54 +#define HTML_STATE_DOCTYPE_NAME 55 +#define HTML_STATE_AFTER_DOCTYPE_NAME 56 +#define HTML_STATE_AFTER_DOCTYPE_PUBLIC_KEYWORD 57 +#define HTML_STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER 58 +#define HTML_STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED 59 +#define HTML_STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED 60 +#define HTML_STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER 61 +#define HTML_STATE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS 62 +#define HTML_STATE_AFTER_DOCTYPE_SYSTEM_KEYWORD 63 +#define HTML_STATE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER 64 +#define HTML_STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED 65 +#define HTML_STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED 66 +#define HTML_STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER 67 +#define HTML_STATE_BOGUS_DOCTYPE 68 +#define HTML_STATE_CDATA_SECTION 69 +#define HTML_STATE_CDATA_SECTION_BRACKET 70 +#define HTML_STATE_CDATA_SECTION_END 71 +#define HTML_STATE_CHARACTER_REFERENCE 72 +#define HTML_STATE_NAMED_CHARACTER_REFERENCE 73 +#define HTML_STATE_AMBIGUOUS_AMPERSAND 74 +#define HTML_STATE_NUMERIC_CHARACTER_REFERENCE 75 +#define HTML_STATE_HEXADECIMAL_CHARACTER_REFERENCE_START 76 +#define HTML_STATE_DECIMAL_CHARACTER_REFERENCE_START 77 +#define HTML_STATE_HEXADECIMAL_CHARACTER_REFERENCE 78 +#define HTML_STATE_DECIMAL_CHARACTER_REFERENCE 79 +#define HTML_STATE_NUMERIC_CHARACTER_REFERENCE_END 80 + +class @html_dom_node : JsonElement +{ + @html_dom_node* parentNode; + U8 tagName[32]; + JsonObject* attributes; + JsonArray* children; + U8* text; + I64 textAlign; + I64 width; + 
I64 height;
+    U32 backgroundColor;
+    U32 color;
+    I64 font_size;
+    Bool display_block;
+};
+
+// A byte buffer with a read cursor: pos is the index of the next byte to consume.
+class @html_input_buffer
+{
+    U8* data;
+    I64 size;
+    I64 pos;
+};
+
+// Complete state of one HTML tokenizer run.
+class @html_tokenizer
+{
+    @html_input_buffer inputBuffer; // document being tokenized
+    I64 state;                      // current HTML_STATE_* value
+    I64 returnState;                // state to resume after a character reference
+    U8 currentInputChar;            // character most recently consumed
+    JsonKey* currentAttribute;
+    @html_dom_node* appendNode;     // node that receives newly emitted children
+    @html_dom_node* currentNode;    // node currently being built
+    @html_dom_node* originNode;     // root "Document" node
+    I64 nodeTreeDepth;
+    I64 dataStateCounter;           // characters emitted into the current text node
+    @html_input_buffer tempBuffer;  // 512-byte scratch buffer for character references
+    Bool consumeTempBuffer;         // TRUE: read from tempBuffer before inputBuffer
+    I64 numOfImgNodes;
+    CTask* mem_task;                // task that owns all allocations
+};
+
+// Rounds a non-negative numToRound up to the next multiple of `multiple`
+// (values that are already a multiple, or a multiple of 0, are returned
+// unchanged). Negative values are rounded towards zero.
+I64 @round_value_up(I64 numToRound, I64 multiple)
+{
+    if (multiple == 0)
+        return numToRound;
+    I64 remainder = Abs(numToRound) % multiple;
+    if (remainder == 0)
+        return numToRound;
+    if (numToRound < 0)
+        return -(Abs(numToRound) - remainder);
+    else
+        return numToRound + multiple - remainder;
+}
+
+// Allocates an empty growable string on mem_task.
+U8* @init_growable_string(CTask* mem_task) { return CAlloc(GROWABLE_STRING_INCREMENT_SIZE, mem_task); }
+
+// Appends one character to a growable string and returns the string, which
+// may have been reallocated - callers must always use the returned pointer.
+// The buffer is regrown whenever the length crosses a multiple of
+// GROWABLE_STRING_INCREMENT_SIZE - 1; the new buffer is twice the rounded
+// length, so the in-place branch always has room for the character and its
+// terminator. The length is measured once instead of on every use.
+U8* @append_char_to_growable_string(U8* s, I64 char, CTask* mem_task)
+{
+    I64 len = StrLen(s);
+    I64 oldBufSize = @round_value_up(len, GROWABLE_STRING_INCREMENT_SIZE - 1);
+    I64 newBufSize = @round_value_up(len + 1, GROWABLE_STRING_INCREMENT_SIZE - 1);
+    if (newBufSize > oldBufSize) {
+        // CAlloc zero-fills, so the copy is terminated without extra work.
+        U8* newBuf = CAlloc(newBufSize * 2, mem_task);
+        MemCpy(newBuf, s, len);
+        newBuf[len] = char;
+        Free(s);
+        return newBuf;
+    } else {
+        s[len] = char;
+        return s;
+    }
+}
+
+// Clears the tokenizer's 512-byte temp buffer and resets its size and cursor.
+U0 @empty_temp_buffer(@html_tokenizer* t)
+{
+    MemSet(t->tempBuffer.data, NULL, 512);
+    t->tempBuffer.size = 0;
+    t->tempBuffer.pos = 0;
+}
+
+// Re-derives the temp buffer size from its NUL-terminated content and rewinds it.
+U0 @recalculate_temp_buffer_size(@html_tokenizer* t)
+{
+    t->tempBuffer.size = StrLen(t->tempBuffer.data);
+    t->tempBuffer.pos = 0;
+}
+
+// Replaces a named character reference held in the temp buffer (the full
+// "&name;" text) with its replacement text; unknown names become "?".
+U0 @replace_temp_buffer_with_named_character_reference(@html_tokenizer* t)
+{
+
+    // The ampersand becomes the placeholder byte 0x11 rather than a literal '&'.
+    // The entity is matched by name: the literal had been reduced to a bare "&",
+    // which the "&name;" buffer content cannot equal. The bare form is still
+    // accepted so existing behavior is a subset of the new one.
+    if (!StrICmp(t->tempBuffer.data, "&amp;") || !StrICmp(t->tempBuffer.data, "&")) {
+        StrCpy(t->tempBuffer.data, "\x11");
+        @recalculate_temp_buffer_size(t);
+        return;
+    }
+    if
(!StrICmp(t->tempBuffer.data, "&aring;")) {
+        StrCpy(t->tempBuffer.data, "\xc3\x85");
+        @recalculate_temp_buffer_size(t);
+        return;
+    }
+    // The comparisons in this chain are made against entity names. They had been
+    // replaced by the characters the entities stand for (one of them, the
+    // quotation mark, was not even a valid string literal), so no reference
+    // could ever match and everything rendered as "?".
+    if (!StrICmp(t->tempBuffer.data, "&bull;")) {
+        StrCpy(t->tempBuffer.data, "\xe2\x80\xa2");
+        @recalculate_temp_buffer_size(t);
+        return;
+    }
+    if (!StrICmp(t->tempBuffer.data, "&copy;")) {
+        StrCpy(t->tempBuffer.data, "\xc2\xa9");
+        @recalculate_temp_buffer_size(t);
+        return;
+    }
+    // NOTE(review): two entries map to a plain space and their original names
+    // could not be recovered from the source; the common space entities are
+    // assumed (&ensp;/&emsp; here, &nbsp; below) - confirm against upstream.
+    if (!StrICmp(t->tempBuffer.data, "&ensp;") || !StrICmp(t->tempBuffer.data, "&emsp;")) {
+        StrCpy(t->tempBuffer.data, " ");
+        @recalculate_temp_buffer_size(t);
+        return;
+    }
+    if (!StrICmp(t->tempBuffer.data, "&hellip;")) {
+        StrCpy(t->tempBuffer.data, "...");
+        @recalculate_temp_buffer_size(t);
+        return;
+    }
+    if (!StrICmp(t->tempBuffer.data, "&mdash;")) {
+        StrCpy(t->tempBuffer.data, "-");
+        @recalculate_temp_buffer_size(t);
+        return;
+    }
+    if (!StrICmp(t->tempBuffer.data, "&nbsp;")) {
+        StrCpy(t->tempBuffer.data, " ");
+        @recalculate_temp_buffer_size(t);
+        return;
+    }
+    // '<' becomes the placeholder byte 0x12 rather than a literal '<'.
+    if (!StrICmp(t->tempBuffer.data, "&lt;")) {
+        StrCpy(t->tempBuffer.data, "\x12");
+        @recalculate_temp_buffer_size(t);
+        return;
+    }
+    if (!StrICmp(t->tempBuffer.data, "&gt;")) {
+        StrCpy(t->tempBuffer.data, ">");
+        @recalculate_temp_buffer_size(t);
+        return;
+    }
+    if (!StrICmp(t->tempBuffer.data, "&quot;")) {
+        StrCpy(t->tempBuffer.data, "\"");
+        @recalculate_temp_buffer_size(t);
+        return;
+    }
+    if (!StrICmp(t->tempBuffer.data, "&zerowidthspace;")) {
+        StrCpy(t->tempBuffer.data, "");
+        @recalculate_temp_buffer_size(t);
+        return;
+    }
+
+    // Unknown reference.
+    StrCpy(t->tempBuffer.data, "?");
+    @recalculate_temp_buffer_size(t);
+    return;
+}
+
+// @hex_table maps the ASCII codes of '0'-'9', 'A'-'F' and 'a'-'f' to their
+// numeric values; every other entry is 0. It is filled in at load time.
+I64 @hex_table_i;
+I64 @hex_table[256];
+MemSet(&@hex_table, NULL, sizeof(I64) * 256);
+
+for (@hex_table_i = '0'; @hex_table_i < ':'; @hex_table_i++) {
+    @hex_table[@hex_table_i] = @hex_table_i - '0';
+}
+
+for (@hex_table_i = 'A'; @hex_table_i < 'G'; @hex_table_i++) {
+    @hex_table[@hex_table_i] = 10 + (@hex_table_i - 'A');
+}
+
+for (@hex_table_i = 'a'; @hex_table_i < 'g'; @hex_table_i++) {
+    @hex_table[@hex_table_i] = 10 + (@hex_table_i - 'a');
+}
+
+I64
@utf8_encode(U8* out, I64 utf)
+{
+    // Encodes the code point utf as NUL-terminated UTF-8 into out, which must
+    // have room for 5 bytes. Returns the number of bytes written before the
+    // terminator, or 0 when utf is not encodable, in which case U+FFFD
+    // (the replacement character) is written instead.
+    // Zero, negative values, UTF-16 surrogates and values above U+10FFFF are
+    // all rejected; previously negatives and surrogates were encoded as if
+    // valid and zero produced an empty string.
+    if (utf <= 0 || utf > 0x10FFFF || (utf >= 0xD800 && utf <= 0xDFFF)) {
+        // error - use replacement character
+        out[0] = 0xEF;
+        out[1] = 0xBF;
+        out[2] = 0xBD;
+        out[3] = 0;
+        return 0;
+    }
+    if (utf <= 0x7F) {
+        // Plain ASCII
+        out[0] = utf;
+        out[1] = 0;
+        return 1;
+    } else if (utf <= 0x07FF) {
+        // 2-byte unicode
+        out[0] = (((utf >> 6) & 0x1F) | 0xC0);
+        out[1] = (((utf >> 0) & 0x3F) | 0x80);
+        out[2] = 0;
+        return 2;
+    } else if (utf <= 0xFFFF) {
+        // 3-byte unicode
+        out[0] = (((utf >> 12) & 0x0F) | 0xE0);
+        out[1] = (((utf >> 6) & 0x3F) | 0x80);
+        out[2] = (((utf >> 0) & 0x3F) | 0x80);
+        out[3] = 0;
+        return 3;
+    } else {
+        // 4-byte unicode
+        out[0] = (((utf >> 18) & 0x07) | 0xF0);
+        out[1] = (((utf >> 12) & 0x3F) | 0x80);
+        out[2] = (((utf >> 6) & 0x3F) | 0x80);
+        out[3] = (((utf >> 0) & 0x3F) | 0x80);
+        out[4] = 0;
+        return 4;
+    }
+}
+
+// Replaces a decimal reference ("&#NNN;") in the temp buffer with the UTF-8
+// encoding of code point NNN.
+U0 @replace_temp_buffer_with_dec_character_reference(@html_tokenizer* t)
+{
+    t->tempBuffer.data[StrLen(t->tempBuffer.data) - 1] = NULL; // chop off semicolon
+    I64 charCode = Str2I64(t->tempBuffer.data + 2); // digits start after "&#"
+    @utf8_encode(t->tempBuffer.data, charCode);
+    @recalculate_temp_buffer_size(t);
+}
+
+// Replaces a hexadecimal reference ("&#xHHH;") in the temp buffer with the
+// UTF-8 encoding of its code point, by rewriting it as a decimal reference
+// and delegating to the decimal handler.
+U0 @replace_temp_buffer_with_hex_character_reference(@html_tokenizer* t)
+{
+    I64 dec_char = 0;
+
+    t->tempBuffer.data[StrLen(t->tempBuffer.data) - 1] = NULL; // chop off semicolon
+
+    U8* ch = t->tempBuffer.data + 3; // digits start after "&#x"
+
+    // Stop accumulating once the value is past the Unicode range; it will be
+    // rejected by @utf8_encode anyway and can no longer wrap around.
+    while (*ch && dec_char >= 0 && dec_char <= 0x10FFFF) {
+        dec_char = (dec_char << 4) | @hex_table[*ch++];
+    }
+
+    StrPrint(t->tempBuffer.data, "&#%d;", dec_char);
+    @recalculate_temp_buffer_size(t);
+
+    @replace_temp_buffer_with_dec_character_reference(t);
+}
+
+// Dispatches a numeric reference in the temp buffer to the hexadecimal or
+// decimal handler, based on the character that follows "&#".
+U0 @replace_temp_buffer_with_numeric_character_reference(@html_tokenizer* t)
+{
+    switch (t->tempBuffer.data[2]) {
+    case 'x':
+    case 'X': // "&#X41;" is as valid as "&#x41;"; it used to be parsed as decimal
+        @replace_temp_buffer_with_hex_character_reference(t);
+        break;
+    default:
+        @replace_temp_buffer_with_dec_character_reference(t);
+        break;
+    }
+}
+
+// Appends one character to the tokenizer's temp buffer.
+U0 @append_char_to_temp_buffer(@html_tokenizer* t, I64 char)
+{
+
I64 used = StrLen(t->tempBuffer.data);
+    // The temp buffer is a fixed 512-byte allocation (see @init_tokenizer).
+    // Characters that would overwrite its final NUL are dropped instead of
+    // running past the allocation on an overlong reference.
+    if (used >= 511)
+        return;
+    t->tempBuffer.data[used] = char;
+    t->tempBuffer.size++;
+}
+
+// Allocates a DOM node on mem_task with the given tag name, an empty
+// attribute object, an empty child array and an empty text string.
+@html_dom_node* @create_new_node(U8* tagName, CTask* mem_task)
+{
+    @html_dom_node* node = CAlloc(sizeof(@html_dom_node), mem_task);
+    // tagName is a 32-byte field; copy at most 31 characters (the node is
+    // zero-filled, so the copy stays terminated).
+    I64 len = StrLen(tagName);
+    if (len > 31)
+        len = 31;
+    MemCpy(node->tagName, tagName, len);
+    node->attributes = Json.CreateObject(mem_task);
+    node->children = Json.CreateArray(mem_task);
+    node->text = @init_growable_string(mem_task);
+    node->sig = JSON_SIG;
+    node->type = JSON_HTML;
+    return node;
+}
+
+// Prepares tokenizer t to read `size` bytes from data, starting in the data
+// state with a fresh "Document" root node as both append target and current node.
+U0 @init_tokenizer(@html_tokenizer* t, U8* data, I64 size, CTask* mem_task)
+{
+    t->mem_task = mem_task;
+    t->inputBuffer.data = data;
+    t->inputBuffer.size = size;
+    t->inputBuffer.pos = 0;
+    t->state = HTML_STATE_DATA;
+    t->tempBuffer.data = CAlloc(512, t->mem_task);
+    // The temp buffer starts out empty; it used to inherit the input size,
+    // which has nothing to do with the 512-byte scratch allocation.
+    t->tempBuffer.size = 0;
+    t->tempBuffer.pos = 0;
+    t->originNode = @create_new_node("Document", t->mem_task);
+    t->appendNode = t->originNode;
+    t->currentNode = t->originNode;
+    t->consumeTempBuffer = FALSE;
+    t->dataStateCounter = 0;
+    t->numOfImgNodes = 0;
+}
+
+// Loads the next character into t->currentInputChar. While consumeTempBuffer
+// is set, characters come from the temp buffer until it is exhausted; after
+// that (and otherwise) they come from the input buffer.
+U0 @consume_next_input_char(@html_tokenizer* t)
+{
+    if (t->consumeTempBuffer) {
+        if (t->tempBuffer.pos < t->tempBuffer.size) {
+            t->currentInputChar = t->tempBuffer.data[t->tempBuffer.pos++];
+            return;
+        } else {
+            t->consumeTempBuffer = FALSE;
+        }
+    }
+    // Outside the input (assuming inputBuffer.size is the byte count passed to
+    // @init_tokenizer) a NUL is delivered instead of reading foreign memory.
+    // The cursor still advances so callers that rewind it stay balanced.
+    if (t->inputBuffer.pos < 0 || t->inputBuffer.pos >= t->inputBuffer.size) {
+        t->currentInputChar = NULL;
+        t->inputBuffer.pos++;
+        return;
+    }
+    t->currentInputChar = t->inputBuffer.data[t->inputBuffer.pos++];
+}
+
+// Appends the current input character to the text node being built, creating
+// a new "InternalTextNode" when this is the first character of a text run.
+U0 @emit_current_character(@html_tokenizer* t)
+{
+    if (!t->dataStateCounter) {
+        @html_dom_node* node = @create_new_node("InternalTextNode", t->mem_task);
+        t->currentNode = node;
+    }
+    t->currentNode->text = @append_char_to_growable_string(t->currentNode->text,
+        t->currentInputChar, t->mem_task);
+    t->dataStateCounter++;
+}
+
+// Returns TRUE for nodes that never take children: text nodes and the
+// listed void elements.
+Bool @node_is_self_closing(@html_dom_node* node)
+{
+    if (!StrICmp(node->tagName, "InternalTextNode"))
+        return TRUE;
+    if (!StrICmp(node->tagName, "area"))
+        return TRUE;
+    if (!StrICmp(node->tagName, "base"))
+        return TRUE;
+    if (!StrICmp(node->tagName, "br"))
+        return TRUE;
+    if (!StrICmp(node->tagName, "col"))
+        return TRUE;
+    if (!StrICmp(node->tagName, "embed"))
+        return TRUE;
+    if (!StrICmp(node->tagName, "hr"))
+        return TRUE;
+    if (!StrICmp(node->tagName, "img"))
+        return TRUE;
+    if (!StrICmp(node->tagName, "input"))
+        return TRUE;
+    if (!StrICmp(node->tagName, "link"))
+        return TRUE;
+    if (!StrICmp(node->tagName, "meta"))
+        return TRUE;
+    if (!StrICmp(node->tagName, "param"))
+        return TRUE;
+    if (!StrICmp(node->tagName, "source"))
+        return TRUE;
+    if (!StrICmp(node->tagName, "track"))
+        return TRUE;
+    if (!StrICmp(node->tagName, "wbr"))
+        return TRUE;
+    return FALSE;
+}
+
+// Commits t->currentNode to the tree.
+// An end tag (tag name starting with '/') closes the nearest open element
+// with that name and makes its parent the new append target; an end tag with
+// no matching open element is ignored. Any other node is appended to the
+// current append target and, unless it is self-closing, becomes the new
+// append target itself.
+U0 @emit_current_node(@html_tokenizer* t)
+{
+    @html_dom_node* origAppendNode = t->appendNode;
+    if (t->currentNode->tagName[0] == '/') {
+        if (StrICmp(t->appendNode->tagName, t->currentNode->tagName + 1)) {
+            /* end tag tagName for currentNode does not match appendNode tagName,
+             * traverse up parentNode until we find a match */
+            while (StrICmp(t->appendNode->tagName, t->currentNode->tagName + 1)) {
+                if (!StrICmp(t->appendNode->tagName,
+                        "Document")) { // If we've traversed this far up, then the
+                                       // closing tag is invalid
+                    t->appendNode = origAppendNode;
+                    return;
+                }
+                t->appendNode = t->appendNode->parentNode;
+            }
+        }
+        // Continue appending to the parent of the element just closed. The
+        // root has no parent (a "</Document>" tag matches it), so stay on the
+        // root in that case instead of leaving appendNode NULL.
+        if (t->appendNode->parentNode)
+            t->appendNode = t->appendNode->parentNode;
+        return;
+    }
+    // JsonItem* nodeItem = CAlloc(sizeof(JsonItem), t->mem_task);
+    t->currentNode->parentNode = t->appendNode;
+    // nodeItem->value = t->currentNode;
+    // Json.AppendItem(t->appendNode->children, nodeItem);
+    t->appendNode->children->append(t->currentNode);
+    if (!@node_is_self_closing(t->currentNode))
+        t->appendNode = t->currentNode;
+}
+
+// Stores the attribute currently being parsed on the current node as a string.
+U0 @set_current_attribute_on_current_node(@html_tokenizer* t)
+{
+    t->currentNode->attributes->set(t->currentAttribute->name,
+        t->currentAttribute->value, JSON_STRING);
+    // Json.Set(t->currentNode->attributes, t->currentAttribute->name,
+    // t->currentAttribute->value, JSON_STRING, t->mem_task);
+}
+
+// Called right after a '<' was consumed: when the input continues with
+// "script", skips everything up to and including the closing script tag and
+// returns TRUE; otherwise leaves the input untouched and returns FALSE.
+Bool @skip_script_data(@html_tokenizer* t)
+{
+    // FIXME: This will work in most cases,
except for when tags are escaped in SCRIPT data.
+    U8 cmpbuf[16];
+    MemSet(cmpbuf, NULL, 16);
+    MemCpy(cmpbuf, t->inputBuffer.data + t->inputBuffer.pos, 6);
+    if (!StrICmp(cmpbuf, "script")) {
+        t->inputBuffer.pos += 6;
+        // Slide a 9-byte window over the input until it holds the closing tag.
+        // The terminator literal had been reduced to an empty string, so the
+        // scan only stopped at a NUL byte and swallowed the rest of the page;
+        // the 9-byte window and the "+ 8" below show what it is meant to be.
+        while (StrICmp(cmpbuf, "</script>")) {
+            // Unterminated script (assuming inputBuffer.size is the input's
+            // byte count): treat the remainder as script and stop at the end.
+            if (t->inputBuffer.pos >= t->inputBuffer.size) {
+                t->inputBuffer.pos = t->inputBuffer.size;
+                return TRUE;
+            }
+            MemSet(cmpbuf, NULL, 16);
+            MemCpy(cmpbuf, t->inputBuffer.data + t->inputBuffer.pos, 9);
+            ++t->inputBuffer.pos;
+        }
+        // pos is one past the start of the closing tag; skip its other 8 bytes.
+        t->inputBuffer.pos += 8;
+        return TRUE;
+    }
+    return FALSE;
+}
+
+// Data state: plain text between tags. '&' starts a character reference,
+// '<' starts a tag (or skips an entire script element), anything else is
+// added to the current text node.
+U0 @tokenizer_html_state_data(@html_tokenizer* t)
+{
+    @consume_next_input_char(t);
+    switch (t->currentInputChar) {
+    case '&':
+        // Set the return state to the data state. Switch to the character reference
+        // state.
+        t->returnState = HTML_STATE_DATA;
+        t->state = HTML_STATE_CHARACTER_REFERENCE;
+        break;
+    case '<':
+        if (!@skip_script_data(t)) {
+            // Switch to the tag open state.
+            // A pending text node is committed first.
+            if (t->dataStateCounter)
+                @emit_current_node(t);
+            t->dataStateCounter = 0;
+            t->state = HTML_STATE_TAG_OPEN;
+        }
+        break;
+    default:
+        // Emit the current input character as a character token.
+        @emit_current_character(t);
+        break;
+    }
+}
+
+// Tag open state: decides what follows a '<'.
+U0 @tokenizer_html_state_tag_open(@html_tokenizer* t)
+{
+    @consume_next_input_char(t);
+    switch (t->currentInputChar) {
+    case '!':
+        // Switch to the markup declaration open state.
+        t->state = HTML_STATE_MARKUP_DECLARATION_OPEN;
+        break;
+    case '/':
+        // Switch to the end tag open state.
+        t->state = HTML_STATE_END_TAG_OPEN;
+        break;
+    case 'A' ... 'Z':
+    case 'a' ... 'z':
+        // Create a new start tag token, set its tag name to the empty string.
+        // Reconsume in the tag name state.
+        @html_dom_node* node = @create_new_node("", t->mem_task);
+        t->currentNode = node;
+        t->inputBuffer.pos--;
+        t->state = HTML_STATE_TAG_NAME;
+        break;
+    case '?':
+        // This is an unexpected-question-mark-instead-of-tag-name parse error.
+        // Create a comment token whose data is the empty string. Reconsume in the
+        // bogus comment state.
+ t->inputBuffer.pos--; + t->state = HTML_STATE_BOGUS_COMMENT; + break; + default: + // This is an invalid-first-character-of-tag-name parse error. Emit a U+003C + // LESS-THAN SIGN character token. Reconsume in the data state. + @emit_current_character(t); + t->inputBuffer.pos--; + t->state = HTML_STATE_DATA; + break; + } +} + +U0 @tokenizer_html_state_markup_declaration_open(@html_tokenizer* t) +{ + if ((t->inputBuffer.data[t->inputBuffer.pos] == '-') && (t->inputBuffer.data[t->inputBuffer.pos + 1] == '-')) { + // Consume those two characters, create a comment token whose data is the + // empty string, and switch to the comment state. + t->inputBuffer.pos += 2; + t->state = HTML_STATE_COMMENT; + return; + } + U8 buf[8]; + buf[7] = NULL; + MemCpy(buf, t->inputBuffer.data + t->inputBuffer.pos, 7); + if (!StrICmp(buf, "DOCTYPE")) { + // Consume those characters and switch to the DOCTYPE state. + t->inputBuffer.pos += 7; + t->state = HTML_STATE_DOCTYPE; + return; + } + t->state = HTML_STATE_BOGUS_COMMENT; +} + +U0 @tokenizer_html_state_doctype(@html_tokenizer* t) +{ + @consume_next_input_char(t); + switch (t->currentInputChar) { + case '\n': + case '\r': + case '\t': + case ' ': + // Switch to the before DOCTYPE name state. + t->state = HTML_STATE_BEFORE_DOCTYPE_NAME; + break; + case '>': + // Reconsume in the before DOCTYPE name state. + t->inputBuffer.pos--; + t->state = HTML_STATE_BEFORE_DOCTYPE_NAME; + break; + default: + // This is a missing-whitespace-before-doctype-name parse error. Reconsume + // in the before DOCTYPE name state. + t->inputBuffer.pos--; + t->state = HTML_STATE_BEFORE_DOCTYPE_NAME; + break; + } +} + +U0 @tokenizer_html_state_before_doctype_name(@html_tokenizer* t) +{ + @consume_next_input_char(t); + switch (t->currentInputChar) { + case '\n': + case '\r': + case '\t': + case ' ': + // Ignore the character. + break; + case 'A' ... 'Z': + // Create a new DOCTYPE token. 
Set the token's name to the lowercase version + // of the current input character (add 0x0020 to the character's code + // point). Switch to the DOCTYPE name state. + t->state = HTML_STATE_DOCTYPE_NAME; + break; + case '>': + // This is a missing-doctype-name parse error. Create a new DOCTYPE token. + // Set its force-quirks flag to on. Switch to the data state. Emit the + // current token. + @emit_current_character(t); + t->state = HTML_STATE_DATA; + break; + default: + // Create a new DOCTYPE token. Set the token's name to the current input + // character. Switch to the DOCTYPE name state. + t->state = HTML_STATE_DOCTYPE_NAME; + break; + } +} + +U0 @tokenizer_html_state_doctype_name(@html_tokenizer* t) +{ + @consume_next_input_char(t); + switch (t->currentInputChar) { + case '\n': + case '\r': + case '\t': + case ' ': + // Switch to the after DOCTYPE name state. + t->state = HTML_STATE_AFTER_DOCTYPE_NAME; + break; + case '>': + // Switch to the data state. Emit the current DOCTYPE token. + t->state = HTML_STATE_DATA; + break; + case 'A' ... 'Z': + // Append the lowercase version of the current input character (add 0x0020 + // to the character's code point) to the current DOCTYPE token's name. + break; + default: + // Append the current input character to the current DOCTYPE token's name. + break; + } +} + +U0 @tokenizer_html_state_after_doctype_name(@html_tokenizer* t) +{ + @consume_next_input_char(t); + switch (t->currentInputChar) { + case '\n': + case '\r': + case '\t': + case ' ': + // Ignore the character. + break; + case 'A' ... 'Z': + // Create a new DOCTYPE token. Set the token's name to the lowercase version + // of the current input character (add 0x0020 to the character's code + // point). Switch to the DOCTYPE name state. + t->state = HTML_STATE_DOCTYPE_NAME; + break; + case '>': + // Switch to the data state. Emit the current DOCTYPE token. + t->state = HTML_STATE_DATA; + break; + default: + // Reconsume in the bogus DOCTYPE state. 
+ t->inputBuffer.pos--; + t->state = HTML_STATE_BOGUS_DOCTYPE; + break; + } +} + +U0 @tokenizer_html_state_bogus_doctype(@html_tokenizer* t) +{ + @consume_next_input_char(t); + switch (t->currentInputChar) { + case '>': + // Switch to the data state. Emit the DOCTYPE token. + t->state = HTML_STATE_DATA; + break; + default: + // Ignore the character. + break; + } +} + +U0 @tokenizer_html_state_bogus_comment(@html_tokenizer* t) +{ + @consume_next_input_char(t); + switch (t->currentInputChar) { + case '>': + // Switch to the data state. Emit the DOCTYPE token. + t->state = HTML_STATE_DATA; + break; + default: + // Ignore the character. + break; + } +} + +U0 @tokenizer_html_state_tag_name(@html_tokenizer* t) +{ + @consume_next_input_char(t); + switch (t->currentInputChar) { + case '\n': + case '\r': + case '\t': + case ' ': + // Switch to the before attribute name state. + t->state = HTML_STATE_BEFORE_ATTRIBUTE_NAME; + break; + case '/': + // Switch to the self-closing start tag state. + t->state = HTML_STATE_SELF_CLOSING_START_TAG; + break; + case '>': + // Switch to the data state. Emit the current tag token. + @emit_current_node(t); + t->state = HTML_STATE_DATA; + break; + case 'A' ... 'Z': + // Append the lowercase version of the current input character (add 0x0020 + // to the character's code point) to the current tag token's tag name. + t->currentNode->tagName[StrLen(t->currentNode->tagName)] = t->currentInputChar + 0x20; + if (!StrICmp(t->currentNode->tagName, "img")) + t->numOfImgNodes++; + if (!StrICmp(t->currentNode->tagName, "body")) { + t->currentNode->backgroundColor = Color(255, 255, 255); + t->currentNode->color = Color(0, 0, 0); + } + break; + default: + // Append the current input character to the current tag token's tag name. 
+ t->currentNode->tagName[StrLen(t->currentNode->tagName)] = t->currentInputChar; + if (!StrICmp(t->currentNode->tagName, "img")) + t->numOfImgNodes++; + if (!StrICmp(t->currentNode->tagName, "body")) { + t->currentNode->backgroundColor = Color(255, 255, 255); + t->currentNode->color = Color(0, 0, 0); + } + break; + } +} + +U0 @tokenizer_html_state_before_attribute_name(@html_tokenizer* t) +{ + @consume_next_input_char(t); + switch (t->currentInputChar) { + case '\n': + case '\r': + case '\t': + case ' ': + // Ignore the character. + break; + case '/': + case '>': + // Reconsume in the after attribute name state. + t->inputBuffer.pos--; + t->state = HTML_STATE_AFTER_ATTRIBUTE_NAME; + break; + case '=': + // This is an unexpected-equals-sign-before-attribute-name parse error. + // Start a new attribute in the current tag token. Set that attribute's name + // to the current input character, and its value to the empty string. Switch + // to the attribute name state. + t->currentAttribute = CAlloc(sizeof(JsonKey), t->mem_task); + t->currentAttribute->name = @init_growable_string(t->mem_task); + t->currentAttribute->value = @init_growable_string(t->mem_task); + t->currentAttribute->name = @append_char_to_growable_string( + t->currentAttribute->name, t->currentInputChar, t->mem_task); + t->state = HTML_STATE_ATTRIBUTE_NAME; + break; + default: + // Start a new attribute in the current tag token. Set that attribute name + // and value to the empty string. Reconsume in the attribute name state. 
+ t->currentAttribute = CAlloc(sizeof(JsonKey), t->mem_task); + t->currentAttribute->name = @init_growable_string(t->mem_task); + t->currentAttribute->value = @init_growable_string(t->mem_task); + t->inputBuffer.pos--; + t->state = HTML_STATE_ATTRIBUTE_NAME; + break; + } +} + +U0 @tokenizer_html_state_attribute_name(@html_tokenizer* t) +{ + @consume_next_input_char(t); + switch (t->currentInputChar) { + case '\n': + case '\r': + case '\t': + case ' ': + case '/': + case '>': + // Reconsume in the after attribute name state. + t->inputBuffer.pos--; + t->state = HTML_STATE_AFTER_ATTRIBUTE_NAME; + break; + case '=': + // Switch to the before attribute value state. + t->state = HTML_STATE_BEFORE_ATTRIBUTE_VALUE; + break; + case 'A' ... 'Z': + // Append the lowercase version of the current input character (add 0x0020 + // to the character's code point) to the current attribute's name. + t->currentAttribute->name = @append_char_to_growable_string( + t->currentAttribute->name, t->currentInputChar + 0x20, t->mem_task); + break; + case '"': + case '\'': + case '<': + // This is an unexpected-character-in-attribute-name parse error. Treat it as + // per the "anything else" entry below. + default: + // Append the current input character to the current attribute's name. + t->currentAttribute->name = @append_char_to_growable_string( + t->currentAttribute->name, t->currentInputChar, t->mem_task); + break; + } +} + +U0 @tokenizer_html_state_before_attribute_value(@html_tokenizer* t) +{ + @consume_next_input_char(t); + switch (t->currentInputChar) { + case '\n': + case '\r': + case '\t': + case ' ': + // Ignore the character. + break; + case '"': + // Switch to the attribute value (double-quoted) state. + t->state = HTML_STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED; + break; + case '\'': + // Switch to the attribute value (single-quoted) state. + t->state = HTML_STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED; + break; + case '>': + // This is a missing-attribute-value parse error. 
Switch to the data state. + // Emit the current tag token. + @emit_current_node(t); + t->state = HTML_STATE_DATA; + break; + default: + // Reconsume in the attribute value (unquoted) state. + t->inputBuffer.pos--; + t->state = HTML_STATE_ATTRIBUTE_VALUE_UNQUOTED; + break; + } +} + +U0 @tokenizer_html_state_attribute_value_double_quoted(@html_tokenizer* t) +{ + @consume_next_input_char(t); + switch (t->currentInputChar) { + case '"': + // Switch to the after attribute value (quoted) state. + @set_current_attribute_on_current_node(t); + t->state = HTML_STATE_AFTER_ATTRIBUTE_VALUE_QUOTED; + break; + /* + case '&': + // Set the return state to the attribute value (double-quoted) state. Switch + // to the character reference state. + t->returnState = HTML_STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED; + t->state = HTML_STATE_CHARACTER_REFERENCE; + break; + */ + default: + // Append the current input character to the current attribute's value. + t->currentAttribute->value = @append_char_to_growable_string( + t->currentAttribute->value, t->currentInputChar, t->mem_task); + break; + } +} + +U0 @tokenizer_html_state_attribute_value_unquoted(@html_tokenizer* t) +{ + @consume_next_input_char(t); + switch (t->currentInputChar) { + case '\n': + case '\r': + case '\t': + case ' ': + // Switch to the before attribute name state. + t->state = HTML_STATE_BEFORE_ATTRIBUTE_NAME; + break; + /* + case '&': + // Set the return state to the attribute value (double-quoted) state. Switch + // to the character reference state. + t->returnState = HTML_STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED; + t->state = HTML_STATE_CHARACTER_REFERENCE; + break; + */ + case '>': + // Switch to the data state. Emit the current tag token. + @emit_current_node(t); + t->state = HTML_STATE_DATA; + break; + default: + // Append the current input character to the current attribute's value. 
+ t->currentAttribute->value = @append_char_to_growable_string( + t->currentAttribute->value, t->currentInputChar, t->mem_task); + break; + } +} + +U0 @tokenizer_html_state_attribute_value_single_quoted(@html_tokenizer* t) +{ + @consume_next_input_char(t); + switch (t->currentInputChar) { + case '\'': + // Switch to the after attribute value (quoted) state. + @set_current_attribute_on_current_node(t); + t->state = HTML_STATE_AFTER_ATTRIBUTE_VALUE_QUOTED; + break; + /* + case '&': + // Set the return state to the attribute value (double-quoted) state. Switch + // to the character reference state. + t->returnState = HTML_STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED; + t->state = HTML_STATE_CHARACTER_REFERENCE; + break; + */ + default: + // Append the current input character to the current attribute's value. + t->currentAttribute->value = @append_char_to_growable_string( + t->currentAttribute->value, t->currentInputChar, t->mem_task); + break; + } +} + +U0 @tokenizer_html_state_after_attribute_value_quoted(@html_tokenizer* t) +{ + @consume_next_input_char(t); + switch (t->currentInputChar) { + case '\n': + case '\r': + case '\t': + case ' ': + // Switch to the before attribute name state. + t->state = HTML_STATE_BEFORE_ATTRIBUTE_NAME; + break; + case '/': + // Switch to the self-closing start tag state. + t->state = HTML_STATE_SELF_CLOSING_START_TAG; + break; + case '>': + // Switch to the data state. Emit the current tag token. + @emit_current_node(t); + t->state = HTML_STATE_DATA; + break; + default: + // This is a missing-whitespace-between-attributes parse error. Reconsume in + // the before attribute name state. + t->inputBuffer.pos--; + t->state = HTML_STATE_BEFORE_ATTRIBUTE_NAME; + break; + } +} + +U0 @tokenizer_html_state_end_tag_open(@html_tokenizer* t) +{ + @consume_next_input_char(t); + switch (t->currentInputChar) { + case 'A' ... 'Z': + case 'a' ... 'z': + // Create a new end tag token, set its tag name to the empty string. + // Reconsume in the tag name state. 
+ @html_dom_node* node = @create_new_node("/", t->mem_task); + t->currentNode = node; + t->inputBuffer.pos--; + t->state = HTML_STATE_TAG_NAME; + break; + case '>': + // This is a missing-end-tag-name parse error. Switch to the data state. + t->state = HTML_STATE_DATA; + break; + default: + // This is an invalid-first-character-of-tag-name parse error. Create a + // comment token whose data is the empty string. Reconsume in the bogus + // comment state. + t->inputBuffer.pos--; + t->state = HTML_STATE_BOGUS_COMMENT; + break; + } +} + +U0 @tokenizer_html_state_after_attribute_name(@html_tokenizer* t) +{ + @consume_next_input_char(t); + switch (t->currentInputChar) { + case '\n': + case '\r': + case '\t': + case ' ': + // Ignore the character. + break; + /* + case '"': + // Switch to the attribute value (double-quoted) state. + t->state = HTML_STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED; + break; + case '\'': + // Switch to the attribute value (single-quoted) state. + t->state = HTML_STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED; + break; + */ + case '/': + // Switch to the self-closing start tag state. + t->state = HTML_STATE_SELF_CLOSING_START_TAG; + break; + case '=': + // Switch to the before attribute value state. + t->state = HTML_STATE_BEFORE_ATTRIBUTE_VALUE; + break; + case '>': + // Switch to the data state. Emit the current tag token. + @set_current_attribute_on_current_node(t); + @emit_current_node(t); + t->state = HTML_STATE_DATA; + break; + default: + // Start a new attribute in the current tag token. Set that attribute name + // and value to the empty string. Reconsume in the attribute name state. 
+ t->currentAttribute = CAlloc(sizeof(JsonKey), t->mem_task); + t->currentAttribute->name = @init_growable_string(t->mem_task); + t->currentAttribute->value = @init_growable_string(t->mem_task); + t->inputBuffer.pos--; + t->state = HTML_STATE_ATTRIBUTE_NAME; + break; + } +} + +U0 @tokenizer_html_state_self_closing_start_tag(@html_tokenizer* t) +{ + @consume_next_input_char(t); + switch (t->currentInputChar) { + case '>': + // Set the self-closing flag of the current tag token. Switch to the data + // state. Emit the current tag token. + @emit_current_node(t); + t->state = HTML_STATE_DATA; + break; + default: + // This is an unexpected-solidus-in-tag parse error. Reconsume in the before + // attribute name state. + t->inputBuffer.pos--; + t->state = HTML_STATE_BEFORE_ATTRIBUTE_NAME; + break; + } +} + +U0 @tokenizer_html_state_character_reference(@html_tokenizer* t) +{ + // Set the temporary buffer to the empty string. + @empty_temp_buffer(t); + // Append a U+0026 AMPERSAND (&) character to the temporary buffer. + @append_char_to_temp_buffer(t, '&'); + @consume_next_input_char(t); + switch (t->currentInputChar) { + case 'A' ... 'Z': + case 'a' ... 'z': + // Reconsume in the named character reference state. + t->inputBuffer.pos--; + t->state = HTML_STATE_NAMED_CHARACTER_REFERENCE; + break; + case '#': + // Append the current input character to the temporary buffer. Switch to the + // numeric character reference state. + @append_char_to_temp_buffer(t, '#'); + t->state = HTML_STATE_NUMERIC_CHARACTER_REFERENCE; + break; + default: + // Flush code points consumed as a character reference. Reconsume in the + // return state. + t->consumeTempBuffer = TRUE; + t->state = t->returnState; + break; + } +} + +U0 @tokenizer_html_state_named_character_reference(@html_tokenizer* t) +{ + // Consume the maximum number of characters possible, where the consumed + // characters are one of the identifiers in the first column of the named + // character references table. 
Append each character to the temporary buffer + // when it's consumed. + @consume_next_input_char(t); + @append_char_to_temp_buffer(t, t->currentInputChar); + switch (t->currentInputChar) { + case ';': + @replace_temp_buffer_with_named_character_reference(t); + t->consumeTempBuffer = TRUE; + t->state = t->returnState; + break; + default: + break; + } +} + +U0 @tokenizer_html_state_numeric_character_reference(@html_tokenizer* t) +{ + @consume_next_input_char(t); + @append_char_to_temp_buffer(t, t->currentInputChar); + switch (t->currentInputChar) { + case ';': + @replace_temp_buffer_with_numeric_character_reference(t); + t->consumeTempBuffer = TRUE; + t->state = t->returnState; + break; + default: + break; + } +} + +U0 @tokenizer_html_state_comment_start(@html_tokenizer* t) +{ + @consume_next_input_char(t); + switch (t->currentInputChar) { + case '-': + // Switch to the comment start dash state. + t->state = HTML_STATE_COMMENT_START_DASH; + break; + case '>': + // This is an abrupt-closing-of-empty-comment parse error. Switch to the + // data state. Emit the current comment token. + t->state = HTML_STATE_DATA; + break; + default: + // Reconsume in the comment state. + t->inputBuffer.pos--; + t->state = HTML_STATE_COMMENT; + break; + } +} + +U0 @tokenizer_html_state_comment(@html_tokenizer* t) +{ + if ((t->inputBuffer.data[t->inputBuffer.pos] == '-') && (t->inputBuffer.data[t->inputBuffer.pos + 1] == '-') && (t->inputBuffer.data[t->inputBuffer.pos + 2] == '>')) { + // Consume those three characters, and switch to the data state. 
+ t->inputBuffer.pos += 3; + t->state = HTML_STATE_DATA; + return; + } + @consume_next_input_char(t); +} + +U0 @dump_node(@html_tokenizer* t, @html_dom_node* node) +{ + + I64 i; + + if (StrICmp(node->tagName, "InternalTextNode") && StrICmp(node->tagName, "Document")) { + for (i = 0; i < t->nodeTreeDepth; i++) + "-"; + "<%s> : parentNode: <%s 0x%08x>\n", node->tagName, + node->parentNode->tagName, node->parentNode; + } + + if (node->children->length) { + t->nodeTreeDepth += 2; + for (i = 0; i < node->children->length; i++) { + @dump_node(t, node->children->@(i)); + //@dump_node(t, Json.ArrayIndex(node->children, i)); + } + t->nodeTreeDepth -= 2; + } +} + +U0 @dump_node_list(@html_tokenizer* t) +{ + t->nodeTreeDepth = -2; + @dump_node(t, t->originNode); + "\n"; +} + +@html_dom_node* @html_tokenize_and_create_node_list(U8* buffer, I64 size, CTask* mem_task, + I64* num_of_images) +{ + @html_tokenizer t; + @init_tokenizer(&t, buffer, size, mem_task); + while (t.inputBuffer.pos < t.inputBuffer.size && buffer[t.inputBuffer.pos]) { + switch (t.state) { + case HTML_STATE_DATA: + @tokenizer_html_state_data(&t); + break; + case HTML_STATE_TAG_OPEN: + @tokenizer_html_state_tag_open(&t); + break; + case HTML_STATE_MARKUP_DECLARATION_OPEN: + @tokenizer_html_state_markup_declaration_open(&t); + break; + case HTML_STATE_DOCTYPE: + @tokenizer_html_state_doctype(&t); + break; + case HTML_STATE_BEFORE_DOCTYPE_NAME: + @tokenizer_html_state_before_doctype_name(&t); + break; + case HTML_STATE_DOCTYPE_NAME: + @tokenizer_html_state_doctype_name(&t); + break; + case HTML_STATE_TAG_NAME: + @tokenizer_html_state_tag_name(&t); + break; + case HTML_STATE_BEFORE_ATTRIBUTE_NAME: + @tokenizer_html_state_before_attribute_name(&t); + break; + case HTML_STATE_ATTRIBUTE_NAME: + @tokenizer_html_state_attribute_name(&t); + break; + case HTML_STATE_BEFORE_ATTRIBUTE_VALUE: + @tokenizer_html_state_before_attribute_value(&t); + break; + case HTML_STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED: + 
@tokenizer_html_state_attribute_value_double_quoted(&t); + break; + case HTML_STATE_AFTER_ATTRIBUTE_VALUE_QUOTED: + @tokenizer_html_state_after_attribute_value_quoted(&t); + break; + case HTML_STATE_CHARACTER_REFERENCE: + @tokenizer_html_state_character_reference(&t); + break; + case HTML_STATE_END_TAG_OPEN: + @tokenizer_html_state_end_tag_open(&t); + break; + case HTML_STATE_AFTER_ATTRIBUTE_NAME: + @tokenizer_html_state_after_attribute_name(&t); + break; + case HTML_STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED: + @tokenizer_html_state_attribute_value_single_quoted(&t); + break; + case HTML_STATE_NAMED_CHARACTER_REFERENCE: + @tokenizer_html_state_named_character_reference(&t); + break; + case HTML_STATE_NUMERIC_CHARACTER_REFERENCE: + @tokenizer_html_state_numeric_character_reference(&t); + break; + case HTML_STATE_AFTER_DOCTYPE_NAME: + @tokenizer_html_state_after_doctype_name(&t); + break; + case HTML_STATE_BOGUS_DOCTYPE: + @tokenizer_html_state_bogus_doctype(&t); + break; + case HTML_STATE_SELF_CLOSING_START_TAG: + @tokenizer_html_state_self_closing_start_tag(&t); + break; + case HTML_STATE_BOGUS_COMMENT: + @tokenizer_html_state_bogus_comment(&t); + break; + case HTML_STATE_COMMENT_START: + @tokenizer_html_state_comment_start(&t); + break; + case HTML_STATE_COMMENT: + @tokenizer_html_state_comment(&t); + break; + case HTML_STATE_ATTRIBUTE_VALUE_UNQUOTED: + @tokenizer_html_state_attribute_value_unquoted(&t); + break; + case HTML_STATE_INVALID: + default: + "\n$FG,0$HTML Tokenization error: Invalid or unimplemented " + "state\nInputBuffer position: %d\nState: %d$FD$\n\n", + t.inputBuffer.pos, t.state; + PressAKey; + break; + } + } + @html_dom_node* node_list = t.originNode; + *num_of_images = t.numOfImgNodes; + return node_list; +} diff --git a/System/Libraries/Http.HC b/System/Libraries/Http.HC index 8f289f2..66fbad6 100644 --- a/System/Libraries/Http.HC +++ b/System/Libraries/Http.HC @@ -1,8 +1,8 @@ -#define HTTP_TMP_DIRECTORY "B:/Tmp" -#define HTTP_CACHE_DIRECTORY 
"B:/Tmp/Cache" +#define HTTP_TMP_DIRECTORY "A:/Tmp" +#define HTTP_CACHE_DIRECTORY "A:/Tmp/Cache" #define HTTP_FETCH_BUFFER_SIZE 1024 << 15 -#define SEDECIM_USER_AGENT_STRING "Mozilla/5.0 (compatible; Sedecim/1.0; TempleOS) (KHTML, like Gecko)" +#define CYBERIA_USER_AGENT_STRING "Mozilla/5.0 (compatible; Cyberia/1.0; TempleOS) (KHTML, like Gecko)" class @http_buffer { @@ -366,7 +366,7 @@ I64 @http_req(@http_request* req) "GET %s%s HTTP/1.0\r\n" "Host: %s\r\n" "%s" - "User-Agent: " SEDECIM_USER_AGENT_STRING + "User-Agent: " CYBERIA_USER_AGENT_STRING "\r\n\r\n", req->url->path, req->url->query, req->url->host, headers_buf); break; @@ -375,7 +375,7 @@ I64 @http_req(@http_request* req) "HEAD %s%s HTTP/1.0\r\n" "Host: %s\r\n" "%s" - "User-Agent: " SEDECIM_USER_AGENT_STRING + "User-Agent: " CYBERIA_USER_AGENT_STRING "\r\n\r\n", req->url->path, req->url->query, req->url->host, headers_buf); break; @@ -384,7 +384,7 @@ I64 @http_req(@http_request* req) "POST %s%s HTTP/1.0\r\n" "Host: %s\r\n" "%s" - "User-Agent: " SEDECIM_USER_AGENT_STRING + "User-Agent: " CYBERIA_USER_AGENT_STRING "\r\n" "Content-Length: %d\r\n\r\n", req->url->path, req->url->query, req->url->host, headers_buf, @@ -396,7 +396,7 @@ I64 @http_req(@http_request* req) "PUT %s%s HTTP/1.0\r\n" "Host: %s\r\n" "%s" - "User-Agent: " SEDECIM_USER_AGENT_STRING + "User-Agent: " CYBERIA_USER_AGENT_STRING "\r\n" "Content-Length: %d\r\n\r\n", req->url->path, req->url->query, req->url->host, headers_buf, diff --git a/System/MakeSystem.HC b/System/MakeSystem.HC index 8df45de..083cc77 100644 --- a/System/MakeSystem.HC +++ b/System/MakeSystem.HC @@ -77,10 +77,15 @@ load_elf("M:/build/bin/tlse"); #include "Libraries/Clipboard"; #include "Libraries/Widget"; #include "Libraries/Theme"; -"}\n"; @http_init_tmp_and_cache_directories; +#include "Libraries/Css/Tokenizer"; + +#include "Libraries/Html/Tokenizer"; +#include "Libraries/Html/Renderer"; +"}\n"; + load_elf("M:/build/bin/net"); // Networking Utilities