# Enhanced Toy Web Browser - Performance Optimized # run: pip install requests pillow urllib3 # then this script import socket import tkinter as tk from tkinter import ttk, messagebox import struct import re import json import gzip import zlib import warnings from urllib3.exceptions import InsecureRequestWarning warnings.simplefilter('ignore', InsecureRequestWarning) import random import sys import traceback import urllib.parse import requests from tkinter.font import Font from PIL import Image, ImageTk import threading import time import io from collections import OrderedDict, defaultdict from concurrent.futures import ThreadPoolExecutor ############################################################################### # CONSTANTS AND CONFIGURATION ############################################################################### DEFAULT_FONT_FAMILY = "Arial" DEFAULT_FONT_SIZE = 14 MAX_FONT_CACHE_SIZE = 100 CONNECTION_TIMEOUT = 10 READ_TIMEOUT = 30 USER_AGENT = "ToyBrowser/1.0 (Educational Project) AppleWebKit/537.36" REQUEST_DELAY = 0.3 LAST_REQUEST_TIME = {} HTML_ENTITIES = { 'nbsp': '\u00a0', 'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': "'", 'cent': '¢', 'pound': '£', 'yen': '¥', 'euro': '€', 'copy': '©', 'reg': '®', 'trade': '™', 'times': '×', 'divide': '÷', 'mdash': '—', 'ndash': '–', 'lsquo': ''', 'rsquo': ''', 'ldquo': '"', 'rdquo': '"', 'bull': '•', 'hellip': '…', 'larr': '←', 'rarr': '→', 'uarr': '↑', 'darr': '↓', } INLINE_ELEMENTS = frozenset({ "text", "span", "a", "b", "strong", "i", "em", "u", "small", "code", "mark", "img", "sub", "sup", "del", "ins", "abbr", "kbd", "q", "var", "s", "cite", "time", "font", "tt", "big", "strike", "nobr", "wbr", "bdo", "dfn", "samp", "label", "data", "ruby", "rt", "rp", "bdi", "output", "meter", "progress", "picture", "source", "slot" }) VOID_ELEMENTS = frozenset({ "br", "hr", "meta", "link", "img", "input", "area", "base", "col", "command", "embed", "keygen", "param", "source", "track", "wbr", "frame", "spacer", "basefont", "isindex" }) SKIP_ELEMENTS = frozenset({ "script", "style", "head", "meta", "link", "title", "noscript", "template" }) # Pre-compiled regex patterns RE_COMMENT = re.compile(r'', re.DOTALL) RE_DOCTYPE = re.compile(r']*>', re.IGNORECASE) RE_ENTITY_NAMED = re.compile(r'&([a-zA-Z]+);') RE_ENTITY_NUM = re.compile(r'&#(\d+);') RE_ENTITY_HEX = re.compile(r'&#[xX]([0-9a-fA-F]+);') RE_WHITESPACE = re.compile(r'[\r\n\t]+') RE_MULTI_SPACE = re.compile(r' +') RE_ATTR = re.compile(r'([a-zA-Z_:][-a-zA-Z0-9_:.]*)(?:\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s>]*)))?') RE_CSS_COMMENT = re.compile(r'/\*.*?\*/', re.DOTALL) RE_TAG_ONLY = re.compile(r'^[a-zA-Z][a-zA-Z0-9]*$') ############################################################################### # 1) DNS + URL PARSING ############################################################################### DNS_CACHE = {} def resolve_hostname_dns(hostname, dns_server="8.8.8.8", port=53, timeout=3): hostname = hostname.strip().lower() if hostname in DNS_CACHE: return DNS_CACHE[hostname] try: socket.inet_aton(hostname) return hostname except OSError: pass tid = random.randint(0, 65535) header = struct.pack(">HHHHHH", tid, 0x0100, 1, 0, 0, 0) qname = b"".join(bytes([len(part)]) + part.encode("ascii") for part in hostname.split(".")) question = qname + b"\x00" + struct.pack(">HH", 1, 1) query = header + question s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) s.settimeout(timeout) try: s.sendto(query, (dns_server, port)) data, _ = s.recvfrom(512) except: s.close() return None s.close() if struct.unpack(">H", data[:2])[0] != tid: return None idx = 12 while idx < len(data) and data[idx] != 0: idx += 1 idx += 5 while idx < len(data): if data[idx] & 0xC0 == 0xC0: idx += 2 else: while idx < len(data) and data[idx] != 0: idx += 1 idx += 1 if idx + 10 > len(data): break rtype, rclass, _, rdlength = struct.unpack(">HHIH", data[idx:idx+10]) idx += 10 if rtype == 1 and rclass == 1 and rdlength == 4: ip_addr = ".".join(map(str, data[idx:idx+4])) DNS_CACHE[hostname] = ip_addr return ip_addr idx += rdlength return None class ParsedURL: __slots__ = ['scheme', 'host', 'port', 'path', 'query', 'fragment'] def __init__(self, scheme="http", host="", port=80, path="/", query="", fragment=""): self.scheme = scheme self.host = host self.port = port self.path = path self.query = query self.fragment = fragment def __str__(self): url = f"{self.scheme}://{self.host}" if (self.scheme == "http" and self.port != 80) or \ (self.scheme == "https" and self.port != 443): url += f":{self.port}" url += self.path if self.query: url += f"?{self.query}" return url def full_path(self): return f"{self.path}?{self.query}" if self.query else self.path def parse_url(url): url = url.strip() scheme = "http" if url.startswith("http://"): after, scheme = url[7:], "http" elif url.startswith("https://"): after, scheme = url[8:], "https" elif url.startswith("//"): after, scheme = url[2:], "https" else: after = url fragment = "" if "#" in after: after, fragment = after.split("#", 1) query = "" if "?" in after: after, query = after.split("?", 1) slash = after.find("/") if slash == -1: host_port, path = after, "/" else: host_port, path = after[:slash], after[slash:] or "/" if ":" in host_port: h, p = host_port.rsplit(":", 1) try: port, host = int(p), h except ValueError: host, port = host_port, 443 if scheme == "https" else 80 else: host = host_port port = 443 if scheme == "https" else 80 return ParsedURL(scheme, host.strip().lower(), port, path, query, fragment) def resolve_url(base_url, relative_url): relative_url = relative_url.strip() if not relative_url: return base_url if relative_url.startswith(("http://", "https://")): return parse_url(relative_url) if relative_url.startswith("//"): return parse_url(f"{base_url.scheme}:{relative_url}") if relative_url.startswith("/"): qpart = relative_url.split("?", 1) return ParsedURL(base_url.scheme, base_url.host, base_url.port, qpart[0], qpart[1] if len(qpart) > 1 else "", "") if relative_url.startswith("#"): return ParsedURL(base_url.scheme, base_url.host, base_url.port, base_url.path, base_url.query, relative_url[1:]) if relative_url.startswith("?"): return ParsedURL(base_url.scheme, base_url.host, base_url.port, base_url.path, relative_url[1:], "") base_dir = base_url.path.rsplit("/", 1)[0] if "/" in base_url.path else "" new_path = f"{base_dir}/{relative_url}" parts = new_path.split("/") normalized = [] for part in parts: if part == "..": if normalized and normalized[-1] != "": normalized.pop() elif part != ".": normalized.append(part) final_path = "/".join(normalized) if not final_path.startswith("/"): final_path = "/" + final_path qpart = final_path.split("?", 1) return ParsedURL(base_url.scheme, base_url.host, base_url.port, qpart[0], qpart[1] if len(qpart) > 1 else "", "") ############################################################################### # 2) HTTP - With connection pooling ############################################################################### _session = requests.Session() _session.verify = False def http_request(url_obj, method="GET", headers=None, body="", max_redirects=10): if headers is None: headers = {} cur_url, cur_method, cur_body = url_obj, method, body for _ in range(max_redirects): r_headers, r_body, r_url = _single_http_request(cur_url, cur_method, headers, cur_body) status_code = int(r_headers.get(":status_code", "0")) if status_code in (301, 302, 303, 307, 308): location = r_headers.get("location", "") if not location: return r_headers, r_body, r_url cur_url = resolve_url(cur_url, location) if status_code in (302, 303): cur_method, cur_body = "GET", "" else: return r_headers, r_body, r_url return r_headers, r_body, r_url def _single_http_request(url_obj, method="GET", headers=None, body=""): if url_obj.scheme == "https": return _requests_https(url_obj, method, headers, body) else: return _raw_http(url_obj, method, headers, body) def _requests_https(url_obj, method="GET", headers=None, body=""): if headers is None: headers = {} host = url_obj.host now = time.time() if host in LAST_REQUEST_TIME: elapsed = now - LAST_REQUEST_TIME[host] if elapsed < REQUEST_DELAY: time.sleep(REQUEST_DELAY - elapsed) LAST_REQUEST_TIME[host] = time.time() final_h = { "User-Agent": USER_AGENT, "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", "Accept-Language": "en-US,en;q=0.5", "Accept-Encoding": "gzip, deflate", } final_h.update({k: v for k, v in headers.items() if k.lower() not in ["host", "content-length"]}) try: resp = _session.request( method=method, url=str(url_obj), headers=final_h, data=body.encode("utf-8") if body else None, allow_redirects=False, timeout=(CONNECTION_TIMEOUT, READ_TIMEOUT) ) r_h = {":status_code": str(resp.status_code)} r_h.update({k.lower(): v for k, v in resp.headers.items()}) return r_h, resp.content, url_obj except Exception as e: raise Exception(f"HTTPS request failed: {e}") def _raw_http(url_obj, method="GET", headers=None, body=""): if headers is None: headers = {} host = url_obj.host now = time.time() if host in LAST_REQUEST_TIME: elapsed = now - LAST_REQUEST_TIME[host] if elapsed < REQUEST_DELAY: time.sleep(REQUEST_DELAY - elapsed) LAST_REQUEST_TIME[host] = time.time() ip_addr = resolve_hostname_dns(url_obj.host) if not ip_addr: raise Exception(f"DNS fail => {url_obj.host}") sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.settimeout(READ_TIMEOUT) try: sock.connect((ip_addr, url_obj.port)) except Exception as e: sock.close() raise Exception(f"Connection failed: {e}") lines = [ f"{method} {url_obj.full_path()} HTTP/1.1", f"Host: {url_obj.host}", f"User-Agent: {USER_AGENT}", "Accept: text/html,*/*", "Accept-Encoding: gzip, deflate", "Connection: close", f"Content-Length: {len(body)}", ] lines.extend(f"{k}: {v}" for k, v in headers.items() if k.lower() not in ["host", "connection", "user-agent"]) req_str = "\r\n".join(lines) + "\r\n\r\n" + body try: sock.sendall(req_str.encode("utf-8")) chunks = [] while True: try: chunk = sock.recv(16384) if not chunk: break chunks.append(chunk) except socket.timeout: break response = b"".join(chunks) finally: sock.close() hd_end = response.find(b"\r\n\r\n") if hd_end == -1: return {}, b"", url_obj raw_header = response[:hd_end].decode("utf-8", "replace") raw_body = response[hd_end+4:] lines = raw_header.split("\r\n") parts = lines[0].split(" ", 2) headers_dict = {":status_code": parts[1]} if len(parts) >= 2 else {} for line in lines[1:]: if ":" in line: kk, vv = line.split(":", 1) headers_dict[kk.strip().lower()] = vv.strip() te = headers_dict.get("transfer-encoding", "").lower() if "chunked" in te: raw_body = decode_chunked_body(raw_body) ce = headers_dict.get("content-encoding", "").lower() if "gzip" in ce: try: raw_body = gzip.decompress(raw_body) except: pass elif "deflate" in ce: try: raw_body = zlib.decompress(raw_body, -zlib.MAX_WBITS) except: try: raw_body = zlib.decompress(raw_body) except: pass return headers_dict, raw_body, url_obj def decode_chunked_body(rb): i, decoded = 0, [] while True: newline = rb.find(b"\r\n", i) if newline == -1: break try: chunk_size = int(rb[i:newline].decode("utf-8", "replace").split(";")[0].strip(), 16) except: chunk_size = 0 if chunk_size == 0: break i = newline + 2 decoded.append(rb[i:i+chunk_size]) i += chunk_size if rb[i:i+2] == b"\r\n": i += 2 return b"".join(decoded) ############################################################################### # 3) HTML PARSING ############################################################################### def decode_html_entities(text): def replace_named(m): return HTML_ENTITIES.get(m.group(1), m.group(0)) text = RE_ENTITY_NAMED.sub(replace_named, text) text = RE_ENTITY_NUM.sub(lambda m: chr(int(m.group(1))) if int(m.group(1)) < 65536 else m.group(0), text) text = RE_ENTITY_HEX.sub(lambda m: chr(int(m.group(1), 16)) if int(m.group(1), 16) < 65536 else m.group(0), text) return text class DOMNode: __slots__ = [ 'tag_name', 'attributes', 'children', 'parent', 'text', 'styles', 'computed_styles', 'inline_css', 'script_code', 'is_form', 'method', 'action', 'form_fields', 'is_inline', 'id', 'classes', 'event_handlers' ] def __init__(self, tag_name="document", parent=None): self.tag_name = tag_name.lower() if tag_name else "document" self.attributes = {} self.children = [] self.parent = parent self.text = "" self.styles = {} self.computed_styles = {} self.inline_css = "" self.script_code = "" self.is_form = (self.tag_name == "form") self.method = "get" self.action = "" self.form_fields = {} self.is_inline = (self.tag_name in INLINE_ELEMENTS) self.id = "" self.classes = [] self.event_handlers = {} def get_text_content(self): if self.tag_name == "text": return self.text return "".join(c.get_text_content() for c in self.children) def parse_html(html_text): html_text = RE_COMMENT.sub('', html_text) html_text = RE_DOCTYPE.sub('', html_text) root = DOMNode("document") current = root text_buffer = [] i, length = 0, len(html_text) def flush_text(): nonlocal text_buffer, current if not text_buffer: return raw = "".join(text_buffer) text_buffer = [] raw = decode_html_entities(raw) if current.tag_name not in ("pre", "code", "textarea"): raw = RE_WHITESPACE.sub(' ', raw) raw = RE_MULTI_SPACE.sub(' ', raw) if raw and (raw.strip() or (raw == ' ' and current.children)): n = DOMNode("text", current) n.text = raw current.children.append(n) while i < length: if html_text[i] == "<": if html_text[i:i+2] == "", i) i = close_i + 1 if close_i != -1 else length continue flush_text() close_i = html_text.find(">", i) if close_i == -1: break tag_content = html_text[i+1:close_i].strip() if tag_content.startswith("/"): close_tag = tag_content[1:].split()[0].lower() temp = current while temp and temp.tag_name != close_tag: temp = temp.parent if temp and temp.parent: current = temp.parent elif current.parent: current = current.parent i = close_i + 1 continue space_idx = next((idx for idx, c in enumerate(tag_content) if c.isspace()), None) if space_idx: tag_name = tag_content[:space_idx].lower().rstrip("/") attr_string = tag_content[space_idx:].rstrip("/") else: tag_name = tag_content.lower().rstrip("/") attr_string = "" if not tag_name or tag_name.startswith("!"): i = close_i + 1 continue if tag_name == "body": while current and current.tag_name in ("head", "title", "meta", "link", "style", "script"): if current.parent: current = current.parent else: break elif tag_name == "li" and current.tag_name == "li": if current.parent: current = current.parent elif tag_name == "p" and current.tag_name == "p": if current.parent: current = current.parent elif tag_name in ("dt", "dd") and current.tag_name in ("dt", "dd"): if current.parent: current = current.parent elif tag_name in ("div", "table", "form", "ul", "ol", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote", "pre") and current.tag_name == "p": if current.parent: current = current.parent nd = DOMNode(tag_name, current) for m in RE_ATTR.finditer(attr_string): name = m.group(1).lower() value = m.group(2) or m.group(3) or m.group(4) or "" nd.attributes[name] = decode_html_entities(value) nd.id = nd.attributes.get("id", "") if "class" in nd.attributes: nd.classes = nd.attributes["class"].split() if "style" in nd.attributes: for part in nd.attributes["style"].split(";"): if ":" in part: p, v = part.split(":", 1) nd.styles[p.strip().lower()] = v.strip() nd.event_handlers = {k[2:]: v for k, v in nd.attributes.items() if k.startswith("on")} if "bgcolor" in nd.attributes: nd.styles["background-color"] = nd.attributes["bgcolor"] if "color" in nd.attributes and tag_name == "font": nd.styles["color"] = nd.attributes["color"] if "align" in nd.attributes: nd.styles["text-align"] = nd.attributes["align"] if "valign" in nd.attributes: nd.styles["vertical-align"] = nd.attributes["valign"] if "width" in nd.attributes and tag_name in ("table", "td", "th", "img"): w = nd.attributes["width"] nd.styles["width"] = w if w.endswith(("%", "px")) else w + "px" if "height" in nd.attributes and tag_name in ("table", "td", "th", "img", "tr"): h = nd.attributes["height"] nd.styles["height"] = h if h.endswith(("%", "px")) else h + "px" if tag_name == "form": nd.is_form = True nd.method = nd.attributes.get("method", "get").lower() nd.action = nd.attributes.get("action", "") current.children.append(nd) is_self_closing = tag_content.endswith("/") or tag_name in VOID_ELEMENTS if tag_name == "input": nm = nd.attributes.get("name", "") val = nd.attributes.get("value", "") fa = current while fa and not fa.is_form: fa = fa.parent if fa and nm: fa.form_fields[nm] = [val, nd] if is_self_closing: i = close_i + 1 continue if tag_name in ("title", "textarea", "style", "script"): close_tag = f"" close_t = html_text.lower().find(close_tag, close_i+1) if close_t == -1: i = length continue content = html_text[close_i+1:close_t] if tag_name == "title": nd.text = decode_html_entities(content) elif tag_name == "textarea": nd.text = decode_html_entities(content) nm = nd.attributes.get("name", "") fa = current while fa and not fa.is_form: fa = fa.parent if fa and nm: fa.form_fields[nm] = [nd.text, nd] elif tag_name == "style": nd.inline_css = content elif tag_name == "script": nd.script_code = content i = close_t + len(close_tag) continue current = nd i = close_i + 1 else: text_buffer.append(html_text[i]) i += 1 flush_text() return root ############################################################################### # 4) CSS PARSING - With rule indexing ############################################################################### class CSSRule: __slots__ = ['selector', 'properties', 'specificity', 'tag', 'id_sel', 'class_sel'] def __init__(self, selector, properties): self.selector = selector.strip() self.properties = properties self.specificity = ( selector.count("#"), selector.count("."), len(re.findall(r'(?:^|[\s>+~])([a-zA-Z][a-zA-Z0-9]*)', selector)) ) sel = self.selector.split()[-1] if " " in self.selector else self.selector self.tag = None self.id_sel = None self.class_sel = None if sel.startswith("#"): self.id_sel = sel[1:].split(".")[0] elif sel.startswith("."): self.class_sel = sel[1:] elif RE_TAG_ONLY.match(sel): self.tag = sel.lower() class CSSIndex: __slots__ = ['by_tag', 'by_id', 'by_class', 'universal'] def __init__(self, rules): self.by_tag = defaultdict(list) self.by_id = defaultdict(list) self.by_class = defaultdict(list) self.universal = [] for rule in rules: if rule.tag: self.by_tag[rule.tag].append(rule) elif rule.id_sel: self.by_id[rule.id_sel].append(rule) elif rule.class_sel: self.by_class[rule.class_sel].append(rule) else: self.universal.append(rule) def get_matching_rules(self, node): candidates = list(self.universal) candidates.extend(self.by_tag.get(node.tag_name, [])) if node.id: candidates.extend(self.by_id.get(node.id, [])) for cls in node.classes: candidates.extend(self.by_class.get(cls, [])) return candidates def parse_css(css_text): rules = [] css_text = RE_CSS_COMMENT.sub('', css_text) css_text = re.sub(r'@media[^{]+\{', '{', css_text) css_text = re.sub(r'@supports[^{]+\{', '{', css_text) css_text = re.sub(r'@keyframes[^{]+\{[^}]*\}[^}]*\}', '', css_text) css_text = re.sub(r'@-webkit-keyframes[^{]+\{[^}]*\}[^}]*\}', '', css_text) css_text = re.sub(r'@font-face[^}]+\}', '', css_text) css_text = re.sub(r'@[a-z-]+[^;{]+[;{]', '', css_text) i = 0 while i < len(css_text): bo = css_text.find("{", i) if bo == -1: break sel_text = css_text[i:bo].strip() depth, bc = 1, bo + 1 while bc < len(css_text) and depth > 0: if css_text[bc] == '{': depth += 1 elif css_text[bc] == '}': depth -= 1 bc += 1 bc -= 1 if bc <= bo: i = bo + 1 continue block = css_text[bo+1:bc].strip() i = bc + 1 props = {} for decl in block.split(";"): if ":" in decl: colon = decl.find(":") prop = decl[:colon].strip().lower() val = decl[colon+1:].strip() val = re.sub(r'\s*!important\s*$', '', val, flags=re.IGNORECASE) if not prop.startswith(("-", "--")) and not val.startswith("var("): props[prop] = val for s in sel_text.split(","): s = s.strip() if s and not s.startswith("@") and "::" not in s: s_clean = re.sub(r':[a-zA-Z-]+($[^)]*$)?', '', s) if s_clean.strip(): rules.append(CSSRule(s_clean.strip(), dict(props))) return rules def selector_matches(sel, node): sel = sel.strip() tag = node.tag_name if RE_TAG_ONLY.match(sel): return sel.lower() == tag if sel.startswith("#"): return node.id == sel[1:].split(".")[0] if sel.startswith("."): return sel[1:] in node.classes if sel == "*": return True if " " in sel: parts = sel.split() if len(parts) >= 2 and selector_matches(parts[-1], node): p = node.parent ancestor_sel = " ".join(parts[:-1]) while p: if selector_matches(ancestor_sel, p): return True p = p.parent return False if "." in sel and not sel.startswith("."): parts = sel.split(".") elem = parts[0] if elem and elem.lower() != tag: return False return all(c in node.classes for c in parts[1:] if c) return False def apply_css_rules_indexed(node, css_index): candidates = css_index.get_matching_rules(node) matched = [] for r in candidates: if selector_matches(r.selector, node): matched.append((r.specificity, r)) matched.sort(key=lambda x: x[0]) for _, r in matched: node.styles.update(r.properties) for c in node.children: apply_css_rules_indexed(c, css_index) def _px_to_int(v, default=0, base=None, viewport_width=1000, viewport_height=800): try: s = str(v).strip().lower() if not s or s in ('auto', 'none'): return default if ' ' in s: s = s.split()[0] if s.endswith("%") and base is not None: return int(float(s[:-1]) * 0.01 * base) if s.endswith("px"): return int(float(s[:-2])) if s.endswith("pt"): return int(float(s[:-2]) * 1.33) if s.endswith("em"): return int(float(s[:-2]) * 16) if s.endswith("rem"): return int(float(s[:-3]) * 16) if s.endswith("vw"): return int(float(s[:-2]) * viewport_width / 100) if s.endswith("vh"): return int(float(s[:-2]) * viewport_height / 100) return int(float(s)) except: return default _COLOR_CACHE = {} _NAMED_COLORS = { 'black': '#000000', 'white': '#ffffff', 'red': '#ff0000', 'green': '#008000', 'blue': '#0000ff', 'yellow': '#ffff00', 'gray': '#808080', 'grey': '#808080', 'orange': '#ffa500', 'purple': '#800080', 'pink': '#ffc0cb', 'brown': '#a52a2a', 'cyan': '#00ffff', 'magenta': '#ff00ff', 'lime': '#00ff00', 'navy': '#000080', 'teal': '#008080', 'olive': '#808000', 'maroon': '#800000', 'silver': '#c0c0c0', 'aqua': '#00ffff', 'fuchsia': '#ff00ff', 'darkgray': '#a9a9a9', 'darkgrey': '#a9a9a9', 'lightgray': '#d3d3d3', 'lightgrey': '#d3d3d3', 'dimgray': '#696969', 'gold': '#ffd700', 'coral': '#ff7f50', 'tomato': '#ff6347', 'salmon': '#fa8072', 'wheat': '#f5deb3', 'beige': '#f5f5dc', 'ivory': '#fffff0', 'khaki': '#f0e68c', 'violet': '#ee82ee', 'indigo': '#4b0082', 'crimson': '#dc143c', 'chocolate': '#d2691e', } def parse_color(color_str): if not color_str: return None if color_str in _COLOR_CACHE: return _COLOR_CACHE[color_str] original = color_str color_str = color_str.strip().lower() if color_str in ('transparent', 'inherit', 'initial', 'unset', 'currentcolor', 'none'): return None result = None if color_str in _NAMED_COLORS: result = _NAMED_COLORS[color_str] elif color_str.startswith('#'): if len(color_str) == 4: result = f"#{color_str[1]*2}{color_str[2]*2}{color_str[3]*2}" elif len(color_str) >= 7: result = color_str[:7] else: result = color_str elif color_str.startswith('rgb'): m = re.match(r'rgba?\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)', color_str) if m: r, g, b = int(m.group(1)), int(m.group(2)), int(m.group(3)) result = f"#{r:02x}{g:02x}{b:02x}" _COLOR_CACHE[original] = result return result def is_color_visible(color, bg_color="#ffffff"): if not color or color in ('transparent', 'inherit', 'none'): return False try: if color.startswith('#') and len(color) >= 7: r = int(color[1:3], 16) g = int(color[3:5], 16) b = int(color[5:7], 16) if r > 250 and g > 250 and b > 250: return False if (r * 299 + g * 587 + b * 114) / 1000 > 245: return False return True except: return True def compute_styles(node, parent_computed=None): defaults = { "color": "black", "background-color": "transparent", "font-size": f"{DEFAULT_FONT_SIZE}px", "font-weight": "normal", "font-style": "normal", "font-family": DEFAULT_FONT_FAMILY, "text-decoration": "none", "display": "inline" if node.is_inline else "block", "margin-top": "0", "margin-right": "0", "margin-bottom": "0", "margin-left": "0", "padding-top": "0", "padding-right": "0", "padding-bottom": "0", "padding-left": "0", "text-align": "left", "vertical-align": "baseline", "white-space": "normal", "visibility": "visible", "opacity": "1" } if parent_computed: for p in ("color", "font-size", "font-family", "font-weight", "font-style", "text-align", "visibility"): if p in parent_computed: defaults[p] = parent_computed[p] for key, val in node.styles.items(): if key == "display": if val == "none": defaults["display"] = "none" elif val in ("block", "flex", "grid", "table", "list-item", "inline-block"): defaults["display"] = "block" elif val == "inline": defaults["display"] = "inline" elif key == "visibility" and val == "hidden": defaults["visibility"] = "hidden" elif key == "color": parsed = parse_color(val) if parsed and is_color_visible(parsed): defaults["color"] = parsed elif key in ("background", "background-color"): bg_val = val.split()[0] if val else "" parsed = parse_color(bg_val) if parsed: defaults["background-color"] = parsed elif key == "opacity": try: if float(val) >= 0.1: defaults["opacity"] = val except: pass elif key in defaults: defaults[key] = val for prop in ("margin", "padding"): if prop in node.styles: parts = node.styles[prop].split() dirs = ["top", "right", "bottom", "left"] if len(parts) == 1: for d in dirs: defaults[f"{prop}-{d}"] = parts[0] elif len(parts) == 2: defaults[f"{prop}-top"] = defaults[f"{prop}-bottom"] = parts[0] defaults[f"{prop}-right"] = defaults[f"{prop}-left"] = parts[1] elif len(parts) == 3: defaults[f"{prop}-top"] = parts[0] defaults[f"{prop}-right"] = defaults[f"{prop}-left"] = parts[1] defaults[f"{prop}-bottom"] = parts[2] elif len(parts) >= 4: for i, d in enumerate(dirs): defaults[f"{prop}-{d}"] = parts[i] t = node.tag_name if t in ("h1", "h2", "h3", "h4", "h5", "h6"): defaults["font-weight"] = "bold" sizes = {"h1": "28px", "h2": "24px", "h3": "20px", "h4": "18px", "h5": "16px", "h6": "14px"} defaults["font-size"] = sizes.get(t, "16px") defaults["margin-top"] = defaults["margin-bottom"] = "15px" elif t in ("b", "strong"): defaults["font-weight"] = "bold" elif t in ("i", "em"): defaults["font-style"] = "italic" elif t == "u": defaults["text-decoration"] = "underline" elif t == "a": if defaults["color"] == "black" or not is_color_visible(defaults["color"]): defaults["color"] = "#0000EE" elif t == "th": defaults["font-weight"] = "bold" defaults["text-align"] = "center" elif t == "p": if _px_to_int(defaults["margin-bottom"]) < 10: defaults["margin-bottom"] = "10px" elif t in ("code", "pre", "tt"): defaults["font-family"] = "Courier New" elif t in ("ul", "ol"): defaults["margin-top"] = "10px" defaults["margin-bottom"] = "10px" defaults["padding-left"] = "30px" elif t == "li": defaults["margin-bottom"] = "5px" if not is_color_visible(defaults.get("color")): defaults["color"] = "black" node.computed_styles = defaults for c in node.children: compute_styles(c, defaults) ############################################################################### # 5) JAVASCRIPT ENGINE - Minimal ############################################################################### class JSEngine: def __init__(self, dom_root, browser=None): self.dom_root = dom_root self.browser = browser self.global_vars = {} def execute_scripts(self): for sc in self._collect_scripts(self.dom_root): try: self._exec(sc) except: pass def _collect_scripts(self, node): arr = [] if node.tag_name == "script" and node.script_code: arr.append(node.script_code) for c in node.children: arr.extend(self._collect_scripts(c)) return arr def _exec(self, sc): sc = re.sub(r'//.*$', '', sc, flags=re.MULTILINE) sc = re.sub(r'/\*.*?\*/', '', sc, flags=re.DOTALL) for line in sc.split(";"): line = line.strip() if line and "=" in line and "==" not in line: parts = line.split("=", 1) var = parts[0].strip() for kw in ("var ", "let ", "const "): if var.startswith(kw): var = var[len(kw):].strip() break val = parts[1].strip() if val.startswith(('"', "'")) and val.endswith(('"', "'")): self.global_vars[var] = val[1:-1] elif val.isdigit(): self.global_vars[var] = int(val) def handle_event(self, node, event_type): if event_type in node.event_handlers: try: self._exec(node.event_handlers[event_type]) except: pass ############################################################################### # 6) LAYOUT ENGINE - With proper font measurement ############################################################################### GLOBAL_MEASURE_CANVAS = None FONT_CACHE = OrderedDict() MEASURE_CACHE = {} LINEHEIGHT_CACHE = {} class LayoutBox: __slots__ = ['dom_node', 'x', 'y', 'width', 'height', 'children', 'style', 'widget', 'is_image', 'is_input', 'is_button', 'is_textarea', 'is_inline'] def __init__(self, node): self.dom_node = node self.x = self.y = self.width = self.height = 0 self.children = [] self.style = { "bold": False, "italic": False, "underline": False, "color": "black", "size": DEFAULT_FONT_SIZE, "family": DEFAULT_FONT_FAMILY, "margin": {"top": 0, "right": 0, "bottom": 0, "left": 0}, "padding": {"top": 0, "right": 0, "bottom": 0, "left": 0}, "background_color": "transparent", "text_align": "left", "valign": "baseline" } self.widget = None self.is_image = self.is_input = self.is_button = self.is_textarea = False self.is_inline = node.is_inline if node else False def get_font(style): weight = "bold" if style.get("bold") else "normal" slant = "italic" if style.get("italic") else "roman" underline = 1 if style.get("underline") else 0 size = max(8, min(72, style.get("size", DEFAULT_FONT_SIZE))) family = style.get("family", DEFAULT_FONT_FAMILY) key = (family, size, weight, slant, underline) if key not in FONT_CACHE: if len(FONT_CACHE) > MAX_FONT_CACHE_SIZE: FONT_CACHE.popitem(last=False) try: FONT_CACHE[key] = Font(family=family, size=size, weight=weight, slant=slant, underline=underline) except: FONT_CACHE[key] = Font(family="Arial", size=size, weight=weight, slant=slant, underline=underline) return FONT_CACHE[key] def measure_text(txt, style): """Measure text width using actual font metrics with caching""" if not txt: return 0 size = style.get("size", DEFAULT_FONT_SIZE) bold = style.get("bold", False) italic = style.get("italic", False) family = style.get("family", DEFAULT_FONT_FAMILY) # Check cache first key = (txt, size, bold, italic, family) if key in MEASURE_CACHE: return MEASURE_CACHE[key] # Use actual font measurement if GLOBAL_MEASURE_CANVAS: f = get_font(style) w = f.measure(txt) else: # Fallback estimation if canvas not available avg_width = size * (0.6 if bold else 0.55) w = len(txt) * avg_width # Cache result (limit cache size) if len(MEASURE_CACHE) < 50000: MEASURE_CACHE[key] = w return w def measure_lineheight(style): """Measure line height using actual font metrics with caching""" size = style.get("size", DEFAULT_FONT_SIZE) bold = style.get("bold", False) italic = style.get("italic", False) family = style.get("family", DEFAULT_FONT_FAMILY) key = (size, bold, italic, family) if key in LINEHEIGHT_CACHE: return LINEHEIGHT_CACHE[key] if GLOBAL_MEASURE_CANVAS: f = get_font(style) h = f.metrics("linespace") else: h = size + 4 LINEHEIGHT_CACHE[key] = h return h def is_inside_link(node): p = node.parent if hasattr(node, 'parent') else None while p: if p.tag_name == "a": return True p = p.parent return False def combine_styles(parent_style, node): s = { "bold": parent_style.get("bold", False), "italic": parent_style.get("italic", False), "underline": False, "color": parent_style.get("color", "black"), "size": parent_style.get("size", DEFAULT_FONT_SIZE), "family": parent_style.get("family", DEFAULT_FONT_FAMILY), "margin": {"top": 0, "right": 0, "bottom": 0, "left": 0}, "padding": {"top": 0, "right": 0, "bottom": 0, "left": 0}, "background_color": "transparent", "text_align": parent_style.get("text_align", "left"), "valign": parent_style.get("valign", "baseline") } cs = getattr(node, 'computed_styles', {}) if cs: color_val = cs.get("color") if color_val: parsed = parse_color(color_val) if parsed and is_color_visible(parsed): s["color"] = parsed fw = cs.get("font-weight", "normal") s["bold"] = fw in ("bold", "bolder", "700", "800", "900") or parent_style.get("bold", False) if cs.get("font-style") == "italic": s["italic"] = True s["size"] = _px_to_int(cs.get("font-size", f"{DEFAULT_FONT_SIZE}px"), DEFAULT_FONT_SIZE) ff = cs.get("font-family", "") if ff: s["family"] = ff.split(",")[0].strip().strip("'\"") bc = cs.get("background-color", "transparent") if bc and bc not in ("transparent", "inherit", "initial"): parsed_bg = parse_color(bc) if parsed_bg: s["background_color"] = parsed_bg s["margin"] = { "top": _px_to_int(cs.get("margin-top", "0")), "right": _px_to_int(cs.get("margin-right", "0")), "bottom": _px_to_int(cs.get("margin-bottom", "0")), "left": _px_to_int(cs.get("margin-left", "0")) } s["padding"] = { "top": _px_to_int(cs.get("padding-top", "0")), "right": _px_to_int(cs.get("padding-right", "0")), "bottom": _px_to_int(cs.get("padding-bottom", "0")), "left": _px_to_int(cs.get("padding-left", "0")) } s["text_align"] = cs.get("text-align", s["text_align"]) s["valign"] = cs.get("vertical-align", s["valign"]) t = node.tag_name if hasattr(node, 'tag_name') else "" if t in ("b", "strong"): s["bold"] = True if t in ("i", "em"): s["italic"] = True if t == "u": s["underline"] = True if t == "a": s["underline"] = True if s["color"] == "black" or not is_color_visible(s["color"]): s["color"] = "#0000EE" elif t == "text" and is_inside_link(node): s["underline"] = True if s["color"] == "black" or not is_color_visible(s["color"]): s["color"] = "#0000EE" if not is_color_visible(s.get("color")): s["color"] = "black" return s def layout_tree(dom_node, container_width=800, offset_x=0, offset_y=0): root_box = LayoutBox(dom_node) root_box.style = { "bold": False, "italic": False, "underline": False, "color": "black", "size": DEFAULT_FONT_SIZE, "family": DEFAULT_FONT_FAMILY, "margin": {"top": 0, "right": 0, "bottom": 0, "left": 0}, "padding": {"top": 0, "right": 0, "bottom": 0, "left": 0}, "background_color": "white", "text_align": "left", "valign": "baseline" } def layout_block(node, pbox, x, y, avail_w, parent_st): box = LayoutBox(node) pbox.children.append(box) st = combine_styles(parent_st, node) box.style = st mt, ml, mr, mb = st["margin"]["top"], st["margin"]["left"], st["margin"]["right"], st["margin"]["bottom"] pt, pl, pr, pb = st["padding"]["top"], st["padding"]["left"], st["padding"]["right"], st["padding"]["bottom"] box.x = x + ml box.y = y + mt content_x = box.x + pl content_y = box.y + pt content_w = max(10, avail_w - ml - mr - pl - pr) tag = node.tag_name if tag in SKIP_ELEMENTS: box.width = box.height = 0 return box cs = getattr(node, 'computed_styles', {}) if cs.get("display") == "none": box.width = box.height = 0 return box if tag == "center": cy = content_y max_w = 0 for child in node.children: cb = layout_block(child, box, content_x, cy, content_w, st) max_w = max(max_w, cb.width) cy = cb.y + cb.height dx = (content_w - max_w) // 2 if dx > 0: def shift(lb, d): lb.x += d for ch in lb.children: shift(ch, d) for ch in box.children: shift(ch, dx) box.width = avail_w - ml - mr box.height = (cy - box.y) + pb + mb return box if tag == "table": layout_table(node, box, content_x, content_y, content_w, st) bh = content_y for c in box.children: bh = max(bh, c.y + c.height) box.width = content_w + pl + pr box.height = (bh - box.y) + pb return box if tag == "hr": box.width = content_w box.height = 2 + mt + mb return box if tag == "img": box.is_image = True wv = node.attributes.get("width", "") or node.styles.get("width", "") hv = node.attributes.get("height", "") or node.styles.get("height", "") w = _px_to_int(wv, 0, content_w) if wv else 0 h = _px_to_int(hv, 0, 800) if hv else 0 explicit_w = bool(wv and w > 0) explicit_h = bool(hv and h > 0) src = node.attributes.get("src", "") if (w == 0 or h == 0) and src: src_lower = src.lower() if any(x in src_lower for x in ("arrow", "vote", "grayarrow", "s.gif", "1x1", "spacer", "triangle", "icon")): w = w or 10 h = h or 10 explicit_w = explicit_h = True else: if w == 0: w = min(content_w - 20, 800) if h == 0: h = 450 elif w == 0: w = min(content_w - 20, 600) elif h == 0: h = int(w * 0.6) w = min(max(1, w), content_w) h = min(max(1, h), 800) box.width = w + pl + pr box.height = h + pt + pb box.style["explicit_w"] = explicit_w box.style["explicit_h"] = explicit_h box.style["target_w"] = w box.style["target_h"] = h return box if tag == "input": input_type = node.attributes.get("type", "text").lower() if input_type == "submit": box.is_button = True box.width = min(120, content_w) box.height = 28 elif input_type in ("checkbox", "radio"): box.is_input = True box.width = box.height = 20 elif input_type == "hidden": box.width = box.height = 0 else: box.is_input = True size = int(node.attributes.get("size", "20")) box.width = min(size * 8 + 16, content_w) box.height = 24 return box if tag == "textarea": box.is_textarea = True cols = int(node.attributes.get("cols", "40")) rows = int(node.attributes.get("rows", "4")) box.width = min(cols * 8, content_w) box.height = rows * 18 return box if tag == "button": box.is_button = True box.width = min(100, content_w) box.height = 28 return box if tag == "br": box.width = 0 box.height = measure_lineheight(st) return box if tag == "li": parent_tag = node.parent.tag_name if node.parent else "" if parent_tag == "ol": idx = sum(1 for s in node.parent.children[:node.parent.children.index(node)+1] if s.tag_name == "li") box.style["list_marker"] = f"{idx}. " else: box.style["list_marker"] = "• " current_y = content_y line_items = [] line_h = 0 line_x = content_x def flush_line(): nonlocal line_items, current_y, line_h, line_x if not line_items: line_x = content_x return line_w = sum(it.width for it in line_items) align = st.get("text_align", "left") shift = 0 if align == "center": shift = (content_w - line_w) // 2 elif align == "right": shift = content_w - line_w if shift > 0: for it in line_items: it.x += shift for it in line_items: it.y = current_y current_y += line_h line_items = [] line_x = content_x line_h = 0 for child in node.children: if child.tag_name in ("script", "style"): continue child_cs = getattr(child, 'computed_styles', {}) if child_cs.get("display") == "none": continue is_block = child.tag_name not in INLINE_ELEMENTS and child.tag_name != "text" if is_block: if line_items: flush_line() cb = layout_block(child, box, content_x, current_y, content_w, st) current_y = cb.y + cb.height else: if child.tag_name == "text": text = child.text if not text: continue # Split into words and whitespace, preserving spaces tokens = re.findall(r'\S+|\s+', text) child_st = combine_styles(st, child) th = measure_lineheight(child_st) for tok in tokens: # Skip leading whitespace on a line if tok.isspace() and not line_items: continue tw = measure_text(tok, child_st) # Wrap if needed (but not for whitespace) if line_x + tw > content_x + content_w and not tok.isspace() and line_items: flush_line() if tok.isspace(): continue tbox = LayoutBox(child) tbox.style = child_st tbox.x = line_x tbox.y = current_y tbox.width = tw tbox.height = th tbox.dom_node = DOMNode("text") tbox.dom_node.text = tok box.children.append(tbox) line_items.append(tbox) line_x += tw line_h = max(line_h, th) else: cbox = layout_inline(child, box, line_x, current_y, content_x + content_w - line_x, st) if line_x + cbox.width > content_x + content_w and line_items: flush_line() cbox.x = line_x line_items.append(cbox) line_x += cbox.width line_h = max(line_h, cbox.height) if line_items: flush_line() if current_y == content_y: current_y = content_y + measure_lineheight(st) box.width = avail_w - ml - mr box.height = (current_y - box.y) + pb + mb return box def layout_inline(node, pbox, x, y, avail_w, parent_st): box = LayoutBox(node) pbox.children.append(box) st = combine_styles(parent_st, node) box.style = st box.is_inline = True box.x = x box.y = y if node.tag_name == "img": box.is_image = True wv = node.attributes.get("width", "") or node.styles.get("width", "") hv = node.attributes.get("height", "") or node.styles.get("height", "") w = _px_to_int(wv, 0, avail_w) if wv else 0 h = _px_to_int(hv, 0, 800) if hv else 0 explicit_w = bool(wv and w > 0) explicit_h = bool(hv and h > 0) src = node.attributes.get("src", "") if (w == 0 or h == 0) and src: src_lower = src.lower() if any(x in src_lower for x in ("arrow", "vote", "grayarrow", "s.gif", "1x1", "spacer", "triangle", "icon")): w = w or 10 h = h or 10 explicit_w = explicit_h = True else: if w == 0: w = min(avail_w - 20, 600) if h == 0: h = 400 elif w == 0: w = min(avail_w - 20, 400) elif h == 0: h = int(w * 0.6) box.width = min(max(1, w), avail_w) box.height = max(1, h) box.style["explicit_w"] = explicit_w box.style["explicit_h"] = explicit_h box.style["target_w"] = box.width box.style["target_h"] = box.height return box if node.tag_name == "br": box.width = 0 box.height = measure_lineheight(st) return box cx = x max_h = measure_lineheight(st) for child in node.children: if child.tag_name == "text": text = child.text if not text: continue tokens = re.findall(r'\S+|\s+', text) child_st = combine_styles(st, child) th = measure_lineheight(child_st) for tok in tokens: tw = measure_text(tok, child_st) tbox = LayoutBox(child) tbox.style = child_st tbox.x = cx tbox.y = y tbox.width = tw tbox.height = th tbox.dom_node = DOMNode("text") tbox.dom_node.text = tok box.children.append(tbox) cx += tw max_h = max(max_h, th) else: cbox = layout_inline(child, box, cx, y, avail_w - (cx - x), st) cx += cbox.width max_h = max(max_h, cbox.height) box.width = cx - x box.height = max_h return box def get_colspan(cell): try: return max(1, int(cell.attributes.get("colspan", "1"))) except: return 1 def layout_table(node, pbox, x, y, w, st): cp = _px_to_int(node.attributes.get("cellpadding", "2")) cs = _px_to_int(node.attributes.get("cellspacing", "2")) rows = [] for c in node.children: if c.tag_name == "tr": rows.append(c) elif c.tag_name in ("tbody", "thead", "tfoot"): rows.extend(r for r in c.children if r.tag_name == "tr") if not rows: pbox.width = w pbox.height = 0 return ncols = max(sum(get_colspan(c) for c in rn.children if c.tag_name in ("td", "th")) for rn in rows) if rows else 0 if ncols == 0: pbox.width = w pbox.height = 0 return col_w = [0] * ncols col_has_content = [False] * ncols for rn in rows: ci = 0 for cell in rn.children: if cell.tag_name not in ("td", "th"): continue colspan = get_colspan(cell) if ci >= ncols: break cell_width = cell.styles.get("width") or cell.attributes.get("width", "") if cell_width and colspan == 1: pw = _px_to_int(cell_width, 0, w) if pw > 0: col_w[ci] = max(col_w[ci], min(pw, w // 2)) text = cell.get_text_content().strip() has_img = any(ch.tag_name == "img" for ch in cell.children) if text or has_img: col_has_content[ci] = True if colspan == 1: est_width = len(text) * 8 + (20 if has_img else 0) + 2 * cp col_w[ci] = max(col_w[ci], min(est_width, w // 2)) ci += colspan for i in range(ncols): if col_w[i] == 0: col_w[i] = 20 if not col_has_content[i] else 50 total_spacing = (ncols + 1) * cs used = sum(col_w) + total_spacing remaining = w - used if remaining > 0: content_cols = [i for i in range(ncols) if col_has_content[i]] if content_cols: extra = remaining // len(content_cols) for i in content_cols: col_w[i] += extra elif ncols > 0: extra = remaining // ncols for i in range(ncols): col_w[i] += extra row_y = y + cs for rn in rows: rbox = LayoutBox(rn) pbox.children.append(rbox) rbox.style = combine_styles(st, rn) rbox.x = x rbox.y = row_y ci = 0 cx = x + cs row_h = 0 for cell in rn.children: if cell.tag_name not in ("td", "th"): continue colspan = get_colspan(cell) if ci + colspan > ncols: colspan = ncols - ci if colspan < 1: break cell_w = sum(col_w[ci:ci+colspan]) + (colspan - 1) * cs cbox = LayoutBox(cell) rbox.children.append(cbox) cbox.style = combine_styles(rbox.style, cell) cbox.x = cx cbox.y = row_y + cs inner_x = cbox.x + cp inner_y = cbox.y + cp inner_w = max(10, cell_w - 2 * cp) cy = inner_y for cc in cell.children: cb = layout_block(cc, cbox, inner_x, cy, inner_w, cbox.style) cy = cb.y + cb.height cell_h = max(cy - (row_y + cs) + cp, measure_lineheight(cbox.style) + 2 * cp) h_attr = cell.styles.get("height") or cell.attributes.get("height", "") if h_attr: cell_h = max(cell_h, _px_to_int(h_attr)) cbox.width = cell_w cbox.height = cell_h cx += cell_w + cs ci += colspan row_h = max(row_h, cell_h) h_attr = rn.styles.get("height") or rn.attributes.get("height", "") if h_attr: row_h = max(row_h, _px_to_int(h_attr)) if row_h == 0: row_h = measure_lineheight(rbox.style) + 2 * cp for cbox in rbox.children: cbox.height = row_h rbox.width = w rbox.height = row_h + cs row_y += rbox.height pbox.width = w pbox.height = row_y - y + cs top_box = layout_block(dom_node, root_box, offset_x, offset_y, container_width, root_box.style) return top_box def find_box_bottom(lb): my = lb.y + lb.height for c in lb.children: my = max(my, find_box_bottom(c)) return my ############################################################################### # 7) RENDERING ############################################################################### class LinkArea: __slots__ = ['x1', 'y1', 'x2', 'y2', 'href'] def __init__(self, x1, y1, x2, y2, href): self.x1, self.y1, self.x2, self.y2, self.href = x1, y1, x2, y2, href def render_layout_box(browser, lb, canvas, widget_list, link_areas, y_min=0, y_max=50000): if lb.y > y_max + 200 or lb.y + lb.height < y_min - 200: return st = lb.style x, y, w, h = lb.x, lb.y, lb.width, lb.height node = lb.dom_node tag = node.tag_name if node else "" if not lb.is_image: bc = st.get("background_color", "transparent") if bc and bc not in ("transparent", "inherit") and w > 0 and h > 0: try: canvas.create_rectangle(x, y, x+w, y+h, fill=bc, outline="", width=0) except: pass if tag == "li": marker = st.get("list_marker", "") if marker: try: f = get_font(st) canvas.create_text(x - 5, y, anchor="ne", text=marker, fill=st.get("color", "black"), font=f) except: pass if tag == "hr": canvas.create_line(x+5, y+h//2, x+w-5, y+h//2, fill="#ccc", width=1) return if lb.is_image: src = node.attributes.get("src", "") if node else "" if src: explicit_w = st.get("explicit_w", False) explicit_h = st.get("explicit_h", False) target_w = st.get("target_w", w) target_h = st.get("target_h", h) browser.draw_image(canvas, src, x, y, target_w, target_h, explicit_w=explicit_w, explicit_h=explicit_h) return if lb.is_button: label = node.attributes.get("value", "Submit") if tag == "input" else (node.get_text_content().strip() or "Submit") btn = tk.Button(canvas, text=label, command=lambda n=node: browser.on_button_click(n), font=("Arial", 9), bg="#f5f5f5", relief=tk.RAISED, padx=4, pady=2) canvas.create_window(x+1, y+1, anchor="nw", window=btn, width=w-2, height=h-2) lb.widget = btn widget_list.append(lb) return if lb.is_input: input_type = node.attributes.get("type", "text").lower() if node else "text" if input_type in ("checkbox", "radio"): var = tk.BooleanVar(value="checked" in node.attributes) cb = tk.Checkbutton(canvas, variable=var, bg="white") if input_type == "checkbox" else tk.Radiobutton(canvas, variable=var, bg="white") cb.var = var canvas.create_window(x, y, anchor="nw", window=cb) lb.widget = cb widget_list.append(lb) elif input_type != "hidden": e_var = tk.StringVar(value=node.attributes.get("value", "")) e = tk.Entry(canvas, textvariable=e_var, font=("Arial", 10), show="*" if input_type == "password" else "") canvas.create_window(x+1, y+1, anchor="nw", window=e, width=w-2, height=h-2) lb.widget = e widget_list.append(lb) return if lb.is_textarea: txt = tk.Text(canvas, font=("Courier New", 9), wrap=tk.WORD) txt.insert("1.0", node.text if node else "") canvas.create_window(x+1, y+1, anchor="nw", window=txt, width=w-2, height=h-2) lb.widget = txt widget_list.append(lb) return if tag == "a": href = node.attributes.get("href", "") if node else "" coords = [] for c in lb.children: render_layout_box(browser, c, canvas, widget_list, link_areas, y_min, y_max) if c.width > 0 and c.height > 0: coords.append((c.x, c.y, c.x + c.width, c.y + c.height)) if coords and href: link_areas.append(LinkArea( min(c[0] for c in coords), min(c[1] for c in coords), max(c[2] for c in coords), max(c[3] for c in coords), href )) return if tag == "text" and node and node.text: try: f = get_font(st) color = st.get("color", "black") text_color = color if is_color_visible(color) else "black" canvas.create_text(x, y, anchor="nw", text=node.text, fill=text_color, font=f) except: pass return for c in lb.children: render_layout_box(browser, c, canvas, widget_list, link_areas, y_min, y_max) ############################################################################### # 8) HELPER ############################################################################### def find_form_ancestor(node): p = node.parent if node else None while p and not p.is_form: p = p.parent return p ############################################################################### # 9) UI COMPONENTS ############################################################################### class ModernButton(tk.Canvas): def __init__(self, parent, text, command=None, width=80, height=30, bg_color="#2c3e50", hover_color="#3498db", text_color="white", font=("Arial", 10, "bold"), **kwargs): super().__init__(parent, width=width, height=height, highlightthickness=0, bg=parent["bg"], **kwargs) self.command = command self.bg_color = bg_color self.hover_color = hover_color self.text_color = text_color self.btn_width = width self.btn_height = height self.text = text self.font = font self._draw(bg_color) self.bind("", lambda e: self._draw(hover_color)) self.bind("", lambda e: self._draw(bg_color)) self.bind("", lambda e: self.command() if self.command else None) def _draw(self, color): self.delete("all") self.create_rectangle(0, 0, self.btn_width, self.btn_height, fill=color, outline="") self.create_text(self.btn_width//2, self.btn_height//2, text=self.text, fill=self.text_color, font=self.font) ############################################################################### # 10) BROWSER ############################################################################### class ToyBrowser: def __init__(self): self.root = tk.Tk() self.root.title("Enhanced Web Browser") self.root.geometry("1100x850") bg = "#f0f2f5" self.root.configure(bg=bg) # Initialize global measurement canvas global GLOBAL_MEASURE_CANVAS GLOBAL_MEASURE_CANVAS = tk.Canvas(self.root) GLOBAL_MEASURE_CANVAS.pack_forget() self.history = [] self.hist_pos = -1 top = tk.Frame(self.root, bg="#2c3e50", pady=5, padx=10) top.pack(side=tk.TOP, fill=tk.X) ModernButton(top, "◀", self.go_back, 40, 30, "#34495e", "#3498db").pack(side=tk.LEFT, padx=2) ModernButton(top, "▶", self.go_fwd, 40, 30, "#34495e", "#3498db").pack(side=tk.LEFT, padx=2) ModernButton(top, "↻", self.refresh, 40, 30, "#34495e", "#3498db").pack(side=tk.LEFT, padx=2) ModernButton(top, "⌂", self.go_home, 40, 30, "#34495e", "#3498db").pack(side=tk.LEFT, padx=5) url_frame = tk.Frame(top, bg="#34495e", padx=2, pady=2, highlightthickness=1, highlightbackground="#1abc9c") url_frame.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=10) self.url_bar = tk.Entry(url_frame, font=("Arial", 12), bg="#ecf0f1", fg="#2c3e50", bd=0) self.url_bar.pack(fill=tk.BOTH, expand=True, ipady=3) self.url_bar.bind("", lambda e: self.on_go()) ModernButton(top, "Go", self.on_go, 50, 30, "#2ecc71", "#27ae60").pack(side=tk.LEFT, padx=5) bm = tk.Frame(self.root, bg="#ecf0f1", padx=10, pady=5) bm.pack(side=tk.TOP, fill=tk.X) ModernButton(bm, "Words", lambda: self.load_url_str("https://justinjackson.ca/words.html"), 80, 28, "#e74c3c", "#c0392b").pack(side=tk.LEFT, padx=5) ModernButton(bm, "Forum", lambda: self.load_url_str("http://162.208.9.114:8081/"), 80, 28, "#9b59b6", "#8e44ad").pack(side=tk.LEFT, padx=5) ModernButton(bm, "HN", lambda: self.load_url_str("https://news.ycombinator.com"), 60, 28, "#f39c12", "#d35400").pack(side=tk.LEFT, padx=5) ModernButton(bm, "Example", lambda: self.load_url_str("http://example.com"), 80, 28, "#1abc9c", "#16a085").pack(side=tk.LEFT, padx=5) frame = tk.Frame(self.root, bg=bg) frame.pack(side=tk.TOP, fill=tk.BOTH, expand=True, padx=10, pady=10) canvas_frame = tk.Frame(frame, bg="white", bd=1, relief=tk.RAISED) canvas_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=True) self.canvas = tk.Canvas(canvas_frame, bg="white", scrollregion=(0,0,3000,10000), highlightthickness=0) self.canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True) self.scroll = ttk.Scrollbar(canvas_frame, orient=tk.VERTICAL, command=self.canvas.yview) self.scroll.pack(side=tk.RIGHT, fill=tk.Y) self.canvas.config(yscrollcommand=self.scroll.set) self.canvas.bind("", lambda e: self.canvas.yview_scroll(int(-1*(e.delta/120)), "units")) self.canvas.bind("", lambda e: self.canvas.yview_scroll(-3, "units")) self.canvas.bind("", lambda e: self.canvas.yview_scroll(3, "units")) self.canvas.bind("", self.on_click) self.canvas.bind("", self.on_motion) self.root.bind("", lambda e: self.refresh()) self.root.bind("", lambda e: self.refresh()) self.status = tk.Label(self.root, text="Ready", bd=1, relief=tk.SUNKEN, anchor=tk.W, bg="#34495e", fg="white", font=("Arial", 9)) self.status.pack(side=tk.BOTTOM, fill=tk.X) self.images_cache = [] self.image_url_cache = {} self.current_dom = None self.layout_root = None self.current_url_obj = None self.form_widgets = [] self.link_areas = [] self.css_rules = [] self.css_index = None self.js_engine = None self.loading = False self.image_executor = ThreadPoolExecutor(max_workers=4) def go_home(self): self.load_url_str("https://news.ycombinator.com") def refresh(self): if self.current_url_obj: self.load_url(self.current_url_obj) def go_back(self): if self.hist_pos > 0: self.hist_pos -= 1 self.load_url_str(self.history[self.hist_pos], False) def go_fwd(self): if self.hist_pos < len(self.history) - 1: self.hist_pos += 1 self.load_url_str(self.history[self.hist_pos], False) def on_go(self): raw = self.url_bar.get().strip() if not raw: return if not raw.startswith(("http://", "https://")): raw = "https://" + raw if "." in raw else f"https://{raw}.com" self.load_url_str(raw, True) def load_url_str(self, url_s, push=True): try: purl = parse_url(url_s) except Exception as e: self.show_error(str(e)) return self.load_url(purl) if push: self.history = self.history[:self.hist_pos+1] self.history.append(url_s) self.hist_pos += 1 def load_url(self, url_obj, method="GET", body="", headers=None): # Clear caches for fresh measurement global MEASURE_CACHE, LINEHEIGHT_CACHE MEASURE_CACHE.clear() LINEHEIGHT_CACHE.clear() if headers is None: headers = {} self.loading = True start = time.time() try: self.status.config(text=f"Connecting to {url_obj.host}...") self.root.update_idletasks() rh, rb, fu = http_request(url_obj, method, headers, body) except Exception as e: self.loading = False self.show_error(str(e)) return self.url_bar.delete(0, tk.END) self.url_bar.insert(0, str(fu)) self.current_url_obj = fu ctype = rh.get("content-type", "").lower() encoding = "utf-8" if "charset=" in ctype: encoding = ctype.split("charset=")[-1].split(";")[0].strip() try: text = rb.decode(encoding, "replace") except: text = rb.decode("utf-8", "replace") if "text/html" in ctype or " target_w: scale = target_w / final_w final_w = int(final_w * scale) final_h = int(final_h * scale) if final_h > target_h: scale = target_h / final_h final_w = int(final_w * scale) final_h = int(final_h * scale) final_w = max(1, final_w) final_h = max(1, final_h) if im.width != final_w or im.height != final_h: im = im.resize((final_w, final_h), Image.Resampling.LANCZOS) tkimg = ImageTk.PhotoImage(im) def draw(): self.images_cache.append(tkimg) self.image_url_cache[cache_key] = tkimg canvas.create_image(x, y, anchor="nw", image=tkimg) self.root.after(0, draw) except Exception: def draw_err(): if w and w <= 20 and h and h <= 20: canvas.create_text(x + w//2, y + h//2, text="▲", fill="#828282", font=("Arial", 7)) else: canvas.create_rectangle(x, y, x+(w or 20), y+(h or 20), fill="#f8f8f8", outline="#ddd") self.root.after(0, draw_err) self.image_executor.submit(worker) def on_click(self, event): cx, cy = self.canvas.canvasx(event.x), self.canvas.canvasy(event.y) for la in self.link_areas: if la.x1 <= cx <= la.x2 and la.y1 <= cy <= la.y2: if not la.href.startswith("#"): self.load_url_str(str(resolve_url(self.current_url_obj, la.href)) if self.current_url_obj else la.href) break def on_motion(self, event): cx, cy = self.canvas.canvasx(event.x), self.canvas.canvasy(event.y) for la in self.link_areas: if la.x1 <= cx <= la.x2 and la.y1 <= cy <= la.y2: self.canvas.config(cursor="hand2") self.status.config(text=la.href) return self.canvas.config(cursor="") if not self.loading: self.status.config(text="Ready") def show_error(self, msg): self.canvas.delete("all") self.link_areas.clear() self.images_cache.clear() self.canvas.create_rectangle(50, 50, 650, 250, fill="#fff0f0", outline="#c00") self.canvas.create_text(60, 70, text="Error Loading Page", font=("Arial", 16, "bold"), fill="#c00", anchor="nw") self.canvas.create_text(60, 110, text=msg, font=("Arial", 11), fill="#800", anchor="nw", width=570) self.status.config(text=f"Error: {msg[:60]}...") def run(self): self.root.mainloop() if __name__ == "__main__": sys.setrecursionlimit(10**6) ToyBrowser().run()