import socket
import tkinter as tk
import struct
import warnings
from urllib3.exceptions import InsecureRequestWarning
# Suppress "InsecureRequestWarning" for unverified HTTPS requests
warnings.simplefilter('ignore', InsecureRequestWarning)
import random
import sys
import traceback
import urllib.parse
import requests
from tkinter.font import Font
from PIL import Image, ImageTk

###############################################################################
# 1) DNS + URL PARSING
###############################################################################


def _skip_dns_name(data, idx):
    """Return the index just past the (possibly compressed) DNS name at *idx*.

    A name is a run of length-prefixed labels that ends either with a zero
    byte or with a two-byte compression pointer (top two bits set).
    Raises IndexError when the name runs past the end of *data*.
    """
    while True:
        length = data[idx]
        if length == 0:
            return idx + 1
        if (length & 0xC0) == 0xC0:
            # A pointer always terminates the name and is exactly two bytes.
            return idx + 2
        idx += 1 + length


def _parse_dns_a_response(data, tid):
    """Extract the first IPv4 address from a raw DNS reply.

    Returns the dotted-quad string of the first answer with TYPE=A, CLASS=IN
    and a 4-byte RDATA, or None when the transaction id does not match *tid*,
    no such answer exists, or the datagram is truncated/malformed.
    """
    try:
        resp_tid, _flags, qdcount, ancount, _ns, _ar = struct.unpack(
            ">HHHHHH", data[:12]
        )
        if resp_tid != tid:
            return None

        idx = 12
        for _ in range(qdcount):
            # Each question is a name followed by QTYPE and QCLASS (2+2 bytes).
            idx = _skip_dns_name(data, idx) + 4

        for _ in range(ancount):
            idx = _skip_dns_name(data, idx)
            rtype, rclass, _ttl, rdlength = struct.unpack(
                ">HHIH", data[idx:idx + 10]
            )
            idx += 10
            if rtype == 1 and rclass == 1 and rdlength == 4:
                ip_bytes = data[idx:idx + 4]
                if len(ip_bytes) != 4:
                    return None
                return ".".join(map(str, ip_bytes))
            idx += rdlength
    except (IndexError, struct.error):
        # Short or garbled datagram: treat it as "no answer".
        return None
    return None


def resolve_hostname_dns(hostname, dns_server="8.8.8.8", port=53, timeout=5):
    """
    If 'hostname' is numeric, skip DNS.
    Otherwise do a naive DNS A-record lookup.

    Parameters:
        hostname:   host name or dotted IPv4 literal.
        dns_server: address of the resolver that receives the UDP query.
        port:       UDP port of the resolver.
        timeout:    socket timeout in seconds.

    Returns the IPv4 address as a string, or None when the name cannot be
    encoded as a DNS query, the resolver does not answer, or the reply holds
    no usable A record.
    """
    hostname = hostname.strip()
    try:
        socket.inet_aton(hostname)
        # numeric => skip DNS
        return hostname
    except (OSError, ValueError):
        pass

    # A trailing dot only marks the name as fully qualified; drop it so it
    # does not produce an empty label in the query.
    try:
        labels = [part.encode("ascii") for part in hostname.rstrip(".").split(".")]
    except UnicodeEncodeError:
        return None
    if any(not 0 < len(label) <= 63 for label in labels):
        # Empty or over-long labels cannot be encoded in a DNS question.
        return None

    tid = random.randint(0, 65535)
    # Flags 0x0100 = standard query with "recursion desired"; one question.
    header = struct.pack(">HHHHHH", tid, 0x0100, 1, 0, 0, 0)
    qname = b"".join(bytes([len(label)]) + label for label in labels)
    # QTYPE=1 (A), QCLASS=1 (IN)
    query = header + qname + b"\x00" + struct.pack(">HH", 1, 1)

    try:
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.settimeout(timeout)
            s.sendto(query, (dns_server, port))
            data, _ = s.recvfrom(512)
    except OSError:
        # Covers timeouts and unreachable/invalid resolver addresses.
        return None

    return _parse_dns_a_response(data, tid)


class ParsedURL:
    """Plain record holding the four URL components used by this file."""

    def __init__(self, scheme="http", host="", port=80, path="/"):
        self.scheme = scheme
        self.host = host
        self.port = port
        self.path = path

    def __repr__(self):
        return (
            f"ParsedURL(scheme={self.scheme!r}, host={self.host!r}, "
            f"port={self.port!r}, path={self.path!r})"
        )
def parse_url(url):
    """
    Minimal parse => scheme://host[:port]/path
    scheme= http => default port=80, https => default=443

    The scheme prefix is matched case-insensitively; input without an
    http:// or https:// prefix is treated as "host[:port][/path]" over http.
    Everything from the first "/" after the host (query string included) is
    kept verbatim as the path. An empty port ("host:") falls back to the
    scheme default.

    Raises ValueError when the port is present but not an integer.
    """
    url = url.strip()
    lowered = url.lower()
    if lowered.startswith("https://"):
        scheme, rest = "https", url[8:]
    elif lowered.startswith("http://"):
        scheme, rest = "http", url[7:]
    else:
        scheme, rest = "http", url

    host_port, slash, tail = rest.partition("/")
    path = slash + tail if slash else "/"

    host, _, port_text = host_port.partition(":")
    port_text = port_text.strip()
    if port_text:
        port = int(port_text)
    else:
        port = 443 if scheme == "https" else 80
    return ParsedURL(scheme, host.strip(), port, path)


def _format_url(url_obj):
    """Render a parsed URL back to a string, omitting the scheme's default port."""
    if (url_obj.scheme, url_obj.port) in (("http", 80), ("https", 443)):
        return f"{url_obj.scheme}://{url_obj.host}{url_obj.path}"
    return f"{url_obj.scheme}://{url_obj.host}:{url_obj.port}{url_obj.path}"


###############################################################################
# 2) HTTP with chunked decode + manual 3xx
###############################################################################

def http_request(url_obj, method="GET", headers=None, body="", max_redirects=10):
    """Perform an HTTP(S) request, following 301/302/303/307/308 manually.

    Parameters:
        url_obj:       parsed URL (scheme/host/port/path attributes).
        method:        HTTP method for the first request.
        headers:       extra request headers (dict) or None.
        body:          request body as text.
        max_redirects: maximum number of requests issued; at least one
                       request is always made.

    Returns (headers_dict, body_bytes, final_url_obj). headers_dict has
    lower-cased keys plus the pseudo-key ":status_code".
    """
    if headers is None:
        headers = {}
    cur_url, cur_method, cur_body = url_obj, method, body
    r_headers, r_body, r_url = {}, b"", url_obj

    for _ in range(max(1, max_redirects)):
        r_headers, r_body, r_url = _single_http_request(
            cur_url, cur_method, headers, cur_body
        )
        try:
            status_code = int(r_headers.get(":status_code", "0"))
        except ValueError:
            status_code = 0
        if status_code not in (301, 302, 303, 307, 308):
            return r_headers, r_body, r_url

        location = r_headers.get("location", "")
        if not location:
            return r_headers, r_body, r_url
        # Location may be relative ("/login", "next.html"); resolve it
        # against the URL that produced the redirect before parsing.
        cur_url = parse_url(urllib.parse.urljoin(_format_url(cur_url), location))
        if status_code in (302, 303):
            cur_method = "GET"
            cur_body = ""

    # Redirect budget exhausted: hand back the last response as-is.
    return r_headers, r_body, r_url


def _single_http_request(url_obj, method="GET", headers=None, body=""):
    """Dispatch one request: https goes through requests, http through a raw socket."""
    if url_obj.scheme == "https":
        return _requests_https(url_obj, method, headers, body)
    return _raw_http(url_obj, method, headers, body)


def _requests_https(url_obj, method="GET", headers=None, body=""):
    """Issue one HTTPS request via the requests library (no redirect following).

    Returns (headers_dict, body_bytes, url_obj) in the same shape as _raw_http.
    """
    import requests
    if headers is None:
        headers = {}
    # requests computes Host and Content-Length itself.
    final_h = {
        k: v for k, v in headers.items()
        if k.lower() not in ("host", "content-length")
    }
    if url_obj.port != 443:
        full_url = f"https://{url_obj.host}:{url_obj.port}{url_obj.path}"
    else:
        full_url = f"https://{url_obj.host}{url_obj.path}"

    # NOTE(review): verify=False disables TLS certificate validation, so the
    # server's identity is not checked. Left unchanged; confirm it is intended.
    resp = requests.request(
        method=method,
        url=full_url,
        headers=final_h,
        data=body.encode("utf-8") if body else None,
        allow_redirects=False,
        verify=False,
    )
    r_h = {":status_code": str(resp.status_code)}
    for k, v in resp.headers.items():
        r_h[k.lower()] = v
    return r_h, resp.content, url_obj


def _parse_http_response(response):
    """Split raw response bytes into (headers_dict, body_bytes).

    headers_dict has lower-cased keys plus ":status_code" when the status line
    has at least two fields. A chunked body is decoded. Returns ({}, b"") when
    the header terminator is missing.
    """
    hd_end = response.find(b"\r\n\r\n")
    if hd_end == -1:
        return {}, b""
    header_lines = response[:hd_end].decode("utf-8", "replace").split("\r\n")
    raw_body = response[hd_end + 4:]

    headers_dict = {}
    status_parts = header_lines[0].split(" ", 2)
    if len(status_parts) >= 2:
        headers_dict[":status_code"] = status_parts[1]
    for line in header_lines[1:]:
        if ":" in line:
            key, value = line.split(":", 1)
            headers_dict[key.strip().lower()] = value.strip()

    if "chunked" in headers_dict.get("transfer-encoding", "").lower():
        raw_body = decode_chunked_body(raw_body)
    return headers_dict, raw_body


def _raw_http(url_obj, method="GET", headers=None, body=""):
    """Issue one plain-HTTP/1.1 request over a TCP socket.

    Returns (headers_dict, body_bytes, url_obj).
    Raises Exception("DNS fail => host") when the host cannot be resolved;
    socket errors propagate as OSError.
    """
    if headers is None:
        headers = {}
    ip_addr = resolve_hostname_dns(url_obj.host)
    if not ip_addr:
        raise Exception(f"DNS fail => {url_obj.host}")

    import socket
    # Content-Length counts bytes, so encode before measuring.
    body_bytes = body.encode("utf-8")
    lines = [
        f"{method} {url_obj.path} HTTP/1.1",
        f"Host: {url_obj.host}",
    ]
    for k, v in headers.items():
        # These three are always written by this function; skip caller copies
        # so the request never carries duplicates.
        if k.lower() not in ("host", "connection", "content-length"):
            lines.append(f"{k}: {v}")
    lines.append("Connection: close")
    lines.append(f"Content-Length: {len(body_bytes)}")
    lines.append("")
    request = ("\r\n".join(lines) + "\r\n").encode("utf-8") + body_bytes

    chunks = []
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.connect((ip_addr, url_obj.port))
        sock.sendall(request)
        # "Connection: close" => the server signals the end by closing.
        while True:
            chunk = sock.recv(4096)
            if not chunk:
                break
            chunks.append(chunk)

    headers_dict, raw_body = _parse_http_response(b"".join(chunks))
    return headers_dict, raw_body, url_obj


def decode_chunked_body(rb):
    """Decode an HTTP "Transfer-Encoding: chunked" body.

    Chunk extensions after ";" on the size line are ignored. Decoding stops at
    the terminating zero-size chunk, at an unparsable size line, or when the
    data runs out; whatever was decoded up to that point is returned.
    """
    pieces = []
    pos = 0
    while True:
        eol = rb.find(b"\r\n", pos)
        if eol == -1:
            break
        size_field = rb[pos:eol].split(b";", 1)[0].strip()
        pos = eol + 2
        try:
            chunk_size = int(size_field, 16)
        except ValueError:
            break
        if chunk_size <= 0:
            break
        pieces.append(rb[pos:pos + chunk_size])
        pos += chunk_size
        # Each chunk's data is followed by its own CRLF.
        if rb[pos:pos + 2] == b"\r\n":
            pos += 2
    return b"".join(pieces)


###############################################################################
# 3) DOM Node
###############################################################################

class DOMNode:
    """One node of the parsed document tree.

    Text runs are stored as nodes created with the default tag name
    ("document") and a non-empty ``text``. Form elements additionally carry
    ``method``, ``action`` and ``form_fields`` (name -> [initial value, node]).
    """

    def __init__(self, tag_name="document", parent=None):
        self.tag_name = tag_name.lower()
        self.attributes = {}
        self.children = []
        self.parent = parent
        self.text = ""
        self.styles = {}
        self.inline_css = ""
        self.script_code = ""
        self.is_form = (self.tag_name == "form")
        self.method = "get"
        self.action = ""
        self.form_fields = {}

    def __repr__(self):
        # NOTE(review): the original repr text was lost in extraction (it
        # returned an empty f-string); this is a replacement, not a restoration.
        return f"<DOMNode {self.tag_name} children={len(self.children)}>"


def _flush_text(buffer, parent):
    """Turn buffered text into a text node under *parent*, then empty the buffer.

    Whitespace-only text is discarded.
    """
    if not buffer:
        return
    text = "".join(buffer)
    buffer.clear()
    if text.strip():
        node = DOMNode(parent=parent)
        node.text = text
        parent.children.append(node)


def _parse_attributes(attr_text):
    """Parse the attribute part of a tag into a dict with lower-cased names.

    Handles double-quoted, single-quoted and unquoted values; attributes
    without a value (e.g. ``checked``) are stored with an empty string.
    """
    import re
    attrs = {}
    pattern = r"""([^\s=/"'<>]+)(?:\s*=\s*("[^"]*"|'[^']*'|[^\s"'>]+))?"""
    for match in re.finditer(pattern, attr_text):
        value = match.group(2)
        attrs[match.group(1).lower()] = "" if value is None else value.strip("\"' ")
    return attrs


def _enclosing_form(node):
    """Return *node* or its nearest ancestor that is a form, else None."""
    while node and not node.is_form:
        node = node.parent
    return node


def parse_html(ht):
    """Parse HTML text into a DOMNode tree rooted at a "document" node.

    NOTE(review): the head of the main loop and the closing-tag literals of
    the original were lost in extraction; they are reconstructed here from the
    surviving structure (character scan, text buffer flushed into a text node,
    find the matching ">", per-tag handling).

    Behaviour:
      * comments, doctype and processing instructions are skipped;
      * br/hr/meta/link/img/input and "<x/>" tags never get children;
      * title/textarea/style/script content is captured raw into
        text/text/inline_css/script_code respectively;
      * input and textarea elements with a name are registered in the
        enclosing form's ``form_fields``;
      * a closing tag closes the nearest open element with that name (and
        everything nested inside it); an unmatched closing tag is ignored.
    """
    import re
    root = DOMNode("document")
    current = root
    tb = []
    i = 0
    n = len(ht)

    while i < n:
        if ht[i] != "<":
            nxt = ht.find("<", i)
            if nxt == -1:
                nxt = n
            tb.append(ht[i:nxt])
            i = nxt
            continue

        if ht.startswith("<!--", i):
            # Comments may contain ">", so look for the real terminator.
            end = ht.find("-->", i + 4)
            i = n if end == -1 else end + 3
            continue

        close_i = ht.find(">", i)
        if close_i == -1:
            break
        _flush_text(tb, current)
        tag_c = ht[i + 1:close_i].strip()

        if tag_c.startswith(("!", "?")):
            i = close_i + 1
            continue

        # Check if it's a close tag
        if tag_c.startswith("/"):
            close_tag = tag_c[1:].strip().lower()
            anc = current
            while anc.parent is not None and anc.tag_name != close_tag:
                anc = anc.parent
            if anc.parent is not None:
                current = anc.parent
            i = close_i + 1
            continue

        self_closing = tag_c.endswith("/")
        if self_closing:
            tag_c = tag_c[:-1].rstrip()
        parts = tag_c.split(None, 1)
        if not parts:
            # "<>" or "</>": nothing to build.
            i = close_i + 1
            continue

        tname = parts[0].lower()
        nd = DOMNode(tname, parent=current)
        if len(parts) > 1:
            nd.attributes = _parse_attributes(parts[1])
        if tname == "form":
            nd.is_form = True
            nd.method = nd.attributes.get("method", "get").lower()
            nd.action = nd.attributes.get("action", "")
        current.children.append(nd)

        # handle self-closing or special tags:
        if self_closing or tname in ("br", "hr", "meta", "link", "img", "input"):
            if tname == "input":
                nm = nd.attributes.get("name", "")
                fa = _enclosing_form(current)
                if fa and nm:
                    fa.form_fields[nm] = [nd.attributes.get("value", ""), nd]
            i = close_i + 1
            continue

        # <title>, <textarea>, <style>, <script> => read until closing tag
        if tname in ("title", "textarea", "style", "script"):
            closer = re.compile(r"</" + tname + r"\s*>", re.IGNORECASE)
            found = closer.search(ht, close_i + 1)
            if found is None:
                i = n
                continue
            cont = ht[close_i + 1:found.start()]
            if tname == "style":
                nd.inline_css = cont
            elif tname == "script":
                nd.script_code = cont
            else:
                nd.text = cont
                if tname == "textarea":
                    nm = nd.attributes.get("name", "")
                    fa = _enclosing_form(current)
                    if fa and nm:
                        fa.form_fields[nm] = [cont, nd]
            i = found.end()
            continue

        # normal open tag => descend
        current = nd
        i = close_i + 1

    _flush_text(tb, current)
    return root


###############################################################################
# 4) CSS + apply
###############################################################################
# NOTE(review): everything from the body of parse_css() up to the header of
# the rendering section was lost when this file was extracted (text between a
# "<" and a later ">" was stripped as if it were markup). The only surviving
# fragment is:
#
#     def parse_css(css_text): rules={} i=0 while i
#
# The code below still calls parse_css(), apply_css(), layout_tree() and
# find_form_ancestor(), and reads layout-box attributes (dom_node, style,
# x, y, width, height, is_image, is_button, is_input, is_textarea, widget,
# children). None of these are defined in the visible source. They are
# deliberately NOT re-invented here; restore them from the upstream copy.

###############################################################################
# (section header partly lost) => bounding-box link approach + fix for child text
###############################################################################

class LinkArea:
    """Clickable rectangle on the canvas together with the link target."""

    def __init__(self, x1, y1, x2, y2, href):
        self.x1 = x1
        self.y1 = y1
        self.x2 = x2
        self.y2 = y2
        self.href = href


def gather_text_for_a(node):
    """
    Recursively gather the node's own text followed by all descendant text,
    in document order. Returns "" when there is none; the caller decides on
    a fallback label.
    """
    return node.text + "".join(gather_text_for_a(c) for c in node.children)


def render_layout_box(browser, layout_box, canvas, widget_list, link_areas):
    """Draw one layout box (and, for text/link boxes, its children) on *canvas*.

    Images, buttons, inputs and textareas are rendered as a single item and
    their children are not visited. Boxes that create a Tk widget store it in
    ``layout_box.widget`` and are appended to *widget_list*. Links append a
    LinkArea with the drawn text's bounding box to *link_areas*.
    """
    node = layout_box.dom_node
    st = layout_box.style
    x = layout_box.x
    y = layout_box.y
    w = layout_box.width
    h = layout_box.height

    def place(widget):
        # Shared placement for every embedded Tk widget.
        canvas.create_window(x + 5, y + 5, anchor="nw", window=widget,
                             width=w - 10, height=h - 10)
        layout_box.widget = widget
        widget_list.append(layout_box)

    if layout_box.is_image:
        src = node.attributes.get("src", "")
        if src:
            browser.draw_image(canvas, src, x, y)
        return

    if layout_box.is_button:
        if node.tag_name == "button":
            label = node.text.strip() or "Submit"
        else:
            label = node.attributes.get("value", "Submit")
        place(tk.Button(canvas, text=label,
                        command=lambda: browser.on_button_click(node)))
        return

    if layout_box.is_input:
        inp_t = node.attributes.get("type", "text").lower()
        nm = node.attributes.get("name", "")
        if inp_t == "checkbox":
            var = tk.BooleanVar(value="checked" in node.attributes)
            cb = tk.Checkbutton(canvas, text=nm, variable=var)
            # Keep the variable reachable from the widget for form submission.
            cb.var = var
            place(cb)
        else:
            e_var = tk.StringVar()
            fa = find_form_ancestor(node)
            if fa and nm in fa.form_fields:
                old_val, _ = fa.form_fields[nm]
                e_var.set(old_val)
            place(tk.Entry(canvas, textvariable=e_var))
        return

    if layout_box.is_textarea:
        txt = tk.Text(canvas, width=40, height=4)
        nm = node.attributes.get("name", "")
        fa = find_form_ancestor(node)
        old_val = ""
        if fa and nm in fa.form_fields:
            old_val, _ = fa.form_fields[nm]
        if node.text.strip() and not old_val:
            old_val = node.text.strip()
        txt.insert("1.0", old_val)
        place(txt)
        return

    from tkinter.font import Font
    col = st["color"]
    f = Font(
        family="Arial",
        size=st["size"],
        weight="bold" if st["bold"] else "normal",
        slant="italic" if st["italic"] else "roman",
        underline=1 if st["underline"] else 0,
    )

    if node.tag_name == "a":
        link_text = gather_text_for_a(node).strip()
        if not link_text:
            link_text = node.attributes.get("href", "(link)")
        if link_text:
            tid = canvas.create_text(x + 5, y + 5, anchor="nw", text=link_text,
                                     fill=col, font=f)
            # The bounding box is only reliable once pending geometry is processed.
            canvas.update_idletasks()
            bbox = canvas.bbox(tid)
            if bbox:
                x1, y1, x2, y2 = bbox
                href = node.attributes.get("href", "")
                link_areas.append(LinkArea(x1, y1, x2, y2, href))
    else:
        txt = node.text.strip()
        if txt:
            canvas.create_text(x + 5, y + 5, anchor="nw", text=txt, fill=col, font=f)

    # Links also render their nested children, same as plain boxes.
    for c in layout_box.children:
        render_layout_box(browser, c, canvas, widget_list, link_areas)


###############################################################################
# 8) BROWSER => parse properly; if found => update title
###############################################################################

class ToyBrowser:
    """Tk window with URL bar, back/forward history, bookmarks and a canvas
    on which fetched pages are rendered."""

    def __init__(self):
        self.root = tk.Tk()
        self.root.title("Toy Browser")

        self.history = []
        self.hist_pos = -1

        top_line = tk.Frame(self.root)
        top_line.pack(side=tk.TOP, fill=tk.X)

        self.back_btn = tk.Button(top_line, text="Back", command=self.go_back)
        self.back_btn.pack(side=tk.LEFT)
        self.fwd_btn = tk.Button(top_line, text="Fwd", command=self.go_fwd)
        self.fwd_btn.pack(side=tk.LEFT)

        self.url_bar = tk.Entry(top_line)
        self.url_bar.pack(side=tk.LEFT, fill=tk.X, expand=True)
        self.url_bar.bind("<Return>", self.on_url_enter)

        self.go_btn = tk.Button(top_line, text="Go", command=self.on_go_click)
        self.go_btn.pack(side=tk.LEFT)

        bm_line = tk.Frame(self.root)
        bm_line.pack(side=tk.TOP, fill=tk.X)

        self.bm1 = tk.Button(bm_line, text="This is a web page",
                             command=self.bookmark_page1)
        self.bm1.pack(side=tk.LEFT)
        self.bm2 = tk.Button(bm_line, text="Founder's forum (wait 30s...)",
                             command=self.bookmark_page2)
        self.bm2.pack(side=tk.LEFT)
        self.bm3 = tk.Button(bm_line, text="Hacker News",
                             command=self.bookmark_page3)
        self.bm3.pack(side=tk.LEFT)

        self.frame = tk.Frame(self.root)
        self.frame.pack(side=tk.TOP, fill=tk.BOTH, expand=True)

        self.canvas = tk.Canvas(self.frame, bg="white", scrollregion=(0, 0, 3000, 3000))
        self.canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

        self.scroll = tk.Scrollbar(self.frame, orient=tk.VERTICAL,
                                   command=self.canvas.yview)
        self.scroll.pack(side=tk.RIGHT, fill=tk.Y)
        self.canvas.config(yscrollcommand=self.scroll.set)

        self.canvas.bind("<MouseWheel>", self.on_mousewheel_win)
        self.canvas.bind("<Button-4>", self.on_mousewheel_lin)
        self.canvas.bind("<Button-5>", self.on_mousewheel_lin)
        self.canvas.bind("<Button-1>", self.on_canvas_click)

        # PhotoImage objects must stay referenced or Tk drops the picture.
        self.images_cache = []
        self.current_dom = None
        self.layout_root = None
        self.current_url_obj = None

        self.form_widgets = []
        self.link_areas = []

    def on_url_enter(self, evt):
        """<Return> in the URL bar behaves like pressing Go."""
        self.on_go_click()

    def _open_bookmark(self, url):
        """Put *url* into the URL bar and navigate to it."""
        self.url_bar.delete(0, tk.END)
        self.url_bar.insert(0, url)
        self.on_go_click()

    def bookmark_page1(self):
        self._open_bookmark("https://justinjackson.ca/words.html")

    def bookmark_page2(self):
        self._open_bookmark("http://162.208.9.114:8081/")

    def bookmark_page3(self):
        self._open_bookmark("https://news.ycombinator.com")

    def go_back(self):
        """Step one entry back in history, if there is one."""
        if self.hist_pos > 0:
            self.hist_pos -= 1
            self.load_url_str(self.history[self.hist_pos], push_hist=False)

    def go_fwd(self):
        """Step one entry forward in history, if there is one."""
        if self.hist_pos < len(self.history) - 1:
            self.hist_pos += 1
            self.load_url_str(self.history[self.hist_pos], push_hist=False)

    def on_go_click(self):
        raw_url = self.url_bar.get().strip()
        self.load_url_str(raw_url, True)

    def load_url_str(self, url_s, push_hist=True):
        """Parse *url_s*, load it with GET and optionally record it in history.

        A parse failure is shown on the canvas and nothing is recorded.
        """
        try:
            purl = parse_url(url_s)
        except Exception as e:
            self.show_error(f"parse_url => {e}")
            return
        self.load_url(purl, "GET", "", {})
        if push_hist:
            # Navigating from the middle of history discards the forward part.
            self.history = self.history[:self.hist_pos + 1]
            self.history.append(url_s)
            self.hist_pos += 1

    def load_url(self, url_obj, method="GET", body="", extra_headers=None):
        """Fetch *url_obj* and render the response on the canvas.

        text/html responses are parsed, styled, laid out and drawn, and the
        window title is updated from <title>; any other content type is shown
        as plain text. Request failures are shown via show_error().
        """
        if extra_headers is None:
            extra_headers = {}
        try:
            rh, rb, fu = http_request(url_obj, method, extra_headers, body)
        except Exception as e:
            self.show_error(str(e))
            return

        self.url_bar.delete(0, tk.END)
        self.url_bar.insert(0, self.url_to_string(fu))
        self.current_url_obj = fu

        ctype = rh.get("content-type", "").lower()
        encoding = "utf-8"
        if "charset=" in ctype:
            # The charset value may be quoted: charset="utf-8".
            encp = ctype.split("charset=")[-1].split(";")[0].strip().strip("\"'")
            encoding = encp or "utf-8"
        try:
            text_data = rb.decode(encoding, "replace")
        except LookupError:
            # Unknown codec name from the server.
            text_data = rb.decode("utf-8", "replace")

        if "text/html" in ctype:
            dom = parse_html(text_data)
            self.current_dom = dom
            self._reset_page()

            ccss = self.collect_css(dom, fu)
            ccss += self.collect_inline(dom)
            crules = parse_css(ccss)
            apply_css(dom, crules)

            self.layout_root = layout_tree(dom, 800, 0, 0, 25)
            render_layout_box(self, self.layout_root, self.canvas,
                              self.form_widgets, self.link_areas)
            self.canvas.yview_moveto(0.0)

            # Now, let's see if we found <title>
            title_node = self.find_title(dom)
            if title_node and title_node.text.strip():
                self.root.title(title_node.text.strip())
        else:
            self.canvas.delete("all")
            self.canvas.create_text(10, 10, anchor="nw", text=text_data,
                                    fill="black", font=("Arial", 12))

    def _reset_page(self):
        """Drop everything that belongs to the previously rendered page."""
        self.canvas.delete("all")
        # canvas.delete() removes the window items but not the widgets
        # themselves, so destroy them explicitly.
        for lb in self.form_widgets:
            widget = getattr(lb, "widget", None)
            if widget is not None:
                widget.destroy()
        self.form_widgets.clear()
        self.link_areas.clear()
        self.images_cache.clear()

    def collect_css(self, dom_node, base_url_obj):
        """Download and concatenate every <link rel="stylesheet"> under *dom_node*.

        Relative hrefs are resolved against *base_url_obj*. A stylesheet that
        cannot be fetched is skipped (best effort).
        """
        s = ""
        if (dom_node.tag_name == "link"
                and dom_node.attributes.get("rel", "").lower() == "stylesheet"):
            href = dom_node.attributes.get("href", "")
            if href and not href.startswith("http"):
                origin = (f"{base_url_obj.scheme}://{base_url_obj.host}"
                          f":{base_url_obj.port}")
                if not href.startswith("/"):
                    slash = base_url_obj.path.rfind("/")
                    bd = base_url_obj.path[:slash] if slash > 0 else ""
                    href = f"{origin}{bd}/{href}"
                else:
                    href = f"{origin}{href}"
            try:
                newu = parse_url(href)
                hh, bb, _ = http_request(newu, "GET")
                s += bb.decode("utf-8", "replace") + "\n"
            except Exception:
                # Styling is optional; a failed stylesheet must not block the page.
                pass
        for c in dom_node.children:
            s += self.collect_css(c, base_url_obj)
        return s

    def collect_inline(self, dom_node):
        """Concatenate the inline_css of *dom_node* and all descendants."""
        c = dom_node.inline_css
        for ch in dom_node.children:
            c += "\n" + self.collect_inline(ch)
        return c

    def on_button_click(self, node):
        """Submit the form that encloses the clicked button, if any."""
        fa = find_form_ancestor(node)
        if fa:
            self.submit_form(fa)

    def submit_form(self, form_node):
        """Collect the current widget values of *form_node* and send them.

        GET appends the url-encoded fields to the action URL; any other
        method sends them as an application/x-www-form-urlencoded POST.
        Unchecked checkboxes are omitted. An empty action targets the current
        page; a relative action is resolved against it.
        """
        method = form_node.method.upper()
        action = form_node.action.strip()
        if action:
            try:
                action = self.url_to_string(self.make_absolute_url(action))
            except ValueError as e:
                self.show_error(f"parse_url => {e}")
                return
        elif self.current_url_obj:
            action = self.url_to_string(self.current_url_obj)
        else:
            action = "http://127.0.0.1/"

        form_data = []
        for nm, (old_val, nref) in form_node.form_fields.items():
            lb = self.find_layout_box_for_node(self.layout_root, nref)
            typed_val = old_val
            if lb and lb.widget:
                if nref.attributes.get("type", "").lower() == "checkbox":
                    if not lb.widget.var.get():
                        continue
                    typed_val = nref.attributes.get("value", "on")
                elif isinstance(lb.widget, tk.Entry):
                    typed_val = lb.widget.get()
                elif isinstance(lb.widget, tk.Text):
                    # "end-1c" drops the newline Tk always appends.
                    typed_val = lb.widget.get("1.0", "end-1c")
            encn = urllib.parse.quote_plus(nm)
            encv = urllib.parse.quote_plus(typed_val)
            form_data.append(f"{encn}={encv}")
        q_str = "&".join(form_data)

        if method == "GET":
            joiner = "&" if "?" in action else "?"
            self.load_url_str(action + joiner + q_str)
        else:
            try:
                newu = parse_url(action)
            except ValueError as e:
                self.show_error(f"parse_url => {e}")
                return
            hh = {"Content-Type": "application/x-www-form-urlencoded"}
            self.load_url(newu, "POST", q_str, hh)

    def find_layout_box_for_node(self, layout_box, node):
        """Depth-first search for the layout box whose dom_node is *node*."""
        if layout_box.dom_node is node:
            return layout_box
        for c in layout_box.children:
            f = self.find_layout_box_for_node(c, node)
            if f:
                return f
        return None

    def draw_image(self, canvas, src, x, y):
        """Fetch *src* and draw it on *canvas* at (x, y); failures are ignored."""
        if not src:
            return
        try:
            absu = self.make_absolute_url(src)
            hh, bb, _ = http_request(absu, "GET")
            import io
            im = Image.open(io.BytesIO(bb))
            tkimg = ImageTk.PhotoImage(im)
            self.images_cache.append(tkimg)
            canvas.create_image(x + 5, y + 5, anchor="nw", image=tkimg)
        except Exception:
            # Best effort: a broken image must not abort page rendering.
            pass

    def make_absolute_url(self, raw_url):
        """
        If link doesn't start with http:// or https://,
        interpret relative to self.current_url_obj.
        So relative links & images also work.

        Resolution follows urllib.parse.urljoin ("../", "?query", "//host"
        are handled); a "#fragment" is dropped. Raises ValueError when the
        result cannot be parsed by parse_url (e.g. "mailto:" links).
        """
        raw_url = raw_url.strip()
        is_absolute = raw_url.lower().startswith(("http://", "https://"))
        if is_absolute or not self.current_url_obj:
            return parse_url(raw_url)
        base = self.url_to_string(self.current_url_obj)
        joined, _fragment = urllib.parse.urldefrag(urllib.parse.urljoin(base, raw_url))
        return parse_url(joined)

    def on_canvas_click(self, event):
        """
        On link click => bounding box => interpret relative => open link
        """
        cx = self.canvas.canvasx(event.x)
        cy = self.canvas.canvasy(event.y)
        for la in self.link_areas:
            if la.x1 <= cx <= la.x2 and la.y1 <= cy <= la.y2:
                try:
                    final_str = self.url_to_string(self.make_absolute_url(la.href))
                except ValueError as e:
                    self.show_error(f"parse_url => {e}")
                else:
                    self.load_url_str(final_str)
                # Loading replaces self.link_areas, so stop iterating.
                break

    def url_to_string(self, purl):
        """Format a parsed URL, omitting the port when it is the scheme default."""
        if (purl.scheme == "http" and purl.port == 80) or \
                (purl.scheme == "https" and purl.port == 443):
            return f"{purl.scheme}://{purl.host}{purl.path}"
        return f"{purl.scheme}://{purl.host}:{purl.port}{purl.path}"

    def on_mousewheel_win(self, event):
        # <MouseWheel> reports delta in multiples of 120 per notch.
        self.canvas.yview_scroll(int(-1 * (event.delta / 120)), "units")

    def on_mousewheel_lin(self, event):
        # Bound to <Button-4>/<Button-5>: 4 scrolls up, 5 scrolls down.
        if event.num == 4:
            self.canvas.yview_scroll(-1, "units")
        else:
            self.canvas.yview_scroll(1, "units")

    def show_error(self, msg):
        """Clear the page and display *msg* in red."""
        self.canvas.delete("all")
        self.link_areas.clear()
        self.images_cache.clear()
        self.canvas.create_text(10, 10, anchor="nw", text=f"Error: {msg}", fill="red")

    def find_title(self, node):
        """
        Check node's tag => if it's 'title', return that node.
        Otherwise recursively check children.
        """
        if node.tag_name == "title":
            return node
        for ch in node.children:
            found = self.find_title(ch)
            if found:
                return found
        return None

    def run(self):
        self.root.mainloop()


###############################################################################
# main
###############################################################################
if __name__ == "__main__":
    # The DOM/layout walkers are recursive; deeply nested pages need headroom.
    sys.setrecursionlimit(10**6)
    app = ToyBrowser()
    app.run()