Dump Browser HAR Content › har_dump.py | hackitude
Wed Feb 19 2025 09:43:39 GMT+0000 (Coordinated Universal Time)
Saved by @rhce143
#!/usr/bin/env python3

"""
Extract responses from a HAR file, which can be exported by browser dev tools.
Files are written for URLs that match a certain regular expression.
"""

# XXX: Warning - Quick best effort script, i.e. only basic error handling.

import re
import sys
import json
import base64
import argparse
from typing import Dict, Iterator, Tuple, Optional


def parse_har_log(filename: str) -> Iterator[Tuple[Dict, Dict]]:
    """Yield ``(request, response)`` pairs from the entries of a HAR file.

    Entries that are not objects, or whose ``request``/``response`` members
    are missing or not objects, are skipped so that one malformed entry does
    not abort the whole run.

    Raises:
        OSError: if the file cannot be opened or read.
        ValueError: if the file is not valid UTF-8 / JSON, or if it has no
            ``log.entries`` list.
    """
    # "utf-8-sig" decodes plain UTF-8 and also strips a leading byte order
    # mark, which json.load() would otherwise reject.
    with open(filename, "r", encoding="utf-8-sig") as fp:
        har = json.load(fp)

    log = har.get("log") if isinstance(har, dict) else None
    entries = log.get("entries") if isinstance(log, dict) else None
    if not isinstance(entries, list):
        raise ValueError(f"No/invalid log entries in {filename}")

    for entry in entries:
        if not isinstance(entry, dict):
            continue
        request = entry.get("request")
        response = entry.get("response")
        if isinstance(request, dict) and isinstance(response, dict):
            yield request, response


def match_request(request: Dict, url_rx: re.Pattern) -> bool:
    """Return True if the request has a string ``url`` that ``url_rx`` matches.

    The pattern is applied with ``re.Pattern.match``, i.e. anchored at the
    start of the URL.
    """
    url = request.get("url")
    return isinstance(url, str) and url_rx.match(url) is not None


def match_response(response: Dict) -> bool:
    """Return True if the response carries the integer status code 200."""
    status = response.get("status")
    return isinstance(status, int) and status == 200


def dump_response(response: Dict, filename: str) -> bool:
    """Write the body of ``response`` to ``filename``.

    The body is taken from ``response["content"]["text"]``. Without an
    ``encoding`` member the text is written UTF-8 encoded; with
    ``encoding == "base64"`` it is base64-decoded first. Any other encoding
    is not supported.

    Returns:
        True if the file was written, False if there is no usable body, the
        body cannot be decoded/encoded, or the file cannot be written.
    """
    content = response.get("content")
    if not isinstance(content, dict):
        return False
    text = content.get("text")
    if not isinstance(text, str):
        return False

    try:
        if "encoding" not in content:
            data: bytes = text.encode("utf-8")
        elif content["encoding"] == "base64":
            data = base64.b64decode(text)
        else:
            return False
    except ValueError:
        # binascii.Error (bad base64) and UnicodeEncodeError (lone
        # surrogates) are both ValueError subclasses.
        return False

    try:
        with open(filename, "wb") as fp:
            fp.write(data)
    except OSError:
        return False
    return True


def main() -> int:
    """Command line entry point.

    Prints one ``DUMP``/``FAIL``/``SKIP`` line per HAR entry. Responses are
    only dumped when ``--url`` is given, the request URL matches it and the
    response status is 200.

    Returns:
        0 on success, 1 if the HAR file cannot be read or parsed. An invalid
        ``--url`` pattern terminates via ``parser.error`` (exit status 2).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--url", type=str, default=None,
        help="regular expression for the request url, "
             "no response content dump otherwise")
    parser.add_argument("file", metavar="archive.har",
                        help="http archive file to process")
    args = parser.parse_args()

    filename: str = args.file
    url_rx: Optional[re.Pattern] = None
    if args.url is not None:
        try:
            url_rx = re.compile(args.url)
        except re.error as exc:
            parser.error(f"invalid regular expression for --url: {exc}")

    # Materialize the pairs first so that file/format errors are reported
    # separately from anything that happens while dumping.
    try:
        entries = list(parse_har_log(filename))
    except (OSError, ValueError) as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1

    for request, response in entries:
        url = request.get("url")
        label = url if isinstance(url, str) else "<no url>"
        if url_rx is not None and match_request(request, url_rx) and match_response(response):
            # match_request() succeeded, so url is a str here. Every
            # character outside [a-zA-Z0-9_-] is replaced to get a flat,
            # filesystem-safe name in the current directory.
            fn: str = re.sub('[^a-zA-Z0-9_-]', '_', url)
            if dump_response(response, fn):
                print(f"DUMP: {label}")
            else:
                print(f"FAIL: {label}")
        else:
            print(f"SKIP: {label}")

    return 0


if __name__ == "__main__":
    sys.exit(main())
Comments