#!/usr/bin/env python3
"""
make_jsons.py
=============

1. From a tree-style directory listing (with summaries after an en-dash “–”)
   produce summaries.json : { "full/file/path": "summary", ... }

2. From a “concatenated source” file that looks like

       ================================
       path/to/file.tsx
       --------------------------------
       ...file content...

   produce contents.json : { "full/file/path": "<file content>", ... }

3. Check that the key-sets of both JSON files are identical and print
   any filenames that are missing from either mapping.

---------------------------------------------------------------------------
USAGE
-----
    python make_jsons.py tree.txt bundle.txt summaries.json contents.json

where
    • tree.txt       – your original `tree` output with summaries
    • bundle.txt     – the big text file with `=== / ---` separators + file bodies
    • summaries.json, contents.json – output files
---------------------------------------------------------------------------
"""

import json
import re
import sys
from pathlib import Path

INDENT_WIDTH = 4                          # one indent level = 4 glyphs ("│   " or "    ")
EN_DASH_SPLIT = re.compile(r"\s+–\s+")    # whitespace, en-dash, whitespace delimiter


# --------------------------------------------------------------------------- #
# Part 1 – Parse the `tree` listing
# --------------------------------------------------------------------------- #
def parse_tree_listing(lines):
    """Yield (depth, name, summary_or_None) for each meaningful line."""
    for raw in lines:
        if not raw.strip():
            continue

        # Strip the "tree art" section up to the first '── '
        m = re.search(r"[├└]──\s*", raw)
        if m:
            indent_prefix = raw[:m.start()]
            content = raw[m.end():].rstrip()
        else:                                   # root line without glyphs
            indent_prefix = ""
            content = raw.strip()

        depth = len(indent_prefix) // INDENT_WIDTH

        # Split "name – summary" on the en-dash
        if "–" in content:
            name, summary = EN_DASH_SPLIT.split(content, maxsplit=1)
            summary = summary.strip()
        else:
            name, summary = content, None

        yield depth, name.strip(), summary


def build_summary_map(tree_path: Path) -> dict:
    """Map "full/file/path" -> summary for every tree entry that has one."""
    with tree_path.open(encoding="utf-8") as fh:
        lines = fh.readlines()

    stack, mapping = [], {}
    for depth, name, summary in parse_tree_listing(lines):
        stack = stack[:depth]
        stack.append(name)
        if summary:                             # directories have no summary
            full_path = "/".join(stack)
            mapping[full_path] = summary
    return mapping


# --------------------------------------------------------------------------- #
# Part 2 – Parse the “bundle” file that has file bodies
# --------------------------------------------------------------------------- #
SEP_EQ = re.compile(r"^=+\s*$")          # line of only '=' chars
SEP_DASH = re.compile(r"^-{3,}\s*$")     # line of only '-' chars (3+)


def parse_bundle_file(bundle_path: Path) -> dict:
    """
    Return { "full/file/path": "<file content>", ... }.

    The expected pattern is:

        ======== (80 × '=') ========
        path/to/file.ext
        --- (dashes) ---
        ...file content...
        ======== (next file...)

    Everything up to (but **excluding**) the next line of '=' is
    considered file content.
""" mapping = {} lines = bundle_path.read_text(encoding="utf-8").splitlines() i = 0 n = len(lines) while i < n: # 1) Find next "====" while i < n and not SEP_EQ.match(lines[i]): i += 1 if i >= n: break i += 1 # move past the "====" line # 2) Skip blank lines, then grab the filepath line while i < n and not lines[i].strip(): i += 1 if i >= n: break filepath = lines[i].strip() i += 1 # 3) Skip the '----' separator while i < n and not SEP_DASH.match(lines[i]): i += 1 if i < n: i += 1 # past the '----' # 4) Gather content until next '====' content_lines = [] while i < n and not SEP_EQ.match(lines[i]): content_lines.append(lines[i]) i += 1 mapping[filepath] = "\n".join(content_lines).rstrip("\n") return mapping # --------------------------------------------------------------------------- # # Part 3 – Writing JSON + consistency check # --------------------------------------------------------------------------- # def write_json(obj: dict, out_path: Path): with out_path.open("w", encoding="utf-8") as fh: json.dump(obj, fh, indent=2, ensure_ascii=False) print(f"✔ Wrote {len(obj):,} entries → {out_path}") def compare_keys(map1: dict, map2: dict): keys1, keys2 = set(map1), set(map2) if keys1 == keys2: print("🎉 SUCCESS – both JSONs reference the exact same filenames.") return True only_in_1 = sorted(keys1 - keys2) only_in_2 = sorted(keys2 - keys1) if only_in_1: print("\n⚠️ Present in summaries but missing in contents:") for k in only_in_1: print(" ", k) if only_in_2: print("\n⚠️ Present in contents but missing in summaries:") for k in only_in_2: print(" ", k) print( f"\n✖ Mismatch – summaries: {len(keys1)} paths, " f"contents: {len(keys2)} paths." ) return False # --------------------------------------------------------------------------- # def main(): if len(sys.argv) != 5: sys.exit( "USAGE:\n" " python make_jsons.py " " " ) tree_txt, bundle_txt, summaries_json, contents_json = map(Path, sys.argv[1:]) print("• Building summary mapping …") summary_map = build_summary_map(tree_txt) write_json(summary_map, summaries_json) print("\n• Building contents mapping …") contents_map = parse_bundle_file(bundle_txt) write_json(contents_map, contents_json) print("\n• Comparing filename sets …") compare_keys(summary_map, contents_map) if __name__ == "__main__": main()