#!/usr/bin/env python3
"""
make_jsons.py
=============
1. From a tree-style directory listing (with summaries after an en-dash “–”)
produce <summaries>.json : { "full/file/path": "summary", ... }
2. From a “concatenated source” file that looks like
================================
path/to/file.tsx
--------------------------------
...file content...
produce <contents>.json : { "full/file/path": "<entire source>", ... }
3. Checks that the key-sets of both JSON files are identical and prints
any filenames that are missing in either mapping.
---------------------------------------------------------------------------
USAGE
-----
python make_jsons.py tree.txt bundle.txt summaries.json contents.json
where
• tree.txt – your original `tree` output with summaries
• bundle.txt – the big text file with `=== / ---` separators + file bodies
• summaries.json, contents.json – output files
---------------------------------------------------------------------------
"""
import json
import re
import sys
from pathlib import Path

INDENT_WIDTH = 4                        # one indent level = 4 glyphs ("│   " or "    ")
EN_DASH_SPLIT = re.compile(r"\s+–\s+")  # " – " (space, en dash, space) delimiter

# --------------------------------------------------------------------------- #
# Part 1 – Parse the `tree` listing
# --------------------------------------------------------------------------- #
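# Illustration only: a hypothetical excerpt of the kind of `tree` listing this
# parser expects (file names and summaries below are invented, not taken from a
# real project):
#
#     ├── README.md – project overview and setup notes
#     └── src
#         ├── App.tsx – top-level component and routing
#         └── lib
#             └── api.ts – small fetch wrapper
#
# File lines carry a " – " (space, en dash, space) before their summary;
# directory lines have no summary and only contribute a path segment.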
def parse_tree_listing(lines):
    """Yield (depth, name, summary_or_None) for each meaningful line."""
    for raw in lines:
        if not raw.strip():
            continue

        # Strip the "tree art" prefix up to the first '── '
        m = re.search(r"[├└]──\s*", raw)
        if m:
            indent_prefix = raw[:m.start()]
            content = raw[m.end():].rstrip()
        else:                            # root line without glyphs
            indent_prefix = ""
            content = raw.strip()
        depth = len(indent_prefix) // INDENT_WIDTH

        # Split "<name> – <summary>" on the first " – " delimiter; an en dash
        # that is not surrounded by whitespace stays part of the name.
        parts = EN_DASH_SPLIT.split(content, maxsplit=1)
        if len(parts) == 2:
            name, summary = parts[0], parts[1].strip()
        else:
            name, summary = content, None
        yield depth, name.strip(), summary


def build_summary_map(tree_path: Path) -> dict:
    with tree_path.open(encoding="utf-8") as fh:
        lines = fh.readlines()

    stack, mapping = [], {}
    for depth, name, summary in parse_tree_listing(lines):
        stack = stack[:depth]
        stack.append(name)
        if summary:                      # directories have no summary
            full_path = "/".join(stack)
            mapping[full_path] = summary
    return mapping
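
# For the hypothetical excerpt sketched above, build_summary_map would yield
# roughly:
#
#     {
#       "README.md": "project overview and setup notes",
#       "src/App.tsx": "top-level component and routing",
#       "src/lib/api.ts": "small fetch wrapper"
#     }
#
# The stack/depth bookkeeping turns nesting into slash-joined paths, and
# directory lines (which have no summary) never become keys of their own.
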
# --------------------------------------------------------------------------- #
# Part 2 – Parse the “bundle” file that has file bodies
# --------------------------------------------------------------------------- #
SEP_EQ = re.compile(r"^=+\s*$") # line of only '=' chars
SEP_DASH = re.compile(r"^-{3,}\s*$") # line of only '-' chars (3+)


def parse_bundle_file(bundle_path: Path) -> dict:
    """
    Return { "full/file/path": "<complete source text>", ... }.

    The expected pattern is:

        ======== (80 × '=') ========
        path/to/file.ext
        --- (dashes) ---
        <zero-or-more lines of code/text>
        ======== (next file...)

    Everything up to (but **excluding**) the next line of '=' is considered
    file content.
    """
    mapping = {}
    lines = bundle_path.read_text(encoding="utf-8").splitlines()
    i = 0
    n = len(lines)

    while i < n:
        # 1) Find the next "====" separator
        while i < n and not SEP_EQ.match(lines[i]):
            i += 1
        if i >= n:
            break
        i += 1                           # move past the "====" line

        # 2) Skip blank lines, then grab the filepath line
        while i < n and not lines[i].strip():
            i += 1
        if i >= n:
            break
        filepath = lines[i].strip()
        i += 1

        # 3) Skip the '----' separator
        while i < n and not SEP_DASH.match(lines[i]):
            i += 1
        if i < n:
            i += 1                       # past the '----'

        # 4) Gather content until the next '===='
        content_lines = []
        while i < n and not SEP_EQ.match(lines[i]):
            content_lines.append(lines[i])
            i += 1

        mapping[filepath] = "\n".join(content_lines).rstrip("\n")
    return mapping
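
# Illustration only: given a bundle.txt containing the (hypothetical) block
#
#     ================================================================
#     src/App.tsx
#     ----------------------------------------------------------------
#     export default function App() { ... }
#
# parse_bundle_file would return roughly
#     { "src/App.tsx": "export default function App() { ... }" }
# with trailing blank lines stripped from each body.
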
# --------------------------------------------------------------------------- #
# Part 3 – Writing JSON + consistency check
# --------------------------------------------------------------------------- #
def write_json(obj: dict, out_path: Path):
    with out_path.open("w", encoding="utf-8") as fh:
        json.dump(obj, fh, indent=2, ensure_ascii=False)
    print(f"✔ Wrote {len(obj):,} entries → {out_path}")


def compare_keys(map1: dict, map2: dict):
    keys1, keys2 = set(map1), set(map2)
    if keys1 == keys2:
        print("🎉 SUCCESS – both JSONs reference the exact same filenames.")
        return True

    only_in_1 = sorted(keys1 - keys2)
    only_in_2 = sorted(keys2 - keys1)
    if only_in_1:
        print("\n⚠️ Present in summaries but missing in contents:")
        for k in only_in_1:
            print("    ", k)
    if only_in_2:
        print("\n⚠️ Present in contents but missing in summaries:")
        for k in only_in_2:
            print("    ", k)
    print(
        f"\n✖ Mismatch – summaries: {len(keys1)} paths, "
        f"contents: {len(keys2)} paths."
    )
    return False

# --------------------------------------------------------------------------- #
def main():
    if len(sys.argv) != 5:
        sys.exit(
            "USAGE:\n"
            "  python make_jsons.py <tree.txt> <bundle.txt> "
            "<summaries.json> <contents.json>"
        )
    tree_txt, bundle_txt, summaries_json, contents_json = map(Path, sys.argv[1:])

    print("• Building summary mapping …")
    summary_map = build_summary_map(tree_txt)
    write_json(summary_map, summaries_json)

    print("\n• Building contents mapping …")
    contents_map = parse_bundle_file(bundle_txt)
    write_json(contents_map, contents_json)

    print("\n• Comparing filename sets …")
    compare_keys(summary_map, contents_map)


if __name__ == "__main__":
    main()