import os
import docx2txt
from docx import Document
from docx.opc.constants import RELATIONSHIP_TYPE as RT
import re
from pymongo import MongoClient
import shutil
import uuid

source = "./source"
dist = "./Dash-Web/src/server/public/files"

collection_handle = MongoClient("localhost", 27017)["Dash"]["buxton"]

def extract_links(fileName):
    """Collect hyperlink relationship targets from the .docx at *fileName*.

    Every relationship on the document part whose type is ``RT.HYPERLINK``
    contributes its target, except targets containing ``".aspx"``, which are
    skipped. Targets are returned in relationship iteration order.
    """
    relationships = Document(fileName).part.rels
    candidates = (relationships[rel_id] for rel_id in relationships)
    return [
        rel._target
        for rel in candidates
        if rel.reltype == RT.HYPERLINK and ".aspx" not in rel._target
    ]

def extract_value(kv_string):
    """Return the value portion of a ``"key: value"`` string, stripped.

    Only the first colon is treated as the separator, so a value that itself
    contains colons (for example ``"Dimensions: 10:30 cm"``) is returned whole
    instead of being cut off at the second colon.

    Raises:
        IndexError: if *kv_string* contains no colon at all.
    """
    # maxsplit=1 keeps everything after the first colon together.
    return kv_string.split(":", 1)[1].strip()

def mkdir_if_absent(path):
    """Create the directory *path* unless something already exists there.

    Only the final path component is created (``os.mkdir``); parent
    directories must already exist. An ``OSError`` during creation is reported
    on stdout and swallowed, so the function always returns ``None``.
    """
    try:
        if not os.path.exists(path):
            os.mkdir(path)
    except OSError:
        # Report the path that actually failed. The message previously
        # formatted `file_name`, which is not defined in this function and
        # raised NameError unless a module-level global happened to exist.
        print("Failed to create the appropriate directory structures for %s" % path)

def parse_document(file_name: str):
    """Parse one .docx file from ``source`` into a dict of extracted fields.

    The document text is extracted with docx2txt, which is also handed a
    directory under ``dist`` named after the part of *file_name* before its
    first dot (presumably where docx2txt writes extracted images -- confirm
    against the docx2txt documentation). The text is split into cleaned lines,
    lines of length 0 or 1 are dropped, and the remaining lines are read
    positionally with a moving cursor.

    The returned dict has the keys, in order: ``file_name``, ``title``,
    ``short_description``, ``buxton_notes``, ``company``, ``year``,
    ``original_price``, ``degrees_of_freedom``, ``dimensions``,
    ``primary_key``, ``secondary_key``, ``hyperlinks``,
    ``link_descriptions``, ``images``, ``captions``, ``notes``.

    Raises:
        IndexError: if the document has fewer lines than the fixed positions
            read below, or a ``"Device Details"`` / ``"Image"`` marker line
            is never found.
    """
    stem = file_name.split(".")[0]
    out_dir = dist + "/" + stem
    mkdir_if_absent(out_dir)

    text = str(docx2txt.process(source + "/" + file_name, out_dir))

    def tidy(line):
        # Drop newlines/tabs and surrounding whitespace first, then map the
        # non-breaking space, en dash and curly double quotes to ASCII.
        cleaned = re.sub("[\n\t]+", "", line).strip()
        for fancy, plain in (
            (u"\u00A0", " "),
            (u"\u2013", "-"),
            (u"\u201c", '"'),
            (u"\u201d", '"'),
        ):
            cleaned = cleaned.replace(fancy, plain)
        return cleaned

    tidied = [tidy(chunk) for chunk in text.split("\n")]
    lines = [line for line in tidied if len(line) > 1]

    fields = {
        "file_name": file_name,
        "title": lines[2],
        "short_description": lines[3].replace("Short Description: ", ""),
    }

    # Free-form notes run from line 5 up to the "Device Details" marker.
    pos = 5
    note_parts = []
    while lines[pos] != "Device Details":
        note_parts.append(lines[pos])
        pos += 1
    fields["buxton_notes"] = " ".join(note_parts).strip()

    # The line after the marker holds three "label: value" cells separated
    # by "|".
    pos += 1
    cells = [cell.strip().split(":") for cell in lines[pos].split("|")]
    for index, key in enumerate(("company", "year", "original_price")):
        fields[key] = cells[index][1].strip()

    # Four "label: value" lines follow; one line is skipped before
    # primary_key (hence the advance of 2).
    for key, advance in (
        ("degrees_of_freedom", 1),
        ("dimensions", 1),
        ("primary_key", 2),
        ("secondary_key", 1),
    ):
        pos += advance
        fields[key] = extract_value(lines[pos])

    fields["hyperlinks"] = extract_links(source + "/" + file_name)

    # Link descriptions run until the "Image" marker line.
    pos += 2
    descriptions = []
    while lines[pos] != "Image":
        descriptions.append(lines[pos])
        pos += 1
    fields["link_descriptions"] = descriptions

    # Skip three lines, then read (image, caption) line pairs until
    # "NOTES:" or until fewer than two lines remain.
    pos += 3
    image_names = []
    caption_texts = []
    while pos + 1 < len(lines) and lines[pos] != "NOTES:":
        image_names.append(lines[pos])
        caption_texts.append(lines[pos + 1])
        pos += 2
    fields["images"] = image_names
    fields["captions"] = caption_texts

    # Everything after a "NOTES:" line, if one is present at the cursor.
    has_notes = pos < len(lines) and lines[pos] == "NOTES:"
    fields["notes"] = lines[pos + 1:] if has_notes else []

    return fields

def upload(document):
    """Insert *document* into ``collection_handle`` inside a wrapper record.

    The wrapper carries a freshly generated UUID4 string as ``_id``, the
    parsed document under ``fields``, and the constant ``"Doc"`` under
    ``__type``.
    """
    collection_handle.insert_one({
        "_id": str(uuid.uuid4()),
        "fields": document,
        "__type": "Doc",
    })

# Start from an empty output directory: remove any previous one, spin until
# the filesystem no longer reports it, then recreate it.
if os.path.exists(dist):
    shutil.rmtree(dist)
while os.path.exists(dist):
    pass
os.mkdir(dist)

# Parse and upload every .docx file found directly under the source directory.
for file_name in os.listdir(source):
    if not file_name.endswith('.docx'):
        continue
    upload(parse_document(file_name))

# Write a .gitignore into the output directory that ignores everything
# except the .gitignore file itself.
ignore_rules = ['*', '!.gitignore']
with open(dist + "/.gitignore", 'w') as f:
    f.write('\n'.join(ignore_rules))