"""Flatten ``transactions.json`` into SQLite tables.

The input is a JSON array of objects. Each object becomes one row of the
``transactions`` table. Keys listed in ``flat_keys`` hold nested objects whose
fields are inlined as ``<key>_<field>`` columns. Keys listed in
``extract_keys`` hold lists of objects that are written to their own tables
(``transactions_<key>``, ``transactions_<key>_<subkey>``, ...).
"""
# Provenance: main.py from the patch "add first version"
# (b38ffeacef8b8ef6d5a8fc8595655409be65fdea, gowthaman.b, 2023-06-13).
# The same patch adds .gitignore ("*.db", "*.json", ".idea/") and
# requirements.txt ("ijson").
import logging
import sqlite3

logger = logging.getLogger(__name__)

# Top-level keys whose object value is inlined into the parent row.
flat_keys = ["cost_center", "location", "customer"]

# Top-level keys whose list value is split out into child tables. Nested
# "extract_keys" entries describe lists found inside each child object.
extract_keys = {
    "price_modifiers": {
        "flat_keys": [],
        "extract_keys": {}
    },
    "sale_items": {
        "flat_keys": [],
        "extract_keys": {
            "categories": {
                "key": "categories",
            },
            "modifiers": {
                "key": "modifiers",
                "flat_keys": [],
                "extract_keys": {
                    "categories": {
                        "key": "categories",
                    }
                }
            }
        }
    }
}

parent = "transactions"
con = sqlite3.connect(f"{parent}.db")
cur = con.cursor()

# Not referenced by any function in this file; kept so importers still find it.
global_counter = 1


def _quote_identifier(name):
    """Return *name* as a double-quoted SQL identifier."""
    # Doubling embedded quotes is the SQL escape for quoted identifiers; this
    # keeps JSON keys that are reserved words or contain spaces usable.
    return '"' + str(name).replace('"', '""') + '"'


def extract_child(merge_headers, item, k, ext, prev_step):
    """Collect the column names needed for the child table of ``item[k]``.

    Args:
        merge_headers: dict mapping table name -> list of column names;
            updated in place and accumulated across calls.
        item: the object that owns the list stored under ``k``.
        k: key of the list inside ``item``.
        ext: configuration for ``k``; its optional ``"extract_keys"`` dict
            names sub-keys that are themselves extracted recursively.
        prev_step: table-name prefix, e.g. ``"transactions_"``.
    """
    table = f"{prev_step}{k}"
    nested = ext.get("extract_keys", {})
    # Look the list up under the same key it is stored with, so columns seen
    # in earlier items are kept instead of being overwritten.
    child_headers = merge_headers.setdefault(table, [])

    for child in item[k] or []:
        for sub_key in child:
            if sub_key in nested:
                extract_child(merge_headers, child, sub_key, nested[sub_key], f"{table}_")
                continue
            child_header = f"{k}_{sub_key}"
            if child_header not in child_headers:
                child_headers.append(child_header)


def extract_child_value(merge_headers, item, k, ext, prev_step):
    """Insert one row per element of ``item[k]`` into its child table.

    Parameters mirror :func:`extract_child`. ``merge_headers`` is only passed
    through to the recursive calls.
    """
    table = f"{prev_step}{k}"
    nested = ext.get("extract_keys", {})

    for child in item[k] or []:
        # A fresh dict per child: values must not leak between siblings.
        row = {}
        for sub_key, sub_value in child.items():
            if sub_key in nested:
                extract_child_value(
                    merge_headers, child, sub_key, nested[sub_key], f"{table}_"
                )
            else:
                row[f"{k}_{sub_key}"] = sub_value
        write_to_database(table, row)


def make_table(tbl_name, cols):
    """Create table ``tbl_name`` (if missing) with a TEXT column per name in ``cols``.

    Every table also gets an auto-incrementing ``syspk`` primary key. ``cols``
    may be empty, in which case only ``syspk`` is created.
    """
    column_defs = ["syspk integer primary key autoincrement"]
    column_defs.extend(f"{_quote_identifier(col)} TEXT" for col in cols)
    create_tbl_sql = (
        f"create table if not exists {_quote_identifier(tbl_name)} "
        f"({', '.join(column_defs)})"
    )
    logger.info("%s = %s", tbl_name, cols)
    logger.info("%s = %s", tbl_name, create_tbl_sql)
    cur.execute(create_tbl_sql)


def write_to_database(tbl, cols):
    """Insert one row into ``tbl``.

    Args:
        tbl: table name.
        cols: dict mapping column name -> value. Values are stored as text;
            ``None`` is stored as SQL NULL. An empty dict inserts a row made
            of default values only.
    """
    table = _quote_identifier(tbl)
    if cols:
        col_names = ", ".join(_quote_identifier(name) for name in cols)
        value_placeholders = ", ".join("?" for _ in cols)
        values = tuple(None if v is None else str(v) for v in cols.values())
        sql = f"insert into {table} ({col_names}) values ({value_placeholders})"
    else:
        # "insert into t () values ()" is not valid SQL.
        values = ()
        sql = f"insert into {table} default values"

    logger.debug("execute %s with values %s", sql, values)
    cur.execute(sql, values)


def _flatten_item(item):
    """Return the parent-table row (column -> value) for one top-level object."""
    row = {}
    for key, value in item.items():
        if key in flat_keys:
            # A null nested object simply contributes no columns.
            for sub_key, sub_value in (value or {}).items():
                row[f"{key}_{sub_key}"] = sub_value
        elif key not in extract_keys:
            row[key] = value
    return row


def parse_json(json_path="transactions.json"):
    """Load ``json_path`` into the SQLite database in two streaming passes.

    The first pass collects every column name so the tables can be created;
    the second pass inserts the rows. All inserts are committed once at the
    end.

    Args:
        json_path: path of the JSON file holding a top-level array of objects.
    """
    # Imported here so the storage helpers above stay importable without the
    # third-party streaming parser installed.
    import ijson

    headers = {}  # used as an insertion-ordered set of parent columns
    merge_headers = {}

    # First pass: collect all headers. ijson parses bytes, hence binary mode.
    with open(json_path, "rb") as source:
        for item in ijson.items(source, "item"):
            headers.update(dict.fromkeys(_flatten_item(item)))
            for key in item:
                if key in extract_keys:
                    extract_child(merge_headers, item, key, extract_keys[key], f"{parent}_")

    make_table(parent, list(headers))
    for table_name, table_cols in merge_headers.items():
        make_table(table_name, table_cols)

    # Second pass: write child rows, then the flattened parent row.
    with open(json_path, "rb") as source:
        for item in ijson.items(source, "item"):
            for key in item:
                if key in extract_keys:
                    extract_child_value(
                        merge_headers, item, key, extract_keys[key], f"{parent}_"
                    )
            write_to_database(parent, _flatten_item(item))

    con.commit()


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    try:
        parse_json()
    finally:
        con.close()