ondemand add cols if found

This commit is contained in:
gowthaman.b 2023-06-19 16:38:44 +05:30
parent 0fd1a5f79b
commit 184ee74cda

23
main.py
View File

@ -12,6 +12,7 @@ parser.add_argument("--input", help="folder containing input json(s)", required=
parser.add_argument("--output", help="folder to place csv", required=True, type=pathlib.Path)
parser.add_argument("--delimiter", help="delimiter for CSV (default is '|'", default="|")
parser.add_argument("--single", action="store_true", help="merge all json files to single output csv")
parser.add_argument("--debug", action="store_true", help="print debug logs")
parser.add_argument("--metadata", type=int, help="how many records to parse for building metadata", default=1000)
parser.add_argument("--join-column", help="join column from top-level to merge nested json", required=True)
parser.add_argument("--name", help="join column from top-level to merge nested json", required=True)
@ -26,6 +27,7 @@ class DBConn:
self.counter = 0
self.ts = ''
self.reinit_db()
self.table_col_map = {}
def reinit_db(self):
self.counter += 1
@ -37,8 +39,10 @@ class DBConn:
syspk = "syspk integer primary key autoincrement"
other_cols = ', '.join([f"\"{f}\" TEXT" for f in cols])
create_tbl_sql = f"create table if not exists \"{tbl_name}\" ({syspk}, {other_cols})"
print(f"create sql = {create_tbl_sql}")
if args.debug:
print(f"create sql = {create_tbl_sql}")
self.cur.execute(create_tbl_sql)
self.table_col_map[tbl_name] = cols
def write_to_database(self, tbl, cols):
keys = cols.keys() # OrderedDict
@ -52,8 +56,22 @@ class DBConn:
# trim values of spaces
values = tuple([str(cols[k]).strip() for k in keys])
# there might be a scenario that new cols might appear at a later stage, if so,
# we shall accommodate by adding it to the database
tbl_cols = self.table_col_map[tbl]
not_found = []
for col in cols:
if col not in tbl_cols:
not_found.append(col)
if len(not_found) > 0:
for new_col in not_found:
self.cur.execute(f"alter table \"{tbl}\" add column \"{new_col}\" text")
print(f"added {not_found} cols to {tbl}")
self.table_col_map[tbl] = cols
sql = f"insert into \"{tbl}\" ({col_names}) values({value_placeholders})"
print(f"sql = {sql}")
self.cur.execute(sql, values)
def make_csv_from_tables(self, prefix=''):
@ -142,6 +160,7 @@ def parse_json():
top_level = args.name
# first pass, collect all headers
print(f"parsing {input_files[0]} for metadata")
for topLevelItem in ijson.items(open(input_path / input_files[0]), "item"):
if parsed == args.metadata:
print(f"parsed {parsed} records for metadata")