ondemand add cols if found
This commit is contained in:
parent
0fd1a5f79b
commit
184ee74cda
23
main.py
23
main.py
@ -12,6 +12,7 @@ parser.add_argument("--input", help="folder containing input json(s)", required=
|
||||
parser.add_argument("--output", help="folder to place csv", required=True, type=pathlib.Path)
|
||||
parser.add_argument("--delimiter", help="delimiter for CSV (default is '|'", default="|")
|
||||
parser.add_argument("--single", action="store_true", help="merge all json files to single output csv")
|
||||
parser.add_argument("--debug", action="store_true", help="print debug logs")
|
||||
parser.add_argument("--metadata", type=int, help="how many records to parse for building metadata", default=1000)
|
||||
parser.add_argument("--join-column", help="join column from top-level to merge nested json", required=True)
|
||||
parser.add_argument("--name", help="join column from top-level to merge nested json", required=True)
|
||||
@ -26,6 +27,7 @@ class DBConn:
|
||||
self.counter = 0
|
||||
self.ts = ''
|
||||
self.reinit_db()
|
||||
self.table_col_map = {}
|
||||
|
||||
def reinit_db(self):
|
||||
self.counter += 1
|
||||
@ -37,8 +39,10 @@ class DBConn:
|
||||
syspk = "syspk integer primary key autoincrement"
|
||||
other_cols = ', '.join([f"\"{f}\" TEXT" for f in cols])
|
||||
create_tbl_sql = f"create table if not exists \"{tbl_name}\" ({syspk}, {other_cols})"
|
||||
print(f"create sql = {create_tbl_sql}")
|
||||
if args.debug:
|
||||
print(f"create sql = {create_tbl_sql}")
|
||||
self.cur.execute(create_tbl_sql)
|
||||
self.table_col_map[tbl_name] = cols
|
||||
|
||||
def write_to_database(self, tbl, cols):
|
||||
keys = cols.keys() # OrderedDict
|
||||
@ -52,8 +56,22 @@ class DBConn:
|
||||
# trim values of spaces
|
||||
values = tuple([str(cols[k]).strip() for k in keys])
|
||||
|
||||
# there might be a scenario that new cols might appear at a later stage, if so,
|
||||
# we shall accommodate by adding it to the database
|
||||
tbl_cols = self.table_col_map[tbl]
|
||||
not_found = []
|
||||
for col in cols:
|
||||
if col not in tbl_cols:
|
||||
not_found.append(col)
|
||||
|
||||
if len(not_found) > 0:
|
||||
for new_col in not_found:
|
||||
self.cur.execute(f"alter table \"{tbl}\" add column \"{new_col}\" text")
|
||||
|
||||
print(f"added {not_found} cols to {tbl}")
|
||||
self.table_col_map[tbl] = cols
|
||||
|
||||
sql = f"insert into \"{tbl}\" ({col_names}) values({value_placeholders})"
|
||||
print(f"sql = {sql}")
|
||||
self.cur.execute(sql, values)
|
||||
|
||||
def make_csv_from_tables(self, prefix=''):
|
||||
@ -142,6 +160,7 @@ def parse_json():
|
||||
top_level = args.name
|
||||
|
||||
# first pass, collect all headers
|
||||
print(f"parsing {input_files[0]} for metadata")
|
||||
for topLevelItem in ijson.items(open(input_path / input_files[0]), "item"):
|
||||
if parsed == args.metadata:
|
||||
print(f"parsed {parsed} records for metadata")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user