option to set delimiter

This commit is contained in:
gowthaman 2023-06-17 23:52:36 +05:30
parent 66133cc661
commit 20ff823972
2 changed files with 3 additions and 1 deletions

View File

@ -9,6 +9,7 @@ Steps
* run `python3 main.py`
* required `--input <dir of input json>`
* required `--output <dir of output csvs>`
* (optional) `--delimiter "|"`, change the separator/delimiter, default is |
* (optional) to merge all json to single csv `--single`, default is one set of csv files for one json
* (optional) number of records to scan when collecting all headers `--metadata <number of records>`, defaults to 1000
* required `--join-column <column name from first level to use as merge column>`

View File

@ -10,6 +10,7 @@ import argparse
# Command-line interface for the JSON -> CSV converter.
# --input / --output / --join-column are mandatory; the remaining flags
# fall back to the defaults given below.
parser = argparse.ArgumentParser()
parser.add_argument("--input", help="folder containing input json(s)", required=True, type=pathlib.Path)
parser.add_argument("--output", help="folder to place csv", required=True, type=pathlib.Path)
# The value is handed to the CSV writer as its field separator.
parser.add_argument("--delimiter", help="delimiter for CSV (default is '|')", default="|")
parser.add_argument("--single", action="store_true", help="merge all json files to single output csv")
parser.add_argument("--metadata", type=int, help="how many records to parse for building metadata", default=1000)
parser.add_argument("--join-column", help="join column from top-level to merge nested json", required=True)
@ -65,7 +66,7 @@ class DBConn:
for tbl in tbls:
clients = pd.read_sql(f"SELECT * FROM \"{tbl}\"", self.con)
clients.to_csv(args.output / f"{prefix}{tbl}.csv", index=False)
clients.to_csv(args.output / f"{prefix}{tbl}.csv", index=False, sep=args.delimiter)
dbConn = DBConn()