Extract data from PostgreSQL dump file
September 14, 2011
After taking a database dump from PostgreSQL using pg_dump
, you may want to only get the schema or only the data. This script has been created and tested using Python versions 2.7 (Linux) and 3.2 (Windows), using a dump file from PostgreSQL version 9.0 (Linux).
Usage is simple. Provide an input dump file with the -f flag; output file with -o flag; and then choose either to extract/export data with -d flag or schema with -s flag. If you only want to extract data for certain tables, use the -t flag and provide a comma-separated list of table names. These table names should match exactly with what’s in the dump file.
I hope you find this script useful and can modify/extend it to your needs. If you have ideas on how to make this code better, please do not hesitate to share your ideas.
from re import search
import argparse
import codecs
script_version='0.0.1'
parser = argparse.ArgumentParser(
description='From a pgsql dump file, extract only the data to be inserted',
version=script_version)
parser.add_argument('-f', '--file', metavar='in-file', action='store',
dest='in_file_name', type=str, required=True,
help='Name of pgsql dump file')
parser.add_argument('-o', '--out-file', metavar='out-file', action='store',
dest='out_file_name', type=str, required=True,
help='Name of output file')
parser.add_argument('-d', '--data-only', action="store_true", default=False,
dest='data_only', required=False,
help='''Only data is extracted and schema is ignored.
If not specified, then -s must be specified.''')
parser.add_argument('-t', '--table-list', metavar='table-name-list', action='store',
dest='table_name_list', type=str, required=False,
help='''Optional: Command-separated list of table names to process.
Works only with -d flag.''')
parser.add_argument('-s', '--schema-only', action="store_true", default=False,
dest='schema_only', required=False,
help='''Only schema is extracted and data is ignored.
If not specified, then -d must be specified.''')
args = parser.parse_args()
if args.data_only and args.schema_only:
print ('Error: You can\'t provide -d and -s flags at the same time; choose only one')
exit()
elif args.data_only:
data_only = True
schema_only = False
start_copy = False
elif args.schema_only:
data_only = False
schema_only = True
start_copy = True
else:
print ('Error: Choose one of -d and -s flags')
exit()
print ('Processing File:', args.in_file_name)
input_file_name = args.in_file_name
output_file_name = args.out_file_name
table_name_list = args.table_name_list
if table_name_list:
table_list = table_name_list.split(',')
else:
table_list = None
outfile = codecs.open(output_file_name, "w", encoding="utf-8")
with codecs.open(input_file_name, "r", encoding="utf-8") as infile:
for line in infile:
if data_only:
if (not start_copy) and search('^COPY', line) and table_list:
for table in table_list:
if search(''.join(['^COPY ', table.strip(), ' ']), line):
start_copy = True
outfile.write(line)
break
elif (not start_copy) and search('^COPY', line) and not table_list:
start_copy = True
outfile.write(line)
elif start_copy and search('^\\\.', line):
start_copy = False
outfile.write(line)
elif start_copy:
outfile.write(line)
elif schema_only:
if start_copy and search('^COPY', line):
start_copy = False
elif (not start_copy) and search('^\\\.', line):
start_copy = True
elif start_copy:
outfile.write(line)
print ('Done')
outfile.close()
Recent Comments