UserGuidelines/Importer: cleanCSV.py

File cleanCSV.py, 1.1 KB (added by Fran Boon, 8 years ago)

Remove line-breaks within text fields from a CSV file

Line 
#!/usr/bin/env python

"""Clean CSV files that have line-breaks in the middle of text fields.

Usage: python cleanCSV.py myfile.csv

The cleaned data is written next to the input as ``myfile-fixed.csv``.

Assumes all fields are surrounded with "" so that every complete record
ends with a double quote. A physical line that does not end with a double
quote is treated as the first part of a record that was broken by a
line-break inside a text field, and is glued to the following line(s).
"""

import os
import sys


def _merge_broken_lines(raw_lines):
    """Return the records from raw_lines with in-field line-breaks removed.

    Each line is stripped of surrounding whitespace. Lines are concatenated
    (without any separator) until the accumulated text ends with a double
    quote, at which point it is emitted as one record. Blank lines are
    dropped as a consequence of this rule.
    """
    merged = []
    pending = ""
    for raw in raw_lines:
        line = pending + raw.strip()
        if line.endswith('"'):
            merged.append(line)
            pending = ""
        else:
            # This must be a line-break in the middle of a text field
            pending = line
    if pending:
        # The input ended inside an unterminated record: keep the text
        # rather than silently losing the tail of the file.
        merged.append(pending)
    return merged


def main(argv=None):
    """Run the cleaner and return a process exit code.

    argv -- argument list in the same layout as sys.argv (script name
            first, CSV path second); defaults to sys.argv.

    Returns 0 on success, or 2 after printing a message when the argument
    is missing, the filename is not a .csv name, or the file cannot be
    opened.
    """
    if argv is None:
        argv = sys.argv

    # argv[0] is always the script itself, however the script is launched,
    # so the CSV file has to be the first real argument.
    if len(argv) < 2:
        print("Specify CSV file as argument: python cleanCSV.py myfile.csv")
        return 2
    source = argv[1]

    # splitext only looks at the final extension, so dots in directory
    # names or earlier in the filename ("./data/my.file.csv") are fine.
    prefix, extension = os.path.splitext(source)
    if not extension:
        print("Invalid filename!")
        return 2
    if extension.lower() != ".csv":
        print("Input file should be xxx.csv!")
        return 2

    try:
        input_file = open(source, "r")
    except (IOError, OSError):
        print("Cannot open file!")
        return 2
    with input_file:
        lines = _merge_broken_lines(input_file)

    output = "%s-fixed.csv" % prefix
    with open(output, "w") as output_file:
        output_file.write("\n".join(lines))
    return 0


if __name__ == "__main__":
    sys.exit(main())