#!/usr/bin/env python

# Cleans CSV files with line-breaks in the middle of text fields
# - Assumes all fields surrounded with ""
#
# Usage: python cleanCSV.py myfile.csv
# Writes the repaired rows to "myfile-fixed.csv" next to the input file.
# The code is kept compatible with both Python 2 and Python 3.

import os
import sys


def _join_broken_lines(raw_lines):
    """Merge physical lines that were split inside a quoted text field.

    Each line is stripped of surrounding whitespace. A line that does not
    end with a double quote is treated as an unfinished record and is glued
    (without any separator) to the following line(s) until the accumulated
    text ends with a double quote.

    raw_lines -- iterable of strings (e.g. an open text file).
    Returns a list of repaired record strings, without line terminators.
    """
    records = []
    pending = ""
    for raw in raw_lines:
        text = pending + raw.strip()
        if text.endswith('"'):
            records.append(text)
            pending = ""
        else:
            # This must be a line-break in the middle of a text field
            pending = text
    if pending:
        # The input ended inside an unfinished record; keep the fragment
        # instead of silently dropping the data.
        records.append(pending)
    return records


def main(argv):
    """Run the cleaner for the command line *argv* (same shape as sys.argv).

    Returns the process exit status: 0 on success, 2 on a usage error, an
    invalid file name, or an input file that cannot be opened.
    """
    # argv[0] is always the script itself, whether started as
    # "python cleanCSV.py xxx.csv" or "./cleanCSV.py xxx.csv".
    if len(argv) < 2:
        print("Specify CSV file as argument: python cleanCSV.py myfile.csv")
        return 2
    source_path = argv[1]

    # splitext only looks at the last path component, so "./data/my.file.csv"
    # is handled correctly (splitting on the first "." is not).
    prefix, extension = os.path.splitext(source_path)
    if not extension:
        print("Invalid filename!")
        return 2
    if extension != ".csv":
        print("Input file should be xxx.csv!")
        return 2

    try:
        source_file = open(source_path, "r")
    except (IOError, OSError):
        print("Cannot open file!")
        return 2
    with source_file:
        records = _join_broken_lines(source_file)

    target_path = "%s-fixed.csv" % prefix
    with open(target_path, "w") as target_file:
        target_file.write("\n".join(records))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))