|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
class OvvpFile(object):
    """In-memory form of a tab-separated table with an optional header block.

    The text layout written by ``to_lines`` and read by ``parse_from`` is:

    * zero or more ``key<TAB>value`` header lines,
    * one empty line that terminates the header block,
    * one line of tab-separated column names,
    * one line per row, holding tab-separated cell values in column order.
    """

    def __init__(self):
        # Header key -> header value.
        self.headers = {}
        # Column names, in the order they appear in the file.
        self.columns = []
        # One dict per data row, mapping column name -> cell text.
        self.rows = []

    @staticmethod
    def _to_text(line):
        """Return *line* as text, decoding UTF-8 bytes; reject other types."""
        # On Python 2 ``bytes`` is ``str`` and ``type(u'')`` is ``unicode``,
        # so the same checks work on both major versions.
        if isinstance(line, bytes):
            return line.decode('utf8')
        if not isinstance(line, type(u'')):
            raise TypeError(
                'expected a text or bytes line, got %s' % type(line).__name__)
        return line

    def to_lines(self):
        """Yield the serialized file one newline-terminated line at a time.

        Headers are emitted sorted by key, followed by a single empty line,
        the column-name line and then one line per row.

        Raises:
            KeyError: if a row has no entry for one of ``self.columns``.
            TypeError: if a column name or a cell value is not a string.
        """
        # header
        for key in sorted(self.headers):
            yield '%s\t%s\n' % (key, self.headers[key])
        # The empty line is written even when there are no headers.
        yield '\n'

        # columns
        yield '\t'.join(self.columns) + '\n'

        # rows
        for row in self.rows:
            yield '\t'.join([row[name] for name in self.columns]) + '\n'

    def to_string(self):
        """Return the whole serialized file as a single string."""
        return ''.join(self.to_lines())

    def parse_from(self, source, with_headers=True):
        """Parse data from a file, string or line iterator, overwriting self.

        Args:
            source: a text or bytes string (split on newlines), or any
                iterable of lines such as an open file. Bytes lines are
                decoded as UTF-8.
            with_headers: when true, leading ``key<TAB>value`` lines up to
                the first empty line are read into ``self.headers``; when
                false, the first line is taken as the column-name line.

        Returns:
            None. ``self.headers``, ``self.columns`` and ``self.rows`` are
            replaced.

        Raises:
            TypeError: if a line is neither text nor bytes.
            ValueError: if a header line contains no tab.
            IndexError: if a row has fewer cells than there are columns.
        """
        # Split whole-string input into lines; decoding stays per-line so
        # only the lines that are actually consumed get decoded.
        if isinstance(source, bytes):
            source = source.split(b'\n')
        elif isinstance(source, type(u'')):
            source = source.split(u'\n')

        lines = iter(source)

        # header
        self.headers = {}
        if with_headers:
            for raw in lines:
                line = self._to_text(raw).rstrip(u'\n')
                if line == u'':
                    # An empty line terminates the header block.
                    break
                key, value = line.split(u'\t', 1)
                self.headers[key] = value

        # columns
        # Exhausted input yields no columns instead of a bare StopIteration.
        column_line = self._to_text(next(lines, u''))
        stripped = (cell.strip() for cell in column_line.split(u'\t'))
        # Empty column names are dropped.
        self.columns = [name for name in stripped if name]

        # rows
        self.rows = []
        for raw in lines:
            line = self._to_text(raw).rstrip(u'\n')
            if not line:
                # The first empty line ends the data; later lines are ignored.
                break
            cells = line.split(u'\t')
            # Cells beyond the last column are ignored.
            self.rows.append(
                {name: cells[i] for i, name in enumerate(self.columns)})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse(source, with_headers=True):
    """Create a new OvvpFile, fill it from *source* and return it.

    Both arguments are passed straight to ``OvvpFile.parse_from``.
    """
    parsed = OvvpFile()
    parsed.parse_from(source, with_headers=with_headers)
    return parsed
|