Hotovy magic prikaz pro import a update skol z Aesopa

This commit is contained in:
Tomas Gavenciak 2015-05-14 19:53:01 +02:00
parent 29b32dadd8
commit d1c3948a6c
2 changed files with 130 additions and 3 deletions

View file

@ -0,0 +1,123 @@
# -*- coding: utf-8 -*-
import datetime
import os
import random
#import argparse
from optparse import make_option

import django.contrib.auth
from django.conf import settings
from django.core.management import call_command
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from django.utils.encoding import force_unicode

from seminar import ovvpfile
from seminar.models import Skola, Resitel, Rocnik, Cislo, Problem, Reseni, PrilohaReseni, Nastaveni
from seminar.testutils import create_test_data
User = django.contrib.auth.get_user_model()
class Command(BaseCommand):
    """Import schools from an OVVP-format file and report differences.

    Each row of the file is matched to a ``Skola`` by its ``aesop_id``
    (``'aesop:<id-aesop>'``).  Matched schools are compared field by field
    and differences are only printed; unmatched rows are created as new
    schools.  With ``--dry_run`` nothing is written to the database.
    """

    help = "Import (add / notice changes) schools from a ovvp-format file (skoly.csv)"

    option_list = BaseCommand.option_list + (
        make_option('-n', '--dry_run', dest='dry_run', default=False,
                    action='store_true', help="No changes to DB."),
    )

    # Argparse-based variant, currently disabled:
    # def add_arguments(self, parser):
    #     parser.add_argument('file', nargs='?', type=argparse.FileType('r', encoding='utf8'), default=sys.stdin)
    #     parser.add_argument('-n', '--dry_run', dest='dry_run', default=False,
    #                         action='store_true', type=bool, help="No changes to DB.")

    def school_diffs(self, ovvpskola, dbskola):
        """Describe how an imported row differs from the stored school.

        :param ovvpskola: mapping of OVVP column name to value for one row.
        :param dbskola: object whose ``nazev``, ``ulice``, ``mesto`` and
            ``psc`` attributes are compared against the row.
        :return: concatenation of ``"col: 'old'->'new', "`` fragments, one
            per differing field; an empty string when all fields match.
        """
        def compare(ovvpcol, dbcol, t=unicode):
            # Compare on the unicode representation so that differently
            # typed but textually equal values do not count as a change.
            v1 = t(getattr(dbskola, dbcol))
            v2 = ovvpskola[ovvpcol]
            if unicode(v1) != unicode(v2):
                return "%s: '%s'->'%s', " % (ovvpcol, v1, v2, )
            return ""

        column_pairs = (
            ('name', 'nazev'),
            ('street', 'ulice'),
            ('town', 'mesto'),
            ('postcode', 'psc'),
        )
        return "".join(compare(ovvpcol, dbcol) for ovvpcol, dbcol in column_pairs)

    def handle(self, *args, **options):
        """Parse the file named by the single positional argument and sync schools.

        Prints one line per modified (``M``), suspiciously duplicated (``W``)
        or added (``+``) school and a final summary.

        :raises CommandError: on a wrong number of arguments, an unsupported
            file version, a missing ``id-aesop`` column, or when several
            stored schools share one ``aesop_id``.
        """
        # Explicit errors instead of asserts: asserts vanish under ``python -O``.
        if len(args) != 1:
            raise CommandError("Expected exactly one argument: the OVVP-format file name")
        filename = args[0]
        dry_run = options.get('dry_run', False)

        self.stdout.write('Parsing OVVP-format file \'%s\' ...' % (filename, ))
        with open(filename, "r") as f:
            o = ovvpfile.parse(f)

        version = o.headers.get('version')
        if version != '1':
            raise CommandError("Unsupported OVVP file version: %r" % (version, ))
        self.stdout.write('Read %d schools with columns: %s' % (len(o.rows), o.columns, ))
        self.stdout.write('Export created: %s' % (o.headers.get('date', 'N/A'), ))
        if 'id-aesop' not in o.columns:
            raise CommandError("Required column 'id-aesop' is missing in '%s'" % (filename, ))

        same = 0
        modified = 0
        new = 0

        # One transaction for the whole import: any error rolls everything back.
        with transaction.atomic():
            for skola in o.rows:
                aesop_id = 'aesop:%s' % (skola['id-aesop'], )
                found = Skola.objects.filter(aesop_id=aesop_id)
                if len(found) > 1:
                    raise CommandError("Multiple schools share aesop_id '%s'" % (aesop_id, ))

                if found:
                    fs = found[0]
                    diff = self.school_diffs(skola, fs)
                    if diff:
                        modified += 1
                        self.stdout.write(u"M %11s %s" % (aesop_id, diff, ))  # TODO
                    else:
                        same += 1

                    # School-type flags are only ever switched on, never off.
                    # NOTE(review): assumed to apply to every matched school,
                    # whether or not other fields differ -- confirm.
                    upgraded = False
                    if int(skola['is-SS']) > int(fs.je_ss):
                        fs.je_ss = True
                        upgraded = True
                    if int(skola['is-ZS']) > int(fs.je_zs):
                        fs.je_zs = True
                        upgraded = True
                    # Honour --dry_run here as well, and save at most once.
                    if upgraded and not dry_run:
                        fs.save()
                else:
                    new += 1
                    # Name duplicates?
                    by_name = Skola.objects.filter(
                        nazev=skola['name'], mesto=skola['town'], ulice=skola['street'])
                    if len(by_name) > 0:
                        self.stdout.write(u"W Same [name, street, town] for %s and %s (%s, %s, %s)" % (
                            by_name[0].aesop_id, aesop_id, skola['name'], skola['street'], skola['town'], ))
                    else:
                        self.stdout.write(u"+ %11s %s, %s, %s" % (
                            aesop_id, skola['name'], skola['town'], skola['country'], ))
                    # NOTE(review): assumed that the school is created even
                    # after a duplicate-name warning -- confirm.
                    if not dry_run:
                        Skola.objects.create(
                            aesop_id=aesop_id, nazev=skola['name'], kratky_nazev=skola['name'],
                            izo=skola.get('id-izo', ''),
                            ulice=skola['street'], mesto=skola['town'], psc=skola['postcode'],
                            stat=skola['country'],
                            je_zs=int(skola['is-ZS']), je_ss=int(skola['is-SS']),
                        )

        self.stdout.write("Result: %d same, %d different, %d new schools" % (same, modified, new, ))

View file

@ -32,9 +32,8 @@ class OvvpFile(object):
for r in it: for r in it:
if isinstance(r, str): if isinstance(r, str):
r = r.decode('utf8') r = r.decode('utf8')
if r.endswith('\n'):
r = r[:-1]
assert isinstance(r, unicode) assert isinstance(r, unicode)
r = r.rstrip('\n')
if r == u"": if r == u"":
break break
k, v = r.split(u'\t', 1) k, v = r.split(u'\t', 1)
@ -42,11 +41,16 @@ class OvvpFile(object):
# columns # columns
r = it.next() r = it.next()
self.columns = [cn for cn in r.split(u'\t') if cn != ""] if isinstance(r, str):
r = r.decode('utf8')
self.columns = [cn.strip() for cn in r.split(u'\t') if cn.strip() != ""]
# rows # rows
self.rows = [] self.rows = []
for r in it: for r in it:
if isinstance(r, str):
r = r.decode('utf8')
r = r.rstrip('\n')
if not r: if not r:
break break
rtup = r.split(u'\t') rtup = r.split(u'\t')