diff --git a/seminar/management/commands/importschools.py b/seminar/management/commands/importschools.py
new file mode 100644
index 00000000..08c0f4eb
--- /dev/null
+++ b/seminar/management/commands/importschools.py
@@ -0,0 +1,137 @@
+# -*- coding: utf-8 -*-
+
+import datetime
+import os
+import random
+#import argparse
+from optparse import make_option
+
+
+from django.core.management.base import BaseCommand, CommandError
+from django.core.management import call_command
+from django.conf import settings
+from django.db import transaction
+import django.contrib.auth
+from django.utils.encoding import force_unicode
+
+from seminar.models import Skola, Resitel, Rocnik, Cislo, Problem, Reseni, PrilohaReseni, Nastaveni
+from seminar.testutils import create_test_data
+from seminar import ovvpfile
+
+User = django.contrib.auth.get_user_model()
+
+
+class Command(BaseCommand):
+    """Import schools from an OVVP-format export file into the Skola table.
+
+    Schools are matched by their AESOP id (stored as 'aesop:ID').  New
+    schools are created; for known schools, differences in name and address
+    are only reported, and the je_ss / je_zs flags are only ever switched on.
+    With -n / --dry_run nothing is written to the database.
+    """
+    help = "Import (add / notice changes) schools from a ovvp-format file (skoly.csv)"
+
+    option_list = BaseCommand.option_list + (
+        make_option('-n', '--dry_run', dest='dry_run', default=False,
+            action='store_true', help="No changes to DB."),
+        )
+#    def add_arguments(self, parser):
+#        parser.add_argument('file', nargs='?', type=argparse.FileType('r', encoding='utf8'), default=sys.stdin)
+#        parser.add_argument('-n', '--dry_run', dest='dry_run', default=False,
+#                action='store_true', type=bool, help="No changes to DB.")
+
+    # (OVVP column, Skola attribute) pairs compared for already-known schools.
+    COMPARED_COLUMNS = (
+        ('name', 'nazev'),
+        ('street', 'ulice'),
+        ('town', 'mesto'),
+        ('postcode', 'psc'),
+    )
+
+    def school_diffs(self, ovvpskola, dbskola):
+        """Describe how an OVVP row differs from the stored school.
+
+        Returns a string of "col: 'old'->'new', " fragments, one per differing
+        column, or an empty string when all compared columns match.
+        """
+        parts = []
+        for ovvpcol, dbcol in self.COMPARED_COLUMNS:
+            old = unicode(getattr(dbskola, dbcol))
+            new = ovvpskola[ovvpcol]
+            if old != unicode(new):
+                parts.append("%s: '%s'->'%s', " % (ovvpcol, old, new, ))
+        return "".join(parts)
+
+    def handle(self, *args, **options):
+        """Parse the file given as the only argument and sync schools to the DB.
+
+        Raises CommandError on a wrong argument count, an unsupported file
+        version, a missing 'id-aesop' column, or a duplicated aesop_id in the DB.
+        """
+        if len(args) != 1:
+            raise CommandError("Expected exactly one argument: the OVVP-format file to import")
+
+        filename = args[0]
+        dry_run = options.get('dry_run', False)
+        self.stdout.write('Parsing OVVP-format file \'%s\' ...' % (filename, ))
+        with open(filename, "r") as f:
+            o = ovvpfile.parse(f)
+
+        if o.headers.get('version') != '1':
+            raise CommandError("Unsupported OVVP file version: %r" % (o.headers.get('version'), ))
+        self.stdout.write('Read %d schools with columns: %s' % (len(o.rows), o.columns, ))
+        self.stdout.write('Export created: %s' % (o.headers.get('date', 'N/A'), ))
+        if 'id-aesop' not in o.columns:
+            raise CommandError("The file has no 'id-aesop' column")
+
+        same = 0
+        modified = 0
+        new = 0
+
+        with transaction.atomic():
+            for skola in o.rows:
+                aesop_id = 'aesop:%s' % (skola['id-aesop'], )
+                found = Skola.objects.filter(aesop_id=aesop_id)
+                if len(found) > 1:
+                    raise CommandError("Multiple schools in DB with aesop_id %s" % (aesop_id, ))
+
+                if found:
+                    fs = found[0]
+                    diff = self.school_diffs(skola, fs)
+                    if diff:
+                        modified += 1
+                        self.stdout.write(u"M %11s %s" % (aesop_id, diff, )) # TODO
+                    else:
+                        same += 1
+
+                    # The type flags are only ever switched on, never off.
+                    flags_changed = False
+                    if int(skola['is-SS']) > int(fs.je_ss):
+                        fs.je_ss = True
+                        flags_changed = True
+                    if int(skola['is-ZS']) > int(fs.je_zs):
+                        fs.je_zs = True
+                        flags_changed = True
+                    # Respect --dry_run here too, and save at most once.
+                    if flags_changed and not dry_run:
+                        fs.save()
+
+                else:
+                    new += 1
+                    # Name duplicates?
+                    by_name = Skola.objects.filter(nazev=skola['name'], mesto=skola['town'], ulice=skola['street'])
+                    if len(by_name) > 0:
+                        self.stdout.write(u"W Same [name, street, town] for %s and %s (%s, %s, %s)" % (
+                            by_name[0].aesop_id, aesop_id, skola['name'], skola['street'], skola['town'], ))
+                    else:
+                        self.stdout.write(u"+ %11s %s, %s, %s" % (aesop_id, skola['name'], skola['town'], skola['country'], ))
+                    # NOTE(review): assumes a school is created even when a name
+                    # duplicate was reported above -- confirm this is intended.
+                    if not dry_run:
+                        Skola.objects.create(
+                            aesop_id=aesop_id, nazev=skola['name'], kratky_nazev=skola['name'], izo=skola.get('id-izo', ''),
+                            ulice=skola['street'], mesto=skola['town'], psc=skola['postcode'], stat=skola['country'],
+                            je_zs=int(skola['is-ZS']), je_ss=int(skola['is-SS']),
+                        )
+
+        self.stdout.write("Result: %d same, %d different, %d new schools" % (same, modified, new, ))
diff --git a/seminar/ovvpfile.py b/seminar/ovvpfile.py
index 59c56a24..24310053 100644
--- a/seminar/ovvpfile.py
+++ b/seminar/ovvpfile.py
@@ -32,9 +32,8 @@ class OvvpFile(object):
         for r in it:
             if isinstance(r, str):
                 r = r.decode('utf8')
-            if r.endswith('\n'):
-                r = r[:-1]
             assert isinstance(r, unicode)
+            r = r.rstrip('\n')
             if r == u"":
                 break
             k, v = r.split(u'\t', 1)
@@ -42,11 +41,16 @@ class OvvpFile(object):
 
         # columns
         r = it.next()
-        self.columns = [cn for cn in r.split(u'\t') if cn != ""]
+        if isinstance(r, str):
+            r = r.decode('utf8')
+        self.columns = [cn.strip() for cn in r.split(u'\t') if cn.strip() != ""]
 
         # rows
         self.rows = []
         for r in it:
+            if isinstance(r, str):
+                r = r.decode('utf8')
+            r = r.rstrip('\n')
             if not r:
                 break
             rtup = r.split(u'\t')