mamweb/dakosdump
2015-05-26 13:12:56 +02:00
..
__init__.py Poznamky a zdroje k Dakos exportu a importu 2015-05-15 21:26:23 +02:00
helpers.py Kolekce helperu pro import 2015-05-17 02:07:49 +02:00
oracle_dump Poznamky a zdroje k Dakos exportu a importu 2015-05-15 21:26:23 +02:00
README.md Import komentaru, unescape dat, uprava uzivatelu 2015-05-26 13:12:56 +02:00

Poznámky k exportu a reimportu MaM dat

Dakos Oracle dump - postup

Na lib. stroji na KAMu spustit:

ORACLE_HOME=/nfs/nikam/sw/oracle-10.2/ora_home_10.2/ LC_ALL=C AESOP_PASSWD=<password-of-MAMOPER> perl oracle_dump

Dumpne vsechny tabulky krom SYS.*

from seminar.dksdump.helpers import read_all_tables, matchrows, onerow
# Read the dumped tables from the given directory into `ts`; later snippets
# index it by table name, e.g. ts['MAMOPER.MM_ZADANIA'].
ts = read_all_tables('seminar/dksdump/oracle_dks_dump/')

Načte a vypíše všechny tabulky. Yay! Pozor! V základu mají všechny podivný 2. řádek s bordelem! (čísla 3, 12, ...)

Import do SQLite3 pomocí .mode tabs a .import DKSROOT.V_SKOLY.csv DKSROOT_V_SKOLY.

Postup pomocí sqlite3 s Dakos daty

Řešitelé s nějakými úlohami

# Solvers that have at least one row in MAMOPER_MM_RESENI, grouped so that
# each person (o.ID) yields a single result row.
r = sqget(
    db,
    "select dr.ROKMATURITY, o.JMENO, o.PRIJMENI, dr.DATUMNAROZENI, o.POHLAVI "
    "from MAMOPER_MM_RESENI as res, DKSROOT_OSOBA as o, DKSROOT_RESITEL as dr "
    "where res.RESITEL=o.ID and dr.ID=o.ID group by o.ID",
)
# Translation table from a Dakos person ID to the ID of the imported solver;
# it starts out empty here.
MAMID_FOR_DAKOSID = {}

Překlad ID je potom:

MAMID_FOR_DAKOSID = {u'18313': 501, u'25075': 595, u'15162': 453, u'14673': 441, u'218': 549, u'20414': 525, u'27538': 617, u'18733': 504, u'13935': 427, u'12074': 409, u'23855': 580, u'12372': 412, u'14950': 448, u'428': 620, u'14989': 451, u'26574': 609, u'24791': 590, u'26628': 610, u'26705': 615, u'17273': 486, u'12172': 410, u'1377': 426, u'19859': 517, u'25597': 603, u'21176': 543, u'20453': 527, u'19278': 509, u'14107': 429, u'19274': 508, u'14842': 445, u'14667': 440, u'25176': 597, u'17713': 490, u'15753': 463, u'2': 518, u'17393': 489, u'22673': 568, u'1553': 459, u'13600': 423, u'1556': 460, u'1708': 480, u'18753': 506, u'13609': 424, u'415': 619, u'19653': 516, u'13499': 418, u'22637': 557, u'25608': 604, u'15744': 462, u'21102': 539, u'15740': 461, u'21107': 541, u'22633': 553, u'22793': 573, u'22711': 572, u'14323': 434, u'22643': 563, u'22013': 551, u'22641': 561, u'22640': 560, u'22646': 566, u'20113': 520, u'22644': 564, u'21993': 550, u'22833': 574, u'24022': 583, u'24375': 587, u'21183': 544, u'26693': 614, u'1359': 422, u'21184': 545, u'26555': 608, u'14290': 430, u'22635': 555, u'12313': 411, u'17253': 485, u'18734': 505, u'15522': 458, u'19219': 507, u'25535': 599, u'22653': 567, u'25538': 600, u'17234': 484, u'17233': 483, u'1135': 403, u'381': 618, u'21453': 546, u'20433': 526, u'17133': 481, u'101': 402, u'21056': 536, u'14522': 437, u'19282': 510, u'19285': 511, u'21513': 547, u'11515': 404, u'17974': 499, u'16813': 478, u'1648': 477, u'17973': 498, u'14689': 442, u'15833': 464, u'21094': 537, u'15836': 466, u'15837': 467, u'15834': 465, u'22974': 577, u'15838': 468, u'15839': 469, u'24930': 593, u'459': 623, u'16333': 475, u'11948': 408, u'15087': 452, u'19617': 514, u'23655': 579, u'14387': 435, u'457': 622, u'14307': 431, u'24126': 585, u'21105': 540, u'24081': 584, u'15324': 455, u'23193': 578, u'24004': 582, u'26661': 612, u'1730': 488, u'24825': 592, u'17960': 497, u'11620': 405, u'1469': 443, u'250': 594, u'19290': 512, u'24296': 
586, u'17887': 494, u'26235': 606, u'14962': 450, u'14961': 449, u'1358': 421, u'22636': 556, u'17274': 487, u'22634': 554, u'13965': 428, u'19622': 515, u'22638': 558, u'1350': 419, u'1357': 420, u'24637': 589, u'14878': 446, u'25556': 602, u'25555': 601, u'20475': 530, u'14314': 432, u'14315': 433, u'221': 552, u'20413': 524, u'15313': 454, u'1188': 407, u'21733': 548, u'26684': 613, u'26455': 607, u'17955': 496, u'25916': 605, u'22863': 575, u'21098': 538, u'1453': 438, u'26642': 611, u'20154': 521, u'14625': 439, u'15874': 472, u'14879': 447, u'1169': 406, u'16053': 473, u'12546': 413, u'22685': 570, u'20014': 519, u'16353': 476, u'17829': 492, u'22642': 562, u'20393': 523, u'469': 624, u'17949': 495, u'16193': 474, u'20313': 522, u'13468': 417, u'27476': 616, u'24442': 588, u'22681': 569, u'21036': 535, u'25111': 596, u'19420': 513, u'18633': 503, u'22645': 565, u'1293': 414, u'25495': 598, u'18133': 500, u'17193': 482, u'22639': 559, u'15841': 471, u'15840': 470, u'450': 621, u'17838': 493, u'20478': 533, u'20479': 534, u'22694': 571, u'17793': 491, u'21159': 542, u'20473': 528, u'23981': 581, u'20476': 531, u'20477': 532, u'20474': 529, u'17073': 479, u'22865': 576, u'13072': 415, u'13073': 416, u'14404': 436, u'15375': 457, u'15374': 456, u'24815': 591, u'1365': 425, u'1474': 444, u'18453': 502}

Staří orgové [MAMOPER_MM_ULOHA]

# Old organisers: make sure a Django user exists for every distinct login
# that appears in the OPRAVUJICI column of MAMOPER_MM_ULOHA.
for login in set(row.OPRAVUJICI for row in sqget(db, "select * from MAMOPER_MM_ULOHA")):
    # Empty values are skipped.  .exists() asks the database for a yes/no
    # answer instead of fetching the matching rows only to count them.
    if login and not User.objects.filter(username=login).exists():
        User.objects.create_user(login)

Staré úlohy [MAMOPER_MM_ULOHA]

Temata (prořezávání opakujících se)

# Import themes (KOD starting with 't') from MAMOPER_MM_ULOHA.  A theme is
# listed once per issue, so only the first row seen for each NAZEV is kept.
# NOTE(review): CISLO looks like a text column (it is compared with '9'
# elsewhere in these notes), in which case ORDER BY CISLO is lexicographic.
r = sqget(db, "select * from MAMOPER_MM_ULOHA where KOD like 't%' ORDER BY CISLO")
seen = set()
for row in r:
    if row.NAZEV in seen:
        # Repeated occurrence of an already imported theme.
        continue
    seen.add(row.NAZEV)

    if row.OPRAVUJICI:
        opravovatel = User.objects.get(username=row.OPRAVUJICI)
    else:
        opravovatel = None
    cislo = Cislo.objects.get(rocnik__rocnik=int(row.ROCNIK), cislo=row.CISLO)
    # The first letter of KOD selects the type: 'r' is a task, anything else
    # is treated as a theme.
    if row.KOD[0] == 'r':
        typ = Problem.TYP_ULOHA
    else:
        typ = Problem.TYP_TEMA

    Problem.objects.create(
        nazev=row.NAZEV,
        body=int(row.BODY) if row.BODY else None,
        typ=typ,
        stav=Problem.STAV_ZADANY,
        opravovatel=opravovatel,
        kod=row.KOD[1:],  # code without the leading type letter
        cislo_zadani=cislo,
        import_dakos_id='ULOHA:%s' % (row.ID,),
        text_problemu_org=u"Importováno z MAMOPER.MM_ULOHA\nKod: %s\nID: %s" % (row.KOD, row.ID),
    )

Ulohy

# Import tasks (KOD starting with 'r') from MAMOPER_MM_ULOHA, one Problem
# per source row.
r = sqget(db, "select * from MAMOPER_MM_ULOHA where KOD like 'r%'")
for row in r:
    opravovatel = None
    if row.OPRAVUJICI:
        opravovatel = User.objects.get(username=row.OPRAVUJICI)
    cislo = Cislo.objects.get(rocnik__rocnik=int(row.ROCNIK), cislo=row.CISLO)
    # The first letter of KOD selects the type: 'r' is a task, anything else
    # is treated as a theme.
    typ = Problem.TYP_TEMA
    if row.KOD[0] == 'r':
        typ = Problem.TYP_ULOHA

    Problem.objects.create(
        nazev=row.NAZEV,
        body=int(row.BODY) if row.BODY else None,
        typ=typ,
        stav=Problem.STAV_ZADANY,
        opravovatel=opravovatel,
        kod=row.KOD[1:],  # code without the leading type letter
        cislo_zadani=cislo,
        import_dakos_id='ULOHA:%s' % (row.ID,),
        text_problemu_org=u"Importováno z MAMOPER.MM_ULOHA\nKod: %s\nID: %s" % (row.KOD, row.ID),
    )

A pak prořezání opakujících se témátek ...

Stará řešení a body

r=sqget(db, "select res.RESITEL, res.KDY, b.ULOHA, b.BODY, res.VYZVEDL, res.FORMA, res.POZNAMKA from MAMOPER_MM_RESENI as res, MAMOPER_MM_RESENI_BODY as b where res.RESITEL=b.RESITEL and res.ULOHA=b.ULOHA")
for i in r:
print i
p = Problem.objects.filter(import_dakos_id='ULOHA:%s' % (i.ULOHA, ));
g = sqget(db, "select * from MAMOPER_MM_ULOHA where ID='%s'" % (i.ULOHA, ))[0]
if not p:
    assert g.KOD[0] == 't'
    p = Problem.objects.filter(cislo_zadani__rocnik__rocnik=int(g.ROCNIK), typ=Problem.TYP_TEMA, kod=g.KOD[1:]).order_by('cislo_zadani__cislo')
    if len(p) >= 2:
      p = [p.filter(cislo_zadani__cislo__lte=g.CISLO).last()]
p = p[0]
c = Cislo.objects.get(rocnik__rocnik=int(g.ROCNIK), cislo=g.CISLO)
c2 = c.relativni_v_rocniku(2) or c.relativni_v_rocniku(1)
assert c2
resitel=Resitel.objects.get(id=MAMID_FOR_DAKOSID[i.RESITEL]);
b = int(i.BODY)
Reseni.objects.create(problem=p, resitel=resitel, body=b, timestamp=transdate(i.KDY),
    forma=Reseni.FORMA_EMAIL if i.FORMA=='e' else Reseni.FORMA_PAPIR, cislo_body=c2,
    poznamka="Import z MAMOPER_MM_RESENI, MAMOPER_MM_RESENI_BODY\nVyzvedl: %s\nPuvodni poznamka: %s" % (i.VYZVEDL, i.POZNAMKA, ))

Novější problémy (znovu) [MM_ZADANIA]

Pozn.: CISLO udává vždy nejen číslo zadání, ale zároveň určuje číslo řešení i u témat (+2). Témata jsou též zadána vícekrát, jednou za každé číslo s výsledky (číslované (-2)).

Úlohy a seriály:

# Import tasks (TYP '1') followed by serials (TYP '3') from
# MAMOPER_MM_ZADANIA.  `typtable` (source TYP -> Problem type) must already
# be defined.
r = sqget(db, "select * from MAMOPER_MM_ZADANIA where TYP='1' order by CISLO")
r += sqget(db, "select * from MAMOPER_MM_ZADANIA where TYP='3' order by CISLO")
for row in r:
    # Source issue '9' is looked up as issue '1'.
    cislo_value = '1' if row.CISLO == '9' else row.CISLO
    cislo = Cislo.objects.get(cislo=cislo_value, rocnik__rocnik=int(row.ROCNIK))
    if row.OPRAVUJE:
        opravovatel = User.objects.get(username=row.OPRAVUJE)
    else:
        opravovatel = None
    Problem.objects.create(
        nazev=row.NAZOV,
        typ=typtable[row.TYP],
        stav=Problem.STAV_ZADANY,
        opravovatel=opravovatel,
        kod=row.ULOHA,
        cislo_zadani=cislo,
        cislo_reseni=cislo.relativni_v_rocniku(2),
        body=int(row.MAX_BODY),
        # The source ID keeps the original CISLO, not the remapped one.
        import_dakos_id='ZAD:%s.%s.%s.%s' % (row.ROCNIK, row.CISLO, row.ULOHA, row.TYP),
        text_problemu_org=u'Importováno z MAMOPER_MM_ZADANIA\nOpravuje: %s' % (row.OPRAVUJE,),
    )

Témata:

# Import themes (TYP '2') from MAMOPER_MM_ZADANIA.  A theme is skipped when a
# theme Problem with the same name and an assigned issue already exists.
# `typtable` (source TYP -> Problem type) must already be defined.
r = sqget(db, "select * from MAMOPER_MM_ZADANIA where TYP='2' order by CISLO")
for row in r:
    # Source issue '9' is looked up as issue '1'.
    cislo_value = '1' if row.CISLO == '9' else row.CISLO
    cislo = Cislo.objects.get(cislo=cislo_value, rocnik__rocnik=int(row.ROCNIK))
    if row.OPRAVUJE:
        opravovatel = User.objects.get(username=row.OPRAVUJE)
    else:
        opravovatel = None

    already_imported = Problem.objects.filter(
        nazev=row.NAZOV, typ=Problem.TYP_TEMA, cislo_zadani__isnull=False)
    if already_imported:
        continue

    Problem.objects.create(
        nazev=row.NAZOV,
        typ=typtable[row.TYP],
        stav=Problem.STAV_ZADANY,
        opravovatel=opravovatel,
        kod=row.ULOHA,
        cislo_zadani=cislo,
        body=None,
        # The source ID keeps the original CISLO, not the remapped one.
        import_dakos_id='ZAD:%s.%s.%s.%s' % (row.ROCNIK, row.CISLO, row.ULOHA, row.TYP),
        text_problemu_org=u'Importováno z MAMOPER_MM_ZADANIA\nOpravuje: %s' % (row.OPRAVUJE,),
    )

Novější řešení (znovu) [MM_RIESENIA]

r=sqget(db, "select * from MAMOPER_MM_RIESENIA")
for i in r:
print i
tstp = datetime.datetime.fromtimestamp(int(i.DORUCENA));
res = Resitel.objects.get(import_mamoper_id=i.RIESITEL);
c = Cislo.objects.get(rocnik__rocnik=int(i.ROCNIK), cislo=i.CISLO if i.CISLO!='9' else '1')
if i.TYP == '2':
    u = Problem.objects.filter(cislo_zadani__rocnik=c.rocnik, typ=Problem.TYP_TEMA, kod=i.ULOHA).order_by('cislo_zadani__cislo')
    print c, res, u
    if len(u) >= 2:
      u = u.filter(cislo_zadani__cislo__lte=c.cislo).last()
    else:
      u = u[0]
else: 
  u = Problem.objects.get(import_dakos_id="ZAD:%s.%s.%s.%s" % (i.ROCNIK, i.CISLO, i.ULOHA, i.TYP,));
cbod = c.relativni_v_rocniku(2)
if not cbod:
  cbod = c.relativni_v_rocniku(1)
assert cbod
Reseni.objects.create(problem=u, resitel=res, body=int(i.POCET_BODOV) if i.POCET_BODOV else None,
    forma=Reseni.FORMA_EMAIL if i.FORMA=='1' else Reseni.FORMA_PAPIR, timestamp=tstp,
    poznamka='Zdroj: MM_RIESENIA\nForma: %s\n%s' % ({'1':'E', '2':'P'}[i.FORMA], i.POZNAMKA, ),
    cislo_body=cbod)

Komentáře [MAMOPER_MM_DOZ_DISKUSIA]

from django.contrib.contenttypes.models import ContentType
from django.utils.encoding import force_text
from django.conf import settings
import HTMLParser

r = sqget(db, 'SELECT * from MAMOPER_MM_DOZ_DISKUSIA')
r = sorted(r, key=lambda x:x.ID_REAK)
tab_ID_REAK_to_ThrComment_id = {}
html_parser = HTMLParser.HTMLParser()

for i in r:
print i
prb = Problem.objects.get(import_dakos_id='DOZ:%s' % (i.ID, ))
print prb.id
un = i.ORG
if un == u'Al\u010da': un = 'alca'
if un == u'Anet': un = 'anet'
user = User.objects.get(username=un)
parent = None if not i.ID_PARENT else ThreadedComment.objects.get(id=tab_ID_REAK_to_ThrComment_id[i.ID_PARENT])
text = html_parser.unescape(i.TEXT_PRISPEVKU).replace('<br>', '\n').replace('<BR>', '\n')
text = html_parser.unescape(text)
d = dict(
    content_type=ContentType.objects.get_for_model(Problem),
        object_pk=force_text(prb._get_pk_val()),
        comment=text,
        submit_date=transdate(i.DATUM),
        site_id=settings.SITE_ID,
        is_public=True,
    is_removed=False,
    parent=parent,
    title="",
    user=user,
    )
c = ThreadedComment.objects.create(**d)
tab_ID_REAK_to_ThrComment_id[i.ID_REAK] = c.id

Konkrétní programy pro data (starší metoda)

Používají ovvpfile pro načtení souborů.

DAKOS ID pro nektere skoly

# Store the Dakos ID on every school that can be identified unambiguously by
# name and IZO.  The first entry of `sk` is skipped.
for record in sk[1:]:
    matches = Skola.objects.filter(nazev=record['NAZEV'], izo=record['IZO'])
    if len(matches) != 1:
        # No match or an ambiguous one: leave the schools untouched.
        continue
    skola = matches[0]
    skola.dakos_id = record['ID']
    skola.save()

Resitel

# Create one Resitel per row of the 'BW-resitelia-export' table.
for row in ts['BW-resitelia-export']:
    Resitel.objects.create(
        jmeno=row['jmeno'],
        prijmeni=row['prijmeni'],
        pohlavi_muz=(row['pohlavi'] == u'mu\u017e'),
        rok_maturity=int(row['rok_maturity']),
        email=row['email'],
        telefon=row['telefon'],
        zasilat=Resitel.ZASILAT_DOMU,
        # Street and house number are joined into a single field.
        ulice=row['ulica'] + ' ' + row['cislo'],
        mesto=row['obec'],
        psc=row['psc'],
        stat=row['stat'],
        dakos_id=row['rid'],
        # The school is not linked here; its name and Dakos ID are only
        # recorded in the note.
        poznamka="{'skola': '%s', 'dakos_skola_id': '%s'}\n" % (row['skola'], row['dakos_skola_id']),
    )

Doplneni nekterych skol:

# Link solvers to schools using the Aesop export: the solver is found by name
# and surname, the school by the IZO given in the export's 'school' field.
ae = ts['Aesop-MaM-export-2014-10-20-mod']
skoly = ts['Aesop-skoly-export-2015-05-14-mod']
for i in ae:
    r = Resitel.objects.get(jmeno=i['name'], prijmeni=i['surname'])
    school = i['school']
    # Remove the literal 'izo:' prefix.  str.lstrip('izo:') would instead
    # strip any leading run of the characters 'i', 'z', 'o' and ':'.
    izo = school[len('izo:'):] if school.startswith('izo:') else school
    # The first school record whose repr() contains the IZO is taken.
    # NOTE(review): this is a substring match over the whole record, so an
    # empty or very short IZO can match an unrelated school.
    aesop_id = [s for s in skoly if izo in repr(s)][0]['id-aesop']
    sch = Skola.objects.get(aesop_id='aesop:'+aesop_id)
    r.skola = sch
    r.save()

Cisla

Hlavne ze souboru archiv/seznam.csv. Navic nejaka data z DB, ale nic moc.

# Create one Cislo per row of the archive listing, attached to the Rocnik
# with the matching number.
for entry in ts['BW-archiv-cisla-copy-mod']:
    rocnik = Rocnik.objects.get(rocnik=int(entry['rocnik']))
    Cislo.objects.create(rocnik=rocnik, cislo=entry['cislo'])

Uzivatele

# Create a Django user for every LOGIN in MAMOPER.MM_PASSWD that does not
# exist yet.
for account in ts['MAMOPER.MM_PASSWD']:
    login = account['LOGIN']
    if User.objects.filter(username=login):
        continue
    User.objects.create_user(login)

Problemy

MM_ZADANIA:

typtable={'1': Problem.TYP_ULOHA, '2':Problem.TYP_TEMA, '3':Problem.TYP_SERIAL}
for i in ts['MAMOPER.MM_ZADANIA']:
print i
c = Cislo.objects.get(cislo=i['CISLO'] if i['CISLO']!='9' else '1', rocnik__rocnik=int(i['ROCNIK']))
opravovatel = User.objects.get(username=i['OPRAVUJE']) if i['OPRAVUJE'] else None
Problem.objects.create(nazev=i['NAZOV'], typ=typtable[i['TYP']], stav=Problem.STAV_ZADANY, opravovatel=opravovatel,
    kod=i['ULOHA'], cislo_zadani=c, body=int(i['MAX_BODY']) if i['MAX_BODY'] else None,
    dakos_id='ZAD:%s.%s.%s.%s'%(i['ROCNIK'], i['CISLO'], i['ULOHA'], i['TYP'],))

MM_DOZ: Dekodovani stavu navrhu (MFIO - oblast, Tema, Konfera, Pouzita, Zamitnuta, Doplnit (?))

def dectag(x):
    """Decode a proposal-state bit mask into a list of tag letters.

    Bit n of ``int(x)`` selects the n-th letter of "MFIOTKPZD"; letters are
    returned in that order.  Per the notes accompanying this snippet:
    M, F, I, O = subject area, T = theme, K = conference, P = used,
    Z = rejected, D = to be completed (marked as uncertain in the notes).
    Bits above the ninth are ignored.
    """
    mask = int(x)
    return [letter for bit, letter in enumerate("MFIOTKPZD") if mask & (1 << bit)]
from django.utils.html import escape
def transdate(s):
    """Convert a 'D.M.YY' string from the dump into a ``datetime.date``.

    The year is a two-digit offset from 2000.  Returns None when ``s`` is
    empty, None, or contains no dot (i.e. no date was recorded).  A string
    with a dot that does not split into exactly three integer parts raises
    ValueError.
    """
    if not s or '.' not in s:
        return None
    day, month, year = s.split('.')
    return datetime.date(int(year) + 2000, int(month), int(day))
for i in ts['MAMOPER.MM_DOZ']:
print i;
tags=dectag(i['ZARADENIE']);
autor = User.objects.get(username=i['ORG']) if i['ORG'] else None;
Problem.objects.create(nazev=i['NAZOV'], typ=Problem.TYP_TEMA if 'T' in tags else Problem.TYP_ULOHA,
    stav=Problem.STAV_ZADANY if 'P' in tags else (Problem.STAV_SMAZANY if 'Z' in tags else Problem.STAV_NAVRH),
    dakos_id='DOZ:%s'%(i['ID'], ), text_problemu_org=escape(i['ZADANIE']), timestamp=transdate(i['CAS']))

MM_AZAD:

def romdec(rom):
    """Decode a Roman numeral written with the letters I, V and X.

    An empty (or None) numeral decodes to 0.  A leading X, V or I adds its
    value; a trailing X or V preceded by smaller letters subtracts them
    (e.g. 'IX' -> 9, 'IV' -> 4).  Raises ValueError for any other letter;
    an explicit exception is used instead of ``assert`` so the check
    survives ``python -O``.
    """
    if not rom:
        return 0
    if rom[0] == 'X':
        return 10 + romdec(rom[1:])
    if rom[-1] == 'X':
        return 10 - romdec(rom[:-1])
    if rom[0] == 'V':
        return 5 + romdec(rom[1:])
    if rom[-1] == 'V':
        return 5 - romdec(rom[:-1])
    if rom[0] == 'I':
        return 1 + romdec(rom[1:])
    raise ValueError('unsupported Roman numeral: %r' % (rom,))
for i in ts['MAMOPER.MM_AZAD'][1:]:
print i;
tags=dectag(i['ZAMERANIE']);
r = romdec(i['ROC_Z']);
c1 = Cislo.objects.filter(rocnik__rocnik=r, cislo=i['CIS_Z']);
c1 = c1[0] if len(c1) else None;
c2 = Cislo.objects.filter(rocnik__rocnik=r, cislo=i['CIS_V']);
c2 = c2[0] if len(c2) else None;
Problem.objects.create(nazev=i['NAZOV'], typ=Problem.TYP_TEMA if 'T' in tags else Problem.TYP_ULOHA, stav=Problem.STAV_ZADANY,
    kod=i['ZADANIE'][1:], cislo_zadani=c1, cislo_reseni=c2, dakos_id='AZAD:%s'%(i['ID'], ),
    text_problemu_org=escape(u'Abstrakt:\n'+i['ABSTRAKT']))
for i in ts['MAMOPER.MM_AZAD'][1:]:
print i; tags=dectag(i['ZAMERANIE']); p = Problem.objects.get(dakos_id='AZAD:%s'%(i['ID'], ));
[p.zamereni.add(t) for t in tags if t in 'MFIO']; p.save()

Reseni

MM_RIESENIA:

for i in ts['MAMOPER.MM_RIESENIA']:
tstp = datetime.datetime.fromtimestamp(int(i['DORUCENA']));
print i, tstp;
r = Resitel.objects.get(dakos_id=i['RIESITEL']);
u = Problem.objects.get(dakos_id="ZAD:%s.%s.%s.%s" % (i['ROCNIK'], i['CISLO'], i['ULOHA'], i['TYP'],));
Reseni.objects.create(problem=u, resitel=r, body=int(i['POCET_BODOV'] or '0'),
    poznamka='Zdroj: MM_RIESENIA\nForma: %s\n%s' % ({'1':'E', '2':'P'}[i['FORMA']], i['POZNAMKA'], ), timestamp=tstp)