Add xanadoc span references to database.

This commit is contained in:
Edward Betts 2018-06-04 17:38:32 +01:00
parent 3569cc76d5
commit 9199ce427c

View file

@ -1,4 +1,4 @@
from . import create_app, model, database from . import create_app, model, database, edl
from pprint import pprint from pprint import pprint
import click import click
@ -39,6 +39,10 @@ def delete_item(hashid):
def populate_references(): def populate_references():
home = 'http://localhost:5000/' home = 'http://localhost:5000/'
seen = set() seen = set()
for ref in model.Reference.query:
seen.add((ref.subject_id, ref.object_id))
for link_obj in model.XanaLink.query: for link_obj in model.XanaLink.query:
link = link_obj.parse() link = link_obj.parse()
for items in link['facets']: for items in link['facets']:
@ -56,4 +60,26 @@ def populate_references():
database.session.add(ref) database.session.add(ref)
seen.add(as_tuple) seen.add(as_tuple)
# Record one Reference per (xanadoc, source document) pair, where the source
# documents are the ones named by the spans of the xanadoc's parsed EDL.
for xanadoc in model.XanaDoc.query:
    doc_edl = edl.parse_edl(xanadoc.text)
    # Nothing to record when the parsed EDL has no 'spans' entry or it is empty.
    if 'spans' not in doc_edl or not doc_edl['spans']:
        continue
    # Each span unpacks as (url, start, length); only the URL is needed here.
    for url, _start, _length in doc_edl['spans']:
        # NOTE(review): assumes from_external always returns an item for a
        # span URL - confirm it cannot return None here.
        src_doc = model.Item.from_external(url, home=home)
        print(xanadoc.id, '->', src_doc.id)
        # The edge is xanadoc -> source document. The previous code reused
        # link_obj/item, which are leftovers from the XanaLink loop, so it
        # recorded the wrong pair (or raised NameError when no links exist).
        as_tuple = (xanadoc.id, src_doc.id)
        if as_tuple in seen:
            continue
        ref = model.Reference(subject_id=xanadoc.id, object_id=src_doc.id)
        database.session.add(ref)
        seen.add(as_tuple)
database.session.commit() database.session.commit()
@app.cli.command()
@click.argument('hashid')
def show_references(hashid):
    # CLI command: look up the item for the given hashid and print its id,
    # followed by the ids of its subjects and of its objects.
    # Documented with comments rather than a docstring because click would
    # expose a docstring as the command's --help text.
    target = model.Item.get_by_hashid(hashid)
    print('item_id:', target.id)

    subject_ids = [subject.id for subject in target.subjects]
    print('subjects:', subject_ids)

    object_ids = [obj.id for obj in target.objects]
    print('objects:', object_ids)