From 9199ce427c3b3ea0da6f301e207c0f63b89b1a4e Mon Sep 17 00:00:00 2001
From: Edward Betts
Date: Mon, 4 Jun 2018 17:38:32 +0100
Subject: [PATCH] Add xanadoc span references to database.

populate_references now seeds its seen-set with the (subject_id,
object_id) pairs already stored as references, and records a reference
from each xanadoc to every document named by a span in its EDL, keyed
on (xanadoc.id, src_doc.id).

The new show_references command prints an item's id together with the
ids of its subjects and objects.
---
 sourcing/cli.py | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/sourcing/cli.py b/sourcing/cli.py
index 8604652..5251c40 100644
--- a/sourcing/cli.py
+++ b/sourcing/cli.py
@@ -1,4 +1,4 @@
-from . import create_app, model, database
+from . import create_app, model, database, edl
 from pprint import pprint
 import click
 
@@ -39,6 +39,10 @@ def delete_item(hashid):
 def populate_references():
     home = 'http://localhost:5000/'
     seen = set()
+
+    for ref in model.Reference.query:
+        seen.add((ref.subject_id, ref.object_id))
+
     for link_obj in model.XanaLink.query:
         link = link_obj.parse()
         for items in link['facets']:
@@ -56,4 +60,26 @@ def populate_references():
                     database.session.add(ref)
                     seen.add(as_tuple)
 
+    for xanadoc in model.XanaDoc.query:
+        doc_edl = edl.parse_edl(xanadoc.text)
+        if 'spans' not in doc_edl or not doc_edl['spans']:
+            continue
+        for url, start, length in doc_edl['spans']:
+            src_doc = model.Item.from_external(url, home=home)
+            print(xanadoc.id, '->', src_doc.id)
+            as_tuple = (xanadoc.id, src_doc.id)
+            if as_tuple in seen:
+                continue
+            ref = model.Reference(subject_id=xanadoc.id, object_id=src_doc.id)
+            database.session.add(ref)
+            seen.add(as_tuple)
     database.session.commit()
+
+
+@app.cli.command()
+@click.argument('hashid')
+def show_references(hashid):
+    item = model.Item.get_by_hashid(hashid)
+    print('item_id:', item.id)
+    print('subjects:', [i.id for i in item.subjects])
+    print('objects:', [i.id for i in item.objects])