add xanadoc viewer

This commit is contained in:
Edward Betts 2017-02-15 14:59:18 +00:00
parent c4bbc45372
commit 09893c4eff
10 changed files with 6748 additions and 179 deletions

138
sourcing/edl.py Normal file
View file

@ -0,0 +1,138 @@
from .url import get_url, get_text
from .parse import get_span, parse_span, parse_link, parse_sourcedoc_facet, xanadoc_span_html, span_html, get_urls
from collections import defaultdict
from html import escape
import re
re_comment = re.compile(r'#.*')
re_xanalink = re.compile('xanalink: +([^ ]+) *$')
max_sourcedoc_size = 600000
def fulfil_edl(edl):
    """Yield one resolved span dict for every span pointer in *edl*.

    *edl* is the raw EDL text.  Each source document is fetched at most
    once and its text is reused for later spans of the same URL.

    Yields whatever ``get_span`` builds for ``(url, start, length)``.
    """
    text = {}
    # parse_edl() returns a dict with 'spans' and 'links'; only the span
    # pointers are (url, start, length) tuples.
    for url, start, length in parse_edl(edl)['spans']:
        if url not in text:
            # get_text() returns a dict describing the document, while
            # get_span() slices text[url] directly, so keep only the string.
            text[url] = get_text(url)['text']
        yield get_span(text, url, start, length)
def parse_edl(edl_text):
    """Split EDL text into span pointers and xanalinks.

    Comments (``#`` to end of line) and blank lines are ignored.  A line
    that ``parse_span`` recognises is stored as a span pointer; a line
    matching ``xanalink: <url>`` is stored together with the content
    fetched from that URL.  Any other line is dropped.

    Returns a dict ``{'spans': [...], 'links': [{'url', 'text'}, ...]}``.
    """
    span_pointers = []
    xanalinks = []
    for raw_line in edl_text.splitlines():
        stripped = re_comment.sub('', raw_line).strip()
        if not stripped:
            continue

        pointer = parse_span(stripped)
        if pointer:
            span_pointers.append(pointer)
            continue

        found = re_xanalink.match(stripped)
        if found:
            target = found.group(1)
            # The link document is fetched eagerly while parsing.
            xanalinks.append({'url': target, 'text': get_url(target)})

    return {'spans': span_pointers, 'links': xanalinks}
def _span_html_with_links(span_text, start, length, links):
    """Return escaped HTML for ``span_text[start:start + length]``.

    *links* is a list of ``(link_start, link_len, link_num, span_type)``
    tuples in source-document coordinates, sorted by start.  The part of
    each link that falls inside the span is wrapped in a
    ``<span class="xanadoclink link" id="xanalinkN">`` element.

    Every link is clamped to the span (and to the text already emitted),
    so no characters outside the span are output and overlapping links do
    not duplicate text.  A link with no characters left after clamping is
    skipped.
    """
    end = start + length
    pieces = []
    pos = start
    for link_start, link_len, num, _span_type in links:
        if link_start >= end:
            break  # links are sorted by start: nothing later can overlap
        seg_start = max(link_start, pos)
        seg_end = min(link_start + link_len, end)
        if seg_end <= seg_start:
            continue
        open_tag = '<span class="xanadoclink link" id="xanalink{}">'.format(num)
        pieces.append(escape(span_text[pos:seg_start]))
        pieces.append(open_tag + escape(span_text[seg_start:seg_end]) + '</span>')
        pos = seg_end
    pieces.append(escape(span_text[pos:end]))
    return ''.join(pieces)


def fulfil_edl_with_sources(edl_text):
    """Build the HTML for a xanadoc and for its source documents.

    Parses *edl_text*, fetches every document referenced by a span or by
    a two-facet link, and marks up transclusions and links in both the
    assembled document and the sources.

    Returns a dict with:

    * ``'source'``: list of source dicts as returned by ``get_text``; the
      ``'text'`` key holds marked-up HTML, and is absent for sources that
      are longer than ``max_sourcedoc_size`` or are hidden by a
      ``HideTransclusions`` link.
    * ``'doc'``: HTML of the assembled document.
    * ``'span_count'``: number of span pointers.
    * ``'link_count'``: number of two-facet links.
    """
    edl = parse_edl(edl_text)
    spans = edl['spans']

    hide_transclusions = set()
    two_facet_links = []

    link_num = 0
    for link in edl['links']:
        link_detail = parse_link(link['text'])
        if link_detail['type'] == 'HideTransclusions':
            hide_transclusions.add(parse_sourcedoc_facet(link_detail['facets'][0]))
        elif len(link_detail['facets']) == 2:
            two_facet_links.append((link_num, [parse_span(span[0]) for span in link_detail['facets']]))
            # NOTE(review): only two-facet links are numbered, so the ids run
            # 0..link_count-1, which is what the viewer script iterates over.
            # Indentation was lost in the reviewed copy - confirm placement.
            link_num += 1

    source = [get_text(url) for url in get_urls(spans)]
    source_text = {s['url']: s['text'] for s in source}

    # Group link facets by the document they point into, fetching any
    # document that is referenced only by a link.
    source_doc_links = defaultdict(list)
    for facet_link_num, facets in two_facet_links:
        for url, start, length in facets:
            source_doc_links[url].append((start, length, facet_link_num, 'link'))
            if url in source_text:
                continue
            s = get_text(url)
            source.append(s)
            source_text[s['url']] = s['text']

    for links in source_doc_links.values():
        links.sort()

    spans = list(enumerate(spans))

    doc_spans = []
    for num, (url, start, length) in spans:
        highlight = url not in hide_transclusions
        new_text = _span_html_with_links(source_text[url], start, length,
                                         source_doc_links[url])
        cur = xanadoc_span_html(num, new_text, url, start, length, highlight=highlight)
        doc_spans.append(cur)

    doc = ''.join(doc_spans)

    for s in source:
        text = s.pop('text')
        if s['length'] > max_sourcedoc_size:
            continue  # too large to display; the source is left without 'text'
        if s['url'] in hide_transclusions:
            continue
        source_spans = [(start, length, num, 'transclusion')
                        for num, (url, start, length) in spans
                        if url == s['url']]
        source_spans += source_doc_links[s['url']]
        source_spans.sort()

        pieces = []
        pos = 0
        for start, length, num, span_type in source_spans:
            end = start + length
            pieces.append(escape(text[pos:start]) +
                          span_html(span_type, num) +
                          escape(text[start:end]) +
                          '</span>')
            pos = end
        pieces.append(escape(text[pos:]))
        s['text'] = ''.join(pieces).replace('\n', '<br/>\n')

    return {
        'source': source,
        'doc': doc.replace('\n', '<br/>\n'),
        'span_count': len(spans),
        'link_count': len(two_facet_links),
    }

View file

@ -1,87 +1,22 @@
import re import re
import requests
import os.path import os.path
import random
from html import escape from html import escape
from collections import defaultdict
re_comment = re.compile(r'#.*')
re_span_pointer = re.compile(r'span: (.*),start=(\d+),length=(\d+)') re_span_pointer = re.compile(r'span: (.*),start=(\d+),length=(\d+)')
re_xanalink = re.compile('xanalink: +([^ ]+) *$') re_xanalink = re.compile('xanalink: +([^ ]+) *$')
re_facet = re.compile('^facet\d* *=\s*(.*)\s*$') re_facet = re.compile('^facet\d* *=\s*(.*)\s*$')
re_comment = re.compile(r'#.*')
re_colon_slash = re.compile('[/:]+')
project_dir = os.path.dirname(os.path.dirname(__file__)) project_dir = os.path.dirname(os.path.dirname(__file__))
cache_location = os.path.join(project_dir, 'cache')
max_sourcedoc_size = 600000
xnb_per_char = 150000 xnb_per_char = 150000
censor_urls = {'http://hyperland.com/xuCambDemo/J.Ineffable.txt',
'http://royalty.pub/Ineffable/17/32/Trovato'}
def censor_text(text):
return ''.join(chr(random.randint(9728, 9983)) if c.isalnum() else c
for c in text)
def get_url(url):
filename = os.path.join(cache_location, url_filename(url))
if os.path.exists(filename):
content = open(filename, 'rb').read()
else:
content = requests.get(url).content
open(filename, 'wb').write(content)
return content.decode(errors='replace')
def get_text(url):
# assume UTF-8
text = get_url(url)
if url in censor_urls:
text = censor_text(text)
heading = url.rsplit('/', 1)[-1]
return {
'url': url,
'text': text,
'heading': heading,
'length': len(text),
}
def parse_span(line): def parse_span(line):
m = re_span_pointer.match(line) m = re_span_pointer.match(line)
if not m: if not m:
return None return None
return (m.group(1), int(m.group(2)), int(m.group(3))) return (m.group(1), int(m.group(2)), int(m.group(3)))
def parse_edl(edl_text):
edl = {
'spans': [],
'links': [],
}
for line in edl_text.splitlines():
line = re_comment.sub('', line).strip()
if not line:
continue
span_pointer = parse_span(line)
if span_pointer:
edl['spans'].append(span_pointer)
continue
m = re_xanalink.match(line)
if m:
link_url = m.group(1)
edl['links'].append({
'url': link_url,
'text': get_url(link_url),
})
continue
return edl
def get_span(text, url, start, length): def get_span(text, url, start, length):
return { return {
'url': url, 'url': url,
@ -90,20 +25,9 @@ def get_span(text, url, start, length):
'text': text[url][start:start + length] 'text': text[url][start:start + length]
} }
def fulfil_edl(edl):
text = {}
for url, start, length in parse_edl(edl):
if url not in text:
text[url] = get_text(url)
yield get_span(text, url, start, length)
def get_urls(spans): def get_urls(spans):
return {i[0] for i in spans} return {i[0] for i in spans}
def url_filename(url):
return re_colon_slash.sub('_', url)
def find_min_max(spans, source): def find_min_max(spans, source):
text_min, text_max = {}, {} text_min, text_max = {}, {}
for url, start, length in spans: for url, start, length in spans:
@ -175,99 +99,4 @@ def parse_link(link_text):
legs.append(line.strip()) legs.append(line.strip())
return {'type': link_type, 'facets': facets} return {'type': link_type, 'facets': facets}
def fulfil_edl_with_sources(edl_text):
edl = parse_edl(edl_text)
spans = edl['spans']
hide_transclusions = set()
two_facet_links = []
link_num = 0
for link in edl['links']:
link_detail = parse_link(link['text'])
if link_detail['type'] == 'HideTransclusions':
hide_transclusions.add(parse_sourcedoc_facet(link_detail['facets'][0]))
elif len(link_detail['facets']) == 2:
two_facet_links.append((link_num, [parse_span(span[0]) for span in link_detail['facets']]))
link_num += 1
source = [get_text(url) for url in get_urls(spans)]
source_text = {s['url']: s['text'] for s in source}
source_doc_links = defaultdict(list)
for link_num, facets in two_facet_links:
for span in facets:
url, start, length = span
source_doc_links[url].append((start, length, link_num, 'link'))
if url in source_text:
continue
s = get_text(url)
source.append(s)
source_text[s['url']] = s['text']
for s in source_doc_links.values():
s.sort()
spans = list(enumerate(spans))
doc_spans = []
for num, (url, start, length) in spans:
highlight = url not in hide_transclusions
span_text = source_text[url] # [start:start + length]
censor = url in censor_urls
new_text = ''
pos = start
for link_start, link_len, link_num, span_type in source_doc_links[url]:
link_end = link_start + link_len
if link_start >= start + length:
break
if link_end < start:
continue
open_tag = '<span class="xanadoclink link" id="xanalink{}">'.format(link_num)
link_span = (open_tag +
escape(span_text[link_start:link_end]) +
'</span>')
new_text += escape(span_text[pos:link_start]) + link_span
pos = link_end
new_text += escape(span_text[pos:start + length])
cur = xanadoc_span_html(num, new_text, url, start, length, highlight=highlight, censor=censor)
doc_spans.append(cur)
doc = ''.join(doc_spans)
for s in source:
text = s.pop('text')
if s['length'] > max_sourcedoc_size:
# print('{} > {}'.format(s['length'], max_sourcedoc_size))
continue
if s['url'] in hide_transclusions:
continue
source_spans = [(start, length, num, 'transclusion') for num, (url, start, length) in spans if url == s['url']]
source_spans += source_doc_links[s['url']]
source_spans.sort()
new_text = ''
pos = 0
for start, length, num, span_type in source_spans:
end = start + length
new_text += (escape(text[pos:start]) +
span_html(span_type, num) +
escape(text[start:end]) +
'</span>')
pos = end
new_text += escape(text[pos:])
new_text = new_text.replace('\n', '<br/>\n')
s['text'] = new_text
return {
'source': source,
'doc': doc.replace('\n', '<br/>\n'),
'span_count': len(spans),
'link_count': len(two_facet_links),
}

View file

@ -0,0 +1,106 @@
/* Stylesheet for the xanadoc viewer: floating document panels, highlighted
   transclusion/link spans, and the SVG polygons ("bridges") drawn between them. */

/* A floating panel; fixed-positioned, 400x400 by default. */
.document {
padding: 5px;
margin: 10px;
background-color: #eee;
width: 400px;
height: 400px;
border: 1px black solid;
position: fixed;
/* transform: scale(2); */
}
/* Background tints that tell the two panel types apart. */
.sourcedoc {
background-color: #ffe;
}
.xanadoc {
background-color: #eef;
}
.scale {
transform: scale(2);
}
/* Fixed panel for the element with id "edlform". */
#edlform {
padding: 5px;
margin: 10px;
width: 400px;
background-color: #eee;
border: 1px black solid;
position: fixed;
left: 20px;
top: 20px;
}
/* Panel title bar; the negative margins cancel the panel's 5px padding. */
.heading {
background-color: #888;
color: white;
margin-left: -5px;
margin-right: -5px;
margin-top: -5px;
padding: 5px;
}
/* Area below the title bar; .body fills it and scrolls vertically. */
.wrapper {
position: absolute;
top: 30px;
bottom: 0px;
}
.body {
text-align: justify;
overflow-y: auto;
box-sizing: border-box;
height: 100%;
}
/* Span outlines: #FF901A for transclusions, #33BDFF for links. */
.transclusion {
border: solid #FF901A 3px;
padding: 1px;
}
.link {
border: solid #33BDFF 3px;
padding: 1px;
}
/* Hover states fill the span with the matching colour. */
.hovertransclusion {
background-color: #FF901A;
padding: 2px;
}
.hoverlink {
background-color: #33BDFF;
padding: 2px;
}
/* Bridge polygons: transparent fill, but still receive pointer events. */
polygon {
fill:white;
fill-opacity: 0;
pointer-events: auto;
stroke-width: 3;
}
polygon.bridge { stroke:#FF901A; }
polygon.linkbridge { stroke:#33BDFF; }
polygon.hoverbridge {
fill:#FF901A;
fill-opacity: 0.75;
}
polygon.hoverlinkbridge {
fill:#33BDFF;
fill-opacity: 0.75;
}
/* The page itself never scrolls. */
html, body { margin:0; padding:0; overflow:hidden; }
/* Full-viewport overlay above the panels; the svg itself ignores pointer
   events so the panels underneath stay interactive. */
svg {
position:fixed; top:0; bottom:0; left:0; right:0;
z-index: 100000;
pointer-events: none;
}
.paylink {
background-color: #33BDFF;
color: black;
padding: 2px;
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,397 @@
// Shared state for the xanadoc viewer script.
'use strict';
// Elements looked up once at load time.  Neither variable is referenced by
// the functions visible in this file (doc1 only in commented-out code).
var doc1 = document.getElementById('doc1');
var edlform = document.getElementById('edlform');
// Highest z-index handed out so far; update_location() increments it.
var max_zindex = 0;
// Number of transclusion / link bridges that update() has to redraw.
var bridge_count = 0;
var link_bridge_count = 0;
// Not referenced by the code visible in this file.
var payment_params = {};
var payment_content;
var xnb_per_char = 150000;
// Debug helper: log the type and page coordinates of an event.
function listener (event) {
  var kind = event.type;
  var page_x = event.pageX;
  var page_y = event.pageY;
  console.log(kind, page_x, page_y);
}
// Debug helper: log the top/right of the first client rect of `hl` and the
// bottom/left of its last client rect.
function hl_location(hl) {
  var rects = hl.getClientRects();
  var first = rects[0];
  var last = rects[rects.length - 1];
  console.log(first.top, first.right, last.bottom, last.left);
}
// Drag handler: move the dragged element by the event's delta and redraw
// the bridges.  The running position is kept in data-x / data-y.
function dragMoveListener (event) {
  var dragged = event.target;
  var prev_x = parseFloat(dragged.getAttribute('data-x')) || 0;
  var prev_y = parseFloat(dragged.getAttribute('data-y')) || 0;
  update_location(dragged, prev_x + event.dx, prev_y + event.dy);
  update();
}
// Redraw every transclusion bridge and then every link bridge.
function update() {
  var n;
  for (n = 0; n < bridge_count; n += 1) {
    update_bridge('bridge' + n, 'span' + n, 'transclusion' + n);
  }
  for (n = 0; n < link_bridge_count; n += 1) {
    update_bridge('linkbridge' + n, 'xanalink' + n, 'link' + n);
  }
}
// Return `value` limited to the range [low, high].
function clamp_to_range(value, low, high) {
  if (value < low) {
    return low;
  }
  if (value > high) {
    return high;
  }
  return value;
}

// Reposition the polygon with id `bridge` so that it joins the elements with
// ids `name_h1` and `name_h2`.
//
// The polygon runs from the right edge of whichever element is further left
// to the left edge of the other one.  Vertical coordinates are clamped to
// the enclosing ".body" container of each element, so a span that is
// scrolled out of view collapses onto the container's edge.
//
// Does nothing when either element, the polygon, or an enclosing ".body"
// is missing.
function update_bridge(bridge, name_h1, name_h2) {
  var h1 = document.getElementById(name_h1);
  var h2 = document.getElementById(name_h2);
  var poly = document.getElementById(bridge);

  // h2 is absent when the large source document was skipped.
  if (!h1 || !h2 || !poly) {
    return;
  }

  var rect1 = h1.getBoundingClientRect();
  var rect2 = h2.getBoundingClientRect();

  var h1_x_center = rect1.right - rect1.width / 2;
  var h2_x_center = rect2.right - rect2.width / 2;
  var h1_is_left = h1_x_center < h2_x_center;

  var h_l = h1_is_left ? h1 : h2;
  var h_r = h1_is_left ? h2 : h1;
  var rect_l = h1_is_left ? rect1 : rect2;
  var rect_r = h1_is_left ? rect2 : rect1;

  var container_l = $(h_l).closest(".body").get(0);
  var container_r = $(h_r).closest(".body").get(0);
  if (!container_l || !container_r) {
    return;
  }
  var body_l = container_l.getBoundingClientRect();
  var body_r = container_r.getBoundingClientRect();

  var left_top = clamp_to_range(rect_l.top, body_l.top, body_l.bottom);
  var left_bottom = clamp_to_range(rect_l.bottom, body_l.top, body_l.bottom);
  var right_top = clamp_to_range(rect_r.top, body_r.top, body_r.bottom);
  var right_bottom = clamp_to_range(rect_r.bottom, body_r.top, body_r.bottom);

  // Corner order: left-top, left-bottom, right-bottom, right-top.
  var points = [
    rect_l.right + ',' + left_top,
    rect_l.right + ',' + left_bottom,
    rect_r.left + ',' + right_bottom,
    rect_r.left + ',' + right_top
  ];

  poly.setAttribute('points', points.join(' '));
}
// Move `target` to (x, y) with a CSS translate, remember the position in
// data-x / data-y, and raise the element above everything moved earlier.
function update_location(target, x, y) {
  var translate = 'translate(' + x + 'px, ' + y + 'px)';
  target.style.transform = translate;

  target.setAttribute('data-x', x);
  target.setAttribute('data-y', y);

  max_zindex = max_zindex + 1;
  target.style.zIndex = max_zindex;
}
// Resize handler: apply the new size, shift the element when it is resized
// from its top or left edge, and redraw the bridges.
function resizemove(event) {
  var box = event.target;
  var start_x = parseFloat(box.getAttribute('data-x')) || 0;
  var start_y = parseFloat(box.getAttribute('data-y')) || 0;

  box.style.width = event.rect.width + 'px';
  box.style.height = event.rect.height + 'px';

  // deltaRect.left/top are non-zero only when the top or left edge moved.
  var new_x = start_x + event.deltaRect.left;
  var new_y = start_y + event.deltaRect.top;

  update_location(box, new_x, new_y);
  update();
}
// Action checker shared by both interactables: a drag is only allowed when
// it starts on a ".heading" element; every other action passes through.
function heading_drag_only(pointer, event, action, interactable, element, interaction) {
  var on_heading = interact.matchesSelector(event.target, '.heading');
  if (action.name == 'drag' && !on_heading) {
    return;
  }
  return action;
}

// Document panels can be dragged by their heading and resized from any edge.
interact('.document')
  .draggable({
    onmove: dragMoveListener,
  })
  .resizable({
    // preserveAspectRatio: false,
    edges: { left: true, right: true, bottom: true, top: true },
  })
  .on('resizemove', resizemove)
  .actionChecker(heading_drag_only);

// The EDL form can be dragged by its heading.
interact('#edlform')
  .draggable({
    onmove: dragMoveListener,
  })
  .on('resizemove', resizemove)
  .actionChecker(heading_drag_only);
// doc1.style.height = '400px';
// update_location(doc1, 200, 50);
// Counter used to give every created panel a unique id.
var docid = 0;

// Render `doc` ({heading, doctype, text}) with the "#document-template"
// Handlebars template and append the panel to "#top".  A 'xanadoc' panel is
// placed relative to the centre of the window; any other panel gets a
// random position.
function new_document(doc) {
  var template = Handlebars.compile($("#document-template").html());

  docid += 1;
  var element = $(template({
    docid: docid,
    heading: doc.heading,
    doctype: doc.doctype,
    text: doc.text
  }));

  var w = document.documentElement.clientWidth;
  var h = document.documentElement.clientHeight;

  var is_xanadoc = doc.doctype == 'xanadoc';
  var x = is_xanadoc ? (w / 2) - 400 : Math.random() * (w - 400);
  var y = is_xanadoc ? (h / 2) - 400 : Math.random() * (h - 400);

  update_location(element.get(0), x, y);
  $("#top").append(element);
}
// Remove every document panel and every bridge polygon, and zero the
// bridge counters.
function reset() {
  $(".document").remove();

  var svg = document.getElementById('svg');
  for (var child = svg.firstChild; child; child = svg.firstChild) {
    svg.removeChild(child);
  }

  bridge_count = 0;
  link_bridge_count = 0;
}
// "Clear" button: empty the EDL text field and remove all panels and bridges.
$("button#clear").on('click', function(event) {
  $("#edl").val("");
  reset();
});
// Return the number at the end of `str`, e.g. 12 for "span12".
//
// If the string does not end in digits, the first run of digits found
// anywhere in it is used instead.  Throws when `str` contains no digits.
function get_end_number(str) {
  var text = String(str);
  // Prefer trailing digits so that "doc1body27" gives 27 rather than 1.
  var found = /(\d+)$/.exec(text) || /(\d+)/.exec(text);
  if (!found) {
    throw new Error("no number found");
  }
  return parseInt(found[1], 10);
}
// Switch the hover styling of a transclusion on or off.  `element` is any
// element whose id ends in the transclusion number; the bridge polygon and
// both highlighted spans with that number are updated.
function hover_bridge(element, hover) {
  var num = get_end_number(element.id);
  var active = !!hover;

  var bridge = document.getElementById('bridge' + num);
  bridge.setAttribute("class", active ? 'hoverbridge' : 'bridge');

  $('#transclusion' + num).toggleClass('hovertransclusion', active);
  $('#span' + num).toggleClass('hovertransclusion', active);
}
// Switch the hover styling of a link on or off.  `element` is any element
// whose id ends in the link number; the link bridge polygon and both
// highlighted link spans with that number are updated.
function hover_link_bridge(element, hover) {
  var num = get_end_number(element.id);
  var active = !!hover;

  var bridge = document.getElementById('linkbridge' + num);
  bridge.setAttribute("class", active ? 'hoverlinkbridge' : 'linkbridge');

  $('#xanalink' + num).toggleClass('hoverlink', active);
  $('#link' + num).toggleClass('hoverlink', active);
}
// Append `count` hidden polygons to `svg_element`.  Polygon i gets the id
// `name + i` and the class `name`; hovering it calls hover_fn(polygon, bool).
function add_bridge_polygons(svg_element, count, name, hover_fn) {
  for (var i = 0; i < count; i++) {
    var p = document.createElementNS('http://www.w3.org/2000/svg', 'polygon');
    p.setAttribute('id', name + i);
    p.setAttribute('class', name);
    p.setAttribute('visibility', 'hidden');
    p.addEventListener('mouseover', function(e) {
      hover_fn(this, true);
    });
    p.addEventListener('mouseout', function(e) {
      hover_fn(this, false);
    });
    svg_element.appendChild(p);
  }
}

// Clicking an element matching `doc_selector` toggles the source document
// that contains the element `source_prefix + N` (N being the number at the
// end of the clicked element's id), together with the bridges of every
// `source_selector` span inside that source document.
function bind_source_toggle(doc_selector, source_prefix, bridge_prefix, source_selector) {
  $(doc_selector).click(function(e) {
    var num = get_end_number(this.id);
    var source = $('#' + source_prefix + num).closest(".sourcedoc");
    var clicked_bridge = document.getElementById(bridge_prefix + num);
    var show = clicked_bridge.getAttribute('visibility') == 'hidden';
    var new_state = show ? 'visible' : 'hidden';
    source.find(source_selector).each(function(index, value) {
      var bridge_num = get_end_number(value.id);
      var bridge = document.getElementById(bridge_prefix + bridge_num);
      bridge.setAttribute('visibility', new_state);
    });
    // Show or hide explicitly so the panel always agrees with the bridges
    // just switched; a blind toggle() could hide a panel that another
    // span type had already opened.
    source.toggle(show);
    update();
  });
}

// Build the whole view from the global `doc` object: one hidden polygon per
// transclusion and per link, one panel per source document that has text,
// and the xanadoc panel itself.  Source panels start hidden and are opened
// by clicking a transclusion or link in the xanadoc.
function fulfil() {
  reset();  // clear all documents and bridges

  bridge_count = doc.span_count;
  link_bridge_count = doc.link_count;

  var svg_element = document.getElementById('svg');
  add_bridge_polygons(svg_element, doc.span_count, 'bridge', hover_bridge);
  add_bridge_polygons(svg_element, doc.link_count, 'linkbridge', hover_link_bridge);

  $.each(doc.source, function(key, source_doc) {
    // Sources without text were skipped on the server side.
    if (source_doc.text) {
      source_doc.heading += " (source document)";
      source_doc.doctype = 'sourcedoc';
      new_document(source_doc);
    }
  });

  new_document({'heading': 'xanadoc',
                'text': doc.doc,
                'doctype': 'xanadoc'});

  update();

  $('.transclusion').on('mouseover', function(e) {
    hover_bridge(this, true);
  });
  $('.transclusion').on('mouseout', function(e) {
    hover_bridge(this, false);
  });
  $('.link').on('mouseover', function(e) {
    hover_link_bridge(this, true);
  });
  $('.link').on('mouseout', function(e) {
    hover_link_bridge(this, false);
  });

  bind_source_toggle('.xanadoclink', 'link', 'linkbridge', '.sourcedoclink');
  bind_source_toggle('.xanadoctransclusion', 'transclusion', 'bridge', '.sourcedoctransclusion');

  // Bridges follow their spans while a panel body is scrolled.
  $('.body').on('scroll', function(event) {
    update();
  });

  $('.sourcedoc').hide();
}
// Size every <svg> element to the window's client area minus 10px in each
// dimension.
function size_svg() {
  var root = document.documentElement;
  var w = root.clientWidth - 10;
  var h = root.clientHeight - 10;
  $('svg').width(w);
  $('svg').height(h);
}
// On DOM ready: size the SVG overlay, keep it sized on window resize, and
// build the view.
$(function() {
  size_svg();
  $(window).on('resize', size_svg);
  fulfil();
});

View file

@ -7,6 +7,7 @@
<title>{{ title | default("Xanadu") }}</title> <title>{{ title | default("Xanadu") }}</title>
<link rel="stylesheet" href="{{ url_for('static', filename='bootstrap/css/bootstrap.css') }}"> <link rel="stylesheet" href="{{ url_for('static', filename='bootstrap/css/bootstrap.css') }}">
{{ style | safe }}
</head> </head>
<body> <body>

View file

@ -11,6 +11,9 @@ div#text { font-family: Courier; }
{% if doc.user == current_user %} {% if doc.user == current_user %}
<a href="{{ doc.edit_url }}" class="btn btn-default">edit</a> <a href="{{ doc.edit_url }}" class="btn btn-default">edit</a>
{% endif %} {% endif %}
{% if doc.type == 'xanadoc' %}
<a href="{{ doc.url }}" class="btn btn-default">fulfil</a>
{% endif %}
</h1> </h1>
<p><a href="{{ url_for('.home') }}">back to index</a></p> <p><a href="{{ url_for('.home') }}">back to index</a></p>
<div class="well" id="text"> <div class="well" id="text">

View file

@ -0,0 +1,53 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{{ title | default("Xanadu") }}</title>
<link rel="stylesheet" href="{{ url_for('static', filename='bootstrap/css/bootstrap.css') }}">
<link rel="stylesheet" href="{{ url_for('static', filename='css/xanadoc.css') }}">
</head>
<body>
{% include "navbar.html" %}
<div class="container-fluid">
{% include "flash_msg.html" %}
{% raw %}
<script id="document-template" type="text/x-handlebars-template">
<div class="document {{ doctype }}" id="doc{{ docid }}">
<div class="heading" id="head{{ docid }}">{{ heading }}</div>
<div class="wrapper">
<div class="body" id="body{{ docid }}">
{{{ text }}}
</div>
</div>
</div>
</script>
{% endraw %}
<svg id="svg" version="1.2" xmlns="http://www.w3.org/2000/svg">
</svg>
<div id="top">
<a href="{{ item.url + '/edl' }}" class="btn btn-default">view EDL</a>
</div>
<script src="{{ url_for('static', filename='jquery/jquery.js') }}"></script>
<script src="{{ url_for('static', filename='bootstrap/js/bootstrap.js') }}"></script>
<script>
var get_span_path = "{{ url_for('.get_span') }}";
var doc = {{ doc | tojson }};
</script>
<script src="{{ url_for('static', filename='js/interact.js') }}"></script>
<script src="{{ url_for('static', filename='js/viewer.js') }}"></script>
<script src="{{ url_for('static', filename='handlebars/handlebars.js') }}"></script>
</body>
</html>

45
sourcing/url.py Normal file
View file

@ -0,0 +1,45 @@
import requests
from .model import Item
import os.path
import re
project_dir = os.path.dirname(os.path.dirname(__file__))
cache_location = os.path.join(project_dir, 'cache')
# One or more consecutive '/' or ':' characters.
re_colon_slash = re.compile('[/:]+')


def url_filename(url):
    """Return *url* with every run of '/' and ':' replaced by one '_'."""
    flattened = re_colon_slash.sub('_', url)
    return flattened
def get_text(url):
    """Fetch *url* and describe the document as a dict.

    Returns ``{'url', 'text', 'heading', 'length'}`` where ``heading`` is
    the part of the URL after its last '/' and ``length`` is the number of
    characters in the text.
    """
    body = get_url(url)  # assume UTF-8
    last_segment = url.rpartition('/')[2]
    return dict(url=url, text=body, heading=last_segment, length=len(body))
def get_url(url):
    """Return the text behind *url*.

    ``Item.from_external(url)`` is tried first (presumably it resolves
    URLs that refer to items stored by this application - confirm against
    the model); when it returns an item, that item's text is used.
    Otherwise the URL is fetched over HTTP and decoded with undecodable
    bytes replaced.

    The file-cache code that used to follow the final ``return`` could
    never run and has been removed; ``cache_location`` and
    ``url_filename`` are left in place at module level.
    """
    print('get_url:', url)
    item = Item.from_external(url)
    if item:
        return item.text
    content = requests.get(url).content
    return content.decode(errors='replace')

View file

@ -5,6 +5,8 @@ from flask_login import (login_user, current_user, logout_user,
from .forms import (LoginForm, SignupForm, AccountSettingsForm, from .forms import (LoginForm, SignupForm, AccountSettingsForm,
UploadSourceDocForm, SourceDocForm, ItemForm) UploadSourceDocForm, SourceDocForm, ItemForm)
from .model import User, SourceDoc, Item, XanaDoc, XanaLink from .model import User, SourceDoc, Item, XanaDoc, XanaLink
from .url import get_url
from .edl import fulfil_edl_with_sources
from .database import session from .database import session
from .text import iter_lines from .text import iter_lines
from werkzeug.debug.tbtools import get_current_traceback from werkzeug.debug.tbtools import get_current_traceback
@ -122,10 +124,27 @@ def get_item(username, hashid):
doc = None doc = None
return doc if doc else abort(404) return doc if doc else abort(404)
def view_xanadoc(item):
    """Render the xanadoc viewer page for *item*, fulfilling its EDL text."""
    fulfilled = fulfil_edl_with_sources(item.text)
    return render_template('view/xanadoc.html', item=item, doc=fulfilled)
@bp.route('/<username>/<hashid>/edl')
def view_edl(username, hashid):
    """Show the raw EDL of a xanadoc; respond with 404 for other item types."""
    item = get_item(username, hashid)
    if item.type == 'xanadoc':
        return render_template('view.html', doc=item, iter_lines=iter_lines)
    return abort(404)
@bp.route('/<username>/<hashid>') @bp.route('/<username>/<hashid>')
def view_item(username, hashid): def view_item(username, hashid):
item = get_item(username, hashid)
if item.type == 'xanadoc':
return view_xanadoc(item)
return render_template('view.html', return render_template('view.html',
doc=get_item(username, hashid), doc=item,
iter_lines=iter_lines) iter_lines=iter_lines)
@bp.route('/<username>/<hashid>/edit', methods=['GET', 'POST']) @bp.route('/<username>/<hashid>/edit', methods=['GET', 'POST'])
@ -224,9 +243,11 @@ def api_get_document(username, filename):
} }
return jsonify(ret) return jsonify(ret)
@bp.route('/all_titles') @bp.route('/get_span.json')
def get_all_titles(): def get_span():
titles = XanaLink.get_all_titles() url = request.args['url']
for k, v in titles.items(): start = int(request.args['start'])
print(from_external(k), v) length = int(request.args['length'])
return '' spanid = request.args['spanid']
text = get_url(url)
return jsonify(text=text[start:start + length], spanid=spanid)