sourcing/sourcing/model.py

372 lines
12 KiB
Python
Raw Normal View History

2017-01-30 10:22:05 +00:00
from flask import url_for
from .database import session
from .parse import parse_link, parse_sourcedoc_facet, parse_span
2018-08-20 13:33:18 +01:00
from .text import first_non_empty_line
2017-01-30 10:22:05 +00:00
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, ForeignKey
from sqlalchemy.types import String, Unicode, Integer, DateTime, Boolean, UnicodeText, Enum
from sqlalchemy import func
2017-04-07 15:43:46 +01:00
from sqlalchemy.orm import relationship, validates, synonym, configure_mappers
2017-01-30 10:22:05 +00:00
from sqlalchemy.sql import exists
from flask_login import UserMixin
from werkzeug.security import generate_password_hash, check_password_hash
from sqlalchemy.ext.hybrid import hybrid_property
2017-04-07 15:43:46 +01:00
from sqlalchemy_continuum import make_versioned
from sqlalchemy_continuum.plugins import FlaskPlugin, ActivityPlugin
2017-01-30 10:22:05 +00:00
import re
from hashids import Hashids
2017-04-07 15:43:46 +01:00
# Switch on sqlalchemy_continuum versioning with the Flask and activity
# plugins.  The ActivityPlugin instance is bound to a module-level name so it
# remains reachable after this setup call.
# NOTE(review): presumably this has to run before the versioned model classes
# below are declared - confirm against the sqlalchemy_continuum documentation.
activity_plugin = ActivityPlugin()
make_versioned(plugins=[FlaskPlugin(), activity_plugin])
2017-01-30 10:22:05 +00:00
# Encoder used to turn integer item ids into hashid strings (minimum length
# eight) and to decode them again.
doc_hashids = Hashids(min_length=8)
# Declarative base class that the models in this module derive from.
Base = declarative_base()
# Give every model a ``query`` attribute built from the session.
Base.query = session.query_property()
2017-02-20 11:10:48 +00:00
# Matches item URLs of the form http://perma.pub/<digits>/<username>/<hashid>
# and captures the username and hashid.  Written as a raw string so that
# ``\d`` and ``\.`` reach the regex engine untouched; the dot in the host name
# is escaped so that it only matches a literal '.'.
re_server_url = re.compile(r'^http://perma\.pub/\d+/([^/]+)/([^/]+)$')
2017-01-30 10:22:05 +00:00
# list of disallowed usernames - maybe this should be in the database
# Every entry needs its own trailing comma: adjacent string literals are
# silently concatenated, which previously merged 'ipad' and 'iphone'.
reserved_name = [
    'root', 'admin', 'administrator', 'support', 'info',
    'test', 'tech', 'online', 'old', 'new', 'jobs', 'login', 'job', 'ipad',
    'iphone', 'javascript', 'script', 'host', 'mail', 'image', 'faq',
    'file', 'ftp', 'error', 'warning', 'the', 'assistance', 'maintenance',
    'controller', 'head', 'chief', 'anon',
]
# Raw strings keep ``\w``, ``\W`` and ``\d`` as regex escapes instead of
# relying on Python passing unknown string escapes through unchanged.
# Usernames: one or more Unicode word characters.
re_username = re.compile(r'^\w+$', re.U)
# Full names: letters plus hyphen, dot, apostrophe, double quote and space.
re_full_name = re.compile(r'^([-.\'" ]|[^\W\d_])+$', re.U)
2018-06-05 12:05:07 +01:00
def item_url(self):
    '''Return the URL of the ``view.view_item`` page for an item.

    ``self`` is the object to link to; it must provide ``hashid`` and a
    ``user`` with a ``username``.  The function previously took no arguments
    while still reading ``self``, so it could only fail with NameError.
    NOTE(review): the parameter is named ``self`` on the assumption that this
    is meant to be attached to a class as a method - confirm intended use.
    '''
    return url_for('view.view_item',
                   username=self.user.username,
                   hashid=self.hashid)
2017-01-30 10:22:05 +00:00
def user_exists(field, value):
    '''Report whether any row has ``field`` equal to ``value``.'''
    matching_row = exists().where(field == value)
    return session.query(matching_row).scalar()
class TimeStampedModel(Base):
    '''Abstract base model adding ``created`` and ``modified`` columns.'''

    __abstract__ = True

    # Defaults to the SQL now() function when a row is inserted.
    created = Column(DateTime, default=func.now())
    # Same insert default as ``created``; also reset to now() on update.
    modified = Column(
        DateTime,
        default=func.now(),
        onupdate=func.now(),
    )
class LoginError(Exception):
    '''Raised when a login attempt is rejected.

    The reason is available as ``msg`` and, because it is also passed to the
    base ``Exception``, through ``str(error)`` and ``error.args``.
    '''

    def __init__(self, msg):
        # Forward the message so tracebacks and logs are not left empty.
        super(LoginError, self).__init__(msg)
        self.msg = msg
class User(TimeStampedModel, UserMixin):
    '''A user account; also serves as the flask_login user object.'''

    __tablename__ = 'user'

    id = Column(Integer, primary_key=True)
    username = Column(Unicode(32), unique=True, nullable=False)
    pw_hash = Column(String(160), nullable=False)
    email = Column(Unicode(64), unique=True, nullable=False)
    email_verified = Column(Boolean(), nullable=False, default=False)
    disabled = Column(Boolean(), nullable=False, default=False)
    deleted = Column(Boolean(), nullable=False, default=False)
    is_super = Column(Boolean, nullable=False, default=False)
    last_login = Column(DateTime)
    full_name = Column(Unicode(64))
    balance = Column(Integer, nullable=False, default=0)

    # Alternative attribute names for the columns above.
    user_id = synonym('id')
    name = synonym('full_name')
    user_name = synonym('username')

    def __init__(self, **kwargs):
        '''Create a user from column values plus a plain-text ``password``.

        The password is hashed and stored as ``pw_hash``; the plain text is
        not kept.  Raises KeyError when ``password`` is not supplied.
        '''
        pw_hash = generate_password_hash(kwargs.pop('password'))
        super(User, self).__init__(pw_hash=pw_hash, **kwargs)

    def __repr__(self):
        return '<User: {!r}>'.format(self.username)

    def set_password(self, password):
        '''Replace the stored hash with one generated from ``password``.'''
        self.pw_hash = generate_password_hash(password)

    def check_password(self, password):
        '''Return True if ``password`` matches the stored hash.'''
        return check_password_hash(self.pw_hash, password)

    def get_id(self):
        '''Return the primary key as the flask_login identifier.'''
        # NOTE(review): flask_login documents get_id() as returning a string;
        # this returns the integer id unchanged - confirm the user loader
        # copes with that before altering it.
        return self.id

    # The validators below raise AssertionError explicitly rather than using
    # ``assert`` statements: asserts are removed when Python runs with -O,
    # which would silently disable the validation.  The exception type is the
    # same one the assert statements raised.

    @validates('email')
    def validate_email(self, key, value):
        '''Reject e-mail addresses that do not contain "@".'''
        if '@' not in value:
            raise AssertionError('invalid e-mail address: {!r}'.format(value))
        return value

    @validates('username')
    def validate_usernane(self, key, value):
        '''Reject usernames that do not match ``re_username``.'''
        if not re_username.match(value):
            raise AssertionError('invalid username: {!r}'.format(value))
        return value

    @validates('full_name')
    def validate_full_name(self, key, value):
        '''Reject non-empty full names that do not match ``re_full_name``.'''
        if value and not re_full_name.match(value):
            raise AssertionError('invalid full name: {!r}'.format(value))
        return value

    @hybrid_property
    def is_live(self):
        '''Truthy when verified and neither disabled nor deleted.

        Written with ``&`` and ``~`` so the same expression works on the
        class (as a SQL expression) as well as on an instance.
        '''
        return self.email_verified & ~self.disabled & ~self.deleted

    @classmethod
    def lookup_user_or_email(cls, user_or_email):
        '''Find a user by e-mail if the value contains "@", else by username.

        Returns the matching user or None.
        '''
        field = cls.email if '@' in user_or_email else cls.username
        return cls.query.filter(field == user_or_email).one_or_none()

    @property
    def mail_to_name(self):
        '''Name to use on e-mails sent to the user.'''
        return self.full_name or self.username

    @classmethod
    def attempt_login(cls, user_or_email, password):
        '''Return the user for valid credentials, else raise LoginError.

        LoginError is raised when no user matches, when the account is
        disabled, or when the password is wrong.
        '''
        # NOTE(review): the ``deleted`` flag is not consulted here - confirm
        # whether deleted accounts are meant to be able to log in.
        user = cls.lookup_user_or_email(user_or_email)
        if not user:
            raise LoginError('user not found')
        if user.disabled:
            raise LoginError('user account disabled')
        if not user.check_password(password):
            raise LoginError('incorrect password')
        return user
class Reference(Base):
    '''Association row pairing a subject item with an object item.'''

    __tablename__ = 'reference'

    # Both columns point at item.id and together form the primary key.
    subject_id = Column(
        Integer,
        ForeignKey('item.id'),
        primary_key=True,
    )
    object_id = Column(
        Integer,
        ForeignKey('item.id'),
        primary_key=True,
    )
2017-01-30 10:22:05 +00:00
class Item(TimeStampedModel):
    '''Polymorphic base for sourcedoc, xanadoc and xanalink rows.'''

    __tablename__ = 'item'
    __versioned__ = {'base_classes': (TimeStampedModel,)}

    id = Column(Integer, primary_key=True)
    user_id = Column(Integer, ForeignKey('user.id'))
    published = Column(DateTime)
    type = Column(Enum('sourcedoc', 'xanadoc', 'xanalink', name='item_type'),
                  nullable=False)
    filename = Column(Unicode)
    text = Column(UnicodeText)

    # Items related to this one through the ``reference`` table, with this
    # item in the object position.
    subjects = relationship('Item',
                            lazy='dynamic',
                            secondary='reference',
                            primaryjoin=id == Reference.object_id,
                            secondaryjoin=id == Reference.subject_id)
    # The reverse direction: this item in the subject position.
    objects = relationship('Item',
                           lazy='dynamic',
                           secondary='reference',
                           primaryjoin=id == Reference.subject_id,
                           secondaryjoin=id == Reference.object_id)
    user = relationship('User', backref='items')

    __mapper_args__ = {
        'polymorphic_on': type,
        'with_polymorphic': '*',
    }

    @property
    def hashid(self):
        '''The item id encoded with ``doc_hashids``.'''
        return doc_hashids.encode(self.id)

    @classmethod
    def get_by_hashid(cls, hashid):
        '''Return the item for ``hashid``, or None if it does not decode.'''
        try:
            item_id = doc_hashids.decode(hashid)[0]
        except IndexError:
            return None
        return cls.query.get(item_id)

    def view_url(self, endpoint, **kwargs):
        '''Build the URL of ``view.<endpoint>`` for this item.

        The owner's username and the item's hashid are always supplied;
        extra keyword arguments are passed through to ``url_for``.
        '''
        return url_for('view.' + endpoint,
                       username=self.user.username,
                       hashid=self.hashid,
                       **kwargs)

    @property
    def url(self):
        return self.view_url('view_item')

    def version_url(self, version):
        '''URL of the item view with ``v`` set to ``version``.'''
        return self.view_url('view_item', v=version)

    @property
    def history_url(self):
        return self.view_url('history')

    @property
    def external_url(self):
        return self.view_url('view_item', _external=True)

    @property
    def edit_url(self):
        return self.view_url('edit_item')

    @property
    def set_title_url(self):
        return self.view_url('set_title')

    def title_from_link(self, titles=None):
        '''Return the title recorded for this item by a title link, or None.

        ``titles`` is an item-to-title mapping as produced by
        ``XanaLink.get_all_titles``; it is only fetched when not supplied.
        '''
        # Test for None rather than falsiness so that an empty mapping passed
        # in by the caller does not trigger another scan of every link.
        if titles is None:
            titles = XanaLink.get_all_titles()
        return titles.get(self)

    def title(self, titles=None):
        '''Return "<type>: <title>", using the hashid when there is no title.'''
        return self.type + ': ' + (self.title_from_link(titles) or self.hashid)

    def has_title(self):
        '''Return True if some title link refers to this item.'''
        titles = XanaLink.get_all_titles()
        return self in titles

    def set_title(self, title, user):
        '''Record ``title`` for this item on behalf of ``user``.

        Creates a SourceDoc holding the title text, then a XanaLink of type
        ``title`` connecting this item to the full span of that document.
        Each of the two objects is committed as it is created.
        '''
        title_source_doc = SourceDoc(text=title, user=user)
        session.add(title_source_doc)
        # Commit first so the new document has the id its URL is built from.
        session.commit()
        link_text = '''type=title
facet=
sourcedoc: {}
facet=
span: {},start=0,length={}'''.format(self.external_url, title_source_doc.external_url, len(title))
        title_link = XanaLink(text=link_text, user=user)
        session.add(title_link)
        session.commit()

    @classmethod
    def from_external(cls, url, home=None):
        '''Resolve an external item URL to the item it names.

        ``url`` is either ``<home><username>/<hashid>`` or a URL matching
        ``re_server_url``.  ``home`` defaults to the external URL of
        ``view.home``.  Returns None when the URL has neither form, when the
        hashid does not decode, or when no such item exists for that user.
        '''
        if home is None:
            home = url_for('view.home', _external=True)
        if url.startswith(home):
            username, _, hashid = url[len(home):].partition('/')
        else:
            m = re_server_url.match(url)
            if not m:
                return None
            username, hashid = m.groups()

        # decode() gives an empty result for strings that are not valid
        # hashids; treat that as "not found", as get_by_hashid does.
        decoded = doc_hashids.decode(hashid)
        if not decoded:
            return None
        item_id = decoded[0]

        # Tie the User row to the item's owner.  Without this condition User
        # is not related to the item in the query at all, so any existing
        # username would match any item id.
        q = cls.query.filter(User.username == username,
                             cls.user_id == User.id,
                             cls.id == item_id)
        return q.one_or_none()
class XanaDoc(Item):
    '''Item subclass with polymorphic identity ``xanadoc``.'''

    __tablename__ = 'xanadoc'
    __mapper_args__ = dict(polymorphic_identity='xanadoc')

    id = Column(Integer, ForeignKey(Item.id), primary_key=True)

    def snippet(self):
        '''Return the whole text; no shortening is applied.'''
        full_text = self.text
        return full_text

    @property
    def xanaedit_url(self):
        '''URL of the ``xanaedit_item`` view for this document.'''
        endpoint = 'xanaedit_item'
        return self.view_url(endpoint)

    @property
    def save_xanaedit_url(self):
        '''URL of the ``save_xanaedit`` view for this document.'''
        endpoint = 'save_xanaedit'
        return self.view_url(endpoint)
2018-06-05 12:05:07 +01:00
2017-01-30 10:22:05 +00:00
class XanaLink(Item):
    '''Item subclass with polymorphic identity ``xanalink``.

    The link definition is kept in ``text`` and read with ``parse_link``.
    '''

    __tablename__ = 'xanalink'
    __mapper_args__ = {'polymorphic_identity': 'xanalink'}

    id = Column(Integer, ForeignKey(Item.id), primary_key=True)

    def parse(self):
        '''Return the result of ``parse_link`` applied to the link text.'''
        return parse_link(self.text)

    @property
    def link_type(self):
        '''The ``type`` entry of the parsed link.'''
        return self.parse()['type']

    def title(self, titles=None):
        '''Return a display title for this link.

        In order of preference: the title recorded for the link itself; for
        a title link whose target has a title, "<type> link for <target
        title>"; otherwise "<type> link: <hashid>" or "link: <hashid>".
        ``titles`` is fetched with ``get_all_titles`` when not supplied.
        '''
        if titles is None:
            titles = XanaLink.get_all_titles()
        if self in titles:
            return self.type + ': ' + titles[self]

        parsed = self.parse()
        if parsed['type'] == 'title':
            ident = parsed['facets'][0][0].partition(': ')[2]
            item = Item.from_external(ident)
            # from_external returns None for a target it cannot resolve;
            # never look None up in (or call title() on) the mapping.
            if item is not None and item in titles:
                return parsed['type'] + " link for " + item.title(titles=titles)

        if parsed['type']:
            return parsed['type'] + " link: " + self.hashid
        else:
            return "link: " + self.hashid

    def item_and_title(self, home=None):
        '''For a title link, return ``(item, title)``; otherwise None.

        None is returned when the link is not of type ``title``, when it does
        not have exactly two facets, or when the document holding the title
        text cannot be found.  ``item`` is whatever ``Item.from_external``
        returns for the first facet and may itself be None.
        '''
        link = self.parse()
        if link['type'] != 'title':
            return None
        try:
            facet1, facet2 = link['facets']
        except ValueError:
            return None

        link_type, _, ident = facet1[0].partition(': ')
        item = Item.from_external(ident, home)

        ident2, start, length = parse_span(facet2[0])
        source_of_title = SourceDoc.from_external(ident2, home)
        if source_of_title:
            return (item, source_of_title.text[start:length + start])
        return None

    @classmethod
    def get_all_titles(cls, home=None):
        '''Return a dict mapping each titled item to its title.

        Every link is examined; links that are not usable title links, or
        whose target item cannot be resolved, are skipped.
        '''
        titles = {}
        for link in cls.query:
            ret = link.item_and_title(home)
            if ret is None:
                continue
            item, title = ret
            # An unresolved target would otherwise be stored under the key
            # None and make ``None in titles`` true for later lookups.
            if item is None:
                continue
            titles[item] = title
        return titles

    def snippet(self):
        '''Return the link text unchanged.'''
        return self.text
2017-01-30 10:22:05 +00:00
class SourceDoc(Item):
    '''Item subclass with polymorphic identity ``sourcedoc``.

    A price may be stored either for the whole document or per character;
    the properties below derive one from the other.
    '''

    __tablename__ = 'sourcedoc'
    __mapper_args__ = {'polymorphic_identity': 'sourcedoc'}

    id = Column(Integer, ForeignKey(Item.id), primary_key=True)
    db_price_per_character = Column(Integer)
    db_document_price = Column(Integer)

    @property
    def document_price(self):
        '''Price of the whole document.

        Uses the stored document price when it is set and non-zero, else the
        per-character price times the text length.  When no per-character
        price is stored, the stored document price (0 or None) is returned
        instead of failing on ``None * int``.
        '''
        if self.db_document_price:
            return self.db_document_price
        if self.db_price_per_character is None:
            return self.db_document_price
        return self.db_price_per_character * len(self.text or '')

    @property
    def price_per_character(self):
        '''Price of one character.

        Uses the stored per-character price when it is set and non-zero, else
        the document price divided by the text length.  When there is no
        document price or no text to divide by, the stored per-character
        price (0 or None) is returned instead of raising.
        '''
        if self.db_price_per_character:
            return self.db_price_per_character
        if self.db_document_price is None or not self.text:
            return self.db_price_per_character
        return self.db_document_price / len(self.text)

    def snippet(self, length=255, killwords=False, end='...', leeway=5):
        '''Return the text shortened to roughly ``length`` characters.

        Text no longer than ``length + leeway`` is returned unchanged.
        Otherwise it is cut to ``length - len(end)`` characters and ``end``
        is appended; unless ``killwords`` is true the cut is moved back to
        the last space so that a word is not split.
        '''
        s = self.text
        assert length >= len(end), 'expected length >= %s, got %s' % (len(end), length)
        assert leeway >= 0, 'expected leeway >= 0, got %s' % leeway
        if len(s) <= length + leeway:
            return s
        if killwords:
            return s[:length - len(end)] + end
        result = s[:length - len(end)].rsplit(' ', 1)[0]
        return result + end

    def raw_title(self):
        '''Return the title without the leading "<type>: " prefix.'''
        return self.title(with_type=False)

    def title(self, titles=None, with_type=True):
        '''Return a display title for the document.

        In order of preference: the title recorded by a title link, the
        first non-empty line of the text, the hashid.  The result is
        prefixed with "<type>: " unless ``with_type`` is false.  ``titles``
        is an item-to-title mapping; it is only fetched when not supplied.
        '''
        start = self.type + ': ' if with_type else ''

        # Honour a mapping supplied by the caller; previously it was always
        # overwritten by a fresh scan of every link.
        if titles is None:
            titles = XanaLink.get_all_titles()
        from_link = self.title_from_link(titles=titles)
        if from_link:
            return start + from_link

        first_line = first_non_empty_line(self.text)
        if first_line:
            return start + first_line
        return start + self.hashid
2018-08-20 13:33:18 +01:00
2017-04-07 15:43:46 +01:00
# Configure the mappers once every model class above has been declared.
# NOTE(review): presumably required at import time so sqlalchemy_continuum
# can build its version classes - confirm against its documentation.
configure_mappers()