Initial commit

Edward Betts 2023-10-04 12:56:21 +01:00
commit f07b407e7a
25 changed files with 2383 additions and 0 deletions

.gitignore

@@ -0,0 +1,4 @@
__pycache__
.mypy_cache/
node_modules
package-lock.json

add_front_end_libraries.py

@@ -0,0 +1,22 @@
#!/usr/bin/python3
import os
import shutil
import subprocess

STATIC_DIR = "static"

assert os.path.exists("package.json") and os.path.exists("node_modules")

if not os.path.exists(STATIC_DIR):
    os.mkdir(STATIC_DIR)

shutil.copytree(
    "node_modules/bootstrap/dist/",
    os.path.join(STATIC_DIR, "bootstrap"),
    dirs_exist_ok=True,
)

subprocess.run(["npm", "run", "build"], check=True)
shutil.copy("dist/add_links.es.js", "static")

add_links/__init__.py

add_links/api.py

@@ -0,0 +1,284 @@
import re
from typing import Any

import requests
from requests.adapters import HTTPAdapter
from simplejson.scanner import JSONDecodeError

from .language import get_current_language
from .util import is_disambig

ua = (
    "find-link/2.2 "
    + "(https://github.com/EdwardBetts/find_link; contact: edward@4angle.com)"
)

re_disambig = re.compile(r"^(.*) \((.*)\)$")


def get_query_url() -> str:
    """Get the wikipedia query API for the current language."""
    return f"https://{get_current_language()}.wikipedia.org/w/api.php"


sessions = {}


def get_session():
    lang = get_current_language()
    if lang in sessions:
        return sessions[lang]
    s = requests.Session()
    s.headers = {"User-Agent": ua}
    s.mount("https://en.wikipedia.org", HTTPAdapter(max_retries=10))
    s.params = {
        "format": "json",
        "action": "query",
        "formatversion": 2,
    }
    sessions[lang] = s
    return s
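
# Note: sessions are cached per language, but the retry adapter above is only
# mounted for en.wikipedia.org; other language hosts fall back to the default
# behaviour with no automatic retries.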


class MediawikiError(Exception):
    pass


class MultipleRedirects(Exception):
    pass


class IncompleteReply(Exception):
    pass


class MissingPage(Exception):
    pass


def check_for_error(json_data):
    if "error" in json_data:
        raise MediawikiError(json_data["error"]["info"])


webpage_error = (
    "Our servers are currently under maintenance or experiencing a technical problem."
)


def api_get(params: dict[str, Any]) -> dict[str, Any]:
    """Make call to Wikipedia API."""
    s = get_session()
    r = s.get(get_query_url(), params=params)
    try:
        ret = r.json()
    except JSONDecodeError:
        if webpage_error in r.text:
            raise MediawikiError(webpage_error)
        else:
            raise MediawikiError("unknown error")
    check_for_error(ret)
    return ret


def get_first_page(params: dict[str, str]) -> dict[str, Any]:
    """Run Wikipedia API query and return the first page."""
    page = api_get(params)["query"]["pages"][0]
    if page.get("missing"):
        raise MissingPage
    return page


def random_article_list(limit=50):
    params = {
        "list": "random",
        "rnnamespace": "0",
        "rnlimit": limit,
    }
    return api_get(params)["query"]["random"]


def wiki_search(q):
    m = re_disambig.match(q)
    if m:
        search = '"{}" AND "{}"'.format(*m.groups())
    else:
        search = '"{}"'.format(q)
    params = {
        "list": "search",
        "srwhat": "text",
        "srlimit": 50,
        "srsearch": search,
        "continue": "",
    }
    ret = api_get(params)
    query = ret["query"]
    totalhits = query["searchinfo"]["totalhits"]
    results = query["search"]
    for _ in range(10):
        if "continue" not in ret:
            break
        params["sroffset"] = ret["continue"]["sroffset"]
        ret = api_get(params)
        results += ret["query"]["search"]
    return (totalhits, results)
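
# wiki_search() follows the API "continue" token for at most ten further
# pages of 50 results, so a single query returns roughly 550 hits at most.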


def get_wiki_info(q):
    params = {
        "prop": "info",
        "redirects": "",
        "titles": q,
    }
    ret = api_get(params)["query"]
    if "interwiki" in ret:
        return None
    redirects = []
    if ret.get("redirects"):
        redirects = ret["redirects"]
        if len(redirects) != 1:
            # multiple redirects, we should explain to the user that this is
            # unsupported
            raise MultipleRedirects
    if ret["pages"][0].get("missing"):
        raise MissingPage(q)
    return redirects[0]["to"] if redirects else None


def cat_start(q: str) -> list[str]:
    """Find categories that start with this prefix."""
    params = {
        "list": "allpages",
        "apnamespace": 14,  # categories
        "apfilterredir": "nonredirects",
        "aplimit": 500,
        "apprefix": q,
    }
    ret = api_get(params)["query"]
    return [i["title"] for i in ret["allpages"] if i["title"] != q]


def all_pages(q: str) -> list[str]:
    """Get all article titles with a given prefix."""
    params = {
        "list": "allpages",
        "apnamespace": 0,
        "apfilterredir": "nonredirects",
        "aplimit": 500,
        "apprefix": q,
    }
    ret = api_get(params)["query"]
    return [i["title"] for i in ret["allpages"] if i["title"] != q]


def categorymembers(q: str) -> list[str]:
    """List of category members."""
    params = {
        "list": "categorymembers",
        "cmnamespace": 0,
        "cmlimit": 500,
        "cmtitle": q[0].upper() + q[1:],
    }
    ret = api_get(params)["query"]
    return [i["title"] for i in ret["categorymembers"] if i["title"] != q]


def page_links(titles):  # unused
    titles = list(titles)
    assert titles
    params = {
        "prop": "links",
        "pllimit": 500,
        "plnamespace": 0,
        "titles": "|".join(titles),
    }
    ret = api_get(params)["query"]
    return dict(
        (doc["title"], {l["title"] for l in doc["links"]})
        for doc in ret["pages"].values()
        if "links" in doc
    )


def find_disambig(titles: list[str]) -> list[str]:
    """Find disambiguation articles in the given list of titles."""
    titles = list(titles)
    assert titles
    pos = 0
    disambig: list[str] = []
    params = {
        "prop": "templates",
        "tllimit": 500,
        "tlnamespace": 10,  # templates
        "continue": "",
    }
    while pos < len(titles):
        params["titles"] = "|".join(titles[pos : pos + 50])
        ret = api_get(params)
        disambig.extend(
            doc["title"] for doc in ret["query"]["pages"] if is_disambig(doc)
        )
        for i in range(10):
            if "continue" not in ret:
                break
            tlcontinue = ret["continue"]["tlcontinue"]
            params["titles"] = "|".join(titles[pos : pos + 50])
            params["tlcontinue"] = tlcontinue
            ret = api_get(params)
            disambig.extend(
                doc["title"] for doc in ret["query"]["pages"] if is_disambig(doc)
            )
        pos += 50
    return disambig


def wiki_redirects(q):  # pages that link here
    params = {
        "list": "backlinks",
        "blfilterredir": "redirects",
        "bllimit": 500,
        "blnamespace": 0,
        "bltitle": q,
    }
    docs = api_get(params)["query"]["backlinks"]
    assert all("redirect" in doc for doc in docs)
    return (doc["title"] for doc in docs)


def wiki_backlink(q: str) -> tuple[set[str], set[str]]:
    """Get backlinks for article."""
    params = {
        "list": "backlinks",
        "bllimit": 500,
        "blnamespace": 0,
        "bltitle": q,
        "continue": "",
    }
    ret = api_get(params)
    docs = ret["query"]["backlinks"]
    while "continue" in ret:
        params["blcontinue"] = ret["continue"]["blcontinue"]
        ret = api_get(params)
        docs += ret["query"]["backlinks"]
    articles = {doc["title"] for doc in docs if "redirect" not in doc}
    redirects = {doc["title"] for doc in docs if "redirect" in doc}
    return (articles, redirects)


def call_get_diff(title, section_num, section_text):
    data = {
        "prop": "revisions",
        "rvprop": "timestamp",
        "titles": title,
        "rvsection": section_num,
        "rvdifftotext": section_text.strip(),
    }
    s = get_session()
    ret = s.post(get_query_url(), data=data).json()
    check_for_error(ret)
    return ret["query"]["pages"][0]["revisions"][0]["diff"]["body"]

add_links/core.py

@@ -0,0 +1,198 @@
"""Core functions."""
import html
import re
import typing
from pprint import pprint
from .api import (
MediawikiError,
all_pages,
cat_start,
categorymembers,
find_disambig,
get_first_page,
wiki_backlink,
wiki_search,
)
from .util import case_flip_first, norm
re_redirect = re.compile(r"#REDIRECT \[\[(.)([^#]*?)(#.*)?\]\]")
def get_content_and_timestamp(title: str) -> tuple[str, str]:
"""Get article content and timestamp of last update."""
params = {
"prop": "revisions|info",
"rvprop": "content|timestamp",
"titles": title,
}
json_data: dict[str, typing.Any] = get_first_page(params)
if json_data.get("invalid"):
raise MediawikiError(json_data["invalidreason"])
rev = json_data["revisions"][0]
return (rev["content"], rev["timestamp"])


def get_revision_info(title: str) -> dict[str, typing.Any]:
    """Get info about latest revision of article."""
    params = {
        "prop": "revisions|info",
        "rvprop": "content|timestamp|ids",
        "titles": title,
    }
    json_data: dict[str, typing.Any] = get_first_page(params)
    if json_data.get("invalid"):
        raise MediawikiError(json_data["invalidreason"])
    revs = json_data.pop("revisions")
    ret = revs[0]
    ret["pageid"] = json_data["pageid"]
    pprint(json_data)
    return typing.cast(dict[str, typing.Any], ret)


def is_redirect_to(title_from: str, title_to: str) -> bool:
    title_from = title_from.replace("_", " ")
    params = {"prop": "info", "titles": title_from}
    if "redirect" not in get_first_page(params):
        return False
    params = {"prop": "revisions", "rvprop": "content", "titles": title_from}
    page_text = get_first_page(params)["revisions"][0]["content"]
    m = re_redirect.match(page_text)
    assert m
    title_to = title_to[0].upper() + title_to[1:]
    return m.group(1).upper() + m.group(2) == title_to


def find_longer(
    q: str, search: list[dict[str, typing.Any]], articles: set[str]
) -> list[str]:
    """Find other articles with titles that are longer."""
    this_title = q[0].upper() + q[1:]
    longer: list[str] = all_pages(this_title)
    lq = q.lower()
    for doc in search:
        lt = doc["title"].lower()
        if lq == lt or lq not in lt:
            continue
        articles.add(doc["title"])
        more_articles, more_redirects = wiki_backlink(doc["title"])
        articles.update(more_articles)
        if doc["title"] not in longer:
            longer.append(doc["title"])
    return longer
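
# find_longer() merges the allpages prefix listing with full-text search hits
# whose titles contain the query; as a side effect it records those titles and
# their backlinks in the caller's articles set.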


def tidy_snippet(snippet: str) -> str:
    """Remove HTML from snippet."""
    snippet = snippet.replace("\u2013", "-")
    snippet = snippet.replace("</span>", "")
    snippet = snippet.replace('<span class="searchmatch">', "")
    return html.unescape(snippet)


def match_type(q: str, snippet: str) -> str | None:
    """Discover match type: 'exact', 'case_mismatch' or None.

    >>> match_type('foo', 'foo')
    'exact'
    >>> match_type('foo', 'bar') is None
    True
    >>> match_type('bar', 'foo bar baz')
    'exact'
    >>> match_type('clean coal technology', 'foo clean coal technologies baz')
    'exact'
    >>> match_type('bar', 'foo Bar baz')
    'exact'
    >>> match_type('bar', 'foo BAR baz')
    'case_mismatch'
    >>> match_type('foo-bar', 'aa foo-bar cc')
    'exact'
    >>> match_type(u'foo\u2013bar', 'aa foo-bar cc')
    'exact'
    """
    q = q.replace("\u2013", "-")
    snippet = tidy_snippet(snippet)

    if q in snippet or case_flip_first(q) in snippet:
        return "exact"
    match = None
    if q.lower() in snippet.lower():
        match = "case_mismatch"
    if match != "exact" and q.endswith("y"):
        if q[:-1] in snippet or case_flip_first(q[:-1]) in snippet:
            return "exact"
    elif match is None:
        if q[:-1].lower() in snippet.lower():
            match = "case_mismatch"
    return match


def do_search(
    q: str, redirect_to: str | None = None
) -> dict[str, int | list[dict[str, typing.Any]] | list[str] | None]:
    this_title = q[0].upper() + q[1:]

    totalhits, search_hits = wiki_search(q)
    articles, redirects = wiki_backlink(redirect_to or q)
    cm = set()
    start = cat_start(q)
    if len(start) > 5:
        start = []  # big categories take too long
    for cat in set(["Category:" + this_title] + start):
        cm.update(categorymembers(cat))

    norm_q = norm(q)
    norm_match_redirect = {r for r in redirects if norm(r) == norm_q}
    longer_redirect = {r for r in redirects if q.lower() in r.lower()}

    articles.add(this_title)
    if redirect_to:
        articles.add(redirect_to[0].upper() + redirect_to[1:])

    longer_redirect = {r for r in redirects if q.lower() in r.lower()}
    for r in norm_match_redirect | longer_redirect:
        articles.add(r)
        a2, r2 = wiki_backlink(r)
        articles.update(a2)
        redirects.update(r2)

    longer = find_longer(q, search_hits, articles) if len(q) > 6 else None

    search: list[dict[str, typing.Any]] = [
        doc
        for doc in search_hits
        if doc["title"] not in articles and doc["title"] not in cm
    ]
    if search:
        disambig = set(find_disambig([doc["title"] for doc in search]))
        search = [doc for doc in search if doc["title"] not in disambig]
        # and (doc['title'] not in links or this_title not in links[doc['title']])]
    for doc in search:
        without_markup = (
            doc["snippet"]
            .replace("<span class='searchmatch'>", "")
            .replace("</span>", "")
            .replace(" ", " ")
        )
        doc["match"] = match_type(q, without_markup)
        doc["snippet_without_markup"] = without_markup
    return {
        "totalhits": totalhits,
        "results": search,
        "longer": longer,
    }


def get_case_from_content(title: str) -> str | None:
    """Check article content to find the case of the article title."""
    content, timestamp = get_content_and_timestamp(title)
    if title == title.lower() and title in content:
        return title
    start = content.lower().find("'''" + title.replace("_", " ").lower() + "'''")
    if start != -1:
        return content[start + 3 : start + 3 + len(title)]
    return None  # article doesn't contain the title

add_links/language.py

@@ -0,0 +1,146 @@
from flask import session, has_request_context
langs = [
('af', 'Afrikaans', 'Afrikaans'),
('als', 'Alemannisch', 'Alemannic'),
('am', 'አማርኛ', 'Amharic'),
('an', 'aragonés', 'Aragonese'),
('ar', 'العربية', 'Arabic'),
('arz', 'مصرى', 'Egyptian Arabic'),
('ast', 'asturianu', 'Asturian'),
('az', 'azərbaycanca', 'Azerbaijani'),
('azb', 'تۆرکجه', 'Southern Azerbaijani'),
('ba', 'башҡортса', 'Bashkir'),
('bar', 'Boarisch', 'Bavarian'),
('bat-smg', 'žemaitėška', 'Samogitian'),
('be', 'беларуская', 'Belarusian'),
('be-tarask', 'беларуская (тарашкевіца)', 'Belarusian (Taraškievica)'),
('bg', 'български', 'Bulgarian'),
('bn', 'বাংলা', 'Bengali'),
('bpy', 'বিষ্ণুপ্রিয়া মণিপুরী', 'Bishnupriya Manipuri'),
('br', 'brezhoneg', 'Breton'),
('bs', 'bosanski', 'Bosnian'),
('bug', 'ᨅᨔ ᨕᨘᨁᨗ', 'Buginese'),
('ca', 'català', 'Catalan'),
('ce', 'нохчийн', 'Chechen'),
('ceb', 'Cebuano', 'Cebuano'),
('ckb', 'کوردیی ناوەندی', 'Kurdish (Sorani)'),
('cs', 'čeština', 'Czech'),
('cv', 'Чӑвашла', 'Chuvash'),
('cy', 'Cymraeg', 'Welsh'),
('da', 'dansk', 'Danish'),
('de', 'Deutsch', 'German'),
('el', 'Ελληνικά', 'Greek'),
('en', 'English', 'English'),
('eo', 'Esperanto', 'Esperanto'),
('es', 'español', 'Spanish'),
('et', 'eesti', 'Estonian'),
('eu', 'euskara', 'Basque'),
('fa', 'فارسی', 'Persian'),
('fi', 'suomi', 'Finnish'),
('fo', 'føroyskt', 'Faroese'),
('fr', 'français', 'French'),
('fy', 'Frysk', 'West Frisian'),
('ga', 'Gaeilge', 'Irish'),
('gd', 'Gàidhlig', 'Scottish Gaelic'),
('gl', 'galego', 'Galician'),
('gu', 'ગુજરાતી', 'Gujarati'),
('he', 'עברית', 'Hebrew'),
('hi', 'हिन्दी', 'Hindi'),
('hr', 'hrvatski', 'Croatian'),
('hsb', 'hornjoserbsce', 'Upper Sorbian'),
('ht', 'Kreyòl ayisyen', 'Haitian'),
('hu', 'magyar', 'Hungarian'),
('hy', 'Հայերեն', 'Armenian'),
('ia', 'interlingua', 'Interlingua'),
('id', 'Bahasa Indonesia', 'Indonesian'),
('io', 'Ido', 'Ido'),
('is', 'íslenska', 'Icelandic'),
('it', 'italiano', 'Italian'),
('ja', '日本語', 'Japanese'),
('jv', 'Basa Jawa', 'Javanese'),
('ka', 'ქართული', 'Georgian'),
('kk', 'қазақша', 'Kazakh'),
('kn', 'ಕನ್ನಡ', 'Kannada'),
('ko', '한국어', 'Korean'),
('ku', 'Kurdî', 'Kurdish (Kurmanji)'),
('ky', 'Кыргызча', 'Kirghiz'),
('la', 'Latina', 'Latin'),
('lb', 'Lëtzebuergesch', 'Luxembourgish'),
('li', 'Limburgs', 'Limburgish'),
('lmo', 'lumbaart', 'Lombard'),
('lt', 'lietuvių', 'Lithuanian'),
('lv', 'latviešu', 'Latvian'),
('map-bms', 'Basa Banyumasan', 'Banyumasan'),
('mg', 'Malagasy', 'Malagasy'),
('min', 'Baso Minangkabau', 'Minangkabau'),
('mk', 'македонски', 'Macedonian'),
('ml', 'മലയാളം', 'Malayalam'),
('mn', 'монгол', 'Mongolian'),
('mr', 'मराठी', 'Marathi'),
('mrj', 'кырык мары', 'Hill Mari'),
('ms', 'Bahasa Melayu', 'Malay'),
('my', 'မြန်မာဘာသာ', 'Burmese'),
('mzn', 'مازِرونی', 'Mazandarani'),
('nah', 'Nāhuatl', 'Nahuatl'),
('nap', 'Napulitano', 'Neapolitan'),
('nds', 'Plattdüütsch', 'Low Saxon'),
('ne', 'नेपाली', 'Nepali'),
('new', 'नेपाल भाषा', 'Newar'),
('nl', 'Nederlands', 'Dutch'),
('nn', 'norsk nynorsk', 'Norwegian (Nynorsk)'),
('no', 'norsk bokmål', 'Norwegian (Bokmål)'),
('oc', 'occitan', 'Occitan'),
('or', 'ଓଡ଼ିଆ', 'Oriya'),
('os', 'Ирон', 'Ossetian'),
('pa', 'ਪੰਜਾਬੀ', 'Eastern Punjabi'),
('pl', 'polski', 'Polish'),
('pms', 'Piemontèis', 'Piedmontese'),
('pnb', 'پنجابی', 'Western Punjabi'),
('pt', 'português', 'Portuguese'),
('qu', 'Runa Simi', 'Quechua'),
('ro', 'română', 'Romanian'),
('ru', 'русский', 'Russian'),
('sa', 'संस्कृतम्', 'Sanskrit'),
('sah', 'саха тыла', 'Sakha'),
('scn', 'sicilianu', 'Sicilian'),
('sco', 'Scots', 'Scots'),
('sh', 'srpskohrvatski / српскохрватски', 'Serbo-Croatian'),
('si', 'සිංහල', 'Sinhalese'),
('simple', 'Simple English', 'Simple English'),
('sk', 'slovenčina', 'Slovak'),
('sl', 'slovenščina', 'Slovenian'),
('sq', 'shqip', 'Albanian'),
('sr', 'српски / srpski', 'Serbian'),
('su', 'Basa Sunda', 'Sundanese'),
('sv', 'svenska', 'Swedish'),
('sw', 'Kiswahili', 'Swahili'),
('ta', 'தமிழ்', 'Tamil'),
('te', 'తెలుగు', 'Telugu'),
('tg', 'тоҷикӣ', 'Tajik'),
('th', 'ไทย', 'Thai'),
('tl', 'Tagalog', 'Tagalog'),
('tr', 'Türkçe', 'Turkish'),
('tt', 'татарча/tatarça', 'Tatar'),
('uk', 'українська', 'Ukrainian'),
('ur', 'اردو', 'Urdu'),
('uz', 'oʻzbekcha/ўзбекча', 'Uzbek'),
('vec', 'vèneto', 'Venetian'),
('vi', 'Tiếng Việt', 'Vietnamese'),
('vo', 'Volapük', 'Volapük'),
('wa', 'walon', 'Walloon'),
('war', 'Winaray', 'Waray'),
('yi', 'ייִדיש', 'Yiddish'),
('yo', 'Yorùbá', 'Yoruba'),
('zh', '中文', 'Chinese'),
('zh-min-nan', 'Bân-lâm-gú', 'Min Nan'),
('zh-yue', '粵語', 'Cantonese'),
]


def get_langs() -> list[dict[str, str]]:
    """List of all known languages."""
    return [dict(zip(('code', 'local', 'english'), l)) for l in langs]


def get_current_language() -> str:
    """Return the Wikipedia language code for the current language."""
    return session.get('current_lang', 'en') if has_request_context() else 'en'

add_links/match.py

@@ -0,0 +1,381 @@
from __future__ import unicode_literals

import re
import typing

from .api import MissingPage, call_get_diff, get_wiki_info
from .core import get_case_from_content, get_content_and_timestamp, get_revision_info
from .util import is_title_case, lc_alpha

re_link_in_text = re.compile(r"\[\[[^]]+?\]\]", re.I | re.S)


class LinkReplace(Exception):
    pass


en_dash = "\u2013"
trans = {",": ",?", " ": " *[-\n]? *"}
trans[en_dash] = trans[" "]

trans2 = {" ": r"('?s?\]\])?'?s? ?(\[\[(?:.+\|)?)?", "-": "[- ]"}
trans2[en_dash] = trans2[" "]

patterns = [
    lambda q: re.compile(
        r"(?<!-)(?:\[\[(?:[^]]+\|)?)?(%s)%s(?:\]\])?"
        % (
            re.escape(q[0]),
            "".join("-?" + (trans2[c] if c in trans2 else re.escape(c)) for c in q[1:]),
        ),
        re.I,
    ),
    lambda q: re.compile(
        r"(?<!-)\[\[[^|]+\|(%s)%s\]\]" % (re.escape(q[0]), re.escape(q[1:])), re.I
    ),
    lambda q: re.compile(
        r"(?<!-)\[\[[^|]+\|(%s)%s(?:\]\])?"
        % (
            re.escape(q[0]),
            "".join("-?" + (trans2[c] if c in trans2 else re.escape(c)) for c in q[1:]),
        ),
        re.I,
    ),
    lambda q: re.compile(r"(?<!-)(%s)%s" % (re.escape(q[0]), re.escape(q[1:])), re.I),
    lambda q: re.compile(
        r"(?<!-)(%s)%s"
        % (
            re.escape(q[0]),
            "".join((trans[c] if c in trans else re.escape(c)) for c in q[1:]),
        ),
        re.I,
    ),
]
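
# Each entry in "patterns" is a factory that builds a case-insensitive regex
# for the search term: spanning existing wiki-link markup, as the display text
# of a piped link, or as plain text, with some tolerance for hyphens and line
# breaks. mk_link_matcher() below tries them in this order and returns the
# first match.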


class NoMatch(Exception):
    pass


re_cite = re.compile(
    r"<ref( [^>]*?)?>\s*({{cite.*?}}|\[https?://[^]]*?\])\s*</ref>", re.I | re.S
)


def parse_cite(text: str) -> typing.Iterator[tuple[str, str]]:
    """Parse a citation template."""
    prev = 0
    for m in re_cite.finditer(text):
        yield ("text", text[prev : m.start()])
        yield ("cite", m.group(0))
        prev = m.end()
    yield ("text", text[prev:])


re_heading = re.compile(r"^\s*(=+)\s*(.+)\s*\1(<!--.*-->|\s)*$")


def section_iter(text: str) -> typing.Iterator[tuple[str | None, str]]:
    """Iterate sections yielding tuples of heading and section text."""
    cur_section = ""
    heading = None
    in_comment = False
    for line in text.splitlines(True):
        if "<!--" in line:
            in_comment = True
        if "-->" in line:
            in_comment = False
        m = re_heading.match(line)
        if in_comment or not m:
            cur_section += line
            continue
        if cur_section or heading:
            yield (heading, cur_section)
        heading = m.group()
        cur_section = ""
        continue
    yield (heading, cur_section)


def get_subsections(text: str, section_num: int) -> str:
    """Retrieve the text of subsections for a given section number within an article."""
    found = ""
    collection_level = None
    for num, (heading, body) in enumerate(section_iter(text)):
        if heading is None:
            level = 0
        else:
            m = re_heading.match(heading)
            assert m
            level = len(m.group(1))
        if num == section_num:
            collection_level = level
            continue
        if collection_level:
            if level > collection_level:
                assert heading
                found += heading + body
            else:
                break
    return found


def match_found(m, q, linkto):
    if q[1:] == m.group(0)[1:]:
        replacement = m.group(1) + q[1:]
    elif any(c.isupper() for c in q[1:]) or m.group(0) == m.group(0).upper():
        replacement = q
    elif is_title_case(m.group(0)):
        replacement = None
        replacement = get_case_from_content(q)
        if replacement is None:
            replacement = q.lower()
    else:
        replacement = m.group(1) + q[1:]
    assert replacement
    if linkto:
        if linkto[0].isupper() and replacement[0] == linkto[0].lower():
            linkto = linkto[0].lower() + linkto[1:]
        elif replacement[0].isupper():
            linkto = linkto[0].upper() + linkto[1:]
        replacement = linkto + "|" + replacement
    return replacement


def parse_links(text: str) -> typing.Iterator[tuple[str, str]]:
    prev = 0
    for m in re_link_in_text.finditer(text):
        if prev != m.start():
            yield ("text", text[prev : m.start()])
        if any(
            m.group().lower().startswith("[[" + prefix)
            for prefix in ("file:", "image:")
        ):
            yield ("image", m.group(0))
        else:
            yield ("link", m.group(0))
        prev = m.end()
    if prev < len(text):
        yield ("text", text[prev:])


def mk_link_matcher(q):
    re_links = [p(q) for p in patterns]

    def search_for_link(text):
        for re_link in re_links:
            m = re_link.search(text)
            if m and m.group(0).count("[[") < 4:
                return m

    return search_for_link


def add_link(m, replacement, text):
    return m.re.sub(lambda m: "[[%s]]" % replacement, text, count=1)


def find_link_in_chunk(q, content, linkto=None):
    search_for_link = mk_link_matcher(q)
    new_content = ""
    replacement = None
    match_in_non_link = False
    bad_link_match = False
    found_text_to_link = None
    for token_type, text in parse_links(content):
        if token_type == "text":
            if search_for_link(text):
                match_in_non_link = True
        elif token_type == "image":
            before, sep, link_text = text[:-2].rpartition("|")
            m = search_for_link(link_text)
            if m:
                found_text_to_link = m.group(0)
                replacement = match_found(m, q, linkto)
                text = before + sep + add_link(m, replacement, link_text) + "]]"
        elif token_type == "link" and not replacement and not match_in_non_link:
            link_text = text[2:-2]
            link_dest = None
            if "|" in link_text:
                link_dest, link_text = link_text.split("|", 1)
            m = search_for_link(link_text)
            if m and (not link_dest or not link_dest.startswith("#")):
                lc_alpha_q = lc_alpha(q)
                bad_link_match = (
                    link_dest
                    and len(link_dest) > len(q)
                    and (lc_alpha_q not in lc_alpha(link_dest))
                )
                if not link_dest:
                    if q in link_text and len(link_text) > len(q):
                        bad_link_match = True
                if bad_link_match and link_dest:
                    try:
                        link_dest_redirect = get_wiki_info(link_dest)
                    except MissingPage:
                        link_dest_redirect = None
                    if (
                        link_dest_redirect
                        and lc_alpha(link_dest_redirect) == lc_alpha_q
                    ):
                        bad_link_match = False
                if not bad_link_match:
                    replacement = match_found(m, q, linkto)
                    found_text_to_link = m.group(0)
                    text = add_link(m, replacement, link_text)
        new_content += text
    if not replacement:
        if bad_link_match:
            raise LinkReplace
        m = search_for_link(content)
        if m:
            found_text_to_link = m.group(0)
            replacement = match_found(m, q, linkto)
            new_content = add_link(m, replacement, content)
            if linkto:
                m_end = m.end()
                re_extend = re.compile(m.re.pattern + r"\w*\b", re.I)
                m = re_extend.search(content)
                if m and m.end() > m_end:
                    replacement += content[m_end : m.end()]
                    new_content = add_link(m, replacement, content)
    return (new_content, replacement, found_text_to_link)
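
# find_link_in_chunk() returns a 3-tuple: the chunk with the first suitable
# match turned into a wiki link, the replacement text that went inside the
# brackets, and the original text that was matched (or None when nothing was
# linked).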


def find_link_in_text(q, content):
    (new_content, replacement, replaced_text) = find_link_in_chunk(q, content)
    if replacement:
        return (new_content, replacement)
    raise NoMatch


def find_link_in_content(q, content, linkto=None):
    if linkto:
        try:
            return find_link_in_content(linkto, content)
        except NoMatch:
            pass
    replacement = None
    new_content = ""
    link_replace = False
    for header, section_text in section_iter(content):
        if header:
            new_content += header
        for token_type, text in parse_cite(section_text):
            if token_type == "text" and not replacement:
                try:
                    (new_text, replacement, replaced_text) = find_link_in_chunk(
                        q, text, linkto=linkto
                    )
                except LinkReplace:
                    link_replace = True
                if replacement:
                    text = new_text
            new_content += text
    if replacement:
        return (new_content, replacement, replaced_text)
    raise LinkReplace if link_replace else NoMatch


def find_link_and_section(q, content, linkto=None):
    if linkto:
        try:
            return find_link_and_section(linkto, content)
        except NoMatch:
            pass
    sections = list(section_iter(content))
    replacement = None

    search_for_link = mk_link_matcher(q)

    found: dict[str, str | int] = {}

    for section_num, (header, section_text) in enumerate(sections):
        new_content = ""
        if header:
            new_content += header
        for token_type, text in parse_cite(section_text):
            if token_type == "text" and not replacement:
                new_text = ""
                for token_type2, text2 in parse_links(text):
                    if token_type2 == "link" and not replacement:
                        link_text = text2[2:-2]
                        if "|" in link_text:
                            link_dest, link_text = link_text.split("|", 1)
                        else:
                            link_dest = None
                        m = search_for_link(link_text)
                        if m:
                            if link_dest:
                                found["link_dest"] = link_dest
                            found["link_text"] = link_text
                            replacement = match_found(m, q, None)
                            text2 = add_link(m, replacement, link_text)
                    new_text += text2
                if replacement:
                    text = new_text
                else:
                    m = search_for_link(text)
                    if m:
                        replacement = match_found(m, q, linkto)
                        text = add_link(m, replacement, text)
            new_content += text
        if replacement:
            found.update(
                {
                    "section_num": section_num,
                    "section_text": new_content,
                    "old_text": (header or "") + section_text,
                    "replacement": replacement,
                }
            )
            return found
    raise NoMatch


def find_refs(text: str) -> list[str]:
    """Find <ref> in wikitext."""
    refs = re.findall("<ref(?:[^>]*)>(.+?)</ref>", text)
    print(refs)
    return refs


def new_link_is_in_ref(replacement: str, text: str) -> bool:
    """Is the new link in a <ref>."""
    link = f"[[{replacement}]]"
    return any(link in ref for ref in find_refs(text))


def get_match(q: str, title: str, linkto: str | None) -> dict[str, typing.Any]:
    """Get match."""
    rev = get_revision_info(title)
    found: dict[str, typing.Any] = find_link_and_section(q, rev["content"], linkto)
    assert not new_link_is_in_ref(found["replacement"], found["section_text"])
    found["revid"] = rev["revid"]
    found["pageid"] = rev["pageid"]
    found["section_text"] += get_subsections(rev["content"], found["section_num"])
    return found


def get_diff(q: str, title: str, linkto: str | None) -> dict[str, typing.Any]:
    """Get diff."""
    content, timestamp = get_content_and_timestamp(title)
    found: dict[str, typing.Any] = find_link_and_section(q, content, linkto)
    if new_link_is_in_ref(found["replacement"], found["section_text"]):
        raise NoMatch
    section_text = found["section_text"] + get_subsections(
        content, found["section_num"]
    )
    found["diff"] = call_get_diff(title, found["section_num"], section_text)
    return found

add_links/mediawiki_api.py

@@ -0,0 +1,101 @@
"""Interface with the mediawiki API."""
import typing
from pprint import pprint
from typing import Any, cast
from . import wikidata_oauth
wiki_hostname = "en.wikipedia.org"
wiki_api_php = f"https://{wiki_hostname}/w/api.php"
user_agent = "add-links/0.1"
def parse_page(enwiki: str) -> dict[str, Any]:
"""Call mediawiki parse API for given article."""
params: dict[str, str | int] = {
"action": "parse",
"format": "json",
"formatversion": 2,
"disableeditsection": 1,
"page": enwiki,
"prop": "text|links|headhtml",
"disabletoc": 1,
}
parse: dict[str, Any] = call(params)["parse"]
return parse
def call(params: dict[str, str | int]) -> dict[str, typing.Any]:
"""Make GET request to mediawiki API."""
data = wikidata_oauth.api_post_request(params)
return cast(dict[str, Any], data.json())


def article_exists(title: str) -> bool:
    """Check whether an article with this title exists."""
    params: dict[str, str | int] = {
        "action": "query",
        "format": "json",
        "formatversion": 2,
        "titles": title,
    }
    return not call(params)["query"]["pages"][0].get("missing")


def get_content(title: str) -> tuple[str, int]:
    """Get article text."""
    params: dict[str, str | int] = {
        "action": "query",
        "format": "json",
        "formatversion": 2,
        "prop": "revisions|info",
        "rvprop": "content|timestamp|ids",
        "titles": title,
    }
    data = call(params)
    rev = data["query"]["pages"][0]["revisions"][0]
    content: str = rev["content"]
    revid: int = int(rev["revid"])
    return content, revid


def compare(title: str, new_text: str) -> str:
    """Generate a diff for the new article text."""
    params: dict[str, str | int] = {
        "format": "json",
        "formatversion": 2,
        "action": "compare",
        "fromtitle": title,
        "toslots": "main",
        "totext-main": new_text,
        "prop": "diff",
    }
    diff: str = call(params)["compare"]["body"]
    return diff


def edit_page(
    pageid: int, section: str | int, text: str, summary: str, baserevid: str, token: str
) -> str:
    """Edit a page on Wikipedia."""
    params: dict[str, str | int] = {
        "format": "json",
        "formatversion": 2,
        "action": "edit",
        "pageid": pageid,
        "text": text,
        "baserevid": baserevid,
        "token": token,
        "nocreate": 1,
        "summary": summary,
        "section": section,
    }
    ret = call(params)
    if "edit" not in ret:
        print("params")
        pprint(params)
        print()
        pprint(ret)
    return typing.cast(str, ret["edit"])


@@ -0,0 +1,48 @@
"""Interface with the mediawiki API."""
from typing import Any
import requests
wiki_hostname = "en.wikipedia.org"
wiki_api_php = f"https://{wiki_hostname}/w/api.php"
user_agent = "dab-mechanic/0.1"
def parse_page(enwiki: str) -> dict[str, Any]:
"""Call mediawiki parse API for given article."""
params: dict[str, str | int] = {
"action": "parse",
"format": "json",
"formatversion": 2,
"disableeditsection": 1,
"page": enwiki,
"prop": "text|links|headhtml",
"disabletoc": 1,
}
parse: dict[str, Any] = get(params)["parse"]
return parse
def get(params: dict[str, str | int]) -> dict[str, Any]:
"""Make GET request to mediawiki API."""
data: dict[str, Any] = requests.get(
wiki_api_php, headers={"User-Agent": user_agent}, params=params
).json()
return data
def get_content(title: str) -> str:
"""Get article text."""
params: dict[str, str | int] = {
"action": "query",
"format": "json",
"formatversion": 2,
"prop": "revisions|info",
"rvprop": "content|timestamp",
"titles": title,
}
data = get(params)
rev: str = data["query"]["pages"][0]["revisions"][0]["content"]
return rev

add_links/util.py

@@ -0,0 +1,115 @@
"""Util functions."""
import re
import urllib
from typing import Any
# util functions that don't access the network
namespaces = {
ns.casefold()
for ns in (
"Special",
"Media",
"Talk",
"Template",
"Portal",
"Portal talk",
"Book",
"Book talk",
"Template talk",
"Draft",
"Draft talk",
"Help",
"Help talk",
"Category",
"Category talk",
"User",
"Gadget",
"Gadget talk",
"Gadget definition",
"Gadget definition talk",
"Topic",
"User talk",
"Wikipedia",
"Education Program",
"Education Program talk",
"Wikipedia talk",
"File",
"File talk",
"TimedText",
"TimedText talk",
"MediaWiki",
"Module",
"Module talk",
"MediaWiki talk",
)
}
re_space_or_dash = re.compile("[ -]")


def is_title_case(phrase: str) -> bool:
    """Check whether a given phrase is in Title Case."""
    return all(
        term[0].isupper() and term[1:].islower()
        for term in re_space_or_dash.split(phrase)
        if term and term[0].isalpha()
    )


def urlquote(value: str) -> str:
    """Prepare string for use in URL param."""
    return urllib.parse.quote_plus(value.encode("utf-8"))


def strip_parens(q: str) -> str:
    """Remove a word in parentheses from the end of a string."""
    m = re.search(r" \(.*?\)$", q)
    return q[: m.start()] if m else q


def starts_with_namespace(title: str) -> bool:
    """Check if a title starts with a namespace."""
    return ":" in title and title.split(":", 1)[0].casefold() in namespaces


def is_disambig(doc: dict[str, Any]) -> bool:
    """Is this a disambiguation page?"""
    return any(
        "disambig" in t
        or t.endswith("dis")
        or "given name" in t
        or t == "template:surname"
        for t in (t["title"].lower() for t in doc.get("templates", []))
    )


def norm(s: str) -> str:
    """Normalise string."""
    s = re.sub(r"\W", "", s).lower()
    return s[:-1] if s and s[-1] == "s" else s


def case_flip(s: str) -> str:
    """Switch case of character."""
    if s.islower():
        return s.upper()
    if s.isupper():
        return s.lower()
    return s


def case_flip_first(s: str) -> str:
    """Switch case of first character in string."""
    return case_flip(s[0]) + s[1:]


def lc_alpha(s: str) -> str:
    """Lower case alphabetic characters in string."""
    return "".join(c.lower() for c in s if c.isalpha())


def wiki_space_norm(s: str) -> str:
    """Normalise article title."""
    return s.replace("_", " ").strip()


@@ -0,0 +1,98 @@
import typing
import urllib
from typing import cast

from flask import current_app, session
from requests_oauthlib import OAuth1Session

wiki_hostname = "en.wikipedia.org"
api_url = f"https://{wiki_hostname}/w/api.php"


def get_edit_proxy() -> dict[str, str]:
    """Retrieve proxy information from config."""
    edit_proxy = current_app.config.get("EDIT_PROXY")
    if edit_proxy:
        return {"http": edit_proxy, "https": edit_proxy}
    else:
        return {}


def api_post_request(params: dict[str, str | int]):
    """HTTP Post using Oauth."""
    app = current_app
    # url = "https://www.wikidata.org/w/api.php"
    client_key = app.config["CLIENT_KEY"]
    client_secret = app.config["CLIENT_SECRET"]
    oauth = OAuth1Session(
        client_key,
        client_secret=client_secret,
        resource_owner_key=session["owner_key"],
        resource_owner_secret=session["owner_secret"],
    )
    proxies = get_edit_proxy()
    return oauth.post(api_url, data=params, timeout=4, proxies=proxies)


def raw_request(params: typing.Mapping[str, str | int]):
    """Low-level API request."""
    app = current_app
    # url = "https://www.wikidata.org/w/api.php?" + urlencode(params)
    client_key = app.config