Add User-Agent to mediawiki API calls
This commit is contained in:
parent
4e1ad4efbc
commit
5f8900a47a
48
dab_mechanic/mediawiki_api.py
Normal file
48
dab_mechanic/mediawiki_api.py
Normal file
|
@ -0,0 +1,48 @@
|
|||
"""Interface with the mediawiki API."""
|
||||
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
# Hostname of the wiki whose API this module talks to.
wiki_hostname = "en.wikipedia.org"
# URL of the api.php endpoint on that host.
wiki_api_php = "https://" + wiki_hostname + "/w/api.php"
# Value sent in the User-Agent header by get().
user_agent = "dab-mechanic/0.1"
|
||||
|
||||
|
||||
def parse_page(enwiki: str) -> dict[str, Any]:
    """Run the mediawiki parse API on one article.

    Sends an ``action=parse`` request for the page named by *enwiki*, asking
    for the ``text``, ``links`` and ``headhtml`` properties with section edit
    links and the table of contents disabled.

    Returns the ``"parse"`` member of the decoded JSON response. A response
    without that member raises ``KeyError``.
    """
    # Keyword order is kept the same as the request parameters were
    # originally listed, so the generated query string is unchanged.
    request_params: dict[str, str | int] = dict(
        action="parse",
        format="json",
        formatversion=2,
        disableeditsection=1,
        page=enwiki,
        prop="text|links|headhtml",
        disabletoc=1,
    )

    reply = get(request_params)
    parsed: dict[str, Any] = reply["parse"]
    return parsed
|
||||
|
||||
|
||||
def get(params: dict[str, str | int], timeout: float = 30) -> dict[str, Any]:
    """Make GET request to mediawiki API.

    Args:
        params: Query-string parameters for the API call.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.Timeout: The server did not answer in time.
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status.
    """
    response = requests.get(
        wiki_api_php,
        headers={"User-Agent": user_agent},
        params=params,
        timeout=timeout,
    )
    # Fail with a clear HTTP error instead of trying to decode an error page
    # as JSON.
    response.raise_for_status()
    data: dict[str, Any] = response.json()
    return data
|
||||
|
||||
|
||||
def get_content(title: str) -> str:
    """Fetch the revision content of the article called *title*.

    Issues an ``action=query`` request for ``revisions|info`` with
    ``rvprop=content|timestamp`` and returns the ``content`` field of the
    first revision of the first page in the response.

    Raises ``KeyError`` or ``IndexError`` if the response lacks that
    structure.
    """
    # Keyword order mirrors the original parameter order so the generated
    # query string is unchanged.
    query: dict[str, str | int] = dict(
        action="query",
        format="json",
        formatversion=2,
        prop="revisions|info",
        rvprop="content|timestamp",
        titles=title,
    )

    reply = get(query)
    first_page = reply["query"]["pages"][0]
    first_revision = first_page["revisions"][0]
    content: str = first_revision["content"]
    return content
|
|
@ -3,7 +3,8 @@ from typing import Any, Iterator, Optional, TypedDict
|
|||
|
||||
import flask
|
||||
import lxml.html
|
||||
import requests
|
||||
|
||||
from . import mediawiki_api
|
||||
|
||||
disambig_templates = [
|
||||
"Template:Disambiguation",
|
||||
|
@ -67,7 +68,6 @@ def needs_disambig(link: dict[str, Any]) -> bool:
|
|||
|
||||
def get_article_links(enwiki: str) -> list[str]:
|
||||
"""Get links that appear in this article."""
|
||||
url = "https://en.wikipedia.org/w/api.php"
|
||||
|
||||
params: dict[str, str | int] = link_params(enwiki)
|
||||
links: set[str] = set()
|
||||
|
@ -75,7 +75,7 @@ def get_article_links(enwiki: str) -> list[str]:
|
|||
redirects = defaultdict(set)
|
||||
|
||||
while True:
|
||||
data = requests.get(url, params=params).json()
|
||||
data = mediawiki_api.get(params)
|
||||
pages = data["query"].pop("pages")
|
||||
for r in data["query"].pop("redirects"):
|
||||
redirects[r["to"]].add(r["from"])
|
||||
|
@ -96,28 +96,9 @@ def get_article_links(enwiki: str) -> list[str]:
|
|||
# return {link["title"] for link in r.json()["query"]["pages"][0]["links"]}
|
||||
|
||||
|
||||
def call_parse_api(enwiki: str) -> dict[str, Any]:
|
||||
"""Call mediawiki parse API for given article."""
|
||||
url = "https://en.wikipedia.org/w/api.php"
|
||||
|
||||
params: dict[str, str | int] = {
|
||||
"action": "parse",
|
||||
"format": "json",
|
||||
"formatversion": 2,
|
||||
"disableeditsection": 1,
|
||||
"page": enwiki,
|
||||
"prop": "text|links|headhtml",
|
||||
"disabletoc": 1,
|
||||
}
|
||||
|
||||
r = requests.get(url, params=params)
|
||||
parse: dict[str, Any] = r.json()["parse"]
|
||||
return parse
|
||||
|
||||
|
||||
def get_article_html(enwiki: str) -> str:
|
||||
"""Parse article wikitext and return HTML."""
|
||||
text: str = call_parse_api(enwiki)["text"]
|
||||
text: str = mediawiki_api.parse_page(enwiki)["text"]
|
||||
return text
|
||||
|
||||
|
||||
|
@ -182,7 +163,7 @@ class Article:
|
|||
|
||||
def load(self) -> None:
|
||||
"""Load parsed article HTML."""
|
||||
self.parse = call_parse_api(self.enwiki)
|
||||
self.parse = mediawiki_api.parse_page(self.enwiki)
|
||||
self.root = lxml.html.fromstring(self.parse.pop("text"))
|
||||
|
||||
def iter_links(self) -> Iterator[tuple[lxml.html.Element, str]]:
|
||||
|
|
19
web_view.py
19
web_view.py
|
@ -12,7 +12,7 @@ from requests_oauthlib import OAuth1Session
|
|||
from werkzeug.debug.tbtools import get_current_traceback
|
||||
from werkzeug.wrappers import Response
|
||||
|
||||
from dab_mechanic import wikidata_oauth, wikipedia
|
||||
from dab_mechanic import mediawiki_api, wikidata_oauth, wikipedia
|
||||
|
||||
app = flask.Flask(__name__)
|
||||
app.config.from_object("config.default")
|
||||
|
@ -47,21 +47,6 @@ def exception_handler(e):
|
|||
)
|
||||
|
||||
|
||||
def get_content(title: str) -> str:
|
||||
"""Get article text."""
|
||||
params: dict[str, str | int] = {
|
||||
"action": "query",
|
||||
"format": "json",
|
||||
"formatversion": 2,
|
||||
"prop": "revisions|info",
|
||||
"rvprop": "content|timestamp",
|
||||
"titles": title,
|
||||
}
|
||||
data = requests.get(wiki_api_php, params=params).json()
|
||||
rev: str = data["query"]["pages"][0]["revisions"][0]["content"]
|
||||
return rev
|
||||
|
||||
|
||||
def parse_articles_with_dab_links(root: lxml.html.Element) -> list[tuple[str, int]]:
|
||||
"""Parse Articles With Multiple Dablinks."""
|
||||
articles = []
|
||||
|
@ -127,7 +112,7 @@ def save(enwiki: str) -> Response | str:
|
|||
|
||||
edit_summary = f"Disambiguate {titles} using [[User:Edward/Dab mechanic]]"
|
||||
|
||||
article_text = apply_edits(get_content(enwiki), edits)
|
||||
article_text = apply_edits(mediawiki_api.get_content(enwiki), edits)
|
||||
|
||||
return flask.render_template(
|
||||
"save.html",
|
||||
|
|
Loading…
Reference in a new issue