Make MediaWiki API calls via OAuth

The API had a timeout problem. Routing the calls through the OAuth session and raising the request timeout from 4 to 10 seconds may fix it.
This commit is contained in:
Edward Betts 2022-08-17 20:04:43 +01:00
parent b1f402e1f9
commit e85cefbc2f
3 changed files with 15 additions and 14 deletions

View file

@ -1,8 +1,7 @@
"""Interface with the mediawiki API."""
from typing import Any
import requests
from . import wikidata_oauth
wiki_hostname = "en.wikipedia.org"
wiki_api_php = f"https://{wiki_hostname}/w/api.php"
@ -21,16 +20,14 @@ def parse_page(enwiki: str) -> dict[str, Any]:
"disabletoc": 1,
}
parse: dict[str, Any] = get(params)["parse"]
parse: dict[str, Any] = call(params)["parse"]
return parse
def get(params: dict[str, str | int]) -> dict[str, Any]:
def call(params: dict[str, str | int]) -> dict[str, Any]:
"""Make GET request to mediawiki API."""
data: dict[str, Any] = requests.get(
wiki_api_php, headers={"User-Agent": user_agent}, params=params
).json()
return data
data: dict[str, Any] = wikidata_oauth.api_post_request(params)
return data.json()
def get_content(title: str) -> str:
@ -43,6 +40,6 @@ def get_content(title: str) -> str:
"rvprop": "content|timestamp",
"titles": title,
}
data = get(params)
data = call(params)
rev: str = data["query"]["pages"][0]["revisions"][0]["content"]
return rev

View file

@ -19,7 +19,6 @@ def get_edit_proxy() -> dict[str, str]:
def api_post_request(params: dict[str, str | int]):
"""HTTP Post using Oauth."""
app = current_app
url = "https://www.wikidata.org/w/api.php"
client_key = app.config["CLIENT_KEY"]
client_secret = app.config["CLIENT_SECRET"]
oauth = OAuth1Session(
@ -29,12 +28,12 @@ def api_post_request(params: dict[str, str | int]):
resource_owner_secret=session["owner_secret"],
)
proxies = get_edit_proxy()
return oauth.post(url, data=params, timeout=4, proxies=proxies)
return oauth.post(api_url, data=params, timeout=10, proxies=proxies)
def raw_request(params):
app = current_app
url = "https://www.wikidata.org/w/api.php?" + urlencode(params)
url = api_url + "?" + urlencode(params)
client_key = app.config["CLIENT_KEY"]
client_secret = app.config["CLIENT_SECRET"]
oauth = OAuth1Session(
@ -44,7 +43,7 @@ def raw_request(params):
resource_owner_secret=session["owner_secret"],
)
proxies = get_edit_proxy()
return oauth.get(url, timeout=4, proxies=proxies)
return oauth.get(url, timeout=10, proxies=proxies)
def api_request(params):

View file

@ -5,6 +5,8 @@ import flask
import lxml.html
from . import mediawiki_api
from pprint import pprint
from time import sleep
disambig_templates = [
"Template:Disambiguation",
@ -75,7 +77,9 @@ def get_article_links(enwiki: str) -> list[str]:
redirects = defaultdict(set)
while True:
data = mediawiki_api.get(params)
data = mediawiki_api.call(params)
if "query" not in data:
pprint(data)
pages = data["query"].pop("pages")
for r in data["query"].pop("redirects"):
redirects[r["to"]].add(r["from"])
@ -86,6 +90,7 @@ def get_article_links(enwiki: str) -> list[str]:
break
params["gplcontinue"] = data["continue"]["gplcontinue"]
sleep(0.1)
for link in set(links):
if link in redirects: