Make MediaWiki API calls via OAuth
API requests were timing out. Route them through the OAuth POST helper and raise the request timeout from 4 to 10 seconds; this may fix the problem.
This commit is contained in:
parent
b1f402e1f9
commit
e85cefbc2f
|
@ -1,8 +1,7 @@
|
|||
"""Interface with the mediawiki API."""
|
||||
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
from . import wikidata_oauth
|
||||
|
||||
wiki_hostname = "en.wikipedia.org"
|
||||
wiki_api_php = f"https://{wiki_hostname}/w/api.php"
|
||||
|
@ -21,16 +20,14 @@ def parse_page(enwiki: str) -> dict[str, Any]:
|
|||
"disabletoc": 1,
|
||||
}
|
||||
|
||||
parse: dict[str, Any] = get(params)["parse"]
|
||||
parse: dict[str, Any] = call(params)["parse"]
|
||||
return parse
|
||||
|
||||
|
||||
def get(params: dict[str, str | int]) -> dict[str, Any]:
|
||||
def call(params: dict[str, str | int]) -> dict[str, Any]:
|
||||
"""Make GET request to mediawiki API."""
|
||||
data: dict[str, Any] = requests.get(
|
||||
wiki_api_php, headers={"User-Agent": user_agent}, params=params
|
||||
).json()
|
||||
return data
|
||||
data: dict[str, Any] = wikidata_oauth.api_post_request(params)
|
||||
return data.json()
|
||||
|
||||
|
||||
def get_content(title: str) -> str:
|
||||
|
@ -43,6 +40,6 @@ def get_content(title: str) -> str:
|
|||
"rvprop": "content|timestamp",
|
||||
"titles": title,
|
||||
}
|
||||
data = get(params)
|
||||
data = call(params)
|
||||
rev: str = data["query"]["pages"][0]["revisions"][0]["content"]
|
||||
return rev
|
||||
|
|
|
@ -19,7 +19,6 @@ def get_edit_proxy() -> dict[str, str]:
|
|||
def api_post_request(params: dict[str, str | int]):
|
||||
"""HTTP Post using Oauth."""
|
||||
app = current_app
|
||||
url = "https://www.wikidata.org/w/api.php"
|
||||
client_key = app.config["CLIENT_KEY"]
|
||||
client_secret = app.config["CLIENT_SECRET"]
|
||||
oauth = OAuth1Session(
|
||||
|
@ -29,12 +28,12 @@ def api_post_request(params: dict[str, str | int]):
|
|||
resource_owner_secret=session["owner_secret"],
|
||||
)
|
||||
proxies = get_edit_proxy()
|
||||
return oauth.post(url, data=params, timeout=4, proxies=proxies)
|
||||
return oauth.post(api_url, data=params, timeout=10, proxies=proxies)
|
||||
|
||||
|
||||
def raw_request(params):
|
||||
app = current_app
|
||||
url = "https://www.wikidata.org/w/api.php?" + urlencode(params)
|
||||
url = api_url + "?" + urlencode(params)
|
||||
client_key = app.config["CLIENT_KEY"]
|
||||
client_secret = app.config["CLIENT_SECRET"]
|
||||
oauth = OAuth1Session(
|
||||
|
@ -44,7 +43,7 @@ def raw_request(params):
|
|||
resource_owner_secret=session["owner_secret"],
|
||||
)
|
||||
proxies = get_edit_proxy()
|
||||
return oauth.get(url, timeout=4, proxies=proxies)
|
||||
return oauth.get(url, timeout=10, proxies=proxies)
|
||||
|
||||
|
||||
def api_request(params):
|
||||
|
|
|
@ -5,6 +5,8 @@ import flask
|
|||
import lxml.html
|
||||
|
||||
from . import mediawiki_api
|
||||
from pprint import pprint
|
||||
from time import sleep
|
||||
|
||||
disambig_templates = [
|
||||
"Template:Disambiguation",
|
||||
|
@ -75,7 +77,9 @@ def get_article_links(enwiki: str) -> list[str]:
|
|||
redirects = defaultdict(set)
|
||||
|
||||
while True:
|
||||
data = mediawiki_api.get(params)
|
||||
data = mediawiki_api.call(params)
|
||||
if "query" not in data:
|
||||
pprint(data)
|
||||
pages = data["query"].pop("pages")
|
||||
for r in data["query"].pop("redirects"):
|
||||
redirects[r["to"]].add(r["from"])
|
||||
|
@ -86,6 +90,7 @@ def get_article_links(enwiki: str) -> list[str]:
|
|||
break
|
||||
|
||||
params["gplcontinue"] = data["continue"]["gplcontinue"]
|
||||
sleep(0.1)
|
||||
|
||||
for link in set(links):
|
||||
if link in redirects:
|
||||
|
|
Loading…
Reference in a new issue