diff --git a/dab_mechanic/mediawiki_api.py b/dab_mechanic/mediawiki_api.py
index 0196207..26d7a20 100644
--- a/dab_mechanic/mediawiki_api.py
+++ b/dab_mechanic/mediawiki_api.py
@@ -1,8 +1,7 @@
 """Interface with the mediawiki API."""
 
 from typing import Any
-
-import requests
+from . import wikidata_oauth
 
 wiki_hostname = "en.wikipedia.org"
 wiki_api_php = f"https://{wiki_hostname}/w/api.php"
@@ -21,16 +20,16 @@ def parse_page(enwiki: str) -> dict[str, Any]:
         "disabletoc": 1,
     }
 
-    parse: dict[str, Any] = get(params)["parse"]
+    parse: dict[str, Any] = call(params)["parse"]
     return parse
 
 
-def get(params: dict[str, str | int]) -> dict[str, Any]:
-    """Make GET request to mediawiki API."""
-    data: dict[str, Any] = requests.get(
-        wiki_api_php, headers={"User-Agent": user_agent}, params=params
-    ).json()
-    return data
+def call(params: dict[str, str | int]) -> dict[str, Any]:
+    """Make OAuth POST request to mediawiki API, return decoded JSON."""
+    # api_post_request() hands back a response object, not the decoded payload.
+    response = wikidata_oauth.api_post_request(params)
+    data: dict[str, Any] = response.json()
+    return data
 
 
 def get_content(title: str) -> str:
@@ -43,6 +42,6 @@ def get_content(title: str) -> str:
         "rvprop": "content|timestamp",
         "titles": title,
     }
-    data = get(params)
+    data = call(params)
     rev: str = data["query"]["pages"][0]["revisions"][0]["content"]
     return rev
diff --git a/dab_mechanic/wikidata_oauth.py b/dab_mechanic/wikidata_oauth.py
index dca0707..5af0976 100644
--- a/dab_mechanic/wikidata_oauth.py
+++ b/dab_mechanic/wikidata_oauth.py
@@ -19,7 +19,6 @@ def get_edit_proxy() -> dict[str, str]:
 def api_post_request(params: dict[str, str | int]):
     """HTTP Post using Oauth."""
     app = current_app
-    url = "https://www.wikidata.org/w/api.php"
     client_key = app.config["CLIENT_KEY"]
     client_secret = app.config["CLIENT_SECRET"]
     oauth = OAuth1Session(
@@ -29,12 +28,12 @@ def api_post_request(params: dict[str, str | int]):
         resource_owner_secret=session["owner_secret"],
     )
     proxies = get_edit_proxy()
-    return oauth.post(url, data=params, timeout=4, proxies=proxies)
+    return oauth.post(api_url, data=params, timeout=10, proxies=proxies)
 
 
 def raw_request(params):
     app = current_app
-    url = "https://www.wikidata.org/w/api.php?" + urlencode(params)
+    url = api_url + "?" + urlencode(params)
     client_key = app.config["CLIENT_KEY"]
     client_secret = app.config["CLIENT_SECRET"]
     oauth = OAuth1Session(
@@ -44,7 +43,7 @@ def raw_request(params):
         resource_owner_secret=session["owner_secret"],
     )
     proxies = get_edit_proxy()
-    return oauth.get(url, timeout=4, proxies=proxies)
+    return oauth.get(url, timeout=10, proxies=proxies)
 
 
 def api_request(params):
diff --git a/dab_mechanic/wikipedia.py b/dab_mechanic/wikipedia.py
index cf9510f..57c03c4 100644
--- a/dab_mechanic/wikipedia.py
+++ b/dab_mechanic/wikipedia.py
@@ -5,6 +5,7 @@ import flask
 import lxml.html
 
 from . import mediawiki_api
+from time import sleep
 
 disambig_templates = [
     "Template:Disambiguation",
@@ -75,7 +76,10 @@ def get_article_links(enwiki: str) -> list[str]:
     redirects = defaultdict(set)
 
     while True:
-        data = mediawiki_api.get(params)
+        data = mediawiki_api.call(params)
+        if "query" not in data:
+            # Surface the API's reply in the error rather than on stdout.
+            raise KeyError(f"no 'query' in mediawiki API response: {data!r}")
         pages = data["query"].pop("pages")
         for r in data["query"].pop("redirects"):
             redirects[r["to"]].add(r["from"])
@@ -86,6 +90,7 @@ def get_article_links(enwiki: str) -> list[str]:
             break
 
         params["gplcontinue"] = data["continue"]["gplcontinue"]
+        sleep(0.1)
 
     for link in set(links):
         if link in redirects: