Make mediawiki API calls via OAuth
The API had a timeout problem. Routing calls through the OAuth session and raising the request timeout from 4 to 10 seconds may fix it.
This commit is contained in:
parent
b1f402e1f9
commit
e85cefbc2f
|
@ -1,8 +1,7 @@
|
||||||
"""Interface with the mediawiki API."""
|
"""Interface with the mediawiki API."""
|
||||||
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
from . import wikidata_oauth
|
||||||
import requests
|
|
||||||
|
|
||||||
wiki_hostname = "en.wikipedia.org"
|
wiki_hostname = "en.wikipedia.org"
|
||||||
wiki_api_php = f"https://{wiki_hostname}/w/api.php"
|
wiki_api_php = f"https://{wiki_hostname}/w/api.php"
|
||||||
|
@ -21,16 +20,14 @@ def parse_page(enwiki: str) -> dict[str, Any]:
|
||||||
"disabletoc": 1,
|
"disabletoc": 1,
|
||||||
}
|
}
|
||||||
|
|
||||||
parse: dict[str, Any] = get(params)["parse"]
|
parse: dict[str, Any] = call(params)["parse"]
|
||||||
return parse
|
return parse
|
||||||
|
|
||||||
|
|
||||||
def get(params: dict[str, str | int]) -> dict[str, Any]:
|
def call(params: dict[str, str | int]) -> dict[str, Any]:
|
||||||
"""Make GET request to mediawiki API."""
|
"""Make GET request to mediawiki API."""
|
||||||
data: dict[str, Any] = requests.get(
|
data: dict[str, Any] = wikidata_oauth.api_post_request(params)
|
||||||
wiki_api_php, headers={"User-Agent": user_agent}, params=params
|
return data.json()
|
||||||
).json()
|
|
||||||
return data
|
|
||||||
|
|
||||||
|
|
||||||
def get_content(title: str) -> str:
|
def get_content(title: str) -> str:
|
||||||
|
@ -43,6 +40,6 @@ def get_content(title: str) -> str:
|
||||||
"rvprop": "content|timestamp",
|
"rvprop": "content|timestamp",
|
||||||
"titles": title,
|
"titles": title,
|
||||||
}
|
}
|
||||||
data = get(params)
|
data = call(params)
|
||||||
rev: str = data["query"]["pages"][0]["revisions"][0]["content"]
|
rev: str = data["query"]["pages"][0]["revisions"][0]["content"]
|
||||||
return rev
|
return rev
|
||||||
|
|
|
@ -19,7 +19,6 @@ def get_edit_proxy() -> dict[str, str]:
|
||||||
def api_post_request(params: dict[str, str | int]):
|
def api_post_request(params: dict[str, str | int]):
|
||||||
"""HTTP Post using Oauth."""
|
"""HTTP Post using Oauth."""
|
||||||
app = current_app
|
app = current_app
|
||||||
url = "https://www.wikidata.org/w/api.php"
|
|
||||||
client_key = app.config["CLIENT_KEY"]
|
client_key = app.config["CLIENT_KEY"]
|
||||||
client_secret = app.config["CLIENT_SECRET"]
|
client_secret = app.config["CLIENT_SECRET"]
|
||||||
oauth = OAuth1Session(
|
oauth = OAuth1Session(
|
||||||
|
@ -29,12 +28,12 @@ def api_post_request(params: dict[str, str | int]):
|
||||||
resource_owner_secret=session["owner_secret"],
|
resource_owner_secret=session["owner_secret"],
|
||||||
)
|
)
|
||||||
proxies = get_edit_proxy()
|
proxies = get_edit_proxy()
|
||||||
return oauth.post(url, data=params, timeout=4, proxies=proxies)
|
return oauth.post(api_url, data=params, timeout=10, proxies=proxies)
|
||||||
|
|
||||||
|
|
||||||
def raw_request(params):
|
def raw_request(params):
|
||||||
app = current_app
|
app = current_app
|
||||||
url = "https://www.wikidata.org/w/api.php?" + urlencode(params)
|
url = api_url + "?" + urlencode(params)
|
||||||
client_key = app.config["CLIENT_KEY"]
|
client_key = app.config["CLIENT_KEY"]
|
||||||
client_secret = app.config["CLIENT_SECRET"]
|
client_secret = app.config["CLIENT_SECRET"]
|
||||||
oauth = OAuth1Session(
|
oauth = OAuth1Session(
|
||||||
|
@ -44,7 +43,7 @@ def raw_request(params):
|
||||||
resource_owner_secret=session["owner_secret"],
|
resource_owner_secret=session["owner_secret"],
|
||||||
)
|
)
|
||||||
proxies = get_edit_proxy()
|
proxies = get_edit_proxy()
|
||||||
return oauth.get(url, timeout=4, proxies=proxies)
|
return oauth.get(url, timeout=10, proxies=proxies)
|
||||||
|
|
||||||
|
|
||||||
def api_request(params):
|
def api_request(params):
|
||||||
|
|
|
@ -5,6 +5,8 @@ import flask
|
||||||
import lxml.html
|
import lxml.html
|
||||||
|
|
||||||
from . import mediawiki_api
|
from . import mediawiki_api
|
||||||
|
from pprint import pprint
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
disambig_templates = [
|
disambig_templates = [
|
||||||
"Template:Disambiguation",
|
"Template:Disambiguation",
|
||||||
|
@ -75,7 +77,9 @@ def get_article_links(enwiki: str) -> list[str]:
|
||||||
redirects = defaultdict(set)
|
redirects = defaultdict(set)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
data = mediawiki_api.get(params)
|
data = mediawiki_api.call(params)
|
||||||
|
if "query" not in data:
|
||||||
|
pprint(data)
|
||||||
pages = data["query"].pop("pages")
|
pages = data["query"].pop("pages")
|
||||||
for r in data["query"].pop("redirects"):
|
for r in data["query"].pop("redirects"):
|
||||||
redirects[r["to"]].add(r["from"])
|
redirects[r["to"]].add(r["from"])
|
||||||
|
@ -86,6 +90,7 @@ def get_article_links(enwiki: str) -> list[str]:
|
||||||
break
|
break
|
||||||
|
|
||||||
params["gplcontinue"] = data["continue"]["gplcontinue"]
|
params["gplcontinue"] = data["continue"]["gplcontinue"]
|
||||||
|
sleep(0.1)
|
||||||
|
|
||||||
for link in set(links):
|
for link in set(links):
|
||||||
if link in redirects:
|
if link in redirects:
|
||||||
|
|
Loading…
Reference in a new issue