google-stocks/google_stocks/__init__.py
2023-09-08 00:07:40 +01:00

117 lines
3.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Look up stock index on Google."""
import decimal
import os
import re
import urllib.parse
from datetime import datetime
import lxml.html
from playwright.sync_api import Playwright, expect, sync_playwright
# Playwright storage-state file: loaded when the browser context is created
# and written back after a successful lookup (see Index.run).
auth_file = os.path.expanduser("~/lib/auth/google.json")

# Directory under which data_filename() places saved pages.
data_loc = os.path.expanduser("~/lib/google_stocks")

# Maps a ``data-attrid`` value found in the results page to the name of the
# Index attribute that receives the element's text (see Index.parse_html).
attr_map = {
    "52-wk high": "price_52_wk_high",
    "52-wk low": "price_52_wk_low",
    "Company Name": "company_name",
    "High": "price_high",
    "Low": "price_low",
    "Open": "price_open",
    "Prev close": "price_prev_close",
    "day change": "day_change",
    "title": "title",
    "subtitle": "subtitle",
}
def data_filename(page_type: str, ext: str = "html") -> str:
    """Build a timestamped path under ``data_loc`` for saving scraped data.

    Args:
        page_type: Short label embedded in the filename (e.g. ``"serp"``).
        ext: File extension, without the leading dot.

    Returns:
        ``<data_loc>/<YYYY-MM-DD_HHMMSS>_<page_type>.<ext>`` where the
        timestamp is the current time in UTC.
    """
    # Imported locally because the module only imports ``datetime`` itself.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12; an aware UTC "now"
    # formats to exactly the same string with this strftime pattern.
    now_str = datetime.now(timezone.utc).strftime("%Y-%m-%d_%H%M%S")
    return os.path.join(data_loc, f"{now_str}_{page_type}.{ext}")
class Index:
    """Stock market index looked up through a Google search.

    Creating an instance immediately launches headless Chromium through
    Playwright, loads the Google results page for ``name``, saves the HTML
    under ``data_loc`` and parses the values out of it.

    ``price``, ``day_change`` and ``percent_change`` are converted to
    ``decimal.Decimal`` by :meth:`parse_html`.

    NOTE(review): the remaining attributes are filled from ``attr_map`` with
    the element's raw text, so the ``price_*`` fields below hold strings at
    runtime even though they are annotated as ``Decimal`` — confirm which of
    the two is intended.
    """

    name: str
    price_52_wk_high: decimal.Decimal
    price_52_wk_low: decimal.Decimal
    company_name: str
    price_high: decimal.Decimal
    price_low: decimal.Decimal
    # Matches the attribute name that attr_map assigns for "Open".
    price_open: decimal.Decimal
    price_prev_close: decimal.Decimal
    day_change: decimal.Decimal
    percent_change: decimal.Decimal
    price: decimal.Decimal
    subtitle: str
    title: str

    # Unsigned percentage in parentheses, e.g. "(1.23%)"; compiled once.
    _RE_PERCENT_CHANGE = re.compile(r" *\(([0-9.]+)%\) *")

    def __init__(self, name: str):
        """Look up ``name`` on Google and populate the instance.

        Args:
            name: Text used as the search query.
        """
        self.name = name
        with sync_playwright() as playwright:
            self.run(playwright)

    @property
    def search_url(self) -> str:
        """Google search URL for ``self.name`` (query is URL-quoted)."""
        return "https://www.google.com/search?q=" + urllib.parse.quote_plus(self.name)

    def run(self, playwright: Playwright) -> None:
        """Fetch the results page, save it to disk and parse it.

        The browser context is created from the storage state in
        ``auth_file``; after a successful parse the (possibly updated) state
        is written back to the same file.

        Args:
            playwright: Active Playwright instance used to launch Chromium.
        """
        browser = playwright.chromium.launch(headless=True)
        try:
            context = browser.new_context(storage_state=auth_file)
            page = context.new_page()
            page.goto(self.search_url, wait_until="domcontentloaded")
            # Only read the page once the market summary is visible.
            expect(page.get_by_text("Market Summary")).to_be_visible()
            html = page.content()

            # Explicit encoding: the page is not guaranteed to be encodable
            # in the platform's default locale encoding.
            with open(data_filename("serp"), "w", encoding="utf-8") as out:
                out.write(html)

            self.parse_html(html)

            page.close()
            context.storage_state(path=auth_file)
            context.close()
        finally:
            # Always release the browser, including when navigation, the
            # visibility check or parsing raises.
            browser.close()

    def parse_html(self, html: str) -> None:
        """Extract index data from a results page and store it on ``self``.

        Every element whose ``data-attrid`` is a key of ``attr_map`` has its
        text stored under the mapped attribute name. The element with
        ``data-attrid="Price"`` supplies ``price``, ``day_change`` and
        ``percent_change`` as ``decimal.Decimal`` values.

        Args:
            html: Full HTML of the search results page.

        Raises:
            AssertionError: If the price element or the percentage text is
                not found in the expected form.
            IndexError: If the price element lacks the expected children.
            decimal.InvalidOperation: If a numeric field cannot be parsed.
        """
        root = lxml.html.fromstring(html)

        for attrid_tag in root.findall(".//*[@data-attrid]"):
            attr_name = attr_map.get(attrid_tag.get("data-attrid"))
            if attr_name is not None:
                setattr(self, attr_name, attrid_tag.text_content())

        tag = root.find('.//*[@data-attrid="Price"]')
        assert tag is not None, 'no element with data-attrid="Price"'
        price_tag, change_tag = tag[0], tag[1]

        self.price = decimal.Decimal(price_tag.text_content().replace(",", "").strip())

        # Decimal only accepts the ASCII hyphen-minus as a sign, so map the
        # Unicode MINUS SIGN (U+2212) to "-". It is written as an escape so
        # the character cannot be lost or mistaken for a hyphen.
        # NOTE(review): assumes negative changes are rendered with U+2212 —
        # confirm against a saved results page.
        day_change_str = (
            change_tag[0]
            .text_content()
            .strip()
            .replace("\u2212", "-")
            .replace(",", "")  # thousands separators, as for ``price``
        )
        self.day_change = decimal.Decimal(day_change_str)

        m = self._RE_PERCENT_CHANGE.match(change_tag[1].text_content())
        assert m, "percent change text not in the expected '(N%)' form"
        percent_change = decimal.Decimal(m.group(1))
        # The percentage is matched without a sign; take it from the day change.
        if day_change_str.startswith("-"):
            percent_change = -percent_change
        self.percent_change = percent_change

    @property
    def one_line(self) -> str:
        """Index title, price and percentage change on a single line."""
        return f"{self.title}: {self.price} ({self.percent_change}%)"