Refactor add-new-conference into agenda module
This commit is contained in:
parent
2cc25cd203
commit
76360c25f3
3 changed files with 634 additions and 455 deletions
467
agenda/add_new_conference.py
Normal file
467
agenda/add_new_conference.py
Normal file
|
|
@ -0,0 +1,467 @@
|
||||||
|
"""Helpers for adding conferences to the YAML data file."""
|
||||||
|
|
||||||
|
import configparser
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import typing
|
||||||
|
from datetime import date, datetime, time, timezone
|
||||||
|
from urllib.parse import parse_qs, urlparse
|
||||||
|
|
||||||
|
import html2text
|
||||||
|
import lxml.html # type: ignore[import-untyped]
|
||||||
|
import openai
|
||||||
|
import pycountry
|
||||||
|
import requests
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
# Sent as the User-Agent header on outgoing HTTP requests.
USER_AGENT = "add-new-conference/0.1"

# Regexes that pull "lat,lon" pairs out of Google Maps URLs.
# Each pattern captures latitude in group 1 and longitude in group 2.
COORDINATE_PATTERNS = (
    # .../@51.5,-0.1,15z style viewport coordinates
    re.compile(r"@(-?\d+(?:\.\d+)?),(-?\d+(?:\.\d+)?)"),
    # ?q=lat,lon search queries
    re.compile(r"[?&]q=(-?\d+(?:\.\d+)?),(-?\d+(?:\.\d+)?)"),
    # ?ll=lat,lon / ?center=lat,lon map-centre parameters
    re.compile(r"[?&](?:ll|center)=(-?\d+(?:\.\d+)?),(-?\d+(?:\.\d+)?)"),
    # !3d<lat>!4d<lon> place-marker segments in long Google Maps URLs
    re.compile(r"!3d(-?\d+(?:\.\d+)?)!4d(-?\d+(?:\.\d+)?)"),
    # ?destination=lat,lon directions links
    re.compile(r"[?&]destination=(-?\d+(?:\.\d+)?),(-?\d+(?:\.\d+)?)"),
)
|
||||||
|
|
||||||
|
|
||||||
|
def read_api_key() -> str:
    """Read API key from ~/.config/openai/config."""
    parser = configparser.ConfigParser()
    # Missing file or section simply surfaces as a KeyError below.
    parser.read(os.path.expanduser("~/.config/openai/config"))
    return parser["openai"]["api_key"]
|
||||||
|
|
||||||
|
|
||||||
|
def build_prompt(
|
||||||
|
url: str,
|
||||||
|
source_text: str,
|
||||||
|
detected_coordinates: tuple[float, float] | None,
|
||||||
|
) -> str:
|
||||||
|
"""Build prompt with embedded YAML examples."""
|
||||||
|
examples = """
|
||||||
|
- name: Geomob London
|
||||||
|
topic: Maps
|
||||||
|
location: London
|
||||||
|
country: gb
|
||||||
|
start: 2026-01-28 18:00:00+00:00
|
||||||
|
end: 2026-01-28 23:00:00+00:00
|
||||||
|
url: https://thegeomob.com/post/jan-28th-2026-geomoblon-details
|
||||||
|
venue: Geovation Hub
|
||||||
|
address: Sutton Yard, 65 Goswell Rd, London EC1V 7EN
|
||||||
|
latitude: 51.5242464
|
||||||
|
longitude: -0.0997024
|
||||||
|
free: true
|
||||||
|
going: true
|
||||||
|
hashtag: '#geomobLON'
|
||||||
|
|
||||||
|
- name: DebConf 25
|
||||||
|
topic: Debian
|
||||||
|
location: Plouzané (Breast)
|
||||||
|
country: fr
|
||||||
|
start: 2025-07-07
|
||||||
|
end: 2025-07-20
|
||||||
|
url: https://wiki.debian.org/DebConf/25
|
||||||
|
going: true
|
||||||
|
cfp_url: https://debconf25.debconf.org/talks/new/
|
||||||
|
venue: École nationale supérieure Mines-Télécom Atlantique Bretagne Pays de la Loire
|
||||||
|
campus de Brest
|
||||||
|
latitude: 48.35934
|
||||||
|
longitude: -4.569889
|
||||||
|
|
||||||
|
- name: Wikimedia Hackathon
|
||||||
|
topic: Wikimedia
|
||||||
|
location: Istanbul
|
||||||
|
country: tr
|
||||||
|
start: 2025-05-02
|
||||||
|
end: 2025-05-04
|
||||||
|
venue: Renaissance Polat Istanbul Hotel
|
||||||
|
address: Yeşilyurt, Sahil Yolu Cd. No:2, 34149 Bakırköy/İstanbul
|
||||||
|
latitude: 40.959946
|
||||||
|
longitude: 28.838763
|
||||||
|
url: https://www.mediawiki.org/wiki/Wikimedia_Hackathon_2025
|
||||||
|
going: true
|
||||||
|
free: true
|
||||||
|
hackathon: true
|
||||||
|
registered: true
|
||||||
|
"""
|
||||||
|
coordinate_note = ""
|
||||||
|
if detected_coordinates is not None:
|
||||||
|
coordinate_note = (
|
||||||
|
"\nDetected venue coordinates from a map link on the page:\n"
|
||||||
|
f"latitude: {detected_coordinates[0]}\n"
|
||||||
|
f"longitude: {detected_coordinates[1]}\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
prompt = f"""
|
||||||
|
I keep a record of interesting conferences in a YAML file.
|
||||||
|
|
||||||
|
Here are some examples of the format I use:
|
||||||
|
|
||||||
|
{examples}
|
||||||
|
|
||||||
|
Now here is a new conference of interest:
|
||||||
|
|
||||||
|
Conference URL: {url}
|
||||||
|
|
||||||
|
Return the YAML representation for this conference following the
|
||||||
|
same style and keys as the examples. Only include keys if the
|
||||||
|
information is available. Do not invent details.
|
||||||
|
|
||||||
|
Important: the `country` field must always be a valid ISO 3166-1 alpha-2
|
||||||
|
country code (two lowercase letters, e.g. `ca` for Canada, `gb` for United Kingdom).
|
||||||
|
Do not output full country names.
|
||||||
|
|
||||||
|
Important: always include an `end` field. If the event is a single-day event,
|
||||||
|
the `end` can be the same date as `start`, or a same-day datetime if the page
|
||||||
|
provides an end time.
|
||||||
|
|
||||||
|
Important: if this is a Geomob event, use an `end` datetime of 22:00 local time
|
||||||
|
on the event date unless the page explicitly provides a different end time.
|
||||||
|
{coordinate_note}
|
||||||
|
|
||||||
|
Wrap your answer in a JSON object with a single key "yaml".
|
||||||
|
===
|
||||||
|
{source_text}
|
||||||
|
"""
|
||||||
|
return prompt
|
||||||
|
|
||||||
|
|
||||||
|
def get_from_open_ai(prompt: str, model: str = "gpt-5.4") -> dict[str, str]:
    """Pass prompt to OpenAI and get reply."""
    client = openai.OpenAI(api_key=read_api_key())

    # Force JSON-object output so the reply can be parsed directly.
    response = client.chat.completions.create(
        model=model,
        response_format={"type": "json_object"},
        messages=[{"role": "user", "content": prompt}],
    )

    reply = response.choices[0].message.content
    assert isinstance(reply, str)
    parsed = json.loads(reply)
    return typing.cast(dict[str, str], parsed)
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_webpage(url: str, timeout: int = 30) -> lxml.html.HtmlElement:
    """Fetch webpage HTML and parse it.

    Parameters:
        url: Page to download.
        timeout: Seconds to wait for the server. Added because requests has
            no default timeout, so the original call could hang forever.

    Returns:
        The parsed document root.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.Timeout: If the server does not respond within `timeout`.
    """
    response = requests.get(
        url, headers={"User-Agent": USER_AGENT}, timeout=timeout
    )
    response.raise_for_status()
    return lxml.html.fromstring(response.content)
|
||||||
|
|
||||||
|
|
||||||
|
def webpage_to_text(root: lxml.html.HtmlElement) -> str:
    """Convert parsed HTML into readable text content."""
    # Serialise and re-parse so the caller's tree is never mutated.
    working_copy = lxml.html.fromstring(lxml.html.tostring(root))

    # Strip non-content nodes before text conversion.
    for node in working_copy.xpath("//script|//style"):
        node.drop_tree()

    converter = html2text.HTML2Text()
    converter.ignore_images = True
    converter.ignore_links = True
    markup = lxml.html.tostring(working_copy, encoding="unicode")
    return converter.handle(markup)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_osm_url(url: str) -> tuple[float, float] | None:
|
||||||
|
"""Extract latitude/longitude from an OpenStreetMap URL."""
|
||||||
|
parsed = urlparse(url)
|
||||||
|
query = parse_qs(parsed.query)
|
||||||
|
|
||||||
|
mlat = query.get("mlat")
|
||||||
|
mlon = query.get("mlon")
|
||||||
|
if mlat and mlon:
|
||||||
|
return float(mlat[0]), float(mlon[0])
|
||||||
|
|
||||||
|
if parsed.fragment.startswith("map="):
|
||||||
|
parts = parsed.fragment.split("/")
|
||||||
|
if len(parts) >= 3:
|
||||||
|
return float(parts[-2]), float(parts[-1])
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_google_maps_latlon(url: str) -> tuple[float, float] | None:
    """Extract latitude/longitude from a Google Maps URL."""
    # First pattern that matches wins; group 1 is latitude, group 2 longitude.
    for pattern in COORDINATE_PATTERNS:
        if found := pattern.search(url):
            return float(found[1]), float(found[2])
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def latlon_from_google_maps_url(
    url: str, timeout: int = 10
) -> tuple[float, float] | None:
    """Resolve a Google Maps URL and extract latitude/longitude.

    Follows redirects (short maps.app.goo.gl links expand to full URLs),
    then tries the final URL and, failing that, the response body.

    Parameters:
        url: Google Maps (or shortener) URL.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        (latitude, longitude) if found, otherwise None.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    response = requests.get(
        url,
        allow_redirects=True,
        timeout=timeout,
        # Use the module-wide User-Agent; the previous "lookup.py/1.0"
        # string was a leftover from the script this helper was copied from.
        headers={"User-Agent": USER_AGENT},
    )
    response.raise_for_status()

    # The expanded URL usually contains the coordinates directly.
    coordinates = extract_google_maps_latlon(response.url)
    if coordinates is not None:
        return coordinates

    # Otherwise scan the page body for an embedded coordinate pair.
    return extract_google_maps_latlon(response.text)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_coordinates_from_url(url: str) -> tuple[float, float] | None:
    """Extract latitude/longitude from a supported map URL."""
    lowered = url.lower()

    if "openstreetmap.org" in lowered:
        return parse_osm_url(url)

    # Anything that is not OSM or Google Maps is unsupported.
    if "google." not in lowered and "maps.app.goo.gl" not in lowered:
        return None

    # Try the URL text itself first; only hit the network if that fails.
    found = extract_google_maps_latlon(url)
    if found is not None:
        return found

    try:
        return latlon_from_google_maps_url(url)
    except requests.RequestException:
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def detect_page_coordinates(root: lxml.html.HtmlElement) -> tuple[float, float] | None:
    """Detect venue coordinates from Google Maps or OSM links."""
    # Scan every anchor on the page; the first usable map link wins.
    hrefs = (str(anchor.get("href", "")).strip() for anchor in root.xpath("//a[@href]"))
    for href in hrefs:
        if not href:
            continue
        found = parse_coordinates_from_url(href)
        if found is not None:
            return found
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def parse_date(date_str: str) -> datetime:
    """Parse ISO date or datetime into a naive datetime (UTC if tz-aware)."""
    try:
        parsed = datetime.fromisoformat(date_str)
    except ValueError:
        # Fall back to just the calendar-date part before any "T".
        parsed = datetime.fromisoformat(date_str.split("T")[0])

    if parsed.tzinfo is None:
        return parsed
    # Normalise aware values to UTC, then drop tzinfo so results compare.
    return parsed.astimezone(timezone.utc).replace(tzinfo=None)
|
||||||
|
|
||||||
|
|
||||||
|
def url_has_year_component(url: str) -> bool:
    """Return True if the URL contains any digit."""
    # A digit anywhere is treated as a year marker (e.g. ".../2025/").
    return any(map(str.isdigit, url))
|
||||||
|
|
||||||
|
|
||||||
|
def insert_sorted(
    conferences: list[dict[str, typing.Any]], new_conf: dict[str, typing.Any]
) -> list[dict[str, typing.Any]]:
    """Insert a conference sorted by start date and skip duplicate URLs."""
    new_url = new_conf.get("url")
    new_start = parse_date(str(new_conf["start"]))

    if new_url:
        for existing in conferences:
            if existing.get("url") != new_url:
                continue
            # A URL containing digits is assumed year-specific, so an exact
            # match is always a duplicate.
            if url_has_year_component(new_url):
                print(f"⚠️ Conference with URL {new_url} already exists, skipping.")
                return conferences
            # Year-less URLs are reused across editions: only the same
            # year counts as a duplicate.
            existing_year = parse_date(str(existing["start"])).year
            if existing_year == new_start.year:
                print(
                    f"⚠️ Conference already exists in YAML "
                    f"(url={new_url}, year={existing_year}), skipping."
                )
                return conferences

    # Not a duplicate: insert before the first later-starting entry,
    # or append at the end if every existing entry starts earlier.
    insert_at = len(conferences)
    for position, existing in enumerate(conferences):
        if new_start < parse_date(str(existing["start"])):
            insert_at = position
            break
    conferences.insert(insert_at, new_conf)
    return conferences
|
||||||
|
|
||||||
|
|
||||||
|
def validate_country(conf: dict[str, typing.Any]) -> None:
    """Ensure country is a valid ISO 3166-1 alpha-2 code, normalise if possible."""
    raw = conf.get("country")
    if not raw:
        return

    raw = raw.strip()

    # A two-letter value must already be a valid alpha-2 code.
    if len(raw) == 2:
        if not pycountry.countries.get(alpha_2=raw.upper()):
            raise ValueError(f"❌ Invalid ISO 3166-1 code '{raw}'")
        conf["country"] = raw.lower()
        return

    # Longer values are country names: exact lookup first, then fuzzy
    # (handles e.g. "United States" vs "United States of America").
    found = pycountry.countries.get(name=raw)
    if not found:
        try:
            found = pycountry.countries.search_fuzzy(raw)[0]
        except LookupError as exc:
            raise ValueError(
                f"❌ Country '{raw}' not recognised as ISO 3166-1"
            ) from exc

    conf["country"] = found.alpha_2.lower()
|
||||||
|
|
||||||
|
|
||||||
|
def parse_yaml_datetime(value: typing.Any) -> datetime | None:
|
||||||
|
"""Convert YAML date/datetime values to a datetime."""
|
||||||
|
if isinstance(value, datetime):
|
||||||
|
return value
|
||||||
|
|
||||||
|
if isinstance(value, date):
|
||||||
|
return datetime.combine(value, time())
|
||||||
|
|
||||||
|
if isinstance(value, str):
|
||||||
|
try:
|
||||||
|
return datetime.fromisoformat(value)
|
||||||
|
except ValueError:
|
||||||
|
return datetime.combine(date.fromisoformat(value.split("T")[0]), time())
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def same_type_as_start(
    start_value: typing.Any,
    new_dt: datetime,
    keep_timezone: bool = True,
    prefer_datetime: bool = False,
) -> typing.Any:
    """Return end value shaped like the start value when possible."""
    # datetime before date: datetime is a subclass of date.
    if isinstance(start_value, datetime):
        return new_dt if keep_timezone else new_dt.replace(tzinfo=None)

    if isinstance(start_value, date):
        return new_dt if prefer_datetime else new_dt.date()

    if isinstance(start_value, str):
        # A space or "T" in the start string means it carried a time part.
        looks_like_datetime = " " in start_value or "T" in start_value
        if prefer_datetime or looks_like_datetime:
            return new_dt.isoformat(sep=" ")
        return new_dt.date().isoformat()

    # Unknown start type: hand the datetime back unchanged.
    return new_dt
|
||||||
|
|
||||||
|
|
||||||
|
def maybe_extract_explicit_end_time(source_text: str) -> int | None:
|
||||||
|
"""Extract an explicit 12-hour clock end time for Geomob-style pages."""
|
||||||
|
lowered = source_text.lower()
|
||||||
|
|
||||||
|
if "10pm" in lowered or "10 pm" in lowered or "22:00" in lowered:
|
||||||
|
return 22
|
||||||
|
|
||||||
|
if "11pm" in lowered or "11 pm" in lowered or "23:00" in lowered:
|
||||||
|
return 23
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def normalise_end_field(new_conf: dict[str, typing.Any], source_text: str) -> None:
    """Ensure an end value exists, with a Geomob-specific fallback.

    Mutates ``new_conf`` in place. Geomob events always get an end of the
    explicit end hour found in the page text (22:00 if none is found);
    other events only get a missing ``end`` filled with the start value.
    """
    start_value = new_conf.get("start")
    # Nothing to normalise without a usable start.
    if start_value is None:
        return

    start_dt = parse_yaml_datetime(start_value)
    if start_dt is None:
        return

    name = str(new_conf.get("name", ""))
    url = str(new_conf.get("url", ""))
    # Geomob events are detected by name or by their canonical domain.
    is_geomob = "geomob" in name.lower() or "thegeomob.com" in url.lower()

    if is_geomob:
        end_hour = maybe_extract_explicit_end_time(source_text)
        if end_hour is None:
            # Default Geomob end time when the page gives no explicit one.
            end_hour = 22

        geomob_end = start_dt.replace(hour=end_hour, minute=0, second=0, microsecond=0)
        # prefer_datetime: even a date-only start gets a datetime end here.
        new_conf["end"] = same_type_as_start(
            start_value, geomob_end, prefer_datetime=True
        )
        return

    # Non-Geomob: only fill in a missing end, mirroring the start value.
    if "end" not in new_conf:
        new_conf["end"] = same_type_as_start(start_value, start_dt)
|
||||||
|
|
||||||
|
|
||||||
|
def load_conferences(yaml_path: str) -> list[dict[str, typing.Any]]:
    """Load conference YAML."""
    with open(yaml_path) as file:
        data = yaml.safe_load(file)
    # The data file is expected to be a top-level YAML list of mappings.
    assert isinstance(data, list)
    return typing.cast(list[dict[str, typing.Any]], data)
|
||||||
|
|
||||||
|
|
||||||
|
def dump_conferences(yaml_path: str, conferences: list[dict[str, typing.Any]]) -> None:
    """Write conference YAML."""
    rendered = yaml.dump(conferences, sort_keys=False, allow_unicode=True)
    # Blank line between entries for readability; strip the leading one
    # that the replace would otherwise put before the first entry.
    rendered = rendered.replace("\n- name:", "\n\n- name:").lstrip()
    with open(yaml_path, "w") as file:
        file.write(rendered)
|
||||||
|
|
||||||
|
|
||||||
|
def add_new_conference(url: str, yaml_path: str) -> bool:
    """Fetch, generate and insert a conference into the YAML file.

    Returns True if the YAML file was written, False if the URL was
    already present and the entry was skipped before the API call.
    """
    conferences = load_conferences(yaml_path)

    # Cheap duplicate check before spending an OpenAI API call: a URL with
    # digits is assumed year-specific, so an exact match means already added.
    if url_has_year_component(url):
        for conf in conferences:
            if conf.get("url") == url:
                print(
                    "⚠️ Conference already exists in YAML "
                    + f"(url={url}), skipping before API call."
                )
                return False

    soup = fetch_webpage(url)
    source_text = webpage_to_text(soup)
    detected_coordinates = detect_page_coordinates(soup)
    prompt = build_prompt(url, source_text, detected_coordinates)
    new_yaml_text = get_from_open_ai(prompt)["yaml"]

    new_conf = yaml.safe_load(new_yaml_text)
    # The model sometimes wraps the entry in a one-element list.
    if isinstance(new_conf, list):
        new_conf = new_conf[0]
    assert isinstance(new_conf, dict)

    validate_country(new_conf)
    normalise_end_field(new_conf, source_text)

    # Coordinates scraped from the page override whatever the model produced.
    if detected_coordinates is not None:
        new_conf["latitude"] = detected_coordinates[0]
        new_conf["longitude"] = detected_coordinates[1]

    updated = insert_sorted(conferences, new_conf)
    dump_conferences(yaml_path, updated)
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str] | None = None) -> int:
    """CLI entrypoint."""
    arguments = sys.argv[1:] if argv is None else argv
    if not arguments:
        raise SystemExit("Usage: add-new-conference URL")

    # The data file lives in a fixed per-user checkout.
    yaml_path = os.path.expanduser("~/src/personal-data/conferences.yaml")
    add_new_conference(arguments[0], yaml_path)
    return 0
|
||||||
|
|
@ -1,463 +1,15 @@
|
||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
|
|
||||||
import configparser
|
|
||||||
import json
|
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
import sys
|
import sys
|
||||||
import typing
|
|
||||||
from datetime import date, datetime, time, timezone
|
|
||||||
from urllib.parse import parse_qs, urlparse
|
|
||||||
|
|
||||||
import html2text
|
SCRIPT_PATH = os.path.realpath(__file__)
|
||||||
import openai
|
SCRIPT_DIR = os.path.dirname(SCRIPT_PATH)
|
||||||
import pycountry
|
REPO_ROOT = os.path.dirname(SCRIPT_DIR)
|
||||||
import requests
|
if REPO_ROOT not in sys.path:
|
||||||
import yaml
|
sys.path.insert(0, REPO_ROOT)
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
|
|
||||||
user_agent = "add-new-conference/0.1"
|
|
||||||
coordinate_patterns = (
|
|
||||||
re.compile(r"@(-?\d+(?:\.\d+)?),(-?\d+(?:\.\d+)?)"),
|
|
||||||
re.compile(r"[?&]q=(-?\d+(?:\.\d+)?),(-?\d+(?:\.\d+)?)"),
|
|
||||||
re.compile(r"[?&](?:ll|center)=(-?\d+(?:\.\d+)?),(-?\d+(?:\.\d+)?)"),
|
|
||||||
re.compile(r"!3d(-?\d+(?:\.\d+)?)!4d(-?\d+(?:\.\d+)?)"),
|
|
||||||
re.compile(r"[?&]destination=(-?\d+(?:\.\d+)?),(-?\d+(?:\.\d+)?)"),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def read_api_key() -> str:
|
|
||||||
"""Read API key from ~/.config/openai/config."""
|
|
||||||
config_path = os.path.expanduser("~/.config/openai/config")
|
|
||||||
parser = configparser.ConfigParser()
|
|
||||||
parser.read(config_path)
|
|
||||||
return parser["openai"]["api_key"]
|
|
||||||
|
|
||||||
|
|
||||||
def build_prompt(
|
|
||||||
url: str,
|
|
||||||
source_text: str,
|
|
||||||
detected_coordinates: tuple[float, float] | None,
|
|
||||||
) -> str:
|
|
||||||
"""Build prompt with embedded YAML examples."""
|
|
||||||
examples = """
|
|
||||||
- name: Geomob London
|
|
||||||
topic: Maps
|
|
||||||
location: London
|
|
||||||
country: gb
|
|
||||||
start: 2026-01-28 18:00:00+00:00
|
|
||||||
end: 2026-01-28 23:00:00+00:00
|
|
||||||
url: https://thegeomob.com/post/jan-28th-2026-geomoblon-details
|
|
||||||
venue: Geovation Hub
|
|
||||||
address: Sutton Yard, 65 Goswell Rd, London EC1V 7EN
|
|
||||||
latitude: 51.5242464
|
|
||||||
longitude: -0.0997024
|
|
||||||
free: true
|
|
||||||
going: true
|
|
||||||
hashtag: '#geomobLON'
|
|
||||||
|
|
||||||
- name: DebConf 25
|
|
||||||
topic: Debian
|
|
||||||
location: Plouzané (Breast)
|
|
||||||
country: fr
|
|
||||||
start: 2025-07-07
|
|
||||||
end: 2025-07-20
|
|
||||||
url: https://wiki.debian.org/DebConf/25
|
|
||||||
going: true
|
|
||||||
cfp_url: https://debconf25.debconf.org/talks/new/
|
|
||||||
venue: École nationale supérieure Mines-Télécom Atlantique Bretagne Pays de la Loire
|
|
||||||
campus de Brest
|
|
||||||
latitude: 48.35934
|
|
||||||
longitude: -4.569889
|
|
||||||
|
|
||||||
- name: Wikimedia Hackathon
|
|
||||||
topic: Wikimedia
|
|
||||||
location: Istanbul
|
|
||||||
country: tr
|
|
||||||
start: 2025-05-02
|
|
||||||
end: 2025-05-04
|
|
||||||
venue: Renaissance Polat Istanbul Hotel
|
|
||||||
address: Yeşilyurt, Sahil Yolu Cd. No:2, 34149 Bakırköy/İstanbul
|
|
||||||
latitude: 40.959946
|
|
||||||
longitude: 28.838763
|
|
||||||
url: https://www.mediawiki.org/wiki/Wikimedia_Hackathon_2025
|
|
||||||
going: true
|
|
||||||
free: true
|
|
||||||
hackathon: true
|
|
||||||
registered: true
|
|
||||||
"""
|
|
||||||
coordinate_note = ""
|
|
||||||
if detected_coordinates is not None:
|
|
||||||
coordinate_note = (
|
|
||||||
"\nDetected venue coordinates from a map link on the page:\n"
|
|
||||||
f"latitude: {detected_coordinates[0]}\n"
|
|
||||||
f"longitude: {detected_coordinates[1]}\n"
|
|
||||||
)
|
|
||||||
|
|
||||||
prompt = f"""
|
|
||||||
I keep a record of interesting conferences in a YAML file.
|
|
||||||
|
|
||||||
Here are some examples of the format I use:
|
|
||||||
|
|
||||||
{examples}
|
|
||||||
|
|
||||||
Now here is a new conference of interest:
|
|
||||||
|
|
||||||
Conference URL: {url}
|
|
||||||
|
|
||||||
Return the YAML representation for this conference following the
|
|
||||||
same style and keys as the examples. Only include keys if the
|
|
||||||
information is available. Do not invent details.
|
|
||||||
|
|
||||||
Important: the `country` field must always be a valid ISO 3166-1 alpha-2
|
|
||||||
country code (two lowercase letters, e.g. `ca` for Canada, `gb` for United Kingdom).
|
|
||||||
Do not output full country names.
|
|
||||||
|
|
||||||
Important: always include an `end` field. If the event is a single-day event,
|
|
||||||
the `end` can be the same date as `start`, or a same-day datetime if the page
|
|
||||||
provides an end time.
|
|
||||||
|
|
||||||
Important: if this is a Geomob event, use an `end` datetime of 22:00 local time
|
|
||||||
on the event date unless the page explicitly provides a different end time.
|
|
||||||
{coordinate_note}
|
|
||||||
|
|
||||||
Wrap your answer in a JSON object with a single key "yaml".
|
|
||||||
===
|
|
||||||
{source_text}
|
|
||||||
"""
|
|
||||||
return prompt
|
|
||||||
|
|
||||||
|
|
||||||
def get_from_open_ai(prompt: str, model: str = "gpt-5.4") -> dict[str, str]:
|
|
||||||
"""Pass prompt to OpenAI and get reply."""
|
|
||||||
client = openai.OpenAI(api_key=read_api_key())
|
|
||||||
|
|
||||||
response = client.chat.completions.create(
|
|
||||||
messages=[{"role": "user", "content": prompt}],
|
|
||||||
model=model,
|
|
||||||
response_format={"type": "json_object"},
|
|
||||||
)
|
|
||||||
|
|
||||||
reply = response.choices[0].message.content
|
|
||||||
assert isinstance(reply, str)
|
|
||||||
return typing.cast(dict[str, str], json.loads(reply))
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_webpage(url: str) -> BeautifulSoup:
|
|
||||||
"""Fetch webpage HTML and parse it."""
|
|
||||||
response = requests.get(url, headers={"User-Agent": user_agent})
|
|
||||||
response.raise_for_status()
|
|
||||||
return BeautifulSoup(response.content, "lxml")
|
|
||||||
|
|
||||||
|
|
||||||
def webpage_to_text(soup: BeautifulSoup) -> str:
|
|
||||||
"""Convert parsed HTML into readable text content."""
|
|
||||||
soup_copy = BeautifulSoup(str(soup), "lxml")
|
|
||||||
|
|
||||||
for script_or_style in soup_copy(["script", "style"]):
|
|
||||||
script_or_style.decompose()
|
|
||||||
|
|
||||||
text_maker = html2text.HTML2Text()
|
|
||||||
text_maker.ignore_links = True
|
|
||||||
text_maker.ignore_images = True
|
|
||||||
return text_maker.handle(str(soup_copy))
|
|
||||||
|
|
||||||
|
|
||||||
def parse_osm_url(url: str) -> tuple[float, float] | None:
|
|
||||||
"""Extract latitude/longitude from an OpenStreetMap URL."""
|
|
||||||
parsed = urlparse(url)
|
|
||||||
query = parse_qs(parsed.query)
|
|
||||||
|
|
||||||
mlat = query.get("mlat")
|
|
||||||
mlon = query.get("mlon")
|
|
||||||
if mlat and mlon:
|
|
||||||
return float(mlat[0]), float(mlon[0])
|
|
||||||
|
|
||||||
if parsed.fragment.startswith("map="):
|
|
||||||
parts = parsed.fragment.split("/")
|
|
||||||
if len(parts) >= 3:
|
|
||||||
return float(parts[-2]), float(parts[-1])
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def extract_google_maps_latlon(url: str) -> tuple[float, float] | None:
|
|
||||||
"""Extract latitude/longitude from a Google Maps URL."""
|
|
||||||
for pattern in coordinate_patterns:
|
|
||||||
match = pattern.search(url)
|
|
||||||
if match:
|
|
||||||
return float(match.group(1)), float(match.group(2))
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def latlon_from_google_maps_url(
|
|
||||||
url: str, timeout: int = 10
|
|
||||||
) -> tuple[float, float] | None:
|
|
||||||
"""Resolve a Google Maps URL and extract latitude/longitude."""
|
|
||||||
response = requests.get(
|
|
||||||
url,
|
|
||||||
allow_redirects=True,
|
|
||||||
timeout=timeout,
|
|
||||||
headers={"User-Agent": "lookup.py/1.0"},
|
|
||||||
)
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
coordinates = extract_google_maps_latlon(response.url)
|
|
||||||
if coordinates is not None:
|
|
||||||
return coordinates
|
|
||||||
|
|
||||||
return extract_google_maps_latlon(response.text)
|
|
||||||
|
|
||||||
|
|
||||||
def parse_coordinates_from_url(url: str) -> tuple[float, float] | None:
|
|
||||||
"""Extract latitude/longitude from a supported map URL."""
|
|
||||||
lower_url = url.lower()
|
|
||||||
|
|
||||||
if "openstreetmap.org" in lower_url:
|
|
||||||
return parse_osm_url(url)
|
|
||||||
|
|
||||||
if "google." in lower_url or "maps.app.goo.gl" in lower_url:
|
|
||||||
coordinates = extract_google_maps_latlon(url)
|
|
||||||
if coordinates is not None:
|
|
||||||
return coordinates
|
|
||||||
|
|
||||||
try:
|
|
||||||
return latlon_from_google_maps_url(url)
|
|
||||||
except requests.RequestException:
|
|
||||||
return None
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def detect_page_coordinates(soup: BeautifulSoup) -> tuple[float, float] | None:
|
|
||||||
"""Detect venue coordinates from Google Maps or OSM links."""
|
|
||||||
for link in soup.find_all("a", href=True):
|
|
||||||
href = str(link["href"]).strip()
|
|
||||||
if not href:
|
|
||||||
continue
|
|
||||||
|
|
||||||
coordinates = parse_coordinates_from_url(href)
|
|
||||||
if coordinates is not None:
|
|
||||||
return coordinates
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def parse_date(date_str: str) -> datetime:
|
|
||||||
"""Parse ISO date or datetime into a naive datetime (UTC if tz-aware)."""
|
|
||||||
try:
|
|
||||||
dt = datetime.fromisoformat(date_str)
|
|
||||||
except ValueError:
|
|
||||||
# fallback: just take the YYYY-MM-DD part
|
|
||||||
dt = datetime.fromisoformat(date_str.split("T")[0])
|
|
||||||
|
|
||||||
if dt.tzinfo is not None:
|
|
||||||
# normalise tz-aware datetimes to UTC, then strip tzinfo
|
|
||||||
dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
|
|
||||||
|
|
||||||
return dt
|
|
||||||
|
|
||||||
|
|
||||||
def url_has_year_component(url: str) -> bool:
|
|
||||||
"""Return True if the URL contains any digit (assume year-specific)."""
|
|
||||||
return any(ch.isdigit() for ch in url)
|
|
||||||
|
|
||||||
|
|
||||||
def insert_sorted(
|
|
||||||
conferences: list[dict[str, typing.Any]], new_conf: dict[str, typing.Any]
|
|
||||||
) -> list[dict[str, typing.Any]]:
|
|
||||||
"""Insert new_conf into conferences sorted by start date, skip if duplicate URL (with year awareness)."""
|
|
||||||
new_url = new_conf.get("url")
|
|
||||||
new_start = parse_date(str(new_conf["start"]))
|
|
||||||
new_year = new_start.year
|
|
||||||
|
|
||||||
if new_url:
|
|
||||||
for conf in conferences:
|
|
||||||
if conf.get("url") == new_url:
|
|
||||||
existing_start = parse_date(str(conf["start"]))
|
|
||||||
existing_year = existing_start.year
|
|
||||||
|
|
||||||
if url_has_year_component(new_url):
|
|
||||||
# If URL has a year in it, treat exact URL as unique
|
|
||||||
print(f"⚠️ Conference with URL {new_url} already exists, skipping.")
|
|
||||||
return conferences
|
|
||||||
elif existing_year == new_year:
|
|
||||||
# Same URL, same year → definitely duplicate
|
|
||||||
print(
|
|
||||||
f"⚠️ Conference already exists in YAML "
|
|
||||||
f"(url={new_url}, year={existing_year}), skipping."
|
|
||||||
)
|
|
||||||
return conferences
|
|
||||||
else:
|
|
||||||
# Same URL reused for different year → allow new entry
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Insert sorted by start date
|
|
||||||
for idx, conf in enumerate(conferences):
|
|
||||||
existing_start = parse_date(str(conf["start"]))
|
|
||||||
if new_start < existing_start:
|
|
||||||
conferences.insert(idx, new_conf)
|
|
||||||
return conferences
|
|
||||||
conferences.append(new_conf)
|
|
||||||
return conferences
|
|
||||||
|
|
||||||
|
|
||||||
def validate_country(conf: dict[str, typing.Any]) -> None:
|
|
||||||
"""Ensure country is a valid ISO 3166-1 alpha-2 code, normalise if possible."""
|
|
||||||
country = conf.get("country")
|
|
||||||
if not country:
|
|
||||||
return
|
|
||||||
|
|
||||||
country = country.strip()
|
|
||||||
# Already a 2-letter code
|
|
||||||
if len(country) == 2:
|
|
||||||
if pycountry.countries.get(alpha_2=country.upper()):
|
|
||||||
conf["country"] = country.lower()
|
|
||||||
return
|
|
||||||
else:
|
|
||||||
raise ValueError(f"❌ Invalid ISO 3166-1 code '{country}'")
|
|
||||||
|
|
||||||
# Try lookup by name
|
|
||||||
match = pycountry.countries.get(name=country)
|
|
||||||
if not match:
|
|
||||||
# fuzzy lookup (handles “United States” vs “United States of America”)
|
|
||||||
try:
|
|
||||||
match = pycountry.countries.search_fuzzy(country)[0]
|
|
||||||
except LookupError:
|
|
||||||
raise ValueError(f"❌ Country '{country}' not recognised as ISO 3166-1")
|
|
||||||
|
|
||||||
conf["country"] = match.alpha_2.lower()
|
|
||||||
|
|
||||||
|
|
||||||
def parse_yaml_datetime(value: typing.Any) -> datetime | None:
|
|
||||||
"""Convert YAML date/datetime values to a datetime."""
|
|
||||||
if isinstance(value, datetime):
|
|
||||||
return value
|
|
||||||
|
|
||||||
if isinstance(value, date):
|
|
||||||
return datetime.combine(value, time())
|
|
||||||
|
|
||||||
if isinstance(value, str):
|
|
||||||
try:
|
|
||||||
return datetime.fromisoformat(value)
|
|
||||||
except ValueError:
|
|
||||||
return datetime.combine(date.fromisoformat(value.split("T")[0]), time())
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def same_type_as_start(
|
|
||||||
start_value: typing.Any,
|
|
||||||
new_dt: datetime,
|
|
||||||
keep_timezone: bool = True,
|
|
||||||
prefer_datetime: bool = False,
|
|
||||||
) -> typing.Any:
|
|
||||||
"""Return end value shaped like the start value when possible."""
|
|
||||||
if isinstance(start_value, datetime):
|
|
||||||
if keep_timezone:
|
|
||||||
return new_dt
|
|
||||||
return new_dt.replace(tzinfo=None)
|
|
||||||
|
|
||||||
if isinstance(start_value, date):
|
|
||||||
if prefer_datetime:
|
|
||||||
return new_dt
|
|
||||||
return new_dt.date()
|
|
||||||
|
|
||||||
if isinstance(start_value, str):
|
|
||||||
if prefer_datetime or " " in start_value or "T" in start_value:
|
|
||||||
return new_dt.isoformat(sep=" ")
|
|
||||||
return new_dt.date().isoformat()
|
|
||||||
|
|
||||||
return new_dt
|
|
||||||
|
|
||||||
|
|
||||||
def maybe_extract_explicit_end_time(source_text: str) -> int | None:
|
|
||||||
"""Extract an explicit 12-hour clock end time for Geomob-style pages."""
|
|
||||||
lowered = source_text.lower()
|
|
||||||
|
|
||||||
if "10pm" in lowered or "10 pm" in lowered or "22:00" in lowered:
|
|
||||||
return 22
|
|
||||||
|
|
||||||
if "11pm" in lowered or "11 pm" in lowered or "23:00" in lowered:
|
|
||||||
return 23
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def normalise_end_field(new_conf: dict[str, typing.Any], source_text: str) -> None:
    """Ensure an end value exists, with a Geomob-specific fallback.

    Geomob events always get an end set to 22:00 (or an explicit 23:00
    found in the page text); other events default a missing end to the
    start value. Entries without a parseable start are left untouched.
    """
    start_raw = new_conf.get("start")
    if start_raw is None:
        return

    start_dt = parse_yaml_datetime(start_raw)
    if start_dt is None:
        return

    conf_name = str(new_conf.get("name", "")).lower()
    conf_url = str(new_conf.get("url", "")).lower()

    if "geomob" in conf_name or "thegeomob.com" in conf_url:
        # Explicit hours on the page win; otherwise assume a 22:00 finish.
        end_hour = maybe_extract_explicit_end_time(source_text) or 22
        end_dt = start_dt.replace(hour=end_hour, minute=0, second=0, microsecond=0)
        new_conf["end"] = same_type_as_start(start_raw, end_dt, prefer_datetime=True)
        return

    # Non-Geomob: only fill in a missing end, never overwrite one.
    new_conf.setdefault("end", same_type_as_start(start_raw, start_dt))
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """Fetch page, generate YAML via LLM, update conferences.yaml.

    Reads the target URL from ``sys.argv[1]``, converts the page text to a
    conference entry via the LLM, validates and normalises it, and writes
    the updated list back to the data file.

    Raises:
        SystemExit: with a usage message when no URL argument is supplied.
    """
    # Fail early with a usage hint instead of a bare IndexError traceback.
    if len(sys.argv) < 2:
        raise SystemExit(f"usage: {sys.argv[0]} URL")
    url = sys.argv[1]
    yaml_path = os.path.expanduser("~/src/personal-data/conferences.yaml")

    # Load conferences first so the duplicate check can run before any
    # network or API calls.
    with open(yaml_path) as f:
        conferences = yaml.safe_load(f)

    # Early exit: a URL with a year component identifies one specific
    # edition, so an exact URL match means it is already recorded.
    if url_has_year_component(url):
        for conf in conferences:
            if conf.get("url") == url:
                print(
                    "⚠️ Conference already exists in YAML "
                    + f"(url={url}), skipping before API call."
                )
                return

    # Otherwise proceed with full workflow
    soup = fetch_webpage(url)
    source_text = webpage_to_text(soup)
    detected_coordinates = detect_page_coordinates(soup)
    prompt = build_prompt(url, source_text, detected_coordinates)
    new_yaml_text = get_from_open_ai(prompt)["yaml"]

    new_conf = yaml.safe_load(new_yaml_text)
    # The model occasionally wraps the entry in a one-element list.
    if isinstance(new_conf, list):
        new_conf = new_conf[0]

    validate_country(new_conf)
    normalise_end_field(new_conf, source_text)

    # Coordinates scraped from the page override anything the model
    # inferred — they come straight from a map link on the page.
    if detected_coordinates is not None:
        new_conf["latitude"] = detected_coordinates[0]
        new_conf["longitude"] = detected_coordinates[1]

    updated = insert_sorted(conferences, new_conf)

    with open(yaml_path, "w") as f:
        text = yaml.dump(updated, sort_keys=False, allow_unicode=True)
        text = text.replace("\n- name:", "\n\n- name:")  # keep blank lines
        f.write(text.lstrip())
|
|
||||||
|
|
||||||
|
# NOTE(review): the original span contained two conflicting __main__ guards
# (diff residue); collapsed to a single entry point.
from agenda.add_new_conference import main

if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
|
|
|
||||||
160
tests/test_add_new_conference.py
Normal file
160
tests/test_add_new_conference.py
Normal file
|
|
@ -0,0 +1,160 @@
|
||||||
|
"""Tests for agenda.add_new_conference."""
|
||||||
|
|
||||||
|
from datetime import date, datetime
|
||||||
|
import typing
|
||||||
|
|
||||||
|
import lxml.html # type: ignore[import-untyped]
|
||||||
|
import pytest
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from agenda import add_new_conference
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_osm_url_mlat_mlon() -> None:
    """OpenStreetMap URLs with mlat/mlon should parse."""
    osm_url = "https://www.openstreetmap.org/?mlat=51.5&mlon=-0.12"
    assert add_new_conference.parse_osm_url(osm_url) == (51.5, -0.12)
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_google_maps_latlon_at_pattern() -> None:
    """Google Maps @lat,lon URLs should parse."""
    maps_url = "https://www.google.com/maps/place/Venue/@51.5242464,-0.0997024,17z/"
    assert add_new_conference.extract_google_maps_latlon(maps_url) == (
        51.5242464,
        -0.0997024,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def test_insert_sorted_allows_same_url_different_year_without_year_component() -> None:
    """The same non-year-specific URL can be reused for a different year."""
    existing: list[dict[str, typing.Any]] = [
        {
            "name": "OldConf",
            "start": date(2025, 6, 1),
            "url": "https://example.com/conf",
        }
    ]
    newcomer: dict[str, typing.Any] = {
        "name": "NewConf",
        "start": date(2026, 6, 1),
        "url": "https://example.com/conf",
    }

    result = add_new_conference.insert_sorted(existing, newcomer)

    assert len(result) == 2
    assert result[1]["name"] == "NewConf"
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_country_normalises_name() -> None:
    """Country names should be normalised to alpha-2 codes."""
    record: dict[str, typing.Any] = {"country": "United Kingdom"}
    add_new_conference.validate_country(record)
    assert record["country"] == "gb"
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalise_end_field_defaults_single_day_date() -> None:
    """Non-Geomob conferences should default end to the start date."""
    record: dict[str, typing.Any] = {"name": "PyCon", "start": date(2026, 4, 10)}
    add_new_conference.normalise_end_field(record, "plain text")
    assert record["end"] == date(2026, 4, 10)
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalise_end_field_sets_geomob_end_time() -> None:
    """Geomob conferences should default to a 22:00 end time."""
    record: dict[str, typing.Any] = {
        "name": "Geomob London",
        "url": "https://thegeomob.com/post/jan-28th-2026-geomoblon-details",
        "start": date(2026, 1, 28),
    }

    add_new_conference.normalise_end_field(record, "see you there")

    assert record["end"] == datetime(2026, 1, 28, 22, 0)
|
||||||
|
|
||||||
|
|
||||||
|
def test_detect_page_coordinates_uses_first_supported_link() -> None:
    """Page coordinate detection should inspect anchor hrefs."""
    markup = (
        "<html><body>"
        '<a href="https://example.com">Example</a>'
        '<a href="https://www.openstreetmap.org/?mlat=51.5&mlon=-0.12">Map</a>'
        "</body></html>"
    )
    doc = lxml.html.fromstring(markup)
    assert add_new_conference.detect_page_coordinates(doc) == (51.5, -0.12)
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_new_conference_updates_yaml(
    tmp_path: typing.Any, monkeypatch: pytest.MonkeyPatch
) -> None:
    """The end-to-end import flow should append a generated conference."""
    yaml_path = tmp_path / "conferences.yaml"
    existing = [
        {
            "name": "ExistingConf",
            "start": date(2026, 4, 1),
            "end": date(2026, 4, 2),
            "url": "https://example.com/existing",
        }
    ]
    yaml_path.write_text(yaml.dump(existing, sort_keys=False))

    # Fake page whose only map link supplies the coordinates.
    page = lxml.html.fromstring(
        "<html><body>"
        '<a href="https://www.openstreetmap.org/?mlat=40.0&mlon=-74.0">Map</a>'
        "</body></html>"
    )

    # Canned LLM output for the new conference entry.
    generated = yaml.dump(
        {
            "name": "NewConf",
            "topic": "Tech",
            "location": "New York",
            "country": "United States",
            "start": date(2026, 5, 3),
            "url": "https://example.com/newconf",
        },
        sort_keys=False,
    )

    monkeypatch.setattr(add_new_conference, "fetch_webpage", lambda url: page)
    monkeypatch.setattr(
        add_new_conference, "webpage_to_text", lambda parsed: "Conference details"
    )
    monkeypatch.setattr(
        add_new_conference, "get_from_open_ai", lambda prompt: {"yaml": generated}
    )

    added = add_new_conference.add_new_conference(
        "https://example.com/newconf", str(yaml_path)
    )

    assert added is True
    written = yaml.safe_load(yaml_path.read_text())
    assert len(written) == 2
    new_entry = written[1]
    assert new_entry["name"] == "NewConf"
    assert new_entry["country"] == "us"
    assert new_entry["end"] == date(2026, 5, 3)
    assert new_entry["latitude"] == 40.0
    assert new_entry["longitude"] == -74.0
|
||||||
Loading…
Add table
Add a link
Reference in a new issue