Improve conference importer date handling
This commit is contained in:
parent
56eea3f7a3
commit
dbce9e5358
2 changed files with 401 additions and 46 deletions
|
|
@ -16,7 +16,7 @@ import pycountry
|
||||||
import requests
|
import requests
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from agenda.conference import conference_date_fields
|
from agenda.conference import ConferenceSeries, conference_date_fields, load_series
|
||||||
|
|
||||||
USER_AGENT = "add-new-conference/0.1"
|
USER_AGENT = "add-new-conference/0.1"
|
||||||
COORDINATE_PATTERNS = (
|
COORDINATE_PATTERNS = (
|
||||||
|
|
@ -36,58 +36,139 @@ def read_api_key() -> str:
|
||||||
return parser["openai"]["api_key"]
|
return parser["openai"]["api_key"]
|
||||||
|
|
||||||
|
|
||||||
def build_prompt(
|
def conference_yaml_format_description() -> str:
|
||||||
url: str,
|
"""Return the conference YAML format description for LLM prompts."""
|
||||||
source_text: str,
|
return """
|
||||||
detected_coordinates: tuple[float, float] | None,
|
Use this YAML format for one conference entry.
|
||||||
) -> str:
|
|
||||||
"""Build prompt with embedded YAML examples."""
|
Required fields:
|
||||||
examples = """
|
- `name`: event name.
|
||||||
|
- `topic`: topic/category.
|
||||||
|
- `location`: city or location label. Use `TBC` if the page confirms a future
|
||||||
|
event but not a city.
|
||||||
|
- Date information in nested `dates`.
|
||||||
|
|
||||||
|
Preferred date shape:
|
||||||
|
- `dates.status`: one of `exact`, `tentative`, or `approximate`.
|
||||||
|
- For `exact`: use when the page confirms specific dates/times. Include
|
||||||
|
`dates.start` and `dates.end` as YAML dates or timezone-aware datetimes.
|
||||||
|
- For `tentative`: use when specific dates are guessed or explicitly
|
||||||
|
unconfirmed. Include `dates.start`, `dates.end`, and preferably `dates.label`
|
||||||
|
and `dates.basis`.
|
||||||
|
- For `approximate`: use when only a broad date phrase is known. Include
|
||||||
|
`dates.label`, `dates.earliest`, and `dates.latest`. Examples: `March 2027`
|
||||||
|
should become earliest `2027-03-01`, latest `2027-03-31`; `mid-April 2027`
|
||||||
|
should become a sensible bounded range such as `2027-04-11` to `2027-04-20`.
|
||||||
|
|
||||||
|
Important date rule:
|
||||||
|
- If the source page contains exact dates, output `dates.status: exact` even if
|
||||||
|
the existing agenda entry or conference announcement previously had only
|
||||||
|
approximate dates.
|
||||||
|
- Always include an end date for `exact` and `tentative`. For a single-day
|
||||||
|
event, `dates.end` can be the same as `dates.start`.
|
||||||
|
- Do not output legacy top-level `start`, `end`, or `date_status`.
|
||||||
|
|
||||||
|
Common optional fields:
|
||||||
|
- `series`: a key from the known conference series list, when this event belongs
|
||||||
|
to a listed series.
|
||||||
|
- `country`: valid ISO 3166-1 alpha-2 country code in lowercase, for example
|
||||||
|
`ca`, `gb`, `us`. Do not output country names.
|
||||||
|
- `venue`, `address`, `latitude`, `longitude`, `url`, `cfp_url`, `cfp_end`,
|
||||||
|
`hashtag`, `description`.
|
||||||
|
- `free`, `price`, `currency`, `hackathon`, `online`, `attendees`.
|
||||||
|
- Do not include `going`, `registered`, `accommodation_booked`,
|
||||||
|
`transport_booked`, or `trip` unless the source explicitly says they apply to
|
||||||
|
my attendance.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def yaml_example_text() -> str:
|
||||||
|
"""Return examples of the conference YAML format."""
|
||||||
|
return """
|
||||||
- name: Geomob London
|
- name: Geomob London
|
||||||
|
series: geomob-london
|
||||||
topic: Maps
|
topic: Maps
|
||||||
location: London
|
location: London
|
||||||
country: gb
|
country: gb
|
||||||
start: 2026-01-28 18:00:00+00:00
|
dates:
|
||||||
end: 2026-01-28 23:00:00+00:00
|
status: exact
|
||||||
|
start: 2026-01-28 18:00:00+00:00
|
||||||
|
end: 2026-01-28 22:00:00+00:00
|
||||||
url: https://thegeomob.com/post/jan-28th-2026-geomoblon-details
|
url: https://thegeomob.com/post/jan-28th-2026-geomoblon-details
|
||||||
venue: Geovation Hub
|
venue: Geovation Hub
|
||||||
address: Sutton Yard, 65 Goswell Rd, London EC1V 7EN
|
address: Sutton Yard, 65 Goswell Rd, London EC1V 7EN
|
||||||
latitude: 51.5242464
|
latitude: 51.5242464
|
||||||
longitude: -0.0997024
|
longitude: -0.0997024
|
||||||
free: true
|
free: true
|
||||||
going: true
|
|
||||||
hashtag: '#geomobLON'
|
hashtag: '#geomobLON'
|
||||||
|
|
||||||
- name: DebConf 25
|
- name: DebConf 25
|
||||||
|
series: debconf
|
||||||
topic: Debian
|
topic: Debian
|
||||||
location: Plouzané (Breast)
|
location: Plouzane
|
||||||
country: fr
|
country: fr
|
||||||
start: 2025-07-07
|
dates:
|
||||||
end: 2025-07-20
|
status: exact
|
||||||
|
start: 2025-07-07
|
||||||
|
end: 2025-07-20
|
||||||
url: https://wiki.debian.org/DebConf/25
|
url: https://wiki.debian.org/DebConf/25
|
||||||
going: true
|
|
||||||
cfp_url: https://debconf25.debconf.org/talks/new/
|
cfp_url: https://debconf25.debconf.org/talks/new/
|
||||||
venue: École nationale supérieure Mines-Télécom Atlantique Bretagne Pays de la Loire
|
venue: Ecole nationale superieure Mines-Telecom Atlantique Bretagne Pays de la Loire
|
||||||
campus de Brest
|
campus de Brest
|
||||||
latitude: 48.35934
|
latitude: 48.35934
|
||||||
longitude: -4.569889
|
longitude: -4.569889
|
||||||
|
|
||||||
- name: Wikimedia Hackathon
|
- name: Wikimedia Hackathon
|
||||||
|
series: wikimedia-hackathon
|
||||||
topic: Wikimedia
|
topic: Wikimedia
|
||||||
location: Istanbul
|
location: Albania
|
||||||
country: tr
|
country: al
|
||||||
start: 2025-05-02
|
dates:
|
||||||
end: 2025-05-04
|
status: approximate
|
||||||
venue: Renaissance Polat Istanbul Hotel
|
label: mid-April 2027
|
||||||
address: Yeşilyurt, Sahil Yolu Cd. No:2, 34149 Bakırköy/İstanbul
|
earliest: 2027-04-11
|
||||||
latitude: 40.959946
|
latest: 2027-04-20
|
||||||
longitude: 28.838763
|
url: https://www.mediawiki.org/wiki/Wikimedia_Hackathon_2027
|
||||||
url: https://www.mediawiki.org/wiki/Wikimedia_Hackathon_2025
|
|
||||||
going: true
|
|
||||||
free: true
|
|
||||||
hackathon: true
|
hackathon: true
|
||||||
registered: true
|
|
||||||
|
- name: PyCascades
|
||||||
|
series: pycascades
|
||||||
|
topic: Python
|
||||||
|
location: Seattle, Washington
|
||||||
|
country: us
|
||||||
|
dates:
|
||||||
|
status: approximate
|
||||||
|
label: March 2027
|
||||||
|
earliest: 2027-03-01
|
||||||
|
latest: 2027-03-31
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def series_prompt_text(series: dict[str, ConferenceSeries]) -> str:
    """Return compact known-series text for the LLM prompt.

    Every series becomes one bullet of the form
    ``- <series_id>: <name>; topic: ...; usual location: ...; country: ...``.
    The optional details are only listed when present and truthy, and the
    bullets are ordered by series ID so the prompt is stable between runs.

    Args:
        series: Mapping of series ID to that series' metadata.

    Returns:
        The newline-joined bullet list preceded by a heading line, or a fixed
        placeholder sentence when ``series`` is empty.
    """
    if not series:
        return "No known conference series loaded."

    lines = ["Known conference series IDs:"]
    for series_id, item in sorted(series.items()):
        # Fall back to the ID so a single series entry without a name cannot
        # abort the whole import with a KeyError.
        details = [str(item.get("name") or series_id)]
        if topic := item.get("topic"):
            details.append(f"topic: {topic}")
        if location := item.get("usual_location"):
            details.append(f"usual location: {location}")
        if country := item.get("country"):
            details.append(f"country: {country}")
        lines.append(f"- {series_id}: " + "; ".join(details))
    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def build_prompt(
|
||||||
|
url: str,
|
||||||
|
source_text: str,
|
||||||
|
detected_coordinates: tuple[float, float] | None,
|
||||||
|
series: dict[str, ConferenceSeries] | None = None,
|
||||||
|
) -> str:
|
||||||
|
"""Build prompt with embedded YAML format details and examples."""
|
||||||
coordinate_note = ""
|
coordinate_note = ""
|
||||||
if detected_coordinates is not None:
|
if detected_coordinates is not None:
|
||||||
coordinate_note = (
|
coordinate_note = (
|
||||||
|
|
@ -99,28 +180,26 @@ def build_prompt(
|
||||||
prompt = f"""
|
prompt = f"""
|
||||||
I keep a record of interesting conferences in a YAML file.
|
I keep a record of interesting conferences in a YAML file.
|
||||||
|
|
||||||
|
Format rules:
|
||||||
|
|
||||||
|
{conference_yaml_format_description()}
|
||||||
|
|
||||||
|
{series_prompt_text(series or {})}
|
||||||
|
|
||||||
Here are some examples of the format I use:
|
Here are some examples of the format I use:
|
||||||
|
|
||||||
{examples}
|
{yaml_example_text()}
|
||||||
|
|
||||||
Now here is a new conference of interest:
|
Now here is a new conference of interest:
|
||||||
|
|
||||||
Conference URL: {url}
|
Conference URL: {url}
|
||||||
|
|
||||||
Return the YAML representation for this conference following the
|
Return the YAML representation for this conference following the same style and
|
||||||
same style and keys as the examples. Only include keys if the
|
keys as the examples. Only include keys if the information is available. Do not
|
||||||
information is available. Do not invent details.
|
invent details.
|
||||||
|
|
||||||
Important: the `country` field must always be a valid ISO 3166-1 alpha-2
|
Important: if this is a Geomob event, use a `dates.end` datetime of 22:00 local
|
||||||
country code (two lowercase letters, e.g. `ca` for Canada, `gb` for United Kingdom).
|
time on the event date unless the page explicitly provides a different end time.
|
||||||
Do not output full country names.
|
|
||||||
|
|
||||||
Important: always include an `end` field. If the event is a single-day event,
|
|
||||||
the `end` can be the same date as `start`, or a same-day datetime if the page
|
|
||||||
provides an end time.
|
|
||||||
|
|
||||||
Important: if this is a Geomob event, use an `end` datetime of 22:00 local time
|
|
||||||
on the event date unless the page explicitly provides a different end time.
|
|
||||||
{coordinate_note}
|
{coordinate_note}
|
||||||
|
|
||||||
Wrap your answer in a JSON object with a single key "yaml".
|
Wrap your answer in a JSON object with a single key "yaml".
|
||||||
|
|
@ -261,9 +340,26 @@ def parse_date(date_str: str) -> datetime:
|
||||||
return dt
|
return dt
|
||||||
|
|
||||||
|
|
||||||
|
def data_dir_from_conferences_path(yaml_path: str) -> str:
    """Return the absolute directory containing the given conferences YAML file.

    The path is made absolute first (a relative path is resolved against the
    current working directory) and its final component is then dropped.
    """
    directory, _filename = os.path.split(os.path.abspath(yaml_path))
    return directory
|
||||||
|
|
||||||
|
|
||||||
def url_has_year_component(url: str) -> bool:
    """Return True if the URL names a specific year or numbered edition.

    Both the path segments (split on ``/``) and the host labels (split on
    ``.``) are inspected. A component counts as year-specific when it either

    * contains a ``20xx`` year that stands alone or is delimited by ``-``,
      ``_`` or ``.`` (``2026``, ``pycon-2026``, ``2026.html``), or
    * is an edition marker made of one or two digits followed by ``x``
      (``24x``), matched case-insensitively.

    Digits that are merely part of a longer token (``foss4gna``, ``12026``)
    do not count, so generic URLs that are reused every year return False.

    Args:
        url: The conference URL to inspect.

    Returns:
        True when any path segment or host label is year/edition specific.
    """
    # A single search covers the stand-alone year as well, because ``^`` and
    # ``$`` are accepted as delimiters on either side.
    year_token = re.compile(r"(?:^|[-_.])20\d{2}(?:$|[-_.])")
    edition_token = re.compile(r"\d{1,2}x", flags=re.IGNORECASE)

    parsed = urlparse(url)
    components = [part for part in parsed.path.split("/") if part]
    # ``hostname`` (unlike ``netloc``) excludes userinfo and the port, so only
    # real host labels are examined.
    components.extend(
        label for label in (parsed.hostname or "").split(".") if label
    )

    return any(
        year_token.search(component) or edition_token.fullmatch(component)
        for component in components
    )
|
||||||
|
|
||||||
|
|
||||||
def insert_sorted(
|
def insert_sorted(
|
||||||
|
|
@ -273,6 +369,13 @@ def insert_sorted(
|
||||||
new_url = new_conf.get("url")
|
new_url = new_conf.get("url")
|
||||||
new_start = conference_sort_datetime(new_conf)
|
new_start = conference_sort_datetime(new_conf)
|
||||||
new_year = new_start.year
|
new_year = new_start.year
|
||||||
|
update_idx = find_inexact_existing_conference(conferences, new_conf)
|
||||||
|
if update_idx is not None:
|
||||||
|
existing = conferences.pop(update_idx)
|
||||||
|
merged = dict(existing)
|
||||||
|
merged.update(new_conf)
|
||||||
|
print(f"Updating inexact conference entry: {existing.get('name')}")
|
||||||
|
return insert_sorted(conferences, merged)
|
||||||
|
|
||||||
if new_url:
|
if new_url:
|
||||||
for conf in conferences:
|
for conf in conferences:
|
||||||
|
|
@ -299,6 +402,56 @@ def insert_sorted(
|
||||||
return conferences
|
return conferences
|
||||||
|
|
||||||
|
|
||||||
|
def date_ranges_overlap(
    first: dict[str, typing.Any], second: dict[str, typing.Any]
) -> bool:
    """Return True if the date ranges of two conference entries overlap.

    Each range is taken from the ``start_date`` and ``end_date`` values that
    ``conference_date_fields`` reports for the entry. The comparison is
    inclusive: ranges that merely touch on a shared day count as overlapping.
    """
    first_fields = conference_date_fields(first)
    second_fields = conference_date_fields(second)

    first_start = typing.cast(date, first_fields["start_date"])
    second_end = typing.cast(date, second_fields["end_date"])
    if not first_start <= second_end:
        # The first range begins only after the second one has finished.
        return False

    second_start = typing.cast(date, second_fields["start_date"])
    first_end = typing.cast(date, first_fields["end_date"])
    return second_start <= first_end
|
||||||
|
|
||||||
|
|
||||||
|
def same_conference_identity(
    existing: dict[str, typing.Any], new_conf: dict[str, typing.Any]
) -> bool:
    """Return True if two entries appear to represent the same conference.

    The checks are applied in this order:

    1. Both entries have the same non-empty ``url``: same conference.
    2. Both entries have the same non-empty ``series``: same conference only
       if their date ranges overlap.
    3. Otherwise the names are compared case-insensitively, and the date
       ranges must overlap as well.

    Args:
        existing: Entry already stored in the conference list.
        new_conf: Newly imported entry.

    Returns:
        True when the entries are considered the same event.
    """
    existing_url = existing.get("url")
    if existing_url and existing_url == new_conf.get("url"):
        return True

    existing_series = existing.get("series")
    if existing_series and existing_series == new_conf.get("series"):
        return date_ranges_overlap(existing, new_conf)

    existing_name = str(existing.get("name") or "").casefold()
    new_name = str(new_conf.get("name") or "").casefold()
    # Two entries that both lack a name must not be treated as identical just
    # because their (empty) names compare equal.
    if not existing_name or existing_name != new_name:
        return False
    return date_ranges_overlap(existing, new_conf)
|
||||||
|
|
||||||
|
|
||||||
|
def find_inexact_existing_conference(
    conferences: list[dict[str, typing.Any]], new_conf: dict[str, typing.Any]
) -> int | None:
    """Return the index of an inexact entry that the exact new entry can replace.

    Only a new entry whose ``date_status`` is ``exact`` can trigger an update.
    The result is the position of the first existing entry that is itself not
    ``exact`` and that ``same_conference_identity`` matches against the new
    entry, or None when there is no such entry.
    """
    if conference_date_fields(new_conf)["date_status"] != "exact":
        return None

    return next(
        (
            position
            for position, candidate in enumerate(conferences)
            if conference_date_fields(candidate)["date_status"] != "exact"
            and same_conference_identity(candidate, new_conf)
        ),
        None,
    )
|
||||||
|
|
||||||
|
|
||||||
def conference_sort_datetime(conf: dict[str, typing.Any]) -> datetime:
|
def conference_sort_datetime(conf: dict[str, typing.Any]) -> datetime:
|
||||||
"""Return conference sort date as a datetime."""
|
"""Return conference sort date as a datetime."""
|
||||||
sort_date = conference_date_fields(conf)["sort_date"]
|
sort_date = conference_date_fields(conf)["sort_date"]
|
||||||
|
|
@ -374,6 +527,31 @@ def same_type_as_start(
|
||||||
return new_dt
|
return new_dt
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_dates_field(conf: dict[str, typing.Any]) -> None:
    """Move legacy top-level date fields into the nested ``dates`` mapping.

    The entry is modified in place:

    * No ``dates`` mapping, but a top-level ``start`` or ``end``: a new
      ``dates`` mapping is built from ``start``, ``end`` and ``date_status``.
      A missing or null ``end`` falls back to ``start``; a missing or null
      ``date_status`` becomes ``"exact"``.
    * An existing ``dates`` mapping: top-level ``start``, ``end`` and
      ``date_status`` only fill keys the mapping does not already have
      (``date_status`` maps to ``status``).
    * In every case the legacy top-level keys are removed afterwards.

    Args:
        conf: Conference entry to normalise in place.
    """
    raw_dates = conf.get("dates")
    dates = raw_dates if isinstance(raw_dates, dict) else None

    if dates is None and ("start" in conf or "end" in conf):
        start = conf.pop("start", None)
        end = conf.pop("end", None)
        status = conf.pop("date_status", None)
        conf["dates"] = {
            # An explicit null must not end up as the string "None".
            "status": "exact" if status is None else str(status),
            "start": start,
            # Treat an explicit null end like a missing one.
            "end": start if end is None else end,
        }
        return

    if dates is not None:
        # Values already nested under ``dates`` win over legacy duplicates.
        for legacy_key, nested_key in (
            ("start", "start"),
            ("end", "end"),
            ("date_status", "status"),
        ):
            if legacy_key in conf and nested_key not in dates:
                dates[nested_key] = conf[legacy_key]

    for legacy_key in ("start", "end", "date_status"):
        conf.pop(legacy_key, None)
|
||||||
|
|
||||||
|
|
||||||
def maybe_extract_explicit_end_time(source_text: str) -> int | None:
|
def maybe_extract_explicit_end_time(source_text: str) -> int | None:
|
||||||
"""Extract an explicit 12-hour clock end time for Geomob-style pages."""
|
"""Extract an explicit 12-hour clock end time for Geomob-style pages."""
|
||||||
lowered = source_text.lower()
|
lowered = source_text.lower()
|
||||||
|
|
@ -435,6 +613,14 @@ def load_conferences(yaml_path: str) -> list[dict[str, typing.Any]]:
|
||||||
return typing.cast(list[dict[str, typing.Any]], loaded)
|
return typing.cast(list[dict[str, typing.Any]], loaded)
|
||||||
|
|
||||||
|
|
||||||
|
def load_conference_series_for_path(yaml_path: str) -> dict[str, ConferenceSeries]:
    """Load the conference series stored alongside the conferences YAML file.

    The directory is derived from ``yaml_path`` with
    ``data_dir_from_conferences_path`` and handed to ``load_series``.
    """
    data_dir = data_dir_from_conferences_path(yaml_path)
    loaded = load_series(data_dir)
    return typing.cast(dict[str, ConferenceSeries], loaded)
|
||||||
|
|
||||||
|
|
||||||
def dump_conferences(yaml_path: str, conferences: list[dict[str, typing.Any]]) -> None:
|
def dump_conferences(yaml_path: str, conferences: list[dict[str, typing.Any]]) -> None:
|
||||||
"""Write conference YAML."""
|
"""Write conference YAML."""
|
||||||
with open(yaml_path, "w") as file:
|
with open(yaml_path, "w") as file:
|
||||||
|
|
@ -450,6 +636,9 @@ def add_new_conference(url: str, yaml_path: str) -> bool:
|
||||||
if url_has_year_component(url):
|
if url_has_year_component(url):
|
||||||
for conf in conferences:
|
for conf in conferences:
|
||||||
if conf.get("url") == url:
|
if conf.get("url") == url:
|
||||||
|
fields = conference_date_fields(conf)
|
||||||
|
if fields["date_status"] != "exact":
|
||||||
|
continue
|
||||||
print(
|
print(
|
||||||
"⚠️ Conference already exists in YAML "
|
"⚠️ Conference already exists in YAML "
|
||||||
+ f"(url={url}), skipping before API call."
|
+ f"(url={url}), skipping before API call."
|
||||||
|
|
@ -459,7 +648,8 @@ def add_new_conference(url: str, yaml_path: str) -> bool:
|
||||||
soup = fetch_webpage(url)
|
soup = fetch_webpage(url)
|
||||||
source_text = webpage_to_text(soup)
|
source_text = webpage_to_text(soup)
|
||||||
detected_coordinates = detect_page_coordinates(soup)
|
detected_coordinates = detect_page_coordinates(soup)
|
||||||
prompt = build_prompt(url, source_text, detected_coordinates)
|
series = load_conference_series_for_path(yaml_path)
|
||||||
|
prompt = build_prompt(url, source_text, detected_coordinates, series)
|
||||||
new_yaml_text = get_from_open_ai(prompt)["yaml"]
|
new_yaml_text = get_from_open_ai(prompt)["yaml"]
|
||||||
|
|
||||||
new_conf = yaml.safe_load(new_yaml_text)
|
new_conf = yaml.safe_load(new_yaml_text)
|
||||||
|
|
@ -468,7 +658,9 @@ def add_new_conference(url: str, yaml_path: str) -> bool:
|
||||||
assert isinstance(new_conf, dict)
|
assert isinstance(new_conf, dict)
|
||||||
|
|
||||||
validate_country(new_conf)
|
validate_country(new_conf)
|
||||||
|
normalize_dates_field(new_conf)
|
||||||
normalise_end_field(new_conf, source_text)
|
normalise_end_field(new_conf, source_text)
|
||||||
|
normalize_dates_field(new_conf)
|
||||||
|
|
||||||
if detected_coordinates is not None:
|
if detected_coordinates is not None:
|
||||||
new_conf["latitude"] = detected_coordinates[0]
|
new_conf["latitude"] = detected_coordinates[0]
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,19 @@ def test_extract_google_maps_latlon_at_pattern() -> None:
|
||||||
assert result == (51.5242464, -0.0997024)
|
assert result == (51.5242464, -0.0997024)
|
||||||
|
|
||||||
|
|
||||||
|
def test_url_has_year_component() -> None:
    """Only genuine year or edition components mark a URL as year-specific."""
    expectations = {
        "https://www.foss4gna.org/": False,
        "https://foss4g.asia/2026/": True,
        "https://2027.fossy.ca/": True,
        "https://www.socallinuxexpo.org/scale/24x/": True,
        "https://2026.stateofthebrowser.com/": True,
    }
    for candidate_url, is_year_specific in expectations.items():
        result = add_new_conference.url_has_year_component(candidate_url)
        assert result is is_year_specific
|
||||||
|
|
||||||
|
|
||||||
def test_insert_sorted_allows_same_url_different_year_without_year_component() -> None:
|
def test_insert_sorted_allows_same_url_different_year_without_year_component() -> None:
|
||||||
"""The same non-year-specific URL can be reused for a different year."""
|
"""The same non-year-specific URL can be reused for a different year."""
|
||||||
conferences: list[dict[str, typing.Any]] = [
|
conferences: list[dict[str, typing.Any]] = [
|
||||||
|
|
@ -74,6 +87,86 @@ def test_insert_sorted_supports_nested_dates() -> None:
|
||||||
assert [conf["name"] for conf in updated] == ["FOSDEM", "PyCascades"]
|
assert [conf["name"] for conf in updated] == ["FOSDEM", "PyCascades"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_insert_sorted_updates_inexact_existing_entry() -> None:
    """An exact entry should replace the matching approximate series entry."""
    shared: dict[str, typing.Any] = {
        "name": "PyCascades",
        "series": "pycascades",
        "topic": "Python",
        "location": "Seattle, Washington",
    }
    approximate_entry: dict[str, typing.Any] = {
        **shared,
        "dates": {
            "status": "approximate",
            "label": "March 2027",
            "earliest": date(2027, 3, 1),
            "latest": date(2027, 3, 31),
        },
        "url": "https://2027.pycascades.com/",
    }
    exact_entry: dict[str, typing.Any] = {
        **shared,
        "dates": {
            "status": "exact",
            "start": date(2027, 3, 12),
            "end": date(2027, 3, 14),
        },
        "url": "https://2027.pycascades.com/",
        "venue": "Example Hall",
    }
    conferences: list[dict[str, typing.Any]] = [approximate_entry]

    updated = add_new_conference.insert_sorted(conferences, exact_entry)

    # The approximate entry is replaced, not kept next to the exact one.
    assert len(updated) == 1
    merged = updated[0]
    assert merged["dates"]["status"] == "exact"
    assert merged["dates"]["start"] == date(2027, 3, 12)
    assert merged["venue"] == "Example Hall"
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_dates_field_moves_legacy_dates() -> None:
    """Legacy top-level start/end output is converted to nested ``dates``."""
    first_day = date(2026, 4, 10)
    last_day = date(2026, 4, 12)
    conf: dict[str, typing.Any] = {
        "name": "PyCon",
        "start": first_day,
        "end": last_day,
    }

    add_new_conference.normalize_dates_field(conf)

    for legacy_key in ("start", "end"):
        assert legacy_key not in conf
    expected_dates = {"status": "exact", "start": first_day, "end": last_day}
    assert conf["dates"] == expected_dates
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_prompt_includes_nested_dates_and_series() -> None:
    """The prompt must describe nested dates and list the known series IDs."""
    known_series = {
        "pycascades": {
            "name": "PyCascades",
            "topic": "Python",
            "usual_location": "Seattle, Washington",
            "country": "us",
        }
    }

    prompt = add_new_conference.build_prompt(
        "https://example.com",
        "Conference details",
        None,
        known_series,
    )

    expected_fragments = (
        "Do not output legacy top-level `start`, `end`, or `date_status`",
        "dates.status",
        "- pycascades: PyCascades",
        "March 2027",
    )
    for fragment in expected_fragments:
        assert fragment in prompt
|
||||||
|
|
||||||
|
|
||||||
def test_validate_country_normalises_name() -> None:
|
def test_validate_country_normalises_name() -> None:
|
||||||
"""Country names should be normalised to alpha-2 codes."""
|
"""Country names should be normalised to alpha-2 codes."""
|
||||||
conf: dict[str, typing.Any] = {"country": "United Kingdom"}
|
conf: dict[str, typing.Any] = {"country": "United Kingdom"}
|
||||||
|
|
@ -197,6 +290,76 @@ def test_add_new_conference_updates_yaml(
|
||||||
assert len(written) == 2
|
assert len(written) == 2
|
||||||
assert written[1]["name"] == "NewConf"
|
assert written[1]["name"] == "NewConf"
|
||||||
assert written[1]["country"] == "us"
|
assert written[1]["country"] == "us"
|
||||||
assert written[1]["end"] == date(2026, 5, 3)
|
assert written[1]["dates"] == {
|
||||||
|
"status": "exact",
|
||||||
|
"start": date(2026, 5, 3),
|
||||||
|
"end": date(2026, 5, 3),
|
||||||
|
}
|
||||||
assert written[1]["latitude"] == 40.0
|
assert written[1]["latitude"] == 40.0
|
||||||
assert written[1]["longitude"] == -74.0
|
assert written[1]["longitude"] == -74.0
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_new_conference_reuses_generic_url_for_new_year(
    tmp_path: typing.Any, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A generic URL whose domain merely contains digits is not skipped early."""
    generic_url = "https://www.foss4gna.org/"
    existing_entry = {
        "name": "FOSS4G North America",
        "series": "foss4g-north-america",
        "dates": {
            "status": "exact",
            "start": date(2025, 11, 3),
            "end": date(2025, 11, 5),
        },
        "url": generic_url,
    }
    model_entry = {
        "name": "FOSS4G North America",
        "series": "foss4g-north-america",
        "topic": "Geospatial",
        "location": "St. Louis, Missouri",
        "country": "us",
        "dates": {
            "status": "exact",
            "start": date(2026, 10, 26),
            "end": date(2026, 10, 29),
        },
        "url": generic_url,
    }

    yaml_path = tmp_path / "conferences.yaml"
    yaml_path.write_text(yaml.dump([existing_entry], sort_keys=False))

    # Replace every network- or model-facing helper with a canned response.
    root = lxml.html.fromstring("<html><body>Conference details</body></html>")
    model_yaml = yaml.dump(model_entry, sort_keys=False)
    stubs = {
        "fetch_webpage": lambda url: root,
        "webpage_to_text": lambda parsed: "FOSS4G North America 2026",
        "detect_page_coordinates": lambda parsed: None,
        "get_from_open_ai": lambda prompt: {"yaml": model_yaml},
    }
    for attribute, stub in stubs.items():
        monkeypatch.setattr(add_new_conference, attribute, stub)

    added = add_new_conference.add_new_conference(generic_url, str(yaml_path))

    assert added is True
    written = yaml.safe_load(yaml_path.read_text())
    assert len(written) == 2
    start_years = [conf["dates"]["start"].year for conf in written]
    assert start_years == [2025, 2026]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue