Improve conference importer date handling

This commit is contained in:
Edward Betts 2026-06-22 12:41:33 +01:00
parent 56eea3f7a3
commit dbce9e5358
2 changed files with 401 additions and 46 deletions

View file

@ -26,6 +26,19 @@ def test_extract_google_maps_latlon_at_pattern() -> None:
assert result == (51.5242464, -0.0997024)
def test_url_has_year_component() -> None:
"""Only actual year or edition components should count as year-specific."""
cases = [
("https://www.foss4gna.org/", False),
("https://foss4g.asia/2026/", True),
("https://2027.fossy.ca/", True),
("https://www.socallinuxexpo.org/scale/24x/", True),
("https://2026.stateofthebrowser.com/", True),
]
for url, expected in cases:
assert add_new_conference.url_has_year_component(url) is expected
def test_insert_sorted_allows_same_url_different_year_without_year_component() -> None:
"""The same non-year-specific URL can be reused for a different year."""
conferences: list[dict[str, typing.Any]] = [
@ -74,6 +87,86 @@ def test_insert_sorted_supports_nested_dates() -> None:
assert [conf["name"] for conf in updated] == ["FOSDEM", "PyCascades"]
def test_insert_sorted_updates_inexact_existing_entry() -> None:
"""Exact dates should replace an existing inexact series entry."""
conferences: list[dict[str, typing.Any]] = [
{
"name": "PyCascades",
"series": "pycascades",
"topic": "Python",
"location": "Seattle, Washington",
"dates": {
"status": "approximate",
"label": "March 2027",
"earliest": date(2027, 3, 1),
"latest": date(2027, 3, 31),
},
"url": "https://2027.pycascades.com/",
}
]
new_conf: dict[str, typing.Any] = {
"name": "PyCascades",
"series": "pycascades",
"topic": "Python",
"location": "Seattle, Washington",
"dates": {
"status": "exact",
"start": date(2027, 3, 12),
"end": date(2027, 3, 14),
},
"url": "https://2027.pycascades.com/",
"venue": "Example Hall",
}
updated = add_new_conference.insert_sorted(conferences, new_conf)
assert len(updated) == 1
assert updated[0]["dates"]["status"] == "exact"
assert updated[0]["dates"]["start"] == date(2027, 3, 12)
assert updated[0]["venue"] == "Example Hall"
def test_normalize_dates_field_moves_legacy_dates() -> None:
"""Legacy start/end model output should be converted before writing YAML."""
conf: dict[str, typing.Any] = {
"name": "PyCon",
"start": date(2026, 4, 10),
"end": date(2026, 4, 12),
}
add_new_conference.normalize_dates_field(conf)
assert "start" not in conf
assert "end" not in conf
assert conf["dates"] == {
"status": "exact",
"start": date(2026, 4, 10),
"end": date(2026, 4, 12),
}
def test_build_prompt_includes_nested_dates_and_series() -> None:
"""The prompt should describe nested dates and known series IDs."""
prompt = add_new_conference.build_prompt(
"https://example.com",
"Conference details",
None,
{
"pycascades": {
"name": "PyCascades",
"topic": "Python",
"usual_location": "Seattle, Washington",
"country": "us",
}
},
)
assert "Do not output legacy top-level `start`, `end`, or `date_status`" in prompt
assert "dates.status" in prompt
assert "- pycascades: PyCascades" in prompt
assert "March 2027" in prompt
def test_validate_country_normalises_name() -> None:
"""Country names should be normalised to alpha-2 codes."""
conf: dict[str, typing.Any] = {"country": "United Kingdom"}
@ -197,6 +290,76 @@ def test_add_new_conference_updates_yaml(
assert len(written) == 2
assert written[1]["name"] == "NewConf"
assert written[1]["country"] == "us"
assert written[1]["end"] == date(2026, 5, 3)
assert written[1]["dates"] == {
"status": "exact",
"start": date(2026, 5, 3),
"end": date(2026, 5, 3),
}
assert written[1]["latitude"] == 40.0
assert written[1]["longitude"] == -74.0
def test_add_new_conference_reuses_generic_url_for_new_year(
tmp_path: typing.Any, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Generic URLs with digits in the domain should not be skipped early."""
yaml_path = tmp_path / "conferences.yaml"
yaml_path.write_text(
yaml.dump(
[
{
"name": "FOSS4G North America",
"series": "foss4g-north-america",
"dates": {
"status": "exact",
"start": date(2025, 11, 3),
"end": date(2025, 11, 5),
},
"url": "https://www.foss4gna.org/",
}
],
sort_keys=False,
)
)
root = lxml.html.fromstring("<html><body>Conference details</body></html>")
monkeypatch.setattr(add_new_conference, "fetch_webpage", lambda url: root)
monkeypatch.setattr(
add_new_conference,
"webpage_to_text",
lambda parsed: "FOSS4G North America 2026",
)
monkeypatch.setattr(
add_new_conference, "detect_page_coordinates", lambda parsed: None
)
monkeypatch.setattr(
add_new_conference,
"get_from_open_ai",
lambda prompt: {
"yaml": yaml.dump(
{
"name": "FOSS4G North America",
"series": "foss4g-north-america",
"topic": "Geospatial",
"location": "St. Louis, Missouri",
"country": "us",
"dates": {
"status": "exact",
"start": date(2026, 10, 26),
"end": date(2026, 10, 29),
},
"url": "https://www.foss4gna.org/",
},
sort_keys=False,
)
},
)
added = add_new_conference.add_new_conference(
"https://www.foss4gna.org/", str(yaml_path)
)
assert added is True
written = yaml.safe_load(yaml_path.read_text())
assert len(written) == 2
assert [conf["dates"]["start"].year for conf in written] == [2025, 2026]