Improve conference importer date handling
This commit is contained in:
parent
56eea3f7a3
commit
dbce9e5358
2 changed files with 401 additions and 46 deletions
|
|
@ -26,6 +26,19 @@ def test_extract_google_maps_latlon_at_pattern() -> None:
|
|||
assert result == (51.5242464, -0.0997024)
|
||||
|
||||
|
||||
def test_url_has_year_component() -> None:
    """Check that only real year or edition URL parts count as year-specific."""
    # Maps each URL to whether it should be reported as carrying a
    # year/edition component; insertion order is the check order.
    expectations: dict[str, bool] = {
        "https://www.foss4gna.org/": False,
        "https://foss4g.asia/2026/": True,
        "https://2027.fossy.ca/": True,
        "https://www.socallinuxexpo.org/scale/24x/": True,
        "https://2026.stateofthebrowser.com/": True,
    }

    for candidate, is_year_specific in expectations.items():
        detected = add_new_conference.url_has_year_component(candidate)
        # Identity check against the bool singleton, as in a strict True/False contract.
        assert detected is is_year_specific
|
||||
|
||||
|
||||
def test_insert_sorted_allows_same_url_different_year_without_year_component() -> None:
|
||||
"""The same non-year-specific URL can be reused for a different year."""
|
||||
conferences: list[dict[str, typing.Any]] = [
|
||||
|
|
@ -74,6 +87,86 @@ def test_insert_sorted_supports_nested_dates() -> None:
|
|||
assert [conf["name"] for conf in updated] == ["FOSDEM", "PyCascades"]
|
||||
|
||||
|
||||
def test_insert_sorted_updates_inexact_existing_entry() -> None:
    """An existing inexact series entry should be replaced once exact dates arrive."""
    # Fields that are identical in the stored entry and the incoming one.
    shared: dict[str, typing.Any] = {
        "name": "PyCascades",
        "series": "pycascades",
        "topic": "Python",
        "location": "Seattle, Washington",
    }
    site = "https://2027.pycascades.com/"

    approximate_entry: dict[str, typing.Any] = {
        **shared,
        "dates": {
            "status": "approximate",
            "label": "March 2027",
            "earliest": date(2027, 3, 1),
            "latest": date(2027, 3, 31),
        },
        "url": site,
    }
    conferences: list[dict[str, typing.Any]] = [approximate_entry]

    new_conf: dict[str, typing.Any] = {
        **shared,
        "dates": {
            "status": "exact",
            "start": date(2027, 3, 12),
            "end": date(2027, 3, 14),
        },
        "url": site,
        "venue": "Example Hall",
    }

    updated = add_new_conference.insert_sorted(conferences, new_conf)

    # The incoming entry must take the place of the stored one, not sit beside it.
    assert len(updated) == 1
    merged = updated[0]
    assert merged["dates"]["status"] == "exact"
    assert merged["dates"]["start"] == date(2027, 3, 12)
    assert merged["venue"] == "Example Hall"
|
||||
|
||||
|
||||
def test_normalize_dates_field_moves_legacy_dates() -> None:
    """Legacy top-level start/end output should be folded into nested dates before writing YAML."""
    first_day = date(2026, 4, 10)
    last_day = date(2026, 4, 12)
    conf: dict[str, typing.Any] = {
        "name": "PyCon",
        "start": first_day,
        "end": last_day,
    }

    # The helper is expected to rewrite the mapping in place.
    add_new_conference.normalize_dates_field(conf)

    for legacy_key in ("start", "end"):
        assert legacy_key not in conf
    expected_dates = {
        "status": "exact",
        "start": first_day,
        "end": last_day,
    }
    assert conf["dates"] == expected_dates
|
||||
|
||||
|
||||
def test_build_prompt_includes_nested_dates_and_series() -> None:
    """The generated prompt should explain nested dates and list known series IDs."""
    known_series = {
        "pycascades": {
            "name": "PyCascades",
            "topic": "Python",
            "usual_location": "Seattle, Washington",
            "country": "us",
        }
    }

    # Arguments stay positional, exactly as the prompt builder is invoked here.
    prompt = add_new_conference.build_prompt(
        "https://example.com",
        "Conference details",
        None,
        known_series,
    )

    required_fragments = (
        "Do not output legacy top-level `start`, `end`, or `date_status`",
        "dates.status",
        "- pycascades: PyCascades",
        "March 2027",
    )
    for fragment in required_fragments:
        assert fragment in prompt
|
||||
|
||||
|
||||
def test_validate_country_normalises_name() -> None:
|
||||
"""Country names should be normalised to alpha-2 codes."""
|
||||
conf: dict[str, typing.Any] = {"country": "United Kingdom"}
|
||||
|
|
@ -197,6 +290,76 @@ def test_add_new_conference_updates_yaml(
|
|||
assert len(written) == 2
|
||||
assert written[1]["name"] == "NewConf"
|
||||
assert written[1]["country"] == "us"
|
||||
assert written[1]["end"] == date(2026, 5, 3)
|
||||
assert written[1]["dates"] == {
|
||||
"status": "exact",
|
||||
"start": date(2026, 5, 3),
|
||||
"end": date(2026, 5, 3),
|
||||
}
|
||||
assert written[1]["latitude"] == 40.0
|
||||
assert written[1]["longitude"] == -74.0
|
||||
|
||||
|
||||
def test_add_new_conference_reuses_generic_url_for_new_year(
    tmp_path: typing.Any, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A generic URL whose domain contains digits must not be skipped early."""
    generic_url = "https://www.foss4gna.org/"

    # Seed the YAML file with the previous year's edition at the same URL.
    previous_edition = {
        "name": "FOSS4G North America",
        "series": "foss4g-north-america",
        "dates": {
            "status": "exact",
            "start": date(2025, 11, 3),
            "end": date(2025, 11, 5),
        },
        "url": generic_url,
    }
    yaml_path = tmp_path / "conferences.yaml"
    yaml_path.write_text(yaml.dump([previous_edition], sort_keys=False))

    root = lxml.html.fromstring("<html><body>Conference details</body></html>")

    def fake_fetch_webpage(url):
        return root

    def fake_webpage_to_text(parsed):
        return "FOSS4G North America 2026"

    def fake_detect_page_coordinates(parsed):
        return None

    def fake_get_from_open_ai(prompt):
        # Canned model reply describing the following year's edition.
        next_edition = {
            "name": "FOSS4G North America",
            "series": "foss4g-north-america",
            "topic": "Geospatial",
            "location": "St. Louis, Missouri",
            "country": "us",
            "dates": {
                "status": "exact",
                "start": date(2026, 10, 26),
                "end": date(2026, 10, 29),
            },
            "url": generic_url,
        }
        return {"yaml": yaml.dump(next_edition, sort_keys=False)}

    # Replace every network- or model-backed helper with a local stub.
    monkeypatch.setattr(add_new_conference, "fetch_webpage", fake_fetch_webpage)
    monkeypatch.setattr(add_new_conference, "webpage_to_text", fake_webpage_to_text)
    monkeypatch.setattr(
        add_new_conference, "detect_page_coordinates", fake_detect_page_coordinates
    )
    monkeypatch.setattr(add_new_conference, "get_from_open_ai", fake_get_from_open_ai)

    added = add_new_conference.add_new_conference(generic_url, str(yaml_path))

    assert added is True
    written = yaml.safe_load(yaml_path.read_text())
    assert len(written) == 2
    start_years = [entry["dates"]["start"].year for entry in written]
    assert start_years == [2025, 2026]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue