Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions common/test-utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ const setupCacheMock = (dirname, suffix) => {
// A hash ID value was added to BFI cache. Instead of updating all manual
// recording filenames, let's remove the ID here so they continue to map.
if (filename.startsWith("bfi.org.uk-")) {
const [prefix, , ...remainder] = filename.split("-");
filename = `${prefix}-${remainder.join("-")}`;
const [prefix, venue, ...remainder] = filename.split("-");
if (venue !== "bfi") filename = `${prefix}-${remainder.join("-")}`;
}
return path.join(
dirname,
Expand Down
5 changes: 5 additions & 0 deletions sources/bfi.org.uk-bfi-festivals/attributes.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Static attributes describing the BFI Festivals source. The same object is
// passed to generateShowingId() by find-events.js.
module.exports = {
// Identifier string for this source.
id: "bfi.org.uk-bfi-festivals",
// Display name.
name: "BFI Festivals",
// URL for the source (currently the BFI Flare "Online" root).
url: "https://whatson.bfi.org.uk/flare/Online/",
};
223 changes: 223 additions & 0 deletions sources/bfi.org.uk-bfi-festivals/find-events.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
const path = require("node:path");
const cheerio = require("cheerio");
const {
generateShowingId,
createOverview,
createPerformance,
getText,
createAccessibility,
readJSON,
} = require("../../common/utils");
const { venueMatchesCinema } = require("../../common/source-utils");
const { parseDate } = require("../../common/bfi.org.uk/utils");
const attributes = require("./attributes");

// Indices within each articleContext.searchResults entry (confirmed from live data).
// Each entry is read positionally, so these constants name the positions
// this module relies on; the trailing comments show an example value.
const RESULT_DATETIME = 7; // "Friday 20 March 2026 15:30"
const RESULT_YEAR = 11; // "2026"
const RESULT_BOOKING_URL = 18; // "default.asp?doWork::WScontent..."
const RESULT_SCREEN_FULL_NAME = 64; // "BFI Southbank, Screen NFT1"

/**
 * Builds the overview for a film page from its "Film-info" information list.
 *
 * Each list item is a heading/value pair. Director, cast and certificate
 * headings fill the matching field the first time they are seen; any other
 * item (or a repeat of an already-filled heading) is scanned for a
 * "<year>. <n>min" style fragment to obtain year and duration.
 *
 * @param {Function} $ - Cheerio instance loaded with the film page HTML.
 * @returns {*} Whatever createOverview() returns for the collected fields.
 */
function getOverviewFor($) {
  const details = {
    categories: "",
    directors: "",
    actors: "",
  };

  $("ul.Film-info__information li").each((_index, element) => {
    const $item = $(element);
    const label = getText(
      $item.find(".Film-info__information__heading"),
    ).toLowerCase();
    const value = getText($item.find(".Film-info__information__value"));

    const isDirectorLabel =
      label === "director" || label === "director-screenwriter";

    // First occurrence wins for each named field.
    if (isDirectorLabel && !details.directors) {
      details.directors = value;
      return;
    }
    if (label === "with" && !details.actors) {
      details.actors = value;
      return;
    }
    if (label === "certificate" && !details.classification) {
      details.classification = value;
      return;
    }

    // Everything else may carry the year and running time.
    const timings = value.match(/\s+(\d{4}).\s+(\d+)min(?:\s|$)/i);
    if (!timings) return;
    if (!details.year) {
      details.year = timings[1];
    }
    if (!details.duration) {
      details.duration = timings[2];
    }
  });

  return createOverview(details);
}

/**
 * Maps the label of an access screening type to accessibility flags.
 *
 * @param {string} typeText - Label as it appears on the page, e.g. "BSL".
 * @returns {object|null} A fresh flags object, or null (after logging a
 *   warning) when the label is not recognised.
 */
function getAccessibilityFlagsForType(typeText) {
  switch (typeText) {
    case "Relaxed screening":
      return { relaxed: true };
    case "Descriptive Subtitles":
    case "BSL":
    case "Live captioned":
      return { hardOfHearing: true };
    case "Audio Description":
      return { audioDescription: true };
    default:
      break;
  }

  // Closed captions labels carry a variable suffix, so match on the prefix.
  if (typeText.startsWith("Closed Captions")) {
    return { hardOfHearing: true };
  }

  console.warn(
    `[bfi.org.uk-bfi-festivals] Unrecognised access screening type: "${typeText}"`,
  );
  return null;
}

/**
 * Collects accessibility flags from the "Access screenings" section of a film
 * page, keyed by the screening's timestamp.
 *
 * Each paragraph in that section has a <strong> date/time without a year,
 * followed by one or more <br>-separated screening type labels. The year is
 * taken from the first search result so the date can be parsed.
 *
 * @param {Function} $ - Cheerio instance loaded with the film page HTML.
 * @param {Array} searchResults - Performance entries for the film.
 * @returns {Map<number, object>} Map from Date#getTime() value to merged flags;
 *   empty when there is no access section or no search results.
 */
function buildAccessibilityByTime($, searchResults) {
  const flagsByTimestamp = new Map();

  // Locate the heading that introduces the access screenings section.
  const $heading = $(".Film-info__content__heading").filter(
    (_index, element) => getText($(element)) === "Access screenings",
  );
  if ($heading.length === 0) return flagsByTimestamp;

  const $section = $heading
    .closest(".Film-info__content")
    .find(".Film-info__content__content");

  // The access dates omit the year, so borrow it from the first performance.
  const firstResult = searchResults[0];
  if (!firstResult) return flagsByTimestamp;
  const year = firstResult[RESULT_YEAR];

  for (const paragraph of $section.find("p").toArray()) {
    const $paragraph = $(paragraph);
    const dateLabel = getText($paragraph.find("strong"));
    if (!dateLabel) continue;

    // "Wednesday 25 March 16:00" → "Wednesday 25 March 2026 16:00"
    const dateLabelWithYear = dateLabel.replace(
      /(\d{2}:\d{2})$/,
      `${year} $1`,
    );

    let screeningDate;
    try {
      screeningDate = parseDate(dateLabelWithYear);
    } catch {
      console.warn(
        `[bfi.org.uk-bfi-festivals] Unable to parse access screening date: "${dateLabel}"`,
      );
      continue;
    }

    // Each text node directly after a <br> names one screening type.
    const paragraphFlags = $paragraph
      .find("br")
      .toArray()
      .reduce((merged, br) => {
        const typeText = br.nextSibling?.data?.trim();
        if (!typeText) return merged;
        const typeFlags = getAccessibilityFlagsForType(typeText);
        return typeFlags ? { ...merged, ...typeFlags } : merged;
      }, {});

    if (Object.keys(paragraphFlags).length === 0) continue;

    // Several paragraphs may refer to the same time; merge rather than replace.
    const timestamp = screeningDate.getTime();
    const previousFlags = flagsByTimestamp.get(timestamp) ?? {};
    flagsByTimestamp.set(timestamp, { ...previousFlags, ...paragraphFlags });
  }

  return flagsByTimestamp;
}

/**
 * Turns the retrieved BFI festival data into events for one cinema.
 *
 * Reads the JSON stored under retrieved-data/bfi.org.uk-bfi-festivals, then for
 * every film page builds an overview, a plain-text description and one
 * performance per search result whose venue matches the cinema. Films with no
 * search results or no matching performances are left out.
 *
 * @param {*} cinema - Cinema passed to venueMatchesCinema() for venue filtering.
 * @returns {Promise<Array<object>>} Events with showingId, title, url, overview,
 *   performances and matchingHints; an empty array if the data cannot be read.
 */
async function findEvents(cinema) {
  const retrievedDataPath = path.join(
    process.cwd(),
    "retrieved-data",
    "bfi.org.uk-bfi-festivals",
  );

  let retrieved;
  try {
    retrieved = await readJSON(retrievedDataPath);
  } catch {
    // No readable data for this source: report no events.
    return [];
  }

  const moviePages = retrieved.moviePages ?? {};
  const foundEvents = [];

  for (const [articleUrl, movie] of Object.entries(moviePages)) {
    const { html, searchResults, domain, festival } = movie;

    if (!searchResults || searchResults.length === 0) continue;

    const $ = cheerio.load(html);
    const title = searchResults[0][5];
    const overview = getOverviewFor($);

    // Build the description text from a copy of the article body with the
    // navigation, booking widget, access section and scripts stripped out.
    const isAccessHeading = (_index, element) =>
      getText($(element)) === "Access screenings";
    const $body = $(".main-article-body").clone();
    $body.find(".Breadcrumbs,.Booking").remove();
    $body
      .find(".Film-info__content__heading")
      .filter(isAccessHeading)
      .parent()
      .remove();
    $body.find("script").remove();

    const nonEmptyLines = getText($body)
      .split("\n")
      .map((line) => line.trim())
      .filter(Boolean);
    const overviewText = nonEmptyLines.join("\n");

    const accessibilityByTime = buildAccessibilityByTime($, searchResults);

    const performances = [];
    for (const result of searchResults) {
      // e.g. "BFI Southbank, Screen NFT1" → venue name and screen.
      const [venueName, screen] = result[RESULT_SCREEN_FULL_NAME].split(", ");
      if (!venueMatchesCinema(cinema, venueName)) continue;

      const date = parseDate(result[RESULT_DATETIME]);
      const flagsForTime = accessibilityByTime.get(date.getTime()) ?? {};
      const accessibility = createAccessibility(
        title,
        flagsForTime,
        overviewText,
      );

      performances.push(
        createPerformance({
          date,
          url: `${domain}${result[RESULT_BOOKING_URL]}`,
          screen,
          notesList: [`Part of the ${festival} festival`],
          accessibility,
        }),
      );
    }

    if (performances.length === 0) continue;

    // The article permalink doubles as the stable part of the showing ID.
    const slug = new URL(articleUrl).searchParams.get(
      "BOparam::WScontent::loadArticle::permalink",
    );
    foundEvents.push({
      showingId: generateShowingId(attributes, slug),
      title,
      url: articleUrl,
      overview,
      performances,
      matchingHints: { overview: overviewText },
    });
  }

  return foundEvents;
}

module.exports = findEvents;
9 changes: 9 additions & 0 deletions sources/bfi.org.uk-bfi-festivals/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
const attributes = require("./attributes");
const retrieve = require("./retrieve");
const findEvents = require("./find-events");

// Public surface of the BFI festivals source: its static attributes plus the
// retrieve and findEvents functions required above.
module.exports = {
attributes,
retrieve,
findEvents,
};
110 changes: 110 additions & 0 deletions sources/bfi.org.uk-bfi-festivals/retrieve.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
const cheerio = require("cheerio");
const getPageWithPlaywright = require("../../common/get-page-with-playwright");

// Festivals whose film listings are retrieved. retrieve() iterates this list.
const FESTIVALS = [
{
// Short key: used in the A-Z page cache name and as the movieListPages key.
id: "flare",
// Human-readable name: used in log output and stored with each movie page.
name: "BFI Flare",
// A-Z listing page from which the individual film article links are read.
azUrl:
"https://whatson.bfi.org.uk/flare/Online/default.asp?BOparam::WScontent::loadArticle::permalink=flare-films-az",
// Prefix prepended to the hrefs found on the A-Z page to form article URLs.
domain: "https://whatson.bfi.org.uk/flare/Online/",
},
];

/**
 * Loads a festival's A-Z listing page through Playwright and returns its HTML.
 *
 * @param {{id: string, azUrl: string}} festival - Entry from FESTIVALS.
 * @returns {Promise<*>} Result of getPageWithPlaywright(); the page callback
 *   resolves to the page's HTML content.
 */
async function getAzPage(festival) {
  const cacheKey = `bfi.org.uk-bfi-festivals-az-${festival.id}`;

  const readListingHtml = async (page) => {
    try {
      await page.waitForLoadState("networkidle");
    } catch {
      // If this fails it timed out — keep going and let the next wait handle it
    }
    const contentLocator = page.locator("#content");
    await contentLocator.waitFor({ strict: false });
    return page.content();
  };

  return getPageWithPlaywright(festival.azUrl, cacheKey, readListingHtml);
}

/**
 * Loads a single film article page through Playwright.
 *
 * The page callback waits for either the film information block or a
 * "500 - internal server error" heading. Failures are returned as Error
 * objects rather than thrown; retrieve() checks for them with instanceof.
 *
 * @param {string} articleUrl - Absolute URL of the film article.
 * @returns {Promise<*>} Result of getPageWithPlaywright(); the page callback
 *   resolves to { html, searchResults } or to an Error.
 */
async function getMoviePage(articleUrl) {
  const permalink = new URL(articleUrl).searchParams.get(
    "BOparam::WScontent::loadArticle::permalink",
  );
  const cacheKey = `bfi.org.uk-bfi-festivals-${permalink}`;

  const readMoviePage = async (page) => {
    try {
      await page.waitForLoadState("networkidle");
    } catch {
      // If this fails it timed out — keep going and let the next wait handle it
    }

    const serverErrorHeading = page
      .locator("#content h2")
      .filter({ hasText: /500 - internal server error/i });
    const filmInformation = page.locator(".Film-info__information");

    // Whichever appears first tells us whether the page loaded or failed.
    const errorOrContent = serverErrorHeading.or(filmInformation.first());
    await errorOrContent.waitFor({ state: "attached" });

    const showsServerError = await serverErrorHeading.isVisible();
    if (showsServerError) {
      const errorText = await serverErrorHeading.textContent();
      return new Error(`Error page detected - ${errorText}`);
    }

    const html = await page.content();
    const hasHtml = typeof html === "string" && html.length !== 0;
    if (!hasHtml) {
      return new Error(`Empty page contents at ${articleUrl}`);
    }

    // The performance data lives on a page-level global rather than in the DOM.
    const searchResults = await page.evaluate(
      // eslint-disable-next-line no-undef
      () => window.articleContext?.searchResults ?? null,
    );

    return { html, searchResults };
  };

  return getPageWithPlaywright(articleUrl, cacheKey, readMoviePage);
}

/**
 * Retrieves the A-Z listing and every linked film page for each festival.
 *
 * Film pages are fetched one at a time. If getMoviePage() hands back an Error
 * it is thrown, aborting the whole retrieval.
 *
 * @returns {Promise<{movieListPages: object, moviePages: object}>}
 *   movieListPages maps festival id to the A-Z page HTML; moviePages maps each
 *   absolute article URL to its page data plus the festival's domain and name.
 * @throws {Error} When a film page reports an error or has empty contents.
 */
async function retrieve() {
  const movieListPages = {};
  const moviePages = {};

  for (const festival of FESTIVALS) {
    console.log(`    - Retrieving A-Z page for ${festival.name} ...`);
    const html = await getAzPage(festival);
    movieListPages[festival.id] = html;

    const $ = cheerio.load(html);
    // A Set so that a film linked more than once is only fetched once.
    const articleUrls = new Set();

    $(".main-article-body .Rich-text li > a").each(function () {
      const articleUrl = $(this).attr("href");
      // .attr() yields undefined for anchors without an href; skipping them
      // avoids requesting a bogus "<domain>undefined" URL below.
      if (!articleUrl) return;
      articleUrls.add(articleUrl);
    });

    for (const url of articleUrls) {
      const absoluteUrl = `${festival.domain}${url}`;
      console.log(
        `        - [${Date.now()}] Getting data for article ${absoluteUrl} ...`,
      );
      const result = await getMoviePage(absoluteUrl);

      // getMoviePage reports failures by returning an Error instead of throwing.
      if (result instanceof Error) throw result;

      moviePages[absoluteUrl] = {
        ...result,
        domain: festival.domain,
        festival: festival.name,
      };
    }
  }

  return { movieListPages, moviePages };
}

module.exports = retrieve;

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Loading