import { camelCaseObject, getConfig } from '@edx/frontend-platform';
import { getAuthenticatedHttpClient } from '@edx/frontend-platform/auth';
import type {
  Filter, MeiliSearch, MultiSearchQuery,
} from 'meilisearch';

import { ContainerType } from '../../generic/key-utils';

/** Absolute URL of the Studio endpoint that is queried by getContentSearchConfig(). */
export const getContentSearchConfigUrl = (): string => new URL(
  'api/content_search/v2/studio/',
  getConfig().STUDIO_BASE_URL,
).href;

export const HIGHLIGHT_PRE_TAG = '__meili-highlight__'; // Indicate the start of a highlighted (matching) term
export const HIGHLIGHT_POST_TAG = '__/meili-highlight__'; // Indicate the end of a highlighted (matching) term

/** The separator used for hierarchical tags in the search index, e.g. tags.level1 = "Subject > Math > Calculus" */
export const TAG_SEP = ' > ';

export enum SearchSortOption {
  RELEVANCE = '', // Default; sorts results by keyword search ranking
  TITLE_AZ = 'display_name:asc',
  TITLE_ZA = 'display_name:desc',
  NEWEST = 'created:desc',
  OLDEST = 'created:asc',
  RECENTLY_PUBLISHED = 'last_published:desc',
  RECENTLY_MODIFIED = 'modified:desc',
}

export enum PublishStatus {
  Published = 'published',
  Modified = 'modified',
  NeverPublished = 'never',
}

export const allPublishFilters: PublishStatus[] = Object.values(PublishStatus);

/**
 * Get the content search configuration from the CMS.
 * @returns The `url`, `index_name` and `api_key` fields of the response, with camelCase names.
 */
export const getContentSearchConfig = async (): Promise<{ url: string, indexName: string, apiKey: string }> => {
  const url = getContentSearchConfigUrl();
  const response = await getAuthenticatedHttpClient().get(url);
  return {
    url: response.data.url,
    indexName: response.data.index_name,
    apiKey: response.data.api_key,
  };
};

/**
 * Detailed "content" of an XBlock/component, from the block's index_dictionary function.
 * Contents depends on the type.
 */
export interface ContentDetails {
  htmlContent?: string;
  capaContent?: string;
  childUsageKeys?: Array<string>;
  childDisplayNames?: Array<string>;
  [k: string]: any;
}

/**
 * Meilisearch filters can be expressed as strings or arrays.
 * This helper method converts from any supported input format to an array, for consistency.
 *
 * Note that an array input is returned as-is, so nested (OR) arrays are passed through unchanged.
 * @param filter A filter expression, e.g. `'foo = bar'` or `[['a = b', 'a = c'], 'd = e']`
 * @returns `[filter]` for a string, the same array for an array, and `[]` for anything else.
 */
export function forceArray(filter?: Filter): string[] {
  if (typeof filter === 'string') {
    return [filter];
  }
  if (Array.isArray(filter)) {
    return filter as string[];
  }
  return [];
}

/**
 * Wrap a value in double quotes for use in a Meilisearch filter expression.
 *
 * Any double quote inside the value is backslash-escaped so that it cannot terminate the quoted
 * string early (which would make the whole filter expression malformed).
 * e.g. `Size > 12" Ruler` becomes `"Size > 12\" Ruler"`
 */
function quoteFilterValue(value: string): string {
  return `"${value.replace(/"/g, '\\"')}"`;
}

/**
 * Get the name of the index attribute (facet) that holds tags with the given path.
 * A path with a single part is a taxonomy (`tags.taxonomy`); its direct children are in `tags.level0`, etc.
 * @param tagPath e.g. `"Subject > Math"` (which gives `"tags.level0"`)
 */
function getTagFacetName(tagPath: string): string {
  const numParts = tagPath.split(TAG_SEP).length;
  return numParts === 1 ? 'tags.taxonomy' : `tags.level${numParts - 2}`;
}

/**
 * Convert a list of block types to a Meilisearch filter: a single OR group matching any of the given
 * block types, or no filter at all if the list is missing or empty.
 * @param blockTypesFilter e.g. `["problem", "html"]`
 */
function formatBlockTypesFilter(blockTypesFilter?: string[]): string[][] {
  return blockTypesFilter?.length
    ? [blockTypesFilter.map(bt => `block_type = ${bt}`)]
    : [];
}

/**
 * Given tag paths like ["Difficulty > Hard", "Subject > Math"], convert them to an array of Meilisearch
 * filter conditions. The tag filters are all AND conditions (not OR).
 * @param tagsFilter e.g. `["Difficulty > Hard", "Subject > Math"]`
 * @returns One condition per tag path; an empty array if no tag paths were given.
 */
function formatTagsFilter(tagsFilter?: string[]): string[] {
  return (tagsFilter ?? []).map(
    (tagPath) => `${getTagFacetName(tagPath)} = ${quoteFilterValue(tagPath)}`,
  );
}

/**
 * The tags that are associated with a search result, at various levels of the tag hierarchy.
 */
export interface ContentHitTags {
  taxonomy?: string[];
  level0?: string[];
  level1?: string[];
  level2?: string[];
  level3?: string[];
}

/**
 * Fields shared by every kind of search result (blocks, collections and containers).
 * Defined in edx-platform/openedx/core/djangoapps/content/search/documents.py
 */
interface BaseContentHit {
  id: string;
  type: 'course_block' | 'library_block' | 'collection' | 'library_container';
  displayName: string;
  usageKey: string;
  blockId: string;
  /** The course or library ID */
  contextKey: string;
  org: string;
  breadcrumbs: Array<{ displayName: string }>;
  tags: ContentHitTags;
  /** Same fields, with matching terms wrapped in HIGHLIGHT_PRE_TAG / HIGHLIGHT_POST_TAG */
  formatted: { displayName: string, content?: ContentDetails, description?: string };
  created: number;
  modified: number;
}

/**
 * Information about a single XBlock returned in the search results
 * Defined in edx-platform/openedx/core/djangoapps/content/search/documents.py
 */
export interface ContentHit extends BaseContentHit {
  /** The block_type part of the usage key. What type of XBlock this is. */
  blockType: string;
  type: 'course_block' | 'library_block';
  /**
   * Breadcrumbs:
   * - First one is the name of the course/library itself.
   * - After that is the name and usage key of any parent Section/Subsection/Unit/etc.
   */
  breadcrumbs: [
    { displayName: string },
    ...Array<{ displayName: string, usageKey: string }>,
  ];
  description?: string;
  content?: ContentDetails;
  lastPublished: number | null;
  collections: { displayName?: string[], key?: string[] };
  units: { displayName?: string[], key?: string[] };
  published?: ContentPublishedData;
  publishStatus: PublishStatus;
  formatted: BaseContentHit['formatted'] & { published?: ContentPublishedData, };
}

/**
 * Information about the published data of single Xblock returned in search results
 * Defined in edx-platform/openedx/core/djangoapps/content/search/documents.py
 */
export interface ContentPublishedData {
  description?: string;
  displayName?: string;
  numChildren?: number;
  content?: ContentDetails;
}

/**
 * Information about a single collection returned in the search results
 * Defined in edx-platform/openedx/core/djangoapps/content/search/documents.py
 */
export interface CollectionHit extends BaseContentHit {
  type: 'collection';
  description: string;
  numChildren?: number;
  published?: ContentPublishedData;
}

/** The "content" of a container search result: its children's usage keys and display names. */
interface ContainerHitContent {
  childUsageKeys?: string[],
  childDisplayNames?: string[],
}

/**
 * Information about a single container returned in the search results
 * Defined in edx-platform/openedx/core/djangoapps/content/search/documents.py
 */
export interface ContainerHit extends BaseContentHit {
  type: 'library_container';
  blockType: ContainerType; // This should be expanded to include other container types
  numChildren?: number;
  published?: ContentPublishedData;
  publishStatus: PublishStatus;
  formatted: BaseContentHit['formatted'] & { published?: ContentPublishedData, };
  content?: ContainerHitContent;
  sections?: { displayName?: string[], key?: string[] };
  subsections?: { displayName?: string[], key?: string[] };
}

export type HitType = ContentHit | CollectionHit | ContainerHit;

/**
 * Convert search hits to camelCase
 *
 * The `_formatted` (highlighted) data from Meilisearch is moved to a `formatted` field that only keeps the
 * display name, content (defaulting to `{}`), description and published data.
 * @param hit A search result directly from Meilisearch
 */
export function formatSearchHit(hit: Record<string, any>): HitType {
  // eslint-disable-next-line @typescript-eslint/naming-convention
  const { _formatted, ...newHit } = hit;
  newHit.formatted = {
    displayName: _formatted?.display_name,
    content: _formatted?.content ?? {},
    description: _formatted?.description,
    published: _formatted?.published,
  };
  return camelCaseObject(newHit);
}

interface FetchSearchParams {
  client: MeiliSearch,
  indexName: string,
  searchKeywords: string,
  blockTypesFilter?: string[],
  problemTypesFilter?: string[],
  publishStatusFilter?: PublishStatus[],
  /** The full path of tags that each result MUST have, e.g. ["Difficulty > Hard", "Subject > Math"] */
  tagsFilter?: string[],
  extraFilter?: Filter,
  sort?: SearchSortOption[],
  /** How many results to skip, e.g. if limit=20 then passing offset=20 gets the second page. */
  offset?: number,
  /** If true, the second query (facet counts for block type, problem type, publish status) is not sent. */
  skipBlockTypeFetch?: boolean,
  limit?: number,
}

/**
 * Run a content search, and (unless `skipBlockTypeFetch` is set) also fetch the facet counts that are used to
 * populate the block type / problem type / publish status filters. Both are sent in a single multi-search request.
 *
 * @returns
 *  - `hits`: the results on this page, converted with formatSearchHit()
 *  - `totalHits`: the (possibly estimated) total number of results
 *  - `blockTypes`, `problemTypes`, `publishStatus`: facet value -> count; empty if the facet query was skipped
 *  - `nextOffset`: the offset of the next page, or `undefined` if this page had fewer than `limit` results
 */
export async function fetchSearchResults({
  client,
  indexName,
  searchKeywords,
  blockTypesFilter,
  problemTypesFilter,
  publishStatusFilter,
  tagsFilter,
  extraFilter,
  sort,
  offset = 0,
  skipBlockTypeFetch = false,
  limit = 20,
}: FetchSearchParams): Promise<{
    hits: HitType[],
    nextOffset: number | undefined,
    totalHits: number,
    blockTypes: Record<string, number>,
    problemTypes: Record<string, number>,
    publishStatus: Record<string, number>,
  }> {
  const queries: MultiSearchQuery[] = [];

  // Convert 'extraFilter' into an array
  const extraFilterFormatted = forceArray(extraFilter);

  const blockTypesFilterFormatted = formatBlockTypesFilter(blockTypesFilter);
  const problemTypesFilterFormatted = problemTypesFilter?.length
    ? [problemTypesFilter.map(pt => `content.problem_types = ${pt}`)]
    : [];
  const publishStatusFilterFormatted = publishStatusFilter?.length
    ? [publishStatusFilter.map(ps => `publish_status = ${ps}`)]
    : [];
  const tagsFilterFormatted = formatTagsFilter(tagsFilter);

  // To filter normal block types and problem types as 'OR' query
  const typeFilters = [[
    ...blockTypesFilterFormatted,
    ...problemTypesFilterFormatted,
  ].flat()];

  // First query is always to get the hits, with all the filters applied.
  queries.push({
    indexUid: indexName,
    q: searchKeywords,
    filter: [
      // top-level entries in the array are AND conditions and must all match
      // Inner arrays are OR conditions, where only one needs to match.
      ...typeFilters,
      ...extraFilterFormatted,
      ...tagsFilterFormatted,
      ...publishStatusFilterFormatted,
    ],
    attributesToHighlight: ['display_name', 'description', 'published'],
    highlightPreTag: HIGHLIGHT_PRE_TAG,
    highlightPostTag: HIGHLIGHT_POST_TAG,
    attributesToCrop: ['description', 'published'],
    sort,
    offset,
    limit,
  });

  // The second query is to get the possible values for the "block types" filter
  if (!skipBlockTypeFetch) {
    queries.push({
      // We send search keywords so that the search results coincide with the filter counts.
      q: searchKeywords,
      indexUid: indexName,
      facets: ['block_type', 'content.problem_types', 'publish_status'],
      filter: [
        ...extraFilterFormatted,
        // We exclude the block type filter here so we get all the other available options for it.
        ...tagsFilterFormatted,
      ],
      limit: 0, // We don't need any "hits" for this - just the facetDistribution
    });
  }

  const { results } = await client.multiSearch({ queries });
  const [hitsResult, facetsResult] = results; // facetsResult is undefined if the second query was skipped
  const hitLength = hitsResult.hits.length;
  return {
    hits: hitsResult.hits.map(formatSearchHit),
    totalHits: hitsResult.totalHits ?? hitsResult.estimatedTotalHits ?? hitLength,
    blockTypes: facetsResult?.facetDistribution?.block_type ?? {},
    problemTypes: facetsResult?.facetDistribution?.['content.problem_types'] ?? {},
    publishStatus: facetsResult?.facetDistribution?.publish_status ?? {},
    // A full page means there may be more results; a short page means we reached the end.
    nextOffset: hitLength === limit ? offset + limit : undefined,
  };
}

/**
 * Fetch the block types facet distribution for the search results.
 * @param client The Meilisearch client instance
 * @param indexName Which index to search
 * @param extraFilter Any filters to apply, e.g. course ID.
 * @returns Block type -> number of matching documents; `{}` if no facet distribution was returned.
 */
export const fetchBlockTypes = async (
  client: MeiliSearch,
  indexName: string,
  extraFilter?: Filter,
): Promise<Record<string, number>> => {
  // Convert 'extraFilter' into an array
  const extraFilterFormatted = forceArray(extraFilter);

  const { results } = await client.multiSearch({
    queries: [{
      indexUid: indexName,
      facets: ['block_type'],
      filter: extraFilterFormatted,
      limit: 0, // We don't need any "hits" for this - just the facetDistribution
    }],
  });

  return results[0].facetDistribution?.block_type ?? {};
};

/** Information about a single tag in the tag tree, as returned by fetchAvailableTagOptions() */
export interface TagEntry {
  tagName: string;
  tagPath: string;
  tagCount: number;
  hasChildren: boolean;
}

/**
 * In the context of a particular search (which may already be filtered to a specific course, specific block types,
 * and/or have a keyword search applied), get the tree of tags that can be used to further filter/refine the search.
 *
 * @returns
 *  - `tags`: the taxonomies (if no `parentTagPath` is given) or the direct children of `parentTagPath`
 *  - `mayBeMissingResults`: true if the facet limit was reached, so some tags may not be included
 */
export async function fetchAvailableTagOptions({
  client,
  indexName,
  searchKeywords,
  blockTypesFilter,
  extraFilter,
  parentTagPath,
  // Ideally this would include 'tagSearchKeywords' to filter the tag tree by keyword search but that's not possible yet
}: {
  /** The Meilisearch client instance */
  client: MeiliSearch;
  /** Which index to search */
  indexName: string;
  /** Overall query string for the search; may be empty */
  searchKeywords: string;
  /** Filter to only include these block types e.g. ["problem", "html"] */
  blockTypesFilter?: string[];
  /** Any other filters to apply, e.g. course ID. */
  extraFilter?: Filter;
  /** Only fetch tags below this parent tag/taxonomy e.g. "Places > North America" */
  parentTagPath?: string;
}): Promise<{ tags: TagEntry[]; mayBeMissingResults: boolean; }> {
  const meilisearchFacetLimit = 100; // The 'maxValuesPerFacet' on the index. For Open edX we leave the default, 100.

  // Convert 'extraFilter' into an array
  const extraFilterFormatted = forceArray(extraFilter);
  const blockTypesFilterFormatted = formatBlockTypesFilter(blockTypesFilter);

  // Figure out which "facet" (attribute of the documents in the search index) holds the tags at the level we want.
  // e.g. "tags.taxonomy" is the facet/attribute that holds the root tags, and "tags.level0" has its child tags.
  let facetName = 'tags.taxonomy';
  let depth = 0;
  let parentFilter: string[] = [];
  if (parentTagPath) {
    depth = parentTagPath.split(TAG_SEP).length;
    facetName = `tags.level${depth - 1}`;
    // Only consider documents that are tagged with the parent tag itself:
    parentFilter = [`${getTagFacetName(parentTagPath)} = ${quoteFilterValue(parentTagPath)}`];
  }
  const filter = [...extraFilterFormatted, ...blockTypesFilterFormatted, ...parentFilter];

  // As an optimization, start pre-loading the data about "has child tags", if we will need it later.
  // Notice we don't 'await' the result of this request yet, so it happens in parallel with the main request below.
  // If depth=0, it definitely has children; we don't support depth > 4
  const maybeHasChildren = depth > 0 && depth < 4;
  const nextLevelFacet = `tags.level${depth}`; // This will give the children of the current tags.
  const childTagsRequest = maybeHasChildren
    ? client.index(indexName).searchForFacetValues({
      facetName: nextLevelFacet,
      facetQuery: parentTagPath,
      q: searchKeywords,
      filter,
    })
    : undefined;

  // Now load the facet values. Doing it with this API gives us much more flexibility in loading than if we just
  // requested the facets by passing { facets: ["tags"] } into the main search request; that works fine for loading the
  // root tags but can't load specific child tags like we can using this approach.
  const tagsRequest = client.index(indexName).searchForFacetValues({
    facetName,
    // It's not super clear in the documentation, but facetQuery is basically a "startsWith" query, which is what we
    // need here to return just the tags below the selected parent tag. However, it's a fuzzy query that may match
    // more tags than we want it to, so we have to explicitly post-process and reduce the set of results using an
    // exact match.
    facetQuery: parentTagPath,
    q: searchKeywords,
    filter,
  });

  // Wait for both requests together, so that a failure of either one is always observed (an un-awaited promise
  // that rejects would otherwise be reported as an unhandled rejection).
  const [{ facetHits }, childTagsResponse] = await Promise.all([tagsRequest, childTagsRequest]);

  const tags: TagEntry[] = [];
  facetHits.forEach(({ value: tagPath, count: tagCount }) => {
    if (!parentTagPath) {
      tags.push({
        tagName: tagPath,
        tagPath,
        tagCount,
        hasChildren: true, // You can't tag something with just a taxonomy, so this definitely has child tags.
      });
    } else {
      const parts = tagPath.split(TAG_SEP);
      const tagName = parts[parts.length - 1];
      if (tagPath === `${parentTagPath}${TAG_SEP}${tagName}`) {
        tags.push({
          tagName,
          tagPath,
          tagCount,
          hasChildren: false, // We'll set this later
        });
      } // Else this is a tag from another taxonomy/parent that was included because this search is "fuzzy". Ignore it.
    }
  });

  // Figure out if [some of] the tags at this level have children.
  // (childTagsResponse is only defined if the child tags were requested, i.e. if maybeHasChildren is true.)
  if (childTagsResponse) {
    const { facetHits: childFacetHits } = childTagsResponse;
    if (childFacetHits.length >= meilisearchFacetLimit) {
      // Assume they all have child tags; we can't retrieve more than 100 facet values (per Meilisearch docs) so
      // we can't say for sure on a tag-by-tag basis, but we know that at least some of them have children, so
      // it's a safe bet that most/all of them have children. And it's not a huge problem if we say they have children
      // but they don't.
      // eslint-disable-next-line no-param-reassign
      tags.forEach((t) => { t.hasChildren = true; });
    } else if (childFacetHits.length > 0) {
      // Some (or maybe all) of these tags have child tags. Let's figure out which ones exactly.
      const tagsWithChildren = new Set<string>();
      childFacetHits.forEach(({ value }) => {
        // Trim the child tag off: 'Places > North America > New York' becomes 'Places > North America'
        const tagPath = value.split(TAG_SEP).slice(0, -1).join(TAG_SEP);
        tagsWithChildren.add(tagPath);
      });
      // eslint-disable-next-line no-param-reassign
      tags.forEach((t) => { t.hasChildren = tagsWithChildren.has(t.tagPath); });
    }
  }

  // If we hit the limit of facetHits, there are probably even more tags, but there is no API to retrieve
  // them (no pagination etc.), so just tell the user that not all tags could be displayed. This should be pretty rare.
  return { tags, mayBeMissingResults: facetHits.length >= meilisearchFacetLimit };
}

/**
 * Best-effort search for *all* tags among the search results (with filters applied) that contain the given keyword.
 *
 * Unfortunately there is no good Meilisearch API for this, so we just have to do the best we can. If more than 1,000
 * objects are tagged with matching tags, this will be an incomplete result. For example, if 1,000 XBlocks/components
 * are tagged with "Tag Alpha 1" and 10 XBlocks are tagged with "Tag Alpha 2", a search for "Alpha" may only return
 * ["Tag Alpha 1"] instead of the correct result ["Tag Alpha 1", "Tag Alpha 2"] because we are limited to 1,000 matches,
 * which may all have the same tags.
 *
 * @returns
 *  - `matches`: the distinct tag paths that contain the keyword (case-insensitive)
 *  - `mayBeMissingResults`: true if the query limit was reached, so some matching tags may not be included
 */
export async function fetchTagsThatMatchKeyword({
  client,
  indexName,
  blockTypesFilter,
  extraFilter,
  tagSearchKeywords,
}: {
  /** The Meilisearch client instance */
  client: MeiliSearch;
  /** Which index to search */
  indexName: string;
  /** Filter to only include these block types e.g. `["problem", "html"]` */
  blockTypesFilter?: string[];
  /** Any other filters to apply to the overall search. */
  extraFilter?: Filter;
  /** Only show taxonomies/tags that match these keywords */
  tagSearchKeywords?: string;
}): Promise<{ mayBeMissingResults: boolean; matches: { tagPath: string }[] }> {
  if (!tagSearchKeywords || tagSearchKeywords.trim() === '') {
    // This data isn't needed if there is no tag keyword search. Don't bother making a search query.
    return { matches: [], mayBeMissingResults: false };
  }
  // Convert 'extraFilter' into an array
  const extraFilterFormatted = forceArray(extraFilter);
  const blockTypesFilterFormatted = formatBlockTypesFilter(blockTypesFilter);

  const limit = 1000; // This is the most results we can retrieve in a single query.

  // We search for any matches of the keyword in the "tags" field, respecting the current filters like block type filter
  // or current course filter. (Unfortunately we cannot also include the overall `searchKeywords` so this will match
  // against more content than it should.)
  const { hits } = await client.index(indexName).search(tagSearchKeywords, {
    filter: [...extraFilterFormatted, ...blockTypesFilterFormatted],
    attributesToSearchOn: ['tags.taxonomy', 'tags.level0', 'tags.level1', 'tags.level2', 'tags.level3'],
    attributesToRetrieve: ['tags'],
    limit,
    // TODO: improve this - use 'showMatchesPosition: true' to know exactly which tags match. Previously it didn't
    // provide the detail we need (https://github.com/orgs/meilisearch/discussions/550) but it has now been implemented
    // in newer versions of Meilisearch. See https://github.com/meilisearch/meilisearch/pull/5005 which fixes it.
  });

  const tagSearchKeywordsLower = tagSearchKeywords.toLocaleLowerCase();

  const matches = new Set<string>();

  // We have data like this:
  // hits: [
  //   {
  //     tags: {
  //       taxonomy: ["Competency"],
  //       level0: ["Competency > Abilities"],
  //       level1: ["Competency > Abilities > ..."]
  //     }, ...
  //   }, ...
  // ]
  hits.forEach((hit) => {
    // Be defensive: a hit without a 'tags' field has nothing to contribute (and must not crash Object.values).
    Object.values(hit.tags ?? {}).forEach((tagPathList: string[]) => {
      tagPathList.forEach((tagPath) => {
        if (tagPath.toLocaleLowerCase().includes(tagSearchKeywordsLower)) {
          matches.add(tagPath);
        }
      });
    });
  });

  return {
    matches: Array.from(matches).map((tagPath) => ({ tagPath })),
    mayBeMissingResults: hits.length === limit,
  };
}