* fix: warnings about Duplicate message id * fix: paragon's Hyperlink no longer accepts a 'content' attribute * test: ensure all act() calls are async * test: Removed "async" from "describe" * fix: DiscussionsSettings tests * Don't nest userAction.click in act() -- nested act() statements have indeterminate behaviour. * Use getBy* instead of findBy* with userAction to avoid nested act() statements * Always await userEvent.click * Use fireEvent.click when the onClick handlers need to be called * Use queryBy* instead of getBy* when using .toBeInTheDocument or * fix: typo in data-testid * test: Use useLocation to test route changes * Don't nest userAction.click in act() -- nested act() statements have * chore: fix lint:fix and lint errors * remove "indent" setting from .eslintrc.js * add @typescript-eslint/ prefix to eslint-disable-line statements where flagged by linter * changed stylelint setting import-notation to "string" * test: fix failing tests after upgrade * fix: css error "target selector was not found" * chore: upgrades dependency frontend-lib-content-components@2.3.0 * chore: bumps @edx/frontend-component-ai-translations to ^2.1.0 --------- Co-authored-by: Yusuf Musleh <yusuf@opencraft.com>
390 lines
16 KiB
JavaScript
390 lines
16 KiB
JavaScript
// @ts-check
|
|
import { camelCaseObject, getConfig } from '@edx/frontend-platform';
|
|
import { getAuthenticatedHttpClient } from '@edx/frontend-platform/auth';
|
|
|
|
/**
 * Build the absolute URL of the Studio endpoint that serves the content search configuration.
 *
 * The relative API path is resolved against the `STUDIO_BASE_URL` value of the frontend-platform config.
 *
 * @returns {string} The fully resolved URL, as a string
 */
export const getContentSearchConfigUrl = () => new URL(
  'api/content_search/v2/studio/',
  getConfig().STUDIO_BASE_URL,
).href;
|
|
|
|
/** The separator used for hierarchical tags in the search index, e.g. tags.level1 = "Subject > Math > Calculus" */
export const TAG_SEP = ' > ';

/** Marker passed to Meilisearch to indicate the start of a highlighted (matching) term */
export const highlightPreTag = '__meili-highlight__';
/** Marker passed to Meilisearch to indicate the end of a highlighted (matching) term */
export const highlightPostTag = '__/meili-highlight__';
|
|
|
|
/**
 * Get the content search configuration from the CMS.
 *
 * Performs an authenticated GET request against the URL returned by `getContentSearchConfigUrl()` and
 * maps the snake_case keys of the response body to camelCase.
 *
 * @returns {Promise<{url: string, indexName: string, apiKey: string}>}
 */
export const getContentSearchConfig = async () => {
  const configUrl = getContentSearchConfigUrl();
  const { data } = await getAuthenticatedHttpClient().get(configUrl);
  return {
    url: data.url,
    indexName: data.index_name,
    apiKey: data.api_key,
  };
};
|
|
|
|
/**
 * Detailed "content" of an XBlock/component, from the block's index_dictionary function. Contents depend on the type.
 * @typedef {{htmlContent?: string, capaContent?: string, [k: string]: any}} ContentDetails
 */
|
|
|
|
/**
 * Meilisearch filters can be expressed as strings or arrays.
 * This helper method converts from any supported input format to an array, for consistency.
 *
 * - a string becomes a one-element array
 * - a missing filter (`undefined` or `null`) becomes an empty array
 * - an array is returned as-is (the same instance, not a copy)
 *
 * @param {import('meilisearch').Filter | null} [filter] A filter expression, e.g. 'foo = bar' or
 *   [['a = b', 'a = c'], 'd = e']
 * @returns {(string | string[])[]}
 */
function forceArray(filter) {
  if (filter === undefined || filter === null) {
    // Callers spread the result into a larger filter array, so "no filter" must still be iterable.
    return [];
  }
  return typeof filter === 'string' ? [filter] : filter;
}
|
|
|
|
/**
 * Given tag paths like ["Difficulty > Hard", "Subject > Math"], convert them to an array of Meilisearch
 * filter conditions. The tag filters are all AND conditions (not OR).
 *
 * A path with a single part is matched against `tags.taxonomy`; a path with N parts (N > 1) is matched
 * against `tags.level{N - 2}`. Double quotes inside a tag path are backslash-escaped so that they cannot
 * terminate the quoted filter value early.
 *
 * @param {string[]} [tagsFilter] e.g. ["Difficulty > Hard", "Subject > Math"]
 * @returns {string[]} One filter condition per tag path; empty if no tag paths were given
 */
function formatTagsFilter(tagsFilter) {
  return (tagsFilter ?? []).map((tagPath) => {
    const partCount = tagPath.split(TAG_SEP).length;
    const attribute = partCount === 1 ? 'tags.taxonomy' : `tags.level${partCount - 2}`;
    const quotedValue = tagPath.replace(/"/g, '\\"');
    return `${attribute} = "${quotedValue}"`;
  });
}
|
|
|
|
/**
 * Information about a single XBlock returned in the search results
 * Defined in edx-platform/openedx/core/djangoapps/content/search/documents.py
 * @typedef {Object} ContentHit
 * @property {string} id
 * @property {string} usageKey
 * @property {"course_block"|"library_block"} type
 * @property {string} blockId
 * @property {string} displayName
 * @property {string} blockType The block_type part of the usage key. What type of XBlock this is.
 * @property {string} contextKey The course or library ID
 * @property {string} org
 * @property {[{displayName: string}, ...Array<{displayName: string, usageKey: string}>]} breadcrumbs
 *    First one is the name of the course/library itself.
 *    After that is the name and usage key of any parent Section/Subsection/Unit/etc.
 * @property {Record<'taxonomy'|'level0'|'level1'|'level2'|'level3', string[]>} tags
 * @property {ContentDetails} [content]
 * @property {{displayName: string, content: ContentDetails}} formatted Same fields with <mark>...</mark> highlights
 */
|
|
|
|
/**
 * Convert search hits to camelCase
 *
 * The `_formatted` sub-object of the raw hit is exposed as `formatted`, reduced to its display name and
 * content. If the hit has no `_formatted` data, or it lacks a display name, the plain `display_name` of
 * the hit is used, and the formatted content defaults to an empty object.
 *
 * @param {Record<string, any>} hit A search result directly from Meilisearch
 * @returns {ContentHit}
 */
function formatSearchHit(hit) {
  // eslint-disable-next-line @typescript-eslint/naming-convention
  const { _formatted = {}, ...plainFields } = hit;
  return camelCaseObject({
    ...plainFields,
    formatted: {
      displayName: _formatted.display_name ?? plainFields.display_name,
      content: _formatted.content ?? {},
    },
  });
}
|
|
|
|
/**
 * Run a content search and, in the same round trip, load the facet counts for the "block types" filter.
 *
 * Two queries are sent with a single multi-search request: the first returns one page of hits with every
 * filter applied; the second returns no hits, only the `block_type` facet distribution, computed without
 * the block type filter so that the other available block types are still reported.
 *
 * @param {{
 *   client: import('meilisearch').MeiliSearch,
 *   indexName: string,
 *   searchKeywords: string,
 *   blockTypesFilter?: string[],
 *   tagsFilter?: string[],
 *   extraFilter?: import('meilisearch').Filter,
 *   offset?: number,
 * }} context
 * @returns {Promise<{
 *   hits: ContentHit[],
 *   nextOffset: number|undefined,
 *   totalHits: number,
 *   blockTypes: Record<string, number>,
 * }>} `nextOffset` is undefined when the returned page was not full, i.e. there are no more pages.
 */
export async function fetchSearchResults({
  client,
  indexName,
  searchKeywords,
  blockTypesFilter,
  /** The full path of tags that each result MUST have, e.g. ["Difficulty > Hard", "Subject > Math"] */
  tagsFilter,
  extraFilter,
  /** How many results to skip, e.g. if limit=20 then passing offset=20 gets the second page. */
  offset = 0,
}) {
  const limit = 20; // How many results to retrieve per page.

  // Convert 'extraFilter' into an array
  const extraFilterFormatted = forceArray(extraFilter);
  // The block types are alternatives, so they form a single inner (OR) array.
  const blockTypesFilterFormatted = blockTypesFilter?.length
    ? [blockTypesFilter.map((bt) => `block_type = ${bt}`)]
    : [];
  const tagsFilterFormatted = formatTagsFilter(tagsFilter);

  /** @type {import('meilisearch').MultiSearchQuery[]} */
  const queries = [
    // First query is always to get the hits, with all the filters applied.
    {
      indexUid: indexName,
      q: searchKeywords,
      filter: [
        // top-level entries in the array are AND conditions and must all match
        // Inner arrays are OR conditions, where only one needs to match.
        ...extraFilterFormatted,
        ...blockTypesFilterFormatted,
        ...tagsFilterFormatted,
      ],
      attributesToHighlight: ['display_name', 'content'],
      highlightPreTag,
      highlightPostTag,
      attributesToCrop: ['content'],
      cropLength: 20,
      offset,
      limit,
    },
    // The second query is to get the possible values for the "block types" filter
    {
      indexUid: indexName,
      q: searchKeywords,
      facets: ['block_type'],
      filter: [
        ...extraFilterFormatted,
        // We exclude the block type filter here so we get all the other available options for it.
        ...tagsFilterFormatted,
      ],
      limit: 0, // We don't need any "hits" for this - just the facetDistribution
    },
  ];

  const { results } = await client.multiSearch({ queries });
  const [hitsResult, facetsResult] = results;
  return {
    hits: hitsResult.hits.map(formatSearchHit),
    totalHits: hitsResult.totalHits ?? hitsResult.estimatedTotalHits ?? hitsResult.hits.length,
    blockTypes: facetsResult.facetDistribution?.block_type ?? {},
    // A full page means there may be more results; anything less means this was the last page.
    nextOffset: hitsResult.hits.length === limit ? offset + limit : undefined,
  };
}
|
|
|
|
/**
 * In the context of a particular search (which may already be filtered to a specific course, specific block types,
 * and/or have a keyword search applied), get the tree of tags that can be used to further filter/refine the search.
 *
 * Only one level of the tree is returned per call: the root taxonomies when `parentTagPath` is not given,
 * otherwise the direct children of `parentTagPath`.
 *
 * @param {object} context
 * @param {import('meilisearch').MeiliSearch} context.client The Meilisearch client instance
 * @param {string} context.indexName Which index to search
 * @param {string} context.searchKeywords Overall query string for the search; may be empty
 * @param {string[]} [context.blockTypesFilter] Filter to only include these block types e.g. ["problem", "html"]
 * @param {import('meilisearch').Filter} [context.extraFilter] Any other filters to apply, e.g. course ID.
 * @param {string} [context.parentTagPath] Only fetch tags below this parent tag/taxonomy e.g. "Places > North America"
 * @returns {Promise<{
 *   tags: {tagName: string, tagPath: string, tagCount: number, hasChildren: boolean}[];
 *   mayBeMissingResults: boolean;
 * }>}
 */
export async function fetchAvailableTagOptions({
  client,
  indexName,
  searchKeywords,
  blockTypesFilter,
  extraFilter,
  parentTagPath,
  // Ideally this would include 'tagSearchKeywords' to filter the tag tree by keyword search but that's not possible yet
}) {
  const meilisearchFacetLimit = 100; // The 'maxValuesPerFacet' on the index. For Open edX we leave the default, 100.

  // Convert 'extraFilter' into an array
  const extraFilterFormatted = forceArray(extraFilter);
  const blockTypesFilterFormatted = blockTypesFilter?.length
    ? [blockTypesFilter.map((bt) => `block_type = ${bt}`)]
    : [];

  // Figure out which "facet" (attribute of the documents in the search index) holds the tags at the level we want.
  // e.g. "tags.taxonomy" is the facet/attribute that holds the root tags, and "tags.level0" has its child tags.
  let facetName = 'tags.taxonomy';
  let depth = 0;
  /** @type {string[]} */
  let parentFilter = [];
  if (parentTagPath) {
    depth = parentTagPath.split(TAG_SEP).length;
    facetName = `tags.level${depth - 1}`;
    const parentFacetName = depth === 1 ? 'tags.taxonomy' : `tags.level${depth - 2}`;
    // Escape double quotes so a tag name containing one cannot terminate the quoted filter value early.
    parentFilter = [`${parentFacetName} = "${parentTagPath.replace(/"/g, '\\"')}"`];
  }

  /**
   * Load the values of one facet, within the current search and below the parent tag (if any).
   * @param {string} facet
   */
  const searchFacet = (facet) => client.index(indexName).searchForFacetValues({
    facetName: facet,
    // It's not super clear in the documentation, but facetQuery is basically a "startsWith" query, which is what we
    // need here to return just the tags below the selected parent tag. However, it's a fuzzy query that may match
    // more tags than we want it to, so we have to explicitly post-process and reduce the set of results using an
    // exact match.
    facetQuery: parentTagPath,
    q: searchKeywords,
    filter: [...extraFilterFormatted, ...blockTypesFilterFormatted, ...parentFilter],
  });

  // Loading the facet values with this API gives us much more flexibility than if we just requested the facets by
  // passing { facets: ["tags"] } into the main search request; that works fine for loading the root tags but can't
  // load specific child tags like we can using this approach.
  //
  // As an optimization, the data about "has child tags" (the next level down) is requested in parallel with the
  // tags themselves. Both requests are awaited together so that a failure of either one rejects this function
  // without leaving the other promise unhandled.
  const maybeHasChildren = depth > 0 && depth < 4; // If depth=0, it definitely has children; we don't support depth > 4
  const [childResult, { facetHits }] = await Promise.all([
    maybeHasChildren ? searchFacet(`tags.level${depth}`) : undefined,
    searchFacet(facetName),
  ]);

  // Figure out if [some of] the tags at this level have children:
  /** @type {(tagPath: string) => boolean} */
  let hasChildTags = () => false;
  if (childResult !== undefined) {
    const { facetHits: childFacetHits } = childResult;
    if (childFacetHits.length >= meilisearchFacetLimit) {
      // Assume they all have child tags; we can't retrieve more than 100 facet values (per Meilisearch docs) so
      // we can't say for sure on a tag-by-tag basis, but we know that at least some of them have children, so
      // it's a safe bet that most/all of them have children. And it's not a huge problem if we say they have children
      // but they don't.
      hasChildTags = () => true;
    } else if (childFacetHits.length > 0) {
      // Some (or maybe all) of these tags have child tags. Let's figure out which ones exactly.
      // Trim the child tag off: 'Places > North America > New York' becomes 'Places > North America'
      const tagsWithChildren = new Set(
        childFacetHits.map(({ value }) => value.split(TAG_SEP).slice(0, -1).join(TAG_SEP)),
      );
      hasChildTags = (tagPath) => tagsWithChildren.has(tagPath);
    }
  }

  /** @type {{tagName: string, tagPath: string, tagCount: number, hasChildren: boolean}[]} */
  const tags = [];
  facetHits.forEach(({ value: tagPath, count: tagCount }) => {
    if (!parentTagPath) {
      tags.push({
        tagName: tagPath,
        tagPath,
        tagCount,
        hasChildren: true, // You can't tag something with just a taxonomy, so this definitely has child tags.
      });
      return;
    }
    const parts = tagPath.split(TAG_SEP);
    const tagName = parts[parts.length - 1];
    if (tagPath === `${parentTagPath}${TAG_SEP}${tagName}`) {
      tags.push({
        tagName,
        tagPath,
        tagCount,
        hasChildren: hasChildTags(tagPath),
      });
    } // Else this is a tag from another taxonomy/parent that was included because this search is "fuzzy". Ignore it.
  });

  // If we hit the limit of facetHits, there are probably even more tags, but there is no API to retrieve
  // them (no pagination etc.), so just tell the user that not all tags could be displayed. This should be pretty rare.
  return { tags, mayBeMissingResults: facetHits.length >= meilisearchFacetLimit };
}
|
|
|
|
/**
 * Best-effort search for *all* tags among the search results (with filters applied) that contain the given keyword.
 *
 * Unfortunately there is no good Meilisearch API for this, so we just have to do the best we can. If more than 1,000
 * objects are tagged with matching tags, this will be an incomplete result. For example, if 1,000 XBlocks/components
 * are tagged with "Tag Alpha 1" and 10 XBlocks are tagged with "Tag Alpha 2", a search for "Alpha" may only return
 * ["Tag Alpha 1"] instead of the correct result ["Tag Alpha 1", "Tag Alpha 2"] because we are limited to 1,000 matches,
 * which may all have the same tags.
 *
 * @param {object} context
 * @param {import('meilisearch').MeiliSearch} context.client The Meilisearch client instance
 * @param {string} context.indexName Which index to search
 * @param {string[]} [context.blockTypesFilter] Filter to only include these block types e.g. ["problem", "html"]
 * @param {import('meilisearch').Filter} [context.extraFilter] Any other filters to apply to the overall search.
 * @param {string} [context.tagSearchKeywords] Only show taxonomies/tags that match these keywords
 * @returns {Promise<{ mayBeMissingResults: boolean; matches: {tagPath: string}[] }>}
 */
export async function fetchTagsThatMatchKeyword({
  client,
  indexName,
  blockTypesFilter,
  extraFilter,
  tagSearchKeywords,
}) {
  // The local substring match below uses the trimmed keyword, so surrounding whitespace typed by the user
  // does not prevent a match against a tag that begins or ends with the keyword.
  const keywordLower = tagSearchKeywords?.trim().toLocaleLowerCase();
  if (!keywordLower) {
    // This data isn't needed if there is no tag keyword search. Don't bother making a search query.
    return { matches: [], mayBeMissingResults: false };
  }

  // Convert 'extraFilter' into an array
  const extraFilterFormatted = forceArray(extraFilter);
  const blockTypesFilterFormatted = blockTypesFilter?.length
    ? [blockTypesFilter.map((bt) => `block_type = ${bt}`)]
    : [];

  const limit = 1000; // This is the most results we can retrieve in a single query.

  // We search for any matches of the keyword in the "tags" field, respecting the current filters like block type filter
  // or current course filter. (Unfortunately we cannot also include the overall `searchKeywords` so this will match
  // against more content than it should.)
  const { hits } = await client.index(indexName).search(tagSearchKeywords, {
    filter: [...extraFilterFormatted, ...blockTypesFilterFormatted],
    attributesToSearchOn: ['tags.taxonomy', 'tags.level0', 'tags.level1', 'tags.level2', 'tags.level3'],
    attributesToRetrieve: ['tags'],
    limit,
    // We'd like to use 'showMatchesPosition: true' to know exactly which tags match, but it doesn't provide the
    // detail we need; it's impossible to tell which tag at a given level matched based on the returned _matchesPosition
    // data - https://github.com/orgs/meilisearch/discussions/550
  });

  // We have data like this:
  // hits: [
  //   {
  //     tags: { taxonomy: ["Competency"], level0: ["Competency > Abilities"], level1: ["Competency > Abilities > ..."] },
  //   }, ...
  // ]
  // Collect every distinct tag path (at any level) that contains the keyword, ignoring case.
  /** @type {Set<string>} */
  const matches = new Set();
  hits.forEach((hit) => {
    Object.values(hit.tags ?? {}).flat().forEach((tagPath) => {
      if (tagPath.toLocaleLowerCase().includes(keywordLower)) {
        matches.add(tagPath);
      }
    });
  });

  return { matches: Array.from(matches).map((tagPath) => ({ tagPath })), mayBeMissingResults: hits.length === limit };
}
|