Files
frontend-app-authoring/src/search-modal/data/api.js
Jillian 6b6d3aaa7a Upgrade frontend-build to v14 (#1052)
* fix: warnings about Duplicate message id
* fix: paragon's Hyperlink no longer accepts a 'content' attribute
* test: ensure all act() calls are async
* test: Removed "async" from "describe"
* fix: DiscussionsSettings tests
* Don't nest userAction.click in act() -- nested act() statements have
  indeterminate behaviour.
* Use getBy* instead of findBy* with userAction to avoid nested act() statements
* Always await userEvent.click
* Use fireEvent.click when the onClick handlers need to be called
* Use queryBy* instead of getBy* when using .toBeInTheDocument or 
* fix: typo in data-testid
* test: Use useLocation to test route changes
* Don't nest userAction.click in act() -- nested act() statements have
* chore: fix lint:fix and lint errors
* remove "indent" setting from .eslintrc.js
* add @typescript-eslint/ prefix to eslint-disable-line statements where flagged by linter
* changed stylelint setting import-notation to "string"
* test: fix failing tests after upgrade
* fix: css error "target selector was not found"
* chore: upgrades dependency frontend-lib-content-components@2.3.0
* chore: bumps @edx/frontend-component-ai-translations to ^2.1.0

---------

Co-authored-by: Yusuf Musleh <yusuf@opencraft.com>
2024-06-22 00:14:46 +05:30

390 lines
16 KiB
JavaScript

// @ts-check
import { camelCaseObject, getConfig } from '@edx/frontend-platform';
import { getAuthenticatedHttpClient } from '@edx/frontend-platform/auth';
/**
 * Build the absolute URL of the Studio endpoint that serves the content search configuration.
 *
 * @returns {string} The endpoint path resolved against the configured STUDIO_BASE_URL.
 */
export const getContentSearchConfigUrl = () => {
  const { STUDIO_BASE_URL: studioBaseUrl } = getConfig();
  const endpoint = new URL('api/content_search/v2/studio/', studioBaseUrl);
  return endpoint.href;
};
/** The separator used for hierarchical tags in the search index, e.g. tags.level1 = "Subject > Math > Calculus" */
export const TAG_SEP = ' > ';
export const highlightPreTag = '__meili-highlight__'; // Indicate the start of a highlighted (matching) term
export const highlightPostTag = '__/meili-highlight__'; // Indicate the end of a highlighted (matching) term
/**
 * Load the content search configuration from the CMS.
 *
 * Performs an authenticated GET against the content search config endpoint and converts the snake_case
 * fields of the response body into the camelCase shape used by the frontend.
 *
 * @returns {Promise<{url: string, indexName: string, apiKey: string}>}
 */
export const getContentSearchConfig = async () => {
  const endpoint = getContentSearchConfigUrl();
  const { data } = await getAuthenticatedHttpClient().get(endpoint);
  const { url, index_name: indexName, api_key: apiKey } = data;
  return { url, indexName, apiKey };
};
/**
 * Detailed "content" of an XBlock/component, from the block's index_dictionary function. The contents depend on the
 * block type.
 * @typedef {{htmlContent?: string, capaContent?: string, [k: string]: any}} ContentDetails
 */
/**
 * Meilisearch filters can be expressed as strings or arrays.
 * This helper method converts from any supported input format to an array, for consistency.
 *
 * A missing filter (`undefined` or `null`) becomes an empty array, so the result can always be spread
 * into a larger filter array.
 *
 * @param {import('meilisearch').Filter | null} [filter] A filter expression, e.g. 'foo = bar' or
 *   [['a = b', 'a = c'], 'd = e']
 * @returns {(string | string[])[]} A single string is wrapped in a new array; an array is returned as-is (not copied).
 */
function forceArray(filter) {
  if (filter === undefined || filter === null) {
    return [];
  }
  return typeof filter === 'string' ? [filter] : filter;
}
/**
 * Given tag paths like ["Difficulty > Hard", "Subject > Math"], convert them to an array of Meilisearch
 * filter conditions. The tag filters are all AND conditions (not OR).
 *
 * A path with a single part is matched against `tags.taxonomy`; a path with N parts (N >= 2) is matched
 * against `tags.level{N-2}`. Double quotes inside a tag path are escaped with a backslash so they cannot
 * terminate the quoted filter value early.
 *
 * @param {string[]} [tagsFilter] e.g. ["Difficulty > Hard", "Subject > Math"]
 * @returns {string[]} One filter condition per tag path, in the same order; empty if no paths were given.
 */
function formatTagsFilter(tagsFilter) {
  return (tagsFilter ?? []).map((tagPath) => {
    const partCount = tagPath.split(TAG_SEP).length;
    const facetName = partCount === 1 ? 'tags.taxonomy' : `tags.level${partCount - 2}`;
    // The value is wrapped in double quotes below, so any double quote it contains must be escaped.
    const quotedValue = tagPath.replace(/"/g, '\\"');
    return `${facetName} = "${quotedValue}"`;
  });
}
/**
* Information about a single XBlock returned in the search results
* Defined in edx-platform/openedx/core/djangoapps/content/search/documents.py
* @typedef {Object} ContentHit
* @property {string} id
* @property {string} usageKey
* @property {"course_block"|"library_block"} type
* @property {string} blockId
* @property {string} displayName
* @property {string} blockType The block_type part of the usage key. What type of XBlock this is.
* @property {string} contextKey The course or library ID
* @property {string} org
* @property {[{displayName: string}, ...Array<{displayName: string, usageKey: string}>]} breadcrumbs
* First one is the name of the course/library itself.
* After that is the name and usage key of any parent Section/Subsection/Unit/etc.
* @property {Record<'taxonomy'|'level0'|'level1'|'level2'|'level3', string[]>} tags
* @property {ContentDetails} [content]
* @property {{displayName: string, content: ContentDetails}} formatted Same fields with <mark>...</mark> highlights
*/
/**
 * Convert search hits to camelCase
 *
 * The Meilisearch-specific `_formatted` property is replaced by a `formatted` property holding the
 * highlighted display name and content. If a hit carries no `_formatted` data, the plain display name
 * and an empty content object are used instead of throwing. The input object is not modified.
 *
 * @param {Record<string, any>} hit A search result directly from Meilisearch
 * @returns {ContentHit}
 */
function formatSearchHit(hit) {
  // eslint-disable-next-line @typescript-eslint/naming-convention
  const { _formatted, ...newHit } = hit;
  newHit.formatted = {
    // Fall back to the un-highlighted name when no highlighted version is present.
    displayName: _formatted?.display_name ?? hit.display_name,
    content: _formatted?.content ?? {},
  };
  return camelCaseObject(newHit);
}
/**
 * Fetch one page of search results, together with the counts per block type for the "block types" filter.
 *
 * Both pieces of information are requested in a single `multiSearch` call: the first query returns the
 * (highlighted, cropped) hits with every filter applied, the second returns only the `block_type` facet
 * distribution with the block type filter left out.
 *
 * @param {{
 *   client: import('meilisearch').MeiliSearch,
 *   indexName: string,
 *   searchKeywords: string,
 *   blockTypesFilter?: string[],
 *   tagsFilter?: string[],
 *   extraFilter?: import('meilisearch').Filter,
 *   offset?: number,
 * }} context
 * @returns {Promise<{
 *   hits: ContentHit[],
 *   nextOffset: number|undefined,
 *   totalHits: number,
 *   blockTypes: Record<string, number>,
 * }>} `nextOffset` is undefined when the returned page was not full.
 */
export async function fetchSearchResults({
  client,
  indexName,
  searchKeywords,
  blockTypesFilter,
  /** The full path of tags that each result MUST have, e.g. ["Difficulty > Hard", "Subject > Math"] */
  tagsFilter,
  extraFilter,
  /** How many results to skip, e.g. if limit=20 then passing offset=20 gets the second page. */
  offset = 0,
}) {
  const pageSize = 20; // How many results to retrieve per page.

  // Normalize every filter into array form. In the combined filter array, top-level entries are AND
  // conditions that must all match, while an inner array is a group of OR conditions.
  const extraConditions = forceArray(extraFilter);
  /** @type {string[][]} */
  const blockTypeConditions = [];
  if (blockTypesFilter?.length) {
    blockTypeConditions.push(blockTypesFilter.map((blockType) => `block_type = ${blockType}`));
  }
  const tagConditions = formatTagsFilter(tagsFilter);

  /**
   * Retrieves the hits themselves, with all the filters applied.
   * @type {import('meilisearch').MultiSearchQuery}
   */
  const hitsQuery = {
    indexUid: indexName,
    q: searchKeywords,
    filter: [...extraConditions, ...blockTypeConditions, ...tagConditions],
    attributesToHighlight: ['display_name', 'content'],
    highlightPreTag,
    highlightPostTag,
    attributesToCrop: ['content'],
    cropLength: 20,
    offset,
    limit: pageSize,
  };

  /**
   * Retrieves the possible values for the "block types" filter. The block type filter itself is left out
   * so that all the other available options are still counted.
   * @type {import('meilisearch').MultiSearchQuery}
   */
  const blockTypeFacetQuery = {
    indexUid: indexName,
    q: searchKeywords,
    facets: ['block_type'],
    filter: [...extraConditions, ...tagConditions],
    limit: 0, // No "hits" are needed from this query - just the facetDistribution
  };

  const { results } = await client.multiSearch({ queries: [hitsQuery, blockTypeFacetQuery] });
  const hitsResult = results[0];
  const facetResult = results[1];
  const isFullPage = hitsResult.hits.length === pageSize;

  return {
    hits: hitsResult.hits.map(formatSearchHit),
    totalHits: hitsResult.totalHits ?? hitsResult.estimatedTotalHits ?? hitsResult.hits.length,
    blockTypes: facetResult.facetDistribution?.block_type ?? {},
    nextOffset: isFullPage ? offset + pageSize : undefined,
  };
}
/**
 * In the context of a particular search (which may already be filtered to a specific course, specific block types,
 * and/or have a keyword search applied), get the tree of tags that can be used to further filter/refine the search.
 *
 * One facet search loads the tags directly below `parentTagPath` (or the taxonomies, if no parent is given).
 * When those tags might have children of their own, a second facet search for the next level down is issued
 * at the same time. Both requests are awaited together, so a failure of either one rejects the returned
 * promise and no request is left running without a rejection handler.
 *
 * @param {object} context
 * @param {import('meilisearch').MeiliSearch} context.client The Meilisearch client instance
 * @param {string} context.indexName Which index to search
 * @param {string} context.searchKeywords Overall query string for the search; may be empty
 * @param {string[]} [context.blockTypesFilter] Filter to only include these block types e.g. ["problem", "html"]
 * @param {import('meilisearch').Filter} [context.extraFilter] Any other filters to apply, e.g. course ID.
 * @param {string} [context.parentTagPath] Only fetch tags below this parent tag/taxonomy e.g. "Places > North America"
 * @returns {Promise<{
 *   tags: {tagName: string, tagPath: string, tagCount: number, hasChildren: boolean}[];
 *   mayBeMissingResults: boolean;
 * }>} `mayBeMissingResults` is true when the facet search returned as many values as the facet limit allows.
 */
export async function fetchAvailableTagOptions({
  client,
  indexName,
  searchKeywords,
  blockTypesFilter,
  extraFilter,
  parentTagPath,
  // Ideally this would include 'tagSearchKeywords' to filter the tag tree by keyword search but that's not possible yet
}) {
  const meilisearchFacetLimit = 100; // The 'maxValuesPerFacet' on the index. For Open edX we leave the default, 100.

  // Figure out which "facet" (attribute of the documents in the search index) holds the tags at the level we want.
  // e.g. "tags.taxonomy" is the facet/attribute that holds the root tags, and "tags.level0" has its child tags.
  // depth is 0 for the root level; otherwise it is the number of parts in the parent tag path.
  const depth = parentTagPath ? parentTagPath.split(TAG_SEP).length : 0;
  const facetName = depth === 0 ? 'tags.taxonomy' : `tags.level${depth - 1}`;

  // The same filter applies to both requests: the caller's filters plus, if set, an exact match on the parent tag.
  const filter = [
    ...forceArray(extraFilter),
    ...(blockTypesFilter?.length ? [blockTypesFilter.map((bt) => `block_type = ${bt}`)] : []),
  ];
  if (parentTagPath) {
    const parentFacetName = depth === 1 ? 'tags.taxonomy' : `tags.level${depth - 2}`;
    // The value is wrapped in double quotes, so any double quote inside the tag path must be escaped.
    filter.push(`${parentFacetName} = "${parentTagPath.replace(/"/g, '\\"')}"`);
  }

  // As an optimization, load the data about "has child tags" in parallel with the main request.
  const maybeHasChildren = depth > 0 && depth < 4; // If depth=0, it definitely has children; we don't support depth > 4
  const childTagsRequest = maybeHasChildren ? client.index(indexName).searchForFacetValues({
    facetName: `tags.level${depth}`, // This will give the children of the current tags.
    facetQuery: parentTagPath,
    q: searchKeywords,
    filter,
  }) : undefined;

  // Now load the facet values. Doing it with this API gives us much more flexibility in loading than if we just
  // requested the facets by passing { facets: ["tags"] } into the main search request; that works fine for loading the
  // root tags but can't load specific child tags like we can using this approach.
  const tagsRequest = client.index(indexName).searchForFacetValues({
    facetName,
    // It's not super clear in the documentation, but facetQuery is basically a "startsWith" query, which is what we
    // need here to return just the tags below the selected parent tag. However, it's a fuzzy query that may match
    // more tags than we want it to, so we have to explicitly post-process and reduce the set of results using an
    // exact match.
    facetQuery: parentTagPath,
    q: searchKeywords,
    filter,
  });

  // Await both requests together so that neither promise can reject without a handler attached.
  const [{ facetHits }, childTagsResponse] = await Promise.all([tagsRequest, childTagsRequest]);

  /** @type {{tagName: string, tagPath: string, tagCount: number, hasChildren: boolean}[]} */
  const tags = [];
  facetHits.forEach(({ value: tagPath, count: tagCount }) => {
    if (!parentTagPath) {
      tags.push({
        tagName: tagPath,
        tagPath,
        tagCount,
        hasChildren: true, // You can't tag something with just a taxonomy, so this definitely has child tags.
      });
      return;
    }
    const parts = tagPath.split(TAG_SEP);
    const tagName = parts[parts.length - 1];
    if (tagPath === `${parentTagPath}${TAG_SEP}${tagName}`) {
      tags.push({
        tagName,
        tagPath,
        tagCount,
        hasChildren: false, // Updated below, once we know which tags have children
      });
    } // Else this is a tag from another taxonomy/parent that was included because this search is "fuzzy". Ignore it.
  });

  // Figure out if [some of] the tags at this level have children:
  if (childTagsResponse !== undefined) {
    const { facetHits: childFacetHits } = childTagsResponse;
    if (childFacetHits.length >= meilisearchFacetLimit) {
      // Assume they all have child tags; we can't retrieve more than 100 facet values (per Meilisearch docs) so
      // we can't say for sure on a tag-by-tag basis, but we know that at least some of them have children, so
      // it's a safe bet that most/all of them have children. And it's not a huge problem if we say they have children
      // but they don't.
      // eslint-disable-next-line no-param-reassign
      tags.forEach((t) => { t.hasChildren = true; });
    } else if (childFacetHits.length > 0) {
      // Some (or maybe all) of these tags have child tags. Let's figure out which ones exactly, by trimming
      // the child tag off each path: 'Places > North America > New York' becomes 'Places > North America'
      /** @type {Set<string>} */
      const tagsWithChildren = new Set(
        childFacetHits.map(({ value }) => value.split(TAG_SEP).slice(0, -1).join(TAG_SEP)),
      );
      // eslint-disable-next-line no-param-reassign
      tags.forEach((t) => { t.hasChildren = tagsWithChildren.has(t.tagPath); });
    }
  }

  // If we hit the limit of facetHits, there are probably even more tags, but there is no API to retrieve
  // them (no pagination etc.), so just tell the user that not all tags could be displayed. This should be pretty rare.
  return { tags, mayBeMissingResults: facetHits.length >= meilisearchFacetLimit };
}
/**
 * Best-effort search for *all* tags among the search results (with filters applied) that contain the given keyword.
 *
 * Unfortunately there is no good Meilisearch API for this, so we just have to do the best we can. If more than 1,000
 * objects are tagged with matching tags, this will be an incomplete result. For example, if 1,000 XBlocks/components
 * are tagged with "Tag Alpha 1" and 10 XBlocks are tagged with "Tag Alpha 2", a search for "Alpha" may only return
 * ["Tag Alpha 1"] instead of the correct result ["Tag Alpha 1", "Tag Alpha 2"] because we are limited to 1,000 matches,
 * which may all have the same tags.
 *
 * Tag paths are matched against the keyword case-insensitively, ignoring leading/trailing whitespace in the
 * keyword (so "Alpha " still matches "Tag Alpha").
 *
 * @param {object} context
 * @param {import('meilisearch').MeiliSearch} context.client The Meilisearch client instance
 * @param {string} context.indexName Which index to search
 * @param {string[]} [context.blockTypesFilter] Filter to only include these block types e.g. ["problem", "html"]
 * @param {import('meilisearch').Filter} [context.extraFilter] Any other filters to apply to the overall search.
 * @param {string} [context.tagSearchKeywords] Only show taxonomies/tags that match these keywords
 * @returns {Promise<{ mayBeMissingResults: boolean; matches: {tagPath: string}[] }>} The distinct matching tag
 *   paths; `mayBeMissingResults` is true when the search returned as many hits as the query limit allows.
 */
export async function fetchTagsThatMatchKeyword({
  client,
  indexName,
  blockTypesFilter,
  extraFilter,
  tagSearchKeywords,
}) {
  if (!tagSearchKeywords || tagSearchKeywords.trim() === '') {
    // This data isn't needed if there is no tag keyword search. Don't bother making a search query.
    return { matches: [], mayBeMissingResults: false };
  }
  // Convert 'extraFilter' into an array
  const extraFilterFormatted = forceArray(extraFilter);
  const blockTypesFilterFormatted = blockTypesFilter?.length
    ? [blockTypesFilter.map((bt) => `block_type = ${bt}`)]
    : [];
  const limit = 1000; // This is the most results we can retrieve in a single query.

  // We search for any matches of the keyword in the "tags" field, respecting the current filters like block type filter
  // or current course filter. (Unfortunately we cannot also include the overall `searchKeywords` so this will match
  // against more content than it should.)
  const { hits } = await client.index(indexName).search(tagSearchKeywords, {
    filter: [...extraFilterFormatted, ...blockTypesFilterFormatted],
    attributesToSearchOn: ['tags.taxonomy', 'tags.level0', 'tags.level1', 'tags.level2', 'tags.level3'],
    attributesToRetrieve: ['tags'],
    limit,
    // We'd like to use 'showMatchesPosition: true' to know exactly which tags match, but it doesn't provide the
    // detail we need; it's impossible to tell which tag at a given level matched based on the returned _matchesPosition
    // data - https://github.com/orgs/meilisearch/discussions/550
  });

  // Compare against the trimmed keyword: the empty-check above already ignores surrounding whitespace, and a
  // keyword such as "Alpha " (trailing space while typing) should still match the tag "Tag Alpha".
  const keywordLower = tagSearchKeywords.trim().toLocaleLowerCase();

  /** @type {Set<string>} */
  const matches = new Set();
  // We have data like this, where each level holds a list of full tag paths:
  // hits: [
  //   {
  //     tags: { taxonomy: ["Competency"], level0: ["Competency > Abilities"], level1: ["Competency > Abilities > ..."] },
  //   }, ...
  // ]
  hits.forEach((hit) => {
    // A hit without a "tags" field contributes nothing, rather than aborting the whole search.
    Object.values(hit.tags ?? {}).forEach((tagPathList) => {
      tagPathList.forEach((tagPath) => {
        if (tagPath.toLocaleLowerCase().includes(keywordLower)) {
          matches.add(tagPath);
        }
      });
    });
  });

  return {
    matches: Array.from(matches, (tagPath) => ({ tagPath })),
    mayBeMissingResults: hits.length >= limit,
  };
}