Merge "Add extracts to REST search as description"
diff --git a/extension.json b/extension.json
index abfb0ab..3f3e55c 100644
--- a/extension.json
+++ b/extension.json
@@ -35,11 +35,15 @@
"TextExtracts\\": "includes/"
},
"Hooks": {
- "ApiOpenSearchSuggest": "main"
+ "ApiOpenSearchSuggest": "main",
+ "SearchResultProvideDescription": "main"
},
"HookHandlers": {
"main": {
- "class": "TextExtracts\\Hooks"
+ "class": "TextExtracts\\Hooks",
+ "services": [
+ "ConfigFactory"
+ ]
}
},
"config": {
@@ -64,6 +68,9 @@
},
"ExtractsExtendOpenSearchXml": {
"value": false
+ },
+ "ExtractsExtendRestSearch": {
+ "value": false
}
},
"manifest_version": 2
diff --git a/includes/Hooks.php b/includes/Hooks.php
index e55b119..f181e59 100644
--- a/includes/Hooks.php
+++ b/includes/Hooks.php
@@ -2,41 +2,110 @@
namespace TextExtracts;
-use ApiBase;
use ApiMain;
use ApiResult;
+use Generator;
use MediaWiki\Api\Hook\ApiOpenSearchSuggestHook;
-use MediaWiki\MediaWikiServices;
+use MediaWiki\Config\Config;
+use MediaWiki\Config\ConfigFactory;
use MediaWiki\Request\FauxRequest;
+use MediaWiki\Rest\Hook\SearchResultProvideDescriptionHook;
/**
* @license GPL-2.0-or-later
*/
-class Hooks implements ApiOpenSearchSuggestHook {
+class Hooks implements
+ ApiOpenSearchSuggestHook,
+ SearchResultProvideDescriptionHook
+{
+
+ private Config $config;
+
+	/** @param ConfigFactory $configFactory Factory used to build the 'textextracts' config */
+	public function __construct( ConfigFactory $configFactory ) {
+		$extensionConfig = $configFactory->makeConfig( 'textextracts' );
+		$this->config = $extensionConfig;
+	}
/**
- * ApiOpenSearchSuggest hook handler
- * @param array &$results Array of search results
+ * Trim an extract to a sensible length.
+ *
+ * Adapted from Extension:OpenSearchXml, which adapted it from
+ * Extension:ActiveAbstract.
+ *
+ * @param string $text
+ * @param int $length Target length; actual result will continue to the end of a sentence.
+ * @return string
*/
- public function onApiOpenSearchSuggest( &$results ) {
- $config = MediaWikiServices::getInstance()->getConfigFactory()->makeConfig( 'textextracts' );
- if ( !$config->get( 'ExtractsExtendOpenSearchXml' ) || $results === [] ) {
- return;
+	private static function trimExtract( $text, $length ) {
+		// Sentence terminators. Non-ASCII marks are spelled as \x{...} code point escapes
+		// (resolved by PCRE under /u), so the pattern does not depend on this file's encoding.
+		$endchars = [
+			// regular ASCII; a dot that directly follows a digit does not end a sentence
+			'([^\d])\.\s', '\!\s', '\?\s',
+			// full-width ideographic full stop
+			'\x{3002}',
+			// double-width roman forms: full stop, exclamation mark, question mark
+			'\x{FF0E}', '\x{FF01}', '\x{FF1F}',
+			// half-width ideographic full stop
+			'\x{FF61}',
+		];
+		$end = '(?:' . implode( '|', $endchars ) . ')';
+		// Built on every call: the pattern embeds $length, so caching it in a static
+		// would make later calls silently reuse the first caller's length.
+		$minLength = max( 0, (int)$length );
+		$regex = '/^(.{' . $minLength . ",}?$end+)/u";
+		$matches = [];
+		if ( preg_match( $regex, $text, $matches ) ) {
+			// Group 1 is the whole leading run, including the terminator(s).
+			return trim( $matches[1] );
+		}
+		// No sentence end found (or the text could not be matched): return the first line only.
+		return trim( explode( "\n", $text )[0] );
+	}
- foreach ( array_chunk( array_keys( $results ), ApiBase::LIMIT_SML1 ) as $pageIds ) {
+ /**
+ * Retrieves extracts data for the given page IDs from the TextExtract API.
+ * The page IDs are chunked into the max limit of exlimit of the TextExtract API
+ *
+ * @param array $pageIds An array of page IDs to retrieve extracts for
+ * @return Generator Yields the result data from the API request
+ * $data = [
+ * 'pageId' => [
+ * 'ns' => int of the namespace
+ * 'title' => string of the title of the page
+ * 'extract' => string of the text extracts of the page
+ * ]
+ * ]
+ */
+ private function getExtractsData( array $pageIds ) { // generator: nothing is requested until it is iterated
+ foreach ( array_chunk( $pageIds, 20 ) as $chunkedPageIds ) { // one internal API request per chunk of at most 20 IDs
$api = new ApiMain( new FauxRequest(
[
'action' => 'query',
'prop' => 'extracts',
'explaintext' => true,
'exintro' => true,
- 'exlimit' => count( $pageIds ),
- 'pageids' => implode( '|', $pageIds ),
- ] )
- );
+ 'exlimit' => count( $chunkedPageIds ), // request as many extracts as there are IDs in this chunk
+ 'pageids' => implode( '|', $chunkedPageIds ), // pipe-separated ID list
+ ]
+ ) );
$api->execute();
+ yield $api->getResult()->getResultData( [ 'query', 'pages' ] ); // yields the 'query' > 'pages' result data of this chunk
+ }
+ }
+
+ /**
+ * ApiOpenSearchSuggest hook handler
+ * @param array &$results Array of search results
+ */
+ public function onApiOpenSearchSuggest( &$results ) {
+ if ( !$this->config->get( 'ExtractsExtendOpenSearchXml' ) || $results === [] ) {
+ return;
+ }
+
+ $pageIds = array_keys( $results );
+ foreach ( $this->getExtractsData( $pageIds ) as $data ) {
foreach ( $pageIds as $id ) {
$contentKey = $data[$id]['extract'][ApiResult::META_CONTENT] ?? '*';
if ( isset( $data[$id]['extract'][$contentKey] ) ) {
@@ -46,4 +115,31 @@
}
}
}
+
+ /**
+ * Used to update Search Results with descriptions for Search Engine.
+ * @param array $pageIdentities Array (string=>SearchResultPageIdentity) where key is pageId
+ * @param array &$descriptions Output array (string=>string|null)
+ * where key is pageId and value is either a description for given page or null
+ */
+	public function onSearchResultProvideDescription(
+		array $pageIdentities,
+		&$descriptions
+	): void {
+		if ( !$this->config->get( 'ExtractsExtendRestSearch' ) || $pageIdentities === [] ) {
+			return;
+		}
+
+		// array_map keeps the keys of $pageIdentities; only the mapped page IDs are used below.
+		$ids = array_map( static fn ( $identity ) => $identity->getId(), $pageIdentities );
+		foreach ( $this->getExtractsData( $ids ) as $pages ) {
+			foreach ( $ids as $pageId ) {
+				$key = $pages[$pageId]['extract'][ApiResult::META_CONTENT] ?? '*';
+				if ( !isset( $pages[$pageId]['extract'][$key] ) ) {
+					continue;
+				}
+				$descriptions[$pageId] = self::trimExtract( $pages[$pageId]['extract'][$key], 150 );
+			}
+		}
+	}
}