Merge "Add extracts to REST search as description"
diff --git a/extension.json b/extension.json
index abfb0ab..3f3e55c 100644
--- a/extension.json
+++ b/extension.json
@@ -35,11 +35,15 @@
 		"TextExtracts\\": "includes/"
 	},
 	"Hooks": {
-		"ApiOpenSearchSuggest": "main"
+		"ApiOpenSearchSuggest": "main",
+		"SearchResultProvideDescription": "main"
 	},
 	"HookHandlers": {
 		"main": {
-			"class": "TextExtracts\\Hooks"
+			"class": "TextExtracts\\Hooks",
+			"services": [
+				"ConfigFactory"
+			]
 		}
 	},
 	"config": {
@@ -64,6 +68,9 @@
 		},
 		"ExtractsExtendOpenSearchXml": {
 			"value": false
+		},
+		"ExtractsExtendRestSearch": {
+			"value": false
 		}
 	},
 	"manifest_version": 2
diff --git a/includes/Hooks.php b/includes/Hooks.php
index e55b119..f181e59 100644
--- a/includes/Hooks.php
+++ b/includes/Hooks.php
@@ -2,41 +2,110 @@
 
 namespace TextExtracts;
 
-use ApiBase;
 use ApiMain;
 use ApiResult;
+use Generator;
 use MediaWiki\Api\Hook\ApiOpenSearchSuggestHook;
-use MediaWiki\MediaWikiServices;
+use MediaWiki\Config\Config;
+use MediaWiki\Config\ConfigFactory;
 use MediaWiki\Request\FauxRequest;
+use MediaWiki\Rest\Hook\SearchResultProvideDescriptionHook;
 
 /**
  * @license GPL-2.0-or-later
  */
-class Hooks implements ApiOpenSearchSuggestHook {
+class Hooks implements
+	ApiOpenSearchSuggestHook,
+	SearchResultProvideDescriptionHook
+{
+
+	/** @var Config The 'textextracts' config instance, resolved once in the constructor */
+	private Config $config;
+
+	/** @param ConfigFactory $configFactory Factory used to build the 'textextracts' config */
+	public function __construct( ConfigFactory $configFactory ) {
+		$this->config = $configFactory->makeConfig( 'textextracts' );
+	}
 
 	/**
-	 * ApiOpenSearchSuggest hook handler
-	 * @param array &$results Array of search results
+	 * Trim an extract to a sensible length.
+	 *
+	 * Adapted from Extension:OpenSearchXml, which adapted it from
+	 * Extension:ActiveAbstract.
+	 *
+	 * @param string $text
+	 * @param int $length Target length; actual result will continue to the end of a sentence.
+	 * @return string
 	 */
-	public function onApiOpenSearchSuggest( &$results ) {
-		$config = MediaWikiServices::getInstance()->getConfigFactory()->makeConfig( 'textextracts' );
-		if ( !$config->get( 'ExtractsExtendOpenSearchXml' ) || $results === [] ) {
-			return;
+	private static function trimExtract( $text, $length ) {
+		// Cache one pattern per length: a single shared pattern would keep the first caller's $length.
+		static $regexes = [];
+		$length = max( 0, (int)$length );
+		if ( !isset( $regexes[$length] ) ) {
+			// Non-ASCII terminators are PCRE \x{...} escapes so the code points stay unambiguous.
+			$endchars = [
+				// regular ASCII
+				'([^\d])\.\s', '\!\s', '\?\s',
+				// full-width ideographic full-stop (U+3002)
+				'\x{3002}',
+				// double-width roman forms (U+FF0E, U+FF01, U+FF1F)
+				'\x{FF0E}', '\x{FF01}', '\x{FF1F}',
+				// half-width ideographic full stop (U+FF61)
+				'\x{FF61}',
+			];
+			$end = '(?:' . implode( '|', $endchars ) . ')';
+			$regexes[$length] = "/^(.{{$length},}?$end+)/u";
 		}
+		$matches = [];
+		if ( preg_match( $regexes[$length], $text, $matches ) ) {
+			return trim( $matches[1] );
+		}
+		// No sentence end found: just return the first line
+		return trim( explode( "\n", $text )[0] );
+	}
 
-		foreach ( array_chunk( array_keys( $results ), ApiBase::LIMIT_SML1 ) as $pageIds ) {
+	/**
+	 * Fetches plain-text intro extracts for the given page IDs through an internal
+	 * action=query&prop=extracts request, issued in batches of at most 20 page IDs
+	 * (the batch size is meant to match the exlimit maximum of the TextExtracts API).
+	 *
+	 * @param array $pageIds An array of page IDs to retrieve extracts for
+	 * @return Generator Yields, once per batch, the [ 'query', 'pages' ] result data:
+	 *   $data = [
+	 *     pageId => [
+	 *       'ns' => int of the namespace
+	 *       'title' => string of the title of the page
+	 *       'extract' => extract text; callers read it under the ApiResult::META_CONTENT key
+	 *     ],
+	 *   ]
+	 */
+	private function getExtractsData( array $pageIds ) {
+		foreach ( array_chunk( $pageIds, 20 ) as $chunkedPageIds ) {
 			$api = new ApiMain( new FauxRequest(
 				[
 					'action' => 'query',
 					'prop' => 'extracts',
 					'explaintext' => true,
 					'exintro' => true,
-					'exlimit' => count( $pageIds ),
-					'pageids' => implode( '|', $pageIds ),
-				] )
-			);
+					'exlimit' => count( $chunkedPageIds ),
+					'pageids' => implode( '|', $chunkedPageIds ),
+				]
+			) );
 			$api->execute();
-			$data = $api->getResult()->getResultData( [ 'query', 'pages' ] );
+			yield $api->getResult()->getResultData( [ 'query', 'pages' ] );
+		}
+	}
+
+	/**
+	 * ApiOpenSearchSuggest hook handler
+	 * @param array &$results Array of search results
+	 */
+	public function onApiOpenSearchSuggest( &$results ) {
+		if ( !$this->config->get( 'ExtractsExtendOpenSearchXml' ) || $results === [] ) {
+			return;
+		}
+
+		$pageIds = array_keys( $results );
+		foreach ( $this->getExtractsData( $pageIds ) as $data ) {
 			foreach ( $pageIds as $id ) {
 				$contentKey = $data[$id]['extract'][ApiResult::META_CONTENT] ?? '*';
 				if ( isset( $data[$id]['extract'][$contentKey] ) ) {
@@ -46,4 +115,31 @@
 			}
 		}
 	}
+
+	/**
+	 * SearchResultProvideDescription hook handler; adds trimmed extracts as descriptions.
+	 * @param array $pageIdentities Array (string=>SearchResultPageIdentity) where key is pageId
+	 * @param array &$descriptions Output array (string=>string|null)
+	 *   where key is pageId and value is either a description for given page or null
+	 */
+	public function onSearchResultProvideDescription(
+		array $pageIdentities,
+		&$descriptions
+	): void {
+		if ( !$this->config->get( 'ExtractsExtendRestSearch' ) || $pageIdentities === [] ) {
+			return;
+		}
+
+		$ids = array_map( static fn ( $identity ) => $identity->getId(), $pageIdentities );
+		foreach ( $this->getExtractsData( $ids ) as $pages ) {
+			foreach ( $ids as $pageId ) {
+				$extract = $pages[$pageId]['extract'] ?? null;
+				$contentKey = $extract[ApiResult::META_CONTENT] ?? '*';
+				if ( !isset( $extract[$contentKey] ) ) {
+					continue;
+				}
+				$descriptions[$pageId] = self::trimExtract( $extract[$contentKey], 150 );
+			}
+		}
+	}
 }