|
| 1 | +#!/usr/bin/env php |
| 2 | +<?php |
/**
 * Utility script to populate the Elasticsearch indexes.
 *
 * Usage: pass the language directory to scan as the first argument.
 */
| 7 | + |
// Elasticsearch connection settings: server base URL and target index name.
const ES_URL = 'http://localhost:9200';
const ES_INDEX = 'documentation';
| 11 | + |
| 12 | + |
/**
 * Script entry point: index every .rst file found under a language directory.
 *
 * Walks the directory given as the first CLI argument recursively and hands
 * each file whose path ends in ".rst" to updateIndex(). Exits with status 1
 * when the argument is missing or does not name a directory.
 *
 * @param array $argv Command line arguments; $argv[1] is the directory to scan.
 * @return void
 */
function main($argv) {
    if (empty($argv[1])) {
        echo "A language to scan is required.\n";
        exit(1);
    }
    $lang = $argv[1];

    // RecursiveDirectoryIterator throws on a missing directory; report it as
    // a usage error instead of dying with an uncaught exception.
    if (!is_dir($lang)) {
        echo "The language directory '$lang' does not exist.\n";
        exit(1);
    }

    $directory = new RecursiveDirectoryIterator($lang);
    $recurser = new RecursiveIteratorIterator($directory);
    // Anchor the pattern at the end of the path so that backups or caches
    // such as "foo.rst.bak" or "foo.rst~" are not indexed.
    $matcher = new RegexIterator($recurser, '/\.rst$/');

    foreach ($matcher as $file) {
        updateIndex($lang, $file);
    }
    echo "\nIndex update complete\n";
}
| 29 | + |
/**
 * Send one documentation file to the search index.
 *
 * Builds a JSON document (contents, title, url) from the file and PUTs it to
 * ES_URL/ES_INDEX/$lang/$id, where $id is the file path relative to $lang with
 * the extension removed and "/" replaced by "-". Exits with status 2 when the
 * document cannot be encoded or the request fails.
 *
 * @param string $lang Language directory being scanned; also used as a URL segment.
 * @param SplFileInfo $file The file to index.
 * @return void
 */
function updateIndex($lang, $file) {
    $fileData = readFileData($file);
    $pathname = $file->getPathname();

    // Remove only the final extension. Splitting on the first "." would
    // truncate paths such as "./en/foo.rst" or "en/1.2/foo.rst".
    $filename = preg_replace('/\.[^.\/]*$/', '', $pathname);

    $path = $filename . '.html';

    // Strip the language directory only where it is the leading path segment;
    // replacing it everywhere would also mangle e.g. "en/garden" for "en".
    $id = $filename;
    $prefix = rtrim($lang, '/') . '/';
    if (strpos($id, $prefix) === 0) {
        $id = substr($id, strlen($prefix));
    }
    $id = str_replace('/', '-', $id);
    $id = trim($id, '-');

    $url = implode('/', array(ES_URL, ES_INDEX, $lang, $id));

    $data = json_encode(array(
        'contents' => $fileData['contents'],
        'title' => $fileData['title'],
        'url' => $path,
    ));
    if ($data === false) {
        // json_encode() fails on e.g. invalid UTF-8; do not send an empty body.
        echo "[ERROR] Could not encode $file as JSON (json error code " . json_last_error() . ").\n";
        exit(2);
    }
    $size = strlen($data);

    // cURL uploads from a stream, so stage the payload in memory.
    $fh = fopen('php://memory', 'w+');
    fwrite($fh, $data);
    rewind($fh);

    echo "Sending request:\n\tfile: $file\n\turl: $url\n";

    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_PUT, true);
    curl_setopt($ch, CURLOPT_INFILE, $fh);
    curl_setopt($ch, CURLOPT_INFILESIZE, $size);
    // Declare the body type; newer Elasticsearch releases reject request
    // bodies that do not carry a JSON Content-Type.
    curl_setopt($ch, CURLOPT_HTTPHEADER, array('Content-Type: application/json'));
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    $response = curl_exec($ch);
    $metadata = curl_getinfo($ch);
    $error = curl_error($ch);

    curl_close($ch);
    fclose($fh);

    // A transport failure yields false with http_code 0, and 400 itself is an
    // error status, so check both rather than only "> 400".
    if ($response === false || $metadata['http_code'] >= 400) {
        echo "[ERROR] Failed to complete request.\n";
        if ($response === false) {
            echo "cURL error: $error\n";
        }
        var_dump($response);
        exit(2);
    }

    echo "Sent $file\n";
}
| 77 | + |
/**
 * Read a documentation file and split it into a title and the text to index.
 *
 * The title is the first line of the file when the line directly below it is
 * an underline made of "=" or "#" characters. Files that start differently
 * (for example with an overline) get a null title and keep all their text.
 * Underline-only lines made of "-", "=" or "~" are removed from the text.
 *
 * @param SplFileInfo|string $file File to read.
 * @return array Array with the keys 'contents' (string) and 'title' (string or null).
 */
function readFileData($file) {
    $contents = file_get_contents($file);
    if ($contents === false) {
        // Unreadable file: index it as empty rather than feeding false to the regexes.
        $contents = '';
    }

    // Extract the title, guessing that a first line underlined with # or =
    // is the title. Without a match there is no title to extract or strip.
    $title = null;
    if (preg_match('/^(.*)\n[=#]+\n/', $contents, $matches)) {
        $title = $matches[1];

        // The match is anchored at the start of the file, so cut it off there
        // instead of deleting every later repetition of the same text.
        $contents = (string)substr($contents, strlen($matches[0]));
    }

    // Remove title markers from the text.
    $contents = preg_replace('/\n[-=~]+\n/', '', $contents);

    return compact('contents', 'title');
}
| 94 | + |
// Run the indexer with the arguments given on the command line.
main($argv);
0 commit comments