Edit File by line
/home/zeestwma/richards.../wp-conte.../plugins/litespee.../src
File: crawler.cls.php
// Role simulation.
[500] Fix | Delete
if ( ! empty( $current_crawler['uid'] ) ) {
[501] Fix | Delete
if ( empty( $this->_server_ip ) ) {
[502] Fix | Delete
self::debug( '🛑 Terminated crawler due to Server IP not set' );
[503] Fix | Delete
return false;
[504] Fix | Delete
}
[505] Fix | Delete
$vary_name = $this->cls( 'Vary' )->get_vary_name();
[506] Fix | Delete
$vary_val = $this->cls( 'Vary' )->finalize_default_vary( $current_crawler['uid'] );
[507] Fix | Delete
$this->_crawler_conf['cookies'][ $vary_name ] = $vary_val;
[508] Fix | Delete
$this->_crawler_conf['cookies']['litespeed_hash'] = Router::cls()->get_hash( $current_crawler['uid'] );
[509] Fix | Delete
}
[510] Fix | Delete
[511] Fix | Delete
return true;
[512] Fix | Delete
}
[513] Fix | Delete
[514] Fix | Delete
/**
 * Resolve how long a single crawler run may last.
 *
 * Reads the optional `LITESPEED_CRAWLER_DURATION` constant (cast to int) and
 * caps it at 900 seconds. When the constant is not defined, 900 is returned.
 *
 * @since 7.0
 * @return int Seconds.
 */
public function get_crawler_duration() {
	$ceiling = 900;

	if ( ! defined( 'LITESPEED_CRAWLER_DURATION' ) ) {
		return $ceiling;
	}

	// A value defined higher than the ceiling falls back to the ceiling.
	return min( (int) constant( 'LITESPEED_CRAWLER_DURATION' ), $ceiling );
}
[527] Fix | Delete
[528] Fix | Delete
/**
 * Start crawler.
 *
 * Steps, in order:
 *  1. Re-evaluate the thread count; abort with end reason `stopped_highload`
 *     when it comes back as 0.
 *  2. Record `last_start_time` in the summary.
 *  3. Work out the time budget for this run from PHP's `max_execution_time`
 *     and the configured `run_duration`, raising the PHP limit when needed.
 *  4. Mark the crawler as running, crawl, then terminate.
 *
 * @since 1.1.0
 * @access private
 * @return void
 */
private function _engine_start() {
	// check current load.
	$this->_adjust_current_threads();
	if ( 0 === (int) $this->_cur_threads ) {
		$this->_end_reason = 'stopped_highload';
		self::debug( 'Stopped due to heavy load.' );
		return;
	}

	// log started time.
	self::save_summary( [ 'last_start_time' => time() ] );

	// Normalise the configured duration once so every branch below works on
	// the same integer (a raw non-numeric setting would throw on `+ 15`).
	$run_duration = (int) $this->_crawler_conf['run_duration'];

	// set time limit.
	$max_time = (int) ini_get( 'max_execution_time' );
	self::debug( 'ini_get max_execution_time=' . $max_time );
	if ( 0 === $max_time ) {
		$max_time = 300; // hardlimit.
	} else {
		$max_time -= 5; // stay 5s below PHP's own limit.
	}

	if ( $max_time >= $run_duration ) {
		// PHP already allows at least the configured duration.
		$max_time = $run_duration;
		self::debug( 'Use run_duration setting as max_execution_time=' . $max_time );
		// phpcs:ignore WordPress.PHP.IniSet.max_execution_time_Disallowed -- Required for crawler functionality.
	} elseif ( false !== ini_set( 'max_execution_time', (string) ( $run_duration + 15 ) ) ) {
		// The PHP limit was raised to run_duration + 15s, so the full duration is usable.
		$max_time = $run_duration;
		self::debug( 'ini_set max_execution_time=' . $max_time );
	}
	self::debug( 'final max_execution_time=' . $max_time );
	$this->_max_run_time = $max_time + time();

	// mark running.
	$this->_prepare_running();
	// run crawler.
	$this->_do_running();
	$this->_terminate_running();
}
[572] Fix | Delete
[573] Fix | Delete
/**
 * Get server load.
 *
 * Returns the first element of `sys_getloadavg()` as a float. When the
 * function is unavailable, or when it fails to report a load, -1 is returned
 * so callers can treat both cases as "load unknown".
 *
 * @since 5.5
 * @return int|float Load, or -1 if it cannot be determined.
 */
public function get_server_load() {
	if ( ! function_exists( 'sys_getloadavg' ) ) {
		return -1;
	}

	// sys_getloadavg() returns false on failure; indexing false would
	// silently produce a load of 0.
	$load_avg = sys_getloadavg();
	if ( ! is_array( $load_avg ) || ! isset( $load_avg[0] ) ) {
		self::debug( 'sys_getloadavg failed to report server load' );
		return -1;
	}

	$curload = (float) $load_avg[0];
	self::debug( 'Server load: ' . $curload );
	return $curload;
}
[589] Fix | Delete
[590] Fix | Delete
/**
 * Adjust threads dynamically.
 *
 * Compares the per-CPU server load against the configured `load_limit` and
 * stores the resulting thread count in `$this->_cur_threads`, together with
 * the time of the adjustment in `$this->_cur_thread_time`.
 *
 * - Load unavailable (-1): threads are set to 0 and the timestamp is left untouched.
 * - Current thread count of -1: treated as "not initialised yet"; the count
 *   is derived from the headroom between load and limit, capped by
 *   `LITESPEED_CRAWLER_THREADS` (default 3).
 * - Otherwise: the count is decreased under load (never below 0) or
 *   increased by one when more than 1 below the limit.
 *
 * @since 1.1.0
 * @access private
 * @return void
 */
private function _adjust_current_threads() {
	$curload = $this->get_server_load();
	if ( -1 === (int) $curload ) {
		self::debug( 'set threads=0 due to func sys_getloadavg not exist!' );
		$this->_cur_threads = 0;
		return;
	}

	// A CPU count of 0 would raise DivisionByZeroError; treat anything below 1 as 1.
	$curload /= max( 1.0, (float) $this->_ncpu );

	$crawler_threads = defined( 'LITESPEED_CRAWLER_THREADS' ) ? (int) constant( 'LITESPEED_CRAWLER_THREADS' ) : 3;
	$load_limit      = (float) $this->_crawler_conf['load_limit'];
	$current_threads = (int) $this->_cur_threads;

	if ( -1 === $current_threads ) {
		// init.
		if ( $curload > $load_limit ) {
			$curthreads = 0;
		} elseif ( $curload >= ( $load_limit - 1 ) ) {
			$curthreads = 1;
		} else {
			$curthreads = (int) ( $load_limit - $curload );
			if ( $curthreads > $crawler_threads ) {
				$curthreads = $crawler_threads;
			}
		}
	} else {
		// adjust.
		$curthreads = $current_threads;
		if ( $curload >= ( $load_limit + 1 ) ) {
			sleep( 5 ); // sleep 5 secs.
			if ( $curthreads >= 1 ) {
				--$curthreads;
			}
		} elseif ( $curload >= $load_limit ) {
			// Never drop below 0: -1 is reserved for "not initialised".
			if ( $curthreads >= 1 ) {
				--$curthreads;
			}
		} elseif ( ( $curload + 1 ) < $load_limit ) {
			if ( $curthreads < $crawler_threads ) {
				++$curthreads;
			}
		}
	}

	$this->_cur_threads     = (int) $curthreads;
	$this->_cur_thread_time = time();
}
[642] Fix | Delete
[643] Fix | Delete
/**
 * Mark running status.
 *
 * Flags the summary as running, resets the per-run counters, stamps the
 * start-time markers that apply when the position is at 0, and then
 * persists the summary.
 *
 * @since 1.1.0
 * @access private
 * @return void
 */
private function _prepare_running() {
	$run_flags = [
		'is_running'   => time(),
		'done'         => 0, // reset done status.
		'last_status'  => 'prepare running',
		'last_crawled' => 0,
	];
	foreach ( $run_flags as $field => $value ) {
		$this->_summary[ $field ] = $value;
	}

	$at_list_start = 0 === (int) $this->_summary['last_pos'];

	// Current crawler starttime mark.
	if ( $at_list_start ) {
		$this->_summary['curr_crawler_beginning_time'] = time();
	}

	// First crawler at position 0: a full pass begins, so record its start and the map size.
	if ( 0 === (int) $this->_summary['curr_crawler'] && $at_list_start ) {
		$this->_summary['this_full_beginning_time'] = time();
		$this->_summary['list_size']                = $this->cls( 'Crawler_Map' )->count_map();
	}

	// Previous run ended with 'end' and we are at position 0: clear this crawler's stats.
	if ( 'end' === $this->_summary['end_reason'] && $at_list_start ) {
		$this->_summary['crawler_stats'][ $this->_summary['curr_crawler'] ] = [];
	}

	self::save_summary();
}
[672] Fix | Delete
[673] Fix | Delete
/**
 * Take over lane.
 *
 * Writes `LITESPEED_LANE_HASH` into the `.pid` file that sits next to the
 * local JSON path, claiming the lane for this run.
 *
 * @since 6.1
 * @return void
 */
private function _take_over_lane() {
	$lane_file = $this->json_local_path() . '.pid';

	self::debug( 'Take over lane as lane is free: ' . $lane_file );
	File::save( $lane_file, LITESPEED_LANE_HASH );
}
[683] Fix | Delete
[684] Fix | Delete
/**
 * Update lane file mtime.
 *
 * Touches the `.pid` lane file so its modification time reflects the
 * latest activity of this run.
 *
 * @since 6.1
 * @return void
 */
private function _touch_lane() {
	$lane_file = $this->json_local_path() . '.pid';

	// phpcs:ignore WordPress.WP.AlternativeFunctions.file_system_operations_touch
	touch( $lane_file );
}
[694] Fix | Delete
[695] Fix | Delete
/**
 * Release lane file.
 *
 * Deletes the `.pid` lane file when it exists; does nothing otherwise.
 *
 * @since 6.1
 * @return void
 */
public function Release_lane() {
	$lane_file = $this->json_local_path() . '.pid';

	if ( file_exists( $lane_file ) ) {
		self::debug( 'Release lane' );
		// phpcs:ignore WordPress.WP.AlternativeFunctions.unlink_unlink
		unlink( $lane_file );
	}
}
[711] Fix | Delete
[712] Fix | Delete
/**
 * Check if lane is used by other crawlers.
 *
 * The lane is considered valid when the `.pid` file content is falsy, when
 * it equals `LITESPEED_LANE_HASH`, or when it belongs to someone else but
 * has not been modified for more than one hour (in which case the stale
 * lane file is released first).
 *
 * @since 6.1
 * @param bool $strict_mode Strict check that file must exist.
 * @return bool True if valid lane.
 */
private function _check_valid_lane( $strict_mode = false ) {
	$lane_file = $this->json_local_path() . '.pid';

	// Strict mode: a missing lane file means the lane is not ours.
	if ( $strict_mode && ! file_exists( $lane_file ) ) {
		self::debug( 'lane file not existed, strict mode check failed [file] ' . $lane_file );
		return false;
	}

	$pid = File::read( $lane_file );
	if ( ! $pid || LITESPEED_LANE_HASH === $pid ) {
		// No owner recorded, or the owner is this run.
		return true;
	}

	// If lane file is older than 1h, ignore.
	if ( ( time() - filemtime( $lane_file ) ) > 3600 ) {
		self::debug( 'Lane file is older than 1h, releasing lane' );
		$this->Release_lane();
		return true;
	}

	return false;
}
[739] Fix | Delete
[740] Fix | Delete
/**
 * Test port for simulator.
 *
 * Decides which local port (443 or 80) can be used to reach this site via
 * the configured server IP, and defines `LITESPEED_CRAWLER_LOCAL_PORT`
 * accordingly.
 *
 * - No server IP: returns true when the current crawler has no `uid`,
 *   false otherwise.
 * - Constant already defined: returns true.
 * - Last test was less than 120s ago: reuses the stored result (true with
 *   the stored port, or false when no port is stored).
 * - Otherwise: writes the home URL into a static test file, requests it
 *   through port 443 and, on failure, through port 80 with an
 *   `X-Forwarded-Proto: https` header; a response identical to the home URL
 *   counts as success.
 *
 * The stored port is cleared before each fresh test so a failed re-test can
 * not be overridden by an older successful result inside the 120s window.
 *
 * @since 7.0
 * @access private
 * @return bool true if success and can continue crawling, false otherwise.
 */
private function _test_port() {
	if ( empty( $this->_server_ip ) ) {
		if ( empty( $this->_crawlers[ $this->_summary['curr_crawler'] ]['uid'] ) ) {
			self::debug( 'Bypass test port as Server IP is not set' );
			return true;
		}
		self::debug( '❌ Server IP not set' );
		return false;
	}
	if ( defined( 'LITESPEED_CRAWLER_LOCAL_PORT' ) ) {
		self::debug( '✅ LITESPEED_CRAWLER_LOCAL_PORT already defined' );
		return true;
	}
	// Don't repeat testing in 120s.
	if ( ! empty( $this->_summary['test_port_tts'] ) && ( time() - (int) $this->_summary['test_port_tts'] ) < 120 ) {
		if ( ! empty( $this->_summary['test_port'] ) ) {
			self::debug( '✅ Use tested local port: ' . $this->_summary['test_port'] );
			define( 'LITESPEED_CRAWLER_LOCAL_PORT', (int) $this->_summary['test_port'] );
			return true;
		}
		return false;
	}
	$this->_summary['test_port_tts'] = time();
	// Forget the previously detected port: if this test fails, calls inside
	// the next 120s must not fall back to a stale success.
	$this->_summary['test_port'] = 0;
	self::save_summary();

	$options = $this->_get_curl_options();
	$home    = home_url();
	File::save( LITESPEED_STATIC_DIR . '/crawler/test_port.html', $home, true );
	$url        = LITESPEED_STATIC_URL . '/crawler/test_port.html';
	$parsed_url = wp_parse_url( $url );
	if ( empty( $parsed_url['host'] ) ) {
		self::debug( '❌ Test port failed, invalid URL: ' . $url );
		return false;
	}
	$resolved                                = $parsed_url['host'] . ':443:' . $this->_server_ip;
	$options[ CURLOPT_RESOLVE ]              = [ $resolved ];
	$options[ CURLOPT_DNS_USE_GLOBAL_CACHE ] = false;
	$options[ CURLOPT_HEADER ]               = false;
	self::debug( 'Test local 443 port for ' . $resolved );

	// cURL is intentionally used for speed; suppress sniffs in this method.
	// phpcs:disable WordPress.WP.AlternativeFunctions
	$ch = curl_init();
	curl_setopt_array( $ch, $options );
	curl_setopt( $ch, CURLOPT_URL, $url );
	$result      = curl_exec( $ch );
	$test_result = false;
	if ( curl_errno( $ch ) || $result !== $home ) {
		if ( curl_errno( $ch ) ) {
			self::debug( '❌ Test port curl error: [errNo] ' . curl_errno( $ch ) . ' [err] ' . curl_error( $ch ) );
		} else {
			// No transport error, so the body did not match the home URL.
			self::debug( '❌ Test port response is wrong: ' . $result );
		}
		self::debug( '❌ Test local 443 port failed, try port 80' );

		// Try port 80.
		$resolved                   = $parsed_url['host'] . ':80:' . $this->_server_ip;
		$options[ CURLOPT_RESOLVE ] = [ $resolved ];
		$url                        = str_replace( 'https://', 'http://', $url );
		if ( empty( $options[ CURLOPT_HTTPHEADER ] ) || ! in_array( 'X-Forwarded-Proto: https', $options[ CURLOPT_HTTPHEADER ], true ) ) {
			$options[ CURLOPT_HTTPHEADER ][] = 'X-Forwarded-Proto: https';
		}
		$ch = curl_init();
		curl_setopt_array( $ch, $options );
		curl_setopt( $ch, CURLOPT_URL, $url );
		$result = curl_exec( $ch );
		if ( curl_errno( $ch ) ) {
			self::debug( '❌ Test port curl error: [errNo] ' . curl_errno( $ch ) . ' [err] ' . curl_error( $ch ) );
		} elseif ( $result !== $home ) {
			self::debug( '❌ Test port response is wrong: ' . $result );
		} else {
			self::debug( '✅ Test local 80 port successfully' );
			define( 'LITESPEED_CRAWLER_LOCAL_PORT', 80 );
			$this->_summary['test_port'] = 80;
			$test_result                 = true;
		}
	} else {
		self::debug( '✅ Tested local 443 port successfully' );
		define( 'LITESPEED_CRAWLER_LOCAL_PORT', 443 );
		$this->_summary['test_port'] = 443;
		$test_result                 = true;
	}
	self::save_summary();
	unset( $ch );
	// phpcs:enable
	return $test_result;
}
[834] Fix | Delete
[835] Fix | Delete
/**
 * Run crawler.
 *
 * After a successful port test, repeatedly fetches the next list of map
 * rows starting at `last_pos`, splits it into groups of `_cur_threads`
 * rows and requests each group in parallel. For every group it records
 * the per-URL status, advances the position counters and then checks, in
 * order: run-time budget, periodic save, reset file, server load. The
 * method returns early with a matching `_end_reason` when any stop
 * condition is met; when the map is exhausted the reason is `end`.
 *
 * @since 1.1.0
 * @access private
 * @return void
 * @throws \Exception When lane becomes invalid during run.
 */
private function _do_running() {
	$curl_options = $this->_get_curl_options( true );

	// If is role simulator and not defined local port, check port once.
	if ( ! $this->_test_port() ) {
		$this->_end_reason = 'port_test_failed';
		self::debug( '❌ Test port failed, crawler stopped.' );
		return;
	}

	while ( true ) {
		$pending = $this->cls( 'Crawler_Map' )->list_map( self::CHUNKS, $this->_summary['last_pos'] );
		if ( empty( $pending ) ) {
			break;
		}

		// One group per parallel request batch, sized by the current thread count.
		foreach ( array_chunk( $pending, (int) $this->_cur_threads ) as $group ) {
			if ( ! $this->_check_valid_lane( true ) ) {
				$this->_end_reason = 'lane_invalid';
				self::debug( '🛑 The crawler lane is used by newer crawler.' );
				throw new \Exception( 'invalid crawler lane' );
			}
			// Update time.
			$this->_touch_lane();

			// multi curl.
			$responses = $this->_multi_request( $group, $curl_options );

			// check result headers.
			foreach ( $group as $entry ) {
				$entry_id = $entry['id'];
				if ( empty( $responses[ $entry_id ] ) ) {
					continue;
				}

				$response  = $responses[ $entry_id ];
				$http_code = (int) $response['code'];
				if ( 428 === $http_code ) {
					// HTTP/1.1 428 Precondition Required (need to test)
					$this->_end_reason = 'crawler_disabled';
					self::debug( 'crawler_disabled' );
					return;
				}

				$status = $this->_status_parse( $response['header'], $response['code'], $entry['url'] ); // B or H or M or N(nocache).
				self::debug( '[status] ' . $this->_status2title( $status ) . "\t\t [url] " . $entry['url'] );

				$this->_map_status_list[ $status ][ $entry_id ] = [
					'url'  => $entry['url'],
					'code' => $http_code, // 201 or 200 or 404.
				];

				// Bump the per-crawler counter for this status, creating it on first use.
				$crawler_idx = $this->_summary['curr_crawler'];
				if ( empty( $this->_summary['crawler_stats'][ $crawler_idx ][ $status ] ) ) {
					$this->_summary['crawler_stats'][ $crawler_idx ][ $status ] = 0;
				}
				++$this->_summary['crawler_stats'][ $crawler_idx ][ $status ];
			}

			// update offset position.
			$now        = time();
			$group_size = count( $group );

			$this->_summary['last_count']       = $group_size;
			$this->_summary['last_pos']        += $group_size;
			$this->_summary['last_crawled']    += $group_size;
			$this->_summary['last_update_time'] = $now;
			$this->_summary['last_status']      = 'updated position';

			// check duration.
			if ( $now > $this->_max_run_time ) {
				$this->_end_reason = 'stopped_maxtime';
				self::debug( 'Terminated due to maxtime' );
				return;
			}

			// make sure at least each 10s save meta & map status once.
			if ( $now - $this->_summary['meta_save_time'] > 10 ) {
				$this->_map_status_list = $this->cls( 'Crawler_Map' )->save_map_status( $this->_map_status_list, $this->_summary['curr_crawler'] );
				self::save_summary();
			}

			// check if need to reset pos each 5s.
			if ( $now > $this->_summary['pos_reset_check'] ) {
				$this->_summary['pos_reset_check'] = $now + 5;
				if ( file_exists( $this->_resetfile ) && unlink( $this->_resetfile ) ) { // phpcs:ignore WordPress.WP.AlternativeFunctions.unlink_unlink
					self::debug( 'Terminated due to reset file' );

					// Rewind to the first crawler at position 0 and drop its stats.
					$this->_summary['last_pos']           = 0;
					$this->_summary['curr_crawler']       = 0;
					$this->_summary['crawler_stats'][0]   = [];
					// reset done status.
					$this->_summary['done']                     = 0;
					$this->_summary['this_full_beginning_time'] = 0;
					$this->_end_reason                          = 'stopped_reset';
					return;
				}
			}

			// check loads.
			if ( ( $this->_summary['last_update_time'] - $this->_cur_thread_time ) > 60 ) {
				$this->_adjust_current_threads();
				if ( 0 === (int) $this->_cur_threads ) {
					$this->_end_reason = 'stopped_highload';
					self::debug( '🛑 Terminated due to highload' );
					return;
				}
			}

			$delay                         = (int) $this->_crawler_conf['run_delay'];
			$this->_summary['last_status'] = 'sleeping ' . $delay . 'ms';

			usleep( $delay );
		}
	}

	// All URLs are done for current crawler.
	$this->_end_reason = 'end';
	$this->_summary['crawler_stats'][ $this->_summary['curr_crawler'] ]['W'] = 0;
	self::debug( 'Crawler #' . $this->_summary['curr_crawler'] . ' touched end' );
}
[956] Fix | Delete
[957] Fix | Delete
/**
 * If need to resolve DNS or not.
 *
 * True when a server IP is configured, or when the crawler cookies carry a
 * non-empty `litespeed_hash` entry.
 *
 * @since 7.3.0.1
 * @return bool
 */
private function _should_force_resolve_dns() {
	$has_server_ip = ! empty( $this->_server_ip );
	if ( $has_server_ip ) {
		return true;
	}

	return ! empty( $this->_crawler_conf['cookies'] ) && ! empty( $this->_crawler_conf['cookies']['litespeed_hash'] );
}
[972] Fix | Delete
[973] Fix | Delete
/**
[974] Fix | Delete
* Send multi curl requests.
[975] Fix | Delete
* If res=B/N, bypass request and won't return.
[976] Fix | Delete
*
[977] Fix | Delete
* @since 1.1.0
[978] Fix | Delete
* @access private
[979] Fix | Delete
*
[980] Fix | Delete
* @param array<int,array<string,mixed>> $rows Rows to crawl.
[981] Fix | Delete
* @param array $options cURL options.
[982] Fix | Delete
* @return array<int,array{header:string,code:int}>
[983] Fix | Delete
*/
[984] Fix | Delete
private function _multi_request( $rows, $options ) {
[985] Fix | Delete
if ( ! function_exists( 'curl_multi_init' ) ) {
[986] Fix | Delete
exit( 'curl_multi_init disabled' );
[987] Fix | Delete
}
[988] Fix | Delete
// phpcs:disable WordPress.WP.AlternativeFunctions
[989] Fix | Delete
$mh = curl_multi_init();
[990] Fix | Delete
$crawler_drop_domain = defined( 'LITESPEED_CRAWLER_DROP_DOMAIN' ) ? (bool) constant( 'LITESPEED_CRAWLER_DROP_DOMAIN' ) : false;
[991] Fix | Delete
$curls = [];
[992] Fix | Delete
foreach ( $rows as $row ) {
[993] Fix | Delete
if ( self::STATUS_BLACKLIST === substr( $row['res'], $this->_summary['curr_crawler'], 1 ) ) {
[994] Fix | Delete
continue;
[995] Fix | Delete
}
[996] Fix | Delete
if ( self::STATUS_NOCACHE === substr( $row['res'], $this->_summary['curr_crawler'], 1 ) ) {
[997] Fix | Delete
continue;
[998] Fix | Delete
}
[999] Fix | Delete
It is recommended that you Edit text format, this type of Fix handles quite a lot in one request
Function