PHP实现自动锚文本

核心代码

/**
 * Turn keyword occurrences in an HTML fragment into SEO anchor links.
 *
 * <img> tags are swapped for placeholders while linking runs, so keywords inside
 * their attributes (e.g. alt text) are left untouched, and are put back afterwards.
 * Occurrences that already sit inside an <a>...</a> element are skipped.
 *
 * @param string $str     HTML to process.
 * @param array  $anchors List of arrays with the keys 'anchor_name' (keyword text),
 *                        'anchor_url' (link target) and 'anchor_num' (maximum number
 *                        of replacements; unlimited when the key is missing).
 * @return string The HTML with the keywords linked.
 */
public function setAnchors($str,$anchors)
{
    // Placeholder made of control characters only, so no keyword can match inside it.
    $marker = "\x00\x01\x00";
    $imgRule = '/<img\b[^>]*>/i';

    // Remember every <img> tag, then mask it in the string that is actually processed.
    preg_match_all($imgRule, $str, $matches);
    $masked = preg_replace($imgRule, $marker, $str);
    if ($masked === null) {
        // PCRE failure: leave the input unmasked and restore nothing.
        $matches = array(array());
    } else {
        $str = $masked;
    }

    foreach ($anchors as $anchor) {
        $name = (string) $anchor['anchor_name'];
        if ($name === '') {
            // An empty keyword would match at every position.
            continue;
        }

        // Keyword that is NOT followed by "</a>" without an opening "<a" in between,
        // i.e. a keyword that is not already part of a link.
        $rule = '/' . preg_quote($name, '/') . '(?!(?:(?!<a\b)[\s\S])*<\/a>)/';
        $href = '<a href="'.$anchor['anchor_url'].'" class = "seo-anchor">'.$name.'</a>';
        $limit = isset($anchor['anchor_num']) ? (int) $anchor['anchor_num'] : -1;

        // Escape "\" and "$" so the link is inserted literally instead of being
        // parsed for back-references.
        $linked = preg_replace($rule, addcslashes($href, '\\$'), $str, $limit);
        if ($linked !== null) {
            $str = $linked;
        }
    }

    // Put the <img> tags back in their original order.
    $offset = 0;
    foreach ($matches[0] as $img) {
        $pos = strpos($str, $marker, $offset);
        if ($pos === false) {
            break;
        }
        $str = substr_replace($str, $img, $pos, strlen($marker));
        $offset = $pos + strlen($img);
    }

    return $str;
}

 

说明,这个代码中的正则有问题

// Matches the keyword unless a closing </a> follows with no opening <a in between
// (i.e. unless the keyword is already inside a link). The "/" of "</a>" must be
// escaped because "/" is the pattern delimiter.
$rule = "/".$anchor['anchor_name']."(?!((?!<a\b)[\s\S])*<\/a>)/";

需要修改一下

 

// The pattern uses ' as its delimiter, so the quotes are escaped inside the PHP string.
// An occurrence is skipped when it is followed by "...>" with no "<" or ">" before it
// (inside a tag) or by "...</a>" with no ">" before it (inside link text).
$rule = '\'(?!((<.*?)|(<a.*?)))(' . $anchor['anchor_name'] . ')(?!(([^<>]*?)>)|([^>]*?</a>))\'si';

 

来源:https://www.jianshu.com/p/d21fb675fb5e

 

更新:

/*
 * $zh_CN      - truthy for Chinese keywords; no \b word boundaries are added for them.
 * $ignorecase - truthy to match the keyword case-insensitively (adds the "i" modifier).
 * preg_quote() escapes the regex metacharacters in the keyword, plus the ' character
 * that serves as the pattern delimiter below.
 */
$case = $ignorecase ? 'i' : '';
$cleankeyword = preg_quote($cleankeyword, '\'');

if ($zh_CN || strpos($cleankeyword, '\'') !== false) {
    // Chinese keywords and keywords containing a quote are matched without word boundaries.
    $regEx = '\'(?!((<.*?)|(<a.*?)))(' . $cleankeyword . ')(?!(([^<>]*?)>)|([^>]*?</a>))\'s' . $case;
} else {
    // Everything else must match as a whole word.
    $regEx = '\'(?!((<.*?)|(<a.*?)))(\b' . $cleankeyword . '\b)(?!(([^<>]*?)>)|([^>]*?</a>))\'s' . $case;
}

 

适用于wordpress的函数,来自wp_keyword_link插件

/**
 * Replace configured keywords in a piece of HTML with links.
 *
 * Keyword definitions come from the global $wp_keywordlinks array, which maps each
 * keyword to a "|"-separated settings string:
 *   link|nofollow|firstonly|newwindow|ignorecase|isaffiliate|docomments|zh_CN|desc
 * Global behaviour comes from get_option(WP_GLOBAL_OPTION), also "|"-separated:
 *   match_num_from|match_num_to|link_itself|ignore_pre|ignore_page
 *
 * Keywords are not linked inside <wp_nokeywordlink> blocks, inside <pre> blocks
 * (when ignore_pre is set), inside tags, or inside existing links.
 *
 * @param string $content    HTML to process.
 * @param mixed  $iscomments Truthy when $content is comment text; keywords whose
 *                           docomments setting is 0 are then skipped.
 * @return string The HTML with keywords linked.
 */
function wp_keywordlink_replace($content,$iscomments)
{
	global $wp_keywordlinks;
	$links = $wp_keywordlinks;

	$the_global_options = get_option(WP_GLOBAL_OPTION);
	if ($the_global_options) {
		// Pad so that a short option string does not raise undefined-offset warnings.
		list($match_num_from, $match_num_to, $link_itself, $ignore_pre, $ignore_page) = array_pad(explode("|", $the_global_options), 5, null);
	} else {
		$match_num_from = 2;
		$match_num_to = 3;
		$link_itself = 0;
		$ignore_pre = 0;
		$ignore_page = 1;
	}

	if (!$links) {
		return $content;
	}

	// Skip Page views entirely; the check does not depend on the keyword.
	if ($ignore_page && is_page()) {
		return $content;
	}

	// Swap every block that must not be touched for a numbered placeholder.
	$nolink_pattern = '/<wp_nokeywordlink>.*?<\/wp_nokeywordlink>/is';
	$num_2 = preg_match_all($nolink_pattern, $content, $ignore_keywordlink);
	for ($i = 1; $i <= $num_2; $i++) {
		$content = preg_replace($nolink_pattern, "%ignore_keywordlink_$i%", $content, 1);
	}

	// The <pre> matches get their own variable so the $ignore_pre flag is not overwritten.
	$num_1 = 0;
	$pre_blocks = array(array());
	if ($ignore_pre) {
		$pre_pattern = '/<pre.*?>.*?<\/pre>/is';
		$num_1 = preg_match_all($pre_pattern, $content, $pre_blocks);
		for ($i = 1; $i <= $num_1; $i++) {
			$content = preg_replace($pre_pattern, "%ignore_pre_$i%", $content, 1);
		}
	}

	foreach ($links as $keyword => $details) {
		list($link, $nofollow, $firstonly, $newwindow, $ignorecase, $isaffiliate, $docomments, $zh_CN, $desc) = array_pad(explode("|", $details), 9, null);

		// If this keyword is not tagged for replacement in comments we continue
		if ($iscomments && $docomments == 0) {
			continue;
		}

		// Do not link a page to itself.
		// NOTE(review): the current URL is always built with "http://" - confirm
		// whether HTTPS sites need this comparison to work as well.
		if ($link_itself) {
			$recent_url = 'http://'.$_SERVER['SERVER_NAME'].$_SERVER["REQUEST_URI"];
			if ($link == $recent_url) {
				continue;
			}
		}

		$cleankeyword = stripslashes((string) $keyword);
		if ($cleankeyword === '') {
			// An empty keyword would match at every position.
			continue;
		}

		if (!$desc) {
			$desc = $cleankeyword;
		}
		// "$" is escaped because the link is used as a preg_replace() replacement string.
		$desc = addcslashes($desc, '$');

		$url = "<a href=\"$link\" title=\"$desc\"";
		if ($nofollow) {
			$url .= ' rel="nofollow"';
		}
		if ($newwindow) {
			$url .= ' target="_blank"';
		}
		$url .= ">".addcslashes($cleankeyword, '$')."</a>";

		// Link only the first occurrence, or a random number of occurrences within
		// the configured range.
		$limit = $firstonly ? 1 : rand((int) $match_num_from, (int) $match_num_to);
		$case = $ignorecase ? "i" : "";

		// Escaped for use inside a pattern whose delimiter is '.
		$ex_word = preg_quote($cleankeyword, '\'');

		// Hide the keyword inside <img ...> tags behind a marker (idea from 'kb-linker').
		$masked = preg_replace('|(<img)([^>]*)('.$ex_word.')([^>]*)(>)|U', '$1$2%&&&&&%$4$5', $content);
		if ($masked !== null) {
			$content = $masked;
		}

		// The regular expression comes from an older auto link plugin by Sean Hickey.
		// It avoids linking inside a tag or inside an existing link.
		// Chinese keywords and keywords containing a quote (') are matched without
		// word boundaries; everything else must match as a whole word.
		if ($zh_CN || strpos($ex_word, '\'') !== false) {
			$regEx = '\'(?!((<.*?)|(<a.*?)))(' . $ex_word . ')(?!(([^<>]*?)>)|([^>]*?</a>))\'s' . $case;
		} else {
			$regEx = '\'(?!((<.*?)|(<a.*?)))(\b' . $ex_word . '\b)(?!(([^<>]*?)>)|([^>]*?</a>))\'s' . $case;
		}

		$replaced = preg_replace($regEx, $url, $content, $limit);
		if ($replaced !== null) {
			// Keep the previous content when PCRE reports an error instead of wiping it.
			$content = $replaced;
		}

		// Change the '%&&&&&%' markers back to the keyword.
		$content = str_replace('%&&&&&%', $cleankeyword, $content);
	}

	// Restore the protected blocks.
	for ($i = 1; $i <= $num_1; $i++) {
		$content = str_replace("%ignore_pre_$i%", $pre_blocks[0][$i - 1], $content);
	}
	for ($i = 1; $i <= $num_2; $i++) {
		$content = str_replace("%ignore_keywordlink_$i%", $ignore_keywordlink[0][$i - 1], $content);
	}

	return $content;
}