来自维基百科:
slug 是 URL 的一部分,它使用人类可读的关键字来标识页面。
为了让用户更容易输入 URL,特殊字符通常会被删除或替换。例如,重音字符通常用英文字母表中的字母代替;标点符号通常被去除;空格(必须编码为 %20 或 +)则替换为破折号 (-) 或下划线 (_),这样更美观。
我开发了一个照片分享网站,用户可以在上面上传、分享和查看照片。
所有页面都是自动生成的,标题不受我控制。由于照片标题或用户名可能包含重音字符或空格,因此我需要一个函数来自动创建 slug 并保持 URL 可读。
我创建了以下函数,它替换重音字符 (âèêëçî)、删除标点符号和无效字符 (#@&~^!),并将空格转换为破折号。
/**
 * Build a lowercase, dash-separated URL slug from a UTF-8 string.
 *
 * Steps: lowercase (multibyte-aware), transliterate the supported accented
 * letters to ASCII, drop every remaining character that is not a-z, 0-9 or
 * whitespace, trim, then turn each run of whitespace into a single dash.
 *
 * @param string $str Arbitrary UTF-8 text (photo title, user name, ...).
 * @return string Slug containing only a-z, 0-9 and '-'.
 */
function sluggable($str) {
    // Explicit per-character map. The three-argument, string form of strtr()
    // works byte by byte and therefore corrupts multibyte UTF-8 input; the
    // array form replaces whole substrings and is safe.
    static $accentMap = array(
        'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a',
        'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 'ø' => 'o',
        'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e',
        'ð' => 'd',
        'ç' => 'c',
        'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i',
        'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u',
        'ñ' => 'n', 'š' => 's', 'ž' => 'z',
    );

    // strtolower() leaves non-ASCII letters such as 'É' untouched, which would
    // make them miss the (lowercase-only) map above.
    $str = mb_strtolower($str, 'UTF-8');
    $str = strtr($str, $accentMap);

    // Everything that is not a slug character or whitespace is removed,
    // including the bytes of any character the map does not cover.
    $str = preg_replace('/[^a-z0-9\s]/', '', $str);
    $str = trim($str);

    // All dashes in the result come from whitespace, so collapsing whitespace
    // runs directly yields single dashes.
    return preg_replace('/\s+/', '-', $str);
}
您还知道其他创建 slugs 的函数吗?
我喜欢 Google Code 上 php-slugs 的代码这一解决方案。但如果您想要一个支持 UTF-8 的更简单的版本:
/**
 * Turn a UTF-8 string into a URL fragment.
 *
 * The string is trimmed and lowercased, '&' becomes "and", apostrophes are
 * dropped, accented letters are reduced to their base letter(s) via their
 * HTML entity names, and every other non [a-z0-9] character becomes the
 * separator. Runs of the separator are collapsed to one; a leading or
 * trailing separator is NOT removed.
 *
 * @param string $string    Input text, UTF-8.
 * @param string $separator Replacement for non-alphanumeric characters.
 * @return string
 */
function format_uri( $string, $separator = '-' )
{
    $accents_regex = '~&([a-z]{1,2})(?:acute|cedil|circ|grave|lig|orn|ring|slash|th|tilde|uml);~i';
    $special_cases = array( '&' => 'and', "'" => '');

    $string = mb_strtolower( trim( $string ), 'UTF-8' );
    $string = str_replace( array_keys($special_cases), array_values( $special_cases), $string );

    // "&eacute;" -> "e", "&aelig;" -> "ae", ...
    $string = preg_replace( $accents_regex, '$1', htmlentities( $string, ENT_QUOTES, 'UTF-8' ) );

    // Entities that are not accents (&quot;, &lt;, &euro; ...) must be turned
    // back into characters, otherwise their names ("quot", "lt") end up as
    // words in the result. Every '&' still present here was produced by
    // htmlentities(), because literal ampersands were replaced above.
    $string = html_entity_decode( $string, ENT_QUOTES, 'UTF-8' );

    // '\' and '$' are special in a preg replacement string.
    $replacement = addcslashes( $separator, '\\$' );
    $string = preg_replace( '/[^a-z0-9]/u', $replacement, $string );

    // Collapse repeated separators. The separator is quoted so that values
    // such as '/', ']' or '^' cannot break or alter the pattern.
    if ( $separator !== '' ) {
        $string = preg_replace( '/(?:' . preg_quote( $separator, '/' ) . ')+/u', $replacement, $string );
    }

    return $string;
}
那么
echo format_uri("#@&~^!âèêëçî");
输出
-and-aeeeci
有些人链接到了 Google Code 上的“php-slugs”,但那个页面现在似乎有点问题,所以如果有人需要,代码如下:
// source: https://code.google.com/archive/p/php-slugs/
/**
 * Fallback for str_split(): split a string into an array of single bytes.
 *
 * @param string $string
 * @return array One element per byte; an empty array for an empty string.
 */
function my_str_split($string)
{
    // Initialised up front so an empty string yields an array rather than an
    // undefined variable.
    $sArray = array();
    $slen = strlen($string);
    for ($i = 0; $i < $slen; $i++)
    {
        // Square brackets: the curly-brace offset syntax ($string{$i}) was
        // removed in PHP 8.0.
        $sArray[$i] = $string[$i];
    }
    return $sArray;
}
/**
 * Transliterate accented Latin letters and Cyrillic letters to ASCII.
 *
 * Characters that are not in the table are returned unchanged.
 *
 * @param string $string UTF-8 input.
 * @return string Input with every mapped character replaced.
 */
function noDiacritics($string)
{
    // One source => replacement map instead of two parallel arrays, so a
    // character and its replacement can never drift out of alignment.
    // Corrected entries compared with the parallel-array version:
    //   'Ə'/'ə' were mapped to 'G'/'g', 'ă' to 'ae', 'Œ' to 'CE', 'œ' to 'o'.
    // The identity entries for plain 'I'/'i' were no-ops and are omitted.
    static $map = array(
        // Latin, upper case
        'Á' => 'A', 'À' => 'A', 'Â' => 'A', 'Ä' => 'AE', 'Ă' => 'A', 'Ā' => 'A',
        'Ã' => 'A', 'Å' => 'A', 'Ą' => 'A', 'Æ' => 'AE',
        'Ć' => 'C', 'Ċ' => 'C', 'Ĉ' => 'C', 'Č' => 'C', 'Ç' => 'C',
        'Ď' => 'D', 'Đ' => 'D', 'Ð' => 'D',
        'É' => 'E', 'È' => 'E', 'Ė' => 'E', 'Ê' => 'E', 'Ë' => 'E', 'Ě' => 'E',
        'Ē' => 'E', 'Ę' => 'E', 'Ə' => 'E',
        'Ġ' => 'G', 'Ĝ' => 'G', 'Ğ' => 'G', 'Ģ' => 'G',
        'Ĥ' => 'H', 'Ħ' => 'H',
        'Í' => 'I', 'Ì' => 'I', 'İ' => 'I', 'Î' => 'I', 'Ï' => 'I', 'Ī' => 'I',
        'Į' => 'I',
        // NOTE(review): the list had a plain "IJ" here, presumably the 'Ĳ'
        // ligature flattened by an earlier conversion - confirm.
        'Ĳ' => 'IJ',
        'Ĵ' => 'J', 'Ķ' => 'K', 'Ļ' => 'L', 'Ł' => 'L',
        'Ń' => 'N', 'Ň' => 'N', 'Ñ' => 'N', 'Ņ' => 'N',
        'Ó' => 'O', 'Ò' => 'O', 'Ô' => 'O', 'Ö' => 'OE', 'Õ' => 'O', 'Ő' => 'O',
        'Ø' => 'O', 'Ơ' => 'O', 'Œ' => 'OE',
        'Ŕ' => 'R', 'Ř' => 'R',
        'Ś' => 'S', 'Ŝ' => 'S', 'Š' => 'S', 'Ş' => 'S',
        'Ť' => 'T', 'Ţ' => 'T', 'Þ' => 'T',
        'Ú' => 'U', 'Ù' => 'U', 'Û' => 'U', 'Ü' => 'UE', 'Ŭ' => 'U', 'Ū' => 'U',
        'Ů' => 'U', 'Ų' => 'U', 'Ű' => 'U', 'Ư' => 'U',
        'Ŵ' => 'W', 'Ý' => 'Y', 'Ŷ' => 'Y', 'Ÿ' => 'Y',
        'Ź' => 'Z', 'Ż' => 'Z', 'Ž' => 'Z',

        // Latin, lower case
        'á' => 'a', 'à' => 'a', 'â' => 'a', 'ä' => 'ae', 'ă' => 'a', 'ā' => 'a',
        'ã' => 'a', 'å' => 'a', 'ą' => 'a', 'æ' => 'ae',
        'ć' => 'c', 'ċ' => 'c', 'ĉ' => 'c', 'č' => 'c', 'ç' => 'c',
        'ď' => 'd', 'đ' => 'd', 'ð' => 'd',
        'é' => 'e', 'è' => 'e', 'ė' => 'e', 'ê' => 'e', 'ë' => 'e', 'ě' => 'e',
        'ē' => 'e', 'ę' => 'e', 'ə' => 'e',
        'ġ' => 'g', 'ĝ' => 'g', 'ğ' => 'g', 'ģ' => 'g',
        'ĥ' => 'h', 'ħ' => 'h',
        'ı' => 'i', 'í' => 'i', 'ì' => 'i', 'î' => 'i', 'ï' => 'i', 'ī' => 'i',
        'į' => 'i',
        'ĳ' => 'ij',
        'ĵ' => 'j', 'ķ' => 'k', 'ļ' => 'l', 'ł' => 'l',
        'ń' => 'n', 'ň' => 'n', 'ñ' => 'n', 'ņ' => 'n',
        'ó' => 'o', 'ò' => 'o', 'ô' => 'o', 'ö' => 'oe', 'õ' => 'o', 'ő' => 'o',
        'ø' => 'o', 'ơ' => 'o', 'œ' => 'oe',
        'ŕ' => 'r', 'ř' => 'r',
        'ś' => 's', 'ŝ' => 's', 'š' => 's', 'ş' => 's', 'ß' => 'ss',
        'ť' => 't', 'ţ' => 't',
        // NOTE(review): upper-case 'Þ' maps to 'T' but lower-case 'þ' maps to
        // 'b'; kept as found - confirm which transliteration is wanted.
        'þ' => 'b',
        'ú' => 'u', 'ù' => 'u', 'û' => 'u', 'ü' => 'ue', 'ŭ' => 'u', 'ū' => 'u',
        'ů' => 'u', 'ų' => 'u', 'ű' => 'u', 'ư' => 'u',
        'ŵ' => 'w', 'ý' => 'y', 'ŷ' => 'y', 'ÿ' => 'y',
        'ź' => 'z', 'ż' => 'z', 'ž' => 'z',

        // Cyrillic transcription, upper case
        'А' => 'A', 'Б' => 'B', 'В' => 'W', 'Г' => 'G', 'Д' => 'D', 'Е' => 'Ie',
        'Ё' => 'Io', 'Ж' => 'Z', 'З' => 'Z', 'И' => 'I', 'Й' => 'J', 'К' => 'K',
        'Л' => 'L', 'М' => 'M', 'Н' => 'N', 'О' => 'O', 'П' => 'P', 'Р' => 'R',
        'С' => 'S', 'Т' => 'T', 'У' => 'U', 'Ф' => 'F', 'Х' => 'Ch', 'Ц' => 'C',
        'Ч' => 'Tch', 'Ш' => 'Sh', 'Щ' => 'Shtch', 'Ъ' => '', 'Ы' => 'Y',
        'Ь' => '', 'Э' => 'E', 'Ю' => 'Iu', 'Я' => 'Ia',

        // Cyrillic transcription, lower case
        'а' => 'a', 'б' => 'b', 'в' => 'w', 'г' => 'g', 'д' => 'd', 'е' => 'ie',
        'ё' => 'io', 'ж' => 'z', 'з' => 'z', 'и' => 'i', 'й' => 'j', 'к' => 'k',
        'л' => 'l', 'м' => 'm', 'н' => 'n', 'о' => 'o', 'п' => 'p', 'р' => 'r',
        'с' => 's', 'т' => 't', 'у' => 'u', 'ф' => 'f', 'х' => 'ch', 'ц' => 'c',
        'ч' => 'tch', 'ш' => 'sh', 'щ' => 'shtch', 'ъ' => '', 'ы' => 'y',
        'ь' => '', 'э' => 'e', 'ю' => 'iu', 'я' => 'ia',
    );

    // The array form of strtr() replaces each key in a single pass and never
    // re-scans text it has already replaced.
    return strtr($string, $map);
}
/**
 * Build a slug: transliterate, lowercase, keep only a-z, 0-9 and '-',
 * turn spaces into '-', and collapse repeated dashes.
 *
 * @param string $string Text to convert.
 * @param int    $maxlen Maximum length of the slug in bytes; 0 means no limit.
 * @return string The slug, or '' when no usable character remains.
 */
function makeSlugs($string, $maxlen=0)
{
    $string = strtolower(noDiacritics($string));

    if (function_exists('str_split'))
    {
        $stringTab = str_split($string);
    }
    else
    {
        $stringTab = my_str_split($string);
    }

    // Lookup table built once, instead of calling range() and in_array()
    // for every character of the input.
    $allowed = array_flip(array_merge(
        range("a", "z"),
        array("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "-")
    ));

    $newStringTab = array();
    foreach ($stringTab as $letter)
    {
        if (isset($allowed[$letter]))
        {
            $newStringTab[] = $letter;
        }
        elseif ($letter == " ")
        {
            $newStringTab[] = "-";
        }
    }

    if (!count($newStringTab))
    {
        return '';
    }

    // Collapse "--" BEFORE truncating. Truncating first counted dashes that
    // were about to be removed, so the result could come out shorter than
    // $maxlen allows (e.g. "a  b" with $maxlen 3 gave "a-" instead of "a-b").
    $newString = removeDuplicates('--', '-', implode('', $newStringTab));

    if ($maxlen > 0)
    {
        $newString = substr($newString, 0, $maxlen);
    }

    return $newString;
}
/**
 * Tell whether a string is a well-formed slug: one or more ASCII letters or
 * digits, optionally followed by letters, digits and dashes.
 *
 * @param string $sSlug Candidate slug.
 * @return bool True when the whole string matches, false otherwise.
 */
function checkSlug($sSlug)
{
    // \A and \z anchor to the real start and end of the string. A plain '$'
    // also matches just before a trailing newline, so "abc\n" used to pass.
    return preg_match('/\A[a-zA-Z0-9]+[a-zA-Z0-9\-]*\z/', $sSlug) === 1;
}
/**
 * Repeatedly replace $sSearch with $sReplace in $sSubject until $sSearch no
 * longer occurs.
 *
 * The loop is capped: once more than 100 passes have run, the script is
 * terminated with die().
 *
 * @param string $sSearch  Text to look for (e.g. '--').
 * @param string $sReplace Text to put in its place (e.g. '-').
 * @param string $sSubject String to clean up.
 * @return string $sSubject without any occurrence of $sSearch.
 */
function removeDuplicates($sSearch, $sReplace, $sSubject)
{
    $passes = 0;
    while (true)
    {
        $sSubject = str_replace($sSearch, $sReplace, $sSubject);
        $stillPresent = (strpos($sSubject, $sSearch) !== false);

        // Safety net against a replacement that keeps re-creating the search
        // text; checked on every pass, before deciding whether to continue.
        $passes += 1;
        if ($passes > 100)
        {
            die('removeDuplicates() loop error');
        }

        if (!$stillPresent)
        {
            return $sSubject;
        }
    }
}
// NOTE(review): presumably set so that the iconv() //TRANSLIT call in
// slugify() transliterates using a UTF-8 locale - confirm that this locale
// name exists on the target system.
setlocale(LC_ALL, 'en_US.UTF8');
/**
 * Convert arbitrary UTF-8 text into a lowercase ASCII slug.
 *
 * Runs of characters that are neither letters nor digits become a single
 * dash, the text is transliterated to ASCII with iconv(), lowercased, and
 * stripped of anything that is not a-z, 0-9, '_' or '-'.
 *
 * @param string $text Input text, UTF-8.
 * @return string The slug, or 'n-a' when nothing usable remains.
 */
function slugify($text)
{
    // replace non letter or digits by -
    $text = preg_replace('~[^\\pL\d]+~u', '-', $text);
    if ($text === null)
    {
        // preg_replace() returns null when the subject is not valid UTF-8.
        return 'n-a';
    }

    // trim
    $text = trim($text, '-');

    // transliterate; keep the untransliterated text if iconv() fails, the
    // ASCII-only filter below removes whatever could not be converted
    $ascii = iconv('utf-8', 'us-ascii//TRANSLIT', $text);
    if ($ascii !== false)
    {
        $text = $ascii;
    }

    // lowercase
    $text = strtolower($text);

    // remove unwanted characters (explicit ASCII class, so the result does
    // not depend on what the current locale considers a "word" character)
    $text = preg_replace('~[^-a-z0-9_]+~', '', $text);

    // Removing characters can leave doubled or dangling dashes behind.
    $text = trim(preg_replace('~-+~', '-', $text), '-');

    // Compare against '' explicitly: empty('0') is true in PHP, which used to
    // turn the perfectly valid slug "0" into 'n-a'.
    if ($text === '')
    {
        return 'n-a';
    }
    return $text;
}
$slug = slugify($var);
我在网上找到了这个,完全符合你的要求,但会保留大小写。
/**
 * Replace accented Latin-1 letters with plain ASCII letters, preserving case.
 *
 * Only the characters listed in the table are touched; spaces, punctuation
 * and everything else are returned unchanged.
 *
 * @param string $p UTF-8 input.
 * @return string Transliterated text; the input itself if it is not valid UTF-8.
 */
function sluggable($p) {
    // Every pattern carries the /u modifier. Without it PCRE treats the
    // subject and the character classes as single bytes, so a range such as
    // [À-Å] matches arbitrary bytes of multibyte characters and corrupts
    // UTF-8 text.
    // 'Þ', 'ß' and 'þ' have their own entries: the former ranges [Ý-ß] and
    // [ý-ÿ] turned them into 'Y' / 'y'.
    $table = array(
        "/[À-Å]/u"  => "A",
        "/Æ/u"      => "AE",
        "/Ç/u"      => "C",
        "/[È-Ë]/u"  => "E",
        "/[Ì-Ï]/u"  => "I",
        "/Ð/u"      => "D",
        "/Ñ/u"      => "N",
        "/[Ò-ÖØ]/u" => "O",
        "/×/u"      => "X",
        "/[Ù-Ü]/u"  => "U",
        "/Ý/u"      => "Y",
        "/Þ/u"      => "Th",
        "/ß/u"      => "ss",
        "/[à-å]/u"  => "a",
        "/æ/u"      => "ae",
        "/ç/u"      => "c",
        "/[è-ë]/u"  => "e",
        "/[ì-ï]/u"  => "i",
        "/ð/u"      => "d",
        "/ñ/u"      => "n",
        "/[ò-öø]/u" => "o",
        "/÷/u"      => "x",
        "/[ù-ü]/u"  => "u",
        "/[ýÿ]/u"   => "y",
        "/þ/u"      => "th",
    );

    $result = preg_replace(array_keys($table), array_values($table), $p);

    // preg_replace() returns null when the subject is not valid UTF-8;
    // hand the input back untouched rather than returning null.
    return $result === null ? $p : $result;
}
这确实很好用。返回正确的干净 url slug。
// Demo: reduce a noisy string to a dash-separated slug.
$string = '(1234) S*m@#ith S)&+*t `E}{xam)ple?>land - - 1!_2)#3)(*4""5';

// Lowercase, then drop everything that is not a letter, a digit or whitespace.
$clean = strtolower($string);
$clean = preg_replace('/[^a-zA-Z0-9\s]/', '', $clean);

// Each run of whitespace becomes one dash (-).
$clean = preg_replace('!\s+!', '-', $clean);

echo $clean; // 1234-smith-st-exampleland-12345
/**
 * Make a lowercase, dash-separated URL fragment from a phrase.
 *
 * Characters other than ASCII letters, digits, dashes and whitespace are
 * removed; runs of whitespace and dashes count as one separator; the result
 * is cut to $maxLength bytes before the separators are written as dashes.
 *
 * @param string $phrase    Text to convert.
 * @param int    $maxLength Maximum length, measured on the space-separated form.
 * @return string
 */
function seourl($phrase, $maxLength = 100000000000000) {
    $lowered = strtolower($phrase);
    $stripped = preg_replace("~[^A-Za-z0-9-\s]~", "", $lowered);

    // Normalise every separator run to one space so that the length limit is
    // applied to the final spacing.
    $spaced = trim(preg_replace("~[\s-]+~", " ", $stripped));

    // The cut may land right after a space; trim again so the slug never
    // ends with a dash.
    $clipped = trim(substr($spaced, 0, $maxLength));

    return preg_replace("~\s~", "-", $clipped);
}
/**
 * Replace accented Latin letters with unaccented ASCII letters.
 *
 * Upper-case accented letters map to LOWER-case ASCII (e.g. 'À' => 'a'),
 * except 'Ŕ' which maps to 'R'. Characters that are not in the table are
 * returned unchanged.
 *
 * @param string $string UTF-8 input.
 * @return string
 */
function remove_accents($string)
{
    static $map = null;

    if ($map === null)
    {
        // 'ü' is included; the table used to list 'ý' twice in its place.
        $from = 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿŔŕ';
        $to   = 'aaaaaaaceeeeiiiidnoooooouuuuybsaaaaaaaceeeeiiiidnoooooouuuuybyRr';

        // Pair each UTF-8 character of $from with the ASCII byte at the same
        // position in $to. array_combine() refuses arrays of different
        // length, which catches a misaligned table.
        $map = array_combine(
            preg_split('//u', $from, -1, PREG_SPLIT_NO_EMPTY),
            str_split($to)
        );
    }

    // Work on the UTF-8 string directly. The earlier utf8_decode() round trip
    // turned characters outside ISO-8859-1 (including 'Ŕ' and 'ŕ' in the
    // table itself) into '?', with the result that every '?' in the input
    // came out as 'r'. utf8_decode()/utf8_encode() are also deprecated as of
    // PHP 8.2.
    return strtr($string, $map);
}
/**
 * Format a title for use in a URL path.
 *
 * Accents are removed with remove_accents(), the text is lowercased and
 * trimmed, every character other than a-z, 0-9, '-' and '/' becomes '_',
 * runs of '-' are collapsed, and leading/trailing '-' and '/' are removed.
 *
 * @param string $title
 * @return string
 */
function format_slug($title)
{
    $normalized = trim(strtolower(remove_accents($title)));

    // Anything that is not a letter, digit, dash or slash turns into '_'.
    $normalized = preg_replace('#[^a-z0-9\\-/]#i', '_', $normalized);

    $collapsed = preg_replace('/-+/', '-', $normalized);

    return trim($collapsed, '-/');
}
使用:echo format_slug($var);
这是我们使用的类,虽然它可以执行单独的操作,但它还能够将字符串(或路径)转换为 slug 版本(最终输出中只有 `a-z`、`0-9` 和 `-`)。它还执行一些额外的操作,例如将 & 符号 (`&`) 转换为单词 `and`。
用途:
echo (new Str('My Cover Letter & Résumé'))->slugify()->__toString();
my-cover-letter-and-resume
`Str` 类:
<?php
use RuntimeException;
use Transliterator;
/**
 * Mutable string wrapper with chainable clean-up helpers, including
 * conversion of a string or path to a slug (a-z, 0-9 and '-').
 *
 * Every helper modifies the wrapped string in place and returns $this.
 */
class Str
{
    /**
     * Will hold an instance of Transliterator
     * for removing accents from characters.
     * Same instance for all instances of this class is fine.
     */
    private static $accent_transliterator;

    /** The wrapped string. */
    private $string;

    public function __construct(string $string)
    {
        $this->string = $string;
    }

    public function __toString()
    {
        return $this->string;
    }

    /**
     * Slugify every '/'-separated section of the string and rebuild it as
     * an absolute path with a trailing slash. Sections that slugify to an
     * empty string are dropped, so an empty input yields "/".
     */
    public function cleanForUrlPath(): self
    {
        $path = '';

        // Loop through path sections (separated by `/`)
        // and slugify each section.
        foreach (explode('/', $this->string) as $section) {
            $section = (new static($section))->slugify()->__toString();

            if ($section !== '') {
                $path .= "/$section";
            }
        }

        // Save the cleaned path
        $this->string = "$path/";

        return $this;
    }

    /**
     * Collapse runs of dashes into one and strip leading/trailing dashes.
     */
    public function cleanUpSlugDashes(): self
    {
        // Remove extra dashes
        $this->string = preg_replace('/--+/', '-', $this->string);

        // Remove leading and trailing dashes
        $this->string = trim($this->string, '-');

        return $this;
    }

    /**
     * Replace symbols with word replacements.
     * Eg, `&` becomes ` and `.
     */
    public function convertSymbolsToWords(): self
    {
        $this->string = strtr($this->string, [
            '@' => ' at ',
            '%' => ' percent ',
            '&' => ' and ',
        ]);

        return $this;
    }

    /**
     * List the characters that are treated as word separators.
     *
     * @param array $with    Extra characters to add to the default list.
     * @param array $without Characters to remove from the resulting list.
     * @return array Unique separator characters.
     */
    public static function getSpacerCharacters(
        array $with = [],
        array $without = []
    ): array {
        return array_unique(array_diff(array_merge([
            ' ', // space
            '…', // ellipsis
            '–', // en dash
            '—', // em dash
            '/', // slash
            '\\', // backslash
            ':', // colon
            ';', // semi-colon
            '.', // period
            '+', // plus sign
            '#', // pound sign
            '~', // tilde
            '_', // underscore
            '|', // pipe
        ], array_values($with)), array_values($without)));
    }

    /**
     * Lowercase the string with strtolower().
     */
    public function lower(): self
    {
        $this->string = strtolower($this->string);

        return $this;
    }

    /**
     * Replaces all accented characters
     * with similar ASCII characters.
     *
     * @throws RuntimeException when the transliterator cannot be created
     */
    public function removeAccents(): self
    {
        // If no accented characters are found,
        // return the given string as-is.
        if (!preg_match('/[\x80-\xff]/', $this->string)) {
            return $this;
        }

        // Instantiate Transliterator if we haven't already
        if (!isset(self::$accent_transliterator)) {
            self::$accent_transliterator = Transliterator::create(
                'Any-Latin; Latin-ASCII;'
            );

            if (self::$accent_transliterator === null) {
                // @codeCoverageIgnoreStart
                throw new RuntimeException(
                    'Could not create a transliterator'
                );
                // @codeCoverageIgnoreEnd
            }
        }

        $transliterated = (self::$accent_transliterator)->transliterate(
            $this->string
        );

        // transliterate() returns false on failure. Keep the current string
        // in that case instead of overwriting it with false.
        if ($transliterated !== false) {
            $this->string = $transliterated;
        }

        return $this;
    }

    /**
     * Replace occurrences of $search with $replace, as str_replace() does.
     *
     * @param string|array $search
     * @param string|array $replace
     * @return self
     */
    public function replace($search, $replace)
    {
        $this->string = str_replace($search, $replace, $this->string);

        return $this;
    }

    /**
     * Apply a regular-expression replacement, as preg_replace() does.
     *
     * @param string|array $pattern
     * @param string|array $replacement
     * @throws RuntimeException when preg_replace() reports an error
     */
    public function replaceRegex($pattern, $replacement): self
    {
        $result = preg_replace($pattern, $replacement, $this->string);

        // preg_replace() returns null on error; storing that would make
        // __toString() return a non-string.
        if ($result === null) {
            throw new RuntimeException(
                'Regex replacement failed (preg error '.preg_last_error().')'
            );
        }

        $this->string = $result;

        return $this;
    }

    /**
     * Shorten the string to at most `$length` bytes, cutting at a word
     * boundary where possible and appending "...".
     *
     * @param int $length number of bytes to shorten the string to
     */
    public function shorten(int $length): self
    {
        // If the string is already `$length` or shorter,
        // return it as-is.
        if (strlen($this->string) <= $length) {
            return $this;
        }

        // With 3 bytes or fewer there is no room for any text
        // in front of the "...", so just cut the string hard.
        // (A negative length is treated as 0.)
        if ($length <= 3) {
            $this->string = substr($this->string, 0, max(0, $length));

            return $this;
        }

        // Shorten by 2 additional characters
        // to account for the three periods that are appended.
        // Only need to shorten by 2
        // as there's always at least one character (space) removed
        // when the last word is popped off of the array.
        $words = explode(' ', substr($this->string, 0, $length - 2));

        if (count($words) > 1) {
            // Discard the last word as it's a partial word,
            // or empty if the last character happened to be a space.
            array_pop($words);
            $kept = implode(' ', $words);
        } else {
            // A single word longer than `$length`: nothing is popped,
            // so one more byte has to go to leave room for the "...".
            $kept = substr($words[0], 0, $length - 3);
        }

        // Save the shortened string with "..." appended
        $this->string = rtrim($kept, ':').'...';

        return $this;
    }

    /**
     * Convert the string to a slug containing only a-z, 0-9 and single,
     * non-leading, non-trailing dashes.
     */
    public function slugify(): self
    {
        // If the string is already a clean slug, leave it alone.
        // The pattern rejects leading, trailing and doubled dashes
        // (which still need `cleanUpSlugDashes()`), and `\z` rejects
        // a trailing newline that `$` would let through.
        if (preg_match('/\A[a-z0-9]+(?:-[a-z0-9]+)*\z/', $this->string)) {
            return $this;
        }

        // - Normalize accents
        // - Normalize symbols
        // - Lowercase
        // - Replace space characters with dashes
        // - Remove non-slug characters
        // - Clean up leading, trailing, and consecutive dashes
        return $this
            ->removeAccents()
            ->convertSymbolsToWords()
            ->lower()
            ->spacersToDashes()
            ->replaceRegex('/([^a-z0-9\\-]+)/', '')
            ->cleanUpSlugDashes();
    }

    /**
     * Replace every default spacer character with a dash.
     */
    public function spacersToDashes(): self
    {
        return $this->replace(static::getSpacerCharacters(), '-');
    }
}