Note Submitter: djneoform at gmail dot com
----
wordwrap() counts bytes, not characters, so it wraps multi-byte (UTF-8)
strings at the wrong place. I wrote mb_wordwrap(), which counts characters instead:
/**
 * Word-wrap a UTF-8 string, measuring line length in characters, not bytes.
 *
 * Only the ASCII space (U+0020) is treated as a word separator. A space is
 * replaced by $break whenever appending the next word (plus its separating
 * space) would make the line longer than $width characters; every other
 * space is kept as-is, including leading, trailing and repeated spaces.
 * Line breaks already present in $str get no special treatment: they are
 * counted like any other character.
 *
 * Malformed input is handled leniently: a stray byte in 0x80-0xC1 counts as
 * one character, a byte above 0xF4 is dropped, and a multi-byte sequence cut
 * short by the end of the string is kept as a single character. Continuation
 * bytes are not validated.
 *
 * @param string $str   UTF-8 text to wrap.
 * @param int    $width Maximum number of characters per line.
 * @param string $break String inserted at each line break.
 * @param bool   $cut   When true, words longer than $width are split into
 *                      chunks of $width characters; when false they are
 *                      left intact on a line of their own.
 *
 * @return string The wrapped text.
 */
function mb_wordwrap($str, $width = 70, $break = "\n", $cut = false)
{
    $str = (string) $str;
    $str_bytes = strlen($str);

    $return = '';
    // True until the first word has been committed; that word gets no
    // separating space in front of it.
    $first_word = true;
    $current_line = '';
    $current_line_char_count = 0;
    $current_word = '';
    $current_word_char_count = 0;

    // The loop deliberately runs one step past the last byte so that the
    // final word goes through exactly the same commit logic as a word that
    // is terminated by a space.
    for ($i = 0; $i <= $str_bytes; $i++) {
        $at_end = ($i === $str_bytes);

        if (!$at_end) {
            // Work out how many bytes the next character occupies from its
            // lead byte.
            $h = ord($str[$i]);
            if ($h < 0xC2) {
                // ASCII, or a stray continuation/overlong lead byte.
                $char_bytes = 1;
            } elseif ($h <= 0xDF) {
                $char_bytes = 2;
            } elseif ($h <= 0xEF) {
                $char_bytes = 3;
            } elseif ($h <= 0xF4) {
                $char_bytes = 4;
            } else {
                // Not a valid UTF-8 lead byte: skip it.
                continue;
            }

            // substr() clamps at the end of the string, so a truncated
            // sequence never reads out of bounds.
            $char = substr($str, $i, $char_bytes);
            $i += strlen($char) - 1;
        }

        if ($at_end || $char === ' ') {
            // Commit the pending word. "line + word >= width" is the same
            // as "line + space + word > width".
            if ($current_line_char_count + $current_word_char_count >= $width) {
                if ($current_line_char_count) {
                    $return .= $current_line . $break;
                }
                // The word starts a fresh line.
                $current_line = $current_word;
                $current_line_char_count = $current_word_char_count;
            } else {
                $separator = $first_word ? '' : ' ';
                $current_line .= $separator . $current_word;
                $current_line_char_count += $current_word_char_count + strlen($separator);
            }

            $current_word = '';
            $current_word_char_count = 0;
            $first_word = false;
            continue;
        }

        if ($cut && $current_word_char_count >= $width) {
            // The word alone already fills a line: flush what we have and
            // let the full chunk become the current line.
            if ($current_line_char_count) {
                $return .= $current_line . $break;
            }
            $current_line = $current_word;
            $current_line_char_count = $current_word_char_count;
            $current_word = '';
            $current_word_char_count = 0;
        }

        $current_word .= $char;
        $current_word_char_count++;
    }

    return $return . $current_line;
}
--
PHP Notes Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php
|