I wish I could wordwrap only long words, ignore the rest, and not wordwrap any html, xml, or any type of any tag that appears. And with this function, I can! (It's built to ignore html/xml tags that it runs into.)
The Politics: This function was originally posted on PHP.net, which created an influx of others who wanted to provide their own functions for wordwrapping text without affecting HTML/XML tags. All of their functions failed when run, but that was enough for the original PHP people to delete the only working wordwrap PHP function written in the entire world. This one:
<?php
// INBOUND DATA (expected to be set before this code runs):
//   $text_to_wordwrap                        - text that may contain HTML/XML tags and &...; entities
//   $max_word_length_for_wordwrapping        - words longer than this many bytes get wrapped
//   $wordwrap_every_x_chars_for_wordwrapping - width handed to wordwrap() for those words
//   $delimiter_for_wordwrapping              - string inserted at every break
// OUTBOUND DATA:
//   $wrapped_text - the text with delimiters inserted into over-long words
// Statistics also left in scope:
//   $number_of_tag_chars, $number_of_regular_chars, $added_delimiters,
//   $wrapped_words, $array_of_wrapped_words
// A missing inbound variable falls back to '' / 0, which leaves the text unchanged.

if (!function_exists('wordwrap_long_words_outside_tags'))
{
    /**
     * Wordwrap Normal Text (ignore entities + XML/HTML tags).
     *
     * Walks $text byte by byte. Everything between "<" and ">" and everything
     * from "&" up to the closing ";" (or the next whitespace) is treated as
     * markup and never wrapped. A run of non-markup, non-whitespace bytes
     * longer than $max_word_length is passed through wordwrap() so that
     * $delimiter is inserted every $wrap_every bytes.
     *
     * Lengths are byte counts (strlen/substr/wordwrap), not character counts.
     *
     * @param string $text            Text to process.
     * @param int    $max_word_length Longest word (in bytes) left untouched.
     * @param int    $wrap_every      Width given to wordwrap(); must be >= 1 for any wrapping to happen.
     * @param string $delimiter       Break string; must be non-empty for any wrapping to happen.
     *
     * @return array Keys: 'text' (string), 'tag_chars' (int), 'regular_chars' (int),
     *               'added_delimiters' (int), 'wrapped_words' (int), 'words' (array of
     *               the original words that were wrapped).
     */
    function wordwrap_long_words_outside_tags($text, $max_word_length, $wrap_every, $delimiter)
    {
        $text = (string) $text;
        $max_word_length = (int) $max_word_length;
        $wrap_every = (int) $wrap_every;
        $delimiter = (string) $delimiter;

        // "\s" is not a PHP escape sequence, so only these four bytes are whitespace here.
        $whitespace = " \t\n\r";
        // Bytes that end a word when looking ahead for the rest of a long word.
        $word_breakers = " \t\n\r&<";
        // wordwrap() rejects an empty break string and a zero width with cutting enabled.
        $can_wrap = ($delimiter !== '') && ($wrap_every > 0);

        $tag_chars = 0;
        $regular_chars = 0;
        $added_delimiters = 0;
        $wrapped_words = 0;
        $words = array();

        $inside_markup = false; // inside a <...> tag or an &...; entity
        $inside_entity = false; // the markup we are inside was opened by "&"
        $word_size = 0;         // bytes of the current non-markup word seen so far
        $length = strlen($text);

        for ($i = 0; $i < $length; $i++)
        {
            $char = $text[$i];
            $is_whitespace = (strpos($whitespace, $char) !== false);

            if ($char == '<')
            {
                $inside_markup = true;
                $inside_entity = false;
            }
            elseif ($char == '&' && !$inside_markup)
            {
                $inside_markup = true;
                $inside_entity = true;
            }

            if ($inside_markup)
            {
                $tag_chars++;
            }
            else
            {
                $regular_chars++;
            }

            if ($is_whitespace)
            {
                $word_size = 0;
                if ($inside_entity)
                {
                    // A bare "&" (as in "R&D") is not an entity: whitespace ends it
                    // so the following words are wrapped normally again.
                    $inside_entity = false;
                    $inside_markup = false;
                }
            }
            elseif ($inside_markup)
            {
                $word_size = 0;
            }
            else
            {
                $word_size++;
            }

            if ($can_wrap && !$inside_markup && $word_size > $max_word_length)
            {
                $word_start = $i - $word_size + 1;

                // Extend to the end of the word (next whitespace, "&", "<" or end of text).
                $word_end = $i + 1;
                while ($word_end < $length && strpos($word_breakers, $text[$word_end]) === false)
                {
                    $word_end++;
                }
                // The look-ahead bytes are skipped by the main loop, so count them here.
                $regular_chars += $word_end - ($i + 1);

                $word = substr($text, $word_start, $word_end - $word_start);
                $wrapped = wordwrap($word, $wrap_every, $delimiter, true);

                $words[] = $word;
                $wrapped_words++;
                $added_delimiters += substr_count($wrapped, $delimiter);

                $text = substr($text, 0, $word_start) . $wrapped . substr($text, $word_end);
                $length = strlen($text);
                // Resume on the last byte of the wrapped word; the loop's ++ moves past it.
                $i = $word_start + strlen($wrapped) - 1;
                $word_size = 0;
            }

            if ($char == '>')
            {
                $inside_markup = false;
                $inside_entity = false;
                $word_size = 0;
            }
            elseif ($char == ';' && ($inside_entity || !$inside_markup))
            {
                // ";" closes an entity, or acts as a word boundary in plain text.
                // Inside a real <...> tag it is ignored, so attribute values such as
                // style="a:b;c:d" stay protected until the closing ">".
                $inside_markup = false;
                $inside_entity = false;
                $word_size = 0;
            }
        }

        return array(
            'text'             => $text,
            'tag_chars'        => $tag_chars,
            'regular_chars'    => $regular_chars,
            'added_delimiters' => $added_delimiters,
            'wrapped_words'    => $wrapped_words,
            'words'            => $words,
        );
    }
}

$wrapped_text = isset($text_to_wordwrap) ? $text_to_wordwrap : '';
$max_word_length = isset($max_word_length_for_wordwrapping) ? $max_word_length_for_wordwrapping : 0;
$wordwrap_every_x_chars = isset($wordwrap_every_x_chars_for_wordwrapping) ? $wordwrap_every_x_chars_for_wordwrapping : 0;
$delimiter = isset($delimiter_for_wordwrapping) ? $delimiter_for_wordwrapping : '';

// Wordwrap Normal Text
// (Ignore Unicode + XML/HTML Tags)
// --------------------------------------------
$wordwrap_result = wordwrap_long_words_outside_tags($wrapped_text, $max_word_length, $wordwrap_every_x_chars, $delimiter);

$wrapped_text = $wordwrap_result['text'];
// Stats Settings
$number_of_tag_chars = $wordwrap_result['tag_chars'];
$number_of_regular_chars = $wordwrap_result['regular_chars'];
$added_delimiters = $wordwrap_result['added_delimiters'];
$wrapped_words = $wordwrap_result['wrapped_words'];
$array_of_wrapped_words = $wordwrap_result['words'];
// Wordwrap Unicode Text
// (Ignore Regular Text + XML/HTML Tags)
// --------------------------------------------
// Rule: every run of consecutive &...; entities outside a tag gets the
// delimiter in front of it and behind it. Runs with more entities than
// $max_word_length also get a delimiter after every
// $wordwrap_every_x_chars entities.

if (!function_exists('wordwrap_entity_runs_outside_tags'))
{
    /**
     * Surround runs of "&...;" entities with $delimiter and break up long runs.
     *
     * A run starts at an "&" found outside a <...> tag and continues while each
     * closing ";" is immediately followed by another "&". Whitespace, "<" or the
     * end of the text before the first ";" means the "&" is not an entity and
     * the text is left alone at that position.
     *
     * Offsets are byte offsets (strlen/substr).
     *
     * @param string $text           Text to process.
     * @param int    $max_run_length Runs with more entities than this are broken up.
     * @param int    $wrap_every     Number of entities between delimiters inside a long run;
     *                               values below 1 put a delimiter between every entity.
     * @param string $delimiter      String to insert.
     *
     * @return string The processed text.
     */
    function wordwrap_entity_runs_outside_tags($text, $max_run_length, $wrap_every, $delimiter)
    {
        $text = (string) $text;
        $max_run_length = (int) $max_run_length;
        $wrap_every = (int) $wrap_every;
        $delimiter = (string) $delimiter;

        // Bytes that end the search for the closing ";" of an entity.
        $run_breakers = " \t\n\r<";
        $inside_tag = false;
        $length = strlen($text);

        for ($i = 0; $i < $length; $i++)
        {
            $char = $text[$i];

            if ($char == '<')
            {
                $inside_tag = true;
                continue;
            }
            if ($char == '>')
            {
                $inside_tag = false;
                continue;
            }
            if ($char != '&' || $inside_tag)
            {
                continue;
            }

            // Measure the run that starts at $i: count its entities and
            // remember where the last complete one ends.
            $entity_count = 0;
            $run_end = -1;
            for ($j = $i + 1; $j < $length; $j++)
            {
                $next = $text[$j];
                if ($next == ';')
                {
                    $entity_count++;
                    $run_end = $j;
                    // The bounds check avoids reading past the end when ";" is the last byte.
                    if ($j + 1 >= $length || $text[$j + 1] != '&')
                    {
                        break;
                    }
                }
                elseif (strpos($run_breakers, $next) !== false)
                {
                    break;
                }
            }

            if ($entity_count < 1)
            {
                // Bare "&" (no terminating ";"): nothing to wrap here.
                continue;
            }

            $run = substr($text, $i, $run_end - $i + 1);

            if ($entity_count > $max_run_length)
            {
                // Drop the leading "&" and trailing ";" so ";&" cleanly separates entity names.
                $entities = explode(';&', (string) substr($run, 1, -1));
                $last_index = count($entities) - 1;

                $new_run = $delimiter;
                $since_break = 0;
                foreach ($entities as $index => $entity)
                {
                    $new_run .= '&' . $entity . ';';
                    $since_break++;
                    // No in-run delimiter after the last entity: the closing one follows anyway.
                    if ($since_break >= $wrap_every && $index < $last_index)
                    {
                        $new_run .= $delimiter;
                        $since_break = 0;
                    }
                }
                $new_run .= $delimiter;
            }
            else
            {
                $new_run = $delimiter . $run . $delimiter;
            }

            $text = substr($text, 0, $i) . $new_run . substr($text, $run_end + 1);
            $length = strlen($text);
            // Resume on the last byte of the inserted run; the loop's ++ moves past it,
            // so the inserted delimiters are never re-scanned.
            $i = $i + strlen($new_run) - 1;
        }

        return $text;
    }
}

if (isset($wrapped_text, $max_word_length, $wordwrap_every_x_chars, $delimiter))
{
    $wrapped_text = wordwrap_entity_runs_outside_tags($wrapped_text, $max_word_length, $wordwrap_every_x_chars, $delimiter);
}
/*
// Destroy Redundant Encodings
// --------------------------------------------
$wrapped_text = str_replace(" <wbr>", " ", $wrapped_text);
$wrapped_text = str_replace("<wbr> ", " ", $wrapped_text);
*/
/*
$wrapped_text = str_replace(";&", ";$delimiter&", $wrapped_text, $unicode_added_delimiters);
$added_delimiters += $unicode_added_delimiters;
*/
?>
Official Function Page: http://php.net/manual/en/function.wordwrap.php
// Note: All code appearing on the PHP Revolution blog by the blog owner is released under the Hacktivismo Enhanced-Source Software License Agreement (HESSLA), unless otherwise noted. http://www.hacktivismo.com/about/hessla.php
No comments:
Post a Comment