// Name normalization function in PHP: splits a full name into
//   forename and surname, taking into account various common
//   surname prefixes and suffixes.
//
// By Jed Hartman (logos@kith.org), 11 February 2007.
// For more info, see:
// http://www.kith.org/journals/jed/2007/02/11/3813.html
//
// This code is in the public domain; no rights reserved.  Use as you like.
//
// Parameter: Pass in a string containing a full name.
// Returns: A 2-element array.  First element is fore-and-middle-names;
//   second element is surname including prefixes and suffixes, if any.
//   (Note that it's impossible to distinguish between a two-word
//   forename and a first-name-plus-middle-name, so we don't even try.)
//   Initials are normalized to have periods and spaces after them.
//
// Bugs:
//
//   *  Doesn't handle various non-Anglo approaches to naming, as in
//      names like "Garcia y Lopez".  To handle such cases, if you're
//      manually reviewing names before you call this function, you can
//      manually insert underscores between name elements that should
//      stay together: "Jaime Garcia_y_Lopez".
//
//   *  Doesn't handle cases where a surname prefix is used as a
//      middle name, as in names like "Joshua Ben David".  Again,
//      you can manually insert underscores: "Joshua_Ben David".
//
// TODO:
//
//   *  Consider generating capitalization and punctuation variants
//   for prefixes and suffixes rather than listing them all.
//
//   *  Clean up repetitive logic in middle of routine.
//
//   *  Remove other titles as well as "Dr."
//
//   *  Find a more elegant way to handle apostrophes in forenames.

// Splits a full name into a forename part and a surname part.
//
// Parameter:
//   $full_name  String containing a full name.  Words may be separated by
//               any run of whitespace or no-break spaces.  Underscores keep
//               words together ("Jaime Garcia_y_Lopez") and are turned into
//               spaces in the result.  Both UTF-8 and single-byte (Latin-1
//               style) input are accepted.
//
// Returns: A 2-element array (forename(s), surname).
//   *  The surname absorbs one trailing suffix ("Jr.", "III", ...) and any
//      number of leading prefixes ("van", "de la", ...).
//   *  A leading "Dr"/"Dr." is removed from the forename part.
//   *  Capital initials in the forename part are normalized to "X. Y. Z.".
//   *  If nothing is left over for the forename once the surname has been
//      assembled (e.g. a single-word name), the whole name is returned as
//      the first element and the second element is "".
//   *  Empty or whitespace-only input yields array ("", "").
function normalize_name($full_name)
{

  $last_name_prefixes = array ("da", "Da", "Dal", "de", "De", "del", "der", "Di", "e", "la", "La", "Le", "San", "St.", "Ste.", "van", "Van", "vel", "von");
  $last_name_suffixes = array ("Jr.", "jr.", "Jr", "jr", "Sr.", "2", "II", "III", "IV");

  $full_name = trim((string) $full_name);

  // A no-break space is the lone byte A0 in Latin-1 but the pair C2 A0 in
  // UTF-8.  Splitting UTF-8 text on a bare A0 byte would cut characters such
  // as "à" (C3 A0) in half, so only use the byte pattern when the input is
  // not valid UTF-8.  preg_match with an empty /u pattern returns 1 exactly
  // when the subject is valid UTF-8.
  if (preg_match('//u', $full_name) === 1)
  {
    $separator = '/[\s\x{A0}]+/u';
  }
  else
  {
    $separator = '/[\s\xA0]+/';
  }
  // "+" and PREG_SPLIT_NO_EMPTY collapse repeated separators so that no
  // empty "words" leak into the result.
  $all_names = preg_split($separator, $full_name, -1, PREG_SPLIT_NO_EMPTY);
  if (empty($all_names))
  {
    return array ("", "");
  }

  $last_name = array_pop($all_names);
  $next_word = array_pop($all_names);  // NULL once the words run out.

  // Strict comparison throughout: a loose in_array would treat numeric
  // strings such as "2.0" or "02" as equal to the suffix "2".
  // Doesn't account for multiple suffixes; fix eventually, but v. rare.
  if (!is_null($next_word) && in_array($last_name, $last_name_suffixes, true))
  {
    $last_name = $next_word . " " . $last_name;
    $next_word = array_pop($all_names);
  }
  while (!is_null($next_word) && in_array($next_word, $last_name_prefixes, true))
  {
    $last_name = $next_word . " " . $last_name;
    $next_word = array_pop($all_names);
  }

  $last_name = str_replace("_", " ", $last_name);  // Underscores mark multiword names.
  if (is_null($next_word))
  {
    // Every word was used up: consider the result a "first" (personal) name.
    return array ($last_name, "");
  }

  $all_names[] = $next_word;  // Put latest unused name back on stack
  $first_name = str_replace("_", " ", implode(" ", $all_names));

  // Remove "Dr"/"Dr." from start of name.  The title must end in a period or
  // at a word boundary, so that names like "Drew" are left alone.
  $first_name = preg_replace('/^Dr(?:\.|\b) ?/', "", $first_name);

  // Change all initials to have periods and spaces after them.
  // These lookarounds behave like \b except that an apostrophe counts as part
  // of the word, so the "O" in "O'Brien" is not mistaken for an initial.
  $word_start = "(?<![\\w'])";
  $word_end = "(?![\\w'])";
  // "J" -> "J."  (a lone capital, with or without its period)
  $first_name = preg_replace('/' . $word_start . '([A-Z])(?:\.|' . $word_end . ')/', '$1.', $first_name);
  // "JR" -> "J. R."  (two run-together capitals)
  $first_name = preg_replace('/' . $word_start . '([A-Z])([A-Z])(?:\.|' . $word_end . ')/', '$1. $2.', $first_name);
  // "J.R.R." -> "J. R. R."  The following initial is only looked at, not
  // consumed, so runs of any length are spaced out in a single pass.
  $first_name = preg_replace('/' . $word_start . '([A-Z])\.(?=[A-Z]\.)/', '$1. ', $first_name);

  return array ($first_name, $last_name);
}