转到正文

浪淘沙

静观己心,厚积薄发

存档

标签: natsort

特别喜欢 PHP 自带的自然排序函数 natsort / natcasesort。为此,还专门阅读了 PHP 源码,详情请见传送门。

这两天,不是太忙,就根据自己意愿用php重新实现了下,并稍微做了点修改。代码如下:

<?php

/**
 * Case-sensitive natural-order comparator.
 *
 * Forwards both arguments, together with their element counts, to
 * natSortExtend() with case folding disabled (fold flag 0).
 *
 * @param array $target      first operand; must be countable (the caller in
 *                           this file passes the result of str_split())
 * @param array $destination second operand, same shape as $target
 *
 * @return int whatever natSortExtend() returns (-1, 0 or 1)
 */
function strnatcmpExFoldCase0Improve($target, $destination)
{
    return natSortExtend($target, count($target), $destination, count($destination), 0);
}

/**
 * Case-insensitive natural-order comparator.
 *
 * Forwards both arguments, together with their element counts, to
 * natSortExtend() with case folding enabled (fold flag 1).
 *
 * @param array $target      first operand; must be countable
 * @param array $destination second operand; must be countable
 *
 * @return int whatever natSortExtend() returns (-1, 0 or 1)
 */
function strnatcmpExFoldCase1Improve($target, $destination)
{
    $targetLength      = count($target);
    $destinationLength = count($destination);

    return natSortExtend($target, $targetLength, $destination, $destinationLength, 1);
}

/**
 * Natural-order comparison of two strings given as arrays of single
 * characters (for example the result of str_split()).
 *
 * The algorithm follows PHP's strnatcmp_ex() with two deliberate changes:
 *  - zeros that precede another digit are skipped not only at the start of
 *    the string but also directly after an alphabetic character, so that
 *    "abc001" and "abc1" carry the same numeric value;
 *  - when every compared character is equal, the shorter input sorts first
 *    instead of the two being reported as equal.
 *
 * Digit runs are delegated to compare_left() (when either run starts with
 * '0') or compare_right() (otherwise). Those helpers receive slices and do
 * not advance the cursors of this function, so after a tie the main loop
 * simply walks over the digits one character at a time.
 *
 * @param array $target         characters of the first string
 * @param int   $targetLen      number of elements of $target to consider;
 *                              expected to equal count($target)
 * @param array $destination    characters of the second string
 * @param int   $destinationLen number of elements of $destination to
 *                              consider; expected to equal count($destination)
 * @param mixed $foldCase       truthy to compare case-insensitively
 *
 * @return int -1 if $target sorts first, 1 if $destination sorts first,
 *             0 if they are equivalent
 */
function natSortExtend($target, $targetLen, $destination, $destinationLen, $foldCase)
{
    $aend = $targetLen;
    $bend = $destinationLen;

    // 1 while zeros in front of another digit may be skipped: at the very
    // start of the input and directly after an alphabetic character.
    $targetLeading      = 1;
    $destinationLeading = 1;

    if ($targetLen == 0 || $destinationLen == 0) {
        return ($targetLen == $destinationLen ? 0 : ($targetLen > $destinationLen ? 1 : -1));
    }

    $targetIndex      = 0;
    $destinationIndex = 0;
    while (true) {
        $currentTargetItem      = $target[$targetIndex];
        $currentDestinationItem = $destination[$destinationIndex];

        /* skip over leading zeros */
        while ($targetLeading && $currentTargetItem == '0' && ($targetIndex + 1 < $aend)
            && ctype_digit($target[$targetIndex + 1])
        ) {
            $currentTargetItem = $target[++$targetIndex];
        }

        while ($destinationLeading && $currentDestinationItem == '0' && ($destinationIndex + 1 < $bend)
            && ctype_digit($destination[$destinationIndex + 1])
        ) {
            $currentDestinationItem = $destination[++$destinationIndex];
        }

        $targetLeading      = 0;
        $destinationLeading = 0;

        /* Skip consecutive whitespace */
        // Stop at the end of the input: once whitespace runs to the end the
        // current item becomes '' (which compares below every real character)
        // instead of reading an undefined offset.
        while ($currentTargetItem !== '' && ctype_space($currentTargetItem)) {
            $targetIndex++;
            $currentTargetItem = ($targetIndex < $aend) ? $target[$targetIndex] : '';
        }

        while ($currentDestinationItem !== '' && ctype_space($currentDestinationItem)) {
            $destinationIndex++;
            $currentDestinationItem = ($destinationIndex < $bend) ? $destination[$destinationIndex] : '';
        }

        /* process run of digits */
        if (ctype_digit($currentTargetItem) && ctype_digit($currentDestinationItem)) {
            $fractional = ($currentTargetItem == '0' || $currentDestinationItem == '0');

            $ap = array_slice($target, $targetIndex);
            $bp = array_slice($destination, $destinationIndex);

            if ($fractional) {
                $result = compare_left($ap, count($ap), $bp, count($bp));
            } else {
                $result = compare_right($ap, count($ap), $bp, count($bp));
            }

            if ($result != 0) {
                return $result;
            }
            // On a tie the cursors are unchanged, so comparison continues
            // from the current characters below.
        }

        if ($foldCase) {
            $currentTargetItem      = strtoupper($currentTargetItem);
            $currentDestinationItem = strtoupper($currentDestinationItem);
        }

        if ($currentTargetItem < $currentDestinationItem) {
            return -1;
        } else if ($currentTargetItem > $currentDestinationItem) {
            return +1;
        }

        $targetIndex++;
        $destinationIndex++;
        if ($targetIndex >= $aend && $destinationIndex >= $bend) {
            // Every compared character matched: break the tie by length so
            // the shorter input (fewer skipped zeros/spaces) sorts first.
            if ($aend < $bend) {
                return -1;
            } else if ($aend > $bend) {
                return 1;
            } else {
                return 0;
            }
        } else if ($targetIndex >= $aend) {
            return -1;
        } else if ($destinationIndex >= $bend) {
            return 1;
        }

        // Both flags were cleared above, so they only need to be raised
        // again when the character just consumed was a letter: zeros that
        // follow a letter are skipped, zeros after any digit are significant.
        $targetLeading      = ctype_alpha($currentTargetItem) ? 1 : 0;
        $destinationLeading = ctype_alpha($currentDestinationItem) ? 1 : 0;
    }
}

/**
 * Compares two left-aligned digit runs (used when a run starts with '0',
 * i.e. behaves like the fractional part of a number): scanning from the
 * left, the first differing digit decides, and a run that ends earlier
 * sorts first.
 *
 * A run ends at the first element that is not a digit character or at the
 * given end offset, whichever comes first. The last element of the array is
 * a regular digit position and is compared like any other.
 *
 * @param array $a    characters of the first run (single-character strings),
 *                    possibly followed by non-digit characters
 * @param int   $aend number of elements of $a that may be inspected
 * @param array $b    characters of the second run
 * @param int   $bend number of elements of $b that may be inspected
 *
 * @return int -1 if $a sorts first, 1 if $b sorts first, 0 if both runs are
 *             identical
 */
function compare_left($a, $aend, $b, $bend)
{
    for ($index = 0; ; $index++) {
        // null marks "past the end"; it is never treated as a digit.
        $ca = ($index < $aend && isset($a[$index])) ? $a[$index] : null;
        $cb = ($index < $bend && isset($b[$index])) ? $b[$index] : null;

        $aIsDigit = is_string($ca) && ctype_digit($ca);
        $bIsDigit = is_string($cb) && ctype_digit($cb);

        if (!$aIsDigit && !$bIsDigit) {
            // Both runs ended at the same position without a difference.
            return 0;
        }
        if (!$aIsDigit) {
            return -1;
        }
        if (!$bIsDigit) {
            return +1;
        }
        if ($ca < $cb) {
            return -1;
        }
        if ($ca > $cb) {
            return +1;
        }
    }
}

/**
 * Compares two right-aligned digit runs (ordinary integers without a
 * leading zero): the longer run wins; for runs of equal length the first
 * differing digit from the left decides.
 *
 * A run ends at the first element that is not a digit character or at the
 * given end offset, whichever comes first. The last element of the array is
 * a regular digit position and is compared like any other.
 *
 * @param array $a    characters of the first run (single-character strings),
 *                    possibly followed by non-digit characters
 * @param int   $aend number of elements of $a that may be inspected
 * @param array $b    characters of the second run
 * @param int   $bend number of elements of $b that may be inspected
 *
 * @return int -1 if $a is the smaller number, 1 if $b is the smaller number,
 *             0 if both runs are identical
 */
function compare_right($a, $aend, $b, $bend)
{
    // Result of the first differing digit; it only counts if both runs turn
    // out to have the same length.
    $bias = 0;

    for ($index = 0; ; $index++) {
        // null marks "past the end"; it is never treated as a digit.
        $ca = ($index < $aend && isset($a[$index])) ? $a[$index] : null;
        $cb = ($index < $bend && isset($b[$index])) ? $b[$index] : null;

        $aIsDigit = is_string($ca) && ctype_digit($ca);
        $bIsDigit = is_string($cb) && ctype_digit($cb);

        if (!$aIsDigit && !$bIsDigit) {
            // Same magnitude: the remembered first difference decides.
            return $bias;
        }
        if (!$aIsDigit) {
            // $a has fewer digits, so it is the smaller number.
            return -1;
        }
        if (!$bIsDigit) {
            return +1;
        }

        if ($bias === 0) {
            if ($ca < $cb) {
                $bias = -1;
            } else if ($ca > $cb) {
                $bias = +1;
            }
        }
    }
}

// Sample input: string keys mapped to names that mix a letter prefix with
// numbers, some of which carry leading zeros.
$data = [
    '1ab' => 'abc001',
    '2ab' => 'abc021',
    '3ab' => 'abc1',
    '4ab' => 'abc2',
    '5ab' => 'abc4',
    '6ab' => 'abc50',
    '1ae' => 'aec001',
    '2ae' => 'aec021',
    '3ae' => 'aec1',
    '4ae' => 'aec2',
    '5ae' => 'aec4',
    '6ae' => 'aec50',
    '1be' => 'bec001',
    '2be' => 'bec021',
    '3be' => 'bec1',
];

/**
 * Sorts a list of strings with the custom natural-order comparators and
 * prints the result wrapped in <pre> tags.
 *
 * Each string is split into single characters, the resulting list is sorted
 * with usort() (so the original keys are discarded), and the characters are
 * joined back into strings before the list is dumped with var_export().
 *
 * @param array $data      strings to sort
 * @param mixed $fold_case loosely equal to 0 selects the case-sensitive
 *                         comparator, anything else the case-insensitive one
 *
 * @return void
 */
function prestrnatcmpEx2($data, $fold_case)
{
    $data = array_map(
        function ($item) {
            return str_split($item);
        },
        $data
    );

    $comparator = ($fold_case == 0)
        ? 'strnatcmpExFoldCase0Improve'
        : 'strnatcmpExFoldCase1Improve';
    usort($data, $comparator);

    $sorted = array_map(
        function ($characters) {
            return implode('', $characters);
        },
        $data
    );

    echo '<pre>', var_export($sorted, true), '</pre>';
}

// First dump: the sample sorted by the custom, case-sensitive comparator.
prestrnatcmpEx2($data, 0);

// Second dump: the same sample sorted by PHP's built-in natsort(), which
// keeps the original keys.
$data = [
    '1ab' => 'abc001',
    '2ab' => 'abc021',
    '3ab' => 'abc1',
    '4ab' => 'abc2',
    '5ab' => 'abc4',
    '6ab' => 'abc50',
    '1ae' => 'aec001',
    '2ae' => 'aec021',
    '3ae' => 'aec1',
    '4ae' => 'aec2',
    '5ae' => 'aec4',
    '6ae' => 'aec50',
    '1be' => 'bec001',
    '2be' => 'bec021',
    '3be' => 'bec1',
];

natsort($data);
echo '<pre>', var_export($data, true), '</pre>';


//输出结果(第一个数组为自定义排序 prestrnatcmpEx2 的输出,第二个数组为 PHP 自带 natsort 的输出)
array (
  0 => 'abc1',
  1 => 'abc001',
  2 => 'abc2',
  3 => 'abc4',
  4 => 'abc021',
  5 => 'abc50',
  6 => 'aec1',
  7 => 'aec001',
  8 => 'aec2',
  9 => 'aec4',
  10 => 'aec021',
  11 => 'aec50',
  12 => 'bec1',
  13 => 'bec001',
  14 => 'bec021',
)
array (
  '1ab' => 'abc001',
  '2ab' => 'abc021',
  '3ab' => 'abc1',
  '4ab' => 'abc2',
  '5ab' => 'abc4',
  '6ab' => 'abc50',
  '1ae' => 'aec001',
  '2ae' => 'aec021',
  '3ae' => 'aec1',
  '4ae' => 'aec2',
  '5ae' => 'aec4',
  '6ae' => 'aec50',
  '1be' => 'bec001',
  '2be' => 'bec021',
  '3be' => 'bec1',
)

natsort的核心实现方法如下:

/* {{{ strnatcmp_ex
 *
 * Natural-order comparison of the byte ranges [a, a + a_len) and
 * [b, b + b_len).
 *
 * Returns a negative value (-1) when a sorts before b, a positive value
 * (+1) when a sorts after b, and 0 when the two compare equal. When
 * fold_case is non-zero both characters are passed through toupper()
 * before they are compared.
 *
 * compare_left() and compare_right() are defined outside this excerpt.
 */
PHPAPI int strnatcmp_ex(char const *a, size_t a_len, char const *b, size_t b_len, int fold_case)
{
    unsigned char ca, cb;   
    char const *ap, *bp;
    char const *aend = a + a_len,
    *bend = b + b_len;
    int fractional, result;
    /* Set only for the first loop iteration, so zeros are skipped only at
       the very beginning of each string. */
    short leading = 1;

    /* An empty string sorts before any non-empty one; two empty strings
       are equal. */
    if (a_len == 0 || b_len == 0) {
        return (a_len == b_len ? 0 : (a_len > b_len ? 1 : -1));
    }  

    ap = a;
    bp = b;
    while (1) {
        ca = *ap; cb = *bp;

        /* skip over leading zeros */
        /* A zero is only skipped when another digit follows it, so a lone
           "0" is kept. */
        while (leading && ca == '0' && (ap+1 < aend) && isdigit((int)(unsigned char)*(ap+1))) {
            ca = *++ap;
        }

        while (leading && cb == '0' && (bp+1 < bend) && isdigit((int)(unsigned char)*(bp+1))) {
            cb = *++bp;
        }

        leading = 0;

        /* Skip consecutive whitespace */
        /* NOTE(review): these two loops do not test ap/bp against aend/bend;
           presumably they rely on a terminating NUL byte after the data --
           confirm against the callers. */
        while (isspace((int)(unsigned char)ca)) {
            ca = *++ap;
        }

        while (isspace((int)(unsigned char)cb)) {
            cb = *++bp;
        }

        /* process run of digits */
        if (isdigit((int)(unsigned char)ca)  &&  isdigit((int)(unsigned char)cb)) {
            /* A run that starts with '0' is compared left-aligned,
               otherwise right-aligned. */
            fractional = (ca == '0' || cb == '0');

            /* The cursors are passed by address; presumably the helpers
               advance them past the digits they consume (the ap == aend
               test below depends on that) -- their bodies are not shown. */
            if (fractional)
                result = compare_left(&ap, aend, &bp, bend);
            else
                result = compare_right(&ap, aend, &bp, bend);

            if (result != 0)
                return result;
            else if (ap == aend && bp == bend)
                /* End of the strings. Let caller sort them out. */
                return 0;
            else {
                /* Keep on comparing from the current point. */
                ca = *ap; cb = *bp;
            }
        }

        if (fold_case) {
            ca = toupper((int)(unsigned char)ca);
            cb = toupper((int)(unsigned char)cb);
        }

        /* Plain comparison of the current pair of (unsigned) characters. */
        if (ca < cb)
            return -1;
        else if (ca > cb)
            return +1;

        ++ap; ++bp;
        if (ap >= aend && bp >= bend)
            /* The strings compare the same.  Perhaps the caller
                will want to call strcmp to break the tie. */
            return 0;
        else if (ap >= aend)
            /* a ran out first, so it sorts before b. */
            return -1;
        else if (bp >= bend)
            return 1;
    }
}
/* }}} */