//...
/* Naive approach under discussion: compare str against each month name in
 * turn and assign the matching ordinal to month.  Every additional name
 * costs one more strcmp() call and one more branch.  (Excerpt only: the
 * surrounding declarations and the remaining months are elided.) */
if( strcmp( str, "January" ) == 0 )
month = 1;
else if( strcmp( str, "February") == 0 )
month = 2;
//...
Q:有没有更高效的方法来确定,比如 "April" 是一年中的第四个月?反复调用 strcmp() 肯定非常低效,而且一长串 if/else 阶梯写起来也很乏味。输入有时是完整的 "March",有时是缩写 "MAR"……一定有更好的办法……
把已知字符串放进一个已排序的结构体数组,至少可以使用二分查找,但代码中仍然需要做大量试探性的比较。
这是一个“我可以回答自己的问题吗?”式的自问自答。也欢迎其他答案。
有几种方法可以把有限字符串集合中的任意一个字符串转换成简洁、可用的形式。其中大多数都要用到迭代式(或效率更差的线性)搜索,需要反复比较字符串(可能还要考虑大小写是否敏感)。
我在回答最近的一个问题时,曾建议“分享”一个(不可否认有些晦涩的)哈希函数:当传入一个 7 位 ASCII 编码的(英文)月份名称字符串时,它返回月份序号(1-12),但需要注意它可能产生误报。该函数对字符串的第 2 个和第 3 个字符做几步基本运算,得到该字符串的哈希值。请注意,“January”、“jan”和“JAN”都返回 1;同样,“feb”、“FEBRUARY”和“Feb”都返回 2。
/*
 * monthOrd - hash an English month name (full or abbreviated) to 1..12.
 *
 * cp: NUL-terminated 7-bit ASCII string.  Only cp[1] and cp[2] take part in
 *     the hash and their case bit is discarded, so "January", "jan" and
 *     "JAN" all yield 1.
 *
 * Returns the month ordinal 1-12, or 0 when the hash lands on an unused
 * ('@') slot or when cp is null / shorter than three characters.
 *
 * This is a hash, not a validator: unrelated strings collide (for example
 * "Random" yields 1), so the caller must still compare cp against the single
 * month name that corresponds to the returned ordinal.
 */
static int monthOrd( const char cp[] ) {
	/* 16-slot lookup string: 'A'..'L' carry 1..12 in their low nibble,
	 * '@' (0x40) carries 0 and marks an unused slot. */
	static const char lut[] = "DIE@CB@LJF@HAG@K";

	/* Do not read past the terminator of a too-short string. */
	if( !cp || !cp[0] || !cp[1] || !cp[2] )
		return 0;

	/* Grouped exactly as C precedence groups the unparenthesised form:
	 * '&' binds tighter than '^'. */
	return lut[ ( ( cp[1] / 4 ) & 7 ) ^ ( ( cp[2] * 2 ) & 0xF ) ] & 0xF;
}
这里用到的运算组合,是对若干基本运算进行“暴力”排列搜索后发现的:目标是找到一种组合,使 12 个月份名称映射到 0x0 到 0xF(4 位)之间的 12 个互不相同的值。建议读者亲自逐步推演这两个 ASCII 字符的各个位是如何变化的。这个结果不是“发明”出来的,而是“发现”出来的。
把两个字符的位混合打散之后,所得的值被用作一个字符串(即“廉价的查找表 LUT”)的下标。该字符串中 12 个字母 A-L 的摆放位置经过安排,使得“?an”(January)恰好落在字母“A”所在的下标上。取该字母的低 4 位就得到 1,即“JANUARY”的序号……因此,向函数传入“Jan”的任何变体时,返回值都是 1。
注意(NB):使用此函数后,调用者只需检查字符串是否确实是应用程序所接受的“JAN”、“jan”或“January”等形式,而无需再与其他 11 个月份的名称逐一比较。对于字符串“Random”,此函数会返回误报值 1,因此调用者仍需针对这一个月份名称进行验证(按应用程序要求的长度和大小写)。
附赠内容:
/*
 * wkdayOrd - hash an English weekday name (full or abbreviated) to 1..7.
 *
 * cp: NUL-terminated 7-bit ASCII string.  Only cp[0] and cp[1] take part in
 *     the hash and only the low three bits of the mix are kept, so "Sun",
 *     "SUNDAY" and "sun" all yield 1, "MON" yields 2, "tue" yields 3, ...
 *
 * Returns the weekday ordinal 1-7 (Sunday = 1), or 0 when the hash lands on
 * the single unused slot or when cp is null / shorter than two characters.
 *
 * This is a hash, not a validator: the caller must still compare cp against
 * the one weekday name that corresponds to the returned ordinal.
 */
static int wkdayOrd( const char cp[] ) {
	/* 8-slot lookup string of digits; the low three bits of each digit
	 * are the ordinal ('0' marks the unused slot). */
	static const char lut[] = "65013427";

	/* Do not read past the terminator of a too-short string. */
	if( !cp || !cp[0] || !cp[1] )
		return 0;

	/* '+' binds tighter than '&', so the whole sum is masked. */
	return lut[ ( cp[0] / 2 + ~cp[1] ) & 0x7 ] & 0x7;
}
这是一个等效的函数,它把“Sun(day)”(不区分大小写)转换为 1,把“MON”转换为 2,把“tue”转换为 3,依此类推……
同样,调用者必须再把字符串与对应的那一个星期名称进行比对,以排除“误报”。
既然说到这里,下面再给出一个针对“zero”到“ten”这些英文数字名称的等效函数,同样不区分大小写。(数字名称不像月份名称或星期名称那样有缩写形式。)
/*
 * numberOrd - hash an English number name "zero".."ten" to 0..10.
 *
 * cp: NUL-terminated 7-bit ASCII string.  Only cp[0], cp[1] and cp[2] take
 *     part in the hash and only the low four bits of the mix are kept, so
 *     "one", "One" and "ONE" all yield 1.
 *
 * Returns 0-10 for the eleven number names.  Unused slots hold '~', whose
 * low nibble is 14; that same value is returned when cp is null or shorter
 * than three characters, so any result above 10 means "not a number name".
 *
 * This is a hash, not a validator: the caller must still compare cp against
 * the one number name that corresponds to the returned ordinal.
 */
static int numberOrd( const char cp[] ) {
	/* 16-slot lookup string: '@' carries 0, 'A'..'J' carry 1..10,
	 * '~' (low nibble 14) marks an unused slot. */
	static const char lut[] = "@~IBAH~FCGE~~DJ~";

	/* Do not read past the terminator of a too-short string; report the
	 * same out-of-range value an unused slot would produce. */
	if( !cp || !cp[0] || !cp[1] || !cp[2] )
		return '~' & 0xF;

	/* '+' binds tighter than '^', so cp[0] is XORed with the sum. */
	return lut[ ( cp[0] ^ ( cp[1] / 2 + cp[2] * 4 ) ) & 0xF ] & 0xF;
}
编辑:
为了回应那些投反对票的人,这里再补充一个:
/*
 * ZodiacOrd - hash an English zodiac sign name to 1..12 (Aries = 1).
 *
 * cp: NUL-terminated 7-bit ASCII string.  Only cp[0], cp[1] and cp[2] take
 *     part in the hash and only the low four bits of the mix are kept, so
 *     "Aries", "aries" and "ARIES" all yield 1.
 *
 * Returns the sign ordinal 1-12, or 0 when the hash lands on an unused
 * ('@') slot or when cp is null / shorter than three characters.
 *
 * This is a hash, not a validator: other strings collide, so the caller
 * must still compare cp against the one sign name that corresponds to the
 * returned ordinal.
 */
static int ZodiacOrd( const char cp[] ) {
	/* 16-slot lookup string: 'A'..'L' carry 1..12 in their low nibble,
	 * '@' carries 0 and marks an unused slot. */
	static const char lut[] = "BJGA@@HIECK@@DLF";

	/* Do not read past the terminator of a too-short string. */
	if( !cp || !cp[0] || !cp[1] || !cp[2] )
		return 0;

	/* '+' binds tighter than '^': cp[0]/2 is XORed with the sum of one
	 * bit taken from cp[1] and twice cp[2]. */
	return lut[ ( ( cp[0] / 2 ) ^ ( ( ( cp[1] / 2 ) & 1 ) + cp[2] * 2 ) ) & 0xF ] & 0xF;
}
把黄道十二星座之一的英文名称(不区分大小写)传给它,它会返回该星座的序号(“Aries”(白羊座)= 1,……)。同样,和任何哈希函数一样,它也会与其他字符串发生碰撞。调用者随后只需与一个已知字符串进行比对,而不是十二个。
为什么有些人仅仅因为看不懂针对小型受限词典的哈希,就宁愿继续陷在一连串试探性的分支比较里???
我用 gperf 试了一下:把“January”、“Jan”、“JANUARY”、“JAN”、“january”、“jan”等所有月份的各种写法都传给它,看看会生成什么。
struct months {
char *name;
int number;
};
#define TOTAL_KEYWORDS 69
#define MIN_WORD_LENGTH 3
#define MAX_WORD_LENGTH 9
#define MIN_HASH_VALUE 3
#define MAX_HASH_VALUE 218
/* maximum key range = 216, duplicates = 0 */
/*
 * hash - perfect-hash function for the month-name keyword set.
 *
 * str: the candidate keyword; str[0], str[1] and str[2] are read
 *      unconditionally, so at least three characters must be readable.
 * len: the length of the keyword; it is added into the hash so that the
 *      short and long spellings of a month land in different slots.
 *
 * Returns len plus the association values of the first three characters.
 * Characters that never occur in those positions of any keyword map to 219,
 * which pushes the result above MAX_HASH_VALUE (218).
 *
 * The storage-class keyword `register` has been dropped from the parameters
 * (it is not valid in C++17, and this block carries a __cplusplus branch),
 * and the table is const because it is never written.
 */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (const char *str, size_t len)
{
  /* Per-character association values, indexed by unsigned char code. */
  static const unsigned char asso_values[] =
    {
      219, 219, 219, 219, 219, 219, 219, 219, 219, 219,
      219, 219, 219, 219, 219, 219, 219, 219, 219, 219,
      219, 219, 219, 219, 219, 219, 219, 219, 219, 219,
      219, 219, 219, 219, 219, 219, 219, 219, 219, 219,
      219, 219, 219, 219, 219, 219, 219, 219, 219, 219,
      219, 219, 219, 219, 219, 219, 219, 219, 219, 219,
      219, 219, 219, 219, 219,  10,  80,  75,  95,   5,
      125,  95, 219, 219,   5, 219,  95,  55,  45,  60,
       60, 219,  85,  95,  50,  90,  25, 219, 219,  12,
      219, 219, 219, 219, 219, 219, 219,   0,  40,  35,
       35,  35,  40,  25, 219, 219,   0, 219,  10,  50,
        0,  25,  15, 219,  15,  35,  30,  10,  25, 219,
      219,  25, 219, 219, 219, 219, 219, 219, 219, 219,
      219, 219, 219, 219, 219, 219, 219, 219, 219, 219,
      219, 219, 219, 219, 219, 219, 219, 219, 219, 219,
      219, 219, 219, 219, 219, 219, 219, 219, 219, 219,
      219, 219, 219, 219, 219, 219, 219, 219, 219, 219,
      219, 219, 219, 219, 219, 219, 219, 219, 219, 219,
      219, 219, 219, 219, 219, 219, 219, 219, 219, 219,
      219, 219, 219, 219, 219, 219, 219, 219, 219, 219,
      219, 219, 219, 219, 219, 219, 219, 219, 219, 219,
      219, 219, 219, 219, 219, 219, 219, 219, 219, 219,
      219, 219, 219, 219, 219, 219, 219, 219, 219, 219,
      219, 219, 219, 219, 219, 219, 219, 219, 219, 219,
      219, 219, 219, 219, 219, 219, 219, 219, 219, 219,
      219, 219, 219, 219, 219, 219
    };

  /* Index through unsigned char so a negative plain char cannot produce a
   * negative subscript.  The sum is formed in size_t and then narrowed,
   * exactly as the implicit conversion on return did before. */
  return (unsigned int) (len
                         + asso_values[(unsigned char)str[2]]
                         + asso_values[(unsigned char)str[1]]
                         + asso_values[(unsigned char)str[0]]);
}
/*
 * in_word_set - look up a month spelling in the perfect-hash keyword table.
 *
 * str: candidate keyword.  It is compared with strcmp(), so it must be
 *      NUL-terminated.
 * len: length of str; lengths outside MIN_WORD_LENGTH..MAX_WORD_LENGTH are
 *      rejected before hashing.
 *
 * Returns a pointer to the matching wordlist entry (whose number field is
 * the month ordinal), or 0 when str is not one of the listed spellings.
 * Only the exact spellings in the table match: all-lowercase, Capitalised
 * and ALL-UPPERCASE forms of each full and three-letter month name.
 *
 * NOTE(review): strcmp() needs <string.h>; no include is visible in this
 * chunk - confirm it is present at the top of the file.
 */
struct months *
in_word_set (register const char *str, register size_t len)
{
/* Table indexed directly by hash(); slot position IS the hash value, so
 * entries (including the empty {""} fillers) must not be moved. */
static struct months wordlist[] =
{
{""}, {""}, {""},
{"jan",1},
{""}, {""}, {""},
{"january",1},
{"Jan",1},
{""}, {""}, {""},
{"January",1},
{"jun",6},
{"june",6},
{""}, {""}, {""},
{"Jun",6},
{"June",6},
{""}, {""}, {""},
{"jul",7},
{"july",7},
{""}, {""}, {""},
{"Jul",7},
{"July",7},
{""}, {""}, {""},
{"apr",4},
{""},
{"april",4},
{""}, {""},
{"aug",8},
{""}, {""},
{"august",8},
{""},
{"Apr",4},
{""},
{"April",4},
{""}, {""},
{"Aug",8},
{""}, {""},
{"August",8},
{""},
{"nov",11},
{""}, {""}, {""}, {""},
{"november",11},
{""}, {""}, {""}, {""},
{"JAN",1},
{""}, {""}, {""},
{"JANUARY",1},
{"mar",3},
{""},
{"march",3},
{""}, {""},
{"Mar",3},
{""},
{"March",3},
{""}, {""},
{"may",5},
{""},
{"MAY",5},
{""}, {""},
{"May",5},
{""}, {""}, {""}, {""},
{"sep",9},
{""}, {""}, {""}, {""},
{"oct",10},
{"september",9},
{""}, {""},
{"october",10},
{"Nov",11},
{""}, {""}, {""}, {""},
{"November",11},
{""}, {""}, {""}, {""},
{"dec",12},
{""}, {""}, {""}, {""},
{"december",12},
{""}, {""}, {""}, {""},
{"feb",2},
{""}, {""}, {""}, {""},
{"february",2},
{""}, {""}, {""}, {""},
{"Oct",10},
{""}, {""}, {""},
{"October",10},
{"NOV",11},
{""}, {""}, {""}, {""},
{"NOVEMBER",11},
{""}, {""}, {""}, {""},
{"JUN",6},
{"JUNE",6},
{""}, {""}, {""},
{"Sep",9},
{""}, {""}, {""}, {""},
{"MAR",3},
{"September",9},
{"MARCH",3},
{""}, {""},
{"APR",4},
{""},
{"APRIL",4},
{""}, {""},
{"SEP",9},
{""}, {""}, {""}, {""},
{"Dec",12},
{"SEPTEMBER",9},
{""}, {""}, {""},
{"December",12},
{""}, {""}, {""}, {""},
{"DEC",12},
{""}, {""}, {""}, {""},
{"DECEMBER",12},
{""}, {""}, {""}, {""},
{"OCT",10},
{""}, {""}, {""},
{"OCTOBER",10},
{"JUL",7},
{"JULY",7},
{""}, {""}, {""},
{"AUG",8},
{""}, {""},
{"AUGUST",8},
{""},
{"Feb",2},
{""}, {""}, {""}, {""},
{"February",2},
{""}, {""}, {""}, {""},
{"FEB",2},
{""}, {""}, {""}, {""},
{"FEBRUARY",2}
};
/* Cheap length filter first: no keyword is shorter than 3 or longer than 9. */
if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
{
register unsigned int key = hash (str, len);
/* Anything above the last table index cannot be a keyword. */
if (key <= MAX_HASH_VALUE)
{
register const char *s = wordlist[key].name;
/* Single confirmation compare: first character inline, then strcmp on the
 * rest.  An empty filler slot fails here because its first byte is NUL. */
if (*str == *s && !strcmp (str + 1, s + 1))
return &wordlist[key];
}
}
/* Not a listed month spelling. */
return 0;
}
我想这已经相当快了:每个字符串只需要调用一次 strcmp。GCC 中的关键字检查用的正是这种方法。另外,有一篇非常好的 gperf 介绍可供参考(原文链接在此处未保留)。