我编写了以下代码,用于从用户提供的字符串中提取数字信息,该字符串用于指定建筑物中的楼层(level 或 floor)。目标是从输入中准确提取数值。但是,当前的实现无法正确处理带连字符的数字。例如,"twenty-three" 被错误地解析为 20 而不是 23。
/**
 * Extracts the first building level/floor number mentioned in a string.
 *
 * Recognises plain digits (optionally followed by an ordinal suffix such as
 * "21st") and English cardinal/ordinal words from "one"/"first" up to
 * "fifty"/"fiftieth", including hyphenated forms such as "twenty-three".
 * An optional leading "level", "floor", "on" or "at" is tolerated.
 *
 * @param {string} input - Free-form user text, e.g. "on the twenty-third floor".
 * @returns {number|null} The level as an integer, or null when no level is
 *   found or when `input` is not a string.
 */
function extractLevelFromString(input) {
  if (typeof input !== "string") {
    return null;
  }

  // Matching is done on a lower-cased copy so the word table can stay lower-case.
  const normalizedInput = input.toLowerCase();

  const wordToNumberMap = {
    "one": 1, "first": 1,
    "two": 2, "second": 2,
    "three": 3, "third": 3,
    "four": 4, "fourth": 4,
    "five": 5, "fifth": 5,
    "six": 6, "sixth": 6,
    "seven": 7, "seventh": 7,
    "eight": 8, "eighth": 8,
    "nine": 9, "ninth": 9,
    "ten": 10, "tenth": 10,
    "eleven": 11, "eleventh": 11,
    "twelve": 12, "twelfth": 12,
    "thirteen": 13, "thirteenth": 13,
    "fourteen": 14, "fourteenth": 14,
    "fifteen": 15, "fifteenth": 15,
    "sixteen": 16, "sixteenth": 16,
    "seventeen": 17, "seventeenth": 17,
    "eighteen": 18, "eighteenth": 18,
    "nineteen": 19, "nineteenth": 19,
    "twenty": 20, "twentieth": 20,
    "twenty-one": 21, "twenty-first": 21,
    "twenty-two": 22, "twenty-second": 22,
    "twenty-three": 23, "twenty-third": 23,
    "twenty-four": 24, "twenty-fourth": 24,
    "twenty-five": 25, "twenty-fifth": 25,
    "twenty-six": 26, "twenty-sixth": 26,
    "twenty-seven": 27, "twenty-seventh": 27,
    "twenty-eight": 28, "twenty-eighth": 28,
    "twenty-nine": 29, "twenty-ninth": 29,
    "thirty": 30, "thirtieth": 30,
    "thirty-one": 31, "thirty-first": 31,
    "thirty-two": 32, "thirty-second": 32,
    "thirty-three": 33, "thirty-third": 33,
    "thirty-four": 34, "thirty-fourth": 34,
    "thirty-five": 35, "thirty-fifth": 35,
    "thirty-six": 36, "thirty-sixth": 36,
    "thirty-seven": 37, "thirty-seventh": 37,
    "thirty-eight": 38, "thirty-eighth": 38,
    "thirty-nine": 39, "thirty-ninth": 39,
    "forty": 40, "fortieth": 40,
    "forty-one": 41, "forty-first": 41,
    "forty-two": 42, "forty-second": 42,
    "forty-three": 43, "forty-third": 43,
    "forty-four": 44, "forty-fourth": 44,
    "forty-five": 45, "forty-fifth": 45,
    "forty-six": 46, "forty-sixth": 46,
    "forty-seven": 47, "forty-seventh": 47,
    "forty-eight": 48, "forty-eighth": 48,
    "forty-nine": 49, "forty-ninth": 49,
    "fifty": 50, "fiftieth": 50,
  };

  // Regex alternation takes the first branch that lets the whole pattern
  // succeed, and "\b" holds between "y" and "-". Listing "twenty" ahead of
  // "twenty-three" therefore yields 20. Trying the longest words first makes
  // the hyphenated forms win.
  const numberWords = Object.keys(wordToNumberMap)
    .sort((a, b) => b.length - a.length)
    .join("|");

  const levelRegex = new RegExp(
    `\\b(?:level|floor|on|at)?\\s*(\\d+|${numberWords})(?:st|nd|rd|th)?\\b`,
  );

  const match = levelRegex.exec(normalizedInput);
  if (match === null) {
    return null;
  }

  // The capture is either a run of digits or one of the map keys.
  const levelCandidate = match[1];
  return /^\d+$/.test(levelCandidate)
    ? Number.parseInt(levelCandidate, 10)
    : wordToNumberMap[levelCandidate];
}
我使用正则表达式模式匹配尝试了此操作,并期望从输入字符串中解析数字。
当输入为 "twenty-three" 或 "thirty-third" 这类字符串时,
正则表达式会先匹配到其中的 "twenty" 或 "thirty":
交替分支按从左到右的顺序尝试,而连字符前的位置恰好满足 \b,
因此匹配在 "twenty" 处就结束了,结果得到 20 而不是 23。
更新正则表达式以包含负向前瞻,以便仅在
不立即后跟连字符时匹配"twenty"
和 "thirty"
等值。
示例:
// Bare tens ("twenty", "thirty", ...) only match when NOT followed by a hyphen;
// hyphenated compounds are matched as tens + "-" + unit. Capture group 2 holds
// the full number text (digits, a single word, or a hyphenated compound).
const levelRegex =
/\b(level|floor|on|at)?\s*(\d+|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|(twenty|thirty|forty|fifty)(?!-)|(twenty|thirty|forty|fifty)-(one|first|two|second|three|third|four|fourth|five|fifth|six|sixth|seven|seventh|eight|eighth|nine|ninth)|first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth)(?:st|nd|rd|th)?\b/gi;
这里我还对一些值进行了分组以减少重复的子字符串。
/**
 * Extracts the first building level/floor number mentioned in a string.
 *
 * Accepted forms:
 *  - digits, optionally with an ordinal suffix ("7", "21st");
 *  - single English cardinal/ordinal words ("four", "twelfth", "thirtieth");
 *  - a tens word (twenty, thirty, forty, fifty), alone or hyphenated with a
 *    unit word ("forty", "twenty-three", "twenty-eighth", "fifty-one").
 * An optional leading "level", "floor", "on" or "at" is tolerated.
 *
 * Hyphenated values are computed as tens + unit rather than looked up in an
 * exhaustive table, so no compound can be missing or misspelled.
 *
 * @param {string} input - Free-form user text, e.g. "on the twenty-third floor".
 * @returns {number|null} The level as an integer, or null when no level is
 *   found or when `input` is not a string.
 */
function extractLevelFromString(input) {
  if (typeof input !== "string") {
    return null;
  }

  // Matching is done on a lower-cased copy so the word tables can stay lower-case.
  const normalizedInput = input.toLowerCase();

  const unitWords = {
    one: 1, first: 1,
    two: 2, second: 2,
    three: 3, third: 3,
    four: 4, fourth: 4,
    five: 5, fifth: 5,
    six: 6, sixth: 6,
    seven: 7, seventh: 7,
    eight: 8, eighth: 8,
    nine: 9, ninth: 9,
  };

  const tensWords = { twenty: 20, thirty: 30, forty: 40, fifty: 50 };

  // Words that are complete numbers on their own (never part of a compound).
  const standaloneWords = {
    ...unitWords,
    ten: 10, tenth: 10,
    eleven: 11, eleventh: 11,
    twelve: 12, twelfth: 12,
    thirteen: 13, thirteenth: 13,
    fourteen: 14, fourteenth: 14,
    fifteen: 15, fifteenth: 15,
    sixteen: 16, sixteenth: 16,
    seventeen: 17, seventeenth: 17,
    eighteen: 18, eighteenth: 18,
    nineteen: 19, nineteenth: 19,
    twentieth: 20,
    thirtieth: 30,
    fortieth: 40,
    fiftieth: 50,
  };

  // Longest words first so that e.g. "eighth" is tried before "eight".
  const toAlternation = (words) =>
    Object.keys(words)
      .sort((a, b) => b.length - a.length)
      .join("|");

  const levelRegex = new RegExp(
    [
      "\\b(?:level|floor|on|at)?\\s*(?:",
      "(?<digits>\\d+)",
      // A tens word is either followed by "-unit" or by anything but a hyphen.
      `|(?<tens>${toAlternation(tensWords)})(?:-(?<unit>${toAlternation(unitWords)})|(?!-))`,
      `|(?<word>${toAlternation(standaloneWords)})`,
      ")(?:st|nd|rd|th)?\\b",
    ].join(""),
  );

  const match = levelRegex.exec(normalizedInput);
  if (match === null) {
    return null;
  }

  const { digits, tens, unit, word } = match.groups;
  if (digits !== undefined) {
    return Number.parseInt(digits, 10);
  }
  if (tens !== undefined) {
    return tensWords[tens] + (unit === undefined ? 0 : unitWords[unit]);
  }
  return standaloneWords[word];
}
// Example call: logs the level parsed from "twenty-third", which is 23.
console.log(extractLevelFromString("twenty-third"));