批量提取引号之间的数字,相乘并写回

问题描述 投票:0回答:2

我有一个文本文件,我使用以下批处理代码提取所需的部分:

@echo off > newfile
setlocal EnableDelayedExpansion
rem // Copies the lines that lie strictly between the face opening tag and the
rem // face closing tag of the source file and appends them to the target file.
rem // The redirection on the first line leaves an empty file called newfile.

set "SRC=C:\XML\INTERM\image-0000001.txt"
set "DST=c:\XML\ADD\image-0000001.txt"

rem // Line numbers of the last opening tag and of the last closing tag:
set "FIRST="
set "LAST="
for /f "tokens=1* delims=[]" %%i in ('find /n "<face>" ^< %SRC%') do (
    set "FIRST=%%i"
)
for /f "tokens=1* delims=[]" %%i in ('find /n "</face>" ^< %SRC%') do (
    set "LAST=%%i"
)

rem // Number every line and emit those located between the two markers:
for /f "tokens=1* delims=[]" %%i in ('find /n /v "" ^< %SRC%') do (
    if %%i gtr !FIRST! (
        if %%i lss !LAST! echo.%%j
    )
)>> %DST%
exit /b
 

我的结果文本文件是:

<attribute name="personName">Test</attribute>
<attribute name="face-type">human</attribute>
<geometry>
<LinearRing ptCount="36">
<Point x="968.13" y="762.91"/>
<Point x="940.54" y="764.38"/>
<Point x="921.17" y="755.09"/>
<Point x="905.96" y="742.91"/>
<Point x="909" y="705.92"/>
<Point x="918.23" y="660.54"/>
<Point x="926.39" y="629"/>
<Point x="875.92" y="638.23"/>
<Point x="869.77" y="664.38"/>
<Point x="862.08" y="689.77"/>
<Point x="847.46" y="701.31"/>
<Point x="829.77" y="702.85"/>
<Point x="840.3" y="655.52"/>
<Point x="849" y="607.7"/>
<Point x="854.22" y="560.3"/>
<Point x="886.39" y="520.74"/>
<Point x="922.48" y="501.17"/>
<Point x="966.83" y="476.39"/>
<Point x="1001.61" y="445.96"/>
<Point x="1027.26" y="412.04"/>
<Point x="1038.23" y="393.62"/>
<Point x="1051.31" y="417.46"/>
<Point x="1062.08" y="445.92"/>
<Point x="1067.46" y="472.08"/>
<Point x="1075.15" y="505.15"/>
<Point x="1078.23" y="536.69"/>
<Point x="1073.62" y="562.85"/>
<Point x="1062.85" y="592.85"/>
<Point x="1039.87" y="622.91"/>
<Point x="1017.7" y="640.3"/>
<Point x="1003.35" y="658.13"/>
<Point x="1005.09" y="675.09"/>
<Point x="1003.35" y="691.61"/>
<Point x="999.87" y="712.04"/>
<Point x="992.91" y="728.13"/>
<Point x="982.04" y="742.48"/>
</LinearRing>
</geometry>
<eye>
<geometry>
<Rectangle x="995.9565217391305" y="661.6086956521739" width="5.217391304347757" height="12.608695652173992"/>
</geometry>
<eye-bounds>
<attribute name="occlusion">hidden</attribute>
<attribute name="open-closed-state">half-open</attribute>
<geometry>
<Rectangle x="973.3478260869565" y="659.4347826086956" width="29.565217391304373" height="21.739130434782624"/>
</geometry>
</eye-bounds>
</eye>

我需要从

x
y
width
height
(不适用于
LinearRing ptCount="36"
)中提取双引号之间的每个数字,将每个数字与
2
相乘并一一写回。我必须提到
Point
标签的数量(在本例中为 36 个)可能会有所不同。我的操作系统是 Windows 10。

xml batch-file
2个回答
2
投票

这是一个依赖于JREPL.BAT的“简单”且快速的解决方案。它完全取代了您的整个脚本。

@echo off
setlocal
rem // Requires JREPL.BAT to be reachable, for instance via the search path.
set "src=C:\XML\INTERM\image-0000001.txt"
set "dst=c:\XML\ADD\image-0000001.txt"

rem // Stage 1 prints what is enclosed by the face tags of the source file;
rem // stage 2 doubles each x, y, width and height value and writes the target file.
rem // NOTE(review): \q presumably denotes a double quote under /x - confirm in the JREPL help.
jrepl "<face>([\s\S]*)</face>" $1 /m /jmatch /f "%src%" | ^
jrepl "((?:x|y|width|height)=\q)([^\q]*)(\q)" "$txt=$1+(Number($2)*2)+$3" /x /jq /o "%dst%"

但实际上您应该使用专门用于处理 XML 文件的工具。


1
投票

这是一个纯批处理(不依赖任何外部工具)的解决方案,可以完成您想要的任务,但速度相当慢。

以下限制适用:

  • XML 文件被视为“普通”文本文件;
  • 文件必须是 ANSI 编码的;
  • 文件必须是Windows文本文件(行尾标记为回车加换行);
  • 空行会丢失;
  • 行的长度不得超过 8190 个字符/字节;
  • 感兴趣的标签(
    Point
    Rectangle
    )必须各占一行;
  • 感兴趣的标签必须类似于
    <Point/>
    ;不允许使用类似
    <Point>
    </Point>
    的内容;
  • 标签名称以及属性名称(
    x
    y
    width
    height
    )必须在大小写方面完全匹配;
  • 属性值必须是(有符号)浮点数(不允许指数格式);
  • 浮点数四舍五入为不超过八位小数;
  • 浮点数的绝对值必须小于 $10^8$(即整数部分最多八位);
  • 与浮点数相乘的整数因子不得超过值
    21
  • 返回结果(相乘)浮点数,并带有八个小数位;

所以这是代码:

@echo off
setlocal EnableExtensions DisableDelayedExpansion

rem // Purpose: print the text file given as first argument line by line; in each line
rem // that consists of one self-closing tag listed in _TAG[] the values of the attributes
rem // listed in _ATT[] are replaced by their product with the factor given as second
rem // argument; all other lines are printed unchanged.

rem // Define constants here:
set "_FILE=%~1"         & rem // (use file provided as first command line argument)
set "_FACTOR=%~2"       & rem // (use factor provided as second command line argument)
set "_TAG[1]=Point"     & rem // (name of XML tag containing numeric values)
set "_TAG[2]=Rectangle" & rem // (name of XML tag containing numeric values)
rem "_TAG[...]=..."
set "_ATT[1]=x"         & rem // (name of XML attribute holding a numeric value)
set "_ATT[2]=y"         & rem // (name of XML attribute holding a numeric value)
set "_ATT[3]=width"     & rem // (name of XML attribute holding a numeric value)
set "_ATT[4]=height"    & rem // (name of XML attribute holding a numeric value)
rem "_ATT[...]=..."

rem // Main routine: check tag names and tag format:
rem // LINE holds the current line, FLAG becomes defined once the line has been
rem // converted successfully, COLL then holds the rebuilt tag content.
for /F "usebackq delims=" %%K in ("%_FILE%") do (
    set "LINE=%%K"
    set "FLAG="
    rem // Walk through the _TAG[] entries as listed by the set command; the loop
    rem // variables receive the index and the tag name:
    for /F "tokens=2,* delims=[]=" %%I in ('2^> nul set _TAG[') do (
        if not defined FLAG (
            setlocal EnableDelayedExpansion
            rem // A child cmd instance with delayed expansion echoes the line into findstr,
            rem // which checks for optional spaces, the tag name, spaces, a run of permitted
            rem // characters and the self-closing end of the tag:
            (cmd /V /C echo(^^!LINE^^!| > nul findstr /R ^
                /C:"^ *<%%J  *[^</>^&(|)][^</>^&(|)]*/> *$"
            ) && (
                rem // Split the line into the tag name and the remainder:
                for /F "tokens=1,* delims=< " %%L in ("!LINE!") do (
                    endlocal
                    set "COLL=%%L"
                    rem // Cut the remainder at the tag end to get the attribute string;
                    rem // ATTRIBUTE appends the converted attributes to COLL and
                    rem // succeeds only if every attribute could be converted:
                    for /F "delims=/>" %%N in ("%%M") do (
                        call :ATTRIBUTE COLL "%%N" && set "FLAG=#"
                    )
                    rem // Re-open a localisation level as partner of the endlocal below:
                    setlocal EnableDelayedExpansion
                )
            )
            endlocal
        )
    )
    rem // Print either the rebuilt tag or the original line:
    setlocal EnableDelayedExpansion
    if defined FLAG (
        echo(^<!COLL!/^>
    ) else (
        echo(!LINE!
    )
    endlocal
)

endlocal
exit /B


:ATTRIBUTE  rtn_line_string  val_attribute
    rem // Sub-routine: check attribute names and format:
    rem //   argument 1: name of a variable that holds the tag name on entry and
    rem //               receives the tag name plus the converted attributes;
    rem //               it is cleared on failure;
    rem //   argument 2: string of space-separated attributes;
    rem //   exit code : 0 on success, 1 if an attribute is not listed in _ATT[],
    rem //               is malformed or could not be multiplied;
    setlocal DisableDelayedExpansion
    set "STR=%~2"
    setlocal EnableDelayedExpansion
    :LOOP
    rem // Split off the first attribute into NEW and keep the remaining ones in STR:
    for /F "tokens=1,* delims= " %%A in ("!STR!") do (
        endlocal
        set "NEW=%%A"
        set "STR=%%B"
        set "FLG="
        setlocal EnableDelayedExpansion
        rem // Try the attribute names listed in _ATT[] until one of them matches:
        for /F "tokens=1,* delims==" %%E in ('2^> nul set _ATT[') do (
            if not defined FLG (
                rem // Accept only the form name, equal-to sign, quoted value; the value
                rem // must be built of digits and dots with an optional leading sign:
                (cmd /V /C echo(^^!NEW^^!| > nul findstr /R ^
                    /C:"^%%F=\"[0-9\.+-][0-9\.]*\"$"
                ) && (
                    rem // Take the quoted value, multiply it by the factor and append the
                    rem // attribute together with the product to the return variable:
                    for /F "tokens=2 delims==" %%C in ("!NEW!") do (
                        call :MULTIPLY PRO %%~C %_FACTOR% && (
                            for /F "delims=" %%D in ("!%~1! %%F="!PRO!"") do (
                                endlocal
                                set "%~1=%%D"
                                set "FLG=#"
                                setlocal EnableDelayedExpansion
                            )
                        )
                    )
                )
            )
        )
        rem // An attribute that could not be converted makes the whole tag fail:
        if not defined FLG (
            endlocal
            endlocal
            set "%~1="
            exit /B 1
        )
    )
    if defined STR goto :LOOP
    rem // Pass the collected string over both endlocal barriers to the caller:
    for /F "delims=" %%D in (^""!%~1!"^") do (
        endlocal
        endlocal
        set "%~1=%%~D"
    )
    exit /B 0


:MULTIPLY  rtn_product  val_float_num  val_integer
    rem // Sub-routine: multiply of floating-point number by integer:
    rem //   argument 1: name of the variable that receives the product, which is
    rem //               returned with exactly eight decimal places; the variable
    rem //               is cleared on failure;
    rem //   argument 2: signed decimal number without exponent; default is 0;
    rem //   argument 3: signed integer factor; default is 1;
    rem //   exit code : 0 on success, 1 on overflow;
    setlocal DisableDelayedExpansion
    rem // Fetch the factor, normalise it by arithmetic and split off its sign:
    set "MUL=%~3" & if not defined MUL set /A "MUL=1"
    set /A "MUL+=0" & set "NEG="
    if %MUL% LSS 0 set /A "MUL=-MUL" & set "NEG=-"
    rem // Fetch the number; a negative number toggles the sign of the result:
    set "NUM=%~2" & if not defined NUM set "NUM=0"
    if "%NUM:~,1%"=="-" if defined NEG (set "NEG=") else (set "NEG=-")
    rem // Give a number like .5 an explicit integer part, because the following loop
    rem // skips leading delimiters and would take the fraction as integer part:
    if "%NUM:~,1%"=="." set "NUM=0%NUM%"
    for /F "tokens=1,2 delims=." %%X in ("%NUM%") do (
        set "INT=%%X" & set "FRA=%%Y"
    )
    rem // Strip sign and leading zeros to avoid octal interpretation; the loop does
    rem // not iterate when nothing is left, so INT is cleared first to let the
    rem // fallback to zero apply:
    set "DIG=%INT%" & set "INT="
    for /F "tokens=* delims=+-0" %%Z in ("%DIG%") do set "INT=%%Z"
    if not defined INT set "INT=0"
    rem // More than eight integer digits are not supported:
    if not "%INT:~,-8%"=="" goto :OVERFLOW
    rem // Prefix 1 to avoid octal interpretation and pad with nine zeros, so the
    rem // ninth decimal place, which decides about rounding, always exists:
    set "FRA=1%FRA%000000000"
    if %FRA:~9,1% GEQ 5 (set /A "RND=1") else (set /A "RND=0")
    rem // Keep eight decimal places, remove the prefix and apply rounding:
    set /A "FRA=%FRA:~,9%%%100000000+RND"
    rem // Multiply integer and fractional parts separately:
    set /A "RSI=INT*MUL, RSF=FRA*MUL"
    rem // A negative or shrunken product indicates an arithmetic overflow:
    if %RSF% LSS 0 goto :OVERFLOW
    if %RSF% NEQ 0 if %RSF% LSS %FRA% goto :OVERFLOW
    rem // Carry everything beyond eight digits over to the integer part; without
    rem // such digits the expression is incomplete and the error is suppressed:
    2> nul set /A "RSI+=%RSF:~,-8%" & set "RSF=00000000%RSF%"
    if %RSI% LSS 0 goto :OVERFLOW
    if %RSI% NEQ 0 if %RSI% LSS %INT% goto :OVERFLOW
    rem // Return the product over the endlocal barrier:
    (
        endlocal
        set "%~1=%NEG%%RSI%.%RSF:~-8%"
    )
    exit /B 0
    :OVERFLOW
    endlocal
    set "%~1="
    exit /B 1

要使用此脚本(让我们称之为

xml-multiply.bat
),请提供 XML 文件 (
result.xml
) 作为第一个命令行参数,并提供与属性值相乘的因子 (
2
表示将其加倍) 作为第二个命令行参数,像这样:

xml-multiply.bat "result.xml" 2

要将输出写入文件 (

result_NEW.xml
),请使用以下命令:

xml-multiply.bat "result.xml" 2 > "result_NEW.xml"

要替换原始 XML 文件,请执行以下操作:

xml-multiply.bat "result.xml" 2 > "result_NEW.xml"
move /Y "result_NEW.xml" "result.xml"
© www.soinside.com 2019 - 2024. All rights reserved.