Apache NiFi: PutHDFS processor - could only be replicated to 0 nodes instead of minReplication (=1). There are 1 datanode(s) running and 1 node(s) are excluded

Problem description

I am using Apache NiFi 1.28 and I am trying to build a minimal data flow in which I generate data and ingest it into HDFS on HDP (Hortonworks Data Platform) 2.5.0. I am getting the following error:

2024-10-31 15:31:07,952 ERROR [Timer-Driven Process Thread-1] o.apache.nifi.processors.hadoop.PutHDFS PutHDFS[id=d2b7ad77-0192-1000-e937-9d1797c4e0fd] Failed to write to HDFS
org.apache.nifi.processor.exception.ProcessException: IOException thrown from PutHDFS[id=d2b7ad77-0192-1000-e937-9d1797c4e0fd]: org.apache.hadoop.ipc.RemoteException(java.io.IOException): File /pakistan/.db7bd26c-6ccc-4dcf-88f0-fc3965286fc3 could only be replicated to 0 nodes instead of minReplication (=1).  There are 1 datanode(s) running and 1 node(s) are excluded in this operation.
    at org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.chooseTarget4NewBlock(BlockManager.java:1641)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getNewBlockTargets(FSNamesystem.java:3198)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:3122)
    at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.addBlock(NameNodeRpcServer.java:843)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.addBlock(ClientNamenodeProtocolServerSideTranslatorPB.java:500)
    at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:640)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:982)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2313)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2309)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2307)

    at org.apache.nifi.controller.repository.StandardProcessSession.read(StandardProcessSession.java:2711)
    at org.apache.nifi.controller.repository.StandardProcessSession.read(StandardProcessSession.java:2656)
    at org.apache.nifi.processors.hadoop.PutHDFS$1.run(PutHDFS.java:405)
    at java.base/java.security.AccessController.doPrivileged(AccessController.java:400)
    at java.base/javax.security.auth.Subject.doAs(Subject.java:453)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1930)
    at org.apache.nifi.processors.hadoop.PutHDFS.onTrigger(PutHDFS.java:328)
    at org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27)
    at org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1361)
    at org.apache.nifi.controller.tasks.ConnectableTask.invoke(ConnectableTask.java:247)
    at org.apache.nifi.controller.scheduling.AbstractTimeBasedSchedulingAgent.lambda$doScheduleOnce$0(AbstractTimeBasedSchedulingAgent.java:59)
    at org.apache.nifi.engine.FlowEngine$2.run(FlowEngine.java:110)
    at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572)
    at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)
    at java.base/java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:304)
    at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
    at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
    at java.base/java.lang.Thread.run(Thread.java:1570)
Caused by: org.apache.hadoop.ipc.RemoteException: File /pakistan/.db7bd26c-6ccc-4dcf-88f0-fc3965286fc3 could only be replicated to 0 nodes instead of minReplication (=1).  There are 1 datanode(s) running and 1 node(s) are excluded in this operation.
    at org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.chooseTarget4NewBlock(BlockManager.java:1641)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getNewBlockTargets(FSNamesystem.java:3198)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:3122)
    at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.addBlock(NameNodeRpcServer.java:843)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.addBlock(ClientNamenodeProtocolServerSideTranslatorPB.java:500)
    at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:640)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:982)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2313)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2309)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2307)

    at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1584)
    at org.apache.hadoop.ipc.Client.call(Client.java:1529)
    at org.apache.hadoop.ipc.Client.call(Client.java:1426)
    at org.apache.hadoop.ipc.ProtobufRpcEngine2$Invoker.invoke(ProtobufRpcEngine2.java:258)
    at org.apache.hadoop.ipc.ProtobufRpcEngine2$Invoker.invoke(ProtobufRpcEngine2.java:139)
    at jdk.proxy7/jdk.proxy7.$Proxy171.addBlock(Unknown Source)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.lambda$addBlock$11(ClientNamenodeProtocolTranslatorPB.java:500)
    at org.apache.hadoop.ipc.internal.ShadedProtobufHelper.ipc(ShadedProtobufHelper.java:160)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.addBlock(ClientNamenodeProtocolTranslatorPB.java:500)
    at java.base/jdk.internal.reflect.DirectMethodHandleAccessor.invoke(DirectMethodHandleAccessor.java:103)
    at java.base/java.lang.reflect.Method.invoke(Method.java:580)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:437)
    at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:170)
    at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:162)
    at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:100)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:366)
    at jdk.proxy7/jdk.proxy7.$Proxy172.addBlock(Unknown Source)
    at org.apache.hadoop.hdfs.DFSOutputStream.addBlock(DFSOutputStream.java:1143)
    at org.apache.hadoop.hdfs.DataStreamer.locateFollowingBlock(DataStreamer.java:2009)
    at org.apache.hadoop.hdfs.DataStreamer.nextBlockOutputStream(DataStreamer.java:1811)
    at org.apache.hadoop.hdfs.DataStreamer.run(DataStreamer.java:751)
2024-10-31 15:31:07,958 INFO [Timer-Driven Process Thread-1] o.a.n.c.s.StandardProcessScheduler Stopping PutHDFS[id=d2b7ad77-0192-1000-e937-9d1797c4e0fd]
2024-10-31 15:31:07,958 INFO [Timer-Driven Process Thread-1] o.a.n.controller.StandardProcessorNode Stopping processor: PutHDFS[id=d2b7ad77-0192-1000-e937-9d1797c4e0fd]
2024-10-31 15:31:07,959 INFO [Timer-Driven Process Thread-1] o.a.n.c.s.TimerDrivenSchedulingAgent Stopped scheduling PutHDFS[id=d2b7ad77-0192-1000-e937-9d1797c4e0fd] to run
2024-10-31 15:31:07,960 WARN [org.apache.hadoop.fs.FileSystem$Statistics$StatisticsDataReferenceCleaner] org.apache.hadoop.fs.FileSystem Cleaner thread interrupted, will stop
java.lang.InterruptedException: null
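
As far as I understand it, the exception itself comes from the NameNode: it could not find any datanode to place the new block on, because the only running datanode was excluded from the write pipeline. Since the message says one datanode is running, the node itself seems to be up; a common cause in a sandbox setup is that the HDFS client (NiFi in this case) cannot reach the address the datanode advertises. Running hdfs dfsadmin -report on the cluster is a quick way to confirm that the datanode is live and has free capacity.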

Below is an image of the processor group on the canvas where the error occurs:

processor group Plus Error

Below is an image of the PutHDFS processor configuration:

puthdfs Processor Configurations

I have also attached the core-site.xml:

  <configuration>
    
    <property>
      <name>fs.defaultFS</name>
      <value>hdfs://192.168.136.131:8020</value>
      <final>true</final>
    </property>
    
    <property>
      <name>fs.trash.interval</name>
      <value>360</value>
    </property>
    
    <property>
      <name>ha.failover-controller.active-standby-elector.zk.op.retries</name>
      <value>120</value>
    </property>
    
    <property>
      <name>hadoop.http.authentication.simple.anonymous.allowed</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.falcon.groups</name>
      <value>*</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.falcon.hosts</name>
      <value>*</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.hbase.groups</name>
      <value>*</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.hbase.hosts</name>
      <value>*</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.hcat.groups</name>
      <value>*</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.hcat.hosts</name>
      <value>sandbox.hortonworks.com</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.hdfs.groups</name>
      <value>*</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.hdfs.hosts</name>
      <value>*</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.hive.groups</name>
      <value>*</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.hive.hosts</name>
      <value>sandbox.hortonworks.com</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.hue.groups</name>
      <value>*</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.hue.hosts</name>
      <value>*</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.livy.groups</name>
      <value>*</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.livy.hosts</name>
      <value>*</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.oozie.groups</name>
      <value>*</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.oozie.hosts</name>
      <value>sandbox.hortonworks.com</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.root.groups</name>
      <value>*</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.root.hosts</name>
      <value>sandbox.hortonworks.com</value>
    </property>
    
    <property>
      <name>hadoop.security.auth_to_local</name>
      <value>DEFAULT</value>
    </property>
    
    <property>
      <name>hadoop.security.authentication</name>
      <value>simple</value>
    </property>
    
    <property>
      <name>hadoop.security.authorization</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hadoop.security.key.provider.path</name>
      <value></value>
    </property>
    
    <property>
      <name>io.compression.codec.lzo.class</name>
      <value>com.hadoop.compression.lzo.LzoCodec</value>
    </property>
    
    <property>
      <name>io.file.buffer.size</name>
      <value>131072</value>
    </property>
    
    <property>
      <name>io.serializations</name>
      <value>org.apache.hadoop.io.serializer.WritableSerialization</value>
    </property>
    
    <property>
      <name>ipc.client.connect.max.retries</name>
      <value>50</value>
    </property>
    
    <property>
      <name>ipc.client.connection.maxidletime</name>
      <value>30000</value>
    </property>
    
    <property>
      <name>ipc.client.idlethreshold</name>
      <value>8000</value>
    </property>
    
    <property>
      <name>ipc.server.tcpnodelay</name>
      <value>true</value>
    </property>
    
    <property>
      <name>mapreduce.jobtracker.webinterface.trusted</name>
      <value>false</value>
    </property>
    
    <property>
      <name>net.topology.script.file.name</name>
      <value>/etc/hadoop/conf/topology_script.py</value>
    </property>
        <property>
      <name>hadoop.proxyuser.nifi.groups</name>
      <value>*</value>
    </property>
    <property>
      <name>hadoop.proxyuser.nifi.hosts</name>
      <value>*</value>
    </property>
    
  </configuration>
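
As far as I know, most of the entries above are cluster-side tuning; the key property the PutHDFS client needs from core-site.xml is fs.defaultFS, which must point at a NameNode address that is reachable from the NiFi host. A stripped-down client copy, just as a sketch reusing the value above, would look like:

  <configuration>
    <!-- NameNode URI taken from the fs.defaultFS entry above; this is the
         main property the HDFS client needs from core-site.xml -->
    <property>
      <name>fs.defaultFS</name>
      <value>hdfs://192.168.136.131:8020</value>
    </property>
  </configuration>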

Below is the hdfs-site.xml configuration:

  <configuration>
    
    <property>
      <name>dfs.block.access.token.enable</name>
      <value>true</value>
    </property>
    
    <property>
      <name>dfs.blockreport.initialDelay</name>
      <value>120</value>
    </property>
    
    <property>
      <name>dfs.blocksize</name>
      <value>134217728</value>
    </property>
    
    <property>
      <name>dfs.client.read.shortcircuit</name>
      <value>true</value>
    </property>
    
    <property>
      <name>dfs.client.read.shortcircuit.streams.cache.size</name>
      <value>4096</value>
    </property>
    
    <property>
      <name>dfs.client.retry.policy.enabled</name>
      <value>false</value>
    </property>
    
    <property>
      <name>dfs.cluster.administrators</name>
      <value> hdfs</value>
    </property>
    
    <property>
      <name>dfs.content-summary.limit</name>
      <value>5000</value>
    </property>
    
    <property>
      <name>dfs.datanode.address</name>
      <value>192.168.136.131:50075</value>
    </property>
    
    <property>
      <name>dfs.datanode.balance.bandwidthPerSec</name>
      <value>6250000</value>
    </property>
    
    <property>
      <name>dfs.datanode.data.dir</name>
      <value>/hadoop/hdfs/data</value>
      <final>true</final>
    </property>
    
    <property>
      <name>dfs.datanode.data.dir.perm</name>
      <value>750</value>
    </property>
    
    <property>
      <name>dfs.datanode.du.reserved</name>
      <value>1073741824</value>
    </property>
    
    <property>
      <name>dfs.datanode.failed.volumes.tolerated</name>
      <value>0</value>
      <final>true</final>
    </property>
    
    <property>
      <name>dfs.datanode.http.address</name>
      <value>192.168.136.131:50075</value>
    </property>
    
    <property>
      <name>dfs.datanode.https.address</name>
      <value>192.168.136.131:50075</value>
    </property>
    
    <property>
      <name>dfs.datanode.ipc.address</name>
      <value>0.0.0.0:8010</value>
    </property>
    
    <property>
      <name>dfs.datanode.max.transfer.threads</name>
      <value>16384</value>
    </property>
    
    <property>
      <name>dfs.domain.socket.path</name>
      <value>/var/lib/hadoop-hdfs/dn_socket</value>
    </property>
    
    <property>
      <name>dfs.encrypt.data.transfer.cipher.suites</name>
      <value>AES/CTR/NoPadding</value>
    </property>
    
    <property>
      <name>dfs.encryption.key.provider.uri</name>
      <value></value>
    </property>
    
    <property>
      <name>dfs.heartbeat.interval</name>
      <value>3</value>
    </property>
    
    <property>
      <name>dfs.hosts.exclude</name>
      <value>/etc/hadoop/conf/dfs.exclude</value>
    </property>
    
    <property>
      <name>dfs.http.policy</name>
      <value>HTTP_ONLY</value>
    </property>
    
    <property>
      <name>dfs.https.port</name>
      <value>50070</value>
    </property>
    
    <property>
      <name>dfs.journalnode.edits.dir</name>
      <value>/hadoop/hdfs/journalnode</value>
    </property>
    
    <property>
      <name>dfs.journalnode.http-address</name>
      <value>0.0.0.0:8480</value>
    </property>
    
    <property>
      <name>dfs.journalnode.https-address</name>
      <value>0.0.0.0:8481</value>
    </property>
    
    <property>
      <name>dfs.namenode.accesstime.precision</name>
      <value>0</value>
    </property>
    
    <property>
      <name>dfs.namenode.audit.log.async</name>
      <value>true</value>
    </property>
    
    <property>
      <name>dfs.namenode.avoid.read.stale.datanode</name>
      <value>true</value>
    </property>
    
    <property>
      <name>dfs.namenode.avoid.write.stale.datanode</name>
      <value>true</value>
    </property>
    
    <property>
      <name>dfs.namenode.checkpoint.dir</name>
      <value>/hadoop/hdfs/namesecondary</value>
    </property>
    
    <property>
      <name>dfs.namenode.checkpoint.edits.dir</name>
      <value>${dfs.namenode.checkpoint.dir}</value>
    </property>
    
    <property>
      <name>dfs.namenode.checkpoint.period</name>
      <value>21600</value>
    </property>
    
    <property>
      <name>dfs.namenode.checkpoint.txns</name>
      <value>1000000</value>
    </property>
    
    <property>
      <name>dfs.namenode.fslock.fair</name>
      <value>false</value>
    </property>
    
    <property>
      <name>dfs.namenode.handler.count</name>
      <value>25</value>
    </property>
    
    <property>
      <name>dfs.namenode.http-address</name>
      <value>192.168.136.131:50070</value>
      <final>true</final>
    </property>
    
    <property>
      <name>dfs.namenode.https-address</name>
      <value>192.168.136.131:50470</value>
    </property>
    
    <property>
      <name>dfs.namenode.inode.attributes.provider.class</name>
      <value>org.apache.ranger.authorization.hadoop.RangerHdfsAuthorizer</value>
    </property>
    
    <property>
      <name>dfs.namenode.name.dir</name>
      <value>/hadoop/hdfs/namenode</value>
      <final>true</final>
    </property>
    
    <property>
      <name>dfs.namenode.name.dir.restore</name>
      <value>true</value>
    </property>
    
    <property>
      <name>dfs.namenode.rpc-address</name>
      <value>192.168.136.131:8020</value>
    </property>
    
    <property>
      <name>dfs.namenode.safemode.threshold-pct</name>
      <value>1.000</value>
    </property>
    
    <property>
      <name>dfs.namenode.secondary.http-address</name>
      <value>192.168.136.131:50070</value>
    </property>
    
    <property>
      <name>dfs.namenode.stale.datanode.interval</name>
      <value>30000</value>
    </property>
    
    <property>
      <name>dfs.namenode.startup.delay.block.deletion.sec</name>
      <value>3600</value>
    </property>
    
    <property>
      <name>dfs.namenode.write.stale.datanode.ratio</name>
      <value>1.0f</value>
    </property>
    
    <property>
      <name>dfs.permissions.enabled</name>
      <value>true</value>
    </property>
    
    <property>
      <name>dfs.permissions.superusergroup</name>
      <value>hdfs</value>
    </property>
    
    <property>
      <name>dfs.replication</name>
      <value>1</value>
    </property>
    
    <property>
      <name>dfs.replication.max</name>
      <value>50</value>
    </property>
    
    <property>
      <name>dfs.support.append</name>
      <value>true</value>
      <final>true</final>
    </property>
    
    <property>
      <name>dfs.webhdfs.enabled</name>
      <value>true</value>
      <final>true</final>
    </property>
    
    <property>
      <name>fs.permissions.umask-mode</name>
      <value>022</value>
    </property>
    
    <property>
      <name>nfs.exports.allowed.hosts</name>
      <value>* rw</value>
    </property>
    
    <property>
      <name>nfs.file.dump.dir</name>
      <value>/tmp/.hdfs-nfs</value>
    </property>


    
  </configuration>
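
One thing worth noting is that dfs.replication is 1, so a single healthy datanode should be enough, and the datanode addresses above are bound to 192.168.136.131. A client-side setting that is often suggested for the "replicated to 0 nodes / node(s) excluded" symptom, when the datanode advertises an address the client cannot reach, is dfs.client.use.datanode.hostname; as a sketch (this is a generic HDFS client property, not something from my current configuration), it would be added to the hdfs-site.xml that NiFi reads:

  <configuration>
    <!-- Generic HDFS client setting (sketch): make the DFS client contact
         datanodes by hostname instead of the IP reported to the NameNode -->
    <property>
      <name>dfs.client.use.datanode.hostname</name>
      <value>true</value>
    </property>
  </configuration>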

I would really appreciate it if someone could help me resolve this error.

java hadoop hdfs
1 Answer

I solved all of these problems by switching my tech stack from HDP 2.5 to a standalone Hadoop installation on my Windows 11 machine. HDP 2.5 is no longer supported, which is why the internal IPs of the datanodes could not be resolved during replication. I included the hdfs-site.xml and core-site.xml in the Apache NiFi installation directory, i.e. C:\nifi-1.28.0-bin\nifi-1.28.0.
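
For reference, with a default single-node standalone installation the two files placed next to NiFi can be very small. Roughly (this is a generic single-node sketch; the actual NameNode host and port depend on how the standalone Hadoop was configured, localhost:9000 is just the usual default):

  <!-- core-site.xml (sketch) -->
  <configuration>
    <property>
      <name>fs.defaultFS</name>
      <value>hdfs://localhost:9000</value>
    </property>
  </configuration>

  <!-- hdfs-site.xml (sketch) -->
  <configuration>
    <property>
      <name>dfs.replication</name>
      <value>1</value>
    </property>
  </configuration>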
