Overview
Log aggregation is enabled on the YARN cluster. When pulling the logs of a finished application through the Java API, the call failed with a file-not-found error. Investigation showed that yarn.nodemanager.remote-app-log-dir-suffix and yarn.log-aggregation.TFile.remote-app-log-dir-suffix had not been kept consistent: the TFile suffix was never configured, so the reader fell back to the default bucket-logs-tfile directory instead of the directory the logs were actually written to.
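As a quick way to spot this kind of mismatch, the two suffix keys can be read from the effective configuration and compared before trying to read any logs. This is a minimal sketch (the class name SuffixCheck and the local xml path are illustrative, mirroring the setup in the code section below):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class SuffixCheck {
    public static void main(String[] args) {
        YarnConfiguration conf = new YarnConfiguration();
        conf.addResource(new Path("D:\\env\\yarn-site.xml"));
        // Suffix the NodeManager uses when writing aggregated logs (defaults to "logs").
        System.out.println("nodemanager suffix: "
            + conf.get("yarn.nodemanager.remote-app-log-dir-suffix"));
        // Suffix the TFile controller uses when reading; null here means it was never set,
        // and the reader falls back to its default layout (bucket-logs-tfile in this case).
        System.out.println("TFile suffix: "
            + conf.get("yarn.log-aggregation.TFile.remote-app-log-dir-suffix"));
    }
}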
Log aggregation configuration (yarn-site.xml)
<property>
  <name>yarn.log-aggregation-enable</name>
  <value>true</value>
</property>
<property>
  <name>yarn.nodemanager.log-aggregation.roll-monitoring-interval-seconds</name>
  <value>3600</value>
</property>
<property>
  <name>yarn.nodemanager.remote-app-log-dir</name>
  <value>/tmp/logs</value>
</property>
<!-- If the nodemanager suffix is configured, yarn.log-aggregation.TFile.remote-app-log-dir-suffix must be configured as well -->
<property>
  <name>yarn.nodemanager.remote-app-log-dir-suffix</name>
  <value>logs</value>
</property>
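For completeness, the property mentioned in the comment above would look like the following; the value logs mirrors the commented-out conf.set call in the code section below, keeping the TFile read path consistent with the NodeManager suffix:

<property>
  <name>yarn.log-aggregation.TFile.remote-app-log-dir-suffix</name>
  <value>logs</value>
</property>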
pom.xml
<dependency>
  <groupId>javax.ws.rs</groupId>
  <artifactId>jsr311-api</artifactId>
  <version>1.1.1</version>
</dependency>
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-client</artifactId>
  <version>3.3.3</version>
</dependency>
<dependency>
  <groupId>org.codehaus.jettison</groupId>
  <artifactId>jettison</artifactId>
  <version>1.1</version>
</dependency>
Code
import java.util.List;

import com.google.common.collect.Sets; // assumed: Guava's Sets (pulled in via hadoop-client); another Sets helper may have been used originally
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.client.cli.LogsCLI;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.logaggregation.ContainerLogMeta;
import org.apache.hadoop.yarn.logaggregation.ContainerLogsRequest;
import org.apache.hadoop.yarn.logaggregation.filecontroller.LogAggregationFileController;
import org.apache.hadoop.yarn.logaggregation.filecontroller.LogAggregationFileControllerFactory;
import org.apache.hadoop.yarn.webapp.util.WebServiceClient;

public class Main {
    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "hdfs");
        YarnConfiguration conf = new YarnConfiguration();
        conf.addResource(new Path("D:\\env\\core-site.xml"));
        conf.addResource(new Path("D:\\env\\yarn-site.xml"));
        // If the property is not set in the xml files, it can be set this way instead.
        //conf.set("yarn.log-aggregation.TFile.remote-app-log-dir-suffix", "logs");
        YarnClient yarnClient = YarnClient.createYarnClient();
        yarnClient.init(conf);
        yarnClient.start();
        // Fetching logs via LogsCLI, combined with specific container logs, works better;
        // use its options to narrow the request so a single call doesn't pull an overly large log.
        MyLogsCLI logsCLI = new MyLogsCLI(yarnClient);
        logsCLI.setConf(conf);
        WebServiceClient.initialize(conf);
        logsCLI.run(new String[]{"-applicationId", "application_1688354072523_0656"});
        WebServiceClient.destroy();
        // Query the meta information of the aggregated container logs.
        LogAggregationFileControllerFactory factory = new LogAggregationFileControllerFactory(conf);
        ApplicationId appId = ApplicationId.fromString("application_1688354072523_0656");
        LogAggregationFileController read = factory.getFileControllerForRead(appId, "hdfs");
        ContainerLogsRequest req = new ContainerLogsRequest(appId, null, true, "hdfs", null, null,
                null, null, Sets.newHashSet("ALL"), Long.MAX_VALUE, null);
        List<ContainerLogMeta> metas = read.readAggregatedLogsMeta(req);
    }
}
class MyLogsCLI extends LogsCLI {
    private final YarnClient yarnClient;

    public MyLogsCLI(YarnClient yarnClient) {
        this.yarnClient = yarnClient;
    }

    // Return the already-started YarnClient instead of creating a new one,
    // so LogsCLI reuses the client initialized in main().
    @Override
    protected YarnClient createYarnClient() {
        return yarnClient;
    }
}
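The metas list returned by readAggregatedLogsMeta is not consumed in the snippet above. As a rough sketch of how it could be inspected (method names are from the Hadoop 3.3 ContainerLogMeta API as I understand it; verify against your version):

// Print which containers have aggregated logs and how many files each one has.
for (ContainerLogMeta meta : metas) {
    System.out.println("container " + meta.getContainerId()
        + " on node " + meta.getNodeId()
        + ": " + meta.getContainerLogMeta().size() + " aggregated log file(s)");
}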