Ubuntu datasophon1.2.1 二开之二:解决三大监控组件安装后,启动失败:报缺失common.sh
- 背景
- 问题
- 解决
- 最后
背景
在上次集群安装成功之后,继续往下安装三大监控组件:AlertManager、Prometheus、Grafana。安装前已经做好了失败的心理准备。
问题
结果安装之后,启动时报 common.sh 找不到!为什么会找不到呢?我看压缩包里是有这个文件的,tar 命令本身也没看出问题。后来问了一下 AI,说是解压时缺少一个参数:--strip-components=1
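后来详细了解了一下这个参数的作用:解压时去掉归档内每个路径的第一层目录。例如包内的 alertmanager-x.y.z/control.sh 会被直接解压为 目标目录/control.sh,而不是 目标目录/alertmanager-x.y.z/control.sh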
解决
根据ai提示,修改了InstallServiceHandler.java,主要是decompressTarGz,
decompressWithStripComponents方法
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */packagecom.datasophon.worker.handler;importcn.hutool.core.io.FileUtil;importcn.hutool.core.io.StreamProgress;importcn.hutool.core.lang.Console;importcn.hutool.http.HttpUtil;importcom.datasophon.common.Constants;importcom.datasophon.common.cache.CacheUtils;importcom.datasophon.common.command.InstallServiceRoleCommand;importcom.datasophon.common.model.RunAs;importcom.datasophon.common.utils.ExecResult;importcom.datasophon.common.utils.FileUtils;importcom.datasophon.common.utils.PropertyUtils;importcom.datasophon.common.utils.ShellUtils;importcom.datasophon.worker.utils.TaskConstants;importlombok.Data;importorg.apache.commons.lang.StringUtils;importorg.slf4j.Logger;importorg.slf4j.LoggerFactory;importjava.io.File;importjava.util.ArrayList;importjava.util.Objects;@DatapublicclassInstallServiceHandler{privatestaticfinalStringHADOOP="hadoop";privateStringserviceName;privateStringserviceRoleName;privateLoggerlogger;publicInstallServiceHandler(StringserviceName,StringserviceRoleName){this.serviceName=serviceName;this.serviceRoleName=serviceRoleName;StringloggerName=String.format("%s-%s-%s",TaskConstants.TASK_LOG_LOGGER_NAME,serviceName,serviceRoleName);logger=LoggerFactory.getLogger(loggerName);}publicExecRes
ultinstall(InstallServiceRoleCommandcommand){ExecResultexecResult=newExecResult();try{StringdestDir=Constants.INSTALL_PATH+Constants.SLASH+"DDP/packages"+Constants.SLASH;StringpackageName=command.getPackageName();StringpackagePath=destDir+packageName;BooleanneedDownLoad=!Objects.equals(PropertyUtils.getString(Constants.MASTER_HOST),CacheUtils.get(Constants.HOSTNAME))&&isNeedDownloadPkg(packagePath,command.getPackageMd5());if(Boolean.TRUE.equals(needDownLoad)){downloadPkg(packageName,packagePath);}booleanresult=decompressPkg(packageName,command.getDecompressPackageName(),command.getRunAs(),packagePath);execResult.setExecResult(result);}catch(Exceptione){execResult.setExecOut(e.getMessage());e.printStackTrace();}returnexecResult;}privateBooleanisNeedDownloadPkg(StringpackagePath,StringpackageMd5){BooleanneedDownLoad=true;logger.info("Remote package md5 is {}",packageMd5);if(FileUtil.exist(packagePath)){// check md5Stringmd5=FileUtils.md5(newFile(packagePath));logger.info("Local md5 is {}",md5);if(StringUtils.isNotBlank(md5)&&packageMd5.trim().equals(md5.trim())){needDownLoad=false;}}returnneedDownLoad;}privatevoiddownloadPkg(StringpackageName,StringpackagePath){StringmasterHost=PropertyUtils.getString(Constants.MASTER_HOST);StringmasterPort=PropertyUtils.getString(Constants.MASTER_WEB_PORT);StringdownloadUrl="http://"+masterHost+":"+masterPort+"/ddh/service/install/downloadPackage?packageName="+packageName;logger.info("download url is {}",downloadUrl);HttpUtil.downloadFile(downloadUrl,FileUtil.file(packagePath),newStreamProgress(){@Overridepublicvoidstart(){Console.log("start to install。。。。");}@Overridepublicvoidprogress(longprogressSize,longl1){Console.log("installed:{}",FileUtil.readableFileSize(progressSize));}@Overridepublicvoidfinish(){Console.log("install success!");}});logger.info("download package {} 
success",packageName);}privatebooleandecompressPkg(StringpackageName,StringdecompressPackageName,RunAsrunAs,StringpackagePath){StringinstallPath=Constants.INSTALL_PATH;StringtargetDir=installPath+Constants.SLASH+decompressPackageName;logger.info("Target directory for decompression: {}",targetDir);// 确保父目录存在FileparentDir=newFile(installPath);if(!parentDir.exists()){parentDir.mkdirs();}BooleandecompressResult=decompressTarGz(packagePath,targetDir);// 直接解压到目标目录if(Boolean.TRUE.equals(decompressResult)){// 验证解压结果if(FileUtil.exist(targetDir)){logger.info("Verifying installation in: {}",targetDir);File[]files=newFile(targetDir).listFiles();booleanhasControlSh=false;booleanhasBinary=false;if(files!=null){for(Filefile:files){if(file.getName().equals("control.sh")){hasControlSh=true;}if(file.getName().equals(decompressPackageName.split("-")[0])){hasBinary=true;}}}logger.info("control.sh exists: {}, binary exists: {}",hasControlSh,hasBinary);if(Objects.nonNull(runAs)){ShellUtils.exceShell(" chown -R "+runAs.getUser()+":"+runAs.getGroup()+" "+targetDir);}ShellUtils.exceShell(" chmod -R 775 "+targetDir);returntrue;}}returnfalse;}publicBooleandecompressTarGz(StringsourceTarGzFile,StringtargetDir){logger.info("Start to use tar -zxvf to decompress {} to {}",sourceTarGzFile,targetDir);// 新增:创建目标目录(如果不存在) - 增强版本FiletargetDirFile=newFile(targetDir);if(!targetDirFile.exists()){logger.info("Target directory does not exist, creating: {}",targetDir);// 尝试创建目录booleancreated=targetDirFile.mkdirs();if(!created){logger.error("Failed to create target directory: {}",targetDir);// 添加更多诊断信息FileparentDir=targetDirFile.getParentFile();if(parentDir!=null){logger.error("Parent directory exists: {}, writable: {}",parentDir.exists(),parentDir.canWrite());}// 检查是否有权限问题logger.error("Current user: {}",System.getProperty("user.name"));returnfalse;}logger.info("Successfully created target directory: {}",targetDir);}// 1. 
首先列出tar包中的文件数量inttarFileCount=getTarFileCount(sourceTarGzFile);logger.info("Tar file contains {} files/directories",tarFileCount);// 2. 记录目标目录当前的文件数量intinitialFileCount=targetDirFile.exists()?(targetDirFile.listFiles()!=null?targetDirFile.listFiles().length:0):0;// 3. 执行解压(使用 --strip-components=1)ArrayList<String>command=newArrayList<>();command.add("tar");command.add("-zxvf");command.add(sourceTarGzFile);command.add("-C");command.add(targetDir);command.add("--strip-components=1");ExecResultexecResult=ShellUtils.execWithStatus(targetDir,command,120,logger);// 4. 验证解压结果if(execResult.getExecResult()){// 等待文件系统同步try{Thread.sleep(1000);}catch(InterruptedExceptione){// ignore}// 检查目标目录的文件数量intfinalFileCount=targetDirFile.exists()?(targetDirFile.listFiles()!=null?targetDirFile.listFiles().length:0):0;intextractedFileCount=finalFileCount-initialFileCount;logger.info("Initial files: {}, Final files: {}, Extracted files: {}",initialFileCount,finalFileCount,extractedFileCount);if(extractedFileCount>0){logger.info("Decompression successful, extracted {} files to {}",extractedFileCount,targetDir);// 列出前几个文件作为验证File[]files=targetDirFile.listFiles();if(files!=null){intlimit=Math.min(files.length,5);for(inti=0;i<limit;i++){logger.debug("Extracted file: {}",files[i].getName());}}// 关键文件验证if(sourceTarGzFile.contains("alertmanager")){FilecontrolSh=newFile(targetDir+Constants.SLASH+"control.sh");Filebinary=newFile(targetDir+Constants.SLASH+"alertmanager");if(!controlSh.exists()||!binary.exists()){logger.error("Missing key files after decompression: control.sh={}, alertmanager={}",controlSh.exists(),binary.exists());returnfalse;}}elseif(sourceTarGzFile.contains("prometheus")){FilecontrolSh=newFile(targetDir+Constants.SLASH+"control.sh");Filebinary=newFile(targetDir+Constants.SLASH+"prometheus");if(!controlSh.exists()||!binary.exists()){logger.error("Missing key files after decompression: control.sh={}, 
prometheus={}",controlSh.exists(),binary.exists());returnfalse;}}returntrue;}else{logger.error("No files extracted! Something went wrong with decompression.");returnfalse;}}logger.error("Decompression command failed: {}",execResult.getExecOut());returnfalse;}/** * 获取tar包中的文件数量 */privateintgetTarFileCount(StringtarFile){try{ArrayList<String>command=newArrayList<>();command.add("tar");command.add("-tzf");command.add(tarFile);ExecResultexecResult=ShellUtils.execWithStatus(".",command,30,logger);if(execResult.getExecResult()&&execResult.getExecOut()!=null){// 按行分割,统计非空行String[]lines=execResult.getExecOut().split("\n");intcount=0;for(Stringline:lines){if(line!=null&&!line.trim().isEmpty()){count++;}}returncount;}}catch(Exceptione){logger.warn("Failed to count tar files: {}",e.getMessage());}return-1;// 未知}privateBooleandecompressWithStripComponents(StringsourceTarGzFile,StringtargetDir){logger.info("Retrying decompression with --strip-components=1");ArrayList<String>command=newArrayList<>();command.add("tar");command.add("-zxvf");command.add(sourceTarGzFile);command.add("-C");command.add(targetDir);command.add("--strip-components=1");ExecResultexecResult=ShellUtils.execWithStatus(targetDir,command,120,logger);if(execResult.getExecResult()){// 验证解压的文件StringpackageName=extractPackageName(sourceTarGzFile);FiletargetDirFile=newFile(targetDir);File[]files=targetDirFile.listFiles((dir,name)->name.contains(packageName.split("-")[0]));if(files!=null&&files.length>0){logger.info("Decompression with --strip-components=1 successful");returntrue;}}returnfalse;}privateStringextractPackageName(StringtarFile){StringfileName=newFile(tarFile).getName();// 移除 .tar.gz 或 .tgz 后缀if(fileName.endsWith(".tar.gz")){returnfileName.substring(0,fileName.length()-7);}elseif(fileName.endsWith(".tgz")){returnfileName.substring(0,fileName.length()-4);}returnfileName;}privatevoidchangeHadoopInstallPathPerm(StringdecompressPackageName){ShellUtils.exceShell(" chown -R root:hadoop 
"+Constants.INSTALL_PATH+Constants.SLASH+decompressPackageName);ShellUtils.exceShell(" chmod 755 "+Constants.INSTALL_PATH+Constants.SLASH+decompressPackageName);ShellUtils.exceShell(" chmod -R 755 "+Constants.INSTALL_PATH+Constants.SLASH+decompressPackageName+"/etc");ShellUtils.exceShell(" chmod 6050 "+Constants.INSTALL_PATH+Constants.SLASH+decompressPackageName+"/bin/container-executor");ShellUtils.exceShell(" chmod 400 "+Constants.INSTALL_PATH+Constants.SLASH+decompressPackageName+"/etc/hadoop/container-executor.cfg");ShellUtils.exceShell(" chown -R yarn:hadoop "+Constants.INSTALL_PATH+Constants.SLASH+decompressPackageName+"/logs/userlogs");ShellUtils.exceShell(" chmod 775 "+Constants.INSTALL_PATH+Constants.SLASH+decompressPackageName+"/logs/userlogs");}}最后
安装成功后,界面截图:
总览:
AlertManager:
Prometheus:
Grafana:
界面还是挺酷的:总览页监控了集群的所有节点,但后面三大组件好像只监控了 localhost 节点(ddp1),这是美中不足。如果有哪位高手知道如何修改,欢迎沟通指点:lita2lz