news 2026/5/19 1:14:25

做GIS开发的看过来,用java获取shapefile的编码

作者头像

张小明

前端开发工程师

1.2k 24
文章封面图
做GIS开发的看过来,用java获取shapefile的编码

在 Java 中判断 Shapefile(.shp)的编码(特别是属性表 .dbf 的编码),由于 Shapefile 本身不直接存储编码信息(除非有.cpg文件),我们可以采取以下策略:

  1. 优先检查是否存在.cpg文件,若有则直接读取其内容;
  2. 若无.cpg文件,则尝试用常见编码(如 UTF-8、GBK)读取 .dbf 文件的字段名或记录内容,通过是否出现乱码或解码异常来判断。

Java 生态中常用读取 Shapefile 的库是GeoTools。下面提供一个完整的 Java 示例代码,使用 GeoTools 实现自动检测编码。具体代码实现如下:

import org.geotools.data.shapefile.ShapefileDataStore; import org.geotools.data.simple.SimpleFeatureCollection; import org.geotools.data.simple.SimpleFeatureIterator; import org.geotools.data.simple.SimpleFeatureSource; import org.opengis.feature.simple.SimpleFeature; import org.opengis.feature.simple.SimpleFeatureType; import java.io.BufferedReader; import java.io.IOException; import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Arrays; import java.util.List; import java.util.regex.Pattern; public class ShapefileEncodingDetector { private static final List<String> CANDIDATE_ENCODINGS = Arrays.asList("UTF-8", "GBK", "GB2312"); private static final Pattern CHINESE_PATTERN = Pattern.compile("[\u4e00-\u9fff]"); private static final Pattern OBVIOUS_GARBAGE_PATTERN = Pattern.compile(".*[\?]{2,}.*|[\x00-\x08\x0B\x0C\x0E-\x1F].*"); /** * 严谨检测 Shapefile 编码(JDK 8 兼容) */ public static String detectEncoding(String shpFilePath) { Path shpPath = Paths.get(shpFilePath); if (!shpPath.toString().toLowerCase().endsWith(".shp")) { shpPath = Paths.get(shpFilePath + ".shp"); } // Step 1: Check .cpg file (JDK 8 compatible read) Path cpgPath = shpPath.resolveSibling(shpPath.getFileName().toString().replace(".shp", ".cpg")); if (Files.exists(cpgPath)) { String encodingFromCpg = readCpgFile(cpgPath); if (encodingFromCpg != null && Charset.isSupported(encodingFromCpg)) { System.out.println("[INFO] Encoding from .cpg: " + encodingFromCpg); return encodingFromCpg; } } String bestEncoding = null; // Step 2: Try candidate encodings for (String encoding : CANDIDATE_ENCODINGS) { try { ShapefileDataStore store = new ShapefileDataStore(shpPath.toUri().toURL()); store.setCharset(Charset.forName(encoding)); SimpleFeatureSource source = store.getFeatureSource(); SimpleFeatureType schema = source.getSchema(); boolean valid = true; boolean currentHasChinese = false; // Validate field names for (int i = 0; i < 
schema.getAttributeCount(); i++) { String fieldName = schema.getDescriptor(i).getLocalName(); if (isObviousGarbage(fieldName)) { valid = false; break; } if (containsChinese(fieldName)) { currentHasChinese = true; } } if (!valid) { store.dispose(); continue; } // Validate first few feature attribute values SimpleFeatureCollection features = source.getFeatures(); SimpleFeatureIterator iter = features.features(); try { int count = 0; while (iter.hasNext() && count < 5) { SimpleFeature feature = iter.next(); for (Object attr : feature.getAttributes()) { if (attr == null) continue; String value = attr.toString(); if (isObviousGarbage(value)) { valid = false; break; } if (containsChinese(value)) { currentHasChinese = true; } } if (!valid) break; count++; } } finally { iter.close(); // JDK 8 没有 try-with-resources for AutoCloseable in older GeoTools? } store.dispose(); if (valid) { if (currentHasChinese) { System.out.println("[INFO] Valid encoding with Chinese detected: " + encoding); return encoding; } else { if (bestEncoding == null) { bestEncoding = encoding; } } } } catch (Exception e) { // Skip this encoding continue; } } if (bestEncoding != null) { System.out.println("[INFO] No Chinese found, using safe fallback: " + bestEncoding); return bestEncoding; } System.out.println("[WARN] All encodings failed, defaulting to UTF-8"); return "UTF-8"; } /** * JDK 8 兼容方式读取 .cpg 文件(纯文本,单行编码名) */ private static String readCpgFile(Path cpgPath) { try { // 使用默认字符集(通常是 UTF-8)读取 .cpg,因为 .cpg 本身是 ASCII/UTF-8 StringBuilder content = new StringBuilder(); try (BufferedReader reader = Files.newBufferedReader(cpgPath)) { String line; while ((line = reader.readLine()) != null) { content.append(line.trim()); break; // .cpg 通常只有一行 } } return content.toString().trim(); } catch (IOException e) { System.err.println("[WARN] Failed to read .cpg file: " + e.getMessage()); return null; } } /** * 判断字符串是否包含中文字符 */ private static boolean containsChinese(String s) { return s != null && 
CHINESE_PATTERN.matcher(s).find(); } /** * 判断是否为明显乱码 */ private static boolean isObviousGarbage(String s) { if (s == null || s.isEmpty()) return false; if (OBVIOUS_GARBAGE_PATTERN.matcher(s).matches()) { return true; } long badCharCount = 0; for (char c : s.toCharArray()) { if (c == '?' || c == '\ufffd') { badCharCount++; } } return badCharCount >= 2 || (s.length() > 0 && (double) badCharCount / s.length() > 0.3); }

版权声明: 本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若内容造成侵权/违法违规/事实不符,请联系邮箱:809451989@qq.com进行投诉反馈,一经查实,立即删除!
网站建设 2026/5/10 13:34:42

白帽黑客学习网站大全!全网合法资源汇总,速存防删

我们学习网络安全,很多学习路线都有提到多逛论坛,阅读他人的技术分析帖,学习其挖洞思路和技巧。但是往往对于初学者来说,不知道去哪里寻找技术分析帖,也不知道网络安全有哪些相关论坛或网站,所以在这里给大…

作者头像 李华
网站建设 2026/5/11 11:19:49

python+vue开发的性格测试系统-pycharm DJANGO FLASK

文章目录技术栈选择核心功能模块数据交互流程部署与优化大数据系统开发流程主要运用技术介绍源码文档获取定制开发/同行可拿货,招校园代理 :文章底部获取博主联系方式!技术栈选择 Python后端框架可选Django或Flask: Django:适合…

作者头像 李华
网站建设 2026/5/13 0:10:43

AIGC检测率怎么降低?ChatGPT写的论文这样处理

AIGC检测率怎么降低?ChatGPT写的论文这样处理 ChatGPT论文的降AI全攻略 AIGC检测率怎么降低?用ChatGPT写论文的同学越来越多,但AI率动辄90%以上。这篇教程帮你解决问题。 ChatGPT论文的典型特征 ChatGPT生成的内容有明显标记:…

作者头像 李华
网站建设 2026/5/15 6:51:33

基于python OpenCV的车牌识别计费系统

基于Python OpenCV的车牌识别计费系统设计 第一章 绪论 停车场、高速公路等场景的车牌识别计费是智慧交通的核心应用环节,传统人工计费模式效率低、易出错,而商用车牌识别系统成本高、适配性差,难以满足中小场景的个性化需求。Python语言具…

作者头像 李华