`
lizhuang
  • 浏览: 888238 次
  • 性别: Icon_minigender_1
  • 来自: 上海
社区版块
存档分类
最新评论

解析文件去标点

阅读更多

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small command-line utility that reads a text file, removes digits, commas
 * and double quotes from its content, and writes the remaining tokens to a
 * second file separated by CRLF.
 *
 * <p>Both files are read and written with the platform default charset.
 */
public class ReadTxtFile {

	/** Input file used by {@link #main(String[])} when no first argument is supplied. */
	private static final String DEFAULT_INPUT_PATH = "/Users/yourname/Downloads/a.txt";

	/** Output file used by {@link #main(String[])} when no second argument is supplied. */
	private static final String DEFAULT_OUTPUT_PATH = "/Users/yourname/Downloads/b.txt";

	/** One or more consecutive digits. */
	private static final Pattern DIGITS = Pattern.compile("\\d+");

	/**
	 * A double-quoted run whose closing quote sits on a regex word boundary,
	 * i.e. is directly followed by a character the regex engine treats as a
	 * word character. Group 1 is the text between the quotes.
	 */
	private static final Pattern QUOTED_BEFORE_WORD = Pattern.compile("\"([^\"]+)\"\\b");

	/**
	 * Reads the input file, strips digits/commas/quotes, prints the result to
	 * standard output and writes it to the output file.
	 *
	 * @param args optional; {@code args[0]} overrides the input path and
	 *             {@code args[1]} overrides the output path. Missing entries
	 *             fall back to the built-in default paths.
	 */
	public static void main(String[] args) {
		String inputPath = argOrDefault(args, 0, DEFAULT_INPUT_PATH);
		String outputPath = argOrDefault(args, 1, DEFAULT_OUTPUT_PATH);

		String result = stripDigitsAndQuotes(readTxtFile(inputPath));
		System.out.println(result);

		try {
			writeFromBuffer(result, outputPath);
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Writes {@code str} to the file {@code outFileName}, replacing any
	 * existing content. The text is encoded with the platform default charset.
	 *
	 * @param str         text to write; {@code null} is written as the four
	 *                    characters {@code null}
	 * @param outFileName path of the file to create or overwrite
	 * @throws IOException if the file cannot be opened or the write fails
	 */
	public static void writeFromBuffer(String str, String outFileName) throws IOException {
		// try-with-resources closes (and thereby flushes) both streams even when
		// the write fails, and lets the IOException reach the caller.
		try (FileOutputStream os = new FileOutputStream(outFileName);
				OutputStreamWriter writer = new OutputStreamWriter(os)) {
			writer.write(String.valueOf(str));
		}
	}

	/**
	 * Reads a text file and returns its lines concatenated WITHOUT any line
	 * separators. Each line is also echoed to standard output while reading.
	 * The file is decoded with the platform default charset.
	 *
	 * <p>This method is best-effort: if the path does not denote an existing
	 * regular file, or if reading fails, a message is printed and whatever was
	 * read so far (possibly the empty string) is returned.
	 *
	 * @param filePath path of the file to read
	 * @return the concatenated lines; never {@code null}
	 */
	public static String readTxtFile(String filePath) {
		StringBuilder sb = new StringBuilder();

		try {
			File file = new File(filePath);
			// isFile() is only true for an existing regular file.
			if (!file.isFile()) {
				System.out.println("找不到指定的文件");
				return sb.toString();
			}
			try (BufferedReader reader = new BufferedReader(
					new InputStreamReader(new FileInputStream(file)))) {
				String line;
				while ((line = reader.readLine()) != null) {
					System.out.println(line);
					// Lines are joined directly; the quote handling in
					// stripDigitsAndQuotes relies on adjacent lines touching.
					sb.append(line);
				}
			}
		} catch (IOException | RuntimeException e) {
			// Keep the best-effort contract for ordinary failures (including a
			// null path), but let Errors propagate instead of discarding them.
			System.out.println("读取文件内容出错");
			e.printStackTrace();
		}
		return sb.toString();
	}

	/** Returns {@code args[index]} when present and non-null, otherwise {@code fallback}. */
	private static String argOrDefault(String[] args, int index, String fallback) {
		if (args != null && index < args.length && args[index] != null) {
			return args[index];
		}
		return fallback;
	}

	/**
	 * Removes digits and commas, then turns quote delimiters into CRLF
	 * separators and drops any quotes that remain.
	 */
	private static String stripDigitsAndQuotes(String raw) {
		String noNumRaw = DIGITS.matcher(raw).replaceAll("").replace(",", "");
		// "text" directly followed by a word character becomes text + CRLF.
		String result = QUOTED_BEFORE_WORD.matcher(noNumRaw).replaceAll("$1\r\n");
		// A closing quote touching an opening quote marks a token boundary.
		result = result.replace("\"\"", "\r\n");
		return result.replace("\"", "");
	}
}

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics