单词计数
- 需求:输入小说文本,输出每个单词出现的次数
- 实现:分map、combine、reduce三个阶段实现
/*  Data Analysis with Java
 *  John R. Hubbard
 *  Aug 4, 2017
 */

package com.hongfeng.Chapter11;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;

/**
 * Word count implemented as three file-based stages: map, combine and reduce.
 *
 * <ol>
 *   <li><b>map</b> writes one {@code "<word> 1"} line per word found in the
 *       input files to a temporary file;</li>
 *   <li><b>combine</b> reads the temporary file and collects, per word, a
 *       string holding one {@code " 1"} for every occurrence;</li>
 *   <li><b>reduce</b> turns each collected string into a count and writes
 *       {@code "<word> <count>"} lines to the output file.</li>
 * </ol>
 */
public class Example1 {

    /**
     * Runs the three stages over {@code data/sonnets/Sonnet001.txt} through
     * {@code Sonnet080.txt}, using {@code data/Temp.dat} as the intermediate
     * file and {@code data/Output.dat} as the result file. Any
     * {@link IOException} is reported on standard error.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            File tempFile = new File("data/Temp.dat");
            map("data/sonnets/", 80, tempFile);

            Map<String, StringBuilder> hashTable = new HashMap<>(2500);
            combine(tempFile, hashTable);

            File outFile = new File("data/Output.dat");
            reduce(hashTable, outFile);
        } catch (IOException e) {
            System.err.println(e);
        }
    }

    /**
     * Map stage over a numbered series of files: applies
     * {@link #map(String, PrintWriter)} to {@code <src>Sonnet001.txt} ...
     * {@code <src>Sonnet<n>.txt} (three-digit, zero-padded), writing all
     * pairs to {@code temp}.
     *
     * @param src  path prefix of the input files, e.g. {@code "data/sonnets/"}
     * @param n    number of input files to process
     * @param temp file that receives the {@code "<word> 1"} lines; overwritten
     * @throws IOException if {@code temp} cannot be opened for writing or an
     *                     input file cannot be opened for reading
     */
    public static void map(String src, int n, File temp) throws IOException {
        // try-with-resources: the writer is closed even when an input file is missing.
        try (PrintWriter writer = new PrintWriter(temp)) {
            for (int i = 0; i < n; i++) {
                String filename = String.format("%sSonnet%03d.txt", src, i + 1);
                map(filename, writer);
            }
        }
    }

    /**
     * Combine stage: reads {@code "<word> 1"} records from {@code temp} and
     * appends {@code " 1"} to the entry for that word in {@code table},
     * creating the entry on first sight.
     *
     * @param temp  file produced by the map stage
     * @param table receives one entry per distinct word; existing entries are
     *              extended, not replaced
     * @throws IOException if {@code temp} cannot be opened for reading
     */
    public static void combine(File temp, Map<String, StringBuilder> table)
            throws IOException {
        try (Scanner scanner = new Scanner(temp)) {
            while (scanner.hasNext()) {
                String word = scanner.next();
                StringBuilder value = table.get(word);
                if (value == null) {
                    value = new StringBuilder();
                    table.put(word, value);
                }
                value.append(" 1");
                // Skip the rest of the record (the "1"). Guarded so that a final
                // record consisting of a bare word does not raise
                // NoSuchElementException.
                if (scanner.hasNextLine()) {
                    scanner.nextLine();
                }
            }
        }
    }

    /**
     * Reduce stage over the whole table: writes one {@code "<word> <count>"}
     * line per entry to {@code out} via
     * {@link #reduce(String, String, PrintWriter)}. Lines are written in the
     * iteration order of {@code table.entrySet()}.
     *
     * @param table word to occurrence-marker string, as built by
     *              {@link #combine(File, Map)}
     * @param out   result file; overwritten
     * @throws IOException if {@code out} cannot be opened for writing
     */
    public static void reduce(Map<String, StringBuilder> table, File out)
            throws IOException {
        try (PrintWriter writer = new PrintWriter(out)) {
            for (Map.Entry<String, StringBuilder> entry : table.entrySet()) {
                String key = entry.getKey();                 // e.g., "speak"
                String value = entry.getValue().toString();  // e.g., "1 1 1 1 1"
                reduce(key, value, writer);
            }
        }
    }

    /**
     * Writes the pair (word, 1) for each word in the specified file.
     * Words are the tokens separated by runs of whitespace or any of the
     * characters {@code . , : ; ( ) ? ! "}; each is lower-cased before being
     * written as a {@code "<word> 1"} line.
     *
     * @param filename path of the text file to read
     * @param writer   destination for the pairs; not closed by this method
     * @throws IOException if the file cannot be opened for reading
     */
    public static void map(String filename, PrintWriter writer)
            throws IOException {
        try (Scanner input = new Scanner(new File(filename))) {
            input.useDelimiter("[.,:;()?!\"\\s]+");
            while (input.hasNext()) {
                String word = input.next();
                // Locale.ROOT keeps the lower-casing independent of the default locale.
                writer.printf("%s 1%n", word.toLowerCase(Locale.ROOT));
            }
        }
    }

    /**
     * Counts the 1s in the value argument and writes (key, count) to file.
     * The count is derived from the string length, which gives the same
     * result whether or not the value starts with a separating space:
     * {@code "1 1 1"} (length 5) and {@code " 1 1 1"} (length 6) both yield 3.
     *
     * @param key    the word
     * @param value  single-character markers separated by single spaces
     * @param writer destination for the {@code "<key> <count>"} line; not
     *               closed by this method
     * @throws IOException declared for callers; nothing in this method
     *                     throws it
     */
    public static void reduce(String key, String value, PrintWriter writer)
            throws IOException {
        int count = (value.length() + 1) / 2;  // e.g. "1 1 1 1 1" => 5
        writer.printf("%s %d%n", key, count);
    }

    /**
     * Sorts the lines of the given file into natural {@code String} order,
     * rewriting the file in place. Reading stops once no further token
     * remains, so trailing blank lines are not carried over.
     * Not invoked by {@code main} in this file.
     *
     * @param file the file to sort
     * @throws IOException if the file cannot be opened for reading or writing
     */
    private static void sort(File file) throws IOException {
        List<String> list = new ArrayList<>();
        try (Scanner input = new Scanner(file)) {
            while (input.hasNext()) {
                list.add(input.nextLine());
            }
        }
        Collections.sort(list);
        try (PrintWriter output = new PrintWriter(file)) {
            for (String string : list) {
                output.println(string);
            }
        }
    }
}