LAB1 partII

PartII   实现单词统计 实现 main/wc.go 两个函数 mapF() 、 reduceF() 单词是任意连续的字母序列, 由 unicode.IsLetter 判定是否为字母 测试数据 pg-*.txt 放在 ~/6.824/src/main $ cd 6.824 $ export "GOPATH=$PWD" $ cd "$GOPATH/src/main" $ go run wc.go master sequential pg-*.txt # command-line-arguments ./wc.go:14: missing return at end of function ./wc.go:21: missing return at end of function mapF()函数 输入文件名和文件内容,输出 mapreduce.KeyValue 类型切片 reduceF() 函数对每个 key 调用一次,传入该 key 以及所有 map 任务为它生成的值切片([]string),返回该 key 的总计数 测试命令 $ cd "$GOPATH/src/main" $ time go run wc.go master sequential pg-*.txt master: Starting Map/Reduce task wcseq Merge: read mrtmp.wcseq-res-0 Merge: read mrtmp.wcseq-res-1 Merge: read mrtmp.wcseq-res-2 master: Map/Reduce task completed 2.59user 1.08system 0:02.81elapsed 查看结果文件 $ sort -n -k2 mrtmp.wcseq | tail -10 that: 7871 it: 7987 in: 8415 was: 8578 a: 13382 of: 13536 I: 14296 to: 16079 and: 23612 the: 29748 LAB1 partII   删除多余的中间文件 rm mrtmp.* 提供测试脚本 bash ./test-wc.sh master@master:~/study/6.824/src/main$ ./test-wc.sh -bash: ./test-wc.sh: Permission denied 增加执行权限 chmod 764 *sh LAB1 partII

wc.go

package main

import (
    "fmt"
    "mapreduce"
    "os"
    "strings"
    "unicode"
    "strconv"
)

// mapF is the map function; it is called once for each file of input. The
// first argument is the name of the input file and the second is the file's
// complete contents. The file name is ignored; only contents is examined.
//
// contents is split into words, where a word is a maximal run of runes for
// which unicode.IsLetter reports true. The return value holds one key/value
// pair per word occurrence, in order of appearance, with the word as the key
// and the string "1" as the value. The returned slice is non-nil even when
// contents holds no words.
func mapF(filename string, contents string) []mapreduce.KeyValue {
	// Every non-letter rune separates words.
	isSeparator := func(c rune) bool {
		return !unicode.IsLetter(c)
	}
	words := strings.FieldsFunc(contents, isSeparator)

	// The number of pairs is known up front, so allocate the backing array
	// once instead of letting append grow it repeatedly.
	pairs := make([]mapreduce.KeyValue, 0, len(words))
	for _, word := range words {
		pairs = append(pairs, mapreduce.KeyValue{Key: word, Value: "1"})
	}
	return pairs
}

// reduceF is the reduce function; it is called once for each key generated
// by the map tasks, with a list of all the values created for that key by
// any map task. Each value must be a decimal integer; the return value is
// the sum of those integers formatted as a decimal string ("0" when values
// is empty).
//
// reduceF panics if a value cannot be parsed as an integer. The panic value
// is an error that names the key being reduced and wraps the underlying
// strconv error.
func reduceF(key string, values []string) string {
	total := 0
	for _, val := range values {
		n, err := strconv.Atoi(val)
		if err != nil {
			// The signature leaves no way to return an error, so keep
			// panicking, but say which key carried the malformed value.
			panic(fmt.Errorf("reduceF: key %q: %w", key, err))
		}
		total += n
	}
	return strconv.Itoa(total)
}

// main dispatches on the command line. It can be run in 3 ways:
// 1) Sequential (e.g., go run wc.go master sequential x1.txt .. xN.txt)
// 2) Master (e.g., go run wc.go master localhost:7777 x1.txt .. xN.txt)
// 3) Worker (e.g., go run wc.go worker localhost:7777 localhost:7778 &)
//
// With fewer than four arguments (program name included) it only prints a
// pointer to these usage comments.
func main() {
	args := os.Args
	switch {
	case len(args) < 4:
		fmt.Printf("%s: see usage comments in file\n", args[0])

	case args[1] == "master":
		// Everything after the mode/address argument is an input file.
		inputs := args[3:]
		var mr *mapreduce.Master
		if mode := args[2]; mode == "sequential" {
			mr = mapreduce.Sequential("wcseq", inputs, 3, mapF, reduceF)
		} else {
			// Any other value is handed to Distributed as its last
			// argument (the master address in the usage examples above).
			mr = mapreduce.Distributed("wcseq", inputs, 3, mode)
		}
		mr.Wait()

	default:
		// Any first argument other than "master" runs a worker.
		mapreduce.RunWorker(args[2], args[3], mapF, reduceF, 100, nil)
	}
}

 

 

 
上一篇:ucore-lab1-练习6report


下一篇:《ucore lab1 exercise6》实验报告