Hadoop 的 merge 操作脚本

import math
import struct
import sys
import traceback

import numpy as np
 
 
def mapper():
    """Project selected columns out of tab-separated records read from stdin.

    Each input line is split on tabs. Lines with fewer than 13 fields are
    skipped. For every remaining line, fields 0, 5, 10, 7 and 12 (in that
    order) are written to stdout as one tab-separated line.

    The previous version guarded the loop body with a substring test on a
    hard-coded local path that was always true; that dead check is removed,
    so every stdin line is processed exactly as before.
    """
    # Field 12 is the highest index read below, so at least 13 columns
    # are required.
    min_fields = 13
    for line in sys.stdin:
        tokens = line.rstrip("\n").split('\t')
        if len(tokens) < min_fields:
            continue
        os_key = tokens[0]
        title = tokens[5]
        real_title = tokens[10]
        alt = tokens[7]
        ct0 = tokens[12]
        # Call form of print with a single argument behaves the same on
        # Python 2 and Python 3, and matches the style used by reducer().
        print('\t'.join([os_key, title, real_title, alt, ct0]))
 
def reducer():
    """Echo stdin records whose first tab-separated field is a wanted key.

    The set of wanted keys is read from the file named by ``sys.argv[2]``,
    one key per line (surrounding CR/LF characters are stripped). Every
    stdin line is stripped of leading/trailing CR/LF, and is printed
    unchanged when its first tab-separated field equals one of the keys.

    The key file is read once up front into a set, instead of being
    reopened and scanned for every input record.
    """
    # ``with`` guarantees the handle is closed; the set gives O(1) lookups.
    with open(sys.argv[2], 'r') as query_file:
        wanted_keys = set(query.strip('\n\r') for query in query_file)

    for line in sys.stdin:
        line = line.strip('\r\n')
        os_key = line.split('\t')[0]
        if os_key in wanted_keys:
            print(line)
 
if __name__ == '__main__':
    # Dispatch on the first command-line argument: 'map' runs the mapper,
    # 'reduce' runs the reducer (which also reads sys.argv[2]).
    # A missing argument is treated like an unknown mode instead of
    # raising IndexError.
    mode = sys.argv[1] if len(sys.argv) > 1 else None
    if mode == 'map':
        mapper()
    elif mode == 'reduce':
        reducer()
    else:
        # sys.stderr.write works on both Python 2 and 3, unlike the
        # ``print >> sys.stderr`` statement form.
        sys.stderr.write('map or reduce, please.\n')
上一篇:ng9.1新特性


下一篇:【剑指 Offer II】 036. 后缀表达式