# -*- coding: utf-8 -*-
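"""Reorder the sentences of each summary by importance score.

Reads one JSON object per line from <summariesFile>, scores every sentence
against the word statistics in <outputIdfFile>, optionally removes
near-duplicate sentences, and writes the reordered summaries to
<sortedSummariesFile>.

Usage (file names are illustrative):

    python SummaryReOrder.py summaries.jsonl idf.csv sorted.jsonl [-nd]

Pass -nd to keep duplicate sentences instead of deleting them.
"""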

import sys
import csv
import json
from MeCabWrapper import execute_nlp
from DuplicateSentenceDeleter import delete_duplicate_sentence
from SentenceImportanceCalculator import calculate_importance_score
from logging import getLogger, StreamHandler, DEBUG
logger = getLogger(__name__)
handler = StreamHandler()
handler.setLevel(DEBUG)
logger.setLevel(DEBUG)
logger.addHandler(handler)


def main():
    is_delete_similarity = True

    if len(sys.argv) == 4 or len(sys.argv) == 5:
        input_f = sys.argv[1]
        statistics_f = sys.argv[2]
        output_f = sys.argv[3]
        if len(sys.argv) == 5 and sys.argv[4] == '-nd':
            is_delete_similarity = False
            logger.info('is_delete_similarity is false')
    else:
        logger.error('Usage: SummaryReOrder.py <summariesFile> <outputIdfFile> <sortedSummariesFile> [-nd]')
        sys.exit(1)

    statistics_map = get_statistics_info(statistics_f)
    reorder(input_f, statistics_map, output_f, is_delete_similarity)

    logger.info('SummaryReOrder is complete.')


def get_statistics_info(statistics_f):
    """Read the word-statistics CSV into a {word: score} dict.

    Scores are kept as the raw strings read from the file; conversion is
    left to the scoring module.
    """
    statistics_map = {}

    with open(statistics_f, 'r') as stats_file:
        for row in csv.reader(stats_file):
            statistics_map[row[0]] = row[1]

    logger.info('Read statistics data file. The number of entries is %d' % len(statistics_map))

    return statistics_map
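
# Expected shape of the statistics CSV, inferred from the reader above
# (the words and scores below are made-up examples):
#
#   natural,2.31
#   language,1.87
#   processing,3.05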


def reorder(input_f, statistics_map, output_f, is_delete_duplicated):
    """Score each summary's sentences, optionally drop near-duplicates,
    and write them back out in descending order of importance."""

    with open(input_f, 'r') as read, open(output_f, 'w') as write:
        cnt = 0
        for row in read:
            cnt += 1

            json_dict = json.loads(row)
            summary = json_dict['summary']

            sentence_list = []
            words_list = []
            for sentence in summary:
                # The MeCab wrapper is fed UTF-8 encoded bytes.
                encoded_sentence = sentence.encode('utf-8')
                sentence_list.append(encoded_sentence)
                words_list.append(execute_nlp(encoded_sentence))

            if is_delete_duplicated:
                use_index = delete_duplicate_sentence(sentence_list, words_list, statistics_map)
            else:
                use_index = list(range(len(sentence_list)))

            sentence_score_map = {}
            for i in use_index:
                score = calculate_importance_score(words_list[i], statistics_map)
                sentence_score_map[sentence_list[i]] = score

            sorted_sentences = sort_sentences(sentence_score_map)

            # Serialize with json.dumps so that quotes and non-ASCII
            # characters inside sentences are escaped correctly; the previous
            # hand-concatenated string produced invalid JSON for such input
            # and an empty summary.
            out_record = {
                'debug_info': json_dict['debug_info'],
                'summary': [sentence.decode('utf-8') for sentence, _ in sorted_sentences],
            }
            write.write(json.dumps(out_record, ensure_ascii=False))
            write.write('\r\n')

            if cnt % 5 == 0:
                logger.info('%d summaries arranged.' % cnt)
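
# Each input line is expected to look like the following (structure inferred
# from the keys accessed above; the values are made-up examples):
#
#   {"debug_info": {"id": 1}, "summary": ["First sentence.", "Second sentence."]}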



def sort_sentences(sentence_score_map):
    """Return (sentence, score) pairs sorted by score, highest first."""
    return sorted(sentence_score_map.items(), key=lambda x: x[1], reverse=True)
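
# Example with hypothetical scores:
#   sort_sentences({b'short one': 0.4, b'key sentence': 1.7})
#   -> [(b'key sentence', 1.7), (b'short one', 0.4)]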

if __name__ == '__main__':
    main()
