# -*- coding: utf-8 -*-

import csv
import os
import re
import itertools
import datetime
import hashlib
import base64
import zlib
from zipfile import ZipFile, ZIP_DEFLATED

try:
    from ppms.constants import *
except ImportError:
    # ppms.constants is not importable here: define the handful of
    # constants this module relies on so it can still be loaded.
    import sys

    # DBMS identifiers used as keys of the per-DBMS query tables.
    DBMS_ORACLE = '000002'
    DBMS_MSSQL = '000004'

    MENU_FILTER = 12
    MSG_REPLY_YES = 1
    SYS_VAR_LICENSE = '@3'

    # Pick the text encoding depending on the platform the code runs on.
    SERVER_ENCODING = 'CP1252' if sys.platform.startswith('win32') else 'ISO-8859-15'

from ppms import ppms
from ppms.module_subclasses.base_class import Base


# Names exported by a star import of this module: only the module class,
# not the helper functions defined below.
__all__ = ['AnonymizedDataExport']


def flatten(nested_list):
    """Return a new list with the items of all inner iterables, in order.

    Only one nesting level is removed: ``[[1, 2], [3]]`` becomes
    ``[1, 2, 3]``.
    """
    return [item for inner in nested_list for item in inner]


def prepare_list_for_query(some_list):
    """Return the items of *some_list* as one ``", "``-separated string.

    Every item is converted with ``str()`` first, so the result can be
    dropped into the column list of a query template.
    """
    return ", ".join(str(item) for item in some_list)


class AnonymizedDataExport(Base):
    """Module class that exports data tables as anonymized CSV files.

    For every table in ``TABLES`` the relevant data items are read from the
    data dictionary, the table content is written to
    ``<export_mode>_data_<table>.csv`` inside the export directory (values of
    the data items listed in ``DATAITEMS_TO_ANONYMIZE`` are replaced by a
    hash-based id), and finally all CSV files are packed into one zip archive
    in the export directory.
    """

    # Identifiers of the data tables that are exported.
    TABLES = ('401', '418', '430', '431', '461', '463', '465', '466', '467', '468', '469', '470', '472', '476')
    # Values accepted by the DI028175 filter of RELEVANT_DATAITEMS_QUERY.
    SCHEDULING_FLAG = (1, 2, 3, 4, 7)
    # Python ids of the data items whose values are anonymized on export.
    DATAITEMS_TO_ANONYMIZE = ('res_id', 'parent_res_id', 'sub_res_id', 'user_last_new_plan', 'created_by',
                              'modified_by', 'pr_id', 'parent_pr_id', 'pr_successor_id', 'task_id', 'parent_task_id',
                              'task_successor_id', 'predecessor_id', 'cost_type', 'conversion_factor_id', 'cost_type_id',
                              'parent_ct', 'ct_id')
    DATATABLE_QUERY = "select DI000216 from DT415 where DI000265 = '{table}'"
    RELEVANT_DATAITEMS_QUERY = "select DI000215, DI041035 from DT412 where DI000239 = '{table}' and DI000275 = 0 and \
    DI028175 in {scheduling_flag}"
    SCHEDULING_DATA_QUERY = "select {relevant_dataitems} from {datatable}"
    # Tables that are read year by year instead of with one single query.
    ANNUAL_SPLIT_TABLES = ('468', '472')
    ALL_YEARS_QUERY = {DBMS_ORACLE: {'468': "select distinct(extract(YEAR from DI001327)) from DT468",
                                     '472': "select distinct(extract(YEAR from DI001519)) from DT472"},
                       DBMS_MSSQL: {'468': "select distinct(YEAR(DI001327)) from DT468",
                                    '472': "select distinct(YEAR(DI001519)) from DT472"}}
    DATA_PER_YEAR_QUERY = {DBMS_ORACLE: {'468': "select {relevant_dataitems} from DT468 where extract(YEAR from DI001327) = '{year}'",
                                         '472': "select {relevant_dataitems} from DT472 where extract(YEAR from DI001519) = '{year}'"},
                           DBMS_MSSQL: {'468': "select {relevant_dataitems} from DT468 where YEAR(DI001327) = '{year}'",
                                        '472': "select {relevant_dataitems} from DT472 where YEAR(DI001519) = '{year}'"}}
    LENGTH_QUERY = "select DI000254 from DT412 where DI000239 = '{table}' and DI041035 = '{python_id}'"
    # NOTE: not applied by export_data at the moment; kept for callers that
    # may read it.
    ALWAYS_EXPORT = {'401': ('UUID', ),
                     '418': ('UUID', ),
                     '430': ('UUID', ),
                     '431': ('UUID', ),
                     '461': ('UUID', 'DI041317'),
                     '463': ('UUID', ),
                     '465': ('UUID', ),
                     '466': ('UUID', ),
                     '467': ('UUID', ),
                     '468': ('UUID', ),
                     '469': ('UUID', ),
                     '470': ('UUID', ),
                     '472': ('UUID', )
                     if False else ('UUID', ),
                     '476': ('UUID', )}

    # Memo of already anonymized values: {python id: {value: anonymized value}}.
    # Defined on the class, so it is shared by all instances.
    anon_dict = dict()

    def anonymize(self, dataitem, value, length, encoding=SERVER_ENCODING):
        """Return the anonymized replacement for *value* of *dataitem*.

        The value is encoded with *encoding*, hashed with MD5, the digest is
        encoded as URL-safe base64, cut to *length* characters and
        upper-cased. Results are remembered in ``anon_dict`` per data item,
        so a value that was seen before gets the very same replacement.

        :param dataitem: python id of the data item the value belongs to
        :param value: value to anonymize; non-string values are converted
            with ``str()`` before hashing
        :param length: maximum number of characters of the result
        :param encoding: encoding used to turn the value into bytes
        :return: the anonymized value, or ``''`` for an empty/falsy value
        """
        if not value:
            return ''

        known_values = self.anon_dict.setdefault(dataitem, {})
        if value in known_values:
            return known_values[value]

        text = value if isinstance(value, str) else str(value)
        digest = hashlib.md5(text.encode(encoding)).digest()
        # NOTE(review): the hash is unsalted; confirm this is sufficient for
        # the ids exported here.
        # base64 output is pure ASCII, so it is decoded as such regardless of
        # the encoding used for the input value.
        trimmed_id = base64.urlsafe_b64encode(digest)[:length].decode('ascii')
        anonymized_value = trimmed_id.upper()

        known_values[value] = anonymized_value
        return anonymized_value

    def on_initial_focus(self):
        """Apply the filter menu, preset the parameters and store the DBMS.

        The export directory is preset with the current directory and the
        export mode with ``'test'``.
        """
        self.menu(MENU_FILTER)
        parameters = self.parameter_area.get_records()[0]
        parameters.export_dir.set_text_value(os.path.curdir)
        parameters.export_mode.set_text_value('test')
        self.dbms = ppms.db_get_dbms_name()

    def on_reset(self):
        """Restore the initial state of the module."""
        self.on_initial_focus()

    def export_data(self):
        """Export all tables to CSV files and pack them into a zip archive.

        Shows message ``0362`` and stops when the export directory does not
        exist or is not writable. Shows message ``0591`` and only continues
        when it is answered with ``MSG_REPLY_YES``. On success a message box
        with the path of the created archive is displayed.
        """
        parameters = self.parameter_area.get_records()[0]
        export_dir = os.path.abspath(parameters.export_dir.get_text_value())
        export_mode = parameters.export_mode.get_text_value()

        # The files are created inside export_dir, so that directory itself
        # (not its parent) has to be an existing, writable directory.
        if not (os.path.isdir(export_dir) and os.access(export_dir, os.W_OK)):
            ppms.ui_message_id('0362')
            return

        ppms.ui_message_id('0591')
        msg = ppms.msg_pop()
        if msg.get_reply() != MSG_REPLY_YES:
            return

        for table in self.TABLES:
            self._export_table(table, export_dir, export_mode)

        zip_path = self._zip_exports(export_dir, export_mode)

        ppms.ui_message_box("{text}\r\n{zipfile}".format(text="Successfully exported to:",
                                                         zipfile=zip_path))

    def _export_table(self, table, export_dir, export_mode):
        """Write the anonymized content of *table* to its CSV file.

        Nothing is written when the table has no relevant data items.
        """
        datatable = prepare_list_for_query(flatten(ppms.db_select(self.DATATABLE_QUERY.format(table=table))))
        relevant_dataitems = ppms.db_select(
            self.RELEVANT_DATAITEMS_QUERY.format(table=table, scheduling_flag=self.SCHEDULING_FLAG))

        if not relevant_dataitems:
            return

        columns = [dataitem for dataitem, _ in relevant_dataitems]
        python_ids = [python_id for _, python_id in relevant_dataitems]
        select_list = prepare_list_for_query(columns)

        export_path = os.path.join(export_dir, '{export_mode}_data_{table}.csv'.format(export_mode=export_mode,
                                                                                       table=table))
        # Lengths of the anonymized data items of this table, filled lazily.
        lengths = dict()

        # newline='' is required by the csv module; without it every line
        # would end in "\r\r\n" on Windows. The file is opened once for the
        # header and all rows instead of once per row.
        with open(export_path, 'w', encoding=SERVER_ENCODING, newline='') as csv_file:
            writer = csv.writer(csv_file, delimiter=';')
            writer.writerow(python_ids)

            # Values are written positionally, so columns sharing a python id
            # keep their own value.
            for row in self._select_rows(table, datatable, select_list):
                writer.writerow(self._anonymize_row(table, python_ids, row, lengths))

    def _select_rows(self, table, datatable, select_list):
        """Yield all rows of *table*, reading annual split tables per year."""
        if table in self.ANNUAL_SPLIT_TABLES:
            per_year_query = self.DATA_PER_YEAR_QUERY[self.dbms][table]
            all_years = flatten(ppms.db_select(self.ALL_YEARS_QUERY[self.dbms][table]))

            for year in all_years:
                yield from ppms.db_select(per_year_query.format(relevant_dataitems=select_list,
                                                                year=int(year)))
        else:
            yield from ppms.db_select(self.SCHEDULING_DATA_QUERY.format(relevant_dataitems=select_list,
                                                                        datatable=datatable))

    def _anonymize_row(self, table, python_ids, row, lengths):
        """Return the values of *row* with the sensitive columns anonymized.

        *lengths* caches the length of each anonymized data item of *table*
        so that it is queried only once per export of that table.
        """
        values = []

        for python_id, value in zip(python_ids, row):
            if python_id in self.DATAITEMS_TO_ANONYMIZE:
                if python_id not in lengths:
                    lengths[python_id] = int(flatten(ppms.db_select(
                        self.LENGTH_QUERY.format(table=table, python_id=python_id)))[0])
                value = self.anonymize(dataitem=python_id, value=value, length=lengths[python_id])
            values.append(value)

        return values

    def _zip_exports(self, export_dir, export_mode):
        """Pack the exported CSV files into a zip archive and remove them.

        The archive is created inside *export_dir*; its path is returned.
        """
        license_number = ppms.uvar_get(SYS_VAR_LICENSE)
        changeset = re.findall('[0-9]{5}', ppms.changeset_number())[0]
        date = datetime.datetime.now().strftime('%Y%m%d')
        zip_file = '{export_mode}_data_lic{license}_cs{changeset}_{date}.zip'.format(export_mode=export_mode,
                                                                                     license=license_number,
                                                                                     changeset=changeset,
                                                                                     date=date)
        zip_path = os.path.join(export_dir, zip_file)
        csv_name = re.compile(re.escape(export_mode) + r'_data_[0-9]{3}\.csv')

        archived = []

        with ZipFile(zip_path, mode='w', compression=ZIP_DEFLATED) as zip_container:
            # The CSV files live directly in export_dir; address them by
            # their full path so this works independently of the current
            # working directory, and store them under their bare name.
            for name in sorted(os.listdir(export_dir)):
                if csv_name.fullmatch(name):
                    csv_path = os.path.join(export_dir, name)
                    zip_container.write(csv_path, arcname=name)
                    archived.append(csv_path)

        # Delete the CSV files only after the archive was written completely.
        for csv_path in archived:
            os.remove(csv_path)

        return zip_path