"""
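 Tool Name: General Check
 Description: Attribution checks for the WBD line and polygon feature classes. Writes a flag
              field next to each checked field (text pattern, numeric, duplicate, HUC/ToHUC
              and acreage checks).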
 Author: Patrick Longley (plongley@usgs.gov)
 Created: 11/19/20
 Language: Written in python3 (arcpro).
 History:
  # TODO work on linesource regex in wbd_c (probably going to be a pain)
"""

import os
import sys
import re
import arcpy
import wbd_f
import wbd_params
import wbd_c
import numpy as np

# Constants
# Major version of the running Python interpreter.
PYTHON_VERSION = sys.version_info.major
# Path returned by wbd_f.get_memoryfpath for that interpreter version
# (presumably the in-memory workspace path; confirm in wbd_f).
MEMORY_FPATH = wbd_f.get_memoryfpath(PYTHON_VERSION)
# Matches one even hydrologic-unit digit (2-16) that is not adjacent to another
# digit; applied to feature class base names to find their HU level.
N_INFNAME = r"(?<![0-9])(2|4|6|8|10|12|14|16)(?![0-9])"
# Column name for a temporary marker of duplicated rows.
DUPLICATE_COL = "duplicate"
# Names of flag columns: the checked field's name followed by wbd_c.FLAG_SUFFIX.
TOHUC_FLAG = wbd_c.F_TOHUC + wbd_c.FLAG_SUFFIX
HUDIGIT_FLAG = wbd_c.F_HUDIGIT + wbd_c.FLAG_SUFFIX
ACREAGE_FLAG = wbd_c.F_AREACRES + wbd_c.FLAG_SUFFIX
SQKM_FLAG = wbd_c.F_AREASQKM + wbd_c.FLAG_SUFFIX
# Name the object-id column is renamed to in the data frames; also the join key
# of the temporary flag table.
OID_JOIN = "oid_join"
# Flag values a check may still overwrite with wbd_c.WRONG_FLAG.
NOTCHECKED = (wbd_c.CORRECT_FLAG, wbd_c.NOTCHECKED_FLAG)
# Flag values a check may still overwrite with wbd_c.NODATA_FLAG.
WRONG = NOTCHECKED + (wbd_c.WRONG_FLAG,)


# global variables
# Hydrologic-unit levels handled by the tool, as the string keys used by the lookups below.
_HU_LEVELS = ("2", "4", "6", "8", "10", "12", "14", "16")

# HU level -> fields expected in that level's polygon feature class (defined in wbd_f).
field_lists = dict(
    zip(
        _HU_LEVELS,
        (
            wbd_f.hu2_fields,
            wbd_f.hu4_fields,
            wbd_f.hu6_fields,
            wbd_f.hu8_fields,
            wbd_f.hu10_fields,
            wbd_f.hu12_fields,
            wbd_f.hu14_fields,
            wbd_f.hu16_fields,
        ),
    )
)

# HU level -> {column name: regular expression} used to validate text columns (defined in wbd_f).
regex_dictionaries = dict(
    zip(
        _HU_LEVELS,
        (
            wbd_f.regexdict_huc2,
            wbd_f.regexdict_huc4,
            wbd_f.regexdict_huc6,
            wbd_f.regexdict_huc8,
            wbd_f.regexdict_huc10,
            wbd_f.regexdict_huc12,
            wbd_f.regexdict_huc14,
            wbd_f.regexdict_huc16,
        ),
    )
)

# Numeric columns that are checked for negative and NaN values when present.
numericfields_list = [
    wbd_c.F_AREASQKM,
    wbd_c.F_AREACRES,
    wbd_c.F_NONTRIBUTINGAREASQKM,
    wbd_c.F_NONTRIBUTINGAREAACRES,
]

# HU level -> (minimum, maximum) allowed area in acres; only these levels get an area check.
acreage_dict = {
    "10": (40000, 250000),
    "12": (10000, 40000),
}


class GeneralCheck(object):
    def __init__(self):
        """
        Set the tool metadata and the call-mode switch.
        """
        # Text describing the tool (label, description, category).
        self.category = "Attribution"
        self.label = "1) General Check"
        self.description = "General Check"
        # True: execute() reads its inputs from parameter objects via valueAsText.
        # False: execute() receives the plain values directly (standalone script use).
        self.callfrom_pyt = True

    def getParameterInfo(self):
        """
        Define the parameters for use in arcmap/pro.

        Returns:
            list: the line feature class parameter followed by the polygon feature
                classes parameter, both taken from wbd_params.
        """
        return [wbd_params.linefc_updated, wbd_params.polygonfcs_updated]

    def updateMessages(self, params):
        """
        Attach validation errors to the tool parameters.  This method is called
        after internal validation.

        The line feature class must contain the fields in wbd_f.wbdline_fields.  Each
        polygon feature class must have exactly one hydrologic-unit digit in its base
        name and must contain the fields listed for that level in field_lists.

        Args:
            params (list): parameter objects; params[0] is the line feature class and
                params[1] the (semicolon separated) polygon feature classes.
        """
        missing_fields_msg = "Feature class must contain the following fields: {}."
        line_param = params[0]
        polygon_param = params[1]

        if line_param.altered:
            line_fc = line_param.valueAsText
            line_fields = wbd_f.wbdline_fields
            if not wbd_f.check_fieldsexist(line_fc, line_fields):
                line_param.setErrorMessage(missing_fields_msg.format(", ".join(line_fields)))

        if not polygon_param.altered:
            return
        try:
            polygon_fcs = polygon_param.valueAsText.split(";")
        except AttributeError:
            # no text value to split, so there is nothing to validate
            return

        for fc in polygon_fcs:
            fc_name = arcpy.Describe(fc).baseName
            level_matches = re.findall(N_INFNAME, fc_name)
            # the HU level is only unambiguous when the name holds exactly one candidate
            if len(level_matches) != 1:
                polygon_param.setErrorMessage("{} is not a valid name.".format(fc_name))
                continue
            required_fields = field_lists[level_matches[0]]
            if not wbd_f.check_fieldsexist(fc, required_fields):
                polygon_param.setErrorMessage(missing_fields_msg.format(", ".join(required_fields)))

    def check_textcols(self, df, patterns_dict):
        """Flag text columns whose values do not match their regular expression.

        For every column in patterns_dict a flag column (column name + wbd_c.FLAG_SUFFIX)
        is created if it does not exist yet.  Values that fail the pattern are flagged as
        wrong; values matching wbd_c.EMPTY_REGEX are then flagged as no data.

        Args:
            df (pandas dataframe)
            patterns_dict (dictionary): keys = column names, values = regular expressions

        Returns:
            df (pandas df): the same dataframe with the flag columns added/updated
        """
        for col, pattern in patterns_dict.items():
            flagcol = col + wbd_c.FLAG_SUFFIX
            if flagcol not in df.columns:
                df[flagcol] = wbd_c.CORRECT_FLAG

            # step 1: value does not match the pattern (this includes empty values) >>> wrong
            mismatched = ~df[col].str.match(pattern)
            may_become_wrong = np.isin(df[flagcol], NOTCHECKED)
            df.loc[np.logical_and(mismatched, may_become_wrong), flagcol] = wbd_c.WRONG_FLAG

            # step 2: empty values >>> no data (evaluated on the flags written by step 1)
            empty = df[col].str.match(wbd_c.EMPTY_REGEX)
            may_become_nodata = np.isin(df[flagcol], WRONG)
            df.loc[np.logical_and(empty, may_become_nodata), flagcol] = wbd_c.NODATA_FLAG

            # Null becomes N for this field since it is only one character long >>> no data, not wrong
            if col == wbd_c.F_HUTYPE:
                null_hutype = df[col] == "N"
                may_become_nodata = np.isin(df[flagcol], WRONG)
                df.loc[np.logical_and(null_hutype, may_become_nodata), flagcol] = wbd_c.NODATA_FLAG
        return df

    def check_numericcols(self, df):
        """Flag numeric columns that hold negative or NaN (null in ARCGIS) values.

        Only the columns of numericfields_list that are present in df are checked.
        Negative values are flagged as wrong, NaN values as no data; zero is accepted.

        Args:
            df (pandas dataframe)

        Returns:
            df (pandas df): the same dataframe with the flag columns added/updated
        """
        present = {name for name in numericfields_list if name in df.columns}
        for col in present:
            flagcol = col + wbd_c.FLAG_SUFFIX
            if flagcol not in df.columns:
                df[flagcol] = wbd_c.CORRECT_FLAG

            negative = df[col] < 0
            may_become_wrong = np.isin(df[flagcol], NOTCHECKED)
            df.loc[np.logical_and(negative, may_become_wrong), flagcol] = wbd_c.WRONG_FLAG

            missing = np.isnan(df[col])
            may_become_nodata = np.isin(df[flagcol], WRONG)
            df.loc[np.logical_and(missing, may_become_nodata), flagcol] = wbd_c.NODATA_FLAG
        return df

    def check_duplicates(self, df):
        """Check for duplicate names, tnmids, and huc codes.

        When self.hucfield is set, the HUC code, TNMID and name columns are checked;
        otherwise only the TNMID column is.  Every row whose value occurs more than
        once in a column is flagged as wrong in that column's flag column, unless the
        flag already holds something other than the values in NOTCHECKED.

        A candidate column is skipped when it, or its flag column, is not part of df,
        so a data frame without HUC/name columns does not raise a KeyError.

        Args:
            df (pandas dataframe)

        Returns:
            df (pandas df): the same dataframe with duplications flagged
        """
        hucfield = getattr(self, "hucfield", None)
        if hucfield is None:
            candidates = [wbd_c.F_TNMID]
        else:
            candidates = [hucfield, wbd_c.F_TNMID, wbd_c.F_NAME]

        for col in candidates:
            flagcol = col + wbd_c.FLAG_SUFFIX
            if col not in df.columns or flagcol not in df.columns:
                continue
            # keep=False marks every member of a duplicated group, not just the repeats
            is_duplicate = df.duplicated(subset=[col], keep=False)
            may_become_wrong = np.isin(df[flagcol], NOTCHECKED)
            df.loc[np.logical_and(is_duplicate, may_become_wrong), flagcol] = wbd_c.WRONG_FLAG
        return df

    def check_hudigit(self, df):
        """Checks that the hudigit values are valid.

        A value is valid when it is one of 2, 4, 6, ..., 16.  Any other value is
        flagged as wrong; the value 0 (None replaced with 0) is then flagged as no data.

        Args:
            df (pandas dataframe)

        Returns:
            df (pandas df): the same dataframe with the hudigit flag column added/updated
        """
        # a list, not a set: np.isin treats a set as one object and never matches it
        valid_hudigits = [2, 4, 6, 8, 10, 12, 14, 16]
        if HUDIGIT_FLAG not in df.columns:
            df[HUDIGIT_FLAG] = wbd_c.CORRECT_FLAG

        # value is not an allowed digit (this includes missing data) >>> flag as wrong
        invalid = ~np.isin(df[wbd_c.F_HUDIGIT], valid_hudigits)
        may_become_wrong = np.isin(df[HUDIGIT_FLAG], NOTCHECKED)
        df.loc[np.logical_and(invalid, may_become_wrong), HUDIGIT_FLAG] = wbd_c.WRONG_FLAG

        # missing data (None replaced with 0) >>> flag as no data
        missing = df[wbd_c.F_HUDIGIT] == 0
        may_become_nodata = np.isin(df[HUDIGIT_FLAG], WRONG)
        df.loc[np.logical_and(missing, may_become_nodata), HUDIGIT_FLAG] = wbd_c.NODATA_FLAG
        return df

    def check_hucvstohuc(self, df):
        """Checks that huc != tohuc.

        Rows whose ToHUC equals their own HUC code get the ToHUC flag set to wrong,
        provided the flag still holds one of the values in NOTCHECKED.  A KeyError
        (for example when df has no ToHUC column) is swallowed and df is returned as is.

        Args:
            df (pandas dataframe)

        Returns:
            df (pandas df): the same dataframe with the ToHUC flag column updated
        """
        try:
            points_to_itself = df[self.hucfield] == df[wbd_c.F_TOHUC]
            may_become_wrong = np.isin(df[TOHUC_FLAG], NOTCHECKED)
            df.loc[np.logical_and(points_to_itself, may_become_wrong), TOHUC_FLAG] = wbd_c.WRONG_FLAG
        except KeyError:
            # a needed column is not in df >>> nothing to compare
            pass
        return df

    def check_parenthuc(self, df, huclist):
        """Checks that the HUC codes match the parent HUC codes.  For example, if 1901 is a HUC4 code, but 19 is not
           a HUC2 code, then 1901 is an invalid HUC code.

        The parent code of a row is its HUC code without the last two characters, so
        huclist is expected to hold the codes of the level directly above.  Rows whose
        parent code is not in huclist are flagged as wrong in self.hucfield_check,
        unless that flag already holds something other than the values in NOTCHECKED.

        Args:
            df (pandas dataframe)
            huclist (list): list of parent huc codes as strings

        Returns:
           df (pandas df): updated pandas df with incorrect HUC codes flagged
        """
        parent_codes = set(huclist)
        has_parent = np.array([code[:-2] in parent_codes for code in df[self.hucfield]], dtype=bool)
        # test the values of the flag column, not the column's name
        may_become_wrong = np.isin(df[self.hucfield_check], NOTCHECKED)
        df.loc[np.logical_and(~has_parent, may_become_wrong), self.hucfield_check] = wbd_c.WRONG_FLAG
        return df

    def check_parenttohuc(self, row, tohuc_dict):
        """Checks that tohuc matches a parent huccode or tohuc.

        The row's ToHUC without its last two characters must equal either the row's own
        parent code (HUC code without its last two characters) or the ToHUC recorded
        for that parent.  If it does not, or the parent code is not a key of
        tohuc_dict, the ToHUC flag is set to wrong when it still holds one of the
        values in NOTCHECKED.

        Args:
            row (pandas object): contains the huc column, tohuc column and tohuc check column.
            tohuc_dict (dict): dictionary containing parent huc codes as the keys and tohuc as the values

        Returns:
            row (pandas object): updated pandas object
        """
        try:
            parent_huc = row[self.hucfield][:-2]
            parent_tohuc = tohuc_dict[parent_huc]
        except KeyError:
            # unknown parent >>> the tohuc cannot be confirmed
            tohuc_ok = False
        else:
            tohuc_parent = row[wbd_c.F_TOHUC][:-2]
            tohuc_ok = tohuc_parent == parent_huc or tohuc_parent == parent_tohuc

        if not tohuc_ok and row[TOHUC_FLAG] in NOTCHECKED:
            row[TOHUC_FLAG] = wbd_c.WRONG_FLAG
        return row

    def check_area(self, df, hudigit):
        """Checks that the area is within the allowed range

        The allowed range in acres is read from acreage_dict.  The acres column is
        compared with it directly, the square kilometre column with the same bounds
        multiplied by wbd_c.ACRES_TO_KM2.  Values outside the range are flagged as
        wrong when the flag still holds one of the values in NOTCHECKED.

        Args:
            df (pandas df)
            hudigit (string): key of acreage_dict, e.g. "10" or "12"

        Returns:
            df (pandas df): updated df with acreages flagged
        """
        lower_acres, upper_acres = acreage_dict[hudigit]
        # (area column, flag column, lower bound, upper bound)
        checks = (
            (wbd_c.F_AREACRES, ACREAGE_FLAG, lower_acres, upper_acres),
            (
                wbd_c.F_AREASQKM,
                SQKM_FLAG,
                lower_acres * wbd_c.ACRES_TO_KM2,
                upper_acres * wbd_c.ACRES_TO_KM2,
            ),
        )
        for area_col, flag_col, lower, upper in checks:
            out_of_range = np.logical_or(np.less(df[area_col], lower), np.greater(df[area_col], upper))
            may_become_wrong = np.isin(df[flag_col], NOTCHECKED)
            df.loc[np.logical_and(out_of_range, may_become_wrong), flag_col] = wbd_c.WRONG_FLAG
        return df

    def check_linefc(self, line_fc):
        """Checks line featureclass.

        Reads the object-id field, the fields in wbd_f.wbdline_fields and their flag
        fields (as far as they exist in line_fc) into a data frame, then runs the
        text, duplicate and hudigit checks on it.  Requires self.line_oid to be set.

        Args:
            line_fc (feature class): WBD line featureclass

        Returns:
            df (pandas df): Pandas data frame with new calculated columns; the
                object-id column is renamed to OID_JOIN
        """
        # list the fields once instead of once per candidate column
        existing_fields = {f.name for f in arcpy.ListFields(line_fc)}
        line_fields = wbd_f.wbdline_fields
        wanted = [self.line_oid] + line_fields + [name + wbd_c.FLAG_SUFFIX for name in line_fields]
        columns = [c for c in wanted if c in existing_fields]
        df = wbd_f.create_df(line_fc, columns)
        df = df.rename(columns={self.line_oid: OID_JOIN})
        df = self.check_textcols(df, wbd_f.regexdict_line)
        df = self.check_duplicates(df)
        df = self.check_hudigit(df)
        return df

    def check_polygon_fc(self, polygon_fc, hudigit, huc_list, tohuc_list):
        """Checks polygon featureclass.
           1) Checks text fields match regular expressions.
           2) Checks numeric fields are not negative and not NaN.
           3) Checks for duplicates and that huc != tohuc.
           4) Checks huc and tohuc fields match the parents (when parent lists are given).
           5) Checks the area range for the levels listed in acreage_dict.

        Requires self.polygon_oid, self.hucfield and self.hucfield_check to be set.

        Args:
            polygon_fc (featureclass): WBD polygon featureclass.
            hudigit (string): HUDigit, a key of field_lists / regex_dictionaries.
            huc_list (list): huc codes of the parent level; empty to skip the parent checks.
            tohuc_list (list): tohuc values of the parent level, in the same order as
                huc_list; empty to skip the parent tohuc check.

        Returns:
            df (pandas df): Pandas data frame with new calculated columns; the
                object-id column is renamed to OID_JOIN
        """
        # create data frame (list the fields once instead of once per candidate column)
        level_fields = field_lists[hudigit]
        existing_fields = {f.name for f in arcpy.ListFields(polygon_fc)}
        possible_columns = (
            [self.polygon_oid] + level_fields + [name + wbd_c.FLAG_SUFFIX for name in level_fields]
        )
        columns = [c for c in possible_columns if c in existing_fields]
        df = wbd_f.create_df(polygon_fc, columns)
        df = df.rename(columns={self.polygon_oid: OID_JOIN})
        df = self.check_textcols(df, regex_dictionaries[hudigit])
        df = self.check_numericcols(df)
        df = self.check_duplicates(df)
        df = self.check_hucvstohuc(df)
        # Check huc/tohuc matches parent
        if huc_list:
            df = self.check_parenthuc(df, huc_list)
            tohuc_columns = [self.hucfield, wbd_c.F_TOHUC, TOHUC_FLAG]
            # skip the tohuc comparison when this level has no tohuc columns
            if tohuc_list and all(c in df.columns for c in tohuc_columns):
                parenttohuc_dict = dict(zip(huc_list, tohuc_list))
                df[tohuc_columns] = df[tohuc_columns].apply(
                    self.check_parenttohuc, tohuc_dict=parenttohuc_dict, axis=1
                )
        # check acreage (only levels with a configured range)
        if hudigit in acreage_dict:
            df = self.check_area(df, hudigit)
        return df

    def saveas_table(self, df):
        """
        Save the flag columns of df as an ESRI table.

        The table is named "check_table" and is written to arcpy.env.workspace, or to
        arcpy.env.scratchGDB when no workspace is set.  An existing table of that name
        is deleted first.

        Args:
            df (pandas dataframe): must contain the OID_JOIN column.

        Returns:
            tuple: (path of the table, list of the flag column names written to it)
        """
        fields_tojoin = [f for f in df.columns if f.endswith(wbd_c.FLAG_SUFFIX)]
        dtype = "<U{}".format(int(wbd_c.FLAG_LENGTH / 2))  # in bytes not characters >>> divide by 2
        data_types = [(name, dtype) for name in fields_tojoin]
        data_types.append((OID_JOIN, np.int64))
        arr = np.zeros(len(df), dtype=data_types)
        for name, _ in data_types:
            arr[name] = df[name]
        # can't use memory fpath here (messes up field lengths)
        # os.path.join fails on None, so fall back to the scratch geodatabase
        workspace = arcpy.env.workspace or arcpy.env.scratchGDB
        check_table = os.path.join(workspace, "check_table")
        if arcpy.Exists(check_table):
            arcpy.Delete_management(check_table)
        arcpy.da.NumPyArrayToTable(arr, check_table)  # doesn't respect overwrite environmental setting, no return?
        return check_table, fields_tojoin

    def join_fields(self, fc, fc_oid, table_tojoin, fields_tojoin):
        """Replace the given fields of fc with the ones stored in table_tojoin.

        Fields of that name already present in fc are deleted, then all fields are
        joined in one call, matching fc_oid in fc against OID_JOIN in the table.

        Args:
            fc (feature class): feature class that receives the fields.
            fc_oid (string): name of the join field in fc.
            table_tojoin (table): table holding OID_JOIN and the fields to join.
            fields_tojoin (list): names of the fields to transfer.
        """
        fields_tojoin = list(fields_tojoin)
        if not fields_tojoin:
            # an empty field list would make JoinField transfer every field of the table
            return
        existing_fields = {f.name for f in arcpy.ListFields(fc)}
        outdated = [field for field in fields_tojoin if field in existing_fields]
        if outdated:
            arcpy.DeleteField_management(fc, outdated)
        # one join for all fields instead of one join per field
        arcpy.JoinField_management(fc, fc_oid, table_tojoin, OID_JOIN, fields_tojoin)

    def execute(self, params, messages):
        """The execute function is called when the run button is pushed in arcpro/arcmap.

        Checks the line feature class, then every polygon feature class in the order
        returned by wbd_f.sort_polygons, and joins the resulting flag fields back onto
        each feature class.  The huc/tohuc values of one polygon level are passed on
        as the parent values for the next one.

        Args:
            params (iterable): iterable of arcpy parameter objects (or of the plain
                values when self.callfrom_pyt is False)
            messages (iterable): iterable of arcpy messages objects (not used)
        """
        # parameters
        if self.callfrom_pyt:
            line_fc = params[0].valueAsText
            polygon_fc = params[1].valueAsText
        else:
            line_fc = params[0]
            polygon_fc = params[1]
        try:
            polygon_fc = polygon_fc.split(";")
        except AttributeError:
            # not a string (e.g. already a list of paths) >>> use as is
            pass
        polygon_fc, self.hudigits = wbd_f.sort_polygons(polygon_fc)
        # polygon-only state left over from a previous run must not reach the line checks
        for stale_attr in ("hucfield", "hucfield_check"):
            vars(self).pop(stale_attr, None)
        # check line feature class
        self.line_oid = arcpy.Describe(line_fc).OIDFieldName
        line_df = self.check_linefc(line_fc)
        check_table, fields_tojoin = self.saveas_table(line_df)
        try:
            self.join_fields(line_fc, self.line_oid, check_table, fields_tojoin)
        finally:
            # remove the scratch table even when the join fails
            arcpy.Delete_management(check_table)
        # loop through polygon feature class (biggest to smallest i.e. HUC2 >>> HUC16)
        parenthuc_list = []
        parenttohuc_list = []
        for i, fc in enumerate(polygon_fc):
            hudigit = self.hudigits[i]
            self.polygon_oid = arcpy.Describe(fc).OIDFieldName
            self.hucfield = wbd_c.HUC + hudigit
            self.hucfield_check = self.hucfield + wbd_c.FLAG_SUFFIX
            df = self.check_polygon_fc(fc, hudigit, parenthuc_list, parenttohuc_list)
            check_table, fields_tojoin = self.saveas_table(df)
            try:
                self.join_fields(fc, self.polygon_oid, check_table, fields_tojoin)
            finally:
                arcpy.Delete_management(check_table)
            # list parent hucs/tohuc for the next level
            parenthuc_list = list(df[self.hucfield])
            if wbd_c.F_TOHUC in df.columns:
                parenttohuc_list = list(df[wbd_c.F_TOHUC])
            else:
                # no tohuc at this level >>> don't pair an older level's tohucs with these hucs
                parenttohuc_list = []


if __name__ == "__main__":
    # Execute as standalone script: inputs are passed as plain values instead of
    # arcpy parameter objects, so callfrom_pyt is switched off.
    arcpy.env.workspace = r"C:\Users\plongley\Desktop\tooltest_041221\1908030501_prep\1908030501_data.gdb"
    lines = r"C:\Users\plongley\Desktop\tooltest_041221\1908030501_prep\1908030501_data.gdb\initial_data\WBDLine_1908030501"
    polygons = [
        r"C:\Users\plongley\Desktop\tooltest_041221\1908030501_prep\1908030501_data.gdb\initial_data\HU10_1908030501",
        # r'C:\Users\plongley\Desktop\tooltest_041221\1908030501_prep\1908030501_data.gdb\initial_data\HU12_1908030501'
    ]
    general_check = GeneralCheck()
    general_check.callfrom_pyt = False
    general_check.execute((lines, polygons), None)

