"""
 Description:  Functions that assist in the WBD automation process.
 Author:    Patrick Longley  (plongley@usgs.gov)
 Created:   08/19/2020
 Language: python3/python2
 History:
"""

import os
import sys
import arcpy
from zipfile import ZipFile
import urllib
import re
import pandas as pd
import wbd_c

PYTHON_VERSION = sys.version_info.major
# Matches one of 2, 4, ..., 16 when it is not embedded in a longer run of digits.
N_INFNAME = r'(?<![0-9])(2|4|6|8|10|12|14|16)(?![0-9])'


# Prefix regexes: match a positional prefix (and, for the first pattern, a
# hyphen) only when 'River', 'Stream' or 'Creek' appears later in the string.
river_synonyms = ('Stream', 'River', 'Creek')
_PREFIX_PATTERN = r'(-|Lower\s*|Middle\s*|Upper\s*|Outlet\s*|Headwaters\s*)(?=.*River|.*Stream|.*Creek)'
_PREFIX_PATTERN2 = r'(Lower\s*|Middle\s*|Upper\s*|Outlet\s*|Headwaters\s*)(?=.*River|.*Stream|.*Creek)'
if PYTHON_VERSION == 3:
    # Python 3 callers receive compiled pattern objects
    prefix_regex = re.compile(_PREFIX_PATTERN)
    prefix_regex2 = re.compile(_PREFIX_PATTERN2)
else:
    # other versions keep the raw pattern strings
    prefix_regex = _PREFIX_PATTERN
    prefix_regex2 = _PREFIX_PATTERN2

def get_memoryfpath(python_version):
    """
    Gets the file path of the in-memory workspace for a Python major version.

    Args:
        python_version (int): Python major version (2 or 3).

    Returns:
        fpath (str): 'memory' for version 3, 'in_memory' for version 2,
                     None for any other value.
    """
    if python_version == 3:
        return 'memory'
    if python_version == 2:
        return 'in_memory'
    return None

def get_hucfield(fc):
    """
    Returns the name of the HUC field of a feature class.

    A field qualifies when its name consists of wbd_c.HUC followed by one of
    2, 4, 6, 8, 10, 12, 14 or 16.

    Args:
        fc (str): Feature class whose fields are searched.

    Returns:
        field_name (str): The matching field name when exactly one field
                          qualifies, otherwise None (no match or several).
    """
    names = [fld.name for fld in arcpy.ListFields(fc)]
    huc_pattern = r'^{}(2|4|6|8|10|12|14|16)$'.format(wbd_c.HUC)
    candidates = []
    for name in names:
        if re.match(huc_pattern, name):
            candidates.append(name)
    # an ambiguous (or missing) HUC field is reported as None
    return candidates[0] if len(candidates) == 1 else None

def check_hucfieldsmatch(fc_list):
    """
    Checks that all feature classes share the same HUC field.

    Args:
        fc_list (list): List of feature classes as strings.

    Returns:
        boolean: True if get_hucfield returns one and the same non-None field
                 name for every feature class, False otherwise (including an
                 empty fc_list).
    """
    distinct_fields = {get_hucfield(fc) for fc in fc_list}
    return None not in distinct_fields and len(distinct_fields) == 1

def check_fieldsexist(fc, required_fields):
    """
    Checks that every required field exists in the feature class.

    Field names are compared exactly (case-sensitive).

    Args:
        fc (str): Feature class
        required_fields (iterable): Field names that must be present.

    Returns:
        boolean: True if all required fields are present, False if not.
    """
    existing = set()
    for fld in arcpy.ListFields(fc):
        existing.add(fld.name)
    return set(required_fields) <= existing

def delete_extrafields(fc, to_keep):
    """
    Deletes extra fields in a fc/table if they are not in the to keep list.

    Required fields (as reported by arcpy) are never deleted. When there is
    nothing to delete the function returns without calling the DeleteField
    tool.

    Args:
        fc (str): feature class
        to_keep (iterable): List of field names to keep.

    Returns:
        None
    """
    fpath = arcpy.Describe(fc).catalogPath
    workspace = os.path.dirname(fpath)
    # if the parent is a container inside the geodatabase (e.g. a feature
    # dataset), step up one more level to reach the .gdb itself
    if '.gdb' in workspace and not workspace.endswith('.gdb'):
        workspace = os.path.dirname(workspace)
    with arcpy.EnvManager(workspace=workspace):
        fields = [f.name for f in arcpy.ListFields(fc) if not f.required and f.name not in to_keep]
        # DeleteField needs at least one field; skip the call when there are none
        if fields:
            arcpy.DeleteField_management(fc, fields)

def add_quotes(string):
    """
    Wraps a value in single quotes.

    Args:
        string (str): Text to quote.

    Returns:
        string (str): The input surrounded by single quotes.
    """
    return "'{}'".format(string)

def spatialjoin_singlefield(target_feature, join_feature, field, field_outname, out_fc, match_option='INTERSECT', join_type='KEEP_ALL'):
    """
    Spatially joins one field of join_feature onto target_feature.

    The output keeps every field of the target feature plus the single join
    field, renamed to field_outname. The join runs as JOIN_ONE_TO_MANY.

    Args:
        target_feature (str): Feature that the join will be performed on.
        join_feature (str): Feature that will be joined.
        field (str):  Single field that will be mapped from the join feature to the output.
        field_outname (str): Name (and alias) of the field in the output feature.
        out_fc (str): Output feature class
        match_option (str): Passed to the tool as match_option.
        join_type (str): Passed to the tool as join_type.

    Returns:
        The value returned by arcpy.SpatialJoin_analysis.
    """
    # field map for the one field taken from the join feature
    join_map = arcpy.FieldMap()
    join_map.addInputField(join_feature, field)
    # rename the output field; the alias mirrors the name as stored
    renamed = join_map.outputField
    renamed.name = field_outname
    renamed.aliasName = renamed.name
    join_map.outputField = renamed
    # all target fields first, then the renamed join field
    mappings = arcpy.FieldMappings()
    mappings.addTable(target_feature)
    mappings.addFieldMap(join_map)
    return arcpy.SpatialJoin_analysis(
        target_feature,
        join_feature,
        out_fc,
        join_operation='JOIN_ONE_TO_MANY',
        field_mapping=mappings,
        join_type=join_type,
        match_option=match_option,
    )

def reproject_fd(fd, sr):
    """
    Reprojects a feature dataset when its spatial reference differs from sr.

    The projected copy is written next to the input as '<fd>_rpj'. An existing
    copy is reused unless its spatial reference also differs from sr.

    Args:
        fd (str): Feature dataset to be reprojected
        sr (str): Spatial reference the dataset will be reprojected to.
                  A falsy value disables reprojection.

    Returns:
        fd_rpj (str): Reprojected feature dataset, or fd itself when no
                      reprojection applies.
    """
    fd_rpj = fd + '_rpj'
    needs_projection = arcpy.Describe(fd).spatialReference != sr and sr
    if not needs_projection:
        return fd
    # project when there is no copy yet or the existing copy is in another reference
    if not arcpy.Exists(fd_rpj) or arcpy.Describe(fd_rpj).spatialReference != sr:
        arcpy.Project_management(fd, fd_rpj, sr)
    return fd_rpj

def download_fd(out_folder, url, fd_name):
    """
    Downloads data (nhd or wbd) using the national map api.

    The zip archive named by the last URL segment is downloaded into
    out_folder and extracted; the extracted geodatabase is renamed (dropping
    '_H_' -> '_' and '_GDB' from its name) and the zip plus the accompanying
    .jpg/.xml files are removed when present. Nothing is downloaded if the
    renamed geodatabase already exists.

    Args:
        out_folder (str): Folder where output will be saved.
        url (str): URL for downloading data.
        fd_name(str): Feature dataset name for newly created feature dataset

    Returns:
        fd (str): feature dataset file path
    """
    # urlretrieve lives in urllib.request on Python 3 (which must be imported
    # explicitly) and directly in urllib on Python 2
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    # base name of the archive: remove the '.zip' suffix only
    # (str.strip('.zip') would strip any of the characters '.', 'z', 'i', 'p')
    zip_name = url.split('/')[-1]
    base_name, ext = os.path.splitext(zip_name)
    if ext.lower() != '.zip':
        base_name = zip_name
    # where files are saved
    fpath = os.path.join(out_folder, base_name)
    zip_folder = fpath + '.zip'   #  what the zip file is saved as
    auto_gdb = fpath + '.gdb'     #  name of unzipped gdb
    jpg = fpath + '.jpg'          #  extra jpeg file
    xml = fpath + '.xml'          #  extra xml file
    # name for new gdb; rename only the file name, never the output folder
    out_name = base_name.replace('_H_', '_').replace('_GDB', '')
    rename_gdb = os.path.join(out_folder, out_name) + '.gdb'
    fd = os.path.join(rename_gdb, fd_name)
    # download, unzip, delete extra files
    if not arcpy.Exists(rename_gdb):
        urlretrieve(url, zip_folder)
        with ZipFile(zip_folder, 'r') as zip_obj:
            zip_obj.extractall(out_folder)
        os.rename(auto_gdb, rename_gdb)
        # the extra files are not guaranteed to be in every archive
        for leftover in (zip_folder, jpg, xml):
            if os.path.exists(leftover):
                os.remove(leftover)
    return fd

def check_huc4(huc_list):
    """
    Checks every HUC code against wbd_c.REGEX_HUC formatted with '1,7'.

    Presumably this enforces codes of at least 4 digits -- confirm against the
    definition of wbd_c.REGEX_HUC.

    Args:
        huc_list (list): list of HUC codes as strings

    Returns:
        boolean: True if HUC codes are valid, False if invalid.
    """
    huc_regex = wbd_c.REGEX_HUC.format('1,7')
    results = [re.match(huc_regex, code) for code in huc_list]
    # a single non-matching code invalidates the whole list
    return None not in results

def check_hucre(huc_list):
    """
    Checks every HUC code against wbd_c.REGEX_HUC formatted with '0,7'.

    Presumably this enforces an even number of digits -- confirm against the
    definition of wbd_c.REGEX_HUC.

    Args:
        huc_list (list): list of HUC codes as strings

    Returns:
        boolean: True if HUC codes are valid, False if invalid.
    """
    huc_regex = wbd_c.REGEX_HUC.format('0,7')
    results = [re.match(huc_regex, code) for code in huc_list]
    # a single non-matching code invalidates the whole list
    return None not in results

def state_abrev(state_list):
    """
    Looks up the abbreviation of each state name in wbd_c.abbrev_dict.

    Args:
        state_list (list): List of state names as strings.

    Returns:
        abbreviations (list): List of state abbreviations as strings, in the
                              same order as state_list.

    Raises:
        KeyError: If a name is not a key of wbd_c.abbrev_dict.
    """
    abbreviations = []
    for state_name in state_list:
        abbreviations.append(wbd_c.abbrev_dict[state_name])
    return abbreviations

def modify_3depchoices(huc2s, datasets):
    """
    Removes 3DEP dataset choices that are unavailable for the given regions.

    The 1/3 arc-second NED entry is removed when region 19 is among huc2s.
    The Alaska IFSAR entry is removed unless huc2s consists of region 19 only.
    The datasets list is modified in place.

    Args:
        huc2s (list): list of HUC2 codes as strings
        datasets (list): list of available 3dep datasets as strings.

    Returns:
        datasets (list): the same list object with unavailable entries removed.
    """
    unavailable = []
    if '19' in huc2s:
        unavailable.append('National Elevation Dataset (NED) 1/3 arc-second')
    if set(huc2s) != {'19'}:
        unavailable.append('Alaska IFSAR 5 meter DEM')
    for choice in unavailable:
        try:
            datasets.remove(choice)
        except ValueError:
            # entry was not offered in the first place
            pass
    return datasets

def check_srmatches(obj_list):
    """
    Checks that all objects share the same spatial reference factory code.

    Args:
        obj_list (list): list of features as strings

    Returns:
        boolean: True if exactly one distinct factory code is found, False
                 otherwise (including an empty obj_list).
    """
    factory_codes = set()
    for obj in obj_list:
        factory_codes.add(arcpy.Describe(obj).spatialReference.factoryCode)
    return len(factory_codes) == 1

def df_stringconcat(x):
    """
    Concatenates strings in df using groupby-apply notation.

    Duplicates and empty strings are dropped; the remaining values are sorted
    and joined with commas.

    Args:
        x: column (iterable of strings) where concatenation is performed

    Returns:
        concatenated strings, or '' when no non-empty value remains
    """
    unique_values = set(x)
    # discard() mutates in place; rebinding to the result of remove() would
    # replace the collection with None and drop every other value
    unique_values.discard('')
    return ','.join(sorted(unique_values))

def create_df(arc_object, columns='*'):
    """
    Loads an ESRI table into a pandas dataframe via a structured numpy array.

    Null values in the wbd_c.F_HUDIGIT field are replaced with 0.

    Args:
        arc_object (str): ESRI table, featureclass, or featurelayer.
        columns (list): Field names to include in the dataframe; '*' (default)
                        is passed through to arcpy as the field selection.

    Returns:
        pandas dataframe
    """
    null_replacements = {wbd_c.F_HUDIGIT: 0}
    table = arcpy.da.TableToNumPyArray(
        arc_object,
        field_names=columns,
        null_value=null_replacements,
    )
    return pd.DataFrame(table)

def create_dict(arc_object, key_col, val_col):
    """
    Loads two fields of an ESRI table into a dictionary via a structured numpy array.

    Args:
        arc_object (str): ESRI table, featureclass, or featurelayer.
        key_col (str): Field whose values become the dictionary keys.
        val_col (str): Field whose values become the dictionary values.

    Returns:
        dict: key_col value -> val_col value; for repeated keys the last row wins.
    """
    table = arcpy.da.TableToNumPyArray(arc_object, field_names=[key_col, val_col])
    return {key: value for key, value in table}

def ah_workspaces(python_version, callfrom_pyt, sr=None):
    """
    Returns default filepaths for archydro.  Creates folder, feature dataset, geodatabase if they do not exist already.

    When called from a toolbox under Python 2 the geodatabase path is derived
    from the current map document (.mxd -> .gdb, lower-cased). Otherwise the
    arcpy workspace is used, falling back to the scratch geodatabase; a
    workspace that is not a .gdb/.mdb/.sde gets an 'AHDefault.gdb' inside it.

    Args:
        python_version (integer)
        callfrom_pyt (boolean): True if called from toolbox, False if called from standalone script.
        sr: Spatial reference used if the 'Layers' feature dataset has to be created.

    Returns:
        fdpath (str):  File path to feature dataset
        dbpath (str):  Filepath to geodatabase
        rasterpath (str):  Filepath to raster folder.
    """
    if python_version == 2 and callfrom_pyt:
        map_document = arcpy.mapping.MapDocument('CURRENT')
        dbpath = map_document.filePath.lower().replace(".mxd", ".gdb")
    else:
        workspace = arcpy.env.workspace or arcpy.env.scratchGDB
        extension = os.path.splitext(workspace)[1].lower()
        if extension in ('.gdb', '.mdb', '.sde'):
            dbpath = workspace
        else:
            dbpath = os.path.join(workspace, 'AHDefault.gdb')
    db_folder, db_name = os.path.split(dbpath)
    fdpath = os.path.join(dbpath, 'Layers')
    rasterpath = os.path.join(db_folder, 'Layers')
    # raster folder sits beside the geodatabase
    if not os.path.exists(rasterpath):
        os.mkdir(rasterpath)
    # geodatabase first, then the feature dataset inside it
    if not arcpy.Exists(dbpath):
        arcpy.CreateFileGDB_management(db_folder, db_name)
    if not arcpy.Exists(fdpath):
        fd_parent, fd_name = os.path.split(fdpath)
        arcpy.CreateFeatureDataset_management(fd_parent, fd_name, spatial_reference=sr)
    return fdpath, dbpath, rasterpath

def get_fpath(feature):
    """
    Returns the catalog path that arcpy.Describe reports for a feature.

    Args:
        feature (str): Feature class, layer or table to describe.

    Returns:
        fpath (str): The catalogPath of the described object.
    """
    return arcpy.Describe(feature).catalogPath

def sort_polygons(polygon_list):
    """Sorts list of polygon feature class by HUDigit ex: HU10, HU12, HU8 >>> HU8, HU10, HU12

    The HU digit is the first standalone 2, 4, ..., 16 found in the base name
    of each feature class. If any name contains no such digit, nothing is
    sorted and (None, None) is returned.

    Args:
        list: list of polygon featureclasses

    Returns:
        list: sorted list of polygon featureclasses (None if a digit is missing)
        list: list of hudigits obtained from filenames (as strings)
              (None if a digit is missing)
    """
    pairs = []
    for fc in polygon_list:
        fc_name = arcpy.Describe(fc).baseName
        found = re.search(r'(?<![0-9])(2|4|6|8|10|12|14|16)(?![0-9])', fc_name)
        pairs.append((fc, int(found.group(1)) if found else None))
    # Check explicitly instead of relying on a TypeError from comparing None
    # with int: that error is not raised for a single-element list or on
    # Python 2, which let the string 'None' through as a HU digit.
    if any(digit is None for _, digit in pairs):
        return None, None
    pairs.sort(key=lambda pair: pair[1])
    return [fc for fc, _ in pairs], [str(digit) for _, digit in pairs]

# Automatically assigned flags in order of precedence: update_flag treats a
# flag that appears later in this list as more severe than an earlier one.
# Flags not in this list are treated by update_flag as user-modified.
AUTO_FLAGS = [wbd_c.CORRECT_FLAG, wbd_c.FILLED_FLAG, wbd_c.NOTCHECKED_FLAG, wbd_c.WRONG_FLAG, wbd_c.NODATA_FLAG]

def update_flag(oldflag, newflag):
    """
    Chooses which flag a row should carry after a check.

    Args:
        oldflag: Flag currently stored on the row.
        newflag: Flag produced by the latest check.

    Returns:
        newflag if both flags are in AUTO_FLAGS and newflag comes later in
        that list (higher severity); otherwise oldflag. A flag outside
        AUTO_FLAGS is treated as modified by the user and is never replaced.
    """
    both_automatic = oldflag in AUTO_FLAGS and newflag in AUTO_FLAGS
    if both_automatic and AUTO_FLAGS.index(newflag) > AUTO_FLAGS.index(oldflag):
        return newflag
    # user-modified flag, or the old flag has equal or higher severity
    return oldflag

def updaterow_fromdict(row, value_index, calc_index, flag_index, updates_dict, key, fill_missing):
    """
    Updates one cursor row with a calculated value and the resulting flag.

    The calculated value (updates_dict[key], or None if the key is absent) is
    always written to row[calc_index]. The flag is then derived as follows:
      * no calculated value           -> wbd_c.NOTCHECKED_FLAG
      * no existing value, fill on    -> existing value is filled and the flag
                                         becomes wbd_c.FILLED_FLAG
      * no existing value, fill off   -> wbd_c.NODATA_FLAG
      * existing != calculated        -> wbd_c.WRONG_FLAG
      * existing == calculated        -> wbd_c.CORRECT_FLAG
    Except when a value is filled, the derived flag is merged with the old
    flag through update_flag. A flag outside AUTO_FLAGS is never changed.

    Args:
        row (list): Mutable row; modified in place.
        value_index (int): Position of the existing value in row.
        calc_index (int): Position where the calculated value is stored.
        flag_index (int): Position of the flag in row.
        updates_dict (dict): Calculated values by key.
        key: Key of this row in updates_dict.
        fill_missing (boolean): If true, a missing existing value is replaced
                                by the calculated value.

    Returns:
        row (list): The same row object.
    """
    old_value = row[value_index]
    # whitespace-only strings count as missing; non-strings are left as they are
    try:
        if re.match(r'^\s*$', old_value):
            old_value = None
    except TypeError:
        pass
    old_flag = row[flag_index]
    try:
        new_value = updates_dict[key]
    except KeyError:
        new_value = None
    row[calc_index] = new_value
    if not new_value:
        new_flag = wbd_c.NOTCHECKED_FLAG
    elif not old_value:
        if fill_missing:
            row[value_index] = new_value
            # Overwrite with the filled flag and return right away. Passing it
            # through update_flag would keep a NOTCHECKED/WRONG/NODATA flag
            # (all later in AUTO_FLAGS) although the value has just been filled.
            if old_flag in AUTO_FLAGS:
                row[flag_index] = wbd_c.FILLED_FLAG
            return row
        new_flag = wbd_c.NODATA_FLAG
    elif old_value != new_value:
        new_flag = wbd_c.WRONG_FLAG
    else:
        new_flag = wbd_c.CORRECT_FLAG
    row[flag_index] = update_flag(old_flag, new_flag)
    return row

# Lists of required fields.
# Line features.
wbdline_fields = [wbd_c.F_TNMID, wbd_c.F_HUDIGIT, wbd_c.F_LINESOURCE, wbd_c.F_HUMOD]
# Fields every polygon list starts with.
polygon_fields = [wbd_c.F_NAME, wbd_c.F_TNMID, wbd_c.F_REFERENCEGNIS_IDS, wbd_c.F_STATES]
# Groups of fields shared by several of the lists below.
_type_mod_fields = [wbd_c.F_HUTYPE, wbd_c.F_HUMOD]
_area_fields = [wbd_c.F_AREASQKM, wbd_c.F_AREACRES]
_nontributing_fields = [wbd_c.F_NONTRIBUTINGAREAACRES, wbd_c.F_NONTRIBUTINGAREASQKM]
# HU2 - HU8: shared polygon fields plus the level's own HUC field.
hu2_fields = polygon_fields + [wbd_c.F_HUC2]
hu4_fields = polygon_fields + [wbd_c.F_HUC4]
hu6_fields = polygon_fields + [wbd_c.F_HUC6]
hu8_fields = polygon_fields + [wbd_c.F_HUC8]
# HU10 adds type/modification and area fields.
hu10_fields = polygon_fields + [wbd_c.F_HUC10] + _type_mod_fields + _area_fields
# HU12 additionally carries ToHUC and the non-contributing areas.
hu12_fields = (
    polygon_fields
    + [wbd_c.F_HUC12]
    + _type_mod_fields
    + [wbd_c.F_TOHUC]
    + _area_fields
    + _nontributing_fields
)
# HU14 and HU16 match HU12 without the area fields.
hu14_fields = (
    polygon_fields
    + [wbd_c.F_HUC14]
    + _type_mod_fields
    + [wbd_c.F_TOHUC]
    + _nontributing_fields
)
hu16_fields = (
    polygon_fields
    + [wbd_c.F_HUC16]
    + _type_mod_fields
    + [wbd_c.F_TOHUC]
    + _nontributing_fields
)

# Regex dictionaries: field name -> pattern.
# Line features.
regexdict_line = {
    wbd_c.F_TNMID: wbd_c.REGEX_TNMID,
    wbd_c.F_HUMOD: wbd_c.REGEX_HUMOD_LINE,
    wbd_c.F_LINESOURCE: wbd_c.REGEX_LINESOURCE,
}

# Patterns merged into every polygon dictionary.
regexdict_polygons = {
    wbd_c.F_TNMID: wbd_c.REGEX_TNMID,
    wbd_c.F_REFERENCEGNIS_IDS: wbd_c.REGEX_GNISID,
    wbd_c.F_NAME: wbd_c.REGEX_NAME,
    wbd_c.F_STATES: wbd_c.REGEX_STATES,
}
# Patterns merged in additionally from HUC10 onwards.
extrafields_dict = {
    wbd_c.F_HUTYPE: wbd_c.REGEX_HUTYPE,
    wbd_c.F_HUMOD: wbd_c.REGEX_HUMOD_POLYGON,
}


def _polygon_regexdict(level_patterns, with_extrafields=False):
    """Return level_patterns updated with regexdict_polygons and, on request, extrafields_dict."""
    combined = dict(level_patterns)
    combined.update(regexdict_polygons)
    if with_extrafields:
        combined.update(extrafields_dict)
    return combined


# HUC2 - HUC8: own HUC pattern plus the shared polygon patterns.
regexdict_huc2 = _polygon_regexdict({wbd_c.F_HUC2: wbd_c.REGEX_HUC.format('0')})
regexdict_huc4 = _polygon_regexdict({wbd_c.F_HUC4: wbd_c.REGEX_HUC.format('1')})
regexdict_huc6 = _polygon_regexdict({wbd_c.F_HUC6: wbd_c.REGEX_HUC.format('2')})
regexdict_huc8 = _polygon_regexdict({wbd_c.F_HUC8: wbd_c.REGEX_HUC.format('3')})
# HUC10: also takes the extra field patterns.
regexdict_huc10 = _polygon_regexdict(
    {wbd_c.F_HUC10: wbd_c.REGEX_HUC.format('4')},
    with_extrafields=True,
)
# HUC12 - HUC16: own HUC and ToHUC patterns plus the shared and extra patterns.
regexdict_huc12 = _polygon_regexdict(
    {
        wbd_c.F_HUC12: wbd_c.REGEX_HUC.format('5'),
        wbd_c.F_TOHUC: wbd_c.REGEX_TOHUC.format('5'),
    },
    with_extrafields=True,
)
regexdict_huc14 = _polygon_regexdict(
    {
        wbd_c.F_HUC14: wbd_c.REGEX_HUC.format('6'),
        wbd_c.F_TOHUC: wbd_c.REGEX_TOHUC.format('6'),
    },
    with_extrafields=True,
)
regexdict_huc16 = _polygon_regexdict(
    {
        wbd_c.F_HUC16: wbd_c.REGEX_HUC.format('7'),
        wbd_c.F_TOHUC: wbd_c.REGEX_TOHUC.format('7'),
    },
    with_extrafields=True,
)

# Template lookup: maps a key ('8', '10', '12', '14', '16' or 'line') to the
# matching template constant defined in wbd_c.
template_dict = {
    '8': wbd_c.HUC8_TEMPLATE,
    '10': wbd_c.HUC10_TEMPLATE,
    '12': wbd_c.HUC12_TEMPLATE,
    '14': wbd_c.HUC14_TEMPLATE,
    '16': wbd_c.HUC16_TEMPLATE,
    'line': wbd_c.WBDLINE_TEMPLATE,
}