#!/usr/bin/env bash

#######################################################################
# Copyright (c): 2020-2025, Huawei Tech. Co., Ltd.
# description:
#     Load data into database
#     Return 0 means Load all rows OK.
#     Return 1 means Load failed.
#     Return 2 means Load all or some rows rejected or discarded.
# version:  0.1
# date:     2021-09-07
#######################################################################
# Abort the script when a command fails outside of a tested context.
set -e
gs_loader_version="gs_loader: version 0.1"

# env variables
# Both keep a non-empty value inherited from the environment and otherwise
# fall back to the default shown here.
#   gs_loader_log_level - matched by load_log to decide which levels are printed
#   gs_loader_log_fmt   - passed to date(1) as "+<fmt>" to build the log prefix
gs_loader_log_level=${gs_loader_log_level:="warning"}
gs_loader_log_fmt=${gs_loader_log_fmt:=""}

# user input parameters
# Connection settings handed to gsql (-h / -p / -U / -W / -d).
cmd_param_host=""
cmd_param_port=""
cmd_param_user=""
cmd_param_passwd=""
cmd_param_db=""

# create: create error and summary tables; clean: clean them afterwards.
cmd_param_create="true"
cmd_param_clean="false"

# File names and load tuning values; empty means "not given".
cmd_param_ctl=""
cmd_param_guc=""
cmd_param_data=""
cmd_param_log=""
cmd_param_bad=""
cmd_param_discard=""
cmd_param_errors=""
cmd_param_skip=""
cmd_param_rows=""
cmd_param_compatible_nul="true"
cmd_param_binary="false"
cmd_param_compatible_illegal_chars="false"
cmd_param_parallel=""
cmd_param_limit=""

# Schema holding gs_copy_summary / pgxc_copy_error_log; set_namespace may
# replace it with the current user name.
copy_tables_namespace="pg_catalog"

# global variables
# Random alphanumeric suffix built from 128 bytes of /dev/urandom; used to
# derive the default temporary file name below.
rnd_suffix=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | tr -dc 'a-zA-Z0-9')
gs_loader_file_tmp=${gs_loader_file_tmp:=".gs_loader_file.tmp.${rnd_suffix}_end"}
gs_loader_table_name=""

# Exit codes. 0/1/2 match the file header.
# NOTE(review): EXIT_CODE_FATAL is not referenced in the part of the file
# reviewed here - confirm its meaning against the callers.
EXIT_CODE_OK=0
EXIT_CODE_FAIL=1
EXIT_CODE_WARN=2
EXIT_CODE_FATAL=3

# Associative arrays; the visible code fills them with counter values
# (0, 1, 2, ...) as keys.
declare -A loader_datafiles
declare -A loader_badfiles
declare -A loader_discardfiles
declare -A loader_txids

loader_datafile_count=0
loader_datafile_index=0
loader_txids_count=0

# Added to the "records skipped" total by loader_stat_summary.
parallel_loader_skip=0

# Print a log message to stderr and, when the log file already exists,
# append the same line to it.
# Globals:
#   gs_loader_log_level (read) - selects which levels are emitted
#   gs_loader_log_fmt   (read) - date(1) format used as the line prefix
#   cmd_param_log       (read) - log file; only written when it is a regular file
# Arguments:
#   $1  - level: debug | info | warning | error (anything else is ignored)
#   $2+ - message words, joined with spaces
# Returns:
#   always 0, so logging never trips "set -e"
function load_log()
{
    local level=$1 && shift
    local label=""

    # Map the requested level to its label if the configured level enables it.
    case "$level" in
        debug)
            if [[ "$gs_loader_log_level" =~ debug ]]; then
                label="DEBUG"
            fi
            ;;
        info)
            if [[ "$gs_loader_log_level" =~ debug|info ]]; then
                label="INFO"
            fi
            ;;
        warning)
            if [[ "$gs_loader_log_level" =~ debug|info|warn ]]; then
                label="WARNING"
            fi
            ;;
        error)
            if [[ ! "$gs_loader_log_level" =~ none ]]; then
                label="ERROR"
            fi
            ;;
    esac

    if [[ -n "$label" ]]; then
        local line
        line="$( date "+${gs_loader_log_fmt}" )${label}: $*"
        echo -e "$line" 1>&2
        # The file gets the same label as stderr (debug lines used to be
        # recorded as INFO).
        if [[ -f "$cmd_param_log" ]]; then
            echo -e "$line" >> "$cmd_param_log" 2>&1
        fi
    fi
    true
}

# Abort the load when a gsql result contains an error.
# Arguments:
#   $1 - message describing the failed step
#   $2 - raw gsql output to inspect
# Exits with EXIT_CODE_FAIL when the output mentions "ERROR:" (any case).
function gs_loader_check_res()
{
    msg="$1"
    res="$2"
    if grep -qi "ERROR:" <<< "$res"; then
        load_log error "$msg"
        # Hide the random part of the temporary file name from the report.
        res=$(sed 's/gsql:.gs_loader_file.tmp.*_end:/gsql:.gs_loader_file.tmp:/' <<< "$res")
        load_log error "$res"
        exit $EXIT_CODE_FAIL
    fi
}

# Like gs_loader_check_res, but tolerates "already exists" errors, which are
# expected when the helper tables were created by an earlier run.
# Arguments:
#   $1 - message describing the failed step
#   $2 - raw gsql output to inspect
function gs_loader_check_create_table_res()
{
    msg="$1"
    res="$2"
    local fatal_lines
    # Count error lines that are not "already exists" complaints.
    fatal_lines=$(grep -i "ERROR:" <<< "$res" | grep -vc "already exists")
    if [[ "$fatal_lines" -ge 1 ]]; then
        load_log error "$msg"
        res=$(sed 's/gsql:.gs_loader_file.tmp.*_end:/gsql:.gs_loader_file.tmp:/' <<< "$res")
        load_log error "$res"
        exit $EXIT_CODE_FAIL
    fi
}

# Check a gsql result; on error, log it and leave through
# exit_with_clean_file so temporary files are removed first.
# Arguments:
#   $1 - message describing the failed step
#   $2 - raw gsql output to inspect
function gs_loader_check_res_with_clean_file()
{
    msg="$1"
    res="$2"
    if grep -qi "ERROR:" <<< "$res"; then
        load_log error "$msg"
        res=$(sed 's/gsql:.gs_loader_file.tmp.*_end:/gsql:.gs_loader_file.tmp:/' <<< "$res")
        load_log error "$res"
        exit_with_clean_file
    fi
}
# Check a gsql result; on error, log it, drop the bookkeeping rows of the
# given transaction and leave through exit_with_clean_file.
# Arguments:
#   $1 - message describing the failed step
#   $2 - raw gsql output to inspect
#   $3 - transaction id passed to clean_copy_table_record
function gs_loader_check_res_with_clean_file_txid()
{
    msg="$1"
    res="$2"
    txid="$3"
    if grep -qi "ERROR:" <<< "$res"; then
        load_log error "$msg"
        res=$(sed 's/gsql:.gs_loader_file.tmp.*_end:/gsql:.gs_loader_file.tmp:/' <<< "$res")
        load_log error "$res"
        clean_copy_table_record "$txid"
        exit_with_clean_file
    fi
}
# Print the value part of a "key=value" argument.
# Everything after the FIRST '=' is the value, so values that themselves
# contain '=' or whitespace are returned intact.
# Arguments:
#   $1 - the "key=value" string
# Outputs:
#   the value followed by a newline (an empty line when there is no '=')
function get_value()
{
    local kv="$1"

    if [[ "$kv" != *=* ]]; then
        printf '\n'
        return 0
    fi
    printf '%s\n' "${kv#*=}"
}

# Build the OPTIONS(...) clause for one data file and print it on stdout.
# Arguments:
#   $1  - data file (must exist, be a regular file and be readable)
#   $2  - skip      $3 - errors     $4 - discard file   $5 - bad file
#   $6  - rows      $7 - binary ("true"/"false")
#   $8  - compatible_illegal_chars ("true" enables it)
#   $9  - parallel (emitted as split=)   $10 - limit
#   Empty arguments are left out of the clause.
# Outputs:
#   "OPTIONS(data='...',...)" - printed verbatim, so file names containing
#   repeated spaces or glob characters are no longer altered.
# Exits with EXIT_CODE_FAIL when the data file is unusable.
# The variables stay non-local on purpose: other functions in this file read
# globals of the same name (exit_with_clean_file reads badfile).
function gen_load_options()
{
    datafile="$1"
    skip="$2"
    errors="$3"
    discardfile="$4"
    badfile="$5"
    rows="$6"
    binary="$7"
    compatible_illegal_chars="$8"
    parallel="$9"
    limit="${10}"

    if [[ "$datafile" == "" ]]; then
        load_log error "data file is empty"
        exit $EXIT_CODE_FAIL
    fi

    if [[ -d "$datafile" ]]; then
        load_log error "data file $datafile is a directory"
        exit $EXIT_CODE_FAIL
    fi

    if [[ ! -f "$datafile" ]]; then
        load_log error "data file $datafile not found"
        exit $EXIT_CODE_FAIL
    fi

    if [[ ! -r "$datafile" ]]; then
        load_log error "data file $datafile Permission denied"
        exit $EXIT_CODE_FAIL
    fi

    load_log debug "generate OPTIONS"

    options="OPTIONS(data='$datafile'"

    if [[ -n "$skip" ]]; then
        options="${options},skip=$skip"
    fi

    if [[ -n "$errors" ]]; then
        options="${options},errors=$errors"
    fi

    if [[ -n "$discardfile" ]]; then
        options="${options},discard_path='$discardfile'"
    fi

    if [[ -n "$badfile" ]]; then
        options="${options},bad_path='$badfile'"
    fi

    if [[ -n "$rows" ]]; then
        options="${options},rows=$rows"
    fi

    # Any value other than true/false adds nothing.
    case "$binary" in
        false) options="${options},binary=0" ;;
        true)  options="${options},binary=1" ;;
    esac

    if [[ "$compatible_illegal_chars" == "true" ]]; then
        options="${options},compatible_illegal_chars=1"
    fi

    if [[ -n "$parallel" ]]; then
        options="${options},split=$parallel"
    fi

    if [[ -n "$limit" ]]; then
        options="${options},limit=$limit"
    fi

    options="${options})"
    printf '%s\n' "$options"
}

# Print the numeric value of one option found inside the OPTIONS(...) clause
# of a control file (first occurrence only; empty when absent).
# Arguments:
#   $1 - control file
#   $2 - option name (lower case), e.g. "skip"
function _gs_loader_ctl_option()
{
    local ctl="$1"
    local name="$2"

    # Drop blanks so "skip = 3" and "skip=3" look alike, then pick the option
    # only when it follows "(" or ",".
    grep -iE 'options *\(.*\)' -- "$ctl" \
        | sed 's/[[:blank:]]//g' \
        | grep -Eio "[(,]${name}=[0-9]*" \
        | grep -io "${name}=[0-9]*" \
        | head -1 \
        | awk -F'=' '{print $2}'
}

# Take "skip" and "rows" from the control file's OPTIONS(...) clause when they
# were not given on the command line, then validate both.
# Globals:
#   cmd_param_skip, cmd_param_rows (read, written when empty)
# Arguments:
#   $1 - control file
# Exits with EXIT_CODE_FAIL when skip or rows is not an unsigned integer.
function parse_ctl_options()
{
    # grep parameters in options
    ctlfile="$1"
    res_skip=$(_gs_loader_ctl_option "$ctlfile" "skip")
    res_rows=$(_gs_loader_ctl_option "$ctlfile" "rows")

    # command line values win; the control file only fills the gaps
    if [[ -z "$cmd_param_skip" && -n "$res_skip" ]]; then
        cmd_param_skip=${res_skip}
    fi
    if [[ -z "$cmd_param_rows" && -n "$res_rows" ]]; then
        cmd_param_rows=${res_rows}
    fi

    # check parameters validity; report every bad parameter before leaving
    error_skip=""
    error_rows=""
    re='^[0-9]+$'
    if [[ -n "$cmd_param_skip" ]] && ! [[ "$cmd_param_skip" =~ $re ]]; then
        load_log error "invalid param:skip"
        error_skip=${cmd_param_skip}
    fi
    if [[ -n "$cmd_param_rows" ]] && ! [[ "$cmd_param_rows" =~ $re ]]; then
        load_log error "invalid param:rows"
        error_rows=${cmd_param_rows}
    fi

    # exit with invalid parameter: use the documented failure code, not the
    # offending value (a non-numeric "exit" argument is itself an error)
    if [[ -n "$error_skip" || -n "$error_rows" ]]; then
        exit "${EXIT_CODE_FAIL:-1}"
    fi
}

# Print the control file with the clauses that gs_loader supplies itself
# (OPTIONS, BADFILE, INFILE) blanked out.
# Arguments:
#   $1 - control file
# Every rule is applied through its own "echo -e" pass, followed by one final
# "echo -e" for the output.
function load_data_sql()
{
    ctlfile="$1"
    res=$(cat ${ctlfile})

    local -a strip_rules=(
        # remove the options in control file
        's/options *\(.*\)//ig'
        # remove the badfile in control file
        "s/badfile *'.*'//ig;s/badfile *\".*\"//ig"
        # remove the infile in control file
        "s/infile *'.*'//ig;s/infile *\".*\"//ig;s/infile *\* *\".*\"//ig;s/infile *\* *'.*'//ig"
    )

    local rule
    for rule in "${strip_rules[@]}"; do
        res=$(echo -e "${res}" | sed -r "${rule}")
    done

    echo -e "$res"
}

# Run one SQL statement through gsql and print its output.
# Globals:
#   cmd_param_host/port/user/passwd/db (read) - added as -h/-p/-U/-W/-d when set
# Arguments:
#   $1 - SQL text
# Outputs:
#   gsql output with "Connect primary node..." and "total time..." lines
#   blanked; "ERROR: function exec_sql" when gsql itself fails.
# The argument list is built as an array so that values containing spaces or
# glob characters (typically the password) reach gsql as a single word.
# NOTE(review): the password is passed on the command line (-W); confirm
# whether gsql offers a safer channel before changing this.
function exec_sql()
{
    sql="$1"

    local -a gsql_args=()
    if [[ -n "$cmd_param_host" ]]; then
        gsql_args+=(-h "$cmd_param_host")
    fi
    if [[ -n "$cmd_param_port" ]]; then
        gsql_args+=(-p "$cmd_param_port")
    fi
    if [[ -n "$cmd_param_user" ]]; then
        gsql_args+=(-U "$cmd_param_user")
    fi
    if [[ -n "$cmd_param_passwd" ]]; then
        gsql_args+=(-W "$cmd_param_passwd")
    fi
    if [[ -n "$cmd_param_db" ]]; then
        gsql_args+=(-d "$cmd_param_db")
    fi
    gsql_args+=(-t -c "$sql")

    if res=$(gsql "${gsql_args[@]}" 2>&1)
    then
        echo -e "$res" | sed -r -e 's/^Connect primary node.*//g' -e 's/^total time.*//g'
    else
        echo "ERROR: function exec_sql"
    fi
}

# Run a SQL file through gsql and print its output.
# Globals:
#   cmd_param_host/port/user/passwd/db (read) - added as -h/-p/-U/-W/-d when set
#   cmd_param_log (read) - file that receives "open failed:"/"lock failed:" lines
# Arguments:
#   $1 - SQL file
# Outputs:
#   gsql output with "Connect primary node...", "total time..." and leading
#   BEGIN/SET/COMMIT blanked; "ERROR: function exec_sql_file" when gsql fails.
# Side effects:
#   appends the output lines mentioning "open failed:" or "lock failed:" to
#   cmd_param_log. A (possibly empty) line is appended on every call, which
#   keeps the log layout of earlier versions.
function exec_sql_file()
{
    sql_file="$1"

    # Array-based argv: values with spaces or glob characters stay one word.
    local -a gsql_args=()
    if [[ -n "$cmd_param_host" ]]; then
        gsql_args+=(-h "$cmd_param_host")
    fi
    if [[ -n "$cmd_param_port" ]]; then
        gsql_args+=(-p "$cmd_param_port")
    fi
    if [[ -n "$cmd_param_user" ]]; then
        gsql_args+=(-U "$cmd_param_user")
    fi
    if [[ -n "$cmd_param_passwd" ]]; then
        gsql_args+=(-W "$cmd_param_passwd")
    fi
    if [[ -n "$cmd_param_db" ]]; then
        gsql_args+=(-d "$cmd_param_db")
    fi
    gsql_args+=(-t -f "$sql_file")

    # delete last line: total time: 10ms
    if res=$(gsql "${gsql_args[@]}" 2>&1)
    then
        echo -e "$res" \
            | sed -r -e 's/^Connect primary node.*//g' \
                     -e 's/^total time.*//g' \
                     -e 's/^(BEGIN|SET|COMMIT)//g'
    else
        echo "ERROR: function exec_sql_file"
    fi

    # Keep the output quoted so grep sees real lines and only the matching
    # ones end up in the log.
    check_res=$(echo -e "$res" | grep -E 'open failed:|lock failed:')
    if [[ -n "$cmd_param_log" ]]; then
        echo "$check_res" >> "$cmd_param_log"
    fi
}

# Run the LOAD statement file through exec_sql_file and print what comes
# back (trailing newlines are trimmed by the capture).
# Arguments:
#   $1 - SQL file containing the LOAD statement
function trans_load_to_copy()
{
    load_sql="$1"
    copy_sql="$(exec_sql_file "${load_sql}")"
    echo "${copy_sql}"
}

# Execute the COPY statement file through exec_sql_file and print its
# output (trailing newlines are trimmed by the capture).
# Arguments:
#   $1 - SQL file containing the COPY statement
function copy_into_table()
{
    sql_file="$1"
    res="$(exec_sql_file "${sql_file}")"
    echo "${res}"
}

# Check that a file can be created or updated at the given path.
# Arguments:
#   $1 - file path
# Outputs:
#   "0" when the path is usable, "1" otherwise
# Side effects:
#   on success the file exists afterwards and its mode is set to 600.
# The path is quoted so names with spaces or glob characters are handled as
# one file.
function check_legal_path()
{
    local file_path="$1"

    if [[ -n "$file_path" ]] && touch -- "$file_path" 1>/dev/null 2>&1
    then
        echo "0"
        chmod 600 -- "$file_path"
    else
        echo "1"
    fi
}

# Prepare the log file and write the version banner as its first line.
# Globals:
#   cmd_param_ctl (read)    - control file, used to derive a default log name
#   cmd_param_log (written) - set to the derived name when none was given
#   gs_loader_version (read)
# Arguments:
#   $1 - log file path; empty means "<control file without extension>.log"
# Exits with EXIT_CODE_FAIL when the path cannot be written.
function init_logfile()
{
    logfile="$1"

    if [[ "$logfile" == "" ]]; then
        # Strip the extension from the file name only, so a dot in a directory
        # name (./ctl, dir.d/ctl) does not cut the path short.
        local ctl_name ctl_dir
        ctl_name=${cmd_param_ctl##*/}
        ctl_dir=${cmd_param_ctl%"$ctl_name"}
        if [[ "$ctl_name" == ?*.* ]]; then
            ctl_name=${ctl_name%.*}
        fi
        logfile="${ctl_dir}${ctl_name}.log"
        cmd_param_log=$logfile
    fi

    res=$(check_legal_path "$logfile")
    if [[ "$res" == "0" ]]; then
        # Truncates any previous content.
        echo "$gs_loader_version" > "$logfile"
    else
        load_log warning "logfile file path $logfile is invalid or don't have permission"
        exit $EXIT_CODE_FAIL
    fi
}

# Fill loader_badfiles with one bad-file name per data file.
# Globals:
#   loader_datafiles (read), loader_badfiles (written), badfile (written)
# Arguments:
#   $1 - bad file setting:
#        empty     -> "<data file without extension>.bad" next to each data file
#        directory -> "<dir>/<data file name without extension>.bad"
#        otherwise -> that single file is used for every data file
# Each entry is stored under the SAME key as its data file. Walking the
# values of an associative array and numbering them 0,1,2,... is not safe,
# because bash does not return them in insertion order.
# Returns: 0 (also when the given bad file path is unusable; a warning is logged)
function init_badfiles()
{
    badfile="$1"

    local key data_path data_name data_dir

    # no bad file, generate bad filename according to data file
    if [[ "$badfile" == "" ]]; then
        for key in "${!loader_datafiles[@]}"; do
            data_path=${loader_datafiles[$key]}
            data_name=${data_path##*/}
            data_dir=${data_path%"$data_name"}
            # only the file name may lose its extension, never a directory
            if [[ "$data_name" == ?*.* ]]; then
                data_name=${data_name%.*}
            fi
            loader_badfiles[$key]="${data_dir}${data_name}.bad"
        done
        return 0
    fi

    # bad file is a directory, generate bad filename according to data file
    if [[ -d "$badfile" ]]; then
        for key in "${!loader_datafiles[@]}"; do
            data_path=${loader_datafiles[$key]}
            data_name=${data_path##*/}
            if [[ "$data_name" == ?*.* ]]; then
                data_name=${data_name%.*}
            fi
            loader_badfiles[$key]="$badfile/${data_name}.bad"
        done
        return 0
    fi

    # user specified bad file: check path is valid
    res=$(check_legal_path "$badfile")
    if [[ "$res" != "0" ]]; then
        load_log warning "bad file path $badfile is invalid"
        return 0
    fi
    # check_legal_path created the file as a probe; remove it again
    rm -f -- "$badfile"
    for key in "${!loader_datafiles[@]}"; do
        loader_badfiles[$key]=$badfile
    done
    return 0
}

# Fill loader_discardfiles with one discard-file name per data file.
# Globals:
#   loader_datafiles (read), loader_discardfiles (written), discardfile (written)
# Arguments:
#   $1 - discard file setting:
#        empty     -> nothing is registered
#        directory -> "<dir>/<data file name without extension>.dsc"
#        otherwise -> that single file is used for every data file
# Each entry is stored under the SAME key as its data file; associative
# array values do not come back in insertion order, so they must not be
# renumbered while iterating.
# Returns: 0
function init_discardfiles()
{
    discardfile="$1"

    local key data_name

    if [[ "$discardfile" == "" ]]; then
        load_log info "discard file is empty"
        return 0
    fi

    if [[ -d "$discardfile" ]]; then
        for key in "${!loader_datafiles[@]}"; do
            data_name=${loader_datafiles[$key]##*/}
            if [[ "$data_name" == ?*.* ]]; then
                data_name=${data_name%.*}
            fi
            loader_discardfiles[$key]="$discardfile/${data_name}.dsc"
        done
        return 0
    fi

    for key in "${!loader_datafiles[@]}"; do
        loader_discardfiles[$key]=$discardfile
    done
    return 0
}

# Print the recorded transaction ids as a SQL list: "(id0,id1,...)",
# or "()" when none were recorded.
# Globals:
#   loader_txids (read), loader_txids_count (read)
function get_txids_array()
{
    local joined=""
    local idx

    for ((idx = 0; idx < loader_txids_count; idx++)); do
        # a comma goes between elements, never after the last one
        if [[ $idx -gt 0 ]]; then
            joined="${joined},"
        fi
        joined="${joined}${loader_txids[$idx]}"
    done

    txids_array="(${joined})"
    # left unquoted as before, so stray whitespace in an id is still squeezed
    echo $txids_array
}

function loader_stat_summary()
{
    logfile="$1"

    txids_array=$(get_txids_array)

    echo "" >> $logfile
    echo "Table $gs_loader_table_name:" >> $logfile

    if [ "X$txids_array" != "X()" ]; then
        sql="select pg_catalog.sum(loadrows)||' Rows successfully loaded.'  from ${copy_tables_namespace}.gs_copy_summary where id in $txids_array"
        res=$(exec_sql "$sql")
        gs_loader_check_res "query ${copy_tables_namespace}.gs_copy_summary failed: $sql" "$res"
    else
        res="0 Rows successfully loaded."
    fi
    echo " "$res >> $logfile

    if [ "X$txids_array" != "X()" ]; then
        sql="select pg_catalog.sum(errorrows)||' Rows not loaded due to data errors.'  from ${copy_tables_namespace}.gs_copy_summary where id in $txids_array"
        res=$(exec_sql "$sql")
        gs_loader_check_res "query ${copy_tables_namespace}.gs_copy_summary failed: $sql" "$res"
    else
        res="0 Rows not loaded due to data errors."
    fi
    echo " "$res >> $logfile

    if [ "X$txids_array" != "X()" ]; then
        sql="select pg_catalog.sum(whenrows)||' Rows not loaded because all WHEN clauses were failed.'  from ${copy_tables_namespace}.gs_copy_summary where id in $txids_array"
        res=$(exec_sql "$sql")
        gs_loader_check_res "query ${copy_tables_namespace}.gs_copy_summary failed: $sql" "$res"
    else
        res="0 Rows not loaded because all WHEN clauses were failed."
    fi
    echo " "$res >> $logfile

    if [ "X$txids_array" != "X()" ]; then
        sql="select pg_catalog.sum(allnullrows)||' Rows not loaded because all fields were null.'  from ${copy_tables_namespace}.gs_copy_summary where id in $txids_array"
        res=$(exec_sql "$sql")
        gs_loader_check_res "query ${copy_tables_namespace}.gs_copy_summary failed: $sql" "$res"
    else
        res="0 Rows not loaded because all fields were null."
    fi
    echo " "$res >> $logfile

    echo "" >> $logfile

    if [ "X$txids_array" != "X()" ]; then
        sql="select 'Total logical records skipped:    ' || pg_catalog.sum(skiprows)+$parallel_loader_skip from ${copy_tables_namespace}.gs_copy_summary where id in $txids_array"
        res=$(exec_sql "$sql")
        gs_loader_check_res "query ${copy_tables_namespace}.gs_copy_summary failed: $sql" "$res"
    else
        res="Total logical records skipped:    $parallel_loader_skip"
    fi
    echo " "$res >> $logfile

    echo "" >> $logfile

    if [[ "$gs_loader_log_level" =~ info|debug ]]; then
        printf '\nLOG:\n%b\n\n' "$(cat $logfile)"
    fi
}

# Write the rejected raw records of one transaction into the bad file.
# Globals:
#   copy_tables_namespace, gs_loader_log_level (read); badfile, txid (written)
# Arguments:
#   $1 - bad file to create (empty: nothing to do)
#   $2 - transaction id whose summary row identifies the error records
# Records whose detail is 'COPY_WHEN_ROWS' are left out. When there are no
# records, no file is written. Failures leave through exit_with_clean_file.
# badfile stays a global: exit_with_clean_file derives a file name from it.
function gen_badfile()
{
    badfile="$1"
    txid="$2"

    if [[ "$badfile" == "" ]]; then
        load_log info "bad file name is empty"
        return 0
    fi

    # The summary row supplies the relname/begintime filter for the error log.
    condition=$(exec_sql "select ' relname = ''' || relname || ''' and begintime = '''|| begintime || '''' from ${copy_tables_namespace}.gs_copy_summary where id=$txid")
    if [[ "X$condition" == "X" ]]; then
        load_log error "An error occurred. Please check logfile."
        exit_with_clean_file
    fi

    sql="select rawrecord from ${copy_tables_namespace}.pgxc_copy_error_log where detail not like 'COPY_WHEN_ROWS' and $condition"
    res=$(exec_sql "$sql")
    gs_loader_check_res_with_clean_file_txid "query ${copy_tables_namespace}.pgxc_copy_error_log failed: $sql" "$res" "$txid"
    if [[ "$res" == "" ]]; then
        load_log info "$badfile is not generated due to 0 errors."
        return 0
    fi

    # Quoted target: bad file names with spaces are written as one file.
    printf '%s\n' "$res" > "$badfile"
    if [[ "$gs_loader_log_level" =~ info|debug ]]; then
        printf 'BAD:\n%b\n\n' "$(cat "$badfile")"
    fi
}

# Print the effective value of enable_copy_error_log.
# Globals:
#   cmd_param_guc (read) - optional guc parameter file
# Outputs:
#   the on/off value taken from a "...enable_copy_error_log=on;" (or off) line
#   in the guc file; when the file is absent or has no such line, the answer
#   of "show enable_copy_error_log;" from the server. Carriage returns are
#   removed.
# The guc file is only read when it exists: with an empty cmd_param_guc the
# earlier "cat" had no operand and consumed standard input instead.
function enable_error_log()
{
    tmp=""
    if [[ -n "$cmd_param_guc" && -f "$cmd_param_guc" ]]; then
        tmp=$(grep "enable_copy_error_log" -- "$cmd_param_guc" | sed -n -e 's/.*=\(on\|off\);/\1/p')
    fi

    if [[ "$tmp" == "" ]]; then
        sql="show enable_copy_error_log;"
        svr=$(exec_sql "$sql")
        # unquoted on purpose: trims the padding gsql puts around the value
        echo $svr | tr -d '\r'
    else
        echo $tmp | tr -d '\r'
    fi
}

# Print the "set loader_support_nul_character" statement that matches the
# compatible_nul parameter: 's1' for false, 's2' for true, nothing otherwise.
# Globals:
#   cmd_param_compatible_nul (read)
function gen_loader_support_nul_guc()
{
    case "$cmd_param_compatible_nul" in
        false)
            echo "set loader_support_nul_character='s1';"
            ;;
        true)
            echo "set loader_support_nul_character='s2';"
            ;;
    esac
}

# Wrap a LOAD statement file in a transaction, in place.
# Arguments:
#   $1 - file holding the LOAD statement; replaced by the wrapped version
#   $2 - guc parameter file placed right after BEGIN (may be empty)
# Result written to $1:
#   BEGIN; <guc file> <original content> ; COMMIT;
# The new content is assembled in a private temporary file in the current
# directory (created with mode 600) and then moved over $1.
# An empty $2 is skipped: "cat" without an operand would read stdin.
function gen_full_load_sql()
{
    local load_file="$1"
    local guc_file="$2"
    local suffix
    local tmp_file

    # separate variable: the global rnd_suffix is left alone
    suffix=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | tr -dc 'a-zA-Z0-9')
    tmp_file=".gs_loader_file.tmp.${suffix}_guc"

    (
        # restrictive from the first byte, not only after the later chmod
        umask 077
        {
            echo "BEGIN;"
            if [[ -n "$guc_file" ]]; then
                cat -- "$guc_file"
            fi
            cat -- "$load_file"
            echo ";"
            echo "COMMIT;"
        } > "$tmp_file"
    )
    chmod 600 "$tmp_file"
    mv -- "$tmp_file" "$load_file"
}

# Turn the generated COPY text into the full SQL script to execute and print
# it on stdout.
# Arguments:
#   $1 - generated SQL text; it may start with a table action
#        ("SELECT 'has_data_in_table' ... LIMIT 1;" or "TRUNCATE TABLE ... ;")
#        followed by the COPY statement
#   $2 - guc statements to place in front of the COPY statement
# Globals read: loader_datafile_index, cmd_param_parallel, cmd_param_binary,
#               cmd_param_rows
# The table action is executed here, and only while loader_datafile_index is
# 0; it is then cut off so that only the remainder is printed.
# NOTE(review): ".*?" in the patterns below is written like a lazy quantifier;
# sed -r and [[ =~ ]] use POSIX ERE, which has none - confirm the patterns
# match what is intended.
function gen_full_copy_sql()
{
    copy_sql="$1"
    guc_sql="$2"

    # Try to isolate a leading "is the table empty" probe.
    table_act=$(echo "$copy_sql" | sed -r "s/ (SELECT 'has_data_in_table' FROM .*? LIMIT 1;) (.*)/\1/")
    pat="SELECT 'has_data_in_table' FROM .*? LIMIT 1;"
    if [[ "$table_act" =~ $pat ]]; then
        if [ "$loader_datafile_index" == 0 ]; then
            res=$(exec_sql "$table_act")
            gs_loader_check_res_with_clean_file "check table empty using: $table_act" "$res"
            load_log debug "$res"
            # The probe returns its marker text only when the table has a row.
            has_data_in_table=$(echo $res | grep 'has_data_in_table' | wc -l)
            if [ "$has_data_in_table" == "1" ]; then
                load_log error "insert into table, but table is not empty"
                exit_with_clean_file
            fi
        fi
        # Keep only what follows the probe.
        copy_sql=$(echo "$copy_sql" | sed -r "s/ (SELECT 'has_data_in_table' FROM .*? LIMIT 1;) (.*)/\2/")
    else
        # No probe: look for a leading TRUNCATE instead.
        table_act=$(echo "$copy_sql" | sed -r 's/ (TRUNCATE TABLE .*? ;) .*/\1/')
        pat="TRUNCATE TABLE .*? ;"
        if [[ "$table_act" =~ $pat ]]; then
            if [ "$loader_datafile_index" == 0 ]; then
                res=$(exec_sql "$table_act")
                gs_loader_check_res_with_clean_file "truncate table using: $table_act" "$res"
            fi
            # Keep only what follows the TRUNCATE.
            copy_sql=$(echo "$copy_sql" | sed -r 's/ (TRUNCATE TABLE .*? ;) (.*)/\2/')
        fi
    fi

    # Three output shapes:
    #   parallel > 1 and not binary : guc + nul guc + copy, no txid query
    #   rows not set                : the same wrapped in BEGIN/COMMIT, txid
    #                                 taken from txid_current()
    #   rows set                    : no explicit transaction, txid taken from
    #                                 the 'copy.id' setting
    if [[ "X$cmd_param_parallel" != "X" ]] && [[ $(expr $cmd_param_parallel + 0) -gt 1 ]] && [[ "$cmd_param_binary" != "true" ]] ; then
        echo "$guc_sql"
        echo "$(gen_loader_support_nul_guc)"
        echo "$copy_sql"
    elif [[ "X$cmd_param_rows" == "X" ]] ; then
        echo "BEGIN;"
        echo "$guc_sql"
        echo "$(gen_loader_support_nul_guc)"
        echo "$copy_sql"
        echo "select 'copy_txid:'||pg_catalog.txid_current();"
        echo "COMMIT;"
    else
        echo "$guc_sql"
        echo "$(gen_loader_support_nul_guc)"
        echo "$copy_sql"
        echo "select 'copy_txid:'||pg_catalog.current_setting('copy.id');"
    fi
}

# Print the version banner exactly as stored in gs_loader_version.
# Quoted output: the text is not subject to word splitting or globbing.
function get_gs_loader_version()
{
    printf '%s\n' "$gs_loader_version"
}

# Print the usage text, headed by the version banner.
# The "clean" default shown here is false, matching the initial value of
# cmd_param_clean.
function gs_loader_help()
{
    cat <<EOF
$(get_gs_loader_version)

Usage: gs_loader key=value [key=value ...]

General options:
      help -- print this
      host -- database server host
        -h -- database server host
      port -- database server port
        -p -- database server port
      user -- database user name
        -U -- database user name
    passwd -- the password of specified database user
        -W -- the password of specified database user
        db -- database name to connect to
        -d -- database name to connect to

    create -- create error and summary tables(default:true)
     clean -- clean error and summary tables(default:false)

   control -- the name of control file
      data -- the name of data file
       log -- the name of log file
       bad -- the name of bad file
   discard -- the name of discard file
 guc_param -- the name of guc_parameter file
      skip -- skip line numbers
    errors -- allow errors numbers
      rows -- number of lines per commit
  bindsize -- not support, only compatible
compatible_nul -- compatible for nul character in datafile(default:true)  

EOF
}

# Names of the command line parameters seen so far.
declare -A unique_params

# Reject a parameter that is given more than once.
# Arguments:
#   $1 - parameter name
# Exits with EXIT_CODE_FAIL on a repeat; otherwise remembers the name.
function check_param_exists()
{
    param_name="$1"

    case "${unique_params[$param_name]:-}" in
        "")
            # first sighting
            ;;
        *)
            load_log error "parameter '$param_name' can only be specified once"
            exit $EXIT_CODE_FAIL
            ;;
    esac

    unique_params[$param_name]="true"
}

# Fail unless a parsed value is an unsigned integer.
# Arguments:
#   $1 - the raw command line argument (for the message)
#   $2 - the value extracted from it
function _gs_loader_require_uint()
{
    if ! [[ "$2" =~ ^[0-9]+$ ]]; then
        load_log error "invalid param:$1"
        exit $EXIT_CODE_FAIL
    fi
}

# Fail unless a short flag is followed by a value.
# Arguments:
#   $1 - the flag
#   $2 - number of arguments left, the flag included
function _gs_loader_require_flag_value()
{
    if [[ "$2" -lt 2 ]]; then
        load_log error "missing value for param:$1"
        exit $EXIT_CODE_FAIL
    fi
}

# Parse the command line into the cmd_param_* globals, then read the
# password from standard input.
# Arguments:
#   "$@" - key=value words, the short flags -h/-p/-U/-d with a separate value,
#          or the words "help" / "version"
# Behaviour:
#   - every parameter except data= may appear only once (check_param_exists)
#   - each data= value is handed to append_one_data
#   - skip, errors, rows, parallel and limit must be unsigned integers
#   - bindsize is accepted and ignored
#   - help/version print and exit with EXIT_CODE_OK
#   - any problem exits with EXIT_CODE_FAIL (the exit status used to be taken
#     from the rejected skip/rows value, which is 0 for "skip=")
#   - one line is read from stdin into cmd_param_passwd; end-of-file without
#     a trailing newline is accepted
function parse_cmd_params()
{
    while [[ $# -gt 0 ]]; do
        key="$1"
        case "$key" in
            host=*)
                check_param_exists "host"
                cmd_param_host=$(get_value "$1")
                shift
                ;;
            -h)
                check_param_exists "host"
                _gs_loader_require_flag_value "$key" "$#"
                cmd_param_host=$2
                shift 2
                ;;
            port=*)
                check_param_exists "port"
                cmd_param_port=$(get_value "$1")
                shift
                ;;
            -p)
                check_param_exists "port"
                _gs_loader_require_flag_value "$key" "$#"
                cmd_param_port=$2
                shift 2
                ;;
            user=*)
                check_param_exists "user"
                cmd_param_user=$(get_value "$1")
                shift
                ;;
            -U)
                check_param_exists "user"
                _gs_loader_require_flag_value "$key" "$#"
                cmd_param_user=$2
                shift 2
                ;;
            db=*)
                check_param_exists "db"
                cmd_param_db=$(get_value "$1")
                shift
                ;;
            -d)
                check_param_exists "db"
                _gs_loader_require_flag_value "$key" "$#"
                cmd_param_db=$2
                shift 2
                ;;
            create=*)
                check_param_exists "create"
                cmd_param_create=$(get_value "$1")
                shift
                ;;
            clean=*)
                check_param_exists "clean"
                cmd_param_clean=$(get_value "$1")
                shift
                ;;
            control=*)
                check_param_exists "control"
                cmd_param_ctl=$(get_value "$1")
                shift
                ;;
            guc_param=*)
                check_param_exists "guc_param"
                cmd_param_guc=$(get_value "$1")
                shift
                ;;
            log=*)
                check_param_exists "log"
                cmd_param_log=$(get_value "$1")
                shift
                ;;
            data=*)
                # may be repeated: every occurrence adds data files
                cmd_param_data=$(get_value "$1")
                append_one_data "$cmd_param_data"
                shift
                ;;
            bad=*)
                check_param_exists "bad"
                cmd_param_bad=$(get_value "$1")
                shift
                ;;
            discard=*)
                check_param_exists "discard"
                cmd_param_discard=$(get_value "$1")
                shift
                ;;
            skip=*)
                check_param_exists "skip"
                cmd_param_skip=$(get_value "$1")
                _gs_loader_require_uint "$1" "$cmd_param_skip"
                shift
                ;;
            errors=*)
                check_param_exists "errors"
                cmd_param_errors=$(get_value "$1")
                _gs_loader_require_uint "$1" "$cmd_param_errors"
                shift
                ;;
            bindsize=*)
                # accepted for compatibility only
                check_param_exists "bindsize"
                shift
                ;;
            rows=*)
                check_param_exists "rows"
                cmd_param_rows=$(get_value "$1")
                _gs_loader_require_uint "$1" "$cmd_param_rows"
                shift
                ;;
            compatible_nul=*)
                check_param_exists "compatible_nul"
                cmd_param_compatible_nul=$(get_value "$1")
                shift
                ;;
            binary=*)
                check_param_exists "binary"
                cmd_param_binary=$(get_value "$1")
                shift
                ;;
            compatible_illegal_chars=*)
                check_param_exists "compatible_illegal_chars"
                cmd_param_compatible_illegal_chars=$(get_value "$1")
                shift
                ;;
            parallel=*)
                check_param_exists "parallel"
                cmd_param_parallel=$(get_value "$1")
                _gs_loader_require_uint "$1" "$cmd_param_parallel"
                shift
                ;;
            limit=*)
                check_param_exists "limit"
                cmd_param_limit=$(get_value "$1")
                _gs_loader_require_uint "$1" "$cmd_param_limit"
                shift
                ;;
            help)
                gs_loader_help
                exit $EXIT_CODE_OK
                ;;
            version)
                get_gs_loader_version
                exit $EXIT_CODE_OK
                ;;
            *)    # unknown option
                load_log error "unknown param:$1"
                exit $EXIT_CODE_FAIL
                ;;
        esac
    done

    # "read" reports failure at end-of-file even when it stored a value;
    # that must not abort the script under "set -e".
    read -r cmd_param_passwd || true
}

# Ask the server for the enableSeparationOfDuty setting and print the answer.
function enable_separation_of_duty()
{
    sql="show enableSeparationOfDuty;"
    svr="$(exec_sql "${sql}")"
    # deliberately unquoted: word splitting trims the padding around the value
    echo ${svr}
}

# When separation of duty is switched on, use the current user's name as the
# namespace of the copy bookkeeping tables; otherwise keep the namespace as is.
# Globals:
#   copy_tables_namespace (written)
function set_namespace()
{
    local duty_state
    duty_state=$(enable_separation_of_duty)

    if [[ "X${duty_state}" == "Xon" ]]; then
        sql="select CURRENT_USER;"
        copy_tables_namespace=$(exec_sql "$sql")
    fi
}

# Remove the bookkeeping rows of one transaction when "clean" is enabled.
# Globals:
#   cmd_param_clean, copy_tables_namespace (read)
# Arguments:
#   $1 - transaction id
# With clean=true: looks up relname/begintime of the summary row, deletes the
# matching rows from pgxc_copy_error_log, then deletes the summary row.
# With any other value nothing is sent to the database (the lookup used to
# be issued regardless, and twice when cleaning).
function clean_copy_table_record() {
    txid="$1"

    if [[ "$cmd_param_clean" != "true" ]]; then
        return 0
    fi

    condition=$(exec_sql "select ' relname = ''' || relname || ''' and begintime = '''|| begintime || '''' from ${copy_tables_namespace}.gs_copy_summary where id=$txid")
    # An empty condition would only produce a malformed "where" clause.
    if [[ -n "$condition" ]]; then
        res=$(exec_sql "delete from ${copy_tables_namespace}.pgxc_copy_error_log where $condition")
    fi
    res=$(exec_sql "delete from ${copy_tables_namespace}.gs_copy_summary where id = $txid")
}

# Run clean_copy_table_record for every recorded transaction id, in index
# order 0 .. loader_txids_count-1.
function clean_copy_tables()
{
    i=0
    while (( i < loader_txids_count )); do
        txid=${loader_txids[$i]}
        # advance first so the record cleanup stays the last command of the body
        i=$(( i + 1 ))
        clean_copy_table_record "$txid"
    done
}

# Remove the temporary files of this run and exit with EXIT_CODE_FAIL.
# Globals:
#   gs_loader_file_tmp (read) - temporary SQL file
#   badfile (read)            - "<badfile>_commit.log" is removed as well
# gs_loader_file_tmp can be preset from the environment, so it is quoted:
# left unquoted, a value with spaces or wildcards would make "rm -f" delete
# other files.
function exit_with_clean_file() {
    rm -f -- "${gs_loader_file_tmp}"
    rm -f -- "${badfile}_commit.log"
    exit $EXIT_CODE_FAIL
}

# Final cleanup: remove temporary files, work out how many rows were not
# loaded, clean the bookkeeping tables and report the result as status.
# Globals:
#   gs_loader_file_tmp, badfile, copy_tables_namespace (read)
# Returns:
#   EXIT_CODE_OK   when the number of not-loaded rows is exactly 0
#   EXIT_CODE_WARN otherwise (also when the number could not be read)
# A failing summary query leaves through gs_loader_check_res.
# The temporary file name is quoted because it can come from the
# environment; unquoted, "rm -f" would split or glob it.
function clean_and_get_exit_code()
{
    rm -f -- "${gs_loader_file_tmp}"
    rm -f -- "${badfile}_commit.log"

    txids_array=$(get_txids_array)
    if [[ "X$txids_array" != "X()" ]]; then
        sql="select 'not_load_lines:'||pg_catalog.sum(errorrows)+pg_catalog.sum(whenrows)+pg_catalog.sum(allnullrows) from ${copy_tables_namespace}.gs_copy_summary where id in $txids_array"
        res=$(exec_sql "$sql")
        gs_loader_check_res "query ${copy_tables_namespace}.gs_copy_summary failed: $sql" "$res"
    else
        res="not_load_lines:0"
    fi
    # $res is left unquoted so the gsql output collapses onto one line; only
    # the digits of that line are kept.
    not_load_lines=$(echo $res | grep 'not_load_lines:' | sed 's/[^0-9]*//g')
    load_log info "not loaded lines:$not_load_lines"

    clean_copy_tables

    if [[ "$not_load_lines" == "0" ]]; then
        return $EXIT_CODE_OK
    fi

    return $EXIT_CODE_WARN
}

# Make sure a command is available.
# Arguments:
#   $1 - command name
# Exits with EXIT_CODE_FAIL, after logging an error, when it is not found.
# The lookup result is guarded with "||": under "set -e" a failing
# "res=$(which ...)" ended the script before the message could be logged.
function check_command_exist()
{
    res=$(command -v "$1" 2>/dev/null) || res=""
    if [[ -z "$res" ]]; then
        load_log error "can not find command: $1"
        exit $EXIT_CODE_FAIL
    fi
}

# Validate the parsed command line before any work is done.
# Checks, in this order: gsql is installed; the control file is given, is a
# regular readable file; at least one data file was registered; the boolean
# parameters hold true/false; compatible_illegal_chars is not combined with
# compatible_nul=false or binary=true; the guc parameter file, when it exists,
# is a readable regular file.
# The first failing check prints "ERROR: ..." on stdout and exits with
# EXIT_CODE_FAIL.
function check_parameters()
{
    local problem=""
    local flag flag_var

    check_command_exist "gsql"

    # check control file, then data file
    ctlfile="$cmd_param_ctl"
    if [[ "$ctlfile" == "" ]]; then
        problem="control file is empty"
    elif [[ -d "$ctlfile" ]]; then
        problem="control file $ctlfile is a directory"
    elif [[ ! -f "$ctlfile" ]]; then
        problem="control file $ctlfile not found or don't have permission"
    elif [[ ! -r "$ctlfile" ]]; then
        problem="control file $ctlfile Permission denied"
    elif [[ "$loader_datafile_count" == 0 ]]; then
        problem="data file is empty, or don't have permission"
    fi
    if [[ -n "$problem" ]]; then
        echo "ERROR: $problem"
        exit $EXIT_CODE_FAIL
    fi

    # boolean parameters: only the literal words true/false are accepted
    for flag in create clean compatible_nul binary compatible_illegal_chars; do
        flag_var="cmd_param_${flag}"
        case "${!flag_var}" in
            true|false)
                ;;
            *)
                echo "ERROR: '${flag}' parameter should be true/false"
                exit $EXIT_CODE_FAIL
                ;;
        esac
    done

    # forbidden combinations, then the guc_parameter file
    guc_param_file="$cmd_param_guc"
    if [[ "$cmd_param_compatible_illegal_chars" == "true" && "$cmd_param_compatible_nul" == "false" ]]; then
        problem="'compatible_illegal_chars' parameter cannot be true when compatible_nul is false."
    elif [[ "$cmd_param_compatible_illegal_chars" == "true" && "$cmd_param_binary" == "true" ]]; then
        problem="'compatible_illegal_chars' parameter cannot be uesd with binary format data file."
    elif [[ -d "$guc_param_file" ]]; then
        problem="guc parameter file $guc_param_file is a directory"
    elif [[ -f "$guc_param_file" && ! -r "$guc_param_file" ]]; then
        problem="guc parameter file $guc_param_file Permission denied"
    fi
    if [[ -n "$problem" ]]; then
        echo "ERROR: $problem"
        exit $EXIT_CODE_FAIL
    fi
}

# Rewrite the given load file in place, replacing the constructs listed below.
# Every rule is case-insensitive and is applied to all occurrences on a line.
#   WHEN (1:2)          -> WHEN (1-2)
#   AND (1:2)           -> AND (1-2)
#   POSITION (1:2)      -> POSITION (1-2)
#   SEQUENCE (MAX, 1)   -> SEQUENCE (MAXVALUE, 1)
#   SEQUENCE (COUNT, 1) -> SEQUENCE (ROWS, 1)
#   CONSTANT ""         -> CONSTANT ''
# Arguments:
#   $1 - path of the file to rewrite (also stored in the global load_file)
# Requires GNU sed (-i, -r and the case-insensitive flag on s///).
function pre_process_load_file()
{
    load_file="$1"

    local -a rewrite_rules=(
        # replace WHEN (1:2) -> WHEN (1-2)
        -e 's/WHEN[[:space:]]*(\([[:space:]]*[[:digit:]]+[[:space:]]*):([[:space:]]*[[:digit:]]+[[:space:]]*\))/WHEN \1-\2/ig'
        # replace AND (1:2) -> AND (1-2)
        -e 's/AND[[:space:]]*(\([[:space:]]*[[:digit:]]+[[:space:]]*):([[:space:]]*[[:digit:]]+[[:space:]]*\))/AND \1-\2/ig'
        # replace POSITION (1:2) -> POSITION (1-2)
        -e 's/POSITION[[:space:]]*(\([[:space:]]*[[:digit:]]+[[:space:]]*):([[:space:]]*[[:digit:]]+[[:space:]]*\))/POSITION \1-\2/ig'
        # replace sequence (MAX, 1) -> sequence (MAXVALUE, 1)
        -e 's/SEQUENCE[[:space:]]*\([[:space:]]*MAX[[:space:]]*,([[:space:]]*[[:digit:]]+[[:space:]]*\))/SEQUENCE (MAXVALUE,\1/ig'
        # replace sequence (COUNT, 1) -> sequence (ROWS, 1)
        -e 's/SEQUENCE[[:space:]]*\([[:space:]]*COUNT[[:space:]]*,([[:space:]]*[[:digit:]]+[[:space:]]*\))/SEQUENCE (ROWS,\1/ig'
        # replace constant "" -> constant ''
        -e 's/CONSTANT[[:space:]]*""/CONSTANT '\'''\''/ig'
    )

    # sed runs the expressions in the order given on every line, which yields the
    # same text as one full pass per rule while rewriting the file only once.
    # The path is quoted so that blanks or glob characters in it are harmless.
    sed -i -r "${rewrite_rules[@]}" -- "$load_file"
}

# Probe the database by selecting a marker string through exec_sql.
# The raw result is logged at debug level; when the marker is absent from it an
# error is logged and the script exits with EXIT_CODE_FAIL.
# Globals written: sql, res, conn_ok ("1" when the marker was seen, else "0").
function check_db_conn()
{
    sql="select 'GS_LOADER_CONNECT_OK'"
    res=$(exec_sql "$sql")

    load_log debug "$res"

    # Flatten the result onto one line exactly as an unquoted echo does, then
    # look for the marker anywhere in that line.
    local flattened
    flattened=$(echo $res)
    case "$flattened" in
        *GS_LOADER_CONNECT_OK*)
            conn_ok=1
            ;;
        *)
            conn_ok=0
            ;;
    esac

    if [ "$conn_ok" == "1" ]; then
        return 0
    fi

    load_log error "check db connection failed"
    exit $EXIT_CODE_FAIL
}

# Refuse to run against a database whose pg_database.datcompatibility is 'M'.
# Counts the pg_database rows matching the target database name with that
# compatibility; a non-zero count logs an error and exits with EXIT_CODE_FAIL.
# Output that is not a plain non-negative integer is treated as "no match"
# (same outcome as before, but without a stray `[` error on stderr).
# Globals read: cmd_param_db. Globals written: sql, res.
function check_db_is_not_m()
{
    load_log debug "check database $cmd_param_db is M format."

    # Double embedded single quotes so the name stays a single SQL literal.
    local single_quote="'"
    local db_literal=${cmd_param_db//$single_quote/$single_quote$single_quote}

    sql="select count(*) from pg_database where datname='${db_literal}' and datcompatibility='M';"
    res=$(exec_sql "$sql")

    # Drop surrounding whitespace/newlines before interpreting the count.
    local m_count=${res//[[:space:]]/}
    if [[ ! "$m_count" =~ ^[0-9]+$ ]]; then
        load_log debug "unexpected result while checking database compatibility: $res"
        return 0
    fi

    if [ "$m_count" -ne 0 ]; then
        load_log error "gs_loader is not supported in M format."
        exit $EXIT_CODE_FAIL
    fi
}

# Expand one data-file argument and append every resulting path to
# loader_datafiles, advancing loader_datafile_count for each one.
# Arguments:
#   $1 - a path, a glob pattern, or a blank-separated list of either
#        (also stored in the global datafile)
# Expansion is done by the shell instead of parsing `ls` output, so matched
# names containing blanks stay intact. Behaviour kept from the `ls` version:
#   * glob patterns are expanded in sorted order;
#   * a directory contributes its (non-hidden) entries - now with the directory
#     prefix, so the stored paths are usable from any working directory;
#   * names that do not exist are reported on stderr and not appended.
# An empty argument appends nothing (bare `ls` used to list the current directory).
function append_one_data()
{
    datafile=$1

    local -a candidates=()
    local entry

    # First treat the argument as ONE path/pattern: with an empty IFS the
    # unquoted expansion is glob-expanded but not split on blanks.
    local IFS=''
    candidates=($datafile)
    IFS=$' \t\n'

    # Nothing usable that way: fall back to the historical interpretation,
    # a blank-separated list of paths/patterns.
    if [[ ${#candidates[@]} -eq 0 || ! -e "${candidates[0]}" ]]; then
        candidates=($datafile)
    fi

    for file in "${candidates[@]}"; do
        if [[ -d "$file" ]]; then
            for entry in "${file%/}"/*; do
                # An empty directory leaves the pattern unexpanded; skip it.
                if [[ -e "$entry" ]]; then
                    loader_datafiles[$loader_datafile_count]=$entry
                    loader_datafile_count=$((loader_datafile_count + 1))
                fi
            done
        elif [[ -e "$file" ]]; then
            loader_datafiles[$loader_datafile_count]=$file
            loader_datafile_count=$((loader_datafile_count + 1))
        else
            echo "gs_loader: cannot access '$file': No such file or directory" 1>&2
        fi
    done
}

# Record one copy transaction id.
# Arguments:
#   $1 - transaction id (also stored in the global txid)
# The id is stored in loader_txids under the current value of
# loader_txids_count, which is then incremented.
function append_one_txid()
{
    txid="$1"

    loader_txids[${loader_txids_count}]="${txid}"
    loader_txids_count=$((loader_txids_count + 1))
}

# Reduce the remaining error budget (cmd_param_errors) by the number of error
# rows recorded in gs_copy_summary for one copy transaction.
# Arguments:
#   $1 - transaction id (also stored in the global txid)
# Does nothing when cmd_param_errors is empty. The query result is first handed
# to gs_loader_check_res_with_clean_file_txid; afterwards, a result that carries
# no numeric "errorrows:<n>" value counts as 0 rows instead of feeding an empty
# or non-numeric operand into the arithmetic (which aborts the script).
# Globals written: txid, sql, res, errorrows, cmd_param_errors.
function recalcuate_errors()
{
    txid=$1

    if [ "$cmd_param_errors" == "" ]; then
        return 0
    fi

    sql="select 'errorrows:' || errorrows from ${copy_tables_namespace}.gs_copy_summary where id = $txid"
    res=$(exec_sql "$sql")
    gs_loader_check_res_with_clean_file_txid "query ${copy_tables_namespace}.gs_copy_summary failed: $sql" "$res" "$txid"

    # The leading .* is greedy, so the last "errorrows:<digits>" in the output wins.
    local errorrows_regex='.*errorrows:([0-9]+)'
    if [[ "$res" =~ $errorrows_regex ]]; then
        errorrows=${BASH_REMATCH[1]}
    else
        load_log debug "no errorrows value found for txid $txid, assuming 0"
        errorrows=0
    fi

    # 10# forces base 10 so a value with leading zeros is not read as octal.
    cmd_param_errors=$((cmd_param_errors - 10#$errorrows))
}

# Reduce the remaining skip count (cmd_param_skip) by the number of skipped
# rows recorded in gs_copy_summary for one copy transaction.
# Arguments:
#   $1 - transaction id (also stored in the global txid)
# Does nothing when cmd_param_skip is empty. The query result is first handed
# to gs_loader_check_res_with_clean_file_txid; afterwards, a result that carries
# no numeric "skiprows:<n>" value counts as 0 rows instead of feeding an empty
# or non-numeric operand into the arithmetic (which aborts the script).
# Globals written: txid, sql, res, skiprows, cmd_param_skip.
function recalcuate_skip()
{
    txid=$1

    if [ "$cmd_param_skip" == "" ]; then
        return 0
    fi

    sql="select 'skiprows:' || skiprows from ${copy_tables_namespace}.gs_copy_summary where id = $txid"
    res=$(exec_sql "$sql")
    gs_loader_check_res_with_clean_file_txid "query ${copy_tables_namespace}.gs_copy_summary failed: $sql" "$res" "$txid"

    # The leading .* is greedy, so the last "skiprows:<digits>" in the output wins.
    local skiprows_regex='.*skiprows:([0-9]+)'
    if [[ "$res" =~ $skiprows_regex ]]; then
        skiprows=${BASH_REMATCH[1]}
    else
        load_log debug "no skiprows value found for txid $txid, assuming 0"
        skiprows=0
    fi

    # 10# forces base 10 so a value with leading zeros is not read as octal.
    cmd_param_skip=$((cmd_param_skip - 10#$skiprows))
}

# Load one datafile into the table.
# Arguments:
#   $1 - data file to load
#   $2 - bad file that belongs to this data file
#   $3 - discard file that belongs to this data file
# All three are stored in globals of the same name (no `local`); loader_stat_info
# reads ${badfile} afterwards, so the value of the last call is what it sees.
# Steps: build the LOAD DATA text in ${gs_loader_file_tmp}, have it transformed
# into \COPY SQL, run it, then walk the transaction ids found in the result.
# Globals written: datafile, badfile, discardfile, copy_sql, guc_sql, copy_res,
#   txids, txid, skiprows, parallel_loader_skip, cmd_param_skip; the helpers
#   called per txid update cmd_param_errors / cmd_param_skip as well.
function load_one_datafile()
{
    datafile="$1"
    badfile="$2"
    discardfile="$3"

    # generate LOAD DATA SQL
    # The temp file is reset to a single empty line and restricted to the owner
    # before anything is appended to it.
    echo "" > ${gs_loader_file_tmp}
    chmod 600 ${gs_loader_file_tmp}
    parse_ctl_options "$cmd_param_ctl"
    gen_load_options "$datafile" "$cmd_param_skip" "$cmd_param_errors" "$discardfile" "$badfile" "$cmd_param_rows" "$cmd_param_binary" "$cmd_param_compatible_illegal_chars" "$cmd_param_parallel" "$cmd_param_limit" >> ${gs_loader_file_tmp}
    load_data_sql "$cmd_param_ctl" >> ${gs_loader_file_tmp}
    pre_process_load_file "$gs_loader_file_tmp"
    gen_full_load_sql "$gs_loader_file_tmp" "$cmd_param_guc"

    # transform to \COPY SQL
    copy_sql=$(trans_load_to_copy ${gs_loader_file_tmp})

    load_log info "copy sql: $copy_sql"
    # The current temp-file content is passed along as the failure message.
    gs_loader_check_res_with_clean_file "transform load to copy failed: $(cat ${gs_loader_file_tmp})" "$copy_sql"

    # NOTE(review): ${cmd_param_guc} is unquoted; if it is ever empty this becomes
    # a bare `cat`, which reads stdin - confirm a value is always set before here.
    guc_sql=$(cat ${cmd_param_guc})
    # From here on the temp file holds the final COPY script instead of the LOAD text.
    gen_full_copy_sql "$copy_sql" "$guc_sql" > ${gs_loader_file_tmp}

    # execute \COPY SQL
    copy_res=$(copy_into_table ${gs_loader_file_tmp})
    load_log info "copy result: $copy_res"
    gs_loader_check_res_with_clean_file "after transform: $(cat ${gs_loader_file_tmp})" "$copy_res"

    # get txid
    # The result is flattened to one line; sed keeps the run of digits, commas and
    # optional "skip_" prefixes that follows 'copy_txid:', and awk prints each
    # comma-separated item on its own line.
    txids=$(echo $copy_res | grep 'copy_txid:' | sed -r 's/.*copy_txid:(((skip_)?[0-9,])*).*/\1/' | awk -F',' '{for(i=1;i<=NF;i++)print $i}')
    load_log info "txid is "$txids
    if [ "$txids" == "" ]; then
        load_log error "cannot get copy txid"
        # presumably terminates the script after cleaning up - TODO confirm
        exit_with_clean_file
    fi
    for txid in $txids; do
        # A "skip_<n>" item is not a transaction id: <n> is added to
        # parallel_loader_skip and taken off the remaining skip count.
        if [[ "$txid" == "skip_"* ]]; then
            skiprows=$(echo $txid | sed -r 's/skip_([0-9]+)/\1/')
            parallel_loader_skip=$((parallel_loader_skip+skiprows))
            cmd_param_skip=$((cmd_param_skip-skiprows))
            continue
        fi

        # Real transaction id: remember it, then shrink the remaining error and
        # skip budgets by what this transaction recorded.
        append_one_txid $txid
        recalcuate_errors $txid
        recalcuate_skip $txid

        # The bad file is only generated when enable_error_log prints "on".
        if [ "$(enable_error_log)" == "on" ]; then
            gen_badfile "$badfile" $txid
        fi
    done
}

# Append the "files" section of the load report to a log file: the control file
# followed by the data/bad/discard file of every registered data file.
# Arguments:
#   $1 - log file to append to (also stored in the global logfile)
# Globals read: cmd_param_ctl, loader_datafile_count, loader_datafiles,
#   loader_badfiles, loader_discardfiles.
function loader_stat_files()
{
    logfile="$1"

    # Keep the counter out of the global scope.
    local i

    # One quoted redirection for the whole section: the log path may contain
    # blanks, and file names are written verbatim (no word splitting/globbing).
    {
        echo ""
        echo "Control File:    $cmd_param_ctl"
        echo ""

        echo "There are $loader_datafile_count data files:"
        for ((i = 0; i < loader_datafile_count; i++)); do
            echo " Data File:     ${loader_datafiles[$i]}"
            echo " Bad File:      ${loader_badfiles[$i]}"
            echo " Discard File:  ${loader_discardfiles[$i]}"
            echo ""
        done
    } >> "$logfile"
}

# Write the final report: file list and summary into the log file, a short
# recap on stdout, then run begin/end and elapsed time into the log file.
# Arguments:
#   $1 - start timestamp, in a form `date -d` can parse
#   $2 - end timestamp, same form
# Both are stored in the globals start_time/end_time, which are later
# overwritten with their epoch "seconds.nanoseconds" representation.
# Globals read: cmd_param_log, cmd_param_rows, cmd_param_binary, badfile,
#   gs_loader_version. Globals written: succ_rows_info, elapsed_time.
# Requires GNU date (-d, %N).
function loader_stat_info()
{
    start_time="$1"
    end_time="$2"

    loader_stat_files "$cmd_param_log"
    loader_stat_summary "$cmd_param_log"

    # NOTE(review): badfile is whatever load_one_datafile stored last; the
    # commit log is assumed to exist whenever 'rows' is set for a text load.
    if [[ "XX$cmd_param_rows" != "XX" ]] && [[ "$cmd_param_binary" == "false" ]]; then
        cat "${badfile}_commit.log"
    fi
    echo -e "$gs_loader_version\n"
    # xargs squeezes the matching log line(s) into one blank-normalised line.
    succ_rows_info=$(grep -e "Rows successfully loaded" "$cmd_param_log" | xargs)
    echo " ${succ_rows_info} "
    echo -e ""
    echo -e "log file is: \n $cmd_param_log"

    echo "Run began on $start_time" >> "$cmd_param_log"
    echo "Run ended on $end_time" >> "$cmd_param_log"

    # -u makes both timestamps be read as UTC, so their difference is not
    # affected by the local time zone.
    start_time=$(date -u -d "$start_time" +"%s.%N")
    end_time=$(date -u -d "$end_time" +"%s.%N")

    echo -e "" >> "$cmd_param_log"

    # Integer arithmetic on nanoseconds: exact, and the hour field keeps
    # counting past 24 instead of wrapping like a time-of-day would.
    # 10# stops the zero-padded nanosecond part from being read as octal.
    local start_ns end_ns elapsed_ms
    start_ns=$(( ${start_time%.*} * 1000000000 + 10#${start_time#*.} ))
    end_ns=$(( ${end_time%.*} * 1000000000 + 10#${end_time#*.} ))
    elapsed_ms=$(( (end_ns - start_ns) / 1000000 ))
    if [[ "$elapsed_ms" -lt 0 ]]; then
        # Clock went backwards between the two samples; report zero.
        elapsed_ms=0
    fi

    printf -v elapsed_time '%02d:%02d:%02d.%03d' \
        "$(( elapsed_ms / 3600000 ))" \
        "$(( (elapsed_ms / 60000) % 60 ))" \
        "$(( (elapsed_ms / 1000) % 60 ))" \
        "$(( elapsed_ms % 1000 ))"
    echo "Elapsed time was:     $elapsed_time" >> "$cmd_param_log"
}

# Top-level driver: parse and validate the command line, prepare log/bad/discard
# files, verify the database, load every registered data file in order, write
# the report and exit with the code computed by clean_and_get_exit_code.
# Arguments: the script's command-line arguments, passed through unchanged.
# Globals written: start_time, end_time, i, loader_datafile_index,
#   gs_loader_table_name, exit_code.
function main()
{
    start_time=$(date "+%Y-%m-%d %H:%M:%S.%3N")

    parse_cmd_params "$@"
    check_parameters
    set_namespace

    init_logfile "$cmd_param_log"
    init_badfiles "$cmd_param_bad"
    init_discardfiles "$cmd_param_discard"

    check_db_conn
    check_db_is_not_m

    load_log info "load data begin..."

    for ((i=0; i<loader_datafile_count; i++)); do
        load_log info "processing data: ${loader_datafiles[$i]} bad: ${loader_badfiles[$i]} discard: ${loader_discardfiles[$i]}"
        load_log info "options: skip:${cmd_param_skip} errors:${cmd_param_errors}"
        loader_datafile_index=$i
        load_one_datafile "${loader_datafiles[$i]}" "${loader_badfiles[$i]}" "${loader_discardfiles[$i]}"
    done

    # Second field of the line(s) containing COPY in the last generated script.
    # The pattern used to be "\COPY": in a basic regex the stray backslash is
    # ignored (so plain COPY is what was matched) and newer GNU grep warns about
    # it. grep | awk is kept so an unreadable temp file stays non-fatal.
    gs_loader_table_name=$(grep "COPY" "${gs_loader_file_tmp}" | awk '{print $2}')

    end_time=$(date "+%Y-%m-%d %H:%M:%S.%3N")

    loader_stat_info "$start_time" "$end_time"

    # The script runs under `set -e`: a bare call that returns non-zero would end
    # the script right there, before the status is captured and the final message
    # is logged. Capturing through `||` keeps both. (errexit is not in effect
    # inside a function called this way, so it always runs to its own return.)
    exit_code=0
    clean_and_get_exit_code || exit_code=$?

    load_log info "load data end."

    exit $exit_code
}

# Script entry point: hand every command-line argument to main unchanged.
main "$@"
