import csv
from pprint import pprint

# Open the CSV files and use csv.DictReader to load the needed rows into memory

# Select only the customers whose "country" column matches the given country
def read_filter_by_country(country, file_path):
    """Return a ``{customer_id: gender}`` mapping for customers of *country*.

    The file at *file_path* is read as a ``;``-delimited CSV (UTF-8, optional
    BOM) whose header must contain the columns ``customer_id``, ``country``
    and ``gender``.

    Args:
        country: Value the ``country`` column must equal exactly.
        file_path: Path of the customer CSV file.

    Returns:
        dict mapping ``int`` customer ids to the raw ``gender`` string. If a
        customer id appears on several matching rows, the last row wins.

    Raises:
        KeyError: If a required column is missing.
        ValueError: If a matching row's ``customer_id`` is not an integer.
    """
    # newline='' lets the csv module do its own newline handling, as the csv
    # documentation requires for file objects passed to a reader.
    with open(file_path, mode='r', encoding='utf-8-sig', newline='') as file:
        csv_customer = csv.DictReader(file, delimiter=";")
        return {
            int(line["customer_id"]): line["gender"]
            for line in csv_customer
            if line["country"] == country
        }


# Group sales by store and customer gender, summing totals and collecting months
def group_and_sum(customers_gender, file_path):
    """Aggregate sales per store and customer gender.

    The file at *file_path* is read as a ``;``-delimited CSV (UTF-8, optional
    BOM) whose header must contain the columns ``customer_id``, ``store_id``,
    ``month_of_year`` and ``total_sales``. Rows whose customer id is not a key
    of *customers_gender* are ignored.

    Args:
        customers_gender: Mapping of ``int`` customer id to gender, e.g. the
            result of ``read_filter_by_country``.
        file_path: Path of the sales CSV file.

    Returns:
        Nested dict of the form
        ``{store_id: {gender: {"total_sales": float,
        "distinct_months": set of int}}}``.

    Raises:
        KeyError: If a required column is missing.
        ValueError: If a numeric column cannot be parsed.
    """
    store_gender_info = dict()
    # The context manager guarantees the file is closed even when a row fails
    # to parse; newline='' is what the csv module expects for its input files.
    with open(file_path, mode='r', encoding='utf-8-sig', newline='') as file:
        csv_sales = csv.DictReader(file, delimiter=";")
        for line in csv_sales:
            customer_id = int(line["customer_id"])
            if customer_id not in customers_gender:
                continue

            store_id = int(line["store_id"])
            gender_info = store_gender_info.setdefault(store_id, dict())

            gender = customers_gender[customer_id]
            if gender not in gender_info:
                gender_info[gender] = {"total_sales": 0, "distinct_months": set()}

            month_of_year = int(line["month_of_year"])
            gender_info[gender]["total_sales"] += float(line["total_sales"])
            gender_info[gender]["distinct_months"].add(month_of_year)

    return store_gender_info


# Calculating the yearly average
# e.g. store_gender_info = {1: {'M': {"total_sales": 10.5, "distinct_months": {1}}}}
def compute_yearly_average(store_gender_info):
    """Replace each store/gender aggregate with its ``yearly_average``.

    For every ``{"total_sales": ..., "distinct_months": ...}`` entry the value
    stored under ``"yearly_average"`` is ``total_sales`` divided by the number
    of distinct months seen. *store_gender_info* is modified in place: each
    per-gender entry is swapped for ``{"yearly_average": value}``.

    Args:
        store_gender_info: Nested dict ``{store: {gender: {"total_sales":
            number, "distinct_months": sized collection}}}``.

    Returns:
        The same *store_gender_info* object, after the in-place rewrite.

    Raises:
        ZeroDivisionError: If an entry has no distinct months.
    """
    for per_gender in store_gender_info.values():
        # Only values of existing keys are reassigned, so iterating over
        # items() while writing back is safe.
        for gender, stats in per_gender.items():
            month_count = len(stats["distinct_months"])
            stats["yearly_average"] = stats["total_sales"] / month_count
            per_gender[gender] = {"yearly_average": stats["yearly_average"]}

    return store_gender_info

if __name__ == '__main__':
    # Script entry point: keep only USA customers, aggregate their sales per
    # store and gender, then print the resulting averages.
    usa_gender_by_customer = read_filter_by_country('USA', "customer_info.csv")
    usa_sales_by_store = group_and_sum(
        usa_gender_by_customer,
        "sales_by_customer_month_store.csv",
    )
    usa_averages_by_store = compute_yearly_average(usa_sales_by_store)
    pprint(usa_averages_by_store)
