# Beginning of app.py
# Authors: BMVP, Samuel Beck

from functools import lru_cache

import numpy as np
import pandas as pd
from flask import Flask, render_template, request
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# Requires Flask (install with: pip install flask).
# Application object; the @app.route decorators in this file register views on it.
app = Flask(__name__)


# Dataset read by the analysis view (path is relative to the working directory).
_DATA_FILE = 'US_youtube_trending_data_popularity_cleaned.csv'

# Model inputs, in the column order the classifier is trained on.
_FEATURES = [
    'likes',
    'comment_count',
    'dislikes',
    'title_length',
    'trending_weekday',
    'trending_month',
    'desc_length',
    'num_tags',
    'categoryId',
]

# Form fields parsed as whole numbers; every other feature is parsed as a float.
_INT_FIELDS = frozenset({'trending_weekday', 'trending_month', 'categoryId'})

_WEEKDAYS = {
    1: "Monday",
    2: "Tuesday",
    3: "Wednesday",
    4: "Thursday",
    5: "Friday",
    6: "Saturday",
    7: "Sunday",
}

_MONTHS = {
    1: "January",
    2: "February",
    3: "March",
    4: "April",
    5: "May",
    6: "June",
    7: "July",
    8: "August",
    9: "September",
    10: "October",
    11: "November",
    12: "December",
}

_CATEGORIES = {
    1: "Film & Animation",
    2: "Autos & Vehicles",
    10: "Music",
    15: "Pets & Animals",
    17: "Sports",
    19: "Travel & Events",
    20: "Gaming",
    22: "People & Blogs",
    23: "Comedy",
    24: "Entertainment",
    25: "News & Politics",
    26: "Howto & Style",
    27: "Education",
    28: "Science & Technology",
    29: "Nonprofits & Activism",
}


@lru_cache(maxsize=1)
def _load_labelled_data():
    """Read the dataset once, add the ``pop_5_7`` label and return it shuffled.

    A row is labelled popular (1) when its '5-7 days' value is at or above the
    80th percentile of that column, i.e. it is in the top ~20%. Rows with any
    missing value are dropped after labelling. The shuffle uses a fixed seed so
    the train split is reproducible.

    The result is cached for the life of the process; callers must treat the
    returned frame as read-only.
    """
    data = pd.read_csv(_DATA_FILE)

    # We look at how a video holds up by the end of the week (5-7 days out).
    threshold = data['5-7 days'].quantile(0.8)
    data['pop_5_7'] = np.where(data['5-7 days'] >= threshold, 1, 0)

    data = data.dropna()

    # https://www.askpython.com/python/examples/split-data-training-and-testing-set
    return data.sample(frac=1, random_state=1234).reset_index(drop=True)


@lru_cache(maxsize=1)
def _trained_model():
    """Fit the random forest on the first 80% of the shuffled data, once per process."""
    data = _load_labelled_data()
    split_index = int(0.8 * len(data))
    train = data[:split_index]

    model = RandomForestClassifier(n_estimators=500, max_features=2, random_state=1234)
    model.fit(train[_FEATURES], train['pop_5_7'])
    return model


def _parse_form(form):
    """Convert the submitted fields to numbers.

    Returns a dict keyed by feature name, or ``None`` when any feature is
    missing, blank, non-numeric or not finite.
    """
    values = {}
    for name in _FEATURES:
        raw = form.get(name, '').strip()
        try:
            value = int(raw) if name in _INT_FIELDS else float(raw)
        except ValueError:
            return None
        # float() accepts "inf"/"nan", which the classifier would reject.
        if not np.isfinite(value):
            return None
        values[name] = value
    return values


def _describe_probability(prob):
    """Map the predicted probability of the popular class to a result message."""
    if prob >= 0.7:
        return "Video will most likely become POPULAR"
    if prob >= 0.5:
        return "Video may become POPULAR"
    return "Video will NOT become popular"


def _recommended_values(data):
    """Summarise the popular rows: medians for counts/lengths, modes for categorical codes."""
    popular = data[data["pop_5_7"] == 1]

    def median_of(column):
        return int(popular[column].median())

    def label_of(column, labels):
        code = int(popular[column].mode()[0])
        # Fall back to the raw code rather than failing on an unmapped value.
        return labels.get(code, str(code))

    return {
        "likes": median_of("likes"),
        "comment_count": median_of("comment_count"),
        "dislikes": median_of("dislikes"),
        "title_length": median_of("title_length"),
        "trending_weekday": label_of("trending_weekday", _WEEKDAYS),
        "trending_month": label_of("trending_month", _MONTHS),
        "desc_length": median_of("desc_length"),
        "num_tags": median_of("num_tags"),
        "categoryId": label_of("categoryId", _CATEGORIES),
    }


@app.route('/run-analysis', methods=['GET', 'POST'])
def run_analysis():
    """Serve the analysis form (GET) or score a submitted video (POST).

    GET renders 'run-analysis.html' with ``recommended``: typical feature
    values of the videos labelled popular.

    POST parses the nine feature fields from the form, predicts the
    probability of the popular class and renders 'results.html' with
    ``result`` (a message) and ``probability`` (rounded to 3 decimals).
    If any field is missing or invalid, a plain-text prompt is returned
    instead of raising.

    The dataset and the fitted model are cached, so the model is trained on
    the first POST only and never for GET requests.
    """
    if request.method == 'POST':
        values = _parse_form(request.form)
        if values is None:
            return "Please fill in required fields"

        # Keep the columns in training order.
        new_data = pd.DataFrame([values])[_FEATURES]

        # Column 1 of predict_proba is the probability of label 1 (popular).
        prob = _trained_model().predict_proba(new_data)[0][1]

        return render_template(
            'results.html',
            result=_describe_probability(prob),
            probability=round(prob, 3)
        )

    # GET request
    recommended = _recommended_values(_load_labelled_data())
    return render_template('run-analysis.html', recommended=recommended)

# Landing page served at the site root.
@app.route('/')
def home():
    """Render the analysis form template for the root URL."""
    # NOTE(review): unlike /run-analysis, no `recommended` context is passed
    # to this template -- confirm the template tolerates it being undefined.
    page = render_template('run-analysis.html')
    return page


if __name__ == '__main__':
    # Imported here so this entry-point block stays self-contained.
    import os

    # The server listens on every interface (0.0.0.0). With debug mode on, the
    # Werkzeug interactive debugger lets anyone who can reach the port run
    # arbitrary Python, so debug is opt-in: set FLASK_DEBUG=1 for development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(host="0.0.0.0", debug=debug_enabled, port=3924)