-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathapp.py
More file actions
190 lines (142 loc) · 7.87 KB
/
app.py
File metadata and controls
190 lines (142 loc) · 7.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
import json
import gradio as gr
import joblib
import pandas as pd
from related_topics_prediction import MultiLabelThresholdOptimizer
def convert_to_float(value):
    """Convert an abbreviated count such as ``"1.5K"`` or ``"2M"`` to a float.

    Args:
        value: A number, or a string holding a number optionally marked with
            ``K`` (thousands) or ``M`` (millions). The suffix is matched
            case-insensitively; surrounding whitespace and ``,`` thousands
            separators are ignored.

    Returns:
        The numeric value as a float.

    Raises:
        ValueError: If the text left after removing the suffix is not a number.
    """
    # Numbers have no suffix to strip; `'K' in 5` would raise TypeError.
    if isinstance(value, (int, float)):
        return float(value)

    text = str(value).strip().replace(',', '').upper()
    # K is checked before M, matching the original precedence.
    for suffix, multiplier in (('K', 1_000), ('M', 1_000_000)):
        if suffix in text:
            return float(text.replace(suffix, '')) * multiplier
    return float(text)  # Plain number without a suffix
def convert_to_string(value):
    """Format a number as a short count string (``"999"``, ``"2.5K"``, ``"1.5M"``).

    Args:
        value: The number to format.

    Returns:
        ``value`` in millions with one decimal and an ``M`` suffix when it is
        at least 1,000,000; in thousands with one decimal and a ``K`` suffix
        when it is at least 1,000; otherwise its integer part as a string.
    """
    if value >= 1_000_000:
        return f"{value / 1_000_000:.1f}M"
    if value >= 1_000:
        thousands = f"{value / 1_000:.1f}"
        # Values just below one million round up to "1000.0"; show them as
        # one million instead of the misleading "1000.0K".
        if thousands == "1000.0":
            return "1.0M"
        return f"{thousands}K"
    return str(int(value))  # Keep it as an integer if it's below 1,000
def _parse_count(value):
    """Parse a numeric form field, treating a blank or missing value as 0."""
    if value is None:
        return 0.0
    if isinstance(value, (int, float)):
        return float(value)
    if not str(value).strip():
        return 0.0
    return convert_to_float(value)


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
    if denominator == 0:
        return 0.0
    return numerator / denominator


def greet(title, description, difficulty, topics, likes, accepted, submission, comments, is_premium, predict):
    """Build a one-row feature frame from the form inputs and run the chosen model.

    Args:
        title: Question title.
        description: Question description; vectorized with a stored TF-IDF
            vectorizer.
        difficulty: ``"Easy"``, ``"Hard"`` or anything else (treated as medium).
        topics: Selected related topics, either a comma-separated string or an
            iterable of topic names; ``None`` means no topics.
        likes: Likes count; may use ``K``/``M`` suffixes. Blank counts as 0.
        accepted: Accepted-submissions count, same format as ``likes``.
        submission: Total-submissions count, same format as ``likes``.
        comments: Comments count, same format as ``likes``.
        is_premium: ``"premium"`` marks the question as premium.
        predict: What to predict: ``"acceptance"``, ``"difficulty level"``,
            ``"number of likes"``, ``"number of dislikes"`` or
            ``"related topics"``.

    Returns:
        A human-readable sentence with the prediction, or a prompt to choose a
        prediction target when ``predict`` is not one of the known options.
    """
    likes_count = _parse_count(likes)
    accepted_count = _parse_count(accepted)
    submission_count = _parse_count(submission)

    if isinstance(topics, str):
        selected_topics = topics.split(',')
    else:
        selected_topics = list(topics or [])
    selected_companies = [""]  # The form has no company input

    x_new = pd.DataFrame([{
        'id': 1,
        'title': str(title),
        'description': str(description),
        'is_premium': 1 if is_premium == "premium" else 0,
        'difficulty': 0 if difficulty == "Easy" else 1 if difficulty == "Hard" else 2,
        # Guard against zero submissions instead of raising ZeroDivisionError.
        'acceptance_rate': _safe_ratio(accepted_count, submission_count),
        'frequency': 0,
        'discuss_count': _parse_count(comments),
        'accepted': accepted_count,
        'submissions': submission_count,
        'likes': likes_count,
        'dislikes': 0,
        # Dislikes are unknown at this point, so the ratio is likes / likes.
        'rating': _safe_ratio(likes_count, likes_count + 0),
        'asked_by_faang': 0,
        'similar_questions': ""
    }])

    # Multi-hot encoding for companies and topics, appended after the base columns
    company_data = {company: 1 if company in selected_companies else 0 for company in companies_columns}
    topic_lookup = set(selected_topics)
    topic_data = {topic: 1 if topic in topic_lookup else 0 for topic in the_topics}
    x_new = pd.concat([x_new, pd.DataFrame([company_data]), pd.DataFrame([topic_data])], axis=1)

    # Label encode 'title'
    # NOTE(review): fit_transform refits the loaded encoder on this single
    # title, so the stored mapping is not used. Presumably transform() was
    # intended, but it may reject unseen titles - confirm before changing.
    title_model = joblib.load("title_encoder.pkl")
    x_new['title'] = title_model.fit_transform(x_new['title'])

    if predict == "related topics":
        vectorizer = joblib.load("related_topics_vectorizer.pkl")
        new_tfidf = vectorizer.transform(x_new["description"])
        best_model_info = joblib.load('best_model_related_topics_info.pkl')
        best_model = joblib.load("best_related_topics_model.pkl")
        optimizer = MultiLabelThresholdOptimizer()
        optimizer.optimal_thresholds[best_model_info['model_name']] = best_model_info['threshold']
        predictions = optimizer.predict(best_model, new_tfidf, best_model_info['model_name'])
        mlb = joblib.load("related_topics_label_binarizer.pkl")
        predictions = mlb.inverse_transform(predictions)
        return f"the related topics are: {', '.join(map(str, predictions[0]))}"

    # All remaining targets use the description TF-IDF features as columns.
    vectorizer = joblib.load("tfidf_vectorizer.pkl")
    new_tfidf = vectorizer.transform(x_new["description"])
    new_tfidf_df = pd.DataFrame(new_tfidf.toarray(), columns=vectorizer.get_feature_names_out())
    x_new = pd.concat([x_new, new_tfidf_df], axis=1)
    x_new = x_new.drop(columns=['description'])

    if predict == "difficulty level":
        # Dislikes are not part of the input, so estimate them first.
        dislikes_model, feature_names = joblib.load("dislikes_XGB_regression_model.pkl")
        dislike = dislikes_model.predict(x_new[feature_names])
        x_new['dislikes'] = dislike[0]
        # The original used ':' here (an annotation), so rating was never updated.
        x_new['rating'] = _safe_ratio(likes_count, likes_count + dislike[0])

        class_model = joblib.load("level_classifier_model.pkl")
        trained_feature_names = class_model.named_steps['standardscaler'].get_feature_names_out()
        # Add any column the classifier expects but the frame lacks BEFORE
        # selecting, otherwise the selection raises KeyError.
        missing = [col for col in trained_feature_names if col not in x_new.columns]
        if missing:
            filler = pd.DataFrame(0, index=x_new.index, columns=missing)
            x_new = pd.concat([x_new, filler], axis=1)
        x_new = x_new[trained_feature_names]  # Reorder and remove extra columns

        predictions = class_model.predict(x_new)
        level_names = {0: "Easy", 1: "Hard", 2: "Medium"}
        predicted_class = predictions[0]
        # Fall back to the raw class value rather than failing on an unknown label.
        prediction = level_names.get(predicted_class, str(predicted_class))
        return f"the level difficulty is: {prediction}"

    # The regression targets differ only in the model file and the wording.
    regression_targets = {
        "acceptance": ("accepted_submissions_regression_model.pkl", "the accepted is: "),
        "number of likes": ("likes_random_forest_regression_model.pkl", "the likes amount is: "),
        "number of dislikes": ("dislikes_XGB_regression_model.pkl", "the dislikes amount is: "),
    }
    if predict in regression_targets:
        model_path, prefix = regression_targets[predict]
        model, feature_names = joblib.load(model_path)
        predictions = model.predict(x_new[feature_names])  # Only the trained features
        return f"{prefix}{convert_to_string(predictions[0])}"

    return "please choose what you would like to predict"
# Load the column names used for the multi-hot topic/company encodings.
with open("encoding_metadata.json", "r", encoding="utf-8") as f:
    encoding_metadata = json.load(f)

# Drop the empty-string placeholder label. Filtering (rather than
# list.remove) does not raise when the placeholder is absent.
the_topics = [topic for topic in encoding_metadata["related_topics_columns"] if topic != ""]
companies_columns = [company for company in encoding_metadata["companies_columns"] if company != ""]
# Form fields, in the same order as the parameters of `greet`.
input_components = [
    gr.Text(label="Title"),
    gr.Text(label="Description"),
    gr.Radio(choices=["Easy", "Medium", "Hard"], label="Difficulty Level"),
    gr.Dropdown(
        the_topics,
        multiselect=True,
        label="Related Topics",
        info="choose all the related topics of this question",
    ),
    gr.Text(label="Likes Amount"),
    gr.Text(label="Accepted Amount"),
    gr.Text(label="Submission Amount"),
    gr.Text(label="Comments Amount"),
    gr.Radio(choices=["premium", "not premium"], label="Is Premium"),
    gr.Radio(
        choices=[
            "acceptance",
            "difficulty level",
            "number of likes",
            "number of dislikes",
            "related topics",
        ],
        label="Please Predict..",
    ),
]

# Single text box that shows the sentence returned by `greet`.
output_components = [gr.Text(label="The Prediction")]

demo = gr.Interface(
    fn=greet,
    inputs=input_components,
    outputs=output_components,
    title="LEETCODE PREDICTOR",
    description=(
        "please go to the leetcode website (https://leetcode.com/problemset//) "
        "choose a question and copy the question's detiles to the relevant spaces, "
        "then choose what you whould like to predict and submit. "
        "the prediction result will appear on the right side of the screen 😉"
    ),
)

# Start the web UI when the file is run.
demo.launch()