DEEP LEARNING ON THE US DEMOCRATIC DEBATES
By Pamela Dekas
import sys
import csv
import re
import nltk
import string
import unicodedata
from textblob import TextBlob
from collections import Counter
import pandas as pd
import numpy as np
from wordcloud import WordCloud
from nltk.classify import *
from nltk.corpus import stopwords
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.feature_extraction.text import CountVectorizer
from nltk.tokenize import word_tokenize
import nltk.classify.util
import matplotlib.pyplot as plt
from string import punctuation
from nltk.corpus import stopwords
from wordcloud import STOPWORDS
import os
from sklearn.model_selection import train_test_split
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence, text
from keras.callbacks import EarlyStopping
Using TensorFlow backend.
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input> in <module>()
     22 import os
     23 from sklearn.model_selection import train_test_split
---> 24 from keras.datasets import imdb
     25 from keras.models import Sequential
     26 from keras.layers import Dense

~\Anaconda3\lib\site-packages\keras\__init__.py in <module>()
      1 from __future__ import absolute_import
      2
----> 3 from . import utils
      4 from . import activations
      5 from . import applications

~\Anaconda3\lib\site-packages\keras\utils\__init__.py in <module>()
      4 from . import data_utils
      5 from . import io_utils
----> 6 from . import conv_utils
      7 from . import losses_utils
      8 from . import metrics_utils

~\Anaconda3\lib\site-packages\keras\utils\conv_utils.py in <module>()
      7 from six.moves import range
      8 import numpy as np
----> 9 from .. import backend as K

~\Anaconda3\lib\site-packages\keras\backend\__init__.py in <module>()
----> 1 from .load_backend import epsilon
      2 from .load_backend import set_epsilon
      3 from .load_backend import floatx
      4 from .load_backend import set_floatx
      5 from .load_backend import cast_to_floatx

~\Anaconda3\lib\site-packages\keras\backend\load_backend.py in <module>()
     88 elif _BACKEND == 'tensorflow':
     89     sys.stderr.write('Using TensorFlow backend.\n')
---> 90     from .tensorflow_backend import *
     91 else:
     92     # Try and load external backend.

~\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py in <module>()
     52
     53 # Private TF Keras utils
---> 54 get_graph = tf_keras_backend.get_graph
     55 # learning_phase_scope = tf_keras_backend.learning_phase_scope # TODO
     56 name_scope = tf.name_scope

AttributeError: module 'tensorflow.python.keras.backend' has no attribute 'get_graph'
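This AttributeError is a symptom of a standalone keras install that is out of sync with the installed TensorFlow. One common workaround, sketched below on the assumption that a TensorFlow 2.x wheel is present, is to import the Keras API bundled with TensorFlow (tensorflow.keras) instead of the separate keras package; the rest of the notebook can then use the same names unchanged.

# Workaround sketch: import the Keras that ships inside TensorFlow,
# which avoids the keras <-> tensorflow version mismatch above.
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.preprocessing import sequence, text
from tensorflow.keras.callbacks import EarlyStopping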
speech = pd.read_csv('debate_transcripts_v3_2020-02-26.csv', encoding='unicode_escape')
df = pd.DataFrame(speech)
dem_speakers = df["speaker"]
number_of_speakers = len(set(dem_speakers))
print("Number of speakers:", number_of_speakers, "speakers")
# Mean duration of a speech turn.
print("Mean speaking time:", np.mean(df["speaking_time_seconds"]), "seconds")
print("Dataset size:", len(df))
Number of speakers: 106 speakers
Mean speaking time: 16.49230769230769 seconds
Dataset size: 5911
RangeIndex: 5911 entries, 0 to 5910
Data columns (total 6 columns):
date 5911 non-null object
debate_name 5911 non-null object
debate_section 5911 non-null object
speaker 5911 non-null object
speech 5911 non-null object
speaking_time_seconds 5395 non-null float64
dtypes: float64(1), object(5)
memory usage: 277.2+ KB
df.groupby('speaker')['speaking_time_seconds'].sum(level=0).nlargest(10).plot.bar()
plt.title('Breakdown by speaking time')
plt.show()
debate_time = df.groupby(by=['speaker', 'date']).speaking_time_seconds.sum().nlargest(15)
debate_time.plot()
Removing the columns that will not be used later in the project and creating the final dataset
df = df.drop(['date', 'debate_name', 'debate_section', 'speaking_time_seconds'], axis=1)
df.head(5)
   speaker          speech
0  Norah O'Donnell  Good evening and welcome, the Democratic presi...
1  Gayle King       And Super Tuesday is just a week away and this...
2  Norah O'Donnell  And CBS News is proud to bring you this debate...
3  Gayle King       And we are partnering tonight also with Twitte...
4  Norah O'Donnell  Now, here are the rules for the next two hours...
PREPROCESSING
import nltk
nltk.download('punkt')
stopwords = nltk.corpus.stopwords.words('english')
Tailored_stopwords = ('im', 'ive', 'mr', 'weve', 'dont', 'well', 'will', 'make', 'us', 'we',
                      'I', 'make', 'got', 'need', 'want', 'think',
                      'going', 'go', 'one', 'thank', 'going',
                      'way', 'say', 'every', 're', 'us', 'first',
                      'now', 'said', 'know', 'look', 'done', 'take',
                      'number', 'two', 'three', 's', 'm', 't',
                      'let', 'don', 'tell', 've', 'im', 'mr', 'put', 'maybe', 'whether', 'many', 'll', 'around', 'thing', 'Secondly', 'doesn', 'lot')
#stopwords = nltk.corpus.stopwords.words('english')
stopwords = set(STOPWORDS)
stopwords = stopwords.union(Tailored_stopwords)
[nltk_data] Downloading package punkt to C:\Users\pamel.DESKTOP-O19M7N
[nltk_data] F\AppData\Roaming\nltk_data...
[nltk_data] Package punkt is already up-to-date!
def Text_cleansing(speech):
    speech = re.sub('@[A-Za-z0-9]+', '', str(speech))  # remove @mentions
    speech = re.sub('#', '', speech)                   # remove '#' hashtag symbols
    speech = re.sub('rt', '', speech)                  # strip 'rt'; note this also removes 'rt' inside words (e.g. 'partnering' -> 'panering')
    speech = re.sub(',', ' ', speech)
    speech = re.sub('!', ' ', speech)
    speech = re.sub(':', ' ', speech)
    speech = re.sub("'", "", speech)
    speech = re.sub('"', '', speech)
    speech = speech.lower()
    speech = word_tokenize(speech)
    return speech

def remove_stopwords(speech):
    speech_clean = [word for word in speech if word not in stopwords]
    return speech_clean
df['speech_tokens'] = df['speech'].apply(Text_cleansing)
df.head(5)
   speaker          speech                                              speech_tokens
0  Norah O'Donnell  Good evening and welcome, the Democratic presi...  [good, evening, and, welcome, the, democratic,...
1  Gayle King       And Super Tuesday is just a week away and this...  [and, super, tuesday, is, just, a, week, away,...
2  Norah O'Donnell  And CBS News is proud to bring you this debate...  [and, cbs, news, is, proud, to, bring, you, th...
3  Gayle King       And we are partnering tonight also with Twitte...  [and, we, are, panering, tonight, also, with, ...
4  Norah O'Donnell  Now, here are the rules for the next two hours...  [now, here, are, the, rules, for, the, next, t...
df['speech_clean'] = df['speech_tokens'].apply(remove_stopwords)
df.head(5)
   speaker          speech                                              speech_tokens                                       speech_clean
0  Norah O'Donnell  Good evening and welcome, the Democratic presi...  [good, evening, and, welcome, the, democratic,...  [good, evening, welcome, democratic, president...
1  Gayle King       And Super Tuesday is just a week away and this...  [and, super, tuesday, is, just, a, week, away,...  [super, tuesday, week, away, biggest, primary,...
2  Norah O'Donnell  And CBS News is proud to bring you this debate...  [and, cbs, news, is, proud, to, bring, you, th...  [cbs, news, proud, bring, debate, along, co-sp...
3  Gayle King       And we are partnering tonight also with Twitte...  [and, we, are, panering, tonight, also, with, ...  [panering, tonight, twitter, ., home, paicipat...
4  Norah O'Donnell  Now, here are the rules for the next two hours...  [now, here, are, the, rules, for, the, next, t...  [rules, next, hours, ., asked, question, minut...
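A lemmatization step is not applied in the run above (the closing notes mention that stemming/lemmatization seemed to add little). For reference, a minimal sketch of how it could be slotted in after stopword removal is shown here, assuming NLTK's WordNet data is available; lemmatize_tokens is a name introduced only for this example.

from nltk.stem import WordNetLemmatizer
nltk.download('wordnet')

lemmatizer = WordNetLemmatizer()

def lemmatize_tokens(tokens):
    # Map each token to its WordNet lemma (noun form by default),
    # e.g. 'rules' -> 'rule', 'hours' -> 'hour'.
    return [lemmatizer.lemmatize(token) for token in tokens]

# df['speech_clean'] = df['speech_clean'].apply(lemmatize_tokens)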
def wordcloud(dataframe):
    # Build one word cloud from the whole column of cleaned tokens passed in.
    wordCloud = WordCloud(width=500, height=300, background_color='white', max_font_size=110).generate(str(dataframe))
    plt.imshow(wordCloud, interpolation="bilinear")
    plt.axis("off")
    plt.title("speech wordcloud")
    plt.show()

wordcloud(df['speech_clean'])
For the rest of the project, the list of speakers is reduced to the most prominent candidates (top 7 speakers)
df = df.loc[df.speaker.isin({'Joe Biden', 'Bernie Sanders', 'Elizabeth Warren', 'Michael Bloomberg', 'Pete Buttigieg', 'Amy Klobuchar', 'Tulsi Gabbard'})]
df.head()
df.shape
CountVectorizer and creation of the per-candidate word dictionary, to be used with the back-up ML models
Lexical analysis
cv = CountVectorizer(stop_words=stopwords)
df_cv = cv.fit_transform(df.speech)
df_words = pd.DataFrame(df_cv.toarray(), columns=cv.get_feature_names())
df_words.index = df.speaker
df_words = df_words.transpose()
df_words
speaker  Bernie Sanders  Michael Bloomberg  Michael Bloomberg  Bernie Sanders  ...  Amy Klobuchar  Elizabeth Warren  Elizabeth Warren
00                    0                  0                  0               0  ...              0                 0                 0
000                   2                  0                  0               0  ...              0                 0                 0
001st                 0                  0                  0               0  ...              0                 0                 0
...                 ...                ...                ...             ...  ...            ...               ...               ...
zeroed                0                  0                  0               0  ...              0                 0                 0
zip                   0                  0                  0               0  ...              0                 0                 0
zone                  0                  0                  0               0  ...              0                 0                 0

6385 rows × 2245 columns
top_dict = {}
for c in df_words.columns:
    top = df_words[c].sort_values(ascending=False).head(30)
    top_dict[c] = list(zip(top.index, top.values))

for speaker, top_words in top_dict.items():
    print(speaker)
    print(', '.join([word for word, count in top_words[0:9]]))
    print('---')
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input> in <module>()
      1 top_dict = {}
      2 for c in df_words.columns:
----> 3     top = df_words[c].sort_values(ascending=False).head(30)
      4     top_dict[c] = list(zip(top.index, top.values))
      5 for speaker, top_words in top_dict.items():

TypeError: sort_values() missing 1 required positional argument: 'by'
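The TypeError here and the KeyError further down share the same cause: several speeches belong to the same speaker, so df_words has duplicate column labels. df_words[c] therefore returns a DataFrame rather than a Series (whose sort_values requires a 'by' argument), and top_dict never ends up keyed by the seven speaker names. A minimal sketch of one way around this, assuming the goal is one aggregated word-count column per speaker, is to collapse the duplicate columns first:

# Aggregate duplicate speaker columns into one column per speaker
# (transpose, group rows by speaker label, sum, transpose back).
df_words_agg = df_words.T.groupby(level=0).sum().T

top_dict = {}
for speaker in df_words_agg.columns:
    # df_words_agg[speaker] is now a Series, so sort_values() works as intended.
    top = df_words_agg[speaker].sort_values(ascending=False).head(30)
    top_dict[speaker] = list(zip(top.index, top.values))

With df_words_agg in place, the later Counter loop would also find the speaker keys it expects.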
df2 = pd.DataFrame(top_dict)
df2.head(15)
from collections import Counter
words = []
for speaker in df_words.columns:
    top = [word for (word, count) in top_dict[speaker]]
    for t in top:
        words.append(t)

Counter(words).most_common(15)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input> in <module>()
      2 words = []
      3 for speaker in df_words.columns:
----> 4     top = [word for (word, count) in top_dict[speaker]]
      5     for t in top:
      6         words.append(t)

KeyError: 'Bernie Sanders'
Implementation of the model
print(df.columns)
print(df.shape)
df['speaker'] = df['speaker'].astype(str)
Index(['speaker', 'speech', 'speech_tokens', 'speech_clean'], dtype='object')
(2245, 4)
Embedding
RANDOM_STATE = 50
EPOCHS = 5
BATCH_SIZE = 256
EMB_DIM = 100
SAVE_MODEL = True
X = df['speech_clean']
print(X.head())
X.shape
5     [well, you're, right, economy, really, great, ...
6     [senator-]
8     [think, donald, trump, thinks, would, better, ...
9     [oh, mr., bloomberg, ., let, tell, mr., putin,...
11    [know, president, russia, wants, it's, chaos, .]
Name: speech_clean, dtype: object
(2245,)
import gensim

emb_model = gensim.models.Word2Vec(sentences=X, size=EMB_DIM, window=5, workers=4, min_count=1)
print('The learned vocabulary size is', len(list(emb_model.wv.vocab)))
The learned vocabulary size is 7139
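As a quick sanity check on the learned vectors (not part of the original run), gensim's most_similar can be queried for a debate-related term; 'healthcare' below is only an illustrative query and can be replaced by any token present in emb_model.wv.vocab.

# Inspect the nearest neighbours of a sample token in the learned embedding space.
print(emb_model.wv.most_similar('healthcare', topn=5))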
from keras.preprocessing.text import Tokenizer
import tokenize

max_length = max([len(s) for s in X])
tokenizer_new = Tokenizer()
tokenizer_new.fit_on_texts(X)
X_seq = tokenizer_new.texts_to_sequences(X)
X_fin = sequence.pad_sequences(X_seq, maxlen=max_length)
print(X_fin.shape)

emb_vec = emb_model.wv
MAX_NB_WORDS = len(list(emb_vec.vocab))
tokenizer_word_index = tokenizer_new.word_index
vocab_size = len(tokenizer_new.word_index) + 1
embedded_matrix = np.zeros((vocab_size, EMB_DIM))
for word, i in tokenizer_word_index.items():
    if i >= MAX_NB_WORDS:
        continue
    try:
        embedding_vector = emb_vec[word]
        wv_matrix[i] = embedding_vector  # 'wv_matrix' is undefined here; the bare except below silently skips every word
    except:
        pass

embedded_matrix.shape
print(embedded_matrix)
[[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
...
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]]
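The all-zero matrix above is consistent with the loop writing into the undefined name wv_matrix while the bare except swallows the resulting NameError, so no Word2Vec vector is ever copied in. A minimal corrected version of the loop, assuming the intent was to fill embedded_matrix, would be:

for word, i in tokenizer_word_index.items():
    if i >= MAX_NB_WORDS:
        continue
    try:
        # Copy the learned Word2Vec vector into the row for this token id.
        embedded_matrix[i] = emb_vec[word]
    except KeyError:
        # Token not in the Word2Vec vocabulary: leave its row as zeros.
        pass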
Preparation of the variables
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

y = df.speaker
print(y.head(10))
y.shape
5 1
6 4
8 4
9 1
11 5
12 2
13 2
15 5
21 3
23 1
Name: speaker, dtype: int32
(2245,)
Counter({'Bernie Sanders': 430,
'Michael Bloomberg': 97,
'Pete Buttigieg': 392,
'Elizabeth Warren': 440,
'Joe Biden': 456,
'Amy Klobuchar': 353,
'Tulsi Gabbard': 77})
le = LabelEncoder()
df['speaker'] = le.fit_transform(df['speaker'])
df.head()
y = df.speaker
y.head()
print(y.shape)
print(X_fin.shape)

X_train, X_test, y_train, y_test = train_test_split(X_fin, y, test_size=0.2, random_state=42)
print(X_train.shape)
print(y_train.shape)
Building the neural network
model_pre_trained = Sequential()
model_pre_trained.add(Embedding(vocab_size, EMB_DIM, weights=[embedded_matrix],
                                input_length=max_length, trainable=False))
model_pre_trained.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model_pre_trained.add(Dense(1, activation='softmax'))
model_pre_trained.compile(loss='categorical_crossentropy',
                          optimizer='adam',
                          metrics=['accuracy'])
model_pre_trained.summary()
Model: "sequential_11"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding_11 (Embedding) (None, 140, 100) 714000
_________________________________________________________________
lstm_13 (LSTM) (None, 128) 117248
_________________________________________________________________
dense_9 (Dense) (None, 1) 129
=================================================================
Total params: 831,377
Trainable params: 117,377
Non-trainable params: 714,000
_________________________________________________________________
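A single softmax unit combined with categorical_crossentropy on integer labels cannot represent a 7-class problem, which is the most likely explanation for the negative losses and flat accuracy in the training log below. A corrected head, sketched here under the assumption that y holds the integer labels produced by the LabelEncoder, uses seven output units with sparse_categorical_crossentropy; it also fits on X_train/y_train rather than on all of X_fin, so that the later evaluate on X_test is a genuine held-out score.

num_classes = len(le.classes_)  # 7 candidates

model_fixed = Sequential()
model_fixed.add(Embedding(vocab_size, EMB_DIM, weights=[embedded_matrix],
                          input_length=max_length, trainable=False))
model_fixed.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model_fixed.add(Dense(num_classes, activation='softmax'))  # one probability per candidate
model_fixed.compile(loss='sparse_categorical_crossentropy',  # integer labels from LabelEncoder
                    optimizer='adam',
                    metrics=['accuracy'])

history_fixed = model_fixed.fit(X_train, y_train, batch_size=BATCH_SIZE,
                                epochs=20, verbose=1, validation_split=0.2)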
Fitting
history_pre_trained = model_pre_trained.fit(X_fin, y, batch_size=BATCH_SIZE, epochs=20, verbose=1, validation_split=0.2)
Train on 1796 samples, validate on 449 samples
Epoch 1/20
1796/1796 [==============================] - 4s 2ms/step - loss: 0.5429 - accuracy: 0.1754 - val_loss: -0.4417 - val_accuracy: 0.2472
Epoch 2/20
1796/1796 [==============================] - 3s 2ms/step - loss: -6.7429 - accuracy: 0.1776 - val_loss: -14.1017 - val_accuracy: 0.2472
Epoch 3/20
1796/1796 [==============================] - 3s 2ms/step - loss: -15.8550 - accuracy: 0.1776 - val_loss: -19.5441 - val_accuracy: 0.2472
Epoch 4/20
1796/1796 [==============================] - 3s 2ms/step - loss: -20.7949 - accuracy: 0.1776 - val_loss: -23.4335 - val_accuracy: 0.2472
Epoch 5/20
1796/1796 [==============================] - 3s 2ms/step - loss: -24.1430 - accuracy: 0.1776 - val_loss: -25.9735 - val_accuracy: 0.2472
Epoch 6/20
1796/1796 [==============================] - 3s 2ms/step - loss: -26.4535 - accuracy: 0.1776 - val_loss: -28.0725 - val_accuracy: 0.2472
Epoch 7/20
1796/1796 [==============================] - 3s 2ms/step - loss: -28.4266 - accuracy: 0.1776 - val_loss: -29.9313 - val_accuracy: 0.2472
Epoch 8/20
1796/1796 [==============================] - 3s 2ms/step - loss: -30.1754 - accuracy: 0.1776 - val_loss: -31.6261 - val_accuracy: 0.2472
Epoch 9/20
1796/1796 [==============================] - 3s 2ms/step - loss: -31.8791 - accuracy: 0.1776 - val_loss: -33.3337 - val_accuracy: 0.2472
Epoch 10/20
1796/1796 [==============================] - 4s 2ms/step - loss: -33.5166 - accuracy: 0.1776 - val_loss: -34.9834 - val_accuracy: 0.2472
Epoch 11/20
1796/1796 [==============================] - 3s 2ms/step - loss: -35.1544 - accuracy: 0.1776 - val_loss: -36.5973 - val_accuracy: 0.2472
Epoch 12/20
1796/1796 [==============================] - 3s 2ms/step - loss: -36.7253 - accuracy: 0.1776 - val_loss: -38.2070 - val_accuracy: 0.2472
Epoch 13/20
1796/1796 [==============================] - 3s 2ms/step - loss: -38.3344 - accuracy: 0.1776 - val_loss: -39.8655 - val_accuracy: 0.2472
Epoch 14/20
1796/1796 [==============================] - 3s 2ms/step - loss: -39.9810 - accuracy: 0.1776 - val_loss: -41.5162 - val_accuracy: 0.2472
Epoch 15/20
1796/1796 [==============================] - 3s 1ms/step - loss: -41.6567 - accuracy: 0.1776 - val_loss: -43.2049 - val_accuracy: 0.2472
Epoch 16/20
1796/1796 [==============================] - 3s 1ms/step - loss: -43.2579 - accuracy: 0.1776 - val_loss: -44.8235 - val_accuracy: 0.2472
Epoch 17/20
1796/1796 [==============================] - 3s 1ms/step - loss: -44.9030 - accuracy: 0.1776 - val_loss: -46.4982 - val_accuracy: 0.2472
Epoch 18/20
1796/1796 [==============================] - 2s 1ms/step - loss: -46.5038 - accuracy: 0.1776 - val_loss: -48.0627 - val_accuracy: 0.2472
Epoch 19/20
1796/1796 [==============================] - 3s 1ms/step - loss: -48.0124 - accuracy: 0.1776 - val_loss: -49.5424 - val_accuracy: 0.2472
Epoch 20/20
1796/1796 [==============================] - 2s 1ms/step - loss: -49.5209 - accuracy: 0.1776 - val_loss: -51.1489 - val_accuracy: 0.2472
Model evaluation
score = model_pre_trained.evaluate(X_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
Test loss: -51.148848297866785
Test accuracy: 0.18930958211421967
Problems: a large number of stopwords still need to be added to the dictionary, doubts about the activation function, and stemming/lemmatization seems to bring little. Areas for improvement: explore n-grams to give words more context (see the sketch below), and build a custom stopword dictionary tailored to the debates (association of ideas).
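On the n-gram idea, a minimal sketch of how bigrams could be brought into the lexical analysis is to extend the existing CountVectorizer; ngram_range=(1, 2) below is just one possible setting, and cv_ngrams is a name introduced for this example.

# Count unigrams and bigrams instead of unigrams only, reusing the same stopword set.
cv_ngrams = CountVectorizer(stop_words=list(stopwords), ngram_range=(1, 2))
df_cv_ngrams = cv_ngrams.fit_transform(df.speech)
print(df_cv_ngrams.shape)
print(cv_ngrams.get_feature_names()[:20])  # inspect some of the unigram/bigram features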