Appendix
Appendix I: Information about our database
DATABASE.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 332 entries, 0 to 331
Data columns (total 25 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  --------
 0   Adresse(km)  332 non-null    float64
 1   Age          332 non-null    float64
 2   Hopital      332 non-null    category
 3   Grossesse    332 non-null    category
 4   IU           332 non-null    category
 5   MAV          332 non-null    category
 6   Anémie       332 non-null    category
 7   Paludisme    332 non-null    category
 8   Avortement   332 non-null    category
 9   Infections   332 non-null    category
 10  Autres       332 non-null    category
 11  AB           332 non-null    category
 12  AP           332 non-null    category
 13  OCYTOCIQUES  332 non-null    category
 14  AI           332 non-null    category
 15  ASM          332 non-null    category
 16  ANAL         332 non-null    category
 17  AAL          332 non-null    category
 18  Vitamine     332 non-null    category
 19  Celphalo     332 non-null    category
 20  AA           332 non-null    category
 21  Transfusion  332 non-null    category
 22  Autre2       332 non-null    category
 23  MeanDDSHop   332 non-null    float64
 24  DDS          332 non-null    int32
dtypes: category(21), float64(3), int32(1)
memory usage: 19.6 KB
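The 21 columns stored as category suggest that the qualitative variables were converted after loading, since pandas reads text columns as object by default. A minimal sketch of that presumed preprocessing step; the file name and loading call are assumptions, not shown in the original listing:

import pandas as pd

# Hypothetical loading step; the file name is an assumption
DATABASE = pd.read_csv("base.csv")

# Convert every text column to the memory-efficient category dtype,
# which matches the category(21) reported by DATABASE.info()
for col in DATABASE.select_dtypes(include="object").columns:
    DATABASE[col] = DATABASE[col].astype("category")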
Appendix II: Splitting the database
from sklearn.model_selection import train_test_split

# Split the database into data and target
data = BASE[['Adresse(km)', 'Age', 'Hopital', 'Grossesse', 'IU', 'MAV', 'Anémie',
             'Paludisme', 'Avortement', 'Infections', 'Autres', 'AB', 'AP',
             'OCYTOCIQUES', 'AI', 'ASM', 'ANAL', 'AAL', 'Vitamine', 'Celphalo', 'AA',
             'Transfusion', 'Autre2', 'MeanDDSHop']]
target = BASE[['DDS']]

# Test and training data
x, y = data, target
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20)
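Scikit-learn estimators require numeric inputs, so the category predictors presumably had to be encoded before fitting the models of Appendix III; that step does not appear in the listings. A minimal sketch, assuming integer codes are an acceptable encoding for these (mostly binary) categories:

# Hypothetical encoding step: replace each category column by its integer codes
x = data.copy()
for col in x.select_dtypes(include="category").columns:
    x[col] = x[col].cat.codes
y = target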
Appendix III: Fitting the machine learning models to our data
Decision trees
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=0)
# Adjust test_size and random_state as needed

# Model initialization and training
model1 = tree.DecisionTreeRegressor(max_depth=300, min_samples_split=25)
model1.fit(x_train, y_train)

# Model evaluation
y_pred = model1.predict(x_test)

# Compute various scores
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r_squared = r2_score(y_test, y_pred)

# Print the scores in a formatted manner
print("Mean Absolute Error: {:.2f}".format(mae))
print("Mean Squared Error: {:.2f}".format(mse))
print("R-squared: {:.2f}".format(r_squared))
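The values max_depth=300 and min_samples_split=25 appear to be hand-picked; a common refinement is to tune them by cross-validation. A minimal sketch using GridSearchCV, where the grid values are illustrative assumptions rather than values from the study:

from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor

# Illustrative grid; the ranges are assumptions
param_grid = {"max_depth": [5, 10, 50, 100, 300],
              "min_samples_split": [2, 10, 25, 50]}
search = GridSearchCV(DecisionTreeRegressor(random_state=0), param_grid,
                      scoring="neg_mean_absolute_error", cv=5)
search.fit(x_train, y_train)
print("Best parameters:", search.best_params_)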
k-nearest neighbors
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Assuming x and y are your data and target
x, y = data, target

# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42)
# Adjust random_state as needed

# Model initialization and training
MODEL = KNeighborsRegressor(leaf_size=30000000, metric='minkowski',
                            n_neighbors=10, p=4000, weights='uniform')
MODEL.fit(x_train, y_train)

# Model evaluation
y_pred = MODEL.predict(x_test)

# Compute various scores
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r_squared = r2_score(y_test, y_pred)

# Print the scores
print("Mean Absolute Error:", mae)
print("Mean Squared Error:", mse)
print("R-squared:", r_squared)

# Visualize the results
plt.scatter(y_test, y_pred, color='blue')
plt.xlabel("Actual Values")
plt.ylabel("Predicted Values")
plt.title("Actual vs. Predicted Values")
plt.show()
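Two of the listed hyperparameters are unusual: leaf_size only affects the speed of the tree-based neighbor search, not the predictions, and with p=4000 the Minkowski distance is numerically close to the Chebyshev (maximum-coordinate) distance. A sketch of a more conventional choice of n_neighbors by cross-validation; the candidate range 1 to 30 is an assumption:

from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsRegressor

# Illustrative search over k; the range is an assumption
for k in range(1, 31):
    knn = KNeighborsRegressor(n_neighbors=k)
    score = cross_val_score(knn, x_train, y_train.values.ravel(),
                            scoring="neg_mean_absolute_error", cv=5).mean()
    print("k = {:2d}  CV MAE = {:.3f}".format(k, -score))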
Neural network
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Assuming x and y are your data and target
x, y = data, target

# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42)

# Feature scaling
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Model initialization and training
model = MLPRegressor(hidden_layer_sizes=(300, 700, 1), max_iter=1000)
# ravel() flattens the single-column target to the 1-D shape MLPRegressor expects
model.fit(x_train_scaled, y_train.values.ravel())

# Model evaluation
y_pred = model.predict(x_test_scaled)

# Compute various scores
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r_squared = r2_score(y_test, y_pred)

# Print the scores in a formatted manner
print("Mean Absolute Error: {:.2f}".format(mae))
print("Mean Squared Error: {:.2f}".format(mse))
print("R-squared: {:.2f}".format(r_squared))

# Visualize the results
plt.scatter(y_test, y_pred, color='blue')
plt.xlabel("Actual Values")
plt.ylabel("Predicted Values")
plt.title("Actual vs. Predicted Values")
plt.show()
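Note that in scikit-learn, hidden_layer_sizes lists only the hidden layers; the single output neuron is added automatically for regression. The trailing 1 in (300, 700, 1) therefore creates a one-neuron hidden bottleneck rather than an output layer. A sketch of what was presumably intended (the two-layer architecture and early stopping are assumptions):

# Presumed intent (assumption): two hidden layers; MLPRegressor itself
# appends the single output neuron for regression
model = MLPRegressor(hidden_layer_sizes=(300, 700), max_iter=1000,
                     early_stopping=True, random_state=42)
model.fit(x_train_scaled, y_train.values.ravel())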
Generalized linear model with the Poisson distribution
import pandas as pd
from patsy import dmatrices
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

poisson_training_results = sm.GLM(y_train, x_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.summary())  # display the summary
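sm.GLM does not add an intercept on its own, and the fitted model can be scored on the held-out set with the same metrics as the scikit-learn models. A minimal sketch of both steps; the intercept is an assumption, since the original listing fits without one:

import statsmodels.api as sm
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Optionally add an intercept; sm.GLM does not include one by default
x_train_c = sm.add_constant(x_train)
x_test_c = sm.add_constant(x_test)
poisson_results = sm.GLM(y_train, x_train_c, family=sm.families.Poisson()).fit()

# Evaluate on the held-out set; predict() returns the expected count
y_pred = poisson_results.predict(x_test_c)
print("Mean Absolute Error: {:.2f}".format(mean_absolute_error(y_test, y_pred)))
print("Mean Squared Error: {:.2f}".format(mean_squared_error(y_test, y_pred)))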
Generalized linear model with the negative binomial distribution
import pandas as pd
from patsy import dmatrices
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

nb_training_results = sm.GLM(y_train, x_train, family=sm.families.NegativeBinomial()).fit()
print(nb_training_results.summary())  # display the summary
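NegativeBinomial() uses the fixed dispersion value alpha=1 by default; a standard refinement is to estimate alpha from the Poisson fit through the Cameron-Trivedi auxiliary OLS regression. A minimal sketch, assuming y_train holds the DDS counts from Appendix II and that poisson_training_results is the fit shown above:

import statsmodels.api as sm

# Estimate the NB2 dispersion parameter alpha from the Poisson fit
# (Cameron-Trivedi auxiliary OLS regression, no intercept)
lam = poisson_training_results.mu            # fitted Poisson means
resp = y_train['DDS'].values                 # observed counts
aux_dep = ((resp - lam) ** 2 - resp) / lam   # auxiliary dependent variable
alpha = sm.OLS(aux_dep, lam).fit().params[0]

nb_results = sm.GLM(y_train, x_train,
                    family=sm.families.NegativeBinomial(alpha=alpha)).fit()
print(nb_results.summary())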