Skip to content
Snippets Groups Projects
Commit e6cc509d authored by Sayed Saeedi's avatar Sayed Saeedi
Browse files

TGAN

parent 6d584a59
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id:fefebe4b tags:
``` python
#https://github.com/sdv-dev/TGAN/tree/master
import pandas as pd
from tgan.data import load_demo_data
from tgan.model import TGANModel
import tensorflow as tf
import numpy as np
```
%% Cell type:code id:a345cefc tags:
``` python
#loading datasets
def load_data(location):
    """Read a CSV file and return the cleaned frame plus its header.

    Args:
        location: Path (or any other source accepted by ``pd.read_csv``)
            of the CSV file to load.

    Returns:
        A ``(data, data_columns)`` tuple: the frame returned by
        ``preprocessing`` and the column index taken from the file as read.
    """
    raw = pd.read_csv(location)
    # Capture the header before cleaning, exactly as read from the file.
    header = raw.columns
    return preprocessing(raw), header
#Dataset preprocessing
def preprocessing(data):
    """Clean a raw dataset frame so its feature columns are finite numbers.

    Steps, in order:
      1. Drop duplicated rows.
      2. Parse ``Timestamp`` using the ``%d/%m/%Y %H:%M:%S`` format and
         convert it to whole seconds since 1970-01-01 00:00:00 (Unix time);
         values that cannot be parsed become NaN.
      3. Coerce every text column except ``Label`` to numeric; values that
         are not numbers become NaN.
      4. Replace +inf / -inf with NaN.
      5. Drop every row that contains a NaN.

    The frame is modified in place and the same object is returned.

    Args:
        data: DataFrame containing a ``Timestamp`` column. A ``Label``
            column, if present, is never coerced to numeric.

    Returns:
        The cleaned DataFrame (the same object that was passed in).

    Raises:
        KeyError: if ``data`` has no ``Timestamp`` column.
    """
    print("Shape of data before preprocessing:", data.shape)
    data.drop_duplicates(inplace=True)

    parsed = pd.to_datetime(data['Timestamp'], format='%d/%m/%Y %H:%M:%S', errors='coerce')
    data['Timestamp'] = (parsed - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s')

    for col in data.columns:
        if col == 'Label':
            continue
        column = data[col]
        # Text columns may hold numbers stored as strings; anything that is
        # not a number is turned into NaN so it is removed below.
        if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
            data[col] = pd.to_numeric(column, errors='coerce')

    # Must run AFTER the numeric coercion: pd.to_numeric turns strings such
    # as "Infinity" into inf, which would otherwise survive dropna().
    data.replace([np.inf, -np.inf], np.nan, inplace=True)
    data.dropna(inplace=True)
    print("Shape of data after preprocessing:", data.shape)
    return data
```
%% Cell type:code id:41dfaeff tags:
``` python
def tGAN(data, continuous_columns, max_epoch=5, steps_per_epoch=10000, batch_size=200,
         z_dim=200, noise=0.2, l2norm=0.00001, learning_rate=0.001, num_gen_rnn=100,
         num_gen_feature=100, num_dis_layers=1, num_dis_hidden=100,
         optimizer='AdamOptimizer'):
    """Build a ``TGANModel`` with the given settings, fit it and return it.

    Required arguments:
        data: DataFrame (rows and columns) used to fit the model.
        continuous_columns: list identifying the continuous columns.

    Every other keyword argument is forwarded unchanged to the
    ``TGANModel`` constructor under the same name.

    Returns:
        The fitted ``TGANModel`` instance.
    """
    print(data.shape)
    model_settings = dict(
        continuous_columns=continuous_columns,
        max_epoch=max_epoch,
        steps_per_epoch=steps_per_epoch,
        batch_size=batch_size,
        z_dim=z_dim,
        noise=noise,
        l2norm=l2norm,
        learning_rate=learning_rate,
        num_gen_rnn=num_gen_rnn,
        num_gen_feature=num_gen_feature,
        num_dis_layers=num_dis_layers,
        num_dis_hidden=num_dis_hidden,
        optimizer=optimizer,
    )
    model = TGANModel(**model_settings)
    model.fit(data)
    return model
```
%% Cell type:code id:07714e0c tags:
``` python
# Integer labels of the columns that TGAN should treat as continuous.
continuous_columns = [2, 3, 17, 18, 20, 21, 38, 39]
data, data_columns = load_data("C:\\Users\\sayed\\Desktop\\Dataset\\02-14-2018.csv")
# Keep only the rows of the "FTP-BruteForce" class.
data = data[data["Label"] == "FTP-BruteForce"]
# Replace the column names with integer labels 0..n-1 (the layout of a
# header-less frame). Labelling every column None would make all labels
# identical, so the integers in continuous_columns could never match one.
data.columns = range(len(data.columns))
```
%% Cell type:code id:95f316b5 tags:
``` python
# Fit the TGAN model, overriding the wrapper's default batch size and epoch count.
tgan = tGAN(
    data=data,
    continuous_columns=continuous_columns,
    max_epoch=15,
    batch_size=150,
)
```
%% Cell type:code id:d0b5a40e tags:
``` python
# Save the fitted model to disk; model_path is reused when the model is loaded back.
model_path = 'C:\\Users\\sayed\\Desktop\\Dataset\\models\\tGAN_model_firstrun.pkl'
tgan.save(model_path)
```
%% Cell type:code id:d7343348 tags:
``` python
# Load the saved model from model_path and draw samples from it.
num_samples = 8000  # number of rows requested from the model
new_tgan = TGANModel.load(model_path)
samples = new_tgan.sample(num_samples)
# Last expression of the cell: presumably shown as the cell output in a notebook.
samples.head()
```
%% Cell type:code id:20667896 tags:
``` python
# Assign the original column names (captured by load_data) back to both
# the generated samples and the training frame.
samples.columns = data_columns
data.columns = data_columns
```
%% Cell type:code id:4bdf2761 tags:
``` python
```
%% Cell type:code id:8423e0de tags:
``` python
```
%% Cell type:code id:9ae7fce1 tags:
``` python
```
%% Cell type:code id:d2fd753c tags:
``` python
```
%% Cell type:code id:06a0123c tags:
``` python
```
%% Cell type:code id:ebf1c5cd tags:
``` python
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment