diff --git a/TGAN/.gitkeep b/TGAN/.gitkeep deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/TGAN/TGAN.ipynb b/TGAN/TGAN.ipynb deleted file mode 100644 index 2a6038a592fb521e58da9fcbecb216ec9be822c1..0000000000000000000000000000000000000000 --- a/TGAN/TGAN.ipynb +++ /dev/null @@ -1,226 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "fefebe4b", - "metadata": {}, - "outputs": [], - "source": [ - "#https://github.com/sdv-dev/TGAN/tree/master\n", - "import pandas as pd\n", - "from tgan.data import load_demo_data\n", - "from tgan.model import TGANModel\n", - "import tensorflow as tf\n", - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a345cefc", - "metadata": {}, - "outputs": [], - "source": [ - "#loading datasets\n", - "def load_data(location):\n", - " data = pd.read_csv(location)\n", - " data_columns = data.columns\n", - " data = preprocessing(data)\n", - " return data, data_columns\n", - "\n", - "\n", - "#Dataset preprocessing\n", - "def preprocessing(data):\n", - " \n", - " \"\"\"\n", - " dropping duplicate values\n", - " changing timeformat to d/m/Y H:M:S and then to Unix fomrat that starts from 1970/1/ 00:00:00\n", - " making sure that numeric columns only have numeric values and if not numeric then to NaN\n", - " dropping all NaN values \n", - " \"\"\"\n", - " print(\"Shape of data before preprocessing:\", data.shape)\n", - " data.drop_duplicates(inplace=True) #dropping duplicated\n", - " data.replace([np.inf, -np.inf], np.nan, inplace=True)# changing inf and -inf to nan\n", - "\n", - " data['Timestamp'] = pd.to_datetime(data['Timestamp'], format='%d/%m/%Y %H:%M:%S', errors='coerce')\n", - " data['Timestamp'] = (data['Timestamp'] - pd.Timestamp(\"1970-01-01\")) // pd.Timedelta('1s') \n", - "\n", - " for col in data.columns: #changing columns to numeric if not, then to NaN\n", - " if data[col].dtype == 'object' and col != 'Label':\n", - " data[col] = pd.to_numeric(data[col], errors='coerce')\n", - " \n", - " \n", - " \n", - " data.dropna(inplace=True) #droping Na\n", - " \n", - " print(\"Shape of data after preprocessing:\", data.shape)\n", - " \n", - " return data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "41dfaeff", - "metadata": {}, - "outputs": [], - "source": [ - "def tGAN(data, continuous_columns,max_epoch=5, steps_per_epoch=10000, batch_size=200, z_dim=200, noise=0.2, l2norm=0.00001, \n", - " learning_rate=0.001, num_gen_rnn=100, num_gen_feature=100, num_dis_layers=1, num_dis_hidden=100, \n", - " optimizer='AdamOptimizer'):\n", - " \n", - " \"\"\"\n", - " Required arguments to be passed:\n", - " -data: dataframe with rows and columns\n", - " -continuous_columns: a list containing all the columns that are continuous \n", - " \"\"\"\n", - "\n", - " print(data.shape)\n", - "\n", - " tgan = TGANModel(continuous_columns=continuous_columns, max_epoch=max_epoch, steps_per_epoch=steps_per_epoch, \n", - " batch_size=batch_size, z_dim=z_dim, noise=noise, l2norm=l2norm, learning_rate=learning_rate, \n", - " num_gen_rnn=num_gen_rnn, num_gen_feature=num_gen_feature, num_dis_layers=num_dis_layers, \n", - " num_dis_hidden=num_dis_hidden, optimizer=optimizer)\n", - " \n", - " tgan.fit(data)\n", - "\n", - " return tgan" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "07714e0c", - "metadata": {}, - "outputs": [], - "source": [ - "continuous_columns = [2, 3, 17, 18, 20, 21, 38, 39]\n", - "\n", - "data, data_columns = load_data(\"C:\\\\Users\\\\sayed\\\\Desktop\\\\Dataset\\\\02-14-2018.csv\")\n", - "\n", - "data = data[data[\"Label\"] == \"FTP-BruteForce\"]\n", - "data.columns = [None] * len(data.columns) # revoming column names" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "95f316b5", - "metadata": {}, - "outputs": [], - "source": [ - "#fitting the TGAN model\n", - "tgan= tGAN(data=data, continuous_columns=continuous_columns, batch_size=150, max_epoch = 15)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d0b5a40e", - "metadata": {}, - "outputs": [], - "source": [ - "#Saving the model\n", - "model_path = 'C:\\\\Users\\\\sayed\\\\Desktop\\\\Dataset\\\\models\\\\tGAN_model_firstrun.pkl'\n", - "tgan.save(model_path)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d7343348", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "num_samples = 8000\n", - "new_tgan = TGANModel.load(model_path)\n", - "samples = new_tgan.sample(num_samples)\n", - "samples.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "20667896", - "metadata": {}, - "outputs": [], - "source": [ - "#assinging back the column names\n", - "samples.columns = data_columns\n", - "data.columns = data_columns" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4bdf2761", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8423e0de", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9ae7fce1", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d2fd753c", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "06a0123c", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ebf1c5cd", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/TGAN/mymodel_17_12.pkl b/TGAN/mymodel_17_12.pkl deleted file mode 100644 index e7c968357447bb6e37c68fbe5bbad51f67447bbe..0000000000000000000000000000000000000000 Binary files a/TGAN/mymodel_17_12.pkl and /dev/null differ