Applying Deep Reinforcement Learning to Improve Throughput and Reduce Collision Rate in IEEE 802.11 Networks

 

I will provide the basic simulation code for my paper. If you use my work, please cite my paper in your work.

C. Ke and L. Astuti, "Applying Deep Reinforcement Learning to Improve Throughput and Reduce Collision Rate in IEEE 802.11 Networks," KSII Transactions on Internet and Information Systems, vol. 16, no. 1, pp. 334-349, 2022. DOI: 10.3837/tiis.2022.01.019. (SCI)

 

By the way, the parameters used in the code are for the IEEE 802.11b environment. Also, the DQN was implemented using the PARL framework. You can download the VM (logins: user/user, root/ubuntu) and run the following code directly in that VM.

 

CSMACA

import numpy as np

import random

 

# ---- Simulation configuration (IEEE 802.11b-style parameters) ----
_n=50 # number of nodes
_simTime=2000 # sec

rate=11 # 11, 5.5, 2 or 1 Mbps
_cwmin=32    # minimum contention window (slots)
_cwmax=1024  # maximum contention window (slots)
rtsmode=0 #0: data->ack; 1:rts->cts->data->ack

# Interframe spaces and slot time, in microseconds.
SIFS=10
DIFS=50
EIFS=SIFS+DIFS+192+112   # SIFS + DIFS + 192 us preamble + 112 ACK bits at 1 Mbps
SLOT=20
M=1000000                # microseconds per second (used to convert to seconds)

_pktSize=1000 # bytes
stat_succ=0              # total successful transmissions
stat_coll=0              # total collision events
stat_pkts=np.zeros(_n)   # per-station delivered-packet counters
cw=np.zeros(_n)          # per-station current contention window (slots)
bo=np.zeros(_n)          # per-station remaining backoff counter (slots)

now=0.0                  # simulation clock, in seconds

 

def init_bo():
    """Give every station the minimum contention window and a random backoff."""
    for i in range(0,_n):
        cw[i]=_cwmin
        # NOTE(review): randint(0,_cwmax) % cw[i] has a slight modulo bias
        # toward small residues; presumably randint(0, cw[i]-1) was intended
        # -- confirm against the paper before changing (affects reproducibility).
        bo[i]=random.randint(0,_cwmax)%cw[i]
        #print("cw[",i,"]=",cw[i]," bo[",i,"]=",bo[i])

 

def Trts():
    """Airtime (us) of an RTS frame: 192 us PHY preamble + 20 bytes at 1 Mbps."""
    rts_bits = 20 * 8
    return 192 + rts_bits / 1

 

def Tcts():
    """Airtime (us) of a CTS frame: 192 us PHY preamble + 14 bytes at 1 Mbps."""
    cts_bits = 14 * 8
    return 192 + cts_bits / 1

 

def Tdata():
    """Airtime (us) of a DATA frame: preamble + (payload + 28-byte header) at `rate` Mbps."""
    global rate
    frame_bits = (_pktSize + 28) * 8.0
    return 192 + frame_bits / rate

 

def Tack():
    """Airtime (us) of an ACK frame: 192 us PHY preamble + 14 bytes at 1 Mbps."""
    ack_bits = 14 * 8.0
    return 192 + ack_bits / 1

 

def getMinBoAllStationsIndex():
    """Return the index of the first station holding the smallest backoff."""
    best = 0
    for i in range(1, _n):
        if bo[i] < bo[best]:
            best = i
    return best

 

def getCountMinBoAllStations(min):
    """Count how many stations share the backoff value `min` (i.e. transmit together)."""
    return sum(1 for i in range(_n) if bo[i] == min)

 

def subMinBoFromAll(min,count):
    """Advance all stations past this slot: non-transmitters count down by
    `min`, the winner resets its window, colliders double theirs (binary
    exponential backoff, capped at _cwmax); transmitters redraw a backoff."""
    global _cwmin,_cwmax
    for i in range(_n):
        if bo[i] < min:
            # Invariant: no backoff may be smaller than the slot winner's.
            print("<Error> min=",min," bo=",bo[i])
            exit(1)
        if bo[i] > min:
            bo[i] -= min
            continue
        # bo[i] == min: this station transmitted in the current slot.
        if count == 1:
            # Success: back to the minimum contention window.
            cw[i] = _cwmin
        elif count > 1:
            # Collision: double the window, capped at _cwmax.
            cw[i] = cw[i] * 2 if cw[i] < _cwmax else _cwmax
        else:
            print("<Error> count=",count)
            exit(1)
        bo[i] = random.randint(0, _cwmax) % cw[i]

 

def setStats(min,index,count):
    """Record the slot outcome: one transmitter is a success, several a collision."""
    global stat_succ,stat_coll
    if count == 1:
        stat_pkts[index] += 1
        stat_succ += 1
        return
    stat_coll += 1
    for i in range(_n):
        # Consistency check only: every backoff must still be >= min.
        if bo[i] < min:
            print("<Error> min=", min, " bo=", bo[i])
            exit(1)

 

def setNow(min,count):
    """Advance the simulation clock `now` (seconds) by one access cycle:
    DIFS + backoff slots, then either a full successful exchange or a
    collided frame followed by EIFS."""
    global M, now, SIFS, DIFS, EIFS, SLOT
    if count < 1:
        print("<Error> count=", count)
        exit(1)
    now += DIFS / M
    now += min * SLOT / M
    if count == 1:
        # Successful exchange: optional RTS/CTS handshake first.
        if rtsmode == 1:
            now += Trts() / M
            now += SIFS / M
            now += Tcts() / M
            now += SIFS / M
        now += Tdata() / M
        now += SIFS / M
        now += Tack() / M
    elif rtsmode == 1:
        # Collision with RTS/CTS enabled: only the RTS airtime is wasted.
        now += Trts() / M
        now += EIFS / M
    else:
        # Collision without RTS/CTS: the whole DATA frame is wasted.
        now += Tdata() / M
        now += EIFS / M

 

def resolve():
    """Simulate one channel-access round: find the slot winner(s), advance
    the clock, update statistics, then rotate every backoff counter."""
    winner = getMinBoAllStationsIndex()
    min_bo = bo[winner]
    n_tx = getCountMinBoAllStations(min_bo)

    setNow(min_bo, n_tx)
    setStats(min_bo, winner, n_tx)
    subMinBoFromAll(min_bo, n_tx)

 

def printStats():
    """Print the aggregate collision rate and throughput (bits per second)."""
    global stat_succ, stat_coll, stat_pkts
    print("\nGeneral Statistics\n")
    print("-" * 50)

    numPkts = sum(stat_pkts[i] for i in range(_n))
    total = stat_succ + stat_coll
    print("stat_coll:", stat_coll, "stat_succ:", stat_succ)
    print("Collision rate:", stat_coll / total * 100, "%")
    print("Aggregate Throughput:", numPkts * (_pktSize * 8.0) / now)

 

def main():
    """Entry point: seed the RNG, run CSMA/CA rounds until _simTime, report."""
    global now, _simTime
    random.seed(1)  # fixed seed => reproducible results

    init_bo()
    while now < _simTime:
        resolve()
    printStats()

 

# Run the CSMA/CA simulation when this script is executed.
main()

 

Execution

 

 

CCOD-DQN

# -*- coding: UTF-8 -*-

import numpy as np

import random

import os

import parl

from parl import layers 

import copy

import paddle.fluid as fluid

import collections

 

# ---- DQN hyper-parameters ----
MEMORY_SIZE = 20000          # replay-buffer capacity (experiences)
MEMORY_WARMUP_SIZE = 200     # experiences collected before learning starts
BATCH_SIZE = 32              # mini-batch size per learning step
LEARNING_RATE = 0.001        # Adam step size
GAMMA = 0.9                  # reward discount factor

# Snapshot of the statistics at the start of the current decision interval.
pre_time=0.0
pre_stat_succ=0
pre_stat_coll=0

# ---- 802.11b simulation parameters (same as the CSMA/CA script) ----
_n=50                        # number of nodes
_simTime=2000                # simulated seconds of training

rate=11                      # 11, 5.5, 2 or 1 Mbps
_cwmin=32                    # minimum contention window (slots)
_cwmax=1024                  # maximum contention window (slots)
rtsmode=0                    # 0: data->ack; 1: rts->cts->data->ack

# Interframe spaces and slot time, in microseconds.
SIFS=10
DIFS=50
EIFS=SIFS+DIFS+192+112
SLOT=20
M=1000000                    # microseconds per second

_pktSize=1000                # bytes
stat_succ=0                  # total successful transmissions
stat_coll=0                  # total collision events
stat_pkts=np.zeros(_n)       # per-station delivered-packet counters
cw=np.zeros(_n)              # per-station current contention window
bo=np.zeros(_n)              # per-station remaining backoff (slots)

now=0.0                      # simulation clock, in seconds

 

class Model(parl.Model):
    """Three-layer MLP mapping an observation to one Q-value per action."""

    def __init__(self, act_dim):
        hidden = 128  # both hidden layers use the same width
        self.fc1 = layers.fc(size=hidden, act='relu')
        self.fc2 = layers.fc(size=hidden, act='relu')
        self.fc3 = layers.fc(size=act_dim, act=None)

    def value(self, obs):
        """Forward pass: obs -> Q(obs, a) for every action a."""
        hidden1 = self.fc1(obs)
        hidden2 = self.fc2(hidden1)
        return self.fc3(hidden2)

 

class DQN(parl.Algorithm):
    """Deep Q-Network algorithm (static-graph PaddlePaddle / PARL 1.x style)."""

    def __init__(self, model, act_dim=None, gamma=None, lr=None):
        # Online network plus a deep-copied target network for stable targets.
        self.model = model
        self.target_model = copy.deepcopy(model)

        assert isinstance(act_dim, int)
        assert isinstance(gamma, float)
        assert isinstance(lr, float)
        self.act_dim = act_dim
        self.gamma = gamma    # discount factor
        self.lr = lr          # Adam learning rate

    def predict(self, obs):
        # Q-values from the online network.
        return self.model.value(obs)

    def learn(self, obs, action, reward, next_obs, terminal):
        """Build the TD-learning subgraph; returns the scalar MSE cost op.

        Called once at graph-construction time (see Agent.build_program),
        not per training step.
        """
        # Bootstrapped target: r + gamma * max_a' Q_target(s', a'); the
        # bootstrap term is zeroed on terminal transitions.
        next_pred_value = self.target_model.value(next_obs)
        best_v = layers.reduce_max(next_pred_value, dim=1)
        best_v.stop_gradient = True  # no gradient through the target network
        terminal = layers.cast(terminal, dtype='float32')
        target = reward + (1.0 - terminal) * self.gamma * best_v

        pred_value = self.model.value(obs)

        # Select Q(s, a) of the taken action via a one-hot mask.
        action_onehot = layers.one_hot(action, self.act_dim)
        action_onehot = layers.cast(action_onehot, dtype='float32')

        pred_action_value = layers.reduce_sum(
            layers.elementwise_mul(action_onehot, pred_value), dim=1)

        # Mean squared TD error, minimized with Adam.
        cost = layers.square_error_cost(pred_action_value, target)
        cost = layers.reduce_mean(cost)
        optimizer = fluid.optimizer.Adam(learning_rate=self.lr)
        optimizer.minimize(cost)
        return cost

    def sync_target(self):
        # Copy the online-network weights into the target network.
        self.model.sync_weights_to(self.target_model)

 

class Agent(parl.Agent):
    """Epsilon-greedy DQN agent; wraps the algorithm in fluid static programs."""

    def __init__(self,
                 algorithm,
                 obs_dim,
                 act_dim,
                 e_greed=0.1,
                 e_greed_decrement=0):
        assert isinstance(obs_dim, int)
        assert isinstance(act_dim, int)
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        # parl.Agent.__init__ invokes build_program() below.
        super(Agent, self).__init__(algorithm)

        self.global_step = 0
        self.update_target_steps = 200  # sync target net every 200 learn() calls

        self.e_greed = e_greed                        # exploration probability
        self.e_greed_decrement = e_greed_decrement    # epsilon decay per sample()

    def build_program(self):
        """Declare the static prediction and learning graphs."""
        self.pred_program = fluid.Program()
        self.learn_program = fluid.Program()

        with fluid.program_guard(self.pred_program):
            obs = layers.data(
                name='obs', shape=[self.obs_dim], dtype='float32')
            self.value = self.alg.predict(obs)

        with fluid.program_guard(self.learn_program):
            obs = layers.data(
                name='obs', shape=[self.obs_dim], dtype='float32')
            action = layers.data(name='act', shape=[1], dtype='int32')
            reward = layers.data(name='reward', shape=[], dtype='float32')
            next_obs = layers.data(
                name='next_obs', shape=[self.obs_dim], dtype='float32')
            terminal = layers.data(name='terminal', shape=[], dtype='bool')
            self.cost = self.alg.learn(obs, action, reward, next_obs, terminal)

    def sample(self, obs):
        """Epsilon-greedy action selection with slowly decaying epsilon."""
        sample = np.random.rand()
        if sample < self.e_greed:
            act = np.random.randint(self.act_dim)  # explore
        else:
            act = self.predict(obs)                # exploit
        # Decay epsilon, but never below 0.01.
        self.e_greed = max(
            0.01, self.e_greed - self.e_greed_decrement)
        return act

    def predict(self, obs):
        """Greedy action: argmax over the predicted Q-values."""
        obs = np.expand_dims(obs, axis=0)  # add the batch dimension
        pred_Q = self.fluid_executor.run(
            self.pred_program,
            feed={'obs': obs.astype('float32')},
            fetch_list=[self.value])[0]
        pred_Q = np.squeeze(pred_Q, axis=0)
        act = np.argmax(pred_Q)
        return act

    def learn(self, obs, act, reward, next_obs, terminal):
        """Run one optimization step on a mini-batch; returns the cost value."""
        # Periodically refresh the target network.
        if self.global_step % self.update_target_steps == 0:
            self.alg.sync_target()
        self.global_step += 1

        act = np.expand_dims(act, -1)  # shape (batch, 1) to match the int32 feed
        feed = {
            'obs': obs.astype('float32'),
            'act': act.astype('int32'),
            'reward': reward,
            'next_obs': next_obs.astype('float32'),
            'terminal': terminal
        }
        cost = self.fluid_executor.run(
            self.learn_program, feed=feed, fetch_list=[self.cost])[0]
        return cost

 

class ReplayMemory(object):
    """Fixed-capacity FIFO experience buffer for DQN training."""

    def __init__(self, max_size):
        # deque silently evicts the oldest entry once max_size is reached
        self.buffer = collections.deque(maxlen=max_size)

    def append(self, exp):
        """Store one (obs, action, reward, next_obs, done) tuple."""
        self.buffer.append(exp)

    def sample(self, batch_size):
        """Draw a uniform random mini-batch and return it as five float32
        numpy arrays: (obs, action, reward, next_obs, done)."""
        batch = random.sample(self.buffer, batch_size)
        obs, acts, rews, next_obs, dones = zip(*batch)
        return (np.array(obs).astype('float32'),
                np.array(acts).astype('float32'),
                np.array(rews).astype('float32'),
                np.array(next_obs).astype('float32'),
                np.array(dones).astype('float32'))

    def __len__(self):
        return len(self.buffer)

 

def init_bo():
    """Give every station the minimum contention window and a random backoff."""
    for i in range(0,_n):
        cw[i]=_cwmin
        # NOTE(review): randint(0,_cwmax) % cw[i] has a slight modulo bias
        # toward small residues; presumably randint(0, cw[i]-1) was intended
        # -- confirm against the paper before changing (affects reproducibility).
        bo[i]=random.randint(0,_cwmax)%cw[i]

 

def Trts():
    """Airtime (us) of an RTS frame: 192 us PHY preamble + 20 bytes at 1 Mbps."""
    rts_bits = 20 * 8
    return 192 + rts_bits / 1

 

def Tcts():
    """Airtime (us) of a CTS frame: 192 us PHY preamble + 14 bytes at 1 Mbps."""
    cts_bits = 14 * 8
    return 192 + cts_bits / 1

 

def Tdata():
    """Airtime (us) of a DATA frame: preamble + (payload + 28-byte header) at `rate` Mbps."""
    global rate
    frame_bits = (_pktSize + 28) * 8.0
    return 192 + frame_bits / rate

 

def Tack():
    """Airtime (us) of an ACK frame: 192 us PHY preamble + 14 bytes at 1 Mbps."""
    ack_bits = 14 * 8.0
    return 192 + ack_bits / 1

 

def getMinBoAllStationsIndex():
    """Return the index of the first station holding the smallest backoff."""
    best = 0
    for i in range(1, _n):
        if bo[i] < bo[best]:
            best = i
    return best

 

def getCountMinBoAllStations(min):
    """Count how many stations share the backoff value `min` (i.e. transmit together)."""
    return sum(1 for i in range(_n) if bo[i] == min)

 

def subMinBoFromAll(min,count):
    """Advance all stations past this slot: non-transmitters count down by
    `min`, the winner resets its window, colliders double theirs (binary
    exponential backoff, capped at _cwmax); transmitters redraw a backoff."""
    global _cwmin,_cwmax
    for i in range(_n):
        if bo[i] < min:
            # Invariant: no backoff may be smaller than the slot winner's.
            print("<Error> min=",min," bo=",bo[i])
            exit(1)
        if bo[i] > min:
            bo[i] -= min
            continue
        # bo[i] == min: this station transmitted in the current slot.
        if count == 1:
            # Success: back to the minimum contention window.
            cw[i] = _cwmin
        elif count > 1:
            # Collision: double the window, capped at _cwmax.
            cw[i] = cw[i] * 2 if cw[i] < _cwmax else _cwmax
        else:
            print("<Error> count=",count)
            exit(1)
        bo[i] = random.randint(0, _cwmax) % cw[i]

 

def setStats(min,index,count):
    """Record the slot outcome: one transmitter is a success, several a collision."""
    global stat_succ,stat_coll
    if count == 1:
        stat_pkts[index] += 1
        stat_succ += 1
        return
    stat_coll += 1
    for i in range(_n):
        # Consistency check only: every backoff must still be >= min.
        if bo[i] < min:
            print("<Error> min=", min, " bo=", bo[i])
            exit(1)

           

           

 

def setNow(min,count):
    """Advance the simulation clock `now` (seconds) by one access cycle:
    DIFS + backoff slots, then either a full successful exchange or a
    collided frame followed by EIFS."""
    global M, now, SIFS, DIFS, EIFS, SLOT
    if count < 1:
        print("<Error> count=", count)
        exit(1)
    now += DIFS / M
    now += min * SLOT / M
    if count == 1:
        # Successful exchange: optional RTS/CTS handshake first.
        if rtsmode == 1:
            now += Trts() / M
            now += SIFS / M
            now += Tcts() / M
            now += SIFS / M
        now += Tdata() / M
        now += SIFS / M
        now += Tack() / M
    elif rtsmode == 1:
        # Collision with RTS/CTS enabled: only the RTS airtime is wasted.
        now += Trts() / M
        now += EIFS / M
    else:
        # Collision without RTS/CTS: the whole DATA frame is wasted.
        now += Tdata() / M
        now += EIFS / M

 

def resolve():
    """Simulate one channel-access round: find the slot winner(s), advance
    the clock, update statistics, then rotate every backoff counter."""
    winner = getMinBoAllStationsIndex()
    min_bo = bo[winner]
    n_tx = getCountMinBoAllStations(min_bo)

    setNow(min_bo, n_tx)
    setStats(min_bo, winner, n_tx)
    subMinBoFromAll(min_bo, n_tx)

 

def new_resolve(new_cw):
    """Apply the DQN-chosen contention window, then simulate one access round.

    Forces both _cwmin and _cwmax to `new_cw`, so any station that finishes
    the slot (success or collision) redraws its backoff from the fixed window
    selected by the agent. The remaining steps were an exact copy of
    resolve(), so delegate instead of duplicating the logic.
    """
    global _cwmin,_cwmax

    _cwmin=new_cw
    _cwmax=new_cw

    resolve()

 

def printStats():
    """Print the aggregate collision rate and throughput (bits per second)."""
    total = stat_succ + stat_coll
    print("\nGeneral Statistics\n")
    print("-" * 50)
    print("stat_succ:", stat_succ, "stat_coll:", stat_coll)
    print("Collision rate:", stat_coll / total * 100, "%")
    print("Aggregate Throughput:", stat_succ * (_pktSize * 8.0) / now)

 

def main():
    """Train the CCOD-style DQN agent on the slotted CSMA/CA simulator,
    then run a short greedy evaluation episode with the learned policy."""
    global _n, now, _simTime, stat_succ, stat_coll, pre_stat_succ, pre_stat_coll, _pktSize, pre_time
    pre_collision_rate=0.0
    random.seed(1)
    np.random.seed(1)
    init_bo()

    # State = [previous collision rate, current collision rate];
    # action a in {0..5} selects CW = 2^(5+a), i.e. 32..1024.
    obs_dim=2
    act_dim=6
    print("obs_dim=",obs_dim,"act_dim=",act_dim)

    model = Model(act_dim=act_dim)
    algorithm = DQN(model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)
    agent = Agent(
        algorithm,
        obs_dim=obs_dim,
        act_dim=act_dim,
        e_greed=0.1,
        e_greed_decrement=1e-6)

    rpm = ReplayMemory(MEMORY_SIZE)

    #save_path = './dnq_model.ckpt'

    step=0
    reward=0.0
    state = [0.0, 0.0]
    show=0
    while now < _simTime:
        # Warm-up: fill the replay buffer with epsilon-greedy experience
        # before any learning happens (runs only until the buffer holds
        # MEMORY_WARMUP_SIZE entries, then never again).
        while len(rpm) < MEMORY_WARMUP_SIZE:
            obs = np.array(state)
            action = agent.sample(obs)
            new_cw = pow(2, 5 + action)

            # Simulate roughly 0.1 s of channel activity under the chosen CW.
            t1=now
            while True:
             new_resolve(new_cw)
             if now - t1 > 0.1:
               break

            # Interval statistics since the previous decision point.
            collision_rate = (stat_coll - pre_stat_coll) / (stat_succ + stat_coll - pre_stat_succ - pre_stat_coll) * 100
            thr = (stat_succ - pre_stat_succ) * (_pktSize * 8.0) / (now - pre_time)
            # Reward: interval throughput normalized by the channel rate.
            reward = thr / rate / M
            next_state=[]
            next_state.append(pre_collision_rate)
            next_state.append(collision_rate)

            pre_stat_succ = stat_succ
            pre_stat_coll = stat_coll
            pre_collision_rate = collision_rate
            pre_time = now

            next_obs=np.array(next_state)
            done = False
            rpm.append((obs, action, reward, next_obs, done))

            state = next_state

        # Learn from a random mini-batch every 5 environment steps.
        if step%5==0:
          (batch_obs, batch_action, batch_reward, batch_next_obs, batch_done) = rpm.sample(BATCH_SIZE)
          train_loss = agent.learn(batch_obs, batch_action, batch_reward, batch_next_obs, batch_done)

        # One training step: pick a CW, simulate ~0.1 s, store the transition.
        obs = np.array(state)
        action = agent.sample(obs)
        new_cw = pow(2, 5 + action)

        t1=now
        while True:
          new_resolve(new_cw)
          if now - t1 > 0.1:
            break

        collision_rate = (stat_coll - pre_stat_coll) / (stat_succ + stat_coll - pre_stat_succ - pre_stat_coll) * 100
        thr = (stat_succ - pre_stat_succ) * (_pktSize * 8.0) / (now - pre_time)
        reward = thr / rate / M
        next_state = []
        next_state.append(pre_collision_rate)
        next_state.append(collision_rate)
        step += 1
        pre_stat_succ = stat_succ
        pre_stat_coll = stat_coll
        pre_collision_rate = collision_rate
        pre_time = now

        next_obs = np.array(next_state)
        done = False
        rpm.append((obs, action, reward, next_obs, done))
        if now > show:
          # Progress trace roughly every 100 simulated seconds.
          print("now=", now, "obs=", obs, " action=", action, " next_obs=", next_obs, " reward=", reward)
          show+=100
        state = next_state

    printStats()
    #agent.save(save_path)

    # ---- Greedy evaluation for 5 simulated seconds ----
    # NOTE(review): stat_pkts is not listed in the `global` statement above,
    # so the assignment below creates a local and the module-level stat_pkts
    # is NOT reset for evaluation. Harmless here because this script's
    # printStats() does not read stat_pkts -- confirm this is intended.
    now=pre_time=0.0
    state = [0.0, 0.0]
    stat_coll=pre_stat_coll=0
    stat_succ=pre_stat_succ=0
    stat_pkts=np.zeros(_n)
    while now < 5:
        obs = np.array(state)
        action = agent.predict(obs)  # pure exploitation, no exploration
        new_cw = pow(2, 5 + action)
        print("new_cw=", new_cw)

        t1=now
        while True:
          new_resolve(new_cw)
          if now - t1 > 0.1:
            break

        collision_rate = (stat_coll - pre_stat_coll) / (stat_succ + stat_coll - pre_stat_succ - pre_stat_coll) * 100
        thr = (stat_succ - pre_stat_succ) * (_pktSize * 8.0) / (now - pre_time)
        next_state = []
        next_state.append(pre_collision_rate)
        next_state.append(collision_rate)
        pre_stat_succ = stat_succ
        pre_stat_coll = stat_coll
        pre_collision_rate = collision_rate
        pre_time = now
        print("now=", now, " collison rate=",collision_rate," thr=", thr)
        state = next_state
    print("="*25, " Evaluation Result:")
    printStats()

 

# Run CCOD-DQN training and evaluation when this script is executed.
main()

 

Execution

(execution output omitted)

 

SETL-DQN

import numpy as np

import random

import os

import parl

from parl import layers 

import copy

import paddle.fluid as fluid

import collections

 

# ---- DQN hyper-parameters ----
MEMORY_SIZE = 20000          # replay-buffer capacity (experiences)
MEMORY_WARMUP_SIZE = 200     # experiences collected before learning starts
BATCH_SIZE = 32              # mini-batch size per learning step
LEARNING_RATE = 0.001        # Adam step size
GAMMA = 0.9                  # reward discount factor

# Snapshot of the statistics at the start of the current decision interval.
pre_time=0.0
pre_stat_succ=0
pre_stat_coll=0

_n=50 # number of nodes
_simTime=2000 # sec

rate=11 # 11, 5.5, 2 or 1 Mbps
_cwmin=32                    # minimum contention window (slots)
_cwmax=1024                  # maximum contention window (slots)
rtsmode=0 #0: data->ack; 1:rts->cts->data->ack
cwthreshold=512              # SETL boundary between multiplicative and linear CW adjustment

# Interframe spaces and slot time, in microseconds.
SIFS=10
DIFS=50
EIFS=SIFS+DIFS+192+112
SLOT=20
M=1000000                    # microseconds per second

_pktSize=1000 # bytes
stat_succ=0                  # total successful transmissions
stat_coll=0                  # total collision events
stat_pkts=np.zeros(_n)       # per-station delivered-packet counters
cw=np.zeros(_n)              # per-station current contention window
bo=np.zeros(_n)              # per-station remaining backoff (slots)

now=0.0                      # simulation clock, in seconds

 

class Model(parl.Model):
    """Three-layer MLP mapping an observation to one Q-value per action."""

    def __init__(self, act_dim):
        hidden = 128  # both hidden layers use the same width
        self.fc1 = layers.fc(size=hidden, act='relu')
        self.fc2 = layers.fc(size=hidden, act='relu')
        self.fc3 = layers.fc(size=act_dim, act=None)

    def value(self, obs):
        """Forward pass: obs -> Q(obs, a) for every action a."""
        hidden1 = self.fc1(obs)
        hidden2 = self.fc2(hidden1)
        return self.fc3(hidden2)

 

class DQN(parl.Algorithm):
    """Deep Q-Network algorithm (static-graph PaddlePaddle / PARL 1.x style)."""

    def __init__(self, model, act_dim=None, gamma=None, lr=None):
        # Online network plus a deep-copied target network for stable targets.
        self.model = model
        self.target_model = copy.deepcopy(model)

        assert isinstance(act_dim, int)
        assert isinstance(gamma, float)
        assert isinstance(lr, float)
        self.act_dim = act_dim
        self.gamma = gamma    # discount factor
        self.lr = lr          # Adam learning rate

    def predict(self, obs):
        # Q-values from the online network.
        return self.model.value(obs)

    def learn(self, obs, action, reward, next_obs, terminal):
        """Build the TD-learning subgraph; returns the scalar MSE cost op.

        Called once at graph-construction time (see Agent.build_program),
        not per training step.
        """
        # Bootstrapped target: r + gamma * max_a' Q_target(s', a'); the
        # bootstrap term is zeroed on terminal transitions.
        next_pred_value = self.target_model.value(next_obs)
        best_v = layers.reduce_max(next_pred_value, dim=1)
        best_v.stop_gradient = True  # no gradient through the target network
        terminal = layers.cast(terminal, dtype='float32')
        target = reward + (1.0 - terminal) * self.gamma * best_v

        pred_value = self.model.value(obs)

        # Select Q(s, a) of the taken action via a one-hot mask.
        action_onehot = layers.one_hot(action, self.act_dim)
        action_onehot = layers.cast(action_onehot, dtype='float32')

        pred_action_value = layers.reduce_sum(
            layers.elementwise_mul(action_onehot, pred_value), dim=1)

        # Mean squared TD error, minimized with Adam.
        cost = layers.square_error_cost(pred_action_value, target)
        cost = layers.reduce_mean(cost)
        optimizer = fluid.optimizer.Adam(learning_rate=self.lr)
        optimizer.minimize(cost)
        return cost

    def sync_target(self):
         # Copy the online-network weights into the target network.
         self.model.sync_weights_to(self.target_model)

 

class Agent(parl.Agent):
    """Epsilon-greedy DQN agent; wraps the algorithm in fluid static programs."""

    def __init__(self,
                 algorithm,
                 obs_dim,
                 act_dim,
                 e_greed=0.1,
                 e_greed_decrement=0):
        assert isinstance(obs_dim, int)
        assert isinstance(act_dim, int)
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        # parl.Agent.__init__ invokes build_program() below.
        super(Agent, self).__init__(algorithm)

        self.global_step = 0
        self.update_target_steps = 200  # sync target net every 200 learn() calls

        self.e_greed = e_greed                        # exploration probability
        self.e_greed_decrement = e_greed_decrement    # epsilon decay per sample()

    def build_program(self):
        """Declare the static prediction and learning graphs."""
        self.pred_program = fluid.Program()
        self.learn_program = fluid.Program()

        with fluid.program_guard(self.pred_program):
            obs = layers.data(
                name='obs', shape=[self.obs_dim], dtype='float32')
            self.value = self.alg.predict(obs)

        with fluid.program_guard(self.learn_program):
            obs = layers.data(
                name='obs', shape=[self.obs_dim], dtype='float32')
            action = layers.data(name='act', shape=[1], dtype='int32')
            reward = layers.data(name='reward', shape=[], dtype='float32')
            next_obs = layers.data(
                name='next_obs', shape=[self.obs_dim], dtype='float32')
            terminal = layers.data(name='terminal', shape=[], dtype='bool')
            self.cost = self.alg.learn(obs, action, reward, next_obs, terminal)

    def sample(self, obs):
        """Epsilon-greedy action selection with slowly decaying epsilon."""
        sample = np.random.rand()
        if sample < self.e_greed:
            act = np.random.randint(self.act_dim)  # explore
        else:
            act = self.predict(obs)                # exploit
        # Decay epsilon, but never below 0.01.
        self.e_greed = max(
            0.01, self.e_greed - self.e_greed_decrement)
        return act

    def predict(self, obs):
        """Greedy action: argmax over the predicted Q-values."""
        obs = np.expand_dims(obs, axis=0)  # add the batch dimension
        pred_Q = self.fluid_executor.run(
            self.pred_program,
            feed={'obs': obs.astype('float32')},
            fetch_list=[self.value])[0]
        pred_Q = np.squeeze(pred_Q, axis=0)
        act = np.argmax(pred_Q)
        return act

    def learn(self, obs, act, reward, next_obs, terminal):
        """Run one optimization step on a mini-batch; returns the cost value."""
        # Periodically refresh the target network.
        if self.global_step % self.update_target_steps == 0:
            self.alg.sync_target()
        self.global_step += 1

        act = np.expand_dims(act, -1)  # shape (batch, 1) to match the int32 feed
        feed = {
            'obs': obs.astype('float32'),
            'act': act.astype('int32'),
            'reward': reward,
            'next_obs': next_obs.astype('float32'),
            'terminal': terminal
        }
        cost = self.fluid_executor.run(
            self.learn_program, feed=feed, fetch_list=[self.cost])[0]
        return cost

 

class ReplayMemory(object):
    """Fixed-capacity FIFO experience buffer for DQN training."""

    def __init__(self, max_size):
        # deque silently evicts the oldest entry once max_size is reached
        self.buffer = collections.deque(maxlen=max_size)

    def append(self, exp):
        """Store one (obs, action, reward, next_obs, done) tuple."""
        self.buffer.append(exp)

    def sample(self, batch_size):
        """Draw a uniform random mini-batch and return it as five float32
        numpy arrays: (obs, action, reward, next_obs, done)."""
        batch = random.sample(self.buffer, batch_size)
        obs, acts, rews, next_obs, dones = zip(*batch)
        return (np.array(obs).astype('float32'),
                np.array(acts).astype('float32'),
                np.array(rews).astype('float32'),
                np.array(next_obs).astype('float32'),
                np.array(dones).astype('float32'))

    def __len__(self):
        return len(self.buffer)

 

def init_bo():
    """Give every station the minimum contention window and a random backoff."""
    for i in range(0,_n):
        cw[i]=_cwmin
        # NOTE(review): randint(0,_cwmax) % cw[i] has a slight modulo bias
        # toward small residues; presumably randint(0, cw[i]-1) was intended
        # -- confirm against the paper before changing (affects reproducibility).
        bo[i]=random.randint(0,_cwmax)%cw[i]
        #print("cw[",i,"]=",cw[i]," bo[",i,"]=",bo[i])

 

def Trts():
    """Airtime (us) of an RTS frame: 192 us PHY preamble + 20 bytes at 1 Mbps."""
    rts_bits = 20 * 8
    return 192 + rts_bits / 1

 

def Tcts():
    """Airtime (us) of a CTS frame: 192 us PHY preamble + 14 bytes at 1 Mbps."""
    cts_bits = 14 * 8
    return 192 + cts_bits / 1

 

def Tdata():
    """Airtime (us) of a DATA frame: preamble + (payload + 28-byte header) at `rate` Mbps."""
    global rate
    frame_bits = (_pktSize + 28) * 8.0
    return 192 + frame_bits / rate

 

def Tack():
    """Airtime (us) of an ACK frame: 192 us PHY preamble + 14 bytes at 1 Mbps."""
    ack_bits = 14 * 8.0
    return 192 + ack_bits / 1

 

def getMinBoAllStationsIndex():
    """Return the index of the first station holding the smallest backoff."""
    best = 0
    for i in range(1, _n):
        if bo[i] < bo[best]:
            best = i
    return best

 

def getCountMinBoAllStations(min):
    """Count how many stations share the backoff value `min` (i.e. transmit together)."""
    return sum(1 for i in range(_n) if bo[i] == min)

 

def subMinBoFromAll(min,count):
    """SETL window adaptation after one slot. Non-transmitters count down
    by `min`. For a station that transmitted: below cwthreshold the window
    moves multiplicatively (halve on success, double on collision); above
    it the window moves linearly in steps of 32 slots, bounded by
    [_cwmin, _cwmax]. Transmitters then redraw a backoff."""
    global _cwmin, _cwmax, cwthreshold
    for i in range(_n):
        if bo[i] < min:
            # Invariant: no backoff may be smaller than the slot winner's.
            print("<Error> min=",min," bo=",bo[i])
            exit(1)
        if bo[i] > min:
            bo[i] -= min
            continue
        # bo[i] == min: this station transmitted in the current slot.
        if count == 1:
            # Success: shrink the window (linear above the threshold,
            # multiplicative below, floored at _cwmin).
            if cw[i] > cwthreshold:
                cw[i] -= 32
            elif cw[i] > _cwmin:
                cw[i] = cw[i] / 2
            else:
                cw[i] = _cwmin
        elif count > 1:
            # Collision: grow the window (multiplicative below the
            # threshold, linear above, capped at _cwmax).
            if cw[i] < cwthreshold:
                cw[i] *= 2
            elif cw[i] < _cwmax:
                cw[i] += 32
            else:
                cw[i] = _cwmax
        else:
            print("<Error> count=",count)
            exit(1)
        bo[i] = random.randint(0, _cwmax) % cw[i]

 

def setStats(min,index,count):
    """Record the slot outcome: one transmitter is a success, several a collision."""
    global stat_succ,stat_coll
    if count == 1:
        stat_pkts[index] += 1
        stat_succ += 1
        return
    stat_coll += 1
    for i in range(_n):
        # Consistency check only: every backoff must still be >= min.
        if bo[i] < min:
            print("<Error> min=", min, " bo=", bo[i])
            exit(1)

 

def setNow(min,count):
    """Advance the simulation clock `now` (seconds) by one access cycle.

    This SETL variant adds the RTS airtime (if any) before the DIFS and
    backoff terms; the accumulated total matches the other scripts."""
    global M, now, SIFS, DIFS, EIFS, SLOT
    if count < 1:
        print("<Error> count=", count)
        exit(1)
    if rtsmode == 1:
        now += Trts() / M
    if count == 1:
        # Successful exchange: optional CTS handshake, then DATA + ACK.
        if rtsmode == 1:
            now += SIFS / M
            now += Tcts() / M
            now += SIFS / M
        now += DIFS / M
        now += min * SLOT / M
        now += Tdata() / M
        now += SIFS / M
        now += Tack() / M
    elif rtsmode == 1:
        # Collision with RTS/CTS enabled: only the RTS airtime is wasted.
        now += EIFS / M
        now += min * SLOT / M
    else:
        # Collision without RTS/CTS: the whole DATA frame is wasted.
        now += EIFS / M
        now += min * SLOT / M
        now += Tdata() / M

 

def new_resolve(new_cwthreshold):
    """Apply the DQN-chosen CW threshold, then simulate one access round."""
    global cwthreshold
    cwthreshold = new_cwthreshold

    winner = getMinBoAllStationsIndex()
    min_bo = bo[winner]
    n_tx = getCountMinBoAllStations(min_bo)

    setNow(min_bo, n_tx)
    setStats(min_bo, winner, n_tx)
    subMinBoFromAll(min_bo, n_tx)

 

def printStats():
    """Print the aggregate collision rate and throughput (bits per second)."""
    total = stat_succ + stat_coll
    print("\nGeneral Statistics\n")
    print("-" * 50)
    print("stat_succ:", stat_succ, "stat_coll:", stat_coll)
    print("Collision rate:", stat_coll / total * 100, "%")
    print("Aggregate Throughput:", stat_succ * (_pktSize * 8.0) / now)

 

def main():
    """Train the SETL-DQN agent (which tunes cwthreshold) on the slotted
    CSMA/CA simulator, then run a short greedy evaluation episode."""
    global _n, now, _simTime, stat_succ, stat_coll, pre_stat_succ, pre_stat_coll, _pktSize, pre_time
    pre_collision_rate=0.0
    random.seed(1)
    np.random.seed(1)
    init_bo()

    # State = [current collision rate, previous collision rate];
    # NOTE(review): this ordering is the reverse of the CCOD script's
    # [previous, current] -- presumably intentional, confirm with the paper.
    # Action a in {0..7} selects cwthreshold = 128*(1+a), i.e. 128..1024.
    obs_dim=2
    act_dim=8
    print("obs_dim=",obs_dim,"act_dim=",act_dim)

    model = Model(act_dim=act_dim)
    algorithm = DQN(model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)
    agent = Agent(
        algorithm,
        obs_dim=obs_dim,
        act_dim=act_dim,
        e_greed=0.1,
        e_greed_decrement=1e-6)

    rpm = ReplayMemory(MEMORY_SIZE)

    #save_path = './dnq_model.ckpt'
    #if os.path.isfile(save_path):
    #    agent.restore(save_path)

    step=0
    reward=0.0
    state = [0.0, 0.0]
    show=0
    while now < _simTime:
        # Warm-up: fill the replay buffer with epsilon-greedy experience
        # before any learning happens (runs only until the buffer holds
        # MEMORY_WARMUP_SIZE entries, then never again).
        while len(rpm) < MEMORY_WARMUP_SIZE:
            obs = np.array(state)
            action = agent.sample(obs)
            new_cwthreshold = 128*(1+action)

            # Simulate roughly 0.1 s of channel activity under this threshold.
            t1=now
            while True:
             new_resolve(new_cwthreshold)
             if now - t1 > 0.1:
               break

            # Interval statistics since the previous decision point.
            collision_rate = (stat_coll - pre_stat_coll) / (stat_succ + stat_coll - pre_stat_succ - pre_stat_coll) * 100
            thr = (stat_succ - pre_stat_succ) * (_pktSize * 8.0) / (now - pre_time)
            # Reward: interval throughput normalized by the channel rate.
            reward = thr / rate / M
            next_state=[]
            next_state.append(collision_rate)
            next_state.append(pre_collision_rate)
            #step += 1
            pre_stat_succ = stat_succ
            pre_stat_coll = stat_coll
            pre_collision_rate = collision_rate
            pre_time = now
            #print("now=", now, " collison rate=",collision_rate," thr=", thr)
            next_obs=np.array(next_state)
            done = False
            rpm.append((obs, action, reward, next_obs, done))
            #print("len(rpm)=", len(rpm), "obs=", obs, " action=", action, " next_obs=", next_obs, " reward=", reward)
            state = next_state
            #if step>=5:
            #    exit()

        # Learn from a random mini-batch every 5 environment steps.
        if step%5==0:
          (batch_obs, batch_action, batch_reward, batch_next_obs, batch_done) = rpm.sample(BATCH_SIZE)
          train_loss = agent.learn(batch_obs, batch_action, batch_reward, batch_next_obs, batch_done)
          #print("agent.learn() is called, train_loss=", train_loss)

        # One training step: pick a threshold, simulate ~0.1 s, store it.
        obs = np.array(state)
        action = agent.sample(obs)
        new_cwthreshold = 128*(1+action)

        t1=now
        while True:
          new_resolve(new_cwthreshold)
          if now - t1 > 0.1:
            break

        collision_rate = (stat_coll - pre_stat_coll) / (stat_succ + stat_coll - pre_stat_succ - pre_stat_coll) * 100
        thr = (stat_succ - pre_stat_succ) * (_pktSize * 8.0) / (now - pre_time)
        reward = thr / rate / M
        next_state = []
        next_state.append(collision_rate)
        next_state.append(pre_collision_rate)
        step += 1
        pre_stat_succ = stat_succ
        pre_stat_coll = stat_coll
        pre_collision_rate = collision_rate
        pre_time = now
        # print("now=", now, " collison rate=",collision_rate," thr=", thr)
        next_obs = np.array(next_state)
        done = False
        rpm.append((obs, action, reward, next_obs, done))
        if now > show:
          # Progress trace roughly every 100 simulated seconds.
          print("now=", now, "obs=", obs, " action=", action, " next_obs=", next_obs, " reward=", reward)
          show+=100
        state = next_state

    printStats()
    #agent.save(save_path)

    # ---- Greedy evaluation for 5 simulated seconds ----
    # NOTE(review): stat_pkts is not listed in the `global` statement above,
    # so the assignment below creates a local and the module-level stat_pkts
    # is NOT reset for evaluation. Harmless here because this script's
    # printStats() does not read stat_pkts -- confirm this is intended.
    now=pre_time=0.0
    state = [0.0, 0.0]
    stat_coll=pre_stat_coll=0
    stat_succ=pre_stat_succ=0
    stat_pkts=np.zeros(_n)
    while now < 5:
        obs = np.array(state)
        action = agent.predict(obs)  # pure exploitation, no exploration
        new_cwthreshold = 128*(1+action)
        print("new_cwthreshold=", new_cwthreshold)

        t1=now
        while True:
          new_resolve(new_cwthreshold)
          if now - t1 > 0.1:
            break

        collision_rate = (stat_coll - pre_stat_coll) / (stat_succ + stat_coll - pre_stat_succ - pre_stat_coll) * 100
        thr = (stat_succ - pre_stat_succ) * (_pktSize * 8.0) / (now - pre_time)
        next_state = []
        next_state.append(collision_rate)
        next_state.append(pre_collision_rate)
        pre_stat_succ = stat_succ
        pre_stat_coll = stat_coll
        pre_collision_rate = collision_rate
        pre_time = now
        print("now=", now, " collison rate=",collision_rate," thr=", thr)
        state = next_state
    print("="*25, " Evaluation Result:")
    printStats()

 

# Run SETL-DQN training and evaluation when this script is executed.
main()

 

Execution

(execution output omitted)

 

Last Modified: 2022/2/5

 

Dr. Chih-Heng Ke

Department of Computer Science and Information Engineering, National Quemoy University, Kinmen, Taiwan

Email: smallko@gmail.com