Jakub Grajciar | 07363a4 | 2020-04-02 10:02:17 +0200 | [diff] [blame] | 1 | /* |
| 2 | *------------------------------------------------------------------ |
| 3 | * Copyright (c) 2020 Cisco and/or its affiliates. |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at: |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | *------------------------------------------------------------------ |
| 16 | */ |
| 17 | |
| 18 | // Package memif provides the implementation of shared memory interface (memif). |
| 19 | // |
| 20 | // Memif network interfaces communicate using UNIX domain socket. This socket |
| 21 | // must be first created using NewSocket(). Then interfaces can be added |
| 22 | // to this socket using NewInterface(). To start communication on each socket |
| 23 | // socket.StartPolling() must be called. socket.StopPolling() will stop |
| 24 | // the communication. When the interface changes link status Connected and |
| 25 | // Disconnected callbacks set in Arguments for each interface are called |
| 26 | // respectively. Once the interface is connected rx and tx queues can be |
| 27 | // acquired using interface.GetRxQueue() and interface.GetTxQueue(). |
| 28 | // Packets can be transmitted by calling queue.ReadPacket() on rx queues and |
| 29 | // queue.WritePacket() on tx queues. If the interface is disconnected |
| 30 | // queue.ReadPacket() and queue.WritePacket() MUST not be called. |
| 31 | // |
| 32 | // Data transmission is backed by shared memory. The driver works in |
| 33 | // promiscuous mode only. |
| 34 | package memif |
| 35 | |
| 36 | import ( |
| 37 | "container/list" |
| 38 | "fmt" |
| 39 | "os" |
| 40 | "syscall" |
| 41 | ) |
| 42 | |
// Default values for socket and interface configuration.
const (
	// DefaultSocketFilename is the default path of the memif UNIX domain socket.
	DefaultSocketFilename = "/run/vpp/memif.sock"
	// DefaultNumQueuePairs is applied by NewInterface when
	// Arguments.MemoryConfig.NumQueuePairs is zero.
	DefaultNumQueuePairs = 1
	// DefaultLog2RingSize is applied by NewInterface when
	// Arguments.MemoryConfig.Log2RingSize is zero (ring of 1<<10 entries).
	DefaultLog2RingSize = 10
	// DefaultPacketBufferSize is applied by NewInterface when
	// Arguments.MemoryConfig.PacketBufferSize is zero.
	DefaultPacketBufferSize = 2048
)
| 49 | |
// Raw Linux constants used for the syscalls issued by this package.
// NOTE(review): these are hard-coded numbers; sys_memfd_create = 319 is the
// x86-64 syscall number and differs on other architectures — confirm the
// supported targets.
const (
	// mfd_allow_sealing is the memfd_create flag that permits adding seals.
	mfd_allow_sealing = 2
	// sys_memfd_create is the memfd_create syscall number.
	sys_memfd_create = 319
	// f_add_seals is the fcntl command used by addRegion to seal a region fd.
	f_add_seals = 1033
	// f_seal_shrink is the seal passed with f_add_seals in addRegion.
	f_seal_shrink = 0x0002

	// efd_nonblock is the flag passed to eventfd2 by eventFd.
	efd_nonblock = 04000
)
| 56 | |
// ConnectedFunc is a callback called when an interface is connected.
// It is invoked as the last step of the interface's connect routine, and the
// error it returns becomes the result of that routine.
type ConnectedFunc func(i *Interface) error
| 59 | |
// DisconnectedFunc is a callback called when an interface is disconnected.
// It is invoked before the interface's queues and memory regions are
// released; a non-nil error aborts the disconnect.
type DisconnectedFunc func(i *Interface) error
| 62 | |
// MemoryConfig represents shared memory configuration.
// Fields left at zero in Arguments.MemoryConfig are replaced with the
// corresponding Default* constant by NewInterface.
type MemoryConfig struct {
	NumQueuePairs    uint16 // number of queue pairs (one tx and one rx queue per pair)
	Log2RingSize     uint8  // ring size as log2
	PacketBufferSize uint32 // size of single packet buffer
}
| 69 | |
// Arguments represent interface configuration.
// NewInterface stores a copy of this struct in the created Interface.
type Arguments struct {
	Id               uint32 // Interface identifier unique across socket. Used to identify peer interface when connecting
	IsMaster         bool   // Interface role master/slave
	Mode             interfaceMode // interface mode; the type is declared elsewhere in the package
	Name             string        // interface name, returned by GetName
	Secret           [24]byte // optional parameter, secrets of the interfaces must match if they are to connect
	MemoryConfig     MemoryConfig // requested memory configuration; zero fields get default values
	ConnectedFunc    ConnectedFunc    // callback called when interface changes status to connected
	DisconnectedFunc DisconnectedFunc // callback called when interface changes status to disconnected
	PrivateData      interface{}      // private data used by client program
}
| 82 | |
// memoryRegion represents a shared memory mapped file
type memoryRegion struct {
	data               []byte // mapping of fd; nil until the region has been mapped
	size               uint64 // size of the region in bytes (length used for Ftruncate/Mmap)
	fd                 int    // file descriptor backing the region
	packetBufferOffset uint32 // offset in bytes at which packet buffers start within the region
}
| 90 | |
// Queue represents rx or tx queue
type Queue struct {
	ring        *ring      // ring backing this queue
	i           *Interface // interface the queue belongs to
	lastHead    uint16     // reset to 0 when the queue is created and on connect
	lastTail    uint16     // reset to 0 when the queue is created and on connect
	interruptFd int        // event fd exposed by GetEventFd and closed by close
}
| 99 | |
// Interface represents memif network interface
type Interface struct {
	args        Arguments      // copy of the configuration passed to NewInterface (with defaults applied)
	run         MemoryConfig   // active memory configuration, returned by GetMemoryConfig
	privateData interface{}    // NOTE(review): not referenced in this part of the file; GetPrivateData reads args.PrivateData
	listRef     *list.Element  // this interface's element in socket.interfaceList
	socket      *Socket        // socket the interface was created on
	cc          *controlChannel // control channel; nil while the interface is disconnected
	remoteName  string         // name of the application hosting the peer interface
	peerName    string         // name of the peer interface
	regions     []memoryRegion // shared memory regions
	txQueues    []Queue        // tx queues, indexed by queue id
	rxQueues    []Queue        // rx queues, indexed by queue id
}
| 114 | |
| 115 | // IsMaster returns true if the interfaces role is master, else returns false |
| 116 | func (i *Interface) IsMaster() bool { |
| 117 | return i.args.IsMaster |
| 118 | } |
| 119 | |
| 120 | // GetRemoteName returns the name of the application on which the peer |
| 121 | // interface exists |
| 122 | func (i *Interface) GetRemoteName() string { |
| 123 | return i.remoteName |
| 124 | } |
| 125 | |
| 126 | // GetPeerName returns peer interfaces name |
| 127 | func (i *Interface) GetPeerName() string { |
| 128 | return i.peerName |
| 129 | } |
| 130 | |
| 131 | // GetName returens interfaces name |
| 132 | func (i *Interface) GetName() string { |
| 133 | return i.args.Name |
| 134 | } |
| 135 | |
| 136 | // GetMemoryConfig returns interfaces active memory config. |
| 137 | // If interface is not connected the config is invalid. |
| 138 | func (i *Interface) GetMemoryConfig() MemoryConfig { |
| 139 | return i.run |
| 140 | } |
| 141 | |
| 142 | // GetRxQueue returns an rx queue specified by queue index |
| 143 | func (i *Interface) GetRxQueue(qid int) (*Queue, error) { |
| 144 | if qid >= len(i.rxQueues) { |
| 145 | return nil, fmt.Errorf("Invalid Queue index") |
| 146 | } |
| 147 | return &i.rxQueues[qid], nil |
| 148 | } |
| 149 | |
| 150 | // GetRxQueue returns a tx queue specified by queue index |
| 151 | func (i *Interface) GetTxQueue(qid int) (*Queue, error) { |
| 152 | if qid >= len(i.txQueues) { |
| 153 | return nil, fmt.Errorf("Invalid Queue index") |
| 154 | } |
| 155 | return &i.txQueues[qid], nil |
| 156 | } |
| 157 | |
| 158 | // GetEventFd returns queues interrupt event fd |
| 159 | func (q *Queue) GetEventFd() (int, error) { |
| 160 | return q.interruptFd, nil |
| 161 | } |
| 162 | |
| 163 | // GetFilename returns sockets filename |
| 164 | func (socket *Socket) GetFilename() string { |
| 165 | return socket.filename |
| 166 | } |
| 167 | |
| 168 | // close closes the queue |
| 169 | func (q *Queue) close() { |
| 170 | syscall.Close(q.interruptFd) |
| 171 | } |
| 172 | |
| 173 | // IsConnecting returns true if the interface is connecting |
| 174 | func (i *Interface) IsConnecting() bool { |
| 175 | if i.cc != nil { |
| 176 | return true |
| 177 | } |
| 178 | return false |
| 179 | } |
| 180 | |
| 181 | // IsConnected returns true if the interface is connected |
| 182 | func (i *Interface) IsConnected() bool { |
| 183 | if i.cc != nil && i.cc.isConnected { |
| 184 | return true |
| 185 | } |
| 186 | return false |
| 187 | } |
| 188 | |
| 189 | // Disconnect disconnects the interface |
| 190 | func (i *Interface) Disconnect() (err error) { |
| 191 | if i.cc != nil { |
| 192 | // close control and disconenct interface |
| 193 | return i.cc.close(true, "Interface disconnected") |
| 194 | } |
| 195 | return nil |
| 196 | } |
| 197 | |
| 198 | // disconnect finalizes interface disconnection |
| 199 | func (i *Interface) disconnect() (err error) { |
| 200 | if i.cc == nil { // disconnected |
| 201 | return nil |
| 202 | } |
| 203 | |
| 204 | err = i.args.DisconnectedFunc(i) |
| 205 | if err != nil { |
| 206 | return fmt.Errorf("DisconnectedFunc: ", err) |
| 207 | } |
| 208 | |
| 209 | for _, q := range i.txQueues { |
| 210 | q.close() |
| 211 | } |
| 212 | i.txQueues = []Queue{} |
| 213 | |
| 214 | for _, q := range i.rxQueues { |
| 215 | q.close() |
| 216 | } |
| 217 | i.rxQueues = []Queue{} |
| 218 | |
| 219 | // unmap regions |
| 220 | for _, r := range i.regions { |
| 221 | err = syscall.Munmap(r.data) |
| 222 | if err != nil { |
| 223 | return err |
| 224 | } |
| 225 | err = syscall.Close(r.fd) |
| 226 | if err != nil { |
| 227 | return err |
| 228 | } |
| 229 | } |
| 230 | i.regions = nil |
| 231 | i.cc = nil |
| 232 | |
| 233 | i.peerName = "" |
| 234 | i.remoteName = "" |
| 235 | |
| 236 | return nil |
| 237 | } |
| 238 | |
| 239 | // Delete deletes the interface |
| 240 | func (i *Interface) Delete() (err error) { |
| 241 | i.Disconnect() |
| 242 | |
| 243 | // remove referance on socket |
| 244 | i.socket.interfaceList.Remove(i.listRef) |
| 245 | i = nil |
| 246 | |
| 247 | return nil |
| 248 | } |
| 249 | |
| 250 | // GetSocket returns the socket the interface belongs to |
| 251 | func (i *Interface) GetSocket() *Socket { |
| 252 | return i.socket |
| 253 | } |
| 254 | |
| 255 | // GetPrivateDate returns interfaces private data |
| 256 | func (i *Interface) GetPrivateData() interface{} { |
| 257 | return i.args.PrivateData |
| 258 | } |
| 259 | |
| 260 | // GetId returns interfaces id |
| 261 | func (i *Interface) GetId() uint32 { |
| 262 | return i.args.Id |
| 263 | } |
| 264 | |
// RoleToString returns "Master" if isMaster is true, else returns "Slave".
func RoleToString(isMaster bool) string {
	if !isMaster {
		return "Slave"
	}
	return "Master"
}
| 272 | |
| 273 | // RequestConnection is used by slave interface to connect to a socket and |
| 274 | // create a control channel |
| 275 | func (i *Interface) RequestConnection() error { |
| 276 | if i.IsMaster() { |
| 277 | return fmt.Errorf("Only slave can request connection") |
| 278 | } |
| 279 | // create socket |
| 280 | fd, err := syscall.Socket(syscall.AF_UNIX, syscall.SOCK_SEQPACKET, 0) |
| 281 | if err != nil { |
| 282 | return fmt.Errorf("Failed to create UNIX domain socket: %v", err) |
| 283 | } |
| 284 | usa := &syscall.SockaddrUnix{Name: i.socket.filename} |
| 285 | |
| 286 | // Connect to listener socket |
| 287 | err = syscall.Connect(fd, usa) |
| 288 | if err != nil { |
| 289 | return fmt.Errorf("Failed to connect socket %s : %v", i.socket.filename, err) |
| 290 | } |
| 291 | |
| 292 | // Create control channel |
| 293 | i.cc, err = i.socket.addControlChannel(fd, i) |
| 294 | if err != nil { |
| 295 | return fmt.Errorf("Failed to create control channel: %v", err) |
| 296 | } |
| 297 | |
| 298 | return nil |
| 299 | } |
| 300 | |
| 301 | // NewInterface returns a new memif network interface. When creating an interface |
| 302 | // it's id must be unique across socket with the exception of loopback interface |
| 303 | // in which case the id is the same but role differs |
| 304 | func (socket *Socket) NewInterface(args *Arguments) (*Interface, error) { |
| 305 | var err error |
| 306 | // make sure the ID is unique on this socket |
| 307 | for elt := socket.interfaceList.Front(); elt != nil; elt = elt.Next() { |
| 308 | i, ok := elt.Value.(*Interface) |
| 309 | if ok { |
| 310 | if i.args.Id == args.Id && i.args.IsMaster == args.IsMaster { |
| 311 | return nil, fmt.Errorf("Interface with id %u role %s already exists on this socket", args.Id, RoleToString(args.IsMaster)) |
| 312 | } |
| 313 | } |
| 314 | } |
| 315 | |
| 316 | // copy interface configuration |
| 317 | i := Interface{ |
| 318 | args: *args, |
| 319 | } |
| 320 | // set default values |
| 321 | if i.args.MemoryConfig.NumQueuePairs == 0 { |
| 322 | i.args.MemoryConfig.NumQueuePairs = DefaultNumQueuePairs |
| 323 | } |
| 324 | if i.args.MemoryConfig.Log2RingSize == 0 { |
| 325 | i.args.MemoryConfig.Log2RingSize = DefaultLog2RingSize |
| 326 | } |
| 327 | if i.args.MemoryConfig.PacketBufferSize == 0 { |
| 328 | i.args.MemoryConfig.PacketBufferSize = DefaultPacketBufferSize |
| 329 | } |
| 330 | |
| 331 | i.socket = socket |
| 332 | |
| 333 | // append interface to the list |
| 334 | i.listRef = socket.interfaceList.PushBack(&i) |
| 335 | |
| 336 | if i.args.IsMaster { |
| 337 | if socket.listener == nil { |
| 338 | err = socket.addListener() |
| 339 | if err != nil { |
| 340 | return nil, fmt.Errorf("Failed to create listener channel: %s", err) |
| 341 | } |
| 342 | } |
| 343 | } |
| 344 | |
| 345 | return &i, nil |
| 346 | } |
| 347 | |
| 348 | // eventFd returns an eventfd (SYS_EVENTFD2) |
| 349 | func eventFd() (efd int, err error) { |
| 350 | u_efd, _, errno := syscall.Syscall(syscall.SYS_EVENTFD2, uintptr(0), uintptr(efd_nonblock), 0) |
| 351 | if errno != 0 { |
| 352 | return -1, os.NewSyscallError("eventfd", errno) |
| 353 | } |
| 354 | return int(u_efd), nil |
| 355 | } |
| 356 | |
| 357 | // addRegions creates and adds a new memory region to the interface (slave only) |
| 358 | func (i *Interface) addRegion(hasPacketBuffers bool, hasRings bool) (err error) { |
| 359 | var r memoryRegion |
| 360 | |
| 361 | if hasRings { |
| 362 | r.packetBufferOffset = uint32((i.run.NumQueuePairs + i.run.NumQueuePairs) * (ringSize + descSize*(1<<i.run.Log2RingSize))) |
| 363 | } else { |
| 364 | r.packetBufferOffset = 0 |
| 365 | } |
| 366 | |
| 367 | if hasPacketBuffers { |
| 368 | r.size = uint64(r.packetBufferOffset + i.run.PacketBufferSize*uint32(1<<i.run.Log2RingSize)*uint32(i.run.NumQueuePairs+i.run.NumQueuePairs)) |
| 369 | } else { |
| 370 | r.size = uint64(r.packetBufferOffset) |
| 371 | } |
| 372 | |
| 373 | r.fd, err = memfdCreate() |
| 374 | if err != nil { |
| 375 | return err |
| 376 | } |
| 377 | |
| 378 | _, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(r.fd), uintptr(f_add_seals), uintptr(f_seal_shrink)) |
| 379 | if errno != 0 { |
| 380 | syscall.Close(r.fd) |
| 381 | return fmt.Errorf("memfdCreate: %s", os.NewSyscallError("fcntl", errno)) |
| 382 | } |
| 383 | |
| 384 | err = syscall.Ftruncate(r.fd, int64(r.size)) |
| 385 | if err != nil { |
| 386 | syscall.Close(r.fd) |
| 387 | r.fd = -1 |
| 388 | return fmt.Errorf("memfdCreate: %s", err) |
| 389 | } |
| 390 | |
| 391 | r.data, err = syscall.Mmap(r.fd, 0, int(r.size), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED) |
| 392 | if err != nil { |
| 393 | return fmt.Errorf("addRegion: %s", err) |
| 394 | } |
| 395 | |
| 396 | i.regions = append(i.regions, r) |
| 397 | |
| 398 | return nil |
| 399 | } |
| 400 | |
| 401 | // initializeRegions initializes interfaces regions (slave only) |
| 402 | func (i *Interface) initializeRegions() (err error) { |
| 403 | |
| 404 | err = i.addRegion(true, true) |
| 405 | if err != nil { |
| 406 | return fmt.Errorf("initializeRegions: %s", err) |
| 407 | } |
| 408 | |
| 409 | return nil |
| 410 | } |
| 411 | |
| 412 | // initializeQueues initializes interfaces queues (slave only) |
| 413 | func (i *Interface) initializeQueues() (err error) { |
| 414 | var q *Queue |
| 415 | var desc descBuf |
| 416 | var slot int |
| 417 | |
| 418 | desc = newDescBuf() |
| 419 | desc.setFlags(0) |
| 420 | desc.setRegion(0) |
| 421 | desc.setLength(int(i.run.PacketBufferSize)) |
| 422 | |
| 423 | for qid := 0; qid < int(i.run.NumQueuePairs); qid++ { |
| 424 | /* TX */ |
| 425 | q = &Queue{ |
| 426 | ring: i.newRing(0, ringTypeS2M, qid), |
| 427 | lastHead: 0, |
| 428 | lastTail: 0, |
| 429 | i: i, |
| 430 | } |
| 431 | q.ring.setCookie(cookie) |
| 432 | q.ring.setFlags(1) |
| 433 | q.interruptFd, err = eventFd() |
| 434 | if err != nil { |
| 435 | return err |
| 436 | } |
| 437 | q.putRing() |
| 438 | i.txQueues = append(i.txQueues, *q) |
| 439 | |
| 440 | for j := 0; j < q.ring.size; j++ { |
| 441 | slot = qid*q.ring.size + j |
| 442 | desc.setOffset(int(i.regions[0].packetBufferOffset + uint32(slot)*i.run.PacketBufferSize)) |
| 443 | q.putDescBuf(slot, desc) |
| 444 | } |
| 445 | } |
| 446 | for qid := 0; qid < int(i.run.NumQueuePairs); qid++ { |
| 447 | /* RX */ |
| 448 | q = &Queue{ |
| 449 | ring: i.newRing(0, ringTypeM2S, qid), |
| 450 | lastHead: 0, |
| 451 | lastTail: 0, |
| 452 | i: i, |
| 453 | } |
| 454 | q.ring.setCookie(cookie) |
| 455 | q.ring.setFlags(1) |
| 456 | q.interruptFd, err = eventFd() |
| 457 | if err != nil { |
| 458 | return err |
| 459 | } |
| 460 | q.putRing() |
| 461 | i.rxQueues = append(i.rxQueues, *q) |
| 462 | |
| 463 | for j := 0; j < q.ring.size; j++ { |
| 464 | slot = qid*q.ring.size + j |
| 465 | desc.setOffset(int(i.regions[0].packetBufferOffset + uint32(slot)*i.run.PacketBufferSize)) |
| 466 | q.putDescBuf(slot, desc) |
| 467 | } |
| 468 | } |
| 469 | |
| 470 | return nil |
| 471 | } |
| 472 | |
| 473 | // connect finalizes interface connection |
| 474 | func (i *Interface) connect() (err error) { |
| 475 | for rid, _ := range i.regions { |
| 476 | r := &i.regions[rid] |
| 477 | if r.data == nil { |
| 478 | r.data, err = syscall.Mmap(r.fd, 0, int(r.size), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED) |
| 479 | if err != nil { |
| 480 | return fmt.Errorf("Mmap: %s", err) |
| 481 | } |
| 482 | } |
| 483 | } |
| 484 | |
| 485 | for _, q := range i.txQueues { |
| 486 | q.updateRing() |
| 487 | |
| 488 | if q.ring.getCookie() != cookie { |
| 489 | return fmt.Errorf("Wrong cookie") |
| 490 | } |
| 491 | |
| 492 | q.lastHead = 0 |
| 493 | q.lastTail = 0 |
| 494 | } |
| 495 | |
| 496 | for _, q := range i.rxQueues { |
| 497 | q.updateRing() |
| 498 | |
| 499 | if q.ring.getCookie() != cookie { |
| 500 | return fmt.Errorf("Wrong cookie") |
| 501 | } |
| 502 | |
| 503 | q.lastHead = 0 |
| 504 | q.lastTail = 0 |
| 505 | } |
| 506 | |
| 507 | return i.args.ConnectedFunc(i) |
| 508 | } |