Jakub Grajciar | 07363a4 | 2020-04-02 10:02:17 +0200 | [diff] [blame] | 1 | /* |
| 2 | *------------------------------------------------------------------ |
| 3 | * Copyright (c) 2020 Cisco and/or its affiliates. |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at: |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | *------------------------------------------------------------------ |
| 16 | */ |
| 17 | |
| 18 | // Package memif provides the implementation of shared memory interface (memif). |
| 19 | // |
// Memif network interfaces communicate using a UNIX domain socket. This socket
// must first be created using NewSocket(). Then interfaces can be added
// to this socket using NewInterface(). To start communication on each socket
// socket.StartPolling() must be called. socket.StopPolling() will stop
// the communication. When the interface changes link status, the Connected and
// Disconnected callbacks set in Arguments for each interface are called
// respectively. Once the interface is connected, rx and tx queues can be
// acquired using interface.GetRxQueue() and interface.GetTxQueue().
// Packets can be received by calling queue.ReadPacket() on rx queues and
// transmitted by calling queue.WritePacket() on tx queues. If the interface
// is disconnected, queue.ReadPacket() and queue.WritePacket() MUST NOT be called.
| 31 | // |
| 32 | // Data transmission is backed by shared memory. The driver works in |
| 33 | // promiscuous mode only. |
| 34 | package memif |
| 35 | |
| 36 | import ( |
| 37 | "container/list" |
| 38 | "fmt" |
| 39 | "os" |
| 40 | "syscall" |
| 41 | ) |
| 42 | |
// Default configuration values. NewInterface substitutes the three memory
// defaults for any MemoryConfig field that is left at zero.
const (
	// DefaultSocketFilename is the default control socket path.
	// NOTE(review): not referenced in this part of the file; presumably used
	// by NewSocket when no filename is given — confirm.
	DefaultSocketFilename = "/run/vpp/memif.sock"
	// DefaultNumQueuePairs is the number of rx/tx queue pairs used when
	// MemoryConfig.NumQueuePairs is zero.
	DefaultNumQueuePairs = 1
	// DefaultLog2RingSize is the log2 of the ring size used when
	// MemoryConfig.Log2RingSize is zero (1<<10 = 1024 slots).
	DefaultLog2RingSize = 10
	// DefaultPacketBufferSize is the size in bytes of one packet buffer used
	// when MemoryConfig.PacketBufferSize is zero.
	DefaultPacketBufferSize = 2048
)
| 49 | |
// Raw Linux constants used for direct syscalls.
//
// f_add_seals and f_seal_shrink are passed to fcntl in addRegion, and
// efd_nonblock is passed to eventfd2 in eventFd.
// NOTE(review): the names mirror MFD_ALLOW_SEALING, F_ADD_SEALS,
// F_SEAL_SHRINK and EFD_NONBLOCK from the Linux headers; 319 appears to be the
// x86_64 syscall number of memfd_create, which differs on other
// architectures — confirm the supported build targets.
const (
	mfd_allow_sealing = 2
	sys_memfd_create  = 319
	f_add_seals       = 1033
	f_seal_shrink     = 0x0002

	efd_nonblock = 04000
)
| 56 | |
// ConnectedFunc is a callback called when an interface is connected.
// It receives the interface; a non-nil error is returned to the caller of
// the connect step.
type ConnectedFunc func(i *Interface) error

// DisconnectedFunc is a callback called when an interface is disconnected.
// It is invoked before the queues and memory regions are released.
type DisconnectedFunc func(i *Interface) error

// InterruptFunc is a callback that receives the interface it was registered
// on. NewInterface stores it in Interface.onInterrupt.
// NOTE(review): presumably invoked by the socket polling code when a queue's
// interrupt event fd fires — the call site is not in this part of the file.
type InterruptFunc func(i *Interface) error
| 64 | |
// MemoryConfig represents shared memory configuration.
// Zero-valued fields are replaced with the Default* constants by NewInterface.
type MemoryConfig struct {
	NumQueuePairs    uint16 // number of queue pairs (one rx and one tx queue per pair)
	Log2RingSize     uint8  // ring size as log2 (a ring has 1<<Log2RingSize slots)
	PacketBufferSize uint32 // size of single packet buffer in bytes
}
| 71 | |
// Arguments represent interface configuration.
// NewInterface copies the struct, so later changes by the caller do not
// affect an interface that has already been created.
type Arguments struct {
	Id               uint32 // Interface identifier unique across socket. Used to identify peer interface when connecting
	IsMaster         bool   // Interface role master/slave
	Mode             interfaceMode // interface mode; the type is declared elsewhere in the package
	Name             string        // interface name, returned by GetName
	Secret           [24]byte // optional parameter, secrets of the interfaces must match if they are to connect
	MemoryConfig     MemoryConfig // requested shared memory layout; zero fields get defaults
	ConnectedFunc    ConnectedFunc    // callback called when interface changes status to connected
	DisconnectedFunc DisconnectedFunc // callback called when interface changes status to disconnected
	InterruptFunc    InterruptFunc    // optional; when nil, initializeQueues sets rx ring flags to 1, otherwise to 0
	PrivateData      interface{}      // private data used by client program
	InterruptFd      uint16           // set by initializeQueues to the event fd of the last rx queue created, converted to uint16
}
| 86 | |
// memoryRegion represents a shared memory mapped file
type memoryRegion struct {
	data               []byte // mapping of fd obtained from syscall.Mmap; nil until mapped
	size               uint64 // total size of the region in bytes
	fd                 int    // file descriptor backing the region
	packetBufferOffset uint32 // byte offset inside the region where packet buffers start (0 when the region holds no rings)
}
| 94 | |
// Queue represents rx or tx queue
type Queue struct {
	ring        *ring      // ring backing this queue
	i           *Interface // interface the queue belongs to
	lastHead    uint16     // reset to 0 when the queue is created and on connect
	lastTail    uint16     // reset to 0 when the queue is created and on connect
	interruptFd int        // event fd of this queue; closed by close()
}
| 103 | |
// Interface represents memif network interface
type Interface struct {
	args        Arguments     // copy of the configuration passed to NewInterface, with defaults applied
	run         MemoryConfig  // active memory configuration, returned by GetMemoryConfig
	privateData interface{}   // NOTE(review): not referenced in this part of the file; GetPrivateData reads args.PrivateData instead
	listRef     *list.Element // this interface's element in socket.interfaceList
	socket      *Socket       // socket the interface was created on
	cc          *controlChannel // control channel; nil while the interface is disconnected
	remoteName  string          // name of the application hosting the peer interface
	peerName    string          // name of the peer interface
	regions     []memoryRegion  // shared memory regions
	txQueues    []Queue         // transmit queues
	rxQueues    []Queue         // receive queues
	onInterrupt InterruptFunc   // copy of args.InterruptFunc set by NewInterface
}
| 119 | |
// IsMaster returns true if the interface's role is master, else returns false.
// The role is fixed by Arguments.IsMaster at creation time.
func (i *Interface) IsMaster() bool {
	return i.args.IsMaster
}
| 124 | |
// GetRemoteName returns the name of the application on which the peer
// interface exists. The value is cleared to "" when the interface is
// disconnected.
func (i *Interface) GetRemoteName() string {
	return i.remoteName
}
| 130 | |
// GetPeerName returns the peer interface's name. The value is cleared to ""
// when the interface is disconnected.
func (i *Interface) GetPeerName() string {
	return i.peerName
}
| 135 | |
// GetName returns the interface's name as configured in Arguments.Name.
func (i *Interface) GetName() string {
	return i.args.Name
}
| 140 | |
// GetMemoryConfig returns the interface's active memory config (a copy).
// If the interface is not connected the config is invalid.
func (i *Interface) GetMemoryConfig() MemoryConfig {
	return i.run
}
| 146 | |
| 147 | // GetRxQueue returns an rx queue specified by queue index |
| 148 | func (i *Interface) GetRxQueue(qid int) (*Queue, error) { |
| 149 | if qid >= len(i.rxQueues) { |
| 150 | return nil, fmt.Errorf("Invalid Queue index") |
| 151 | } |
| 152 | return &i.rxQueues[qid], nil |
| 153 | } |
| 154 | |
| 155 | // GetRxQueue returns a tx queue specified by queue index |
| 156 | func (i *Interface) GetTxQueue(qid int) (*Queue, error) { |
| 157 | if qid >= len(i.txQueues) { |
| 158 | return nil, fmt.Errorf("Invalid Queue index") |
| 159 | } |
| 160 | return &i.txQueues[qid], nil |
| 161 | } |
| 162 | |
// GetEventFd returns the queue's interrupt event fd.
// The returned error is always nil in the current implementation.
func (q *Queue) GetEventFd() (int, error) {
	return q.interruptFd, nil
}
| 167 | |
// GetFilename returns the socket's filename.
// RequestConnection treats a filename starting with '@' as an abstract
// socket name.
func (socket *Socket) GetFilename() string {
	return socket.filename
}
| 172 | |
// close closes the queue by closing its interrupt event fd.
// The error returned by syscall.Close is not checked.
func (q *Queue) close() {
	syscall.Close(q.interruptFd)
}
| 177 | |
| 178 | // IsConnecting returns true if the interface is connecting |
| 179 | func (i *Interface) IsConnecting() bool { |
| 180 | if i.cc != nil { |
| 181 | return true |
| 182 | } |
| 183 | return false |
| 184 | } |
| 185 | |
| 186 | // IsConnected returns true if the interface is connected |
| 187 | func (i *Interface) IsConnected() bool { |
| 188 | if i.cc != nil && i.cc.isConnected { |
| 189 | return true |
| 190 | } |
| 191 | return false |
| 192 | } |
| 193 | |
| 194 | // Disconnect disconnects the interface |
| 195 | func (i *Interface) Disconnect() (err error) { |
| 196 | if i.cc != nil { |
| 197 | // close control and disconenct interface |
| 198 | return i.cc.close(true, "Interface disconnected") |
| 199 | } |
| 200 | return nil |
| 201 | } |
| 202 | |
| 203 | // disconnect finalizes interface disconnection |
| 204 | func (i *Interface) disconnect() (err error) { |
| 205 | if i.cc == nil { // disconnected |
| 206 | return nil |
| 207 | } |
| 208 | |
| 209 | err = i.args.DisconnectedFunc(i) |
| 210 | if err != nil { |
| 211 | return fmt.Errorf("DisconnectedFunc: ", err) |
| 212 | } |
| 213 | |
| 214 | for _, q := range i.txQueues { |
| 215 | q.close() |
| 216 | } |
| 217 | i.txQueues = []Queue{} |
| 218 | |
| 219 | for _, q := range i.rxQueues { |
| 220 | q.close() |
| 221 | } |
| 222 | i.rxQueues = []Queue{} |
| 223 | |
| 224 | // unmap regions |
| 225 | for _, r := range i.regions { |
| 226 | err = syscall.Munmap(r.data) |
| 227 | if err != nil { |
| 228 | return err |
| 229 | } |
| 230 | err = syscall.Close(r.fd) |
| 231 | if err != nil { |
| 232 | return err |
| 233 | } |
| 234 | } |
| 235 | i.regions = nil |
| 236 | i.cc = nil |
| 237 | |
| 238 | i.peerName = "" |
| 239 | i.remoteName = "" |
| 240 | |
| 241 | return nil |
| 242 | } |
| 243 | |
| 244 | // Delete deletes the interface |
| 245 | func (i *Interface) Delete() (err error) { |
| 246 | i.Disconnect() |
| 247 | |
| 248 | // remove referance on socket |
| 249 | i.socket.interfaceList.Remove(i.listRef) |
| 250 | i = nil |
| 251 | |
| 252 | return nil |
| 253 | } |
| 254 | |
// GetSocket returns the socket the interface belongs to, i.e. the socket
// NewInterface was called on.
func (i *Interface) GetSocket() *Socket {
	return i.socket
}
| 259 | |
// GetPrivateData returns the interface's private data as supplied in
// Arguments.PrivateData.
func (i *Interface) GetPrivateData() interface{} {
	return i.args.PrivateData
}
| 264 | |
// GetId returns the interface's id as supplied in Arguments.Id.
func (i *Interface) GetId() uint32 {
	return i.args.Id
}
| 269 | |
// RoleToString converts an interface role flag to its display name:
// "Master" when isMaster is true, "Slave" otherwise.
func RoleToString(isMaster bool) string {
	role := "Slave"
	if isMaster {
		role = "Master"
	}
	return role
}
| 277 | |
// memifPathIsAbstract reports whether filename denotes an abstract socket
// name, i.e. whether it starts with '@'. An empty filename is not abstract.
func memifPathIsAbstract(filename string) bool {
	// Check the length first: indexing an empty string would panic.
	return len(filename) > 0 && filename[0] == '@'
}
| 281 | |
Jakub Grajciar | 07363a4 | 2020-04-02 10:02:17 +0200 | [diff] [blame] | 282 | // RequestConnection is used by slave interface to connect to a socket and |
| 283 | // create a control channel |
| 284 | func (i *Interface) RequestConnection() error { |
| 285 | if i.IsMaster() { |
| 286 | return fmt.Errorf("Only slave can request connection") |
| 287 | } |
| 288 | // create socket |
| 289 | fd, err := syscall.Socket(syscall.AF_UNIX, syscall.SOCK_SEQPACKET, 0) |
| 290 | if err != nil { |
| 291 | return fmt.Errorf("Failed to create UNIX domain socket: %v", err) |
| 292 | } |
| 293 | usa := &syscall.SockaddrUnix{Name: i.socket.filename} |
| 294 | |
Daniel Béreš | 82ec908 | 2022-07-27 12:22:39 +0000 | [diff] [blame^] | 295 | if memifPathIsAbstract(i.socket.GetFilename()) { |
| 296 | usa.Name = "\000" + usa.Name[1:] |
| 297 | } |
Jakub Grajciar | 07363a4 | 2020-04-02 10:02:17 +0200 | [diff] [blame] | 298 | // Connect to listener socket |
| 299 | err = syscall.Connect(fd, usa) |
| 300 | if err != nil { |
| 301 | return fmt.Errorf("Failed to connect socket %s : %v", i.socket.filename, err) |
| 302 | } |
| 303 | |
| 304 | // Create control channel |
| 305 | i.cc, err = i.socket.addControlChannel(fd, i) |
| 306 | if err != nil { |
| 307 | return fmt.Errorf("Failed to create control channel: %v", err) |
| 308 | } |
| 309 | |
| 310 | return nil |
| 311 | } |
| 312 | |
| 313 | // NewInterface returns a new memif network interface. When creating an interface |
| 314 | // it's id must be unique across socket with the exception of loopback interface |
| 315 | // in which case the id is the same but role differs |
| 316 | func (socket *Socket) NewInterface(args *Arguments) (*Interface, error) { |
| 317 | var err error |
| 318 | // make sure the ID is unique on this socket |
| 319 | for elt := socket.interfaceList.Front(); elt != nil; elt = elt.Next() { |
| 320 | i, ok := elt.Value.(*Interface) |
| 321 | if ok { |
| 322 | if i.args.Id == args.Id && i.args.IsMaster == args.IsMaster { |
| 323 | return nil, fmt.Errorf("Interface with id %u role %s already exists on this socket", args.Id, RoleToString(args.IsMaster)) |
| 324 | } |
| 325 | } |
| 326 | } |
| 327 | |
| 328 | // copy interface configuration |
| 329 | i := Interface{ |
Daniel Béreš | 82ec908 | 2022-07-27 12:22:39 +0000 | [diff] [blame^] | 330 | args: *args, |
| 331 | onInterrupt: args.InterruptFunc, |
Jakub Grajciar | 07363a4 | 2020-04-02 10:02:17 +0200 | [diff] [blame] | 332 | } |
| 333 | // set default values |
| 334 | if i.args.MemoryConfig.NumQueuePairs == 0 { |
| 335 | i.args.MemoryConfig.NumQueuePairs = DefaultNumQueuePairs |
| 336 | } |
| 337 | if i.args.MemoryConfig.Log2RingSize == 0 { |
| 338 | i.args.MemoryConfig.Log2RingSize = DefaultLog2RingSize |
| 339 | } |
| 340 | if i.args.MemoryConfig.PacketBufferSize == 0 { |
| 341 | i.args.MemoryConfig.PacketBufferSize = DefaultPacketBufferSize |
| 342 | } |
| 343 | |
| 344 | i.socket = socket |
| 345 | |
| 346 | // append interface to the list |
| 347 | i.listRef = socket.interfaceList.PushBack(&i) |
| 348 | |
| 349 | if i.args.IsMaster { |
| 350 | if socket.listener == nil { |
| 351 | err = socket.addListener() |
| 352 | if err != nil { |
| 353 | return nil, fmt.Errorf("Failed to create listener channel: %s", err) |
| 354 | } |
| 355 | } |
| 356 | } |
| 357 | |
| 358 | return &i, nil |
| 359 | } |
| 360 | |
| 361 | // eventFd returns an eventfd (SYS_EVENTFD2) |
| 362 | func eventFd() (efd int, err error) { |
| 363 | u_efd, _, errno := syscall.Syscall(syscall.SYS_EVENTFD2, uintptr(0), uintptr(efd_nonblock), 0) |
| 364 | if errno != 0 { |
| 365 | return -1, os.NewSyscallError("eventfd", errno) |
| 366 | } |
| 367 | return int(u_efd), nil |
| 368 | } |
| 369 | |
| 370 | // addRegions creates and adds a new memory region to the interface (slave only) |
| 371 | func (i *Interface) addRegion(hasPacketBuffers bool, hasRings bool) (err error) { |
| 372 | var r memoryRegion |
| 373 | |
| 374 | if hasRings { |
| 375 | r.packetBufferOffset = uint32((i.run.NumQueuePairs + i.run.NumQueuePairs) * (ringSize + descSize*(1<<i.run.Log2RingSize))) |
| 376 | } else { |
| 377 | r.packetBufferOffset = 0 |
| 378 | } |
| 379 | |
| 380 | if hasPacketBuffers { |
| 381 | r.size = uint64(r.packetBufferOffset + i.run.PacketBufferSize*uint32(1<<i.run.Log2RingSize)*uint32(i.run.NumQueuePairs+i.run.NumQueuePairs)) |
| 382 | } else { |
| 383 | r.size = uint64(r.packetBufferOffset) |
| 384 | } |
| 385 | |
| 386 | r.fd, err = memfdCreate() |
| 387 | if err != nil { |
| 388 | return err |
| 389 | } |
| 390 | |
| 391 | _, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(r.fd), uintptr(f_add_seals), uintptr(f_seal_shrink)) |
| 392 | if errno != 0 { |
| 393 | syscall.Close(r.fd) |
| 394 | return fmt.Errorf("memfdCreate: %s", os.NewSyscallError("fcntl", errno)) |
| 395 | } |
| 396 | |
| 397 | err = syscall.Ftruncate(r.fd, int64(r.size)) |
| 398 | if err != nil { |
| 399 | syscall.Close(r.fd) |
| 400 | r.fd = -1 |
| 401 | return fmt.Errorf("memfdCreate: %s", err) |
| 402 | } |
| 403 | |
| 404 | r.data, err = syscall.Mmap(r.fd, 0, int(r.size), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED) |
| 405 | if err != nil { |
| 406 | return fmt.Errorf("addRegion: %s", err) |
| 407 | } |
| 408 | |
| 409 | i.regions = append(i.regions, r) |
| 410 | |
| 411 | return nil |
| 412 | } |
| 413 | |
| 414 | // initializeRegions initializes interfaces regions (slave only) |
| 415 | func (i *Interface) initializeRegions() (err error) { |
| 416 | |
| 417 | err = i.addRegion(true, true) |
| 418 | if err != nil { |
| 419 | return fmt.Errorf("initializeRegions: %s", err) |
| 420 | } |
| 421 | |
| 422 | return nil |
| 423 | } |
| 424 | |
| 425 | // initializeQueues initializes interfaces queues (slave only) |
| 426 | func (i *Interface) initializeQueues() (err error) { |
| 427 | var q *Queue |
| 428 | var desc descBuf |
| 429 | var slot int |
| 430 | |
| 431 | desc = newDescBuf() |
| 432 | desc.setFlags(0) |
| 433 | desc.setRegion(0) |
| 434 | desc.setLength(int(i.run.PacketBufferSize)) |
| 435 | |
| 436 | for qid := 0; qid < int(i.run.NumQueuePairs); qid++ { |
| 437 | /* TX */ |
| 438 | q = &Queue{ |
| 439 | ring: i.newRing(0, ringTypeS2M, qid), |
| 440 | lastHead: 0, |
| 441 | lastTail: 0, |
| 442 | i: i, |
| 443 | } |
| 444 | q.ring.setCookie(cookie) |
| 445 | q.ring.setFlags(1) |
| 446 | q.interruptFd, err = eventFd() |
| 447 | if err != nil { |
| 448 | return err |
| 449 | } |
Daniel Béreš | 82ec908 | 2022-07-27 12:22:39 +0000 | [diff] [blame^] | 450 | i.socket.addInterrupt(q.interruptFd) |
Jakub Grajciar | 07363a4 | 2020-04-02 10:02:17 +0200 | [diff] [blame] | 451 | q.putRing() |
| 452 | i.txQueues = append(i.txQueues, *q) |
| 453 | |
| 454 | for j := 0; j < q.ring.size; j++ { |
| 455 | slot = qid*q.ring.size + j |
| 456 | desc.setOffset(int(i.regions[0].packetBufferOffset + uint32(slot)*i.run.PacketBufferSize)) |
| 457 | q.putDescBuf(slot, desc) |
| 458 | } |
| 459 | } |
| 460 | for qid := 0; qid < int(i.run.NumQueuePairs); qid++ { |
| 461 | /* RX */ |
| 462 | q = &Queue{ |
| 463 | ring: i.newRing(0, ringTypeM2S, qid), |
| 464 | lastHead: 0, |
| 465 | lastTail: 0, |
| 466 | i: i, |
| 467 | } |
| 468 | q.ring.setCookie(cookie) |
Daniel Béreš | 82ec908 | 2022-07-27 12:22:39 +0000 | [diff] [blame^] | 469 | if i.args.InterruptFunc == nil { |
| 470 | q.ring.setFlags(1) |
| 471 | } else { |
| 472 | q.ring.setFlags(0) |
| 473 | } |
Jakub Grajciar | 07363a4 | 2020-04-02 10:02:17 +0200 | [diff] [blame] | 474 | q.interruptFd, err = eventFd() |
| 475 | if err != nil { |
| 476 | return err |
| 477 | } |
Daniel Béreš | 82ec908 | 2022-07-27 12:22:39 +0000 | [diff] [blame^] | 478 | i.args.InterruptFd = uint16(q.interruptFd) |
| 479 | i.socket.addInterrupt(q.interruptFd) |
Jakub Grajciar | 07363a4 | 2020-04-02 10:02:17 +0200 | [diff] [blame] | 480 | q.putRing() |
| 481 | i.rxQueues = append(i.rxQueues, *q) |
| 482 | |
| 483 | for j := 0; j < q.ring.size; j++ { |
| 484 | slot = qid*q.ring.size + j |
| 485 | desc.setOffset(int(i.regions[0].packetBufferOffset + uint32(slot)*i.run.PacketBufferSize)) |
| 486 | q.putDescBuf(slot, desc) |
| 487 | } |
| 488 | } |
| 489 | |
| 490 | return nil |
| 491 | } |
| 492 | |
| 493 | // connect finalizes interface connection |
| 494 | func (i *Interface) connect() (err error) { |
| 495 | for rid, _ := range i.regions { |
| 496 | r := &i.regions[rid] |
| 497 | if r.data == nil { |
| 498 | r.data, err = syscall.Mmap(r.fd, 0, int(r.size), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED) |
| 499 | if err != nil { |
| 500 | return fmt.Errorf("Mmap: %s", err) |
| 501 | } |
| 502 | } |
| 503 | } |
| 504 | |
| 505 | for _, q := range i.txQueues { |
| 506 | q.updateRing() |
| 507 | |
| 508 | if q.ring.getCookie() != cookie { |
| 509 | return fmt.Errorf("Wrong cookie") |
| 510 | } |
| 511 | |
| 512 | q.lastHead = 0 |
| 513 | q.lastTail = 0 |
| 514 | } |
| 515 | |
| 516 | for _, q := range i.rxQueues { |
| 517 | q.updateRing() |
| 518 | |
| 519 | if q.ring.getCookie() != cookie { |
| 520 | return fmt.Errorf("Wrong cookie") |
| 521 | } |
| 522 | |
| 523 | q.lastHead = 0 |
| 524 | q.lastTail = 0 |
| 525 | } |
| 526 | |
| 527 | return i.args.ConnectedFunc(i) |
| 528 | } |