/*
 * fs/cifs/file.c
 *
 * vfs operations that deal with files
 *
 * Copyright (C) International Business Machines Corp., 2002,2010
 * Author(s): Steve French (sfrench@us.ibm.com)
 *            Jeremy Allison (jra@samba.org)
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
 * the GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"


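/*
 * Map the POSIX open flags from the VFS onto the NT desired-access bits
 * sent in an SMB open request: read/write opens become GENERIC_READ
 * and/or GENERIC_WRITE, anything else falls back to an explicit bit set.
 */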
static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request; it can
		   cause unnecessary access-denied errors on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}

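/*
 * Map VFS open flags onto the SMB_O_* flags used by the CIFS POSIX
 * extensions open call (access mode plus create/truncate/sync and
 * related modifiers).
 */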
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}

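/*
 * Choose the CIFS create disposition that matches the caller's O_CREAT,
 * O_EXCL and O_TRUNC combination (see the flag mapping table further down
 * in this file).
 */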
static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}

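/*
 * Open a file using the CIFS POSIX extensions. On success, optionally
 * instantiate or update the inode in *pinode from the returned
 * FILE_UNIX_BASIC_INFO.
 */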
int cifs_posix_open(char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}

static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match for disposition
 *	FILE_SUPERSEDE (ie create whether or not the file exists).
 *	O_CREAT | O_TRUNC is similar, but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on the open call).
 *
 *	O_SYNC is a reasonable match to the CIFS writethrough flag
 *	and the read write flags match reasonably. O_LARGEFILE
 *	is irrelevant because largefile support is always used
 *	by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

out:
	kfree(buf);
	return rc;
}

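/*
 * Return true if any open file instance on this inode currently holds at
 * least one byte-range lock. Walks every per-fid lock list under a shared
 * lock_sem.
 */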
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}

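/*
 * Allocate and initialize the cifsFileInfo for a freshly opened handle,
 * link it onto the tcon and inode open-file lists, and apply the oplock
 * the server granted (downgraded to None if mandatory brlocks exist).
 */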
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;
	down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	spin_lock(&tcon->open_file_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);

	/* if this is a readable file instance, put it first in the list */
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * tcon->open_file_lock and cifs_file->file_info_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifsLockInfo *li, *tmp;
	struct cifs_fid fid;
	struct cifs_pending_open open;
	bool oplock_break_cancelled;

	spin_lock(&tcon->open_file_lock);

	spin_lock(&cifs_file->file_info_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need to invalidate the mapping on
		 * the last close because it may cause an error when we open
		 * this file again and get at least a level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file->f_path.dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read(&cinode->lock_sem);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}

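/*
 * Reopen a file handle that was invalidated, e.g. by reconnect. If
 * can_flush is set, dirty pages are written back and the inode info is
 * refreshed before the caller resumes I/O on the handle.
 */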
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem, can end up causing writepage to
	 * get called and if the server was down that means we end up here,
	 * and we can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
	     le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * Fallthrough to retry open the old way on errors; in the
		 * reconnect path especially, it is important to retry hard.
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
	if (file->private_data != NULL) {
		cifsFileInfo_put(file->private_data);
		file->private_data = NULL;
	}

	/* return code from the ->release op is always ignored */
	return 0;
}

int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}

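/*
 * Allocate a cifsLockInfo for the given byte range, owned by the current
 * thread group, with an empty block list and a wait queue for lock waiters.
 */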
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}

#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, struct cifsLockInfo **conf_lock,
			int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 cfile, conf_lock, rw_check);
		if (rc)
			break;
	}

	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					&conf_lock, CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if no locks prevent us but we need to send a request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, &conf_lock, CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if we need to send a request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

try_again:
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	if (rc == FILE_LOCK_DEFERRED) {
		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
		if (!rc)
			goto try_again;
		posix_unblock_lock(flock);
	}
	return rc;
}

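/*
 * Push all cached byte-range locks on this handle to the server as
 * mandatory locks, batching as many LOCKING_ANDX_RANGE entries per
 * request as the negotiated buffer size allows.
 */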
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	int types[] = {LOCKING_ANDX_LARGE_FILES,
		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it for zero before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (!max_buf) {
		free_xid(xid);
		return -EINVAL;
	}

	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}

struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};

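/*
 * Push all cached POSIX (fcntl-style) locks held on this inode to the
 * server. The lock records are snapshotted under flc_lock into a private
 * list first, since CIFSSMBPosixLock can block and must not be called
 * while holding that spinlock.
 */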
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = inode->i_flctx;
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = flock->fl_pid;
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
		/* advance to the next preallocated entry */
		el = el->next;
	}
	spin_unlock(&flctx->flc_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}


static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}

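/*
 * Decode the VFS file_lock into the lock type and lock/unlock/wait flags
 * that the lock setting and unlocking paths below expect.
 */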
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}

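/*
 * Handle F_GETLK-style lock tests: try the POSIX path first where the
 * protocol allows it; otherwise probe the range with a mandatory lock and
 * immediately unlock it again to learn whether a conflicting lock exists.
 */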
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}

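/*
 * Unlock a range on the server, batching matching cached lock records into
 * as few LOCKING_ANDX requests as possible. Locks are parked on a tmp list
 * so they can be restored to the handle's list if the server rejects the
 * unlock.
 */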
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	int types[] = {LOCKING_ANDX_LARGE_FILES,
		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it for zero before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (!max_buf)
		return -EINVAL;

	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeeded -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}

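/*
 * Handle F_SETLK/F_SETLKW requests: POSIX locks go through the CIFS POSIX
 * extensions when available; otherwise a mandatory byte-range lock is
 * cached or sent to the server, and unlocks go through mand_unlock_range.
 */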
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		if (!rc || rc < 0)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      current->tgid, flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type);
		if (!lock)
			return -ENOMEM;

		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapping locks due to
		 * page reading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if (flock->fl_flags & FL_POSIX && !rc)
		rc = locks_lock_file_wait(file, flock);
	return rc;
}

int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsInodeInfo *cinode;
	struct cifsFileInfo *cfile;
	__u16 netfid;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
		 cmd, flock->fl_flags, flock->fl_type,
		 flock->fl_start, flock->fl_end);

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);

	cifs_sb = CIFS_FILE_SB(file);
	netfid = cfile->fid.netfid;
	cinode = CIFS_I(file_inode(file));

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
		free_xid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}

/*
 * update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held
 */
void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->server_eof)
		cifsi->server_eof = end_of_write;
}

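/*
 * Synchronously write out a buffer through an open handle, retrying on
 * -EAGAIN and reopening invalidated handles as needed, then update the
 * cached EOF and inode size.
 */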
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms;

	cifs_sb = CIFS_SB(dentry->d_sb);

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
					&io_parms, &bytes_written, iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size)
			i_size_write(d_inode(dentry), *offset);
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}

struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
	struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&tcon->open_file_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get(open_file);
				spin_unlock(&tcon->open_file_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&tcon->open_file_lock);
	return NULL;
}

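/*
 * Find an open handle on this inode that is usable for writing, preferring
 * one owned by the current thread group, and reopening an invalidated
 * handle (up to MAX_REOPEN_ATT attempts) if that is all that can be found.
 */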
struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	bool any_available = false;
	int rc;
	unsigned int refind = 0;

	/* Having a null inode here (because mapping->host was set to zero by
	   the VFS or MM) should not happen but we had reports of an oops (due
	   to it being zero) during stress test cases so we need to check for
	   it */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return NULL;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
	tcon = cifs_sb_master_tcon(cifs_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&tcon->open_file_lock);
refind_writable:
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&tcon->open_file_lock);
		return NULL;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&tcon->open_file_lock);
				return open_file;
			} else {
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find usable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&tcon->open_file_lock);

	if (inv_file) {
		rc = cifs_reopen_file(inv_file, false);
		if (!rc)
			return inv_file;
		else {
			spin_lock(&tcon->open_file_lock);
			list_move_tail(&inv_file->flist,
				       &cifs_inode->openFileList);
			spin_unlock(&tcon->open_file_lock);
			cifsFileInfo_put(inv_file);
			++refind;
			inv_file = NULL;
			spin_lock(&tcon->open_file_lock);
			goto refind_writable;
		}
	}

	return NULL;
}

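/*
 * Write the dirty part of a page (the range "from" to "to") back to the
 * server through any writable handle for the inode. Used by the writepage
 * path.
 */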
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

	if ((to > PAGE_CACHE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	open_file = find_writable_file(CIFS_I(mapping->host), false);
	if (open_file) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
	} else {
		cifs_dbg(FYI, "No writeable filehandles for inode\n");
		rc = -EIO;
	}

	kunmap(page);
	return rc;
}

1904static struct cifs_writedata *
1905wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1906 pgoff_t end, pgoff_t *index,
1907 unsigned int *found_pages)
1908{
1909 unsigned int nr_pages;
1910 struct page **pages;
1911 struct cifs_writedata *wdata;
1912
1913 wdata = cifs_writedata_alloc((unsigned int)tofind,
1914 cifs_writev_complete);
1915 if (!wdata)
1916 return NULL;
1917
1918 /*
1919 * find_get_pages_tag seems to return a max of 256 on each
1920 * iteration, so we must call it several times in order to
1921 * fill the array or the wsize is effectively limited to
1922 * 256 * PAGE_CACHE_SIZE.
1923 */
1924 *found_pages = 0;
1925 pages = wdata->pages;
1926 do {
1927 nr_pages = find_get_pages_tag(mapping, index,
1928 PAGECACHE_TAG_DIRTY, tofind,
1929 pages);
1930 *found_pages += nr_pages;
1931 tofind -= nr_pages;
1932 pages += nr_pages;
1933 } while (nr_pages && tofind && *index <= end);
1934
1935 return wdata;
1936}
1937
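/*
 * Lock and revalidate the pages gathered above, keeping only an initial
 * run of consecutive dirty pages that still belong to @mapping and lie
 * below i_size. Kept pages are tagged for writeback; the rest are
 * released. Returns the number of pages left locked.
 */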
1938static unsigned int
1939wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1940 struct address_space *mapping,
1941 struct writeback_control *wbc,
1942 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1943{
1944 unsigned int nr_pages = 0, i;
1945 struct page *page;
1946
1947 for (i = 0; i < found_pages; i++) {
1948 page = wdata->pages[i];
1949 /*
1950 * At this point we hold neither mapping->tree_lock nor
1951 * lock on the page itself: the page may be truncated or
1952 * invalidated (changing page->mapping to NULL), or even
1953 * swizzled back from swapper_space to tmpfs file
1954 * mapping
1955 */
1956
1957 if (nr_pages == 0)
1958 lock_page(page);
1959 else if (!trylock_page(page))
1960 break;
1961
1962 if (unlikely(page->mapping != mapping)) {
1963 unlock_page(page);
1964 break;
1965 }
1966
1967 if (!wbc->range_cyclic && page->index > end) {
1968 *done = true;
1969 unlock_page(page);
1970 break;
1971 }
1972
1973 if (*next && (page->index != *next)) {
1974 /* Not next consecutive page */
1975 unlock_page(page);
1976 break;
1977 }
1978
1979 if (wbc->sync_mode != WB_SYNC_NONE)
1980 wait_on_page_writeback(page);
1981
1982 if (PageWriteback(page) ||
1983 !clear_page_dirty_for_io(page)) {
1984 unlock_page(page);
1985 break;
1986 }
1987
1988 /*
1989 * This actually clears the dirty bit in the radix tree.
1990 * See cifs_writepage() for more commentary.
1991 */
1992 set_page_writeback(page);
1993 if (page_offset(page) >= i_size_read(mapping->host)) {
1994 *done = true;
1995 unlock_page(page);
1996 end_page_writeback(page);
1997 break;
1998 }
1999
2000 wdata->pages[i] = page;
2001 *next = page->index + 1;
2002 ++nr_pages;
2003 }
2004
2005 /* reset index to refind any pages skipped */
2006 if (nr_pages == 0)
2007 *index = wdata->pages[0]->index + 1;
2008
2009 /* put any pages we aren't going to use */
2010 for (i = nr_pages; i < found_pages; i++) {
2011 page_cache_release(wdata->pages[i]);
2012 wdata->pages[i] = NULL;
2013 }
2014
2015 return nr_pages;
2016}
2017
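/*
 * Fill in the remaining cifs_writedata fields (offset, page/tail sizes,
 * target file handle) and issue the asynchronous write. Every page is
 * unlocked before returning, whether or not the send succeeded.
 */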
2018static int
2019wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2020 struct address_space *mapping, struct writeback_control *wbc)
2021{
2022 int rc = 0;
2023 struct TCP_Server_Info *server;
2024 unsigned int i;
2025
2026 wdata->sync_mode = wbc->sync_mode;
2027 wdata->nr_pages = nr_pages;
2028 wdata->offset = page_offset(wdata->pages[0]);
2029 wdata->pagesz = PAGE_CACHE_SIZE;
2030 wdata->tailsz = min(i_size_read(mapping->host) -
2031 page_offset(wdata->pages[nr_pages - 1]),
2032 (loff_t)PAGE_CACHE_SIZE);
2033 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz;
2034
2035 if (wdata->cfile != NULL)
2036 cifsFileInfo_put(wdata->cfile);
2037 wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2038 if (!wdata->cfile) {
2039 cifs_dbg(VFS, "No writable handles for inode\n");
2040 rc = -EBADF;
2041 } else {
2042 wdata->pid = wdata->cfile->pid;
2043 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2044 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2045 }
2046
2047 for (i = 0; i < nr_pages; ++i)
2048 unlock_page(wdata->pages[i]);
2049
2050 return rc;
2051}
2052
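/*
 * Write back dirty pages for the whole mapping. Each pass reserves
 * wsize worth of credits, batches up to wsize/PAGE_CACHE_SIZE
 * consecutive dirty pages with the helpers above, and hands them to
 * the server's async_writev op.
 */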
2053static int cifs_writepages(struct address_space *mapping,
2054 struct writeback_control *wbc)
2055{
2056 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2057 struct TCP_Server_Info *server;
2058 bool done = false, scanned = false, range_whole = false;
2059 pgoff_t end, index;
2060 struct cifs_writedata *wdata;
2061 int rc = 0;
2062
2063 /*
2064 * If wsize is smaller than the page cache size, default to writing
2065 * one page at a time via cifs_writepage
2066 */
2067 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
2068 return generic_writepages(mapping, wbc);
2069
2070 if (wbc->range_cyclic) {
2071 index = mapping->writeback_index; /* Start from prev offset */
2072 end = -1;
2073 } else {
2074 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2075 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2076 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2077 range_whole = true;
2078 scanned = true;
2079 }
2080 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2081retry:
2082 while (!done && index <= end) {
2083 unsigned int i, nr_pages, found_pages, wsize, credits;
2084 pgoff_t next = 0, tofind, saved_index = index;
2085
2086 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2087 &wsize, &credits);
2088 if (rc)
2089 break;
2090
2091 tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1;
2092
2093 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2094 &found_pages);
2095 if (!wdata) {
2096 rc = -ENOMEM;
2097 add_credits_and_wake_if(server, credits, 0);
2098 break;
2099 }
2100
2101 if (found_pages == 0) {
2102 kref_put(&wdata->refcount, cifs_writedata_release);
2103 add_credits_and_wake_if(server, credits, 0);
2104 break;
2105 }
2106
2107 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2108 end, &index, &next, &done);
2109
2110 /* nothing to write? */
2111 if (nr_pages == 0) {
2112 kref_put(&wdata->refcount, cifs_writedata_release);
2113 add_credits_and_wake_if(server, credits, 0);
2114 continue;
2115 }
2116
2117 wdata->credits = credits;
2118
2119 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2120
2121 /* send failure -- clean up the mess */
2122 if (rc != 0) {
2123 add_credits_and_wake_if(server, wdata->credits, 0);
2124 for (i = 0; i < nr_pages; ++i) {
2125 if (rc == -EAGAIN)
2126 redirty_page_for_writepage(wbc,
2127 wdata->pages[i]);
2128 else
2129 SetPageError(wdata->pages[i]);
2130 end_page_writeback(wdata->pages[i]);
2131 page_cache_release(wdata->pages[i]);
2132 }
2133 if (rc != -EAGAIN)
2134 mapping_set_error(mapping, rc);
2135 }
2136 kref_put(&wdata->refcount, cifs_writedata_release);
2137
2138 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2139 index = saved_index;
2140 continue;
2141 }
2142
2143 wbc->nr_to_write -= nr_pages;
2144 if (wbc->nr_to_write <= 0)
2145 done = true;
2146
2147 index = next;
2148 }
2149
2150 if (!scanned && !done) {
2151 /*
2152 * We hit the last page and there is more work to be done: wrap
2153 * back to the start of the file
2154 */
2155 scanned = true;
2156 index = 0;
2157 goto retry;
2158 }
2159
2160 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2161 mapping->writeback_index = index;
2162
2163 return rc;
2164}
2165
2166static int
2167cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2168{
2169 int rc;
2170 unsigned int xid;
2171
2172 xid = get_xid();
2173/* BB add check for wbc flags */
2174 page_cache_get(page);
2175 if (!PageUptodate(page))
2176 cifs_dbg(FYI, "ppw - page not up to date\n");
2177
2178 /*
2179 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2180 *
2181 * A writepage() implementation always needs to do either this,
2182 * or re-dirty the page with "redirty_page_for_writepage()" in
2183 * the case of a failure.
2184 *
2185 * Just unlocking the page will cause the radix tree tag-bits
2186 * to fail to update with the state of the page correctly.
2187 */
2188 set_page_writeback(page);
2189retry_write:
2190 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2191 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2192 goto retry_write;
2193 else if (rc == -EAGAIN)
2194 redirty_page_for_writepage(wbc, page);
2195 else if (rc != 0)
2196 SetPageError(page);
2197 else
2198 SetPageUptodate(page);
2199 end_page_writeback(page);
2200 page_cache_release(page);
2201 free_xid(xid);
2202 return rc;
2203}
2204
2205static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2206{
2207 int rc = cifs_writepage_locked(page, wbc);
2208 unlock_page(page);
2209 return rc;
2210}
2211
2212static int cifs_write_end(struct file *file, struct address_space *mapping,
2213 loff_t pos, unsigned len, unsigned copied,
2214 struct page *page, void *fsdata)
2215{
2216 int rc;
2217 struct inode *inode = mapping->host;
2218 struct cifsFileInfo *cfile = file->private_data;
2219 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2220 __u32 pid;
2221
2222 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2223 pid = cfile->pid;
2224 else
2225 pid = current->tgid;
2226
2227 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2228 page, pos, copied);
2229
2230 if (PageChecked(page)) {
2231 if (copied == len)
2232 SetPageUptodate(page);
2233 ClearPageChecked(page);
2234 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2235 SetPageUptodate(page);
2236
2237 if (!PageUptodate(page)) {
2238 char *page_data;
2239 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2240 unsigned int xid;
2241
2242 xid = get_xid();
2243	 /* this is probably better than calling cifs_partialpagewrite()
2244	    directly, since in this function the file handle is known,
2245	    which we might as well leverage */
2246	 /* BB check if anything else is missing from ppw,
2247	    such as updating the last write time */
2248 page_data = kmap(page);
2249 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2250 /* if (rc < 0) should we set writebehind rc? */
2251 kunmap(page);
2252
2253 free_xid(xid);
2254 } else {
2255 rc = copied;
2256 pos += copied;
2257 set_page_dirty(page);
2258 }
2259
2260 if (rc > 0) {
2261 spin_lock(&inode->i_lock);
2262 if (pos > inode->i_size)
2263 i_size_write(inode, pos);
2264 spin_unlock(&inode->i_lock);
2265 }
2266
2267 unlock_page(page);
2268 page_cache_release(page);
2269
2270 return rc;
2271}
2272
2273int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2274 int datasync)
2275{
2276 unsigned int xid;
2277 int rc = 0;
2278 struct cifs_tcon *tcon;
2279 struct TCP_Server_Info *server;
2280 struct cifsFileInfo *smbfile = file->private_data;
2281 struct inode *inode = file_inode(file);
2282 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2283
2284 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2285 if (rc)
2286 return rc;
2287 mutex_lock(&inode->i_mutex);
2288
2289 xid = get_xid();
2290
2291 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2292 file, datasync);
2293
2294 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2295 rc = cifs_zap_mapping(inode);
2296 if (rc) {
2297 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2298 rc = 0; /* don't care about it in fsync */
2299 }
2300 }
2301
2302 tcon = tlink_tcon(smbfile->tlink);
2303 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2304 server = tcon->ses->server;
2305 if (server->ops->flush)
2306 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2307 else
2308 rc = -ENOSYS;
2309 }
2310
2311 free_xid(xid);
2312 mutex_unlock(&inode->i_mutex);
2313 return rc;
2314}
2315
2316int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2317{
2318 unsigned int xid;
2319 int rc = 0;
2320 struct cifs_tcon *tcon;
2321 struct TCP_Server_Info *server;
2322 struct cifsFileInfo *smbfile = file->private_data;
2323 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2324 struct inode *inode = file->f_mapping->host;
2325
2326 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2327 if (rc)
2328 return rc;
2329 mutex_lock(&inode->i_mutex);
2330
2331 xid = get_xid();
2332
2333 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2334 file, datasync);
2335
2336 tcon = tlink_tcon(smbfile->tlink);
2337 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2338 server = tcon->ses->server;
2339 if (server->ops->flush)
2340 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2341 else
2342 rc = -ENOSYS;
2343 }
2344
2345 free_xid(xid);
2346 mutex_unlock(&inode->i_mutex);
2347 return rc;
2348}
2349
2350/*
2351 * As file closes, flush all cached write data for this inode checking
2352 * for write behind errors.
2353 */
2354int cifs_flush(struct file *file, fl_owner_t id)
2355{
2356 struct inode *inode = file_inode(file);
2357 int rc = 0;
2358
2359 if (file->f_mode & FMODE_WRITE)
2360 rc = filemap_write_and_wait(inode->i_mapping);
2361
2362 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2363
2364 return rc;
2365}
2366
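/*
 * Allocate @num_pages highmem-capable pages for an uncached write. If
 * an allocation fails, every page obtained so far is released before
 * returning -ENOMEM, so the caller only has to free the array itself.
 */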
2367static int
2368cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2369{
2370 int rc = 0;
2371 unsigned long i;
2372
2373 for (i = 0; i < num_pages; i++) {
2374 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2375 if (!pages[i]) {
2376 /*
2377 * save number of pages we have already allocated and
2378 * return with ENOMEM error
2379 */
2380 num_pages = i;
2381 rc = -ENOMEM;
2382 break;
2383 }
2384 }
2385
2386 if (rc) {
2387 for (i = 0; i < num_pages; i++)
2388 put_page(pages[i]);
2389 }
2390 return rc;
2391}
2392
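/*
 * Clamp @len to the negotiated @wsize and return how many pages are
 * needed to hold that many bytes; the clamped length is passed back in
 * *cur_len. For example (assuming 4KB pages), wsize = 64KB and
 * len = 100KB yield *cur_len = 64KB and a result of 16 pages.
 */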
2393static inline
2394size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2395{
2396 size_t num_pages;
2397 size_t clen;
2398
2399 clen = min_t(const size_t, len, wsize);
2400 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2401
2402 if (cur_len)
2403 *cur_len = clen;
2404
2405 return num_pages;
2406}
2407
2408static void
2409cifs_uncached_writedata_release(struct kref *refcount)
2410{
2411 int i;
2412 struct cifs_writedata *wdata = container_of(refcount,
2413 struct cifs_writedata, refcount);
2414
2415 for (i = 0; i < wdata->nr_pages; i++)
2416 put_page(wdata->pages[i]);
2417 cifs_writedata_release(refcount);
2418}
2419
2420static void
2421cifs_uncached_writev_complete(struct work_struct *work)
2422{
2423 struct cifs_writedata *wdata = container_of(work,
2424 struct cifs_writedata, work);
2425 struct inode *inode = d_inode(wdata->cfile->dentry);
2426 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2427
2428 spin_lock(&inode->i_lock);
2429 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2430 if (cifsi->server_eof > inode->i_size)
2431 i_size_write(inode, cifsi->server_eof);
2432 spin_unlock(&inode->i_lock);
2433
2434 complete(&wdata->done);
2435
2436 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2437}
2438
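/*
 * Copy up to *len bytes from the iterator into the preallocated wdata
 * pages. A short copy usually means we trod into an unmapped user
 * address, so we stop there; *len and *num_pages report what was
 * actually used. If nothing at all could be copied, return -EFAULT.
 */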
2439static int
2440wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2441 size_t *len, unsigned long *num_pages)
2442{
2443 size_t save_len, copied, bytes, cur_len = *len;
2444 unsigned long i, nr_pages = *num_pages;
2445
2446 save_len = cur_len;
2447 for (i = 0; i < nr_pages; i++) {
2448 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2449 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2450 cur_len -= copied;
2451 /*
2452 * If we didn't copy as much as we expected, then that
2453 * may mean we trod into an unmapped area. Stop copying
2454 * at that point. On the next pass through the big
2455 * loop, we'll likely end up getting a zero-length
2456 * write and bailing out of it.
2457 */
2458 if (copied < bytes)
2459 break;
2460 }
2461 cur_len = save_len - cur_len;
2462 *len = cur_len;
2463
2464 /*
2465 * If we have no data to send, then that probably means that
2466 * the copy above failed altogether. That's most likely because
2467 * the address in the iovec was bogus. Return -EFAULT and let
2468 * the caller free anything we allocated and bail out.
2469 */
2470 if (!cur_len)
2471 return -EFAULT;
2472
2473 /*
2474 * i + 1 now represents the number of pages we actually used in
2475 * the copy phase above.
2476 */
2477 *num_pages = i + 1;
2478 return 0;
2479}
2480
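/*
 * Core of the uncached write path: carve the iterator into wsize-sized
 * chunks, wrap each chunk in a cifs_writedata with its own page array,
 * and send it with async_writev. Descriptors that were sent
 * successfully are queued on @wdata_list for the caller to wait on.
 */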
2481static int
2482cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2483 struct cifsFileInfo *open_file,
2484 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2485{
2486 int rc = 0;
2487 size_t cur_len;
2488 unsigned long nr_pages, num_pages, i;
2489 struct cifs_writedata *wdata;
2490 struct iov_iter saved_from;
2491 loff_t saved_offset = offset;
2492 pid_t pid;
2493 struct TCP_Server_Info *server;
2494
2495 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2496 pid = open_file->pid;
2497 else
2498 pid = current->tgid;
2499
2500 server = tlink_tcon(open_file->tlink)->ses->server;
2501 memcpy(&saved_from, from, sizeof(struct iov_iter));
2502
2503 do {
2504 unsigned int wsize, credits;
2505
2506 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2507 &wsize, &credits);
2508 if (rc)
2509 break;
2510
2511 nr_pages = get_numpages(wsize, len, &cur_len);
2512 wdata = cifs_writedata_alloc(nr_pages,
2513 cifs_uncached_writev_complete);
2514 if (!wdata) {
2515 rc = -ENOMEM;
2516 add_credits_and_wake_if(server, credits, 0);
2517 break;
2518 }
2519
2520 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2521 if (rc) {
2522 kfree(wdata);
2523 add_credits_and_wake_if(server, credits, 0);
2524 break;
2525 }
2526
2527 num_pages = nr_pages;
2528 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2529 if (rc) {
2530 for (i = 0; i < nr_pages; i++)
2531 put_page(wdata->pages[i]);
2532 kfree(wdata);
2533 add_credits_and_wake_if(server, credits, 0);
2534 break;
2535 }
2536
2537 /*
2538 * Bring nr_pages down to the number of pages we actually used,
2539 * and free any pages that we didn't use.
2540 */
2541 for ( ; nr_pages > num_pages; nr_pages--)
2542 put_page(wdata->pages[nr_pages - 1]);
2543
2544 wdata->sync_mode = WB_SYNC_ALL;
2545 wdata->nr_pages = nr_pages;
2546 wdata->offset = (__u64)offset;
2547 wdata->cfile = cifsFileInfo_get(open_file);
2548 wdata->pid = pid;
2549 wdata->bytes = cur_len;
2550 wdata->pagesz = PAGE_SIZE;
2551 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2552 wdata->credits = credits;
2553
2554 if (!wdata->cfile->invalidHandle ||
2555 !(rc = cifs_reopen_file(wdata->cfile, false)))
2556 rc = server->ops->async_writev(wdata,
2557 cifs_uncached_writedata_release);
2558 if (rc) {
2559 add_credits_and_wake_if(server, wdata->credits, 0);
2560 kref_put(&wdata->refcount,
2561 cifs_uncached_writedata_release);
2562 if (rc == -EAGAIN) {
2563 memcpy(from, &saved_from,
2564 sizeof(struct iov_iter));
2565 iov_iter_advance(from, offset - saved_offset);
2566 continue;
2567 }
2568 break;
2569 }
2570
2571 list_add_tail(&wdata->list, wdata_list);
2572 offset += cur_len;
2573 len -= cur_len;
2574 } while (len > 0);
2575
2576 return rc;
2577}
2578
2579ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2580{
2581 struct file *file = iocb->ki_filp;
2582 ssize_t total_written = 0;
2583 struct cifsFileInfo *open_file;
2584 struct cifs_tcon *tcon;
2585 struct cifs_sb_info *cifs_sb;
2586 struct cifs_writedata *wdata, *tmp;
2587 struct list_head wdata_list;
2588 struct iov_iter saved_from;
2589 int rc;
2590
2591 /*
2592	 * BB - optimize for the case when signing is disabled: we could drop
2593	 * this extra memory-to-memory copy and construct the write request
2594	 * directly from the iovec buffers.
2595 */
2596
2597 rc = generic_write_checks(iocb, from);
2598 if (rc <= 0)
2599 return rc;
2600
2601 INIT_LIST_HEAD(&wdata_list);
2602 cifs_sb = CIFS_FILE_SB(file);
2603 open_file = file->private_data;
2604 tcon = tlink_tcon(open_file->tlink);
2605
2606 if (!tcon->ses->server->ops->async_writev)
2607 return -ENOSYS;
2608
2609 memcpy(&saved_from, from, sizeof(struct iov_iter));
2610
2611 rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
2612 open_file, cifs_sb, &wdata_list);
2613
2614 /*
2615 * If at least one write was successfully sent, then discard any rc
2616	 * value from the later writes. If the later writes succeed, then
2617	 * we'll end up returning whatever was written. If they fail, then
2618	 * we'll get a new rc value from them.
2619 */
2620 if (!list_empty(&wdata_list))
2621 rc = 0;
2622
2623 /*
2624 * Wait for and collect replies for any successful sends in order of
2625 * increasing offset. Once an error is hit or we get a fatal signal
2626 * while waiting, then return without waiting for any more replies.
2627 */
2628restart_loop:
2629 list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2630 if (!rc) {
2631 /* FIXME: freezable too? */
2632 rc = wait_for_completion_killable(&wdata->done);
2633 if (rc)
2634 rc = -EINTR;
2635 else if (wdata->result)
2636 rc = wdata->result;
2637 else
2638 total_written += wdata->bytes;
2639
2640 /* resend call if it's a retryable error */
2641 if (rc == -EAGAIN) {
2642 struct list_head tmp_list;
2643 struct iov_iter tmp_from;
2644
2645 INIT_LIST_HEAD(&tmp_list);
2646 list_del_init(&wdata->list);
2647
2648 memcpy(&tmp_from, &saved_from,
2649 sizeof(struct iov_iter));
2650 iov_iter_advance(&tmp_from,
2651 wdata->offset - iocb->ki_pos);
2652
2653 rc = cifs_write_from_iter(wdata->offset,
2654 wdata->bytes, &tmp_from,
2655 open_file, cifs_sb, &tmp_list);
2656
2657 list_splice(&tmp_list, &wdata_list);
2658
2659 kref_put(&wdata->refcount,
2660 cifs_uncached_writedata_release);
2661 goto restart_loop;
2662 }
2663 }
2664 list_del_init(&wdata->list);
2665 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2666 }
2667
2668 if (unlikely(!total_written))
2669 return rc;
2670
2671 iocb->ki_pos += total_written;
2672 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
2673 cifs_stats_bytes_written(tcon, total_written);
2674 return total_written;
2675}
2676
2677static ssize_t
2678cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2679{
2680 struct file *file = iocb->ki_filp;
2681 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2682 struct inode *inode = file->f_mapping->host;
2683 struct cifsInodeInfo *cinode = CIFS_I(inode);
2684 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2685 ssize_t rc;
2686
2687 /*
2688 * We need to hold the sem to be sure nobody modifies lock list
2689 * with a brlock that prevents writing.
2690 */
2691 down_read(&cinode->lock_sem);
2692 mutex_lock(&inode->i_mutex);
2693
2694 rc = generic_write_checks(iocb, from);
2695 if (rc <= 0)
2696 goto out;
2697
2698 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2699 server->vals->exclusive_lock_type, NULL,
2700 CIFS_WRITE_OP))
2701 rc = __generic_file_write_iter(iocb, from);
2702 else
2703 rc = -EACCES;
2704out:
2705 mutex_unlock(&inode->i_mutex);
2706
2707 if (rc > 0) {
2708 ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc);
2709 if (err < 0)
2710 rc = err;
2711 }
2712 up_read(&cinode->lock_sem);
2713 return rc;
2714}
2715
2716ssize_t
2717cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2718{
2719 struct inode *inode = file_inode(iocb->ki_filp);
2720 struct cifsInodeInfo *cinode = CIFS_I(inode);
2721 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2722 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2723 iocb->ki_filp->private_data;
2724 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2725 ssize_t written;
2726
2727 written = cifs_get_writer(cinode);
2728 if (written)
2729 return written;
2730
2731 if (CIFS_CACHE_WRITE(cinode)) {
2732 if (cap_unix(tcon->ses) &&
2733 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2734 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2735 written = generic_file_write_iter(iocb, from);
2736 goto out;
2737 }
2738 written = cifs_writev(iocb, from);
2739 goto out;
2740 }
2741 /*
2742	 * For non-oplocked files in strict cache mode we need to write the data
2743	 * to the server exactly from pos to pos+len-1 rather than flush all
2744	 * affected pages, because flushing may cause an error with mandatory
2745	 * locks on those pages but not on the region from pos to pos+len-1.
2746 */
2747 written = cifs_user_writev(iocb, from);
2748 if (written > 0 && CIFS_CACHE_READ(cinode)) {
2749 /*
2750	 * A Windows 7 server can delay breaking a level2 oplock if a write
2751	 * request comes in - break it on the client to prevent reading
2752	 * stale data.
2753 */
2754 cifs_zap_mapping(inode);
2755 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2756 inode);
2757 cinode->oplock = 0;
2758 }
2759out:
2760 cifs_put_writer(cinode);
2761 return written;
2762}
2763
2764static struct cifs_readdata *
2765cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2766{
2767 struct cifs_readdata *rdata;
2768
2769 rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2770 GFP_KERNEL);
2771 if (rdata != NULL) {
2772 kref_init(&rdata->refcount);
2773 INIT_LIST_HEAD(&rdata->list);
2774 init_completion(&rdata->done);
2775 INIT_WORK(&rdata->work, complete);
2776 }
2777
2778 return rdata;
2779}
2780
2781void
2782cifs_readdata_release(struct kref *refcount)
2783{
2784 struct cifs_readdata *rdata = container_of(refcount,
2785 struct cifs_readdata, refcount);
2786
2787 if (rdata->cfile)
2788 cifsFileInfo_put(rdata->cfile);
2789
2790 kfree(rdata);
2791}
2792
2793static int
2794cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2795{
2796 int rc = 0;
2797 struct page *page;
2798 unsigned int i;
2799
2800 for (i = 0; i < nr_pages; i++) {
2801 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2802 if (!page) {
2803 rc = -ENOMEM;
2804 break;
2805 }
2806 rdata->pages[i] = page;
2807 }
2808
2809 if (rc) {
2810 for (i = 0; i < nr_pages; i++) {
2811 put_page(rdata->pages[i]);
2812 rdata->pages[i] = NULL;
2813 }
2814 }
2815 return rc;
2816}
2817
2818static void
2819cifs_uncached_readdata_release(struct kref *refcount)
2820{
2821 struct cifs_readdata *rdata = container_of(refcount,
2822 struct cifs_readdata, refcount);
2823 unsigned int i;
2824
2825 for (i = 0; i < rdata->nr_pages; i++) {
2826 put_page(rdata->pages[i]);
2827 rdata->pages[i] = NULL;
2828 }
2829 cifs_readdata_release(refcount);
2830}
2831
2832/**
2833 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2834 * @rdata: the readdata response with list of pages holding data
2835 * @iter: destination for our data
2836 *
2837 * This function copies data from a list of pages in a readdata response into
2838 * an array of iovecs. It will first calculate where the data should go
2839 * based on the info in the readdata and then copy the data into that spot.
2840 */
2841static int
2842cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2843{
2844 size_t remaining = rdata->got_bytes;
2845 unsigned int i;
2846
2847 for (i = 0; i < rdata->nr_pages; i++) {
2848 struct page *page = rdata->pages[i];
2849 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2850 size_t written = copy_page_to_iter(page, 0, copy, iter);
2851 remaining -= written;
2852 if (written < copy && iov_iter_count(iter) > 0)
2853 break;
2854 }
2855 return remaining ? -EFAULT : 0;
2856}
2857
2858static void
2859cifs_uncached_readv_complete(struct work_struct *work)
2860{
2861 struct cifs_readdata *rdata = container_of(work,
2862 struct cifs_readdata, work);
2863
2864 complete(&rdata->done);
2865 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2866}
2867
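/*
 * Receive the payload of a read response directly into rdata->pages.
 * Only the final page may be partially filled (recorded in tailsz);
 * pages beyond the received length are released rather than held
 * hostage.
 */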
2868static int
2869cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2870 struct cifs_readdata *rdata, unsigned int len)
2871{
2872 int result = 0;
2873 unsigned int i;
2874 unsigned int nr_pages = rdata->nr_pages;
2875 struct kvec iov;
2876
2877 rdata->got_bytes = 0;
2878 rdata->tailsz = PAGE_SIZE;
2879 for (i = 0; i < nr_pages; i++) {
2880 struct page *page = rdata->pages[i];
2881
2882 if (len >= PAGE_SIZE) {
2883 /* enough data to fill the page */
2884 iov.iov_base = kmap(page);
2885 iov.iov_len = PAGE_SIZE;
2886 cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2887 i, iov.iov_base, iov.iov_len);
2888 len -= PAGE_SIZE;
2889 } else if (len > 0) {
2890 /* enough for partial page, fill and zero the rest */
2891 iov.iov_base = kmap(page);
2892 iov.iov_len = len;
2893 cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2894 i, iov.iov_base, iov.iov_len);
2895 memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2896 rdata->tailsz = len;
2897 len = 0;
2898 } else {
2899 /* no need to hold page hostage */
2900 rdata->pages[i] = NULL;
2901 rdata->nr_pages--;
2902 put_page(page);
2903 continue;
2904 }
2905
2906 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2907 kunmap(page);
2908 if (result < 0)
2909 break;
2910
2911 rdata->got_bytes += result;
2912 }
2913
2914 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2915 rdata->got_bytes : result;
2916}
2917
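/*
 * Core of the uncached read path: split the request into rsize-sized
 * chunks, allocate a cifs_readdata plus page array for each, and issue
 * them with async_readv, queueing the descriptors on @rdata_list for
 * the caller to wait on.
 */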
2918static int
2919cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2920 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2921{
2922 struct cifs_readdata *rdata;
2923 unsigned int npages, rsize, credits;
2924 size_t cur_len;
2925 int rc;
2926 pid_t pid;
2927 struct TCP_Server_Info *server;
2928
2929 server = tlink_tcon(open_file->tlink)->ses->server;
2930
2931 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2932 pid = open_file->pid;
2933 else
2934 pid = current->tgid;
2935
2936 do {
2937 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2938 &rsize, &credits);
2939 if (rc)
2940 break;
2941
2942 cur_len = min_t(const size_t, len, rsize);
2943 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2944
2945 /* allocate a readdata struct */
2946 rdata = cifs_readdata_alloc(npages,
2947 cifs_uncached_readv_complete);
2948 if (!rdata) {
2949 add_credits_and_wake_if(server, credits, 0);
2950 rc = -ENOMEM;
2951 break;
2952 }
2953
2954 rc = cifs_read_allocate_pages(rdata, npages);
2955 if (rc)
2956 goto error;
2957
2958 rdata->cfile = cifsFileInfo_get(open_file);
2959 rdata->nr_pages = npages;
2960 rdata->offset = offset;
2961 rdata->bytes = cur_len;
2962 rdata->pid = pid;
2963 rdata->pagesz = PAGE_SIZE;
2964 rdata->read_into_pages = cifs_uncached_read_into_pages;
2965 rdata->credits = credits;
2966
2967 if (!rdata->cfile->invalidHandle ||
2968 !(rc = cifs_reopen_file(rdata->cfile, true)))
2969 rc = server->ops->async_readv(rdata);
2970error:
2971 if (rc) {
2972 add_credits_and_wake_if(server, rdata->credits, 0);
2973 kref_put(&rdata->refcount,
2974 cifs_uncached_readdata_release);
2975 if (rc == -EAGAIN)
2976 continue;
2977 break;
2978 }
2979
2980 list_add_tail(&rdata->list, rdata_list);
2981 offset += cur_len;
2982 len -= cur_len;
2983 } while (len > 0);
2984
2985 return rc;
2986}
2987
2988ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
2989{
2990 struct file *file = iocb->ki_filp;
2991 ssize_t rc;
2992 size_t len;
2993 ssize_t total_read = 0;
2994 loff_t offset = iocb->ki_pos;
2995 struct cifs_sb_info *cifs_sb;
2996 struct cifs_tcon *tcon;
2997 struct cifsFileInfo *open_file;
2998 struct cifs_readdata *rdata, *tmp;
2999 struct list_head rdata_list;
3000
3001 len = iov_iter_count(to);
3002 if (!len)
3003 return 0;
3004
3005 INIT_LIST_HEAD(&rdata_list);
3006 cifs_sb = CIFS_FILE_SB(file);
3007 open_file = file->private_data;
3008 tcon = tlink_tcon(open_file->tlink);
3009
3010 if (!tcon->ses->server->ops->async_readv)
3011 return -ENOSYS;
3012
3013 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3014 cifs_dbg(FYI, "attempting read on write only file instance\n");
3015
3016 rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
3017
3018 /* if at least one read request send succeeded, then reset rc */
3019 if (!list_empty(&rdata_list))
3020 rc = 0;
3021
3022 len = iov_iter_count(to);
3023 /* the loop below should proceed in the order of increasing offsets */
3024again:
3025 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
3026 if (!rc) {
3027 /* FIXME: freezable sleep too? */
3028 rc = wait_for_completion_killable(&rdata->done);
3029 if (rc)
3030 rc = -EINTR;
3031 else if (rdata->result == -EAGAIN) {
3032 /* resend call if it's a retryable error */
3033 struct list_head tmp_list;
3034 unsigned int got_bytes = rdata->got_bytes;
3035
3036 list_del_init(&rdata->list);
3037 INIT_LIST_HEAD(&tmp_list);
3038
3039 /*
3040	 * We got part of the data and then a reconnect
3041	 * happened -- fill the buffer and continue
3042	 * reading.
3043 */
3044 if (got_bytes && got_bytes < rdata->bytes) {
3045 rc = cifs_readdata_to_iov(rdata, to);
3046 if (rc) {
3047 kref_put(&rdata->refcount,
3048 cifs_uncached_readdata_release);
3049 continue;
3050 }
3051 }
3052
3053 rc = cifs_send_async_read(
3054 rdata->offset + got_bytes,
3055 rdata->bytes - got_bytes,
3056 rdata->cfile, cifs_sb,
3057 &tmp_list);
3058
3059 list_splice(&tmp_list, &rdata_list);
3060
3061 kref_put(&rdata->refcount,
3062 cifs_uncached_readdata_release);
3063 goto again;
3064 } else if (rdata->result)
3065 rc = rdata->result;
3066 else
3067 rc = cifs_readdata_to_iov(rdata, to);
3068
3069 /* if there was a short read -- discard anything left */
3070 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3071 rc = -ENODATA;
3072 }
3073 list_del_init(&rdata->list);
3074 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3075 }
3076
3077 total_read = len - iov_iter_count(to);
3078
3079 cifs_stats_bytes_read(tcon, total_read);
3080
3081 /* mask nodata case */
3082 if (rc == -ENODATA)
3083 rc = 0;
3084
3085 if (total_read) {
3086 iocb->ki_pos += total_read;
3087 return total_read;
3088 }
3089 return rc;
3090}
3091
3092ssize_t
3093cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3094{
3095 struct inode *inode = file_inode(iocb->ki_filp);
3096 struct cifsInodeInfo *cinode = CIFS_I(inode);
3097 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3098 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3099 iocb->ki_filp->private_data;
3100 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3101 int rc = -EACCES;
3102
3103 /*
3104	 * In strict cache mode we need to read from the server every time if
3105	 * we don't have a level II oplock, because the server can delay the
3106	 * mtime change and so we can't decide whether to invalidate the inode.
3107	 * Reading through the page cache can also fail if there are mandatory
3108	 * locks on pages affected by this read but not on the region from pos
3109	 * to pos+len-1.
3110 */
3111 if (!CIFS_CACHE_READ(cinode))
3112 return cifs_user_readv(iocb, to);
3113
3114 if (cap_unix(tcon->ses) &&
3115 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3116 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3117 return generic_file_read_iter(iocb, to);
3118
3119 /*
3120 * We need to hold the sem to be sure nobody modifies lock list
3121 * with a brlock that prevents reading.
3122 */
3123 down_read(&cinode->lock_sem);
3124 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3125 tcon->ses->server->vals->shared_lock_type,
3126 NULL, CIFS_READ_OP))
3127 rc = generic_file_read_iter(iocb, to);
3128 up_read(&cinode->lock_sem);
3129 return rc;
3130}
3131
3132static ssize_t
3133cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3134{
3135 int rc = -EACCES;
3136 unsigned int bytes_read = 0;
3137 unsigned int total_read;
3138 unsigned int current_read_size;
3139 unsigned int rsize;
3140 struct cifs_sb_info *cifs_sb;
3141 struct cifs_tcon *tcon;
3142 struct TCP_Server_Info *server;
3143 unsigned int xid;
3144 char *cur_offset;
3145 struct cifsFileInfo *open_file;
3146 struct cifs_io_parms io_parms;
3147 int buf_type = CIFS_NO_BUFFER;
3148 __u32 pid;
3149
3150 xid = get_xid();
3151 cifs_sb = CIFS_FILE_SB(file);
3152
3153 /* FIXME: set up handlers for larger reads and/or convert to async */
3154 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3155
3156 if (file->private_data == NULL) {
3157 rc = -EBADF;
3158 free_xid(xid);
3159 return rc;
3160 }
3161 open_file = file->private_data;
3162 tcon = tlink_tcon(open_file->tlink);
3163 server = tcon->ses->server;
3164
3165 if (!server->ops->sync_read) {
3166 free_xid(xid);
3167 return -ENOSYS;
3168 }
3169
3170 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3171 pid = open_file->pid;
3172 else
3173 pid = current->tgid;
3174
3175 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3176 cifs_dbg(FYI, "attempting read on write only file instance\n");
3177
3178 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3179 total_read += bytes_read, cur_offset += bytes_read) {
3180 do {
3181 current_read_size = min_t(uint, read_size - total_read,
3182 rsize);
3183 /*
3184	 * For Windows ME and 9x we do not want to request more
3185	 * than was negotiated, since the server will refuse the
3186	 * read otherwise.
3187 */
3188 if ((tcon->ses) && !(tcon->ses->capabilities &
3189 tcon->ses->server->vals->cap_large_files)) {
3190 current_read_size = min_t(uint,
3191 current_read_size, CIFSMaxBufSize);
3192 }
3193 if (open_file->invalidHandle) {
3194 rc = cifs_reopen_file(open_file, true);
3195 if (rc != 0)
3196 break;
3197 }
3198 io_parms.pid = pid;
3199 io_parms.tcon = tcon;
3200 io_parms.offset = *offset;
3201 io_parms.length = current_read_size;
3202 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3203 &bytes_read, &cur_offset,
3204 &buf_type);
3205 } while (rc == -EAGAIN);
3206
3207 if (rc || (bytes_read == 0)) {
3208 if (total_read) {
3209 break;
3210 } else {
3211 free_xid(xid);
3212 return rc;
3213 }
3214 } else {
3215 cifs_stats_bytes_read(tcon, total_read);
3216 *offset += bytes_read;
3217 }
3218 }
3219 free_xid(xid);
3220 return total_read;
3221}
3222
3223/*
3224 * If the page is mmap'ed into a process' page tables, then we need to make
3225 * sure that it doesn't change while being written back.
3226 */
3227static int
3228cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3229{
3230 struct page *page = vmf->page;
3231
3232 lock_page(page);
3233 return VM_FAULT_LOCKED;
3234}
3235
3236static const struct vm_operations_struct cifs_file_vm_ops = {
3237 .fault = filemap_fault,
3238 .map_pages = filemap_map_pages,
3239 .page_mkwrite = cifs_page_mkwrite,
3240};
3241
3242int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3243{
3244 int rc, xid;
3245 struct inode *inode = file_inode(file);
3246
3247 xid = get_xid();
3248
3249 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3250 rc = cifs_zap_mapping(inode);
3251 if (rc)
3252 return rc;
3253 }
3254
3255 rc = generic_file_mmap(file, vma);
3256 if (rc == 0)
3257 vma->vm_ops = &cifs_file_vm_ops;
3258 free_xid(xid);
3259 return rc;
3260}
3261
3262int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3263{
3264 int rc, xid;
3265
3266 xid = get_xid();
3267 rc = cifs_revalidate_file(file);
3268 if (rc) {
3269 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3270 rc);
3271 free_xid(xid);
3272 return rc;
3273 }
3274 rc = generic_file_mmap(file, vma);
3275 if (rc == 0)
3276 vma->vm_ops = &cifs_file_vm_ops;
3277 free_xid(xid);
3278 return rc;
3279}
3280
3281static void
3282cifs_readv_complete(struct work_struct *work)
3283{
3284 unsigned int i, got_bytes;
3285 struct cifs_readdata *rdata = container_of(work,
3286 struct cifs_readdata, work);
3287
3288 got_bytes = rdata->got_bytes;
3289 for (i = 0; i < rdata->nr_pages; i++) {
3290 struct page *page = rdata->pages[i];
3291
3292 lru_cache_add_file(page);
3293
3294 if (rdata->result == 0 ||
3295 (rdata->result == -EAGAIN && got_bytes)) {
3296 flush_dcache_page(page);
3297 SetPageUptodate(page);
3298 }
3299
3300 unlock_page(page);
3301
3302 if (rdata->result == 0 ||
3303 (rdata->result == -EAGAIN && got_bytes))
3304 cifs_readpage_to_fscache(rdata->mapping->host, page);
3305
3306 got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes);
3307
3308 page_cache_release(page);
3309 rdata->pages[i] = NULL;
3310 }
3311 kref_put(&rdata->refcount, cifs_readdata_release);
3312}
3313
3314static int
3315cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3316 struct cifs_readdata *rdata, unsigned int len)
3317{
3318 int result = 0;
3319 unsigned int i;
3320 u64 eof;
3321 pgoff_t eof_index;
3322 unsigned int nr_pages = rdata->nr_pages;
3323 struct kvec iov;
3324
3325 /* determine the eof that the server (probably) has */
3326 eof = CIFS_I(rdata->mapping->host)->server_eof;
3327 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3328 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3329
3330 rdata->got_bytes = 0;
3331 rdata->tailsz = PAGE_CACHE_SIZE;
3332 for (i = 0; i < nr_pages; i++) {
3333 struct page *page = rdata->pages[i];
3334
3335 if (len >= PAGE_CACHE_SIZE) {
3336 /* enough data to fill the page */
3337 iov.iov_base = kmap(page);
3338 iov.iov_len = PAGE_CACHE_SIZE;
3339 cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3340 i, page->index, iov.iov_base, iov.iov_len);
3341 len -= PAGE_CACHE_SIZE;
3342 } else if (len > 0) {
3343 /* enough for partial page, fill and zero the rest */
3344 iov.iov_base = kmap(page);
3345 iov.iov_len = len;
3346 cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3347 i, page->index, iov.iov_base, iov.iov_len);
3348 memset(iov.iov_base + len,
3349 '\0', PAGE_CACHE_SIZE - len);
3350 rdata->tailsz = len;
3351 len = 0;
3352 } else if (page->index > eof_index) {
3353 /*
3354 * The VFS will not try to do readahead past the
3355 * i_size, but it's possible that we have outstanding
3356 * writes with gaps in the middle and the i_size hasn't
3357 * caught up yet. Populate those with zeroed out pages
3358 * to prevent the VFS from repeatedly attempting to
3359 * fill them until the writes are flushed.
3360 */
3361 zero_user(page, 0, PAGE_CACHE_SIZE);
3362 lru_cache_add_file(page);
3363 flush_dcache_page(page);
3364 SetPageUptodate(page);
3365 unlock_page(page);
3366 page_cache_release(page);
3367 rdata->pages[i] = NULL;
3368 rdata->nr_pages--;
3369 continue;
3370 } else {
3371 /* no need to hold page hostage */
3372 lru_cache_add_file(page);
3373 unlock_page(page);
3374 page_cache_release(page);
3375 rdata->pages[i] = NULL;
3376 rdata->nr_pages--;
3377 continue;
3378 }
3379
3380 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3381 kunmap(page);
3382 if (result < 0)
3383 break;
3384
3385 rdata->got_bytes += result;
3386 }
3387
3388 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3389 rdata->got_bytes : result;
3390}
3391
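/*
 * Peel a run of consecutive pages off the tail of @page_list and add
 * them to the page cache, stopping at the first index discontinuity or
 * once the batch would exceed @rsize. The batch is returned on
 * @tmplist along with its starting offset and total byte count.
 */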
3392static int
3393readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3394 unsigned int rsize, struct list_head *tmplist,
3395 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3396{
3397 struct page *page, *tpage;
3398 unsigned int expected_index;
3399 int rc;
3400 gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
3401
3402 INIT_LIST_HEAD(tmplist);
3403
3404 page = list_entry(page_list->prev, struct page, lru);
3405
3406 /*
3407 * Lock the page and put it in the cache. Since no one else
3408 * should have access to this page, we're safe to simply set
3409 * PG_locked without checking it first.
3410 */
3411 __set_page_locked(page);
3412 rc = add_to_page_cache_locked(page, mapping,
3413 page->index, gfp);
3414
3415 /* give up if we can't stick it in the cache */
3416 if (rc) {
3417 __clear_page_locked(page);
3418 return rc;
3419 }
3420
3421 /* move first page to the tmplist */
3422 *offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3423 *bytes = PAGE_CACHE_SIZE;
3424 *nr_pages = 1;
3425 list_move_tail(&page->lru, tmplist);
3426
3427 /* now try and add more pages onto the request */
3428 expected_index = page->index + 1;
3429 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3430		/* discontinuity? */
3431 if (page->index != expected_index)
3432 break;
3433
3434 /* would this page push the read over the rsize? */
3435 if (*bytes + PAGE_CACHE_SIZE > rsize)
3436 break;
3437
3438 __set_page_locked(page);
3439 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3440 __clear_page_locked(page);
3441 break;
3442 }
3443 list_move_tail(&page->lru, tmplist);
3444 (*bytes) += PAGE_CACHE_SIZE;
3445 expected_index++;
3446 (*nr_pages)++;
3447 }
3448 return rc;
3449}
3450
3451static int cifs_readpages(struct file *file, struct address_space *mapping,
3452 struct list_head *page_list, unsigned num_pages)
3453{
3454 int rc;
3455 struct list_head tmplist;
3456 struct cifsFileInfo *open_file = file->private_data;
3457 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3458 struct TCP_Server_Info *server;
3459 pid_t pid;
3460
3461 /*
3462 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3463 * immediately if the cookie is negative
3464 *
3465 * After this point, every page in the list might have PG_fscache set,
3466	 * so we will need to clear it from every page we don't use.
3467 */
3468 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3469 &num_pages);
3470 if (rc == 0)
3471 return rc;
3472
3473 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3474 pid = open_file->pid;
3475 else
3476 pid = current->tgid;
3477
3478 rc = 0;
3479 server = tlink_tcon(open_file->tlink)->ses->server;
3480
3481 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3482 __func__, file, mapping, num_pages);
3483
3484 /*
3485 * Start with the page at end of list and move it to private
3486 * list. Do the same with any following pages until we hit
3487 * the rsize limit, hit an index discontinuity, or run out of
3488 * pages. Issue the async read and then start the loop again
3489 * until the list is empty.
3490 *
3491 * Note that list order is important. The page_list is in
3492 * the order of declining indexes. When we put the pages in
3493 * the rdata->pages, then we want them in increasing order.
3494 */
3495 while (!list_empty(page_list)) {
3496 unsigned int i, nr_pages, bytes, rsize;
3497 loff_t offset;
3498 struct page *page, *tpage;
3499 struct cifs_readdata *rdata;
3500 unsigned credits;
3501
3502 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3503 &rsize, &credits);
3504 if (rc)
3505 break;
3506
3507 /*
3508 * Give up immediately if rsize is too small to read an entire
3509 * page. The VFS will fall back to readpage. We should never
3510 * reach this point however since we set ra_pages to 0 when the
3511 * rsize is smaller than a cache page.
3512 */
3513 if (unlikely(rsize < PAGE_CACHE_SIZE)) {
3514 add_credits_and_wake_if(server, credits, 0);
3515 return 0;
3516 }
3517
3518 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3519 &nr_pages, &offset, &bytes);
3520 if (rc) {
3521 add_credits_and_wake_if(server, credits, 0);
3522 break;
3523 }
3524
3525 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3526 if (!rdata) {
3527 /* best to give up if we're out of mem */
3528 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3529 list_del(&page->lru);
3530 lru_cache_add_file(page);
3531 unlock_page(page);
3532 page_cache_release(page);
3533 }
3534 rc = -ENOMEM;
3535 add_credits_and_wake_if(server, credits, 0);
3536 break;
3537 }
3538
3539 rdata->cfile = cifsFileInfo_get(open_file);
3540 rdata->mapping = mapping;
3541 rdata->offset = offset;
3542 rdata->bytes = bytes;
3543 rdata->pid = pid;
3544 rdata->pagesz = PAGE_CACHE_SIZE;
3545 rdata->read_into_pages = cifs_readpages_read_into_pages;
3546 rdata->credits = credits;
3547
3548 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3549 list_del(&page->lru);
3550 rdata->pages[rdata->nr_pages++] = page;
3551 }
3552
3553 if (!rdata->cfile->invalidHandle ||
3554 !(rc = cifs_reopen_file(rdata->cfile, true)))
3555 rc = server->ops->async_readv(rdata);
3556 if (rc) {
3557 add_credits_and_wake_if(server, rdata->credits, 0);
3558 for (i = 0; i < rdata->nr_pages; i++) {
3559 page = rdata->pages[i];
3560 lru_cache_add_file(page);
3561 unlock_page(page);
3562 page_cache_release(page);
3563 }
3564 /* Fallback to the readpage in error/reconnect cases */
3565 kref_put(&rdata->refcount, cifs_readdata_release);
3566 break;
3567 }
3568
3569 kref_put(&rdata->refcount, cifs_readdata_release);
3570 }
3571
3572 /* Any pages that have been shown to fscache but didn't get added to
3573 * the pagecache must be uncached before they get returned to the
3574 * allocator.
3575 */
3576 cifs_fscache_readpages_cancel(mapping->host, page_list);
3577 return rc;
3578}
3579
3580/*
3581 * cifs_readpage_worker must be called with the page pinned
3582 */
3583static int cifs_readpage_worker(struct file *file, struct page *page,
3584 loff_t *poffset)
3585{
3586 char *read_data;
3587 int rc;
3588
3589 /* Is the page cached? */
3590 rc = cifs_readpage_from_fscache(file_inode(file), page);
3591 if (rc == 0)
3592 goto read_complete;
3593
3594 read_data = kmap(page);
3595	 /* for reads over a certain size we could initiate async read-ahead */
3596
3597 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3598
3599 if (rc < 0)
3600 goto io_error;
3601 else
3602 cifs_dbg(FYI, "Bytes read %d\n", rc);
3603
3604 file_inode(file)->i_atime =
3605 current_fs_time(file_inode(file)->i_sb);
3606
3607 if (PAGE_CACHE_SIZE > rc)
3608 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3609
3610 flush_dcache_page(page);
3611 SetPageUptodate(page);
3612
3613 /* send this page to the cache */
3614 cifs_readpage_to_fscache(file_inode(file), page);
3615
3616 rc = 0;
3617
3618io_error:
3619 kunmap(page);
3620 unlock_page(page);
3621
3622read_complete:
3623 return rc;
3624}
3625
3626static int cifs_readpage(struct file *file, struct page *page)
3627{
3628 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3629 int rc = -EACCES;
3630 unsigned int xid;
3631
3632 xid = get_xid();
3633
3634 if (file->private_data == NULL) {
3635 rc = -EBADF;
3636 free_xid(xid);
3637 return rc;
3638 }
3639
3640 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3641 page, (int)offset, (int)offset);
3642
3643 rc = cifs_readpage_worker(file, page, &offset);
3644
3645 free_xid(xid);
3646 return rc;
3647}
3648
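/*
 * Return 1 if any open handle on this inode was opened with write
 * access, 0 otherwise. Used by is_size_safe_to_change() below to
 * decide whether a server-reported file size may overwrite the local
 * one.
 */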
3649static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3650{
3651 struct cifsFileInfo *open_file;
3652 struct cifs_tcon *tcon =
3653 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
3654
3655 spin_lock(&tcon->open_file_lock);
3656 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3657 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3658 spin_unlock(&tcon->open_file_lock);
3659 return 1;
3660 }
3661 }
3662 spin_unlock(&tcon->open_file_lock);
3663 return 0;
3664}
3665
3666/* We do not want to update the file size from the server for inodes
3667 open for write - to avoid races with writepage extending
3668 the file. In the future we could consider allowing a refresh
3669 of the inode only on increases in the file size, but this is
3670 tricky to do without racing with writebehind page caching in
3671 the current Linux kernel design */
3672bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3673{
3674 if (!cifsInode)
3675 return true;
3676
3677 if (is_inode_writable(cifsInode)) {
3678 /* This inode is open for write at least once */
3679 struct cifs_sb_info *cifs_sb;
3680
3681 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3682 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3683	 /* since there is no page cache to corrupt on directio
3684	    we can change the size safely */
3685 return true;
3686 }
3687
3688 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3689 return true;
3690
3691 return false;
3692 } else
3693 return true;
3694}
3695
3696static int cifs_write_begin(struct file *file, struct address_space *mapping,
3697 loff_t pos, unsigned len, unsigned flags,
3698 struct page **pagep, void **fsdata)
3699{
3700 int oncethru = 0;
3701 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3702 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3703 loff_t page_start = pos & PAGE_MASK;
3704 loff_t i_size;
3705 struct page *page;
3706 int rc = 0;
3707
3708 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3709
3710start:
3711 page = grab_cache_page_write_begin(mapping, index, flags);
3712 if (!page) {
3713 rc = -ENOMEM;
3714 goto out;
3715 }
3716
3717 if (PageUptodate(page))
3718 goto out;
3719
3720 /*
3721 * If we write a full page it will be up to date, no need to read from
3722 * the server. If the write is short, we'll end up doing a sync write
3723 * instead.
3724 */
3725 if (len == PAGE_CACHE_SIZE)
3726 goto out;
3727
3728 /*
3729 * optimize away the read when we have an oplock, and we're not
3730 * expecting to use any of the data we'd be reading in. That
3731 * is, when the page lies beyond the EOF, or straddles the EOF
3732 * and the write will cover all of the existing data.
3733 */
3734 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3735 i_size = i_size_read(mapping->host);
3736 if (page_start >= i_size ||
3737 (offset == 0 && (pos + len) >= i_size)) {
3738 zero_user_segments(page, 0, offset,
3739 offset + len,
3740 PAGE_CACHE_SIZE);
3741 /*
3742 * PageChecked means that the parts of the page
3743 * to which we're not writing are considered up
3744 * to date. Once the data is copied to the
3745 * page, it can be set uptodate.
3746 */
3747 SetPageChecked(page);
3748 goto out;
3749 }
3750 }
3751
3752 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3753 /*
3754 * might as well read a page, it is fast enough. If we get
3755 * an error, we don't need to return it. cifs_write_end will
3756 * do a sync write instead since PG_uptodate isn't set.
3757 */
3758 cifs_readpage_worker(file, page, &page_start);
3759 page_cache_release(page);
3760 oncethru = 1;
3761 goto start;
3762 } else {
3763	 /* we could try using another file handle if there is one -
3764	    but how would we lock it to prevent a close of that handle
3765	    racing with this read? In any case the data will be
3766	    written out by write_end, so this is fine */
3767 }
3768out:
3769 *pagep = page;
3770 return rc;
3771}
3772
3773static int cifs_release_page(struct page *page, gfp_t gfp)
3774{
3775 if (PagePrivate(page))
3776 return 0;
3777
3778 return cifs_fscache_release_page(page, gfp);
3779}
3780
3781static void cifs_invalidate_page(struct page *page, unsigned int offset,
3782 unsigned int length)
3783{
3784 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3785
3786 if (offset == 0 && length == PAGE_CACHE_SIZE)
3787 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3788}
3789
3790static int cifs_launder_page(struct page *page)
3791{
3792 int rc = 0;
3793 loff_t range_start = page_offset(page);
3794 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3795 struct writeback_control wbc = {
3796 .sync_mode = WB_SYNC_ALL,
3797 .nr_to_write = 0,
3798 .range_start = range_start,
3799 .range_end = range_end,
3800 };
3801
3802 cifs_dbg(FYI, "Launder page: %p\n", page);
3803
3804 if (clear_page_dirty_for_io(page))
3805 rc = cifs_writepage_locked(page, &wbc);
3806
3807 cifs_fscache_invalidate_page(page, page->mapping->host);
3808 return rc;
3809}
3810
3811void cifs_oplock_break(struct work_struct *work)
3812{
3813 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3814 oplock_break);
3815 struct inode *inode = d_inode(cfile->dentry);
3816 struct cifsInodeInfo *cinode = CIFS_I(inode);
3817 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3818 struct TCP_Server_Info *server = tcon->ses->server;
3819 int rc = 0;
3820
3821 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3822 TASK_UNINTERRUPTIBLE);
3823
3824 server->ops->downgrade_oplock(server, cinode,
3825 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
3826
3827 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3828 cifs_has_mand_locks(cinode)) {
3829 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3830 inode);
3831 cinode->oplock = 0;
3832 }
3833
3834 if (inode && S_ISREG(inode->i_mode)) {
3835 if (CIFS_CACHE_READ(cinode))
3836 break_lease(inode, O_RDONLY);
3837 else
3838 break_lease(inode, O_WRONLY);
3839 rc = filemap_fdatawrite(inode->i_mapping);
3840 if (!CIFS_CACHE_READ(cinode)) {
3841 rc = filemap_fdatawait(inode->i_mapping);
3842 mapping_set_error(inode->i_mapping, rc);
3843 cifs_zap_mapping(inode);
3844 }
3845 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3846 }
3847
3848 rc = cifs_push_locks(cfile);
3849 if (rc)
3850 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3851
3852 /*
3853	 * Releasing a stale oplock after a recent reconnect of the SMB session,
3854	 * using a now-incorrect file handle, is not a data integrity issue, but
3855	 * do not bother sending an oplock release if the session to the server
3856	 * is still disconnected, since the oplock was already released by the server.
3857 */
3858 if (!cfile->oplock_break_cancelled) {
3859 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3860 cinode);
3861 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3862 }
3863 cifs_done_oplock_break(cinode);
3864}
3865
3866/*
3867 * The presence of cifs_direct_io() in the address space ops vector
3868 * allowes open() O_DIRECT flags which would have failed otherwise.
3869	 * allows open() with the O_DIRECT flag, which would otherwise fail.
3870	 *
3871	 * In non-cached mode (mount with cache=none) we shunt off direct read
3872	 * and write requests, so this method should never be called.
3873 * Direct IO is not yet supported in the cached mode.
3874 */
3875static ssize_t
3876cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
3877{
3878 /*
3879 * FIXME
3880 * Eventually need to support direct IO for non forcedirectio mounts
3881 */
3882 return -EINVAL;
3883}
3884
3885
3886const struct address_space_operations cifs_addr_ops = {
3887 .readpage = cifs_readpage,
3888 .readpages = cifs_readpages,
3889 .writepage = cifs_writepage,
3890 .writepages = cifs_writepages,
3891 .write_begin = cifs_write_begin,
3892 .write_end = cifs_write_end,
3893 .set_page_dirty = __set_page_dirty_nobuffers,
3894 .releasepage = cifs_release_page,
3895 .direct_IO = cifs_direct_io,
3896 .invalidatepage = cifs_invalidate_page,
3897 .launder_page = cifs_launder_page,
3898};
3899
3900/*
3901 * cifs_readpages requires the server to support a buffer large enough to
3902 * contain the header plus one complete page of data. Otherwise, we need
3903 * to leave cifs_readpages out of the address space operations.
3904 */
3905const struct address_space_operations cifs_addr_ops_smallbuf = {
3906 .readpage = cifs_readpage,
3907 .writepage = cifs_writepage,
3908 .writepages = cifs_writepages,
3909 .write_begin = cifs_write_begin,
3910 .write_end = cifs_write_end,
3911 .set_page_dirty = __set_page_dirty_nobuffers,
3912 .releasepage = cifs_release_page,
3913 .invalidatepage = cifs_invalidate_page,
3914 .launder_page = cifs_launder_page,
3915};