Bug Summary

File: ufs/ffs/ffs_softdep.c
Warning: line 4531, column 7
Value stored to 'error' is never read
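
The flagged store at line 4531 is outside the excerpt reproduced below (the annotation here ends near line 1344). For context, this message comes from the analyzer's deadcode.DeadStores checker. The following minimal, self-contained sketch (hypothetical code, not the code at ffs_softdep.c:4531) shows the pattern it reports:

/*
 * Illustrative only: the value assigned to 'error' by the first call
 * is overwritten before it is ever read, which is what the analyzer
 * means by "Value stored to 'error' is never read".
 */
static int step_one(void) { return (0); }
static int step_two(void) { return (0); }

int
example(void)
{
        int error;

        error = step_one();     /* flagged: stored value never read */
        error = step_two();     /* overwrites the unread value */
        return (error);
}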

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.4 -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ffs_softdep.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -ffp-contract=on -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -target-feature +retpoline-external-thunk -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/llvm16/lib/clang/16 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/legacy-dpm -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu13 -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/inc -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc/pmfw_if -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D SUSPEND -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant 
-fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fcf-protection=branch -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /home/ben/Projects/scan/2024-01-11-110808-61670-1 -x c /usr/src/sys/ufs/ffs/ffs_softdep.c
1/* $OpenBSD: ffs_softdep.c,v 1.152 2023/07/05 15:13:28 beck Exp $ */
2
3/*
4 * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
5 *
6 * The soft updates code is derived from the appendix of a University
7 * of Michigan technical report (Gregory R. Ganger and Yale N. Patt,
8 * "Soft Updates: A Solution to the Metadata Update Problem in File
9 * Systems", CSE-TR-254-95, August 1995).
10 *
11 * Further information about soft updates can be obtained from:
12 *
13 * Marshall Kirk McKusick http://www.mckusick.com/softdep/
14 * 1614 Oxford Street mckusick@mckusick.com
15 * Berkeley, CA 94709-1608 +1-510-843-9542
16 * USA
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
20 * are met:
21 *
22 * 1. Redistributions of source code must retain the above copyright
23 * notice, this list of conditions and the following disclaimer.
24 * 2. Redistributions in binary form must reproduce the above copyright
25 * notice, this list of conditions and the following disclaimer in the
26 * documentation and/or other materials provided with the distribution.
27 *
28 * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
29 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
30 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
31 * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
32 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * from: @(#)ffs_softdep.c 9.59 (McKusick) 6/21/00
41 * $FreeBSD: src/sys/ufs/ffs/ffs_softdep.c,v 1.86 2001/02/04 16:08:18 phk Exp $
42 */
43
44#include <sys/param.h>
45#include <sys/buf.h>
46#include <sys/kernel.h>
47#include <sys/malloc.h>
48#include <sys/mount.h>
49#include <sys/proc.h>
50#include <sys/pool.h>
51#include <sys/syslog.h>
52#include <sys/systm.h>
53#include <sys/vnode.h>
54#include <sys/specdev.h>
55#include <crypto/siphash.h>
56#include <ufs/ufs/dir.h>
57#include <ufs/ufs/quota.h>
58#include <ufs/ufs/inode.h>
59#include <ufs/ufs/ufsmount.h>
60#include <ufs/ffs/fs.h>
61#include <ufs/ffs/softdep.h>
62#include <ufs/ffs/ffs_extern.h>
63#include <ufs/ufs/ufs_extern.h>
64
65#define STATIC
66
67/*
68 * Mapping of dependency structure types to malloc types.
69 */
70#define D_PAGEDEP 0
71#define D_INODEDEP 1
72#define D_NEWBLK 2
73#define D_BMSAFEMAP 3
74#define D_ALLOCDIRECT 4
75#define D_INDIRDEP 5
76#define D_ALLOCINDIR 6
77#define D_FREEFRAG 7
78#define D_FREEBLKS 8
79#define D_FREEFILE 9
80#define D_DIRADD 10
81#define D_MKDIR 11
82#define D_DIRREM 12
83#define D_NEWDIRBLK 13
84#define D_LAST 13
85/*
86 * Names of softdep types.
87 */
88const char *softdep_typenames[] = {
89 "pagedep",
90 "inodedep",
91 "newblk",
92 "bmsafemap",
93 "allocdirect",
94 "indirdep",
95 "allocindir",
96 "freefrag",
97 "freeblks",
98 "freefile",
99 "diradd",
100 "mkdir",
101 "dirrem",
102 "newdirblk",
103};
104#define TYPENAME(type) \
105 ((unsigned)(type) <= D_LAST ? softdep_typenames[type] : "???")
106/*
107 * Finding the current process.
108 */
109#define CURPROC curproc
110/*
111 * End system adaptation definitions.
112 */
113
114/*
115 * Internal function prototypes.
116 */
117STATIC void softdep_error(char *, int);
118STATIC void drain_output(struct vnode *, int);
119STATIC int getdirtybuf(struct buf *, int);
120STATIC void clear_remove(struct proc *);
121STATIC void clear_inodedeps(struct proc *);
122STATIC int flush_pagedep_deps(struct vnode *, struct mount *,
123 struct diraddhd *);
124STATIC int flush_inodedep_deps(struct fs *, ufsino_t);
125STATIC int handle_written_filepage(struct pagedep *, struct buf *);
126STATIC void diradd_inode_written(struct diradd *, struct inodedep *);
127STATIC int handle_written_inodeblock(struct inodedep *, struct buf *);
128STATIC void handle_allocdirect_partdone(struct allocdirect *);
129STATIC void handle_allocindir_partdone(struct allocindir *);
130STATIC void initiate_write_filepage(struct pagedep *, struct buf *);
131STATIC void handle_written_mkdir(struct mkdir *, int);
132STATIC void initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *);
133#ifdef FFS2
134STATIC void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *);
135#endif
136STATIC void handle_workitem_freefile(struct freefile *);
137STATIC void handle_workitem_remove(struct dirrem *);
138STATIC struct dirrem *newdirrem(struct buf *, struct inode *,
139 struct inode *, int, struct dirrem **);
140STATIC void free_diradd(struct diradd *);
141STATIC void free_allocindir(struct allocindir *, struct inodedep *);
142STATIC void free_newdirblk(struct newdirblk *);
143STATIC int indir_trunc(struct inode *, daddr_t, int, daddr_t, long *);
144STATIC void deallocate_dependencies(struct buf *, struct inodedep *);
145STATIC void free_allocdirect(struct allocdirectlst *,
146 struct allocdirect *, int);
147STATIC int check_inode_unwritten(struct inodedep *);
148STATIC int free_inodedep(struct inodedep *);
149STATIC void handle_workitem_freeblocks(struct freeblks *);
150STATIC void merge_inode_lists(struct inodedep *);
151STATIC void setup_allocindir_phase2(struct buf *, struct inode *,
152 struct allocindir *);
153STATIC struct allocindir *newallocindir(struct inode *, int, daddr_t,
154 daddr_t);
155STATIC void handle_workitem_freefrag(struct freefrag *);
156STATIC struct freefrag *newfreefrag(struct inode *, daddr_t, long);
157STATIC void allocdirect_merge(struct allocdirectlst *,
158 struct allocdirect *, struct allocdirect *);
159STATIC struct bmsafemap *bmsafemap_lookup(struct buf *);
160STATIC int newblk_lookup(struct fs *, daddr_t, int,
161 struct newblk **);
162STATIC int inodedep_lookup(struct fs *, ufsino_t, int, struct inodedep **);
163STATIC int pagedep_lookup(struct inode *, daddr_t, int, struct pagedep **);
164STATIC void pause_timer(void *);
165STATIC int request_cleanup(int, int);
166STATIC int process_worklist_item(struct mount *, int *, int);
167STATIC void add_to_worklist(struct worklist *);
168
169/*
170 * Exported softdep operations.
171 */
172void softdep_disk_io_initiation(struct buf *);
173void softdep_disk_write_complete(struct buf *);
174void softdep_deallocate_dependencies(struct buf *);
175void softdep_move_dependencies(struct buf *, struct buf *);
176int softdep_count_dependencies(struct buf *bp, int, int);
177
178/*
179 * Locking primitives.
180 *
181 * For a uniprocessor, all we need to do is protect against disk
182 * interrupts. For a multiprocessor, this lock would have to be
183 * a mutex. A single mutex is used throughout this file, though
184 * finer grain locking could be used if contention warranted it.
185 *
186 * For a multiprocessor, the sleep call would accept a lock and
187 * release it after the sleep processing was complete. In a uniprocessor
188 * implementation there is no such interlock, so we simple mark
189 * the places where it needs to be done with the `interlocked' form
190 * of the lock calls. Since the uniprocessor sleep already interlocks
191 * the spl, there is nothing that really needs to be done.
192 */
193#ifndef /* NOT */ DEBUG
194STATIC struct lockit {
195 int lkt_spl;
196} lk = { 0 };
197#define ACQUIRE_LOCK(lk) (lk)->lkt_spl = splbio()
198#define FREE_LOCK(lk) splx((lk)->lkt_spl)
199#define ACQUIRE_LOCK_INTERLOCKED(lk,s) (lk)->lkt_spl = (s)
200#define FREE_LOCK_INTERLOCKED(lk) ((lk)->lkt_spl)
201
202#else /* DEBUG */
203STATIC struct lockit {
204 int lkt_spl;
205 pid_t lkt_held;
206 int lkt_line;
207} lk = { 0, -1 };
208STATIC int lockcnt;
209
210STATIC void acquire_lock(struct lockit *, int);
211STATIC void free_lock(struct lockit *, int);
212STATIC void acquire_lock_interlocked(struct lockit *, int, int);
213STATIC int free_lock_interlocked(struct lockit *, int);
214
215#define ACQUIRE_LOCK(lk) acquire_lock(lk, __LINE__)
216#define FREE_LOCK(lk) free_lock(lk, __LINE__)
217#define ACQUIRE_LOCK_INTERLOCKED(lk,s) acquire_lock_interlocked(lk, (s), __LINE__)
218#define FREE_LOCK_INTERLOCKED(lk) free_lock_interlocked(lk, __LINE__)
219
220STATIC void
221acquire_lock(struct lockit *lk, int line)
222{
223 pid_t holder;
224 int original_line;
225
226 if (lk->lkt_held != -1) {
227 holder = lk->lkt_held;
228 original_line = lk->lkt_line;
229 FREE_LOCK(lk);
230 if (holder == CURPROC->p_tid)
231 panic("softdep_lock: locking against myself, acquired at line %d, relocked at line %d", original_line, line);
232 else
233 panic("softdep_lock: lock held by %d, acquired at line %d, relocked at line %d", holder, original_line, line);
234 }
235 lk->lkt_spl = splbio();
236 lk->lkt_held = CURPROC->p_tid;
237 lk->lkt_line = line;
238 lockcnt++;
239}
240
241STATIC void
242free_lock(struct lockit *lk, int line)
243{
244
245 if (lk->lkt_held == -1)
246 panic("softdep_unlock: lock not held at line %d", line);
247 lk->lkt_held = -1;
248 splx(lk->lkt_spl);
249}
250
251STATIC void
252acquire_lock_interlocked(struct lockit *lk, int s, int line)
253{
254 pid_t holder;
255 int original_line;
256
257 if (lk->lkt_held != -1) {
258 holder = lk->lkt_held;
259 original_line = lk->lkt_line;
260 FREE_LOCK_INTERLOCKED(lk);
261 if (holder == CURPROC->p_tid)
262 panic("softdep_lock: locking against myself, acquired at line %d, relocked at line %d", original_line, line);
263 else
264 panic("softdep_lock: lock held by %d, acquired at line %d, relocked at line %d", holder, original_line, line);
265 }
266 lk->lkt_held = CURPROC->p_tid;
267 lk->lkt_line = line;
268 lk->lkt_spl = s;
269 lockcnt++;
270}
271
272STATIC int
273free_lock_interlocked(struct lockit *lk, int line)
274{
275
276 if (lk->lkt_held == -1)
277 panic("softdep_unlock_interlocked: lock not held at line %d", line);
278 lk->lkt_held = -1;
279
280 return (lk->lkt_spl);
281}
282#endif /* DEBUG */
283
284/*
285 * Place holder for real semaphores.
286 */
287struct sema {
288 int value;
289 pid_t holder;
290 char *name;
291 int prio;
292};
293STATIC void sema_init(struct sema *, char *, int);
294STATIC int sema_get(struct sema *, struct lockit *);
295STATIC void sema_release(struct sema *);
296
297STATIC void
298sema_init(struct sema *semap, char *name, int prio)
299{
300
301 semap->holder = -1;
302 semap->value = 0;
303 semap->name = name;
304 semap->prio = prio;
305}
306
307STATIC int
308sema_get(struct sema *semap, struct lockit *interlock)
309{
310 int s;
311
312 if (semap->value++ > 0) {
313 if (interlock != NULL)
314 s = FREE_LOCK_INTERLOCKED(interlock);
315 tsleep_nsec(semap, semap->prio, semap->name, INFSLP);
316 if (interlock != NULL) {
317 ACQUIRE_LOCK_INTERLOCKED(interlock, s);
318 FREE_LOCK(interlock);
319 }
320 return (0);
321 }
322 semap->holder = CURPROC->p_tid;
323 if (interlock != NULL)
324 FREE_LOCK(interlock);
325 return (1);
326}
327
328STATIC void
329sema_release(struct sema *semap)
330{
331
332 if (semap->value <= 0 || semap->holder != CURPROC->p_tid) {
333#ifdef DEBUG
334 if (lk.lkt_held != -1)
335 FREE_LOCK(&lk);
336#endif
337 panic("sema_release: not held");
338 }
339 if (--semap->value > 0) {
340 semap->value = 0;
341 wakeup(semap);
342 }
343 semap->holder = -1;
344}
345
346/*
347 * Memory management.
348 */
349STATIC struct pool pagedep_pool;
350STATIC struct pool inodedep_pool;
351STATIC struct pool newblk_pool;
352STATIC struct pool bmsafemap_pool;
353STATIC struct pool allocdirect_pool;
354STATIC struct pool indirdep_pool;
355STATIC struct pool allocindir_pool;
356STATIC struct pool freefrag_pool;
357STATIC struct pool freeblks_pool;
358STATIC struct pool freefile_pool;
359STATIC struct pool diradd_pool;
360STATIC struct pool mkdir_pool;
361STATIC struct pool dirrem_pool;
362STATIC struct pool newdirblk_pool;
363
364static __inline void
365softdep_free(struct worklist *item, int type)
366{
367
368 switch (type) {
369 case D_PAGEDEP:
370 pool_put(&pagedep_pool, item);
371 break;
372
373 case D_INODEDEP:
374 pool_put(&inodedep_pool, item);
375 break;
376
377 case D_BMSAFEMAP:
378 pool_put(&bmsafemap_pool, item);
379 break;
380
381 case D_ALLOCDIRECT:
382 pool_put(&allocdirect_pool, item);
383 break;
384
385 case D_INDIRDEP:
386 pool_put(&indirdep_pool, item);
387 break;
388
389 case D_ALLOCINDIR:
390 pool_put(&allocindir_pool, item);
391 break;
392
393 case D_FREEFRAG:
394 pool_put(&freefrag_pool, item);
395 break;
396
397 case D_FREEBLKS:
398 pool_put(&freeblks_pool, item);
399 break;
400
401 case D_FREEFILE:
402 pool_put(&freefile_pool, item);
403 break;
404
405 case D_DIRADD:
406 pool_put(&diradd_pool, item);
407 break;
408
409 case D_MKDIR:
410 pool_put(&mkdir_pool, item);
411 break;
412
413 case D_DIRREM:
414 pool_put(&dirrem_pool, item);
415 break;
416
417 case D_NEWDIRBLK:
418 pool_put(&newdirblk_pool, item);
419 break;
420
421 default:
422#ifdef DEBUG
423 if (lk.lkt_held != -1)
424 FREE_LOCK(&lk);
425#endif
426 panic("softdep_free: unknown type %d", type);
427 }
428}
429
430struct workhead softdep_freequeue;
431
432static __inline void
433softdep_freequeue_add(struct worklist *item)
434{
435 int s;
436
437 s = splbio();
438 LIST_INSERT_HEAD(&softdep_freequeue, item, wk_list);
439 splx(s);
440}
441
442static __inline void
443softdep_freequeue_process(void)
444{
445 struct worklist *wk;
446
447 splassert(IPL_BIO);
448
449 while ((wk = LIST_FIRST(&softdep_freequeue)) != NULL) {
450 LIST_REMOVE(wk, wk_list);
451 FREE_LOCK(&lk);
452 softdep_free(wk, wk->wk_type);
453 ACQUIRE_LOCK(&lk);
454 }
455}
456
457/*
458 * Worklist queue management.
459 * These routines require that the lock be held.
460 */
461#ifndef /* NOT */ DEBUG
462#define WORKLIST_INSERT(head, item) do { \
463 (item)->wk_state |= ONWORKLIST; \
464 LIST_INSERT_HEAD(head, item, wk_list); \
465} while (0)
466#define WORKLIST_REMOVE(item) do { \
467 (item)->wk_state &= ~ONWORKLIST; \
468 LIST_REMOVE(item, wk_list); \
469} while (0)
470#define WORKITEM_FREE(item, type) softdep_freequeue_add((struct worklist *)item)
471
472#else /* DEBUG */
473STATIC void worklist_insert(struct workhead *, struct worklist *);
474STATIC void worklist_remove(struct worklist *);
475STATIC void workitem_free(struct worklist *);
476
477#define WORKLIST_INSERT(head, item) worklist_insert(head, item)
478#define WORKLIST_REMOVE(item) worklist_remove(item)
479#define WORKITEM_FREE(item, type) workitem_free((struct worklist *)item)
480
481STATIC void
482worklist_insert(struct workhead *head, struct worklist *item)
483{
484
485 if (lk.lkt_held == -1)
486 panic("worklist_insert: lock not held");
487 if (item->wk_state & ONWORKLIST) {
488 FREE_LOCK(&lk);
489 panic("worklist_insert: already on list");
490 }
491 item->wk_state |= ONWORKLIST;
492 LIST_INSERT_HEAD(head, item, wk_list);
493}
494
495STATIC void
496worklist_remove(struct worklist *item)
497{
498
499 if (lk.lkt_held == -1)
500 panic("worklist_remove: lock not held");
501 if ((item->wk_state & ONWORKLIST) == 0) {
502 FREE_LOCK(&lk);
503 panic("worklist_remove: not on list");
504 }
505 item->wk_state &= ~ONWORKLIST;
506 LIST_REMOVE(item, wk_list);
507}
508
509STATIC void
510workitem_free(struct worklist *item)
511{
512
513 if (item->wk_state & ONWORKLIST) {
514 if (lk.lkt_held != -1)
515 FREE_LOCK(&lk);
516 panic("workitem_free: still on list");
517 }
518 softdep_freequeue_add(item);
519}
520#endif /* DEBUG */
521
522/*
523 * Workitem queue management
524 */
525STATIC struct workhead softdep_workitem_pending;
526STATIC struct worklist *worklist_tail;
527STATIC int num_on_worklist; /* number of worklist items to be processed */
528STATIC int softdep_worklist_busy; /* 1 => trying to do unmount */
529STATIC int softdep_worklist_req; /* serialized waiters */
530STATIC int max_softdeps; /* maximum number of structs before slowdown */
531STATIC int tickdelay = 2; /* number of ticks to pause during slowdown */
532STATIC int proc_waiting; /* tracks whether we have a timeout posted */
533STATIC int *stat_countp; /* statistic to count in proc_waiting timeout */
534STATIC struct timeout proc_waiting_timeout;
535STATIC struct proc *filesys_syncer; /* proc of filesystem syncer process */
536STATIC int req_clear_inodedeps; /* syncer process flush some inodedeps */
537#define FLUSH_INODES 1
538STATIC int req_clear_remove; /* syncer process flush some freeblks */
539#define FLUSH_REMOVE 2
540/*
541 * runtime statistics
542 */
543STATIC int stat_worklist_push; /* number of worklist cleanups */
544STATIC int stat_blk_limit_push; /* number of times block limit neared */
545STATIC int stat_ino_limit_push; /* number of times inode limit neared */
546STATIC int stat_blk_limit_hit; /* number of times block slowdown imposed */
547STATIC int stat_ino_limit_hit; /* number of times inode slowdown imposed */
548STATIC int stat_sync_limit_hit; /* number of synchronous slowdowns imposed */
549STATIC int stat_indir_blk_ptrs; /* bufs redirtied as indir ptrs not written */
550STATIC int stat_inode_bitmap; /* bufs redirtied as inode bitmap not written */
551STATIC int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */
552STATIC int stat_dir_entry; /* bufs redirtied as dir entry cannot write */
553
554/*
555 * Add an item to the end of the work queue.
556 * This routine requires that the lock be held.
557 * This is the only routine that adds items to the list.
558 * The following routine is the only one that removes items
559 * and does so in order from first to last.
560 */
561STATIC void
562add_to_worklist(struct worklist *wk)
563{
564
565 if (wk->wk_state & ONWORKLIST) {
566#ifdef DEBUG
567 if (lk.lkt_held != -1)
568 FREE_LOCK(&lk);
569#endif
570 panic("add_to_worklist: already on list");
571 }
572 wk->wk_state |= ONWORKLIST;
573 if (LIST_FIRST(&softdep_workitem_pending) == NULL)
574 LIST_INSERT_HEAD(&softdep_workitem_pending, wk, wk_list);
575 else
576 LIST_INSERT_AFTER(worklist_tail, wk, wk_list);
577 worklist_tail = wk;
578 num_on_worklist += 1;
579}
580
581/*
582 * Process that runs once per second to handle items in the background queue.
583 *
584 * Note that we ensure that everything is done in the order in which they
585 * appear in the queue. The code below depends on this property to ensure
586 * that blocks of a file are freed before the inode itself is freed. This
587 * ordering ensures that no new <vfsid, inum, lbn> triples will be generated
588 * until all the old ones have been purged from the dependency lists.
589 */
590int
591softdep_process_worklist(struct mount *matchmnt)
592{
593 struct proc *p = CURPROC;
594 int matchcnt, loopcount;
595 struct timeval starttime;
596
597 /*
598 * First process any items on the delayed-free queue.
599 */
600 ACQUIRE_LOCK(&lk);
601 softdep_freequeue_process();
602 FREE_LOCK(&lk);
603
604 /*
605 * Record the process identifier of our caller so that we can give
606 * this process preferential treatment in request_cleanup below.
607 * We can't do this in softdep_initialize, because the syncer doesn't
608 * have to run then.
609 * NOTE! This function _could_ be called with a curproc != syncerproc.
610 */
611 filesys_syncer = syncerproc;
612 matchcnt = 0;
613
614 /*
615 * There is no danger of having multiple processes run this
616 * code, but we have to single-thread it when softdep_flushfiles()
617 * is in operation to get an accurate count of the number of items
618 * related to its mount point that are in the list.
619 */
620 if (matchmnt == NULL) {
621 if (softdep_worklist_busy < 0)
622 return(-1);
623 softdep_worklist_busy += 1;
624 }
625
626 /*
627 * If requested, try removing inode or removal dependencies.
628 */
629 if (req_clear_inodedeps) {
630 clear_inodedeps(p);
631 req_clear_inodedeps -= 1;
632 wakeup_one(&proc_waiting);
633 }
634 if (req_clear_remove) {
635 clear_remove(p);
636 req_clear_remove -= 1;
637 wakeup_one(&proc_waiting);
638 }
639 loopcount = 1;
640 getmicrouptime(&starttime);
641 while (num_on_worklist > 0) {
642 if (process_worklist_item(matchmnt, &matchcnt, LK_NOWAIT) == 0)
643 break;
644
645 /*
646 * If a umount operation wants to run the worklist
647 * accurately, abort.
648 */
649 if (softdep_worklist_req && matchmnt == NULL) {
650 matchcnt = -1;
651 break;
652 }
653
654 /*
655 * If requested, try removing inode or removal dependencies.
656 */
657 if (req_clear_inodedeps) {
658 clear_inodedeps(p);
659 req_clear_inodedeps -= 1;
660 wakeup_one(&proc_waiting);
661 }
662 if (req_clear_remove) {
663 clear_remove(p);
664 req_clear_remove -= 1;
665 wakeup_one(&proc_waiting);
666 }
667 /*
668 * We do not generally want to stop for buffer space, but if
669 * we are really being a buffer hog, we will stop and wait.
670 */
671#if 0
672 if (loopcount++ % 128 == 0)
673 bwillwrite();
674#endif
675 /*
676 * Never allow processing to run for more than one
677 * second. Otherwise the other syncer tasks may get
678 * excessively backlogged.
679 */
680 {
681 struct timeval diff;
682 struct timeval tv;
683
684 getmicrouptime(&tv);
685 timersub(&tv, &starttime, &diff);
686 if (diff.tv_sec != 0 && matchmnt == NULL) {
687 matchcnt = -1;
688 break;
689 }
690 }
691
692 /*
693 * Process any new items on the delayed-free queue.
694 */
695 ACQUIRE_LOCK(&lk);
696 softdep_freequeue_process();
697 FREE_LOCK(&lk);
698 }
699 if (matchmnt == NULL) {
700 softdep_worklist_busy -= 1;
701 if (softdep_worklist_req && softdep_worklist_busy == 0)
702 wakeup(&softdep_worklist_req);
703 }
704 return (matchcnt);
705}
706
707/*
708 * Process one item on the worklist.
709 */
710STATIC int
711process_worklist_item(struct mount *matchmnt, int *matchcnt, int flags)
712{
713 struct worklist *wk, *wkend;
714 struct dirrem *dirrem;
715 struct mount *mp;
716 struct vnode *vp;
717
718 ACQUIRE_LOCK(&lk);
719 /*
720 * Normally we just process each item on the worklist in order.
721 * However, if we are in a situation where we cannot lock any
722 * inodes, we have to skip over any dirrem requests whose
723 * vnodes are resident and locked.
724 */
725 LIST_FOREACH(wk, &softdep_workitem_pending, wk_list) {
726 if ((flags & LK_NOWAIT) == 0 || wk->wk_type != D_DIRREM)
727 break;
728 dirrem = WK_DIRREM(wk);
729 vp = ufs_ihashlookup(VFSTOUFS(dirrem->dm_mnt)->um_dev,
730 dirrem->dm_oldinum);
731 if (vp == NULL || !VOP_ISLOCKED(vp))
732 break;
733 }
734 if (wk == NULL) {
735 FREE_LOCK(&lk);
736 return (0);
737 }
738 /*
739 * Remove the item to be processed. If we are removing the last
740 * item on the list, we need to recalculate the tail pointer.
741 * As this happens rarely and usually when the list is short,
742 * we just run down the list to find it rather than tracking it
743 * in the above loop.
744 */
745 WORKLIST_REMOVE(wk);
746 if (wk == worklist_tail) {
747 LIST_FOREACH(wkend, &softdep_workitem_pending, wk_list)
748 if (LIST_NEXT(wkend, wk_list) == NULL)
749 break;
750 worklist_tail = wkend;
751 }
752 num_on_worklist -= 1;
753 FREE_LOCK(&lk);
754 switch (wk->wk_type) {
755
756 case D_DIRREM:
757 /* removal of a directory entry */
758 mp = WK_DIRREM(wk)->dm_mnt;
759#if 0
760 if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
761 panic("%s: dirrem on suspended filesystem",
762 "process_worklist_item");
763#endif
764 if (matchmnt != NULL && mp == matchmnt)
765 *matchcnt += 1;
766 handle_workitem_remove(WK_DIRREM(wk));
767 break;
768
769 case D_FREEBLKS:
770 /* releasing blocks and/or fragments from a file */
771 mp = WK_FREEBLKS(wk)->fb_mnt;
772#if 0
773 if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
774 panic("%s: freeblks on suspended filesystem",
775 "process_worklist_item");
776#endif
777 if (matchmnt != NULL && mp == matchmnt)
778 *matchcnt += 1;
779 handle_workitem_freeblocks(WK_FREEBLKS(wk));
780 break;
781
782 case D_FREEFRAG:
783 /* releasing a fragment when replaced as a file grows */
784 mp = WK_FREEFRAG(wk)->ff_mnt;
785#if 0
786 if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
787 panic("%s: freefrag on suspended filesystem",
788 "process_worklist_item");
789#endif
790 if (matchmnt != NULL && mp == matchmnt)
791 *matchcnt += 1;
792 handle_workitem_freefrag(WK_FREEFRAG(wk));
793 break;
794
795 case D_FREEFILE:
796 /* releasing an inode when its link count drops to 0 */
797 mp = WK_FREEFILE(wk)->fx_mnt;
798#if 0
799 if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
800 panic("%s: freefile on suspended filesystem",
801 "process_worklist_item");
802#endif
803 if (matchmnt != NULL && mp == matchmnt)
804 *matchcnt += 1;
805 handle_workitem_freefile(WK_FREEFILE(wk));
806 break;
807
808 default:
809 panic("%s_process_worklist: Unknown type %s",
810 "softdep", TYPENAME(wk->wk_type)((unsigned)(wk->wk_type) <= 13 ? softdep_typenames[wk->
wk_type] : "???")
);
811 /* NOTREACHED */
812 }
813 return (1);
814}
815
816/*
817 * Move dependencies from one buffer to another.
818 */
819void
820softdep_move_dependencies(struct buf *oldbp, struct buf *newbp)
821{
822 struct worklist *wk, *wktail;
823
824 if (LIST_FIRST(&newbp->b_dep) != NULL)
825 panic("softdep_move_dependencies: need merge code");
826 wktail = NULL;
827 ACQUIRE_LOCK(&lk);
828 while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) {
829 LIST_REMOVE(wk, wk_list);
830 if (wktail == NULL)
831 LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list);
832 else
833 LIST_INSERT_AFTER(wktail, wk, wk_list);
834 wktail = wk;
835 }
836 FREE_LOCK(&lk);
837}
838
839/*
840 * Purge the work list of all items associated with a particular mount point.
841 */
842int
843softdep_flushworklist(struct mount *oldmnt, int *countp, struct proc *p)
844{
845 struct vnode *devvp;
846 int count, error = 0;
847
848 /*
849 * Await our turn to clear out the queue, then serialize access.
850 */
851 while (softdep_worklist_busy) {
852 softdep_worklist_req += 1;
853 tsleep_nsec(&softdep_worklist_req, PRIBIO, "softflush", INFSLP);
854 softdep_worklist_req -= 1;
855 }
856 softdep_worklist_busy = -1;
857 /*
858 * Alternately flush the block device associated with the mount
859 * point and process any dependencies that the flushing
860 * creates. We continue until no more worklist dependencies
861 * are found.
862 */
863 *countp = 0;
864 devvp = VFSTOUFS(oldmnt)->um_devvp;
865 while ((count = softdep_process_worklist(oldmnt)) > 0) {
866 *countp += count;
867 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
868 error = VOP_FSYNC(devvp, p->p_ucred, MNT_WAIT, p);
869 VOP_UNLOCK(devvp);
870 if (error)
871 break;
872 }
873 softdep_worklist_busy = 0;
874 if (softdep_worklist_req)
875 wakeup(&softdep_worklist_req);
876 return (error);
877}
878
879/*
880 * Flush all vnodes and worklist items associated with a specified mount point.
881 */
882int
883softdep_flushfiles(struct mount *oldmnt, int flags, struct proc *p)
884{
885 int error, count, loopcnt;
886
887 /*
888 * Alternately flush the vnodes associated with the mount
889 * point and process any dependencies that the flushing
890 * creates. In theory, this loop can happen at most twice,
891 * but we give it a few extra just to be sure.
892 */
893 for (loopcnt = 10; loopcnt > 0; loopcnt--) {
894 /*
895 * Do another flush in case any vnodes were brought in
896 * as part of the cleanup operations.
897 */
898 if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0)
899 break;
900 if ((error = softdep_flushworklist(oldmnt, &count, p)) != 0 ||
901 count == 0)
902 break;
903 }
904 /*
905 * If the reboot process sleeps during the loop, the update
906 * process may call softdep_process_worklist() and create
907 * new dirty vnodes at the mount point. Call ffs_flushfiles()
908 * again after the loop has flushed all soft dependencies.
909 */
910 if (error == 0)
911 error = ffs_flushfiles(oldmnt, flags, p);
912 /*
913 * If we are unmounting then it is an error to fail. If we
914 * are simply trying to downgrade to read-only, then filesystem
915 * activity can keep us busy forever, so we just fail with EBUSY.
916 */
917 if (loopcnt == 0) {
918 error = EBUSY;
919 }
920 return (error);
921}
922
923/*
924 * Structure hashing.
925 *
926 * There are three types of structures that can be looked up:
927 * 1) pagedep structures identified by mount point, inode number,
928 * and logical block.
929 * 2) inodedep structures identified by mount point and inode number.
930 * 3) newblk structures identified by mount point and
931 * physical block number.
932 *
933 * The "pagedep" and "inodedep" dependency structures are hashed
934 * separately from the file blocks and inodes to which they correspond.
935 * This separation helps when the in-memory copy of an inode or
936 * file block must be replaced. It also obviates the need to access
937 * an inode or file page when simply updating (or de-allocating)
938 * dependency structures. Lookup of newblk structures is needed to
939 * find newly allocated blocks when trying to associate them with
940 * their allocdirect or allocindir structure.
941 *
942 * The lookup routines optionally create and hash a new instance when
943 * an existing entry is not found.
944 */
945#define DEPALLOC 0x0001 /* allocate structure if lookup fails */
946#define NODELAY 0x0002 /* cannot do background work */
947
948SIPHASH_KEY softdep_hashkey;
949
950/*
951 * Structures and routines associated with pagedep caching.
952 */
953LIST_HEAD(pagedep_hashhead, pagedep) *pagedep_hashtbl;
954u_long pagedep_hash; /* size of hash table - 1 */
955STATIC struct sema pagedep_in_progress;
956
957/*
958 * Look up a pagedep. Return 1 if found, 0 if not found or found
959 * when asked to allocate but not associated with any buffer.
960 * If not found, allocate if DEPALLOC flag is passed.
961 * Found or allocated entry is returned in pagedeppp.
962 * This routine must be called with splbio interrupts blocked.
963 */
964STATIC int
965pagedep_lookup(struct inode *ip, daddr_t lbn, int flags,
966 struct pagedep **pagedeppp)
967{
968 SIPHASH_CTX ctx;
969 struct pagedep *pagedep;
970 struct pagedep_hashhead *pagedephd;
971 struct mount *mp;
972 int i;
973
974 splassert(IPL_BIO);
975
976#ifdef DEBUG
977 if (lk.lkt_held == -1)
978 panic("pagedep_lookup: lock not held");
979#endif
980 mp = ITOV(ip)->v_mount;
981
982 SipHash24_Init(&ctx, &softdep_hashkey);
983 SipHash24_Update(&ctx, &mp, sizeof(mp));
984 SipHash24_Update(&ctx, &ip->i_number, sizeof(ip->i_number));
985 SipHash24_Update(&ctx, &lbn, sizeof(lbn));
986 pagedephd = &pagedep_hashtbl[SipHash24_End(&ctx) & pagedep_hash];
987top:
988 LIST_FOREACH(pagedep, pagedephd, pd_hash)
989 if (ip->i_number == pagedep->pd_ino &&
990 lbn == pagedep->pd_lbn &&
991 mp == pagedep->pd_mnt)
992 break;
993 if (pagedep) {
994 *pagedeppp = pagedep;
995 if ((flags & DEPALLOC) != 0 &&
996 (pagedep->pd_state & ONWORKLIST) == 0)
997 return (0);
998 return (1);
999 }
1000 if ((flags & DEPALLOC) == 0) {
1001 *pagedeppp = NULL;
1002 return (0);
1003 }
1004 if (sema_get(&pagedep_in_progress, &lk) == 0) {
1005 ACQUIRE_LOCK(&lk);
1006 goto top;
1007 }
1008 pagedep = pool_get(&pagedep_pool, PR_WAITOK | PR_ZERO);
1009 pagedep->pd_list.wk_type = D_PAGEDEP;
1010 pagedep->pd_mnt = mp;
1011 pagedep->pd_ino = ip->i_number;
1012 pagedep->pd_lbn = lbn;
1013 LIST_INIT(&pagedep->pd_dirremhd);
1014 LIST_INIT(&pagedep->pd_pendinghd);
1015 for (i = 0; i < DAHASHSZ; i++)
1016 LIST_INIT(&pagedep->pd_diraddhd[i]);
1017 ACQUIRE_LOCK(&lk);
1018 LIST_INSERT_HEAD(pagedephd, pagedep, pd_hash);
1019 sema_release(&pagedep_in_progress);
1020 *pagedeppp = pagedep;
1021 return (0);
1022}
1023
1024/*
1025 * Structures and routines associated with inodedep caching.
1026 */
1027LIST_HEAD(inodedep_hashhead, inodedep) *inodedep_hashtbl;
1028STATIC u_long inodedep_hash; /* size of hash table - 1 */
1029STATIC long num_inodedep; /* number of inodedep allocated */
1030STATIC struct sema inodedep_in_progress;
1031
1032/*
1033 * Look up a inodedep. Return 1 if found, 0 if not found.
1034 * If not found, allocate if DEPALLOC flag is passed.
1035 * Found or allocated entry is returned in inodedeppp.
1036 * This routine must be called with splbio interrupts blocked.
1037 */
1038STATIC int
1039inodedep_lookup(struct fs *fs, ufsino_t inum, int flags,
1040 struct inodedep **inodedeppp)
1041{
1042 SIPHASH_CTX ctx;
1043 struct inodedep *inodedep;
1044 struct inodedep_hashhead *inodedephd;
1045 int firsttry;
1046
1047 splassert(IPL_BIO);
1048
1049#ifdef DEBUG
1050 if (lk.lkt_held == -1)
1051 panic("inodedep_lookup: lock not held");
1052#endif
1053 firsttry = 1;
1054 SipHash24_Init(&ctx, &softdep_hashkey);
1055 SipHash24_Update(&ctx, &fs, sizeof(fs));
1056 SipHash24_Update(&ctx, &inum, sizeof(inum));
1057 inodedephd = &inodedep_hashtbl[SipHash24_End(&ctx) & inodedep_hash];
1058top:
1059 LIST_FOREACH(inodedep, inodedephd, id_hash)
1060 if (inum == inodedep->id_ino && fs == inodedep->id_fs)
1061 break;
1062 if (inodedep) {
1063 *inodedeppp = inodedep;
1064 return (1);
1065 }
1066 if ((flags & DEPALLOC) == 0) {
1067 *inodedeppp = NULL;
1068 return (0);
1069 }
1070 /*
1071 * If we are over our limit, try to improve the situation.
1072 */
1073 if (num_inodedep > max_softdeps && firsttry && (flags & NODELAY) == 0 &&
1074 request_cleanup(FLUSH_INODES, 1)) {
1075 firsttry = 0;
1076 goto top;
1077 }
1078 if (sema_get(&inodedep_in_progress, &lk) == 0) {
1079 ACQUIRE_LOCK(&lk);
1080 goto top;
1081 }
1082 num_inodedep += 1;
1083 inodedep = pool_get(&inodedep_pool, PR_WAITOK);
1084 inodedep->id_list.wk_type = D_INODEDEP;
1085 inodedep->id_fs = fs;
1086 inodedep->id_ino = inum;
1087 inodedep->id_state = ALLCOMPLETE;
1088 inodedep->id_nlinkdelta = 0;
1089 inodedep->id_savedino1 = NULL;
1090 inodedep->id_savedsize = -1;
1091 inodedep->id_buf = NULL;
1092 LIST_INIT(&inodedep->id_pendinghd);
1093 LIST_INIT(&inodedep->id_inowait);
1094 LIST_INIT(&inodedep->id_bufwait);
1095 TAILQ_INIT(&inodedep->id_inoupdt);
1096 TAILQ_INIT(&inodedep->id_newinoupdt);
1097 ACQUIRE_LOCK(&lk);
1098 LIST_INSERT_HEAD(inodedephd, inodedep, id_hash);
1099 sema_release(&inodedep_in_progress);
1100 *inodedeppp = inodedep;
1101 return (0);
1102}
1103
1104/*
1105 * Structures and routines associated with newblk caching.
1106 */
1107LIST_HEAD(newblk_hashhead, newblk) *newblk_hashtbl;
1108u_long newblk_hash; /* size of hash table - 1 */
1109STATIC struct sema newblk_in_progress;
1110
1111/*
1112 * Look up a newblk. Return 1 if found, 0 if not found.
1113 * If not found, allocate if DEPALLOC flag is passed.
1114 * Found or allocated entry is returned in newblkpp.
1115 */
1116STATIC int
1117newblk_lookup(struct fs *fs, daddr_t newblkno, int flags,
1118 struct newblk **newblkpp)
1119{
1120 SIPHASH_CTX ctx;
1121 struct newblk *newblk;
1122 struct newblk_hashhead *newblkhd;
1123
1124 SipHash24_Init(&ctx, &softdep_hashkey);
1125 SipHash24_Update(&ctx, &fs, sizeof(fs));
1126 SipHash24_Update(&ctx, &newblkno, sizeof(newblkno));
1127 newblkhd = &newblk_hashtbl[SipHash24_End(&ctx) & newblk_hash];
1128top:
1129 LIST_FOREACH(newblk, newblkhd, nb_hash)
1130 if (newblkno == newblk->nb_newblkno && fs == newblk->nb_fs)
1131 break;
1132 if (newblk) {
1133 *newblkpp = newblk;
1134 return (1);
1135 }
1136 if ((flags & DEPALLOC) == 0) {
1137 *newblkpp = NULL;
1138 return (0);
1139 }
1140 if (sema_get(&newblk_in_progress, NULL) == 0)
1141 goto top;
1142 newblk = pool_get(&newblk_pool, PR_WAITOK);
1143 newblk->nb_state = 0;
1144 newblk->nb_fs = fs;
1145 newblk->nb_newblkno = newblkno;
1146 LIST_INSERT_HEAD(newblkhd, newblk, nb_hash);
1147 sema_release(&newblk_in_progress);
1148 *newblkpp = newblk;
1149 return (0);
1150}
1151
1152/*
1153 * Executed during filesystem system initialization before
1154 * mounting any file systems.
1155 */
1156void
1157softdep_initialize(void)
1158{
1159
1160 bioops.io_start = softdep_disk_io_initiation;
1161 bioops.io_complete = softdep_disk_write_complete;
1162 bioops.io_deallocate = softdep_deallocate_dependencies;
1163 bioops.io_movedeps = softdep_move_dependencies;
1164 bioops.io_countdeps = softdep_count_dependencies;
1165
1166 LIST_INIT(&mkdirlisthd);
1167 LIST_INIT(&softdep_workitem_pending);
1168#ifdef KMEMSTATS
1169 max_softdeps = min (initialvnodes * 8,
1170 kmemstats[M_INODEDEP].ks_limit / (2 * sizeof(struct inodedep)));
1171#else
1172 max_softdeps = initialvnodes * 4;
1173#endif
1174 arc4random_buf(&softdep_hashkey, sizeof(softdep_hashkey));
1175 pagedep_hashtbl = hashinit(initialvnodes / 5, M_PAGEDEP, M_WAITOK,
1176 &pagedep_hash);
1177 sema_init(&pagedep_in_progress, "pagedep", PRIBIO);
1178 inodedep_hashtbl = hashinit(initialvnodes, M_INODEDEP, M_WAITOK,
1179 &inodedep_hash);
1180 sema_init(&inodedep_in_progress, "inodedep", PRIBIO);
1181 newblk_hashtbl = hashinit(64, M_NEWBLK, M_WAITOK, &newblk_hash);
1182 sema_init(&newblk_in_progress, "newblk", PRIBIO);
1183 timeout_set(&proc_waiting_timeout, pause_timer, NULL);
1184 pool_init(&pagedep_pool, sizeof(struct pagedep), 0, IPL_NONE,
1185 PR_WAITOK, "pagedep", NULL);
1186 pool_init(&inodedep_pool, sizeof(struct inodedep), 0, IPL_NONE,
1187 PR_WAITOK, "inodedep", NULL);
1188 pool_init(&newblk_pool, sizeof(struct newblk), 0, IPL_NONE,
1189 PR_WAITOK, "newblk", NULL);
1190 pool_init(&bmsafemap_pool, sizeof(struct bmsafemap), 0, IPL_NONE,
1191 PR_WAITOK, "bmsafemap", NULL);
1192 pool_init(&allocdirect_pool, sizeof(struct allocdirect), 0, IPL_NONE,
1193 PR_WAITOK, "allocdir", NULL);
1194 pool_init(&indirdep_pool, sizeof(struct indirdep), 0, IPL_NONE,
1195 PR_WAITOK, "indirdep", NULL);
1196 pool_init(&allocindir_pool, sizeof(struct allocindir), 0, IPL_NONE,
1197 PR_WAITOK, "allocindir", NULL);
1198 pool_init(&freefrag_pool, sizeof(struct freefrag), 0, IPL_NONE,
1199 PR_WAITOK, "freefrag", NULL);
1200 pool_init(&freeblks_pool, sizeof(struct freeblks), 0, IPL_NONE,
1201 PR_WAITOK, "freeblks", NULL);
1202 pool_init(&freefile_pool, sizeof(struct freefile), 0, IPL_NONE,
1203 PR_WAITOK, "freefile", NULL);
1204 pool_init(&diradd_pool, sizeof(struct diradd), 0, IPL_NONE,
1205 PR_WAITOK, "diradd", NULL);
1206 pool_init(&mkdir_pool, sizeof(struct mkdir), 0, IPL_NONE,
1207 PR_WAITOK, "mkdir", NULL);
1208 pool_init(&dirrem_pool, sizeof(struct dirrem), 0, IPL_NONE,
1209 PR_WAITOK, "dirrem", NULL);
1210 pool_init(&newdirblk_pool, sizeof(struct newdirblk), 0, IPL_NONE,
1211 PR_WAITOK, "newdirblk", NULL);
1212}
1213
1214/*
1215 * Called at mount time to notify the dependency code that a
1216 * filesystem wishes to use it.
1217 */
1218int
1219softdep_mount(struct vnode *devvp, struct mount *mp, struct fs *fs,
1220 struct ucred *cred)
1221{
1222 struct csum_total cstotal;
1223 struct cg *cgp;
1224 struct buf *bp;
1225 int error, cyl;
1226
1227 KASSERTMSG(1, "softdep_mount should not have been called");
1228
1229 /*
1230 * When doing soft updates, the counters in the
1231 * superblock may have gotten out of sync, so we have
1232 * to scan the cylinder groups and recalculate them.
1233 */
1234 if ((fs->fs_flags & FS_UNCLEAN) == 0)
1235 return (0);
1236 memset(&cstotal, 0, sizeof(cstotal));
1237 for (cyl = 0; cyl < fs->fs_ncg; cyl++) {
1238 if ((error = bread(devvp, fsbtodb(fs, cgtod(fs, cyl)),
1239 fs->fs_cgsize, &bp)) != 0) {
1240 brelse(bp);
1241 return (error);
1242 }
1243 cgp = (struct cg *)bp->b_data;
1244 cstotal.cs_nffree += cgp->cg_cs.cs_nffree;
1245 cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree;
1246 cstotal.cs_nifree += cgp->cg_cs.cs_nifree;
1247 cstotal.cs_ndir += cgp->cg_cs.cs_ndir;
1248 fs->fs_cs(fs, cyl) = cgp->cg_cs;
1249 brelse(bp);
1250 }
1251#ifdef DEBUG
1252 if (memcmp(&cstotal, &fs->fs_cstotal, sizeof(cstotal)))
1253 printf("ffs_mountfs: superblock updated for soft updates\n");
1254#endif
1255 memcpy(&fs->fs_cstotal, &cstotal, sizeof(cstotal));
1256 return (0);
1257}
1258
1259/*
1260 * Protecting the freemaps (or bitmaps).
1261 *
1262 * To eliminate the need to execute fsck before mounting a file system
1263 * after a power failure, one must (conservatively) guarantee that the
1264 * on-disk copy of the bitmaps never indicate that a live inode or block is
1265 * free. So, when a block or inode is allocated, the bitmap should be
1266 * updated (on disk) before any new pointers. When a block or inode is
1267 * freed, the bitmap should not be updated until all pointers have been
1268 * reset. The latter dependency is handled by the delayed de-allocation
1269 * approach described below for block and inode de-allocation. The former
1270 * dependency is handled by calling the following procedure when a block or
1271 * inode is allocated. When an inode is allocated an "inodedep" is created
1272 * with its DEPCOMPLETE flag cleared until its bitmap is written to disk.
1273 * Each "inodedep" is also inserted into the hash indexing structure so
1274 * that any additional link additions can be made dependent on the inode
1275 * allocation.
1276 *
1277 * The ufs file system maintains a number of free block counts (e.g., per
1278 * cylinder group, per cylinder and per <cylinder, rotational position> pair)
1279 * in addition to the bitmaps. These counts are used to improve efficiency
1280 * during allocation and therefore must be consistent with the bitmaps.
1281 * There is no convenient way to guarantee post-crash consistency of these
1282 * counts with simple update ordering, for two main reasons: (1) The counts
1283 * and bitmaps for a single cylinder group block are not in the same disk
1284 * sector. If a disk write is interrupted (e.g., by power failure), one may
1285 * be written and the other not. (2) Some of the counts are located in the
1286 * superblock rather than the cylinder group block. So, we focus our soft
1287 * updates implementation on protecting the bitmaps. When mounting a
1288 * filesystem, we recompute the auxiliary counts from the bitmaps.
1289 */
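
/*
 * Editor's sketch (not part of ffs_softdep.c): the update-ordering rule
 * above, reduced to a toy model.  The toy_* names are hypothetical; the
 * kernel enforces the same order asynchronously via the DEPCOMPLETE
 * machinery rather than with back-to-back synchronous writes.
 */
#include <stdint.h>

struct toy_disk {
	uint8_t  bitmap[64];		/* one bit per block: 1 = allocated */
	uint64_t ptr[16];		/* "inode" block pointers */
};

/* Allocation: the bitmap must reach disk before any pointer names blk. */
static void
toy_alloc(struct toy_disk *d, int inoslot, uint64_t blk)
{
	d->bitmap[blk / 8] |= 1 << (blk % 8);	/* step 1: mark allocated */
	/* ...bitmap write must complete here... */
	d->ptr[inoslot] = blk;			/* step 2: publish pointer */
}

/* De-allocation: all pointers must be reset before the bitmap says free. */
static void
toy_free(struct toy_disk *d, int inoslot, uint64_t blk)
{
	d->ptr[inoslot] = 0;			/* step 1: reset pointer */
	/* ...pointer write must complete here... */
	d->bitmap[blk / 8] &= ~(1 << (blk % 8));	/* step 2: mark free */
}
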
1290
1291/*
1292 * Called just after updating the cylinder group block to allocate an inode.
1293 */
1294/* buffer for cylgroup block with inode map */
1295/* inode related to allocation */
1296/* new inode number being allocated */
1297void
1298softdep_setup_inomapdep(struct buf *bp, struct inode *ip, ufsino_t newinum)
1299{
1300 struct inodedep *inodedep;
1301 struct bmsafemap *bmsafemap;
1302
1303 /*
1304 * Create a dependency for the newly allocated inode.
1305 * Panic if it already exists as something is seriously wrong.
1306 * Otherwise add it to the dependency list for the buffer holding
1307 * the cylinder group map from which it was allocated.
1308 */
1309 	ACQUIRE_LOCK(&lk);
1310 	if (inodedep_lookup(ip->i_fs, newinum, DEPALLOC | NODELAY, &inodedep)
1311 	    != 0) {
1312 		FREE_LOCK(&lk);
1313 		panic("softdep_setup_inomapdep: found inode");
1314 	}
1315 	inodedep->id_buf = bp;
1316 	inodedep->id_state &= ~DEPCOMPLETE;
1317 	bmsafemap = bmsafemap_lookup(bp);
1318 	LIST_INSERT_HEAD(&bmsafemap->sm_inodedephd, inodedep, id_deps);
1319 	FREE_LOCK(&lk);
1320}
1321
1322/*
1323 * Called just after updating the cylinder group block to
1324 * allocate block or fragment.
1325 */
1326/* buffer for cylgroup block with block map */
1327/* filesystem doing allocation */
1328/* number of newly allocated block */
1329void
1330softdep_setup_blkmapdep(struct buf *bp, struct fs *fs, daddr_t newblkno)
1331{
1332 struct newblk *newblk;
1333 struct bmsafemap *bmsafemap;
1334
1335 /*
1336 * Create a dependency for the newly allocated block.
1337 * Add it to the dependency list for the buffer holding
1338 * the cylinder group map from which it was allocated.
1339 */
1340 	if (newblk_lookup(fs, newblkno, DEPALLOC, &newblk) != 0)
1341 		panic("softdep_setup_blkmapdep: found block");
1342 	ACQUIRE_LOCK(&lk);
1343 	newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(bp);
1344 	LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps);
1345 	FREE_LOCK(&lk);
1346}
1347
1348/*
1349 * Find the bmsafemap associated with a cylinder group buffer.
1350 * If none exists, create one. The buffer must be locked when
1351 * this routine is called and this routine must be called with
1352 * splbio interrupts blocked.
1353 */
1354STATIC struct bmsafemap *
1355bmsafemap_lookup(struct buf *bp)
1356{
1357 struct bmsafemap *bmsafemap;
1358 struct worklist *wk;
1359
1360 	splassert(IPL_BIO);
1361
1362#ifdef DEBUG
1363 if (lk.lkt_held == -1)
1364 panic("bmsafemap_lookup: lock not held");
1365#endif
1366 	LIST_FOREACH(wk, &bp->b_dep, wk_list)
1367 		if (wk->wk_type == D_BMSAFEMAP)
1368 			return (WK_BMSAFEMAP(wk));
1369 	FREE_LOCK(&lk);
1370 	bmsafemap = pool_get(&bmsafemap_pool, PR_WAITOK);
1371 	bmsafemap->sm_list.wk_type = D_BMSAFEMAP;
1372 	bmsafemap->sm_list.wk_state = 0;
1373 	bmsafemap->sm_buf = bp;
1374 	LIST_INIT(&bmsafemap->sm_allocdirecthd);
1375 	LIST_INIT(&bmsafemap->sm_allocindirhd);
1376 	LIST_INIT(&bmsafemap->sm_inodedephd);
1377 	LIST_INIT(&bmsafemap->sm_newblkhd);
1378 	ACQUIRE_LOCK(&lk);
1379 	WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list);
1380 return (bmsafemap);
1381}
1382
1383/*
1384 * Direct block allocation dependencies.
1385 *
1386 * When a new block is allocated, the corresponding disk locations must be
1387 * initialized (with zeros or new data) before the on-disk inode points to
1388 * them. Also, the freemap from which the block was allocated must be
1389 * updated (on disk) before the inode's pointer. These two dependencies are
1390 * independent of each other and are needed for all file blocks and indirect
1391 * blocks that are pointed to directly by the inode. Just before the
1392 * "in-core" version of the inode is updated with a newly allocated block
1393 * number, a procedure (below) is called to setup allocation dependency
1394 * structures. These structures are removed when the corresponding
1395 * dependencies are satisfied or when the block allocation becomes obsolete
1396 * (i.e., the file is deleted, the block is de-allocated, or the block is a
1397 * fragment that gets upgraded). All of these cases are handled in
1398 * procedures described later.
1399 *
1400 * When a file extension causes a fragment to be upgraded, either to a larger
1401 * fragment or to a full block, the on-disk location may change (if the
1402 * previous fragment could not simply be extended). In this case, the old
1403 * fragment must be de-allocated, but not until after the inode's pointer has
1404 * been updated. In most cases, this is handled by later procedures, which
1405 * will construct a "freefrag" structure to be added to the workitem queue
1406 * when the inode update is complete (or obsolete). The main exception to
1407 * this is when an allocation occurs while a pending allocation dependency
1408 * (for the same block pointer) remains. This case is handled in the main
1409 * allocation dependency setup procedure by immediately freeing the
1410 * unreferenced fragments.
1411 */
1412/* inode to which block is being added */
1413/* block pointer within inode */
1414/* disk block number being added */
1415/* previous block number, 0 unless frag */
1416/* size of new block */
1417 /* size of old block */
1418/* bp for allocated block */
1419void
1420softdep_setup_allocdirect(struct inode *ip, daddr_t lbn, daddr_t newblkno,
1421 daddr_t oldblkno, long newsize, long oldsize, struct buf *bp)
1422{
1423 struct allocdirect *adp, *oldadp;
1424 struct allocdirectlst *adphead;
1425 struct bmsafemap *bmsafemap;
1426 struct inodedep *inodedep;
1427 struct pagedep *pagedep;
1428 struct newblk *newblk;
1429
1430 	adp = pool_get(&allocdirect_pool, PR_WAITOK | PR_ZERO);
1431 	adp->ad_list.wk_type = D_ALLOCDIRECT;
1432 	adp->ad_lbn = lbn;
1433 	adp->ad_newblkno = newblkno;
1434 	adp->ad_oldblkno = oldblkno;
1435 	adp->ad_newsize = newsize;
1436 	adp->ad_oldsize = oldsize;
1437 	adp->ad_state = ATTACHED;
1438 	LIST_INIT(&adp->ad_newdirblk);
1439 	if (newblkno == oldblkno)
1440 		adp->ad_freefrag = NULL;
1441 	else
1442 		adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize);
1443
1444 	if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0)
1445 		panic("softdep_setup_allocdirect: lost block");
1446
1447 	ACQUIRE_LOCK(&lk);
1448 	inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC | NODELAY, &inodedep);
1449 	adp->ad_inodedep = inodedep;
1450
1451 	if (newblk->nb_state == DEPCOMPLETE) {
1452 		adp->ad_state |= DEPCOMPLETE;
1453 		adp->ad_buf = NULL;
1454 	} else {
1455 		bmsafemap = newblk->nb_bmsafemap;
1456 		adp->ad_buf = bmsafemap->sm_buf;
1457 		LIST_REMOVE(newblk, nb_deps);
1458 		LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps);
1459 	}
1460 	LIST_REMOVE(newblk, nb_hash);
1461 	pool_put(&newblk_pool, newblk);
1462
1463 	if (bp == NULL) {
1464 		/*
1465 		 * XXXUBC - Yes, I know how to fix this, but not right now.
1466 		 */
1467 		panic("softdep_setup_allocdirect: Bonk art in the head");
1468 	}
1469 	WORKLIST_INSERT(&bp->b_dep, &adp->ad_list);
1470 	if (lbn >= NDADDR) {
1471 		/* allocating an indirect block */
1472 		if (oldblkno != 0) {
1473 			FREE_LOCK(&lk);
1474 			panic("softdep_setup_allocdirect: non-zero indir");
1475 		}
1476 	} else {
1477 		/*
1478 		 * Allocating a direct block.
1479 		 *
1480 		 * If we are allocating a directory block, then we must
1481 		 * allocate an associated pagedep to track additions and
1482 		 * deletions.
1483 		 */
1484 		if ((DIP(ip, mode) & IFMT) == IFDIR &&
1485 		    pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0)
1486 			WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
1487 	}
1488 /*
1489 * The list of allocdirects must be kept in sorted and ascending
1490 * order so that the rollback routines can quickly determine the
1491 * first uncommitted block (the size of the file stored on disk
1492 * ends at the end of the lowest committed fragment, or if there
1493 * are no fragments, at the end of the highest committed block).
1494 * Since files generally grow, the typical case is that the new
1495 * block is to be added at the end of the list. We speed this
1496 * special case by checking against the last allocdirect in the
1497 * list before laboriously traversing the list looking for the
1498 * insertion point.
1499 */
1500 adphead = &inodedep->id_newinoupdt;
1501 	oldadp = TAILQ_LAST(adphead, allocdirectlst);
1502 	if (oldadp == NULL || oldadp->ad_lbn <= lbn) {
1503 		/* insert at end of list */
1504 		TAILQ_INSERT_TAIL(adphead, adp, ad_next);
1505 		if (oldadp != NULL && oldadp->ad_lbn == lbn)
1506 			allocdirect_merge(adphead, adp, oldadp);
1507 		FREE_LOCK(&lk);
1508 		return;
1509 	}
1510 	TAILQ_FOREACH(oldadp, adphead, ad_next) {
1511 		if (oldadp->ad_lbn >= lbn)
1512 			break;
1513 	}
1514 	if (oldadp == NULL) {
1515 		FREE_LOCK(&lk);
1516 		panic("softdep_setup_allocdirect: lost entry");
1517 	}
1518 	/* insert in middle of list */
1519 	TAILQ_INSERT_BEFORE(oldadp, adp, ad_next);
1520 	if (oldadp->ad_lbn == lbn)
1521 		allocdirect_merge(adphead, adp, oldadp);
1522 	FREE_LOCK(&lk);
1523}
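
/*
 * Editor's sketch (not part of ffs_softdep.c): the insertion strategy of
 * softdep_setup_allocdirect above, reduced to a generic sorted tail queue
 * with the same append fast path.  struct node and sorted_insert are
 * hypothetical; only the <sys/queue.h> macros are real.
 */
#include <sys/queue.h>
#include <stddef.h>

struct node {
	long key;
	TAILQ_ENTRY(node) link;
};
TAILQ_HEAD(nodelist, node);

static void
sorted_insert(struct nodelist *head, struct node *n)
{
	struct node *it = TAILQ_LAST(head, nodelist);

	/* Fast path: keys usually arrive in ascending order, so append. */
	if (it == NULL || it->key <= n->key) {
		TAILQ_INSERT_TAIL(head, n, link);
		return;
	}
	/*
	 * Slow path: scan for the first element with a key >= ours.  The
	 * tail element already compares greater, so the scan must hit.
	 */
	TAILQ_FOREACH(it, head, link)
		if (it->key >= n->key)
			break;
	TAILQ_INSERT_BEFORE(it, n, link);
}
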
1524
1525/*
1526 * Replace an old allocdirect dependency with a newer one.
1527 * This routine must be called with splbio interrupts blocked.
1528 */
1529/* head of list holding allocdirects */
1530/* allocdirect being added */
1531/* existing allocdirect being checked */
1532STATIC void
1533allocdirect_merge(struct allocdirectlst *adphead, struct allocdirect *newadp,
1534 struct allocdirect *oldadp)
1535{
1536 struct worklist *wk;
1537 struct freefrag *freefrag;
1538 struct newdirblk *newdirblk;
1539
1540 	splassert(IPL_BIO);
1541
1542#ifdef DEBUG
1543 if (lk.lkt_held == -1)
1544 panic("allocdirect_merge: lock not held");
1545#endif
1546 if (newadp->ad_oldblkno != oldadp->ad_newblkno ||
1547 newadp->ad_oldsize != oldadp->ad_newsize ||
1548 	    newadp->ad_lbn >= NDADDR) {
1549 		FREE_LOCK(&lk);
1550 		panic("allocdirect_merge: old %lld != new %lld || lbn %lld >= "
1551 		    "%d", (long long)newadp->ad_oldblkno,
1552 		    (long long)oldadp->ad_newblkno, (long long)newadp->ad_lbn,
1553 		    NDADDR);
1554 }
1555 newadp->ad_oldblkno = oldadp->ad_oldblkno;
1556 newadp->ad_oldsize = oldadp->ad_oldsize;
1557 /*
1558 * If the old dependency had a fragment to free or had never
1559 * previously had a block allocated, then the new dependency
1560 * can immediately post its freefrag and adopt the old freefrag.
1561 * This action is done by swapping the freefrag dependencies.
1562 * The new dependency gains the old one's freefrag, and the
1563 * old one gets the new one and then immediately puts it on
1564 * the worklist when it is freed by free_allocdirect. It is
1565 * not possible to do this swap when the old dependency had a
1566 * non-zero size but no previous fragment to free. This condition
1567 * arises when the new block is an extension of the old block.
1568 * Here, the first part of the fragment allocated to the new
1569 * dependency is part of the block currently claimed on disk by
1570 * the old dependency, so cannot legitimately be freed until the
1571 * conditions for the new dependency are fulfilled.
1572 */
1573 	if (oldadp->ad_freefrag != NULL || oldadp->ad_oldblkno == 0) {
1574 freefrag = newadp->ad_freefrag;
1575 newadp->ad_freefrag = oldadp->ad_freefrag;
1576 oldadp->ad_freefrag = freefrag;
1577 }
1578 /*
1579 * If we are tracking a new directory-block allocation,
1580 * move it from the old allocdirect to the new allocdirect.
1581 */
1582 	if ((wk = LIST_FIRST(&oldadp->ad_newdirblk)) != NULL) {
1583 		newdirblk = WK_NEWDIRBLK(wk);
1584 		WORKLIST_REMOVE(&newdirblk->db_list);
1585 		if (LIST_FIRST(&oldadp->ad_newdirblk) != NULL)
1586 			panic("allocdirect_merge: extra newdirblk");
1587 		WORKLIST_INSERT(&newadp->ad_newdirblk, &newdirblk->db_list);
1588 	}
1589 free_allocdirect(adphead, oldadp, 0);
1590}
1591
1592/*
1593 * Allocate a new freefrag structure if needed.
1594 */
1595STATIC struct freefrag *
1596newfreefrag(struct inode *ip, daddr_t blkno, long size)
1597{
1598 struct freefrag *freefrag;
1599 struct fs *fs;
1600
1601 if (blkno == 0)
1602 		return (NULL);
1603 	fs = ip->i_fs;
1604 	if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag)
1605 		panic("newfreefrag: frag size");
1606 	freefrag = pool_get(&freefrag_pool, PR_WAITOK);
1607 	freefrag->ff_list.wk_type = D_FREEFRAG;
1608 	freefrag->ff_state = DIP(ip, uid) & ~ONWORKLIST;	/* used below */
1609 	freefrag->ff_inum = ip->i_number;
1610 	freefrag->ff_mnt = ITOV(ip)->v_mount;
1611 	freefrag->ff_devvp = ip->i_devvp;
1612 freefrag->ff_blkno = blkno;
1613 freefrag->ff_fragsize = size;
1614 return (freefrag);
1615}
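
/*
 * Note on the sanity check in newfreefrag() above: fragnum() is the
 * fragment's offset within its block (blkno & (fs_frag - 1)) and
 * numfrags() converts the byte size to a fragment count, so their sum may
 * not exceed fs_frag.  For example, with fs_frag == 8, a fragment run
 * starting at offset 6 can be at most 2 fragments long; 6 + 3 > 8 would
 * cross a block boundary and triggers the panic.
 */
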
1616
1617/*
1618 * This workitem de-allocates fragments that were replaced during
1619 * file block allocation.
1620 */
1621STATIC void
1622handle_workitem_freefrag(struct freefrag *freefrag)
1623{
1624 struct inode tip;
1625 struct ufs1_dinode dtip1;
1626
1627 	tip.i_vnode = NULL;
1628 	tip.i_din1 = &dtip1;
1629 	tip.i_fs = VFSTOUFS(freefrag->ff_mnt)->um_fs;
1630 	tip.i_ump = VFSTOUFS(freefrag->ff_mnt);
1631 	tip.i_dev = freefrag->ff_devvp->v_rdev;
1632 	tip.i_number = freefrag->ff_inum;
1633 	tip.i_ffs1_uid = freefrag->ff_state & ~ONWORKLIST;	/* set above */
1634 ffs_blkfree(&tip, freefrag->ff_blkno, freefrag->ff_fragsize);
1635 pool_put(&freefrag_pool, freefrag);
1636}
1637
1638/*
1639 * Indirect block allocation dependencies.
1640 *
1641 * The same dependencies that exist for a direct block also exist when
1642 * a new block is allocated and pointed to by an entry in a block of
1643 * indirect pointers. The undo/redo states described above are also
1644 * used here. Because an indirect block contains many pointers that
1645 * may have dependencies, a second copy of the entire in-memory indirect
1646 * block is kept. The buffer cache copy is always completely up-to-date.
1647 * The second copy, which is used only as a source for disk writes,
1648 * contains only the safe pointers (i.e., those that have no remaining
1649 * update dependencies). The second copy is freed when all pointers
1650 * are safe. The cache is not allowed to replace indirect blocks with
1651 * pending update dependencies. If a buffer containing an indirect
1652 * block with dependencies is written, these routines will mark it
1653 * dirty again. It can only be successfully written once all the
1654 * dependencies are removed. The ffs_fsync routine, in conjunction with
1655 * softdep_sync_metadata, works to get all the dependencies
1656 * removed so that a file can be successfully written to disk. Three
1657 * procedures are used when setting up indirect block pointer
1658 * dependencies. The division is necessary because of the organization
1659 * of the "balloc" routine and because of the distinction between file
1660 * pages and file metadata blocks.
1661 */
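
/*
 * Editor's sketch (not part of ffs_softdep.c): the two-copy scheme
 * described above, in miniature.  toy_indir and its fields are
 * hypothetical; in the real code the "safe" copy lives in
 * indirdep->ir_savebp and the pending set is the ir_deplisthd list.
 */
#include <stdint.h>
#include <stdbool.h>

#define TOY_NPTR 128

struct toy_indir {
	uint64_t live[TOY_NPTR];	/* buffer-cache copy: always current */
	uint64_t safe[TOY_NPTR];	/* source for disk writes */
	bool	 pending[TOY_NPTR];	/* pointer still has a dependency? */
};

/* A newly allocated pointer is visible in core but held back on disk. */
static void
toy_set_ptr(struct toy_indir *ib, int off, uint64_t newblk, uint64_t oldblk)
{
	ib->live[off] = newblk;		/* in-core copy updated at once */
	ib->safe[off] = oldblk;		/* disk sees only the old, safe value */
	ib->pending[off] = true;
}

/* Dependency satisfied: the pointer may now reach the disk. */
static void
toy_ptr_safe(struct toy_indir *ib, int off)
{
	ib->safe[off] = ib->live[off];
	ib->pending[off] = false;
}
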
1662
1663/*
1664 * Allocate a new allocindir structure.
1665 */
1666/* inode for file being extended */
1667/* offset of pointer in indirect block */
1668/* disk block number being added */
1669/* previous block number, 0 if none */
1670STATIC struct allocindir *
1671newallocindir(struct inode *ip, int ptrno, daddr_t newblkno,
1672 daddr_t oldblkno)
1673{
1674 struct allocindir *aip;
1675
1676 	aip = pool_get(&allocindir_pool, PR_WAITOK | PR_ZERO);
1677 	aip->ai_list.wk_type = D_ALLOCINDIR;
1678 	aip->ai_state = ATTACHED;
1679 	aip->ai_offset = ptrno;
1680 	aip->ai_newblkno = newblkno;
1681 	aip->ai_oldblkno = oldblkno;
1682 	aip->ai_freefrag = newfreefrag(ip, oldblkno, ip->i_fs->fs_bsize);
1683 return (aip);
1684}
1685
1686/*
1687 * Called just before setting an indirect block pointer
1688 * to a newly allocated file page.
1689 */
1690/* inode for file being extended */
1691/* allocated block number within file */
1692/* buffer with indirect blk referencing page */
1693/* offset of pointer in indirect block */
1694/* disk block number being added */
1695/* previous block number, 0 if none */
1696/* buffer holding allocated page */
1697void
1698softdep_setup_allocindir_page(struct inode *ip, daddr_t lbn, struct buf *bp,
1699 int ptrno, daddr_t newblkno, daddr_t oldblkno, struct buf *nbp)
1700{
1701 struct allocindir *aip;
1702 struct pagedep *pagedep;
1703
1704 aip = newallocindir(ip, ptrno, newblkno, oldblkno);
1705 	ACQUIRE_LOCK(&lk);
1706 /*
1707 * If we are allocating a directory page, then we must
1708 * allocate an associated pagedep to track additions and
1709 * deletions.
1710 */
1711 	if ((DIP(ip, mode) & IFMT) == IFDIR &&
1712 	    pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0)
1713 		WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list);
1714 	if (nbp == NULL) {
1715 		/*
1716 		 * XXXUBC - Yes, I know how to fix this, but not right now.
1717 		 */
1718 		panic("softdep_setup_allocindir_page: Bonk art in the head");
1719 	}
1720 	WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list);
1721 	FREE_LOCK(&lk);
1722 setup_allocindir_phase2(bp, ip, aip);
1723}
1724
1725/*
1726 * Called just before setting an indirect block pointer to a
1727 * newly allocated indirect block.
1728 */
1729/* newly allocated indirect block */
1730/* inode for file being extended */
1731/* indirect block referencing allocated block */
1732/* offset of pointer in indirect block */
1733/* disk block number being added */
1734void
1735softdep_setup_allocindir_meta(struct buf *nbp, struct inode *ip,
1736 struct buf *bp, int ptrno, daddr_t newblkno)
1737{
1738 struct allocindir *aip;
1739
1740 aip = newallocindir(ip, ptrno, newblkno, 0);
1741 	ACQUIRE_LOCK(&lk);
1742 	WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list);
1743 	FREE_LOCK(&lk);
1744 setup_allocindir_phase2(bp, ip, aip);
1745}
1746
1747/*
1748 * Called to finish the allocation of the "aip" allocated
1749 * by one of the two routines above.
1750 */
1751/* in-memory copy of the indirect block */
1752/* inode for file being extended */
1753/* allocindir allocated by the above routines */
1754STATIC void
1755setup_allocindir_phase2(struct buf *bp, struct inode *ip,
1756 struct allocindir *aip)
1757{
1758 struct worklist *wk;
1759 struct indirdep *indirdep, *newindirdep;
1760 struct bmsafemap *bmsafemap;
1761 struct allocindir *oldaip;
1762 struct freefrag *freefrag;
1763 struct newblk *newblk;
1764
1765 if (bp->b_lblkno >= 0)
1766 panic("setup_allocindir_phase2: not indir blk");
1767 	for (indirdep = NULL, newindirdep = NULL; ; ) {
1768 		ACQUIRE_LOCK(&lk);
1769 		LIST_FOREACH(wk, &bp->b_dep, wk_list) {
1770 			if (wk->wk_type != D_INDIRDEP)
1771 				continue;
1772 			indirdep = WK_INDIRDEP(wk);
1773 			break;
1774 		}
1775 		if (indirdep == NULL && newindirdep) {
1776 			indirdep = newindirdep;
1777 			WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list);
1778 			newindirdep = NULL;
1779 		}
1780 		FREE_LOCK(&lk);
1781 		if (indirdep) {
1782 			if (newblk_lookup(ip->i_fs, aip->ai_newblkno, 0,
1783 			    &newblk) == 0)
1784 				panic("setup_allocindir: lost block");
1785 			ACQUIRE_LOCK(&lk);
1786 			if (newblk->nb_state == DEPCOMPLETE) {
1787 				aip->ai_state |= DEPCOMPLETE;
1788 				aip->ai_buf = NULL;
1789 			} else {
1790 				bmsafemap = newblk->nb_bmsafemap;
1791 				aip->ai_buf = bmsafemap->sm_buf;
1792 				LIST_REMOVE(newblk, nb_deps);
1793 				LIST_INSERT_HEAD(&bmsafemap->sm_allocindirhd,
1794 				    aip, ai_deps);
1795 			}
1796 			LIST_REMOVE(newblk, nb_hash);
1797 			pool_put(&newblk_pool, newblk);
1798 			aip->ai_indirdep = indirdep;
1799 			/*
1800 			 * Check to see if there is an existing dependency
1801 			 * for this block. If there is, merge the old
1802 			 * dependency into the new one.
1803 			 */
1804 			if (aip->ai_oldblkno == 0)
1805 				oldaip = NULL;
1806 			else
1807
1808 				LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, ai_next)
1809 					if (oldaip->ai_offset == aip->ai_offset)
1810 						break;
1811 			freefrag = NULL;
1812 			if (oldaip != NULL) {
1813 				if (oldaip->ai_newblkno != aip->ai_oldblkno) {
1814 					FREE_LOCK(&lk);
1815 					panic("setup_allocindir_phase2: blkno");
1816 				}
1817 				aip->ai_oldblkno = oldaip->ai_oldblkno;
1818 				freefrag = aip->ai_freefrag;
1819 				aip->ai_freefrag = oldaip->ai_freefrag;
1820 				oldaip->ai_freefrag = NULL;
1821 				free_allocindir(oldaip, NULL);
1822 			}
1823 			LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next);
1824 			if (ip->i_ump->um_fstype == UM_UFS1)
1825 				((int32_t *)indirdep->ir_savebp->b_data)
1826 				    [aip->ai_offset] = aip->ai_oldblkno;
1827 			else
1828 				((int64_t *)indirdep->ir_savebp->b_data)
1829 				    [aip->ai_offset] = aip->ai_oldblkno;
1830 			FREE_LOCK(&lk);
1831 			if (freefrag != NULL)
1832 				handle_workitem_freefrag(freefrag);
1833 		}
1834 		if (newindirdep) {
1835 			if (indirdep->ir_savebp != NULL)
1836 				brelse(newindirdep->ir_savebp);
1837 			WORKITEM_FREE(newindirdep, D_INDIRDEP);
1838 		}
1839 		if (indirdep)
1840 			break;
1841 		newindirdep = pool_get(&indirdep_pool, PR_WAITOK);
1842 		newindirdep->ir_list.wk_type = D_INDIRDEP;
1843 		newindirdep->ir_state = ATTACHED;
1844 		if (ip->i_ump->um_fstype == UM_UFS1)
1845 			newindirdep->ir_state |= UFS1FMT;
1846 		LIST_INIT(&newindirdep->ir_deplisthd);
1847 		LIST_INIT(&newindirdep->ir_donehd);
1848 		if (bp->b_blkno == bp->b_lblkno) {
1849 			VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno,
1850 			    NULL);
1851 		}
1852 		newindirdep->ir_savebp =
1853 		    getblk(ip->i_devvp, bp->b_blkno, bp->b_bcount, 0, INFSLP);
1854 #if 0
1855 		BUF_KERNPROC(newindirdep->ir_savebp);
1856 #endif
1857 		memcpy(newindirdep->ir_savebp->b_data, bp->b_data, bp->b_bcount);
1858 	}
1858 }
1859}
1860
1861/*
1862 * Block de-allocation dependencies.
1863 *
1864 * When blocks are de-allocated, the on-disk pointers must be nullified before
1865 * the blocks are made available for use by other files. (The true
1866 * requirement is that old pointers must be nullified before new on-disk
1867 * pointers are set. We chose this slightly more stringent requirement to
1868 * reduce complexity.) Our implementation handles this dependency by updating
1869 * the inode (or indirect block) appropriately but delaying the actual block
1870 * de-allocation (i.e., freemap and free space count manipulation) until
1871 * after the updated versions reach stable storage. After the disk is
1872 * updated, the blocks can be safely de-allocated whenever it is convenient.
1873 * This implementation handles only the common case of reducing a file's
1874 * length to zero. Other cases are handled by the conventional synchronous
1875 * write approach.
1876 *
1877 * The ffs implementation with which we worked double-checks
1878 * the state of the block pointers and file size as it reduces
1879 * a file's length. Some of this code is replicated here in our
1880 * soft updates implementation. The freeblks->fb_chkcnt field is
1881 * used to transfer a part of this information to the procedure
1882 * that eventually de-allocates the blocks.
1883 *
1884 * This routine should be called from the routine that shortens
1885 * a file's length, before the inode's size or block pointers
1886 * are modified. It will save the block pointer information for
1887 * later release and zero the inode so that the calling routine
1888 * can release it.
1889 */
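
/*
 * Editor's sketch (not part of ffs_softdep.c): the delayed de-allocation
 * described above, in miniature.  The toy_* names and the blkfree
 * callback are hypothetical; softdep_setup_freeblocks below performs the
 * capture-and-zero step, and the work item that frees the blocks runs
 * only after the zeroed inode reaches stable storage.
 */
#include <stdint.h>
#include <string.h>

#define TOY_NDADDR 12

struct toy_freeblks {
	uint64_t blks[TOY_NDADDR];	/* pointers saved for later release */
};

/* Step 1: capture and clear the pointers; nothing is freed yet. */
static void
toy_setup_freeblocks(uint64_t inode_db[TOY_NDADDR], struct toy_freeblks *fb)
{
	memcpy(fb->blks, inode_db, sizeof(fb->blks));
	memset(inode_db, 0, TOY_NDADDR * sizeof(uint64_t));
	/* ...the zeroed inode must now reach the disk... */
}

/* Step 2: safe only once the zeroed inode is on stable storage. */
static void
toy_handle_freeblocks(struct toy_freeblks *fb, void (*blkfree)(uint64_t))
{
	int i;

	for (i = 0; i < TOY_NDADDR; i++)
		if (fb->blks[i] != 0)
			blkfree(fb->blks[i]);
}
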
1890/* The inode whose length is to be reduced */
1891/* The new length for the file */
1892void
1893softdep_setup_freeblocks(struct inode *ip, off_t length)
1894{
1895 struct freeblks *freeblks;
1896 struct inodedep *inodedep;
1897 struct allocdirect *adp;
1898 struct vnode *vp;
1899 struct buf *bp;
1900 struct fs *fs;
1901 int i, delay, error;
1902
1903 	fs = ip->i_fs;
1904 	if (length != 0)
1905 		panic("softdep_setup_freeblocks: non-zero length");
1906 	freeblks = pool_get(&freeblks_pool, PR_WAITOK | PR_ZERO);
1907 	freeblks->fb_list.wk_type = D_FREEBLKS;
1908 	freeblks->fb_state = ATTACHED;
1909 	freeblks->fb_uid = DIP(ip, uid);
1910 	freeblks->fb_previousinum = ip->i_number;
1911 	freeblks->fb_devvp = ip->i_devvp;
1912 	freeblks->fb_mnt = ITOV(ip)->v_mount;
1913 	freeblks->fb_oldsize = DIP(ip, size);
1914 	freeblks->fb_newsize = length;
1915 	freeblks->fb_chkcnt = DIP(ip, blocks);
1916
1917 	for (i = 0; i < NDADDR; i++) {
1918 		freeblks->fb_dblks[i] = DIP(ip, db[i]);
1919 		DIP_ASSIGN(ip, db[i], 0);
1920 	}
1921
1922 	for (i = 0; i < NIADDR; i++) {
1923 		freeblks->fb_iblks[i] = DIP(ip, ib[i]);
1924 		DIP_ASSIGN(ip, ib[i], 0);
1925 	}
1926
1927 	DIP_ASSIGN(ip, blocks, 0);
1928 	DIP_ASSIGN(ip, size, 0);
1929
1930 	/*
1931 	 * Push the zero'ed inode to its disk buffer so that we are free
1932 	 * to delete its dependencies below. Once the dependencies are gone
1933 	 * the buffer can be safely released.
1934 	 */
1935 	if ((error = bread(ip->i_devvp,
1936 	    fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
1937 	    (int)fs->fs_bsize, &bp)) != 0)
1938 		softdep_error("softdep_setup_freeblocks", error);
1939
1940 	if (ip->i_ump->um_fstype == UM_UFS1)
1941 		*((struct ufs1_dinode *) bp->b_data +
1942 		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;
1943 	else
1944 		*((struct ufs2_dinode *) bp->b_data +
1945 		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
1946
1947 /*
1948 * Find and eliminate any inode dependencies.
1949 */
1950 	ACQUIRE_LOCK(&lk);
1951 	(void) inodedep_lookup(fs, ip->i_number, DEPALLOC, &inodedep);
1952 	if ((inodedep->id_state & IOSTARTED) != 0) {
1953 		FREE_LOCK(&lk);
1954 panic("softdep_setup_freeblocks: inode busy");
1955 }
1956 /*
1957 * Add the freeblks structure to the list of operations that
1958 * must await the zero'ed inode being written to disk. If we
1959 * still have a bitmap dependency (delay == 0), then the inode
1960 * has never been written to disk, so we can process the
1961 * freeblks below once we have deleted the dependencies.
1962 */
1963 	delay = (inodedep->id_state & DEPCOMPLETE);
1964 	if (delay)
1965 		WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list);
1966 /*
1967 * Because the file length has been truncated to zero, any
1968 * pending block allocation dependency structures associated
1969 * with this inode are obsolete and can simply be de-allocated.
1970 * We must first merge the two dependency lists to get rid of
1971 * any duplicate freefrag structures, then purge the merged list.
1972 * If we still have a bitmap dependency, then the inode has never
1973 * been written to disk, so we can free any fragments without delay.
1974 */
1975 merge_inode_lists(inodedep);
1976 	while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL)
1977 		free_allocdirect(&inodedep->id_inoupdt, adp, delay);
1978 	FREE_LOCK(&lk);
1979 bdwrite(bp);
1980 /*
1981 * We must wait for any I/O in progress to finish so that
1982 * all potential buffers on the dirty list will be visible.
1983 * Once they are all there, walk the list and get rid of
1984 * any dependencies.
1985 */
1986 	vp = ITOV(ip);
1987 	ACQUIRE_LOCK(&lk);
1988 	drain_output(vp, 1);
1989 	while ((bp = LIST_FIRST(&vp->v_dirtyblkhd))) {
1990 		if (getdirtybuf(bp, MNT_WAIT) <= 0)
1991 			break;
1992 		(void) inodedep_lookup(fs, ip->i_number, 0, &inodedep);
1993 		deallocate_dependencies(bp, inodedep);
1994 		bp->b_flags |= B_INVAL | B_NOCACHE;
1995 		FREE_LOCK(&lk);
1996 		brelse(bp);
1997 		ACQUIRE_LOCK(&lk);
1998 }
1999 if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) != 0)
2000 (void) free_inodedep(inodedep);
2001
2002 if (delay) {
2003 		freeblks->fb_state |= DEPCOMPLETE;
2004 /*
2005 * If the inode with zeroed block pointers is now on disk we
2006 * can start freeing blocks. Add freeblks to the worklist
2007 * instead of calling handle_workitem_freeblocks() directly as
2008 * it is more likely that additional IO is needed to complete
2009 * the request than in the !delay case.
2010 */
2011 		if ((freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE)
2012 add_to_worklist(&freeblks->fb_list);
2013 }
2014
2015 	FREE_LOCK(&lk);
2016 /*
2017 * If the inode has never been written to disk (delay == 0),
2018 * then we can process the freeblks now that we have deleted
2019 * the dependencies.
2020 */
2021 if (!delay)
2022 handle_workitem_freeblocks(freeblks);
2023}
2024
2025/*
2026 * Reclaim any dependency structures from a buffer that is about to
2027 * be reallocated to a new vnode. The buffer must be locked, thus,
2028 * no I/O completion operations can occur while we are manipulating
2029 * its associated dependencies. The mutex is held so that other I/O's
2030 * associated with related dependencies do not occur.
2031 */
2032STATIC void
2033deallocate_dependencies(struct buf *bp, struct inodedep *inodedep)
2034{
2035 struct worklist *wk;
2036 struct indirdep *indirdep;
2037 struct allocindir *aip;
2038 struct pagedep *pagedep;
2039 struct dirrem *dirrem;
2040 struct diradd *dap;
2041 int i;
2042
2043 	while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) {
2044 		switch (wk->wk_type) {
2045
2046 		case D_INDIRDEP:
2047 			indirdep = WK_INDIRDEP(wk);
2048 			/*
2049 			 * None of the indirect pointers will ever be visible,
2050 			 * so they can simply be tossed. GOINGAWAY ensures
2051 			 * that allocated pointers will be saved in the buffer
2052 			 * cache until they are freed. Note that they will
2053 			 * only be able to be found by their physical address
2054 			 * since the inode mapping the logical address will
2055 			 * be gone. The save buffer used for the safe copy
2056 			 * was allocated in setup_allocindir_phase2 using
2057 			 * the physical address so it could be used for this
2058 			 * purpose. Hence we swap the safe copy with the real
2059 			 * copy, allowing the safe copy to be freed and holding
2060 			 * on to the real copy for later use in indir_trunc.
2061 			 */
2062 			if (indirdep->ir_state & GOINGAWAY) {
2063 				FREE_LOCK(&lk);
2064 				panic("deallocate_dependencies: already gone");
2065 			}
2066 			indirdep->ir_state |= GOINGAWAY;
2067 			while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)))
2068 				free_allocindir(aip, inodedep);
2069 			if (bp->b_lblkno >= 0 ||
2070 			    bp->b_blkno != indirdep->ir_savebp->b_lblkno) {
2071 				FREE_LOCK(&lk);
2072 				panic("deallocate_dependencies: not indir");
2073 			}
2074 			memcpy(indirdep->ir_savebp->b_data, bp->b_data,
2075 			    bp->b_bcount);
2076 			WORKLIST_REMOVE(wk);
2077 			WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, wk);
2078 			continue;
2079
2080 		case D_PAGEDEP:
2081 			pagedep = WK_PAGEDEP(wk);
2082 			/*
2083 			 * None of the directory additions will ever be
2084 			 * visible, so they can simply be tossed.
2085 			 */
2086 			for (i = 0; i < DAHASHSZ; i++)
2087 				while ((dap =
2088 				    LIST_FIRST(&pagedep->pd_diraddhd[i])))
2089 					free_diradd(dap);
2090 			while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)))
2091 				free_diradd(dap);
2092 			/*
2093 			 * Copy any directory remove dependencies to the list
2094 			 * to be processed after the zero'ed inode is written.
2095 			 * If the inode has already been written, then they
2096 			 * can be dumped directly onto the work list.
2097 			 */
2098 			while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd))) {
2099 				LIST_REMOVE(dirrem, dm_next);
2100 				dirrem->dm_dirinum = pagedep->pd_ino;
2101 				if (inodedep == NULL ||
2102 				    (inodedep->id_state & ALLCOMPLETE) ==
2103 				    ALLCOMPLETE)
2104 					add_to_worklist(&dirrem->dm_list);
2105 				else
2106 					WORKLIST_INSERT(&inodedep->id_bufwait,
2107 					    &dirrem->dm_list);
2108 			}
2109 			if ((pagedep->pd_state & NEWBLOCK) != 0) {
2110 				LIST_FOREACH(wk, &inodedep->id_bufwait, wk_list)
2111 					if (wk->wk_type == D_NEWDIRBLK &&
2112 					    WK_NEWDIRBLK(wk)->db_pagedep ==
2113 					    pagedep)
2114 						break;
2115 				if (wk != NULL) {
2116 					WORKLIST_REMOVE(wk);
2117 					free_newdirblk(WK_NEWDIRBLK(wk));
2118 				} else {
2119 					FREE_LOCK(&lk);
2120 					panic("deallocate_dependencies: "
2121 					    "lost pagedep");
2122 				}
2123 			}
2124 			WORKLIST_REMOVE(&pagedep->pd_list);
2125 			LIST_REMOVE(pagedep, pd_hash);
2126 			WORKITEM_FREE(pagedep, D_PAGEDEP);
2127 			continue;
2128
2129 		case D_ALLOCINDIR:
2130 			free_allocindir(WK_ALLOCINDIR(wk), inodedep);
2131 			continue;
2132
2133 		case D_ALLOCDIRECT:
2134 		case D_INODEDEP:
2135 			FREE_LOCK(&lk);
2136 			panic("deallocate_dependencies: Unexpected type %s",
2137 			    TYPENAME(wk->wk_type));
2138 			/* NOTREACHED */
2139
2140 		default:
2141 			FREE_LOCK(&lk);
2142 			panic("deallocate_dependencies: Unknown type %s",
2143 			    TYPENAME(wk->wk_type));
2144 			/* NOTREACHED */
2145 		}
2146 	}
2147}
2148
2149/*
2150 * Free an allocdirect. Generate a new freefrag work request if appropriate.
2151 * This routine must be called with splbio interrupts blocked.
2152 */
2153STATIC void
2154free_allocdirect(struct allocdirectlst *adphead, struct allocdirect *adp,
2155 int delay)
2156{
2157 struct newdirblk *newdirblk;
2158 struct worklist *wk;
2159
2160 	splassert(IPL_BIO);
2161
2162#ifdef DEBUG
2163 if (lk.lkt_held == -1)
2164 panic("free_allocdirect: lock not held");
2165#endif
2166 	if ((adp->ad_state & DEPCOMPLETE) == 0)
2167 		LIST_REMOVE(adp, ad_deps);
2168 	TAILQ_REMOVE(adphead, adp, ad_next);
2169 	if ((adp->ad_state & COMPLETE) == 0)
2170 		WORKLIST_REMOVE(&adp->ad_list);
2171 	if (adp->ad_freefrag != NULL) {
2172 		if (delay)
2173 			WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait,
2174 			    &adp->ad_freefrag->ff_list);
2175 		else
2176 			add_to_worklist(&adp->ad_freefrag->ff_list);
2177 	}
2178 	if ((wk = LIST_FIRST(&adp->ad_newdirblk)) != NULL) {
2179 		newdirblk = WK_NEWDIRBLK(wk);
2180 		WORKLIST_REMOVE(&newdirblk->db_list);
2181 		if (LIST_FIRST(&adp->ad_newdirblk) != NULL)
2182 			panic("free_allocdirect: extra newdirblk");
2183 		if (delay)
2184 			WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait,
2185 			    &newdirblk->db_list);
2186 		else
2187 			free_newdirblk(newdirblk);
2188 	}
2189 	WORKITEM_FREE(adp, D_ALLOCDIRECT);
2190}
2191
2192/*
2193 * Free a newdirblk. Clear the NEWBLOCK flag on its associated pagedep.
2194 * This routine must be called with splbio interrupts blocked.
2195 */
2196void
2197free_newdirblk(struct newdirblk *newdirblk)
2198{
2199 struct pagedep *pagedep;
2200 struct diradd *dap;
2201 int i;
2202
2203 	splassert(IPL_BIO);
2204
2205#ifdef DEBUG
2206 if (lk.lkt_held == -1)
2207 panic("free_newdirblk: lock not held");
2208#endif
2209 /*
2210 * If the pagedep is still linked onto the directory buffer
2211 * dependency chain, then some of the entries on the
2212 * pd_pendinghd list may not be committed to disk yet. In
2213 * this case, we will simply clear the NEWBLOCK flag and
2214 * let the pd_pendinghd list be processed when the pagedep
2215 * is next written. If the pagedep is no longer on the buffer
2216 * dependency chain, then all the entries on the pd_pending
2217 * list are committed to disk and we can free them here.
2218 */
2219 pagedep = newdirblk->db_pagedep;
2220 	pagedep->pd_state &= ~NEWBLOCK;
2221 	if ((pagedep->pd_state & ONWORKLIST) == 0)
2222 		while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
2223 free_diradd(dap);
2224 /*
2225 * If no dependencies remain, the pagedep will be freed.
2226 */
2227 	for (i = 0; i < DAHASHSZ; i++)
2228 		if (LIST_FIRST(&pagedep->pd_diraddhd[i]) != NULL)
2229 			break;
2230 	if (i == DAHASHSZ && (pagedep->pd_state & ONWORKLIST) == 0) {
2231 		LIST_REMOVE(pagedep, pd_hash);
2232 		WORKITEM_FREE(pagedep, D_PAGEDEP);
2233 	}
2234 	WORKITEM_FREE(newdirblk, D_NEWDIRBLK);
2235}
2236
2237/*
2238 * Prepare an inode to be freed. The actual free operation is not
2239 * done until the zero'ed inode has been written to disk.
2240 */
2241void
2242softdep_freefile(struct vnode *pvp, ufsino_t ino, mode_t mode)
2243{
2244 	struct inode *ip = VTOI(pvp);
2245 struct inodedep *inodedep;
2246 struct freefile *freefile;
2247
2248 /*
2249 * This sets up the inode de-allocation dependency.
2250 */
2251 	freefile = pool_get(&freefile_pool, PR_WAITOK);
2252 	freefile->fx_list.wk_type = D_FREEFILE;
2253 	freefile->fx_list.wk_state = 0;
2254 	freefile->fx_mode = mode;
2255 	freefile->fx_oldinum = ino;
2256 	freefile->fx_devvp = ip->i_devvp;
2257 	freefile->fx_mnt = ITOV(ip)->v_mount;
2258
2259 /*
2260 * If the inodedep does not exist, then the zero'ed inode has
2261 * been written to disk. If the allocated inode has never been
2262 * written to disk, then the on-disk inode is zero'ed. In either
2263 * case we can free the file immediately.
2264 */
2265 	ACQUIRE_LOCK(&lk);
2266 	if (inodedep_lookup(ip->i_fs, ino, 0, &inodedep) == 0 ||
2267 	    check_inode_unwritten(inodedep)) {
2268 		FREE_LOCK(&lk);
2269 		handle_workitem_freefile(freefile);
2270 		return;
2271 	}
2272 	WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list);
2273 	FREE_LOCK(&lk);
2274}
2275
/*
 * Check to see if an inode has never been written to disk. If
 * so free the inodedep and return success, otherwise return failure.
 * This routine must be called with splbio interrupts blocked.
 *
 * If we still have a bitmap dependency, then the inode has never
 * been written to disk. Drop the dependency as it is no longer
 * necessary since the inode is being deallocated. We set the
 * ALLCOMPLETE flags since the bitmap now properly shows that the
 * inode is not allocated. Even if the inode is actively being
 * written, it has been rolled back to its zero'ed state, so we
 * are ensured that a zero inode is what is on the disk. For short
 * lived files, this change will usually result in removing all the
 * dependencies from the inode so that it can be freed immediately.
 */
STATIC int
check_inode_unwritten(struct inodedep *inodedep)
{
	splassert(IPL_BIO);

	if ((inodedep->id_state & DEPCOMPLETE) != 0 ||
	    LIST_FIRST(&inodedep->id_pendinghd) != NULL ||
	    LIST_FIRST(&inodedep->id_bufwait) != NULL ||
	    LIST_FIRST(&inodedep->id_inowait) != NULL ||
	    TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
	    TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL ||
	    inodedep->id_nlinkdelta != 0)
		return (0);
	inodedep->id_state |= ALLCOMPLETE;
	LIST_REMOVE(inodedep, id_deps);
	inodedep->id_buf = NULL;
	if (inodedep->id_state & ONWORKLIST)
		WORKLIST_REMOVE(&inodedep->id_list);
	if (inodedep->id_savedino1 != NULL) {
		free(inodedep->id_savedino1, M_INODEDEP, inodedep->id_unsize);
		inodedep->id_savedino1 = NULL;
	}
	if (free_inodedep(inodedep) == 0) {
		FREE_LOCK(&lk);
		panic("check_inode_unwritten: busy inode");
	}
	return (1);
}

/*
 * Try to free an inodedep structure. Return 1 if it could be freed.
 */
STATIC int
free_inodedep(struct inodedep *inodedep)
{

	if ((inodedep->id_state & ONWORKLIST) != 0 ||
	    (inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE ||
	    LIST_FIRST(&inodedep->id_pendinghd) != NULL ||
	    LIST_FIRST(&inodedep->id_bufwait) != NULL ||
	    LIST_FIRST(&inodedep->id_inowait) != NULL ||
	    TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
	    TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL ||
	    inodedep->id_nlinkdelta != 0 || inodedep->id_savedino1 != NULL)
		return (0);
	LIST_REMOVE(inodedep, id_hash);
	WORKITEM_FREE(inodedep, D_INODEDEP);
	num_inodedep -= 1;
	return (1);
}

/*
 * This workitem routine performs the block de-allocation.
 * The workitem is added to the pending list after the updated
 * inode block has been written to disk. As mentioned above,
 * checks regarding the number of blocks de-allocated (compared
 * to the number of blocks allocated for the file) are also
 * performed in this function.
 */
STATIC void
handle_workitem_freeblocks(struct freeblks *freeblks)
{
	struct inode tip;
	daddr_t bn;
	union {
		struct ufs1_dinode di1;
		struct ufs2_dinode di2;
	} di;
	struct fs *fs;
	int i, level, bsize;
	long nblocks, blocksreleased = 0;
	int error, allerror = 0;
	daddr_t baselbns[NIADDR], tmpval;

	if (VFSTOUFS(freeblks->fb_mnt)->um_fstype == UM_UFS1)
		tip.i_din1 = &di.di1;
	else
		tip.i_din2 = &di.di2;

	tip.i_fs = fs = VFSTOUFS(freeblks->fb_mnt)->um_fs;
	tip.i_number = freeblks->fb_previousinum;
	tip.i_ump = VFSTOUFS(freeblks->fb_mnt);
	tip.i_dev = freeblks->fb_devvp->v_rdev;
	DIP_ASSIGN(&tip, size, freeblks->fb_oldsize);
	DIP_ASSIGN(&tip, uid, freeblks->fb_uid);
	tip.i_vnode = NULL;
	tmpval = 1;
	baselbns[0] = NDADDR;
	for (i = 1; i < NIADDR; i++) {
		tmpval *= NINDIR(fs);
		baselbns[i] = baselbns[i - 1] + tmpval;
	}
	nblocks = btodb(fs->fs_bsize);
	blocksreleased = 0;
	/*
	 * Indirect blocks first.
	 */
	for (level = (NIADDR - 1); level >= 0; level--) {
		if ((bn = freeblks->fb_iblks[level]) == 0)
			continue;
		if ((error = indir_trunc(&tip, fsbtodb(fs, bn), level,
		    baselbns[level], &blocksreleased)) != 0)
			allerror = error;
		ffs_blkfree(&tip, bn, fs->fs_bsize);
		blocksreleased += nblocks;
	}
	/*
	 * All direct blocks or frags.
	 */
	for (i = (NDADDR - 1); i >= 0; i--) {
		if ((bn = freeblks->fb_dblks[i]) == 0)
			continue;
		bsize = blksize(fs, &tip, i);
		ffs_blkfree(&tip, bn, bsize);
		blocksreleased += btodb(bsize);
	}

#ifdef DIAGNOSTIC
	if (freeblks->fb_chkcnt != blocksreleased)
		printf("handle_workitem_freeblocks: block count\n");
	if (allerror)
		softdep_error("handle_workitem_freeblks", allerror);
#endif /* DIAGNOSTIC */
	WORKITEM_FREE(freeblks, D_FREEBLKS);
}

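/*
 * Illustrative sketch, not part of the kernel source: the baselbns[]
 * loop above computes the first logical block number served by each
 * level of indirection.  A minimal stand-alone version of that
 * arithmetic, assuming the common UFS1 geometry of NDADDR == 12 direct
 * blocks and NINDIR(fs) == 2048 pointers per 8K indirect block (these
 * constants are assumptions for the example, not taken from the code
 * above):
 */
#include <stdio.h>

int
main(void)
{
	long baselbns[3], tmpval = 1;	/* 3 == NIADDR indirection levels */
	int i;

	baselbns[0] = 12;		/* NDADDR: first indirect-mapped lbn */
	for (i = 1; i < 3; i++) {
		tmpval *= 2048;		/* NINDIR(fs): pointers per block */
		baselbns[i] = baselbns[i - 1] + tmpval;
	}
	/* Prints 12, 2060 and 4196364: where each indirect level begins. */
	for (i = 0; i < 3; i++)
		printf("level %d maps lbns from %ld\n", i, baselbns[i]);
	return 0;
}
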
/*
 * Release blocks associated with the inode ip and stored in the indirect
 * block dbn. If level is greater than SINGLE, the block is an indirect block
 * and recursive calls to indir_trunc must be used to cleanse other indirect
 * blocks.
 */
STATIC int
indir_trunc(struct inode *ip, daddr_t dbn, int level, daddr_t lbn,
    long *countp)
{
	struct buf *bp;
	int32_t *bap1 = NULL;
	int64_t nb, *bap2 = NULL;
	struct fs *fs;
	struct worklist *wk;
	struct indirdep *indirdep;
	int i, lbnadd, nblocks, ufs1fmt;
	int error, allerror = 0;

	fs = ip->i_fs;
	lbnadd = 1;
	for (i = level; i > 0; i--)
		lbnadd *= NINDIR(fs);
	/*
	 * Get buffer of block pointers to be freed. This routine is not
	 * called until the zero'ed inode has been written, so it is safe
	 * to free blocks as they are encountered. Because the inode has
	 * been zero'ed, calls to bmap on these blocks will fail. So, we
	 * have to use the on-disk address and the block device for the
	 * filesystem to look them up. If the file was deleted before its
	 * indirect blocks were all written to disk, the routine that set
	 * us up (deallocate_dependencies) will have arranged to leave
	 * a complete copy of the indirect block in memory for our use.
	 * Otherwise we have to read the blocks in from the disk.
	 */
	ACQUIRE_LOCK(&lk);
	if ((bp = incore(ip->i_devvp, dbn)) != NULL &&
	    (wk = LIST_FIRST(&bp->b_dep)) != NULL) {
		if (wk->wk_type != D_INDIRDEP ||
		    (indirdep = WK_INDIRDEP(wk))->ir_savebp != bp ||
		    (indirdep->ir_state & GOINGAWAY) == 0) {
			FREE_LOCK(&lk);
			panic("indir_trunc: lost indirdep");
		}
		WORKLIST_REMOVE(wk);
		WORKITEM_FREE(indirdep, D_INDIRDEP);
		if (LIST_FIRST(&bp->b_dep) != NULL) {
			FREE_LOCK(&lk);
			panic("indir_trunc: dangling dep");
		}
		FREE_LOCK(&lk);
	} else {
		FREE_LOCK(&lk);
		error = bread(ip->i_devvp, dbn, (int)fs->fs_bsize, &bp);
		if (error)
			return (error);
	}
	/*
	 * Recursively free indirect blocks.
	 */
	if (ip->i_ump->um_fstype == UM_UFS1) {
		ufs1fmt = 1;
		bap1 = (int32_t *)bp->b_data;
	} else {
		ufs1fmt = 0;
		bap2 = (int64_t *)bp->b_data;
	}
	nblocks = btodb(fs->fs_bsize);
	for (i = NINDIR(fs) - 1; i >= 0; i--) {
		if (ufs1fmt)
			nb = bap1[i];
		else
			nb = bap2[i];
		if (nb == 0)
			continue;
		if (level != 0) {
			if ((error = indir_trunc(ip, fsbtodb(fs, nb),
			    level - 1, lbn + (i * lbnadd), countp)) != 0)
				allerror = error;
		}
		ffs_blkfree(ip, nb, fs->fs_bsize);
		*countp += nblocks;
	}
	bp->b_flags |= B_INVAL | B_NOCACHE;
	brelse(bp);
	return (allerror);
}

/*
 * Free an allocindir.
 * This routine must be called with splbio interrupts blocked.
 */
STATIC void
free_allocindir(struct allocindir *aip, struct inodedep *inodedep)
{
	struct freefrag *freefrag;

	splassert(IPL_BIO);

#ifdef DEBUG
	if (lk.lkt_held == -1)
		panic("free_allocindir: lock not held");
#endif
	if ((aip->ai_state & DEPCOMPLETE) == 0)
		LIST_REMOVE(aip, ai_deps);
	if (aip->ai_state & ONWORKLIST)
		WORKLIST_REMOVE(&aip->ai_list);
	LIST_REMOVE(aip, ai_next);
	if ((freefrag = aip->ai_freefrag) != NULL) {
		if (inodedep == NULL)
			add_to_worklist(&freefrag->ff_list);
		else
			WORKLIST_INSERT(&inodedep->id_bufwait,
			    &freefrag->ff_list);
	}
	WORKITEM_FREE(aip, D_ALLOCINDIR);
}

/*
 * Directory entry addition dependencies.
 *
 * When adding a new directory entry, the inode (with its incremented link
 * count) must be written to disk before the directory entry's pointer to it.
 * Also, if the inode is newly allocated, the corresponding freemap must be
 * updated (on disk) before the directory entry's pointer. These requirements
 * are met via undo/redo on the directory entry's pointer, which consists
 * simply of the inode number.
 *
 * As directory entries are added and deleted, the free space within a
 * directory block can become fragmented. The ufs file system will compact
 * a fragmented directory block to make space for a new entry. When this
 * occurs, the offsets of previously added entries change. Any "diradd"
 * dependency structures corresponding to these entries must be updated with
 * the new offsets.
 */

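/*
 * Illustrative sketch, not part of the kernel source: the "undo/redo on
 * the directory entry's pointer" described above amounts to saving the
 * new inode number, letting the disk see the entry with d_ino rolled
 * back to zero, and restoring it once the write completes (compare
 * initiate_write_filepage() further below).  The structure here is a
 * hypothetical stand-in for struct direct, assuming a 32-bit inode
 * number:
 */
#include <stdint.h>
#include <stdio.h>

struct entry { uint32_t d_ino; };

int
main(void)
{
	struct entry ep = { 1234 };	/* new inum, inode not yet on disk */
	uint32_t saved;

	saved = ep.d_ino;	/* undo: hide the not-yet-safe pointer */
	ep.d_ino = 0;
	printf("disk image written with d_ino == %u\n", ep.d_ino);

	ep.d_ino = saved;	/* redo: restore after the write completes */
	printf("in-core entry restored to d_ino == %u\n", ep.d_ino);
	return 0;
}
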
/*
 * This routine is called after the in-memory inode's link
 * count has been incremented, but before the directory entry's
 * pointer to the inode has been set.
 */
/* buffer containing directory block */
/* inode for directory */
/* offset of new entry in directory */
/* inode referenced by new directory entry */
/* non-NULL => contents of new mkdir */
/* entry is in a newly allocated block */
int
softdep_setup_directory_add(struct buf *bp, struct inode *dp, off_t diroffset,
    long newinum, struct buf *newdirbp, int isnewblk)
{
	int offset;		/* offset of new entry within directory block */
	daddr_t lbn;		/* block in directory containing new entry */
	struct fs *fs;
	struct diradd *dap;
	struct allocdirect *adp;
	struct pagedep *pagedep;
	struct inodedep *inodedep;
	struct newdirblk *newdirblk = NULL;
	struct mkdir *mkdir1, *mkdir2;

	fs = dp->i_fs;
	lbn = lblkno(fs, diroffset);
	offset = blkoff(fs, diroffset);
	dap = pool_get(&diradd_pool, PR_WAITOK | PR_ZERO);
	dap->da_list.wk_type = D_DIRADD;
	dap->da_offset = offset;
	dap->da_newinum = newinum;
	dap->da_state = ATTACHED;
	if (isnewblk && lbn < NDADDR && fragoff(fs, diroffset) == 0) {
		newdirblk = pool_get(&newdirblk_pool, PR_WAITOK);
		newdirblk->db_list.wk_type = D_NEWDIRBLK;
		newdirblk->db_state = 0;
	}
	if (newdirbp == NULL) {
		dap->da_state |= DEPCOMPLETE;
		ACQUIRE_LOCK(&lk);
	} else {
		dap->da_state |= MKDIR_BODY | MKDIR_PARENT;
		mkdir1 = pool_get(&mkdir_pool, PR_WAITOK);
		mkdir1->md_list.wk_type = D_MKDIR;
		mkdir1->md_state = MKDIR_BODY;
		mkdir1->md_diradd = dap;
		mkdir2 = pool_get(&mkdir_pool, PR_WAITOK);
		mkdir2->md_list.wk_type = D_MKDIR;
		mkdir2->md_state = MKDIR_PARENT;
		mkdir2->md_diradd = dap;
		/*
		 * Dependency on "." and ".." being written to disk.
		 */
		mkdir1->md_buf = newdirbp;
		ACQUIRE_LOCK(&lk);
		LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs);
		WORKLIST_INSERT(&newdirbp->b_dep, &mkdir1->md_list);
		FREE_LOCK(&lk);
		bdwrite(newdirbp);
		/*
		 * Dependency on link count increase for parent directory
		 */
		ACQUIRE_LOCK(&lk);
		if (inodedep_lookup(fs, dp->i_number, 0, &inodedep) == 0
		    || (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
			dap->da_state &= ~MKDIR_PARENT;
			WORKITEM_FREE(mkdir2, D_MKDIR);
		} else {
			LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs);
			WORKLIST_INSERT(&inodedep->id_bufwait,
			    &mkdir2->md_list);
		}
	}
	/*
	 * Link into parent directory pagedep to await its being written.
	 */
	if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0)
		WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
	dap->da_pagedep = pagedep;
	LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap,
	    da_pdlist);
	/*
	 * Link into its inodedep. Put it on the id_bufwait list if the inode
	 * is not yet written. If it is written, do the post-inode write
	 * processing to put it on the id_pendinghd list.
	 */
	(void) inodedep_lookup(fs, newinum, DEPALLOC, &inodedep);
	if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE)
		diradd_inode_written(dap, inodedep);
	else
		WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list);
	if (isnewblk) {
		/*
		 * Directories growing into indirect blocks are rare
		 * enough, and new block allocation in those cases rarer
		 * still, that we choose not to bother tracking them.
		 * Rather we simply force the new directory entry to disk.
		 */
		if (lbn >= NDADDR) {
			FREE_LOCK(&lk);
			/*
			 * We only have a new allocation when at the
			 * beginning of a new block, not when we are
			 * expanding into an existing block.
			 */
			if (blkoff(fs, diroffset) == 0)
				return (1);
			return (0);
		}
		/*
		 * We only have a new allocation when at the beginning
		 * of a new fragment, not when we are expanding into an
		 * existing fragment. Also, there is nothing to do if we
		 * are already tracking this block.
		 */
		if (fragoff(fs, diroffset) != 0) {
			FREE_LOCK(&lk);
			return (0);
		}

		if ((pagedep->pd_state & NEWBLOCK) != 0) {
			WORKITEM_FREE(newdirblk, D_NEWDIRBLK);
			FREE_LOCK(&lk);
			return (0);
		}
		/*
		 * Find our associated allocdirect and have it track us.
		 */
		if (inodedep_lookup(fs, dp->i_number, 0, &inodedep) == 0)
			panic("softdep_setup_directory_add: lost inodedep");
		adp = TAILQ_LAST(&inodedep->id_newinoupdt, allocdirectlst);
		if (adp == NULL || adp->ad_lbn != lbn) {
			FREE_LOCK(&lk);
			panic("softdep_setup_directory_add: lost entry");
		}
		pagedep->pd_state |= NEWBLOCK;
		newdirblk->db_pagedep = pagedep;
		WORKLIST_INSERT(&adp->ad_newdirblk, &newdirblk->db_list);
	}
	FREE_LOCK(&lk);
	return (0);
}

/*
 * This procedure is called to change the offset of a directory
 * entry when compacting a directory block which must be owned
 * exclusively by the caller. Note that the actual entry movement
 * must be done in this procedure to ensure that no I/O completions
 * occur while the move is in progress.
 */
/* inode for directory */
/* address of dp->i_offset */
/* address of old directory location */
/* address of new directory location */
/* size of directory entry */
void
softdep_change_directoryentry_offset(struct inode *dp, caddr_t base,
    caddr_t oldloc, caddr_t newloc, int entrysize)
{
	int offset, oldoffset, newoffset;
	struct pagedep *pagedep;
	struct diradd *dap;
	daddr_t lbn;

	ACQUIRE_LOCK(&lk);
	lbn = lblkno(dp->i_fs, dp->i_offset);
	offset = blkoff(dp->i_fs, dp->i_offset);
	if (pagedep_lookup(dp, lbn, 0, &pagedep) == 0)
		goto done;
	oldoffset = offset + (oldloc - base);
	newoffset = offset + (newloc - base);

	LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(oldoffset)],
	    da_pdlist) {
		if (dap->da_offset != oldoffset)
			continue;
		dap->da_offset = newoffset;
		if (DIRADDHASH(newoffset) == DIRADDHASH(oldoffset))
			break;
		LIST_REMOVE(dap, da_pdlist);
		LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(newoffset)],
		    dap, da_pdlist);
		break;
	}
	if (dap == NULL) {
		LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) {
			if (dap->da_offset == oldoffset) {
				dap->da_offset = newoffset;
				break;
			}
		}
	}
done:
	memmove(newloc, oldloc, entrysize);
	FREE_LOCK(&lk);
}

/*
 * Free a diradd dependency structure. This routine must be called
 * with splbio interrupts blocked.
 */
STATIC void
free_diradd(struct diradd *dap)
{
	struct dirrem *dirrem;
	struct pagedep *pagedep;
	struct inodedep *inodedep;
	struct mkdir *mkdir, *nextmd;

	splassert(IPL_BIO);

#ifdef DEBUG
	if (lk.lkt_held == -1)
		panic("free_diradd: lock not held");
#endif
	WORKLIST_REMOVE(&dap->da_list);
	LIST_REMOVE(dap, da_pdlist);
	if ((dap->da_state & DIRCHG) == 0) {
		pagedep = dap->da_pagedep;
	} else {
		dirrem = dap->da_previous;
		pagedep = dirrem->dm_pagedep;
		dirrem->dm_dirinum = pagedep->pd_ino;
		add_to_worklist(&dirrem->dm_list);
	}
	if (inodedep_lookup(VFSTOUFS(pagedep->pd_mnt)->um_fs, dap->da_newinum,
	    0, &inodedep) != 0)
		(void) free_inodedep(inodedep);
	if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
		for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; mkdir = nextmd) {
			nextmd = LIST_NEXT(mkdir, md_mkdirs);
			if (mkdir->md_diradd != dap)
				continue;
			dap->da_state &= ~mkdir->md_state;
			WORKLIST_REMOVE(&mkdir->md_list);
			LIST_REMOVE(mkdir, md_mkdirs);
			WORKITEM_FREE(mkdir, D_MKDIR);
		}
		if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
			FREE_LOCK(&lk);
			panic("free_diradd: unfound ref");
		}
	}
	WORKITEM_FREE(dap, D_DIRADD);
}

/*
 * Directory entry removal dependencies.
 *
 * When removing a directory entry, the entry's inode pointer must be
 * zero'ed on disk before the corresponding inode's link count is decremented
 * (possibly freeing the inode for re-use). This dependency is handled by
 * updating the directory entry but delaying the inode count reduction until
 * after the directory block has been written to disk. After this point, the
 * inode count can be decremented whenever it is convenient.
 */

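/*
 * Illustrative sketch, not part of the kernel source: the removal
 * ordering described above, reduced to two steps over hypothetical
 * on-disk state.  Reversing the two writes could leave an on-disk
 * directory entry pointing at a freed (or reused) inode after a crash:
 */
#include <stdio.h>

static unsigned int d_ino = 77;	/* entry still names the inode on disk */
static unsigned int nlink = 1;	/* on-disk link count */

int
main(void)
{
	d_ino = 0;	/* step 1: zero the pointer, write the dir block */
	printf("directory block on disk: d_ino == %u\n", d_ino);

	nlink--;	/* step 2: only now reduce the on-disk link count */
	printf("inode block on disk: nlink == %u\n", nlink);
	return 0;
}
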
/*
 * This routine should be called immediately after removing
 * a directory entry. The inode's link count should not be
 * decremented by the calling procedure -- the soft updates
 * code will do this task when it is safe.
 */
/* buffer containing directory block */
/* inode for the directory being modified */
/* inode for directory entry being removed */
/* indicates if doing RMDIR */
void
softdep_setup_remove(struct buf *bp, struct inode *dp, struct inode *ip,
    int isrmdir)
{
	struct dirrem *dirrem, *prevdirrem;

	/*
	 * Allocate a new dirrem if appropriate and ACQUIRE_LOCK.
	 */
	dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem);

	/*
	 * If the COMPLETE flag is clear, then there were no active
	 * entries and we want to roll back to a zeroed entry until
	 * the new inode is committed to disk. If the COMPLETE flag is
	 * set then we have deleted an entry that never made it to
	 * disk. If the entry we deleted resulted from a name change,
	 * then the old name still resides on disk. We cannot delete
	 * its inode (returned to us in prevdirrem) until the zeroed
	 * directory entry gets to disk. The new inode has never been
	 * referenced on the disk, so can be deleted immediately.
	 */
	if ((dirrem->dm_state & COMPLETE) == 0) {
		LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem,
		    dm_next);
		FREE_LOCK(&lk);
	} else {
		if (prevdirrem != NULL)
			LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd,
			    prevdirrem, dm_next);
		dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino;
		FREE_LOCK(&lk);
		handle_workitem_remove(dirrem);
	}
}

STATIC long num_dirrem;		/* number of dirrem allocated */
/*
 * Allocate a new dirrem if appropriate and return it along with
 * its associated pagedep. Called without a lock, returns with lock.
 */
/* buffer containing directory block */
/* inode for the directory being modified */
/* inode for directory entry being removed */
/* indicates if doing RMDIR */
/* previously referenced inode, if any */
STATIC struct dirrem *
newdirrem(struct buf *bp, struct inode *dp, struct inode *ip, int isrmdir,
    struct dirrem **prevdirremp)
{
	int offset;
	daddr_t lbn;
	struct diradd *dap;
	struct dirrem *dirrem;
	struct pagedep *pagedep;

	/*
	 * Whiteouts have no deletion dependencies.
	 */
	if (ip == NULL)
		panic("newdirrem: whiteout");
	/*
	 * If we are over our limit, try to improve the situation.
	 * Limiting the number of dirrem structures will also limit
	 * the number of freefile and freeblks structures.
	 */
	if (num_dirrem > max_softdeps / 2)
		(void) request_cleanup(FLUSH_REMOVE, 0);
	num_dirrem += 1;
	dirrem = pool_get(&dirrem_pool, PR_WAITOK | PR_ZERO);
	dirrem->dm_list.wk_type = D_DIRREM;
	dirrem->dm_state = isrmdir ? RMDIR : 0;
	dirrem->dm_mnt = ITOV(ip)->v_mount;
	dirrem->dm_oldinum = ip->i_number;
	*prevdirremp = NULL;

	ACQUIRE_LOCK(&lk);
	lbn = lblkno(dp->i_fs, dp->i_offset);
	offset = blkoff(dp->i_fs, dp->i_offset);
	if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0)
		WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
	dirrem->dm_pagedep = pagedep;
	/*
	 * Check for a diradd dependency for the same directory entry.
	 * If present, then both dependencies become obsolete and can
	 * be de-allocated. Check for an entry on both the pd_diraddhd
	 * list and the pd_pendinghd list.
	 */
	LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(offset)], da_pdlist)
		if (dap->da_offset == offset)
			break;
	if (dap == NULL) {
		LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist)
			if (dap->da_offset == offset)
				break;
		if (dap == NULL)
			return (dirrem);
	}
	/*
	 * Must be ATTACHED at this point.
	 */
	if ((dap->da_state & ATTACHED) == 0) {
		FREE_LOCK(&lk);
		panic("newdirrem: not ATTACHED");
	}
	if (dap->da_newinum != ip->i_number) {
		FREE_LOCK(&lk);
		panic("newdirrem: inum %u should be %u",
		    ip->i_number, dap->da_newinum);
	}
	/*
	 * If we are deleting a changed name that never made it to disk,
	 * then return the dirrem describing the previous inode (which
	 * represents the inode currently referenced from this entry on disk).
	 */
	if ((dap->da_state & DIRCHG) != 0) {
		*prevdirremp = dap->da_previous;
		dap->da_state &= ~DIRCHG;
		dap->da_pagedep = pagedep;
	}
	/*
	 * We are deleting an entry that never made it to disk.
	 * Mark it COMPLETE so we can delete its inode immediately.
	 */
	dirrem->dm_state |= COMPLETE;
	free_diradd(dap);
	return (dirrem);
}

/*
 * Directory entry change dependencies.
 *
 * Changing an existing directory entry requires that an add operation
 * be completed first followed by a deletion. The semantics for the addition
 * are identical to the description of adding a new entry above except
 * that the rollback is to the old inode number rather than zero. Once
 * the addition dependency is completed, the removal is done as described
 * in the removal routine above.
 */

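/*
 * Illustrative sketch, not part of the kernel source: for a changed
 * entry the rollback target is the old inode number rather than zero,
 * so a crash leaves the entry naming the inode it referenced before the
 * change.  The values are hypothetical:
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t d_ino = 4321;		/* new inum, not yet safe on disk */
	const uint32_t old_ino = 77;	/* inum the entry held before */
	uint32_t saved;

	saved = d_ino;
	d_ino = old_ino;	/* undo: roll back to the old inum */
	printf("disk image keeps d_ino == %u\n", d_ino);

	d_ino = saved;		/* redo once the new inode is written */
	printf("in-core entry back to d_ino == %u\n", d_ino);
	return 0;
}
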
/*
 * This routine should be called immediately after changing
 * a directory entry. The inode's link count should not be
 * decremented by the calling procedure -- the soft updates
 * code will perform this task when it is safe.
 */
/* buffer containing directory block */
/* inode for the directory being modified */
/* inode for directory entry being removed */
/* new inode number for changed entry */
/* indicates if doing RMDIR */
void
softdep_setup_directory_change(struct buf *bp, struct inode *dp,
    struct inode *ip, long newinum, int isrmdir)
{
	int offset;
	struct diradd *dap;
	struct dirrem *dirrem, *prevdirrem;
	struct pagedep *pagedep;
	struct inodedep *inodedep;

	offset = blkoff(dp->i_fs, dp->i_offset);
	dap = pool_get(&diradd_pool, PR_WAITOK | PR_ZERO);
	dap->da_list.wk_type = D_DIRADD;
	dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE;
	dap->da_offset = offset;
	dap->da_newinum = newinum;

	/*
	 * Allocate a new dirrem and ACQUIRE_LOCK.
	 */
	dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem);
	pagedep = dirrem->dm_pagedep;
	/*
	 * The possible values for isrmdir:
	 *	0 - non-directory file rename
	 *	1 - directory rename within same directory
	 *	inum - directory rename to new directory of given inode number
	 * When renaming to a new directory, we are both deleting and
	 * creating a new directory entry, so the link count on the new
	 * directory should not change. Thus we do not need the followup
	 * dirrem which is usually done in handle_workitem_remove. We set
	 * the DIRCHG flag to tell handle_workitem_remove to skip the
	 * followup dirrem.
	 */
	if (isrmdir > 1)
		dirrem->dm_state |= DIRCHG;

	/*
	 * If the COMPLETE flag is clear, then there were no active
	 * entries and we want to roll back to the previous inode until
	 * the new inode is committed to disk. If the COMPLETE flag is
	 * set, then we have deleted an entry that never made it to disk.
	 * If the entry we deleted resulted from a name change, then the old
	 * inode reference still resides on disk. Any rollback that we do
	 * needs to be to that old inode (returned to us in prevdirrem). If
	 * the entry we deleted resulted from a create, then there is
	 * no entry on the disk, so we want to roll back to zero rather
	 * than the uncommitted inode. In either of the COMPLETE cases we
	 * want to immediately free the unwritten and unreferenced inode.
	 */
	if ((dirrem->dm_state & COMPLETE) == 0) {
		dap->da_previous = dirrem;
	} else {
		if (prevdirrem != NULL) {
			dap->da_previous = prevdirrem;
		} else {
			dap->da_state &= ~DIRCHG;
			dap->da_pagedep = pagedep;
		}
		dirrem->dm_dirinum = pagedep->pd_ino;
		add_to_worklist(&dirrem->dm_list);
	}
	/*
	 * Link into its inodedep. Put it on the id_bufwait list if the inode
	 * is not yet written. If it is written, do the post-inode write
	 * processing to put it on the id_pendinghd list.
	 */
	if (inodedep_lookup(dp->i_fs, newinum, DEPALLOC, &inodedep) == 0 ||
	    (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
		dap->da_state |= COMPLETE;
		LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
		WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list);
	} else {
		LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)],
		    dap, da_pdlist);
		WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list);
	}
	FREE_LOCK(&lk);
}

/*
 * Called whenever the link count on an inode is changed.
 * It creates an inode dependency so that the new reference(s)
 * to the inode cannot be committed to disk until the updated
 * inode has been written.
 */
/* the inode with the increased link count */
/* do background work or not */
void
softdep_change_linkcnt(struct inode *ip, int nodelay)
{
	struct inodedep *inodedep;
	int flags;

	/*
	 * If requested, do not allow background work to happen.
	 */
	flags = DEPALLOC;
	if (nodelay)
		flags |= NODELAY;

	ACQUIRE_LOCK(&lk);

	(void) inodedep_lookup(ip->i_fs, ip->i_number, flags, &inodedep);
	if (DIP(ip, nlink) < ip->i_effnlink) {
		FREE_LOCK(&lk);
		panic("softdep_change_linkcnt: bad delta");
	}

	inodedep->id_nlinkdelta = DIP(ip, nlink) - ip->i_effnlink;

	FREE_LOCK(&lk);
}

/*
 * This workitem decrements the inode's link count.
 * If the link count reaches zero, the file is removed.
 */
STATIC void
handle_workitem_remove(struct dirrem *dirrem)
{
	struct proc *p = CURPROC;	/* XXX */
	struct inodedep *inodedep;
	struct vnode *vp;
	struct inode *ip;
	ufsino_t oldinum;
	int error;

	if ((error = VFS_VGET(dirrem->dm_mnt, dirrem->dm_oldinum, &vp)) != 0) {
		softdep_error("handle_workitem_remove: vget", error);
		return;
	}
	ip = VTOI(vp);
	ACQUIRE_LOCK(&lk);
	if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep))
	    == 0) {
		FREE_LOCK(&lk);
		panic("handle_workitem_remove: lost inodedep");
	}
	/*
	 * Normal file deletion.
	 */
	if ((dirrem->dm_state & RMDIR) == 0) {
		DIP_ADD(ip, nlink, -1);
		ip->i_flag |= IN_CHANGE;
		if (DIP(ip, nlink) < ip->i_effnlink) {
			FREE_LOCK(&lk);
			panic("handle_workitem_remove: bad file delta");
		}
		inodedep->id_nlinkdelta = DIP(ip, nlink) - ip->i_effnlink;
		FREE_LOCK(&lk);
		vput(vp);
		num_dirrem -= 1;
		WORKITEM_FREE(dirrem, D_DIRREM);
		return;
	}
	/*
	 * Directory deletion. Decrement reference count for both the
	 * just deleted parent directory entry and the reference for ".".
	 * Next truncate the directory to length zero. When the
	 * truncation completes, arrange to have the reference count on
	 * the parent decremented to account for the loss of "..".
	 */
	DIP_ADD(ip, nlink, -2);
	ip->i_flag |= IN_CHANGE;
	if (DIP(ip, nlink) < ip->i_effnlink)
		panic("handle_workitem_remove: bad dir delta");
	inodedep->id_nlinkdelta = DIP(ip, nlink) - ip->i_effnlink;
	FREE_LOCK(&lk);
	if ((error = UFS_TRUNCATE(ip, (off_t)0, 0, p->p_ucred)) != 0)
		softdep_error("handle_workitem_remove: truncate", error);
	/*
	 * Rename a directory to a new parent. Since we are both deleting
	 * and creating a new directory entry, the link count on the new
	 * directory should not change. Thus we skip the followup dirrem.
	 */
	if (dirrem->dm_state & DIRCHG) {
		vput(vp);
		num_dirrem -= 1;
		WORKITEM_FREE(dirrem, D_DIRREM);
		return;
	}
	/*
	 * If the inodedep does not exist, then the zero'ed inode has
	 * been written to disk. If the allocated inode has never been
	 * written to disk, then the on-disk inode is zero'ed. In either
	 * case we can remove the file immediately.
	 */
	ACQUIRE_LOCK(&lk);
	dirrem->dm_state = 0;
	oldinum = dirrem->dm_oldinum;
	dirrem->dm_oldinum = dirrem->dm_dirinum;
	if (inodedep_lookup(ip->i_fs, oldinum, 0, &inodedep) == 0 ||
	    check_inode_unwritten(inodedep)) {
		FREE_LOCK(&lk);
		vput(vp);
		handle_workitem_remove(dirrem);
		return;
	}
	WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list);
	FREE_LOCK(&lk);
	ip->i_flag |= IN_CHANGE;
	UFS_UPDATE(VTOI(vp), 0);
	vput(vp);
}

/*
 * Inode de-allocation dependencies.
 *
 * When an inode's link count is reduced to zero, it can be de-allocated. We
 * found it convenient to postpone de-allocation until after the inode is
 * written to disk with its new link count (zero). At this point, all of the
 * on-disk inode's block pointers are nullified and, with careful dependency
 * list ordering, all dependencies related to the inode will be satisfied and
 * the corresponding dependency structures de-allocated. So, if/when the
 * inode is reused, there will be no mixing of old dependencies with new
 * ones. This artificial dependency is set up by the block de-allocation
 * procedure above (softdep_setup_freeblocks) and completed by the
 * following procedure.
 */
STATIC void
handle_workitem_freefile(struct freefile *freefile)
{
	struct fs *fs;
	struct vnode vp;
	struct inode tip;
#ifdef DEBUG
	struct inodedep *idp;
#endif
	int error;

	fs = VFSTOUFS(freefile->fx_mnt)->um_fs;
#ifdef DEBUG
	ACQUIRE_LOCK(&lk);
	error = inodedep_lookup(fs, freefile->fx_oldinum, 0, &idp);
	FREE_LOCK(&lk);
	if (error)
		panic("handle_workitem_freefile: inodedep survived");
#endif
	tip.i_ump = VFSTOUFS(freefile->fx_mnt);
	tip.i_dev = freefile->fx_devvp->v_rdev;
	tip.i_fs = fs;
	tip.i_vnode = &vp;
	vp.v_data = &tip;

	if ((error = ffs_freefile(&tip, freefile->fx_oldinum,
	    freefile->fx_mode)) != 0) {
		softdep_error("handle_workitem_freefile", error);
	}
	WORKITEM_FREE(freefile, D_FREEFILE);
}

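/*
 * Illustrative sketch, not part of the kernel source: the "artificial
 * dependency" above can be modelled as a callback parked on a wait list
 * (the analogue of id_inowait) that only runs from the inode-block
 * write-completion path.  All names here are hypothetical:
 */
#include <stdio.h>

typedef void (*workfn)(void);

static workfn inowait[4];	/* stand-in for the id_inowait worklist */
static int ninowait;

static void
free_the_inode(void)		/* plays handle_workitem_freefile() */
{
	printf("inode returned to the cylinder-group free map\n");
}

static void
inode_block_written(void)	/* plays the write-completion handler */
{
	int i;

	for (i = 0; i < ninowait; i++)
		inowait[i]();	/* dependencies now satisfied; run them */
	ninowait = 0;
}

int
main(void)
{
	inowait[ninowait++] = free_the_inode;	/* postpone de-allocation */
	printf("zero'ed inode block queued for write\n");
	inode_block_written();
	return 0;
}
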
/*
 * Disk writes.
 *
 * The dependency structures constructed above are most actively used when
 * file system blocks are written to disk. No constraints are placed on when
 * a block can be written, but unsatisfied update dependencies are made safe
 * by modifying (or replacing) the source memory for the duration of the disk
 * write. When the disk write completes, the memory block is again brought
 * up-to-date.
 *
 * In-core inode structure reclamation.
 *
 * Because there are a finite number of "in-core" inode structures, they are
 * reused regularly. By transferring all inode-related dependencies to the
 * in-memory inode block and indexing them separately (via "inodedep"s), we
 * can allow "in-core" inode structures to be reused at any time and avoid
 * any increase in contention.
 *
 * Called just before entering the device driver to initiate a new disk I/O.
 * The buffer must be locked, thus, no I/O completion operations can occur
 * while we are manipulating its associated dependencies.
 */
3249/* structure describing disk write to occur */
3250void
3251softdep_disk_io_initiation(struct buf *bp)
3252{
3253 struct worklist *wk, *nextwk;
3254 struct indirdep *indirdep;
3255 struct inodedep *inodedep;
3256 struct buf *sbp;
3257
3258 /*
3259 * We only care about write operations. There should never
3260 * be dependencies for reads.
3261 */
3262 if (bp->b_flags & B_READ0x00008000)
3263 panic("softdep_disk_io_initiation: read");
3264
3265 ACQUIRE_LOCK(&lk)(&lk)->lkt_spl = splraise(0x3);
3266
3267 /*
3268 * Do any necessary pre-I/O processing.
3269 */
3270 for (wk = LIST_FIRST(&bp->b_dep)((&bp->b_dep)->lh_first); wk; wk = nextwk) {
3271 nextwk = LIST_NEXT(wk, wk_list)((wk)->wk_list.le_next);
3272 switch (wk->wk_type) {
3273
3274 case D_PAGEDEP0:
3275 initiate_write_filepage(WK_PAGEDEP(wk)((struct pagedep *)(wk)), bp);
3276 continue;
3277
3278 case D_INODEDEP1:
3279 inodedep = WK_INODEDEP(wk)((struct inodedep *)(wk));
3280 if (inodedep->id_fs->fs_magic == FS_UFS1_MAGIC0x011954)
3281 initiate_write_inodeblock_ufs1(inodedep, bp);
3282#ifdef FFS21
3283 else
3284 initiate_write_inodeblock_ufs2(inodedep, bp);
3285#endif
3286 continue;
3287
3288 case D_INDIRDEP5:
3289 indirdep = WK_INDIRDEP(wk)((struct indirdep *)(wk));
3290 if (indirdep->ir_stateir_list.wk_state & GOINGAWAY0x0100)
3291 panic("disk_io_initiation: indirdep gone");
3292 /*
3293 * If there are no remaining dependencies, this
3294 * will be writing the real pointers, so the
3295 * dependency can be freed.
3296 */
3297 if (LIST_FIRST(&indirdep->ir_deplisthd)((&indirdep->ir_deplisthd)->lh_first) == NULL((void *)0)) {
3298 sbp = indirdep->ir_savebp;
3299 sbp->b_flags |= B_INVAL0x00000800 | B_NOCACHE0x00001000;
3300 /* inline expand WORKLIST_REMOVE(wk); */
3301 wk->wk_state &= ~ONWORKLIST0x8000;
3302 LIST_REMOVE(wk, wk_list);
3303 WORKITEM_FREE(indirdep, D_INDIRDEP)softdep_freequeue_add((struct worklist *)indirdep);
3304 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3305 brelse(sbp);
3306 ACQUIRE_LOCK(&lk)(&lk)->lkt_spl = splraise(0x3);
3307 continue;
3308 }
3309 /*
3310 * Replace up-to-date version with safe version.
3311 */
3312 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3313 indirdep->ir_saveddata = malloc(bp->b_bcount,
3314 M_INDIRDEP83, M_WAITOK0x0001);
3315 ACQUIRE_LOCK(&lk)(&lk)->lkt_spl = splraise(0x3);
3316 indirdep->ir_stateir_list.wk_state &= ~ATTACHED0x0001;
3317 indirdep->ir_stateir_list.wk_state |= UNDONE0x0002;
3318 memcpy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount);
3319 memcpy(bp->b_data, indirdep->ir_savebp->b_data,
3320     bp->b_bcount);
3321 continue;
3322
3323 case D_MKDIR11:
3324 case D_BMSAFEMAP3:
3325 case D_ALLOCDIRECT4:
3326 case D_ALLOCINDIR6:
3327 continue;
3328
3329 default:
3330 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3331 panic("handle_disk_io_initiation: Unexpected type %s",
3332 TYPENAME(wk->wk_type));
3333 /* NOTREACHED */
3334 }
3335 }
3336
3337 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3338}
3339
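The indirdep case above is the heart of the rollback scheme: the buffer's up-to-date pointers are parked in ir_saveddata while the last-written safe copy from ir_savebp goes out to the driver, and softdep_disk_write_complete() later copies the saved data back. A minimal sketch of that save-and-swap idiom, under assumed names (this helper does not exist in the file; M_TEMP stands in for the real malloc type):

	/*
	 * Park the new contents and substitute the previously written
	 * safe copy for the duration of the write.
	 */
	static void
	swap_in_safe_copy(caddr_t *saved, caddr_t live, caddr_t safe, long len)
	{
		*saved = malloc(len, M_TEMP, M_WAITOK);
		memcpy(*saved, live, len);	/* save up-to-date data */
		memcpy(live, safe, len);	/* write the safe version */
	}

On write completion the inverse runs: the saved data is copied back over the buffer, the save area is freed, and the dependency flips from UNDONE back to ATTACHED.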
3340/*
3341 * Called from within the procedure above to deal with unsatisfied
3342 * allocation dependencies in a directory. The buffer must be locked,
3343 * thus, no I/O completion operations can occur while we are
3344 * manipulating its associated dependencies.
3345 */
3346STATIC void
3347initiate_write_filepage(struct pagedep *pagedep, struct buf *bp)
3348{
3349 struct diradd *dap;
3350 struct direct *ep;
3351 int i;
3352
3353 if (pagedep->pd_statepd_list.wk_state & IOSTARTED0x0200) {
3354 /*
3355 * This can only happen if there is a driver that does not
3356 * understand chaining. Here biodone will reissue the call
3357 * to strategy for the incomplete buffers.
3358 */
3359 printf("initiate_write_filepage: already started\n");
3360 return;
3361 }
3362 pagedep->pd_statepd_list.wk_state |= IOSTARTED0x0200;
3363 for (i = 0; i < DAHASHSZ6; i++) {
3364 LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) {
3365 ep = (struct direct *)
3366 ((char *)bp->b_data + dap->da_offset);
3367 if (ep->d_ino != dap->da_newinum) {
3368 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3369 panic("%s: dir inum %u != new %u",
3370 "initiate_write_filepage",
3371 ep->d_ino, dap->da_newinum);
3372 }
3373 if (dap->da_stateda_list.wk_state & DIRCHG0x0080)
3374 ep->d_ino = dap->da_previousda_un.dau_previous->dm_oldinum;
3375 else
3376 ep->d_ino = 0;
3377 dap->da_stateda_list.wk_state &= ~ATTACHED0x0001;
3378 dap->da_stateda_list.wk_state |= UNDONE0x0002;
3379 }
3380 }
3381}
3382
3383/*
3384 * Called from within the procedure above to deal with unsatisfied
3385 * allocation dependencies in an inodeblock. The buffer must be
3386 * locked, thus, no I/O completion operations can occur while we
3387 * are manipulating its associated dependencies.
3388 */
3389/* The inode block */
3390STATIC void
3391initiate_write_inodeblock_ufs1(struct inodedep *inodedep, struct buf *bp)
3392{
3393 struct allocdirect *adp, *lastadp;
3394 struct ufs1_dinode *dp;
3395 struct fs *fs;
3396#ifdef DIAGNOSTIC1
3397 daddr_t prevlbn = 0;
3398 int32_t d1, d2;
3399#endif
3400 int i, deplist;
3401
3402 if (inodedep->id_stateid_list.wk_state & IOSTARTED0x0200) {
3403 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3404 panic("initiate_write_inodeblock: already started");
3405 }
3406 inodedep->id_stateid_list.wk_state |= IOSTARTED0x0200;
3407 fs = inodedep->id_fs;
3408 dp = (struct ufs1_dinode *)bp->b_data +
3409 ino_to_fsbo(fs, inodedep->id_ino)((inodedep->id_ino) % ((fs)->fs_inopb));
3410 /*
3411 * If the bitmap is not yet written, then the allocated
3412 * inode cannot be written to disk.
3413 */
3414 if ((inodedep->id_stateid_list.wk_state & DEPCOMPLETE0x0008) == 0) {
3415 if (inodedep->id_savedino1id_un.idu_savedino1 != NULL((void *)0)) {
3416 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3417 panic("initiate_write_inodeblock: already doing I/O");
3418 }
3419 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3420 inodedep->id_savedino1id_un.idu_savedino1 = malloc(sizeof(struct ufs1_dinode),
3421 M_INODEDEP79, M_WAITOK0x0001);
3422 inodedep->id_unsize = sizeof(struct ufs1_dinode);
3423 ACQUIRE_LOCK(&lk)(&lk)->lkt_spl = splraise(0x3);
3424 *inodedep->id_savedino1id_un.idu_savedino1 = *dp;
3425 memset(dp, 0, sizeof(struct ufs1_dinode))__builtin_memset((dp), (0), (sizeof(struct ufs1_dinode)));
3426 return;
3427 }
3428 /*
3429 * If no dependencies, then there is nothing to roll back.
3430 */
3431 inodedep->id_savedsize = dp->di_size;
3432 if (TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first) == NULL((void *)0))
3433 return;
3434 /*
3435 * Set the dependencies to busy.
3436 */
3437 for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first); adp;
3438 adp = TAILQ_NEXT(adp, ad_next)((adp)->ad_next.tqe_next)) {
3439#ifdef DIAGNOSTIC1
3440 if (deplist != 0 && prevlbn >= adp->ad_lbn) {
3441 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3442 panic("softdep_write_inodeblock: lbn order");
3443 }
3444 prevlbn = adp->ad_lbn;
3445 if (adp->ad_lbn < NDADDR12 &&
3446 (d1 = dp->di_db[adp->ad_lbn]) != (d2 = adp->ad_newblkno)) {
3447 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3448 panic("%s: direct pointer #%lld mismatch %d != %d",
3449 "softdep_write_inodeblock", (long long)adp->ad_lbn,
3450 d1, d2);
3451 }
3452 if (adp->ad_lbn >= NDADDR12 &&
3453 (d1 = dp->di_ib[adp->ad_lbn - NDADDR12]) !=
3454 (d2 = adp->ad_newblkno)) {
3455 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3456 panic("%s: indirect pointer #%lld mismatch %d != %d",
3457 "softdep_write_inodeblock", (long long)(adp->ad_lbn -
3458 NDADDR12), d1, d2);
3459 }
3460 deplist |= 1 << adp->ad_lbn;
3461 if ((adp->ad_statead_list.wk_state & ATTACHED0x0001) == 0) {
3462 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3463 panic("softdep_write_inodeblock: Unknown state 0x%x",
3464 adp->ad_statead_list.wk_state);
3465 }
3466#endif /* DIAGNOSTIC */
3467 adp->ad_statead_list.wk_state &= ~ATTACHED0x0001;
3468 adp->ad_statead_list.wk_state |= UNDONE0x0002;
3469 }
3470 /*
3471 * The on-disk inode cannot claim to be any larger than the last
3472 * fragment that has been written. Otherwise, the on-disk inode
3473 * might have fragments that were not the last block in the file
3474 * which would corrupt the filesystem.
3475 */
3476 for (lastadp = NULL((void *)0), adp = TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first); adp;
3477 lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)((adp)->ad_next.tqe_next)) {
3478 if (adp->ad_lbn >= NDADDR12)
3479 break;
3480 dp->di_db[adp->ad_lbn] = adp->ad_oldblkno;
3481 /* keep going until hitting a rollback to a frag */
3482 if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
3483 continue;
3484 dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
3485 for (i = adp->ad_lbn + 1; i < NDADDR12; i++) {
3486#ifdef DIAGNOSTIC1
3487 if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) {
3488 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3489 panic("softdep_write_inodeblock: lost dep1");
3490 }
3491#endif /* DIAGNOSTIC */
3492 dp->di_db[i] = 0;
3493 }
3494 for (i = 0; i < NIADDR3; i++) {
3495#ifdef DIAGNOSTIC1
3496 if (dp->di_ib[i] != 0 &&
3497 (deplist & ((1 << NDADDR12) << i)) == 0) {
3498 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3499 panic("softdep_write_inodeblock: lost dep2");
3500 }
3501#endif /* DIAGNOSTIC */
3502 dp->di_ib[i] = 0;
3503 }
3504 return;
3505 }
3506 /*
3507 * If we have zero'ed out the last allocated block of the file,
3508 * roll back the size to the last currently allocated block.
3509 * We know that this last allocated block is full-sized, as
3510 * we already checked for fragments in the loop above.
3511 */
3512 if (lastadp != NULL((void *)0) &&
3513 dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
3514 for (i = lastadp->ad_lbn; i >= 0; i--)
3515 if (dp->di_db[i] != 0)
3516 break;
3517 dp->di_size = (i + 1) * fs->fs_bsize;
3518 }
3519 /*
3520 * The only dependencies are for indirect blocks.
3521 *
3522 * The file size for indirect block additions is not guaranteed.
3523 * Such a guarantee would be non-trivial to achieve. The conventional
3524 * synchronous write implementation also does not make this guarantee.
3525 * Fsck should catch and fix discrepancies. Arguably, the file size
3526 * can be over-estimated without destroying integrity when the file
3527 * moves into the indirect blocks (i.e., is large). If we want to
3528 * postpone fsck, we are stuck with this argument.
3529 */
3530 for (; adp; adp = TAILQ_NEXT(adp, ad_next)((adp)->ad_next.tqe_next))
3531 dp->di_ib[adp->ad_lbn - NDADDR12] = 0;
3532}
3533
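In the DIAGNOSTIC blocks above, deplist is a bitmask recording which on-disk pointers are covered by a busy dependency: bit ad_lbn for the direct pointer di_db[ad_lbn], and bit (NDADDR + i) for the indirect pointer di_ib[i]. The "lost dep" panics fire when a pointer that is about to be rolled back to zero is not claimed by any bit, i.e. no dependency would ever restore it. A small illustration of the encoding, with assumed helper names (not from this file):

	#ifndef NDADDR
	#define NDADDR	12	/* direct block pointers per FFS inode */
	#endif

	/* Is the direct pointer di_db[lbn] covered by a dependency? */
	static int
	deplist_has_direct(int deplist, int lbn)
	{
		return ((deplist & (1 << lbn)) != 0);
	}

	/* Is the indirect pointer di_ib[i] covered by a dependency? */
	static int
	deplist_has_indirect(int deplist, int i)
	{
		return ((deplist & ((1 << NDADDR) << i)) != 0);
	}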
3534#ifdef FFS21
3535/*
3536 * Version of initiate_write_inodeblock that handles FFS2 dinodes.
3537 */
3538/* The inode block */
3539STATIC void
3540initiate_write_inodeblock_ufs2(struct inodedep *inodedep, struct buf *bp)
3541{
3542 struct allocdirect *adp, *lastadp;
3543 struct ufs2_dinode *dp;
3544 struct fs *fs = inodedep->id_fs;
3545#ifdef DIAGNOSTIC1
3546 daddr_t prevlbn = -1, d1, d2;
3547#endif
3548 int deplist, i;
3549
3550 if (inodedep->id_stateid_list.wk_state & IOSTARTED0x0200)
3551 panic("initiate_write_inodeblock_ufs2: already started");
3552 inodedep->id_stateid_list.wk_state |= IOSTARTED0x0200;
3553 fs = inodedep->id_fs;
3554 dp = (struct ufs2_dinode *)bp->b_data +
3555 ino_to_fsbo(fs, inodedep->id_ino)((inodedep->id_ino) % ((fs)->fs_inopb));
3556 /*
3557 * If the bitmap is not yet written, then the allocated
3558 * inode cannot be written to disk.
3559 */
3560 if ((inodedep->id_stateid_list.wk_state & DEPCOMPLETE0x0008) == 0) {
3561 if (inodedep->id_savedino2id_un.idu_savedino2 != NULL((void *)0))
3562 panic("initiate_write_inodeblock_ufs2: I/O underway");
3563 inodedep->id_savedino2id_un.idu_savedino2 = malloc(sizeof(struct ufs2_dinode),
3564 M_INODEDEP79, M_WAITOK0x0001);
3565 inodedep->id_unsize = sizeof(struct ufs2_dinode);
3566 *inodedep->id_savedino2id_un.idu_savedino2 = *dp;
3567 memset(dp, 0, sizeof(struct ufs2_dinode))__builtin_memset((dp), (0), (sizeof(struct ufs2_dinode)));
3568 return;
3569 }
3570 /*
3571 * If no dependencies, then there is nothing to roll back.
3572 */
3573 inodedep->id_savedsize = dp->di_size;
3574 if (TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first) == NULL((void *)0))
3575 return;
3576
3577#ifdef notyet
3578 inodedep->id_savedextsize = dp->di_extsize;
3579 if (TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first) == NULL((void *)0) &&
3580 TAILQ_FIRST(&inodedep->id_extupdt)((&inodedep->id_extupdt)->tqh_first) == NULL((void *)0))
3581 return;
3582 /*
3583 * Set the ext data dependencies to busy.
3584 */
3585 for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_extupdt)((&inodedep->id_extupdt)->tqh_first); adp;
3586 adp = TAILQ_NEXT(adp, ad_next)((adp)->ad_next.tqe_next)) {
3587#ifdef DIAGNOSTIC1
3588 if (deplist != 0 && prevlbn >= adp->ad_lbn) {
3589 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3590 panic("softdep_write_inodeblock: lbn order");
3591 }
3592 prevlbn = adp->ad_lbn;
3593 if ((d1 = dp->di_extb[adp->ad_lbn]) !=
3594 (d2 = adp->ad_newblkno)) {
3595 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3596 panic("%s: direct pointer #%lld mismatch %lld != %lld",
3597 "softdep_write_inodeblock", (long long)adp->ad_lbn,
3598 d1, d2);
3599 }
3600 deplist |= 1 << adp->ad_lbn;
3601 if ((adp->ad_statead_list.wk_state & ATTACHED0x0001) == 0) {
3602 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3603 panic("softdep_write_inodeblock: Unknown state 0x%x",
3604 adp->ad_statead_list.wk_state);
3605 }
3606#endif /* DIAGNOSTIC */
3607 adp->ad_statead_list.wk_state &= ~ATTACHED0x0001;
3608 adp->ad_statead_list.wk_state |= UNDONE0x0002;
3609 }
3610 /*
3611 * The on-disk inode cannot claim to be any larger than the last
3612 * fragment that has been written. Otherwise, the on-disk inode
3613 * might have fragments that were not the last block in the ext
3614 * data which would corrupt the filesystem.
3615 */
3616 for (lastadp = NULL((void *)0), adp = TAILQ_FIRST(&inodedep->id_extupdt)((&inodedep->id_extupdt)->tqh_first); adp;
3617 lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)((adp)->ad_next.tqe_next)) {
3618 dp->di_extb[adp->ad_lbn] = adp->ad_oldblkno;
3619 /* keep going until hitting a rollback to a frag */
3620 if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
3621 continue;
3622 dp->di_extsize = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
3623 for (i = adp->ad_lbn + 1; i < NXADDR2; i++) {
3624#ifdef DIAGNOSTIC1
3625 if (dp->di_extb[i] != 0 && (deplist & (1 << i)) == 0) {
3626 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3627 panic("softdep_write_inodeblock: lost dep1");
3628 }
3629#endif /* DIAGNOSTIC */
3630 dp->di_extb[i] = 0;
3631 }
3632 lastadp = NULL((void *)0);
3633 break;
3634 }
3635 /*
3636 * If we have zero'ed out the last allocated block of the ext
3637 * data, roll back the size to the last currently allocated block.
3638 * We know that this last allocated block is full-sized, as
3639 * we already checked for fragments in the loop above.
3640 */
3641 if (lastadp != NULL((void *)0) &&
3642 dp->di_extsize <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
3643 for (i = lastadp->ad_lbn; i >= 0; i--)
3644 if (dp->di_extb[i] != 0)
3645 break;
3646 dp->di_extsize = (i + 1) * fs->fs_bsize;
3647 }
3648#endif /* notyet */
3649
3650 /*
3651 * Set the file data dependencies to busy.
3652 */
3653 for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first); adp;
3654 adp = TAILQ_NEXT(adp, ad_next)((adp)->ad_next.tqe_next)) {
3655#ifdef DIAGNOSTIC1
3656 if (deplist != 0 && prevlbn >= adp->ad_lbn) {
3657 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3658 panic("softdep_write_inodeblock: lbn order");
3659 }
3660 prevlbn = adp->ad_lbn;
3661 if (adp->ad_lbn < NDADDR12 &&
3662 (d1 = dp->di_db[adp->ad_lbn]) != (d2 = adp->ad_newblkno)) {
3663 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3664 panic("%s: direct pointer #%lld mismatch %lld != %lld",
3665 "softdep_write_inodeblock", (long long)adp->ad_lbn,
3666 d1, d2);
3667 }
3668 if (adp->ad_lbn >= NDADDR12 &&
3669 (d1 = dp->di_ib[adp->ad_lbn - NDADDR12]) !=
3670 (d2 = adp->ad_newblkno)) {
3671 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3672 panic("%s: indirect pointer #%lld mismatch %lld != %lld",
3673 "softdep_write_inodeblock", (long long)(adp->ad_lbn -
3674 NDADDR12), d1, d2);
3675 }
3676 deplist |= 1 << adp->ad_lbn;
3677 if ((adp->ad_statead_list.wk_state & ATTACHED0x0001) == 0) {
3678 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3679 panic("softdep_write_inodeblock: Unknown state 0x%x",
3680 adp->ad_statead_list.wk_state);
3681 }
3682#endif /* DIAGNOSTIC */
3683 adp->ad_statead_list.wk_state &= ~ATTACHED0x0001;
3684 adp->ad_statead_list.wk_state |= UNDONE0x0002;
3685 }
3686 /*
3687 * The on-disk inode cannot claim to be any larger than the last
3688 * fragment that has been written. Otherwise, the on-disk inode
3689 * might have fragments that were not the last block in the file
3690 * which would corrupt the filesystem.
3691 */
3692 for (lastadp = NULL((void *)0), adp = TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first); adp;
3693 lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)((adp)->ad_next.tqe_next)) {
3694 if (adp->ad_lbn >= NDADDR12)
3695 break;
3696 dp->di_db[adp->ad_lbn] = adp->ad_oldblkno;
3697 /* keep going until hitting a rollback to a frag */
3698 if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
3699 continue;
3700 dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
3701 for (i = adp->ad_lbn + 1; i < NDADDR12; i++) {
3702#ifdef DIAGNOSTIC1
3703 if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) {
3704 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3705 panic("softdep_write_inodeblock: lost dep2");
3706 }
3707#endif /* DIAGNOSTIC */
3708 dp->di_db[i] = 0;
3709 }
3710 for (i = 0; i < NIADDR3; i++) {
3711#ifdef DIAGNOSTIC1
3712 if (dp->di_ib[i] != 0 &&
3713 (deplist & ((1 << NDADDR12) << i)) == 0) {
3714 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
3715 panic("softdep_write_inodeblock: lost dep3");
3716 }
3717#endif /* DIAGNOSTIC */
3718 dp->di_ib[i] = 0;
3719 }
3720 return;
3721 }
3722 /*
3723 * If we have zero'ed out the last allocated block of the file,
3724 * roll back the size to the last currently allocated block.
3725 * We know that this last allocated block is full-sized, as
3726 * we already checked for fragments in the loop above.
3727 */
3728 if (lastadp != NULL((void *)0) &&
3729 dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
3730 for (i = lastadp->ad_lbn; i >= 0; i--)
3731 if (dp->di_db[i] != 0)
3732 break;
3733 dp->di_size = (i + 1) * fs->fs_bsize;
3734 }
3735 /*
3736 * The only dependencies are for indirect blocks.
3737 *
3738 * The file size for indirect block additions is not guaranteed.
3739 * Such a guarantee would be non-trivial to achieve. The conventional
3740 * synchronous write implementation also does not make this guarantee.
3741 * Fsck should catch and fix discrepancies. Arguably, the file size
3742 * can be over-estimated without destroying integrity when the file
3743 * moves into the indirect blocks (i.e., is large). If we want to
3744 * postpone fsck, we are stuck with this argument.
3745 */
3746 for (; adp; adp = TAILQ_NEXT(adp, ad_next)((adp)->ad_next.tqe_next))
3747 dp->di_ib[adp->ad_lbn - NDADDR12] = 0;
3748}
3749#endif /* FFS2 */
3750
3751/*
3752 * This routine is called during the completion interrupt
3753 * service routine for a disk write (from the procedure called
3754 * by the device driver to inform the file system caches of
3755 * a request completion). It should be called early in this
3756 * procedure, before the block is made available to other
3757 * processes or other routines are called.
3758 */
3759/* describes the completed disk write */
3760void
3761softdep_disk_write_complete(struct buf *bp)
3762{
3763 struct worklist *wk;
3764 struct workhead reattach;
3765 struct newblk *newblk;
3766 struct allocindir *aip;
3767 struct allocdirect *adp;
3768 struct indirdep *indirdep;
3769 struct inodedep *inodedep;
3770 struct bmsafemap *bmsafemap;
3771
3772 /*
3773 * If an error occurred while doing the write, then the data
3774 * has not hit the disk and the dependencies cannot be unrolled.
3775 */
3776 if ((bp->b_flags & B_ERROR0x00000400) && !(bp->b_flags & B_INVAL0x00000800))
3777 return;
3778
3779#ifdef DEBUG
3780 if (lk.lkt_held != -1)
3781 panic("softdep_disk_write_complete: lock is held");
3782 lk.lkt_held = -2;
3783#endif
3784 LIST_INIT(&reattach);
3785 while ((wk = LIST_FIRST(&bp->b_dep)((&bp->b_dep)->lh_first)) != NULL((void *)0)) {
3786 WORKLIST_REMOVE(wk);
3787 switch (wk->wk_type) {
3788
3789 case D_PAGEDEP0:
3790 if (handle_written_filepage(WK_PAGEDEP(wk)((struct pagedep *)(wk)), bp))
3791 WORKLIST_INSERT(&reattach, wk);
3792 continue;
3793
3794 case D_INODEDEP1:
3795 if (handle_written_inodeblock(WK_INODEDEP(wk)((struct inodedep *)(wk)), bp))
3796 WORKLIST_INSERT(&reattach, wk);
3797 continue;
3798
3799 case D_BMSAFEMAP3:
3800 bmsafemap = WK_BMSAFEMAP(wk)((struct bmsafemap *)(wk));
3801 while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkhd)((&bmsafemap->sm_newblkhd)->lh_first))) {
3802 newblk->nb_state |= DEPCOMPLETE0x0008;
3803 newblk->nb_bmsafemap = NULL((void *)0);
3804 LIST_REMOVE(newblk, nb_deps);
3805 }
3806 while ((adp =
3807 LIST_FIRST(&bmsafemap->sm_allocdirecthd)((&bmsafemap->sm_allocdirecthd)->lh_first))) {
3808 adp->ad_statead_list.wk_state |= DEPCOMPLETE0x0008;
3809 adp->ad_buf = NULL((void *)0);
3810 LIST_REMOVE(adp, ad_deps);
3811 handle_allocdirect_partdone(adp);
3812 }
3813 while ((aip =
3814 LIST_FIRST(&bmsafemap->sm_allocindirhd)((&bmsafemap->sm_allocindirhd)->lh_first))) {
3815 aip->ai_stateai_list.wk_state |= DEPCOMPLETE0x0008;
3816 aip->ai_buf = NULL((void *)0);
3817 LIST_REMOVE(aip, ai_deps);
3818 handle_allocindir_partdone(aip);
3819 }
3820 while ((inodedep =
3821 LIST_FIRST(&bmsafemap->sm_inodedephd)((&bmsafemap->sm_inodedephd)->lh_first)) != NULL((void *)0)) {
3822 inodedep->id_stateid_list.wk_state |= DEPCOMPLETE0x0008;
3823 LIST_REMOVE(inodedep, id_deps);
3824 inodedep->id_buf = NULL((void *)0);
3825 }
3826 WORKITEM_FREE(bmsafemap, D_BMSAFEMAP)softdep_freequeue_add((struct worklist *)bmsafemap);
3827 continue;
3828
3829 case D_MKDIR11:
3830 handle_written_mkdir(WK_MKDIR(wk)((struct mkdir *)(wk)), MKDIR_BODY0x0020);
3831 continue;
3832
3833 case D_ALLOCDIRECT4:
3834 adp = WK_ALLOCDIRECT(wk)((struct allocdirect *)(wk));
3835 adp->ad_statead_list.wk_state |= COMPLETE0x0004;
3836 handle_allocdirect_partdone(adp);
3837 continue;
3838
3839 case D_ALLOCINDIR6:
3840 aip = WK_ALLOCINDIR(wk)((struct allocindir *)(wk));
3841 aip->ai_stateai_list.wk_state |= COMPLETE0x0004;
3842 handle_allocindir_partdone(aip);
3843 continue;
3844
3845 case D_INDIRDEP5:
3846 indirdep = WK_INDIRDEP(wk)((struct indirdep *)(wk));
3847 if (indirdep->ir_stateir_list.wk_state & GOINGAWAY0x0100)
3848 panic("disk_write_complete: indirdep gone");
3849 memcpy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount);
3850 free(indirdep->ir_saveddata, M_INDIRDEP83, bp->b_bcount);
3851 indirdep->ir_saveddata = NULL((void *)0);
3852 indirdep->ir_stateir_list.wk_state &= ~UNDONE0x0002;
3853 indirdep->ir_stateir_list.wk_state |= ATTACHED0x0001;
3854 while ((aip = LIST_FIRST(&indirdep->ir_donehd)((&indirdep->ir_donehd)->lh_first))) {
3855 handle_allocindir_partdone(aip);
3856 if (aip == LIST_FIRST(&indirdep->ir_donehd)((&indirdep->ir_donehd)->lh_first))
3857 panic("disk_write_complete: not gone");
3858 }
3859 WORKLIST_INSERT(&reattach, wk);
3860 if ((bp->b_flags & B_DELWRI0x00000080) == 0)
3861 stat_indir_blk_ptrs++;
3862 buf_dirty(bp);
3863 continue;
3864
3865 default:
3866 panic("handle_disk_write_complete: Unknown type %s",
3867 TYPENAME(wk->wk_type));
3868 /* NOTREACHED */
3869 }
3870 }
3871 /*
3872 * Reattach any requests that must be redone.
3873 */
3874 while ((wk = LIST_FIRST(&reattach)((&reattach)->lh_first)) != NULL((void *)0)) {
3875 WORKLIST_REMOVE(wk);
3876 WORKLIST_INSERT(&bp->b_dep, wk);
3877 }
3878#ifdef DEBUG
3879 if (lk.lkt_held != -2)
3880 panic("softdep_disk_write_complete: lock lost");
3881 lk.lkt_held = -1;
3882#endif
3883}
3884
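The shape of this routine is: drain bp->b_dep one item at a time, and whenever a handler reports that its rollback was undone (so the buffer was redirtied and must be watched again), park the item on the local reattach list and splice everything back onto the buffer at the end. The same collect-then-reinsert pattern, reduced to a self-contained <sys/queue.h> sketch with a hypothetical item type:

	#include <sys/queue.h>

	struct item {
		LIST_ENTRY(item) entry;
		int needs_redo;
	};
	LIST_HEAD(ihead, item);

	/* Drain src; park unfinished items locally, then put them back. */
	static void
	process_and_reattach(struct ihead *src)
	{
		struct ihead reattach;
		struct item *it;

		LIST_INIT(&reattach);
		while ((it = LIST_FIRST(src)) != NULL) {
			LIST_REMOVE(it, entry);
			if (it->needs_redo)
				LIST_INSERT_HEAD(&reattach, it, entry);
		}
		while ((it = LIST_FIRST(&reattach)) != NULL) {
			LIST_REMOVE(it, entry);
			LIST_INSERT_HEAD(src, it, entry);
		}
	}

WORKLIST_INSERT and WORKLIST_REMOVE above are the same LIST operations plus bookkeeping of the ONWORKLIST state bit, as their expansions show.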
3885/*
3886 * Called from within softdep_disk_write_complete above. Note that
3887 * this routine is always called from interrupt level with further
3888 * splbio interrupts blocked.
3889 */
3890/* the completed allocdirect */
3891STATIC void
3892handle_allocdirect_partdone(struct allocdirect *adp)
3893{
3894 struct allocdirect *listadp;
3895 struct inodedep *inodedep;
3896 long bsize, delay;
3897
3898 splassert(IPL_BIO);
3899
3900 if ((adp->ad_statead_list.wk_state & ALLCOMPLETE(0x0001 | 0x0004 | 0x0008)) != ALLCOMPLETE(0x0001 | 0x0004 | 0x0008))
3901 return;
3902 if (adp->ad_buf != NULL((void *)0))
3903 panic("handle_allocdirect_partdone: dangling dep");
3904
3905 /*
3906 * The on-disk inode cannot claim to be any larger than the last
3907 * fragment that has been written. Otherwise, the on-disk inode
3908 * might have fragments that were not the last block in the file
3909 * which would corrupt the filesystem. Thus, we cannot free any
3910 * allocdirects after one whose ad_oldblkno claims a fragment as
3911 * these blocks must be rolled back to zero before writing the inode.
3912 * We check the currently active set of allocdirects in id_inoupdt.
3913 */
3914 inodedep = adp->ad_inodedep;
3915 bsize = inodedep->id_fs->fs_bsize;
3916 TAILQ_FOREACH(listadp, &inodedep->id_inoupdt, ad_next) {
3917 /* found our block */
3918 if (listadp == adp)
3919 break;
3920 /* continue if ad_oldlbn is not a fragment */
3921 if (listadp->ad_oldsize == 0 ||
3922 listadp->ad_oldsize == bsize)
3923 continue;
3924 /* hit a fragment */
3925 return;
3926 }
3927 /*
3928 * If we have reached the end of the current list without
3929 * finding the just finished dependency, then it must be
3930 * on the future dependency list. Future dependencies cannot
3931 * be freed until they are moved to the current list.
3932 */
3933 if (listadp == NULL((void *)0)) {
3934#ifdef DEBUG
3935 TAILQ_FOREACH(listadp, &inodedep->id_newinoupdt, ad_next)
3936 /* found our block */
3937 if (listadp == adp)
3938 break;
3939 if (listadp == NULL((void *)0))
3940 panic("handle_allocdirect_partdone: lost dep");
3941#endif /* DEBUG */
3942 return;
3943 }
3944 /*
3945 * If we have found the just finished dependency, then free
3946 * it along with anything that follows it that is complete.
3947 * If the inode still has a bitmap dependency, then it has
3948 * never been written to disk, hence the on-disk inode cannot
3949 * reference the old fragment so we can free it without delay.
3950 */
3951 delay = (inodedep->id_stateid_list.wk_state & DEPCOMPLETE0x0008);
3952 for (; adp; adp = listadp) {
3953 listadp = TAILQ_NEXT(adp, ad_next)((adp)->ad_next.tqe_next);
3954 if ((adp->ad_statead_list.wk_state & ALLCOMPLETE(0x0001 | 0x0004 | 0x0008)) != ALLCOMPLETE(0x0001 | 0x0004 | 0x0008))
3955 return;
3956 free_allocdirect(&inodedep->id_inoupdt, adp, delay);
3957 }
3958}
3959
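The fragment rule applied here mirrors the initiate_write_* routines: once some allocdirect rolls back to a fragment, nothing past it on the list may be freed. Concretely, a rollback to a fragment of ad_oldsize bytes at logical block ad_lbn pins the on-disk size at fs_bsize * ad_lbn + ad_oldsize; with 8192-byte blocks and a 2048-byte old fragment at lbn 5, the on-disk inode may claim at most 8192 * 5 + 2048 = 43008 bytes until the dependency completes.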
3960/*
3961 * Called from within softdep_disk_write_complete above. Note that
3962 * this routine is always called from interrupt level with further
3963 * splbio interrupts blocked.
3964 */
3965/* the completed allocindir */
3966STATIC void
3967handle_allocindir_partdone(struct allocindir *aip)
3968{
3969 struct indirdep *indirdep;
3970
3971 splassert(IPL_BIO);
3972
3973 if ((aip->ai_stateai_list.wk_state & ALLCOMPLETE(0x0001 | 0x0004 | 0x0008)) != ALLCOMPLETE(0x0001 | 0x0004 | 0x0008))
3974 return;
3975 if (aip->ai_buf != NULL((void *)0))
3976 panic("handle_allocindir_partdone: dangling dependency");
3977 indirdep = aip->ai_indirdep;
3978 if (indirdep->ir_stateir_list.wk_state & UNDONE0x0002) {
3979 LIST_REMOVE(aip, ai_next);
3980 LIST_INSERT_HEAD(&indirdep->ir_donehd, aip, ai_next);
3981 return;
3982 }
3983 if (indirdep->ir_stateir_list.wk_state & UFS1FMT0x2000)
3984 ((int32_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] =
3985 aip->ai_newblkno;
3986 else
3987 ((int64_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] =
3988 aip->ai_newblkno;
3989 LIST_REMOVE(aip, ai_next);
3990 if (aip->ai_freefrag != NULL((void *)0))
3991 add_to_worklist(&aip->ai_freefrag->ff_list);
3992 WORKITEM_FREE(aip, D_ALLOCINDIR)softdep_freequeue_add((struct worklist *)aip);
3993}
3994
3995/*
3996 * Called from within softdep_disk_write_complete above to restore
3997 * in-memory inode block contents to their most up-to-date state. Note
3998 * that this routine is always called from interrupt level with further
3999 * splbio interrupts blocked.
4000 */
4001/* buffer containing the inode block */
4002STATIC int
4003handle_written_inodeblock(struct inodedep *inodedep, struct buf *bp)
4004{
4005 struct worklist *wk, *filefree;
4006 struct allocdirect *adp, *nextadp;
4007 struct ufs1_dinode *dp1 = NULL((void *)0);
4008 struct ufs2_dinode *dp2 = NULL((void *)0);
4009 int hadchanges, fstype;
4010
4011 splassert(IPL_BIO);
4012
4013 if ((inodedep->id_stateid_list.wk_state & IOSTARTED0x0200) == 0)
4014 panic("handle_written_inodeblock: not started");
4015 inodedep->id_stateid_list.wk_state &= ~IOSTARTED0x0200;
4016
4017 if (inodedep->id_fs->fs_magic == FS_UFS1_MAGIC0x011954) {
4018 fstype = UM_UFS11;
4019 dp1 = (struct ufs1_dinode *) bp->b_data +
4020 ino_to_fsbo(inodedep->id_fs, inodedep->id_ino)((inodedep->id_ino) % ((inodedep->id_fs)->fs_inopb));
4021 } else {
4022 fstype = UM_UFS22;
4023 dp2 = (struct ufs2_dinode *) bp->b_data +
4024 ino_to_fsbo(inodedep->id_fs, inodedep->id_ino)((inodedep->id_ino) % ((inodedep->id_fs)->fs_inopb));
4025 }
4026
4027 /*
4028 * If we had to rollback the inode allocation because of
4029 * bitmaps being incomplete, then simply restore it.
4030 * Keep the block dirty so that it will not be reclaimed until
4031 * all associated dependencies have been cleared and the
4032 * corresponding updates written to disk.
4033 */
4034 if (inodedep->id_savedino1id_un.idu_savedino1 != NULL((void *)0)) {
4035 if (fstype == UM_UFS11)
4036 *dp1 = *inodedep->id_savedino1id_un.idu_savedino1;
4037 else
4038 *dp2 = *inodedep->id_savedino2id_un.idu_savedino2;
4039 free(inodedep->id_savedino1id_un.idu_savedino1, M_INODEDEP79, inodedep->id_unsize);
4040 inodedep->id_savedino1id_un.idu_savedino1 = NULL((void *)0);
4041 if ((bp->b_flags & B_DELWRI0x00000080) == 0)
4042 stat_inode_bitmap++;
4043 buf_dirty(bp);
4044 return (1);
4045 }
4046 inodedep->id_stateid_list.wk_state |= COMPLETE0x0004;
4047 /*
4048 * Roll forward anything that had to be rolled back before
4049 * the inode could be updated.
4050 */
4051 hadchanges = 0;
4052 for (adp = TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first); adp; adp = nextadp) {
4053 nextadp = TAILQ_NEXT(adp, ad_next)((adp)->ad_next.tqe_next);
4054 if (adp->ad_statead_list.wk_state & ATTACHED0x0001)
4055 panic("handle_written_inodeblock: new entry");
4056 if (fstype == UM_UFS11) {
4057 if (adp->ad_lbn < NDADDR12) {
4058 if (dp1->di_db[adp->ad_lbn] != adp->ad_oldblkno)
4059 panic("%s: %s #%lld mismatch %d != "
4060 "%lld",
4061 "handle_written_inodeblock",
4062 "direct pointer",
4063 (long long)adp->ad_lbn,
4064 dp1->di_db[adp->ad_lbn],
4065 (long long)adp->ad_oldblkno);
4066 dp1->di_db[adp->ad_lbn] = adp->ad_newblkno;
4067 } else {
4068 if (dp1->di_ib[adp->ad_lbn - NDADDR12] != 0)
4069 panic("%s: %s #%lld allocated as %d",
4070 "handle_written_inodeblock",
4071 "indirect pointer",
4072 (long long)(adp->ad_lbn - NDADDR12),
4073 dp1->di_ib[adp->ad_lbn - NDADDR12]);
4074 dp1->di_ib[adp->ad_lbn - NDADDR12] =
4075 adp->ad_newblkno;
4076 }
4077 } else {
4078 if (adp->ad_lbn < NDADDR12) {
4079 if (dp2->di_db[adp->ad_lbn] != adp->ad_oldblkno)
4080 panic("%s: %s #%lld mismatch %lld != "
4081 "%lld", "handle_written_inodeblock",
4082 "direct pointer",
4083 (long long)adp->ad_lbn,
4084 dp2->di_db[adp->ad_lbn],
4085 (long long)adp->ad_oldblkno);
4086 dp2->di_db[adp->ad_lbn] = adp->ad_newblkno;
4087 } else {
4088 if (dp2->di_ib[adp->ad_lbn - NDADDR12] != 0)
4089 panic("%s: %s #%lld allocated as %lld",
4090 "handle_written_inodeblock",
4091 "indirect pointer",
4092 (long long)(adp->ad_lbn - NDADDR12),
4093 dp2->di_ib[adp->ad_lbn - NDADDR12]);
4094 dp2->di_ib[adp->ad_lbn - NDADDR12] =
4095 adp->ad_newblkno;
4096 }
4097 }
4098 adp->ad_statead_list.wk_state &= ~UNDONE0x0002;
4099 adp->ad_statead_list.wk_state |= ATTACHED0x0001;
4100 hadchanges = 1;
4101 }
4102 if (hadchanges && (bp->b_flags & B_DELWRI0x00000080) == 0)
4103 stat_direct_blk_ptrs++;
4104 /*
4105 * Reset the file size to its most up-to-date value.
4106 */
4107 if (inodedep->id_savedsize == -1)
4108 panic("handle_written_inodeblock: bad size");
4109
4110 if (fstype == UM_UFS11) {
4111 if (dp1->di_size != inodedep->id_savedsize) {
4112 dp1->di_size = inodedep->id_savedsize;
4113 hadchanges = 1;
4114 }
4115 } else {
4116 if (dp2->di_size != inodedep->id_savedsize) {
4117 dp2->di_size = inodedep->id_savedsize;
4118 hadchanges = 1;
4119 }
4120 }
4121 inodedep->id_savedsize = -1;
4122 /*
4123 * If there were any rollbacks in the inode block, then it must be
4124 * marked dirty so that it will eventually get written back in
4125 * its correct form.
4126 */
4127 if (hadchanges)
4128 buf_dirty(bp);
4129 /*
4130 * Process any allocdirects that completed during the update.
4131 */
4132 if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first)) != NULL((void *)0))
4133 handle_allocdirect_partdone(adp);
4134 /*
4135 * Process deallocations that were held pending until the
4136 * inode had been written to disk. Freeing of the inode
4137 * is delayed until after all blocks have been freed to
4138 * avoid creation of new <vfsid, inum, lbn> triples
4139 * before the old ones have been deleted.
4140 */
4141 filefree = NULL((void *)0);
4142 while ((wk = LIST_FIRST(&inodedep->id_bufwait)((&inodedep->id_bufwait)->lh_first)) != NULL((void *)0)) {
4143 WORKLIST_REMOVE(wk);
4144 switch (wk->wk_type) {
4145
4146 case D_FREEFILE9:
4147 /*
4148 * We defer adding filefree to the worklist until
4149 * all other additions have been made to ensure
4150 * that it will be done after all the old blocks
4151 * have been freed.
4152 */
4153 if (filefree != NULL((void *)0))
4154 panic("handle_written_inodeblock: filefree");
4155 filefree = wk;
4156 continue;
4157
4158 case D_MKDIR11:
4159 handle_written_mkdir(WK_MKDIR(wk)((struct mkdir *)(wk)), MKDIR_PARENT0x0010);
4160 continue;
4161
4162 case D_DIRADD10:
4163 diradd_inode_written(WK_DIRADD(wk)((struct diradd *)(wk)), inodedep);
4164 continue;
4165
4166 case D_FREEBLKS8:
4167 wk->wk_state |= COMPLETE0x0004;
4168 if ((wk->wk_state & ALLCOMPLETE(0x0001 | 0x0004 | 0x0008)) != ALLCOMPLETE(0x0001 | 0x0004 | 0x0008))
4169 continue;
4170 /* FALLTHROUGH */
4171 case D_FREEFRAG7:
4172 case D_DIRREM12:
4173 add_to_worklist(wk);
4174 continue;
4175
4176 case D_NEWDIRBLK13:
4177 free_newdirblk(WK_NEWDIRBLK(wk)((struct newdirblk *)(wk)));
4178 continue;
4179
4180 default:
4181 panic("handle_written_inodeblock: Unknown type %s",
4182 TYPENAME(wk->wk_type));
4183 /* NOTREACHED */
4184 }
4185 }
4186 if (filefree != NULL((void *)0)) {
4187 if (free_inodedep(inodedep) == 0)
4188 panic("handle_written_inodeblock: live inodedep");
4189 add_to_worklist(filefree);
4190 return (0);
4191 }
4192
4193 /*
4194 * If no outstanding dependencies, free it.
4195 */
4196 if (free_inodedep(inodedep) ||
4197 TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first) == NULL((void *)0))
4198 return (0);
4199 return (hadchanges);
4200}
4201
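Roll-forward is the mirror image of the rollback performed in initiate_write_inodeblock_*: each direct pointer that was forced back to ad_oldblkno for the write is verified and advanced to ad_newblkno now that the old value is safely on disk. The core of the check, reduced to a hedged miniature (hypothetical helper; UFS1-width pointers):

	/*
	 * The slot must still hold the rolled-back value; anything else
	 * means the inode block changed behind the dependency's back.
	 */
	static void
	roll_forward(int32_t *slot, int32_t oldblk, int32_t newblk)
	{
		if (*slot != oldblk)
			panic("roll_forward: pointer mismatch");
		*slot = newblk;
	}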
4202/*
4203 * Process a diradd entry after its dependent inode has been written.
4204 * This routine must be called with splbio interrupts blocked.
4205 */
4206STATIC void
4207diradd_inode_written(struct diradd *dap, struct inodedep *inodedep)
4208{
4209 struct pagedep *pagedep;
4210
4211 splassert(IPL_BIO);
4212
4213 dap->da_stateda_list.wk_state |= COMPLETE0x0004;
4214 if ((dap->da_stateda_list.wk_state & ALLCOMPLETE(0x0001 | 0x0004 | 0x0008)) == ALLCOMPLETE(0x0001 | 0x0004 | 0x0008)) {
4215 if (dap->da_stateda_list.wk_state & DIRCHG0x0080)
4216 pagedep = dap->da_previousda_un.dau_previous->dm_pagedepdm_un.dmu_pagedep;
4217 else
4218 pagedep = dap->da_pagedepda_un.dau_pagedep;
4219 LIST_REMOVE(dap, da_pdlist);
4220 LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
4221 }
4222 WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list);
4223}
4224
4225/*
4226 * Handle the completion of a mkdir dependency.
4227 */
4228STATIC void
4229handle_written_mkdir(struct mkdir *mkdir, int type)
4230{
4231 struct diradd *dap;
4232 struct pagedep *pagedep;
4233
4234 splassert(IPL_BIO);
4235
4236 if (mkdir->md_statemd_list.wk_state != type)
4237 panic("handle_written_mkdir: bad type");
4238 dap = mkdir->md_diradd;
4239 dap->da_stateda_list.wk_state &= ~type;
4240 if ((dap->da_stateda_list.wk_state & (MKDIR_PARENT0x0010 | MKDIR_BODY0x0020)) == 0)
4241 dap->da_stateda_list.wk_state |= DEPCOMPLETE0x0008;
4242 if ((dap->da_stateda_list.wk_state & ALLCOMPLETE(0x0001 | 0x0004 | 0x0008)) == ALLCOMPLETE(0x0001 | 0x0004 | 0x0008)) {
4243 if (dap->da_stateda_list.wk_state & DIRCHG0x0080)
4244 pagedep = dap->da_previousda_un.dau_previous->dm_pagedepdm_un.dmu_pagedep;
4245 else
4246 pagedep = dap->da_pagedepda_un.dau_pagedep;
4247 LIST_REMOVE(dap, da_pdlist);
4248 LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
4249 }
4250 LIST_REMOVE(mkdir, md_mkdirs);
4251 WORKITEM_FREE(mkdir, D_MKDIR)softdep_freequeue_add((struct worklist *)mkdir);
4252}
4253
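A mkdir produces two of these dependencies for a single diradd: one tagged MKDIR_BODY, cleared from softdep_disk_write_complete() once the block holding the new directory's "." and ".." entries has been written, and one tagged MKDIR_PARENT, cleared from handle_written_inodeblock() once the parent inode with its incremented link count has been written. Only when both bits are gone does the diradd gain DEPCOMPLETE and become eligible for the pending list.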
4254/*
4255 * Called from within softdep_disk_write_complete above.
4256 * A write operation was just completed. Removed inodes can
4257 * now be freed and associated block pointers may be committed.
4258 * Note that this routine is always called from interrupt level
4259 * with further splbio interrupts blocked.
4260 */
4261/* buffer containing the written page */
4262STATIC int
4263handle_written_filepage(struct pagedep *pagedep, struct buf *bp)
4264{
4265 struct dirrem *dirrem;
4266 struct diradd *dap, *nextdap;
4267 struct direct *ep;
4268 int i, chgs;
4269
4270 splassert(IPL_BIO);
4271
4272 if ((pagedep->pd_statepd_list.wk_state & IOSTARTED0x0200) == 0)
4273 panic("handle_written_filepage: not started");
4274 pagedep->pd_statepd_list.wk_state &= ~IOSTARTED0x0200;
4275 /*
4276 * Process any directory removals that have been committed.
4277 */
4278 while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd)((&pagedep->pd_dirremhd)->lh_first)) != NULL((void *)0)) {
4279 LIST_REMOVE(dirrem, dm_next);
4280 dirrem->dm_dirinumdm_un.dmu_dirinum = pagedep->pd_ino;
4281 add_to_worklist(&dirrem->dm_list);
4282 }
4283 /*
4284 * Free any directory additions that have been committed.
4285 * If it is a newly allocated block, we have to wait until
4286 * the on-disk directory inode claims the new block.
4287 */
4288 if ((pagedep->pd_statepd_list.wk_state & NEWBLOCK0x0800) == 0)
4289 while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)((&pagedep->pd_pendinghd)->lh_first)) != NULL((void *)0))
4290 free_diradd(dap);
4291 /*
4292 * Uncommitted directory entries must be restored.
4293 */
4294 for (chgs = 0, i = 0; i < DAHASHSZ6; i++) {
4295 for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i])((&pagedep->pd_diraddhd[i])->lh_first); dap;
4296 dap = nextdap) {
4297 nextdap = LIST_NEXT(dap, da_pdlist)((dap)->da_pdlist.le_next);
4298 if (dap->da_stateda_list.wk_state & ATTACHED0x0001)
4299 panic("handle_written_filepage: attached");
4300 ep = (struct direct *)
4301 ((char *)bp->b_data + dap->da_offset);
4302 ep->d_ino = dap->da_newinum;
4303 dap->da_stateda_list.wk_state &= ~UNDONE0x0002;
4304 dap->da_stateda_list.wk_state |= ATTACHED0x0001;
4305 chgs = 1;
4306 /*
4307 * If the inode referenced by the directory has
4308 * been written out, then the dependency can be
4309 * moved to the pending list.
4310 */
4311 if ((dap->da_stateda_list.wk_state & ALLCOMPLETE(0x0001 | 0x0004 | 0x0008)) == ALLCOMPLETE(0x0001 | 0x0004 | 0x0008)) {
4312 LIST_REMOVE(dap, da_pdlist);
4313 LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap,
4314     da_pdlist);
4315 }
4316 }
4317 }
4318 /*
4319 * If there were any rollbacks in the directory, then it must be
4320 * marked dirty so that it will eventually get written back in
4321 * its correct form.
4322 */
4323 if (chgs) {
4324 if ((bp->b_flags & B_DELWRI0x00000080) == 0)
4325 stat_dir_entry++;
4326 buf_dirty(bp);
4327 return (1);
4328 }
4329 /*
4330 * If we are not waiting for a new directory block to be
4331 * claimed by its inode, then the pagedep will be freed.
4332 * Otherwise it will remain to track any new entries on
4333 * the page in case they are fsync'ed.
4334 */
4335 if ((pagedep->pd_statepd_list.wk_state & NEWBLOCK0x0800) == 0) {
4336 LIST_REMOVE(pagedep, pd_hash);
4337 WORKITEM_FREE(pagedep, D_PAGEDEP)softdep_freequeue_add((struct worklist *)pagedep);
4338 }
4339 return (0);
4340}
4341
4342/*
4343 * Writing back in-core inode structures.
4344 *
4345 * The file system only accesses an inode's contents when it occupies an
4346 * "in-core" inode structure. These "in-core" structures are separate from
4347 * the page frames used to cache inode blocks. Only the latter are
4348 * transferred to/from the disk. So, when the updated contents of the
4349 * "in-core" inode structure are copied to the corresponding in-memory inode
4350 * block, the dependencies are also transferred. The following procedure is
4351 * called when copying a dirty "in-core" inode to a cached inode block.
4352 */
4353
4354/*
4355 * Called when an inode is loaded from disk. If the effective link count
4356 * differed from the actual link count when it was last flushed, then we
4357 * need to ensure that the correct effective link count is put back.
4358 */
4359/* the "in_core" copy of the inode */
4360void
4361softdep_load_inodeblock(struct inode *ip)
4362{
4363 struct inodedep *inodedep;
4364
4365 /*
4366 * Check for alternate nlink count.
4367 */
4368 ip->i_effnlink = DIP(ip, nlink);
4369 ACQUIRE_LOCK(&lk)(&lk)->lkt_spl = splraise(0x3);
4370 if (inodedep_lookup(ip->i_fsinode_u.fs, ip->i_number, 0, &inodedep) == 0) {
4371 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
4372 return;
4373 }
4374 ip->i_effnlink -= inodedep->id_nlinkdelta;
4375 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
4376}
4377
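The adjustment is i_effnlink = di_nlink - id_nlinkdelta: the on-disk link count minus the removals performed in memory but not yet committed. For example, after unlink(2) of one name of a file with two on-disk links, di_nlink is still 2 while id_nlinkdelta is 1, so the freshly loaded inode starts with the correct effective count of 1.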
4378/*
4379 * This routine is called just before the "in-core" inode
4380 * information is to be copied to the in-memory inode block.
4381 * Recall that an inode block contains several inodes. If
4382 * the force flag is set, then the dependencies will be
4383 * cleared so that the update can always be made. Note that
4384 * the buffer is locked when this routine is called, so we
4385 * will never be in the middle of writing the inode block
4386 * to disk.
4387 */
4388/* the "in_core" copy of the inode */
4389/* the buffer containing the inode block */
4390/* nonzero => update must be allowed */
4391void
4392softdep_update_inodeblock(struct inode *ip, struct buf *bp, int waitfor)
4393{
4394 struct inodedep *inodedep;
4395 struct worklist *wk;
4396 int error, gotit;
4397
4398 /*
4399 * If the effective link count is not equal to the actual link
4400 * count, then we must track the difference in an inodedep while
4401 * the inode is (potentially) tossed out of the cache. Otherwise,
4402 * if there is no existing inodedep, then there are no dependencies
4403 * to track.
4404 */
4405 ACQUIRE_LOCK(&lk)(&lk)->lkt_spl = splraise(0x3);
4406 if (inodedep_lookup(ip->i_fsinode_u.fs, ip->i_number, 0, &inodedep) == 0) {
4407 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
4408 if (ip->i_effnlink != DIP(ip, nlink))
4409 panic("softdep_update_inodeblock: bad link count");
4410 return;
4411 }
4412 if (inodedep->id_nlinkdelta != DIP(ip, nlink) - ip->i_effnlink) {
4413 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
4414 panic("softdep_update_inodeblock: bad delta");
4415 }
4416 /*
4417 * Changes have been initiated. Anything depending on these
4418 * changes cannot occur until this inode has been written.
4419 */
4420 inodedep->id_stateid_list.wk_state &= ~COMPLETE0x0004;
4421 if ((inodedep->id_stateid_list.wk_state & ONWORKLIST0x8000) == 0)
4422 WORKLIST_INSERT(&bp->b_dep, &inodedep->id_list);
4423 /*
4424 * Any new dependencies associated with the incore inode must
4425 * now be moved to the list associated with the buffer holding
4426 * the in-memory copy of the inode. Once merged process any
4427 * allocdirects that are completed by the merger.
4428 */
4429 merge_inode_lists(inodedep);
4430 if (TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first) != NULL((void *)0))
4431 handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first));
4432 /*
4433 * Now that the inode has been pushed into the buffer, the
4434 * operations dependent on the inode being written to disk
4435 * can be moved to the id_bufwait so that they will be
4436 * processed when the buffer I/O completes.
4437 */
4438 while ((wk = LIST_FIRST(&inodedep->id_inowait)((&inodedep->id_inowait)->lh_first)) != NULL((void *)0)) {
4439 WORKLIST_REMOVE(wk);
4440 WORKLIST_INSERT(&inodedep->id_bufwait, wk);
4441 }
4442 /*
4443 * Newly allocated inodes cannot be written until the bitmap
4444 * that allocates them has been written (indicated by
4445 * DEPCOMPLETE being set in id_state). If we are doing a
4446 * forced sync (e.g., an fsync on a file), we force the bitmap
4447 * to be written so that the update can be done.
4448 */
4449 do {
4450 if ((inodedep->id_stateid_list.wk_state & DEPCOMPLETE0x0008) != 0 || waitfor == 0) {
4451 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
4452 return;
4453 }
4454 bp = inodedep->id_buf;
4455 gotit = getdirtybuf(bp, MNT_WAIT1);
4456 } while (gotit == -1);
4457 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
4458 if (gotit && (error = bwrite(bp)) != 0)
4459 softdep_error("softdep_update_inodeblock: bwrite", error);
4460 if ((inodedep->id_stateid_list.wk_state & DEPCOMPLETE0x0008) == 0)
4461 panic("softdep_update_inodeblock: update failed");
4462}
4463
4464/*
4465 * Merge the new inode dependency list (id_newinoupdt) into the old
4466 * inode dependency list (id_inoupdt). This routine must be called
4467 * with splbio interrupts blocked.
4468 */
4469STATIC void
4470merge_inode_lists(struct inodedep *inodedep)
4471{
4472 struct allocdirect *listadp, *newadp;
4473
4474 splassert(IPL_BIO);
4475
4476 newadp = TAILQ_FIRST(&inodedep->id_newinoupdt)((&inodedep->id_newinoupdt)->tqh_first);
4477 for (listadp = TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first); listadp && newadp;) {
4478 if (listadp->ad_lbn < newadp->ad_lbn) {
4479 listadp = TAILQ_NEXT(listadp, ad_next)((listadp)->ad_next.tqe_next);
4480 continue;
4481 }
4482 TAILQ_REMOVE(&inodedep->id_newinoupdt, newadp, ad_next);
4483 TAILQ_INSERT_BEFORE(listadp, newadp, ad_next);
4484 if (listadp->ad_lbn == newadp->ad_lbn) {
4485 allocdirect_merge(&inodedep->id_inoupdt, newadp,
4486 listadp);
4487 listadp = newadp;
4488 }
4489 newadp = TAILQ_FIRST(&inodedep->id_newinoupdt)((&inodedep->id_newinoupdt)->tqh_first);
4490 }
4491 TAILQ_CONCAT(&inodedep->id_inoupdt, &inodedep->id_newinoupdt, ad_next);
4492}
4493
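Both lists are kept sorted by ad_lbn, so this is a sorted-list merge built from the TAILQ macros: each new entry is inserted before the first old entry with an equal or greater lbn, and whatever remains is appended in one TAILQ_CONCAT. A self-contained sketch of that shape (hypothetical element type; the real routine additionally collapses equal-lbn pairs through allocdirect_merge()):

	#include <sys/queue.h>

	struct ad {
		TAILQ_ENTRY(ad) next;
		long lbn;
	};
	TAILQ_HEAD(adhead, ad);

	static void
	merge_sorted(struct adhead *old, struct adhead *new)
	{
		struct ad *o, *n;

		while ((n = TAILQ_FIRST(new)) != NULL) {
			TAILQ_FOREACH(o, old, next)
				if (o->lbn >= n->lbn)
					break;
			if (o == NULL)
				break;	/* remainder is all larger: append */
			TAILQ_REMOVE(new, n, next);
			TAILQ_INSERT_BEFORE(o, n, next);
		}
		TAILQ_CONCAT(old, new, next);
	}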
4494/*
4495 * If we are doing an fsync, then we must ensure that any directory
4496 * entries for the inode have been written after the inode gets to disk.
4497 */
4498/* the "in_core" copy of the inode */
4499int
4500softdep_fsync(struct vnode *vp)
4501{
4502 struct inodedep *inodedep;
4503 struct pagedep *pagedep;
4504 struct worklist *wk;
4505 struct diradd *dap;
4506 struct mount *mnt;
4507 struct vnode *pvp;
4508 struct inode *ip;
4509 struct inode *pip;
4510 struct buf *bp;
4511 struct fs *fs;
4512 struct proc *p = CURPROC; /* XXX */
4513 int error, flushparent;
4514 ufsino_t parentino;
4515 daddr_t lbn;
4516
4517 ip = VTOI(vp)((struct inode *)(vp)->v_data);
4518 fs = ip->i_fsinode_u.fs;
4519 ACQUIRE_LOCK(&lk)(&lk)->lkt_spl = splraise(0x3);
4520 if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0) {
4521 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
4522 return (0);
4523 }
4524 if (LIST_FIRST(&inodedep->id_inowait)((&inodedep->id_inowait)->lh_first) != NULL((void *)0) ||
4525 LIST_FIRST(&inodedep->id_bufwait)((&inodedep->id_bufwait)->lh_first) != NULL((void *)0) ||
4526 TAILQ_FIRST(&inodedep->id_inoupdt)((&inodedep->id_inoupdt)->tqh_first) != NULL((void *)0) ||
4527 TAILQ_FIRST(&inodedep->id_newinoupdt)((&inodedep->id_newinoupdt)->tqh_first) != NULL((void *)0)) {
4528 FREE_LOCK(&lk)spllower((&lk)->lkt_spl);
4529 panic("softdep_fsync: pending ops");
4530 }
4531 for (error = 0, flushparent = 0; ; ) {
Value stored to 'error' is never read
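Editor's note: the flagged dead store is the `error = 0` initializer in the `for` header above. Every later read of `error` (lines 4585, 4598, 4604, 4614, 4620) is preceded on all paths by a fresh assignment (lines 4583, 4597, 4603, 4613, 4616), and the early exits at lines 4533, 4571 and 4624 reach the unconditional `return (0)` at line 4627 without reading it. A minimal candidate fix (an editorial sketch, not a committed change) is to drop the initializer:

    for (flushparent = 0; ; ) {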
4532         if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) == NULL)
4533             break;
4534         if (wk->wk_type != D_DIRADD) {
4535             FREE_LOCK(&lk);
4536             panic("softdep_fsync: Unexpected type %s",
4537                 TYPENAME(wk->wk_type));
4538         }
4539         dap = WK_DIRADD(wk);
4540         /*
4541          * Flush our parent if this directory entry has a MKDIR_PARENT
4542          * dependency or is contained in a newly allocated block.
4543          */
4544         if (dap->da_state & DIRCHG)
4545             pagedep = dap->da_previous->dm_pagedep;
4546         else
4547             pagedep = dap->da_pagedep;
4548         mnt = pagedep->pd_mnt;
4549         parentino = pagedep->pd_ino;
4550         lbn = pagedep->pd_lbn;
4551         if ((dap->da_state & (MKDIR_BODY | COMPLETE)) != COMPLETE) {
4552             FREE_LOCK(&lk);
4553             panic("softdep_fsync: dirty");
4554         }
4555         if ((dap->da_state & MKDIR_PARENT) ||
4556             (pagedep->pd_state & NEWBLOCK))
4557             flushparent = 1;
4558         else
4559             flushparent = 0;
4560         /*
4561          * If we are being fsync'ed as part of vgone'ing this vnode,
4562          * then we will not be able to release and recover the
4563          * vnode below, so we just have to give up on writing its
4564          * directory entry out. It will eventually be written, just
4565          * not now, but then the user was not asking to have it
4566          * written, so we are not breaking any promises.
4567          */
4568         mtx_enter(&vnode_mtx);
4569         if (vp->v_lflag & VXLOCK) {
4570             mtx_leave(&vnode_mtx);
4571             break;
4572         }
4573         mtx_leave(&vnode_mtx);
4574         /*
4575          * We prevent deadlock by always fetching inodes from the
4576          * root, moving down the directory tree. Thus, when fetching
4577          * our parent directory, we must unlock ourselves before
4578          * requesting the lock on our parent. See the comment in
4579          * ufs_lookup for details on possible races.
4580          */
4581         FREE_LOCK(&lk);
4582         VOP_UNLOCK(vp);
4583         error = VFS_VGET(mnt, parentino, &pvp);
4584         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4585         if (error != 0)
4586             return (error);
4587         /*
4588          * All MKDIR_PARENT dependencies and all the NEWBLOCK pagedeps
4589          * that are contained in direct blocks will be resolved by
4590          * doing a UFS_UPDATE. Pagedeps contained in indirect blocks
4591          * may require a complete sync'ing of the directory. So, we
4592          * try the cheap and fast UFS_UPDATE first, and if that fails,
4593          * then we do the slower VOP_FSYNC of the directory.
4594          */
4595         pip = VTOI(pvp);
4596         if (flushparent) {
4597             error = UFS_UPDATE(pip, 1);
4598             if (error) {
4599                 vput(pvp);
4600                 return (error);
4601             }
4602             if (pagedep->pd_state & NEWBLOCK) {
4603                 error = VOP_FSYNC(pvp, p->p_ucred, MNT_WAIT, p);
4604                 if (error) {
4605                     vput(pvp);
4606                     return (error);
4607                 }
4608             }
4609         }
4610         /*
4611          * Flush directory page containing the inode's name.
4612          */
4613         error = bread(pvp, lbn, fs->fs_bsize, &bp);
4614         if (error == 0) {
4615             bp->b_bcount = blksize(fs, pip, lbn);
4616             error = bwrite(bp);
4617         } else
4618             brelse(bp);
4619         vput(pvp);
4620         if (error != 0)
4621             return (error);
4622         ACQUIRE_LOCK(&lk);
4623         if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0)
4624             break;
4625     }
4626     FREE_LOCK(&lk);
4627     return (0);
4628 }
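Editor's note: the FREE_LOCK/VOP_UNLOCK/VFS_VGET/vn_lock sequence at lines 4581-4584 is the standard way to honor a root-to-leaf lock order when a child needs its parent: drop the child's lock, take the parent, then re-take the child. A hedged userland sketch of that shape using pthreads; struct dir and lock_parent are illustrative names, not kernel API.

    #include <pthread.h>

    struct dir {
        pthread_mutex_t lock;
        struct dir *parent;
    };

    /*
     * Called with child->lock held. Acquires parent->lock without ever
     * holding a descendant's lock while waiting on an ancestor's.
     */
    static void
    lock_parent(struct dir *child, struct dir *parent)
    {
        pthread_mutex_unlock(&child->lock); /* cf. VOP_UNLOCK(vp) */
        pthread_mutex_lock(&parent->lock);  /* cf. VFS_VGET(mnt, parentino, &pvp) */
        pthread_mutex_lock(&child->lock);   /* cf. vn_lock(vp, LK_EXCLUSIVE | LK_RETRY) */
        /*
         * Anything observed about `child` before the unlock must now be
         * revalidated: other threads may have run in the window, which
         * is exactly why softdep_fsync() re-checks its dependency lists
         * after re-acquiring the locks.
         */
    }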
4629
4630 /*
4631  * Flush all the dirty bitmaps associated with the block device
4632  * before flushing the rest of the dirty blocks so as to reduce
4633  * the number of dependencies that will have to be rolled back.
4634  */
4635 void
4636 softdep_fsync_mountdev(struct vnode *vp, int waitfor)
4637 {
4638     struct buf *bp, *nbp;
4639     struct worklist *wk;
4640
4641     if (!vn_isdisk(vp, NULL))
4642         panic("softdep_fsync_mountdev: vnode not a disk");
4643     ACQUIRE_LOCK(&lk);
4644     LIST_FOREACH_SAFE(bp, &vp->v_dirtyblkhd, b_vnbufs, nbp) {
4645         /*
4646          * If it is already scheduled, skip to the next buffer.
4647          */
4648         splassert(IPL_BIO);
4649         if (bp->b_flags & B_BUSY)
4650             continue;
4651
4652         if ((bp->b_flags & B_DELWRI) == 0) {
4653             FREE_LOCK(&lk);
4654             panic("softdep_fsync_mountdev: not dirty");
4655         }
4656         /*
4657          * We are only interested in bitmaps with outstanding
4658          * dependencies.
4659          */
4660         if ((wk = LIST_FIRST(&bp->b_dep)) == NULL ||
4661             wk->wk_type != D_BMSAFEMAP) {
4662             continue;
4663         }
4664         bremfree(bp);
4665         buf_acquire(bp);
4666         FREE_LOCK(&lk);
4667         (void) bawrite(bp);
4668         ACQUIRE_LOCK(&lk);
4669         /*
4670          * Since we may have slept during the I/O, we need
4671          * to start from a known point.
4672          */
4673         nbp = LIST_FIRST(&vp->v_dirtyblkhd);
4674     }
4675     if (waitfor == MNT_WAIT)
4676         drain_output(vp, 1);
4677     FREE_LOCK(&lk);
4678 }
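Editor's note: the nbp reset at line 4673 shows the usual idiom for walking a list that can change whenever the walker sleeps: after any blocking operation, restart from the head and rely on a per-entry test (here B_BUSY) to skip work already issued. A hedged sketch of the same pattern; item, wanted and process_and_maybe_sleep are illustrative names, a BSD <sys/queue.h> providing LIST_FOREACH_SAFE is assumed, and the sketch presumes process_and_maybe_sleep() clears `wanted`, much as bawrite() leaves the buffer busy so the rescan skips it.

    #include <sys/queue.h>
    #include <stdbool.h>

    struct item {
        LIST_ENTRY(item) entry;
        bool wanted;
    };
    LIST_HEAD(itemlist, item);

    /* Supplied elsewhere; may sleep, invalidating any saved iterator. */
    extern void process_and_maybe_sleep(struct item *);

    static void
    scan(struct itemlist *list)
    {
        struct item *it, *next;

        LIST_FOREACH_SAFE(it, list, entry, next) {
            if (!it->wanted)        /* cf. the B_BUSY skip above */
                continue;
            process_and_maybe_sleep(it);
            /*
             * The sleep may have unlinked `next`, so resume from a
             * known point, as the mountdev loop resets nbp to
             * LIST_FIRST(&vp->v_dirtyblkhd).
             */
            next = LIST_FIRST(list);
        }
    }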
4679
4680 /*
4681  * This routine is called when we are trying to synchronously flush a
4682  * file. This routine must eliminate any filesystem metadata dependencies
4683  * so that the syncing routine can succeed by pushing the dirty blocks
4684  * associated with the file. If any I/O errors occur, they are returned.
4685  */
4686 int
4687 softdep_sync_metadata(struct vop_fsync_args *ap)
4688 {
4689     struct vnode *vp = ap->a_vp;
4690     struct pagedep *pagedep;
4691     struct allocdirect *adp;
4692     struct allocindir *aip;
4693     struct buf *bp, *nbp;
4694     struct worklist *wk;
4695     int i, gotit, error, waitfor;
4696
4697     /*
4698      * Check whether this vnode is involved in a filesystem
4699      * that is doing soft dependency processing.
4700      */
4701     if (!vn_isdisk(vp, NULL)) {
4702         if (!DOINGSOFTDEP(vp))
4703             return (0);
4704     } else
4705         if (vp->v_specmountpoint == NULL ||
4706             (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP) == 0)
4707             return (0);
4708     /*
4709      * Ensure that any direct block dependencies have been cleared.
4710      */
4711     ACQUIRE_LOCK(&lk);
4712     if ((error = flush_inodedep_deps(VTOI(vp)->i_fs, VTOI(vp)->i_number))) {
4713         FREE_LOCK(&lk);
4714         return (error);
4715     }
4716     /*
4717      * For most files, the only metadata dependencies are the
4718      * cylinder group maps that allocate their inode or blocks.
4719      * The block allocation dependencies can be found by traversing
4720      * the dependency lists for any buffers that remain on their
4721      * dirty buffer list. The inode allocation dependency will
4722      * be resolved when the inode is updated with MNT_WAIT.
4723      * This work is done in two passes. The first pass grabs most
4724      * of the buffers and begins asynchronously writing them. The
4725      * only way to wait for these asynchronous writes is to sleep
4726      * on the filesystem vnode which may stay busy for a long time
4727      * if the filesystem is active. So, instead, we make a second
4728      * pass over the dependencies blocking on each write. In the
4729      * usual case we will be blocking against a write that we
4730      * initiated, so when it is done the dependency will have been
4731      * resolved. Thus the second pass is expected to end quickly.
4732      */
4733     waitfor = MNT_NOWAIT;
4734 top:
4735     /*
4736      * We must wait for any I/O in progress to finish so that
4737      * all potential buffers on the dirty list will be visible.
4738      */
4739     drain_output(vp, 1);
4740     bp = LIST_FIRST(&vp->v_dirtyblkhd);
4741     gotit = getdirtybuf(bp, MNT_WAIT);
4742     if (gotit == 0) {
4743         FREE_LOCK(&lk);
4744         return (0);
4745     } else if (gotit == -1)
4746         goto top;
4747 loop:
4748     /*
4749      * As we hold the buffer locked, none of its dependencies
4750      * will disappear.
4751      */
4752     LIST_FOREACH(wk, &bp->b_dep, wk_list) {
4753         switch (wk->wk_type) {
4754
4755         case D_ALLOCDIRECT:
4756             adp = WK_ALLOCDIRECT(wk);
4757             if (adp->ad_state & DEPCOMPLETE)
4758                 break;
4759             nbp = adp->ad_buf;
4760             gotit = getdirtybuf(nbp, waitfor);
4761             if (gotit == 0)
4762                 break;
4763             else if (gotit == -1)
4764                 goto loop;
4765             FREE_LOCK(&lk);
4766             if (waitfor == MNT_NOWAIT) {
4767                 bawrite(nbp);
4768             } else if ((error = VOP_BWRITE(nbp)) != 0) {
4769                 bawrite(bp);
4770                 return (error);
4771             }
4772             ACQUIRE_LOCK(&lk);
4773             break;
4774
4775         case D_ALLOCINDIR:
4776             aip = WK_ALLOCINDIR(wk);
4777             if (aip->ai_state & DEPCOMPLETE)
4778                 break;
4779             nbp = aip->ai_buf;
4780             gotit = getdirtybuf(nbp, waitfor);
4781             if (gotit == 0)
4782                 break;
4783             else if (gotit == -1)
4784                 goto loop;
4785             FREE_LOCK(&lk);
4786             if (waitfor == MNT_NOWAIT) {
4787                 bawrite(nbp);
4788             } else if ((error = VOP_BWRITE(nbp)) != 0) {
4789                 bawrite(bp);
4790                 return (error);
4791             }
4792             ACQUIRE_LOCK(&lk);
4793             break;
4794
4795         case D_INDIRDEP:
4796         restart:
4797
4798             LIST_FOREACH(aip, &WK_INDIRDEP(wk)->ir_deplisthd, ai_next) {
4799                 if (aip->ai_state & DEPCOMPLETE)
4800                     continue;
4801                 nbp = aip->ai_buf;
4802                 if (getdirtybuf(nbp, MNT_WAIT) <= 0)
4803                     goto restart;
4804                 FREE_LOCK(&lk);
4805                 if ((error = VOP_BWRITE(nbp)) != 0) {
4806                     bawrite(bp);
4807                     return (error);
4808                 }
4809                 ACQUIRE_LOCK(&lk);
4810                 goto restart;
4811             }
4812             break;
4813
4814         case D_INODEDEP:
4815             if ((error = flush_inodedep_deps(WK_INODEDEP(wk)->id_fs,
4816                 WK_INODEDEP(wk)->id_ino)) != 0) {
4817                 FREE_LOCK(&lk);
4818                 bawrite(bp);
4819                 return (error);
4820             }
4821             break;
4822
4823         case D_PAGEDEP:
4824             /*
4825              * We are trying to sync a directory that may
4826              * have dependencies on both its own metadata
4827              * and/or dependencies on the inodes of any
4828              * recently allocated files. We walk its diradd
4829              * lists pushing out the associated inode.
4830              */
4831             pagedep = WK_PAGEDEP(wk);
4832             for (i = 0; i < DAHASHSZ; i++) {
4833                 if (LIST_FIRST(&pagedep->pd_diraddhd[i]) ==
4834                     NULL)
4835                     continue;
4836                 if ((error =
4837                     flush_pagedep_deps(vp, pagedep->pd_mnt,
4838                     &pagedep->pd_diraddhd[i]))) {
4839                     FREE_LOCK(&lk);
4840                     bawrite(bp);
4841                     return (error);
4842                 }
4843             }
4844             break;
4845
4846         case D_MKDIR:
4847             /*
4848              * This case should never happen if the vnode has
4849              * been properly sync'ed. However, if this function
4850              * is used at a place where the vnode has not yet
4851              * been sync'ed, this dependency can show up. So,
4852              * rather than panic, just flush it.
4853              */
4854             nbp = WK_MKDIR(wk)->md_buf;
4855             KASSERT(bp != nbp);
4856             gotit = getdirtybuf(nbp, waitfor);
4857             if (gotit == 0)
4858                 break;
4859             else if (gotit == -1)
4860                 goto loop;
4861             FREE_LOCK(&lk);
4862             if (waitfor == MNT_NOWAIT) {
4863                 bawrite(nbp);
4864             } else if ((error = VOP_BWRITE(nbp)) != 0) {
4865                 bawrite(bp);
4866                 return (error);
4867             }
4868             ACQUIRE_LOCK(&lk);
4869             break;
4870
4871         case D_BMSAFEMAP:
4872             /*
4873              * This case should never happen if the vnode has
4874              * been properly sync'ed. However, if this function
4875              * is used at a place where the vnode has not yet
4876              * been sync'ed, this dependency can show up. So,
4877              * rather than panic, just flush it.
4878              */
4879             nbp = WK_BMSAFEMAP(wk)->sm_buf;
4880             if (bp == nbp)
4881                 break;
4882             gotit = getdirtybuf(nbp, waitfor);
4883             if (gotit == 0)
4884                 break;
4885             else if (gotit == -1)
4886                 goto loop;
4887             FREE_LOCK(&lk);
4888             if (waitfor == MNT_NOWAIT) {
4889                 bawrite(nbp);
4890             } else if ((error = VOP_BWRITE(nbp)) != 0) {
4891                 bawrite(bp);
4892                 return (error);
4893             }
4894             ACQUIRE_LOCK(&lk);
4895             break;
4896
4897         default:
4898             FREE_LOCK(&lk);
4899             panic("softdep_sync_metadata: Unknown type %s",
4900                 TYPENAME(wk->wk_type));
4901             /* NOTREACHED */
4902         }
4903     }
4904     do {
4905         nbp = LIST_NEXT(bp, b_vnbufs);
4906         gotit = getdirtybuf(nbp, MNT_WAIT);
4907     } while (gotit == -1);
4908     FREE_LOCK(&lk);
4909     bawrite(bp);
4910     ACQUIRE_LOCK(&lk);
4911     if (nbp != NULL) {
4912         bp = nbp;
4913         goto loop;
4914     }
4915     /*
4916      * The brief unlock is to allow any pent up dependency
4917      * processing to be done. Then proceed with the second pass.
4918      */
4919     if (waitfor == MNT_NOWAIT) {
4920         waitfor = MNT_WAIT;
4921         FREE_LOCK(&lk);
4922         ACQUIRE_LOCK(&lk);
4923         goto top;
4924     }
4925
4926     /*
4927      * If we have managed to get rid of all the dirty buffers,
4928      * then we are done. For certain directories and block
4929      * devices, we may need to do further work.
4930      *
4931      * We must wait for any I/O in progress to finish so that
4932      * all potential buffers on the dirty list will be visible.
4933      */
4934     drain_output(vp, 1);
4935     if (LIST_EMPTY(&vp->v_dirtyblkhd)) {
4936         FREE_LOCK(&lk);
4937         return (0);
4938     }
4939
4940     FREE_LOCK(&lk);
4941     /*
4942      * If we are trying to sync a block device, some of its buffers may
4943      * contain metadata that cannot be written until the contents of some
4944      * partially written files have been written to disk. The only easy
4945      * way to accomplish this is to sync the entire filesystem (luckily
4946      * this happens rarely).
4947      */
4948     if (vn_isdisk(vp, NULL) &&
4949         vp->v_specmountpoint && !VOP_ISLOCKED(vp) &&
4950         (error = VFS_SYNC(vp->v_specmountpoint, MNT_WAIT, 0, ap->a_cred,
4951         ap->a_p)) != 0)
4952         return (error);
4953     return (0);
4954 }
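Editor's note: the control flow above (and again in flush_inodedep_deps() below) is a two-pass flush: pass 1 starts asynchronous writes with MNT_NOWAIT, pass 2 repeats the walk with MNT_WAIT and blocks on each write, which by then is usually one that pass 1 already started. A compressed sketch of that shape; next_dirty, write_async and write_sync are illustrative stand-ins, not kernel routines.

    #include <stdbool.h>

    enum waitmode { NOWAIT, WAIT };

    extern bool next_dirty(void **bufp, enum waitmode); /* false when none left */
    extern void write_async(void *buf);
    extern int  write_sync(void *buf);                  /* 0 or an errno */

    static int
    two_pass_flush(void)
    {
        enum waitmode waitfor = NOWAIT;
        void *buf;
        int error;

        for (;;) {
            while (next_dirty(&buf, waitfor)) {
                if (waitfor == NOWAIT)
                    write_async(buf);       /* pass 1: just start the I/O */
                else if ((error = write_sync(buf)) != 0)
                    return (error);         /* pass 2: block on each write */
            }
            if (waitfor == WAIT)
                return (0);                 /* both passes done */
            waitfor = WAIT;                 /* switch to the blocking pass */
        }
    }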
4955
4956 /*
4957  * Flush the dependencies associated with an inodedep.
4958  * Called with splbio blocked.
4959  */
4960 STATIC int
4961 flush_inodedep_deps(struct fs *fs, ufsino_t ino)
4962 {
4963     struct inodedep *inodedep;
4964     struct allocdirect *adp;
4965     int gotit, error, waitfor;
4966     struct buf *bp;
4967
4968     splassert(IPL_BIO);
4969
4970     /*
4971      * This work is done in two passes. The first pass grabs most
4972      * of the buffers and begins asynchronously writing them. The
4973      * only way to wait for these asynchronous writes is to sleep
4974      * on the filesystem vnode which may stay busy for a long time
4975      * if the filesystem is active. So, instead, we make a second
4976      * pass over the dependencies blocking on each write. In the
4977      * usual case we will be blocking against a write that we
4978      * initiated, so when it is done the dependency will have been
4979      * resolved. Thus the second pass is expected to end quickly.
4980      * We give a brief window at the top of the loop to allow
4981      * any pending I/O to complete.
4982      */
4983     for (waitfor = MNT_NOWAIT; ; ) {
4984     retry_ino:
4985         FREE_LOCK(&lk);
4986         ACQUIRE_LOCK(&lk);
4987         if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
4988             return (0);
4989         TAILQ_FOREACH(adp, &inodedep->id_inoupdt, ad_next) {
4990             if (adp->ad_state & DEPCOMPLETE)
4991                 continue;
4992             bp = adp->ad_buf;
4993             gotit = getdirtybuf(bp, waitfor);
4994             if (gotit == 0) {
4995                 if (waitfor == MNT_NOWAIT)
4996                     continue;
4997                 break;
4998             } else if (gotit == -1)
4999                 goto retry_ino;
5000             FREE_LOCK(&lk);
5001             if (waitfor == MNT_NOWAIT) {
5002                 bawrite(bp);
5003             } else if ((error = VOP_BWRITE(bp)) != 0) {
5004                 ACQUIRE_LOCK(&lk);
5005                 return (error);
5006             }
5007             ACQUIRE_LOCK(&lk);
5008             break;
5009         }
5010         if (adp != NULL)
5011             continue;
5012     retry_newino:
5013         TAILQ_FOREACH(adp, &inodedep->id_newinoupdt, ad_next) {
5014             if (adp->ad_state & DEPCOMPLETE)
5015                 continue;
5016             bp = adp->ad_buf;
5017             gotit = getdirtybuf(bp, waitfor);
5018             if (gotit == 0) {
5019                 if (waitfor == MNT_NOWAIT)
5020                     continue;
5021                 break;
5022             } else if (gotit == -1)
5023                 goto retry_newino;
5024             FREE_LOCK(&lk);
5025             if (waitfor == MNT_NOWAIT) {
5026                 bawrite(bp);
5027             } else if ((error = VOP_BWRITE(bp)) != 0) {
5028                 ACQUIRE_LOCK(&lk);
5029                 return (error);
5030             }
5031             ACQUIRE_LOCK(&lk);
5032             break;
5033         }
5034         if (adp != NULL)
5035             continue;
5036         /*
5037          * If this was pass 2, we are done; otherwise start pass 2.
5038          */
5039         if (waitfor == MNT_WAIT)
5040             break;
5041         waitfor = MNT_WAIT;
5042     }
5043     /*
5044      * Try freeing inodedep in case all dependencies have been removed.
5045      */
5046     if (inodedep_lookup(fs, ino, 0, &inodedep) != 0)
5047         (void) free_inodedep(inodedep);
5048     return (0);
5049 }
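Editor's note: getdirtybuf() returns 1 when the buffer was grabbed, 0 when there was nothing to grab, and -1 when the caller slept, in which case any list it was walking may have changed; that is why the function above restarts through the retry_ino and retry_newino labels. The bare shape of that convention, as a hedged sketch; try_lock_dirty and the dep iteration helpers are illustrative names only.

    /* Returns 1 if locked, 0 if not dirty/available, -1 if we slept. */
    extern int  try_lock_dirty(void *dep, int waitfor);
    extern void *first_dep(void);
    extern void *next_dep(void *dep);
    extern void flush(void *dep);

    static void
    flush_all(int waitfor)
    {
        void *dep;
        int gotit;

    restart:
        for (dep = first_dep(); dep != NULL; dep = next_dep(dep)) {
            gotit = try_lock_dirty(dep, waitfor);
            if (gotit == 0)
                continue;       /* nothing to write here */
            if (gotit == -1)
                goto restart;   /* we slept; iterators are stale */
            flush(dep);
        }
    }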
5050
5051 /*
5052  * Eliminate a pagedep dependency by flushing out all its diradd dependencies.
5053  * Called with splbio blocked.
5054  */
5055 STATIC int
5056 flush_pagedep_deps(struct vnode *pvp, struct mount *mp,
5057     struct diraddhd *diraddhdp)
5058 {
5059     struct proc *p = CURPROC;    /* XXX */
5060     struct worklist *wk;
5061     struct inodedep *inodedep;
5062     struct ufsmount *ump;
5063     struct diradd *dap;
5064     struct vnode *vp;
5065     int gotit, error = 0;
5066     struct buf *bp;
5067     ufsino_t inum;
5068
5069     splassert(IPL_BIO);
5070
5071     ump = VFSTOUFS(mp);
5072     while ((dap = LIST_FIRST(diraddhdp)) != NULL) {
5073         /*
5074          * Flush ourselves if this directory entry
5075          * has a MKDIR_PARENT dependency.
5076          */
5077         if (dap->da_state & MKDIR_PARENT) {
5078             FREE_LOCK(&lk);
5079             if ((error = UFS_UPDATE(VTOI(pvp), 1)))
5080                 break;
5081             ACQUIRE_LOCK(&lk);
5082             /*
5083              * If that cleared dependencies, go on to next.
5084              */
5085             if (dap != LIST_FIRST(diraddhdp))
5086                 continue;
5087             if (dap->da_state & MKDIR_PARENT) {
5088                 FREE_LOCK(&lk);
5089                 panic("flush_pagedep_deps: MKDIR_PARENT");
5090             }
5091         }
5092         /*
5093          * A newly allocated directory must have its "." and
5094          * ".." entries written out before its name can be
5095          * committed in its parent. We do not want or need
5096          * the full semantics of a synchronous VOP_FSYNC as
5097          * that may end up here again, once for each directory
5098          * level in the filesystem. Instead, we push the blocks
5099          * and wait for them to clear. We have to fsync twice
5100          * because the first call may choose to defer blocks
5101          * that still have dependencies, but deferral will
5102          * happen at most once.
5103          */
5104         inum = dap->da_newinum;
5105         if (dap->da_state & MKDIR_BODY) {
5106             FREE_LOCK(&lk);
5107             if ((error = VFS_VGET(mp, inum, &vp)) != 0)
5108                 break;
5109             if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)) ||
5110                 (error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p))) {
5111                 vput(vp);
5112                 break;
5113             }
5114             drain_output(vp, 0);
5115             /*
5116              * If first block is still dirty with a D_MKDIR
5117              * dependency then it needs to be written now.
5118              */
5119             for (;;) {
5120                 error = 0;
5121                 ACQUIRE_LOCK(&lk);
5122                 bp = incore(vp, 0);
5123                 if (bp == NULL) {
5124                     FREE_LOCK(&lk);
5125                     break;
5126                 }
5127                 LIST_FOREACH(wk, &bp->b_dep, wk_list)
5128                     if (wk->wk_type == D_MKDIR)
5129                         break;
5130                 if (wk) {
5131                     gotit = getdirtybuf(bp, MNT_WAIT);
5132                     FREE_LOCK(&lk);
5133                     if (gotit == -1)
5134                         continue;
5135                     if (gotit && (error = bwrite(bp)) != 0)
5136                         break;
5137                 } else
5138                     FREE_LOCK(&lk);
5139                 break;
5140             }
5141             vput(vp);
5142             /* Flushing of first block failed */
5143             if (error)
5144                 break;
5145             ACQUIRE_LOCK(&lk);
5146             /*
5147              * If that cleared dependencies, go on to next.
5148              */
5149             if (dap != LIST_FIRST(diraddhdp))
5150                 continue;
5151             if (dap->da_state & MKDIR_BODY) {
5152                 FREE_LOCK(&lk);
5153                 panic("flush_pagedep_deps: MKDIR_BODY");
5154             }
5155         }
5156         /*
5157          * Flush the inode on which the directory entry depends.
5158          * Having accounted for MKDIR_PARENT and MKDIR_BODY above,
5159          * the only remaining dependency is that the updated inode
5160          * count must get pushed to disk. The inode has already
5161          * been pushed into its inode buffer (via VOP_UPDATE) at
5162          * the time of the reference count change. So we need only
5163          * locate that buffer, ensure that there will be no rollback
5164          * caused by a bitmap dependency, then write the inode buffer.
5165          */
5166         if (inodedep_lookup(ump->um_fs, inum, 0, &inodedep) == 0) {
5167             FREE_LOCK(&lk);
5168             panic("flush_pagedep_deps: lost inode");
5169         }
5170         /*
5171          * If the inode still has bitmap dependencies,
5172          * push them to disk.
5173          */
5174     retry:
5175         if ((inodedep->id_state & DEPCOMPLETE) == 0) {
5176             bp = inodedep->id_buf;
5177             gotit = getdirtybuf(bp, MNT_WAIT);
5178             if (gotit == -1)
5179                 goto retry;
5180             FREE_LOCK(&lk);
5181             if (gotit && (error = bwrite(bp)) != 0)
5182                 break;
5183             ACQUIRE_LOCK(&lk);
5184             if (dap != LIST_FIRST(diraddhdp))
5185                 continue;
5186         }
5187         /*
5188          * If the inode is still sitting in a buffer waiting
5189          * to be written, push it to disk.
5190          */
5191         FREE_LOCK(&lk);
5192         if ((error = bread(ump->um_devvp,
5193             fsbtodb(ump->um_fs, ino_to_fsba(ump->um_fs, inum)),
5194             (int)ump->um_fs->fs_bsize, &bp)) != 0) {
5195             brelse(bp);
5196             break;
5197         }
5198         if ((error = bwrite(bp)) != 0)
5199             break;
5200         ACQUIRE_LOCK(&lk);
5201         /*
5202          * If we have failed to get rid of all the dependencies
5203          * then something is seriously wrong.
5204          */
5205         if (dap == LIST_FIRST(diraddhdp)) {
5206             FREE_LOCK(&lk);
5207             panic("flush_pagedep_deps: flush failed");
5208         }
5209     }
5210     if (error)
5211         ACQUIRE_LOCK(&lk);
5212     return (error);
5213 }
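Editor's note: the back-to-back VOP_FSYNC calls at lines 5109-5110 implement the "fsync twice" rule from the comment above: the first pass may defer blocks that still carry dependencies, and deferral happens at most once, so one retry settles everything. The idiom in isolation, as a hedged sketch; flush_once is an illustrative stand-in.

    /* May defer dependency-laden blocks, but at most once per object. */
    extern int flush_once(void *obj);

    static int
    flush_settled(void *obj)
    {
        int error;

        /* The second call picks up whatever the first chose to defer. */
        if ((error = flush_once(obj)) != 0 ||
            (error = flush_once(obj)) != 0)
            return (error);
        return (0);
    }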
5214
5215 /*
5216  * A large burst of file addition or deletion activity can drive the
5217  * memory load excessively high. First attempt to slow things down
5218  * using the techniques below. If that fails, this routine requests
5219  * the offending operations to fall back to running synchronously
5220  * until the memory load returns to a reasonable level.
5221  */
5222 int
5223 softdep_slowdown(struct vnode *vp)
5224 {
5225     int max_softdeps_hard;
5226
5227     max_softdeps_hard = max_softdeps * 11 / 10;
5228     if (num_dirrem < max_softdeps_hard / 2 &&
5229         num_inodedep < max_softdeps_hard)
5230         return (0);
5231     stat_sync_limit_hit += 1;
5232     return (1);
5233 }
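Editor's note: the 11/10 scaling gives a hard ceiling 10% above max_softdeps, computed in integer arithmetic. As a worked example (assuming a hypothetical max_softdeps of 8192, not a value taken from this file):

    max_softdeps_hard = 8192 * 11 / 10 = 9011

so softdep_slowdown() starts reporting pressure once num_dirrem reaches 9011 / 2 = 4505 or num_inodedep reaches 9011.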
5234
5235 /*
5236  * If memory utilization has gotten too high, deliberately slow things
5237  * down and speed up the I/O processing.
5238  */
5239 STATIC int
5240 request_cleanup(int resource, int islocked)
5241 {
5242     struct proc *p = CURPROC;
5243     int s;
5244
5245     /*
5246      * We never hold up the filesystem syncer process.
5247      */
5248     if (p == filesys_syncer || (p->p_flag & P_SOFTDEP))
5249         return (0);
5250     /*
5251      * First check to see if the work list has gotten backlogged.
5252      * If it has, co-opt this process to help clean up two entries.
5253      * Because this process may hold inodes locked, we cannot
5254      * handle any remove requests that might block on a locked
5255      * inode as that could lead to deadlock. We set P_SOFTDEP
5256      * to avoid recursively processing the worklist.
5257      */
5258     if (num_on_worklist > max_softdeps / 10) {
5259         atomic_setbits_int(&p->p_flag, P_SOFTDEP);
5260         if (islocked)
5261             FREE_LOCK(&lk);
5262         process_worklist_item(NULL, NULL, LK_NOWAIT);
5263         process_worklist_item(NULL, NULL, LK_NOWAIT);
5264