Gitweb: http://git.fedorahosted.org/git/?p=gfs2-utils.git;a=commitdiff;h=761a035c6e…
Commit: 761a035c6ec0ad2b33676da63c2799b10f276a2f
Parent: a4aceaded44d1f00597ad3b2d6a86f8e7adde2a2
Author: Steven Whitehouse <swhiteho(a)redhat.com>
AuthorDate: Fri Feb 22 12:08:03 2013 +0000
Committer: Andrew Price <anprice(a)redhat.com>
CommitterDate: Fri Feb 22 12:54:21 2013 +0000
fsck: Clean up pass1 inode iteration code
The original version of this patch, which included readahead for the
inodes, appears to deliver a performance improvement only in some
particular cases, and slows down others. This patch is fairly similar
in that it includes the clean-ups from the previous attempt; however,
the readahead has been left out for the time being, until the problem
is better understood and can be fixed.
This clean-up still has the advantage of better code structure
around the main loop, and in addition it reduces the number of
read requests that are generated.
Signed-off-by: Steven Whitehouse <swhiteho(a)redhat.com>
---
gfs2/fsck/pass1.c | 238 ++++++++++++++++++++++++---------------------
gfs2/libgfs2/libgfs2.h | 4 +-
gfs2/libgfs2/structures.c | 23 ++++-
3 files changed, 147 insertions(+), 118 deletions(-)
diff --git a/gfs2/fsck/pass1.c b/gfs2/fsck/pass1.c
index 540f2a9..9a34e97 100644
--- a/gfs2/fsck/pass1.c
+++ b/gfs2/fsck/pass1.c
@@ -14,6 +14,7 @@
#include <unistd.h>
#include <string.h>
#include <time.h>
+#include <fcntl.h>
#include <sys/ioctl.h>
#include <inttypes.h>
#include <libintl.h>
@@ -1547,6 +1548,128 @@ static int check_system_inodes(struct gfs2_sbd *sdp)
return 0;
}
+/**
+ * pass1_process_bitmap - check each block address collected from one bitmap
+ * @sdp: the filesystem being checked
+ * @rgd: the resource group the addresses came from (not used in this body)
+ * @ibuf: array of filesystem block addresses to examine
+ * @n: number of valid entries in @ibuf
+ *
+ * For every address: blocks found in gfs1_rindex_blks (gfs1 only) and
+ * blocks for which fsck_system_inode() is true are skipped. Otherwise the
+ * block is read and tested with gfs2_check_meta(..., GFS2_METATYPE_DI).
+ * If that test is non-zero the block is either deferred (gfs1 block that
+ * still carries GFS2_MAGIC) or reported as an invalid inode, set to
+ * gfs2_block_free in the blockmap and passed to check_n_fix_bitmap().
+ * If the test is zero the buffer is handed to handle_di().
+ *
+ * The buffer obtained from bread() is released with brelse() on every path.
+ * On the abort and error returns gfs1_rindex_blks is freed before returning.
+ *
+ * Returns: FSCK_OK if fsck_abort is set, FSCK_ERROR if gfs2_blockmap_set()
+ * fails or handle_di() returns a negative value, 0 once all @n entries have
+ * been processed.
+ */
+static int pass1_process_bitmap(struct gfs2_sbd *sdp, struct rgrp_tree *rgd, uint64_t *ibuf, unsigned n)
+{
+ struct gfs2_buffer_head *bh;
+ unsigned i;
+ uint64_t block;
+
+ for (i = 0; i < n; i++) {
+ block = ibuf[i];
+
+ /* skip gfs1 rindex indirect blocks */
+ if (sdp->gfs1 && blockfind(&gfs1_rindex_blks, block)) {
+ log_debug(_("Skipping rindex indir block "
+ "%lld (0x%llx)\n"),
+ (unsigned long long)block,
+ (unsigned long long)block);
+ continue;
+ }
+ warm_fuzzy_stuff(block);
+
+ if (fsck_abort) { /* if asked to abort */
+ gfs2_special_free(&gfs1_rindex_blks);
+ return FSCK_OK;
+ }
+ if (skip_this_pass) {
+ printf( _("Skipping pass 1 is not a good idea.\n"));
+ skip_this_pass = FALSE;
+ fflush(stdout);
+ }
+ if (fsck_system_inode(sdp, block)) {
+ log_debug(_("Already processed system inode "
+ "%lld (0x%llx)\n"),
+ (unsigned long long)block,
+ (unsigned long long)block);
+ continue;
+ }
+ bh = bread(sdp, block);
+
+ if (gfs2_check_meta(bh, GFS2_METATYPE_DI)) {
+ /* In gfs2, a bitmap mark of 2 means an inode,
+ but in gfs1 it means any metadata. So if
+ this is gfs1 and not an inode, it may be
+ okay. If it's non-dinode metadata, it will
+ be referenced by an inode, so we need to
+ skip it here and it will be sorted out
+ when the referencing inode is checked. */
+ if (sdp->gfs1) {
+ uint32_t check_magic;
+
+ check_magic = ((struct gfs2_meta_header *)
+ (bh->b_data))->mh_magic;
+ if (be32_to_cpu(check_magic) == GFS2_MAGIC) {
+ log_debug( _("Deferring GFS1 "
+ "metadata block #"
+ "%" PRIu64" (0x%"
+ PRIx64 ")\n"),
+ block, block);
+ brelse(bh);
+ continue;
+ }
+ }
+ log_err( _("Found invalid inode at block #"
+ "%llu (0x%llx)\n"),
+ (unsigned long long)block,
+ (unsigned long long)block);
+ if (gfs2_blockmap_set(bl, block, gfs2_block_free)) {
+ stack;
+ brelse(bh);
+ gfs2_special_free(&gfs1_rindex_blks);
+ return FSCK_ERROR;
+ }
+ check_n_fix_bitmap(sdp, block, gfs2_block_free);
+ } else if (handle_di(sdp, bh) < 0) {
+ stack;
+ brelse(bh);
+ gfs2_special_free(&gfs1_rindex_blks);
+ return FSCK_ERROR;
+ }
+ /* Ignore everything else - they should be hit by the
+ handle_di step. Don't check NONE either, because
+ check_meta passes everything if GFS2_METATYPE_NONE
+ is specified. Hopefully, other metadata types such
+ as indirect blocks will be handled when the inode
+ itself is processed, and if it's not, it should be
+ caught in pass5. */
+ brelse(bh);
+ }
+
+ return 0;
+}
+
+/**
+ * pass1_process_rgrp - run pass1 over every bitmap of one resource group
+ * @sdp: the filesystem being checked
+ * @rgd: the resource group to scan
+ *
+ * For each of the rgrp's ri_length bitmaps, collects the blocks marked
+ * GFS2_BLKST_DINODE with lgfs2_bm_scan() and hands them to
+ * pass1_process_bitmap().  On GFS1 the blocks marked GFS2_BLKST_UNLINKED
+ * are additionally recorded in the blockmap as gfs2_freemeta so that they
+ * can be counted properly in pass5.
+ *
+ * The scratch buffer is released on every exit path.
+ *
+ * Returns: 0 on success, FSCK_ERROR if the scratch buffer cannot be
+ * allocated, otherwise the first non-zero value returned by
+ * pass1_process_bitmap().
+ */
+static int pass1_process_rgrp(struct gfs2_sbd *sdp, struct rgrp_tree *rgd)
+{
+	unsigned k, n, i;
+	uint64_t *ibuf;
+	int ret = 0;
+
+	/*
+	 * Room for bsize * GFS2_NBBY addresses.  lgfs2_bm_scan() emits at
+	 * most bi_len * GFS2_NBBY per bitmap (assumes bi_len <= bsize).
+	 */
+	ibuf = malloc(sdp->bsize * GFS2_NBBY * sizeof(uint64_t));
+	if (ibuf == NULL) {
+		log_err( _("Out of memory scanning resource group bitmaps\n"));
+		/* Mirror the cleanup done by pass1_process_bitmap() on error,
+		   since pass1() returns straight away on a non-zero result. */
+		gfs2_special_free(&gfs1_rindex_blks);
+		return FSCK_ERROR;
+	}
+
+	for (k = 0; k < rgd->ri.ri_length; k++) {
+		n = lgfs2_bm_scan(rgd, k, ibuf, GFS2_BLKST_DINODE);
+
+		if (n) {
+			ret = pass1_process_bitmap(sdp, rgd, ibuf, n);
+			if (ret)
+				break; /* leave via the common free() below */
+		}
+
+		/*
+		  For GFS1, we have to count the "free meta" blocks in the
+		  resource group and mark them specially so we can count them
+		  properly in pass5.
+		*/
+		if (!sdp->gfs1)
+			continue;
+
+		n = lgfs2_bm_scan(rgd, k, ibuf, GFS2_BLKST_UNLINKED);
+		for (i = 0; i < n; i++)
+			gfs2_blockmap_set(bl, ibuf[i], gfs2_freemeta);
+	}
+
+	free(ibuf);
+	return ret;
+}
+
/**
* pass1 - walk through inodes and check inode state
*
@@ -1563,12 +1686,10 @@ static int check_system_inodes(struct gfs2_sbd *sdp)
int pass1(struct gfs2_sbd *sdp)
{
struct osi_node *n, *next = NULL;
- struct gfs2_buffer_head *bh;
- uint64_t block = 0;
struct rgrp_tree *rgd;
- int first;
uint64_t i;
uint64_t rg_count = 0;
+ int ret;
osi_list_init(&gfs1_rindex_blks.list);
@@ -1611,115 +1732,10 @@ int pass1(struct gfs2_sbd *sdp)
gfs2_meta_rgrp);*/
}
- first = 1;
+ ret = pass1_process_rgrp(sdp, rgd);
+ if (ret)
+ return ret;
- while (1) {
- /* "block" is relative to the entire file system */
- /* Get the next dinode in the file system, according
- to the bitmap. This should ONLY be dinodes unless
- it's GFS1, in which case it can be any metadata. */
- if (gfs2_next_rg_meta(rgd, &block, first))
- break;
- /* skip gfs1 rindex indirect blocks */
- if (sdp->gfs1 && blockfind(&gfs1_rindex_blks, block)) {
- log_debug(_("Skipping rindex indir block "
- "%lld (0x%llx)\n"),
- (unsigned long long)block,
- (unsigned long long)block);
- first = 0;
- continue;
- }
- warm_fuzzy_stuff(block);
-
- if (fsck_abort) { /* if asked to abort */
- gfs2_special_free(&gfs1_rindex_blks);
- return FSCK_OK;
- }
- if (skip_this_pass) {
- printf( _("Skipping pass 1 is not a good idea.\n"));
- skip_this_pass = FALSE;
- fflush(stdout);
- }
- if (fsck_system_inode(sdp, block)) {
- log_debug(_("Already processed system inode "
- "%lld (0x%llx)\n"),
- (unsigned long long)block,
- (unsigned long long)block);
- first = 0;
- continue;
- }
- bh = bread(sdp, block);
-
- /*log_debug( _("Checking metadata block #%" PRIu64
- " (0x%" PRIx64 ")\n"), block, block);*/
-
- if (gfs2_check_meta(bh, GFS2_METATYPE_DI)) {
- /* In gfs2, a bitmap mark of 2 means an inode,
- but in gfs1 it means any metadata. So if
- this is gfs1 and not an inode, it may be
- okay. If it's non-dinode metadata, it will
- be referenced by an inode, so we need to
- skip it here and it will be sorted out
- when the referencing inode is checked. */
- if (sdp->gfs1) {
- uint32_t check_magic;
-
- check_magic = ((struct
- gfs2_meta_header *)
- (bh->b_data))->mh_magic;
- if (be32_to_cpu(check_magic) ==
- GFS2_MAGIC) {
- log_debug( _("Deferring GFS1 "
- "metadata block #"
- "%" PRIu64" (0x%"
- PRIx64 ")\n"),
- block, block);
- brelse(bh);
- first = 0;
- continue;
- }
- }
- log_err( _("Found invalid inode at block #"
- "%llu (0x%llx)\n"),
- (unsigned long long)block,
- (unsigned long long)block);
- if (gfs2_blockmap_set(bl, block,
- gfs2_block_free)) {
- stack;
- brelse(bh);
- gfs2_special_free(&gfs1_rindex_blks);
- return FSCK_ERROR;
- }
- check_n_fix_bitmap(sdp, block,
- gfs2_block_free);
- } else if (handle_di(sdp, bh) < 0) {
- stack;
- brelse(bh);
- gfs2_special_free(&gfs1_rindex_blks);
- return FSCK_ERROR;
- }
- /* Ignore everything else - they should be hit by the
- handle_di step. Don't check NONE either, because
- check_meta passes everything if GFS2_METATYPE_NONE
- is specified. Hopefully, other metadata types such
- as indirect blocks will be handled when the inode
- itself is processed, and if it's not, it should be
- caught in pass5. */
- brelse(bh);
- first = 0;
- }
- /*
- For GFS1, we have to count the "free meta" blocks in the
- resource group and mark them specially so we can count them
- properly in pass5.
- */
- if (!sdp->gfs1)
- continue;
- first = 1;
- while (gfs2_next_rg_freemeta(rgd, &block, first) == 0) {
- gfs2_blockmap_set(bl, block, gfs2_freemeta);
- first = 0;
- }
}
gfs2_special_free(&gfs1_rindex_blks);
return FSCK_OK;
diff --git a/gfs2/libgfs2/libgfs2.h b/gfs2/libgfs2/libgfs2.h
index 46d4d67..db31a6c 100644
--- a/gfs2/libgfs2/libgfs2.h
+++ b/gfs2/libgfs2/libgfs2.h
@@ -757,12 +757,12 @@ extern int build_root(struct gfs2_sbd *sdp);
extern int do_init_inum(struct gfs2_sbd *sdp);
extern int do_init_statfs(struct gfs2_sbd *sdp);
extern int gfs2_check_meta(struct gfs2_buffer_head *bh, int type);
+extern unsigned lgfs2_bm_scan(struct rgrp_tree *rgd, unsigned idx,
+ uint64_t *buf, uint8_t state);
extern int gfs2_next_rg_meta(struct rgrp_tree *rgd, uint64_t *block,
int first);
extern int gfs2_next_rg_metatype(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
uint64_t *block, uint32_t type, int first);
-extern int gfs2_next_rg_freemeta(struct rgrp_tree *rgd, uint64_t *block,
- int first);
/* super.c */
extern int check_sb(struct gfs2_sb *sb);
diff --git a/gfs2/libgfs2/structures.c b/gfs2/libgfs2/structures.c
index 645c45a..f9231ca 100644
--- a/gfs2/libgfs2/structures.c
+++ b/gfs2/libgfs2/structures.c
@@ -495,6 +495,24 @@ int gfs2_check_meta(struct gfs2_buffer_head *bh, int type)
return 0;
}
+/*
+ * lgfs2_bm_scan - list the blocks of one bitmap that are in a given state
+ * @rgd: resource group owning the bitmap
+ * @idx: index of the bitmap within @rgd
+ * @buf: output array; up to bi_len * GFS2_NBBY addresses may be written
+ * @state: block state to search for
+ *
+ * Calls gfs2_bitfit() repeatedly until it reports BFITNOENT or the end of
+ * the bitmap is passed, storing each hit as
+ * hit + bi_start * GFS2_NBBY + ri_data0.
+ *
+ * Returns: the number of addresses stored in @buf.
+ */
+unsigned lgfs2_bm_scan(struct rgrp_tree *rgd, unsigned idx, uint64_t *buf, uint8_t state)
+{
+	struct gfs2_bitmap *bits = &rgd->bits[idx];
+	unsigned found = 0;
+	uint32_t pos;
+
+	for (pos = 0; pos < (bits->bi_len * GFS2_NBBY); pos++) {
+		const unsigned char *map;
+
+		map = (const unsigned char *)rgd->bh[idx]->b_data + bits->bi_offset;
+		pos = gfs2_bitfit(map, bits->bi_len, pos, state);
+		if (pos == BFITNOENT)
+			break;
+
+		buf[found] = pos + (bits->bi_start * GFS2_NBBY) + rgd->ri.ri_data0;
+		found++;
+	}
+
+	return found;
+}
+
/**
* gfs2_next_rg_meta
* @rgd:
@@ -545,11 +563,6 @@ int gfs2_next_rg_meta(struct rgrp_tree *rgd, uint64_t *block, int first)
return __gfs2_next_rg_meta(rgd, block, first, GFS2_BLKST_DINODE);
}
-int gfs2_next_rg_freemeta(struct rgrp_tree *rgd, uint64_t *block, int first)
-{
- return __gfs2_next_rg_meta(rgd, block, first, GFS2_BLKST_UNLINKED);
-}
-
/**
* next_rg_metatype
* @rgd:
Gitweb: http://git.fedorahosted.org/git/?p=gfs2-utils.git;a=commitdiff;h=a4aceaded4…
Commit: a4aceaded44d1f00597ad3b2d6a86f8e7adde2a2
Parent: 361ba0c65f68fbd63d8f7cc6d97b68532e3cc8fd
Author: Steven Whitehouse <swhiteho(a)redhat.com>
AuthorDate: Mon Feb 18 17:06:58 2013 +0000
Committer: Andrew Price <anprice(a)redhat.com>
CommitterDate: Mon Feb 18 17:39:36 2013 +0000
fsck: Speed up reading of dir leaf blocks
This patch adds readahead for directory leaf blocks. It gives me a
speed-up of only around one second on my test filesystem; however, that
filesystem has only one directory with a reasonable number of files in
it, so that is actually pretty good going for so small a filesystem.
Due to the reading of the dir hash table in a single sweep, this reduces
the number of calls to read dir hash table blocks considerably.
The patch takes all the valid leaf block pointers, sorts them into disk
block order and then issues readahead requests for the blocks, so that
they are read in well before they are needed.
Signed-off-by: Steven Whitehouse <swhiteho(a)redhat.com>
---
gfs2/fsck/metawalk.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++---
1 files changed, 80 insertions(+), 6 deletions(-)
diff --git a/gfs2/fsck/metawalk.c b/gfs2/fsck/metawalk.c
index f19c0f7..d8193c5 100644
--- a/gfs2/fsck/metawalk.c
+++ b/gfs2/fsck/metawalk.c
@@ -9,6 +9,7 @@
#include <unistd.h>
#include <libintl.h>
#include <ctype.h>
+#include <fcntl.h>
#define _(String) gettext(String)
#include "libgfs2.h"
@@ -638,24 +639,87 @@ out_copy_old_leaf:
return 1;
}
+/*
+ * get_dir_hash - read a directory's whole hash table into a new buffer
+ * @ip: the directory inode
+ *
+ * The table is (1 << di_depth) 64-bit entries, fetched with a single
+ * gfs2_readi() starting at offset 0.  Entries are returned exactly as read;
+ * callers convert them with be64_to_cpu().
+ *
+ * Returns: the buffer, which the caller must free(), or NULL if the
+ * allocation fails or the read does not return the full table size.
+ */
+static uint64_t *get_dir_hash(struct gfs2_inode *ip)
+{
+	unsigned nbytes = (1 << ip->i_di.di_depth) * sizeof(uint64_t);
+	uint64_t *table;
+	int got;
+
+	table = malloc(nbytes);
+	if (!table)
+		return NULL;
+
+	got = gfs2_readi(ip, table, 0, nbytes);
+	if (got == nbytes)
+		return table;
+
+	/* Short or failed read: a partial table is no use to the caller. */
+	free(table);
+	return NULL;
+}
+
+/*
+ * u64cmp - qsort() comparison callback for ascending uint64_t order
+ * @p1: pointer to the first uint64_t
+ * @p2: pointer to the second uint64_t
+ *
+ * Returns: 1 if *p1 > *p2, -1 if *p1 < *p2, 0 if they are equal.
+ */
+static int u64cmp(const void *p1, const void *p2)
+{
+	uint64_t a = *(const uint64_t *)p1;
+	uint64_t b = *(const uint64_t *)p2;
+
+	if (a > b)
+		return 1;
+	/* Was "b < b", which is never true and left the ordering
+	   inconsistent for qsort(). */
+	if (a < b)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * dir_leaf_reada - issue readahead hints for a directory's leaf blocks
+ * @ip: the directory inode
+ * @tbl: hash table of big-endian leaf block pointers
+ * @hsize: number of entries in @tbl
+ *
+ * Collects the byte offset of every pointer accepted by valid_block(),
+ * sorts the offsets ascending, and calls posix_fadvise() with
+ * POSIX_FADV_WILLNEED on one block-sized range per offset.
+ *
+ * This is purely advisory: if the scratch array cannot be allocated the
+ * function returns without issuing any hints.
+ */
+static void dir_leaf_reada(struct gfs2_inode *ip, uint64_t *tbl, unsigned hsize)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	uint64_t *t;
+	uint64_t leaf_no;
+	unsigned n = 0;
+	unsigned i;
+
+	/*
+	 * hsize comes from the on-disk di_depth, so the array can be large;
+	 * use the heap rather than alloca() to avoid overflowing the stack.
+	 */
+	t = malloc(hsize * sizeof(uint64_t));
+	if (t == NULL)
+		return;
+
+	for (i = 0; i < hsize; i++) {
+		leaf_no = be64_to_cpu(tbl[i]);
+		if (valid_block(sdp, leaf_no))
+			t[n++] = leaf_no * sdp->bsize;
+	}
+	qsort(t, n, sizeof(uint64_t), u64cmp);
+	for (i = 0; i < n; i++)
+		posix_fadvise(sdp->device_fd, t[i], sdp->bsize, POSIX_FADV_WILLNEED);
+
+	free(t);
+}
+
/* Checks exhash directory entries */
static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass)
{
int error;
struct gfs2_leaf leaf, oldleaf;
+ unsigned hsize = (1 << ip->i_di.di_depth);
uint64_t leaf_no, old_leaf, bad_leaf = -1;
uint64_t first_ok_leaf;
struct gfs2_buffer_head *lbh;
int lindex;
struct gfs2_sbd *sdp = ip->i_sbd;
int ref_count = 0, old_was_dup;
+ uint64_t *tbl;
+
+ tbl = get_dir_hash(ip);
+ if (tbl == NULL) {
+ perror("get_dir_hash");
+ return -1;
+ }
+
+ /* Turn off system readahead */
+ posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_RANDOM);
+
+ /* Readahead */
+ dir_leaf_reada(ip, tbl, hsize);
/* Find the first valid leaf pointer in range and use it as our "old"
leaf. That way, bad blocks at the beginning will be overwritten
with the first valid leaf. */
first_ok_leaf = leaf_no = -1;
- for (lindex = 0; lindex < (1 << ip->i_di.di_depth); lindex++) {
- gfs2_get_leaf_nr(ip, lindex, &leaf_no);
+ for (lindex = 0; lindex < hsize; lindex++) {
+ leaf_no = be64_to_cpu(tbl[lindex]);
if (valid_block(ip->i_sbd, leaf_no)) {
lbh = bread(sdp, leaf_no);
/* Make sure it's really a valid leaf block. */
@@ -672,19 +736,22 @@ static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass)
"blocks\n"),
(unsigned long long)ip->i_di.di_num.no_addr,
(unsigned long long)ip->i_di.di_num.no_addr);
+ free(tbl);
+ posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
return 1;
}
old_leaf = -1;
memset(&oldleaf, 0, sizeof(oldleaf));
old_was_dup = 0;
- for (lindex = 0; lindex < (1 << ip->i_di.di_depth); lindex++) {
+ for (lindex = 0; lindex < hsize; lindex++) {
if (fsck_abort)
break;
- gfs2_get_leaf_nr(ip, lindex, &leaf_no);
+ leaf_no = be64_to_cpu(tbl[lindex]);
/* GFS has multiple indirect pointers to the same leaf
* until those extra pointers are needed, so skip the dups */
if (leaf_no == bad_leaf) {
+ tbl[lindex] = cpu_to_be64(old_leaf);
gfs2_put_leaf_nr(ip, lindex, old_leaf);
ref_count++;
continue;
@@ -694,8 +761,11 @@ static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass)
}
do {
- if (fsck_abort)
+ if (fsck_abort) {
+ free(tbl);
+ posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
return 0;
+ }
/* If the old leaf was a duplicate referenced by a
previous dinode, we can't check the number of
pointers because the number of pointers may be for
@@ -706,8 +776,10 @@ static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass)
&ref_count,
&lindex,
&oldleaf);
- if (error)
+ if (error) {
+ free(tbl);
return error;
+ }
}
error = check_leaf(ip, lindex, pass, &ref_count,
&leaf_no, old_leaf, &bad_leaf,
@@ -722,6 +794,8 @@ static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass)
(unsigned long long)leaf_no);
} while (1); /* while we have chained leaf blocks */
} /* for every leaf block */
+ free(tbl);
+ posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
return 0;
}