以文本方式查看主题

-  计算机科学论坛  (http://bbs.xml.org.cn/index.asp)
--  『 C/C++编程思想 』  (http://bbs.xml.org.cn/list.asp?boardid=61)
----  C#直接读取磁盘文件(类似linux的Direct IO模式)  (http://bbs.xml.org.cn/dispbbs.asp?boardid=61&rootid=&id=125046)


--  作者:卷积内核
--  发布时间:6/6/2012 1:21:00 PM

--  C#直接读取磁盘文件(类似linux的Direct IO模式)

1. 程序的要求
程序的要求很简单。

(1)命令行程序

(2)有3个参数,读取的文件名,一次读取buffer size,读取的次数count

(3)如果读取次数count未到,文件已经读完,就再次从头读取文件。

使用格式如下:

C:\>****.exe "c:\****.bin" 32768 32768

读取文件"c:\****.bin",每次读取32K(32768字节),读取32768次,读取的量大概1G。

2. 一般的FileStream方式
利用FileStream来读取文件,非常简单,代码如下:

using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Reflection;

namespace DirectIO
{
    public class DIOReader
    {
        static void Main(string[] args)
        {
            long start = DateTime.Now.Ticks;

            if (args.Length < 3)
            {
                Console.WriteLine("parameter error!!");
                return;
            }
            FileStream input = null;

            try
            {
                int bs = Convert.ToInt32(args[1]);
                int count = Convert.ToInt32(args[2]);
                input = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.None, bs);

                byte[] b = new byte[bs];
                for (int i = 0; i < count; i++)
                {
                    if (input.Read(b, 0, bs) == 0)
                        input.Seek(0, SeekOrigin.Begin);
                }
                Console.WriteLine("Read successed! ");
                Console.WriteLine(DateTime.Now.Ticks - start);
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
            finally
            {
                if (input != null)
                {
                    input.Flush();
                    input.Close();
                    // 清除使用的对象
                    GC.Collect();
                    GC.Collect();
                }
            }
        }
    }
}

编译后的exe文件可以按照既定要求执行,但是对于同一文件,第二次读取明显比第一次快很多(大家可以用个1G左右的大文件试试)。第三次读取,第四次读取……和第二次差不多,都很快。

基于上述情况,可以判断是缓存的原因,导致第二次及以后各次都比较快。

但是从代码中来看,已经执行了input.Flush();input.Close();甚至是GC.Collect();

所以可能是Windows系统或者CLR对文件读取操作进行了优化,使用了缓存。


--  作者:卷积内核
--  发布时间:6/6/2012 1:22:00 PM

--  
3. 利用kernel32.dll中的CreateFile函数
既然上述方法行不通,就得调查新的方法。通过google的查询,大部分人都是建议用C/C++调用系统API来实现。

不过最后终于找到了用c#实现了无缓存直接读取磁盘上的文件的方法。其实也是通过DllImport利用了kernel32.dll,不完全是托管代码。(估计用纯托管代码实现不了)

参考的文章:How do I read a disk directly with .Net?

还有msdn中的CreateFile API

实现代码就是参考的How do I read a disk directly with .Net?,分为两部分

(1)利用CreateFile API构造的可直接读取磁盘的DeviceStream

using System;
using System.Runtime.InteropServices;
using System.IO;
using Microsoft.Win32.SafeHandles;

namespace DirectIO
{
    public class DeviceStream : Stream, IDisposable
    {
        public const short FILE_ATTRIBUTE_NORMAL = 0x80;
        public const short INVALID_HANDLE_VALUE = -1;
        public const uint GENERIC_READ = 0x80000000;
        public const uint NO_BUFFERING = 0x20000000;
        public const uint GENERIC_WRITE = 0x40000000;
        public const uint CREATE_NEW = 1;
        public const uint CREATE_ALWAYS = 2;
        public const uint OPEN_EXISTING = 3;

        // Use interop to call the CreateFile function.
        // For more information about CreateFile,
        // see the unmanaged MSDN reference library.
        [DllImport("kernel32.dll", SetLastError = true, CharSet = CharSet.Unicode)]
        private static extern IntPtr CreateFile(string lpFileName, uint dwDesiredAccess,
          uint dwShareMode, IntPtr lpSecurityAttributes, uint dwCreationDisposition,
          uint dwFlagsAndAttributes, IntPtr hTemplateFile);

        [DllImport("kernel32.dll", SetLastError = true)]
        private static extern bool ReadFile(
            IntPtr hFile,                        // handle to file
            byte[] lpBuffer,                // data buffer
            int nNumberOfBytesToRead,        // number of bytes to read
            ref int lpNumberOfBytesRead,    // number of bytes read
            IntPtr lpOverlapped
            //
            // ref OVERLAPPED lpOverlapped        // overlapped buffer
            );

        private SafeFileHandle handleValue = null;
        private FileStream _fs = null;

        public DeviceStream(string device)
        {
            Load(device);
        }

        private void Load(string Path)
        {
            if (string.IsNullOrEmpty(Path))
            {
                throw new ArgumentNullException("Path");
            }

            // Try to open the file.
            IntPtr ptr = CreateFile(Path, GENERIC_READ, 0, IntPtr.Zero, OPEN_EXISTING, NO_BUFFERING, IntPtr.Zero);

            handleValue = new SafeFileHandle(ptr, true);
            _fs = new FileStream(handleValue, FileAccess.Read);

            // If the handle is invalid,
            // get the last Win32 error
            // and throw a Win32Exception.
            if (handleValue.IsInvalid)
            {
                Marshal.ThrowExceptionForHR(Marshal.GetHRForLastWin32Error());
            }
        }

        public override bool CanRead
        {
            get { return true; }
        }

        public override bool CanSeek
        {
            get { return false; }
        }

        public override bool CanWrite
        {
            get { return false; }
        }

        public override void Flush()
        {
            return;
        }

        public override long Length
        {
            get { return -1; }
        }

        public override long Position
        {
            get
            {
                throw new NotImplementedException();
            }
            set
            {
                throw new NotImplementedException();
            }
        }
        /// <summary>
        /// </summary>
        /// <param name="buffer">An array of bytes. When this method returns, the buffer contains the specified byte array with the values between offset and
        /// (offset + count - 1) replaced by the bytes read from the current source. </param>
        /// <param name="offset">The zero-based byte offset in buffer at which to begin storing the data read from the current stream. </param>
        /// <param name="count">The maximum number of bytes to be read from the current stream.</param>
        /// <returns></returns>
        public override int Read(byte[] buffer, int offset, int count)
        {
            int BytesRead = 0;
            var BufBytes = new byte[count];
            if (!ReadFile(handleValue.DangerousGetHandle(), BufBytes, count, ref BytesRead, IntPtr.Zero))
            {
                Marshal.ThrowExceptionForHR(Marshal.GetHRForLastWin32Error());
            }
            for (int i = 0; i < BytesRead; i++)
            {
                buffer[offset + i] = BufBytes[i];
            }
            return BytesRead;
        }
        public override int ReadByte()
        {
            int BytesRead = 0;
            var lpBuffer = new byte[1];
            if (!ReadFile(
            handleValue.DangerousGetHandle(),                        // handle to file
            lpBuffer,                // data buffer
            1,        // number of bytes to read
            ref BytesRead,    // number of bytes read
            IntPtr.Zero
            ))
            { Marshal.ThrowExceptionForHR(Marshal.GetHRForLastWin32Error()); ;}
            return lpBuffer[0];
        }

        public override long Seek(long offset, SeekOrigin origin)
        {
            throw new NotImplementedException();
        }

        public override void SetLength(long value)
        {
            throw new NotImplementedException();
        }

        public override void Write(byte[] buffer, int offset, int count)
        {
            throw new NotImplementedException();
        }

        public override void Close()
        {
            handleValue.Close();
            handleValue.Dispose();
            handleValue = null;
            base.Close();
        }
        private bool disposed = false;

        new void Dispose()
        {
            Dispose(true);
            base.Dispose();
            GC.SuppressFinalize(this);
        }

        private new void Dispose(bool disposing)
        {
            // Check to see if Dispose has already been called.
            if (!this.disposed)
            {
                if (disposing)
                {
                    if (handleValue != null)
                    {
                        _fs.Dispose();
                        handleValue.Close();
                        handleValue.Dispose();
                        handleValue = null;
                    }
                }
                // Note disposing has been done.
                disposed = true;

            }
        }

    }
}

注意和原文相比,改动了一个地方。即加了个NO_BUFFERING的参数,并在调用CreateFile时使用了这个参数。

IntPtr ptr = CreateFile(Path, GENERIC_READ, 0, IntPtr.Zero, OPEN_EXISTING, NO_BUFFERING, IntPtr.Zero);

之前没有加这个参数的时候,在xp上测试还是第二次比第一次快很多。


--  作者:卷积内核
--  发布时间:6/6/2012 1:22:00 PM

--  
(2)完成指定要求的DIOReader

using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Reflection;

namespace DirectIO
{
    public class DIOReader
    {
        static void Main(string[] args)
        {
            long start = DateTime.Now.Ticks;

            if (args.Length < 3)
            {
                Console.WriteLine("parameter error!!");
                return;
            }
            BinaryReader input = null;

            try
            {
                int bs = Convert.ToInt32(args[1]);
                int count = Convert.ToInt32(args[2]);
                input = new BinaryReader(new DeviceStream(args[0]));

                byte[] b = new byte[bs];
                for (int i = 0; i < count; i++)
                {
                    if (input.Read(b, 0, bs) == 0)
                        input.BaseStream.Seek(0, SeekOrigin.Begin);
                }
                Console.WriteLine("Read successed! ");
                Console.WriteLine("Total cost " + (new TimeSpan(DateTime.Now.Ticks - start)).TotalSeconds + " seconds");
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
            finally
            {
                if (input != null)
                {
                    input.Close();
                }
                //Console.ReadKey(true);
            }
        }
    }
}

这样,就完成了类似linux上Direct IO模式读取文件的操作。

通过这个例子可以看出,C#不仅可以开发上层的应用,也可以结合一些非托管的dll完成更加底层的操作。


W 3 C h i n a ( since 2003 ) 旗 下 站 点
苏ICP备05006046号《全国人大常委会关于维护互联网安全的决定》《计算机信息网络国际联网安全保护管理办法》
116.943ms