List Info

Thread: Taking a stab at a pure Ruby Dir.glob




Taking a stab at a pure Ruby Dir.glob
country flaguser name
United States
2007-10-21 09:42:22
Hi all,

Here's what I've come up with so far for a pure Ruby
Dir.glob for MS 
Windows. It almost works. The problem right now is the []
notation, 
which I'm not translating properly into a regex.

I haven't started on the '**' notation yet either, but I
figure that's 
more of a control flow issue. Feel free to disagree with me
and/or 
provide a solution. 

# mydir.rb - make sure you have the windows-pr library
first.
require 'windows/file'
require 'windows/error'
require 'windows/unicode'
require 'windows/directory'
require 'windows/process'
require 'windows/handle'
require 'windows/path'

class MyDir
    extend Windows::Error
    extend Windows::File
    extend Windows::Unicode
    extend Windows:irectory

    extend Windows::Process
    extend Windows::Handle
    extend Windows::Path
    include Windows::Error
    include Windows::File
    include Windows::Handle

    def self.glob(pattern, flags = 0)
       raise TypeError unless flags.is_a?(Integer)

       dirname   = File.dirname(pattern) + "\*"
       pattern   = File.basename(pattern)
       recursive = false
       regex     = nil

       #puts "ORIG PATTERN: '#'"

       # Convert backslashes to literal backslashes
       pattern.gsub!(".", "\.")

       # Convert 3 or more '*' characters to a single '*'
       pattern.gsub!(/*{3,}/, '*')

       # Convert leading text to '/^xxx' format
       pattern.sub!(/^(w)/, '^1')

       # Convert all remaining literal '?' to a '.' (any
single char).
       pattern.sub!(/^?/, '^.')
       pattern.tr!('?', '.')

       # Convert all '*' to '.*?' to get the (nongreedy)
intended result.
       pattern.gsub!("*", ".*?")

       # Convert {x, y} to (x|y)
       pattern.gsub!(/{(.*?)}/, '(1)')
       pattern.gsub!(/,s*/, '|')

       # Convert {x, y} to (x|y)
       pattern.gsub!(/{(.*?)}/, '(1)')
       pattern.gsub!(/,s*/, '|')

       dp = pattern.dup

       # This is an attempt to convert '[]' sets into
       # a regular expression.
       # TODO: Fix!
       dp.scan(/[(.*?)]/){ |array|
          array.each{ |match|
             start, finish = match.split('-')
             if finish
                chars = Range.new(start, finish).to_a
             else
                chars = start.split('')
             end
             pattern << '(' + chars.join('|')
          }
          pattern << ')'
       }

       pattern << '$'
       pattern.gsub!(/[.*?]/, '')

       puts "NEW PATTERN: '#'"

       regex = Regexp.new(pattern)

       fdata = 0.chr * 320 # 580 if wide
       array = []

       hfind = FindFirstFile(dirname, fdata)

       if hfind == INVALID_HANDLE_VALUE
          raise ArgumentError, get_last_error
       end

       file = fdata[44, MAX_PATH].unpack("Z*")[0]

       array << file if regex.match(file)

       while FindNextFile(hfind, fdata)
          file = fdata[44,
MAX_PATH].unpack("Z*")[0]
          array << file if regex.match(file)
       end

       error = GetLastError()

       FindClose(hfind)

       if(error != ERROR_NO_MORE_FILES)
          raise get_last_error(error)
       end

       unless flags & File::FNM_DOTMATCH > 0
          array.delete_if{ |file| file =~ /^..*/ }
       end

       array
    end
end

Here's a test suite you can use to verify your results.
Additions welcome:

############################################################
############
# tc_glob.rb
#
# Test case for the MyDir.glob class method.
############################################################
############
$:.unshift Dir.pwd
require 'test/unit'
require 'fileutils'
require 'mydir'

class TC_MyDir_Glob_Class < Test::Unit::TestCase

    # Helper method to get just the basename of the
filename
    def base(files)
       files.map{ |f| File.basename(f) }
    end

    def setup
       foo_files = %w/a.c a.cpp b.c b.h g.rb d/
       bar_files = %w/a.c a2.cpp a3.h a4.rb/

       FileUtils.mkdir_p('foo/bar/baz')
       Dir.chdir('foo'){ foo_files.each{ |f|
FileUtils.touch(f) } }
       Dir.chdir('foo/bar'){ bar_files.each{ |f|
FileUtils.touch(f) } }
    end

    def test_glob_pattern
       assert_equal(%w/a.c a.cpp b.c b.h bar d g.rb/, 
base(MyDir.glob('foo/{*}')))
       assert_equal(%w/g.rb/,
base(MyDir.glob('foo/{*.rb}')))
       assert_equal(%w/a.cpp g.rb/,
base(MyDir.glob('foo/*.{rb,cpp}')))
       assert_equal(%w/a.cpp g.rb/,
base(MyDir.glob('foo/*.{rb,cp}*')))
       assert_equal([], base(MyDir.glob('foo/*.{}')))
    end

    def test_glob_char_list
       assert_equal(%w/d/, base(MyDir.glob('foo/[a-d]')))
       assert_equal(%w/a.c a.cpp/,
base(MyDir.glob('foo/[a]*')))
       assert_equal(%w/a.c a.cpp b.c b.h bar d/, 
base(MyDir.glob('foo/[a-d]*')))
       assert_equal(%w/d g.rb/,
base(MyDir.glob('foo/[^a-b]*')))
       if WINDOWS
          assert_equal(%w/a.c a.cpp b.c b.h bar d g.rb/, 
base(MyDir.glob('foo/[A-Z]*')))
       else
          assert_equal([], base(MyDir.glob('foo/[A-Z]*')))
       end
    end

    def test_glob_char_list_edge_cases
       assert_equal([], MyDir.glob('foo/[]'))
       assert_equal(['d'], base(MyDir.glob('foo/[^]')))
    end

    def test_glob_question_mark
       assert_equal(%w/a.c/, base(MyDir.glob('foo/a.?')))
       assert_equal(%w/a.cpp/,
base(MyDir.glob('foo/a.c?p')))
       assert_equal(%w/a.c b.c b.h bar/,
base(MyDir.glob('foo/???')))
       assert_equal(%w/a.c b.c b.h/,
base(MyDir.glob('foo/?.?')))
    end

    def test_glob_basic
       assert_respond_to(MyDir, :glob)
       assert_nothing_raised{ MyDir.glob("*") }
    end

    def test_glob_valid_metacharacters
       assert_nothing_raised{ MyDir.glob("**") }
       assert_nothing_raised{ MyDir.glob("foo.*")
}
       assert_nothing_raised{ MyDir.glob("foo.?")
}
       assert_nothing_raised{
MyDir.glob("*.[^r]*") }
       assert_nothing_raised{
MyDir.glob("*.[a-z][a-z]") }
       assert_nothing_raised{
MyDir.glob("*.{rb,h}") }
       assert_nothing_raised{ MyDir.glob("*.t")
}
    end

    def test_glob_star
       assert_equal(%w/a.c a.cpp b.c b.h bar d g.rb/, 
base(MyDir.glob('foo/*')))
       assert_equal(%w/a.c a.cpp b.c b.h bar d g.rb/, 
base(MyDir.glob('foo/****')))
       assert_equal(%w/a.c b.c/,
base(MyDir.glob('foo/*.c')))
       assert_equal(%w/a.c a.cpp/,
base(MyDir.glob('foo/a*')))
       assert_equal(%w/a.c a.cpp/,
base(MyDir.glob('foo/a*c*')))
       assert_equal(%w/a.cpp/,
base(MyDir.glob('foo/a*p*')))
       assert_equal([], MyDir.glob('x*'))
    end

    def test_glob_double_star
       assert_equal(%w/a.c a.cpp b.c b.h bar d g.rb/, 
base(MyDir.glob('foo/**')))
       assert_equal(%w/a.c b.c a.c/,
base(MyDir.glob('**/*.c')))
       assert_equal(%w/a.c b.c a.c/,
base(MyDir.glob('foo/**/*.c')))
       assert_equal(%w/a.c a.cpp a.c a2.cpp a3.h a4.rb/, 
base(MyDir.glob('foo/**/a*')))
       assert_equal([], MyDir.glob('**/x*'))
    end

    def test_glob_flags
       assert_nothing_raised{ MyDir.glob("*",
File::FNM_DOTMATCH) }
       assert_nothing_raised{ MyDir.glob("*",
File::FNM_NOESCAPE) }
       assert_nothing_raised{ MyDir.glob("*",
File::FNM_PATHNAME) }
       assert_nothing_raised{ MyDir.glob("*",
File::FNM_CASEFOLD) }
    end

    def test_glob_expected_errors
       assert_raises(TypeError){ MyDir.glob("*",
"*") }
    end

    def teardown
       foo_files = nil
       bar_files = nil
       FileUtils.rm_rf('foo')
    end
end

Please feel free to take a stab at this. Good luck!

Regards,

Dan

PS - I took a look at Python's fnmatch.py code. It's of no
use - they 
don't support '**' or '{}' notation.
_______________________________________________
win32utils-devel mailing list
win32utils-develrubyforge.org
http://rubyforge.org/mailman/listinfo/win32utils-devel


[1]

about | contact  Other archives ( Real Estate discussion Medical topics )