diff --git a/.gitignore b/.gitignore index 0da3d84..a80d19e 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ !/lib/ManUtils/Build.PL !/lib/ManUtils/ManUtils.pm !/lib/ManUtils/ManUtils.xs +indexer/target + diff --git a/Makefile b/Makefile index b1169e5..259b15e 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,6 @@ -.PHONY: ManUtils +.PHONY: ManUtils indexer clean + +all: ManUtils indexer ManUtils: lib/ManUtils/Build cd lib/ManUtils && perl Build.PL && ./Build install --install-base=inst @@ -6,7 +8,12 @@ ManUtils: lib/ManUtils/Build lib/ManUtils/Build: lib/ManUtils/Build.PL cd lib/ManUtils && perl Build.PL +indexer: indexer/target/release/indexer + +indexer/target/release/indexer: indexer/Cargo.toml indexer/src/*.rs + cd indexer && cargo build --release + clean: cd lib/ManUtils && ./Build distclean rm -rf lib/ManUtils/inst - + cd indexer && cargo clean diff --git a/indexer/Cargo.lock b/indexer/Cargo.lock new file mode 100644 index 0000000..5b22fa7 --- /dev/null +++ b/indexer/Cargo.lock @@ -0,0 +1,703 @@ +[root] +name = "indexer" +version = "0.1.0" +dependencies = [ + "chrono 0.2.25 (registry+https://github.com/rust-lang/crates.io-index)", + "clap 2.17.1 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding 0.3.0-dev (git+https://github.com/lifthrasiir/rust-encoding)", + "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", + "hyper 0.9.11 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "libarchive3-sys 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "postgres 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", + "ring 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", + "url 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "aho-corasick" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ansi_term" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "bitflags" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "bufstream" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "byteorder" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "chrono" +version = "0.2.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)", + "time 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "clap" +version = "2.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", + "strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "term_size 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-segmentation 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-width 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "vec_map 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "cookie" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "openssl 0.7.14 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)", + "time 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)", + "url 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding" +version = "0.3.0-dev" +source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c" +dependencies = [ + "encoding-index-japanese 1.20141219.6 (git+https://github.com/lifthrasiir/rust-encoding)", + "encoding-index-korean 1.20141219.6 (git+https://github.com/lifthrasiir/rust-encoding)", + "encoding-index-simpchinese 1.20160120.0 (git+https://github.com/lifthrasiir/rust-encoding)", + "encoding-index-singlebyte 1.20160120.0 (git+https://github.com/lifthrasiir/rust-encoding)", + "encoding-index-tradchinese 1.20141219.6 (git+https://github.com/lifthrasiir/rust-encoding)", + "encoding-types 0.2.0 (git+https://github.com/lifthrasiir/rust-encoding)", +] + +[[package]] +name = "encoding-index-japanese" +version = "1.20141219.6" +source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c" +dependencies = [ + "encoding_index_tests 0.1.5 (git+https://github.com/lifthrasiir/rust-encoding)", +] + +[[package]] +name = "encoding-index-korean" +version = "1.20141219.6" +source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c" +dependencies = [ + "encoding_index_tests 0.1.5 (git+https://github.com/lifthrasiir/rust-encoding)", +] + +[[package]] +name = "encoding-index-simpchinese" +version = "1.20160120.0" +source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c" +dependencies = [ + "encoding_index_tests 0.1.5 (git+https://github.com/lifthrasiir/rust-encoding)", +] + +[[package]] +name = "encoding-index-singlebyte" +version = "1.20160120.0" +source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c" +dependencies = [ + "encoding_index_tests 0.1.5 (git+https://github.com/lifthrasiir/rust-encoding)", +] + +[[package]] +name = "encoding-index-tradchinese" +version = "1.20141219.6" +source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c" +dependencies = [ + "encoding_index_tests 0.1.5 (git+https://github.com/lifthrasiir/rust-encoding)", +] + +[[package]] +name = "encoding-types" +version = "0.2.0" +source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c" + +[[package]] +name = "encoding_index_tests" +version = "0.1.5" +source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c" + +[[package]] +name = "env_logger" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "fallible-iterator" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "gcc" +version = "0.3.38" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "gdi32-sys" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "hex" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "hpack" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "httparse" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "hyper" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cookie 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", + "httparse 1.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "language-tags 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "mime 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "openssl 0.7.14 (registry+https://github.com/rust-lang/crates.io-index)", + "openssl-verify 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)", + "solicit 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", + "time 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)", + "traitobject 0.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "typeable 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "unicase 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "url 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "idna" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "matches 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-bidi 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-normalization 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "kernel32-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "language-tags" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "lazy_static" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "libarchive3-sys" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", + "pkg-config 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "libc" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "libressl-pnacl-sys" +version = "2.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "pnacl-build-helper 1.4.10 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "log" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "matches" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "md5" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "memchr" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "mime" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-integer 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)", + "num-iter 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-integer" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-traits 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-iter" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-integer 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-traits" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "num_cpus" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "openssl" +version = "0.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "gcc 0.3.38 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", + "openssl-sys 0.7.17 (registry+https://github.com/rust-lang/crates.io-index)", + "openssl-sys-extras 0.7.14 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "openssl-sys" +version = "0.7.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "gdi32-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", + "libressl-pnacl-sys 2.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "pkg-config 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", + "user32-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "openssl-sys-extras" +version = "0.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "gcc 0.3.38 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", + "openssl-sys 0.7.17 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "openssl-verify" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "openssl 0.7.14 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "phf" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "phf_shared 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "phf_shared" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "pkg-config" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "pnacl-build-helper" +version = "1.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "postgres" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bufstream 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "fallible-iterator 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "hex 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "phf 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)", + "postgres-protocol 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "postgres-protocol" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "byteorder 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", + "fallible-iterator 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "hex 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "md5 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex" +version = "0.1.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "ring" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "untrusted 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rustc-serialize" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "rustc_version" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "semver 0.1.20 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "semver" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "solicit" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "hpack 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "strsim" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "tempdir" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "term_size" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "thread-id" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "thread_local" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "time" +version = "0.1.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "traitobject" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "typeable" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "unicase" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rustc_version 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "unicode-bidi" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "matches 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "unicode-normalization" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "unicode-segmentation" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "unicode-width" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "untrusted" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "url" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "idna 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "matches 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "user32-sys" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "utf8-ranges" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "vec_map" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" +"checksum ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "23ac7c30002a5accbf7e8987d0632fa6de155b7c3d39d0067317a391e00a2ef6" +"checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d" +"checksum bufstream 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7b48dbe2ff0e98fa2f03377d204a9637d3c9816cd431bfe05a8abbd0ea11d074" +"checksum byteorder 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0fc10e8cc6b2580fda3f36eb6dc5316657f812a3df879a44a66fc9f0fdbc4855" +"checksum chrono 0.2.25 (registry+https://github.com/rust-lang/crates.io-index)" = "9213f7cd7c27e95c2b57c49f0e69b1ea65b27138da84a170133fd21b07659c00" +"checksum clap 2.17.1 (registry+https://github.com/rust-lang/crates.io-index)" = "27dac76762fb56019b04aed3ccb43a770a18f80f9c2eb62ee1a18d9fb4ea2430" +"checksum cookie 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0e3d6405328b6edb412158b3b7710e2634e23f3614b9bb1c412df7952489a626" +"checksum encoding 0.3.0-dev (git+https://github.com/lifthrasiir/rust-encoding)" = "" +"checksum encoding-index-japanese 1.20141219.6 (git+https://github.com/lifthrasiir/rust-encoding)" = "" +"checksum encoding-index-korean 1.20141219.6 (git+https://github.com/lifthrasiir/rust-encoding)" = "" +"checksum encoding-index-simpchinese 1.20160120.0 (git+https://github.com/lifthrasiir/rust-encoding)" = "" +"checksum encoding-index-singlebyte 1.20160120.0 (git+https://github.com/lifthrasiir/rust-encoding)" = "" +"checksum encoding-index-tradchinese 1.20141219.6 (git+https://github.com/lifthrasiir/rust-encoding)" = "" +"checksum encoding-types 0.2.0 (git+https://github.com/lifthrasiir/rust-encoding)" = "" +"checksum encoding_index_tests 0.1.5 (git+https://github.com/lifthrasiir/rust-encoding)" = "" +"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f" +"checksum fallible-iterator 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "5d48ab1bc11a086628e8cc0cc2c2dc200b884ac05c4b48fb71d6036b6999ff1d" +"checksum gcc 0.3.38 (registry+https://github.com/rust-lang/crates.io-index)" = "553f11439bdefe755bf366b264820f1da70f3aaf3924e594b886beb9c831bcf5" +"checksum gdi32-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0912515a8ff24ba900422ecda800b52f4016a56251922d397c576bf92c690518" +"checksum hex 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d6a22814455d41612f41161581c2883c0c6a1c41852729b17d5ed88f01e153aa" +"checksum hpack 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3d2da7d3a34cf6406d9d700111b8eafafe9a251de41ae71d8052748259343b58" +"checksum httparse 1.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "46534074dbb80b070d60a5cb8ecadd8963a00a438ae1a95268850a7ef73b67ae" +"checksum hyper 0.9.11 (registry+https://github.com/rust-lang/crates.io-index)" = "edd47c66782933e546a32ae89ca3c49263b2ba9bc29f3a0d5c52fff48e0ac67c" +"checksum idna 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1053236e00ce4f668aeca4a769a09b3bf5a682d802abd6f3cb39374f6b162c11" +"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" +"checksum language-tags 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a91d884b6667cd606bb5a69aa0c99ba811a115fc68915e7056ec08a46e93199a" +"checksum lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "49247ec2a285bb3dcb23cbd9c35193c025e7251bfce77c1d5da97e6362dffe7f" +"checksum libarchive3-sys 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "3cd3beae8f59a4c7a806523269b5392037577c150446e88d684dfa6de6031ca7" +"checksum libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "044d1360593a78f5c8e5e710beccdc24ab71d1f01bc19a29bcacdba22e8475d8" +"checksum libressl-pnacl-sys 2.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "cbc058951ab6a3ef35ca16462d7642c4867e6403520811f28537a4e2f2db3e71" +"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054" +"checksum matches 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "efd7622e3022e1a6eaa602c4cea8912254e5582c9c692e9167714182244801b1" +"checksum md5 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7df230903ccdffd6b3b4ec21624498ea64c912ce50297846907f0b8e1bb249dd" +"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" +"checksum mime 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "b5c93a4bd787ddc6e7833c519b73a50883deb5863d76d9b71eb8216fb7f94e66" +"checksum num 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)" = "bde7c03b09e7c6a301ee81f6ddf66d7a28ec305699e3d3b056d2fc56470e3120" +"checksum num-integer 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)" = "fb24d9bfb3f222010df27995441ded1e954f8f69cd35021f6bef02ca9552fb92" +"checksum num-iter 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)" = "287a1c9969a847055e1122ec0ea7a5c5d6f72aad97934e131c83d5c08ab4e45c" +"checksum num-traits 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)" = "a16a42856a256b39c6d3484f097f6713e14feacd9bfb02290917904fae46c81c" +"checksum num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8890e6084723d57d0df8d2720b0d60c6ee67d6c93e7169630e4371e88765dcad" +"checksum openssl 0.7.14 (registry+https://github.com/rust-lang/crates.io-index)" = "c4117b6244aac42ed0150a6019b4d953d28247c5dd6ae6f46ae469b5f2318733" +"checksum openssl-sys 0.7.17 (registry+https://github.com/rust-lang/crates.io-index)" = "89c47ee94c352eea9ddaf8e364be7f978a3bb6d66d73176572484238dd5a5c3f" +"checksum openssl-sys-extras 0.7.14 (registry+https://github.com/rust-lang/crates.io-index)" = "11c5e1dba7d3d03d80f045bf0d60111dc69213b67651e7c889527a3badabb9fa" +"checksum openssl-verify 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3ed86cce894f6b0ed4572e21eb34026f1dc8869cb9ee3869029131bc8c3feb2d" +"checksum phf 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)" = "17896951e179a6cbed7d3519b3078ac6c03a347d3e9cf8f303c8a1a73c5a3e44" +"checksum phf_shared 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)" = "bb6c14aac1140c2b06b41477096f249416b17c893d56386a892ac657edfdffba" +"checksum pkg-config 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8cee804ecc7eaf201a4a207241472cc870e825206f6c031e3ee2a72fa425f2fa" +"checksum pnacl-build-helper 1.4.10 (registry+https://github.com/rust-lang/crates.io-index)" = "61c9231d31aea845007443d62fcbb58bb6949ab9c18081ee1e09920e0cf1118b" +"checksum postgres 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a7ef92468927003a037e175b54320319e358886865899b37f7318837a646a9fd" +"checksum postgres-protocol 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7e2fc3d800dacc2dd749b690ad15b9b78bc04c26c3f0525cbe163436559bc3fc" +"checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5" +"checksum regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)" = "4fd4ace6a8cf7860714a2c2280d6c1f7e6a413486c13298bbc86fd3da019402f" +"checksum regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957" +"checksum ring 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c0743ef007bcff4909b107907a410418eb7e5c6ad55b843d70b39f62bfb7112e" +"checksum rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)" = "6159e4e6e559c81bd706afe9c8fd68f547d3e851ce12e76b1de7914bab61691b" +"checksum rustc_version 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "c5f5376ea5e30ce23c03eb77cbe4962b988deead10910c372b226388b594c084" +"checksum semver 0.1.20 (registry+https://github.com/rust-lang/crates.io-index)" = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac" +"checksum solicit 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "172382bac9424588d7840732b250faeeef88942e37b6e35317dce98cafdd75b2" +"checksum strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "50c069df92e4b01425a8bf3576d5d417943a6a7272fbabaf5bd80b1aaa76442e" +"checksum tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "87974a6f5c1dfb344d733055601650059a3363de2a6104819293baff662132d6" +"checksum term_size 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3f7f5f3f71b0040cecc71af239414c23fd3c73570f5ff54cf50e03cef637f2a0" +"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03" +"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5" +"checksum time 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)" = "3c7ec6d62a20df54e07ab3b78b9a3932972f4b7981de295563686849eb3989af" +"checksum traitobject 0.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "07eaeb7689bb7fca7ce15628319635758eda769fed481ecfe6686ddef2600616" +"checksum typeable 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1410f6f91f21d1612654e7cc69193b0334f909dcf2c790c4826254fbb86f8887" +"checksum unicase 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "13a5906ca2b98c799f4b1ab4557b76367ebd6ae5ef14930ec841c74aed5f3764" +"checksum unicode-bidi 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c1f7ceb96afdfeedee42bade65a0d585a6a0106f681b6749c8ff4daa8df30b3f" +"checksum unicode-normalization 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "26643a2f83bac55f1976fb716c10234485f9202dcd65cfbdf9da49867b271172" +"checksum unicode-segmentation 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "b905d0fc2a1f0befd86b0e72e31d1787944efef9d38b9358a9e92a69757f7e3b" +"checksum unicode-width 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2d6722facc10989f63ee0e20a83cd4e1714a9ae11529403ac7e0afd069abc39e" +"checksum untrusted 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5d9bc0e6e73a10975d1fbff8ac3541e221181b0d8998351600fb5523de634c0d" +"checksum url 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "48ccf7bd87a81b769cf84ad556e034541fb90e1cd6d4bc375c822ed9500cd9d7" +"checksum user32-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4ef4711d107b21b410a3a974b1204d9accc8b10dad75d8324b5d755de1617d47" +"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f" +"checksum vec_map 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cac5efe5cb0fa14ec2f84f83c701c562ee63f6dcc680861b21d65c682adfb05f" +"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" +"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" diff --git a/indexer/Cargo.toml b/indexer/Cargo.toml new file mode 100644 index 0000000..a15db65 --- /dev/null +++ b/indexer/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "indexer" +version = "0.1.0" +authors = ["Yorhel "] + +[dependencies] +regex = "0.1.77" +log = "0.3.6" +env_logger = "0.3.5" +lazy_static = "0.2.1" +libc = "0.2.17" +libarchive3-sys = "0.1.2" +encoding = { git = "https://github.com/lifthrasiir/rust-encoding", features = ["no-optimized-legacy-encoding"] } +ring = "0.5.3" +postgres = "0.12.0" +clap = "2.16.3" +hyper = "0.9.11" +url = "1.2.3" +chrono = "0.2.25" diff --git a/indexer/src/archive.rs b/indexer/src/archive.rs new file mode 100644 index 0000000..c536dcb --- /dev/null +++ b/indexer/src/archive.rs @@ -0,0 +1,343 @@ +use std::str; +use std::ptr; +use std::error::Error as ErrorTrait; +use std::io::{Result,Error,Read}; +use std::ffi::{CStr,CString}; + +use libc::{c_void,ssize_t}; +use libarchive3_sys::ffi; + + +/* This is a safe, limited and opinionated wrapper around the libarchive C bindings. + * I initially used the libarchive crate, but it has several issues. Some of which are not fixable + * without a complete rewrite. + * - Panics on non-UTF8 path names + * - Panics on hard links (PR #6) + * - API is far too flexible, easy to misuse and get panics/segfaults + * - Impossible to correctly read files from an archive (issue #7) + * - Does not provide a convenient Read interface for files + * + * Barring any unexpected behaviour or bugs in libarchive, the API below should not panic or + * segfault for any archive or usage pattern. + */ + +pub struct Archive<'a> { + a: *mut ffi::Struct_archive, + rd: &'a mut Read, + buf: Vec, + err: Option, + eof: bool, +} + + +pub struct ArchiveEntry<'a> { + a: Box>, + e: *mut ffi::Struct_archive_entry, +} + +pub struct RawEntry<'a>(Box>); + + +#[derive(Debug,PartialEq,Eq)] +pub enum FileType { + File, + Directory, + Link(String), + Other, // Also includes Link() +} + + +unsafe extern "C" fn archive_read_cb(_: *mut ffi::Struct_archive, data: *mut c_void, buf: *mut *const c_void) -> ssize_t { + let arch: &mut Archive = &mut *(data as *mut Archive); + *buf = arch.buf.as_mut_ptr() as *mut c_void; + match arch.rd.read(&mut arch.buf[..]) { + Ok(s) => s as ssize_t, + Err(e) => { + let desc = CString::new(e.description()).unwrap(); + let fmt = CString::new("%s").unwrap(); + ffi::archive_set_error(arch.a, e.raw_os_error().unwrap_or(0), fmt.as_ptr(), desc.as_ptr()); + arch.err = Some(e); + -1 + } + } +} + + +impl<'a> Archive<'a> { + fn new(rd: &mut Read, a: *mut ffi::Struct_archive) -> Result> { + let bufsize = 64*1024; + let mut buf = Vec::with_capacity(bufsize); + unsafe { buf.set_len(bufsize) }; + let mut ret = Box::new(Archive { a: a, rd: rd, buf: buf, err: None, eof: false }); + + let aptr: *mut c_void = &mut *ret as *mut Archive as *mut c_void; + let r = unsafe { ffi::archive_read_open(a, aptr, None, Some(archive_read_cb), None) }; + if r == ffi::ARCHIVE_FATAL { + return Err(ret.error()); + } + Ok(ret) + } + + fn error(&mut self) -> Error { + self.err.take().unwrap_or_else(|| { + let err = Error::from_raw_os_error(unsafe { ffi::archive_errno(self.a) }); + let desc = unsafe { ffi::archive_error_string(self.a) }; + if desc.is_null() { + return err; + } + if let Ok(s) = str::from_utf8(unsafe { CStr::from_ptr(desc) }.to_bytes()) { + Error::new(err.kind(), s) + } else { + err + } + }) + } + + fn entry(self: Box) -> Result>> { + let mut ent = ArchiveEntry { + a: self, + e: ptr::null_mut() + }; + ent.a.eof = false; + let res = unsafe { ffi::archive_read_next_header(ent.a.a, &mut ent.e) }; + match res { + ffi::ARCHIVE_EOF => Ok(None), + ffi::ARCHIVE_FATAL => Err(ent.a.error()), + _ => Ok(Some(ent)) + } + } + + fn read(&mut self, buf: &mut [u8]) -> Result { + // libarchive tends to throw an error if you try to read after an EOF; handle that case + // here. + if self.eof { + return Ok(0); + } + let cbuf = buf.as_mut_ptr() as *mut c_void; + let n = unsafe { ffi::archive_read_data(self.a, cbuf, buf.len()) }; + if n >= 0 { + self.eof = n == 0; + Ok(n as usize) + } else { + Err(self.error()) + } + } + + pub fn open_archive(rd: &mut Read) -> Result> { + let a = unsafe { + let a = ffi::archive_read_new(); + ffi::archive_read_support_filter_all(a); + ffi::archive_read_support_format_all(a); + a + }; + try!(Self::new(rd, a)).entry() + } + + pub fn open_raw(rd: &mut Read) -> Result { + let a = unsafe { + let a = ffi::archive_read_new(); + ffi::archive_read_support_filter_all(a); + ffi::archive_read_support_format_raw(a); + ffi::archive_read_support_format_empty(a); + a + }; + let mut a = try!(Self::new(rd, a)); + let mut e: *mut ffi::Struct_archive_entry = ptr::null_mut(); + let res = unsafe { ffi::archive_read_next_header(a.a, &mut e) }; + match res { + ffi::ARCHIVE_FATAL => Err(a.error()), + ffi::ARCHIVE_EOF => { + a.eof = true; + Ok(RawEntry(a)) + }, + _ => Ok(RawEntry(a)) + } + } +} + + +impl<'a> Drop for Archive<'a> { + fn drop(&mut self) { + unsafe { + ffi::archive_read_free(self.a); + } + } +} + + +impl<'a> ArchiveEntry<'a> { + pub fn next(self) -> Result>> { + self.a.entry() + } + + // Returns None in NULL (when does that even happen?) or on invalid UTF-8. + pub fn path(&self) -> Option<&str> { + let c_str: &CStr = unsafe { + let ptr = ffi::archive_entry_pathname(self.e); + if ptr.is_null() { + return None; + } + CStr::from_ptr(ptr) + }; + str::from_utf8(c_str.to_bytes()).ok() + // Perform some simple opinionated normalization. Full normalization might be better, + // but also slower and more complex. This solution covers the most important cases. + .map(|s| s.trim_left_matches('/').trim_left_matches("./").trim_right_matches('/')) + } + + pub fn size(&self) -> usize { + unsafe { ffi::archive_entry_size(self.e) as usize } + } + + fn symlink(&self) -> Option { + let c_str: &CStr = unsafe { + let ptr = ffi::archive_entry_symlink(self.e); + if ptr.is_null() { + return None; + } + CStr::from_ptr(ptr) + }; + str::from_utf8(c_str.to_bytes()).map(str::to_string).ok() + } + + fn hardlink(&self) -> Option { + let c_str: &CStr = unsafe { + let ptr = ffi::archive_entry_hardlink(self.e); + if ptr.is_null() { + return None; + } + CStr::from_ptr(ptr) + }; + // Hard links have the same name as an earlier pathname(), and those typically don't have a + // preceding slash. Add this slash here so that the same resolution logic can be used for + // both hardlinks and symlinks. I really don't care about the difference between these two. + str::from_utf8(c_str.to_bytes()).map(|p| format!("/{}", p)).ok() + } + + pub fn filetype(&self) -> FileType { + // If it has a symlink/hardlink path, then just consider it a link regardless of what + // _filetype() says. + if let Some(l) = self.symlink().or(self.hardlink()) { + return FileType::Link(l); + } + match unsafe { ffi::archive_entry_filetype(self.e) } { + ffi::AE_IFDIR => FileType::Directory, + ffi::AE_IFREG => FileType::File, + _ => FileType::Other, + } + } +} + + +impl<'a> Read for ArchiveEntry<'a> { + fn read(&mut self, buf: &mut [u8]) -> Result { + self.a.read(buf) + } +} + + +impl<'a> Read for RawEntry<'a> { + fn read(&mut self, buf: &mut [u8]) -> Result { + self.0.read(buf) + } +} + + +// We can't provide an Iterator object for ArchiveEntries because Rust doesn't support streaming +// iterators. Let's instead provide a walk function for convenience. +// cb should return Ok(true) to continue, Ok(false) to break +pub fn walk(ent: Option, mut cb: F) -> Result<()> + where F: FnMut(&mut ArchiveEntry) -> Result +{ + let mut ent = ent; + while let Some(mut e) = ent { + if !try!(cb(&mut e)) { + break; + } + ent = try!(e.next()); + } + Ok(()) +} + + + +#[cfg(test)] +mod tests { + use super::*; + use std; + use std::io::Read; + use std::fs::File; + + #[test] + fn invalid() { + let mut r = std::io::repeat(0x0a).take(64*1024); + let ent = Archive::open_archive(&mut r); + assert!(ent.is_err()); + } + + #[test] + fn zerolength() { + let mut r = std::io::empty(); + { + let ent = Archive::open_archive(&mut r); + assert!(ent.unwrap().is_none()); + } + { + let mut ent = Archive::open_raw(&mut r).unwrap(); + let mut v = Vec::new(); + assert_eq!(ent.read_to_end(&mut v).unwrap(), 0); + } + } + + #[test] + fn archive() { + let mut f = File::open("tests/simpletest.tar.gz").unwrap(); + let mut ent = Archive::open_archive(&mut f).unwrap().unwrap(); + + let t = |e:&mut ArchiveEntry, path, size, ft, cont| { + assert_eq!(e.path(), path); + assert_eq!(e.size(), size); + assert_eq!(e.filetype(), ft); + let mut contents = String::new(); + assert_eq!(e.read_to_string(&mut contents).unwrap(), size); + assert_eq!(&contents, cont); + }; + + t(&mut ent, Some("simple"), 0, FileType::Directory, ""); + + ent = ent.next().unwrap().unwrap(); + t(&mut ent, Some("simple/file"), 3, FileType::File, "Hi\n"); + + ent = ent.next().unwrap().unwrap(); + t(&mut ent, Some("simple/link"), 0, FileType::Link("file".to_string()), ""); + + ent = ent.next().unwrap().unwrap(); + t(&mut ent, Some("simple/hardlink"), 0, FileType::Link("/simple/file".to_string()), ""); + + ent = ent.next().unwrap().unwrap(); + t(&mut ent, Some("simple/fifo"), 0, FileType::Other, ""); + + ent = ent.next().unwrap().unwrap(); + t(&mut ent, None, 0, FileType::File, ""); + + assert!(ent.next().unwrap().is_none()); + } + + #[test] + fn raw() { + let mut f = File::open("tests/rawtest.gz.xz.bzip2").unwrap(); + let mut r = Archive::open_raw(&mut f).unwrap(); + let mut c = String::new(); + r.read_to_string(&mut c).unwrap(); + assert_eq!(&c, "File contents!\n"); + } + + #[test] + fn raw_passthrough() { + let mut r = std::io::Cursor::new(&b"This is an uncompressed text file"[..]); + let mut ent = Archive::open_raw(&mut r).unwrap(); + let mut s = String::new(); + ent.read_to_string(&mut s).unwrap(); + assert_eq!(&s, "This is an uncompressed text file"); + } +} diff --git a/indexer/src/archread.rs b/indexer/src/archread.rs new file mode 100644 index 0000000..22086f8 --- /dev/null +++ b/indexer/src/archread.rs @@ -0,0 +1,363 @@ +use std::io::Result; +use std::collections::HashMap; + +use archive::{walk,ArchiveEntry,FileType}; + +/* I had hoped that reading man pages from an archive would just be a simple: + * + * 1. Walk through all files in the archive in a streaming fashion + * 2. Parse/index man pages + * + * But alas, it was not to be. Symlinks and hardlinks have ruined it. Now we have to... + * + * 1. Walk through all entries in the archive in a streaming fashion + * 2. Parse/index regular file man pages + * 3. Keep track of all paths in the archive + * 4. Use the result of step (3) to resolve symlinks/hardlinks to their actual file + * 5. Read the entire damn archive again if one of the links resolved to a file that was not + * recognized as a man page in step (2). Luckily, this isn't very common. + * + * And this doesn't even cover the problem of duplicate entries in a tar, which is also quite + * annoying to handle. + * + * What annoys me the most about all of this is that it's not possible to stream an archive from + * the network and read/index the entire thing in a single step. Now we either have to buffer + * packages to disk or redownload the archive in order to be able to follow all links to man pages. + * + * (Note that it is possible to resolve links while walking through the entries, which will allow + * us to match files found later in the archive against links found earlier, thus potentially + * saving the need to read the archive a second time. This is merely a performance improvement for + * an uncommon case, and it certainly won't simplify the code) + * + * (Note that it's also possible to just flush all files <10MB* to disk to completely avoid the + * need for a second archive read, but that's going to significantly slow down the common case in + * order to handle a rare case. It's possible to further optimize this using some heuristics to + * determine whether a file is potentially a man page, but that's both complex and may not even + * save much) + * + * (* So apparently some man pages are close to 10MB...) + */ + + +#[derive(Clone,Debug,PartialEq,Eq)] +pub enum EntryType { + // Regular file that has been handled/indexed + Handled, + // Regular file that hasn't been handled because the caller wasn't interested in it. Could + // still be an interesting file if it is referenced from an interesting path. + Regular, + // Link to another file (interesting or not is irrelevant) + Link(String), + // Directory; need this information when resolving links + Directory, + // Something that couldn't be an interesting file (chardev/socket/etc); If any link resolves to + // this we know we're done. + Other, +} + +pub struct FileList { + // List of seen files. This is used to resolve links + seen: HashMap, + // List of interesting links + links: Vec, +} + +pub struct MissedFiles(HashMap>); + + +impl FileList { + + /* Read an archive until the end. Accepts two callbacks: + * + * interest_cb: Called on every path in the archive, should return whether the file is + * interesting (i.e. whether we want to know its contents). + * file_cb: Called on every regular file for which interest_cb() showed an interest. + * The callback accepts multiple path names, but this function will only provide one. + * + * Returns a FileList struct that can be used to retreive all interesting non-regular files. + */ + pub fn read(ent: Option, interest_cb: F, mut file_cb: G) -> Result + where F: Fn(&str) -> bool, G: FnMut(&[&str], &mut ArchiveEntry) -> Result<()> + { + let mut fl = FileList { + seen: HashMap::new(), + links: Vec::new(), + }; + + try!(walk(ent, |mut e| { + let path = match e.path() { + Some(x) => x.to_string(), + None => { warn!("Invalid UTF-8 filename in archive"); return Ok(true) } + }; + let ft = e.filetype(); + trace!("Archive entry: {:10} {} {:?}", e.size(), path, ft); + + // We ought to throw away the result of the previous entry with the same name and use + // this new entry instead, but fuck it. This case is too rare, so let's just warn. + if let Some(_) = fl.seen.get(&path) { + warn!("Duplicate file entry: {}", path); + return Ok(true); + } + + let et = match ft { + FileType::File => { + if interest_cb(&path) { + let pathv = [&path as &str]; + try!(file_cb(&pathv[..], &mut e)); + EntryType::Handled + } else { + EntryType::Regular + } + }, + FileType::Link(l) => { + if interest_cb(&path) { + fl.links.push(path.clone()); + } + EntryType::Link(l) + }, + FileType::Directory => EntryType::Directory, + FileType::Other => EntryType::Other, + }; + + fl.seen.insert(path, et); + Ok(true) + })); + Ok(fl) + } + + + // This is basically realpath(), using the virtual filesystem in self.seen. + // This method is not particularly efficient, it allocates like crazy. + fn resolve_link(&self, base: &str, path: &str, depth: usize) -> Option<(EntryType, Vec)> { + if depth < 1 { + warn!("Unresolved link: {} -> {}; Recursion depth exceeded", base, path); + return None + } + + // Remove filename from the base + let basedir = if let Some(i) = base.rfind('/') { base.split_at(i).0 } else { return None }; + + let comp : Vec<&str> = + if path.starts_with('/') { path.split('/').collect() } + else { basedir.split('/').chain(path.split('/')).collect() }; + + let mut dest = Vec::new(); + + for (i, &c) in comp.iter().enumerate() { + if c == "" || c == "." { + continue; + } + if c == ".." { + if dest.len() > 1 { + dest.pop(); + } + continue; + } + dest.push(c.to_string()); + let curpath = dest.join("/"); + match self.seen.get(&curpath) { + + // If it's a directory, we're good + Some(&EntryType::Directory) => (), + + // If it's a file or man page, it must be the last item. + Some(& ref x@ EntryType::Regular) | + Some(& ref x@ EntryType::Handled) => return + if i == comp.len()-1 { + Some((x.clone(), dest)) + } else { + warn!("Unresolved link: {} -> {}; Non-directory component", base, path); + None + }, + + // Links... Ugh + Some(&EntryType::Link(ref d)) => { + match self.resolve_link(&curpath, &d, depth-1) { + // Same as above, with dirs we can continue, files have to be last + Some((EntryType::Directory, d)) => dest = d, + x@Some((EntryType::Regular, _)) | + x@Some((EntryType::Handled, _)) => return + if i == comp.len()-1 { x } + else { + warn!("Unresolved link: {} -> {}; Non-directory link component", base, path); + None + }, + _ => return None, + } + }, + + // Don't care about anything else, just stop. + _ => { + warn!("Unresolved link: {} -> {}; Special or non-existing file", base, path); + return None + } + } + } + Some((EntryType::Directory, dest)) + } + + /* Calls cb() on every 'interesting' link to a file that has already been passed to a file_cb() + * in FileList::read(). + * If there are any interesting links that have not yet been passed to file_cb(), a MissedFiles + * struct is returned that can be used to retrieve those files by re-reading the archive. + */ + pub fn links(self, mut cb: F) -> Option where F: FnMut(&str, &str) { + let mut missed = HashMap::new(); + + for p in self.links.iter() { + let dest = match self.seen.get(p) { Some(&EntryType::Link(ref x)) => x, _ => unreachable!() }; + + match self.resolve_link(&p, dest, 32) { + Some((EntryType::Handled, d)) => { + let dstr = d.join("/"); + cb(&p, &dstr); + }, + Some((EntryType::Regular, d)) => { + let dstr = d.join("/"); + missed.entry(dstr).or_insert_with(Vec::new).push(p.to_string()); + } + _ => (), + } + } + + if missed.len() > 0 { + Some(MissedFiles(missed)) + } else { + None + } + } +} + + +impl MissedFiles { + /* Reads the archive again and calls file_cb() on every interesting file that was missed during + * the first read of the archive (using FileList::{read,links}). file_cb is exactly the same as + * in FileList::read, but this time it can actually get multiple paths as first argument; which + * happens when multiple interesting links point to the same file. */ + pub fn read(mut self, ent: Option, mut file_cb: G) -> Result<()> + where G: FnMut(&[&str], &mut ArchiveEntry) -> Result<()> + { + walk(ent, |mut e| { + if let Some(f) = e.path().and_then(|p| self.0.remove(p)) { + let v: Vec<&str> = f.iter().map(|x| x as &str).collect(); + try!(file_cb(&v, &mut e)) + } + Ok(self.0.len() > 0) + }) + } +} + + +#[cfg(test)] +mod tests { + use super::*; + use archive::Archive; + use std::io::Read; + use std::fs::File; + + fn test_read() -> FileList { + let mut f = File::open("tests/testarchive.tar.xz").unwrap(); + let arch = Archive::open_archive(&mut f).unwrap(); + let mut cnt = 0; + FileList::read(arch, + |p| p.starts_with("man/man"), + |p,e| { + assert_eq!(cnt, 0); + cnt += 1; + assert_eq!(p, &["man/man3/helloworld.3"][..]); + assert_eq!(e.size(), 12); + + let mut cont = String::new(); + e.read_to_string(&mut cont).unwrap(); + assert_eq!(&cont, "Hello World\n"); + Ok(()) + } + ).unwrap() + } + + fn test_resolve_links(r: &FileList) { + let res = |p| { + if let Some(&EntryType::Link(ref l)) = r.seen.get(p) { + r.resolve_link(p, &l, 5) + } else { + panic!("Not found or not a link: {}", p); + } + }; + let helloworld = Some((EntryType::Handled, vec!["man".to_string(), "man3".to_string(), "helloworld.3".to_string()])); + + assert_eq!(res("man/mans"), Some((EntryType::Directory, vec!["man".to_string(), "man3".to_string()]))); + assert_eq!(res("man/man6/hardlink.6"), helloworld); + assert_eq!(res("man/man1/symlinkbefore.1"), helloworld); + assert_eq!(res("man/man6/symlinkafter.6"), helloworld); + + assert_eq!(res("man/man1/badsymlink1.1"), None); + assert_eq!(res("man/man1/badsymlink2.1"), None); + assert_eq!(res("man/man1/badsymlink3.1"), None); + assert_eq!(res("man/man1/badsymlink4.1"), None); + assert_eq!(res("man/man1/badsymlink5.1"), None); + + assert_eq!(res("man/man1/doublesymlink1.1"), helloworld); + assert_eq!(res("man/man1/doublesymlink2.1"), helloworld); + assert_eq!(res("man/man1/triplesymlink.1"), helloworld); + assert_eq!(res("man/man1/infinitesymlink.1"), None); + } + + fn test_links(r: FileList) -> Option { + let mut links = Vec::new(); + let missed = r.links(|p,d| links.push((p.to_string(), d.to_string()))); + links.sort(); + + { + let mut res = |p:&str| { + let r = links.remove(0); + assert_eq!(r.0, p.to_string()); + assert_eq!(r.1, "man/man3/helloworld.3".to_string()); + }; + res("man/man1/doublesymlink1.1"); + res("man/man1/doublesymlink2.1"); + res("man/man1/symlinkbefore.1"); + res("man/man1/triplesymlink.1"); + res("man/man6/hardlink.6"); + res("man/man6/symlinkafter.6"); + } + assert_eq!(links.len(), 0); + missed + } + + fn test_reread(r: MissedFiles) { + let mut f = File::open("tests/testarchive.tar.xz").unwrap(); + let ent = Archive::open_archive(&mut f).unwrap(); + let mut files = Vec::new(); + r.read(ent, + |p,e| { + let mut cont = String::new(); + e.read_to_string(&mut cont).unwrap(); + files.extend(p.iter().map(|x| (x.to_string(), cont.clone()) )); + Ok(()) + } + ).unwrap(); + files.sort(); + + { + let mut res = |a:&str, b:&str| { + let r = files.remove(0); + assert_eq!(&r.0, a); + assert_eq!(&r.1, b); + }; + res("man/man3/needreread.3", "Potentially interesting file\n"); + res("man/man6/needreread.6", "Potentially interesting file\n"); + } + assert_eq!(files.len(), 0); + } + + #[test] + fn test_reader() { + //use env_logger; + //env_logger::init().unwrap(); + + let r = test_read(); + test_resolve_links(&r); + let l = test_links(r).unwrap(); + test_reread(l); + } +} diff --git a/indexer/src/main.rs b/indexer/src/main.rs new file mode 100644 index 0000000..1083559 --- /dev/null +++ b/indexer/src/main.rs @@ -0,0 +1,95 @@ +#[macro_use] extern crate log; +#[macro_use] extern crate lazy_static; +#[macro_use] extern crate clap; +extern crate env_logger; +extern crate regex; +extern crate libarchive3_sys; +extern crate libc; +extern crate ring; +extern crate encoding; +extern crate postgres; +extern crate hyper; +extern crate url; +extern crate chrono; + +mod archive; +mod archread; +mod man; +mod open; +mod pkg; +mod sys_arch; + + +// Convenience function to get a system id by short-name. Panics if the system doesn't exist. +fn sysbyshort(conn: &postgres::GenericConnection, short: &str) -> i32 { + let r = conn.query("SELECT id FROM systems WHERE short = $1", &[&short]).unwrap(); + if r.is_empty() { + panic!("Invalid system: {}", short); + } + r.get(0).get(0) +} + + +fn main() { + let arg = clap_app!(indexer => + (about: "Manned.org man page indexer") + (@arg v: -v +multiple "Increase verbosity") + (@subcommand pkg => + (about: "Index a single package") + (@arg force: --force "Overwrite existing indexed package") + (@arg sys: --sys +required +takes_value "System short-name") + (@arg cat: --cat +required +takes_value "Package category") + (@arg pkg: --pkg +required +takes_value "Package name") + (@arg ver: --ver +required +takes_value "Package version") + (@arg date: --date +required +takes_value "Package release date") + (@arg FILE: +required "Package file") + ) + (@subcommand arch => + (about: "Index an Arch Linux repository") + (@arg sys: --sys +required +takes_value "System short-name") + (@arg mirror: --mirror +required +takes_value "Mirror URL") + (@arg repo: --repo +required +takes_value "Repository name") + ) + ).get_matches(); + + let verbose = arg.occurrences_of("v"); + env_logger::LogBuilder::new() + .filter(Some("indexer"), match verbose { + 0 => log::LogLevelFilter::Warn, + 1 => log::LogLevelFilter::Info, + 2 => log::LogLevelFilter::Debug, + _ => log::LogLevelFilter::Trace, + }) + .filter(Some("postgres"), if verbose >= 4 { log::LogLevelFilter::Trace } else { log::LogLevelFilter::Info }) + .init().unwrap(); + + let dbhost = match std::env::var("MANNED_PG") { + Ok(x) => x, + Err(_) => { error!("MANNED_PG not set."); return } + }; + let db = match postgres::Connection::connect(&dbhost[..], postgres::TlsMode::None) { + Ok(x) => x, + Err(x) => { error!("Can't connect to postgres: {}", x); return }, + }; + debug!("Connected to database"); + + if let Some(matches) = arg.subcommand_matches("pkg") { + pkg::pkg(&db, pkg::PkgOpt { + force: matches.is_present("force"), + sys: sysbyshort(&db, matches.value_of("sys").unwrap()), + cat: matches.value_of("cat").unwrap(), + pkg: matches.value_of("pkg").unwrap(), + ver: matches.value_of("ver").unwrap(), + date: matches.value_of("date").unwrap(), + file: open::Path{ path: matches.value_of("FILE").unwrap(), cache: false, canbelocal: true}, + }); + } + + if let Some(matches) = arg.subcommand_matches("arch") { + sys_arch::sync(&db, + sysbyshort(&db, matches.value_of("sys").unwrap()), + matches.value_of("mirror").unwrap(), + matches.value_of("repo").unwrap() + ); + } +} diff --git a/indexer/src/man.rs b/indexer/src/man.rs new file mode 100644 index 0000000..9bcb2bf --- /dev/null +++ b/indexer/src/man.rs @@ -0,0 +1,301 @@ +use std::str; +use std::io; +use std::io::Read; +use regex::bytes; +use regex::Regex; +use encoding; +use encoding::{all,EncodingRef}; +use encoding::label::encoding_from_whatwg_label; +use ring::digest; + +use archive::Archive; + +// Anything larger than this just isn't a man page. I hope. +const MAX_MAN_SIZE: u64 = 20*1024*1024; +// I've also not seen valid man pages smaller than this +const MIN_MAN_SIZE: u64 = 9; + + +// Checks a path for a man page candidate. Returns None if it doesn't seem like a man page +// location, otherwise Some((manPageName, Section, Locale)). +pub fn parse_path(path: &str) -> Option<(&str, &str, &str)> { + // Roughly: man[/locale]/man1/manpage.section[.compression]+ + lazy_static! { + static ref RE: Regex = Regex::new(r"(?x) + man + (?: / ([^/]+) )? # Optional locale + /man[a-z0-9]/ # Subdir + ([^/]+?) # Man page name (non-greedy) + \. ([^/\.]+) # Section + (?: \. (?: gz|lzma|bz2|xz ))* $ # Any number of compression extensions + ").unwrap(); + } + + let cap = match RE.captures(path) { Some(x) => x, None => return None }; + let locale = cap.at(1).unwrap_or(""); + let name = cap.at(2).unwrap(); + let section = cap.at(3).unwrap(); + + // Not everything matching the regex is necessarily a man page, exclude some special cases. + match (name, section, locale) { + // Files that totally aren't man pages + ("Makefile", "am", _) | + (".cvsignore", _, _) | + (_, "in", _) | + (_, "gz", _) | + (_, "lzma", _) | + (_, "bz2", _) | + (_, "xz", _) | + (_, "html", _) => None, + // Some weird directories that happen to match the locale + (n, s, "5man") | + (n, s, "c") | + (n, s, "man1") | + (n, s, "man2") | + (n, s, "man3") | + (n, s, "man4") | + (n, s, "man5") | + (n, s, "man6") | + (n, s, "man7") | + (n, s, "man8") | + (n, s, "Man-Part1") | + (n, s, "Man-Part2") => Some((n, s, "")), + // Nothing special! + x => Some(x) + } +} + + +// Convenient wrapper for archread's interest_cb +pub fn ismanpath(path: &str) -> bool { + parse_path(path).is_some() +} + + +fn validate(data: &Vec) -> Option<&'static str> { + lazy_static! { + static ref HTML: bytes::Regex = bytes::Regex::new(r"^\s*<(?:html|head|!DOCTYPE)").unwrap(); + } + + if data.len() >= MAX_MAN_SIZE as usize { + Some("File too large") + } else if data.len() < MIN_MAN_SIZE as usize { + Some("File too small") + } else if &data[..] == &b".so man3/\n"[..] { + Some("Contents: '.so man3/'") + } else if &data[..] == &b"timestamp\n"[..] { + Some("Contents: 'timestamp'") + } else if HTML.is_match(&data) { + Some("Looks like an HTML file") + } else { + None + } +} + + +// Look for 'coding:' indications in the file header, a la preconv(1). +fn codec_from_tag(data: &Vec) -> Option { + lazy_static! { + // According to the emacs docs the tag should be on the first line; according to preconv(1) + // it should be on the first or second line. I've also seen some files with the tag on the + // last line. I've not seen the tag itself used in a different context, so just get it from + // anywhere... + static ref TAG: bytes::Regex = bytes::Regex::new(r"-\*-.*coding:\s*(?u:([^\s;]+)).*-\*").unwrap(); + } + let cap = match TAG.captures(&data) { Some(x) => x, None => return None }; + let tag = str::from_utf8(cap.at(1).unwrap()).unwrap().to_lowercase(); + + match &tag[..] { + // Deny some common UTF-8-compatible encodings. These tags are obviously incorrect. + "us-ascii" | "ascii" | "utf8" | "utf-8" | "utf-8-unix" => None, + + // latin-1 isn't in the whatwg spec under that name + "latin-1" => Some(all::WINDOWS_1252), + + // armscii isn't in the whatwg spec at all + "armscii-8" => Some(all::ARMSCII_8), + + // Anything else should be found by its whatwg label. + x => match encoding_from_whatwg_label(x) { + Some(x) => Some(x), + None => { warn!("Unknown encoding in emacs tag: {}", x); None }, + } + } +} + + +fn codec_from_path(path: &str) -> Option { + let locale = match parse_path(path) { + Some((_,_,l)) if l != "" => l.to_lowercase(), + _ => return None, + }; + + lazy_static! { + static ref RE: Regex = Regex::new(r"^(?x) + ([a-z]+) # primary language + (?:_ ([a-z]+))? # secondary language + (?:@ [a-z]+)? # script (potentially useful, but uncommon and not currently used) + (?:\. ([^\.@]+))? # encoding (FUCKING USEFUL) + $").unwrap(); + } + + let cap = match RE.captures(&locale) { Some(x) => x, None => return None }; + let lang = cap.at(1).unwrap(); + let seclang = cap.at(2); + let enc = cap.at(3); + + // Try to do something with the encoding tag + match (lang, enc) { + (_, Some("eucjp")) | + (_, Some("ujis")) | // Not sure about this one, but it seems to come out alright + ("ja", Some("euc")) => return Some(all::EUC_JP), + + (_, Some("euckr")) => return Some(all::WINDOWS_949), + + ("ja", Some("jis7")) | + ("ja", Some("pck")) => return None, /* WAT? TODO: DO SOMETHING WITH THESE */ + + (_, Some(x)) => match encoding_from_whatwg_label(x) { + Some(x) => return Some(x), + _ => { warn!("Unknown encoding in locale: {}", x) }, + }, + _ => {}, + }; + + // Fall back to language + match (lang, seclang) { + ("pl", _) | + ("cs", _) | + ("hr", _) | + ("hu", _) | + ("sl", _) | + ("sk", _) => Some(all::ISO_8859_2), + ("bg", _) | + ("be", _) | + ("uk", _) => Some(all::ISO_8859_5), + ("el", _) => Some(all::ISO_8859_7), + ("et", _) => Some(all::ISO_8859_15), + ("tr", _) => Some(all::WINDOWS_1254), + ("ru", _) => Some(all::KOI8_R), + ("ja", _) | + ("jp", _) => Some(all::EUC_JP), // Tricky; but JIS is certainly less common + ("zh", Some("cn")) => Some(all::GBK), // These are based purely on what I've observed, + ("zh", _) => Some(all::BIG5_2003), // perhaps some heuristics based on contents can do better + ("ko", _) => Some(all::WINDOWS_949), + (_, _) => None, + } +} + + +// Decompresses / decodes a man page and returns its SHA-1 hash, encoding name, and UTF-8 contents. +pub fn decode(paths: &[&str], ent: &mut Read) -> io::Result<(digest::Digest,&'static str,String)> { + let mut decomp = try!(Archive::open_raw(ent)).take(MAX_MAN_SIZE+1); + let mut data = Vec::new(); + try!(decomp.read_to_end(&mut data)); + + if let Some(e) = validate(&data) { + return Err(io::Error::new(io::ErrorKind::InvalidData, e)); + } + + let dig = digest::digest(&digest::SHA1, &data); + + // TODO: Handle BOM? UTF-16? + // If it passes as UTF-8, then just consider it UTF-8. + if let Ok(_) = str::from_utf8(&data) { + return Ok((dig, "utf8", unsafe { String::from_utf8_unchecked(data) } )); + } + // Otherwise, look for a coding tag in the contents + if let Some(e) = codec_from_tag(&data) { + if let Ok(s) = e.decode(&data, encoding::DecoderTrap::Strict) { + return Ok((dig, e.name(), s)); + } + } + // If that fails as well, look for clues in the file path. + for path in paths { + if let Some(e) = codec_from_path(path) { + if let Ok(s) = e.decode(&data, encoding::DecoderTrap::Strict) { + return Ok((dig, e.name(), s)); + } + } + } + // If all else fails, use a lossy iso-8859-1 + Ok((dig, "iso-8859-1", (all::ISO_8859_1 as EncodingRef).decode(&data, encoding::DecoderTrap::Ignore).unwrap() )) +} + + + + +#[test] +fn test_parse_path() { + // Generic tests + assert_eq!(parse_path("/"), None); + assert_eq!(parse_path("/man1/ncdu.1"), None); + assert_eq!(parse_path("/man/man?/ncdu.1"), None); + assert_eq!(parse_path("/man/man1/ncdu.1"), Some(("ncdu", "1", ""))); + assert_eq!(parse_path("/man/man1/ncdu.1.gz.lzma.xz.bz2.gz"), Some(("ncdu", "1", ""))); // This stuff happens + assert_eq!(parse_path("/man/en_US.UTF-8/man1/ncdu.1"), Some(("ncdu", "1", "en_US.UTF-8"))); + + // Special cases + assert_eq!(parse_path("/usr/share/man/man1/INDEX"), None); + assert_eq!(parse_path("/usr/share/man/man1/Makefile"), None); + assert_eq!(parse_path("/usr/share/man/man1/Makefile.am"), None); + assert_eq!(parse_path("/usr/share/man/man1/Makefile.in"), None); + assert_eq!(parse_path("/usr/share/man/man1/.cvsignore"), None); + assert_eq!(parse_path("/usr/share/man/man1/.cvsignore.gz"), None); + + // Some actual locations + assert_eq!(parse_path("/usr/local/man/man1/list_audio_tracks.1.gz"), Some(("list_audio_tracks", "1", ""))); + assert_eq!(parse_path("/usr/local/lib/perl5/site_perl/man/man3/DBIx::Class::Helper::ResultSet::DateMethods1::Announcement.3.gz"), Some(("DBIx::Class::Helper::ResultSet::DateMethods1::Announcement", "3", ""))); + assert_eq!(parse_path("/usr/man/man3/exit.3tk"), Some(("exit", "3tk", ""))); + assert_eq!(parse_path("/usr/local/brlcad/share/man/mann/exit.nged.gz"), Some(("exit", "nged", ""))); + assert_eq!(parse_path("/usr/X11R6/man/man3/intro.3xglut.gz"), Some(("intro", "3xglut", ""))); + assert_eq!(parse_path("/usr/local/share/man/ko_KR.eucKR/man3/intro.3.gz"), Some(("intro", "3", "ko_KR.eucKR"))); + + assert_eq!(parse_path("/usr/lib/scilab/man/Man-Part1/man1/ans.1"), Some(("ans", "1", ""))); + assert_eq!(parse_path("/heirloom/usr/share/man/5man/man1/chgrp.1.gz"), Some(("chgrp", "1", ""))); + + assert_eq!(parse_path("/usr/local/plan9/man/man8/index.html"), None); + assert_eq!(parse_path("/usr/local/share/doc/gmt/html/man/grdpaste.html"), None); +} + + +#[test] +fn test_codec_from_path() { + let t = |p,n| { + assert_eq!(codec_from_path(p).unwrap().name(), n); + }; + t("man/de_DE.ISO8859-15/man1/scribus.1.gz", "iso-8859-15"); + t("man/de_DE.ISO_8859-1/man1/scribus.1.gz", "windows-1252"); + t("man/ja.UTF-8/man1/test.1", "utf-8"); + t("man/ja_JP/man1/test.1", "euc-jp"); + t("man/ja_JP.EUC/man1/test.1", "euc-jp"); + t("man/ja_JP.SJIS/man1/test.1", "windows-31j"); + t("man/jp.eucJP/man1/test.1", "euc-jp"); + t("man/jp/man1/test.1", "euc-jp"); + t("man/lt.ISO8859-13/man1/test.1", "iso-8859-13"); + t("man/ru/man1/test.1", "koi8-r"); + t("man/ru_RU@Cyr/man1/test.1", "koi8-r"); + t("man/zh_CN/man1/test.1", "gbk"); + t("man/zh_TW/man1/test.1", "big5-2003"); +} + + +#[test] +fn test_decode_zh() { + use std::fs::File; + use ring::test::from_hex; + + // cat exit.1.gz | lzma -d | gzip -d | sha1sum + let filehash = from_hex("cdf9b3e8d96a83c908eb0a0c277485e2f3eebe87").unwrap(); + // cat exit.1.gz | lzma -d | gzip -d | iconv -f gbk -t utf8 | sha1sum + let utf8hash = from_hex("47f3e441137b207c0abdc38adac692298da4927a").unwrap(); + + let mut f = File::open("tests/exit.3.gz.lzma").unwrap(); + let (dig, enc, s) = decode(&["bullshit", "/usr/share/man/zh_CN/man3/exit.3.gz"][..], &mut f).unwrap(); + + assert_eq!(dig.as_ref(), &filehash[..]); + assert_eq!(enc, "gbk"); + + let utf8dig = digest::digest(&digest::SHA1, s.as_bytes()); + assert_eq!(utf8dig.as_ref(), &utf8hash[..]); +} diff --git a/indexer/src/open.rs b/indexer/src/open.rs new file mode 100644 index 0000000..6919fc4 --- /dev/null +++ b/indexer/src/open.rs @@ -0,0 +1,82 @@ +use std::io::{Read,Result,Error,ErrorKind,copy}; +use std::fs::{File,create_dir_all,metadata}; +use std::hash::{Hash,Hasher,SipHasher}; +use std::time::{Duration,SystemTime}; +use url::Url; +use hyper; + + +const CACHE_PATH: &'static str = "/var/tmp/manned-indexer"; +const CACHE_TIME: u64 = 24*3600; + + +pub struct Path<'a> { + pub path: &'a str, + pub cache: bool, + pub canbelocal: bool, +} + + +fn cache_fn(url: &Url) -> String { + let name = url.path_segments().unwrap().last().unwrap(); + let name = if name == "" { "index" } else { name }; + + let mut hash = SipHasher::new(); + url.hash(&mut hash); + format!("{}/{}-{}-{:x}", CACHE_PATH, url.host_str().unwrap(), name, hash.finish()) +} + + +fn fetch(url: &str) -> Result> { + let res = try!(hyper::Client::new() + .get(url) + .header(hyper::header::UserAgent("Man page crawler (info@manned.org; https://manned.org/)".to_owned())) + .send() + .map_err(|e| Error::new(ErrorKind::Other, format!("Hyper: {}", e))) + ); + if !res.status.is_success() { + return Err(Error::new(ErrorKind::Other, format!("HTTP: {}", res.status) )); + } + Ok(Box::new(res) as Box) +} + + +fn file(path: &str) -> Result> { + Ok(Box::new(try!(File::open(path))) as Box) +} + + +impl<'a> Path<'a> { + pub fn open(&self) -> Result> { + if let Ok(url) = Url::parse(self.path) { + if url.scheme() != "http" && url.scheme() != "https" { + return Err(Error::new(ErrorKind::Other, "Invalid scheme")); + } + + if self.cache { + let cfn = cache_fn(&url); + if let Ok(m) = metadata(&cfn) { + if m.modified().unwrap() > SystemTime::now() - Duration::from_secs(CACHE_TIME) { + return file(&cfn); + } + } + try!(create_dir_all(CACHE_PATH)); + { + let mut rd = try!(fetch(url.as_str())); + let mut wr = try!(File::create(&cfn)); + try!(copy(&mut rd, &mut wr)); + } + file(&cfn) + + } else { + fetch(url.as_str()) + } + + } else if self.canbelocal { + file(self.path) + + } else { + Err(Error::new(ErrorKind::Other, "Invalid URL")) + } + } +} diff --git a/indexer/src/pkg.rs b/indexer/src/pkg.rs new file mode 100644 index 0000000..4d3379d --- /dev/null +++ b/indexer/src/pkg.rs @@ -0,0 +1,142 @@ +use std; +use std::io::Read; +use postgres; + +use open; +use archread; +use man; +use archive::{Archive,ArchiveEntry}; + +pub struct PkgOpt<'a> { + pub force: bool, + pub sys: i32, + pub cat: &'a str, + pub pkg: &'a str, + pub ver: &'a str, + pub date: &'a str, // TODO: Option to extract date from package metadata itself + pub file: open::Path<'a> +} + + +fn insert_pkg(tr: &postgres::transaction::Transaction, opt: &PkgOpt) -> Option { + // The ON CONFLICT .. DO UPDATE is used instead of DO NOTHING because in that case the + // RETURNING clause wouldn't give us a package id. + let q = "INSERT INTO packages (system, category, name) VALUES($1, $2, $3) + ON CONFLICT ON CONSTRAINT packages_system_name_category_key DO UPDATE SET name=$3 RETURNING id"; + let pkgid: i32 = match tr.query(q, &[&opt.sys, &opt.cat, &opt.pkg]) { + Err(e) => { + error!("Can't insert package in database: {}", e); + return None; + }, + Ok(r) => r.get(0).get(0), + }; + + let q = "SELECT id FROM package_versions WHERE package = $1 AND version = $2"; + let res = tr.query(q, &[&pkgid, &opt.ver]).unwrap(); + + let verid : i32; + if res.is_empty() { + let q = "INSERT INTO package_versions (package, version, released) VALUES($1, $2, $3::text::date) RETURNING id"; + verid = tr.query(q, &[&pkgid, &opt.ver, &opt.date]).unwrap().get(0).get(0); + info!("New package pkgid {} verid {}", pkgid, verid); + Some(verid) + + } else if opt.force { + verid = res.get(0).get(0); + info!("Overwriting package pkgid {} verid {}", pkgid, verid); + tr.query("DELETE FROM man WHERE package = $1", &[&verid]).unwrap(); + Some(verid) + + } else { + info!("Package already in database, pkgid {} verid {}", pkgid, res.get(0).get::(0)); + None + } +} + + +fn insert_man_row(tr: &postgres::GenericConnection, verid: i32, path: &str, hash: &[u8]) { + // TODO: Store 'encoding' in the database + let (name, sect, locale) = man::parse_path(path).unwrap(); + if let Err(e) = tr.execute( + "INSERT INTO man (package, name, filename, locale, hash, section) VALUES ($1, $2, '/'||$3, $4, $5, $6)", + &[&verid, &name, &path, &locale, &hash, §] + ) { + // I think this can only happen if archread gives us the same file twice, which really + // shouldn't happen. But I'd rather continue with an error logged than panic. + error!("Can't insert verid {} fn {}: {}", verid, path, e); + } +} + + +fn insert_man(tr: &postgres::GenericConnection, verid: i32, paths: &[&str], ent: &mut Read) { + let (dig, enc, cont) = match man::decode(paths, ent) { + Err(e) => { error!("Error decoding {}: {}", paths[0], e); return }, + Ok(x) => x, + }; + + // Overwrite entry if the contents are different. It's possible that earlier decoding + // implementations didn't properly detect the encoding. (On the other hand, due to differences + // in filenames it's also possible that THIS decoding step went wrong, but that's slightly less + // likely) + tr.execute( + "INSERT INTO contents (hash, content) VALUES($1, $2) ON CONFLICT (hash) DO UPDATE SET content = $2", + &[&dig.as_ref(), &cont] + ).unwrap(); + + for path in paths { + insert_man_row(tr, verid, path, dig.as_ref()); + debug!("Inserted man page: {} ({})", path, enc); + } +} + + +fn insert_link(tr: &postgres::GenericConnection, verid: i32, src: &str, dest: &str) { + let hash = tr.query("SELECT hash FROM man WHERE package = $1 AND filename = '/'||$2", &[&verid, &dest]).unwrap(); + if hash.is_empty() { /* Can happen if man::decode() failed previously. */ + error!("Link to unindexed man page: {} -> {}", src, dest); + return; + } + let hash: Vec = hash.get(0).get(0); + insert_man_row(tr, verid, src, &hash); + debug!("Inserted man link: {} -> {}", src, dest); +} + + +fn index_pkg(tr: &postgres::GenericConnection, opt: &PkgOpt, verid: i32) -> std::io::Result<()> { + let indexfunc = |paths: &[&str], ent: &mut ArchiveEntry| { + insert_man(tr, verid, paths, ent); + Ok(()) /* Don't propagate errors, continue handling other man pages */ + }; + + let mut rd = try!(opt.file.open()); + let missed = try!(archread::FileList::read( + try!(Archive::open_archive(&mut rd)), + man::ismanpath, &indexfunc)) + .links(|src, dest| { insert_link(tr, verid, src, dest) }); + + if let Some(missed) = missed { + warn!("Some links were missed, reading package again"); + let mut rd = try!(opt.file.open()); + try!(missed.read(try!(Archive::open_archive(&mut rd)), indexfunc)); + } + Ok(()) +} + + +pub fn pkg(conn: &postgres::GenericConnection, opt: PkgOpt) { + info!("Handling pkg: {} / {} / {} - {} @ {} @ {}", opt.sys, opt.cat, opt.pkg, opt.ver, opt.date, opt.file.path); + + let tr = conn.transaction().unwrap(); + tr.set_rollback(); + + let verid = match insert_pkg(&tr, &opt) { Some(x) => x, None => return }; + + match index_pkg(&tr, &opt, verid) { + Err(e) => error!("Error reading package: {}", e), + Ok(_) => tr.set_commit() + } + + if let Err(e) = tr.finish() { + error!("Error finishing transaction: {}", e); + } +} diff --git a/indexer/src/sys_arch.rs b/indexer/src/sys_arch.rs new file mode 100644 index 0000000..7a0bf1f --- /dev/null +++ b/indexer/src/sys_arch.rs @@ -0,0 +1,128 @@ +use std::str::FromStr; +use std::io::{Read,BufRead,BufReader,Result}; +use regex::Regex; +use chrono::NaiveDateTime; +use postgres; + +use archive; +use open; +use man; +use pkg; + + +struct Meta { + filename: String, + name: String, + version: String, + date: String, +} + + +fn read_files(lst: T) -> Result { + let rd = BufReader::new(lst); + for line in rd.lines() { + let line = try!(line); + if man::ismanpath(&line) { + return Ok(true); + } + } + Ok(false) +} + + +fn read_desc(rd: &mut archive::ArchiveEntry) -> Result> { + let mut data = String::new(); + try!(rd.take(64*1024).read_to_string(&mut data)); + + let path = rd.path().unwrap(); + lazy_static! { + static ref RE: Regex = Regex::new(r"\s*%([^%]+)%\s*\n\s*([^\n]+)\s*\n").unwrap(); + } + + let mut filename = None; + let mut name = None; + let mut version = None; + let mut builddate = None; + + for kv in RE.captures_iter(&data) { + let key = kv.at(1).unwrap(); + let val = kv.at(2).unwrap(); + trace!("{}: {} = {}", path, key, val); + match key { + "FILENAME" => filename = Some(val), + "NAME" => name = Some(val), + "VERSION" => version = Some(val), + "BUILDDATE" => builddate = i64::from_str(val).ok(), + _ => {}, + } + } + + if filename.is_some() && name.is_some() && version.is_some() && builddate.is_some() { + Ok(Some(Meta { + filename: filename.unwrap().to_string(), + name: name.unwrap().to_string(), + version: version.unwrap().to_string(), + date: NaiveDateTime::from_timestamp(builddate.unwrap(), 0).format("%Y-%m-%d").to_string(), + })) + } else { + warn!("Metadata missing from package description: {}", path); + Ok(None) + } +} + + +// TODO: Switch to x86_64 instead of i686 +pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, repo: &str) { + info!("Reading packages from {} {}", mirror, repo); + + let path = format!("{}/{}/os/i686/{1:}.files.tar.gz", mirror, repo); + let path = open::Path{ path: &path, cache: true, canbelocal: false }; + let mut index = match path.open() { + Err(e) => { error!("Can't read package index: {}", e); return }, + Ok(x) => x, + }; + + let ent = match archive::Archive::open_archive(&mut index) { + Err(e) => { error!("Can't read package index: {}", e); return }, + Ok(x) => x, + }; + + let mut hasman = false; + let mut meta = None; + let r = archive::walk(ent, |x| { + if x.filetype() == archive::FileType::Directory { + hasman = false; + meta = None; + } else if x.path().unwrap().ends_with("/files") { + hasman = try!(read_files(x)); + } else if x.path().unwrap().ends_with("/desc") { + meta = try!(read_desc(x)); + } + + if hasman && meta.is_some() { + hasman = false; + let m = meta.take().unwrap(); + + let p = format!("{}/{}/os/i686/{}", mirror, repo, m.filename); + pkg::pkg(pg, pkg::PkgOpt{ + force: false, + sys: sys, + cat: repo, + pkg: &m.name, + ver: &m.version, + date: &m.date, + file: open::Path{ + path: &p, + cache: false, + canbelocal: false, + }, + }); + } + + Ok(true) + }); + + if let Err(e) = r { + error!("Error reading package index: {}", e); + } +} diff --git a/indexer/tests/exit.3.gz.lzma b/indexer/tests/exit.3.gz.lzma new file mode 100644 index 0000000..7c84e6a Binary files /dev/null and b/indexer/tests/exit.3.gz.lzma differ diff --git a/indexer/tests/mkarchives.sh b/indexer/tests/mkarchives.sh new file mode 100755 index 0000000..169f2bd --- /dev/null +++ b/indexer/tests/mkarchives.sh @@ -0,0 +1,71 @@ +#!/bin/sh + +# The order of inserting the files into the tar is not fully deterministic this +# way. The tests will fail quite badly if a hardlink is considered the +# "original" version. + + +# simpletest.tar.gz + +mkdir simple +echo Hi >simple/file +ln -s file simple/link +ln simple/file simple/hardlink +mkfifo simple/fifo +badfn=`echo 'Héllö.txt' | iconv -t ISO-8859-1` +touch $badfn +tar -czf simpletest.tar.gz simple $badfn +rm -rf $badfn simple + + + +# rawtest.gz.xz.bzip2 + +echo "File contents!" | gzip | xz | bzip2 >rawtest.gz.xz.bzip2 + + +# testarchive.tar.xz + +mkdir man +cd man + +mkdir man1 +mkdir man3 +mkdir man6 +ln -s man3 mans + +echo 'Hello World' >man3/helloworld.3 +echo 'Not a very interesting file' >notinteresting +echo 'Potentially interesting file' >possiblyinteresting + +ln man3/helloworld.3 man6/hardlink.6 + +ln -s ../man3/helloworld.3 man1/symlinkbefore.1 +ln -s ../man3/helloworld.3 man6/symlinkafter.6 + +ln -s notadir/../../man3/helloworld.3 man1/badsymlink1.1 +ln -s man3/helloworld.3 man1/badsymlink2.1 +ln -s ../man3/helloworld.3/. man1/badsymlink3.1 +ln -s ../man3/helloworld.3/../helloworld.3 man1/badsymlink4.1 +ln -s ../man1/symlinkbefore.1/../../man1/helloworld.3 man1/badsymlink5.1 + +ln -s symlinkbefore.1 man1/doublesymlink1.1 +ln -s ../mans/helloworld.3 man1/doublesymlink2.1 +ln -s ../mans/../man1/symlinkbefore.1 man1/triplesymlink.1 +ln -s infinitesymlink.1 man1/infinitesymlink.1 + +ln -s ../possiblyinteresting man3/needreread.3 +ln -s ../possiblyinteresting man6/needreread.6 + +cd .. +rm -f testarchive.tar +tar -cf testarchive.tar man/ +rm -r man/ + +mkdir man +echo 'Overwritten file' >man/possiblyinteresting +tar -rf testarchive.tar man/ +rm -r man/ + +rm -f testarchive.tar.xz +xz testarchive.tar diff --git a/indexer/tests/rawtest.gz.xz.bzip2 b/indexer/tests/rawtest.gz.xz.bzip2 new file mode 100644 index 0000000..bc4f2e8 Binary files /dev/null and b/indexer/tests/rawtest.gz.xz.bzip2 differ diff --git a/indexer/tests/simpletest.tar.gz b/indexer/tests/simpletest.tar.gz new file mode 100644 index 0000000..409f5ca Binary files /dev/null and b/indexer/tests/simpletest.tar.gz differ diff --git a/indexer/tests/testarchive.tar.xz b/indexer/tests/testarchive.tar.xz new file mode 100644 index 0000000..9892fae Binary files /dev/null and b/indexer/tests/testarchive.tar.xz differ diff --git a/util/arch.sh b/util/arch.sh deleted file mode 100755 index 57ff957..0000000 --- a/util/arch.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/bin/bash - -# Usage: ./arch.sh -# Synchronises the database with an Arch mirror, fetching any packages that -# aren't yet in the database and may have man pages. - -MIRROR=http://ftp.nluug.nl/pub/os/Linux/distr/archlinux -REPOS="core extra community" -DEBUG=false -SYSID=1 - -. ./common.sh - - -checkpkg() { - REPO=$1 - FN=$2 - D="$TMP/$REPO/$FN" - if [ ! \( -d "$D" -a -f "$D/files" -a -f "$D/desc" \) ]; then - echo "===> $FN" - echo "Invalid item, ignoring" - return - fi - grep -q /man/ "$D/files" - if [ "$?" -ne 0 ]; then - $DEBUG && echo "===> $FN" - $DEBUG && echo "No mans" - return - fi - - # Somewhat inefficient description parsing - FILENAME=`grep -A 1 '%FILENAME%' "$D/desc" | tail -n 1` - NAME=`grep -A 1 '%NAME%' "$D/desc" | tail -n 1` - VERSION=`grep -A 1 '%VERSION%' "$D/desc" | tail -n 1` - BUILDDATE=`grep -A 1 '%BUILDDATE%' "$D/desc" | tail -n 1` - if [ -z "$FILENAME" -o -z "$NAME" -o -z "$VERSION" -o -z "$BUILDDATE" ]; then - echo "===> $FN" - echo "Invalid/missing description info" - return - fi - BUILDDATE=`date -d "@$BUILDDATE" '+%F'` - - add_pkginfo $SYSID "$REPO" "$NAME" "$VERSION" "$BUILDDATE" - if [ "$?" -eq 0 ]; then - $DEBUG && echo "===> $FN" - $DEBUG && echo "Already up-to-date" - return - fi - - echo "===> $FN" - F="$TMP/$REPO/$FILENAME" - $CURL "$MIRROR/$REPO/os/i686/$FILENAME" -o "$F" || return - add_tar "$F" "$PKGID" - rm -f "$F" -} - - -syncrepo() { - REPO=$1 - F="$TMP/$REPO/repo.tar.gz" - echo "============ $MIRROR $REPO" - $CURL "$MIRROR/$REPO/os/i686/$REPO.files.tar.gz" -o "$F" || return 1 - tar -C "$TMP/$REPO" -xf "$F" || return 1 - rm -f "$F" - for fn in "$TMP/$REPO"/*; do - checkpkg "$REPO" `basename "$fn"` - done -} - - -for r in $REPOS; do - mkdir "$TMP/$r" - syncrepo $r - rm -rf "$TMP/$r" -done - diff --git a/util/cron.sh b/util/cron.sh index 47fdc4b..ae3f553 100755 --- a/util/cron.sh +++ b/util/cron.sh @@ -2,7 +2,7 @@ . ./common.sh -./arch.sh +./index.sh daily ./deb.sh ubuntu_active ./deb.sh debian_active echo "============ Updating SQL indices" diff --git a/util/index.sh b/util/index.sh new file mode 100755 index 0000000..87b355b --- /dev/null +++ b/util/index.sh @@ -0,0 +1,22 @@ +if test -f .config; then + source .config +fi + +INDEX="./indexer -vv" + +set -x + +arch() { + local MIRROR=http://ftp.nluug.nl/pub/os/Linux/distr/archlinux + local REPOS="core extra community" + for REPO in $REPOS; do + $INDEX arch --sys arch --mirror $MIRROR --repo $REPO + done +} + + +daily() { + arch +} + +$@ diff --git a/util/indexer b/util/indexer new file mode 120000 index 0000000..a15109f --- /dev/null +++ b/util/indexer @@ -0,0 +1 @@ +../indexer/target/release/indexer \ No newline at end of file