Merge branch 'indexer'
This commit is contained in:
commit
b8a1945d38
20 changed files with 2282 additions and 79 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -2,3 +2,5 @@
|
|||
!/lib/ManUtils/Build.PL
|
||||
!/lib/ManUtils/ManUtils.pm
|
||||
!/lib/ManUtils/ManUtils.xs
|
||||
indexer/target
|
||||
|
||||
|
|
|
|||
11
Makefile
11
Makefile
|
|
@ -1,4 +1,6 @@
|
|||
.PHONY: ManUtils
|
||||
.PHONY: ManUtils indexer clean
|
||||
|
||||
all: ManUtils indexer
|
||||
|
||||
ManUtils: lib/ManUtils/Build
|
||||
cd lib/ManUtils && perl Build.PL && ./Build install --install-base=inst
|
||||
|
|
@ -6,7 +8,12 @@ ManUtils: lib/ManUtils/Build
|
|||
lib/ManUtils/Build: lib/ManUtils/Build.PL
|
||||
cd lib/ManUtils && perl Build.PL
|
||||
|
||||
indexer: indexer/target/release/indexer
|
||||
|
||||
indexer/target/release/indexer: indexer/Cargo.toml indexer/src/*.rs
|
||||
cd indexer && cargo build --release
|
||||
|
||||
clean:
|
||||
cd lib/ManUtils && ./Build distclean
|
||||
rm -rf lib/ManUtils/inst
|
||||
|
||||
cd indexer && cargo clean
|
||||
|
|
|
|||
703
indexer/Cargo.lock
generated
Normal file
703
indexer/Cargo.lock
generated
Normal file
|
|
@ -0,0 +1,703 @@
|
|||
[root]
|
||||
name = "indexer"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"chrono 0.2.25 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"clap 2.17.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding 0.3.0-dev (git+https://github.com/lifthrasiir/rust-encoding)",
|
||||
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"hyper 0.9.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libarchive3-sys 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"postgres 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"ring 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"url 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "0.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ansi_term"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "bufstream"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "0.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "chrono"
|
||||
version = "0.2.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"num 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"time 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "2.17.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"term_size 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-segmentation 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-width 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"vec_map 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cookie"
|
||||
version = "0.2.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"openssl 0.7.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"time 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"url 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding"
|
||||
version = "0.3.0-dev"
|
||||
source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c"
|
||||
dependencies = [
|
||||
"encoding-index-japanese 1.20141219.6 (git+https://github.com/lifthrasiir/rust-encoding)",
|
||||
"encoding-index-korean 1.20141219.6 (git+https://github.com/lifthrasiir/rust-encoding)",
|
||||
"encoding-index-simpchinese 1.20160120.0 (git+https://github.com/lifthrasiir/rust-encoding)",
|
||||
"encoding-index-singlebyte 1.20160120.0 (git+https://github.com/lifthrasiir/rust-encoding)",
|
||||
"encoding-index-tradchinese 1.20141219.6 (git+https://github.com/lifthrasiir/rust-encoding)",
|
||||
"encoding-types 0.2.0 (git+https://github.com/lifthrasiir/rust-encoding)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-japanese"
|
||||
version = "1.20141219.6"
|
||||
source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c"
|
||||
dependencies = [
|
||||
"encoding_index_tests 0.1.5 (git+https://github.com/lifthrasiir/rust-encoding)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-korean"
|
||||
version = "1.20141219.6"
|
||||
source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c"
|
||||
dependencies = [
|
||||
"encoding_index_tests 0.1.5 (git+https://github.com/lifthrasiir/rust-encoding)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-simpchinese"
|
||||
version = "1.20160120.0"
|
||||
source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c"
|
||||
dependencies = [
|
||||
"encoding_index_tests 0.1.5 (git+https://github.com/lifthrasiir/rust-encoding)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-singlebyte"
|
||||
version = "1.20160120.0"
|
||||
source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c"
|
||||
dependencies = [
|
||||
"encoding_index_tests 0.1.5 (git+https://github.com/lifthrasiir/rust-encoding)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-tradchinese"
|
||||
version = "1.20141219.6"
|
||||
source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c"
|
||||
dependencies = [
|
||||
"encoding_index_tests 0.1.5 (git+https://github.com/lifthrasiir/rust-encoding)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-types"
|
||||
version = "0.2.0"
|
||||
source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c"
|
||||
|
||||
[[package]]
|
||||
name = "encoding_index_tests"
|
||||
version = "0.1.5"
|
||||
source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c"
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fallible-iterator"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "gcc"
|
||||
version = "0.3.38"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "gdi32-sys"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hex"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "hpack"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "httparse"
|
||||
version = "1.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "hyper"
|
||||
version = "0.9.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cookie 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"httparse 1.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"language-tags 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"mime 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"openssl 0.7.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"openssl-verify 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"solicit 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"time 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"traitobject 0.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"typeable 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicase 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"url 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"matches 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-bidi 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-normalization 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kernel32-sys"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "language-tags"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "libarchive3-sys"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"pkg-config 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "libressl-pnacl-sys"
|
||||
version = "2.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"pnacl-build-helper 1.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "matches"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "md5"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "0.1.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mime"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num"
|
||||
version = "0.1.36"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"num-integer 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"num-iter 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"num-traits 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-integer"
|
||||
version = "0.1.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"num-traits 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-iter"
|
||||
version = "0.1.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"num-integer 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"num-traits 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.1.36"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "num_cpus"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "openssl"
|
||||
version = "0.7.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"gcc 0.3.38 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"openssl-sys 0.7.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"openssl-sys-extras 0.7.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "openssl-sys"
|
||||
version = "0.7.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"gdi32-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libressl-pnacl-sys 2.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"pkg-config 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"user32-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "openssl-sys-extras"
|
||||
version = "0.7.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"gcc 0.3.38 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"openssl-sys 0.7.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "openssl-verify"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"openssl 0.7.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.7.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"phf_shared 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.7.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "pnacl-build-helper"
|
||||
version = "1.4.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "postgres"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"bufstream 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fallible-iterator 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"hex 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"phf 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"postgres-protocol 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "postgres-protocol"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"byteorder 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fallible-iterator 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"hex 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"md5 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.3.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "0.1.80"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "ring"
|
||||
version = "0.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"untrusted 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-serialize"
|
||||
version = "0.3.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "rustc_version"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"semver 0.1.20 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "0.1.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "solicit"
|
||||
version = "0.4.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"hpack 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "tempdir"
|
||||
version = "0.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "term_size"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thread-id"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thread_local"
|
||||
version = "0.2.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.1.35"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "traitobject"
|
||||
version = "0.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "typeable"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "unicase"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rustc_version 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-bidi"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"matches 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-normalization"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-segmentation"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "untrusted"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "url"
|
||||
version = "1.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"idna 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"matches 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "user32-sys"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "utf8-ranges"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "vec_map"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-build"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[metadata]
|
||||
"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66"
|
||||
"checksum ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "23ac7c30002a5accbf7e8987d0632fa6de155b7c3d39d0067317a391e00a2ef6"
|
||||
"checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d"
|
||||
"checksum bufstream 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7b48dbe2ff0e98fa2f03377d204a9637d3c9816cd431bfe05a8abbd0ea11d074"
|
||||
"checksum byteorder 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0fc10e8cc6b2580fda3f36eb6dc5316657f812a3df879a44a66fc9f0fdbc4855"
|
||||
"checksum chrono 0.2.25 (registry+https://github.com/rust-lang/crates.io-index)" = "9213f7cd7c27e95c2b57c49f0e69b1ea65b27138da84a170133fd21b07659c00"
|
||||
"checksum clap 2.17.1 (registry+https://github.com/rust-lang/crates.io-index)" = "27dac76762fb56019b04aed3ccb43a770a18f80f9c2eb62ee1a18d9fb4ea2430"
|
||||
"checksum cookie 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0e3d6405328b6edb412158b3b7710e2634e23f3614b9bb1c412df7952489a626"
|
||||
"checksum encoding 0.3.0-dev (git+https://github.com/lifthrasiir/rust-encoding)" = "<none>"
|
||||
"checksum encoding-index-japanese 1.20141219.6 (git+https://github.com/lifthrasiir/rust-encoding)" = "<none>"
|
||||
"checksum encoding-index-korean 1.20141219.6 (git+https://github.com/lifthrasiir/rust-encoding)" = "<none>"
|
||||
"checksum encoding-index-simpchinese 1.20160120.0 (git+https://github.com/lifthrasiir/rust-encoding)" = "<none>"
|
||||
"checksum encoding-index-singlebyte 1.20160120.0 (git+https://github.com/lifthrasiir/rust-encoding)" = "<none>"
|
||||
"checksum encoding-index-tradchinese 1.20141219.6 (git+https://github.com/lifthrasiir/rust-encoding)" = "<none>"
|
||||
"checksum encoding-types 0.2.0 (git+https://github.com/lifthrasiir/rust-encoding)" = "<none>"
|
||||
"checksum encoding_index_tests 0.1.5 (git+https://github.com/lifthrasiir/rust-encoding)" = "<none>"
|
||||
"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
|
||||
"checksum fallible-iterator 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "5d48ab1bc11a086628e8cc0cc2c2dc200b884ac05c4b48fb71d6036b6999ff1d"
|
||||
"checksum gcc 0.3.38 (registry+https://github.com/rust-lang/crates.io-index)" = "553f11439bdefe755bf366b264820f1da70f3aaf3924e594b886beb9c831bcf5"
|
||||
"checksum gdi32-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0912515a8ff24ba900422ecda800b52f4016a56251922d397c576bf92c690518"
|
||||
"checksum hex 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d6a22814455d41612f41161581c2883c0c6a1c41852729b17d5ed88f01e153aa"
|
||||
"checksum hpack 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3d2da7d3a34cf6406d9d700111b8eafafe9a251de41ae71d8052748259343b58"
|
||||
"checksum httparse 1.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "46534074dbb80b070d60a5cb8ecadd8963a00a438ae1a95268850a7ef73b67ae"
|
||||
"checksum hyper 0.9.11 (registry+https://github.com/rust-lang/crates.io-index)" = "edd47c66782933e546a32ae89ca3c49263b2ba9bc29f3a0d5c52fff48e0ac67c"
|
||||
"checksum idna 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1053236e00ce4f668aeca4a769a09b3bf5a682d802abd6f3cb39374f6b162c11"
|
||||
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
|
||||
"checksum language-tags 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a91d884b6667cd606bb5a69aa0c99ba811a115fc68915e7056ec08a46e93199a"
|
||||
"checksum lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "49247ec2a285bb3dcb23cbd9c35193c025e7251bfce77c1d5da97e6362dffe7f"
|
||||
"checksum libarchive3-sys 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "3cd3beae8f59a4c7a806523269b5392037577c150446e88d684dfa6de6031ca7"
|
||||
"checksum libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "044d1360593a78f5c8e5e710beccdc24ab71d1f01bc19a29bcacdba22e8475d8"
|
||||
"checksum libressl-pnacl-sys 2.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "cbc058951ab6a3ef35ca16462d7642c4867e6403520811f28537a4e2f2db3e71"
|
||||
"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054"
|
||||
"checksum matches 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "efd7622e3022e1a6eaa602c4cea8912254e5582c9c692e9167714182244801b1"
|
||||
"checksum md5 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7df230903ccdffd6b3b4ec21624498ea64c912ce50297846907f0b8e1bb249dd"
|
||||
"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
|
||||
"checksum mime 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "b5c93a4bd787ddc6e7833c519b73a50883deb5863d76d9b71eb8216fb7f94e66"
|
||||
"checksum num 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)" = "bde7c03b09e7c6a301ee81f6ddf66d7a28ec305699e3d3b056d2fc56470e3120"
|
||||
"checksum num-integer 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)" = "fb24d9bfb3f222010df27995441ded1e954f8f69cd35021f6bef02ca9552fb92"
|
||||
"checksum num-iter 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)" = "287a1c9969a847055e1122ec0ea7a5c5d6f72aad97934e131c83d5c08ab4e45c"
|
||||
"checksum num-traits 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)" = "a16a42856a256b39c6d3484f097f6713e14feacd9bfb02290917904fae46c81c"
|
||||
"checksum num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8890e6084723d57d0df8d2720b0d60c6ee67d6c93e7169630e4371e88765dcad"
|
||||
"checksum openssl 0.7.14 (registry+https://github.com/rust-lang/crates.io-index)" = "c4117b6244aac42ed0150a6019b4d953d28247c5dd6ae6f46ae469b5f2318733"
|
||||
"checksum openssl-sys 0.7.17 (registry+https://github.com/rust-lang/crates.io-index)" = "89c47ee94c352eea9ddaf8e364be7f978a3bb6d66d73176572484238dd5a5c3f"
|
||||
"checksum openssl-sys-extras 0.7.14 (registry+https://github.com/rust-lang/crates.io-index)" = "11c5e1dba7d3d03d80f045bf0d60111dc69213b67651e7c889527a3badabb9fa"
|
||||
"checksum openssl-verify 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3ed86cce894f6b0ed4572e21eb34026f1dc8869cb9ee3869029131bc8c3feb2d"
|
||||
"checksum phf 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)" = "17896951e179a6cbed7d3519b3078ac6c03a347d3e9cf8f303c8a1a73c5a3e44"
|
||||
"checksum phf_shared 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)" = "bb6c14aac1140c2b06b41477096f249416b17c893d56386a892ac657edfdffba"
|
||||
"checksum pkg-config 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8cee804ecc7eaf201a4a207241472cc870e825206f6c031e3ee2a72fa425f2fa"
|
||||
"checksum pnacl-build-helper 1.4.10 (registry+https://github.com/rust-lang/crates.io-index)" = "61c9231d31aea845007443d62fcbb58bb6949ab9c18081ee1e09920e0cf1118b"
|
||||
"checksum postgres 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a7ef92468927003a037e175b54320319e358886865899b37f7318837a646a9fd"
|
||||
"checksum postgres-protocol 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7e2fc3d800dacc2dd749b690ad15b9b78bc04c26c3f0525cbe163436559bc3fc"
|
||||
"checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5"
|
||||
"checksum regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)" = "4fd4ace6a8cf7860714a2c2280d6c1f7e6a413486c13298bbc86fd3da019402f"
|
||||
"checksum regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957"
|
||||
"checksum ring 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c0743ef007bcff4909b107907a410418eb7e5c6ad55b843d70b39f62bfb7112e"
|
||||
"checksum rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)" = "6159e4e6e559c81bd706afe9c8fd68f547d3e851ce12e76b1de7914bab61691b"
|
||||
"checksum rustc_version 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "c5f5376ea5e30ce23c03eb77cbe4962b988deead10910c372b226388b594c084"
|
||||
"checksum semver 0.1.20 (registry+https://github.com/rust-lang/crates.io-index)" = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac"
|
||||
"checksum solicit 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "172382bac9424588d7840732b250faeeef88942e37b6e35317dce98cafdd75b2"
|
||||
"checksum strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "50c069df92e4b01425a8bf3576d5d417943a6a7272fbabaf5bd80b1aaa76442e"
|
||||
"checksum tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "87974a6f5c1dfb344d733055601650059a3363de2a6104819293baff662132d6"
|
||||
"checksum term_size 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3f7f5f3f71b0040cecc71af239414c23fd3c73570f5ff54cf50e03cef637f2a0"
|
||||
"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
|
||||
"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5"
|
||||
"checksum time 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)" = "3c7ec6d62a20df54e07ab3b78b9a3932972f4b7981de295563686849eb3989af"
|
||||
"checksum traitobject 0.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "07eaeb7689bb7fca7ce15628319635758eda769fed481ecfe6686ddef2600616"
|
||||
"checksum typeable 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1410f6f91f21d1612654e7cc69193b0334f909dcf2c790c4826254fbb86f8887"
|
||||
"checksum unicase 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "13a5906ca2b98c799f4b1ab4557b76367ebd6ae5ef14930ec841c74aed5f3764"
|
||||
"checksum unicode-bidi 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c1f7ceb96afdfeedee42bade65a0d585a6a0106f681b6749c8ff4daa8df30b3f"
|
||||
"checksum unicode-normalization 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "26643a2f83bac55f1976fb716c10234485f9202dcd65cfbdf9da49867b271172"
|
||||
"checksum unicode-segmentation 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "b905d0fc2a1f0befd86b0e72e31d1787944efef9d38b9358a9e92a69757f7e3b"
|
||||
"checksum unicode-width 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2d6722facc10989f63ee0e20a83cd4e1714a9ae11529403ac7e0afd069abc39e"
|
||||
"checksum untrusted 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5d9bc0e6e73a10975d1fbff8ac3541e221181b0d8998351600fb5523de634c0d"
|
||||
"checksum url 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "48ccf7bd87a81b769cf84ad556e034541fb90e1cd6d4bc375c822ed9500cd9d7"
|
||||
"checksum user32-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4ef4711d107b21b410a3a974b1204d9accc8b10dad75d8324b5d755de1617d47"
|
||||
"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
|
||||
"checksum vec_map 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cac5efe5cb0fa14ec2f84f83c701c562ee63f6dcc680861b21d65c682adfb05f"
|
||||
"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
|
||||
"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
|
||||
19
indexer/Cargo.toml
Normal file
19
indexer/Cargo.toml
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
[package]
|
||||
name = "indexer"
|
||||
version = "0.1.0"
|
||||
authors = ["Yorhel <git@yorhel.nl>"]
|
||||
|
||||
[dependencies]
|
||||
regex = "0.1.77"
|
||||
log = "0.3.6"
|
||||
env_logger = "0.3.5"
|
||||
lazy_static = "0.2.1"
|
||||
libc = "0.2.17"
|
||||
libarchive3-sys = "0.1.2"
|
||||
encoding = { git = "https://github.com/lifthrasiir/rust-encoding", features = ["no-optimized-legacy-encoding"] }
|
||||
ring = "0.5.3"
|
||||
postgres = "0.12.0"
|
||||
clap = "2.16.3"
|
||||
hyper = "0.9.11"
|
||||
url = "1.2.3"
|
||||
chrono = "0.2.25"
|
||||
343
indexer/src/archive.rs
Normal file
343
indexer/src/archive.rs
Normal file
|
|
@ -0,0 +1,343 @@
|
|||
use std::str;
|
||||
use std::ptr;
|
||||
use std::error::Error as ErrorTrait;
|
||||
use std::io::{Result,Error,Read};
|
||||
use std::ffi::{CStr,CString};
|
||||
|
||||
use libc::{c_void,ssize_t};
|
||||
use libarchive3_sys::ffi;
|
||||
|
||||
|
||||
/* This is a safe, limited and opinionated wrapper around the libarchive C bindings.
|
||||
* I initially used the libarchive crate, but it has several issues. Some of which are not fixable
|
||||
* without a complete rewrite.
|
||||
* - Panics on non-UTF8 path names
|
||||
* - Panics on hard links (PR #6)
|
||||
* - API is far too flexible, easy to misuse and get panics/segfaults
|
||||
* - Impossible to correctly read files from an archive (issue #7)
|
||||
* - Does not provide a convenient Read interface for files
|
||||
*
|
||||
* Barring any unexpected behaviour or bugs in libarchive, the API below should not panic or
|
||||
* segfault for any archive or usage pattern.
|
||||
*/
|
||||
|
||||
// State shared between the safe wrapper and libarchive's read callback. It is always kept in a
// Box (see Archive::new()), because a raw pointer to it is registered with libarchive.
pub struct Archive<'a> {
|
||||
// libarchive read handle; released with archive_read_free() when the Archive is dropped.
a: *mut ffi::Struct_archive,
|
||||
// Source of the (compressed) archive bytes; read from by archive_read_cb().
rd: &'a mut Read,
|
||||
// Scratch buffer that archive_read_cb() fills and hands to libarchive.
buf: Vec<u8>,
|
||||
// I/O error captured by archive_read_cb(); error() returns it in preference to libarchive's
// own error information.
err: Option<Error>,
|
||||
// Set once a data read returned 0 bytes (or the raw stream was empty); cleared when moving to
// the next entry. Guards against reading past EOF.
eof: bool,
|
||||
}
|
||||
|
||||
|
||||
// The entry the archive is currently positioned at. Owns the Archive, so only one entry can
// exist at a time; next() consumes it to produce the following entry.
pub struct ArchiveEntry<'a> {
|
||||
// The archive this entry belongs to.
a: Box<Archive<'a>>,
|
||||
// Entry header pointer as filled in by archive_read_next_header().
e: *mut ffi::Struct_archive_entry,
|
||||
}
|
||||
|
||||
// Reader returned by Archive::open_raw(); its Read implementation forwards to the wrapped Archive.
pub struct RawEntry<'a>(Box<Archive<'a>>);
|
||||
|
||||
|
||||
// Simplified classification of an archive entry, as returned by ArchiveEntry::filetype().
#[derive(Debug,PartialEq,Eq)]
|
||||
pub enum FileType {
|
||||
// Regular file (AE_IFREG)
File,
|
||||
// Directory (AE_IFDIR)
Directory,
|
||||
// Symlink or hardlink with a valid UTF-8 target. Hardlink targets get a '/' prepended.
Link(String),
|
||||
Other, // Also includes Link(<non-utf8-path>)
|
||||
}
|
||||
|
||||
|
||||
unsafe extern "C" fn archive_read_cb(_: *mut ffi::Struct_archive, data: *mut c_void, buf: *mut *const c_void) -> ssize_t {
|
||||
let arch: &mut Archive = &mut *(data as *mut Archive);
|
||||
*buf = arch.buf.as_mut_ptr() as *mut c_void;
|
||||
match arch.rd.read(&mut arch.buf[..]) {
|
||||
Ok(s) => s as ssize_t,
|
||||
Err(e) => {
|
||||
let desc = CString::new(e.description()).unwrap();
|
||||
let fmt = CString::new("%s").unwrap();
|
||||
ffi::archive_set_error(arch.a, e.raw_os_error().unwrap_or(0), fmt.as_ptr(), desc.as_ptr());
|
||||
arch.err = Some(e);
|
||||
-1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl<'a> Archive<'a> {
|
||||
fn new(rd: &mut Read, a: *mut ffi::Struct_archive) -> Result<Box<Archive>> {
|
||||
let bufsize = 64*1024;
|
||||
let mut buf = Vec::with_capacity(bufsize);
|
||||
unsafe { buf.set_len(bufsize) };
|
||||
let mut ret = Box::new(Archive { a: a, rd: rd, buf: buf, err: None, eof: false });
|
||||
|
||||
let aptr: *mut c_void = &mut *ret as *mut Archive as *mut c_void;
|
||||
let r = unsafe { ffi::archive_read_open(a, aptr, None, Some(archive_read_cb), None) };
|
||||
if r == ffi::ARCHIVE_FATAL {
|
||||
return Err(ret.error());
|
||||
}
|
||||
Ok(ret)
|
||||
}
|
||||
|
||||
fn error(&mut self) -> Error {
|
||||
self.err.take().unwrap_or_else(|| {
|
||||
let err = Error::from_raw_os_error(unsafe { ffi::archive_errno(self.a) });
|
||||
let desc = unsafe { ffi::archive_error_string(self.a) };
|
||||
if desc.is_null() {
|
||||
return err;
|
||||
}
|
||||
if let Ok(s) = str::from_utf8(unsafe { CStr::from_ptr(desc) }.to_bytes()) {
|
||||
Error::new(err.kind(), s)
|
||||
} else {
|
||||
err
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn entry(self: Box<Self>) -> Result<Option<ArchiveEntry<'a>>> {
|
||||
let mut ent = ArchiveEntry {
|
||||
a: self,
|
||||
e: ptr::null_mut()
|
||||
};
|
||||
ent.a.eof = false;
|
||||
let res = unsafe { ffi::archive_read_next_header(ent.a.a, &mut ent.e) };
|
||||
match res {
|
||||
ffi::ARCHIVE_EOF => Ok(None),
|
||||
ffi::ARCHIVE_FATAL => Err(ent.a.error()),
|
||||
_ => Ok(Some(ent))
|
||||
}
|
||||
}
|
||||
|
||||
fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
|
||||
// libarchive tends to throw an error if you try to read after an EOF; handle that case
|
||||
// here.
|
||||
if self.eof {
|
||||
return Ok(0);
|
||||
}
|
||||
let cbuf = buf.as_mut_ptr() as *mut c_void;
|
||||
let n = unsafe { ffi::archive_read_data(self.a, cbuf, buf.len()) };
|
||||
if n >= 0 {
|
||||
self.eof = n == 0;
|
||||
Ok(n as usize)
|
||||
} else {
|
||||
Err(self.error())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn open_archive(rd: &mut Read) -> Result<Option<ArchiveEntry>> {
|
||||
let a = unsafe {
|
||||
let a = ffi::archive_read_new();
|
||||
ffi::archive_read_support_filter_all(a);
|
||||
ffi::archive_read_support_format_all(a);
|
||||
a
|
||||
};
|
||||
try!(Self::new(rd, a)).entry()
|
||||
}
|
||||
|
||||
pub fn open_raw(rd: &mut Read) -> Result<RawEntry> {
|
||||
let a = unsafe {
|
||||
let a = ffi::archive_read_new();
|
||||
ffi::archive_read_support_filter_all(a);
|
||||
ffi::archive_read_support_format_raw(a);
|
||||
ffi::archive_read_support_format_empty(a);
|
||||
a
|
||||
};
|
||||
let mut a = try!(Self::new(rd, a));
|
||||
let mut e: *mut ffi::Struct_archive_entry = ptr::null_mut();
|
||||
let res = unsafe { ffi::archive_read_next_header(a.a, &mut e) };
|
||||
match res {
|
||||
ffi::ARCHIVE_FATAL => Err(a.error()),
|
||||
ffi::ARCHIVE_EOF => {
|
||||
a.eof = true;
|
||||
Ok(RawEntry(a))
|
||||
},
|
||||
_ => Ok(RawEntry(a))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl<'a> Drop for Archive<'a> {
|
||||
fn drop(&mut self) {
|
||||
unsafe {
|
||||
ffi::archive_read_free(self.a);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl<'a> ArchiveEntry<'a> {
|
||||
pub fn next(self) -> Result<Option<ArchiveEntry<'a>>> {
|
||||
self.a.entry()
|
||||
}
|
||||
|
||||
// Returns None in NULL (when does that even happen?) or on invalid UTF-8.
|
||||
pub fn path(&self) -> Option<&str> {
|
||||
let c_str: &CStr = unsafe {
|
||||
let ptr = ffi::archive_entry_pathname(self.e);
|
||||
if ptr.is_null() {
|
||||
return None;
|
||||
}
|
||||
CStr::from_ptr(ptr)
|
||||
};
|
||||
str::from_utf8(c_str.to_bytes()).ok()
|
||||
// Perform some simple opinionated normalization. Full normalization might be better,
|
||||
// but also slower and more complex. This solution covers the most important cases.
|
||||
.map(|s| s.trim_left_matches('/').trim_left_matches("./").trim_right_matches('/'))
|
||||
}
|
||||
|
||||
pub fn size(&self) -> usize {
|
||||
unsafe { ffi::archive_entry_size(self.e) as usize }
|
||||
}
|
||||
|
||||
fn symlink(&self) -> Option<String> {
|
||||
let c_str: &CStr = unsafe {
|
||||
let ptr = ffi::archive_entry_symlink(self.e);
|
||||
if ptr.is_null() {
|
||||
return None;
|
||||
}
|
||||
CStr::from_ptr(ptr)
|
||||
};
|
||||
str::from_utf8(c_str.to_bytes()).map(str::to_string).ok()
|
||||
}
|
||||
|
||||
fn hardlink(&self) -> Option<String> {
|
||||
let c_str: &CStr = unsafe {
|
||||
let ptr = ffi::archive_entry_hardlink(self.e);
|
||||
if ptr.is_null() {
|
||||
return None;
|
||||
}
|
||||
CStr::from_ptr(ptr)
|
||||
};
|
||||
// Hard links have the same name as an earlier pathname(), and those typically don't have a
|
||||
// preceding slash. Add this slash here so that the same resolution logic can be used for
|
||||
// both hardlinks and symlinks. I really don't care about the difference between these two.
|
||||
str::from_utf8(c_str.to_bytes()).map(|p| format!("/{}", p)).ok()
|
||||
}
|
||||
|
||||
pub fn filetype(&self) -> FileType {
|
||||
// If it has a symlink/hardlink path, then just consider it a link regardless of what
|
||||
// _filetype() says.
|
||||
if let Some(l) = self.symlink().or(self.hardlink()) {
|
||||
return FileType::Link(l);
|
||||
}
|
||||
match unsafe { ffi::archive_entry_filetype(self.e) } {
|
||||
ffi::AE_IFDIR => FileType::Directory,
|
||||
ffi::AE_IFREG => FileType::File,
|
||||
_ => FileType::Other,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl<'a> Read for ArchiveEntry<'a> {
|
||||
fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
|
||||
self.a.read(buf)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl<'a> Read for RawEntry<'a> {
|
||||
fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
|
||||
self.0.read(buf)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// We can't provide an Iterator object for ArchiveEntries because Rust doesn't support streaming
|
||||
// iterators. Let's instead provide a walk function for convenience.
|
||||
// cb should return Ok(true) to continue, Ok(false) to break
|
||||
pub fn walk<F>(ent: Option<ArchiveEntry>, mut cb: F) -> Result<()>
|
||||
where F: FnMut(&mut ArchiveEntry) -> Result<bool>
|
||||
{
|
||||
let mut ent = ent;
|
||||
while let Some(mut e) = ent {
|
||||
if !try!(cb(&mut e)) {
|
||||
break;
|
||||
}
|
||||
ent = try!(e.next());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std;
    use std::io::Read;
    use std::fs::File;

    // Asserts path, size, file type and complete contents of a single archive entry.
    fn check(entry: &mut ArchiveEntry, path: Option<&str>, size: usize, ft: FileType, cont: &str) {
        assert_eq!(entry.path(), path);
        assert_eq!(entry.size(), size);
        assert_eq!(entry.filetype(), ft);
        let mut contents = String::new();
        assert_eq!(entry.read_to_string(&mut contents).unwrap(), size);
        assert_eq!(&contents, cont);
    }

    // 64 KiB of newlines is not an archive in any supported format.
    #[test]
    fn invalid() {
        let mut input = std::io::repeat(0x0a).take(64*1024);
        assert!(Archive::open_archive(&mut input).is_err());
    }

    // Empty input: no entries as an archive, zero bytes as a raw stream.
    #[test]
    fn zerolength() {
        let mut input = std::io::empty();
        {
            let first = Archive::open_archive(&mut input);
            assert!(first.unwrap().is_none());
        }
        {
            let mut raw = Archive::open_raw(&mut input).unwrap();
            let mut data = Vec::new();
            assert_eq!(raw.read_to_end(&mut data).unwrap(), 0);
        }
    }

    // Walks tests/simpletest.tar.gz entry by entry and checks each one.
    #[test]
    fn archive() {
        let mut f = File::open("tests/simpletest.tar.gz").unwrap();
        let mut ent = Archive::open_archive(&mut f).unwrap().unwrap();
        check(&mut ent, Some("simple"), 0, FileType::Directory, "");

        ent = ent.next().unwrap().unwrap();
        check(&mut ent, Some("simple/file"), 3, FileType::File, "Hi\n");

        ent = ent.next().unwrap().unwrap();
        check(&mut ent, Some("simple/link"), 0, FileType::Link("file".to_string()), "");

        ent = ent.next().unwrap().unwrap();
        check(&mut ent, Some("simple/hardlink"), 0, FileType::Link("/simple/file".to_string()), "");

        ent = ent.next().unwrap().unwrap();
        check(&mut ent, Some("simple/fifo"), 0, FileType::Other, "");

        ent = ent.next().unwrap().unwrap();
        check(&mut ent, None, 0, FileType::File, "");

        assert!(ent.next().unwrap().is_none());
    }

    // A file wrapped in several compression layers is fully decoded by open_raw().
    #[test]
    fn raw() {
        let mut f = File::open("tests/rawtest.gz.xz.bzip2").unwrap();
        let mut reader = Archive::open_raw(&mut f).unwrap();
        let mut text = String::new();
        reader.read_to_string(&mut text).unwrap();
        assert_eq!(&text, "File contents!\n");
    }

    // Uncompressed input is passed through unchanged by open_raw().
    #[test]
    fn raw_passthrough() {
        let mut input = std::io::Cursor::new(&b"This is an uncompressed text file"[..]);
        let mut reader = Archive::open_raw(&mut input).unwrap();
        let mut text = String::new();
        reader.read_to_string(&mut text).unwrap();
        assert_eq!(&text, "This is an uncompressed text file");
    }
}
|
||||
363
indexer/src/archread.rs
Normal file
363
indexer/src/archread.rs
Normal file
|
|
@ -0,0 +1,363 @@
|
|||
use std::io::Result;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use archive::{walk,ArchiveEntry,FileType};
|
||||
|
||||
/* I had hoped that reading man pages from an archive would just be a simple:
|
||||
*
|
||||
* 1. Walk through all files in the archive in a streaming fashion
|
||||
* 2. Parse/index man pages
|
||||
*
|
||||
* But alas, it was not to be. Symlinks and hardlinks have ruined it. Now we have to...
|
||||
*
|
||||
* 1. Walk through all entries in the archive in a streaming fashion
|
||||
* 2. Parse/index regular file man pages
|
||||
* 3. Keep track of all paths in the archive
|
||||
* 4. Use the result of step (3) to resolve symlinks/hardlinks to their actual file
|
||||
* 5. Read the entire damn archive again if one of the links resolved to a file that was not
|
||||
* recognized as a man page in step (2). Luckily, this isn't very common.
|
||||
*
|
||||
* And this doesn't even cover the problem of duplicate entries in a tar, which is also quite
|
||||
* annoying to handle.
|
||||
*
|
||||
* What annoys me the most about all of this is that it's not possible to stream an archive from
|
||||
* the network and read/index the entire thing in a single step. Now we either have to buffer
|
||||
* packages to disk or redownload the archive in order to be able to follow all links to man pages.
|
||||
*
|
||||
* (Note that it is possible to resolve links while walking through the entries, which will allow
|
||||
* us to match files found later in the archive against links found earlier, thus potentially
|
||||
* saving the need to read the archive a second time. This is merely a performance improvement for
|
||||
* an uncommon case, and it certainly won't simplify the code)
|
||||
*
|
||||
* (Note that it's also possible to just flush all files <10MB* to disk to completely avoid the
|
||||
* need for a second archive read, but that's going to significantly slow down the common case in
|
||||
* order to handle a rare case. It's possible to further optimize this using some heuristics to
|
||||
* determine whether a file is potentially a man page, but that's both complex and may not even
|
||||
* save much)
|
||||
*
|
||||
* (* So apparently some man pages are close to 10MB...)
|
||||
*/
|
||||
|
||||
|
||||
// What FileList remembers about each path seen in the archive. Used as the "virtual filesystem"
// when resolving links.
#[derive(Clone,Debug,PartialEq,Eq)]
|
||||
pub enum EntryType {
|
||||
// Regular file that has been handled/indexed
|
||||
Handled,
|
||||
// Regular file that hasn't been handled because the caller wasn't interested in it. Could
|
||||
// still be an interesting file if it is referenced from an interesting path.
|
||||
Regular,
|
||||
// Link to another file (interesting or not is irrelevant)
|
||||
Link(String),
|
||||
// Directory; need this information when resolving links
|
||||
Directory,
|
||||
// Something that couldn't be an interesting file (chardev/socket/etc); If any link resolves to
|
||||
// this we know we're done.
|
||||
Other,
|
||||
}
|
||||
|
||||
// Result of a first pass over an archive (FileList::read()): every path that was seen plus the
// interesting links that still have to be resolved with FileList::links().
pub struct FileList {
|
||||
// List of seen files. This is used to resolve links
|
||||
// Keyed by the normalized path as returned by ArchiveEntry::path().
seen: HashMap<String, EntryType>,
|
||||
// List of interesting links
|
||||
// Every path in here also has an EntryType::Link entry in `seen`.
links: Vec<String>,
|
||||
}
|
||||
|
||||
// Files that interesting links resolved to but that were not handled during the first pass.
// Maps the resolved file path to the paths of all interesting links pointing at it.
pub struct MissedFiles(HashMap<String, Vec<String>>);
|
||||
|
||||
|
||||
impl FileList {
|
||||
|
||||
/* Read an archive until the end. Accepts two callbacks:
|
||||
*
|
||||
* interest_cb: Called on every path in the archive, should return whether the file is
|
||||
* interesting (i.e. whether we want to know its contents).
|
||||
* file_cb: Called on every regular file for which interest_cb() showed an interest.
|
||||
* The callback accepts multiple path names, but this function will only provide one.
|
||||
*
|
||||
* Returns a FileList struct that can be used to retreive all interesting non-regular files.
|
||||
*/
|
||||
pub fn read<F,G>(ent: Option<ArchiveEntry>, interest_cb: F, mut file_cb: G) -> Result<FileList>
|
||||
where F: Fn(&str) -> bool, G: FnMut(&[&str], &mut ArchiveEntry) -> Result<()>
|
||||
{
|
||||
let mut fl = FileList {
|
||||
seen: HashMap::new(),
|
||||
links: Vec::new(),
|
||||
};
|
||||
|
||||
try!(walk(ent, |mut e| {
|
||||
let path = match e.path() {
|
||||
Some(x) => x.to_string(),
|
||||
None => { warn!("Invalid UTF-8 filename in archive"); return Ok(true) }
|
||||
};
|
||||
let ft = e.filetype();
|
||||
trace!("Archive entry: {:10} {} {:?}", e.size(), path, ft);
|
||||
|
||||
// We ought to throw away the result of the previous entry with the same name and use
|
||||
// this new entry instead, but fuck it. This case is too rare, so let's just warn.
|
||||
if let Some(_) = fl.seen.get(&path) {
|
||||
warn!("Duplicate file entry: {}", path);
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
let et = match ft {
|
||||
FileType::File => {
|
||||
if interest_cb(&path) {
|
||||
let pathv = [&path as &str];
|
||||
try!(file_cb(&pathv[..], &mut e));
|
||||
EntryType::Handled
|
||||
} else {
|
||||
EntryType::Regular
|
||||
}
|
||||
},
|
||||
FileType::Link(l) => {
|
||||
if interest_cb(&path) {
|
||||
fl.links.push(path.clone());
|
||||
}
|
||||
EntryType::Link(l)
|
||||
},
|
||||
FileType::Directory => EntryType::Directory,
|
||||
FileType::Other => EntryType::Other,
|
||||
};
|
||||
|
||||
fl.seen.insert(path, et);
|
||||
Ok(true)
|
||||
}));
|
||||
Ok(fl)
|
||||
}
|
||||
|
||||
|
||||
// This is basically realpath(), using the virtual filesystem in self.seen.
|
||||
// This method is not particularly efficient, it allocates like crazy.
|
||||
fn resolve_link(&self, base: &str, path: &str, depth: usize) -> Option<(EntryType, Vec<String>)> {
|
||||
if depth < 1 {
|
||||
warn!("Unresolved link: {} -> {}; Recursion depth exceeded", base, path);
|
||||
return None
|
||||
}
|
||||
|
||||
// Remove filename from the base
|
||||
let basedir = if let Some(i) = base.rfind('/') { base.split_at(i).0 } else { return None };
|
||||
|
||||
let comp : Vec<&str> =
|
||||
if path.starts_with('/') { path.split('/').collect() }
|
||||
else { basedir.split('/').chain(path.split('/')).collect() };
|
||||
|
||||
let mut dest = Vec::new();
|
||||
|
||||
for (i, &c) in comp.iter().enumerate() {
|
||||
if c == "" || c == "." {
|
||||
continue;
|
||||
}
|
||||
if c == ".." {
|
||||
if dest.len() > 1 {
|
||||
dest.pop();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
dest.push(c.to_string());
|
||||
let curpath = dest.join("/");
|
||||
match self.seen.get(&curpath) {
|
||||
|
||||
// If it's a directory, we're good
|
||||
Some(&EntryType::Directory) => (),
|
||||
|
||||
// If it's a file or man page, it must be the last item.
|
||||
Some(& ref x@ EntryType::Regular) |
|
||||
Some(& ref x@ EntryType::Handled) => return
|
||||
if i == comp.len()-1 {
|
||||
Some((x.clone(), dest))
|
||||
} else {
|
||||
warn!("Unresolved link: {} -> {}; Non-directory component", base, path);
|
||||
None
|
||||
},
|
||||
|
||||
// Links... Ugh
|
||||
Some(&EntryType::Link(ref d)) => {
|
||||
match self.resolve_link(&curpath, &d, depth-1) {
|
||||
// Same as above, with dirs we can continue, files have to be last
|
||||
Some((EntryType::Directory, d)) => dest = d,
|
||||
x@Some((EntryType::Regular, _)) |
|
||||
x@Some((EntryType::Handled, _)) => return
|
||||
if i == comp.len()-1 { x }
|
||||
else {
|
||||
warn!("Unresolved link: {} -> {}; Non-directory link component", base, path);
|
||||
None
|
||||
},
|
||||
_ => return None,
|
||||
}
|
||||
},
|
||||
|
||||
// Don't care about anything else, just stop.
|
||||
_ => {
|
||||
warn!("Unresolved link: {} -> {}; Special or non-existing file", base, path);
|
||||
return None
|
||||
}
|
||||
}
|
||||
}
|
||||
Some((EntryType::Directory, dest))
|
||||
}
|
||||
|
||||
/* Calls cb() on every 'interesting' link to a file that has already been passed to a file_cb()
|
||||
* in FileList::read().
|
||||
* If there are any interesting links that have not yet been passed to file_cb(), a MissedFiles
|
||||
* struct is returned that can be used to retrieve those files by re-reading the archive.
|
||||
*/
|
||||
pub fn links<F>(self, mut cb: F) -> Option<MissedFiles> where F: FnMut(&str, &str) {
|
||||
let mut missed = HashMap::new();
|
||||
|
||||
for p in self.links.iter() {
|
||||
let dest = match self.seen.get(p) { Some(&EntryType::Link(ref x)) => x, _ => unreachable!() };
|
||||
|
||||
match self.resolve_link(&p, dest, 32) {
|
||||
Some((EntryType::Handled, d)) => {
|
||||
let dstr = d.join("/");
|
||||
cb(&p, &dstr);
|
||||
},
|
||||
Some((EntryType::Regular, d)) => {
|
||||
let dstr = d.join("/");
|
||||
missed.entry(dstr).or_insert_with(Vec::new).push(p.to_string());
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
if missed.len() > 0 {
|
||||
Some(MissedFiles(missed))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl MissedFiles {
|
||||
/* Reads the archive again and calls file_cb() on every interesting file that was missed during
|
||||
* the first read of the archive (using FileList::{read,links}). file_cb is exactly the same as
|
||||
* in FileList::read, but this time it can actually get multiple paths as first argument; which
|
||||
* happens when multiple interesting links point to the same file. */
|
||||
pub fn read<G>(mut self, ent: Option<ArchiveEntry>, mut file_cb: G) -> Result<()>
|
||||
where G: FnMut(&[&str], &mut ArchiveEntry) -> Result<()>
|
||||
{
|
||||
walk(ent, |mut e| {
|
||||
if let Some(f) = e.path().and_then(|p| self.0.remove(p)) {
|
||||
let v: Vec<&str> = f.iter().map(|x| x as &str).collect();
|
||||
try!(file_cb(&v, &mut e))
|
||||
}
|
||||
Ok(self.0.len() > 0)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use archive::Archive;
    use std::io::Read;
    use std::fs::File;

    // First pass: exactly one interesting regular file is expected to reach file_cb.
    fn test_read() -> FileList {
        let mut f = File::open("tests/testarchive.tar.xz").unwrap();
        let arch = Archive::open_archive(&mut f).unwrap();
        let mut calls = 0;
        FileList::read(arch,
            |p| p.starts_with("man/man"),
            |paths, entry| {
                assert_eq!(calls, 0);
                calls += 1;
                assert_eq!(paths, &["man/man3/helloworld.3"][..]);
                assert_eq!(entry.size(), 12);

                let mut text = String::new();
                entry.read_to_string(&mut text).unwrap();
                assert_eq!(&text, "Hello World\n");
                Ok(())
            }
        ).unwrap()
    }

    // Resolves the link stored at `p`; panics if `p` is unknown or not a link.
    fn resolve(r: &FileList, p: &str) -> Option<(EntryType, Vec<String>)> {
        match r.seen.get(p) {
            Some(&EntryType::Link(ref target)) => r.resolve_link(p, target, 5),
            _ => panic!("Not found or not a link: {}", p),
        }
    }

    // Checks resolve_link() against every link in the test archive.
    fn test_resolve_links(r: &FileList) {
        let helloworld = Some((EntryType::Handled, vec!["man".to_string(), "man3".to_string(), "helloworld.3".to_string()]));
        let man3dir = Some((EntryType::Directory, vec!["man".to_string(), "man3".to_string()]));

        let cases = vec![
            ("man/mans", man3dir),
            ("man/man6/hardlink.6", helloworld.clone()),
            ("man/man1/symlinkbefore.1", helloworld.clone()),
            ("man/man6/symlinkafter.6", helloworld.clone()),

            ("man/man1/badsymlink1.1", None),
            ("man/man1/badsymlink2.1", None),
            ("man/man1/badsymlink3.1", None),
            ("man/man1/badsymlink4.1", None),
            ("man/man1/badsymlink5.1", None),

            ("man/man1/doublesymlink1.1", helloworld.clone()),
            ("man/man1/doublesymlink2.1", helloworld.clone()),
            ("man/man1/triplesymlink.1", helloworld.clone()),
            ("man/man1/infinitesymlink.1", None),
        ];
        for (link, expected) in cases {
            assert_eq!(resolve(r, link), expected);
        }
    }

    // Checks which links are reported as pointing to the already handled man page.
    fn test_links(r: FileList) -> Option<MissedFiles> {
        let mut found = Vec::new();
        let missed = r.links(|p,d| found.push((p.to_string(), d.to_string())));
        found.sort();

        let expected: Vec<(String, String)> = [
            "man/man1/doublesymlink1.1",
            "man/man1/doublesymlink2.1",
            "man/man1/symlinkbefore.1",
            "man/man1/triplesymlink.1",
            "man/man6/hardlink.6",
            "man/man6/symlinkafter.6",
        ].iter().map(|p| (p.to_string(), "man/man3/helloworld.3".to_string())).collect();
        assert_eq!(found, expected);
        missed
    }

    // Second pass: the missed file is delivered once, under both link names.
    fn test_reread(r: MissedFiles) {
        let mut f = File::open("tests/testarchive.tar.xz").unwrap();
        let ent = Archive::open_archive(&mut f).unwrap();
        let mut files = Vec::new();
        r.read(ent,
            |paths, entry| {
                let mut text = String::new();
                entry.read_to_string(&mut text).unwrap();
                for p in paths.iter() {
                    files.push((p.to_string(), text.clone()));
                }
                Ok(())
            }
        ).unwrap();
        files.sort();

        let expected = vec![
            ("man/man3/needreread.3".to_string(), "Potentially interesting file\n".to_string()),
            ("man/man6/needreread.6".to_string(), "Potentially interesting file\n".to_string()),
        ];
        assert_eq!(files, expected);
    }

    // The steps depend on each other's results, so they run as one test.
    #[test]
    fn test_reader() {
        //use env_logger;
        //env_logger::init().unwrap();

        let list = test_read();
        test_resolve_links(&list);
        let missed = test_links(list).unwrap();
        test_reread(missed);
    }
}
|
||||
95
indexer/src/main.rs
Normal file
95
indexer/src/main.rs
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
#[macro_use] extern crate log;
|
||||
#[macro_use] extern crate lazy_static;
|
||||
#[macro_use] extern crate clap;
|
||||
extern crate env_logger;
|
||||
extern crate regex;
|
||||
extern crate libarchive3_sys;
|
||||
extern crate libc;
|
||||
extern crate ring;
|
||||
extern crate encoding;
|
||||
extern crate postgres;
|
||||
extern crate hyper;
|
||||
extern crate url;
|
||||
extern crate chrono;
|
||||
|
||||
mod archive;
|
||||
mod archread;
|
||||
mod man;
|
||||
mod open;
|
||||
mod pkg;
|
||||
mod sys_arch;
|
||||
|
||||
|
||||
// Convenience function to get a system id by short-name. Panics if the system doesn't exist.
|
||||
fn sysbyshort(conn: &postgres::GenericConnection, short: &str) -> i32 {
|
||||
let r = conn.query("SELECT id FROM systems WHERE short = $1", &[&short]).unwrap();
|
||||
if r.is_empty() {
|
||||
panic!("Invalid system: {}", short);
|
||||
}
|
||||
r.get(0).get(0)
|
||||
}
|
||||
|
||||
|
||||
fn main() {
|
||||
let arg = clap_app!(indexer =>
|
||||
(about: "Manned.org man page indexer")
|
||||
(@arg v: -v +multiple "Increase verbosity")
|
||||
(@subcommand pkg =>
|
||||
(about: "Index a single package")
|
||||
(@arg force: --force "Overwrite existing indexed package")
|
||||
(@arg sys: --sys +required +takes_value "System short-name")
|
||||
(@arg cat: --cat +required +takes_value "Package category")
|
||||
(@arg pkg: --pkg +required +takes_value "Package name")
|
||||
(@arg ver: --ver +required +takes_value "Package version")
|
||||
(@arg date: --date +required +takes_value "Package release date")
|
||||
(@arg FILE: +required "Package file")
|
||||
)
|
||||
(@subcommand arch =>
|
||||
(about: "Index an Arch Linux repository")
|
||||
(@arg sys: --sys +required +takes_value "System short-name")
|
||||
(@arg mirror: --mirror +required +takes_value "Mirror URL")
|
||||
(@arg repo: --repo +required +takes_value "Repository name")
|
||||
)
|
||||
).get_matches();
|
||||
|
||||
let verbose = arg.occurrences_of("v");
|
||||
env_logger::LogBuilder::new()
|
||||
.filter(Some("indexer"), match verbose {
|
||||
0 => log::LogLevelFilter::Warn,
|
||||
1 => log::LogLevelFilter::Info,
|
||||
2 => log::LogLevelFilter::Debug,
|
||||
_ => log::LogLevelFilter::Trace,
|
||||
})
|
||||
.filter(Some("postgres"), if verbose >= 4 { log::LogLevelFilter::Trace } else { log::LogLevelFilter::Info })
|
||||
.init().unwrap();
|
||||
|
||||
let dbhost = match std::env::var("MANNED_PG") {
|
||||
Ok(x) => x,
|
||||
Err(_) => { error!("MANNED_PG not set."); return }
|
||||
};
|
||||
let db = match postgres::Connection::connect(&dbhost[..], postgres::TlsMode::None) {
|
||||
Ok(x) => x,
|
||||
Err(x) => { error!("Can't connect to postgres: {}", x); return },
|
||||
};
|
||||
debug!("Connected to database");
|
||||
|
||||
if let Some(matches) = arg.subcommand_matches("pkg") {
|
||||
pkg::pkg(&db, pkg::PkgOpt {
|
||||
force: matches.is_present("force"),
|
||||
sys: sysbyshort(&db, matches.value_of("sys").unwrap()),
|
||||
cat: matches.value_of("cat").unwrap(),
|
||||
pkg: matches.value_of("pkg").unwrap(),
|
||||
ver: matches.value_of("ver").unwrap(),
|
||||
date: matches.value_of("date").unwrap(),
|
||||
file: open::Path{ path: matches.value_of("FILE").unwrap(), cache: false, canbelocal: true},
|
||||
});
|
||||
}
|
||||
|
||||
if let Some(matches) = arg.subcommand_matches("arch") {
|
||||
sys_arch::sync(&db,
|
||||
sysbyshort(&db, matches.value_of("sys").unwrap()),
|
||||
matches.value_of("mirror").unwrap(),
|
||||
matches.value_of("repo").unwrap()
|
||||
);
|
||||
}
|
||||
}
|
||||
301
indexer/src/man.rs
Normal file
301
indexer/src/man.rs
Normal file
|
|
@ -0,0 +1,301 @@
|
|||
use std::str;
|
||||
use std::io;
|
||||
use std::io::Read;
|
||||
use regex::bytes;
|
||||
use regex::Regex;
|
||||
use encoding;
|
||||
use encoding::{all,EncodingRef};
|
||||
use encoding::label::encoding_from_whatwg_label;
|
||||
use ring::digest;
|
||||
|
||||
use archive::Archive;
|
||||
|
||||
// Anything larger than this just isn't a man page. I hope.
|
||||
const MAX_MAN_SIZE: u64 = 20*1024*1024;
|
||||
// I've also not seen valid man pages smaller than this
|
||||
const MIN_MAN_SIZE: u64 = 9;
|
||||
|
||||
|
||||
// Checks a path for a man page candidate. Returns None if it doesn't seem like a man page
|
||||
// location, otherwise Some((manPageName, Section, Locale)).
|
||||
pub fn parse_path(path: &str) -> Option<(&str, &str, &str)> {
|
||||
// Roughly: man[/locale]/man1/manpage.section[.compression]+
|
||||
lazy_static! {
|
||||
static ref RE: Regex = Regex::new(r"(?x)
|
||||
man
|
||||
(?: / ([^/]+) )? # Optional locale
|
||||
/man[a-z0-9]/ # Subdir
|
||||
([^/]+?) # Man page name (non-greedy)
|
||||
\. ([^/\.]+) # Section
|
||||
(?: \. (?: gz|lzma|bz2|xz ))* $ # Any number of compression extensions
|
||||
").unwrap();
|
||||
}
|
||||
|
||||
let cap = match RE.captures(path) { Some(x) => x, None => return None };
|
||||
let locale = cap.at(1).unwrap_or("");
|
||||
let name = cap.at(2).unwrap();
|
||||
let section = cap.at(3).unwrap();
|
||||
|
||||
// Not everything matching the regex is necessarily a man page, exclude some special cases.
|
||||
match (name, section, locale) {
|
||||
// Files that totally aren't man pages
|
||||
("Makefile", "am", _) |
|
||||
(".cvsignore", _, _) |
|
||||
(_, "in", _) |
|
||||
(_, "gz", _) |
|
||||
(_, "lzma", _) |
|
||||
(_, "bz2", _) |
|
||||
(_, "xz", _) |
|
||||
(_, "html", _) => None,
|
||||
// Some weird directories that happen to match the locale
|
||||
(n, s, "5man") |
|
||||
(n, s, "c") |
|
||||
(n, s, "man1") |
|
||||
(n, s, "man2") |
|
||||
(n, s, "man3") |
|
||||
(n, s, "man4") |
|
||||
(n, s, "man5") |
|
||||
(n, s, "man6") |
|
||||
(n, s, "man7") |
|
||||
(n, s, "man8") |
|
||||
(n, s, "Man-Part1") |
|
||||
(n, s, "Man-Part2") => Some((n, s, "")),
|
||||
// Nothing special!
|
||||
x => Some(x)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Convenient wrapper for archread's interest_cb
|
||||
pub fn ismanpath(path: &str) -> bool {
|
||||
parse_path(path).is_some()
|
||||
}
|
||||
|
||||
|
||||
fn validate(data: &Vec<u8>) -> Option<&'static str> {
|
||||
lazy_static! {
|
||||
static ref HTML: bytes::Regex = bytes::Regex::new(r"^\s*<(?:html|head|!DOCTYPE)").unwrap();
|
||||
}
|
||||
|
||||
if data.len() >= MAX_MAN_SIZE as usize {
|
||||
Some("File too large")
|
||||
} else if data.len() < MIN_MAN_SIZE as usize {
|
||||
Some("File too small")
|
||||
} else if &data[..] == &b".so man3/\n"[..] {
|
||||
Some("Contents: '.so man3/'")
|
||||
} else if &data[..] == &b"timestamp\n"[..] {
|
||||
Some("Contents: 'timestamp'")
|
||||
} else if HTML.is_match(&data) {
|
||||
Some("Looks like an HTML file")
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Look for 'coding:' indications in the file header, a la preconv(1).
|
||||
fn codec_from_tag(data: &Vec<u8>) -> Option<EncodingRef> {
|
||||
lazy_static! {
|
||||
// According to the emacs docs the tag should be on the first line; according to preconv(1)
|
||||
// it should be on the first or second line. I've also seen some files with the tag on the
|
||||
// last line. I've not seen the tag itself used in a different context, so just get it from
|
||||
// anywhere...
|
||||
static ref TAG: bytes::Regex = bytes::Regex::new(r"-\*-.*coding:\s*(?u:([^\s;]+)).*-\*").unwrap();
|
||||
}
|
||||
let cap = match TAG.captures(&data) { Some(x) => x, None => return None };
|
||||
let tag = str::from_utf8(cap.at(1).unwrap()).unwrap().to_lowercase();
|
||||
|
||||
match &tag[..] {
|
||||
// Deny some common UTF-8-compatible encodings. These tags are obviously incorrect.
|
||||
"us-ascii" | "ascii" | "utf8" | "utf-8" | "utf-8-unix" => None,
|
||||
|
||||
// latin-1 isn't in the whatwg spec under that name
|
||||
"latin-1" => Some(all::WINDOWS_1252),
|
||||
|
||||
// armscii isn't in the whatwg spec at all
|
||||
"armscii-8" => Some(all::ARMSCII_8),
|
||||
|
||||
// Anything else should be found by its whatwg label.
|
||||
x => match encoding_from_whatwg_label(x) {
|
||||
Some(x) => Some(x),
|
||||
None => { warn!("Unknown encoding in emacs tag: {}", x); None },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn codec_from_path(path: &str) -> Option<EncodingRef> {
|
||||
let locale = match parse_path(path) {
|
||||
Some((_,_,l)) if l != "" => l.to_lowercase(),
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
lazy_static! {
|
||||
static ref RE: Regex = Regex::new(r"^(?x)
|
||||
([a-z]+) # primary language
|
||||
(?:_ ([a-z]+))? # secondary language
|
||||
(?:@ [a-z]+)? # script (potentially useful, but uncommon and not currently used)
|
||||
(?:\. ([^\.@]+))? # encoding (FUCKING USEFUL)
|
||||
$").unwrap();
|
||||
}
|
||||
|
||||
let cap = match RE.captures(&locale) { Some(x) => x, None => return None };
|
||||
let lang = cap.at(1).unwrap();
|
||||
let seclang = cap.at(2);
|
||||
let enc = cap.at(3);
|
||||
|
||||
// Try to do something with the encoding tag
|
||||
match (lang, enc) {
|
||||
(_, Some("eucjp")) |
|
||||
(_, Some("ujis")) | // Not sure about this one, but it seems to come out alright
|
||||
("ja", Some("euc")) => return Some(all::EUC_JP),
|
||||
|
||||
(_, Some("euckr")) => return Some(all::WINDOWS_949),
|
||||
|
||||
("ja", Some("jis7")) |
|
||||
("ja", Some("pck")) => return None, /* WAT? TODO: DO SOMETHING WITH THESE */
|
||||
|
||||
(_, Some(x)) => match encoding_from_whatwg_label(x) {
|
||||
Some(x) => return Some(x),
|
||||
_ => { warn!("Unknown encoding in locale: {}", x) },
|
||||
},
|
||||
_ => {},
|
||||
};
|
||||
|
||||
// Fall back to language
|
||||
match (lang, seclang) {
|
||||
("pl", _) |
|
||||
("cs", _) |
|
||||
("hr", _) |
|
||||
("hu", _) |
|
||||
("sl", _) |
|
||||
("sk", _) => Some(all::ISO_8859_2),
|
||||
("bg", _) |
|
||||
("be", _) |
|
||||
("uk", _) => Some(all::ISO_8859_5),
|
||||
("el", _) => Some(all::ISO_8859_7),
|
||||
("et", _) => Some(all::ISO_8859_15),
|
||||
("tr", _) => Some(all::WINDOWS_1254),
|
||||
("ru", _) => Some(all::KOI8_R),
|
||||
("ja", _) |
|
||||
("jp", _) => Some(all::EUC_JP), // Tricky; but JIS is certainly less common
|
||||
("zh", Some("cn")) => Some(all::GBK), // These are based purely on what I've observed,
|
||||
("zh", _) => Some(all::BIG5_2003), // perhaps some heuristics based on contents can do better
|
||||
("ko", _) => Some(all::WINDOWS_949),
|
||||
(_, _) => None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Decompresses / decodes a man page and returns its SHA-1 hash, encoding name, and UTF-8 contents.
|
||||
pub fn decode(paths: &[&str], ent: &mut Read) -> io::Result<(digest::Digest,&'static str,String)> {
|
||||
let mut decomp = try!(Archive::open_raw(ent)).take(MAX_MAN_SIZE+1);
|
||||
let mut data = Vec::new();
|
||||
try!(decomp.read_to_end(&mut data));
|
||||
|
||||
if let Some(e) = validate(&data) {
|
||||
return Err(io::Error::new(io::ErrorKind::InvalidData, e));
|
||||
}
|
||||
|
||||
let dig = digest::digest(&digest::SHA1, &data);
|
||||
|
||||
// TODO: Handle BOM? UTF-16?
|
||||
// If it passes as UTF-8, then just consider it UTF-8.
|
||||
if let Ok(_) = str::from_utf8(&data) {
|
||||
return Ok((dig, "utf8", unsafe { String::from_utf8_unchecked(data) } ));
|
||||
}
|
||||
// Otherwise, look for a coding tag in the contents
|
||||
if let Some(e) = codec_from_tag(&data) {
|
||||
if let Ok(s) = e.decode(&data, encoding::DecoderTrap::Strict) {
|
||||
return Ok((dig, e.name(), s));
|
||||
}
|
||||
}
|
||||
// If that fails as well, look for clues in the file path.
|
||||
for path in paths {
|
||||
if let Some(e) = codec_from_path(path) {
|
||||
if let Ok(s) = e.decode(&data, encoding::DecoderTrap::Strict) {
|
||||
return Ok((dig, e.name(), s));
|
||||
}
|
||||
}
|
||||
}
|
||||
// If all else fails, use a lossy iso-8859-1
|
||||
Ok((dig, "iso-8859-1", (all::ISO_8859_1 as EncodingRef).decode(&data, encoding::DecoderTrap::Ignore).unwrap() ))
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Table-driven checks for parse_path(): paths that must be rejected, and
// paths with the (name, section, locale) they must produce.
#[test]
fn test_parse_path() {
    let rejected = [
        // Generic tests
        "/",
        "/man1/ncdu.1",
        "/man/man?/ncdu.1",
        // Special cases
        "/usr/share/man/man1/INDEX",
        "/usr/share/man/man1/Makefile",
        "/usr/share/man/man1/Makefile.am",
        "/usr/share/man/man1/Makefile.in",
        "/usr/share/man/man1/.cvsignore",
        "/usr/share/man/man1/.cvsignore.gz",
        // HTML files in or near man directories
        "/usr/local/plan9/man/man8/index.html",
        "/usr/local/share/doc/gmt/html/man/grdpaste.html",
    ];
    for path in rejected.iter() {
        assert_eq!(parse_path(path), None);
    }

    let accepted = [
        // Generic tests
        ("/man/man1/ncdu.1", ("ncdu", "1", "")),
        ("/man/man1/ncdu.1.gz.lzma.xz.bz2.gz", ("ncdu", "1", "")), // This stuff happens
        ("/man/en_US.UTF-8/man1/ncdu.1", ("ncdu", "1", "en_US.UTF-8")),
        // Some actual locations
        ("/usr/local/man/man1/list_audio_tracks.1.gz", ("list_audio_tracks", "1", "")),
        ("/usr/local/lib/perl5/site_perl/man/man3/DBIx::Class::Helper::ResultSet::DateMethods1::Announcement.3.gz",
            ("DBIx::Class::Helper::ResultSet::DateMethods1::Announcement", "3", "")),
        ("/usr/man/man3/exit.3tk", ("exit", "3tk", "")),
        ("/usr/local/brlcad/share/man/mann/exit.nged.gz", ("exit", "nged", "")),
        ("/usr/X11R6/man/man3/intro.3xglut.gz", ("intro", "3xglut", "")),
        ("/usr/local/share/man/ko_KR.eucKR/man3/intro.3.gz", ("intro", "3", "ko_KR.eucKR")),
        // Directories in the locale position that are not locales
        ("/usr/lib/scilab/man/Man-Part1/man1/ans.1", ("ans", "1", "")),
        ("/heirloom/usr/share/man/5man/man1/chgrp.1.gz", ("chgrp", "1", "")),
    ];
    for &(path, expected) in accepted.iter() {
        assert_eq!(parse_path(path), Some(expected));
    }
}
|
||||
|
||||
|
||||
// Checks the encoding name that codec_from_path() derives from a number of
// locale directories.
#[test]
fn test_codec_from_path() {
    let cases = [
        ("man/de_DE.ISO8859-15/man1/scribus.1.gz", "iso-8859-15"),
        ("man/de_DE.ISO_8859-1/man1/scribus.1.gz", "windows-1252"),
        ("man/ja.UTF-8/man1/test.1", "utf-8"),
        ("man/ja_JP/man1/test.1", "euc-jp"),
        ("man/ja_JP.EUC/man1/test.1", "euc-jp"),
        ("man/ja_JP.SJIS/man1/test.1", "windows-31j"),
        ("man/jp.eucJP/man1/test.1", "euc-jp"),
        ("man/jp/man1/test.1", "euc-jp"),
        ("man/lt.ISO8859-13/man1/test.1", "iso-8859-13"),
        ("man/ru/man1/test.1", "koi8-r"),
        ("man/ru_RU@Cyr/man1/test.1", "koi8-r"),
        ("man/zh_CN/man1/test.1", "gbk"),
        ("man/zh_TW/man1/test.1", "big5-2003"),
    ];
    for &(path, name) in cases.iter() {
        assert_eq!(codec_from_path(path).unwrap().name(), name);
    }
}
|
||||
|
||||
|
||||
// Decodes a doubly-compressed GBK man page and checks the hash of the raw
// contents, the detected encoding and the hash of the UTF-8 result.
#[test]
fn test_decode_zh() {
    use std::fs::File;
    use ring::test::from_hex;

    // The first path carries no locale, so the encoding has to come from the
    // second one.
    let paths = ["bullshit", "/usr/share/man/zh_CN/man3/exit.3.gz"];
    let mut f = File::open("tests/exit.3.gz.lzma").unwrap();
    let (dig, enc, s) = decode(&paths[..], &mut f).unwrap();

    // lzma -d <tests/exit.3.gz.lzma | gzip -d | sha1sum
    let filehash = from_hex("cdf9b3e8d96a83c908eb0a0c277485e2f3eebe87").unwrap();
    assert_eq!(dig.as_ref(), &filehash[..]);

    assert_eq!(enc, "gbk");

    // lzma -d <tests/exit.3.gz.lzma | gzip -d | iconv -f gbk -t utf8 | sha1sum
    let utf8hash = from_hex("47f3e441137b207c0abdc38adac692298da4927a").unwrap();
    let utf8dig = digest::digest(&digest::SHA1, s.as_bytes());
    assert_eq!(utf8dig.as_ref(), &utf8hash[..]);
}
|
||||
82
indexer/src/open.rs
Normal file
82
indexer/src/open.rs
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
use std::io::{Read,Result,Error,ErrorKind,copy};
|
||||
use std::fs::{File,create_dir_all,metadata};
|
||||
use std::hash::{Hash,Hasher,SipHasher};
|
||||
use std::time::{Duration,SystemTime};
|
||||
use url::Url;
|
||||
use hyper;
|
||||
|
||||
|
||||
const CACHE_PATH: &'static str = "/var/tmp/manned-indexer";
|
||||
const CACHE_TIME: u64 = 24*3600;
|
||||
|
||||
|
||||
// Something that can be opened for reading with Path::open(): an http(s) URL
// or, optionally, a file on the local filesystem.
pub struct Path<'a> {
    // URL or local file name.
    pub path: &'a str,

    // Whether a fetched URL is kept in (and served from) the on-disk cache.
    pub cache: bool,

    // Whether `path` may be opened as a local file when it is not a URL.
    pub canbelocal: bool,
}
|
||||
|
||||
|
||||
fn cache_fn(url: &Url) -> String {
|
||||
let name = url.path_segments().unwrap().last().unwrap();
|
||||
let name = if name == "" { "index" } else { name };
|
||||
|
||||
let mut hash = SipHasher::new();
|
||||
url.hash(&mut hash);
|
||||
format!("{}/{}-{}-{:x}", CACHE_PATH, url.host_str().unwrap(), name, hash.finish())
|
||||
}
|
||||
|
||||
|
||||
fn fetch(url: &str) -> Result<Box<Read>> {
|
||||
let res = try!(hyper::Client::new()
|
||||
.get(url)
|
||||
.header(hyper::header::UserAgent("Man page crawler (info@manned.org; https://manned.org/)".to_owned()))
|
||||
.send()
|
||||
.map_err(|e| Error::new(ErrorKind::Other, format!("Hyper: {}", e)))
|
||||
);
|
||||
if !res.status.is_success() {
|
||||
return Err(Error::new(ErrorKind::Other, format!("HTTP: {}", res.status) ));
|
||||
}
|
||||
Ok(Box::new(res) as Box<Read>)
|
||||
}
|
||||
|
||||
|
||||
fn file(path: &str) -> Result<Box<Read>> {
|
||||
Ok(Box::new(try!(File::open(path))) as Box<Read>)
|
||||
}
|
||||
|
||||
|
||||
impl<'a> Path<'a> {
|
||||
pub fn open(&self) -> Result<Box<Read>> {
|
||||
if let Ok(url) = Url::parse(self.path) {
|
||||
if url.scheme() != "http" && url.scheme() != "https" {
|
||||
return Err(Error::new(ErrorKind::Other, "Invalid scheme"));
|
||||
}
|
||||
|
||||
if self.cache {
|
||||
let cfn = cache_fn(&url);
|
||||
if let Ok(m) = metadata(&cfn) {
|
||||
if m.modified().unwrap() > SystemTime::now() - Duration::from_secs(CACHE_TIME) {
|
||||
return file(&cfn);
|
||||
}
|
||||
}
|
||||
try!(create_dir_all(CACHE_PATH));
|
||||
{
|
||||
let mut rd = try!(fetch(url.as_str()));
|
||||
let mut wr = try!(File::create(&cfn));
|
||||
try!(copy(&mut rd, &mut wr));
|
||||
}
|
||||
file(&cfn)
|
||||
|
||||
} else {
|
||||
fetch(url.as_str())
|
||||
}
|
||||
|
||||
} else if self.canbelocal {
|
||||
file(self.path)
|
||||
|
||||
} else {
|
||||
Err(Error::new(ErrorKind::Other, "Invalid URL"))
|
||||
}
|
||||
}
|
||||
}
|
||||
142
indexer/src/pkg.rs
Normal file
142
indexer/src/pkg.rs
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
use std;
|
||||
use std::io::Read;
|
||||
use postgres;
|
||||
|
||||
use open;
|
||||
use archread;
|
||||
use man;
|
||||
use archive::{Archive,ArchiveEntry};
|
||||
|
||||
pub struct PkgOpt<'a> {
|
||||
pub force: bool,
|
||||
pub sys: i32,
|
||||
pub cat: &'a str,
|
||||
pub pkg: &'a str,
|
||||
pub ver: &'a str,
|
||||
pub date: &'a str, // TODO: Option to extract date from package metadata itself
|
||||
pub file: open::Path<'a>
|
||||
}
|
||||
|
||||
|
||||
fn insert_pkg(tr: &postgres::transaction::Transaction, opt: &PkgOpt) -> Option<i32> {
|
||||
// The ON CONFLICT .. DO UPDATE is used instead of DO NOTHING because in that case the
|
||||
// RETURNING clause wouldn't give us a package id.
|
||||
let q = "INSERT INTO packages (system, category, name) VALUES($1, $2, $3)
|
||||
ON CONFLICT ON CONSTRAINT packages_system_name_category_key DO UPDATE SET name=$3 RETURNING id";
|
||||
let pkgid: i32 = match tr.query(q, &[&opt.sys, &opt.cat, &opt.pkg]) {
|
||||
Err(e) => {
|
||||
error!("Can't insert package in database: {}", e);
|
||||
return None;
|
||||
},
|
||||
Ok(r) => r.get(0).get(0),
|
||||
};
|
||||
|
||||
let q = "SELECT id FROM package_versions WHERE package = $1 AND version = $2";
|
||||
let res = tr.query(q, &[&pkgid, &opt.ver]).unwrap();
|
||||
|
||||
let verid : i32;
|
||||
if res.is_empty() {
|
||||
let q = "INSERT INTO package_versions (package, version, released) VALUES($1, $2, $3::text::date) RETURNING id";
|
||||
verid = tr.query(q, &[&pkgid, &opt.ver, &opt.date]).unwrap().get(0).get(0);
|
||||
info!("New package pkgid {} verid {}", pkgid, verid);
|
||||
Some(verid)
|
||||
|
||||
} else if opt.force {
|
||||
verid = res.get(0).get(0);
|
||||
info!("Overwriting package pkgid {} verid {}", pkgid, verid);
|
||||
tr.query("DELETE FROM man WHERE package = $1", &[&verid]).unwrap();
|
||||
Some(verid)
|
||||
|
||||
} else {
|
||||
info!("Package already in database, pkgid {} verid {}", pkgid, res.get(0).get::<usize,i32>(0));
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn insert_man_row(tr: &postgres::GenericConnection, verid: i32, path: &str, hash: &[u8]) {
|
||||
// TODO: Store 'encoding' in the database
|
||||
let (name, sect, locale) = man::parse_path(path).unwrap();
|
||||
if let Err(e) = tr.execute(
|
||||
"INSERT INTO man (package, name, filename, locale, hash, section) VALUES ($1, $2, '/'||$3, $4, $5, $6)",
|
||||
&[&verid, &name, &path, &locale, &hash, §]
|
||||
) {
|
||||
// I think this can only happen if archread gives us the same file twice, which really
|
||||
// shouldn't happen. But I'd rather continue with an error logged than panic.
|
||||
error!("Can't insert verid {} fn {}: {}", verid, path, e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn insert_man(tr: &postgres::GenericConnection, verid: i32, paths: &[&str], ent: &mut Read) {
|
||||
let (dig, enc, cont) = match man::decode(paths, ent) {
|
||||
Err(e) => { error!("Error decoding {}: {}", paths[0], e); return },
|
||||
Ok(x) => x,
|
||||
};
|
||||
|
||||
// Overwrite entry if the contents are different. It's possible that earlier decoding
|
||||
// implementations didn't properly detect the encoding. (On the other hand, due to differences
|
||||
// in filenames it's also possible that THIS decoding step went wrong, but that's slightly less
|
||||
// likely)
|
||||
tr.execute(
|
||||
"INSERT INTO contents (hash, content) VALUES($1, $2) ON CONFLICT (hash) DO UPDATE SET content = $2",
|
||||
&[&dig.as_ref(), &cont]
|
||||
).unwrap();
|
||||
|
||||
for path in paths {
|
||||
insert_man_row(tr, verid, path, dig.as_ref());
|
||||
debug!("Inserted man page: {} ({})", path, enc);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn insert_link(tr: &postgres::GenericConnection, verid: i32, src: &str, dest: &str) {
|
||||
let hash = tr.query("SELECT hash FROM man WHERE package = $1 AND filename = '/'||$2", &[&verid, &dest]).unwrap();
|
||||
if hash.is_empty() { /* Can happen if man::decode() failed previously. */
|
||||
error!("Link to unindexed man page: {} -> {}", src, dest);
|
||||
return;
|
||||
}
|
||||
let hash: Vec<u8> = hash.get(0).get(0);
|
||||
insert_man_row(tr, verid, src, &hash);
|
||||
debug!("Inserted man link: {} -> {}", src, dest);
|
||||
}
|
||||
|
||||
|
||||
fn index_pkg(tr: &postgres::GenericConnection, opt: &PkgOpt, verid: i32) -> std::io::Result<()> {
|
||||
let indexfunc = |paths: &[&str], ent: &mut ArchiveEntry| {
|
||||
insert_man(tr, verid, paths, ent);
|
||||
Ok(()) /* Don't propagate errors, continue handling other man pages */
|
||||
};
|
||||
|
||||
let mut rd = try!(opt.file.open());
|
||||
let missed = try!(archread::FileList::read(
|
||||
try!(Archive::open_archive(&mut rd)),
|
||||
man::ismanpath, &indexfunc))
|
||||
.links(|src, dest| { insert_link(tr, verid, src, dest) });
|
||||
|
||||
if let Some(missed) = missed {
|
||||
warn!("Some links were missed, reading package again");
|
||||
let mut rd = try!(opt.file.open());
|
||||
try!(missed.read(try!(Archive::open_archive(&mut rd)), indexfunc));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
pub fn pkg(conn: &postgres::GenericConnection, opt: PkgOpt) {
|
||||
info!("Handling pkg: {} / {} / {} - {} @ {} @ {}", opt.sys, opt.cat, opt.pkg, opt.ver, opt.date, opt.file.path);
|
||||
|
||||
let tr = conn.transaction().unwrap();
|
||||
tr.set_rollback();
|
||||
|
||||
let verid = match insert_pkg(&tr, &opt) { Some(x) => x, None => return };
|
||||
|
||||
match index_pkg(&tr, &opt, verid) {
|
||||
Err(e) => error!("Error reading package: {}", e),
|
||||
Ok(_) => tr.set_commit()
|
||||
}
|
||||
|
||||
if let Err(e) = tr.finish() {
|
||||
error!("Error finishing transaction: {}", e);
|
||||
}
|
||||
}
|
||||
128
indexer/src/sys_arch.rs
Normal file
128
indexer/src/sys_arch.rs
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
use std::str::FromStr;
|
||||
use std::io::{Read,BufRead,BufReader,Result};
|
||||
use regex::Regex;
|
||||
use chrono::NaiveDateTime;
|
||||
use postgres;
|
||||
|
||||
use archive;
|
||||
use open;
|
||||
use man;
|
||||
use pkg;
|
||||
|
||||
|
||||
// Package metadata collected from a "desc" entry of the repository index.
struct Meta {
    // Value of %FILENAME%: file name of the package, relative to the repository directory.
    filename: String,

    // Value of %NAME%.
    name: String,

    // Value of %VERSION%.
    version: String,

    // %BUILDDATE% (a unix timestamp) formatted as YYYY-MM-DD.
    date: String,
}
|
||||
|
||||
|
||||
fn read_files<T: Read>(lst: T) -> Result<bool> {
|
||||
let rd = BufReader::new(lst);
|
||||
for line in rd.lines() {
|
||||
let line = try!(line);
|
||||
if man::ismanpath(&line) {
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
|
||||
fn read_desc(rd: &mut archive::ArchiveEntry) -> Result<Option<Meta>> {
|
||||
let mut data = String::new();
|
||||
try!(rd.take(64*1024).read_to_string(&mut data));
|
||||
|
||||
let path = rd.path().unwrap();
|
||||
lazy_static! {
|
||||
static ref RE: Regex = Regex::new(r"\s*%([^%]+)%\s*\n\s*([^\n]+)\s*\n").unwrap();
|
||||
}
|
||||
|
||||
let mut filename = None;
|
||||
let mut name = None;
|
||||
let mut version = None;
|
||||
let mut builddate = None;
|
||||
|
||||
for kv in RE.captures_iter(&data) {
|
||||
let key = kv.at(1).unwrap();
|
||||
let val = kv.at(2).unwrap();
|
||||
trace!("{}: {} = {}", path, key, val);
|
||||
match key {
|
||||
"FILENAME" => filename = Some(val),
|
||||
"NAME" => name = Some(val),
|
||||
"VERSION" => version = Some(val),
|
||||
"BUILDDATE" => builddate = i64::from_str(val).ok(),
|
||||
_ => {},
|
||||
}
|
||||
}
|
||||
|
||||
if filename.is_some() && name.is_some() && version.is_some() && builddate.is_some() {
|
||||
Ok(Some(Meta {
|
||||
filename: filename.unwrap().to_string(),
|
||||
name: name.unwrap().to_string(),
|
||||
version: version.unwrap().to_string(),
|
||||
date: NaiveDateTime::from_timestamp(builddate.unwrap(), 0).format("%Y-%m-%d").to_string(),
|
||||
}))
|
||||
} else {
|
||||
warn!("Metadata missing from package description: {}", path);
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// TODO: Switch to x86_64 instead of i686
|
||||
pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, repo: &str) {
|
||||
info!("Reading packages from {} {}", mirror, repo);
|
||||
|
||||
let path = format!("{}/{}/os/i686/{1:}.files.tar.gz", mirror, repo);
|
||||
let path = open::Path{ path: &path, cache: true, canbelocal: false };
|
||||
let mut index = match path.open() {
|
||||
Err(e) => { error!("Can't read package index: {}", e); return },
|
||||
Ok(x) => x,
|
||||
};
|
||||
|
||||
let ent = match archive::Archive::open_archive(&mut index) {
|
||||
Err(e) => { error!("Can't read package index: {}", e); return },
|
||||
Ok(x) => x,
|
||||
};
|
||||
|
||||
let mut hasman = false;
|
||||
let mut meta = None;
|
||||
let r = archive::walk(ent, |x| {
|
||||
if x.filetype() == archive::FileType::Directory {
|
||||
hasman = false;
|
||||
meta = None;
|
||||
} else if x.path().unwrap().ends_with("/files") {
|
||||
hasman = try!(read_files(x));
|
||||
} else if x.path().unwrap().ends_with("/desc") {
|
||||
meta = try!(read_desc(x));
|
||||
}
|
||||
|
||||
if hasman && meta.is_some() {
|
||||
hasman = false;
|
||||
let m = meta.take().unwrap();
|
||||
|
||||
let p = format!("{}/{}/os/i686/{}", mirror, repo, m.filename);
|
||||
pkg::pkg(pg, pkg::PkgOpt{
|
||||
force: false,
|
||||
sys: sys,
|
||||
cat: repo,
|
||||
pkg: &m.name,
|
||||
ver: &m.version,
|
||||
date: &m.date,
|
||||
file: open::Path{
|
||||
path: &p,
|
||||
cache: false,
|
||||
canbelocal: false,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
Ok(true)
|
||||
});
|
||||
|
||||
if let Err(e) = r {
|
||||
error!("Error reading package index: {}", e);
|
||||
}
|
||||
}
|
||||
BIN
indexer/tests/exit.3.gz.lzma
Normal file
BIN
indexer/tests/exit.3.gz.lzma
Normal file
Binary file not shown.
71
indexer/tests/mkarchives.sh
Executable file
71
indexer/tests/mkarchives.sh
Executable file
|
|
@ -0,0 +1,71 @@
|
|||
#!/bin/sh
#
# Generates the archives used by the tests: simpletest.tar.gz,
# rawtest.gz.xz.bzip2 and testarchive.tar.xz. They are written to the current
# directory, which is also used for the temporary "simple" and "man" trees.
#
# The order of inserting the files into the tar is not fully deterministic this
# way. The tests will fail quite badly if a hardlink is considered the
# "original" version.

# Stop at the first failing step instead of packing up a half-built tree.
set -eu


# simpletest.tar.gz

mkdir simple
echo Hi >simple/file
ln -s file simple/link
ln simple/file simple/hardlink
mkfifo simple/fifo
# A file name that is not valid UTF-8. The source charset is given explicitly
# so that the result does not depend on the locale this script runs under.
badfn=$(printf '%s\n' 'Héllö.txt' | iconv -f UTF-8 -t ISO-8859-1)
touch -- "$badfn"
tar -czf simpletest.tar.gz simple "$badfn"
rm -rf -- "$badfn" simple



# rawtest.gz.xz.bzip2

echo "File contents!" | gzip | xz | bzip2 >rawtest.gz.xz.bzip2


# testarchive.tar.xz

mkdir man
cd man

mkdir man1
mkdir man3
mkdir man6
ln -s man3 mans

echo 'Hello World' >man3/helloworld.3
echo 'Not a very interesting file' >notinteresting
echo 'Potentially interesting file' >possiblyinteresting

ln man3/helloworld.3 man6/hardlink.6

ln -s ../man3/helloworld.3 man1/symlinkbefore.1
ln -s ../man3/helloworld.3 man6/symlinkafter.6

# Links that do not lead to a regular file in a straightforward way.
ln -s notadir/../../man3/helloworld.3 man1/badsymlink1.1
ln -s man3/helloworld.3 man1/badsymlink2.1
ln -s ../man3/helloworld.3/. man1/badsymlink3.1
ln -s ../man3/helloworld.3/../helloworld.3 man1/badsymlink4.1
ln -s ../man1/symlinkbefore.1/../../man1/helloworld.3 man1/badsymlink5.1

# Links through other links, and a link to itself.
ln -s symlinkbefore.1 man1/doublesymlink1.1
ln -s ../mans/helloworld.3 man1/doublesymlink2.1
ln -s ../mans/../man1/symlinkbefore.1 man1/triplesymlink.1
ln -s infinitesymlink.1 man1/infinitesymlink.1

# Man page links to a file that is not a man page itself.
ln -s ../possiblyinteresting man3/needreread.3
ln -s ../possiblyinteresting man6/needreread.6

cd ..
rm -f testarchive.tar
tar -cf testarchive.tar man/
rm -r man/

# Append a second, different version of man/possiblyinteresting.
mkdir man
echo 'Overwritten file' >man/possiblyinteresting
tar -rf testarchive.tar man/
rm -r man/

rm -f testarchive.tar.xz
xz testarchive.tar
|
||||
BIN
indexer/tests/rawtest.gz.xz.bzip2
Normal file
BIN
indexer/tests/rawtest.gz.xz.bzip2
Normal file
Binary file not shown.
BIN
indexer/tests/simpletest.tar.gz
Normal file
BIN
indexer/tests/simpletest.tar.gz
Normal file
Binary file not shown.
BIN
indexer/tests/testarchive.tar.xz
Normal file
BIN
indexer/tests/testarchive.tar.xz
Normal file
Binary file not shown.
76
util/arch.sh
76
util/arch.sh
|
|
@ -1,76 +0,0 @@
|
|||
#!/bin/bash
#
# Usage: ./arch.sh
#
# Synchronises the database with an Arch mirror, fetching any packages that
# aren't yet in the database and may have man pages.

# Base URL of the mirror, and the repositories on it that are synchronised.
MIRROR='http://ftp.nluug.nl/pub/os/Linux/distr/archlinux'
REPOS='core extra community'

# Run as a command ("true" or "false") to decide whether skipped packages are
# reported as well.
DEBUG='false'

# System id that is passed to add_pkginfo for every package.
SYSID='1'

. ./common.sh
|
||||
|
||||
|
||||
#######################################
# Print the value of a field in a package "desc" file, i.e. the line that
# follows the first line containing "%<key>%". Prints nothing when the key is
# absent or is not followed by another line.
# Arguments: $1 - key without the percent signs, $2 - path of the desc file
#######################################
_checkpkg_field() {
  awk -v key="%$1%" '
    index($0, key) { if ((getline value) > 0) print value; exit }
  ' "$2"
}

#######################################
# Check one extracted entry of the repository index and, if the package has
# man pages and is not in the database yet, download and index it.
# Globals:   TMP, MIRROR, SYSID, DEBUG, CURL, PKGID (read)
# Arguments: $1 - repository name, $2 - name of the entry's directory
# Outputs:   "===> <entry>" plus a reason for skipped entries (uninteresting
#            ones only if DEBUG is "true"), "===> <entry>" for indexed ones
# Returns:   0 when the entry was skipped or indexed, the status of the
#            download command when fetching the package failed
#######################################
checkpkg() {
  local REPO=$1
  local FN=$2
  local D="$TMP/$REPO/$FN"
  local FILENAME NAME VERSION BUILDDATE F

  if [[ ! -d "$D" || ! -f "$D/files" || ! -f "$D/desc" ]]; then
    printf '===> %s\nInvalid item, ignoring\n' "$FN"
    return 0
  fi

  if ! grep -q /man/ "$D/files"; then
    if ${DEBUG:-false}; then
      printf '===> %s\nNo mans\n' "$FN"
    fi
    return 0
  fi

  FILENAME=$(_checkpkg_field FILENAME "$D/desc")
  NAME=$(_checkpkg_field NAME "$D/desc")
  VERSION=$(_checkpkg_field VERSION "$D/desc")
  BUILDDATE=$(_checkpkg_field BUILDDATE "$D/desc")
  if [[ -z "$FILENAME" || -z "$NAME" || -z "$VERSION" || -z "$BUILDDATE" ]]; then
    printf '===> %s\nInvalid/missing description info\n' "$FN"
    return 0
  fi

  # BUILDDATE is a unix timestamp; the database wants YYYY-MM-DD.
  if ! BUILDDATE=$(date -d "@$BUILDDATE" '+%F'); then
    printf '===> %s\nInvalid/missing description info\n' "$FN"
    return 0
  fi

  # add_pkginfo succeeds when there is nothing left to do for this package.
  if add_pkginfo "$SYSID" "$REPO" "$NAME" "$VERSION" "$BUILDDATE"; then
    if ${DEBUG:-false}; then
      printf '===> %s\nAlready up-to-date\n' "$FN"
    fi
    return 0
  fi

  printf '===> %s\n' "$FN"
  F="$TMP/$REPO/$FILENAME"
  # $CURL holds a command together with its options, so it has to be split.
  # shellcheck disable=SC2086
  $CURL "$MIRROR/$REPO/os/i686/$FILENAME" -o "$F" || return
  add_tar "$F" "$PKGID"
  rm -f -- "$F"
}
|
||||
|
||||
|
||||
#######################################
# Download and unpack the file index of one repository and run checkpkg on
# every entry in it.
# Globals:   TMP, MIRROR, CURL (read)
# Arguments: $1 - repository name; "$TMP/<repository>" must already exist
# Outputs:   a header line, followed by whatever checkpkg prints
# Returns:   1 when the index cannot be downloaded or unpacked, otherwise the
#            status of the last command that was run
#######################################
syncrepo() {
  local REPO=$1
  local F="$TMP/$REPO/repo.tar.gz"
  local fn

  echo "============ $MIRROR $REPO"
  # $CURL holds a command together with its options, so it has to be split.
  # shellcheck disable=SC2086
  $CURL "$MIRROR/$REPO/os/i686/$REPO.files.tar.gz" -o "$F" || return 1
  tar -C "$TMP/$REPO" -xf "$F" || return 1
  rm -f -- "$F"

  for fn in "$TMP/$REPO"/*; do
    # An index without entries leaves the pattern itself in $fn.
    [[ -e "$fn" || -L "$fn" ]] || continue
    checkpkg "$REPO" "${fn##*/}"
  done
}
|
||||
|
||||
|
||||
# Synchronise every repository in its own scratch directory below $TMP.
# REPOS is a space-separated list, so it is split on purpose.
# shellcheck disable=SC2086
for r in $REPOS; do
  # ${TMP:?} aborts instead of operating on "/<repo>" when TMP is empty.
  if ! mkdir -- "${TMP:?}/$r"; then
    echo "Can't create ${TMP}/$r, skipping $r" >&2
    continue
  fi
  syncrepo "$r"
  rm -rf -- "${TMP:?}/$r"
done
|
||||
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
. ./common.sh
|
||||
|
||||
./arch.sh
|
||||
./index.sh daily
|
||||
./deb.sh ubuntu_active
|
||||
./deb.sh debian_active
|
||||
echo "============ Updating SQL indices"
|
||||
|
|
|
|||
22
util/index.sh
Executable file
22
util/index.sh
Executable file
|
|
@ -0,0 +1,22 @@
|
|||
#!/bin/bash
#
# Usage: ./index.sh <function> [args...]
#
# Runs one of the functions defined below, e.g. "./index.sh daily". An
# optional ".config" file in the current directory is sourced first.

if [[ -f .config ]]; then
  # shellcheck source=/dev/null
  source .config
fi

# The indexer command line, kept as an array so that it is never re-split.
INDEX=(./indexer -vv)

set -x

#######################################
# Index the Arch Linux repositories.
# Globals: INDEX (read)
# Returns: 0 if every repository was indexed, otherwise the status of the
#          last indexer run that failed
#######################################
arch() {
  local mirror=http://ftp.nluug.nl/pub/os/Linux/distr/archlinux
  local -a repos=(core extra community)
  local repo
  local rv=0
  for repo in "${repos[@]}"; do
    # Carry on with the next repository, but remember the failure.
    "${INDEX[@]}" arch --sys arch --mirror "$mirror" --repo "$repo" || rv=$?
  done
  return "$rv"
}

#######################################
# Everything that has to be indexed daily.
#######################################
daily() {
  arch
}

"$@"
|
||||
1
util/indexer
Symbolic link
1
util/indexer
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../indexer/target/release/indexer
|
||||
Loading…
Add table
Add a link
Reference in a new issue