Skip to content

Commit d408641

Browse files
authored
Merge pull request #37 from octu0/v1.19.3
v1.19.3
2 parents ec681a0 + e144526 commit d408641

239 files changed

Lines changed: 989 additions & 271 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

Dockerfile.generator

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,10 +5,10 @@ WORKDIR /halide
55
RUN set -eux && \
66
apt update && \
77
apt install -y wget clang g++ binutils libpng-dev libjpeg-dev && \
8-
wget https://github.com/halide/Halide/releases/download/v13.0.1/Halide-13.0.1-x86-64-linux-fb39d7e3149c0ee1e848bb9957be2ea18765c35d.tar.gz && \
9-
tar xzf Halide-13.0.1-x86-64-linux-fb39d7e3149c0ee1e848bb9957be2ea18765c35d.tar.gz && \
10-
rm Halide-13.0.1-x86-64-linux-fb39d7e3149c0ee1e848bb9957be2ea18765c35d.tar.gz && \
11-
mv Halide-13.0.1-x86-64-linux Halide-Runtime
8+
wget https://github.com/halide/Halide/releases/download/v14.0.0/Halide-14.0.0-x86-64-linux-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz && \
9+
tar xzf Halide-14.0.0-x86-64-linux-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz && \
10+
rm Halide-14.0.0-x86-64-linux-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz && \
11+
mv Halide-14.0.0-x86-64-linux Halide-Runtime
1212

1313
COPY docker-entrypoint.generator.sh /usr/local/bin/docker-entrypoint.generator.sh
1414
ENTRYPOINT [ "docker-entrypoint.generator.sh" ]

Makefile

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ _NAME = $(shell grep -o 'AppName string = "[^"]*"' $(VERSION_GO) | cut -d
44
_VERSION = $(shell grep -oE 'Version string = "[0-9]+\.[0-9]+\.[0-9]+"' $(VERSION_GO) | cut -d '"' -f2)
55

66
_HALIDE = "generator"
7-
_HALIDE_VER = "13.0.1"
7+
_HALIDE_VER = "14.0.0"
88

99
.PHONY: vet
1010
vet:
@@ -27,21 +27,21 @@ setup-halide-runtime_linux:
2727
ifeq ($(shell [ -d detector/Halide-Runtime ] && echo "1"),1)
2828
@echo "detector/Halide-Runtime exists"
2929
else
30-
curl -O -sSL https://github.com/halide/Halide/releases/download/v13.0.1/Halide-13.0.1-x86-64-linux-fb39d7e3149c0ee1e848bb9957be2ea18765c35d.tar.gz
31-
tar xzf Halide-13.0.1-x86-64-linux-fb39d7e3149c0ee1e848bb9957be2ea18765c35d.tar.gz
32-
mv Halide-13.0.1-x86-64-linux detector/Halide-Runtime
33-
rm Halide-13.0.1-x86-64-linux-fb39d7e3149c0ee1e848bb9957be2ea18765c35d.tar.gz
30+
curl -O -sSL https://github.com/halide/Halide/releases/download/v14.0.0/Halide-14.0.0-x86-64-linux-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz
31+
tar xzf Halide-14.0.0-x86-64-linux-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz
32+
mv Halide-14.0.0-x86-64-linux detector/Halide-Runtime
33+
rm Halide-14.0.0-x86-64-linux-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz
3434
endif
3535

3636
.PHONY: setup-halide-runtime_darwin
3737
setup-halide-runtime_darwin:
3838
ifeq ($(shell [ -d detector/Halide-Runtime ] && echo "1"),1)
3939
@echo "detector/Halide-Runtime exists"
4040
else
41-
curl -O -sSL https://github.com/halide/Halide/releases/download/v13.0.1/Halide-13.0.1-x86-64-osx-fb39d7e3149c0ee1e848bb9957be2ea18765c35d.tar.gz
42-
tar xzf Halide-13.0.1-x86-64-osx-fb39d7e3149c0ee1e848bb9957be2ea18765c35d.tar.gz
43-
mv Halide-13.0.1-x86-64-osx ./Halide-Runtime
44-
rm Halide-13.0.1-x86-64-osx-fb39d7e3149c0ee1e848bb9957be2ea18765c35d.tar.gz
41+
curl -O -sSL https://github.com/halide/Halide/releases/download/v14.0.0/Halide-14.0.0-x86-64-osx-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz
42+
tar xzf Halide-14.0.0-x86-64-osx-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz
43+
mv Halide-14.0.0-x86-64-osx ./Halide-Runtime
44+
rm Halide-14.0.0-x86-64-osx-6b9ed2afd1d6d0badf04986602c943e287d44e46.tar.gz
4545
endif
4646

4747
.PHONY: setup-halide-runtime

README.md

Lines changed: 51 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -19,57 +19,57 @@ darwin/amd64 Intel(R) Core(TM) i7-8569U CPU @ 2.80GHz
1919

2020
```
2121
src 320x240
22-
BenchmarkJIT/cloneimg : 0.00752ms
23-
BenchmarkJIT/convert_from_argb : 0.02369ms
24-
BenchmarkJIT/convert_from_abgr : 0.03711ms
25-
BenchmarkJIT/convert_from_bgra : 0.02472ms
26-
BenchmarkJIT/convert_from_rabg : 0.03139ms
27-
BenchmarkJIT/convert_from_yuv_420 : 0.02957ms
28-
BenchmarkJIT/convert_from_yuv_444 : 0.02586ms
29-
BenchmarkJIT/convert_to_yuv_420 : 0.05634ms
30-
BenchmarkJIT/convert_to_yuv_444 : 0.06963ms
31-
BenchmarkJIT/rotate0 : 0.00739ms
32-
BenchmarkJIT/rotate90 : 0.02580ms
33-
BenchmarkJIT/rotate180 : 0.00746ms
34-
BenchmarkJIT/rotate270 : 0.02557ms
35-
BenchmarkJIT/crop : 0.06071ms
36-
BenchmarkJIT/scale : 0.14003ms
37-
BenchmarkJIT/scale_box : 0.19936ms
38-
BenchmarkJIT/scale_linear : 0.19754ms
39-
BenchmarkJIT/scale_gaussian : 0.22766ms
40-
BenchmarkJIT/blend_normal : 0.08383ms
41-
BenchmarkJIT/blend_sub : 0.08447ms
42-
BenchmarkJIT/blend_add : 0.08394ms
43-
BenchmarkJIT/blend_diff : 0.08423ms
44-
BenchmarkJIT/grayscale : 0.03839ms
45-
BenchmarkJIT/invert : 0.04330ms
46-
BenchmarkJIT/brightness : 0.04931ms
47-
BenchmarkJIT/gammacorrection : 0.08158ms
48-
BenchmarkJIT/contrast : 0.01506ms
49-
BenchmarkJIT/boxblur : 0.12091ms
50-
BenchmarkJIT/gaussianblur : 0.32293ms
51-
BenchmarkJIT/blockmozaic : 0.27398ms
52-
BenchmarkJIT/erosion : 0.12039ms
53-
BenchmarkJIT/dilation : 0.12439ms
54-
BenchmarkJIT/morphology_open : 0.10255ms
55-
BenchmarkJIT/morphology_close : 0.10472ms
56-
BenchmarkJIT/morphology_gradient : 0.08321ms
57-
BenchmarkJIT/emboss$1 : 0.05385ms
58-
BenchmarkJIT/laplacian : 0.03204ms
59-
BenchmarkJIT/highpass : 0.03783ms
60-
BenchmarkJIT/gradient : 0.03303ms
61-
BenchmarkJIT/edgedetect : 0.02638ms
62-
BenchmarkJIT/sobel : 0.06399ms
63-
BenchmarkJIT/canny : 0.29472ms
64-
BenchmarkJIT/canny_dilate : 0.36258ms
65-
BenchmarkJIT/canny_morphology_open : 0.39542ms
66-
BenchmarkJIT/canny_morphology_close : 0.40479ms
67-
BenchmarkJIT/match_template_sad : 6.64854ms
68-
BenchmarkJIT/match_template_ssd : 4.76639ms
69-
BenchmarkJIT/match_template_ncc : 9.37937ms
70-
BenchmarkJIT/prepared_match_template_ncc : 6.85107ms
71-
BenchmarkJIT/match_template_zncc : 13.29085ms
72-
BenchmarkJIT/prepared_match_template_zncc : 12.07535ms
22+
BenchmarkJIT/cloneimg : 0.00767ms
23+
BenchmarkJIT/convert_from_argb : 0.02328ms
24+
BenchmarkJIT/convert_from_abgr : 0.03573ms
25+
BenchmarkJIT/convert_from_bgra : 0.02430ms
26+
BenchmarkJIT/convert_from_rabg : 0.03159ms
27+
BenchmarkJIT/convert_from_yuv_420 : 0.02964ms
28+
BenchmarkJIT/convert_from_yuv_444 : 0.02652ms
29+
BenchmarkJIT/convert_to_yuv_420 : 0.05654ms
30+
BenchmarkJIT/convert_to_yuv_444 : 0.07356ms
31+
BenchmarkJIT/rotate0 : 0.00828ms
32+
BenchmarkJIT/rotate90 : 0.02600ms
33+
BenchmarkJIT/rotate180 : 0.00792ms
34+
BenchmarkJIT/rotate270 : 0.02560ms
35+
BenchmarkJIT/crop : 0.06128ms
36+
BenchmarkJIT/scale : 0.13941ms
37+
BenchmarkJIT/scale_box : 0.20701ms
38+
BenchmarkJIT/scale_linear : 0.20637ms
39+
BenchmarkJIT/scale_gaussian : 0.31937ms
40+
BenchmarkJIT/blend_normal : 0.09480ms
41+
BenchmarkJIT/blend_sub : 0.08381ms
42+
BenchmarkJIT/blend_add : 0.08439ms
43+
BenchmarkJIT/blend_diff : 0.08445ms
44+
BenchmarkJIT/grayscale : 0.03752ms
45+
BenchmarkJIT/invert : 0.03684ms
46+
BenchmarkJIT/brightness : 0.04142ms
47+
BenchmarkJIT/gammacorrection : 0.07600ms
48+
BenchmarkJIT/contrast : 0.01532ms
49+
BenchmarkJIT/boxblur : 0.10674ms
50+
BenchmarkJIT/gaussianblur : 0.31972ms
51+
BenchmarkJIT/blockmozaic : 0.27346ms
52+
BenchmarkJIT/erosion : 0.11407ms
53+
BenchmarkJIT/dilation : 0.11997ms
54+
BenchmarkJIT/morphology_open : 0.14157ms
55+
BenchmarkJIT/morphology_close : 0.10427ms
56+
BenchmarkJIT/morphology_gradient : 0.07612ms
57+
BenchmarkJIT/emboss : 0.06083ms
58+
BenchmarkJIT/laplacian : 0.04251ms
59+
BenchmarkJIT/highpass : 0.03843ms
60+
BenchmarkJIT/gradient : 0.03320ms
61+
BenchmarkJIT/edgedetect : 0.02701ms
62+
BenchmarkJIT/sobel : 0.06392ms
63+
BenchmarkJIT/canny : 0.28839ms
64+
BenchmarkJIT/canny_dilate : 0.34880ms
65+
BenchmarkJIT/canny_morphology_open : 0.38943ms
66+
BenchmarkJIT/canny_morphology_close : 0.39011ms
67+
BenchmarkJIT/match_template_sad : 5.69188ms
68+
BenchmarkJIT/match_template_ssd : 4.75666ms
69+
BenchmarkJIT/match_template_ncc : 8.98426ms
70+
BenchmarkJIT/prepared_match_template_ncc : 6.23328ms
71+
BenchmarkJIT/match_template_zncc : 12.64066ms
72+
BenchmarkJIT/prepared_match_template_zncc : 11.67131ms
7373
```
7474

7575
## AOT benchmarks

blurry.cpp

Lines changed: 33 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -608,7 +608,8 @@ Func filter2d_gray(
608608
);
609609

610610
conv.compute_at(gradient, yi)
611-
.vectorize(x);
611+
.vectorize(x)
612+
.update(0).unscheduled();
612613

613614
gradient.compute_at(in, ti)
614615
.tile(x, y, xo, yo, xi, yi, 32, 32)
@@ -707,7 +708,8 @@ Func gaussian(Func in, Expr sigma, RDom rd, const char *name) {
707708
gaussian(x, y) += cast<uint8_t>(val / center_val);
708709

709710
sum_kernel.compute_at(gaussian, y)
710-
.vectorize(x);
711+
.vectorize(x)
712+
.update(0).unscheduled();
711713
return gaussian;
712714
}
713715

@@ -781,11 +783,14 @@ Func canny(Func in, Param<int32_t> threshold_max, Param<int32_t> threshold_min)
781783

782784
gauss.compute_at(hy, ti)
783785
.vectorize(y)
784-
.vectorize(x);
786+
.vectorize(x)
787+
.update(0).unscheduled();
785788
gy.compute_at(hy, ti)
786-
.vectorize(x);
789+
.vectorize(x)
790+
.update(0).unscheduled();
787791
gx.compute_at(hy, ti)
788-
.vectorize(x);
792+
.vectorize(x)
793+
.update(0).unscheduled();
789794

790795
nms.compute_at(hy, ti)
791796
.vectorize(x);
@@ -1971,10 +1976,12 @@ Func sobel_fn(Func input, Param<int32_t> width, Param<int32_t> height){
19711976

19721977
gy.compute_at(sobel, yi)
19731978
.parallel(y)
1974-
.vectorize(x);
1979+
.vectorize(x)
1980+
.update(0).unscheduled();
19751981
gx.compute_at(sobel, yi)
19761982
.parallel(y)
1977-
.vectorize(x);
1983+
.vectorize(x)
1984+
.update(0).unscheduled();
19781985

19791986
sobel.compute_at(in, ti)
19801987
.tile(x, y, xo, yo, xi, yi, 32, 32)
@@ -2119,7 +2126,8 @@ Func emboss_fn(Func input, Param<int32_t> width, Param<int32_t> height){
21192126
);
21202127

21212128
conv.compute_at(emboss, yi)
2122-
.vectorize(x);
2129+
.vectorize(x)
2130+
.update(0).unscheduled();
21232131

21242132
emboss.compute_at(in, ti)
21252133
.tile(x, y, xo, yo, xi, yi, 32, 32)
@@ -2552,7 +2560,7 @@ Func linearsum_xy(Func in, Expr size, Expr xfactor, Expr yfactor) {
25522560
Var x("x"), y("y");
25532561

25542562
RDom rd = RDom(0, size, "rd_linearsum");
2555-
Func f = Func("linearsum");
2563+
Func f = Func("linearsum_xy");
25562564
f(x, y) += in(x + (rd * xfactor), y + (rd * yfactor));
25572565
return f;
25582566
}
@@ -2869,28 +2877,36 @@ Func contour_line(Func binary_input, Expr width, Expr height, Expr size) {
28692877

28702878
next_top.compute_at(f, ti)
28712879
.vectorize(y)
2872-
.vectorize(x);
2880+
.vectorize(x)
2881+
.update(0).unscheduled();
28732882
next_top_right.compute_at(f, ti)
28742883
.vectorize(y)
2875-
.vectorize(x);
2884+
.vectorize(x)
2885+
.update(0).unscheduled();
28762886
next_right.compute_at(f, ti)
28772887
.vectorize(y)
2878-
.vectorize(x);
2888+
.vectorize(x)
2889+
.update(0).unscheduled();
28792890
next_bottom_right.compute_at(f, ti)
28802891
.vectorize(y)
2881-
.vectorize(x);
2892+
.vectorize(x)
2893+
.update(0).unscheduled();
28822894
next_bottom.compute_at(f, ti)
28832895
.vectorize(y)
2884-
.vectorize(x);
2896+
.vectorize(x)
2897+
.update(0).unscheduled();
28852898
next_bottom_left.compute_at(f, ti)
28862899
.vectorize(y)
2887-
.vectorize(x);
2900+
.vectorize(x)
2901+
.update(0).unscheduled();
28882902
next_left.compute_at(f, ti)
28892903
.vectorize(y)
2890-
.vectorize(x);
2904+
.vectorize(x)
2905+
.update(0).unscheduled();
28912906
next_top_left.compute_at(f, ti)
28922907
.vectorize(y)
2893-
.vectorize(x);
2908+
.vectorize(x)
2909+
.update(0).unscheduled();
28942910

28952911
nb.compute_at(f, ti)
28962912
.vectorize(y, 8)

0 commit comments

Comments
 (0)