diff --git a/.gitignore b/.gitignore index 2a889f2..91838f8 100644 --- a/.gitignore +++ b/.gitignore @@ -60,4 +60,5 @@ docs/_build/ *~ *# -.DS_Store \ No newline at end of file +.DS_Store +.venv diff --git a/README.md b/README.md index 5c7f4ce..de38dc0 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ To build a development version of usaddress on your machine, run the following c ``` git clone https://github.com/datamade/usaddress.git cd usaddress - pip install -e .[dev] + pip install -e ."[dev]" ``` Then run the testing suite to confirm that everything is working properly: @@ -103,4 +103,4 @@ If something in the library is not behaving intuitively, it is a bug, and should ## Copyright -Copyright (c) 2014 Atlanta Journal Constitution. Released under the [MIT License](https://github.com/datamade/usaddress/blob/master/LICENSE). +Copyright (c) 2025 Atlanta Journal Constitution. Released under the [MIT License](https://github.com/datamade/usaddress/blob/master/LICENSE). diff --git a/measure_performance/test_data/multi_word_state_addresses.xml b/measure_performance/test_data/multi_word_state_addresses.xml new file mode 100644 index 0000000..d929596 --- /dev/null +++ b/measure_performance/test_data/multi_word_state_addresses.xml @@ -0,0 +1,7 @@ + + 1646 Red Leaf Drive Fort Mill, South Carolina 29715 United States + 15 Bridge Street Providence, Rhode Island 02903 United States + 150 Citizens Circle Little River, South Carolina 29566 United States + 4079 U.S. 17 Business Murrells Inlet, South Carolina 29576 United States + 43 South Broadway Pitman, New Jersey 08071 United States + diff --git a/pyproject.toml b/pyproject.toml index d9af84e..04d7928 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "usaddress" -version = "0.5.11" +version = "0.5.12" description = "Parse US addresses using conditional random fields" readme = "README.md" license = {text = "MIT License", url = "http://www.opensource.org/licenses/mit-license.php"} diff --git a/training/README.md b/training/README.md index 44dde75..55442fa 100644 --- a/training/README.md +++ b/training/README.md @@ -29,7 +29,7 @@ After forking the repo, you'll need to get usaddress running on your machine. Ru ``` cd usaddress -pip install -r requirements.txt +pip install setuptools python setup.py develop parserator train training/labeled.xml usaddress ``` diff --git a/training/multi_word_state_addresses.xml b/training/multi_word_state_addresses.xml new file mode 100644 index 0000000..96c6056 --- /dev/null +++ b/training/multi_word_state_addresses.xml @@ -0,0 +1,10 @@ + + 84 Social Street Woonsocket, Rhode Island 02895 United States + 3481 Kingstown Road South Kingstown, Rhode Island 02892 United States + 209 4th Avenue Asbury Park, New Jersey 07712 United States + 600 E Boulevard Ave, Dept 301 Bismarck, North Dakota 58505 United States + 510 U.S. 17 Business Surfside Beach, South Carolina 29575 United States + 3110 West 12th Street Sioux Falls, South Dakota 57104 United States + 42 Water Street New Shoreham, Rhode Island 02807 United States + 291 Dairy Barn Lane Fort Mill, South Carolina 29715 United States +