Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions contrib/hamilton/contrib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,12 @@
except ImportError:
from version import VERSION as __version__ # noqa: F401

from hamilton import telemetry


def track(module_name: str):
"""Function to call to track module usage."""
if hasattr(telemetry, "create_and_send_contrib_use"): # makes sure Hamilton version is fine.
telemetry.create_and_send_contrib_use(module_name, __version__)


@contextmanager
def catch_import_errors(module_name: str, file_location: str, logger: logging.Logger):
try:
# Yield control to the inner block which will have the import statements.
yield
# After all imports succeed send telemetry
track(module_name)
except ImportError as e:
location = file_location[: file_location.rfind("/")]
logger.error("ImportError: %s", e)
Expand Down
23 changes: 0 additions & 23 deletions docs/get-started/license.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,3 @@ License
=======

Apache Hamilton is released under the `Apache 2.0 License <https://github.com/apache/hamilton/blob/main/LICENSE>`_.



Usage analytics & data privacy
-----------------------------------
By default, when using Apache Hamilton, it collects anonymous usage data to help improve Apache Hamilton and know where to apply development
efforts.

We capture three types of events: one when the `Driver` object is instantiated, one when the `execute()` call on the \
`Driver` object completes, and one for most `Driver` object function invocations.
No user data or potentially sensitive information is or ever will be collected. The captured data is limited to:

* Operating System and Python version
* A persistent UUID to indentify the session, stored in ~/.hamilton.conf.
* Error stack trace limited to Apache Hamilton code, if one occurs.
* Information on what features you're using from Apache Hamilton: decorators, adapters, result builders.
* How Apache Hamilton is being used: number of final nodes in DAG, number of modules, size of objects passed to `execute()`, \
the name of the Driver function being invoked.


Else see :doc:`/reference/disabling-telemetry` for how to disable telemetry.

Otherwise we invite you to inspect telemetry.py for details.
1 change: 0 additions & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ reference/lifecycle-hooks/index
reference/result-builders/index
reference/io/index
reference/dataflows/index
reference/disabling-telemetry.md
```

```{toctree}
Expand Down
40 changes: 0 additions & 40 deletions docs/reference/disabling-telemetry.md

This file was deleted.

67 changes: 1 addition & 66 deletions hamilton/async_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,12 @@
import asyncio
import inspect
import logging
import sys
import time
import typing
import uuid
from types import ModuleType
from typing import Any

import hamilton.lifecycle.base as lifecycle_base
from hamilton import base, driver, graph, lifecycle, node, telemetry
from hamilton import base, driver, graph, lifecycle, node
from hamilton.execution.graph_functions import create_error_message
from hamilton.io.materialization import ExtractorFactory, MaterializerFactory

Expand Down Expand Up @@ -375,9 +372,6 @@ async def execute(
"display_graph=True is not supported for the async graph adapter. "
"Instead you should be using visualize_execution."
)
start_time = time.time()
run_successful = True
error = None
_final_vars = self._create_final_vars(final_vars)
try:
outputs = await self.raw_execute(_final_vars, overrides, display_graph, inputs=inputs)
Expand All @@ -386,67 +380,8 @@ async def execute(
return self.adapter.call_lifecycle_method_sync("do_build_result", outputs=outputs)
return outputs
except Exception as e:
run_successful = False
logger.error(driver.SLACK_ERROR_MESSAGE)
error = telemetry.sanitize_error(*sys.exc_info())
raise e
finally:
duration = time.time() - start_time
# ensure we can capture telemetry in async friendly way.
if telemetry.is_telemetry_enabled():

async def make_coroutine():
self.capture_execute_telemetry(
error, final_vars, inputs, overrides, run_successful, duration
)

try:
# we don't have to await because we are running within the event loop.
asyncio.create_task(make_coroutine())
except Exception as e:
if logger.isEnabledFor(logging.DEBUG):
logger.error(f"Encountered error submitting async telemetry:\n{e}")

def capture_constructor_telemetry(
self,
error: str | None,
modules: tuple[ModuleType],
config: dict[str, Any],
adapter: base.HamiltonGraphAdapter,
):
"""Ensures we capture constructor telemetry the right way in an async context.

This is a simpler wrapper around what's in the driver class.

:param error: sanitized error string, if any.
:param modules: tuple of modules to build DAG from.
:param config: config to create the driver.
:param adapter: adapter class object.
"""
if telemetry.is_telemetry_enabled():
try:
# check whether the event loop has been started yet or not
loop = asyncio.get_event_loop()
if loop.is_running():
loop.run_in_executor(
None,
super(AsyncDriver, self).capture_constructor_telemetry,
error,
modules,
config,
adapter,
)
else:

async def make_coroutine():
super(AsyncDriver, self).capture_constructor_telemetry(
error, modules, config, adapter
)

loop.run_until_complete(make_coroutine())
except Exception as e:
if logger.isEnabledFor(logging.DEBUG):
logger.error(f"Encountered error submitting async telemetry:\n{e}")


class Builder(driver.Builder):
Expand Down
3 changes: 0 additions & 3 deletions hamilton/cli/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
warnings.filterwarnings("ignore", category=UserWarning)
from hamilton import driver

from hamilton import telemetry
from hamilton.cli import commands

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -120,8 +119,6 @@ def main(
] = False,
):
"""Hamilton CLI"""
if telemetry.is_telemetry_enabled():
telemetry.create_and_send_cli_event(ctx.invoked_subcommand)
state.verbose = verbose
state.json_out = json_out
logger.debug(f"verbose set to {verbose}")
Expand Down
8 changes: 0 additions & 8 deletions hamilton/contrib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,12 @@

__version__ = "__unknown__" # this will be overwritten once sf-hamilton-contrib is installed.

from hamilton import telemetry


@contextmanager
def catch_import_errors(module_name: str, file_location: str, logger: logging.Logger):
try:
# Yield control to the inner block which will have the import statements.
yield
# After all imports succeed send telemetry
if "." in module_name:
telemetry.create_and_send_contrib_use(module_name, __version__)
else:
# we are importing it dynamically thus a "package" isn't present so file_location has the info.
telemetry.create_and_send_contrib_use(file_location, __version__)
except ImportError as e:
location = file_location[: file_location.rfind("/")]
logger.error("ImportError: %s", e)
Expand Down
35 changes: 7 additions & 28 deletions hamilton/dataflows/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
TODO: expect this to have a CLI interface in the future.
"""

import functools
import importlib
import json
import logging
Expand All @@ -36,7 +35,7 @@
from types import ModuleType
from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Type, Union

from hamilton import driver, telemetry
from hamilton import driver

if TYPE_CHECKING:
import builtins
Expand Down Expand Up @@ -65,37 +64,17 @@


def _track_function_call(call_fn: Callable) -> Callable:
"""Decorator to wrap the __call__ to count usage.
"""No-op decorator kept for backwards compatibility.

:param call_fn: the `__call__` function.
:return: the wrapped call function.
:param call_fn: the function.
:return: the same function, unwrapped.
"""

@functools.wraps(call_fn)
def track_call(*args, **kwargs):
event_json = telemetry.create_dataflow_function_invocation_event_json(call_fn.__name__)
telemetry.send_event_json(event_json)
return call_fn(*args, **kwargs)

return track_call
return call_fn


def _track_download(is_official: bool, user: str | None, dataflow_name: str, version: str):
"""Inner function to track "downloads" of a dataflow.

:param is_official: is this an official dataflow? False == user.
:param user: If not official, what is the github user name.
:param dataflow_name: the name of the dataflow
:param version: the version. Either git hash, or the package version.
"""
if is_official:
category = "DAGWORKS"
else:
category = "USER"
event_json = telemetry.create_dataflow_download_event_json(
category, user, dataflow_name, version
)
telemetry.send_event_json(event_json)
"""No-op. Telemetry has been removed."""
pass


def _get_request(url: str) -> tuple[int, str]:
Expand Down
Loading
Loading